@@ -214,10 +214,11 @@ static int mp3enc_encode_frame(mp3enc_t * enc, const float * pcm) {
214214 int nch = enc -> channels ;
215215 int padding = mp3enc_get_padding (enc );
216216
217- // Use MS stereo for stereo input (joint stereo mode)
218- // mode=1 (joint), mode_ext=2 (MS on, intensity off)
217+ // MS stereo decision is deferred until after MDCT energy analysis.
218+ // mode=1 (joint) allows switching M/S on or off per frame.
219+ // mode=0 (stereo) would force L/R always. mode=3 (mono).
219220 int mode = (nch == 1 ) ? 3 : 1 ;
220- int mode_ext = ( nch == 1 ) ? 0 : 2 ;
221+ int mode_ext = 0 ; // set after MDCT energy analysis
221222
222223 // Setup header
223224 mp3enc_header hdr ;
@@ -257,74 +258,136 @@ static int mp3enc_encode_frame(mp3enc_t * enc, const float * pcm) {
257258 // Total main_data bits: this frame's area + reservoir from previous frame
258259 int total_md_bits = main_data_bits + resv_bytes * 8 ;
259260
260- // Mean bits per granule (from total budget)
261- int mean_bits = total_md_bits / 2 ;
261+ // Phase 1: compute MDCT + psy for ALL granules before bit allocation.
262+ // We need PE from both granules to weight the bit budget.
263+ float mdct_all [2 ][2 ][576 ]; // [granule][channel][576 MDCT lines]
264+ float saved_xmin [2 ][2 ][MP3ENC_PSY_SFB_MAX ];
265+ float saved_pe [2 ] = { 0.0f , 0.0f };
262266
263- int ix [2 ][2 ][576 ]; // [granule][channel][line]
264- float mdct_lr [2 ][576 ]; // MDCT output per channel before M/S transform
265-
266- int total_bits_used = 0 ;
267- int intra_resv = 0 ; // intra-frame reservoir: bits saved by granule 0 for granule 1
267+ // Accumulators for MS stereo decision
268+ float energy_mid = 0.0f ;
269+ float energy_side = 0.0f ;
268270
269271 for (int gr = 0 ; gr < 2 ; gr ++ ) {
270272 int pcm_offset = gr * 576 ;
271273
272- // Step 1: filterbank + MDCT for all channels
274+ // filterbank + MDCT for all channels
273275 for (int ch = 0 ; ch < nch ; ch ++ ) {
274276 const float * ch_pcm = pcm + ch * 1152 + pcm_offset ;
275-
276- // Run analysis filterbank: 576 PCM samples = 18 calls of 32 samples
277- float sb_out [32 ];
277+ float sb_out [32 ];
278278 for (int slot = 0 ; slot < 18 ; slot ++ ) {
279279 enc -> filter [ch ].process (ch_pcm + slot * 32 , sb_out );
280280 for (int sb = 0 ; sb < 32 ; sb ++ ) {
281281 enc -> sb_cur [ch ][sb ][slot ] = sb_out [sb ];
282282 }
283-
284283 // frequency inversion: negate odd subbands at odd time slots
285284 if (slot & 1 ) {
286285 for (int sb = 1 ; sb < 32 ; sb += 2 ) {
287286 enc -> sb_cur [ch ][sb ][slot ] = - enc -> sb_cur [ch ][sb ][slot ];
288287 }
289288 }
290289 }
291-
292- // MDCT: transform subbands to 576 frequency lines
293- mp3enc_mdct_granule (enc -> sb_prev [ch ], enc -> sb_cur [ch ], mdct_lr [ch ]);
294-
295- // Save current subbands as previous for next granule
290+ mp3enc_mdct_granule (enc -> sb_prev [ch ], enc -> sb_cur [ch ], mdct_all [gr ][ch ]);
296291 memcpy (enc -> sb_prev [ch ], enc -> sb_cur [ch ], sizeof (enc -> sb_cur [ch ]));
297292 }
298293
299- // Step 2: MS stereo transform
294+ // MS stereo energy analysis (before transform, on L/R data).
295+ // Accumulate mid and side energy across both granules to decide
296+ // whether M/S coding is beneficial for this frame.
297+ // M/S wins when L and R are correlated (side energy is small).
300298 if (nch == 2 ) {
299+ for (int i = 0 ; i < enc -> lowpass_line && i < 576 ; i ++ ) {
300+ float l = mdct_all [gr ][0 ][i ];
301+ float r = mdct_all [gr ][1 ][i ];
302+ float m = l + r ;
303+ float s = l - r ;
304+ energy_mid += m * m ;
305+ energy_side += s * s ;
306+ }
307+ }
308+ }
309+
310+ // MS stereo decision: use M/S when channels are correlated enough
311+ // that the side channel is cheap to encode. FhG "almost always uses
312+ // ms_stereo" (GPSYCHO docs). We use it unless side energy dominates,
313+ // which means the channels are very different (rare for music).
314+ bool use_ms = false;
315+ if (nch == 2 ) {
316+ // Use M/S unless side channel has more energy than mid.
317+ // This is generous -- almost always enables M/S (like FhG).
318+ use_ms = (energy_side < energy_mid * 1.2f ) || (energy_mid < 1e-20f );
319+ mode_ext = use_ms ? 2 : 0 ;
320+ hdr .mode_ext = mode_ext ;
321+ }
322+
323+ // Now apply M/S transform + lowpass + psy for both granules
324+ for (int gr = 0 ; gr < 2 ; gr ++ ) {
325+ // MS stereo transform
326+ if (use_ms ) {
301327 static const float ms_scale = 0.7071067811865476f ; // 1/sqrt(2)
302328 for (int i = 0 ; i < 576 ; i ++ ) {
303- float l = mdct_lr [0 ][i ];
304- float r = mdct_lr [1 ][i ];
305- mdct_lr [0 ][i ] = (l + r ) * ms_scale ;
306- mdct_lr [1 ][i ] = (l - r ) * ms_scale ;
329+ float l = mdct_all [ gr ] [0 ][i ];
330+ float r = mdct_all [ gr ] [1 ][i ];
331+ mdct_all [ gr ] [0 ][i ] = (l + r ) * ms_scale ;
332+ mdct_all [ gr ] [1 ][i ] = (l - r ) * ms_scale ;
307333 }
308334 }
309335
310- // Step 2b: adaptive lowpass
336+ // adaptive lowpass
311337 for (int ch = 0 ; ch < nch ; ch ++ ) {
312338 for (int i = enc -> lowpass_line ; i < 576 ; i ++ ) {
313- mdct_lr [ch ][i ] = 0.0f ;
339+ mdct_all [ gr ] [ch ][i ] = 0.0f ;
314340 }
315341 }
316342
317- // Step 3: run psy for all channels before bit allocation
318- float saved_xmin [2 ][MP3ENC_PSY_SFB_MAX ];
343+ // psy: compute masking thresholds and perceptual entropy
319344 for (int ch = 0 ; ch < nch ; ch ++ ) {
320- enc -> psy .compute (mdct_lr [ch ], sfb_long , enc -> sr_index , ch );
321- memcpy (saved_xmin [ch ], enc -> psy .xmin , sizeof (enc -> psy .xmin ));
345+ enc -> psy .compute (mdct_all [gr ][ch ], sfb_long , enc -> sr_index , ch );
346+ memcpy (saved_xmin [gr ][ch ], enc -> psy .xmin , sizeof (enc -> psy .xmin ));
347+ saved_pe [gr ] += enc -> psy .pe ;
322348 }
349+ }
323350
324- // Step 4: bit allocation
325- int max_bits = mean_bits + intra_resv ;
351+ // Phase 2: PE-weighted bit allocation + quantization.
352+ // Give more bits to granules with higher perceptual entropy (more complex
353+ // signal). ISO 11172-3 computes PE but then ignores it for allocation;
354+ // LAME uses it to drive the bit reservoir. We weight the per-granule
355+ // budget proportionally to PE, clamped to avoid starving either granule.
356+ int gr_budget [2 ];
357+ float pe_sum = saved_pe [0 ] + saved_pe [1 ];
358+ if (pe_sum > 1e-6f ) {
359+ float frac0 = saved_pe [0 ] / pe_sum ;
360+ gr_budget [0 ] = (int ) ((float ) total_md_bits * frac0 );
361+ gr_budget [1 ] = total_md_bits - gr_budget [0 ];
362+ // clamp: neither granule gets less than 20% or more than 80%
363+ int lo = total_md_bits / 5 ;
364+ int hi = total_md_bits * 4 / 5 ;
365+ for (int gr = 0 ; gr < 2 ; gr ++ ) {
366+ if (gr_budget [gr ] < lo ) {
367+ gr_budget [gr ] = lo ;
368+ }
369+ if (gr_budget [gr ] > hi ) {
370+ gr_budget [gr ] = hi ;
371+ }
372+ }
373+ // re-normalize after clamp
374+ int clamped_sum = gr_budget [0 ] + gr_budget [1 ];
375+ int bits_to_spread = total_md_bits - clamped_sum ;
376+ gr_budget [0 ] += bits_to_spread / 2 ;
377+ gr_budget [1 ] += bits_to_spread - bits_to_spread / 2 ;
378+ } else {
379+ gr_budget [0 ] = total_md_bits / 2 ;
380+ gr_budget [1 ] = total_md_bits - gr_budget [0 ];
381+ }
382+
383+ int ix [2 ][2 ][576 ];
384+ int total_bits_used = 0 ;
385+ int intra_resv = 0 ;
386+
387+ for (int gr = 0 ; gr < 2 ; gr ++ ) {
388+ int max_bits = gr_budget [gr ] + intra_resv ;
326389
327- // Don 't exceed remaining budget
390+ // don 't exceed remaining budget
328391 int remaining_bits = total_md_bits - total_bits_used ;
329392 if (max_bits > remaining_bits ) {
330393 max_bits = remaining_bits ;
@@ -335,17 +398,16 @@ static int mp3enc_encode_frame(mp3enc_t * enc, const float * pcm) {
335398
336399 int bits_per_ch = max_bits / nch ;
337400
338- // Step 5: quantize each channel with saved thresholds
401+ // quantize each channel with psy thresholds
339402 int gr_bits_used = 0 ;
340403 for (int ch = 0 ; ch < nch ; ch ++ ) {
341- int bits = mp3enc_outer_loop (mdct_lr [ ch ], ix [gr ][ch ], si .gr [gr ][ch ], saved_xmin [ch ], bits_per_ch , sfb_long ,
342- enc -> sr_index , gr , si .scfsi [ch ]);
404+ int bits = mp3enc_outer_loop (mdct_all [ gr ][ ch ], ix [gr ][ch ], si .gr [gr ][ch ], saved_xmin [gr ][ ch ], bits_per_ch ,
405+ sfb_long , enc -> sr_index , gr , si .scfsi [ch ]);
343406 gr_bits_used += bits ;
344407 }
345408
346- // Track intra-frame savings: bits not used by this granule
347- // become available for the next granule in this frame.
348- intra_resv += mean_bits - gr_bits_used ;
409+ // intra-frame savings: unused bits carry to next granule
410+ intra_resv += gr_budget [gr ] - gr_bits_used ;
349411 if (intra_resv < 0 ) {
350412 intra_resv = 0 ;
351413 }
0 commit comments