mbelib_rs/encode/
spectral.rs

1// SPDX-FileCopyrightText: 2026 Swift Raccoon
2// SPDX-License-Identifier: GPL-2.0-or-later OR GPL-3.0-or-later
3//
4// Algorithmic reference: Pavel Yazev's `imbe_vocoder/sa_encode.cc`
5// (OP25, 2009, GPLv3). This module handles the raw harmonic
6// magnitude extraction from the FFT output; the log-magnitude
7// predictor + PRBA/HOC vector quantization the reference chains
8// onto it lives in [`crate::encode::quantize`] (fed by our output
9// via [`crate::encode::encoder::AmbeEncoder::encode_frame`]).
10
11//! Spectral amplitude extraction: FFT bins → per-harmonic magnitudes.
12
13use realfft::num_complex::Complex;
14
15/// Maximum number of harmonic amplitudes (matches IMBE/AMBE L).
16pub const MAX_HARMONICS: usize = 56;
17
18/// Per-frame spectral amplitudes.
19#[derive(Debug, Clone, Copy)]
20pub struct SpectralAmplitudes {
21    /// Linear magnitudes per harmonic. Only `num_harmonics` are valid.
22    pub magnitudes: [f32; MAX_HARMONICS],
23    /// Number of harmonics actually computed for this frame.
24    pub num_harmonics: usize,
25}
26
27/// Extract harmonic amplitudes from the FFT half-spectrum.
28///
29/// For each harmonic `k` from 1 to L, integrate the magnitude across
30/// the three bins nearest to `k · f0_bin` (centre bin ± 1). Using a
31/// 3-bin window rather than a single round-to-nearest bin recovers
32/// energy for harmonics that fall between bin centres (fractional
33/// `f0_bin` from pitch periods that aren't integer factors of the
34/// FFT size). Without this, harmonics at bin offset ±0.5 drop ~3 dB
35/// below their true amplitude — measurable on real voice captures
36/// against the DVSI chip's own extraction, where the missing energy
37/// produced flat Gm vectors that the PRBA codebook search always
38/// resolved to near-origin entries (flat envelope → no formants).
39///
40/// We use sum-of-squares then `sqrt` so the window accumulates power
41/// rather than raw magnitudes; this is the canonical way to pool
42/// nearby bins without the magnitude-vs-phase ambiguity.
43///
44/// L is limited by both the FFT size and the AMBE codec:
45/// `L = min(floor((N − 1) / f0_bin), MAX_HARMONICS)`.
46#[must_use]
47#[allow(
48    clippy::cast_possible_truncation,
49    clippy::cast_sign_loss,
50    clippy::cast_precision_loss,
51    reason = "DSP bin math; values bounded by FFT length"
52)]
53pub fn extract_spectral_amplitudes(fft_out: &[Complex<f32>], f0_bin: f32) -> SpectralAmplitudes {
54    let mut magnitudes = [0.0_f32; MAX_HARMONICS];
55    if f0_bin < 0.5 {
56        return SpectralAmplitudes {
57            magnitudes,
58            num_harmonics: 0,
59        };
60    }
61    let max_k = ((fft_out.len() - 1) as f32 / f0_bin).floor() as usize;
62    let num_harmonics = max_k.min(MAX_HARMONICS);
63    for k in 1..=num_harmonics {
64        let centre = (f0_bin * k as f32).round() as usize;
65        // Integrate power over the 3-bin window [centre-1, centre+1].
66        // `saturating_sub` handles the k=1 boundary; the `min` on the
67        // upper end handles the Nyquist boundary.
68        let lo = centre.saturating_sub(1);
69        let hi = (centre + 1).min(fft_out.len() - 1);
70        let mut power = 0.0_f32;
71        for b in lo..=hi {
72            if let Some(c) = fft_out.get(b) {
73                power += c.norm_sqr();
74            }
75        }
76        if let Some(slot) = magnitudes.get_mut(k - 1) {
77            *slot = power.sqrt();
78        }
79    }
80    SpectralAmplitudes {
81        magnitudes,
82        num_harmonics,
83    }
84}
85
86#[cfg(test)]
87mod tests {
88    use super::extract_spectral_amplitudes;
89    use realfft::num_complex::Complex;
90
91    #[test]
92    fn extracts_sparse_harmonic_peaks() {
93        let mut fft_out = vec![Complex::new(0.0, 0.0); 129];
94        let f0_bin = 6.4_f32; // 200 Hz at 8 kHz
95        // Place 3 harmonics with known magnitudes.
96        for (k, mag) in [(1_usize, 1.0_f32), (2, 0.5), (3, 0.25)] {
97            #[allow(
98                clippy::cast_possible_truncation,
99                clippy::cast_sign_loss,
100                clippy::cast_precision_loss,
101                reason = "test-only bin math, values small"
102            )]
103            let bin = (f0_bin * k as f32).round() as usize;
104            if let Some(c) = fft_out.get_mut(bin) {
105                *c = Complex::new(mag, 0.0);
106            }
107        }
108        let amps = extract_spectral_amplitudes(&fft_out, f0_bin);
109        assert!(amps.num_harmonics >= 3);
110        assert!((amps.magnitudes[0] - 1.0).abs() < 1e-6);
111        assert!((amps.magnitudes[1] - 0.5).abs() < 1e-6);
112        assert!((amps.magnitudes[2] - 0.25).abs() < 1e-6);
113    }
114
115    #[test]
116    fn zero_spectrum_gives_zero_magnitudes() {
117        let fft_out = vec![Complex::new(0.0, 0.0); 129];
118        let amps = extract_spectral_amplitudes(&fft_out, 6.4);
119        assert!(amps.magnitudes.iter().all(|&m| m == 0.0));
120    }
121
122    #[test]
123    fn num_harmonics_tracks_pitch() {
124        let fft_out = vec![Complex::new(0.0, 0.0); 129];
125        // Low pitch → more harmonics fit below Nyquist.
126        let low = extract_spectral_amplitudes(&fft_out, 3.0);
127        // High pitch → fewer harmonics.
128        let high = extract_spectral_amplitudes(&fft_out, 20.0);
129        assert!(
130            low.num_harmonics > high.num_harmonics,
131            "low={}, high={}",
132            low.num_harmonics,
133            high.num_harmonics,
134        );
135    }
136}