mbelib_rs/encode/
analyze.rs

1// SPDX-FileCopyrightText: 2009 Pavel Yazev (OP25 imbe_vocoder/encode.cc)
2// SPDX-FileCopyrightText: 2026 Swift Raccoon
3// SPDX-License-Identifier: GPL-2.0-or-later OR GPL-3.0-or-later
4
5//! Front-end analysis: PCM frame → (pitch-buffers updated, FFT output).
6//!
7//! Port of the signal-conditioning and FFT section of
8//! `imbe_vocoder::encode()` in OP25. Accepts one 160-sample PCM frame
9//! (16 kHz-equivalent energy — i.e. i16 inputs normalized to `[-1,
10//! 1)`) and produces:
11//!
12//! 1. updated pitch-history buffers (DC-removed in `pitch_ref_buf`,
13//!    LPF'd in `pitch_est_buf`), ready for downstream pitch estimation
14//! 2. a 256-point complex FFT of the windowed signal, centered on
15//!    `pitch_ref_buf[150]`, ready for pitch refinement and spectral
16//!    amplitude extraction
17//!
18//! The pitch-estimation step (`pitch_est()`) itself runs in phase P3
19//! and consumes `pitch_est_buf`. Phase P4 consumes the FFT output.
20
21use realfft::RealFftPlanner;
22use realfft::num_complex::Complex;
23
24#[cfg(not(feature = "kenwood-tables"))]
25use crate::encode::dc_rmv::dc_rmv;
26use crate::encode::pe_lpf::pe_lpf;
27use crate::encode::state::{EncoderBuffers, FFT_LENGTH, FRAME, PITCH_EST_BUF_SIZE};
28use crate::encode::window::WR_HALF;
29
30/// Per-stream FFT planning cache.
31///
32/// Plans are thread-bound in `realfft` — we keep one plan per
33/// encoder instance rather than re-planning every frame. Each call
34/// to [`analyze_frame`] borrows the planner through a `&mut`.
35pub struct FftPlan {
36    planner: RealFftPlanner<f32>,
37    /// Scratch buffer the realfft plan uses internally. Sized by the
38    /// plan on first use.
39    scratch: Vec<Complex<f32>>,
40}
41
42impl std::fmt::Debug for FftPlan {
43    // `RealFftPlanner` does not implement `Debug` (upstream choice —
44    // the FFTW-style plans hold runtime codegen state). We print the
45    // cached scratch size instead so debug output remains useful.
46    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47        f.debug_struct("FftPlan")
48            .field("planner", &"RealFftPlanner<f32> { .. }")
49            .field("scratch_len", &self.scratch.len())
50            .finish()
51    }
52}
53
54impl FftPlan {
55    /// Construct a fresh FFT plan cache. Cheap; the actual plan is
56    /// built lazily on the first [`analyze_frame`] call.
57    #[must_use]
58    pub fn new() -> Self {
59        Self {
60            planner: RealFftPlanner::<f32>::new(),
61            scratch: Vec::new(),
62        }
63    }
64}
65
66impl Default for FftPlan {
67    fn default() -> Self {
68        Self::new()
69    }
70}
71
72/// Ingest one 160-sample frame and run the front-end analysis.
73///
74/// - `snd` is the new audio frame as f32 samples in nominal `[-1, 1)`.
75///   Callers converting from i16 should divide by 32768.0.
76/// - `bufs` is the per-stream working state; buffers are shifted and
77///   updated in place.
78/// - `plan` carries the realfft plan cache.
79/// - `fft_out` receives a complex spectrum of length `FFT_LENGTH / 2 + 1`
80///   = 129. Realfft returns only the non-redundant half of the
81///   Hermitian-symmetric complex FFT; for the purposes of pitch
82///   refinement and spectral amplitude extraction that's sufficient.
83///
84/// # Panics
85///
86/// Panics if `snd.len() < FRAME` or `fft_out.len() != FFT_LENGTH / 2 + 1`.
87/// Both are caller contracts; the downstream encoder controls both.
88pub fn analyze_frame(
89    snd: &[f32],
90    bufs: &mut EncoderBuffers,
91    plan: &mut FftPlan,
92    fft_out: &mut [Complex<f32>],
93) {
94    assert!(
95        snd.len() >= FRAME,
96        "analyze_frame: snd must contain at least FRAME samples",
97    );
98    assert_eq!(
99        fft_out.len(),
100        FFT_LENGTH / 2 + 1,
101        "analyze_frame: fft_out must be FFT_LENGTH/2 + 1 complex bins",
102    );
103
104    // Step 1: slide both pitch-history buffers one frame left to make
105    // room for the new samples at the tail.
106    bufs.shift_pitch_history();
107
108    // Step 2: input conditioning — remove DC (plus, when Kenwood
109    // tables are enabled, everything below ≈345 Hz) and write the
110    // result into `pitch_ref_buf`'s tail. The default path uses
111    // OP25's 13 Hz first-order HPF; the `kenwood-tables` path uses
112    // the radio's own 345 Hz biquad HPF (zeros on the unit circle
113    // at DC, corner ≈345 Hz) which rejects rumble entirely before
114    // pitch analysis sees it. Either way, the tail slice of
115    // `pitch_ref_buf` is what downstream pitch refinement + FFT
116    // stages read — swapping the filter doesn't change the data
117    // flow, only the frequency response of the input.
118    let tail_start = PITCH_EST_BUF_SIZE - FRAME;
119    let (prefix, tail) = bufs.pitch_ref_buf.split_at_mut(tail_start);
120    let _ = prefix; // keeping older history intact
121    #[cfg(not(feature = "kenwood-tables"))]
122    dc_rmv(&snd[..FRAME], tail, &mut bufs.dc_rmv_mem);
123    #[cfg(feature = "kenwood-tables")]
124    crate::encode::kenwood::filter::biquad_df1_section(
125        &crate::encode::kenwood::biquads::HPF_345HZ_COEFFS,
126        &snd[..FRAME],
127        tail,
128        &mut bufs.kenwood_hpf_mem,
129    );
130
131    // Step 3: apply the pitch-estimation LPF from pitch_ref_buf into
132    // pitch_est_buf (both in the same tail slot). We can't borrow
133    // `bufs.pitch_ref_buf` and `bufs.pitch_est_buf` mutably together,
134    // so copy the DC-removed samples into a small scratch vec first.
135    let dc_removed_tail: Vec<f32> = bufs
136        .pitch_ref_buf
137        .get(tail_start..)
138        .map(<[f32]>::to_vec)
139        .unwrap_or_default();
140    let est_tail = bufs
141        .pitch_est_buf
142        .get_mut(tail_start..)
143        .expect("tail slice in range");
144    pe_lpf(&dc_removed_tail, est_tail, &mut bufs.pe_lpf_mem);
145
146    // Step 4: build a 256-sample real-input buffer with Yazev's
147    // scatter layout — windowed signal at indices [146..256] and
148    // [0, 1..111], zeros at [111..146]. This places the center of the
149    // 221-sample analysis window at index 0 of the FFT input (time
150    // domain) so the spectrum comes out with zero group delay for the
151    // center sample.
152    let mut real_buf = vec![0.0_f32; FFT_LENGTH];
153    // [146..256] ← pitch_ref_buf[40..150] × WR_HALF[0..110]
154    for i in 0..110 {
155        let sig = *bufs.pitch_ref_buf.get(40 + i).unwrap_or(&0.0);
156        let w = *WR_HALF.get(i).unwrap_or(&0.0);
157        if let Some(slot) = real_buf.get_mut(146 + i) {
158            *slot = sig * w;
159        }
160    }
161    // [0] ← pitch_ref_buf[150] (center sample, unwindowed)
162    if let Some(slot) = real_buf.get_mut(0) {
163        *slot = *bufs.pitch_ref_buf.get(150).unwrap_or(&0.0);
164    }
165    // [1..111] ← pitch_ref_buf[151..261] × WR_HALF[109..=0] (descending)
166    for i in 0..110 {
167        let sig = *bufs.pitch_ref_buf.get(151 + i).unwrap_or(&0.0);
168        let w = *WR_HALF.get(109 - i).unwrap_or(&0.0);
169        if let Some(slot) = real_buf.get_mut(1 + i) {
170            *slot = sig * w;
171        }
172    }
173    // [111..146] already zero.
174
175    // Step 5: forward FFT. RealFftPlanner reuses a plan per length;
176    // the first call per-stream is O(FFT_LENGTH log FFT_LENGTH)
177    // planning cost, subsequent calls reuse.
178    let fft = plan.planner.plan_fft_forward(FFT_LENGTH);
179    let scratch_len = fft.get_scratch_len();
180    if plan.scratch.len() < scratch_len {
181        plan.scratch.resize(scratch_len, Complex::new(0.0, 0.0));
182    }
183    // realfft returns an error on length mismatch; our lengths are
184    // invariant so we handle it by zeroing output on any unexpected
185    // failure rather than propagating.
186    let result = fft.process_with_scratch(&mut real_buf, fft_out, &mut plan.scratch);
187    if result.is_err() {
188        for bin in fft_out.iter_mut() {
189            *bin = Complex::new(0.0, 0.0);
190        }
191    }
192}
193
194#[cfg(test)]
195mod tests {
196    use super::{FFT_LENGTH, FRAME, FftPlan, analyze_frame};
197    use crate::encode::state::EncoderBuffers;
198    use realfft::num_complex::Complex;
199
200    fn fresh_state() -> (EncoderBuffers, FftPlan, Vec<Complex<f32>>) {
201        (
202            EncoderBuffers::new(),
203            FftPlan::new(),
204            vec![Complex::new(0.0, 0.0); FFT_LENGTH / 2 + 1],
205        )
206    }
207
208    /// A zero input produces a zero spectrum.
209    #[test]
210    fn silent_input_produces_zero_spectrum() {
211        let (mut bufs, mut plan, mut fft_out) = fresh_state();
212        let snd = [0.0_f32; FRAME];
213        analyze_frame(&snd, &mut bufs, &mut plan, &mut fft_out);
214        for bin in &fft_out {
215            assert!(bin.norm() < 1e-9, "nonzero bin {bin} for silent input",);
216        }
217    }
218
219    /// A pure sine at 500 Hz sampled at 8 kHz (bin 16 in a 256-pt FFT
220    /// at 8 kHz SR) should show a peak near bin 16 after the front-end
221    /// stabilizes. We feed the sine for several frames to let both
222    /// the DC-remover and the LPF settle.
223    #[test]
224    fn sine_input_peaks_near_expected_bin() {
225        let (mut bufs, mut plan, mut fft_out) = fresh_state();
226        let freq_hz = 500.0_f32;
227        let sample_rate = 8000.0_f32;
228        // Run 5 frames to let transients settle.
229        for frame_idx in 0..5 {
230            let snd: Vec<f32> = (0..FRAME)
231                .map(|i| {
232                    #[allow(clippy::cast_precision_loss)]
233                    let t = (frame_idx * FRAME + i) as f32;
234                    (t * 2.0 * std::f32::consts::PI * freq_hz / sample_rate).sin()
235                })
236                .collect();
237            analyze_frame(&snd, &mut bufs, &mut plan, &mut fft_out);
238        }
239
240        // Bin index for 500 Hz at 8 kHz / 256-pt FFT: k = f * N / SR =
241        // 500 * 256 / 8000 = 16.
242        // The window spreads energy across neighboring bins, so we
243        // accept a small cluster around bin 16.
244        let target = 16;
245        let mut max_bin = 0;
246        let mut max_mag = 0.0_f32;
247        for (i, bin) in fft_out.iter().enumerate() {
248            let m = bin.norm();
249            if m > max_mag {
250                max_mag = m;
251                max_bin = i;
252            }
253        }
254        let delta = (i64::try_from(max_bin).unwrap_or(0) - target).abs();
255        assert!(
256            delta <= 2,
257            "peak at bin {max_bin}, expected near {target} (mag {max_mag})",
258        );
259    }
260}
mbelib_rs/encode/analyze.rs

mbelib_rs/encode/
analyze.rs