mbelib_rs/encode/analyze.rs
1// SPDX-FileCopyrightText: 2009 Pavel Yazev (OP25 imbe_vocoder/encode.cc)
2// SPDX-FileCopyrightText: 2026 Swift Raccoon
3// SPDX-License-Identifier: GPL-2.0-or-later OR GPL-3.0-or-later
4
5//! Front-end analysis: PCM frame → (pitch-buffers updated, FFT output).
6//!
7//! Port of the signal-conditioning and FFT section of
8//! `imbe_vocoder::encode()` in OP25. Accepts one 160-sample PCM frame
9//! (16 kHz-equivalent energy — i.e. i16 inputs normalized to `[-1,
10//! 1)`) and produces:
11//!
12//! 1. updated pitch-history buffers (DC-removed in `pitch_ref_buf`,
13//! LPF'd in `pitch_est_buf`), ready for downstream pitch estimation
14//! 2. a 256-point complex FFT of the windowed signal, centered on
15//! `pitch_ref_buf[150]`, ready for pitch refinement and spectral
16//! amplitude extraction
17//!
18//! The pitch-estimation step (`pitch_est()`) itself runs in phase P3
19//! and consumes `pitch_est_buf`. Phase P4 consumes the FFT output.
20
21use realfft::RealFftPlanner;
22use realfft::num_complex::Complex;
23
24#[cfg(not(feature = "kenwood-tables"))]
25use crate::encode::dc_rmv::dc_rmv;
26use crate::encode::pe_lpf::pe_lpf;
27use crate::encode::state::{EncoderBuffers, FFT_LENGTH, FRAME, PITCH_EST_BUF_SIZE};
28use crate::encode::window::WR_HALF;
29
30/// Per-stream FFT planning cache.
31///
32/// Plans are thread-bound in `realfft` — we keep one plan per
33/// encoder instance rather than re-planning every frame. Each call
34/// to [`analyze_frame`] borrows the planner through a `&mut`.
35pub struct FftPlan {
36 planner: RealFftPlanner<f32>,
37 /// Scratch buffer the realfft plan uses internally. Sized by the
38 /// plan on first use.
39 scratch: Vec<Complex<f32>>,
40}
41
42impl std::fmt::Debug for FftPlan {
43 // `RealFftPlanner` does not implement `Debug` (upstream choice —
44 // the FFTW-style plans hold runtime codegen state). We print the
45 // cached scratch size instead so debug output remains useful.
46 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47 f.debug_struct("FftPlan")
48 .field("planner", &"RealFftPlanner<f32> { .. }")
49 .field("scratch_len", &self.scratch.len())
50 .finish()
51 }
52}
53
54impl FftPlan {
55 /// Construct a fresh FFT plan cache. Cheap; the actual plan is
56 /// built lazily on the first [`analyze_frame`] call.
57 #[must_use]
58 pub fn new() -> Self {
59 Self {
60 planner: RealFftPlanner::<f32>::new(),
61 scratch: Vec::new(),
62 }
63 }
64}
65
66impl Default for FftPlan {
67 fn default() -> Self {
68 Self::new()
69 }
70}
71
72/// Ingest one 160-sample frame and run the front-end analysis.
73///
74/// - `snd` is the new audio frame as f32 samples in nominal `[-1, 1)`.
75/// Callers converting from i16 should divide by 32768.0.
76/// - `bufs` is the per-stream working state; buffers are shifted and
77/// updated in place.
78/// - `plan` carries the realfft plan cache.
79/// - `fft_out` receives a complex spectrum of length `FFT_LENGTH / 2 + 1`
80/// = 129. Realfft returns only the non-redundant half of the
81/// Hermitian-symmetric complex FFT; for the purposes of pitch
82/// refinement and spectral amplitude extraction that's sufficient.
83///
84/// # Panics
85///
86/// Panics if `snd.len() < FRAME` or `fft_out.len() != FFT_LENGTH / 2 + 1`.
87/// Both are caller contracts; the downstream encoder controls both.
88pub fn analyze_frame(
89 snd: &[f32],
90 bufs: &mut EncoderBuffers,
91 plan: &mut FftPlan,
92 fft_out: &mut [Complex<f32>],
93) {
94 assert!(
95 snd.len() >= FRAME,
96 "analyze_frame: snd must contain at least FRAME samples",
97 );
98 assert_eq!(
99 fft_out.len(),
100 FFT_LENGTH / 2 + 1,
101 "analyze_frame: fft_out must be FFT_LENGTH/2 + 1 complex bins",
102 );
103
104 // Step 1: slide both pitch-history buffers one frame left to make
105 // room for the new samples at the tail.
106 bufs.shift_pitch_history();
107
108 // Step 2: input conditioning — remove DC (plus, when Kenwood
109 // tables are enabled, everything below ≈345 Hz) and write the
110 // result into `pitch_ref_buf`'s tail. The default path uses
111 // OP25's 13 Hz first-order HPF; the `kenwood-tables` path uses
112 // the radio's own 345 Hz biquad HPF (zeros on the unit circle
113 // at DC, corner ≈345 Hz) which rejects rumble entirely before
114 // pitch analysis sees it. Either way, the tail slice of
115 // `pitch_ref_buf` is what downstream pitch refinement + FFT
116 // stages read — swapping the filter doesn't change the data
117 // flow, only the frequency response of the input.
118 let tail_start = PITCH_EST_BUF_SIZE - FRAME;
119 let (prefix, tail) = bufs.pitch_ref_buf.split_at_mut(tail_start);
120 let _ = prefix; // keeping older history intact
121 #[cfg(not(feature = "kenwood-tables"))]
122 dc_rmv(&snd[..FRAME], tail, &mut bufs.dc_rmv_mem);
123 #[cfg(feature = "kenwood-tables")]
124 crate::encode::kenwood::filter::biquad_df1_section(
125 &crate::encode::kenwood::biquads::HPF_345HZ_COEFFS,
126 &snd[..FRAME],
127 tail,
128 &mut bufs.kenwood_hpf_mem,
129 );
130
131 // Step 3: apply the pitch-estimation LPF from pitch_ref_buf into
132 // pitch_est_buf (both in the same tail slot). We can't borrow
133 // `bufs.pitch_ref_buf` and `bufs.pitch_est_buf` mutably together,
134 // so copy the DC-removed samples into a small scratch vec first.
135 let dc_removed_tail: Vec<f32> = bufs
136 .pitch_ref_buf
137 .get(tail_start..)
138 .map(<[f32]>::to_vec)
139 .unwrap_or_default();
140 let est_tail = bufs
141 .pitch_est_buf
142 .get_mut(tail_start..)
143 .expect("tail slice in range");
144 pe_lpf(&dc_removed_tail, est_tail, &mut bufs.pe_lpf_mem);
145
146 // Step 4: build a 256-sample real-input buffer with Yazev's
147 // scatter layout — windowed signal at indices [146..256] and
148 // [0, 1..111], zeros at [111..146]. This places the center of the
149 // 221-sample analysis window at index 0 of the FFT input (time
150 // domain) so the spectrum comes out with zero group delay for the
151 // center sample.
152 let mut real_buf = vec![0.0_f32; FFT_LENGTH];
153 // [146..256] ← pitch_ref_buf[40..150] × WR_HALF[0..110]
154 for i in 0..110 {
155 let sig = *bufs.pitch_ref_buf.get(40 + i).unwrap_or(&0.0);
156 let w = *WR_HALF.get(i).unwrap_or(&0.0);
157 if let Some(slot) = real_buf.get_mut(146 + i) {
158 *slot = sig * w;
159 }
160 }
161 // [0] ← pitch_ref_buf[150] (center sample, unwindowed)
162 if let Some(slot) = real_buf.get_mut(0) {
163 *slot = *bufs.pitch_ref_buf.get(150).unwrap_or(&0.0);
164 }
165 // [1..111] ← pitch_ref_buf[151..261] × WR_HALF[109..=0] (descending)
166 for i in 0..110 {
167 let sig = *bufs.pitch_ref_buf.get(151 + i).unwrap_or(&0.0);
168 let w = *WR_HALF.get(109 - i).unwrap_or(&0.0);
169 if let Some(slot) = real_buf.get_mut(1 + i) {
170 *slot = sig * w;
171 }
172 }
173 // [111..146] already zero.
174
175 // Step 5: forward FFT. RealFftPlanner reuses a plan per length;
176 // the first call per-stream is O(FFT_LENGTH log FFT_LENGTH)
177 // planning cost, subsequent calls reuse.
178 let fft = plan.planner.plan_fft_forward(FFT_LENGTH);
179 let scratch_len = fft.get_scratch_len();
180 if plan.scratch.len() < scratch_len {
181 plan.scratch.resize(scratch_len, Complex::new(0.0, 0.0));
182 }
183 // realfft returns an error on length mismatch; our lengths are
184 // invariant so we handle it by zeroing output on any unexpected
185 // failure rather than propagating.
186 let result = fft.process_with_scratch(&mut real_buf, fft_out, &mut plan.scratch);
187 if result.is_err() {
188 for bin in fft_out.iter_mut() {
189 *bin = Complex::new(0.0, 0.0);
190 }
191 }
192}
193
194#[cfg(test)]
195mod tests {
196 use super::{FFT_LENGTH, FRAME, FftPlan, analyze_frame};
197 use crate::encode::state::EncoderBuffers;
198 use realfft::num_complex::Complex;
199
200 fn fresh_state() -> (EncoderBuffers, FftPlan, Vec<Complex<f32>>) {
201 (
202 EncoderBuffers::new(),
203 FftPlan::new(),
204 vec![Complex::new(0.0, 0.0); FFT_LENGTH / 2 + 1],
205 )
206 }
207
208 /// A zero input produces a zero spectrum.
209 #[test]
210 fn silent_input_produces_zero_spectrum() {
211 let (mut bufs, mut plan, mut fft_out) = fresh_state();
212 let snd = [0.0_f32; FRAME];
213 analyze_frame(&snd, &mut bufs, &mut plan, &mut fft_out);
214 for bin in &fft_out {
215 assert!(bin.norm() < 1e-9, "nonzero bin {bin} for silent input",);
216 }
217 }
218
219 /// A pure sine at 500 Hz sampled at 8 kHz (bin 16 in a 256-pt FFT
220 /// at 8 kHz SR) should show a peak near bin 16 after the front-end
221 /// stabilizes. We feed the sine for several frames to let both
222 /// the DC-remover and the LPF settle.
223 #[test]
224 fn sine_input_peaks_near_expected_bin() {
225 let (mut bufs, mut plan, mut fft_out) = fresh_state();
226 let freq_hz = 500.0_f32;
227 let sample_rate = 8000.0_f32;
228 // Run 5 frames to let transients settle.
229 for frame_idx in 0..5 {
230 let snd: Vec<f32> = (0..FRAME)
231 .map(|i| {
232 #[allow(clippy::cast_precision_loss)]
233 let t = (frame_idx * FRAME + i) as f32;
234 (t * 2.0 * std::f32::consts::PI * freq_hz / sample_rate).sin()
235 })
236 .collect();
237 analyze_frame(&snd, &mut bufs, &mut plan, &mut fft_out);
238 }
239
240 // Bin index for 500 Hz at 8 kHz / 256-pt FFT: k = f * N / SR =
241 // 500 * 256 / 8000 = 16.
242 // The window spreads energy across neighboring bins, so we
243 // accept a small cluster around bin 16.
244 let target = 16;
245 let mut max_bin = 0;
246 let mut max_mag = 0.0_f32;
247 for (i, bin) in fft_out.iter().enumerate() {
248 let m = bin.norm();
249 if m > max_mag {
250 max_mag = m;
251 max_bin = i;
252 }
253 }
254 let delta = (i64::try_from(max_bin).unwrap_or(0) - target).abs();
255 assert!(
256 delta <= 2,
257 "peak at bin {max_bin}, expected near {target} (mag {max_mag})",
258 );
259 }
260}