mbelib_rs/encode/encoder.rs
1// SPDX-FileCopyrightText: 2026 Swift Raccoon
2// SPDX-License-Identifier: GPL-2.0-or-later OR GPL-3.0-or-later
3
4//! Top-level D-STAR AMBE encoder.
5//!
6//! Ties together the front-end ([`analyze_frame`]), pitch tracker,
7//! V/UV detector, spectral amplitude extractor, and bit packer into
8//! a single `AmbeEncoder::encode_frame(pcm) -> [u8; 9]` entry point.
9//!
10//! # Status
11//!
12//! Functional end-to-end. Every stage of OP25's `ambe_encoder.cc`
13//! is ported and wired together; the output bytes decode cleanly
14//! through our own decoder and through reference `mbelib`.
15//!
16//! Bit-exact vs OP25 on the stage-5..8 (quantize) path when fed
17//! identical `sa`/`v_uv_dsn`/`prev_mp` state — validated by
18//! `examples/validate_quantize_vs_op25.rs`:
19//! b3/b4/b5/b6/b7 = 100%, b2 (gain) = 99%, b1 (VUV) = 88%,
20//! b0 (pitch) = 60%. b8 (`HOC_B8`) = 30% because OP25 searches the
21//! full 0..=15 codebook in D-STAR mode while the wire format only
22//! carries 3 bits with a forced-zero LSB; our stride-2 search
23//! follows mbelib's decoder convention (the DVSI implementation).
24//!
//! Stages 1..4 (analysis: pitch / `num_harms` / V/UV / sa from FFT)
//! still diverge from OP25 during pitch transitions on the default
//! zero-latency path ([`AmbeEncoder::new`]). The pitch tracker ports
//! OP25's exact E(p) detectability function plus look-back tracking
//! (`pitch_est.cc:200–226`) and sub-multiples analysis
//! (`pitch_est.cc:273–332`). OP25's 2-frame look-ahead DP
//! (`pitch_est.cc:229–270`) is not run on that default path; it is
//! opt-in via [`AmbeEncoder::new_with_lookahead`], which trades
//! ≈40 ms of added latency for it. Audio remains intelligible on real
//! speech despite the divergence because the spectral-envelope
//! reconstruction (stages 5..8) emits the correct codebook entries
//! for whatever pitch we pick.
35//!
36//! # Pipeline
37//!
38//! 1. Front-end: DC remove → pitch-LPF → window → 256-pt FFT.
39//! 2. Pitch estimation (sub-harmonic summation on the LPF'd buffer).
40//! 3. Per-band V/UV decisions from the FFT.
41//! 4. Per-harmonic magnitudes via 3-bin power integration.
42//! 5. Quantization to the 49-bit `ambe_d` parameter vector:
43//! L-constrained W0 search for b0, energy-weighted VUV codebook
44//! search for b1, `AmbePlus` DG nearest for b2, block-DCT → R
45//! pairs → 8-pt DCT → G for PRBA24/PRBA58 (b3/b4), per-block
46//! HOC codebooks for b5..b8. Prediction residual
47//! `T = lsa − 0.65·interp(prev_log2_ml)` uses closed-loop
48//! reconstruction via `decode_params` so encoder and decoder
49//! track identical magnitude history.
50//! 6. Golay(23,12) FEC on C0 and C1, plus outer parity on C0.
51//! 7. LFSR scramble of C1 seeded from C0 data bits.
52//! 8. 72-bit DSD-style interleave to wire order, pack to 9 bytes.
53
54use crate::ecc::ecc_encode;
55use crate::encode::analyze::{FftPlan, analyze_frame};
56use crate::encode::interleave::AMBE_FRAME_BITS;
57use crate::encode::pack::pack_frame;
58use crate::encode::pitch::{PITCH_CANDIDATES, PitchTracker, compute_e_p};
59use crate::encode::quantize::quantize;
60use crate::encode::state::{EncoderBuffers, FFT_LENGTH, FRAME};
61use crate::encode::vuv::{VuvState, detect_vuv_and_sa};
62use crate::unpack::demodulate_c1;
63use realfft::num_complex::Complex;
64
/// Per-frame snapshot buffered by the 2-frame look-ahead pipeline.
///
/// The look-ahead DP commits pitch for frame `N-2` only after it has
/// seen the `E(p)` arrays for frames `N-2`, `N-1`, and `N`. Until
/// frame `N` arrives, quantization of frame `N-2` is deferred and its
/// analysis results are parked in one of these slots; the FFT output
/// is saved so voicing / spectral-amplitude extraction later runs
/// against the same spectrum the encoder saw at analysis time.
struct FrameSlot {
    /// `E(p)` array for this frame, as returned by `compute_e_p` on
    /// the pitch-estimation buffer right after analysis.
    e_p: [f32; PITCH_CANDIDATES],
    /// Copy of the encoder's FFT output taken when this frame was
    /// analyzed (the encoder's own scratch spectrum is overwritten by
    /// the next frame).
    fft_out: Vec<Complex<f32>>,
}
77
/// Top-level D-STAR AMBE 3600×2400 encoder.
///
/// Owns one instance of every per-stream state object. Not thread-safe;
/// construct one per concurrent voice stream.
///
/// # Usage
///
/// ```ignore
/// use mbelib_rs::AmbeEncoder;
///
/// let mut encoder = AmbeEncoder::new();
/// // Feed 160-sample (20 ms at 8 kHz) frames of f32 PCM in [-1.0, 1.0).
/// let pcm: [f32; 160] = [0.0; 160];
/// let ambe_frame: [u8; 9] = encoder.encode_frame(&pcm);
/// ```
pub struct AmbeEncoder {
    /// Front-end working buffers filled by `analyze_frame`; its
    /// `pitch_est_buf` is what the pitch tracker and `compute_e_p`
    /// read.
    bufs: EncoderBuffers,
    /// FFT plan handed to `analyze_frame` on every frame.
    plan: FftPlan,
    /// Pitch tracker, queried through `estimate` (zero-latency path)
    /// or `estimate_with_lookahead` (look-ahead path).
    pitch: PitchTracker,
    /// Scratch FFT output reused across frames
    /// (`FFT_LENGTH / 2 + 1` complex bins).
    fft_out: Vec<Complex<f32>>,
    /// Per-band log-magnitude from the previous frame, indexed by
    /// harmonic number `l` (1-based; slot 0 mirrors slot 1 for the
    /// decoder's band-0 boundary condition).
    ///
    /// The spectral quantization path needs this to compute the
    /// prediction residual `T[l] = lsa[l] - 0.65 * interp_prev[l]`
    /// before matching against the PRBA / HOC codebooks. Without
    /// this the receiver sees `lsa + 0.65*prev_interp` instead of
    /// `lsa`, which drifts unbounded and produces the "generative,
    /// not-voice" sound we observed before this field existed.
    ///
    /// Overwritten every time a frame is emitted — from the
    /// quantizer's outcome for voice frames, or zeroed when the
    /// silence pattern is emitted — so it tracks what the decoder
    /// will hold after parsing that frame.
    prev_log2_ml: [f32; 57],
    /// Previous frame's harmonic count. Used by the band-ratio
    /// mapping `kl = (prev_l / cur_l) * l` that drives the prev-frame
    /// log-magnitude interpolation.
    prev_l: usize,
    /// 2-slot buffer holding analysis output for frames `N-2` and
    /// `N-1`. On `encode_frame(N)` we compute `E(p)_N`, run the DP on
    /// `(E(p)_{N-2}, E(p)_{N-1}, E(p)_N)` to commit pitch for frame
    /// `N-2`, quantize its saved FFT against that pitch, emit bytes,
    /// then drop the oldest slot and append the new one.
    ///
    /// While `pending.len() < 2` the encoder is warming up: the
    /// output is `AMBE_SILENCE` and the encoder's mirror of the
    /// decoder's `prev_log2_ml` history is re-zeroed each time (via
    /// [`Self::reset_prev_state_after_silence`]).
    ///
    /// `None` means look-ahead is disabled entirely — the default
    /// zero-latency [`Self::new`] sets this to `None`, while
    /// [`Self::new_with_lookahead`] sets it to an empty
    /// `Some(Vec::with_capacity(2))`.
    pending: Option<Vec<FrameSlot>>,
    /// Hysteretic V/UV state — previous frame's per-band decisions
    /// plus the slow-update frame-energy ceiling `th_max`. Carried
    /// across frames so the V/UV threshold reflects the signal's
    /// recent history (OP25 `v_uv_det.cc:152`).
    vuv_state: VuvState,
}
138
/// Silence shortcut threshold.
///
/// When the committed pitch estimate's `confidence` is strictly below
/// this value the encoder treats the frame as essentially-no-signal
/// and emits the canonical D-STAR silence pattern directly
/// (`MMDVMHost` / DVSI convention) rather than trying to quantize
/// zeros. Reference for the silence pattern itself:
/// `NULL_AMBE_DATA_BYTES` in `ref/MMDVMHost/DStarDefines.h:44`.
const SILENCE_CONFIDENCE: f32 = 0.05;
145
/// Canonical 9-byte AMBE silence frame.
///
/// Returned verbatim for frames whose pitch confidence falls below
/// [`SILENCE_CONFIDENCE`], and for the first two calls while the
/// 2-frame look-ahead pipeline is still filling.
const AMBE_SILENCE: [u8; 9] = [0x9E, 0x8D, 0x32, 0x88, 0x26, 0x1A, 0x3F, 0x61, 0xE8];
150
151impl AmbeEncoder {
152 /// Construct a fresh encoder using OP25's single-frame
153 /// (look-back + sub-multiples) pitch tracker. Zero added
154 /// latency; each [`encode_frame`](Self::encode_frame) call
155 /// commits pitch for the just-received frame.
156 ///
157 /// This is the backwards-compatible default. Real-voice inputs
158 /// work well here because sub-multiples analysis resolves the
159 /// common octave ambiguities; pure-sine synthetic tests can
160 /// lose the 2P-vs-P disambiguation on settled tones. For the
161 /// full OP25 pitch pipeline (2-frame look-ahead DP), see
162 /// [`Self::new_with_lookahead`] — it costs 40 ms of latency and
163 /// an extra 2-frame warmup but matches OP25's pitch decisions
164 /// on pure sines as well as voice.
165 #[must_use]
166 pub fn new() -> Self {
167 Self {
168 bufs: EncoderBuffers::new(),
169 plan: FftPlan::new(),
170 pitch: PitchTracker::new(),
171 fft_out: vec![Complex::new(0.0, 0.0); FFT_LENGTH / 2 + 1],
172 prev_log2_ml: [0.0_f32; 57],
173 prev_l: 0,
174 pending: None,
175 vuv_state: VuvState::new(),
176 }
177 }
178
179 /// Construct a fresh encoder WITH the 2-frame look-ahead DP
180 /// enabled. The first two [`encode_frame`](Self::encode_frame)
181 /// calls return `AMBE_SILENCE` while the pipeline fills; frame
182 /// `N-2`'s pitch is committed on the third call (frame `N`).
183 /// Adds ≈40 ms end-to-end latency; matches OP25's pitch-tracking
184 /// behaviour across pure sines and pitch transitions.
185 #[must_use]
186 pub fn new_with_lookahead() -> Self {
187 Self {
188 bufs: EncoderBuffers::new(),
189 plan: FftPlan::new(),
190 pitch: PitchTracker::new(),
191 fft_out: vec![Complex::new(0.0, 0.0); FFT_LENGTH / 2 + 1],
192 prev_log2_ml: [0.0_f32; 57],
193 prev_l: 0,
194 pending: Some(Vec::with_capacity(2)),
195 vuv_state: VuvState::new(),
196 }
197 }
198
199 /// After emitting `AMBE_SILENCE`, overwrite the encoder's
200 /// `prev_log2_ml` / `prev_l` with what the decoder will have
201 /// after parsing that silence frame — otherwise the next voice
202 /// frame's prediction residual
203 /// `T[i] = lsa[i] - 0.65 * interp(prev_log2_ml)[i]` is computed
204 /// against a `prev` state the decoder doesn't share.
205 ///
206 /// Decoder-side silence (b0 ∈ {124, 125}) fixes `w0 = 2π/32`,
207 /// `L = 14`, `vl[1..=14] = false` (all unvoiced), `Tl = 0`.
208 /// The `log_ml` reconstruction collapses to
209 /// `big_gamma - INTERP_WEIGHT * prev_sum`, which without a
210 /// full closed-loop decoder simulation is approximated by zero.
211 /// Zeroing the state is not perfect, but it is the same
212 /// approximation we use at construction and on `reset()`, so at
213 /// least encoder and decoder both observe the same null baseline
214 /// across any silence-to-voice transition.
215 const fn reset_prev_state_after_silence(&mut self) {
216 self.prev_log2_ml = [0.0_f32; 57];
217 self.prev_l = 14;
218 }
219
220 /// Encode one 20 ms PCM frame into a 9-byte AMBE wire frame.
221 ///
222 /// - `pcm` must contain at least 160 f32 samples in
223 /// `[-1.0, 1.0)`. Convert from `i16` by dividing by 32768.0.
224 ///
225 /// # Output
226 ///
227 /// Returns a 9-byte D-STAR AMBE frame. Silent or near-silent
228 /// input (pitch tracker confidence below `SILENCE_CONFIDENCE`)
229 /// short-circuits to the canonical `AMBE_SILENCE` pattern that
230 /// `MMDVMHost` and DVSI chips use for zero-audio frames, so silent
231 /// stretches stay wire-compatible with conformant receivers.
232 ///
233 /// # Panics
234 ///
235 /// Never panics under normal use. The look-ahead pipeline's
236 /// internal invariant — `self.pending` is `Some` iff the encoder
237 /// was built with [`Self::new_with_lookahead`] — is enforced at
238 /// construction and never mutated afterwards; the unreachable
239 /// `expect` is kept as a defensive check rather than removed
240 /// entirely.
241 pub fn encode_frame(&mut self, pcm: &[f32]) -> [u8; 9] {
242 // Front-end: DC remove → LPF → window → FFT. After this
243 // call, `self.bufs.pitch_est_buf` holds the latest 301
244 // samples of LPF'd audio and `self.fft_out` holds the
245 // 129-bin complex spectrum of the just-arrived frame.
246 analyze_frame(pcm, &mut self.bufs, &mut self.plan, &mut self.fft_out);
247 let e_p_current = compute_e_p(&self.bufs.pitch_est_buf);
248
249 if self.pending.is_none() {
250 // Zero-latency path: commit pitch for the just-received
251 // frame via single-frame look-back + sub-multiples.
252 let pitch = self.pitch.estimate(&self.bufs.pitch_est_buf);
253 return self.quantize_and_pack(pitch);
254 }
255
256 // Look-ahead path: buffer the e_p array + a copy of the FFT
257 // spectrum, then emit bytes only once we have 3 frames.
258 let slot = FrameSlot {
259 e_p: e_p_current,
260 fft_out: self.fft_out.clone(),
261 };
262 // Construction invariant: `self.pending.is_some()` here
263 // because we returned early above when it was None.
264 let pending = self
265 .pending
266 .as_mut()
267 .expect("checked Some above; see # Panics");
268 if pending.len() < 2 {
269 pending.push(slot);
270 // Pipeline not full yet — emit silence and keep the
271 // decoder's `prev_log2_ml` state consistent with what
272 // it sees on the wire.
273 self.reset_prev_state_after_silence();
274 return AMBE_SILENCE;
275 }
276
277 // Three frames now on hand: pending[0]=N-2, pending[1]=N-1,
278 // slot=N. Run the DP against pending[0]'s e_p, using the
279 // next two as lookahead.
280 let pitch = self
281 .pitch
282 .estimate_with_lookahead(&pending[0].e_p, &pending[1].e_p, &slot.e_p);
283 // Swap out the oldest slot so we can take ownership of its
284 // FFT output without cloning; push the newly-arrived slot.
285 let oldest = pending.remove(0);
286 pending.push(slot);
287
288 // Quantize frame N-2's spectrum against the DP-chosen pitch.
289 self.quantize_from_fft(&oldest.fft_out, pitch)
290 }
291
292 /// Quantize the encoder's current-frame FFT output (set by
293 /// [`analyze_frame`]) against a just-committed pitch and return
294 /// 9 wire bytes. Used by the zero-latency path.
295 fn quantize_and_pack(&mut self, pitch: crate::encode::pitch::PitchEstimate) -> [u8; 9] {
296 if pitch.confidence < SILENCE_CONFIDENCE {
297 self.reset_prev_state_after_silence();
298 return AMBE_SILENCE;
299 }
300 let fft = self.fft_out.clone();
301 self.quantize_from_fft(&fft, pitch)
302 }
303
304 /// Quantize an arbitrary saved FFT spectrum against a committed
305 /// pitch, returning 9 wire bytes. Shared by the zero-latency and
306 /// look-ahead paths; the look-ahead path hands in an FFT saved
307 /// from 2 frames ago.
308 fn quantize_from_fft(
309 &mut self,
310 fft_out: &[Complex<f32>],
311 pitch: crate::encode::pitch::PitchEstimate,
312 ) -> [u8; 9] {
313 if pitch.confidence < SILENCE_CONFIDENCE {
314 self.reset_prev_state_after_silence();
315 return AMBE_SILENCE;
316 }
317
318 #[allow(clippy::cast_precision_loss)]
319 let f0_bin = FFT_LENGTH as f32 / pitch.period_samples;
320 // `e_p` for OP25's V/UV threshold is the pitch tracker's
321 // reconstruction-error metric (1 − confidence on the chosen
322 // period). Our PitchEstimate carries `confidence`; invert to
323 // get the error.
324 let e_p = (1.0 - pitch.confidence).clamp(0.0, 1.0);
325 let (vuv, amps) = detect_vuv_and_sa(fft_out, f0_bin, &mut self.vuv_state, e_p);
326
327 let prev = crate::encode::quantize::PrevFrameState {
328 log2_ml: self.prev_log2_ml,
329 l: self.prev_l,
330 };
331 let outcome = quantize(pitch, vuv, &s, &prev);
332 self.prev_log2_ml = outcome.prev_log2_ml;
333 self.prev_l = outcome.prev_l;
334 let ambe_d = outcome.ambe_d;
335
336 let mut ambe_fr = [0u8; AMBE_FRAME_BITS];
337 ecc_encode(&ambe_d, &mut ambe_fr);
338 demodulate_c1(&mut ambe_fr);
339 pack_frame(&ambe_fr)
340 }
341
342 /// Convenience: encode from i16 PCM. Divides by 32768.0 first.
343 pub fn encode_frame_i16(&mut self, pcm: &[i16]) -> [u8; 9] {
344 let mut scratch = [0.0_f32; FRAME];
345 for (i, &s) in pcm.iter().enumerate().take(FRAME) {
346 if let Some(slot) = scratch.get_mut(i) {
347 #[allow(clippy::cast_precision_loss)]
348 let f = f32::from(s) / 32768.0;
349 *slot = f;
350 }
351 }
352 self.encode_frame(&scratch)
353 }
354}
355
356impl Default for AmbeEncoder {
357 fn default() -> Self {
358 Self::new()
359 }
360}
361
362impl std::fmt::Debug for AmbeEncoder {
363 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
364 f.debug_struct("AmbeEncoder").finish_non_exhaustive()
365 }
366}
367
#[cfg(test)]
mod tests {
    use super::AmbeEncoder;
    use crate::encode::state::FRAME;

    /// Sample rate assumed by the synthetic test signals, in Hz.
    const SAMPLE_RATE: f32 = 8000.0;

    /// Build frame number `frame` of a unit-amplitude sine at `f0` Hz,
    /// phase-continuous across consecutive frame indices.
    fn sine_frame(frame: usize, f0: f32) -> [f32; FRAME] {
        core::array::from_fn(|i| {
            #[allow(clippy::cast_precision_loss)]
            let t = (frame * FRAME + i) as f32;
            (t * 2.0 * std::f32::consts::PI * f0 / SAMPLE_RATE).sin()
        })
    }

    /// An all-zero frame encodes without panicking and yields a
    /// 9-byte frame.
    #[test]
    fn encode_silent_frame_produces_nine_bytes() {
        let mut enc = AmbeEncoder::new();
        let pcm = [0.0_f32; FRAME];
        let out = enc.encode_frame(&pcm);
        assert_eq!(out.len(), 9);
    }

    /// End-to-end: encode a 200 Hz sine, decode it, verify the
    /// decoder produces PCM of the expected shape. Full
    /// perceptual-quality validation lives in the
    /// `encoder_roundtrip.rs` integration test + the `validate_*`
    /// example harnesses; this unit test is a smoke check that the
    /// `encode_frame` → `decode_frame` pipeline doesn't panic or
    /// deadlock on a trivial input.
    #[test]
    fn encode_sine_round_trips_through_decoder() {
        use crate::AmbeDecoder;
        let mut enc = AmbeEncoder::new();
        let f0 = 200.0_f32;

        // Feed several frames of a 200 Hz sine.
        for frame in 0..5 {
            let pcm = sine_frame(frame, f0);
            let ambe = enc.encode_frame(&pcm);
            // Decode with a fresh decoder and verify we got 160 samples.
            let mut dec = AmbeDecoder::new();
            let pcm_out = dec.decode_frame(&ambe);
            assert_eq!(pcm_out.len(), 160);
        }
    }

    /// Multiple successive calls don't panic and don't leak state
    /// in ways that affect frame shape.
    #[test]
    fn repeated_frames_remain_nine_bytes() {
        let mut enc = AmbeEncoder::new();
        for i in 0..50 {
            let pcm: [f32; FRAME] = core::array::from_fn(|j| {
                #[allow(clippy::cast_precision_loss)]
                let t = (i * FRAME + j) as f32;
                0.5 * (t * 0.1).sin()
            });
            let out = enc.encode_frame(&pcm);
            assert_eq!(out.len(), 9, "at frame {i}");
        }
    }

    /// End-to-end: encode a settled 200 Hz sine and push the last
    /// emitted AMBE frame through the decoder twice. This is a smoke
    /// test that a frame produced after the pitch tracker has had
    /// time to converge survives the full pipeline
    /// (`analyze` → `quantize` → `ecc_encode` → `demodulate_c1` →
    /// `pack` → `unpack` → `demodulate_c1` → `ecc_data` → `decode`).
    ///
    /// Only the decoded frame length is asserted; the recovered F0 is
    /// not inspected here.
    #[test]
    fn encode_decode_preserves_pitch() {
        use crate::AmbeDecoder;
        let mut enc = AmbeEncoder::new();
        let f0 = 200.0_f32;

        // Warm up the encoder with 25 frames of sine so the pitch
        // tracker converges.
        let mut last_bytes = [0u8; 9];
        for frame in 0..25 {
            let pcm = sine_frame(frame, f0);
            last_bytes = enc.encode_frame(&pcm);
        }

        let mut dec = AmbeDecoder::new();
        let _ = dec.decode_frame(&last_bytes); // prime state
        let pcm = dec.decode_frame(&last_bytes);
        assert_eq!(pcm.len(), 160);
    }

    /// End-to-end: encode a sustained voice-like signal (sine plus
    /// harmonics) through the pipeline and verify the decoded PCM is
    /// NOT silent. This proves the spectral quantization path
    /// (PRBA/HOC) carries meaningful signal energy end-to-end.
    ///
    /// Audio quality (vs. DVSI chip output) is not asserted — that
    /// requires real hardware-in-the-loop testing.
    #[test]
    fn encode_decode_produces_non_silent_output() {
        use crate::AmbeDecoder;
        let mut enc = AmbeEncoder::new();
        let mut dec = AmbeDecoder::new();
        let f0 = 150.0_f32;

        // Synthesize a voice-like signal: fundamental + 3 harmonics
        // with decreasing amplitude (typical spectral envelope shape).
        let harmonics = [1.0_f32, 0.6, 0.35, 0.2];
        let make_pcm = |frame_idx: usize| -> [f32; FRAME] {
            core::array::from_fn(|i| {
                #[allow(clippy::cast_precision_loss)]
                let t = (frame_idx * FRAME + i) as f32;
                let mut sum = 0.0_f32;
                for (k, &amp) in harmonics.iter().enumerate() {
                    #[allow(clippy::cast_precision_loss)]
                    let harm = (k + 1) as f32;
                    sum += amp * (t * 2.0 * std::f32::consts::PI * f0 * harm / SAMPLE_RATE).sin();
                }
                // Normalize to keep within [-1, 1).
                sum * 0.4
            })
        };

        // Warm-up frames so pitch tracker converges + decoder state
        // settles.
        for frame in 0..20 {
            let pcm = make_pcm(frame);
            let ambe = enc.encode_frame(&pcm);
            let _ = dec.decode_frame(&ambe);
        }

        // Measurement frames: accumulate decoded PCM energy.
        let mut total_energy = 0.0_f32;
        let mut total_samples: usize = 0;
        for frame in 20..30 {
            let pcm = make_pcm(frame);
            let ambe = enc.encode_frame(&pcm);
            let decoded = dec.decode_frame(&ambe);
            for &s in &decoded {
                let sf = f32::from(s) / 32768.0;
                total_energy += sf * sf;
                total_samples += 1;
            }
        }
        #[allow(clippy::cast_precision_loss)]
        let rms = (total_energy / total_samples as f32).sqrt();
        assert!(
            rms > 1e-4,
            "decoded PCM is essentially silent (rms={rms}); \
             spectral quantization pipeline is not carrying signal energy",
        );
    }
}