mbelib_rs/encode/
encoder.rs

1// SPDX-FileCopyrightText: 2026 Swift Raccoon
2// SPDX-License-Identifier: GPL-2.0-or-later OR GPL-3.0-or-later
3
4//! Top-level D-STAR AMBE encoder.
5//!
6//! Ties together the front-end ([`analyze_frame`]), pitch tracker,
7//! V/UV detector, spectral amplitude extractor, and bit packer into
8//! a single `AmbeEncoder::encode_frame(pcm) -> [u8; 9]` entry point.
9//!
10//! # Status
11//!
12//! Functional end-to-end.  Every stage of OP25's `ambe_encoder.cc`
13//! is ported and wired together; the output bytes decode cleanly
14//! through our own decoder and through reference `mbelib`.
15//!
16//! Bit-exact vs OP25 on the stage-5..8 (quantize) path when fed
17//! identical `sa`/`v_uv_dsn`/`prev_mp` state — validated by
18//! `examples/validate_quantize_vs_op25.rs`:
19//! b3/b4/b5/b6/b7 = 100%, b2 (gain) = 99%, b1 (VUV) = 88%,
20//! b0 (pitch) = 60%.  b8 (`HOC_B8`) = 30% because OP25 searches the
21//! full 0..=15 codebook in D-STAR mode while the wire format only
22//! carries 3 bits with a forced-zero LSB; our stride-2 search
23//! follows mbelib's decoder convention (the DVSI implementation).
24//!
//! Stages 1..4 (analysis: pitch / `num_harms` / V/UV / sa from FFT)
//! can still diverge from OP25 during pitch transitions. The pitch
//! tracker ports OP25's exact E(p) detectability function plus
//! look-back tracking (`pitch_est.cc:200–226`) and sub-multiples
//! analysis (`pitch_est.cc:273–332`). OP25's 2-frame look-ahead DP
//! (`pitch_est.cc:229–270`) is opt-in via
//! [`AmbeEncoder::new_with_lookahead`]; the default
//! [`AmbeEncoder::new`] skips it to stay zero-latency. Audio remains
//! intelligible on real speech despite any pitch divergence because
//! the spectral-envelope reconstruction (stages 5..8) emits the
//! correct codebook entries for whatever pitch we pick.
35//!
36//! # Pipeline
37//!
38//! 1. Front-end: DC remove → pitch-LPF → window → 256-pt FFT.
39//! 2. Pitch estimation (sub-harmonic summation on the LPF'd buffer).
40//! 3. Per-band V/UV decisions from the FFT.
41//! 4. Per-harmonic magnitudes via 3-bin power integration.
42//! 5. Quantization to the 49-bit `ambe_d` parameter vector:
43//!    L-constrained W0 search for b0, energy-weighted VUV codebook
44//!    search for b1, `AmbePlus` DG nearest for b2, block-DCT → R
45//!    pairs → 8-pt DCT → G for PRBA24/PRBA58 (b3/b4), per-block
46//!    HOC codebooks for b5..b8. Prediction residual
47//!    `T = lsa − 0.65·interp(prev_log2_ml)` uses closed-loop
48//!    reconstruction via `decode_params` so encoder and decoder
49//!    track identical magnitude history.
50//! 6. Golay(23,12) FEC on C0 and C1, plus outer parity on C0.
51//! 7. LFSR scramble of C1 seeded from C0 data bits.
52//! 8. 72-bit DSD-style interleave to wire order, pack to 9 bytes.
53
54use crate::ecc::ecc_encode;
55use crate::encode::analyze::{FftPlan, analyze_frame};
56use crate::encode::interleave::AMBE_FRAME_BITS;
57use crate::encode::pack::pack_frame;
58use crate::encode::pitch::{PITCH_CANDIDATES, PitchTracker, compute_e_p};
59use crate::encode::quantize::quantize;
60use crate::encode::state::{EncoderBuffers, FFT_LENGTH, FRAME};
61use crate::encode::vuv::{VuvState, detect_vuv_and_sa};
62use crate::unpack::demodulate_c1;
63use realfft::num_complex::Complex;
64
65/// Per-frame snapshot buffered by the 2-frame look-ahead pipeline.
66///
67/// The look-ahead DP commits pitch for frame `N-2` only after it has
68/// seen the `E(p)` arrays for frames `N-2`, `N-1`, and `N`. Until
69/// frame `N` arrives, every `encode_frame(N-2)` call's downstream
70/// quantization is held in this slot; the FFT output is saved so
71/// voicing / spectral-amplitude extraction re-runs against the same
72/// spectrum the encoder saw at analysis time.
73struct FrameSlot {
74    e_p: [f32; PITCH_CANDIDATES],
75    fft_out: Vec<Complex<f32>>,
76}
77
78/// Top-level D-STAR AMBE 3600×2400 encoder.
79///
80/// Owns one instance of every per-stream state object. Not thread-safe;
81/// construct one per concurrent voice stream.
82///
83/// # Usage
84///
85/// ```ignore
86/// use mbelib_rs::AmbeEncoder;
87///
88/// let mut encoder = AmbeEncoder::new();
89/// // Feed 160-sample (20 ms at 8 kHz) frames of f32 PCM in [-1.0, 1.0).
90/// let pcm: [f32; 160] = [0.0; 160];
91/// let ambe_frame: [u8; 9] = encoder.encode_frame(&pcm);
92/// ```
93pub struct AmbeEncoder {
94    bufs: EncoderBuffers,
95    plan: FftPlan,
96    pitch: PitchTracker,
97    /// Scratch FFT output reused across frames.
98    fft_out: Vec<Complex<f32>>,
99    /// Per-band log-magnitude from the previous frame, indexed by
100    /// harmonic number `l` (1-based; slot 0 mirrors slot 1 for the
101    /// decoder's band-0 boundary condition).
102    ///
103    /// The spectral quantization path needs this to compute the
104    /// prediction residual `T[l] = lsa[l] - 0.65 * interp_prev[l]`
105    /// before matching against the PRBA / HOC codebooks. Without
106    /// this the receiver sees `lsa + 0.65*prev_interp` instead of
107    /// `lsa`, which drifts unbounded and produces the "generative,
108    /// not-voice" sound we observed before this field existed.
109    ///
110    /// Updated at the end of every `encode_frame` to track what the
111    /// decoder will have after parsing the frame we just emitted.
112    prev_log2_ml: [f32; 57],
113    /// Previous frame's harmonic count. Used by the band-ratio
114    /// mapping `kl = (prev_l / cur_l) * l` that drives the prev-frame
115    /// log-magnitude interpolation.
116    prev_l: usize,
117    /// 2-slot ring buffer holding analysis output for frames `N-2`
118    /// and `N-1`. On `encode_frame(N)` we compute `E(p)_N`, run the
119    /// DP on `(E(p)_{N-2}, E(p)_{N-1}, E(p)_N)` to commit pitch for
120    /// frame `N-2`, quantize its saved FFT against that pitch, emit
121    /// bytes, then shift the ring.
122    ///
123    /// While `pending.len() < 2` the encoder is warming up: the
124    /// output is `AMBE_SILENCE` and the decoder's `prev_log2_ml`
125    /// state is re-zeroed each time (via
126    /// [`Self::reset_prev_state_after_silence`]).
127    ///
128    /// `None` means look-ahead is disabled entirely — the default
129    /// zero-latency [`Self::new`] sets this to `None`, while
130    /// [`Self::new_with_lookahead`] sets it to `Some(Vec::new())`.
131    pending: Option<Vec<FrameSlot>>,
132    /// Hysteretic V/UV state — previous frame's per-band decisions
133    /// plus the slow-update frame-energy ceiling `th_max`. Carried
134    /// across frames so the V/UV threshold reflects the signal's
135    /// recent history (OP25 `v_uv_det.cc:152`).
136    vuv_state: VuvState,
137}
138
/// Pitch-confidence floor for the silence shortcut.
///
/// A frame whose pitch estimate reports a confidence below this value
/// is treated as carrying essentially no signal: instead of quantizing
/// zeros, the encoder emits the canonical D-STAR silence pattern
/// (`MMDVMHost` / DVSI convention). Reference: `NULL_AMBE_DATA_BYTES`
/// in `ref/MMDVMHost/DStarDefines.h:44`.
const SILENCE_CONFIDENCE: f32 = 0.05_f32;

/// Canonical 9-byte AMBE silence frame.
///
/// Returned for input that falls below [`SILENCE_CONFIDENCE`] and for
/// the two warm-up frames of the look-ahead pipeline.
const AMBE_SILENCE: [u8; 9] = [
    0x9E, 0x8D, 0x32, // bytes 0..=2
    0x88, 0x26, 0x1A, // bytes 3..=5
    0x3F, 0x61, 0xE8, // bytes 6..=8
];
150
151impl AmbeEncoder {
152    /// Construct a fresh encoder using OP25's single-frame
153    /// (look-back + sub-multiples) pitch tracker. Zero added
154    /// latency; each [`encode_frame`](Self::encode_frame) call
155    /// commits pitch for the just-received frame.
156    ///
157    /// This is the backwards-compatible default. Real-voice inputs
158    /// work well here because sub-multiples analysis resolves the
159    /// common octave ambiguities; pure-sine synthetic tests can
160    /// lose the 2P-vs-P disambiguation on settled tones. For the
161    /// full OP25 pitch pipeline (2-frame look-ahead DP), see
162    /// [`Self::new_with_lookahead`] — it costs 40 ms of latency and
163    /// an extra 2-frame warmup but matches OP25's pitch decisions
164    /// on pure sines as well as voice.
165    #[must_use]
166    pub fn new() -> Self {
167        Self {
168            bufs: EncoderBuffers::new(),
169            plan: FftPlan::new(),
170            pitch: PitchTracker::new(),
171            fft_out: vec![Complex::new(0.0, 0.0); FFT_LENGTH / 2 + 1],
172            prev_log2_ml: [0.0_f32; 57],
173            prev_l: 0,
174            pending: None,
175            vuv_state: VuvState::new(),
176        }
177    }
178
179    /// Construct a fresh encoder WITH the 2-frame look-ahead DP
180    /// enabled. The first two [`encode_frame`](Self::encode_frame)
181    /// calls return `AMBE_SILENCE` while the pipeline fills; frame
182    /// `N-2`'s pitch is committed on the third call (frame `N`).
183    /// Adds ≈40 ms end-to-end latency; matches OP25's pitch-tracking
184    /// behaviour across pure sines and pitch transitions.
185    #[must_use]
186    pub fn new_with_lookahead() -> Self {
187        Self {
188            bufs: EncoderBuffers::new(),
189            plan: FftPlan::new(),
190            pitch: PitchTracker::new(),
191            fft_out: vec![Complex::new(0.0, 0.0); FFT_LENGTH / 2 + 1],
192            prev_log2_ml: [0.0_f32; 57],
193            prev_l: 0,
194            pending: Some(Vec::with_capacity(2)),
195            vuv_state: VuvState::new(),
196        }
197    }
198
199    /// After emitting `AMBE_SILENCE`, overwrite the encoder's
200    /// `prev_log2_ml` / `prev_l` with what the decoder will have
201    /// after parsing that silence frame — otherwise the next voice
202    /// frame's prediction residual
203    /// `T[i] = lsa[i] - 0.65 * interp(prev_log2_ml)[i]` is computed
204    /// against a `prev` state the decoder doesn't share.
205    ///
206    /// Decoder-side silence (b0 ∈ {124, 125}) fixes `w0 = 2π/32`,
207    /// `L = 14`, `vl[1..=14] = false` (all unvoiced), `Tl = 0`.
208    /// The `log_ml` reconstruction collapses to
209    /// `big_gamma - INTERP_WEIGHT * prev_sum`, which without a
210    /// full closed-loop decoder simulation is approximated by zero.
211    /// Zeroing the state is not perfect, but it is the same
212    /// approximation we use at construction and on `reset()`, so at
213    /// least encoder and decoder both observe the same null baseline
214    /// across any silence-to-voice transition.
215    const fn reset_prev_state_after_silence(&mut self) {
216        self.prev_log2_ml = [0.0_f32; 57];
217        self.prev_l = 14;
218    }
219
220    /// Encode one 20 ms PCM frame into a 9-byte AMBE wire frame.
221    ///
222    /// - `pcm` must contain at least 160 f32 samples in
223    ///   `[-1.0, 1.0)`. Convert from `i16` by dividing by 32768.0.
224    ///
225    /// # Output
226    ///
227    /// Returns a 9-byte D-STAR AMBE frame. Silent or near-silent
228    /// input (pitch tracker confidence below `SILENCE_CONFIDENCE`)
229    /// short-circuits to the canonical `AMBE_SILENCE` pattern that
230    /// `MMDVMHost` and DVSI chips use for zero-audio frames, so silent
231    /// stretches stay wire-compatible with conformant receivers.
232    ///
233    /// # Panics
234    ///
235    /// Never panics under normal use. The look-ahead pipeline's
236    /// internal invariant — `self.pending` is `Some` iff the encoder
237    /// was built with [`Self::new_with_lookahead`] — is enforced at
238    /// construction and never mutated afterwards; the unreachable
239    /// `expect` is kept as a defensive check rather than removed
240    /// entirely.
241    pub fn encode_frame(&mut self, pcm: &[f32]) -> [u8; 9] {
242        // Front-end: DC remove → LPF → window → FFT. After this
243        // call, `self.bufs.pitch_est_buf` holds the latest 301
244        // samples of LPF'd audio and `self.fft_out` holds the
245        // 129-bin complex spectrum of the just-arrived frame.
246        analyze_frame(pcm, &mut self.bufs, &mut self.plan, &mut self.fft_out);
247        let e_p_current = compute_e_p(&self.bufs.pitch_est_buf);
248
249        if self.pending.is_none() {
250            // Zero-latency path: commit pitch for the just-received
251            // frame via single-frame look-back + sub-multiples.
252            let pitch = self.pitch.estimate(&self.bufs.pitch_est_buf);
253            return self.quantize_and_pack(pitch);
254        }
255
256        // Look-ahead path: buffer the e_p array + a copy of the FFT
257        // spectrum, then emit bytes only once we have 3 frames.
258        let slot = FrameSlot {
259            e_p: e_p_current,
260            fft_out: self.fft_out.clone(),
261        };
262        // Construction invariant: `self.pending.is_some()` here
263        // because we returned early above when it was None.
264        let pending = self
265            .pending
266            .as_mut()
267            .expect("checked Some above; see # Panics");
268        if pending.len() < 2 {
269            pending.push(slot);
270            // Pipeline not full yet — emit silence and keep the
271            // decoder's `prev_log2_ml` state consistent with what
272            // it sees on the wire.
273            self.reset_prev_state_after_silence();
274            return AMBE_SILENCE;
275        }
276
277        // Three frames now on hand: pending[0]=N-2, pending[1]=N-1,
278        // slot=N. Run the DP against pending[0]'s e_p, using the
279        // next two as lookahead.
280        let pitch = self
281            .pitch
282            .estimate_with_lookahead(&pending[0].e_p, &pending[1].e_p, &slot.e_p);
283        // Swap out the oldest slot so we can take ownership of its
284        // FFT output without cloning; push the newly-arrived slot.
285        let oldest = pending.remove(0);
286        pending.push(slot);
287
288        // Quantize frame N-2's spectrum against the DP-chosen pitch.
289        self.quantize_from_fft(&oldest.fft_out, pitch)
290    }
291
292    /// Quantize the encoder's current-frame FFT output (set by
293    /// [`analyze_frame`]) against a just-committed pitch and return
294    /// 9 wire bytes. Used by the zero-latency path.
295    fn quantize_and_pack(&mut self, pitch: crate::encode::pitch::PitchEstimate) -> [u8; 9] {
296        if pitch.confidence < SILENCE_CONFIDENCE {
297            self.reset_prev_state_after_silence();
298            return AMBE_SILENCE;
299        }
300        let fft = self.fft_out.clone();
301        self.quantize_from_fft(&fft, pitch)
302    }
303
304    /// Quantize an arbitrary saved FFT spectrum against a committed
305    /// pitch, returning 9 wire bytes. Shared by the zero-latency and
306    /// look-ahead paths; the look-ahead path hands in an FFT saved
307    /// from 2 frames ago.
308    fn quantize_from_fft(
309        &mut self,
310        fft_out: &[Complex<f32>],
311        pitch: crate::encode::pitch::PitchEstimate,
312    ) -> [u8; 9] {
313        if pitch.confidence < SILENCE_CONFIDENCE {
314            self.reset_prev_state_after_silence();
315            return AMBE_SILENCE;
316        }
317
318        #[allow(clippy::cast_precision_loss)]
319        let f0_bin = FFT_LENGTH as f32 / pitch.period_samples;
320        // `e_p` for OP25's V/UV threshold is the pitch tracker's
321        // reconstruction-error metric (1 − confidence on the chosen
322        // period). Our PitchEstimate carries `confidence`; invert to
323        // get the error.
324        let e_p = (1.0 - pitch.confidence).clamp(0.0, 1.0);
325        let (vuv, amps) = detect_vuv_and_sa(fft_out, f0_bin, &mut self.vuv_state, e_p);
326
327        let prev = crate::encode::quantize::PrevFrameState {
328            log2_ml: self.prev_log2_ml,
329            l: self.prev_l,
330        };
331        let outcome = quantize(pitch, vuv, &amps, &prev);
332        self.prev_log2_ml = outcome.prev_log2_ml;
333        self.prev_l = outcome.prev_l;
334        let ambe_d = outcome.ambe_d;
335
336        let mut ambe_fr = [0u8; AMBE_FRAME_BITS];
337        ecc_encode(&ambe_d, &mut ambe_fr);
338        demodulate_c1(&mut ambe_fr);
339        pack_frame(&ambe_fr)
340    }
341
342    /// Convenience: encode from i16 PCM. Divides by 32768.0 first.
343    pub fn encode_frame_i16(&mut self, pcm: &[i16]) -> [u8; 9] {
344        let mut scratch = [0.0_f32; FRAME];
345        for (i, &s) in pcm.iter().enumerate().take(FRAME) {
346            if let Some(slot) = scratch.get_mut(i) {
347                #[allow(clippy::cast_precision_loss)]
348                let f = f32::from(s) / 32768.0;
349                *slot = f;
350            }
351        }
352        self.encode_frame(&scratch)
353    }
354}
355
356impl Default for AmbeEncoder {
357    fn default() -> Self {
358        Self::new()
359    }
360}
361
362impl std::fmt::Debug for AmbeEncoder {
363    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
364        f.debug_struct("AmbeEncoder").finish_non_exhaustive()
365    }
366}
367
#[cfg(test)]
mod tests {
    use super::{AMBE_SILENCE, AmbeEncoder};
    use crate::AmbeDecoder;
    use crate::encode::state::FRAME;

    /// Sample rate (Hz) assumed by every synthetic signal below.
    const SAMPLE_RATE: f32 = 8000.0;

    /// Frame number `frame_idx` of a unit-amplitude sine at `f0` Hz.
    /// The phase is derived from the absolute sample index, so
    /// consecutive frames join without a discontinuity.
    fn sine_frame(frame_idx: usize, f0: f32) -> [f32; FRAME] {
        core::array::from_fn(|i| {
            #[allow(clippy::cast_precision_loss)]
            let t = (frame_idx * FRAME + i) as f32;
            (t * 2.0 * std::f32::consts::PI * f0 / SAMPLE_RATE).sin()
        })
    }

    /// All-zero input goes through the pipeline without panicking
    /// and yields a 9-byte frame.
    #[test]
    fn encode_silent_frame_produces_nine_bytes() {
        let mut enc = AmbeEncoder::new();
        let pcm = [0.0_f32; FRAME];
        let out = enc.encode_frame(&pcm);
        assert_eq!(out.len(), 9);
    }

    /// Smoke check: encode a 200 Hz sine and feed every frame to one
    /// decoder instance, verifying each decode yields 160 samples.
    /// Full perceptual-quality validation lives in the
    /// `encoder_roundtrip.rs` integration test + the `validate_*`
    /// example harnesses; this only checks that the
    /// `encode_frame` → `decode_frame` pipeline doesn't panic on a
    /// trivial input.
    #[test]
    fn encode_sine_round_trips_through_decoder() {
        let mut enc = AmbeEncoder::new();
        let mut dec = AmbeDecoder::new();

        for frame in 0..5 {
            let ambe = enc.encode_frame(&sine_frame(frame, 200.0));
            let pcm_out = dec.decode_frame(&ambe);
            assert_eq!(pcm_out.len(), 160, "at frame {frame}");
        }
    }

    /// Multiple successive calls don't panic and don't leak state
    /// in ways that affect frame shape.
    #[test]
    fn repeated_frames_remain_nine_bytes() {
        let mut enc = AmbeEncoder::new();
        for i in 0..50 {
            let pcm: [f32; FRAME] = core::array::from_fn(|j| {
                #[allow(clippy::cast_precision_loss)]
                let t = (i * FRAME + j) as f32;
                0.5 * (t * 0.1).sin()
            });
            let out = enc.encode_frame(&pcm);
            assert_eq!(out.len(), 9, "at frame {i}");
        }
    }

    /// Smoke test on a settled tone: after 25 frames of a 200 Hz
    /// sine, the last emitted frame must decode (twice, so the
    /// decoder's state is primed) to 160 samples. This exercises
    /// `analyze` → `quantize` → `ecc_encode` → `demodulate_c1` →
    /// `pack` → decoder on a converged pitch tracker.
    ///
    /// NOTE: despite the test's name, the recovered F0 itself is not
    /// asserted here; only the decode shape is checked.
    #[test]
    fn encode_decode_preserves_pitch() {
        let mut enc = AmbeEncoder::new();

        // ~25 frames of sine so the pitch tracker converges.
        let mut last_bytes = [0u8; 9];
        for frame in 0..25 {
            last_bytes = enc.encode_frame(&sine_frame(frame, 200.0));
        }

        let mut dec = AmbeDecoder::new();
        let _ = dec.decode_frame(&last_bytes); // prime state
        let pcm = dec.decode_frame(&last_bytes);
        assert_eq!(pcm.len(), 160);
    }

    /// End-to-end: encode a sustained voice-like signal (sine plus
    /// harmonics) through the pipeline and verify the decoded PCM is
    /// NOT silent. This proves the spectral quantization path
    /// (PRBA/HOC) carries meaningful signal energy end-to-end.
    ///
    /// Audio quality (vs. DVSI chip output) is not asserted — that
    /// requires real hardware-in-the-loop testing.
    #[test]
    fn encode_decode_produces_non_silent_output() {
        let mut enc = AmbeEncoder::new();
        let mut dec = AmbeDecoder::new();
        let f0 = 150.0_f32;

        // Synthesize a voice-like signal: fundamental + 3 harmonics
        // with decreasing amplitude (typical spectral envelope shape).
        let harmonics = [1.0_f32, 0.6, 0.35, 0.2];
        let make_pcm = |frame_idx: usize| -> [f32; FRAME] {
            core::array::from_fn(|i| {
                #[allow(clippy::cast_precision_loss)]
                let t = (frame_idx * FRAME + i) as f32;
                let mut sum = 0.0_f32;
                for (k, &amp) in harmonics.iter().enumerate() {
                    #[allow(clippy::cast_precision_loss)]
                    let harm = (k + 1) as f32;
                    sum += amp * (t * 2.0 * std::f32::consts::PI * f0 * harm / SAMPLE_RATE).sin();
                }
                // Normalize to keep within [-1, 1).
                sum * 0.4
            })
        };

        // Warm-up frames so pitch tracker converges + decoder state
        // settles.
        for frame in 0..20 {
            let ambe = enc.encode_frame(&make_pcm(frame));
            let _ = dec.decode_frame(&ambe);
        }

        // Measurement frames: accumulate decoded PCM energy.
        let mut total_energy = 0.0_f32;
        let mut total_samples: usize = 0;
        for frame in 20..30 {
            let ambe = enc.encode_frame(&make_pcm(frame));
            let decoded = dec.decode_frame(&ambe);
            for &s in &decoded {
                let sf = f32::from(s) / 32768.0;
                total_energy += sf * sf;
                total_samples += 1;
            }
        }
        #[allow(clippy::cast_precision_loss)]
        let rms = (total_energy / total_samples as f32).sqrt();
        assert!(
            rms > 1e-4,
            "decoded PCM is essentially silent (rms={rms}); \
             spectral quantization pipeline is not carrying signal energy",
        );
    }

    /// Look-ahead mode: the first two calls must return the canonical
    /// silence frame while the queue fills, whatever the input is;
    /// later calls must keep producing frames without panicking.
    #[test]
    fn lookahead_warmup_emits_canonical_silence() {
        let mut enc = AmbeEncoder::new_with_lookahead();
        assert_eq!(enc.encode_frame(&sine_frame(0, 200.0)), AMBE_SILENCE);
        assert_eq!(enc.encode_frame(&sine_frame(1, 200.0)), AMBE_SILENCE);

        for frame in 2..10 {
            let out = enc.encode_frame(&sine_frame(frame, 200.0));
            assert_eq!(out.len(), 9, "at frame {frame}");
        }
    }

    /// The `i16` convenience entry point must be exactly the `f32`
    /// path applied to `sample / 32768.0`.
    #[test]
    fn i16_entry_point_matches_f32_path() {
        // 64-sample sawtooth, well inside the i16 range.
        let samples: [i16; FRAME] =
            core::array::from_fn(|i| i16::try_from(i % 64).unwrap_or(0) * 256 - 8192);
        let scaled: [f32; FRAME] = samples.map(|s| f32::from(s) / 32768.0);

        let mut via_i16 = AmbeEncoder::new();
        let mut via_f32 = AmbeEncoder::new();
        for frame in 0..5 {
            assert_eq!(
                via_i16.encode_frame_i16(&samples),
                via_f32.encode_frame(&scaled),
                "at frame {frame}",
            );
        }
    }
}