mbelib_rs/encode/pitch.rs
1// SPDX-FileCopyrightText: 2009 Pavel Yazev (OP25 imbe_vocoder/pitch_est.cc)
2// SPDX-FileCopyrightText: 2026 Swift Raccoon
3// SPDX-License-Identifier: GPL-2.0-or-later OR GPL-3.0-or-later
4//
5// Algorithmic reference: Pavel Yazev's `imbe_vocoder/pitch_est.cc`
6// (OP25, 2009, GPLv3). This port implements:
7//
8// * `e_p()` — the detectability function that returns one value of
9// E(p) per candidate pitch period. A fully periodic signal at
10// period `p` scores E(p) ≈ 0; non-periodic / aperiodic candidates
11// score E(p) closer to 1.
12// * Look-back pitch tracking (pitch_est.cc:200–226): search within
13// an empirically-learned window around the previous pitch and
14// accept the minimum if the 3-frame accumulated error stays
15// below 0.48 (Q4.12).
16//
17// Look-AHEAD pitch tracking + sub-multiples analysis
18// (pitch_est.cc:229–332) is deferred: they need a 2-frame lookahead
19// buffer (40 ms added latency), which is a caller-visible change and
20// is tracked as its own piece of work. When look-back's confidence
21// threshold isn't met, this module falls back to a single-frame
22// global minimum over the E(p) array — better than autocorrelation
23// + YIN on formant-rich speech, worse than full OP25.
24//
25// Q-format constants are carried from OP25's `globals.h` verbatim so
26// threshold comparisons map directly across the port. The fixed-point
27// math itself is f32 here — the overflow guards OP25 needs in Q15
28// arithmetic are implicit in f32's much larger dynamic range.
29
30//! Pitch (F0) estimation from the pitch-estimation history buffer.
31//!
32//! Given the 301-sample LPF'd buffer produced by [`crate::encode::analyze`],
33//! produces a fractional pitch period in samples and the corresponding
34//! F0 in Hz, plus a confidence score.
35//!
36//! # Algorithm — OP25 `pitch_est` port
37//!
38//! For each frame, [`PitchTracker::estimate`]:
39//!
40//! 1. Windows the 301-sample pitch-estimation buffer with `WI[]`.
41//! 2. Computes autocorrelation at integer lags `21..=150` plus the
42//! half-integer lags interpolated between them (259 values, one
43//! per OP25 `corr[]` slot).
44//! 3. Evaluates `E(p)` over 203 candidate periods from 21 to 122
45//! samples in 0.5-sample steps — the IMBE-native pitch grid.
46//! 4. Look-back pitch search: within the allowed window
47//! [`MIN_MAX_TBL`] indexed by `prev_pitch_idx`, find the period minimizing
48//! `E`. If the 3-frame cumulative error (current + prev + prev-prev)
49//! stays below `CNST_0_48_Q4_12`, commit that as the new pitch.
50//! 5. Otherwise fall back to the global minimum of `E` across all
51//! 203 candidates. (This is the single-frame approximation of
52//! OP25's look-ahead DP; the full DP is a follow-on port.)
53//!
54//! The AMBE codebooks quantize F0 via the `W0_TABLE` in
55//! [`crate::tables`]; [`PitchEstimate::period_samples`] is the
56//! fractional period this module hands downstream.
57
58use crate::encode::state::PITCH_EST_BUF_SIZE;
59use crate::encode::window::WI;
60
61/// OP25 `min_max_tbl[203]` — per-pitch allowed search window.
62///
63/// Indexed by the previous frame's pitch index `prev_pitch_idx` in
64/// `0..203`. Each entry packs `(min_index, max_index)` as
65/// `(hi_byte, lo_byte)`. The allowed window for the current frame
66/// is `[min_index, max_index]` inclusive, in the same index space.
67///
68/// Reference: `pitch_est.cc:37` in OP25. Values were fit empirically
69/// to the speech-pitch dynamics the IMBE encoder sees; we carry them
70/// over verbatim.
71#[rustfmt::skip]
72const MIN_MAX_TBL: [u16; 203] = [
73 0x0008, 0x0009, 0x000a, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0012, 0x0013,
74 0x0014, 0x0115, 0x0216, 0x0218, 0x0319, 0x041a, 0x051b, 0x061c, 0x061e, 0x071f,
75 0x0820, 0x0921, 0x0a22, 0x0a24, 0x0b25, 0x0c26, 0x0d27, 0x0e28, 0x0e2a, 0x0f2b,
76 0x102c, 0x112d, 0x122e, 0x1230, 0x1331, 0x1432, 0x1533, 0x1634, 0x1636, 0x1737,
77 0x1838, 0x1939, 0x1a3a, 0x1a3c, 0x1b3d, 0x1c3e, 0x1d3f, 0x1e40, 0x1e42, 0x1f43,
78 0x2044, 0x2145, 0x2246, 0x2248, 0x2349, 0x244a, 0x254b, 0x264c, 0x264e, 0x274f,
79 0x2850, 0x2951, 0x2a52, 0x2a54, 0x2b55, 0x2c56, 0x2d57, 0x2e58, 0x2e5a, 0x2f5b,
80 0x305c, 0x315d, 0x325e, 0x3260, 0x3361, 0x3462, 0x3563, 0x3664, 0x3666, 0x3767,
81 0x3868, 0x3969, 0x3a6a, 0x3a6c, 0x3b6d, 0x3c6e, 0x3d6f, 0x3e70, 0x3e72, 0x3f73,
82 0x4074, 0x4175, 0x4276, 0x4278, 0x4379, 0x447a, 0x457b, 0x467c, 0x467e, 0x477f,
83 0x4880, 0x4981, 0x4a82, 0x4a84, 0x4b85, 0x4c86, 0x4d87, 0x4e88, 0x4e8a, 0x4f8b,
84 0x508c, 0x518d, 0x528e, 0x5290, 0x5391, 0x5492, 0x5593, 0x5694, 0x5696, 0x5797,
85 0x5898, 0x5999, 0x5a9a, 0x5a9c, 0x5b9d, 0x5c9e, 0x5d9f, 0x5ea0, 0x5ea2, 0x5fa3,
86 0x60a4, 0x61a5, 0x62a6, 0x62a8, 0x63a9, 0x64aa, 0x65ab, 0x66ac, 0x66ae, 0x67af,
87 0x68b0, 0x69b1, 0x6ab2, 0x6ab4, 0x6bb5, 0x6cb6, 0x6db7, 0x6eb8, 0x6eba, 0x6fbb,
88 0x70bc, 0x71bd, 0x72be, 0x72c0, 0x73c1, 0x74c2, 0x75c3, 0x76c4, 0x76c6, 0x77c7,
89 0x78c8, 0x79c9, 0x7aca, 0x7aca, 0x7bca, 0x7cca, 0x7dca, 0x7eca, 0x7eca, 0x7fca,
90 0x80ca, 0x81ca, 0x82ca, 0x82ca, 0x83ca, 0x84ca, 0x85ca, 0x86ca, 0x86ca, 0x87ca,
91 0x88ca, 0x89ca, 0x8aca, 0x8aca, 0x8bca, 0x8cca, 0x8dca, 0x8eca, 0x8eca, 0x8fca,
92 0x90ca, 0x91ca, 0x92ca, 0x92ca, 0x93ca, 0x94ca, 0x95ca, 0x96ca, 0x96ca, 0x97ca,
93 0x98ca, 0x99ca, 0x9aca,
94];
95
96/// Look-back cumulative-error threshold (`CNST_0_48_Q4_12` in OP25).
97///
98/// If `E(current) + E(prev) + E(prev_prev) ≤ 0.48`, the look-back
99/// pitch choice is considered stable enough that no look-ahead
100/// disambiguation is needed. Empirically tuned by the IMBE authors.
101const CEB_THRESHOLD: f32 = 0.48;
102
103/// Number of E(p) candidates. Corresponds to pitch periods 21.0, 21.5,
104/// 22.0, ..., 121.5, 122.0 — the OP25 index space.
105pub(crate) const PITCH_CANDIDATES: usize = 203;
106
107/// Default pitch index used on a fresh tracker.
108///
109/// OP25 initializes `prev_pitch = 158` (Q15.1 format = 2 × period +
110/// 42). In our 0-based index space that's `158 − 42 = 116`, which
111/// corresponds to a period of `21 + 116 × 0.5 = 79` samples (≈100 Hz)
112/// — a reasonable baseline for unvoiced speech onset.
113const PITCH_DEFAULT_IDX: usize = 116;
114
115/// Convert an OP25 pitch index (0..203) to the corresponding period
116/// in samples. Index 0 → period 21.0, index 1 → 21.5, ...,
117/// index 202 → 122.0.
118#[inline]
119#[allow(clippy::cast_precision_loss, reason = "pitch index always ≤ 202")]
120const fn idx_to_period(idx: usize) -> f32 {
121 21.0 + (idx as f32) * 0.5
122}
123
124/// Inverse of [`idx_to_period`], clamping out-of-range periods into
125/// the valid `0..PITCH_CANDIDATES` index space.
126#[inline]
127#[allow(
128 clippy::cast_possible_truncation,
129 clippy::cast_sign_loss,
130 clippy::cast_precision_loss,
131 reason = "period is small, finite, and already clamped to valid range"
132)]
133fn period_to_idx(period: f32) -> usize {
134 let idx = ((period - 21.0) * 2.0).round();
135 if idx < 0.0 {
136 0
137 } else if idx >= PITCH_CANDIDATES as f32 {
138 PITCH_CANDIDATES - 1
139 } else {
140 idx as usize
141 }
142}
143
144/// Result of a pitch-estimation pass on one 20 ms frame.
145#[derive(Debug, Clone, Copy, PartialEq)]
146pub struct PitchEstimate {
147 /// Pitch period in samples at 8 kHz. Fractional — either an
148 /// integer or integer + 0.5 on the IMBE pitch grid.
149 pub period_samples: f32,
150 /// Fundamental frequency in Hz (8000 / `period_samples`).
151 pub f0_hz: f32,
152 /// Confidence score in `[0, 1]`, computed as `1 − E(p)` at the
153 /// chosen pitch. Values above ≈0.5 typically indicate stable
154 /// voiced speech; below ≈0.05 indicates noise or silence and is
155 /// the trigger for the encoder to emit `AMBE_SILENCE`.
156 pub confidence: f32,
157}
158
159/// Per-stream pitch-tracker state, matching OP25's `pitch_est` member
160/// variables. All fields are prefixed `prev` by design — they're the
161/// rolling history the look-back / cumulative-error tests consume.
162#[derive(Debug, Clone, Copy)]
163#[allow(
164 clippy::struct_field_names,
165 reason = "OP25 uses the same `prev_*` naming; lint fires on the pattern, not on real confusion"
166)]
167pub struct PitchTracker {
168 /// Previous frame's pitch index (0..203).
169 prev_pitch_idx: usize,
170 /// Pitch index from two frames ago.
171 prev_prev_pitch_idx: usize,
172 /// E(p) value at the previous frame's chosen pitch.
173 prev_e_p: f32,
174 /// E(p) value two frames ago.
175 prev_prev_e_p: f32,
176}
177
178impl PitchTracker {
179 /// Fresh tracker. Initializes to OP25's canonical defaults:
180 /// `prev_pitch_idx = 116` (period ≈ 79 samples), `prev_e_p = 0`.
181 #[must_use]
182 pub const fn new() -> Self {
183 Self {
184 prev_pitch_idx: PITCH_DEFAULT_IDX,
185 prev_prev_pitch_idx: PITCH_DEFAULT_IDX,
186 prev_e_p: 0.0,
187 prev_prev_e_p: 0.0,
188 }
189 }
190
191 /// Estimate pitch from a pitch-history buffer via IMBE `e_p` +
192 /// look-back tracking only.
193 ///
194 /// `pitch_est_buf` is the 301-sample LPF'd buffer. See
195 /// [`compute_e_p`] for the `E(p)` derivation. This single-frame
196 /// entry point is zero-latency; for the full OP25 look-ahead DP
197 /// use [`Self::estimate_with_lookahead`].
198 ///
199 /// # Algorithm (single-frame path)
200 ///
201 /// 1. [`compute_e_p`] the 203-entry `E(p)` array for this frame.
202 /// 2. Look-back search: pick the `E(p)` minimum inside
203 /// `MIN_MAX_TBL[prev_pitch_idx]`. If
204 /// `E + prev_E + prev_prev_E ≤ 0.48`, commit it.
205 /// 3. Otherwise fall back to the global `E(p)` minimum.
206 /// 4. Sub-multiples analysis tests `p/2..p/5` per OP25's 3-tier
207 /// threshold cascade; the winner is committed.
208 pub fn estimate(&mut self, pitch_est_buf: &[f32; PITCH_EST_BUF_SIZE]) -> PitchEstimate {
209 let e_p = compute_e_p(pitch_est_buf);
210 if e_p.iter().all(|&v| v >= 1.0 - f32::EPSILON) {
211 // Silent buffer — hold previous state, return zero-confidence.
212 let period = idx_to_period(self.prev_pitch_idx);
213 return PitchEstimate {
214 period_samples: period,
215 f0_hz: 8000.0 / period,
216 confidence: 0.0,
217 };
218 }
219 self.track_single_frame(&e_p)
220 }
221
222 /// Run the single-frame look-back + sub-multiples decision over a
223 /// pre-computed `E(p)` array. Used by [`Self::estimate`] and
224 /// internally by [`Self::estimate_with_lookahead`] as its
225 /// look-back half.
226 #[allow(
227 clippy::similar_names,
228 reason = "pb/pf and ceb/cef mirror OP25's variable naming for cross-reference"
229 )]
230 fn track_single_frame(&mut self, e_p: &[f32; PITCH_CANDIDATES]) -> PitchEstimate {
231 // --- Look-back pitch tracking (pitch_est.cc:200–226) ---
232 let entry = MIN_MAX_TBL
233 .get(self.prev_pitch_idx)
234 .copied()
235 .unwrap_or(0x0008);
236 let min_idx = ((entry >> 8) & 0xFF) as usize;
237 let max_idx = (entry & 0xFF) as usize;
238
239 let mut pb = min_idx;
240 let mut e_pb = e_p.get(min_idx).copied().unwrap_or(1.0);
241 for idx in (min_idx + 1)..=max_idx.min(PITCH_CANDIDATES - 1) {
242 let e_val = e_p.get(idx).copied().unwrap_or(1.0);
243 if e_val < e_pb {
244 e_pb = e_val;
245 pb = idx;
246 }
247 }
248 let ceb = e_pb + self.prev_e_p + self.prev_prev_e_p;
249
250 let mut chosen_idx = if ceb <= CEB_THRESHOLD {
251 pb
252 } else {
253 // --- Fallback: global E(p) minimum (deferred DP stand-in) ---
254 //
255 // OP25 runs a 2-frame look-ahead DP here (pitch_est.cc:229).
256 // Without that lookahead, a single-frame global argmin is
257 // the best we can do without inventing data — it matches
258 // OP25 exactly whenever the true pitch already sits at the
259 // global minimum (stable voiced speech), and falls back
260 // gracefully to the best-scored candidate otherwise.
261 let mut best_idx = 0;
262 let mut best_e = e_p.first().copied().unwrap_or(1.0);
263 for (idx, &val) in e_p.iter().enumerate().skip(1) {
264 if val < best_e {
265 best_e = val;
266 best_idx = idx;
267 }
268 }
269 best_idx
270 };
271
272 // --- Sub-multiples analysis (pitch_est.cc:273–332) ---
273 //
274 // For harmonic-rich signals, E(p) has near-zero minima at the
275 // true period AND at integer multiples of it (the signal is
276 // trivially also periodic at 2P, 3P, ...). Look-back alone
277 // can't break this tie — if the tracker converges on 2·P_true
278 // it keeps scoring E ≈ 0 there and never considers P_true.
279 //
280 // OP25 works around this by testing P_est/2, /3, /4, /5 after
281 // the DP and switching to the smallest sub-multiple whose E
282 // is comparable to or smaller than the current best. We do
283 // the same using the single-frame E(p) array — less precise
284 // than OP25's look-ahead-augmented cef but enough to fix the
285 // 2P lock-in on voice-like signals.
286 //
287 // Thresholds carried verbatim from OP25's Q4.12 constants.
288 if chosen_idx >= 42 {
289 // Choose how far to divide. OP25 picks i ∈ {1..=4}
290 // inversely to the pitch index.
291 let max_div = if chosen_idx < 84 {
292 1
293 } else if chosen_idx < 126 {
294 2
295 } else if chosen_idx < 168 {
296 3
297 } else {
298 4
299 };
300 let cef_est = e_p.get(chosen_idx).copied().unwrap_or(1.0);
301 for div in (1..=max_div).rev() {
302 // OP25's p_fp = (chosen_idx + 42) in Q7.1 shifted.
303 // Here we work directly on the real period.
304 let base_period = idx_to_period(chosen_idx);
305 let divisor = match div {
306 1 => 2.0_f32,
307 2 => 3.0,
308 3 => 4.0,
309 _ => 5.0,
310 };
311 let sub_period = base_period / divisor;
312 if sub_period < 21.0 {
313 continue; // below grid minimum
314 }
315 let sub_idx = period_to_idx(sub_period);
316 let cef = e_p.get(sub_idx).copied().unwrap_or(1.0);
317
318 let accept = if cef <= 0.05 {
319 // Tier 3 — always accept small-enough sub-multiple.
320 true
321 } else if cef <= 0.4 && cef <= 3.5 * cef_est {
322 // Tier 2 — accept if within 3.5× of current best.
323 true
324 } else if cef <= 0.85 && cef <= 1.7 * cef_est {
325 // Tier 1 — accept if within 1.7× of current best.
326 true
327 } else {
328 false
329 };
330 if accept {
331 chosen_idx = sub_idx;
332 break;
333 }
334 }
335 }
336
337 let chosen_e = e_p.get(chosen_idx).copied().unwrap_or(1.0);
338
339 // Commit state. Mirror OP25: prev_prev gets the old prev,
340 // prev gets the newly-chosen pitch.
341 self.prev_prev_pitch_idx = self.prev_pitch_idx;
342 self.prev_prev_e_p = self.prev_e_p;
343 self.prev_pitch_idx = chosen_idx;
344 self.prev_e_p = chosen_e;
345
346 let period = idx_to_period(chosen_idx);
347 let confidence = (1.0 - chosen_e).clamp(0.0, 1.0);
348 PitchEstimate {
349 period_samples: period,
350 f0_hz: 8000.0 / period,
351 confidence,
352 }
353 }
354
355 /// For regression tests / diagnostics: the internal state after the
356 /// last [`Self::estimate`] call.
357 #[must_use]
358 #[cfg(test)]
359 pub(crate) const fn state(&self) -> (usize, usize, f32, f32) {
360 (
361 self.prev_pitch_idx,
362 self.prev_prev_pitch_idx,
363 self.prev_e_p,
364 self.prev_prev_e_p,
365 )
366 }
367}
368
369impl Default for PitchTracker {
370 fn default() -> Self {
371 Self::new()
372 }
373}
374
375impl PitchTracker {
376 /// Run OP25's full pitch tracker: single-frame look-back, fall
377 /// through to 2-frame-look-ahead DP on `pitch_est.cc:229–270`,
378 /// then sub-multiples analysis, then pick look-back vs
379 /// look-ahead by comparing cumulative-error scores.
380 ///
381 /// `e_p_current` is the `E(p)` array for the frame being
382 /// committed; `e_p_next` is the array for the frame that will
383 /// be committed next, and `e_p_nextnext` the one after that.
384 /// Callers are expected to buffer 2 frames of future
385 /// [`compute_e_p`] output before invoking this method; the
386 /// encoder's 40 ms effective latency is the cost.
387 ///
388 /// State is advanced exactly as in the single-frame path
389 /// — `prev_pitch_idx`, `prev_e_p`, and their `prev_prev`
390 /// shadows roll forward.
391 #[must_use]
392 #[allow(
393 clippy::similar_names,
394 clippy::too_many_lines,
395 reason = "Mirrors OP25's pitch_est block so the port can be read side-by-side with the reference"
396 )]
397 pub fn estimate_with_lookahead(
398 &mut self,
399 e_p_current: &[f32; PITCH_CANDIDATES],
400 e_p_next: &[f32; PITCH_CANDIDATES],
401 e_p_nextnext: &[f32; PITCH_CANDIDATES],
402 ) -> PitchEstimate {
403 // --- Step 1: single-frame look-back on e_p_current ---
404 // Per pitch_est.cc:200–226, look-back alone commits if
405 // ceb = E_current + prev_E + prev_prev_E ≤ 0.48.
406 let entry = MIN_MAX_TBL
407 .get(self.prev_pitch_idx)
408 .copied()
409 .unwrap_or(0x0008);
410 let min_idx = ((entry >> 8) & 0xFF) as usize;
411 let max_idx = (entry & 0xFF) as usize;
412 let mut pb = min_idx;
413 let mut e_pb = e_p_current.get(min_idx).copied().unwrap_or(1.0);
414 for idx in (min_idx + 1)..=max_idx.min(PITCH_CANDIDATES - 1) {
415 let e_val = e_p_current.get(idx).copied().unwrap_or(1.0);
416 if e_val < e_pb {
417 e_pb = e_val;
418 pb = idx;
419 }
420 }
421 let ceb = e_pb + self.prev_e_p + self.prev_prev_e_p;
422
423 if ceb <= CEB_THRESHOLD {
424 return self.commit_pitch(pb, e_pb);
425 }
426
427 // --- Step 2: 2-frame-look-ahead DP (pitch_est.cc:229–270) ---
428 //
429 // Build e_p_nextnext_min[p1]: for each p1, the minimum of
430 // e_p_nextnext over the p2 window defined by MIN_MAX_TBL[p1].
431 // This is the "best future-future score assuming p1 at
432 // t+1", computed once so the inner loop over p0 doesn't
433 // repeat the work.
434 let mut e_p_nextnext_min = [1.0_f32; PITCH_CANDIDATES];
435 for p1 in 0..PITCH_CANDIDATES {
436 let entry = MIN_MAX_TBL.get(p1).copied().unwrap_or(0x0008);
437 let min_p2 = ((entry >> 8) & 0xFF) as usize;
438 let max_p2 = (entry & 0xFF) as usize;
439 let mut best = e_p_nextnext.get(min_p2).copied().unwrap_or(1.0);
440 for p2 in (min_p2 + 1)..=max_p2.min(PITCH_CANDIDATES - 1) {
441 let v = e_p_nextnext.get(p2).copied().unwrap_or(1.0);
442 if v < best {
443 best = v;
444 }
445 }
446 if let Some(slot) = e_p_nextnext_min.get_mut(p1) {
447 *slot = best;
448 }
449 }
450
451 // e1p1_e2p2_est_save[p0] = min over p1 in MIN_MAX_TBL[p0] of
452 // (e_p_next[p1] + e_p_nextnext_min[p1]).
453 // cef[p0] = e_p_current[p0] + e1p1_e2p2_est_save[p0]; pick the
454 // p0 minimizing cef.
455 let mut e1p1_e2p2_est_save = [1.0_f32; PITCH_CANDIDATES];
456 let mut p0_est = 0_usize;
457 let mut cef_est = e_p_current[0] + e_p_next[0] + e_p_nextnext[0];
458 for p0 in 0..PITCH_CANDIDATES {
459 let entry = MIN_MAX_TBL.get(p0).copied().unwrap_or(0x0008);
460 let min_p1 = ((entry >> 8) & 0xFF) as usize;
461 let max_p1 = (entry & 0xFF) as usize;
462 let mut e1p1_e2p2_est = e_p_next.get(p0).copied().unwrap_or(1.0)
463 + e_p_nextnext_min.get(p0).copied().unwrap_or(1.0);
464 for p1 in min_p1..=max_p1.min(PITCH_CANDIDATES - 1) {
465 let cand = e_p_next.get(p1).copied().unwrap_or(1.0)
466 + e_p_nextnext_min.get(p1).copied().unwrap_or(1.0);
467 if cand < e1p1_e2p2_est {
468 e1p1_e2p2_est = cand;
469 }
470 }
471 if let Some(slot) = e1p1_e2p2_est_save.get_mut(p0) {
472 *slot = e1p1_e2p2_est;
473 }
474 let cef = e_p_current.get(p0).copied().unwrap_or(1.0) + e1p1_e2p2_est;
475 if cef < cef_est {
476 cef_est = cef;
477 p0_est = p0;
478 }
479 }
480
481 // --- Step 3: sub-multiples analysis on pf=p0_est ---
482 // Same 3-tier threshold cascade as the single-frame path,
483 // but testing against `cef = e_p_current[p_sub] +
484 // e1p1_e2p2_est_save[p_sub]` which consults all 3 frames.
485 let mut pf = p0_est;
486 if pf >= 42 {
487 let max_div = if pf < 84 {
488 1
489 } else if pf < 126 {
490 2
491 } else if pf < 168 {
492 3
493 } else {
494 4
495 };
496 for div in (1..=max_div).rev() {
497 let base_period = idx_to_period(pf);
498 let divisor = match div {
499 1 => 2.0_f32,
500 2 => 3.0,
501 3 => 4.0,
502 _ => 5.0,
503 };
504 let sub_period = base_period / divisor;
505 if sub_period < 21.0 {
506 continue;
507 }
508 let sub_idx = period_to_idx(sub_period);
509 let cef_sub = e_p_current.get(sub_idx).copied().unwrap_or(1.0)
510 + e1p1_e2p2_est_save.get(sub_idx).copied().unwrap_or(1.0);
511 // 3-tier acceptance cascade from pitch_est.cc:314–330.
512 // A sub-multiple is accepted when its combined error is
513 // very small on its own, small-and-close-to-best, or
514 // moderate-but-far-better-than-best.
515 let accept = cef_sub <= 0.05
516 || (cef_sub <= 0.4 && cef_sub <= 3.5 * cef_est)
517 || (cef_sub <= 0.85 && cef_sub <= 1.7 * cef_est);
518 if accept {
519 pf = sub_idx;
520 break;
521 }
522 }
523 }
524
525 // --- Step 4: pick look-back vs look-ahead ---
526 // cef_pf = e_p_current[pf] + e1p1_e2p2_est_save[pf]; if
527 // ceb ≤ cef_pf, look-back wins; else look-ahead.
528 let cef_pf = e_p_current.get(pf).copied().unwrap_or(1.0)
529 + e1p1_e2p2_est_save.get(pf).copied().unwrap_or(1.0);
530 let (chosen, chosen_e) = if ceb <= cef_pf {
531 (pb, e_pb)
532 } else {
533 (pf, e_p_current.get(pf).copied().unwrap_or(1.0))
534 };
535 self.commit_pitch(chosen, chosen_e)
536 }
537
538 /// Roll tracker state forward and return the corresponding
539 /// [`PitchEstimate`]. Shared between all track-and-commit paths.
540 fn commit_pitch(&mut self, chosen_idx: usize, chosen_e: f32) -> PitchEstimate {
541 self.prev_prev_pitch_idx = self.prev_pitch_idx;
542 self.prev_prev_e_p = self.prev_e_p;
543 self.prev_pitch_idx = chosen_idx;
544 self.prev_e_p = chosen_e;
545
546 let period = idx_to_period(chosen_idx);
547 let confidence = (1.0 - chosen_e).clamp(0.0, 1.0);
548 PitchEstimate {
549 period_samples: period,
550 f0_hz: 8000.0 / period,
551 confidence,
552 }
553 }
554}
555
556/// Compute IMBE's `E(p)` detectability function for one 301-sample
557/// pitch-estimation buffer.
558///
559/// Returns 203 fractional values in `[0, 1]`, one per candidate
560/// pitch index on the 0.5-sample grid 21.0 .. 122.0. A value near
561/// zero at index `i` means the signal is well-explained by a
562/// period of `21 + i·0.5` samples (plus its harmonic multiples).
563///
564/// Exposed so the encoder can maintain a ring of 3 `E(p)` arrays
565/// and hand them to [`PitchTracker::estimate_with_lookahead`]. The
566/// single-frame [`PitchTracker::estimate`] calls this internally.
567#[must_use]
568#[allow(
569 clippy::similar_names,
570 clippy::too_many_lines,
571 reason = "OP25's e_p() body kept flat for easy cross-reference with the reference implementation"
572)]
573pub fn compute_e_p(pitch_est_buf: &[f32; PITCH_EST_BUF_SIZE]) -> [f32; PITCH_CANDIDATES] {
574 let mut windowed = [0.0_f32; PITCH_EST_BUF_SIZE];
575 for (i, (&x, &w)) in pitch_est_buf.iter().zip(WI.iter()).enumerate() {
576 if let Some(slot) = windowed.get_mut(i) {
577 *slot = x * w;
578 }
579 }
580
581 // L_sum = Σ(s² · w) — single-windowed energy (OP25's L_sum via
582 // `L_mpy_ls(L_mult(s, s), wi)`).
583 let l_sum: f32 = pitch_est_buf
584 .iter()
585 .zip(WI.iter())
586 .map(|(&x, &w)| x * x * w)
587 .sum();
588 if l_sum < 1e-12 {
589 // Silent buffer — return "all bad" so track_single_frame
590 // / estimate_with_lookahead fall into their silent branches.
591 return [1.0; PITCH_CANDIDATES];
592 }
593
594 // L_e0 = Σ((s·w)²) — doubly-windowed self-energy. Scaled by
595 // 1/128 to match OP25's `L_shr(L_e0, 7)` compensation.
596 let l_e0_raw: f32 = windowed.iter().map(|&x| x * x).sum();
597 let l_e0 = l_e0_raw / 128.0;
598
599 // Build corr[0..259] covering integer lags 21..150 at even
600 // indices and their half-integer linear interpolations at odd
601 // indices.
602 let mut corr = [0.0_f32; 259];
603 for (i, shift) in (21..=150).enumerate() {
604 let j = i * 2;
605 if let Some(slot) = corr.get_mut(j) {
606 let mut acc = 0.0_f32;
607 for k in 0..(PITCH_EST_BUF_SIZE - shift) {
608 let a = windowed.get(k).copied().unwrap_or(0.0);
609 let b = windowed.get(k + shift).copied().unwrap_or(0.0);
610 acc = a.mul_add(b, acc);
611 }
612 *slot = acc;
613 }
614 }
615 for i in (1..258).step_by(2) {
616 let prev = corr.get(i - 1).copied().unwrap_or(0.0);
617 let next = corr.get(i + 1).copied().unwrap_or(0.0);
618 if let Some(slot) = corr.get_mut(i) {
619 *slot = 0.5 * (prev + next);
620 }
621 }
622
623 let mut e_p = [0.0_f32; PITCH_CANDIDATES];
624 for i in 0..PITCH_CANDIDATES {
625 let index_step = 42 + i;
626 let mut sum_corr = 0.0_f32;
627 let mut j = i;
628 while j <= 258 {
629 sum_corr += corr.get(j).copied().unwrap_or(0.0);
630 j += index_step;
631 }
632 let sum_corr_scaled = sum_corr / 64.0;
633 #[allow(clippy::cast_precision_loss, reason = "index_step ≤ 244")]
634 let p = index_step as f32;
635 let l_num = p.mul_add(-(l_e0 + sum_corr_scaled), l_sum);
636 let ratio = (l_num / l_sum).clamp(0.0, 1.0);
637 if let Some(slot) = e_p.get_mut(i) {
638 *slot = ratio;
639 }
640 }
641 e_p
642}
643
644#[cfg(test)]
645mod tests {
646 use super::{PITCH_CANDIDATES, PITCH_EST_BUF_SIZE, PitchTracker, idx_to_period, period_to_idx};
647
648 #[test]
649 fn idx_to_period_covers_op25_range() {
650 assert!((idx_to_period(0) - 21.0).abs() < 1e-6);
651 assert!((idx_to_period(1) - 21.5).abs() < 1e-6);
652 assert!((idx_to_period(PITCH_CANDIDATES - 1) - 122.0).abs() < 1e-6);
653 }
654
655 #[test]
656 fn period_to_idx_inverts_idx_to_period() {
657 for idx in 0..PITCH_CANDIDATES {
658 let p = idx_to_period(idx);
659 assert_eq!(period_to_idx(p), idx);
660 }
661 }
662
663 /// Zero input → confidence 0, period held at the default.
664 #[test]
665 fn silent_input_gives_zero_confidence() {
666 let buf = [0.0_f32; PITCH_EST_BUF_SIZE];
667 let mut tracker = PitchTracker::new();
668 let est = tracker.estimate(&buf);
669 assert!(
670 est.confidence.abs() < f32::EPSILON,
671 "expected confidence 0 on silent input, got {}",
672 est.confidence
673 );
674 }
675
676 /// 150 Hz pure sine has period 53.33 samples. The OP25 look-back
677 /// tracker's default window starts at `pitch_idx` 116 (period 79)
678 /// and converges inward: each frame narrows the allowed window
679 /// around `prev_pitch_idx`, so within a handful of frames we should
680 /// land on `pitch_idx` 64 or 65 (period 53.0 or 53.5).
681 ///
682 /// Pure tones have octave-symmetric E(p) minima (the signal is
683 /// trivially also periodic at 2P, 3P, ...), so this test only
684 /// asserts "within 3 samples of the true period OR within 3
685 /// samples of 2× the true period" — the remaining octave
686 /// ambiguity is exactly what the deferred 2-frame look-ahead DP
687 /// resolves.
688 #[test]
689 fn sine_at_150hz_lands_on_valid_octave() {
690 let mut buf = [0.0_f32; PITCH_EST_BUF_SIZE];
691 let f0_hz = 150.0_f32;
692 let sr = 8000.0_f32;
693 for (i, slot) in buf.iter_mut().enumerate() {
694 #[allow(clippy::cast_precision_loss)]
695 let t = i as f32;
696 *slot = (t * 2.0 * std::f32::consts::PI * f0_hz / sr).sin();
697 }
698 let mut tracker = PitchTracker::new();
699 for _ in 0..20 {
700 let _ = tracker.estimate(&buf);
701 }
702 let est = tracker.estimate(&buf);
703 let acceptable_periods = [53.3_f32, 106.6];
704 let matched = acceptable_periods
705 .iter()
706 .any(|&target| (est.period_samples - target).abs() < 3.0);
707 assert!(
708 matched,
709 "period {:.2} not within 3 of 53.3 or 106.6 (f0={:.1}, conf={:.3})",
710 est.period_samples, est.f0_hz, est.confidence
711 );
712 }
713
714 /// Pure sine at 200 Hz (period 40) — the look-ahead DP breaks
715 /// the octave ambiguity that stuck the single-frame tracker on
716 /// `2·P` or `P`. Feeds the same `E(p)` array for all three
717 /// look-ahead slots to simulate a long steady-state sine.
718 #[test]
719 fn sine_at_200hz_locks_to_valid_octave_via_dp() {
720 use super::compute_e_p;
721 let mut buf = [0.0_f32; PITCH_EST_BUF_SIZE];
722 let f0_hz = 200.0_f32;
723 let sr = 8000.0_f32;
724 for (i, slot) in buf.iter_mut().enumerate() {
725 #[allow(clippy::cast_precision_loss)]
726 let t = i as f32;
727 *slot = (t * 2.0 * std::f32::consts::PI * f0_hz / sr).sin();
728 }
729 let e_p = compute_e_p(&buf);
730 let mut tracker = PitchTracker::new();
731 // Warm the look-back state with a few estimates so the DP's
732 // prev_e_p history is representative.
733 for _ in 0..5 {
734 let _ = tracker.estimate_with_lookahead(&e_p, &e_p, &e_p);
735 }
736 let est = tracker.estimate_with_lookahead(&e_p, &e_p, &e_p);
737 // Valid octaves for 200 Hz at 8 kHz: 40, 80, 120. Sub-multiples
738 // analysis in the DP should prefer the smallest acceptable
739 // period — 40 — since E(40) ≈ E(80) ≈ E(120) on pure sines.
740 let valid = [40.0_f32, 80.0, 120.0];
741 let matched = valid.iter().any(|&m| (est.period_samples - m).abs() < 3.0);
742 assert!(
743 matched,
744 "period {:.2} not within 3 of any multiple of 40 ({valid:?}); \
745 conf={:.3}",
746 est.period_samples, est.confidence
747 );
748 }
749
750 /// The DP prefers a period that scores low across ALL three
751 /// frames. If frames 0 and 1 favour `P`, but frame 2 favours `2P`,
752 /// cef at `P` stays low (sum of good scores) while cef at `2P` is
753 /// pulled up by frame 0/1's bad score — the DP picks P.
754 #[test]
755 fn lookahead_dp_picks_pitch_stable_across_frames() {
756 use super::{PITCH_CANDIDATES, compute_e_p};
757 let mut tracker = PitchTracker::new();
758
759 // Generate a 200 Hz sine for all three slots. All three
760 // frames agree on pitch, so DP picks the stable answer.
761 let mut buf = [0.0_f32; PITCH_EST_BUF_SIZE];
762 for (i, slot) in buf.iter_mut().enumerate() {
763 #[allow(clippy::cast_precision_loss)]
764 let t = i as f32;
765 *slot = (t * 2.0 * std::f32::consts::PI * 200.0 / 8000.0).sin();
766 }
767 let e_p = compute_e_p(&buf);
768
769 let est = tracker.estimate_with_lookahead(&e_p, &e_p, &e_p);
770 // Confidence should be high — 3 frames agree perfectly.
771 assert!(
772 est.confidence > 0.9,
773 "expected high confidence on 3 matching frames, got {:.3}",
774 est.confidence
775 );
776 // The chosen pitch index must correspond to one of the
777 // sine's valid octaves (stored in the tracker state).
778 let (chosen_idx, _, _, _) = tracker.state();
779 assert!(chosen_idx < PITCH_CANDIDATES);
780 }
781
782 /// Voice-like signal: fundamental + 3 harmonics with decreasing
783 /// amplitude. Real speech has harmonic-amplitude asymmetry that
784 /// breaks the pure-tone octave tie; the tracker should lock onto
785 /// the true period rather than an octave multiple.
786 #[test]
787 fn voice_like_signal_locks_to_true_period() {
788 let mut buf = [0.0_f32; PITCH_EST_BUF_SIZE];
789 let f0_hz = 150.0_f32;
790 let sr = 8000.0_f32;
791 let harmonics = [1.0_f32, 0.6, 0.35, 0.2];
792 for (i, slot) in buf.iter_mut().enumerate() {
793 #[allow(clippy::cast_precision_loss)]
794 let t = i as f32;
795 let mut sum = 0.0_f32;
796 for (k, &) in harmonics.iter().enumerate() {
797 #[allow(clippy::cast_precision_loss)]
798 let harm = (k + 1) as f32;
799 sum += amp * (t * 2.0 * std::f32::consts::PI * f0_hz * harm / sr).sin();
800 }
801 *slot = sum * 0.4;
802 }
803 let mut tracker = PitchTracker::new();
804 for _ in 0..20 {
805 let _ = tracker.estimate(&buf);
806 }
807 let est = tracker.estimate(&buf);
808 let expected = 53.3_f32;
809 assert!(
810 (est.period_samples - expected).abs() < 3.0,
811 "period {:.2} off from 53.3 (err {:.2}) — harmonic signal \
812 should break the octave tie; f0={:.1}, conf={:.3}",
813 est.period_samples,
814 (est.period_samples - expected).abs(),
815 est.f0_hz,
816 est.confidence,
817 );
818 }
819
820 /// Look-back state tracks across frames: after one estimate, the
821 /// stored `prev_pitch_idx` should equal the freshly-chosen index,
822 /// and `prev_prev_pitch_idx` should hold the tracker's initial
823 /// default (`PITCH_DEFAULT_IDX`).
824 #[test]
825 fn state_rolls_forward_each_frame() {
826 let mut buf = [0.0_f32; PITCH_EST_BUF_SIZE];
827 let f0_hz = 150.0_f32;
828 let sr = 8000.0_f32;
829 for (i, slot) in buf.iter_mut().enumerate() {
830 #[allow(clippy::cast_precision_loss)]
831 let t = i as f32;
832 *slot = (t * 2.0 * std::f32::consts::PI * f0_hz / sr).sin();
833 }
834 let mut tracker = PitchTracker::new();
835 let (init_idx, _, _, _) = tracker.state();
836 let _ = tracker.estimate(&buf);
837 let (p1, pp1, _, _) = tracker.state();
838 let _ = tracker.estimate(&buf);
839 let (p2, pp2, _, _) = tracker.state();
840 assert_eq!(
841 pp1, init_idx,
842 "first frame: prev_prev should be initial default"
843 );
844 assert_eq!(
845 pp2, p1,
846 "second frame: prev_prev should be first frame's chosen idx"
847 );
848 let _ = p2;
849 }
850}