mbelib_rs/encode/vuv.rs
1// SPDX-FileCopyrightText: 2009 Pavel Yazev (OP25 imbe_vocoder/v_uv_det.cc)
2// SPDX-FileCopyrightText: 2026 Swift Raccoon
3// SPDX-License-Identifier: GPL-2.0-or-later OR GPL-3.0-or-later
4
5//! Per-band voiced/unvoiced decisions + integrated spectral amplitude
6//! extraction.
7//!
8//! Port of OP25's `imbe_vocoder::v_uv_det`. For each harmonic in the
9//! `±f0/2` analysis window, fit a weighted sinusoid via the spectral
10//! response window [`WR_SP`](crate::encode::wr_sp::WR_SP), then decide
11//! voicing per band from the reconstruction-error-to-energy ratio
12//! `Dk = D_num / D_den`. Bands group 3 adjacent harmonics except the
13//! final band which takes the remainder.
14//!
15//! # Differences from a bare V/UV
16//!
17//! The OP25 algorithm couples V/UV with spectral amplitude
18//! extraction: the same sinusoidal-fit pass that produces the
19//! per-bin fitted amplitude feeds into both the `Dk` ratio (V/UV
20//! output) and the per-harmonic SA output. Re-running extraction in
21//! a separate pass using centre-bin integration (our old
22//! [`extract_spectral_amplitudes`](crate::encode::extract_spectral_amplitudes))
23//! produces different numerical SAs because the 3-bin-power
24//! integration doesn't account for the Hamming spectral lobe the
25//! analysis window imparts on each harmonic.
26//!
27//! # State carried across frames
28//!
29//! [`VuvState`] holds the `v_uv_dsn` hysteresis array (previous
30//! frame's decision per band) and the ``th_max`` sliding frame-quality
31//! maximum. Both are part of OP25's steady-state behaviour:
32//!
33//! - Hysteresis: a band that was voiced last frame has a lower bar
34//! to stay voiced (threshold 0.5625 · `M_fcn` − …). An unvoiced band
35//! has a higher bar to become voiced (threshold 0.45 · `M_fcn` − …).
36//! Prevents per-frame V/UV ping-pong on borderline signals.
37//!
38//! - `th_max`: slow-update ceiling of total frame energy. When the
39//! current frame's total energy `th0 > th_max`, `th_max` jumps to
40//! the midpoint; otherwise it decays at 0.99 per frame. The
41//! V/UV threshold is scaled by `M_fcn = (th0 + 0.0025·th_max) /
42//! (th0 + 0.01·th_max)`, which is near 1 for loud frames and
43//! approaches 0.25 for very quiet ones. Quiet frames are thus
44//! pushed toward unvoiced (the simpler, noisier synthesis mode).
45
46use realfft::num_complex::Complex;
47
48use crate::encode::spectral::{MAX_HARMONICS, SpectralAmplitudes};
49use crate::encode::wr_sp::{WR_SP, WR_SP_CENTER};
50
/// Maximum number of V/UV bands (12 per AMBE spec).
pub const MAX_BANDS: usize = 12;

/// Harmonics per band (OP25 `NUM_HARMS_PER_BAND`). The final band
/// absorbs whatever remainder the harmonic count leaves (see the
/// commit logic in `detect_vuv_and_sa`).
const HARMS_PER_BAND: usize = 3;
56
/// Per-stream V/UV state carried across frames.
///
/// One instance per audio stream; [`detect_vuv_and_sa`] both reads
/// and rewrites it on every frame.
#[derive(Debug, Clone, Copy)]
pub struct VuvState {
    /// Previous-frame V/UV decision per band. Used to bias the
    /// current-frame threshold: a band that was voiced is easier to
    /// keep voiced (hysteresis).
    prev_voiced: [bool; MAX_BANDS],
    /// Slow-update frame-energy ceiling used to compute `M_fcn`, the
    /// per-frame threshold multiplier. Approaches the peak frame
    /// energy seen so far; decays at 0.99 per frame when the current
    /// frame is below the peak.
    th_max: f32,
}
70
71impl VuvState {
72 /// Fresh state — no hysteresis, `th_max` starts at 0.
73 #[must_use]
74 pub const fn new() -> Self {
75 Self {
76 prev_voiced: [false; MAX_BANDS],
77 th_max: 0.0,
78 }
79 }
80}
81
82impl Default for VuvState {
83 fn default() -> Self {
84 Self::new()
85 }
86}
87
/// Per-frame V/UV decision vector.
#[derive(Debug, Clone, Copy)]
pub struct VuvDecisions {
    /// `true` = voiced band (periodic content dominates), `false` =
    /// unvoiced (noise-like). Only the first `num_bands` entries
    /// are meaningful; the remaining slots keep their `false`
    /// padding.
    pub voiced: [bool; MAX_BANDS],
    /// Number of active harmonic bands for this frame (derived from
    /// the pitch).
    pub num_bands: usize,
}
99
100/// Stateless one-shot V/UV convenience wrapper.
101///
102/// Builds a fresh [`VuvState`] internally and calls
103/// [`detect_vuv_and_sa`] with a neutral `e_p = 0.5`. Discards the
104/// spectral amplitudes. Exposed for validators and tests that want a
105/// simple "classify this spectrum" call; the encoder itself uses
106/// [`detect_vuv_and_sa`] directly so that hysteresis + ``th_max`` carry
107/// across frames.
108#[must_use]
109#[allow(
110 clippy::cast_possible_truncation,
111 clippy::cast_sign_loss,
112 clippy::cast_precision_loss,
113 reason = "DSP bin math; inputs bounded by FFT length"
114)]
115pub fn detect_vuv(fft_out: &[Complex<f32>], f0_bin: f32) -> VuvDecisions {
116 let mut state = VuvState::new();
117 let (vuv, _sa) = detect_vuv_and_sa(fft_out, f0_bin, &mut state, 0.5);
118 vuv
119}
120
/// Integrated V/UV + spectral amplitude extraction.
///
/// Ports OP25's `v_uv_det`. For each harmonic `k ∈ 1..=num_harms`,
/// extracts the analysis window `[k·f0 − f0/2, k·f0 + f0/2]` from
/// the FFT, fits a windowed sinusoid, and accumulates error and
/// energy. Every `HARMS_PER_BAND` harmonics, commits a band decision
/// based on `Dk = D_num / D_den < dsn_thr`.
///
/// `e_p` is the current-frame pitch-error metric (output of the
/// pitch tracker); passing a large value (`> 0.55`) disables voicing
/// in all but the first band — the pitch quality is too low to trust
/// the harmonic model.
///
/// Returns the per-band decisions plus per-harmonic spectral
/// amplitudes (voiced harmonics via the sinusoidal-fit energy,
/// unvoiced via RMS bin energy). Also updates `state` in place.
#[must_use]
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    clippy::too_many_lines,
    reason = "Mirrors OP25's v_uv_det block so the port can be cross-referenced line-by-line"
)]
pub fn detect_vuv_and_sa(
    fft_out: &[Complex<f32>],
    f0_bin: f32,
    state: &mut VuvState,
    e_p: f32,
) -> (VuvDecisions, SpectralAmplitudes) {
    // Derive num_harms / num_bands from pitch. OP25:
    // num_harms = min(max(int((pitch/2 + 0.5) · 0.9254), NUM_HARMS_MIN), NUM_HARMS_MAX)
    // In our bin-domain, pitch_period_samples = 256 / f0_bin.
    let period = if f0_bin > 0.5 { 256.0 / f0_bin } else { 256.0 };
    // OP25 computes `num_harms` via TWO successive integer
    // truncations (`v_uv_det.cc:117-118`):
    //
    //   tmp       = shr(add(shr(ref_pitch, 1), CNST_0_25_Q8_8), 8)
    //             = int_part(period / 2 + 0.25)
    //   num_harms = extract_h(tmp * CNST_0_9254_Q0_16)
    //             = int_part(tmp * 0.9254)
    //
    // The inner truncation matters: a pure
    // `floor((period/2 + 0.5) * 0.9254)` (our previous formula) is
    // off-by-one on many mid-range pitches — e.g. `period = 54.75`
    // gives `(27.375 + 0.5) * 0.9254 = 25.8 → 25`, while OP25's
    // pathway gives `floor(27.375 + 0.25) * 0.9254 = 27 * 0.9254 =
    // 24.98 → 24`. That 1-harmonic disagreement shifts
    // `L_TABLE`-indexed `b0` selection and cascades into b1/b2/b3
    // quantizer searches. Matched exactly here.
    let num_harms: usize = {
        #[allow(
            clippy::cast_possible_truncation,
            clippy::cast_sign_loss,
            reason = "period ∈ (0, 1024); period/2 + 0.25 < 513; truncating to usize is exact"
        )]
        let tmp = (period * 0.5 + 0.25) as usize;
        #[allow(clippy::cast_precision_loss, reason = "tmp ≤ 513; exact in f32")]
        let raw = tmp as f32 * 0.9254;
        let clamped = raw.max(9.0).min(MAX_HARMONICS as f32);
        #[allow(
            clippy::cast_possible_truncation,
            clippy::cast_sign_loss,
            reason = "clamped above to [9.0, MAX_HARMONICS]; truncation is exact"
        )]
        let n = clamped as usize;
        n
    };
    // Ceiling-divide harmonics into 3-per-band groups; 37+ harmonics
    // always fill all MAX_BANDS bands.
    let num_bands = if num_harms <= 36 {
        ((num_harms + 2) / HARMS_PER_BAND).min(MAX_BANDS)
    } else {
        MAX_BANDS
    };

    // ── Frame-level quality metric `M_fcn` ──────────────────────
    //
    // `energy_low` = low-half-spectrum energy, `energy_high` =
    // high-half, `energy_total` = sum. Slow-update `th_max` so
    // `M_fcn` reflects "this frame vs recent loudest". OP25's
    // thresholds match int16 signal scale; we operate on normalized
    // f32 [-1, 1), so the numerical magnitudes are tiny (~1e-4 for
    // speech). `M_fcn`'s ratio form keeps it in [~0.25, ~1.0]
    // regardless.
    let bin_half = fft_out.len() / 2;
    let energy_low: f32 = fft_out
        .iter()
        .take(bin_half)
        .map(Complex::<f32>::norm_sqr)
        .sum();
    let energy_high: f32 = fft_out
        .iter()
        .skip(bin_half)
        .map(Complex::<f32>::norm_sqr)
        .sum();
    let energy_total = energy_low + energy_high;
    // Peak tracking: jump halfway up toward a louder frame; decay at
    // 0.99 per frame otherwise.
    state.th_max = if energy_total > state.th_max {
        0.5 * (state.th_max + energy_total)
    } else {
        0.99_f32.mul_add(state.th_max, 0.01 * energy_total)
    };
    let mut m_fcn = {
        let num = 0.0025_f32.mul_add(state.th_max, energy_total);
        let den = 0.01_f32.mul_add(state.th_max, energy_total);
        if den < 1e-30 { 0.25 } else { num / den }
    };
    // If low-frequency energy is much smaller than high-frequency,
    // dampen `M_fcn` (the signal lacks the low-F structure typical of
    // speech, so the voicing decision needs more evidence).
    let hi5 = 5.0 * energy_high;
    if energy_low < hi5 && hi5 > 1e-30 {
        m_fcn *= (energy_low / hi5).sqrt();
    }

    // ── Per-harmonic sinusoidal fit ────────────────────────────
    let mut sa = [0.0_f32; MAX_HARMONICS];
    // Cache the sc_coef and window energy per harmonic so the final
    // SA calculation can reuse them after the band decision lands.
    let mut m_num = [0.0_f32; MAX_HARMONICS]; // observed window energy
    let mut sc_coef = [0.0_f32; MAX_HARMONICS]; // 1 / Σ wr_sp² for this harmonic
    let mut bin_counts = [0_usize; MAX_HARMONICS]; // bins used per harmonic

    let mut voiced = [false; MAX_BANDS];
    let mut d_num = 0.0_f32;
    let mut d_den = 0.0_f32;
    let mut band_cnt = 0_usize;
    let mut num_harms_cnt = 0_usize;
    let mut dsn_thr = 0.0_f32;
    // Cumulative band-center frequency (scaled to bin_half span). Used
    // to damp thresholds at high-frequency bands where harmonic
    // energy is typically lower.
    let mut band_center_norm = 0.0_f32;
    let band_center_step = f0_bin * HARMS_PER_BAND as f32 / (bin_half as f32);

    for k in 0..num_harms {
        let center = (k as f32 + 1.0) * f0_bin;
        let half_win = f0_bin * 0.5;
        let bin_lo = (center - half_win).ceil().max(0.0) as usize;
        let bin_hi = ((center + half_win).ceil() as usize).min(fft_out.len());
        // Empty window: the harmonic lies entirely past the end of the
        // spectrum (possible for the top harmonics of high pitches).
        // NOTE(review): this also skips the commit bookkeeping below,
        // so a trailing band whose harmonics all fall out of range
        // keeps its defaults — unvoiced, SA = 0.
        if bin_lo >= bin_hi {
            continue;
        }
        bin_counts[k] = bin_hi - bin_lo;

        // Compute per-band threshold once per band.
        if num_harms_cnt == 0 {
            dsn_thr = if e_p > 0.55 && band_cnt >= 1 {
                // Pitch too unreliable: no Dk can be < 0, so every
                // band past the first is forced unvoiced.
                0.0
            } else if state.prev_voiced[band_cnt] {
                (-0.1741_f32).mul_add(band_center_norm, 0.5625) * m_fcn
            } else {
                (-0.1393_f32).mul_add(band_center_norm, 0.45) * m_fcn
            };
            band_center_norm += band_center_step;
        }

        // Pass 1: windowed-sinusoid fit.
        let mut amp_re = 0.0_f32;
        let mut amp_im = 0.0_f32;
        let mut m_den_sum = 0.0_f32;
        for bin in bin_lo..bin_hi {
            let w = wr_sp_sample(bin, center);
            let c = fft_out
                .get(bin)
                .copied()
                .unwrap_or_else(|| Complex::new(0.0, 0.0));
            amp_re = c.re.mul_add(w, amp_re);
            amp_im = c.im.mul_add(w, amp_im);
            m_den_sum = w.mul_add(w, m_den_sum);
        }
        let sc = if m_den_sum > 1e-12 {
            1.0 / m_den_sum
        } else {
            0.0
        };
        sc_coef[k] = sc;
        let fit_re = amp_re * sc;
        let fit_im = amp_im * sc;

        // Pass 2: error + energy accumulation.
        let mut m_num_sum = 0.0_f32;
        for bin in bin_lo..bin_hi {
            let w = wr_sp_sample(bin, center);
            let rec_re = fit_re * w;
            let rec_im = fit_im * w;
            let c = fft_out
                .get(bin)
                .copied()
                .unwrap_or_else(|| Complex::new(0.0, 0.0));
            let err_re = c.re - rec_re;
            let err_im = c.im - rec_im;
            d_num += err_re.mul_add(err_re, err_im * err_im);
            m_num_sum += c.norm_sqr();
        }
        m_num[k] = m_num_sum;
        d_den += m_num_sum;

        // Commit band every HARMS_PER_BAND harmonics (except last).
        num_harms_cnt += 1;
        let last_harmonic = k + 1 == num_harms;
        let full_band = num_harms_cnt == HARMS_PER_BAND && band_cnt < num_bands - 1;
        let commit = full_band || last_harmonic;
        if commit {
            // Degenerate energy → Dk = 1 (never below any threshold).
            let dk = if d_den > 1e-12 { d_num / d_den } else { 1.0 };
            let band_voiced = dk < dsn_thr;
            voiced[band_cnt] = band_voiced;

            // Emit SA for the harmonics that make up this band.
            let first_k = (k + 1) - num_harms_cnt;
            for kk in first_k..=k {
                sa[kk] = if band_voiced {
                    voiced_sa_calc(m_num[kk], sc_coef[kk])
                } else {
                    unvoiced_sa_calc(m_num[kk], bin_counts[kk])
                };
            }

            d_num = 0.0;
            d_den = 0.0;
            num_harms_cnt = 0;
            band_cnt += 1;
        }
    }

    state.prev_voiced = voiced;
    let vuv = VuvDecisions { voiced, num_bands };
    let amps = SpectralAmplitudes {
        magnitudes: sa,
        num_harmonics: num_harms,
    };
    (vuv, amps)
}
348
349/// Look up `WR_SP[round(WR_SP_CENTER + (bin − harmonic_center) · 64)]`
350/// with bounds-check, returning `0.0` outside the table. Centralises
351/// the f32→index arithmetic + sign/precision-loss justification for
352/// every site that samples the spectral-response window.
353#[inline]
354fn wr_sp_sample(bin: usize, harmonic_center: f32) -> f32 {
355 // `bin` < 129 for a 256-pt real FFT, so `bin as f32` is lossless.
356 // `harmonic_center` ∈ (0, 128) for any valid pitch. The `.round()`
357 // output is therefore in `[WR_SP_CENTER − 64·128, WR_SP_CENTER +
358 // 64·128] ⊂ i32`. Bounds check against `WR_SP_LEN` happens
359 // before indexing; out-of-range returns zero.
360 #[allow(
361 clippy::cast_precision_loss,
362 reason = "bin ≤ 128 and WR_SP_CENTER = 160; both fit exactly in f32"
363 )]
364 let offset_raw = 64.0_f32.mul_add(bin as f32 - harmonic_center, WR_SP_CENTER as f32);
365 let offset = offset_raw.round();
366 if !offset.is_finite() || offset < 0.0 {
367 return 0.0;
368 }
369 #[allow(
370 clippy::cast_possible_truncation,
371 clippy::cast_sign_loss,
372 reason = "Checked above: offset is finite and ≥ 0; truncation is exact for the integer result of `.round()` on a bounded small value"
373 )]
374 let idx = offset as usize;
375 WR_SP.get(idx).copied().unwrap_or(0.0)
376}
377
/// Voiced spectral amplitude: `sqrt(m_num / sc)` — observed window
/// energy normalised by the inverse window-self-energy
/// `sc = 1 / Σ wr_sp²`. Float port of OP25's
/// `voiced_sa_calc(M_num, M_den)`.
#[inline]
fn voiced_sa_calc(m_num: f32, sc: f32) -> f32 {
    // OP25's fixed-point version scales by `2 · 256 · sqrt(2 · num/den)`
    // where its `den` is this `sc`, i.e. `2·num/den = 2·num·M_den_sum`;
    // here the absolute scale is left to the downstream quantizer's
    // SA_SCALE, so only the normalised magnitude is computed. A
    // degenerate `sc` (the caller stores 0 when the window energy
    // vanishes) yields zero amplitude instead of a near-division
    // by zero.
    if sc < 1e-12 {
        0.0
    } else {
        (m_num / sc).sqrt()
    }
}
396
/// Unvoiced spectral amplitude: RMS per-bin observed energy scaled
/// by 0.1454 (compensates for unvoiced-synthesis overshoot) and √2.
/// Float port of OP25's `unvoiced_sa_calc(M_num, bin_count)`.
/// Returns `0.0` for an empty window (`bin_count == 0`).
#[inline]
fn unvoiced_sa_calc(m_num: f32, bin_count: usize) -> f32 {
    if bin_count == 0 {
        return 0.0;
    }
    #[allow(clippy::cast_precision_loss, reason = "bin_count ≤ 128")]
    let n = bin_count as f32;
    // √2 as the named constant: bit-identical to `(2.0_f32).sqrt()`
    // (both are the correctly-rounded f32 value) but clearer and not
    // dependent on the optimizer folding a runtime sqrt.
    0.1454 * (m_num / n).sqrt() * std::f32::consts::SQRT_2
}
409
#[cfg(test)]
mod tests {
    use super::{MAX_BANDS, VuvState, detect_vuv, detect_vuv_and_sa};
    use realfft::num_complex::Complex;

    /// A silent spectrum is classified as entirely unvoiced (all
    /// bands false).
    #[test]
    fn silent_spectrum_is_unvoiced() {
        let fft_out = vec![Complex::new(0.0, 0.0); 129];
        let decisions = detect_vuv(&fft_out, 6.4);
        assert!(decisions.voiced.iter().all(|&v| !v));
    }

    /// A spectrum shaped like the analysis window's sinusoidal
    /// response — `wr_sp` centered at each `k · f0_bin` — should be
    /// classified as voiced. Single-bin-impulse inputs don't work
    /// for the integrated detector: its sinusoidal fit expects the
    /// tapered `wr_sp` spectral lobe the real encoder's analysis
    /// window produces, so impulses leave `D_num` large and the ratio
    /// test fails even though the spectrum "looks" harmonic.
    #[test]
    fn harmonic_spectrum_is_voiced() {
        use crate::encode::wr_sp::{WR_SP, WR_SP_CENTER};
        let mut fft_out = vec![Complex::new(0.0, 0.0); 129];
        let f0_bin = 6.4_f32;
        // Paint the wr_sp lobe at each harmonic. The lobe spans ≈2.5
        // bins either side of center (160/64 ≈ 2.5), tapered to zero
        // at the edges.
        for k in 1..=10 {
            #[allow(
                clippy::cast_precision_loss,
                reason = "k ∈ [1, 10]; f32 rounding is exact below 2^24"
            )]
            let center = f0_bin * k as f32;
            for (wr_idx, &w) in WR_SP.iter().enumerate() {
                #[allow(
                    clippy::cast_precision_loss,
                    reason = "wr_idx ∈ [0, 321); exact in f32"
                )]
                let offset_bins = (wr_idx as f32 - WR_SP_CENTER as f32) / 64.0;
                // `bin` is bounded: center ∈ [6.4, 64], offset_bins
                // ∈ [−2.5, 2.5], so `bin ∈ [3, 67]` — well within
                // usize and within the 129-bin FFT.
                #[allow(
                    clippy::cast_possible_truncation,
                    clippy::cast_sign_loss,
                    reason = "bounded per comment above; truncation is exact for `.round()`"
                )]
                let bin = (center + offset_bins).round() as usize;
                if let Some(c) = fft_out.get_mut(bin) {
                    c.re += w;
                }
            }
        }
        let decisions = detect_vuv(&fft_out, f0_bin);
        let any_voiced = decisions.voiced.iter().any(|&v| v);
        assert!(
            any_voiced,
            "expected at least one voiced band; got {:?}",
            decisions.voiced
        );
    }

    /// A flat spectrum without clear harmonic peaks should not be
    /// called voiced.
    #[test]
    fn flat_spectrum_is_not_voiced() {
        let mut fft_out = vec![Complex::new(0.0, 0.0); 129];
        for c in &mut fft_out {
            *c = Complex::new(0.1, 0.0);
        }
        let decisions = detect_vuv(&fft_out, 6.4);
        let voiced_count = decisions.voiced.iter().filter(|&&v| v).count();
        // A flat spectrum's sinusoidal fit has residual error
        // comparable to the signal itself → Dk is near 1 → not voiced.
        assert!(
            voiced_count < decisions.num_bands,
            "all bands voiced for flat spectrum: {voiced_count}/{}",
            decisions.num_bands
        );
    }

    /// `num_bands` ≈ `num_harms / 3`, capped at `MAX_BANDS`.
    #[test]
    fn band_count_tracks_pitch() {
        let fft_out = vec![Complex::new(0.0, 0.0); 129];
        // f0_bin=2 ⇒ period=128 ⇒ tmp=⌊64.25⌋=64 ⇒ num_harms=⌊64·0.9254⌋=59,
        // clamped to MAX_HARMONICS; > 36 harmonics ⇒ num_bands = MAX_BANDS.
        let d_low = detect_vuv(&fft_out, 2.0);
        assert_eq!(d_low.num_bands, MAX_BANDS);
        // f0_bin=20 ⇒ period=12.8 ⇒ tmp=⌊6.65⌋=6 ⇒ ⌊6·0.9254⌋=5, clamped up
        // to the 9-harmonic minimum ⇒ num_bands=(9+2)/3=3.
        let d_high = detect_vuv(&fft_out, 20.0);
        assert!(d_high.num_bands >= 1 && d_high.num_bands <= 4);
    }

    /// Hysteresis: a band that was voiced is easier to keep voiced
    /// on the next frame.
    #[test]
    fn hysteresis_biases_voicing_decision() {
        // Build a spectrum that's a borderline voicing case — the
        // same ratio produces different decisions depending on
        // `state.prev_voiced`.
        let mut fft_out = vec![Complex::new(0.0, 0.0); 129];
        let f0_bin = 6.4_f32;
        for k in 1..=10 {
            #[allow(
                clippy::cast_precision_loss,
                clippy::cast_possible_truncation,
                clippy::cast_sign_loss
            )]
            let bin = (f0_bin * k as f32).round() as usize;
            if let Some(c) = fft_out.get_mut(bin) {
                *c = Complex::new(0.6, 0.0);
            }
        }
        // Add some background noise.
        for c in &mut fft_out {
            c.re += 0.15;
        }

        // Fresh state: all bands unvoiced → harder threshold.
        let mut fresh = VuvState::new();
        let (d1, _) = detect_vuv_and_sa(&fft_out, f0_bin, &mut fresh, 0.0);

        // Biased state: band 0 was voiced → easier threshold.
        let mut biased = VuvState::new();
        biased.prev_voiced[0] = true;
        let (d2, _) = detect_vuv_and_sa(&fft_out, f0_bin, &mut biased, 0.0);

        // In the biased case, at least as many bands voiced as fresh.
        let count_fresh = d1.voiced.iter().filter(|&&v| v).count();
        let count_biased = d2.voiced.iter().filter(|&&v| v).count();
        assert!(
            count_biased >= count_fresh,
            "hysteresis failed to bias voicing: fresh={count_fresh}, biased={count_biased}"
        );
    }

    /// Integrated SA path should produce non-zero magnitudes when the
    /// spectrum has visible harmonic content.
    #[test]
    fn integrated_sa_nonzero_for_harmonic_input() {
        let mut fft_out = vec![Complex::new(0.0, 0.0); 129];
        let f0_bin = 6.4_f32;
        for k in 1..=10 {
            #[allow(
                clippy::cast_precision_loss,
                clippy::cast_possible_truncation,
                clippy::cast_sign_loss
            )]
            let bin = (f0_bin * k as f32).round() as usize;
            if let Some(c) = fft_out.get_mut(bin) {
                *c = Complex::new(1.0, 0.0);
            }
        }
        let mut state = VuvState::new();
        let (_vuv, amps) = detect_vuv_and_sa(&fft_out, f0_bin, &mut state, 0.0);
        let total: f32 = amps.magnitudes.iter().sum();
        assert!(
            total > 0.0,
            "integrated SA all zero on harmonic input ({amps:?})"
        );
    }
}
573}