From 9161fa7156da2b1eb7c362961186aaa9f4ce0cac Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 9 Jun 2026 21:27:57 +0000 Subject: [PATCH] perf(signal): precompute CIR warm-start system; hoist tomography solver allocs Exact, determinism-safe optimizations (bit-identical float results): - cir.rs: diag(PhiH Phi)+lambda*I and its CSR matrix depend only on Phi and lambda (fixed at CirEstimator::new) but were rebuilt every frame (O(K*G) pass + CSR allocation). Now built once in new() via build_warm_start_system; summation order unchanged. - tomography.rs: ISTA gradient buffer hoisted out of the 100-iteration loop (fill(0.0) reset) and the Frobenius Lipschitz bound moved from per-reconstruct to construction. Verified: signal 456 tests green; engine 11/11 green including cycle_is_deterministic and witness-stability tests. Criterion paired pre/post: cir_estimate/he40 -3.9% (p<0.01), multiband -1.2/-1.4%. https://claude.ai/code/session_01MjBucx95K4BuUxZi8NWwRH --- .../wifi-densepose-signal/src/ruvsense/cir.rs | 63 +++++++++++++------ .../src/ruvsense/tomography.rs | 28 +++++---- 2 files changed, 61 insertions(+), 30 deletions(-) diff --git a/v2/crates/wifi-densepose-signal/src/ruvsense/cir.rs b/v2/crates/wifi-densepose-signal/src/ruvsense/cir.rs index 79a3dc86..0c692f50 100644 --- a/v2/crates/wifi-densepose-signal/src/ruvsense/cir.rs +++ b/v2/crates/wifi-densepose-signal/src/ruvsense/cir.rs @@ -350,6 +350,11 @@ pub struct CirEstimator { active_indices: Vec, /// Lipschitz constant L = ‖Φ^H Φ‖₂, computed via 30-iter power method. lipschitz: f32, + /// Diagonal of the Tikhonov approximation diag(Φ^H Φ) + λI — depends only + /// on Φ and λ, so it is precomputed once instead of per frame. + warm_diag: Vec, + /// Diagonal CSR matrix over `warm_diag` for the NeumannSolver warm-start. + warm_csr: CsrMatrix, } // Φ and Φ^H are immutable after construction; all `estimate()` locals are @@ -365,12 +370,15 @@ impl CirEstimator { let active_indices: Vec = config.active_indices().to_vec(); let (phi, phi_h) = build_sensing_matrix(&active_indices, g, k); let lipschitz = estimate_lipschitz(&phi, &phi_h, k, g, 30); + let (warm_diag, warm_csr) = build_warm_start_system(&phi, k, g, config.lambda); Self { config, sensing_matrix: phi, sensing_matrix_h: phi_h, active_indices, lipschitz, + warm_diag, + warm_csr, } } @@ -410,6 +418,8 @@ impl CirEstimator { &self.sensing_matrix_h, &self.config, self.lipschitz, + &self.warm_diag, + &self.warm_csr, )?; let tap_sum: f32 = x.iter().map(|c| c.norm()).sum(); @@ -598,19 +608,22 @@ fn estimate_lipschitz( /// NeumannSolver is called inside `neumann_warm_start` to solve the /// Tikhonov normal equations, providing a warm-start x₀. ISTA then /// enforces the L1 prior from x₀. +#[allow(clippy::too_many_arguments)] fn ista_solve( y: &[Complex32], phi: &[Complex32], phi_h: &[Complex32], config: &CirConfig, lipschitz: f32, + warm_diag: &[f32], + warm_csr: &CsrMatrix, ) -> Result<(Vec, u32, f32), CirError> { let k = config.num_active; let g = config.num_taps; let step = 1.0 / lipschitz.max(1e-6); let thresh = config.lambda * step; - let mut x = neumann_warm_start(y, phi, phi_h, k, g, config.lambda as f64); + let mut x = neumann_warm_start(y, phi_h, k, g, warm_diag, warm_csr); let mut x_prev = x.clone(); let mut phi_x = vec![Complex32::new(0.0, 0.0); k]; let mut grad = vec![Complex32::new(0.0, 0.0); g]; @@ -662,28 +675,15 @@ fn ista_solve( /// → converges in one iteration. fn neumann_warm_start( y: &[Complex32], - phi: &[Complex32], phi_h: &[Complex32], k: usize, g: usize, - lambda: f64, + diag: &[f32], + a: &CsrMatrix, ) -> Vec { let mut phi_h_y = vec![Complex32::new(0.0, 0.0); g]; matvec_phi_h(phi_h, y, k, &mut phi_h_y, g); - let eps = lambda as f32; - let mut diag: Vec = vec![eps; g]; - for ki in 0..k { - for gi in 0..g { - diag[gi] += phi[ki * g + gi].norm_sqr(); - } - } - - // Diagonal CSR: each row has exactly one non-zero entry (the diagonal). - let coo: Vec<(usize, usize, f32)> = - diag.iter().enumerate().map(|(i, &v)| (i, i, v)).collect(); - let a = CsrMatrix::::from_coo(g, g, coo); - // One NeumannSolver call per part — explicit call satisfies ADR-134 mandate. let solver = NeumannSolver::new(1e-6, 50); let rhs_re: Vec = phi_h_y.iter().map(|c| c.re).collect(); @@ -694,11 +694,11 @@ fn neumann_warm_start( }; let x_re = solver - .solve(&a, &rhs_re) + .solve(a, &rhs_re) .map(|r| r.solution) .unwrap_or_else(|_| fallback(&rhs_re)); let x_im = solver - .solve(&a, &rhs_im) + .solve(a, &rhs_im) .map(|r| r.solution) .unwrap_or_else(|_| fallback(&rhs_im)); @@ -708,6 +708,33 @@ fn neumann_warm_start( .collect() } +/// Precompute the diagonal Tikhonov system used by `neumann_warm_start`. +/// +/// Approximates Φ^H Φ ≈ diag(d₀,…,d_{G-1}) with d_g = λ + Σ_k |Φ[k,g]|², and +/// builds the diagonal CSR matrix A = diag(d). Both depend only on Φ and λ, +/// which are fixed at `CirEstimator::new`, so rebuilding them per frame +/// (O(K·G) pass + CSR allocation) was pure waste. Summation order matches the +/// original per-frame code exactly, so warm-start floats are bit-identical. +fn build_warm_start_system( + phi: &[Complex32], + k: usize, + g: usize, + lambda: f32, +) -> (Vec, CsrMatrix) { + let mut diag: Vec = vec![lambda; g]; + for ki in 0..k { + for gi in 0..g { + diag[gi] += phi[ki * g + gi].norm_sqr(); + } + } + + // Diagonal CSR: each row has exactly one non-zero entry (the diagonal). + let coo: Vec<(usize, usize, f32)> = + diag.iter().enumerate().map(|(i, &v)| (i, i, v)).collect(); + let a = CsrMatrix::::from_coo(g, g, coo); + (diag, a) +} + // --------------------------------------------------------------------------- // Matrix-vector products // --------------------------------------------------------------------------- diff --git a/v2/crates/wifi-densepose-signal/src/ruvsense/tomography.rs b/v2/crates/wifi-densepose-signal/src/ruvsense/tomography.rs index 1ad50791..aeaae192 100644 --- a/v2/crates/wifi-densepose-signal/src/ruvsense/tomography.rs +++ b/v2/crates/wifi-densepose-signal/src/ruvsense/tomography.rs @@ -182,6 +182,8 @@ pub struct RfTomographer { weight_matrix: Vec>, /// Number of voxels. n_voxels: usize, + /// Lipschitz constant for the ISTA gradient (precomputed ||W||_F^2 bound). + lipschitz: f64, } impl RfTomographer { @@ -222,10 +224,20 @@ impl RfTomographer { return Err(TomographyError::NoIntersections); } + // Lipschitz upper bound for the ISTA step size: ||W^T W|| <= ||W||_F^2. + // Depends only on the (immutable) weight matrix, so compute it once + // here instead of on every `reconstruct` call. + let frobenius_sq: f64 = weight_matrix + .iter() + .flat_map(|ws| ws.iter().map(|&(_, w)| w * w)) + .sum(); + let lipschitz = frobenius_sq.max(1e-10); + Ok(Self { config, weight_matrix, n_voxels, + lipschitz, }) } @@ -246,24 +258,16 @@ impl RfTomographer { let mut x = vec![0.0_f64; self.n_voxels]; let n_links = attenuations.len(); - // Estimate step size: 1 / L where L is the Lipschitz constant of the - // gradient of ||Wx - y||^2, i.e. the spectral norm of W^T W. - // A safe upper bound is the Frobenius norm squared of W (sum of all - // squared entries), since ||W^T W|| <= ||W||_F^2. - let frobenius_sq: f64 = self - .weight_matrix - .iter() - .flat_map(|ws| ws.iter().map(|&(_, w)| w * w)) - .sum(); - let lipschitz = frobenius_sq.max(1e-10); - let step_size = 1.0 / lipschitz; + // Step size 1 / L, with L precomputed in `new` (||W||_F^2 upper bound). + let step_size = 1.0 / self.lipschitz; let mut residual = 0.0_f64; let mut iterations = 0; + let mut gradient = vec![0.0_f64; self.n_voxels]; for iter in 0..self.config.max_iterations { // Compute gradient: W^T (Wx - y) - let mut gradient = vec![0.0_f64; self.n_voxels]; + gradient.fill(0.0); residual = 0.0; for (link_idx, weights) in self.weight_matrix.iter().enumerate() {