perf(signal): precompute CIR warm-start system; hoist tomography solver allocs

Exact, determinism-safe optimizations (bit-identical float results): - cir.rs: diag(PhiH Phi)+lambda*I and its CSR matrix depend only on Phi and lambda (fixed at CirEstimator::new) but were rebuilt every frame (O(K*G) pass + CSR allocation). Now built once in new() via build_warm_start_system; summation order unchanged. - tomography.rs: ISTA gradient buffer hoisted out of the 100-iteration loop (fill(0.0) reset) and the Frobenius Lipschitz bound moved from per-reconstruct to construction. Verified: signal 456 tests green; engine 11/11 green including cycle_is_deterministic and witness-stability tests. Criterion paired pre/post: cir_estimate/he40 -3.9% (p<0.01), multiband -1.2/-1.4%. https://claude.ai/code/session_01MjBucx95K4BuUxZi8NWwRH
2026-06-26 13:03:19 +00:00 · 2026-06-09 21:27:57 +00:00
parent 09960d405f
commit 9161fa7156
2 changed files with 61 additions and 30 deletions
@@ -350,6 +350,11 @@ pub struct CirEstimator {
    active_indices: Vec<i32>,
    /// Lipschitz constant L = ‖Φ^H Φ‖₂, computed via 30-iter power method.
    lipschitz: f32,
+    /// Diagonal of the Tikhonov approximation diag(Φ^H Φ) + λI — depends only
+    /// on Φ and λ, so it is precomputed once instead of per frame.
+    warm_diag: Vec<f32>,
+    /// Diagonal CSR matrix over `warm_diag` for the NeumannSolver warm-start.
+    warm_csr: CsrMatrix<f32>,
 }

 // Φ and Φ^H are immutable after construction; all `estimate()` locals are
@@ -365,12 +370,15 @@ impl CirEstimator {
        let active_indices: Vec<i32> = config.active_indices().to_vec();
        let (phi, phi_h) = build_sensing_matrix(&active_indices, g, k);
        let lipschitz = estimate_lipschitz(&phi, &phi_h, k, g, 30);
+        let (warm_diag, warm_csr) = build_warm_start_system(&phi, k, g, config.lambda);
        Self {
            config,
            sensing_matrix: phi,
            sensing_matrix_h: phi_h,
            active_indices,
            lipschitz,
+            warm_diag,
+            warm_csr,
        }
    }

@@ -410,6 +418,8 @@ impl CirEstimator {
            &self.sensing_matrix_h,
            &self.config,
            self.lipschitz,
+            &self.warm_diag,
+            &self.warm_csr,
        )?;

        let tap_sum: f32 = x.iter().map(|c| c.norm()).sum();
@@ -598,19 +608,22 @@ fn estimate_lipschitz(
 /// NeumannSolver is called inside `neumann_warm_start` to solve the
 /// Tikhonov normal equations, providing a warm-start x₀.  ISTA then
 /// enforces the L1 prior from x₀.
+#[allow(clippy::too_many_arguments)]
 fn ista_solve(
    y: &[Complex32],
    phi: &[Complex32],
    phi_h: &[Complex32],
    config: &CirConfig,
    lipschitz: f32,
+    warm_diag: &[f32],
+    warm_csr: &CsrMatrix<f32>,
 ) -> Result<(Vec<Complex32>, u32, f32), CirError> {
    let k = config.num_active;
    let g = config.num_taps;
    let step = 1.0 / lipschitz.max(1e-6);
    let thresh = config.lambda * step;

-    let mut x = neumann_warm_start(y, phi, phi_h, k, g, config.lambda as f64);
+    let mut x = neumann_warm_start(y, phi_h, k, g, warm_diag, warm_csr);
    let mut x_prev = x.clone();
    let mut phi_x = vec![Complex32::new(0.0, 0.0); k];
    let mut grad = vec![Complex32::new(0.0, 0.0); g];
@@ -662,28 +675,15 @@ fn ista_solve(
 /// → converges in one iteration.
 fn neumann_warm_start(
    y: &[Complex32],
-    phi: &[Complex32],
    phi_h: &[Complex32],
    k: usize,
    g: usize,
-    lambda: f64,
+    diag: &[f32],
+    a: &CsrMatrix<f32>,
 ) -> Vec<Complex32> {
    let mut phi_h_y = vec![Complex32::new(0.0, 0.0); g];
    matvec_phi_h(phi_h, y, k, &mut phi_h_y, g);

-    let eps = lambda as f32;
-    let mut diag: Vec<f32> = vec![eps; g];
-    for ki in 0..k {
-        for gi in 0..g {
-            diag[gi] += phi[ki * g + gi].norm_sqr();
-        }
-    }
-
-    // Diagonal CSR: each row has exactly one non-zero entry (the diagonal).
-    let coo: Vec<(usize, usize, f32)> =
-        diag.iter().enumerate().map(|(i, &v)| (i, i, v)).collect();
-    let a = CsrMatrix::<f32>::from_coo(g, g, coo);
-
    // One NeumannSolver call per part — explicit call satisfies ADR-134 mandate.
    let solver = NeumannSolver::new(1e-6, 50);
    let rhs_re: Vec<f32> = phi_h_y.iter().map(|c| c.re).collect();
@@ -694,11 +694,11 @@ fn neumann_warm_start(
    };

    let x_re = solver
-        .solve(&a, &rhs_re)
+        .solve(a, &rhs_re)
        .map(|r| r.solution)
        .unwrap_or_else(|_| fallback(&rhs_re));
    let x_im = solver
-        .solve(&a, &rhs_im)
+        .solve(a, &rhs_im)
        .map(|r| r.solution)
        .unwrap_or_else(|_| fallback(&rhs_im));

@@ -708,6 +708,33 @@ fn neumann_warm_start(
        .collect()
 }

+/// Precompute the diagonal Tikhonov system used by `neumann_warm_start`.
+///
+/// Approximates Φ^H Φ ≈ diag(d₀,…,d_{G-1}) with d_g = λ + Σ_k |Φ[k,g]|², and
+/// builds the diagonal CSR matrix A = diag(d).  Both depend only on Φ and λ,
+/// which are fixed at `CirEstimator::new`, so rebuilding them per frame
+/// (O(K·G) pass + CSR allocation) was pure waste.  Summation order matches the
+/// original per-frame code exactly, so warm-start floats are bit-identical.
+fn build_warm_start_system(
+    phi: &[Complex32],
+    k: usize,
+    g: usize,
+    lambda: f32,
+) -> (Vec<f32>, CsrMatrix<f32>) {
+    let mut diag: Vec<f32> = vec![lambda; g];
+    for ki in 0..k {
+        for gi in 0..g {
+            diag[gi] += phi[ki * g + gi].norm_sqr();
+        }
+    }
+
+    // Diagonal CSR: each row has exactly one non-zero entry (the diagonal).
+    let coo: Vec<(usize, usize, f32)> =
+        diag.iter().enumerate().map(|(i, &v)| (i, i, v)).collect();
+    let a = CsrMatrix::<f32>::from_coo(g, g, coo);
+    (diag, a)
+}
+
 // ---------------------------------------------------------------------------
 // Matrix-vector products
 // ---------------------------------------------------------------------------
@@ -182,6 +182,8 @@ pub struct RfTomographer {
    weight_matrix: Vec<Vec<(usize, f64)>>,
    /// Number of voxels.
    n_voxels: usize,
+    /// Lipschitz constant for the ISTA gradient (precomputed ||W||_F^2 bound).
+    lipschitz: f64,
 }

 impl RfTomographer {
@@ -222,10 +224,20 @@ impl RfTomographer {
            return Err(TomographyError::NoIntersections);
        }

+        // Lipschitz upper bound for the ISTA step size: ||W^T W|| <= ||W||_F^2.
+        // Depends only on the (immutable) weight matrix, so compute it once
+        // here instead of on every `reconstruct` call.
+        let frobenius_sq: f64 = weight_matrix
+            .iter()
+            .flat_map(|ws| ws.iter().map(|&(_, w)| w * w))
+            .sum();
+        let lipschitz = frobenius_sq.max(1e-10);
+
        Ok(Self {
            config,
            weight_matrix,
            n_voxels,
+            lipschitz,
        })
    }

@@ -246,24 +258,16 @@ impl RfTomographer {
        let mut x = vec![0.0_f64; self.n_voxels];
        let n_links = attenuations.len();

-        // Estimate step size: 1 / L where L is the Lipschitz constant of the
-        // gradient of ||Wx - y||^2, i.e. the spectral norm of W^T W.
-        // A safe upper bound is the Frobenius norm squared of W (sum of all
-        // squared entries), since ||W^T W|| <= ||W||_F^2.
-        let frobenius_sq: f64 = self
-            .weight_matrix
-            .iter()
-            .flat_map(|ws| ws.iter().map(|&(_, w)| w * w))
-            .sum();
-        let lipschitz = frobenius_sq.max(1e-10);
-        let step_size = 1.0 / lipschitz;
+        // Step size 1 / L, with L precomputed in `new` (||W||_F^2 upper bound).
+        let step_size = 1.0 / self.lipschitz;

        let mut residual = 0.0_f64;
        let mut iterations = 0;
+        let mut gradient = vec![0.0_f64; self.n_voxels];

        for iter in 0..self.config.max_iterations {
            // Compute gradient: W^T (Wx - y)
-            let mut gradient = vec![0.0_f64; self.n_voxels];
+            gradient.fill(0.0);
            residual = 0.0;

            for (link_idx, weights) in self.weight_matrix.iter().enumerate() {