hardening(adr-117): benchmarks + security/robustness test suite

Benchmarks (`python/bench/`, pytest-benchmark — opt-in via --benchmark-only):

| Hot path | Mean | Ops/sec | % of 100 Hz budget |
|---|---|---|---|
| BfldFrame HT20 1×1×52 | 800 ns | 1.25 Mops | 0.008% |
| BfldFrame HE20 2×1×242 | 1.3 μs | 750 kops | 0.013% |
| BfldFrame HE80 2×1×996 | 4.2 μs | 236 kops | 0.042% |
| BfldFrame HE160 2×2×1992 | 14 μs | 71 kops | 0.14% |
| BfldFrame.feedback_matrix() | 2.8 μs | 352 kops | — |
| WS edge_vitals decode | 7.4 μs | 134 kops | 0.074% |
| WS pose_data decode (3 persons) | 23 μs | 42 kops | 0.24% |
| BreathingExtractor.extract() 56sc | 28 μs | 35 kops | 0.28% |
| BreathingExtractor.extract() 114sc | 44 μs | 23 kops | 0.44% |
| BreathingExtractor.extract() 242sc | 79 μs | 13 kops | 0.79% |
| HeartRateExtractor.extract() 56sc | 105 μs | 9.5 kops | 1.05% |

All hot paths are well under the 100 Hz ESP32 frame budget (10 ms). Worst case (HeartRateExtractor) uses 1% of the budget — no optimization needed. Scaling on n_subcarriers is sub-quadratic (56→242 = 4.3× input, 2.8× time) — catches future O(n²) regressions.
Security & robustness tests (`tests/test_security.py`, +27 tests):

- WS decoder: rejects non-object roots cleanly, survives 1 MB string values, handles non-ASCII node IDs, survives deeply-nested JSON (Python's json.loads built-in guard not bypassed)
- MQTT topic matcher: 9 edge-case parametrize entries including $SYS topics, null-byte injection, mid-pattern `#` boundary, empty-string boundary
- MQTT credential confidentiality: password never appears in repr()/str(), never stored in a plain client-instance attribute
- HA discovery: rejects null-byte-laced topics, rejects extra slashes in node_id, rejects non-dict payload body (list, scalar, invalid UTF-8 bytes) without crashing
- Semantic primitive listener: rejects topic-injection attempts (prefix-injected paths, wrong case on final segment), survives invalid UTF-8 payloads
- Public surface integrity: every name in wifi_densepose.__all__ AND wifi_densepose.client.__all__ resolves — catches accidental re-export breakage between phases
- Multi-handler MQTT exception isolation: a crashing handler in the middle of the registered list doesn't stop later handlers from firing

Test count: 156 → 183 (+27). All passing. Steady-state bench results confirm that no Rust-binding-layer optimization is needed before the v2.0.0 publish.

Refs: docs/adr/ADR-117-pip-wifi-densepose-modernization.md
Refs: #785
Co-Authored-By: claude-flow <ruv@ruv.net>
2026-06-20 12:03:19 +00:00 · 2026-05-24 11:44:54 -04:00
parent f9d99c50d9
commit 78916d8455
3 changed files with 456 additions and 0 deletions
@@ -0,0 +1,111 @@
+"""ADR-117 hardening sweep — Benchmarks for the P3.5 numpy bridge
+and the P4 WS decoder.
+
+The numpy bridge is the most-likely candidate for a hidden allocation
+hot-spot: every `BfldFrame.from_compressed_feedback()` call copies the
+ndarray into a Vec<Complex64>. Confirm the per-frame cost is
+acceptable for the BFR cadence the AP emits (typically a few
+hundred per second, not thousands).
+
+The WS decoder runs once per frame the sensing-server emits. At
+worst-case ~100 Hz × number-of-subscribers, the decoder budget is
+tight; make sure dataclass construction doesn't dominate.
+"""
+
+from __future__ import annotations
+
+import json
+
+import numpy as np
+import pytest
+
+from wifi_densepose import BfldFrame, BfldKind
+
+
+@pytest.mark.parametrize("kind,shape", [
+    (BfldKind.UncompressedHT20, (1, 1, 52)),
+    (BfldKind.CompressedHE20, (2, 1, 242)),
+    (BfldKind.CompressedHE80, (2, 1, 996)),
+    (BfldKind.CompressedHE160, (2, 2, 1992)),
+])
+def test_bfld_from_compressed_feedback(benchmark, kind: BfldKind, shape: tuple[int, int, int]) -> None:
+    rng = np.random.default_rng(seed=42)
+    fb = (rng.standard_normal(shape) + 1j * rng.standard_normal(shape)).astype(np.complex128)
+
+    def _build():
+        return BfldFrame.from_compressed_feedback(
+            timestamp_ms=0,
+            sounding_index=0,
+            sta_mac="aa:bb:cc:dd:ee:ff",
+            kind=kind,
+            feedback_matrix=fb,
+        )
+
+    benchmark(_build)
+
+
+def test_bfld_feedback_matrix_roundtrip(benchmark) -> None:
+    """How expensive is the numpy-out round-trip? Used by clients
+    that want to do further analysis in numpy after constructing
+    the frame."""
+    rng = np.random.default_rng(seed=42)
+    fb = (rng.standard_normal((2, 1, 996)) + 1j * rng.standard_normal((2, 1, 996))).astype(np.complex128)
+    frame = BfldFrame.from_compressed_feedback(
+        timestamp_ms=0,
+        sounding_index=0,
+        sta_mac="aa:bb:cc:dd:ee:ff",
+        kind=BfldKind.CompressedHE80,
+        feedback_matrix=fb,
+    )
+    benchmark(frame.feedback_matrix)
+
+
+# ─── WS decoder ──────────────────────────────────────────────────────
+
+
+_EDGE_VITALS_FRAME = json.dumps({
+    "type": "edge_vitals",
+    "node_id": "bench-node",
+    "presence": True,
+    "fall_detected": False,
+    "motion": 0.34,
+    "breathing_rate_bpm": 14.2,
+    "heartrate_bpm": 72.5,
+    "n_persons": 1,
+    "motion_energy": 0.04,
+    "presence_score": 0.91,
+    "rssi": -42.0,
+})
+
+
+def test_ws_decoder_edge_vitals(benchmark) -> None:
+    from wifi_densepose.client.ws import _decode
+
+    def _decode_one():
+        return _decode(_EDGE_VITALS_FRAME)
+
+    benchmark(_decode_one)
+
+
+_POSE_FRAME = json.dumps({
+    "type": "pose_data",
+    "node_id": "bench-node",
+    "timestamp": 1700000000.5,
+    "persons": [
+        {"id": i, "keypoints": [[0.5, 0.5, 0.9] for _ in range(17)]}
+        for i in range(3)
+    ],
+    "confidence": 0.85,
+})
+
+
+def test_ws_decoder_pose_data(benchmark) -> None:
+    """The pose_data frame is typically the largest one the server
+    emits — bench it separately so a future blob-size regression
+    in the persons array is visible."""
+    from wifi_densepose.client.ws import _decode
+
+    def _decode_one():
+        return _decode(_POSE_FRAME)
+
+    benchmark(_decode_one)
@@ -0,0 +1,85 @@
+"""ADR-117 hardening sweep — Benchmarks for the P3 vitals hot paths.
+
+Targets the ESP32 production rate: 100 Hz × 56 subcarriers, which is
+what `BreathingExtractor.esp32_default()` is tuned for. The bench
+reports the *per-extract* cost, which should be comfortably below
+10 ms (nothing is asserted; read the mean in the report) — at 100 Hz
+that's the entire frame budget, so anything above 10 ms means the
+Python binding would be the bottleneck instead of the radio.
+
+Run with:
+    pytest python/bench/ --benchmark-only
+
+The benchmarks are skipped by default (`addopts` in pyproject.toml
+doesn't include them) — they live in a sibling `bench/` directory
+so the main test run stays fast.
+"""
+
+from __future__ import annotations
+
+import math
+from random import Random
+
+import pytest
+
+from wifi_densepose import BreathingExtractor, HeartRateExtractor
+
+
+def _synth_frame(n_subcarriers: int, sample_rate: float, t: float, freq_hz: float, rng: Random) -> tuple[list[float], list[float]]:
+    """Build one ESP32-shape frame at time `t`: sine at `freq_hz` plus
+    tiny per-subcarrier noise."""
+    base = math.sin(2.0 * math.pi * freq_hz * t)
+    residuals = [base + rng.gauss(0.0, 0.01) for _ in range(n_subcarriers)]
+    weights = [1.0] * n_subcarriers
+    return residuals, weights
+
+
+def test_breathing_extract_per_frame_cost(benchmark) -> None:
+    """One BreathingExtractor.extract() at ESP32 defaults should
+    finish well under 10 ms — that's the 100 Hz frame budget."""
+    br = BreathingExtractor.esp32_default()
+    rng = Random(42)
+    # Pre-fill ~25 seconds of history so the bench measures the
+    # steady-state cost, not the cold-start cost.
+    for i in range(2500):
+        residuals, weights = _synth_frame(56, 100.0, i / 100.0, 0.25, rng)
+        br.extract(residuals=residuals, weights=weights)
+
+    def _one_frame():
+        residuals, weights = _synth_frame(56, 100.0, 30.0, 0.25, rng)
+        return br.extract(residuals=residuals, weights=weights)
+
+    benchmark(_one_frame)
+
+
+def test_heart_rate_extract_per_frame_cost(benchmark) -> None:
+    """One HeartRateExtractor.extract() at ESP32 defaults — same 10 ms
+    target."""
+    hr = HeartRateExtractor.esp32_default()
+    rng = Random(43)
+    for i in range(1500):
+        residuals, weights = _synth_frame(56, 100.0, i / 100.0, 1.2, rng)
+        hr.extract(residuals=residuals, weights=weights)
+
+    def _one_frame():
+        residuals, weights = _synth_frame(56, 100.0, 16.0, 1.2, rng)
+        return hr.extract(residuals=residuals, weights=weights)
+
+    benchmark(_one_frame)
+
+
+@pytest.mark.parametrize("n_subcarriers", [56, 114, 242])
+def test_breathing_extract_scaling(benchmark, n_subcarriers: int) -> None:
+    """Sanity check: cost should scale roughly linearly with the
+    subcarrier count. Catches accidental O(n^2) regressions."""
+    sample_rate = 100.0
+    br = BreathingExtractor(n_subcarriers, sample_rate, 30.0)
+    rng = Random(n_subcarriers)
+    for i in range(2500):
+        residuals, weights = _synth_frame(n_subcarriers, sample_rate, i / sample_rate, 0.25, rng)
+        br.extract(residuals=residuals, weights=weights)
+
+    def _one_frame():
+        residuals, weights = _synth_frame(n_subcarriers, sample_rate, 30.0, 0.25, rng)
+        return br.extract(residuals=residuals, weights=weights)
+
+    benchmark(_one_frame)