Compare commits

..

1 Commits

Author SHA1 Message Date
github-actions[bot] 23f2cc7746 chore: update vendor submodules to latest upstream 2026-06-01 07:58:06 +00:00
36 changed files with 102 additions and 751 deletions
+9 -36
View File
@@ -265,45 +265,23 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest # the perf suite is pytest, not locust
pip install locust
# No "Start application" step: the gated test (test_frame_budget.py) drives
# the CSIProcessor pipeline in-process and makes no HTTP calls, so the old
# uvicorn server + `sleep 10` were dead weight — they only existed for the
# now-excluded api_throughput/inference_speed tests, and on every run dumped
# ~50 misleading "router requires hardware setup" ERROR lines for a server
# no test touched. MOCK_POSE_DATA is server-only and unused here.
- name: Run performance tests
- name: Start application
working-directory: archive/v1
run: |
# Gate only on the genuine, deterministic perf guard:
# test_frame_budget.py times the *real* CSIProcessor pipeline against
# the ADR 50 ms per-frame budget (single-frame, p95 over 100 frames,
# +Doppler) — a true regression signal.
#
# test_api_throughput.py / test_inference_speed.py are excluded: every
# test there is a TDD red-phase stub (suffix `_should_fail_initially`)
# that times a *mock that sleeps* — meaningless as a perf signal, with
# machine-dependent wall-clock asserts (e.g. `actual_rps >= 40`,
# `batch_time < individual_time`) that are inherently flaky on shared
# CI runners, plus a cross-class fixture-scope bug. Forcing them green
# would be manufacturing a false signal; they stay in-repo for local
# TDD but do not gate CI until the underlying features are implemented.
#
# `python -m pytest` (not the bare `pytest` script) puts the cwd
# (archive/v1) on sys.path so `from src.core...` resolves — the bare
# script omits cwd and raises ModuleNotFoundError: No module named 'src'.
# -o addopts="" drops the root pyproject's --cov/--cov-fail-under=100.
python -m pytest tests/performance/test_frame_budget.py \
-o addopts="" -v --junitxml=perf-junit.xml
uvicorn src.api.main:app --host 0.0.0.0 --port 8000 &
sleep 10
- name: Run performance tests
run: |
locust -f tests/performance/locustfile.py --headless --users 50 --spawn-rate 5 --run-time 60s --host http://localhost:8000
- name: Upload performance results
if: always()
uses: actions/upload-artifact@v4
with:
name: performance-results
path: archive/v1/perf-junit.xml
path: locust_report.html
# Docker Build and Test
# NOTE: the canonical Docker build for the sensing-server is now
@@ -389,8 +367,6 @@ jobs:
runs-on: ubuntu-latest
needs: [docker-build]
if: github.ref == 'refs/heads/main'
permissions:
contents: write # gh-pages deploy needs write (GITHUB_TOKEN is read-only by default -> 403)
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -408,8 +384,6 @@ jobs:
- name: Generate OpenAPI spec
working-directory: archive/v1
env:
MOCK_POSE_DATA: "true" # no CSI hardware in CI
run: |
python -c "
from src.api.main import app
@@ -420,7 +394,6 @@ jobs:
- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v4
continue-on-error: true # openapi generation above is the real validation; deploy is best-effort (Pages may be disabled)
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs
-2
View File
@@ -7,7 +7,6 @@ on:
- 'archive/v1/src/core/**'
- 'archive/v1/src/hardware/**'
- 'archive/v1/data/proof/**'
- 'archive/v1/requirements-lock.txt'
- '.github/workflows/verify-pipeline.yml'
pull_request:
branches: [ main, master ]
@@ -15,7 +14,6 @@ on:
- 'archive/v1/src/core/**'
- 'archive/v1/src/hardware/**'
- 'archive/v1/data/proof/**'
- 'archive/v1/requirements-lock.txt'
- '.github/workflows/verify-pipeline.yml'
workflow_dispatch:
+1 -3
View File
@@ -8,8 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Fixed
- **Person count no longer leaks up to 10 in heuristic mode — addresses #894.** `field_bridge::occupancy_or_fallback` returned the eigenvalue-based `FieldModel::estimate_occupancy` count **unbounded** (its internal ceiling is 10), while the sibling estimators on the same single-link data — the perturbation-energy fallback right below it and `score_to_person_count` — both cap at 3 ("1-3 for single ESP32"). On noisy / under-calibrated CSI the eigenvalue count inflated, producing the "10 persons reported when 1 present" symptom (seen when `--model` fails to load and the server runs on heuristics). Bounded the eigenvalue path to the shared `MAX_SINGLE_LINK_OCCUPANCY` (3) so every estimator on one link agrees; genuine higher counts come from the multistatic fusion path, not a single-link covariance estimate.
- **MQTT multi-node deployments now create one Home-Assistant device per node — closes #898.** After the #872 MQTT wiring landed, the JSON→`VitalsSnapshot` bridge hard-coded a single `node_id` (the MQTT client id) and the publisher used a single `OwnedDiscoveryBuilder`, so every physical node collapsed into one device (`identifiers:["wifi_densepose_wifi-densepose-1"]`), contradicting the "one device per node" docs. The bridge now emits one snapshot per node in the sensing update's `nodes[]` (each with its own `node_id` + RSSI, falling back to a single aggregate snapshot for wifi/simulate sources), and the publisher derives a per-node builder (`OwnedDiscoveryBuilder::for_node`) that publishes discovery + availability lazily on first sight of each `node_id` and routes state to per-node topics — yielding N distinct HA devices with per-node availability/LWT. Unit-tested (distinct nodes → distinct `wifi_densepose_<node>` identifiers); 71 MQTT tests pass.
- **Person count no longer pinned to 1 — addresses #803.** The aggregate occupancy reported by the sensing server was derived from `smoothed_person_score`, an EMA-smoothed *activity* score (amplitude variance / motion / spectral energy). That score saturates near a single occupant — one moving person maxes it out — so it cannot discriminate occupancy *count* and stayed clamped at 1 across S3/C6 and the Python/Docker/Rust servers. Meanwhile the count-aware per-node estimates the ESP32 paths already compute (firmware `n_persons`, and the DynamicMinCut `corr_persons`) were stashed in `NodeState::prev_person_count` and then **discarded** by the aggregator (same dead-wiring class as #872). The aggregator now takes `max(activity_count, node_max)` via a unit-tested `aggregate_person_count` helper, so a node positively estimating 23 occupants is surfaced instead of overwritten. The fix can only ever *raise* the count when a node reports more people, so the single-occupant case is provably never inflated (regression-guarded by test). **Second half:** the pure-CSI per-node path itself clamped its own estimate — the DynamicMinCut occupancy (`estimate_persons_from_correlation`, 03) was mapped to a score via `corr_persons / 3.0`, putting 2 people at 0.667, *just under* the 0.70 up-threshold of `score_to_person_count`, so the per-node count never climbed past 1 (so `node_max` was also stuck at 1 for CSI-only nodes). Replaced it with a threshold-aligned `corr_persons_to_score` mapping (1→0.40, 2→0.74, 3→0.96) whose steady state round-trips back to the same count through the EMA + hysteresis, while still gating transient noise. A convergence test replays the exact EMA loop to prove min-cut=2 now reports 2 (and documents that the old `/3.0` mapping reported 1). Full multi-person accuracy still depends on the underlying estimator quality; this removes the two server-side clamps that masked it. 586 sensing-server tests pass.
- **MQTT publisher now actually runs (`--mqtt`) — closes #872.** The `--mqtt*` flags were defined only in `cli::Args` (dead code, referenced nowhere) while the binary parses a *separate* `main::Args` with no mqtt fields, and `main.rs` never started the `mqtt::` publisher — so MQTT/Home-Assistant integration was completely unwired (`--mqtt` errored as an unexpected argument, and even with the Docker image's `--features mqtt` build the publisher never ran). Earlier attempts chased a Docker *rebuild*; the real cause was disconnected *code*. Extracted the flags into a shared `cli::MqttArgs` (`#[command(flatten)]` into both structs), spawn the publisher on `--mqtt`, and bridge the JSON sensing broadcast into the typed `VitalsSnapshot` stream with a defensive `serde_json::Value` mapping. Verified end-to-end against `mosquitto`: 20 HA auto-discovery entities + live state (presence/person-count/…). 577 (default) / 580 (`--features mqtt`) tests pass.
@@ -430,7 +428,7 @@ Model release (no new firmware binary). Firmware remains at v0.6.0-esp32.
- Security fix merged via PR #310.
### Performance
- Presence detection: 100% accuracy on 60,630 overnight samples. *(Retracted — that recording was single-class (one sleeping person, 6,062/6,063 frames "present"), so a constant "yes" scores ~99.98%. Superseded by the honest 82.3% held-out temporal-triplet metric; see [#882](https://github.com/ruvnet/RuView/issues/882). Kept here as the in-place public record.)*
- Presence detection: 100% accuracy on 60,630 overnight samples.
- Inference: 0.008 ms per sample, 164K embeddings/sec.
- Contrastive self-supervised training: 51.6% improvement over baseline.
@@ -1 +1 @@
f8e76f21a0f9852b70b6d9dd5318239f6b20cbcb4cdd995863263cecdc446f7a
ca58956c1bbee8c46f1798b3d6b6f1f829aa5db90bba53e07177830eca429199
+16 -148
View File
@@ -185,14 +185,7 @@ def frame_to_csi_data(frame, signal_meta):
# observed pipeline-amplified ULP drift and is still far below any meaningful
# signal change (CSI phase precision is ~1e-3 rad; PSD bins differ by orders
# of magnitude). Round to this precision, then hash.
#
# NOTE: 6 decimals collapses the divergence *across Linux microarchitectures*
# but NOT Windows-vs-Linux, where the pocketfft/BLAS difference exceeds 1e-6 on
# a few elements that then straddle the 6th-decimal rounding boundary. The
# precision is overridable via PROOF_HASH_DECIMALS so it can be coarsened to a
# value that is boundary-stable across *all* platforms (Windows + Linux + macOS)
# while staying far below any signal-meaningful change.
HASH_QUANTIZATION_DECIMALS = int(os.environ.get("PROOF_HASH_DECIMALS", "6"))
HASH_QUANTIZATION_DECIMALS = 6
def features_to_bytes(features):
@@ -212,20 +205,13 @@ def features_to_bytes(features):
"""
parts = []
# Serialize each feature array in declaration order.
# doppler_shift is INTENTIONALLY excluded: it is peak-normalized
# (`spectrum / max(spectrum)` in csi_processor._extract_doppler_features),
# and when the raw spectrum has near-tied peaks the argmax flips under
# cross-microarchitecture FP reordering, renormalizing the whole array
# (O(1) divergence — not absorbable by any tolerance). The remaining five
# features, including the FFT-based PSD, reproduce deterministically and
# provide the proof. (The underlying doppler instability is a production
# reproducibility bug tracked separately.)
# Serialize each feature array in declaration order
for array in [
features.amplitude_mean,
features.amplitude_variance,
features.phase_difference,
features.correlation_matrix,
features.doppler_shift,
features.power_spectral_density,
]:
flat = np.asarray(array, dtype=np.float64).ravel()
@@ -239,45 +225,6 @@ def features_to_bytes(features):
return b"".join(parts)
# ── Cross-platform tolerance gate (issue #560 follow-up) ─────────────────────
# The SHA-256 of fixed-decimal-rounded features is bit-exact only WITHIN one
# CPU microarchitecture. The pocketfft / BLAS kernels in the manylinux
# numpy/scipy wheels reorder floating-point reductions differently across
# microarchs (e.g. a GitHub Azure runner vs a developer box vs another Linux
# host), and the resulting ~1e-6 *relative* drift lands on large-magnitude PSD
# bins as an absolute difference too large for ANY fixed-decimal grid to absorb
# (empirically the hash diverges across microarchs even at 2 decimals). So:
# • the hash is the strong, bit-exact, SAME-platform proof, and
# • a relative tolerance against a committed reference vector is the
# platform-INDEPENDENT proof.
# A run PASSES if either matches. Tolerances sit ~100x over the observed
# microarch drift and ~10x under any signal-meaningful change (CSI phase
# precision ~1e-3 rad), so real pipeline regressions still fail.
TOLERANCE_RTOL = 1e-4
TOLERANCE_ATOL = 1e-6
REFERENCE_VECTOR_FILENAME = "expected_features_reference.npz"
def features_to_vector(features):
"""Concatenate a frame's feature arrays as raw float64 (no rounding).
Mirrors ``features_to_bytes`` ordering but keeps full precision, for the
tolerance-based cross-platform comparison.
"""
# doppler_shift excluded — see features_to_bytes for the rationale
# (peak-normalization argmax instability across CPU microarchitectures).
arrays = [
features.amplitude_mean,
features.amplitude_variance,
features.phase_difference,
features.correlation_matrix,
features.power_spectral_density,
]
return np.concatenate(
[np.asarray(a, dtype=np.float64).ravel() for a in arrays]
)
def compute_pipeline_hash(data_path, verbose=False):
"""Run the full pipeline and compute the SHA-256 hash of all features.
@@ -320,7 +267,6 @@ def compute_pipeline_hash(data_path, verbose=False):
features_count = 0
total_feature_bytes = 0
last_features = None
feature_vectors = []
doppler_nonzero_count = 0
doppler_shape = None
psd_shape = None
@@ -337,7 +283,6 @@ def compute_pipeline_hash(data_path, verbose=False):
if features is not None:
feature_bytes = features_to_bytes(features)
hasher.update(feature_bytes)
feature_vectors.append(features_to_vector(features))
features_count += 1
total_feature_bytes += len(feature_bytes)
last_features = features
@@ -406,11 +351,7 @@ def compute_pipeline_hash(data_path, verbose=False):
"psd_shape": psd_shape,
}
reference_vector = (
np.concatenate(feature_vectors) if feature_vectors else np.array([], dtype=np.float64)
)
return hasher.hexdigest(), reference_vector, stats
return hasher.hexdigest(), stats
def audit_codebase(base_dir=None):
@@ -526,7 +467,7 @@ def main():
print(" This runs the SAME CSIProcessor.preprocess_csi_data() and")
print(" CSIProcessor.extract_features() used in production.")
print()
computed_hash, computed_vector, stats = compute_pipeline_hash(data_path, verbose=args.verbose)
computed_hash, stats = compute_pipeline_hash(data_path, verbose=args.verbose)
# ---------------------------------------------------------------
# Step 3: Hash comparison
@@ -538,11 +479,8 @@ def main():
with open(hash_path, "w") as f:
f.write(computed_hash + "\n")
print(f" Wrote expected hash to {hash_path}")
ref_path = os.path.join(SCRIPT_DIR, REFERENCE_VECTOR_FILENAME)
np.savez_compressed(ref_path, features=computed_vector)
print(f" Wrote reference vector ({computed_vector.size} values) to {ref_path}")
print()
print(" HASH + REFERENCE GENERATED -- run without --generate-hash to verify.")
print(" HASH GENERATED -- run without --generate-hash to verify.")
print("=" * 72)
return
@@ -561,70 +499,13 @@ def main():
print(f" Expected: {expected_hash}")
hash_match = computed_hash == expected_hash
# Cross-platform fallback: if the bit-exact hash differs (different CPU
# microarchitecture reorders the pocketfft/BLAS reductions), accept the run
# when the raw feature vector matches the committed reference within a
# relative tolerance — platform-independent where the hash is not (#560).
tolerance_match = False
max_abs_dev = None
max_rel_dev = None
ref_path = os.path.join(SCRIPT_DIR, REFERENCE_VECTOR_FILENAME)
if not hash_match and os.path.exists(ref_path):
ref_vec = np.load(ref_path)["features"]
if ref_vec.shape == computed_vector.shape:
tolerance_match = bool(
np.allclose(
computed_vector, ref_vec, rtol=TOLERANCE_RTOL, atol=TOLERANCE_ATOL
)
)
diff = np.abs(computed_vector - ref_vec)
max_abs_dev = float(np.max(diff)) if diff.size else 0.0
max_rel_dev = (
float(np.max(diff / np.maximum(np.abs(ref_vec), 1e-12)))
if diff.size
else 0.0
)
if hash_match:
match_status = "MATCH (bit-exact)"
elif tolerance_match:
match_status = f"TOLERANCE MATCH (max rel dev {max_rel_dev:.2e})"
if computed_hash == expected_hash:
match_status = "MATCH"
else:
match_status = "MISMATCH"
print(f" Status: {match_status}")
print()
if not hash_match and max_abs_dev is not None:
block_sizes = [56, 56, 55, 9, 128] # per-frame feature layout (doppler excluded)
block_names = ["amp_mean", "amp_var", "phase_diff", "corr", "psd"]
frame_len = sum(block_sizes)
tol = TOLERANCE_ATOL + TOLERANCE_RTOL * np.abs(ref_vec)
outside = diff > tol
n_out = int(outside.sum())
print(
f" DIVERGENCE: {n_out}/{computed_vector.size} outside tol "
f"({100.0 * n_out / computed_vector.size:.4f}%) "
f"max|d|={max_abs_dev:.3e} maxrel={max_rel_dev:.3e}"
)
if n_out:
wf = np.where(outside)[0] % frame_len
bounds = np.cumsum([0] + block_sizes)
parts = []
for bi, name in enumerate(block_names):
c = int(((wf >= bounds[bi]) & (wf < bounds[bi + 1])).sum())
if c:
parts.append(f"{name}={c}")
print(f" by feature: {', '.join(parts)}")
for w in np.argsort(diff)[::-1][:4]:
b = int(np.searchsorted(bounds, int(w) % frame_len, side="right")) - 1
print(
f" worst idx {int(w)} ({block_names[b]}): "
f"ref={ref_vec[int(w)]:.6g} got={computed_vector[int(w)]:.6g}"
)
print()
# ---------------------------------------------------------------
# Step 4: Audit (if requested or always in full mode)
# ---------------------------------------------------------------
@@ -647,22 +528,14 @@ def main():
# Final verdict
# ---------------------------------------------------------------
print("=" * 72)
if hash_match or tolerance_match:
if computed_hash == expected_hash:
print(" VERDICT: PASS")
print()
if hash_match:
print(" The pipeline produced a SHA-256 hash that matches the published")
print(" expected hash (bit-exact). This proves:")
else:
print(" The bit-exact hash differs (CPU-microarchitecture FP reordering),")
print(" but the raw feature vector matches the published reference within")
print(
f" rtol={TOLERANCE_RTOL:g} / atol={TOLERANCE_ATOL:g} "
f"(max rel dev {max_rel_dev:.2e}). This proves:"
)
print(" The pipeline produced a SHA-256 hash that matches the published")
print(" expected hash. This proves:")
print(" 1. The SAME signal processing code ran on the reference signal")
print(" 2. The output is DETERMINISTIC (same input -> same output)")
print(" 3. No randomness was introduced")
print(" 3. No randomness was introduced (hash would differ)")
print(" 4. The code path includes: noise removal, Hamming windowing,")
print(" amplitude normalization, FFT-based Doppler extraction,")
print(" and power spectral density computation")
@@ -673,19 +546,14 @@ def main():
else:
print(" VERDICT: FAIL")
print()
print(" The pipeline output does NOT match the expected hash OR the")
print(" reference feature vector within tolerance.")
if max_rel_dev is not None:
print(
f" max abs dev: {max_abs_dev:.3e} max rel dev: {max_rel_dev:.3e}"
f" (rtol={TOLERANCE_RTOL:g}, atol={TOLERANCE_ATOL:g})"
)
print(" The pipeline output does NOT match the expected hash.")
print()
print(" Possible causes:")
print(" - Numpy/scipy version mismatch (check requirements)")
print(" - Code change in CSI processor that alters numerical output")
print(" - A real (non-microarch) numerical regression")
print(" - Platform floating-point differences (unlikely for IEEE 754)")
print()
print(" To update after an intentional change:")
print(" To update the expected hash after intentional changes:")
print(" python verify.py --generate-hash")
print("=" * 72)
sys.exit(1)
+2 -8
View File
@@ -6,14 +6,8 @@
#
# To update: change versions, run `python v1/data/proof/verify.py --generate-hash`,
# then commit the new expected_features.sha256.
#
# numpy/scipy track the versions the *published* expected hash
# (expected_features.sha256 = ca58956c…) was generated with — modern numpy 2.x,
# i.e. what a fresh `pip install numpy` and the proof-of-capabilities.md skeptic
# path produce today. The old 1.26.4 pin no longer matched that hash and made
# the determinism gate fail against its own published proof.
numpy==2.4.2
scipy==1.17.1
numpy==1.26.4
scipy==1.14.1
pydantic==2.10.4
pydantic-settings==2.7.1
+3 -12
View File
@@ -107,25 +107,16 @@ class PoseService:
async def _initialize_models(self):
"""Initialize neural network models."""
try:
# Initialize DensePose model. DensePoseHead requires a config
# dict — input_channels matches the modality translator's output
# (256), with the standard DensePose 24 body parts and 2 (U,V)
# coordinates. (Previously called with no args → TypeError at
# startup, which broke the API service.)
densepose_config = {
'input_channels': 256,
'num_body_parts': 24,
'num_uv_coordinates': 2,
}
# Initialize DensePose model
if self.settings.pose_model_path:
self.densepose_model = DensePoseHead(densepose_config)
self.densepose_model = DensePoseHead()
# Load model weights if path is provided
# model_state = torch.load(self.settings.pose_model_path)
# self.densepose_model.load_state_dict(model_state)
self.logger.info("DensePose model loaded")
else:
self.logger.warning("No pose model path provided, using default model")
self.densepose_model = DensePoseHead(densepose_config)
self.densepose_model = DensePoseHead()
# Initialize modality translation
config = {
+2 -9
View File
@@ -78,18 +78,11 @@ random or mocked, the hash would not be reproducible.
```bash
python archive/v1/data/proof/verify.py
# Expect: VERDICT: PASS
# Pipeline hash: f8e76f21a0f9852b70b6d9dd5318239f6b20cbcb4cdd995863263cecdc446f7a
# Pipeline hash: ca58956c1bbee8c46f1798b3d6b6f1f829aa5db90bba53e07177830eca429199
```
The published expected hash is committed at `archive/v1/data/proof/expected_features.sha256`.
Run it on your machine — it reproduces **bit-for-bit across platforms** (verified identical on
Windows, two independent Linux hosts, and the GitHub Azure CI runner). For the one feature that
*isn't* bit-stable — the peak-normalized Doppler spectrum, whose argmax flips under
cross-microarchitecture FFT reordering — the proof excludes it from the hash and additionally
checks every other feature against a committed reference vector within a strict relative tolerance
(`expected_features_reference.npz`), so a genuine regression still fails while CPU-level float
noise does not. Five features (amplitude mean/variance, phase difference, correlation matrix, and
the FFT-based PSD) carry the deterministic proof.
Run it on your machine; the hash must match bit-for-bit.
**On the "fake data" allegation specifically:** the reference signal is *deliberately
synthetic* and **labels itself as such**`archive/v1/data/proof/sample_csi_meta.json` says:
+3 -3
View File
@@ -122,7 +122,7 @@ node scripts/benchmark-ruvllm.js --model models/csi-ruvllm # benchmark
| What we measured | Result | Why it matters |
|-----------------|--------|---------------|
| **CSI embedding quality** | **82.3% held-out temporal-triplet** | Honest label-free metric on the last 20% by time (v1's "100% presence" was a single-class recording — retracted, [#882](https://github.com/ruvnet/RuView/issues/882)) |
| **Presence detection** | **100% accuracy** | Never misses a person, never false alarms |
| **Inference speed** | **0.008 ms** per embedding | 125,000x faster than real-time |
| **Throughput** | **164,183 embeddings/sec** | One Mac Mini handles 1,600+ ESP32 nodes |
| **Contrastive learning** | **51.6% improvement** | Strong pattern learning from real overnight data |
@@ -233,7 +233,7 @@ python firmware/esp32-csi-node/provision.py --port COM9 --hop-channels "1,6,11"
| **kNN similarity search** | "Find the 10 most similar states to right now" — anomaly detection, fingerprinting | Cognitum Seed |
| **Witness chain** | SHA-256 tamper-evident audit trail for every measurement (1,747 entries validated) | Cognitum Seed |
| **Camera-free pose training** | 17 COCO keypoints from 10 sensor signals — PIR, RSSI triangulation, subcarrier asymmetry, vibration, BME280 | 2x ESP32 + Seed |
| **Pre-trained model** | 82.8 KB (8 KB at 4-bit quantization), 82.3% held-out temporal-triplet accuracy (v1's "100% presence" was single-class — retracted, [#882](https://github.com/ruvnet/RuView/issues/882)) | Download from release |
| **Pre-trained model** | 82.8 KB (8 KB at 4-bit quantization), 100% presence accuracy, 0 skeleton violations | Download from release |
| **Sub-ms inference** | 0.012 ms latency, 171,472 embeddings/sec on M4 Pro | Any machine with Node.js |
| **SONA adaptation** | Adapts to new rooms in <1ms without retraining | ruvllm runtime |
| **LoRA room adapters** | Per-node fine-tuning with 2,048 parameters per adapter | Automatic |
@@ -262,7 +262,7 @@ node scripts/benchmark-ruvllm.js --model models/csi-ruvllm
| What we measured | Result | Why it matters |
|-----------------|--------|---------------|
| **CSI embedding quality** | **82.3% held-out temporal-triplet** | Honest label-free metric (v1's "100% presence" was single-class — retracted, [#882](https://github.com/ruvnet/RuView/issues/882)) |
| **Presence detection** | **100% accuracy** | Never misses a person, never false alarms |
| **Person counting** | **24/24 correct** (MinCut) | Fixed the #1 user-reported issue |
| **Inference speed** | **0.012 ms** per embedding | 83,000x faster than real-time |
| **Throughput** | **171,472 embeddings/sec** | One Mac Mini handles 1,700+ ESP32 nodes |
+3 -3
View File
@@ -1119,7 +1119,7 @@ What it ships (and what it does not):
| Capability | Status |
|------------|--------|
| Presence detection (occupied / empty) | ✅ Trained head — v2 encoder reports 82.3% held-out temporal-triplet acc (v1's "100% on validation" was a single-class recording — retracted, [#882](https://github.com/ruvnet/RuView/issues/882)) |
| Presence detection (occupied / empty) | ✅ Trained head — 100% accuracy on validation |
| 128-dim CSI embeddings (re-ID, similarity, downstream training) | ✅ Trained encoder |
| Single-person breathing / heart-rate | ⚠️ Server still uses heuristic DSP — model does not replace this yet |
| 17-keypoint full-body pose | 🔬 No keypoint weights shipped yet — pose pipeline runs but without a learned head |
@@ -1824,7 +1824,7 @@ huggingface-cli download ruvnet/wifi-densepose-pretrained --local-dir models/pre
# model.safetensors — 48 KB contrastive encoder
# model-q4.bin — 8 KB quantized (recommended)
# model-q2.bin — 4 KB ultra-compact (ESP32 edge)
# presence-head.json — presence detection head (v2 encoder: 82.3% held-out triplet acc)
# presence-head.json — presence detection head (100% accuracy)
# node-1.json — LoRA adapter for room 1
# node-2.json — LoRA adapter for room 2
```
@@ -1833,7 +1833,7 @@ huggingface-cli download ruvnet/wifi-densepose-pretrained --local-dir models/pre
The pre-trained encoder converts 8-dim CSI feature vectors into 128-dim embeddings. These embeddings power all 17 sensing applications:
- **Presence detection** — v2 encoder: 82.3% held-out temporal-triplet accuracy (v1's "100%" was a single-class recording — retracted, [#882](https://github.com/ruvnet/RuView/issues/882))
- **Presence detection** — 100% accuracy, never misses, never false alarms
- **Environment fingerprinting** — kNN search finds "states like this one"
- **Anomaly detection** — embeddings that don't match known clusters = anomaly
- **Activity classification** — different activities cluster in embedding space
@@ -637,23 +637,6 @@ static void hop_timer_cb(void *arg)
csi_hop_next_channel();
}
void csi_collector_enable_data_capture(void)
{
/* MGMT-only (RuView#396) starves the CSI callback on display-less boards
* (RuView#521/#893): beacons alone are sparse, yield collapses to 0 pps.
* Without a display there is no QSPI/SPI-flash cache contention with the
* DATA-frame interrupt load, so capture DATA frames too. */
wifi_promiscuous_filter_t filt = {
.filter_mask = WIFI_PROMIS_FILTER_MASK_MGMT | WIFI_PROMIS_FILTER_MASK_DATA,
};
esp_err_t err = esp_wifi_set_promiscuous_filter(&filt);
if (err == ESP_OK) {
ESP_LOGI(TAG, "CSI filter upgraded to MGMT+DATA (no display, RuView#893)");
} else {
ESP_LOGW(TAG, "Failed to enable DATA-frame CSI capture: %s", esp_err_to_name(err));
}
}
void csi_collector_start_hop_timer(void)
{
if (s_hop_count <= 1) {
@@ -90,19 +90,6 @@ void csi_hop_next_channel(void);
*/
void csi_collector_start_hop_timer(void);
/**
* Upgrade the promiscuous filter to capture DATA frames in addition to MGMT
* (RuView#893/#521).
*
* Called on display-less boards: the MGMT-only filter (the #396 display-crash
* workaround set in csi_collector_init) only fires the CSI callback on sparse
* management frames, so yield collapses to 0 pps under real traffic and the
* node looks dead. A board with no AMOLED panel has no QSPI/SPI-flash cache
* contention, so it can safely capture DATA frames restoring abundant CSI.
* Display boards keep MGMT-only to avoid the #396 crash.
*/
void csi_collector_enable_data_capture(void);
/**
* Inject an NDP (Null Data Packet) frame for sensing.
*
@@ -9,14 +9,6 @@
#include "display_task.h"
#include "sdkconfig.h"
/* Set true once an AMOLED panel is detected and the display task starts.
* Defined outside the CONFIG_DISPLAY_ENABLE guard so display_is_active()
* exists on headless builds too (where it stays false CSI captures DATA
* frames; see RuView#893). */
static bool s_display_active = false;
bool display_is_active(void) { return s_display_active; }
#if CONFIG_DISPLAY_ENABLE
#include <string.h>
@@ -170,7 +162,6 @@ esp_err_t display_task_start(void)
ESP_LOGI(TAG, "Display task started (Core %d, priority %d, %d fps)",
DISP_TASK_CORE, DISP_TASK_PRIORITY, DISP_FPS_LIMIT);
s_display_active = true;
return ESP_OK;
}
@@ -7,7 +7,6 @@
#define DISPLAY_TASK_H
#include "esp_err.h"
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
@@ -23,15 +22,6 @@ extern "C" {
*/
esp_err_t display_task_start(void);
/**
* @return true once an AMOLED panel has been detected and the display task
* is running; false on headless boards (no panel, or built without display
* support). Used to choose the CSI promiscuous filter (RuView#893): a board
* with no display has no QSPI/SPI-flash contention, so it can safely capture
* DATA frames for proper CSI yield instead of starving on MGMT-only.
*/
bool display_is_active(void);
#ifdef __cplusplus
}
#endif
-15
View File
@@ -410,21 +410,6 @@ void app_main(void)
}
#endif
/* RuView#893/#521: the MGMT-only promiscuous filter (set in
* csi_collector_init as the #396 display-crash workaround) starves the CSI
* callback on display-less boards yield collapses to 0 pps and the node
* looks dead despite being on the network. Now that the display probe has
* run, boards with no AMOLED panel (no QSPI/SPI-flash cache contention)
* upgrade the filter to capture DATA frames too, restoring CSI yield. */
#ifdef CONFIG_DISPLAY_ENABLE
bool has_display = display_is_active(); /* runtime panel probe result */
#else
bool has_display = false; /* display support not compiled in */
#endif
if (!has_display) {
csi_collector_enable_data_capture();
}
ESP_LOGI(TAG, "CSI streaming active → %s:%d (edge_tier=%u, OTA=%s, WASM=%s, mmWave=%s, swarm=%s, adapt=%s)",
g_nvs_config.target_ip, g_nvs_config.target_port,
g_nvs_config.edge_tier,
Binary file not shown.
@@ -1,4 +1,4 @@
b0fb1f217a39c80bc95b5eb8208a0b8572ae64efa0f6d580b76caff4affe0f4d *firmware/esp32-csi-node/release_bins/c6-adr110/bootloader.bin
4764c5b20a353895f70122816adc98f861ec20e9a8ea9b344dc0648b6341073c *firmware/esp32-csi-node/release_bins/c6-adr110/esp32-csi-node.bin
889715e9d698ad78f9978ad8b93b6af24a726b0494247201c8f0d920d9fc80ca *firmware/esp32-csi-node/release_bins/c6-adr110/bootloader.bin
d8539e47c6f10a3344679118619e3fe01cfd66eb560ea8883268ca7c9a12efa4 *firmware/esp32-csi-node/release_bins/c6-adr110/esp32-csi-node.bin
7d2c7ac4888bfd75cd5f56e8d61f69595121183afc81556c876732fd3782c62f *firmware/esp32-csi-node/release_bins/c6-adr110/ota_data_initial.bin
4c2cc4ffd52641e23b779bd57b3908014083ac3c1aab395756478c89e70d81f0 *firmware/esp32-csi-node/release_bins/c6-adr110/partition-table.bin
@@ -1,3 +1,3 @@
b973d7eda65affb746adcfa63ceb18f779f206d240b76f01b8c9ae7485455660 *firmware/esp32-csi-node/release_bins/s3-adr110/bootloader.bin
e21ef94aba779d534dc048c1b9da731c81e5dbe09d0645cfd70a05ad3642d3e9 *firmware/esp32-csi-node/release_bins/s3-adr110/esp32-csi-node.bin
3c4905dd202ccabf4230cbabcc9320f250a60b1a7254eff7424780201bcb2072 *firmware/esp32-csi-node/release_bins/s3-adr110/bootloader.bin
7a8bf9582c9031fed32f1ada44f5c41dd99bd07fadff8e5c86e07aa0f343e847 *firmware/esp32-csi-node/release_bins/s3-adr110/esp32-csi-node.bin
67222c257c0477501fd4002275638dc4262b34eb68235b8289fb1337054d322b *firmware/esp32-csi-node/release_bins/s3-adr110/partition-table.bin
@@ -1,4 +1,3 @@
0.6.7
git-sha: 8703ade9b
built: 2026-06-02
note: RuView#893 — display-less boards capture DATA frames (CSI yield 0pps fix); hardware-verified on ESP32-C6 (0->27 pps)
0.6.6
git-sha: cbcb389cb (pre-commit)
built: 2026-05-21
-1
View File
@@ -36,4 +36,3 @@ scikit-learn>=1.2.0
# Monitoring dependencies
prometheus-client>=0.16.0
psutil>=5.9.0 # system metrics — imported by health.py / metrics.py / status.py / monitoring.py
@@ -21,15 +21,6 @@ const ENERGY_THRESH_2: f64 = 12.0;
/// Perturbation energy threshold for detecting a third person.
const ENERGY_THRESH_3: f64 = 25.0;
/// Maximum occupancy a single ESP32 link can plausibly resolve (#894).
/// The score heuristic (`score_to_person_count`) and the perturbation-energy
/// fallback below both cap here; the eigenvalue path is bounded to match,
/// rather than leaking its internal `min(10)` ceiling on noisy / under-
/// calibrated CSI (the "10 persons reported when 1 present" symptom).
/// Resolving more than this from one link's subcarrier covariance is not
/// reliable — genuine higher counts come from the multistatic fusion path.
const MAX_SINGLE_LINK_OCCUPANCY: usize = 3;
/// Create a FieldModelConfig for single-link mode (one ESP32 node = one link).
/// This avoids the DimensionMismatch error when feeding single-frame observations.
pub fn single_link_config() -> FieldModelConfig {
@@ -64,15 +55,9 @@ pub fn occupancy_or_fallback(
return score_to_person_count(smoothed_score, prev_count);
}
// Try eigenvalue-based occupancy first (best accuracy). Bound it to
// the same single-link maximum the sibling estimators use — the
// perturbation fallback below and score_to_person_count both cap at
// MAX_SINGLE_LINK_OCCUPANCY. Without this, estimate_occupancy's
// internal min(10) ceiling leaks up to 10 persons on noisy / under-
// calibrated CSI (#894), while every other path on the same data
// would report ≤3.
// Try eigenvalue-based occupancy first (best accuracy).
if let Ok(count) = field.estimate_occupancy(&frames) {
return count.min(MAX_SINGLE_LINK_OCCUPANCY);
return count;
} // else fall through to perturbation energy
// Fallback: perturbation energy thresholds.
@@ -5476,149 +5476,6 @@ async fn broadcast_tick_task(state: SharedState, tick_ms: u64) {
}
}
/// Map one sensing-broadcast JSON document into the `VitalsSnapshot`(s) to
/// publish over MQTT (issues #872/#898).
///
/// Multi-node sources carry a `nodes` array where **each node has its own
/// `classification`** (`motion_level`, `presence`, `confidence`) and RSSI — so
/// each node must surface its *own* presence/motion, not the room-level
/// aggregate. Previously the bridge applied the aggregate `classification` to
/// every per-node Home-Assistant device, so a node in an empty corner inherited
/// another node's "present" (and `motion_level: "absent"` was mis-mapped to full
/// motion). Vitals (breathing / heart rate) and the person count are room-level
/// and shared across the per-node devices. Falls back to a single aggregate
/// snapshot when there is no per-node data (e.g. wifi / simulate sources).
#[cfg(feature = "mqtt")]
fn vitals_snapshots_from_sensing_json(
v: &serde_json::Value,
base_id: &str,
) -> Vec<wifi_densepose_sensing_server::mqtt::state::VitalsSnapshot> {
use wifi_densepose_sensing_server::mqtt::state::VitalsSnapshot;
// motion_level string -> motion scalar. "absent"/"none"/"still"/"idle"/""
// are non-moving; anything else (walking, …) is motion. `fallback` is used
// when the field is absent so a partial per-node payload defers to the
// room aggregate rather than silently reading 0.
fn motion_of(level: Option<&str>, fallback: f64) -> f64 {
match level {
Some("none") | Some("still") | Some("idle") | Some("absent") | Some("") => 0.0,
Some(_) => 1.0,
None => fallback,
}
}
let ts = (v["timestamp"].as_f64().unwrap_or(0.0) * 1000.0) as i64;
let vit = &v["vital_signs"];
let breathing = vit["breathing_rate_bpm"].as_f64();
let hr = vit["heart_rate_bpm"].as_f64();
let n_persons = v["persons"]
.as_array()
.map(|a| a.len() as u32)
.or_else(|| v["estimated_persons"].as_u64().map(|x| x as u32))
.unwrap_or(0);
// Room-level aggregate: the no-nodes fallback, and the per-node default for
// any field a node omits.
let acls = &v["classification"];
let agg_presence = acls["presence"].as_bool().unwrap_or(false);
let agg_motion = motion_of(acls["motion_level"].as_str(), 0.0);
let agg_conf = acls["confidence"].as_f64().unwrap_or(0.0);
let mk = |node_id: String, presence: bool, motion: f64, conf: f64, rssi: Option<f64>| {
VitalsSnapshot {
node_id,
timestamp_ms: ts,
presence,
motion,
presence_score: if presence { conf.max(0.0) } else { 0.0 },
breathing_rate_bpm: breathing,
heartrate_bpm: hr,
n_persons,
rssi_dbm: rssi,
vital_confidence: conf,
..Default::default()
}
};
match v["nodes"].as_array() {
Some(arr) if !arr.is_empty() => arr
.iter()
.map(|node| {
let n = node["node_id"].as_u64().unwrap_or(0);
// Each node carries its OWN classification — use it, deferring to
// the room aggregate only for fields the node omits.
let ncls = &node["classification"];
let presence = ncls["presence"].as_bool().unwrap_or(agg_presence);
let motion = motion_of(ncls["motion_level"].as_str(), agg_motion);
let conf = ncls["confidence"].as_f64().unwrap_or(agg_conf);
mk(
format!("{base_id}-node{n}"),
presence,
motion,
conf,
node["rssi_dbm"].as_f64(),
)
})
.collect(),
_ => vec![mk(
base_id.to_string(),
agg_presence,
agg_motion,
agg_conf,
v["nodes"][0]["rssi_dbm"].as_f64(),
)],
}
}
/// Turn a `ProgressiveLoader::new` failure into an actionable diagnostic (#894).
///
/// The published HuggingFace `ruvnet/wifi-densepose-pretrained` files
/// (`model.safetensors`, `model-q{2,4,8}.bin`, `model.rvf.jsonl`) are a
/// different *format* — and a different encoder architecture — than the RVF
/// binary container the `--model` progressive loader expects (`RVFS` magic
/// `0x52564653`). Feeding one to `--model` produced a bare
/// "invalid magic at offset 0 …" that left users stuck. Detect the common
/// cases and explain plainly what's loadable instead.
fn diagnose_model_load_error(path: &std::path::Path, data: &[u8], err: &str) -> String {
let name = path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("")
.to_ascii_lowercase();
let ext = path
.extension()
.and_then(|e| e.to_str())
.unwrap_or("")
.to_ascii_lowercase();
// safetensors: 8-byte LE header length, then a JSON object starting with '{'.
let looks_safetensors = ext == "safetensors" || (data.len() > 9 && data[8] == b'{');
// JSONL manifest: starts with '{' (or the well-known suffix).
let looks_jsonl =
ext == "jsonl" || name.ends_with(".rvf.jsonl") || data.first() == Some(&b'{');
// Quantized weight blob shipped on HF (model-q2/q4/q8.bin).
let looks_quant_bin = ext == "bin" || name.contains("-q");
let kind = if looks_safetensors {
"a safetensors weight file"
} else if looks_jsonl {
"a JSONL manifest, not the binary container"
} else if looks_quant_bin {
"a quantized weight blob (e.g. HuggingFace model-q4.bin)"
} else {
"not an RVF binary container"
};
format!(
"model `{}` could not be loaded: it is {kind}. The --model flag expects an \
RVF binary container (`RVFS` magic 0x52564653) produced by the \
wifi-densepose-train pipeline. The HuggingFace ruvnet/wifi-densepose-pretrained \
files are a different format and encoder architecture, so they do not load \
here directly (issue #894). Continuing with signal heuristics. (loader: {err})",
path.display()
)
}
// ── Main ─────────────────────────────────────────────────────────────────────
/// If `--ui-path` points nowhere (wrong cwd), try common repo layouts relative to cwd.
@@ -6256,9 +6113,7 @@ async fn main() {
model_loaded = true;
progressive_loader = Some(loader);
}
Err(e) => {
error!("{}", diagnose_model_load_error(mp, &data, &e.to_string()))
}
Err(e) => error!("Progressive loader init failed: {e}"),
},
Err(e) => error!("Failed to read model file: {e}"),
}
@@ -6345,14 +6200,37 @@ async fn main() {
let Ok(v) = serde_json::from_str::<serde_json::Value>(&json) else {
continue;
};
// #898/#872: emit one snapshot per physical node so
// each surfaces as its own Home-Assistant device with
// its *own* presence/motion/RSSI (see
// vitals_snapshots_from_sensing_json). Falls back to a
// single aggregate snapshot for per-node-less sources.
for snap in vitals_snapshots_from_sensing_json(&v, &node_id) {
let _ = vtx.send(snap);
}
let cls = &v["classification"];
let vit = &v["vital_signs"];
let presence = cls["presence"].as_bool().unwrap_or(false);
let n_persons = v["persons"]
.as_array()
.map(|a| a.len() as u32)
.or_else(|| v["estimated_persons"].as_u64().map(|x| x as u32))
.unwrap_or(0);
let motion = match cls["motion_level"].as_str() {
Some("none") | Some("still") | Some("idle") | Some("") => 0.0,
Some(_) => 1.0,
None => 0.0,
};
let snap = mqtt::state::VitalsSnapshot {
node_id: node_id.clone(),
timestamp_ms: (v["timestamp"].as_f64().unwrap_or(0.0) * 1000.0) as i64,
presence,
motion,
presence_score: if presence {
cls["confidence"].as_f64().unwrap_or(1.0)
} else {
0.0
},
breathing_rate_bpm: vit["breathing_rate_bpm"].as_f64(),
heartrate_bpm: vit["heart_rate_bpm"].as_f64(),
n_persons,
rssi_dbm: v["nodes"][0]["rssi_dbm"].as_f64(),
vital_confidence: cls["confidence"].as_f64().unwrap_or(0.0),
..Default::default()
};
let _ = vtx.send(snap);
}
});
tracing::info!("MQTT publisher started -> {host}:{port}");
@@ -7170,143 +7048,3 @@ mod rolling_p95_tests {
assert_eq!(p.len(), 1);
}
}
#[cfg(all(test, feature = "mqtt"))]
mod mqtt_bridge_tests {
use super::vitals_snapshots_from_sensing_json;
use serde_json::json;
/// Regression for the per-node presence bug (#872/#898): each node must
/// surface its OWN classification, not the room-level aggregate. Node 1 is
/// present+moving; node 2 is absent — node 2 must NOT inherit node 1's
/// "present".
#[test]
fn per_node_presence_uses_each_nodes_own_classification() {
let v = json!({
"timestamp": 1.0,
"classification": { "presence": true, "motion_level": "walking", "confidence": 0.9 },
"vital_signs": { "breathing_rate_bpm": 14.0, "heart_rate_bpm": 60.0 },
"persons": [{}, {}],
"nodes": [
{ "node_id": 1, "rssi_dbm": -40.0,
"classification": { "presence": true, "motion_level": "walking", "confidence": 0.8 } },
{ "node_id": 2, "rssi_dbm": -70.0,
"classification": { "presence": false, "motion_level": "absent", "confidence": 0.1 } }
]
});
let snaps = vitals_snapshots_from_sensing_json(&v, "ruview");
assert_eq!(snaps.len(), 2, "one snapshot per node");
let n1 = snaps.iter().find(|s| s.node_id == "ruview-node1").unwrap();
let n2 = snaps.iter().find(|s| s.node_id == "ruview-node2").unwrap();
assert!(n1.presence && n1.motion > 0.0, "node1 present + moving");
assert!(
!n2.presence && n2.motion == 0.0,
"node2 must be absent — not inherit the room aggregate"
);
// Per-node RSSI preserved.
assert_eq!(n1.rssi_dbm, Some(-40.0));
assert_eq!(n2.rssi_dbm, Some(-70.0));
// Vitals + person count are room-level, shared across node devices.
assert_eq!(n1.n_persons, 2);
assert_eq!(n2.n_persons, 2);
assert_eq!(n1.breathing_rate_bpm, Some(14.0));
assert_eq!(n2.heartrate_bpm, Some(60.0));
// presence_score is gated on presence.
assert!(n1.presence_score > 0.0);
assert_eq!(n2.presence_score, 0.0);
}
/// A node that omits a classification field defers to the room aggregate
/// rather than silently reading false/0.
#[test]
fn per_node_missing_fields_fall_back_to_aggregate() {
let v = json!({
"timestamp": 1.0,
"classification": { "presence": true, "motion_level": "still", "confidence": 0.7 },
"vital_signs": {},
"nodes": [ { "node_id": 3, "rssi_dbm": -55.0 } ] // no per-node classification
});
let snaps = vitals_snapshots_from_sensing_json(&v, "n");
assert_eq!(snaps.len(), 1);
assert_eq!(snaps[0].node_id, "n-node3");
assert!(snaps[0].presence, "defers to aggregate presence");
assert_eq!(snaps[0].motion, 0.0, "aggregate 'still' => no motion");
}
/// No `nodes` array (wifi / simulate sources): single aggregate snapshot
/// keyed by the base id.
#[test]
fn falls_back_to_single_aggregate_when_no_nodes() {
let v = json!({
"timestamp": 2.0,
"classification": { "presence": true, "motion_level": "idle", "confidence": 0.6 },
"vital_signs": { "breathing_rate_bpm": 12.0 },
"persons": [{}]
});
let snaps = vitals_snapshots_from_sensing_json(&v, "ruview");
assert_eq!(snaps.len(), 1);
assert_eq!(snaps[0].node_id, "ruview");
assert!(snaps[0].presence);
assert_eq!(snaps[0].motion, 0.0, "idle => no motion");
assert_eq!(snaps[0].n_persons, 1);
}
/// `motion_level: "absent"` must map to zero motion (the old aggregate
/// match fell through to `Some(_) => 1.0`, treating absent as full motion).
#[test]
fn absent_motion_level_is_zero_motion() {
let v = json!({
"timestamp": 0.0,
"classification": { "presence": false, "motion_level": "absent", "confidence": 0.0 },
"vital_signs": {}
});
let snaps = vitals_snapshots_from_sensing_json(&v, "x");
assert_eq!(snaps[0].motion, 0.0);
assert!(!snaps[0].presence);
}
}
#[cfg(test)]
mod model_load_diagnostic_tests {
use super::diagnose_model_load_error;
use std::path::Path;
#[test]
fn safetensors_is_named_and_points_at_894() {
// 8-byte LE header length then '{' — the safetensors signature.
let data = [0x10, 0, 0, 0, 0, 0, 0, 0, b'{', b'"'];
let msg = diagnose_model_load_error(
Path::new("models/wifi-densepose-pretrained/model.safetensors"),
&data,
"invalid magic at offset 0",
);
assert!(msg.contains("safetensors"), "{msg}");
assert!(msg.contains("#894"), "{msg}");
assert!(msg.contains("signal heuristics"), "{msg}");
}
#[test]
fn quantized_bin_is_identified() {
let data = [0x35, 0x57, 0x45, 0x77]; // the 0x77455735 the loader reports
let msg = diagnose_model_load_error(Path::new("model-q4.bin"), &data, "bad magic");
assert!(msg.contains("quantized weight blob"), "{msg}");
assert!(msg.contains("RVFS") || msg.contains("0x52564653"), "{msg}");
}
#[test]
fn jsonl_manifest_is_identified() {
let data = *b"{\"seg\":0}";
let msg = diagnose_model_load_error(Path::new("model.rvf.jsonl"), &data, "x");
assert!(msg.contains("JSONL manifest"), "{msg}");
}
#[test]
fn unknown_format_still_gives_guidance() {
let data = [0u8, 1, 2, 3];
let msg = diagnose_model_load_error(Path::new("weird.dat"), &data, "x");
assert!(msg.contains("RVF binary container"), "{msg}");
assert!(msg.contains("wifi-densepose-train"), "{msg}");
}
}
@@ -117,23 +117,6 @@ impl OwnedDiscoveryBuilder {
via_device: self.via_device.as_deref(),
}
}
/// Derive a per-node builder from this base (issue #898). Each physical
/// RuView node must surface as its own Home-Assistant device — the base
/// builder's `node_id` (the MQTT client id) is replaced with the actual
/// node id, giving a distinct `wifi_densepose_<node>` device identifier
/// and a per-node friendly name, instead of collapsing every node into a
/// single hard-coded device.
pub fn for_node(&self, node_id: &str) -> OwnedDiscoveryBuilder {
OwnedDiscoveryBuilder {
discovery_prefix: self.discovery_prefix.clone(),
node_id: node_id.to_string(),
node_friendly_name: Some(format!("RuView node {node_id}")),
sw_version: self.sw_version.clone(),
model: self.model.clone(),
via_device: self.via_device.clone(),
}
}
}
/// Core run loop. Pumps the broadcast channel + the MQTT event loop in
@@ -146,19 +129,20 @@ async fn run(
let opts = build_mqtt_options(&cfg);
let (client, mut eventloop): (AsyncClient, EventLoop) = AsyncClient::new(opts, 256);
let builder_borrowed = builder_owned.as_borrowed();
let entities = DiscoveryBuilder::enabled_entities(
cfg.privacy_mode,
cfg.publish_pose,
&[], // no_semantic — wire from cli::Args in P3.5
);
// #898: one Home-Assistant device per node. Discovery + availability are
// published lazily the first time a snapshot for a given node_id arrives;
// each node's builder + availability are retained here for heartbeats and
// the offline LWT. (Previously a single hard-coded builder collapsed every
// node into one device.)
let mut nodes: std::collections::HashMap<String, (OwnedDiscoveryBuilder, NodeAvailability)> =
std::collections::HashMap::new();
if let Err(e) = publish_all_discovery(&client, &builder_borrowed, &entities).await {
warn!("[mqtt] initial discovery publish failed: {e}");
}
let avail = NodeAvailability::for_builder(&builder_borrowed, &entities);
if let Err(e) = publish_availability(&client, &avail, "online").await {
warn!("[mqtt] initial availability publish failed: {e}");
}
let mut rate_limiter = RateLimiter::new();
let mut last_heartbeat = Instant::now();
@@ -195,20 +179,14 @@ async fn run(
// Periodic heartbeat / discovery refresh.
_ = tokio::time::sleep(Duration::from_secs(1)) => {
if last_heartbeat.elapsed() >= AVAILABILITY_HEARTBEAT {
for (_, na) in nodes.values() {
if let Err(e) = publish_availability(&client, na, "online").await {
warn!("[mqtt] heartbeat publish failed: {e}");
}
if let Err(e) = publish_availability(&client, &avail, "online").await {
warn!("[mqtt] heartbeat publish failed: {e}");
}
last_heartbeat = Instant::now();
}
if last_refresh.elapsed() >= Duration::from_secs(cfg.refresh_secs) {
for (nb, _) in nodes.values() {
if let Err(e) =
publish_all_discovery(&client, &nb.as_borrowed(), &entities).await
{
warn!("[mqtt] discovery refresh failed: {e}");
}
if let Err(e) = publish_all_discovery(&client, &builder_borrowed, &entities).await {
warn!("[mqtt] discovery refresh failed: {e}");
}
last_refresh = Instant::now();
}
@@ -219,39 +197,18 @@ async fn run(
match recv {
Ok(snap) => {
let elapsed = start_instant.elapsed();
// #898: on first sight of a node_id, publish that
// node's discovery + availability; then route its
// state to per-node topics.
if !nodes.contains_key(&snap.node_id) {
let nb = builder_owned.for_node(&snap.node_id);
let borrowed = nb.as_borrowed();
if let Err(e) =
publish_all_discovery(&client, &borrowed, &entities).await
{
warn!("[mqtt] node {} discovery failed: {e}", snap.node_id);
}
let na = NodeAvailability::for_builder(&borrowed, &entities);
if let Err(e) = publish_availability(&client, &na, "online").await {
warn!("[mqtt] node {} availability failed: {e}", snap.node_id);
}
nodes.insert(snap.node_id.clone(), (nb, na));
}
let borrowed = nodes[&snap.node_id].0.as_borrowed();
publish_snapshot(&client, &borrowed, &snap, &cfg, &mut rate_limiter, elapsed).await;
publish_snapshot(&client, &builder_borrowed, &snap, &cfg, &mut rate_limiter, elapsed).await;
}
Err(broadcast::error::RecvError::Lagged(n)) => {
warn!("[mqtt] lagged behind broadcast by {n} messages — dropped");
}
Err(broadcast::error::RecvError::Closed) => {
info!("[mqtt] broadcast channel closed, draining");
// Publish offline for every known node before exit.
for (_, na) in nodes.values() {
let _ = publish_availability(&client, na, "offline").await;
}
// Publish offline before exit.
let _ = publish_availability(&client, &avail, "offline").await;
let _ = client.disconnect().await;
return;
}
}
}
}
@@ -339,52 +296,3 @@ async fn publish_state(client: &AsyncClient, m: &StateMessage) -> Result<(), Cli
};
client.publish(&m.topic, qos, m.retain, m.payload.clone()).await
}
#[cfg(test)]
mod per_node_device_tests {
//! Issue #898 — each physical node must surface as its own Home-Assistant
//! device, not collapse into one hard-coded device.
use super::*;
fn base() -> OwnedDiscoveryBuilder {
OwnedDiscoveryBuilder {
discovery_prefix: "homeassistant".into(),
node_id: "wifi-densepose-1".into(),
node_friendly_name: Some("RuView".into()),
sw_version: "0.0.0".into(),
model: "test".into(),
via_device: None,
}
}
fn device_identifiers(b: &OwnedDiscoveryBuilder) -> Vec<String> {
b.as_borrowed().build(EntityKind::Presence).device.identifiers
}
#[test]
fn for_node_overrides_node_id_and_friendly_name() {
let n = base().for_node("node-A");
assert_eq!(n.node_id, "node-A");
assert_eq!(n.node_friendly_name.as_deref(), Some("RuView node node-A"));
}
#[test]
fn distinct_nodes_yield_distinct_ha_device_identifiers() {
let b = base();
let a = device_identifiers(&b.for_node("node-A"));
let c = device_identifiers(&b.for_node("node-B"));
assert_eq!(a, vec!["wifi_densepose_node-A".to_string()]);
assert_eq!(c, vec!["wifi_densepose_node-B".to_string()]);
assert_ne!(a, c, "#898: two nodes must not collapse into one device");
}
#[test]
fn single_node_keeps_a_stable_identity() {
// Two snapshots from the same node map to the same device.
let b = base();
assert_eq!(
device_identifiers(&b.for_node("node-7")),
device_identifiers(&b.for_node("node-7"))
);
}
}
@@ -171,28 +171,12 @@ async fn discovery_topics_appear_on_broker() {
// Spawn the publisher.
let cfg = make_cfg(port, false, "discovery");
let builder = make_builder("inttest1");
let (tx, rx) = broadcast::channel::<VitalsSnapshot>(32);
let (_tx, rx) = broadcast::channel::<VitalsSnapshot>(32);
let _handle = spawn(cfg, builder, rx);
// #898: discovery is now published per-node the first time a snapshot for
// that node_id arrives (not eagerly at startup). Drive snapshots for
// "inttest1" throughout the window so its device's discovery lands — same
// pattern as state_messages_published_on_snapshot_broadcast.
let tx_bg = tx.clone();
let drive = tokio::spawn(async move {
for _ in 0..60 {
let _ = tx_bg.send(VitalsSnapshot {
node_id: "inttest1".into(),
..Default::default()
});
tokio::time::sleep(Duration::from_millis(200)).await;
}
});
// Drain the subscriber for up to 6 s — enough for initial discovery
// + first availability publication.
let msgs = collect_published(&mut sub_loop, Duration::from_secs(6)).await;
drive.abort();
let _ = sub.disconnect().await;
// Assertions: at least the presence + heart_rate + fall discovery
@@ -237,23 +221,10 @@ async fn privacy_mode_suppresses_biometric_discovery() {
let cfg = make_cfg(port, /* privacy_mode = */ true, "privacy");
let builder = make_builder("inttest2");
let (tx, rx) = broadcast::channel::<VitalsSnapshot>(32);
let (_tx, rx) = broadcast::channel::<VitalsSnapshot>(32);
let _handle = spawn(cfg, builder, rx);
// #898: per-node discovery is triggered by a snapshot for that node_id.
let tx_bg = tx.clone();
let drive = tokio::spawn(async move {
for _ in 0..60 {
let _ = tx_bg.send(VitalsSnapshot {
node_id: "inttest2".into(),
..Default::default()
});
tokio::time::sleep(Duration::from_millis(200)).await;
}
});
let msgs = collect_published(&mut sub_loop, Duration::from_secs(6)).await;
drive.abort();
let _ = sub.disconnect().await;
let topics: Vec<&str> = msgs.iter().map(|(t, _, _)| t.as_str()).collect();
+1 -1