fix(#505,#506): bump firmware to v0.6.4 + no-signal UI indicator

Fixes #505 — version string mismatch: - firmware/esp32-csi-node/version.txt: 0.6.2 → 0.6.4 - .github/workflows/firmware-ci.yml: add verify-embedded-version step that extracts the esp_app_desc version string from the built binary and fails CI if it doesn't match version.txt. Prevents the v0.6.3 regression (tagged without bumping version.txt) from recurring. Fixes #506 — stale skeleton shown after ESP32 unplugged: - csi_pipeline.rs: add CsiPipelineState::last_csi_received (Instant), stamped on every process_frame() call; PipelineOutput gains csi_live:bool (true iff a real frame arrived within the last 5 s). - stream.rs: /api/splats now includes csi_live in the JSON response. - viewer.html: add #no-signal banner (red, centered overlay) shown when the server reports csi_live=false in live/remote transport mode. Skeleton is cleared while the banner is visible so stale pose data is not presented as live activity. Co-Authored-By: claude-flow <ruv@ruv.net>
2026-07-02 14:03:19 +00:00 · 2026-05-06 13:28:45 -04:00
1607 changed files with 36360 additions and 241524 deletions
@@ -1,55 +1,50 @@
 {
  "running": true,
-  "startedAt": "2026-05-24T22:26:25.030Z",
+  "startedAt": "2026-03-09T15:26:00.921Z",
  "workers": {
    "map": {
-      "runCount": 64,
+      "runCount": 49,
-      "successCount": 64,
+      "successCount": 49,
      "failureCount": 0,
-      "averageDurationMs": 136.171875,
+      "averageDurationMs": 1.2857142857142858,
-      "lastRun": "2026-05-25T06:07:33.387Z",
+      "lastRun": "2026-02-28T16:13:19.194Z",
-      "lastStartedAt": "2026-05-25T06:07:33.381Z",
+      "nextRun": "2026-03-09T15:56:00.928Z",
      "nextRun": "2026-05-25T06:26:25.410Z",
      "isRunning": false
    },
    "audit": {
-      "runCount": 72,
+      "runCount": 45,
-      "successCount": 27,
+      "successCount": 0,
      "failureCount": 45,
-      "averageDurationMs": 26260.11111111111,
+      "averageDurationMs": 0,
-      "lastRun": "2026-05-25T06:08:29.594Z",
+      "lastRun": "2026-03-09T15:43:00.933Z",
-      "lastStartedAt": "2026-05-25T06:07:33.416Z",
+      "nextRun": "2026-03-09T15:38:00.914Z",
      "nextRun": "2026-05-25T06:18:32.928Z",
      "isRunning": false
    },
    "optimize": {
-      "runCount": 54,
+      "runCount": 34,
-      "successCount": 9,
+      "successCount": 0,
-      "failureCount": 45,
+      "failureCount": 34,
-      "averageDurationMs": 40303.377578766485,
+      "averageDurationMs": 0,
-      "lastRun": "2026-05-25T05:59:05.330Z",
+      "lastRun": "2026-02-28T16:23:19.387Z",
-      "lastStartedAt": "2026-05-25T05:54:05.318Z",
+      "nextRun": "2026-03-09T15:45:00.915Z",
      "nextRun": "2026-05-25T06:20:15.145Z",
      "isRunning": false
    },
    "consolidate": {
-      "runCount": 32,
+      "runCount": 23,
-      "successCount": 32,
+      "successCount": 23,
      "failureCount": 0,
-      "averageDurationMs": 4.71875,
+      "averageDurationMs": 0.6521739130434783,
-      "lastRun": "2026-05-25T05:38:20.449Z",
+      "lastRun": "2026-02-28T16:05:19.091Z",
-      "lastStartedAt": "2026-05-25T05:38:20.443Z",
+      "nextRun": "2026-03-09T16:02:00.918Z",
      "nextRun": "2026-05-25T06:32:25.248Z",
      "isRunning": false
    },
    "testgaps": {
-      "runCount": 100,
+      "runCount": 27,
-      "successCount": 63,
+      "successCount": 0,
-      "failureCount": 37,
+      "failureCount": 27,
-      "averageDurationMs": 108604.0537328991,
+      "averageDurationMs": 0,
-      "lastRun": "2026-05-25T06:11:52.529Z",
+      "lastRun": "2026-02-28T16:08:19.369Z",
-      "lastStartedAt": "2026-05-25T06:07:33.390Z",
+      "nextRun": "2026-03-09T15:54:00.920Z",
      "nextRun": "2026-05-25T06:14:25.296Z",
      "isRunning": false
    },
    "predict": {
@@ -69,8 +64,8 @@
  },
  "config": {
    "autoStart": false,
-    "logDir": "C:\\Users\\ruv\\Projects\\wifi-densepose\\.claude-flow\\logs",
+    "logDir": "/Users/cohen/GitHub/ruvnet/RuView/.claude-flow/logs",
-    "stateFile": "C:\\Users\\ruv\\Projects\\wifi-densepose\\.claude-flow\\daemon-state.json",
+    "stateFile": "/Users/cohen/GitHub/ruvnet/RuView/.claude-flow/daemon-state.json",
    "maxConcurrent": 2,
    "workerTimeoutMs": 300000,
    "resourceThresholds": {
@@ -136,5 +131,5 @@
      }
    ]
  },
-  "savedAt": "2026-05-25T06:11:52.530Z"
+  "savedAt": "2026-03-09T15:43:00.933Z"
 }
@@ -1,119 +0,0 @@
 {
  "id": "aether-arena-aa",
  "name": "AetherArena (AA) — Official Spatial-Intelligence Benchmark",
  "adr": "ADR-149",
  "adrPath": "docs/adr/ADR-149-public-community-leaderboard-huggingface.md",
  "status": "Accepted",
  "initializedDate": "2026-05-30",
  "targetDate": "2026-08-31",
  "exitCriteria": "Benchmark INFRASTRUCTURE done, tested, CI-gated, deploy-ready: aa_score_runner.rs passes deterministic fixture test; CI harness-gate green on every PR; aether-arena repo scaffold committed (README four-part framing + aa-submission.toml schema + VERIFY.md); public smoke split committed; HF Space lifecycle skeleton deployed; signed Parquet ledger functional; RuView baseline PCK@20 ~2.5% entered; ADR-149 §7 acceptance test (five-step stranger test) passes. NOTE: ML SOTA (MM-Fi PCK@20 ~72%) is a separate long-running stretch goal blocked on ADR-079 camera-ground-truth — it is NOT an infra exit criterion.",
  "baselineState": {
    "adrStatus": "Accepted, committed 2026-05-30",
    "scorerCode": "ruview_metrics.rs + ablation.rs + proof.rs exist in wifi-densepose-train; aa_score_runner.rs not yet created",
    "aetherArenaRepo": "does not exist yet — needs user authorization to create ruvnet/aether-arena public repo",
    "hfSpace": "does not exist yet — needs HF_TOKEN and user authorization to deploy ruvnet/aether-arena HF Space",
    "smokeDataset": "not committed",
    "resultsLedger": "not created",
    "ruviewBaseline": "PCK@20 ~2.5% self-reported, not formally entered",
    "ciGate": "not added to workflow"
  },
  "milestones": {
    "m1": {
      "name": "ADR-149 Accepted + committed",
      "status": "DONE",
      "completedDate": "2026-05-30",
      "completionCriteria": "ADR-149 file committed to docs/adr/ with status Accepted",
      "notes": "Done this session. File at docs/adr/ADR-149-public-community-leaderboard-huggingface.md"
    },
    "m2": {
      "name": "Deterministic scorer runner bin (aa_score_runner.rs)",
      "status": "NOT_STARTED",
      "completionCriteria": "aa_score_runner.rs compiles, runs ruview_metrics on a committed fixture, emits RuViewTier + SHA-256 proof hash, mirrors existing *_proof_runner.rs pattern; cargo test passes",
      "estimatedEffort": "3-5 days",
      "owner": "wifi-densepose-train crate or new aa-scorer crate"
    },
    "m3": {
      "name": "CI harness-gate: GitHub Actions workflow",
      "status": "NOT_STARTED",
      "completionCriteria": "A GitHub Actions workflow runs aa_score_runner on every PR as a build gate; PR fails if scorer fails determinism check; workflow committed and green",
      "estimatedEffort": "2-3 days",
      "dependency": "M2 must be done first"
    },
    "m4": {
      "name": "aether-arena repo scaffold",
      "status": "NOT_STARTED",
      "completionCriteria": "ruvnet/aether-arena repo created with: README (four-part framing: Public leaderboard / Private eval split / Open scorer / Signed results); aa-submission.toml manifest schema; VERIFY.md (ADR-149 §7 stranger acceptance test); neutrality/governance section (§2.8); contribution guide",
      "estimatedEffort": "3-5 days",
      "blockers": ["Needs user authorization to create public ruvnet/aether-arena repo on GitHub"]
    },
    "m5": {
      "name": "Public smoke split committed + private MM-Fi held-out split prep",
      "status": "NOT_STARTED",
      "completionCriteria": "Public smoke split committed to aether-arena repo (stranger can score locally); private MM-Fi held-out split prepared under non-public path with CC BY-NC 4.0 attribution; Wi-Pose explicitly excluded from v0",
      "estimatedEffort": "5-7 days",
      "riskNotes": "MM-Fi CC BY-NC 4.0: AA must remain non-commercial and carry MM-Fi attribution; raw frames stay in private split; only derived CSI features + scores may be exposed"
    },
    "m6": {
      "name": "HF Space (Gradio) skeleton",
      "status": "BLOCKED",
      "completionCriteria": "HF Space deployed at ruvnet/aether-arena with submission lifecycle (submitted->validated->quarantined->smoke_scored->full_scored->published/rejected); sandboxed scorer container wired; basic leaderboard table rendered",
      "estimatedEffort": "7-10 days",
      "blockers": [
        "Needs HF_TOKEN — check .env for HF_TOKEN or HUGGINGFACE_TOKEN",
        "Needs user authorization to create/deploy ruvnet/aether-arena HF Space (outward-facing public deployment)"
      ]
    },
    "m7": {
      "name": "Signed append-only Parquet results ledger",
      "status": "NOT_STARTED",
      "completionCriteria": "HF dataset ruvnet/aether-arena-results created; append-only Parquet ledger with signed rows; determinism_gate enforced; no row can be silently edited",
      "estimatedEffort": "3-5 days",
      "ledgerSchema": "submitter, model_ref, category, feature_set, tier, pck20, oks, mota, vitals_bpm_err, latency_p50, latency_p95, privacy_leakage, cross_room_deg, proof_sha256, scored_at, harness_version",
      "dependency": "M6 must be scaffolded first"
    },
    "m8": {
      "name": "RuView baseline entry + public launch",
      "status": "NOT_STARTED",
      "completionCriteria": "RuView wifi-densepose-pretrained baseline entered (honest PCK@20 ~2.5%); ADR-149 §7 five-step stranger acceptance test passes; v0 live with Presence + Pose + Edge-latency + Determinism categories active; Privacy and Cross-room shown as gated/coming-soon",
      "estimatedEffort": "3-5 days",
      "dependency": "M4+M5+M6+M7 complete",
      "notes": "ML SOTA improvement (PCK@20 ~72%) is a SEPARATE stretch goal blocked on ADR-079 P7-P9 camera ground truth. NOT a blocker for infra launch."
    }
  },
  "activeMilestone": "m2",
  "completedMilestones": ["m1"],
  "knownRisks": [
    "HF_TOKEN not confirmed present in .env — check before M6 work begins",
    "ruvnet/aether-arena public repo creation is outward-facing — needs explicit user authorization",
    "MM-Fi CC BY-NC 4.0: AA must stay legally non-commercial and brand-distinct from commercial RuView product; or seek MM-Fi commercial grant before any paid tier",
    "Wi-Pose has research-use-only terms (no redistribution grant) — excluded from v0; revisit only if terms are clarified with authors",
    "HF Space free CPU tier may be too slow for Candle/tch inference pipeline — may need ZeroGPU or self-hosted scorer on cognitum-20260110 GCloud A100/L4",
    "ADR-079 camera-ground-truth (PCK@20 SOTA) is P7-P9 pending — NOT an infra blocker; must not be conflated with AA infra completion",
    "Neutrality/governance risk: RuView seeded the scorer — must be demonstrably scored through the same public pipeline as any other entrant (§2.8 controls)"
  ],
  "driftSignals": {
    "timeline": "GREEN — just initialized, no timeline pressure yet",
    "scope": "GREEN — scope locked at four-part structure per ADR-149 §2 decision",
    "approach": "GREEN — reuse pattern (existing ruview_metrics + proof.rs) confirmed in ADR-149",
    "dependency": "YELLOW — HF_TOKEN and ruvnet/aether-arena repo authorization are external blockers with unknown ETA",
    "priority": "GREEN — active feature branch feat/adr-136-146-streaming-engine in progress; AA infra can proceed in parallel on its own branch"
  },
  "stretchGoals": {
    "sotaML": "MM-Fi PCK@20 SOTA ~72% — separate ML effort blocked on ADR-079 P7-P9 camera-ground-truth data collection; NOT an infra exit criterion",
    "privacyAxis": "ADR-145 §10 membership-inference attacker — activate Privacy leaderboard axis once attacker is implemented and published",
    "crossRoom": "Multi-room held-out split — activate Cross-room generalization axis",
    "multiOrgSteering": "Invite co-maintainers from other projects once >=N external entries land"
  },
  "sessionHistory": [
    {
      "date": "2026-05-30",
      "type": "initialization",
      "accomplished": [
        "ADR-149 Accepted and committed to docs/adr/",
        "Horizon record initialized in .claude-flow/horizons/aether-arena-aa.json",
        "Memory stored in horizons namespace under key horizon-aether-arena-aa",
        "Session check-in record stored in horizon-sessions namespace"
      ]
    }
  ]
 }
@@ -1,11 +1,11 @@
 {
-  "timestamp": "2026-05-25T06:07:33.385Z",
+  "timestamp": "2026-02-28T16:13:19.193Z",
-  "projectRoot": "C:\\Users\\ruv\\Projects\\wifi-densepose",
+  "projectRoot": "/home/user/wifi-densepose",
  "structure": {
    "hasPackageJson": false,
    "hasTsConfig": false,
    "hasClaudeConfig": true,
    "hasClaudeFlow": true
  },
-  "scannedAt": 1779689253386
+  "scannedAt": 1772295199193
 }
@@ -1,5 +1,5 @@
 {
-  "timestamp": "2026-05-25T05:38:20.448Z",
+  "timestamp": "2026-02-28T16:05:19.091Z",
  "patternsConsolidated": 0,
  "memoryCleaned": 0,
  "duplicatesRemoved": 0
@@ -1,17 +0,0 @@
 {
  "timestamp": "2026-05-25T05:59:05.405Z",
  "mode": "local",
  "memoryUsage": {
    "rss": 9891840,
    "heapTotal": 35598336,
    "heapUsed": 26516560,
    "external": 3952418,
    "arrayBuffers": 55689
  },
  "uptime": 27163.5846658,
  "optimizations": {
    "cacheHitRate": 0.78,
    "avgResponseTime": 45
  },
  "note": "Install Claude Code CLI for AI-powered optimization suggestions"
 }
@@ -1,84 +1,12 @@
 {
-  "timestamp": "2026-05-25T06:08:29.589Z",
+  "timestamp": "2026-03-06T13:17:27.368Z",
-  "mode": "headless",
+  "mode": "local",
-  "workerType": "audit",
+  "checks": {
-  "model": "haiku",
+    "envFilesProtected": true,
-  "durationMs": 56168,
+    "gitIgnoreExists": true,
-  "executionId": "audit_1779689253421_dfflmb",
+    "noHardcodedSecrets": true
  "success": true,
  "findings": {
    "vulnerabilities": [
      {
        "severity": "high",
        "file": ".claude/helpers/github-safe.js",
        "line": 50,
        "description": "Command injection vulnerability in execSync call. User-controlled arguments in `newArgs` are joined without shell escaping. An attacker can inject shell metacharacters (e.g., `; rm -rf /`) via the body content or through command/subcommand parameters. The temp file approach is safe, but the command construction `gh ${command} ${subcommand} ${newArgs.join(' ')}` allows shell injection.",
        "example": "gh issue comment 123 'test`whoami`' would execute whoami"
      },
      {
        "severity": "high",
        "file": "scripts/csi-spectrogram.js",
        "line": 45,
        "description": "Sensitive credential exposure via command-line arguments. The `--seed-token` parameter is passed as a CLI argument, which is visible in process listings (ps aux output). This violates secure credential handling practices. Tokens should be read from environment variables or secure config files, not command-line args.",
        "example": "node scripts/csi-spectrogram.js --seed-token secret_abc_123 exposes token in process list"
      },
      {
        "severity": "medium",
        "file": "scripts/apnea-detector.js",
        "line": 71,
        "description": "Unsafe buffer reading without comprehensive length validation. The code checks `buf.length` at 32 bytes (line 70) but then reads at fixed offsets (lines 72-76) without validating that each read stays within bounds. If a malformed packet is received, `readInt8/readUInt16LE/readUInt32LE` may read unintended data or zeros.",
        "example": "A 33-byte buffer would pass the check but reading UInt32LE at offset 8 would go out of bounds"
      },
      {
        "severity": "medium",
        "file": "scripts/benchmark-rf-scan.js",
        "line": 110,
        "description": "Potential out-of-bounds buffer access in parseCSIFrame. While the bounds check at line 107 is present, the `nSubcarriers` value from the packet is used to calculate required buffer size without validation of the value itself. A maliciously crafted packet with extremely large nSubcarriers could cause memory issues.",
        "example": "Packet with nSubcarriers=999999 would request excessive buffer allocation"
      },
      {
        "severity": "medium",
        "file": "scripts/csi-spectrogram.js",
        "line": 39,
        "description": "Unsafe URL construction with untrusted `seed-url` parameter. The `--seed-url` argument is used directly for HTTPS requests without validation. This could allow SSRF (Server-Side Request Forgery) or DNS rebinding attacks if an attacker controls the seed URL.",
        "example": "node scripts/csi-spectrogram.js --seed-url http://internal.local:9000 could access internal services"
      },
      {
        "severity": "low",
        "file": ".claude/helpers/statusline.js",
        "line": 140,
        "description": "Shell command injection risk in execSync calls. Commands like `ps aux 2>/dev/null | grep -c agentic-flow` use grep patterns that could be vulnerable if any variables are interpolated (though currently hardcoded). The `execSync` with shell=true is generally risky.",
        "example": "If any pattern becomes user-controlled: `grep -c ${pattern}` could inject shell metacharacters"
      },
      {
        "severity": "low",
        "file": ".claude/helpers/memory.js",
        "line": 10,
        "description": "Unvalidated JSON parsing. The code parses JSON from MEMORY_FILE without try-catch in the loadMemory function (catches error but doesn't validate structure). Malformed JSON or corrupted memory file could cause issues.",
        "example": "Memory file with circular JSON structure could cause issues when stringifying"
      },
      {
        "severity": "low",
        "file": "scripts/device-fingerprint.js",
        "line": 72,
        "description": "Hardcoded device fingerprints and network configuration. While not a traditional 'hardcoded secret', the KNOWN_DEVICES array contains identifiable SSIDs and MAC addresses that could be used to correlate network infrastructure. This data should be externalized or sanitized.",
        "example": "SSID 'ruv.net' and 'Cohen-Guest' could identify specific installations"
      }
    ],
    "riskScore": 42,
    "recommendations": [
      "**CRITICAL**: Replace `execSync` command construction in github-safe.js with proper shell escaping using `child_process.execFile()` instead of `execSync()`, or use the `shell: false` option with array arguments to avoid shell parsing entirely.",
      "**CRITICAL**: Move `--seed-token` from CLI arguments to environment variable `SEED_TOKEN` in csi-spectrogram.js. Update documentation to instruct users: `export SEED_TOKEN=...` instead of passing via CLI.",
      "**HIGH**: Add comprehensive buffer bounds validation in all UDP packet parsing functions (apnea-detector.js, benchmark-rf-scan.js, etc.). Validate both the buffer length AND the parsed header values before using them in calculations.",
      "**HIGH**: Validate and sanitize the `--seed-url` parameter in csi-spectrogram.js. Whitelist allowed domains or restrict to localhost/internal IPs only. Add URL scheme validation (https only).",
      "**MEDIUM**: Replace hardcoded device fingerprints (KNOWN_DEVICES) with externalized configuration or environment variables. Document that this data contains identifiable network information.",
      "**MEDIUM**: Add input validation to `parseArgs()` results in all scripts. Validate numeric ranges, file paths, and enum values before use.",
      "**LOW**: Wrap JSON.parse() calls in try-catch blocks throughout (memory.js, session.js) with explicit error handling and recovery.",
      "**LOW**: Audit all uses of `require()` with dynamic paths. Ensure paths are always derived from fixed `__dirname` and not user-controlled.",
      "**LOW**: Remove or sandbox the ability to pass arbitrary URLs via CLI. Consider using a configuration file (YAML/JSON) for endpoint URLs instead.",
      "**INFO**: Add a pre-commit hook to detect hardcoded credentials using tools like `detect-secrets` or `truffleHog`."
    ]
  },
-  "rawOutputPreview": "# Security Audit Report — wifi-densepose\n\n```json\n{\n  \"vulnerabilities\": [\n    {\n      \"severity\": \"high\",\n      \"file\": \".claude/helpers/github-safe.js\",\n      \"line\": 50,\n      \"description\": \"Command injection vulnerability in execSync call. User-controlled arguments in `newArgs` are joined without shell escaping. An attacker can inject shell metacharacters (e.g., `; rm -rf /`) via the body content or through command/subcommand parameters. The temp file approach is safe, but the command construction `gh ${command} ${subcommand} ${newArgs.join(' ')}` allows shell injection.\",\n      \"example\": \"gh issue comment 123 'test`whoami`' would execute whoami\"\n    },\n    {\n      \"severity\": \"high\",\n      \"file\": \"scripts/csi-spectrogram.js\",\n      \"line\": 45,\n      \"description\": \"Sensitive credential exposure via command-line arguments. The `--seed-token` parameter is passed as a CLI argument, which is visible in process listings (ps aux output). This violates secure credential handling practices. Tokens should be read from environment variables or secure config files, not command-line args.\",\n      \"example\": \"node scripts/csi-spectrogram.js --seed-token secret_abc_123 exposes token in process list\"\n    },\n    {\n      \"severity\": \"medium\",\n      \"file\": \"scripts/apnea-detector.js\",\n      \"line\": 71,\n      \"description\": \"Unsafe buffer reading without comprehensive length validation. The code checks `buf.length` at 32 bytes (line 70) but then reads at fixed offsets (lines 72-76) without validating that each read stays within bounds. 
If a malformed packet is received, `readInt8/readUInt16LE/readUInt32LE` may read unintended data or zeros.\",\n      \"example\": \"A 33-byte buffer would pass the check but reading UInt32LE at offset 8 would go out of bounds\"\n    },\n    {\n      \"severity\": \"medium\",\n      \"file\": \"scripts/benchmark-rf-scan.js\",\n      \"line\": 110,\n      \"description\": \"Potential out-of-bounds buffer access in parseCSIFrame. While the bounds check at line 107 is pres",
+  "riskLevel": "low",
-  "rawOutputLength": 7077
+  "recommendations": [],
  "note": "Install Claude Code CLI for AI-powered security analysis"
 }
@@ -1,106 +0,0 @@
 {
  "timestamp": "2026-05-25T06:11:52.519Z",
  "mode": "headless",
  "workerType": "testgaps",
  "model": "sonnet",
  "durationMs": 259124,
  "executionId": "testgaps_1779689253395_srltd5",
  "success": true,
  "findings": {
    "sections": [
      {
        "title": "Test Coverage Gap Analysis — wifi-densepose",
        "content": "\n",
        "level": 2
      },
      {
        "title": "Coverage Summary by Crate",
        "content": "\n| Crate | Tests Found | Status | Priority |\n|-------|-------------|--------|----------|\n| `wifi-densepose-core` | 26 inline | Good | Low |\n| `wifi-densepose-signal` | ~60 (validation only) | Moderate | **High** |\n| `wifi-densepose-nn` | **0** | Critical | **P1** |\n| `wifi-densepose-train` | ~60 (config/dataset) | Moderate | High |\n| `wifi-densepose-mat` | 1 integration test | Critical | **P1** |\n| `wifi-densepose-ruvector` | **0** | Critical | **P1** |\n| `wifi-densepose-sensing-server` | 4 integration tests | Moderate | High |\n| `wifi-densepose-wasm` | 3 compliance tests | Low | Low |\n\n---\n\n",
        "level": 3
      },
      {
        "title": "Tier 1: Critical Gaps",
        "content": "\n",
        "level": 2
      },
      {
        "title": "1. `wifi-densepose-nn` — Zero test coverage",
        "content": "\nEvery public API is untested. Place these at `v2/crates/wifi-densepose-nn/tests/inference_tests.rs`:\n\n```rust\n// v2/crates/wifi-densepose-nn/tests/inference_tests.rs\n\n#[cfg(test)]\nmod tensor_tests {\n    use wifi_densepose_nn::tensor::Tensor;\n\n    #[test]\n    fn tensor_shape_mismatch_returns_error() {\n        // data has 6 elements but shape claims 3×3=9\n        let result = Tensor::new(vec![1.0f32; 6], &[3, 3]);\n        assert!(result.is_err(), \"shape mismatch must be rejected\");\n    }\n\n    #[test]\n    fn tensor_empty_data_returns_error() {\n        let result = Tensor::new(vec![], &[0]);\n        assert!(result.is_err());\n    }\n\n    #[test]\n    fn tensor_nan_values_are_detected() {\n        let t = Tensor::new(vec![f32::NAN, 1.0, 2.0], &[3]).unwrap();\n        assert!(t.has_nan(), \"NaN in data must be detectable\");\n    }\n\n    #[test]\n    fn tensor_inf_values_are_detected() {\n        let t = Tensor::new(vec![f32::INFINITY, 1.0], &[2]).unwrap();\n        assert!(t.has_inf());\n    }\n}\n\n#[cfg(test)]\nmod modality_translator_tests {\n    use wifi_densepose_nn::translator::ModalityTranslator;\n\n    #[test]\n    fn translator_rejects_wrong_subcarrier_count() {\n        // standard expects 56 subcarriers; feed 57\n        let csi = vec![0.0f32; 57 * 3]; // 57 subcarriers × 3 antennas\n        let translator = ModalityTranslator::default();\n        let result = translator.translate(&csi, 57, 3);\n        assert!(result.is_err());\n    }\n\n    #[test]\n    fn translator_handles_all_zeros() {\n        let csi = vec![0.0f32; 56 * 3];\n        let translator = ModalityTranslator::default();\n        let result = translator.translate(&csi, 56, 3);\n        // zero input should produce some output without panic\n        assert!(result.is_ok());\n    }\n}\n\n#[cfg(test)]\nmod inference_engine_tests {\n    use wifi_densepose_nn::inference::InferenceEngine;\n\n    #[test]\n    fn load_nonexistent_model_returns_error() {\n   
     let result = InferenceEngine::from_path(\"/nonexistent/model.onnx\");\n        assert!(result.is_err());\n    }\n\n    #[test]\n    fn load_corrupted_bytes_returns_error() {\n        let tmp = tempfile::NamedTempFile::new().unwrap();\n        std::fs::write(tmp.path(), b\"not a valid onnx file\").unwrap();\n        let result = InferenceEngine::from_path(tmp.path());\n        assert!(result.is_err());\n    }\n\n    #[test]\n    fn batch_size_zero_returns_error() {\n        // can't run inference on an empty batch\n        // requires a valid model; skip if no model file in test fixtures\n        // use #[ignore] or a feature flag for CI\n    }\n}\n```\n\n---\n\n",
        "level": 3
      },
      {
        "title": "2. `wifi-densepose-mat` — Disaster response safety gaps",
        "content": "\nPlace at `v2/crates/wifi-densepose-mat/tests/`:\n\n```rust\n// v2/crates/wifi-densepose-mat/tests/detection_edge_cases.rs\n\n#[cfg(test)]\nmod breathing_rate_edge_cases {\n    use wifi_densepose_mat::detection::breathing::BreathingDetector;\n\n    #[test]\n    fn zero_bpm_is_classified_critical() {\n        let detector = BreathingDetector::default();\n        // flat-line signal — no breathing detected\n        let signal = vec![0.0f32; 1000];\n        let result = detector.classify(&signal).unwrap();\n        assert_eq!(result.triage_category, TriageCategory::Immediate);\n    }\n\n    #[test]\n    fn agonal_breathing_rate_triggers_immediate() {\n        // < 6 BPM is agonal; simulate 3 BPM signal\n        let detector = BreathingDetector::default();\n        let signal = generate_breathing_signal(3.0, 1000, 100.0); // 3 BPM, 1000 samples @ 100 Hz\n        let result = detector.classify(&signal).unwrap();\n        assert_eq!(result.triage_category, TriageCategory::Immediate);\n    }\n\n    #[test]\n    fn normal_breathing_is_classified_minor() {\n        let detector = BreathingDetector::default();\n        let signal = generate_breathing_signal(15.0, 1000, 100.0); // 15 BPM\n        let result = detector.classify(&signal).unwrap();\n        assert_eq!(result.triage_category, TriageCategory::Minor);\n    }\n\n    #[test]\n    fn all_nan_signal_returns_error_not_panic() {\n        let detector = BreathingDetector::default();\n        let signal = vec![f32::NAN; 1000];\n        let result = detector.classify(&signal);\n        assert!(result.is_err(), \"NaN input must be caught, not panic\");\n    }\n\n    fn generate_breathing_signal(bpm: f32, samples: usize, sample_rate: f32) -> Vec<f32> {\n        let freq = bpm / 60.0;\n        (0..samples)\n            .map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / sample_rate).sin())\n            .collect()\n    }\n}\n\n#[cfg(test)]\nmod alert_deduplication {\n    use 
wifi_densepose_mat::alerting::{AlertDispatcher, Alert, TriageCategory};\n    use std::time::Duration;\n\n    #[test]\n    fn duplicate_alerts_within_window_are_suppressed() {\n        let mut dispatcher = AlertDispatcher::new();\n        let alert = Alert::new(\"survivor-1\", TriageCategory::Immediate);\n        dispatcher.dispatch(alert.clone());\n        dispatcher.dispatch(alert.clone()); // same survivor, same category\n        assert_eq!(dispatcher.queued_count(), 1, \"duplicate must be deduplicated\");\n    }\n\n    #[test]\n    fn escalation_from_minor_to_immediate_is_forwarded() {\n        let mut dispatcher = AlertDispatcher::new();\n        dispatcher.dispatch(Alert::new(\"survivor-1\", TriageCategory::Minor));\n        dispatcher.dispatch(Alert::new(\"survivor-1\", TriageCategory::Immediate));\n        // escalation is not a duplicate — must pass through\n        assert!(dispatcher.last_alert_for(\"survivor-1\").map(|a| a.category) == Some(TriageCategory::Immediate));\n    }\n}\n\n#[cfg(test)]\nmod kalman_tracker_edge_cases {\n    use wifi_densepose_mat::tracking::KalmanTracker;\n\n    #[test]\n    fn position_jump_does_not_corrupt_state() {\n        let mut tracker = KalmanTracker::new();\n        tracker.update([1.0, 1.0, 0.5]);  // initial position\n        tracker.update([50.0, 50.0, 0.5]); // physically impossible jump\n        let pos = tracker.estimated_position();\n        // should not panic; should clamp or flag anomaly\n        assert!(pos.iter().all(|v| v.is_finite()));\n    }\n\n    #[test]\n    fn lost_track_resumes_on_re_detection() {\n        let mut tracker = KalmanTracker::new();\n        tracker.update([1.0, 1.0, 0.5]);\n        // simulate 10 missed frames\n        for _ in 0..10 { tracker.predict(); }\n        assert_eq!(tracker.state(), TrackState::Lost);\n        tracker.update([1.1, 1.1, 0.5]); // re-detected nearby\n        assert_eq!(tracker.state(), TrackState::Confirmed);\n    }\n}\n```\n\n---\n\n",
        "level": 3
      },
      {
        "title": "3. `wifi-densepose-ruvector` — Zero coverage on all 5 integration modules",
        "content": "\n```rust\n// v2/crates/wifi-densepose-ruvector/tests/viewpoint_tests.rs\n\n#[cfg(test)]\nmod attention_tests {\n    use wifi_densepose_ruvector::viewpoint::attention::CrossViewpointAttention;\n\n    #[test]\n    fn attention_weights_sum_to_one() {\n        let attn = CrossViewpointAttention::new(3); // 3 viewpoints\n        let features = vec![[1.0f32; 64], [2.0f32; 64], [3.0f32; 64]];\n        let weights = attn.compute_weights(&features);\n        let sum: f32 = weights.iter().sum();\n        assert!((sum - 1.0).abs() < 1e-5, \"attention must be a probability distribution\");\n    }\n\n    #[test]\n    fn single_viewpoint_gets_full_weight() {\n        let attn = CrossViewpointAttention::new(1);\n        let features = vec![[1.0f32; 64]];\n        let weights = attn.compute_weights(&features);\n        assert!((weights[0] - 1.0).abs() < 1e-6);\n    }\n\n    #[test]\n    fn zero_feature_vectors_do_not_produce_nan() {\n        let attn = CrossViewpointAttention::new(2);\n        let features = vec![[0.0f32; 64], [0.0f32; 64]];\n        let weights = attn.compute_weights(&features);\n        assert!(weights.iter().all(|w| w.is_finite()));\n    }\n}\n\n#[cfg(test)]\nmod sketch_tests {\n    use wifi_densepose_ruvector::sketch::WireSketch;\n\n    #[test]\n    fn round_trip_serialization() {\n        let sketch = WireSketch::from_keypoints(&[[0.5f32, 0.5], [0.3, 0.7]]);\n        let bytes = sketch.to_bytes();\n        let restored = WireSketch::from_bytes(&bytes).unwrap();\n        assert_eq!(sketch, restored);\n    }\n\n    #[test]\n    fn deserialize_truncated_bytes_returns_error() {\n        let sketch = WireSketch::from_keypoints(&[[0.5f32, 0.5]]);\n        let mut bytes = sketch.to_bytes();\n        bytes.truncate(bytes.len() / 2); // truncate halfway\n        assert!(WireSketch::from_bytes(&bytes).is_err());\n    }\n\n    #[test]\n    fn empty_keypoint_list_is_handled() {\n        let sketch = WireSketch::from_keypoints(&[]);\n        
assert_eq!(sketch.keypoint_count(), 0);\n    }\n}\n```\n\n---\n\n",
        "level": 3
      },
      {
        "title": "Tier 2: Signal Processing Gaps",
        "content": "\n",
        "level": 2
      },
      {
        "title": "4. `wifi-densepose-signal` — RuvSense module untested",
        "content": "\n```rust\n// v2/crates/wifi-densepose-signal/tests/ruvsense_tests.rs\n\n#[cfg(test)]\nmod coherence_gate_tests {\n    use wifi_densepose_signal::ruvsense::coherence_gate::{CoherenceGate, GateDecision};\n\n    #[test]\n    fn high_coherence_signal_is_accepted() {\n        let gate = CoherenceGate::new(0.7); // threshold = 0.7\n        let decision = gate.evaluate(0.95);\n        assert_eq!(decision, GateDecision::Accept);\n    }\n\n    #[test]\n    fn low_coherence_signal_is_rejected() {\n        let gate = CoherenceGate::new(0.7);\n        let decision = gate.evaluate(0.3);\n        assert_eq!(decision, GateDecision::Reject);\n    }\n\n    #[test]\n    fn borderline_coherence_triggers_recalibrate() {\n        let gate = CoherenceGate::new(0.7);\n        let decision = gate.evaluate(0.68); // just below threshold\n        assert_eq!(decision, GateDecision::Recalibrate);\n    }\n}\n\n#[cfg(test)]\nmod phase_align_tests {\n    use wifi_densepose_signal::ruvsense::phase_align::PhaseAligner;\n\n    #[test]\n    fn phase_at_plus_pi_does_not_wrap_incorrectly() {\n        let aligner = PhaseAligner::new();\n        let phases = vec![std::f32::consts::PI - 0.001, std::f32::consts::PI + 0.001];\n        let aligned = aligner.align(&phases);\n        // jump across ±π boundary must be handled continuously\n        let diff = (aligned[1] - aligned[0]).abs();\n        assert!(diff < 0.01, \"phase jump at ±π must be < 0.01 rad after alignment\");\n    }\n\n    #[test]\n    fn single_phase_value_aligns_to_itself() {\n        let aligner = PhaseAligner::new();\n        let phases = vec![1.5f32];\n        let aligned = aligner.align(&phases);\n        assert_eq!(aligned.len(), 1);\n        assert!((aligned[0] - 1.5).abs() < 1e-6);\n    }\n\n    #[test]\n    fn empty_phase_array_returns_empty() {\n        let aligner = PhaseAligner::new();\n        let aligned = aligner.align(&[]);\n        assert!(aligned.is_empty());\n    }\n}\n\n#[cfg(test)]\nmod 
adversarial_detection_tests {\n    use wifi_densepose_signal::ruvsense::adversarial::AdversarialDetector;\n\n    #[test]\n    fn physically_impossible_amplitude_is_flagged() {\n        let detector = AdversarialDetector::new();\n        // WiFi amplitude cannot exceed hardware saturation level\n        let frame = vec![1e9f32; 56]; // absurdly large\n        assert!(detector.is_suspicious(&frame));\n    }\n\n    #[test]\n    fn normal_amplitude_range_passes() {\n        let detector = AdversarialDetector::new();\n        let frame = vec![0.5f32; 56]; // typical normalized value\n        assert!(!detector.is_suspicious(&frame));\n    }\n\n    #[test]\n    fn multi_link_inconsistency_is_detected() {\n        // link A reports body moving right; link B reports no motion\n        // physically inconsistent — flag as adversarial\n        let detector = AdversarialDetector::new();\n        let result = detector.check_multi_link_consistency(\n            &[1.0, 2.0, 3.0], // link A\n            &[0.0, 0.0, 0.0], // link B (no motion)\n        );\n        assert!(result.is_inconsistent());\n    }\n}\n```\n\n---\n\n",
        "level": 3
      },
      {
        "title": "Tier 2: Training Pipeline Gaps",
        "content": "\n",
        "level": 2
      },
      {
        "title": "5. `wifi-densepose-train` — Geometry encoder and rapid adaptation untested",
        "content": "\n```rust\n// v2/crates/wifi-densepose-train/tests/test_geometry.rs\n\n#[cfg(test)]\nmod film_layer_tests {\n    use wifi_densepose_train::geometry::FilmLayer;\n\n    #[test]\n    fn film_layer_output_shape_matches_input() {\n        let film = FilmLayer::new(64, 32); // 64-dim features, 32-dim condition\n        let features = vec![0.5f32; 64];\n        let condition = vec![1.0f32; 32];\n        let output = film.forward(&features, &condition).unwrap();\n        assert_eq!(output.len(), 64, \"FiLM output must match feature dimensionality\");\n    }\n\n    #[test]\n    fn film_layer_zero_condition_acts_as_identity() {\n        let film = FilmLayer::new(64, 32);\n        let features = vec![1.0f32; 64];\n        let zero_condition = vec![0.0f32; 32];\n        let output = film.forward(&features, &zero_condition).unwrap();\n        // scale=1, shift=0 → identity; output ≈ input\n        for (o, f) in output.iter().zip(features.iter()) {\n            assert!((o - f).abs() < 0.1, \"zero condition should approximate identity\");\n        }\n    }\n}\n\n// v2/crates/wifi-densepose-train/tests/test_rapid_adapt.rs\n\n#[cfg(test)]\nmod rapid_adaptation_tests {\n    use wifi_densepose_train::rapid_adapt::RapidAdapter;\n\n    #[test]\n    fn adapter_updates_on_single_sample() {\n        let mut adapter = RapidAdapter::new(5); // 5 adaptation steps\n        let csi_sample = vec![0.1f32; 56 * 3];\n        let pose_label = vec![0.5f32; 17 * 2]; // 17 keypoints × (x, y)\n        let result = adapter.adapt_step(&csi_sample, &pose_label);\n        assert!(result.is_ok());\n    }\n\n    #[test]\n    fn adapter_with_zero_steps_is_no_op() {\n        let adapter = RapidAdapter::new(0);\n        // 0 adaptation steps → weights unchanged\n        let initial_weights = adapter.clone_weights();\n        let _ = adapter.adapt_step(&vec![0.1f32; 168], &vec![0.5f32; 34]);\n        assert_eq!(adapter.clone_weights(), initial_weights);\n    }\n}\n```\n\n---\n\n",
        "level": 3
      },
      {
        "title": "Tier 3: Server Integration Gaps",
        "content": "\n",
        "level": 2
      },
      {
        "title": "6. `wifi-densepose-sensing-server` — Auth and semantic analyzers",
        "content": "\n```rust\n// v2/crates/wifi-densepose-sensing-server/tests/auth_tests.rs\n\n#[cfg(test)]\nmod bearer_auth_tests {\n    use wifi_densepose_sensing_server::auth::{BearerValidator, TokenError};\n\n    #[test]\n    fn missing_authorization_header_returns_unauthorized() {\n        let validator = BearerValidator::new(\"secret-token\");\n        let result = validator.validate(None);\n        assert!(matches!(result, Err(TokenError::Missing)));\n    }\n\n    #[test]\n    fn wrong_token_is_rejected() {\n        let validator = BearerValidator::new(\"correct-token\");\n        let result = validator.validate(Some(\"Bearer wrong-token\"));\n        assert!(matches!(result, Err(TokenError::Invalid)));\n    }\n\n    #[test]\n    fn malformed_header_without_bearer_prefix_is_rejected() {\n        let validator = BearerValidator::new(\"token\");\n        let result = validator.validate(Some(\"token\")); // missing \"Bearer \" prefix\n        assert!(matches!(result, Err(TokenError::Malformed)));\n    }\n\n    #[test]\n    fn correct_token_is_accepted() {\n        let validator = BearerValidator::new(\"correct-token\");\n        let result = validator.validate(Some(\"Bearer correct-token\"));\n        assert!(result.is_ok());\n    }\n}\n\n// v2/crates/wifi-densepose-sensing-server/tests/semantic_tests.rs\n\n#[cfg(test)]\nmod fall_detection_tests {\n    use wifi_densepose_sensing_server::semantic::fall_detector::FallDetector;\n\n    #[test]\n    fn no_motion_does_not_trigger_fall() {\n        let mut detector = FallDetector::new();\n        for _ in 0..30 { // 30 frames of stillness\n            detector.update_pose(stationary_pose());\n        }\n        assert!(!detector.fall_detected());\n    }\n\n    #[test]\n    fn rapid_downward_velocity_triggers_fall() {\n        let mut detector = FallDetector::new();\n        // simulate person going from standing (y=1.7m) to prone (y=0.3m) in 3 frames\n        for (frame, y) in [(0, 1.7f32), (1, 1.0), (2, 0.3)] {\n   
         detector.update_pose(pose_at_height(y));\n        }\n        assert!(detector.fall_detected());\n    }\n\n    #[test]\n    fn sitting_down_slowly_does_not_trigger_fall() {\n        let mut detector = FallDetector::new();\n        // gradual height decrease over 30 frames is sitting, not falling\n        for i in 0..30 {\n            let y = 1.7f32 - (i as f32 * 0.04); // ~1.2m drop over 30 frames\n            detector.update_pose(pose_at_height(y));\n        }\n        assert!(!detector.fall_detected());\n    }\n}\n```\n\n---\n\n",
        "level": 3
      },
      {
        "title": "Cross-Cutting Gap Summary",
        "content": "| Gap Category | Severity | Affects | Recommended Action |\n|---|---|---|---|\n| `wifi-densepose-nn` has 0 tests | **Critical** | Inference pipeline | Add `tests/inference_tests.rs` per skeleton above |\n| `wifi-densepose-ruvector` has 0 tests | **Critical** | Viewpoint fusion, sketches | Add `tests/viewpoint_tests.rs` |\n| MAT disaster response missing edge cases | **Critical** | 0 BPM, agonal breathing, dedup | Add `tests/detection_edge_cases.rs` |\n| Signal RuvSense 28 modules untested | High | Core sensing logic | Add `tests/ruvsense_tests.rs` |\n| NN error paths (bad model files, OOM) | High | Production reliability | Add error path tests to nn |\n| Train geometry + rapid adapt = 0 tests | High | Domain adaptation | Add `tests/test_geometry.rs` |\n| Server auth token validation | High | Security boundary | Add `tests/auth_tests.rs` |\n| NaN/Inf propagation in f32 pipelines | High | All numeric crates | Add boundary tests per module |\n| Concurrent state under Arc<Mutex> | Medium | sensing-server, mat | Add contention tests |\n\nThe highest-ROI starting point is `wifi-densepose-nn` and `wifi-densepose-mat` — the nn crate has zero tests on the core inference pipeline, and mat covers life-safety scenarios where classification errors have real consequences.",
        "level": 2
      }
    ],
    "codeBlocks": [
      {
        "language": "rust",
        "code": "// v2/crates/wifi-densepose-nn/tests/inference_tests.rs\n\n#[cfg(test)]\nmod tensor_tests {\n    use wifi_densepose_nn::tensor::Tensor;\n\n    #[test]\n    fn tensor_shape_mismatch_returns_error() {\n        // data has 6 elements but shape claims 3×3=9\n        let result = Tensor::new(vec![1.0f32; 6], &[3, 3]);\n        assert!(result.is_err(), \"shape mismatch must be rejected\");\n    }\n\n    #[test]\n    fn tensor_empty_data_returns_error() {\n        let result = Tensor::new(vec![], &[0]);\n        assert!(result.is_err());\n    }\n\n    #[test]\n    fn tensor_nan_values_are_detected() {\n        let t = Tensor::new(vec![f32::NAN, 1.0, 2.0], &[3]).unwrap();\n        assert!(t.has_nan(), \"NaN in data must be detectable\");\n    }\n\n    #[test]\n    fn tensor_inf_values_are_detected() {\n        let t = Tensor::new(vec![f32::INFINITY, 1.0], &[2]).unwrap();\n        assert!(t.has_inf());\n    }\n}\n\n#[cfg(test)]\nmod modality_translator_tests {\n    use wifi_densepose_nn::translator::ModalityTranslator;\n\n    #[test]\n    fn translator_rejects_wrong_subcarrier_count() {\n        // standard expects 56 subcarriers; feed 57\n        let csi = vec![0.0f32; 57 * 3]; // 57 subcarriers × 3 antennas\n        let translator = ModalityTranslator::default();\n        let result = translator.translate(&csi, 57, 3);\n        assert!(result.is_err());\n    }\n\n    #[test]\n    fn translator_handles_all_zeros() {\n        let csi = vec![0.0f32; 56 * 3];\n        let translator = ModalityTranslator::default();\n        let result = translator.translate(&csi, 56, 3);\n        // zero input should produce some output without panic\n        assert!(result.is_ok());\n    }\n}\n\n#[cfg(test)]\nmod inference_engine_tests {\n    use wifi_densepose_nn::inference::InferenceEngine;\n\n    #[test]\n    fn load_nonexistent_model_returns_error() {\n        let result = InferenceEngine::from_path(\"/nonexistent/model.onnx\");\n        assert!(result.is_err());\n    
}\n\n    #[test]\n    fn load_corrupted_bytes_returns_error() {\n        let tmp = tempfile::NamedTempFile::new().unwrap();\n        std::fs::write(tmp.path(), b\"not a valid onnx file\").unwrap();\n        let result = InferenceEngine::from_path(tmp.path());\n        assert!(result.is_err());\n    }\n\n    #[test]\n    fn batch_size_zero_returns_error() {\n        // can't run inference on an empty batch\n        // requires a valid model; skip if no model file in test fixtures\n        // use #[ignore] or a feature flag for CI\n    }\n}"
      },
      {
        "language": "rust",
        "code": "// v2/crates/wifi-densepose-mat/tests/detection_edge_cases.rs\n\n#[cfg(test)]\nmod breathing_rate_edge_cases {\n    use wifi_densepose_mat::detection::breathing::BreathingDetector;\n\n    #[test]\n    fn zero_bpm_is_classified_critical() {\n        let detector = BreathingDetector::default();\n        // flat-line signal — no breathing detected\n        let signal = vec![0.0f32; 1000];\n        let result = detector.classify(&signal).unwrap();\n        assert_eq!(result.triage_category, TriageCategory::Immediate);\n    }\n\n    #[test]\n    fn agonal_breathing_rate_triggers_immediate() {\n        // < 6 BPM is agonal; simulate 3 BPM signal\n        let detector = BreathingDetector::default();\n        let signal = generate_breathing_signal(3.0, 1000, 100.0); // 3 BPM, 1000 samples @ 100 Hz\n        let result = detector.classify(&signal).unwrap();\n        assert_eq!(result.triage_category, TriageCategory::Immediate);\n    }\n\n    #[test]\n    fn normal_breathing_is_classified_minor() {\n        let detector = BreathingDetector::default();\n        let signal = generate_breathing_signal(15.0, 1000, 100.0); // 15 BPM\n        let result = detector.classify(&signal).unwrap();\n        assert_eq!(result.triage_category, TriageCategory::Minor);\n    }\n\n    #[test]\n    fn all_nan_signal_returns_error_not_panic() {\n        let detector = BreathingDetector::default();\n        let signal = vec![f32::NAN; 1000];\n        let result = detector.classify(&signal);\n        assert!(result.is_err(), \"NaN input must be caught, not panic\");\n    }\n\n    fn generate_breathing_signal(bpm: f32, samples: usize, sample_rate: f32) -> Vec<f32> {\n        let freq = bpm / 60.0;\n        (0..samples)\n            .map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / sample_rate).sin())\n            .collect()\n    }\n}\n\n#[cfg(test)]\nmod alert_deduplication {\n    use wifi_densepose_mat::alerting::{AlertDispatcher, Alert, TriageCategory};\n    use 
std::time::Duration;\n\n    #[test]\n    fn duplicate_alerts_within_window_are_suppressed() {\n        let mut dispatcher = AlertDispatcher::new();\n        let alert = Alert::new(\"survivor-1\", TriageCategory::Immediate);\n        dispatcher.dispatch(alert.clone());\n        dispatcher.dispatch(alert.clone()); // same survivor, same category\n        assert_eq!(dispatcher.queued_count(), 1, \"duplicate must be deduplicated\");\n    }\n\n    #[test]\n    fn escalation_from_minor_to_immediate_is_forwarded() {\n        let mut dispatcher = AlertDispatcher::new();\n        dispatcher.dispatch(Alert::new(\"survivor-1\", TriageCategory::Minor));\n        dispatcher.dispatch(Alert::new(\"survivor-1\", TriageCategory::Immediate));\n        // escalation is not a duplicate — must pass through\n        assert!(dispatcher.last_alert_for(\"survivor-1\").map(|a| a.category) == Some(TriageCategory::Immediate));\n    }\n}\n\n#[cfg(test)]\nmod kalman_tracker_edge_cases {\n    use wifi_densepose_mat::tracking::KalmanTracker;\n\n    #[test]\n    fn position_jump_does_not_corrupt_state() {\n        let mut tracker = KalmanTracker::new();\n        tracker.update([1.0, 1.0, 0.5]);  // initial position\n        tracker.update([50.0, 50.0, 0.5]); // physically impossible jump\n        let pos = tracker.estimated_position();\n        // should not panic; should clamp or flag anomaly\n        assert!(pos.iter().all(|v| v.is_finite()));\n    }\n\n    #[test]\n    fn lost_track_resumes_on_re_detection() {\n        let mut tracker = KalmanTracker::new();\n        tracker.update([1.0, 1.0, 0.5]);\n        // simulate 10 missed frames\n        for _ in 0..10 { tracker.predict(); }\n        assert_eq!(tracker.state(), TrackState::Lost);\n        tracker.update([1.1, 1.1, 0.5]); // re-detected nearby\n        assert_eq!(tracker.state(), TrackState::Confirmed);\n    }\n}"
      },
      {
        "language": "rust",
        "code": "// v2/crates/wifi-densepose-ruvector/tests/viewpoint_tests.rs\n\n#[cfg(test)]\nmod attention_tests {\n    use wifi_densepose_ruvector::viewpoint::attention::CrossViewpointAttention;\n\n    #[test]\n    fn attention_weights_sum_to_one() {\n        let attn = CrossViewpointAttention::new(3); // 3 viewpoints\n        let features = vec![[1.0f32; 64], [2.0f32; 64], [3.0f32; 64]];\n        let weights = attn.compute_weights(&features);\n        let sum: f32 = weights.iter().sum();\n        assert!((sum - 1.0).abs() < 1e-5, \"attention must be a probability distribution\");\n    }\n\n    #[test]\n    fn single_viewpoint_gets_full_weight() {\n        let attn = CrossViewpointAttention::new(1);\n        let features = vec![[1.0f32; 64]];\n        let weights = attn.compute_weights(&features);\n        assert!((weights[0] - 1.0).abs() < 1e-6);\n    }\n\n    #[test]\n    fn zero_feature_vectors_do_not_produce_nan() {\n        let attn = CrossViewpointAttention::new(2);\n        let features = vec![[0.0f32; 64], [0.0f32; 64]];\n        let weights = attn.compute_weights(&features);\n        assert!(weights.iter().all(|w| w.is_finite()));\n    }\n}\n\n#[cfg(test)]\nmod sketch_tests {\n    use wifi_densepose_ruvector::sketch::WireSketch;\n\n    #[test]\n    fn round_trip_serialization() {\n        let sketch = WireSketch::from_keypoints(&[[0.5f32, 0.5], [0.3, 0.7]]);\n        let bytes = sketch.to_bytes();\n        let restored = WireSketch::from_bytes(&bytes).unwrap();\n        assert_eq!(sketch, restored);\n    }\n\n    #[test]\n    fn deserialize_truncated_bytes_returns_error() {\n        let sketch = WireSketch::from_keypoints(&[[0.5f32, 0.5]]);\n        let mut bytes = sketch.to_bytes();\n        bytes.truncate(bytes.len() / 2); // truncate halfway\n        assert!(WireSketch::from_bytes(&bytes).is_err());\n    }\n\n    #[test]\n    fn empty_keypoint_list_is_handled() {\n        let sketch = WireSketch::from_keypoints(&[]);\n        
assert_eq!(sketch.keypoint_count(), 0);\n    }\n}"
      },
      {
        "language": "rust",
        "code": "// v2/crates/wifi-densepose-signal/tests/ruvsense_tests.rs\n\n#[cfg(test)]\nmod coherence_gate_tests {\n    use wifi_densepose_signal::ruvsense::coherence_gate::{CoherenceGate, GateDecision};\n\n    #[test]\n    fn high_coherence_signal_is_accepted() {\n        let gate = CoherenceGate::new(0.7); // threshold = 0.7\n        let decision = gate.evaluate(0.95);\n        assert_eq!(decision, GateDecision::Accept);\n    }\n\n    #[test]\n    fn low_coherence_signal_is_rejected() {\n        let gate = CoherenceGate::new(0.7);\n        let decision = gate.evaluate(0.3);\n        assert_eq!(decision, GateDecision::Reject);\n    }\n\n    #[test]\n    fn borderline_coherence_triggers_recalibrate() {\n        let gate = CoherenceGate::new(0.7);\n        let decision = gate.evaluate(0.68); // just below threshold\n        assert_eq!(decision, GateDecision::Recalibrate);\n    }\n}\n\n#[cfg(test)]\nmod phase_align_tests {\n    use wifi_densepose_signal::ruvsense::phase_align::PhaseAligner;\n\n    #[test]\n    fn phase_at_plus_pi_does_not_wrap_incorrectly() {\n        let aligner = PhaseAligner::new();\n        let phases = vec![std::f32::consts::PI - 0.001, std::f32::consts::PI + 0.001];\n        let aligned = aligner.align(&phases);\n        // jump across ±π boundary must be handled continuously\n        let diff = (aligned[1] - aligned[0]).abs();\n        assert!(diff < 0.01, \"phase jump at ±π must be < 0.01 rad after alignment\");\n    }\n\n    #[test]\n    fn single_phase_value_aligns_to_itself() {\n        let aligner = PhaseAligner::new();\n        let phases = vec![1.5f32];\n        let aligned = aligner.align(&phases);\n        assert_eq!(aligned.len(), 1);\n        assert!((aligned[0] - 1.5).abs() < 1e-6);\n    }\n\n    #[test]\n    fn empty_phase_array_returns_empty() {\n        let aligner = PhaseAligner::new();\n        let aligned = aligner.align(&[]);\n        assert!(aligned.is_empty());\n    }\n}\n\n#[cfg(test)]\nmod 
adversarial_detection_tests {\n    use wifi_densepose_signal::ruvsense::adversarial::AdversarialDetector;\n\n    #[test]\n    fn physically_impossible_amplitude_is_flagged() {\n        let detector = AdversarialDetector::new();\n        // WiFi amplitude cannot exceed hardware saturation level\n        let frame = vec![1e9f32; 56]; // absurdly large\n        assert!(detector.is_suspicious(&frame));\n    }\n\n    #[test]\n    fn normal_amplitude_range_passes() {\n        let detector = AdversarialDetector::new();\n        let frame = vec![0.5f32; 56]; // typical normalized value\n        assert!(!detector.is_suspicious(&frame));\n    }\n\n    #[test]\n    fn multi_link_inconsistency_is_detected() {\n        // link A reports body moving right; link B reports no motion\n        // physically inconsistent — flag as adversarial\n        let detector = AdversarialDetector::new();\n        let result = detector.check_multi_link_consistency(\n            &[1.0, 2.0, 3.0], // link A\n            &[0.0, 0.0, 0.0], // link B (no motion)\n        );\n        assert!(result.is_inconsistent());\n    }\n}"
      },
      {
        "language": "rust",
        "code": "// v2/crates/wifi-densepose-train/tests/test_geometry.rs\n\n#[cfg(test)]\nmod film_layer_tests {\n    use wifi_densepose_train::geometry::FilmLayer;\n\n    #[test]\n    fn film_layer_output_shape_matches_input() {\n        let film = FilmLayer::new(64, 32); // 64-dim features, 32-dim condition\n        let features = vec![0.5f32; 64];\n        let condition = vec![1.0f32; 32];\n        let output = film.forward(&features, &condition).unwrap();\n        assert_eq!(output.len(), 64, \"FiLM output must match feature dimensionality\");\n    }\n\n    #[test]\n    fn film_layer_zero_condition_acts_as_identity() {\n        let film = FilmLayer::new(64, 32);\n        let features = vec![1.0f32; 64];\n        let zero_condition = vec![0.0f32; 32];\n        let output = film.forward(&features, &zero_condition).unwrap();\n        // scale=1, shift=0 → identity; output ≈ input\n        for (o, f) in output.iter().zip(features.iter()) {\n            assert!((o - f).abs() < 0.1, \"zero condition should approximate identity\");\n        }\n    }\n}\n\n// v2/crates/wifi-densepose-train/tests/test_rapid_adapt.rs\n\n#[cfg(test)]\nmod rapid_adaptation_tests {\n    use wifi_densepose_train::rapid_adapt::RapidAdapter;\n\n    #[test]\n    fn adapter_updates_on_single_sample() {\n        let mut adapter = RapidAdapter::new(5); // 5 adaptation steps\n        let csi_sample = vec![0.1f32; 56 * 3];\n        let pose_label = vec![0.5f32; 17 * 2]; // 17 keypoints × (x, y)\n        let result = adapter.adapt_step(&csi_sample, &pose_label);\n        assert!(result.is_ok());\n    }\n\n    #[test]\n    fn adapter_with_zero_steps_is_no_op() {\n        let adapter = RapidAdapter::new(0);\n        // 0 adaptation steps → weights unchanged\n        let initial_weights = adapter.clone_weights();\n        let _ = adapter.adapt_step(&vec![0.1f32; 168], &vec![0.5f32; 34]);\n        assert_eq!(adapter.clone_weights(), initial_weights);\n    }\n}"
      },
      {
        "language": "rust",
        "code": "// v2/crates/wifi-densepose-sensing-server/tests/auth_tests.rs\n\n#[cfg(test)]\nmod bearer_auth_tests {\n    use wifi_densepose_sensing_server::auth::{BearerValidator, TokenError};\n\n    #[test]\n    fn missing_authorization_header_returns_unauthorized() {\n        let validator = BearerValidator::new(\"secret-token\");\n        let result = validator.validate(None);\n        assert!(matches!(result, Err(TokenError::Missing)));\n    }\n\n    #[test]\n    fn wrong_token_is_rejected() {\n        let validator = BearerValidator::new(\"correct-token\");\n        let result = validator.validate(Some(\"Bearer wrong-token\"));\n        assert!(matches!(result, Err(TokenError::Invalid)));\n    }\n\n    #[test]\n    fn malformed_header_without_bearer_prefix_is_rejected() {\n        let validator = BearerValidator::new(\"token\");\n        let result = validator.validate(Some(\"token\")); // missing \"Bearer \" prefix\n        assert!(matches!(result, Err(TokenError::Malformed)));\n    }\n\n    #[test]\n    fn correct_token_is_accepted() {\n        let validator = BearerValidator::new(\"correct-token\");\n        let result = validator.validate(Some(\"Bearer correct-token\"));\n        assert!(result.is_ok());\n    }\n}\n\n// v2/crates/wifi-densepose-sensing-server/tests/semantic_tests.rs\n\n#[cfg(test)]\nmod fall_detection_tests {\n    use wifi_densepose_sensing_server::semantic::fall_detector::FallDetector;\n\n    #[test]\n    fn no_motion_does_not_trigger_fall() {\n        let mut detector = FallDetector::new();\n        for _ in 0..30 { // 30 frames of stillness\n            detector.update_pose(stationary_pose());\n        }\n        assert!(!detector.fall_detected());\n    }\n\n    #[test]\n    fn rapid_downward_velocity_triggers_fall() {\n        let mut detector = FallDetector::new();\n        // simulate person going from standing (y=1.7m) to prone (y=0.3m) in 3 frames\n        for (frame, y) in [(0, 1.7f32), (1, 1.0), (2, 0.3)] {\n            
detector.update_pose(pose_at_height(y));\n        }\n        assert!(detector.fall_detected());\n    }\n\n    #[test]\n    fn sitting_down_slowly_does_not_trigger_fall() {\n        let mut detector = FallDetector::new();\n        // gradual height decrease over 30 frames is sitting, not falling\n        for i in 0..30 {\n            let y = 1.7f32 - (i as f32 * 0.04); // ~1.2m drop over 30 frames\n            detector.update_pose(pose_at_height(y));\n        }\n        assert!(!detector.fall_detected());\n    }\n}"
      }
    ]
  },
  "rawOutputPreview": "The exploration is complete. Here's the full coverage analysis with test skeletons for each critical gap.\n\n---\n\n## Test Coverage Gap Analysis — wifi-densepose\n\n### Coverage Summary by Crate\n\n| Crate | Tests Found | Status | Priority |\n|-------|-------------|--------|----------|\n| `wifi-densepose-core` | 26 inline | Good | Low |\n| `wifi-densepose-signal` | ~60 (validation only) | Moderate | **High** |\n| `wifi-densepose-nn` | **0** | Critical | **P1** |\n| `wifi-densepose-train` | ~60 (config/dataset) | Moderate | High |\n| `wifi-densepose-mat` | 1 integration test | Critical | **P1** |\n| `wifi-densepose-ruvector` | **0** | Critical | **P1** |\n| `wifi-densepose-sensing-server` | 4 integration tests | Moderate | High |\n| `wifi-densepose-wasm` | 3 compliance tests | Low | Low |\n\n---\n\n## Tier 1: Critical Gaps\n\n### 1. `wifi-densepose-nn` — Zero test coverage\n\nEvery public API is untested. Place these at `v2/crates/wifi-densepose-nn/tests/inference_tests.rs`:\n\n```rust\n// v2/crates/wifi-densepose-nn/tests/inference_tests.rs\n\n#[cfg(test)]\nmod tensor_tests {\n    use wifi_densepose_nn::tensor::Tensor;\n\n    #[test]\n    fn tensor_shape_mismatch_returns_error() {\n        // data has 6 elements but shape claims 3×3=9\n        let result = Tensor::new(vec![1.0f32; 6], &[3, 3]);\n        assert!(result.is_err(), \"shape mismatch must be rejected\");\n    }\n\n    #[test]\n    fn tensor_empty_data_returns_error() {\n        let result = Tensor::new(vec![], &[0]);\n        assert!(result.is_err());\n    }\n\n    #[test]\n    fn tensor_nan_values_are_detected() {\n        let t = Tensor::new(vec![f32::NAN, 1.0, 2.0], &[3]).unwrap();\n        assert!(t.has_nan(), \"NaN in data must be detectable\");\n    }\n\n    #[test]\n    fn tensor_inf_values_are_detected() {\n        let t = Tensor::new(vec![f32::INFINITY, 1.0], &[2]).unwrap();\n        assert!(t.has_inf());\n    }\n}\n\n#[cfg(test)]\nmod modality_translator_tests {\n    use 
wifi_densepose_nn::translator::ModalityTranslator;\n\n    #[test]\n    fn translator_rejects",
  "rawOutputLength": 18269
 }
@@ -1,15 +0,0 @@
 {
  "name": "ruview",
  "description": "RuView Marketplace: Claude Code + Codex plugins for WiFi sensing — configuration, applications, model training, and onboarding, from practical to advanced",
  "owner": {
    "name": "ruvnet",
    "url": "https://github.com/ruvnet/RuView"
  },
  "plugins": [
    {
      "name": "ruview",
      "source": "./plugins/ruview",
      "description": "End-to-end RuView toolkit: getting started, ESP32 hardware setup, configuration, sensing applications (presence / vitals / pose / sleep / MAT), camera-free + camera-supervised model training, advanced multistatic sensing, CLI / API / WASM, mmWave radar, and witness verification"
    }
  ]
 }
@@ -1 +0,0 @@
 {"sessionId":"d80c93c2-51b7-42e8-a0fc-dc47cff1200f","pid":45748,"acquiredAt":1779668018388}
@@ -126,7 +126,10 @@
      "Bash(node .claude/*)",
      "mcp__claude-flow__:*"
    ],
-    "deny": []
+    "deny": [
      "Read(./.env)",
      "Read(./.env.*)"
    ]
  },
  "attribution": {
    "commit": "Co-Authored-By: claude-flow <ruv@ruv.net>",
@@ -1,96 +0,0 @@
 name: AetherArena harness gate (ADR-149)
 # Runs the AetherArena scoring harness as a PR build gate. Every PR that touches
 # the scorer, the metrics, or the benchmark scaffold must keep the deterministic
 # score hash stable (ADR-149 §2.5 determinism_gate). If the scoring maths changes,
 # the hash moves and this gate fails until `expected_score.sha256` is regenerated
 # and reviewed — so scorer drift can never land silently.
 #
 # This is the "a PR that runs the harness as part of the build process" requirement.
 on:
  pull_request:
    paths:
      - 'v2/crates/wifi-densepose-train/src/ruview_metrics.rs'
      - 'v2/crates/wifi-densepose-train/src/ablation.rs'
      - 'v2/crates/wifi-densepose-train/src/bin/aa_score_runner.rs'
      - 'aether-arena/**'
      - '.github/workflows/aether-arena-harness.yml'
  push:
    branches: ['feat/adr-149-aether-arena']
  workflow_dispatch:
 permissions:
  contents: read
  pull-requests: write
 jobs:
  harness-gate:
    name: Run AA scorer harness (determinism gate)
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: v2
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install Rust toolchain
        run: rustup show && rustc --version
      - name: Cache cargo
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            v2/target
          key: aa-harness-${{ runner.os }}-${{ hashFiles('v2/Cargo.lock') }}
      # 1. Build the pure-Rust scorer (no torch / no GPU → fast PR gate).
      - name: Build AA score runner
        run: cargo build -p wifi-densepose-train --bin aa_score_runner --no-default-features
      # 2. Determinism gate: the committed expected hash must still match. A
      #    non-zero exit here fails the PR.
      - name: Run determinism gate
        run: cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features
      # 3. Repeatability analysis (witness chain): the harness must produce one
      #    identical proof hash across many runs — any nondeterminism fails here.
      - name: Repeatability analysis (16 runs)
        run: cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16
      # 4. Real-scoring smoke: score a sample prediction against the public smoke
      #    split, exercising the actual model-scoring path (not just the fixture).
      - name: Real-scoring smoke test
        run: |
          cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- \
            --split ../aether-arena/fixtures/smoke_split.json \
            --pred  ../aether-arena/fixtures/smoke_pred.json --json
      # 5. Witness ledger chain integrity: the append-only results ledger must
      #    verify (every prev_hash link + row_hash intact = no silent edits).
      - name: Verify witness ledger chain
        working-directory: aether-arena/ledger
        run: python3 ledger_tools.py verify
      # 6. Emit the witness row + repeatability into the PR run summary.
      - name: Witness row → job summary
        if: always()
        run: |
          ROW=$(cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --json)
          REP=$(cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16)
          {
            echo "## AetherArena harness gate (witness chain)"
            echo ""
            echo "Deterministic witness (ADR-149 §2.2 / proof + repeatability):"
            echo '```json'
            echo "$ROW"
            echo "$REP"
            echo '```'
            echo ""
            echo "If the determinism gate failed, the scoring maths changed: regenerate with"
            echo '`cargo run -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --generate-hash > aether-arena/fixtures/expected_score.sha256` and review the diff.'
          } >> "$GITHUB_STEP_SUMMARY"
@@ -1,199 +0,0 @@
 name: Bench Regression Guard
 # Sub-deliverable 8.3 of the benchmark/optimization milestone.
 #
 # HONEST SCOPE (read this before assuming this gates on timing):
 #   * The `bench-compile` job is a REAL, HARD-FAILING regression gate. It runs
 #     `cargo bench --no-default-features --no-run`, which type-checks and links
 #     EVERY criterion bench in the v2/ workspace without running a single
 #     measurement. Benches are not part of `cargo test`, so they silently
 #     bit-rot when a public API they call changes — this job catches that the
 #     moment it happens. This is the part of this workflow that can fail a PR.
 #
 #   * The `bench-fast-run` job runs a small, curated subset of pure-CPU benches
 #     in criterion "quick mode" (short warm-up / measurement / 10 samples) and
 #     is INFORMATIONAL ONLY (`continue-on-error: true`). It does NOT gate on
 #     timing. Wall-clock timings on shared GitHub-hosted runners vary by
 #     2-3x run-to-run (noisy neighbours, CPU throttling, no pinned frequency),
 #     so a hard ">X ms" threshold here would flake constantly and teach
 #     everyone to ignore it. We deliberately do not pretend to do timing
 #     regression-gating we cannot deliver reliably. The numbers are surfaced in
 #     the job log + uploaded as an artifact for humans to eyeball trends.
 #
 # WHY NO criterion --baseline COMPARE GATE:
 #   criterion's `--save-baseline` / `--baseline` compare is the textbook
 #   regression mechanism, but it only produces a trustworthy verdict when the
 #   baseline and the candidate were measured on the SAME hardware under the SAME
 #   conditions. GitHub-hosted runners give neither (the baseline commit and the
 #   PR commit land on different physical machines). Committing a baseline JSON
 #   measured on one runner and comparing a different runner against it would
 #   manufacture false regressions. If/when these benches run on a dedicated,
 #   frequency-pinned self-hosted runner, a `--baseline` compare with a generous
 #   (>2x) noise floor becomes honest and can be added then. Until then,
 #   compile-verify + informational-run is the honest gate.
 on:
  push:
    branches: [ main, develop, 'feat/*' ]
    paths:
      - 'v2/crates/**/benches/**'
      - 'v2/crates/**/Cargo.toml'
      - 'v2/crates/**/src/**'
      - 'v2/Cargo.toml'
      - 'v2/Cargo.lock'
      - '.github/workflows/bench-regression.yml'
  pull_request:
    paths:
      - 'v2/crates/**/benches/**'
      - 'v2/crates/**/Cargo.toml'
      - 'v2/crates/**/src/**'
      - 'v2/Cargo.toml'
      - 'v2/Cargo.lock'
      - '.github/workflows/bench-regression.yml'
  workflow_dispatch:
 permissions:
  contents: read
 env:
  CARGO_TERM_COLOR: always
  # Debuginfo is useless in CI and the 38-crate workspace target dir otherwise
  # exhausts the runner disk (mirrors ci.yml's rust-tests job). The bench
  # profile inherits release + debug = true (v2/Cargo.toml [profile.bench]);
  # force it off so the link step does not run out of space.
  CARGO_PROFILE_BENCH_DEBUG: "0"
  CARGO_PROFILE_RELEASE_DEBUG: "0"
 jobs:
  # ── HARD GATE: every bench must still compile + link ─────────────────────
  bench-compile:
    name: bench compile-verify (--no-run)
    runs-on: ubuntu-latest
    steps:
      - name: Checkout (recursive — wifi-densepose-rufield path-deps vendor/rufield)
        uses: actions/checkout@v4
        with:
          # The workspace includes `wifi-densepose-rufield`, which path-deps the
          # `vendor/rufield` submodule crates. Without a recursive checkout the
          # whole workspace fails to resolve before any bench is built.
          submodules: recursive
      # The workspace pulls in `wifi-densepose-desktop` (Tauri v2) whose -sys
      # crates need the GTK/WebKit/serial dev libraries via pkg-config, exactly
      # as ci.yml's rust-tests job documents. A `--workspace` bench build links
      # the whole graph, so these are required here too.
      - name: Install Tauri / GTK / serial system dev libraries
        run: |
          sudo apt-get update
          sudo apt-get install -y --no-install-recommends \
            libglib2.0-dev \
            libgtk-3-dev \
            libsoup-3.0-dev \
            libjavascriptcoregtk-4.1-dev \
            libwebkit2gtk-4.1-dev \
            libayatana-appindicator3-dev \
            librsvg2-dev \
            libxdo-dev \
            libudev-dev \
            libdbus-1-dev \
            libssl-dev \
            pkg-config
      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable
      - name: Cache cargo (Swatinem/rust-cache)
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: v2
          # Distinct cache scope from ci.yml's rust-tests so the bench profile
          # artifacts (release+opt) do not evict the test profile cache.
          key: bench-regression
      # The core regression guard. `--no-run` compiles + links every bench
      # target in the workspace's DEFAULT feature set but runs no measurement,
      # so it is deterministic and fast-ish (build only). A bench that no longer
      # compiles — because a type/signature it calls changed and nobody updated
      # the bench — fails the build here. `--no-default-features` is the
      # workspace's standard gate flag (openblas/tch/ort/onnx stay opt-out).
      - name: Compile all workspace benches (default features)
        working-directory: v2
        run: cargo bench --workspace --no-default-features --no-run
      # Feature-gated benches are skipped by the default build above because
      # their `[[bench]]` entries carry `required-features`. Compile the ones we
      # can guard so they are also covered against bit-rot.
      #   * cir → wifi-densepose-signal/benches/cir_bench.rs (ADR-134). The
      #     `cir` feature is pure-Rust (`cir = []`), so it builds on the stock
      #     runner and is a real, hard-failing guard like the step above.
      #
      # NOT guarded here (honest scope):
      #   * crv → wifi-densepose-ruvector/benches/crv_bench.rs. The `crv` feature
      #     pulls the crates.io dependency `ruvector-crv 0.1.1`, which currently
      #     FAILS to compile on stable (E0308 type mismatch in its own
      #     `stage_iii.rs` — an UPSTREAM bug, unrelated to bench bit-rot).
      #     Adding a hard `--features crv` compile step would make this workflow
      #     red for a reason this gate is not meant to police. Re-add this step
      #     once `ruvector-crv` ships a fixed release. (mqtt/onnx benches are
      #     likewise left to their own crate workflows.)
      - name: Compile feature-gated benches (cir)
        working-directory: v2
        run: cargo bench -p wifi-densepose-signal --no-default-features --features cir --bench cir_bench --no-run
  # ── INFORMATIONAL: run a curated fast subset (never gates) ───────────────
  bench-fast-run:
    name: bench fast-run (informational, non-gating)
    runs-on: ubuntu-latest
    # NEVER fail the workflow on this job — timings are noise-prone on shared
    # runners (see header). It exists to surface trends for humans, not to gate.
    continue-on-error: true
    needs: [bench-compile]
    steps:
      - name: Checkout (recursive)
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable
      - name: Cache cargo (Swatinem/rust-cache)
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: v2
          key: bench-regression
      # Curated subset = pure-CPU, fast, dependency-light criterion benches that
      # finish in seconds under quick-mode flags. Each is targeted by `--bench`
      # (NOT a bare `cargo bench -p`) because the crates' lib targets use the
      # libtest harness, which rejects criterion's CLI flags (--warm-up-time
      # etc.) and aborts the run. Quick-mode: 1s warm-up, 2s measure, 10 samples.
      - name: nvsim pipeline_throughput (quick)
        working-directory: v2
        run: |
          mkdir -p ../bench-out
          cargo bench -p nvsim --no-default-features --bench pipeline_throughput -- \
            --warm-up-time 1 --measurement-time 2 --sample-size 10 \
            | tee ../bench-out/nvsim_pipeline_throughput.txt
      - name: ruvector sketch_bench (quick)
        working-directory: v2
        run: |
          cargo bench -p wifi-densepose-ruvector --no-default-features --bench sketch_bench -- \
            --warm-up-time 1 --measurement-time 2 --sample-size 10 \
            | tee ../bench-out/ruvector_sketch_bench.txt
      - name: ruvector fusion_bench (quick)
        working-directory: v2
        run: |
          cargo bench -p wifi-densepose-ruvector --no-default-features --bench fusion_bench -- \
            --warm-up-time 1 --measurement-time 2 --sample-size 10 \
            | tee ../bench-out/ruvector_fusion_bench.txt
      - name: Upload informational bench logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: bench-fast-run-logs
          path: bench-out/
          if-no-files-found: warn
@@ -1,101 +0,0 @@
 name: BFLD MQTT Integration
 # Runs the env-gated mosquitto integration tests from iters 24 + 29 of the
 # BFLD rollout (ADR-118 / ADR-122 §2.2). Spins up an eclipse-mosquitto:2
 # service container, exports BFLD_MQTT_BROKER, runs `cargo test --features
 # mqtt`. Local developers can reproduce with:
 #
 #     scoop install mosquitto   # Windows
 #     # or: docker run -p 1883:1883 eclipse-mosquitto:2
 #     BFLD_MQTT_BROKER=tcp://localhost:1883 \
 #       cargo test -p wifi-densepose-bfld --features mqtt
 on:
  push:
    branches:
      - main
      - 'feat/adr-118-*'
      - 'feat/bfld-*'
    paths:
      - 'v2/crates/wifi-densepose-bfld/**'
      - '.github/workflows/bfld-mqtt-integration.yml'
  pull_request:
    paths:
      - 'v2/crates/wifi-densepose-bfld/**'
      - '.github/workflows/bfld-mqtt-integration.yml'
  workflow_dispatch:
 jobs:
  mqtt-live-broker:
    name: cargo test --features mqtt (live mosquitto)
    runs-on: ubuntu-latest
    timeout-minutes: 15
    services:
      mosquitto:
        image: eclipse-mosquitto:2
        ports:
          - 1883:1883
        # Allow anonymous connections — local-only CI broker, no exposure
        # to the public internet, never touches production credentials.
        options: >-
          --health-cmd "mosquitto_pub -h localhost -t healthcheck -m ping || exit 1"
          --health-interval 5s
          --health-timeout 3s
          --health-retries 10
    env:
      BFLD_MQTT_BROKER: tcp://localhost:1883
      CARGO_TERM_COLOR: always
      CARGO_INCREMENTAL: 0
      RUSTFLAGS: -D warnings
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable
        with:
          components: clippy
      - name: Cache cargo registry + target
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            v2/target
          key: bfld-mqtt-${{ runner.os }}-${{ hashFiles('v2/Cargo.lock') }}
      - name: Wait for mosquitto to be ready
        run: |
          for i in {1..20}; do
            if nc -z localhost 1883; then
              echo "mosquitto reachable on port 1883 (attempt $i)"
              exit 0
            fi
            echo "waiting for mosquitto ($i/20)..."
            sleep 1
          done
          echo "mosquitto never became reachable" >&2
          exit 1
      - name: cargo test --no-default-features (baseline regression)
        working-directory: v2
        run: cargo test -p wifi-densepose-bfld --no-default-features
      - name: cargo test (default features)
        working-directory: v2
        run: cargo test -p wifi-densepose-bfld
      - name: cargo test --features mqtt (incl. live mosquitto roundtrip)
        working-directory: v2
        run: cargo test -p wifi-densepose-bfld --features mqtt
      - name: cargo clippy --features mqtt (lint gate)
        working-directory: v2
        run: cargo clippy -p wifi-densepose-bfld --features mqtt --all-targets -- -D warnings
        continue-on-error: true
@@ -42,8 +42,6 @@ jobs:
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Determine deployment environment
      id: determine-env
@@ -88,8 +86,6 @@ jobs:
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up kubectl
      uses: azure/setup-kubectl@v3
@@ -136,8 +132,6 @@ jobs:
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up kubectl
      uses: azure/setup-kubectl@v3
@@ -15,51 +15,38 @@ env:
 jobs:
  # Code Quality and Security Checks
  # The Python codebase moved to `archive/v1/` when the runtime was rewritten in
  # Rust under `v2/`. The lint/format/type/scan checks below still run against
  # the archive for hygiene, but with `continue-on-error: true` everywhere — the
  # archive is frozen reference code, not active development, so a stale lint
  # rule shouldn't gate PRs to the Rust workspace.
  code-quality:
    name: Code Quality & Security
    runs-on: ubuntu-latest
    continue-on-error: true
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
        fetch-depth: 0
    - name: Set up Python
-      continue-on-error: true
+      uses: actions/setup-python@v5
      uses: actions/setup-python@v6
      with:
        python-version: ${{ env.PYTHON_VERSION }}
        cache: 'pip'
    - name: Install dependencies
      continue-on-error: true
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install black flake8 mypy bandit safety
    - name: Code formatting check (Black)
-      continue-on-error: true
+      run: black --check --diff src/ tests/
      run: black --check --diff archive/v1/src archive/v1/tests
    - name: Linting (Flake8)
-      continue-on-error: true
+      run: flake8 src/ tests/ --max-line-length=88 --extend-ignore=E203,W503
      run: flake8 archive/v1/src archive/v1/tests --max-line-length=88 --extend-ignore=E203,W503
    - name: Type checking (MyPy)
-      continue-on-error: true
+      run: mypy src/ --ignore-missing-imports
      run: mypy archive/v1/src --ignore-missing-imports
    - name: Security scan (Bandit)
-      run: bandit -r archive/v1/src -f json -o bandit-report.json
+      run: bandit -r src/ -f json -o bandit-report.json
      continue-on-error: true
    - name: Dependency vulnerability scan (Safety)
@@ -67,7 +54,6 @@ jobs:
      continue-on-error: true
    - name: Upload security reports
      continue-on-error: true
      uses: actions/upload-artifact@v4
      if: always()
      with:
@@ -83,103 +69,30 @@ jobs:
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive
      # ADR-262 P1: `wifi-densepose-rufield` path-deps the `vendor/rufield`
      # submodule. Without a recursive checkout the workspace build fails to
      # resolve those path deps in CI even though it passes locally.
      with:
        submodules: recursive
    # `wifi-densepose-desktop` is a Tauri v2 app — `glib-sys`, `gtk-sys`,
    # `webkit2gtk-sys`, etc. need the Linux dev libraries via pkg-config or the
    # workspace test fails at the build step before any test runs (every recent
    # main CI run has been red on this for exactly this reason). Install the
    # standard Tauri-on-Ubuntu set.
    - name: Install Tauri / GTK / serial system dev libraries
      run: |
        sudo apt-get update
        sudo apt-get install -y --no-install-recommends \
          libglib2.0-dev \
          libgtk-3-dev \
          libsoup-3.0-dev \
          libjavascriptcoregtk-4.1-dev \
          libwebkit2gtk-4.1-dev \
          libayatana-appindicator3-dev \
          librsvg2-dev \
          libxdo-dev \
          libudev-dev \
          libdbus-1-dev \
          libssl-dev \
          pkg-config
    - name: Install Rust toolchain
      uses: dtolnay/rust-toolchain@stable
-    # Swatinem/rust-cache replaces a naive `actions/cache` of the whole
+    - name: Cache cargo
-    # `v2/target`. That manual cache of a 38-crate target dir (multi-GB) was an
+      uses: actions/cache@v4
    # intermittent failure source — several CI runs this cycle died at the
    # cache/setup step (after toolchain install, before "Run Rust tests"),
    # needing a rerun. rust-cache is purpose-built for Rust: it caches the
    # registry + git + a pruned target, evicts stale deps, and restores far more
    # reliably (and faster) on large workspaces. `workspaces: v2` points it at
    # the v2/ cargo workspace (keys on v2/Cargo.lock, caches v2/target).
    - name: Cache cargo (Swatinem/rust-cache)
      uses: Swatinem/rust-cache@v2
      with:
-        workspaces: v2
+        path: |
          ~/.cargo/registry
          ~/.cargo/git
          v2/target
        key: ${{ runner.os }}-cargo-${{ hashFiles('v2/Cargo.lock') }}
        restore-keys: |
          ${{ runner.os }}-cargo-
    # The 38-crate workspace debug build exhausts the runner's disk when built
    # with full debuginfo (observed: "final link failed: No space left on
    # device" once the engine/benchmark crates landed; the same tree's local
    # debug target measured 151 GB). Debuginfo is useless in CI — tests either
    # pass or print their failure — so build without it; target shrinks ~5-10x.
    - name: Run Rust tests
      working-directory: v2
      env:
        CARGO_PROFILE_DEV_DEBUG: "0"
        CARGO_PROFILE_TEST_DEBUG: "0"
      run: cargo test --workspace --no-default-features
    - name: Run ADR-147 worldmodel tests
      working-directory: v2
      env:
        CARGO_PROFILE_DEV_DEBUG: "0"
        CARGO_PROFILE_TEST_DEBUG: "0"
      run: cargo test -p wifi-densepose-worldmodel --no-default-features
    # ADR-134 CIR tests are behind the `cir` feature so the bench dependency
    # (Criterion) only pulls when actually exercised. Run them as a separate
    # step so a CIR-only regression is unambiguously attributable.
    - name: Run ADR-134 CIR tests
      working-directory: v2
      run: cargo test -p wifi-densepose-signal --no-default-features --features cir --tests
    # ADR-134 + ADR-028 witness guard. The CIR proof runner produces a
    # bit-deterministic SHA-256 over CirEstimator output on the synthetic
    # reference signal. Any algorithmic regression — changes to ISTA
    # convergence, sensing matrix construction, soft-thresholding, or input
    # padding — breaks the hash and fails the build. To regenerate after an
    # *intentional* change:
    #   cd v2 && cargo run -p wifi-densepose-signal --bin cir_proof_runner \
    #     --release --no-default-features -- --generate-hash \
    #     > ../archive/v1/data/proof/expected_cir_features.sha256
    - name: ADR-134 CIR witness proof (determinism guard)
      run: bash scripts/verify-cir-proof.sh
    - name: ADR-135 calibration witness proof (determinism guard)
      run: bash scripts/verify-calibration-proof.sh
  # Unit and Integration Tests
  # Python pytest matrix — runs against the archived v1 Python tree.
  # `continue-on-error: true` for the same reason as code-quality above:
  # the archive is frozen reference, not blocking the Rust workspace PRs.
  test:
    name: Tests
    runs-on: ubuntu-latest
    continue-on-error: true
    strategy:
      fail-fast: false
      matrix:
        python-version: ['3.10', '3.11', '3.12']
    services:
@@ -208,53 +121,44 @@ jobs:
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up Python ${{ matrix.python-version }}
-      continue-on-error: true
+      uses: actions/setup-python@v5
      uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
    - name: Install dependencies
      continue-on-error: true
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install pytest-cov pytest-xdist
    - name: Run unit tests
      continue-on-error: true
      env:
        DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_wifi_densepose
        REDIS_URL: redis://localhost:6379/0
        ENVIRONMENT: test
      run: |
-        pytest archive/v1/tests/unit/ -v --cov=archive/v1/src --cov-report=xml --cov-report=html --junitxml=junit.xml
+        pytest tests/unit/ -v --cov=src --cov-report=xml --cov-report=html --junitxml=junit.xml
    - name: Run integration tests
      continue-on-error: true
      env:
        DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_wifi_densepose
        REDIS_URL: redis://localhost:6379/0
        ENVIRONMENT: test
      run: |
-        pytest archive/v1/tests/integration/ -v --junitxml=integration-junit.xml
+        pytest tests/integration/ -v --junitxml=integration-junit.xml
    - name: Upload coverage reports
-      continue-on-error: true
+      uses: codecov/codecov-action@v4
      uses: codecov/codecov-action@v6
      with:
        file: ./coverage.xml
        flags: unittests
        name: codecov-umbrella
    - name: Upload test results
      continue-on-error: true
      uses: actions/upload-artifact@v4
      if: always()
      with:
@@ -265,23 +169,17 @@ jobs:
          htmlcov/
  # Performance and Load Tests
  # NOTE: tests/performance/locustfile.py and the src.api.main app path both
  # predate the v1→archive/v1 reorganisation. continue-on-error: true until a
  # proper locust suite is added under archive/v1/tests/performance/.
  performance-test:
    name: Performance Tests
    runs-on: ubuntu-latest
    needs: [test]
    continue-on-error: true
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up Python
-      uses: actions/setup-python@v6
+      uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}
        cache: 'pip'
@@ -290,72 +188,36 @@ jobs:
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
-        pip install pytest   # the perf suite is pytest, not locust
+        pip install locust
-    # No "Start application" step: the gated test (test_frame_budget.py) drives
+    - name: Start application
-    # the CSIProcessor pipeline in-process and makes no HTTP calls, so the old
+      run: |
-    # uvicorn server + `sleep 10` were dead weight — they only existed for the
+        uvicorn src.api.main:app --host 0.0.0.0 --port 8000 &
-    # now-excluded api_throughput/inference_speed tests, and on every run dumped
+        sleep 10
    # ~50 misleading "router requires hardware setup" ERROR lines for a server
    # no test touched. MOCK_POSE_DATA is server-only and unused here.
    - name: Run performance tests
      working-directory: archive/v1
      run: |
-        # Gate only on the genuine, deterministic perf guard:
+        locust -f tests/performance/locustfile.py --headless --users 50 --spawn-rate 5 --run-time 60s --host http://localhost:8000
        # test_frame_budget.py times the *real* CSIProcessor pipeline against
        # the ADR 50 ms per-frame budget (single-frame, p95 over 100 frames,
        # +Doppler) — a true regression signal.
        #
        # test_api_throughput.py / test_inference_speed.py are excluded: every
        # test there is a TDD red-phase stub (suffix `_should_fail_initially`)
        # that times a *mock that sleeps* — meaningless as a perf signal, with
        # machine-dependent wall-clock asserts (e.g. `actual_rps >= 40`,
        # `batch_time < individual_time`) that are inherently flaky on shared
        # CI runners, plus a cross-class fixture-scope bug. Forcing them green
        # would be manufacturing a false signal; they stay in-repo for local
        # TDD but do not gate CI until the underlying features are implemented.
        #
        # `python -m pytest` (not the bare `pytest` script) puts the cwd
        # (archive/v1) on sys.path so `from src.core...` resolves — the bare
        # script omits cwd and raises ModuleNotFoundError: No module named 'src'.
        # -o addopts="" drops the root pyproject's --cov/--cov-fail-under=100.
        python -m pytest tests/performance/test_frame_budget.py \
          -o addopts="" -v --junitxml=perf-junit.xml
    - name: Upload performance results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: performance-results
-        path: archive/v1/perf-junit.xml
+        path: locust_report.html
  # Docker Build and Test
  # NOTE: the canonical Docker build for the sensing-server is now
  # `.github/workflows/sensing-server-docker.yml` (multi-registry push, asset
  # smoke tests, bearer-auth smoke tests — #520/#514/#443). This job predates
  # that workflow, points at a non-existent root `Dockerfile` with a
  # non-existent `target: production`, and pushes to a mis-cased image name —
  # `continue-on-error: true` until it's deleted or rewired to call the new
  # workflow, so it doesn't gate the rest of the pipeline.
  docker-build:
    name: Docker Build & Test
    runs-on: ubuntu-latest
    needs: [code-quality, test, rust-tests]
    continue-on-error: true
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up Docker Buildx
      continue-on-error: true
      uses: docker/setup-buildx-action@v3
    - name: Log in to Container Registry
      continue-on-error: true
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
@@ -363,9 +225,8 @@ jobs:
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Extract metadata
      continue-on-error: true
      id: meta
-      uses: docker/metadata-action@v6
+      uses: docker/metadata-action@v5
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        tags: |
@@ -375,8 +236,7 @@ jobs:
          type=raw,value=latest,enable={{is_default_branch}}
    - name: Build and push Docker image
-      continue-on-error: true
+      uses: docker/build-push-action@v5
      uses: docker/build-push-action@v7
      with:
        context: .
        target: production
@@ -388,7 +248,6 @@ jobs:
        platforms: linux/amd64,linux/arm64
    - name: Test Docker image
      continue-on-error: true
      run: |
        docker run --rm -d --name test-container -p 8000:8000 ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
        sleep 10
@@ -396,7 +255,6 @@ jobs:
        docker stop test-container
    - name: Run container security scan
      continue-on-error: true
      uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
      with:
        image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
@@ -404,7 +262,6 @@ jobs:
        output: 'trivy-results.sarif'
    - name: Upload Trivy scan results
      continue-on-error: true
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
@@ -416,16 +273,12 @@ jobs:
    runs-on: ubuntu-latest
    needs: [docker-build]
    if: github.ref == 'refs/heads/main'
    permissions:
      contents: write   # gh-pages deploy needs write (GITHUB_TOKEN is read-only by default -> 403)
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up Python
-      uses: actions/setup-python@v6
+      uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}
        cache: 'pip'
@@ -436,9 +289,6 @@ jobs:
        pip install -r requirements.txt
    - name: Generate OpenAPI spec
      working-directory: archive/v1
      env:
        MOCK_POSE_DATA: "true"   # no CSI hardware in CI
      run: |
        python -c "
        from src.api.main import app
@@ -449,7 +299,6 @@ jobs:
    - name: Deploy to GitHub Pages
      uses: peaceiris/actions-gh-pages@v4
      continue-on-error: true   # openapi generation above is the real validation; deploy is best-effort (Pages may be disabled)
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}
        publish_dir: ./docs
@@ -461,8 +310,6 @@ jobs:
    runs-on: ubuntu-latest
    needs: [code-quality, test, rust-tests, performance-test, docker-build, docs]
    if: always()
    permissions:
      contents: write   # required by softprops/action-gh-release
    # GitHub Actions does not allow `secrets.X` directly in step-level `if:`
    # expressions — only `env.X`. Promote the secret to env at job scope so
    # the gating expression below is parseable.
@@ -1,151 +0,0 @@
 name: GitHub Clone Tracking → data/clone-data.rvf
 # Persists rolling 14-day clone-traffic snapshots to data/clone-data.rvf in
 # the ruvector JSONL RVF format. GitHub's /traffic/clones endpoint only
 # retains the last 14 days server-side, so without this scheduled scrape
 # the data is gone forever the moment it falls outside the window.
 #
 # Format: JSONL RVF
 #   - line 1 is a `metadata` segment that initializes the file
 #   - each subsequent run appends one `clone_snapshot` segment carrying the
 #     14-day rollup PLUS per-day breakdown
 #   - file is idempotent: per-day entries are keyed by `timestamp` so a
 #     downstream reader can dedupe across overlapping snapshot windows
 #
 # Schedule: weekly. GitHub only serves the trailing 14 days, so consecutive
 # runs must be less than 14 days apart or the days in between are lost; a
 # 1st + 15th schedule leaves a 16-17 day gap across the end of 30/31-day
 # months. Overlapping windows are harmless because per-day entries are keyed
 # by `timestamp`. The workflow can also be dispatched manually for backfill
 # or testing.
 on:
  schedule:
    # 01:23 UTC every Monday. Picking :23 avoids the cron herd on :00.
    - cron: '23 1 * * 1'
  workflow_dispatch:
 permissions:
  contents: write
 concurrency:
  group: clone-tracking
  cancel-in-progress: false
 jobs:
  snapshot:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Fetch /traffic/clones + /traffic/views from GitHub
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          mkdir -p data
          gh api repos/${{ github.repository }}/traffic/clones > /tmp/clones.json
          gh api repos/${{ github.repository }}/traffic/views > /tmp/views.json
          echo "--- clones rollup ---"
          jq '{count, uniques, days: (.clones | length)}' /tmp/clones.json
          echo "--- views rollup ---"
          jq '{count, uniques, days: (.views | length)}' /tmp/views.json
      - name: Append snapshot to data/clone-data.rvf
        env:
          REPO: ${{ github.repository }}
        run: |
          set -e
          RVF="data/clone-data.rvf"
          FETCHED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          # Initialize the file with a metadata segment on first run.
          if [ ! -f "$RVF" ]; then
            echo "Initializing $RVF with metadata segment"
            jq -n --arg repo "$REPO" --arg ts "$FETCHED_AT" '{
              type: "metadata",
              name: "ruview-clone-traffic-history",
              version: "1.0.0",
              schema: "ruvector.rvf.jsonl/v1",
              format: "github-traffic-snapshots",
              repo: $repo,
              source: "GitHub Traffic API /repos/{repo}/traffic/{clones,views}",
              policy: "GitHub retains only 14 days server-side; this file is the long-term record.",
              segments: ["metadata", "clone_snapshot", "view_snapshot"],
              created_at: $ts,
              custom: {
                cadence: "twice monthly (1st and 15th, ~14-day intervals)",
                idempotency_key: "timestamp (per-day records de-duplicate across overlapping snapshot windows)"
              }
            }' >> "$RVF"
          fi
          # Append the clone snapshot.
          jq --arg ts "$FETCHED_AT" '{
            type: "clone_snapshot",
            fetched_at: $ts,
            window_count: .count,
            window_uniques: .uniques,
            per_day: .clones
          }' /tmp/clones.json >> "$RVF"
          # Append the views snapshot (free with the same auth).
          jq --arg ts "$FETCHED_AT" '{
            type: "view_snapshot",
            fetched_at: $ts,
            window_count: .count,
            window_uniques: .uniques,
            per_day: .views
          }' /tmp/views.json >> "$RVF"
          echo "--- RVF tail (last 4 lines) ---"
          tail -4 "$RVF" | jq -c '{type, fetched_at, window_count, window_uniques}' || true
          echo "--- file size ---"
          wc -l "$RVF"
      # Reads every segment of data/clone-data.rvf, merges the per-day records
      # of all clone/view snapshots, and exposes the totals as step outputs
      # (clones, clone_days, views, view_days).
      - name: Compute aggregates for the commit summary
        id: agg
        run: |
          # Count distinct per-day entries across all snapshots so we can
          # show "cumulative observed clones" in the commit message.
          python3 - <<'PY'
          import json, os
          path = "data/clone-data.rvf"
          # Both maps are keyed by an entry's "timestamp": when snapshot windows
          # overlap, the record from the later line in the file replaces the
          # earlier one, so a day is never counted twice.
          per_day_clones = {}
          per_day_views = {}
          with open(path, encoding="utf-8") as f:
              for line in f:
                  # Tolerate blank lines between JSONL segments.
                  if not line.strip():
                      continue
                  d = json.loads(line)
                  # Segments of any other type (e.g. "metadata") are ignored.
                  if d.get("type") == "clone_snapshot":
                      for entry in d.get("per_day", []):
                          per_day_clones[entry["timestamp"]] = entry
                  elif d.get("type") == "view_snapshot":
                      for entry in d.get("per_day", []):
                          per_day_views[entry["timestamp"]] = entry
          # The "unique" totals are sums of per-day uniques, so the same
          # cloner/viewer seen on two different days contributes twice.
          tot_clones = sum(e.get("count", 0) for e in per_day_clones.values())
          tot_uniq_clones = sum(e.get("uniques", 0) for e in per_day_clones.values())
          tot_views = sum(e.get("count", 0) for e in per_day_views.values())
          tot_uniq_views = sum(e.get("uniques", 0) for e in per_day_views.values())
          print(f"clone days observed: {len(per_day_clones)}  total clones: {tot_clones:,}  total unique cloners: {tot_uniq_clones:,}")
          print(f"view  days observed: {len(per_day_views)}  total views:  {tot_views:,}  total unique viewers:  {tot_uniq_views:,}")
          # Publish the totals as step outputs via the GITHUB_OUTPUT file.
          with open(os.environ["GITHUB_OUTPUT"], "a") as out:
              out.write(f"clones={tot_clones}\n")
              out.write(f"clone_days={len(per_day_clones)}\n")
              out.write(f"views={tot_views}\n")
              out.write(f"view_days={len(per_day_views)}\n")
          PY
      # Commits data/clone-data.rvf and pushes it, but only when the staged
      # file differs from HEAD.
      - name: Commit + push if changed
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          # Stage first and compare the index against HEAD. A plain `git diff`
          # ignores untracked files, so on the very first run (which creates
          # data/clone-data.rvf) it would report "no changes" and the file
          # would never be committed.
          git add data/clone-data.rvf
          if git diff --cached --quiet -- data/clone-data.rvf; then
            echo "no changes to commit"
            exit 0
          fi
          git commit -m "chore(traffic): clone snapshot — ${{ steps.agg.outputs.clone_days }} days observed → ${{ steps.agg.outputs.clones }} clones, ${{ steps.agg.outputs.view_days }} view-days → ${{ steps.agg.outputs.views }} views"
          git push
@@ -1,206 +0,0 @@
 name: Cog HA-Matter Release
 # ADR-116 P8 — Build + sign + bundle the cog-ha-matter cog on a
 # version tag. Upload to gs://cognitum-apps/ runs only when the
 # GCP_CREDENTIALS + COGNITUM_OWNER_SIGNING_KEY secrets are set, so
 # this workflow is safe to merge before the production credentials
 # land — it'll bundle release artifacts to the workflow run page
 # either way.
 on:
  push:
    tags:
      - 'cog-ha-matter-v*'
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Build + sign + bundle but skip GCS upload'
        required: false
        default: 'true'
 env:
  CARGO_TERM_COLOR: always
  CRATE: cog-ha-matter
 jobs:
  build-x86_64:
    name: Build x86_64
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          targets: x86_64-unknown-linux-gnu
      - name: Cache cargo registry
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            v2/target
          key: cog-ha-matter-x86_64-${{ hashFiles('v2/Cargo.lock') }}
      - name: Build release binary
        working-directory: v2/crates/cog-ha-matter/cog
        run: make build-x86_64
      - name: Compute SHA-256
        working-directory: v2/crates/cog-ha-matter/cog
        run: make sign-x86_64
      - name: Sign with Ed25519 (gated)
        if: ${{ env.SIGNING_KEY != '' }}
        env:
          SIGNING_KEY: ${{ secrets.COGNITUM_OWNER_SIGNING_KEY }}
        working-directory: v2/crates/cog-ha-matter/cog
        run: |
          printf '%s' "$SIGNING_KEY" \
            | openssl pkeyutl -sign -inkey /dev/stdin -rawin \
                -in dist/cog-ha-matter-x86_64.sha256 \
            | base64 -w0 > dist/cog-ha-matter-x86_64.sig
          echo "Signed cog-ha-matter-x86_64 ($(wc -c < dist/cog-ha-matter-x86_64.sig) bytes)"
      - name: Upload workflow artifact
        uses: actions/upload-artifact@v4
        with:
          name: cog-ha-matter-x86_64
          path: |
            v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-x86_64
            v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-x86_64.sha256
            v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-x86_64.sig
          if-no-files-found: warn
  build-arm:
    name: Build aarch64 (arm)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          targets: aarch64-unknown-linux-gnu
      - name: Install cross-compiler
        run: |
          sudo apt-get update
          sudo apt-get install -y gcc-aarch64-linux-gnu
      - name: Cache cargo registry
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            v2/target
          key: cog-ha-matter-arm-${{ hashFiles('v2/Cargo.lock') }}
      - name: Build release binary
        working-directory: v2
        env:
          CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER: aarch64-linux-gnu-gcc
        run: |
          cargo build -p cog-ha-matter --release --target aarch64-unknown-linux-gnu
          mkdir -p crates/cog-ha-matter/cog/dist
          cp target/aarch64-unknown-linux-gnu/release/cog-ha-matter \
             crates/cog-ha-matter/cog/dist/cog-ha-matter-arm
          # ^ matches Makefile's `dist/$(CRATE)-arm` so `make sign-arm` finds it
      - name: Compute SHA-256
        working-directory: v2/crates/cog-ha-matter/cog
        run: make sign-arm
      - name: Sign with Ed25519 (gated)
        if: ${{ env.SIGNING_KEY != '' }}
        env:
          SIGNING_KEY: ${{ secrets.COGNITUM_OWNER_SIGNING_KEY }}
        working-directory: v2/crates/cog-ha-matter/cog
        run: |
          printf '%s' "$SIGNING_KEY" \
            | openssl pkeyutl -sign -inkey /dev/stdin -rawin \
                -in dist/cog-ha-matter-arm.sha256 \
            | base64 -w0 > dist/cog-ha-matter-arm.sig
          echo "Signed cog-ha-matter-arm ($(wc -c < dist/cog-ha-matter-arm.sig) bytes)"
      - name: Upload workflow artifact
        uses: actions/upload-artifact@v4
        with:
          name: cog-ha-matter-arm
          path: |
            v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-arm
            v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-arm.sha256
            v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-arm.sig
          if-no-files-found: warn
  publish-gcs:
    name: Upload to GCS (gated)
    needs: [build-x86_64, build-arm]
    runs-on: ubuntu-latest
    # Skip on dry-run dispatch; skip on tags when GCP_CREDENTIALS unset.
    if: >
      github.event_name == 'push' &&
      vars.HAS_GCP_CREDENTIALS == 'true'
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Download x86_64 artifact
        uses: actions/download-artifact@v4
        with:
          name: cog-ha-matter-x86_64
          path: dist/
      - name: Download arm artifact
        uses: actions/download-artifact@v4
        with:
          name: cog-ha-matter-arm
          path: dist/
      - name: Auth to GCP
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.GCP_CREDENTIALS }}
      - name: Set up gcloud
        uses: google-github-actions/setup-gcloud@v2
      - name: Upload binaries + sidecars
        run: |
          gsutil cp dist/cog-ha-matter-x86_64       gs://cognitum-apps/cogs/x86_64/cog-ha-matter-x86_64
          gsutil cp dist/cog-ha-matter-x86_64.sha256 gs://cognitum-apps/cogs/x86_64/cog-ha-matter-x86_64.sha256
          gsutil cp dist/cog-ha-matter-arm           gs://cognitum-apps/cogs/arm/cog-ha-matter-arm
          gsutil cp dist/cog-ha-matter-arm.sha256    gs://cognitum-apps/cogs/arm/cog-ha-matter-arm.sha256
          if [ -f dist/cog-ha-matter-x86_64.sig ]; then
            gsutil cp dist/cog-ha-matter-x86_64.sig gs://cognitum-apps/cogs/x86_64/cog-ha-matter-x86_64.sig
          fi
          if [ -f dist/cog-ha-matter-arm.sig ]; then
            gsutil cp dist/cog-ha-matter-arm.sig    gs://cognitum-apps/cogs/arm/cog-ha-matter-arm.sig
          fi
      # Prints one app-registry.json entry per architecture so it can be pasted
      # into the cognitum-one PR.
      - name: Print app-registry.json snippet for the cognitum-one PR
        run: |
          # The artifacts are downloaded into dist/ as
          # cog-ha-matter-<arch>{,.sha256,.sig} and uploaded to
          # gs://cognitum-apps/cogs/<arch>/cog-ha-matter-<arch>, so the snippet
          # must use exactly those names (no doubled "cog-" prefix).
          for arch in arm x86_64; do
            sha=$(cat "dist/cog-ha-matter-$arch.sha256")
            # The signature is optional: it only exists when the signing key
            # secret was available to the build job.
            sig=$([ -f "dist/cog-ha-matter-$arch.sig" ] && cat "dist/cog-ha-matter-$arch.sig" || echo "")
            cat <<EOF
          --- $arch ---
          {
            "id": "ha-matter",
            "version": "${GITHUB_REF_NAME#cog-ha-matter-v}",
            "binary_url": "https://storage.googleapis.com/cognitum-apps/cogs/$arch/cog-ha-matter-$arch",
            "binary_sha256": "$sha",
            "binary_signature": "$sig",
            "description": "Home Assistant + Matter Cognitum Seed cog (mDNS + witness chain)",
            "min_seed_version": "0.6.0",
            "installable_on": ["$arch"]
          }
          EOF
          done
@@ -20,8 +20,6 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: dtolnay/rust-toolchain@stable
        with: { targets: wasm32-unknown-unknown }
@@ -36,7 +34,7 @@ jobs:
            --out-dir ../../dashboard/public/nvsim-pkg \
            --release -- --no-default-features --features wasm
-      - uses: actions/setup-node@v6
+      - uses: actions/setup-node@v4
        with: { node-version: 20, cache: npm, cache-dependency-path: dashboard/package-lock.json }
      - working-directory: dashboard
@@ -26,8 +26,6 @@ jobs:
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install Rust + wasm32 target
        uses: dtolnay/rust-toolchain@stable
@@ -59,7 +57,7 @@ jobs:
            -- --no-default-features --features wasm
      - name: Setup Node 20
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
@@ -28,11 +28,9 @@ jobs:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Setup Node.js
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v4
        with:
          node-version: '20'
@@ -85,11 +83,9 @@ jobs:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Setup Node.js
-        uses: actions/setup-node@v6
+        uses: actions/setup-node@v4
        with:
          node-version: '20'
@@ -135,8 +131,6 @@ jobs:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Download all artifacts
        uses: actions/download-artifact@v4
@@ -2,11 +2,6 @@ name: Firmware CI
 on:
  push:
    branches:
      - '**'
    tags:
      # ESP32 firmware release tags — build + version-consistency guard (RuView#505).
      - 'v*-esp32'
    paths:
      - 'firmware/**'
      - '.github/workflows/firmware-ci.yml'
@@ -16,31 +11,8 @@ on:
      - '.github/workflows/firmware-ci.yml'
 jobs:
  version-guard:
    name: Verify version.txt matches release tag
    runs-on: ubuntu-latest
    if: github.ref_type == 'tag'
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Check firmware version.txt == tag
        run: |
          # Tag form: vX.Y.Z-esp32  →  expect version.txt to contain X.Y.Z
          TAG="${GITHUB_REF_NAME}"
          EXPECTED="${TAG#v}"
          EXPECTED="${EXPECTED%-esp32}"
          ACTUAL="$(tr -d '[:space:]' < firmware/esp32-csi-node/version.txt)"
          echo "Tag: $TAG  →  expected version.txt: $EXPECTED  |  actual: $ACTUAL"
          if [ "$EXPECTED" != "$ACTUAL" ]; then
            echo "::error::firmware/esp32-csi-node/version.txt is '$ACTUAL' but tag '$TAG' expects '$EXPECTED'."
            echo "::error::Bump version.txt and re-tag so esp_app_get_description()->version is correct (RuView#505)."
            exit 1
          fi
          echo "version.txt matches the release tag."
  build:
-    name: Build firmware (${{ matrix.target }} / ${{ matrix.variant }})
+    name: Build ESP32-S3 Firmware (${{ matrix.variant }})
    runs-on: ubuntu-latest
    container:
      image: espressif/idf:v5.4
@@ -49,53 +21,31 @@ jobs:
      matrix:
        include:
          - variant: 8mb
            target: esp32s3
            sdkconfig: sdkconfig.defaults
            partition_table_name: partitions_display.csv
            size_limit_kb: 1100
            artifact_app: esp32-csi-node.bin
            artifact_pt: partition-table.bin
          - variant: 4mb
            target: esp32s3
            sdkconfig: sdkconfig.defaults.4mb
            partition_table_name: partitions_4mb.csv
            size_limit_kb: 1100
            artifact_app: esp32-csi-node-4mb.bin
            artifact_pt: partition-table-4mb.bin
          # ADR-110: ESP32-C6 research target (Wi-Fi 6 / 802.15.4 / TWT / LP-core)
          - variant: c6-4mb
            target: esp32c6
            sdkconfig: sdkconfig.defaults
            partition_table_name: partitions_4mb.csv
            size_limit_kb: 1100
            artifact_app: esp32-csi-node-c6.bin
            artifact_pt: partition-table-c6.bin
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Build firmware (${{ matrix.variant }})
        working-directory: firmware/esp32-csi-node
        run: |
          . $IDF_PATH/export.sh
-          # 4mb variant supplies its own sdkconfig.defaults overlay.
+          if [ "${{ matrix.variant }}" != "8mb" ]; then
          # c6-4mb variant relies on the auto-applied sdkconfig.defaults.esp32c6
          # overlay (ESP-IDF auto-loads sdkconfig.defaults.$TARGET when present).
          if [ "${{ matrix.variant }}" = "4mb" ]; then
            cp "${{ matrix.sdkconfig }}" sdkconfig.defaults
          fi
-          idf.py set-target ${{ matrix.target }}
+          idf.py set-target esp32s3
          idf.py build
      - name: Build and run host-side ADR-110 unit tests
        if: matrix.variant == 'c6-4mb'
        working-directory: firmware/esp32-csi-node/test
        run: |
          make test_adr110
          ./test_adr110
      - name: Verify binary size (< ${{ matrix.size_limit_kb }} KB gate)
        working-directory: firmware/esp32-csi-node
        run: |
@@ -148,6 +98,32 @@ jobs:
            echo "Flash image integrity verified"
          fi
      # Fails the build when the version string baked into the firmware image
      # differs from firmware/esp32-csi-node/version.txt.
      - name: Verify embedded version string matches version.txt (fixes #505)
        working-directory: firmware/esp32-csi-node
        run: |
          EXPECTED=$(tr -d '[:space:]' < version.txt)
          BIN=build/esp32-csi-node.bin
          # Extract the version from the ESP-IDF app descriptor: locate the
          # magic word 0xABCD5432, then read the version string that starts 16
          # bytes after it (NUL-terminated, at most 32 bytes).
          #
          # The Python below must stay indented with the rest of this block
          # scalar: YAML strips the common indent, so the interpreter still
          # sees it at column 0, whereas unindented lines would terminate the
          # `run: |` scalar and make the workflow file invalid.
          #
          # stderr is folded into EMBEDDED on purpose so a "magic not found"
          # failure shows up in the mismatch message below.
          EMBEDDED=$(python3 -c "
          import struct, sys
          data = open('$BIN','rb').read()
          magic = struct.pack('<I', 0xABCD5432)
          i = data.find(magic)
          if i < 0:
              sys.exit('app_desc magic not found')
          ver = data[i+16:i+48].split(b'\\x00',1)[0].decode('ascii','replace')
          print(ver)
          " 2>&1)
          echo "Expected version: $EXPECTED"
          echo "Embedded version: $EMBEDDED"
          if [ "$EMBEDDED" != "$EXPECTED" ]; then
            echo "::error::Version string mismatch! version.txt='$EXPECTED' but binary reports '$EMBEDDED'."
            echo "::error::Ensure version.txt is updated before building and tagging."
            exit 1
          fi
          echo "Version string verified: $EMBEDDED"
      - name: Stage release binaries with variant-specific names
        working-directory: firmware/esp32-csi-node
        run: |
@@ -100,8 +100,6 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Download QEMU artifact
        uses: actions/download-artifact@v4
@@ -216,8 +214,6 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install clang
        run: |
@@ -267,8 +263,6 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install NVS generator
        run: pip install esp-idf-nvs-partition-gen
@@ -323,8 +317,6 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Download QEMU artifact
        uses: actions/download-artifact@v4
@@ -1,56 +0,0 @@
 name: Fix-Marker Regression Guard
 # Asserts that previously-shipped fixes are still present in the tree.
 # Manifest: scripts/fix-markers.json   Checker: scripts/check_fix_markers.py
 # Run locally:  python scripts/check_fix_markers.py   (also --list / --json)
 #
 # This complements the heavyweight checks (firmware build, deterministic
 # pipeline proof, witness bundle) with a fast per-PR "did someone revert a
 # known fix?" gate — the CI analogue of the ruflo witness fix-marker system.
 on:
  push:
    branches:
      - main
      - master
  pull_request:
  workflow_dispatch:
 jobs:
  fix-markers:
    name: Verify fix markers
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v6
        with:
          python-version: '3.11'
      - name: Validate the manifest is well-formed JSON
        run: python -c "import json; json.load(open('scripts/fix-markers.json')); print('manifest OK')"
      - name: Check fix markers
        run: python scripts/check_fix_markers.py
      - name: Emit machine-readable result (for the run summary)
        if: always()
        run: |
          python scripts/check_fix_markers.py --json > fix-markers-result.json || true
          {
            echo '### Fix-marker regression guard'
            echo ''
            echo '```'
            python scripts/check_fix_markers.py || true
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"
      - name: Upload result artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: fix-markers-result
          path: fix-markers-result.json
          retention-days: 30
@@ -1,112 +0,0 @@
 name: ADR-115 MQTT integration tests
 # Runs the Mosquitto-broker-backed integration tests for ADR-115's MQTT
 # publisher. These prove the publisher reaches a real broker, emits the
 # expected HA-discovery topic shape, and honours --privacy-mode at the
 # wire boundary (not just in unit-test logic).
 #
 # Default `cargo test --workspace` does not run these tests because they
 # require a broker and pull rumqttc into the build. This workflow opts
 # into both by setting --features mqtt and RUVIEW_RUN_INTEGRATION=1.
 on:
  pull_request:
    paths:
      - 'v2/crates/wifi-densepose-sensing-server/src/mqtt/**'
      - 'v2/crates/wifi-densepose-sensing-server/tests/mqtt_integration.rs'
      - 'v2/crates/wifi-densepose-sensing-server/Cargo.toml'
      - '.github/workflows/mqtt-integration.yml'
  push:
    branches: [main]
    paths:
      - 'v2/crates/wifi-densepose-sensing-server/src/mqtt/**'
  workflow_dispatch: {}
 jobs:
  mqtt-integration:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    # NB: we don't use a `services:` mosquitto container here because the
    # eclipse-mosquitto:2.x image rejects anonymous connections by default
    # and GH Actions `services` doesn't easily support mounting a custom
    # config file. We start mosquitto manually in a step below with an
    # inline `allow_anonymous true` config.
    env:
      RUVIEW_RUN_INTEGRATION: "1"
      RUVIEW_TEST_MQTT_PORT: "11883"
      CARGO_TERM_COLOR: always
      RUST_BACKTRACE: 1
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install mosquitto + clients and start with allow_anonymous
        run: |
          sudo apt-get update -qq
          sudo apt-get install -y mosquitto mosquitto-clients
          sudo systemctl stop mosquitto || true
          # Inline config: anon listener on 11883 only — no TLS, no auth,
          # OK for CI because we test the wire shape, not security.
          # Production deployments enable mTLS per ADR-115 §3.9.
          cat > /tmp/mosquitto-ci.conf <<'EOF'
          listener 11883
          allow_anonymous true
          persistence false
          log_dest stdout
          EOF
          mosquitto -c /tmp/mosquitto-ci.conf -d
          for i in {1..20}; do
            if mosquitto_pub -h 127.0.0.1 -p 11883 -t healthcheck -m ok -q 0 2>/dev/null; then
              echo "mosquitto reachable on 11883"; exit 0
            fi
            sleep 2
          done
          echo "mosquitto never became reachable" >&2
          tail -50 /var/log/mosquitto/*.log 2>/dev/null || true
          exit 1
      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: stable
      - name: Cache cargo registry + build
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: v2 -> target
      - name: Validate HA Blueprints
        run: |
          python -m pip install --quiet pyyaml
          python scripts/validate-ha-blueprints.py
      - name: Verify unit tests still pass under --features mqtt
        working-directory: v2
        # `cargo test` accepts a single TESTNAME filter, so we run the
        # whole --lib suite here. That gives us the full 410-test green
        # bar under --features mqtt (which is more reassuring than
        # filtering anyway).
        run: >-
          cargo test -p wifi-densepose-sensing-server
          --features mqtt --no-default-features
          --lib
          --no-fail-fast
      - name: Run integration tests against mosquitto
        working-directory: v2
        run: >-
          cargo test -p wifi-densepose-sensing-server
          --features mqtt --no-default-features
          --test mqtt_integration
          --no-fail-fast
          -- --test-threads=1 --nocapture
      # The broker under test is the apt-installed `mosquitto` daemon started
      # by the install step with /tmp/mosquitto-ci.conf, not a Docker
      # container, so diagnostics come from the host process rather than
      # `docker logs`.
      - name: Dump broker logs on failure
        if: failure()
        run: |
          echo "--- mosquitto process ---"
          pgrep -a mosquitto || echo "mosquitto process is not running"
          echo "--- mosquitto config ---"
          cat /tmp/mosquitto-ci.conf 2>/dev/null || true
          echo "--- mosquitto log files (if any) ---"
          tail -50 /var/log/mosquitto/*.log 2>/dev/null || true
@@ -26,8 +26,6 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: docker/setup-buildx-action@v3
@@ -39,7 +37,7 @@ jobs:
      - name: Extract metadata
        id: meta
-        uses: docker/metadata-action@v6
+        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/ruvnet/nvsim-server
          tags: |
@@ -49,7 +47,7 @@ jobs:
            type=raw,value=latest,enable={{is_default_branch}}
      - name: Build + push
-        uses: docker/build-push-action@v7
+        uses: docker/build-push-action@v5
        with:
          context: v2
          file: v2/crates/nvsim-server/Dockerfile
@@ -1,292 +0,0 @@
 # ADR-117 P5 — cibuildwheel + PyPI publish workflow for `wifi-densepose`
 #
 # This workflow is **explicitly NOT** triggered on every push. It runs only on:
 #   - a maintainer-dispatched `workflow_dispatch`
 #   - a pushed tag matching `v*-pip` (e.g. `v2.0.0-pip`)
 #
 # The reason for the `-pip` tag suffix is that the repo already cuts
 # `v0.X.Y-esp32` tags for firmware releases (see CLAUDE.md). The `-pip`
 # suffix keeps the pip release schedule independent of the firmware
 # release schedule.
 #
 # Sequencing on release day (per ADR-117 §7.3):
 #   1. cut tag `v1.99.0-pip`  → publishes the tombstone wheel first
 #   2. cut tag `v2.0.0-pip`   → publishes the PyO3 v2 wheel matrix
 #
 # Publishes via the `PYPI_API_TOKEN` GitHub Actions secret. The
 # token-refresh runbook (GCP Secret Manager → gh secret set) lives in
 # docs/integrations/pypi-release.md so KICS does not flag the
 # secret name as a generic-secret literal in the workflow.
 #
 # Q3 (witness hash v2 — open in ADR-117 §11.3) MUST be resolved
 # before the first v2.0.0 publish. When v2 lands, add a parallel
 # step that verifies the v2 hash against the Rust pipeline.
 name: pip-release
 on:
  workflow_dispatch:
    inputs:
      target:
        description: "Which package to release"
        required: true
        type: choice
        options:
          - v2-wheels
          - v1-99-tombstone
      publish_to:
        description: "Where to publish"
        required: true
        default: testpypi
        type: choice
        options:
          - testpypi  # dry-run target
          - pypi      # production
  push:
    tags:
      - "v*-pip"
 permissions:
  contents: read
 jobs:
  # ────────────────────────────────────────────────────────────────
  # v2.0.0 — cibuildwheel matrix (5 wheels + sdist)
  # ────────────────────────────────────────────────────────────────
  build-wheels:
    name: Build ${{ matrix.os }} ${{ matrix.arch }}
    # Runs for a manual dispatch that selected `v2-wheels`, or for a *pushed*
    # v2 tag. The tag branch is gated on the push event so that a manual
    # dispatch started from a v2 tag ref still honours its `target` input.
    if: |
      (github.event_name == 'workflow_dispatch' && inputs.target == 'v2-wheels') ||
      (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            arch: x86_64
          - os: ubuntu-latest
            arch: aarch64
          - os: macos-13          # x86_64 runner
            arch: x86_64
          - os: macos-14          # arm64 runner
            arch: arm64
          - os: windows-latest
            arch: AMD64
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      # Linux aarch64 needs QEMU for cross-build on x86_64 runners.
      - name: Set up QEMU
        if: matrix.os == 'ubuntu-latest' && matrix.arch == 'aarch64'
        uses: docker/setup-qemu-action@v3
      # ADR-117 §5.4: abi3-py310 — one binary per OS/arch covers all
      # Python minor versions ≥ 3.10. Build only cp310 wheels.
      - name: Build wheels (cibuildwheel)
        uses: pypa/cibuildwheel@v2.21
        env:
          CIBW_BUILD: "cp310-*"
          CIBW_ARCHS_LINUX: ${{ matrix.arch }}
          CIBW_ARCHS_MACOS: ${{ matrix.arch }}
          CIBW_ARCHS_WINDOWS: ${{ matrix.arch }}
          CIBW_BUILD_FRONTEND: "build"
          # The requirement must be quoted: this command is run through a
          # shell, where a bare `>=1.7` is an output redirection to a file
          # named `=1.7` and the version floor is silently dropped. Double
          # quotes are used because they are honoured by both POSIX sh and
          # Windows cmd.
          CIBW_BEFORE_BUILD: 'pip install "maturin>=1.7"'
          # The PyO3 sdist landing depends on the cargo/Rust toolchain
          # being present. cibuildwheel images carry rustup on Linux
          # but we also pin a known-good version for reproducibility.
          CIBW_BEFORE_ALL_LINUX: "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.82"
          CIBW_ENVIRONMENT_LINUX: 'PATH="$HOME/.cargo/bin:$PATH"'
          # Smoke-test every built wheel before accepting it. Catches
          # the case where the wheel imports but the compiled symbols
          # are missing.
          CIBW_TEST_REQUIRES: "pytest>=8.0"
          CIBW_TEST_COMMAND: 'python -c "import wifi_densepose; assert wifi_densepose.hello() == \"ok\"; print(wifi_densepose.__build_features__)"'
        with:
          package-dir: python
          output-dir: wheelhouse
      - uses: actions/upload-artifact@v4
        with:
          name: wheels-${{ matrix.os }}-${{ matrix.arch }}
          path: wheelhouse/*.whl
          if-no-files-found: error
  build-sdist:
    name: Build v2 sdist
    # Same gating as the wheel matrix: a manual dispatch is governed only by
    # its `target` input; the tag branch applies to pushed v2 tags only.
    if: |
      (github.event_name == 'workflow_dispatch' && inputs.target == 'v2-wheels') ||
      (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install maturin
        # Quote the requirement: unquoted, bash parses `>=1.7` as a
        # redirection to a file named `=1.7` and installs an unpinned maturin.
        run: pip install "maturin>=1.7"
      - name: Build sdist
        working-directory: python
        run: maturin sdist --out ../sdist
      - uses: actions/upload-artifact@v4
        with:
          name: sdist
          path: sdist/*.tar.gz
          if-no-files-found: error
  # ────────────────────────────────────────────────────────────────
  # v1.99.0 — tombstone wheel (pure Python, single sdist + wheel)
  # ────────────────────────────────────────────────────────────────
  build-tombstone:
    name: Build v1.99.0 tombstone
    # Runs for a manual dispatch that selected `v1-99-tombstone`, or for a
    # *pushed* v1.99 tag. Gating the tag branch on the push event keeps a
    # manual dispatch from a v1.99 tag ref bound to its `target` input.
    if: |
      (github.event_name == 'workflow_dispatch' && inputs.target == 'v1-99-tombstone') ||
      (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v1.99'))
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Install build backend
        # Quote the requirement: unquoted, bash parses `>=1.2` as a
        # redirection to a file named `=1.2` and the version floor is lost.
        run: python -m pip install --upgrade pip "build>=1.2"
      - name: Build sdist + wheel
        working-directory: python/tombstone
        run: python -m build --outdir ../../tombstone-dist
      # Inspect what was actually built — the previous v1.99.0-pip run
      # showed an `import wifi_densepose` that returned cleanly instead
      # of raising, even though build logs said `adding 'wifi_densepose/__init__.py'`.
      # Print the wheel manifest + the __init__.py content so any
      # future regression is debuggable from the run log alone.
      - name: Inspect wheel contents
        run: |
          set -e
          WHL=tombstone-dist/wifi_densepose-1.99.0-py3-none-any.whl
          echo "--- wheel listing ---"
          python -m zipfile -l "$WHL"
          echo "--- wifi_densepose/__init__.py inside the wheel ---"
          python -m zipfile -e "$WHL" /tmp/tomb-inspect
          cat /tmp/tomb-inspect/wifi_densepose/__init__.py
          echo "--- size in bytes ---"
          wc -c /tmp/tomb-inspect/wifi_densepose/__init__.py
      # Smoke-test in an ISOLATED venv. The previous run's failure
      # mode was that the ubuntu-latest runner's system `python` had
      # site-packages picking up something other than the user-installed
      # wheel, so the import resolved to a different module. A clean
      # venv removes any ambiguity about which wifi_densepose is loaded.
      - name: Smoke-test tombstone in isolated venv
        run: |
          set -e
          # Copy the wheel to /tmp BEFORE entering the venv — we must
          # cd OUT of the repo root because the repo contains a
          # `wifi_densepose/` directory left over from the legacy v1
          # source. Python puts cwd at sys.path[0], so an import from
          # the repo root would resolve to the legacy directory and
          # bypass the freshly-installed wheel entirely (this was the
          # silent failure mode of the previous two run attempts).
          cp tombstone-dist/wifi_densepose-1.99.0-py3-none-any.whl /tmp/
          python -m venv /tmp/smoke-venv
          /tmp/smoke-venv/bin/python -m pip install --upgrade pip
          /tmp/smoke-venv/bin/python -m pip install /tmp/wifi_densepose-1.99.0-py3-none-any.whl
          cd /tmp  # away from the repo root's stray wifi_densepose/
          /tmp/smoke-venv/bin/python -c "import importlib.util as u; s = u.find_spec('wifi_densepose'); print('Resolved to:', s.origin); print('--- file content ---'); print(open(s.origin).read())"
          set +e
          /tmp/smoke-venv/bin/python -c "import wifi_densepose" 2> import-output.txt
          rc=$?
          set -e
          if [ "$rc" -eq 0 ]; then
            echo "ERROR: tombstone import succeeded — should have raised ImportError"
            exit 1
          fi
          if ! grep -q "github.com/ruvnet/RuView" import-output.txt; then
            echo "ERROR: tombstone ImportError missing migration URL"
            cat import-output.txt
            exit 1
          fi
          echo "Tombstone wheel correctly raises ImportError with migration URL."
      - uses: actions/upload-artifact@v4
        with:
          name: tombstone
          path: tombstone-dist/*
          if-no-files-found: error
  # ────────────────────────────────────────────────────────────────
  # Publish — gated by manual dispatch OR by the tag form
  # ────────────────────────────────────────────────────────────────
  publish-v2:
    name: Publish v2 wheels
    needs: [build-wheels, build-sdist]
    # `always()` lets this job evaluate its own condition even when a sibling
    # job in the workflow was skipped; both build jobs must have succeeded.
    # The tag branch is restricted to push events so that a manual dispatch
    # from a v2 tag ref is governed only by its inputs.
    if: |
      always() &&
      needs.build-wheels.result == 'success' &&
      needs.build-sdist.result == 'success' &&
      (
        (github.event_name == 'workflow_dispatch' && inputs.target == 'v2-wheels') ||
        (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
      )
    runs-on: ubuntu-latest
    steps:
      # No `name:` filter — every artifact of this run is downloaded, each
      # into its own sub-directory of dist-staging.
      - name: Gather all artifacts into dist/
        uses: actions/download-artifact@v4
        with:
          path: dist-staging
      - name: Flatten artifacts
        run: |
          mkdir -p dist
          find dist-staging -type f \( -name '*.whl' -o -name '*.tar.gz' \) -exec cp -v {} dist/ \;
          ls -lh dist/
      # NOTE(review): TestPyPI and PyPI are separate indexes — confirm that
      # PYPI_API_TOKEN is actually accepted by test.pypi.org.
      - name: Publish to TestPyPI (dry-run target)
        if: github.event_name == 'workflow_dispatch' && inputs.publish_to == 'testpypi'
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          password: ${{ secrets.PYPI_API_TOKEN }}
          packages-dir: dist
          skip-existing: true
      # Production publish: only for a pushed v2 tag, or a dispatch that
      # explicitly chose `pypi`. Without the push-event gate, a dispatch run
      # from a v2 tag ref with `publish_to: testpypi` would ALSO publish to
      # production, because the ref alone satisfied the condition.
      - name: Publish to PyPI
        if: |
          (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')) ||
          (github.event_name == 'workflow_dispatch' && inputs.publish_to == 'pypi')
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
          packages-dir: dist
  publish-tombstone:
    name: Publish v1.99 tombstone
    needs: [build-tombstone]
    # `always()` lets this job evaluate its own condition even when a sibling
    # job in the workflow was skipped; the tombstone build must have
    # succeeded. The tag branch is restricted to push events so that a manual
    # dispatch from a v1.99 tag ref is governed only by its inputs.
    if: |
      always() &&
      needs.build-tombstone.result == 'success' &&
      (
        (github.event_name == 'workflow_dispatch' && inputs.target == 'v1-99-tombstone') ||
        (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v1.99'))
      )
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: tombstone
          path: dist
      # NOTE(review): TestPyPI and PyPI are separate indexes — confirm that
      # PYPI_API_TOKEN is actually accepted by test.pypi.org.
      - name: Publish to TestPyPI (dry-run target)
        if: github.event_name == 'workflow_dispatch' && inputs.publish_to == 'testpypi'
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          password: ${{ secrets.PYPI_API_TOKEN }}
          packages-dir: dist
          skip-existing: true
      # Production publish: only for a pushed v1.99 tag, or a dispatch that
      # explicitly chose `pypi`. Without the push-event gate, a dispatch run
      # from a v1.99 tag ref with `publish_to: testpypi` would ALSO publish
      # to production, because the ref alone satisfied the condition.
      - name: Publish to PyPI
        if: |
          (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v1.99')) ||
          (github.event_name == 'workflow_dispatch' && inputs.publish_to == 'pypi')
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
          packages-dir: dist
@@ -29,8 +29,6 @@ jobs:
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Stage viewer for Pages
        run: |
@@ -1,157 +0,0 @@
 name: ruview-swarm CI guard
 # Dedicated guard for the ADR-148 drone swarm crate (`v2/crates/ruview-swarm`).
 # The main ci.yml runs `cargo test --workspace --no-default-features`, which
 # only exercises ruview-swarm's DEFAULT feature set. This guard additionally:
 #   - tests every feature combination (train / ruflo+itar / full)
 #   - fails on ANY clippy warning in the crate's own code (--no-deps)
 #   - asserts the ITAR + publish guards stay in place (USML Cat VIII(h)(12))
 #   - builds the GPU training binary under the `train` feature
 #
 # Path-scoped so it only runs when the crate or this workflow changes.
 on:
  push:
    branches: [ main, 'feat/*' ]
    paths:
      - 'v2/crates/ruview-swarm/**'
      - '.github/workflows/ruview-swarm-ci.yml'
  pull_request:
    paths:
      - 'v2/crates/ruview-swarm/**'
      - '.github/workflows/ruview-swarm-ci.yml'
  workflow_dispatch:
 env:
  CARGO_TERM_COLOR: always
 jobs:
  # ── Feature-matrix tests ─────────────────────────────────────────────────
  tests:
    name: tests (${{ matrix.features.label }})
    runs-on: ubuntu-latest
    # One leg per feature combination; fail-fast is off so a failure in one
    # combination does not cancel the others.
    strategy:
      fail-fast: false
      matrix:
        features:
          - { label: 'default',          flags: '--no-default-features' }
          - { label: 'train',            flags: '--features train' }
          - { label: 'ruflo',            flags: '--features ruflo' }
          - { label: 'full+train',       flags: '--features full,train' }
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: dtolnay/rust-toolchain@stable
      - name: Cache cargo
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            v2/target
          # The matrix label is part of the key so each feature combination
          # keeps its own `v2/target`. With one shared key only the first leg
          # to finish could save; the other legs never got a cache holding
          # their own build artifacts.
          key: ${{ runner.os }}-ruview-swarm-${{ matrix.features.label }}-${{ hashFiles('v2/Cargo.lock') }}
          # Prefer an older cache of the same feature set, then fall back to
          # any ruview-swarm cache (the prefix used before the key was split).
          restore-keys: |
            ${{ runner.os }}-ruview-swarm-${{ matrix.features.label }}-
            ${{ runner.os }}-ruview-swarm-
      - name: cargo test -p ruview-swarm ${{ matrix.features.flags }}
        working-directory: v2
        run: cargo test -p ruview-swarm ${{ matrix.features.flags }} --lib
  # ── Clippy: zero warnings in the crate's own code ────────────────────────
  # Lints ruview-swarm twice — once with default features disabled and once
  # with `full,train` — and fails on any warning (`-D warnings`).
  clippy:
    name: clippy (-D warnings, --no-deps)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      # v2/rust-toolchain.toml pins channel "1.89" with profile "minimal" (no
      # clippy). dtolnay@stable installs clippy on the floating "stable"
      # toolchain, but the override makes cargo use the separate "1.89"
      # toolchain — so `cargo clippy` errors "cargo-clippy is not installed for
      # 1.89". Install clippy on the pinned toolchain that cargo actually uses.
      - uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: "1.89"
          components: clippy
      # Cache key carries a `-clippy-` infix, distinct from the keys used by
      # the other jobs in this workflow.
      - name: Cache cargo
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            v2/target
          key: ${{ runner.os }}-ruview-swarm-clippy-${{ hashFiles('v2/Cargo.lock') }}
          restore-keys: ${{ runner.os }}-ruview-swarm-clippy-
      # --no-deps confines linting to ruview-swarm's own source, so pre-existing
      # warnings in dependency crates don't gate this PR.
      - name: clippy (default)
        working-directory: v2
        run: cargo clippy -p ruview-swarm --no-default-features --no-deps -- -D warnings
      # Same lint with the `full` and `train` features enabled together.
      - name: clippy (full,train)
        working-directory: v2
        run: cargo clippy -p ruview-swarm --features full,train --no-deps -- -D warnings
  # ── Build the GPU training binary (train feature) ────────────────────────
  # Builds the `train_marl` binary under the `train` feature, then checks
  # that a build with default features disabled does not produce it.
  train-bin:
    name: build train_marl bin
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: dtolnay/rust-toolchain@stable
      # Cache key carries a `-bin-` infix, distinct from the keys used by the
      # other jobs in this workflow.
      - name: Cache cargo
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            v2/target
          key: ${{ runner.os }}-ruview-swarm-bin-${{ hashFiles('v2/Cargo.lock') }}
          restore-keys: ${{ runner.os }}-ruview-swarm-bin-
      - name: cargo build --bin train_marl --features train
        working-directory: v2
        run: cargo build -p ruview-swarm --features train --bin train_marl
      # Order matters in the script below: the stale artifact is deleted
      # before the second build so the existence test reflects that build only.
      # Paths are relative to `v2` (the step's working directory).
      - name: train_marl is excluded from the default build
        working-directory: v2
        run: |
          # The training binary requires the `train` feature; a default `--bins`
          # build must NOT produce it (keeps default/CI builds light + Candle-free).
          # Remove any prior artifact first so this checks what the DEFAULT build
          # produces, not a leftover from the train-feature build above.
          rm -f target/debug/train_marl
          cargo build -p ruview-swarm --no-default-features --bins
          if [ -f target/debug/train_marl ]; then
            echo "ERROR: train_marl built without the 'train' feature" >&2
            exit 1
          fi
          echo "OK: train_marl correctly gated behind the 'train' feature"
  # ── ITAR + publish guards ────────────────────────────────────────────────
  # Static checks on v2/crates/ruview-swarm/Cargo.toml: the crate must stay
  # unpublishable, and the default feature set must not reach
  # `itar-unrestricted`.
  export-control-guard:
    name: ITAR / publish guard
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: publish = false is present (no accidental crates.io publish)
        run: |
          CARGO=v2/crates/ruview-swarm/Cargo.toml
          if ! grep -qE '^\s*publish\s*=\s*false' "$CARGO"; then
            echo "ERROR: ruview-swarm Cargo.toml must keep 'publish = false' until" >&2
            echo "       PR merge + dependency publish + ITAR export sign-off." >&2
            exit 1
          fi
          echo "OK: publish = false present"
      - name: default feature set does NOT enable itar-unrestricted
        run: |
          CARGO=v2/crates/ruview-swarm/Cargo.toml
          # USML Cat VIII(h)(12): swarming coordination must be opt-in, never default.
          # Parse the manifest instead of grepping the `default =` line: a
          # line-based grep misses a multi-line `default = [` array and misses
          # `itar-unrestricted` pulled in transitively through another feature
          # (e.g. default = ["full"]). Needs Python >= 3.11 for `tomllib`.
          python3 - "$CARGO" <<'PY'
          import sys
          import tomllib

          with open(sys.argv[1], "rb") as manifest:
              features = tomllib.load(manifest).get("features", {})

          # Walk everything reachable from `default`. Entries that are not keys
          # of [features] (optional deps, `dep:` / `crate/feature` forms) have
          # no further expansion and simply end the walk.
          default_features = list(features.get("default", []))
          reachable = set()
          pending = list(default_features)
          while pending:
              feature = pending.pop()
              if feature in reachable:
                  continue
              reachable.add(feature)
              pending.extend(features.get(feature, []))

          print(f"default = {default_features}")
          print(f"default feature closure = {sorted(reachable)}")
          if "itar-unrestricted" in reachable:
              sys.exit("ERROR: 'itar-unrestricted' must NOT be in the default feature set")
          PY
          echo "OK: ITAR-gated coordination features are opt-in, not default"
@@ -18,28 +18,23 @@ jobs:
  sast:
    name: Static Application Security Testing
    runs-on: ubuntu-latest
    continue-on-error: true   # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
    permissions:
      security-events: write
      actions: read
      contents: read
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
        fetch-depth: 0
    - name: Set up Python
-      continue-on-error: true
+      uses: actions/setup-python@v5
      uses: actions/setup-python@v6
      with:
        python-version: ${{ env.PYTHON_VERSION }}
        cache: 'pip'
    - name: Install dependencies
      continue-on-error: true
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
@@ -47,38 +42,34 @@ jobs:
    - name: Run Bandit security scan
      run: |
-        # The Python codebase lives under archive/v1/src (it moved there when
+        bandit -r src/ -f sarif -o bandit-results.sarif
        # the runtime was rewritten in Rust). Scanning `src/` matched nothing,
        # so this SAST step was a silent no-op.
        bandit -r archive/v1/src/ -f sarif -o bandit-results.sarif
      continue-on-error: true
    - name: Upload Bandit results to GitHub Security
      continue-on-error: true
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
        sarif_file: bandit-results.sarif
        category: bandit
-    # Removed the deprecated `returntocorp/semgrep-action@v1` step: it was
+    - name: Run Semgrep security scan
-    # redundant (the pip `semgrep --sarif` below is what feeds GitHub Security;
+      uses: returntocorp/semgrep-action@v1
-    # the action only pushed to the Semgrep cloud app via SEMGREP_APP_TOKEN) and
+      with:
-    # it pulled `returntocorp/semgrep-agent:v1` from Docker Hub on every run,
+        config: >-
-    # which intermittently timed out and turned this check red. The pip semgrep
+          p/security-audit
-    # (installed above) needs no Docker pull. The action's `p/docker` +
+          p/secrets
-    # `p/kubernetes` rulesets are folded into the command below so coverage is
+          p/python
-    # preserved.
+          p/docker
-    - name: Run Semgrep + generate SARIF
+          p/kubernetes
      env:
        SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }}
    - name: Generate Semgrep SARIF
      run: |
-        semgrep \
+        semgrep --config=p/security-audit --config=p/secrets --config=p/python --sarif --output=semgrep.sarif src/
          --config=p/security-audit --config=p/secrets --config=p/python \
          --config=p/docker --config=p/kubernetes \
          --sarif --output=semgrep.sarif archive/v1/src/
      continue-on-error: true
    - name: Upload Semgrep results to GitHub Security
      continue-on-error: true
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
@@ -89,27 +80,21 @@ jobs:
  dependency-scan:
    name: Dependency Vulnerability Scan
    runs-on: ubuntu-latest
    continue-on-error: true   # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
    permissions:
      security-events: write
      actions: read
      contents: read
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up Python
-      continue-on-error: true
+      uses: actions/setup-python@v5
      uses: actions/setup-python@v6
      with:
        python-version: ${{ env.PYTHON_VERSION }}
        cache: 'pip'
    - name: Install dependencies
      continue-on-error: true
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
@@ -134,7 +119,6 @@ jobs:
      continue-on-error: true
    - name: Upload Snyk results to GitHub Security
      continue-on-error: true
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
@@ -142,7 +126,6 @@ jobs:
        category: snyk
    - name: Upload vulnerability reports
      continue-on-error: true
      uses: actions/upload-artifact@v4
      if: always()
      with:
@@ -156,7 +139,6 @@ jobs:
  container-scan:
    name: Container Security Scan
    runs-on: ubuntu-latest
    continue-on-error: true   # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
    needs: []
    if: github.event_name == 'push' || github.event_name == 'schedule'
    permissions:
@@ -165,18 +147,13 @@ jobs:
      contents: read
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up Docker Buildx
      continue-on-error: true
      uses: docker/setup-buildx-action@v3
    - name: Build Docker image for scanning
-      continue-on-error: true
+      uses: docker/build-push-action@v5
      uses: docker/build-push-action@v7
      with:
        context: .
        target: production
@@ -186,7 +163,6 @@ jobs:
        cache-to: type=gha,mode=max
    - name: Run Trivy vulnerability scanner
      continue-on-error: true
      uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
      with:
        image-ref: 'wifi-densepose:scan'
@@ -194,7 +170,6 @@ jobs:
        output: 'trivy-results.sarif'
    - name: Upload Trivy results to GitHub Security
      continue-on-error: true
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
@@ -202,8 +177,7 @@ jobs:
        category: trivy
    - name: Run Grype vulnerability scanner
-      continue-on-error: true
+      uses: anchore/scan-action@v3
      uses: anchore/scan-action@v7
      id: grype-scan
      with:
        image: 'wifi-densepose:scan'
@@ -212,7 +186,6 @@ jobs:
        output-format: sarif
    - name: Upload Grype results to GitHub Security
      continue-on-error: true
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
@@ -220,7 +193,6 @@ jobs:
        category: grype
    - name: Run Docker Scout
      continue-on-error: true
      uses: docker/scout-action@v1
      if: always()
      with:
@@ -230,7 +202,6 @@ jobs:
        summary: true
    - name: Upload Docker Scout results
      continue-on-error: true
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
@@ -241,20 +212,15 @@ jobs:
  iac-scan:
    name: Infrastructure Security Scan
    runs-on: ubuntu-latest
    continue-on-error: true   # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
    permissions:
      security-events: write
      actions: read
      contents: read
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Run Checkov IaC scan
      continue-on-error: true
      uses: bridgecrewio/checkov-action@99bb2caf247dfd9f03cf984373bc6043d4e32ebf # v12.1347.0
      with:
        directory: .
@@ -265,7 +231,6 @@ jobs:
        soft_fail: true
    - name: Upload Checkov results to GitHub Security
      continue-on-error: true
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
@@ -273,7 +238,6 @@ jobs:
        category: checkov
    - name: Run Terrascan IaC scan
      continue-on-error: true
      uses: tenable/terrascan-action@3a6e87da8e244513bd77b631e624552643f794c6 # v1.4.1
      with:
        iac_type: 'k8s'
@@ -283,7 +247,6 @@ jobs:
        sarif_upload: true
    - name: Run KICS IaC scan
      continue-on-error: true
      uses: checkmarx/kics-github-action@05aa5eb70eede1355220f4ca5238d96b397e30a6 # v2.1.20
      with:
        path: '.'
@@ -293,7 +256,6 @@ jobs:
        exclude_queries: 'a7ef1e8c-fbf8-4ac1-b8c7-2c3b0e6c6c6c'
    - name: Upload KICS results to GitHub Security
      continue-on-error: true
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
@@ -304,21 +266,17 @@ jobs:
  secret-scan:
    name: Secret Scanning
    runs-on: ubuntu-latest
    continue-on-error: true   # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
    permissions:
      security-events: write
      actions: read
      contents: read
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
        fetch-depth: 0
    - name: Run TruffleHog secret scan
      continue-on-error: true
      uses: trufflesecurity/trufflehog@17456f8c7d042d8c82c9a8ca9e937231f9f42e26 # v3.95.2
      with:
        path: ./
@@ -327,7 +285,6 @@ jobs:
        extra_args: --debug --only-verified
    - name: Run GitLeaks secret scan
      continue-on-error: true
      uses: gitleaks/gitleaks-action@v2
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -344,36 +301,28 @@ jobs:
  license-scan:
    name: License Compliance Scan
    runs-on: ubuntu-latest
    continue-on-error: true   # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Set up Python
-      continue-on-error: true
+      uses: actions/setup-python@v5
      uses: actions/setup-python@v6
      with:
        python-version: ${{ env.PYTHON_VERSION }}
        cache: 'pip'
    - name: Install dependencies
      continue-on-error: true
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install pip-licenses licensecheck
    - name: Run license check
      continue-on-error: true
      run: |
        pip-licenses --format=json --output-file=licenses.json
        licensecheck --zero
    - name: Upload license report
      continue-on-error: true
      uses: actions/upload-artifact@v4
      with:
        name: license-report
@@ -383,16 +332,11 @@ jobs:
  compliance-check:
    name: Security Policy Compliance
    runs-on: ubuntu-latest
    continue-on-error: true   # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
    steps:
    - name: Checkout code
      continue-on-error: true
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Check security policy files
      continue-on-error: true
      run: |
        # Check for required security files
        files=("SECURITY.md" ".github/SECURITY.md" "docs/SECURITY.md")
@@ -410,13 +354,11 @@ jobs:
        fi
    - name: Check for security headers in code
      continue-on-error: true
      run: |
        # Check for security-related configurations
        grep -r "X-Frame-Options\|X-Content-Type-Options\|X-XSS-Protection\|Content-Security-Policy" src/ || echo "⚠️ Consider adding security headers"
    - name: Validate Kubernetes security contexts
      continue-on-error: true
      run: |
        # Check for security contexts in Kubernetes manifests
        if [[ -d "k8s" ]]; then
@@ -433,7 +375,6 @@ jobs:
  security-report:
    name: Security Report
    runs-on: ubuntu-latest
    continue-on-error: true   # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
    needs: [sast, dependency-scan, container-scan, iac-scan, secret-scan, license-scan, compliance-check]
    if: always()
    # Promote secret to env-scope so the gating `if:` on the Slack-notify
@@ -443,11 +384,9 @@ jobs:
      SECURITY_SLACK_WEBHOOK_URL: ${{ secrets.SECURITY_SLACK_WEBHOOK_URL }}
    steps:
    - name: Download all artifacts
      continue-on-error: true
      uses: actions/download-artifact@v4
    - name: Generate security summary
      continue-on-error: true
      run: |
        echo "# Security Scan Summary" > security-summary.md
        echo "" >> security-summary.md
@@ -463,7 +402,6 @@ jobs:
        echo "Generated on: $(date)" >> security-summary.md
    - name: Upload security summary
      continue-on-error: true
      uses: actions/upload-artifact@v4
      with:
        name: security-summary
@@ -473,7 +411,6 @@ jobs:
    # use env.X instead. Inherits SECURITY_SLACK_WEBHOOK_URL from the
    # job-level env block (added below).
    - name: Notify security team on critical findings
      continue-on-error: true
      if: ${{ env.SECURITY_SLACK_WEBHOOK_URL != '' && (needs.sast.result == 'failure' || needs.dependency-scan.result == 'failure' || needs.container-scan.result == 'failure') }}
      uses: 8398a7/action-slack@v3
      with:
@@ -489,7 +426,6 @@ jobs:
        SLACK_WEBHOOK_URL: ${{ env.SECURITY_SLACK_WEBHOOK_URL }}
    - name: Create security issue on critical findings
      continue-on-error: true
      if: needs.sast.result == 'failure' || needs.dependency-scan.result == 'failure'
      uses: actions/github-script@v6
      with:
@@ -1,181 +0,0 @@
 name: wifi-densepose sensing-server → Docker Hub + ghcr.io
 # Build + publish the `wifi-densepose` sensing-server image to both Docker Hub
 # (`ruvnet/wifi-densepose`) and ghcr.io (`ghcr.io/ruvnet/wifi-densepose`) on:
 #   - push to main affecting the Dockerfile, the server crate, the UI assets,
 #     or this workflow itself,
 #   - tag push matching v* (release builds),
 #   - manual workflow_dispatch.
 #
 # Closes #520 and #514: the stale `:latest` is rebuilt and pushed automatically
 # whenever the surface that produces it changes, and the Dockerfile fails the
 # build if the observatory/pose-fusion UI assets ever go missing again.
 #
 # Secrets:
 #   DOCKERHUB_USERNAME — `ruvnet` (Docker Hub login name)
 #   DOCKERHUB_TOKEN    — Docker Hub access token with read/write/delete scope
 # (ghcr.io uses the workflow's GITHUB_TOKEN — no secret needed.)
 on:
  push:
    branches: [main]
    paths:
      - 'docker/Dockerfile.rust'
      - 'docker/docker-entrypoint.sh'
      - 'v2/crates/wifi-densepose-sensing-server/**'
      - 'v2/crates/wifi-densepose-signal/**'
      - 'v2/crates/wifi-densepose-vitals/**'
      - 'v2/crates/wifi-densepose-wifiscan/**'
      - 'v2/crates/wifi-densepose-bfld/**'
      - 'v2/crates/cog-ha-matter/**'
      - 'v2/Cargo.toml'
      - 'v2/Cargo.lock'
      - 'ui/**'
      - '.github/workflows/sensing-server-docker.yml'
    tags: ['v*']
  workflow_dispatch: {}
 permissions:
  contents: read
  packages: write
 concurrency:
  group: sensing-server-docker-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  build-and-publish:
    name: build · push · smoke-test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      # QEMU is required so the amd64 GitHub runner can cross-build the
      # linux/arm64 layer below (Dockerfile.rust is arch-agnostic — no `--target`
      # flag — so buildx + QEMU is all that's needed; arm64 builds are emulated
      # by the runner, not built on a separate arm64 host).
      - uses: docker/setup-qemu-action@v3
      - uses: docker/setup-buildx-action@v3
      - name: Log in to Docker Hub
        # Bypassing docker/login-action@v3: the action kept emitting
        # "malformed HTTP Authorization header" against a known-good
        # dckr_pat_* token (verified by direct curl against the Hub API).
        # `docker login --password-stdin` is the documented credential
        # path and avoids whatever encoding step the action injects.
        env:
          DH_USER: ${{ secrets.DOCKERHUB_USERNAME }}
          DH_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        run: |
          printf '%s' "$DH_TOKEN" | docker login docker.io -u "$DH_USER" --password-stdin
      - name: Log in to ghcr.io
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Compute tags
        id: meta
        uses: docker/metadata-action@v6
        with:
          images: |
            docker.io/ruvnet/wifi-densepose
            ghcr.io/ruvnet/wifi-densepose
          tags: |
            type=ref,event=branch
            type=ref,event=tag
            type=sha,format=short
            type=raw,value=latest,enable={{is_default_branch}}
      - name: Build + push
        id: build
        uses: docker/build-push-action@v7
        with:
          context: .
          file: docker/Dockerfile.rust
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # README badge advertises `amd64 + arm64`, and #547 promised multi-arch
          # as part of the docker publish refresh; arm64 was never actually wired
          # in, so Apple Silicon Macs hit `no matching manifest for linux/arm64/v8`
          # on `docker pull ruvnet/wifi-densepose:latest` (#136, #625). Build both.
          platforms: linux/amd64,linux/arm64
      # ---------------------------------------------------------------------
      # Smoke-test the freshly-pushed image:
      #   1. UI assets that closed #520 are inside `/app/ui` (the Dockerfile's
      #      RUN guard catches missing ones at build time, this re-checks the
      #      pushed artifact post-hoc as belt-and-braces).
      #   2. /health is up.
      #   3. /api/v1/info returns 200 with no auth (LAN-mode default).
      #   4. With RUVIEW_API_TOKEN set, /api/v1/info returns 401 without a
      #      Bearer header, 200 with the correct one (the #443 auth middleware).
      # ---------------------------------------------------------------------
      - name: Smoke-test image assets + LAN-mode HTTP
        run: |
          set -euo pipefail
          IMAGE="ghcr.io/ruvnet/wifi-densepose:sha-${GITHUB_SHA::7}"
          docker pull "$IMAGE"
          docker run --rm "$IMAGE" sh -c \
            'ls /app/ui/observatory.html /app/ui/pose-fusion.html /app/ui/index.html /app/ui/viz.html >/dev/null'
          docker run --rm "$IMAGE" sh -c 'ls -d /app/ui/observatory /app/ui/pose-fusion >/dev/null'
          docker run -d --name sm -p 3000:3000 -e CSI_SOURCE=simulated "$IMAGE"
          # Wait up to 30 s for /health.
          for _ in $(seq 1 30); do
            if curl -fsS http://127.0.0.1:3000/health >/dev/null 2>&1; then break; fi
            sleep 1
          done
          curl -fsS http://127.0.0.1:3000/health
          curl -fsS http://127.0.0.1:3000/api/v1/info >/dev/null
          curl -fsS http://127.0.0.1:3000/ui/observatory.html >/dev/null
          curl -fsS http://127.0.0.1:3000/ui/pose-fusion.html >/dev/null
          docker stop sm
      - name: Smoke-test the bearer-token auth path
        run: |
          set -euo pipefail
          IMAGE="ghcr.io/ruvnet/wifi-densepose:sha-${GITHUB_SHA::7}"
          docker run -d --name auth \
            -p 3000:3000 \
            -e CSI_SOURCE=simulated \
            -e RUVIEW_API_TOKEN=smoke-test-token-do-not-use \
            "$IMAGE"
          for _ in $(seq 1 30); do
            if curl -fsS http://127.0.0.1:3000/health >/dev/null 2>&1; then break; fi
            sleep 1
          done
          # /health stays unauthenticated.
          curl -fsS http://127.0.0.1:3000/health >/dev/null
          # /api/v1/info without a bearer → 401.
          code=$(curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:3000/api/v1/info)
          test "$code" = "401" || { echo "expected 401, got $code"; exit 1; }
          # Wrong bearer → 401.
          code=$(curl -s -o /dev/null -w '%{http_code}' -H 'Authorization: Bearer wrong' http://127.0.0.1:3000/api/v1/info)
          test "$code" = "401" || { echo "expected 401 (wrong token), got $code"; exit 1; }
          # Correct bearer → 200.
          curl -fsS -H 'Authorization: Bearer smoke-test-token-do-not-use' http://127.0.0.1:3000/api/v1/info >/dev/null
          docker stop auth
      - name: Summary
        if: always()
        run: |
          # Append a short publish report to the job summary page. This step
          # runs even when an earlier step failed (if: always()).
          #
          # Markdown backticks inside a double-quoted bash string must be
          # backslash-escaped: an unescaped `...` is command substitution, so
          # bash would try to run `:latest` as a command, log "command not
          # found", and silently drop the tag name from the summary text.
          {
            echo "## sensing-server image published"
            echo
            echo "Tags:"
            echo '```'
            echo "${{ steps.meta.outputs.tags }}"
            echo '```'
            echo
            echo "Closes #520 (missing observatory/pose-fusion UI assets) and #514 (stale \`:latest\` for the v0.6+ packet format)."
            echo "The Dockerfile fails the build if those UI assets ever disappear again, and this workflow rebuilds + pushes automatically on every change to the surface."
          } >> "$GITHUB_STEP_SUMMARY"
@@ -1,72 +0,0 @@
 name: three.js demos → GitHub Pages
 # Publishes the ADR-097 three.js demos under gh-pages/three.js/.
 # Uses keep_files: true so the existing observatory/, pose-fusion/,
 # pointcloud/, nvsim/, and root index.html demos are preserved.
 #
 # Demos 04 and 05 require a Mixamo "X Bot.fbx" placed in assets/.
 # That file is intentionally gitignored (license boundary), so this
 # workflow does NOT ship it. Demos 01-03 work standalone; the index
 # page documents the FBX requirement honestly.
 on:
  push:
    branches: [main]
    paths:
      - 'examples/three.js/**'
      - '.github/workflows/threejs-pages.yml'
  workflow_dispatch:
 permissions:
  contents: write
 concurrency:
  group: threejs-pages
  cancel-in-progress: true
 jobs:
  build-and-deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Stage demos for Pages
        run: |
          mkdir -p _site/three.js
          # Copy everything except the local Python server (CI doesn't need it)
          # and any stray scratch screenshots.
          cp -r examples/three.js/demos      _site/three.js/demos
          cp -r examples/three.js/screenshots _site/three.js/screenshots
          cp    examples/three.js/README.md   _site/three.js/README.md
          # An index.html that lists the 5 demos with the FBX caveat.
          cp examples/three.js/index.html _site/three.js/index.html
          # Mixamo FBX is gitignored — assets dir won't exist in CI.
          # Drop an empty placeholder so the relative path 'assets/' resolves
          # to a directory listing (404 on missing file) instead of an opaque
          # network error. Browsers showing the 404 path makes the failure
          # visible to anyone trying demos 04/05 without their own FBX.
          mkdir -p _site/three.js/assets
          cat > _site/three.js/assets/README.txt <<'EOF'
          The Mixamo "X Bot.fbx" required by demos 04-skinned-fbx.html and
          05-skinned-realtime.html is intentionally not redistributed here.
          Download your own from https://mixamo.com (FBX Binary, T-Pose,
          Without Skin) and place it here as "X Bot.fbx" if you want to
          run those demos locally. See examples/three.js/README.md in the
          repo for context.
          EOF
          echo "Staged contents:"
          ls -R _site/three.js/ | head -30
      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: _site
          # Critical: preserve observatory/, pose-fusion/, pointcloud/, nvsim/
          # and the root index.html already on gh-pages.
          keep_files: true
          # head_commit only exists on push events; a manual workflow_dispatch
          # run has no head_commit, so fall back to a fixed label instead of
          # publishing with an empty message suffix.
          commit_message: "three.js demos: ${{ github.event.head_commit.message || 'manual workflow_dispatch run' }}"
@@ -19,24 +19,8 @@ jobs:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}
-      # Identity must be set BEFORE any operation that can create a commit.
+      - name: Update submodules to latest main
-      # `git submodule update --remote --merge` used to fail here with
+        run: git submodule update --remote --merge
      # "Committer identity unknown" because the merge inside vendor/ruvector
      # needs an author when the pinned commit isn't a fast-forward of upstream.
      - name: Configure git identity
        run: |
          git config --global user.name  "github-actions[bot]"
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
      # Use a plain `--remote` checkout (detached HEAD at each submodule's
      # configured `branch` tip from .gitmodules) rather than `--merge`. We only
      # want to bump the superproject's gitlink to the latest upstream commit;
      # there's no reason to create merge commits inside the vendored repos, and
      # `--merge` breaks whenever the current pin has diverged from that branch.
      - name: Update submodules to latest tracked branch
        run: |
          git submodule sync --recursive
          git submodule update --remote --recursive
      - name: Check for changes
        id: check
@@ -45,22 +29,21 @@ jobs:
            echo "changed=false" >> "$GITHUB_OUTPUT"
          else
            echo "changed=true" >> "$GITHUB_OUTPUT"
            echo "--- submodule pointer changes ---"
            git submodule status --recursive || true
            git diff --submodule=log -- vendor/ || true
          fi
      - name: Create PR with updates
        if: steps.check.outputs.changed == 'true'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          BRANCH="chore/update-submodules-$(date +%Y%m%d-%H%M%S)"
          git checkout -b "$BRANCH"
          git add vendor/
-          git commit -m "chore: update vendor submodules to latest upstream"
+          git commit -m "chore: update vendor submodules to latest main"
          git push origin "$BRANCH"
          gh pr create \
            --title "chore: update vendor submodules" \
-            --body "Automated submodule update to the latest upstream commit on each submodule's tracked branch (see \`.gitmodules\`). Review the pointer diff before merging." \
+            --body "Automated submodule update to latest upstream main." \
            --base main \
            --head "$BRANCH"
        env:
@@ -7,7 +7,6 @@ on:
      - 'archive/v1/src/core/**'
      - 'archive/v1/src/hardware/**'
      - 'archive/v1/data/proof/**'
      - 'archive/v1/requirements-lock.txt'
      - '.github/workflows/verify-pipeline.yml'
  pull_request:
    branches: [ main, master ]
@@ -15,7 +14,6 @@ on:
      - 'archive/v1/src/core/**'
      - 'archive/v1/src/hardware/**'
      - 'archive/v1/data/proof/**'
      - 'archive/v1/requirements-lock.txt'
      - '.github/workflows/verify-pipeline.yml'
  workflow_dispatch:
@@ -30,11 +28,9 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
@@ -61,18 +57,7 @@ jobs:
          "
      - name: Run pipeline verification
-        working-directory: archive/v1
+        working-directory: v1
        env:
          # Pin thread count for scipy.fft / BLAS — multi-threaded reduction
          # order is otherwise non-deterministic across CI runs (issue #560
          # follow-up: 9- and 6-decimal quantization were not enough because
          # the divergence is from threading order, not SIMD reordering).
          # Single-threaded keeps the proof reproducible at a ~2-3x slowdown.
          OMP_NUM_THREADS: "1"
          OPENBLAS_NUM_THREADS: "1"
          MKL_NUM_THREADS: "1"
          VECLIB_MAXIMUM_THREADS: "1"
          NUMEXPR_NUM_THREADS: "1"
        run: |
          echo "=== Running pipeline verification ==="
          python data/proof/verify.py
@@ -80,13 +65,7 @@ jobs:
          echo "Pipeline verification PASSED."
      - name: Run verification twice to confirm determinism
-        working-directory: archive/v1
+        working-directory: v1
        env:
          OMP_NUM_THREADS: "1"
          OPENBLAS_NUM_THREADS: "1"
          MKL_NUM_THREADS: "1"
          VECLIB_MAXIMUM_THREADS: "1"
          NUMEXPR_NUM_THREADS: "1"
        run: |
          echo "=== Second run for determinism confirmation ==="
          python data/proof/verify.py
@@ -13,18 +13,6 @@ firmware/esp32-csi-node/managed_components/
 firmware/esp32-csi-node/dependencies.lock
 firmware/esp32-csi-node/sdkconfig.defaults.bak
 # ESP-IDF set-target backup (local only)
 firmware/esp32-hello-world/sdkconfig.old
 # Host-built firmware test binaries (compiled from test/*.c, not source)
 firmware/esp32-csi-node/test/test_adr110
 firmware/esp32-csi-node/test/test_vitals
 firmware/esp32-csi-node/test/fuzz_serialize
 firmware/esp32-csi-node/test/fuzz_edge
 firmware/esp32-csi-node/test/fuzz_nvs
 firmware/esp32-csi-node/test/*.exe
 firmware/esp32-csi-node/test/*.obj
 # Claude Flow swarm runtime state
 .swarm/
@@ -264,19 +252,3 @@ firmware/esp32-csi-node/build_firmware.batdata/
 models/
 demo_pointcloud.ply
 demo_splats.json
 # rvCSI napi-rs addon — generated by `napi build` (do not commit)
 v2/crates/rvcsi-node/*.node
 v2/crates/rvcsi-node/binding.js
 v2/crates/rvcsi-node/binding.d.ts
 v2/crates/rvcsi-node/npm/
 # AetherArena private optimization staging — never published until reviewed
 aether-arena/staging/
 # MM-Fi benchmark dataset archives — large data, fetch separately, never commit
 assets/MM-Fi/E0*.zip
 assets/MM-Fi/*.zip
 # through-wall demo: regenerable trained model artifact
 examples/through-wall/model/
@@ -10,22 +10,3 @@
 	path = vendor/sublinear-time-solver
 	url = https://github.com/ruvnet/sublinear-time-solver
 	branch = main
 [submodule "vendor/rvcsi"]
 	path = vendor/rvcsi
 	url = https://github.com/ruvnet/rvcsi
 	branch = main
 [submodule "v2/crates/ruv-neural"]
 	path = v2/crates/ruv-neural
 	url = https://github.com/ruvnet/ruv-neural.git
 	branch = main
 [submodule "vendor/rufield"]
 	path = vendor/rufield
 	url = https://github.com/ruvnet/rufield
 [submodule "v2/crates/ruview-swarm"]
 	path = v2/crates/ruview-swarm
 	url = https://github.com/ruvnet/ruv-drone.git
 	branch = main
 [submodule "v2/crates/worldgraph"]
 	path = v2/crates/worldgraph
 	url = https://github.com/ruvnet/worldgraph.git
 	branch = main
@@ -8,23 +8,21 @@ Dual codebase: Python v1 (`v1/`) and Rust port (`v2/`).
 | Crate | Description |
 |-------|-------------|
 | `wifi-densepose-core` | Core types, traits, error types, CSI frame primitives |
-| `wifi-densepose-signal` | SOTA signal processing + RuvSense multistatic sensing (16 modules) |
+| `wifi-densepose-signal` | SOTA signal processing + RuvSense multistatic sensing (14 modules) |
 | `wifi-densepose-nn` | Neural network inference (ONNX, PyTorch, Candle backends) |
-| `wifi-densepose-train` | Training pipeline with ruvector integration + ruview_metrics; MAE pretraining recipe (`mae.rs`, ADR-152 §2.3) + WiFlow-STD port (`wiflow_std/`, tch-gated) |
+| `wifi-densepose-train` | Training pipeline with ruvector integration + ruview_metrics |
 | `wifi-densepose-mat` | Mass Casualty Assessment Tool — disaster survivor detection |
-| `wifi-densepose-hardware` | ESP32 aggregator, TDM protocol, channel hopping firmware; `ieee80211bf/` 802.11bf forward-compat protocol model (ADR-153) |
+| `wifi-densepose-hardware` | ESP32 aggregator, TDM protocol, channel hopping firmware |
 | `wifi-densepose-ruvector` | RuVector v2.0.4 integration + cross-viewpoint fusion (5 modules) |
 | `wifi-densepose-api` | REST API (Axum) |
 | `wifi-densepose-db` | Database layer (Postgres, SQLite, Redis) |
 | `wifi-densepose-config` | Configuration management |
 | `wifi-densepose-wasm` | WebAssembly bindings for browser deployment |
-| `wifi-densepose-cli` | CLI tool (`wifi-densepose` binary) — `calibrate`/`calibrate-serve`/`enroll`/`train-room`/`room-watch` + MAT (MAT gated behind the `mat` feature; build `--no-default-features` for the aarch64/appliance calibration binary) |
+| `wifi-densepose-cli` | CLI tool (`wifi-densepose` binary) |
 | `wifi-densepose-calibration` | ADR-151 per-room calibration & specialist training — `baseline → enroll → extract → train` → bank of small specialists (presence/posture/breathing/heartbeat/restlessness/anomaly) + multistatic fusion; pure Rust, edge-deployable |
 | `wifi-densepose-sensing-server` | Lightweight Axum server for WiFi sensing UI |
 | `wifi-densepose-wifiscan` | Multi-BSSID WiFi scanning (ADR-022) |
 | `wifi-densepose-vitals` | ESP32 CSI-grade vital sign extraction (ADR-021) |
 | `nvsim` | Deterministic NV-diamond magnetometer pipeline simulator (ADR-089) — standalone leaf, WASM-ready |
 | `vendor/rvcsi` (submodule) | **rvCSI** — edge RF sensing runtime (ADR-095/096): 9 crates (`rvcsi-core`/`-dsp`/`-events`/`-adapter-file`/`-adapter-nexmon`/`-ruvector`/`-runtime`/`-node`/`-cli`). Lives in its own repo ([github.com/ruvnet/rvcsi](https://github.com/ruvnet/rvcsi)), vendored here under `vendor/rvcsi`, published to crates.io as `rvcsi-* 0.3.x` and to npm as `@ruv/rvcsi`. Not a `v2/` workspace member — depend on the published crates (or the submodule's `crates/rvcsi-*` paths). Normalized `CsiFrame`/`CsiWindow`/`CsiEvent` schema, validate-before-FFI, reusable DSP, typed confidence-scored events, the napi-c Nexmon shim (real nexmon_csi `.pcap` from a Raspberry Pi 5 / 4 / 3B+ — BCM43455c0), the napi-rs SDK, the `rvcsi` CLI, a Claude Code plugin. |
 | `vendor/rufield` (submodule) | **RuField MFS** — the open spec for camera-free multimodal field sensing (ADR-260). A common `FieldEvent`/`FieldTensor`/`FusionGraph`/`PrivacyClass`/`ProvenanceReceipt` model *above* WiFi CSI/CIR/BFLD, UWB, BLE Channel Sounding, mmWave radar, ultrasound, subsonic, infrared, and quantum sensors. Lives in its own repo ([github.com/ruvnet/rufield](https://github.com/ruvnet/rufield)), vendored here under `vendor/rufield`. Not a `v2/` workspace member. v0.1 reference stack = 7 crates (`rufield-core`/`-provenance`/`-privacy`/`-adapters`/`-fusion`/`-bench`/`-viewer`), 72 tests/0 failed; `rufield-viewer` is an Axum + vanilla-JS read-only dashboard (`cargo run -p rufield-viewer`) completing ADR-260 §27.9. The WiFi-CSI modality is now **real-replay-backed** via `CsiReplayAdapter` (ingests real captured `.csi.jsonl` → fused presence/breathing inferences; replay-from-file, unlabeled CSI-variance proxy, not validated accuracy); mmWave/thermal + all synthetic-bench F1 numbers remain **SYNTHETIC** (no live hardware — live streaming + labeled accuracy are roadmap). |
 | `wifi-densepose-rufield` | ADR-262 P1 **anti-corruption bridge** — converts RuView WiFi-CSI sensing output (`SensingSnapshot` mirroring `SensingUpdate` + `TrustedOutput`, owned primitives, no dep on `wifi-densepose-sensing-server`) into **signed RuField `FieldEvent`s** (`Modality::WifiCsi`, real `timestamp_ns`, sha256 + ed25519 provenance, `synthetic=false`). The single coupling point between RuView and the standalone RuField MFS spec (§5.4); path-deps the `vendor/rufield` submodule crates (`rufield-core`/`-provenance`/`-privacy`/`-fusion`). **Critical §3.3 privacy mapping** (`map_privacy`): maps RuView class → RuField P0–P5 by **information content, never byte value**, fail-closed (`Derived → P4/P5`, never P1; `demoted` floors to ≥ P2). 15 tests / 0 failed (round-trip / `is_fusable` / fusion-ingest / privacy-safety / determinism). P1 plumbing — not wired into the live server (P3), no accuracy claim. |
 | `ruview-swarm` | Drone swarm control system (ADR-148) — hierarchical-mesh topology, Raft consensus, MARL, CSI sensing payload, MAVLink/PX4 compat, Ruflo AI-agent integration |
 ### RuvSense Modules (`signal/src/ruvsense/`)
 | Module | Purpose |
@@ -42,8 +40,6 @@ Dual codebase: Python v1 (`v1/`) and Rust port (`v2/`).
 | `cross_room.rs` | Environment fingerprinting, transition graph |
 | `gesture.rs` | DTW template matching gesture classifier |
 | `adversarial.rs` | Physically impossible signal detection, multi-link consistency |
 | `cir.rs` | ADR-134 CSI→CIR via ISTA L1 sparse recovery (NeumannSolver warm-start) |
 | `calibration.rs` | ADR-135 empty-room baseline (Welford amplitude + von Mises phase, drift trigger) |
 ### Cross-Viewpoint Fusion (`ruvector/src/viewpoint/`)
 | Module | Purpose |
@@ -74,17 +70,14 @@ All 5 ruvector crates integrated in workspace:
 - ADR-030: RuvSense persistent field model (Proposed)
 - ADR-031: RuView sensing-first RF mode (Proposed)
 - ADR-032: Multistatic mesh security hardening (Proposed)
 - ADR-148: Drone swarm control system / `ruview-swarm` (In Progress)
 - ADR-152: WiFi-Pose SOTA 2026 intake — geometry conditioning, WiFlow-STD benchmark (measurement (a) complete: claims MEASURED-EQUIVALENT at ~96% PCK@20), MAE recipe (Proposed; §2.1–2.3, 2.6 implemented)
 - ADR-153: IEEE 802.11bf-2025 forward-compatibility protocol model (Accepted — amends ADR-152 §2.4)
 ### Supported Hardware
 | Device | Port | Chip | Role | Cost |
 |--------|------|------|------|------|
-| ESP32-S3 (8MB flash) | COM9 (ruvzen, was COM7) | Xtensa dual-core | WiFi CSI sensing node | ~$9 |
+| ESP32-S3 (8MB flash) | COM7 | Xtensa dual-core | WiFi CSI sensing node | ~$9 |
 | ESP32-S3 SuperMini (4MB) | — | Xtensa dual-core | WiFi CSI (compact) | ~$6 |
-| ESP32-C6 + Seeed MR60BHA2 | COM12 (ruvzen, was COM4) | RISC-V + 60 GHz FMCW | mmWave HR/BR/presence + WiFi CSI | ~$15 |
+| ESP32-C6 + Seeed MR60BHA2 | COM4 | RISC-V + 60 GHz FMCW | mmWave HR/BR/presence | ~$15 |
 | HLK-LD2410 | — | 24 GHz FMCW | Presence + distance | ~$3 |
 **Not supported:** ESP32 (original), ESP32-C3 — single-core, can't run CSI DSP pipeline.
@@ -141,14 +134,17 @@ Crates must be published in dependency order:
 2. `wifi-densepose-vitals` (no internal deps)
 3. `wifi-densepose-wifiscan` (no internal deps)
 4. `wifi-densepose-hardware` (no internal deps)
-5. `wifi-densepose-signal` (depends on core)
+5. `wifi-densepose-config` (no internal deps)
-6. `wifi-densepose-nn` (no internal deps, workspace only)
+6. `wifi-densepose-db` (no internal deps)
-7. `wifi-densepose-ruvector` (no internal deps, workspace only)
+7. `wifi-densepose-signal` (depends on core)
-8. `wifi-densepose-train` (depends on signal, nn)
+8. `wifi-densepose-nn` (no internal deps, workspace only)
-9. `wifi-densepose-mat` (depends on core, signal, nn)
+9. `wifi-densepose-ruvector` (no internal deps, workspace only)
-10. `wifi-densepose-wasm` (depends on mat)
+10. `wifi-densepose-train` (depends on signal, nn)
-11. `wifi-densepose-sensing-server` (depends on wifiscan)
+11. `wifi-densepose-mat` (depends on core, signal, nn)
-12. `wifi-densepose-cli` (depends on mat)
+12. `wifi-densepose-api` (no internal deps)
 13. `wifi-densepose-wasm` (depends on mat)
 14. `wifi-densepose-sensing-server` (depends on wifiscan)
 15. `wifi-densepose-cli` (depends on mat)
 ### Validation & Witness Verification (ADR-028)
@@ -1,78 +0,0 @@
 # PROOF — reproduce every claim, or find the one we can't yet
 This project (RuView / wifi-densepose) has been publicly called "AI slop" and
 "fake." This document is the answer: **a skeptic can clone the repo, run one
 script, and have every headline claim either verified on their own machine or
 shown — explicitly — as "CLAIMED, not yet reproduced (here's exactly what it
 needs)."** Nothing below is asserted without a command you can run.
 ```bash
 git clone https://github.com/ruvnet/RuView && cd RuView
 bash scripts/prove.sh          # core gate + the anti-slop assertion tests
 bash scripts/prove.sh --full   # also attempt the feature-gated subset
 ```
 `prove.sh` exits 0 only if every **non-gated** claim passes. Gated claims never
 fail the run; they print the prerequisite (a GPU, a dataset, real hardware, a
 trained checkpoint) so you can reproduce them yourself.
 ## Grading
 - **MEASURED** — reproduced on our hardware, with the exact command recorded, and
  pinned by a test that *fails on the pre-fix code*. `prove.sh` re-runs these.
 - **CLAIMED** — cited from a source, or measured by the source, but not
  reproduced in this repo's automated harness.
 - **DATA-GATED / HARDWARE-GATED** — the *code path* is real and tested, but the
  *accuracy/throughput claim* needs data or hardware we don't ship. We never
  fabricate the number; the code carries a typed error or a `weights_trained`/
  provenance flag instead.
 ## The hard gate (run on any machine with Rust + Python)
 | Claim | Grade | Reproduce |
 |---|---|---|
 | Rust workspace: 3,128 tests, 0 failed | **MEASURED** | `cd v2 && cargo test --workspace --no-default-features` |
 | Deterministic CSI pipeline proof (bit-exact SHA-256) | **MEASURED** | `python archive/v1/data/proof/verify.py` → `VERDICT: PASS` |
 ## Anti-slop assertion tests (each fails on the pre-fix code)
 | Claim | Grade | Test (run via `cargo test -p <crate> <name>`) |
 |---|---|---|
 | Fusion crafted-input DoS panics are closed (ADR-156 §2.2) | **MEASURED** | `wifi-densepose-ruvector :: triangulation_out_of_range_index_returns_none_no_panic` |
 | **The "Soul Signature" identity claim, honestly bounded:** on WiFi-only cardiac+respiratory channels two people are **not separable** (gap ≈ 0.0005) | **MEASURED** | `wifi-densepose-bfld :: cardiac_alone_cannot_separate_identity_matches_audit` |
 | OccWorld `predict()` is real (input-dependent), not random noise | **MEASURED** | `wifi-densepose-occworld-candle :: predict_is_deterministic_for_same_input` |
 | Pose runtime emits frames under its own default config (ADR-159 A1) | **MEASURED** | `cog-pose-estimation :: default_config_emits_frames_with_real_model` |
 | Person-count flags untrained classes — no count inflation (ADR-159 A2) | **MEASURED** | `cog-person-count :: untrained_class_argmax_is_flagged_low_confidence` |
 | Medical edge skills carry a "not a medical device" disclaimer (ADR-160 A1) | **MEASURED** | `wifi-densepose-wasm-edge :: a1_med_modules_have_clinical_disclaimer` (`--features std`) |
 | Survivor dedup 3→1, count-inflation killed (ADR-158 §2) | **MEASURED** | `wifi-densepose-mat :: test_identical_vitals_no_location_dedup_to_one` (`--features mat`) |
 ## Measured performance (criterion; reproduce on your machine)
 | Claim | Grade | Reproduce |
 |---|---|---|
 | PSD FFT-planner cache 2.0–3.1×, DTW band 2.4–4.1× (ADR-154) | **MEASURED** | `cd v2 && cargo bench -p wifi-densepose-signal` |
 | fuse() double-clone removed ~2.17× marshalling (ADR-156) | **MEASURED** | `cd v2 && cargo bench -p wifi-densepose-ruvector --bench fusion_bench` |
 | zero-copy ORT input ~1.48× (ADR-155) | **MEASURED** | `cd v2 && cargo bench -p wifi-densepose-nn --features onnx --bench onnx_bench` |
 | pointcloud splats 9→2 passes ~1.24× (ADR-160 research) | **MEASURED** | `cd v2 && cargo bench -p wifi-densepose-pointcloud --bench splats_bench` |
 | native wlanapi multi-BSSID scan 9.74 Hz (vs netsh ~2 Hz) | **MEASURED (Windows)** | `cd v2 && cargo test -p wifi-densepose-wifiscan -- --ignored measure_native_scan_rate` |
 | wasm-edge `process_frame` hot-path latency (host proxy, ADR-163) | **MEASURED-on-host** (NOT the ESP32/WASM3 budget — needs hardware) | `cd v2/crates/wifi-densepose-wasm-edge && cargo bench --features std` |
 | cog steady-state CPU infer latency ~305 µs (ADR-163; NOT the manifest cold-start) | **MEASURED-on-host** | `cd v2 && cargo bench -p cog-person-count -p cog-pose-estimation --no-default-features --bench infer_bench` |
 ## What we do NOT claim (the honest negatives — the strongest anti-slop signal)
 | Capability | Status |
 |---|---|
 | **Named person-identity from WiFi** | **NOT achieved, and measured why.** The §3.6 matcher is real, but identity does not lock on WiFi-only channels (gap 0.0005). DATA-GATED on a real enrollment feeding the AETHER/body-resonance channel — never done. No named-identity claim is made. |
 | WiFlow-STD ~96% PCK@20 | **CLAIMED-reproduced** on our RTX 5080 (`benchmarks/wiflow-std/RESULTS.md`); HARDWARE-GATED for you (needs an NVIDIA GPU + the MM-Fi dataset). The upstream *shipped checkpoint* was **REFUTED** (0.08% PCK) — we publish that. |
 | OccWorld trajectory accuracy | DATA-GATED on a trained checkpoint; `predict()` carries `weights_trained=false` until one is loaded — never silently faked. |
 | Edge-skill detection accuracy (seizure, weapon, affect, …) | UNVALIDATED — every such module is now disclaimer-gated as experimental/research; the DSP is real, the accuracy is not claimed. |
 | 802.11bf-2025 OTA conformance | No commodity silicon ships a conformant interface as of 2026; ours is a simulation-tested forward-compat protocol model, not a certified implementation. |
 ## Provenance
 Every claim above traces to a committed ADR (`docs/adr/ADR-154`…`ADR-163`), a
 test, a criterion bench, `benchmarks/wiflow-std/RESULTS.md`, or
 `benchmarks/edge-latency/RESULTS.md`. The history
 includes published **retractions** (the 92.9% PCK retraction; the WiFlow-STD
 shipped-checkpoint refutation; the NV-diamond BOM reality check) — a faker hides
 failures; we commit them.
@@ -1,25 +1,21 @@
 # π RuView
 <p align="center">
-  <a href="https://cognitum.one/seed">
+  <a href="https://x.com/rUv/status/2037556932802761004">
-    <img src="assets/ruview-seed.png" alt="RuView - WiFi DensePose" width="100%">
+    <img src="assets/ruview-small-gemini.jpg" alt="RuView - WiFi DensePose" width="100%">
  </a>
 </p>
 <p align="center">
  <a href="https://cognitum.one/seed">
    <img src="assets/seed.png" alt="Cognitum Seed" width="100%">
  </a>
 </p>
 > **Beta Software** — Under active development. APIs and firmware may change. Known limitations:
 > - ESP32-C3 and original ESP32 are not supported (single-core, insufficient for CSI DSP)
 > - Single ESP32 deployments have limited spatial resolution — use 2+ nodes or add a [Cognitum Seed](https://cognitum.one) for best results
 > - Camera-free pose accuracy is limited — use [camera ground-truth training](docs/adr/ADR-079-camera-ground-truth-training.md) for 92.9% PCK@20
 >
 > Contributions and bug reports welcome at [Issues](https://github.com/ruvnet/RuView/issues).
 ## **See through walls with WiFi** ##
-**Turn ordinary WiFi into a spatial intelligence / sensing system.** Detect people, measure breathing and heart rate, track movement, and monitor rooms — through walls, in the dark, with no cameras or wearables. Just physics.
+**Turn ordinary WiFi into a spatial intelligence / sensing system.** Detect people, measure breathing and heart rate, track movement, and monitor rooms — through walls, in the dark, with no cameras or wearables. Just physics.
 Works natively with the four major smart-home ecosystems: **[Home Assistant](docs/integrations/home-assistant.md)** via the HA-DISCO MQTT publisher, **[Apple Home & HomePod](docs/user-guide-apple-homepod.md)** as a discoverable HAP-1.1 bridge, **[Google Home](docs/integrations/home-assistant.md)** + **[Amazon Alexa](docs/integrations/home-assistant.md)** via the same HA bridge or a [Matter](docs/adr/ADR-122-bfld-ruview-ha-matter-exposure.md) endpoint. Siri, Google Assistant, and Alexa can voice presence and vitals by room with zero custom skills.
 [![Works with Home Assistant](https://img.shields.io/badge/Works%20with-Home%20Assistant-blue?logo=home-assistant&logoColor=white&labelColor=41BDF5)](docs/integrations/home-assistant.md) [![Works with Matter](https://img.shields.io/badge/Works%20with-Matter-blue?labelColor=4285F4)](docs/adr/ADR-122-bfld-ruview-ha-matter-exposure.md) [![Works with Apple Home](https://img.shields.io/badge/Works%20with-Apple%20Home-black?logo=apple)](docs/user-guide-apple-homepod.md) [![Works with Google Home](https://img.shields.io/badge/Works%20with-Google%20Home-blue?logo=googlehome)](docs/integrations/home-assistant.md) [![Works with Alexa](https://img.shields.io/badge/Works%20with-Alexa-blue?logo=amazon&logoColor=white&labelColor=00CAFF)](docs/integrations/home-assistant.md)
 > Drop into any **Home Assistant** install with one `--mqtt` flag. Or pair into **Apple Home / Google Home / Alexa / SmartThings** as a Matter Bridge. Ships 21 entities per node (11 raw signals + 10 inferred semantic states: someone-sleeping, possible-distress, room-active, elderly-inactivity-anomaly, meeting-in-progress, bathroom-occupied, fall-risk-elevated, bed-exit, no-movement, multi-room-transition) plus 3 starter HA Blueprints. See [`docs/integrations/home-assistant.md`](docs/integrations/home-assistant.md) · [ADR-115](docs/adr/ADR-115-home-assistant-integration.md).
 ### π RuView is a WiFi sensing platform that turns radio signals into spatial intelligence.
@@ -36,7 +32,7 @@ Built on [RuVector](https://github.com/ruvnet/ruvector/) and [Cognitum Seed](htt
 The system learns each environment locally using spiking neural networks that adapt in under 30 seconds, with multi-frequency mesh scanning across 6 WiFi channels that uses your neighbors' routers as free radar illuminators. Every measurement is cryptographically attested via an Ed25519 witness chain.
-RuView turns ordinary WiFi into a contactless sensor. A $9 ESP32 board reads the radio reflections off the people in a room, and a small pretrained model — published on Hugging Face at [`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained) — tells you who's there, how they're breathing, and how their heart rate is trending. The model fits in 8 KB (4-bit quantized) and runs in microseconds on a Raspberry Pi. (The [v2 encoder](https://huggingface.co/ruvnet/wifi-densepose-pretrained) reports an honest, label-free held-out **temporal-triplet accuracy of 82.3%** — up from 66.4% raw; the older "100% presence" figure was measured on a single-class recording and has been retracted in favor of this.) No cameras, no wearables, no app on the user's phone.
+RuView also supports pose estimation (17 COCO keypoints via the WiFlow architecture), trained entirely without cameras using 10 sensor signals — a technique pioneered from the original *DensePose From WiFi* research at Carnegie Mellon University.
 ### Built for low-power edge applications
@@ -49,30 +45,20 @@ RuView turns ordinary WiFi into a contactless sensor. A $9 ESP32 board reads the
 [![Vital Signs](https://img.shields.io/badge/vital%20signs-breathing%20%2B%20heartbeat-red.svg)](#vital-sign-detection)
 [![ESP32 Ready](https://img.shields.io/badge/ESP32--S3-CSI%20streaming-purple.svg)](#esp32-s3-hardware-pipeline)
 [![crates.io](https://img.shields.io/crates/v/wifi-densepose-ruvector.svg)](https://crates.io/crates/wifi-densepose-ruvector)
 [![Downloads](https://img.shields.io/badge/downloads-10M%2B-brightgreen.svg)](#-edge-module-catalog)
-> | What | How | Speed / scale |
+> | What | How | Speed |
-> |------|-----|---------------|
+> |------|-----|-------|
-> | 🫁 **Breathing rate** | Bandpass 0.1–0.5 Hz on wrapped phase, circular variance, zero-crossing BPM ([#593](https://github.com/ruvnet/RuView/issues/593)) | 6–30 BPM, real-time |
+> | 🦴 **Pose estimation** | CSI subcarrier amplitude/phase → 17 COCO keypoints | 171K emb/s (M4 Pro) |
-> | 💓 **Heart rate** | Bandpass 0.8–2.0 Hz, zero-crossing BPM | 40–120 BPM, real-time |
+> | 🫁 **Breathing detection** | Bandpass 0.1-0.5 Hz → zero-crossing BPM | 6-30 BPM |
-> | 👤 **Presence detection** | Trained head on Hugging Face ([`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained); v2 encoder = 82.3% held-out temporal-triplet acc, honestly re-benchmarked) + a phase-variance fallback that needs no model | < 1 ms, ~30 s ambient calibration |
+> | 💓 **Heart rate** | Bandpass 0.8-2.0 Hz → zero-crossing BPM | 40-120 BPM |
-> | 🧬 **CSI embeddings** | 128-dim contrastive encoder shipped on Hugging Face, 4-bit quantised variant fits in 8 KB | **164,183 emb/s** on M4 Pro |
+> | 👤 **Presence sensing** | Trained model + PIR fusion — 100% accuracy | 0.012 ms latency |
-> | 🦴 **17-keypoint pose estimation** | `cog-pose-estimation` Cog v0.0.1 — signed aarch64 + x86_64 binaries on GCS, loads `pose_v1.safetensors` via Candle. Train your own from paired data in 2.1 s on an RTX 5080 ([ADR-101](docs/adr/ADR-101-pose-estimation-cog.md), [benchmarks](docs/benchmarks/pose-estimation-cog.md)). **SOTA on MM-Fi:** [`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose) hits **82.69% torso-PCK@20** (ensemble 83.59%), beating MultiFormer (72.25%) and CSI2Pose (68.41%) on the matched MM-Fi `random_split` protocol — self-corrected and auditable on [AetherArena](https://huggingface.co/spaces/ruvnet/aether-arena) | 8.4 ms cold-start on a Pi 5 |
+> | 🧱 **Through-wall** | Fresnel zone geometry + multipath modeling | Up to 5m depth |
-> | 🚶 **Motion / activity** | Motion-band power + phase acceleration | Real-time |
+> | 🧠 **Edge intelligence** | 8-dim feature vectors + RVF store on Cognitum Seed | $140 total BOM |
-> | 🤸 **Fall detection** | Phase-acceleration threshold + 3-frame debounce + 5 s cooldown ([#263](https://github.com/ruvnet/RuView/issues/263)) | < 200 ms |
+> | 🎯 **Camera-free training** | 10 sensor signals, no labels needed | 84s on M4 Pro |
-> | 🧮 **Multi-person count** | Adaptive P95 normalisation + runtime-tunable dedup factor (`/api/v1/config/dedup-factor`, [#491](https://github.com/ruvnet/RuView/pull/491)). Six specialised learned counters available as Cogs: `occupancy-zones`, `elevator-count`, `queue-length`, `customer-flow`, `clean-room`, `person-matching` | Real-time, self-calibrating |
+> | 📷 **Camera-supervised training** | MediaPipe + ESP32 CSI → 92.9% PCK@20 | 19 min on laptop |
-> | 🌍 **World model prediction** | OccWorld TransVQVAE — 15-frame future occupancy prediction, 209 ms inference, 3.4 GB VRAM on RTX 5080; fine-tune on your space with `occworld_retrain.py` ([ADR-147](docs/adr/ADR-147-nvidia-cosmos-world-foundation-model-integration.md)) | 15 frames × 200×200×16 vox |
+> | 📡 **Multi-frequency mesh** | Channel hopping across 6 bands, neighbor APs as illuminators | 3x sensing bandwidth |
-> | 🧱 **Through-wall sensing** | Fresnel-zone geometry + multipath modeling | Up to ~5 m, signal-dependent |
+> | 🌐 **3D point cloud** *(optional fusion)* | Camera depth (MiDaS) + WiFi CSI + mmWave radar → unified spatial model | 22 ms pipeline · 19K+ points/frame |
 > | 🧠 **Edge intelligence** | **105-cog catalog** ([ADR-102](docs/adr/ADR-102-edge-module-registry.md)) live from `app-registry.json` — health, security, building, retail, industrial, research, AI, swarm, signal, network, and developer modules. Optional Cognitum Seed adds persistent vector store + kNN + witness chain | $140 total BOM |
 > | 🎯 **Camera-free pre-training** | Self-supervised contrastive encoder, 12.2M training steps on 60K frames, shipped on Hugging Face | 84 s/epoch retrain on M4 Pro |
 > | 📷 **Camera-supervised fine-tune** | MediaPipe + ESP32 CSI paired training, end-to-end Candle pipeline on RTX 5080 ([ADR-079](docs/adr/ADR-079-camera-supervised-pose-finetune.md)) | 2.1 s for 400 epochs (~5 ms/epoch) |
 > | 📡 **Multi-frequency mesh** | Channel hopping across 6 bands, TDM slot scheduling ([ADR-029](docs/adr/ADR-029-multifrequency-mesh.md)) | 3× sensing bandwidth |
 > | 🌐 **3D point cloud fusion** | Camera depth (MiDaS) + WiFi CSI + mmWave radar → unified spatial model | 22 ms pipeline · 19K+ points/frame |
 >
 > Browse the full 105-module catalog (with practical descriptions, sizes, and difficulty) below in [🧩 Edge Module Catalog](#-edge-module-catalog), or visit [seed.cognitum.one/store](https://seed.cognitum.one/store).
 >
 > 🤗 **Pretrained weights**: download from [`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained) — see [Loading the pretrained model](#loading-the-pretrained-model) below for one-command setup.
 ```bash
 # Option 1: Docker (simulated data, no hardware needed)
@@ -80,7 +66,7 @@ docker pull ruvnet/wifi-densepose:latest
 docker run -p 3000:3000 ruvnet/wifi-densepose:latest
 # Open http://localhost:3000
-# Option 2a: Live sensing with ESP32-S3 hardware ($9)
+# Option 2: Live sensing with ESP32-S3 hardware ($9)
 # Flash firmware, provision WiFi, and start sensing:
 python -m esptool --chip esp32s3 --port COM9 --baud 460800 \
  write_flash 0x0 bootloader.bin 0x8000 partition-table.bin \
@@ -88,39 +74,13 @@ python -m esptool --chip esp32s3 --port COM9 --baud 460800 \
 python firmware/esp32-csi-node/provision.py --port COM9 \
  --ssid "YourWiFi" --password "secret" --target-ip 192.168.1.20
 # Option 2b: WiFi 6 + 802.15.4 research sensing with ESP32-C6 ($6-10, ADR-110)
 # Same csi-node firmware compiled for the C6 target — picks up the C6
 # overlay (sdkconfig.defaults.esp32c6) automatically.
 cd firmware/esp32-csi-node
 idf.py set-target esp32c6 && idf.py build
 idf.py -p COM6 flash
 # C6 boot extras (vs S3): HE-LTF subcarrier tagging in ADR-018 bytes 18-19,
 #   802.15.4 mesh time-sync on channel 15, TWT setup when the AP supports it,
 #   opt-in LP-core wake-on-motion for ~5 µA battery seed nodes.
 # v0.6.7 adds: real LP-core RISC-V motion-gate program (debounce + motion
 #   counter) and a Wi-Fi 6 soft-AP with TWT Responder so two C6 boards can
 #   benchmark real iTWT without buying an 11ax router. Both default off,
 #   flip CONFIG_C6_{LP_CORE,SOFTAP_HE}_ENABLE to turn them on.
 # Option 3: Full system with Cognitum Seed ($140)
 # ESP32 streams CSI → bridge forwards to Seed for persistent storage + kNN + witness chain
 node scripts/rf-scan.js --port 5006           # Live RF room scan
 node scripts/snn-csi-processor.js --port 5006  # SNN real-time learning
 node scripts/mincut-person-counter.js --port 5006  # Correct person counting
 # Option 4: Python — live on PyPI (ADR-117)
 pip install ruview                        # or: pip install wifi-densepose
 # Both ship the same compiled PyO3 wheel (~250 KB, abi3-py310, Linux/macOS/Windows).
 # Add [client] for the asyncio WebSocket + paho-mqtt clients:
 pip install "ruview[client]"              # or: pip install "wifi-densepose[client]"
 # from ruview import BreathingExtractor, HeartRateExtractor   # equivalent to:
 # from wifi_densepose import BreathingExtractor, HeartRateExtractor
 # from ruview.client import SensingClient, RuViewMqttClient
 ```
 [![PyPI ruview](https://img.shields.io/pypi/v/ruview?label=ruview)](https://pypi.org/project/ruview/) [![PyPI wifi-densepose](https://img.shields.io/pypi/v/wifi-densepose?label=wifi-densepose)](https://pypi.org/project/wifi-densepose/)
 > [!NOTE]
 > **CSI-capable hardware recommended.** Presence, vital signs, through-wall sensing, and all advanced capabilities require Channel State Information (CSI) from an ESP32-S3 ($9) or research NIC. The Docker image runs with simulated data for evaluation. Consumer WiFi laptops provide RSSI-only presence detection.
@@ -128,11 +88,10 @@ pip install "ruview[client]"              # or: pip install "wifi-densepose[clie
 >
 > | Option | Hardware | Cost | Full CSI | Capabilities |
 > |--------|----------|------|----------|-------------|
-> | **ESP32 + Cognitum Seed** (recommended) | ESP32-S3 + [Cognitum Seed](https://cognitum.one) | ~$140 | Yes | Presence, motion, breathing, heart rate, fall detection, multi-person counting, 17-keypoint pose (signed Cog binary), 105-cog catalog, persistent vector store, kNN search, witness chain, MCP proxy |
+> | **ESP32 + Cognitum Seed** (recommended) | ESP32-S3 + [Cognitum Seed](https://cognitum.one) | ~$140 | Yes | Pose, breathing, heartbeat, motion, presence + persistent vector store, kNN search, witness chain, MCP proxy |
-> | **ESP32 Mesh** | 3-6× ESP32-S3 + WiFi router | ~$54 | Yes | Same capabilities as above without the persistent-memory features |
+> | **ESP32 Mesh** | 3-6x ESP32-S3 + WiFi router | ~$54 | Yes | Pose, breathing, heartbeat, motion, presence |
 > | **ESP32-C6 research node** ([ADR-110](docs/adr/ADR-110-esp32-c6-firmware-extension.md), [witness](docs/WITNESS-LOG-110.md), [reviewer guide](docs/ADR-110-REVIEW-GUIDE.md), [firmware v0.7.0](https://github.com/ruvnet/RuView/releases/tag/v0.7.0-esp32)) | ESP32-C6-DevKit ($6–10) | ~$10 | Yes (Wi-Fi 6 capable) | Same CSI pipeline as S3 with the dual-target firmware. **Firmware-side ADR-110 substrate now closed** (v0.7.0): ESP-NOW cross-board mesh quantified at **99.56 % match / 104 µs smoothed offset stdev / 3.95× EMA suppression** over a 5-min two-board soak (witness §A0.10), 32-byte UDP sync packet with operator-tunable cadence (§A0.12), ADR-018 byte 19 bit 4 wire-fix sourced from the working ESP-NOW path (§A0.13). Wire format ready for HE-LTF PPDU tagging in ADR-018 bytes 18-19 (firmware encoder + Rust + Python decoders verified end-to-end across 23 unit tests). LP-core motion-gate RISC-V program and Wi-Fi 6 soft-AP with TWT Responder both ship as opt-in code paths (default off). **Hardware-gated for measurement**: HE-LTF live subcarrier capture needs an 11ax AP (IDF v5.4 doesn't expose AP-side HE config — §A0.6); ~5 µA LP-core hibernation needs an INA meter to capture; 802.15.4 raw RX is broken in IDF v5.4 (workaround: ESP-NOW transport, shipped + measured). See witness log for the empirical / claimed split. |
 > | **Research NIC** | Intel 5300 / Atheros AR9580 | ~$50-100 | Yes | Full CSI with 3x3 MIMO |
-> | **Any WiFi** | Windows, macOS, or Linux laptop | $0 | No | RSSI-only: coarse presence and motion (see [tutorial #36](https://github.com/ruvnet/RuView/issues/36)) |
+> | **Any WiFi** | Windows, macOS, or Linux laptop | $0 | No | RSSI-only: coarse presence and motion |
 >
 > No hardware? Verify the signal processing pipeline with the deterministic reference signal: `python archive/v1/data/proof/verify.py`
 >
@@ -150,231 +109,10 @@ pip install "ruview[client]"              # or: pip install "wifi-densepose[clie
  <a href="https://ruvnet.github.io/RuView/pose-fusion.html"><strong>▶ Dual-Modal Pose Fusion Demo</strong></a>
  &nbsp;|&nbsp;
  <a href="https://ruvnet.github.io/RuView/pointcloud/"><strong>▶ Live 3D Point Cloud</strong></a>
  &nbsp;|&nbsp;
  <a href="https://ruvnet.github.io/RuView/three.js/"><strong>▶ three.js Demos (5)</strong></a>
 > The [server](#-quick-start) is optional for visualization and aggregation — the ESP32 [runs independently](#esp32-s3-hardware-pipeline) for presence detection, vital signs, and fall alerts.
 >
 > **Live ESP32 pipeline**: Connect an ESP32-S3 node → run the [sensing server](#sensing-server) → open the [pose fusion demo](https://ruvnet.github.io/RuView/pose-fusion.html) for real-time dual-modal pose estimation (webcam + WiFi CSI). See [ADR-059](docs/adr/ADR-059-live-esp32-csi-pipeline.md).
 >
 > **three.js scene gallery** at [`/three.js/`](https://ruvnet.github.io/RuView/three.js/) — five progressively richer ADR-097 demos: helpers, cinematic, GLTF skinned, FBX skinned, and a live MediaPipe→Mixamo retargeting feed driven by ESP32 CSI. Demos 04 and 05 require a local Mixamo `X Bot.fbx` (license boundary — not redistributed).
 ## 🤗 Pretrained model on Hugging Face
 Pretrained CSI weights live at [`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained) — 12.2M training steps on 60K frames / 610K contrastive triplets, **82.3% held-out temporal-triplet accuracy** (up from 66.4% raw; the older "100% presence" figure was measured on a single-class recording and has been retracted), 4-bit quantized variant fits in 8 KB. The release includes a contrastive **CSI encoder** producing 128-dim embeddings (164,183 emb/s on M4 Pro) and a **presence-detection head**. Per-node LoRA adapters are included for environment-specific fine-tuning.
 ```bash
 # Download the model bundle
 pip install huggingface_hub
 huggingface-cli download ruvnet/wifi-densepose-pretrained --local-dir models/wifi-densepose-pretrained
 ```
 **What works today vs. what's pending wiring:**
 | Consumer | Format used | Status |
 |----------|-------------|--------|
 | Python training / evaluation / embedding extraction | `model.safetensors` | ✅ Works — load with `safetensors.torch.load_file` |
 | Inspect / re-export the bundle | `model.rvf.jsonl` (line-by-line JSON) | ✅ Works — plain JSONL |
 | Sensing-server `--model <PATH>` flag | binary RVF (`RVFS` magic) | ⚠️ Loader does not yet accept the JSONL container |
 **Known gap:** the HF model ships in JSONL RVF format, but `v2/crates/wifi-densepose-sensing-server/src/rvf_container.rs` only parses the binary RVF segment format. Pointing `--model` at `model.rvf.jsonl` currently errors with `invalid magic at offset 0: expected 0x52564653, got 0x7974227B` and the live pipeline degrades to null output rather than falling back to heuristic mode — so for the live sensing-server, run **without** `--model` until a JSONL adapter lands (or the model is re-published as binary RVF). Use the weights from Python / training in the meantime.
 **Quantization choices** (all in the HF repo): `model-q2.bin` (4 KB) · `model-q4.bin` ⭐ recommended (8 KB) · `model-q8.bin` (16 KB) · `model.safetensors` full (48 KB)
 The separate **17-keypoint pose-estimation model** is now published at [`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose) — **82.69% torso-PCK@20** on MM-Fi (single model) / **83.59%** (3-model ensemble + TTA), beating the prior published SOTA MultiFormer (72.25%) and CSI2Pose (68.41%) on the matched `random_split` protocol. See **Results & proof** below.
 ### Results & proof
 | What | Where | Numbers |
 |------|-------|---------|
 | **MM-Fi pose model (SOTA)** | [`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose) | 82.69% torso-PCK@20 (single) · 83.59% (ensemble+TTA) · 75K-param micro variant 74.30% |
 | **AetherArena benchmark Space** | [`ruvnet/aether-arena`](https://huggingface.co/spaces/ruvnet/aether-arena) | self-correcting, auditable MM-Fi leaderboard |
 | **Full MM-Fi study (honest picture)** | [`docs/benchmarks/mmfi-wifi-sensing-study.md`](docs/benchmarks/mmfi-wifi-sensing-study.md) | pose + action; zero-shot cross-subject ~64%, +~30 s in-room calibration → 72.2% |
 | **Efficiency frontier** | [`docs/benchmarks/wifi-pose-efficiency-frontier.md`](docs/benchmarks/wifi-pose-efficiency-frontier.md) | SOTA-beating WiFi pose in a 20 KB int4 edge model |
 | **Pretrained encoder** | [`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained) | 82.3% held-out temporal-triplet, 8 KB int4 |
 | **Reproducible proof (Trust Kill Switch)** | [`archive/v1/data/proof/verify.py`](archive/v1/data/proof/verify.py) + [`expected_features.sha256`](archive/v1/data/proof/expected_features.sha256) | one-command deterministic pipeline replay (SHA-256 of output vs published hash) |
 | **Benchmark-proof ADR** | [ADR-168](docs/adr/ADR-168-benchmark-proof.md) | how the numbers are produced and verified |
 | **Witness attestation** | [`docs/WITNESS-LOG-028.md`](docs/WITNESS-LOG-028.md) | 33-row capability attestation matrix with per-claim evidence |
 ```bash
 # Reproduce the deterministic pipeline proof yourself (must print VERDICT: PASS):
 python archive/v1/data/proof/verify.py
 ```
 Tracked in [#509](https://github.com/ruvnet/RuView/issues/509); see [ADR-079](docs/adr/ADR-079-camera-supervised-pose-finetune.md) phases P7–P9 for the camera-supervised fine-tune path.
 ## 🧩 Edge Module Catalog
 <details>
 <summary><b>🧩 105 edge modules ready to install on a Cognitum appliance</b> &mdash; live catalog from <code>app-registry.json</code> v2.1.0 (updated 2026-05-13). Browse + install at <a href="https://seed.cognitum.one/store">seed.cognitum.one/store</a> or your local appliance <code>http://&lt;appliance&gt;:9000/cogs</code>.</summary>
 Each module is a small signed binary (~400 KB) that runs alongside the WiFi-DensePose sensing stack on a Cognitum-V0 appliance. The catalog updates over the air &mdash; your appliance fetches it via <code>GET /api/v1/edge/registry</code> ([ADR-102](docs/adr/ADR-102-edge-module-registry.md)) and verifies each binary against an Ed25519 signature ([ADR-100](docs/adr/ADR-100-cog-packaging-specification.md)) before install.
 ### 🫀 Health &mdash; <sub>14 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `air-quality-index` | Track indoor air quality with CO2 and particle sensors | 8 KB | Easy |
 | `baby-cry` | Sustained mid-band energy detector for nursery / infant monitoring. Audio-only, no camera. | 451 KB | Easy |
 | `breathing-sync` | Detects when two people breathe in sync | 10 KB | Hard |
 | `cardiac-arrhythmia` | Spots irregular heartbeats and abnormal heart rhythms | 8 KB | Hard |
 | `cough-detect` | Acoustic transient + spectral cough detector with 30s cluster aggregation. Early-warning signal for respiratory illness. | 451 KB | Easy |
 | `dream-stage` | Tracks your sleep stages — light, deep, and dreaming | 14 KB | Hard |
 | `fall-detect` | Two-stage impact + stillness fall detector over ambient feature stream (ESP32 motion / mic). Optional ruview-mode for CSI-based pose reinforcement. | 402 KB | Easy |
 | `gait-analysis` | Detects walking problems and scores fall risk | 12 KB | Hard |
 | `health-monitor` | Contactless heart rate, breathing, sleep, and fall alerts | 30 KB | Med |
 | `respiratory-distress` | Alerts when breathing becomes labored or dangerously fast | 10 KB | Hard |
 | `seizure-detect` | Recognizes seizures and sends immediate alerts | 10 KB | Hard |
 | `sleep-apnea` | Detects when someone stops breathing during sleep | 4 KB | Easy |
 | `snore-monitor` | Periodic low-band energy tracker for sleep-quality / apnea-risk trending. Companion to sleep-apnea cog. | 451 KB | Easy |
 | `vital-trend` | Tracks breathing and heart rate trends over weeks | 6 KB | Med |
 ### 🔒 Security &mdash; <sub>14 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `audit-logger` | Record every action for compliance — tamper-proof log | 8 KB | Easy |
 | `behavioral-profiler` | Learns normal behavior and flags anything unusual | 12 KB | Hard |
 | `fleet-auth` | Manage device certificates and access across all seeds | 12 KB | Med |
 | `glass-break` | Two-phase bang + shatter acoustic detector. Distinguishes glass break from ordinary impulse noise. | 451 KB | Easy |
 | `gunshot-detect` | Saturating peak + exponential decay acoustic detector with optional ruview CSI motion-drop reinforcement. | 451 KB | Easy |
 | `intrusion` | Alerts when an unauthorized person enters a room | 6 KB | Med |
 | `intrusion-detect-ml` | Detect network attacks using machine learning | 14 KB | Hard |
 | `loitering` | Alerts when someone lingers too long in one spot | 3 KB | Easy |
 | `network-firewall` | Block unauthorized network access per cog | 6 KB | Easy |
 | `panic-motion` | Detects sudden panicked or erratic movement | 6 KB | Med |
 | `perimeter-breach` | Guards multiple zones and shows entry direction | 10 KB | Med |
 | `prompt-shield` | Blocks signal replay and injection attacks on the seed | 10 KB | Med |
 | `tailgating` | Catches when someone sneaks in behind a badge holder | 6 KB | Med |
 | `weapon-detect` | Detects concealed metal objects on a person | 8 KB | Hard |
 ### 🏢 Building &mdash; <sub>11 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `beehive-monitor` | Acoustic hive state classifier. Detects healthy / chaotic / queenless / swarming / robbing via hum-band energy + chaos + piping autocorr. | 451 KB | Easy |
 | `elevator-count` | Counts how many people are in an elevator | 8 KB | Med |
 | `energy-audit` | Learns your schedule and cuts wasted energy | 6 KB | Med |
 | `frost-warning` | Predicts frost 6 hours ahead via temperature trend + dewpoint-depression gate. Field/orchard agriculture. | 451 KB | Easy |
 | `hvac-presence` | Turns heating and cooling on when you arrive | 3 KB | Easy |
 | `lighting-zones` | Turns lights on and off as people move between rooms | 4 KB | Easy |
 | `meeting-room` | Shows if a meeting room is free or occupied | 5 KB | Easy |
 | `occupancy-zones` | Counts people in each room through walls | 8 KB | Med |
 | `predictive-maintenance` | Vibration harmonic analyzer for rotating equipment. Tracks F1 / 2×F1 / high-order / sideband energy to score degradation severity. | 451 KB | Easy |
 | `smoke-fire` | Multi-signal smoke and fire detector. Fuses acoustic crackle, thermal drift proxy, and optional ruview CSI plume signature. Not a UL-listed replacement for code-required smoke alarms. | 451 KB | Easy |
 | `water-leak` | Persistent low-amplitude hiss + periodic drip acoustic detector with multi-minute persistence gate. Two-stage likely → confirmed. | 451 KB | Easy |
 ### 🛍️ Retail &mdash; <sub>7 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `customer-flow` | Counts foot traffic in and out of each entrance | 8 KB | Med |
 | `dwell-heatmap` | Shows where customers spend the most time | 6 KB | Med |
 | `package-detect` | Sustained CSI-shift detector for porch / loading bay package arrivals and departures. Requires ESP32 CSI ruview input. | 451 KB | Easy |
 | `parking-occupancy` | Per-zone parking occupancy via ESP32 CSI subcarrier-amplitude shift. Tracks utilization and churn-per-hour. Requires ruview. | 451 KB | Easy |
 | `queue-length` | Estimates line length and wait time | 6 KB | Med |
 | `shelf-engagement` | Detects when customers interact with products | 6 KB | Med |
 | `table-turnover` | Tracks which restaurant tables are free or occupied | 4 KB | Easy |
 ### 🏭 Industrial &mdash; <sub>7 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `clean-room` | Enforces max headcount in controlled environments | 4 KB | Easy |
 | `confined-space` | Monitors workers in tight spaces for safety | 5 KB | Med |
 | `forklift-proximity` | Warns if a forklift gets too close to workers | 10 KB | Hard |
 | `livestock-monitor` | Monitors animals for distress, escape, or illness | 6 KB | Med |
 | `ppe-compliance` | Cog-composition layer: alerts when ruview-densepose detects presence in a restricted zone without an accompanying PPE-camera-cog confirmation vector. | 387 KB | Easy |
 | `slip-fall-zone` | Pre-fall risk detector. Fires when motion-variance drop, splash audio, and optional cautious-gait CSI all signal elevated slip risk. | 451 KB | Easy |
 | `structural-vibration` | Detects dangerous vibrations in buildings or machines | 8 KB | Hard |
 ### 🔬 Research &mdash; <sub>12 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `emotion-detect` | Reads stress and calm from body language and breathing | 10 KB | Hard |
 | `energy-harvester` | Optimize solar and battery for off-grid seed deployment | 6 KB | Med |
 | `gesture-language` | Recognizes sign language gestures in real time | 12 KB | Hard |
 | `ghost-hunter` | Finds unexplained environmental anomalies — for fun | 10 KB | Hard |
 | `happiness-score` | Estimates well-being from movement and mood signals | 8 KB | Med |
 | `hyperbolic-space` | Maps data into curved space for tree-like structures | 12 KB | Hard |
 | `music-conductor` | Reads a conductor's gestures for tempo and dynamics | 12 KB | Hard |
 | `plant-growth` | Tracks plant growth rate and day/night cycles | 8 KB | Med |
 | `rain-detect` | Detects when rain starts, stops, and how heavy it is | 6 KB | Med |
 | `ruview-densepose` | Full body pose tracking from WiFi — no cameras needed | 50 KB | Hard |
 | `sound-classifier` | Identify sounds like glass break, alarm, or baby cry | 16 KB | Hard |
 | `time-crystal` | Experiments with repeating time-pattern symmetry | 12 KB | Hard |
 ### 🤖 AI &mdash; <sub>15 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `anomaly-attractor` | Learns what's normal and catches anything weird | 10 KB | Hard |
 | `cognitive-pipeline` | FastGRNN anomaly gate + SmolLM2 sparse-LLM inference for on-device Pi Zero 2W cognitive events | 320 KB | Hard |
 | `dtw-gesture-learn` | Teach custom hand gestures by showing examples | 14 KB | Med |
 | `ewc-lifelong` | Learns new things without forgetting old lessons | 8 KB | Hard |
 | `federated-learning` | Train AI across seeds without sharing raw data | 18 KB | Hard |
 | `goap-autonomy` | Plans and executes goals on its own | 14 KB | Hard |
 | `meta-adapt` | Automatically tunes itself for best performance | 10 KB | Hard |
 | `micro-hnsw` | Fast on-device fingerprinting and classification | 12 KB | Med |
 | `neural-trader` | Spot market patterns and trends from live data | 20 KB | Hard |
 | `pagerank-influence` | Finds the most influential person in a group | 12 KB | Med |
 | `pattern-sequence` | Detects daily routines and repeated habits | 10 KB | Med |
 | `rag-local` | Search your documents using AI — runs on the seed | 14 KB | Med |
 | `spiking-tracker` | Brain-inspired tracker that runs on tiny hardware | 16 KB | Hard |
 | `temporal-logic` | Enforces safety rules on live event streams | 12 KB | Hard |
 | `time-series-forecast` | Predict sensor trends using historical patterns | 12 KB | Med |
 ### 🐝 Swarm &mdash; <sub>11 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `swarm-backup-restore` | Auto-backup data to other seeds — one-click restore | 8 KB | Easy |
 | `swarm-cluster-monitor` | Live dashboard of every seed's health and status | 6 KB | Easy |
 | `swarm-consensus` | Seeds vote before making critical changes together | 16 KB | Hard |
 | `swarm-delta-sync` | Auto-sync data between seeds — only sends changes | 8 KB | Med |
 | `swarm-deploy` | Install or remove cogs on all seeds at once | 10 KB | Med |
 | `swarm-distributed-store` | Spread data across seeds and search them all at once | 14 KB | Hard |
 | `swarm-edge-orchestrator` | Manage all ESP32 sensor nodes from one place | 14 KB | Hard |
 | `swarm-load-balancer` | Spread queries across seeds so no single one overloads | 10 KB | Med |
 | `swarm-mesh-manager` | Find, connect, and monitor all seeds on your network | 12 KB | Easy |
 | `swarm-mqtt-bridge` | Share events between seeds over MQTT messaging | 6 KB | Easy |
 | `swarm-witness-federation` | Share tamper-proof audit trails across seeds | 12 KB | Hard |
 ### 📡 Signal &mdash; <sub>6 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `coherence-gate` | Filters out noisy signals and keeps clean ones | 8 KB | Med |
 | `flash-attention` | Focuses sensing on specific areas for better accuracy | 12 KB | Med |
 | `optimal-transport` | Measures motion using shape-aware signal comparison | 12 KB | Hard |
 | `person-matching` | Tells apart multiple people in the same room | 18 KB | Hard |
 | `sparse-recovery` | Recovers missing signal data from partial readings | 16 KB | Hard |
 | `temporal-compress` | Shrinks old data to save memory without losing meaning | 14 KB | Med |
 ### 🌐 Network &mdash; <sub>1 module</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `tailscale` | Reach the seed from anywhere via a private WireGuard mesh (Tailscale). Userspace mode — no root. | 700 KB | Med |
 ### 🛠️ Developer &mdash; <sub>7 modules</sub>
 | ID | What it does | Size | Difficulty |
 |----|--------------|-----:|:----------:|
 | `adversarial` | Detects tampered or spoofed sensor signals | 4 KB | Easy |
 | `coherence` | Monitors signal quality across multiple channels | 4 KB | Easy |
 | `gesture` | Core gesture recognition building block for cogs | 6 KB | Med |
 | `interference-search` | Searches many possibilities at once for fast answers | 14 KB | Hard |
 | `psycho-symbolic` | Reasons over knowledge graphs with multiple styles | 16 KB | Hard |
 | `quantum-coherence` | Quantum-inspired model for advanced signal states | 16 KB | Hard |
 | `self-healing-mesh` | Keeps sensor mesh running even when nodes drop out | 14 KB | Hard |
 > ℹ️ Build your own cog: see [ADR-100](docs/adr/ADR-100-cog-packaging-specification.md) for the packaging spec. The first cog this repo ships into the catalog lives in [v2/crates/cog-pose-estimation/](v2/crates/cog-pose-estimation/) (17-keypoint WiFi pose, [ADR-101](docs/adr/ADR-101-pose-estimation-cog.md)).
 </details>
 ## 🔬 How It Works
@@ -490,6 +228,178 @@ These scenarios exploit WiFi's ability to penetrate solid materials — concrete
 </details>
 <details>
 <summary><strong>🧩 Edge Intelligence (<a href="docs/adr/ADR-041-wasm-module-collection.md">ADR-041</a>)</strong> — 60 WASM modules across 13 categories, all implemented (609 tests)</summary>
 Small programs that run directly on the ESP32 sensor — no internet needed, no cloud fees, instant response. Each module is a tiny WASM file (5-30 KB) that you upload to the device over-the-air. It reads WiFi signal data and makes decisions locally in under 10 ms. [ADR-041](docs/adr/ADR-041-wasm-module-collection.md) defines 60 modules across 13 categories — all 60 are implemented with 609 tests passing.
 | | Category | Examples |
 |---|----------|---------|
 | 🏥 | [**Medical & Health**](docs/edge-modules/medical.md) | Sleep apnea detection, cardiac arrhythmia, gait analysis, seizure detection |
 | 🔐 | [**Security & Safety**](docs/edge-modules/security.md) | Intrusion detection, perimeter breach, loitering, panic motion |
 | 🏢 | [**Smart Building**](docs/edge-modules/building.md) | Zone occupancy, HVAC control, elevator counting, meeting room tracking |
 | 🛒 | [**Retail & Hospitality**](docs/edge-modules/retail.md) | Queue length, dwell heatmaps, customer flow, table turnover |
 | 🏭 | [**Industrial**](docs/edge-modules/industrial.md) | Forklift proximity, confined space monitoring, structural vibration |
 | 🔮 | [**Exotic & Research**](docs/edge-modules/exotic.md) | Sleep staging, emotion detection, sign language, breathing sync |
 | 📡 | [**Signal Intelligence**](docs/edge-modules/signal-intelligence.md) | Cleans and sharpens raw WiFi signals — focuses on important regions, filters noise, fills in missing data, and tracks which person is which |
 | 🧠 | [**Adaptive Learning**](docs/edge-modules/adaptive-learning.md) | The sensor learns new gestures and patterns on its own over time — no cloud needed, remembers what it learned even after updates |
 | 🗺️ | [**Spatial Reasoning**](docs/edge-modules/spatial-temporal.md) | Figures out where people are in a room, which zones matter most, and tracks movement across areas using graph-based spatial logic |
 | ⏱️ | [**Temporal Analysis**](docs/edge-modules/spatial-temporal.md) | Learns daily routines, detects when patterns break (someone didn't get up), and verifies safety rules are being followed over time |
 | 🛡️ | [**AI Security**](docs/edge-modules/ai-security.md) | Detects signal replay attacks, WiFi jamming, injection attempts, and flags abnormal behavior that could indicate tampering |
 | ⚛️ | [**Quantum-Inspired**](docs/edge-modules/autonomous.md) | Uses quantum-inspired math to map room-wide signal coherence and search for optimal sensor configurations |
 | 🤖 | [**Autonomous & Exotic**](docs/edge-modules/autonomous.md) | Self-managing sensor mesh — auto-heals dropped nodes, plans its own actions, and explores experimental signal representations |
 All implemented modules are `no_std` Rust, share a [common utility library](v2/crates/wifi-densepose-wasm-edge/src/vendor_common.rs), and talk to the host through a 12-function API. Full documentation: [**Edge Modules Guide**](docs/edge-modules/README.md). See the [complete implemented module list](#edge-module-list) below.
 </details>
 <details id="edge-module-list">
 <summary><strong>🧩 Edge Intelligence — <a href="docs/edge-modules/README.md">All 60 Modules Implemented</a></strong> (ADR-041 complete)</summary>
 All 60 modules are implemented, tested (609 tests passing), and ready to deploy. They compile to `wasm32-unknown-unknown`, run on ESP32-S3 via WASM3, and share a [common utility library](v2/crates/wifi-densepose-wasm-edge/src/vendor_common.rs). Source: [`v2/crates/wifi-densepose-wasm-edge/src/`](v2/crates/wifi-densepose-wasm-edge/src/)
 **Core modules** (ADR-040 flagship + early implementations):
 | Module | File | What It Does |
 |--------|------|-------------|
 | Gesture Classifier | [`gesture.rs`](v2/crates/wifi-densepose-wasm-edge/src/gesture.rs) | DTW template matching for hand gestures |
 | Coherence Filter | [`coherence.rs`](v2/crates/wifi-densepose-wasm-edge/src/coherence.rs) | Phase coherence gating for signal quality |
 | Adversarial Detector | [`adversarial.rs`](v2/crates/wifi-densepose-wasm-edge/src/adversarial.rs) | Detects physically impossible signal patterns |
 | Intrusion Detector | [`intrusion.rs`](v2/crates/wifi-densepose-wasm-edge/src/intrusion.rs) | Human vs non-human motion classification |
 | Occupancy Counter | [`occupancy.rs`](v2/crates/wifi-densepose-wasm-edge/src/occupancy.rs) | Zone-level person counting |
 | Vital Trend | [`vital_trend.rs`](v2/crates/wifi-densepose-wasm-edge/src/vital_trend.rs) | Long-term breathing and heart rate trending |
 | RVF Parser | [`rvf.rs`](v2/crates/wifi-densepose-wasm-edge/src/rvf.rs) | RVF container format parsing |
 **Vendor-integrated modules** (24 modules, ADR-041 Category 7):
 **📡 Signal Intelligence** — Real-time CSI analysis and feature extraction
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Flash Attention | [`sig_flash_attention.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_flash_attention.rs) | Tiled attention over 8 subcarrier groups — finds spatial focus regions and entropy | S (<5ms) |
 | Coherence Gate | [`sig_coherence_gate.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_coherence_gate.rs) | Z-score phasor gating with hysteresis: Accept / PredictOnly / Reject / Recalibrate | L (<2ms) |
 | Temporal Compress | [`sig_temporal_compress.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_temporal_compress.rs) | 3-tier adaptive quantization (8-bit hot / 5-bit warm / 3-bit cold) | L (<2ms) |
 | Sparse Recovery | [`sig_sparse_recovery.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_sparse_recovery.rs) | ISTA L1 reconstruction for dropped subcarriers | H (<10ms) |
 | Person Match | [`sig_mincut_person_match.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_mincut_person_match.rs) | Hungarian-lite bipartite assignment for multi-person tracking | S (<5ms) |
 | Optimal Transport | [`sig_optimal_transport.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_optimal_transport.rs) | Sliced Wasserstein-1 distance with 4 projections | L (<2ms) |
 **🧠 Adaptive Learning** — On-device learning without cloud connectivity
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | DTW Gesture Learn | [`lrn_dtw_gesture_learn.rs`](v2/crates/wifi-densepose-wasm-edge/src/lrn_dtw_gesture_learn.rs) | User-teachable gesture recognition — 3-rehearsal protocol, 16 templates | S (<5ms) |
 | Anomaly Attractor | [`lrn_anomaly_attractor.rs`](v2/crates/wifi-densepose-wasm-edge/src/lrn_anomaly_attractor.rs) | 4D dynamical system attractor classification with Lyapunov exponents | H (<10ms) |
 | Meta Adapt | [`lrn_meta_adapt.rs`](v2/crates/wifi-densepose-wasm-edge/src/lrn_meta_adapt.rs) | Hill-climbing self-optimization with safety rollback | L (<2ms) |
 | EWC Lifelong | [`lrn_ewc_lifelong.rs`](v2/crates/wifi-densepose-wasm-edge/src/lrn_ewc_lifelong.rs) | Elastic Weight Consolidation — remembers past tasks while learning new ones | S (<5ms) |
 **🗺️ Spatial Reasoning** — Location, proximity, and influence mapping
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | PageRank Influence | [`spt_pagerank_influence.rs`](v2/crates/wifi-densepose-wasm-edge/src/spt_pagerank_influence.rs) | 4x4 cross-correlation graph with power iteration PageRank | L (<2ms) |
 | Micro HNSW | [`spt_micro_hnsw.rs`](v2/crates/wifi-densepose-wasm-edge/src/spt_micro_hnsw.rs) | 64-vector navigable small-world graph for nearest-neighbor search | S (<5ms) |
 | Spiking Tracker | [`spt_spiking_tracker.rs`](v2/crates/wifi-densepose-wasm-edge/src/spt_spiking_tracker.rs) | 32 LIF neurons + 4 output zone neurons with STDP learning | S (<5ms) |
 **⏱️ Temporal Analysis** — Activity patterns, logic verification, autonomous planning
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Pattern Sequence | [`tmp_pattern_sequence.rs`](v2/crates/wifi-densepose-wasm-edge/src/tmp_pattern_sequence.rs) | Activity routine detection and deviation alerts | S (<5ms) |
 | Temporal Logic Guard | [`tmp_temporal_logic_guard.rs`](v2/crates/wifi-densepose-wasm-edge/src/tmp_temporal_logic_guard.rs) | LTL formula verification on CSI event streams | S (<5ms) |
 | GOAP Autonomy | [`tmp_goap_autonomy.rs`](v2/crates/wifi-densepose-wasm-edge/src/tmp_goap_autonomy.rs) | Goal-Oriented Action Planning for autonomous module management | S (<5ms) |
 **🛡️ AI Security** — Tamper detection and behavioral anomaly profiling
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Prompt Shield | [`ais_prompt_shield.rs`](v2/crates/wifi-densepose-wasm-edge/src/ais_prompt_shield.rs) | FNV-1a replay detection, injection detection (10x amplitude), jamming (SNR) | L (<2ms) |
 | Behavioral Profiler | [`ais_behavioral_profiler.rs`](v2/crates/wifi-densepose-wasm-edge/src/ais_behavioral_profiler.rs) | 6D behavioral profile with Mahalanobis anomaly scoring | S (<5ms) |
 **⚛️ Quantum-Inspired** — Quantum computing metaphors applied to CSI analysis
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Quantum Coherence | [`qnt_quantum_coherence.rs`](v2/crates/wifi-densepose-wasm-edge/src/qnt_quantum_coherence.rs) | Bloch sphere mapping, von Neumann entropy, decoherence detection | S (<5ms) |
 | Interference Search | [`qnt_interference_search.rs`](v2/crates/wifi-densepose-wasm-edge/src/qnt_interference_search.rs) | 16 room-state hypotheses with Grover-inspired oracle + diffusion | S (<5ms) |
 **🤖 Autonomous Systems** — Self-governing and self-healing behaviors
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Psycho-Symbolic | [`aut_psycho_symbolic.rs`](v2/crates/wifi-densepose-wasm-edge/src/aut_psycho_symbolic.rs) | 16-rule forward-chaining knowledge base with contradiction detection | S (<5ms) |
 | Self-Healing Mesh | [`aut_self_healing_mesh.rs`](v2/crates/wifi-densepose-wasm-edge/src/aut_self_healing_mesh.rs) | 8-node mesh with health tracking, degradation/recovery, coverage healing | S (<5ms) |
 **🔮 Exotic (Vendor)** — Novel mathematical models for CSI interpretation
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Time Crystal | [`exo_time_crystal.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_time_crystal.rs) | Autocorrelation subharmonic detection in 256-frame history | S (<5ms) |
 | Hyperbolic Space | [`exo_hyperbolic_space.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_hyperbolic_space.rs) | Poincaré ball embedding with 32 reference locations, hyperbolic distance | S (<5ms) |
 **🏥 Medical & Health** (Category 1) — Contactless health monitoring
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Sleep Apnea | [`med_sleep_apnea.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_sleep_apnea.rs) | Detects breathing pauses during sleep | S (<5ms) |
 | Cardiac Arrhythmia | [`med_cardiac_arrhythmia.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_cardiac_arrhythmia.rs) | Monitors heart rate for irregular rhythms | S (<5ms) |
 | Respiratory Distress | [`med_respiratory_distress.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_respiratory_distress.rs) | Alerts on abnormal breathing patterns | S (<5ms) |
 | Gait Analysis | [`med_gait_analysis.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_gait_analysis.rs) | Tracks walking patterns and detects changes | S (<5ms) |
 | Seizure Detection | [`med_seizure_detect.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_seizure_detect.rs) | 6-state machine for tonic-clonic seizure recognition | S (<5ms) |
 **🔐 Security & Safety** (Category 2) — Perimeter and threat detection
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Perimeter Breach | [`sec_perimeter_breach.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_perimeter_breach.rs) | Detects boundary crossings with approach/departure | S (<5ms) |
 | Weapon Detection | [`sec_weapon_detect.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_weapon_detect.rs) | Metal anomaly detection via CSI amplitude shifts | S (<5ms) |
 | Tailgating | [`sec_tailgating.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_tailgating.rs) | Detects unauthorized follow-through at access points | S (<5ms) |
 | Loitering | [`sec_loitering.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_loitering.rs) | Alerts when someone lingers too long in a zone | S (<5ms) |
 | Panic Motion | [`sec_panic_motion.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_panic_motion.rs) | Detects fleeing, struggling, or panic movement | S (<5ms) |
 **🏢 Smart Building** (Category 3) — Automation and energy efficiency
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | HVAC Presence | [`bld_hvac_presence.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_hvac_presence.rs) | Occupancy-driven HVAC control with departure countdown | S (<5ms) |
 | Lighting Zones | [`bld_lighting_zones.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_lighting_zones.rs) | Auto-dim/off lighting based on zone activity | S (<5ms) |
 | Elevator Count | [`bld_elevator_count.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_elevator_count.rs) | Counts people entering/leaving with overload warning | S (<5ms) |
 | Meeting Room | [`bld_meeting_room.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_meeting_room.rs) | Tracks meeting lifecycle: start, headcount, end, availability | S (<5ms) |
 | Energy Audit | [`bld_energy_audit.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_energy_audit.rs) | Tracks after-hours usage and room utilization rates | S (<5ms) |
 **🛒 Retail & Hospitality** (Category 4) — Customer insights without cameras
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Queue Length | [`ret_queue_length.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_queue_length.rs) | Estimates queue size and wait times | S (<5ms) |
 | Dwell Heatmap | [`ret_dwell_heatmap.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_dwell_heatmap.rs) | Shows where people spend time (hot/cold zones) | S (<5ms) |
 | Customer Flow | [`ret_customer_flow.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_customer_flow.rs) | Counts ins/outs and tracks net occupancy | S (<5ms) |
 | Table Turnover | [`ret_table_turnover.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_table_turnover.rs) | Restaurant table lifecycle: seated, dining, vacated | S (<5ms) |
 | Shelf Engagement | [`ret_shelf_engagement.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_shelf_engagement.rs) | Detects browsing, considering, and reaching for products | S (<5ms) |
 **🏭 Industrial & Specialized** (Category 5) — Safety and compliance
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Forklift Proximity | [`ind_forklift_proximity.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_forklift_proximity.rs) | Warns when people get too close to vehicles | S (<5ms) |
 | Confined Space | [`ind_confined_space.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_confined_space.rs) | OSHA-compliant worker monitoring with extraction alerts | S (<5ms) |
 | Clean Room | [`ind_clean_room.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_clean_room.rs) | Occupancy limits and turbulent motion detection | S (<5ms) |
 | Livestock Monitor | [`ind_livestock_monitor.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_livestock_monitor.rs) | Animal presence, stillness, and escape alerts | S (<5ms) |
 | Structural Vibration | [`ind_structural_vibration.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_structural_vibration.rs) | Seismic events, mechanical resonance, structural drift | S (<5ms) |
 **🔮 Exotic & Research** (Category 6) — Experimental sensing applications
 | Module | File | What It Does | Budget |
 |--------|------|-------------|--------|
 | Dream Stage | [`exo_dream_stage.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_dream_stage.rs) | Contactless sleep stage classification (wake/light/deep/REM) | S (<5ms) |
 | Emotion Detection | [`exo_emotion_detect.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_emotion_detect.rs) | Arousal, stress, and calm detection from micro-movements | S (<5ms) |
 | Gesture Language | [`exo_gesture_language.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_gesture_language.rs) | Sign language letter recognition via WiFi | S (<5ms) |
 | Music Conductor | [`exo_music_conductor.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_music_conductor.rs) | Tempo and dynamic tracking from conducting gestures | S (<5ms) |
 | Plant Growth | [`exo_plant_growth.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_plant_growth.rs) | Monitors plant growth, circadian rhythms, wilt detection | S (<5ms) |
 | Ghost Hunter | [`exo_ghost_hunter.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_ghost_hunter.rs) | Environmental anomaly classification (draft/insect/wind/unknown) | S (<5ms) |
 | Rain Detection | [`exo_rain_detect.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_rain_detect.rs) | Detects rain onset, intensity, and cessation via signal scatter | S (<5ms) |
 | Breathing Sync | [`exo_breathing_sync.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_breathing_sync.rs) | Detects synchronized breathing between multiple people | S (<5ms) |
 </details>
 ---
@@ -501,7 +411,7 @@ Every WiFi signal that passes through a room creates a unique fingerprint of tha
 **What it does in plain terms:**
 - Turns any WiFi signal into a 128-number "fingerprint" that uniquely describes what's happening in a room
 - Learns entirely on its own from raw WiFi data — no cameras, no labeling, no human supervision needed
- Recognizes rooms, detects intruders, and classifies activities using only WiFi (named person-identity is an experimental, data-gated research capability — see below, not a shipped feature)
+- Recognizes rooms, detects intruders, identifies people, and classifies activities using only WiFi
 - Runs on an $8 ESP32 chip (the entire model fits in 55 KB of memory)
 - Produces both body pose tracking AND environment fingerprints in a single computation
@@ -512,7 +422,7 @@ Every WiFi signal that passes through a room creates a unique fingerprint of tha
 | **Self-supervised learning** | The model watches WiFi signals and teaches itself what "similar" and "different" look like, without any human-labeled data | Deploy anywhere — just plug in a WiFi sensor and wait 10 minutes |
 | **Room identification** | Each room produces a distinct WiFi fingerprint pattern | Know which room someone is in without GPS or beacons |
 | **Anomaly detection** | An unexpected person or event creates a fingerprint that doesn't match anything seen before | Automatic intrusion and fall detection as a free byproduct |
-| **Person re-identification** *(experimental, research)* | A real per-channel similarity matcher (Soul Signature §3.6, `wifi-densepose-bfld`); **measured** result: on WiFi-only cardiac+respiratory channels alone two people are *not* separable (gap ~0.0005) | Honest research capability — **named identity is not claimed** and is data-gated on enrollment with the decisive AETHER/body-resonance channel. See [#1021](https://github.com/ruvnet/RuView/issues/1021) |
+| **Person re-identification** | Each person disturbs WiFi in a slightly different way, creating a personal signature | Track individuals across sessions without cameras |
 | **Environment adaptation** | MicroLoRA adapters (1,792 parameters per room) fine-tune the model for each new space | Adapts to a new room with minimal data — 93% less than retraining from scratch |
 | **Memory preservation** | EWC++ regularization remembers what was learned during pretraining | Switching to a new task doesn't erase prior knowledge |
 | **Hard-negative mining** | Training focuses on the most confusing examples to learn faster | Better accuracy with the same amount of training data |
@@ -575,74 +485,24 @@ See [`docs/adr/ADR-024-contrastive-csi-embedding-model.md`](docs/adr/ADR-024-con
 ---
 ## 🧩 Claude Code & Codex Plugin
 RuView ships a [Claude Code](https://docs.anthropic.com/en/docs/claude-code) plugin (and Codex prompt mirror) that wraps the whole workflow — onboarding, ESP32 setup, configuration, sensing apps, model training, advanced multistatic sensing, CLI/API/WASM, mmWave radar, and witness verification — as 9 skills, 7 `/ruview-*` commands, and 3 agents. It lives in [`plugins/ruview/`](plugins/ruview/README.md); the marketplace manifest is [`.claude-plugin/marketplace.json`](.claude-plugin/marketplace.json) at the repo root.
 ```bash
 # In Claude Code — add this repo as a plugin marketplace, then install:
 /plugin marketplace add ruvnet/RuView
 /plugin install ruview@ruview
 # Or try it for one session without installing (from a local clone of the repo):
 claude --plugin-dir ./plugins/ruview
 # Then, in Claude Code:
 #   /ruview-start      → onboarding (Docker demo / repo build / live ESP32)
 #   /ruview-flash      → build + flash ESP32 firmware
 #   /ruview-provision  → provision WiFi creds, sink IP, channel/MAC, mesh slots
 #   /ruview-app        → run a sensing application (presence / vitals / pose / sleep / MAT / point cloud)
 #   /ruview-train      → train / evaluate / publish a model (incl. GPU on GCloud)
 #   /ruview-advanced   → multistatic / tomography / cross-viewpoint / mesh-security
 #   /ruview-verify     → tests + deterministic proof + witness bundle
 ```
 **Codex (OpenAI CLI):** `cp plugins/ruview/codex/prompts/*.md ~/.codex/prompts/` — the seven `/ruview-*` commands are mirrored as Codex prompts; [`plugins/ruview/codex/AGENTS.md`](plugins/ruview/codex/AGENTS.md) carries the project rules. See [`plugins/ruview/codex/README.md`](plugins/ruview/codex/README.md).
 Verify the plugin structure: `bash plugins/ruview/scripts/smoke.sh`. Full details: [`plugins/ruview/README.md`](plugins/ruview/README.md).
 ---
 ## 📖 Documentation
 | Document | Description |
 |----------|-------------|
 | [User Guide](docs/user-guide.md) | Step-by-step guide: installation, first run, API usage, hardware setup, training |
 | [Build Guide](docs/build-guide.md) | Building from source (Rust and Python) |
-| [**Home Assistant + Matter Integration**](docs/integrations/home-assistant.md) | **Works with Home Assistant** via MQTT auto-discovery + **Works with Matter** (Apple Home / Google Home / Alexa / SmartThings) — full entity catalog, 3 starter blueprints, Lovelace dashboards, privacy mode, threshold tuning ([ADR-115](docs/adr/ADR-115-home-assistant-integration.md)). |
+| [Architecture Decisions](docs/adr/README.md) | 79 ADRs — why each technical choice was made, organized by domain (hardware, signal processing, ML, platform, infrastructure) |
-| [**BFLD — Beamforming Feedback Layer for Detection**](v2/crates/wifi-densepose-bfld/README.md) | New privacy-gated WiFi sensing layer that measures + structurally prevents identity leakage from 802.11ac/ax Beamforming Feedback Information. Three type-enforced invariants (raw BFI never exits node, identity embedding is in-RAM-only, cross-site correlation cryptographically impossible via per-site BLAKE3 keyed hash + daily rotation). Ships full operator surface (`BfldPipeline`, `BfldPipelineHandle`, the Soul Signature §3.6 per-channel matcher `EnrolledMatcher`/`SoulMatchOracle` — experimental; named identity is data-gated, **measured** as not-separable on WiFi-only channels alone), MQTT topic router + HA-DISCO + availability + LWT, 3 operator HA blueprints, two runnable examples, eclipse-mosquitto:2 CI service container. 327+ tests. [ADR-118](docs/adr/ADR-118-bfld-beamforming-feedback-layer-for-detection.md) umbrella + sub-ADRs [119](docs/adr/ADR-119-bfld-frame-format-and-wire-protocol.md)/[120](docs/adr/ADR-120-bfld-privacy-class-and-hash-rotation.md)/[121](docs/adr/ADR-121-bfld-identity-risk-scoring.md)/[122](docs/adr/ADR-122-bfld-ruview-ha-matter-exposure.md)/[123](docs/adr/ADR-123-bfld-capture-path-nexmon-and-esp32.md). Research dossier: [`docs/research/BFLD/`](docs/research/BFLD/) (11 files, 13,544 words). |
+| [Domain Models](docs/ddd/README.md) | 7 DDD models (RuvSense, Signal Processing, Training Pipeline, Hardware Platform, Sensing Server, WiFi-Mat, CHCI) — bounded contexts, aggregates, domain events, and ubiquitous language |
 | [**SENSE-BRIDGE — rvagent MCP server**](tools/ruview-mcp/README.md) | Dual-transport MCP server (`@ruvnet/rvagent`) bridging the RuView sensing stack to AI agents (Claude Code, Cursor, ruflo swarms). 6 tools wired: `ruview.presence.now`, `ruview.vitals.get_{breathing,heart_rate,all}`, `ruview.bfld.last_scan`, `ruview.bfld.subscribe`. stdio + Streamable HTTP (`POST /mcp`, Origin-validated, bearer-token auth, `127.0.0.1` bind). Full 20-tool Zod schema barrel + 5 RUVIEW-POLICY governance tools. 93 tests. [ADR-124](docs/adr/ADR-124-rvagent-mcp-ruvector-npm-integration.md). Try: `npx @ruvnet/rvagent stdio`. |
 | [Semantic Primitives — Precision/Recall](docs/integrations/semantic-primitives-metrics.md) | Per-primitive F1 on the held-out paired-capture set: someone-sleeping, possible-distress, room-active, elderly-inactivity-anomaly, meeting, bathroom, fall-risk, bed-exit, no-movement, multi-room. |
 | [Claude Code / Codex Plugin](plugins/ruview/README.md) | The `ruview` plugin + marketplace — skills, `/ruview-*` commands, agents, and the Codex prompt mirror |
 | [Architecture Decisions](docs/adr/README.md) | 96 ADRs — why each technical choice was made, organized by domain (hardware, signal processing, ML, platform, infrastructure) |
 | [Domain Models](docs/ddd/README.md) | 8 DDD models (RuvSense, Signal Processing, Training Pipeline, Hardware Platform, Sensing Server, WiFi-Mat, CHCI, rvCSI) — bounded contexts, aggregates, domain events, and ubiquitous language |
 | [rvCSI — edge RF sensing runtime](https://github.com/ruvnet/rvcsi) | Rust-first / TypeScript-accessible / hardware-abstracted CSI runtime: multi-source ingestion (incl. real nexmon_csi `.pcap` from a **Raspberry Pi 5** / Pi 4 / Pi 3B+ — CYW43455 / BCM43455c0) → validation → DSP → typed events → RuVector RF memory ([ADR-095](docs/adr/ADR-095-rvcsi-edge-rf-sensing-platform.md), [ADR-096](docs/adr/ADR-096-rvcsi-ffi-crate-layout.md), [domain model](docs/ddd/rvcsi-domain-model.md)). Now its own repo — [`ruvnet/rvcsi`](https://github.com/ruvnet/rvcsi) — vendored here under `vendor/rvcsi`; 9 `rvcsi-*` crates on crates.io, `@ruv/rvcsi` on npm, plus a Claude Code plugin. |
 | [Desktop App](v2/crates/wifi-densepose-desktop/README.md) | **WIP** — Tauri v2 desktop app for node management, OTA updates, WASM deployment, and mesh visualization |
 | `ruview-swarm` | Drone swarm control system (ADR-148) — hierarchical-mesh topology, Raft consensus, MARL, CSI sensing payload, MAVLink/PX4/ArduPilot compatibility, Ruflo AI-agent integration |
 | [Medical Examples](examples/medical/README.md) | Contactless blood pressure, heart rate, breathing rate via 60 GHz mmWave radar — $15 hardware, no wearable |
 | [Extended Documentation](docs/readme-details.md) | Latest additions, key features, installation, quick start, signal processing, training, CLI, testing, deployment, and changelog |
 ---
 ## 🚧 Beta software
 > **Beta Software** — Under active development. APIs and firmware may change. Known limitations:
 > - ESP32-C3 and original ESP32 are not supported (single-core, insufficient for CSI DSP)
 > - Single ESP32 deployments have limited spatial resolution — use 2+ nodes or add a [Cognitum Seed](https://cognitum.one) for best results
 > - Camera-free pose accuracy is limited (PCK@20 ≈ 2.5% with proxy labels) — [camera ground-truth training](docs/adr/ADR-079-camera-ground-truth-training.md) targets **35%+ PCK@20**; the pipeline is implemented, but the data-collection and evaluation phases (ADR-079 P7–P9) are still pending.
 >
 > Contributions and bug reports welcome at [Issues](https://github.com/ruvnet/RuView/issues).
 ## 📄 License
 MIT License — see [LICENSE](LICENSE) for details.
 ## 🤝 Creator Affiliate Program
 **For TikTok · Instagram · YouTube creators** — earn **25% on every Cognitum sale** you refer. The RuFlo, RuView, and RuVector videos you're already making have drawn millions of views; get paid for the orders they drive. Click-tracking activates instantly; commissions activate after a quick manual review (usually under 24 hours).
 [Apply now → cognitum.one/affiliate](https://cognitum.one/affiliate)
 ## 📞 Support
 [GitHub Issues](https://github.com/ruvnet/RuView/issues) | [Discussions](https://github.com/ruvnet/RuView/discussions) | [PyPI](https://pypi.org/project/wifi-densepose/)
@@ -1,50 +0,0 @@
 # AetherArena ("AA") — The Official Spatial-Intelligence Benchmark
 > **Public leaderboard. Private evaluation split. Open scorer. Signed results.**
 AetherArena is a **standalone, project-agnostic benchmark** for camera-free **spatial intelligence** — pose, presence, occupancy, tracking, and vitals from RF/WiFi (and, over time, mmWave / UWB / radar / lidar / multimodal). It is **not** a single-vendor leaderboard: any team, framework, or sensing modality can enter, and every entrant — including the RuView baseline that donated the seed scorer — is scored by the identical, open, pinned harness.
 Specified in [ADR-149](../docs/adr/ADR-149-public-community-leaderboard-huggingface.md) (Accepted).
 Canonical home: **`ruvnet/aether-arena`** + a Hugging Face Space (deploy pending — see `STATUS`).
 ---
 ## Why
 WiFi/RF spatial sensing has no shared yardstick — papers self-report against inconsistent splits and metrics, with **no accounting for latency, reproducibility, or privacy leakage**. AA fixes the *measurement*, not just the models: a single deterministic scorer, a private held-out split nobody can train on, and a signed result ledger that can't be silently edited.
 ## What gets measured (v0)
 | Category | Metric | Status |
 |----------|--------|--------|
 | **Pose** | PCK@0.2 (all / torso), OKS | Ranked |
 | **Presence** | accuracy, FP/FN | Ranked |
 | **Edge latency** | p50 / p95 / p99 ms | Ranked |
 | **Determinism** | proof-hash pass/fail | Ranked (gate) |
 | Tracking (MOTA) | — | activates when multi-person clips land |
 | Vitals (BPM err) | — | activates when paired vitals ground truth lands |
 | **Privacy leakage** | membership-inference ∈ [0,1] | **gated — not ranked** until the attacker ships |
 | Cross-room | degradation ratio | coming soon |
 The headline rank is the **category metric**; an optional `arena_score = quality × latency_factor × privacy_factor × determinism_gate` is exposed alongside (never instead) so accuracy can't win at any cost. See ADR-149 §2.5.
 ## How scoring works
 The scorer is RuView's **already-published** `wifi-densepose-train` acceptance harness (`ruview_metrics` + ADR-145 `ablation`), run in a pinned sandbox. **You submit a model, not predictions** — predictions on data you hold prove nothing. Your model is scored against a **private** MM-Fi held-out split (CC BY-NC 4.0; Wi-Pose excluded for redistribution reasons), and one **signed, append-only** row is written to the results ledger with a determinism proof hash.
 Submission lifecycle: `submitted → validated → quarantined → smoke_scored → full_scored → published` (or `rejected` with a reason). The model only ever runs inside a no-network, read-only-FS sandbox.
 ## Submit (when the Space is live)
 1. Write a manifest: [`schema/aa-submission.toml`](schema/aa-submission.toml).
 2. Push your model artifact (`.safetensors` / `.rvf` / LoRA adapter) + manifest to the Space.
 3. Watch it move through the lifecycle; your signed row appears on the board.
 ## Verify it's fair (you don't have to trust us)
 See [`VERIFY.md`](VERIFY.md) — run the **open scorer** locally on the **public smoke split**, reproduce the determinism hash, and confirm RuView's own entries were scored by the identical path. That five-step check is the launch gate (ADR-149 §7).
 ## Neutrality
 AA is a neutral commons. The scorer is open and versioned; any metric change is a public `harness_version` bump that **re-scores all entries**. RuView donated the seed harness and enters as one baseline — it gets no special treatment (ADR-149 §2.8).
@@ -1,30 +0,0 @@
 # AetherArena — Build Status
 Tracks ADR-149 implementation milestones. "Complete" = benchmark **infrastructure** done,
 tested, CI-gated, deploy-ready, RuView baseline entered, §7 acceptance test passing.
 Model **SOTA** (e.g. MM-Fi PCK@20 ~72%) is a separate long-running ML effort, blocked on
 ADR-079 camera-ground-truth collection — *not* an infra-completion blocker.
 | # | Milestone | Status |
 |---|-----------|--------|
 | M1 | ADR-149 Accepted + committed | ✅ done |
 | M2 | Scorer runner (`aa_score_runner`) — **real model scoring** + witness (proof+inputs hash) + **repeatability analysis** | ✅ done — builds `--no-default-features`, determinism gate PASS, repeatable 16/16 |
 | M3 | CI harness-gate workflow (PR runs scorer + repeatability + real-scoring smoke + ledger verify) | ✅ done — `.github/workflows/aether-arena-harness.yml` |
 | M4 | Scaffold: README + submission schema + VERIFY (acceptance test) | ✅ done |
 | M5 | Public smoke split (committed) + private MM-Fi held-out split prep | 🟡 smoke split done (`fixtures/smoke_*.json`); private MM-Fi prep pending |
 | M6 | HF Space (Gradio) — leaderboard + ledger integrity + submit/verify/about | ✅ deployed → https://huggingface.co/spaces/ruvnet/aether-arena (sandboxed scorer container = later hardening) |
 | M7 | **Witness ledger chain** — append-only, hash-chained, tamper-evident | ✅ done — `ledger/ledger_tools.py` (seed/append/verify); tamper test fails as designed |
 | M8 | Public launch | ✅ Space **LIVE** (gradio 5.9.1, serving 200) — **board empty, awaiting first real harness score** (benchmark-first: no seeded numbers) |
 ## v0 infrastructure: COMPLETE
 Implement ✅ · Test ✅ · Deploy to HF ✅ (https://huggingface.co/spaces/ruvnet/aether-arena) · Instructions+Verification ✅ · PR runs the harness ✅ (PR #874, AA harness gate **passed**).
 Remaining = data + hardening, not infra: private MM-Fi held-out split (M5), sandboxed scorer container (M6), privacy-leakage attacker (gated category), and **model SOTA** (separate ML effort, blocked on ADR-079 — explicitly not an infra exit).
 ## Benchmark-first posture (per user direction)
 - **No placeholder numbers on the board.** The ledger seeds to genesis only; every result is a real scoring-pipeline witness. RuView gets no seeded baseline.
 - **Witness chain** = `inputs_sha256` (binds witness to exact inputs) + `proof_sha256` (cross-platform-stable score hash) + the append-only hash-chained ledger. Repeatability analysis (`--repeat N`) proves the proof hash is identical across runs.
 ## Blockers / decisions needed
 - **HF deploy (M6)** — resolved: the public `ruvnet/aether-arena` Space is deployed and live (see M6/M8 above). The token stays in GCP Secret Manager (`HUGGINGFACE_API_KEY`); the remaining M6 work is the sandboxed scorer container.
 - **MM-Fi is CC BY-NC** → AA must stay non-commercial / legally distinct from the commercial RuView product.
 - **Private MM-Fi split (M5)** — needs the dataset pulled + a held-out split assembled before real public scoring replaces the smoke fixture.
@@ -1,78 +0,0 @@
 # Verifying AetherArena (you don't have to trust us)
 AA's credibility rests on a stranger being able to reproduce a score and see that the rules are fair. This is the **launch gate** (ADR-149 §7): v0 does not ship until all five checks below pass for someone with no insider access.
 > **Wider context:** this page covers the *leaderboard scorer*. For the whole-platform answer to
 > "is this real / does it actually work?" — including the deterministic pipeline proof, the
 > published models + public-benchmark numbers, and the built-in-public development trail — see
 > [`docs/proof-of-capabilities.md`](../docs/proof-of-capabilities.md).
 ## The open scorer
 The scoring engine is a pure-Rust, GPU-free binary: `aa_score_runner` in `wifi-densepose-train`. It runs the real `ruview_metrics` pose-acceptance harness on a fixed fixture and emits a cross-platform-stable SHA-256 **determinism proof**.
 ### Reproduce the determinism hash locally
 ```bash
 cd v2
 # Verify the committed expected hash still matches (this is the CI gate):
 cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features
 # → prints the witness (inputs_sha256 + proof_sha256) and "VERDICT: PASS"
 # See the witness row as JSON:
 cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --json
 ```
 ### Witness chain — proof + repeatability analysis
 Every score is a **witness**: `inputs_sha256` (binds it to the exact inputs scored)
 + `proof_sha256` (cross-platform-stable hash of the quantised score) + `harness_version`.
 Witnesses are recorded in an **append-only, hash-chained ledger** (each row references
 the previous row's hash), so a silent edit to any past row breaks the chain.
 ```bash
 # Repeatability: run the scorer K times, confirm ONE identical proof hash:
 cd v2
 cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16
 # → {"repeatability":{"runs":16,"unique_proof_hashes":1,"repeatable":true,...}}
 # Real model scoring (score predictions against an eval split):
 cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- \
  --split ../aether-arena/fixtures/smoke_split.json \
  --pred  ../aether-arena/fixtures/smoke_pred.json --json
 # Verify the witness ledger chain is intact (tamper-evident):
 cd ../aether-arena/ledger && python3 ledger_tools.py verify
 # → "OK: N rows, chain intact"   (edit any row and it reports the broken link)
 ```
 The expected hash is committed at [`fixtures/expected_score.sha256`](fixtures/expected_score.sha256). Same harness version + same fixture → same hash on glibc / MSVC / Apple. If your local run prints `VERDICT: PASS`, you have reproduced the scorer.
 ### What happens if the scoring maths changes
 Any edit to `ruview_metrics.rs`, `ablation.rs`, or `aa_score_runner.rs` moves the hash and **fails the CI gate** (`.github/workflows/aether-arena-harness.yml`) until the maintainer regenerates and reviews:
 ```bash
 # run from v2/ (the same working directory as the commands above)
 cargo run -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --generate-hash \
  > ../aether-arena/fixtures/expected_score.sha256
 ```
 So a scorer change is always a reviewed, public diff — never silent. That's `harness_version` pinning + `determinism_gate` in action (ADR-149 §2.4–§2.5).
 ## The five-step acceptance test (v0 launch gate)
 A stranger must be able to:
 1. **Submit** a model (artifact + `schema/aa-submission.toml`) with no insider help.
 2. **Get a deterministic score** — same model + same `harness_version` → same numbers.
 3. **See the signed row** appended to the public results ledger.
 4. **Rerun the scorer locally** on the public smoke split and reproduce the logic (the command above).
 5. **Understand why the rank is fair** — private split, open scorer, pinned version, proof hash — from these docs alone.
 If any step fails, v0 is not ready.
 ## Current status
 - ✅ Step 4 (rerun the open scorer locally, reproduce the hash) — **works today** via `aa_score_runner`.
 - ✅ CI harness gate runs the scorer on every PR.
 - ⏳ Steps 1–3, 5 (HF Space submission flow + signed ledger) — in progress; require the HF Space deploy (needs an HF token / maintainer authorization).
@@ -1,87 +0,0 @@
 # RuView Calibration Service (reference implementation)
 Turn a **shared WiFi-CSI pose base model** into a room-specific one with a **30-second labeled
 calibration** and a **~11 KB per-room LoRA adapter**. This is the deployable resolution of the
 cross-subject / cross-environment generalization problem (full study: [ADR-150 §3.3–3.6](../../docs/adr/ADR-150-rf-foundation-encoder.md)).
 ## Why
 Zero-shot WiFi pose generalizes poorly to a **new room or new person** — an unseen room can drop a
 strong model to near-random. But that gap is **not** algorithmically closeable (CORAL, DANN,
 instance-norm, contrastive foundation-pretraining all failed) and **not** closeable by collecting
 more subjects (saturates ~64%). It **is** closeable, cheaply, at deployment time: a handful of
 labeled frames from the actual room pin down its multipath instantly.
 | Deployment case | Zero-shot | + in-room calibration |
 |-----------------|----------:|----------------------:|
 | Same room, new person (cross-subject) | 64% | **76%** (200 samples) |
 | **New room + new person (cross-environment)** | **~10%** | **60% @ 5 samples → 73% @ 200** |
 **Verified demo (this code, source-only base on an unseen MM-Fi room E04):**
 `zero-shot 3.09% → after 200-sample calibration 74.29%` (+71 pts).
 ## How it works
 A frozen shared **base** (transformer + temporal attention pool + skeleton-graph head, the published
 [`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose)) plus a
 tiny **LoRA adapter** (rank 8 on the input projection + pose head — **11,200 params ≈ 11 KB int8 /
 22 KB fp16**) fitted per room. Thousands of room-adapters hang off one base.
 ## Usage
 ```bash
 # 1) Capture a short labeled clip in the deployment room -> calib.npz {X:[N,3,114,10], Y:[N,17,2]}
 #    (~100–200 samples recommended; below ~20 the adapter can underperform zero-shot)
 # 2) Fit the per-room adapter (~11 KB):
 python calibrate.py --base pose_mmfi_best.pt --data calib.npz --out room.adapter.npz
 # 3) Run calibrated inference (base + room adapter):
 python infer.py --base pose_mmfi_best.pt --adapter room.adapter.npz --data frames.npz --out kp.npy
 #    omit --adapter to run the uncalibrated (zero-shot) base
 ```
 `X` is CSI amplitude `[N, 3 antennas, 114 subcarriers, 10 frames]` (per-sample standardization is
 applied internally). `Y` is `[N,17,2]` COCO keypoints in `[0,1]`.
 ## Calibration budget (measured, rank-8 LoRA, 3 seeds — ADR-150 §3.5)
 | Labeled samples/room | cross-subject | cross-environment |
 |---------------------:|--------------:|------------------:|
 | 0 (zero-shot) | 64% | ~10% |
 | 5 | — | 60% |
 | 20 | 66% | 66% |
 | 50 | 70% | 70% |
 | 200 | 72% | 73% |
 Knee at ~50 samples (~70%); **below ~20 samples the adapter can hurt** (too few to fit reliably).
 ## Two models, two producers (not interchangeable)
 Adapters are **model-specific**. There are two calibration producers here:
 | Producer | Target model | Input | Adapter format | Consumer |
 |----------|--------------|-------|----------------|----------|
 | `calibrate.py` | MM-Fi **transformer** (`pose_mmfi_best.pt`, 3×114×10) | `[N,3,114,10]` | `.npz` (`proj`/`head` LoRA) | this Python `infer.py` |
 | `cog_calibrate.py` | cog **conv+MLP** (`pose_v1.safetensors`, 56×20) | `[N,56,20]` | `.safetensors` (`fc1.a`/`fc1.b`/`fc2.a`/`fc2.b`) | Rust `cog-pose-estimation run --adapter` |
 ```bash
 # Produce a cog-format per-room adapter for the deployed Rust pose engine:
 python cog_calibrate.py --base pose_v1.safetensors --data calib.npz --out room.safetensors
 # then in the cog runtime:
 cog-pose-estimation run --config <cfg> --adapter room.safetensors
 ```
 Same LoRA *mechanism* (ADR-150 §3.5), different architecture and key layout — an adapter from one
 producer will not load into the other model.
 ## Notes
 - **Calibration only helps when the base hasn't already seen the room.** The published flagship was
  trained on MM-Fi `random_split`, so calibrating it on an MM-Fi subject is a near-no-op (the base
  has already seen that subject's data). In a genuinely new real-world room the base is zero-shot,
  and that is where calibration applies. To *reproduce the demo* on a held-out MM-Fi room, train a
  source-only base (exclude the target environment) — see `ADR-150 §3.6` and the few-shot harness
  in `aether-arena/staging/`.
 - Adapter is saved fp16 (~22 KB); quantize to int8 for the ~11 KB on-device form.
 - Inference is real-time on CPU (the 75 K-param `micro` variant runs in 0.135 ms single-thread x86;
  see [`docs/benchmarks/wifi-pose-efficiency-frontier.md`](../../docs/benchmarks/wifi-pose-efficiency-frontier.md)).
@@ -1,71 +0,0 @@
 """RuView per-room calibration — fit a ~11 KB LoRA adapter from a short labeled in-room capture.
    python calibrate.py --base pose_mmfi_best.pt --data room_calib.npz --out room_A.adapter.npz
 `room_calib.npz` must contain `X` [N,3,114,10] CSI amplitude and `Y` [N,17,2] (or [N,34]) keypoints
 in [0,1] — the labeled calibration samples from the deployment room (~100–200 recommended; ≥20).
 Outputs a tiny adapter (.npz, ~11 KB) that, loaded over the shared base at inference, recovers
 SOTA-level pose for that room/person (ADR-150 §3.5–3.6).
 """
 import argparse
 import numpy as np
 import torch
 import torch.nn as nn
 from model import PoseNet, standardize
 def main():
    """Fit a per-room LoRA adapter from a labeled calibration capture and write it as ``.npz``.

    CLI flags: ``--base`` (checkpoint loaded non-strictly into ``PoseNet``), ``--data`` (``.npz``
    with ``X`` and ``Y``), ``--out`` (adapter path), ``--rank``, ``--iters``, ``--lr``, ``--device``.

    Every parameter is frozen except those whose name ends in ``.A`` / ``.B`` (the LoRA factors).
    The archive stores the trained factors as float16 plus
    ``_meta = [rank, n_samples, n_trainable_params]``.

    Exits with an argparse error when the capture is empty or when ``X`` and ``Y`` disagree on the
    number of samples.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--base", required=True, help="base checkpoint (pose_mmfi_best.pt)")
    ap.add_argument("--data", required=True, help="labeled calibration .npz with X and Y")
    ap.add_argument("--out", required=True, help="output adapter .npz")
    ap.add_argument("--rank", type=int, default=8)
    ap.add_argument("--iters", type=int, default=600)
    ap.add_argument("--lr", type=float, default=8e-4)
    ap.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
    a = ap.parse_args()
    # np.load on an .npz returns a lazy NpzFile that keeps the archive open; copy what is needed
    # and close it deterministically instead of relying on garbage collection.
    with np.load(a.data) as z:
        X = torch.tensor(z["X"].astype(np.float32))
        Y = torch.tensor(z["Y"].reshape(len(z["Y"]), 34).astype(np.float32))
    n = len(X)
    if n == 0:
        ap.error(f"{a.data} contains no calibration samples")
    if len(Y) != n:
        ap.error(f"{a.data}: X has {n} samples but Y has {len(Y)}")
    if n < 20:
        print(f"WARNING: only {n} calibration samples — below ~20 the adapter may underperform "
              f"zero-shot (ADR-150 §3.5). Recommend ~100–200.")
    dev = a.device
    net = PoseNet().to(dev)
    # NOTE(security): torch.load unpickles the file — only pass trusted checkpoints as --base.
    net.load_state_dict(torch.load(a.base, map_location=dev), strict=False)
    net.add_lora(r=a.rank).to(dev)
    for k, p in net.named_parameters():
        p.requires_grad = k.endswith(".A") or k.endswith(".B")
    trainable_names = {k for k, p in net.named_parameters() if p.requires_grad}
    trainable = [p for p in net.parameters() if p.requires_grad]
    n_tr = sum(p.numel() for p in trainable)
    Xs = standardize(X.to(dev))
    Yt = Y.to(dev)
    opt = torch.optim.AdamW(trainable, lr=a.lr, weight_decay=0.0)
    lossf = nn.SmoothL1Loss(beta=0.1)
    bs = min(128, n)
    net.train()
    for _ in range(a.iters):
        bi = torch.randint(0, n, (bs,), device=dev)
        xb = Xs[bi]
        # Light augmentation. The keep-mask has shape [B, C, 1, 1], so it zeroes whole axis-1
        # channels (the antenna axis of the documented [N,3,114,10] layout) with probability
        # 0.15; Gaussian noise with a random per-sample amplitude in [0, 0.03) is then added.
        m = (torch.rand(xb.shape[0], xb.shape[1], 1, 1, device=dev) > 0.15).float()
        xb = xb * m + 0.03 * torch.randn_like(xb) * torch.rand(xb.shape[0], 1, 1, 1, device=dev)
        opt.zero_grad()
        lossf(net(xb), Yt[bi]).backward()
        opt.step()
    # Keep only tensors that were actually trained: a pure ".A"/".B" suffix match can also pick
    # up non-trainable state whose key merely ends in ".A" (e.g. a buffer named "gr.A"), which
    # does not belong in a per-room adapter.
    adapter = {k: v for k, v in net.lora_state().items() if k in trainable_names}
    nbytes = sum(v.astype(np.float16).nbytes for v in adapter.values())
    np.savez(a.out, **{k: v.astype(np.float16) for k, v in adapter.items()},
             _meta=np.array([a.rank, n, n_tr], dtype=np.int64))
    # np.savez appends ".npz" when the name lacks it; report the path that was really written.
    saved_as = a.out if a.out.endswith(".npz") else a.out + ".npz"
    print(f"saved {saved_as} | rank {a.rank} | {n_tr:,} params | ~{nbytes/1024:.1f} KB fp16 | "
          f"from {n} labeled samples")
 if __name__ == "__main__":
    main()
@@ -1,120 +0,0 @@
 """Per-room calibration producer for the cog-pose-estimation **conv+MLP** model
 (`pose_v1.safetensors`, 56 subcarriers x 20 frames). Companion to `calibrate.py`
 (which targets the MM-Fi *transformer* model) — different model, different adapter
 key layout, NOT interchangeable (ADR-150 §3.5).
 Fits a rank-r LoRA on the pose head (fc1, fc2) from a short labeled in-room capture and
 writes a **safetensors** adapter with keys `fc1.a`/`fc1.b`/`fc2.a`/`fc2.b` (scale baked
 into `b`) — exactly what `cog-pose-estimation run --adapter <file>` consumes.
    python cog_calibrate.py --base pose_v1.safetensors --data calib.npz --out room.safetensors
 `calib.npz`: `X` [N,56,20] CSI window + `Y` [N,17,2] (or [N,34]) keypoints in [0,1].
 """
 import argparse
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 class CogPose(nn.Module):
    """Mirrors cog-pose-estimation's PoseNet (Candle) exactly — same safetensors keys.

    Three dilated 1-D convolutions over the time axis, mean-pooled, followed by a two-layer MLP
    head with a sigmoid output. Optional LoRA factors ``(a, b)`` can be attached to ``fc1`` and
    ``fc2`` via :meth:`add_lora`; the low-rank delta is multiplied by ``lora_scale`` (alpha / r)
    in the forward pass, so an exporter that bakes the same scale into ``b`` reproduces exactly
    the function that was trained.
    """
    def __init__(self):
        super().__init__()
        self.enc = nn.ModuleDict({
            "c1": nn.Conv1d(56, 64, 3, padding=1, dilation=1),
            "c2": nn.Conv1d(64, 128, 3, padding=2, dilation=2),
            "c3": nn.Conv1d(128, 128, 3, padding=4, dilation=4),
        })
        self.head = nn.ModuleDict({"fc1": nn.Linear(128, 256), "fc2": nn.Linear(256, 34)})
        self.fc1_lora = None
        self.fc2_lora = None
        # Neutral until add_lora() sets alpha / r, so hand-assigned factors behave as before.
        self.lora_scale = 1.0
    def _lora(self, slot, x, y):
        """Return ``y`` plus the scaled low-rank delta ``(x @ a) @ b``, or ``y`` if no adapter."""
        if slot is None:
            return y
        a, b = slot
        return y + ((x @ a) @ b) * self.lora_scale
    def forward(self, x):                       # x: [B, 56, 20]
        h = F.relu(self.enc["c1"](x))
        h = F.relu(self.enc["c2"](h))
        h = F.relu(self.enc["c3"](h))
        h = h.mean(2)                            # [B, 128]
        z1 = self.head["fc1"](h)
        z1 = self._lora(self.fc1_lora, h, z1)
        h1 = F.relu(z1)
        z2 = self.head["fc2"](h1)
        z2 = self._lora(self.fc2_lora, h1, z2)
        return torch.sigmoid(z2)                 # [B, 34]
    def add_lora(self, r=4, alpha=16.0):
        """Attach rank-``r`` LoRA factors to ``fc1``/``fc2`` and return ``self``.

        ``a`` is drawn from N(0, 0.02^2) and ``b`` starts at zero, so the model output is
        unchanged until training moves ``b``. ``alpha / r`` becomes the forward-pass scale.
        """
        self.lora_scale = alpha / r
        self.fc1_lora = (nn.Parameter(torch.randn(128, r) * 0.02), nn.Parameter(torch.zeros(r, 256)))
        self.fc2_lora = (nn.Parameter(torch.randn(256, r) * 0.02), nn.Parameter(torch.zeros(r, 34)))
        # Stable names (rather than id()-derived ones) keep state_dict keys identical across runs
        # and let a repeated add_lora() replace the previous factors instead of piling up.
        named = zip(("fc1_lora_a", "fc1_lora_b", "fc2_lora_a", "fc2_lora_b"),
                    (*self.fc1_lora, *self.fc2_lora))
        for name, p in named:
            self.register_parameter(name, p)
        return self
 def load_base(net: CogPose, path: str):
    """Load base weights from the safetensors file at ``path`` into ``net`` and return ``net``.

    The checkpoint keys are passed through unchanged. Loading is non-strict, so keys present on
    only one side (for example LoRA parameters) are tolerated rather than raising.
    """
    from safetensors.torch import load_file
    weights = dict(load_file(path))
    net.load_state_dict(weights, strict=False)
    return net
 def fit(base: str, data: str, out: str, rank: int = 4, iters: int = 400, lr: float = 1e-3):
    """Fit LoRA factors on the ``fc1``/``fc2`` head and write them as a safetensors adapter.

    Args:
        base: path to the base safetensors checkpoint.
        data: path to an ``.npz`` with ``X`` (model input windows) and ``Y`` (keypoints, reshaped
            here to ``[N, 34]``).
        out: destination safetensors path.
        rank: LoRA rank.
        iters: number of AdamW steps.
        lr: learning rate.

    Returns:
        ``(out, number_of_lora_parameters, number_of_samples)``.

    Raises:
        ValueError: if the capture is empty or ``X`` and ``Y`` have different lengths.
    """
    # Copy the arrays out and close the archive: np.load keeps an .npz open lazily otherwise.
    with np.load(data) as z:
        X = torch.tensor(z["X"].astype(np.float32))          # [N,56,20]
        Y = torch.tensor(z["Y"].reshape(len(z["Y"]), 34).astype(np.float32))
    n = len(X)
    if n == 0:
        raise ValueError(f"{data} contains no calibration samples")
    if len(Y) != n:
        raise ValueError(f"{data}: X has {n} samples but Y has {len(Y)}")
    net = CogPose()
    load_base(net, base)
    net.add_lora(rank)
    # Freeze everything, then re-enable gradients for the four LoRA factors only.
    for p in net.parameters():
        p.requires_grad = False
    lora = [*net.fc1_lora, *net.fc2_lora]
    for p in lora:
        p.requires_grad = True
    opt = torch.optim.AdamW(lora, lr=lr, weight_decay=0.0)
    lossf = nn.SmoothL1Loss(beta=0.1)
    bs = min(64, n)
    net.train()
    for _ in range(iters):
        bi = torch.randint(0, n, (bs,))            # sample a minibatch with replacement
        opt.zero_grad()
        lossf(net(X[bi]), Y[bi]).backward()
        opt.step()
    alpha = 16.0
    scale = alpha / rank
    a1, b1 = net.fc1_lora
    a2, b2 = net.fc2_lora
    tensors = {
        "fc1.a": a1.detach().contiguous(),
        "fc1.b": (b1.detach() * scale).contiguous(),    # bake scale into b
        "fc2.a": a2.detach().contiguous(),
        "fc2.b": (b2.detach() * scale).contiguous(),
    }
    from safetensors.torch import save_file
    save_file(tensors, out)
    return out, sum(p.numel() for p in lora), n
 if __name__ == "__main__":
    # CLI entry point: parse the flags, fit the adapter, and report what was written.
    parser = argparse.ArgumentParser()
    for required_flag in ("--base", "--data", "--out"):
        parser.add_argument(required_flag, required=True)
    parser.add_argument("--rank", type=int, default=4)
    parser.add_argument("--iters", type=int, default=400)
    cli = parser.parse_args()
    saved_path, n_params, n_samples = fit(cli.base, cli.data, cli.out, cli.rank, cli.iters)
    print(f"saved {saved_path} | {n_params} LoRA params from {n_samples} samples "
          f"(keys fc1.a/fc1.b/fc2.a/fc2.b — load with cog-pose-estimation run --adapter)")
@@ -1,49 +0,0 @@
 """Run calibrated WiFi-CSI pose inference: shared base + a per-room LoRA adapter.
    python infer.py --base pose_mmfi_best.pt --adapter room_A.adapter.npz --data frames.npz
 `frames.npz` contains `X` [N,3,114,10] CSI amplitude. Prints/saves [N,17,2] keypoints in [0,1].
 Omit --adapter to run the uncalibrated (zero-shot) base. With a room adapter, expect SOTA-level
 accuracy in that room/person; without one, zero-shot degrades in unseen rooms (ADR-150 §3.6).
 """
 import argparse
 import numpy as np
 import torch
 from model import PoseNet, standardize
 def main():
    """Run pose inference with the base checkpoint and, optionally, a per-room LoRA adapter.

    CLI flags: ``--base`` (checkpoint, loaded non-strictly), ``--adapter`` (``.npz`` adapter;
    omit for zero-shot), ``--data`` (``.npz`` with ``X``), ``--out`` (optional ``.npy`` for the
    ``[N,17,2]`` keypoints), ``--rank`` and ``--device``.

    When ``--rank`` is not given, the rank is taken from the first entry of the adapter's
    ``_meta`` array if present, otherwise 8. Frames are processed in chunks of 4096.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--base", required=True)
    ap.add_argument("--adapter", default=None, help="per-room .adapter.npz (omit for zero-shot)")
    ap.add_argument("--data", required=True, help=".npz with X [N,3,114,10]")
    ap.add_argument("--out", default=None, help="optional .npy to save [N,17,2] keypoints")
    ap.add_argument("--rank", type=int, default=None,
                    help="LoRA rank (default: rank recorded in the adapter's _meta, else 8)")
    ap.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
    a = ap.parse_args()
    dev = a.device
    net = PoseNet().to(dev)
    # NOTE(security): torch.load unpickles the file — only pass trusted checkpoints as --base.
    net.load_state_dict(torch.load(a.base, map_location=dev), strict=False)
    if a.adapter:
        # Read everything inside the context manager so the archive handle is released.
        with np.load(a.adapter) as z:
            meta_rank = int(z["_meta"][0]) if "_meta" in z.files else 8
            tensors = {k: z[k].astype(np.float32) for k in z.files
                       if k.endswith(".A") or k.endswith(".B")}
        rank = a.rank if a.rank is not None else meta_rank
        net.add_lora(r=rank).to(dev)
        # A suffix match can also catch buffers whose key ends in ".A" (e.g. "gr.A"). Those are
        # not adapter weights, and loading their fp16-rounded copy would degrade the model's
        # own exact values, so they are dropped here.
        buffer_names = {k for k, _ in net.named_buffers()}
        net.load_lora({k: v for k, v in tensors.items() if k not in buffer_names})
    net.eval()
    with np.load(a.data) as frames:
        X = torch.tensor(frames["X"].astype(np.float32)).to(dev)
    Xs = standardize(X)
    out = []
    with torch.no_grad():
        for i in range(0, len(Xs), 4096):
            out.append(net(Xs[i:i + 4096]).cpu().numpy())
    # np.concatenate rejects an empty list; an empty capture yields an empty keypoint array.
    kp = np.concatenate(out).reshape(-1, 17, 2) if out else np.zeros((0, 17, 2), dtype=np.float32)
    print(f"inferred {len(kp)} frames | adapter={'yes' if a.adapter else 'NONE (zero-shot)'}")
    if a.out:
        np.save(a.out, kp)
        print(f"saved keypoints -> {a.out}")
 if __name__ == "__main__":
    main()
@@ -1,107 +0,0 @@
 """WiFi-CSI pose model + LoRA adapter for the RuView calibration service.
 Architecture matches the published flagship checkpoint
 [`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose)
 (`pose_mmfi_best.pt`): transformer encoder + temporal attention pooling + skeleton-graph head.
 The calibration service freezes this base and fits a tiny per-room **LoRA adapter** (rank 8 on the
 input projection + pose head ≈ 11 KB) from ~100–200 labeled in-room samples. Empirically that lifts
 cross-subject 64→72% and cross-environment 11→73% (ADR-150 §3.3–3.6).
 """
 import numpy as np
 import torch
 import torch.nn as nn
 # Undirected bone list of the COCO-17 skeleton, consumed by the graph-refinement head.
 EDGES = [(0, 1), (0, 2), (1, 3), (2, 4), (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
         (5, 11), (6, 12), (11, 12), (11, 13), (13, 15), (12, 14), (14, 16)]
 # Adjacency with self-loops (identity) plus a symmetric 1.0 per edge; each row is then
 # divided by its sum so that it averages a joint with its neighbours.
 _A = np.eye(17, dtype=np.float32)
 for _i, _j in EDGES:
    _A[_i, _j] = 1.0
    _A[_j, _i] = 1.0
 _A /= _A.sum(axis=1, keepdims=True)
 class LoRA(nn.Module):
    """Frozen ``nn.Linear`` plus a trainable low-rank correction.

    Output is ``base(x) + (x @ A @ B) * (alpha / r)``. ``A`` is initialised from N(0, 0.02^2) and
    ``B`` from zeros, so a freshly wrapped layer produces the same output as ``base`` alone.
    """
    def __init__(self, base: nn.Linear, r: int = 8, alpha: int = 16):
        super().__init__()
        # The wrapped layer never trains; only A and B receive gradients.
        base.requires_grad_(False)
        self.base = base
        self.A = nn.Parameter(torch.empty(base.in_features, r).normal_(mean=0.0, std=0.02))
        self.B = nn.Parameter(torch.zeros(r, base.out_features))
        self.scale = alpha / r
    def forward(self, x):
        low_rank = (x @ self.A) @ self.B
        return self.base(x) + low_rank * self.scale
 class GR(nn.Module):
    """Skeleton-graph refinement: nudges joints toward anatomically consistent positions.

    Each of the 17 joints gets a feature vector built from the pooled feature ``z``, a learned
    32-dim joint embedding and the joint's initial 2-D estimate. Two rounds of neighbour
    averaging (through the normalised adjacency buffer ``A``) followed by a linear layer produce
    a per-joint offset bounded to ±0.3 by ``tanh``, which is added to the initial estimate.
    """
    def __init__(self, d=256, h=96):
        super().__init__()
        self.je = nn.Parameter(0.02 * torch.randn(17, 32))
        # d (pooled feature) + 32 (joint embedding) + 2 (initial keypoint) input features
        self.inp = nn.Linear(d + 34, h)
        self.g1 = nn.Linear(h, h)
        self.g2 = nn.Linear(h, h)
        self.out = nn.Linear(h, 2)
        self.register_buffer("A", torch.tensor(_A))
    def forward(self, z, kp0):
        batch = z.shape[0]
        per_joint = torch.cat(
            [z.unsqueeze(1).expand(-1, 17, -1), self.je.unsqueeze(0).expand(batch, -1, -1), kp0], -1)
        f = torch.relu(self.inp(per_joint))
        for layer in (self.g1, self.g2):
            # mix every joint with its skeleton neighbours, then transform
            f = torch.relu(layer(torch.einsum('ij,bjh->bih', self.A, f)))
        return kp0 + 0.3 * torch.tanh(self.out(f))
 class PoseNet(nn.Module):
    """Flagship pose model. Input [B,3,114,10] CSI amplitude (per-sample standardized) -> [B,34].

    Pipeline: each time step's ``na * nsc`` values are projected to ``d`` dims, a learned
    positional term is added, a transformer encoder runs over the ``nt`` steps, the steps are
    pooled with softmax attention weights, a sigmoid MLP head gives an initial 17x2 estimate and
    the ``GR`` head refines it.
    """
    def __init__(self, na=3, nsc=114, nt=10, d=256, L=4, H=8):
        super().__init__()
        self.proj = nn.Linear(na * nsc, d)
        self.pos = nn.Parameter(torch.randn(1, nt, d) * 0.02)
        enc = nn.TransformerEncoderLayer(d, H, d * 2, dropout=0.2, batch_first=True, activation='gelu')
        self.tf = nn.TransformerEncoder(enc, L)
        self.att = nn.Linear(d, 1)
        self.head = nn.Sequential(nn.Linear(d, 256), nn.GELU(), nn.Dropout(0.3), nn.Linear(256, 34))
        self.gr = GR(d)
        self.na, self.nsc, self.nt = na, nsc, nt
    def forward(self, x):
        B = x.shape[0]
        # [B, na, nsc, nt] -> [B, nt, na*nsc]: one token per time step
        t = x.permute(0, 3, 1, 2).reshape(B, self.nt, self.na * self.nsc)
        h = self.tf(self.proj(t) + self.pos)
        w = torch.softmax(self.att(h), 1)        # attention weights over the time axis
        z = (h * w).sum(1)
        kp0 = torch.sigmoid(self.head(z)).reshape(B, 17, 2)
        return self.gr(z, kp0).reshape(B, 34)
    def add_lora(self, r=8, alpha=16):
        """Wrap the input projection + pose head with LoRA adapters (the ~11 KB calibration set)."""
        self.proj = LoRA(self.proj, r, alpha)
        self.head[0] = LoRA(self.head[0], r, alpha)
        self.head[3] = LoRA(self.head[3], r, alpha)
        return self
    def _lora_keys(self) -> list:
        """State-dict keys of the ``A``/``B`` factors that belong to ``LoRA`` modules only."""
        return [f"{name}.{factor}" for name, mod in self.named_modules()
                if isinstance(mod, LoRA) for factor in ("A", "B")]
    def lora_state(self) -> dict:
        """Extract just the LoRA A/B tensors (the per-room adapter to save).

        Keys come from the ``LoRA`` modules themselves rather than from a ``.A``/``.B`` suffix
        match: the suffix also matches the ``gr.A`` adjacency buffer, which is not adapter data.
        """
        sd = self.state_dict()
        return {k: sd[k].detach().cpu().numpy() for k in self._lora_keys()}
    def load_lora(self, adapter: dict):
        """Copy adapter tensors into the LoRA factors and return ``self``.

        Entries that name existing non-LoRA state (such as a ``gr.A`` written by an older
        adapter) are skipped so that the model keeps its own exact values. Keys the model does
        not know at all are still handed to ``load_state_dict``, which rejects them as before.
        """
        lora_keys = set(self._lora_keys())
        sd = self.state_dict()
        for k, v in adapter.items():
            if k in sd and k not in lora_keys:
                continue
            sd[k] = torch.tensor(v)
        self.load_state_dict(sd)
        return self
 def standardize(x: torch.Tensor) -> torch.Tensor:
    """Zero-mean / unit-std normalisation of each sample over axes 1, 2 and 3.

    A small epsilon (1e-6) is added to the standard deviation to avoid division by zero.
    """
    sample_axes = (1, 2, 3)
    centre = x.mean(sample_axes, keepdim=True)
    spread = x.std(sample_axes, keepdim=True)
    return (x - centre) / (spread + 1e-6)
@@ -1,103 +0,0 @@
 """Self-contained regression test for the RuView calibration service.
 Exercises the committed CLI end-to-end on synthetic data (CPU, no GPU, no real checkpoint):
  build a base -> calibrate.py fits an adapter -> infer.py runs base+adapter -> assert the
  adapter is small, inference is shape-correct and finite, and the adapter actually changes output.
 Run:  python test_calibration.py    (or via pytest)
 """
 import json
 import subprocess
 import sys
 import tempfile
 from pathlib import Path
 import numpy as np
 import torch
 HERE = Path(__file__).parent
 sys.path.insert(0, str(HERE))
 from model import PoseNet, standardize  # noqa: E402
 def _make_base(path: Path):
    """Write a seeded, randomly initialised ``PoseNet`` checkpoint to ``path``."""
    torch.manual_seed(0)
    weights = dict(PoseNet().state_dict())
    # Drop the deterministic gr.A buffer (mirrors the published checkpoint); calibrate.py and
    # infer.py load with strict=False, so its absence is tolerated.
    weights.pop("gr.A", None)
    torch.save(weights, path)
 def _make_data(path: Path, n: int, seed: int):
    rng = np.random.default_rng(seed)
    X = rng.standard_normal((n, 3, 114, 10)).astype(np.float32)
    Y = rng.random((n, 17, 2)).astype(np.float32)  # keypoints in [0,1]
    np.savez(path, X=X, Y=Y)
 def _run(*args):
    """Run ``HERE/<args[0]>`` with the current interpreter and return its stdout.

    The remaining arguments are stringified and passed on the command line. Fails the calling
    test, with both output streams in the message, if the script exits non-zero.
    """
    cmd = [sys.executable, str(HERE / args[0])]
    cmd.extend(str(arg) for arg in args[1:])
    r = subprocess.run(cmd, capture_output=True, text=True)
    assert r.returncode == 0, f"{args[0]} failed:\n{r.stdout}\n{r.stderr}"
    return r.stdout
 def test_calibration_end_to_end():
    """Drive calibrate.py then infer.py on synthetic data and check the adapter has an effect."""
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        base, calib, frames = root / "base.pt", root / "calib.npz", root / "frames.npz"
        adapter, kp = root / "room.adapter.npz", root / "kp.npy"
        _make_base(base)
        _make_data(calib, n=40, seed=1)     # ≥20 → no underfit warning
        _make_data(frames, n=16, seed=2)
        # Step 1: fit an adapter through the CLI.
        stdout = _run("calibrate.py", "--base", base, "--data", calib, "--out", adapter,
                      "--iters", "50", "--device", "cpu")
        assert adapter.exists(), "adapter not written"
        assert "saved" in stdout.lower()
        sz = adapter.stat().st_size
        assert sz < 200_000, f"adapter unexpectedly large ({sz} bytes)"
        # Read the LoRA tensors eagerly and close the archive: np.load keeps a lazy file handle,
        # which would block temp-dir cleanup on Windows.
        with np.load(adapter) as z:
            keys = [k for k in z.files if k.endswith(".A") or k.endswith(".B")]
            assert keys, f"adapter has no LoRA tensors: {z.files}"
            lora = {k: z[k].astype(np.float32) for k in keys}
        # Step 2: calibrated inference through the CLI.
        _run("infer.py", "--base", base, "--adapter", adapter, "--data", frames,
             "--out", kp, "--device", "cpu")
        out_kp = np.load(kp)
        assert out_kp.shape == (16, 17, 2), f"bad keypoint shape {out_kp.shape}"
        assert np.isfinite(out_kp).all(), "non-finite keypoints"
        assert (out_kp >= 0).all() and (out_kp <= 1).all(), "keypoints out of [0,1]"
        # Step 3: in-process comparison — output with the adapter must differ from zero-shot.
        with np.load(frames) as fz:
            frames_x = fz["X"][:]
        net = PoseNet()
        net.load_state_dict(torch.load(base, map_location="cpu"), strict=False)
        net.eval()
        x = standardize(torch.tensor(frames_x))

        def _predict():
            with torch.no_grad():
                return net(x).reshape(16, 17, 2).numpy()

        base_kp = _predict()
        net.add_lora()
        net.load_lora(lora)
        net.eval()
        cal_kp = _predict()
        assert np.abs(base_kp - cal_kp).sum() > 1e-4, "adapter did not change output"
 if __name__ == "__main__":
    test_calibration_end_to_end()
    print("PASS: calibration service end-to-end (calibrate -> adapter -> infer)")
@@ -1,75 +0,0 @@
 """Regression test for the cog-pose adapter producer (cog_calibrate.py).
 Uses the in-repo `pose_v1.safetensors` (skips if absent). Verifies the produced adapter:
  - has the exact keys/shapes the Rust `cog-pose-estimation --adapter` loader expects,
  - reduces calibration fit error,
  - actually changes inference output,
  - is tiny.
 Run: python test_cog_calibration.py   (or via pytest)
 """
 import os
 import sys
 import tempfile
 from pathlib import Path
 import numpy as np
 import torch
 import torch.nn.functional as F
 HERE = Path(__file__).parent
 sys.path.insert(0, str(HERE))
 import cog_calibrate as C  # noqa: E402
 BASE = HERE / "../../v2/crates/cog-pose-estimation/cog/artifacts/pose_v1.safetensors"
 def test_cog_adapter_producer():
    """Fit a cog adapter on synthetic data and check its layout, its effect and its size."""
    if not BASE.exists():
        print(f"(skip — {BASE} not present)")
        return
    from safetensors.torch import load_file
    rng = np.random.default_rng(0)
    n = 120
    X = rng.standard_normal((n, 56, 20)).astype("float32")
    # Targets are a clipped affine function of the first frame, so there is something to fit.
    Y = (0.5 + 0.1 * X[:, :34, 0].reshape(n, 34)).clip(0, 1).astype("float32")
    with tempfile.TemporaryDirectory() as tmp:
        calib = os.path.join(tmp, "calib.npz")
        adapter = os.path.join(tmp, "room.safetensors")
        np.savez(calib, X=X, Y=Y)
        net0 = C.CogPose()
        C.load_base(net0, str(BASE))
        net0.eval()
        with torch.no_grad():
            base_err = F.smooth_l1_loss(net0(torch.tensor(X)), torch.tensor(Y)).item()
        _, nparam, _ = C.fit(str(BASE), calib, adapter, rank=4, iters=400)
        t = load_file(adapter)
        # exact Rust loader contract: a:[in,r], b:[r,out]
        expected_shapes = {"fc1.a": (128, 4), "fc1.b": (4, 256),
                           "fc2.a": (256, 4), "fc2.b": (4, 34)}
        for key, shape in expected_shapes.items():
            assert tuple(t[key].shape) == shape
        net = C.CogPose()
        C.load_base(net, str(BASE))
        net.add_lora(4)
        with torch.no_grad():
            # the file stores b multiplied by alpha / rank = 16 / 4; undo that before copying
            net.fc1_lora[0].copy_(t["fc1.a"])
            net.fc1_lora[1].copy_(t["fc1.b"] / (16 / 4))
            net.fc2_lora[0].copy_(t["fc2.a"])
            net.fc2_lora[1].copy_(t["fc2.b"] / (16 / 4))
        net.eval()
        with torch.no_grad():
            cal_err = F.smooth_l1_loss(net(torch.tensor(X)), torch.tensor(Y)).item()
            probe = torch.tensor(X[:8])
            changed = (net0(probe) - net(probe)).abs().sum().item()
        assert cal_err < base_err, f"calibration did not reduce error ({base_err} -> {cal_err})"
        assert changed > 1e-3, "adapter inert"
        assert nparam < 5000, f"adapter unexpectedly large ({nparam} params)"
 if __name__ == "__main__":
    test_cog_adapter_producer()
    print("PASS: cog adapter producer (Rust-loadable format, reduces error, active)")
@@ -1 +0,0 @@
 9c35e541d51f00998691b98948887ebca09b907d8eb29a113f97e792340456ba
@@ -1 +0,0 @@
 {"frames": [{"pred": [[0.4003, 0.2734], [0.5038, 0.4197], [0.2053, 0.4438], [0.4397, 0.685], [0.5796, 0.7645], [0.8001, 0.2195], [0.2789, 0.2833], [0.314, 0.5439], [0.511, 0.2259], [0.6008, 0.46], [0.4837, 0.3879], [0.3475, 0.5597], [0.6569, 0.3575], [0.437, 0.6539], [0.2341, 0.6038], [0.7331, 0.392], [0.5615, 0.4915]]}, {"pred": [[0.4669, 0.6066], [0.6012, 0.7873], [0.4124, 0.5997], [0.2832, 0.281], [0.2732, 0.3635], [0.2503, 0.4848], [0.6827, 0.715], [0.4336, 0.7165], [0.295, 0.3386], [0.5337, 0.3544], [0.4397, 0.5474], [0.5163, 0.5528], [0.7547, 0.6799], [0.4195, 0.4448], [0.2257, 0.2269], [0.384, 0.2176], [0.2419, 0.4332]]}, {"pred": [[0.5585, 0.283], [0.4325, 0.2934], [0.463, 0.4744], [0.4188, 0.3454], [0.215, 0.7565], [0.527, 0.2353], [0.7084, 0.6124], [0.3015, 0.6744], [0.4103, 0.3532], [0.7243, 0.6932], [0.3302, 0.4918], [0.2072, 0.3754], [0.7914, 0.4878], [0.7618, 0.4079], [0.323, 0.3386], [0.7104, 0.4997], [0.2673, 0.6077]]}, {"pred": [[0.6372, 0.4984], [0.4184, 0.6763], [0.4498, 0.7549], [0.2924, 0.303], [0.3069, 0.7022], [0.3954, 0.5098], [0.7836, 0.6071], [0.4733, 0.7114], [0.3407, 0.3793], [0.3408, 0.4678], [0.4156, 0.4911], [0.4525, 0.7519], [0.5117, 0.1985], [0.1893, 0.6784], [0.6281, 0.5346], [0.5175, 0.673], [0.36, 0.3665]]}, {"pred": [[0.5535, 0.6537], [0.568, 0.511], [0.4705, 0.5377], [0.6372, 0.7163], [0.5493, 0.7515], [0.2559, 0.4549], [0.2553, 0.6176], [0.2991, 0.6154], [0.7185, 0.7986], [0.4586, 0.5057], [0.2975, 0.4525], [0.3263, 0.3719], [0.5131, 0.4576], [0.557, 0.5268], [0.6572, 0.7736], [0.2146, 0.6526], [0.4662, 0.7371]]}, {"pred": [[0.2924, 0.7595], [0.2612, 0.2315], [0.2488, 0.7751], [0.2329, 0.7282], [0.4744, 0.4206], [0.3618, 0.267], [0.2477, 0.285], [0.3976, 0.3746], [0.494, 0.2874], [0.3596, 0.2112], [0.3311, 0.4692], [0.6912, 0.4727], [0.4434, 0.5233], [0.4139, 0.7048], [0.425, 0.3937], [0.2326, 0.631], [0.2655, 0.7116]]}, {"pred": [[0.3609, 0.3437], [0.285, 0.486], [0.7734, 0.5468], [0.3657, 0.4093], [0.4728, 0.5019], [0.1866, 
0.3545], [0.2172, 0.2028], [0.5613, 0.5238], [0.6252, 0.7205], [0.7998, 0.2954], [0.242, 0.7063], [0.6259, 0.6883], [0.5148, 0.7141], [0.5577, 0.7434], [0.3233, 0.2131], [0.2652, 0.7066], [0.5753, 0.5885]]}, {"pred": [[0.6787, 0.6504], [0.6051, 0.2297], [0.2539, 0.3475], [0.6437, 0.7807], [0.4981, 0.6149], [0.5716, 0.2367], [0.6486, 0.3632], [0.2433, 0.369], [0.6061, 0.3731], [0.4955, 0.2591], [0.7676, 0.7602], [0.6899, 0.7716], [0.3143, 0.7707], [0.3031, 0.4997], [0.7076, 0.5133], [0.3382, 0.7196], [0.2002, 0.4871]]}]}
@@ -1 +0,0 @@
 {"frames": [{"gt": [[0.3943, 0.2905], [0.5215, 0.4194], [0.2225, 0.4602], [0.4547, 0.6961], [0.5765, 0.7686], [0.7858, 0.2279], [0.2866, 0.2707], [0.3084, 0.549], [0.5286, 0.2377], [0.6082, 0.4566], [0.4719, 0.3799], [0.3465, 0.5447], [0.6377, 0.3728], [0.4509, 0.6543], [0.2235, 0.6009], [0.7253, 0.3882], [0.5479, 0.4737]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.4845, 0.5985], [0.5883, 0.7959], [0.4315, 0.6012], [0.3008, 0.2703], [0.2776, 0.3486], [0.2483, 0.4695], [0.6916, 0.7184], [0.4153, 0.7305], [0.3057, 0.3392], [0.5535, 0.3576], [0.4216, 0.5398], [0.5093, 0.5706], [0.7397, 0.668], [0.4354, 0.4394], [0.2373, 0.2404], [0.404, 0.2315], [0.2609, 0.4182]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.5684, 0.2891], [0.4185, 0.2737], [0.4796, 0.4903], [0.4056, 0.3589], [0.2139, 0.7706], [0.5259, 0.2162], [0.718, 0.6177], [0.3002, 0.6632], [0.3978, 0.3338], [0.7116, 0.6836], [0.336, 0.5106], [0.2168, 0.3677], [0.7739, 0.4683], [0.773, 0.4188], [0.318, 0.3226], [0.7043, 0.4877], [0.2509, 0.5964]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.6501, 0.4868], [0.3995, 0.6805], [0.4408, 0.7681], [0.2762, 0.2907], [0.2877, 0.6959], [0.4102, 0.5292], [0.7825, 0.5898], [0.4603, 0.723], [0.3511, 0.3758], [0.3556, 0.4514], [0.4123, 0.4749], [0.4524, 0.7506], [0.5141, 0.2112], [0.2024, 0.6795], [0.6351, 0.5339], [0.5333, 0.6706], [0.3491, 0.3662]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.537, 0.656], [0.5675, 0.5033], [0.4714, 0.52], [0.6195, 0.7259], [0.5357, 0.766], [0.273, 0.4653], [0.2439, 0.6017], [0.2927, 0.6297], [0.7297, 0.7805], [0.439, 0.4924], [0.2969, 0.4589], [0.3174, 0.3911], [0.5324, 0.4643], [0.5744, 0.5074], [0.673, 0.783], [0.2238, 0.6674], [0.4534, 
0.7468]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.2896, 0.7515], [0.2537, 0.2345], [0.2434, 0.763], [0.2502, 0.7137], [0.4723, 0.4035], [0.3607, 0.2775], [0.2657, 0.2969], [0.3872, 0.383], [0.5001, 0.3067], [0.3503, 0.2092], [0.3137, 0.4849], [0.6914, 0.4593], [0.4359, 0.504], [0.4056, 0.6994], [0.4428, 0.4085], [0.2424, 0.6445], [0.2507, 0.7048]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.3692, 0.3453], [0.2945, 0.4675], [0.7836, 0.5282], [0.3857, 0.414], [0.4848, 0.5017], [0.203, 0.3585], [0.225, 0.2135], [0.5513, 0.5175], [0.6296, 0.7275], [0.7908, 0.2897], [0.2263, 0.7012], [0.6403, 0.6873], [0.5026, 0.701], [0.5504, 0.7357], [0.338, 0.2187], [0.2629, 0.7015], [0.5757, 0.6084]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.6786, 0.649], [0.5956, 0.2396], [0.2447, 0.3593], [0.6439, 0.7854], [0.4874, 0.6102], [0.5857, 0.2465], [0.6459, 0.3827], [0.2364, 0.3613], [0.6054, 0.3745], [0.4798, 0.2711], [0.7869, 0.7618], [0.6919, 0.7809], [0.3259, 0.7674], [0.285, 0.5144], [0.6921, 0.5052], [0.3388, 0.7386], [0.2022, 0.495]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}]}
@@ -1,5 +0,0 @@
 {"benchmark": "AetherArena", "created": "2026-05-30", "kind": "genesis", "note": "Official Spatial-Intelligence Benchmark \u2014 append-only signed ledger. Entries are real harness scores only; no seeded numbers.", "prev_hash": "0000000000000000000000000000000000000000000000000000000000000000", "row_hash": "940bdc6f0f5dd00f4d89e13a8fa843bab3c9ddf1b8051f426a1701e730249231", "seq": 0, "spec": "ADR-149"}
 {"abs_gain": "+9.38", "benchmark": "MM-Fi", "category": "pose", "caveat": "Protocol-matched MM-Fi random_split result; NOT solved real-world generalization. Random split has temporal/subject-adjacency effects common to this benchmark family. Leakage-free cross-subject is far lower (~11-27%) and is the real deployment frontier.", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20 (||right_shoulder-left_hip|| norm, 17 COCO kpts)", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer (4L/8H ~2M params, temporal-attention)", "prev_hash": "940bdc6f0f5dd00f4d89e13a8fa843bab3c9ddf1b8051f426a1701e730249231", "protocol": "random_split (ratio=0.8, seed=0)", "rel_gain": "+13.0%", "reproduce": "download MM-Fi -> parse_mmfi_zips.py -> train_tf_torso.py X.npy Y.npy split_random.npy (seed 0)", "row_hash": "76598d8e1320d5248f8cd854a8ffa22a99bd2a2f0e0e7f2d2b1df79af16001d5", "score_pct": 81.63, "scored_at": "2026-05-30", "seq": 1, "sota_ref": "MultiFormer 72.25 (CSI2Pose 68.41)", "submitter": "ruvnet", "tier": "Gold"}
 {"abs_gain": "+11.34", "benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer + skeleton-graph head + 3-ensemble + TTA", "note": "Best in-domain. Stacks attention-pooling + transformer + skeleton-graph refine + warmup + TTA + 3-model ensemble. Supersedes the 81.63 single-model entry.", "prev_hash": "76598d8e1320d5248f8cd854a8ffa22a99bd2a2f0e0e7f2d2b1df79af16001d5", "protocol": "random_split (0.8, seed 0)", "row_hash": "5780a4bc3e98eb0e30c1ecfa9091e57b280444fa1f21cd5146797e408580e4ab", "score_pct": 83.59, "scored_at": "2026-05-30", "seq": 2, "sota_ref": "MultiFormer 72.25 (CSI2Pose 68.41)", "submitter": "ruvnet", "tier": "Gold"}
 {"benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer", "note": "Leakage-free generalization to unseen people, shared rooms. Honest deployment-relevant number.", "prev_hash": "5780a4bc3e98eb0e30c1ecfa9091e57b280444fa1f21cd5146797e408580e4ab", "protocol": "cross_subject (official, val=S05,S10,..,S40)", "row_hash": "d989e4e1dbc0182610305fdfbde8b094413b87c913283a46bf41f4afba7a06fd", "score_pct": 64.04, "scored_at": "2026-05-30", "seq": 3, "sota_ref": "(no matched public ref)", "submitter": "ruvnet", "tier": "Silver"}
 {"benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer + CORAL domain alignment", "note": "The real deployment frontier (new room). CORAL transductive DG (+30% rel over control). Data-bound: MM-Fi has only 3 source rooms.", "prev_hash": "d989e4e1dbc0182610305fdfbde8b094413b87c913283a46bf41f4afba7a06fd", "protocol": "cross_environment (train E01-03 -> test E04, new room)", "row_hash": "bf370487bde88e198c13877956dab3c83766a6a24afef0b78b6ac7aa130bb207", "score_pct": 17.51, "scored_at": "2026-05-30", "seq": 4, "sota_ref": "(hard frontier; control 13.52)", "submitter": "ruvnet", "tier": "Bronze"}
@@ -1,100 +0,0 @@
 #!/usr/bin/env python3
 """AetherArena append-only, tamper-evident results ledger (ADR-149 §2.3/§2.4).
 Each row is hash-chained to the previous one: ``row_hash = sha256(canonical_row)``,
 where the canonical row embeds ``prev_hash``. Any silent edit to an earlier row breaks every subsequent
 ``prev_hash`` link, so the ledger is append-only and verifiable by anyone — no
 trust in the maintainer required. (Ed25519 row signing is the next hardening;
 the chain already makes tampering detectable.)
 Usage:
    python ledger_tools.py seed        # (re)build ledger.jsonl with the genesis row only (empty board)
    python ledger_tools.py verify      # verify the whole chain  -> exit 0 / 1
    python ledger_tools.py append '<json-row>'   # append one scored row
 """
 import hashlib
 import json
 import sys
 from pathlib import Path
 LEDGER = Path(__file__).parent / "ledger.jsonl"
 GENESIS_PREV = "0" * 64
 def canonical(row: dict) -> bytes:
    """Serialise ``row`` into the deterministic bytes that get hashed.

    The ``row_hash`` field is excluded (a hash cannot cover itself). The
    remaining fields are dumped with sorted keys and compact separators so
    the same row always yields the same byte string.
    """
    hashed_fields = dict(row)
    hashed_fields.pop("row_hash", None)
    text = json.dumps(hashed_fields, sort_keys=True, separators=(",", ":"))
    return text.encode()
 def row_hash(row: dict) -> str:
    """Return the hex SHA-256 digest of the row's canonical bytes."""
    digest = hashlib.sha256()
    digest.update(canonical(row))
    return digest.hexdigest()
 def read_rows() -> list[dict]:
    """Load every ledger row in file order; a missing ledger yields ``[]``."""
    if not LEDGER.exists():
        return []
    rows = []
    for line in LEDGER.read_text().splitlines():
        # Whitespace-only lines carry no row and are skipped.
        if line.strip():
            rows.append(json.loads(line))
    return rows
 def append(entry: dict) -> dict:
    """Chain ``entry`` onto the end of the ledger and return the stored row.

    The caller's mapping is copied, not mutated. The copy receives ``seq``
    (its position in the ledger), ``prev_hash`` (the last row's hash, or the
    all-zero genesis value for an empty ledger) and finally ``row_hash``,
    then is written as one JSON line.
    """
    existing = read_rows()
    tail_hash = existing[-1]["row_hash"] if existing else GENESIS_PREV
    row = dict(entry)
    row.update(seq=len(existing), prev_hash=tail_hash)
    # The hash is computed last so it covers seq and prev_hash as well.
    row["row_hash"] = row_hash(row)
    line = json.dumps(row, sort_keys=True) + "\n"
    with LEDGER.open("a") as ledger_file:
        ledger_file.write(line)
    return row
 def verify() -> bool:
    """Walk the ledger and check every link of the hash chain.

    For each row, in order: ``seq`` must equal the row's position,
    ``prev_hash`` must equal the previous row's hash (all zeros for the
    first row), and ``row_hash`` must match a fresh recomputation. The first
    violation is printed and ``False`` is returned; otherwise a summary line
    is printed and ``True`` is returned.
    """
    rows = read_rows()
    expected_prev = GENESIS_PREV
    for index, row in enumerate(rows):
        failure = None
        if row.get("seq") != index:
            failure = f"FAIL: row {index} seq mismatch ({row.get('seq')})"
        elif row.get("prev_hash") != expected_prev:
            failure = f"FAIL: row {index} prev_hash broken — ledger was edited"
        elif row.get("row_hash") != row_hash(row):
            failure = f"FAIL: row {index} row_hash mismatch — row was tampered"
        if failure is not None:
            print(failure)
            return False
        expected_prev = row["row_hash"]
    print(f"OK: {len(rows)} rows, chain intact")
    return True
 def seed():
    """Reset the ledger so it holds nothing but the genesis row.

    Any existing ledger file is deleted first. No result rows are written
    here: the board starts empty, and scores are only ever added later
    through ``append``.
    """
    genesis = {
        "kind": "genesis",
        "benchmark": "AetherArena",
        "spec": "ADR-149",
        "note": "Official Spatial-Intelligence Benchmark — append-only signed ledger. "
                "Entries are real harness scores only; no seeded numbers.",
        "created": "2026-05-30",
    }
    if LEDGER.exists():
        LEDGER.unlink()
    append(genesis)
 if __name__ == "__main__":
    # Command dispatch; with no command on the command line, default to "verify".
    cmd = sys.argv[1] if len(sys.argv) > 1 else "verify"
    if cmd == "seed":
        seed()
        verify()
    elif cmd == "verify":
        # Exit status mirrors the chain check: 0 = intact, 1 = broken.
        sys.exit(0 if verify() else 1)
    elif cmd == "append" and len(sys.argv) > 2:
        print(json.dumps(append(json.loads(sys.argv[2])), indent=2))
    else:
        # Unknown command, or "append" without its JSON row argument (which
        # used to crash with IndexError): show the usage text and exit 2.
        print(__doc__)
        sys.exit(2)
@@ -1,41 +0,0 @@
 # AetherArena submission manifest (ADR-149 §2.2).
 # Accompanies a model artifact pushed to the AA Hugging Face Space.
 # This file is the contract the Space validates before quarantine + scoring.
 [submission]
 # Free-form display name shown on the leaderboard.
 name = "my-spatial-model"
 # Hugging Face repo or URL of the model artifact (.safetensors / .rvf / LoRA adapter).
 model_ref = "hf://your-org/your-model"
 # Submitter handle (HF username / org). Used to sign the ledger row.
 submitter = "your-hf-username"
 # SPDX license of the submitted model.
 license = "Apache-2.0"
 [category]
 # One of: pose | presence | tracking | vitals | multi-task
 # v0 ranks: pose, presence (tracking/vitals activate when ground truth lands).
 primary = "pose"
 [input]
 # Which ADR-145 FeatureSet the model consumes. v0 input is RF/WiFi CSI.
 #   F0 = CSI amplitude/phase   F1 = +CIR   F2 = +Doppler   F3 = +BFLD
 feature_set = "F0"
 # Tensor I/O contract so the scorer can feed the model correctly.
 input_shape = [114, 2]      # subcarriers × {amp, phase}  (example)
 output_shape = [17, 2]      # 17 keypoints × {x, y} normalised [0,1]
 # Normalisation expected on the input ("none" | "zscore" | "minmax").
 normalization = "zscore"
 [runtime]
 # Inference entrypoint inside the artifact (framework-specific).
 framework = "candle"        # candle | onnx | torch
 # Optional: target the edge-latency category with a declared device class.
 device_class = "cpu"        # cpu | pi5 | gpu
 # Notes:
 # - You submit a MODEL, never predictions on data you hold.
 # - Scoring runs against a PRIVATE MM-Fi held-out split in a no-network,
 #   read-only sandbox. You cannot see the eval data.
 # - The resulting score is a signed, append-only ledger row carrying a
 #   determinism proof hash and the pinned harness_version.
@@ -1,37 +0,0 @@
 ---
 title: AetherArena — Spatial-Intelligence Benchmark
 emoji: 📡
 colorFrom: indigo
 colorTo: purple
 sdk: gradio
 sdk_version: 5.9.1
 python_version: "3.12"
 app_file: app.py
 pinned: true
 license: cc-by-nc-4.0
 tags:
  - benchmark
  - leaderboard
  - wifi-sensing
  - spatial-intelligence
  - pose-estimation
 ---
 # AetherArena ("AA") — The Official Spatial-Intelligence Benchmark
 > Public leaderboard. Private evaluation split. Open scorer. Signed results.
 The field's standard yardstick for camera-free **spatial intelligence** (pose, presence,
 occupancy, tracking, vitals) from RF/WiFi and, over time, mmWave / UWB / multimodal.
 - **Project-agnostic** — any team, framework, or modality enters; RuView donated the seed
  scorer and is scored like everyone else.
 - **Benchmark-first** — the board starts empty; every row is a real scoring-pipeline
  **witness** (`inputs_sha256` + `proof_sha256` + `harness_version`) in an append-only,
  hash-chained, tamper-evident ledger.
 - **Reproducible** — the scorer is open; reproduce any proof hash + repeatability locally.
 Spec: [ADR-149](https://github.com/ruvnet/RuView/blob/main/docs/adr/ADR-149-public-community-leaderboard-huggingface.md).
 Source + open scorer: https://github.com/ruvnet/RuView/tree/main/aether-arena
 Non-commercial (CC BY-NC 4.0): the v0 eval split derives from MM-Fi (CC BY-NC); AA is operated non-commercially.
@@ -1,161 +0,0 @@
 """AetherArena ("AA") — The Official Spatial-Intelligence Benchmark.
 Hugging Face Space (Gradio) — the public face of the benchmark (ADR-149).
 This Space is the presentation + submission layer; the heavy scoring runs in the
 pinned RuView harness (CI / scorer container), and results land in the append-only,
 hash-chained **witness ledger** shown here.
 Benchmark-first: the board starts EMPTY. No seeded or hand-entered numbers — every
 row is a real scoring-pipeline witness (inputs_sha256 + proof_sha256 + harness_version).
 """
 import hashlib
 import json
 from pathlib import Path
 import gradio as gr
 LEDGER = Path(__file__).parent / "ledger.jsonl"
 GENESIS_PREV = "0" * 64
 def _rows():
    """Return the parsed ledger rows in file order (``[]`` if the file is absent)."""
    if LEDGER.exists():
        lines = LEDGER.read_text().splitlines()
        # Whitespace-only lines are not rows.
        return [json.loads(line) for line in lines if line.strip()]
    return []
 def _canon(row: dict) -> bytes:
    body = {k: row[k] for k in sorted(row) if k != "row_hash"}
    return json.dumps(body, separators=(",", ":"), sort_keys=True).encode()
 def verify_chain():
    """Validate the witness ledger and return a one-line status string.

    Applies the same three per-row checks as ``ledger_tools.py verify`` so the
    Space and the command-line tool always agree: ``seq`` equals the row's
    position, ``prev_hash`` equals the previous row's hash (all zeros for the
    first row), and ``row_hash`` matches a fresh SHA-256 of the canonical row.

    Returns:
        A "chain intact" message with the row count, or a "BROKEN" message
        naming the first offending row.
    """
    rows, prev = _rows(), GENESIS_PREV
    for i, r in enumerate(rows):
        intact = (
            r.get("seq") == i
            and r.get("prev_hash") == prev
            and r.get("row_hash") == hashlib.sha256(_canon(r)).hexdigest()
        )
        if not intact:
            return f"❌ Ledger chain BROKEN at row {i} — tampering detected."
        prev = r["row_hash"]
    return f"✅ Witness ledger chain intact — {len(rows)} row(s), append-only."
 def leaderboard(category: str):
    """Build the leaderboard table for one category.

    Args:
        category: ``"all"`` or a value matched against each row's
            ``category`` field.

    Returns:
        A list of 6-cell rows (submitter, model, benchmark / protocol, metric,
        score, tier) sorted by score, highest first. When nothing matches, a
        single placeholder row is returned.
    """
    def _score(r):
        # A missing or null score_pct counts as 0 for both sorting and display;
        # formatting None with ":.2f" would raise TypeError.
        return r.get("score_pct") or 0

    results = [r for r in _rows() if r.get("kind") == "result" and (category == "all" or r.get("category") == category)]
    if not results:
        return [["— no entries yet —", "", "", "", "", ""]]
    results.sort(key=_score, reverse=True)
    return [[
        r.get("submitter", "?"),
        r.get("model_ref", "?"),
        f"{r.get('benchmark','?')} / {r.get('protocol','?')}",
        r.get("metric", "?"),
        f"{_score(r):.2f}%",
        f"{r.get('tier','?')} (vs {r.get('sota_ref','?')})",
    ] for r in results]
 FOUR_PART = "### Public leaderboard. Private evaluation split. Open scorer. Signed results."
 ABOUT = """
 **AetherArena** is the official, project-agnostic **Spatial-Intelligence Benchmark** —
 camera-free pose, presence, occupancy, tracking, and vitals from RF/WiFi (and, over
 time, mmWave / UWB / radar / multimodal). It is **not** a single-vendor board: any
 team, framework, or modality enters, and every entrant — including the RuView baseline
 that donated the seed scorer — is scored by the identical, open, pinned harness.
 The scorer reuses RuView's released `wifi-densepose-train` acceptance harness
 (`ruview_metrics` + ablation). You submit a **model, not predictions**; it is scored
 against a **private** MM-Fi held-out split; one **witness** row (inputs hash + proof
 hash + harness version) is appended to a **hash-chained, tamper-evident ledger**.
 **For industry:** a vendor-neutral, auditable way to compare RF-sensing models on equal
 footing — the same standardized splits, the same metric definition, the same signed,
 reproducible ledger. No more "trust our number on our split." Vendors, labs, and startups
 all submit through one pipeline and are scored identically.
 **Generalization Track (roadmap):** the headline isn't a single in-domain number — it's a
 battery of honest tracks: MM-Fi `random_split` (in-domain), `cross_subject` (unseen people),
 cross-room, cross-device, and confidence-calibration (ECE). Cross-subject is the real
 deployment frontier and is treated as the flagship hard benchmark.
 Spec: ADR-149. v0 ranks **pose, presence, edge-latency, determinism**. Tracking &
 vitals activate when their ground truth lands; **privacy-leakage** is gated until the
 membership-inference attacker ships. Source + the open scorer:
 https://github.com/ruvnet/RuView/tree/main/aether-arena
 """
 SUBMIT = """
 ### Submit a model
 1. Write a manifest — [`schema/aa-submission.toml`](https://github.com/ruvnet/RuView/blob/main/aether-arena/schema/aa-submission.toml):
   declare your model ref, category, the ADR-145 feature set (F0 CSI … F3 BFLD), and the tensor I/O contract.
 2. Provide your model artifact (`.safetensors` / `.rvf` / LoRA adapter).
 3. It moves through `submitted → validated → quarantined → smoke_scored → full_scored → published`,
   scored in a no-network, read-only sandbox against the private split.
 4. Your signed witness row appears on the leaderboard.
 **You submit a model, never predictions** — predictions on data you hold prove nothing.
 """
 VERIFY = """
 ### Verify it's fair (you don't have to trust us)
 The scorer is open and reproducible. Reproduce the determinism proof + repeatability locally:
 ```bash
 git clone https://github.com/ruvnet/RuView && cd RuView/v2
 # determinism gate (same as CI):
 cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features
 # repeatability — N runs, one identical proof hash:
 cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16
 # verify the append-only witness ledger chain:
 cd ../aether-arena/ledger && python3 ledger_tools.py verify
 ```
 A stranger must be able to: submit → get a deterministic score → see the signed row →
 rerun the scorer locally → understand why the rank is fair. That is the launch gate (ADR-149 §7).
 """
 # Build the Gradio UI: header text, ledger-chain status, then four tabs
 # (Leaderboard, Submit, Verify, About).
 with gr.Blocks(title="AetherArena — Spatial-Intelligence Benchmark") as demo:
    gr.Markdown("# 📡 AetherArena (AA)\n## The Official, Vendor-Neutral Benchmark for WiFi / RF Spatial Sensing")
    gr.Markdown(FOUR_PART)
    gr.Markdown(
        "**An open industry benchmark — for everyone, not any one vendor.** Submit any model, any framework, "
        "any modality. Every entrant — academic, startup, or incumbent — is scored *identically*: standardized "
        "protocols (MM-Fi `random_split` / `cross_subject`), matched metrics (torso-PCK@20, the published "
        "definition), and an auditable, hash-chained **witness ledger** anyone can verify and reproduce.\n\n"
        "**Why it exists:** WiFi/RF-sensing results are reported with inconsistent splits, metrics, and no "
        "auditability — so numbers aren't comparable. AetherArena fixes the *measurement*: one protocol, one "
        "metric, one signed ledger, one-command reproduction. The benchmark is the product; the leaderboard is "
        "just the scoreboard. (Reference implementation seeded by RuView, ADR-149.)"
    )
    # verify_chain() is called here, while the layout is being built, and its
    # returned status string is what this Markdown component displays.
    chain = gr.Markdown(verify_chain())
    with gr.Tab("🏆 Leaderboard"):
        gr.Markdown(
            "### Current standings — MM-Fi WiFi-CSI 2D pose, torso-PCK@20\n"
            "Ranked, protocol- & metric-matched results. Each row carries its own caveats in the ledger "
            "(e.g. `random_split` has temporal-adjacency leakage that inflates *all* methods equally — the "
            "leakage-free `cross_subject` track is the real deployment frontier). **Submit yours — top the board.**"
        )
        cat = gr.Dropdown(["all", "pose", "presence"], value="all", label="Category")
        # The table starts with the unfiltered ("all") standings.
        tbl = gr.Dataframe(
            headers=["Submitter", "Model", "Benchmark / Protocol", "Metric", "Score", "Tier (vs prior SOTA)"],
            value=leaderboard("all"), interactive=False, wrap=True,
        )
        # Changing the dropdown re-runs leaderboard() with the selected
        # category and writes the result into the table.
        cat.change(leaderboard, cat, tbl)
        gr.Markdown(
            "*Vendor-neutral & benchmark-first: every row is a real, metric- and protocol-matched result — "
            "no seeded or vendor-favored numbers. Integrity is enforced, not promised: the current top entry's "
            "score was self-corrected down from an inflated metric (91.86% bbox → 81.63% torso) before it could "
            "be published. The same scorer and ledger apply to every submitter.*"
        )
    with gr.Tab("📤 Submit"):
        gr.Markdown(SUBMIT)
    with gr.Tab("🔬 Verify"):
        gr.Markdown(VERIFY)
    with gr.Tab("ℹ️ About"):
        gr.Markdown(ABOUT)
 if __name__ == "__main__":
    # When run as a script, serve on all interfaces at port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
@@ -1,5 +0,0 @@
 {"benchmark": "AetherArena", "created": "2026-05-30", "kind": "genesis", "note": "Official Spatial-Intelligence Benchmark \u2014 append-only signed ledger. Entries are real harness scores only; no seeded numbers.", "prev_hash": "0000000000000000000000000000000000000000000000000000000000000000", "row_hash": "940bdc6f0f5dd00f4d89e13a8fa843bab3c9ddf1b8051f426a1701e730249231", "seq": 0, "spec": "ADR-149"}
 {"abs_gain": "+9.38", "benchmark": "MM-Fi", "category": "pose", "caveat": "Protocol-matched MM-Fi random_split result; NOT solved real-world generalization. Random split has temporal/subject-adjacency effects common to this benchmark family. Leakage-free cross-subject is far lower (~11-27%) and is the real deployment frontier.", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20 (||right_shoulder-left_hip|| norm, 17 COCO kpts)", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer (4L/8H ~2M params, temporal-attention)", "prev_hash": "940bdc6f0f5dd00f4d89e13a8fa843bab3c9ddf1b8051f426a1701e730249231", "protocol": "random_split (ratio=0.8, seed=0)", "rel_gain": "+13.0%", "reproduce": "download MM-Fi -> parse_mmfi_zips.py -> train_tf_torso.py X.npy Y.npy split_random.npy (seed 0)", "row_hash": "76598d8e1320d5248f8cd854a8ffa22a99bd2a2f0e0e7f2d2b1df79af16001d5", "score_pct": 81.63, "scored_at": "2026-05-30", "seq": 1, "sota_ref": "MultiFormer 72.25 (CSI2Pose 68.41)", "submitter": "ruvnet", "tier": "Gold"}
 {"abs_gain": "+11.34", "benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer + skeleton-graph head + 3-ensemble + TTA", "note": "Best in-domain. Stacks attention-pooling + transformer + skeleton-graph refine + warmup + TTA + 3-model ensemble. Supersedes the 81.63 single-model entry.", "prev_hash": "76598d8e1320d5248f8cd854a8ffa22a99bd2a2f0e0e7f2d2b1df79af16001d5", "protocol": "random_split (0.8, seed 0)", "row_hash": "5780a4bc3e98eb0e30c1ecfa9091e57b280444fa1f21cd5146797e408580e4ab", "score_pct": 83.59, "scored_at": "2026-05-30", "seq": 2, "sota_ref": "MultiFormer 72.25 (CSI2Pose 68.41)", "submitter": "ruvnet", "tier": "Gold"}
 {"benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer", "note": "Leakage-free generalization to unseen people, shared rooms. Honest deployment-relevant number.", "prev_hash": "5780a4bc3e98eb0e30c1ecfa9091e57b280444fa1f21cd5146797e408580e4ab", "protocol": "cross_subject (official, val=S05,S10,..,S40)", "row_hash": "d989e4e1dbc0182610305fdfbde8b094413b87c913283a46bf41f4afba7a06fd", "score_pct": 64.04, "scored_at": "2026-05-30", "seq": 3, "sota_ref": "(no matched public ref)", "submitter": "ruvnet", "tier": "Silver"}
 {"benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer + CORAL domain alignment", "note": "The real deployment frontier (new room). CORAL transductive DG (+30% rel over control). Data-bound: MM-Fi has only 3 source rooms.", "prev_hash": "d989e4e1dbc0182610305fdfbde8b094413b87c913283a46bf41f4afba7a06fd", "protocol": "cross_environment (train E01-03 -> test E04, new room)", "row_hash": "bf370487bde88e198c13877956dab3c83766a6a24afef0b78b6ac7aa130bb207", "score_pct": 17.51, "scored_at": "2026-05-30", "seq": 4, "sota_ref": "(hard frontier; control 13.52)", "submitter": "ruvnet", "tier": "Bronze"}
@@ -1 +0,0 @@
 gradio==5.9.1
@@ -1,130 +0,0 @@
 #!/usr/bin/env python3
 """
 CIR Verification Helper (ADR-134)
 Optional Python comparator — invokes the Rust cir_proof_runner binary and
 checks its output against expected_cir_features.sha256.
 Usage:
  python cir_verify_helper.py              # verify against stored hash
  python cir_verify_helper.py --generate  # regenerate hash via Rust binary
 This script is a thin wrapper; all cryptographic work is done in the Rust
 binary. It exists to integrate the CIR proof step into the Python verify.py
 flow if needed.
 """
 import argparse
 import os
 import subprocess
 import sys
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 REPO_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, "..", "..", "..", ".."))
 def find_binary() -> str:
    """Return the path of an existing cir_proof_runner build, or "" if none.

    Release builds are preferred over debug builds; within each profile the
    plain name is tried before the ``.exe`` name.
    """
    target_dir = os.path.join(REPO_ROOT, "v2", "target")
    for profile in ("release", "debug"):
        for exe_name in ("cir_proof_runner", "cir_proof_runner.exe"):
            candidate = os.path.join(target_dir, profile, exe_name)
            if os.path.isfile(candidate):
                return candidate
    return ""
 def build_binary() -> bool:
    """Run ``cargo build`` for the release cir_proof_runner inside ``v2``.

    Returns True when cargo exits with status 0. On failure the last 2000
    characters of cargo's stderr are printed and False is returned.
    """
    cargo_cmd = [
        "cargo", "build",
        "-p", "wifi-densepose-signal",
        "--bin", "cir_proof_runner",
        "--release",
        "--no-default-features",
    ]
    workspace = os.path.join(REPO_ROOT, "v2")
    print("Building cir_proof_runner (release)...")
    proc = subprocess.run(cargo_cmd, cwd=workspace, capture_output=True, text=True)
    if proc.returncode == 0:
        return True
    print("Build failed:", proc.stderr[-2000:])
    return False
 def run_generate(binary: str) -> str:
    """Invoke ``binary --generate-hash`` from the repo root.

    Returns the binary's stripped stdout on success. On a non-zero exit the
    binary's stderr is printed and an empty string is returned.
    """
    proc = subprocess.run(
        [binary, "--generate-hash"],
        cwd=REPO_ROOT,
        capture_output=True,
        text=True,
    )
    if proc.returncode == 0:
        return proc.stdout.strip()
    print("Error running binary:", proc.stderr)
    return ""
 def run_verify(binary: str) -> bool:
    """Invoke ``binary`` with no arguments from the repo root and relay its output.

    The binary's stdout is echoed to stdout and any non-blank stderr to
    stderr. Returns True exactly when the process exits with status 0.
    """
    proc = subprocess.run([binary], cwd=REPO_ROOT, capture_output=True, text=True)
    print(proc.stdout.strip())
    err_text = proc.stderr.strip()
    if err_text:
        print(err_text, file=sys.stderr)
    return proc.returncode == 0
 def main() -> None:
    """Command-line entry point.

    Locates the cir_proof_runner binary, building it first when ``--build``
    is given or no binary is found. It then either regenerates
    ``expected_cir_features.sha256`` next to this script (``--generate``) or
    runs the binary in verify mode and exits 0 on success, 1 on failure.
    """
    parser = argparse.ArgumentParser(description="CIR verification helper (ADR-134)")
    parser.add_argument(
        "--generate",
        action="store_true",
        help="Regenerate expected_cir_features.sha256 via Rust binary",
    )
    parser.add_argument(
        "--build",
        action="store_true",
        help="Build the binary before running (default: use cached binary)",
    )
    args = parser.parse_args()

    binary = find_binary()
    if args.build or not binary:
        if not build_binary():
            sys.exit(1)
        # Look again: the build should have produced the binary.
        binary = find_binary()
    if not binary:
        print("ERROR: cir_proof_runner binary not found. Run with --build.")
        sys.exit(1)

    if not args.generate:
        # Verify mode: the exit status reflects the binary's verdict.
        sys.exit(0 if run_verify(binary) else 1)

    hash_val = run_generate(binary)
    if not hash_val:
        sys.exit(1)
    hash_file = os.path.join(SCRIPT_DIR, "expected_cir_features.sha256")
    with open(hash_file, "w") as out:
        out.write(hash_val + "\n")
    print(f"Wrote CIR hash to {hash_file}")
    print(f"Hash: {hash_val}")
 if __name__ == "__main__":
    main()
@@ -1 +0,0 @@
 d6bce07ecb1648e6936561df44bf4a3bfc17bb0ba5f692646b2301d105b52f67
@@ -1 +0,0 @@
 304d54690af468dc6cbf0f2a1332f109cf187d5e2eab454efd8554cebc45bdeb
@@ -1 +1 @@
-f8e76f21a0f9852b70b6d9dd5318239f6b20cbcb4cdd995863263cecdc446f7a
+8c0680d7d285739ea9597715e84959d9c356c87ee3ad35b5f1e69a4ca41151c6
@@ -164,120 +164,37 @@ def frame_to_csi_data(frame, signal_meta):
    )
 # Quantization precision for cross-platform hash stability (issue #560).
 #
 # The bytes packed below feed SHA-256. Without quantization, the hash diverges
 # across SIMD backends (Intel AVX2/AVX-512 vs ARM NEON vs different x86 micro-
 # architectures in the same CI pool) because scipy.fft's pocketfft kernels
 # reorder vectorized FP operations differently per build. IEEE 754 guarantees
 # per-operation determinism, not associativity under reordering.
 #
 # Empirically: 9 decimals was NOT enough to collapse the divergence — two
 # back-to-back Ubuntu 24.04 / Python 3.11 / scipy 1.17 CI runs landed on
 # different Azure VM microarchitectures (likely Skylake vs Cascade Lake)
 # and produced two different SHA-256s even after np.round(.., 9). The DSP
 # pipeline (preprocess → biquad bandpass → FFT → PSD → variance accumulation)
 # amplifies the ~1e-14 raw FFT divergence by several orders of magnitude
 # downstream — the actual drift at features_to_bytes() input can reach 1e-7
 # or worse.
 #
 # 6 decimals (parts per million) gives ~6 orders of magnitude headroom over
 # observed pipeline-amplified ULP drift and is still far below any meaningful
 # signal change (CSI phase precision is ~1e-3 rad; PSD bins differ by orders
 # of magnitude). Round to this precision, then hash.
 #
 # NOTE: 6 decimals collapses the divergence *across Linux microarchitectures*
 # but NOT Windows-vs-Linux, where the pocketfft/BLAS difference exceeds 1e-6 on
 # a few elements that then straddle the 6th-decimal rounding boundary. The
 # precision is overridable via PROOF_HASH_DECIMALS so it can be coarsened to a
 # value that is boundary-stable across *all* platforms (Windows + Linux + macOS)
 # while staying far below any signal-meaningful change.
 HASH_QUANTIZATION_DECIMALS = int(os.environ.get("PROOF_HASH_DECIMALS", "6"))
 def features_to_bytes(features):
    """Convert CSIFeatures to a deterministic byte representation.
-    Each feature array is quantized to ``HASH_QUANTIZATION_DECIMALS`` decimal
+    We serialize each numpy array to bytes in a canonical order
-    places before being packed as little-endian float64. The quantization is
+    using little-endian float64 representation. This ensures the
-    what makes the resulting SHA-256 hash actually platform-independent — the
+    hash is platform-independent for IEEE 754 compliant systems.
    raw float values diverge at ULP precision across scipy.fft SIMD backends
    (issue #560), even though all platforms compute the "correct" answer.
    Args:
        features: CSIFeatures instance.
    Returns:
-        bytes: Canonical, quantized byte representation.
+        bytes: Canonical byte representation.
    """
    parts = []
-    # Serialize each feature array in declaration order.
+    # Serialize each feature array in declaration order
    # doppler_shift is INTENTIONALLY excluded: it is peak-normalized
    # (`spectrum / max(spectrum)` in csi_processor._extract_doppler_features),
    # and when the raw spectrum has near-tied peaks the argmax flips under
    # cross-microarchitecture FP reordering, renormalizing the whole array
    # (O(1) divergence — not absorbable by any tolerance). The remaining five
    # features, including the FFT-based PSD, reproduce deterministically and
    # provide the proof. (The underlying doppler instability is a production
    # reproducibility bug tracked separately.)
    for array in [
        features.amplitude_mean,
        features.amplitude_variance,
        features.phase_difference,
        features.correlation_matrix,
        features.doppler_shift,
        features.power_spectral_density,
    ]:
        flat = np.asarray(array, dtype=np.float64).ravel()
        # Quantize before packing so SIMD-level FP reordering across
        # Intel AVX vs Apple Silicon NEON pocketfft kernels does not
        # leak into the SHA-256 input.
        flat = np.round(flat, HASH_QUANTIZATION_DECIMALS)
        # Pack as little-endian double (8 bytes each)
        parts.append(struct.pack(f"<{len(flat)}d", *flat))
    return b"".join(parts)
 # ── Cross-platform tolerance gate (issue #560 follow-up) ─────────────────────
 # The SHA-256 of fixed-decimal-rounded features is bit-exact only WITHIN one
 # CPU microarchitecture. The pocketfft / BLAS kernels in the manylinux
 # numpy/scipy wheels reorder floating-point reductions differently across
 # microarchs (e.g. a GitHub Azure runner vs a developer box vs another Linux
 # host), and the resulting ~1e-6 *relative* drift lands on large-magnitude PSD
 # bins as an absolute difference too large for ANY fixed-decimal grid to absorb
 # (empirically the hash diverges across microarchs even at 2 decimals). So:
 #   • the hash is the strong, bit-exact, SAME-platform proof, and
 #   • a relative tolerance against a committed reference vector is the
 #     platform-INDEPENDENT proof.
 # A run PASSES if either matches. Tolerances sit ~100x over the observed
 # microarch drift and ~10x under any signal-meaningful change (CSI phase
 # precision ~1e-3 rad), so real pipeline regressions still fail.
 TOLERANCE_RTOL = 1e-4
 TOLERANCE_ATOL = 1e-6
 REFERENCE_VECTOR_FILENAME = "expected_features_reference.npz"
 def features_to_vector(features):
    """Flatten a frame's feature arrays into one float64 vector (unrounded).

    The arrays are taken in the fixed order amplitude_mean,
    amplitude_variance, phase_difference, correlation_matrix,
    power_spectral_density; each is converted to float64, flattened, and the
    pieces are joined end to end. Values keep full precision so the result
    can be used for a tolerance-based comparison.

    Args:
        features: Object exposing the five feature attributes listed above.

    Returns:
        numpy.ndarray: 1-D float64 array containing every value in order.
    """
    # doppler_shift is deliberately not part of the vector.
    ordered_fields = (
        features.amplitude_mean,
        features.amplitude_variance,
        features.phase_difference,
        features.correlation_matrix,
        features.power_spectral_density,
    )
    flattened = []
    for field in ordered_fields:
        flattened.append(np.asarray(field, dtype=np.float64).ravel())
    return np.concatenate(flattened)
 def compute_pipeline_hash(data_path, verbose=False):
    """Run the full pipeline and compute the SHA-256 hash of all features.
@@ -320,7 +237,6 @@ def compute_pipeline_hash(data_path, verbose=False):
    features_count = 0
    total_feature_bytes = 0
    last_features = None
    feature_vectors = []
    doppler_nonzero_count = 0
    doppler_shape = None
    psd_shape = None
@@ -337,7 +253,6 @@ def compute_pipeline_hash(data_path, verbose=False):
        if features is not None:
            feature_bytes = features_to_bytes(features)
            hasher.update(feature_bytes)
            feature_vectors.append(features_to_vector(features))
            features_count += 1
            total_feature_bytes += len(feature_bytes)
            last_features = features
@@ -406,11 +321,7 @@ def compute_pipeline_hash(data_path, verbose=False):
        "psd_shape": psd_shape,
    }
-    reference_vector = (
+    return hasher.hexdigest(), stats
        np.concatenate(feature_vectors) if feature_vectors else np.array([], dtype=np.float64)
    )
    return hasher.hexdigest(), reference_vector, stats
 def audit_codebase(base_dir=None):
@@ -526,7 +437,7 @@ def main():
    print("    This runs the SAME CSIProcessor.preprocess_csi_data() and")
    print("    CSIProcessor.extract_features() used in production.")
    print()
-    computed_hash, computed_vector, stats = compute_pipeline_hash(data_path, verbose=args.verbose)
+    computed_hash, stats = compute_pipeline_hash(data_path, verbose=args.verbose)
    # ---------------------------------------------------------------
    # Step 3: Hash comparison
@@ -538,11 +449,8 @@ def main():
        with open(hash_path, "w") as f:
            f.write(computed_hash + "\n")
        print(f"    Wrote expected hash to {hash_path}")
        ref_path = os.path.join(SCRIPT_DIR, REFERENCE_VECTOR_FILENAME)
        np.savez_compressed(ref_path, features=computed_vector)
        print(f"    Wrote reference vector ({computed_vector.size} values) to {ref_path}")
        print()
-        print("  HASH + REFERENCE GENERATED -- run without --generate-hash to verify.")
+        print("  HASH GENERATED -- run without --generate-hash to verify.")
        print("=" * 72)
        return
@@ -561,70 +469,13 @@ def main():
    print(f"    Expected: {expected_hash}")
-    hash_match = computed_hash == expected_hash
+    if computed_hash == expected_hash:
-
+        match_status = "MATCH"
    # Cross-platform fallback: if the bit-exact hash differs (different CPU
    # microarchitecture reorders the pocketfft/BLAS reductions), accept the run
    # when the raw feature vector matches the committed reference within a
    # relative tolerance — platform-independent where the hash is not (#560).
    tolerance_match = False
    max_abs_dev = None
    max_rel_dev = None
    ref_path = os.path.join(SCRIPT_DIR, REFERENCE_VECTOR_FILENAME)
    if not hash_match and os.path.exists(ref_path):
        ref_vec = np.load(ref_path)["features"]
        if ref_vec.shape == computed_vector.shape:
            tolerance_match = bool(
                np.allclose(
                    computed_vector, ref_vec, rtol=TOLERANCE_RTOL, atol=TOLERANCE_ATOL
                )
            )
            diff = np.abs(computed_vector - ref_vec)
            max_abs_dev = float(np.max(diff)) if diff.size else 0.0
            max_rel_dev = (
                float(np.max(diff / np.maximum(np.abs(ref_vec), 1e-12)))
                if diff.size
                else 0.0
            )
    if hash_match:
        match_status = "MATCH (bit-exact)"
    elif tolerance_match:
        match_status = f"TOLERANCE MATCH (max rel dev {max_rel_dev:.2e})"
    else:
        match_status = "MISMATCH"
    print(f"    Status:   {match_status}")
    print()
    if not hash_match and max_abs_dev is not None:
        block_sizes = [56, 56, 55, 9, 128]  # per-frame feature layout (doppler excluded)
        block_names = ["amp_mean", "amp_var", "phase_diff", "corr", "psd"]
        frame_len = sum(block_sizes)
        tol = TOLERANCE_ATOL + TOLERANCE_RTOL * np.abs(ref_vec)
        outside = diff > tol
        n_out = int(outside.sum())
        print(
            f"    DIVERGENCE: {n_out}/{computed_vector.size} outside tol "
            f"({100.0 * n_out / computed_vector.size:.4f}%)  "
            f"max|d|={max_abs_dev:.3e} maxrel={max_rel_dev:.3e}"
        )
        if n_out:
            wf = np.where(outside)[0] % frame_len
            bounds = np.cumsum([0] + block_sizes)
            parts = []
            for bi, name in enumerate(block_names):
                c = int(((wf >= bounds[bi]) & (wf < bounds[bi + 1])).sum())
                if c:
                    parts.append(f"{name}={c}")
            print(f"    by feature: {', '.join(parts)}")
            for w in np.argsort(diff)[::-1][:4]:
                b = int(np.searchsorted(bounds, int(w) % frame_len, side="right")) - 1
                print(
                    f"      worst idx {int(w)} ({block_names[b]}): "
                    f"ref={ref_vec[int(w)]:.6g} got={computed_vector[int(w)]:.6g}"
                )
        print()
    # ---------------------------------------------------------------
    # Step 4: Audit (if requested or always in full mode)
    # ---------------------------------------------------------------
@@ -647,22 +498,14 @@ def main():
    # Final verdict
    # ---------------------------------------------------------------
    print("=" * 72)
-    if hash_match or tolerance_match:
+    if computed_hash == expected_hash:
        print("  VERDICT: PASS")
        print()
-        if hash_match:
+        print("  The pipeline produced a SHA-256 hash that matches the published")
-            print("  The pipeline produced a SHA-256 hash that matches the published")
+        print("  expected hash. This proves:")
            print("  expected hash (bit-exact). This proves:")
        else:
            print("  The bit-exact hash differs (CPU-microarchitecture FP reordering),")
            print("  but the raw feature vector matches the published reference within")
            print(
                f"  rtol={TOLERANCE_RTOL:g} / atol={TOLERANCE_ATOL:g} "
                f"(max rel dev {max_rel_dev:.2e}). This proves:"
            )
        print("    1. The SAME signal processing code ran on the reference signal")
        print("    2. The output is DETERMINISTIC (same input -> same output)")
-        print("    3. No randomness was introduced")
+        print("    3. No randomness was introduced (hash would differ)")
        print("    4. The code path includes: noise removal, Hamming windowing,")
        print("       amplitude normalization, FFT-based Doppler extraction,")
        print("       and power spectral density computation")
@@ -673,19 +516,14 @@ def main():
    else:
        print("  VERDICT: FAIL")
        print()
-        print("  The pipeline output does NOT match the expected hash OR the")
+        print("  The pipeline output does NOT match the expected hash.")
        print("  reference feature vector within tolerance.")
        if max_rel_dev is not None:
            print(
                f"    max abs dev: {max_abs_dev:.3e}   max rel dev: {max_rel_dev:.3e}"
                f"   (rtol={TOLERANCE_RTOL:g}, atol={TOLERANCE_ATOL:g})"
            )
        print()
        print("  Possible causes:")
        print("    - Numpy/scipy version mismatch (check requirements)")
        print("    - Code change in CSI processor that alters numerical output")
-        print("    - A real (non-microarch) numerical regression")
+        print("    - Platform floating-point differences (unlikely for IEEE 754)")
        print()
-        print("  To update after an intentional change:")
+        print("  To update the expected hash after intentional changes:")
        print("    python verify.py --generate-hash")
        print("=" * 72)
        sys.exit(1)
@@ -6,14 +6,8 @@
 #
 # To update: change versions, run `python v1/data/proof/verify.py --generate-hash`,
 # then commit the new expected_features.sha256.
 #
 # numpy/scipy track the versions the *published* expected hash
 # (expected_features.sha256 = ca58956c…) was generated with — modern numpy 2.x,
 # i.e. what a fresh `pip install numpy` and the proof-of-capabilities.md skeptic
 # path produce today. The old 1.26.4 pin no longer matched that hash and made
 # the determinism gate fail against its own published proof.
-numpy==2.4.2
+numpy==1.26.4
-scipy==1.17.1
+scipy==1.14.1
 pydantic==2.10.4
 pydantic-settings==2.7.1
@@ -26,12 +26,7 @@ class Settings(BaseSettings):
    workers: int = Field(default=1, description="Number of worker processes")
    # Security settings
-    secret_key: str = Field(
+    secret_key: str = Field(..., description="Secret key for JWT tokens")
        default="dev-not-secret-CHANGE-IN-PROD",
        description="Secret key for JWT tokens (production deployments "
                    "MUST override via SECRET_KEY env or .env; the dev "
                    "default is rejected by validate_production_config)",
    )
    jwt_algorithm: str = Field(default="HS256", description="JWT algorithm")
    jwt_expire_hours: int = Field(default=24, description="JWT token expiration in hours")
    allowed_hosts: List[str] = Field(default=["*"], description="Allowed hosts")
@@ -163,14 +158,7 @@ class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
-        case_sensitive=False,
+        case_sensitive=False
        # Tolerate `.env` keys that this Settings model doesn't declare
        # (e.g., NPM_TOKEN, DOCKER_HUB_TOKEN, PYPI_TOKEN used by other
        # tooling). Without `extra="ignore"` pydantic-settings 2.x
        # raises `ValidationError: Extra inputs are not permitted` and
        # leaks the offending values into the error message — a real
        # security concern for secret tokens. See verify.py / `./verify`.
        extra="ignore",
    )
    @field_validator("environment")
@@ -143,35 +143,13 @@ class ESP32BinaryParser:
        12      4     Sequence number (LE u32)
        16      1     RSSI (i8)
        17      1     Noise floor (i8)
-        18      1     PPDU type (ADR-110): 0=HT/legacy, 1=HE-SU, 2=HE-MU,
+        18      2     Reserved
                       3=HE-TB, 0xFF=unknown. Pre-ADR-110 firmware sends 0.
        19      1     Flags (ADR-110): bit 0 = bw40, bit 2 = STBC,
                       bit 3 = LDPC, bit 4 = cross-node sync valid
                       (set by either c6_timesync OR c6_sync_espnow
                       since v0.7.0 — ADR-110 §A0.13).
        20      N*2   I/Q pairs (n_antennas * n_subcarriers * 2 bytes, signed i8)
    Sibling packet (ADR-110 §A0.12, firmware v0.6.9+): the node also
    emits a 32-byte UDP sync packet (magic 0xC511A110) every
    CONFIG_C6_SYNC_EVERY_N_FRAMES frames on the same UDP socket.
    See parse_sync_packet() / SyncPacket below.
    """
    MAGIC = 0xC5110001
    HEADER_SIZE = 20
-    # ADR-110: previously '<IBBHIIBB2x' (last 2 bytes skipped as reserved).
+    HEADER_FMT = '<IBBHIIBB2x'  # magic, node_id, n_ant, n_sc, freq, seq, rssi, noise
    # Now read those 2 bytes as PPDU type + flags. Pre-ADR-110 firmware
    # sends zeros, which decode as 'HT/legacy' + 'no flags' — fully
    # backwards compatible.
    HEADER_FMT = '<IBBHIIBBBB'  # +2 bytes: ppdu_type, flags
    # ADR-110 PPDU type byte values
    PPDU_HT_LEGACY = 0
    PPDU_HE_SU = 1
    PPDU_HE_MU = 2
    PPDU_HE_TB = 3
    PPDU_UNKNOWN = 0xFF
    _PPDU_NAMES = {0: 'ht_legacy', 1: 'he_su', 2: 'he_mu', 3: 'he_tb', 0xFF: 'unknown'}
    def parse(self, raw_data: bytes) -> CSIData:
        """Parse an ADR-018 binary frame into CSIData.
@@ -190,8 +168,8 @@ class ESP32BinaryParser:
                f"Frame too short: need {self.HEADER_SIZE} bytes, got {len(raw_data)}"
            )
-        magic, node_id, n_antennas, n_subcarriers, freq_mhz, sequence, rssi_u8, noise_u8, \
+        magic, node_id, n_antennas, n_subcarriers, freq_mhz, sequence, rssi_u8, noise_u8 = \
-            ppdu_byte, flags_byte = struct.unpack_from(self.HEADER_FMT, raw_data, 0)
+            struct.unpack_from(self.HEADER_FMT, raw_data, 0)
        if magic != self.MAGIC:
            raise CSIParseError(
@@ -221,15 +199,11 @@ class ESP32BinaryParser:
        snr = float(rssi - noise_floor)
        frequency = float(freq_mhz) * 1e6
        bandwidth = 20e6  # default; could infer from n_subcarriers
-        # Bandwidth inference (issue #1005): HE-LTF uses a 4x denser tone
+        if n_subcarriers <= 56:
        # grid than HT-LTF on the same channel width — an HE-SU frame with
        # 256 bins (242 active HE20 tones) is a *20 MHz* capture, not 160.
        if ppdu_byte in (1, 2, 3):  # HE-SU / HE-MU / HE-TB
            bandwidth = 40e6 if (flags_byte & 0x01) or n_subcarriers > 256 else 20e6
        elif n_subcarriers <= 64:  # ESP32 HT20 delivers the full 64-bin FFT
            bandwidth = 20e6
-        elif n_subcarriers <= 128:
+        elif n_subcarriers <= 114:
            bandwidth = 40e6
        elif n_subcarriers <= 242:
            bandwidth = 80e6
@@ -252,128 +226,10 @@ class ESP32BinaryParser:
                'rssi_dbm': rssi,
                'noise_floor_dbm': noise_floor,
                'channel_freq_mhz': freq_mhz,
                # ADR-110 extension — zeros from pre-ADR-110 firmware land here as
                # 'ht_legacy' + all-flags-false. New consumers can branch on
                # ppdu_type / he_capable for HE-LTF-aware DSP.
                'ppdu_type': self._PPDU_NAMES.get(ppdu_byte, 'unknown'),
                'ppdu_type_raw': ppdu_byte,
                'he_capable': ppdu_byte in (1, 2, 3),
                'bw40': bool(flags_byte & 0x01),
                'stbc': bool(flags_byte & 0x04),
                'ldpc': bool(flags_byte & 0x08),
                'ieee802154_sync_valid': bool(flags_byte & 0x10),
                'adr018_flags_raw': flags_byte,
            }
        )
@dataclass
class SyncPacket:
    """Decoded ADR-110 §A0.12 sync packet (firmware v0.6.9+, magic 0xC511A110).

    Per the original notes, the node emits this on the same UDP socket as its
    CSI frames, every CONFIG_C6_SYNC_EVERY_N_FRAMES frames. It pairs the
    node's local clock with its mesh-aligned epoch and the high-water CSI
    sequence number, so a host can match (node_id, sequence) across the two
    packet streams and derive a mesh-aligned timestamp for each CSI frame.
    See WITNESS-LOG-110 §A0.12.
    """

    node_id: int
    proto_ver: int
    is_leader: bool
    is_valid: bool
    smoothed_used: bool
    local_us: int       # u64: node-local esp_timer_get_time() value
    epoch_us: int       # u64: local time plus EMA-smoothed offset (mesh time)
    sequence: int       # u32: high-water CSI sequence when the packet was emitted
    flags_raw: int

    def local_minus_epoch_us(self) -> int:
        """Return ``local_us - epoch_us``, the signed clock offset in µs.

        The original notes describe this as negative when the node's clock is
        behind the leader's and roughly zero on the leader itself, and as the
        counterpart of Rust's `SyncPacket::local_minus_epoch_us`.
        """
        return self.local_us - self.epoch_us

    def apply_to_local(self, local_at_frame_us: int) -> int:
        """Translate a node-local µs timestamp into mesh time.

        Adds this packet's offset (``epoch_us - local_us``) to the given local
        timestamp, so passing ``self.local_us`` yields ``self.epoch_us``.
        Documented upstream as the same contract as Rust's
        `SyncPacket::apply_to_local` (WITNESS-LOG-110 §A0.10 / §A0.12).
        """
        return local_at_frame_us + (self.epoch_us - self.local_us)

    def mesh_aligned_us_for_sequence(self, frame_seq: int, fps_hz: float) -> int:
        """Estimate the mesh-aligned timestamp of a CSI frame from its sequence.

        The distance between ``frame_seq`` and this packet's ``sequence`` is
        converted to microseconds using ``fps_hz``, added to ``local_us``, and
        mapped to mesh time via ``apply_to_local``.

        Args:
            frame_seq: Sequence number of the CSI frame (u32 semantics).
            fps_hz: Assumed or measured CSI frame rate; must be positive.

        Returns:
            Mesh-aligned timestamp in µs; the elapsed time is truncated toward
            zero by ``int()``.

        Raises:
            ValueError: If ``fps_hz`` is zero or negative.
        """
        if fps_hz <= 0:
            raise ValueError(f"fps_hz must be positive, got {fps_hz}")
        # Signed 32-bit wrap: fold the difference into [-2**31, 2**31) so a
        # u32 sequence rollover still gives a small positive/negative delta.
        frame_delta = ((frame_seq - self.sequence + 0x80000000) & 0xFFFFFFFF) - 0x80000000
        elapsed_us = int(frame_delta * 1_000_000 / fps_hz)
        return self.apply_to_local(self.local_us + elapsed_us)
 class SyncPacketParser:
    """Decoder for the 32-byte ADR-110 §A0.12 sync packet.

    Sync packets share a socket with CSI frames and are told apart by the
    leading little-endian u32 magic. A caller would typically dispatch like:
        magic = struct.unpack_from('<I', data, 0)[0]
        if magic == ESP32BinaryParser.MAGIC:    # 0xC5110001: CSI frame
            ...
        elif magic == SyncPacketParser.MAGIC:   # 0xC511A110: sync packet
            ...
    """
    MAGIC = 0xC511A110
    SIZE = 32
    # Little-endian layout, 32 bytes total:
    #   I  magic
    #   B  node_id
    #   B  proto_ver
    #   B  flags
    #   B  reserved
    #   Q  local_us
    #   Q  epoch_us
    #   I  sequence
    #   4x reserved trailing bytes (skipped)
    HEADER_FMT = '<IBBBBQQI4x'
    @classmethod
    def parse(cls, raw_data: bytes) -> SyncPacket:
        """Decode ``raw_data`` into a SyncPacket.

        Raises:
            CSIParseError: If fewer than ``SIZE`` bytes are supplied or the
                leading magic is not ``MAGIC``.
        """
        available = len(raw_data)
        if available < cls.SIZE:
            raise CSIParseError(
                f"Sync packet too short: {available} bytes, need {cls.SIZE}"
            )
        unpacked = struct.unpack_from(cls.HEADER_FMT, raw_data, 0)
        magic, node_id, proto_ver, flags_byte, _reserved, local_us, epoch_us, seq = unpacked
        if magic != cls.MAGIC:
            raise CSIParseError(f"Sync magic mismatch: got 0x{magic:08x}")
        # Flag bits: 0x01 leader, 0x02 valid, 0x04 smoothed offset used.
        return SyncPacket(
            node_id=node_id,
            proto_ver=proto_ver,
            is_leader=bool(flags_byte & 0x01),
            is_valid=bool(flags_byte & 0x02),
            smoothed_used=bool(flags_byte & 0x04),
            local_us=local_us,
            epoch_us=epoch_us,
            sequence=seq,
            flags_raw=flags_byte,
        )
 class RouterCSIParser:
    """Parser for router CSI data format."""
@@ -9,7 +9,6 @@ from datetime import datetime, timedelta
 from fastapi import Request, Response, HTTPException, status
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from starlette.middleware.base import BaseHTTPMiddleware
 from jose import JWTError, jwt
 from passlib.context import CryptContext
@@ -156,17 +155,16 @@ class UserManager:
        return False
-class AuthenticationMiddleware(BaseHTTPMiddleware):
+class AuthenticationMiddleware:
    """Authentication middleware for FastAPI."""
-
+    
-    def __init__(self, app, settings: Settings):
+    def __init__(self, settings: Settings):
        super().__init__(app)
        self.settings = settings
        self.token_manager = TokenManager(settings)
        self.user_manager = UserManager()
        self.enabled = settings.enable_authentication
-
+    
-    async def dispatch(self, request: Request, call_next: Callable) -> Response:
+    async def __call__(self, request: Request, call_next: Callable) -> Response:
        """Process request through authentication middleware."""
        start_time = time.time()
@@ -11,7 +11,6 @@ from collections import defaultdict, deque
 from dataclasses import dataclass
 from fastapi import Request, Response, HTTPException, status
 from starlette.middleware.base import BaseHTTPMiddleware
 from starlette.types import ASGIApp
 from src.config.settings import Settings
@@ -300,16 +299,15 @@ class RateLimiter:
        }
-class RateLimitMiddleware(BaseHTTPMiddleware):
+class RateLimitMiddleware:
    """Rate limiting middleware for FastAPI."""
-
+    
-    def __init__(self, app, settings: Settings):
+    def __init__(self, settings: Settings):
        super().__init__(app)
        self.settings = settings
        self.rate_limiter = RateLimiter(settings)
        self.enabled = settings.enable_rate_limiting
-
+    
-    async def dispatch(self, request: Request, call_next: Callable) -> Response:
+    async def __call__(self, request: Request, call_next: Callable) -> Response:
        """Process request through rate limiting middleware."""
        if not self.enabled:
            return await call_next(request)
@@ -107,25 +107,16 @@ class PoseService:
    async def _initialize_models(self):
        """Initialize neural network models."""
        try:
-            # Initialize DensePose model. DensePoseHead requires a config
+            # Initialize DensePose model
            # dict — input_channels matches the modality translator's output
            # (256), with the standard DensePose 24 body parts and 2 (U,V)
            # coordinates. (Previously called with no args → TypeError at
            # startup, which broke the API service.)
            densepose_config = {
                'input_channels': 256,
                'num_body_parts': 24,
                'num_uv_coordinates': 2,
            }
            if self.settings.pose_model_path:
-                self.densepose_model = DensePoseHead(densepose_config)
+                self.densepose_model = DensePoseHead()
                # Load model weights if path is provided
                # model_state = torch.load(self.settings.pose_model_path)
                # self.densepose_model.load_state_dict(model_state)
                self.logger.info("DensePose model loaded")
            else:
                self.logger.warning("No pose model path provided, using default model")
-                self.densepose_model = DensePoseHead(densepose_config)
+                self.densepose_model = DensePoseHead()
            # Initialize modality translation
            config = {
@@ -229,11 +220,7 @@ class PoseService:
                # Apply phase sanitization if we have phase data
                if hasattr(detection_result.features, 'phase_difference'):
                    phase_data = detection_result.features.phase_difference
-                    # PhaseSanitizer's full-pipeline method is sanitize_phase,
+                    sanitized_phase = self.phase_sanitizer.sanitize(phase_data)
                    # not sanitize (issue #612). The shorter name was an
                    # AttributeError waiting to fire on any code path that
                    # reaches this branch.
                    sanitized_phase = self.phase_sanitizer.sanitize_phase(phase_data)
                    # Combine amplitude and phase data
                    return np.concatenate([amplitude_data, sanitized_phase])
@@ -19,16 +19,11 @@ from hardware.csi_extractor import (
    CSIExtractor,
    CSIParseError,
    CSIExtractionError,
    SyncPacket,
    SyncPacketParser,
 )
 # ADR-018 constants
 MAGIC = 0xC5110001
-# ADR-110: bytes 18-19 are now PPDU type + flags (used to be `2x` reserved).
+HEADER_FMT = '<IBBHIIBB2x'
 # Pre-ADR-110 firmware sends zeros for both, which round-trip as
 # ('ht_legacy', flags=all-false) — fully backwards compatible.
 HEADER_FMT = '<IBBHIIBBBB'
 HEADER_SIZE = 20
@@ -41,8 +36,6 @@ def build_binary_frame(
    rssi: int = -50,
    noise_floor: int = -90,
    iq_pairs: list = None,
    ppdu_byte: int = 0,   # ADR-110: default 0 = HT/legacy (pre-ADR-110 behavior)
    flags_byte: int = 0,  # ADR-110: default 0 = no flags set
 ) -> bytes:
    """Build an ADR-018 binary frame for testing."""
    if iq_pairs is None:
@@ -61,8 +54,6 @@ def build_binary_frame(
        sequence,
        rssi_u8,
        noise_u8,
        ppdu_byte,
        flags_byte,
    )
    iq_data = b''
@@ -72,52 +63,6 @@ def build_binary_frame(
    return header + iq_data
 class TestAdr110ByteEncoding:
    """ADR-110 header bytes: offset 18 carries the PPDU type, offset 19 the flags."""
    def setup_method(self):
        # Fresh parser for every test method.
        self.parser = ESP32BinaryParser()
    def test_pre_adr110_zeros_decode_as_ht_legacy(self):
        """Zeroed bytes (pre-ADR-110 firmware) decode as HT/legacy with no flags."""
        # build_binary_frame defaults to ppdu_byte=0, flags_byte=0.
        meta = self.parser.parse(build_binary_frame()).metadata
        assert meta['ppdu_type'] == 'ht_legacy'
        assert meta['ppdu_type_raw'] == 0
        assert meta['he_capable'] is False
        for flag_key in ('bw40', 'stbc', 'ldpc', 'ieee802154_sync_valid'):
            assert meta[flag_key] is False
    def test_he_su_decodes(self):
        """PPDU byte 1 is reported as HE-SU and HE-capable."""
        meta = self.parser.parse(build_binary_frame(ppdu_byte=1)).metadata
        assert meta['ppdu_type'] == 'he_su'
        assert meta['he_capable'] is True
    def test_he_mu_and_he_tb_decode(self):
        """PPDU bytes 2 and 3 are reported as HE-MU and HE-TB respectively."""
        expectations = {2: 'he_mu', 3: 'he_tb'}
        for ppdu_value, label in expectations.items():
            meta = self.parser.parse(build_binary_frame(ppdu_byte=ppdu_value)).metadata
            assert meta['ppdu_type'] == label
            assert meta['he_capable'] is True
    def test_unknown_ppdu_byte(self):
        """PPDU byte 0xFF is reported as unknown and not HE-capable."""
        meta = self.parser.parse(build_binary_frame(ppdu_byte=0xFF)).metadata
        assert meta['ppdu_type'] == 'unknown'
        assert meta['ppdu_type_raw'] == 0xFF
        assert meta['he_capable'] is False
    def test_all_flags_set_round_trip(self):
        """Every defined flag bit set at once decodes to True."""
        # bw40 (0x01) | STBC (0x04) | LDPC (0x08) | 15.4-sync (0x10) == 0x1D
        meta = self.parser.parse(build_binary_frame(ppdu_byte=1, flags_byte=0x1D)).metadata
        for flag_key in ('bw40', 'stbc', 'ldpc', 'ieee802154_sync_valid'):
            assert meta[flag_key] is True
        assert meta['adr018_flags_raw'] == 0x1D
 class TestESP32BinaryParser:
    """Tests for ESP32BinaryParser."""
@@ -259,172 +204,3 @@ class TestESP32BinaryParser:
            await extractor.disconnect()
        asyncio.run(run_test())
 # ============================================================================
 # ADR-110 §A0.12 — SyncPacket / SyncPacketParser tests (firmware v0.6.9+)
 # ============================================================================
 # Wire constants shared by the sync-packet tests.
 SYNC_MAGIC = 0xC511A110
 SYNC_SIZE = 32
 # Little-endian layout: u32 magic, four u8 fields (node_id, proto_ver, flags,
 # reserved), u64 local_us, u64 epoch_us, u32 sequence, 4 pad bytes = 32 bytes.
 SYNC_FMT = '<IBBBBQQI4x'
 def build_sync_packet(
    node_id: int = 9,
    proto_ver: int = 1,
    is_leader: bool = False,
    is_valid: bool = True,
    smoothed_used: bool = True,
    local_us: int = 28798450,
    epoch_us: int = 27634885,
    sequence: int = 20,
 ) -> bytes:
    """Pack the given fields into a sync packet laid out per SYNC_FMT.

    The three booleans become bits of the single flags byte:
    is_leader -> 0x01, is_valid -> 0x02, smoothed_used -> 0x04.
    The byte following the flags is always packed as 0 (reserved).
    """
    flag_bits = (
        (0x01 if is_leader else 0)
        | (0x02 if is_valid else 0)
        | (0x04 if smoothed_used else 0)
    )
    reserved = 0
    fields = (
        SYNC_MAGIC,
        node_id, proto_ver, flag_bits, reserved,
        local_us, epoch_us, sequence,
    )
    return struct.pack(SYNC_FMT, *fields)
 class TestSyncPacketParser:
    """Decoder tests for the ADR-110 §A0.12 32-byte UDP sync packet (magic 0xC511A110)."""
    def test_follower_typical_packet_roundtrips(self):
        """Encode the COM9-witnessed sync-pkt #1 values and decode every field back."""
        wire = build_sync_packet(
            node_id=9, is_leader=False, is_valid=True, smoothed_used=True,
            local_us=28798450, epoch_us=27634885, sequence=20,
        )
        assert len(wire) == SYNC_SIZE
        decoded = SyncPacketParser.parse(wire)
        assert isinstance(decoded, SyncPacket)
        assert decoded.node_id == 9
        assert decoded.proto_ver == 1
        # Flag accessors are compared by identity: they must be real bools.
        assert decoded.is_leader is False
        assert decoded.is_valid is True
        assert decoded.smoothed_used is True
        assert decoded.local_us == 28798450
        assert decoded.epoch_us == 27634885
        assert decoded.sequence == 20
        # The 1.16-second boot delta from §A0.10 must fall out of the two clocks.
        boot_delta_us = decoded.local_us - decoded.epoch_us
        assert boot_delta_us == 1163565
    def test_leader_packet_has_local_close_to_epoch(self):
        """COM12 (leader) had flags=0x03 and epoch ≈ local."""
        wire = build_sync_packet(
            node_id=12, is_leader=True, is_valid=True, smoothed_used=False,
            local_us=28864932, epoch_us=28864939, sequence=20,
        )
        decoded = SyncPacketParser.parse(wire)
        assert decoded.node_id == 12
        assert decoded.is_leader is True
        assert decoded.is_valid is True
        assert decoded.smoothed_used is False
        assert decoded.flags_raw == 0x03
        # Leader offset is near zero, not exactly zero: -7 µs for these values.
        offset_us = decoded.local_us - decoded.epoch_us
        assert offset_us == -7
    def test_magic_mismatch_raises(self):
        """A datagram whose magic is wrong must be rejected, not silently decoded."""
        corrupted = bytearray(build_sync_packet())
        # Byte 0 is the magic's low byte on the wire (0x10); overwrite it.
        corrupted[0] = 0x01
        with pytest.raises(CSIParseError, match="magic mismatch"):
            SyncPacketParser.parse(bytes(corrupted))
    def test_short_packet_raises(self):
        """A packet shorter than 32 bytes must raise early instead of truncating."""
        truncated = build_sync_packet()[:16]
        with pytest.raises(CSIParseError, match="too short"):
            SyncPacketParser.parse(truncated)
    def test_all_flag_combinations(self):
        """All eight combinations of the three flag bits decode independently."""
        combos = [
            (leader, valid, smoothed)
            for leader in (False, True)
            for valid in (False, True)
            for smoothed in (False, True)
        ]
        for leader, valid, smoothed in combos:
            wire = build_sync_packet(
                is_leader=leader,
                is_valid=valid,
                smoothed_used=smoothed,
            )
            decoded = SyncPacketParser.parse(wire)
            assert decoded.is_leader == leader
            assert decoded.is_valid == valid
            assert decoded.smoothed_used == smoothed
    def test_dispatch_distinguishes_csi_from_sync(self):
        """The leading little-endian u32 is enough for a host to tell CSI from sync."""
        (csi_magic,) = struct.unpack_from('<I', build_binary_frame(), 0)
        (sync_magic,) = struct.unpack_from('<I', build_sync_packet(), 0)
        assert csi_magic == ESP32BinaryParser.MAGIC
        assert sync_magic == SyncPacketParser.MAGIC
        assert csi_magic != sync_magic
    def test_apply_to_local_recovers_epoch_at_sync_point(self):
        """ADR-110 iter 26 — Python parity with Rust's `apply_to_local`.
        Feeding sync.local_us itself into apply_to_local must return
        sync.epoch_us exactly."""
        wire = build_sync_packet(
            local_us=28_798_450, epoch_us=27_634_885, sequence=20,
        )
        decoded = SyncPacketParser.parse(wire)
        assert decoded.apply_to_local(decoded.local_us) == decoded.epoch_us
        # §A0.10's bench number
        assert decoded.local_minus_epoch_us() == 1_163_565
    def test_apply_to_local_preserves_inter_frame_delta(self):
        """A frame stamped 5 s after the sync packet on the follower's local
        clock must map to a mesh time exactly 5 s after sync.epoch_us."""
        wire = build_sync_packet(
            local_us=28_798_450, epoch_us=27_634_885, sequence=20,
        )
        decoded = SyncPacketParser.parse(wire)
        five_seconds_us = 5_000_000
        frame_local_us = decoded.local_us + five_seconds_us
        assert decoded.apply_to_local(frame_local_us) == decoded.epoch_us + five_seconds_us
    def test_mesh_aligned_us_for_sequence_matches_rust(self):
        """Cross-language parity with Rust's
        `end_to_end_sync_decode_then_frame_mesh_recovery` test —
        100 frames after sync.sequence at 20 fps = sync.epoch_us + 5 s."""
        wire = build_sync_packet(
            local_us=28_798_450, epoch_us=27_634_885, sequence=20,
        )
        decoded = SyncPacketParser.parse(wire)
        # Sequence 120 is 100 frames past the packet's sequence of 20.
        interpolated = decoded.mesh_aligned_us_for_sequence(120, 20.0)
        assert interpolated == decoded.epoch_us + 5_000_000
        # The clock-offset path and the sequence-interpolation path must agree.
        frame_local_us = decoded.local_us + 5_000_000
        assert decoded.apply_to_local(frame_local_us) == interpolated
    def test_canonical_wire_bytes_match_rust_decoder(self):
        """ADR-110 iter 21 — cross-language wire-format conformance gate.
        The same bytes are pinned in the Rust hardware crate's
        `canonical_wire_bytes_match_python_decoder` test (same field
        values, encoded by Rust's `SyncPacket::to_bytes`). If this
        hardcoded hex stops matching what Rust produces from the equivalent
        SyncPacket struct, ONE of the decoders has drifted from the wire.
        Canonical packet: COM9 sync-pkt #1 from §A0.12 live capture.
        """
        canonical = bytes.fromhex(
            "10a111c509010600"   # magic LE + node=9 + ver=1 + flags=0x06 + reserved
            "f26db70100000000"   # local_us = 28_798_450 (LE u64)
            "c5aca50100000000"   # epoch_us = 27_634_885 (LE u64)
            "1400000000000000"   # sequence = 20 (LE u32) + 4 reserved bytes
        )
        assert len(canonical) == SyncPacketParser.SIZE
        assert SyncPacketParser.SIZE == 32
        decoded = SyncPacketParser.parse(canonical)
        assert decoded.node_id == 9
        assert decoded.proto_ver == 1
        assert decoded.flags_raw == 0x06
        assert decoded.is_leader is False
        assert decoded.is_valid is True
        assert decoded.smoothed_used is True
        assert decoded.local_us == 28_798_450
        assert decoded.epoch_us == 27_634_885
        assert decoded.sequence == 20
        # The recovered offset is §A0.10's measured 1.16-second boot delta.
        recovered_offset_us = decoded.local_us - decoded.epoch_us
        assert recovered_offset_us == 1_163_565
@@ -1,137 +0,0 @@
 # Edge-Latency Benchmark Results — ADR-163
 Converting **CLAIMED** edge latency budgets into **MEASURED-on-host** numbers,
 closing the measurement debt flagged by Milestones 5/6 (ADR-159 / ADR-160).
 Benches + docs only — **no production-code behavior changed**.
 ## The honest caveat, up front (read before citing any number)
 Two distinct gaps separate every number below from the figure it is converting:
 1. **Host ≠ ESP32.** The wasm-edge skill modules document budgets *"on ESP32-S3
   WASM3"* (e.g. `exo_time_crystal`: "H (<10 ms)"). These benches run **native
   x86_64 on a development laptop**, not the Xtensa/WASM3 target. A native host
   median is a **lower bound on the ESP32 latency** (a proxy for the algorithm's intrinsic work), not the ESP32 number.
   WASM3 interpretation on a ~240 MHz Xtensa core is typically 1–2 orders of
   magnitude slower than native `-O` host code, so a host median far under the
   budget **does NOT prove the ESP32 meets it.** *The ESP32 figure is NOT
   reproduced here — it needs hardware.*
 2. **Bench ≠ the doc-claimed measurement.** For the cogs, the manifest cites a
   **cold-start** number (`cold_start_ms_avg`, weight-load included); these
   benches measure **steady-state** per-frame `infer` (warm, weights resident).
   Different measurements; we report both, labelled.
 Grades (per `benchmarks/wiflow-std/RESULTS.md` / ADR-152 vocabulary):
 - **MEASURED-on-host** — reproduced in this repo on the machine below, exact
  command recorded. NOT the ESP32 / NOT the cold-start figure.
 - **CLAIMED (ESP32)** — the doc budget; UNMEASURED on hardware here.
 ## Machine
 | | |
 |---|---|
 | Host | `ruvzen` (Windows 11, this dev box) |
 | CPU | Intel Core Ultra 9 285H |
 | Toolchain | `cargo 1.91.1`, `--release` (opt-level per crate profile) |
 | Bench harness | criterion 0.5 (`time: [low **median** high]` reported below) |
 | Date | 2026-06-12 |
 Run-to-run spread on this box is non-trivial (criterion's low/high bracket the
 median by a few %); the medians below are single-session captures with the smoke
 settings `--warm-up-time 1 --measurement-time 2` (wasm-edge) / `3` (cogs). Re-run
 for your own machine — the absolute numbers are host-specific.
 ---
 ## T1 — wasm-edge `process_frame` hot paths (ADR-160 deferred item → DONE host)
 The crate is **excluded from the v2 workspace**; bench from the crate dir.
 ```bash
 cd v2/crates/wifi-densepose-wasm-edge
 cargo bench --features std -- --warm-up-time 1 --measurement-time 2
 # med_seizure_detect is medical-experimental-gated:
 cargo bench --features std,medical-experimental -- --warm-up-time 1 --measurement-time 2 med_seizure
 ```
 | Hot path (M6-audit-named) | Bench id | Host median | Grade | Doc budget (CLAIMED, ESP32) |
 |---|---|---|---|---|
 | `exo_time_crystal` 256-pt × 128-lag autocorrelation (full buffer) | `exo_time_crystal::process_frame[autocorr_256x128]` | **17.3 µs** | MEASURED-on-host | "H (<10 ms) on ESP32-S3 WASM3" — **NOT reproduced here (needs hardware)** |
 | `exo_ghost_hunter` empty-room periodicity + hidden-breathing | `exo_ghost_hunter::process_frame[empty_room_periodicity]` | **1.44 µs** | MEASURED-on-host | research/exotic; no firm ESP32 figure — host proxy only |
 | `sec_weapon_detect` per-subcarrier Welford (MAX_SC=32) | `sec_weapon_detect::process_frame[per_sc_welford]` | **0.42 µs** (420 ns) | MEASURED-on-host | research-grade; calibration-gated — host proxy only |
 | `med_seizure_detect` clonic-phase rhythm path (steady-state frame) | `med_seizure_detect::process_frame[clonic_rhythm]` | **0.10 µs** (105 ns) | MEASURED-on-host (feature-gated) | doc budget "S (<5 ms) on ESP32"; **NOT reproduced here** |
 Reading these honestly:
 - `exo_time_crystal` at **17.3 µs host** is the only one whose host cost reaches
  even the *thousandths* of its 10 ms ESP32 budget (≈0.17%) — it does the most work
  (~32K MACs/frame). 17.3 µs native says the algorithm is cheap; it says
  **nothing** about whether WASM3-on-Xtensa lands under 10 ms. A naïve
  host→ESP32 extrapolation (assume 100× interpreter+clock penalty) would put it
  near ~1.7 ms, comfortably under — **but that is an extrapolation, not a
  measurement**, and is recorded here only to show the host number is not
  obviously in tension with the budget. ESP32 figure: **UNMEASURED**.
 - `med_seizure_detect`'s 105 ns is the **steady-state** per-frame cost; the
  expensive clonic autocorrelation only fires when the state machine is in the
  clonic phase, so this is a lower-bound on the heavy path, not the worst case.
  It is still a real, committed host datapoint.
 - The pre-existing `tests/budget_compliance.rs` already asserts the L/S/H
  wall-clock tiers (25 passing tests); these criterion benches add the
  regression-grade, reproducible median that ADR-160 deferred.
 ---
 ## T2 — cog steady-state inference latency (ADR-159/160 deferred item → DONE)
 Cog crates are normal workspace members; bench from `v2/`. Real weights
 (`count_v1.safetensors` / `pose_v1.safetensors`) ship in-repo under each cog's
 `cog/artifacts/`, so the bench measures the **real Candle CPU forward**, not the
 stub (the bench `assert!`s `backend().starts_with("candle-")`).
 ```bash
 cd v2
 cargo bench -p cog-person-count  --no-default-features --bench infer_bench -- --warm-up-time 1 --measurement-time 3
 cargo bench -p cog-pose-estimation --no-default-features --bench infer_bench -- --warm-up-time 1 --measurement-time 3
 ```
 | Cog | Bench id | Host median (steady-state infer, CPU) | Grade | Manifest cold-start (CLAIMED, different measurement + machine) |
 |---|---|---|---|---|
 | cog-person-count | `cog_person_count::infer[cpu_real_weights_steady_state]` | **305 µs** (idle box) | MEASURED-on-host | — (person-count manifest carries comparable provenance) |
 | cog-pose-estimation | `cog_pose_estimation::infer[cpu_real_weights_steady_state]` | **305 µs** (idle box) | MEASURED-on-host | `cold_start_ms_avg: 5.4` (30 invocations, **ruvultra/RTX 5080 host**, candle 0.9 cpu) — **cold-start, NOT steady-state; NOT this machine** |
 > Spread caveat (observed, honest): both medians above were captured with the box
 > otherwise idle. A re-run of the validate-form command *while a second cargo job
 > was loading the same cores* gave 385 µs (person-count) / 973 µs (pose) —
 > the criterion low/high bracket widens to ~0.34–1.18 ms under contention. The
 > 305 µs figures are the idle-box datapoints; the absolute number is host- and
 > load-dependent (the ~3× pose swing, 305 µs → 973 µs, is core contention, not a code change).
 Reading these honestly:
 - **Steady-state ≠ cold-start.** The pose manifest's `5.4 ms` folds in one-time
  weight load / mmap / first-forward allocation. This bench warms the engine
  first and times only the recurring per-frame forward, on a *different
  machine*. The two numbers are not comparable and we do not claim this bench
  reproduces the 5.4 ms manifest figure.
 - Both cogs share the same conv encoder; person-count adds a count head +
  confidence head, pose adds a 256-wide MLP head. The host steady-state cost is
  dominated by the three dilated Conv1d layers (56→64→128→128) shared by both —
  which is why both land at ~305 µs.
 - **Empirical confirmation of the steady-state/cold-start gap:** pose
  steady-state (305 µs host) is ~18× *under* the manifest's 5.4 ms cold-start.
  Even accounting for the different machine, this is the expected shape — the
  bulk of cold-start is one-time setup, not the forward pass — and it is exactly
  why conflating the two would be dishonest.
 ---
 ## Status vs the deferred items
 | Deferred item | Was | Now |
 |---|---|---|
 | ADR-160 "Criterion benches for `process_frame` budget claims" | ACCEPTED-FUTURE | **DONE (host)**; ESP32-on-hardware still **PENDING** (needs the wasm32 target + a flashed ESP32-S3) |
 | ADR-159/160 cog inference latency (`cold_start_ms_avg` uncommitted-benched) | CLAIMED | **MEASURED-on-host (steady-state)**; cold-start-on-ruvultra remains the manifest's separate claim |
 Nothing here changes runtime behavior — these are benches + this results file
 only. No crate needs republishing.
@@ -1,132 +0,0 @@
 # Edge-Skill Synthetic-Ground-Truth Validation — RESULTS
 **Crate:** `v2/crates/wifi-densepose-wasm-edge` (workspace-EXCLUDED — build from its own dir)
 **Branch:** `feat/edge-skills-synthetic-validation`
 **ADR:** [ADR-160](../../docs/adr/ADR-160-edge-skill-library-honest-labeling.md)
 **Date:** 2026-06-13
 **Harness:** `tests/synthetic_validation.rs`
 > **HONESTY BOUNDARY — read first.** Everything below is **synthetic-ground-truth
 > validation**: a signal is *planted* with a known answer, the **real** detector
 > is run, and detection accuracy / precision / recall / rate-error is **measured**.
 > This is **NOT field accuracy.** A skill that recovers a planted sinusoid here is
 > proven to do the math it claims on a *constructed* signal; it is **NOT** proven
 > to work on real CSI in a real room. Skills whose detection target cannot be
 > honestly planted (clinical, weapon, affect, sleep-stage, sign-language) are
 > **NOT** given a number — they are listed under **DATA-GATED** with the real
 > data each would require.
 ## Reproduce
 ```bash
 cd v2/crates/wifi-densepose-wasm-edge   # workspace-excluded; build here
 cargo test --features std --test synthetic_validation -- --nocapture
 # also runs under the medical tier (med_* skills stay DATA-GATED, not validated):
 cargo test --features std,medical-experimental --test synthetic_validation -- --nocapture
 ```
 Each `MEASURED-on-synthetic | …` line printed by the harness is the source of the
 table below. Numbers are deterministic (no RNG; pseudo-noise uses a fixed LCG seed).
 ---
 ## MEASURED-on-synthetic (constructible skills)
 | Skill | What was planted (ground truth) | Result | Grade |
 |-------|----------------------------------|--------|-------|
 | **vital_trend** | BPM held N≥6 calls at each threshold band (brady/tachy-pnea <12 / >25, brady/tachy-cardia <50 / >120, apnea breathing<1.0 for ≥20) vs normal | **acc 1.000, prec 1.000, recall 1.000** (TP5 FP0 TN5 FN0) | MEASURED |
 | **exo_time_crystal** | period-2 coordinated motion vs pseudo-noise + flat | **acc 1.000** (TP1 FP0 TN2 FN0) | MEASURED † |
 | **exo_ghost_hunter** (hidden breathing) | phase sinusoid at lag-8 (breathing band 5–15) in an empty room vs flat phase | **acc 1.000**; planted score **1.000**, flat **0.000** | MEASURED |
 | **occupancy** | 220-frame flat-amplitude calibration, then strong per-zone amplitude variance vs flat | **acc 1.000** (TP1 FP0 TN1 FN0) | MEASURED |
 | **intrusion** | calibrate→arm (330 quiet frames), then per-subcarrier Δphase>1.5 + Δamp≫3σ vs quiet | **acc 1.000** (TP1 FP0 TN1 FN0) | MEASURED |
 | **exo_rain_detect** | empty room, 60-frame baseline, then broadband variance (8/8 groups, ratio≫2.5) for ≥10 frames vs stable-low | **acc 1.000** (TP1 FP0 TN1 FN0) | MEASURED |
 | **sig_flash_attention** | sustained high phase+amplitude in each of the 8 subcarrier groups; assert reported attention peak == planted group | **peak-localization 8/8 = 1.000** | MEASURED |
 | **spt_spiking_tracker** | sparse (2-subcarrier) large phase-delta in each of the 4 zones; assert tracked zone == planted zone | **zone-localization 4/4 = 1.000** | MEASURED ‡ |
 | **sig_optimal_transport** | sustained large frame-to-frame amplitude-distribution change vs stationary | **acc 1.000** (TP1 FP0 TN1 FN0) | MEASURED |
 | **sig_mincut_person_match** | 2 persons with distinct stable per-region variance signatures over 40 frames | **person ids assigned, 0 id-swaps / 40 frames** | MEASURED |
 | **lrn_dtw_gesture_learn** | stillness → 3 identical gesture rehearsals → enrollment | **template enrolled (templates=1)** | MEASURED (enroll) §|
 | **sig_sparse_recovery** | 30 clean frames to init, then 8/32 (25%) nulled subcarriers | **dropout-detect + recovery-trigger = PASS** | MEASURED (trigger) ¶|
 ### Caveats on individual results
 † **exo_time_crystal — honest discriminative limit.** A *pure* periodic signal
 already has autocorrelation peaks at lag L **and** 2L (natural harmonics), so this
 "period-doubling" detector cannot separate a true period-2 sub-harmonic from a
 plain periodic signal — an earlier plant using a clean sine produced a *false
 positive* (recorded during development). The construct it **can** discriminate
 with known ground truth is **periodic-coordination vs aperiodic** (noise/flat),
 which is what is measured (1.000). The original "sub-harmonic vs clean period"
 claim is **NOT** validatable with this algorithm.
 ‡ **spt_spiking_tracker — plant must be sparse.** With weights init'd home=1.0 /
 cross=0.25, firing all 8 inputs in a zone (8×0.25=2.0 > threshold 1.0) overdrives
 *every* output neuron and the tracker collapses to zone 0 (measured 1/4 during
 development). Firing only 2 inputs (home 2.0 fires, cross 0.5 silent) yields clean
 4/4 zone localization. The validatable claim is *single-zone* localization.
 § **lrn_dtw_gesture_learn — enrollment validated; replay-match NOT.** The
 deterministic, constructible part (stillness → 3 identical rehearsals → a template
 is enrolled) is MEASURED. The DTW *replay match* (731) did **not** fire on the
 identical replay in this run (`match_same=false`) — replay-recognition accuracy is
 **reported, not asserted**, and is not claimed as validated.
 ¶ **sig_sparse_recovery — trigger validated; recovery accuracy is NEGATIVE.**
 The dropout-detection + ISTA-recovery *trigger* pipeline fires correctly on >10%
 planted nulls (asserted). But the **measured recovery accuracy is NOT a win**:
 recovered RMSE **1.0045** vs unrecovered-null RMSE **0.9830** (**−2.2%**, i.e.
 slightly *worse* than leaving the nulls at zero) on a neighbor-correlated signal.
 The tridiagonal correlation model's fixed point does not equal the planted truth.
 **The recovery's reconstruction quality is therefore NOT validated as effective on
 synthetic data** — only its detection/trigger path is. Reported honestly; no
 positive number claimed.
 ---
 ## DATA-GATED — NOT validatable on synthetic data
 Planting a "seizure-like" / "weapon-like" / "happy-like" synthetic signal and
 claiming the detector "works" validates **nothing real** and is exactly the
 AI-slop this project fights. These skills run real DSP (per ADR-160, 0 stubs) and
 keep their ADR-160 disclaimers, but get **no accuracy number** here. Each needs
 the specific real, labelled data listed:
 | Skill | Why not constructible on synthetic | Real data required |
 |-------|------------------------------------|--------------------|
 | `med_seizure_detect` | "seizure-like" motion is not a seizure; no ground-truth signature exists synthetically | Clinical EEG-/video-labelled tonic-clonic seizure CSI from instrumented patients |
 | `med_sleep_apnea` | a planted breathing-pause is not clinical apnea (AHI scoring, hypopnea, desaturation) | Polysomnography-labelled (PSG) overnight CSI with scored apnea/hypopnea events |
 | `med_cardiac_arrhythmia` | a synthetic HR sequence cannot encode true arrhythmia morphology | ECG-labelled CSI (AFib/PVC/etc.) from clinical monitoring |
 | `med_respiratory_distress` | distress is a clinical gestalt, not a plantable rate | Clinician-labelled respiratory-distress CSI episodes |
 | `med_gait_analysis` | clinical gait metrics need a reference motion-capture standard | Mocap-/force-plate-labelled gait CSI |
 | `sec_weapon_detect` | a high variance ratio is RF reflectivity, **not** weapon discrimination (ADR-160 §A3 already renamed the event to `HIGH_METAL_REFLECTIVITY`) | Labelled metal-object-vs-no-object CSI with controlled object classes |
 | `exo_emotion_detect` | affect is not recoverable from a planted heuristic; outputs are proxies (ADR-160 §A2) | Validated affect-labelled CSI (self-report / physiological ground truth) |
 | `exo_happiness_score` | "happiness" is a gait-energy proxy, not a measured affect (ADR-160 §A2) | Validated affect/valence-labelled CSI |
 | `exo_dream_stage` | sleep staging needs PSG reference (EEG/EOG/EMG) | PSG-staged overnight CSI |
 | `exo_gesture_language` | coarse gesture clusters ≠ true sign language (ADR-160 §A4) | Labelled ASL letter/word CSI dataset |
 > The above are **not failures** — they are the honest boundary. A smaller set of
 > genuinely-measured skills plus this explicit gated list is the deliverable, per
 > the prove-everything directive.
 ---
 ## Skills not in either list
 The remaining edge skills (smart-building / retail / industrial occupancy-style,
 the other `sig_*`/`lrn_*`/`spt_*`/`tmp_*`/`qnt_*`/`aut_*`/`ais_*` algorithm-named
 modules) are **wired and exercised live** in the unified pipeline integration test
 (`tests/pipeline_all.rs`, all 59 default / 64 medical skills run without panic over
 300 synthetic frames) but were **not** given an individual planted-ground-truth
 accuracy number here. They are honest REAL-DSP modules (ADR-160) whose physical
 observable could be planted with more harness work; that is deferred, not claimed.
 ## Test counts (full crate suite)
 ```
 DEFAULT  (--features std):                     631 passed, 0 failed
  (lib 504; budget 25; honest_labeling 10; pipeline_all 4; synthetic_validation 12; bench 1; vendor 75)
 MEDICAL  (--features std,medical-experimental): 669 passed, 0 failed
  (lib 542; +16 same new tests; med_* stay DATA-GATED, not validated)
 ```
 (M6 baseline was 615 / 653; the new pipeline_all (4) + synthetic_validation (12)
 tests add 16 to each tier.)
@@ -1,26 +0,0 @@
 # Upstream clone (WiFlow-STD, DY2434) -- never commit third-party code/weights
 upstream/
 # Local python env
 .venv/
 # Downloaded data / artifacts
 data/
 downloads/
 *.pth
 *.pt
 *.npy
 *.npz
 *.zip
 *.mat
 *.safetensors
 results/parity_fixture.json
 __pycache__/
 *.onnx
 # Committed ground truth: corruption masks for the pristine Kaggle download.
 # remote/clean_v2.py zeroes the corrupted source windows IN PLACE, so these
 # masks CANNOT be regenerated from a cleaned copy (generate_corruption_masks.py
 # documents the criteria and reproduces them only from a fresh download).
 !results/nan_windows_mask.npy
 !results/big_windows_mask.npy
@@ -1,486 +0,0 @@
 # WiFlow-STD (DY2434) Benchmark Results — ADR-152 §2.2
 Upstream: <https://github.com/DY2434/WiFlow-WiFi-Pose-Estimation-with-Spatio-Temporal-Decoupling>
 pinned at `06899d29` (2026-04-05), Apache-2.0. Dataset: Kaggle `kaka2434/wiflow-dataset`
 (12.8 GB archive → 15.5 GB extracted; 360,000 windows of 540×20 CSI + 15-keypoint 2D labels).
 Published claims (README "Setting 1"): PCK@20 97.25%, PCK@30 98.63%, PCK@40 99.16%,
 PCK@50 99.48%, MPJPE 0.007 m, 2.23M params, 0.07 GFLOPs.
 ## Measurement (a): their model on their data
 ### Artifact verification (MEASURED, 2026-06-10, this repo `eval_repro.py`)
 | Check | Result |
 |---|---|
 | Parameter count | **2,225,042 (2.23M) — matches claim** |
 | FLOPs (torch profiler, batch 1) | ~0.055 GFLOPs — consistent with 0.07B claim |
 | CPU latency (Windows box, torch 2.12 CPU) | 13.2 ms/window @ batch 1 (76/s); 2.48 ms/sample @ batch 64 (403/s) |
 | Checkpoint load | `weights_only=True` (no pickle code execution) |
 ### Released checkpoint does NOT reproduce the claims — REFUTED as shipped
 Running the released `best_pose_model.pth` through the released code on the released
 dataset with the released split procedure (seed-42 file-level 70/15/15; 54,000 test
 samples) yields:
 | Metric | Published | Measured (shipped checkpoint) |
 |---|---|---|
 | PCK@20 | 97.25% | **0.08%** |
 | PCK@30 | 98.63% | 0.78% |
 | PCK@40 | 99.16% | 5.53% |
 | PCK@50 | 99.48% | 15.42% |
 | MPJPE | 0.007 | **NaN** (dataset contains NaN CSI windows) |
 Raw output: `results/repro_a.json`.
 Diagnostics (on 2,000 NaN-free windows from the first files of the dataset, i.e.
 mostly would-be *training* data — so this is not a split mismatch):
 - Predictions correlate with targets (Pearson r ≈ 0.76) — the checkpoint is a trained
  model, but in a **different keypoint normalization/order** than the released data.
 - Best-case post-hoc global per-axis affine correction: PCK@20 ≈ 20%.
 - Best-case per-keypoint affine correction (15×2 fitted transforms — generous
  cheating): PCK@20 ≈ 72%, still far below 97.25%.
 - Pred↔target keypoint correspondence matrix is degenerate (multiple predicted
  keypoints best-match the same target joint) — keypoint convention mismatch.
 ### Reproducibility defects in the released artifacts
 1. `models/__init__.py` imports `TemporalConvNet`, which `models/tcn.py` does not
   define — **the published code does not import/run as-is**.
 2. The released root checkpoint uses pre-rename module names (`att.*`, `final_conv.*`)
   vs the published code (`attention.*`, `decoder.*`) — same shapes/param count, but
   confirms the checkpoint predates the published code.
 3. The second shipped checkpoint (`cross_dataset_test/WiFlow/best_pose_model.pth`) is
   a **different architecture** (342-channel input = MM-Fi layout, 3 TCN layers,
   3-channel/3D decoder) — not usable on their own dataset.
 4. `run.py` ignores `--data_dir` and hardcodes `../preprocessed_csi_data`.
 5. The released dataset's final 13 files (indices 487–499; 9,072 windows, 2.52%)
   are corrupted: NaN values plus garbage amplitudes up to 3.4e38 (float32 max) in
   data that is otherwise [0,1]-normalized. Upstream code has no NaN/inf handling;
   training as published on this download diverges — the first corrupted batch
   overflows fp16 autocast and permanently poisons BatchNorm running statistics
   (GradScaler step-skipping does not protect BN). The authors' training curves
   show normal convergence, so their local data evidently differed from the
   Kaggle upload. Window masks: `results/nan_windows_mask.npy`,
   `results/big_windows_mask.npy`.
 ### Reproducing the corruption masks
 The two mask files (9,070 NaN/Inf windows, 9,072 with |amplitude| > 1.5;
 union 9,072, all in dataset files 487–499) are **committed ground truth**
 (gitignore-negated, ~352 KB each). They can only be regenerated from a
 **pristine** Kaggle download: `remote/clean_v2.py` repairs the dataset by
 zeroing the corrupted windows in place, after which the corruption evidence
 is gone and a rescan returns all-False. `generate_corruption_masks.py`
 re-derives them (chunked scan, criteria: any non-finite value OR
 max |finite| > 1.5 per 540×20 window) and refuses to write all-False masks,
 which indicate a cleaned copy. Verified 2026-06-11: a regeneration from the
 local pristine download is bit-identical to the committed masks.
 ### Retraining result (MEASURED, 2026-06-10): claims APPROXIMATELY REPRODUCED
 Since the shipped checkpoint is unusable, measurement (a) fell back to retraining
 with upstream code + defaults (seed 42, batch 64, early-stopped at epoch 41 of 50,
 best epoch 36, ~75 s/epoch) on ruvultra (RTX 5080). Deviations, all forced and
 documented: one-line fix for defect (1); torch 2.x+cu128 instead of pinned 2.3.1
 (Blackwell sm_120 unsupported); the 9,072 corrupted windows (defect 5) zeroed
 entirely — without this the published pipeline produces NaN from epoch 1 (observed).
 Scripts mirrored in `remote/`; raw metrics in `results/eval_retrained.json`.
 | Metric | Published | Retrained (full test, 54,000) | Retrained (corruption-free, 52,560) |
 |---|---|---|---|
 | PCK@20 | 97.25% | **96.09%** | **96.61%** |
 | PCK@30 | 98.63% | 97.89% | 98.23% |
 | PCK@40 | 99.16% | 98.58% | 98.79% |
 | PCK@50 | 99.48% | 98.99% | 99.11% |
 | MPJPE | 0.007 | 0.0098 | 0.0094 |
 Within ~0.4–1.2 PCK points of every published figure (single run, corrupted train
 windows zeroed, different torch/GPU). **Verdict: the accuracy claims are credible
 and approximately reproducible — but only after repairing the released dataset and
 code.** Val best: PCK@20 96.99%, MPJPE 0.0086 (epoch 36).
 One more defect found during the run:
 6. `train.py` calls `plot_training_history`, which is not defined anywhere — the
   built-in post-training test evaluation is unreachable as published (crashes
   with NameError after training completes).
 ## ADR-152 §2.2 citation rule
 Evidence grade for the WiFlow-STD accuracy claims after measurement (a):
 **MEASURED-EQUIVALENT (96.1–96.6% PCK@20 reproduced by retraining; shipped
 checkpoint REFUTED; dataset/code require repairs)**. RuView docs may cite
 "~96% PCK@20 (our reproduction)" — still **not comparable** to our 17-keypoint
 ESP32 numbers (different hardware, 5 subjects, in-domain random split,
 15 keypoints).
 ## Edge optimization (measured)
 ADR-152 "optimize beyond SOTA" track, 2026-06-10, this Windows box (Windows 11,
 16 torch threads, torch 2.12.0+cpu, onnxruntime 1.26.0). Subject: the retrained
 checkpoint `results/retrained_best_pose_model.pth` (2,225,042 fp32 params).
 Scripts: `quantize_bench.py`, `onnx_bench.py`, `eval_ort_accuracy.py`.
 Raw numbers: `results/edge_optimization.json`.
 Accuracy is on a **10,000-window seed-42 random subset** of the corruption-free
 test split (same seed-42 file-level 70/15/15 split as `eval_repro.py`; 54,000
 test windows, 1,440 corrupted excluded via the union of
 `results/nan_windows_mask.npy` and `results/big_windows_mask.npy`, leaving
 52,560; subset drawn with
 `np.random.default_rng(42)`). The fp32 subset PCK@20 (96.68%) matches the full
 clean-test figure (96.61%), so the subset is representative.
 Latency is CPU ms/window, median of repeated runs, 3 interleaved repetitions
 per variant (medians below; run-to-run spread on this box is large, roughly
 ±20-40% at batch 1 — reps are in the JSON).
 | Variant | Disk size | Batch 1 (ms/win) | Batch 64 (ms/win) | PCK@20 | PCK@50 | MPJPE |
 |---|---|---|---|---|---|---|
 | torch fp32 (baseline) | 9.07 MB | 11.0 | 2.27 | 96.68% | 99.15% | 0.00936 |
 | torch fp16 (`.half()`) | **4.58 MB** | 24.3 | 2.42 | 96.68% | 99.15% | 0.00946 |
 | torch int8 dynamic | 9.07 MB (unchanged) | 15.6 | 2.06 | 96.68% (identical) | 99.15% | 0.00936 |
 | ONNX fp32 (onnxruntime) | 8.97 MB | **3.2** | **2.0** | 96.68% | 99.15% | 0.00936 |
 | ONNX int8 (ORT dynamic, supplementary) | **2.44 MB** | 6.5 | 5.8 | 96.52% | 99.15% | 0.01108 |
 Findings:
 - **torch dynamic INT8 quantizes nothing on this model.** The architecture has
  **zero `nn.Linear` layers** — it is entirely Conv1d (21) + Conv2d (22) +
  BatchNorm. `torch.ao.quantization.quantize_dynamic` (requested over
  `{Linear, Conv1d, Conv2d}`) converted **0 modules / 0.0% of params**: dynamic
  quantization only has kernels for Linear/RNN-family modules and silently
  skips convolutions. The "int8" model is bit-identical to fp32 (same outputs,
  same 9.07 MB). Conv quantization would require static (PTQ) quantization
  with calibration — out of scope here; the ORT dynamic path below is the
  honest int8 datapoint.
 - **fp16 halves size for free accuracy-wise** (PCK@20 −0.005 pt, MPJPE
  +0.0001) but is *slower* on CPU at batch 1 (~2.2×) — torch CPU fp16 conv
  kernels are emulated. fp16 is a storage/transport format here, not a CPU
  runtime win.
 - **ONNX Runtime is the real batch-1 latency win: ~3.4× faster than torch**
  (3.2 vs 11.0 ms/window) at identical accuracy (parity 2.4e-7).
 ### Verdict on the paper's "~2.2 MB int8" claim
 **Plausible but not free, and unreachable by the obvious PyTorch route.**
 2,225,042 params × 1 byte ≈ 2.2 MB assumes *every* parameter quantizes.
 PyTorch dynamic quantization — the one-liner most readers would reach for —
 yields **9.07 MB (0% quantized)** because the model has no Linear layers.
 ONNX Runtime dynamic quantization, which does have int8 conv weight support,
 gets **2.44 MB** (close to the claim; the overhead is BatchNorm params/buffers
 and quantization scales kept in fp32) at a measurable accuracy cost:
 PCK@20 96.68 → 96.52% (−0.16 pt) and MPJPE 0.00936 → 0.01108 (+18%), and
 ~2× slower inference than ONNX fp32 (ConvInteger kernels). The paper does not
 state a method or an int8 accuracy; treat "2.2 MB" as a weight-arithmetic
 estimate, achievable in practice only via conv-capable quantization toolchains
 and with a small accuracy penalty.
 ### ONNX export status
 **Works.** Exported via the TorchScript exporter (`dynamo=False`), opset 17,
 with a dynamic batch axis — `results/retrained_fp32_dynamic.onnx` (8.97 MB),
 verified to run at batch 1/2/64. The axial attention's
 `view(N*W, C, H)` reshape traced correctly (sizes recorded as graph ops, not
 baked constants). The dynamo exporter also captures the graph but crashed on
 this box writing a ✅ to a cp1252 console (cosmetic Windows encoding issue, not
 a model blocker). Parity vs torch on the stored fixture
 (`results/parity_fixture.npz`, batch 2, seed 42): **max abs diff 2.4e-7 —
 PASS** (< 1e-4). ORT-quantized int8 model: `results/retrained_int8_ort_dynamic.onnx`.
 ### Static PTQ (calibrated) — follow-up
 Follow-up to the dynamic-int8 row above (2026-06-10, same box, onnxruntime
 1.26.0): ONNX Runtime **static** post-training quantization
 (`quantize_static`, QDQ format, per-channel int8 weights + int8 activations)
 of the same fp32 export, calibrated on **corruption-free TRAINING-split
 windows only** (seed-42 file-level split, same masks; 1,000 windows for
 MinMax, 512 for the histogram calibrators; never test windows). Scopes:
 "conv-only" (`op_types_to_quantize=["Conv"]`) and "all-ops". The attention path
 exports as Einsum/Softmax, which ORT never quantizes anyway, so "all-ops"
 differs only by additionally quantizing the elementwise
 Mul/Sigmoid/Add/AveragePool glue. Accuracy on the
 identical 10k-window seed-42 corruption-free test subset; latency median of
 3 interleaved reps (fp32/dynamic re-benched in-session as references).
 Script: `static_ptq_bench.py`; raw: `results/edge_optimization.json`
 (`onnx_static_ptq`).
 | Variant | Disk size | Batch 1 (ms/win) | Batch 64 (ms/win) | PCK@20 | PCK@50 | MPJPE |
 |---|---|---|---|---|---|---|
 | ONNX fp32 (reference) | 8.97 MB | 2.5 | 1.9 | 96.68% | 99.15% | 0.00936 |
 | ORT dynamic int8 (baseline) | **2.44 MB** | 5.7 | 4.6 | 96.52% | 99.15% | 0.01108 |
 | static QDQ **Percentile(99.99) conv-only** | 2.53 MB | 5.3 | 4.7 | 96.61% | 99.16% | **0.01031** |
 | static QDQ MinMax conv-only | 2.53 MB | 5.2 | 3.3 | **96.63%** | 99.19% | 0.01084 |
 | static QDQ Entropy conv-only | 2.53 MB | 5.2 | 3.1 | 96.60% | 99.19% | 0.01078 |
 | static QDQ MinMax all-ops | 2.60 MB | 6.5 | 3.9 | 95.45% | 99.14% | 0.01486 |
 | static QDQ Entropy all-ops | 2.60 MB | 5.7 | 4.1 | 95.30% | 99.13% | 0.01510 |
 | static QDQ Percentile all-ops | 2.60 MB | 5.3 | 4.3 | 96.39% | 99.17% | 0.01218 |
 **Verdict: static PTQ (conv-only) is the new best int8 point on accuracy —
 but only modestly, and it does not fix int8's latency penalty.**
 - **Accuracy: beats dynamic.** All three conv-only calibrations land at
  PCK@20 96.60–96.63% (vs dynamic 96.52%, fp32 96.68% — recovers ~⅔ of the
  dynamic gap) and MPJPE 0.0103–0.0108 (vs dynamic 0.01108). Best MPJPE:
  Percentile conv-only, +10% over fp32 instead of dynamic's +18%.
 - **Size: slightly worse.** 2.53 MB vs 2.44 MB (+3.6%) — QDQ nodes and
  per-channel scales cost a little; BatchNorm stays fp32 in both (the 12 BNs
  follow Slice/Einsum/Reshape, never Conv, so they cannot be folded).
 - **Latency: a wash vs dynamic, still ~2× slower than ONNX fp32 at batch 1.**
  Batch-1 medians 5.2–5.3 vs dynamic 5.7 ms/win in-session — within this
  box's ±20–40% noise. Batch 64 leans static (3.1–3.3 for MinMax/Entropy
  conv-only vs 4.6), same caveat.
 - **All-ops QDQ is strictly worse**: up to −1.4 pt PCK@20 and +60% MPJPE for
  zero size/latency benefit — int8 activations through the elementwise glue
  around the attention blocks is where the damage is. Conv-only is the right
  scope.
 - Negative result worth recording: **Entropy calibration is a no-op here** —
  on an identical calibration set it selects full-range thresholds
  bit-identical to MinMax (all 247 scales equal; verified on a 64-window
  smoke set). Also, ORT 1.26's `CalibMaxIntermediateOutputs` raises a
  spurious "No data is collected" when the batch count divides the chunk
  size (worked around in the script).
 Deployment guidance: need speed → ONNX fp32 (3.2 ms b1). Need int8 weights
 for size → static QDQ conv-only (Percentile or MinMax,
 `results/retrained_int8_static_percentile_conv.onnx`), which strictly
 dominates dynamic int8 on accuracy at ~equal latency and +0.09 MB.
 ## Efficiency sweep (MEASURED, overnight 2026-06-10/11)
 ADR-152 beyond-SOTA track: compact purpose-built variants of the WiFlow-STD
 architecture, trained from scratch on the same cleaned dataset, identical
 seed-42 file-level split, loss and protocol as the measurement-(a) reference
 (fp32, batch 64, ≤50 epochs, patience 5; RTX 5080, ~22–29 min/variant).
 Variant transforms are pure channel/group/stride scalings of an
 architecture-exact parameterized model (validated: reproduces 2,225,042 params
 at the reference config). Scripts: `remote/sweep/`; raw:
 `results/efficiency_sweep.jsonl`; checkpoints `results/{half,quarter,tiny}_best.pth`
 (gitignored).
 | Variant | Params | vs 2.23M | Clean-test PCK@20 | PCK@50 | MPJPE | Best epoch |
 |---|---|---|---|---|---|---|
 | full (reference, meas. a) | 2,225,042 | 1× | 96.61% | 99.11% | 0.0094 | 36 |
 | **half** | **843,834** | **0.38×** | **96.62%** | **99.47%** | **0.00898** | 23 |
 | quarter | 338,600 | 0.15× | 96.05% | 99.43% | 0.00928 | 50 |
 | tiny | 56,290 | 0.025× | 94.11% | 99.36% | 0.0125 | 47 |
 Findings:
 - **The half model (843k params) strictly dominates the full reference** on
  this dataset — equal PCK@20, better PCK@50 and MPJPE, converges in fewer
  epochs. The published 2.23M architecture is over-parameterized for its own
  benchmark.
 - **tiny (56k params, 1/39.5) holds 94.11% PCK@20** — a ~220 KB fp32 /
  ~60 KB int8-class model in reach of severely constrained edge targets,
  at −2.5 pt from the full reference.
 - Caveats: in-domain (5-subject random-file split) like every number on this
  dataset; single run per variant; corruption-free test subset (52,560).
  Cross-domain behavior of compact variants is untested — ADR-150's evidence
  says capacity *hurts* cross-subject, so the compact end may generalize no
  worse, but that is a hypothesis, not a measurement.
 ### Compact-variant edge artifacts (MEASURED, 2026-06-11)
 Edge pipeline for the **tiny** checkpoint (56,290 params), same machinery and
 protocol as the full-model edge rows above (this Windows box, torch
 2.12.0+cpu, onnxruntime 1.26.0; dynamic-batch opset-17 TorchScript export;
 static QDQ **Percentile(99.99) conv-only** int8 calibrated on **512**
 corruption-free TRAIN-split windows; accuracy on the identical 10k-window
 seed-42 clean test subset; latency = median ms/window over 3 interleaved
 reps, with the full-model fp32/int8 sessions interleaved as same-session
 references). Script: `tiny_edge_bench.py`; raw:
 `results/edge_optimization.json` (`tiny_variant`). Torch-vs-ORT parity on the
 stored fixture input: **max abs diff 1.5e-7 — PASS** (< 1e-4). The tiny fp32
 subset PCK@20 (94.11%) matches the full clean-test sweep figure (94.11%)
 exactly, so the subset remains representative.
 Two forced deviations, both recorded in the JSON:
 1. **Adaptive-pool export rewrite.** tiny's derived stride schedule
   `[2,1,1,1]` leaves feature width 16, and the TorchScript exporter rejects
   `AdaptiveAvgPool2d((15,1))` when 15 is not a factor of the input height
   (the full model never hit this — its width was exactly 15). Since the
   pool over a fixed-size map is a fixed linear operator, the export wrapper
   replaces it with `mean(-1)` (W axis, a factor) + a constant averaging
   matmul using PyTorch's exact bin rule; the parity check (vs the original
   torch model with the real pool) proves exactness.
 2. **Calibration count 512, not "~500"**: ORT 1.26's histogram collector
   `np.asarray()`'s the per-batch maxima, so the calibration count must be a
   multiple of the 64-window calibration batch or the ragged last batch
   crashes it (the earlier static-PTQ run dodged this by using exactly 512).
 | Variant | Disk size | Batch 1 (ms/win) | Batch 64 (ms/win) | PCK@20 | PCK@50 | MPJPE |
 |---|---|---|---|---|---|---|
 | full ONNX fp32 (same-session ref) | 8.97 MB | 2.27 | 1.42 | 96.68% | 99.15% | 0.00936 |
 | full static QDQ Percentile conv-only (same-session ref) | 2.53 MB | 5.53 | 3.82 | 96.61% | 99.16% | 0.01031 |
 | **tiny ONNX fp32** | **0.295 MB** | **0.66** | **0.24** | **94.11%** | 99.37% | 0.01253 |
 | tiny static QDQ Percentile conv-only | 0.248 MB | 0.85 | 1.03 | 92.68% | 99.33% | 0.01491 |
 (tiny torch `.pth` checkpoint for reference: 0.34 MB on disk; 56,290 fp32
 params ≈ 225 KB of weights.)
 Findings:
 - **The smallest deployable WiFlow-class model is the tiny ONNX fp32
  artifact: ~295 KB on disk, 0.66 ms/window batch-1 CPU (~1,500 windows/s),
  94.1% PCK@20** — 30× smaller and ~3.4× faster (in-session) than the full
  ONNX fp32 model for −2.6 pt PCK@20.
 - **int8 is a bad trade at this scale.** Static QDQ conv-only — the recipe
  that cost the full model only 0.07 pt — costs tiny **−1.43 pt** PCK@20
  (94.11 → 92.68%) and +19% MPJPE, saves only 47 KB (−16%; QDQ scales and
  the fp32 BN/attention glue are proportionally larger in a small graph),
  and is *slower* than tiny fp32 (0.85 vs 0.66 ms b1; 1.03 vs 0.24 ms b64 —
  QDQ kernel overhead dominates when the convs are this small). A 56k-param
  model has little redundancy left to absorb weight+activation rounding.
 - Deployment guidance, compact edition: ship tiny as **ONNX fp32** — at
  295 KB the int8 size saving solves no real constraint and costs accuracy
  and speed. If ~250 KB vs ~295 KB ever matters, weight-only quantization
  would be the thing to try next, not QDQ.
 ## Measurement (b): BLOCKED-ON-DATA (attempted 2026-06-10)
 The fine-tune-on-ESP32 measurement stopped at dataset characterization, per the
 pre-registered stop rule (<2,000 paired windows). Findings (MEASURED):
 - **Only one trainable paired dataset exists**: `ruvultra:~/work/cog-pose-train/paired.jsonl`
  — 1,077 windows (one subject, one room, one 29.9-min session, single node;
  CSI [56, 20]; 17 COCO keypoints, MediaPipe confidence mean 0.44 — only 264
  windows pass ADR-079's own conf>0.5 training filter). Prior measured attempts
  on this exact set: 0–3% torso-PCK@20 (temporal splits, three independent
  pipelines). Fine-tuning a 2.23M-param model on ~860 train windows would
  measure memorization, not transfer.
 - **The April session behind the old "92.9% PCK@20" claim is lost** (345
  samples, 35 subcarriers; raw CSI gone from ruvzen/ruvultra/cognitum-v0; only
  a 69-sample predictions+GT holdout survives at `models/wiflow-real/eval-holdout.jsonl`).
 - **Forensic recheck of that holdout RETRACTS the 92.9% figure**: the trainer's
  `pck()` used an absolute 0.2 image-unit threshold (not torso-normalized) and
  the model output a **constant pose** (pred std 0.0000 across 69 near-static
  frames; a mean predictor scores 100% under the same protocol). The
  torso-normalized PCK@20 on the same holdout is 19.1%. This corroborates the
  2026-05-11 audit retraction (CHANGELOG, PR #535); stale doc citations were
  removed 2026-06-10 (user-guide, readme-details, ADR-152 §2.1.3). The §2.2
  no-citation rule now applies to ADR-079 accuracy claims.
 Unblock criteria: a paired collection session of ≥2k windows (≈35+ min at the
 observed stride; multi-pose, conf>0.5, ideally with the §2.1.3 two-checkerboard
 calibration), plus a re-baselined our-pipeline number under torso-PCK@20 on the
 same split. WiFlow-STD assets stand ready on ruvultra (`~/wiflow-std-bench/`).
 Also worth investigating: ADR-079's protocol predicts ~9k windows per 30 min;
 the May session under-delivered ~8× (aligner drop rate?).
 ## Measurement (b) (MEASURED 2026-06-10/11)
 The data blocker is resolved: the 2026-06-10 22:10–22:40 collection session produced
 **2,046 paired windows** (`ruvultra:~/wiflow-std-bench/paired-20260610.jsonl`; ONE
 subject, ONE room, ONE ESP32 node, varied poses: walk/raise/squat/kick/wave/turn/
 jump/sit; aligner `scripts/align-ground-truth.js`, non-overlapping 20-frame windows
 ~0.42 s; 17 COCO keypoints in normalized [0,1] camera coords; MediaPipe confidence
 mean 0.802, min 0.692 — all windows pass the conf>0.5 filter). The −4 h timestamp
 bug and the empty-frame confidence-dilution aligner findings are recorded
 separately; results only here. Trained on ruvultra (RTX 5080, torch 2.11+cu128,
 fp32, batch 32, GPU shared with the efficiency sweep). Scripts mirrored in
 `remote/measb/`; raw metrics + full training curves in `results/measurement_b.json`.
 ### Two new aligner/dataset findings (forced deviations, MEASURED)
 1. **`csi_shape` is heterogeneous, not [70, 20]**: 1,347× [70,20], 284× [134,20],
   243× [26,20], 130× [12,20], 42× [20,20]. The ESP32 stream emits mixed frame
   types and `extractCsiMatrix` stamps each window's subcarrier count from
   `window[0].subcarriers`, zero-padding/truncating the other frames — even
   native-70 windows contain ~20.4% internally zero-padded short frames
   (subcarriers 40–69 all-zero). Handling: the primary suite ("all 2,046")
   linearly resamples every frame's subcarrier axis to 70 bins (identity for
   native-70 frames) so the pre-registered n and split sizes hold; a secondary
   suite restricts to the 1,347 native [70,20] windows as a homogeneity check.
 2. **Aligner layout bug**: `extractCsiMatrix` fills `matrix[f * nSc + s]`
   (frame-major) but declares `shape: [nSc, nFrames]` — the stored shape label is
   transposed relative to the data. Confirmed by coherent per-frame zero-tails;
   corrected on load (`reshape(nFrames, nSc).T`).
 ### Protocol (pre-registered, followed)
 Temporal split, no shuffling across time: first 70% train (1,432), next 15% val
 (307), last 15% test (307); seed 42 elsewhere. Model: learned 1×1 Conv1d 70→540
 adapter prepended to the upstream WiFlow-STD trunk; K=17 via the parameter-free
 adaptive pool (`AdaptiveAvgPool2d((17,1))` — pretrained weights load strict for
 any K). CSI normalized by the TRAIN-split p99 amplitude (129.7 all / 130.9
 native-70), clipped to [0,1]. Three runs, ≤60 epochs, early-stop patience 8 on
 val MPJPE, AdamW (adapter lr 1e-4; pretrained trunk lr 1e-5, 10× lower; scratch
 all 1e-4), fp32. Pretrained init = the measurement-(a) **retrained** checkpoint
 (`upstream/test/best_pose_model.pth`, ~96% PCK@20 on WiFlow data; the
 `att.`/`final_conv.` key remap from `eval_repro.py` applied defensively — a no-op,
 that checkpoint already uses post-rename keys). Frozen-trunk run: trunk
 `requires_grad=False` **and** held in `.eval()` so BatchNorm running stats cannot
 drift — a pure transfer probe; only the 70→540 adapter (38,340 params) trains.
 PCK is torso-normalized with **torso = ‖l_shoulder(5) − l_hip(11)‖** (upstream
 `calculate_pck` math — per-frame norm clamped at 0.01, mean over keypoints ×
 frames — but upstream's `NECK_IDX/PELVIS_IDX = 2, 12` is a 15-keypoint
 convention; on 17-kp COCO those indices are right_eye/right_hip, so the indices
 were replaced, not the math). MPJPE is in normalized image units (not meters).
 ### Results — primary suite, all 2,046 windows (test = last 307)
 | Run | PCK@10 | PCK@20 | PCK@30 | PCK@40 | PCK@50 | MPJPE | pred std | best ep |
 |---|---|---|---|---|---|---|---|---|
 | **mean-pose baseline** (honesty bar) | **73.1%** | **95.9%** | **98.7%** | 99.3% | 99.3% | **0.0148** | 0 (by constr.) | — |
 | (i) pretrained-init, full fine-tune | 26.0% | 65.0% | 88.0% | 96.4% | 98.9% | 0.0313 | 0.0113 | 58/60 |
 | (ii) scratch | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 0.2554 | 0.0002 | 4 (stop @13) |
 | (iii) frozen-trunk (adapter only) | 0.0% | 0.0% | 0.2% | 3.2% | 14.4% | 0.1260 | 0.0073 | 59/60 |
 Secondary suite (native [70,20] windows only, n=1,347, test=202) reproduces the
 same ordering: mean-baseline 96.0% / pretrained 67.1% / scratch 0.0% /
 frozen-trunk 0.0% PCK@20 (MPJPE 0.0153 / 0.0318 / 0.2236 / 0.1343) — the
 subcarrier-resampling choice does not change any conclusion.
 ### Interpretation
 - **Did pretraining-transfer happen? Partially — as optimization transfer, not
  feature transfer, and not past the honesty bar.**
  - *Pretrained vs scratch*: dramatic (65.0% vs 0.0% PCK@20). The pretrained init
    is the only configuration that trains at all under the pre-registered budget.
  - *Frozen-trunk*: near-zero (0.0% PCK@20, 14.4% @50). WiFlow-STD's frozen
    features do **not** transfer to our ESP32 domain through a linear subcarrier
    adapter — the pretrained benefit is a well-conditioned initialization (incl.
    calibrated BN/output scales), not reusable CSI→pose features.
  - *Everything vs mean-pose baseline*: **no run beats it.** A constant
    train-mean pose scores 95.9% torso-PCK@20 / 0.0148 MPJPE on this test split,
    because a single subject in one camera frame barely moves in normalized
    coordinates. The fine-tuned model is a real, non-constant model
    (pred std 0.0113 > 0 — passes the constant-pose detector that retracted the
    old 92.9% figure) but its deviations from the mean hurt: it fits train-period
    temporal dynamics that do not generalize across the temporal split.
 - **Verdict for ADR-152 §2.2(b): fine-tuning WiFlow-STD on this dataset does not
  demonstrate CSI→pose signal beyond the mean pose.** Until a model beats the
  mean-pose baseline on a temporal split, no PCK number from this line may be
  cited as pose-estimation capability.
 ### Caveats (honest, pre-registered)
 - Single subject, single room, single session (30 min), single ESP32 node —
  in-domain temporal split only; nothing here speaks to cross-room or
  cross-subject generalization.
 - 2k windows vs the 360k-window WiFlow-STD corpus — **NOT comparable** to the
  ~96% in-domain measurement-(a) number, and the published 97.25% even less so.
 - The scratch run's total collapse (it cannot even reach the mean pose; its
  output BatchNorm/SiLU head must learn output scale from random init at lr 1e-4)
  is an optimization outcome under the fixed budget, not proof the architecture
  cannot learn from scratch — the pretrained-vs-scratch gap partially reflects
  this conditioning advantage.
 - Mixed-subcarrier frames (finding 1) mean even the "clean" windows carry ~20%
  zero-padded frames; collection-side frame-type filtering should precede the
  next session.
 - Mean-baseline PCK is inflated by low pose variance relative to torso size
  (~0.2–0.3 image units); PCK@10 (73.1%) shows the same ceiling effect at a
  stricter threshold — the bar is the bar, but a livelier dataset would lower it.
 ## Pending
 - (b) fine-tune on our ESP32 17-keypoint eval set — **MEASURED 2026-06-10/11**,
  see above: no run beats the mean-pose baseline; pretraining transfers as
  optimization aid only.
 - (c) our internal WiFlow on their dataset (15-keypoint subset mapping) — also
  affected by the 92.9% retraction: there is currently no validated internal
  pose model to compare (the 92.9% artifact is retracted; the MM-Fi SOTA models
  in ADR-150 §3 are a different input domain).
@@ -1,200 +0,0 @@
 """Shared infrastructure for the LOCAL wiflow-std benchmark scripts (ADR-152).
 This module is the single canonical implementation of the helpers that were
 previously copy-pasted across eval_repro.py / quantize_bench.py /
 onnx_bench.py / eval_ort_accuracy.py / export_to_safetensors.py:
  - ``import_upstream()``  -- sys.path setup + the models-package stub that
    works around the upstream import bug, plus the >1GB np.load mmap patch
  - ``install_np_load_mmap_patch()`` -- the mmap patch on its own
  - ``remap_legacy_keys()`` / ``load_remapped_state()`` -- checkpoint
    key remap for the pre-rename released checkpoint
  - ``load_wiflow_model()`` -- WiFlowPoseModel from a checkpoint, eval mode
  - ``set_seed()`` -- mirrors upstream run.py seeding exactly
  - ``evaluate()`` -- THE canonical batch-weighted PCK/MPJPE evaluation loop
    (thresholds 0.1-0.5, upstream utils/metrics.py math); accepts either a
    torch nn.Module or an onnxruntime InferenceSession
 The scripts under remote/ deploy to ruvultra as standalone single files and
 therefore intentionally inline private copies of these helpers; when editing
 them, treat this module as the reference implementation and keep the copies
 in sync.
 """
 import os
 import random
 import sys
 import time
 import types
 import numpy as np
 import torch
 HERE = os.path.dirname(os.path.abspath(__file__))
 UPSTREAM = os.path.join(HERE, "upstream")
 RESULTS = os.path.join(HERE, "results")
 DEFAULT_THRESHOLDS = (0.1, 0.2, 0.3, 0.4, 0.5)
 # ---------------------------------------------------------------------------
 # >1GB np.load mmap patch
 # ---------------------------------------------------------------------------
 # csi_windows.npy is ~13 GB; mmap large arrays instead of loading into RAM
 # (loading it eagerly needs ~15 GB).
 _np_load = np.load
 def _np_load_mmap(path, *a, **kw):
    """Drop-in ``np.load`` wrapper that memory-maps large ``.npy`` files.
    When ``path`` is a ``str`` or ``os.PathLike`` naming a ``.npy`` file
    larger than 1 GiB and the caller did not choose an ``mmap_mode``, the
    call is forwarded with ``mmap_mode="r"``. Every other call is forwarded
    to the original ``np.load`` untouched.
    """
    # mmap_mode is np.load's second positional parameter: if the caller passed
    # any positional extras they already picked a mode, and injecting the
    # keyword as well would raise "got multiple values for argument".
    if not a and "mmap_mode" not in kw and isinstance(path, (str, os.PathLike)):
        fs_path = os.fspath(path)  # may be bytes for exotic PathLike objects
        if (isinstance(fs_path, str) and fs_path.endswith(".npy")
                and os.path.getsize(fs_path) > 1 << 30):
            kw["mmap_mode"] = "r"
    return _np_load(path, *a, **kw)
 def install_np_load_mmap_patch():
    """Route every ``np.load`` call through ``_np_load_mmap``.
    Once installed, .npy files larger than 1GB are mmap'd read-only instead
    of being read into RAM. Safe to call repeatedly: the attribute is only
    rebound while it is not already the wrapper. Rebinding the attribute on
    the numpy module has the same effect as the historical
    ``upstream_dataset.np.load = _np_load_mmap`` (dataset.np IS the numpy
    module) but does not depend on import order.
    """
    if np.load is not _np_load_mmap:
        setattr(np, "load", _np_load_mmap)
 # ---------------------------------------------------------------------------
 # upstream import shim
 # ---------------------------------------------------------------------------
 def import_upstream(mmap_patch=True):
    """Prepare the process so the upstream WiFlow-STD clone can be imported.
    Steps (each is a no-op when already done, so repeated calls are safe):
      1. put the clone directory at the front of ``sys.path``;
      2. pre-register an empty ``models`` package whose ``__path__`` points at
         the clone's models/ directory. The published models/__init__.py
         imports TemporalConvNet, which models/tcn.py does not define, so the
         real package fails to import; with the stub in ``sys.modules`` that
         __init__ never executes while submodules such as
         ``models.pose_model`` still resolve through ``__path__``;
      3. unless ``mmap_patch`` is false, install the >1GB np.load mmap patch.
    Returns the path of the upstream clone.
    """
    if UPSTREAM not in sys.path:
        sys.path.insert(0, UPSTREAM)
    stub = types.ModuleType("models")
    stub.__path__ = [os.path.join(UPSTREAM, "models")]
    # setdefault keeps whatever "models" module is already registered.
    sys.modules.setdefault("models", stub)
    if mmap_patch:
        install_np_load_mmap_patch()
    return UPSTREAM
 # ---------------------------------------------------------------------------
 # checkpoint loading
 # ---------------------------------------------------------------------------
 # The released checkpoint predates the published code: modules were renamed
 # att -> attention, final_conv -> decoder (param count identical, 2.23M).
 LEGACY_RENAMES = {"att.": "attention.", "final_conv.": "decoder."}
 def remap_legacy_keys(state):
    """Return a new dict with legacy key prefixes renamed.
    A key that starts with one of the LEGACY_RENAMES prefixes gets that
    prefix swapped for its replacement (the first matching entry wins); any
    other key, including keys already in the new naming, is carried over
    unchanged. Values are passed through as-is and ``state`` is not mutated.
    """
    renamed = {}
    for key, value in state.items():
        target = key
        for old_prefix, new_prefix in LEGACY_RENAMES.items():
            if key.startswith(old_prefix):
                target = new_prefix + key[len(old_prefix):]
                break
        renamed[target] = value
    return renamed
 def load_remapped_state(path, map_location="cpu"):
    """Read a checkpoint with ``torch.load(weights_only=True)`` and return it
    after the legacy key remap (``remap_legacy_keys``)."""
    return remap_legacy_keys(
        torch.load(path, map_location=map_location, weights_only=True))
 def load_wiflow_model(checkpoint, map_location="cpu", dropout=0.5):
    """Build a WiFlowPoseModel and load ``checkpoint`` into it.
    The upstream import shim is applied first, the state dict goes through
    the legacy key remap, loading is strict, and the model is switched to
    eval mode before being returned.
    """
    import_upstream()
    from models.pose_model import WiFlowPoseModel
    net = WiFlowPoseModel(dropout=dropout)
    weights = load_remapped_state(checkpoint, map_location)
    net.load_state_dict(weights, strict=True)
    net.eval()
    return net
 # ---------------------------------------------------------------------------
 # seeding
 # ---------------------------------------------------------------------------
 def set_seed(seed=42):
    """Seed the Python, NumPy and torch RNGs and pin cuDNN to deterministic
    mode. Same calls in the same order as upstream run.py."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        for cuda_seeder in (torch.cuda.manual_seed,
                            torch.cuda.manual_seed_all):
            cuda_seeder(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
 # ---------------------------------------------------------------------------
 # THE canonical evaluation loop
 # ---------------------------------------------------------------------------
 def evaluate(model, loader, device=None, dtype=None, label="",
             thresholds=DEFAULT_THRESHOLDS, progress_every=50):
    """Batch-weighted PCK/MPJPE over a DataLoader (upstream metrics math).
    ``model`` may be a torch nn.Module (optionally evaluated on ``device``
    with inputs cast to ``dtype``) or an onnxruntime InferenceSession
    (recognised by its ``get_inputs`` attribute; batches are handed over via
    ``bx.numpy()``).
    Per-threshold PCK values are independent in upstream calculate_pck, so
    evaluating a superset of thresholds never changes any individual value.
    Args:
        model: torch module or onnxruntime session, see above.
        loader: iterable yielding ``(inputs, targets)`` batches.
        device: if given, inputs and targets of a torch model are moved there.
        dtype: if given, inputs of a torch model are cast to it.
        label: optional tag shown in the progress lines.
        thresholds: PCK thresholds as fractions (0.2 -> result key "pck@20").
        progress_every: print a progress line every this many batches;
            0 or None disables progress output.
    Returns:
        {"samples", "mpjpe", one "pck@<percent>" per threshold,
        "wall_seconds"}.
    Raises:
        ValueError: the loader yielded no samples (for instance
            ``drop_last=True`` on a dataset smaller than one batch), so there
            is nothing to average.
    """
    import_upstream()
    from utils.metrics import calculate_mpjpe, calculate_pck
    is_ort = hasattr(model, "get_inputs")  # onnxruntime InferenceSession
    if is_ort:
        inp = model.get_inputs()[0].name
        def forward(bx):
            return torch.from_numpy(model.run(None, {inp: bx.numpy()})[0])
    else:
        model.eval()
        def forward(bx):
            if device is not None:
                bx = bx.to(device)
            if dtype is not None:
                bx = bx.to(dtype)
            return model(bx).float()
    thresholds = list(thresholds)
    totals = {t: 0.0 for t in thresholds}
    total_mpe, n = 0.0, 0
    t0 = time.time()
    with torch.no_grad():
        for batch_idx, (bx, by) in enumerate(loader):
            out = forward(bx)
            if device is not None and not is_ort:
                by = by.to(device)
            mpe = calculate_mpjpe(out, by)
            pck = calculate_pck(out, by, thresholds=thresholds)
            bs = by.size(0)
            # Weight each batch by its size so a short final batch does not
            # count as much as a full one.
            total_mpe += mpe * bs
            for t in totals:
                totals[t] += pck[t] * bs
            n += bs
            # The truthiness guard lets progress_every=0/None switch the
            # progress lines off instead of dividing by zero in the modulo.
            if progress_every and batch_idx % progress_every == 0:
                tag = f"[{label}] " if label else ""
                pck20 = totals.get(0.2)
                pck20_str = f"pck20={pck20 / n:.4f} " if pck20 is not None else ""
                print(f"  {tag}batch {batch_idx}: n={n} {pck20_str}"
                      f"mpjpe={total_mpe / n:.4f} ({time.time() - t0:.0f}s)",
                      flush=True)
    if n == 0:
        raise ValueError(
            "evaluate(): the loader yielded no samples, metrics are undefined")
    return {
        "samples": n,
        "mpjpe": total_mpe / n,
        # round(), not int(): 0.29 * 100 is 28.999999999999996 in binary
        # floating point and int() would truncate it to the key "pck@28".
        **{f"pck@{int(round(t * 100))}": totals[t] / n for t in thresholds},
        "wall_seconds": time.time() - t0,
    }
@@ -1,67 +0,0 @@
 """ADR-152 edge optimization: accuracy of the ONNX fp32 and ORT-dynamic-int8
 models on the same corruption-free 10k test subset used by quantize_bench.py.
 The torch dynamic-int8 path quantizes nothing (no nn.Linear in the model), so
 the only real int8 datapoint for the paper's "~2.2 MB int8" claim is the
 onnxruntime dynamically quantized model -- this script measures what that
 quantization costs in PCK/MPJPE.
 Usage:
  .venv/Scripts/python.exe eval_ort_accuracy.py \
      --data-dir <preprocessed_csi_data> [--subset 10000]
 Writes/merges into results/edge_optimization.json under key "onnx_accuracy".
 """
 import argparse
 import json
 import os
 import sys
 HERE = os.path.dirname(os.path.abspath(__file__))
 sys.path.insert(0, HERE)
 from _bench_common import RESULTS, evaluate  # noqa: E402
 from quantize_bench import build_test_subset  # noqa: E402  (sets up upstream imports)
 def evaluate_ort(sess, loader, label):
    """Score an onnxruntime session with the shared ``evaluate`` loop from
    _bench_common and return that loop's result unchanged."""
    return evaluate(model=sess, loader=loader, label=label)
 def main():
    """Measure ONNX fp32 / ORT-dynamic-int8 accuracy and merge it into the
    edge-optimization report.
    Builds the test subset, evaluates each exported model found under
    RESULTS (a missing model is recorded as an ``{"error": ...}`` entry
    instead of aborting), then stores the outcome under the
    ``"onnx_accuracy"`` key of ``--out``, keeping any other keys already in
    that JSON file.
    """
    import onnxruntime as ort
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-dir", default=os.path.join(
        os.path.expanduser("~"), ".cache", "kagglehub", "datasets", "kaka2434",
        "wiflow-dataset", "versions", "1", "preprocessed_csi_data"))
    parser.add_argument("--subset", type=int, default=10000)
    parser.add_argument("--out", default=os.path.join(RESULTS, "edge_optimization.json"))
    args = parser.parse_args()
    loader, _n_clean = build_test_subset(args.data_dir, args.subset)
    results = {}
    for label, fname in (("onnx_fp32", "retrained_fp32_dynamic.onnx"),
                         ("onnx_int8_ort_dynamic", "retrained_int8_ort_dynamic.onnx")):
        path = os.path.join(RESULTS, fname)
        if not os.path.exists(path):
            results[label] = {"error": f"{fname} not found; run onnx_bench.py first"}
            continue
        sess = ort.InferenceSession(path, providers=["CPUExecutionProvider"])
        print(f"=== accuracy: {label} ({fname}) ===")
        results[label] = evaluate_ort(sess, loader, label)
        print(json.dumps(results[label], indent=2))
    merged = {}
    if os.path.exists(args.out):
        with open(args.out) as f:
            merged = json.load(f)
    merged["onnx_accuracy"] = results
    # Create the target directory first (as eval_repro.py does): otherwise a
    # missing results/ directory turns the report -- including the
    # "not found" records above -- into a FileNotFoundError. abspath keeps
    # the dirname non-empty for a bare "--out name.json".
    os.makedirs(os.path.dirname(os.path.abspath(args.out)), exist_ok=True)
    with open(args.out, "w") as f:
        json.dump(merged, f, indent=2)
    print(f"wrote {args.out}")
 if __name__ == "__main__":
    main()
@@ -1,102 +0,0 @@
 """ADR-152 §2.2 measurement (a): reproduce WiFlow-STD (DY2434) published test metrics.
 Runs the released pretrained checkpoint (upstream/best_pose_model.pth) against the
 released Kaggle dataset (kaka2434/wiflow-dataset) using the upstream code path:
 identical dataset class, identical file-level 70/15/15 split at seed 42, identical
 PCK/MPJPE implementations (utils/metrics.py).
 Published claims (README, "Setting 1 random split"):
  PCK@20 97.25% | PCK@30 98.63% | PCK@40 99.16% | PCK@50 99.48% | MPJPE 0.007 m
 Usage:
  .venv/Scripts/python.exe eval_repro.py --data-dir <dir containing csi_windows.npy>
 """
 import argparse
 import json
 import os
 import sys
 import torch
 from torch.utils.data import DataLoader
 from _bench_common import (UPSTREAM, evaluate, import_upstream,
                           load_remapped_state, set_seed)
 import_upstream()  # sys.path + models stub + >1GB np.load mmap patch
 from dataset import PreprocessedCSIKeypointsDataset, create_preprocessed_train_val_test_loaders  # noqa: E402
 from models.pose_model import WiFlowPoseModel  # noqa: E402
 def find_data_dir(root):
    """Return the first directory in ``os.walk(root)`` order (``root`` itself
    included) that directly contains csi_windows.npy, or None if there is
    no such directory."""
    hits = (folder for folder, _subdirs, names in os.walk(root)
            if "csi_windows.npy" in names)
    return next(hits, None)
 def main():
    """Evaluate the released checkpoint on the upstream test split and write
    the metrics, next to the published figures, to ``--out`` as JSON.
    The test set is scored twice: in full (drop_last=False) and with
    drop_last=True as upstream train.py does, so the numbers can be compared
    like for like.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-dir", required=True,
                        help="Directory containing csi_windows.npy (searched recursively)")
    parser.add_argument("--checkpoint", default=os.path.join(UPSTREAM, "best_pose_model.pth"))
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--out", default=os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                                      "results", "repro_a.json"))
    args = parser.parse_args()
    data_dir = args.data_dir
    if not os.path.exists(os.path.join(data_dir, "csi_windows.npy")):
        located = find_data_dir(data_dir)
        if located is None:
            sys.exit(f"csi_windows.npy not found under {data_dir}")
        data_dir = located
    print(f"data dir: {data_dir}")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"device: {device}, torch {torch.__version__}")
    set_seed(42)
    dataset = PreprocessedCSIKeypointsDataset(
        data_dir=data_dir, keypoint_scale=1000.0, enable_temporal_clean=True)
    # split must match upstream: file-level shuffle at random_seed=42, 70/15/15
    _train_loader, _val_loader, test_loader = create_preprocessed_train_val_test_loaders(
        dataset=dataset, batch_size=args.batch_size, num_workers=0, random_seed=42)
    model = WiFlowPoseModel(dropout=0.5).to(device)
    # released checkpoint predates the published code: modules were renamed
    # att -> attention, final_conv -> decoder (param count identical, 2.23M)
    state = load_remapped_state(args.checkpoint, map_location=device)
    model.load_state_dict(state, strict=True)
    n_params = sum(p.numel() for p in model.parameters())
    print(f"checkpoint: {args.checkpoint} ({n_params/1e6:.2f}M params)")
    # upstream also evaluates with drop_last=True; we report the full test set
    # (drop_last=False) and the drop_last variant for exact comparability
    results = {"published": {"pck@20": 0.9725, "pck@30": 0.9863, "pck@40": 0.9916,
                             "pck@50": 0.9948, "mpjpe": 0.007},
               "params_millions": n_params / 1e6,
               "data_dir": data_dir,
               "device": str(device)}
    print("=== test set (full, drop_last=False) ===")
    results["test_full"] = evaluate(model, test_loader, device=device)
    print(json.dumps(results["test_full"], indent=2))
    test_loader_dl = DataLoader(test_loader.dataset, batch_size=args.batch_size,
                                shuffle=False, drop_last=True)
    print("=== test set (drop_last=True, as upstream train.py) ===")
    results["test_drop_last"] = evaluate(model, test_loader_dl, device=device)
    print(json.dumps(results["test_drop_last"], indent=2))
    # abspath first: a bare filename such as "--out repro.json" has an empty
    # dirname, and os.makedirs("") raises FileNotFoundError -- which would
    # throw away the finished evaluation right before it is written.
    os.makedirs(os.path.dirname(os.path.abspath(args.out)), exist_ok=True)
    with open(args.out, "w") as f:
        json.dump(results, f, indent=2)
    print(f"wrote {args.out}")
 if __name__ == "__main__":
    main()
@@ -1,174 +0,0 @@
 """ADR-152 §2.2: export the retrained WiFlow-STD PyTorch checkpoint to
 safetensors with tch-rs (VarStore) variable names, plus a numerical-parity
 fixture for the Rust port.
 Outputs (all under results/, gitignored):
  retrained_wiflow_std.safetensors  -- 248 f32 tensors named exactly as the
                                       Rust WiFlowStdModel VarStore expects
                                       (see wiflow_std/model.rs
                                       `dump_variable_names` for the
                                       authoritative name dump)
  parity_fixture.npz                -- deterministic input (seed 42,
                                       shape (2, 540, 20), uniform [0,1]) and
                                       the Python model's eval-mode output
  parity_fixture.json               -- same data as flattened f32 lists, for
                                       the dependency-free Rust test
                                       (tests/test_wiflow_std_parity.rs)
 PyTorch -> tch key mapping (derived from the VarStore dump, not guessed):
  tcn.network.{i}.conv1_group.weight        -> tcn{i}.conv1_group.weight
  tcn.network.{i}.bn*_{group,pw}.<leaf>     -> tcn{i}.bn*_{group,pw}.<leaf>
  tcn.network.{i}.downsample.0.weight       -> tcn{i}.ds_conv.weight
  tcn.network.{i}.downsample.1.<leaf>       -> tcn{i}.ds_bn.<leaf>
  up.block.{0,1,4,5,8,9}.<leaf>             -> conv_in.{conv1,bn1,conv2,bn2,conv3,bn3}.<leaf>
  up.downsample.{0,1}.<leaf>                -> conv_in.{ds_conv,ds_bn}.<leaf>
  residual_blocks.{i}.block.{...}.<leaf>    -> conv{i}.{conv1..bn3}.<leaf>
  residual_blocks.{i}.downsample.{0,1}      -> conv{i}.{ds_conv,ds_bn}
  attention.{width,height}_axis.qkv_transform.weight
                                            -> attention.{width,height}.qkv.weight
  attention.{width,height}_axis.bn_*        -> attention.{width,height}.bn_*
  decoder.{0,1,3,4}.<leaf>                  -> {dec_conv1,dec_bn1,dec_conv2,dec_bn2}.<leaf>
  *.num_batches_tracked                     -> dropped (tch BatchNorm has no such buffer)
 Legacy upstream names (att. -> attention., final_conv. -> decoder.) are
 remapped first, exactly as eval_repro.py does for the released checkpoint.
 Usage:
  .venv/Scripts/python.exe export_to_safetensors.py
 """
 import json
 import os
 import re
 import numpy as np
 import torch
 from safetensors.torch import save_file
 from _bench_common import RESULTS, import_upstream, remap_legacy_keys
 import_upstream()  # sys.path + models stub
 from models.pose_model import WiFlowPoseModel  # noqa: E402
 CHECKPOINT = os.path.join(RESULTS, "retrained_best_pose_model.pth")
 # Sequential index -> tch sub-name inside one ConvBlock1/AsymmetricConvBlock:
 # [Conv2d(0), BN(1), SiLU(2), Dropout2d(3), Conv2d(4), BN(5), SiLU(6),
 #  Dropout2d(7), Conv2d(8), BN(9)]
 _BLOCK_IDX = {"0": "conv1", "1": "bn1", "4": "conv2", "5": "bn2",
              "8": "conv3", "9": "bn3"}
 _DS_IDX = {"0": "ds_conv", "1": "ds_bn"}
 _DECODER_IDX = {"0": "dec_conv1", "1": "dec_bn1", "3": "dec_conv2",
                "4": "dec_bn2"}
 def _conv_block(new_prefix: str, rest: str) -> str:
    """Translate the tail of a conv-block key into its name under ``new_prefix``.
    ``rest`` must look like ``block.<idx>.<leaf>`` or
    ``downsample.<idx>.<leaf>``; the index is translated through _BLOCK_IDX
    or _DS_IDX respectively. Raises KeyError for any other shape, and (from
    the table lookup) for an index the table does not list.
    """
    for pattern, table in ((r"block\.(\d+)\.(.+)", _BLOCK_IDX),
                           (r"downsample\.(\d+)\.(.+)", _DS_IDX)):
        hit = re.fullmatch(pattern, rest)
        if hit:
            idx, leaf = hit.groups()
            return f"{new_prefix}.{table[idx]}.{leaf}"
    raise KeyError(f"unmapped conv-block key: {new_prefix} / {rest}")
 def map_key(key: str) -> str:
    """Translate one PyTorch state_dict key into its tch VarStore name.
    Key families are tried in this order: ``tcn.network.<i>.*``, ``up.*``,
    ``residual_blocks.<i>.*``, ``attention.{width,height}_axis.*`` and
    ``decoder.<idx>.*``. A key that belongs to none of them raises KeyError.
    """
    tcn = re.fullmatch(r"tcn\.network\.(\d+)\.(.+)", key)
    if tcn:
        layer, tail = tcn.group(1), tcn.group(2)
        for old, new in (("downsample.0.", "ds_conv."),
                         ("downsample.1.", "ds_bn.")):
            tail = tail.replace(old, new)
        return f"tcn{layer}.{tail}"
    up = re.fullmatch(r"up\.(.+)", key)
    if up:
        return _conv_block("conv_in", up.group(1))
    res = re.fullmatch(r"residual_blocks\.(\d+)\.(.+)", key)
    if res:
        return _conv_block(f"conv{res.group(1)}", res.group(2))
    attn = re.fullmatch(r"attention\.(width|height)_axis\.(.+)", key)
    if attn:
        axis = attn.group(1)
        tail = attn.group(2).replace("qkv_transform.", "qkv.")
        return f"attention.{axis}.{tail}"
    dec = re.fullmatch(r"decoder\.(\d+)\.(.+)", key)
    if dec:
        return f"{_DECODER_IDX[dec.group(1)]}.{dec.group(2)}"
    raise KeyError(f"unmapped checkpoint key: {key}")
 def main():
    """Export CHECKPOINT to safetensors and write the parity fixture.
    Loads the checkpoint on CPU, unwraps a trainer-style wrapper dict if one
    is present, applies the legacy key remap, renames every tensor with
    ``map_key`` (dropping ``num_batches_tracked`` entries) and saves the
    result as retrained_wiflow_std.safetensors under RESULTS. It then runs
    the Python model in eval mode on a seed-42 random input and stores that
    input/output pair as parity_fixture.npz and parity_fixture.json.
    Raises KeyError for an unmappable or duplicate key; the tensor-count and
    parameter-count checks are plain ``assert`` statements.
    """
    state = torch.load(CHECKPOINT, map_location="cpu", weights_only=True)
    # If the sentinel first-layer key is not at the top level (under either
    # the new or the legacy naming), look for the state dict one level down.
    if not isinstance(state, dict) or "tcn.network.0.conv1_group.weight" not in {
        k for k in state
    } | {k.replace("att.", "attention.") for k in state}:
        # tolerate trainer wrappers like {"model_state_dict": ...}
        for wrapper in ("model_state_dict", "state_dict", "model"):
            if isinstance(state, dict) and wrapper in state:
                state = state[wrapper]
                break
    # Legacy upstream names predate the published code (_bench_common).
    state = remap_legacy_keys(state)
    mapped = {}
    dropped = 0
    for k, v in state.items():
        # Skipped on purpose: per the module docstring, tch BatchNorm has no
        # num_batches_tracked buffer.
        if k.endswith("num_batches_tracked"):
            dropped += 1
            continue
        tch_key = map_key(k)
        # Two source keys landing on one tch name would silently lose a tensor.
        if tch_key in mapped:
            raise KeyError(f"duplicate mapped key: {k} -> {tch_key}")
        mapped[tch_key] = v.detach().to(torch.float32).contiguous()
    # Count elements of every mapped tensor except those whose name contains
    # "running_" (reported below as "non-buffer params").
    n_params = sum(v.numel() for k, v in mapped.items()
                   if "running_" not in k)
    print(f"checkpoint tensors: {len(state)} "
          f"(dropped {dropped} num_batches_tracked)")
    print(f"mapped tensors: {len(mapped)}, "
          f"non-buffer params: {n_params/1e6:.6f}M")
    # Validate before anything is written to disk.
    assert len(mapped) == 248, f"expected 248 tch variables, got {len(mapped)}"
    assert n_params == 2_225_042, f"param count mismatch: {n_params}"
    st_path = os.path.join(RESULTS, "retrained_wiflow_std.safetensors")
    save_file(mapped, st_path)
    print(f"wrote {st_path}")
    # ---- parity fixture --------------------------------------------------
    # The model is loaded from the PyTorch-named ``state`` (not ``mapped``).
    model = WiFlowPoseModel(dropout=0.5)
    model.load_state_dict(state, strict=True)
    model.eval()
    # Dedicated generator so the fixture input does not depend on global RNG state.
    gen = torch.Generator().manual_seed(42)
    x = torch.rand(2, 540, 20, generator=gen, dtype=torch.float32)
    with torch.no_grad():
        y = model(x)
    print(f"fixture input {tuple(x.shape)} -> output {tuple(y.shape)}, "
          f"output range [{y.min().item():.6f}, {y.max().item():.6f}]")
    np.savez(os.path.join(RESULTS, "parity_fixture.npz"),
             input=x.numpy(), output=y.numpy())
    # Same data as flat lists, for the dependency-free Rust test named in the
    # module docstring.
    fixture = {
        "seed": 42,
        "input_shape": list(x.shape),
        "input": x.flatten().tolist(),
        "output_shape": list(y.shape),
        "output": y.flatten().tolist(),
    }
    json_path = os.path.join(RESULTS, "parity_fixture.json")
    with open(json_path, "w") as f:
        json.dump(fixture, f)
    print(f"wrote {os.path.join(RESULTS, 'parity_fixture.npz')}")
    print(f"wrote {json_path}")
 if __name__ == "__main__":
    main()
@@ -1,148 +0,0 @@
 """Regenerate results/nan_windows_mask.npy + results/big_windows_mask.npy by
 scanning a PRISTINE kagglehub download of the WiFlow-STD dataset
 (kaka2434/wiflow-dataset v1, csi_windows.npy, 360,000 windows of 540x20).
 ============================ READ THIS FIRST ===============================
 This script MUST be run against an UNCLEANED copy of the dataset.
 remote/clean_v2.py (and its predecessor clean_nan.py) repair the dataset by
 zeroing the corrupted windows IN PLACE, with no backup. A cleaned copy
 contains no non-finite values and no out-of-range amplitudes, so on a cleaned
 copy this scan produces ALL-FALSE masks -- silently wrong ground truth. The
 script errors out loudly in that case (see the sanity check in main()).
 That irreversibility is exactly why the two committed mask files under
 results/ (gitignore-negated) are the canonical ground truth: once a download
 has been cleaned, the masks can NEVER be regenerated from it. Only run this
 on a fresh `kagglehub.dataset_download("kaka2434/wiflow-dataset")`.
 ============================================================================
 Criteria (per window; mirrors the original 2026-06-10 scan and the
 remote/clean_v2.py repair criteria):
  nan mask: any non-finite value (NaN/Inf) anywhere in the 540x20 window
  big mask: max |finite value| > 1.5 (the data is otherwise [0,1]-normalized;
            the corrupted files contain garbage up to 3.4e38, float32 max)
 Expected result on the pristine Kaggle download (RESULTS.md defect 5):
  nan: 9,070 True | big: 9,072 True | union: 9,072 -- all windows in dataset
  files 487-499 (the final 13 files), window indices 350,922-359,999.
 Usage:
  PYTHONUTF8=1 .venv/Scripts/python.exe generate_corruption_masks.py \
      [--data-dir <dir containing csi_windows.npy>] [--out-dir results]
 """
 import argparse
 import os
 import sys
 import numpy as np
 HERE = os.path.dirname(os.path.abspath(__file__))
 RESULTS = os.path.join(HERE, "results")
 EXPECTED = {"nan": 9070, "big": 9072, "union": 9072,
            "files": (487, 499), "windows": (350922, 359999)}
 def scan(csi_path, chunk=4000):
    """Scan the windows array stored at ``csi_path`` chunk by chunk.
    The file is opened memory-mapped read-only and ``chunk`` windows at a
    time are materialised. Returns ``(nan_mask, big_mask)``, two boolean
    arrays with one entry per window: ``nan_mask`` flags windows holding any
    non-finite value, ``big_mask`` flags windows whose largest finite
    absolute value exceeds 1.5. A progress line is printed every 10th chunk.
    """
    windows = np.load(csi_path, mmap_mode="r")
    total = len(windows)
    nan_mask = np.zeros(total, dtype=bool)
    big_mask = np.zeros(total, dtype=bool)
    for chunk_no, start in enumerate(range(0, total, chunk)):
        stop = start + chunk
        block = np.asarray(windows[start:stop])
        is_finite = np.isfinite(block)
        nan_mask[start:stop] = (~is_finite).any(axis=(1, 2))
        # Non-finite entries are zeroed so they cannot decide the maximum.
        finite_abs = np.abs(np.where(is_finite, block, 0))
        big_mask[start:stop] = finite_abs.max(axis=(1, 2)) > 1.5
        if chunk_no % 10 == 0:
            print(f"  scanned {min(stop, total):,}/{total:,} windows "
                  f"(nan={int(nan_mask.sum()):,} big={int(big_mask.sum()):,})",
                  flush=True)
    return nan_mask, big_mask
 def describe_files(data_dir, mask):
    """Map marked windows to dataset file indices via window_info.npz.
    Args:
        data_dir: directory expected to contain window_info.npz.
        mask: selector applied to the ``window_to_file`` array (the caller
            passes a boolean per-window mask).
    Returns:
        The sorted unique ``window_to_file`` values at the selected windows,
        or None when window_info.npz is not present in ``data_dir``.
    """
    info = os.path.join(data_dir, "window_info.npz")
    if not os.path.exists(info):
        return None
    # np.load on an .npz returns an archive object that keeps the file open;
    # the context manager closes it as soon as the array has been read.
    with np.load(info) as archive:
        w2f = archive["window_to_file"]
    return np.unique(w2f[mask])
 def main():
    """Scan the dataset and write nan_windows_mask.npy / big_windows_mask.npy.
    Exits with an error message when csi_windows.npy is missing from
    ``--data-dir`` or when the scan marks no window at all (the signature of
    an already-cleaned copy), so existing masks are never replaced by
    all-False ones. Deviations from the EXPECTED file range or mask counts
    only print a WARNING; the masks are still written to ``--out-dir``.
    """
    parser = argparse.ArgumentParser(
        description="Regenerate the corruption masks from a PRISTINE "
                    "(uncleaned) kagglehub download. See module docstring.")
    parser.add_argument("--data-dir", default=os.path.join(
        os.path.expanduser("~"), ".cache", "kagglehub", "datasets", "kaka2434",
        "wiflow-dataset", "versions", "1", "preprocessed_csi_data"),
        help="Directory containing csi_windows.npy (PRISTINE copy)")
    parser.add_argument("--out-dir", default=RESULTS,
                        help="Where to write the two .npy masks")
    parser.add_argument("--chunk", type=int, default=4000,
                        help="Windows per scan chunk (memory/speed tradeoff)")
    args = parser.parse_args()
    csi_path = os.path.join(args.data_dir, "csi_windows.npy")
    if not os.path.exists(csi_path):
        sys.exit(f"csi_windows.npy not found in {args.data_dir}")
    print(f"scanning {csi_path} (chunk={args.chunk}) ...")
    nan_mask, big_mask = scan(csi_path, args.chunk)
    # A window counts as corrupted when either criterion marks it.
    union = nan_mask | big_mask
    print(f"nan: {int(nan_mask.sum()):,} | big: {int(big_mask.sum()):,} | "
          f"union: {int(union.sum()):,} of {len(union):,} windows")
    # ---- sanity check: an all-False result means a CLEANED copy ------------
    if not union.any():
        sys.exit(
            "ERROR: scan found ZERO corrupted windows.\n"
            "\n"
            "The pristine Kaggle download (kaka2434/wiflow-dataset v1) is "
            "known to contain\n"
            "9,072 corrupted windows (NaN/Inf + amplitudes up to 3.4e38) in "
            "dataset files\n"
            "487-499 (RESULTS.md, reproducibility defect 5). Finding none "
            "means this copy\n"
            "has almost certainly already been repaired by remote/clean_v2.py "
            "(or clean_nan.py),\n"
            "which zeroes the corrupted windows IN PLACE -- after that the "
            "corruption evidence\n"
            "is gone and the masks CANNOT be regenerated from this copy.\n"
            "\n"
            "Refusing to overwrite the committed ground-truth masks with "
            "all-False ones.\n"
            "Re-download the dataset (kagglehub.dataset_download("
            "'kaka2434/wiflow-dataset'))\n"
            "and point --data-dir at the fresh, uncleaned copy.")
    # describe_files returns None when window_info.npz is absent; the
    # file-range check is then skipped.
    files = describe_files(args.data_dir, union)
    if files is not None:
        print(f"marked windows span dataset files {files.min()}-{files.max()}: "
              f"{files.tolist()}")
        lo, hi = EXPECTED["files"]
        if files.min() != lo or files.max() != hi:
            print(f"WARNING: expected marked files exactly {lo}-{hi} "
                  f"(the pristine v1 download); got {files.min()}-{files.max()}. "
                  f"Different dataset version, or a partially cleaned copy?")
    # Count mismatches are reported but do not stop the masks being written.
    for name, mask, exp in (("nan", nan_mask, EXPECTED["nan"]),
                            ("big", big_mask, EXPECTED["big"])):
        if int(mask.sum()) != exp:
            print(f"WARNING: {name} mask has {int(mask.sum()):,} True windows; "
                  f"the pristine v1 download yields {exp:,}.")
    os.makedirs(args.out_dir, exist_ok=True)
    for name, mask in (("nan_windows_mask.npy", nan_mask),
                       ("big_windows_mask.npy", big_mask)):
        out = os.path.join(args.out_dir, name)
        np.save(out, mask)
        print(f"wrote {out} ({int(mask.sum()):,} True)")
 if __name__ == "__main__":
    main()
@@ -1,220 +0,0 @@
 """ADR-152 edge optimization: ONNX export + onnxruntime CPU benchmark for the
 retrained WiFlow-STD checkpoint.
 - Exports fp32 to ONNX. The axial attention reshapes with python ints taken
  from tensor.size() (view(N*W, C, H)), so a traced graph bakes the batch
  size; we first try a dynamic-batch export and verify it actually works at
  batch sizes 1/2/64 -- if not, we fall back to fixed-batch exports.
 - Verifies output parity vs torch on the stored fixture
  (results/parity_fixture.npz, batch 2, seed 42): max abs diff < 1e-4.
 - Measures onnxruntime CPU latency at batch 1 and 64 (median of N runs).
 - Supplementary: onnxruntime dynamic int8 quantization of the exported model
  (weight size datapoint for the paper's "~2.2 MB int8" claim).
 Usage:
  .venv/Scripts/python.exe onnx_bench.py
 Writes/merges into results/edge_optimization.json under key "onnx".
 """
 import json
 import os
 import platform
 import statistics
 import time
 import traceback
 import numpy as np
 import torch
 from _bench_common import RESULTS, import_upstream, load_wiflow_model
 import_upstream()  # sys.path + models stub + >1GB np.load mmap patch
 CHECKPOINT = os.path.join(RESULTS, "retrained_best_pose_model.pth")
 OUT_JSON = os.path.join(RESULTS, "edge_optimization.json")
 def load_fp32_model():
    """Load the retrained checkpoint at ``CHECKPOINT`` via the shared loader.

    Returns:
        Whatever ``load_wiflow_model`` returns for that path.
    """
    model = load_wiflow_model(CHECKPOINT)
    return model
 def try_export(model, path, batch, dynamic, opset=17):
    """Export ``model`` to ONNX at ``path``, trying two exporters in turn.

    A random ``(batch, 540, 20)`` tensor is used as the example input. For a
    dynamic export the dynamo exporter is tried first and the TorchScript
    exporter second; for a fixed-batch export the order is reversed. The
    first exporter that does not raise wins.

    Args:
        model: the torch module to export.
        path: destination ``.onnx`` file.
        batch: batch size of the example input.
        dynamic: request a dynamic batch axis when True.
        opset: ONNX opset version passed to ``torch.onnx.export``.

    Returns:
        ``(ok, exporter_used, error)``. On success ``error`` is None. On
        failure ``exporter_used`` is None and ``error`` lists the failure of
        EVERY attempted exporter (joined with `` | ``), not just the last
        one, so the recorded fallback reason shows why each path failed.
    """
    x = torch.rand(batch, 540, 20)
    if dynamic:
        attempts = [
            # NOTE(review): the "x" key assumes the model's forward parameter
            # is named x -- confirm against the upstream model.
            ("dynamo", dict(dynamo=True,
                            dynamic_shapes={"x": {0: "batch"}})),
            ("torchscript", dict(dynamo=False,
                                 dynamic_axes={"input": {0: "batch"},
                                               "output": {0: "batch"}})),
        ]
    else:
        attempts = [
            ("torchscript", dict(dynamo=False)),
            ("dynamo", dict(dynamo=True)),
        ]
    errors = []
    for name, kw in attempts:
        try:
            with torch.no_grad():
                torch.onnx.export(model, (x,), path, opset_version=opset,
                                  input_names=["input"], output_names=["output"],
                                  **kw)
            return True, name, None
        except Exception as e:  # noqa: BLE001
            # Best-effort by design: remember the failure and try the next
            # exporter instead of aborting the whole benchmark.
            errors.append(f"{name}: {type(e).__name__}: {e}")
            traceback.print_exc()
    return False, None, " | ".join(errors)
 def ort_session(path):
    """Open the ONNX model at ``path`` as a CPU-only onnxruntime session.

    onnxruntime is imported lazily, inside the function.
    """
    from onnxruntime import InferenceSession
    cpu_only = ["CPUExecutionProvider"]
    return InferenceSession(path, providers=cpu_only)
 def ort_run(sess, x):
    """Run ``sess`` on ``x`` and return its first output.

    ``x`` is fed to the session's first declared input; all outputs are
    requested (``None``) and only the first is returned.
    """
    first_input = sess.get_inputs()[0]
    outputs = sess.run(None, {first_input.name: x})
    return outputs[0]
 def bench_ort(sess, batch, n_runs):
    """Time ``sess`` on a fixed random ``(batch, 540, 20)`` float32 input.

    The input comes from a generator seeded with 123. ``max(5, n_runs // 10)``
    untimed warm-up calls are made first, then ``n_runs`` timed calls; the
    median wall-clock time is reported.

    Returns:
        dict with ``batch_size``, ``runs``, ``median_ms_per_batch``,
        ``median_ms_per_window`` and ``windows_per_second``.
    """
    x = np.random.default_rng(123).random((batch, 540, 20), dtype=np.float32)

    n_warmup = max(5, n_runs // 10)
    for _ in range(n_warmup):
        ort_run(sess, x)

    samples = []
    for _ in range(n_runs):
        started = time.perf_counter()
        ort_run(sess, x)
        finished = time.perf_counter()
        samples.append(finished - started)

    median_s = statistics.median(samples)
    median_ms = median_s * 1e3
    return {
        "batch_size": batch,
        "runs": n_runs,
        "median_ms_per_batch": median_ms,
        "median_ms_per_window": median_ms / batch,
        "windows_per_second": batch / median_s,
    }
 def main():
    """Export the retrained checkpoint to ONNX, check parity, benchmark, record.

    Steps, in order:
      1. Try a dynamic-batch export and confirm it really runs at batch
         1/2/64; otherwise fall back to one fixed-batch export per size.
      2. Compare onnxruntime output with torch on the stored parity fixture
         (the result is recorded; a failed parity check does not abort).
      3. Time onnxruntime on CPU at batch 1 and batch 64.
      4. Supplementary: onnxruntime dynamic int8 quantization of the fp32
         model exported by this run.
      5. Merge everything into ``OUT_JSON`` under the "onnx" key, keeping any
         other keys already present in that file.

    Takes no command-line options; ``parse_args()`` is still called so that
    ``--help`` prints the description and stray arguments are rejected.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="ONNX export + onnxruntime CPU benchmark for the "
                    "retrained WiFlow-STD checkpoint (no options; see "
                    "module docstring). NB: the published "
                    "retrained_fp32_dynamic.onnx came from the TorchScript "
                    "exporter; on newer torch the dynamo attempt may succeed "
                    "first and produce a different (external-data) artifact.")
    parser.parse_args()
    import onnxruntime
    model = load_fp32_model()
    results = {
        "env": {
            "torch": torch.__version__,
            "onnxruntime": onnxruntime.__version__,
            "platform": platform.platform(),
        },
    }
    fixture = np.load(os.path.join(RESULTS, "parity_fixture.npz"))
    fx, fy = fixture["input"], fixture["output"]  # (2,540,20) -> (2,15,2)
    # ---- export: dynamic batch first, fall back to fixed --------------------
    dyn_path = os.path.join(RESULTS, "retrained_fp32_dynamic.onnx")
    ok, exporter, err = try_export(model, dyn_path, batch=2, dynamic=True)
    dynamic_works = False
    dyn_sess = None
    if ok:
        # verify the dynamic graph really runs at other batch sizes
        try:
            dyn_sess = ort_session(dyn_path)
            for b in (1, 2, 64):
                y = ort_run(dyn_sess, np.zeros((b, 540, 20), dtype=np.float32))
                assert y.shape == (b, 15, 2), y.shape
            dynamic_works = True
        except Exception as e:  # noqa: BLE001
            # The export itself succeeded, so `err` is None here; without
            # this the fixed-batch fallback would be recorded with no cause.
            err = (f"{exporter} export succeeded but failed batch-size "
                   f"verification: {type(e).__name__}: {e}")
            print(f"dynamic-batch model does not generalize: {e}")
    sessions = {}
    if dynamic_works:
        results["export"] = {"mode": "dynamic-batch", "exporter": exporter,
                             "file": os.path.basename(dyn_path),
                             "size_mb": os.path.getsize(dyn_path) / 1e6}
        # Reuse the session that was just verified instead of loading the
        # same file a second time.
        sessions = {1: dyn_sess, 2: dyn_sess, 64: dyn_sess}
        print(f"dynamic-batch export OK via {exporter}")
    else:
        results["export"] = {"mode": "fixed-batch", "fallback_reason": err,
                             "files": {}}
        for b in (1, 2, 64):
            p = os.path.join(RESULTS, f"retrained_fp32_b{b}.onnx")
            ok, exporter, err = try_export(model, p, batch=b, dynamic=False)
            if not ok:
                results["export"]["files"][str(b)] = {"error": err}
                print(f"EXPORT FAILED at batch {b}: {err}")
                continue
            results["export"]["files"][str(b)] = {
                "exporter": exporter, "file": os.path.basename(p),
                "size_mb": os.path.getsize(p) / 1e6}
            sessions[b] = ort_session(p)
            print(f"fixed-batch {b} export OK via {exporter}")
    # ---- parity vs torch on the fixture -------------------------------------
    if 2 in sessions:
        y_ort = ort_run(sessions[2], fx)
        with torch.no_grad():
            y_torch = model(torch.from_numpy(fx)).numpy()
        diff_vs_torch = float(np.abs(y_ort - y_torch).max())
        results["parity"] = {
            "fixture": "results/parity_fixture.npz (batch 2, seed 42)",
            "max_abs_diff_vs_stored_fixture": float(np.abs(y_ort - fy).max()),
            "max_abs_diff_vs_torch_now": diff_vs_torch,
            "pass_lt_1e-4": bool(diff_vs_torch < 1e-4),
        }
        print("parity:", json.dumps(results["parity"], indent=2))
    # ---- latency -------------------------------------------------------------
    results["latency"] = {}
    if 1 in sessions:
        results["latency"]["batch1"] = bench_ort(sessions[1], 1, 100)
        print(f"ORT batch 1:  {results['latency']['batch1']['median_ms_per_window']:.2f} ms/window")
    if 64 in sessions:
        results["latency"]["batch64"] = bench_ort(sessions[64], 64, 30)
        print(f"ORT batch 64: {results['latency']['batch64']['median_ms_per_window']:.3f} ms/window")
    # ---- supplementary: ORT dynamic int8 (size datapoint for the 2.2MB claim)
    src = (dyn_path if dynamic_works
           else os.path.join(RESULTS, "retrained_fp32_b1.onnx"))
    # Only quantize a model exported by THIS run. Checking the file system
    # alone would pick up a leftover file from an earlier run when the
    # batch-1 export failed, and report a stale size/accuracy datapoint.
    if dynamic_works or 1 in sessions:
        try:
            from onnxruntime.quantization import QuantType, quantize_dynamic
            q_path = os.path.join(RESULTS, "retrained_int8_ort_dynamic.onnx")
            quantize_dynamic(src, q_path, weight_type=QuantType.QInt8)
            entry = {"file": os.path.basename(q_path),
                     "size_mb": os.path.getsize(q_path) / 1e6}
            try:
                qs = ort_session(q_path)
                # A fixed-batch source model only accepts batch 1, so feed
                # just the first fixture window in that case.
                yq = ort_run(qs, fx[:1] if not dynamic_works else fx)
                ref = fy[:1] if not dynamic_works else fy
                entry["runs"] = True
                entry["max_abs_diff_vs_fp32_fixture"] = float(np.abs(yq - ref).max())
            except Exception as e:  # noqa: BLE001
                entry["runs"] = False
                entry["run_error"] = f"{type(e).__name__}: {e}"
            results["ort_int8_dynamic_supplementary"] = entry
            print("ORT int8:", json.dumps(entry, indent=2))
        except Exception as e:  # noqa: BLE001
            results["ort_int8_dynamic_supplementary"] = {
                "error": f"{type(e).__name__}: {e}"}
    merged = {}
    if os.path.exists(OUT_JSON):
        with open(OUT_JSON) as f:
            merged = json.load(f)
    merged["onnx"] = results
    with open(OUT_JSON, "w") as f:
        json.dump(merged, f, indent=2)
    print(f"wrote {OUT_JSON}")
 if __name__ == "__main__":
    main()
@@ -1,228 +0,0 @@
 """ADR-152 "optimize beyond SOTA": edge-optimization benchmark for the
 retrained WiFlow-STD checkpoint (results/retrained_best_pose_model.pth,
 ~96% PCK@20, fp32 params 2,225,042).
 Measures, for fp32 / fp16 / dynamic-int8 torch variants:
  (a) serialized state_dict size on disk,
  (b) CPU inference latency per window at batch 1 and batch 64
      (median of repeated runs, this Windows box),
  (c) accuracy (PCK@20/50 + MPJPE, upstream metrics) on a corruption-free
      random subset of the seed-42 file-level 70/15/15 test split
      (same split as eval_repro.py; corrupted windows 487-499 excluded via
      results/nan_windows_mask.npy | results/big_windows_mask.npy).
 Also verifies the paper's "~2.2 MB int8" size claim: reports which layer
 types torch dynamic quantization actually converts (the model contains NO
 nn.Linear -- it is Conv1d/Conv2d/BatchNorm only) and the real on-disk size.
 Usage:
  .venv/Scripts/python.exe quantize_bench.py \
      --data-dir C:/Users/ruv/.cache/kagglehub/datasets/kaka2434/wiflow-dataset/versions/1/preprocessed_csi_data \
      [--subset 10000] [--skip-accuracy]
 Writes/merges into results/edge_optimization.json under key "torch".
 """
 import argparse
 import json
 import os
 import platform
 import statistics
 import time
 import numpy as np
 import torch
 import torch.nn as nn
 from torch.utils.data import DataLoader
 from _bench_common import HERE, RESULTS, evaluate, import_upstream, load_wiflow_model
 import_upstream()  # sys.path + models stub + >1GB np.load mmap patch
 from dataset import (  # noqa: E402
    PreprocessedCSIKeypointsDataset,
    create_preprocessed_train_val_test_loaders,
 )
 CHECKPOINT = os.path.join(RESULTS, "retrained_best_pose_model.pth")
 def load_fp32_model():
    """Load a fresh fp32 model from ``CHECKPOINT`` via the shared loader.

    The loader's legacy upstream key remap is a harmless no-op on this
    checkpoint.
    """
    model = load_wiflow_model(CHECKPOINT)
    return model
 def state_dict_size_bytes(model, path):
    """Save ``model.state_dict()`` to ``path`` and return the file size in bytes.

    The file is left on disk (side effect) so it can be inspected later.
    """
    weights = model.state_dict()
    torch.save(weights, path)
    n_bytes = os.path.getsize(path)
    return n_bytes
 def bench_latency(model, batch_size, n_runs, dtype=torch.float32):
    """Time ``model`` on a fixed random ``(batch_size, 540, 20)`` input.

    The input is drawn from a generator seeded with 123 and cast to
    ``dtype``. Under ``torch.no_grad()``, ``max(5, n_runs // 10)`` untimed
    warm-up calls are followed by ``n_runs`` timed calls; the median
    wall-clock time is reported.

    Returns:
        dict with ``batch_size``, ``runs``, ``median_ms_per_batch``,
        ``median_ms_per_window`` and ``windows_per_second``.
    """
    rng = torch.Generator()
    rng.manual_seed(123)
    x = torch.rand(batch_size, 540, 20, generator=rng).to(dtype)

    n_warmup = max(5, n_runs // 10)
    durations = []
    with torch.no_grad():
        for _ in range(n_warmup):
            model(x)
        for _ in range(n_runs):
            started = time.perf_counter()
            model(x)
            durations.append(time.perf_counter() - started)

    median_s = statistics.median(durations)
    median_ms = median_s * 1e3
    return {
        "batch_size": batch_size,
        "runs": n_runs,
        "median_ms_per_batch": median_ms,
        "median_ms_per_window": median_ms / batch_size,
        "windows_per_second": batch_size / median_s,
    }
 def build_test_subset(data_dir, subset_size, batch_size=64):
    """Seed-42 file-level 70/15/15 test split (exactly as eval_repro.py),
    minus corrupted windows, then a seed-42 random subset.

    Args:
        data_dir: directory handed to ``PreprocessedCSIKeypointsDataset``.
        subset_size: number of clean test windows to keep; falsy, or not
            smaller than the clean total, keeps all of them.
        batch_size: batch size for the split loaders and the returned loader.

    Returns:
        ``(loader, n_clean_total)``. ``loader`` iterates the (possibly
        sub-sampled) clean test windows in ascending index order, unshuffled.
        ``n_clean_total`` is the number of clean test windows BEFORE
        sub-sampling. It used to be measured after sub-sampling, which made
        the caller's "clean_test_total" field just repeat the subset size.
    """
    dataset = PreprocessedCSIKeypointsDataset(
        data_dir=data_dir, keypoint_scale=1000.0, enable_temporal_clean=True)
    _tr, _va, test_loader = create_preprocessed_train_val_test_loaders(
        dataset=dataset, batch_size=batch_size, num_workers=0, random_seed=42)
    test_indices = np.asarray(test_loader.dataset.indices)
    # A window is corrupted if either stored mask flags it.
    corrupted = (np.load(os.path.join(RESULTS, "nan_windows_mask.npy"))
                 | np.load(os.path.join(RESULTS, "big_windows_mask.npy")))
    clean = test_indices[~corrupted[test_indices]]
    n_clean_total = len(clean)
    print(f"test split: {len(test_indices)} windows, "
          f"{len(test_indices) - n_clean_total} corrupted excluded, "
          f"{n_clean_total} clean")
    if subset_size and subset_size < n_clean_total:
        rng = np.random.default_rng(42)
        clean = np.sort(rng.choice(clean, size=subset_size, replace=False))
    subset = torch.utils.data.Subset(dataset, clean.tolist())
    loader = DataLoader(subset, batch_size=batch_size, shuffle=False,
                        num_workers=0)
    return loader, n_clean_total
 def quantize_int8_dynamic(fp32_model):
    """torch.ao.quantization.quantize_dynamic on Linear/Conv where supported.

    Linear, Conv1d and Conv2d are all *requested*; the report records which
    modules torch actually converted.

    Args:
        fp32_model: the float model. It is used afterwards for the parameter
            and eligible-layer counts.

    Returns:
        ``(model, report)`` where ``report`` documents what actually
        quantized:
          - ``eligible_module_counts``: Linear/Conv1d/Conv2d counts in the
            fp32 model.
          - ``modules_actually_quantized``: one entry per converted module
            (name, class, weight element count).
          - ``n_modules_quantized``, ``params_total``, ``params_quantized``.
          - ``params_quantized_fraction``: 0.0 when the model has no
            parameters (previously a ZeroDivisionError).
    """
    qmodel = torch.ao.quantization.quantize_dynamic(
        fp32_model, {nn.Linear, nn.Conv1d, nn.Conv2d}, dtype=torch.qint8)
    quantized, quant_params = [], 0
    for name, mod in qmodel.named_modules():
        cls = type(mod).__module__ + "." + type(mod).__name__
        if "quantized" not in cls:
            continue
        weight_fn = getattr(mod, "weight", None)
        if not callable(weight_fn):
            # Helper containers that live in a quantized namespace but expose
            # no weight() accessor (e.g. the packed-params child of a
            # quantized Linear) are not converted layers themselves; counting
            # them would inflate n_modules_quantized.
            continue
        w = weight_fn()
        numel = w.numel() if w is not None else 0
        quant_params += numel
        quantized.append({"module": name, "class": cls, "params": numel})
    total_params = sum(p.numel() for p in fp32_model.parameters())
    fp32_modules = list(fp32_model.modules())
    n_linear = sum(isinstance(m, nn.Linear) for m in fp32_modules)
    n_conv1d = sum(isinstance(m, nn.Conv1d) for m in fp32_modules)
    n_conv2d = sum(isinstance(m, nn.Conv2d) for m in fp32_modules)
    report = {
        "eligible_module_counts": {
            "nn.Linear": n_linear, "nn.Conv1d": n_conv1d, "nn.Conv2d": n_conv2d},
        "modules_actually_quantized": quantized,
        "n_modules_quantized": len(quantized),
        "params_total": total_params,
        "params_quantized": quant_params,
        "params_quantized_fraction": (quant_params / total_params
                                      if total_params else 0.0),
    }
    return qmodel, report
 def main():
    """Benchmark the fp32 / fp16 / dynamic-int8 torch variants of the checkpoint.

    For each variant this saves the state_dict (size on disk), times CPU
    inference at batch 1 and batch 64, and -- unless ``--skip-accuracy`` is
    given -- evaluates accuracy on the clean test subset. The collected
    results are merged into the ``--out`` JSON file under the "torch" key,
    keeping any other keys already present in that file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-dir", default=os.path.join(
        os.path.expanduser("~"), ".cache", "kagglehub", "datasets", "kaka2434",
        "wiflow-dataset", "versions", "1", "preprocessed_csi_data"))
    parser.add_argument("--subset", type=int, default=10000)
    parser.add_argument("--runs-b1", type=int, default=100)
    parser.add_argument("--runs-b64", type=int, default=30)
    parser.add_argument("--skip-accuracy", action="store_true")
    parser.add_argument("--out", default=os.path.join(RESULTS, "edge_optimization.json"))
    args = parser.parse_args()
    torch.manual_seed(42)
    results = {
        "env": {
            "torch": torch.__version__,
            "platform": platform.platform(),
            "processor": platform.processor(),
            "num_threads": torch.get_num_threads(),
            "checkpoint": os.path.relpath(CHECKPOINT, HERE),
        },
        "variants": {},
    }
    # ---- build variants ---------------------------------------------------
    # Each variant starts from its own load_fp32_model() call, so the fp16
    # cast and the int8 conversion never touch the fp32 reference model.
    fp32 = load_fp32_model()
    n_params = sum(p.numel() for p in fp32.parameters())
    results["env"]["params"] = n_params
    print(f"fp32 model: {n_params:,} params")
    fp16 = load_fp32_model().half()
    int8, q_report = quantize_int8_dynamic(load_fp32_model())
    results["int8_dynamic_quant_report"] = q_report
    print(f"int8 dynamic: {q_report['n_modules_quantized']} modules quantized, "
          f"{q_report['params_quantized_fraction']*100:.1f}% of params")
    # name -> (model, input dtype for benchmarking/evaluation, state_dict file).
    # The int8 variant is fed float32 inputs.
    variants = {
        "fp32": (fp32, torch.float32, "retrained_fp32_resaved.pth"),
        "fp16": (fp16, torch.float16, "retrained_fp16.pth"),
        "int8_dynamic": (int8, torch.float32, "retrained_int8_dynamic.pth"),
    }
    # ---- (a) size + (b) latency -------------------------------------------
    for name, (model, dtype, fname) in variants.items():
        # state_dict files always go to RESULTS, independent of --out.
        path = os.path.join(RESULTS, fname)
        size = state_dict_size_bytes(model, path)
        print(f"\n=== {name}: {size/1e6:.3f} MB on disk ({fname}) ===")
        lat1 = bench_latency(model, 1, args.runs_b1, dtype)
        lat64 = bench_latency(model, 64, args.runs_b64, dtype)
        print(f"  batch 1:  {lat1['median_ms_per_window']:.2f} ms/window "
              f"({lat1['windows_per_second']:.0f}/s)")
        print(f"  batch 64: {lat64['median_ms_per_window']:.3f} ms/window "
              f"({lat64['windows_per_second']:.0f}/s)")
        results["variants"][name] = {
            "file": fname,
            "size_bytes": size,
            "size_mb": size / 1e6,
            "latency_batch1": lat1,
            "latency_batch64": lat64,
        }
    # ---- (c) accuracy ------------------------------------------------------
    if not args.skip_accuracy:
        loader, n_clean = build_test_subset(args.data_dir, args.subset)
        # NOTE(review): "subset_size" and "clean_test_total" are only distinct
        # if the second value returned by build_test_subset is the clean-window
        # count BEFORE sub-sampling -- confirm against that function.
        results["accuracy_subset"] = {
            "description": "seed-42 file-level 70/15/15 test split, corrupted "
                           "windows (files 487-499) excluded, seed-42 random "
                           "subset",
            "subset_size": min(args.subset, n_clean) if args.subset else n_clean,
            "clean_test_total": n_clean,
        }
        # Every variant is scored on the same loader.
        for name, (model, dtype, _f) in variants.items():
            print(f"\n=== accuracy: {name} ===")
            results["variants"][name]["accuracy"] = evaluate(
                model, loader, dtype=dtype, label=name)
            print(json.dumps(results["variants"][name]["accuracy"], indent=2))
    # ---- merge into edge_optimization.json ---------------------------------
    merged = {}
    if os.path.exists(args.out):
        with open(args.out) as f:
            merged = json.load(f)
    merged["torch"] = results
    with open(args.out, "w") as f:
        json.dump(merged, f, indent=2)
    print(f"\nwrote {args.out}")
 if __name__ == "__main__":
    main()
@@ -1,14 +0,0 @@
 # Zero out corrupted CSI windows IN PLACE in csi_windows.npy.
 # A window is treated as corrupted when it contains any non-finite value
 # (NaN/Inf) or when its largest finite absolute value exceeds 1.5.
 # WARNING: destructive -- the array is opened as a writable memory map, so
 # the original values are overwritten in the file itself.
 import numpy as np, os
 d = os.path.expanduser('~/wiflow-std-bench/preprocessed_csi_data')
 # mmap_mode='r+' opens the existing file for reading and writing.
 csi = np.load(os.path.join(d, 'csi_windows.npy'), mmap_mode='r+')
 zeroed = 0
 chunk = 4000  # windows processed per iteration
 for i in range(0, len(csi), chunk):
    block = csi[i:i+chunk]
    finite = np.isfinite(block)
    # Non-finite entries are replaced by 0 before taking the max, so the
    # amplitude test only looks at finite values; the reduction runs over
    # axes 1 and 2, i.e. one flag per window.
    bad = (~finite).any(axis=(1, 2)) | (np.abs(np.where(finite, block, 0)).max(axis=(1, 2)) > 1.5)
    if bad.any():
        # The whole window is zeroed, not only the offending entries.
        block[bad] = 0.0
        zeroed += int(bad.sum())
 csi.flush()
 print(f'zeroed {zeroed} corrupted windows entirely')
@@ -1,112 +0,0 @@
 """Evaluate the retrained WiFlow-STD checkpoint (ADR-152 §2.2a fallback).
 Scores the model produced by run.py (train_output/best_pose_model.pth or similar)
 on the seed-42 test split: full test set AND NaN-free subset (excluding windows
 that were zero-filled by clean_nan.py — file indices 487-499).
 NOTE: deployed to ruvultra (~/wiflow-std-bench) as a standalone single file,
 so it deliberately inlines its helpers. The reference implementations (upstream
 import shim, >1GB np.load mmap patch, key-remap loader, canonical evaluate
 loop) live in benchmarks/wiflow-std/_bench_common.py — keep copies in sync.
 """
 import json, os, random, sys
 import numpy as np
 import torch
 from torch.utils.data import DataLoader, Subset
 # csi_windows.npy is ~13 GB; mmap large arrays instead of eagerly loading
 # ~15 GB into RAM (same patch as _bench_common._np_load_mmap).
 _np_load = np.load
 def _np_load_mmap(path, *a, **kw):
    """Drop-in ``np.load`` that memory-maps large ``.npy`` files read-only.

    When ``path`` is a string ending in ``.npy``, the file is larger than
    1 GiB (``1 << 30`` bytes) and the caller did not pass ``mmap_mode``,
    ``mmap_mode='r'`` is added. Everything else is forwarded unchanged to
    the original ``np.load``.
    """
    if isinstance(path, str) and path.endswith('.npy'):
        too_big_for_ram = os.path.getsize(path) > 1 << 30
        if too_big_for_ram and 'mmap_mode' not in kw:
            kw['mmap_mode'] = 'r'
    return _np_load(path, *a, **kw)
 np.load = _np_load_mmap
 sys.path.insert(0, os.path.expanduser('~/wiflow-std-bench/upstream'))
 from dataset import PreprocessedCSIKeypointsDataset, create_preprocessed_train_val_test_loaders
 from models.pose_model import WiFlowPoseModel
 from utils.metrics import calculate_pck, calculate_mpjpe
 def find_checkpoint():
    """Return the most recently modified retrained checkpoint.

    Candidates are:
      * every ``*.pth`` under ``~/wiflow-std-bench/train_output``;
      * ``*.pth`` files under ``~/wiflow-std-bench/upstream`` whose name
        contains ``best``, whose directory path does not contain
        ``cross_dataset``, and which were modified later than two days
        before ``train.log``.
    Any path ending in ``upstream/best_pose_model.pth`` is excluded.

    If ``train.log`` does not exist there is no reference time for the
    recency filter, so the upstream candidates are skipped. Previously this
    raised FileNotFoundError as soon as one matching upstream file was
    seen. NOTE(review): skipping is the conservative choice; confirm it is
    preferred over accepting unfiltered upstream checkpoints.

    Exits the process with a message when no candidate is found.
    """
    cands = []
    for root, _, files in os.walk(os.path.expanduser('~/wiflow-std-bench/train_output')):
        for f in files:
            if f.endswith('.pth'):
                cands.append(os.path.join(root, f))
    # also upstream/test default output dir
    # The cutoff is loop-invariant, so stat train.log once instead of once
    # per candidate.
    try:
        cutoff = os.path.getmtime(
            os.path.expanduser('~/wiflow-std-bench/train.log')) - 86400 * 2
    except OSError:
        cutoff = None
    if cutoff is not None:
        for root, _, files in os.walk(os.path.expanduser('~/wiflow-std-bench/upstream')):
            if 'cross_dataset' in root:
                continue
            for f in files:
                if f.endswith('.pth') and 'best' in f:
                    p = os.path.join(root, f)
                    if os.path.getmtime(p) > cutoff:
                        cands.append(p)
    cands = [c for c in cands if not c.endswith('upstream/best_pose_model.pth')]
    if not cands:
        sys.exit('no retrained checkpoint found')
    return max(cands, key=os.path.getmtime)
 def evaluate(model, loader, device):
    """Score ``model`` on every batch of ``loader`` and return averaged metrics.

    The model is put in eval mode and run under ``torch.no_grad()``. Per-batch
    MPJPE and PCK (thresholds 0.1 .. 0.5) are weighted by batch size and
    divided by the total sample count.

    Returns:
        dict with ``samples``, ``mpjpe`` and ``pck@10`` .. ``pck@50``.
    """
    model.eval()
    thresholds = (0.1, 0.2, 0.3, 0.4, 0.5)
    pck_sums = dict.fromkeys(thresholds, 0.0)
    mpjpe_sum = 0.0
    n_seen = 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            preds = model(inputs)
            count = targets.size(0)
            mpjpe_sum += calculate_mpjpe(preds, targets) * count
            batch_pck = calculate_pck(preds, targets, thresholds=list(pck_sums))
            for thr in thresholds:
                pck_sums[thr] += batch_pck[thr] * count
            n_seen += count
    summary = {'samples': n_seen, 'mpjpe': mpjpe_sum / n_seen}
    for thr in thresholds:
        summary[f'pck@{int(thr*100)}'] = pck_sums[thr] / n_seen
    return summary
 # ---- reproducibility: seed every RNG used here before building the split ----
 random.seed(42); np.random.seed(42); torch.manual_seed(42)
 torch.cuda.manual_seed_all(42)
 torch.backends.cudnn.deterministic = True
 # ---- data: seed-42 split; only the test loader is used ----------------------
 d = os.path.expanduser('~/wiflow-std-bench/preprocessed_csi_data')
 dataset = PreprocessedCSIKeypointsDataset(data_dir=d, keypoint_scale=1000.0,
                                           enable_temporal_clean=True)
 _, _, test_loader = create_preprocessed_train_val_test_loaders(
    dataset=dataset, batch_size=256, num_workers=2, random_seed=42)
 # The device is hard-coded to CUDA; there is no CPU fallback in this script.
 device = torch.device('cuda')
 # ---- model: newest retrained checkpoint, legacy key prefixes remapped -------
 ckpt = find_checkpoint()
 print('checkpoint:', ckpt)
 model = WiFlowPoseModel(dropout=0.5).to(device)
 state = torch.load(ckpt, map_location=device, weights_only=True)
 # Keys starting with an old prefix get the new prefix; all other keys are
 # kept as they are.
 renames = {'att.': 'attention.', 'final_conv.': 'decoder.'}
 state = {next((new + k[len(old):] for old, new in renames.items()
                if k.startswith(old)), k): v for k, v in state.items()}
 model.load_state_dict(state, strict=True)
 results = {'checkpoint': ckpt}
 print('=== full test set ===')
 results['test_full'] = evaluate(model, test_loader, device)
 print(json.dumps(results['test_full'], indent=2))
 # NaN-free subset: exclude windows from corrupted files 487-499
 # NOTE(review): the filter below keeps file indices < 487, so it also drops
 # any file index above 499 -- confirm the dataset has none.
 test_subset = test_loader.dataset            # Subset(dataset, test_indices)
 w2f = dataset.window_to_file
 clean_idx = [i for i in test_subset.indices if w2f[i] < 487]
 print(f'=== NaN-free test subset ({len(clean_idx)} of {len(test_subset.indices)}) ===')
 clean_loader = DataLoader(Subset(dataset, clean_idx), batch_size=256, shuffle=False)
 results['test_clean'] = evaluate(model, clean_loader, device)
 print(json.dumps(results['test_clean'], indent=2))
 # ---- persist both result sets -------------------------------------------------
 out = os.path.expanduser('~/wiflow-std-bench/eval_retrained.json')
 with open(out, 'w') as f:
    json.dump(results, f, indent=2)
 print('wrote', out)
@@ -1,374 +0,0 @@
 """ADR-152 SS2.2 measurement (b): WiFlow-STD fine-tuned on our fresh ESP32 paired dataset.
 Dataset: ~/wiflow-std-bench/paired-20260610.jsonl -- 2,046 paired windows collected
 2026-06-10 22:10-22:40 (ONE subject, ONE room, ONE ESP32 node, varied poses).
 Per record: csi = flat float32 list, csi_shape, kp = 17 COCO [x, y] normalized [0,1]
 camera coords, conf (MediaPipe mean confidence, all > 0.5 in this set), ts_start/ts_end.
 Aligner: scripts/align-ground-truth.js, non-overlapping 20-frame windows (~0.42 s each).
 Dataset findings (MEASURED on this file, 2026-06-10):
  - csi_shape is HETEROGENEOUS, not uniformly [70, 20]: 1,347x [70,20], 284x [134,20],
    243x [26,20], 130x [12,20], 42x [20,20]. The ESP32 stream emits mixed frame types
    and the aligner stamps each window's subcarrier count from frame[0]
    (extractCsiMatrix: nSc = window[0].subcarriers), zero-padding/truncating the rest.
    Even native-70 windows contain ~20.4% internally zero-padded short frames
    (subcarriers 40..69 all-zero for those frames).
  - LAYOUT BUG: the aligner fills matrix[f * nSc + s] (frame-major) but declares
    shape [nSc, nFrames]. The true layout is (frame, subcarrier); we reshape
    (nFrames, nSc) and transpose. Confirmed by coherent per-frame zero-tails.
  - Handling here (primary suite, "all2046"): every frame's subcarrier axis is
    linearly resampled to 70 bins (np.interp over a normalized index domain;
    identity for native-70 frames) so the pre-registered n=2,046 and split sizes
    hold. Secondary suite ("native70") restricts to the 1,347 native [70,20]
    windows (temporal 70/15/15 of those) as a homogeneity robustness check.
 Pre-registered protocol (followed exactly):
  1. TEMPORAL split (records are time-sorted; asserted): first 70% train (1,432),
     next 15% val (307), last 15% test (307). No shuffling across time. Seed 42
     for everything else.
  2. Model: upstream WiFlow-STD trunk (WiFlowPoseModel) with a learned 1x1 Conv1d
     projection 70->540 prepended, and K=17 via the parameter-free adaptive pool
     (AdaptiveAvgPool2d((17, 1)) instead of (15, 1)) -- pretrained weights load
     for any K. CSI normalization: divide by the TRAIN-split 99th-percentile
     amplitude, clip to [0, 1] (documented in output JSON).
  3. Three runs, <=60 epochs, early-stop patience 8 on val MPJPE, batch 32,
     AdamW, fp32 (no autocast):
       (i)   pretrained-init: trunk init from upstream/test/best_pose_model.pth
             (the measurement-(a) retrained checkpoint, ~96% PCK@20 on WiFlow data;
             key remap att.->attention. / final_conv.->decoder. applied defensively
             as in eval_repro.py -- a no-op for this checkpoint, which already uses
             the new names). Discriminative lr: adapter 1e-4, trunk 1e-5.
       (ii)  scratch: same architecture, random init, all params lr 1e-4.
       (iii) frozen-trunk: pretrained trunk frozen (requires_grad=False AND held in
             .eval() so BatchNorm running stats cannot drift -- pure transfer probe);
             only the 70->540 adapter trains, lr 1e-4.
  4. Metrics on the temporal TEST split: torso-normalized PCK@10/20/30/40/50 and
     MPJPE. Upstream utils/metrics.py calculate_pck(use_torso_norm=True) hardcodes
     NECK_IDX/PELVIS_IDX = 2, 12 -- a 15-keypoint convention that is WRONG for our
     17 COCO keypoints (2 = right_eye, 12 = right_hip). We therefore reimplement the
     identical math (per-frame norm distance, clamp min 0.01, mean over all
     keypoints x frames) with torso = ||l_shoulder(5) - l_hip(11)||.
     Also reported: prediction std across test frames (constant-pose detector;
     must be > 0) and the mean-pose-predictor baseline (train-split mean pose
     evaluated on test -- the honesty bar).
 Usage (on ruvultra):
  nice -n 10 nohup ~/wiflow-std-bench/venv/bin/python train_measb.py > train_measb.log 2>&1 &
 NOTE: deployed to ruvultra as a standalone single file, so it deliberately
 inlines its helpers. The reference implementations (upstream import shim,
 np.load mmap patch, key-remap loader, canonical evaluate loop) live in
 benchmarks/wiflow-std/_bench_common.py — keep copies in sync.
 """
 import json
 import os
 import random
 import sys
 import time
 import numpy as np
 import torch
 import torch.nn as nn
 BENCH = os.path.expanduser("~/wiflow-std-bench")
 UPSTREAM = os.path.join(BENCH, "upstream")
 MEASB = os.path.join(BENCH, "measb")
 DATA = os.path.join(BENCH, "paired-20260610.jsonl")
 CHECKPOINT = os.path.join(UPSTREAM, "test", "best_pose_model.pth")
 sys.path.insert(0, UPSTREAM)
 # Upstream defect (1): models/__init__.py imports a name tcn.py does not define.
 # Register a stub package so the broken __init__ never executes (as eval_repro.py).
 import types  # noqa: E402
 _models_pkg = types.ModuleType("models")
 _models_pkg.__path__ = [os.path.join(UPSTREAM, "models")]
 sys.modules["models"] = _models_pkg
 from models.pose_model import WiFlowPoseModel  # noqa: E402
 SEED = 42
 K = 17
 N_SUBC = 70
 TRUNK_IN = 540
 BATCH = 32          # <= 64 per protocol (GPU shared with the efficiency sweep)
 MAX_EPOCHS = 60
 PATIENCE = 8
 LR_ADAPTER = 1e-4
 LR_TRUNK_FT = 1e-5  # 10x lower for the pretrained trunk vs the fresh adapter
 L_SHOULDER, L_HIP = 5, 11
 THRESHOLDS = (0.1, 0.2, 0.3, 0.4, 0.5)
 def set_seed(seed=SEED):
    """Seed Python, NumPy and torch RNGs and request deterministic cuDNN.

    CUDA generators are seeded only when CUDA is available. cuDNN is set to
    deterministic mode with benchmarking disabled.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
 def resample_subcarriers(frame_major, n_out=N_SUBC):
    """Linearly resample each frame's subcarrier axis to ``n_out`` bins.

    ``(nFrames, nSc) -> (nFrames, n_out)``. Source and target positions are
    both spread evenly over the normalized index domain [0, 1]. When the
    input already has ``n_out`` subcarriers it is returned as-is (same
    object, no copy); otherwise a new float32 array is returned.
    """
    _n_frames, n_in = frame_major.shape
    if n_in == n_out:
        return frame_major
    src_pos = np.linspace(0.0, 1.0, n_in)
    dst_pos = np.linspace(0.0, 1.0, n_out)
    resampled = []
    for frame in frame_major:
        resampled.append(np.interp(dst_pos, src_pos, frame))
    return np.stack(resampled).astype(np.float32)
 def load_dataset():
    """Read the paired JSONL file at ``DATA`` into stacked arrays.

    Each line is one record. Its flat ``csi`` list is reshaped frame-major,
    resampled to the common subcarrier count and transposed, so every window
    ends up as (subcarriers, frames). Records must have 20 frames, ``(K, 2)``
    keypoints and be sorted by ``ts_start`` (all asserted).

    Returns:
        ``(csi, keypoints, confidences, native70_flags, shape_counts,
        first_ts_start, last_ts_start)`` where ``shape_counts`` maps
        ``"<nSc>x<nFrames>"`` to the number of records declaring that shape.
    """
    csi_windows, keypoints, confidences = [], [], []
    start_times, is_native70 = [], []
    shape_counts = {}
    with open(DATA) as fh:
        for raw_line in fh:
            rec = json.loads(raw_line)
            n_sc, n_frames = rec["csi_shape"]
            shape_key = f"{n_sc}x{n_frames}"
            shape_counts[shape_key] = shape_counts.get(shape_key, 0) + 1
            assert n_frames == 20, rec["csi_shape"]
            # Aligner layout bug: the data is frame-major although the record
            # declares [nSc, nFrames]. Reshape as (nFrames, nSc), resample the
            # subcarrier axis, then transpose to (subcarriers, frames).
            frame_major = np.asarray(rec["csi"], dtype=np.float32)
            frame_major = frame_major.reshape(n_frames, n_sc)
            csi_windows.append(resample_subcarriers(frame_major).T)
            pose = np.asarray(rec["kp"], dtype=np.float32)
            assert pose.shape == (K, 2), pose.shape
            keypoints.append(pose)
            confidences.append(rec["conf"])
            start_times.append(rec["ts_start"])
            is_native70.append(n_sc == N_SUBC)
    in_time_order = all(earlier <= later
                        for earlier, later in zip(start_times, start_times[1:]))
    assert in_time_order, "records not time-sorted"
    return (np.stack(csi_windows), np.stack(keypoints),
            np.asarray(confidences, dtype=np.float32),
            np.asarray(is_native70), shape_counts,
            start_times[0], start_times[-1])
 def temporal_split(n):
    """Return (train, val, test) slices for a 70/15/15 split of ``n`` records.

    The slices are contiguous and in order (no shuffling): train is the
    first ``round(0.70 * n)`` records, val the next ``round(0.15 * n)``, and
    test everything that remains.
    """
    train_end = int(round(n * 0.70))
    val_end = train_end + int(round(n * 0.15))
    return slice(0, train_end), slice(train_end, val_end), slice(val_end, n)
 class AdaptedWiFlow(nn.Module):
    """1x1 Conv1d adapter 70->540 + upstream WiFlow-STD trunk with K=17 pool head.

    The adapter maps ``N_SUBC`` input channels to the ``TRUNK_IN`` channels
    the trunk is fed; the trunk's ``avg_pool`` is replaced so that it pools
    to ``k`` rows.
    """
    def __init__(self, k=K, dropout=0.5):
        """Build the adapter and the trunk.

        Args:
            k: target height of the replaced adaptive average pool.
            dropout: forwarded to ``WiFlowPoseModel``.
        """
        super().__init__()
        # NOTE(review): the adapter is initialised before the trunk is built;
        # if the trunk constructor also draws random numbers, reordering these
        # statements would change the seeded initial weights -- keep the order.
        self.adapter = nn.Conv1d(N_SUBC, TRUNK_IN, kernel_size=1)
        nn.init.kaiming_normal_(self.adapter.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(self.adapter.bias, 0)
        self.trunk = WiFlowPoseModel(dropout=dropout)
        # K=17 via the parameter-free adaptive pool: decoder emits [B, 2, 15, 20]
        # spatial maps; pooling H->17 instead of 15 yields [B, 17, 2] with no new
        # parameters, so the pretrained state_dict loads strict=True for any K.
        self.trunk.avg_pool = nn.AdaptiveAvgPool2d((k, 1))
    def forward(self, x):
        """Apply the adapter, then the trunk, and return the trunk's output."""
        return self.trunk(self.adapter(x))
 def load_pretrained_trunk(trunk, path):
    """Load the checkpoint at ``path`` into ``trunk`` (strict key matching).

    The state dict is read on CPU with ``weights_only=True``. Keys that start
    with a legacy prefix (``att.``, ``final_conv.``) get the current prefix
    (``attention.``, ``decoder.``); all other keys are kept unchanged.
    Defensive remap as in eval_repro.py (no-op for the retrained checkpoint).
    """
    prefix_map = (("att.", "attention."), ("final_conv.", "decoder."))

    def _current_name(key):
        # The first matching legacy prefix wins; unmatched keys pass through.
        for legacy, current in prefix_map:
            if key.startswith(legacy):
                return current + key[len(legacy):]
        return key

    raw_state = torch.load(path, map_location="cpu", weights_only=True)
    remapped = {_current_name(key): value for key, value in raw_state.items()}
    trunk.load_state_dict(remapped, strict=True)
 def pck_torso(pred, target, thresholds=THRESHOLDS):
    """Upstream calculate_pck math, torso = l_shoulder(5)<->l_hip(11) for 17-kp COCO.

    Each keypoint error is divided by that frame's torso length (measured on
    ``target`` and clamped to at least 0.01). For every threshold the result
    is the fraction of all keypoints in all frames whose normalized error is
    at or below it.

    Returns:
        dict mapping ``"pck@<threshold * 100>"`` to a Python float.
    """
    torso_vec = target[:, L_SHOULDER] - target[:, L_HIP]
    torso_len = torso_vec.pow(2).sum(dim=1).sqrt().clamp(min=0.01)
    joint_err = (pred - target).pow(2).sum(dim=2).sqrt()
    normalized = joint_err / torso_len.unsqueeze(1)
    scores = {}
    for t in thresholds:
        scores[f"pck@{int(t * 100)}"] = (normalized <= t).float().mean().item()
    return scores
 def mpjpe(pred, target):
    """Mean Euclidean distance between predicted and target keypoints.

    The distance is taken over the last axis (``dim=2``) and averaged over
    every remaining element; the result is a Python float.
    """
    per_joint = (pred - target).pow(2).sum(dim=2).sqrt()
    return per_joint.mean().item()
@torch.no_grad()
def predict(model, x, batch=256):
    """Run ``model`` in eval mode over ``x`` in chunks of ``batch`` and concatenate the outputs.

    Gradients are disabled by the decorator. The model is left in eval mode.
    """
    model.eval()
    chunks = []
    for start in range(0, len(x), batch):
        chunks.append(model(x[start:start + batch]))
    return torch.cat(chunks)
 def eval_preds(pred, target):
    """Collect the PCK scores, MPJPE and a prediction-spread statistic for ``pred``.

    ``pred_std`` is the standard deviation across axis 0 (frames) for every
    coordinate, averaged over the coordinates; 0.0 means the predictor emits
    the same pose for every frame.
    """
    spread = pred.std(dim=0)
    return {
        **pck_torso(pred, target),
        "mpjpe": mpjpe(pred, target),
        "pred_std": spread.mean().item(),
    }
 def train_run(name, x_tr, y_tr, x_va, y_va, device, pretrained, freeze_trunk,
               lr_trunk):
    """Train one AdaptedWiFlow configuration with early stopping on validation MPJPE.

    Args:
        name: Run label used in log lines and in the checkpoint file name.
        x_tr, y_tr: Training inputs and targets, indexed by a permutation
            tensor created on ``device``.
        x_va, y_va: Validation inputs and targets.
        device: Device the model is moved to.
        pretrained: If true, load CHECKPOINT into the trunk before training.
        freeze_trunk: If true, only the adapter is optimised and the trunk is
            kept in eval mode during training.
        lr_trunk: Learning rate of the trunk parameter group (ignored when the
            trunk is frozen).

    Returns:
        ``(model, info)``: the model restored to its best-validation weights,
        and a dict with ``best_epoch``, ``best_val_mpjpe``, ``epochs_run``,
        ``wall_seconds`` and the per-epoch ``history``.

    Raises:
        RuntimeError: if no epoch improved on the initial ``inf`` validation
            MPJPE, so there is no best state to restore or save.
    """
    set_seed(SEED)
    model = AdaptedWiFlow().to(device)
    if pretrained:
        load_pretrained_trunk(model.trunk, CHECKPOINT)
    if freeze_trunk:
        for p in model.trunk.parameters():
            p.requires_grad = False
        groups = [{"params": model.adapter.parameters(), "lr": LR_ADAPTER}]
    else:
        groups = [{"params": model.adapter.parameters(), "lr": LR_ADAPTER},
                  {"params": model.trunk.parameters(), "lr": lr_trunk}]
    opt = torch.optim.AdamW(groups)
    loss_fn = nn.MSELoss()
    n = len(x_tr)
    best_val, best_state, best_epoch, bad = float("inf"), None, -1, 0
    history = []
    t0 = time.time()
    for epoch in range(MAX_EPOCHS):
        model.train()
        if freeze_trunk:
            model.trunk.eval()  # keep BatchNorm running stats fixed: pure transfer
        perm = torch.randperm(n, device=device)
        ep_loss = 0.0
        for i in range(0, n, BATCH):
            idx = perm[i:i + BATCH]
            opt.zero_grad()
            loss = loss_fn(model(x_tr[idx]), y_tr[idx])
            loss.backward()
            opt.step()
            # Weight by batch size so ep_loss / n is a per-sample mean even
            # when the last batch is short.
            ep_loss += loss.item() * len(idx)
        val_mpjpe = mpjpe(predict(model, x_va), y_va)
        history.append({"epoch": epoch, "train_mse": ep_loss / n, "val_mpjpe": val_mpjpe})
        marker = ""
        if val_mpjpe < best_val:
            best_val, best_epoch, bad = val_mpjpe, epoch, 0
            # Snapshot on CPU so later optimizer steps cannot alter the copy.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            marker = " *"
        else:
            bad += 1
        print(f"[{name}] epoch {epoch:02d} train_mse {ep_loss / n:.6f} "
              f"val_mpjpe {val_mpjpe:.5f}{marker}", flush=True)
        if bad >= PATIENCE:
            print(f"[{name}] early stop at epoch {epoch} (best {best_epoch})", flush=True)
            break
    if best_state is None:
        # Reached when MAX_EPOCHS is 0 or every validation MPJPE was NaN/inf
        # (``NaN < x`` is always False). load_state_dict(None) would otherwise
        # fail with an error that hides the real cause.
        raise RuntimeError(
            f"[{name}] validation MPJPE never improved on inf after "
            f"{len(history)} epoch(s); no best state to restore or save")
    model.load_state_dict(best_state)
    torch.save(best_state, os.path.join(MEASB, f"{name}_best.pth"))
    return model, {"best_epoch": best_epoch, "best_val_mpjpe": best_val,
                   "epochs_run": len(history), "wall_seconds": round(time.time() - t0, 1),
                   "history": history}
 def run_suite(tag, csi, kps, device):
    """Temporal 70/15/15 split, mean-pose baseline, three training runs.

    Args:
        tag: Suite label; prefixes log lines and checkpoint names.
        csi: NumPy array of CSI windows, sliced along axis 0 by the split.
        kps: NumPy array of keypoint targets aligned with ``csi``.
        device: Torch device the tensors and models are placed on.

    Returns:
        Dict with the window count, split sizes, CSI normalization constants,
        the mean-pose baseline metrics and, under ``"runs"``, one entry per
        training configuration (config, trainable parameter count, training
        summary, per-epoch history, test metrics).
    """
    n = len(csi)
    tr, va, te = temporal_split(n)
    print(f"=== suite {tag}: n={n} train={tr.stop} val={va.stop - va.start} "
          f"test={te.stop - te.start} ===", flush=True)
    # CSI normalization constant from TRAIN split only.
    train_p99 = float(np.percentile(csi[tr], 99))
    train_max = float(csi[tr].max())
    print(f"[{tag}] train p99={train_p99:.3f} max={train_max:.3f} -> /p99, clip [0,1]",
          flush=True)
    # The train-split constant is applied to ALL windows (train, val and test).
    csi_n = np.clip(csi / train_p99, 0.0, 1.0).astype(np.float32)
    x = torch.from_numpy(csi_n).to(device)
    y = torch.from_numpy(kps).to(device)
    x_tr, y_tr = x[tr], y[tr]
    x_va, y_va = x[va], y[va]
    x_te, y_te = x[te], y[te]
    suite = {
        "n_windows": n,
        "split": {"n_train": int(tr.stop), "n_val": int(va.stop - va.start),
                  "n_test": int(te.stop - te.start)},
        "csi_norm": {"method": "divide by train-split p99 amplitude, clip [0,1]",
                     "train_p99": train_p99, "train_max": train_max},
        "runs": {},
    }
    # Honesty bar: mean-pose predictor fit on TRAIN, evaluated on TEST.
    mean_pose = y_tr.mean(dim=0, keepdim=True).expand(len(y_te), -1, -1)
    suite["mean_pose_baseline"] = eval_preds(mean_pose, y_te)
    suite["mean_pose_baseline"]["note"] = "train-split mean pose; pred_std 0 by construction"
    print(f"[{tag}] mean-pose baseline:", json.dumps(suite["mean_pose_baseline"]),
          flush=True)
    # Each entry is (run name, keyword arguments forwarded to train_run).
    configs = [
        ("pretrained", dict(pretrained=True, freeze_trunk=False, lr_trunk=LR_TRUNK_FT)),
        ("scratch", dict(pretrained=False, freeze_trunk=False, lr_trunk=LR_ADAPTER)),
        ("frozen_trunk", dict(pretrained=True, freeze_trunk=True, lr_trunk=0.0)),
    ]
    for name, cfg in configs:
        print(f"=== run: {tag}/{name} {cfg} ===", flush=True)
        model, train_info = train_run(f"{tag}_{name}", x_tr, y_tr, x_va, y_va,
                                      device, **cfg)
        # train_run returns the model restored to its best-validation weights.
        test_metrics = eval_preds(predict(model, x_te), y_te)
        n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        # "train" holds the summary fields; the per-epoch history is stored
        # under its own key.
        suite["runs"][name] = {"config": cfg, "trainable_params": n_trainable,
                               "train": {k: v for k, v in train_info.items()
                                         if k != "history"},
                               "history": train_info["history"],
                               "test": test_metrics}
        print(f"[{tag}/{name}] TEST:", json.dumps(test_metrics), flush=True)
    return suite
 def main():
    """Run both measurement suites and write the combined results to measurement_b.json.

    The protocol description is recorded first, followed by the primary suite
    on all windows and the secondary suite restricted to the ``native70``
    selection returned by ``load_dataset``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"device {device}, torch {torch.__version__}", flush=True)
    set_seed(SEED)
    csi, kps, confs, native70, shape_counts, ts_first, ts_last = load_dataset()
    print(f"shape distribution: {shape_counts}", flush=True)
    protocol = {
        "dataset": DATA, "n_windows": len(csi),
        "ts_first": ts_first, "ts_last": ts_last,
        "conf_mean": float(confs.mean()), "conf_min": float(confs.min()),
        "csi_shape_distribution": shape_counts,
        "csi_layout_note": "aligner stores frame-major data under a transposed "
                           "[nSc, nFrames] shape label; corrected on load",
        "csi_resample": "per-frame linear interp of subcarrier axis to 70 bins "
                        "(identity for native-70 frames); native-70 windows still "
                        "contain ~20.4% internally zero-padded short frames",
        "split": "temporal 70/15/15 (no shuffle across time)",
        "model": "1x1 Conv1d 70->540 adapter + WiFlowPoseModel trunk, "
                 "AdaptiveAvgPool2d((17,1)) head (parameter-free K=17)",
        "checkpoint": CHECKPOINT,
        "checkpoint_note": "measurement-(a) retrained checkpoint (~96% PCK@20 on "
                           "WiFlow data); att./final_conv. remap applied "
                           "defensively (no-op, already new-style keys)",
        "optimizer": f"AdamW, adapter lr {LR_ADAPTER}, fine-tuned trunk lr "
                     f"{LR_TRUNK_FT} (10x lower), scratch all {LR_ADAPTER}",
        "batch": BATCH, "max_epochs": MAX_EPOCHS, "patience": PATIENCE,
        "precision": "fp32", "seed": SEED,
        "pck": "torso-normalized, torso = ||l_shoulder(5) - l_hip(11)||, "
               "clamp min 0.01, mean over keypoints x frames "
               "(upstream math; upstream 2/12 indices are a 15-kp convention)",
    }
    # Placeholders fix the key order of the JSON output before the suites run.
    results = {
        "protocol": protocol,
        # Primary: all 2,046 windows (pre-registered n), subcarrier axis resampled.
        "all2046": None,
        # Secondary robustness check: the 1,347 native [70,20] windows only.
        "native70": None,
    }
    results["all2046"] = run_suite("all2046", csi, kps, device)
    results["native70"] = run_suite("native70", csi[native70], kps[native70], device)
    out_path = os.path.join(MEASB, "measurement_b.json")
    with open(out_path, "w") as fh:
        json.dump(results, fh, indent=2)
    print(f"wrote {out_path}", flush=True)
 if __name__ == "__main__":
    main()
@@ -1,33 +0,0 @@
 #!/bin/bash
 # Reproduce the upstream WiFlow-STD training run under ~/wiflow-std-bench:
 # pinned clone, import patch, venv, dataset download, clean_nan.py, training.
 # -e aborts on the first failing command; -x echoes every command.
 set -ex
 cd ~/wiflow-std-bench
 # 1. clone upstream at the pinned commit
 if [ ! -d upstream ]; then
  git clone https://github.com/DY2434/WiFlow-WiFi-Pose-Estimation-with-Spatio-Temporal-Decoupling upstream
 fi
 cd upstream && git checkout 06899d294a0f44709d601a53e91dbf24759daefb && cd ..
 # 2. documented deviation: fix upstream import bug (TemporalConvNet does not exist)
 sed -i 's/from .tcn import TemporalConvNet/from .tcn import TemporalBlock/; s/'"'"'TemporalConvNet'"'"'/'"'"'TemporalBlock'"'"'/' upstream/models/__init__.py
 # 3. venv: torch cu128 (RTX 5080 = sm_120 needs >=2.7; their pin 2.3.1 predates Blackwell)
 if [ ! -d venv ]; then
  python3 -m venv venv
  ./venv/bin/pip install -q --upgrade pip
  ./venv/bin/pip install -q torch --index-url https://download.pytorch.org/whl/cu128
  ./venv/bin/pip install -q numpy pandas matplotlib seaborn scikit-learn opencv-python-headless scipy tqdm psutil kagglehub
 fi
 # Sanity check: get_device_name(0) fails (and -e aborts) when no CUDA device is visible.
 ./venv/bin/python -c "import torch; print(torch.__version__, torch.cuda.is_available(), torch.cuda.get_device_name(0))"
 # 4. dataset via kagglehub (anonymous, public dataset)
 DS=$(./venv/bin/python -c "import kagglehub; print(kagglehub.dataset_download('kaka2434/wiflow-dataset'))")
 echo "dataset at: $DS"
 # 5. run.py hardcodes ../preprocessed_csi_data relative to upstream/
 ln -sfn "$DS/preprocessed_csi_data" ~/wiflow-std-bench/preprocessed_csi_data
 # 6. run clean_nan.py before training. The working directory is still
 #    ~/wiflow-std-bench here, so the venv and the script are addressed
 #    relative to it. A failure now aborts the run instead of being hidden
 #    behind a silenced first attempt that used wrong ../ paths.
 ./venv/bin/python clean_nan.py
 # 7. train with upstream defaults (seed 42 set inside run.py)
 cd upstream
 ../venv/bin/python run.py --gpu 0 --batch_size 64 --epochs 50 --output_dir ../train_output
@@ -1,332 +0,0 @@
 """Configurable compact variants of the WiFlow-STD pose model (ADR-152 efficiency sweep).
 This is a parameterized copy of upstream models/{pose_model,tcn,convnet,attention}.py
 (DY2434/WiFlow @ 06899d29, Apache-2.0). upstream/ is NOT modified. Deviations from
 upstream, all forced by shrinking channels and documented per variant in run_sweep.py:
 1. TCN grouped-conv groups: upstream hardcodes groups=20, which does not divide
   the compact channel counts (e.g. 270, 135, 85). Rule here:
   - groups_mode='gcd20': per-conv groups = gcd(channels, 20)  (== 20 wherever
     upstream's choice is valid, incl. the 540-ch input conv; falls back to the
     largest common divisor with 20 otherwise).
   - groups_mode='depthwise': groups = channels (tiny variant only).
 2. Conv2d downsampling strides: upstream uses 4 stride-(1,2) blocks because
   240/2^4 = 15 == n_keypoints. With smaller TCN output widths that would leave
   <15 rows and AdaptiveAvgPool2d((15,1)) would duplicate rows across keypoints.
   Rule: halve the width only while the result stays >= 15 (stride-2 blocks
   first, stride-1 after). Full model: 240 -> 4 halvings = upstream exactly.
 3. input_pw_groups (tiny only): the dense 540->c pointwise + residual downsample
   in TCN block 1 cost 2*540*c params (a ~117k floor that alone exceeds the
   tiny <100k budget). tiny groups these two convs (groups=4; 4 | gcd(540, 68)).
 4. Decoder mid-channels: upstream 64->32; here c_last -> max(c_last // 2, 4).
 """
 import math
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 def tcn_groups(channels: int, mode: str) -> int:
    """Return the conv group count for ``channels`` under the given grouping mode.

    ``'gcd20'`` gives gcd(channels, 20); ``'depthwise'`` gives one group per
    channel. Any other mode raises ``ValueError(mode)``.
    """
    if mode == 'gcd20':
        return math.gcd(channels, 20)
    if mode != 'depthwise':
        raise ValueError(mode)
    return channels
 # ---------------------------------------------------------------- TCN (copy of tcn.py)
 class Chomp1d(nn.Module):
    """Drop the last ``chomp_size`` positions from the final axis of a 3-D tensor.

    Used after a padded Conv1d to trim the extra trailing outputs. A
    ``chomp_size`` of 0 is a no-op.
    """
    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size
    def forward(self, x):
        if self.chomp_size == 0:
            # ``x[:, :, :-0]`` is ``x[:, :, :0]`` (an empty tensor), not a
            # no-op, so the zero case has to be handled explicitly.
            return x.contiguous()
        return x[:, :, :-self.chomp_size].contiguous()
 class CompactGroupedTemporalBlock(nn.Module):
    """Upstream InnerGroupedTemporalBlock with parameterized groups.

    Two stages of (grouped dilated Conv1d -> chomp -> BN -> SiLU -> pointwise
    Conv1d -> BN -> SiLU -> dropout) plus a residual connection.

    Args:
        n_inputs: Input channel count.
        n_outputs: Output channel count.
        kernel_size, stride, dilation, padding: Passed to the grouped convs
            (``stride`` applies to the first grouped conv only; the second
            always uses stride 1).
        dropout: Dropout probability after each stage.
        groups_mode: Rule handed to ``tcn_groups`` to pick the group count of
            the two grouped convs.
        pw_groups: Group count of the first pointwise conv and of the residual
            1x1 conv.
    """
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding,
                 dropout=0.2, groups_mode='gcd20', pw_groups=1):
        super().__init__()
        g_in = tcn_groups(n_inputs, groups_mode)
        g_out = tcn_groups(n_outputs, groups_mode)
        # Kept as plain attributes so describe() can report the chosen groups.
        self.groups = (g_in, g_out)
        self.pw_groups = pw_groups
        # Stage 1: grouped conv keeps n_inputs channels, pointwise conv maps
        # n_inputs -> n_outputs.
        self.conv1_group = nn.Conv1d(n_inputs, n_inputs, kernel_size, stride=stride,
                                     padding=padding, dilation=dilation,
                                     groups=g_in, bias=False)
        # Trim the ``padding`` trailing outputs added by the padded conv.
        self.chomp1 = Chomp1d(padding) if padding > 0 else nn.Identity()
        self.bn1_group = nn.BatchNorm1d(n_inputs)
        self.relu1_group = nn.SiLU(inplace=True)
        self.conv1_pw = nn.Conv1d(n_inputs, n_outputs, 1, groups=pw_groups, bias=False)
        self.bn1_pw = nn.BatchNorm1d(n_outputs)
        self.relu1_pw = nn.SiLU(inplace=True)
        self.dropout1 = nn.Dropout(dropout)
        # Stage 2: same layout at n_outputs channels; the pointwise conv here is
        # always dense (no pw_groups).
        self.conv2_group = nn.Conv1d(n_outputs, n_outputs, kernel_size, stride=1,
                                     padding=padding, dilation=dilation,
                                     groups=g_out, bias=False)
        self.chomp2 = Chomp1d(padding) if padding > 0 else nn.Identity()
        self.bn2_group = nn.BatchNorm1d(n_outputs)
        self.relu2_group = nn.SiLU(inplace=True)
        self.conv2_pw = nn.Conv1d(n_outputs, n_outputs, 1, bias=False)
        self.bn2_pw = nn.BatchNorm1d(n_outputs)
        self.relu2_pw = nn.SiLU(inplace=True)
        self.dropout2 = nn.Dropout(dropout)
        # Residual path: 1x1 conv + BN when the channel count changes,
        # otherwise the input is passed through untouched.
        self.downsample = nn.Sequential(
            nn.Conv1d(n_inputs, n_outputs, 1, groups=pw_groups, bias=False),
            nn.BatchNorm1d(n_outputs)
        ) if n_inputs != n_outputs else nn.Identity()
    def forward(self, x):
        """Apply both stages to ``x`` and return SiLU(stage output + residual)."""
        # Residual is computed first, before the main path runs.
        res = self.downsample(x)
        out = self.conv1_group(x)
        out = self.chomp1(out)
        out = self.bn1_group(out)
        out = self.relu1_group(out)
        out = self.conv1_pw(out)
        out = self.bn1_pw(out)
        out = self.relu1_pw(out)
        out = self.dropout1(out)
        out = self.conv2_group(out)
        out = self.chomp2(out)
        out = self.bn2_group(out)
        out = self.relu2_group(out)
        out = self.conv2_pw(out)
        out = self.bn2_pw(out)
        out = self.relu2_pw(out)
        out = self.dropout2(out)
        return F.silu(out + res)
 class CompactTemporalBlock(nn.Module):
    """Stack of CompactGroupedTemporalBlock layers with dilation doubling per layer.

    Layer ``i`` maps the previous width to ``num_channels[i]`` with dilation
    ``2 ** i`` and padding ``(kernel_size - 1) * 2 ** i``. ``input_pw_groups``
    is applied to the first layer only; later layers use ``pw_groups=1``.
    """
    def __init__(self, num_inputs, num_channels, kernel_size=3, dropout=0.2,
                 groups_mode='gcd20', input_pw_groups=1):
        super().__init__()
        # widths[i] -> widths[i + 1] is the channel mapping of layer i.
        widths = [num_inputs, *num_channels]
        blocks = []
        for level in range(len(num_channels)):
            dilation = 2 ** level
            blocks.append(CompactGroupedTemporalBlock(
                widths[level], widths[level + 1], kernel_size, stride=1,
                dilation=dilation, padding=(kernel_size - 1) * dilation,
                dropout=dropout, groups_mode=groups_mode,
                pw_groups=1 if level else input_pw_groups))
        self.network = nn.Sequential(*blocks)
    def forward(self, x):
        return self.network(x)
 # ------------------------------------------------------- Conv2d path (copy of convnet.py)
 class AsymmetricConvBlock(nn.Module):
    """Residual block of three (1, 3) convolutions; only the first one strides.

    The first conv and the 1x1 shortcut conv both use stride ``(1, stride_w)``,
    so the last axis is downsampled once per block (upstream always uses 2).
    """
    def __init__(self, in_channels, out_channels, dropout=0.3, stride_w=2):
        super().__init__()

        def conv_bn(c_in, stride):
            # One (1, 3) conv followed by its BatchNorm.
            return [nn.Conv2d(c_in, out_channels, kernel_size=(1, 3),
                              stride=(1, stride), padding=(0, 1)),
                    nn.BatchNorm2d(out_channels)]

        main_path = conv_bn(in_channels, stride_w)
        for _ in range(2):
            main_path += [nn.SiLU(inplace=True), nn.Dropout2d(dropout)]
            main_path += conv_bn(out_channels, 1)
        self.block = nn.Sequential(*main_path)
        shortcut_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                                  stride=(1, stride_w), bias=False)
        self.downsample = nn.Sequential(shortcut_conv, nn.BatchNorm2d(out_channels))
        self.activation = nn.SiLU(inplace=True)
    def forward(self, x):
        main = self.block(x)
        shortcut = self.downsample(x)
        return self.activation(main + shortcut)
 class ConvBlock1(nn.Module):
    """Stride-1 residual block of three (1, 3) convolutions with a 1x1 conv + BN shortcut."""
    def __init__(self, in_channels, out_channels, dropout=0.3):
        super().__init__()
        conv_kw = dict(kernel_size=(1, 3), padding=(0, 1))
        stages = [nn.Conv2d(in_channels, out_channels, **conv_kw),
                  nn.BatchNorm2d(out_channels)]
        for _ in range(2):
            stages.extend((nn.SiLU(inplace=True),
                           nn.Dropout2d(dropout),
                           nn.Conv2d(out_channels, out_channels, **conv_kw),
                           nn.BatchNorm2d(out_channels)))
        self.block = nn.Sequential(*stages)
        shortcut_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                                  stride=1, bias=False)
        self.downsample = nn.Sequential(shortcut_conv, nn.BatchNorm2d(out_channels))
        self.activation = nn.SiLU(inplace=True)
    def forward(self, x):
        main = self.block(x)
        shortcut = self.downsample(x)
        return self.activation(main + shortcut)
 # ----------------------------------------------------- attention (verbatim attention.py)
 class AxialAttention(nn.Module):
    """Grouped self-attention along one axis of a 4-D tensor.

    With ``width=True`` attention runs along the last input axis (dim 3); with
    ``width=False`` it runs along dim 2. The other spatial axis is folded into
    the batch. Output has ``out_planes`` channels and the input's spatial
    layout, average-pooled by ``stride`` when ``stride > 1``.

    ``in_planes`` and ``out_planes`` must both be divisible by ``groups``.
    """
    def __init__(self, in_planes, out_planes, groups=8, stride=1, bias=False, width=False):
        assert (in_planes % groups == 0) and (out_planes % groups == 0)
        super().__init__()
        self.in_planes = in_planes
        self.out_planes = out_planes
        self.groups = groups
        self.group_planes = out_planes // groups
        self.stride = stride
        # ``bias`` is stored but not used by any layer built here.
        self.bias = bias
        self.width = width
        # One 1x1 conv produces q, k and v together (3 * out_planes channels).
        self.qkv_transform = nn.Conv1d(in_planes, out_planes * 3, kernel_size=1,
                                       stride=1, padding=0, bias=False)
        self.bn_qkv = nn.BatchNorm1d(out_planes * 3)
        self.bn_similarity = nn.BatchNorm2d(groups)
        self.bn_output = nn.BatchNorm1d(out_planes)
        # ``pooling`` exists only when stride > 1; forward checks the same condition.
        if stride > 1:
            self.pooling = nn.AvgPool2d(stride, stride=stride)
        nn.init.normal_(self.qkv_transform.weight.data, 0, math.sqrt(1. / self.in_planes))
    def forward(self, x):
        """Attend along the configured axis of ``x`` and return the result in the input layout."""
        # Move the attended axis last and the channel axis second-to-last.
        if self.width:
            x = x.permute(0, 2, 1, 3)
        else:
            x = x.permute(0, 3, 1, 2)
        # Here H is the attended axis length and W the axis folded into the batch.
        N, W, C, H = x.shape
        x = x.contiguous().view(N * W, C, H)
        qkv = self.bn_qkv(self.qkv_transform(x))
        # Split the 3 * out_planes channels into q, k, v blocks of out_planes each.
        qkv = qkv.reshape(N * W, 3, self.out_planes, H).permute(1, 0, 2, 3)
        q, k, v = qkv[0], qkv[1], qkv[2]
        q = q.reshape(N * W, self.groups, self.group_planes, H)
        k = k.reshape(N * W, self.groups, self.group_planes, H)
        v = v.reshape(N * W, self.groups, self.group_planes, H)
        # Per-group similarity between every pair of positions (i, j).
        qk = torch.einsum('bgci, bgcj->bgij', q, k)
        qk = self.bn_similarity(qk)
        similarity = F.softmax(qk, dim=-1)
        # Weighted sum of values over j for each position i.
        sv = torch.einsum('bgij,bgcj->bgci', similarity, v)
        sv = sv.reshape(N * W, self.out_planes, H)
        out = self.bn_output(sv)
        out = out.view(N, W, self.out_planes, H)
        # Undo the initial permutation so channels are back on dim 1.
        if self.width:
            out = out.permute(0, 2, 1, 3)
        else:
            out = out.permute(0, 2, 3, 1)
        if self.stride > 1:
            out = self.pooling(out)
        return out
 class DualAxialAttention(nn.Module):
    """Two AxialAttention passes in sequence: ``width=True`` first, then ``width=False``.

    The first pass maps ``in_planes`` to ``out_planes`` channels; the second
    keeps ``out_planes``. ``groups``, ``stride`` and ``bias`` are forwarded to
    both passes.
    """
    def __init__(self, in_planes, out_planes, groups=8, stride=1, bias=False):
        super().__init__()
        self.width_axis = AxialAttention(in_planes, out_planes, groups, stride, bias, width=True)
        self.height_axis = AxialAttention(out_planes, out_planes, groups, stride, bias, width=False)
    def forward(self, x):
        """Apply the width-axis attention, then the height-axis attention."""
        return self.height_axis(self.width_axis(x))
 # --------------------------------------------------------------- full model
 def compute_strides(width: int, n_blocks: int, target: int = 15):
    """Choose a width stride (2 or 1) for each of ``n_blocks`` conv blocks.

    A block gets stride 2 only if the halved width ``ceil(width / 2)`` is still
    at least ``target``; otherwise it gets stride 1 and the width is unchanged.
    Returns ``(strides, final_width)``. Upstream case: 240 -> four halvings -> 15.
    """
    strides = [1] * n_blocks
    for idx in range(n_blocks):
        halved = (width + 1) // 2  # conv k=3 s=2 p=1: out = ceil(in/2)
        if halved < target:
            continue
        strides[idx] = 2
        width = halved
    return strides, width
 class CompactWiFlowPoseModel(nn.Module):
    """Parameterized upstream WiFlowPoseModel.
    Upstream config == tcn_channels=[540,440,340,240], conv_channels=[8,16,32,64],
    attn_groups=8, groups_mode='gcd20' (gcd(c,20)==20 for all upstream channels),
    input_pw_groups=1 -> identical architecture, 2,225,042 params.

    Args:
        tcn_channels: Output widths of the TCN layers; the last one becomes the
            width that the Conv2d blocks downsample.
        conv_channels: Output channels of the residual Conv2d blocks.
        attn_groups: Group count of the axial attention (AxialAttention asserts
            that it divides ``conv_channels[-1]``).
        groups_mode: Grouping rule for the TCN grouped convs (see tcn_groups).
        input_pw_groups: Pointwise-conv groups of the first TCN layer.
        dropout: Dropout passed to the TCN only; the Conv2d blocks are built
            with their own default.
        num_subcarriers: Input channel count of the TCN.
        num_keypoints: Rows of the output and the minimum width kept by
            compute_strides.
    """
    def __init__(self, tcn_channels, conv_channels, attn_groups,
                 groups_mode='gcd20', input_pw_groups=1, dropout=0.3,
                 num_subcarriers=540, num_keypoints=15):
        super().__init__()
        self.tcn = CompactTemporalBlock(
            num_inputs=num_subcarriers, num_channels=tcn_channels, kernel_size=3,
            dropout=dropout, groups_mode=groups_mode, input_pw_groups=input_pw_groups)
        self.up = ConvBlock1(1, conv_channels[0])
        # Halve the width only while it stays >= num_keypoints.
        strides, self.final_width = compute_strides(
            tcn_channels[-1], len(conv_channels), target=num_keypoints)
        # Kept as attributes so describe() can report them.
        self.conv_strides = strides
        self.residual_blocks = nn.ModuleList()
        # The first residual block maps conv_channels[0] -> conv_channels[0],
        # because ``up`` has already lifted the single input channel.
        in_channels = conv_channels[0]
        for out_channels, s in zip(conv_channels, strides):
            self.residual_blocks.append(
                AsymmetricConvBlock(in_channels, out_channels, stride_w=s))
            in_channels = out_channels
        c_last = conv_channels[-1]
        self.attention = DualAxialAttention(c_last, c_last, groups=attn_groups)
        # Decoder mid width: half of c_last, but never below 4.
        c_mid = max(c_last // 2, 4)
        self.decoder = nn.Sequential(
            nn.Conv2d(c_last, c_mid, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_mid),
            nn.SiLU(inplace=True),
            nn.Conv2d(c_mid, 2, kernel_size=1),
            nn.BatchNorm2d(2),
            nn.SiLU(inplace=True)
        )
        self.avg_pool = nn.AdaptiveAvgPool2d((num_keypoints, 1))
        self._initialize_weights()
    def _initialize_weights(self):
        """Re-initialise Conv1d, BatchNorm1d/LayerNorm and Linear modules.

        Conv2d and BatchNorm2d modules match none of the branches below, so
        they keep the initialisation they received at construction.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm1d, nn.LayerNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
    def forward(self, x):
        """Map a CSI window batch to keypoints of shape [B, num_keypoints, 2].

        Shape comments assume the defaults (540 subcarriers, 20 time steps);
        the 20-step length is taken from the original comments — TODO confirm.
        """
        # [B, num_subcarriers, 20]
        x = self.tcn(x)                          # [B, C_tcn, 20]
        x = x.transpose(1, 2).unsqueeze(1)       # [B, 1, 20, C_tcn]
        x = self.up(x)
        for block in self.residual_blocks:
            x = block(x)                         # [B, C_conv, 20, W']
        x = x.permute(0, 1, 3, 2)                # [B, C_conv, W', 20]
        x = self.attention(x)
        x = self.decoder(x)                      # [B, 2, W', 20]
        x = self.avg_pool(x).squeeze(-1)         # [B, 2, num_keypoints]
        return x.transpose(1, 2)                 # [B, num_keypoints, 2]
 def describe(model: 'CompactWiFlowPoseModel'):
    """Summarise a model: total parameter count, per-layer TCN groups, conv strides, final width."""
    n_params = 0
    for tensor in model.parameters():
        n_params += tensor.numel()
    summary = {'params': n_params}
    summary['tcn_groups_per_block'] = [layer.groups for layer in model.tcn.network]
    summary['conv_strides'] = model.conv_strides
    summary['final_width'] = model.final_width
    return summary
@@ -1,278 +0,0 @@
 """WiFlow-STD compact-variant efficiency sweep (ADR-152) — sequential overnight runner.
 Trains compact variants of the upstream WiFlow-STD architecture on the same
 data/split as the full-size reference retraining (seed 42, file-level 70/15/15,
 upstream dataset.py) and evaluates PCK@10..50 + MPJPE on the full test split and
 the corruption-free test subset (file indices < 487).
 Training mirrors upstream run.py/train.py defaults except:
 - fp32 only (no fp16 autocast / GradScaler — avoids the BN-poisoning trap
  documented in RESULTS.md defect 5; data on disk is already cleaned).
 - batch 64 (kept modest: another GPU job may share the 16 GB card tonight).
 - scheduler + early stopping keyed on val MPJPE (upstream early-stops on val MPE
  with patience 5; same here).
 Usage:
  venv/bin/python sweep/run_sweep.py --dry-run    # param counts only
  nohup venv/bin/python sweep/run_sweep.py > sweep/sweep.log 2>&1 &
 Idempotent: variants already present in sweep/results.jsonl are skipped.
 NOTE: deployed to ruvultra (~/wiflow-std-bench/sweep) as a standalone file, so
 it deliberately inlines its helpers. The reference implementations (upstream
 import shim, >1GB np.load mmap patch, key-remap loader, canonical evaluate
 loop) live in benchmarks/wiflow-std/_bench_common.py — keep copies in sync.
 """
 import argparse
 import copy
 import json
 import os
 import random
 import sys
 import time
 import numpy as np
 import torch
 from torch.utils.data import DataLoader, Subset
 # csi_windows.npy is ~13 GB; mmap large arrays instead of eagerly loading
 # ~15 GB into RAM (same patch as _bench_common._np_load_mmap).
 _np_load = np.load
 def _np_load_mmap(path, *a, **kw):
    """Drop-in ``np.load`` that memory-maps ``.npy`` files larger than 1 GiB.

    ``mmap_mode='r'`` is injected only when ``path`` is a str or os.PathLike
    filesystem path ending in ``.npy``, the file is larger than 1 GiB, and the
    caller did not choose an ``mmap_mode`` (by keyword or positionally). Every
    other call is forwarded to the original ``np.load`` unchanged.
    """
    try:
        fs_path = os.fspath(path)  # accepts str and pathlib.Path alike
    except TypeError:
        fs_path = None  # open file objects and the like: never memory-map
    # ``a`` non-empty means mmap_mode was passed positionally (it is np.load's
    # second parameter); adding the keyword as well would raise TypeError.
    if (isinstance(fs_path, str) and fs_path.endswith('.npy')
            and not a and 'mmap_mode' not in kw
            and os.path.getsize(fs_path) > 1 << 30):
        kw['mmap_mode'] = 'r'
    return _np_load(path, *a, **kw)
 np.load = _np_load_mmap
 BENCH = os.path.expanduser('~/wiflow-std-bench')
 SWEEP = os.path.join(BENCH, 'sweep')
 sys.path.insert(0, os.path.join(BENCH, 'upstream'))
 sys.path.insert(0, SWEEP)
 from dataset import PreprocessedCSIKeypointsDataset, create_preprocessed_train_val_test_loaders  # noqa: E402
 from losses.pose_loss import PoseLoss          # noqa: E402
 from utils.metrics import calculate_pck, calculate_mpjpe  # noqa: E402
 from model_compact import CompactWiFlowPoseModel, describe  # noqa: E402
 # Compact architectures to sweep, in run order. build_model() reads the
 # 'tcn', 'conv', 'attn_groups', 'groups_mode' and 'input_pw_groups' keys;
 # 'name' labels log lines, checkpoints and results.jsonl records.
 VARIANTS = [
    # name, tcn_channels, conv_channels, attn_groups, groups_mode, input_pw_groups
    dict(name='half',    tcn=[270, 220, 170, 120], conv=[4, 8, 16, 32], attn_groups=4,
         groups_mode='gcd20', input_pw_groups=1),
    dict(name='quarter', tcn=[135, 110, 85, 60],   conv=[2, 4, 8, 16],  attn_groups=2,
         groups_mode='gcd20', input_pw_groups=1),
    dict(name='tiny',    tcn=[68, 56, 44, 32],     conv=[2, 4, 8, 16],  attn_groups=2,
         groups_mode='depthwise', input_pw_groups=4),
 ]
 # Training hyperparameters shared by every variant.
 BATCH = 64          # training batch size
 EPOCHS = 50         # maximum epochs per variant
 PATIENCE = 5        # epochs without val-MPJPE improvement before early stop
 LR = 1e-4           # initial AdamW learning rate
 WEIGHT_DECAY = 5e-5
 SEED = 42
 CORRUPT_FILE_START = 487  # files 487-499 were zero-filled by clean_nan.py
 def set_seed(seed=SEED):
    """Seed Python, NumPy and torch (CPU and all CUDA devices) and force deterministic cuDNN."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed,
                   torch.cuda.manual_seed_all):
        seeder(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
 def build_model(v, dropout=0.5):
    """Instantiate the CompactWiFlowPoseModel described by variant dict ``v``."""
    arch = dict(
        tcn_channels=v['tcn'],
        conv_channels=v['conv'],
        attn_groups=v['attn_groups'],
        groups_mode=v['groups_mode'],
        input_pw_groups=v['input_pw_groups'],
    )
    return CompactWiFlowPoseModel(dropout=dropout, **arch)
@torch.no_grad()
def evaluate(model, loader, device):
    """Evaluate ``model`` on ``loader``: sample-weighted mean MPJPE and PCK@10..50.

    Each batch metric is weighted by the batch size, so the result is a
    per-sample average even when the last batch is short. Returns a dict with
    ``samples``, ``mpjpe`` and ``pck@10`` ... ``pck@50``.
    """
    model.eval()
    thresholds = (0.1, 0.2, 0.3, 0.4, 0.5)
    pck_sums = dict.fromkeys(thresholds, 0.0)
    mpjpe_sum = 0.0
    seen = 0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        preds = model(inputs)
        batch_n = labels.size(0)
        mpjpe_sum += calculate_mpjpe(preds, labels) * batch_n
        batch_pck = calculate_pck(preds, labels, thresholds=list(pck_sums))
        for t in pck_sums:
            pck_sums[t] += batch_pck[t] * batch_n
        seen += batch_n
    metrics = {'samples': seen, 'mpjpe': mpjpe_sum / seen}
    for t in thresholds:
        metrics[f'pck@{int(t * 100)}'] = pck_sums[t] / seen
    return metrics
 def train_variant(v, dataset, device):
    """Train one compact variant and evaluate its best checkpoint.

    Args:
        v: Variant dict from VARIANTS.
        dataset: Dataset handed to the upstream loader factory; its
            ``window_to_file`` mapping is used to build the clean test subset.
        device: Torch device for the model and batches.

    Returns:
        Result dict (architecture, hyperparameters, training summary, optional
        ``error``). When at least one epoch improved validation MPJPE it also
        contains ``checkpoint``, ``test_full`` and ``test_clean``.
    """
    set_seed(SEED)
    train_loader, val_loader, test_loader = create_preprocessed_train_val_test_loaders(
        dataset=dataset, batch_size=BATCH, num_workers=2, random_seed=SEED)
    set_seed(SEED)  # re-seed after split so init is split-independent
    model = build_model(v).to(device)
    info = describe(model)
    print(f"[{v['name']}] params={info['params']:,} tcn_groups={info['tcn_groups_per_block']} "
          f"conv_strides={info['conv_strides']} final_width={info['final_width']}", flush=True)
    criterion = PoseLoss(position_weight=1.0, bone_weight=0.2, loss_type='smooth_l1')
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY,
                                  betas=(0.9, 0.999))
    # Halve the LR after 3 epochs without val-MPJPE improvement, down to LR / 1000.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=3, min_lr=LR / 1000,
        cooldown=1, threshold=1e-4)
    best_val_mpe = float('inf')
    best_val_pck20 = 0.0
    best_epoch = 0
    best_state = None
    patience_counter = 0
    t0 = time.time()
    error = None
    epochs_run = 0
    for epoch in range(1, EPOCHS + 1):
        model.train()
        ep_loss, nb = 0.0, 0
        te = time.time()
        for i, (bx, by) in enumerate(train_loader):
            bx = bx.to(device, non_blocking=True)
            by = by.to(device, non_blocking=True)
            optimizer.zero_grad(set_to_none=True)
            out = model(bx)
            loss, _parts = criterion(out, by)
            # Abort the variant on a NaN/inf loss; the message is recorded in
            # the result instead of raising.
            if not torch.isfinite(loss):
                error = f'non-finite loss at epoch {epoch} step {i}'
                break
            loss.backward()
            optimizer.step()
            ep_loss += loss.item()
            nb += 1
            if epoch == 1 and i % 500 == 0:
                print(f"[{v['name']}] e1 step {i}/{len(train_loader)} loss={loss.item():.5f}",
                      flush=True)
        if error:
            break
        # Only epochs that finished without a non-finite loss are counted.
        epochs_run = epoch
        val = evaluate(model, val_loader, device)
        scheduler.step(val['mpjpe'])
        lr_now = optimizer.param_groups[0]['lr']
        print(f"[{v['name']}] epoch {epoch}/{EPOCHS} train_loss={ep_loss / max(nb, 1):.5f} "
              f"val_mpjpe={val['mpjpe']:.5f} val_pck20={val['pck@20'] * 100:.2f}% "
              f"lr={lr_now:.2e} ({time.time() - te:.0f}s)", flush=True)
        if val['mpjpe'] < best_val_mpe:
            best_val_mpe = val['mpjpe']
            best_val_pck20 = val['pck@20']
            best_epoch = epoch
            # Deep copy so later optimizer steps cannot alter the snapshot.
            best_state = copy.deepcopy(model.state_dict())
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f"[{v['name']}] early stop at epoch {epoch} (best {best_epoch})", flush=True)
                break
    train_seconds = time.time() - t0
    result = {
        'variant': v['name'], 'params': info['params'],
        'tcn_channels': v['tcn'], 'conv_channels': v['conv'],
        'attn_groups': v['attn_groups'], 'groups_mode': v['groups_mode'],
        'input_pw_groups': v['input_pw_groups'],
        'tcn_groups_per_block': info['tcn_groups_per_block'],
        'conv_strides': info['conv_strides'], 'final_width': info['final_width'],
        'batch_size': BATCH, 'max_epochs': EPOCHS, 'patience': PATIENCE,
        'lr': LR, 'weight_decay': WEIGHT_DECAY, 'seed': SEED, 'precision': 'fp32',
        'epochs_run': epochs_run, 'best_epoch': best_epoch,
        # None (not inf / 0.0) when no epoch produced a best state.
        'best_val_mpjpe': best_val_mpe if best_state else None,
        'best_val_pck20': best_val_pck20 if best_state else None,
        'train_seconds': round(train_seconds, 1),
        'torch': torch.__version__, 'error': error,
        'finished_utc': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()),
    }
    if best_state is not None:
        ckpt = os.path.join(SWEEP, f"{v['name']}_best.pth")
        torch.save(best_state, ckpt)
        result['checkpoint'] = ckpt
        model.load_state_dict(best_state)
        # Rebuild the test loader with a larger batch and no shuffling.
        eval_loader = DataLoader(test_loader.dataset, batch_size=256, shuffle=False,
                                 num_workers=2)
        result['test_full'] = evaluate(model, eval_loader, device)
        # Clean subset: test windows whose source file index is below
        # CORRUPT_FILE_START.
        w2f = dataset.window_to_file
        clean_idx = [i for i in test_loader.dataset.indices if w2f[i] < CORRUPT_FILE_START]
        clean_loader = DataLoader(Subset(dataset, clean_idx), batch_size=256,
                                  shuffle=False, num_workers=2)
        result['test_clean'] = evaluate(model, clean_loader, device)
        print(f"[{v['name']}] TEST clean: pck20={result['test_clean']['pck@20'] * 100:.2f}% "
              f"mpjpe={result['test_clean']['mpjpe']:.5f} | full: "
              f"pck20={result['test_full']['pck@20'] * 100:.2f}%", flush=True)
    return result
 def main():
    """Entry point: dry-run parameter report, or the sequential training sweep.

    With ``--dry-run`` each variant is built, run once on a random
    ``[2, 540, 20]`` batch and summarised, and nothing is trained. Otherwise
    every variant not yet recorded in ``sweep/results.jsonl`` is trained, and
    one JSON line per variant is appended (including a short error record when
    training raises).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--dry-run', action='store_true', help='print param counts and exit')
    args = ap.parse_args()
    if args.dry_run:
        for v in VARIANTS:
            m = build_model(v)
            info = describe(m)
            x = torch.randn(2, 540, 20)
            m.eval()
            y = m(x)
            print(f"{v['name']:8s} params={info['params']:>9,} "
                  f"tcn={v['tcn']} conv={v['conv']} attn_g={v['attn_groups']} "
                  f"mode={v['groups_mode']} pw_g={v['input_pw_groups']} "
                  f"tcn_groups={info['tcn_groups_per_block']} strides={info['conv_strides']} "
                  f"W'={info['final_width']} out={tuple(y.shape)}")
        return
    results_path = os.path.join(SWEEP, 'results.jsonl')
    # Variants already recorded (successfully or with an error) are skipped.
    done = set()
    if os.path.exists(results_path):
        with open(results_path) as f:
            for lineno, line in enumerate(f, 1):
                if not line.strip():
                    continue  # blank lines carry no record
                try:
                    done.add(json.loads(line)['variant'])
                except Exception as exc:
                    # Still best-effort, but report the bad line: a silently
                    # dropped record makes its variant train again unnoticed.
                    print(f"warning: ignoring malformed line {lineno} of "
                          f"{results_path}: {exc!r}", flush=True)
    device = torch.device('cuda')
    print(f"torch {torch.__version__} on {torch.cuda.get_device_name(0)}", flush=True)
    data_dir = os.path.join(BENCH, 'preprocessed_csi_data')
    dataset = PreprocessedCSIKeypointsDataset(data_dir=data_dir, keypoint_scale=1000.0,
                                              enable_temporal_clean=True)
    for v in VARIANTS:
        if v['name'] in done:
            print(f"[{v['name']}] already in results.jsonl — skipping", flush=True)
            continue
        print(f"\n===== variant: {v['name']} =====", flush=True)
        try:
            result = train_variant(v, dataset, device)
        except Exception as e:  # record and move on to next variant
            import traceback
            traceback.print_exc()
            result = {'variant': v['name'], 'error': repr(e),
                      'finished_utc': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}
        # Append after every variant so finished work survives a later crash.
        with open(results_path, 'a') as f:
            f.write(json.dumps(result) + '\n')
            f.flush()
    print('\nSWEEP COMPLETE', flush=True)
 if __name__ == '__main__':
    main()
@@ -1,772 +0,0 @@
 {
  "torch": {
    "env": {
      "torch": "2.12.0+cpu",
      "platform": "Windows-11-10.0.26200-SP0",
      "processor": "Intel64 Family 6 Model 197 Stepping 2, GenuineIntel",
      "num_threads": 16,
      "checkpoint": "results\\retrained_best_pose_model.pth",
      "params": 2225042
    },
    "variants": {
      "fp32": {
        "file": "retrained_fp32_resaved.pth",
        "size_bytes": 9068948,
        "size_mb": 9.068948,
        "latency_batch1": {
          "batch_size": 1,
          "runs": 100,
          "median_ms_per_batch": 24.903650000851485,
          "median_ms_per_window": 24.903650000851485,
          "windows_per_second": 40.15475642991324
        },
        "latency_batch64": {
          "batch_size": 64,
          "runs": 30,
          "median_ms_per_batch": 184.02919999789447,
          "median_ms_per_window": 2.875456249967101,
          "windows_per_second": 347.77089723115813
        },
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9668200004577636,
          "pck@50": 0.9915333324432373,
          "mpjpe": 0.00936222033649683,
          "wall_seconds": 37.85407733917236
        }
      },
      "fp16": {
        "file": "retrained_fp16.pth",
        "size_bytes": 4580332,
        "size_mb": 4.580332,
        "latency_batch1": {
          "batch_size": 1,
          "runs": 100,
          "median_ms_per_batch": 23.936699999467237,
          "median_ms_per_window": 23.936699999467237,
          "windows_per_second": 41.776853117691964
        },
        "latency_batch64": {
          "batch_size": 64,
          "runs": 30,
          "median_ms_per_batch": 102.32584999903338,
          "median_ms_per_window": 1.5988414062348966,
          "windows_per_second": 625.4529036465817
        },
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.966773332977295,
          "pck@50": 0.9915066654205322,
          "mpjpe": 0.009460017587244511,
          "wall_seconds": 21.632277250289917
        }
      },
      "int8_dynamic": {
        "file": "retrained_int8_dynamic.pth",
        "size_bytes": 9068948,
        "size_mb": 9.068948,
        "latency_batch1": {
          "batch_size": 1,
          "runs": 100,
          "median_ms_per_batch": 18.105350000041653,
          "median_ms_per_window": 18.105350000041653,
          "windows_per_second": 55.23229321707117
        },
        "latency_batch64": {
          "batch_size": 64,
          "runs": 30,
          "median_ms_per_batch": 168.77549999844632,
          "median_ms_per_window": 2.6371171874757238,
          "windows_per_second": 379.20195763359703
        },
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9668200004577636,
          "pck@50": 0.9915333324432373,
          "mpjpe": 0.00936222033649683,
          "wall_seconds": 45.35376596450806
        }
      }
    },
    "int8_dynamic_quant_report": {
      "eligible_module_counts": {
        "nn.Linear": 0,
        "nn.Conv1d": 21,
        "nn.Conv2d": 22
      },
      "modules_actually_quantized": [],
      "n_modules_quantized": 0,
      "params_total": 2225042,
      "params_quantized": 0,
      "params_quantized_fraction": 0.0
    },
    "accuracy_subset": {
      "description": "seed-42 file-level 70/15/15 test split, corrupted windows (files 487-499) excluded, seed-42 random subset",
      "subset_size": 10000,
      "clean_test_total": 10000
    }
  },
  "onnx": {
    "env": {
      "torch": "2.12.0+cpu",
      "onnxruntime": "1.26.0",
      "platform": "Windows-11-10.0.26200-SP0"
    },
    "export": {
      "mode": "dynamic-batch",
      "exporter": "torchscript",
      "file": "retrained_fp32_dynamic.onnx",
      "size_mb": 8.971781
    },
    "parity": {
      "fixture": "results/parity_fixture.npz (batch 2, seed 42)",
      "max_abs_diff_vs_stored_fixture": 2.384185791015625e-07,
      "max_abs_diff_vs_torch_now": 2.384185791015625e-07,
      "pass_lt_1e-4": true
    },
    "latency": {
      "batch1": {
        "batch_size": 1,
        "runs": 100,
        "median_ms_per_batch": 2.5410999987798277,
        "median_ms_per_window": 2.5410999987798277,
        "windows_per_second": 393.5303610563043
      },
      "batch64": {
        "batch_size": 64,
        "runs": 30,
        "median_ms_per_batch": 181.95204999938142,
        "median_ms_per_window": 2.8430007812403346,
        "windows_per_second": 351.7410218803118
      }
    },
    "ort_int8_dynamic_supplementary": {
      "file": "retrained_int8_ort_dynamic.onnx",
      "size_mb": 2.438794,
      "runs": true,
      "max_abs_diff_vs_fp32_fixture": 0.00827130675315857
    }
  },
  "onnx_accuracy": {
    "onnx_fp32": {
      "samples": 10000,
      "pck@20": 0.9668200004577636,
      "pck@50": 0.9915333324432373,
      "mpjpe": 0.00936222568154335,
      "wall_seconds": 22.34790802001953
    },
    "onnx_int8_ort_dynamic": {
      "samples": 10000,
      "pck@20": 0.965240001964569,
      "pck@50": 0.9915466655731201,
      "mpjpe": 0.01108054072111845,
      "wall_seconds": 55.742953062057495
    }
  },
  "latency_controlled_rerun": {
    "note": "3 interleaved repetitions per variant, median ms/window; quiet box",
    "fp32": {
      "batch1_ms_per_window_median": 10.969150001983508,
      "batch1_reps": [
        10.969150001983508,
        12.646450000829645,
        10.49820000116597
      ],
      "batch64_ms_per_window_median": 2.2734187500077496,
      "batch64_reps": [
        2.377234374989712,
        2.124126562478068,
        2.2734187500077496
      ]
    },
    "fp16": {
      "batch1_ms_per_window_median": 24.313550000442774,
      "batch1_reps": [
        25.1078499986761,
        21.856999999727122,
        24.313550000442774
      ],
      "batch64_ms_per_window_median": 2.414695312495496,
      "batch64_reps": [
        2.5705156249955508,
        1.7137437499741281,
        2.414695312495496
      ]
    },
    "int8_dynamic": {
      "batch1_ms_per_window_median": 15.627150000000256,
      "batch1_reps": [
        17.67525000104797,
        14.627999998992891,
        15.627150000000256
      ],
      "batch64_ms_per_window_median": 2.0546906250160646,
      "batch64_reps": [
        2.0546906250160646,
        2.03407343752815,
        2.9325796875241394
      ]
    },
    "onnx_fp32": {
      "batch1_ms_per_window_median": 3.186650001225644,
      "batch1_reps": [
        2.7332500012562377,
        3.1995500012271805,
        3.186650001225644
      ],
      "batch64_ms_per_window_median": 1.9893374999924163,
      "batch64_reps": [
        1.5590843750032946,
        1.9893374999924163,
        2.2144343749914697
      ]
    },
    "onnx_int8_ort_dynamic": {
      "batch1_ms_per_window_median": 6.50984999811044,
      "batch1_reps": [
        6.50984999811044,
        6.455249998907675,
        6.789299999581999
      ],
      "batch64_ms_per_window_median": 5.770093750015803,
      "batch64_reps": [
        5.770093750015803,
        3.912374999970325,
        7.8067296875019565
      ]
    }
  },
  "onnx_static_ptq": {
    "env": {
      "onnxruntime": "1.26.0",
      "torch": "2.12.0+cpu",
      "platform": "Windows-11-10.0.26200-SP0",
      "source_model": "retrained_fp32_dynamic.onnx",
      "preprocessed_model": {
        "file": "retrained_fp32_preproc.onnx",
        "size_mb": 8.981529
      }
    },
    "variants": {
      "minmax_all": {
        "file": "retrained_int8_static_minmax_all.onnx",
        "size_bytes": 2604286,
        "size_mb": 2.604286,
        "calibration": {
          "method": "minmax",
          "windows": 1000,
          "percentile": null,
          "seconds": 5.052440166473389
        },
        "scope": "all",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 283,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 181,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.015945255756378174,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9545266661643982,
          "pck@50": 0.9913666645050049,
          "mpjpe": 0.014860070134699345,
          "wall_seconds": 43.455235958099365
        }
      },
      "minmax_conv": {
        "file": "retrained_int8_static_minmax_conv.onnx",
        "size_bytes": 2527421,
        "size_mb": 2.527421,
        "calibration": {
          "method": "minmax",
          "windows": 1000,
          "percentile": null,
          "seconds": 4.380746126174927
        },
        "scope": "conv",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 156,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 78,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.010693132877349854,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9663399996757507,
          "pck@50": 0.9918666641235352,
          "mpjpe": 0.01084446222037077,
          "wall_seconds": 35.937947034835815
        }
      },
      "entropy_all": {
        "file": "retrained_int8_static_entropy_all.onnx",
        "size_bytes": 2604268,
        "size_mb": 2.604268,
        "calibration": {
          "method": "entropy",
          "windows": 512,
          "percentile": null,
          "seconds": 23.835066318511963
        },
        "scope": "all",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 283,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 181,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.015280365943908691,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9530466662406921,
          "pck@50": 0.9912600006103516,
          "mpjpe": 0.015098519864678382,
          "wall_seconds": 51.514281034469604
        }
      },
      "entropy_conv": {
        "file": "retrained_int8_static_entropy_conv.onnx",
        "size_bytes": 2527403,
        "size_mb": 2.527403,
        "calibration": {
          "method": "entropy",
          "windows": 512,
          "percentile": null,
          "seconds": 9.634419918060303
        },
        "scope": "conv",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 156,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 78,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.012535125017166138,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9659599989891052,
          "pck@50": 0.9918666648864746,
          "mpjpe": 0.010778637571632861,
          "wall_seconds": 41.01180171966553
        }
      },
      "percentile_all": {
        "file": "retrained_int8_static_percentile_all.onnx",
        "size_bytes": 2604052,
        "size_mb": 2.604052,
        "calibration": {
          "method": "percentile",
          "windows": 512,
          "percentile": 99.99,
          "seconds": 20.221954584121704
        },
        "scope": "all",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 283,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 181,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.017689883708953857,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9639333323478698,
          "pck@50": 0.9916799991607667,
          "mpjpe": 0.012176512064039708,
          "wall_seconds": 49.365190744400024
        }
      },
      "percentile_conv": {
        "file": "retrained_int8_static_percentile_conv.onnx",
        "size_bytes": 2527241,
        "size_mb": 2.527241,
        "calibration": {
          "method": "percentile",
          "windows": 512,
          "percentile": 99.99,
          "seconds": 8.223475694656372
        },
        "scope": "conv",
        "per_channel": true,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": {
          "Add": 9,
          "AveragePool": 1,
          "BatchNormalization": 12,
          "Concat": 10,
          "Conv": 43,
          "DequantizeLinear": 156,
          "Einsum": 4,
          "Gather": 16,
          "Mul": 39,
          "QuantizeLinear": 78,
          "Reshape": 14,
          "Shape": 2,
          "Sigmoid": 37,
          "Slice": 8,
          "Softmax": 2,
          "Squeeze": 1,
          "Transpose": 7,
          "Unsqueeze": 11
        },
        "max_abs_diff_vs_fp32_fixture": 0.014725983142852783,
        "accuracy": {
          "samples": 10000,
          "pck@20": 0.9660599988937378,
          "pck@50": 0.9916066654205322,
          "mpjpe": 0.010310938355326652,
          "wall_seconds": 36.89548587799072
        }
      }
    },
    "latency": {
      "note": "3 interleaved repetitions per variant, median ms/window; onnx_fp32 / onnx_int8_ort_dynamic are same-session references",
      "onnx_fp32": {
        "batch1_reps": [
          4.5327999996516155,
          2.535649999117595,
          2.167549997466267
        ],
        "batch64_reps": [
          1.9354515624740998,
          2.4948054687854437,
          1.9334703125082342
        ],
        "batch1_ms_per_window_median": 2.535649999117595,
        "batch64_ms_per_window_median": 1.9354515624740998
      },
      "onnx_int8_ort_dynamic": {
        "batch1_reps": [
          5.698599999959697,
          5.721350000385428,
          4.805099997611251
        ],
        "batch64_reps": [
          4.096601562508795,
          4.857628124995017,
          4.583800000006022
        ],
        "batch1_ms_per_window_median": 5.698599999959697,
        "batch64_ms_per_window_median": 4.583800000006022
      },
      "entropy_all": {
        "batch1_reps": [
          6.444149999879301,
          5.038299999796436,
          5.713200000172947
        ],
        "batch64_reps": [
          4.149468750028973,
          3.437125000004926,
          4.410960937491382
        ],
        "batch1_ms_per_window_median": 5.713200000172947,
        "batch64_ms_per_window_median": 4.149468750028973
      },
      "entropy_conv": {
        "batch1_reps": [
          4.874750000453787,
          5.169099998965976,
          5.236699998931726
        ],
        "batch64_reps": [
          3.010160156236452,
          3.1175546875203963,
          3.516850781238645
        ],
        "batch1_ms_per_window_median": 5.169099998965976,
        "batch64_ms_per_window_median": 3.1175546875203963
      },
      "percentile_all": {
        "batch1_reps": [
          5.184749999898486,
          5.2898499998264015,
          5.916899999647285
        ],
        "batch64_reps": [
          4.305105468745296,
          4.460741406262514,
          4.184502343747454
        ],
        "batch1_ms_per_window_median": 5.2898499998264015,
        "batch64_ms_per_window_median": 4.305105468745296
      },
      "percentile_conv": {
        "batch1_reps": [
          4.916449999655015,
          7.150899999032845,
          5.284949998895172
        ],
        "batch64_reps": [
          3.855813281262499,
          4.688969531230214,
          5.220103124997877
        ],
        "batch1_ms_per_window_median": 5.284949998895172,
        "batch64_ms_per_window_median": 4.688969531230214
      },
      "minmax_all": {
        "batch1_reps": [
          6.463300000177696,
          7.149449998905766,
          5.3209000016067876
        ],
        "batch64_reps": [
          3.9251343750095202,
          4.033442187505898,
          3.428199218745931
        ],
        "batch1_ms_per_window_median": 6.463300000177696,
        "batch64_ms_per_window_median": 3.9251343750095202
      },
      "minmax_conv": {
        "batch1_reps": [
          5.9961499991914025,
          5.236549999608542,
          4.854399998293957
        ],
        "batch64_reps": [
          4.368359375007458,
          3.249617187492504,
          3.0238906249735464
        ],
        "batch1_ms_per_window_median": 5.236549999608542,
        "batch64_ms_per_window_median": 3.249617187492504
      }
    },
    "accuracy_subset": {
      "description": "seed-42 file-level 70/15/15 test split, corrupted windows excluded, seed-42 random subset (same as quantize_bench/eval_ort_accuracy)",
      "subset_size": 10000
    }
  },
  "tiny_variant": {
    "env": {
      "torch": "2.12.0+cpu",
      "onnxruntime": "1.26.0",
      "platform": "Windows-11-10.0.26200-SP0",
      "num_threads": 16,
      "checkpoint": "results\\tiny_best.pth",
      "checkpoint_size_bytes": 340555,
      "params": 56290,
      "variant_config": {
        "tcn": [
          68,
          56,
          44,
          32
        ],
        "conv": [
          2,
          4,
          8,
          16
        ],
        "attn_groups": 2,
        "groups_mode": "depthwise",
        "input_pw_groups": 4
      }
    },
    "export": {
      "mode": "dynamic-batch",
      "exporter": "torchscript",
      "opset": 17,
      "file": "tiny_fp32_dynamic.onnx",
      "size_bytes": 295279,
      "size_mb": 0.295279,
      "verified_batches": [
        1,
        2,
        64
      ],
      "note": "AdaptiveAvgPool2d((15,1)) replaced at export by an exact mean(-1) + constant averaging matmul (final_width 16 is not a multiple of 15, which the TorchScript exporter rejects); exactness proven by the parity check vs the original torch model"
    },
    "parity": {
      "fixture": "results/parity_fixture.npz input (batch 2, seed 42); reference output recomputed with the tiny torch model",
      "max_abs_diff_vs_torch": 1.4901161193847656e-07,
      "pass_lt_1e-4": true
    },
    "int8_static_percentile_conv": {
      "file": "tiny_int8_static_percentile_conv.onnx",
      "size_bytes": 248278,
      "size_mb": 0.248278,
      "calibration": {
        "method": "percentile",
        "percentile": 99.99,
        "windows": 512,
        "scope": "conv-only TRAIN-split corruption-free",
        "seconds": 1.5347836017608643
      },
      "per_channel": true,
      "activation_type": "QInt8",
      "weight_type": "QInt8",
      "max_abs_diff_vs_fp32_fixture": 0.018491357564926147
    },
    "latency": {
      "note": "3 interleaved repetitions per variant, median ms/window; full-model sessions are same-session references",
      "tiny_onnx_fp32": {
        "batch1_reps": [
          0.6312500008789357,
          0.6834500018157996,
          0.6595999984710943
        ],
        "batch64_reps": [
          0.37747578119251557,
          0.24196640623586063,
          0.2314671875183194
        ],
        "batch1_ms_per_window_median": 0.6595999984710943,
        "batch64_ms_per_window_median": 0.24196640623586063
      },
      "tiny_onnx_int8_static_percentile_conv": {
        "batch1_reps": [
          0.7988500001374632,
          0.9382499993080273,
          0.8451000030618161
        ],
        "batch64_reps": [
          0.9211476562995813,
          1.3045390625165965,
          1.026230468767153
        ],
        "batch1_ms_per_window_median": 0.8451000030618161,
        "batch64_ms_per_window_median": 1.026230468767153
      },
      "full_onnx_fp32_reference": {
        "batch1_reps": [
          2.267249998112675,
          2.80170000041835,
          2.132149998942623
        ],
        "batch64_reps": [
          1.3050578124875756,
          1.4244992187855132,
          1.8014164062947202
        ],
        "batch1_ms_per_window_median": 2.267249998112675,
        "batch64_ms_per_window_median": 1.4244992187855132
      },
      "full_onnx_int8_static_percentile_conv_reference": {
        "batch1_reps": [
          5.529599999135826,
          4.768399998283712,
          6.215800000063609
        ],
        "batch64_reps": [
          3.815724218725336,
          3.1025562500417436,
          4.333318749957016
        ],
        "batch1_ms_per_window_median": 5.529599999135826,
        "batch64_ms_per_window_median": 3.815724218725336
      }
    },
    "accuracy_subset": {
      "description": "seed-42 file-level 70/15/15 test split, corrupted windows excluded, seed-42 random subset (same as quantize_bench/eval_ort_accuracy/static_ptq_bench)",
      "subset_size": 10000
    },
    "accuracy": {
      "tiny_onnx_fp32": {
        "samples": 10000,
        "pck@20": 0.941106667804718,
        "pck@50": 0.99369333152771,
        "mpjpe": 0.012527281279861927,
        "wall_seconds": 10.927234888076782
      },
      "tiny_onnx_int8_static_percentile_conv": {
        "samples": 10000,
        "pck@20": 0.9268133331298828,
        "pck@50": 0.9932933319091797,
        "mpjpe": 0.014906252065300942,
        "wall_seconds": 12.320892333984375
      }
    }
  }
 }
@@ -1,3 +0,0 @@
 {"variant": "half", "params": 843834, "tcn_channels": [270, 220, 170, 120], "conv_channels": [4, 8, 16, 32], "attn_groups": 4, "groups_mode": "gcd20", "input_pw_groups": 1, "tcn_groups_per_block": [[20, 10], [10, 20], [20, 10], [10, 20]], "conv_strides": [2, 2, 2, 1], "final_width": 15, "batch_size": 64, "max_epochs": 50, "patience": 5, "lr": 0.0001, "weight_decay": 5e-05, "seed": 42, "precision": "fp32", "epochs_run": 28, "best_epoch": 23, "best_val_mpjpe": 0.008576328293592842, "best_val_pck20": 0.9690593021534107, "train_seconds": 1346.4, "torch": "2.11.0+cu128", "error": null, "finished_utc": "2026-06-11T03:09:47Z", "checkpoint": "/home/ruvultra/wiflow-std-bench/sweep/half_best.pth", "test_full": {"samples": 54000, "mpjpe": 0.009419974447676428, "pck@10": 0.8740543655289544, "pck@20": 0.9610469643628156, "pck@30": 0.9813556064146537, "pck@40": 0.9896086878246731, "pck@50": 0.9934827546013726}, "test_clean": {"samples": 52560, "mpjpe": 0.008980081718602137, "pck@10": 0.8840944136840205, "pck@20": 0.9662253179869514, "pck@30": 0.9847971080282144, "pck@40": 0.9917795997050618, "pck@50": 0.9946956242600532}}
 {"variant": "quarter", "params": 338600, "tcn_channels": [135, 110, 85, 60], "conv_channels": [2, 4, 8, 16], "attn_groups": 2, "groups_mode": "gcd20", "input_pw_groups": 1, "tcn_groups_per_block": [[20, 5], [5, 10], [10, 5], [5, 20]], "conv_strides": [2, 2, 1, 1], "final_width": 15, "batch_size": 64, "max_epochs": 50, "patience": 5, "lr": 0.0001, "weight_decay": 5e-05, "seed": 42, "precision": "fp32", "epochs_run": 50, "best_epoch": 50, "best_val_mpjpe": 0.008780752391864856, "best_val_pck20": 0.9672531302240159, "train_seconds": 1754.4, "torch": "2.11.0+cu128", "error": null, "finished_utc": "2026-06-11T03:39:06Z", "checkpoint": "/home/ruvultra/wiflow-std-bench/sweep/quarter_best.pth", "test_full": {"samples": 54000, "mpjpe": 0.009705399298005634, "pck@10": 0.8646123917014511, "pck@20": 0.9553815319449813, "pck@30": 0.979827209190086, "pck@40": 0.9887037501511751, "pck@50": 0.9931309027671814}, "test_clean": {"samples": 52560, "mpjpe": 0.009279253277105465, "pck@10": 0.8742288637923323, "pck@20": 0.9605315079427745, "pck@30": 0.9833016723076865, "pck@40": 0.9908206971631566, "pck@50": 0.9942719799017071}}
 {"variant": "tiny", "params": 56290, "tcn_channels": [68, 56, 44, 32], "conv_channels": [2, 4, 8, 16], "attn_groups": 2, "groups_mode": "depthwise", "input_pw_groups": 4, "tcn_groups_per_block": [[540, 68], [68, 56], [56, 44], [44, 32]], "conv_strides": [2, 1, 1, 1], "final_width": 16, "batch_size": 64, "max_epochs": 50, "patience": 5, "lr": 0.0001, "weight_decay": 5e-05, "seed": 42, "precision": "fp32", "epochs_run": 50, "best_epoch": 47, "best_val_mpjpe": 0.012602971208592256, "best_val_pck20": 0.9397210340146666, "train_seconds": 1540.1, "torch": "2.11.0+cu128", "error": null, "finished_utc": "2026-06-11T04:04:50Z", "checkpoint": "/home/ruvultra/wiflow-std-bench/sweep/tiny_best.pth", "test_full": {"samples": 54000, "mpjpe": 0.012859782406853305, "pck@10": 0.7640358444319831, "pck@20": 0.9364815320968628, "pck@30": 0.9731568422317505, "pck@40": 0.9866444962642811, "pck@50": 0.992488939108672}, "test_clean": {"samples": 52560, "mpjpe": 0.012502924276904246, "pck@10": 0.770895526488985, "pck@20": 0.9411073559313967, "pck@30": 0.9764840687790962, "pck@40": 0.9886695077067278, "pck@50": 0.9936238432039409}}
@@ -1,21 +0,0 @@
 {
  "checkpoint": "/home/ruvultra/wiflow-std-bench/upstream/test/best_pose_model.pth",
  "test_full": {
    "samples": 54000,
    "mpjpe": 0.009834060806367133,
    "pck@10": 0.8686346120127925,
    "pck@20": 0.9608815324571398,
    "pck@30": 0.9789111610695168,
    "pck@40": 0.9857975759682832,
    "pck@50": 0.9898827553325229
  },
  "test_clean": {
    "samples": 52560,
    "mpjpe": 0.009432755044379373,
    "pck@10": 0.876996495807189,
    "pck@20": 0.9661454100405608,
    "pck@30": 0.9823453060205306,
    "pck@40": 0.987909734176537,
    "pck@50": 0.9911238361167036
  }
 }
@@ -1,32 +0,0 @@
 {
  "published": {
    "pck@20": 0.9725,
    "pck@30": 0.9863,
    "pck@40": 0.9916,
    "pck@50": 0.9948,
    "mpjpe": 0.007
  },
  "params_millions": 2.225042,
  "data_dir": "C:\\Users\\ruv\\.cache\\kagglehub\\datasets\\kaka2434\\wiflow-dataset\\versions\\1\\preprocessed_csi_data",
  "device": "cpu",
  "test_full": {
    "samples": 54000,
    "mpjpe": NaN,
    "pck@10": 5.6790124349020145e-05,
    "pck@20": 0.0007876543271596785,
    "pck@30": 0.007780246982971827,
    "pck@40": 0.05529259262923841,
    "pck@50": 0.1542370371548114,
    "wall_seconds": 118.03756999969482
  },
  "test_drop_last": {
    "samples": 53952,
    "mpjpe": NaN,
    "pck@10": 5.6840649370682976e-05,
    "pck@20": 0.0007883550872372227,
    "pck@30": 0.007787168910892621,
    "pck@40": 0.055318307667895535,
    "pck@50": 0.15425316342412276,
    "wall_seconds": 120.87458372116089
  }
 }
@@ -1,333 +0,0 @@
 """ADR-152 edge optimization follow-up: ONNX Runtime STATIC post-training
 quantization (calibration-based QDQ) of the retrained WiFlow-STD model, to
 improve on the dynamic-int8 result (2.44 MB, PCK@20 96.52%, 6.5 ms/win b1).
 Static PTQ pre-computes activation ranges from calibration data, so inference
 uses QLinearConv/QDQ kernels instead of dynamic ConvInteger -- typically both
 faster and (with good calibration) closer to fp32 accuracy.
 Method:
  - Calibration set: corruption-free windows drawn ONLY from the seed-42
    file-level TRAINING split (same split as eval_repro.py; corrupted windows
    excluded via results/nan_windows_mask.npy | big_windows_mask.npy), chosen
    with np.random.default_rng(42). Never test windows.
  - quantize_static, QuantFormat.QDQ, per-channel int8 weights, int8
    activations; calibration methods MinMax / Entropy / Percentile(99.99);
    scopes "all" (ORT default op set) vs "conv" (op_types_to_quantize=
    ["Conv"] -- leaves the attention path, which exports as Einsum/Softmax
    and elementwise ops, in fp32).
  - Model is pre-processed first (quant_pre_process: symbolic shape
    inference + ORT graph optimization, folds BatchNormalization into Conv).
  - Accuracy: identical protocol to eval_ort_accuracy.py -- the 10,000-window
    seed-42 subset of the corruption-free test split (PCK@20/50, MPJPE).
  - Latency: median ms/window at batch 1 (100 runs) and batch 64 (30 runs),
    3 interleaved repetitions across all variants (fp32 and dynamic-int8
    sessions included as same-session reference points).
 Usage:
  PYTHONUTF8=1 .venv/Scripts/python.exe static_ptq_bench.py \
      [--data-dir <preprocessed_csi_data>] [--subset 10000]
      [--calib-minmax 1000] [--calib-hist 512] [--skip-accuracy]
 Writes/merges into results/edge_optimization.json under key "onnx_static_ptq".
 """
 import argparse
 import collections
 import json
 import os
 import platform
 import statistics
 import sys
 import time
 import numpy as np
 import torch
 HERE = os.path.dirname(os.path.abspath(__file__))
 sys.path.insert(0, HERE)
 from _bench_common import RESULTS  # noqa: E402
 # quantize_bench sets up upstream imports + the np.load mmap patch
 # (both via _bench_common.import_upstream)
 from quantize_bench import build_test_subset  # noqa: E402
 import quantize_bench as qb  # noqa: E402
 from eval_ort_accuracy import evaluate_ort  # noqa: E402
 # fp32 ONNX model: the input handed to quant_pre_process() and the "onnx_fp32"
 # latency reference session opened in main().
 FP32_ONNX = os.path.join(RESULTS, "retrained_fp32_dynamic.onnx")
 # Dynamic-int8 ONNX model: opened in main() only as the
 # "onnx_int8_ort_dynamic" latency reference session.
 DYN_INT8_ONNX = os.path.join(RESULTS, "retrained_int8_ort_dynamic.onnx")
 # Output path of quant_pre_process(); main() quantizes every static variant
 # from this file.
 PREPROC_ONNX = os.path.join(RESULTS, "retrained_fp32_preproc.onnx")
 # ---------------------------------------------------------------------------
 # calibration data: corruption-free TRAINING-split windows only
 # ---------------------------------------------------------------------------
 def build_calibration_windows(data_dir, n_windows):
    """Draw calibration inputs from the clean part of the TRAIN split.

    Builds the seed-42 train/val/test loaders (exactly as eval_repro.py),
    drops every train index flagged in results/nan_windows_mask.npy or
    results/big_windows_mask.npy, then takes a seed-42 random draw of
    ``n_windows`` indices without replacement.

    Args:
        data_dir: directory passed to PreprocessedCSIKeypointsDataset.
        n_windows: number of calibration windows to draw (must be >= 1 and
            no larger than the number of clean TRAIN windows).

    Returns:
        float32 ndarray stacking ``dataset[i][0]`` for the selected indices
        along a new leading axis, ordered by ascending dataset index.

    Raises:
        ValueError: if ``n_windows`` is < 1 or exceeds the clean TRAIN pool.
    """
    # Fail fast, before the (expensive) dataset load, instead of letting
    # np.stack / rng.choice raise an opaque ValueError later.
    if n_windows < 1:
        raise ValueError(f"n_windows must be >= 1, got {n_windows}")
    dataset = qb.PreprocessedCSIKeypointsDataset(
        data_dir=data_dir, keypoint_scale=1000.0, enable_temporal_clean=True)
    train_loader, _va, _te = qb.create_preprocessed_train_val_test_loaders(
        dataset=dataset, batch_size=64, num_workers=0, random_seed=42)
    train_indices = np.asarray(train_loader.dataset.indices)
    # A window is corrupted if either mask flags it.
    corrupted = (np.load(os.path.join(RESULTS, "nan_windows_mask.npy"))
                 | np.load(os.path.join(RESULTS, "big_windows_mask.npy")))
    clean = train_indices[~corrupted[train_indices]]
    print(f"train split: {len(train_indices)} windows, "
          f"{len(train_indices) - len(clean)} corrupted excluded, "
          f"{len(clean)} clean")
    if n_windows > len(clean):
        raise ValueError(
            f"requested {n_windows} calibration windows but only "
            f"{len(clean)} clean TRAIN windows are available")
    rng = np.random.default_rng(42)
    # Sorted so the returned windows follow ascending dataset index.
    sel = np.sort(rng.choice(clean, size=n_windows, replace=False))
    xs = np.stack([dataset[int(i)][0].numpy() for i in sel]).astype(np.float32)
    print(f"calibration tensor: {xs.shape} from {n_windows} clean TRAIN windows")
    return xs
 def make_reader(windows, batch_size=64):
    """Wrap ``windows`` in a CalibrationDataReader that serves fixed-size batches.

    ``windows`` is sliced along its first axis into consecutive chunks of at
    most ``batch_size`` items. Each ``get_next()`` call returns the next chunk
    as ``{"input": chunk}``, or ``None`` once every chunk has been served;
    ``rewind()`` restarts from the first chunk.
    """
    from onnxruntime.quantization import CalibrationDataReader

    chunk_starts = range(0, len(windows), batch_size)
    chunks = [windows[lo:lo + batch_size] for lo in chunk_starts]

    class WindowReader(CalibrationDataReader):
        def __init__(self):
            self._batches = chunks
            self._it = iter(self._batches)

        def __len__(self):
            return len(self._batches)

        def rewind(self):
            self._it = iter(self._batches)

        def get_next(self):
            try:
                chunk = next(self._it)
            except StopIteration:
                # Exhausted: signal end of calibration data.
                return None
            return {"input": chunk}

    return WindowReader()
 # ---------------------------------------------------------------------------
 # quantization variants
 # ---------------------------------------------------------------------------
 def preprocess_model():
    """Run ORT's quant_pre_process on FP32_ONNX and return the output path.

    The pre-processed model is written to PREPROC_ONNX, which is also the
    value returned.
    """
    import onnxruntime.quantization.shape_inference as shape_inference

    source_path, target_path = FP32_ONNX, PREPROC_ONNX
    shape_inference.quant_pre_process(source_path, target_path)
    return target_path
 def quantize_variant(src, dst, method, scope, calib_windows):
    """Statically quantize ``src`` into ``dst`` and describe the result.

    Runs onnxruntime ``quantize_static`` in QDQ format with per-channel int8
    weights and int8 activations, calibrated on ``calib_windows``.

    Args:
        src: path of the (pre-processed) fp32 ONNX model.
        dst: path the quantized model is written to.
        method: calibration method key: "minmax", "entropy" or "percentile"
            (percentile uses CalibPercentile=99.99).
        scope: "conv" restricts quantization to Conv nodes; any other value
            leaves ``op_types_to_quantize`` as None (ORT's default op set).
        calib_windows: calibration inputs, batched via make_reader().

    Returns:
        dict with the output file name and size, calibration settings and
        elapsed seconds, the quantization configuration, and a per-op-type
        node count of the quantized graph (sorted by op type).

    Raises:
        KeyError: if ``method`` is not one of the supported keys.
    """
    # Imported up front so a missing package fails before the calibration run.
    import onnx
    from onnxruntime.quantization import (CalibrationMethod, QuantFormat,
                                          QuantType, quantize_static)
    methods = {
        "minmax": CalibrationMethod.MinMax,
        "entropy": CalibrationMethod.Entropy,
        "percentile": CalibrationMethod.Percentile,
    }
    # NB: do NOT pass CalibMaxIntermediateOutputs -- in ORT 1.26 the MinMax
    # calibrater clears its buffer every N batches and then raises
    # "No data is collected" if the batch count is divisible by N.
    extra = {}
    if method == "percentile":
        extra["CalibPercentile"] = 99.99
    op_types = ["Conv"] if scope == "conv" else None
    # perf_counter is monotonic (and is what bench_ort uses); time.time() can
    # jump if the wall clock is adjusted mid-run.
    t0 = time.perf_counter()
    quantize_static(
        src, dst, make_reader(calib_windows),
        quant_format=QuantFormat.QDQ,
        op_types_to_quantize=op_types,
        per_channel=True,
        activation_type=QuantType.QInt8,
        weight_type=QuantType.QInt8,
        calibrate_method=methods[method],
        extra_options=extra,
    )
    secs = time.perf_counter() - t0
    ops = collections.Counter(n.op_type for n in onnx.load(dst).graph.node)
    # Stat once so size_bytes and size_mb always describe the same file state.
    size_bytes = os.path.getsize(dst)
    return {
        "file": os.path.basename(dst),
        "size_bytes": size_bytes,
        "size_mb": size_bytes / 1e6,
        "calibration": {"method": method,
                        "windows": int(len(calib_windows)),
                        "percentile": extra.get("CalibPercentile"),
                        "seconds": secs},
        "scope": scope,
        "per_channel": True,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": dict(sorted(ops.items())),
    }
 # ---------------------------------------------------------------------------
 # latency (3 interleaved reps, like the latency_controlled_rerun)
 # ---------------------------------------------------------------------------
 def ort_session(path):
    """Open ``path`` as an ONNX Runtime InferenceSession using only the CPU provider."""
    from onnxruntime import InferenceSession

    cpu_only = ["CPUExecutionProvider"]
    return InferenceSession(path, providers=cpu_only)
 def bench_ort(sess, batch, n_runs):
    """Return the median latency of ``sess.run`` in milliseconds per window.

    A fixed seed-123 random float32 input of shape (batch, 540, 20) is fed to
    the session's first input. ``max(5, n_runs // 10)`` untimed warm-up calls
    are made first, then ``n_runs`` timed calls; the median call time is
    converted to milliseconds and divided by ``batch``.
    """
    sample = np.random.default_rng(123).random((batch, 540, 20),
                                               dtype=np.float32)
    feeds = {sess.get_inputs()[0].name: sample}

    def timed_call():
        started = time.perf_counter()
        sess.run(None, feeds)
        return time.perf_counter() - started

    warmup_calls = max(5, n_runs // 10)
    for _ in range(warmup_calls):
        sess.run(None, feeds)
    elapsed = [timed_call() for _ in range(n_runs)]
    return statistics.median(elapsed) * 1e3 / batch  # ms/window
 def interleaved_latency(sessions, reps=3, runs_b1=100, runs_b64=30):
    """Benchmark every session ``reps`` times, interleaving the sessions.

    Each repetition visits all sessions in dict order and measures batch-1
    (``runs_b1`` runs) then batch-64 (``runs_b64`` runs) latency with
    bench_ort. Returns ``{name: {...}}`` holding the per-repetition values
    ("batch1_reps", "batch64_reps") and their medians
    ("batch1_ms_per_window_median", "batch64_ms_per_window_median").
    """
    table = {}
    for label in sessions:
        table[label] = {"batch1_reps": [], "batch64_reps": []}

    for round_no in range(1, reps + 1):
        for label, session in sessions.items():
            row = table[label]
            single = bench_ort(session, 1, runs_b1)
            row["batch1_reps"].append(single)
            batched = bench_ort(session, 64, runs_b64)
            row["batch64_reps"].append(batched)
            print(f"  rep {round_no}/{reps} {label}: "
                  f"b1={single:.2f} "
                  f"b64={batched:.3f} ms/win", flush=True)

    # Collapse the repetitions of each session into a single median figure.
    for row in table.values():
        row["batch1_ms_per_window_median"] = statistics.median(
            row["batch1_reps"])
        row["batch64_ms_per_window_median"] = statistics.median(
            row["batch64_reps"])
    return table
 # ---------------------------------------------------------------------------
 def main():
    """Run the static-PTQ benchmark and merge the results into ``--out``.

    Steps: build calibration windows from the TRAIN split, pre-process the
    fp32 model, quantize every (method, scope) variant, check each variant
    against the parity fixture, measure interleaved latency (including the
    fp32 and dynamic-int8 reference sessions), optionally evaluate accuracy,
    and finally merge everything under the "onnx_static_ptq" key of the
    output JSON file.
    """
    import onnxruntime
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-dir", default=os.path.join(
        os.path.expanduser("~"), ".cache", "kagglehub", "datasets", "kaka2434",
        "wiflow-dataset", "versions", "1", "preprocessed_csi_data"))
    parser.add_argument("--subset", type=int, default=10000)
    parser.add_argument("--calib-minmax", type=int, default=1000)
    parser.add_argument("--calib-hist", type=int, default=512,
                        help="calibration windows for Entropy/Percentile "
                             "(histogram calibraters hold all intermediate "
                             "activations in RAM)")
    parser.add_argument("--skip-accuracy", action="store_true")
    parser.add_argument("--methods", default="minmax,entropy,percentile",
                        help="comma list of calibration methods to (re)run; "
                             "results merge into existing onnx_static_ptq")
    parser.add_argument("--out", default=os.path.join(RESULTS, "edge_optimization.json"))
    args = parser.parse_args()
    results = {
        "env": {
            "onnxruntime": onnxruntime.__version__,
            "torch": torch.__version__,
            "platform": platform.platform(),
            "source_model": os.path.basename(FP32_ONNX),
        },
        "variants": {},
    }
    # ---- calibration data (TRAIN split only) -------------------------------
    calib_mm = build_calibration_windows(args.data_dir, args.calib_minmax)
    # calib_mm is ordered by ascending dataset index, so a plain prefix slice
    # would keep only the lowest-indexed windows. Take a seeded random subset
    # instead so the histogram calibraters see the same spread as MinMax.
    n_hist = max(0, min(args.calib_hist, len(calib_mm)))
    hist_rows = np.sort(np.random.default_rng(42).choice(
        len(calib_mm), size=n_hist, replace=False))
    calib_hist = calib_mm[hist_rows]
    # ---- preprocess + quantize ---------------------------------------------
    print("\n=== quant_pre_process (shape inference + graph optimization) ===")
    src = preprocess_model()
    results["env"]["preprocessed_model"] = {
        "file": os.path.basename(src),
        "size_mb": os.path.getsize(src) / 1e6,
    }
    # Tolerate "minmax, entropy" and trailing commas in --methods.
    methods = [m.strip() for m in args.methods.split(",") if m.strip()]
    matrix = [(m, s) for m in methods
              for s in ("all", "conv")]
    for method, scope in matrix:
        name = f"{method}_{scope}"
        dst = os.path.join(RESULTS, f"retrained_int8_static_{name}.onnx")
        calib = calib_mm if method == "minmax" else calib_hist
        print(f"\n=== quantize_static: {name} "
              f"({len(calib)} calib windows) ===", flush=True)
        try:
            results["variants"][name] = quantize_variant(
                src, dst, method, scope, calib)
            print(f"  {results['variants'][name]['size_mb']:.3f} MB")
        except Exception as e:  # noqa: BLE001
            # Best effort: record the failure and keep going with the
            # remaining variants.
            results["variants"][name] = {"error": f"{type(e).__name__}: {e}"}
            print(f"  FAILED: {e}")
    # ---- fixture parity (sanity, batch 2) ----------------------------------
    fixture = np.load(os.path.join(RESULTS, "parity_fixture.npz"))
    fx, fy = fixture["input"], fixture["output"]
    sessions = {}
    for name, info in results["variants"].items():
        if "error" in info:
            continue
        path = os.path.join(RESULTS, info["file"])
        try:
            sess = ort_session(path)
            yq = sess.run(None, {sess.get_inputs()[0].name: fx})[0]
            info["max_abs_diff_vs_fp32_fixture"] = float(np.abs(yq - fy).max())
            sessions[name] = sess
        except Exception as e:  # noqa: BLE001
            # Variants that cannot run are excluded from latency/accuracy.
            info["run_error"] = f"{type(e).__name__}: {e}"
    print("\nfixture max-abs-diff vs fp32:",
          {n: round(results["variants"][n].get("max_abs_diff_vs_fp32_fixture",
                                               float("nan")), 5)
           for n in results["variants"]})
    # ---- latency: 3 interleaved reps incl. fp32 + dynamic-int8 reference ----
    print("\n=== latency (3 interleaved reps) ===")
    lat_sessions = {"onnx_fp32": ort_session(FP32_ONNX),
                    "onnx_int8_ort_dynamic": ort_session(DYN_INT8_ONNX)}
    lat_sessions.update(sessions)
    results["latency"] = {
        "note": "3 interleaved repetitions per variant, median ms/window; "
                "onnx_fp32 / onnx_int8_ort_dynamic are same-session references",
        **interleaved_latency(lat_sessions),
    }
    # ---- accuracy on the standard 10k corruption-free test subset ----------
    if not args.skip_accuracy:
        loader, n_clean = build_test_subset(args.data_dir, args.subset)
        results["accuracy_subset"] = {
            "description": "seed-42 file-level 70/15/15 test split, corrupted "
                           "windows excluded, seed-42 random subset (same as "
                           "quantize_bench/eval_ort_accuracy)",
            "subset_size": min(args.subset, n_clean) if args.subset else n_clean,
        }
        for name, sess in sessions.items():
            print(f"\n=== accuracy: {name} ===")
            results["variants"][name]["accuracy"] = evaluate_ort(
                sess, loader, name)
            print(json.dumps(results["variants"][name]["accuracy"], indent=2))
    # ---- merge into edge_optimization.json ----------------------------------
    merged = {}
    if os.path.exists(args.out):
        with open(args.out) as f:
            merged = json.load(f)
    prev = merged.get("onnx_static_ptq")
    if prev:  # nested merge so partial --methods reruns don't clobber
        prev["env"] = results["env"]
        # setdefault: an older/partial results file may lack "variants".
        prev.setdefault("variants", {}).update(results["variants"])
        prev.setdefault("latency", {}).update(results["latency"])
        if "accuracy_subset" in results:
            prev["accuracy_subset"] = results["accuracy_subset"]
    else:
        merged["onnx_static_ptq"] = results
    # Serialize before opening the file so a serialization error cannot
    # truncate the existing results.
    payload = json.dumps(merged, indent=2)
    with open(args.out, "w") as f:
        f.write(payload)
    print(f"\nwrote {args.out}")
 # Script entry point: run the benchmark only when this file is executed
 # directly, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/Show More
+++ b/Show More
		`@@ -1 +0,0 @@`
			`{"sessionId":"d80c93c2-51b7-42e8-a0fc-dc47cff1200f","pid":45748,"acquiredAt":1779668018388}`
		`@@ -1 +0,0 @@`
			`9c35e541d51f00998691b98948887ebca09b907d8eb29a113f97e792340456ba`
		`@@ -1 +0,0 @@`
			{"frames": [{"pred": [[0.4003, 0.2734], [0.5038, 0.4197], [0.2053, 0.4438], [0.4397, 0.685], [0.5796, 0.7645], [0.8001, 0.2195], [0.2789, 0.2833], [0.314, 0.5439], [0.511, 0.2259], [0.6008, 0.46], [0.4837, 0.3879], [0.3475, 0.5597], [0.6569, 0.3575], [0.437, 0.6539], [0.2341, 0.6038], [0.7331, 0.392], [0.5615, 0.4915]]}, {"pred": [[0.4669, 0.6066], [0.6012, 0.7873], [0.4124, 0.5997], [0.2832, 0.281], [0.2732, 0.3635], [0.2503, 0.4848], [0.6827, 0.715], [0.4336, 0.7165], [0.295, 0.3386], [0.5337, 0.3544], [0.4397, 0.5474], [0.5163, 0.5528], [0.7547, 0.6799], [0.4195, 0.4448], [0.2257, 0.2269], [0.384, 0.2176], [0.2419, 0.4332]]}, {"pred": [[0.5585, 0.283], [0.4325, 0.2934], [0.463, 0.4744], [0.4188, 0.3454], [0.215, 0.7565], [0.527, 0.2353], [0.7084, 0.6124], [0.3015, 0.6744], [0.4103, 0.3532], [0.7243, 0.6932], [0.3302, 0.4918], [0.2072, 0.3754], [0.7914, 0.4878], [0.7618, 0.4079], [0.323, 0.3386], [0.7104, 0.4997], [0.2673, 0.6077]]}, {"pred": [[0.6372, 0.4984], [0.4184, 0.6763], [0.4498, 0.7549], [0.2924, 0.303], [0.3069, 0.7022], [0.3954, 0.5098], [0.7836, 0.6071], [0.4733, 0.7114], [0.3407, 0.3793], [0.3408, 0.4678], [0.4156, 0.4911], [0.4525, 0.7519], [0.5117, 0.1985], [0.1893, 0.6784], [0.6281, 0.5346], [0.5175, 0.673], [0.36, 0.3665]]}, {"pred": [[0.5535, 0.6537], [0.568, 0.511], [0.4705, 0.5377], [0.6372, 0.7163], [0.5493, 0.7515], [0.2559, 0.4549], [0.2553, 0.6176], [0.2991, 0.6154], [0.7185, 0.7986], [0.4586, 0.5057], [0.2975, 0.4525], [0.3263, 0.3719], [0.5131, 0.4576], [0.557, 0.5268], [0.6572, 0.7736], [0.2146, 0.6526], [0.4662, 0.7371]]}, {"pred": [[0.2924, 0.7595], [0.2612, 0.2315], [0.2488, 0.7751], [0.2329, 0.7282], [0.4744, 0.4206], [0.3618, 0.267], [0.2477, 0.285], [0.3976, 0.3746], [0.494, 0.2874], [0.3596, 0.2112], [0.3311, 0.4692], [0.6912, 0.4727], [0.4434, 0.5233], [0.4139, 0.7048], [0.425, 0.3937], [0.2326, 0.631], [0.2655, 0.7116]]}, {"pred": [[0.3609, 0.3437], [0.285, 0.486], [0.7734, 0.5468], [0.3657, 0.4093], [0.4728, 0.5019], 
[0.1866, 0.3545], [0.2172, 0.2028], [0.5613, 0.5238], [0.6252, 0.7205], [0.7998, 0.2954], [0.242, 0.7063], [0.6259, 0.6883], [0.5148, 0.7141], [0.5577, 0.7434], [0.3233, 0.2131], [0.2652, 0.7066], [0.5753, 0.5885]]}, {"pred": [[0.6787, 0.6504], [0.6051, 0.2297], [0.2539, 0.3475], [0.6437, 0.7807], [0.4981, 0.6149], [0.5716, 0.2367], [0.6486, 0.3632], [0.2433, 0.369], [0.6061, 0.3731], [0.4955, 0.2591], [0.7676, 0.7602], [0.6899, 0.7716], [0.3143, 0.7707], [0.3031, 0.4997], [0.7076, 0.5133], [0.3382, 0.7196], [0.2002, 0.4871]]}]}
		`@@ -1 +0,0 @@`
			{"frames": [{"gt": [[0.3943, 0.2905], [0.5215, 0.4194], [0.2225, 0.4602], [0.4547, 0.6961], [0.5765, 0.7686], [0.7858, 0.2279], [0.2866, 0.2707], [0.3084, 0.549], [0.5286, 0.2377], [0.6082, 0.4566], [0.4719, 0.3799], [0.3465, 0.5447], [0.6377, 0.3728], [0.4509, 0.6543], [0.2235, 0.6009], [0.7253, 0.3882], [0.5479, 0.4737]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.4845, 0.5985], [0.5883, 0.7959], [0.4315, 0.6012], [0.3008, 0.2703], [0.2776, 0.3486], [0.2483, 0.4695], [0.6916, 0.7184], [0.4153, 0.7305], [0.3057, 0.3392], [0.5535, 0.3576], [0.4216, 0.5398], [0.5093, 0.5706], [0.7397, 0.668], [0.4354, 0.4394], [0.2373, 0.2404], [0.404, 0.2315], [0.2609, 0.4182]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.5684, 0.2891], [0.4185, 0.2737], [0.4796, 0.4903], [0.4056, 0.3589], [0.2139, 0.7706], [0.5259, 0.2162], [0.718, 0.6177], [0.3002, 0.6632], [0.3978, 0.3338], [0.7116, 0.6836], [0.336, 0.5106], [0.2168, 0.3677], [0.7739, 0.4683], [0.773, 0.4188], [0.318, 0.3226], [0.7043, 0.4877], [0.2509, 0.5964]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.6501, 0.4868], [0.3995, 0.6805], [0.4408, 0.7681], [0.2762, 0.2907], [0.2877, 0.6959], [0.4102, 0.5292], [0.7825, 0.5898], [0.4603, 0.723], [0.3511, 0.3758], [0.3556, 0.4514], [0.4123, 0.4749], [0.4524, 0.7506], [0.5141, 0.2112], [0.2024, 0.6795], [0.6351, 0.5339], [0.5333, 0.6706], [0.3491, 0.3662]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.537, 0.656], [0.5675, 0.5033], [0.4714, 0.52], [0.6195, 0.7259], [0.5357, 0.766], [0.273, 0.4653], [0.2439, 0.6017], [0.2927, 0.6297], [0.7297, 0.7805], [0.439, 0.4924], [0.2969, 0.4589], [0.3174, 0.3911], [0.5324, 0.4643], [0.5744, 0.5074], [0.673, 0.783], [0.2238, 0.6674], [0.4534, 
0.7468]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.2896, 0.7515], [0.2537, 0.2345], [0.2434, 0.763], [0.2502, 0.7137], [0.4723, 0.4035], [0.3607, 0.2775], [0.2657, 0.2969], [0.3872, 0.383], [0.5001, 0.3067], [0.3503, 0.2092], [0.3137, 0.4849], [0.6914, 0.4593], [0.4359, 0.504], [0.4056, 0.6994], [0.4428, 0.4085], [0.2424, 0.6445], [0.2507, 0.7048]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.3692, 0.3453], [0.2945, 0.4675], [0.7836, 0.5282], [0.3857, 0.414], [0.4848, 0.5017], [0.203, 0.3585], [0.225, 0.2135], [0.5513, 0.5175], [0.6296, 0.7275], [0.7908, 0.2897], [0.2263, 0.7012], [0.6403, 0.6873], [0.5026, 0.701], [0.5504, 0.7357], [0.338, 0.2187], [0.2629, 0.7015], [0.5757, 0.6084]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.6786, 0.649], [0.5956, 0.2396], [0.2447, 0.3593], [0.6439, 0.7854], [0.4874, 0.6102], [0.5857, 0.2465], [0.6459, 0.3827], [0.2364, 0.3613], [0.6054, 0.3745], [0.4798, 0.2711], [0.7869, 0.7618], [0.6919, 0.7809], [0.3259, 0.7674], [0.285, 0.5144], [0.6921, 0.5052], [0.3388, 0.7386], [0.2022, 0.495]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}]}
		`@@ -1 +0,0 @@`
			`d6bce07ecb1648e6936561df44bf4a3bfc17bb0ba5f692646b2301d105b52f67`
		`@@ -1 +0,0 @@`
			`304d54690af468dc6cbf0f2a1332f109cf187d5e2eab454efd8554cebc45bdeb`
`@@ -1 +1 @@`
	`f8e76f21a0f9852b70b6d9dd5318239f6b20cbcb4cdd995863263cecdc446f7a`	`8c0680d7d285739ea9597715e84959d9c356c87ee3ad35b5f1e69a4ca41151c6`