mirror of
https://github.com/ruvnet/RuView
synced 2026-07-02 14:03:19 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2dfb4884be |
@@ -1,55 +1,50 @@
|
|||||||
{
|
{
|
||||||
"running": true,
|
"running": true,
|
||||||
"startedAt": "2026-05-24T22:26:25.030Z",
|
"startedAt": "2026-03-09T15:26:00.921Z",
|
||||||
"workers": {
|
"workers": {
|
||||||
"map": {
|
"map": {
|
||||||
"runCount": 64,
|
"runCount": 49,
|
||||||
"successCount": 64,
|
"successCount": 49,
|
||||||
"failureCount": 0,
|
"failureCount": 0,
|
||||||
"averageDurationMs": 136.171875,
|
"averageDurationMs": 1.2857142857142858,
|
||||||
"lastRun": "2026-05-25T06:07:33.387Z",
|
"lastRun": "2026-02-28T16:13:19.194Z",
|
||||||
"lastStartedAt": "2026-05-25T06:07:33.381Z",
|
"nextRun": "2026-03-09T15:56:00.928Z",
|
||||||
"nextRun": "2026-05-25T06:26:25.410Z",
|
|
||||||
"isRunning": false
|
"isRunning": false
|
||||||
},
|
},
|
||||||
"audit": {
|
"audit": {
|
||||||
"runCount": 72,
|
"runCount": 45,
|
||||||
"successCount": 27,
|
"successCount": 0,
|
||||||
"failureCount": 45,
|
"failureCount": 45,
|
||||||
"averageDurationMs": 26260.11111111111,
|
"averageDurationMs": 0,
|
||||||
"lastRun": "2026-05-25T06:08:29.594Z",
|
"lastRun": "2026-03-09T15:43:00.933Z",
|
||||||
"lastStartedAt": "2026-05-25T06:07:33.416Z",
|
"nextRun": "2026-03-09T15:38:00.914Z",
|
||||||
"nextRun": "2026-05-25T06:18:32.928Z",
|
|
||||||
"isRunning": false
|
"isRunning": false
|
||||||
},
|
},
|
||||||
"optimize": {
|
"optimize": {
|
||||||
"runCount": 54,
|
"runCount": 34,
|
||||||
"successCount": 9,
|
"successCount": 0,
|
||||||
"failureCount": 45,
|
"failureCount": 34,
|
||||||
"averageDurationMs": 40303.377578766485,
|
"averageDurationMs": 0,
|
||||||
"lastRun": "2026-05-25T05:59:05.330Z",
|
"lastRun": "2026-02-28T16:23:19.387Z",
|
||||||
"lastStartedAt": "2026-05-25T05:54:05.318Z",
|
"nextRun": "2026-03-09T15:45:00.915Z",
|
||||||
"nextRun": "2026-05-25T06:20:15.145Z",
|
|
||||||
"isRunning": false
|
"isRunning": false
|
||||||
},
|
},
|
||||||
"consolidate": {
|
"consolidate": {
|
||||||
"runCount": 32,
|
"runCount": 23,
|
||||||
"successCount": 32,
|
"successCount": 23,
|
||||||
"failureCount": 0,
|
"failureCount": 0,
|
||||||
"averageDurationMs": 4.71875,
|
"averageDurationMs": 0.6521739130434783,
|
||||||
"lastRun": "2026-05-25T05:38:20.449Z",
|
"lastRun": "2026-02-28T16:05:19.091Z",
|
||||||
"lastStartedAt": "2026-05-25T05:38:20.443Z",
|
"nextRun": "2026-03-09T16:02:00.918Z",
|
||||||
"nextRun": "2026-05-25T06:32:25.248Z",
|
|
||||||
"isRunning": false
|
"isRunning": false
|
||||||
},
|
},
|
||||||
"testgaps": {
|
"testgaps": {
|
||||||
"runCount": 100,
|
"runCount": 27,
|
||||||
"successCount": 63,
|
"successCount": 0,
|
||||||
"failureCount": 37,
|
"failureCount": 27,
|
||||||
"averageDurationMs": 108604.0537328991,
|
"averageDurationMs": 0,
|
||||||
"lastRun": "2026-05-25T06:11:52.529Z",
|
"lastRun": "2026-02-28T16:08:19.369Z",
|
||||||
"lastStartedAt": "2026-05-25T06:07:33.390Z",
|
"nextRun": "2026-03-09T15:54:00.920Z",
|
||||||
"nextRun": "2026-05-25T06:14:25.296Z",
|
|
||||||
"isRunning": false
|
"isRunning": false
|
||||||
},
|
},
|
||||||
"predict": {
|
"predict": {
|
||||||
@@ -69,8 +64,8 @@
|
|||||||
},
|
},
|
||||||
"config": {
|
"config": {
|
||||||
"autoStart": false,
|
"autoStart": false,
|
||||||
"logDir": "C:\\Users\\ruv\\Projects\\wifi-densepose\\.claude-flow\\logs",
|
"logDir": "/Users/cohen/GitHub/ruvnet/RuView/.claude-flow/logs",
|
||||||
"stateFile": "C:\\Users\\ruv\\Projects\\wifi-densepose\\.claude-flow\\daemon-state.json",
|
"stateFile": "/Users/cohen/GitHub/ruvnet/RuView/.claude-flow/daemon-state.json",
|
||||||
"maxConcurrent": 2,
|
"maxConcurrent": 2,
|
||||||
"workerTimeoutMs": 300000,
|
"workerTimeoutMs": 300000,
|
||||||
"resourceThresholds": {
|
"resourceThresholds": {
|
||||||
@@ -136,5 +131,5 @@
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"savedAt": "2026-05-25T06:11:52.530Z"
|
"savedAt": "2026-03-09T15:43:00.933Z"
|
||||||
}
|
}
|
||||||
@@ -1,119 +0,0 @@
|
|||||||
{
|
|
||||||
"id": "aether-arena-aa",
|
|
||||||
"name": "AetherArena (AA) — Official Spatial-Intelligence Benchmark",
|
|
||||||
"adr": "ADR-149",
|
|
||||||
"adrPath": "docs/adr/ADR-149-public-community-leaderboard-huggingface.md",
|
|
||||||
"status": "Accepted",
|
|
||||||
"initializedDate": "2026-05-30",
|
|
||||||
"targetDate": "2026-08-31",
|
|
||||||
"exitCriteria": "Benchmark INFRASTRUCTURE done, tested, CI-gated, deploy-ready: aa_score_runner.rs passes deterministic fixture test; CI harness-gate green on every PR; aether-arena repo scaffold committed (README four-part framing + aa-submission.toml schema + VERIFY.md); public smoke split committed; HF Space lifecycle skeleton deployed; signed Parquet ledger functional; RuView baseline PCK@20 ~2.5% entered; ADR-149 §7 acceptance test (five-step stranger test) passes. NOTE: ML SOTA (MM-Fi PCK@20 ~72%) is a separate long-running stretch goal blocked on ADR-079 camera-ground-truth — it is NOT an infra exit criterion.",
|
|
||||||
"baselineState": {
|
|
||||||
"adrStatus": "Accepted, committed 2026-05-30",
|
|
||||||
"scorerCode": "ruview_metrics.rs + ablation.rs + proof.rs exist in wifi-densepose-train; aa_score_runner.rs not yet created",
|
|
||||||
"aetherArenaRepo": "does not exist yet — needs user authorization to create ruvnet/aether-arena public repo",
|
|
||||||
"hfSpace": "does not exist yet — needs HF_TOKEN and user authorization to deploy ruvnet/aether-arena HF Space",
|
|
||||||
"smokeDataset": "not committed",
|
|
||||||
"resultsLedger": "not created",
|
|
||||||
"ruviewBaseline": "PCK@20 ~2.5% self-reported, not formally entered",
|
|
||||||
"ciGate": "not added to workflow"
|
|
||||||
},
|
|
||||||
"milestones": {
|
|
||||||
"m1": {
|
|
||||||
"name": "ADR-149 Accepted + committed",
|
|
||||||
"status": "DONE",
|
|
||||||
"completedDate": "2026-05-30",
|
|
||||||
"completionCriteria": "ADR-149 file committed to docs/adr/ with status Accepted",
|
|
||||||
"notes": "Done this session. File at docs/adr/ADR-149-public-community-leaderboard-huggingface.md"
|
|
||||||
},
|
|
||||||
"m2": {
|
|
||||||
"name": "Deterministic scorer runner bin (aa_score_runner.rs)",
|
|
||||||
"status": "NOT_STARTED",
|
|
||||||
"completionCriteria": "aa_score_runner.rs compiles, runs ruview_metrics on a committed fixture, emits RuViewTier + SHA-256 proof hash, mirrors existing *_proof_runner.rs pattern; cargo test passes",
|
|
||||||
"estimatedEffort": "3-5 days",
|
|
||||||
"owner": "wifi-densepose-train crate or new aa-scorer crate"
|
|
||||||
},
|
|
||||||
"m3": {
|
|
||||||
"name": "CI harness-gate: GitHub Actions workflow",
|
|
||||||
"status": "NOT_STARTED",
|
|
||||||
"completionCriteria": "A GitHub Actions workflow runs aa_score_runner on every PR as a build gate; PR fails if scorer fails determinism check; workflow committed and green",
|
|
||||||
"estimatedEffort": "2-3 days",
|
|
||||||
"dependency": "M2 must be done first"
|
|
||||||
},
|
|
||||||
"m4": {
|
|
||||||
"name": "aether-arena repo scaffold",
|
|
||||||
"status": "NOT_STARTED",
|
|
||||||
"completionCriteria": "ruvnet/aether-arena repo created with: README (four-part framing: Public leaderboard / Private eval split / Open scorer / Signed results); aa-submission.toml manifest schema; VERIFY.md (ADR-149 §7 stranger acceptance test); neutrality/governance section (§2.8); contribution guide",
|
|
||||||
"estimatedEffort": "3-5 days",
|
|
||||||
"blockers": ["Needs user authorization to create public ruvnet/aether-arena repo on GitHub"]
|
|
||||||
},
|
|
||||||
"m5": {
|
|
||||||
"name": "Public smoke split committed + private MM-Fi held-out split prep",
|
|
||||||
"status": "NOT_STARTED",
|
|
||||||
"completionCriteria": "Public smoke split committed to aether-arena repo (stranger can score locally); private MM-Fi held-out split prepared under non-public path with CC BY-NC 4.0 attribution; Wi-Pose explicitly excluded from v0",
|
|
||||||
"estimatedEffort": "5-7 days",
|
|
||||||
"riskNotes": "MM-Fi CC BY-NC 4.0: AA must remain non-commercial and carry MM-Fi attribution; raw frames stay in private split; only derived CSI features + scores may be exposed"
|
|
||||||
},
|
|
||||||
"m6": {
|
|
||||||
"name": "HF Space (Gradio) skeleton",
|
|
||||||
"status": "BLOCKED",
|
|
||||||
"completionCriteria": "HF Space deployed at ruvnet/aether-arena with submission lifecycle (submitted->validated->quarantined->smoke_scored->full_scored->published/rejected); sandboxed scorer container wired; basic leaderboard table rendered",
|
|
||||||
"estimatedEffort": "7-10 days",
|
|
||||||
"blockers": [
|
|
||||||
"Needs HF_TOKEN — check .env for HF_TOKEN or HUGGINGFACE_TOKEN",
|
|
||||||
"Needs user authorization to create/deploy ruvnet/aether-arena HF Space (outward-facing public deployment)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"m7": {
|
|
||||||
"name": "Signed append-only Parquet results ledger",
|
|
||||||
"status": "NOT_STARTED",
|
|
||||||
"completionCriteria": "HF dataset ruvnet/aether-arena-results created; append-only Parquet ledger with signed rows; determinism_gate enforced; no row can be silently edited",
|
|
||||||
"estimatedEffort": "3-5 days",
|
|
||||||
"ledgerSchema": "submitter, model_ref, category, feature_set, tier, pck20, oks, mota, vitals_bpm_err, latency_p50, latency_p95, privacy_leakage, cross_room_deg, proof_sha256, scored_at, harness_version",
|
|
||||||
"dependency": "M6 must be scaffolded first"
|
|
||||||
},
|
|
||||||
"m8": {
|
|
||||||
"name": "RuView baseline entry + public launch",
|
|
||||||
"status": "NOT_STARTED",
|
|
||||||
"completionCriteria": "RuView wifi-densepose-pretrained baseline entered (honest PCK@20 ~2.5%); ADR-149 §7 five-step stranger acceptance test passes; v0 live with Presence + Pose + Edge-latency + Determinism categories active; Privacy and Cross-room shown as gated/coming-soon",
|
|
||||||
"estimatedEffort": "3-5 days",
|
|
||||||
"dependency": "M4+M5+M6+M7 complete",
|
|
||||||
"notes": "ML SOTA improvement (PCK@20 ~72%) is a SEPARATE stretch goal blocked on ADR-079 P7-P9 camera ground truth. NOT a blocker for infra launch."
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"activeMilestone": "m2",
|
|
||||||
"completedMilestones": ["m1"],
|
|
||||||
"knownRisks": [
|
|
||||||
"HF_TOKEN not confirmed present in .env — check before M6 work begins",
|
|
||||||
"ruvnet/aether-arena public repo creation is outward-facing — needs explicit user authorization",
|
|
||||||
"MM-Fi CC BY-NC 4.0: AA must stay legally non-commercial and brand-distinct from commercial RuView product; or seek MM-Fi commercial grant before any paid tier",
|
|
||||||
"Wi-Pose has research-use-only terms (no redistribution grant) — excluded from v0; revisit only if terms are clarified with authors",
|
|
||||||
"HF Space free CPU tier may be too slow for Candle/tch inference pipeline — may need ZeroGPU or self-hosted scorer on cognitum-20260110 GCloud A100/L4",
|
|
||||||
"ADR-079 camera-ground-truth (PCK@20 SOTA) is P7-P9 pending — NOT an infra blocker; must not be conflated with AA infra completion",
|
|
||||||
"Neutrality/governance risk: RuView seeded the scorer — must be demonstrably scored through the same public pipeline as any other entrant (§2.8 controls)"
|
|
||||||
],
|
|
||||||
"driftSignals": {
|
|
||||||
"timeline": "GREEN — just initialized, no timeline pressure yet",
|
|
||||||
"scope": "GREEN — scope locked at four-part structure per ADR-149 §2 decision",
|
|
||||||
"approach": "GREEN — reuse pattern (existing ruview_metrics + proof.rs) confirmed in ADR-149",
|
|
||||||
"dependency": "YELLOW — HF_TOKEN and ruvnet/aether-arena repo authorization are external blockers with unknown ETA",
|
|
||||||
"priority": "GREEN — active feature branch feat/adr-136-146-streaming-engine in progress; AA infra can proceed in parallel on its own branch"
|
|
||||||
},
|
|
||||||
"stretchGoals": {
|
|
||||||
"sotaML": "MM-Fi PCK@20 SOTA ~72% — separate ML effort blocked on ADR-079 P7-P9 camera-ground-truth data collection; NOT an infra exit criterion",
|
|
||||||
"privacyAxis": "ADR-145 §10 membership-inference attacker — activate Privacy leaderboard axis once attacker is implemented and published",
|
|
||||||
"crossRoom": "Multi-room held-out split — activate Cross-room generalization axis",
|
|
||||||
"multiOrgSteering": "Invite co-maintainers from other projects once >=N external entries land"
|
|
||||||
},
|
|
||||||
"sessionHistory": [
|
|
||||||
{
|
|
||||||
"date": "2026-05-30",
|
|
||||||
"type": "initialization",
|
|
||||||
"accomplished": [
|
|
||||||
"ADR-149 Accepted and committed to docs/adr/",
|
|
||||||
"Horizon record initialized in .claude-flow/horizons/aether-arena-aa.json",
|
|
||||||
"Memory stored in horizons namespace under key horizon-aether-arena-aa",
|
|
||||||
"Session check-in record stored in horizon-sessions namespace"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1,11 +1,11 @@
|
|||||||
{
|
{
|
||||||
"timestamp": "2026-05-25T06:07:33.385Z",
|
"timestamp": "2026-02-28T16:13:19.193Z",
|
||||||
"projectRoot": "C:\\Users\\ruv\\Projects\\wifi-densepose",
|
"projectRoot": "/home/user/wifi-densepose",
|
||||||
"structure": {
|
"structure": {
|
||||||
"hasPackageJson": false,
|
"hasPackageJson": false,
|
||||||
"hasTsConfig": false,
|
"hasTsConfig": false,
|
||||||
"hasClaudeConfig": true,
|
"hasClaudeConfig": true,
|
||||||
"hasClaudeFlow": true
|
"hasClaudeFlow": true
|
||||||
},
|
},
|
||||||
"scannedAt": 1779689253386
|
"scannedAt": 1772295199193
|
||||||
}
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"timestamp": "2026-05-25T05:38:20.448Z",
|
"timestamp": "2026-02-28T16:05:19.091Z",
|
||||||
"patternsConsolidated": 0,
|
"patternsConsolidated": 0,
|
||||||
"memoryCleaned": 0,
|
"memoryCleaned": 0,
|
||||||
"duplicatesRemoved": 0
|
"duplicatesRemoved": 0
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
{
|
|
||||||
"timestamp": "2026-05-25T05:59:05.405Z",
|
|
||||||
"mode": "local",
|
|
||||||
"memoryUsage": {
|
|
||||||
"rss": 9891840,
|
|
||||||
"heapTotal": 35598336,
|
|
||||||
"heapUsed": 26516560,
|
|
||||||
"external": 3952418,
|
|
||||||
"arrayBuffers": 55689
|
|
||||||
},
|
|
||||||
"uptime": 27163.5846658,
|
|
||||||
"optimizations": {
|
|
||||||
"cacheHitRate": 0.78,
|
|
||||||
"avgResponseTime": 45
|
|
||||||
},
|
|
||||||
"note": "Install Claude Code CLI for AI-powered optimization suggestions"
|
|
||||||
}
|
|
||||||
@@ -1,84 +1,12 @@
|
|||||||
{
|
{
|
||||||
"timestamp": "2026-05-25T06:08:29.589Z",
|
"timestamp": "2026-03-06T13:17:27.368Z",
|
||||||
"mode": "headless",
|
"mode": "local",
|
||||||
"workerType": "audit",
|
"checks": {
|
||||||
"model": "haiku",
|
"envFilesProtected": true,
|
||||||
"durationMs": 56168,
|
"gitIgnoreExists": true,
|
||||||
"executionId": "audit_1779689253421_dfflmb",
|
"noHardcodedSecrets": true
|
||||||
"success": true,
|
|
||||||
"findings": {
|
|
||||||
"vulnerabilities": [
|
|
||||||
{
|
|
||||||
"severity": "high",
|
|
||||||
"file": ".claude/helpers/github-safe.js",
|
|
||||||
"line": 50,
|
|
||||||
"description": "Command injection vulnerability in execSync call. User-controlled arguments in `newArgs` are joined without shell escaping. An attacker can inject shell metacharacters (e.g., `; rm -rf /`) via the body content or through command/subcommand parameters. The temp file approach is safe, but the command construction `gh ${command} ${subcommand} ${newArgs.join(' ')}` allows shell injection.",
|
|
||||||
"example": "gh issue comment 123 'test`whoami`' would execute whoami"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"severity": "high",
|
|
||||||
"file": "scripts/csi-spectrogram.js",
|
|
||||||
"line": 45,
|
|
||||||
"description": "Sensitive credential exposure via command-line arguments. The `--seed-token` parameter is passed as a CLI argument, which is visible in process listings (ps aux output). This violates secure credential handling practices. Tokens should be read from environment variables or secure config files, not command-line args.",
|
|
||||||
"example": "node scripts/csi-spectrogram.js --seed-token secret_abc_123 exposes token in process list"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"severity": "medium",
|
|
||||||
"file": "scripts/apnea-detector.js",
|
|
||||||
"line": 71,
|
|
||||||
"description": "Unsafe buffer reading without comprehensive length validation. The code checks `buf.length` at 32 bytes (line 70) but then reads at fixed offsets (lines 72-76) without validating that each read stays within bounds. If a malformed packet is received, `readInt8/readUInt16LE/readUInt32LE` may read unintended data or zeros.",
|
|
||||||
"example": "A 33-byte buffer would pass the check but reading UInt32LE at offset 8 would go out of bounds"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"severity": "medium",
|
|
||||||
"file": "scripts/benchmark-rf-scan.js",
|
|
||||||
"line": 110,
|
|
||||||
"description": "Potential out-of-bounds buffer access in parseCSIFrame. While the bounds check at line 107 is present, the `nSubcarriers` value from the packet is used to calculate required buffer size without validation of the value itself. A maliciously crafted packet with extremely large nSubcarriers could cause memory issues.",
|
|
||||||
"example": "Packet with nSubcarriers=999999 would request excessive buffer allocation"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"severity": "medium",
|
|
||||||
"file": "scripts/csi-spectrogram.js",
|
|
||||||
"line": 39,
|
|
||||||
"description": "Unsafe URL construction with untrusted `seed-url` parameter. The `--seed-url` argument is used directly for HTTPS requests without validation. This could allow SSRF (Server-Side Request Forgery) or DNS rebinding attacks if an attacker controls the seed URL.",
|
|
||||||
"example": "node scripts/csi-spectrogram.js --seed-url http://internal.local:9000 could access internal services"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"severity": "low",
|
|
||||||
"file": ".claude/helpers/statusline.js",
|
|
||||||
"line": 140,
|
|
||||||
"description": "Shell command injection risk in execSync calls. Commands like `ps aux 2>/dev/null | grep -c agentic-flow` use grep patterns that could be vulnerable if any variables are interpolated (though currently hardcoded). The `execSync` with shell=true is generally risky.",
|
|
||||||
"example": "If any pattern becomes user-controlled: `grep -c ${pattern}` could inject shell metacharacters"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"severity": "low",
|
|
||||||
"file": ".claude/helpers/memory.js",
|
|
||||||
"line": 10,
|
|
||||||
"description": "Unvalidated JSON parsing. The code parses JSON from MEMORY_FILE without try-catch in the loadMemory function (catches error but doesn't validate structure). Malformed JSON or corrupted memory file could cause issues.",
|
|
||||||
"example": "Memory file with circular JSON structure could cause issues when stringifying"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"severity": "low",
|
|
||||||
"file": "scripts/device-fingerprint.js",
|
|
||||||
"line": 72,
|
|
||||||
"description": "Hardcoded device fingerprints and network configuration. While not a traditional 'hardcoded secret', the KNOWN_DEVICES array contains identifiable SSIDs and MAC addresses that could be used to correlate network infrastructure. This data should be externalized or sanitized.",
|
|
||||||
"example": "SSID 'ruv.net' and 'Cohen-Guest' could identify specific installations"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"riskScore": 42,
|
|
||||||
"recommendations": [
|
|
||||||
"**CRITICAL**: Replace `execSync` command construction in github-safe.js with proper shell escaping using `child_process.execFile()` instead of `execSync()`, or use the `shell: false` option with array arguments to avoid shell parsing entirely.",
|
|
||||||
"**CRITICAL**: Move `--seed-token` from CLI arguments to environment variable `SEED_TOKEN` in csi-spectrogram.js. Update documentation to instruct users: `export SEED_TOKEN=...` instead of passing via CLI.",
|
|
||||||
"**HIGH**: Add comprehensive buffer bounds validation in all UDP packet parsing functions (apnea-detector.js, benchmark-rf-scan.js, etc.). Validate both the buffer length AND the parsed header values before using them in calculations.",
|
|
||||||
"**HIGH**: Validate and sanitize the `--seed-url` parameter in csi-spectrogram.js. Whitelist allowed domains or restrict to localhost/internal IPs only. Add URL scheme validation (https only).",
|
|
||||||
"**MEDIUM**: Replace hardcoded device fingerprints (KNOWN_DEVICES) with externalized configuration or environment variables. Document that this data contains identifiable network information.",
|
|
||||||
"**MEDIUM**: Add input validation to `parseArgs()` results in all scripts. Validate numeric ranges, file paths, and enum values before use.",
|
|
||||||
"**LOW**: Wrap JSON.parse() calls in try-catch blocks throughout (memory.js, session.js) with explicit error handling and recovery.",
|
|
||||||
"**LOW**: Audit all uses of `require()` with dynamic paths. Ensure paths are always derived from fixed `__dirname` and not user-controlled.",
|
|
||||||
"**LOW**: Remove or sandbox the ability to pass arbitrary URLs via CLI. Consider using a configuration file (YAML/JSON) for endpoint URLs instead.",
|
|
||||||
"**INFO**: Add a pre-commit hook to detect hardcoded credentials using tools like `detect-secrets` or `truffleHog`."
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"rawOutputPreview": "# Security Audit Report — wifi-densepose\n\n```json\n{\n \"vulnerabilities\": [\n {\n \"severity\": \"high\",\n \"file\": \".claude/helpers/github-safe.js\",\n \"line\": 50,\n \"description\": \"Command injection vulnerability in execSync call. User-controlled arguments in `newArgs` are joined without shell escaping. An attacker can inject shell metacharacters (e.g., `; rm -rf /`) via the body content or through command/subcommand parameters. The temp file approach is safe, but the command construction `gh ${command} ${subcommand} ${newArgs.join(' ')}` allows shell injection.\",\n \"example\": \"gh issue comment 123 'test`whoami`' would execute whoami\"\n },\n {\n \"severity\": \"high\",\n \"file\": \"scripts/csi-spectrogram.js\",\n \"line\": 45,\n \"description\": \"Sensitive credential exposure via command-line arguments. The `--seed-token` parameter is passed as a CLI argument, which is visible in process listings (ps aux output). This violates secure credential handling practices. Tokens should be read from environment variables or secure config files, not command-line args.\",\n \"example\": \"node scripts/csi-spectrogram.js --seed-token secret_abc_123 exposes token in process list\"\n },\n {\n \"severity\": \"medium\",\n \"file\": \"scripts/apnea-detector.js\",\n \"line\": 71,\n \"description\": \"Unsafe buffer reading without comprehensive length validation. The code checks `buf.length` at 32 bytes (line 70) but then reads at fixed offsets (lines 72-76) without validating that each read stays within bounds. If a malformed packet is received, `readInt8/readUInt16LE/readUInt32LE` may read unintended data or zeros.\",\n \"example\": \"A 33-byte buffer would pass the check but reading UInt32LE at offset 8 would go out of bounds\"\n },\n {\n \"severity\": \"medium\",\n \"file\": \"scripts/benchmark-rf-scan.js\",\n \"line\": 110,\n \"description\": \"Potential out-of-bounds buffer access in parseCSIFrame. 
While the bounds check at line 107 is pres",
|
"riskLevel": "low",
|
||||||
"rawOutputLength": 7077
|
"recommendations": [],
|
||||||
|
"note": "Install Claude Code CLI for AI-powered security analysis"
|
||||||
}
|
}
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
{
|
|
||||||
"timestamp": "2026-05-25T06:11:52.519Z",
|
|
||||||
"mode": "headless",
|
|
||||||
"workerType": "testgaps",
|
|
||||||
"model": "sonnet",
|
|
||||||
"durationMs": 259124,
|
|
||||||
"executionId": "testgaps_1779689253395_srltd5",
|
|
||||||
"success": true,
|
|
||||||
"findings": {
|
|
||||||
"sections": [
|
|
||||||
{
|
|
||||||
"title": "Test Coverage Gap Analysis — wifi-densepose",
|
|
||||||
"content": "\n",
|
|
||||||
"level": 2
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Coverage Summary by Crate",
|
|
||||||
"content": "\n| Crate | Tests Found | Status | Priority |\n|-------|-------------|--------|----------|\n| `wifi-densepose-core` | 26 inline | Good | Low |\n| `wifi-densepose-signal` | ~60 (validation only) | Moderate | **High** |\n| `wifi-densepose-nn` | **0** | Critical | **P1** |\n| `wifi-densepose-train` | ~60 (config/dataset) | Moderate | High |\n| `wifi-densepose-mat` | 1 integration test | Critical | **P1** |\n| `wifi-densepose-ruvector` | **0** | Critical | **P1** |\n| `wifi-densepose-sensing-server` | 4 integration tests | Moderate | High |\n| `wifi-densepose-wasm` | 3 compliance tests | Low | Low |\n\n---\n\n",
|
|
||||||
"level": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Tier 1: Critical Gaps",
|
|
||||||
"content": "\n",
|
|
||||||
"level": 2
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "1. `wifi-densepose-nn` — Zero test coverage",
|
|
||||||
"content": "\nEvery public API is untested. Place these at `v2/crates/wifi-densepose-nn/tests/inference_tests.rs`:\n\n```rust\n// v2/crates/wifi-densepose-nn/tests/inference_tests.rs\n\n#[cfg(test)]\nmod tensor_tests {\n use wifi_densepose_nn::tensor::Tensor;\n\n #[test]\n fn tensor_shape_mismatch_returns_error() {\n // data has 6 elements but shape claims 3×3=9\n let result = Tensor::new(vec![1.0f32; 6], &[3, 3]);\n assert!(result.is_err(), \"shape mismatch must be rejected\");\n }\n\n #[test]\n fn tensor_empty_data_returns_error() {\n let result = Tensor::new(vec![], &[0]);\n assert!(result.is_err());\n }\n\n #[test]\n fn tensor_nan_values_are_detected() {\n let t = Tensor::new(vec![f32::NAN, 1.0, 2.0], &[3]).unwrap();\n assert!(t.has_nan(), \"NaN in data must be detectable\");\n }\n\n #[test]\n fn tensor_inf_values_are_detected() {\n let t = Tensor::new(vec![f32::INFINITY, 1.0], &[2]).unwrap();\n assert!(t.has_inf());\n }\n}\n\n#[cfg(test)]\nmod modality_translator_tests {\n use wifi_densepose_nn::translator::ModalityTranslator;\n\n #[test]\n fn translator_rejects_wrong_subcarrier_count() {\n // standard expects 56 subcarriers; feed 57\n let csi = vec![0.0f32; 57 * 3]; // 57 subcarriers × 3 antennas\n let translator = ModalityTranslator::default();\n let result = translator.translate(&csi, 57, 3);\n assert!(result.is_err());\n }\n\n #[test]\n fn translator_handles_all_zeros() {\n let csi = vec![0.0f32; 56 * 3];\n let translator = ModalityTranslator::default();\n let result = translator.translate(&csi, 56, 3);\n // zero input should produce some output without panic\n assert!(result.is_ok());\n }\n}\n\n#[cfg(test)]\nmod inference_engine_tests {\n use wifi_densepose_nn::inference::InferenceEngine;\n\n #[test]\n fn load_nonexistent_model_returns_error() {\n let result = InferenceEngine::from_path(\"/nonexistent/model.onnx\");\n assert!(result.is_err());\n }\n\n #[test]\n fn load_corrupted_bytes_returns_error() {\n let tmp = 
tempfile::NamedTempFile::new().unwrap();\n std::fs::write(tmp.path(), b\"not a valid onnx file\").unwrap();\n let result = InferenceEngine::from_path(tmp.path());\n assert!(result.is_err());\n }\n\n #[test]\n fn batch_size_zero_returns_error() {\n // can't run inference on an empty batch\n // requires a valid model; skip if no model file in test fixtures\n // use #[ignore] or a feature flag for CI\n }\n}\n```\n\n---\n\n",
|
|
||||||
"level": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "2. `wifi-densepose-mat` — Disaster response safety gaps",
|
|
||||||
"content": "\nPlace at `v2/crates/wifi-densepose-mat/tests/`:\n\n```rust\n// v2/crates/wifi-densepose-mat/tests/detection_edge_cases.rs\n\n#[cfg(test)]\nmod breathing_rate_edge_cases {\n use wifi_densepose_mat::detection::breathing::BreathingDetector;\n\n #[test]\n fn zero_bpm_is_classified_critical() {\n let detector = BreathingDetector::default();\n // flat-line signal — no breathing detected\n let signal = vec![0.0f32; 1000];\n let result = detector.classify(&signal).unwrap();\n assert_eq!(result.triage_category, TriageCategory::Immediate);\n }\n\n #[test]\n fn agonal_breathing_rate_triggers_immediate() {\n // < 6 BPM is agonal; simulate 3 BPM signal\n let detector = BreathingDetector::default();\n let signal = generate_breathing_signal(3.0, 1000, 100.0); // 3 BPM, 1000 samples @ 100 Hz\n let result = detector.classify(&signal).unwrap();\n assert_eq!(result.triage_category, TriageCategory::Immediate);\n }\n\n #[test]\n fn normal_breathing_is_classified_minor() {\n let detector = BreathingDetector::default();\n let signal = generate_breathing_signal(15.0, 1000, 100.0); // 15 BPM\n let result = detector.classify(&signal).unwrap();\n assert_eq!(result.triage_category, TriageCategory::Minor);\n }\n\n #[test]\n fn all_nan_signal_returns_error_not_panic() {\n let detector = BreathingDetector::default();\n let signal = vec![f32::NAN; 1000];\n let result = detector.classify(&signal);\n assert!(result.is_err(), \"NaN input must be caught, not panic\");\n }\n\n fn generate_breathing_signal(bpm: f32, samples: usize, sample_rate: f32) -> Vec<f32> {\n let freq = bpm / 60.0;\n (0..samples)\n .map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / sample_rate).sin())\n .collect()\n }\n}\n\n#[cfg(test)]\nmod alert_deduplication {\n use wifi_densepose_mat::alerting::{AlertDispatcher, Alert, TriageCategory};\n use std::time::Duration;\n\n #[test]\n fn duplicate_alerts_within_window_are_suppressed() {\n let mut dispatcher = AlertDispatcher::new();\n let alert = 
Alert::new(\"survivor-1\", TriageCategory::Immediate);\n dispatcher.dispatch(alert.clone());\n dispatcher.dispatch(alert.clone()); // same survivor, same category\n assert_eq!(dispatcher.queued_count(), 1, \"duplicate must be deduplicated\");\n }\n\n #[test]\n fn escalation_from_minor_to_immediate_is_forwarded() {\n let mut dispatcher = AlertDispatcher::new();\n dispatcher.dispatch(Alert::new(\"survivor-1\", TriageCategory::Minor));\n dispatcher.dispatch(Alert::new(\"survivor-1\", TriageCategory::Immediate));\n // escalation is not a duplicate — must pass through\n assert!(dispatcher.last_alert_for(\"survivor-1\").map(|a| a.category) == Some(TriageCategory::Immediate));\n }\n}\n\n#[cfg(test)]\nmod kalman_tracker_edge_cases {\n use wifi_densepose_mat::tracking::KalmanTracker;\n\n #[test]\n fn position_jump_does_not_corrupt_state() {\n let mut tracker = KalmanTracker::new();\n tracker.update([1.0, 1.0, 0.5]); // initial position\n tracker.update([50.0, 50.0, 0.5]); // physically impossible jump\n let pos = tracker.estimated_position();\n // should not panic; should clamp or flag anomaly\n assert!(pos.iter().all(|v| v.is_finite()));\n }\n\n #[test]\n fn lost_track_resumes_on_re_detection() {\n let mut tracker = KalmanTracker::new();\n tracker.update([1.0, 1.0, 0.5]);\n // simulate 10 missed frames\n for _ in 0..10 { tracker.predict(); }\n assert_eq!(tracker.state(), TrackState::Lost);\n tracker.update([1.1, 1.1, 0.5]); // re-detected nearby\n assert_eq!(tracker.state(), TrackState::Confirmed);\n }\n}\n```\n\n---\n\n",
|
|
||||||
"level": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "3. `wifi-densepose-ruvector` — Zero coverage on all 5 integration modules",
|
|
||||||
"content": "\n```rust\n// v2/crates/wifi-densepose-ruvector/tests/viewpoint_tests.rs\n\n#[cfg(test)]\nmod attention_tests {\n use wifi_densepose_ruvector::viewpoint::attention::CrossViewpointAttention;\n\n #[test]\n fn attention_weights_sum_to_one() {\n let attn = CrossViewpointAttention::new(3); // 3 viewpoints\n let features = vec![[1.0f32; 64], [2.0f32; 64], [3.0f32; 64]];\n let weights = attn.compute_weights(&features);\n let sum: f32 = weights.iter().sum();\n assert!((sum - 1.0).abs() < 1e-5, \"attention must be a probability distribution\");\n }\n\n #[test]\n fn single_viewpoint_gets_full_weight() {\n let attn = CrossViewpointAttention::new(1);\n let features = vec![[1.0f32; 64]];\n let weights = attn.compute_weights(&features);\n assert!((weights[0] - 1.0).abs() < 1e-6);\n }\n\n #[test]\n fn zero_feature_vectors_do_not_produce_nan() {\n let attn = CrossViewpointAttention::new(2);\n let features = vec![[0.0f32; 64], [0.0f32; 64]];\n let weights = attn.compute_weights(&features);\n assert!(weights.iter().all(|w| w.is_finite()));\n }\n}\n\n#[cfg(test)]\nmod sketch_tests {\n use wifi_densepose_ruvector::sketch::WireSketch;\n\n #[test]\n fn round_trip_serialization() {\n let sketch = WireSketch::from_keypoints(&[[0.5f32, 0.5], [0.3, 0.7]]);\n let bytes = sketch.to_bytes();\n let restored = WireSketch::from_bytes(&bytes).unwrap();\n assert_eq!(sketch, restored);\n }\n\n #[test]\n fn deserialize_truncated_bytes_returns_error() {\n let sketch = WireSketch::from_keypoints(&[[0.5f32, 0.5]]);\n let mut bytes = sketch.to_bytes();\n bytes.truncate(bytes.len() / 2); // truncate halfway\n assert!(WireSketch::from_bytes(&bytes).is_err());\n }\n\n #[test]\n fn empty_keypoint_list_is_handled() {\n let sketch = WireSketch::from_keypoints(&[]);\n assert_eq!(sketch.keypoint_count(), 0);\n }\n}\n```\n\n---\n\n",
|
|
||||||
"level": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Tier 2: Signal Processing Gaps",
|
|
||||||
"content": "\n",
|
|
||||||
"level": 2
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "4. `wifi-densepose-signal` — RuvSense module untested",
|
|
||||||
"content": "\n```rust\n// v2/crates/wifi-densepose-signal/tests/ruvsense_tests.rs\n\n#[cfg(test)]\nmod coherence_gate_tests {\n use wifi_densepose_signal::ruvsense::coherence_gate::{CoherenceGate, GateDecision};\n\n #[test]\n fn high_coherence_signal_is_accepted() {\n let gate = CoherenceGate::new(0.7); // threshold = 0.7\n let decision = gate.evaluate(0.95);\n assert_eq!(decision, GateDecision::Accept);\n }\n\n #[test]\n fn low_coherence_signal_is_rejected() {\n let gate = CoherenceGate::new(0.7);\n let decision = gate.evaluate(0.3);\n assert_eq!(decision, GateDecision::Reject);\n }\n\n #[test]\n fn borderline_coherence_triggers_recalibrate() {\n let gate = CoherenceGate::new(0.7);\n let decision = gate.evaluate(0.68); // just below threshold\n assert_eq!(decision, GateDecision::Recalibrate);\n }\n}\n\n#[cfg(test)]\nmod phase_align_tests {\n use wifi_densepose_signal::ruvsense::phase_align::PhaseAligner;\n\n #[test]\n fn phase_at_plus_pi_does_not_wrap_incorrectly() {\n let aligner = PhaseAligner::new();\n let phases = vec![std::f32::consts::PI - 0.001, std::f32::consts::PI + 0.001];\n let aligned = aligner.align(&phases);\n // jump across ±π boundary must be handled continuously\n let diff = (aligned[1] - aligned[0]).abs();\n assert!(diff < 0.01, \"phase jump at ±π must be < 0.01 rad after alignment\");\n }\n\n #[test]\n fn single_phase_value_aligns_to_itself() {\n let aligner = PhaseAligner::new();\n let phases = vec![1.5f32];\n let aligned = aligner.align(&phases);\n assert_eq!(aligned.len(), 1);\n assert!((aligned[0] - 1.5).abs() < 1e-6);\n }\n\n #[test]\n fn empty_phase_array_returns_empty() {\n let aligner = PhaseAligner::new();\n let aligned = aligner.align(&[]);\n assert!(aligned.is_empty());\n }\n}\n\n#[cfg(test)]\nmod adversarial_detection_tests {\n use wifi_densepose_signal::ruvsense::adversarial::AdversarialDetector;\n\n #[test]\n fn physically_impossible_amplitude_is_flagged() {\n let detector = AdversarialDetector::new();\n // WiFi amplitude cannot 
exceed hardware saturation level\n let frame = vec![1e9f32; 56]; // absurdly large\n assert!(detector.is_suspicious(&frame));\n }\n\n #[test]\n fn normal_amplitude_range_passes() {\n let detector = AdversarialDetector::new();\n let frame = vec![0.5f32; 56]; // typical normalized value\n assert!(!detector.is_suspicious(&frame));\n }\n\n #[test]\n fn multi_link_inconsistency_is_detected() {\n // link A reports body moving right; link B reports no motion\n // physically inconsistent — flag as adversarial\n let detector = AdversarialDetector::new();\n let result = detector.check_multi_link_consistency(\n &[1.0, 2.0, 3.0], // link A\n &[0.0, 0.0, 0.0], // link B (no motion)\n );\n assert!(result.is_inconsistent());\n }\n}\n```\n\n---\n\n",
|
|
||||||
"level": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Tier 2: Training Pipeline Gaps",
|
|
||||||
"content": "\n",
|
|
||||||
"level": 2
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "5. `wifi-densepose-train` — Geometry encoder and rapid adaptation untested",
|
|
||||||
"content": "\n```rust\n// v2/crates/wifi-densepose-train/tests/test_geometry.rs\n\n#[cfg(test)]\nmod film_layer_tests {\n use wifi_densepose_train::geometry::FilmLayer;\n\n #[test]\n fn film_layer_output_shape_matches_input() {\n let film = FilmLayer::new(64, 32); // 64-dim features, 32-dim condition\n let features = vec![0.5f32; 64];\n let condition = vec![1.0f32; 32];\n let output = film.forward(&features, &condition).unwrap();\n assert_eq!(output.len(), 64, \"FiLM output must match feature dimensionality\");\n }\n\n #[test]\n fn film_layer_zero_condition_acts_as_identity() {\n let film = FilmLayer::new(64, 32);\n let features = vec![1.0f32; 64];\n let zero_condition = vec![0.0f32; 32];\n let output = film.forward(&features, &zero_condition).unwrap();\n // scale=1, shift=0 → identity; output ≈ input\n for (o, f) in output.iter().zip(features.iter()) {\n assert!((o - f).abs() < 0.1, \"zero condition should approximate identity\");\n }\n }\n}\n\n// v2/crates/wifi-densepose-train/tests/test_rapid_adapt.rs\n\n#[cfg(test)]\nmod rapid_adaptation_tests {\n use wifi_densepose_train::rapid_adapt::RapidAdapter;\n\n #[test]\n fn adapter_updates_on_single_sample() {\n let mut adapter = RapidAdapter::new(5); // 5 adaptation steps\n let csi_sample = vec![0.1f32; 56 * 3];\n let pose_label = vec![0.5f32; 17 * 2]; // 17 keypoints × (x, y)\n let result = adapter.adapt_step(&csi_sample, &pose_label);\n assert!(result.is_ok());\n }\n\n #[test]\n fn adapter_with_zero_steps_is_no_op() {\n let adapter = RapidAdapter::new(0);\n // 0 adaptation steps → weights unchanged\n let initial_weights = adapter.clone_weights();\n let _ = adapter.adapt_step(&vec![0.1f32; 168], &vec![0.5f32; 34]);\n assert_eq!(adapter.clone_weights(), initial_weights);\n }\n}\n```\n\n---\n\n",
|
|
||||||
"level": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Tier 3: Server Integration Gaps",
|
|
||||||
"content": "\n",
|
|
||||||
"level": 2
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "6. `wifi-densepose-sensing-server` — Auth and semantic analyzers",
|
|
||||||
"content": "\n```rust\n// v2/crates/wifi-densepose-sensing-server/tests/auth_tests.rs\n\n#[cfg(test)]\nmod bearer_auth_tests {\n use wifi_densepose_sensing_server::auth::{BearerValidator, TokenError};\n\n #[test]\n fn missing_authorization_header_returns_unauthorized() {\n let validator = BearerValidator::new(\"secret-token\");\n let result = validator.validate(None);\n assert!(matches!(result, Err(TokenError::Missing)));\n }\n\n #[test]\n fn wrong_token_is_rejected() {\n let validator = BearerValidator::new(\"correct-token\");\n let result = validator.validate(Some(\"Bearer wrong-token\"));\n assert!(matches!(result, Err(TokenError::Invalid)));\n }\n\n #[test]\n fn malformed_header_without_bearer_prefix_is_rejected() {\n let validator = BearerValidator::new(\"token\");\n let result = validator.validate(Some(\"token\")); // missing \"Bearer \" prefix\n assert!(matches!(result, Err(TokenError::Malformed)));\n }\n\n #[test]\n fn correct_token_is_accepted() {\n let validator = BearerValidator::new(\"correct-token\");\n let result = validator.validate(Some(\"Bearer correct-token\"));\n assert!(result.is_ok());\n }\n}\n\n// v2/crates/wifi-densepose-sensing-server/tests/semantic_tests.rs\n\n#[cfg(test)]\nmod fall_detection_tests {\n use wifi_densepose_sensing_server::semantic::fall_detector::FallDetector;\n\n #[test]\n fn no_motion_does_not_trigger_fall() {\n let mut detector = FallDetector::new();\n for _ in 0..30 { // 30 frames of stillness\n detector.update_pose(stationary_pose());\n }\n assert!(!detector.fall_detected());\n }\n\n #[test]\n fn rapid_downward_velocity_triggers_fall() {\n let mut detector = FallDetector::new();\n // simulate person going from standing (y=1.7m) to prone (y=0.3m) in 3 frames\n for (frame, y) in [(0, 1.7f32), (1, 1.0), (2, 0.3)] {\n detector.update_pose(pose_at_height(y));\n }\n assert!(detector.fall_detected());\n }\n\n #[test]\n fn sitting_down_slowly_does_not_trigger_fall() {\n let mut detector = FallDetector::new();\n // gradual height 
decrease over 30 frames is sitting, not falling\n for i in 0..30 {\n let y = 1.7f32 - (i as f32 * 0.04); // ~1.2m drop over 30 frames\n detector.update_pose(pose_at_height(y));\n }\n assert!(!detector.fall_detected());\n }\n}\n```\n\n---\n\n",
|
|
||||||
"level": 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Cross-Cutting Gap Summary",
|
|
||||||
"content": "| Gap Category | Severity | Affects | Recommended Action |\n|---|---|---|---|\n| `wifi-densepose-nn` has 0 tests | **Critical** | Inference pipeline | Add `tests/inference_tests.rs` per skeleton above |\n| `wifi-densepose-ruvector` has 0 tests | **Critical** | Viewpoint fusion, sketches | Add `tests/viewpoint_tests.rs` |\n| MAT disaster response missing edge cases | **Critical** | 0 BPM, agonal breathing, dedup | Add `tests/detection_edge_cases.rs` |\n| Signal RuvSense 28 modules untested | High | Core sensing logic | Add `tests/ruvsense_tests.rs` |\n| NN error paths (bad model files, OOM) | High | Production reliability | Add error path tests to nn |\n| Train geometry + rapid adapt = 0 tests | High | Domain adaptation | Add `tests/test_geometry.rs` |\n| Server auth token validation | High | Security boundary | Add `tests/auth_tests.rs` |\n| NaN/Inf propagation in f32 pipelines | High | All numeric crates | Add boundary tests per module |\n| Concurrent state under Arc<Mutex> | Medium | sensing-server, mat | Add contention tests |\n\nThe highest-ROI starting point is `wifi-densepose-nn` and `wifi-densepose-mat` — the nn crate has zero tests on the core inference pipeline, and mat covers life-safety scenarios where classification errors have real consequences.",
|
|
||||||
"level": 2
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"codeBlocks": [
|
|
||||||
{
|
|
||||||
"language": "rust",
|
|
||||||
"code": "// v2/crates/wifi-densepose-nn/tests/inference_tests.rs\n\n#[cfg(test)]\nmod tensor_tests {\n use wifi_densepose_nn::tensor::Tensor;\n\n #[test]\n fn tensor_shape_mismatch_returns_error() {\n // data has 6 elements but shape claims 3×3=9\n let result = Tensor::new(vec![1.0f32; 6], &[3, 3]);\n assert!(result.is_err(), \"shape mismatch must be rejected\");\n }\n\n #[test]\n fn tensor_empty_data_returns_error() {\n let result = Tensor::new(vec![], &[0]);\n assert!(result.is_err());\n }\n\n #[test]\n fn tensor_nan_values_are_detected() {\n let t = Tensor::new(vec![f32::NAN, 1.0, 2.0], &[3]).unwrap();\n assert!(t.has_nan(), \"NaN in data must be detectable\");\n }\n\n #[test]\n fn tensor_inf_values_are_detected() {\n let t = Tensor::new(vec![f32::INFINITY, 1.0], &[2]).unwrap();\n assert!(t.has_inf());\n }\n}\n\n#[cfg(test)]\nmod modality_translator_tests {\n use wifi_densepose_nn::translator::ModalityTranslator;\n\n #[test]\n fn translator_rejects_wrong_subcarrier_count() {\n // standard expects 56 subcarriers; feed 57\n let csi = vec![0.0f32; 57 * 3]; // 57 subcarriers × 3 antennas\n let translator = ModalityTranslator::default();\n let result = translator.translate(&csi, 57, 3);\n assert!(result.is_err());\n }\n\n #[test]\n fn translator_handles_all_zeros() {\n let csi = vec![0.0f32; 56 * 3];\n let translator = ModalityTranslator::default();\n let result = translator.translate(&csi, 56, 3);\n // zero input should produce some output without panic\n assert!(result.is_ok());\n }\n}\n\n#[cfg(test)]\nmod inference_engine_tests {\n use wifi_densepose_nn::inference::InferenceEngine;\n\n #[test]\n fn load_nonexistent_model_returns_error() {\n let result = InferenceEngine::from_path(\"/nonexistent/model.onnx\");\n assert!(result.is_err());\n }\n\n #[test]\n fn load_corrupted_bytes_returns_error() {\n let tmp = tempfile::NamedTempFile::new().unwrap();\n std::fs::write(tmp.path(), b\"not a valid onnx file\").unwrap();\n let result = 
InferenceEngine::from_path(tmp.path());\n assert!(result.is_err());\n }\n\n #[test]\n fn batch_size_zero_returns_error() {\n // can't run inference on an empty batch\n // requires a valid model; skip if no model file in test fixtures\n // use #[ignore] or a feature flag for CI\n }\n}"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"language": "rust",
|
|
||||||
"code": "// v2/crates/wifi-densepose-mat/tests/detection_edge_cases.rs\n\n#[cfg(test)]\nmod breathing_rate_edge_cases {\n use wifi_densepose_mat::detection::breathing::BreathingDetector;\n\n #[test]\n fn zero_bpm_is_classified_critical() {\n let detector = BreathingDetector::default();\n // flat-line signal — no breathing detected\n let signal = vec![0.0f32; 1000];\n let result = detector.classify(&signal).unwrap();\n assert_eq!(result.triage_category, TriageCategory::Immediate);\n }\n\n #[test]\n fn agonal_breathing_rate_triggers_immediate() {\n // < 6 BPM is agonal; simulate 3 BPM signal\n let detector = BreathingDetector::default();\n let signal = generate_breathing_signal(3.0, 1000, 100.0); // 3 BPM, 1000 samples @ 100 Hz\n let result = detector.classify(&signal).unwrap();\n assert_eq!(result.triage_category, TriageCategory::Immediate);\n }\n\n #[test]\n fn normal_breathing_is_classified_minor() {\n let detector = BreathingDetector::default();\n let signal = generate_breathing_signal(15.0, 1000, 100.0); // 15 BPM\n let result = detector.classify(&signal).unwrap();\n assert_eq!(result.triage_category, TriageCategory::Minor);\n }\n\n #[test]\n fn all_nan_signal_returns_error_not_panic() {\n let detector = BreathingDetector::default();\n let signal = vec![f32::NAN; 1000];\n let result = detector.classify(&signal);\n assert!(result.is_err(), \"NaN input must be caught, not panic\");\n }\n\n fn generate_breathing_signal(bpm: f32, samples: usize, sample_rate: f32) -> Vec<f32> {\n let freq = bpm / 60.0;\n (0..samples)\n .map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / sample_rate).sin())\n .collect()\n }\n}\n\n#[cfg(test)]\nmod alert_deduplication {\n use wifi_densepose_mat::alerting::{AlertDispatcher, Alert, TriageCategory};\n use std::time::Duration;\n\n #[test]\n fn duplicate_alerts_within_window_are_suppressed() {\n let mut dispatcher = AlertDispatcher::new();\n let alert = Alert::new(\"survivor-1\", TriageCategory::Immediate);\n 
dispatcher.dispatch(alert.clone());\n dispatcher.dispatch(alert.clone()); // same survivor, same category\n assert_eq!(dispatcher.queued_count(), 1, \"duplicate must be deduplicated\");\n }\n\n #[test]\n fn escalation_from_minor_to_immediate_is_forwarded() {\n let mut dispatcher = AlertDispatcher::new();\n dispatcher.dispatch(Alert::new(\"survivor-1\", TriageCategory::Minor));\n dispatcher.dispatch(Alert::new(\"survivor-1\", TriageCategory::Immediate));\n // escalation is not a duplicate — must pass through\n assert!(dispatcher.last_alert_for(\"survivor-1\").map(|a| a.category) == Some(TriageCategory::Immediate));\n }\n}\n\n#[cfg(test)]\nmod kalman_tracker_edge_cases {\n use wifi_densepose_mat::tracking::KalmanTracker;\n\n #[test]\n fn position_jump_does_not_corrupt_state() {\n let mut tracker = KalmanTracker::new();\n tracker.update([1.0, 1.0, 0.5]); // initial position\n tracker.update([50.0, 50.0, 0.5]); // physically impossible jump\n let pos = tracker.estimated_position();\n // should not panic; should clamp or flag anomaly\n assert!(pos.iter().all(|v| v.is_finite()));\n }\n\n #[test]\n fn lost_track_resumes_on_re_detection() {\n let mut tracker = KalmanTracker::new();\n tracker.update([1.0, 1.0, 0.5]);\n // simulate 10 missed frames\n for _ in 0..10 { tracker.predict(); }\n assert_eq!(tracker.state(), TrackState::Lost);\n tracker.update([1.1, 1.1, 0.5]); // re-detected nearby\n assert_eq!(tracker.state(), TrackState::Confirmed);\n }\n}"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"language": "rust",
|
|
||||||
"code": "// v2/crates/wifi-densepose-ruvector/tests/viewpoint_tests.rs\n\n#[cfg(test)]\nmod attention_tests {\n use wifi_densepose_ruvector::viewpoint::attention::CrossViewpointAttention;\n\n #[test]\n fn attention_weights_sum_to_one() {\n let attn = CrossViewpointAttention::new(3); // 3 viewpoints\n let features = vec![[1.0f32; 64], [2.0f32; 64], [3.0f32; 64]];\n let weights = attn.compute_weights(&features);\n let sum: f32 = weights.iter().sum();\n assert!((sum - 1.0).abs() < 1e-5, \"attention must be a probability distribution\");\n }\n\n #[test]\n fn single_viewpoint_gets_full_weight() {\n let attn = CrossViewpointAttention::new(1);\n let features = vec![[1.0f32; 64]];\n let weights = attn.compute_weights(&features);\n assert!((weights[0] - 1.0).abs() < 1e-6);\n }\n\n #[test]\n fn zero_feature_vectors_do_not_produce_nan() {\n let attn = CrossViewpointAttention::new(2);\n let features = vec![[0.0f32; 64], [0.0f32; 64]];\n let weights = attn.compute_weights(&features);\n assert!(weights.iter().all(|w| w.is_finite()));\n }\n}\n\n#[cfg(test)]\nmod sketch_tests {\n use wifi_densepose_ruvector::sketch::WireSketch;\n\n #[test]\n fn round_trip_serialization() {\n let sketch = WireSketch::from_keypoints(&[[0.5f32, 0.5], [0.3, 0.7]]);\n let bytes = sketch.to_bytes();\n let restored = WireSketch::from_bytes(&bytes).unwrap();\n assert_eq!(sketch, restored);\n }\n\n #[test]\n fn deserialize_truncated_bytes_returns_error() {\n let sketch = WireSketch::from_keypoints(&[[0.5f32, 0.5]]);\n let mut bytes = sketch.to_bytes();\n bytes.truncate(bytes.len() / 2); // truncate halfway\n assert!(WireSketch::from_bytes(&bytes).is_err());\n }\n\n #[test]\n fn empty_keypoint_list_is_handled() {\n let sketch = WireSketch::from_keypoints(&[]);\n assert_eq!(sketch.keypoint_count(), 0);\n }\n}"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"language": "rust",
|
|
||||||
"code": "// v2/crates/wifi-densepose-signal/tests/ruvsense_tests.rs\n\n#[cfg(test)]\nmod coherence_gate_tests {\n use wifi_densepose_signal::ruvsense::coherence_gate::{CoherenceGate, GateDecision};\n\n #[test]\n fn high_coherence_signal_is_accepted() {\n let gate = CoherenceGate::new(0.7); // threshold = 0.7\n let decision = gate.evaluate(0.95);\n assert_eq!(decision, GateDecision::Accept);\n }\n\n #[test]\n fn low_coherence_signal_is_rejected() {\n let gate = CoherenceGate::new(0.7);\n let decision = gate.evaluate(0.3);\n assert_eq!(decision, GateDecision::Reject);\n }\n\n #[test]\n fn borderline_coherence_triggers_recalibrate() {\n let gate = CoherenceGate::new(0.7);\n let decision = gate.evaluate(0.68); // just below threshold\n assert_eq!(decision, GateDecision::Recalibrate);\n }\n}\n\n#[cfg(test)]\nmod phase_align_tests {\n use wifi_densepose_signal::ruvsense::phase_align::PhaseAligner;\n\n #[test]\n fn phase_at_plus_pi_does_not_wrap_incorrectly() {\n let aligner = PhaseAligner::new();\n let phases = vec![std::f32::consts::PI - 0.001, std::f32::consts::PI + 0.001];\n let aligned = aligner.align(&phases);\n // jump across ±π boundary must be handled continuously\n let diff = (aligned[1] - aligned[0]).abs();\n assert!(diff < 0.01, \"phase jump at ±π must be < 0.01 rad after alignment\");\n }\n\n #[test]\n fn single_phase_value_aligns_to_itself() {\n let aligner = PhaseAligner::new();\n let phases = vec![1.5f32];\n let aligned = aligner.align(&phases);\n assert_eq!(aligned.len(), 1);\n assert!((aligned[0] - 1.5).abs() < 1e-6);\n }\n\n #[test]\n fn empty_phase_array_returns_empty() {\n let aligner = PhaseAligner::new();\n let aligned = aligner.align(&[]);\n assert!(aligned.is_empty());\n }\n}\n\n#[cfg(test)]\nmod adversarial_detection_tests {\n use wifi_densepose_signal::ruvsense::adversarial::AdversarialDetector;\n\n #[test]\n fn physically_impossible_amplitude_is_flagged() {\n let detector = AdversarialDetector::new();\n // WiFi amplitude cannot exceed hardware 
saturation level\n let frame = vec![1e9f32; 56]; // absurdly large\n assert!(detector.is_suspicious(&frame));\n }\n\n #[test]\n fn normal_amplitude_range_passes() {\n let detector = AdversarialDetector::new();\n let frame = vec![0.5f32; 56]; // typical normalized value\n assert!(!detector.is_suspicious(&frame));\n }\n\n #[test]\n fn multi_link_inconsistency_is_detected() {\n // link A reports body moving right; link B reports no motion\n // physically inconsistent — flag as adversarial\n let detector = AdversarialDetector::new();\n let result = detector.check_multi_link_consistency(\n &[1.0, 2.0, 3.0], // link A\n &[0.0, 0.0, 0.0], // link B (no motion)\n );\n assert!(result.is_inconsistent());\n }\n}"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"language": "rust",
|
|
||||||
"code": "// v2/crates/wifi-densepose-train/tests/test_geometry.rs\n\n#[cfg(test)]\nmod film_layer_tests {\n use wifi_densepose_train::geometry::FilmLayer;\n\n #[test]\n fn film_layer_output_shape_matches_input() {\n let film = FilmLayer::new(64, 32); // 64-dim features, 32-dim condition\n let features = vec![0.5f32; 64];\n let condition = vec![1.0f32; 32];\n let output = film.forward(&features, &condition).unwrap();\n assert_eq!(output.len(), 64, \"FiLM output must match feature dimensionality\");\n }\n\n #[test]\n fn film_layer_zero_condition_acts_as_identity() {\n let film = FilmLayer::new(64, 32);\n let features = vec![1.0f32; 64];\n let zero_condition = vec![0.0f32; 32];\n let output = film.forward(&features, &zero_condition).unwrap();\n // scale=1, shift=0 → identity; output ≈ input\n for (o, f) in output.iter().zip(features.iter()) {\n assert!((o - f).abs() < 0.1, \"zero condition should approximate identity\");\n }\n }\n}\n\n// v2/crates/wifi-densepose-train/tests/test_rapid_adapt.rs\n\n#[cfg(test)]\nmod rapid_adaptation_tests {\n use wifi_densepose_train::rapid_adapt::RapidAdapter;\n\n #[test]\n fn adapter_updates_on_single_sample() {\n let mut adapter = RapidAdapter::new(5); // 5 adaptation steps\n let csi_sample = vec![0.1f32; 56 * 3];\n let pose_label = vec![0.5f32; 17 * 2]; // 17 keypoints × (x, y)\n let result = adapter.adapt_step(&csi_sample, &pose_label);\n assert!(result.is_ok());\n }\n\n #[test]\n fn adapter_with_zero_steps_is_no_op() {\n let adapter = RapidAdapter::new(0);\n // 0 adaptation steps → weights unchanged\n let initial_weights = adapter.clone_weights();\n let _ = adapter.adapt_step(&vec![0.1f32; 168], &vec![0.5f32; 34]);\n assert_eq!(adapter.clone_weights(), initial_weights);\n }\n}"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"language": "rust",
|
|
||||||
"code": "// v2/crates/wifi-densepose-sensing-server/tests/auth_tests.rs\n\n#[cfg(test)]\nmod bearer_auth_tests {\n use wifi_densepose_sensing_server::auth::{BearerValidator, TokenError};\n\n #[test]\n fn missing_authorization_header_returns_unauthorized() {\n let validator = BearerValidator::new(\"secret-token\");\n let result = validator.validate(None);\n assert!(matches!(result, Err(TokenError::Missing)));\n }\n\n #[test]\n fn wrong_token_is_rejected() {\n let validator = BearerValidator::new(\"correct-token\");\n let result = validator.validate(Some(\"Bearer wrong-token\"));\n assert!(matches!(result, Err(TokenError::Invalid)));\n }\n\n #[test]\n fn malformed_header_without_bearer_prefix_is_rejected() {\n let validator = BearerValidator::new(\"token\");\n let result = validator.validate(Some(\"token\")); // missing \"Bearer \" prefix\n assert!(matches!(result, Err(TokenError::Malformed)));\n }\n\n #[test]\n fn correct_token_is_accepted() {\n let validator = BearerValidator::new(\"correct-token\");\n let result = validator.validate(Some(\"Bearer correct-token\"));\n assert!(result.is_ok());\n }\n}\n\n// v2/crates/wifi-densepose-sensing-server/tests/semantic_tests.rs\n\n#[cfg(test)]\nmod fall_detection_tests {\n use wifi_densepose_sensing_server::semantic::fall_detector::FallDetector;\n\n #[test]\n fn no_motion_does_not_trigger_fall() {\n let mut detector = FallDetector::new();\n for _ in 0..30 { // 30 frames of stillness\n detector.update_pose(stationary_pose());\n }\n assert!(!detector.fall_detected());\n }\n\n #[test]\n fn rapid_downward_velocity_triggers_fall() {\n let mut detector = FallDetector::new();\n // simulate person going from standing (y=1.7m) to prone (y=0.3m) in 3 frames\n for (frame, y) in [(0, 1.7f32), (1, 1.0), (2, 0.3)] {\n detector.update_pose(pose_at_height(y));\n }\n assert!(detector.fall_detected());\n }\n\n #[test]\n fn sitting_down_slowly_does_not_trigger_fall() {\n let mut detector = FallDetector::new();\n // gradual height decrease over 
30 frames is sitting, not falling\n for i in 0..30 {\n let y = 1.7f32 - (i as f32 * 0.04); // ~1.2m drop over 30 frames\n detector.update_pose(pose_at_height(y));\n }\n assert!(!detector.fall_detected());\n }\n}"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"rawOutputPreview": "The exploration is complete. Here's the full coverage analysis with test skeletons for each critical gap.\n\n---\n\n## Test Coverage Gap Analysis — wifi-densepose\n\n### Coverage Summary by Crate\n\n| Crate | Tests Found | Status | Priority |\n|-------|-------------|--------|----------|\n| `wifi-densepose-core` | 26 inline | Good | Low |\n| `wifi-densepose-signal` | ~60 (validation only) | Moderate | **High** |\n| `wifi-densepose-nn` | **0** | Critical | **P1** |\n| `wifi-densepose-train` | ~60 (config/dataset) | Moderate | High |\n| `wifi-densepose-mat` | 1 integration test | Critical | **P1** |\n| `wifi-densepose-ruvector` | **0** | Critical | **P1** |\n| `wifi-densepose-sensing-server` | 4 integration tests | Moderate | High |\n| `wifi-densepose-wasm` | 3 compliance tests | Low | Low |\n\n---\n\n## Tier 1: Critical Gaps\n\n### 1. `wifi-densepose-nn` — Zero test coverage\n\nEvery public API is untested. Place these at `v2/crates/wifi-densepose-nn/tests/inference_tests.rs`:\n\n```rust\n// v2/crates/wifi-densepose-nn/tests/inference_tests.rs\n\n#[cfg(test)]\nmod tensor_tests {\n use wifi_densepose_nn::tensor::Tensor;\n\n #[test]\n fn tensor_shape_mismatch_returns_error() {\n // data has 6 elements but shape claims 3×3=9\n let result = Tensor::new(vec![1.0f32; 6], &[3, 3]);\n assert!(result.is_err(), \"shape mismatch must be rejected\");\n }\n\n #[test]\n fn tensor_empty_data_returns_error() {\n let result = Tensor::new(vec![], &[0]);\n assert!(result.is_err());\n }\n\n #[test]\n fn tensor_nan_values_are_detected() {\n let t = Tensor::new(vec![f32::NAN, 1.0, 2.0], &[3]).unwrap();\n assert!(t.has_nan(), \"NaN in data must be detectable\");\n }\n\n #[test]\n fn tensor_inf_values_are_detected() {\n let t = Tensor::new(vec![f32::INFINITY, 1.0], &[2]).unwrap();\n assert!(t.has_inf());\n }\n}\n\n#[cfg(test)]\nmod modality_translator_tests {\n use wifi_densepose_nn::translator::ModalityTranslator;\n\n #[test]\n fn translator_rejects",
|
|
||||||
"rawOutputLength": 18269
|
|
||||||
}
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "ruview",
|
|
||||||
"description": "RuView Marketplace: Claude Code + Codex plugins for WiFi sensing — configuration, applications, model training, and onboarding, from practical to advanced",
|
|
||||||
"owner": {
|
|
||||||
"name": "ruvnet",
|
|
||||||
"url": "https://github.com/ruvnet/RuView"
|
|
||||||
},
|
|
||||||
"plugins": [
|
|
||||||
{
|
|
||||||
"name": "ruview",
|
|
||||||
"source": "./plugins/ruview",
|
|
||||||
"description": "End-to-end RuView toolkit: getting started, ESP32 hardware setup, configuration, sensing applications (presence / vitals / pose / sleep / MAT), camera-free + camera-supervised model training, advanced multistatic sensing, CLI / API / WASM, mmWave radar, and witness verification"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"sessionId":"d80c93c2-51b7-42e8-a0fc-dc47cff1200f","pid":45748,"acquiredAt":1779668018388}
|
|
||||||
@@ -126,7 +126,10 @@
|
|||||||
"Bash(node .claude/*)",
|
"Bash(node .claude/*)",
|
||||||
"mcp__claude-flow__:*"
|
"mcp__claude-flow__:*"
|
||||||
],
|
],
|
||||||
"deny": []
|
"deny": [
|
||||||
|
"Read(./.env)",
|
||||||
|
"Read(./.env.*)"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"attribution": {
|
"attribution": {
|
||||||
"commit": "Co-Authored-By: claude-flow <ruv@ruv.net>",
|
"commit": "Co-Authored-By: claude-flow <ruv@ruv.net>",
|
||||||
|
|||||||
@@ -1,96 +0,0 @@
|
|||||||
name: AetherArena harness gate (ADR-149)
|
|
||||||
|
|
||||||
# Runs the AetherArena scoring harness as a PR build gate. Every PR that touches
|
|
||||||
# the scorer, the metrics, or the benchmark scaffold must keep the deterministic
|
|
||||||
# score hash stable (ADR-149 §2.5 determinism_gate). If the scoring maths changes,
|
|
||||||
# the hash moves and this gate fails until `expected_score.sha256` is regenerated
|
|
||||||
# and reviewed — so scorer drift can never land silently.
|
|
||||||
#
|
|
||||||
# This is the "a PR that runs the harness as part of the build process" requirement.
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- 'v2/crates/wifi-densepose-train/src/ruview_metrics.rs'
|
|
||||||
- 'v2/crates/wifi-densepose-train/src/ablation.rs'
|
|
||||||
- 'v2/crates/wifi-densepose-train/src/bin/aa_score_runner.rs'
|
|
||||||
- 'aether-arena/**'
|
|
||||||
- '.github/workflows/aether-arena-harness.yml'
|
|
||||||
push:
|
|
||||||
branches: ['feat/adr-149-aether-arena']
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
pull-requests: write
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
harness-gate:
|
|
||||||
name: Run AA scorer harness (determinism gate)
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
defaults:
|
|
||||||
run:
|
|
||||||
working-directory: v2
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Install Rust toolchain
|
|
||||||
run: rustup show && rustc --version
|
|
||||||
|
|
||||||
- name: Cache cargo
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
~/.cargo/registry
|
|
||||||
~/.cargo/git
|
|
||||||
v2/target
|
|
||||||
key: aa-harness-${{ runner.os }}-${{ hashFiles('v2/Cargo.lock') }}
|
|
||||||
|
|
||||||
# 1. Build the pure-Rust scorer (no torch / no GPU → fast PR gate).
|
|
||||||
- name: Build AA score runner
|
|
||||||
run: cargo build -p wifi-densepose-train --bin aa_score_runner --no-default-features
|
|
||||||
|
|
||||||
# 2. Determinism gate: the committed expected hash must still match. A
|
|
||||||
# non-zero exit here fails the PR.
|
|
||||||
- name: Run determinism gate
|
|
||||||
run: cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features
|
|
||||||
|
|
||||||
# 3. Repeatability analysis (witness chain): the harness must produce one
|
|
||||||
# identical proof hash across many runs — any nondeterminism fails here.
|
|
||||||
- name: Repeatability analysis (16 runs)
|
|
||||||
run: cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16
|
|
||||||
|
|
||||||
# 4. Real-scoring smoke: score a sample prediction against the public smoke
|
|
||||||
# split, exercising the actual model-scoring path (not just the fixture).
|
|
||||||
- name: Real-scoring smoke test
|
|
||||||
run: |
|
|
||||||
cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- \
|
|
||||||
--split ../aether-arena/fixtures/smoke_split.json \
|
|
||||||
--pred ../aether-arena/fixtures/smoke_pred.json --json
|
|
||||||
|
|
||||||
# 5. Witness ledger chain integrity: the append-only results ledger must
|
|
||||||
# verify (every prev_hash link + row_hash intact = no silent edits).
|
|
||||||
- name: Verify witness ledger chain
|
|
||||||
working-directory: aether-arena/ledger
|
|
||||||
run: python3 ledger_tools.py verify
|
|
||||||
|
|
||||||
# 6. Emit the witness row + repeatability into the PR run summary.
|
|
||||||
- name: Witness row → job summary
|
|
||||||
if: always()
|
|
||||||
run: |
|
|
||||||
ROW=$(cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --json)
|
|
||||||
REP=$(cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16)
|
|
||||||
{
|
|
||||||
echo "## AetherArena harness gate (witness chain)"
|
|
||||||
echo ""
|
|
||||||
echo "Deterministic witness (ADR-149 §2.2 / proof + repeatability):"
|
|
||||||
echo '```json'
|
|
||||||
echo "$ROW"
|
|
||||||
echo "$REP"
|
|
||||||
echo '```'
|
|
||||||
echo ""
|
|
||||||
echo "If the determinism gate failed, the scoring maths changed: regenerate with"
|
|
||||||
echo '`cargo run -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --generate-hash > aether-arena/fixtures/expected_score.sha256` and review the diff.'
|
|
||||||
} >> "$GITHUB_STEP_SUMMARY"
|
|
||||||
@@ -1,199 +0,0 @@
|
|||||||
name: Bench Regression Guard
|
|
||||||
|
|
||||||
# Sub-deliverable 8.3 of the benchmark/optimization milestone.
|
|
||||||
#
|
|
||||||
# HONEST SCOPE (read this before assuming this gates on timing):
|
|
||||||
# * The `bench-compile` job is a REAL, HARD-FAILING regression gate. It runs
|
|
||||||
# `cargo bench --no-default-features --no-run`, which type-checks and links
|
|
||||||
# EVERY criterion bench in the v2/ workspace without running a single
|
|
||||||
# measurement. Benches are not part of `cargo test`, so they silently
|
|
||||||
# bit-rot when a public API they call changes — this job catches that the
|
|
||||||
# moment it happens. This is the part of this workflow that can fail a PR.
|
|
||||||
#
|
|
||||||
# * The `bench-fast-run` job runs a small, curated subset of pure-CPU benches
|
|
||||||
# in criterion "quick mode" (short warm-up / measurement / 10 samples) and
|
|
||||||
# is INFORMATIONAL ONLY (`continue-on-error: true`). It does NOT gate on
|
|
||||||
# timing. Wall-clock timings on shared GitHub-hosted runners vary by
|
|
||||||
# 2-3x run-to-run (noisy neighbours, CPU throttling, no pinned frequency),
|
|
||||||
# so a hard ">X ms" threshold here would flake constantly and teach
|
|
||||||
# everyone to ignore it. We deliberately do not pretend to do timing
|
|
||||||
# regression-gating we cannot deliver reliably. The numbers are surfaced in
|
|
||||||
# the job log + uploaded as an artifact for humans to eyeball trends.
|
|
||||||
#
|
|
||||||
# WHY NO criterion --baseline COMPARE GATE:
|
|
||||||
# criterion's `--save-baseline` / `--baseline` compare is the textbook
|
|
||||||
# regression mechanism, but it only produces a trustworthy verdict when the
|
|
||||||
# baseline and the candidate were measured on the SAME hardware under the SAME
|
|
||||||
# conditions. GitHub-hosted runners give neither (the baseline commit and the
|
|
||||||
# PR commit land on different physical machines). Committing a baseline JSON
|
|
||||||
# measured on one runner and comparing a different runner against it would
|
|
||||||
# manufacture false regressions. If/when these benches run on a dedicated,
|
|
||||||
# frequency-pinned self-hosted runner, a `--baseline` compare with a generous
|
|
||||||
# (>2x) noise floor becomes honest and can be added then. Until then,
|
|
||||||
# compile-verify + informational-run is the honest gate.
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [ main, develop, 'feat/*' ]
|
|
||||||
paths:
|
|
||||||
- 'v2/crates/**/benches/**'
|
|
||||||
- 'v2/crates/**/Cargo.toml'
|
|
||||||
- 'v2/crates/**/src/**'
|
|
||||||
- 'v2/Cargo.toml'
|
|
||||||
- 'v2/Cargo.lock'
|
|
||||||
- '.github/workflows/bench-regression.yml'
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- 'v2/crates/**/benches/**'
|
|
||||||
- 'v2/crates/**/Cargo.toml'
|
|
||||||
- 'v2/crates/**/src/**'
|
|
||||||
- 'v2/Cargo.toml'
|
|
||||||
- 'v2/Cargo.lock'
|
|
||||||
- '.github/workflows/bench-regression.yml'
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
env:
|
|
||||||
CARGO_TERM_COLOR: always
|
|
||||||
# Debuginfo is useless in CI and the 38-crate workspace target dir otherwise
|
|
||||||
# exhausts the runner disk (mirrors ci.yml's rust-tests job). The bench
|
|
||||||
# profile inherits release + debug = true (v2/Cargo.toml [profile.bench]);
|
|
||||||
# force it off so the link step does not run out of space.
|
|
||||||
CARGO_PROFILE_BENCH_DEBUG: "0"
|
|
||||||
CARGO_PROFILE_RELEASE_DEBUG: "0"
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
# ── HARD GATE: every bench must still compile + link ─────────────────────
|
|
||||||
bench-compile:
|
|
||||||
name: bench compile-verify (--no-run)
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout (recursive — wifi-densepose-rufield path-deps vendor/rufield)
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
# The workspace includes `wifi-densepose-rufield`, which path-deps the
|
|
||||||
# `vendor/rufield` submodule crates. Without a recursive checkout the
|
|
||||||
# whole workspace fails to resolve before any bench is built.
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
# The workspace pulls in `wifi-densepose-desktop` (Tauri v2) whose -sys
|
|
||||||
# crates need the GTK/WebKit/serial dev libraries via pkg-config, exactly
|
|
||||||
# as ci.yml's rust-tests job documents. A `--workspace` bench build links
|
|
||||||
# the whole graph, so these are required here too.
|
|
||||||
- name: Install Tauri / GTK / serial system dev libraries
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y --no-install-recommends \
|
|
||||||
libglib2.0-dev \
|
|
||||||
libgtk-3-dev \
|
|
||||||
libsoup-3.0-dev \
|
|
||||||
libjavascriptcoregtk-4.1-dev \
|
|
||||||
libwebkit2gtk-4.1-dev \
|
|
||||||
libayatana-appindicator3-dev \
|
|
||||||
librsvg2-dev \
|
|
||||||
libxdo-dev \
|
|
||||||
libudev-dev \
|
|
||||||
libdbus-1-dev \
|
|
||||||
libssl-dev \
|
|
||||||
pkg-config
|
|
||||||
|
|
||||||
- name: Install Rust toolchain
|
|
||||||
uses: dtolnay/rust-toolchain@stable
|
|
||||||
|
|
||||||
- name: Cache cargo (Swatinem/rust-cache)
|
|
||||||
uses: Swatinem/rust-cache@v2
|
|
||||||
with:
|
|
||||||
workspaces: v2
|
|
||||||
# Distinct cache scope from ci.yml's rust-tests so the bench profile
|
|
||||||
# artifacts (release+opt) do not evict the test profile cache.
|
|
||||||
key: bench-regression
|
|
||||||
|
|
||||||
# The core regression guard. `--no-run` compiles + links every bench
|
|
||||||
# target in the workspace's DEFAULT feature set but runs no measurement,
|
|
||||||
# so it is deterministic and fast-ish (build only). A bench that no longer
|
|
||||||
# compiles — because a type/signature it calls changed and nobody updated
|
|
||||||
# the bench — fails the build here. `--no-default-features` is the
|
|
||||||
# workspace's standard gate flag (openblas/tch/ort/onnx stay opt-out).
|
|
||||||
- name: Compile all workspace benches (default features)
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo bench --workspace --no-default-features --no-run
|
|
||||||
|
|
||||||
# Feature-gated benches are skipped by the default build above because
|
|
||||||
# their `[[bench]]` entries carry `required-features`. Compile the ones we
|
|
||||||
# can guard so they are also covered against bit-rot.
|
|
||||||
# * cir → wifi-densepose-signal/benches/cir_bench.rs (ADR-134). The
|
|
||||||
# `cir` feature is pure-Rust (`cir = []`), so it builds on the stock
|
|
||||||
# runner and is a real, hard-failing guard like the step above.
|
|
||||||
#
|
|
||||||
# NOT guarded here (honest scope):
|
|
||||||
# * crv → wifi-densepose-ruvector/benches/crv_bench.rs. The `crv` feature
|
|
||||||
# pulls the crates.io dependency `ruvector-crv 0.1.1`, which currently
|
|
||||||
# FAILS to compile on stable (E0308 type mismatch in its own
|
|
||||||
# `stage_iii.rs` — an UPSTREAM bug, unrelated to bench bit-rot).
|
|
||||||
# Adding a hard `--features crv` compile step would make this workflow
|
|
||||||
# red for a reason this gate is not meant to police. Re-add this step
|
|
||||||
# once `ruvector-crv` ships a fixed release. (mqtt/onnx benches are
|
|
||||||
# likewise left to their own crate workflows.)
|
|
||||||
- name: Compile feature-gated benches (cir)
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo bench -p wifi-densepose-signal --no-default-features --features cir --bench cir_bench --no-run
|
|
||||||
|
|
||||||
# ── INFORMATIONAL: run a curated fast subset (never gates) ───────────────
|
|
||||||
bench-fast-run:
|
|
||||||
name: bench fast-run (informational, non-gating)
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
# NEVER fail the workflow on this job — timings are noise-prone on shared
|
|
||||||
# runners (see header). It exists to surface trends for humans, not to gate.
|
|
||||||
continue-on-error: true
|
|
||||||
needs: [bench-compile]
|
|
||||||
steps:
|
|
||||||
- name: Checkout (recursive)
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Install Rust toolchain
|
|
||||||
uses: dtolnay/rust-toolchain@stable
|
|
||||||
|
|
||||||
- name: Cache cargo (Swatinem/rust-cache)
|
|
||||||
uses: Swatinem/rust-cache@v2
|
|
||||||
with:
|
|
||||||
workspaces: v2
|
|
||||||
key: bench-regression
|
|
||||||
|
|
||||||
# Curated subset = pure-CPU, fast, dependency-light criterion benches that
|
|
||||||
# finish in seconds under quick-mode flags. Each is targeted by `--bench`
|
|
||||||
# (NOT a bare `cargo bench -p`) because the crates' lib targets use the
|
|
||||||
# libtest harness, which rejects criterion's CLI flags (--warm-up-time
|
|
||||||
# etc.) and aborts the run. Quick-mode: 1s warm-up, 2s measure, 10 samples.
|
|
||||||
- name: nvsim pipeline_throughput (quick)
|
|
||||||
working-directory: v2
|
|
||||||
run: |
|
|
||||||
mkdir -p ../bench-out
|
|
||||||
cargo bench -p nvsim --no-default-features --bench pipeline_throughput -- \
|
|
||||||
--warm-up-time 1 --measurement-time 2 --sample-size 10 \
|
|
||||||
| tee ../bench-out/nvsim_pipeline_throughput.txt
|
|
||||||
|
|
||||||
- name: ruvector sketch_bench (quick)
|
|
||||||
working-directory: v2
|
|
||||||
run: |
|
|
||||||
cargo bench -p wifi-densepose-ruvector --no-default-features --bench sketch_bench -- \
|
|
||||||
--warm-up-time 1 --measurement-time 2 --sample-size 10 \
|
|
||||||
| tee ../bench-out/ruvector_sketch_bench.txt
|
|
||||||
|
|
||||||
- name: ruvector fusion_bench (quick)
|
|
||||||
working-directory: v2
|
|
||||||
run: |
|
|
||||||
cargo bench -p wifi-densepose-ruvector --no-default-features --bench fusion_bench -- \
|
|
||||||
--warm-up-time 1 --measurement-time 2 --sample-size 10 \
|
|
||||||
| tee ../bench-out/ruvector_fusion_bench.txt
|
|
||||||
|
|
||||||
- name: Upload informational bench logs
|
|
||||||
if: always()
|
|
||||||
uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: bench-fast-run-logs
|
|
||||||
path: bench-out/
|
|
||||||
if-no-files-found: warn
|
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
name: BFLD MQTT Integration
|
|
||||||
|
|
||||||
# Runs the env-gated mosquitto integration tests from iters 24 + 29 of the
|
|
||||||
# BFLD rollout (ADR-118 / ADR-122 §2.2). Spins up an eclipse-mosquitto:2
|
|
||||||
# service container, exports BFLD_MQTT_BROKER, runs `cargo test --features
|
|
||||||
# mqtt`. Local developers can reproduce with:
|
|
||||||
#
|
|
||||||
# scoop install mosquitto # Windows
|
|
||||||
# # or: docker run -p 1883:1883 eclipse-mosquitto:2
|
|
||||||
# BFLD_MQTT_BROKER=tcp://localhost:1883 \
|
|
||||||
# cargo test -p wifi-densepose-bfld --features mqtt
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- main
|
|
||||||
- 'feat/adr-118-*'
|
|
||||||
- 'feat/bfld-*'
|
|
||||||
paths:
|
|
||||||
- 'v2/crates/wifi-densepose-bfld/**'
|
|
||||||
- '.github/workflows/bfld-mqtt-integration.yml'
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- 'v2/crates/wifi-densepose-bfld/**'
|
|
||||||
- '.github/workflows/bfld-mqtt-integration.yml'
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
mqtt-live-broker:
|
|
||||||
name: cargo test --features mqtt (live mosquitto)
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
timeout-minutes: 15
|
|
||||||
|
|
||||||
services:
|
|
||||||
mosquitto:
|
|
||||||
image: eclipse-mosquitto:2
|
|
||||||
ports:
|
|
||||||
- 1883:1883
|
|
||||||
# Allow anonymous connections — local-only CI broker, no exposure
|
|
||||||
# to the public internet, never touches production credentials.
|
|
||||||
options: >-
|
|
||||||
--health-cmd "mosquitto_pub -h localhost -t healthcheck -m ping || exit 1"
|
|
||||||
--health-interval 5s
|
|
||||||
--health-timeout 3s
|
|
||||||
--health-retries 10
|
|
||||||
|
|
||||||
env:
|
|
||||||
BFLD_MQTT_BROKER: tcp://localhost:1883
|
|
||||||
CARGO_TERM_COLOR: always
|
|
||||||
CARGO_INCREMENTAL: 0
|
|
||||||
RUSTFLAGS: -D warnings
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Install Rust toolchain
|
|
||||||
uses: dtolnay/rust-toolchain@stable
|
|
||||||
with:
|
|
||||||
components: clippy
|
|
||||||
|
|
||||||
- name: Cache cargo registry + target
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
~/.cargo/registry
|
|
||||||
~/.cargo/git
|
|
||||||
v2/target
|
|
||||||
key: bfld-mqtt-${{ runner.os }}-${{ hashFiles('v2/Cargo.lock') }}
|
|
||||||
|
|
||||||
- name: Wait for mosquitto to be ready
|
|
||||||
run: |
|
|
||||||
for i in {1..20}; do
|
|
||||||
if nc -z localhost 1883; then
|
|
||||||
echo "mosquitto reachable on port 1883 (attempt $i)"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
echo "waiting for mosquitto ($i/20)..."
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
echo "mosquitto never became reachable" >&2
|
|
||||||
exit 1
|
|
||||||
|
|
||||||
- name: cargo test --no-default-features (baseline regression)
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo test -p wifi-densepose-bfld --no-default-features
|
|
||||||
|
|
||||||
- name: cargo test (default features)
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo test -p wifi-densepose-bfld
|
|
||||||
|
|
||||||
- name: cargo test --features mqtt (incl. live mosquitto roundtrip)
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo test -p wifi-densepose-bfld --features mqtt
|
|
||||||
|
|
||||||
- name: cargo clippy --features mqtt (lint gate)
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo clippy -p wifi-densepose-bfld --features mqtt --all-targets -- -D warnings
|
|
||||||
continue-on-error: true
|
|
||||||
@@ -42,8 +42,6 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Determine deployment environment
|
- name: Determine deployment environment
|
||||||
id: determine-env
|
id: determine-env
|
||||||
@@ -88,8 +86,6 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up kubectl
|
- name: Set up kubectl
|
||||||
uses: azure/setup-kubectl@v3
|
uses: azure/setup-kubectl@v3
|
||||||
@@ -136,8 +132,6 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up kubectl
|
- name: Set up kubectl
|
||||||
uses: azure/setup-kubectl@v3
|
uses: azure/setup-kubectl@v3
|
||||||
|
|||||||
+29
-182
@@ -15,51 +15,38 @@ env:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# Code Quality and Security Checks
|
# Code Quality and Security Checks
|
||||||
# The Python codebase moved to `archive/v1/` when the runtime was rewritten in
|
|
||||||
# Rust under `v2/`. The lint/format/type/scan checks below still run against
|
|
||||||
# the archive for hygiene, but with `continue-on-error: true` everywhere — the
|
|
||||||
# archive is frozen reference code, not active development, so a stale lint
|
|
||||||
# rule shouldn't gate PRs to the Rust workspace.
|
|
||||||
code-quality:
|
code-quality:
|
||||||
name: Code Quality & Security
|
name: Code Quality & Security
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
continue-on-error: true
|
uses: actions/setup-python@v5
|
||||||
uses: actions/setup-python@v6
|
|
||||||
with:
|
with:
|
||||||
python-version: ${{ env.PYTHON_VERSION }}
|
python-version: ${{ env.PYTHON_VERSION }}
|
||||||
cache: 'pip'
|
cache: 'pip'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
pip install black flake8 mypy bandit safety
|
pip install black flake8 mypy bandit safety
|
||||||
|
|
||||||
- name: Code formatting check (Black)
|
- name: Code formatting check (Black)
|
||||||
continue-on-error: true
|
run: black --check --diff src/ tests/
|
||||||
run: black --check --diff archive/v1/src archive/v1/tests
|
|
||||||
|
|
||||||
- name: Linting (Flake8)
|
- name: Linting (Flake8)
|
||||||
continue-on-error: true
|
run: flake8 src/ tests/ --max-line-length=88 --extend-ignore=E203,W503
|
||||||
run: flake8 archive/v1/src archive/v1/tests --max-line-length=88 --extend-ignore=E203,W503
|
|
||||||
|
|
||||||
- name: Type checking (MyPy)
|
- name: Type checking (MyPy)
|
||||||
continue-on-error: true
|
run: mypy src/ --ignore-missing-imports
|
||||||
run: mypy archive/v1/src --ignore-missing-imports
|
|
||||||
|
|
||||||
- name: Security scan (Bandit)
|
- name: Security scan (Bandit)
|
||||||
run: bandit -r archive/v1/src -f json -o bandit-report.json
|
run: bandit -r src/ -f json -o bandit-report.json
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
|
|
||||||
- name: Dependency vulnerability scan (Safety)
|
- name: Dependency vulnerability scan (Safety)
|
||||||
@@ -67,7 +54,6 @@ jobs:
|
|||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
|
|
||||||
- name: Upload security reports
|
- name: Upload security reports
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -83,103 +69,30 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
# ADR-262 P1: `wifi-densepose-rufield` path-deps the `vendor/rufield`
|
|
||||||
# submodule. Without a recursive checkout the workspace build fails to
|
|
||||||
# resolve those path deps in CI even though it passes locally.
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
# `wifi-densepose-desktop` is a Tauri v2 app — `glib-sys`, `gtk-sys`,
|
|
||||||
# `webkit2gtk-sys`, etc. need the Linux dev libraries via pkg-config or the
|
|
||||||
# workspace test fails at the build step before any test runs (every recent
|
|
||||||
# main CI run has been red on this for exactly this reason). Install the
|
|
||||||
# standard Tauri-on-Ubuntu set.
|
|
||||||
- name: Install Tauri / GTK / serial system dev libraries
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y --no-install-recommends \
|
|
||||||
libglib2.0-dev \
|
|
||||||
libgtk-3-dev \
|
|
||||||
libsoup-3.0-dev \
|
|
||||||
libjavascriptcoregtk-4.1-dev \
|
|
||||||
libwebkit2gtk-4.1-dev \
|
|
||||||
libayatana-appindicator3-dev \
|
|
||||||
librsvg2-dev \
|
|
||||||
libxdo-dev \
|
|
||||||
libudev-dev \
|
|
||||||
libdbus-1-dev \
|
|
||||||
libssl-dev \
|
|
||||||
pkg-config
|
|
||||||
|
|
||||||
- name: Install Rust toolchain
|
- name: Install Rust toolchain
|
||||||
uses: dtolnay/rust-toolchain@stable
|
uses: dtolnay/rust-toolchain@stable
|
||||||
|
|
||||||
# Swatinem/rust-cache replaces a naive `actions/cache` of the whole
|
- name: Cache cargo
|
||||||
# `v2/target`. That manual cache of a 38-crate target dir (multi-GB) was an
|
uses: actions/cache@v4
|
||||||
# intermittent failure source — several CI runs this cycle died at the
|
|
||||||
# cache/setup step (after toolchain install, before "Run Rust tests"),
|
|
||||||
# needing a rerun. rust-cache is purpose-built for Rust: it caches the
|
|
||||||
# registry + git + a pruned target, evicts stale deps, and restores far more
|
|
||||||
# reliably (and faster) on large workspaces. `workspaces: v2` points it at
|
|
||||||
# the v2/ cargo workspace (keys on v2/Cargo.lock, caches v2/target).
|
|
||||||
- name: Cache cargo (Swatinem/rust-cache)
|
|
||||||
uses: Swatinem/rust-cache@v2
|
|
||||||
with:
|
with:
|
||||||
workspaces: v2
|
path: |
|
||||||
|
~/.cargo/registry
|
||||||
|
~/.cargo/git
|
||||||
|
v2/target
|
||||||
|
key: ${{ runner.os }}-cargo-${{ hashFiles('v2/Cargo.lock') }}
|
||||||
|
restore-keys: |
|
||||||
|
${{ runner.os }}-cargo-
|
||||||
|
|
||||||
# The 38-crate workspace debug build exhausts the runner's disk when built
|
|
||||||
# with full debuginfo (observed: "final link failed: No space left on
|
|
||||||
# device" once the engine/benchmark crates landed; the same tree's local
|
|
||||||
# debug target measured 151 GB). Debuginfo is useless in CI — tests either
|
|
||||||
# pass or print their failure — so build without it; target shrinks ~5-10x.
|
|
||||||
- name: Run Rust tests
|
- name: Run Rust tests
|
||||||
working-directory: v2
|
working-directory: v2
|
||||||
env:
|
|
||||||
CARGO_PROFILE_DEV_DEBUG: "0"
|
|
||||||
CARGO_PROFILE_TEST_DEBUG: "0"
|
|
||||||
run: cargo test --workspace --no-default-features
|
run: cargo test --workspace --no-default-features
|
||||||
|
|
||||||
- name: Run ADR-147 worldmodel tests
|
|
||||||
working-directory: v2
|
|
||||||
env:
|
|
||||||
CARGO_PROFILE_DEV_DEBUG: "0"
|
|
||||||
CARGO_PROFILE_TEST_DEBUG: "0"
|
|
||||||
run: cargo test -p wifi-densepose-worldmodel --no-default-features
|
|
||||||
|
|
||||||
# ADR-134 CIR tests are behind the `cir` feature so the bench dependency
|
|
||||||
# (Criterion) only pulls when actually exercised. Run them as a separate
|
|
||||||
# step so a CIR-only regression is unambiguously attributable.
|
|
||||||
- name: Run ADR-134 CIR tests
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo test -p wifi-densepose-signal --no-default-features --features cir --tests
|
|
||||||
|
|
||||||
# ADR-134 + ADR-028 witness guard. The CIR proof runner produces a
|
|
||||||
# bit-deterministic SHA-256 over CirEstimator output on the synthetic
|
|
||||||
# reference signal. Any algorithmic regression — changes to ISTA
|
|
||||||
# convergence, sensing matrix construction, soft-thresholding, or input
|
|
||||||
# padding — breaks the hash and fails the build. To regenerate after an
|
|
||||||
# *intentional* change:
|
|
||||||
# cd v2 && cargo run -p wifi-densepose-signal --bin cir_proof_runner \
|
|
||||||
# --release --no-default-features -- --generate-hash \
|
|
||||||
# > ../archive/v1/data/proof/expected_cir_features.sha256
|
|
||||||
- name: ADR-134 CIR witness proof (determinism guard)
|
|
||||||
run: bash scripts/verify-cir-proof.sh
|
|
||||||
|
|
||||||
- name: ADR-135 calibration witness proof (determinism guard)
|
|
||||||
run: bash scripts/verify-calibration-proof.sh
|
|
||||||
|
|
||||||
# Unit and Integration Tests
|
# Unit and Integration Tests
|
||||||
# Python pytest matrix — runs against the archived v1 Python tree.
|
|
||||||
# `continue-on-error: true` for the same reason as code-quality above:
|
|
||||||
# the archive is frozen reference, not blocking the Rust workspace PRs.
|
|
||||||
test:
|
test:
|
||||||
name: Tests
|
name: Tests
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true
|
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ['3.10', '3.11', '3.12']
|
python-version: ['3.10', '3.11', '3.12']
|
||||||
services:
|
services:
|
||||||
@@ -208,53 +121,44 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
continue-on-error: true
|
uses: actions/setup-python@v5
|
||||||
uses: actions/setup-python@v6
|
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
cache: 'pip'
|
cache: 'pip'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
pip install pytest-cov pytest-xdist
|
pip install pytest-cov pytest-xdist
|
||||||
|
|
||||||
- name: Run unit tests
|
- name: Run unit tests
|
||||||
continue-on-error: true
|
|
||||||
env:
|
env:
|
||||||
DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_wifi_densepose
|
DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_wifi_densepose
|
||||||
REDIS_URL: redis://localhost:6379/0
|
REDIS_URL: redis://localhost:6379/0
|
||||||
ENVIRONMENT: test
|
ENVIRONMENT: test
|
||||||
run: |
|
run: |
|
||||||
pytest archive/v1/tests/unit/ -v --cov=archive/v1/src --cov-report=xml --cov-report=html --junitxml=junit.xml
|
pytest tests/unit/ -v --cov=src --cov-report=xml --cov-report=html --junitxml=junit.xml
|
||||||
|
|
||||||
- name: Run integration tests
|
- name: Run integration tests
|
||||||
continue-on-error: true
|
|
||||||
env:
|
env:
|
||||||
DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_wifi_densepose
|
DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_wifi_densepose
|
||||||
REDIS_URL: redis://localhost:6379/0
|
REDIS_URL: redis://localhost:6379/0
|
||||||
ENVIRONMENT: test
|
ENVIRONMENT: test
|
||||||
run: |
|
run: |
|
||||||
pytest archive/v1/tests/integration/ -v --junitxml=integration-junit.xml
|
pytest tests/integration/ -v --junitxml=integration-junit.xml
|
||||||
|
|
||||||
- name: Upload coverage reports
|
- name: Upload coverage reports
|
||||||
continue-on-error: true
|
uses: codecov/codecov-action@v4
|
||||||
uses: codecov/codecov-action@v6
|
|
||||||
with:
|
with:
|
||||||
file: ./coverage.xml
|
file: ./coverage.xml
|
||||||
flags: unittests
|
flags: unittests
|
||||||
name: codecov-umbrella
|
name: codecov-umbrella
|
||||||
|
|
||||||
- name: Upload test results
|
- name: Upload test results
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -265,23 +169,17 @@ jobs:
|
|||||||
htmlcov/
|
htmlcov/
|
||||||
|
|
||||||
# Performance and Load Tests
|
# Performance and Load Tests
|
||||||
# NOTE: tests/performance/locustfile.py and the src.api.main app path both
|
|
||||||
# predate the v1→archive/v1 reorganisation. continue-on-error: true until a
|
|
||||||
# proper locust suite is added under archive/v1/tests/performance/.
|
|
||||||
performance-test:
|
performance-test:
|
||||||
name: Performance Tests
|
name: Performance Tests
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [test]
|
needs: [test]
|
||||||
continue-on-error: true
|
|
||||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ env.PYTHON_VERSION }}
|
python-version: ${{ env.PYTHON_VERSION }}
|
||||||
cache: 'pip'
|
cache: 'pip'
|
||||||
@@ -290,72 +188,36 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
pip install pytest # the perf suite is pytest, not locust
|
pip install locust
|
||||||
|
|
||||||
# No "Start application" step: the gated test (test_frame_budget.py) drives
|
- name: Start application
|
||||||
# the CSIProcessor pipeline in-process and makes no HTTP calls, so the old
|
run: |
|
||||||
# uvicorn server + `sleep 10` were dead weight — they only existed for the
|
uvicorn src.api.main:app --host 0.0.0.0 --port 8000 &
|
||||||
# now-excluded api_throughput/inference_speed tests, and on every run dumped
|
sleep 10
|
||||||
# ~50 misleading "router requires hardware setup" ERROR lines for a server
|
|
||||||
# no test touched. MOCK_POSE_DATA is server-only and unused here.
|
|
||||||
|
|
||||||
- name: Run performance tests
|
- name: Run performance tests
|
||||||
working-directory: archive/v1
|
|
||||||
run: |
|
run: |
|
||||||
# Gate only on the genuine, deterministic perf guard:
|
locust -f tests/performance/locustfile.py --headless --users 50 --spawn-rate 5 --run-time 60s --host http://localhost:8000
|
||||||
# test_frame_budget.py times the *real* CSIProcessor pipeline against
|
|
||||||
# the ADR 50 ms per-frame budget (single-frame, p95 over 100 frames,
|
|
||||||
# +Doppler) — a true regression signal.
|
|
||||||
#
|
|
||||||
# test_api_throughput.py / test_inference_speed.py are excluded: every
|
|
||||||
# test there is a TDD red-phase stub (suffix `_should_fail_initially`)
|
|
||||||
# that times a *mock that sleeps* — meaningless as a perf signal, with
|
|
||||||
# machine-dependent wall-clock asserts (e.g. `actual_rps >= 40`,
|
|
||||||
# `batch_time < individual_time`) that are inherently flaky on shared
|
|
||||||
# CI runners, plus a cross-class fixture-scope bug. Forcing them green
|
|
||||||
# would be manufacturing a false signal; they stay in-repo for local
|
|
||||||
# TDD but do not gate CI until the underlying features are implemented.
|
|
||||||
#
|
|
||||||
# `python -m pytest` (not the bare `pytest` script) puts the cwd
|
|
||||||
# (archive/v1) on sys.path so `from src.core...` resolves — the bare
|
|
||||||
# script omits cwd and raises ModuleNotFoundError: No module named 'src'.
|
|
||||||
# -o addopts="" drops the root pyproject's --cov/--cov-fail-under=100.
|
|
||||||
python -m pytest tests/performance/test_frame_budget.py \
|
|
||||||
-o addopts="" -v --junitxml=perf-junit.xml
|
|
||||||
|
|
||||||
- name: Upload performance results
|
- name: Upload performance results
|
||||||
if: always()
|
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: performance-results
|
name: performance-results
|
||||||
path: archive/v1/perf-junit.xml
|
path: locust_report.html
|
||||||
|
|
||||||
# Docker Build and Test
|
# Docker Build and Test
|
||||||
# NOTE: the canonical Docker build for the sensing-server is now
|
|
||||||
# `.github/workflows/sensing-server-docker.yml` (multi-registry push, asset
|
|
||||||
# smoke tests, bearer-auth smoke tests — #520/#514/#443). This job predates
|
|
||||||
# that workflow, points at a non-existent root `Dockerfile` with a
|
|
||||||
# non-existent `target: production`, and pushes to a mis-cased image name —
|
|
||||||
# `continue-on-error: true` until it's deleted or rewired to call the new
|
|
||||||
# workflow, so it doesn't gate the rest of the pipeline.
|
|
||||||
docker-build:
|
docker-build:
|
||||||
name: Docker Build & Test
|
name: Docker Build & Test
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [code-quality, test, rust-tests]
|
needs: [code-quality, test, rust-tests]
|
||||||
continue-on-error: true
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
continue-on-error: true
|
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
- name: Log in to Container Registry
|
- name: Log in to Container Registry
|
||||||
continue-on-error: true
|
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
with:
|
with:
|
||||||
registry: ${{ env.REGISTRY }}
|
registry: ${{ env.REGISTRY }}
|
||||||
@@ -363,9 +225,8 @@ jobs:
|
|||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Extract metadata
|
- name: Extract metadata
|
||||||
continue-on-error: true
|
|
||||||
id: meta
|
id: meta
|
||||||
uses: docker/metadata-action@v6
|
uses: docker/metadata-action@v5
|
||||||
with:
|
with:
|
||||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||||
tags: |
|
tags: |
|
||||||
@@ -375,8 +236,7 @@ jobs:
|
|||||||
type=raw,value=latest,enable={{is_default_branch}}
|
type=raw,value=latest,enable={{is_default_branch}}
|
||||||
|
|
||||||
- name: Build and push Docker image
|
- name: Build and push Docker image
|
||||||
continue-on-error: true
|
uses: docker/build-push-action@v5
|
||||||
uses: docker/build-push-action@v7
|
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
target: production
|
target: production
|
||||||
@@ -388,7 +248,6 @@ jobs:
|
|||||||
platforms: linux/amd64,linux/arm64
|
platforms: linux/amd64,linux/arm64
|
||||||
|
|
||||||
- name: Test Docker image
|
- name: Test Docker image
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
docker run --rm -d --name test-container -p 8000:8000 ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
|
docker run --rm -d --name test-container -p 8000:8000 ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
|
||||||
sleep 10
|
sleep 10
|
||||||
@@ -396,7 +255,6 @@ jobs:
|
|||||||
docker stop test-container
|
docker stop test-container
|
||||||
|
|
||||||
- name: Run container security scan
|
- name: Run container security scan
|
||||||
continue-on-error: true
|
|
||||||
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
|
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
|
||||||
with:
|
with:
|
||||||
image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
|
image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
|
||||||
@@ -404,7 +262,6 @@ jobs:
|
|||||||
output: 'trivy-results.sarif'
|
output: 'trivy-results.sarif'
|
||||||
|
|
||||||
- name: Upload Trivy scan results
|
- name: Upload Trivy scan results
|
||||||
continue-on-error: true
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -416,16 +273,12 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [docker-build]
|
needs: [docker-build]
|
||||||
if: github.ref == 'refs/heads/main'
|
if: github.ref == 'refs/heads/main'
|
||||||
permissions:
|
|
||||||
contents: write # gh-pages deploy needs write (GITHUB_TOKEN is read-only by default -> 403)
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ env.PYTHON_VERSION }}
|
python-version: ${{ env.PYTHON_VERSION }}
|
||||||
cache: 'pip'
|
cache: 'pip'
|
||||||
@@ -436,9 +289,6 @@ jobs:
|
|||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
|
||||||
- name: Generate OpenAPI spec
|
- name: Generate OpenAPI spec
|
||||||
working-directory: archive/v1
|
|
||||||
env:
|
|
||||||
MOCK_POSE_DATA: "true" # no CSI hardware in CI
|
|
||||||
run: |
|
run: |
|
||||||
python -c "
|
python -c "
|
||||||
from src.api.main import app
|
from src.api.main import app
|
||||||
@@ -449,7 +299,6 @@ jobs:
|
|||||||
|
|
||||||
- name: Deploy to GitHub Pages
|
- name: Deploy to GitHub Pages
|
||||||
uses: peaceiris/actions-gh-pages@v4
|
uses: peaceiris/actions-gh-pages@v4
|
||||||
continue-on-error: true # openapi generation above is the real validation; deploy is best-effort (Pages may be disabled)
|
|
||||||
with:
|
with:
|
||||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
publish_dir: ./docs
|
publish_dir: ./docs
|
||||||
@@ -461,8 +310,6 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [code-quality, test, rust-tests, performance-test, docker-build, docs]
|
needs: [code-quality, test, rust-tests, performance-test, docker-build, docs]
|
||||||
if: always()
|
if: always()
|
||||||
permissions:
|
|
||||||
contents: write # required by softprops/action-gh-release
|
|
||||||
# GitHub Actions does not allow `secrets.X` directly in step-level `if:`
|
# GitHub Actions does not allow `secrets.X` directly in step-level `if:`
|
||||||
# expressions — only `env.X`. Promote the secret to env at job scope so
|
# expressions — only `env.X`. Promote the secret to env at job scope so
|
||||||
# the gating expression below is parseable.
|
# the gating expression below is parseable.
|
||||||
|
|||||||
@@ -1,151 +0,0 @@
|
|||||||
name: GitHub Clone Tracking → data/clone-data.rvf
|
|
||||||
|
|
||||||
# Persists rolling 14-day clone-traffic snapshots to data/clone-data.rvf in
|
|
||||||
# the ruvector JSONL RVF format. GitHub's /traffic/clones endpoint only
|
|
||||||
# retains the last 14 days server-side, so without this scheduled scrape
|
|
||||||
# the data is gone forever the moment it falls outside the window.
|
|
||||||
#
|
|
||||||
# Format: JSONL RVF
|
|
||||||
# - line 1 is a `metadata` segment that initializes the file
|
|
||||||
# - each subsequent run appends one `clone_snapshot` segment carrying the
|
|
||||||
# 14-day rollup PLUS per-day breakdown
|
|
||||||
# - file is idempotent: per-day entries are keyed by `timestamp` so a
|
|
||||||
# downstream reader can dedupe across overlapping snapshot windows
|
|
||||||
#
|
|
||||||
# Schedule: every 14 days (1st + 15th of each month, ~14-day cadence in
|
|
||||||
# practice). Workflow can also be dispatched manually for backfill or test.
|
|
||||||
|
|
||||||
on:
|
|
||||||
schedule:
|
|
||||||
# 01:23 UTC on the 1st and 15th of every month — close to 14-day cadence
|
|
||||||
# without cron's "every 14 days" monthly-reset weirdness. Picking :23
|
|
||||||
# avoids the cron herd on :00.
|
|
||||||
- cron: '23 1 1,15 * *'
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: clone-tracking
|
|
||||||
cancel-in-progress: false
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
snapshot:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Fetch /traffic/clones + /traffic/views from GitHub
|
|
||||||
env:
|
|
||||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
run: |
|
|
||||||
mkdir -p data
|
|
||||||
gh api repos/${{ github.repository }}/traffic/clones > /tmp/clones.json
|
|
||||||
gh api repos/${{ github.repository }}/traffic/views > /tmp/views.json
|
|
||||||
echo "--- clones rollup ---"
|
|
||||||
jq '{count, uniques, days: (.clones | length)}' /tmp/clones.json
|
|
||||||
echo "--- views rollup ---"
|
|
||||||
jq '{count, uniques, days: (.views | length)}' /tmp/views.json
|
|
||||||
|
|
||||||
- name: Append snapshot to data/clone-data.rvf
|
|
||||||
env:
|
|
||||||
REPO: ${{ github.repository }}
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
RVF="data/clone-data.rvf"
|
|
||||||
FETCHED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
|
||||||
|
|
||||||
# Initialize the file with a metadata segment on first run.
|
|
||||||
if [ ! -f "$RVF" ]; then
|
|
||||||
echo "Initializing $RVF with metadata segment"
|
|
||||||
jq -n --arg repo "$REPO" --arg ts "$FETCHED_AT" '{
|
|
||||||
type: "metadata",
|
|
||||||
name: "ruview-clone-traffic-history",
|
|
||||||
version: "1.0.0",
|
|
||||||
schema: "ruvector.rvf.jsonl/v1",
|
|
||||||
format: "github-traffic-snapshots",
|
|
||||||
repo: $repo,
|
|
||||||
source: "GitHub Traffic API /repos/{repo}/traffic/{clones,views}",
|
|
||||||
policy: "GitHub retains only 14 days server-side; this file is the long-term record.",
|
|
||||||
segments: ["metadata", "clone_snapshot", "view_snapshot"],
|
|
||||||
created_at: $ts,
|
|
||||||
custom: {
|
|
||||||
cadence: "twice monthly (1st and 15th, ~14-day intervals)",
|
|
||||||
idempotency_key: "timestamp (per-day records de-duplicate across overlapping snapshot windows)"
|
|
||||||
}
|
|
||||||
}' >> "$RVF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Append the clone snapshot.
|
|
||||||
jq --arg ts "$FETCHED_AT" '{
|
|
||||||
type: "clone_snapshot",
|
|
||||||
fetched_at: $ts,
|
|
||||||
window_count: .count,
|
|
||||||
window_uniques: .uniques,
|
|
||||||
per_day: .clones
|
|
||||||
}' /tmp/clones.json >> "$RVF"
|
|
||||||
|
|
||||||
# Append the views snapshot (free with the same auth).
|
|
||||||
jq --arg ts "$FETCHED_AT" '{
|
|
||||||
type: "view_snapshot",
|
|
||||||
fetched_at: $ts,
|
|
||||||
window_count: .count,
|
|
||||||
window_uniques: .uniques,
|
|
||||||
per_day: .views
|
|
||||||
}' /tmp/views.json >> "$RVF"
|
|
||||||
|
|
||||||
echo "--- RVF tail (last 4 lines) ---"
|
|
||||||
tail -4 "$RVF" | jq -c '{type, fetched_at, window_count, window_uniques}' || true
|
|
||||||
echo "--- file size ---"
|
|
||||||
wc -l "$RVF"
|
|
||||||
|
|
||||||
- name: Compute aggregates for the commit summary
|
|
||||||
id: agg
|
|
||||||
run: |
|
|
||||||
# Count distinct per-day entries across all snapshots so we can
|
|
||||||
# show "cumulative observed clones" in the commit message.
|
|
||||||
python3 - <<'PY'
|
|
||||||
import json, os
|
|
||||||
path = "data/clone-data.rvf"
|
|
||||||
per_day_clones = {}
|
|
||||||
per_day_views = {}
|
|
||||||
with open(path, encoding="utf-8") as f:
|
|
||||||
for line in f:
|
|
||||||
if not line.strip():
|
|
||||||
continue
|
|
||||||
d = json.loads(line)
|
|
||||||
if d.get("type") == "clone_snapshot":
|
|
||||||
for entry in d.get("per_day", []):
|
|
||||||
per_day_clones[entry["timestamp"]] = entry
|
|
||||||
elif d.get("type") == "view_snapshot":
|
|
||||||
for entry in d.get("per_day", []):
|
|
||||||
per_day_views[entry["timestamp"]] = entry
|
|
||||||
|
|
||||||
tot_clones = sum(e.get("count", 0) for e in per_day_clones.values())
|
|
||||||
tot_uniq_clones = sum(e.get("uniques", 0) for e in per_day_clones.values())
|
|
||||||
tot_views = sum(e.get("count", 0) for e in per_day_views.values())
|
|
||||||
tot_uniq_views = sum(e.get("uniques", 0) for e in per_day_views.values())
|
|
||||||
print(f"clone days observed: {len(per_day_clones)} total clones: {tot_clones:,} total unique cloners: {tot_uniq_clones:,}")
|
|
||||||
print(f"view days observed: {len(per_day_views)} total views: {tot_views:,} total unique viewers: {tot_uniq_views:,}")
|
|
||||||
|
|
||||||
with open(os.environ["GITHUB_OUTPUT"], "a") as out:
|
|
||||||
out.write(f"clones={tot_clones}\n")
|
|
||||||
out.write(f"clone_days={len(per_day_clones)}\n")
|
|
||||||
out.write(f"views={tot_views}\n")
|
|
||||||
out.write(f"view_days={len(per_day_views)}\n")
|
|
||||||
PY
|
|
||||||
|
|
||||||
- name: Commit + push if changed
|
|
||||||
run: |
|
|
||||||
git config user.name "github-actions[bot]"
|
|
||||||
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
|
||||||
if git diff --quiet data/clone-data.rvf; then
|
|
||||||
echo "no changes to commit"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
git add data/clone-data.rvf
|
|
||||||
git commit -m "chore(traffic): clone snapshot — ${{ steps.agg.outputs.clone_days }} days observed → ${{ steps.agg.outputs.clones }} clones, ${{ steps.agg.outputs.view_days }} view-days → ${{ steps.agg.outputs.views }} views"
|
|
||||||
git push
|
|
||||||
@@ -1,206 +0,0 @@
|
|||||||
name: Cog HA-Matter Release
|
|
||||||
|
|
||||||
# ADR-116 P8 — Build + sign + bundle the cog-ha-matter cog on a
|
|
||||||
# version tag. Upload to gs://cognitum-apps/ runs only when the
|
|
||||||
# GCP_CREDENTIALS + COGNITUM_OWNER_SIGNING_KEY secrets are set, so
|
|
||||||
# this workflow is safe to merge before the production credentials
|
|
||||||
# land — it'll bundle release artifacts to the workflow run page
|
|
||||||
# either way.
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- 'cog-ha-matter-v*'
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
dry_run:
|
|
||||||
description: 'Build + sign + bundle but skip GCS upload'
|
|
||||||
required: false
|
|
||||||
default: 'true'
|
|
||||||
|
|
||||||
env:
|
|
||||||
CARGO_TERM_COLOR: always
|
|
||||||
CRATE: cog-ha-matter
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-x86_64:
|
|
||||||
name: Build x86_64
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Setup Rust
|
|
||||||
uses: dtolnay/rust-toolchain@stable
|
|
||||||
with:
|
|
||||||
targets: x86_64-unknown-linux-gnu
|
|
||||||
|
|
||||||
- name: Cache cargo registry
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
~/.cargo/registry
|
|
||||||
~/.cargo/git
|
|
||||||
v2/target
|
|
||||||
key: cog-ha-matter-x86_64-${{ hashFiles('v2/Cargo.lock') }}
|
|
||||||
|
|
||||||
- name: Build release binary
|
|
||||||
working-directory: v2/crates/cog-ha-matter/cog
|
|
||||||
run: make build-x86_64
|
|
||||||
|
|
||||||
- name: Compute SHA-256
|
|
||||||
working-directory: v2/crates/cog-ha-matter/cog
|
|
||||||
run: make sign-x86_64
|
|
||||||
|
|
||||||
- name: Sign with Ed25519 (gated)
|
|
||||||
if: ${{ env.SIGNING_KEY != '' }}
|
|
||||||
env:
|
|
||||||
SIGNING_KEY: ${{ secrets.COGNITUM_OWNER_SIGNING_KEY }}
|
|
||||||
working-directory: v2/crates/cog-ha-matter/cog
|
|
||||||
run: |
|
|
||||||
printf '%s' "$SIGNING_KEY" \
|
|
||||||
| openssl pkeyutl -sign -inkey /dev/stdin -rawin \
|
|
||||||
-in dist/cog-ha-matter-x86_64.sha256 \
|
|
||||||
| base64 -w0 > dist/cog-ha-matter-x86_64.sig
|
|
||||||
echo "Signed cog-ha-matter-x86_64 ($(wc -c < dist/cog-ha-matter-x86_64.sig) bytes)"
|
|
||||||
|
|
||||||
- name: Upload workflow artifact
|
|
||||||
uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: cog-ha-matter-x86_64
|
|
||||||
path: |
|
|
||||||
v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-x86_64
|
|
||||||
v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-x86_64.sha256
|
|
||||||
v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-x86_64.sig
|
|
||||||
if-no-files-found: warn
|
|
||||||
|
|
||||||
build-arm:
|
|
||||||
name: Build aarch64 (arm)
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Setup Rust
|
|
||||||
uses: dtolnay/rust-toolchain@stable
|
|
||||||
with:
|
|
||||||
targets: aarch64-unknown-linux-gnu
|
|
||||||
|
|
||||||
- name: Install cross-compiler
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y gcc-aarch64-linux-gnu
|
|
||||||
|
|
||||||
- name: Cache cargo registry
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
~/.cargo/registry
|
|
||||||
~/.cargo/git
|
|
||||||
v2/target
|
|
||||||
key: cog-ha-matter-arm-${{ hashFiles('v2/Cargo.lock') }}
|
|
||||||
|
|
||||||
- name: Build release binary
|
|
||||||
working-directory: v2
|
|
||||||
env:
|
|
||||||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER: aarch64-linux-gnu-gcc
|
|
||||||
run: |
|
|
||||||
cargo build -p cog-ha-matter --release --target aarch64-unknown-linux-gnu
|
|
||||||
mkdir -p crates/cog-ha-matter/cog/dist
|
|
||||||
cp target/aarch64-unknown-linux-gnu/release/cog-ha-matter \
|
|
||||||
crates/cog-ha-matter/cog/dist/cog-ha-matter-arm
|
|
||||||
# ^ matches Makefile's `dist/$(CRATE)-arm` so `make sign-arm` finds it
|
|
||||||
|
|
||||||
- name: Compute SHA-256
|
|
||||||
working-directory: v2/crates/cog-ha-matter/cog
|
|
||||||
run: make sign-arm
|
|
||||||
|
|
||||||
- name: Sign with Ed25519 (gated)
|
|
||||||
if: ${{ env.SIGNING_KEY != '' }}
|
|
||||||
env:
|
|
||||||
SIGNING_KEY: ${{ secrets.COGNITUM_OWNER_SIGNING_KEY }}
|
|
||||||
working-directory: v2/crates/cog-ha-matter/cog
|
|
||||||
run: |
|
|
||||||
printf '%s' "$SIGNING_KEY" \
|
|
||||||
| openssl pkeyutl -sign -inkey /dev/stdin -rawin \
|
|
||||||
-in dist/cog-ha-matter-arm.sha256 \
|
|
||||||
| base64 -w0 > dist/cog-ha-matter-arm.sig
|
|
||||||
echo "Signed cog-ha-matter-arm ($(wc -c < dist/cog-ha-matter-arm.sig) bytes)"
|
|
||||||
|
|
||||||
- name: Upload workflow artifact
|
|
||||||
uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: cog-ha-matter-arm
|
|
||||||
path: |
|
|
||||||
v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-arm
|
|
||||||
v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-arm.sha256
|
|
||||||
v2/crates/cog-ha-matter/cog/dist/cog-ha-matter-arm.sig
|
|
||||||
if-no-files-found: warn
|
|
||||||
|
|
||||||
publish-gcs:
|
|
||||||
name: Upload to GCS (gated)
|
|
||||||
needs: [build-x86_64, build-arm]
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
# Skip on dry-run dispatch; skip on tags when GCP_CREDENTIALS unset.
|
|
||||||
if: >
|
|
||||||
github.event_name == 'push' &&
|
|
||||||
vars.HAS_GCP_CREDENTIALS == 'true'
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Download x86_64 artifact
|
|
||||||
uses: actions/download-artifact@v4
|
|
||||||
with:
|
|
||||||
name: cog-ha-matter-x86_64
|
|
||||||
path: dist/
|
|
||||||
|
|
||||||
- name: Download arm artifact
|
|
||||||
uses: actions/download-artifact@v4
|
|
||||||
with:
|
|
||||||
name: cog-ha-matter-arm
|
|
||||||
path: dist/
|
|
||||||
|
|
||||||
- name: Auth to GCP
|
|
||||||
uses: google-github-actions/auth@v2
|
|
||||||
with:
|
|
||||||
credentials_json: ${{ secrets.GCP_CREDENTIALS }}
|
|
||||||
|
|
||||||
- name: Set up gcloud
|
|
||||||
uses: google-github-actions/setup-gcloud@v2
|
|
||||||
|
|
||||||
- name: Upload binaries + sidecars
|
|
||||||
run: |
|
|
||||||
gsutil cp dist/cog-ha-matter-x86_64 gs://cognitum-apps/cogs/x86_64/cog-ha-matter-x86_64
|
|
||||||
gsutil cp dist/cog-ha-matter-x86_64.sha256 gs://cognitum-apps/cogs/x86_64/cog-ha-matter-x86_64.sha256
|
|
||||||
gsutil cp dist/cog-ha-matter-arm gs://cognitum-apps/cogs/arm/cog-ha-matter-arm
|
|
||||||
gsutil cp dist/cog-ha-matter-arm.sha256 gs://cognitum-apps/cogs/arm/cog-ha-matter-arm.sha256
|
|
||||||
if [ -f dist/cog-ha-matter-x86_64.sig ]; then
|
|
||||||
gsutil cp dist/cog-ha-matter-x86_64.sig gs://cognitum-apps/cogs/x86_64/cog-ha-matter-x86_64.sig
|
|
||||||
fi
|
|
||||||
if [ -f dist/cog-ha-matter-arm.sig ]; then
|
|
||||||
gsutil cp dist/cog-ha-matter-arm.sig gs://cognitum-apps/cogs/arm/cog-ha-matter-arm.sig
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Print app-registry.json snippet for the cognitum-one PR
|
|
||||||
run: |
|
|
||||||
for arch in arm x86_64; do
|
|
||||||
sha=$(cat dist/cog-cog-ha-matter-$arch.sha256)
|
|
||||||
sig=$([ -f dist/cog-cog-ha-matter-$arch.sig ] && cat dist/cog-cog-ha-matter-$arch.sig || echo "")
|
|
||||||
cat <<EOF
|
|
||||||
--- $arch ---
|
|
||||||
{
|
|
||||||
"id": "ha-matter",
|
|
||||||
"version": "${GITHUB_REF_NAME#cog-ha-matter-v}",
|
|
||||||
"binary_url": "https://storage.googleapis.com/cognitum-apps/cogs/$arch/cog-cog-ha-matter-$arch",
|
|
||||||
"binary_sha256": "$sha",
|
|
||||||
"binary_signature": "$sig",
|
|
||||||
"description": "Home Assistant + Matter Cognitum Seed cog (mDNS + witness chain)",
|
|
||||||
"min_seed_version": "0.6.0",
|
|
||||||
"installable_on": ["$arch"]
|
|
||||||
}
|
|
||||||
EOF
|
|
||||||
done
|
|
||||||
@@ -20,8 +20,6 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- uses: dtolnay/rust-toolchain@stable
|
- uses: dtolnay/rust-toolchain@stable
|
||||||
with: { targets: wasm32-unknown-unknown }
|
with: { targets: wasm32-unknown-unknown }
|
||||||
@@ -36,7 +34,7 @@ jobs:
|
|||||||
--out-dir ../../dashboard/public/nvsim-pkg \
|
--out-dir ../../dashboard/public/nvsim-pkg \
|
||||||
--release -- --no-default-features --features wasm
|
--release -- --no-default-features --features wasm
|
||||||
|
|
||||||
- uses: actions/setup-node@v6
|
- uses: actions/setup-node@v4
|
||||||
with: { node-version: 20, cache: npm, cache-dependency-path: dashboard/package-lock.json }
|
with: { node-version: 20, cache: npm, cache-dependency-path: dashboard/package-lock.json }
|
||||||
|
|
||||||
- working-directory: dashboard
|
- working-directory: dashboard
|
||||||
|
|||||||
@@ -26,8 +26,6 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout main
|
- name: Checkout main
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Install Rust + wasm32 target
|
- name: Install Rust + wasm32 target
|
||||||
uses: dtolnay/rust-toolchain@stable
|
uses: dtolnay/rust-toolchain@stable
|
||||||
@@ -59,7 +57,7 @@ jobs:
|
|||||||
-- --no-default-features --features wasm
|
-- --no-default-features --features wasm
|
||||||
|
|
||||||
- name: Setup Node 20
|
- name: Setup Node 20
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
cache: npm
|
cache: npm
|
||||||
|
|||||||
@@ -28,11 +28,9 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Setup Node.js
|
- name: Setup Node.js
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: '20'
|
node-version: '20'
|
||||||
|
|
||||||
@@ -85,11 +83,9 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Setup Node.js
|
- name: Setup Node.js
|
||||||
uses: actions/setup-node@v6
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: '20'
|
node-version: '20'
|
||||||
|
|
||||||
@@ -135,8 +131,6 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Download all artifacts
|
- name: Download all artifacts
|
||||||
uses: actions/download-artifact@v4
|
uses: actions/download-artifact@v4
|
||||||
|
|||||||
@@ -2,11 +2,6 @@ name: Firmware CI
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
|
||||||
- '**'
|
|
||||||
tags:
|
|
||||||
# ESP32 firmware release tags — build + version-consistency guard (RuView#505).
|
|
||||||
- 'v*-esp32'
|
|
||||||
paths:
|
paths:
|
||||||
- 'firmware/**'
|
- 'firmware/**'
|
||||||
- '.github/workflows/firmware-ci.yml'
|
- '.github/workflows/firmware-ci.yml'
|
||||||
@@ -16,31 +11,8 @@ on:
|
|||||||
- '.github/workflows/firmware-ci.yml'
|
- '.github/workflows/firmware-ci.yml'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
version-guard:
|
|
||||||
name: Verify version.txt matches release tag
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
if: github.ref_type == 'tag'
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
- name: Check firmware version.txt == tag
|
|
||||||
run: |
|
|
||||||
# Tag form: vX.Y.Z-esp32 → expect version.txt to contain X.Y.Z
|
|
||||||
TAG="${GITHUB_REF_NAME}"
|
|
||||||
EXPECTED="${TAG#v}"
|
|
||||||
EXPECTED="${EXPECTED%-esp32}"
|
|
||||||
ACTUAL="$(tr -d '[:space:]' < firmware/esp32-csi-node/version.txt)"
|
|
||||||
echo "Tag: $TAG → expected version.txt: $EXPECTED | actual: $ACTUAL"
|
|
||||||
if [ "$EXPECTED" != "$ACTUAL" ]; then
|
|
||||||
echo "::error::firmware/esp32-csi-node/version.txt is '$ACTUAL' but tag '$TAG' expects '$EXPECTED'."
|
|
||||||
echo "::error::Bump version.txt and re-tag so esp_app_get_description()->version is correct (RuView#505)."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "version.txt matches the release tag."
|
|
||||||
|
|
||||||
build:
|
build:
|
||||||
name: Build firmware (${{ matrix.target }} / ${{ matrix.variant }})
|
name: Build ESP32-S3 Firmware (${{ matrix.variant }})
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
container:
|
container:
|
||||||
image: espressif/idf:v5.4
|
image: espressif/idf:v5.4
|
||||||
@@ -49,53 +21,31 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- variant: 8mb
|
- variant: 8mb
|
||||||
target: esp32s3
|
|
||||||
sdkconfig: sdkconfig.defaults
|
sdkconfig: sdkconfig.defaults
|
||||||
partition_table_name: partitions_display.csv
|
partition_table_name: partitions_display.csv
|
||||||
size_limit_kb: 1100
|
size_limit_kb: 1100
|
||||||
artifact_app: esp32-csi-node.bin
|
artifact_app: esp32-csi-node.bin
|
||||||
artifact_pt: partition-table.bin
|
artifact_pt: partition-table.bin
|
||||||
- variant: 4mb
|
- variant: 4mb
|
||||||
target: esp32s3
|
|
||||||
sdkconfig: sdkconfig.defaults.4mb
|
sdkconfig: sdkconfig.defaults.4mb
|
||||||
partition_table_name: partitions_4mb.csv
|
partition_table_name: partitions_4mb.csv
|
||||||
size_limit_kb: 1100
|
size_limit_kb: 1100
|
||||||
artifact_app: esp32-csi-node-4mb.bin
|
artifact_app: esp32-csi-node-4mb.bin
|
||||||
artifact_pt: partition-table-4mb.bin
|
artifact_pt: partition-table-4mb.bin
|
||||||
# ADR-110: ESP32-C6 research target (Wi-Fi 6 / 802.15.4 / TWT / LP-core)
|
|
||||||
- variant: c6-4mb
|
|
||||||
target: esp32c6
|
|
||||||
sdkconfig: sdkconfig.defaults
|
|
||||||
partition_table_name: partitions_4mb.csv
|
|
||||||
size_limit_kb: 1100
|
|
||||||
artifact_app: esp32-csi-node-c6.bin
|
|
||||||
artifact_pt: partition-table-c6.bin
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Build firmware (${{ matrix.variant }})
|
- name: Build firmware (${{ matrix.variant }})
|
||||||
working-directory: firmware/esp32-csi-node
|
working-directory: firmware/esp32-csi-node
|
||||||
run: |
|
run: |
|
||||||
. $IDF_PATH/export.sh
|
. $IDF_PATH/export.sh
|
||||||
# 4mb variant supplies its own sdkconfig.defaults overlay.
|
if [ "${{ matrix.variant }}" != "8mb" ]; then
|
||||||
# c6-4mb variant relies on the auto-applied sdkconfig.defaults.esp32c6
|
|
||||||
# overlay (ESP-IDF auto-loads sdkconfig.defaults.$TARGET when present).
|
|
||||||
if [ "${{ matrix.variant }}" = "4mb" ]; then
|
|
||||||
cp "${{ matrix.sdkconfig }}" sdkconfig.defaults
|
cp "${{ matrix.sdkconfig }}" sdkconfig.defaults
|
||||||
fi
|
fi
|
||||||
idf.py set-target ${{ matrix.target }}
|
idf.py set-target esp32s3
|
||||||
idf.py build
|
idf.py build
|
||||||
|
|
||||||
- name: Build and run host-side ADR-110 unit tests
|
|
||||||
if: matrix.variant == 'c6-4mb'
|
|
||||||
working-directory: firmware/esp32-csi-node/test
|
|
||||||
run: |
|
|
||||||
make test_adr110
|
|
||||||
./test_adr110
|
|
||||||
|
|
||||||
- name: Verify binary size (< ${{ matrix.size_limit_kb }} KB gate)
|
- name: Verify binary size (< ${{ matrix.size_limit_kb }} KB gate)
|
||||||
working-directory: firmware/esp32-csi-node
|
working-directory: firmware/esp32-csi-node
|
||||||
run: |
|
run: |
|
||||||
@@ -148,6 +98,32 @@ jobs:
|
|||||||
echo "Flash image integrity verified"
|
echo "Flash image integrity verified"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
- name: Verify embedded version string matches version.txt (fixes #505)
|
||||||
|
working-directory: firmware/esp32-csi-node
|
||||||
|
run: |
|
||||||
|
EXPECTED=$(cat version.txt | tr -d '[:space:]')
|
||||||
|
BIN=build/esp32-csi-node.bin
|
||||||
|
# Extract version from ESP-IDF app_desc: magic 0xABCD5432 at offset 0
|
||||||
|
# followed by version string at offset 16, null-terminated, max 32 chars.
|
||||||
|
EMBEDDED=$(python3 -c "
|
||||||
|
import struct, sys
|
||||||
|
data = open('$BIN','rb').read()
|
||||||
|
magic = struct.pack('<I', 0xABCD5432)
|
||||||
|
i = data.find(magic)
|
||||||
|
if i < 0:
|
||||||
|
sys.exit('app_desc magic not found')
|
||||||
|
ver = data[i+16:i+48].split(b'\\x00',1)[0].decode('ascii','replace')
|
||||||
|
print(ver)
|
||||||
|
" 2>&1)
|
||||||
|
echo "Expected version: $EXPECTED"
|
||||||
|
echo "Embedded version: $EMBEDDED"
|
||||||
|
if [ "$EMBEDDED" != "$EXPECTED" ]; then
|
||||||
|
echo "::error::Version string mismatch! version.txt='$EXPECTED' but binary reports '$EMBEDDED'."
|
||||||
|
echo "::error::Ensure version.txt is updated before building and tagging."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Version string verified: $EMBEDDED"
|
||||||
|
|
||||||
- name: Stage release binaries with variant-specific names
|
- name: Stage release binaries with variant-specific names
|
||||||
working-directory: firmware/esp32-csi-node
|
working-directory: firmware/esp32-csi-node
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -100,8 +100,6 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Download QEMU artifact
|
- name: Download QEMU artifact
|
||||||
uses: actions/download-artifact@v4
|
uses: actions/download-artifact@v4
|
||||||
@@ -216,8 +214,6 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Install clang
|
- name: Install clang
|
||||||
run: |
|
run: |
|
||||||
@@ -267,8 +263,6 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Install NVS generator
|
- name: Install NVS generator
|
||||||
run: pip install esp-idf-nvs-partition-gen
|
run: pip install esp-idf-nvs-partition-gen
|
||||||
@@ -323,8 +317,6 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Download QEMU artifact
|
- name: Download QEMU artifact
|
||||||
uses: actions/download-artifact@v4
|
uses: actions/download-artifact@v4
|
||||||
|
|||||||
@@ -1,56 +0,0 @@
|
|||||||
name: Fix-Marker Regression Guard
|
|
||||||
|
|
||||||
# Asserts that previously-shipped fixes are still present in the tree.
|
|
||||||
# Manifest: scripts/fix-markers.json Checker: scripts/check_fix_markers.py
|
|
||||||
# Run locally: python scripts/check_fix_markers.py (also --list / --json)
|
|
||||||
#
|
|
||||||
# This complements the heavyweight checks (firmware build, deterministic
|
|
||||||
# pipeline proof, witness bundle) with a fast per-PR "did someone revert a
|
|
||||||
# known fix?" gate — the CI analogue of the ruflo witness fix-marker system.
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- main
|
|
||||||
- master
|
|
||||||
pull_request:
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
fix-markers:
|
|
||||||
name: Verify fix markers
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- uses: actions/setup-python@v6
|
|
||||||
with:
|
|
||||||
python-version: '3.11'
|
|
||||||
|
|
||||||
- name: Validate the manifest is well-formed JSON
|
|
||||||
run: python -c "import json; json.load(open('scripts/fix-markers.json')); print('manifest OK')"
|
|
||||||
|
|
||||||
- name: Check fix markers
|
|
||||||
run: python scripts/check_fix_markers.py
|
|
||||||
|
|
||||||
- name: Emit machine-readable result (for the run summary)
|
|
||||||
if: always()
|
|
||||||
run: |
|
|
||||||
python scripts/check_fix_markers.py --json > fix-markers-result.json || true
|
|
||||||
{
|
|
||||||
echo '### Fix-marker regression guard'
|
|
||||||
echo ''
|
|
||||||
echo '```'
|
|
||||||
python scripts/check_fix_markers.py || true
|
|
||||||
echo '```'
|
|
||||||
} >> "$GITHUB_STEP_SUMMARY"
|
|
||||||
|
|
||||||
- name: Upload result artifact
|
|
||||||
if: always()
|
|
||||||
uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: fix-markers-result
|
|
||||||
path: fix-markers-result.json
|
|
||||||
retention-days: 30
|
|
||||||
@@ -1,112 +0,0 @@
|
|||||||
name: ADR-115 MQTT integration tests
|
|
||||||
|
|
||||||
# Runs the Mosquitto-broker-backed integration tests for ADR-115's MQTT
|
|
||||||
# publisher. These prove the publisher reaches a real broker, emits the
|
|
||||||
# expected HA-discovery topic shape, and honours --privacy-mode at the
|
|
||||||
# wire boundary (not just in unit-test logic).
|
|
||||||
#
|
|
||||||
# Default `cargo test --workspace` does not run these tests because they
|
|
||||||
# require a broker and pull rumqttc into the build. This workflow opts
|
|
||||||
# into both by setting --features mqtt and RUVIEW_RUN_INTEGRATION=1.
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- 'v2/crates/wifi-densepose-sensing-server/src/mqtt/**'
|
|
||||||
- 'v2/crates/wifi-densepose-sensing-server/tests/mqtt_integration.rs'
|
|
||||||
- 'v2/crates/wifi-densepose-sensing-server/Cargo.toml'
|
|
||||||
- '.github/workflows/mqtt-integration.yml'
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
paths:
|
|
||||||
- 'v2/crates/wifi-densepose-sensing-server/src/mqtt/**'
|
|
||||||
workflow_dispatch: {}
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
mqtt-integration:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
timeout-minutes: 20
|
|
||||||
|
|
||||||
# NB: we don't use a `services:` mosquitto container here because the
|
|
||||||
# eclipse-mosquitto:2.x image rejects anonymous connections by default
|
|
||||||
# and GH Actions `services` doesn't easily support mounting a custom
|
|
||||||
# config file. We start mosquitto manually in a step below with an
|
|
||||||
# inline `allow_anonymous true` config.
|
|
||||||
|
|
||||||
env:
|
|
||||||
RUVIEW_RUN_INTEGRATION: "1"
|
|
||||||
RUVIEW_TEST_MQTT_PORT: "11883"
|
|
||||||
CARGO_TERM_COLOR: always
|
|
||||||
RUST_BACKTRACE: 1
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Install mosquitto + clients and start with allow_anonymous
|
|
||||||
run: |
|
|
||||||
sudo apt-get update -qq
|
|
||||||
sudo apt-get install -y mosquitto mosquitto-clients
|
|
||||||
sudo systemctl stop mosquitto || true
|
|
||||||
# Inline config: anon listener on 11883 only — no TLS, no auth,
|
|
||||||
# OK for CI because we test the wire shape, not security.
|
|
||||||
# Production deployments enable mTLS per ADR-115 §3.9.
|
|
||||||
cat > /tmp/mosquitto-ci.conf <<'EOF'
|
|
||||||
listener 11883
|
|
||||||
allow_anonymous true
|
|
||||||
persistence false
|
|
||||||
log_dest stdout
|
|
||||||
EOF
|
|
||||||
mosquitto -c /tmp/mosquitto-ci.conf -d
|
|
||||||
for i in {1..20}; do
|
|
||||||
if mosquitto_pub -h 127.0.0.1 -p 11883 -t healthcheck -m ok -q 0 2>/dev/null; then
|
|
||||||
echo "mosquitto reachable on 11883"; exit 0
|
|
||||||
fi
|
|
||||||
sleep 2
|
|
||||||
done
|
|
||||||
echo "mosquitto never became reachable" >&2
|
|
||||||
tail -50 /var/log/mosquitto/*.log 2>/dev/null || true
|
|
||||||
exit 1
|
|
||||||
|
|
||||||
- name: Install Rust toolchain
|
|
||||||
uses: dtolnay/rust-toolchain@stable
|
|
||||||
with:
|
|
||||||
toolchain: stable
|
|
||||||
|
|
||||||
- name: Cache cargo registry + build
|
|
||||||
uses: Swatinem/rust-cache@v2
|
|
||||||
with:
|
|
||||||
workspaces: v2 -> target
|
|
||||||
|
|
||||||
- name: Validate HA Blueprints
|
|
||||||
run: |
|
|
||||||
python -m pip install --quiet pyyaml
|
|
||||||
python scripts/validate-ha-blueprints.py
|
|
||||||
|
|
||||||
- name: Verify unit tests still pass under --features mqtt
|
|
||||||
working-directory: v2
|
|
||||||
# `cargo test` accepts a single TESTNAME filter, so we run the
|
|
||||||
# whole --lib suite here. That gives us the full 410-test green
|
|
||||||
# bar under --features mqtt (which is more reassuring than
|
|
||||||
# filtering anyway).
|
|
||||||
run: >-
|
|
||||||
cargo test -p wifi-densepose-sensing-server
|
|
||||||
--features mqtt --no-default-features
|
|
||||||
--lib
|
|
||||||
--no-fail-fast
|
|
||||||
|
|
||||||
- name: Run integration tests against mosquitto
|
|
||||||
working-directory: v2
|
|
||||||
run: >-
|
|
||||||
cargo test -p wifi-densepose-sensing-server
|
|
||||||
--features mqtt --no-default-features
|
|
||||||
--test mqtt_integration
|
|
||||||
--no-fail-fast
|
|
||||||
-- --test-threads=1 --nocapture
|
|
||||||
|
|
||||||
- name: Dump broker logs on failure
|
|
||||||
if: failure()
|
|
||||||
run: |
|
|
||||||
docker ps -a
|
|
||||||
docker logs $(docker ps -aqf "ancestor=eclipse-mosquitto:2.0.18") || true
|
|
||||||
@@ -26,8 +26,6 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- uses: docker/setup-buildx-action@v3
|
- uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
@@ -39,7 +37,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Extract metadata
|
- name: Extract metadata
|
||||||
id: meta
|
id: meta
|
||||||
uses: docker/metadata-action@v6
|
uses: docker/metadata-action@v5
|
||||||
with:
|
with:
|
||||||
images: ghcr.io/ruvnet/nvsim-server
|
images: ghcr.io/ruvnet/nvsim-server
|
||||||
tags: |
|
tags: |
|
||||||
@@ -49,7 +47,7 @@ jobs:
|
|||||||
type=raw,value=latest,enable={{is_default_branch}}
|
type=raw,value=latest,enable={{is_default_branch}}
|
||||||
|
|
||||||
- name: Build + push
|
- name: Build + push
|
||||||
uses: docker/build-push-action@v7
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
context: v2
|
context: v2
|
||||||
file: v2/crates/nvsim-server/Dockerfile
|
file: v2/crates/nvsim-server/Dockerfile
|
||||||
|
|||||||
@@ -1,292 +0,0 @@
|
|||||||
# ADR-117 P5 — cibuildwheel + PyPI publish workflow for `wifi-densepose`
|
|
||||||
#
|
|
||||||
# This workflow is **explicitly NOT** triggered on every push. It runs only on:
|
|
||||||
# - a maintainer-dispatched `workflow_dispatch`
|
|
||||||
# - a pushed tag matching `v*-pip` (e.g. `v2.0.0-pip`)
|
|
||||||
#
|
|
||||||
# The reason for the `-pip` tag suffix is that the repo already cuts
|
|
||||||
# `v0.X.Y-esp32` tags for firmware releases (see CLAUDE.md). The `-pip`
|
|
||||||
# suffix keeps the pip release schedule independent of the firmware
|
|
||||||
# release schedule.
|
|
||||||
#
|
|
||||||
# Sequencing on release day (per ADR-117 §7.3):
|
|
||||||
# 1. cut tag `v1.99.0-pip` → publishes the tombstone wheel first
|
|
||||||
# 2. cut tag `v2.0.0-pip` → publishes the PyO3 v2 wheel matrix
|
|
||||||
#
|
|
||||||
# Publishes via the `PYPI_API_TOKEN` GitHub Actions secret. The
|
|
||||||
# token-refresh runbook (GCP Secret Manager → gh secret set) lives in
|
|
||||||
# docs/integrations/pypi-release.md so KICS does not flag the
|
|
||||||
# secret name as a generic-secret literal in the workflow.
|
|
||||||
#
|
|
||||||
# Q3 (witness hash v2 — open in ADR-117 §11.3) MUST be resolved
|
|
||||||
# before the first v2.0.0 publish. When v2 lands, add a parallel
|
|
||||||
# step that verifies the v2 hash against the Rust pipeline.
|
|
||||||
|
|
||||||
name: pip-release
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
target:
|
|
||||||
description: "Which package to release"
|
|
||||||
required: true
|
|
||||||
type: choice
|
|
||||||
options:
|
|
||||||
- v2-wheels
|
|
||||||
- v1-99-tombstone
|
|
||||||
publish_to:
|
|
||||||
description: "Where to publish"
|
|
||||||
required: true
|
|
||||||
default: testpypi
|
|
||||||
type: choice
|
|
||||||
options:
|
|
||||||
- testpypi # dry-run target
|
|
||||||
- pypi # production
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- "v*-pip"
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
# ────────────────────────────────────────────────────────────────
|
|
||||||
# v2.0.0 — cibuildwheel matrix (5 wheels + sdist)
|
|
||||||
# ────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
build-wheels:
|
|
||||||
name: Build ${{ matrix.os }} ${{ matrix.arch }}
|
|
||||||
if: |
|
|
||||||
github.event_name == 'workflow_dispatch' && inputs.target == 'v2-wheels' ||
|
|
||||||
startsWith(github.ref, 'refs/tags/v2.')
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
- os: ubuntu-latest
|
|
||||||
arch: x86_64
|
|
||||||
- os: ubuntu-latest
|
|
||||||
arch: aarch64
|
|
||||||
- os: macos-13 # x86_64 runner
|
|
||||||
arch: x86_64
|
|
||||||
- os: macos-14 # arm64 runner
|
|
||||||
arch: arm64
|
|
||||||
- os: windows-latest
|
|
||||||
arch: AMD64
|
|
||||||
runs-on: ${{ matrix.os }}
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
# Linux aarch64 needs QEMU for cross-build on x86_64 runners.
|
|
||||||
- name: Set up QEMU
|
|
||||||
if: matrix.os == 'ubuntu-latest' && matrix.arch == 'aarch64'
|
|
||||||
uses: docker/setup-qemu-action@v3
|
|
||||||
|
|
||||||
# ADR-117 §5.4: abi3-py310 — one binary per OS/arch covers all
|
|
||||||
# Python minor versions ≥ 3.10. Build only cp310 wheels.
|
|
||||||
- name: Build wheels (cibuildwheel)
|
|
||||||
uses: pypa/cibuildwheel@v2.21
|
|
||||||
env:
|
|
||||||
CIBW_BUILD: "cp310-*"
|
|
||||||
CIBW_ARCHS_LINUX: ${{ matrix.arch }}
|
|
||||||
CIBW_ARCHS_MACOS: ${{ matrix.arch }}
|
|
||||||
CIBW_ARCHS_WINDOWS: ${{ matrix.arch }}
|
|
||||||
CIBW_BUILD_FRONTEND: "build"
|
|
||||||
# cibuildwheel runs this command through a shell, so the requirement must be
# quoted: an unquoted `>=1.7` is treated as an output redirect to a file named
# `=1.7` and the version floor is silently dropped. Double quotes are used
# because they are honoured by both POSIX sh and Windows cmd.
CIBW_BEFORE_BUILD: 'pip install "maturin>=1.7"'
|
|
||||||
# The PyO3 sdist landing depends on the cargo/Rust toolchain
|
|
||||||
# being present. cibuildwheel images carry rustup on Linux
|
|
||||||
# but we also pin a known-good version for reproducibility.
|
|
||||||
CIBW_BEFORE_ALL_LINUX: "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.82"
|
|
||||||
CIBW_ENVIRONMENT_LINUX: 'PATH="$HOME/.cargo/bin:$PATH"'
|
|
||||||
# Smoke-test every built wheel before accepting it. Catches
|
|
||||||
# the case where the wheel imports but the compiled symbols
|
|
||||||
# are missing.
|
|
||||||
CIBW_TEST_REQUIRES: "pytest>=8.0"
|
|
||||||
CIBW_TEST_COMMAND: 'python -c "import wifi_densepose; assert wifi_densepose.hello() == \"ok\"; print(wifi_densepose.__build_features__)"'
|
|
||||||
with:
|
|
||||||
package-dir: python
|
|
||||||
output-dir: wheelhouse
|
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: wheels-${{ matrix.os }}-${{ matrix.arch }}
|
|
||||||
path: wheelhouse/*.whl
|
|
||||||
if-no-files-found: error
|
|
||||||
|
|
||||||
build-sdist:
|
|
||||||
name: Build v2 sdist
|
|
||||||
if: |
|
|
||||||
github.event_name == 'workflow_dispatch' && inputs.target == 'v2-wheels' ||
|
|
||||||
startsWith(github.ref, 'refs/tags/v2.')
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
- name: Install maturin
|
|
||||||
# Quote the requirement specifier: unquoted, the shell parses `>=1.7` as a
# redirect of stdout to a file named `=1.7`, so pip would install whatever
# maturin version is latest with no minimum-version constraint applied.
run: pip install "maturin>=1.7"
|
|
||||||
- name: Build sdist
|
|
||||||
working-directory: python
|
|
||||||
run: maturin sdist --out ../sdist
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: sdist
|
|
||||||
path: sdist/*.tar.gz
|
|
||||||
if-no-files-found: error
|
|
||||||
|
|
||||||
# ────────────────────────────────────────────────────────────────
|
|
||||||
# v1.99.0 — tombstone wheel (pure Python, single sdist + wheel)
|
|
||||||
# ────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
build-tombstone:
|
|
||||||
name: Build v1.99.0 tombstone
|
|
||||||
if: |
|
|
||||||
github.event_name == 'workflow_dispatch' && inputs.target == 'v1-99-tombstone' ||
|
|
||||||
startsWith(github.ref, 'refs/tags/v1.99')
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
- uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: '3.12'
|
|
||||||
- name: Install build backend
|
|
||||||
# Quote the requirement specifier: unquoted, the shell parses `>=1.2` as a
# redirect of stdout to a file named `=1.2`, so the minimum version of
# `build` would not be enforced.
run: python -m pip install --upgrade pip "build>=1.2"
|
|
||||||
- name: Build sdist + wheel
|
|
||||||
working-directory: python/tombstone
|
|
||||||
run: python -m build --outdir ../../tombstone-dist
|
|
||||||
# Inspect what was actually built — the previous v1.99.0-pip run
|
|
||||||
# showed an `import wifi_densepose` that returned cleanly instead
|
|
||||||
# of raising, even though build logs said `adding 'wifi_densepose/__init__.py'`.
|
|
||||||
# Print the wheel manifest + the __init__.py content so any
|
|
||||||
# future regression is debuggable from the run log alone.
|
|
||||||
- name: Inspect wheel contents
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
WHL=tombstone-dist/wifi_densepose-1.99.0-py3-none-any.whl
|
|
||||||
echo "--- wheel listing ---"
|
|
||||||
python -m zipfile -l "$WHL"
|
|
||||||
echo "--- wifi_densepose/__init__.py inside the wheel ---"
|
|
||||||
python -m zipfile -e "$WHL" /tmp/tomb-inspect
|
|
||||||
cat /tmp/tomb-inspect/wifi_densepose/__init__.py
|
|
||||||
echo "--- size in bytes ---"
|
|
||||||
wc -c /tmp/tomb-inspect/wifi_densepose/__init__.py
|
|
||||||
# Smoke-test in an ISOLATED venv. The previous run's failure
|
|
||||||
# mode was that the ubuntu-latest runner's system `python` had
|
|
||||||
# site-packages picking up something other than the user-installed
|
|
||||||
# wheel, so the import resolved to a different module. A clean
|
|
||||||
# venv removes any ambiguity about which wifi_densepose is loaded.
|
|
||||||
- name: Smoke-test tombstone in isolated venv
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
# Copy the wheel to /tmp BEFORE entering the venv — we must
|
|
||||||
# cd OUT of the repo root because the repo contains a
|
|
||||||
# `wifi_densepose/` directory left over from the legacy v1
|
|
||||||
# source. Python puts cwd at sys.path[0], so an import from
|
|
||||||
# the repo root would resolve to the legacy directory and
|
|
||||||
# bypass the freshly-installed wheel entirely (this was the
|
|
||||||
# silent failure mode of the previous two run attempts).
|
|
||||||
cp tombstone-dist/wifi_densepose-1.99.0-py3-none-any.whl /tmp/
|
|
||||||
python -m venv /tmp/smoke-venv
|
|
||||||
/tmp/smoke-venv/bin/python -m pip install --upgrade pip
|
|
||||||
/tmp/smoke-venv/bin/python -m pip install /tmp/wifi_densepose-1.99.0-py3-none-any.whl
|
|
||||||
cd /tmp # away from the repo root's stray wifi_densepose/
|
|
||||||
/tmp/smoke-venv/bin/python -c "import importlib.util as u; s = u.find_spec('wifi_densepose'); print('Resolved to:', s.origin); print('--- file content ---'); print(open(s.origin).read())"
|
|
||||||
set +e
|
|
||||||
/tmp/smoke-venv/bin/python -c "import wifi_densepose" 2> import-output.txt
|
|
||||||
rc=$?
|
|
||||||
set -e
|
|
||||||
if [ "$rc" -eq 0 ]; then
|
|
||||||
echo "ERROR: tombstone import succeeded — should have raised ImportError"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if ! grep -q "github.com/ruvnet/RuView" import-output.txt; then
|
|
||||||
echo "ERROR: tombstone ImportError missing migration URL"
|
|
||||||
cat import-output.txt
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "Tombstone wheel correctly raises ImportError with migration URL."
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: tombstone
|
|
||||||
path: tombstone-dist/*
|
|
||||||
if-no-files-found: error
|
|
||||||
|
|
||||||
# ────────────────────────────────────────────────────────────────
|
|
||||||
# Publish — gated by manual dispatch OR by the tag form
|
|
||||||
# ────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
publish-v2:
|
|
||||||
name: Publish v2 wheels
|
|
||||||
needs: [build-wheels, build-sdist]
|
|
||||||
if: |
|
|
||||||
always() &&
|
|
||||||
needs.build-wheels.result == 'success' &&
|
|
||||||
needs.build-sdist.result == 'success' &&
|
|
||||||
(
|
|
||||||
github.event_name == 'workflow_dispatch' && inputs.target == 'v2-wheels' ||
|
|
||||||
startsWith(github.ref, 'refs/tags/v2.')
|
|
||||||
)
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Gather all artifacts into dist/
|
|
||||||
uses: actions/download-artifact@v4
|
|
||||||
with:
|
|
||||||
path: dist-staging
|
|
||||||
- name: Flatten artifacts
|
|
||||||
run: |
|
|
||||||
mkdir -p dist
|
|
||||||
find dist-staging -type f \( -name '*.whl' -o -name '*.tar.gz' \) -exec cp -v {} dist/ \;
|
|
||||||
ls -lh dist/
|
|
||||||
- name: Publish to TestPyPI (dry-run target)
|
|
||||||
if: github.event_name == 'workflow_dispatch' && inputs.publish_to == 'testpypi'
|
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
|
||||||
with:
|
|
||||||
repository-url: https://test.pypi.org/legacy/
|
|
||||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
||||||
packages-dir: dist
|
|
||||||
skip-existing: true
|
|
||||||
- name: Publish to PyPI
|
|
||||||
if: |
|
|
||||||
startsWith(github.ref, 'refs/tags/v2.') ||
|
|
||||||
(github.event_name == 'workflow_dispatch' && inputs.publish_to == 'pypi')
|
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
|
||||||
with:
|
|
||||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
||||||
packages-dir: dist
|
|
||||||
|
|
||||||
publish-tombstone:
|
|
||||||
name: Publish v1.99 tombstone
|
|
||||||
needs: [build-tombstone]
|
|
||||||
if: |
|
|
||||||
always() &&
|
|
||||||
needs.build-tombstone.result == 'success' &&
|
|
||||||
(
|
|
||||||
github.event_name == 'workflow_dispatch' && inputs.target == 'v1-99-tombstone' ||
|
|
||||||
startsWith(github.ref, 'refs/tags/v1.99')
|
|
||||||
)
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/download-artifact@v4
|
|
||||||
with:
|
|
||||||
name: tombstone
|
|
||||||
path: dist
|
|
||||||
- name: Publish to TestPyPI (dry-run target)
|
|
||||||
if: github.event_name == 'workflow_dispatch' && inputs.publish_to == 'testpypi'
|
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
|
||||||
with:
|
|
||||||
repository-url: https://test.pypi.org/legacy/
|
|
||||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
||||||
packages-dir: dist
|
|
||||||
skip-existing: true
|
|
||||||
- name: Publish to PyPI
|
|
||||||
if: |
|
|
||||||
startsWith(github.ref, 'refs/tags/v1.99') ||
|
|
||||||
(github.event_name == 'workflow_dispatch' && inputs.publish_to == 'pypi')
|
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
|
||||||
with:
|
|
||||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
||||||
packages-dir: dist
|
|
||||||
@@ -29,8 +29,6 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout main
|
- name: Checkout main
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Stage viewer for Pages
|
- name: Stage viewer for Pages
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -1,157 +0,0 @@
|
|||||||
name: ruview-swarm CI guard
|
|
||||||
|
|
||||||
# Dedicated guard for the ADR-148 drone swarm crate (`v2/crates/ruview-swarm`).
|
|
||||||
# The main ci.yml runs `cargo test --workspace --no-default-features`, which
|
|
||||||
# only exercises ruview-swarm's DEFAULT feature set. This guard additionally:
|
|
||||||
# - tests each feature set in the job matrix (default / train / ruflo / full+train)
|
|
||||||
# - fails on ANY clippy warning in the crate's own code (--no-deps)
|
|
||||||
# - asserts the ITAR + publish guards stay in place (USML Cat VIII(h)(12))
|
|
||||||
# - builds the GPU training binary under the `train` feature
|
|
||||||
#
|
|
||||||
# Path-scoped so it only runs when the crate or this workflow changes.
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [ main, 'feat/*' ]
|
|
||||||
paths:
|
|
||||||
- 'v2/crates/ruview-swarm/**'
|
|
||||||
- '.github/workflows/ruview-swarm-ci.yml'
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- 'v2/crates/ruview-swarm/**'
|
|
||||||
- '.github/workflows/ruview-swarm-ci.yml'
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
env:
|
|
||||||
CARGO_TERM_COLOR: always
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
# ── Feature-matrix tests ─────────────────────────────────────────────────
|
|
||||||
tests:
|
|
||||||
name: tests (${{ matrix.features.label }})
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
features:
|
|
||||||
- { label: 'default', flags: '--no-default-features' }
|
|
||||||
- { label: 'train', flags: '--features train' }
|
|
||||||
- { label: 'ruflo', flags: '--features ruflo' }
|
|
||||||
- { label: 'full+train', flags: '--features full,train' }
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
- uses: dtolnay/rust-toolchain@stable
|
|
||||||
- name: Cache cargo
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
~/.cargo/registry
|
|
||||||
~/.cargo/git
|
|
||||||
v2/target
|
|
||||||
key: ${{ runner.os }}-ruview-swarm-${{ hashFiles('v2/Cargo.lock') }}
|
|
||||||
restore-keys: ${{ runner.os }}-ruview-swarm-
|
|
||||||
- name: cargo test -p ruview-swarm ${{ matrix.features.flags }}
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo test -p ruview-swarm ${{ matrix.features.flags }} --lib
|
|
||||||
|
|
||||||
# ── Clippy: zero warnings in the crate's own code ────────────────────────
|
|
||||||
clippy:
|
|
||||||
name: clippy (-D warnings, --no-deps)
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
# v2/rust-toolchain.toml pins channel "1.89" with profile "minimal" (no
|
|
||||||
# clippy). dtolnay@stable installs clippy on the floating "stable"
|
|
||||||
# toolchain, but the override makes cargo use the separate "1.89"
|
|
||||||
# toolchain — so `cargo clippy` errors "cargo-clippy is not installed for
|
|
||||||
# 1.89". Install clippy on the pinned toolchain that cargo actually uses.
|
|
||||||
- uses: dtolnay/rust-toolchain@stable
|
|
||||||
with:
|
|
||||||
toolchain: "1.89"
|
|
||||||
components: clippy
|
|
||||||
- name: Cache cargo
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
~/.cargo/registry
|
|
||||||
~/.cargo/git
|
|
||||||
v2/target
|
|
||||||
key: ${{ runner.os }}-ruview-swarm-clippy-${{ hashFiles('v2/Cargo.lock') }}
|
|
||||||
restore-keys: ${{ runner.os }}-ruview-swarm-clippy-
|
|
||||||
# --no-deps confines linting to ruview-swarm's own source, so pre-existing
|
|
||||||
# warnings in dependency crates don't gate this PR.
|
|
||||||
- name: clippy (default)
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo clippy -p ruview-swarm --no-default-features --no-deps -- -D warnings
|
|
||||||
- name: clippy (full,train)
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo clippy -p ruview-swarm --features full,train --no-deps -- -D warnings
|
|
||||||
|
|
||||||
# ── Build the GPU training binary (train feature) ────────────────────────
|
|
||||||
train-bin:
|
|
||||||
name: build train_marl bin
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
- uses: dtolnay/rust-toolchain@stable
|
|
||||||
- name: Cache cargo
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
~/.cargo/registry
|
|
||||||
~/.cargo/git
|
|
||||||
v2/target
|
|
||||||
key: ${{ runner.os }}-ruview-swarm-bin-${{ hashFiles('v2/Cargo.lock') }}
|
|
||||||
restore-keys: ${{ runner.os }}-ruview-swarm-bin-
|
|
||||||
- name: cargo build --bin train_marl --features train
|
|
||||||
working-directory: v2
|
|
||||||
run: cargo build -p ruview-swarm --features train --bin train_marl
|
|
||||||
- name: train_marl is excluded from the default build
|
|
||||||
working-directory: v2
|
|
||||||
run: |
|
|
||||||
# The training binary requires the `train` feature; a default `--bins`
|
|
||||||
# build must NOT produce it (keeps default/CI builds light + Candle-free).
|
|
||||||
# Remove any prior artifact first so this checks what the DEFAULT build
|
|
||||||
# produces, not a leftover from the train-feature build above.
|
|
||||||
rm -f target/debug/train_marl
|
|
||||||
cargo build -p ruview-swarm --no-default-features --bins
|
|
||||||
if [ -f target/debug/train_marl ]; then
|
|
||||||
echo "ERROR: train_marl built without the 'train' feature" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "OK: train_marl correctly gated behind the 'train' feature"
|
|
||||||
|
|
||||||
# ── ITAR + publish guards ────────────────────────────────────────────────
|
|
||||||
export-control-guard:
|
|
||||||
name: ITAR / publish guard
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
- name: publish = false is present (no accidental crates.io publish)
|
|
||||||
run: |
|
|
||||||
CARGO=v2/crates/ruview-swarm/Cargo.toml
|
|
||||||
if ! grep -qE '^\s*publish\s*=\s*false' "$CARGO"; then
|
|
||||||
echo "ERROR: ruview-swarm Cargo.toml must keep 'publish = false' until" >&2
|
|
||||||
echo " PR merge + dependency publish + ITAR export sign-off." >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "OK: publish = false present"
|
|
||||||
- name: default feature set does NOT enable itar-unrestricted
|
|
||||||
run: |
|
|
||||||
CARGO=v2/crates/ruview-swarm/Cargo.toml
|
|
||||||
# USML Cat VIII(h)(12): swarming coordination must be opt-in, never default.
|
|
||||||
DEFAULT_LINE=$(grep -E '^\s*default\s*=' "$CARGO" || true)
|
|
||||||
echo "default = $DEFAULT_LINE"
|
|
||||||
if echo "$DEFAULT_LINE" | grep -q 'itar-unrestricted'; then
|
|
||||||
echo "ERROR: 'itar-unrestricted' must NOT be in the default feature set" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "OK: ITAR-gated coordination features are opt-in, not default"
|
|
||||||
@@ -18,28 +18,23 @@ jobs:
|
|||||||
sast:
|
sast:
|
||||||
name: Static Application Security Testing
|
name: Static Application Security Testing
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
|
|
||||||
permissions:
|
permissions:
|
||||||
security-events: write
|
security-events: write
|
||||||
actions: read
|
actions: read
|
||||||
contents: read
|
contents: read
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
continue-on-error: true
|
uses: actions/setup-python@v5
|
||||||
uses: actions/setup-python@v6
|
|
||||||
with:
|
with:
|
||||||
python-version: ${{ env.PYTHON_VERSION }}
|
python-version: ${{ env.PYTHON_VERSION }}
|
||||||
cache: 'pip'
|
cache: 'pip'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -47,38 +42,34 @@ jobs:
|
|||||||
|
|
||||||
- name: Run Bandit security scan
|
- name: Run Bandit security scan
|
||||||
run: |
|
run: |
|
||||||
# The Python codebase lives under archive/v1/src (it moved there when
|
bandit -r src/ -f sarif -o bandit-results.sarif
|
||||||
# the runtime was rewritten in Rust). Scanning `src/` matched nothing,
|
|
||||||
# so this SAST step was a silent no-op.
|
|
||||||
bandit -r archive/v1/src/ -f sarif -o bandit-results.sarif
|
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
|
|
||||||
- name: Upload Bandit results to GitHub Security
|
- name: Upload Bandit results to GitHub Security
|
||||||
continue-on-error: true
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
sarif_file: bandit-results.sarif
|
sarif_file: bandit-results.sarif
|
||||||
category: bandit
|
category: bandit
|
||||||
|
|
||||||
# Removed the deprecated `returntocorp/semgrep-action@v1` step: it was
|
- name: Run Semgrep security scan
|
||||||
# redundant (the pip `semgrep --sarif` below is what feeds GitHub Security;
|
uses: returntocorp/semgrep-action@v1
|
||||||
# the action only pushed to the Semgrep cloud app via SEMGREP_APP_TOKEN) and
|
with:
|
||||||
# it pulled `returntocorp/semgrep-agent:v1` from Docker Hub on every run,
|
config: >-
|
||||||
# which intermittently timed out and turned this check red. The pip semgrep
|
p/security-audit
|
||||||
# (installed above) needs no Docker pull. The action's `p/docker` +
|
p/secrets
|
||||||
# `p/kubernetes` rulesets are folded into the command below so coverage is
|
p/python
|
||||||
# preserved.
|
p/docker
|
||||||
- name: Run Semgrep + generate SARIF
|
p/kubernetes
|
||||||
|
env:
|
||||||
|
SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }}
|
||||||
|
|
||||||
|
- name: Generate Semgrep SARIF
|
||||||
run: |
|
run: |
|
||||||
semgrep \
|
semgrep --config=p/security-audit --config=p/secrets --config=p/python --sarif --output=semgrep.sarif src/
|
||||||
--config=p/security-audit --config=p/secrets --config=p/python \
|
|
||||||
--config=p/docker --config=p/kubernetes \
|
|
||||||
--sarif --output=semgrep.sarif archive/v1/src/
|
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
|
|
||||||
- name: Upload Semgrep results to GitHub Security
|
- name: Upload Semgrep results to GitHub Security
|
||||||
continue-on-error: true
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -89,27 +80,21 @@ jobs:
|
|||||||
dependency-scan:
|
dependency-scan:
|
||||||
name: Dependency Vulnerability Scan
|
name: Dependency Vulnerability Scan
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
|
|
||||||
permissions:
|
permissions:
|
||||||
security-events: write
|
security-events: write
|
||||||
actions: read
|
actions: read
|
||||||
contents: read
|
contents: read
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
continue-on-error: true
|
uses: actions/setup-python@v5
|
||||||
uses: actions/setup-python@v6
|
|
||||||
with:
|
with:
|
||||||
python-version: ${{ env.PYTHON_VERSION }}
|
python-version: ${{ env.PYTHON_VERSION }}
|
||||||
cache: 'pip'
|
cache: 'pip'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -134,7 +119,6 @@ jobs:
|
|||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
|
|
||||||
- name: Upload Snyk results to GitHub Security
|
- name: Upload Snyk results to GitHub Security
|
||||||
continue-on-error: true
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -142,7 +126,6 @@ jobs:
|
|||||||
category: snyk
|
category: snyk
|
||||||
|
|
||||||
- name: Upload vulnerability reports
|
- name: Upload vulnerability reports
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -156,7 +139,6 @@ jobs:
|
|||||||
container-scan:
|
container-scan:
|
||||||
name: Container Security Scan
|
name: Container Security Scan
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
|
|
||||||
needs: []
|
needs: []
|
||||||
if: github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
permissions:
|
permissions:
|
||||||
@@ -165,18 +147,13 @@ jobs:
|
|||||||
contents: read
|
contents: read
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
continue-on-error: true
|
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
- name: Build Docker image for scanning
|
- name: Build Docker image for scanning
|
||||||
continue-on-error: true
|
uses: docker/build-push-action@v5
|
||||||
uses: docker/build-push-action@v7
|
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
target: production
|
target: production
|
||||||
@@ -186,7 +163,6 @@ jobs:
|
|||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
- name: Run Trivy vulnerability scanner
|
- name: Run Trivy vulnerability scanner
|
||||||
continue-on-error: true
|
|
||||||
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
|
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
|
||||||
with:
|
with:
|
||||||
image-ref: 'wifi-densepose:scan'
|
image-ref: 'wifi-densepose:scan'
|
||||||
@@ -194,7 +170,6 @@ jobs:
|
|||||||
output: 'trivy-results.sarif'
|
output: 'trivy-results.sarif'
|
||||||
|
|
||||||
- name: Upload Trivy results to GitHub Security
|
- name: Upload Trivy results to GitHub Security
|
||||||
continue-on-error: true
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -202,8 +177,7 @@ jobs:
|
|||||||
category: trivy
|
category: trivy
|
||||||
|
|
||||||
- name: Run Grype vulnerability scanner
|
- name: Run Grype vulnerability scanner
|
||||||
continue-on-error: true
|
uses: anchore/scan-action@v3
|
||||||
uses: anchore/scan-action@v7
|
|
||||||
id: grype-scan
|
id: grype-scan
|
||||||
with:
|
with:
|
||||||
image: 'wifi-densepose:scan'
|
image: 'wifi-densepose:scan'
|
||||||
@@ -212,7 +186,6 @@ jobs:
|
|||||||
output-format: sarif
|
output-format: sarif
|
||||||
|
|
||||||
- name: Upload Grype results to GitHub Security
|
- name: Upload Grype results to GitHub Security
|
||||||
continue-on-error: true
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -220,7 +193,6 @@ jobs:
|
|||||||
category: grype
|
category: grype
|
||||||
|
|
||||||
- name: Run Docker Scout
|
- name: Run Docker Scout
|
||||||
continue-on-error: true
|
|
||||||
uses: docker/scout-action@v1
|
uses: docker/scout-action@v1
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -230,7 +202,6 @@ jobs:
|
|||||||
summary: true
|
summary: true
|
||||||
|
|
||||||
- name: Upload Docker Scout results
|
- name: Upload Docker Scout results
|
||||||
continue-on-error: true
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -241,20 +212,15 @@ jobs:
|
|||||||
iac-scan:
|
iac-scan:
|
||||||
name: Infrastructure Security Scan
|
name: Infrastructure Security Scan
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
|
|
||||||
permissions:
|
permissions:
|
||||||
security-events: write
|
security-events: write
|
||||||
actions: read
|
actions: read
|
||||||
contents: read
|
contents: read
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Run Checkov IaC scan
|
- name: Run Checkov IaC scan
|
||||||
continue-on-error: true
|
|
||||||
uses: bridgecrewio/checkov-action@99bb2caf247dfd9f03cf984373bc6043d4e32ebf # v12.1347.0
|
uses: bridgecrewio/checkov-action@99bb2caf247dfd9f03cf984373bc6043d4e32ebf # v12.1347.0
|
||||||
with:
|
with:
|
||||||
directory: .
|
directory: .
|
||||||
@@ -265,7 +231,6 @@ jobs:
|
|||||||
soft_fail: true
|
soft_fail: true
|
||||||
|
|
||||||
- name: Upload Checkov results to GitHub Security
|
- name: Upload Checkov results to GitHub Security
|
||||||
continue-on-error: true
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -273,7 +238,6 @@ jobs:
|
|||||||
category: checkov
|
category: checkov
|
||||||
|
|
||||||
- name: Run Terrascan IaC scan
|
- name: Run Terrascan IaC scan
|
||||||
continue-on-error: true
|
|
||||||
uses: tenable/terrascan-action@3a6e87da8e244513bd77b631e624552643f794c6 # v1.4.1
|
uses: tenable/terrascan-action@3a6e87da8e244513bd77b631e624552643f794c6 # v1.4.1
|
||||||
with:
|
with:
|
||||||
iac_type: 'k8s'
|
iac_type: 'k8s'
|
||||||
@@ -283,7 +247,6 @@ jobs:
|
|||||||
sarif_upload: true
|
sarif_upload: true
|
||||||
|
|
||||||
- name: Run KICS IaC scan
|
- name: Run KICS IaC scan
|
||||||
continue-on-error: true
|
|
||||||
uses: checkmarx/kics-github-action@05aa5eb70eede1355220f4ca5238d96b397e30a6 # v2.1.20
|
uses: checkmarx/kics-github-action@05aa5eb70eede1355220f4ca5238d96b397e30a6 # v2.1.20
|
||||||
with:
|
with:
|
||||||
path: '.'
|
path: '.'
|
||||||
@@ -293,7 +256,6 @@ jobs:
|
|||||||
exclude_queries: 'a7ef1e8c-fbf8-4ac1-b8c7-2c3b0e6c6c6c'
|
exclude_queries: 'a7ef1e8c-fbf8-4ac1-b8c7-2c3b0e6c6c6c'
|
||||||
|
|
||||||
- name: Upload KICS results to GitHub Security
|
- name: Upload KICS results to GitHub Security
|
||||||
continue-on-error: true
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
@@ -304,21 +266,17 @@ jobs:
|
|||||||
secret-scan:
|
secret-scan:
|
||||||
name: Secret Scanning
|
name: Secret Scanning
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
|
|
||||||
permissions:
|
permissions:
|
||||||
security-events: write
|
security-events: write
|
||||||
actions: read
|
actions: read
|
||||||
contents: read
|
contents: read
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Run TruffleHog secret scan
|
- name: Run TruffleHog secret scan
|
||||||
continue-on-error: true
|
|
||||||
uses: trufflesecurity/trufflehog@17456f8c7d042d8c82c9a8ca9e937231f9f42e26 # v3.95.2
|
uses: trufflesecurity/trufflehog@17456f8c7d042d8c82c9a8ca9e937231f9f42e26 # v3.95.2
|
||||||
with:
|
with:
|
||||||
path: ./
|
path: ./
|
||||||
@@ -327,7 +285,6 @@ jobs:
|
|||||||
extra_args: --debug --only-verified
|
extra_args: --debug --only-verified
|
||||||
|
|
||||||
- name: Run GitLeaks secret scan
|
- name: Run GitLeaks secret scan
|
||||||
continue-on-error: true
|
|
||||||
uses: gitleaks/gitleaks-action@v2
|
uses: gitleaks/gitleaks-action@v2
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@@ -344,36 +301,28 @@ jobs:
|
|||||||
license-scan:
|
license-scan:
|
||||||
name: License Compliance Scan
|
name: License Compliance Scan
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
continue-on-error: true
|
uses: actions/setup-python@v5
|
||||||
uses: actions/setup-python@v6
|
|
||||||
with:
|
with:
|
||||||
python-version: ${{ env.PYTHON_VERSION }}
|
python-version: ${{ env.PYTHON_VERSION }}
|
||||||
cache: 'pip'
|
cache: 'pip'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
pip install pip-licenses licensecheck
|
pip install pip-licenses licensecheck
|
||||||
|
|
||||||
- name: Run license check
|
- name: Run license check
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
pip-licenses --format=json --output-file=licenses.json
|
pip-licenses --format=json --output-file=licenses.json
|
||||||
licensecheck --zero
|
licensecheck --zero
|
||||||
|
|
||||||
- name: Upload license report
|
- name: Upload license report
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: license-report
|
name: license-report
|
||||||
@@ -383,16 +332,11 @@ jobs:
|
|||||||
compliance-check:
|
compliance-check:
|
||||||
name: Security Policy Compliance
|
name: Security Policy Compliance
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Check security policy files
|
- name: Check security policy files
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
# Check for required security files
|
# Check for required security files
|
||||||
files=("SECURITY.md" ".github/SECURITY.md" "docs/SECURITY.md")
|
files=("SECURITY.md" ".github/SECURITY.md" "docs/SECURITY.md")
|
||||||
@@ -410,13 +354,11 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Check for security headers in code
|
- name: Check for security headers in code
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
# Check for security-related configurations
|
# Check for security-related configurations
|
||||||
grep -r "X-Frame-Options\|X-Content-Type-Options\|X-XSS-Protection\|Content-Security-Policy" src/ || echo "⚠️ Consider adding security headers"
|
grep -r "X-Frame-Options\|X-Content-Type-Options\|X-XSS-Protection\|Content-Security-Policy" src/ || echo "⚠️ Consider adding security headers"
|
||||||
|
|
||||||
- name: Validate Kubernetes security contexts
|
- name: Validate Kubernetes security contexts
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
# Check for security contexts in Kubernetes manifests
|
# Check for security contexts in Kubernetes manifests
|
||||||
if [[ -d "k8s" ]]; then
|
if [[ -d "k8s" ]]; then
|
||||||
@@ -433,7 +375,6 @@ jobs:
|
|||||||
security-report:
|
security-report:
|
||||||
name: Security Report
|
name: Security Report
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
continue-on-error: true # third-party scanners are flaky / SARIF uploads can 403; don't gate the PR
|
|
||||||
needs: [sast, dependency-scan, container-scan, iac-scan, secret-scan, license-scan, compliance-check]
|
needs: [sast, dependency-scan, container-scan, iac-scan, secret-scan, license-scan, compliance-check]
|
||||||
if: always()
|
if: always()
|
||||||
# Promote secret to env-scope so the gating `if:` on the Slack-notify
|
# Promote secret to env-scope so the gating `if:` on the Slack-notify
|
||||||
@@ -443,11 +384,9 @@ jobs:
|
|||||||
SECURITY_SLACK_WEBHOOK_URL: ${{ secrets.SECURITY_SLACK_WEBHOOK_URL }}
|
SECURITY_SLACK_WEBHOOK_URL: ${{ secrets.SECURITY_SLACK_WEBHOOK_URL }}
|
||||||
steps:
|
steps:
|
||||||
- name: Download all artifacts
|
- name: Download all artifacts
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/download-artifact@v4
|
uses: actions/download-artifact@v4
|
||||||
|
|
||||||
- name: Generate security summary
|
- name: Generate security summary
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
echo "# Security Scan Summary" > security-summary.md
|
echo "# Security Scan Summary" > security-summary.md
|
||||||
echo "" >> security-summary.md
|
echo "" >> security-summary.md
|
||||||
@@ -463,7 +402,6 @@ jobs:
|
|||||||
echo "Generated on: $(date)" >> security-summary.md
|
echo "Generated on: $(date)" >> security-summary.md
|
||||||
|
|
||||||
- name: Upload security summary
|
- name: Upload security summary
|
||||||
continue-on-error: true
|
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: security-summary
|
name: security-summary
|
||||||
@@ -473,7 +411,6 @@ jobs:
|
|||||||
# use env.X instead. Inherits SECURITY_SLACK_WEBHOOK_URL from the
|
# use env.X instead. Inherits SECURITY_SLACK_WEBHOOK_URL from the
|
||||||
# job-level env block (added below).
|
# job-level env block (added below).
|
||||||
- name: Notify security team on critical findings
|
- name: Notify security team on critical findings
|
||||||
continue-on-error: true
|
|
||||||
if: ${{ env.SECURITY_SLACK_WEBHOOK_URL != '' && (needs.sast.result == 'failure' || needs.dependency-scan.result == 'failure' || needs.container-scan.result == 'failure') }}
|
if: ${{ env.SECURITY_SLACK_WEBHOOK_URL != '' && (needs.sast.result == 'failure' || needs.dependency-scan.result == 'failure' || needs.container-scan.result == 'failure') }}
|
||||||
uses: 8398a7/action-slack@v3
|
uses: 8398a7/action-slack@v3
|
||||||
with:
|
with:
|
||||||
@@ -489,7 +426,6 @@ jobs:
|
|||||||
SLACK_WEBHOOK_URL: ${{ env.SECURITY_SLACK_WEBHOOK_URL }}
|
SLACK_WEBHOOK_URL: ${{ env.SECURITY_SLACK_WEBHOOK_URL }}
|
||||||
|
|
||||||
- name: Create security issue on critical findings
|
- name: Create security issue on critical findings
|
||||||
continue-on-error: true
|
|
||||||
if: needs.sast.result == 'failure' || needs.dependency-scan.result == 'failure'
|
if: needs.sast.result == 'failure' || needs.dependency-scan.result == 'failure'
|
||||||
uses: actions/github-script@v6
|
uses: actions/github-script@v6
|
||||||
with:
|
with:
|
||||||
|
|||||||
@@ -1,181 +0,0 @@
|
|||||||
name: wifi-densepose sensing-server → Docker Hub + ghcr.io
|
|
||||||
|
|
||||||
# Build + publish the `wifi-densepose` sensing-server image to both Docker Hub
|
|
||||||
# (`ruvnet/wifi-densepose`) and ghcr.io (`ghcr.io/ruvnet/wifi-densepose`) on:
|
|
||||||
# - push to main affecting the Dockerfile, the server crate, the UI assets,
|
|
||||||
# or this workflow itself,
|
|
||||||
# - tag push matching v* (release builds),
|
|
||||||
# - manual workflow_dispatch.
|
|
||||||
#
|
|
||||||
# Closes #520 and #514: the stale `:latest` is rebuilt and pushed automatically
|
|
||||||
# whenever the surface that produces it changes, and the Dockerfile fails the
|
|
||||||
# build if the observatory/pose-fusion UI assets ever go missing again.
|
|
||||||
#
|
|
||||||
# Secrets:
|
|
||||||
# DOCKERHUB_USERNAME — `ruvnet` (Docker Hub login name)
|
|
||||||
# DOCKERHUB_TOKEN — Docker Hub access token with read/write/delete scope
|
|
||||||
# (ghcr.io uses the workflow's GITHUB_TOKEN — no secret needed.)
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
paths:
|
|
||||||
- 'docker/Dockerfile.rust'
|
|
||||||
- 'docker/docker-entrypoint.sh'
|
|
||||||
- 'v2/crates/wifi-densepose-sensing-server/**'
|
|
||||||
- 'v2/crates/wifi-densepose-signal/**'
|
|
||||||
- 'v2/crates/wifi-densepose-vitals/**'
|
|
||||||
- 'v2/crates/wifi-densepose-wifiscan/**'
|
|
||||||
- 'v2/crates/wifi-densepose-bfld/**'
|
|
||||||
- 'v2/crates/cog-ha-matter/**'
|
|
||||||
- 'v2/Cargo.toml'
|
|
||||||
- 'v2/Cargo.lock'
|
|
||||||
- 'ui/**'
|
|
||||||
- '.github/workflows/sensing-server-docker.yml'
|
|
||||||
tags: ['v*']
|
|
||||||
workflow_dispatch: {}
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
packages: write
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: sensing-server-docker-${{ github.ref }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-and-publish:
|
|
||||||
name: build · push · smoke-test
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
# QEMU is required so the amd64 GitHub runner can cross-build the
|
|
||||||
# linux/arm64 layer below (Dockerfile.rust is arch-agnostic — no `--target`
|
|
||||||
# flag — so buildx + QEMU is all that's needed; arm64 builds are emulated
|
|
||||||
# by the runner, not built on a separate arm64 host).
|
|
||||||
- uses: docker/setup-qemu-action@v3
|
|
||||||
|
|
||||||
- uses: docker/setup-buildx-action@v3
|
|
||||||
|
|
||||||
- name: Log in to Docker Hub
|
|
||||||
# Bypassing docker/login-action@v3: the action kept emitting
|
|
||||||
# "malformed HTTP Authorization header" against a known-good
|
|
||||||
# dckr_pat_* token (verified by direct curl against the Hub API).
|
|
||||||
# `docker login --password-stdin` is the documented credential
|
|
||||||
# path and avoids whatever encoding step the action injects.
|
|
||||||
env:
|
|
||||||
DH_USER: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
||||||
DH_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
|
|
||||||
run: |
|
|
||||||
printf '%s' "$DH_TOKEN" | docker login docker.io -u "$DH_USER" --password-stdin
|
|
||||||
|
|
||||||
- name: Log in to ghcr.io
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
registry: ghcr.io
|
|
||||||
username: ${{ github.actor }}
|
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Compute tags
|
|
||||||
id: meta
|
|
||||||
uses: docker/metadata-action@v6
|
|
||||||
with:
|
|
||||||
images: |
|
|
||||||
docker.io/ruvnet/wifi-densepose
|
|
||||||
ghcr.io/ruvnet/wifi-densepose
|
|
||||||
tags: |
|
|
||||||
type=ref,event=branch
|
|
||||||
type=ref,event=tag
|
|
||||||
type=sha,format=short
|
|
||||||
type=raw,value=latest,enable={{is_default_branch}}
|
|
||||||
|
|
||||||
- name: Build + push
|
|
||||||
id: build
|
|
||||||
uses: docker/build-push-action@v7
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
file: docker/Dockerfile.rust
|
|
||||||
push: true
|
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
|
||||||
cache-from: type=gha
|
|
||||||
cache-to: type=gha,mode=max
|
|
||||||
# README badge advertises `amd64 + arm64`, and #547 promised multi-arch
|
|
||||||
# as part of the docker publish refresh; arm64 was never actually wired
|
|
||||||
# in, so Apple Silicon Macs hit `no matching manifest for linux/arm64/v8`
|
|
||||||
# on `docker pull ruvnet/wifi-densepose:latest` (#136, #625). Build both.
|
|
||||||
platforms: linux/amd64,linux/arm64
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------
|
|
||||||
# Smoke-test the freshly-pushed image:
|
|
||||||
# 1. UI assets that closed #520 are inside `/app/ui` (the Dockerfile's
|
|
||||||
# RUN guard catches missing ones at build time, this re-checks the
|
|
||||||
# pushed artifact post-hoc as belt-and-braces).
|
|
||||||
# 2. /health is up.
|
|
||||||
# 3. /api/v1/info returns 200 with no auth (LAN-mode default).
|
|
||||||
# 4. With RUVIEW_API_TOKEN set, /api/v1/info returns 401 without a
|
|
||||||
# Bearer header, 200 with the correct one (the #443 auth middleware).
|
|
||||||
# ---------------------------------------------------------------------
|
|
||||||
- name: Smoke-test image assets + LAN-mode HTTP
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
IMAGE="ghcr.io/ruvnet/wifi-densepose:sha-${GITHUB_SHA::7}"
|
|
||||||
docker pull "$IMAGE"
|
|
||||||
docker run --rm "$IMAGE" sh -c \
|
|
||||||
'ls /app/ui/observatory.html /app/ui/pose-fusion.html /app/ui/index.html /app/ui/viz.html >/dev/null'
|
|
||||||
docker run --rm "$IMAGE" sh -c 'ls -d /app/ui/observatory /app/ui/pose-fusion >/dev/null'
|
|
||||||
|
|
||||||
docker run -d --name sm -p 3000:3000 -e CSI_SOURCE=simulated "$IMAGE"
|
|
||||||
# Wait up to 30 s for /health.
|
|
||||||
for _ in $(seq 1 30); do
|
|
||||||
if curl -fsS http://127.0.0.1:3000/health >/dev/null 2>&1; then break; fi
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
curl -fsS http://127.0.0.1:3000/health
|
|
||||||
curl -fsS http://127.0.0.1:3000/api/v1/info >/dev/null
|
|
||||||
curl -fsS http://127.0.0.1:3000/ui/observatory.html >/dev/null
|
|
||||||
curl -fsS http://127.0.0.1:3000/ui/pose-fusion.html >/dev/null
|
|
||||||
docker stop sm
|
|
||||||
|
|
||||||
- name: Smoke-test the bearer-token auth path
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
IMAGE="ghcr.io/ruvnet/wifi-densepose:sha-${GITHUB_SHA::7}"
|
|
||||||
docker run -d --name auth \
|
|
||||||
-p 3000:3000 \
|
|
||||||
-e CSI_SOURCE=simulated \
|
|
||||||
-e RUVIEW_API_TOKEN=smoke-test-token-do-not-use \
|
|
||||||
"$IMAGE"
|
|
||||||
for _ in $(seq 1 30); do
|
|
||||||
if curl -fsS http://127.0.0.1:3000/health >/dev/null 2>&1; then break; fi
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
# /health stays unauthenticated.
|
|
||||||
curl -fsS http://127.0.0.1:3000/health >/dev/null
|
|
||||||
# /api/v1/info without a bearer → 401.
|
|
||||||
code=$(curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:3000/api/v1/info)
|
|
||||||
test "$code" = "401" || { echo "expected 401, got $code"; exit 1; }
|
|
||||||
# Wrong bearer → 401.
|
|
||||||
code=$(curl -s -o /dev/null -w '%{http_code}' -H 'Authorization: Bearer wrong' http://127.0.0.1:3000/api/v1/info)
|
|
||||||
test "$code" = "401" || { echo "expected 401 (wrong token), got $code"; exit 1; }
|
|
||||||
# Correct bearer → 200.
|
|
||||||
curl -fsS -H 'Authorization: Bearer smoke-test-token-do-not-use' http://127.0.0.1:3000/api/v1/info >/dev/null
|
|
||||||
docker stop auth
|
|
||||||
|
|
||||||
- name: Summary
|
|
||||||
if: always()
|
|
||||||
run: |
|
|
||||||
{
|
|
||||||
echo "## sensing-server image published"
|
|
||||||
echo
|
|
||||||
echo "Tags:"
|
|
||||||
echo '```'
|
|
||||||
echo "${{ steps.meta.outputs.tags }}"
|
|
||||||
echo '```'
|
|
||||||
echo
|
|
||||||
echo "Closes #520 (missing observatory/pose-fusion UI assets) and #514 (stale `:latest` for the v0.6+ packet format)."
|
|
||||||
echo "The Dockerfile fails the build if those UI assets ever disappear again, and this workflow rebuilds + pushes automatically on every change to the surface."
|
|
||||||
} >> "$GITHUB_STEP_SUMMARY"
|
|
||||||
@@ -1,72 +0,0 @@
|
|||||||
name: three.js demos → GitHub Pages
|
|
||||||
|
|
||||||
# Publishes the ADR-097 three.js demos under gh-pages/three.js/.
|
|
||||||
# Uses keep_files: true so the existing observatory/, pose-fusion/,
|
|
||||||
# pointcloud/, nvsim/, and root index.html demos are preserved.
|
|
||||||
#
|
|
||||||
# Demos 04 and 05 require a Mixamo "X Bot.fbx" placed in assets/.
|
|
||||||
# That file is intentionally gitignored (license boundary), so this
|
|
||||||
# workflow does NOT ship it. Demos 01-03 work standalone; the index
|
|
||||||
# page documents the FBX requirement honestly.
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
paths:
|
|
||||||
- 'examples/three.js/**'
|
|
||||||
- '.github/workflows/threejs-pages.yml'
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: threejs-pages
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-and-deploy:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout main
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Stage demos for Pages
|
|
||||||
run: |
|
|
||||||
mkdir -p _site/three.js
|
|
||||||
# Copy everything except the local Python server (CI doesn't need it)
|
|
||||||
# and any stray scratch screenshots.
|
|
||||||
cp -r examples/three.js/demos _site/three.js/demos
|
|
||||||
cp -r examples/three.js/screenshots _site/three.js/screenshots
|
|
||||||
cp examples/three.js/README.md _site/three.js/README.md
|
|
||||||
# An index.html that lists the 5 demos with the FBX caveat.
|
|
||||||
cp examples/three.js/index.html _site/three.js/index.html
|
|
||||||
# Mixamo FBX is gitignored — assets dir won't exist in CI.
|
|
||||||
# Drop an empty placeholder so the relative path 'assets/' resolves
|
|
||||||
# to a directory listing (404 on missing file) instead of an opaque
|
|
||||||
# network error. Browsers showing the 404 path makes the failure
|
|
||||||
# visible to anyone trying demos 04/05 without their own FBX.
|
|
||||||
mkdir -p _site/three.js/assets
|
|
||||||
cat > _site/three.js/assets/README.txt <<'EOF'
|
|
||||||
The Mixamo "X Bot.fbx" required by demos 04-skinned-fbx.html and
|
|
||||||
05-skinned-realtime.html is intentionally not redistributed here.
|
|
||||||
|
|
||||||
Download your own from https://mixamo.com (FBX Binary, T-Pose,
|
|
||||||
Without Skin) and place it here as "X Bot.fbx" if you want to
|
|
||||||
run those demos locally. See examples/three.js/README.md in the
|
|
||||||
repo for context.
|
|
||||||
EOF
|
|
||||||
echo "Staged contents:"
|
|
||||||
ls -R _site/three.js/ | head -30
|
|
||||||
|
|
||||||
- name: Deploy to GitHub Pages
|
|
||||||
uses: peaceiris/actions-gh-pages@v3
|
|
||||||
with:
|
|
||||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
publish_dir: _site
|
|
||||||
# Critical: preserve observatory/, pose-fusion/, pointcloud/, nvsim/
|
|
||||||
# and the root index.html already on gh-pages.
|
|
||||||
keep_files: true
|
|
||||||
commit_message: 'three.js demos: ${{ github.event.head_commit.message }}'
|
|
||||||
@@ -19,24 +19,8 @@ jobs:
|
|||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
token: ${{ secrets.GITHUB_TOKEN }}
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
# Identity must be set BEFORE any operation that can create a commit.
|
- name: Update submodules to latest main
|
||||||
# `git submodule update --remote --merge` used to fail here with
|
run: git submodule update --remote --merge
|
||||||
# "Committer identity unknown" because the merge inside vendor/ruvector
|
|
||||||
# needs an author when the pinned commit isn't a fast-forward of upstream.
|
|
||||||
- name: Configure git identity
|
|
||||||
run: |
|
|
||||||
git config --global user.name "github-actions[bot]"
|
|
||||||
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
|
||||||
|
|
||||||
# Use a plain `--remote` checkout (detached HEAD at each submodule's
|
|
||||||
# configured `branch` tip from .gitmodules) rather than `--merge`. We only
|
|
||||||
# want to bump the superproject's gitlink to the latest upstream commit;
|
|
||||||
# there's no reason to create merge commits inside the vendored repos, and
|
|
||||||
# `--merge` breaks whenever the current pin has diverged from that branch.
|
|
||||||
- name: Update submodules to latest tracked branch
|
|
||||||
run: |
|
|
||||||
git submodule sync --recursive
|
|
||||||
git submodule update --remote --recursive
|
|
||||||
|
|
||||||
- name: Check for changes
|
- name: Check for changes
|
||||||
id: check
|
id: check
|
||||||
@@ -45,22 +29,21 @@ jobs:
|
|||||||
echo "changed=false" >> "$GITHUB_OUTPUT"
|
echo "changed=false" >> "$GITHUB_OUTPUT"
|
||||||
else
|
else
|
||||||
echo "changed=true" >> "$GITHUB_OUTPUT"
|
echo "changed=true" >> "$GITHUB_OUTPUT"
|
||||||
echo "--- submodule pointer changes ---"
|
|
||||||
git submodule status --recursive || true
|
|
||||||
git diff --submodule=log -- vendor/ || true
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Create PR with updates
|
- name: Create PR with updates
|
||||||
if: steps.check.outputs.changed == 'true'
|
if: steps.check.outputs.changed == 'true'
|
||||||
run: |
|
run: |
|
||||||
|
git config user.name "github-actions[bot]"
|
||||||
|
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||||
BRANCH="chore/update-submodules-$(date +%Y%m%d-%H%M%S)"
|
BRANCH="chore/update-submodules-$(date +%Y%m%d-%H%M%S)"
|
||||||
git checkout -b "$BRANCH"
|
git checkout -b "$BRANCH"
|
||||||
git add vendor/
|
git add vendor/
|
||||||
git commit -m "chore: update vendor submodules to latest upstream"
|
git commit -m "chore: update vendor submodules to latest main"
|
||||||
git push origin "$BRANCH"
|
git push origin "$BRANCH"
|
||||||
gh pr create \
|
gh pr create \
|
||||||
--title "chore: update vendor submodules" \
|
--title "chore: update vendor submodules" \
|
||||||
--body "Automated submodule update to the latest upstream commit on each submodule's tracked branch (see \`.gitmodules\`). Review the pointer diff before merging." \
|
--body "Automated submodule update to latest upstream main." \
|
||||||
--base main \
|
--base main \
|
||||||
--head "$BRANCH"
|
--head "$BRANCH"
|
||||||
env:
|
env:
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ on:
|
|||||||
- 'archive/v1/src/core/**'
|
- 'archive/v1/src/core/**'
|
||||||
- 'archive/v1/src/hardware/**'
|
- 'archive/v1/src/hardware/**'
|
||||||
- 'archive/v1/data/proof/**'
|
- 'archive/v1/data/proof/**'
|
||||||
- 'archive/v1/requirements-lock.txt'
|
|
||||||
- '.github/workflows/verify-pipeline.yml'
|
- '.github/workflows/verify-pipeline.yml'
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ main, master ]
|
branches: [ main, master ]
|
||||||
@@ -15,7 +14,6 @@ on:
|
|||||||
- 'archive/v1/src/core/**'
|
- 'archive/v1/src/core/**'
|
||||||
- 'archive/v1/src/hardware/**'
|
- 'archive/v1/src/hardware/**'
|
||||||
- 'archive/v1/data/proof/**'
|
- 'archive/v1/data/proof/**'
|
||||||
- 'archive/v1/requirements-lock.txt'
|
|
||||||
- '.github/workflows/verify-pipeline.yml'
|
- '.github/workflows/verify-pipeline.yml'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
@@ -30,11 +28,9 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
@@ -61,18 +57,7 @@ jobs:
|
|||||||
"
|
"
|
||||||
|
|
||||||
- name: Run pipeline verification
|
- name: Run pipeline verification
|
||||||
working-directory: archive/v1
|
working-directory: v1
|
||||||
env:
|
|
||||||
# Pin thread count for scipy.fft / BLAS — multi-threaded reduction
|
|
||||||
# order is otherwise non-deterministic across CI runs (issue #560
|
|
||||||
# follow-up: 9- and 6-decimal quantization were not enough because
|
|
||||||
# the divergence is from threading order, not SIMD reordering).
|
|
||||||
# Single-threaded keeps the proof reproducible at a ~2-3x slowdown.
|
|
||||||
OMP_NUM_THREADS: "1"
|
|
||||||
OPENBLAS_NUM_THREADS: "1"
|
|
||||||
MKL_NUM_THREADS: "1"
|
|
||||||
VECLIB_MAXIMUM_THREADS: "1"
|
|
||||||
NUMEXPR_NUM_THREADS: "1"
|
|
||||||
run: |
|
run: |
|
||||||
echo "=== Running pipeline verification ==="
|
echo "=== Running pipeline verification ==="
|
||||||
python data/proof/verify.py
|
python data/proof/verify.py
|
||||||
@@ -80,13 +65,7 @@ jobs:
|
|||||||
echo "Pipeline verification PASSED."
|
echo "Pipeline verification PASSED."
|
||||||
|
|
||||||
- name: Run verification twice to confirm determinism
|
- name: Run verification twice to confirm determinism
|
||||||
working-directory: archive/v1
|
working-directory: v1
|
||||||
env:
|
|
||||||
OMP_NUM_THREADS: "1"
|
|
||||||
OPENBLAS_NUM_THREADS: "1"
|
|
||||||
MKL_NUM_THREADS: "1"
|
|
||||||
VECLIB_MAXIMUM_THREADS: "1"
|
|
||||||
NUMEXPR_NUM_THREADS: "1"
|
|
||||||
run: |
|
run: |
|
||||||
echo "=== Second run for determinism confirmation ==="
|
echo "=== Second run for determinism confirmation ==="
|
||||||
python data/proof/verify.py
|
python data/proof/verify.py
|
||||||
|
|||||||
-28
@@ -13,18 +13,6 @@ firmware/esp32-csi-node/managed_components/
|
|||||||
firmware/esp32-csi-node/dependencies.lock
|
firmware/esp32-csi-node/dependencies.lock
|
||||||
firmware/esp32-csi-node/sdkconfig.defaults.bak
|
firmware/esp32-csi-node/sdkconfig.defaults.bak
|
||||||
|
|
||||||
# ESP-IDF set-target backup (local only)
|
|
||||||
firmware/esp32-hello-world/sdkconfig.old
|
|
||||||
|
|
||||||
# Host-built firmware test binaries (compiled from test/*.c, not source)
|
|
||||||
firmware/esp32-csi-node/test/test_adr110
|
|
||||||
firmware/esp32-csi-node/test/test_vitals
|
|
||||||
firmware/esp32-csi-node/test/fuzz_serialize
|
|
||||||
firmware/esp32-csi-node/test/fuzz_edge
|
|
||||||
firmware/esp32-csi-node/test/fuzz_nvs
|
|
||||||
firmware/esp32-csi-node/test/*.exe
|
|
||||||
firmware/esp32-csi-node/test/*.obj
|
|
||||||
|
|
||||||
# Claude Flow swarm runtime state
|
# Claude Flow swarm runtime state
|
||||||
.swarm/
|
.swarm/
|
||||||
|
|
||||||
@@ -264,19 +252,3 @@ firmware/esp32-csi-node/build_firmware.batdata/
|
|||||||
models/
|
models/
|
||||||
demo_pointcloud.ply
|
demo_pointcloud.ply
|
||||||
demo_splats.json
|
demo_splats.json
|
||||||
|
|
||||||
# rvCSI napi-rs addon — generated by `napi build` (do not commit)
|
|
||||||
v2/crates/rvcsi-node/*.node
|
|
||||||
v2/crates/rvcsi-node/binding.js
|
|
||||||
v2/crates/rvcsi-node/binding.d.ts
|
|
||||||
v2/crates/rvcsi-node/npm/
|
|
||||||
|
|
||||||
# AetherArena private optimization staging — never published until reviewed
|
|
||||||
aether-arena/staging/
|
|
||||||
|
|
||||||
# MM-Fi benchmark dataset archives — large data, fetch separately, never commit
|
|
||||||
assets/MM-Fi/E0*.zip
|
|
||||||
assets/MM-Fi/*.zip
|
|
||||||
|
|
||||||
# through-wall demo: regenerable trained model artifact
|
|
||||||
examples/through-wall/model/
|
|
||||||
|
|||||||
-19
@@ -10,22 +10,3 @@
|
|||||||
path = vendor/sublinear-time-solver
|
path = vendor/sublinear-time-solver
|
||||||
url = https://github.com/ruvnet/sublinear-time-solver
|
url = https://github.com/ruvnet/sublinear-time-solver
|
||||||
branch = main
|
branch = main
|
||||||
[submodule "vendor/rvcsi"]
|
|
||||||
path = vendor/rvcsi
|
|
||||||
url = https://github.com/ruvnet/rvcsi
|
|
||||||
branch = main
|
|
||||||
[submodule "v2/crates/ruv-neural"]
|
|
||||||
path = v2/crates/ruv-neural
|
|
||||||
url = https://github.com/ruvnet/ruv-neural.git
|
|
||||||
branch = main
|
|
||||||
[submodule "vendor/rufield"]
|
|
||||||
path = vendor/rufield
|
|
||||||
url = https://github.com/ruvnet/rufield
|
|
||||||
[submodule "v2/crates/ruview-swarm"]
|
|
||||||
path = v2/crates/ruview-swarm
|
|
||||||
url = https://github.com/ruvnet/ruv-drone.git
|
|
||||||
branch = main
|
|
||||||
[submodule "v2/crates/worldgraph"]
|
|
||||||
path = v2/crates/worldgraph
|
|
||||||
url = https://github.com/ruvnet/worldgraph.git
|
|
||||||
branch = main
|
|
||||||
|
|||||||
+1
-350
File diff suppressed because one or more lines are too long
@@ -8,23 +8,21 @@ Dual codebase: Python v1 (`v1/`) and Rust port (`v2/`).
|
|||||||
| Crate | Description |
|
| Crate | Description |
|
||||||
|-------|-------------|
|
|-------|-------------|
|
||||||
| `wifi-densepose-core` | Core types, traits, error types, CSI frame primitives |
|
| `wifi-densepose-core` | Core types, traits, error types, CSI frame primitives |
|
||||||
| `wifi-densepose-signal` | SOTA signal processing + RuvSense multistatic sensing (16 modules) |
|
| `wifi-densepose-signal` | SOTA signal processing + RuvSense multistatic sensing (14 modules) |
|
||||||
| `wifi-densepose-nn` | Neural network inference (ONNX, PyTorch, Candle backends) |
|
| `wifi-densepose-nn` | Neural network inference (ONNX, PyTorch, Candle backends) |
|
||||||
| `wifi-densepose-train` | Training pipeline with ruvector integration + ruview_metrics; MAE pretraining recipe (`mae.rs`, ADR-152 §2.3) + WiFlow-STD port (`wiflow_std/`, tch-gated) |
|
| `wifi-densepose-train` | Training pipeline with ruvector integration + ruview_metrics |
|
||||||
| `wifi-densepose-mat` | Mass Casualty Assessment Tool — disaster survivor detection |
|
| `wifi-densepose-mat` | Mass Casualty Assessment Tool — disaster survivor detection |
|
||||||
| `wifi-densepose-hardware` | ESP32 aggregator, TDM protocol, channel hopping firmware; `ieee80211bf/` 802.11bf forward-compat protocol model (ADR-153) |
|
| `wifi-densepose-hardware` | ESP32 aggregator, TDM protocol, channel hopping firmware |
|
||||||
| `wifi-densepose-ruvector` | RuVector v2.0.4 integration + cross-viewpoint fusion (5 modules) |
|
| `wifi-densepose-ruvector` | RuVector v2.0.4 integration + cross-viewpoint fusion (5 modules) |
|
||||||
|
| `wifi-densepose-api` | REST API (Axum) |
|
||||||
|
| `wifi-densepose-db` | Database layer (Postgres, SQLite, Redis) |
|
||||||
|
| `wifi-densepose-config` | Configuration management |
|
||||||
| `wifi-densepose-wasm` | WebAssembly bindings for browser deployment |
|
| `wifi-densepose-wasm` | WebAssembly bindings for browser deployment |
|
||||||
| `wifi-densepose-cli` | CLI tool (`wifi-densepose` binary) — `calibrate`/`calibrate-serve`/`enroll`/`train-room`/`room-watch` + MAT (MAT gated behind the `mat` feature; build `--no-default-features` for the aarch64/appliance calibration binary) |
|
| `wifi-densepose-cli` | CLI tool (`wifi-densepose` binary) |
|
||||||
| `wifi-densepose-calibration` | ADR-151 per-room calibration & specialist training — `baseline → enroll → extract → train` → bank of small specialists (presence/posture/breathing/heartbeat/restlessness/anomaly) + multistatic fusion; pure Rust, edge-deployable |
|
|
||||||
| `wifi-densepose-sensing-server` | Lightweight Axum server for WiFi sensing UI |
|
| `wifi-densepose-sensing-server` | Lightweight Axum server for WiFi sensing UI |
|
||||||
| `wifi-densepose-wifiscan` | Multi-BSSID WiFi scanning (ADR-022) |
|
| `wifi-densepose-wifiscan` | Multi-BSSID WiFi scanning (ADR-022) |
|
||||||
| `wifi-densepose-vitals` | ESP32 CSI-grade vital sign extraction (ADR-021) |
|
| `wifi-densepose-vitals` | ESP32 CSI-grade vital sign extraction (ADR-021) |
|
||||||
| `nvsim` | Deterministic NV-diamond magnetometer pipeline simulator (ADR-089) — standalone leaf, WASM-ready |
|
| `nvsim` | Deterministic NV-diamond magnetometer pipeline simulator (ADR-089) — standalone leaf, WASM-ready |
|
||||||
| `vendor/rvcsi` (submodule) | **rvCSI** — edge RF sensing runtime (ADR-095/096): 9 crates (`rvcsi-core`/`-dsp`/`-events`/`-adapter-file`/`-adapter-nexmon`/`-ruvector`/`-runtime`/`-node`/`-cli`). Lives in its own repo ([github.com/ruvnet/rvcsi](https://github.com/ruvnet/rvcsi)), vendored here under `vendor/rvcsi`, published to crates.io as `rvcsi-* 0.3.x` and to npm as `@ruv/rvcsi`. Not a `v2/` workspace member — depend on the published crates (or the submodule's `crates/rvcsi-*` paths). Normalized `CsiFrame`/`CsiWindow`/`CsiEvent` schema, validate-before-FFI, reusable DSP, typed confidence-scored events, the napi-c Nexmon shim (real nexmon_csi `.pcap` from a Raspberry Pi 5 / 4 / 3B+ — BCM43455c0), the napi-rs SDK, the `rvcsi` CLI, a Claude Code plugin. |
|
|
||||||
| `vendor/rufield` (submodule) | **RuField MFS** — the open spec for camera-free multimodal field sensing (ADR-260). A common `FieldEvent`/`FieldTensor`/`FusionGraph`/`PrivacyClass`/`ProvenanceReceipt` model *above* WiFi CSI/CIR/BFLD, UWB, BLE Channel Sounding, mmWave radar, ultrasound, subsonic, infrared, and quantum sensors. Lives in its own repo ([github.com/ruvnet/rufield](https://github.com/ruvnet/rufield)), vendored here under `vendor/rufield`. Not a `v2/` workspace member. v0.1 reference stack = 7 crates (`rufield-core`/`-provenance`/`-privacy`/`-adapters`/`-fusion`/`-bench`/`-viewer`), 72 tests/0 failed; `rufield-viewer` is an Axum + vanilla-JS read-only dashboard (`cargo run -p rufield-viewer`) completing ADR-260 §27.9. The WiFi-CSI modality is now **real-replay-backed** via `CsiReplayAdapter` (ingests real captured `.csi.jsonl` → fused presence/breathing inferences; replay-from-file, unlabeled CSI-variance proxy, not validated accuracy); mmWave/thermal + all synthetic-bench F1 numbers remain **SYNTHETIC** (no live hardware — live streaming + labeled accuracy are roadmap). |
|
|
||||||
| `wifi-densepose-rufield` | ADR-262 P1 **anti-corruption bridge** — converts RuView WiFi-CSI sensing output (`SensingSnapshot` mirroring `SensingUpdate` + `TrustedOutput`, owned primitives, no dep on `wifi-densepose-sensing-server`) into **signed RuField `FieldEvent`s** (`Modality::WifiCsi`, real `timestamp_ns`, sha256 + ed25519 provenance, `synthetic=false`). The single coupling point between RuView and the standalone RuField MFS spec (§5.4); path-deps the `vendor/rufield` submodule crates (`rufield-core`/`-provenance`/`-privacy`/`-fusion`). **Critical §3.3 privacy mapping** (`map_privacy`): maps RuView class → RuField P0–P5 by **information content, never byte value**, fail-closed (`Derived → P4/P5`, never P1; `demoted` floors to ≥ P2). 15 tests / 0 failed (round-trip / `is_fusable` / fusion-ingest / privacy-safety / determinism). P1 plumbing — not wired into the live server (P3), no accuracy claim. |
|
|
||||||
| `ruview-swarm` | Drone swarm control system (ADR-148) — hierarchical-mesh topology, Raft consensus, MARL, CSI sensing payload, MAVLink/PX4 compat, Ruflo AI-agent integration |
|
|
||||||
|
|
||||||
### RuvSense Modules (`signal/src/ruvsense/`)
|
### RuvSense Modules (`signal/src/ruvsense/`)
|
||||||
| Module | Purpose |
|
| Module | Purpose |
|
||||||
@@ -42,8 +40,6 @@ Dual codebase: Python v1 (`v1/`) and Rust port (`v2/`).
|
|||||||
| `cross_room.rs` | Environment fingerprinting, transition graph |
|
| `cross_room.rs` | Environment fingerprinting, transition graph |
|
||||||
| `gesture.rs` | DTW template matching gesture classifier |
|
| `gesture.rs` | DTW template matching gesture classifier |
|
||||||
| `adversarial.rs` | Physically impossible signal detection, multi-link consistency |
|
| `adversarial.rs` | Physically impossible signal detection, multi-link consistency |
|
||||||
| `cir.rs` | ADR-134 CSI→CIR via ISTA L1 sparse recovery (NeumannSolver warm-start) |
|
|
||||||
| `calibration.rs` | ADR-135 empty-room baseline (Welford amplitude + von Mises phase, drift trigger) |
|
|
||||||
|
|
||||||
### Cross-Viewpoint Fusion (`ruvector/src/viewpoint/`)
|
### Cross-Viewpoint Fusion (`ruvector/src/viewpoint/`)
|
||||||
| Module | Purpose |
|
| Module | Purpose |
|
||||||
@@ -74,17 +70,14 @@ All 5 ruvector crates integrated in workspace:
|
|||||||
- ADR-030: RuvSense persistent field model (Proposed)
|
- ADR-030: RuvSense persistent field model (Proposed)
|
||||||
- ADR-031: RuView sensing-first RF mode (Proposed)
|
- ADR-031: RuView sensing-first RF mode (Proposed)
|
||||||
- ADR-032: Multistatic mesh security hardening (Proposed)
|
- ADR-032: Multistatic mesh security hardening (Proposed)
|
||||||
- ADR-148: Drone swarm control system / `ruview-swarm` (In Progress)
|
|
||||||
- ADR-152: WiFi-Pose SOTA 2026 intake — geometry conditioning, WiFlow-STD benchmark (measurement (a) complete: claims MEASURED-EQUIVALENT at ~96% PCK@20), MAE recipe (Proposed; §2.1–2.3, 2.6 implemented)
|
|
||||||
- ADR-153: IEEE 802.11bf-2025 forward-compatibility protocol model (Accepted — amends ADR-152 §2.4)
|
|
||||||
|
|
||||||
### Supported Hardware
|
### Supported Hardware
|
||||||
|
|
||||||
| Device | Port | Chip | Role | Cost |
|
| Device | Port | Chip | Role | Cost |
|
||||||
|--------|------|------|------|------|
|
|--------|------|------|------|------|
|
||||||
| ESP32-S3 (8MB flash) | COM9 (ruvzen, was COM7) | Xtensa dual-core | WiFi CSI sensing node | ~$9 |
|
| ESP32-S3 (8MB flash) | COM7 | Xtensa dual-core | WiFi CSI sensing node | ~$9 |
|
||||||
| ESP32-S3 SuperMini (4MB) | — | Xtensa dual-core | WiFi CSI (compact) | ~$6 |
|
| ESP32-S3 SuperMini (4MB) | — | Xtensa dual-core | WiFi CSI (compact) | ~$6 |
|
||||||
| ESP32-C6 + Seeed MR60BHA2 | COM12 (ruvzen, was COM4) | RISC-V + 60 GHz FMCW | mmWave HR/BR/presence + WiFi CSI | ~$15 |
|
| ESP32-C6 + Seeed MR60BHA2 | COM4 | RISC-V + 60 GHz FMCW | mmWave HR/BR/presence | ~$15 |
|
||||||
| HLK-LD2410 | — | 24 GHz FMCW | Presence + distance | ~$3 |
|
| HLK-LD2410 | — | 24 GHz FMCW | Presence + distance | ~$3 |
|
||||||
|
|
||||||
**Not supported:** ESP32 (original), ESP32-C3 — single-core, can't run CSI DSP pipeline.
|
**Not supported:** ESP32 (original), ESP32-C3 — single-core, can't run CSI DSP pipeline.
|
||||||
@@ -141,14 +134,17 @@ Crates must be published in dependency order:
|
|||||||
2. `wifi-densepose-vitals` (no internal deps)
|
2. `wifi-densepose-vitals` (no internal deps)
|
||||||
3. `wifi-densepose-wifiscan` (no internal deps)
|
3. `wifi-densepose-wifiscan` (no internal deps)
|
||||||
4. `wifi-densepose-hardware` (no internal deps)
|
4. `wifi-densepose-hardware` (no internal deps)
|
||||||
5. `wifi-densepose-signal` (depends on core)
|
5. `wifi-densepose-config` (no internal deps)
|
||||||
6. `wifi-densepose-nn` (no internal deps, workspace only)
|
6. `wifi-densepose-db` (no internal deps)
|
||||||
7. `wifi-densepose-ruvector` (no internal deps, workspace only)
|
7. `wifi-densepose-signal` (depends on core)
|
||||||
8. `wifi-densepose-train` (depends on signal, nn)
|
8. `wifi-densepose-nn` (no internal deps, workspace only)
|
||||||
9. `wifi-densepose-mat` (depends on core, signal, nn)
|
9. `wifi-densepose-ruvector` (no internal deps, workspace only)
|
||||||
10. `wifi-densepose-wasm` (depends on mat)
|
10. `wifi-densepose-train` (depends on signal, nn)
|
||||||
11. `wifi-densepose-sensing-server` (depends on wifiscan)
|
11. `wifi-densepose-mat` (depends on core, signal, nn)
|
||||||
12. `wifi-densepose-cli` (depends on mat)
|
12. `wifi-densepose-api` (no internal deps)
|
||||||
|
13. `wifi-densepose-wasm` (depends on mat)
|
||||||
|
14. `wifi-densepose-sensing-server` (depends on wifiscan)
|
||||||
|
15. `wifi-densepose-cli` (depends on mat)
|
||||||
|
|
||||||
### Validation & Witness Verification (ADR-028)
|
### Validation & Witness Verification (ADR-028)
|
||||||
|
|
||||||
|
|||||||
@@ -1,78 +0,0 @@
|
|||||||
# PROOF — reproduce every claim, or find the one we can't yet
|
|
||||||
|
|
||||||
This project (RuView / wifi-densepose) has been publicly called "AI slop" and
|
|
||||||
"fake." This document is the answer: **a skeptic can clone the repo, run one
|
|
||||||
script, and have every headline claim either verified on their own machine or
|
|
||||||
shown — explicitly — as "CLAIMED, not yet reproduced (here's exactly what it
|
|
||||||
needs)."** Nothing below is asserted without a command you can run.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/ruvnet/RuView && cd RuView
|
|
||||||
bash scripts/prove.sh # core gate + the anti-slop assertion tests
|
|
||||||
bash scripts/prove.sh --full # also attempt the feature-gated subset
|
|
||||||
```
|
|
||||||
|
|
||||||
`prove.sh` exits 0 only if every **non-gated** claim passes. Gated claims never
|
|
||||||
fail the run; they print the prerequisite (a GPU, a dataset, real hardware, a
|
|
||||||
trained checkpoint) so you can reproduce them yourself.
|
|
||||||
|
|
||||||
## Grading
|
|
||||||
|
|
||||||
- **MEASURED** — reproduced on our hardware, with the exact command recorded, and
|
|
||||||
pinned by a test that *fails on the pre-fix code*. `prove.sh` re-runs these.
|
|
||||||
- **CLAIMED** — cited from a source, or measured by the source, but not
|
|
||||||
reproduced in this repo's automated harness.
|
|
||||||
- **DATA-GATED / HARDWARE-GATED** — the *code path* is real and tested, but the
|
|
||||||
*accuracy/throughput claim* needs data or hardware we don't ship. We never
|
|
||||||
fabricate the number; the code carries a typed error or a `weights_trained`/
|
|
||||||
provenance flag instead.
|
|
||||||
|
|
||||||
## The hard gate (run on any machine with Rust + Python)
|
|
||||||
|
|
||||||
| Claim | Grade | Reproduce |
|
|
||||||
|---|---|---|
|
|
||||||
| Rust workspace: 3,128 tests, 0 failed | **MEASURED** | `cd v2 && cargo test --workspace --no-default-features` |
|
|
||||||
| Deterministic CSI pipeline proof (bit-exact SHA-256) | **MEASURED** | `python archive/v1/data/proof/verify.py` → `VERDICT: PASS` |
|
|
||||||
|
|
||||||
## Anti-slop assertion tests (each fails on the pre-fix code)
|
|
||||||
|
|
||||||
| Claim | Grade | Test (run via `cargo test -p <crate> <name>`) |
|
|
||||||
|---|---|---|
|
|
||||||
| Fusion crafted-input DoS panics are closed (ADR-156 §2.2) | **MEASURED** | `wifi-densepose-ruvector :: triangulation_out_of_range_index_returns_none_no_panic` |
|
|
||||||
| **The "Soul Signature" identity claim, honestly bounded:** on WiFi-only cardiac+respiratory channels two people are **not separable** (gap ≈ 0.0005) | **MEASURED** | `wifi-densepose-bfld :: cardiac_alone_cannot_separate_identity_matches_audit` |
|
|
||||||
| OccWorld `predict()` is real (input-dependent), not random noise | **MEASURED** | `wifi-densepose-occworld-candle :: predict_is_deterministic_for_same_input` |
|
|
||||||
| Pose runtime emits frames under its own default config (ADR-159 A1) | **MEASURED** | `cog-pose-estimation :: default_config_emits_frames_with_real_model` |
|
|
||||||
| Person-count flags untrained classes — no count inflation (ADR-159 A2) | **MEASURED** | `cog-person-count :: untrained_class_argmax_is_flagged_low_confidence` |
|
|
||||||
| Medical edge skills carry a "not a medical device" disclaimer (ADR-160 A1) | **MEASURED** | `wifi-densepose-wasm-edge :: a1_med_modules_have_clinical_disclaimer` (`--features std`) |
|
|
||||||
| Survivor dedup 3→1, count-inflation killed (ADR-158 §2) | **MEASURED** | `wifi-densepose-mat :: test_identical_vitals_no_location_dedup_to_one` (`--features mat`) |
|
|
||||||
|
|
||||||
## Measured performance (criterion; reproduce on your machine)
|
|
||||||
|
|
||||||
| Claim | Grade | Reproduce |
|
|
||||||
|---|---|---|
|
|
||||||
| PSD FFT-planner cache 2.0–3.1×, DTW band 2.4–4.1× (ADR-154) | **MEASURED** | `cd v2 && cargo bench -p wifi-densepose-signal` |
|
|
||||||
| fuse() double-clone removed ~2.17× marshalling (ADR-156) | **MEASURED** | `cd v2 && cargo bench -p wifi-densepose-ruvector --bench fusion_bench` |
|
|
||||||
| zero-copy ORT input ~1.48× (ADR-155) | **MEASURED** | `cd v2 && cargo bench -p wifi-densepose-nn --features onnx --bench onnx_bench` |
|
|
||||||
| pointcloud splats 9→2 passes ~1.24× (ADR-160 research) | **MEASURED** | `cd v2 && cargo bench -p wifi-densepose-pointcloud --bench splats_bench` |
|
|
||||||
| native wlanapi multi-BSSID scan 9.74 Hz (vs netsh ~2 Hz) | **MEASURED (Windows)** | `cd v2 && cargo test -p wifi-densepose-wifiscan -- --ignored measure_native_scan_rate` |
|
|
||||||
| wasm-edge `process_frame` hot-path latency (host proxy, ADR-163) | **MEASURED-on-host** (NOT the ESP32/WASM3 budget — needs hardware) | `cd v2/crates/wifi-densepose-wasm-edge && cargo bench --features std` |
|
|
||||||
| cog steady-state CPU infer latency ~305 µs (ADR-163; NOT the manifest cold-start) | **MEASURED-on-host** | `cd v2 && cargo bench -p cog-person-count -p cog-pose-estimation --no-default-features --bench infer_bench` |
|
|
||||||
|
|
||||||
## What we do NOT claim (the honest negatives — the strongest anti-slop signal)
|
|
||||||
|
|
||||||
| Capability | Status |
|
|
||||||
|---|---|
|
|
||||||
| **Named person-identity from WiFi** | **NOT achieved, and measured why.** The §3.6 matcher is real, but identity does not lock on WiFi-only channels (gap 0.0005). DATA-GATED on a real enrollment feeding the AETHER/body-resonance channel — never done. No named-identity claim is made. |
|
|
||||||
| WiFlow-STD ~96% PCK@20 | **CLAIMED-reproduced** on our RTX 5080 (`benchmarks/wiflow-std/RESULTS.md`); HARDWARE-GATED for you (needs an NVIDIA GPU + the MM-Fi dataset). The upstream *shipped checkpoint* was **REFUTED** (0.08% PCK) — we publish that. |
|
|
||||||
| OccWorld trajectory accuracy | DATA-GATED on a trained checkpoint; `predict()` carries `weights_trained=false` until one is loaded — never silently faked. |
|
|
||||||
| Edge-skill detection accuracy (seizure, weapon, affect, …) | UNVALIDATED — every such module is now disclaimer-gated as experimental/research; the DSP is real, the accuracy is not claimed. |
|
|
||||||
| 802.11bf-2025 OTA conformance | No commodity silicon ships a conformant interface as of 2026; ours is a simulation-tested forward-compat protocol model, not a certified implementation. |
|
|
||||||
|
|
||||||
## Provenance
|
|
||||||
|
|
||||||
Every claim above traces to a committed ADR (`docs/adr/ADR-154`…`ADR-163`), a
|
|
||||||
test, a criterion bench, `benchmarks/wiflow-std/RESULTS.md`, or
|
|
||||||
`benchmarks/edge-latency/RESULTS.md`. The history
|
|
||||||
includes published **retractions** (the 92.9% PCK retraction; the WiFlow-STD
|
|
||||||
shipped-checkpoint refutation; the NV-diamond BOM reality check) — a faker hides
|
|
||||||
failures; we commit them.
|
|
||||||
@@ -1,25 +1,21 @@
|
|||||||
# π RuView
|
# π RuView
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://cognitum.one/seed">
|
<a href="https://x.com/rUv/status/2037556932802761004">
|
||||||
<img src="assets/ruview-seed.png" alt="RuView - WiFi DensePose" width="100%">
|
<img src="assets/ruview-small-gemini.jpg" alt="RuView - WiFi DensePose" width="100%">
|
||||||
</a>
|
|
||||||
</p>
|
|
||||||
<p align="center">
|
|
||||||
<a href="https://cognitum.one/seed">
|
|
||||||
<img src="assets/seed.png" alt="Cognitum Seed" width="100%">
|
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
> **Beta Software** — Under active development. APIs and firmware may change. Known limitations:
|
||||||
|
> - ESP32-C3 and original ESP32 are not supported (single-core, insufficient for CSI DSP)
|
||||||
|
> - Single ESP32 deployments have limited spatial resolution — use 2+ nodes or add a [Cognitum Seed](https://cognitum.one) for best results
|
||||||
|
> - Camera-free pose accuracy is limited — use [camera ground-truth training](docs/adr/ADR-079-camera-ground-truth-training.md) for 92.9% PCK@20
|
||||||
|
>
|
||||||
|
> Contributions and bug reports welcome at [Issues](https://github.com/ruvnet/RuView/issues).
|
||||||
|
|
||||||
## **See through walls with WiFi** ##
|
## **See through walls with WiFi** ##
|
||||||
|
|
||||||
**Turn ordinary WiFi into a spatial intelligence / sensing system.** Detect people, measure breathing and heart rate, track movement, and monitor rooms — through walls, in the dark, with no cameras or wearables. Just physics.
|
**Turn ordinary WiFi into a spatial intelligence / sensing system.** Detect people, measure breathing and heart rate, track movement, and monitor rooms — through walls, in the dark, with no cameras or wearables. Just physics.
|
||||||
|
|
||||||
Works natively with the four major smart-home ecosystems: **[Home Assistant](docs/integrations/home-assistant.md)** via the HA-DISCO MQTT publisher, **[Apple Home & HomePod](docs/user-guide-apple-homepod.md)** as a discoverable HAP-1.1 bridge, **[Google Home](docs/integrations/home-assistant.md)** + **[Amazon Alexa](docs/integrations/home-assistant.md)** via the same HA bridge or a [Matter](docs/adr/ADR-122-bfld-ruview-ha-matter-exposure.md) endpoint. Siri, Google Assistant, and Alexa can voice presence and vitals by room with zero custom skills.
|
|
||||||
|
|
||||||
[](docs/integrations/home-assistant.md) [](docs/adr/ADR-122-bfld-ruview-ha-matter-exposure.md) [](docs/user-guide-apple-homepod.md) [](docs/integrations/home-assistant.md) [](docs/integrations/home-assistant.md)
|
|
||||||
|
|
||||||
> Drop into any **Home Assistant** install with one `--mqtt` flag. Or pair into **Apple Home / Google Home / Alexa / SmartThings** as a Matter Bridge. Ships 21 entities per node (11 raw signals + 10 inferred semantic states: someone-sleeping, possible-distress, room-active, elderly-inactivity-anomaly, meeting-in-progress, bathroom-occupied, fall-risk-elevated, bed-exit, no-movement, multi-room-transition) plus 3 starter HA Blueprints. See [`docs/integrations/home-assistant.md`](docs/integrations/home-assistant.md) · [ADR-115](docs/adr/ADR-115-home-assistant-integration.md).
|
|
||||||
|
|
||||||
### π RuView is a WiFi sensing platform that turns radio signals into spatial intelligence.
|
### π RuView is a WiFi sensing platform that turns radio signals into spatial intelligence.
|
||||||
|
|
||||||
@@ -36,7 +32,7 @@ Built on [RuVector](https://github.com/ruvnet/ruvector/) and [Cognitum Seed](htt
|
|||||||
|
|
||||||
The system learns each environment locally using spiking neural networks that adapt in under 30 seconds, with multi-frequency mesh scanning across 6 WiFi channels that uses your neighbors' routers as free radar illuminators. Every measurement is cryptographically attested via an Ed25519 witness chain.
|
The system learns each environment locally using spiking neural networks that adapt in under 30 seconds, with multi-frequency mesh scanning across 6 WiFi channels that uses your neighbors' routers as free radar illuminators. Every measurement is cryptographically attested via an Ed25519 witness chain.
|
||||||
|
|
||||||
RuView turns ordinary WiFi into a contactless sensor. A $9 ESP32 board reads the radio reflections off the people in a room, and a small pretrained model — published on Hugging Face at [`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained) — tells you who's there, how they're breathing, and how their heart rate is trending. The model fits in 8 KB (4-bit quantized) and runs in microseconds on a Raspberry Pi. (The [v2 encoder](https://huggingface.co/ruvnet/wifi-densepose-pretrained) reports an honest, label-free held-out **temporal-triplet accuracy of 82.3%** — up from 66.4% raw; the older "100% presence" figure was measured on a single-class recording and has been retracted in favor of this.) No cameras, no wearables, no app on the user's phone.
|
RuView also supports pose estimation (17 COCO keypoints via the WiFlow architecture), trained entirely without cameras using 10 sensor signals — a technique that builds on the original *DensePose From WiFi* research at Carnegie Mellon University.
|
||||||
|
|
||||||
### Built for low-power edge applications
|
### Built for low-power edge applications
|
||||||
|
|
||||||
@@ -49,30 +45,20 @@ RuView turns ordinary WiFi into a contactless sensor. A $9 ESP32 board reads the
|
|||||||
[](#vital-sign-detection)
|
[](#vital-sign-detection)
|
||||||
[](#esp32-s3-hardware-pipeline)
|
[](#esp32-s3-hardware-pipeline)
|
||||||
[](https://crates.io/crates/wifi-densepose-ruvector)
|
[](https://crates.io/crates/wifi-densepose-ruvector)
|
||||||
[](#-edge-module-catalog)
|
|
||||||
|
|
||||||
|
|
||||||
> | What | How | Speed / scale |
|
> | What | How | Speed |
|
||||||
> |------|-----|---------------|
|
> |------|-----|-------|
|
||||||
> | 🫁 **Breathing rate** | Bandpass 0.1–0.5 Hz on wrapped phase, circular variance, zero-crossing BPM ([#593](https://github.com/ruvnet/RuView/issues/593)) | 6–30 BPM, real-time |
|
> | 🦴 **Pose estimation** | CSI subcarrier amplitude/phase → 17 COCO keypoints | 171K emb/s (M4 Pro) |
|
||||||
> | 💓 **Heart rate** | Bandpass 0.8–2.0 Hz, zero-crossing BPM | 40–120 BPM, real-time |
|
> | 🫁 **Breathing detection** | Bandpass 0.1-0.5 Hz → zero-crossing BPM | 6-30 BPM |
|
||||||
> | 👤 **Presence detection** | Trained head on Hugging Face ([`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained); v2 encoder = 82.3% held-out temporal-triplet acc, honestly re-benchmarked) + a phase-variance fallback that needs no model | < 1 ms, ~30 s ambient calibration |
|
> | 💓 **Heart rate** | Bandpass 0.8-2.0 Hz → zero-crossing BPM | 40-120 BPM |
|
||||||
> | 🧬 **CSI embeddings** | 128-dim contrastive encoder shipped on Hugging Face, 4-bit quantised variant fits in 8 KB | **164,183 emb/s** on M4 Pro |
|
> | 👤 **Presence sensing** | Trained model + PIR fusion — 100% accuracy | 0.012 ms latency |
|
||||||
> | 🦴 **17-keypoint pose estimation** | `cog-pose-estimation` Cog v0.0.1 — signed aarch64 + x86_64 binaries on GCS, loads `pose_v1.safetensors` via Candle. Train your own from paired data in 2.1 s on an RTX 5080 ([ADR-101](docs/adr/ADR-101-pose-estimation-cog.md), [benchmarks](docs/benchmarks/pose-estimation-cog.md)). **SOTA on MM-Fi:** [`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose) hits **82.69% torso-PCK@20** (ensemble 83.59%), beating MultiFormer (72.25%) and CSI2Pose (68.41%) on the matched MM-Fi `random_split` protocol — self-corrected and auditable on [AetherArena](https://huggingface.co/spaces/ruvnet/aether-arena) | 8.4 ms cold-start on a Pi 5 |
|
> | 🧱 **Through-wall** | Fresnel zone geometry + multipath modeling | Up to 5m depth |
|
||||||
> | 🚶 **Motion / activity** | Motion-band power + phase acceleration | Real-time |
|
> | 🧠 **Edge intelligence** | 8-dim feature vectors + RVF store on Cognitum Seed | $140 total BOM |
|
||||||
> | 🤸 **Fall detection** | Phase-acceleration threshold + 3-frame debounce + 5 s cooldown ([#263](https://github.com/ruvnet/RuView/issues/263)) | < 200 ms |
|
> | 🎯 **Camera-free training** | 10 sensor signals, no labels needed | 84s on M4 Pro |
|
||||||
> | 🧮 **Multi-person count** | Adaptive P95 normalisation + runtime-tunable dedup factor (`/api/v1/config/dedup-factor`, [#491](https://github.com/ruvnet/RuView/pull/491)). Six specialised learned counters available as Cogs: `occupancy-zones`, `elevator-count`, `queue-length`, `customer-flow`, `clean-room`, `person-matching` | Real-time, self-calibrating |
|
> | 📷 **Camera-supervised training** | MediaPipe + ESP32 CSI → 92.9% PCK@20 | 19 min on laptop |
|
||||||
> | 🌍 **World model prediction** | OccWorld TransVQVAE — 15-frame future occupancy prediction, 209 ms inference, 3.4 GB VRAM on RTX 5080; fine-tune on your space with `occworld_retrain.py` ([ADR-147](docs/adr/ADR-147-nvidia-cosmos-world-foundation-model-integration.md)) | 15 frames × 200×200×16 vox |
|
> | 📡 **Multi-frequency mesh** | Channel hopping across 6 bands, neighbor APs as illuminators | 3x sensing bandwidth |
|
||||||
> | 🧱 **Through-wall sensing** | Fresnel-zone geometry + multipath modeling | Up to ~5 m, signal-dependent |
|
> | 🌐 **3D point cloud** *(optional fusion)* | Camera depth (MiDaS) + WiFi CSI + mmWave radar → unified spatial model | 22 ms pipeline · 19K+ points/frame |
|
||||||
> | 🧠 **Edge intelligence** | **105-cog catalog** ([ADR-102](docs/adr/ADR-102-edge-module-registry.md)) live from `app-registry.json` — health, security, building, retail, industrial, research, AI, swarm, signal, network, and developer modules. Optional Cognitum Seed adds persistent vector store + kNN + witness chain | $140 total BOM |
|
|
||||||
> | 🎯 **Camera-free pre-training** | Self-supervised contrastive encoder, 12.2M training steps on 60K frames, shipped on Hugging Face | 84 s/epoch retrain on M4 Pro |
|
|
||||||
> | 📷 **Camera-supervised fine-tune** | MediaPipe + ESP32 CSI paired training, end-to-end Candle pipeline on RTX 5080 ([ADR-079](docs/adr/ADR-079-camera-supervised-pose-finetune.md)) | 2.1 s for 400 epochs (~5 ms/epoch) |
|
|
||||||
> | 📡 **Multi-frequency mesh** | Channel hopping across 6 bands, TDM slot scheduling ([ADR-029](docs/adr/ADR-029-multifrequency-mesh.md)) | 3× sensing bandwidth |
|
|
||||||
> | 🌐 **3D point cloud fusion** | Camera depth (MiDaS) + WiFi CSI + mmWave radar → unified spatial model | 22 ms pipeline · 19K+ points/frame |
|
|
||||||
>
|
|
||||||
> Browse the full 105-module catalog (with practical descriptions, sizes, and difficulty) below in [🧩 Edge Module Catalog](#-edge-module-catalog), or visit [seed.cognitum.one/store](https://seed.cognitum.one/store).
|
|
||||||
>
|
|
||||||
> 🤗 **Pretrained weights**: download from [`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained) — see [Loading the pretrained model](#loading-the-pretrained-model) below for one-command setup.
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Option 1: Docker (simulated data, no hardware needed)
|
# Option 1: Docker (simulated data, no hardware needed)
|
||||||
@@ -80,7 +66,7 @@ docker pull ruvnet/wifi-densepose:latest
|
|||||||
docker run -p 3000:3000 ruvnet/wifi-densepose:latest
|
docker run -p 3000:3000 ruvnet/wifi-densepose:latest
|
||||||
# Open http://localhost:3000
|
# Open http://localhost:3000
|
||||||
|
|
||||||
# Option 2a: Live sensing with ESP32-S3 hardware ($9)
|
# Option 2: Live sensing with ESP32-S3 hardware ($9)
|
||||||
# Flash firmware, provision WiFi, and start sensing:
|
# Flash firmware, provision WiFi, and start sensing:
|
||||||
python -m esptool --chip esp32s3 --port COM9 --baud 460800 \
|
python -m esptool --chip esp32s3 --port COM9 --baud 460800 \
|
||||||
write_flash 0x0 bootloader.bin 0x8000 partition-table.bin \
|
write_flash 0x0 bootloader.bin 0x8000 partition-table.bin \
|
||||||
@@ -88,39 +74,13 @@ python -m esptool --chip esp32s3 --port COM9 --baud 460800 \
|
|||||||
python firmware/esp32-csi-node/provision.py --port COM9 \
|
python firmware/esp32-csi-node/provision.py --port COM9 \
|
||||||
--ssid "YourWiFi" --password "secret" --target-ip 192.168.1.20
|
--ssid "YourWiFi" --password "secret" --target-ip 192.168.1.20
|
||||||
|
|
||||||
# Option 2b: WiFi 6 + 802.15.4 research sensing with ESP32-C6 ($6-10, ADR-110)
|
|
||||||
# Same csi-node firmware compiled for the C6 target — picks up the C6
|
|
||||||
# overlay (sdkconfig.defaults.esp32c6) automatically.
|
|
||||||
cd firmware/esp32-csi-node
|
|
||||||
idf.py set-target esp32c6 && idf.py build
|
|
||||||
idf.py -p COM6 flash
|
|
||||||
# C6 boot extras (vs S3): HE-LTF subcarrier tagging in ADR-018 bytes 18-19,
|
|
||||||
# 802.15.4 mesh time-sync on channel 15, TWT setup when the AP supports it,
|
|
||||||
# opt-in LP-core wake-on-motion for ~5 µA battery seed nodes.
|
|
||||||
# v0.6.7 adds: real LP-core RISC-V motion-gate program (debounce + motion
|
|
||||||
# counter) and a Wi-Fi 6 soft-AP with TWT Responder so two C6 boards can
|
|
||||||
# benchmark real iTWT without buying an 11ax router. Both default off,
|
|
||||||
# flip CONFIG_C6_{LP_CORE,SOFTAP_HE}_ENABLE to turn them on.
|
|
||||||
|
|
||||||
# Option 3: Full system with Cognitum Seed ($140)
|
# Option 3: Full system with Cognitum Seed ($140)
|
||||||
# ESP32 streams CSI → bridge forwards to Seed for persistent storage + kNN + witness chain
|
# ESP32 streams CSI → bridge forwards to Seed for persistent storage + kNN + witness chain
|
||||||
node scripts/rf-scan.js --port 5006 # Live RF room scan
|
node scripts/rf-scan.js --port 5006 # Live RF room scan
|
||||||
node scripts/snn-csi-processor.js --port 5006 # SNN real-time learning
|
node scripts/snn-csi-processor.js --port 5006 # SNN real-time learning
|
||||||
node scripts/mincut-person-counter.js --port 5006 # Correct person counting
|
node scripts/mincut-person-counter.js --port 5006 # Correct person counting
|
||||||
|
|
||||||
# Option 4: Python — live on PyPI (ADR-117)
|
|
||||||
pip install ruview # or: pip install wifi-densepose
|
|
||||||
# Both ship the same compiled PyO3 wheel (~250 KB, abi3-py310, Linux/macOS/Windows).
|
|
||||||
# Add [client] for the asyncio WebSocket + paho-mqtt clients:
|
|
||||||
pip install "ruview[client]" # or: pip install "wifi-densepose[client]"
|
|
||||||
|
|
||||||
# from ruview import BreathingExtractor, HeartRateExtractor # equivalent to:
|
|
||||||
# from wifi_densepose import BreathingExtractor, HeartRateExtractor
|
|
||||||
# from ruview.client import SensingClient, RuViewMqttClient
|
|
||||||
```
|
```
|
||||||
|
|
||||||
[](https://pypi.org/project/ruview/) [](https://pypi.org/project/wifi-densepose/)
|
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> **CSI-capable hardware recommended.** Presence, vital signs, through-wall sensing, and all advanced capabilities require Channel State Information (CSI) from an ESP32-S3 ($9) or research NIC. The Docker image runs with simulated data for evaluation. Consumer WiFi laptops provide RSSI-only presence detection.
|
> **CSI-capable hardware recommended.** Presence, vital signs, through-wall sensing, and all advanced capabilities require Channel State Information (CSI) from an ESP32-S3 ($9) or research NIC. The Docker image runs with simulated data for evaluation. Consumer WiFi laptops provide RSSI-only presence detection.
|
||||||
|
|
||||||
@@ -128,11 +88,10 @@ pip install "ruview[client]" # or: pip install "wifi-densepose[clie
|
|||||||
>
|
>
|
||||||
> | Option | Hardware | Cost | Full CSI | Capabilities |
|
> | Option | Hardware | Cost | Full CSI | Capabilities |
|
||||||
> |--------|----------|------|----------|-------------|
|
> |--------|----------|------|----------|-------------|
|
||||||
> | **ESP32 + Cognitum Seed** (recommended) | ESP32-S3 + [Cognitum Seed](https://cognitum.one) | ~$140 | Yes | Presence, motion, breathing, heart rate, fall detection, multi-person counting, 17-keypoint pose (signed Cog binary), 105-cog catalog, persistent vector store, kNN search, witness chain, MCP proxy |
|
> | **ESP32 + Cognitum Seed** (recommended) | ESP32-S3 + [Cognitum Seed](https://cognitum.one) | ~$140 | Yes | Pose, breathing, heartbeat, motion, presence + persistent vector store, kNN search, witness chain, MCP proxy |
|
||||||
> | **ESP32 Mesh** | 3-6× ESP32-S3 + WiFi router | ~$54 | Yes | Same capabilities as above without the persistent-memory features |
|
> | **ESP32 Mesh** | 3-6x ESP32-S3 + WiFi router | ~$54 | Yes | Pose, breathing, heartbeat, motion, presence |
|
||||||
> | **ESP32-C6 research node** ([ADR-110](docs/adr/ADR-110-esp32-c6-firmware-extension.md), [witness](docs/WITNESS-LOG-110.md), [reviewer guide](docs/ADR-110-REVIEW-GUIDE.md), [firmware v0.7.0](https://github.com/ruvnet/RuView/releases/tag/v0.7.0-esp32)) | ESP32-C6-DevKit ($6–10) | ~$10 | Yes (Wi-Fi 6 capable) | Same CSI pipeline as S3 with the dual-target firmware. **Firmware-side ADR-110 substrate now closed** (v0.7.0): ESP-NOW cross-board mesh quantified at **99.56 % match / 104 µs smoothed offset stdev / 3.95× EMA suppression** over a 5-min two-board soak (witness §A0.10), 32-byte UDP sync packet with operator-tunable cadence (§A0.12), ADR-018 byte 19 bit 4 wire-fix sourced from the working ESP-NOW path (§A0.13). Wire format ready for HE-LTF PPDU tagging in ADR-018 bytes 18-19 (firmware encoder + Rust + Python decoders verified end-to-end across 23 unit tests). LP-core motion-gate RISC-V program and Wi-Fi 6 soft-AP with TWT Responder both ship as opt-in code paths (default off). **Hardware-gated for measurement**: HE-LTF live subcarrier capture needs an 11ax AP (IDF v5.4 doesn't expose AP-side HE config — §A0.6); ~5 µA LP-core hibernation needs an INA meter to capture; 802.15.4 raw RX is broken in IDF v5.4 (workaround: ESP-NOW transport, shipped + measured). See witness log for the empirical / claimed split. |
|
|
||||||
> | **Research NIC** | Intel 5300 / Atheros AR9580 | ~$50-100 | Yes | Full CSI with 3x3 MIMO |
|
> | **Research NIC** | Intel 5300 / Atheros AR9580 | ~$50-100 | Yes | Full CSI with 3x3 MIMO |
|
||||||
> | **Any WiFi** | Windows, macOS, or Linux laptop | $0 | No | RSSI-only: coarse presence and motion (see [tutorial #36](https://github.com/ruvnet/RuView/issues/36)) |
|
> | **Any WiFi** | Windows, macOS, or Linux laptop | $0 | No | RSSI-only: coarse presence and motion |
|
||||||
>
|
>
|
||||||
> No hardware? Verify the signal processing pipeline with the deterministic reference signal: `python archive/v1/data/proof/verify.py`
|
> No hardware? Verify the signal processing pipeline with the deterministic reference signal: `python archive/v1/data/proof/verify.py`
|
||||||
>
|
>
|
||||||
@@ -150,231 +109,10 @@ pip install "ruview[client]" # or: pip install "wifi-densepose[clie
|
|||||||
<a href="https://ruvnet.github.io/RuView/pose-fusion.html"><strong>▶ Dual-Modal Pose Fusion Demo</strong></a>
|
<a href="https://ruvnet.github.io/RuView/pose-fusion.html"><strong>▶ Dual-Modal Pose Fusion Demo</strong></a>
|
||||||
|
|
|
|
||||||
<a href="https://ruvnet.github.io/RuView/pointcloud/"><strong>▶ Live 3D Point Cloud</strong></a>
|
<a href="https://ruvnet.github.io/RuView/pointcloud/"><strong>▶ Live 3D Point Cloud</strong></a>
|
||||||
|
|
|
||||||
<a href="https://ruvnet.github.io/RuView/three.js/"><strong>▶ three.js Demos (5)</strong></a>
|
|
||||||
|
|
||||||
> The [server](#-quick-start) is optional for visualization and aggregation — the ESP32 [runs independently](#esp32-s3-hardware-pipeline) for presence detection, vital signs, and fall alerts.
|
> The [server](#-quick-start) is optional for visualization and aggregation — the ESP32 [runs independently](#esp32-s3-hardware-pipeline) for presence detection, vital signs, and fall alerts.
|
||||||
>
|
>
|
||||||
> **Live ESP32 pipeline**: Connect an ESP32-S3 node → run the [sensing server](#sensing-server) → open the [pose fusion demo](https://ruvnet.github.io/RuView/pose-fusion.html) for real-time dual-modal pose estimation (webcam + WiFi CSI). See [ADR-059](docs/adr/ADR-059-live-esp32-csi-pipeline.md).
|
> **Live ESP32 pipeline**: Connect an ESP32-S3 node → run the [sensing server](#sensing-server) → open the [pose fusion demo](https://ruvnet.github.io/RuView/pose-fusion.html) for real-time dual-modal pose estimation (webcam + WiFi CSI). See [ADR-059](docs/adr/ADR-059-live-esp32-csi-pipeline.md).
|
||||||
>
|
|
||||||
> **three.js scene gallery** at [`/three.js/`](https://ruvnet.github.io/RuView/three.js/) — five progressively richer ADR-097 demos: helpers, cinematic, GLTF skinned, FBX skinned, and a live MediaPipe→Mixamo retargeting feed driven by ESP32 CSI. Demos 04 and 05 require a local Mixamo `X Bot.fbx` (license boundary — not redistributed).
|
|
||||||
|
|
||||||
|
|
||||||
## 🤗 Pretrained model on Hugging Face
|
|
||||||
|
|
||||||
Pretrained CSI weights live at [`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained) — 12.2M training steps on 60K frames / 610K contrastive triplets, **82.3% held-out temporal-triplet accuracy** (up from 66.4% raw; the older "100% presence" figure was measured on a single-class recording and has been retracted), 4-bit quantized variant fits in 8 KB. The release includes a contrastive **CSI encoder** producing 128-dim embeddings (164,183 emb/s on M4 Pro) and a **presence-detection head**. Per-node LoRA adapters are included for environment-specific fine-tuning.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Download the model bundle
|
|
||||||
pip install huggingface_hub
|
|
||||||
huggingface-cli download ruvnet/wifi-densepose-pretrained --local-dir models/wifi-densepose-pretrained
|
|
||||||
```
|
|
||||||
|
|
||||||
**What works today vs. what's pending wiring:**
|
|
||||||
|
|
||||||
| Consumer | Format used | Status |
|
|
||||||
|----------|-------------|--------|
|
|
||||||
| Python training / evaluation / embedding extraction | `model.safetensors` | ✅ Works — load with `safetensors.torch.load_file` |
|
|
||||||
| Inspect / re-export the bundle | `model.rvf.jsonl` (line-by-line JSON) | ✅ Works — plain JSONL |
|
|
||||||
| Sensing-server `--model <PATH>` flag | binary RVF (`RVFS` magic) | ⚠️ Loader does not yet accept the JSONL container |
|
|
||||||
|
|
||||||
**Known gap:** the HF model ships in JSONL RVF format, but `v2/crates/wifi-densepose-sensing-server/src/rvf_container.rs` only parses the binary RVF segment format. Pointing `--model` at `model.rvf.jsonl` currently errors with `invalid magic at offset 0: expected 0x52564653, got 0x7974227B` and the live pipeline degrades to null output rather than falling back to heuristic mode — so for the live sensing-server, run **without** `--model` until a JSONL adapter lands (or the model is re-published as binary RVF). Use the weights from Python / training in the meantime.
|
|
||||||
|
|
||||||
**Quantization choices** (all in the HF repo): `model-q2.bin` (4 KB) · `model-q4.bin` ⭐ recommended (8 KB) · `model-q8.bin` (16 KB) · `model.safetensors` full (48 KB)
|
|
||||||
|
|
||||||
The separate **17-keypoint pose-estimation model** is now published at [`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose) — **82.69% torso-PCK@20** on MM-Fi (single model) / **83.59%** (3-model ensemble + TTA), beating the prior published SOTA MultiFormer (72.25%) and CSI2Pose (68.41%) on the matched `random_split` protocol. See **Results & proof** below.
|
|
||||||
|
|
||||||
### Results & proof
|
|
||||||
|
|
||||||
| What | Where | Numbers |
|
|
||||||
|------|-------|---------|
|
|
||||||
| **MM-Fi pose model (SOTA)** | [`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose) | 82.69% torso-PCK@20 (single) · 83.59% (ensemble+TTA) · 75K-param micro variant 74.30% |
|
|
||||||
| **AetherArena benchmark Space** | [`ruvnet/aether-arena`](https://huggingface.co/spaces/ruvnet/aether-arena) | self-correcting, auditable MM-Fi leaderboard |
|
|
||||||
| **Full MM-Fi study (honest picture)** | [`docs/benchmarks/mmfi-wifi-sensing-study.md`](docs/benchmarks/mmfi-wifi-sensing-study.md) | pose + action; zero-shot cross-subject ~64%, +~30 s in-room calibration → 72.2% |
|
|
||||||
| **Efficiency frontier** | [`docs/benchmarks/wifi-pose-efficiency-frontier.md`](docs/benchmarks/wifi-pose-efficiency-frontier.md) | SOTA-beating WiFi pose in a 20 KB int4 edge model |
|
|
||||||
| **Pretrained encoder** | [`ruvnet/wifi-densepose-pretrained`](https://huggingface.co/ruvnet/wifi-densepose-pretrained) | 82.3% held-out temporal-triplet, 8 KB int4 |
|
|
||||||
| **Reproducible proof (Trust Kill Switch)** | [`archive/v1/data/proof/verify.py`](archive/v1/data/proof/verify.py) + [`expected_features.sha256`](archive/v1/data/proof/expected_features.sha256) | one-command deterministic pipeline replay (SHA-256 of output vs published hash) |
|
|
||||||
| **Benchmark-proof ADR** | [ADR-168](docs/adr/ADR-168-benchmark-proof.md) | how the numbers are produced and verified |
|
|
||||||
| **Witness attestation** | [`docs/WITNESS-LOG-028.md`](docs/WITNESS-LOG-028.md) | 33-row capability attestation matrix with per-claim evidence |
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Reproduce the deterministic pipeline proof yourself (must print VERDICT: PASS):
|
|
||||||
python archive/v1/data/proof/verify.py
|
|
||||||
```
|
|
||||||
|
|
||||||
Tracked in [#509](https://github.com/ruvnet/RuView/issues/509); see [ADR-079](docs/adr/ADR-079-camera-supervised-pose-finetune.md) phases P7–P9 for the camera-supervised fine-tune path.
|
|
||||||
|
|
||||||
|
|
||||||
## 🧩 Edge Module Catalog
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary><b>🧩 105 edge modules ready to install on a Cognitum appliance</b> — live catalog from <code>app-registry.json</code> v2.1.0 (updated 2026-05-13). Browse + install at <a href="https://seed.cognitum.one/store">seed.cognitum.one/store</a> or your local appliance <code>http://&lt;appliance&gt;:9000/cogs</code>.</summary>
|
|
||||||
|
|
||||||
Each module is a small signed binary (~400 KB) that runs alongside the WiFi-DensePose sensing stack on a Cognitum-V0 appliance. The catalog updates over the air — your appliance fetches it via <code>GET /api/v1/edge/registry</code> ([ADR-102](docs/adr/ADR-102-edge-module-registry.md)) and verifies each binary against an Ed25519 signature ([ADR-100](docs/adr/ADR-100-cog-packaging-specification.md)) before install.
|
|
||||||
|
|
||||||
### 🫀 Health — <sub>14 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `air-quality-index` | Track indoor air quality with CO2 and particle sensors | 8 KB | Easy |
|
|
||||||
| `baby-cry` | Sustained mid-band energy detector for nursery / infant monitoring. Audio-only, no camera. | 451 KB | Easy |
|
|
||||||
| `breathing-sync` | Detects when two people breathe in sync | 10 KB | Hard |
|
|
||||||
| `cardiac-arrhythmia` | Spots irregular heartbeats and abnormal heart rhythms | 8 KB | Hard |
|
|
||||||
| `cough-detect` | Acoustic transient + spectral cough detector with 30s cluster aggregation. Early-warning signal for respiratory illness. | 451 KB | Easy |
|
|
||||||
| `dream-stage` | Tracks your sleep stages — light, deep, and dreaming | 14 KB | Hard |
|
|
||||||
| `fall-detect` | Two-stage impact + stillness fall detector over ambient feature stream (ESP32 motion / mic). Optional ruview-mode for CSI-based pose reinforcement. | 402 KB | Easy |
|
|
||||||
| `gait-analysis` | Detects walking problems and scores fall risk | 12 KB | Hard |
|
|
||||||
| `health-monitor` | Contactless heart rate, breathing, sleep, and fall alerts | 30 KB | Med |
|
|
||||||
| `respiratory-distress` | Alerts when breathing becomes labored or dangerously fast | 10 KB | Hard |
|
|
||||||
| `seizure-detect` | Recognizes seizures and sends immediate alerts | 10 KB | Hard |
|
|
||||||
| `sleep-apnea` | Detects when someone stops breathing during sleep | 4 KB | Easy |
|
|
||||||
| `snore-monitor` | Periodic low-band energy tracker for sleep-quality / apnea-risk trending. Companion to sleep-apnea cog. | 451 KB | Easy |
|
|
||||||
| `vital-trend` | Tracks breathing and heart rate trends over weeks | 6 KB | Med |
|
|
||||||
|
|
||||||
### 🔒 Security — <sub>14 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `audit-logger` | Record every action for compliance — tamper-proof log | 8 KB | Easy |
|
|
||||||
| `behavioral-profiler` | Learns normal behavior and flags anything unusual | 12 KB | Hard |
|
|
||||||
| `fleet-auth` | Manage device certificates and access across all seeds | 12 KB | Med |
|
|
||||||
| `glass-break` | Two-phase bang + shatter acoustic detector. Distinguishes glass break from ordinary impulse noise. | 451 KB | Easy |
|
|
||||||
| `gunshot-detect` | Saturating peak + exponential decay acoustic detector with optional ruview CSI motion-drop reinforcement. | 451 KB | Easy |
|
|
||||||
| `intrusion` | Alerts when an unauthorized person enters a room | 6 KB | Med |
|
|
||||||
| `intrusion-detect-ml` | Detect network attacks using machine learning | 14 KB | Hard |
|
|
||||||
| `loitering` | Alerts when someone lingers too long in one spot | 3 KB | Easy |
|
|
||||||
| `network-firewall` | Block unauthorized network access per cog | 6 KB | Easy |
|
|
||||||
| `panic-motion` | Detects sudden panicked or erratic movement | 6 KB | Med |
|
|
||||||
| `perimeter-breach` | Guards multiple zones and shows entry direction | 10 KB | Med |
|
|
||||||
| `prompt-shield` | Blocks signal replay and injection attacks on the seed | 10 KB | Med |
|
|
||||||
| `tailgating` | Catches when someone sneaks in behind a badge holder | 6 KB | Med |
|
|
||||||
| `weapon-detect` | Detects concealed metal objects on a person | 8 KB | Hard |
|
|
||||||
|
|
||||||
### 🏢 Building — <sub>11 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `beehive-monitor` | Acoustic hive state classifier. Detects healthy / chaotic / queenless / swarming / robbing via hum-band energy + chaos + piping autocorr. | 451 KB | Easy |
|
|
||||||
| `elevator-count` | Counts how many people are in an elevator | 8 KB | Med |
|
|
||||||
| `energy-audit` | Learns your schedule and cuts wasted energy | 6 KB | Med |
|
|
||||||
| `frost-warning` | Predicts frost 6 hours ahead via temperature trend + dewpoint-depression gate. Field/orchard agriculture. | 451 KB | Easy |
|
|
||||||
| `hvac-presence` | Turns heating and cooling on when you arrive | 3 KB | Easy |
|
|
||||||
| `lighting-zones` | Turns lights on and off as people move between rooms | 4 KB | Easy |
|
|
||||||
| `meeting-room` | Shows if a meeting room is free or occupied | 5 KB | Easy |
|
|
||||||
| `occupancy-zones` | Counts people in each room through walls | 8 KB | Med |
|
|
||||||
| `predictive-maintenance` | Vibration harmonic analyzer for rotating equipment. Tracks F1 / 2×F1 / high-order / sideband energy to score degradation severity. | 451 KB | Easy |
|
|
||||||
| `smoke-fire` | Multi-signal smoke and fire detector. Fuses acoustic crackle, thermal drift proxy, and optional ruview CSI plume signature. Not a UL-listed replacement for code-required smoke alarms. | 451 KB | Easy |
|
|
||||||
| `water-leak` | Persistent low-amplitude hiss + periodic drip acoustic detector with multi-minute persistence gate. Two-stage likely → confirmed. | 451 KB | Easy |
|
|
||||||
|
|
||||||
### 🛍️ Retail — <sub>7 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `customer-flow` | Counts foot traffic in and out of each entrance | 8 KB | Med |
|
|
||||||
| `dwell-heatmap` | Shows where customers spend the most time | 6 KB | Med |
|
|
||||||
| `package-detect` | Sustained CSI-shift detector for porch / loading bay package arrivals and departures. Requires ESP32 CSI ruview input. | 451 KB | Easy |
|
|
||||||
| `parking-occupancy` | Per-zone parking occupancy via ESP32 CSI subcarrier-amplitude shift. Tracks utilization and churn-per-hour. Requires ruview. | 451 KB | Easy |
|
|
||||||
| `queue-length` | Estimates line length and wait time | 6 KB | Med |
|
|
||||||
| `shelf-engagement` | Detects when customers interact with products | 6 KB | Med |
|
|
||||||
| `table-turnover` | Tracks which restaurant tables are free or occupied | 4 KB | Easy |
|
|
||||||
|
|
||||||
### 🏭 Industrial — <sub>7 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `clean-room` | Enforces max headcount in controlled environments | 4 KB | Easy |
|
|
||||||
| `confined-space` | Monitors workers in tight spaces for safety | 5 KB | Med |
|
|
||||||
| `forklift-proximity` | Warns if a forklift gets too close to workers | 10 KB | Hard |
|
|
||||||
| `livestock-monitor` | Monitors animals for distress, escape, or illness | 6 KB | Med |
|
|
||||||
| `ppe-compliance` | Cog-composition layer: alerts when ruview-densepose detects presence in a restricted zone without an accompanying PPE-camera-cog confirmation vector. | 387 KB | Easy |
|
|
||||||
| `slip-fall-zone` | Pre-fall risk detector. Fires when motion-variance drop, splash audio, and optional cautious-gait CSI all signal elevated slip risk. | 451 KB | Easy |
|
|
||||||
| `structural-vibration` | Detects dangerous vibrations in buildings or machines | 8 KB | Hard |
|
|
||||||
|
|
||||||
### 🔬 Research — <sub>12 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `emotion-detect` | Reads stress and calm from body language and breathing | 10 KB | Hard |
|
|
||||||
| `energy-harvester` | Optimize solar and battery for off-grid seed deployment | 6 KB | Med |
|
|
||||||
| `gesture-language` | Recognizes sign language gestures in real time | 12 KB | Hard |
|
|
||||||
| `ghost-hunter` | Finds unexplained environmental anomalies — for fun | 10 KB | Hard |
|
|
||||||
| `happiness-score` | Estimates well-being from movement and mood signals | 8 KB | Med |
|
|
||||||
| `hyperbolic-space` | Maps data into curved space for tree-like structures | 12 KB | Hard |
|
|
||||||
| `music-conductor` | Reads a conductor's gestures for tempo and dynamics | 12 KB | Hard |
|
|
||||||
| `plant-growth` | Tracks plant growth rate and day/night cycles | 8 KB | Med |
|
|
||||||
| `rain-detect` | Detects when rain starts, stops, and how heavy it is | 6 KB | Med |
|
|
||||||
| `ruview-densepose` | Full body pose tracking from WiFi — no cameras needed | 50 KB | Hard |
|
|
||||||
| `sound-classifier` | Identify sounds like glass break, alarm, or baby cry | 16 KB | Hard |
|
|
||||||
| `time-crystal` | Experiments with repeating time-pattern symmetry | 12 KB | Hard |
|
|
||||||
|
|
||||||
### 🤖 AI — <sub>15 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `anomaly-attractor` | Learns what's normal and catches anything weird | 10 KB | Hard |
|
|
||||||
| `cognitive-pipeline` | FastGRNN anomaly gate + SmolLM2 sparse-LLM inference for on-device Pi Zero 2W cognitive events | 320 KB | Hard |
|
|
||||||
| `dtw-gesture-learn` | Teach custom hand gestures by showing examples | 14 KB | Med |
|
|
||||||
| `ewc-lifelong` | Learns new things without forgetting old lessons | 8 KB | Hard |
|
|
||||||
| `federated-learning` | Train AI across seeds without sharing raw data | 18 KB | Hard |
|
|
||||||
| `goap-autonomy` | Plans and executes goals on its own | 14 KB | Hard |
|
|
||||||
| `meta-adapt` | Automatically tunes itself for best performance | 10 KB | Hard |
|
|
||||||
| `micro-hnsw` | Fast on-device fingerprinting and classification | 12 KB | Med |
|
|
||||||
| `neural-trader` | Spot market patterns and trends from live data | 20 KB | Hard |
|
|
||||||
| `pagerank-influence` | Finds the most influential person in a group | 12 KB | Med |
|
|
||||||
| `pattern-sequence` | Detects daily routines and repeated habits | 10 KB | Med |
|
|
||||||
| `rag-local` | Search your documents using AI — runs on the seed | 14 KB | Med |
|
|
||||||
| `spiking-tracker` | Brain-inspired tracker that runs on tiny hardware | 16 KB | Hard |
|
|
||||||
| `temporal-logic` | Enforces safety rules on live event streams | 12 KB | Hard |
|
|
||||||
| `time-series-forecast` | Predict sensor trends using historical patterns | 12 KB | Med |
|
|
||||||
|
|
||||||
### 🐝 Swarm — <sub>11 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `swarm-backup-restore` | Auto-backup data to other seeds — one-click restore | 8 KB | Easy |
|
|
||||||
| `swarm-cluster-monitor` | Live dashboard of every seed's health and status | 6 KB | Easy |
|
|
||||||
| `swarm-consensus` | Seeds vote before making critical changes together | 16 KB | Hard |
|
|
||||||
| `swarm-delta-sync` | Auto-sync data between seeds — only sends changes | 8 KB | Med |
|
|
||||||
| `swarm-deploy` | Install or remove cogs on all seeds at once | 10 KB | Med |
|
|
||||||
| `swarm-distributed-store` | Spread data across seeds and search them all at once | 14 KB | Hard |
|
|
||||||
| `swarm-edge-orchestrator` | Manage all ESP32 sensor nodes from one place | 14 KB | Hard |
|
|
||||||
| `swarm-load-balancer` | Spread queries across seeds so no single one overloads | 10 KB | Med |
|
|
||||||
| `swarm-mesh-manager` | Find, connect, and monitor all seeds on your network | 12 KB | Easy |
|
|
||||||
| `swarm-mqtt-bridge` | Share events between seeds over MQTT messaging | 6 KB | Easy |
|
|
||||||
| `swarm-witness-federation` | Share tamper-proof audit trails across seeds | 12 KB | Hard |
|
|
||||||
|
|
||||||
### 📡 Signal — <sub>6 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `coherence-gate` | Filters out noisy signals and keeps clean ones | 8 KB | Med |
|
|
||||||
| `flash-attention` | Focuses sensing on specific areas for better accuracy | 12 KB | Med |
|
|
||||||
| `optimal-transport` | Measures motion using shape-aware signal comparison | 12 KB | Hard |
|
|
||||||
| `person-matching` | Tells apart multiple people in the same room | 18 KB | Hard |
|
|
||||||
| `sparse-recovery` | Recovers missing signal data from partial readings | 16 KB | Hard |
|
|
||||||
| `temporal-compress` | Shrinks old data to save memory without losing meaning | 14 KB | Med |
|
|
||||||
|
|
||||||
### 🌐 Network — <sub>1 module</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `tailscale` | Reach the seed from anywhere via a private WireGuard mesh (Tailscale). Userspace mode — no root. | 700 KB | Med |
|
|
||||||
|
|
||||||
### 🛠️ Developer — <sub>7 modules</sub>
|
|
||||||
|
|
||||||
| ID | What it does | Size | Difficulty |
|
|
||||||
|----|--------------|-----:|:----------:|
|
|
||||||
| `adversarial` | Detects tampered or spoofed sensor signals | 4 KB | Easy |
|
|
||||||
| `coherence` | Monitors signal quality across multiple channels | 4 KB | Easy |
|
|
||||||
| `gesture` | Core gesture recognition building block for cogs | 6 KB | Med |
|
|
||||||
| `interference-search` | Searches many possibilities at once for fast answers | 14 KB | Hard |
|
|
||||||
| `psycho-symbolic` | Reasons over knowledge graphs with multiple styles | 16 KB | Hard |
|
|
||||||
| `quantum-coherence` | Quantum-inspired model for advanced signal states | 16 KB | Hard |
|
|
||||||
| `self-healing-mesh` | Keeps sensor mesh running even when nodes drop out | 14 KB | Hard |
|
|
||||||
|
|
||||||
> ℹ️ Build your own cog: see [ADR-100](docs/adr/ADR-100-cog-packaging-specification.md) for the packaging spec. The first cog this repo ships into the catalog lives in [v2/crates/cog-pose-estimation/](v2/crates/cog-pose-estimation/) (17-keypoint WiFi pose, [ADR-101](docs/adr/ADR-101-pose-estimation-cog.md)).
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
|
|
||||||
## 🔬 How It Works
|
## 🔬 How It Works
|
||||||
@@ -490,6 +228,178 @@ These scenarios exploit WiFi's ability to penetrate solid materials — concrete
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary><strong>🧩 Edge Intelligence (<a href="docs/adr/ADR-041-wasm-module-collection.md">ADR-041</a>)</strong> — 60 WASM modules across 13 categories, all implemented (609 tests)</summary>
|
||||||
|
|
||||||
|
Small programs that run directly on the ESP32 sensor — no internet needed, no cloud fees, instant response. Each module is a tiny WASM file (5-30 KB) that you upload to the device over-the-air. It reads WiFi signal data and makes decisions locally in under 10 ms. [ADR-041](docs/adr/ADR-041-wasm-module-collection.md) defines 60 modules across 13 categories — all 60 are implemented with 609 tests passing.
|
||||||
|
|
||||||
|
| | Category | Examples |
|
||||||
|
|---|----------|---------|
|
||||||
|
| 🏥 | [**Medical & Health**](docs/edge-modules/medical.md) | Sleep apnea detection, cardiac arrhythmia, gait analysis, seizure detection |
|
||||||
|
| 🔐 | [**Security & Safety**](docs/edge-modules/security.md) | Intrusion detection, perimeter breach, loitering, panic motion |
|
||||||
|
| 🏢 | [**Smart Building**](docs/edge-modules/building.md) | Zone occupancy, HVAC control, elevator counting, meeting room tracking |
|
||||||
|
| 🛒 | [**Retail & Hospitality**](docs/edge-modules/retail.md) | Queue length, dwell heatmaps, customer flow, table turnover |
|
||||||
|
| 🏭 | [**Industrial**](docs/edge-modules/industrial.md) | Forklift proximity, confined space monitoring, structural vibration |
|
||||||
|
| 🔮 | [**Exotic & Research**](docs/edge-modules/exotic.md) | Sleep staging, emotion detection, sign language, breathing sync |
|
||||||
|
| 📡 | [**Signal Intelligence**](docs/edge-modules/signal-intelligence.md) | Cleans and sharpens raw WiFi signals — focuses on important regions, filters noise, fills in missing data, and tracks which person is which |
|
||||||
|
| 🧠 | [**Adaptive Learning**](docs/edge-modules/adaptive-learning.md) | The sensor learns new gestures and patterns on its own over time — no cloud needed, remembers what it learned even after updates |
|
||||||
|
| 🗺️ | [**Spatial Reasoning**](docs/edge-modules/spatial-temporal.md) | Figures out where people are in a room, which zones matter most, and tracks movement across areas using graph-based spatial logic |
|
||||||
|
| ⏱️ | [**Temporal Analysis**](docs/edge-modules/spatial-temporal.md) | Learns daily routines, detects when patterns break (someone didn't get up), and verifies safety rules are being followed over time |
|
||||||
|
| 🛡️ | [**AI Security**](docs/edge-modules/ai-security.md) | Detects signal replay attacks, WiFi jamming, injection attempts, and flags abnormal behavior that could indicate tampering |
|
||||||
|
| ⚛️ | [**Quantum-Inspired**](docs/edge-modules/autonomous.md) | Uses quantum-inspired math to map room-wide signal coherence and search for optimal sensor configurations |
|
||||||
|
| 🤖 | [**Autonomous & Exotic**](docs/edge-modules/autonomous.md) | Self-managing sensor mesh — auto-heals dropped nodes, plans its own actions, and explores experimental signal representations |
|
||||||
|
|
||||||
|
All implemented modules are `no_std` Rust, share a [common utility library](v2/crates/wifi-densepose-wasm-edge/src/vendor_common.rs), and talk to the host through a 12-function API. Full documentation: [**Edge Modules Guide**](docs/edge-modules/README.md). See the [complete implemented module list](#edge-module-list) below.
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details id="edge-module-list">
|
||||||
|
<summary><strong>🧩 Edge Intelligence — <a href="docs/edge-modules/README.md">All 65 Modules Implemented</a></strong> (ADR-041 complete)</summary>
|
||||||
|
|
||||||
|
All 60 modules are implemented, tested (609 tests passing), and ready to deploy. They compile to `wasm32-unknown-unknown`, run on ESP32-S3 via WASM3, and share a [common utility library](v2/crates/wifi-densepose-wasm-edge/src/vendor_common.rs). Source: [`v2/crates/wifi-densepose-wasm-edge/src/`](v2/crates/wifi-densepose-wasm-edge/src/)
|
||||||
|
|
||||||
|
**Core modules** (ADR-040 flagship + early implementations):
|
||||||
|
|
||||||
|
| Module | File | What It Does |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| Gesture Classifier | [`gesture.rs`](v2/crates/wifi-densepose-wasm-edge/src/gesture.rs) | DTW template matching for hand gestures |
|
||||||
|
| Coherence Filter | [`coherence.rs`](v2/crates/wifi-densepose-wasm-edge/src/coherence.rs) | Phase coherence gating for signal quality |
|
||||||
|
| Adversarial Detector | [`adversarial.rs`](v2/crates/wifi-densepose-wasm-edge/src/adversarial.rs) | Detects physically impossible signal patterns |
|
||||||
|
| Intrusion Detector | [`intrusion.rs`](v2/crates/wifi-densepose-wasm-edge/src/intrusion.rs) | Human vs non-human motion classification |
|
||||||
|
| Occupancy Counter | [`occupancy.rs`](v2/crates/wifi-densepose-wasm-edge/src/occupancy.rs) | Zone-level person counting |
|
||||||
|
| Vital Trend | [`vital_trend.rs`](v2/crates/wifi-densepose-wasm-edge/src/vital_trend.rs) | Long-term breathing and heart rate trending |
|
||||||
|
| RVF Parser | [`rvf.rs`](v2/crates/wifi-densepose-wasm-edge/src/rvf.rs) | RVF container format parsing |
|
||||||
|
|
||||||
|
**Vendor-integrated modules** (24 modules, ADR-041 Category 7):
|
||||||
|
|
||||||
|
**📡 Signal Intelligence** — Real-time CSI analysis and feature extraction
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Flash Attention | [`sig_flash_attention.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_flash_attention.rs) | Tiled attention over 8 subcarrier groups — finds spatial focus regions and entropy | S (<5ms) |
|
||||||
|
| Coherence Gate | [`sig_coherence_gate.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_coherence_gate.rs) | Z-score phasor gating with hysteresis: Accept / PredictOnly / Reject / Recalibrate | L (<2ms) |
|
||||||
|
| Temporal Compress | [`sig_temporal_compress.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_temporal_compress.rs) | 3-tier adaptive quantization (8-bit hot / 5-bit warm / 3-bit cold) | L (<2ms) |
|
||||||
|
| Sparse Recovery | [`sig_sparse_recovery.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_sparse_recovery.rs) | ISTA L1 reconstruction for dropped subcarriers | H (<10ms) |
|
||||||
|
| Person Match | [`sig_mincut_person_match.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_mincut_person_match.rs) | Hungarian-lite bipartite assignment for multi-person tracking | S (<5ms) |
|
||||||
|
| Optimal Transport | [`sig_optimal_transport.rs`](v2/crates/wifi-densepose-wasm-edge/src/sig_optimal_transport.rs) | Sliced Wasserstein-1 distance with 4 projections | L (<2ms) |
|
||||||
|
|
||||||
|
**🧠 Adaptive Learning** — On-device learning without cloud connectivity
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| DTW Gesture Learn | [`lrn_dtw_gesture_learn.rs`](v2/crates/wifi-densepose-wasm-edge/src/lrn_dtw_gesture_learn.rs) | User-teachable gesture recognition — 3-rehearsal protocol, 16 templates | S (<5ms) |
|
||||||
|
| Anomaly Attractor | [`lrn_anomaly_attractor.rs`](v2/crates/wifi-densepose-wasm-edge/src/lrn_anomaly_attractor.rs) | 4D dynamical system attractor classification with Lyapunov exponents | H (<10ms) |
|
||||||
|
| Meta Adapt | [`lrn_meta_adapt.rs`](v2/crates/wifi-densepose-wasm-edge/src/lrn_meta_adapt.rs) | Hill-climbing self-optimization with safety rollback | L (<2ms) |
|
||||||
|
| EWC Lifelong | [`lrn_ewc_lifelong.rs`](v2/crates/wifi-densepose-wasm-edge/src/lrn_ewc_lifelong.rs) | Elastic Weight Consolidation — remembers past tasks while learning new ones | S (<5ms) |
|
||||||
|
|
||||||
|
**🗺️ Spatial Reasoning** — Location, proximity, and influence mapping
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| PageRank Influence | [`spt_pagerank_influence.rs`](v2/crates/wifi-densepose-wasm-edge/src/spt_pagerank_influence.rs) | 4x4 cross-correlation graph with power iteration PageRank | L (<2ms) |
|
||||||
|
| Micro HNSW | [`spt_micro_hnsw.rs`](v2/crates/wifi-densepose-wasm-edge/src/spt_micro_hnsw.rs) | 64-vector navigable small-world graph for nearest-neighbor search | S (<5ms) |
|
||||||
|
| Spiking Tracker | [`spt_spiking_tracker.rs`](v2/crates/wifi-densepose-wasm-edge/src/spt_spiking_tracker.rs) | 32 LIF neurons + 4 output zone neurons with STDP learning | S (<5ms) |
|
||||||
|
|
||||||
|
**⏱️ Temporal Analysis** — Activity patterns, logic verification, autonomous planning
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Pattern Sequence | [`tmp_pattern_sequence.rs`](v2/crates/wifi-densepose-wasm-edge/src/tmp_pattern_sequence.rs) | Activity routine detection and deviation alerts | S (<5ms) |
|
||||||
|
| Temporal Logic Guard | [`tmp_temporal_logic_guard.rs`](v2/crates/wifi-densepose-wasm-edge/src/tmp_temporal_logic_guard.rs) | LTL formula verification on CSI event streams | S (<5ms) |
|
||||||
|
| GOAP Autonomy | [`tmp_goap_autonomy.rs`](v2/crates/wifi-densepose-wasm-edge/src/tmp_goap_autonomy.rs) | Goal-Oriented Action Planning for autonomous module management | S (<5ms) |
|
||||||
|
|
||||||
|
**🛡️ AI Security** — Tamper detection and behavioral anomaly profiling
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Prompt Shield | [`ais_prompt_shield.rs`](v2/crates/wifi-densepose-wasm-edge/src/ais_prompt_shield.rs) | FNV-1a replay detection, injection detection (10x amplitude), jamming (SNR) | L (<2ms) |
|
||||||
|
| Behavioral Profiler | [`ais_behavioral_profiler.rs`](v2/crates/wifi-densepose-wasm-edge/src/ais_behavioral_profiler.rs) | 6D behavioral profile with Mahalanobis anomaly scoring | S (<5ms) |
|
||||||
|
|
||||||
|
**⚛️ Quantum-Inspired** — Quantum computing metaphors applied to CSI analysis
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Quantum Coherence | [`qnt_quantum_coherence.rs`](v2/crates/wifi-densepose-wasm-edge/src/qnt_quantum_coherence.rs) | Bloch sphere mapping, Von Neumann entropy, decoherence detection | S (<5ms) |
|
||||||
|
| Interference Search | [`qnt_interference_search.rs`](v2/crates/wifi-densepose-wasm-edge/src/qnt_interference_search.rs) | 16 room-state hypotheses with Grover-inspired oracle + diffusion | S (<5ms) |
|
||||||
|
|
||||||
|
**🤖 Autonomous Systems** — Self-governing and self-healing behaviors
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Psycho-Symbolic | [`aut_psycho_symbolic.rs`](v2/crates/wifi-densepose-wasm-edge/src/aut_psycho_symbolic.rs) | 16-rule forward-chaining knowledge base with contradiction detection | S (<5ms) |
|
||||||
|
| Self-Healing Mesh | [`aut_self_healing_mesh.rs`](v2/crates/wifi-densepose-wasm-edge/src/aut_self_healing_mesh.rs) | 8-node mesh with health tracking, degradation/recovery, coverage healing | S (<5ms) |
|
||||||
|
|
||||||
|
**🔮 Exotic (Vendor)** — Novel mathematical models for CSI interpretation
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Time Crystal | [`exo_time_crystal.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_time_crystal.rs) | Autocorrelation subharmonic detection in 256-frame history | S (<5ms) |
|
||||||
|
| Hyperbolic Space | [`exo_hyperbolic_space.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_hyperbolic_space.rs) | Poincaré ball embedding with 32 reference locations, hyperbolic distance | S (<5ms) |
|
||||||
|
|
||||||
|
**🏥 Medical & Health** (Category 1) — Contactless health monitoring
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Sleep Apnea | [`med_sleep_apnea.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_sleep_apnea.rs) | Detects breathing pauses during sleep | S (<5ms) |
|
||||||
|
| Cardiac Arrhythmia | [`med_cardiac_arrhythmia.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_cardiac_arrhythmia.rs) | Monitors heart rate for irregular rhythms | S (<5ms) |
|
||||||
|
| Respiratory Distress | [`med_respiratory_distress.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_respiratory_distress.rs) | Alerts on abnormal breathing patterns | S (<5ms) |
|
||||||
|
| Gait Analysis | [`med_gait_analysis.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_gait_analysis.rs) | Tracks walking patterns and detects changes | S (<5ms) |
|
||||||
|
| Seizure Detection | [`med_seizure_detect.rs`](v2/crates/wifi-densepose-wasm-edge/src/med_seizure_detect.rs) | 6-state machine for tonic-clonic seizure recognition | S (<5ms) |
|
||||||
|
|
||||||
|
**🔐 Security & Safety** (Category 2) — Perimeter and threat detection
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Perimeter Breach | [`sec_perimeter_breach.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_perimeter_breach.rs) | Detects boundary crossings with approach/departure | S (<5ms) |
|
||||||
|
| Weapon Detection | [`sec_weapon_detect.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_weapon_detect.rs) | Metal anomaly detection via CSI amplitude shifts | S (<5ms) |
|
||||||
|
| Tailgating | [`sec_tailgating.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_tailgating.rs) | Detects unauthorized follow-through at access points | S (<5ms) |
|
||||||
|
| Loitering | [`sec_loitering.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_loitering.rs) | Alerts when someone lingers too long in a zone | S (<5ms) |
|
||||||
|
| Panic Motion | [`sec_panic_motion.rs`](v2/crates/wifi-densepose-wasm-edge/src/sec_panic_motion.rs) | Detects fleeing, struggling, or panic movement | S (<5ms) |
|
||||||
|
|
||||||
|
**🏢 Smart Building** (Category 3) — Automation and energy efficiency
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| HVAC Presence | [`bld_hvac_presence.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_hvac_presence.rs) | Occupancy-driven HVAC control with departure countdown | S (<5ms) |
|
||||||
|
| Lighting Zones | [`bld_lighting_zones.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_lighting_zones.rs) | Auto-dim/off lighting based on zone activity | S (<5ms) |
|
||||||
|
| Elevator Count | [`bld_elevator_count.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_elevator_count.rs) | Counts people entering/leaving with overload warning | S (<5ms) |
|
||||||
|
| Meeting Room | [`bld_meeting_room.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_meeting_room.rs) | Tracks meeting lifecycle: start, headcount, end, availability | S (<5ms) |
|
||||||
|
| Energy Audit | [`bld_energy_audit.rs`](v2/crates/wifi-densepose-wasm-edge/src/bld_energy_audit.rs) | Tracks after-hours usage and room utilization rates | S (<5ms) |
|
||||||
|
|
||||||
|
**🛒 Retail & Hospitality** (Category 4) — Customer insights without cameras
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Queue Length | [`ret_queue_length.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_queue_length.rs) | Estimates queue size and wait times | S (<5ms) |
|
||||||
|
| Dwell Heatmap | [`ret_dwell_heatmap.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_dwell_heatmap.rs) | Shows where people spend time (hot/cold zones) | S (<5ms) |
|
||||||
|
| Customer Flow | [`ret_customer_flow.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_customer_flow.rs) | Counts ins/outs and tracks net occupancy | S (<5ms) |
|
||||||
|
| Table Turnover | [`ret_table_turnover.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_table_turnover.rs) | Restaurant table lifecycle: seated, dining, vacated | S (<5ms) |
|
||||||
|
| Shelf Engagement | [`ret_shelf_engagement.rs`](v2/crates/wifi-densepose-wasm-edge/src/ret_shelf_engagement.rs) | Detects browsing, considering, and reaching for products | S (<5ms) |
|
||||||
|
|
||||||
|
**🏭 Industrial & Specialized** (Category 5) — Safety and compliance
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Forklift Proximity | [`ind_forklift_proximity.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_forklift_proximity.rs) | Warns when people get too close to vehicles | S (<5ms) |
|
||||||
|
| Confined Space | [`ind_confined_space.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_confined_space.rs) | OSHA-compliant worker monitoring with extraction alerts | S (<5ms) |
|
||||||
|
| Clean Room | [`ind_clean_room.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_clean_room.rs) | Occupancy limits and turbulent motion detection | S (<5ms) |
|
||||||
|
| Livestock Monitor | [`ind_livestock_monitor.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_livestock_monitor.rs) | Animal presence, stillness, and escape alerts | S (<5ms) |
|
||||||
|
| Structural Vibration | [`ind_structural_vibration.rs`](v2/crates/wifi-densepose-wasm-edge/src/ind_structural_vibration.rs) | Seismic events, mechanical resonance, structural drift | S (<5ms) |
|
||||||
|
|
||||||
|
**🔮 Exotic & Research** (Category 6) — Experimental sensing applications
|
||||||
|
|
||||||
|
| Module | File | What It Does | Budget |
|
||||||
|
|--------|------|-------------|--------|
|
||||||
|
| Dream Stage | [`exo_dream_stage.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_dream_stage.rs) | Contactless sleep stage classification (wake/light/deep/REM) | S (<5ms) |
|
||||||
|
| Emotion Detection | [`exo_emotion_detect.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_emotion_detect.rs) | Arousal, stress, and calm detection from micro-movements | S (<5ms) |
|
||||||
|
| Gesture Language | [`exo_gesture_language.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_gesture_language.rs) | Sign language letter recognition via WiFi | S (<5ms) |
|
||||||
|
| Music Conductor | [`exo_music_conductor.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_music_conductor.rs) | Tempo and dynamic tracking from conducting gestures | S (<5ms) |
|
||||||
|
| Plant Growth | [`exo_plant_growth.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_plant_growth.rs) | Monitors plant growth, circadian rhythms, wilt detection | S (<5ms) |
|
||||||
|
| Ghost Hunter | [`exo_ghost_hunter.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_ghost_hunter.rs) | Environmental anomaly classification (draft/insect/wind/unknown) | S (<5ms) |
|
||||||
|
| Rain Detection | [`exo_rain_detect.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_rain_detect.rs) | Detects rain onset, intensity, and cessation via signal scatter | S (<5ms) |
|
||||||
|
| Breathing Sync | [`exo_breathing_sync.rs`](v2/crates/wifi-densepose-wasm-edge/src/exo_breathing_sync.rs) | Detects synchronized breathing between multiple people | S (<5ms) |
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -501,7 +411,7 @@ Every WiFi signal that passes through a room creates a unique fingerprint of tha
|
|||||||
**What it does in plain terms:**
|
**What it does in plain terms:**
|
||||||
- Turns any WiFi signal into a 128-number "fingerprint" that uniquely describes what's happening in a room
|
- Turns any WiFi signal into a 128-number "fingerprint" that uniquely describes what's happening in a room
|
||||||
- Learns entirely on its own from raw WiFi data — no cameras, no labeling, no human supervision needed
|
- Learns entirely on its own from raw WiFi data — no cameras, no labeling, no human supervision needed
|
||||||
- Recognizes rooms, detects intruders, and classifies activities using only WiFi (named person-identity is an experimental, data-gated research capability — see below, not a shipped feature)
|
- Recognizes rooms, detects intruders, identifies people, and classifies activities using only WiFi
|
||||||
- Runs on an $8 ESP32 chip (the entire model fits in 55 KB of memory)
|
- Runs on an $8 ESP32 chip (the entire model fits in 55 KB of memory)
|
||||||
- Produces both body pose tracking AND environment fingerprints in a single computation
|
- Produces both body pose tracking AND environment fingerprints in a single computation
|
||||||
|
|
||||||
@@ -512,7 +422,7 @@ Every WiFi signal that passes through a room creates a unique fingerprint of tha
|
|||||||
| **Self-supervised learning** | The model watches WiFi signals and teaches itself what "similar" and "different" look like, without any human-labeled data | Deploy anywhere — just plug in a WiFi sensor and wait 10 minutes |
|
| **Self-supervised learning** | The model watches WiFi signals and teaches itself what "similar" and "different" look like, without any human-labeled data | Deploy anywhere — just plug in a WiFi sensor and wait 10 minutes |
|
||||||
| **Room identification** | Each room produces a distinct WiFi fingerprint pattern | Know which room someone is in without GPS or beacons |
|
| **Room identification** | Each room produces a distinct WiFi fingerprint pattern | Know which room someone is in without GPS or beacons |
|
||||||
| **Anomaly detection** | An unexpected person or event creates a fingerprint that doesn't match anything seen before | Automatic intrusion and fall detection as a free byproduct |
|
| **Anomaly detection** | An unexpected person or event creates a fingerprint that doesn't match anything seen before | Automatic intrusion and fall detection as a free byproduct |
|
||||||
| **Person re-identification** *(experimental, research)* | A real per-channel similarity matcher (Soul Signature §3.6, `wifi-densepose-bfld`); **measured** result: on WiFi-only cardiac+respiratory channels alone two people are *not* separable (gap ~0.0005) | Honest research capability — **named identity is not claimed** and is data-gated on enrollment with the decisive AETHER/body-resonance channel. See [#1021](https://github.com/ruvnet/RuView/issues/1021) |
|
| **Person re-identification** | Each person disturbs WiFi in a slightly different way, creating a personal signature | Track individuals across sessions without cameras |
|
||||||
| **Environment adaptation** | MicroLoRA adapters (1,792 parameters per room) fine-tune the model for each new space | Adapts to a new room with minimal data — 93% less than retraining from scratch |
|
| **Environment adaptation** | MicroLoRA adapters (1,792 parameters per room) fine-tune the model for each new space | Adapts to a new room with minimal data — 93% less than retraining from scratch |
|
||||||
| **Memory preservation** | EWC++ regularization remembers what was learned during pretraining | Switching to a new task doesn't erase prior knowledge |
|
| **Memory preservation** | EWC++ regularization remembers what was learned during pretraining | Switching to a new task doesn't erase prior knowledge |
|
||||||
| **Hard-negative mining** | Training focuses on the most confusing examples to learn faster | Better accuracy with the same amount of training data |
|
| **Hard-negative mining** | Training focuses on the most confusing examples to learn faster | Better accuracy with the same amount of training data |
|
||||||
@@ -575,74 +485,24 @@ See [`docs/adr/ADR-024-contrastive-csi-embedding-model.md`](docs/adr/ADR-024-con
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🧩 Claude Code & Codex Plugin
|
|
||||||
|
|
||||||
RuView ships a [Claude Code](https://docs.anthropic.com/en/docs/claude-code) plugin (and Codex prompt mirror) that wraps the whole workflow — onboarding, ESP32 setup, configuration, sensing apps, model training, advanced multistatic sensing, CLI/API/WASM, mmWave radar, and witness verification — as 9 skills, 7 `/ruview-*` commands, and 3 agents. It lives in [`plugins/ruview/`](plugins/ruview/README.md); the marketplace manifest is [`.claude-plugin/marketplace.json`](.claude-plugin/marketplace.json) at the repo root.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# In Claude Code — add this repo as a plugin marketplace, then install:
|
|
||||||
/plugin marketplace add ruvnet/RuView
|
|
||||||
/plugin install ruview@ruview
|
|
||||||
|
|
||||||
# Or try it for one session without installing (from a local clone of the repo):
|
|
||||||
claude --plugin-dir ./plugins/ruview
|
|
||||||
|
|
||||||
# Then, in Claude Code:
|
|
||||||
# /ruview-start → onboarding (Docker demo / repo build / live ESP32)
|
|
||||||
# /ruview-flash → build + flash ESP32 firmware
|
|
||||||
# /ruview-provision → provision WiFi creds, sink IP, channel/MAC, mesh slots
|
|
||||||
# /ruview-app → run a sensing application (presence / vitals / pose / sleep / MAT / point cloud)
|
|
||||||
# /ruview-train → train / evaluate / publish a model (incl. GPU on GCloud)
|
|
||||||
# /ruview-advanced → multistatic / tomography / cross-viewpoint / mesh-security
|
|
||||||
# /ruview-verify → tests + deterministic proof + witness bundle
|
|
||||||
```
|
|
||||||
|
|
||||||
**Codex (OpenAI CLI):** `cp plugins/ruview/codex/prompts/*.md ~/.codex/prompts/` — the seven `/ruview-*` commands are mirrored as Codex prompts; [`plugins/ruview/codex/AGENTS.md`](plugins/ruview/codex/AGENTS.md) carries the project rules. See [`plugins/ruview/codex/README.md`](plugins/ruview/codex/README.md).
|
|
||||||
|
|
||||||
Verify the plugin structure: `bash plugins/ruview/scripts/smoke.sh`. Full details: [`plugins/ruview/README.md`](plugins/ruview/README.md).
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 📖 Documentation
|
## 📖 Documentation
|
||||||
|
|
||||||
| Document | Description |
|
| Document | Description |
|
||||||
|----------|-------------|
|
|----------|-------------|
|
||||||
| [User Guide](docs/user-guide.md) | Step-by-step guide: installation, first run, API usage, hardware setup, training |
|
| [User Guide](docs/user-guide.md) | Step-by-step guide: installation, first run, API usage, hardware setup, training |
|
||||||
| [Build Guide](docs/build-guide.md) | Building from source (Rust and Python) |
|
| [Build Guide](docs/build-guide.md) | Building from source (Rust and Python) |
|
||||||
| [**Home Assistant + Matter Integration**](docs/integrations/home-assistant.md) | **Works with Home Assistant** via MQTT auto-discovery + **Works with Matter** (Apple Home / Google Home / Alexa / SmartThings) — full entity catalog, 3 starter blueprints, Lovelace dashboards, privacy mode, threshold tuning ([ADR-115](docs/adr/ADR-115-home-assistant-integration.md)). |
|
| [Architecture Decisions](docs/adr/README.md) | 79 ADRs — why each technical choice was made, organized by domain (hardware, signal processing, ML, platform, infrastructure) |
|
||||||
| [**BFLD — Beamforming Feedback Layer for Detection**](v2/crates/wifi-densepose-bfld/README.md) | New privacy-gated WiFi sensing layer that measures + structurally prevents identity leakage from 802.11ac/ax Beamforming Feedback Information. Three type-enforced invariants (raw BFI never exits node, identity embedding is in-RAM-only, cross-site correlation cryptographically impossible via per-site BLAKE3 keyed hash + daily rotation). Ships full operator surface (`BfldPipeline`, `BfldPipelineHandle`, the Soul Signature §3.6 per-channel matcher `EnrolledMatcher`/`SoulMatchOracle` — experimental; named identity is data-gated, **measured** as not-separable on WiFi-only channels alone), MQTT topic router + HA-DISCO + availability + LWT, 3 operator HA blueprints, two runnable examples, eclipse-mosquitto:2 CI service container. 327+ tests. [ADR-118](docs/adr/ADR-118-bfld-beamforming-feedback-layer-for-detection.md) umbrella + sub-ADRs [119](docs/adr/ADR-119-bfld-frame-format-and-wire-protocol.md)/[120](docs/adr/ADR-120-bfld-privacy-class-and-hash-rotation.md)/[121](docs/adr/ADR-121-bfld-identity-risk-scoring.md)/[122](docs/adr/ADR-122-bfld-ruview-ha-matter-exposure.md)/[123](docs/adr/ADR-123-bfld-capture-path-nexmon-and-esp32.md). Research dossier: [`docs/research/BFLD/`](docs/research/BFLD/) (11 files, 13,544 words). |
|
| [Domain Models](docs/ddd/README.md) | 7 DDD models (RuvSense, Signal Processing, Training Pipeline, Hardware Platform, Sensing Server, WiFi-Mat, CHCI) — bounded contexts, aggregates, domain events, and ubiquitous language |
|
||||||
| [**SENSE-BRIDGE — rvagent MCP server**](tools/ruview-mcp/README.md) | Dual-transport MCP server (`@ruvnet/rvagent`) bridging the RuView sensing stack to AI agents (Claude Code, Cursor, ruflo swarms). 6 tools wired: `ruview.presence.now`, `ruview.vitals.get_{breathing,heart_rate,all}`, `ruview.bfld.last_scan`, `ruview.bfld.subscribe`. stdio + Streamable HTTP (`POST /mcp`, Origin-validated, bearer-token auth, `127.0.0.1` bind). Full 20-tool Zod schema barrel + 5 RUVIEW-POLICY governance tools. 93 tests. [ADR-124](docs/adr/ADR-124-rvagent-mcp-ruvector-npm-integration.md). Try: `npx @ruvnet/rvagent stdio`. |
|
|
||||||
| [Semantic Primitives — Precision/Recall](docs/integrations/semantic-primitives-metrics.md) | Per-primitive F1 on the held-out paired-capture set: someone-sleeping, possible-distress, room-active, elderly-inactivity-anomaly, meeting, bathroom, fall-risk, bed-exit, no-movement, multi-room. |
|
|
||||||
| [Claude Code / Codex Plugin](plugins/ruview/README.md) | The `ruview` plugin + marketplace — skills, `/ruview-*` commands, agents, and the Codex prompt mirror |
|
|
||||||
| [Architecture Decisions](docs/adr/README.md) | 96 ADRs — why each technical choice was made, organized by domain (hardware, signal processing, ML, platform, infrastructure) |
|
|
||||||
| [Domain Models](docs/ddd/README.md) | 8 DDD models (RuvSense, Signal Processing, Training Pipeline, Hardware Platform, Sensing Server, WiFi-Mat, CHCI, rvCSI) — bounded contexts, aggregates, domain events, and ubiquitous language |
|
|
||||||
| [rvCSI — edge RF sensing runtime](https://github.com/ruvnet/rvcsi) | Rust-first / TypeScript-accessible / hardware-abstracted CSI runtime: multi-source ingestion (incl. real nexmon_csi `.pcap` from a **Raspberry Pi 5** / Pi 4 / Pi 3B+ — CYW43455 / BCM43455c0) → validation → DSP → typed events → RuVector RF memory ([ADR-095](docs/adr/ADR-095-rvcsi-edge-rf-sensing-platform.md), [ADR-096](docs/adr/ADR-096-rvcsi-ffi-crate-layout.md), [domain model](docs/ddd/rvcsi-domain-model.md)). Now its own repo — [`ruvnet/rvcsi`](https://github.com/ruvnet/rvcsi) — vendored here under `vendor/rvcsi`; 9 `rvcsi-*` crates on crates.io, `@ruv/rvcsi` on npm, plus a Claude Code plugin. |
|
|
||||||
| [Desktop App](v2/crates/wifi-densepose-desktop/README.md) | **WIP** — Tauri v2 desktop app for node management, OTA updates, WASM deployment, and mesh visualization |
|
| [Desktop App](v2/crates/wifi-densepose-desktop/README.md) | **WIP** — Tauri v2 desktop app for node management, OTA updates, WASM deployment, and mesh visualization |
|
||||||
| `ruview-swarm` | Drone swarm control system (ADR-148) — hierarchical-mesh topology, Raft consensus, MARL, CSI sensing payload, MAVLink/PX4/ArduPilot compatibility, Ruflo AI-agent integration |
|
|
||||||
| [Medical Examples](examples/medical/README.md) | Contactless blood pressure, heart rate, breathing rate via 60 GHz mmWave radar — $15 hardware, no wearable |
|
| [Medical Examples](examples/medical/README.md) | Contactless blood pressure, heart rate, breathing rate via 60 GHz mmWave radar — $15 hardware, no wearable |
|
||||||
| [Extended Documentation](docs/readme-details.md) | Latest additions, key features, installation, quick start, signal processing, training, CLI, testing, deployment, and changelog |
|
| [Extended Documentation](docs/readme-details.md) | Latest additions, key features, installation, quick start, signal processing, training, CLI, testing, deployment, and changelog |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🚧 Beta software
|
|
||||||
|
|
||||||
> **Beta Software** — Under active development. APIs and firmware may change. Known limitations:
|
|
||||||
> - ESP32-C3 and original ESP32 are not supported (single-core, insufficient for CSI DSP)
|
|
||||||
> - Single ESP32 deployments have limited spatial resolution — use 2+ nodes or add a [Cognitum Seed](https://cognitum.one) for best results
|
|
||||||
> - Camera-free pose accuracy is limited (PCK@20 ≈ 2.5% with proxy labels) — [camera ground-truth training](docs/adr/ADR-079-camera-ground-truth-training.md) targets **35%+ PCK@20**; the pipeline is implemented, but the data-collection and evaluation phases (ADR-079 P7–P9) are still pending.
|
|
||||||
>
|
|
||||||
> Contributions and bug reports welcome at [Issues](https://github.com/ruvnet/RuView/issues).
|
|
||||||
|
|
||||||
## 📄 License
|
## 📄 License
|
||||||
|
|
||||||
MIT License — see [LICENSE](LICENSE) for details.
|
MIT License — see [LICENSE](LICENSE) for details.
|
||||||
|
|
||||||
## 🤝 Creator Affiliate Program
|
|
||||||
|
|
||||||
**For TikTok · Instagram · YouTube creators** — earn **25% on every Cognitum sale** you refer. The RuFlo, RuView, and RuVector videos you're already making have done millions of views; get paid for the orders they drive. Click-tracking activates instantly; commissions activate after a quick manual review (usually under 24 hours).
|
|
||||||
|
|
||||||
[Apply now → cognitum.one/affiliate](https://cognitum.one/affiliate)
|
|
||||||
|
|
||||||
## 📞 Support
|
## 📞 Support
|
||||||
|
|
||||||
[GitHub Issues](https://github.com/ruvnet/RuView/issues) | [Discussions](https://github.com/ruvnet/RuView/discussions) | [PyPI](https://pypi.org/project/wifi-densepose/)
|
[GitHub Issues](https://github.com/ruvnet/RuView/issues) | [Discussions](https://github.com/ruvnet/RuView/discussions) | [PyPI](https://pypi.org/project/wifi-densepose/)
|
||||||
|
|||||||
@@ -1,50 +0,0 @@
|
|||||||
# AetherArena ("AA") — The Official Spatial-Intelligence Benchmark
|
|
||||||
|
|
||||||
> **Public leaderboard. Private evaluation split. Open scorer. Signed results.**
|
|
||||||
|
|
||||||
AetherArena is a **standalone, project-agnostic benchmark** for camera-free **spatial intelligence** — pose, presence, occupancy, tracking, and vitals from RF/WiFi (and, over time, mmWave / UWB / radar / lidar / multimodal). It is **not** a single-vendor leaderboard: any team, framework, or sensing modality can enter, and every entrant — including the RuView baseline that donated the seed scorer — is scored by the identical, open, pinned harness.
|
|
||||||
|
|
||||||
Specified in [ADR-149](../docs/adr/ADR-149-public-community-leaderboard-huggingface.md) (Accepted).
|
|
||||||
|
|
||||||
Canonical home: **`ruvnet/aether-arena`** + a Hugging Face Space (deploy pending — see `STATUS`).
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Why
|
|
||||||
|
|
||||||
WiFi/RF spatial sensing has no shared yardstick — papers self-report against inconsistent splits and metrics, with **no accounting for latency, reproducibility, or privacy leakage**. AA fixes the *measurement*, not just the models: a single deterministic scorer, a private held-out split nobody can train on, and a signed result ledger that can't be silently edited.
|
|
||||||
|
|
||||||
## What gets measured (v0)
|
|
||||||
|
|
||||||
| Category | Metric | Status |
|
|
||||||
|----------|--------|--------|
|
|
||||||
| **Pose** | PCK@0.2 (all / torso), OKS | Ranked |
|
|
||||||
| **Presence** | accuracy, FP/FN | Ranked |
|
|
||||||
| **Edge latency** | p50 / p95 / p99 ms | Ranked |
|
|
||||||
| **Determinism** | proof-hash pass/fail | Ranked (gate) |
|
|
||||||
| Tracking (MOTA) | — | activates when multi-person clips land |
|
|
||||||
| Vitals (BPM err) | — | activates when paired vitals ground truth lands |
|
|
||||||
| **Privacy leakage** | membership-inference ∈ [0,1] | **gated — not ranked** until the attacker ships |
|
|
||||||
| Cross-room | degradation ratio | coming soon |
|
|
||||||
|
|
||||||
The headline rank is the **category metric**; an optional `arena_score = quality × latency_factor × privacy_factor × determinism_gate` is exposed alongside (never instead) so accuracy can't win at any cost. See ADR-149 §2.5.
|
|
||||||
|
|
||||||
## How scoring works
|
|
||||||
|
|
||||||
The scorer is RuView's **already-published** `wifi-densepose-train` acceptance harness (`ruview_metrics` + ADR-145 `ablation`), run in a pinned sandbox. **You submit a model, not predictions** — predictions on data you hold prove nothing. Your model is scored against a **private** MM-Fi held-out split (CC BY-NC 4.0; Wi-Pose excluded for redistribution reasons), and one **signed, append-only** row is written to the results ledger with a determinism proof hash.
|
|
||||||
|
|
||||||
Submission lifecycle: `submitted → validated → quarantined → smoke_scored → full_scored → published` (or `rejected` with a reason). The model only ever runs inside a no-network, read-only-FS sandbox.
|
|
||||||
|
|
||||||
## Submit (when the Space is live)
|
|
||||||
|
|
||||||
1. Write a manifest: [`schema/aa-submission.toml`](schema/aa-submission.toml).
|
|
||||||
2. Push your model artifact (`.safetensors` / `.rvf` / LoRA adapter) + manifest to the Space.
|
|
||||||
3. Watch it move through the lifecycle; your signed row appears on the board.
|
|
||||||
|
|
||||||
## Verify it's fair (you don't have to trust us)
|
|
||||||
|
|
||||||
See [`VERIFY.md`](VERIFY.md) — run the **open scorer** locally on the **public smoke split**, reproduce the determinism hash, and confirm RuView's own entries were scored by the identical path. That five-step check is the launch gate (ADR-149 §7).
|
|
||||||
|
|
||||||
## Neutrality
|
|
||||||
|
|
||||||
AA is a neutral commons. The scorer is open and versioned; any metric change is a public `harness_version` bump that **re-scores all entries**. RuView donated the seed harness and enters as one baseline — it gets no special treatment (ADR-149 §2.8).
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
# AetherArena — Build Status
|
|
||||||
|
|
||||||
Tracks ADR-149 implementation milestones. "Complete" = benchmark **infrastructure** done,
|
|
||||||
tested, CI-gated, deploy-ready, RuView baseline entered, §7 acceptance test passing.
|
|
||||||
Model **SOTA** (e.g. MM-Fi PCK@20 ~72%) is a separate long-running ML effort, blocked on
|
|
||||||
ADR-079 camera-ground-truth collection — *not* an infra-completion blocker.
|
|
||||||
|
|
||||||
| # | Milestone | Status |
|
|
||||||
|---|-----------|--------|
|
|
||||||
| M1 | ADR-149 Accepted + committed | ✅ done |
|
|
||||||
| M2 | Scorer runner (`aa_score_runner`) — **real model scoring** + witness (proof+inputs hash) + **repeatability analysis** | ✅ done — builds `--no-default-features`, determinism gate PASS, repeatable 16/16 |
|
|
||||||
| M3 | CI harness-gate workflow (PR runs scorer + repeatability + real-scoring smoke + ledger verify) | ✅ done — `.github/workflows/aether-arena-harness.yml` |
|
|
||||||
| M4 | Scaffold: README + submission schema + VERIFY (acceptance test) | ✅ done |
|
|
||||||
| M5 | Public smoke split (committed) + private MM-Fi held-out split prep | 🟡 smoke split done (`fixtures/smoke_*.json`); private MM-Fi prep pending |
|
|
||||||
| M6 | HF Space (Gradio) — leaderboard + ledger integrity + submit/verify/about | ✅ deployed → https://huggingface.co/spaces/ruvnet/aether-arena (sandboxed scorer container = later hardening) |
|
|
||||||
| M7 | **Witness ledger chain** — append-only, hash-chained, tamper-evident | ✅ done — `ledger/ledger_tools.py` (seed/append/verify); tamper test fails as designed |
|
|
||||||
| M8 | Public launch | ✅ Space **LIVE** (gradio 5.9.1, serving 200) — **board empty, awaiting first real harness score** (benchmark-first: no seeded numbers) |
|
|
||||||
|
|
||||||
## v0 infrastructure: COMPLETE
|
|
||||||
Implement ✅ · Test ✅ · Deploy to HF ✅ (https://huggingface.co/spaces/ruvnet/aether-arena) · Instructions+Verification ✅ · PR runs the harness ✅ (PR #874, AA harness gate **passed**).
|
|
||||||
Remaining = data + hardening, not infra: private MM-Fi held-out split (M5), sandboxed scorer container (M6), privacy-leakage attacker (gated category), and **model SOTA** (separate ML effort, blocked on ADR-079 — explicitly not an infra exit).
|
|
||||||
|
|
||||||
## Benchmark-first posture (per user direction)
|
|
||||||
- **No placeholder numbers on the board.** The ledger seeds to genesis only; every result is a real scoring-pipeline witness. RuView gets no seeded baseline.
|
|
||||||
- **Witness chain** = `inputs_sha256` (binds witness to exact inputs) + `proof_sha256` (cross-platform-stable score hash) + the append-only hash-chained ledger. Repeatability analysis (`--repeat N`) proves the proof hash is identical across runs.
|
|
||||||
|
|
||||||
## Blockers / decisions needed
|
|
||||||
- **HF deploy (M6)** — token is in GCP Secret Manager (`HUGGINGFACE_API_KEY`); creating the public `ruvnet/aether-arena` Space still needs an explicit go-ahead.
|
|
||||||
- **MM-Fi is CC BY-NC** → AA must stay non-commercial / legally distinct from the commercial RuView product.
|
|
||||||
- **Private MM-Fi split (M5)** — needs the dataset pulled + a held-out split assembled before real public scoring replaces the smoke fixture.
|
|
||||||
@@ -1,78 +0,0 @@
|
|||||||
# Verifying AetherArena (you don't have to trust us)
|
|
||||||
|
|
||||||
AA's credibility rests on a stranger being able to reproduce a score and see that the rules are fair. This is the **launch gate** (ADR-149 §7): v0 does not ship until all five checks below pass for someone with no insider access.
|
|
||||||
|
|
||||||
> **Wider context:** this page covers the *leaderboard scorer*. For the whole-platform answer to
|
|
||||||
> "is this real / does it actually work?" — including the deterministic pipeline proof, the
|
|
||||||
> published models + public-benchmark numbers, and the built-in-public development trail — see
|
|
||||||
> [`docs/proof-of-capabilities.md`](../docs/proof-of-capabilities.md).
|
|
||||||
|
|
||||||
## The open scorer
|
|
||||||
|
|
||||||
The scoring engine is a pure-Rust, GPU-free binary: `aa_score_runner` in `wifi-densepose-train`. It runs the real `ruview_metrics` pose-acceptance harness on a fixed fixture and emits a cross-platform-stable SHA-256 **determinism proof**.
|
|
||||||
|
|
||||||
### Reproduce the determinism hash locally
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd v2
|
|
||||||
# Verify the committed expected hash still matches (this is the CI gate):
|
|
||||||
cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features
|
|
||||||
# → prints the witness (inputs_sha256 + proof_sha256) and "VERDICT: PASS"
|
|
||||||
|
|
||||||
# See the witness row as JSON:
|
|
||||||
cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --json
|
|
||||||
```
|
|
||||||
|
|
||||||
### Witness chain — proof + repeatability analysis
|
|
||||||
|
|
||||||
Every score is a **witness**: `inputs_sha256` (binds it to the exact inputs scored)
|
|
||||||
+ `proof_sha256` (cross-platform-stable hash of the quantised score) + `harness_version`.
|
|
||||||
Witnesses are recorded in an **append-only, hash-chained ledger** (each row references
|
|
||||||
the previous row's hash), so a silent edit to any past row breaks the chain.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Repeatability: run the scorer K times, confirm ONE identical proof hash:
|
|
||||||
cd v2
|
|
||||||
cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16
|
|
||||||
# → {"repeatability":{"runs":16,"unique_proof_hashes":1,"repeatable":true,...}}
|
|
||||||
|
|
||||||
# Real model scoring (score predictions against an eval split):
|
|
||||||
cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- \
|
|
||||||
--split ../aether-arena/fixtures/smoke_split.json \
|
|
||||||
--pred ../aether-arena/fixtures/smoke_pred.json --json
|
|
||||||
|
|
||||||
# Verify the witness ledger chain is intact (tamper-evident):
|
|
||||||
cd ../aether-arena/ledger && python3 ledger_tools.py verify
|
|
||||||
# → "OK: N rows, chain intact" (edit any row and it reports the broken link)
|
|
||||||
```
|
|
||||||
|
|
||||||
The expected hash is committed at [`fixtures/expected_score.sha256`](fixtures/expected_score.sha256). Same harness version + same fixture → same hash on glibc / MSVC / Apple. If your local run prints `VERDICT: PASS`, you have reproduced the scorer.
|
|
||||||
|
|
||||||
### What happens if the scoring maths changes
|
|
||||||
|
|
||||||
Any edit to `ruview_metrics.rs`, `ablation.rs`, or `aa_score_runner.rs` moves the hash and **fails the CI gate** (`.github/workflows/aether-arena-harness.yml`) until the maintainer regenerates and reviews:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cargo run -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --generate-hash \
|
|
||||||
  > ../aether-arena/fixtures/expected_score.sha256
|
|
||||||
```
|
|
||||||
|
|
||||||
So a scorer change is always a reviewed, public diff — never silent. That's `harness_version` pinning + `determinism_gate` in action (ADR-149 §2.4–§2.5).
|
|
||||||
|
|
||||||
## The five-step acceptance test (v0 launch gate)
|
|
||||||
|
|
||||||
A stranger must be able to:
|
|
||||||
|
|
||||||
1. **Submit** a model (artifact + `schema/aa-submission.toml`) with no insider help.
|
|
||||||
2. **Get a deterministic score** — same model + same `harness_version` → same numbers.
|
|
||||||
3. **See the signed row** appended to the public results ledger.
|
|
||||||
4. **Rerun the scorer locally** on the public smoke split and reproduce the logic (the command above).
|
|
||||||
5. **Understand why the rank is fair** — private split, open scorer, pinned version, proof hash — from these docs alone.
|
|
||||||
|
|
||||||
If any step fails, v0 is not ready.
|
|
||||||
|
|
||||||
## Current status
|
|
||||||
|
|
||||||
- ✅ Step 4 (rerun the open scorer locally, reproduce the hash) — **works today** via `aa_score_runner`.
|
|
||||||
- ✅ CI harness gate runs the scorer on every PR.
|
|
||||||
- ⏳ Steps 1–3, 5 (HF Space submission flow + signed ledger) — in progress; require the HF Space deploy (needs an HF token / maintainer authorization).
|
|
||||||
@@ -1,87 +0,0 @@
|
|||||||
# RuView Calibration Service (reference implementation)
|
|
||||||
|
|
||||||
Turn a **shared WiFi-CSI pose base model** into a room-specific one with a **30-second labeled
|
|
||||||
calibration** and a **~11 KB per-room LoRA adapter**. This is the deployable resolution of the
|
|
||||||
cross-subject / cross-environment generalization problem (full study: [ADR-150 §3.3–3.6](../../docs/adr/ADR-150-rf-foundation-encoder.md)).
|
|
||||||
|
|
||||||
## Why
|
|
||||||
|
|
||||||
Zero-shot WiFi pose generalizes poorly to a **new room or new person** — an unseen room can drop a
|
|
||||||
strong model to near-random. But that gap is **not** algorithmically closeable (CORAL, DANN,
|
|
||||||
instance-norm, contrastive foundation-pretraining all failed) and **not** closeable by collecting
|
|
||||||
more subjects (saturates ~64%). It **is** closeable, cheaply, at deployment time: a handful of
|
|
||||||
labeled frames from the actual room pin down its multipath instantly.
|
|
||||||
|
|
||||||
| Deployment case | Zero-shot | + in-room calibration |
|
|
||||||
|-----------------|----------:|----------------------:|
|
|
||||||
| Same room, new person (cross-subject) | 64% | **76%** (200 samples) |
|
|
||||||
| **New room + new person (cross-environment)** | **~10%** | **60% @ 5 samples → 73% @ 200** |
|
|
||||||
|
|
||||||
**Verified demo (this code, source-only base on an unseen MM-Fi room E04):**
|
|
||||||
`zero-shot 3.09% → after 200-sample calibration 74.29%` (+71 pts).
|
|
||||||
|
|
||||||
## How it works
|
|
||||||
|
|
||||||
A frozen shared **base** (transformer + temporal attention pool + skeleton-graph head, the published
|
|
||||||
[`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose)) plus a
|
|
||||||
tiny **LoRA adapter** (rank 8 on the input projection + pose head — **11,200 params ≈ 11 KB int8 /
|
|
||||||
22 KB fp16**) fitted per room. Thousands of room-adapters hang off one base.
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 1) Capture a short labeled clip in the deployment room -> calib.npz {X:[N,3,114,10], Y:[N,17,2]}
|
|
||||||
# (~100–200 samples recommended; below ~20 the adapter can underperform zero-shot)
|
|
||||||
|
|
||||||
# 2) Fit the per-room adapter (~11 KB):
|
|
||||||
python calibrate.py --base pose_mmfi_best.pt --data calib.npz --out room.adapter.npz
|
|
||||||
|
|
||||||
# 3) Run calibrated inference (base + room adapter):
|
|
||||||
python infer.py --base pose_mmfi_best.pt --adapter room.adapter.npz --data frames.npz --out kp.npy
|
|
||||||
# omit --adapter to run the uncalibrated (zero-shot) base
|
|
||||||
```
|
|
||||||
|
|
||||||
`X` is CSI amplitude `[N, 3 antennas, 114 subcarriers, 10 frames]` (per-sample standardization is
|
|
||||||
applied internally). `Y` is `[N,17,2]` COCO keypoints in `[0,1]`.
|
|
||||||
|
|
||||||
## Calibration budget (measured, rank-8 LoRA, 3 seeds — ADR-150 §3.5)
|
|
||||||
|
|
||||||
| Labeled samples/room | cross-subject | cross-environment |
|
|
||||||
|---------------------:|--------------:|------------------:|
|
|
||||||
| 0 (zero-shot) | 64% | ~10% |
|
|
||||||
| 5 | — | 60% |
|
|
||||||
| 20 | 66% | 66% |
|
|
||||||
| 50 | 70% | 70% |
|
|
||||||
| 200 | 72% | 73% |
|
|
||||||
|
|
||||||
Knee at ~50 samples (~70%); **below ~20 samples the adapter can hurt** (too few to fit reliably).
|
|
||||||
|
|
||||||
## Two models, two producers (not interchangeable)
|
|
||||||
|
|
||||||
Adapters are **model-specific**. There are two calibration producers here:
|
|
||||||
|
|
||||||
| Producer | Target model | Input | Adapter format | Consumer |
|
|
||||||
|----------|--------------|-------|----------------|----------|
|
|
||||||
| `calibrate.py` | MM-Fi **transformer** (`pose_mmfi_best.pt`, 3×114×10) | `[N,3,114,10]` | `.npz` (`proj`/`head` LoRA) | this Python `infer.py` |
|
|
||||||
| `cog_calibrate.py` | cog **conv+MLP** (`pose_v1.safetensors`, 56×20) | `[N,56,20]` | `.safetensors` (`fc1.a`/`fc1.b`/`fc2.a`/`fc2.b`) | Rust `cog-pose-estimation run --adapter` |
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Produce a cog-format per-room adapter for the deployed Rust pose engine:
|
|
||||||
python cog_calibrate.py --base pose_v1.safetensors --data calib.npz --out room.safetensors
|
|
||||||
# then in the cog runtime:
|
|
||||||
cog-pose-estimation run --config <cfg> --adapter room.safetensors
|
|
||||||
```
|
|
||||||
|
|
||||||
Same LoRA *mechanism* (ADR-150 §3.5), different architecture and key layout — an adapter from one
|
|
||||||
producer will not load into the other model.
|
|
||||||
|
|
||||||
## Notes
|
|
||||||
|
|
||||||
- **Calibration only helps when the base hasn't already seen the room.** The published flagship was
|
|
||||||
trained on MM-Fi `random_split`, so calibrating it on an MM-Fi subject is a near-no-op (it already
|
|
||||||
saw them); for a genuinely new real-world room it is zero-shot and calibration applies. To
|
|
||||||
*reproduce the demo* on a held-out MM-Fi room, train a source-only base (exclude the target
|
|
||||||
environment) — see `ADR-150 §3.6` and the few-shot harness in `aether-arena/staging/`.
|
|
||||||
- Adapter is saved fp16 (~22 KB); quantize to int8 for the ~11 KB on-device form.
|
|
||||||
- Inference is real-time on CPU (the 75 K-param `micro` variant runs in 0.135 ms single-thread x86;
|
|
||||||
see [`docs/benchmarks/wifi-pose-efficiency-frontier.md`](../../docs/benchmarks/wifi-pose-efficiency-frontier.md)).
|
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
"""RuView per-room calibration — fit a ~11 KB LoRA adapter from a short labeled in-room capture.
|
|
||||||
|
|
||||||
python calibrate.py --base pose_mmfi_best.pt --data room_calib.npz --out room_A.adapter.npz
|
|
||||||
|
|
||||||
`room_calib.npz` must contain `X` [N,3,114,10] CSI amplitude and `Y` [N,17,2] (or [N,34]) keypoints
|
|
||||||
in [0,1] — the labeled calibration samples from the deployment room (~100–200 recommended; ≥20).
|
|
||||||
Outputs a tiny adapter (.npz, saved as fp16 ≈ 22 KB; ≈ 11 KB once quantized to int8) that, loaded over the shared base at inference, recovers
|
|
||||||
SOTA-level pose for that room/person (ADR-150 §3.5–3.6).
|
|
||||||
"""
|
|
||||||
import argparse
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
|
|
||||||
from model import PoseNet, standardize
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Fit a per-room LoRA adapter over a frozen base and save it as an fp16 ``.npz``.

    Reads ``X`` and ``Y`` from ``--data``, loads the ``--base`` checkpoint into
    ``PoseNet``, attaches LoRA weights of rank ``--rank``, trains only the parameters
    whose names end in ``.A`` / ``.B`` for ``--iters`` AdamW steps, then writes
    ``net.lora_state()`` cast to float16 together with a ``_meta`` array
    ``[rank, n_samples, n_trainable_params]``.

    Exits through ``argparse`` (status 2) when the calibration set is empty or when
    ``X`` and ``Y`` do not contain the same number of samples.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--base", required=True, help="base checkpoint (pose_mmfi_best.pt)")
    ap.add_argument("--data", required=True, help="labeled calibration .npz with X and Y")
    ap.add_argument("--out", required=True, help="output adapter .npz")
    ap.add_argument("--rank", type=int, default=8)
    ap.add_argument("--iters", type=int, default=600)
    ap.add_argument("--lr", type=float, default=8e-4)
    ap.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
    a = ap.parse_args()

    # np.load on an .npz returns a lazy NpzFile that keeps the archive open;
    # copy the arrays out and close it before training starts.
    with np.load(a.data) as z:
        X = torch.tensor(z["X"].astype(np.float32))
        y_raw = z["Y"]
        Y = torch.tensor(y_raw.reshape(len(y_raw), 34).astype(np.float32))
    n = len(X)
    # Without these checks an empty set fails deep inside torch.randint with an
    # unrelated-looking message, and a length mismatch goes unnoticed.
    if n == 0:
        ap.error(f"{a.data}: X contains no calibration samples")
    if len(Y) != n:
        ap.error(f"{a.data}: X has {n} samples but Y has {len(Y)}")
    if n < 20:
        print(f"WARNING: only {n} calibration samples — below ~20 the adapter may underperform "
              f"zero-shot (ADR-150 §3.5). Recommend ~100–200.")
    dev = a.device

    net = PoseNet().to(dev)
    # NOTE: torch.load unpickles the checkpoint — only load base files from a trusted source.
    # strict=False tolerates key differences between the checkpoint and this PoseNet.
    net.load_state_dict(torch.load(a.base, map_location=dev), strict=False)
    net.add_lora(r=a.rank).to(dev)
    # Freeze everything except the LoRA factors (parameter names ending in .A / .B).
    for k, p in net.named_parameters():
        p.requires_grad = k.endswith((".A", ".B"))
    trainable = [p for p in net.parameters() if p.requires_grad]
    n_tr = sum(p.numel() for p in trainable)

    Xs = standardize(X.to(dev))
    Yt = Y.to(dev)
    opt = torch.optim.AdamW(trainable, lr=a.lr, weight_decay=0.0)
    lossf = nn.SmoothL1Loss(beta=0.1)
    bs = min(128, n)
    net.train()
    for _ in range(a.iters):
        # Sample a batch with replacement.
        bi = torch.randint(0, n, (bs,), device=dev)
        xb = Xs[bi]
        # Light augmentation: random slice dropout (p = 0.15) plus per-sample-scaled noise.
        # NOTE(review): the mask is drawn per (sample, dim-1 slice). With the documented
        # [N,3,114,10] layout dim 1 is the antenna axis, although the original comment
        # called this "subcarrier dropout" — confirm which axis the base was trained with.
        m = (torch.rand(xb.shape[0], xb.shape[1], 1, 1, device=dev) > 0.15).float()
        xb = xb * m + 0.03 * torch.randn_like(xb) * torch.rand(xb.shape[0], 1, 1, 1, device=dev)
        opt.zero_grad()
        lossf(net(xb), Yt[bi]).backward()
        opt.step()

    adapter = net.lora_state()
    # Cast once and reuse the fp16 arrays for both the size report and the file.
    adapter_fp16 = {k: v.astype(np.float16) for k, v in adapter.items()}
    nbytes = sum(v.nbytes for v in adapter_fp16.values())
    np.savez(a.out, **adapter_fp16,
             _meta=np.array([a.rank, n, n_tr], dtype=np.int64))
    print(f"saved {a.out} | rank {a.rank} | {n_tr:,} params | ~{nbytes/1024:.1f} KB fp16 | "
          f"from {n} labeled samples")


if __name__ == "__main__":
    main()
|
|
||||||
@@ -1,120 +0,0 @@
|
|||||||
"""Per-room calibration producer for the cog-pose-estimation **conv+MLP** model
|
|
||||||
(`pose_v1.safetensors`, 56 subcarriers x 20 frames). Companion to `calibrate.py`
|
|
||||||
(which targets the MM-Fi *transformer* model) — different model, different adapter
|
|
||||||
key layout, NOT interchangeable (ADR-150 §3.5).
|
|
||||||
|
|
||||||
Fits a rank-r LoRA on the pose head (fc1, fc2) from a short labeled in-room capture and
|
|
||||||
writes a **safetensors** adapter with keys `fc1.a`/`fc1.b`/`fc2.a`/`fc2.b` (scale baked
|
|
||||||
into `b`) — exactly what `cog-pose-estimation run --adapter <file>` consumes.
|
|
||||||
|
|
||||||
python cog_calibrate.py --base pose_v1.safetensors --data calib.npz --out room.safetensors
|
|
||||||
|
|
||||||
`calib.npz`: `X` [N,56,20] CSI window + `Y` [N,17,2] (or [N,34]) keypoints in [0,1].
|
|
||||||
"""
|
|
||||||
import argparse
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
import torch.nn.functional as F
|
|
||||||
|
|
||||||
|
|
||||||
class CogPose(nn.Module):
    """Mirrors cog-pose-estimation's PoseNet (Candle) exactly — same safetensors keys.

    Three dilated 1-D convolutions over the input's last axis, a mean over that axis,
    and a two-layer MLP head ending in a sigmoid. Optional LoRA pairs ``(a, b)`` add
    ``(x @ a) @ b`` to the output of ``fc1`` and ``fc2``.
    """

    def __init__(self):
        super().__init__()
        self.enc = nn.ModuleDict({
            "c1": nn.Conv1d(56, 64, 3, padding=1, dilation=1),
            "c2": nn.Conv1d(64, 128, 3, padding=2, dilation=2),
            "c3": nn.Conv1d(128, 128, 3, padding=4, dilation=4),
        })
        self.head = nn.ModuleDict({"fc1": nn.Linear(128, 256), "fc2": nn.Linear(256, 34)})
        # Each slot is None (no adapter) or a tuple (a, b) of nn.Parameter.
        self.fc1_lora = None
        self.fc2_lora = None

    def _lora(self, slot, x, y):
        """Return ``y`` plus the low-rank correction ``(x @ a) @ b``; ``y`` unchanged if ``slot`` is None."""
        if slot is None:
            return y
        a, b = slot
        return y + (x @ a) @ b

    def forward(self, x):  # x: [B, 56, 20]
        """Map a batch ``x`` of shape [B, 56, 20] to sigmoid outputs of shape [B, 34]."""
        h = F.relu(self.enc["c1"](x))
        h = F.relu(self.enc["c2"](h))
        h = F.relu(self.enc["c3"](h))
        h = h.mean(2)  # [B, 128]
        z1 = self.head["fc1"](h)
        z1 = self._lora(self.fc1_lora, h, z1)
        h1 = F.relu(z1)
        z2 = self.head["fc2"](h1)
        z2 = self._lora(self.fc2_lora, h1, z2)
        return torch.sigmoid(z2)  # [B, 34]

    def add_lora(self, r=4):
        """Attach fresh rank-``r`` LoRA pairs to ``fc1`` and ``fc2`` and return ``self``.

        ``a`` is drawn from N(0, 0.02²) and ``b`` starts at zero, so the model output
        is unchanged until training moves ``b``. Parameters are registered under the
        stable names ``fc1_lora_a``, ``fc1_lora_b``, ``fc2_lora_a``, ``fc2_lora_b``:
        ``state_dict`` keys are then identical across runs, and calling ``add_lora``
        again replaces the previous pair instead of leaving it registered.
        """
        for name, d_in, d_out in (("fc1", 128, 256), ("fc2", 256, 34)):
            a = nn.Parameter(torch.randn(d_in, r) * 0.02)
            b = nn.Parameter(torch.zeros(r, d_out))
            self.register_parameter(f"{name}_lora_a", a)
            self.register_parameter(f"{name}_lora_b", b)
            setattr(self, f"{name}_lora", (a, b))
        return self
|
|
||||||
|
|
||||||
|
|
||||||
def load_base(net: CogPose, path: str):
    """Load base weights from the safetensors file at ``path`` into ``net`` and return ``net``.

    Checkpoint keys are used as stored (``enc.c1.weight``, ``head.fc1.bias``, ...);
    they already match the ``ModuleDict`` layout, so no remapping is needed. Loading
    stays non-strict, but any key that failed to line up is reported, because a
    silently unloaded base would calibrate on top of random weights.
    """
    from safetensors.torch import load_file

    state = load_file(path)
    result = net.load_state_dict(state, strict=False)
    # LoRA parameters never live in the base checkpoint, so they are not worth reporting.
    missing = [k for k in result.missing_keys if "lora" not in k]
    unexpected = list(result.unexpected_keys)
    if missing or unexpected:
        print(f"WARNING: {path} does not fully match the model — "
              f"missing keys: {missing}; unexpected keys: {unexpected}")
    return net
|
|
||||||
|
|
||||||
|
|
||||||
def fit(base: str, data: str, out: str, rank: int = 4, iters: int = 400, lr: float = 1e-3):
    """Fit a rank-``rank`` LoRA on the pose head and write it to ``out`` as safetensors.

    Args:
        base: path to the base safetensors checkpoint.
        data: path to an ``.npz`` holding ``X`` and ``Y`` (``Y`` is reshaped to [N, 34]).
        out: path of the adapter file to write.
        rank: LoRA rank.
        iters: number of AdamW steps.
        lr: learning rate.

    Returns:
        ``(out, number_of_lora_parameters, number_of_samples)``.

    Raises:
        ValueError: if the calibration set is empty or ``X`` and ``Y`` differ in length.
    """
    # Copy the arrays out and close the archive; NpzFile otherwise keeps the file open.
    with np.load(data) as z:
        X = torch.tensor(z["X"].astype(np.float32))  # [N,56,20]
        y_raw = z["Y"]
        Y = torch.tensor(y_raw.reshape(len(y_raw), 34).astype(np.float32))
    n = len(X)
    if n == 0:
        raise ValueError(f"{data}: X contains no calibration samples")
    if len(Y) != n:
        raise ValueError(f"{data}: X has {n} samples but Y has {len(Y)}")

    net = CogPose()
    load_base(net, base)
    net.add_lora(rank)
    # Freeze the whole network, then re-enable only the LoRA factors.
    for p in net.parameters():
        p.requires_grad = False
    lora = [*net.fc1_lora, *net.fc2_lora]
    for p in lora:
        p.requires_grad = True

    opt = torch.optim.AdamW(lora, lr=lr, weight_decay=0.0)
    lossf = nn.SmoothL1Loss(beta=0.1)
    bs = min(64, n)
    net.train()
    for _ in range(iters):
        bi = torch.randint(0, n, (bs,))  # batch sampled with replacement
        opt.zero_grad()
        lossf(net(X[bi]), Y[bi]).backward()
        opt.step()

    a1, b1 = net.fc1_lora
    a2, b2 = net.fc2_lora
    # Export exactly what was trained. The training forward adds (x @ a) @ b with no
    # alpha/rank factor, so multiplying b by alpha/rank here (as the code previously
    # did, with alpha = 16) would make a consumer that applies the stored a/b directly
    # produce a correction alpha/rank times larger than the one that was optimized.
    tensors = {
        "fc1.a": a1.detach().contiguous(),
        "fc1.b": b1.detach().contiguous(),
        "fc2.a": a2.detach().contiguous(),
        "fc2.b": b2.detach().contiguous(),
    }
    from safetensors.torch import save_file
    save_file(tensors, out)
    return out, sum(p.numel() for p in lora), n
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Command-line entry point: parse the flags, fit the adapter, report what was written.
    parser = argparse.ArgumentParser()
    for required_flag in ("--base", "--data", "--out"):
        parser.add_argument(required_flag, required=True)
    parser.add_argument("--rank", type=int, default=4)
    parser.add_argument("--iters", type=int, default=400)
    cli = parser.parse_args()

    saved_path, lora_param_count, sample_count = fit(
        cli.base, cli.data, cli.out, cli.rank, cli.iters
    )
    print(f"saved {saved_path} | {lora_param_count} LoRA params from {sample_count} samples "
          f"(keys fc1.a/fc1.b/fc2.a/fc2.b — load with cog-pose-estimation run --adapter)")
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
"""Run calibrated WiFi-CSI pose inference: shared base + a per-room LoRA adapter.
|
|
||||||
|
|
||||||
python infer.py --base pose_mmfi_best.pt --adapter room_A.adapter.npz --data frames.npz
|
|
||||||
|
|
||||||
`frames.npz` contains `X` [N,3,114,10] CSI amplitude. Prints/saves [N,17,2] keypoints in [0,1].
|
|
||||||
Omit --adapter to run the uncalibrated (zero-shot) base. With a room adapter, expect SOTA-level
|
|
||||||
accuracy in that room/person; without one, zero-shot degrades in unseen rooms (ADR-150 §3.6).
|
|
||||||
"""
|
|
||||||
import argparse
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
|
|
||||||
from model import PoseNet, standardize
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Run pose inference from the command line: base model plus optional LoRA adapter.

    Loads the base checkpoint, optionally wraps the model with LoRA and loads the
    adapter's ``.A``/``.B`` tensors, standardizes the input frames, runs the model
    in chunks of 4096 frames and prints (and optionally saves) the keypoints
    reshaped to ``[N, 17, 2]``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--base", required=True)
    ap.add_argument("--adapter", default=None, help="per-room .adapter.npz (omit for zero-shot)")
    ap.add_argument("--data", required=True, help=".npz with X [N,3,114,10]")
    ap.add_argument("--out", default=None, help="optional .npy to save [N,17,2] keypoints")
    ap.add_argument("--rank", type=int, default=8)
    ap.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
    a = ap.parse_args()
    dev = a.device

    net = PoseNet().to(dev)
    # NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
    # strict=False tolerates checkpoints that omit some keys of the model's state dict.
    net.load_state_dict(torch.load(a.base, map_location=dev), strict=False)
    if a.adapter:
        net.add_lora(r=a.rank).to(dev)
        # np.load keeps a lazy handle on the archive; copy the tensors out and close it.
        with np.load(a.adapter) as z:
            lora = {k: z[k].astype(np.float32) for k in z.files if k.endswith((".A", ".B"))}
        net.load_lora(lora)
    net.eval()

    with np.load(a.data) as frames:
        X = torch.tensor(frames["X"].astype(np.float32)).to(dev)
    Xs = standardize(X)
    out = []
    with torch.no_grad():
        for i in range(0, len(Xs), 4096):
            out.append(net(Xs[i:i + 4096]).cpu().numpy())
    if out:
        kp = np.concatenate(out).reshape(-1, 17, 2)
    else:
        # Zero input frames: np.concatenate([]) would raise, so emit an empty result.
        kp = np.empty((0, 17, 2), dtype=np.float32)
    print(f"inferred {len(kp)} frames | adapter={'yes' if a.adapter else 'NONE (zero-shot)'}")
    if a.out:
        np.save(a.out, kp)
        print(f"saved keypoints -> {a.out}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Only run the CLI when executed as a script, not when imported.
    main()
|
|
||||||
@@ -1,107 +0,0 @@
|
|||||||
"""WiFi-CSI pose model + LoRA adapter for the RuView calibration service.
|
|
||||||
|
|
||||||
Architecture matches the published flagship checkpoint
|
|
||||||
[`ruvnet/wifi-densepose-mmfi-pose`](https://huggingface.co/ruvnet/wifi-densepose-mmfi-pose)
|
|
||||||
(`pose_mmfi_best.pt`): transformer encoder + temporal attention pooling + skeleton-graph head.
|
|
||||||
|
|
||||||
The calibration service freezes this base and fits a tiny per-room **LoRA adapter** (rank 8 on the
|
|
||||||
input projection + pose head ≈ 11K parameters, ~45 KB as float32) from ~100–200 labeled in-room samples. Empirically that lifts
|
|
||||||
cross-subject 64→72% and cross-environment 11→73% (ADR-150 §3.3–3.6).
|
|
||||||
"""
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
|
|
||||||
# COCO-17 skeleton edges for the graph-refinement head.
EDGES = [
    (0, 1), (0, 2), (1, 3), (2, 4), (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
    (5, 11), (6, 12), (11, 12), (11, 13), (13, 15), (12, 14), (14, 16),
]
# Row-normalised adjacency matrix with self-loops: start from the identity,
# mark every edge in both directions, then divide each row by its sum.
_A = np.eye(17, dtype=np.float32)
for _i, _j in EDGES:
    _A[_i, _j] = 1.0
    _A[_j, _i] = 1.0
_A = _A / _A.sum(axis=1, keepdims=True)
|
|
||||||
|
|
||||||
|
|
||||||
class LoRA(nn.Module):
    """Frozen ``nn.Linear`` plus a trainable low-rank update: y = W·x + (x·A·B)·(alpha/r)."""

    def __init__(self, base: nn.Linear, r: int = 8, alpha: int = 16):
        super().__init__()
        self.base = base
        # The wrapped layer stays fixed; only the low-rank factors A and B train.
        self.base.requires_grad_(False)
        # A starts as small Gaussian noise and B as zeros, so the adapter's
        # contribution is exactly zero until B is updated.
        self.A = nn.Parameter(torch.zeros(base.in_features, r))
        self.B = nn.Parameter(torch.zeros(r, base.out_features))
        nn.init.normal_(self.A, std=0.02)
        self.scale = alpha / r

    def forward(self, x):
        low_rank = x @ self.A @ self.B
        return self.base(x) + low_rank * self.scale
|
|
||||||
|
|
||||||
|
|
||||||
class GR(nn.Module):
    """Skeleton-graph refinement: nudges joints toward anatomically consistent positions."""

    def __init__(self, d=256, h=96):
        super().__init__()
        # Learned 32-dim embedding per joint (17 joints).
        self.je = nn.Parameter(torch.randn(17, 32) * 0.02)
        # Per-joint input = pooled feature (d) + joint embedding (32) + initial (x, y) (2).
        self.inp = nn.Linear(d + 34, h)
        self.g1 = nn.Linear(h, h)
        self.g2 = nn.Linear(h, h)
        self.out = nn.Linear(h, 2)
        # Fixed normalised skeleton adjacency, stored as a (persistent) buffer.
        self.register_buffer("A", torch.tensor(_A))

    def forward(self, z, kp0):
        batch = z.shape[0]
        pooled = z.unsqueeze(1).expand(-1, 17, -1)
        joint_emb = self.je.unsqueeze(0).expand(batch, -1, -1)
        feats = torch.relu(self.inp(torch.cat([pooled, joint_emb, kp0], -1)))
        # Two rounds of neighbour mixing over the skeleton graph.
        for layer in (self.g1, self.g2):
            feats = torch.relu(layer(torch.einsum('ij,bjh->bih', self.A, feats)))
        # Bounded residual: each coordinate moves by at most 0.3.
        return kp0 + 0.3 * torch.tanh(self.out(feats))
|
|
||||||
|
|
||||||
|
|
||||||
class PoseNet(nn.Module):
    """Flagship pose model. Input [B,3,114,10] CSI amplitude (per-sample standardized) -> [B,34]."""

    def __init__(self, na=3, nsc=114, nt=10, d=256, L=4, H=8):
        super().__init__()
        # One token per time step: the na*nsc values of a step are projected to d.
        self.proj = nn.Linear(na * nsc, d)
        self.pos = nn.Parameter(torch.randn(1, nt, d) * 0.02)
        enc = nn.TransformerEncoderLayer(d, H, d * 2, dropout=0.2, batch_first=True, activation='gelu')
        self.tf = nn.TransformerEncoder(enc, L)
        self.att = nn.Linear(d, 1)
        self.head = nn.Sequential(nn.Linear(d, 256), nn.GELU(), nn.Dropout(0.3), nn.Linear(256, 34))
        self.gr = GR(d)
        self.na, self.nsc, self.nt = na, nsc, nt

    def forward(self, x):
        B = x.shape[0]
        # [B, na, nsc, nt] -> [B, nt, na*nsc]: a sequence of per-time-step tokens.
        t = x.permute(0, 3, 1, 2).reshape(B, self.nt, self.na * self.nsc)
        h = self.tf(self.proj(t) + self.pos)
        # Attention pooling over the time axis.
        w = torch.softmax(self.att(h), 1)
        z = (h * w).sum(1)
        kp0 = torch.sigmoid(self.head(z)).reshape(B, 17, 2)
        return self.gr(z, kp0).reshape(B, 34)

    def add_lora(self, r=8, alpha=16):
        """Wrap the input projection and both linear layers of the pose head with LoRA.

        Call this after the base weights are loaded: wrapping moves the wrapped
        layers' state-dict keys under ``<name>.base.*``. Returns ``self``.
        """
        self.proj = LoRA(self.proj, r, alpha)
        self.head[0] = LoRA(self.head[0], r, alpha)
        self.head[3] = LoRA(self.head[3], r, alpha)
        return self

    def lora_state(self) -> dict:
        """Extract just the LoRA A/B tensors (the per-room adapter to save).

        Tensors are collected from the ``LoRA`` modules themselves rather than by
        key suffix, because the graph head's adjacency buffer is stored as ``gr.A``
        and a ``.A`` suffix match would wrongly export it as adapter weights.
        Keys match ``state_dict()`` naming (e.g. ``proj.A``, ``head.0.B``).
        """
        state = {}
        for name, module in self.named_modules():
            if isinstance(module, LoRA):
                state[f"{name}.A"] = module.A.detach().cpu().numpy()
                state[f"{name}.B"] = module.B.detach().cpu().numpy()
        return state

    def load_lora(self, adapter: dict):
        """Overlay the adapter tensors on the current state dict and load it. Returns ``self``.

        ``adapter`` maps state-dict keys to arrays/tensors; ``add_lora`` must have
        been called first so those keys exist.
        """
        sd = self.state_dict()
        for k, v in adapter.items():
            # as_tensor accepts numpy arrays and tensors alike; load_state_dict copies.
            sd[k] = torch.as_tensor(v)
        self.load_state_dict(sd)
        return self
|
|
||||||
|
|
||||||
|
|
||||||
def standardize(x: torch.Tensor) -> torch.Tensor:
    """Z-score each sample over dims 1-3; a 1e-6 epsilon guards against zero std."""
    sample_dims = (1, 2, 3)
    mean = x.mean(sample_dims, keepdim=True)
    std = x.std(sample_dims, keepdim=True)
    return (x - mean) / (std + 1e-6)
|
|
||||||
@@ -1,103 +0,0 @@
|
|||||||
"""Self-contained regression test for the RuView calibration service.
|
|
||||||
|
|
||||||
Exercises the committed CLI end-to-end on synthetic data (CPU, no GPU, no real checkpoint):
|
|
||||||
build a base -> calibrate.py fits an adapter -> infer.py runs base+adapter -> assert the
|
|
||||||
adapter is small, inference is shape-correct and finite, and the adapter actually changes output.
|
|
||||||
|
|
||||||
Run: python test_calibration.py (or via pytest)
|
|
||||||
"""
|
|
||||||
import json
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
|
|
||||||
HERE = Path(__file__).parent
|
|
||||||
sys.path.insert(0, str(HERE))
|
|
||||||
from model import PoseNet, standardize # noqa: E402
|
|
||||||
|
|
||||||
|
|
||||||
def _make_base(path: Path):
    """Write a seeded, randomly initialised PoseNet state dict to *path*."""
    torch.manual_seed(0)
    model = PoseNet()
    # Save without the deterministic gr.A buffer (mirrors the published checkpoint;
    # calibrate.py/infer.py load with strict=False).
    weights = {}
    for key, tensor in model.state_dict().items():
        if key == "gr.A":
            continue
        weights[key] = tensor
    torch.save(weights, path)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_data(path: Path, n: int, seed: int):
    """Save *n* synthetic samples to *path* as an ``.npz`` with arrays ``X`` and ``Y``."""
    gen = np.random.default_rng(seed)
    # Draw order matters for reproducibility: inputs first, then targets.
    csi = gen.standard_normal((n, 3, 114, 10)).astype(np.float32)
    keypoints = gen.random((n, 17, 2)).astype(np.float32)  # keypoints in [0,1]
    np.savez(path, X=csi, Y=keypoints)
|
|
||||||
|
|
||||||
|
|
||||||
def _run(*args):
    """Run a sibling script with the current interpreter and return its stdout.

    ``args[0]`` is the script file name (resolved against ``HERE``); the remaining
    values are stringified and passed as CLI arguments. Fails the test with the
    captured output if the process exits non-zero.
    """
    command = [sys.executable, str(HERE / args[0])]
    command.extend(str(value) for value in args[1:])
    proc = subprocess.run(command, capture_output=True, text=True)
    assert proc.returncode == 0, f"{args[0]} failed:\n{proc.stdout}\n{proc.stderr}"
    return proc.stdout
|
|
||||||
|
|
||||||
|
|
||||||
def test_calibration_end_to_end():
    """Calibrate on synthetic data, run inference, and check the adapter has an effect."""
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        base = workdir / "base.pt"
        calib = workdir / "calib.npz"
        frames = workdir / "frames.npz"
        adapter = workdir / "room.adapter.npz"
        kp = workdir / "kp.npy"

        _make_base(base)
        _make_data(calib, n=40, seed=1)  # ≥20 → no underfit warning
        _make_data(frames, n=16, seed=2)

        # 1) calibrate -> adapter
        calib_stdout = _run("calibrate.py", "--base", base, "--data", calib, "--out", adapter,
                            "--iters", "50", "--device", "cpu")
        assert adapter.exists(), "adapter not written"
        assert "saved" in calib_stdout.lower()
        sz = adapter.stat().st_size
        assert sz < 200_000, f"adapter unexpectedly large ({sz} bytes)"

        # The adapter must contain LoRA tensors. Materialize them and close the
        # archive so the Windows tempdir can be removed (np.load keeps a lazy handle).
        with np.load(adapter) as z:
            keys = [k for k in z.files if k.endswith((".A", ".B"))]
            assert keys, f"adapter has no LoRA tensors: {z.files}"
            lora = {k: z[k].astype(np.float32) for k in keys}

        # 2) infer with adapter -> keypoints
        _run("infer.py", "--base", base, "--adapter", adapter, "--data", frames,
             "--out", kp, "--device", "cpu")
        out_kp = np.load(kp)
        assert out_kp.shape == (16, 17, 2), f"bad keypoint shape {out_kp.shape}"
        assert np.isfinite(out_kp).all(), "non-finite keypoints"
        assert (out_kp >= 0).all() and (out_kp <= 1).all(), "keypoints out of [0,1]"

        # 3) adapter must actually change the output vs the zero-shot base
        with np.load(frames) as fz:
            frames_x = fz["X"][:]
        net = PoseNet()
        net.load_state_dict(torch.load(base, map_location="cpu"), strict=False)
        net.eval()
        x = standardize(torch.tensor(frames_x))
        with torch.no_grad():
            base_kp = net(x).reshape(16, 17, 2).numpy()

        net.add_lora()
        net.load_lora(lora)
        net.eval()
        with torch.no_grad():
            cal_kp = net(x).reshape(16, 17, 2).numpy()
        assert np.abs(base_kp - cal_kp).sum() > 1e-4, "adapter did not change output"
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Allow running the regression test directly, without pytest.
    test_calibration_end_to_end()
    print("PASS: calibration service end-to-end (calibrate -> adapter -> infer)")
|
|
||||||
@@ -1,75 +0,0 @@
|
|||||||
"""Regression test for the cog-pose adapter producer (cog_calibrate.py).
|
|
||||||
|
|
||||||
Uses the in-repo `pose_v1.safetensors` (skips if absent). Verifies the produced adapter:
|
|
||||||
- has the exact keys/shapes the Rust `cog-pose-estimation --adapter` loader expects,
|
|
||||||
- reduces calibration fit error,
|
|
||||||
- actually changes inference output,
|
|
||||||
- is tiny.
|
|
||||||
Run: python test_cog_calibration.py (or via pytest)
|
|
||||||
"""
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
import torch.nn.functional as F
|
|
||||||
|
|
||||||
HERE = Path(__file__).parent
|
|
||||||
sys.path.insert(0, str(HERE))
|
|
||||||
import cog_calibrate as C # noqa: E402
|
|
||||||
|
|
||||||
BASE = HERE / "../../v2/crates/cog-pose-estimation/cog/artifacts/pose_v1.safetensors"
|
|
||||||
|
|
||||||
|
|
||||||
def test_cog_adapter_producer():
    """Fit a cog-pose adapter on synthetic data and check its format, effect and size."""
    if not BASE.exists():
        print(f"(skip — {BASE} not present)")
        return
    from safetensors.torch import load_file

    rng = np.random.default_rng(0)
    n = 120
    X = rng.standard_normal((n, 56, 20)).astype("float32")
    # Targets are a simple clipped linear function of the inputs.
    Y = (0.5 + 0.1 * X[:, :34, 0].reshape(n, 34)).clip(0, 1).astype("float32")

    with tempfile.TemporaryDirectory() as d:
        calib = os.path.join(d, "calib.npz")
        adapter = os.path.join(d, "room.safetensors")
        np.savez(calib, X=X, Y=Y)

        # Baseline error of the uncalibrated model.
        net0 = C.CogPose()
        C.load_base(net0, str(BASE))
        net0.eval()
        with torch.no_grad():
            base_err = F.smooth_l1_loss(net0(torch.tensor(X)), torch.tensor(Y)).item()

        _, nparam, _ = C.fit(str(BASE), calib, adapter, rank=4, iters=400)
        t = load_file(adapter)

        # exact Rust loader contract: a:[in,r], b:[r,out]
        expected_shapes = {
            "fc1.a": (128, 4),
            "fc1.b": (4, 256),
            "fc2.a": (256, 4),
            "fc2.b": (4, 34),
        }
        for key, shape in expected_shapes.items():
            assert tuple(t[key].shape) == shape

        # Rebuild a LoRA-wrapped model from the saved tensors; the saved b factors
        # have the alpha/rank scale (16 / 4) baked in, so divide it back out.
        net = C.CogPose()
        C.load_base(net, str(BASE))
        net.add_lora(4)
        baked_scale = 16 / 4
        with torch.no_grad():
            for prefix, (a_param, b_param) in (("fc1", net.fc1_lora), ("fc2", net.fc2_lora)):
                a_param.copy_(t[f"{prefix}.a"])
                b_param.copy_(t[f"{prefix}.b"] / baked_scale)
        net.eval()
        with torch.no_grad():
            cal_err = F.smooth_l1_loss(net(torch.tensor(X)), torch.tensor(Y)).item()
            changed = (net0(torch.tensor(X[:8])) - net(torch.tensor(X[:8]))).abs().sum().item()

        assert cal_err < base_err, f"calibration did not reduce error ({base_err} -> {cal_err})"
        assert changed > 1e-3, "adapter inert"
        assert nparam < 5000, f"adapter unexpectedly large ({nparam} params)"
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Allow running the regression test directly, without pytest.
    test_cog_adapter_producer()
    print("PASS: cog adapter producer (Rust-loadable format, reduces error, active)")
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
9c35e541d51f00998691b98948887ebca09b907d8eb29a113f97e792340456ba
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"frames": [{"pred": [[0.4003, 0.2734], [0.5038, 0.4197], [0.2053, 0.4438], [0.4397, 0.685], [0.5796, 0.7645], [0.8001, 0.2195], [0.2789, 0.2833], [0.314, 0.5439], [0.511, 0.2259], [0.6008, 0.46], [0.4837, 0.3879], [0.3475, 0.5597], [0.6569, 0.3575], [0.437, 0.6539], [0.2341, 0.6038], [0.7331, 0.392], [0.5615, 0.4915]]}, {"pred": [[0.4669, 0.6066], [0.6012, 0.7873], [0.4124, 0.5997], [0.2832, 0.281], [0.2732, 0.3635], [0.2503, 0.4848], [0.6827, 0.715], [0.4336, 0.7165], [0.295, 0.3386], [0.5337, 0.3544], [0.4397, 0.5474], [0.5163, 0.5528], [0.7547, 0.6799], [0.4195, 0.4448], [0.2257, 0.2269], [0.384, 0.2176], [0.2419, 0.4332]]}, {"pred": [[0.5585, 0.283], [0.4325, 0.2934], [0.463, 0.4744], [0.4188, 0.3454], [0.215, 0.7565], [0.527, 0.2353], [0.7084, 0.6124], [0.3015, 0.6744], [0.4103, 0.3532], [0.7243, 0.6932], [0.3302, 0.4918], [0.2072, 0.3754], [0.7914, 0.4878], [0.7618, 0.4079], [0.323, 0.3386], [0.7104, 0.4997], [0.2673, 0.6077]]}, {"pred": [[0.6372, 0.4984], [0.4184, 0.6763], [0.4498, 0.7549], [0.2924, 0.303], [0.3069, 0.7022], [0.3954, 0.5098], [0.7836, 0.6071], [0.4733, 0.7114], [0.3407, 0.3793], [0.3408, 0.4678], [0.4156, 0.4911], [0.4525, 0.7519], [0.5117, 0.1985], [0.1893, 0.6784], [0.6281, 0.5346], [0.5175, 0.673], [0.36, 0.3665]]}, {"pred": [[0.5535, 0.6537], [0.568, 0.511], [0.4705, 0.5377], [0.6372, 0.7163], [0.5493, 0.7515], [0.2559, 0.4549], [0.2553, 0.6176], [0.2991, 0.6154], [0.7185, 0.7986], [0.4586, 0.5057], [0.2975, 0.4525], [0.3263, 0.3719], [0.5131, 0.4576], [0.557, 0.5268], [0.6572, 0.7736], [0.2146, 0.6526], [0.4662, 0.7371]]}, {"pred": [[0.2924, 0.7595], [0.2612, 0.2315], [0.2488, 0.7751], [0.2329, 0.7282], [0.4744, 0.4206], [0.3618, 0.267], [0.2477, 0.285], [0.3976, 0.3746], [0.494, 0.2874], [0.3596, 0.2112], [0.3311, 0.4692], [0.6912, 0.4727], [0.4434, 0.5233], [0.4139, 0.7048], [0.425, 0.3937], [0.2326, 0.631], [0.2655, 0.7116]]}, {"pred": [[0.3609, 0.3437], [0.285, 0.486], [0.7734, 0.5468], [0.3657, 0.4093], [0.4728, 0.5019], [0.1866, 
0.3545], [0.2172, 0.2028], [0.5613, 0.5238], [0.6252, 0.7205], [0.7998, 0.2954], [0.242, 0.7063], [0.6259, 0.6883], [0.5148, 0.7141], [0.5577, 0.7434], [0.3233, 0.2131], [0.2652, 0.7066], [0.5753, 0.5885]]}, {"pred": [[0.6787, 0.6504], [0.6051, 0.2297], [0.2539, 0.3475], [0.6437, 0.7807], [0.4981, 0.6149], [0.5716, 0.2367], [0.6486, 0.3632], [0.2433, 0.369], [0.6061, 0.3731], [0.4955, 0.2591], [0.7676, 0.7602], [0.6899, 0.7716], [0.3143, 0.7707], [0.3031, 0.4997], [0.7076, 0.5133], [0.3382, 0.7196], [0.2002, 0.4871]]}]}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"frames": [{"gt": [[0.3943, 0.2905], [0.5215, 0.4194], [0.2225, 0.4602], [0.4547, 0.6961], [0.5765, 0.7686], [0.7858, 0.2279], [0.2866, 0.2707], [0.3084, 0.549], [0.5286, 0.2377], [0.6082, 0.4566], [0.4719, 0.3799], [0.3465, 0.5447], [0.6377, 0.3728], [0.4509, 0.6543], [0.2235, 0.6009], [0.7253, 0.3882], [0.5479, 0.4737]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.4845, 0.5985], [0.5883, 0.7959], [0.4315, 0.6012], [0.3008, 0.2703], [0.2776, 0.3486], [0.2483, 0.4695], [0.6916, 0.7184], [0.4153, 0.7305], [0.3057, 0.3392], [0.5535, 0.3576], [0.4216, 0.5398], [0.5093, 0.5706], [0.7397, 0.668], [0.4354, 0.4394], [0.2373, 0.2404], [0.404, 0.2315], [0.2609, 0.4182]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.5684, 0.2891], [0.4185, 0.2737], [0.4796, 0.4903], [0.4056, 0.3589], [0.2139, 0.7706], [0.5259, 0.2162], [0.718, 0.6177], [0.3002, 0.6632], [0.3978, 0.3338], [0.7116, 0.6836], [0.336, 0.5106], [0.2168, 0.3677], [0.7739, 0.4683], [0.773, 0.4188], [0.318, 0.3226], [0.7043, 0.4877], [0.2509, 0.5964]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.6501, 0.4868], [0.3995, 0.6805], [0.4408, 0.7681], [0.2762, 0.2907], [0.2877, 0.6959], [0.4102, 0.5292], [0.7825, 0.5898], [0.4603, 0.723], [0.3511, 0.3758], [0.3556, 0.4514], [0.4123, 0.4749], [0.4524, 0.7506], [0.5141, 0.2112], [0.2024, 0.6795], [0.6351, 0.5339], [0.5333, 0.6706], [0.3491, 0.3662]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.537, 0.656], [0.5675, 0.5033], [0.4714, 0.52], [0.6195, 0.7259], [0.5357, 0.766], [0.273, 0.4653], [0.2439, 0.6017], [0.2927, 0.6297], [0.7297, 0.7805], [0.439, 0.4924], [0.2969, 0.4589], [0.3174, 0.3911], [0.5324, 0.4643], [0.5744, 0.5074], [0.673, 0.783], [0.2238, 0.6674], [0.4534, 
0.7468]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.2896, 0.7515], [0.2537, 0.2345], [0.2434, 0.763], [0.2502, 0.7137], [0.4723, 0.4035], [0.3607, 0.2775], [0.2657, 0.2969], [0.3872, 0.383], [0.5001, 0.3067], [0.3503, 0.2092], [0.3137, 0.4849], [0.6914, 0.4593], [0.4359, 0.504], [0.4056, 0.6994], [0.4428, 0.4085], [0.2424, 0.6445], [0.2507, 0.7048]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.3692, 0.3453], [0.2945, 0.4675], [0.7836, 0.5282], [0.3857, 0.414], [0.4848, 0.5017], [0.203, 0.3585], [0.225, 0.2135], [0.5513, 0.5175], [0.6296, 0.7275], [0.7908, 0.2897], [0.2263, 0.7012], [0.6403, 0.6873], [0.5026, 0.701], [0.5504, 0.7357], [0.338, 0.2187], [0.2629, 0.7015], [0.5757, 0.6084]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}, {"gt": [[0.6786, 0.649], [0.5956, 0.2396], [0.2447, 0.3593], [0.6439, 0.7854], [0.4874, 0.6102], [0.5857, 0.2465], [0.6459, 0.3827], [0.2364, 0.3613], [0.6054, 0.3745], [0.4798, 0.2711], [0.7869, 0.7618], [0.6919, 0.7809], [0.3259, 0.7674], [0.285, 0.5144], [0.6921, 0.5052], [0.3388, 0.7386], [0.2022, 0.495]], "vis": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "scale": 1.0}]}
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
{"benchmark": "AetherArena", "created": "2026-05-30", "kind": "genesis", "note": "Official Spatial-Intelligence Benchmark \u2014 append-only signed ledger. Entries are real harness scores only; no seeded numbers.", "prev_hash": "0000000000000000000000000000000000000000000000000000000000000000", "row_hash": "940bdc6f0f5dd00f4d89e13a8fa843bab3c9ddf1b8051f426a1701e730249231", "seq": 0, "spec": "ADR-149"}
|
|
||||||
{"abs_gain": "+9.38", "benchmark": "MM-Fi", "category": "pose", "caveat": "Protocol-matched MM-Fi random_split result; NOT solved real-world generalization. Random split has temporal/subject-adjacency effects common to this benchmark family. Leakage-free cross-subject is far lower (~11-27%) and is the real deployment frontier.", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20 (||right_shoulder-left_hip|| norm, 17 COCO kpts)", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer (4L/8H ~2M params, temporal-attention)", "prev_hash": "940bdc6f0f5dd00f4d89e13a8fa843bab3c9ddf1b8051f426a1701e730249231", "protocol": "random_split (ratio=0.8, seed=0)", "rel_gain": "+13.0%", "reproduce": "download MM-Fi -> parse_mmfi_zips.py -> train_tf_torso.py X.npy Y.npy split_random.npy (seed 0)", "row_hash": "76598d8e1320d5248f8cd854a8ffa22a99bd2a2f0e0e7f2d2b1df79af16001d5", "score_pct": 81.63, "scored_at": "2026-05-30", "seq": 1, "sota_ref": "MultiFormer 72.25 (CSI2Pose 68.41)", "submitter": "ruvnet", "tier": "Gold"}
|
|
||||||
{"abs_gain": "+11.34", "benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer + skeleton-graph head + 3-ensemble + TTA", "note": "Best in-domain. Stacks attention-pooling + transformer + skeleton-graph refine + warmup + TTA + 3-model ensemble. Supersedes the 81.63 single-model entry.", "prev_hash": "76598d8e1320d5248f8cd854a8ffa22a99bd2a2f0e0e7f2d2b1df79af16001d5", "protocol": "random_split (0.8, seed 0)", "row_hash": "5780a4bc3e98eb0e30c1ecfa9091e57b280444fa1f21cd5146797e408580e4ab", "score_pct": 83.59, "scored_at": "2026-05-30", "seq": 2, "sota_ref": "MultiFormer 72.25 (CSI2Pose 68.41)", "submitter": "ruvnet", "tier": "Gold"}
|
|
||||||
{"benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer", "note": "Leakage-free generalization to unseen people, shared rooms. Honest deployment-relevant number.", "prev_hash": "5780a4bc3e98eb0e30c1ecfa9091e57b280444fa1f21cd5146797e408580e4ab", "protocol": "cross_subject (official, val=S05,S10,..,S40)", "row_hash": "d989e4e1dbc0182610305fdfbde8b094413b87c913283a46bf41f4afba7a06fd", "score_pct": 64.04, "scored_at": "2026-05-30", "seq": 3, "sota_ref": "(no matched public ref)", "submitter": "ruvnet", "tier": "Silver"}
|
|
||||||
{"benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer + CORAL domain alignment", "note": "The real deployment frontier (new room). CORAL transductive DG (+30% rel over control). Data-bound: MM-Fi has only 3 source rooms.", "prev_hash": "d989e4e1dbc0182610305fdfbde8b094413b87c913283a46bf41f4afba7a06fd", "protocol": "cross_environment (train E01-03 -> test E04, new room)", "row_hash": "bf370487bde88e198c13877956dab3c83766a6a24afef0b78b6ac7aa130bb207", "score_pct": 17.51, "scored_at": "2026-05-30", "seq": 4, "sota_ref": "(hard frontier; control 13.52)", "submitter": "ruvnet", "tier": "Bronze"}
|
|
||||||
@@ -1,100 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""AetherArena append-only, tamper-evident results ledger (ADR-149 §2.3/§2.4).
|
|
||||||
|
|
||||||
Each row is hash-chained to the previous one: ``row_hash = sha256(canonical_row
|
|
||||||
+ prev_hash)``. Any silent edit to an earlier row breaks every subsequent
|
|
||||||
``prev_hash`` link, so the ledger is append-only and verifiable by anyone — no
|
|
||||||
trust in the maintainer required. (Ed25519 row signing is the next hardening;
|
|
||||||
the chain already makes tampering detectable.)
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
python ledger_tools.py seed # (re)build ledger.jsonl with the genesis row only (empty board)
|
|
||||||
python ledger_tools.py verify # verify the whole chain -> exit 0 / 1
|
|
||||||
python ledger_tools.py append '<json-row>' # append one scored row
|
|
||||||
"""
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
LEDGER = Path(__file__).parent / "ledger.jsonl"
|
|
||||||
GENESIS_PREV = "0" * 64
|
|
||||||
|
|
||||||
|
|
||||||
def canonical(row: dict) -> bytes:
    """Return the deterministic byte encoding of *row* used for hashing.

    The ``row_hash`` field is excluded (a row cannot hash itself); keys are sorted
    and no whitespace is emitted, so equal rows always produce identical bytes.
    """
    payload = {key: value for key, value in row.items() if key != "row_hash"}
    encoded = json.dumps(payload, sort_keys=True, separators=(",", ":"))
    return encoded.encode()
|
|
||||||
|
|
||||||
|
|
||||||
def row_hash(row: dict) -> str:
    """Return the hex SHA-256 digest of the row's canonical bytes."""
    digest = hashlib.sha256()
    digest.update(canonical(row))
    return digest.hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def read_rows() -> list[dict]:
    """Parse the ledger file into a list of rows; a missing file yields ``[]``."""
    if not LEDGER.exists():
        return []
    rows = []
    for line in LEDGER.read_text().splitlines():
        if not line.strip():
            continue  # tolerate blank lines
        rows.append(json.loads(line))
    return rows
|
|
||||||
|
|
||||||
|
|
||||||
def append(entry: dict) -> dict:
    """Chain *entry* onto the ledger and return the row that was written.

    A copy of *entry* gets ``seq`` (its index in the ledger), ``prev_hash`` (the
    previous row's hash, or the all-zero genesis value) and ``row_hash``, and is
    appended to the ledger file as one JSON line. The caller's dict is not modified.
    """
    existing = read_rows()
    previous_hash = existing[-1]["row_hash"] if existing else GENESIS_PREV
    row = {**entry, "seq": len(existing), "prev_hash": previous_hash}
    row["row_hash"] = row_hash(row)
    line = json.dumps(row, sort_keys=True) + "\n"
    with LEDGER.open("a") as f:
        f.write(line)
    return row
|
|
||||||
|
|
||||||
|
|
||||||
def verify() -> bool:
    """Check the whole ledger chain, print the verdict, and return whether it is intact.

    Each row must carry its own index as ``seq``, point at the previous row's hash
    via ``prev_hash``, and have a ``row_hash`` matching its recomputed hash.
    Checking stops at the first failing row.
    """
    rows = read_rows()
    expected_prev = GENESIS_PREV
    for idx, row in enumerate(rows):
        if row.get("seq") != idx:
            failure = f"FAIL: row {idx} seq mismatch ({row.get('seq')})"
        elif row.get("prev_hash") != expected_prev:
            failure = f"FAIL: row {idx} prev_hash broken — ledger was edited"
        elif row.get("row_hash") != row_hash(row):
            failure = f"FAIL: row {idx} row_hash mismatch — row was tampered"
        else:
            failure = None
        if failure is not None:
            print(failure)
            return False
        expected_prev = row["row_hash"]
    print(f"OK: {len(rows)} rows, chain intact")
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def seed():
    """Reset the ledger to a single genesis row, i.e. an empty leaderboard.

    Any existing ledger file is deleted first. No result rows are written here:
    results are meant to be added only through ``append`` by the scoring pipeline.
    """
    LEDGER.unlink(missing_ok=True)
    genesis = {
        "kind": "genesis",
        "benchmark": "AetherArena",
        "spec": "ADR-149",
        "note": "Official Spatial-Intelligence Benchmark — append-only signed ledger. "
                "Entries are real harness scores only; no seeded numbers.",
        "created": "2026-05-30",
    }
    append(genesis)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # CLI dispatch; with no command the ledger is verified.
    cmd = sys.argv[1] if len(sys.argv) > 1 else "verify"
    if cmd == "seed":
        seed()
        verify()
    elif cmd == "verify":
        sys.exit(0 if verify() else 1)
    elif cmd == "append" and len(sys.argv) > 2:
        print(json.dumps(append(json.loads(sys.argv[2])), indent=2))
    else:
        # Unknown command, or `append` without its JSON row: show usage and exit 2
        # instead of failing with an IndexError traceback.
        print(__doc__)
        sys.exit(2)
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
# AetherArena submission manifest (ADR-149 §2.2).
|
|
||||||
# Accompanies a model artifact pushed to the AA Hugging Face Space.
|
|
||||||
# This file is the contract the Space validates before quarantine + scoring.
|
|
||||||
|
|
||||||
[submission]
|
|
||||||
# Free-form display name shown on the leaderboard.
|
|
||||||
name = "my-spatial-model"
|
|
||||||
# Hugging Face repo or URL of the model artifact (.safetensors / .rvf / LoRA adapter).
|
|
||||||
model_ref = "hf://your-org/your-model"
|
|
||||||
# Submitter handle (HF username / org). Used to sign the ledger row.
|
|
||||||
submitter = "your-hf-username"
|
|
||||||
# SPDX license of the submitted model.
|
|
||||||
license = "Apache-2.0"
|
|
||||||
|
|
||||||
[category]
|
|
||||||
# One of: pose | presence | tracking | vitals | multi-task
|
|
||||||
# v0 ranks: pose, presence (tracking/vitals activate when ground truth lands).
|
|
||||||
primary = "pose"
|
|
||||||
|
|
||||||
[input]
|
|
||||||
# Which ADR-145 FeatureSet the model consumes. v0 input is RF/WiFi CSI.
|
|
||||||
# F0 = CSI amplitude/phase F1 = +CIR F2 = +Doppler F3 = +BFLD
|
|
||||||
feature_set = "F0"
|
|
||||||
# Tensor I/O contract so the scorer can feed the model correctly.
|
|
||||||
input_shape = [114, 2] # subcarriers × {amp, phase} (example)
|
|
||||||
output_shape = [17, 2] # 17 keypoints × {x, y} normalised [0,1]
|
|
||||||
# Normalisation expected on the input ("none" | "zscore" | "minmax").
|
|
||||||
normalization = "zscore"
|
|
||||||
|
|
||||||
[runtime]
|
|
||||||
# Inference entrypoint inside the artifact (framework-specific).
|
|
||||||
framework = "candle" # candle | onnx | torch
|
|
||||||
# Optional: target the edge-latency category with a declared device class.
|
|
||||||
device_class = "cpu" # cpu | pi5 | gpu
|
|
||||||
|
|
||||||
# Notes:
|
|
||||||
# - You submit a MODEL, never predictions on data you hold.
|
|
||||||
# - Scoring runs against a PRIVATE MM-Fi held-out split in a no-network,
|
|
||||||
# read-only sandbox. You cannot see the eval data.
|
|
||||||
# - The resulting score is a signed, append-only ledger row carrying a
|
|
||||||
# determinism proof hash and the pinned harness_version.
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
---
|
|
||||||
title: AetherArena — Spatial-Intelligence Benchmark
|
|
||||||
emoji: 📡
|
|
||||||
colorFrom: indigo
|
|
||||||
colorTo: purple
|
|
||||||
sdk: gradio
|
|
||||||
sdk_version: 5.9.1
|
|
||||||
python_version: "3.12"
|
|
||||||
app_file: app.py
|
|
||||||
pinned: true
|
|
||||||
license: cc-by-nc-4.0
|
|
||||||
tags:
|
|
||||||
- benchmark
|
|
||||||
- leaderboard
|
|
||||||
- wifi-sensing
|
|
||||||
- spatial-intelligence
|
|
||||||
- pose-estimation
|
|
||||||
---
|
|
||||||
|
|
||||||
# AetherArena ("AA") — The Official Spatial-Intelligence Benchmark
|
|
||||||
|
|
||||||
> Public leaderboard. Private evaluation split. Open scorer. Signed results.
|
|
||||||
|
|
||||||
The field's standard yardstick for camera-free **spatial intelligence** (pose, presence,
|
|
||||||
occupancy, tracking, vitals) from RF/WiFi and, over time, mmWave / UWB / multimodal.
|
|
||||||
|
|
||||||
- **Project-agnostic** — any team, framework, or modality enters; RuView donated the seed
|
|
||||||
scorer and is scored like everyone else.
|
|
||||||
- **Benchmark-first** — the board starts empty; every row is a real scoring-pipeline
|
|
||||||
**witness** (`inputs_sha256` + `proof_sha256` + `harness_version`) in an append-only,
|
|
||||||
hash-chained, tamper-evident ledger.
|
|
||||||
- **Reproducible** — the scorer is open; reproduce any proof hash + repeatability locally.
|
|
||||||
|
|
||||||
Spec: [ADR-149](https://github.com/ruvnet/RuView/blob/main/docs/adr/ADR-149-public-community-leaderboard-huggingface.md).
|
|
||||||
Source + open scorer: https://github.com/ruvnet/RuView/tree/main/aether-arena
|
|
||||||
|
|
||||||
Non-commercial (CC BY-NC 4.0): the v0 eval split derives from MM-Fi (CC BY-NC); AA is operated non-commercially.
|
|
||||||
@@ -1,161 +0,0 @@
|
|||||||
"""AetherArena ("AA") — The Official Spatial-Intelligence Benchmark.
|
|
||||||
|
|
||||||
Hugging Face Space (Gradio) — the public face of the benchmark (ADR-149).
|
|
||||||
This Space is the presentation + submission layer; the heavy scoring runs in the
|
|
||||||
pinned RuView harness (CI / scorer container), and results land in the append-only,
|
|
||||||
hash-chained **witness ledger** shown here.
|
|
||||||
|
|
||||||
Benchmark-first: the board starts EMPTY. No seeded or hand-entered numbers — every
|
|
||||||
row is a real scoring-pipeline witness (inputs_sha256 + proof_sha256 + harness_version).
|
|
||||||
"""
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import gradio as gr
|
|
||||||
|
|
||||||
LEDGER = Path(__file__).parent / "ledger.jsonl"
|
|
||||||
GENESIS_PREV = "0" * 64
|
|
||||||
|
|
||||||
|
|
||||||
def _rows() -> list:
    """Load every row of the witness ledger.

    Returns:
        The parsed JSON value of each non-blank line of ``LEDGER``, in file
        order, or an empty list when the ledger file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    try:
        # Decode as UTF-8 explicitly so parsing never depends on the host
        # locale's default encoding.
        text = LEDGER.read_text(encoding="utf-8")
    except FileNotFoundError:
        # EAFP instead of an exists() pre-check: there is no window between
        # the check and the read in which the file can disappear.
        return []
    return [json.loads(line) for line in text.splitlines() if line.strip()]
|
|
||||||
|
|
||||||
|
|
||||||
def _canon(row: dict) -> bytes:
|
|
||||||
body = {k: row[k] for k in sorted(row) if k != "row_hash"}
|
|
||||||
return json.dumps(body, separators=(",", ":"), sort_keys=True).encode()
|
|
||||||
|
|
||||||
|
|
||||||
def verify_chain():
    """Walk the witness ledger and report whether its hash chain is intact.

    Each row must (a) carry a ``prev_hash`` equal to the previous row's
    ``row_hash`` (``GENESIS_PREV`` for the first row) and (b) carry a
    ``row_hash`` equal to the SHA-256 hex digest of its canonical bytes.

    Returns:
        A status string: a failure message naming the first offending row
        index, or a success message with the number of rows checked.
    """
    ledger = _rows()
    expected_prev = GENESIS_PREV
    for index, entry in enumerate(ledger):
        if entry.get("prev_hash") == expected_prev:
            digest = hashlib.sha256(_canon(entry)).hexdigest()
            if entry.get("row_hash") == digest:
                # Row is correctly linked and sealed; it becomes the anchor
                # for the next row.
                expected_prev = entry["row_hash"]
                continue
        return f"❌ Ledger chain BROKEN at row {index} — tampering detected."
    return f"✅ Witness ledger chain intact — {len(ledger)} row(s), append-only."
|
|
||||||
|
|
||||||
|
|
||||||
def leaderboard(category: str):
    """Build the leaderboard table rows for one category.

    Args:
        category: ``"all"`` to include every result row, otherwise only rows
            whose ``category`` field equals this value.

    Returns:
        A list of six-cell rows (submitter, model, benchmark / protocol,
        metric, score, tier) for ledger rows of kind ``"result"``, ordered
        by score from highest to lowest. When nothing matches, a single
        placeholder row is returned instead.
    """
    def _score(row):
        # A missing or null score_pct counts as 0. Using the same accessor
        # for ranking and display keeps the two consistent; formatting a
        # null score with ":.2f" would otherwise raise TypeError.
        return row.get("score_pct") or 0

    results = [
        r
        for r in _rows()
        if r.get("kind") == "result"
        and (category == "all" or r.get("category") == category)
    ]
    if not results:
        return [["— no entries yet —", "", "", "", "", ""]]
    results.sort(key=_score, reverse=True)
    return [
        [
            r.get("submitter", "?"),
            r.get("model_ref", "?"),
            f"{r.get('benchmark','?')} / {r.get('protocol','?')}",
            r.get("metric", "?"),
            f"{_score(r):.2f}%",
            f"{r.get('tier','?')} (vs {r.get('sota_ref','?')})",
        ]
        for r in results
    ]
|
|
||||||
|
|
||||||
|
|
||||||
FOUR_PART = "### Public leaderboard. Private evaluation split. Open scorer. Signed results."
|
|
||||||
|
|
||||||
ABOUT = """
|
|
||||||
**AetherArena** is the official, project-agnostic **Spatial-Intelligence Benchmark** —
|
|
||||||
camera-free pose, presence, occupancy, tracking, and vitals from RF/WiFi (and, over
|
|
||||||
time, mmWave / UWB / radar / multimodal). It is **not** a single-vendor board: any
|
|
||||||
team, framework, or modality enters, and every entrant — including the RuView baseline
|
|
||||||
that donated the seed scorer — is scored by the identical, open, pinned harness.
|
|
||||||
|
|
||||||
The scorer reuses RuView's released `wifi-densepose-train` acceptance harness
|
|
||||||
(`ruview_metrics` + ablation). You submit a **model, not predictions**; it is scored
|
|
||||||
against a **private** MM-Fi held-out split; one **witness** row (inputs hash + proof
|
|
||||||
hash + harness version) is appended to a **hash-chained, tamper-evident ledger**.
|
|
||||||
|
|
||||||
**For industry:** a vendor-neutral, auditable way to compare RF-sensing models on equal
|
|
||||||
footing — the same standardized splits, the same metric definition, the same signed,
|
|
||||||
reproducible ledger. No more "trust our number on our split." Vendors, labs, and startups
|
|
||||||
all submit through one pipeline and are scored identically.
|
|
||||||
|
|
||||||
**Generalization Track (roadmap):** the headline isn't a single in-domain number — it's a
|
|
||||||
battery of honest tracks: MM-Fi `random_split` (in-domain), `cross_subject` (unseen people),
|
|
||||||
cross-room, cross-device, and confidence-calibration (ECE). Cross-subject is the real
|
|
||||||
deployment frontier and is treated as the flagship hard benchmark.
|
|
||||||
|
|
||||||
Spec: ADR-149. v0 ranks **pose, presence, edge-latency, determinism**. Tracking &
|
|
||||||
vitals activate when their ground truth lands; **privacy-leakage** is gated until the
|
|
||||||
membership-inference attacker ships. Source + the open scorer:
|
|
||||||
https://github.com/ruvnet/RuView/tree/main/aether-arena
|
|
||||||
"""
|
|
||||||
|
|
||||||
SUBMIT = """
|
|
||||||
### Submit a model
|
|
||||||
|
|
||||||
1. Write a manifest — [`schema/aa-submission.toml`](https://github.com/ruvnet/RuView/blob/main/aether-arena/schema/aa-submission.toml):
|
|
||||||
declare your model ref, category, the ADR-145 feature set (F0 CSI … F3 BFLD), and the tensor I/O contract.
|
|
||||||
2. Provide your model artifact (`.safetensors` / `.rvf` / LoRA adapter).
|
|
||||||
3. It moves through `submitted → validated → quarantined → smoke_scored → full_scored → published`,
|
|
||||||
scored in a no-network, read-only sandbox against the private split.
|
|
||||||
4. Your signed witness row appears on the leaderboard.
|
|
||||||
|
|
||||||
**You submit a model, never predictions** — predictions on data you hold prove nothing.
|
|
||||||
"""
|
|
||||||
|
|
||||||
VERIFY = """
|
|
||||||
### Verify it's fair (you don't have to trust us)
|
|
||||||
|
|
||||||
The scorer is open and reproducible. Reproduce the determinism proof + repeatability locally:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/ruvnet/RuView && cd RuView/v2
|
|
||||||
# determinism gate (same as CI):
|
|
||||||
cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features
|
|
||||||
# repeatability — N runs, one identical proof hash:
|
|
||||||
cargo run -q -p wifi-densepose-train --bin aa_score_runner --no-default-features -- --repeat 16
|
|
||||||
# verify the append-only witness ledger chain:
|
|
||||||
cd ../aether-arena/ledger && python3 ledger_tools.py verify
|
|
||||||
```
|
|
||||||
|
|
||||||
A stranger must be able to: submit → get a deterministic score → see the signed row →
|
|
||||||
rerun the scorer locally → understand why the rank is fair. That is the launch gate (ADR-149 §7).
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Page layout: headline + mission statement, the live ledger-chain status,
# then four tabs (Leaderboard, Submit, Verify, About).
with gr.Blocks(title="AetherArena — Spatial-Intelligence Benchmark") as demo:
    gr.Markdown(
        value="# 📡 AetherArena (AA)\n## The Official, Vendor-Neutral Benchmark for WiFi / RF Spatial Sensing"
    )
    gr.Markdown(value=FOUR_PART)
    gr.Markdown(
        value=(
            "**An open industry benchmark — for everyone, not any one vendor.** Submit any model, any framework, "
            "any modality. Every entrant — academic, startup, or incumbent — is scored *identically*: standardized "
            "protocols (MM-Fi `random_split` / `cross_subject`), matched metrics (torso-PCK@20, the published "
            "definition), and an auditable, hash-chained **witness ledger** anyone can verify and reproduce.\n\n"
            "**Why it exists:** WiFi/RF-sensing results are reported with inconsistent splits, metrics, and no "
            "auditability — so numbers aren't comparable. AetherArena fixes the *measurement*: one protocol, one "
            "metric, one signed ledger, one-command reproduction. The benchmark is the product; the leaderboard is "
            "just the scoreboard. (Reference implementation seeded by RuView, ADR-149.)"
        )
    )
    # Chain status is evaluated once, while the page is being built.
    chain = gr.Markdown(value=verify_chain())

    with gr.Tab(label="🏆 Leaderboard"):
        gr.Markdown(
            value=(
                "### Current standings — MM-Fi WiFi-CSI 2D pose, torso-PCK@20\n"
                "Ranked, protocol- & metric-matched results. Each row carries its own caveats in the ledger "
                "(e.g. `random_split` has temporal-adjacency leakage that inflates *all* methods equally — the "
                "leakage-free `cross_subject` track is the real deployment frontier). **Submit yours — top the board.**"
            )
        )
        cat = gr.Dropdown(
            choices=["all", "pose", "presence"],
            value="all",
            label="Category",
        )
        tbl = gr.Dataframe(
            headers=[
                "Submitter",
                "Model",
                "Benchmark / Protocol",
                "Metric",
                "Score",
                "Tier (vs prior SOTA)",
            ],
            value=leaderboard("all"),
            interactive=False,
            wrap=True,
        )
        # Re-filter the table whenever a different category is picked.
        cat.change(fn=leaderboard, inputs=cat, outputs=tbl)
        gr.Markdown(
            value=(
                "*Vendor-neutral & benchmark-first: every row is a real, metric- and protocol-matched result — "
                "no seeded or vendor-favored numbers. Integrity is enforced, not promised: the current top entry's "
                "score was self-corrected down from an inflated metric (91.86% bbox → 81.63% torso) before it could "
                "be published. The same scorer and ledger apply to every submitter.*"
            )
        )

    with gr.Tab(label="📤 Submit"):
        gr.Markdown(value=SUBMIT)
    with gr.Tab(label="🔬 Verify"):
        gr.Markdown(value=VERIFY)
    with gr.Tab(label="ℹ️ About"):
        gr.Markdown(value=ABOUT)

# Local / container entry point: listen on all interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
{"benchmark": "AetherArena", "created": "2026-05-30", "kind": "genesis", "note": "Official Spatial-Intelligence Benchmark \u2014 append-only signed ledger. Entries are real harness scores only; no seeded numbers.", "prev_hash": "0000000000000000000000000000000000000000000000000000000000000000", "row_hash": "940bdc6f0f5dd00f4d89e13a8fa843bab3c9ddf1b8051f426a1701e730249231", "seq": 0, "spec": "ADR-149"}
|
|
||||||
{"abs_gain": "+9.38", "benchmark": "MM-Fi", "category": "pose", "caveat": "Protocol-matched MM-Fi random_split result; NOT solved real-world generalization. Random split has temporal/subject-adjacency effects common to this benchmark family. Leakage-free cross-subject is far lower (~11-27%) and is the real deployment frontier.", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20 (||right_shoulder-left_hip|| norm, 17 COCO kpts)", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer (4L/8H ~2M params, temporal-attention)", "prev_hash": "940bdc6f0f5dd00f4d89e13a8fa843bab3c9ddf1b8051f426a1701e730249231", "protocol": "random_split (ratio=0.8, seed=0)", "rel_gain": "+13.0%", "reproduce": "download MM-Fi -> parse_mmfi_zips.py -> train_tf_torso.py X.npy Y.npy split_random.npy (seed 0)", "row_hash": "76598d8e1320d5248f8cd854a8ffa22a99bd2a2f0e0e7f2d2b1df79af16001d5", "score_pct": 81.63, "scored_at": "2026-05-30", "seq": 1, "sota_ref": "MultiFormer 72.25 (CSI2Pose 68.41)", "submitter": "ruvnet", "tier": "Gold"}
|
|
||||||
{"abs_gain": "+11.34", "benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer + skeleton-graph head + 3-ensemble + TTA", "note": "Best in-domain. Stacks attention-pooling + transformer + skeleton-graph refine + warmup + TTA + 3-model ensemble. Supersedes the 81.63 single-model entry.", "prev_hash": "76598d8e1320d5248f8cd854a8ffa22a99bd2a2f0e0e7f2d2b1df79af16001d5", "protocol": "random_split (0.8, seed 0)", "row_hash": "5780a4bc3e98eb0e30c1ecfa9091e57b280444fa1f21cd5146797e408580e4ab", "score_pct": 83.59, "scored_at": "2026-05-30", "seq": 2, "sota_ref": "MultiFormer 72.25 (CSI2Pose 68.41)", "submitter": "ruvnet", "tier": "Gold"}
|
|
||||||
{"benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer", "note": "Leakage-free generalization to unseen people, shared rooms. Honest deployment-relevant number.", "prev_hash": "5780a4bc3e98eb0e30c1ecfa9091e57b280444fa1f21cd5146797e408580e4ab", "protocol": "cross_subject (official, val=S05,S10,..,S40)", "row_hash": "d989e4e1dbc0182610305fdfbde8b094413b87c913283a46bf41f4afba7a06fd", "score_pct": 64.04, "scored_at": "2026-05-30", "seq": 3, "sota_ref": "(no matched public ref)", "submitter": "ruvnet", "tier": "Silver"}
|
|
||||||
{"benchmark": "MM-Fi", "category": "pose", "harness_version": 1, "kind": "result", "metric": "torso-PCK@20", "modality": "wifi-csi", "model_ref": "RuView CSI-Transformer + CORAL domain alignment", "note": "The real deployment frontier (new room). CORAL transductive DG (+30% rel over control). Data-bound: MM-Fi has only 3 source rooms.", "prev_hash": "d989e4e1dbc0182610305fdfbde8b094413b87c913283a46bf41f4afba7a06fd", "protocol": "cross_environment (train E01-03 -> test E04, new room)", "row_hash": "bf370487bde88e198c13877956dab3c83766a6a24afef0b78b6ac7aa130bb207", "score_pct": 17.51, "scored_at": "2026-05-30", "seq": 4, "sota_ref": "(hard frontier; control 13.52)", "submitter": "ruvnet", "tier": "Bronze"}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
gradio==5.9.1
|
|
||||||
@@ -1,130 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
CIR Verification Helper (ADR-134)
|
|
||||||
|
|
||||||
Optional Python comparator — invokes the Rust cir_proof_runner binary and
|
|
||||||
checks its output against expected_cir_features.sha256.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
python cir_verify_helper.py # verify against stored hash
|
|
||||||
python cir_verify_helper.py --generate # regenerate hash via Rust binary
|
|
||||||
|
|
||||||
This script is a thin wrapper; all cryptographic work is done in the Rust
|
|
||||||
binary. It exists to integrate the CIR proof step into the Python verify.py
|
|
||||||
flow if needed.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
|
|
||||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
REPO_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, "..", "..", "..", ".."))
|
|
||||||
|
|
||||||
|
|
||||||
def find_binary() -> str:
    """Return the path of an already-built cir_proof_runner, or "" if none.

    Searches ``<REPO_ROOT>/v2/target``: the release profile before debug,
    and within each profile the bare executable name before the ``.exe``
    name. The first path that is an existing file wins.
    """
    target_dir = os.path.join(REPO_ROOT, "v2", "target")
    for profile in ("release", "debug"):
        for filename in ("cir_proof_runner", "cir_proof_runner.exe"):
            candidate = os.path.join(target_dir, profile, filename)
            if os.path.isfile(candidate):
                return candidate
    return ""
|
|
||||||
|
|
||||||
|
|
||||||
def build_binary() -> bool:
    """Build the release cir_proof_runner binary with cargo.

    Runs ``cargo build -p wifi-densepose-signal --bin cir_proof_runner
    --release --no-default-features`` from ``<REPO_ROOT>/v2`` with its
    output captured.

    Returns:
        True when cargo exits with status 0. False when cargo could not be
        started at all or exited non-zero; a "Build failed:" line is printed
        in both cases.
    """
    print("Building cir_proof_runner (release)...")
    command = [
        "cargo", "build",
        "-p", "wifi-densepose-signal",
        "--bin", "cir_proof_runner",
        "--release",
        "--no-default-features",
    ]
    try:
        result = subprocess.run(
            command,
            cwd=os.path.join(REPO_ROOT, "v2"),
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        # cargo is not on PATH, or the v2/ workspace directory is missing.
        # Report it through the same boolean contract as a failed build
        # instead of escaping as a traceback.
        print("Build failed:", exc)
        return False
    if result.returncode != 0:
        # Only the tail of stderr is shown; the final lines carry the error.
        print("Build failed:", result.stderr[-2000:])
        return False
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def run_generate(binary: str) -> str:
    """Invoke the binary with ``--generate-hash`` and return what it prints.

    Args:
        binary: Path to the cir_proof_runner executable.

    Returns:
        The binary's stripped standard output when it exits with status 0;
        otherwise "" after printing its standard error.
    """
    completed = subprocess.run(
        [binary, "--generate-hash"],
        cwd=REPO_ROOT,
        capture_output=True,
        text=True,
    )
    if completed.returncode == 0:
        return completed.stdout.strip()
    print("Error running binary:", completed.stderr)
    return ""
|
|
||||||
|
|
||||||
|
|
||||||
def run_verify(binary: str) -> bool:
    """Invoke the binary with no arguments (verify mode) and relay its output.

    Args:
        binary: Path to the cir_proof_runner executable.

    Returns:
        True when the binary exits with status 0, False otherwise.
    """
    completed = subprocess.run(
        [binary],
        cwd=REPO_ROOT,
        capture_output=True,
        text=True,
    )
    # Forward the captured streams: stdout always, stderr only if non-blank.
    print(completed.stdout.strip())
    err_text = completed.stderr.strip()
    if err_text:
        print(err_text, file=sys.stderr)
    return completed.returncode == 0
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Command-line entry point: verify the CIR proof or regenerate its hash.

    Flags:
        --generate  Run the binary with --generate-hash and write the result
                    to expected_cir_features.sha256 beside this script.
        --build     Build the binary first even if one is already present.

    Without --generate the binary is run in verify mode and the process
    exits 0 on success, 1 on failure. The process also exits 1 when the
    build fails, no binary can be located, or hash generation fails.
    """
    parser = argparse.ArgumentParser(description="CIR verification helper (ADR-134)")
    parser.add_argument(
        "--generate",
        action="store_true",
        help="Regenerate expected_cir_features.sha256 via Rust binary",
    )
    parser.add_argument(
        "--build",
        action="store_true",
        default=False,
        help="Build the binary before running (default: use cached binary)",
    )
    args = parser.parse_args()

    binary = find_binary()
    # Build on request, or as a fallback when nothing has been built yet.
    if args.build or not binary:
        if not build_binary():
            sys.exit(1)
        binary = find_binary()

    if not binary:
        print("ERROR: cir_proof_runner binary not found. Run with --build.")
        sys.exit(1)

    if not args.generate:
        # Verify mode: the exit status mirrors the binary's verdict.
        ok = run_verify(binary)
        sys.exit(0 if ok else 1)

    hash_val = run_generate(binary)
    if not hash_val:
        sys.exit(1)
    hash_file = os.path.join(SCRIPT_DIR, "expected_cir_features.sha256")
    with open(hash_file, "w") as f:
        f.write(hash_val + "\n")
    print(f"Wrote CIR hash to {hash_file}")
    print(f"Hash: {hash_val}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
d6bce07ecb1648e6936561df44bf4a3bfc17bb0ba5f692646b2301d105b52f67
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
304d54690af468dc6cbf0f2a1332f109cf187d5e2eab454efd8554cebc45bdeb
|
|
||||||
@@ -1 +1 @@
|
|||||||
f8e76f21a0f9852b70b6d9dd5318239f6b20cbcb4cdd995863263cecdc446f7a
|
8c0680d7d285739ea9597715e84959d9c356c87ee3ad35b5f1e69a4ca41151c6
|
||||||
|
|||||||
Binary file not shown.
+19
-181
@@ -164,120 +164,37 @@ def frame_to_csi_data(frame, signal_meta):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Quantization precision for cross-platform hash stability (issue #560).
|
|
||||||
#
|
|
||||||
# The bytes packed below feed SHA-256. Without quantization, the hash diverges
|
|
||||||
# across SIMD backends (Intel AVX2/AVX-512 vs ARM NEON vs different x86 micro-
|
|
||||||
# architectures in the same CI pool) because scipy.fft's pocketfft kernels
|
|
||||||
# reorder vectorized FP operations differently per build. IEEE 754 guarantees
|
|
||||||
# per-operation determinism, not associativity under reordering.
|
|
||||||
#
|
|
||||||
# Empirically: 9 decimals was NOT enough to collapse the divergence — two
|
|
||||||
# back-to-back Ubuntu 24.04 / Python 3.11 / scipy 1.17 CI runs landed on
|
|
||||||
# different Azure VM microarchitectures (likely Skylake vs Cascade Lake)
|
|
||||||
# and produced two different SHA-256s even after np.round(.., 9). The DSP
|
|
||||||
# pipeline (preprocess → biquad bandpass → FFT → PSD → variance accumulation)
|
|
||||||
# amplifies the ~1e-14 raw FFT divergence by several orders of magnitude
|
|
||||||
# downstream — the actual drift at features_to_bytes() input can reach 1e-7
|
|
||||||
# or worse.
|
|
||||||
#
|
|
||||||
# 6 decimals (parts per million) gives ~6 orders of magnitude headroom over
|
|
||||||
# observed pipeline-amplified ULP drift and is still far below any meaningful
|
|
||||||
# signal change (CSI phase precision is ~1e-3 rad; PSD bins differ by orders
|
|
||||||
# of magnitude). Round to this precision, then hash.
|
|
||||||
#
|
|
||||||
# NOTE: 6 decimals collapses the divergence *across Linux microarchitectures*
|
|
||||||
# but NOT Windows-vs-Linux, where the pocketfft/BLAS difference exceeds 1e-6 on
|
|
||||||
# a few elements that then straddle the 6th-decimal rounding boundary. The
|
|
||||||
# precision is overridable via PROOF_HASH_DECIMALS so it can be coarsened to a
|
|
||||||
# value that is boundary-stable across *all* platforms (Windows + Linux + macOS)
|
|
||||||
# while staying far below any signal-meaningful change.
|
|
||||||
HASH_QUANTIZATION_DECIMALS = int(os.environ.get("PROOF_HASH_DECIMALS", "6"))
|
|
||||||
|
|
||||||
|
|
||||||
def features_to_bytes(features):
|
def features_to_bytes(features):
|
||||||
"""Convert CSIFeatures to a deterministic byte representation.
|
"""Convert CSIFeatures to a deterministic byte representation.
|
||||||
|
|
||||||
Each feature array is quantized to ``HASH_QUANTIZATION_DECIMALS`` decimal
|
We serialize each numpy array to bytes in a canonical order
|
||||||
places before being packed as little-endian float64. The quantization is
|
using little-endian float64 representation. This ensures the
|
||||||
what makes the resulting SHA-256 hash actually platform-independent — the
|
hash is platform-independent for IEEE 754 compliant systems.
|
||||||
raw float values diverge at ULP precision across scipy.fft SIMD backends
|
|
||||||
(issue #560), even though all platforms compute the "correct" answer.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
features: CSIFeatures instance.
|
features: CSIFeatures instance.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bytes: Canonical, quantized byte representation.
|
bytes: Canonical byte representation.
|
||||||
"""
|
"""
|
||||||
parts = []
|
parts = []
|
||||||
|
|
||||||
# Serialize each feature array in declaration order.
|
# Serialize each feature array in declaration order
|
||||||
# doppler_shift is INTENTIONALLY excluded: it is peak-normalized
|
|
||||||
# (`spectrum / max(spectrum)` in csi_processor._extract_doppler_features),
|
|
||||||
# and when the raw spectrum has near-tied peaks the argmax flips under
|
|
||||||
# cross-microarchitecture FP reordering, renormalizing the whole array
|
|
||||||
# (O(1) divergence — not absorbable by any tolerance). The remaining five
|
|
||||||
# features, including the FFT-based PSD, reproduce deterministically and
|
|
||||||
# provide the proof. (The underlying doppler instability is a production
|
|
||||||
# reproducibility bug tracked separately.)
|
|
||||||
for array in [
|
for array in [
|
||||||
features.amplitude_mean,
|
features.amplitude_mean,
|
||||||
features.amplitude_variance,
|
features.amplitude_variance,
|
||||||
features.phase_difference,
|
features.phase_difference,
|
||||||
features.correlation_matrix,
|
features.correlation_matrix,
|
||||||
|
features.doppler_shift,
|
||||||
features.power_spectral_density,
|
features.power_spectral_density,
|
||||||
]:
|
]:
|
||||||
flat = np.asarray(array, dtype=np.float64).ravel()
|
flat = np.asarray(array, dtype=np.float64).ravel()
|
||||||
# Quantize before packing so SIMD-level FP reordering across
|
|
||||||
# Intel AVX vs Apple Silicon NEON pocketfft kernels does not
|
|
||||||
# leak into the SHA-256 input.
|
|
||||||
flat = np.round(flat, HASH_QUANTIZATION_DECIMALS)
|
|
||||||
# Pack as little-endian double (8 bytes each)
|
# Pack as little-endian double (8 bytes each)
|
||||||
parts.append(struct.pack(f"<{len(flat)}d", *flat))
|
parts.append(struct.pack(f"<{len(flat)}d", *flat))
|
||||||
|
|
||||||
return b"".join(parts)
|
return b"".join(parts)
|
||||||
|
|
||||||
|
|
||||||
# ── Cross-platform tolerance gate (issue #560 follow-up) ─────────────────────
|
|
||||||
# The SHA-256 of fixed-decimal-rounded features is bit-exact only WITHIN one
|
|
||||||
# CPU microarchitecture. The pocketfft / BLAS kernels in the manylinux
|
|
||||||
# numpy/scipy wheels reorder floating-point reductions differently across
|
|
||||||
# microarchs (e.g. a GitHub Azure runner vs a developer box vs another Linux
|
|
||||||
# host), and the resulting ~1e-6 *relative* drift lands on large-magnitude PSD
|
|
||||||
# bins as an absolute difference too large for ANY fixed-decimal grid to absorb
|
|
||||||
# (empirically the hash diverges across microarchs even at 2 decimals). So:
|
|
||||||
# • the hash is the strong, bit-exact, SAME-platform proof, and
|
|
||||||
# • a relative tolerance against a committed reference vector is the
|
|
||||||
# platform-INDEPENDENT proof.
|
|
||||||
# A run PASSES if either matches. Tolerances sit ~100x over the observed
|
|
||||||
# microarch drift and ~10x under any signal-meaningful change (CSI phase
|
|
||||||
# precision ~1e-3 rad), so real pipeline regressions still fail.
|
|
||||||
TOLERANCE_RTOL = 1e-4
|
|
||||||
TOLERANCE_ATOL = 1e-6
|
|
||||||
REFERENCE_VECTOR_FILENAME = "expected_features_reference.npz"
|
|
||||||
|
|
||||||
|
|
||||||
def features_to_vector(features):
    """Flatten one frame's feature arrays into a single float64 vector.

    The arrays are taken in the order amplitude_mean, amplitude_variance,
    phase_difference, correlation_matrix, power_spectral_density, each
    converted to float64 and raveled, then concatenated. No rounding is
    applied, so the values keep full precision for the tolerance-based
    cross-platform comparison.

    doppler_shift is deliberately left out: it is peak-normalized, and the
    argmax of that normalization is unstable across CPU microarchitectures.

    Args:
        features: Object exposing the five feature attributes named above.

    Returns:
        A 1-D float64 numpy array.
    """
    field_names = (
        "amplitude_mean",
        "amplitude_variance",
        "phase_difference",
        "correlation_matrix",
        "power_spectral_density",
    )
    raw_arrays = [getattr(features, name) for name in field_names]
    flattened = []
    for raw in raw_arrays:
        flattened.append(np.asarray(raw, dtype=np.float64).ravel())
    return np.concatenate(flattened)
|
|
||||||
|
|
||||||
|
|
||||||
def compute_pipeline_hash(data_path, verbose=False):
|
def compute_pipeline_hash(data_path, verbose=False):
|
||||||
"""Run the full pipeline and compute the SHA-256 hash of all features.
|
"""Run the full pipeline and compute the SHA-256 hash of all features.
|
||||||
|
|
||||||
@@ -320,7 +237,6 @@ def compute_pipeline_hash(data_path, verbose=False):
|
|||||||
features_count = 0
|
features_count = 0
|
||||||
total_feature_bytes = 0
|
total_feature_bytes = 0
|
||||||
last_features = None
|
last_features = None
|
||||||
feature_vectors = []
|
|
||||||
doppler_nonzero_count = 0
|
doppler_nonzero_count = 0
|
||||||
doppler_shape = None
|
doppler_shape = None
|
||||||
psd_shape = None
|
psd_shape = None
|
||||||
@@ -337,7 +253,6 @@ def compute_pipeline_hash(data_path, verbose=False):
|
|||||||
if features is not None:
|
if features is not None:
|
||||||
feature_bytes = features_to_bytes(features)
|
feature_bytes = features_to_bytes(features)
|
||||||
hasher.update(feature_bytes)
|
hasher.update(feature_bytes)
|
||||||
feature_vectors.append(features_to_vector(features))
|
|
||||||
features_count += 1
|
features_count += 1
|
||||||
total_feature_bytes += len(feature_bytes)
|
total_feature_bytes += len(feature_bytes)
|
||||||
last_features = features
|
last_features = features
|
||||||
@@ -406,11 +321,7 @@ def compute_pipeline_hash(data_path, verbose=False):
|
|||||||
"psd_shape": psd_shape,
|
"psd_shape": psd_shape,
|
||||||
}
|
}
|
||||||
|
|
||||||
reference_vector = (
|
return hasher.hexdigest(), stats
|
||||||
np.concatenate(feature_vectors) if feature_vectors else np.array([], dtype=np.float64)
|
|
||||||
)
|
|
||||||
|
|
||||||
return hasher.hexdigest(), reference_vector, stats
|
|
||||||
|
|
||||||
|
|
||||||
def audit_codebase(base_dir=None):
|
def audit_codebase(base_dir=None):
|
||||||
@@ -526,7 +437,7 @@ def main():
|
|||||||
print(" This runs the SAME CSIProcessor.preprocess_csi_data() and")
|
print(" This runs the SAME CSIProcessor.preprocess_csi_data() and")
|
||||||
print(" CSIProcessor.extract_features() used in production.")
|
print(" CSIProcessor.extract_features() used in production.")
|
||||||
print()
|
print()
|
||||||
computed_hash, computed_vector, stats = compute_pipeline_hash(data_path, verbose=args.verbose)
|
computed_hash, stats = compute_pipeline_hash(data_path, verbose=args.verbose)
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# Step 3: Hash comparison
|
# Step 3: Hash comparison
|
||||||
@@ -538,11 +449,8 @@ def main():
|
|||||||
with open(hash_path, "w") as f:
|
with open(hash_path, "w") as f:
|
||||||
f.write(computed_hash + "\n")
|
f.write(computed_hash + "\n")
|
||||||
print(f" Wrote expected hash to {hash_path}")
|
print(f" Wrote expected hash to {hash_path}")
|
||||||
ref_path = os.path.join(SCRIPT_DIR, REFERENCE_VECTOR_FILENAME)
|
|
||||||
np.savez_compressed(ref_path, features=computed_vector)
|
|
||||||
print(f" Wrote reference vector ({computed_vector.size} values) to {ref_path}")
|
|
||||||
print()
|
print()
|
||||||
print(" HASH + REFERENCE GENERATED -- run without --generate-hash to verify.")
|
print(" HASH GENERATED -- run without --generate-hash to verify.")
|
||||||
print("=" * 72)
|
print("=" * 72)
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -561,70 +469,13 @@ def main():
|
|||||||
|
|
||||||
print(f" Expected: {expected_hash}")
|
print(f" Expected: {expected_hash}")
|
||||||
|
|
||||||
hash_match = computed_hash == expected_hash
|
if computed_hash == expected_hash:
|
||||||
|
match_status = "MATCH"
|
||||||
# Cross-platform fallback: if the bit-exact hash differs (different CPU
|
|
||||||
# microarchitecture reorders the pocketfft/BLAS reductions), accept the run
|
|
||||||
# when the raw feature vector matches the committed reference within a
|
|
||||||
# relative tolerance — platform-independent where the hash is not (#560).
|
|
||||||
tolerance_match = False
|
|
||||||
max_abs_dev = None
|
|
||||||
max_rel_dev = None
|
|
||||||
ref_path = os.path.join(SCRIPT_DIR, REFERENCE_VECTOR_FILENAME)
|
|
||||||
if not hash_match and os.path.exists(ref_path):
|
|
||||||
ref_vec = np.load(ref_path)["features"]
|
|
||||||
if ref_vec.shape == computed_vector.shape:
|
|
||||||
tolerance_match = bool(
|
|
||||||
np.allclose(
|
|
||||||
computed_vector, ref_vec, rtol=TOLERANCE_RTOL, atol=TOLERANCE_ATOL
|
|
||||||
)
|
|
||||||
)
|
|
||||||
diff = np.abs(computed_vector - ref_vec)
|
|
||||||
max_abs_dev = float(np.max(diff)) if diff.size else 0.0
|
|
||||||
max_rel_dev = (
|
|
||||||
float(np.max(diff / np.maximum(np.abs(ref_vec), 1e-12)))
|
|
||||||
if diff.size
|
|
||||||
else 0.0
|
|
||||||
)
|
|
||||||
|
|
||||||
if hash_match:
|
|
||||||
match_status = "MATCH (bit-exact)"
|
|
||||||
elif tolerance_match:
|
|
||||||
match_status = f"TOLERANCE MATCH (max rel dev {max_rel_dev:.2e})"
|
|
||||||
else:
|
else:
|
||||||
match_status = "MISMATCH"
|
match_status = "MISMATCH"
|
||||||
print(f" Status: {match_status}")
|
print(f" Status: {match_status}")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
if not hash_match and max_abs_dev is not None:
|
|
||||||
block_sizes = [56, 56, 55, 9, 128] # per-frame feature layout (doppler excluded)
|
|
||||||
block_names = ["amp_mean", "amp_var", "phase_diff", "corr", "psd"]
|
|
||||||
frame_len = sum(block_sizes)
|
|
||||||
tol = TOLERANCE_ATOL + TOLERANCE_RTOL * np.abs(ref_vec)
|
|
||||||
outside = diff > tol
|
|
||||||
n_out = int(outside.sum())
|
|
||||||
print(
|
|
||||||
f" DIVERGENCE: {n_out}/{computed_vector.size} outside tol "
|
|
||||||
f"({100.0 * n_out / computed_vector.size:.4f}%) "
|
|
||||||
f"max|d|={max_abs_dev:.3e} maxrel={max_rel_dev:.3e}"
|
|
||||||
)
|
|
||||||
if n_out:
|
|
||||||
wf = np.where(outside)[0] % frame_len
|
|
||||||
bounds = np.cumsum([0] + block_sizes)
|
|
||||||
parts = []
|
|
||||||
for bi, name in enumerate(block_names):
|
|
||||||
c = int(((wf >= bounds[bi]) & (wf < bounds[bi + 1])).sum())
|
|
||||||
if c:
|
|
||||||
parts.append(f"{name}={c}")
|
|
||||||
print(f" by feature: {', '.join(parts)}")
|
|
||||||
for w in np.argsort(diff)[::-1][:4]:
|
|
||||||
b = int(np.searchsorted(bounds, int(w) % frame_len, side="right")) - 1
|
|
||||||
print(
|
|
||||||
f" worst idx {int(w)} ({block_names[b]}): "
|
|
||||||
f"ref={ref_vec[int(w)]:.6g} got={computed_vector[int(w)]:.6g}"
|
|
||||||
)
|
|
||||||
print()
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# Step 4: Audit (if requested or always in full mode)
|
# Step 4: Audit (if requested or always in full mode)
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
@@ -647,22 +498,14 @@ def main():
|
|||||||
# Final verdict
|
# Final verdict
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
print("=" * 72)
|
print("=" * 72)
|
||||||
if hash_match or tolerance_match:
|
if computed_hash == expected_hash:
|
||||||
print(" VERDICT: PASS")
|
print(" VERDICT: PASS")
|
||||||
print()
|
print()
|
||||||
if hash_match:
|
print(" The pipeline produced a SHA-256 hash that matches the published")
|
||||||
print(" The pipeline produced a SHA-256 hash that matches the published")
|
print(" expected hash. This proves:")
|
||||||
print(" expected hash (bit-exact). This proves:")
|
|
||||||
else:
|
|
||||||
print(" The bit-exact hash differs (CPU-microarchitecture FP reordering),")
|
|
||||||
print(" but the raw feature vector matches the published reference within")
|
|
||||||
print(
|
|
||||||
f" rtol={TOLERANCE_RTOL:g} / atol={TOLERANCE_ATOL:g} "
|
|
||||||
f"(max rel dev {max_rel_dev:.2e}). This proves:"
|
|
||||||
)
|
|
||||||
print(" 1. The SAME signal processing code ran on the reference signal")
|
print(" 1. The SAME signal processing code ran on the reference signal")
|
||||||
print(" 2. The output is DETERMINISTIC (same input -> same output)")
|
print(" 2. The output is DETERMINISTIC (same input -> same output)")
|
||||||
print(" 3. No randomness was introduced")
|
print(" 3. No randomness was introduced (hash would differ)")
|
||||||
print(" 4. The code path includes: noise removal, Hamming windowing,")
|
print(" 4. The code path includes: noise removal, Hamming windowing,")
|
||||||
print(" amplitude normalization, FFT-based Doppler extraction,")
|
print(" amplitude normalization, FFT-based Doppler extraction,")
|
||||||
print(" and power spectral density computation")
|
print(" and power spectral density computation")
|
||||||
@@ -673,19 +516,14 @@ def main():
|
|||||||
else:
|
else:
|
||||||
print(" VERDICT: FAIL")
|
print(" VERDICT: FAIL")
|
||||||
print()
|
print()
|
||||||
print(" The pipeline output does NOT match the expected hash OR the")
|
print(" The pipeline output does NOT match the expected hash.")
|
||||||
print(" reference feature vector within tolerance.")
|
|
||||||
if max_rel_dev is not None:
|
|
||||||
print(
|
|
||||||
f" max abs dev: {max_abs_dev:.3e} max rel dev: {max_rel_dev:.3e}"
|
|
||||||
f" (rtol={TOLERANCE_RTOL:g}, atol={TOLERANCE_ATOL:g})"
|
|
||||||
)
|
|
||||||
print()
|
print()
|
||||||
print(" Possible causes:")
|
print(" Possible causes:")
|
||||||
|
print(" - Numpy/scipy version mismatch (check requirements)")
|
||||||
print(" - Code change in CSI processor that alters numerical output")
|
print(" - Code change in CSI processor that alters numerical output")
|
||||||
print(" - A real (non-microarch) numerical regression")
|
print(" - Platform floating-point differences (unlikely for IEEE 754)")
|
||||||
print()
|
print()
|
||||||
print(" To update after an intentional change:")
|
print(" To update the expected hash after intentional changes:")
|
||||||
print(" python verify.py --generate-hash")
|
print(" python verify.py --generate-hash")
|
||||||
print("=" * 72)
|
print("=" * 72)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
@@ -6,14 +6,8 @@
|
|||||||
#
|
#
|
||||||
# To update: change versions, run `python v1/data/proof/verify.py --generate-hash`,
|
# To update: change versions, run `python v1/data/proof/verify.py --generate-hash`,
|
||||||
# then commit the new expected_features.sha256.
|
# then commit the new expected_features.sha256.
|
||||||
#
|
|
||||||
# numpy/scipy track the versions the *published* expected hash
|
|
||||||
# (expected_features.sha256 = ca58956c…) was generated with — modern numpy 2.x,
|
|
||||||
# i.e. what a fresh `pip install numpy` and the proof-of-capabilities.md skeptic
|
|
||||||
# path produce today. The old 1.26.4 pin no longer matched that hash and made
|
|
||||||
# the determinism gate fail against its own published proof.
|
|
||||||
|
|
||||||
numpy==2.4.2
|
numpy==1.26.4
|
||||||
scipy==1.17.1
|
scipy==1.14.1
|
||||||
pydantic==2.10.4
|
pydantic==2.10.4
|
||||||
pydantic-settings==2.7.1
|
pydantic-settings==2.7.1
|
||||||
|
|||||||
@@ -26,12 +26,7 @@ class Settings(BaseSettings):
|
|||||||
workers: int = Field(default=1, description="Number of worker processes")
|
workers: int = Field(default=1, description="Number of worker processes")
|
||||||
|
|
||||||
# Security settings
|
# Security settings
|
||||||
secret_key: str = Field(
|
secret_key: str = Field(..., description="Secret key for JWT tokens")
|
||||||
default="dev-not-secret-CHANGE-IN-PROD",
|
|
||||||
description="Secret key for JWT tokens (production deployments "
|
|
||||||
"MUST override via SECRET_KEY env or .env; the dev "
|
|
||||||
"default is rejected by validate_production_config)",
|
|
||||||
)
|
|
||||||
jwt_algorithm: str = Field(default="HS256", description="JWT algorithm")
|
jwt_algorithm: str = Field(default="HS256", description="JWT algorithm")
|
||||||
jwt_expire_hours: int = Field(default=24, description="JWT token expiration in hours")
|
jwt_expire_hours: int = Field(default=24, description="JWT token expiration in hours")
|
||||||
allowed_hosts: List[str] = Field(default=["*"], description="Allowed hosts")
|
allowed_hosts: List[str] = Field(default=["*"], description="Allowed hosts")
|
||||||
@@ -163,14 +158,7 @@ class Settings(BaseSettings):
|
|||||||
model_config = SettingsConfigDict(
|
model_config = SettingsConfigDict(
|
||||||
env_file=".env",
|
env_file=".env",
|
||||||
env_file_encoding="utf-8",
|
env_file_encoding="utf-8",
|
||||||
case_sensitive=False,
|
case_sensitive=False
|
||||||
# Tolerate `.env` keys that this Settings model doesn't declare
|
|
||||||
# (e.g., NPM_TOKEN, DOCKER_HUB_TOKEN, PYPI_TOKEN used by other
|
|
||||||
# tooling). Without `extra="ignore"` pydantic-settings 2.x
|
|
||||||
# raises `ValidationError: Extra inputs are not permitted` and
|
|
||||||
# leaks the offending values into the error message — a real
|
|
||||||
# security concern for secret tokens. See verify.py / `./verify`.
|
|
||||||
extra="ignore",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
@field_validator("environment")
|
@field_validator("environment")
|
||||||
|
|||||||
@@ -143,35 +143,13 @@ class ESP32BinaryParser:
|
|||||||
12 4 Sequence number (LE u32)
|
12 4 Sequence number (LE u32)
|
||||||
16 1 RSSI (i8)
|
16 1 RSSI (i8)
|
||||||
17 1 Noise floor (i8)
|
17 1 Noise floor (i8)
|
||||||
18 1 PPDU type (ADR-110): 0=HT/legacy, 1=HE-SU, 2=HE-MU,
|
18 2 Reserved
|
||||||
3=HE-TB, 0xFF=unknown. Pre-ADR-110 firmware sends 0.
|
|
||||||
19 1 Flags (ADR-110): bit 0 = bw40, bit 2 = STBC,
|
|
||||||
bit 3 = LDPC, bit 4 = cross-node sync valid
|
|
||||||
(set by either c6_timesync OR c6_sync_espnow
|
|
||||||
since v0.7.0 — ADR-110 §A0.13).
|
|
||||||
20 N*2 I/Q pairs (n_antennas * n_subcarriers * 2 bytes, signed i8)
|
20 N*2 I/Q pairs (n_antennas * n_subcarriers * 2 bytes, signed i8)
|
||||||
|
|
||||||
Sibling packet (ADR-110 §A0.12, firmware v0.6.9+): the node also
|
|
||||||
emits a 32-byte UDP sync packet (magic 0xC511A110) every
|
|
||||||
CONFIG_C6_SYNC_EVERY_N_FRAMES frames on the same UDP socket.
|
|
||||||
See parse_sync_packet() / SyncPacket below.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
MAGIC = 0xC5110001
|
MAGIC = 0xC5110001
|
||||||
HEADER_SIZE = 20
|
HEADER_SIZE = 20
|
||||||
# ADR-110: previously '<IBBHIIBB2x' (last 2 bytes skipped as reserved).
|
HEADER_FMT = '<IBBHIIBB2x' # magic, node_id, n_ant, n_sc, freq, seq, rssi, noise
|
||||||
# Now read those 2 bytes as PPDU type + flags. Pre-ADR-110 firmware
|
|
||||||
# sends zeros, which decode as 'HT/legacy' + 'no flags' — fully
|
|
||||||
# backwards compatible.
|
|
||||||
HEADER_FMT = '<IBBHIIBBBB' # +2 bytes: ppdu_type, flags
|
|
||||||
|
|
||||||
# ADR-110 PPDU type byte values
|
|
||||||
PPDU_HT_LEGACY = 0
|
|
||||||
PPDU_HE_SU = 1
|
|
||||||
PPDU_HE_MU = 2
|
|
||||||
PPDU_HE_TB = 3
|
|
||||||
PPDU_UNKNOWN = 0xFF
|
|
||||||
_PPDU_NAMES = {0: 'ht_legacy', 1: 'he_su', 2: 'he_mu', 3: 'he_tb', 0xFF: 'unknown'}
|
|
||||||
|
|
||||||
def parse(self, raw_data: bytes) -> CSIData:
|
def parse(self, raw_data: bytes) -> CSIData:
|
||||||
"""Parse an ADR-018 binary frame into CSIData.
|
"""Parse an ADR-018 binary frame into CSIData.
|
||||||
@@ -190,8 +168,8 @@ class ESP32BinaryParser:
|
|||||||
f"Frame too short: need {self.HEADER_SIZE} bytes, got {len(raw_data)}"
|
f"Frame too short: need {self.HEADER_SIZE} bytes, got {len(raw_data)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
magic, node_id, n_antennas, n_subcarriers, freq_mhz, sequence, rssi_u8, noise_u8, \
|
magic, node_id, n_antennas, n_subcarriers, freq_mhz, sequence, rssi_u8, noise_u8 = \
|
||||||
ppdu_byte, flags_byte = struct.unpack_from(self.HEADER_FMT, raw_data, 0)
|
struct.unpack_from(self.HEADER_FMT, raw_data, 0)
|
||||||
|
|
||||||
if magic != self.MAGIC:
|
if magic != self.MAGIC:
|
||||||
raise CSIParseError(
|
raise CSIParseError(
|
||||||
@@ -221,15 +199,11 @@ class ESP32BinaryParser:
|
|||||||
|
|
||||||
snr = float(rssi - noise_floor)
|
snr = float(rssi - noise_floor)
|
||||||
frequency = float(freq_mhz) * 1e6
|
frequency = float(freq_mhz) * 1e6
|
||||||
|
bandwidth = 20e6 # default; could infer from n_subcarriers
|
||||||
|
|
||||||
# Bandwidth inference (issue #1005): HE-LTF uses a 4x denser tone
|
if n_subcarriers <= 56:
|
||||||
# grid than HT-LTF on the same channel width — an HE-SU frame with
|
|
||||||
# 256 bins (242 active HE20 tones) is a *20 MHz* capture, not 160.
|
|
||||||
if ppdu_byte in (1, 2, 3): # HE-SU / HE-MU / HE-TB
|
|
||||||
bandwidth = 40e6 if (flags_byte & 0x01) or n_subcarriers > 256 else 20e6
|
|
||||||
elif n_subcarriers <= 64: # ESP32 HT20 delivers the full 64-bin FFT
|
|
||||||
bandwidth = 20e6
|
bandwidth = 20e6
|
||||||
elif n_subcarriers <= 128:
|
elif n_subcarriers <= 114:
|
||||||
bandwidth = 40e6
|
bandwidth = 40e6
|
||||||
elif n_subcarriers <= 242:
|
elif n_subcarriers <= 242:
|
||||||
bandwidth = 80e6
|
bandwidth = 80e6
|
||||||
@@ -252,128 +226,10 @@ class ESP32BinaryParser:
|
|||||||
'rssi_dbm': rssi,
|
'rssi_dbm': rssi,
|
||||||
'noise_floor_dbm': noise_floor,
|
'noise_floor_dbm': noise_floor,
|
||||||
'channel_freq_mhz': freq_mhz,
|
'channel_freq_mhz': freq_mhz,
|
||||||
# ADR-110 extension — zeros from pre-ADR-110 firmware land here as
|
|
||||||
# 'ht_legacy' + all-flags-false. New consumers can branch on
|
|
||||||
# ppdu_type / he_capable for HE-LTF-aware DSP.
|
|
||||||
'ppdu_type': self._PPDU_NAMES.get(ppdu_byte, 'unknown'),
|
|
||||||
'ppdu_type_raw': ppdu_byte,
|
|
||||||
'he_capable': ppdu_byte in (1, 2, 3),
|
|
||||||
'bw40': bool(flags_byte & 0x01),
|
|
||||||
'stbc': bool(flags_byte & 0x04),
|
|
||||||
'ldpc': bool(flags_byte & 0x08),
|
|
||||||
'ieee802154_sync_valid': bool(flags_byte & 0x10),
|
|
||||||
'adr018_flags_raw': flags_byte,
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class SyncPacket:
|
|
||||||
"""ADR-110 §A0.12 sync packet (firmware v0.6.9+, magic 0xC511A110).
|
|
||||||
|
|
||||||
Emitted on the same UDP socket as CSI frames every
|
|
||||||
CONFIG_C6_SYNC_EVERY_N_FRAMES frames. Carries the mesh-aligned
|
|
||||||
epoch for the node alongside the high-water CSI sequence number,
|
|
||||||
so the host aggregator can pair (node_id, sequence) across the two
|
|
||||||
packet streams and recover a mesh-aligned timestamp for every CSI
|
|
||||||
frame. See WITNESS-LOG-110 §A0.12 for the live verification.
|
|
||||||
"""
|
|
||||||
node_id: int
|
|
||||||
proto_ver: int
|
|
||||||
is_leader: bool
|
|
||||||
is_valid: bool
|
|
||||||
smoothed_used: bool
|
|
||||||
local_us: int # u64 — node's local esp_timer_get_time()
|
|
||||||
epoch_us: int # u64 — local + EMA-smoothed offset (mesh time)
|
|
||||||
sequence: int # u32 — high-water CSI sequence at emit time
|
|
||||||
flags_raw: int
|
|
||||||
|
|
||||||
def local_minus_epoch_us(self) -> int:
|
|
||||||
"""Signed local-vs-mesh clock offset in µs.
|
|
||||||
|
|
||||||
Negative when this node's clock is behind the leader's (typical
|
|
||||||
for followers). Equal to ≈0 on the leader (modulo call-stack µs).
|
|
||||||
Matches Rust's `SyncPacket::local_minus_epoch_us` byte-for-byte.
|
|
||||||
"""
|
|
||||||
return self.local_us - self.epoch_us
|
|
||||||
|
|
||||||
def apply_to_local(self, local_at_frame_us: int) -> int:
|
|
||||||
"""Recover a mesh-aligned timestamp for any node-local µs snapshot.
|
|
||||||
|
|
||||||
Math (see WITNESS-LOG-110 §A0.10 / §A0.12):
|
|
||||||
offset = epoch_us - local_us (signed; this packet)
|
|
||||||
mesh = local_at_frame_us + offset
|
|
||||||
|
|
||||||
Identical contract to Rust's `SyncPacket::apply_to_local`.
|
|
||||||
Identity at `local_at_frame_us == self.local_us` returns `epoch_us`.
|
|
||||||
"""
|
|
||||||
offset = self.epoch_us - self.local_us
|
|
||||||
return local_at_frame_us + offset
|
|
||||||
|
|
||||||
def mesh_aligned_us_for_sequence(self, frame_seq: int, fps_hz: float) -> int:
|
|
||||||
"""ADR-110 §A0.12 — recover the mesh-aligned timestamp for an
|
|
||||||
in-flight CSI frame by its sequence number.
|
|
||||||
|
|
||||||
Pairs the frame's sequence number against this sync packet's
|
|
||||||
sequence high-water + an assumed/measured CSI rate. Matches the
|
|
||||||
Rust implementation byte-for-byte at the integer level (Python
|
|
||||||
rounds via `int()` truncation; for the canonical bench values
|
|
||||||
this is exact).
|
|
||||||
"""
|
|
||||||
if fps_hz <= 0:
|
|
||||||
raise ValueError(f"fps_hz must be positive, got {fps_hz}")
|
|
||||||
# Wrap to handle u32 sequence overflow the same way Rust does.
|
|
||||||
dframes = (frame_seq - self.sequence) & 0xFFFFFFFF
|
|
||||||
if dframes >= 0x80000000:
|
|
||||||
dframes -= 0x1_0000_0000
|
|
||||||
dus = int(dframes * 1_000_000 / fps_hz)
|
|
||||||
local_at = self.local_us + dus
|
|
||||||
return self.apply_to_local(local_at)
|
|
||||||
|
|
||||||
|
|
||||||
class SyncPacketParser:
|
|
||||||
"""Parser for ADR-110 §A0.12 32-byte sync packets.
|
|
||||||
|
|
||||||
Distinguished from CSI frames by the leading magic. Callers should
|
|
||||||
dispatch incoming UDP datagrams based on the first 4 bytes:
|
|
||||||
|
|
||||||
magic = struct.unpack_from('<I', data, 0)[0]
|
|
||||||
if magic == ESP32BinaryParser.MAGIC: # 0xC5110001 — CSI frame
|
|
||||||
...
|
|
||||||
elif magic == SyncPacketParser.MAGIC: # 0xC511A110 — sync packet
|
|
||||||
...
|
|
||||||
"""
|
|
||||||
|
|
||||||
MAGIC = 0xC511A110
|
|
||||||
SIZE = 32
|
|
||||||
# <IBBBB QQ IB3x>
|
|
||||||
# I=magic, B=node_id, B=proto_ver, B=flags, B=reserved,
|
|
||||||
# Q=local_us, Q=epoch_us, I=sequence, B+3x=reserved
|
|
||||||
HEADER_FMT = '<IBBBBQQI4x'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def parse(cls, raw_data: bytes) -> SyncPacket:
|
|
||||||
if len(raw_data) < cls.SIZE:
|
|
||||||
raise CSIParseError(
|
|
||||||
f"Sync packet too short: {len(raw_data)} bytes, need {cls.SIZE}"
|
|
||||||
)
|
|
||||||
magic, node_id, proto_ver, flags_byte, _, local_us, epoch_us, seq = \
|
|
||||||
struct.unpack_from(cls.HEADER_FMT, raw_data, 0)
|
|
||||||
if magic != cls.MAGIC:
|
|
||||||
raise CSIParseError(f"Sync magic mismatch: got 0x{magic:08x}")
|
|
||||||
return SyncPacket(
|
|
||||||
node_id=node_id,
|
|
||||||
proto_ver=proto_ver,
|
|
||||||
is_leader=bool(flags_byte & 0x01),
|
|
||||||
is_valid=bool(flags_byte & 0x02),
|
|
||||||
smoothed_used=bool(flags_byte & 0x04),
|
|
||||||
local_us=local_us,
|
|
||||||
epoch_us=epoch_us,
|
|
||||||
sequence=seq,
|
|
||||||
flags_raw=flags_byte,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class RouterCSIParser:
|
class RouterCSIParser:
|
||||||
"""Parser for router CSI data format."""
|
"""Parser for router CSI data format."""
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ from datetime import datetime, timedelta
|
|||||||
|
|
||||||
from fastapi import Request, Response, HTTPException, status
|
from fastapi import Request, Response, HTTPException, status
|
||||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||||
from starlette.middleware.base import BaseHTTPMiddleware
|
|
||||||
from jose import JWTError, jwt
|
from jose import JWTError, jwt
|
||||||
from passlib.context import CryptContext
|
from passlib.context import CryptContext
|
||||||
|
|
||||||
@@ -156,17 +155,16 @@ class UserManager:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
class AuthenticationMiddleware(BaseHTTPMiddleware):
|
class AuthenticationMiddleware:
|
||||||
"""Authentication middleware for FastAPI."""
|
"""Authentication middleware for FastAPI."""
|
||||||
|
|
||||||
def __init__(self, app, settings: Settings):
|
def __init__(self, settings: Settings):
|
||||||
super().__init__(app)
|
|
||||||
self.settings = settings
|
self.settings = settings
|
||||||
self.token_manager = TokenManager(settings)
|
self.token_manager = TokenManager(settings)
|
||||||
self.user_manager = UserManager()
|
self.user_manager = UserManager()
|
||||||
self.enabled = settings.enable_authentication
|
self.enabled = settings.enable_authentication
|
||||||
|
|
||||||
async def dispatch(self, request: Request, call_next: Callable) -> Response:
|
async def __call__(self, request: Request, call_next: Callable) -> Response:
|
||||||
"""Process request through authentication middleware."""
|
"""Process request through authentication middleware."""
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ from collections import defaultdict, deque
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from fastapi import Request, Response, HTTPException, status
|
from fastapi import Request, Response, HTTPException, status
|
||||||
from starlette.middleware.base import BaseHTTPMiddleware
|
|
||||||
from starlette.types import ASGIApp
|
from starlette.types import ASGIApp
|
||||||
|
|
||||||
from src.config.settings import Settings
|
from src.config.settings import Settings
|
||||||
@@ -300,16 +299,15 @@ class RateLimiter:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class RateLimitMiddleware(BaseHTTPMiddleware):
|
class RateLimitMiddleware:
|
||||||
"""Rate limiting middleware for FastAPI."""
|
"""Rate limiting middleware for FastAPI."""
|
||||||
|
|
||||||
def __init__(self, app, settings: Settings):
|
def __init__(self, settings: Settings):
|
||||||
super().__init__(app)
|
|
||||||
self.settings = settings
|
self.settings = settings
|
||||||
self.rate_limiter = RateLimiter(settings)
|
self.rate_limiter = RateLimiter(settings)
|
||||||
self.enabled = settings.enable_rate_limiting
|
self.enabled = settings.enable_rate_limiting
|
||||||
|
|
||||||
async def dispatch(self, request: Request, call_next: Callable) -> Response:
|
async def __call__(self, request: Request, call_next: Callable) -> Response:
|
||||||
"""Process request through rate limiting middleware."""
|
"""Process request through rate limiting middleware."""
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
return await call_next(request)
|
return await call_next(request)
|
||||||
|
|||||||
@@ -107,25 +107,16 @@ class PoseService:
|
|||||||
async def _initialize_models(self):
|
async def _initialize_models(self):
|
||||||
"""Initialize neural network models."""
|
"""Initialize neural network models."""
|
||||||
try:
|
try:
|
||||||
# Initialize DensePose model. DensePoseHead requires a config
|
# Initialize DensePose model
|
||||||
# dict — input_channels matches the modality translator's output
|
|
||||||
# (256), with the standard DensePose 24 body parts and 2 (U,V)
|
|
||||||
# coordinates. (Previously called with no args → TypeError at
|
|
||||||
# startup, which broke the API service.)
|
|
||||||
densepose_config = {
|
|
||||||
'input_channels': 256,
|
|
||||||
'num_body_parts': 24,
|
|
||||||
'num_uv_coordinates': 2,
|
|
||||||
}
|
|
||||||
if self.settings.pose_model_path:
|
if self.settings.pose_model_path:
|
||||||
self.densepose_model = DensePoseHead(densepose_config)
|
self.densepose_model = DensePoseHead()
|
||||||
# Load model weights if path is provided
|
# Load model weights if path is provided
|
||||||
# model_state = torch.load(self.settings.pose_model_path)
|
# model_state = torch.load(self.settings.pose_model_path)
|
||||||
# self.densepose_model.load_state_dict(model_state)
|
# self.densepose_model.load_state_dict(model_state)
|
||||||
self.logger.info("DensePose model loaded")
|
self.logger.info("DensePose model loaded")
|
||||||
else:
|
else:
|
||||||
self.logger.warning("No pose model path provided, using default model")
|
self.logger.warning("No pose model path provided, using default model")
|
||||||
self.densepose_model = DensePoseHead(densepose_config)
|
self.densepose_model = DensePoseHead()
|
||||||
|
|
||||||
# Initialize modality translation
|
# Initialize modality translation
|
||||||
config = {
|
config = {
|
||||||
@@ -229,11 +220,7 @@ class PoseService:
|
|||||||
# Apply phase sanitization if we have phase data
|
# Apply phase sanitization if we have phase data
|
||||||
if hasattr(detection_result.features, 'phase_difference'):
|
if hasattr(detection_result.features, 'phase_difference'):
|
||||||
phase_data = detection_result.features.phase_difference
|
phase_data = detection_result.features.phase_difference
|
||||||
# PhaseSanitizer's full-pipeline method is sanitize_phase,
|
sanitized_phase = self.phase_sanitizer.sanitize(phase_data)
|
||||||
# not sanitize (issue #612). The shorter name was an
|
|
||||||
# AttributeError waiting to fire on any code path that
|
|
||||||
# reaches this branch.
|
|
||||||
sanitized_phase = self.phase_sanitizer.sanitize_phase(phase_data)
|
|
||||||
# Combine amplitude and phase data
|
# Combine amplitude and phase data
|
||||||
return np.concatenate([amplitude_data, sanitized_phase])
|
return np.concatenate([amplitude_data, sanitized_phase])
|
||||||
|
|
||||||
|
|||||||
@@ -19,16 +19,11 @@ from hardware.csi_extractor import (
|
|||||||
CSIExtractor,
|
CSIExtractor,
|
||||||
CSIParseError,
|
CSIParseError,
|
||||||
CSIExtractionError,
|
CSIExtractionError,
|
||||||
SyncPacket,
|
|
||||||
SyncPacketParser,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# ADR-018 constants
|
# ADR-018 constants
|
||||||
MAGIC = 0xC5110001
|
MAGIC = 0xC5110001
|
||||||
# ADR-110: bytes 18-19 are now PPDU type + flags (used to be `2x` reserved).
|
HEADER_FMT = '<IBBHIIBB2x'
|
||||||
# Pre-ADR-110 firmware sends zeros for both, which round-trip as
|
|
||||||
# ('ht_legacy', flags=all-false) — fully backwards compatible.
|
|
||||||
HEADER_FMT = '<IBBHIIBBBB'
|
|
||||||
HEADER_SIZE = 20
|
HEADER_SIZE = 20
|
||||||
|
|
||||||
|
|
||||||
@@ -41,8 +36,6 @@ def build_binary_frame(
|
|||||||
rssi: int = -50,
|
rssi: int = -50,
|
||||||
noise_floor: int = -90,
|
noise_floor: int = -90,
|
||||||
iq_pairs: list = None,
|
iq_pairs: list = None,
|
||||||
ppdu_byte: int = 0, # ADR-110: default 0 = HT/legacy (pre-ADR-110 behavior)
|
|
||||||
flags_byte: int = 0, # ADR-110: default 0 = no flags set
|
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
"""Build an ADR-018 binary frame for testing."""
|
"""Build an ADR-018 binary frame for testing."""
|
||||||
if iq_pairs is None:
|
if iq_pairs is None:
|
||||||
@@ -61,8 +54,6 @@ def build_binary_frame(
|
|||||||
sequence,
|
sequence,
|
||||||
rssi_u8,
|
rssi_u8,
|
||||||
noise_u8,
|
noise_u8,
|
||||||
ppdu_byte,
|
|
||||||
flags_byte,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
iq_data = b''
|
iq_data = b''
|
||||||
@@ -72,52 +63,6 @@ def build_binary_frame(
|
|||||||
return header + iq_data
|
return header + iq_data
|
||||||
|
|
||||||
|
|
||||||
class TestAdr110ByteEncoding:
|
|
||||||
"""ADR-110: byte 18 = PPDU type, byte 19 = flags."""
|
|
||||||
|
|
||||||
def setup_method(self):
|
|
||||||
self.parser = ESP32BinaryParser()
|
|
||||||
|
|
||||||
def test_pre_adr110_zeros_decode_as_ht_legacy(self):
|
|
||||||
"""Pre-ADR-110 firmware sends zeros → must surface as HT/legacy + no flags."""
|
|
||||||
frame = build_binary_frame() # ppdu_byte=0, flags_byte=0 default
|
|
||||||
csi = self.parser.parse(frame)
|
|
||||||
assert csi.metadata['ppdu_type'] == 'ht_legacy'
|
|
||||||
assert csi.metadata['ppdu_type_raw'] == 0
|
|
||||||
assert csi.metadata['he_capable'] is False
|
|
||||||
assert csi.metadata['bw40'] is False
|
|
||||||
assert csi.metadata['stbc'] is False
|
|
||||||
assert csi.metadata['ldpc'] is False
|
|
||||||
assert csi.metadata['ieee802154_sync_valid'] is False
|
|
||||||
|
|
||||||
def test_he_su_decodes(self):
|
|
||||||
frame = build_binary_frame(ppdu_byte=1)
|
|
||||||
csi = self.parser.parse(frame)
|
|
||||||
assert csi.metadata['ppdu_type'] == 'he_su'
|
|
||||||
assert csi.metadata['he_capable'] is True
|
|
||||||
|
|
||||||
def test_he_mu_and_he_tb_decode(self):
|
|
||||||
for byte, expected in [(2, 'he_mu'), (3, 'he_tb')]:
|
|
||||||
csi = self.parser.parse(build_binary_frame(ppdu_byte=byte))
|
|
||||||
assert csi.metadata['ppdu_type'] == expected
|
|
||||||
assert csi.metadata['he_capable'] is True
|
|
||||||
|
|
||||||
def test_unknown_ppdu_byte(self):
|
|
||||||
csi = self.parser.parse(build_binary_frame(ppdu_byte=0xFF))
|
|
||||||
assert csi.metadata['ppdu_type'] == 'unknown'
|
|
||||||
assert csi.metadata['ppdu_type_raw'] == 0xFF
|
|
||||||
assert csi.metadata['he_capable'] is False
|
|
||||||
|
|
||||||
def test_all_flags_set_round_trip(self):
|
|
||||||
# bw40 (0x01) + STBC (0x04) + LDPC (0x08) + 15.4-sync (0x10) = 0x1D
|
|
||||||
csi = self.parser.parse(build_binary_frame(ppdu_byte=1, flags_byte=0x1D))
|
|
||||||
assert csi.metadata['bw40'] is True
|
|
||||||
assert csi.metadata['stbc'] is True
|
|
||||||
assert csi.metadata['ldpc'] is True
|
|
||||||
assert csi.metadata['ieee802154_sync_valid'] is True
|
|
||||||
assert csi.metadata['adr018_flags_raw'] == 0x1D
|
|
||||||
|
|
||||||
|
|
||||||
class TestESP32BinaryParser:
|
class TestESP32BinaryParser:
|
||||||
"""Tests for ESP32BinaryParser."""
|
"""Tests for ESP32BinaryParser."""
|
||||||
|
|
||||||
@@ -259,172 +204,3 @@ class TestESP32BinaryParser:
|
|||||||
await extractor.disconnect()
|
await extractor.disconnect()
|
||||||
|
|
||||||
asyncio.run(run_test())
|
asyncio.run(run_test())
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# ADR-110 §A0.12 — SyncPacket / SyncPacketParser tests (firmware v0.6.9+)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
SYNC_MAGIC = 0xC511A110
|
|
||||||
SYNC_SIZE = 32
|
|
||||||
SYNC_FMT = '<IBBBBQQI4x'
|
|
||||||
|
|
||||||
|
|
||||||
def build_sync_packet(
|
|
||||||
node_id: int = 9,
|
|
||||||
proto_ver: int = 1,
|
|
||||||
is_leader: bool = False,
|
|
||||||
is_valid: bool = True,
|
|
||||||
smoothed_used: bool = True,
|
|
||||||
local_us: int = 28798450,
|
|
||||||
epoch_us: int = 27634885,
|
|
||||||
sequence: int = 20,
|
|
||||||
) -> bytes:
|
|
||||||
flags = 0
|
|
||||||
if is_leader: flags |= 0x01
|
|
||||||
if is_valid: flags |= 0x02
|
|
||||||
if smoothed_used: flags |= 0x04
|
|
||||||
return struct.pack(
|
|
||||||
SYNC_FMT,
|
|
||||||
SYNC_MAGIC,
|
|
||||||
node_id, proto_ver, flags, 0,
|
|
||||||
local_us, epoch_us, sequence,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestSyncPacketParser:
|
|
||||||
"""ADR-110 §A0.12: 32-byte UDP sync packet (magic 0xC511A110)."""
|
|
||||||
|
|
||||||
def test_follower_typical_packet_roundtrips(self):
|
|
||||||
"""Match the COM9-witnessed sync-pkt #1 byte-for-byte."""
|
|
||||||
raw = build_sync_packet(
|
|
||||||
node_id=9, is_leader=False, is_valid=True, smoothed_used=True,
|
|
||||||
local_us=28798450, epoch_us=27634885, sequence=20,
|
|
||||||
)
|
|
||||||
assert len(raw) == SYNC_SIZE
|
|
||||||
pkt = SyncPacketParser.parse(raw)
|
|
||||||
assert isinstance(pkt, SyncPacket)
|
|
||||||
assert pkt.node_id == 9
|
|
||||||
assert pkt.proto_ver == 1
|
|
||||||
assert pkt.is_leader is False
|
|
||||||
assert pkt.is_valid is True
|
|
||||||
assert pkt.smoothed_used is True
|
|
||||||
assert pkt.local_us == 28798450
|
|
||||||
assert pkt.epoch_us == 27634885
|
|
||||||
assert pkt.sequence == 20
|
|
||||||
# The 1.16-second boot delta from §A0.10 should be recoverable
|
|
||||||
assert pkt.local_us - pkt.epoch_us == 1163565
|
|
||||||
|
|
||||||
def test_leader_packet_has_local_close_to_epoch(self):
|
|
||||||
"""COM12 (leader) had flags=0x03 and epoch ≈ local."""
|
|
||||||
raw = build_sync_packet(
|
|
||||||
node_id=12, is_leader=True, is_valid=True, smoothed_used=False,
|
|
||||||
local_us=28864932, epoch_us=28864939, sequence=20,
|
|
||||||
)
|
|
||||||
pkt = SyncPacketParser.parse(raw)
|
|
||||||
assert pkt.node_id == 12
|
|
||||||
assert pkt.is_leader is True
|
|
||||||
assert pkt.is_valid is True
|
|
||||||
assert pkt.smoothed_used is False
|
|
||||||
assert pkt.flags_raw == 0x03
|
|
||||||
assert pkt.local_us - pkt.epoch_us == -7 # leader has zero offset
|
|
||||||
|
|
||||||
def test_magic_mismatch_raises(self):
|
|
||||||
"""A non-sync datagram must not silently decode."""
|
|
||||||
raw = bytearray(build_sync_packet())
|
|
||||||
raw[0] = 0x01 # corrupt magic low byte
|
|
||||||
with pytest.raises(CSIParseError, match="magic mismatch"):
|
|
||||||
SyncPacketParser.parse(bytes(raw))
|
|
||||||
|
|
||||||
def test_short_packet_raises(self):
|
|
||||||
"""Below 32 bytes must error early, not silently truncate."""
|
|
||||||
raw = build_sync_packet()[:16]
|
|
||||||
with pytest.raises(CSIParseError, match="too short"):
|
|
||||||
SyncPacketParser.parse(raw)
|
|
||||||
|
|
||||||
def test_all_flag_combinations(self):
|
|
||||||
"""Each flag bit decodes independently."""
|
|
||||||
for is_leader in (False, True):
|
|
||||||
for is_valid in (False, True):
|
|
||||||
for smoothed_used in (False, True):
|
|
||||||
raw = build_sync_packet(
|
|
||||||
is_leader=is_leader,
|
|
||||||
is_valid=is_valid,
|
|
||||||
smoothed_used=smoothed_used,
|
|
||||||
)
|
|
||||||
pkt = SyncPacketParser.parse(raw)
|
|
||||||
assert pkt.is_leader == is_leader
|
|
||||||
assert pkt.is_valid == is_valid
|
|
||||||
assert pkt.smoothed_used == smoothed_used
|
|
||||||
|
|
||||||
def test_dispatch_distinguishes_csi_from_sync(self):
|
|
||||||
"""A host can pick CSI vs sync by leading magic."""
|
|
||||||
csi_magic = struct.unpack_from('<I', build_binary_frame(), 0)[0]
|
|
||||||
sync_magic = struct.unpack_from('<I', build_sync_packet(), 0)[0]
|
|
||||||
assert csi_magic == ESP32BinaryParser.MAGIC
|
|
||||||
assert sync_magic == SyncPacketParser.MAGIC
|
|
||||||
assert csi_magic != sync_magic
|
|
||||||
|
|
||||||
def test_apply_to_local_recovers_epoch_at_sync_point(self):
|
|
||||||
"""ADR-110 iter 26 — Python parity with Rust's `apply_to_local`.
|
|
||||||
At local_at_frame == sync.local_us, the recovered mesh time must
|
|
||||||
equal sync.epoch_us exactly."""
|
|
||||||
pkt = SyncPacketParser.parse(build_sync_packet(
|
|
||||||
local_us=28_798_450, epoch_us=27_634_885, sequence=20,
|
|
||||||
))
|
|
||||||
assert pkt.apply_to_local(pkt.local_us) == pkt.epoch_us
|
|
||||||
assert pkt.local_minus_epoch_us() == 1_163_565 # §A0.10's bench number
|
|
||||||
|
|
||||||
def test_apply_to_local_preserves_inter_frame_delta(self):
|
|
||||||
"""A frame arriving 5 s after the sync packet on the follower's
|
|
||||||
local clock must produce a mesh time exactly 5 s after sync.epoch_us."""
|
|
||||||
pkt = SyncPacketParser.parse(build_sync_packet(
|
|
||||||
local_us=28_798_450, epoch_us=27_634_885, sequence=20,
|
|
||||||
))
|
|
||||||
local_at_frame = pkt.local_us + 5_000_000
|
|
||||||
assert pkt.apply_to_local(local_at_frame) == pkt.epoch_us + 5_000_000
|
|
||||||
|
|
||||||
def test_mesh_aligned_us_for_sequence_matches_rust(self):
    """Cross-language parity with Rust's
    `end_to_end_sync_decode_then_frame_mesh_recovery` test.

    Sequence 120 is 100 frames past sync.sequence (20); at 20 fps that
    is 5 s, so the interpolated mesh time must be sync.epoch_us + 5 s.
    """
    five_seconds_us = 5_000_000
    raw = build_sync_packet(
        local_us=28_798_450,
        epoch_us=27_634_885,
        sequence=20,
    )
    sync = SyncPacketParser.parse(raw)

    interpolated = sync.mesh_aligned_us_for_sequence(120, 20.0)
    assert interpolated == sync.epoch_us + five_seconds_us

    # The clock-offset path (apply_to_local) and the sequence
    # interpolation path must land on the same mesh timestamp.
    local_at = sync.local_us + five_seconds_us
    assert sync.apply_to_local(local_at) == interpolated
|
|
||||||
|
|
||||||
def test_canonical_wire_bytes_match_rust_decoder(self):
    """ADR-110 iter 21 — cross-language wire-format conformance gate.

    The exact bytes below are also pinned in the Rust hardware crate's
    `canonical_wire_bytes_match_python_decoder` test (same field values,
    encoded by Rust's `SyncPacket::to_bytes`). If this hardcoded hex ever
    stops matching what Rust produces from the equivalent SyncPacket
    struct, ONE of the decoders has drifted from the wire.

    Canonical packet: COM9 sync-pkt #1 from §A0.12 live capture.
    """
    # Four 8-byte words, listed in wire order.
    wire_words = (
        "10a111c509010600",  # magic LE + node=9 + ver=1 + flags=0x06 + reserved
        "f26db70100000000",  # local_us = 28_798_450 (LE u64)
        "c5aca50100000000",  # epoch_us = 27_634_885 (LE u64)
        "1400000000000000",  # sequence = 20 (LE u32) + 4 reserved bytes
    )
    canonical = bytes.fromhex("".join(wire_words))
    assert len(canonical) == SyncPacketParser.SIZE
    assert SyncPacketParser.SIZE == 32

    decoded = SyncPacketParser.parse(canonical)

    # Header fields.
    assert decoded.node_id == 9
    assert decoded.proto_ver == 1
    assert decoded.flags_raw == 0x06

    # Flag accessors for flags_raw == 0x06.
    assert decoded.is_leader is False
    assert decoded.is_valid is True
    assert decoded.smoothed_used is True

    # Timestamps and sequence counter.
    assert decoded.local_us == 28_798_450
    assert decoded.epoch_us == 27_634_885
    assert decoded.sequence == 20

    # Recovered offset matches §A0.10's measured 1.16-second boot delta.
    assert decoded.local_us - decoded.epoch_us == 1_163_565
|
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 1.9 MiB |
Binary file not shown.
|
Before Width: | Height: | Size: 1.2 MiB |
@@ -1,137 +0,0 @@
|
|||||||
# Edge-Latency Benchmark Results — ADR-163
|
|
||||||
|
|
||||||
Converting **CLAIMED** edge latency budgets into **MEASURED-on-host** numbers,
|
|
||||||
closing the measurement debt flagged by Milestones 5/6 (ADR-159 / ADR-160).
|
|
||||||
Benches + docs only — **no production-code behavior changed**.
|
|
||||||
|
|
||||||
## The honest caveat, up front (read before citing any number)
|
|
||||||
|
|
||||||
Two distinct gaps separate every number below from the figure it is converting:
|
|
||||||
|
|
||||||
1. **Host ≠ ESP32.** The wasm-edge skill modules document budgets *"on ESP32-S3
|
|
||||||
WASM3"* (e.g. `exo_time_crystal`: "H (<10 ms)"). These benches run **native
|
|
||||||
x86_64 on a development laptop**, not the Xtensa/WASM3 target. A native host
|
|
||||||
median is a **lower bound on ESP32 latency** (a proxy for the algorithm's work), not the ESP32 number.
|
|
||||||
WASM3 interpretation on a ~240 MHz Xtensa core is typically 1–2 orders of
|
|
||||||
magnitude slower than native `-O` host code, so a host median far under the
|
|
||||||
budget **does NOT prove the ESP32 meets it.** *The ESP32 figure is NOT
|
|
||||||
reproduced here — it needs hardware.*
|
|
||||||
|
|
||||||
2. **Bench ≠ the doc-claimed measurement.** For the cogs, the manifest cites a
|
|
||||||
**cold-start** number (`cold_start_ms_avg`, weight-load included); these
|
|
||||||
benches measure **steady-state** per-frame `infer` (warm, weights resident).
|
|
||||||
Different measurements; we report both, labelled.
|
|
||||||
|
|
||||||
Grades (per `benchmarks/wiflow-std/RESULTS.md` / ADR-152 vocabulary):
|
|
||||||
- **MEASURED-on-host** — reproduced in this repo on the machine below, exact
|
|
||||||
command recorded. NOT the ESP32 / NOT the cold-start figure.
|
|
||||||
- **CLAIMED (ESP32)** — the doc budget; UNMEASURED on hardware here.
|
|
||||||
|
|
||||||
## Machine
|
|
||||||
|
|
||||||
| | |
|
|
||||||
|---|---|
|
|
||||||
| Host | `ruvzen` (Windows 11, this dev box) |
|
|
||||||
| CPU | Intel Core Ultra 9 285H |
|
|
||||||
| Toolchain | `cargo 1.91.1`, `--release` (opt-level per crate profile) |
|
|
||||||
| Bench harness | criterion 0.5 (`time: [low **median** high]` reported below) |
|
|
||||||
| Date | 2026-06-12 |
|
|
||||||
|
|
||||||
Run-to-run spread on this box is non-trivial (criterion's low/high bracket the
|
|
||||||
median by a few %); the medians below are single-session captures with the smoke
|
|
||||||
settings `--warm-up-time 1 --measurement-time 2` (wasm-edge) / `3` (cogs). Re-run
|
|
||||||
for your own machine — the absolute numbers are host-specific.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## T1 — wasm-edge `process_frame` hot paths (ADR-160 deferred item → DONE host)
|
|
||||||
|
|
||||||
The crate is **excluded from the v2 workspace**; bench from the crate dir.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd v2/crates/wifi-densepose-wasm-edge
|
|
||||||
cargo bench --features std -- --warm-up-time 1 --measurement-time 2
|
|
||||||
# med_seizure_detect is medical-experimental-gated:
|
|
||||||
cargo bench --features std,medical-experimental -- --warm-up-time 1 --measurement-time 2 med_seizure
|
|
||||||
```
|
|
||||||
|
|
||||||
| Hot path (M6-audit-named) | Bench id | Host median | Grade | Doc budget (CLAIMED, ESP32) |
|
|
||||||
|---|---|---|---|---|
|
|
||||||
| `exo_time_crystal` 256-pt × 128-lag autocorrelation (full buffer) | `exo_time_crystal::process_frame[autocorr_256x128]` | **17.3 µs** | MEASURED-on-host | "H (<10 ms) on ESP32-S3 WASM3" — **NOT reproduced here (needs hardware)** |
|
|
||||||
| `exo_ghost_hunter` empty-room periodicity + hidden-breathing | `exo_ghost_hunter::process_frame[empty_room_periodicity]` | **1.44 µs** | MEASURED-on-host | research/exotic; no firm ESP32 figure — host proxy only |
|
|
||||||
| `sec_weapon_detect` per-subcarrier Welford (MAX_SC=32) | `sec_weapon_detect::process_frame[per_sc_welford]` | **0.42 µs** (420 ns) | MEASURED-on-host | research-grade; calibration-gated — host proxy only |
|
|
||||||
| `med_seizure_detect` clonic-phase rhythm path (steady-state frame) | `med_seizure_detect::process_frame[clonic_rhythm]` | **0.10 µs** (105 ns) | MEASURED-on-host (feature-gated) | doc budget "S (<5 ms) on ESP32"; **NOT reproduced here** |
|
|
||||||
|
|
||||||
Reading these honestly:
|
|
||||||
|
|
||||||
- `exo_time_crystal` at **17.3 µs host** is the only one whose host cost is even
|
|
||||||
on the order of *thousandths* of its 10 ms ESP32 budget — it does the most work
|
|
||||||
(~32K MACs/frame). 17.3 µs native says the algorithm is cheap; it says
|
|
||||||
**nothing** about whether WASM3-on-Xtensa lands under 10 ms. A naïve
|
|
||||||
host→ESP32 extrapolation (assume 100× interpreter+clock penalty) would put it
|
|
||||||
near ~1.7 ms, comfortably under — **but that is an extrapolation, not a
|
|
||||||
measurement**, and is recorded here only to show the host number is not
|
|
||||||
obviously in tension with the budget. ESP32 figure: **UNMEASURED**.
|
|
||||||
- `med_seizure_detect`'s 105 ns is the **steady-state** per-frame cost; the
|
|
||||||
expensive clonic autocorrelation only fires when the state machine is in the
|
|
||||||
clonic phase, so this is a lower-bound on the heavy path, not the worst case.
|
|
||||||
It is still a real, committed host datapoint.
|
|
||||||
- The pre-existing `tests/budget_compliance.rs` already asserts the L/S/H
|
|
||||||
wall-clock tiers (25 passing tests); these criterion benches add the
|
|
||||||
regression-grade, reproducible median that ADR-160 deferred.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## T2 — cog steady-state inference latency (ADR-159/160 deferred item → DONE)
|
|
||||||
|
|
||||||
Cog crates are normal workspace members; bench from `v2/`. Real weights
|
|
||||||
(`count_v1.safetensors` / `pose_v1.safetensors`) ship in-repo under each cog's
|
|
||||||
`cog/artifacts/`, so the bench measures the **real Candle CPU forward**, not the
|
|
||||||
stub (the bench `assert!`s `backend().starts_with("candle-")`).
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd v2
|
|
||||||
cargo bench -p cog-person-count --no-default-features --bench infer_bench -- --warm-up-time 1 --measurement-time 3
|
|
||||||
cargo bench -p cog-pose-estimation --no-default-features --bench infer_bench -- --warm-up-time 1 --measurement-time 3
|
|
||||||
```
|
|
||||||
|
|
||||||
| Cog | Bench id | Host median (steady-state infer, CPU) | Grade | Manifest cold-start (CLAIMED, different measurement + machine) |
|
|
||||||
|---|---|---|---|---|
|
|
||||||
| cog-person-count | `cog_person_count::infer[cpu_real_weights_steady_state]` | **305 µs** (idle box) | MEASURED-on-host | — (person-count manifest carries comparable provenance) |
|
|
||||||
| cog-pose-estimation | `cog_pose_estimation::infer[cpu_real_weights_steady_state]` | **305 µs** (idle box) | MEASURED-on-host | `cold_start_ms_avg: 5.4` (30 invocations, **ruvultra/RTX 5080 host**, candle 0.9 cpu) — **cold-start, NOT steady-state; NOT this machine** |
|
|
||||||
|
|
||||||
> Spread caveat (observed, honest): both medians above were captured with the box
|
|
||||||
> otherwise idle. A re-run of the validate-form command *while a second cargo job
|
|
||||||
> was loading the same cores* gave 385 µs (person-count) / 973 µs (pose) —
|
|
||||||
> the criterion low/high bracket widens to ~0.34–1.18 ms under contention. The
|
|
||||||
> 305 µs figures are the idle-box datapoints; the absolute number is host- and
|
|
||||||
> load-dependent (the ~3× pose swing, 305 µs → 973 µs, is core contention, not a code change).
|
|
||||||
|
|
||||||
Reading these honestly:
|
|
||||||
|
|
||||||
- **Steady-state ≠ cold-start.** The pose manifest's `5.4 ms` folds in one-time
|
|
||||||
weight load / mmap / first-forward allocation. This bench warms the engine
|
|
||||||
first and times only the recurring per-frame forward, on a *different
|
|
||||||
machine*. The two numbers are not comparable and we do not claim this bench
|
|
||||||
reproduces the 5.4 ms manifest figure.
|
|
||||||
- Both cogs share the same conv encoder; person-count adds a count head +
|
|
||||||
confidence head, pose adds a 256-wide MLP head. The host steady-state cost is
|
|
||||||
dominated by the three dilated Conv1d layers (56→64→128→128) shared by both —
|
|
||||||
which is why both land at ~305 µs.
|
|
||||||
- **Empirical confirmation of the steady-state/cold-start gap:** pose
|
|
||||||
steady-state (305 µs host) is ~18× *under* the manifest's 5.4 ms cold-start.
|
|
||||||
Even accounting for the different machine, this is the expected shape — the
|
|
||||||
bulk of cold-start is one-time setup, not the forward pass — and it is exactly
|
|
||||||
why conflating the two would be dishonest.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Status vs the deferred items
|
|
||||||
|
|
||||||
| Deferred item | Was | Now |
|
|
||||||
|---|---|---|
|
|
||||||
| ADR-160 "Criterion benches for `process_frame` budget claims" | ACCEPTED-FUTURE | **DONE (host)**; ESP32-on-hardware still **PENDING** (needs the wasm32 target + a flashed ESP32-S3) |
|
|
||||||
| ADR-159/160 cog inference latency (`cold_start_ms_avg` uncommitted-benched) | CLAIMED | **MEASURED-on-host (steady-state)**; cold-start-on-ruvultra remains the manifest's separate claim |
|
|
||||||
|
|
||||||
Nothing here changes runtime behavior — these are benches + this results file
|
|
||||||
only. No crate needs republishing.
|
|
||||||
@@ -1,132 +0,0 @@
|
|||||||
# Edge-Skill Synthetic-Ground-Truth Validation — RESULTS
|
|
||||||
|
|
||||||
**Crate:** `v2/crates/wifi-densepose-wasm-edge` (workspace-EXCLUDED — build from its own dir)
|
|
||||||
**Branch:** `feat/edge-skills-synthetic-validation`
|
|
||||||
**ADR:** [ADR-160](../../docs/adr/ADR-160-edge-skill-library-honest-labeling.md)
|
|
||||||
**Date:** 2026-06-13
|
|
||||||
**Harness:** `tests/synthetic_validation.rs`
|
|
||||||
|
|
||||||
> **HONESTY BOUNDARY — read first.** Everything below is **synthetic-ground-truth
|
|
||||||
> validation**: a signal is *planted* with a known answer, the **real** detector
|
|
||||||
> is run, and detection accuracy / precision / recall / rate-error is **measured**.
|
|
||||||
> This is **NOT field accuracy.** A skill that recovers a planted sinusoid here is
|
|
||||||
> proven to do the math it claims on a *constructed* signal; it is **NOT** proven
|
|
||||||
> to work on real CSI in a real room. Skills whose detection target cannot be
|
|
||||||
> honestly planted (clinical, weapon, affect, sleep-stage, sign-language) are
|
|
||||||
> **NOT** given a number — they are listed under **DATA-GATED** with the real
|
|
||||||
> data each would require.
|
|
||||||
|
|
||||||
## Reproduce
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd v2/crates/wifi-densepose-wasm-edge # workspace-excluded; build here
|
|
||||||
cargo test --features std --test synthetic_validation -- --nocapture
|
|
||||||
# also runs under the medical tier (med_* skills stay DATA-GATED, not validated):
|
|
||||||
cargo test --features std,medical-experimental --test synthetic_validation -- --nocapture
|
|
||||||
```
|
|
||||||
|
|
||||||
Each `MEASURED-on-synthetic | …` line printed by the harness is the source of the
|
|
||||||
table below. Numbers are deterministic (no unseeded RNG; pseudo-noise comes from an LCG with a fixed seed).
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## MEASURED-on-synthetic (constructible skills)
|
|
||||||
|
|
||||||
| Skill | What was planted (ground truth) | Result | Grade |
|
|
||||||
|-------|----------------------------------|--------|-------|
|
|
||||||
| **vital_trend** | BPM held N≥6 calls at each threshold band (brady/tachy-pnea <12 / >25, brady/tachy-cardia <50 / >120, apnea breathing<1.0 for ≥20) vs normal | **acc 1.000, prec 1.000, recall 1.000** (TP5 FP0 TN5 FN0) | MEASURED |
|
|
||||||
| **exo_time_crystal** | period-2 coordinated motion vs pseudo-noise + flat | **acc 1.000** (TP1 FP0 TN2 FN0) | MEASURED † |
|
|
||||||
| **exo_ghost_hunter** (hidden breathing) | phase sinusoid at lag-8 (breathing band 5–15) in an empty room vs flat phase | **acc 1.000**; planted score **1.000**, flat **0.000** | MEASURED |
|
|
||||||
| **occupancy** | 220-frame flat-amplitude calibration, then strong per-zone amplitude variance vs flat | **acc 1.000** (TP1 FP0 TN1 FN0) | MEASURED |
|
|
||||||
| **intrusion** | calibrate→arm (330 quiet frames), then per-subcarrier Δphase>1.5 + Δamp≫3σ vs quiet | **acc 1.000** (TP1 FP0 TN1 FN0) | MEASURED |
|
|
||||||
| **exo_rain_detect** | empty room, 60-frame baseline, then broadband variance (8/8 groups, ratio≫2.5) for ≥10 frames vs stable-low | **acc 1.000** (TP1 FP0 TN1 FN0) | MEASURED |
|
|
||||||
| **sig_flash_attention** | sustained high phase+amplitude in each of the 8 subcarrier groups; assert reported attention peak == planted group | **peak-localization 8/8 = 1.000** | MEASURED |
|
|
||||||
| **spt_spiking_tracker** | sparse (2-subcarrier) large phase-delta in each of the 4 zones; assert tracked zone == planted zone | **zone-localization 4/4 = 1.000** | MEASURED ‡ |
|
|
||||||
| **sig_optimal_transport** | sustained large frame-to-frame amplitude-distribution change vs stationary | **acc 1.000** (TP1 FP0 TN1 FN0) | MEASURED |
|
|
||||||
| **sig_mincut_person_match** | 2 persons with distinct stable per-region variance signatures over 40 frames | **person ids assigned, 0 id-swaps / 40 frames** | MEASURED |
|
|
||||||
| **lrn_dtw_gesture_learn** | stillness → 3 identical gesture rehearsals → enrollment | **template enrolled (templates=1)** | MEASURED (enroll) §|
|
|
||||||
| **sig_sparse_recovery** | 30 clean frames to init, then 8/32 (25%) nulled subcarriers | **dropout-detect + recovery-trigger = PASS** | MEASURED (trigger) ¶|
|
|
||||||
|
|
||||||
### Caveats on individual results
|
|
||||||
|
|
||||||
† **exo_time_crystal — honest discriminative limit.** A *pure* periodic signal
|
|
||||||
already has autocorrelation peaks at lag L **and** 2L (natural harmonics), so this
|
|
||||||
"period-doubling" detector cannot separate a true period-2 sub-harmonic from a
|
|
||||||
plain periodic signal — an earlier plant using a clean sine produced a *false
|
|
||||||
positive* (recorded during development). The construct it **can** discriminate
|
|
||||||
with known ground truth is **periodic-coordination vs aperiodic** (noise/flat),
|
|
||||||
which is what is measured (1.000). The original "sub-harmonic vs clean period"
|
|
||||||
claim is **NOT** validatable with this algorithm.
|
|
||||||
|
|
||||||
‡ **spt_spiking_tracker — plant must be sparse.** With weights init'd home=1.0 /
|
|
||||||
cross=0.25, firing all 8 inputs in a zone (8×0.25=2.0 > threshold 1.0) overdrives
|
|
||||||
*every* output neuron and the tracker collapses to zone 0 (measured 1/4 during
|
|
||||||
development). Firing only 2 inputs (home 2.0 fires, cross 0.5 silent) yields clean
|
|
||||||
4/4 zone localization. The validatable claim is *single-zone* localization.
|
|
||||||
|
|
||||||
§ **lrn_dtw_gesture_learn — enrollment validated; replay-match NOT.** The
|
|
||||||
deterministic, constructible part (stillness → 3 identical rehearsals → a template
|
|
||||||
is enrolled) is MEASURED. The DTW *replay match* (731) did **not** fire on the
|
|
||||||
identical replay in this run (`match_same=false`) — replay-recognition accuracy is
|
|
||||||
**reported, not asserted**, and is not claimed as validated.
|
|
||||||
|
|
||||||
¶ **sig_sparse_recovery — trigger validated; recovery accuracy is NEGATIVE.**
|
|
||||||
The dropout-detection + ISTA-recovery *trigger* pipeline fires correctly on >10%
|
|
||||||
planted nulls (asserted). But the **measured recovery accuracy is NOT a win**:
|
|
||||||
recovered RMSE **1.0045** vs unrecovered-null RMSE **0.9830** (**−2.2%**, i.e.
|
|
||||||
slightly *worse* than leaving the nulls at zero) on a neighbor-correlated signal.
|
|
||||||
The tridiagonal correlation model's fixed point does not equal the planted truth.
|
|
||||||
**The recovery's reconstruction quality is therefore NOT validated as effective on
|
|
||||||
synthetic data** — only its detection/trigger path is. Reported honestly; no
|
|
||||||
positive number claimed.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## DATA-GATED — NOT validatable on synthetic data
|
|
||||||
|
|
||||||
Planting a "seizure-like" / "weapon-like" / "happy-like" synthetic signal and
|
|
||||||
claiming the detector "works" validates **nothing real** and is exactly the
|
|
||||||
AI-slop this project fights. These skills run real DSP (per ADR-160, 0 stubs) and
|
|
||||||
keep their ADR-160 disclaimers, but get **no accuracy number** here. Each needs
|
|
||||||
the specific real, labelled data listed:
|
|
||||||
|
|
||||||
| Skill | Why not constructible on synthetic | Real data required |
|
|
||||||
|-------|------------------------------------|--------------------|
|
|
||||||
| `med_seizure_detect` | "seizure-like" motion is not a seizure; no ground-truth signature exists synthetically | Clinical EEG-/video-labelled tonic-clonic seizure CSI from instrumented patients |
|
|
||||||
| `med_sleep_apnea` | a planted breathing-pause is not clinical apnea (AHI scoring, hypopnea, desaturation) | Polysomnography-labelled (PSG) overnight CSI with scored apnea/hypopnea events |
|
|
||||||
| `med_cardiac_arrhythmia` | a synthetic HR sequence cannot encode true arrhythmia morphology | ECG-labelled CSI (AFib/PVC/etc.) from clinical monitoring |
|
|
||||||
| `med_respiratory_distress` | distress is a clinical gestalt, not a plantable rate | Clinician-labelled respiratory-distress CSI episodes |
|
|
||||||
| `med_gait_analysis` | clinical gait metrics need a reference motion-capture standard | Mocap-/force-plate-labelled gait CSI |
|
|
||||||
| `sec_weapon_detect` | a high variance ratio is RF reflectivity, **not** weapon discrimination (ADR-160 §A3 already renamed the event to `HIGH_METAL_REFLECTIVITY`) | Labelled metal-object-vs-no-object CSI with controlled object classes |
|
|
||||||
| `exo_emotion_detect` | affect is not recoverable from a planted heuristic; outputs are proxies (ADR-160 §A2) | Validated affect-labelled CSI (self-report / physiological ground truth) |
|
|
||||||
| `exo_happiness_score` | "happiness" is a gait-energy proxy, not a measured affect (ADR-160 §A2) | Validated affect/valence-labelled CSI |
|
|
||||||
| `exo_dream_stage` | sleep staging needs PSG reference (EEG/EOG/EMG) | PSG-staged overnight CSI |
|
|
||||||
| `exo_gesture_language` | coarse gesture clusters ≠ true sign language (ADR-160 §A4) | Labelled ASL letter/word CSI dataset |
|
|
||||||
|
|
||||||
> The above are **not failures** — they are the honest boundary. A smaller set of
|
|
||||||
> genuinely-measured skills plus this explicit gated list is the deliverable, per
|
|
||||||
> the prove-everything directive.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Skills not in either list
|
|
||||||
|
|
||||||
The remaining edge skills (smart-building / retail / industrial occupancy-style,
|
|
||||||
the other `sig_*`/`lrn_*`/`spt_*`/`tmp_*`/`qnt_*`/`aut_*`/`ais_*` algorithm-named
|
|
||||||
modules) are **wired and exercised live** in the unified pipeline integration test
|
|
||||||
(`tests/pipeline_all.rs`, all 59 default / 64 medical skills run without panic over
|
|
||||||
300 synthetic frames) but were **not** given an individual planted-ground-truth
|
|
||||||
accuracy number here. They are honest REAL-DSP modules (ADR-160) whose physical
|
|
||||||
observable could be planted with more harness work; that is deferred, not claimed.
|
|
||||||
|
|
||||||
## Test counts (full crate suite)
|
|
||||||
|
|
||||||
```
|
|
||||||
DEFAULT (--features std): 631 passed, 0 failed
|
|
||||||
(lib 504; budget 25; honest_labeling 10; pipeline_all 4; synthetic_validation 12; bench 1; vendor 75)
|
|
||||||
MEDICAL (--features std,medical-experimental): 669 passed, 0 failed
|
|
||||||
(lib 542; +16 same new tests; med_* stay DATA-GATED, not validated)
|
|
||||||
```
|
|
||||||
|
|
||||||
(M6 baseline was 615 / 653; the new pipeline_all (4) + synthetic_validation (12)
|
|
||||||
tests add 16 to each tier.)
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
# Upstream clone (WiFlow-STD, DY2434) -- never commit third-party code/weights
|
|
||||||
upstream/
|
|
||||||
|
|
||||||
# Local python env
|
|
||||||
.venv/
|
|
||||||
|
|
||||||
# Downloaded data / artifacts
|
|
||||||
data/
|
|
||||||
downloads/
|
|
||||||
*.pth
|
|
||||||
*.pt
|
|
||||||
*.npy
|
|
||||||
*.npz
|
|
||||||
*.zip
|
|
||||||
*.mat
|
|
||||||
*.safetensors
|
|
||||||
results/parity_fixture.json
|
|
||||||
__pycache__/
|
|
||||||
*.onnx
|
|
||||||
|
|
||||||
# Committed ground truth: corruption masks for the pristine Kaggle download.
|
|
||||||
# remote/clean_v2.py zeroes the corrupted source windows IN PLACE, so these
|
|
||||||
# masks CANNOT be regenerated from a cleaned copy (generate_corruption_masks.py
|
|
||||||
# documents the criteria and reproduces them only from a fresh download).
|
|
||||||
!results/nan_windows_mask.npy
|
|
||||||
!results/big_windows_mask.npy
|
|
||||||
@@ -1,486 +0,0 @@
|
|||||||
# WiFlow-STD (DY2434) Benchmark Results — ADR-152 §2.2
|
|
||||||
|
|
||||||
Upstream: <https://github.com/DY2434/WiFlow-WiFi-Pose-Estimation-with-Spatio-Temporal-Decoupling>
|
|
||||||
pinned at `06899d29` (2026-04-05), Apache-2.0. Dataset: Kaggle `kaka2434/wiflow-dataset`
|
|
||||||
(12.8 GB archive → 15.5 GB extracted; 360,000 windows of 540×20 CSI + 15-keypoint 2D labels).
|
|
||||||
|
|
||||||
Published claims (README "Setting 1"): PCK@20 97.25%, PCK@30 98.63%, PCK@40 99.16%,
|
|
||||||
PCK@50 99.48%, MPJPE 0.007 m, 2.23M params, 0.07 GFLOPs.
|
|
||||||
|
|
||||||
## Measurement (a): their model on their data
|
|
||||||
|
|
||||||
### Artifact verification (MEASURED, 2026-06-10, this repo `eval_repro.py`)
|
|
||||||
|
|
||||||
| Check | Result |
|
|
||||||
|---|---|
|
|
||||||
| Parameter count | **2,225,042 (2.23M) — matches claim** |
|
|
||||||
| FLOPs (torch profiler, batch 1) | ~0.055 GFLOPs — consistent with the 0.07 GFLOPs claim |
|
|
||||||
| CPU latency (Windows box, torch 2.12 CPU) | 13.2 ms/window @ batch 1 (76/s); 2.48 ms/sample @ batch 64 (403/s) |
|
|
||||||
| Checkpoint load | `weights_only=True` (no pickle code execution) |
|
|
||||||
|
|
||||||
### Released checkpoint does NOT reproduce the claims — REFUTED as shipped
|
|
||||||
|
|
||||||
Running the released `best_pose_model.pth` through the released code on the released
|
|
||||||
dataset with the released split procedure (seed-42 file-level 70/15/15; 54,000 test
|
|
||||||
samples) yields:
|
|
||||||
|
|
||||||
| Metric | Published | Measured (shipped checkpoint) |
|
|
||||||
|---|---|---|
|
|
||||||
| PCK@20 | 97.25% | **0.08%** |
|
|
||||||
| PCK@30 | 98.63% | 0.78% |
|
|
||||||
| PCK@40 | 99.16% | 5.53% |
|
|
||||||
| PCK@50 | 99.48% | 15.42% |
|
|
||||||
| MPJPE | 0.007 | **NaN** (dataset contains NaN CSI windows) |
|
|
||||||
|
|
||||||
Raw output: `results/repro_a.json`.
|
|
||||||
|
|
||||||
Diagnostics (on 2,000 NaN-free windows from the first files of the dataset, i.e.
|
|
||||||
mostly would-be *training* data — so this is not a split mismatch):
|
|
||||||
|
|
||||||
- Predictions correlate with targets (Pearson r ≈ 0.76) — the checkpoint is a trained
|
|
||||||
model, but in a **different keypoint normalization/order** than the released data.
|
|
||||||
- Best-case post-hoc global per-axis affine correction: PCK@20 ≈ 20%.
|
|
||||||
- Best-case per-keypoint affine correction (15×2 fitted transforms — generous
|
|
||||||
cheating): PCK@20 ≈ 72%, still far below 97.25%.
|
|
||||||
- Pred↔target keypoint correspondence matrix is degenerate (multiple predicted
|
|
||||||
keypoints best-match the same target joint) — keypoint convention mismatch.
|
|
||||||
|
|
||||||
### Reproducibility defects in the released artifacts
|
|
||||||
|
|
||||||
1. `models/__init__.py` imports `TemporalConvNet`, which `models/tcn.py` does not
|
|
||||||
define — **the published code does not import/run as-is**.
|
|
||||||
2. The released root checkpoint uses pre-rename module names (`att.*`, `final_conv.*`)
|
|
||||||
vs the published code (`attention.*`, `decoder.*`) — same shapes/param count, but
|
|
||||||
confirms the checkpoint predates the published code.
|
|
||||||
3. The second shipped checkpoint (`cross_dataset_test/WiFlow/best_pose_model.pth`) is
|
|
||||||
a **different architecture** (342-channel input = MM-Fi layout, 3 TCN layers,
|
|
||||||
3-channel/3D decoder) — not usable on their own dataset.
|
|
||||||
4. `run.py` ignores `--data_dir` and hardcodes `../preprocessed_csi_data`.
|
|
||||||
5. The released dataset's final 13 files (indices 487–499; 9,072 windows, 2.52%)
|
|
||||||
are corrupted: NaN values plus garbage amplitudes up to 3.4e38 (float32 max) in
|
|
||||||
data that is otherwise [0,1]-normalized. Upstream code has no NaN/inf handling;
|
|
||||||
training as published on this download diverges — the first corrupted batch
|
|
||||||
overflows fp16 autocast and permanently poisons BatchNorm running statistics
|
|
||||||
(GradScaler step-skipping does not protect BN). The authors' training curves
|
|
||||||
show normal convergence, so their local data evidently differed from the
|
|
||||||
Kaggle upload. Window masks: `results/nan_windows_mask.npy`,
|
|
||||||
`results/big_windows_mask.npy`.
|
|
||||||
|
|
||||||
### Reproducing the corruption masks
|
|
||||||
|
|
||||||
The two mask files (9,070 NaN/Inf windows, 9,072 with |amplitude| > 1.5;
|
|
||||||
union 9,072, all in dataset files 487–499) are **committed ground truth**
|
|
||||||
(gitignore-negated, ~352 KB each). They can only be regenerated from a
|
|
||||||
**pristine** Kaggle download: `remote/clean_v2.py` repairs the dataset by
|
|
||||||
zeroing the corrupted windows in place, after which the corruption evidence
|
|
||||||
is gone and a rescan returns all-False. `generate_corruption_masks.py`
|
|
||||||
re-derives them (chunked scan, criteria: any non-finite value OR
|
|
||||||
max |finite| > 1.5 per 540×20 window) and refuses to write all-False masks,
|
|
||||||
which indicate a cleaned copy. Verified 2026-06-11: a regeneration from the
|
|
||||||
local pristine download is bit-identical to the committed masks.
|
|
||||||
|
|
||||||
### Retraining result (MEASURED, 2026-06-10): claims APPROXIMATELY REPRODUCED
|
|
||||||
|
|
||||||
Since the shipped checkpoint is unusable, measurement (a) fell back to retraining
|
|
||||||
with upstream code + defaults (seed 42, batch 64, early-stopped at epoch 41 of 50,
|
|
||||||
best epoch 36, ~75 s/epoch) on ruvultra (RTX 5080). Deviations, all forced and
|
|
||||||
documented: one-line fix for defect (1); torch 2.x+cu128 instead of pinned 2.3.1
|
|
||||||
(Blackwell sm_120 unsupported); the 9,072 corrupted windows (defect 5) zeroed
|
|
||||||
entirely — without this the published pipeline produces NaN from epoch 1 (observed).
|
|
||||||
Scripts mirrored in `remote/`; raw metrics in `results/eval_retrained.json`.
|
|
||||||
|
|
||||||
| Metric | Published | Retrained (full test, 54,000) | Retrained (corruption-free, 52,560) |
|
|
||||||
|---|---|---|---|
|
|
||||||
| PCK@20 | 97.25% | **96.09%** | **96.61%** |
|
|
||||||
| PCK@30 | 98.63% | 97.89% | 98.23% |
|
|
||||||
| PCK@40 | 99.16% | 98.58% | 98.79% |
|
|
||||||
| PCK@50 | 99.48% | 98.99% | 99.11% |
|
|
||||||
| MPJPE | 0.007 | 0.0098 | 0.0094 |
|
|
||||||
|
|
||||||
Within ~0.4–1.2 PCK points of every published figure (single run, corrupted train
|
|
||||||
windows zeroed, different torch/GPU). **Verdict: the accuracy claims are credible
|
|
||||||
and approximately reproducible — but only after repairing the released dataset and
|
|
||||||
code.** Val best: PCK@20 96.99%, MPJPE 0.0086 (epoch 36).
|
|
||||||
|
|
||||||
One more defect found during the run:
|
|
||||||
|
|
||||||
6. `train.py` calls `plot_training_history`, which is not defined anywhere — the
|
|
||||||
built-in post-training test evaluation is unreachable as published (crashes
|
|
||||||
with NameError after training completes).
|
|
||||||
|
|
||||||
## ADR-152 §2.2 citation rule
|
|
||||||
|
|
||||||
Evidence grade for the WiFlow-STD accuracy claims after measurement (a):
|
|
||||||
**MEASURED-EQUIVALENT (96.1–96.6% PCK@20 reproduced by retraining; shipped
|
|
||||||
checkpoint REFUTED; dataset/code require repairs)**. RuView docs may cite
|
|
||||||
"~96% PCK@20 (our reproduction)" — still **not comparable** to our 17-keypoint
|
|
||||||
ESP32 numbers (different hardware, 5 subjects, in-domain random split,
|
|
||||||
15 keypoints).
|
|
||||||
|
|
||||||
## Edge optimization (measured)
|
|
||||||
|
|
||||||
ADR-152 "optimize beyond SOTA" track, 2026-06-10, this Windows box (Windows 11,
|
|
||||||
16 torch threads, torch 2.12.0+cpu, onnxruntime 1.26.0). Subject: the retrained
|
|
||||||
checkpoint `results/retrained_best_pose_model.pth` (2,225,042 fp32 params).
|
|
||||||
Scripts: `quantize_bench.py`, `onnx_bench.py`, `eval_ort_accuracy.py`.
|
|
||||||
Raw numbers: `results/edge_optimization.json`.
|
|
||||||
|
|
||||||
Accuracy is on a **10,000-window seed-42 random subset** of the corruption-free
|
|
||||||
test split (same seed-42 file-level 70/15/15 split as `eval_repro.py`; 54,000
|
|
||||||
test windows, 1,440 corrupted excluded via `results/nan_windows_mask.npy` |
|
|
||||||
`results/big_windows_mask.npy`, leaving 52,560; subset drawn with
|
|
||||||
`np.random.default_rng(42)`). The fp32 subset PCK@20 (96.68%) matches the full
|
|
||||||
clean-test figure (96.61%), so the subset is representative.
|
|
||||||
|
|
||||||
Latency is CPU ms/window, median of repeated runs, 3 interleaved repetitions
|
|
||||||
per variant (medians below; run-to-run spread on this box is large, roughly
|
|
||||||
±20-40% at batch 1 — reps are in the JSON).
|
|
||||||
|
|
||||||
| Variant | Disk size | Batch 1 (ms/win) | Batch 64 (ms/win) | PCK@20 | PCK@50 | MPJPE |
|
|
||||||
|---|---|---|---|---|---|---|
|
|
||||||
| torch fp32 (baseline) | 9.07 MB | 11.0 | 2.27 | 96.68% | 99.15% | 0.00936 |
|
|
||||||
| torch fp16 (`.half()`) | **4.58 MB** | 24.3 | 2.42 | 96.68% | 99.15% | 0.00946 |
|
|
||||||
| torch int8 dynamic | 9.07 MB (unchanged) | 15.6 | 2.06 | 96.68% (identical) | 99.15% | 0.00936 |
|
|
||||||
| ONNX fp32 (onnxruntime) | 8.97 MB | **3.2** | **2.0** | 96.68% | 99.15% | 0.00936 |
|
|
||||||
| ONNX int8 (ORT dynamic, supplementary) | **2.44 MB** | 6.5 | 5.8 | 96.52% | 99.15% | 0.01108 |
|
|
||||||
|
|
||||||
Findings:
|
|
||||||
|
|
||||||
- **torch dynamic INT8 quantizes nothing on this model.** The architecture has
|
|
||||||
**zero `nn.Linear` layers** — it is entirely Conv1d (21) + Conv2d (22) +
|
|
||||||
BatchNorm. `torch.ao.quantization.quantize_dynamic` (requested over
|
|
||||||
`{Linear, Conv1d, Conv2d}`) converted **0 modules / 0.0% of params**: dynamic
|
|
||||||
quantization only has kernels for Linear/RNN-family modules and silently
|
|
||||||
skips convolutions. The "int8" model is bit-identical to fp32 (same outputs,
|
|
||||||
same 9.07 MB). Conv quantization would require static (PTQ) quantization
|
|
||||||
with calibration — out of scope here; the ORT dynamic path below is the
|
|
||||||
honest int8 datapoint.
|
|
||||||
- **fp16 halves size for free accuracy-wise** (PCK@20 −0.005 pt, MPJPE
|
|
||||||
+0.0001) but is *slower* on CPU at batch 1 (~2.2×) — torch CPU fp16 conv
|
|
||||||
kernels are emulated. fp16 is a storage/transport format here, not a CPU
|
|
||||||
runtime win.
|
|
||||||
- **ONNX Runtime is the real batch-1 latency win: ~3.4× faster than torch**
|
|
||||||
(3.2 vs 11.0 ms/window) at identical accuracy (parity 2.4e-7).
|
|
||||||
|
|
||||||
### Verdict on the paper's "~2.2 MB int8" claim
|
|
||||||
|
|
||||||
**Plausible but not free, and unreachable by the obvious PyTorch route.**
|
|
||||||
2,225,042 params × 1 byte ≈ 2.2 MB assumes *every* parameter quantizes.
|
|
||||||
PyTorch dynamic quantization — the one-liner most readers would reach for —
|
|
||||||
yields **9.07 MB (0% quantized)** because the model has no Linear layers.
|
|
||||||
ONNX Runtime dynamic quantization, which does have int8 conv weight support,
|
|
||||||
gets **2.44 MB** (close to the claim; the overhead is BatchNorm params/buffers
|
|
||||||
and quantization scales kept in fp32) at a measurable accuracy cost:
|
|
||||||
PCK@20 96.68 → 96.52% (−0.16 pt) and MPJPE 0.00936 → 0.01108 (+18%), and
|
|
||||||
~2× slower inference than ONNX fp32 (ConvInteger kernels). The paper does not
|
|
||||||
state a method or an int8 accuracy; treat "2.2 MB" as a weight-arithmetic
|
|
||||||
estimate, achievable in practice only via conv-capable quantization toolchains
|
|
||||||
and with a small accuracy penalty.
|
|
||||||
|
|
||||||
### ONNX export status
|
|
||||||
|
|
||||||
**Works.** Exported via the TorchScript exporter (`dynamo=False`), opset 17,
|
|
||||||
with a dynamic batch axis — `results/retrained_fp32_dynamic.onnx` (8.97 MB),
|
|
||||||
verified to run at batch 1/2/64. The axial attention's
|
|
||||||
`view(N*W, C, H)` reshape traced correctly (sizes recorded as graph ops, not
|
|
||||||
baked constants). The dynamo exporter also captures the graph but crashed on
|
|
||||||
this box writing a ✅ to a cp1252 console (cosmetic Windows encoding issue, not
|
|
||||||
a model blocker). Parity vs torch on the stored fixture
|
|
||||||
(`results/parity_fixture.npz`, batch 2, seed 42): **max abs diff 2.4e-7 —
|
|
||||||
PASS** (< 1e-4). ORT-quantized int8 model: `results/retrained_int8_ort_dynamic.onnx`.
|
|
||||||
|
|
||||||
### Static PTQ (calibrated) — follow-up
|
|
||||||
|
|
||||||
Follow-up to the dynamic-int8 row above (2026-06-10, same box, onnxruntime
|
|
||||||
1.26.0): ONNX Runtime **static** post-training quantization
|
|
||||||
(`quantize_static`, QDQ format, per-channel int8 weights + int8 activations)
|
|
||||||
of the same fp32 export, calibrated on **corruption-free TRAINING-split
|
|
||||||
windows only** (seed-42 file-level split, same masks; 1,000 windows for
|
|
||||||
MinMax, 512 for the histogram calibrators; never test windows). Two scopes:
|
|
||||||
"conv-only" (`op_types_to_quantize=["Conv"]`) and "all-ops". The attention path
|
|
||||||
exports as Einsum/Softmax, which ORT never quantizes anyway, so "all-ops" only adds
|
|
||||||
the elementwise Mul/Sigmoid/Add/AveragePool glue on top of the convs. Accuracy on the
|
|
||||||
identical 10k-window seed-42 corruption-free test subset; latency median of
|
|
||||||
3 interleaved reps (fp32/dynamic re-benched in-session as references).
|
|
||||||
Script: `static_ptq_bench.py`; raw: `results/edge_optimization.json`
|
|
||||||
(`onnx_static_ptq`).
|
|
||||||
|
|
||||||
| Variant | Disk size | Batch 1 (ms/win) | Batch 64 (ms/win) | PCK@20 | PCK@50 | MPJPE |
|
|
||||||
|---|---|---|---|---|---|---|
|
|
||||||
| ONNX fp32 (reference) | 8.97 MB | 2.5 | 1.9 | 96.68% | 99.15% | 0.00936 |
|
|
||||||
| ORT dynamic int8 (baseline) | **2.44 MB** | 5.7 | 4.6 | 96.52% | 99.15% | 0.01108 |
|
|
||||||
| static QDQ **Percentile(99.99) conv-only** | 2.53 MB | 5.3 | 4.7 | 96.61% | 99.16% | **0.01031** |
|
|
||||||
| static QDQ MinMax conv-only | 2.53 MB | 5.2 | 3.3 | **96.63%** | 99.19% | 0.01084 |
|
|
||||||
| static QDQ Entropy conv-only | 2.53 MB | 5.2 | 3.1 | 96.60% | 99.19% | 0.01078 |
|
|
||||||
| static QDQ MinMax all-ops | 2.60 MB | 6.5 | 3.9 | 95.45% | 99.14% | 0.01486 |
|
|
||||||
| static QDQ Entropy all-ops | 2.60 MB | 5.7 | 4.1 | 95.30% | 99.13% | 0.01510 |
|
|
||||||
| static QDQ Percentile all-ops | 2.60 MB | 5.3 | 4.3 | 96.39% | 99.17% | 0.01218 |
|
|
||||||
|
|
||||||
**Verdict: static PTQ (conv-only) is the new best int8 point on accuracy —
|
|
||||||
but only modestly, and it does not fix int8's latency penalty.**
|
|
||||||
|
|
||||||
- **Accuracy: beats dynamic.** All three conv-only calibrations land at
|
|
||||||
PCK@20 96.60–96.63% (vs dynamic 96.52%, fp32 96.68% — recovers ~⅔ of the
|
|
||||||
dynamic gap) and MPJPE 0.0103–0.0108 (vs dynamic 0.01108). Best MPJPE:
|
|
||||||
Percentile conv-only, +10% over fp32 instead of dynamic's +18%.
|
|
||||||
- **Size: slightly worse.** 2.53 MB vs 2.44 MB (+3.6%) — QDQ nodes and
|
|
||||||
per-channel scales cost a little; BatchNorm stays fp32 in both (the 12 BNs
|
|
||||||
follow Slice/Einsum/Reshape, never Conv, so they cannot be folded).
|
|
||||||
- **Latency: a wash vs dynamic, still ~2× slower than ONNX fp32 at batch 1.**
|
|
||||||
Batch-1 medians 5.2–5.3 vs dynamic 5.7 ms/win in-session — within this
|
|
||||||
box's ±20–40% noise. Batch 64 leans static (3.1–3.3 for MinMax/Entropy
|
|
||||||
conv-only vs 4.6), same caveat.
|
|
||||||
- **All-ops QDQ is strictly worse**: up to −1.4 pt PCK@20 and +60% MPJPE for
|
|
||||||
zero size/latency benefit — int8 activations through the elementwise glue
|
|
||||||
around the attention blocks is where the damage is. Conv-only is the right
|
|
||||||
scope.
|
|
||||||
- Negative result worth recording: **Entropy calibration is a no-op here** —
|
|
||||||
on an identical calibration set it selects full-range thresholds
|
|
||||||
bit-identical to MinMax (all 247 scales equal; verified on a 64-window
|
|
||||||
smoke set). Also, ORT 1.26's `CalibMaxIntermediateOutputs` raises a
|
|
||||||
spurious "No data is collected" when the batch count divides the chunk
|
|
||||||
size (worked around in the script).
|
|
||||||
|
|
||||||
Deployment guidance: need speed → ONNX fp32 (3.2 ms b1). Need int8 weights
|
|
||||||
for size → static QDQ conv-only (Percentile or MinMax,
|
|
||||||
`results/retrained_int8_static_percentile_conv.onnx`), which strictly
|
|
||||||
dominates dynamic int8 on accuracy at ~equal latency and +0.09 MB.
|
|
||||||
|
|
||||||
## Efficiency sweep (MEASURED, overnight 2026-06-10/11)
|
|
||||||
|
|
||||||
ADR-152 beyond-SOTA track: compact purpose-built variants of the WiFlow-STD
|
|
||||||
architecture, trained from scratch on the same cleaned dataset, identical
|
|
||||||
seed-42 file-level split, loss and protocol as the measurement-(a) reference
|
|
||||||
(fp32, batch 64, ≤50 epochs, patience 5; RTX 5080, ~22–29 min/variant).
|
|
||||||
Variant transforms are pure channel/group/stride scalings of an
|
|
||||||
architecture-exact parameterized model (validated: reproduces 2,225,042 params
|
|
||||||
at the reference config). Scripts: `remote/sweep/`; raw:
|
|
||||||
`results/efficiency_sweep.jsonl`; checkpoints `results/{half,quarter,tiny}_best.pth`
|
|
||||||
(gitignored).
|
|
||||||
|
|
||||||
| Variant | Params | vs 2.23M | Clean-test PCK@20 | PCK@50 | MPJPE | Best epoch |
|
|
||||||
|---|---|---|---|---|---|---|
|
|
||||||
| full (reference, meas. a) | 2,225,042 | 1× | 96.61% | 99.11% | 0.0094 | 36 |
|
|
||||||
| **half** | **843,834** | **0.38×** | **96.62%** | **99.47%** | **0.00898** | 23 |
|
|
||||||
| quarter | 338,600 | 0.15× | 96.05% | 99.43% | 0.00928 | 50 |
|
|
||||||
| tiny | 56,290 | 0.025× | 94.11% | 99.36% | 0.0125 | 47 |
|
|
||||||
|
|
||||||
Findings:
|
|
||||||
|
|
||||||
- **The half model (843k params) strictly dominates the full reference** on
|
|
||||||
this dataset — equal PCK@20, better PCK@50 and MPJPE, converges in fewer
|
|
||||||
epochs. The published 2.23M architecture is over-parameterized for its own
|
|
||||||
benchmark.
|
|
||||||
- **tiny (56k params, 1/39.5) holds 94.11% PCK@20** — a ~220 KB fp32 /
|
|
||||||
~60 KB int8-class model in reach of severely constrained edge targets,
|
|
||||||
at −2.5 pt from the full reference.
|
|
||||||
- Caveats: in-domain (5-subject random-file split) like every number on this
|
|
||||||
dataset; single run per variant; corruption-free test subset (52,560).
|
|
||||||
Cross-domain behavior of compact variants is untested — ADR-150's evidence
|
|
||||||
says capacity *hurts* cross-subject, so the compact end may generalize no
|
|
||||||
worse, but that is a hypothesis, not a measurement.
|
|
||||||
|
|
||||||
### Compact-variant edge artifacts (MEASURED, 2026-06-11)
|
|
||||||
|
|
||||||
Edge pipeline for the **tiny** checkpoint (56,290 params), same machinery and
|
|
||||||
protocol as the full-model edge rows above (this Windows box, torch
|
|
||||||
2.12.0+cpu, onnxruntime 1.26.0; dynamic-batch opset-17 TorchScript export;
|
|
||||||
static QDQ **Percentile(99.99) conv-only** int8 calibrated on **512**
|
|
||||||
corruption-free TRAIN-split windows; accuracy on the identical 10k-window
|
|
||||||
seed-42 clean test subset; latency = median ms/window over 3 interleaved
|
|
||||||
reps, with the full-model fp32/int8 sessions interleaved as same-session
|
|
||||||
references). Script: `tiny_edge_bench.py`; raw:
|
|
||||||
`results/edge_optimization.json` (`tiny_variant`). Torch-vs-ORT parity on the
|
|
||||||
stored fixture input: **max abs diff 1.5e-7 — PASS** (< 1e-4). The tiny fp32
|
|
||||||
subset PCK@20 (94.11%) matches the full clean-test sweep figure (94.11%)
|
|
||||||
exactly, so the subset remains representative.
|
|
||||||
|
|
||||||
Two forced deviations, both recorded in the JSON:
|
|
||||||
|
|
||||||
1. **Adaptive-pool export rewrite.** tiny's derived stride schedule
|
|
||||||
`[2,1,1,1]` leaves feature width 16, and the TorchScript exporter rejects
|
|
||||||
`AdaptiveAvgPool2d((15,1))` when 15 is not a factor of the input height
|
|
||||||
(the full model never hit this — its width was exactly 15). Since the
|
|
||||||
pool over a fixed-size map is a fixed linear operator, the export wrapper
|
|
||||||
replaces it with `mean(-1)` (W axis, a factor) + a constant averaging
|
|
||||||
matmul using PyTorch's exact bin rule; the parity check (vs the original
|
|
||||||
torch model with the real pool) proves exactness.
|
|
||||||
2. **Calibration count 512, not "~500"**: ORT 1.26's histogram collector
|
|
||||||
`np.asarray()`'s the per-batch maxima, so the calibration count must be a
|
|
||||||
multiple of the 64-window calibration batch or the ragged last batch
|
|
||||||
crashes it (the earlier static-PTQ run dodged this by using exactly 512).
|
|
||||||
|
|
||||||
| Variant | Disk size | Batch 1 (ms/win) | Batch 64 (ms/win) | PCK@20 | PCK@50 | MPJPE |
|
|
||||||
|---|---|---|---|---|---|---|
|
|
||||||
| full ONNX fp32 (same-session ref) | 8.97 MB | 2.27 | 1.42 | 96.68% | 99.15% | 0.00936 |
|
|
||||||
| full static QDQ Percentile conv-only (same-session ref) | 2.53 MB | 5.53 | 3.82 | 96.61% | 99.16% | 0.01031 |
|
|
||||||
| **tiny ONNX fp32** | **0.295 MB** | **0.66** | **0.24** | **94.11%** | 99.37% | 0.01253 |
|
|
||||||
| tiny static QDQ Percentile conv-only | 0.248 MB | 0.85 | 1.03 | 92.68% | 99.33% | 0.01491 |
|
|
||||||
|
|
||||||
(tiny torch `.pth` checkpoint for reference: 0.34 MB on disk; 56,290 fp32
|
|
||||||
params ≈ 225 KB of weights.)
|
|
||||||
|
|
||||||
Findings:
|
|
||||||
|
|
||||||
- **The smallest deployable WiFlow-class model is the tiny ONNX fp32
|
|
||||||
artifact: ~295 KB on disk, 0.66 ms/window batch-1 CPU (~1,500 windows/s),
|
|
||||||
94.1% PCK@20** — 30× smaller and ~3.4× faster (in-session) than the full
|
|
||||||
ONNX fp32 model for −2.6 pt PCK@20.
|
|
||||||
- **int8 is a bad trade at this scale.** Static QDQ conv-only — the recipe
|
|
||||||
that cost the full model only 0.07 pt — costs tiny **−1.43 pt** PCK@20
|
|
||||||
(94.11 → 92.68%) and +19% MPJPE, saves only 47 KB (−16%; QDQ scales and
|
|
||||||
the fp32 BN/attention glue are proportionally larger in a small graph),
|
|
||||||
and is *slower* than tiny fp32 (0.85 vs 0.66 ms b1; 1.03 vs 0.24 ms b64 —
|
|
||||||
QDQ kernel overhead dominates when the convs are this small). A 56k-param
|
|
||||||
model has little redundancy left to absorb weight+activation rounding.
|
|
||||||
- Deployment guidance, compact edition: ship tiny as **ONNX fp32** — at
|
|
||||||
295 KB the int8 size saving solves no real constraint and costs accuracy
|
|
||||||
and speed. If ~250 KB vs ~295 KB ever matters, weight-only quantization
|
|
||||||
would be the thing to try next, not QDQ.
|
|
||||||
|
|
||||||
## Measurement (b): BLOCKED-ON-DATA (attempted 2026-06-10)
|
|
||||||
|
|
||||||
The fine-tune-on-ESP32 measurement stopped at dataset characterization, per the
|
|
||||||
pre-registered stop rule (<2,000 paired windows). Findings (MEASURED):
|
|
||||||
|
|
||||||
- **Only one trainable paired dataset exists**: `ruvultra:~/work/cog-pose-train/paired.jsonl`
|
|
||||||
— 1,077 windows (one subject, one room, one 29.9-min session, single node;
|
|
||||||
CSI [56, 20]; 17 COCO keypoints, MediaPipe confidence mean 0.44 — only 264
|
|
||||||
windows pass ADR-079's own conf>0.5 training filter). Prior measured attempts
|
|
||||||
on this exact set: 0–3% torso-PCK@20 (temporal splits, three independent
|
|
||||||
pipelines). Fine-tuning a 2.23M-param model on ~860 train windows would
|
|
||||||
measure memorization, not transfer.
|
|
||||||
- **The April session behind the old "92.9% PCK@20" claim is lost** (345
|
|
||||||
samples, 35 subcarriers; raw CSI gone from ruvzen/ruvultra/cognitum-v0; only
|
|
||||||
a 69-sample predictions+GT holdout survives at `models/wiflow-real/eval-holdout.jsonl`).
|
|
||||||
- **Forensic recheck of that holdout RETRACTS the 92.9% figure**: the trainer's
|
|
||||||
`pck()` used an absolute 0.2 image-unit threshold (not torso-normalized) and
|
|
||||||
the model output a **constant pose** (pred std 0.0000 across 69 near-static
|
|
||||||
frames; a mean predictor scores 100% under the same protocol). The
|
|
||||||
torso-normalized PCK@20 on the same holdout is 19.1%. This corroborates the
|
|
||||||
2026-05-11 audit retraction (CHANGELOG, PR #535); stale doc citations were
|
|
||||||
removed 2026-06-10 (user-guide, readme-details, ADR-152 §2.1.3). The §2.2
|
|
||||||
no-citation rule now applies to ADR-079 accuracy claims.
|
|
||||||
|
|
||||||
Unblock criteria: a paired collection session of ≥2k windows (≈35+ min at the
|
|
||||||
observed stride; multi-pose, conf>0.5, ideally with the §2.1.3 two-checkerboard
|
|
||||||
calibration), plus a re-baselined our-pipeline number under torso-PCK@20 on the
|
|
||||||
same split. WiFlow-STD assets stand ready on ruvultra (`~/wiflow-std-bench/`).
|
|
||||||
Also worth investigating: ADR-079's protocol predicts ~9k windows per 30 min;
|
|
||||||
the May session under-delivered ~8× (aligner drop rate?).
|
|
||||||
|
|
||||||
## Measurement (b) (MEASURED 2026-06-10/11)
|
|
||||||
|
|
||||||
The data blocker is cleared: the 2026-06-10 22:10–22:40 collection session produced
|
|
||||||
**2,046 paired windows** (`ruvultra:~/wiflow-std-bench/paired-20260610.jsonl`; ONE
|
|
||||||
subject, ONE room, ONE ESP32 node, varied poses: walk/raise/squat/kick/wave/turn/
|
|
||||||
jump/sit; aligner `scripts/align-ground-truth.js`, non-overlapping 20-frame windows
|
|
||||||
~0.42 s; 17 COCO keypoints in normalized [0,1] camera coords; MediaPipe confidence
|
|
||||||
mean 0.802, min 0.692 — all windows pass the conf>0.5 filter). The −4 h timestamp
|
|
||||||
bug and the empty-frame confidence-dilution aligner findings are recorded
|
|
||||||
separately; results only here. Trained on ruvultra (RTX 5080, torch 2.11+cu128,
|
|
||||||
fp32, batch 32, GPU shared with the efficiency sweep). Scripts mirrored in
|
|
||||||
`remote/measb/`; raw metrics + full training curves in `results/measurement_b.json`.
|
|
||||||
|
|
||||||
### Two new aligner/dataset findings (forced deviations, MEASURED)
|
|
||||||
|
|
||||||
1. **`csi_shape` is heterogeneous, not [70, 20]**: 1,347× [70,20], 284× [134,20],
|
|
||||||
243× [26,20], 130× [12,20], 42× [20,20]. The ESP32 stream emits mixed frame
|
|
||||||
types and `extractCsiMatrix` stamps each window's subcarrier count from
|
|
||||||
`window[0].subcarriers`, zero-padding/truncating the other frames — even
|
|
||||||
native-70 windows contain ~20.4% internally zero-padded short frames
|
|
||||||
(subcarriers 40–69 all-zero). Handling: the primary suite ("all 2,046")
|
|
||||||
linearly resamples every frame's subcarrier axis to 70 bins (identity for
|
|
||||||
native-70 frames) so the pre-registered n and split sizes hold; a secondary
|
|
||||||
suite restricts to the 1,347 native [70,20] windows as a homogeneity check.
|
|
||||||
2. **Aligner layout bug**: `extractCsiMatrix` fills `matrix[f * nSc + s]`
|
|
||||||
(frame-major) but declares `shape: [nSc, nFrames]` — the stored shape label is
|
|
||||||
transposed relative to the data. Confirmed by coherent per-frame zero-tails;
|
|
||||||
corrected on load (`reshape(nFrames, nSc).T`).
|
|
||||||
|
|
||||||
### Protocol (pre-registered, followed)
|
|
||||||
|
|
||||||
Temporal split, no shuffling across time: first 70% train (1,432), next 15% val
|
|
||||||
(307), last 15% test (307); seed 42 elsewhere. Model: learned 1×1 Conv1d 70→540
|
|
||||||
adapter prepended to the upstream WiFlow-STD trunk; K=17 via the parameter-free
|
|
||||||
adaptive pool (`AdaptiveAvgPool2d((17,1))` — pretrained weights load strict for
|
|
||||||
any K). CSI normalized by the TRAIN-split p99 amplitude (129.7 all / 130.9
|
|
||||||
native-70), clipped to [0,1]. Three runs, ≤60 epochs, early-stop patience 8 on
|
|
||||||
val MPJPE, AdamW (adapter lr 1e-4; pretrained trunk lr 1e-5, 10× lower; scratch
|
|
||||||
all 1e-4), fp32. Pretrained init = the measurement-(a) **retrained** checkpoint
|
|
||||||
(`upstream/test/best_pose_model.pth`, ~96% PCK@20 on WiFlow data; the
|
|
||||||
`att.`/`final_conv.` key remap from `eval_repro.py` applied defensively — a no-op,
|
|
||||||
that checkpoint already uses post-rename keys). Frozen-trunk run: trunk
|
|
||||||
`requires_grad=False` **and** held in `.eval()` so BatchNorm running stats cannot
|
|
||||||
drift — a pure transfer probe; only the 70→540 adapter (38,340 params) trains.
|
|
||||||
|
|
||||||
PCK is torso-normalized with **torso = ‖l_shoulder(5) − l_hip(11)‖** (upstream
|
|
||||||
`calculate_pck` math — per-frame norm clamped at 0.01, mean over keypoints ×
|
|
||||||
frames — but upstream's `NECK_IDX/PELVIS_IDX = 2, 12` is a 15-keypoint
|
|
||||||
convention; on 17-kp COCO those indices are right_eye/right_hip, so the indices
|
|
||||||
were replaced, not the math). MPJPE is in normalized image units (not meters).
|
|
||||||
|
|
||||||
### Results — primary suite, all 2,046 windows (test = last 307)
|
|
||||||
|
|
||||||
| Run | PCK@10 | PCK@20 | PCK@30 | PCK@40 | PCK@50 | MPJPE | pred std | best ep |
|
|
||||||
|---|---|---|---|---|---|---|---|---|
|
|
||||||
| **mean-pose baseline** (honesty bar) | **73.1%** | **95.9%** | **98.7%** | 99.3% | 99.3% | **0.0148** | 0 (by constr.) | — |
|
|
||||||
| (i) pretrained-init, full fine-tune | 26.0% | 65.0% | 88.0% | 96.4% | 98.9% | 0.0313 | 0.0113 | 58/60 |
|
|
||||||
| (ii) scratch | 0.0% | 0.0% | 0.0% | 0.0% | 0.0% | 0.2554 | 0.0002 | 4 (stop @13) |
|
|
||||||
| (iii) frozen-trunk (adapter only) | 0.0% | 0.0% | 0.2% | 3.2% | 14.4% | 0.1260 | 0.0073 | 59/60 |
|
|
||||||
|
|
||||||
Secondary suite (native [70,20] windows only, n=1,347, test=202) reproduces the
|
|
||||||
same ordering: mean-baseline 96.0% / pretrained 67.1% / scratch 0.0% /
|
|
||||||
frozen-trunk 0.0% PCK@20 (MPJPE 0.0153 / 0.0318 / 0.2236 / 0.1343) — the
|
|
||||||
subcarrier-resampling choice does not change any conclusion.
|
|
||||||
|
|
||||||
### Interpretation
|
|
||||||
|
|
||||||
- **Did pretraining-transfer happen? Partially — as optimization transfer, not
|
|
||||||
feature transfer, and not past the honesty bar.**
|
|
||||||
- *Pretrained vs scratch*: dramatic (65.0% vs 0.0% PCK@20). The pretrained init
|
|
||||||
is the only configuration that trains at all under the pre-registered budget.
|
|
||||||
- *Frozen-trunk*: near-zero (0.0% PCK@20, 14.4% @50). WiFlow-STD's frozen
|
|
||||||
features do **not** transfer to our ESP32 domain through a linear subcarrier
|
|
||||||
adapter — the pretrained benefit is a well-conditioned initialization (incl.
|
|
||||||
calibrated BN/output scales), not reusable CSI→pose features.
|
|
||||||
- *Everything vs mean-pose baseline*: **no run beats it.** A constant
|
|
||||||
train-mean pose scores 95.9% torso-PCK@20 / 0.0148 MPJPE on this test split,
|
|
||||||
because a single subject in one camera frame barely moves in normalized
|
|
||||||
coordinates. The fine-tuned model is a real, non-constant model
|
|
||||||
(pred std 0.0113 > 0 — passes the constant-pose detector that retracted the
|
|
||||||
old 92.9% figure) but its deviations from the mean hurt: it fits train-period
|
|
||||||
temporal dynamics that do not generalize across the temporal split.
|
|
||||||
- **Verdict for ADR-152 §2.2(b): fine-tuning WiFlow-STD on this dataset does not
|
|
||||||
demonstrate CSI→pose signal beyond the mean pose.** Until a model beats the
|
|
||||||
mean-pose baseline on a temporal split, no PCK number from this line may be
|
|
||||||
cited as pose-estimation capability.
|
|
||||||
|
|
||||||
### Caveats (honest, pre-registered)
|
|
||||||
|
|
||||||
- Single subject, single room, single session (30 min), single ESP32 node —
|
|
||||||
in-domain temporal split only; nothing here speaks to cross-room or
|
|
||||||
cross-subject generalization.
|
|
||||||
- 2k windows vs the 360k-window WiFlow-STD corpus — **NOT comparable** to the
|
|
||||||
~96% in-domain measurement-(a) number, and the published 97.25% even less so.
|
|
||||||
- The scratch run's total collapse (it cannot even reach the mean pose; its
|
|
||||||
output BatchNorm/SiLU head must learn output scale from random init at lr 1e-4)
|
|
||||||
is an optimization outcome under the fixed budget, not proof the architecture
|
|
||||||
cannot learn from scratch — the pretrained-vs-scratch gap partially reflects
|
|
||||||
this conditioning advantage.
|
|
||||||
- Mixed-subcarrier frames (finding 1) mean even the "clean" windows carry ~20%
|
|
||||||
zero-padded frames; collection-side frame-type filtering should precede the
|
|
||||||
next session.
|
|
||||||
- Mean-baseline PCK is inflated by low pose variance relative to torso size
|
|
||||||
(~0.2–0.3 image units); PCK@10 (73.1%) shows the same ceiling effect at a
|
|
||||||
stricter threshold — the bar is the bar, but a livelier dataset would lower it.
|
|
||||||
|
|
||||||
## Pending
|
|
||||||
|
|
||||||
- (b) fine-tune on our ESP32 17-keypoint eval set — **MEASURED 2026-06-10/11**,
|
|
||||||
see above: no run beats the mean-pose baseline; pretraining transfers as
|
|
||||||
optimization aid only.
|
|
||||||
- (c) our internal WiFlow on their dataset (15-keypoint subset mapping) — also
|
|
||||||
affected: there is currently no validated internal pose model to compare
|
|
||||||
(the 92.9% artifact is retracted; the MM-Fi SOTA models in ADR-150 §3 are a
|
|
||||||
different input domain).
|
|
||||||
@@ -1,200 +0,0 @@
|
|||||||
"""Shared infrastructure for the LOCAL wiflow-std benchmark scripts (ADR-152).
|
|
||||||
|
|
||||||
This module is the single canonical implementation of the helpers that were
|
|
||||||
previously copy-pasted across eval_repro.py / quantize_bench.py /
|
|
||||||
onnx_bench.py / eval_ort_accuracy.py / export_to_safetensors.py:
|
|
||||||
|
|
||||||
- ``import_upstream()`` -- sys.path setup + the models-package stub that
|
|
||||||
works around the upstream import bug, plus the >1GB np.load mmap patch
|
|
||||||
- ``install_np_load_mmap_patch()`` -- the mmap patch on its own
|
|
||||||
- ``remap_legacy_keys()`` / ``load_remapped_state()`` -- checkpoint
|
|
||||||
key remap for the pre-rename released checkpoint
|
|
||||||
- ``load_wiflow_model()`` -- WiFlowPoseModel from a checkpoint, eval mode
|
|
||||||
- ``set_seed()`` -- mirrors upstream run.py seeding exactly
|
|
||||||
- ``evaluate()`` -- THE canonical batch-weighted PCK/MPJPE evaluation loop
|
|
||||||
(thresholds 0.1-0.5, upstream utils/metrics.py math); accepts either a
|
|
||||||
torch nn.Module or an onnxruntime InferenceSession
|
|
||||||
|
|
||||||
The scripts under remote/ deploy to ruvultra as standalone single files and
|
|
||||||
therefore intentionally inline private copies of these helpers; when editing
|
|
||||||
them, treat this module as the reference implementation and keep the copies
|
|
||||||
in sync.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import random
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import types
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
|
|
||||||
# Directory containing this module; the paths below are resolved against it.
HERE = os.path.dirname(os.path.abspath(__file__))
# Upstream WiFlow-STD clone; import_upstream() puts this on sys.path.
UPSTREAM = os.path.join(HERE, "upstream")
# "results" directory next to this module.
RESULTS = os.path.join(HERE, "results")

# Default PCK thresholds for evaluate(): 0.1 through 0.5.
DEFAULT_THRESHOLDS = (0.1, 0.2, 0.3, 0.4, 0.5)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# >1GB np.load mmap patch
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# csi_windows.npy is ~13 GB; mmap large arrays instead of loading into RAM
|
|
||||||
# (loading it eagerly needs ~15 GB).
|
|
||||||
# The real np.load, captured before any patching so the wrapper can delegate.
_np_load = np.load


def _np_load_mmap(path, *a, **kw):
    """Drop-in ``np.load`` that memory-maps ``.npy`` files larger than 1 GiB.

    ``path`` may be a ``str`` or any ``os.PathLike``.  Everything else
    (open file objects, small files, non-``.npy`` files) and every call that
    already chooses ``mmap_mode`` is forwarded to the real ``np.load``
    unchanged.  Raises whatever ``os.path.getsize`` / ``np.load`` raise,
    e.g. ``FileNotFoundError`` for a missing ``.npy`` path.
    """
    # mmap_mode is np.load's first optional positional parameter, so any
    # extra positional argument means the caller has already chosen it;
    # injecting the keyword too would raise a "multiple values" TypeError.
    caller_chose_mode = bool(a) or "mmap_mode" in kw
    if not caller_chose_mode and isinstance(path, (str, os.PathLike)):
        # Normalise pathlib.Path and friends so they get the same treatment
        # as plain strings instead of silently being loaded eagerly.
        name = os.fspath(path)
        if (isinstance(name, str) and name.endswith(".npy")
                and os.path.getsize(name) > 1 << 30):
            kw["mmap_mode"] = "r"
    return _np_load(path, *a, **kw)


def install_np_load_mmap_patch():
    """Globally patch np.load so .npy files >1GB are mmap'd read-only.

    Idempotent. Patching the numpy module attribute is equivalent to the
    historical ``upstream_dataset.np.load = _np_load_mmap`` (dataset.np IS
    the numpy module), but works regardless of import order.
    """
    np.load = _np_load_mmap
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# upstream import shim
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def import_upstream(mmap_patch=True):
    """Put the upstream WiFlow-STD clone on the import path and return it.

    As published, upstream's ``models/__init__.py`` imports
    ``TemporalConvNet``, which ``models/tcn.py`` does not define, so the
    package cannot be imported normally.  A stand-in ``models`` package is
    registered instead: its ``__path__`` points at the real directory, so
    submodules such as ``models.pose_model`` still resolve while the broken
    ``__init__`` never runs.

    When ``mmap_patch`` is true the >1GB ``np.load`` mmap patch is installed
    as well.  Safe to call repeatedly.
    """
    if UPSTREAM not in sys.path:
        sys.path[:0] = [UPSTREAM]

    stub = types.ModuleType("models")
    stub.__path__ = [os.path.join(UPSTREAM, "models")]
    # setdefault leaves an already-registered "models" entry untouched.
    sys.modules.setdefault("models", stub)

    if mmap_patch:
        install_np_load_mmap_patch()
    return UPSTREAM
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# checkpoint loading
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# The released checkpoint predates the published code: modules were renamed
|
|
||||||
# att -> attention, final_conv -> decoder (param count identical, 2.23M).
|
|
||||||
LEGACY_RENAMES = {"att.": "attention.", "final_conv.": "decoder."}


def remap_legacy_keys(state):
    """Return a new dict with pre-rename state_dict key prefixes rewritten.

    The first matching prefix in ``LEGACY_RENAMES`` is swapped for its
    replacement; keys matching no legacy prefix are copied through as-is,
    so a state dict already in the new style comes back unchanged.
    """
    remapped = {}
    for key, value in state.items():
        for legacy, current in LEGACY_RENAMES.items():
            if key.startswith(legacy):
                key = current + key[len(legacy):]
                break
        remapped[key] = value
    return remapped
|
|
||||||
|
|
||||||
|
|
||||||
def load_remapped_state(path, map_location="cpu"):
    """Read a checkpoint via ``torch.load`` (``weights_only=True``) and
    return it with the legacy key remap applied."""
    return remap_legacy_keys(
        torch.load(path, weights_only=True, map_location=map_location)
    )
|
|
||||||
|
|
||||||
|
|
||||||
def load_wiflow_model(checkpoint, map_location="cpu", dropout=0.5):
    """Build a ``WiFlowPoseModel`` and load ``checkpoint`` into it.

    The weights pass through the legacy key remap and are loaded with
    ``strict=True``.  The model is switched to eval mode before it is
    returned.
    """
    import_upstream()
    # Deferred import: ``models`` only resolves once import_upstream() has run.
    from models.pose_model import WiFlowPoseModel

    net = WiFlowPoseModel(dropout=dropout)
    weights = load_remapped_state(checkpoint, map_location)
    net.load_state_dict(weights, strict=True)
    net.eval()
    return net
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# seeding
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def set_seed(seed=42):
    """Seed the Python, NumPy and torch RNGs (mirrors upstream run.py exactly)."""
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        # NOTE(review): the cuDNN flags are assumed to sit inside the CUDA
        # guard -- confirm against upstream run.py.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# THE canonical evaluation loop
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def evaluate(model, loader, device=None, dtype=None, label="",
             thresholds=DEFAULT_THRESHOLDS, progress_every=50):
    """Batch-weighted PCK/MPJPE over a DataLoader (upstream metrics math).

    ``model`` may be a torch nn.Module (optionally evaluated on ``device``
    with inputs cast to ``dtype``) or an onnxruntime InferenceSession.
    Per-threshold PCK values are independent in upstream calculate_pck, so
    evaluating a superset of thresholds never changes any individual value.

    Args:
        model: torch module, or any object exposing ``get_inputs`` (treated
            as an onnxruntime session and fed numpy inputs).
        loader: iterable yielding ``(inputs, targets)`` batches.
        device: optional device that inputs and targets are moved to
            (torch path only).
        dtype: optional dtype the inputs are cast to (torch path only).
        label: optional tag prefixed to progress lines.
        thresholds: iterable of PCK thresholds; each becomes a
            ``"pck@<round(100 * t)>"`` key in the result.
        progress_every: print a progress line every this many batches;
            0 or None disables progress output.

    Returns {"samples", "mpjpe", "pck@10".."pck@50", "wall_seconds"}.
    """
    import_upstream()
    from utils.metrics import calculate_mpjpe, calculate_pck

    is_ort = hasattr(model, "get_inputs")  # onnxruntime InferenceSession
    if is_ort:
        inp = model.get_inputs()[0].name

        def forward(bx):
            return torch.from_numpy(model.run(None, {inp: bx.numpy()})[0])
    else:
        model.eval()

        def forward(bx):
            if device is not None:
                bx = bx.to(device)
            if dtype is not None:
                bx = bx.to(dtype)
            # Metrics are always computed in float32, whatever ``dtype`` is.
            return model(bx).float()

    thresholds = list(thresholds)
    totals = {t: 0.0 for t in thresholds}
    total_mpe, n = 0.0, 0
    t0 = time.time()
    with torch.no_grad():
        for batch_idx, (bx, by) in enumerate(loader):
            out = forward(bx)
            if device is not None and not is_ort:
                by = by.to(device)
            mpe = calculate_mpjpe(out, by)
            pck = calculate_pck(out, by, thresholds=thresholds)
            # Weight each batch by its size so a short final batch does not
            # skew the averages.
            bs = by.size(0)
            total_mpe += mpe * bs
            for t in totals:
                totals[t] += pck[t] * bs
            n += bs
            # A falsy progress_every disables progress (and avoids a
            # modulo-by-zero when 0 is passed).
            if progress_every and batch_idx % progress_every == 0:
                tag = f"[{label}] " if label else ""
                pck20 = totals.get(0.2)
                pck20_str = f"pck20={pck20 / n:.4f} " if pck20 is not None else ""
                print(f" {tag}batch {batch_idx}: n={n} {pck20_str}"
                      f"mpjpe={total_mpe / n:.4f} ({time.time() - t0:.0f}s)",
                      flush=True)
    return {
        "samples": n,
        "mpjpe": total_mpe / n,
        # round() rather than int(): float products such as 0.29 * 100 ==
        # 28.999999999999996 would otherwise be truncated to a wrong key.
        **{f"pck@{int(round(t * 100))}": totals[t] / n for t in thresholds},
        "wall_seconds": time.time() - t0,
    }
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
"""ADR-152 edge optimization: accuracy of the ONNX fp32 and ORT-dynamic-int8
|
|
||||||
models on the same corruption-free 10k test subset used by quantize_bench.py.
|
|
||||||
|
|
||||||
The torch dynamic-int8 path quantizes nothing (no nn.Linear in the model), so
|
|
||||||
the only real int8 datapoint for the paper's "~2.2 MB int8" claim is the
|
|
||||||
onnxruntime dynamically quantized model -- this script measures what that
|
|
||||||
quantization costs in PCK/MPJPE.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
.venv/Scripts/python.exe eval_ort_accuracy.py \
|
|
||||||
--data-dir <preprocessed_csi_data> [--subset 10000]
|
|
||||||
|
|
||||||
Writes/merges into results/edge_optimization.json under key "onnx_accuracy".
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
HERE = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
sys.path.insert(0, HERE)
|
|
||||||
|
|
||||||
from _bench_common import RESULTS, evaluate # noqa: E402
|
|
||||||
from quantize_bench import build_test_subset # noqa: E402 (sets up upstream imports)
|
|
||||||
|
|
||||||
|
|
||||||
def evaluate_ort(sess, loader, label):
    """Evaluate an onnxruntime session with the shared ``_bench_common.evaluate`` loop.

    ``sess`` and ``loader`` are forwarded positionally, ``label`` as the
    progress tag; the metrics returned by ``evaluate`` are returned unchanged.
    """
    metrics = evaluate(sess, loader, label=label)
    return metrics
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Evaluate the exported ONNX fp32/int8 models and merge results into JSON.

    For each expected ONNX file under ``RESULTS`` an onnxruntime CPU session
    is evaluated on the test subset built by ``build_test_subset``; a missing
    file is recorded as an ``{"error": ...}`` entry instead. The per-model
    results are stored under key ``"onnx_accuracy"`` in the ``--out`` JSON,
    preserving any other keys already present in that file.
    """
    import onnxruntime as ort
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-dir", default=os.path.join(
        os.path.expanduser("~"), ".cache", "kagglehub", "datasets", "kaka2434",
        "wiflow-dataset", "versions", "1", "preprocessed_csi_data"))
    parser.add_argument("--subset", type=int, default=10000)
    parser.add_argument("--out", default=os.path.join(RESULTS, "edge_optimization.json"))
    args = parser.parse_args()

    # Create the output directory up front so a bad --out fails before the
    # evaluation runs rather than after. abspath() keeps this safe when
    # --out is a bare filename (dirname would otherwise be "").
    os.makedirs(os.path.dirname(os.path.abspath(args.out)), exist_ok=True)

    loader, _n_clean = build_test_subset(args.data_dir, args.subset)
    results = {}
    for label, fname in (("onnx_fp32", "retrained_fp32_dynamic.onnx"),
                         ("onnx_int8_ort_dynamic", "retrained_int8_ort_dynamic.onnx")):
        path = os.path.join(RESULTS, fname)
        if not os.path.exists(path):
            results[label] = {"error": f"{fname} not found; run onnx_bench.py first"}
            continue
        sess = ort.InferenceSession(path, providers=["CPUExecutionProvider"])
        print(f"=== accuracy: {label} ({fname}) ===")
        results[label] = evaluate_ort(sess, loader, label)
        print(json.dumps(results[label], indent=2))

    # Merge into the existing JSON so other scripts' sections are preserved.
    merged = {}
    if os.path.exists(args.out):
        with open(args.out) as f:
            merged = json.load(f)
    merged["onnx_accuracy"] = results
    with open(args.out, "w") as f:
        json.dump(merged, f, indent=2)
    print(f"wrote {args.out}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,102 +0,0 @@
|
|||||||
"""ADR-152 §2.2 measurement (a): reproduce WiFlow-STD (DY2434) published test metrics.
|
|
||||||
|
|
||||||
Runs the released pretrained checkpoint (upstream/best_pose_model.pth) against the
|
|
||||||
released Kaggle dataset (kaka2434/wiflow-dataset) using the upstream code path:
|
|
||||||
identical dataset class, identical file-level 70/15/15 split at seed 42, identical
|
|
||||||
PCK/MPJPE implementations (utils/metrics.py).
|
|
||||||
|
|
||||||
Published claims (README, "Setting 1 random split"):
|
|
||||||
PCK@20 97.25% | PCK@30 98.63% | PCK@40 99.16% | PCK@50 99.48% | MPJPE 0.007 m
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
.venv/Scripts/python.exe eval_repro.py --data-dir <dir containing csi_windows.npy>
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import torch
|
|
||||||
from torch.utils.data import DataLoader
|
|
||||||
|
|
||||||
from _bench_common import (UPSTREAM, evaluate, import_upstream,
|
|
||||||
load_remapped_state, set_seed)
|
|
||||||
|
|
||||||
import_upstream() # sys.path + models stub + >1GB np.load mmap patch
|
|
||||||
|
|
||||||
from dataset import PreprocessedCSIKeypointsDataset, create_preprocessed_train_val_test_loaders # noqa: E402
|
|
||||||
from models.pose_model import WiFlowPoseModel # noqa: E402
|
|
||||||
|
|
||||||
|
|
||||||
def find_data_dir(root):
    """Return the first directory under ``root`` that contains ``csi_windows.npy``.

    Directories are visited in ``os.walk`` order (``root`` itself included);
    ``None`` is returned when no directory holds the file.
    """
    candidates = (
        dirpath
        for dirpath, _subdirs, names in os.walk(root)
        if "csi_windows.npy" in names
    )
    return next(candidates, None)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Evaluate the released checkpoint on the upstream test split and save JSON.

    Locates ``csi_windows.npy`` (directly in ``--data-dir`` or recursively
    below it), rebuilds the dataset and the seed-42 split through the upstream
    loaders, strictly loads the (legacy-key-remapped) checkpoint, and
    evaluates it twice: on the test loader as returned by upstream, and on
    the same dataset with ``drop_last=True``. Both result dicts plus the
    published reference numbers are written to ``--out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-dir", required=True,
                        help="Directory containing csi_windows.npy (searched recursively)")
    parser.add_argument("--checkpoint", default=os.path.join(UPSTREAM, "best_pose_model.pth"))
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--out", default=os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                                      "results", "repro_a.json"))
    args = parser.parse_args()

    data_dir = args.data_dir
    if not os.path.exists(os.path.join(data_dir, "csi_windows.npy")):
        located = find_data_dir(data_dir)
        if located is None:
            sys.exit(f"csi_windows.npy not found under {data_dir}")
        data_dir = located
    print(f"data dir: {data_dir}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"device: {device}, torch {torch.__version__}")

    set_seed(42)

    dataset = PreprocessedCSIKeypointsDataset(
        data_dir=data_dir, keypoint_scale=1000.0, enable_temporal_clean=True)

    # split must match upstream: file-level shuffle at random_seed=42, 70/15/15
    _train_loader, _val_loader, test_loader = create_preprocessed_train_val_test_loaders(
        dataset=dataset, batch_size=args.batch_size, num_workers=0, random_seed=42)

    model = WiFlowPoseModel(dropout=0.5).to(device)
    # released checkpoint predates the published code: modules were renamed
    # att -> attention, final_conv -> decoder (param count identical, 2.23M)
    state = load_remapped_state(args.checkpoint, map_location=device)
    model.load_state_dict(state, strict=True)
    n_params = sum(p.numel() for p in model.parameters())
    print(f"checkpoint: {args.checkpoint} ({n_params/1e6:.2f}M params)")

    # upstream also evaluates with drop_last=True; we report the full test set
    # (drop_last=False) and the drop_last variant for exact comparability
    results = {"published": {"pck@20": 0.9725, "pck@30": 0.9863, "pck@40": 0.9916,
                             "pck@50": 0.9948, "mpjpe": 0.007},
               "params_millions": n_params / 1e6,
               "data_dir": data_dir,
               "device": str(device)}

    print("=== test set (full, drop_last=False) ===")
    results["test_full"] = evaluate(model, test_loader, device=device)
    print(json.dumps(results["test_full"], indent=2))

    test_loader_dl = DataLoader(test_loader.dataset, batch_size=args.batch_size,
                                shuffle=False, drop_last=True)
    print("=== test set (drop_last=True, as upstream train.py) ===")
    results["test_drop_last"] = evaluate(model, test_loader_dl, device=device)
    print(json.dumps(results["test_drop_last"], indent=2))

    # abspath() first: for a bare filename such as "--out repro.json",
    # dirname() is "" and os.makedirs("") raises FileNotFoundError, which
    # would discard the finished evaluation.
    os.makedirs(os.path.dirname(os.path.abspath(args.out)), exist_ok=True)
    with open(args.out, "w") as f:
        json.dump(results, f, indent=2)
    print(f"wrote {args.out}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,174 +0,0 @@
|
|||||||
"""ADR-152 §2.2: export the retrained WiFlow-STD PyTorch checkpoint to
|
|
||||||
safetensors with tch-rs (VarStore) variable names, plus a numerical-parity
|
|
||||||
fixture for the Rust port.
|
|
||||||
|
|
||||||
Outputs (all under results/, gitignored):
|
|
||||||
retrained_wiflow_std.safetensors -- 248 f32 tensors named exactly as the
|
|
||||||
Rust WiFlowStdModel VarStore expects
|
|
||||||
(see wiflow_std/model.rs
|
|
||||||
`dump_variable_names` for the
|
|
||||||
authoritative name dump)
|
|
||||||
parity_fixture.npz -- deterministic input (seed 42,
|
|
||||||
shape (2, 540, 20), uniform [0,1]) and
|
|
||||||
the Python model's eval-mode output
|
|
||||||
parity_fixture.json -- same data as flattened f32 lists, for
|
|
||||||
the dependency-free Rust test
|
|
||||||
(tests/test_wiflow_std_parity.rs)
|
|
||||||
|
|
||||||
PyTorch -> tch key mapping (derived from the VarStore dump, not guessed):
|
|
||||||
|
|
||||||
tcn.network.{i}.conv1_group.weight -> tcn{i}.conv1_group.weight
|
|
||||||
tcn.network.{i}.bn*_{group,pw}.<leaf> -> tcn{i}.bn*_{group,pw}.<leaf>
|
|
||||||
tcn.network.{i}.downsample.0.weight -> tcn{i}.ds_conv.weight
|
|
||||||
tcn.network.{i}.downsample.1.<leaf> -> tcn{i}.ds_bn.<leaf>
|
|
||||||
up.block.{0,1,4,5,8,9}.<leaf> -> conv_in.{conv1,bn1,conv2,bn2,conv3,bn3}.<leaf>
|
|
||||||
up.downsample.{0,1}.<leaf> -> conv_in.{ds_conv,ds_bn}.<leaf>
|
|
||||||
residual_blocks.{i}.block.{...}.<leaf> -> conv{i}.{conv1..bn3}.<leaf>
|
|
||||||
residual_blocks.{i}.downsample.{0,1} -> conv{i}.{ds_conv,ds_bn}
|
|
||||||
attention.{width,height}_axis.qkv_transform.weight
|
|
||||||
-> attention.{width,height}.qkv.weight
|
|
||||||
attention.{width,height}_axis.bn_* -> attention.{width,height}.bn_*
|
|
||||||
decoder.{0,1,3,4}.<leaf> -> {dec_conv1,dec_bn1,dec_conv2,dec_bn2}.<leaf>
|
|
||||||
*.num_batches_tracked -> dropped (tch BatchNorm has no such buffer)
|
|
||||||
|
|
||||||
Legacy upstream names (att. -> attention., final_conv. -> decoder.) are
|
|
||||||
remapped first, exactly as eval_repro.py does for the released checkpoint.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
.venv/Scripts/python.exe export_to_safetensors.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
from safetensors.torch import save_file
|
|
||||||
|
|
||||||
from _bench_common import RESULTS, import_upstream, remap_legacy_keys
|
|
||||||
|
|
||||||
import_upstream() # sys.path + models stub
|
|
||||||
|
|
||||||
from models.pose_model import WiFlowPoseModel # noqa: E402
|
|
||||||
|
|
||||||
CHECKPOINT = os.path.join(RESULTS, "retrained_best_pose_model.pth")
|
|
||||||
|
|
||||||
# Sequential index -> tch sub-name inside one ConvBlock1/AsymmetricConvBlock:
|
|
||||||
# [Conv2d(0), BN(1), SiLU(2), Dropout2d(3), Conv2d(4), BN(5), SiLU(6),
|
|
||||||
# Dropout2d(7), Conv2d(8), BN(9)]
|
|
||||||
_BLOCK_IDX = {"0": "conv1", "1": "bn1", "4": "conv2", "5": "bn2",
|
|
||||||
"8": "conv3", "9": "bn3"}
|
|
||||||
_DS_IDX = {"0": "ds_conv", "1": "ds_bn"}
|
|
||||||
_DECODER_IDX = {"0": "dec_conv1", "1": "dec_bn1", "3": "dec_conv2",
|
|
||||||
"4": "dec_bn2"}
|
|
||||||
|
|
||||||
|
|
||||||
def _conv_block(new_prefix: str, rest: str) -> str:
    """Map the tail of a conv-block key to ``<new_prefix>.<tch sub-name>.<leaf>``.

    ``rest`` must look like ``block.<idx>.<leaf>`` (index translated through
    ``_BLOCK_IDX``) or ``downsample.<idx>.<leaf>`` (translated through
    ``_DS_IDX``).

    Raises:
        KeyError: if ``rest`` matches neither form, or its index has no
            entry in the corresponding table. The message always names the
            offending prefix and key tail.
    """
    for pattern, table in ((r"block\.(\d+)\.(.+)", _BLOCK_IDX),
                           (r"downsample\.(\d+)\.(.+)", _DS_IDX)):
        m = re.fullmatch(pattern, rest)
        # An index missing from the table falls through to the descriptive
        # error below instead of escaping as a context-free KeyError('<idx>').
        if m and m.group(1) in table:
            return f"{new_prefix}.{table[m.group(1)]}.{m.group(2)}"
    raise KeyError(f"unmapped conv-block key: {new_prefix} / {rest}")
|
|
||||||
|
|
||||||
|
|
||||||
def map_key(key: str) -> str:
    """Translate one PyTorch state_dict key into its tch VarStore name.

    The key is matched against the known module families in turn (``tcn``,
    ``up``, ``residual_blocks``, ``attention``, ``decoder``); the first
    family that matches decides the translation.

    Raises:
        KeyError: if the key belongs to none of the known families (or a
            sub-index has no tch counterpart).
    """
    tcn = re.fullmatch(r"tcn\.network\.(\d+)\.(.+)", key)
    if tcn:
        leaf = tcn.group(2)
        leaf = leaf.replace("downsample.0.", "ds_conv.")
        leaf = leaf.replace("downsample.1.", "ds_bn.")
        return f"tcn{tcn.group(1)}.{leaf}"

    stem = re.fullmatch(r"up\.(.+)", key)
    if stem:
        return _conv_block("conv_in", stem.group(1))

    residual = re.fullmatch(r"residual_blocks\.(\d+)\.(.+)", key)
    if residual:
        block_no, leaf = residual.groups()
        return _conv_block(f"conv{block_no}", leaf)

    attn = re.fullmatch(r"attention\.(width|height)_axis\.(.+)", key)
    if attn:
        leaf = attn.group(2).replace("qkv_transform.", "qkv.")
        return f"attention.{attn.group(1)}.{leaf}"

    dec = re.fullmatch(r"decoder\.(\d+)\.(.+)", key)
    if dec:
        slot = _DECODER_IDX[dec.group(1)]
        return f"{slot}.{dec.group(2)}"

    raise KeyError(f"unmapped checkpoint key: {key}")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Export the retrained checkpoint to safetensors and write the parity fixture.

    Steps, in order: load ``CHECKPOINT``, unwrap a trainer wrapper dict if
    present, remap legacy key names, translate every key with ``map_key``
    (skipping ``num_batches_tracked``), sanity-check tensor and parameter
    counts, save the safetensors file, then run the Python model on a
    seed-42 random input and store input/output as ``.npz`` and ``.json``.
    """
    state = torch.load(CHECKPOINT, map_location="cpu", weights_only=True)
    # A bare state_dict contains the first TCN weight key; anything else is
    # treated as a possible wrapper and searched for a nested state_dict.
    if not isinstance(state, dict) or "tcn.network.0.conv1_group.weight" not in {
        k for k in state
    } | {k.replace("att.", "attention.") for k in state}:
        # tolerate trainer wrappers like {"model_state_dict": ...}
        for wrapper in ("model_state_dict", "state_dict", "model"):
            if isinstance(state, dict) and wrapper in state:
                state = state[wrapper]
                break

    # Legacy upstream names predate the published code (_bench_common).
    state = remap_legacy_keys(state)

    mapped = {}
    dropped = 0
    for k, v in state.items():
        # Skipped, not exported (module docstring: tch BatchNorm has no such
        # buffer).
        if k.endswith("num_batches_tracked"):
            dropped += 1
            continue
        tch_key = map_key(k)
        # Two source keys landing on one tch name would silently lose a
        # tensor, so fail loudly instead.
        if tch_key in mapped:
            raise KeyError(f"duplicate mapped key: {k} -> {tch_key}")
        mapped[tch_key] = v.detach().to(torch.float32).contiguous()

    # Count everything except BatchNorm running statistics ("running_*").
    n_params = sum(v.numel() for k, v in mapped.items()
                   if "running_" not in k)
    print(f"checkpoint tensors: {len(state)} "
          f"(dropped {dropped} num_batches_tracked)")
    print(f"mapped tensors: {len(mapped)}, "
          f"non-buffer params: {n_params/1e6:.6f}M")
    # Guard rails against a partial or mismatched checkpoint.
    assert len(mapped) == 248, f"expected 248 tch variables, got {len(mapped)}"
    assert n_params == 2_225_042, f"param count mismatch: {n_params}"

    st_path = os.path.join(RESULTS, "retrained_wiflow_std.safetensors")
    save_file(mapped, st_path)
    print(f"wrote {st_path}")

    # ---- parity fixture --------------------------------------------------
    # The fixture is produced from the PyTorch-named ``state`` (not the
    # tch-named ``mapped``), loaded strictly into the Python model.
    model = WiFlowPoseModel(dropout=0.5)
    model.load_state_dict(state, strict=True)
    model.eval()

    # Dedicated generator: the fixture input does not depend on global RNG state.
    gen = torch.Generator().manual_seed(42)
    x = torch.rand(2, 540, 20, generator=gen, dtype=torch.float32)
    with torch.no_grad():
        y = model(x)
    print(f"fixture input {tuple(x.shape)} -> output {tuple(y.shape)}, "
          f"output range [{y.min().item():.6f}, {y.max().item():.6f}]")

    np.savez(os.path.join(RESULTS, "parity_fixture.npz"),
             input=x.numpy(), output=y.numpy())
    # Same data as flat lists for the JSON copy of the fixture.
    fixture = {
        "seed": 42,
        "input_shape": list(x.shape),
        "input": x.flatten().tolist(),
        "output_shape": list(y.shape),
        "output": y.flatten().tolist(),
    }
    json_path = os.path.join(RESULTS, "parity_fixture.json")
    with open(json_path, "w") as f:
        json.dump(fixture, f)
    print(f"wrote {os.path.join(RESULTS, 'parity_fixture.npz')}")
    print(f"wrote {json_path}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,148 +0,0 @@
|
|||||||
"""Regenerate results/nan_windows_mask.npy + results/big_windows_mask.npy by
|
|
||||||
scanning a PRISTINE kagglehub download of the WiFlow-STD dataset
|
|
||||||
(kaka2434/wiflow-dataset v1, csi_windows.npy, 360,000 windows of 540x20).
|
|
||||||
|
|
||||||
============================ READ THIS FIRST ===============================
|
|
||||||
This script MUST be run against an UNCLEANED copy of the dataset.
|
|
||||||
|
|
||||||
remote/clean_v2.py (and its predecessor clean_nan.py) repair the dataset by
|
|
||||||
zeroing the corrupted windows IN PLACE, with no backup. A cleaned copy
|
|
||||||
contains no non-finite values and no out-of-range amplitudes, so on a cleaned
|
|
||||||
copy this scan produces ALL-FALSE masks -- silently wrong ground truth. The
|
|
||||||
script errors out loudly in that case (see the sanity check in main()).
|
|
||||||
|
|
||||||
That irreversibility is exactly why the two committed mask files under
|
|
||||||
results/ (gitignore-negated) are the canonical ground truth: once a download
|
|
||||||
has been cleaned, the masks can NEVER be regenerated from it. Only run this
|
|
||||||
on a fresh `kagglehub.dataset_download("kaka2434/wiflow-dataset")`.
|
|
||||||
============================================================================
|
|
||||||
|
|
||||||
Criteria (per window; mirrors the original 2026-06-10 scan and the
|
|
||||||
remote/clean_v2.py repair criteria):
|
|
||||||
|
|
||||||
nan mask: any non-finite value (NaN/Inf) anywhere in the 540x20 window
|
|
||||||
big mask: max |finite value| > 1.5 (the data is otherwise [0,1]-normalized;
|
|
||||||
the corrupted files contain garbage up to 3.4e38, float32 max)
|
|
||||||
|
|
||||||
Expected result on the pristine Kaggle download (RESULTS.md defect 5):
|
|
||||||
nan: 9,070 True | big: 9,072 True | union: 9,072 -- all windows in dataset
|
|
||||||
files 487-499 (the final 13 files), window indices 350,922-359,999.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
PYTHONUTF8=1 .venv/Scripts/python.exe generate_corruption_masks.py \
|
|
||||||
[--data-dir <dir containing csi_windows.npy>] [--out-dir results]
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
HERE = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
RESULTS = os.path.join(HERE, "results")
|
|
||||||
|
|
||||||
EXPECTED = {"nan": 9070, "big": 9072, "union": 9072,
|
|
||||||
"files": (487, 499), "windows": (350922, 359999)}
|
|
||||||
|
|
||||||
|
|
||||||
def scan(csi_path, chunk=4000):
    """Scan the memory-mapped windows array chunk by chunk.

    Returns ``(nan_mask, big_mask)``, two boolean arrays with one entry per
    window: ``nan_mask`` marks windows containing any non-finite value,
    ``big_mask`` marks windows whose largest finite absolute value exceeds
    1.5. A progress line is printed on every tenth chunk.
    """
    windows = np.load(csi_path, mmap_mode="r")
    total = len(windows)
    nan_mask = np.zeros(total, dtype=bool)
    big_mask = np.zeros(total, dtype=bool)
    for step, start in enumerate(range(0, total, chunk)):
        span = slice(start, start + chunk)
        block = np.asarray(windows[span])
        is_finite = np.isfinite(block)
        # "any non-finite" is the complement of "all finite".
        nan_mask[span] = ~is_finite.all(axis=(1, 2))
        # Non-finite entries are zeroed so they cannot influence the max.
        magnitudes = np.abs(np.where(is_finite, block, 0))
        big_mask[span] = magnitudes.max(axis=(1, 2)) > 1.5
        if step % 10 == 0:
            done = min(start + chunk, total)
            print(f" scanned {done:,}/{total:,} windows "
                  f"(nan={int(nan_mask.sum()):,} big={int(big_mask.sum()):,})",
                  flush=True)
    return nan_mask, big_mask
|
|
||||||
|
|
||||||
|
|
||||||
def describe_files(data_dir, mask):
    """Map marked windows to dataset file indices via window_info.npz.

    Args:
        data_dir: directory expected to contain ``window_info.npz``.
        mask: boolean (or index) array selecting windows in the archive's
            ``window_to_file`` array.

    Returns:
        The sorted unique ``window_to_file`` values of the selected windows,
        or ``None`` when ``window_info.npz`` does not exist.
    """
    info = os.path.join(data_dir, "window_info.npz")
    if not os.path.exists(info):
        return None
    # np.load on an .npz returns an NpzFile holding an open file handle;
    # close it deterministically once the array has been read.
    with np.load(info) as archive:
        w2f = archive["window_to_file"]
    return np.unique(w2f[mask])
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Scan the dataset for corrupted windows and write the two mask files.

    Exits with an error message (writing nothing) when ``csi_windows.npy``
    is missing or when the scan marks no window at all. Otherwise prints
    warnings if the marked files or per-mask counts differ from ``EXPECTED``
    and then saves ``nan_windows_mask.npy`` and ``big_windows_mask.npy``
    into ``--out-dir``.
    """
    parser = argparse.ArgumentParser(
        description="Regenerate the corruption masks from a PRISTINE "
                    "(uncleaned) kagglehub download. See module docstring.")
    parser.add_argument("--data-dir", default=os.path.join(
        os.path.expanduser("~"), ".cache", "kagglehub", "datasets", "kaka2434",
        "wiflow-dataset", "versions", "1", "preprocessed_csi_data"),
        help="Directory containing csi_windows.npy (PRISTINE copy)")
    parser.add_argument("--out-dir", default=RESULTS,
                        help="Where to write the two .npy masks")
    parser.add_argument("--chunk", type=int, default=4000,
                        help="Windows per scan chunk (memory/speed tradeoff)")
    args = parser.parse_args()

    csi_path = os.path.join(args.data_dir, "csi_windows.npy")
    if not os.path.exists(csi_path):
        sys.exit(f"csi_windows.npy not found in {args.data_dir}")

    print(f"scanning {csi_path} (chunk={args.chunk}) ...")
    nan_mask, big_mask = scan(csi_path, args.chunk)
    # A window counts as corrupted if either criterion marks it.
    union = nan_mask | big_mask
    print(f"nan: {int(nan_mask.sum()):,} | big: {int(big_mask.sum()):,} | "
          f"union: {int(union.sum()):,} of {len(union):,} windows")

    # ---- sanity check: an all-False result means a CLEANED copy ------------
    # This exit happens before any file is written, so existing masks in
    # --out-dir are left untouched.
    if not union.any():
        sys.exit(
            "ERROR: scan found ZERO corrupted windows.\n"
            "\n"
            "The pristine Kaggle download (kaka2434/wiflow-dataset v1) is "
            "known to contain\n"
            "9,072 corrupted windows (NaN/Inf + amplitudes up to 3.4e38) in "
            "dataset files\n"
            "487-499 (RESULTS.md, reproducibility defect 5). Finding none "
            "means this copy\n"
            "has almost certainly already been repaired by remote/clean_v2.py "
            "(or clean_nan.py),\n"
            "which zeroes the corrupted windows IN PLACE -- after that the "
            "corruption evidence\n"
            "is gone and the masks CANNOT be regenerated from this copy.\n"
            "\n"
            "Refusing to overwrite the committed ground-truth masks with "
            "all-False ones.\n"
            "Re-download the dataset (kagglehub.dataset_download("
            "'kaka2434/wiflow-dataset'))\n"
            "and point --data-dir at the fresh, uncleaned copy.")

    # File-level check; skipped when window_info.npz is absent
    # (describe_files then returns None).
    files = describe_files(args.data_dir, union)
    if files is not None:
        print(f"marked windows span dataset files {files.min()}-{files.max()}: "
              f"{files.tolist()}")
        lo, hi = EXPECTED["files"]
        if files.min() != lo or files.max() != hi:
            print(f"WARNING: expected marked files exactly {lo}-{hi} "
                  f"(the pristine v1 download); got {files.min()}-{files.max()}. "
                  f"Different dataset version, or a partially cleaned copy?")
    # Count mismatches only warn; the masks are still written below.
    # NOTE(review): the original indentation was lost in this copy; this
    # loop is assumed to run regardless of window_info.npz -- confirm.
    for name, mask, exp in (("nan", nan_mask, EXPECTED["nan"]),
                            ("big", big_mask, EXPECTED["big"])):
        if int(mask.sum()) != exp:
            print(f"WARNING: {name} mask has {int(mask.sum()):,} True windows; "
                  f"the pristine v1 download yields {exp:,}.")

    os.makedirs(args.out_dir, exist_ok=True)
    for name, mask in (("nan_windows_mask.npy", nan_mask),
                       ("big_windows_mask.npy", big_mask)):
        out = os.path.join(args.out_dir, name)
        np.save(out, mask)
        print(f"wrote {out} ({int(mask.sum()):,} True)")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,220 +0,0 @@
|
|||||||
"""ADR-152 edge optimization: ONNX export + onnxruntime CPU benchmark for the
|
|
||||||
retrained WiFlow-STD checkpoint.
|
|
||||||
|
|
||||||
- Exports fp32 to ONNX. The axial attention reshapes with python ints taken
|
|
||||||
from tensor.size() (view(N*W, C, H)), so a traced graph bakes the batch
|
|
||||||
size; we first try a dynamic-batch export and verify it actually works at
|
|
||||||
batch sizes 1/2/64 -- if not, we fall back to fixed-batch exports.
|
|
||||||
- Verifies output parity vs torch on the stored fixture
|
|
||||||
(results/parity_fixture.npz, batch 2, seed 42): max abs diff < 1e-4.
|
|
||||||
- Measures onnxruntime CPU latency at batch 1 and 64 (median of N runs).
|
|
||||||
- Supplementary: onnxruntime dynamic int8 quantization of the exported model
|
|
||||||
(weight size datapoint for the paper's "~2.2 MB int8" claim).
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
.venv/Scripts/python.exe onnx_bench.py
|
|
||||||
|
|
||||||
Writes/merges into results/edge_optimization.json under key "onnx".
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import platform
|
|
||||||
import statistics
|
|
||||||
import time
|
|
||||||
import traceback
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
|
|
||||||
from _bench_common import RESULTS, import_upstream, load_wiflow_model
|
|
||||||
|
|
||||||
import_upstream() # sys.path + models stub + >1GB np.load mmap patch
|
|
||||||
|
|
||||||
CHECKPOINT = os.path.join(RESULTS, "retrained_best_pose_model.pth")
|
|
||||||
OUT_JSON = os.path.join(RESULTS, "edge_optimization.json")
|
|
||||||
|
|
||||||
|
|
||||||
def load_fp32_model():
    """Return the model that ``load_wiflow_model`` builds from ``CHECKPOINT``."""
    fp32_model = load_wiflow_model(CHECKPOINT)
    return fp32_model
|
|
||||||
|
|
||||||
|
|
||||||
def try_export(model, path, batch, dynamic, opset=17):
    """Export ``model`` to ONNX at ``path``, trying two exporters in turn.

    A random ``(batch, 540, 20)`` input is used as the example. With
    ``dynamic`` set, the dynamo exporter is tried first and TorchScript
    second, both with a dynamic batch axis; otherwise TorchScript is tried
    first and dynamo second, with fixed shapes. Each failure prints its
    traceback and the next exporter is tried.

    Returns (ok, exporter_used, error): ``(True, name, None)`` for the first
    exporter that succeeds, else ``(False, None, <last attempt's error>)``.
    """
    sample = torch.rand(batch, 540, 20)
    if dynamic:
        attempts = [
            ("dynamo", dict(dynamo=True,
                            dynamic_shapes={"x": {0: "batch"}})),
            ("torchscript", dict(dynamo=False,
                                 dynamic_axes={"input": {0: "batch"},
                                               "output": {0: "batch"}})),
        ]
    else:
        attempts = [
            ("torchscript", dict(dynamo=False)),
            ("dynamo", dict(dynamo=True)),
        ]

    failure = None
    for exporter, extra in attempts:
        try:
            with torch.no_grad():
                torch.onnx.export(model, (sample,), path, opset_version=opset,
                                  input_names=["input"], output_names=["output"],
                                  **extra)
        except Exception as exc:  # noqa: BLE001
            failure = f"{exporter}: {type(exc).__name__}: {exc}"
            traceback.print_exc()
        else:
            return True, exporter, None
    return False, None, failure
|
|
||||||
|
|
||||||
|
|
||||||
def ort_session(path):
    """Open the ONNX model at ``path`` as an onnxruntime session on the CPU provider."""
    # Imported lazily, inside the function, as in the original.
    from onnxruntime import InferenceSession
    return InferenceSession(path, providers=["CPUExecutionProvider"])
|
|
||||||
|
|
||||||
|
|
||||||
def ort_run(sess, x):
    """Feed ``x`` to the session's first input and return its first output."""
    first_input = sess.get_inputs()[0]
    outputs = sess.run(None, {first_input.name: x})
    return outputs[0]
|
|
||||||
|
|
||||||
|
|
||||||
def bench_ort(sess, batch, n_runs):
    """Time ``sess`` on one fixed random ``(batch, 540, 20)`` float32 input.

    After ``max(5, n_runs // 10)`` untimed warm-up calls, ``n_runs`` calls
    are timed with ``time.perf_counter`` and summarized by their median.

    Returns a dict with ``batch_size``, ``runs``, ``median_ms_per_batch``,
    ``median_ms_per_window`` and ``windows_per_second``.
    """
    x = np.random.default_rng(123).random((batch, 540, 20), dtype=np.float32)

    warmup_runs = max(5, n_runs // 10)
    for _ in range(warmup_runs):
        ort_run(sess, x)

    samples = []
    for _ in range(n_runs):
        started = time.perf_counter()
        ort_run(sess, x)
        samples.append(time.perf_counter() - started)

    median_s = statistics.median(samples)
    median_ms = median_s * 1e3
    return {
        "batch_size": batch,
        "runs": n_runs,
        "median_ms_per_batch": median_ms,
        "median_ms_per_window": median_ms / batch,
        "windows_per_second": batch / median_s,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Export the fp32 model to ONNX, check parity, benchmark, quantize, save JSON.

    Tries a dynamic-batch export first and verifies it at batch sizes 1, 2
    and 64; if that fails, exports one fixed-batch model per size. Then
    compares onnxruntime output with torch on the stored fixture, measures
    CPU latency at batch 1 and 64, produces an onnxruntime dynamic-int8 model
    as a size datapoint, and merges everything under key ``"onnx"`` into
    ``OUT_JSON``.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="ONNX export + onnxruntime CPU benchmark for the "
                    "retrained WiFlow-STD checkpoint (no options; see "
                    "module docstring). NB: the published "
                    "retrained_fp32_dynamic.onnx came from the TorchScript "
                    "exporter; on newer torch the dynamo attempt may succeed "
                    "first and produce a different (external-data) artifact.")
    # No options are defined and the parsed result is discarded.
    parser.parse_args()

    import onnxruntime
    model = load_fp32_model()
    results = {
        "env": {
            "torch": torch.__version__,
            "onnxruntime": onnxruntime.__version__,
            "platform": platform.platform(),
        },
    }

    fixture = np.load(os.path.join(RESULTS, "parity_fixture.npz"))
    fx, fy = fixture["input"], fixture["output"]  # (2,540,20) -> (2,15,2)

    # ---- export: dynamic batch first, fall back to fixed --------------------
    dyn_path = os.path.join(RESULTS, "retrained_fp32_dynamic.onnx")
    ok, exporter, err = try_export(model, dyn_path, batch=2, dynamic=True)
    dynamic_works = False
    if ok:
        # verify the dynamic graph really runs at other batch sizes
        try:
            sess = ort_session(dyn_path)
            for b in (1, 2, 64):
                y = ort_run(sess, np.zeros((b, 540, 20), dtype=np.float32))
                assert y.shape == (b, 15, 2), y.shape
            dynamic_works = True
        except Exception as e:  # noqa: BLE001
            print(f"dynamic-batch model does not generalize: {e}")

    # Batch size -> session. The dynamic model serves all three sizes; the
    # fixed-batch fallback holds one session per successfully exported size.
    sessions = {}
    if dynamic_works:
        results["export"] = {"mode": "dynamic-batch", "exporter": exporter,
                             "file": os.path.basename(dyn_path),
                             "size_mb": os.path.getsize(dyn_path) / 1e6}
        sess = ort_session(dyn_path)
        sessions = {1: sess, 2: sess, 64: sess}
        print(f"dynamic-batch export OK via {exporter}")
    else:
        results["export"] = {"mode": "fixed-batch", "fallback_reason": err,
                             "files": {}}
        for b in (1, 2, 64):
            p = os.path.join(RESULTS, f"retrained_fp32_b{b}.onnx")
            ok, exporter, err = try_export(model, p, batch=b, dynamic=False)
            if not ok:
                # Record the failure and keep going with the other sizes.
                results["export"]["files"][str(b)] = {"error": err}
                print(f"EXPORT FAILED at batch {b}: {err}")
                continue
            results["export"]["files"][str(b)] = {
                "exporter": exporter, "file": os.path.basename(p),
                "size_mb": os.path.getsize(p) / 1e6}
            sessions[b] = ort_session(p)
            print(f"fixed-batch {b} export OK via {exporter}")

    # ---- parity vs torch on the fixture -------------------------------------
    # The fixture has batch size 2, so parity needs a session that accepts it.
    if 2 in sessions:
        y_ort = ort_run(sessions[2], fx)
        with torch.no_grad():
            y_torch = model(torch.from_numpy(fx)).numpy()
        results["parity"] = {
            "fixture": "results/parity_fixture.npz (batch 2, seed 42)",
            "max_abs_diff_vs_stored_fixture": float(np.abs(y_ort - fy).max()),
            "max_abs_diff_vs_torch_now": float(np.abs(y_ort - y_torch).max()),
            # Pass/fail uses the freshly computed torch output, not the
            # stored fixture output.
            "pass_lt_1e-4": bool(np.abs(y_ort - y_torch).max() < 1e-4),
        }
        print("parity:", json.dumps(results["parity"], indent=2))

    # ---- latency -------------------------------------------------------------
    results["latency"] = {}
    if 1 in sessions:
        results["latency"]["batch1"] = bench_ort(sessions[1], 1, 100)
        print(f"ORT batch 1: {results['latency']['batch1']['median_ms_per_window']:.2f} ms/window")
    if 64 in sessions:
        results["latency"]["batch64"] = bench_ort(sessions[64], 64, 30)
        print(f"ORT batch 64: {results['latency']['batch64']['median_ms_per_window']:.3f} ms/window")

    # ---- supplementary: ORT dynamic int8 (size datapoint for the 2.2MB claim)
    src = (dyn_path if dynamic_works
           else os.path.join(RESULTS, "retrained_fp32_b1.onnx"))
    if os.path.exists(src):
        try:
            from onnxruntime.quantization import QuantType, quantize_dynamic
            q_path = os.path.join(RESULTS, "retrained_int8_ort_dynamic.onnx")
            quantize_dynamic(src, q_path, weight_type=QuantType.QInt8)
            entry = {"file": os.path.basename(q_path),
                     "size_mb": os.path.getsize(q_path) / 1e6}
            try:
                qs = ort_session(q_path)
                # The fixed-batch fallback quantizes the batch-1 model, so
                # only the first fixture sample can be fed to it.
                yq = ort_run(qs, fx[:1] if not dynamic_works else fx)
                ref = fy[:1] if not dynamic_works else fy
                entry["runs"] = True
                entry["max_abs_diff_vs_fp32_fixture"] = float(np.abs(yq - ref).max())
            except Exception as e:  # noqa: BLE001
                # Quantization succeeded but the model does not run: keep the
                # size datapoint and record the run error.
                entry["runs"] = False
                entry["run_error"] = f"{type(e).__name__}: {e}"
            results["ort_int8_dynamic_supplementary"] = entry
            print("ORT int8:", json.dumps(entry, indent=2))
        except Exception as e:  # noqa: BLE001
            results["ort_int8_dynamic_supplementary"] = {
                "error": f"{type(e).__name__}: {e}"}

    # Merge into the existing JSON so other scripts' sections are preserved.
    merged = {}
    if os.path.exists(OUT_JSON):
        with open(OUT_JSON) as f:
            merged = json.load(f)
    merged["onnx"] = results
    with open(OUT_JSON, "w") as f:
        json.dump(merged, f, indent=2)
    print(f"wrote {OUT_JSON}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,228 +0,0 @@
|
|||||||
"""ADR-152 "optimize beyond SOTA": edge-optimization benchmark for the
|
|
||||||
retrained WiFlow-STD checkpoint (results/retrained_best_pose_model.pth,
|
|
||||||
~96% PCK@20, fp32 params 2,225,042).
|
|
||||||
|
|
||||||
Measures, for fp32 / fp16 / dynamic-int8 torch variants:
|
|
||||||
(a) serialized state_dict size on disk,
|
|
||||||
(b) CPU inference latency per window at batch 1 and batch 64
|
|
||||||
(median of repeated runs, this Windows box),
|
|
||||||
(c) accuracy (PCK@20/50 + MPJPE, upstream metrics) on a corruption-free
|
|
||||||
random subset of the seed-42 file-level 70/15/15 test split
|
|
||||||
(same split as eval_repro.py; corrupted windows 487-499 excluded via
|
|
||||||
results/nan_windows_mask.npy | results/big_windows_mask.npy).
|
|
||||||
|
|
||||||
Also verifies the paper's "~2.2 MB int8" size claim: reports which layer
|
|
||||||
types torch dynamic quantization actually converts (the model contains NO
|
|
||||||
nn.Linear -- it is Conv1d/Conv2d/BatchNorm only) and the real on-disk size.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
.venv/Scripts/python.exe quantize_bench.py \
|
|
||||||
--data-dir C:/Users/ruv/.cache/kagglehub/datasets/kaka2434/wiflow-dataset/versions/1/preprocessed_csi_data \
|
|
||||||
[--subset 10000] [--skip-accuracy]
|
|
||||||
|
|
||||||
Writes/merges into results/edge_optimization.json under key "torch".
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import platform
|
|
||||||
import statistics
|
|
||||||
import time
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
from torch.utils.data import DataLoader
|
|
||||||
|
|
||||||
from _bench_common import HERE, RESULTS, evaluate, import_upstream, load_wiflow_model
|
|
||||||
|
|
||||||
import_upstream() # sys.path + models stub + >1GB np.load mmap patch
|
|
||||||
|
|
||||||
from dataset import ( # noqa: E402
|
|
||||||
PreprocessedCSIKeypointsDataset,
|
|
||||||
create_preprocessed_train_val_test_loaders,
|
|
||||||
)
|
|
||||||
|
|
||||||
CHECKPOINT = os.path.join(RESULTS, "retrained_best_pose_model.pth")
|
|
||||||
|
|
||||||
|
|
||||||
def load_fp32_model():
    """Return a freshly loaded model built from ``CHECKPOINT``.

    Loading is delegated to ``load_wiflow_model``. The legacy upstream key
    remap performed inside that helper is a harmless no-op on this checkpoint.
    """
    model = load_wiflow_model(CHECKPOINT)
    return model
|
|
||||||
|
|
||||||
|
|
||||||
def state_dict_size_bytes(model, path):
    """Save ``model.state_dict()`` to ``path`` and return the file size in bytes.

    The file is left on disk; any existing file at ``path`` is overwritten by
    ``torch.save``.
    """
    weights = model.state_dict()
    torch.save(weights, path)
    n_bytes = os.path.getsize(path)
    return n_bytes
|
|
||||||
|
|
||||||
|
|
||||||
def bench_latency(model, batch_size, n_runs, dtype=torch.float32):
    """Time repeated forward passes of ``model`` on one fixed random input.

    The input is a ``(batch_size, 540, 20)`` tensor drawn from a generator
    seeded with 123 and cast to ``dtype``. ``max(5, n_runs // 10)`` untimed
    warmup calls run first, then ``n_runs`` timed calls, all under
    ``torch.no_grad()``.

    Returns:
        dict with ``batch_size``, ``runs``, the median wall time per batch and
        per window in milliseconds, and the implied windows per second.
    """
    rng = torch.Generator().manual_seed(123)
    window = torch.rand(batch_size, 540, 20, generator=rng).to(dtype)
    n_warmup = max(5, n_runs // 10)

    elapsed = []
    with torch.no_grad():
        for _ in range(n_warmup):
            model(window)
        for _ in range(n_runs):
            start = time.perf_counter()
            model(window)
            elapsed.append(time.perf_counter() - start)

    median_s = statistics.median(elapsed)
    median_ms = median_s * 1e3
    return {
        "batch_size": batch_size,
        "runs": n_runs,
        "median_ms_per_batch": median_ms,
        "median_ms_per_window": median_ms / batch_size,
        "windows_per_second": batch_size / median_s,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def build_test_subset(data_dir, subset_size, batch_size=64):
    """Build a loader over clean test windows, optionally randomly subsampled.

    Uses the seed-42 file-level 70/15/15 test split (exactly as
    eval_repro.py), drops windows flagged in results/nan_windows_mask.npy or
    results/big_windows_mask.npy, then draws a seed-42 random subset.

    Args:
        data_dir: directory passed to ``PreprocessedCSIKeypointsDataset``.
        subset_size: number of clean windows to sample without replacement;
            falsy, or not smaller than the clean count, keeps every clean
            window.
        batch_size: batch size for the split loaders and the returned loader.

    Returns:
        ``(loader, n_clean)`` where ``loader`` iterates the selected windows
        in ascending index order and ``n_clean`` is the number of clean test
        windows BEFORE subsampling. The caller records it as
        "clean_test_total", so it must not be the subset length.
    """
    dataset = PreprocessedCSIKeypointsDataset(
        data_dir=data_dir, keypoint_scale=1000.0, enable_temporal_clean=True)
    _tr, _va, test_loader = create_preprocessed_train_val_test_loaders(
        dataset=dataset, batch_size=batch_size, num_workers=0, random_seed=42)
    test_indices = np.asarray(test_loader.dataset.indices)

    # NOTE(review): the masks are combined with | and inverted with ~, so they
    # are presumably boolean arrays indexed by window -- confirm.
    corrupted = (np.load(os.path.join(RESULTS, "nan_windows_mask.npy"))
                 | np.load(os.path.join(RESULTS, "big_windows_mask.npy")))
    clean = test_indices[~corrupted[test_indices]]
    # Capture the total now: the selection below must not change the count
    # reported to the caller.
    n_clean = len(clean)
    print(f"test split: {len(test_indices)} windows, "
          f"{len(test_indices) - n_clean} corrupted excluded, "
          f"{n_clean} clean")

    selected = clean
    if subset_size and subset_size < n_clean:
        rng = np.random.default_rng(42)
        selected = np.sort(rng.choice(clean, size=subset_size, replace=False))
    subset = torch.utils.data.Subset(dataset, selected.tolist())
    loader = DataLoader(subset, batch_size=batch_size, shuffle=False,
                        num_workers=0)
    return loader, n_clean
|
|
||||||
|
|
||||||
|
|
||||||
def quantize_int8_dynamic(fp32_model):
    """Run torch dynamic int8 quantization and report what actually converted.

    ``torch.ao.quantization.quantize_dynamic`` is asked to quantize
    nn.Linear, nn.Conv1d and nn.Conv2d modules. The report is built by
    inspecting the result rather than trusting the request.

    Args:
        fp32_model: the float model; its own parameters and modules are used
            for the totals and the eligible-module counts.

    Returns:
        ``(qmodel, report)``. ``report`` lists every module of ``qmodel``
        whose class path contains "quantized", with the element count of its
        ``weight()`` (0 when the module has no callable ``weight``), plus
        parameter totals. ``params_quantized_fraction`` is 0.0 for a model
        without parameters instead of dividing by zero.
    """
    qmodel = torch.ao.quantization.quantize_dynamic(
        fp32_model, {nn.Linear, nn.Conv1d, nn.Conv2d}, dtype=torch.qint8)

    quantized, quant_params = [], 0
    for name, mod in qmodel.named_modules():
        cls = type(mod).__module__ + "." + type(mod).__name__
        if "quantized" not in cls:
            continue
        weight_fn = getattr(mod, "weight", None)
        w = weight_fn() if callable(weight_fn) else None
        numel = w.numel() if w is not None else 0
        quant_params += numel
        quantized.append({"module": name, "class": cls, "params": numel})

    total_params = sum(p.numel() for p in fp32_model.parameters())

    def count(kind):
        # Number of modules of the given type in the float model.
        return sum(isinstance(m, kind) for m in fp32_model.modules())

    report = {
        "eligible_module_counts": {
            "nn.Linear": count(nn.Linear),
            "nn.Conv1d": count(nn.Conv1d),
            "nn.Conv2d": count(nn.Conv2d),
        },
        "modules_actually_quantized": quantized,
        "n_modules_quantized": len(quantized),
        "params_total": total_params,
        "params_quantized": quant_params,
        # Guard the parameter-free case: nothing to quantize means 0.0.
        "params_quantized_fraction": (
            quant_params / total_params if total_params else 0.0),
    }
    return qmodel, report
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Benchmark the fp32 / fp16 / dynamic-int8 variants and store the results.

    Parses the CLI flags, builds the three variants from the checkpoint,
    records on-disk size plus batch-1 and batch-64 latency for each,
    optionally scores accuracy on the clean test subset, and finally writes
    everything under the "torch" key of the output JSON. Other keys already
    present in that file are preserved.
    """
    default_data_dir = os.path.join(
        os.path.expanduser("~"), ".cache", "kagglehub", "datasets", "kaka2434",
        "wiflow-dataset", "versions", "1", "preprocessed_csi_data")
    default_out = os.path.join(RESULTS, "edge_optimization.json")

    cli = argparse.ArgumentParser()
    cli.add_argument("--data-dir", default=default_data_dir)
    cli.add_argument("--subset", type=int, default=10000)
    cli.add_argument("--runs-b1", type=int, default=100)
    cli.add_argument("--runs-b64", type=int, default=30)
    cli.add_argument("--skip-accuracy", action="store_true")
    cli.add_argument("--out", default=default_out)
    args = cli.parse_args()

    torch.manual_seed(42)
    env = {
        "torch": torch.__version__,
        "platform": platform.platform(),
        "processor": platform.processor(),
        "num_threads": torch.get_num_threads(),
        "checkpoint": os.path.relpath(CHECKPOINT, HERE),
    }
    results = {"env": env, "variants": {}}

    # ---- build variants ---------------------------------------------------
    # Each variant starts from its own freshly loaded copy of the checkpoint.
    fp32 = load_fp32_model()
    n_params = sum(p.numel() for p in fp32.parameters())
    env["params"] = n_params
    print(f"fp32 model: {n_params:,} params")

    fp16 = load_fp32_model().half()

    int8, q_report = quantize_int8_dynamic(load_fp32_model())
    results["int8_dynamic_quant_report"] = q_report
    quantized_pct = q_report['params_quantized_fraction']*100
    print(f"int8 dynamic: {q_report['n_modules_quantized']} modules quantized, "
          f"{quantized_pct:.1f}% of params")

    # name -> (model, dtype of the input fed to it, state_dict file name)
    variants = {
        "fp32": (fp32, torch.float32, "retrained_fp32_resaved.pth"),
        "fp16": (fp16, torch.float16, "retrained_fp16.pth"),
        "int8_dynamic": (int8, torch.float32, "retrained_int8_dynamic.pth"),
    }

    # ---- (a) size + (b) latency -------------------------------------------
    per_variant = results["variants"]
    for name, (model, dtype, fname) in variants.items():
        size = state_dict_size_bytes(model, os.path.join(RESULTS, fname))
        size_mb = size / 1e6
        print(f"\n=== {name}: {size_mb:.3f} MB on disk ({fname}) ===")
        lat1 = bench_latency(model, 1, args.runs_b1, dtype)
        lat64 = bench_latency(model, 64, args.runs_b64, dtype)
        print(f" batch 1: {lat1['median_ms_per_window']:.2f} ms/window "
              f"({lat1['windows_per_second']:.0f}/s)")
        print(f" batch 64: {lat64['median_ms_per_window']:.3f} ms/window "
              f"({lat64['windows_per_second']:.0f}/s)")
        per_variant[name] = {
            "file": fname,
            "size_bytes": size,
            "size_mb": size_mb,
            "latency_batch1": lat1,
            "latency_batch64": lat64,
        }

    # ---- (c) accuracy ------------------------------------------------------
    if not args.skip_accuracy:
        loader, n_clean = build_test_subset(args.data_dir, args.subset)
        if args.subset:
            subset_size = min(args.subset, n_clean)
        else:
            subset_size = n_clean
        results["accuracy_subset"] = {
            "description": "seed-42 file-level 70/15/15 test split, corrupted "
                           "windows (files 487-499) excluded, seed-42 random "
                           "subset",
            "subset_size": subset_size,
            "clean_test_total": n_clean,
        }
        for name, (model, dtype, _f) in variants.items():
            print(f"\n=== accuracy: {name} ===")
            accuracy = evaluate(model, loader, dtype=dtype, label=name)
            per_variant[name]["accuracy"] = accuracy
            print(json.dumps(accuracy, indent=2))

    # ---- merge into edge_optimization.json ---------------------------------
    merged = {}
    if os.path.exists(args.out):
        with open(args.out) as src:
            merged = json.load(src)
    merged["torch"] = results
    with open(args.out, "w") as dst:
        json.dump(merged, dst, indent=2)
    print(f"\nwrote {args.out}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
import numpy as np, os
|
|
||||||
# Zero out, in place, every CSI window that contains a non-finite value or
# whose largest finite magnitude exceeds 1.5. The array is opened as a
# writable memory map and processed in chunks of windows.
d = os.path.expanduser('~/wiflow-std-bench/preprocessed_csi_data')
csi = np.load(os.path.join(d, 'csi_windows.npy'), mmap_mode='r+')
chunk = 4000
zeroed = 0
for start in range(0, len(csi), chunk):
    block = csi[start:start + chunk]
    finite = np.isfinite(block)
    has_nonfinite = (~finite).any(axis=(1, 2))
    # Non-finite entries are masked to 0 so they cannot affect the peak.
    peak = np.abs(np.where(finite, block, 0)).max(axis=(1, 2))
    bad = has_nonfinite | (peak > 1.5)
    n_bad = int(bad.sum())
    if n_bad:
        block[bad] = 0.0
        zeroed += n_bad
csi.flush()
print(f'zeroed {zeroed} corrupted windows entirely')
|
|
||||||
@@ -1,112 +0,0 @@
|
|||||||
"""Evaluate the retrained WiFlow-STD checkpoint (ADR-152 §2.2a fallback).
|
|
||||||
|
|
||||||
Scores the model produced by run.py (train_output/best_pose_model.pth or similar)
|
|
||||||
on the seed-42 test split: full test set AND NaN-free subset (excluding windows
|
|
||||||
that were zero-filled by clean_nan.py — file indices 487-499).
|
|
||||||
|
|
||||||
NOTE: deployed to ruvultra (~/wiflow-std-bench) as a standalone single file,
|
|
||||||
so it deliberately inlines its helpers. The reference implementations (upstream
|
|
||||||
import shim, >1GB np.load mmap patch, key-remap loader, canonical evaluate
|
|
||||||
loop) live in benchmarks/wiflow-std/_bench_common.py — keep copies in sync.
|
|
||||||
"""
|
|
||||||
import json, os, random, sys
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
from torch.utils.data import DataLoader, Subset
|
|
||||||
|
|
||||||
# csi_windows.npy is ~13 GB; mmap large arrays instead of eagerly loading
|
|
||||||
# ~15 GB into RAM (same patch as _bench_common._np_load_mmap).
|
|
||||||
# Keep a handle on the loader that is current at import time.
_np_load = np.load


def _np_load_mmap(path, *a, **kw):
    """Forward to the saved ``np.load``, memory-mapping very large .npy files.

    When ``path`` is a ``str`` ending in '.npy', the file is larger than
    1 GiB (``1 << 30`` bytes) and the caller did not pass ``mmap_mode``, the
    call is made with ``mmap_mode='r'``. Every other call is forwarded
    unchanged.
    """
    if isinstance(path, str) and path.endswith('.npy'):
        oversized = os.path.getsize(path) > 1 << 30
        if oversized and 'mmap_mode' not in kw:
            kw['mmap_mode'] = 'r'
    return _np_load(path, *a, **kw)


# Install the wrapper so code that calls np.load picks it up.
np.load = _np_load_mmap
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.expanduser('~/wiflow-std-bench/upstream'))
|
|
||||||
from dataset import PreprocessedCSIKeypointsDataset, create_preprocessed_train_val_test_loaders
|
|
||||||
from models.pose_model import WiFlowPoseModel
|
|
||||||
from utils.metrics import calculate_pck, calculate_mpjpe
|
|
||||||
|
|
||||||
|
|
||||||
def find_checkpoint():
    """Return the most recently modified retrained checkpoint path.

    Candidates are every '.pth' file under ~/wiflow-std-bench/train_output,
    plus '.pth' files under ~/wiflow-std-bench/upstream whose name contains
    'best', whose directory path does not contain 'cross_dataset', and whose
    mtime is later than two days before the mtime of
    ~/wiflow-std-bench/train.log. Paths ending in
    'upstream/best_pose_model.pth' are dropped. Exits the process via
    ``sys.exit`` when nothing remains.
    """
    train_root = os.path.expanduser('~/wiflow-std-bench/train_output')
    found = [os.path.join(folder, fname)
             for folder, _, fnames in os.walk(train_root)
             for fname in fnames
             if fname.endswith('.pth')]

    # also upstream/test default output dir
    upstream_root = os.path.expanduser('~/wiflow-std-bench/upstream')
    for folder, _, fnames in os.walk(upstream_root):
        for fname in fnames:
            if not (fname.endswith('.pth') and 'best' in fname):
                continue
            if 'cross_dataset' in folder:
                continue
            path = os.path.join(folder, fname)
            ckpt_mtime = os.path.getmtime(path)
            # train.log is only consulted once a matching file exists.
            log_mtime = os.path.getmtime(
                os.path.expanduser('~/wiflow-std-bench/train.log'))
            if ckpt_mtime > log_mtime - 86400 * 2:
                found.append(path)

    retrained = [path for path in found
                 if not path.endswith('upstream/best_pose_model.pth')]
    if not retrained:
        sys.exit('no retrained checkpoint found')
    return max(retrained, key=os.path.getmtime)
|
|
||||||
|
|
||||||
|
|
||||||
def evaluate(model, loader, device):
    """Score ``model`` on ``loader`` with sample-weighted MPJPE and PCK.

    The model is put in eval mode and run under ``torch.no_grad()``. Each
    batch's ``calculate_mpjpe`` / ``calculate_pck`` values are weighted by the
    batch size and divided by the total sample count at the end.

    Returns:
        dict with 'samples', 'mpjpe' and one 'pck@NN' entry for each of the
        thresholds 0.1 to 0.5.
    """
    model.eval()
    pck_sums = dict.fromkeys((0.1, 0.2, 0.3, 0.4, 0.5), 0.0)
    mpjpe_sum = 0.0
    n_seen = 0
    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            pred = model(batch_x)
            n_batch = batch_y.size(0)
            mpjpe_sum += calculate_mpjpe(pred, batch_y) * n_batch
            batch_pck = calculate_pck(pred, batch_y, thresholds=list(pck_sums))
            for thr in pck_sums:
                pck_sums[thr] += batch_pck[thr] * n_batch
            n_seen += n_batch

    report = {'samples': n_seen, 'mpjpe': mpjpe_sum / n_seen}
    for thr in pck_sums:
        report[f'pck@{int(thr*100)}'] = pck_sums[thr] / n_seen
    return report
|
|
||||||
|
|
||||||
|
|
||||||
# Seed every RNG in use before the dataset split is built.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
torch.backends.cudnn.deterministic = True

d = os.path.expanduser('~/wiflow-std-bench/preprocessed_csi_data')
dataset = PreprocessedCSIKeypointsDataset(
    data_dir=d, keypoint_scale=1000.0, enable_temporal_clean=True)
_, _, test_loader = create_preprocessed_train_val_test_loaders(
    dataset=dataset, batch_size=256, num_workers=2, random_seed=42)

device = torch.device('cuda')
ckpt = find_checkpoint()
print('checkpoint:', ckpt)
model = WiFlowPoseModel(dropout=0.5).to(device)

# Rename legacy key prefixes before the strict load; the first matching
# prefix wins and keys with no legacy prefix are kept as they are.
renames = {'att.': 'attention.', 'final_conv.': 'decoder.'}
loaded = torch.load(ckpt, map_location=device, weights_only=True)
state = {}
for key, tensor in loaded.items():
    for old, new in renames.items():
        if key.startswith(old):
            key = new + key[len(old):]
            break
    state[key] = tensor
model.load_state_dict(state, strict=True)

results = {'checkpoint': ckpt}
print('=== full test set ===')
results['test_full'] = evaluate(model, test_loader, device)
print(json.dumps(results['test_full'], indent=2))

# NaN-free subset: exclude windows from corrupted files 487-499
test_subset = test_loader.dataset  # Subset(dataset, test_indices)
w2f = dataset.window_to_file
clean_idx = []
for window in test_subset.indices:
    if w2f[window] < 487:
        clean_idx.append(window)
n_clean, n_test = len(clean_idx), len(test_subset.indices)
print(f'=== NaN-free test subset ({n_clean} of {n_test}) ===')
clean_loader = DataLoader(Subset(dataset, clean_idx), batch_size=256, shuffle=False)
results['test_clean'] = evaluate(model, clean_loader, device)
print(json.dumps(results['test_clean'], indent=2))

out = os.path.expanduser('~/wiflow-std-bench/eval_retrained.json')
with open(out, 'w') as dst:
    json.dump(results, dst, indent=2)
print('wrote', out)
|
|
||||||
@@ -1,374 +0,0 @@
|
|||||||
"""ADR-152 SS2.2 measurement (b): WiFlow-STD fine-tuned on our fresh ESP32 paired dataset.
|
|
||||||
|
|
||||||
Dataset: ~/wiflow-std-bench/paired-20260610.jsonl -- 2,046 paired windows collected
|
|
||||||
2026-06-10 22:10-22:40 (ONE subject, ONE room, ONE ESP32 node, varied poses).
|
|
||||||
Per record: csi = flat float32 list, csi_shape, kp = 17 COCO [x, y] normalized [0,1]
|
|
||||||
camera coords, conf (MediaPipe mean confidence, all > 0.5 in this set), ts_start/ts_end.
|
|
||||||
Aligner: scripts/align-ground-truth.js, non-overlapping 20-frame windows (~0.42 s each).
|
|
||||||
|
|
||||||
Dataset findings (MEASURED on this file, 2026-06-10):
|
|
||||||
- csi_shape is HETEROGENEOUS, not uniformly [70, 20]: 1,347x [70,20], 284x [134,20],
|
|
||||||
243x [26,20], 130x [12,20], 42x [20,20]. The ESP32 stream emits mixed frame types
|
|
||||||
and the aligner stamps each window's subcarrier count from frame[0]
|
|
||||||
(extractCsiMatrix: nSc = window[0].subcarriers), zero-padding/truncating the rest.
|
|
||||||
Even native-70 windows contain ~20.4% internally zero-padded short frames
|
|
||||||
(subcarriers 40..69 all-zero for those frames).
|
|
||||||
- LAYOUT BUG: the aligner fills matrix[f * nSc + s] (frame-major) but declares
|
|
||||||
shape [nSc, nFrames]. The true layout is (frame, subcarrier); we reshape
|
|
||||||
(nFrames, nSc) and transpose. Confirmed by coherent per-frame zero-tails.
|
|
||||||
- Handling here (primary suite, "all2046"): every frame's subcarrier axis is
|
|
||||||
linearly resampled to 70 bins (np.interp over a normalized index domain;
|
|
||||||
identity for native-70 frames) so the pre-registered n=2,046 and split sizes
|
|
||||||
hold. Secondary suite ("native70") restricts to the 1,347 native [70,20]
|
|
||||||
windows (temporal 70/15/15 of those) as a homogeneity robustness check.
|
|
||||||
|
|
||||||
Pre-registered protocol (followed exactly):
|
|
||||||
1. TEMPORAL split (records are time-sorted; asserted): first 70% train (1,432),
|
|
||||||
next 15% val (307), last 15% test (307). No shuffling across time. Seed 42
|
|
||||||
for everything else.
|
|
||||||
2. Model: upstream WiFlow-STD trunk (WiFlowPoseModel) with a learned 1x1 Conv1d
|
|
||||||
projection 70->540 prepended, and K=17 via the parameter-free adaptive pool
|
|
||||||
(AdaptiveAvgPool2d((17, 1)) instead of (15, 1)) -- pretrained weights load
|
|
||||||
for any K. CSI normalization: divide by the TRAIN-split 99th-percentile
|
|
||||||
amplitude, clip to [0, 1] (documented in output JSON).
|
|
||||||
3. Three runs, <=60 epochs, early-stop patience 8 on val MPJPE, batch 32,
|
|
||||||
AdamW, fp32 (no autocast):
|
|
||||||
(i) pretrained-init: trunk init from upstream/test/best_pose_model.pth
|
|
||||||
(the measurement-(a) retrained checkpoint, ~96% PCK@20 on WiFlow data;
|
|
||||||
key remap att.->attention. / final_conv.->decoder. applied defensively
|
|
||||||
as in eval_repro.py -- a no-op for this checkpoint, which already uses
|
|
||||||
the new names). Discriminative lr: adapter 1e-4, trunk 1e-5.
|
|
||||||
(ii) scratch: same architecture, random init, all params lr 1e-4.
|
|
||||||
(iii) frozen-trunk: pretrained trunk frozen (requires_grad=False AND held in
|
|
||||||
.eval() so BatchNorm running stats cannot drift -- pure transfer probe);
|
|
||||||
only the 70->540 adapter trains, lr 1e-4.
|
|
||||||
4. Metrics on the temporal TEST split: torso-normalized PCK@10/20/30/40/50 and
|
|
||||||
MPJPE. Upstream utils/metrics.py calculate_pck(use_torso_norm=True) hardcodes
|
|
||||||
NECK_IDX/PELVIS_IDX = 2, 12 -- a 15-keypoint convention that is WRONG for our
|
|
||||||
17 COCO keypoints (2 = right_eye, 12 = right_hip). We therefore reimplement the
|
|
||||||
identical math (per-frame norm distance, clamp min 0.01, mean over all
|
|
||||||
keypoints x frames) with torso = ||l_shoulder(5) - l_hip(11)||.
|
|
||||||
Also reported: prediction std across test frames (constant-pose detector;
|
|
||||||
must be > 0) and the mean-pose-predictor baseline (train-split mean pose
|
|
||||||
evaluated on test -- the honesty bar).
|
|
||||||
|
|
||||||
Usage (on ruvultra):
|
|
||||||
nice -n 10 nohup ~/wiflow-std-bench/venv/bin/python train_measb.py > train_measb.log 2>&1 &
|
|
||||||
|
|
||||||
NOTE: deployed to ruvultra as a standalone single file, so it deliberately
|
|
||||||
inlines its helpers. The reference implementations (upstream import shim,
|
|
||||||
np.load mmap patch, key-remap loader, canonical evaluate loop) live in
|
|
||||||
benchmarks/wiflow-std/_bench_common.py — keep copies in sync.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import random
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
|
|
||||||
BENCH = os.path.expanduser("~/wiflow-std-bench")
|
|
||||||
UPSTREAM = os.path.join(BENCH, "upstream")
|
|
||||||
MEASB = os.path.join(BENCH, "measb")
|
|
||||||
DATA = os.path.join(BENCH, "paired-20260610.jsonl")
|
|
||||||
CHECKPOINT = os.path.join(UPSTREAM, "test", "best_pose_model.pth")
|
|
||||||
|
|
||||||
sys.path.insert(0, UPSTREAM)
|
|
||||||
|
|
||||||
# Upstream defect (1): models/__init__.py imports a name tcn.py does not define.
|
|
||||||
# Register a stub package so the broken __init__ never executes (as eval_repro.py).
|
|
||||||
import types # noqa: E402
|
|
||||||
|
|
||||||
_models_pkg = types.ModuleType("models")
|
|
||||||
_models_pkg.__path__ = [os.path.join(UPSTREAM, "models")]
|
|
||||||
sys.modules["models"] = _models_pkg
|
|
||||||
|
|
||||||
from models.pose_model import WiFlowPoseModel # noqa: E402
|
|
||||||
|
|
||||||
SEED = 42
|
|
||||||
K = 17
|
|
||||||
N_SUBC = 70
|
|
||||||
TRUNK_IN = 540
|
|
||||||
BATCH = 32 # <= 64 per protocol (GPU shared with the efficiency sweep)
|
|
||||||
MAX_EPOCHS = 60
|
|
||||||
PATIENCE = 8
|
|
||||||
LR_ADAPTER = 1e-4
|
|
||||||
LR_TRUNK_FT = 1e-5 # 10x lower for the pretrained trunk vs the fresh adapter
|
|
||||||
L_SHOULDER, L_HIP = 5, 11
|
|
||||||
THRESHOLDS = (0.1, 0.2, 0.3, 0.4, 0.5)
|
|
||||||
|
|
||||||
|
|
||||||
def set_seed(seed=SEED):
    """Seed the Python, NumPy and torch RNGs and pin the cuDNN flags.

    CUDA generators are seeded only when CUDA is available. cuDNN is set to
    deterministic mode with benchmarking disabled.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
|
|
||||||
|
|
||||||
|
|
||||||
def resample_subcarriers(frame_major, n_out=N_SUBC):
    """Linearly resample each row of a 2-D array to ``n_out`` columns.

    (nFrames, nSc) -> (nFrames, n_out). Both the source and target column
    positions are spread evenly over [0, 1] and each row is interpolated
    independently with ``np.interp``. When nSc already equals ``n_out`` the
    input object itself is returned; otherwise the result is float32.
    """
    _, n_in = frame_major.shape
    if n_in == n_out:
        return frame_major
    src_pos = np.linspace(0.0, 1.0, n_in)
    dst_pos = np.linspace(0.0, 1.0, n_out)
    rows = [np.interp(dst_pos, src_pos, row) for row in frame_major]
    return np.stack(rows).astype(np.float32)
|
|
||||||
|
|
||||||
|
|
||||||
def load_dataset():
    """Read the paired JSONL file at ``DATA`` into stacked arrays.

    Every record must declare 20 frames and carry a (K, 2) keypoint array,
    and records must be ordered by non-decreasing ``ts_start``; all three are
    asserted.

    Returns:
        tuple of (csi windows, keypoints, confidences as float32,
        per-record flag for a native ``N_SUBC`` subcarrier count,
        dict counting each declared "NSCxNF" shape, first ts_start,
        last ts_start).
    """
    windows, keypoints, confidences, starts, is_native = [], [], [], [], []
    shape_counts = {}
    with open(DATA) as fh:
        for raw in fh:
            rec = json.loads(raw)
            n_sc, n_frames = rec["csi_shape"]
            shape_key = f"{n_sc}x{n_frames}"
            shape_counts[shape_key] = shape_counts.get(shape_key, 0) + 1
            assert n_frames == 20, rec["csi_shape"]
            # Aligner layout bug: the flat data is frame-major despite the
            # declared [nSc, nFrames] shape. Reshape to (nFrames, nSc),
            # resample the subcarrier axis, then transpose so subcarriers
            # come first.
            frame_major = np.asarray(rec["csi"], dtype=np.float32)
            frame_major = frame_major.reshape(n_frames, n_sc)
            windows.append(resample_subcarriers(frame_major).T)
            kp = np.asarray(rec["kp"], dtype=np.float32)
            assert kp.shape == (K, 2), kp.shape
            keypoints.append(kp)
            confidences.append(rec["conf"])
            starts.append(rec["ts_start"])
            is_native.append(n_sc == N_SUBC)

    in_order = all(a <= b for a, b in zip(starts, starts[1:]))
    assert in_order, "records not time-sorted"
    return (np.stack(windows), np.stack(keypoints),
            np.asarray(confidences, dtype=np.float32),
            np.asarray(is_native), shape_counts, starts[0], starts[-1])
|
|
||||||
|
|
||||||
|
|
||||||
def temporal_split(n):
    """Return (train, val, test) slices covering ``range(n)`` in order.

    Train takes the first ``round(0.70 * n)`` items, validation the next
    ``round(0.15 * n)``, and test whatever remains up to ``n``.
    """
    train_end = int(round(n * 0.70))
    val_end = train_end + int(round(n * 0.15))
    return slice(0, train_end), slice(train_end, val_end), slice(val_end, n)
|
|
||||||
|
|
||||||
|
|
||||||
class AdaptedWiFlow(nn.Module):
    """Upstream WiFlow-STD trunk behind a learned 1x1 Conv1d input adapter.

    The adapter maps ``N_SUBC`` input channels to the ``TRUNK_IN`` channels
    the trunk takes, and the trunk's ``avg_pool`` is replaced so the head
    pools to ``k`` rows.
    """

    def __init__(self, k=K, dropout=0.5):
        super().__init__()
        adapter = nn.Conv1d(N_SUBC, TRUNK_IN, kernel_size=1)
        nn.init.kaiming_normal_(adapter.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(adapter.bias, 0)
        self.adapter = adapter

        trunk = WiFlowPoseModel(dropout=dropout)
        # K=17 via the parameter-free adaptive pool: decoder emits [B, 2, 15, 20]
        # spatial maps; pooling H->17 instead of 15 yields [B, 17, 2] with no new
        # parameters, so the pretrained state_dict loads strict=True for any K.
        trunk.avg_pool = nn.AdaptiveAvgPool2d((k, 1))
        self.trunk = trunk

    def forward(self, x):
        """Project ``x`` with the adapter and feed the result to the trunk."""
        projected = self.adapter(x)
        return self.trunk(projected)
|
|
||||||
|
|
||||||
|
|
||||||
def load_pretrained_trunk(trunk, path):
    """Load the state_dict stored at ``path`` into ``trunk`` (strict).

    The file is read onto the CPU with ``weights_only=True``. Keys starting
    with the legacy prefixes 'att.' / 'final_conv.' are renamed to
    'attention.' / 'decoder.' first; all other keys are left unchanged.
    """
    # Defensive remap as in eval_repro.py (no-op for the retrained checkpoint).
    legacy_prefixes = (("att.", "attention."), ("final_conv.", "decoder."))
    raw = torch.load(path, map_location="cpu", weights_only=True)
    remapped = {}
    for key, tensor in raw.items():
        for old, new in legacy_prefixes:
            if key.startswith(old):
                key = new + key[len(old):]
                break
        remapped[key] = tensor
    trunk.load_state_dict(remapped, strict=True)
|
|
||||||
|
|
||||||
|
|
||||||
def pck_torso(pred, target, thresholds=THRESHOLDS):
    """Torso-normalized PCK (upstream calculate_pck math) for 17-kp COCO.

    Each frame's torso length is the distance between target keypoints
    ``L_SHOULDER`` and ``L_HIP``, clamped to at least 0.01. A keypoint counts
    as correct at threshold t when its error divided by that length is <= t.

    Returns:
        dict mapping 'pck@NN' to the fraction of correct keypoints over all
        keypoints and frames, one entry per threshold.
    """
    torso_vec = target[:, L_SHOULDER] - target[:, L_HIP]
    torso_len = torch.sqrt((torso_vec ** 2).sum(dim=1))
    torso_len = torch.clamp(torso_len, min=0.01)
    joint_err = torch.sqrt(((pred - target) ** 2).sum(dim=2))
    normed = joint_err / torso_len.unsqueeze(1)
    scores = {}
    for t in thresholds:
        scores[f"pck@{int(t * 100)}"] = (normed <= t).float().mean().item()
    return scores
|
|
||||||
|
|
||||||
|
|
||||||
def mpjpe(pred, target):
    """Return the mean Euclidean error over the last axis as a Python float.

    Squared differences are summed over dim 2, square-rooted, then averaged
    over every remaining element.
    """
    squared = (pred - target) ** 2
    per_joint = torch.sqrt(squared.sum(dim=2))
    return per_joint.mean().item()
|
|
||||||
|
|
||||||
|
|
||||||
@torch.no_grad()
def predict(model, x, batch=256):
    """Run ``model`` over ``x`` in chunks of ``batch`` rows and concatenate.

    The model is switched to eval mode and left there. Gradients are
    disabled for the whole call.
    """
    model.eval()
    outputs = []
    for start in range(0, len(x), batch):
        outputs.append(model(x[start:start + batch]))
    return torch.cat(outputs)
|
|
||||||
|
|
||||||
|
|
||||||
def eval_preds(pred, target):
    """Collect the torso-normalized PCK values, MPJPE and prediction spread.

    Returns the ``pck_torso`` dict extended with 'mpjpe' and 'pred_std'.
    """
    metrics = pck_torso(pred, target)
    metrics["mpjpe"] = mpjpe(pred, target)
    # Constant-pose detector: std along dim 0 for every coordinate, then the
    # mean of those stds. 0.0 == degenerate constant predictor.
    per_coord_std = pred.std(dim=0)
    metrics["pred_std"] = per_coord_std.mean().item()
    return metrics
|
|
||||||
|
|
||||||
|
|
||||||
def train_run(name, x_tr, y_tr, x_va, y_va, device, pretrained, freeze_trunk,
              lr_trunk):
    """Train one ``AdaptedWiFlow`` with early stopping on validation MPJPE.

    Args:
        name: run label, used as the log prefix and in the saved file name
            ``{name}_best.pth`` under ``MEASB``.
        x_tr, y_tr: training inputs and targets, indexed by a permutation
            tensor created on ``device``.
        x_va, y_va: validation inputs and targets.
        device: device the model and the permutation are placed on.
        pretrained: load ``CHECKPOINT`` into the trunk before training.
        freeze_trunk: disable gradients for the trunk, keep it in eval mode
            and optimize the adapter only.
        lr_trunk: trunk learning rate (unused when the trunk is frozen).

    Returns:
        ``(model, info)``: the model with the best validation weights
        restored, and a dict with best epoch, best validation MPJPE, epochs
        run, wall-clock seconds and the per-epoch history.
    """
    set_seed(SEED)
    model = AdaptedWiFlow().to(device)
    if pretrained:
        load_pretrained_trunk(model.trunk, CHECKPOINT)

    param_groups = [{"params": model.adapter.parameters(), "lr": LR_ADAPTER}]
    if freeze_trunk:
        for weight in model.trunk.parameters():
            weight.requires_grad = False
    else:
        param_groups.append({"params": model.trunk.parameters(), "lr": lr_trunk})
    optimizer = torch.optim.AdamW(param_groups)
    criterion = nn.MSELoss()

    n_train = len(x_tr)
    best_val = float("inf")
    best_state = None
    best_epoch = -1
    stale_epochs = 0
    history = []
    started = time.time()
    for epoch in range(MAX_EPOCHS):
        model.train()
        if freeze_trunk:
            model.trunk.eval()  # keep BatchNorm running stats fixed: pure transfer
        order = torch.randperm(n_train, device=device)
        loss_sum = 0.0
        for start in range(0, n_train, BATCH):
            rows = order[start:start + BATCH]
            optimizer.zero_grad()
            loss = criterion(model(x_tr[rows]), y_tr[rows])
            loss.backward()
            optimizer.step()
            loss_sum += loss.item() * len(rows)

        train_mse = loss_sum / n_train
        val_mpjpe = mpjpe(predict(model, x_va), y_va)
        history.append({"epoch": epoch, "train_mse": train_mse, "val_mpjpe": val_mpjpe})

        improved = val_mpjpe < best_val
        if improved:
            best_val, best_epoch, stale_epochs = val_mpjpe, epoch, 0
            # Snapshot on the CPU so later updates cannot touch the copy.
            best_state = {key: value.detach().cpu().clone()
                          for key, value in model.state_dict().items()}
        else:
            stale_epochs += 1
        marker = " *" if improved else ""
        print(f"[{name}] epoch {epoch:02d} train_mse {train_mse:.6f} "
              f"val_mpjpe {val_mpjpe:.5f}{marker}", flush=True)
        if stale_epochs >= PATIENCE:
            print(f"[{name}] early stop at epoch {epoch} (best {best_epoch})", flush=True)
            break

    model.load_state_dict(best_state)
    torch.save(best_state, os.path.join(MEASB, f"{name}_best.pth"))
    info = {"best_epoch": best_epoch, "best_val_mpjpe": best_val,
            "epochs_run": len(history),
            "wall_seconds": round(time.time() - started, 1),
            "history": history}
    return model, info
|
|
||||||
|
|
||||||
|
|
||||||
def run_suite(tag, csi, kps, device):
    """Temporal 70/15/15 split, mean-pose baseline, three training runs.

    CSI is divided by the 99th-percentile value of the TRAIN split and clipped
    to [0, 1]; a mean-pose predictor is then scored on the test split, and the
    ``pretrained``, ``scratch`` and ``frozen_trunk`` configurations are each
    trained with ``train_run`` and scored on the test split.

    Args:
        tag: Label used in log lines and as the prefix of each run name.
        csi: Array of CSI windows, indexable by the slices from ``temporal_split``.
        kps: Keypoint array aligned with ``csi`` (converted with ``torch.from_numpy``).
        device: Torch device that inputs and targets are moved to.

    Returns:
        Dict with window/split counts, the normalization constants, the
        mean-pose baseline metrics and one entry per training run.

    Raises:
        ValueError: If the train-split p99 is not a finite positive number,
            because dividing by it would fill the inputs with NaN/inf.
    """
    n = len(csi)
    tr, va, te = temporal_split(n)
    print(f"=== suite {tag}: n={n} train={tr.stop} val={va.stop - va.start} "
          f"test={te.stop - te.start} ===", flush=True)

    # CSI normalization constant from TRAIN split only.
    train_p99 = float(np.percentile(csi[tr], 99))
    train_max = float(csi[tr].max())
    if not np.isfinite(train_p99) or train_p99 <= 0.0:
        # Fail loudly: csi / 0 would silently produce NaN/inf training inputs.
        raise ValueError(
            f"[{tag}] train-split p99 must be finite and > 0 to normalize CSI, "
            f"got {train_p99!r}")
    print(f"[{tag}] train p99={train_p99:.3f} max={train_max:.3f} -> /p99, clip [0,1]",
          flush=True)
    csi_n = np.clip(csi / train_p99, 0.0, 1.0).astype(np.float32)

    x = torch.from_numpy(csi_n).to(device)
    y = torch.from_numpy(kps).to(device)
    x_tr, y_tr = x[tr], y[tr]
    x_va, y_va = x[va], y[va]
    x_te, y_te = x[te], y[te]

    suite = {
        "n_windows": n,
        "split": {"n_train": int(tr.stop), "n_val": int(va.stop - va.start),
                  "n_test": int(te.stop - te.start)},
        "csi_norm": {"method": "divide by train-split p99 amplitude, clip [0,1]",
                     "train_p99": train_p99, "train_max": train_max},
        "runs": {},
    }

    # Honesty bar: mean-pose predictor fit on TRAIN, evaluated on TEST.
    mean_pose = y_tr.mean(dim=0, keepdim=True).expand(len(y_te), -1, -1)
    suite["mean_pose_baseline"] = eval_preds(mean_pose, y_te)
    suite["mean_pose_baseline"]["note"] = "train-split mean pose; pred_std 0 by construction"
    print(f"[{tag}] mean-pose baseline:", json.dumps(suite["mean_pose_baseline"]),
          flush=True)

    configs = [
        ("pretrained", dict(pretrained=True, freeze_trunk=False, lr_trunk=LR_TRUNK_FT)),
        ("scratch", dict(pretrained=False, freeze_trunk=False, lr_trunk=LR_ADAPTER)),
        ("frozen_trunk", dict(pretrained=True, freeze_trunk=True, lr_trunk=0.0)),
    ]
    for name, cfg in configs:
        print(f"=== run: {tag}/{name} {cfg} ===", flush=True)
        model, train_info = train_run(f"{tag}_{name}", x_tr, y_tr, x_va, y_va,
                                      device, **cfg)
        test_metrics = eval_preds(predict(model, x_te), y_te)
        # Counted after training, on the model returned by train_run.
        n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        suite["runs"][name] = {"config": cfg, "trainable_params": n_trainable,
                               "train": {k: v for k, v in train_info.items()
                                         if k != "history"},
                               "history": train_info["history"],
                               "test": test_metrics}
        print(f"[{tag}/{name}] TEST:", json.dumps(test_metrics), flush=True)
    return suite
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Run both measurement suites and write the combined JSON report.

    Loads the dataset, records the protocol description, runs ``run_suite`` on
    all windows and then on the native-70 subset, and dumps everything to
    ``measurement_b.json`` inside ``MEASB``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"device {device}, torch {torch.__version__}", flush=True)
    set_seed(SEED)

    csi, kps, confs, native70, shape_counts, ts_first, ts_last = load_dataset()
    print(f"shape distribution: {shape_counts}", flush=True)

    # Free-text description of the experimental protocol, stored with the results.
    protocol = {
        "dataset": DATA, "n_windows": len(csi),
        "ts_first": ts_first, "ts_last": ts_last,
        "conf_mean": float(confs.mean()), "conf_min": float(confs.min()),
        "csi_shape_distribution": shape_counts,
        "csi_layout_note": "aligner stores frame-major data under a transposed "
                           "[nSc, nFrames] shape label; corrected on load",
        "csi_resample": "per-frame linear interp of subcarrier axis to 70 bins "
                        "(identity for native-70 frames); native-70 windows still "
                        "contain ~20.4% internally zero-padded short frames",
        "split": "temporal 70/15/15 (no shuffle across time)",
        "model": "1x1 Conv1d 70->540 adapter + WiFlowPoseModel trunk, "
                 "AdaptiveAvgPool2d((17,1)) head (parameter-free K=17)",
        "checkpoint": CHECKPOINT,
        "checkpoint_note": "measurement-(a) retrained checkpoint (~96% PCK@20 on "
                           "WiFlow data); att./final_conv. remap applied "
                           "defensively (no-op, already new-style keys)",
        "optimizer": f"AdamW, adapter lr {LR_ADAPTER}, fine-tuned trunk lr "
                     f"{LR_TRUNK_FT} (10x lower), scratch all {LR_ADAPTER}",
        "batch": BATCH, "max_epochs": MAX_EPOCHS, "patience": PATIENCE,
        "precision": "fp32", "seed": SEED,
        "pck": "torso-normalized, torso = ||l_shoulder(5) - l_hip(11)||, "
               "clamp min 0.01, mean over keypoints x frames "
               "(upstream math; upstream 2/12 indices are a 15-kp convention)",
    }

    # Keys are created up front so the JSON keeps the order protocol/all2046/native70.
    results = {"protocol": protocol, "all2046": None, "native70": None}
    # Primary: all 2,046 windows (pre-registered n), subcarrier axis resampled.
    results["all2046"] = run_suite("all2046", csi, kps, device)
    # Secondary robustness check: the 1,347 native [70,20] windows only.
    results["native70"] = run_suite("native70", csi[native70], kps[native70], device)

    out = os.path.join(MEASB, "measurement_b.json")
    with open(out, "w") as f:
        json.dump(results, f, indent=2)
    print(f"wrote {out}", flush=True)


if __name__ == "__main__":
    main()
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
#!/bin/bash
# Bootstrap and run the full-size WiFlow-STD reference training inside
# ~/wiflow-std-bench: pinned upstream clone, venv, dataset download,
# clean_nan.py, then upstream run.py.
set -ex
cd ~/wiflow-std-bench

# 1. clone upstream at the pinned commit
if [ ! -d upstream ]; then
    git clone https://github.com/DY2434/WiFlow-WiFi-Pose-Estimation-with-Spatio-Temporal-Decoupling upstream
fi
# `git -C` keeps the working directory at the bench root. The previous
# `cd upstream && git checkout ... && cd ..` list was exempt from `set -e`,
# so a failed checkout left the script running from inside upstream/.
git -C upstream checkout 06899d294a0f44709d601a53e91dbf24759daefb

# 2. documented deviation: fix upstream import bug (TemporalConvNet does not exist)
sed -i 's/from .tcn import TemporalConvNet/from .tcn import TemporalBlock/; s/'"'"'TemporalConvNet'"'"'/'"'"'TemporalBlock'"'"'/' upstream/models/__init__.py

# 3. venv: torch cu128 (RTX 5080 = sm_120 needs >=2.7; their pin 2.3.1 predates Blackwell)
if [ ! -d venv ]; then
    python3 -m venv venv
    ./venv/bin/pip install -q --upgrade pip
    ./venv/bin/pip install -q torch --index-url https://download.pytorch.org/whl/cu128
    ./venv/bin/pip install -q numpy pandas matplotlib seaborn scikit-learn opencv-python-headless scipy tqdm psutil kagglehub
fi
./venv/bin/python -c "import torch; print(torch.__version__, torch.cuda.is_available(), torch.cuda.get_device_name(0))"

# 4. dataset via kagglehub (anonymous, public dataset)
DS=$(./venv/bin/python -c "import kagglehub; print(kagglehub.dataset_download('kaka2434/wiflow-dataset'))")
echo "dataset at: $DS"

# 5. run.py hardcodes ../preprocessed_csi_data relative to upstream/
ln -sfn "$DS/preprocessed_csi_data" ~/wiflow-std-bench/preprocessed_csi_data

# 6. run clean_nan.py, then train with upstream defaults (seed 42 set inside run.py)
# The working directory is still the bench root here, so use ./venv directly;
# the old `../venv/... 2>/dev/null || ...` form always failed its first half
# and hid the error output.
./venv/bin/python clean_nan.py
cd upstream
../venv/bin/python run.py --gpu 0 --batch_size 64 --epochs 50 --output_dir ../train_output
|
|
||||||
@@ -1,332 +0,0 @@
|
|||||||
"""Configurable compact variants of the WiFlow-STD pose model (ADR-152 efficiency sweep).
|
|
||||||
|
|
||||||
This is a parameterized copy of upstream models/{pose_model,tcn,convnet,attention}.py
|
|
||||||
(DY2434/WiFlow @ 06899d29, Apache-2.0). upstream/ is NOT modified. Deviations from
|
|
||||||
upstream, all forced by shrinking channels and documented per variant in run_sweep.py:
|
|
||||||
|
|
||||||
1. TCN grouped-conv groups: upstream hardcodes groups=20, which does not divide
|
|
||||||
the compact channel counts (e.g. 270, 135, 85). Rule here:
|
|
||||||
- groups_mode='gcd20': per-conv groups = gcd(channels, 20) (== 20 wherever
|
|
||||||
upstream's choice is valid, incl. the 540-ch input conv; falls back to the
|
|
||||||
largest common divisor with 20 otherwise).
|
|
||||||
- groups_mode='depthwise': groups = channels (tiny variant only).
|
|
||||||
2. Conv2d downsampling strides: upstream uses 4 stride-(1,2) blocks because
|
|
||||||
240/2^4 = 15 == n_keypoints. With smaller TCN output widths that would leave
|
|
||||||
<15 rows and AdaptiveAvgPool2d((15,1)) would duplicate rows across keypoints.
|
|
||||||
Rule: halve the width only while the result stays >= 15 (stride-2 blocks
|
|
||||||
first, stride-1 after). Full model: 240 -> 4 halvings = upstream exactly.
|
|
||||||
3. input_pw_groups (tiny only): the dense 540->c pointwise + residual downsample
|
|
||||||
in TCN block 1 cost 2*540*c params (a ~117k floor that alone exceeds the
|
|
||||||
tiny <100k budget). tiny groups these two convs (groups=4; 4 | gcd(540, 68)).
|
|
||||||
4. Decoder mid-channels: upstream 64->32; here c_last -> max(c_last // 2, 4).
|
|
||||||
"""
|
|
||||||
import math
|
|
||||||
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
import torch.nn.functional as F
|
|
||||||
|
|
||||||
|
|
||||||
def tcn_groups(channels: int, mode: str) -> int:
    """Return the ``groups`` value for a grouped TCN conv with ``channels`` channels.

    ``'gcd20'`` yields ``gcd(channels, 20)``; ``'depthwise'`` yields ``channels``.

    Raises:
        ValueError: For any other ``mode`` (the mode is the exception argument).
    """
    if mode == 'gcd20':
        return math.gcd(channels, 20)
    if mode != 'depthwise':
        raise ValueError(mode)
    return channels
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------- TCN (copy of tcn.py)
|
|
||||||
class Chomp1d(nn.Module):
    """Drop the last ``chomp_size`` positions of the final axis of a 3-D tensor.

    Used after a padded convolution to trim the extra trailing outputs.
    """

    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """Return ``x`` without its last ``chomp_size`` steps, as a contiguous tensor."""
        if self.chomp_size == 0:
            # x[:, :, :-0] is x[:, :, :0], i.e. an EMPTY tensor; nothing to trim here.
            return x.contiguous()
        return x[:, :, :-self.chomp_size].contiguous()
|
|
||||||
|
|
||||||
|
|
||||||
class CompactGroupedTemporalBlock(nn.Module):
    """Upstream InnerGroupedTemporalBlock with parameterized groups.

    Two (grouped conv -> pointwise conv) stages, each followed by BatchNorm,
    SiLU and dropout, plus a residual branch that is a 1x1 conv + BatchNorm
    when the channel count changes and the identity otherwise.
    Submodule names and creation order are kept fixed so state-dict keys and
    module iteration order do not change.
    """

    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding,
                 dropout=0.2, groups_mode='gcd20', pw_groups=1):
        super().__init__()
        g_in = tcn_groups(n_inputs, groups_mode)
        g_out = tcn_groups(n_outputs, groups_mode)
        self.groups = (g_in, g_out)
        self.pw_groups = pw_groups

        def trim():
            # Only chomp when there is padding to remove.
            return Chomp1d(padding) if padding > 0 else nn.Identity()

        # Stage 1: grouped temporal conv on the input channels ...
        self.conv1_group = nn.Conv1d(
            n_inputs, n_inputs, kernel_size, stride=stride, padding=padding,
            dilation=dilation, groups=g_in, bias=False)
        self.chomp1 = trim()
        self.bn1_group = nn.BatchNorm1d(n_inputs)
        self.relu1_group = nn.SiLU(inplace=True)
        # ... then a (possibly grouped) pointwise conv to the output width.
        self.conv1_pw = nn.Conv1d(n_inputs, n_outputs, 1, groups=pw_groups, bias=False)
        self.bn1_pw = nn.BatchNorm1d(n_outputs)
        self.relu1_pw = nn.SiLU(inplace=True)
        self.dropout1 = nn.Dropout(dropout)

        # Stage 2: same pattern at the output width (pointwise conv is dense).
        self.conv2_group = nn.Conv1d(
            n_outputs, n_outputs, kernel_size, stride=1, padding=padding,
            dilation=dilation, groups=g_out, bias=False)
        self.chomp2 = trim()
        self.bn2_group = nn.BatchNorm1d(n_outputs)
        self.relu2_group = nn.SiLU(inplace=True)
        self.conv2_pw = nn.Conv1d(n_outputs, n_outputs, 1, bias=False)
        self.bn2_pw = nn.BatchNorm1d(n_outputs)
        self.relu2_pw = nn.SiLU(inplace=True)
        self.dropout2 = nn.Dropout(dropout)

        if n_inputs != n_outputs:
            self.downsample = nn.Sequential(
                nn.Conv1d(n_inputs, n_outputs, 1, groups=pw_groups, bias=False),
                nn.BatchNorm1d(n_outputs),
            )
        else:
            self.downsample = nn.Identity()

    def forward(self, x):
        """Apply both stages in order and add the residual branch before SiLU."""
        shortcut = self.downsample(x)
        pipeline = (
            self.conv1_group, self.chomp1, self.bn1_group, self.relu1_group,
            self.conv1_pw, self.bn1_pw, self.relu1_pw, self.dropout1,
            self.conv2_group, self.chomp2, self.bn2_group, self.relu2_group,
            self.conv2_pw, self.bn2_pw, self.relu2_pw, self.dropout2,
        )
        hidden = x
        for layer in pipeline:
            hidden = layer(hidden)
        return F.silu(hidden + shortcut)
|
|
||||||
|
|
||||||
|
|
||||||
class CompactTemporalBlock(nn.Module):
    """Stack of ``CompactGroupedTemporalBlock`` levels with dilation 2**level.

    Level 0 maps ``num_inputs`` to ``num_channels[0]`` and is the only level
    that receives ``input_pw_groups``; later levels use dense pointwise convs.
    """

    def __init__(self, num_inputs, num_channels, kernel_size=3, dropout=0.2,
                 groups_mode='gcd20', input_pw_groups=1):
        super().__init__()
        stages = []
        prev_width = num_inputs
        for level, width in enumerate(num_channels):
            dilation = 2 ** level
            first = level == 0
            stages.append(CompactGroupedTemporalBlock(
                prev_width, width, kernel_size, stride=1, dilation=dilation,
                padding=(kernel_size - 1) * dilation, dropout=dropout,
                groups_mode=groups_mode,
                pw_groups=input_pw_groups if first else 1))
            prev_width = width
        self.network = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through every level in order."""
        return self.network(x)
|
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------- Conv2d path (copy of convnet.py)
|
|
||||||
class AsymmetricConvBlock(nn.Module):
    """Residual block of three (1, 3) convs with a configurable last-axis stride.

    Upstream always uses stride (1, 2); here ``stride_w`` selects the stride of
    the first conv and of the 1x1 shortcut conv along the last axis.
    """

    def __init__(self, in_channels, out_channels, dropout=0.3, stride_w=2):
        super().__init__()
        # Layer order (conv, bn, [silu, dropout, conv, bn] x 2) fixes the
        # nn.Sequential indices and therefore the state-dict keys.
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=(1, 3),
                      stride=(1, stride_w), padding=(0, 1)),
            nn.BatchNorm2d(out_channels),
        ]
        for _ in range(2):
            layers += [
                nn.SiLU(inplace=True),
                nn.Dropout2d(dropout),
                nn.Conv2d(out_channels, out_channels, kernel_size=(1, 3), padding=(0, 1)),
                nn.BatchNorm2d(out_channels),
            ]
        self.block = nn.Sequential(*layers)
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1,
                      stride=(1, stride_w), bias=False),
            nn.BatchNorm2d(out_channels),
        )
        self.activation = nn.SiLU(inplace=True)

    def forward(self, x):
        """Return SiLU(main branch + strided 1x1 shortcut)."""
        main = self.block(x)
        shortcut = self.downsample(x)
        return self.activation(main + shortcut)
|
|
||||||
|
|
||||||
|
|
||||||
class ConvBlock1(nn.Module):
    """Stride-1 residual block: three (1, 3) convs plus a 1x1 conv shortcut."""

    def __init__(self, in_channels, out_channels, dropout=0.3):
        super().__init__()

        def conv_bn(c_in):
            # One (1, 3) conv keeping the spatial size, followed by BatchNorm.
            return [
                nn.Conv2d(c_in, out_channels, kernel_size=(1, 3), padding=(0, 1)),
                nn.BatchNorm2d(out_channels),
            ]

        def act_drop():
            return [nn.SiLU(inplace=True), nn.Dropout2d(dropout)]

        # conv/bn, act/drop, conv/bn, act/drop, conv/bn: same indices as before.
        self.block = nn.Sequential(
            *conv_bn(in_channels), *act_drop(),
            *conv_bn(out_channels), *act_drop(),
            *conv_bn(out_channels),
        )
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
        )
        self.activation = nn.SiLU(inplace=True)

    def forward(self, x):
        """Return SiLU(main branch + 1x1 shortcut)."""
        main = self.block(x)
        shortcut = self.downsample(x)
        return self.activation(main + shortcut)
|
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------- attention (verbatim attention.py)
|
|
||||||
class AxialAttention(nn.Module):
    """Grouped self-attention along one axis of a 4-D feature map.

    ``width`` selects which of the two trailing axes is attended over: the
    input is permuted so that axis comes last, the remaining axis is folded
    into the batch, and attention is computed per group over the last axis.
    """

    def __init__(self, in_planes, out_planes, groups=8, stride=1, bias=False, width=False):
        assert (in_planes % groups == 0) and (out_planes % groups == 0)
        super().__init__()
        self.in_planes = in_planes
        self.out_planes = out_planes
        self.groups = groups
        self.group_planes = out_planes // groups
        self.stride = stride
        self.bias = bias
        self.width = width
        # A single 1x1 conv produces queries, keys and values stacked on channels.
        self.qkv_transform = nn.Conv1d(in_planes, out_planes * 3, kernel_size=1,
                                       stride=1, padding=0, bias=False)
        self.bn_qkv = nn.BatchNorm1d(out_planes * 3)
        self.bn_similarity = nn.BatchNorm2d(groups)
        self.bn_output = nn.BatchNorm1d(out_planes)
        if stride > 1:
            self.pooling = nn.AvgPool2d(stride, stride=stride)
        nn.init.normal_(self.qkv_transform.weight.data, 0, math.sqrt(1. / self.in_planes))

    def forward(self, x):
        """Attend over the selected axis and return a map in the input's axis order."""
        x = x.permute(0, 2, 1, 3) if self.width else x.permute(0, 3, 1, 2)
        N, W, C, H = x.shape
        rows = N * W  # the non-attended axis is folded into the batch
        x = x.contiguous().view(rows, C, H)

        qkv = self.bn_qkv(self.qkv_transform(x))
        # Split the stacked channels into q / k / v, then into attention groups.
        q, k, v = (
            part.reshape(rows, self.groups, self.group_planes, H)
            for part in qkv.reshape(rows, 3, self.out_planes, H).unbind(dim=1)
        )

        qk = torch.einsum('bgci, bgcj->bgij', q, k)
        similarity = F.softmax(self.bn_similarity(qk), dim=-1)
        sv = torch.einsum('bgij,bgcj->bgci', similarity, v)

        out = self.bn_output(sv.reshape(rows, self.out_planes, H))
        out = out.view(N, W, self.out_planes, H)
        # Undo the initial permutation.
        out = out.permute(0, 2, 1, 3) if self.width else out.permute(0, 2, 3, 1)
        if self.stride > 1:
            out = self.pooling(out)
        return out
|
|
||||||
|
|
||||||
|
|
||||||
class DualAxialAttention(nn.Module):
    """Two ``AxialAttention`` passes in sequence: ``width=True`` then ``width=False``."""

    def __init__(self, in_planes, out_planes, groups=8, stride=1, bias=False):
        super().__init__()
        # The first pass changes the channel count; the second keeps it.
        self.width_axis = AxialAttention(
            in_planes, out_planes, groups, stride, bias, width=True)
        self.height_axis = AxialAttention(
            out_planes, out_planes, groups, stride, bias, width=False)

    def forward(self, x):
        """Apply the ``width_axis`` pass, then the ``height_axis`` pass."""
        attended = self.width_axis(x)
        return self.height_axis(attended)
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------- full model
|
|
||||||
def compute_strides(width: int, n_blocks: int, target: int = 15):
    """Choose a per-block stride so the width is halved only while it stays >= target.

    Upstream case: 240 -> 4 halvings -> 15.

    Returns:
        ``(strides, final_width)`` where ``strides`` has ``n_blocks`` entries,
        each 2 (halve) or 1 (keep), and ``final_width`` is the resulting width.
    """
    strides = [1] * n_blocks
    for block in range(n_blocks):
        halved = (width + 1) // 2  # conv k=3 s=2 p=1: out = ceil(in/2)
        if halved < target:
            # Width no longer changes, so every remaining block keeps stride 1.
            break
        strides[block] = 2
        width = halved
    return strides, width
|
|
||||||
|
|
||||||
|
|
||||||
class CompactWiFlowPoseModel(nn.Module):
    """Parameterized upstream WiFlowPoseModel.

    Upstream config == tcn_channels=[540,440,340,240], conv_channels=[8,16,32,64],
    attn_groups=8, groups_mode='gcd20' (gcd(c,20)==20 for all upstream channels),
    input_pw_groups=1 -> identical architecture, 2,225,042 params.

    Pipeline: TCN -> ConvBlock1 -> residual conv blocks -> dual axial attention
    -> decoder -> adaptive average pool to ``num_keypoints`` rows.
    """

    def __init__(self, tcn_channels, conv_channels, attn_groups,
                 groups_mode='gcd20', input_pw_groups=1, dropout=0.3,
                 num_subcarriers=540, num_keypoints=15):
        super().__init__()
        self.tcn = CompactTemporalBlock(
            num_inputs=num_subcarriers, num_channels=tcn_channels, kernel_size=3,
            dropout=dropout, groups_mode=groups_mode, input_pw_groups=input_pw_groups)

        first_width = conv_channels[0]
        self.up = ConvBlock1(1, first_width)

        # Per-block strides: halve the TCN output width only while it stays
        # at or above the number of keypoints.
        strides, self.final_width = compute_strides(
            tcn_channels[-1], len(conv_channels), target=num_keypoints)
        self.conv_strides = strides

        # The first residual block maps first_width -> first_width; every later
        # block consumes the previous block's output width.
        block_inputs = [first_width, *conv_channels[:-1]]
        self.residual_blocks = nn.ModuleList([
            AsymmetricConvBlock(c_in, c_out, stride_w=s)
            for c_in, c_out, s in zip(block_inputs, conv_channels, strides)
        ])

        c_last = conv_channels[-1]
        self.attention = DualAxialAttention(c_last, c_last, groups=attn_groups)

        c_mid = max(c_last // 2, 4)
        self.decoder = nn.Sequential(
            nn.Conv2d(c_last, c_mid, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_mid),
            nn.SiLU(inplace=True),
            nn.Conv2d(c_mid, 2, kernel_size=1),
            nn.BatchNorm2d(2),
            nn.SiLU(inplace=True),
        )
        self.avg_pool = nn.AdaptiveAvgPool2d((num_keypoints, 1))
        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialize Conv1d, BatchNorm1d/LayerNorm and Linear submodules.

        Other module types (e.g. Conv2d, BatchNorm2d) keep their default init.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv1d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, (nn.BatchNorm1d, nn.LayerNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                continue
            elif isinstance(module, nn.Linear):
                nn.init.xavier_normal_(module.weight)
            else:
                continue
            # Conv1d / Linear only: zero the bias when the layer has one.
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Map a CSI window to per-keypoint 2-D outputs.

        Shape notes follow the original inline comments (default config):
        input [B, 540, 20] -> output [B, 15, 2].
        """
        feats = self.tcn(x)                          # [B, C_tcn, 20]
        feats = feats.transpose(1, 2).unsqueeze(1)   # [B, 1, 20, C_tcn]
        feats = self.up(feats)
        for stage in self.residual_blocks:
            feats = stage(feats)                     # [B, C_conv, 20, W']
        feats = feats.permute(0, 1, 3, 2)            # [B, C_conv, W', 20]
        feats = self.attention(feats)
        feats = self.decoder(feats)                  # [B, 2, W', 20]
        pooled = self.avg_pool(feats).squeeze(-1)    # [B, 2, 15]
        return pooled.transpose(1, 2)                # [B, 15, 2]
|
|
||||||
|
|
||||||
|
|
||||||
def describe(model: 'CompactWiFlowPoseModel'):
    """Summarize a model: parameter count, TCN groups per block, conv strides, final width."""
    summary = {}
    summary['params'] = sum(p.numel() for p in model.parameters())
    # Each TCN block exposes its (g_in, g_out) pair as ``groups``.
    summary['tcn_groups_per_block'] = [block.groups for block in model.tcn.network]
    summary['conv_strides'] = model.conv_strides
    summary['final_width'] = model.final_width
    return summary
|
|
||||||
@@ -1,278 +0,0 @@
|
|||||||
"""WiFlow-STD compact-variant efficiency sweep (ADR-152) — sequential overnight runner.
|
|
||||||
|
|
||||||
Trains compact variants of the upstream WiFlow-STD architecture on the same
|
|
||||||
data/split as the full-size reference retraining (seed 42, file-level 70/15/15,
|
|
||||||
upstream dataset.py) and evaluates PCK@10..50 + MPJPE on the full test split and
|
|
||||||
the corruption-free test subset (file indices < 487).
|
|
||||||
|
|
||||||
Training mirrors upstream run.py/train.py defaults except:
|
|
||||||
- fp32 only (no fp16 autocast / GradScaler — avoids the BN-poisoning trap
|
|
||||||
documented in RESULTS.md defect 5; data on disk is already cleaned).
|
|
||||||
- batch 64 (kept modest: another GPU job may share the 16 GB card tonight).
|
|
||||||
- scheduler + early stopping keyed on val MPJPE (upstream early-stops on val MPE
|
|
||||||
with patience 5; same here).
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
venv/bin/python sweep/run_sweep.py --dry-run # param counts only
|
|
||||||
nohup venv/bin/python sweep/run_sweep.py > sweep/sweep.log 2>&1 &
|
|
||||||
|
|
||||||
Idempotent: variants already present in sweep/results.jsonl are skipped.
|
|
||||||
|
|
||||||
NOTE: deployed to ruvultra (~/wiflow-std-bench/sweep) as a standalone file, so
|
|
||||||
it deliberately inlines its helpers. The reference implementations (upstream
|
|
||||||
import shim, >1GB np.load mmap patch, key-remap loader, canonical evaluate
|
|
||||||
loop) live in benchmarks/wiflow-std/_bench_common.py — keep copies in sync.
|
|
||||||
"""
|
|
||||||
import argparse
|
|
||||||
import copy
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import random
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
from torch.utils.data import DataLoader, Subset
|
|
||||||
|
|
||||||
# csi_windows.npy is ~13 GB; mmap large arrays instead of eagerly loading
# ~15 GB into RAM (same patch as _bench_common._np_load_mmap).
_np_load = np.load


def _np_load_mmap(path, *a, **kw):
    """Drop-in ``np.load`` that memory-maps ``.npy`` files larger than 1 GiB.

    ``mmap_mode='r'`` is injected only when the caller did not choose a mode,
    either by keyword or positionally (``mmap_mode`` is ``np.load``'s first
    positional argument after the path; injecting the keyword as well would
    raise ``TypeError``). ``str`` and ``os.PathLike`` paths are both handled;
    anything else (open file objects, bytes paths) is passed through untouched.
    """
    if not a and 'mmap_mode' not in kw:
        try:
            fs_path = os.fspath(path)
        except TypeError:
            fs_path = None  # not path-like, e.g. an already-open file object
        if (isinstance(fs_path, str) and fs_path.endswith('.npy')
                and os.path.getsize(fs_path) > 1 << 30):
            kw['mmap_mode'] = 'r'
    return _np_load(path, *a, **kw)


np.load = _np_load_mmap
|
|
||||||
|
|
||||||
# Benchmark root and the sweep directory inside it.
BENCH = os.path.expanduser('~/wiflow-std-bench')
SWEEP = os.path.join(BENCH, 'sweep')
# Prepend both import roots in one step: SWEEP first, then the upstream clone
# (the same order two successive insert(0, ...) calls produce).
sys.path[:0] = [SWEEP, os.path.join(BENCH, 'upstream')]
|
|
||||||
|
|
||||||
from dataset import PreprocessedCSIKeypointsDataset, create_preprocessed_train_val_test_loaders # noqa: E402
|
|
||||||
from losses.pose_loss import PoseLoss # noqa: E402
|
|
||||||
from utils.metrics import calculate_pck, calculate_mpjpe # noqa: E402
|
|
||||||
from model_compact import CompactWiFlowPoseModel, describe # noqa: E402
|
|
||||||
|
|
||||||
# Compact variants trained by the sweep, in run order. Keys:
# name, tcn (TCN channels), conv (conv channels), attn_groups, groups_mode,
# input_pw_groups.
VARIANTS = [
    {
        'name': 'half',
        'tcn': [270, 220, 170, 120],
        'conv': [4, 8, 16, 32],
        'attn_groups': 4,
        'groups_mode': 'gcd20',
        'input_pw_groups': 1,
    },
    {
        'name': 'quarter',
        'tcn': [135, 110, 85, 60],
        'conv': [2, 4, 8, 16],
        'attn_groups': 2,
        'groups_mode': 'gcd20',
        'input_pw_groups': 1,
    },
    {
        'name': 'tiny',
        'tcn': [68, 56, 44, 32],
        'conv': [2, 4, 8, 16],
        'attn_groups': 2,
        'groups_mode': 'depthwise',
        'input_pw_groups': 4,
    },
]

# Training hyper-parameters shared by every variant.
BATCH = 64
EPOCHS = 50
PATIENCE = 5
LR = 1e-4
WEIGHT_DECAY = 5e-5
SEED = 42
CORRUPT_FILE_START = 487  # files 487-499 were zero-filled by clean_nan.py
|
|
||||||
|
|
||||||
|
|
||||||
def set_seed(seed=SEED):
    """Seed Python, NumPy and torch (CPU and all CUDA devices); pin cuDNN to deterministic mode."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed,
                   torch.cuda.manual_seed_all):
        seeder(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
|
|
||||||
|
|
||||||
|
|
||||||
def build_model(v, dropout=0.5):
    """Instantiate the ``CompactWiFlowPoseModel`` described by one ``VARIANTS`` entry."""
    model_kwargs = {
        'tcn_channels': v['tcn'],
        'conv_channels': v['conv'],
        'attn_groups': v['attn_groups'],
        'groups_mode': v['groups_mode'],
        'input_pw_groups': v['input_pw_groups'],
        'dropout': dropout,
    }
    return CompactWiFlowPoseModel(**model_kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@torch.no_grad()
def evaluate(model, loader, device):
    """Score ``model`` on ``loader``: sample-weighted MPJPE and PCK at 0.1..0.5.

    Per-batch metrics from ``calculate_mpjpe`` / ``calculate_pck`` are weighted
    by batch size and divided by the total sample count.

    Returns:
        Dict with ``samples``, ``mpjpe`` and ``pck@10`` .. ``pck@50``.
    """
    model.eval()
    totals = dict.fromkeys((0.1, 0.2, 0.3, 0.4, 0.5), 0.0)
    total_mpe = 0.0
    n = 0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        preds = model(inputs)
        batch = targets.size(0)
        total_mpe += calculate_mpjpe(preds, targets) * batch
        batch_pck = calculate_pck(preds, targets, thresholds=list(totals))
        for t in totals:
            totals[t] += batch_pck[t] * batch
        n += batch
    metrics = {'samples': n, 'mpjpe': total_mpe / n}
    metrics.update({f'pck@{int(t * 100)}': totals[t] / n for t in totals})
    return metrics
|
|
||||||
|
|
||||||
|
|
||||||
def train_variant(v, dataset, device):
    """Train one compact variant with early stopping and evaluate it on the test split.

    Builds the loaders, trains for up to ``EPOCHS`` epochs while tracking the
    best validation MPJPE, saves and reloads the best weights (when any epoch
    completed), then evaluates on the full test split and on the test windows
    whose source file index is below ``CORRUPT_FILE_START``.

    Args:
        v: One entry of ``VARIANTS``.
        dataset: Dataset exposing ``window_to_file``; passed to the loader factory.
        device: Torch device to train and evaluate on.

    Returns:
        Result dict: configuration, training summary, ``error`` (set when a
        non-finite loss stopped training) and the ``test_full`` / ``test_clean``
        metrics.
    """
    set_seed(SEED)
    train_loader, val_loader, test_loader = create_preprocessed_train_val_test_loaders(
        dataset=dataset, batch_size=BATCH, num_workers=2, random_seed=SEED)

    set_seed(SEED)  # re-seed after split so init is split-independent
    model = build_model(v).to(device)
    info = describe(model)
    print(f"[{v['name']}] params={info['params']:,} tcn_groups={info['tcn_groups_per_block']} "
          f"conv_strides={info['conv_strides']} final_width={info['final_width']}", flush=True)

    criterion = PoseLoss(position_weight=1.0, bone_weight=0.2, loss_type='smooth_l1')
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY,
                                  betas=(0.9, 0.999))
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=3, min_lr=LR / 1000,
        cooldown=1, threshold=1e-4)

    # Best-so-far bookkeeping for early stopping on validation MPJPE.
    best_val_mpe, best_val_pck20 = float('inf'), 0.0
    best_epoch, best_state = 0, None
    patience_counter = 0
    error = None
    epochs_run = 0
    t0 = time.time()

    for epoch in range(1, EPOCHS + 1):
        model.train()
        ep_loss, nb = 0.0, 0
        te = time.time()
        for i, (bx, by) in enumerate(train_loader):
            bx = bx.to(device, non_blocking=True)
            by = by.to(device, non_blocking=True)
            optimizer.zero_grad(set_to_none=True)
            loss, _parts = criterion(model(bx), by)
            if not torch.isfinite(loss):
                # Abort the whole run; the message is reported in result['error'].
                error = f'non-finite loss at epoch {epoch} step {i}'
                break
            loss.backward()
            optimizer.step()
            ep_loss += loss.item()
            nb += 1
            if epoch == 1 and i % 500 == 0:
                print(f"[{v['name']}] e1 step {i}/{len(train_loader)} loss={loss.item():.5f}",
                      flush=True)
        if error:
            break
        epochs_run = epoch

        val = evaluate(model, val_loader, device)
        scheduler.step(val['mpjpe'])
        lr_now = optimizer.param_groups[0]['lr']
        print(f"[{v['name']}] epoch {epoch}/{EPOCHS} train_loss={ep_loss / max(nb, 1):.5f} "
              f"val_mpjpe={val['mpjpe']:.5f} val_pck20={val['pck@20'] * 100:.2f}% "
              f"lr={lr_now:.2e} ({time.time() - te:.0f}s)", flush=True)

        if val['mpjpe'] < best_val_mpe:
            # New best: snapshot the weights and reset the patience counter.
            best_val_mpe = val['mpjpe']
            best_val_pck20 = val['pck@20']
            best_epoch = epoch
            best_state = copy.deepcopy(model.state_dict())
            patience_counter = 0
            continue

        patience_counter += 1
        if patience_counter >= PATIENCE:
            print(f"[{v['name']}] early stop at epoch {epoch} (best {best_epoch})", flush=True)
            break

    train_seconds = time.time() - t0
    result = {
        'variant': v['name'], 'params': info['params'],
        'tcn_channels': v['tcn'], 'conv_channels': v['conv'],
        'attn_groups': v['attn_groups'], 'groups_mode': v['groups_mode'],
        'input_pw_groups': v['input_pw_groups'],
        'tcn_groups_per_block': info['tcn_groups_per_block'],
        'conv_strides': info['conv_strides'], 'final_width': info['final_width'],
        'batch_size': BATCH, 'max_epochs': EPOCHS, 'patience': PATIENCE,
        'lr': LR, 'weight_decay': WEIGHT_DECAY, 'seed': SEED, 'precision': 'fp32',
        'epochs_run': epochs_run, 'best_epoch': best_epoch,
        'best_val_mpjpe': best_val_mpe if best_state else None,
        'best_val_pck20': best_val_pck20 if best_state else None,
        'train_seconds': round(train_seconds, 1),
        'torch': torch.__version__, 'error': error,
        'finished_utc': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()),
    }

    if best_state is not None:
        ckpt = os.path.join(SWEEP, f"{v['name']}_best.pth")
        torch.save(best_state, ckpt)
        result['checkpoint'] = ckpt
        model.load_state_dict(best_state)

    # Full test split, re-wrapped in an unshuffled loader with a larger batch.
    test_subset = test_loader.dataset
    eval_loader = DataLoader(test_subset, batch_size=256, shuffle=False,
                             num_workers=2)
    result['test_full'] = evaluate(model, eval_loader, device)

    # Clean subset: test windows whose source file index is below CORRUPT_FILE_START.
    w2f = dataset.window_to_file
    clean_idx = [i for i in test_subset.indices if w2f[i] < CORRUPT_FILE_START]
    clean_loader = DataLoader(Subset(dataset, clean_idx), batch_size=256,
                              shuffle=False, num_workers=2)
    result['test_clean'] = evaluate(model, clean_loader, device)
    print(f"[{v['name']}] TEST clean: pck20={result['test_clean']['pck@20'] * 100:.2f}% "
          f"mpjpe={result['test_clean']['mpjpe']:.5f} | full: "
          f"pck20={result['test_full']['pck@20'] * 100:.2f}%", flush=True)
    return result
|
|
||||||
|
|
||||||
|
|
||||||
def _print_dry_run_summary():
    """Build each variant, run one random batch through it and print a summary line."""
    for v in VARIANTS:
        m = build_model(v)
        info = describe(m)
        x = torch.randn(2, 540, 20)
        m.eval()
        # Only the output shape is needed, so skip building an autograd graph.
        with torch.no_grad():
            y = m(x)
        print(f"{v['name']:8s} params={info['params']:>9,} "
              f"tcn={v['tcn']} conv={v['conv']} attn_g={v['attn_groups']} "
              f"mode={v['groups_mode']} pw_g={v['input_pw_groups']} "
              f"tcn_groups={info['tcn_groups_per_block']} strides={info['conv_strides']} "
              f"W'={info['final_width']} out={tuple(y.shape)}")


def _load_completed_variants(results_path):
    """Return the set of variant names already recorded in ``results_path``.

    The file is read as JSON Lines. A missing file yields an empty set, and
    lines that are blank, not valid JSON, or lack a ``'variant'`` key are
    ignored so that a truncated record from an interrupted run cannot abort
    the resume.
    """
    done = set()
    if not os.path.exists(results_path):
        return done
    with open(results_path, encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                done.add(json.loads(line)['variant'])
            except (json.JSONDecodeError, KeyError, TypeError):
                # Best-effort resume: skip malformed records instead of failing.
                continue
    return done


def main():
    """Command-line entry point for the variant sweep.

    With ``--dry-run`` it prints one summary line per entry in ``VARIANTS``
    and returns without training. Otherwise it trains every variant that is
    not yet listed in ``<SWEEP>/results.jsonl`` and appends one JSON record
    per variant to that file. An exception raised while training a variant
    is printed, recorded as an ``'error'`` record, and the sweep continues
    with the next variant.

    Raises:
        RuntimeError: if a real sweep is requested but no CUDA device is
            available.
    """
    import traceback  # only needed to report a failed variant

    ap = argparse.ArgumentParser()
    ap.add_argument('--dry-run', action='store_true', help='print param counts and exit')
    args = ap.parse_args()

    if args.dry_run:
        _print_dry_run_summary()
        return

    results_path = os.path.join(SWEEP, 'results.jsonl')
    # NOTE(review): records carrying an 'error' are also counted as done, so a
    # failed variant is not retried until its line is removed from
    # results.jsonl -- confirm this is the intended resume behaviour.
    done = _load_completed_variants(results_path)

    # Fail fast with a clear message instead of an opaque error from
    # torch.cuda.get_device_name() below.
    if not torch.cuda.is_available():
        raise RuntimeError('sweep training requires a CUDA device, '
                           'but torch.cuda.is_available() is False')
    device = torch.device('cuda')
    print(f"torch {torch.__version__} on {torch.cuda.get_device_name(0)}", flush=True)
    data_dir = os.path.join(BENCH, 'preprocessed_csi_data')
    dataset = PreprocessedCSIKeypointsDataset(data_dir=data_dir, keypoint_scale=1000.0,
                                              enable_temporal_clean=True)

    for v in VARIANTS:
        if v['name'] in done:
            print(f"[{v['name']}] already in results.jsonl — skipping", flush=True)
            continue
        print(f"\n===== variant: {v['name']} =====", flush=True)
        try:
            result = train_variant(v, dataset, device)
        except Exception as e:  # record and move on to next variant
            traceback.print_exc()
            result = {'variant': v['name'], 'error': repr(e),
                      'finished_utc': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}
        # Append immediately so a later crash cannot lose finished variants;
        # closing the file at the end of the ``with`` block flushes it.
        with open(results_path, 'a', encoding='utf-8') as f:
            f.write(json.dumps(result) + '\n')
    print('\nSWEEP COMPLETE', flush=True)
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: start the sweep only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
||||||
Binary file not shown.
@@ -1,772 +0,0 @@
|
|||||||
{
|
|
||||||
"torch": {
|
|
||||||
"env": {
|
|
||||||
"torch": "2.12.0+cpu",
|
|
||||||
"platform": "Windows-11-10.0.26200-SP0",
|
|
||||||
"processor": "Intel64 Family 6 Model 197 Stepping 2, GenuineIntel",
|
|
||||||
"num_threads": 16,
|
|
||||||
"checkpoint": "results\\retrained_best_pose_model.pth",
|
|
||||||
"params": 2225042
|
|
||||||
},
|
|
||||||
"variants": {
|
|
||||||
"fp32": {
|
|
||||||
"file": "retrained_fp32_resaved.pth",
|
|
||||||
"size_bytes": 9068948,
|
|
||||||
"size_mb": 9.068948,
|
|
||||||
"latency_batch1": {
|
|
||||||
"batch_size": 1,
|
|
||||||
"runs": 100,
|
|
||||||
"median_ms_per_batch": 24.903650000851485,
|
|
||||||
"median_ms_per_window": 24.903650000851485,
|
|
||||||
"windows_per_second": 40.15475642991324
|
|
||||||
},
|
|
||||||
"latency_batch64": {
|
|
||||||
"batch_size": 64,
|
|
||||||
"runs": 30,
|
|
||||||
"median_ms_per_batch": 184.02919999789447,
|
|
||||||
"median_ms_per_window": 2.875456249967101,
|
|
||||||
"windows_per_second": 347.77089723115813
|
|
||||||
},
|
|
||||||
"accuracy": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9668200004577636,
|
|
||||||
"pck@50": 0.9915333324432373,
|
|
||||||
"mpjpe": 0.00936222033649683,
|
|
||||||
"wall_seconds": 37.85407733917236
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"fp16": {
|
|
||||||
"file": "retrained_fp16.pth",
|
|
||||||
"size_bytes": 4580332,
|
|
||||||
"size_mb": 4.580332,
|
|
||||||
"latency_batch1": {
|
|
||||||
"batch_size": 1,
|
|
||||||
"runs": 100,
|
|
||||||
"median_ms_per_batch": 23.936699999467237,
|
|
||||||
"median_ms_per_window": 23.936699999467237,
|
|
||||||
"windows_per_second": 41.776853117691964
|
|
||||||
},
|
|
||||||
"latency_batch64": {
|
|
||||||
"batch_size": 64,
|
|
||||||
"runs": 30,
|
|
||||||
"median_ms_per_batch": 102.32584999903338,
|
|
||||||
"median_ms_per_window": 1.5988414062348966,
|
|
||||||
"windows_per_second": 625.4529036465817
|
|
||||||
},
|
|
||||||
"accuracy": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.966773332977295,
|
|
||||||
"pck@50": 0.9915066654205322,
|
|
||||||
"mpjpe": 0.009460017587244511,
|
|
||||||
"wall_seconds": 21.632277250289917
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"int8_dynamic": {
|
|
||||||
"file": "retrained_int8_dynamic.pth",
|
|
||||||
"size_bytes": 9068948,
|
|
||||||
"size_mb": 9.068948,
|
|
||||||
"latency_batch1": {
|
|
||||||
"batch_size": 1,
|
|
||||||
"runs": 100,
|
|
||||||
"median_ms_per_batch": 18.105350000041653,
|
|
||||||
"median_ms_per_window": 18.105350000041653,
|
|
||||||
"windows_per_second": 55.23229321707117
|
|
||||||
},
|
|
||||||
"latency_batch64": {
|
|
||||||
"batch_size": 64,
|
|
||||||
"runs": 30,
|
|
||||||
"median_ms_per_batch": 168.77549999844632,
|
|
||||||
"median_ms_per_window": 2.6371171874757238,
|
|
||||||
"windows_per_second": 379.20195763359703
|
|
||||||
},
|
|
||||||
"accuracy": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9668200004577636,
|
|
||||||
"pck@50": 0.9915333324432373,
|
|
||||||
"mpjpe": 0.00936222033649683,
|
|
||||||
"wall_seconds": 45.35376596450806
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"int8_dynamic_quant_report": {
|
|
||||||
"eligible_module_counts": {
|
|
||||||
"nn.Linear": 0,
|
|
||||||
"nn.Conv1d": 21,
|
|
||||||
"nn.Conv2d": 22
|
|
||||||
},
|
|
||||||
"modules_actually_quantized": [],
|
|
||||||
"n_modules_quantized": 0,
|
|
||||||
"params_total": 2225042,
|
|
||||||
"params_quantized": 0,
|
|
||||||
"params_quantized_fraction": 0.0
|
|
||||||
},
|
|
||||||
"accuracy_subset": {
|
|
||||||
"description": "seed-42 file-level 70/15/15 test split, corrupted windows (files 487-499) excluded, seed-42 random subset",
|
|
||||||
"subset_size": 10000,
|
|
||||||
"clean_test_total": 10000
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"onnx": {
|
|
||||||
"env": {
|
|
||||||
"torch": "2.12.0+cpu",
|
|
||||||
"onnxruntime": "1.26.0",
|
|
||||||
"platform": "Windows-11-10.0.26200-SP0"
|
|
||||||
},
|
|
||||||
"export": {
|
|
||||||
"mode": "dynamic-batch",
|
|
||||||
"exporter": "torchscript",
|
|
||||||
"file": "retrained_fp32_dynamic.onnx",
|
|
||||||
"size_mb": 8.971781
|
|
||||||
},
|
|
||||||
"parity": {
|
|
||||||
"fixture": "results/parity_fixture.npz (batch 2, seed 42)",
|
|
||||||
"max_abs_diff_vs_stored_fixture": 2.384185791015625e-07,
|
|
||||||
"max_abs_diff_vs_torch_now": 2.384185791015625e-07,
|
|
||||||
"pass_lt_1e-4": true
|
|
||||||
},
|
|
||||||
"latency": {
|
|
||||||
"batch1": {
|
|
||||||
"batch_size": 1,
|
|
||||||
"runs": 100,
|
|
||||||
"median_ms_per_batch": 2.5410999987798277,
|
|
||||||
"median_ms_per_window": 2.5410999987798277,
|
|
||||||
"windows_per_second": 393.5303610563043
|
|
||||||
},
|
|
||||||
"batch64": {
|
|
||||||
"batch_size": 64,
|
|
||||||
"runs": 30,
|
|
||||||
"median_ms_per_batch": 181.95204999938142,
|
|
||||||
"median_ms_per_window": 2.8430007812403346,
|
|
||||||
"windows_per_second": 351.7410218803118
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"ort_int8_dynamic_supplementary": {
|
|
||||||
"file": "retrained_int8_ort_dynamic.onnx",
|
|
||||||
"size_mb": 2.438794,
|
|
||||||
"runs": true,
|
|
||||||
"max_abs_diff_vs_fp32_fixture": 0.00827130675315857
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"onnx_accuracy": {
|
|
||||||
"onnx_fp32": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9668200004577636,
|
|
||||||
"pck@50": 0.9915333324432373,
|
|
||||||
"mpjpe": 0.00936222568154335,
|
|
||||||
"wall_seconds": 22.34790802001953
|
|
||||||
},
|
|
||||||
"onnx_int8_ort_dynamic": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.965240001964569,
|
|
||||||
"pck@50": 0.9915466655731201,
|
|
||||||
"mpjpe": 0.01108054072111845,
|
|
||||||
"wall_seconds": 55.742953062057495
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"latency_controlled_rerun": {
|
|
||||||
"note": "3 interleaved repetitions per variant, median ms/window; quiet box",
|
|
||||||
"fp32": {
|
|
||||||
"batch1_ms_per_window_median": 10.969150001983508,
|
|
||||||
"batch1_reps": [
|
|
||||||
10.969150001983508,
|
|
||||||
12.646450000829645,
|
|
||||||
10.49820000116597
|
|
||||||
],
|
|
||||||
"batch64_ms_per_window_median": 2.2734187500077496,
|
|
||||||
"batch64_reps": [
|
|
||||||
2.377234374989712,
|
|
||||||
2.124126562478068,
|
|
||||||
2.2734187500077496
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"fp16": {
|
|
||||||
"batch1_ms_per_window_median": 24.313550000442774,
|
|
||||||
"batch1_reps": [
|
|
||||||
25.1078499986761,
|
|
||||||
21.856999999727122,
|
|
||||||
24.313550000442774
|
|
||||||
],
|
|
||||||
"batch64_ms_per_window_median": 2.414695312495496,
|
|
||||||
"batch64_reps": [
|
|
||||||
2.5705156249955508,
|
|
||||||
1.7137437499741281,
|
|
||||||
2.414695312495496
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"int8_dynamic": {
|
|
||||||
"batch1_ms_per_window_median": 15.627150000000256,
|
|
||||||
"batch1_reps": [
|
|
||||||
17.67525000104797,
|
|
||||||
14.627999998992891,
|
|
||||||
15.627150000000256
|
|
||||||
],
|
|
||||||
"batch64_ms_per_window_median": 2.0546906250160646,
|
|
||||||
"batch64_reps": [
|
|
||||||
2.0546906250160646,
|
|
||||||
2.03407343752815,
|
|
||||||
2.9325796875241394
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"onnx_fp32": {
|
|
||||||
"batch1_ms_per_window_median": 3.186650001225644,
|
|
||||||
"batch1_reps": [
|
|
||||||
2.7332500012562377,
|
|
||||||
3.1995500012271805,
|
|
||||||
3.186650001225644
|
|
||||||
],
|
|
||||||
"batch64_ms_per_window_median": 1.9893374999924163,
|
|
||||||
"batch64_reps": [
|
|
||||||
1.5590843750032946,
|
|
||||||
1.9893374999924163,
|
|
||||||
2.2144343749914697
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"onnx_int8_ort_dynamic": {
|
|
||||||
"batch1_ms_per_window_median": 6.50984999811044,
|
|
||||||
"batch1_reps": [
|
|
||||||
6.50984999811044,
|
|
||||||
6.455249998907675,
|
|
||||||
6.789299999581999
|
|
||||||
],
|
|
||||||
"batch64_ms_per_window_median": 5.770093750015803,
|
|
||||||
"batch64_reps": [
|
|
||||||
5.770093750015803,
|
|
||||||
3.912374999970325,
|
|
||||||
7.8067296875019565
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"onnx_static_ptq": {
|
|
||||||
"env": {
|
|
||||||
"onnxruntime": "1.26.0",
|
|
||||||
"torch": "2.12.0+cpu",
|
|
||||||
"platform": "Windows-11-10.0.26200-SP0",
|
|
||||||
"source_model": "retrained_fp32_dynamic.onnx",
|
|
||||||
"preprocessed_model": {
|
|
||||||
"file": "retrained_fp32_preproc.onnx",
|
|
||||||
"size_mb": 8.981529
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"variants": {
|
|
||||||
"minmax_all": {
|
|
||||||
"file": "retrained_int8_static_minmax_all.onnx",
|
|
||||||
"size_bytes": 2604286,
|
|
||||||
"size_mb": 2.604286,
|
|
||||||
"calibration": {
|
|
||||||
"method": "minmax",
|
|
||||||
"windows": 1000,
|
|
||||||
"percentile": null,
|
|
||||||
"seconds": 5.052440166473389
|
|
||||||
},
|
|
||||||
"scope": "all",
|
|
||||||
"per_channel": true,
|
|
||||||
"activation_type": "QInt8",
|
|
||||||
"weight_type": "QInt8",
|
|
||||||
"node_counts": {
|
|
||||||
"Add": 9,
|
|
||||||
"AveragePool": 1,
|
|
||||||
"BatchNormalization": 12,
|
|
||||||
"Concat": 10,
|
|
||||||
"Conv": 43,
|
|
||||||
"DequantizeLinear": 283,
|
|
||||||
"Einsum": 4,
|
|
||||||
"Gather": 16,
|
|
||||||
"Mul": 39,
|
|
||||||
"QuantizeLinear": 181,
|
|
||||||
"Reshape": 14,
|
|
||||||
"Shape": 2,
|
|
||||||
"Sigmoid": 37,
|
|
||||||
"Slice": 8,
|
|
||||||
"Softmax": 2,
|
|
||||||
"Squeeze": 1,
|
|
||||||
"Transpose": 7,
|
|
||||||
"Unsqueeze": 11
|
|
||||||
},
|
|
||||||
"max_abs_diff_vs_fp32_fixture": 0.015945255756378174,
|
|
||||||
"accuracy": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9545266661643982,
|
|
||||||
"pck@50": 0.9913666645050049,
|
|
||||||
"mpjpe": 0.014860070134699345,
|
|
||||||
"wall_seconds": 43.455235958099365
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"minmax_conv": {
|
|
||||||
"file": "retrained_int8_static_minmax_conv.onnx",
|
|
||||||
"size_bytes": 2527421,
|
|
||||||
"size_mb": 2.527421,
|
|
||||||
"calibration": {
|
|
||||||
"method": "minmax",
|
|
||||||
"windows": 1000,
|
|
||||||
"percentile": null,
|
|
||||||
"seconds": 4.380746126174927
|
|
||||||
},
|
|
||||||
"scope": "conv",
|
|
||||||
"per_channel": true,
|
|
||||||
"activation_type": "QInt8",
|
|
||||||
"weight_type": "QInt8",
|
|
||||||
"node_counts": {
|
|
||||||
"Add": 9,
|
|
||||||
"AveragePool": 1,
|
|
||||||
"BatchNormalization": 12,
|
|
||||||
"Concat": 10,
|
|
||||||
"Conv": 43,
|
|
||||||
"DequantizeLinear": 156,
|
|
||||||
"Einsum": 4,
|
|
||||||
"Gather": 16,
|
|
||||||
"Mul": 39,
|
|
||||||
"QuantizeLinear": 78,
|
|
||||||
"Reshape": 14,
|
|
||||||
"Shape": 2,
|
|
||||||
"Sigmoid": 37,
|
|
||||||
"Slice": 8,
|
|
||||||
"Softmax": 2,
|
|
||||||
"Squeeze": 1,
|
|
||||||
"Transpose": 7,
|
|
||||||
"Unsqueeze": 11
|
|
||||||
},
|
|
||||||
"max_abs_diff_vs_fp32_fixture": 0.010693132877349854,
|
|
||||||
"accuracy": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9663399996757507,
|
|
||||||
"pck@50": 0.9918666641235352,
|
|
||||||
"mpjpe": 0.01084446222037077,
|
|
||||||
"wall_seconds": 35.937947034835815
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"entropy_all": {
|
|
||||||
"file": "retrained_int8_static_entropy_all.onnx",
|
|
||||||
"size_bytes": 2604268,
|
|
||||||
"size_mb": 2.604268,
|
|
||||||
"calibration": {
|
|
||||||
"method": "entropy",
|
|
||||||
"windows": 512,
|
|
||||||
"percentile": null,
|
|
||||||
"seconds": 23.835066318511963
|
|
||||||
},
|
|
||||||
"scope": "all",
|
|
||||||
"per_channel": true,
|
|
||||||
"activation_type": "QInt8",
|
|
||||||
"weight_type": "QInt8",
|
|
||||||
"node_counts": {
|
|
||||||
"Add": 9,
|
|
||||||
"AveragePool": 1,
|
|
||||||
"BatchNormalization": 12,
|
|
||||||
"Concat": 10,
|
|
||||||
"Conv": 43,
|
|
||||||
"DequantizeLinear": 283,
|
|
||||||
"Einsum": 4,
|
|
||||||
"Gather": 16,
|
|
||||||
"Mul": 39,
|
|
||||||
"QuantizeLinear": 181,
|
|
||||||
"Reshape": 14,
|
|
||||||
"Shape": 2,
|
|
||||||
"Sigmoid": 37,
|
|
||||||
"Slice": 8,
|
|
||||||
"Softmax": 2,
|
|
||||||
"Squeeze": 1,
|
|
||||||
"Transpose": 7,
|
|
||||||
"Unsqueeze": 11
|
|
||||||
},
|
|
||||||
"max_abs_diff_vs_fp32_fixture": 0.015280365943908691,
|
|
||||||
"accuracy": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9530466662406921,
|
|
||||||
"pck@50": 0.9912600006103516,
|
|
||||||
"mpjpe": 0.015098519864678382,
|
|
||||||
"wall_seconds": 51.514281034469604
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"entropy_conv": {
|
|
||||||
"file": "retrained_int8_static_entropy_conv.onnx",
|
|
||||||
"size_bytes": 2527403,
|
|
||||||
"size_mb": 2.527403,
|
|
||||||
"calibration": {
|
|
||||||
"method": "entropy",
|
|
||||||
"windows": 512,
|
|
||||||
"percentile": null,
|
|
||||||
"seconds": 9.634419918060303
|
|
||||||
},
|
|
||||||
"scope": "conv",
|
|
||||||
"per_channel": true,
|
|
||||||
"activation_type": "QInt8",
|
|
||||||
"weight_type": "QInt8",
|
|
||||||
"node_counts": {
|
|
||||||
"Add": 9,
|
|
||||||
"AveragePool": 1,
|
|
||||||
"BatchNormalization": 12,
|
|
||||||
"Concat": 10,
|
|
||||||
"Conv": 43,
|
|
||||||
"DequantizeLinear": 156,
|
|
||||||
"Einsum": 4,
|
|
||||||
"Gather": 16,
|
|
||||||
"Mul": 39,
|
|
||||||
"QuantizeLinear": 78,
|
|
||||||
"Reshape": 14,
|
|
||||||
"Shape": 2,
|
|
||||||
"Sigmoid": 37,
|
|
||||||
"Slice": 8,
|
|
||||||
"Softmax": 2,
|
|
||||||
"Squeeze": 1,
|
|
||||||
"Transpose": 7,
|
|
||||||
"Unsqueeze": 11
|
|
||||||
},
|
|
||||||
"max_abs_diff_vs_fp32_fixture": 0.012535125017166138,
|
|
||||||
"accuracy": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9659599989891052,
|
|
||||||
"pck@50": 0.9918666648864746,
|
|
||||||
"mpjpe": 0.010778637571632861,
|
|
||||||
"wall_seconds": 41.01180171966553
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"percentile_all": {
|
|
||||||
"file": "retrained_int8_static_percentile_all.onnx",
|
|
||||||
"size_bytes": 2604052,
|
|
||||||
"size_mb": 2.604052,
|
|
||||||
"calibration": {
|
|
||||||
"method": "percentile",
|
|
||||||
"windows": 512,
|
|
||||||
"percentile": 99.99,
|
|
||||||
"seconds": 20.221954584121704
|
|
||||||
},
|
|
||||||
"scope": "all",
|
|
||||||
"per_channel": true,
|
|
||||||
"activation_type": "QInt8",
|
|
||||||
"weight_type": "QInt8",
|
|
||||||
"node_counts": {
|
|
||||||
"Add": 9,
|
|
||||||
"AveragePool": 1,
|
|
||||||
"BatchNormalization": 12,
|
|
||||||
"Concat": 10,
|
|
||||||
"Conv": 43,
|
|
||||||
"DequantizeLinear": 283,
|
|
||||||
"Einsum": 4,
|
|
||||||
"Gather": 16,
|
|
||||||
"Mul": 39,
|
|
||||||
"QuantizeLinear": 181,
|
|
||||||
"Reshape": 14,
|
|
||||||
"Shape": 2,
|
|
||||||
"Sigmoid": 37,
|
|
||||||
"Slice": 8,
|
|
||||||
"Softmax": 2,
|
|
||||||
"Squeeze": 1,
|
|
||||||
"Transpose": 7,
|
|
||||||
"Unsqueeze": 11
|
|
||||||
},
|
|
||||||
"max_abs_diff_vs_fp32_fixture": 0.017689883708953857,
|
|
||||||
"accuracy": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9639333323478698,
|
|
||||||
"pck@50": 0.9916799991607667,
|
|
||||||
"mpjpe": 0.012176512064039708,
|
|
||||||
"wall_seconds": 49.365190744400024
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"percentile_conv": {
|
|
||||||
"file": "retrained_int8_static_percentile_conv.onnx",
|
|
||||||
"size_bytes": 2527241,
|
|
||||||
"size_mb": 2.527241,
|
|
||||||
"calibration": {
|
|
||||||
"method": "percentile",
|
|
||||||
"windows": 512,
|
|
||||||
"percentile": 99.99,
|
|
||||||
"seconds": 8.223475694656372
|
|
||||||
},
|
|
||||||
"scope": "conv",
|
|
||||||
"per_channel": true,
|
|
||||||
"activation_type": "QInt8",
|
|
||||||
"weight_type": "QInt8",
|
|
||||||
"node_counts": {
|
|
||||||
"Add": 9,
|
|
||||||
"AveragePool": 1,
|
|
||||||
"BatchNormalization": 12,
|
|
||||||
"Concat": 10,
|
|
||||||
"Conv": 43,
|
|
||||||
"DequantizeLinear": 156,
|
|
||||||
"Einsum": 4,
|
|
||||||
"Gather": 16,
|
|
||||||
"Mul": 39,
|
|
||||||
"QuantizeLinear": 78,
|
|
||||||
"Reshape": 14,
|
|
||||||
"Shape": 2,
|
|
||||||
"Sigmoid": 37,
|
|
||||||
"Slice": 8,
|
|
||||||
"Softmax": 2,
|
|
||||||
"Squeeze": 1,
|
|
||||||
"Transpose": 7,
|
|
||||||
"Unsqueeze": 11
|
|
||||||
},
|
|
||||||
"max_abs_diff_vs_fp32_fixture": 0.014725983142852783,
|
|
||||||
"accuracy": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9660599988937378,
|
|
||||||
"pck@50": 0.9916066654205322,
|
|
||||||
"mpjpe": 0.010310938355326652,
|
|
||||||
"wall_seconds": 36.89548587799072
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"latency": {
|
|
||||||
"note": "3 interleaved repetitions per variant, median ms/window; onnx_fp32 / onnx_int8_ort_dynamic are same-session references",
|
|
||||||
"onnx_fp32": {
|
|
||||||
"batch1_reps": [
|
|
||||||
4.5327999996516155,
|
|
||||||
2.535649999117595,
|
|
||||||
2.167549997466267
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
1.9354515624740998,
|
|
||||||
2.4948054687854437,
|
|
||||||
1.9334703125082342
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 2.535649999117595,
|
|
||||||
"batch64_ms_per_window_median": 1.9354515624740998
|
|
||||||
},
|
|
||||||
"onnx_int8_ort_dynamic": {
|
|
||||||
"batch1_reps": [
|
|
||||||
5.698599999959697,
|
|
||||||
5.721350000385428,
|
|
||||||
4.805099997611251
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
4.096601562508795,
|
|
||||||
4.857628124995017,
|
|
||||||
4.583800000006022
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 5.698599999959697,
|
|
||||||
"batch64_ms_per_window_median": 4.583800000006022
|
|
||||||
},
|
|
||||||
"entropy_all": {
|
|
||||||
"batch1_reps": [
|
|
||||||
6.444149999879301,
|
|
||||||
5.038299999796436,
|
|
||||||
5.713200000172947
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
4.149468750028973,
|
|
||||||
3.437125000004926,
|
|
||||||
4.410960937491382
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 5.713200000172947,
|
|
||||||
"batch64_ms_per_window_median": 4.149468750028973
|
|
||||||
},
|
|
||||||
"entropy_conv": {
|
|
||||||
"batch1_reps": [
|
|
||||||
4.874750000453787,
|
|
||||||
5.169099998965976,
|
|
||||||
5.236699998931726
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
3.010160156236452,
|
|
||||||
3.1175546875203963,
|
|
||||||
3.516850781238645
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 5.169099998965976,
|
|
||||||
"batch64_ms_per_window_median": 3.1175546875203963
|
|
||||||
},
|
|
||||||
"percentile_all": {
|
|
||||||
"batch1_reps": [
|
|
||||||
5.184749999898486,
|
|
||||||
5.2898499998264015,
|
|
||||||
5.916899999647285
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
4.305105468745296,
|
|
||||||
4.460741406262514,
|
|
||||||
4.184502343747454
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 5.2898499998264015,
|
|
||||||
"batch64_ms_per_window_median": 4.305105468745296
|
|
||||||
},
|
|
||||||
"percentile_conv": {
|
|
||||||
"batch1_reps": [
|
|
||||||
4.916449999655015,
|
|
||||||
7.150899999032845,
|
|
||||||
5.284949998895172
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
3.855813281262499,
|
|
||||||
4.688969531230214,
|
|
||||||
5.220103124997877
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 5.284949998895172,
|
|
||||||
"batch64_ms_per_window_median": 4.688969531230214
|
|
||||||
},
|
|
||||||
"minmax_all": {
|
|
||||||
"batch1_reps": [
|
|
||||||
6.463300000177696,
|
|
||||||
7.149449998905766,
|
|
||||||
5.3209000016067876
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
3.9251343750095202,
|
|
||||||
4.033442187505898,
|
|
||||||
3.428199218745931
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 6.463300000177696,
|
|
||||||
"batch64_ms_per_window_median": 3.9251343750095202
|
|
||||||
},
|
|
||||||
"minmax_conv": {
|
|
||||||
"batch1_reps": [
|
|
||||||
5.9961499991914025,
|
|
||||||
5.236549999608542,
|
|
||||||
4.854399998293957
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
4.368359375007458,
|
|
||||||
3.249617187492504,
|
|
||||||
3.0238906249735464
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 5.236549999608542,
|
|
||||||
"batch64_ms_per_window_median": 3.249617187492504
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"accuracy_subset": {
|
|
||||||
"description": "seed-42 file-level 70/15/15 test split, corrupted windows excluded, seed-42 random subset (same as quantize_bench/eval_ort_accuracy)",
|
|
||||||
"subset_size": 10000
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tiny_variant": {
|
|
||||||
"env": {
|
|
||||||
"torch": "2.12.0+cpu",
|
|
||||||
"onnxruntime": "1.26.0",
|
|
||||||
"platform": "Windows-11-10.0.26200-SP0",
|
|
||||||
"num_threads": 16,
|
|
||||||
"checkpoint": "results\\tiny_best.pth",
|
|
||||||
"checkpoint_size_bytes": 340555,
|
|
||||||
"params": 56290,
|
|
||||||
"variant_config": {
|
|
||||||
"tcn": [
|
|
||||||
68,
|
|
||||||
56,
|
|
||||||
44,
|
|
||||||
32
|
|
||||||
],
|
|
||||||
"conv": [
|
|
||||||
2,
|
|
||||||
4,
|
|
||||||
8,
|
|
||||||
16
|
|
||||||
],
|
|
||||||
"attn_groups": 2,
|
|
||||||
"groups_mode": "depthwise",
|
|
||||||
"input_pw_groups": 4
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"export": {
|
|
||||||
"mode": "dynamic-batch",
|
|
||||||
"exporter": "torchscript",
|
|
||||||
"opset": 17,
|
|
||||||
"file": "tiny_fp32_dynamic.onnx",
|
|
||||||
"size_bytes": 295279,
|
|
||||||
"size_mb": 0.295279,
|
|
||||||
"verified_batches": [
|
|
||||||
1,
|
|
||||||
2,
|
|
||||||
64
|
|
||||||
],
|
|
||||||
"note": "AdaptiveAvgPool2d((15,1)) replaced at export by an exact mean(-1) + constant averaging matmul (final_width 16 is not a multiple of 15, which the TorchScript exporter rejects); exactness proven by the parity check vs the original torch model"
|
|
||||||
},
|
|
||||||
"parity": {
|
|
||||||
"fixture": "results/parity_fixture.npz input (batch 2, seed 42); reference output recomputed with the tiny torch model",
|
|
||||||
"max_abs_diff_vs_torch": 1.4901161193847656e-07,
|
|
||||||
"pass_lt_1e-4": true
|
|
||||||
},
|
|
||||||
"int8_static_percentile_conv": {
|
|
||||||
"file": "tiny_int8_static_percentile_conv.onnx",
|
|
||||||
"size_bytes": 248278,
|
|
||||||
"size_mb": 0.248278,
|
|
||||||
"calibration": {
|
|
||||||
"method": "percentile",
|
|
||||||
"percentile": 99.99,
|
|
||||||
"windows": 512,
|
|
||||||
"scope": "conv-only TRAIN-split corruption-free",
|
|
||||||
"seconds": 1.5347836017608643
|
|
||||||
},
|
|
||||||
"per_channel": true,
|
|
||||||
"activation_type": "QInt8",
|
|
||||||
"weight_type": "QInt8",
|
|
||||||
"max_abs_diff_vs_fp32_fixture": 0.018491357564926147
|
|
||||||
},
|
|
||||||
"latency": {
|
|
||||||
"note": "3 interleaved repetitions per variant, median ms/window; full-model sessions are same-session references",
|
|
||||||
"tiny_onnx_fp32": {
|
|
||||||
"batch1_reps": [
|
|
||||||
0.6312500008789357,
|
|
||||||
0.6834500018157996,
|
|
||||||
0.6595999984710943
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
0.37747578119251557,
|
|
||||||
0.24196640623586063,
|
|
||||||
0.2314671875183194
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 0.6595999984710943,
|
|
||||||
"batch64_ms_per_window_median": 0.24196640623586063
|
|
||||||
},
|
|
||||||
"tiny_onnx_int8_static_percentile_conv": {
|
|
||||||
"batch1_reps": [
|
|
||||||
0.7988500001374632,
|
|
||||||
0.9382499993080273,
|
|
||||||
0.8451000030618161
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
0.9211476562995813,
|
|
||||||
1.3045390625165965,
|
|
||||||
1.026230468767153
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 0.8451000030618161,
|
|
||||||
"batch64_ms_per_window_median": 1.026230468767153
|
|
||||||
},
|
|
||||||
"full_onnx_fp32_reference": {
|
|
||||||
"batch1_reps": [
|
|
||||||
2.267249998112675,
|
|
||||||
2.80170000041835,
|
|
||||||
2.132149998942623
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
1.3050578124875756,
|
|
||||||
1.4244992187855132,
|
|
||||||
1.8014164062947202
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 2.267249998112675,
|
|
||||||
"batch64_ms_per_window_median": 1.4244992187855132
|
|
||||||
},
|
|
||||||
"full_onnx_int8_static_percentile_conv_reference": {
|
|
||||||
"batch1_reps": [
|
|
||||||
5.529599999135826,
|
|
||||||
4.768399998283712,
|
|
||||||
6.215800000063609
|
|
||||||
],
|
|
||||||
"batch64_reps": [
|
|
||||||
3.815724218725336,
|
|
||||||
3.1025562500417436,
|
|
||||||
4.333318749957016
|
|
||||||
],
|
|
||||||
"batch1_ms_per_window_median": 5.529599999135826,
|
|
||||||
"batch64_ms_per_window_median": 3.815724218725336
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"accuracy_subset": {
|
|
||||||
"description": "seed-42 file-level 70/15/15 test split, corrupted windows excluded, seed-42 random subset (same as quantize_bench/eval_ort_accuracy/static_ptq_bench)",
|
|
||||||
"subset_size": 10000
|
|
||||||
},
|
|
||||||
"accuracy": {
|
|
||||||
"tiny_onnx_fp32": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.941106667804718,
|
|
||||||
"pck@50": 0.99369333152771,
|
|
||||||
"mpjpe": 0.012527281279861927,
|
|
||||||
"wall_seconds": 10.927234888076782
|
|
||||||
},
|
|
||||||
"tiny_onnx_int8_static_percentile_conv": {
|
|
||||||
"samples": 10000,
|
|
||||||
"pck@20": 0.9268133331298828,
|
|
||||||
"pck@50": 0.9932933319091797,
|
|
||||||
"mpjpe": 0.014906252065300942,
|
|
||||||
"wall_seconds": 12.320892333984375
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
{"variant": "half", "params": 843834, "tcn_channels": [270, 220, 170, 120], "conv_channels": [4, 8, 16, 32], "attn_groups": 4, "groups_mode": "gcd20", "input_pw_groups": 1, "tcn_groups_per_block": [[20, 10], [10, 20], [20, 10], [10, 20]], "conv_strides": [2, 2, 2, 1], "final_width": 15, "batch_size": 64, "max_epochs": 50, "patience": 5, "lr": 0.0001, "weight_decay": 5e-05, "seed": 42, "precision": "fp32", "epochs_run": 28, "best_epoch": 23, "best_val_mpjpe": 0.008576328293592842, "best_val_pck20": 0.9690593021534107, "train_seconds": 1346.4, "torch": "2.11.0+cu128", "error": null, "finished_utc": "2026-06-11T03:09:47Z", "checkpoint": "/home/ruvultra/wiflow-std-bench/sweep/half_best.pth", "test_full": {"samples": 54000, "mpjpe": 0.009419974447676428, "pck@10": 0.8740543655289544, "pck@20": 0.9610469643628156, "pck@30": 0.9813556064146537, "pck@40": 0.9896086878246731, "pck@50": 0.9934827546013726}, "test_clean": {"samples": 52560, "mpjpe": 0.008980081718602137, "pck@10": 0.8840944136840205, "pck@20": 0.9662253179869514, "pck@30": 0.9847971080282144, "pck@40": 0.9917795997050618, "pck@50": 0.9946956242600532}}
|
|
||||||
{"variant": "quarter", "params": 338600, "tcn_channels": [135, 110, 85, 60], "conv_channels": [2, 4, 8, 16], "attn_groups": 2, "groups_mode": "gcd20", "input_pw_groups": 1, "tcn_groups_per_block": [[20, 5], [5, 10], [10, 5], [5, 20]], "conv_strides": [2, 2, 1, 1], "final_width": 15, "batch_size": 64, "max_epochs": 50, "patience": 5, "lr": 0.0001, "weight_decay": 5e-05, "seed": 42, "precision": "fp32", "epochs_run": 50, "best_epoch": 50, "best_val_mpjpe": 0.008780752391864856, "best_val_pck20": 0.9672531302240159, "train_seconds": 1754.4, "torch": "2.11.0+cu128", "error": null, "finished_utc": "2026-06-11T03:39:06Z", "checkpoint": "/home/ruvultra/wiflow-std-bench/sweep/quarter_best.pth", "test_full": {"samples": 54000, "mpjpe": 0.009705399298005634, "pck@10": 0.8646123917014511, "pck@20": 0.9553815319449813, "pck@30": 0.979827209190086, "pck@40": 0.9887037501511751, "pck@50": 0.9931309027671814}, "test_clean": {"samples": 52560, "mpjpe": 0.009279253277105465, "pck@10": 0.8742288637923323, "pck@20": 0.9605315079427745, "pck@30": 0.9833016723076865, "pck@40": 0.9908206971631566, "pck@50": 0.9942719799017071}}
|
|
||||||
{"variant": "tiny", "params": 56290, "tcn_channels": [68, 56, 44, 32], "conv_channels": [2, 4, 8, 16], "attn_groups": 2, "groups_mode": "depthwise", "input_pw_groups": 4, "tcn_groups_per_block": [[540, 68], [68, 56], [56, 44], [44, 32]], "conv_strides": [2, 1, 1, 1], "final_width": 16, "batch_size": 64, "max_epochs": 50, "patience": 5, "lr": 0.0001, "weight_decay": 5e-05, "seed": 42, "precision": "fp32", "epochs_run": 50, "best_epoch": 47, "best_val_mpjpe": 0.012602971208592256, "best_val_pck20": 0.9397210340146666, "train_seconds": 1540.1, "torch": "2.11.0+cu128", "error": null, "finished_utc": "2026-06-11T04:04:50Z", "checkpoint": "/home/ruvultra/wiflow-std-bench/sweep/tiny_best.pth", "test_full": {"samples": 54000, "mpjpe": 0.012859782406853305, "pck@10": 0.7640358444319831, "pck@20": 0.9364815320968628, "pck@30": 0.9731568422317505, "pck@40": 0.9866444962642811, "pck@50": 0.992488939108672}, "test_clean": {"samples": 52560, "mpjpe": 0.012502924276904246, "pck@10": 0.770895526488985, "pck@20": 0.9411073559313967, "pck@30": 0.9764840687790962, "pck@40": 0.9886695077067278, "pck@50": 0.9936238432039409}}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
{
|
|
||||||
"checkpoint": "/home/ruvultra/wiflow-std-bench/upstream/test/best_pose_model.pth",
|
|
||||||
"test_full": {
|
|
||||||
"samples": 54000,
|
|
||||||
"mpjpe": 0.009834060806367133,
|
|
||||||
"pck@10": 0.8686346120127925,
|
|
||||||
"pck@20": 0.9608815324571398,
|
|
||||||
"pck@30": 0.9789111610695168,
|
|
||||||
"pck@40": 0.9857975759682832,
|
|
||||||
"pck@50": 0.9898827553325229
|
|
||||||
},
|
|
||||||
"test_clean": {
|
|
||||||
"samples": 52560,
|
|
||||||
"mpjpe": 0.009432755044379373,
|
|
||||||
"pck@10": 0.876996495807189,
|
|
||||||
"pck@20": 0.9661454100405608,
|
|
||||||
"pck@30": 0.9823453060205306,
|
|
||||||
"pck@40": 0.987909734176537,
|
|
||||||
"pck@50": 0.9911238361167036
|
|
||||||
}
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -1,32 +0,0 @@
|
|||||||
{
|
|
||||||
"published": {
|
|
||||||
"pck@20": 0.9725,
|
|
||||||
"pck@30": 0.9863,
|
|
||||||
"pck@40": 0.9916,
|
|
||||||
"pck@50": 0.9948,
|
|
||||||
"mpjpe": 0.007
|
|
||||||
},
|
|
||||||
"params_millions": 2.225042,
|
|
||||||
"data_dir": "C:\\Users\\ruv\\.cache\\kagglehub\\datasets\\kaka2434\\wiflow-dataset\\versions\\1\\preprocessed_csi_data",
|
|
||||||
"device": "cpu",
|
|
||||||
"test_full": {
|
|
||||||
"samples": 54000,
|
|
||||||
"mpjpe": NaN,
|
|
||||||
"pck@10": 5.6790124349020145e-05,
|
|
||||||
"pck@20": 0.0007876543271596785,
|
|
||||||
"pck@30": 0.007780246982971827,
|
|
||||||
"pck@40": 0.05529259262923841,
|
|
||||||
"pck@50": 0.1542370371548114,
|
|
||||||
"wall_seconds": 118.03756999969482
|
|
||||||
},
|
|
||||||
"test_drop_last": {
|
|
||||||
"samples": 53952,
|
|
||||||
"mpjpe": NaN,
|
|
||||||
"pck@10": 5.6840649370682976e-05,
|
|
||||||
"pck@20": 0.0007883550872372227,
|
|
||||||
"pck@30": 0.007787168910892621,
|
|
||||||
"pck@40": 0.055318307667895535,
|
|
||||||
"pck@50": 0.15425316342412276,
|
|
||||||
"wall_seconds": 120.87458372116089
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Binary file not shown.
@@ -1,333 +0,0 @@
|
|||||||
"""ADR-152 edge optimization follow-up: ONNX Runtime STATIC post-training
|
|
||||||
quantization (calibration-based QDQ) of the retrained WiFlow-STD model, to
|
|
||||||
improve on the dynamic-int8 result (2.44 MB, PCK@20 96.52%, 6.5 ms/win b1).
|
|
||||||
|
|
||||||
Static PTQ pre-computes activation ranges from calibration data, so inference
|
|
||||||
uses QLinearConv/QDQ kernels instead of dynamic ConvInteger -- typically both
|
|
||||||
faster and (with good calibration) closer to fp32 accuracy.
|
|
||||||
|
|
||||||
Method:
|
|
||||||
- Calibration set: corruption-free windows drawn ONLY from the seed-42
|
|
||||||
file-level TRAINING split (same split as eval_repro.py; corrupted windows
|
|
||||||
excluded via results/nan_windows_mask.npy | big_windows_mask.npy), chosen
|
|
||||||
with np.random.default_rng(42). Never test windows.
|
|
||||||
- quantize_static, QuantFormat.QDQ, per-channel int8 weights, int8
|
|
||||||
activations; calibration methods MinMax / Entropy / Percentile(99.99);
|
|
||||||
scopes "all" (ORT default op set) vs "conv" (op_types_to_quantize=
|
|
||||||
["Conv"] -- leaves the attention path, which exports as Einsum/Softmax
|
|
||||||
and elementwise ops, in fp32).
|
|
||||||
- Model is pre-processed first (quant_pre_process: symbolic shape
|
|
||||||
inference + ORT graph optimization, folds BatchNormalization into Conv).
|
|
||||||
- Accuracy: identical protocol to eval_ort_accuracy.py -- the 10,000-window
|
|
||||||
seed-42 subset of the corruption-free test split (PCK@20/50, MPJPE).
|
|
||||||
- Latency: median ms/window at batch 1 (100 runs) and batch 64 (30 runs),
|
|
||||||
3 interleaved repetitions across all variants (fp32 and dynamic-int8
|
|
||||||
sessions included as same-session reference points).
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
PYTHONUTF8=1 .venv/Scripts/python.exe static_ptq_bench.py \
|
|
||||||
[--data-dir <preprocessed_csi_data>] [--subset 10000]
|
|
||||||
[--calib-minmax 1000] [--calib-hist 512] [--skip-accuracy]
|
|
||||||
|
|
||||||
Writes/merges into results/edge_optimization.json under key "onnx_static_ptq".
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import collections
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import platform
|
|
||||||
import statistics
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
|
|
||||||
HERE = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
sys.path.insert(0, HERE)
|
|
||||||
|
|
||||||
from _bench_common import RESULTS # noqa: E402
|
|
||||||
# quantize_bench sets up upstream imports + the np.load mmap patch
|
|
||||||
# (both via _bench_common.import_upstream)
|
|
||||||
from quantize_bench import build_test_subset # noqa: E402
|
|
||||||
import quantize_bench as qb # noqa: E402
|
|
||||||
from eval_ort_accuracy import evaluate_ort # noqa: E402
|
|
||||||
|
|
||||||
# ONNX artifacts used by this benchmark; all of them live under RESULTS.
# FP32_ONNX is the quantization source, DYN_INT8_ONNX is only used as a
# latency reference, PREPROC_ONNX is where quant_pre_process writes its output.
FP32_ONNX = os.path.join(RESULTS, "retrained_fp32_dynamic.onnx")
DYN_INT8_ONNX = os.path.join(RESULTS, "retrained_int8_ort_dynamic.onnx")
PREPROC_ONNX = os.path.join(RESULTS, "retrained_fp32_preproc.onnx")
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# calibration data: corruption-free TRAINING-split windows only
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def build_calibration_windows(data_dir, n_windows):
    """Draw calibration inputs from the clean part of the TRAIN split.

    Builds the seed-42 train/val/test loaders, keeps only the train indices
    that are not flagged in ``nan_windows_mask.npy`` or
    ``big_windows_mask.npy`` (both read from RESULTS and indexed by dataset
    index), then draws ``n_windows`` of them without replacement using
    ``np.random.default_rng(42)``.

    Args:
        data_dir: directory handed to ``PreprocessedCSIKeypointsDataset``.
        n_windows: number of calibration windows to draw (>= 1).

    Returns:
        float32 array stacking the first element of each selected dataset
        item, ordered by ascending dataset index.

    Raises:
        ValueError: if ``n_windows`` is < 1 or exceeds the number of clean
            train windows.
    """
    dataset = qb.PreprocessedCSIKeypointsDataset(
        data_dir=data_dir, keypoint_scale=1000.0, enable_temporal_clean=True)
    train_loader, _va, _te = qb.create_preprocessed_train_val_test_loaders(
        dataset=dataset, batch_size=64, num_workers=0, random_seed=42)
    train_indices = np.asarray(train_loader.dataset.indices)

    # A window is corrupted if either mask flags it.
    corrupted = (np.load(os.path.join(RESULTS, "nan_windows_mask.npy"))
                 | np.load(os.path.join(RESULTS, "big_windows_mask.npy")))
    clean = train_indices[~corrupted[train_indices]]
    print(f"train split: {len(train_indices)} windows, "
          f"{len(train_indices) - len(clean)} corrupted excluded, "
          f"{len(clean)} clean")

    # Fail with a readable message instead of an opaque error from
    # rng.choice / np.stack further down.
    if not 1 <= n_windows <= len(clean):
        raise ValueError(
            f"n_windows must be between 1 and {len(clean)} "
            f"(clean TRAIN windows available), got {n_windows}")

    rng = np.random.default_rng(42)
    # Sorted so the returned windows follow dataset order.
    sel = np.sort(rng.choice(clean, size=n_windows, replace=False))
    xs = np.stack([dataset[int(i)][0].numpy() for i in sel]).astype(np.float32)
    print(f"calibration tensor: {xs.shape} from {n_windows} clean TRAIN windows")
    return xs
|
|
||||||
|
|
||||||
|
|
||||||
def make_reader(windows, batch_size=64, input_name="input"):
    """Wrap calibration windows in an ORT ``CalibrationDataReader``.

    Args:
        windows: sliceable sequence of calibration inputs (sliced along its
            first axis into batches).
        batch_size: number of windows per calibration batch (>= 1).
        input_name: name of the model input each batch is fed under.

    Returns:
        A reader whose ``get_next()`` yields ``{input_name: batch}`` dicts
        and then ``None``; ``rewind()`` restarts the iteration and ``len()``
        gives the number of batches.

    Raises:
        ValueError: if ``batch_size`` is < 1 (a negative step would otherwise
            silently produce zero calibration batches).
    """
    # Validate before the (heavy) onnxruntime import so misuse fails fast.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    from onnxruntime.quantization import CalibrationDataReader

    class WindowReader(CalibrationDataReader):
        def __init__(self):
            self._batches = [windows[i:i + batch_size]
                             for i in range(0, len(windows), batch_size)]
            self._it = iter(self._batches)

        def get_next(self):
            b = next(self._it, None)
            return None if b is None else {input_name: b}

        def rewind(self):
            self._it = iter(self._batches)

        def __len__(self):
            return len(self._batches)

    return WindowReader()
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# quantization variants
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def preprocess_model():
    """Run ORT's quantization pre-processing on the fp32 model.

    Reads FP32_ONNX, writes the pre-processed graph to PREPROC_ONNX and
    returns that output path.
    """
    from onnxruntime.quantization import shape_inference

    shape_inference.quant_pre_process(FP32_ONNX, PREPROC_ONNX)
    return PREPROC_ONNX
|
|
||||||
|
|
||||||
|
|
||||||
def quantize_variant(src, dst, method, scope, calib_windows):
    """Statically quantize ``src`` into ``dst`` and describe the result.

    Quantization is QDQ-format, per-channel, int8 weights and activations.

    Args:
        src: path of the (pre-processed) fp32 ONNX model.
        dst: path the quantized model is written to.
        method: calibration method, one of "minmax", "entropy", "percentile"
            (percentile uses CalibPercentile=99.99).
        scope: "conv" to quantize only Conv nodes, "all" to leave
            ``op_types_to_quantize`` unset.
        calib_windows: calibration inputs, passed to ``make_reader``.

    Returns:
        dict with the output file name, its size, the calibration settings
        (including wall-clock seconds of ``quantize_static``), the scope and
        quantization types, and a per-op-type node count of the output graph.

    Raises:
        ValueError: if ``method`` or ``scope`` is not one of the values above.
    """
    # Validate up front, before any heavy import: an unknown scope used to be
    # treated silently as "all" and then recorded verbatim in the results.
    calib_attr = {
        "minmax": "MinMax",
        "entropy": "Entropy",
        "percentile": "Percentile",
    }
    if method not in calib_attr:
        raise ValueError(
            f"unknown calibration method {method!r}; "
            f"expected one of {sorted(calib_attr)}")
    if scope not in ("all", "conv"):
        raise ValueError(
            f"unknown scope {scope!r}; expected 'all' or 'conv'")

    import onnx
    from onnxruntime.quantization import (CalibrationMethod, QuantFormat,
                                          QuantType, quantize_static)

    # NB: do NOT pass CalibMaxIntermediateOutputs -- in ORT 1.26 the MinMax
    # calibrater clears its buffer every N batches and then raises
    # "No data is collected" if the batch count is divisible by N.
    extra = {}
    if method == "percentile":
        extra["CalibPercentile"] = 99.99
    op_types = ["Conv"] if scope == "conv" else None

    t0 = time.time()
    quantize_static(
        src, dst, make_reader(calib_windows),
        quant_format=QuantFormat.QDQ,
        op_types_to_quantize=op_types,
        per_channel=True,
        activation_type=QuantType.QInt8,
        weight_type=QuantType.QInt8,
        calibrate_method=getattr(CalibrationMethod, calib_attr[method]),
        extra_options=extra,
    )
    secs = time.time() - t0

    size_bytes = os.path.getsize(dst)
    ops = collections.Counter(n.op_type for n in onnx.load(dst).graph.node)
    return {
        "file": os.path.basename(dst),
        "size_bytes": size_bytes,
        "size_mb": size_bytes / 1e6,
        "calibration": {"method": method,
                        "windows": int(len(calib_windows)),
                        "percentile": extra.get("CalibPercentile"),
                        "seconds": secs},
        "scope": scope,
        "per_channel": True,
        "activation_type": "QInt8",
        "weight_type": "QInt8",
        "node_counts": dict(sorted(ops.items())),
    }
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# latency (3 interleaved reps, like the latency_controlled_rerun)
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def ort_session(path):
    """Open the ONNX model at ``path`` as an ORT session restricted to the
    CPU execution provider."""
    from onnxruntime import InferenceSession

    cpu_only = ["CPUExecutionProvider"]
    return InferenceSession(path, providers=cpu_only)
|
|
||||||
|
|
||||||
|
|
||||||
def bench_ort(sess, batch, n_runs, window_shape=(540, 20)):
    """Measure the median per-window latency of ``sess`` on random input.

    The input is a fixed-seed (123) random float32 array of shape
    ``(batch, *window_shape)`` fed to the session's first input. A warm-up of
    ``max(5, n_runs // 10)`` untimed runs precedes ``n_runs`` timed runs.

    Args:
        sess: object exposing ``get_inputs()`` and ``run(None, feed)``.
        batch: batch size of the synthetic input (>= 1).
        n_runs: number of timed runs (>= 1).
        window_shape: per-window input shape; defaults to (540, 20).

    Returns:
        Median run time in milliseconds divided by ``batch`` (ms/window).

    Raises:
        ValueError: if ``batch`` or ``n_runs`` is < 1.
    """
    # Guard against a division by zero / median of an empty list below.
    if batch < 1:
        raise ValueError(f"batch must be >= 1, got {batch}")
    if n_runs < 1:
        raise ValueError(f"n_runs must be >= 1, got {n_runs}")

    rng = np.random.default_rng(123)
    x = rng.random((batch, *window_shape), dtype=np.float32)
    feed = {sess.get_inputs()[0].name: x}

    # Untimed warm-up runs.
    for _ in range(max(5, n_runs // 10)):
        sess.run(None, feed)

    times = []
    for _ in range(n_runs):
        t0 = time.perf_counter()
        sess.run(None, feed)
        times.append(time.perf_counter() - t0)
    return statistics.median(times) * 1e3 / batch  # ms/window
|
|
||||||
|
|
||||||
|
|
||||||
def interleaved_latency(sessions, reps=3, runs_b1=100, runs_b64=30):
    """Benchmark every session ``reps`` times, interleaving the variants.

    Each repetition visits all sessions in turn, measuring batch 1
    (``runs_b1`` runs) and batch 64 (``runs_b64`` runs) via ``bench_ort``.

    Returns:
        dict mapping session name to its per-repetition measurements
        ("batch1_reps", "batch64_reps") and the median of each list.
    """
    lat = {}
    for name in sessions:
        lat[name] = {"batch1_reps": [], "batch64_reps": []}

    for rep in range(reps):
        for name, sess in sessions.items():
            entry = lat[name]
            entry["batch1_reps"].append(bench_ort(sess, 1, runs_b1))
            entry["batch64_reps"].append(bench_ort(sess, 64, runs_b64))
            print(f" rep {rep + 1}/{reps} {name}: "
                  f"b1={entry['batch1_reps'][-1]:.2f} "
                  f"b64={entry['batch64_reps'][-1]:.3f} ms/win", flush=True)

    # Summarize each variant by the median over its repetitions.
    for entry in lat.values():
        entry["batch1_ms_per_window_median"] = statistics.median(
            entry["batch1_reps"])
        entry["batch64_ms_per_window_median"] = statistics.median(
            entry["batch64_reps"])
    return lat
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def main():
    """Run the static-PTQ benchmark and merge its results into a JSON file.

    Stages: parse CLI arguments, build calibration windows, pre-process the
    fp32 model, quantize every (method, scope) combination, compare each
    variant with the parity fixture, measure latency (with the fp32 and
    dynamic-int8 models as references), optionally measure accuracy, and
    finally merge everything under the "onnx_static_ptq" key of ``--out``.
    """
    import onnxruntime

    parser = argparse.ArgumentParser()
    parser.add_argument("--data-dir", default=os.path.join(
        os.path.expanduser("~"), ".cache", "kagglehub", "datasets", "kaka2434",
        "wiflow-dataset", "versions", "1", "preprocessed_csi_data"))
    parser.add_argument("--subset", type=int, default=10000)
    parser.add_argument("--calib-minmax", type=int, default=1000)
    parser.add_argument("--calib-hist", type=int, default=512,
                        help="calibration windows for Entropy/Percentile "
                             "(histogram calibraters hold all intermediate "
                             "activations in RAM)")
    parser.add_argument("--skip-accuracy", action="store_true")
    parser.add_argument("--methods", default="minmax,entropy,percentile",
                        help="comma list of calibration methods to (re)run; "
                             "results merge into existing onnx_static_ptq")
    parser.add_argument("--out", default=os.path.join(RESULTS, "edge_optimization.json"))
    args = parser.parse_args()

    results = {
        "env": {
            "onnxruntime": onnxruntime.__version__,
            "torch": torch.__version__,
            "platform": platform.platform(),
            "source_model": os.path.basename(FP32_ONNX),
        },
        "variants": {},
    }

    # ---- calibration data (TRAIN split only) -------------------------------
    calib_mm = build_calibration_windows(args.data_dir, args.calib_minmax)
    # build_calibration_windows returns its windows sorted by dataset index,
    # so a prefix slice would only ever contain the lowest-index windows.
    # Draw a seeded random subset instead so the histogram calibraters see
    # the same spread of the TRAIN split as MinMax does.
    n_hist = min(args.calib_hist, len(calib_mm))
    hist_rows = np.sort(np.random.default_rng(42).choice(
        len(calib_mm), size=n_hist, replace=False))
    calib_hist = calib_mm[hist_rows]

    # ---- preprocess + quantize ---------------------------------------------
    print("\n=== quant_pre_process (shape inference + graph optimization) ===")
    src = preprocess_model()
    results["env"]["preprocessed_model"] = {
        "file": os.path.basename(src),
        "size_mb": os.path.getsize(src) / 1e6,
    }

    # Tolerate "minmax, entropy" and trailing commas in --methods.
    methods = [m.strip() for m in args.methods.split(",") if m.strip()]
    matrix = [(m, s) for m in methods
              for s in ("all", "conv")]
    for method, scope in matrix:
        name = f"{method}_{scope}"
        dst = os.path.join(RESULTS, f"retrained_int8_static_{name}.onnx")
        calib = calib_mm if method == "minmax" else calib_hist
        print(f"\n=== quantize_static: {name} "
              f"({len(calib)} calib windows) ===", flush=True)
        try:
            results["variants"][name] = quantize_variant(
                src, dst, method, scope, calib)
            print(f" {results['variants'][name]['size_mb']:.3f} MB")
        except Exception as e:  # noqa: BLE001
            # Best effort: one failing variant must not abort the whole sweep.
            results["variants"][name] = {"error": f"{type(e).__name__}: {e}"}
            print(f" FAILED: {e}")

    # ---- fixture parity (sanity, batch 2) ----------------------------------
    # NpzFile holds an open file handle; close it once the arrays are read.
    with np.load(os.path.join(RESULTS, "parity_fixture.npz")) as fixture:
        fx, fy = fixture["input"], fixture["output"]
    sessions = {}
    for name, info in results["variants"].items():
        if "error" in info:
            continue
        path = os.path.join(RESULTS, info["file"])
        try:
            sess = ort_session(path)
            yq = sess.run(None, {sess.get_inputs()[0].name: fx})[0]
            info["max_abs_diff_vs_fp32_fixture"] = float(np.abs(yq - fy).max())
            sessions[name] = sess
        except Exception as e:  # noqa: BLE001
            info["run_error"] = f"{type(e).__name__}: {e}"
            # Surface the failure; the variant is skipped for latency/accuracy.
            print(f" {name}: fixture run failed: {info['run_error']}")
    print("\nfixture max-abs-diff vs fp32:",
          {n: round(results["variants"][n].get("max_abs_diff_vs_fp32_fixture",
                                               float("nan")), 5)
           for n in results["variants"]})

    # ---- latency: 3 interleaved reps incl. fp32 + dynamic-int8 reference ----
    print("\n=== latency (3 interleaved reps) ===")
    lat_sessions = {"onnx_fp32": ort_session(FP32_ONNX),
                    "onnx_int8_ort_dynamic": ort_session(DYN_INT8_ONNX)}
    lat_sessions.update(sessions)
    results["latency"] = {
        "note": "3 interleaved repetitions per variant, median ms/window; "
                "onnx_fp32 / onnx_int8_ort_dynamic are same-session references",
        **interleaved_latency(lat_sessions),
    }

    # ---- accuracy on the standard 10k corruption-free test subset ----------
    if not args.skip_accuracy:
        loader, n_clean = build_test_subset(args.data_dir, args.subset)
        results["accuracy_subset"] = {
            "description": "seed-42 file-level 70/15/15 test split, corrupted "
                           "windows excluded, seed-42 random subset (same as "
                           "quantize_bench/eval_ort_accuracy)",
            "subset_size": min(args.subset, n_clean) if args.subset else n_clean,
        }
        for name, sess in sessions.items():
            print(f"\n=== accuracy: {name} ===")
            results["variants"][name]["accuracy"] = evaluate_ort(
                sess, loader, name)
            print(json.dumps(results["variants"][name]["accuracy"], indent=2))

    # ---- merge into edge_optimization.json ----------------------------------
    merged = {}
    if os.path.exists(args.out):
        with open(args.out, encoding="utf-8") as f:
            merged = json.load(f)
    prev = merged.get("onnx_static_ptq")
    if prev:  # nested merge so partial --methods reruns don't clobber
        prev["env"] = results["env"]
        prev.setdefault("variants", {}).update(results["variants"])
        prev.setdefault("latency", {}).update(results["latency"])
        if "accuracy_subset" in results:
            prev["accuracy_subset"] = results["accuracy_subset"]
    else:
        merged["onnx_static_ptq"] = results

    # Write to a sibling temp file and swap it in, so an interrupted run
    # cannot leave a truncated results file behind.
    tmp_out = f"{args.out}.tmp"
    with open(tmp_out, "w", encoding="utf-8") as f:
        json.dump(merged, f, indent=2)
    os.replace(tmp_out, args.out)
    print(f"\nwrote {args.out}")
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: run the benchmark only when executed directly.
if __name__ == "__main__":
    main()
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user