diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e1836c7f..4b3990dc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,7 +9,7 @@ on:
 
 env:
   PYTHON_VERSION: '3.11'
-  NODE_VERSION: '18'
+  NODE_VERSION: '20'  # ADR-265: all Node packages in this repo declare engines >= 20
   REGISTRY: ghcr.io
   IMAGE_NAME: ${{ github.repository }}
 
diff --git a/.github/workflows/npm-packages.yml b/.github/workflows/npm-packages.yml
new file mode 100644
index 00000000..f057aca2
--- /dev/null
+++ b/.github/workflows/npm-packages.yml
@@ -0,0 +1,163 @@
+# ADR-265 D1 — the npm-package gate.
+#
+# Every Node package in this repo (published or private) gets: install, build,
+# tests, a version-literal gate (D3 — package.json is the only place a version
+# lives), a pack-content gate (no source maps, unpacked-size budget), a
+# tarball-install smoke test (would have caught ADR-264 F1's broken `require`
+# export), and the claim-check honesty lint on the README (D4).
+
+name: npm packages
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'harness/ruview/**'
+      - 'tools/ruview-mcp/**'
+      - 'tools/ruview-cli/**'
+      - '.github/workflows/npm-packages.yml'
+  pull_request:
+    paths:
+      - 'harness/ruview/**'
+      - 'tools/ruview-mcp/**'
+      - 'tools/ruview-cli/**'
+      - '.github/workflows/npm-packages.yml'
+
+permissions:
+  contents: read
+
+jobs:
+  gate:
+    name: ${{ matrix.package.dir }} (node ${{ matrix.node }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        node: ['20', '22']
+        package:
+          - dir: harness/ruview
+            build: false
+            publishable: true
+            # ADR-263: dependency-free harness; budget guards against dep creep.
+            unpacked_budget: 65536
+          - dir: tools/ruview-mcp
+            build: true
+            publishable: true
+            # ADR-264 O2: map-free tarball (was 188 kB with maps).
+            unpacked_budget: 140000
+          - dir: tools/ruview-cli
+            build: true
+            publishable: false
+            unpacked_budget: 0
+    defaults:
+      run:
+        working-directory: ${{ matrix.package.dir }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: ${{ matrix.node }}
+
+      # Repo policy gitignores lockfiles under harness/ (the harness is
+      # dependency-free anyway); the TS packages commit theirs.
+      - name: Install
+        run: |
+          if [ -f package-lock.json ]; then npm ci; else npm install --no-fund --no-audit; fi
+
+      - name: Build
+        if: ${{ matrix.package.build }}
+        run: npm run build
+
+      - name: Test
+        run: npm test --if-present
+
+      # ADR-265 D3 — package.json is the only place a version string lives.
+      - name: Version-literal gate
+        run: |
+          set -euo pipefail
+          # One grep over every directory that exists: appending per-directory
+          # output glued the last src hit onto the first bin hit in the report.
+          dirs=()
+          for d in src bin; do
+            if [ -d "$d" ]; then dirs+=("$d"); fi
+          done
+          hits=""
+          if [ "${#dirs[@]}" -gt 0 ]; then
+            hits="$(grep -rEn '\b[0-9]+\.[0-9]+\.[0-9]+\b' "${dirs[@]}" | grep -vE '127\.0\.0\.1|0\.0\.0\.0' || true)"
+          fi
+          if [ -n "$hits" ]; then
+            echo "Hardcoded version-like literals found (read package.json instead — ADR-265 D3):"
+            echo "$hits"
+            exit 1
+          fi
+
+      # ADR-265 D1.3 — pack-content gate: no maps, size budget enforced.
+      - name: Pack gate
+        if: ${{ matrix.package.publishable }}
+        run: |
+          set -euo pipefail
+          npm pack --dry-run --json 2>/dev/null | node -e "
+            const [info] = JSON.parse(require('fs').readFileSync(0, 'utf8'));
+            const budget = Number(process.env.UNPACKED_BUDGET);
+            // Fail closed: a non-numeric budget would turn the size check into a no-op (x > NaN is false).
+            if (!Number.isFinite(budget) || budget <= 0) {
+              console.error('UNPACKED_BUDGET must be a positive number of bytes, got:', process.env.UNPACKED_BUDGET);
+              process.exit(1);
+            }
+            const maps = info.files.filter((f) => f.path.endsWith('.map'));
+            if (maps.length > 0) {
+              console.error('Tarball contains source maps (ADR-264 F2):', maps.map((m) => m.path));
+              process.exit(1);
+            }
+            if (info.unpackedSize > budget) {
+              console.error(\`Unpacked size \${info.unpackedSize} B exceeds budget \${budget} B\`);
+              process.exit(1);
+            }
+            console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked (budget \${budget} B), 0 maps\`);
+          "
+        env:
+          UNPACKED_BUDGET: ${{ matrix.package.unpacked_budget }}
+
+      # ADR-265 D1.4 — install the real tarball and drive each bin/export.
+      - name: Tarball smoke test
+        if: ${{ matrix.package.publishable }}
+        run: |
+          set -euo pipefail
+          TGZ="$PWD/$(npm pack --silent 2>/dev/null | tail -1)"
+          SMOKE="$(mktemp -d)"
+          cd "$SMOKE"
+          npm init -y > /dev/null
+          npm i --no-fund --no-audit "$TGZ"
+          case "${{ matrix.package.dir }}" in
+            harness/ruview)
+              ./node_modules/.bin/ruview --version
+              ./node_modules/.bin/ruview doctor
+              # the honesty gate must fail closed on empty input (ADR-263 F1);
+              # stdin is pinned to /dev/null so "empty" never depends on the runner
+              if ./node_modules/.bin/ruview claim-check < /dev/null; then
+                echo 'claim-check passed with no input — fail-open regression'; exit 1
+              fi
+              node --input-type=module -e "const m = await import('@ruvnet/ruview'); if (!m.TOOLS) process.exit(1);"
+              ;;
+            tools/ruview-mcp)
+              # initialize over stdio; server must answer and exit 0 on EOF
+              printf '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"ci","version":"0"}}}\n' \
+                | timeout 30 ./node_modules/.bin/rvagent | grep -q '"serverInfo"'
+              # the ESM export must resolve from the installed tarball (ADR-264 F1)
+              timeout 30 node --input-type=module -e "await import('@ruvnet/rvagent');" < /dev/null
+              ;;
+            *)
+              # fail closed: a publishable package with no smoke test must not pass silently
+              echo "No tarball smoke test defined for '${{ matrix.package.dir }}'"; exit 1
+              ;;
+          esac
+
+      # ADR-265 D4 — package READMEs must pass the project's own honesty lint.
+      - name: Claim-check README
+        run: |
+          if [ -f README.md ]; then
+            node "$GITHUB_WORKSPACE/harness/ruview/bin/cli.js" claim-check --file README.md
+          else
+            echo "no README.md — skipping"
+          fi
diff --git a/.github/workflows/ruview-npm-release.yml b/.github/workflows/ruview-npm-release.yml
new file mode 100644
index 00000000..ce95ce7f
--- /dev/null
+++ b/.github/workflows/ruview-npm-release.yml
@@ -0,0 +1,158 @@
+# ADR-265 D2 — publish only from CI, with provenance.
+#
+# Manual `npm publish` from laptops stops: this workflow re-runs the ADR-265 D1
+# gate for the selected package and then publishes with npm provenance
+# attestations (OIDC), tying every published version to a public commit +
+# workflow run — the npm-side analogue of the ADR-028 witness bundle.
+#
+# Requires: NPM_TOKEN repo secret (an npm automation token), or npm Trusted
+# Publishing configured for the package (in which case the token is unused).
+# NOTE(review): npm's trusted-publishing docs call for npm >= 11.5.1, newer than
+# the npm 10.x bundled with Node 20 — confirm (or upgrade npm in this job)
+# before relying on the token-less path.
+
+name: ruview npm release
+
+on:
+  workflow_dispatch:
+    inputs:
+      package:
+        description: 'Package directory to publish'
+        required: true
+        type: choice
+        options:
+          - harness/ruview
+          - tools/ruview-mcp
+      dist_tag:
+        description: 'npm dist-tag'
+        required: false
+        default: 'latest'
+        type: string
+
+permissions:
+  contents: read
+  id-token: write  # npm --provenance
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ${{ inputs.package }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          registry-url: 'https://registry.npmjs.org'
+
+      - name: Install
+        run: |
+          if [ -f package-lock.json ]; then npm ci; else npm install --no-fund --no-audit; fi
+
+      - name: Build (if present)
+        run: npm run build --if-present
+
+      - name: Test
+        run: npm test --if-present
+
+      # ADR-265 D3 — package.json is the only place a version string lives.
+      - name: Version-literal gate
+        run: |
+          set -euo pipefail
+          # One grep over every directory that exists: appending per-directory
+          # output glued the last src hit onto the first bin hit in the report.
+          dirs=()
+          for d in src bin; do
+            if [ -d "$d" ]; then dirs+=("$d"); fi
+          done
+          hits=""
+          if [ "${#dirs[@]}" -gt 0 ]; then
+            hits="$(grep -rEn '\b[0-9]+\.[0-9]+\.[0-9]+\b' "${dirs[@]}" | grep -vE '127\.0\.0\.1|0\.0\.0\.0' || true)"
+          fi
+          if [ -n "$hits" ]; then
+            echo "Hardcoded version-like literals found (read package.json instead — ADR-265 D3):"
+            echo "$hits"
+            exit 1
+          fi
+
+      # ADR-265 D1.3 — pack-content gate: no maps AND the per-package
+      # unpacked-size budget (the budgets that npm-packages.yml enforces).
+      - name: Pack gate (no maps + size budget)
+        run: |
+          set -euo pipefail
+          case "${{ inputs.package }}" in
+            # ADR-263: dependency-free harness; budget guards against dep creep.
+            harness/ruview) export UNPACKED_BUDGET=65536 ;;
+            # ADR-264 O2: map-free tarball (was 188 kB with maps).
+            tools/ruview-mcp) export UNPACKED_BUDGET=140000 ;;
+            *) echo "Unknown package '${{ inputs.package }}' — no budget defined"; exit 1 ;;
+          esac
+          npm pack --dry-run --json 2>/dev/null | node -e "
+            const [info] = JSON.parse(require('fs').readFileSync(0, 'utf8'));
+            const budget = Number(process.env.UNPACKED_BUDGET);
+            // Fail closed: a non-numeric budget would turn the size check into a no-op (x > NaN is false).
+            if (!Number.isFinite(budget) || budget <= 0) {
+              console.error('UNPACKED_BUDGET must be a positive number of bytes, got:', process.env.UNPACKED_BUDGET);
+              process.exit(1);
+            }
+            const maps = info.files.filter((f) => f.path.endsWith('.map'));
+            if (maps.length > 0) {
+              console.error('Tarball contains source maps (ADR-264 F2):', maps.map((m) => m.path));
+              process.exit(1);
+            }
+            if (info.unpackedSize > budget) {
+              console.error(\`Unpacked size \${info.unpackedSize} B exceeds budget \${budget} B\`);
+              process.exit(1);
+            }
+            console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked (budget \${budget} B), 0 maps\`);
+          "
+
+      # ADR-265 D1.4 — install the real tarball and drive each bin/export.
+      - name: Tarball smoke test
+        run: |
+          set -euo pipefail
+          TGZ="$PWD/$(npm pack --silent 2>/dev/null | tail -1)"
+          SMOKE="$(mktemp -d)"
+          cd "$SMOKE"
+          npm init -y > /dev/null
+          npm i --no-fund --no-audit "$TGZ"
+          case "${{ inputs.package }}" in
+            harness/ruview)
+              ./node_modules/.bin/ruview --version
+              ./node_modules/.bin/ruview doctor
+              # the honesty gate must fail closed on empty input (ADR-263 F1);
+              # stdin is pinned to /dev/null so "empty" never depends on the runner
+              if ./node_modules/.bin/ruview claim-check < /dev/null; then
+                echo 'claim-check passed with no input — fail-open regression'; exit 1
+              fi
+              node --input-type=module -e "const m = await import('@ruvnet/ruview'); if (!m.TOOLS) process.exit(1);"
+              ;;
+            tools/ruview-mcp)
+              # initialize over stdio; server must answer and exit 0 on EOF
+              printf '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"ci","version":"0"}}}\n' \
+                | timeout 30 ./node_modules/.bin/rvagent | grep -q '"serverInfo"'
+              # the ESM export must resolve from the installed tarball (ADR-264 F1)
+              timeout 30 node --input-type=module -e "await import('@ruvnet/rvagent');" < /dev/null
+              ;;
+            *)
+              # fail closed: never publish a package that has no smoke test defined
+              echo "No tarball smoke test defined for '${{ inputs.package }}'"; exit 1
+              ;;
+          esac
+
+      - name: Claim-check README
+        run: |
+          if [ -f README.md ]; then
+            node "$GITHUB_WORKSPACE/harness/ruview/bin/cli.js" claim-check --file README.md
+          fi
+
+      # dist_tag is a free-form workflow_dispatch input: hand it to the shell as
+      # an environment variable rather than expanding the expression inside the
+      # script, so its value can never be parsed as shell syntax.
+      - name: Publish (with provenance)
+        run: npm publish --provenance --access public --tag "$DIST_TAG"
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+          DIST_TAG: ${{ inputs.dist_tag }}
diff --git a/CHANGELOG.md b/CHANGELOG.md index 4173107f..a02bd8e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed +- **`@ruvnet/rvagent` startup optimization — stdio time-to-first-response ~242 ms → ~189 ms (−22%; MEASURED, median of repeated `initialize` round-trips against `dist/index.js`, this container, reproduce with a piped-stdin timer).** Two changes: (1) `./http-transport.js` is now imported **lazily** inside 
the `RVAGENT_HTTP_PORT` branch — it chain-loads the MCP SDK's `streamableHttp` module (~48 ms MEASURED via per-module `import()` timing), which the default stdio path never uses; (2) the advertised JSON Schemas generated from the Zod sources are memoized per tool instead of re-walking the Zod tree on every `tools/list` (matters under the session-per-server HTTP model where each session lists tools). No behavior change: 99/99 jest tests, HTTP session flow re-smoke-tested through the lazy path. The `@ruvnet/ruview` harness CLI was profiled too and left alone — 50 ms vs the ~29 ms bare `node -e ''` floor on the same box (MEASURED), i.e. already near the interpreter floor with zero dependencies. + ### Fixed +- **ADR-263/264/265 implemented — the RuView npm surface fixed end-to-end (`@ruvnet/ruview@0.2.0`, `@ruvnet/rvagent@0.2.0`, `@ruv/ruview-cli`).** Harness (ADR-263 O1–O9): `claim-check` now **fails closed** on empty input (CLI exit 2 + `empty_text` tool error); the MCP stdio server dispatches `tools/call` asynchronously over promise-based `spawn` — `ping` answers while a long `verify`/`calibrate` runs (pinned by a new e2e test that runs a 3 s fake proof and asserts sub-second ping); the two `optionalDependencies` are gone so a cold `npx` installs exactly 1 package (MEASURED: was 4 packages / 620 kB / 71 files, `npm i` in a clean prefix); child output is captured as bounded rolling tails (no more 1 MiB `maxBuffer` kills); `node_monitor` passes the port via `sys.argv` instead of splicing it into `python -c` source; the MCP `serverInfo.version` reads package.json; `.claude/skills/*/SKILL.md` are generated from `skills/*.md` by a `prepack` sync script (byte-equality pinned by test); `which()` is a memoized dep-free PATH scan; tools are underscore-canonical (`ruview_claim_check`, …) with the dotted names accepted as call-time aliases, plus `resources/list`/`prompts/list` stubs; the guardrail's `METRIC_TERMS` matching is precision-fixed (word-boundary 
`map`/`f1`/`auc`/`iou`, code-span + label scrubbing, quantitative-claims-only) — ADR-263/264/265 and both package READMEs now PASS `claim-check` while real untagged claims still flag. 30/30 tests (MEASURED, `node --test`). rvagent (ADR-264 O1–O9): `exports` fixed (types-first, the never-built `dist/index.cjs` `require` target removed — verified broken in the published 0.1.0 tarball); tarball is map-free (127,704 B unpacked / 46 files / 0 maps — MEASURED, `npm pack --dry-run`, down from 188 kB with 44 maps); the Streamable HTTP transport is **actually wired** behind `RVAGENT_HTTP_PORT` with one transport + one MCP server per session (`mcp-session-id` routing), a 1 MiB body cap (413), and a port-aware localhost origin gate — the "dual-transport" description is now true; tools renamed to underscore-canonical with dotted router aliases; ONE Zod validation gate per call with the advertised JSON Schema generated from the same Zod source (`zod-to-json-schema`); `train_count` closes its log fds (was leaking 2/job) and persists job records to `/.json` so `job_status` survives restarts, with bounded log-tail reads; `detectCogBinary` actually probes its candidate paths; version reads package.json; `@types/express` dropped, `@types/jest` aligned to jest 29; README rewritten to match reality (no phantom `stdio`/`http`/`policy grant` subcommands; unimplemented ADR-124 catalog tools labeled roadmap). 99/99 jest tests (MEASURED); stdio handshake + HTTP session flow + 403/400/404/413 gates smoke-tested live. CLI: bin renamed `ruview-cli` (the `ruview` bin belongs to `@ruvnet/ruview`, ADR-265 D4), version single-sourced. Distribution (ADR-265 D1–D4): new `npm-packages.yml` (3-package × Node 20/22 matrix: tests, version-literal grep gate, pack-content/size gate, tarball-install smoke test incl. 
the fail-closed claim-check and an ESM-import probe that would have caught the broken `require` export, README claim-check) and `ruview-npm-release.yml` (publish from CI only, `npm publish --provenance`); `ci.yml` NODE_VERSION 18→20. - **Multistatic fusion never ran on a mixed-mode ESP32 mesh — live bridge fed raw, un-canonicalized per-node CSI to the fuser (#1170).** `node_frame_from_state` (`multistatic_bridge.rs`) wrapped each node's **raw** amplitude vector (HT20 ≈ 64 bins, HT40 ≈ 128/192) into a struct *named* `CanonicalCsiFrame` without ever resampling, so `MultistaticFuser::fuse` tripped `DimensionMismatch` on every cycle, silently fell back to per-node sum/dedup, and spun `total_engine_errors` unbounded. Added `HardwareNormalizer::resample_to_canonical` (resample-only, **no z-score** — preserves the amplitude scale the person-score's `variance/mean²` relies on) and run every node frame through it onto the canonical 56-tone grid before fusion. Heterogeneous meshes now fuse instead of erroring. Pinned by `heterogeneous_node_counts_canonicalize_and_fuse` (mixed 64/192 → fuses), `resample_to_canonical_is_length_only_no_zscore`, and an updated `test_node_frame_conversion`; the pre-existing `engine_bridge::observe_cycle_counts_engine_errors` was retargeted to force a `TimestampMismatch` (its old 56-vs-30 setup now canonicalizes cleanly). `wifi-densepose-signal` 501 / `wifi-densepose-sensing-server` 677 tests, 0 failed. - **`csi_fps_ema` reported the CSI frame rate 40–840× too high under bursty UDP delivery (#1180).** `update_csi_fps_ema` only rejected deltas `≤ 0` or `≥ 1 s`, so a 36 µs intra-burst arrival delta yielded `1/dt ≈ 27 kHz` straight into the EMA — the metric measured server arrival jitter, not the node's ~40 fps production rate. 
Added a `MIN_PLAUSIBLE_CSI_DT_SEC = 0.005` floor (derived from the firmware's 50 fps `CSI_MIN_SEND_INTERVAL_US` ceiling, ×4 slack) and made `observe_csi_frame_arrival` keep its anchor across sub-floor bursts so the next genuine inter-frame gap measures true cadence. Pinned by `subms_burst_delta_rejected`, `burst_interleaved_with_nominal_stays_in_band`, and `observe_csi_frame_arrival_ignores_subms_bursts`. - **`stream_sender` ENOMEM backoff starved low-rate control packets under a weak uplink (#1183, follow-up to #1135/#1159).** The global `s_backoff_until_us` gate (triggered by the 50 Hz CSI flood at weak RSSI) also suppressed the ≤48 B, ≤1 Hz `feature_state` / mesh `HEALTH` / sync packets that contribute negligible buffer pressure, so telemetry failed essentially every cycle. Added `stream_sender_send_priority()` — bypasses the backoff gate, reports ENOMEM quietly, and never extends/resets the global streak — and routed `feature_state`, HEALTH/anomaly (`rv_mesh_send`), and sync packets through it. Also fixed the misleading `"HEALTH sent"` log that printed unconditionally even when `rv_mesh_send` returned `ESP_FAIL` (now prints `sent`/`FAILED` from the actual return). Firmware builds clean (ESP-IDF v5.4). @@ -26,6 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **`homecore-recorder` security review (ADR-132 surfaces) — two real bounding fixes; SQL-injection & NaN-index dimensions confirmed clean with evidence.** Beyond-SOTA review of the HA-compat state recorder (DB persistence + history + ruvector semantic search), the crux being its DB-backed SQL-injection surface. **Findings + fixes:** (1) **Memory-DoS — unbounded `get_state_history`.** The history query carried no `LIMIT`, so a wide `[since, until]` window over a high-frequency entity (a per-second sensor ≈ 86k rows/day) would load an unbounded row set into a single in-memory `Vec`. 
Added a hard `LIMIT MAX_HISTORY_ROWS` (1,000,000 — generous enough never to truncate a realistic history graph, bounded enough to cap the worst case); the sibling search paths were already `k`-bounded. (2) **Disk-DoS / documented-but-missing `purge`.** The README + HA-compat table advertised `Recorder::purge(older_than)` as a capability, but **no such method existed** — i.e. no retention path at all → unbounded disk growth. Implemented a **transactional** `purge` that deletes `states` + `events` strictly **older than** the cutoff (**exclusive** boundary — idempotent, no off-by-one; a row at the cutoff instant is kept) and **garbage-collects** orphaned `state_attributes` blobs (a dedup-shared blob is dropped only once its last referencing state is gone); all three deletes run in one transaction so a mid-purge failure rolls back cleanly (no states-deleted-but-events-kept corruption). **Confirmed clean with evidence:** SQL injection — **every** query in `db.rs` uses bound `?` parameters (no `format!`/string-concat of user data into SQL); the lone `format!` builds the LIKE *pattern*, which is itself bound as a parameter with `ESCAPE '\\'` and metacharacter escaping. Pinned: a state value `'; DROP TABLE states; --` is stored/queried **literally** (table survives), and a `%`/`_` in a search query matches **literally**, not as a wildcard. NaN-index poisoning (the calibration/vitals/geo class) — **structurally impossible** here: embeddings are SHA-256 → `i32` → `f32` (an `i32` cast to `f32` is always finite, never NaN/Inf), with an all-zero-digest norm guard; probed empty-index search, empty-string query, and `k=0` — all return `Ok(0)`, **no panic**. Fail-closed write path — a removal event yields `Ok(None)`, semantic-index failure is logged not propagated (best-effort, never blocks the durable SQLite write), and `EntityId` parsing failures fall back rather than panic. 
**6 new pinning tests** (SQL-injection literal-storage, LIKE-metacharacter literalness, history `LIMIT`, purge exclusive-boundary, purge attribute-GC-keeps-shared, purge old-events): `homecore-recorder` **19 → 25** (`--no-default-features`) / **25 → 31** (`--features ruvector`), 0 failed; the purge-boundary test is a true pin (fails deleting 2 rows under an inclusive cutoff, passes deleting 1 under the exclusive cutoff). Behaviour otherwise unchanged; Python deterministic proof unchanged (recorder is off the signal proof path). ### Added +- **ADR-263/264/265: deep review of the RuView npm surface (`@ruvnet/ruview`, `@ruvnet/rvagent`, `@ruv/ruview-cli`) with optimization strategies recorded as ADRs.** ADR-263 reviews the published `@ruvnet/ruview@0.1.0` harness: fail-open `claim-check` on empty input (HIGH), `spawnSync` head-of-line blocking of the MCP stdio server during long `verify`/`calibrate` runs (HIGH), optionalDependencies tripling the cold `npx` install for a code path that never uses them (MEASURED, `npm i` in a clean prefix: 4 packages / 620 kB / 71 files default vs 1 package / 172 kB / 22 files with `--omit=optional`), 1 MiB `maxBuffer` truncation risk, `python -c` port-interpolation surface in `node_monitor`, hardcoded MCP server version, duplicated skill payload — optimizations O1–O8. ADR-264 reviews `@ruvnet/rvagent@0.1.0` + the private CLI **against the published registry tarball**: `exports.require` → nonexistent `dist/index.cjs` (HIGH, every CJS consumer breaks), 44 dead source-map files = 62,698 B of the 188 kB unpacked payload pointing at unshipped `../src` (MEASURED), stdio-only server described as "dual-transport" (CLAIMED capability), mixed dot/underscore tool naming, double Zod validation + hand-duplicated advertised schemas, 2-fd leak per training job, unbounded request body in the unwired HTTP scaffold, dead `detectCogBinary` candidate list, `ruview` bin-name collision — optimizations O1–O9. 
ADR-265 adds the cross-cutting distribution layer: an `npm-packages.yml` CI matrix (tests + pack-content/size gate + tarball-install smoke test — none of the three packages currently has any CI, and `ci.yml` pins Node 18 against `engines >= 20`), publish-from-CI-only with `npm publish --provenance`, version single-sourcing from package.json, bin/namespace ownership (the `ruview` bin belongs to `@ruvnet/ruview`), and claim-check enforcement on package READMEs/descriptions. Docs only — no runtime code changed; the findings are the work orders for the follow-up PRs. - **ADR-131 §11–§12: HOMECORE-UI wired to a real backend — single-origin BFF gateway + production front-end (no mock in prod).** Implements the §11 wiring decision so the dashboard stops rendering fabricated data. **Front-end (DONE + verified under Node):** `api.js` rewritten so every data accessor is async and calls the §11.2 gateway routes; the in-browser mock is demoted to a **dev-only fixture** reachable only via `?demo=1`/`HOMECORE_UI_DEMO` (§2.2); all ten panels now `await` and render a **typed empty/error state** on upstream failure (no mock fallback in production) — 3 panels converted by hand, 7 via a parallel agent swarm. **New `homecore-server` BFF gateway (`src/gateway.rs`, compile-pending — no Rust toolchain in the authoring env):** promotes `homecore-server` to the single origin (§2.1); adds `/api/homecore/*` + `/api/cal/*` merged into `build_app`, with `reqwest` + CLI/env flags (`--calibration-url`/`--calibration-token`/`--apps-dir`/`--gateway-timeout-ms`). 
Real handlers: calibration **reverse-proxy** (W2), `GET /api/homecore/rooms` with the §11.3 **RoomState adapter** (`breathing`→`breathing_bpm`, `heartbeat`→`heart_bpm`, `None`→`null` preserving not-trained-vs-withheld, injected `anomaly.threshold`/`room_id`), **COG supervisor** over `/var/lib/cognitum/apps/` (W4), and **appliance metrics** from `/proc` + TCP service probes (W6); SEED-device/appliance routes (seeds/federation/witness/privacy/settings/automations/events-history/hailo/tokens — W3/W5) return a typed `503 upstream_unavailable` and the UI shows error states. **Tests:** front-end **5 files green** — import-graph, boot, render-smoke (22), interaction (3), and a **new prod-errors suite (13)** that runs with demo OFF + gateway unreachable and proves every panel renders an error state, never mock, never throws (it caught + fixed a real unhandled-rejection in the events automation builder). **Gateway compiled, tested, and run on Rust 1.89:** `cargo test -p homecore-server --no-default-features` = **12/12 pass** (6 gateway + 6 UI mount); the binary was **run live** — `GET /api/homecore/appliance` returns real `/proc` metrics + TCP service probes, unauth → `401`, `cogs` → `[]` (no apps dir), SEED-tier → typed `503`, and against a mock calibration upstream the `/api/cal/*` proxy passes through (`200`) and `GET /api/homecore/rooms` adapts `RoomState` to the UI shape (`breathing`→`breathing_bpm`, `heartbeat:null`→`heart_bpm:null`, injected `anomaly.threshold`/`room_id`). **Live testing caught + fixed a real bug** — a double-`v1` segment in the `/api/cal/*` proxy URL. **Remaining (intrinsic, not an env limit):** W3/W5/W6-Hailo/federation depend on services/hardware **not in this repo** (recorder/automation HTTP wrappers, real SEED nodes, Hailo stat source), so they return honest `503`s rather than fabricate data; W1/W2/W4/W6-appliance are functional now. ADR-131 §10/§12.1 updated with per-wave status. 
- **ADR-131: HOMECORE-UI — the complete operational dashboard for the two-tier Cognitum stack, served by `homecore-server` at `/homecore`.** A zero-dependency, no-build-step vanilla TS/JS + CSS frontend (the `rufield-viewer` "Axum + vanilla-JS" pattern) that extends the Cognitum Appliance shell as a first-class nav section (Framework | Guide | Cog Store | **HOMECORE** | Status). **Complete, not a scaffold** (per the ADR's revised §2/§7): all **10 panels** ship fully built and rendered — §4.1 System Dashboard (v0 Appliance health strip + SEED fleet grid + ESP32 summary + COG status row + event-bus sparkline), §4.2 SEED Detail (vector store / witness chain / 5 onboard sensors / reflex rules / cognitive-fragility / ingest packet-type), §4.3 SEED Fleet Map (Appliance→SEED→ESP32 hierarchy, ESP-NOW mesh, cross-SEED fusion badges, ADR-105 federation), §4.4 Entity & State Browser (domain-grouped, **live WebSocket `subscribe_events` patching — never polls**, first-class provenance badges, keyword filter, context-causality slide-over), §4.5 RoomState/Sensing (mixture-of-specialists), §4.6 COG Management + App Registry, §4.7 Calibration Wizard (5-step baseline→enroll→train→verify), §4.8 Event Bus + Automation builder, §4.9 Witness/Audit log (two-tier SHA-256 + Ed25519 timeline, privacy-mode banner, pagination, export), §4.10 Settings. **Design system is the exact production Cognitum palette** (`tokens.css` carries `--cyan #4ecdc4` … `--r 10px` verbatim, §3.1) so there is no visual seam with the Cog Store (§3.3 invariant). 
**§6 UX invariants enforced in code and pinned by tests:** tier-origin provenance is always-visible (never collapsed); `stale`/`vetoed` flags and the kNN fragility score are prominent (amber/red tint + banners, never grey-on-grey); a `null` specialist renders "Not trained / calibrate to enable" **visually distinct from** veto-`withheld` (rendered as explicitly withheld, never zero) **distinct from** an error; all IDs/hashes/endpoints/payloads use `--mono`; Hailo-sourced COGs (`arch: hailo10`) are visually distinguished from CPU-only (`arch: arm`). **Wiring:** `homecore-server` gains a `--ui-dir`/`HOMECORE_UI_DIR` flag and mounts the assets via `tower-http` `ServeDir` at `/homecore` alongside the unchanged HA-compat `/api` surface (new testable `build_app()`), with **5 Rust integration tests** (`#[cfg(test)] mod ui_tests`, `tower::oneshot`) asserting index / design tokens / all-10-panels are served, the API coexists, and an empty `--ui-dir` disables the mount. **JS test + benchmark suite (`ui/`, runs under plain `node`, no npm install): 24 checks / 0 failed** — an import/export graph verifier (15 modules consistent), a DOM-shim render-smoke that *executes every panel* (21 checks: ui helpers + mock contracts + all 10 panels render without throwing), and an interaction suite (3 checks: live WS state-patch, ws.js handshake/parse, calibration backend contract). **Benchmark:** total bundle **136.8 KB uncompressed across 18 files — ~37× smaller than HA's ~5 MB Lit bundle** (the ADR-126 §1.1 foil), slowest panel **1.5 ms/cold-render**. **Honest scope (§7.1):** the live HOMECORE REST API (`/api/config|states|services`) and the WebSocket `subscribe_events` feed are driven for real; panels whose backing service is **not** in this binary (SEED HTTPS API, calibration ADR-151, ADR-105 federation) render against a **contract-conformant mock layer flagged with a DEMO banner** and swap to live the moment those endpoints land — no mock data is ever presented as real. 
**Not verified in this environment:** the Rust crate was edited and the integration tests written but **not compiled/run here** (no Rust toolchain present); `cargo test -p homecore-server` + `cargo build` must be run on a Rust host before merge. - **ADR-175: int8 quantization of the WiFlow-STD "half" pose model — MEASURED fp32-vs-int8 accuracy/size trade-off (honest negative).** Sub-deliverable 8.2 of the benchmark/optimization milestone, and the reading of the SOTA brief's "one untested edge lever" (QAT-int8 on the 843,834-param half model that strictly dominates the published 2.23M model). A new committed script `v2/crates/wifi-densepose-train/scripts/quantize_half_int8.py` quantizes `half_best.pth` to int8 two ways and scores both with the **same** upstream `calculate_pck`/`calculate_mpjpe` that produced the fp32 sweep numbers, under **one locked normalization** (ADR-173 torso-diameter PCK — neck idx2→pelvis idx12, `use_torso_norm=True`, the standard MM-Fi/GraphPose-Fi convention), on the **same** seed-42 file-level 70/15/15 test split (52,560 NaN-free / 54,000 full windows). 
**MEASURED on ruvultra (RTX 5080, torch 2.11.0+cu128, fbgemm; clean test, torso-PCK):** fp32 = 96.62% PCK@20 / 99.47% PCK@50 / 0.008981 MPJPE / 3.351 MB (fp32-CPU reproduces fp32-GPU to 4 dp, so the int8 deltas are pure quantization, not CPU/GPU drift); **int8 static PTQ = 40.98% PCK@20 (−55.64 pp), 1.046 MB** — naive static QDQ **collapses** on this model (the brief's 2.23M "sweet spot" does NOT transfer to the 843k half model at the tight @20 threshold); **int8 QAT (3-epoch FX fake-quant fine-tune from half_best) = 67.48% PCK@20 (−29.15 pp) / 98.69% PCK@50 (−0.78 pp), 1.043 MB.** **Verdict (honest no):** int8 is **not a win** at the strict PCK@20 edge target — QAT recovers a large share of the PTQ collapse and is near-lossless at the loose PCK@50 (coarse localization survives int8, fine does not), but a **3.2× size win at −29 pp PCK@20** is a bad trade when the half model already fits edge flash at fp32 → **keep fp32/fp16 on the edge for now.** **Disclosed gap:** the QAT *fake-quant* val PCK@20 reached 83.45% but the *converted* int8 model scores 67.48% — a real ~16 pp `convert_fx` gap (fbgemm int8 kernels ≠ straight-through estimate, esp. the axial-attention einsum/softmax); we report the converted-int8 number, not the fake-quant proxy. **MEASURED:** every table number + the PTQ collapse + the QAT partial recovery + the conversion gap. **CLAIMED/not done:** ONNX/TFLite export, on-edge-SoC latency/energy (int8 measured on x86 fbgemm — size transfers, latency does NOT), mixed-precision keeping attention fp32, longer/better-tuned QAT. **Honest limitations:** single in-domain eval split (no cross-environment split), x86-int8 not edge-SoC-int8, lightly-tuned QAT. Additive only — no production Rust or signal-pipeline change; Python deterministic proof unchanged (`f8e76f21…46f7a`, bit-exact — off the signal proof path). 
diff --git a/CLAUDE.md b/CLAUDE.md index 226d09a4..262884dc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -62,7 +62,7 @@ All 5 ruvector crates integrated in workspace: - `ruvector-attention` → `model.rs` (apply_spatial_attention) + `bvp.rs` ### Architecture Decisions -43 ADRs in `docs/adr/` (ADR-001 through ADR-043). Key ones: +182 ADRs in `docs/adr/` (numbered ADR-001 through ADR-265, with gaps). Key ones: - ADR-014: SOTA signal processing (Accepted) - ADR-015: MM-Fi + Wi-Pose training datasets (Accepted) - ADR-016: RuVector training pipeline integration (Accepted — complete) @@ -77,6 +77,10 @@ All 5 ruvector crates integrated in workspace: - ADR-148: Drone swarm control system / `ruview-swarm` (In Progress) - ADR-152: WiFi-Pose SOTA 2026 intake — geometry conditioning, WiFlow-STD benchmark (measurement (a) complete: claims MEASURED-EQUIVALENT at ~96% PCK@20), MAE recipe (Proposed; §2.1–2.3, 2.6 implemented) - ADR-153: IEEE 802.11bf-2025 forward-compatibility protocol model (Accepted — amends ADR-152 §2.4) +- ADR-182: `npx ruview` harness minted via MetaHarness (Accepted — P1+P2 shipped as `@ruvnet/ruview`) +- ADR-263: `@ruvnet/ruview` npm harness deep review + optimization strategy (Accepted — implemented) +- ADR-264: `@ruvnet/rvagent` MCP server + `@ruv/ruview-cli` deep review + optimization strategy (Proposed) +- ADR-265: RuView npm distribution strategy — CI gate, provenance, version single-sourcing (Proposed) ### Supported Hardware diff --git a/README.md b/README.md index 8e23fdb8..7b8c77e4 100644 --- a/README.md +++ b/README.md @@ -617,7 +617,7 @@ Verify the plugin structure: `bash plugins/ruview/scripts/smoke.sh`. Full detail | [Semantic Primitives — Precision/Recall](docs/integrations/semantic-primitives-metrics.md) | Per-primitive F1 on the held-out paired-capture set: someone-sleeping, possible-distress, room-active, elderly-inactivity-anomaly, meeting, bathroom, fall-risk, bed-exit, no-movement, multi-room. 
| | [Claude Code / Codex Plugin](plugins/ruview/README.md) | The `ruview` plugin + marketplace — skills, `/ruview-*` commands, agents, and the Codex prompt mirror | | [Portable harness — `npx @ruvnet/ruview`](harness/ruview/README.md) | MetaHarness-minted, host-portable RuView operator harness — `ruview.*` MCP tools + the MEASURED-vs-CLAIMED honesty guardrail enforced in code ([ADR-182](docs/adr/ADR-182-npx-ruview-harness-via-metaharness.md)). A lighter, multi-host companion to the in-repo plugin. | -| [Architecture Decisions](docs/adr/README.md) | 96 ADRs — why each technical choice was made, organized by domain (hardware, signal processing, ML, platform, infrastructure) | +| [Architecture Decisions](docs/adr/README.md) | 182 ADRs — why each technical choice was made, organized by domain (hardware, signal processing, ML, platform, infrastructure) | | [Domain Models](docs/ddd/README.md) | 8 DDD models (RuvSense, Signal Processing, Training Pipeline, Hardware Platform, Sensing Server, WiFi-Mat, CHCI, rvCSI) — bounded contexts, aggregates, domain events, and ubiquitous language | | [rvCSI — edge RF sensing runtime](https://github.com/ruvnet/rvcsi) | Rust-first / TypeScript-accessible / hardware-abstracted CSI runtime: multi-source ingestion (incl. real nexmon_csi `.pcap` from a **Raspberry Pi 5** / Pi 4 / Pi 3B+ — CYW43455 / BCM43455c0) → validation → DSP → typed events → RuVector RF memory ([ADR-095](docs/adr/ADR-095-rvcsi-edge-rf-sensing-platform.md), [ADR-096](docs/adr/ADR-096-rvcsi-ffi-crate-layout.md), [domain model](docs/ddd/rvcsi-domain-model.md)). Now its own repo — [`ruvnet/rvcsi`](https://github.com/ruvnet/rvcsi) — vendored here under `vendor/rvcsi`; 9 `rvcsi-*` crates on crates.io, `@ruv/rvcsi` on npm, plus a Claude Code plugin. 
| | [Desktop App](v2/crates/wifi-densepose-desktop/README.md) | **WIP** — Tauri v2 desktop app for node management, OTA updates, WASM deployment, and mesh visualization | diff --git a/docs/adr/ADR-263-ruview-npm-harness-deep-review.md b/docs/adr/ADR-263-ruview-npm-harness-deep-review.md new file mode 100644 index 00000000..83d90480 --- /dev/null +++ b/docs/adr/ADR-263-ruview-npm-harness-deep-review.md @@ -0,0 +1,191 @@ +# ADR-263: `@ruvnet/ruview` npm Harness — Deep Review + Optimization Strategy + +| Field | Value | +|-------|-------| +| **Status** | Accepted — **implemented** (O1–O9, `@ruvnet/ruview@0.2.0`): fail-closed `claim-check`, async MCP dispatch (ping answered mid-`verify`, pinned by e2e test), zero-dependency install, bounded output tails, argv-passed monitor port, package.json-sourced version, prepack skill sync, memoized `which()`, underscore-canonical tools with dotted aliases, word-boundary guardrail matching. 30/30 tests (MEASURED, `node --test test/*.test.mjs`); CI gate in ADR-265's `npm-packages.yml` | +| **Date** | 2026-07-02 | +| **Deciders** | ruv | +| **Codename** | **RUVIEW-NPM-REVIEW-1** | +| **Supersedes / amends** | none (records review of the ADR-182 P1+P2 artifact; feeds ADR-265 distribution strategy) | + +## Context + +ADR-182 minted and published **`@ruvnet/ruview@0.1.0`** (`harness/ruview/`) — the +`npx ruview` operator harness: a dependency-free ESM CLI + minimal MCP stdio server +exposing six `ruview.*` tools (onboard / claim_check / verify / node_monitor / +calibrate / node_flash), five skill playbooks, and the executable +MEASURED-vs-CLAIMED guardrail (`src/guardrails.js`). The package is live on npm +(0.1.0, 49.5 kB unpacked / 21 files — MEASURED, `npm view @ruvnet/ruview` + +`npm pack --dry-run`) and is the recommended MCP registration path +(`npx -y @ruvnet/ruview mcp start` in the bundled `.claude/settings.json`). 
+ +This ADR is the first dedicated deep review of that npm artifact: correctness, +fail-open/fail-closed posture, performance (cold start + request handling), +packaging hygiene, and security of the subprocess surface. All 17 bundled tests +pass on Node 22 (MEASURED, `node --test test/*.test.mjs`, 17/17, ~108 ms). + +## Findings + +Severity reflects impact on the package's stated contract: *fail-closed operator +tools + an honesty guardrail that must never fail open*. + +### F1 (HIGH, fail-open): `claim-check` passes silently on empty input + +`bin/cli.js` `claim-check` with **neither `--text` nor `--file`** sends +`text: undefined` → `claimCheck(String(args.text ?? ''))` → `''` → `ok: true`, +**exit 0**. A CI hook wired as `npx ruview claim-check --text "$BODY"` where +`$BODY` expands empty therefore reports PASS. This is the single tool whose whole +purpose is to fail closed; empty input must be an error, not a pass. +Reproducer: `node bin/cli.js claim-check` → `{"ok": true}`, exit 0. + +### F2 (HIGH, head-of-line blocking): MCP server is fully synchronous + +`src/mcp-server.js` dispatches `tools/call` inside the readline `line` handler, +and every heavyweight handler in `src/tools.js` uses **`spawnSync`** +(`ruview.verify` up to 180 s, `ruview.calibrate` up to 300–600 s, +`ruview.node_monitor` up to `seconds+10`). While one call runs, the event loop is +blocked: `ping`, `tools/list`, and concurrent `tools/call` requests are not even +read from stdin. Hosts that health-check with `ping` during a long `calibrate` +will conclude the server is dead and kill it mid-run. + +### F3 (MEDIUM, cold start): optionalDependencies triple the `npx` install for a path that never uses them + +`package.json` declares `optionalDependencies` on `@metaharness/kernel` and +`@metaharness/host-claude-code`. npm installs optional deps **by default**, so +every cold `npx -y @ruvnet/ruview mcp start` fetches 3 extra packages (kernel + +host + transitive `@ruvector/emergent-time`). 
MEASURED (npm 10.9.7, this +container): default install = **4 packages, 620 kB, 71 files**; with +`--omit=optional` = **1 package, 172 kB, 22 files**. The operator-tool and MCP +paths never import these — only `doctor`/`install` do, and both already +dynamic-import inside `try/catch` and degrade gracefully when absent +(`kernel/host: not installed (ok…)`). The optional deps buy nothing on the hot +path and cost 3 registry round-trips + ~450 kB on every cold start. + +### F4 (MEDIUM, silent truncation): `spawnSync` default `maxBuffer` (1 MiB) + +`run()` in `src/tools.js` never sets `maxBuffer`. `cargo run -p +wifi-densepose-cli` (the `calibrate` fallback path) and a chatty `verify.py` can +exceed 1 MiB of stdout, at which point the child is killed with `ENOBUFS` and the +tool reports a spawn error that looks like a proof/calibration failure. The +handlers only ever consume the last 8 kB/1.5 kB; buffering should be bounded but +generous (e.g. `maxBuffer: 16 MiB`) or streamed with a tail ring. + +### F5 (MEDIUM, injection surface): `node_monitor` interpolates the port into Python source + +The handler builds a `python -c` script by string interpolation: +`` `ser=serial.Serial(${JSON.stringify(port)},115200,…)` `` and +`` `while time.time()-t<${dur}:` ``. `JSON.stringify` produces a *JavaScript* +string literal; Python string-literal semantics differ at the edges (`\uXXXX` is +shared, but, e.g., JS emits U+2028/U+2029 raw and unescaped — pre-ES2019 rules aside — and +any future non-JSON-safe field added the same way would be executable). `port` +arrives from the MCP caller (an agent), so this is an agent-controlled string +concatenated into an interpreter invocation. `dur` is `Number()`-guarded; `port` +should be passed out-of-band (`sys.argv`/env), never spliced into source.
+ +### F6 (LOW, drift): server version hardcoded + +`SERVER_INFO = { name: 'ruview', version: '0.1.0' }` in `src/mcp-server.js` +duplicates `package.json.version` (the CLI's `--version` already reads +package.json at runtime). First release bump will drift the MCP handshake +version. + +### F7 (LOW, duplication): every skill ships twice + +`skills/*.md` and `.claude/skills/*/SKILL.md` are byte-identical (same sha256 in +`.harness/manifest.json`). ~8 kB of the 49.5 kB unpacked payload is duplicate +content, and — worse than size — two copies must be kept in sync by hand. + +### F8 (LOW, perf + portability): `which()` is uncached and shells out + +`which()` runs up to twice per tool call (`python` then `python3`), each a +blocking `spawnSync`; the POSIX branch spawns a shell (`shell: true`). Results +are stable for the process lifetime and should be memoized; the lookup can be +done dep-free with a PATH scan instead of a shell. + +### F9 (LOW, interop): dot-named tools + minimal protocol surface + +Tool names (`ruview.onboard`, `ruview.claim_check`, …) contain dots. MCP itself +does not restrict names, but downstream host APIs commonly enforce +`^[a-zA-Z0-9_-]{1,64}$` for tool names; hosts must then sanitize or reject. +The server also answers `resources/list` / `prompts/list` with `-32601` (it does +not advertise those capabilities, so this is spec-legal, but empty-list stubs are +cheaper than every host's error path). Protocol version is pinned to +`2024-11-05` with no negotiation fallback. None of this breaks Claude Code today; +it narrows portability, which is the harness's whole pitch (9 hosts, ADR-182). + +### F10 (LOW, CI gap): the published package has zero CI + +No workflow under `.github/workflows/` runs `harness/ruview` tests (checked: +no workflow references `harness/ruview`, `ruview-mcp`, or `ruview-cli`), and +`ci.yml` pins `NODE_VERSION: '18'` while the package declares +`engines.node >= 20`. 
Note also `node --test test/` (directory form) fails on +Node 22 while the documented glob form passes — CI should pin the working +invocation. Consolidated CI/publish strategy is ADR-265. + +### F11 (MEDIUM, guardrail precision): `METRIC_TERMS` substring matching false-positives on ordinary prose + +Found by dogfooding this review: `claimCheck` matches metric terms with +`lower.includes(t)`, so the two-character terms `'map'` and `'f1'` fire inside +ordinary words and labels — "source **map**s", "the **map**s can never +resolve", finding IDs like "**F1** (HIGH…)". MEASURED reproducer: running +`npx ruview claim-check --file` over this ADR and ADR-264 yields 4 and 16 +medium findings respectively, the majority of which are `map`/`F1` +false positives on lines carrying no accuracy claim. A guardrail that cries +wolf trains people to ignore it — precision is part of its fail-closed +contract. Short/ambiguous terms need word-boundary matching (`\bmap\b`, +`\bf1\b`, likewise `auc`, `iou`), and section-heading label patterns +(`F\d+`, `O\d+`) should not count as metric mentions. + +## Decision + +Adopt the following optimization strategy, in priority order. Each item is +independently shippable; F-numbers map to findings. + +- **O1 (F1):** `claim-check` with no `--text`/`--file` (or empty text after read) + exits 2 with a usage error. Add a regression test pinning exit ≠ 0. +- **O2 (F2):** make the MCP dispatch async: convert `run()`/`which()` to + promise-based `spawn`, make `tools/call` handlers `async`, and keep reading + stdin while calls run (respond to `ping`/`tools/list` concurrently; serialize + only same-tool hardware operations). Acceptance: `ping` round-trips < 50 ms + while a synthetic 30 s `calibrate` is in flight. +- **O3 (F3):** drop the two `optionalDependencies`; `doctor`/`install` already + degrade and should print the exact `npm i @metaharness/kernel + @metaharness/host-claude-code` hint on the miss path. 
Acceptance: cold + `npm i @ruvnet/ruview` installs exactly 1 package (MEASURED baseline above). +- **O4 (F4):** set `maxBuffer: 16 * 1024 * 1024` in `run()` (or stream + tail). +- **O5 (F5):** pass `port` to the monitor script via `sys.argv` + (`python -c script -- <port>`), never by source interpolation. +- **O6 (F6):** read the MCP `serverInfo.version` from `package.json` once at + startup (same pattern the CLI already uses). +- **O7 (F7):** make `skills/*.md` the single source and generate + `.claude/skills/*/SKILL.md` in a `prepack` script (or vice versa); manifest + hashes then pin one canonical set. +- **O8 (F8, F9):** memoize `which()`; add underscore aliases for the dot-named + tools (accept both in `tools/call`, advertise the underscore form) and add + empty `resources/list` / `prompts/list` stubs. +- **O9 (F11):** switch `METRIC_TERMS` matching to word-boundary regexes for + short terms (`map`, `f1`, `auc`, `iou`) and skip label tokens matching + `\b[FO]\d+\b`. Acceptance: `claim-check --file` over ADR-263/264/265 reports + only the genuinely tagged-or-taggable percentage lines, and the existing 17 + guardrail tests still pass plus new false-positive pins ("source maps", + "F1 (HIGH)" → no finding). + +Non-goals: no new runtime dependencies (the zero-dep MCP server is a feature, +not an accident — keep it), no build step, no change to the fail-closed tool +contracts. + +## Consequences + +- The honesty guardrail becomes fail-closed end-to-end (its current empty-input + pass is the exact failure mode the guardrail exists to prevent). +- `npx` cold start drops ~450 kB / 3 packages (MEASURED baseline in F3) with no + feature loss; `doctor` output already communicates the optional-dep story. +- Long-running `verify`/`calibrate` no longer starve the MCP channel — the + harness survives host health checks during real calibration runs. +- Two-copy skill drift becomes impossible at pack time.
+- Costs: async conversion touches every handler signature in `src/tools.js` + (mechanical, ~6 handlers); alias tools add a small compatibility table. +- Verification for the implementing PR: bundled tests extended for O1/O2/O5 + (target ≥ 20 tests), `npm pack --dry-run` file-count asserted, and the F3 + install measurement re-run and quoted MEASURED in the PR body — which must + itself pass `npx ruview claim-check`. diff --git a/docs/adr/ADR-264-rvagent-mcp-and-cli-npm-deep-review.md b/docs/adr/ADR-264-rvagent-mcp-and-cli-npm-deep-review.md new file mode 100644 index 00000000..f29eaf94 --- /dev/null +++ b/docs/adr/ADR-264-rvagent-mcp-and-cli-npm-deep-review.md @@ -0,0 +1,169 @@ +# ADR-264: `@ruvnet/rvagent` MCP Server + `@ruv/ruview-cli` — Deep Review + Optimization Strategy + +| Field | Value | +|-------|-------| +| **Status** | Accepted — **implemented** (O1–O9, `@ruvnet/rvagent@0.2.0`): `exports` fixed (types-first, no phantom `.cjs`), map-free tarball (127,704 B unpacked / 46 files / 0 maps — MEASURED, `npm pack --dry-run`, from 188 kB), Streamable HTTP **wired** behind `RVAGENT_HTTP_PORT` with per-session transports + 1 MiB body cap + port-aware origin gate, underscore tool names with dotted router aliases, single Zod validation gate with generated JSON Schemas, fd-leak fixed + persisted job records + bounded log tails, probing `detectCogBinary`, package.json-sourced version, `ruview-cli` bin renamed. 
99/99 jest tests (MEASURED); both transports smoke-tested live | +| **Date** | 2026-07-02 | +| **Deciders** | ruv | +| **Codename** | **RUVIEW-NPM-REVIEW-2** | +| **Supersedes / amends** | none (reviews the ADR-104/ADR-124 artifacts; feeds ADR-265 distribution strategy) | + +## Context + +Two TypeScript npm packages expose RuView sensing to agents and shells: + +- **`@ruvnet/rvagent@0.1.0`** (`tools/ruview-mcp/`) — SENSE-BRIDGE, the MCP + server over the sensing-server HTTP API + cog binaries: 12 tools + (csi/pose/count/registry/train/job + ADR-124 BFLD/presence/vitals). Published + (188 kB unpacked — MEASURED, `npm view @ruvnet/rvagent`). Deps: + `@modelcontextprotocol/sdk` + `zod`. +- **`@ruv/ruview-cli@0.0.1`** (`tools/ruview-cli/`) — `private: true` yargs CLI + mirroring the same capabilities; intentionally duplicates `http.ts`/`cog.ts`/ + `config.ts` (~150 lines) to stay standalone. + +This ADR records a deep review of both: packaging correctness (verified against +the **published** tarball, not just the source tree), protocol/interop, resource +lifecycle, and the honesty of the package's own self-description — the same +MEASURED-vs-CLAIMED bar the project applies to accuracy numbers. + +## Findings + +### F1 (HIGH, broken export): `require` condition points at a file that does not exist + +`package.json` `exports["."].require = "./dist/index.cjs"`, but the build is +plain `tsc` (ESM only) and **the published 0.1.0 tarball contains no +`index.cjs`** (verified by listing the registry tarball). Any CJS consumer doing +`require('@ruvnet/rvagent')` resolves to a nonexistent file → +`ERR_MODULE_NOT_FOUND`. Additionally the `types` condition is listed **after** +`import`/`require`; TypeScript requires `types` first or it may be ignored under +`moduleResolution: bundler/node16`. 
+ +### F2 (MEDIUM, tarball bloat): a third of the published package is dead source maps + +The 0.1.0 tarball ships **44 `.map` files = 62,698 B** against 78,209 B of +actual `.js` (MEASURED, extracted registry tarball). `src/` is not published, so +every `sourceMappingURL` points at `../src/*.ts` that consumers do not have — +the maps can never resolve. Also `files` lists `CHANGELOG.md`, which does not +exist in `tools/ruview-mcp/` (npm silently skips it), so the advertised file set +is partly fictional. + +### F3 (MEDIUM, honesty): the package description claims a transport it does not start + +The description reads "**dual-transport MCP server (stdio + Streamable HTTP)**", +but `main()` in `src/index.ts` wires **stdio only**. `http-transport.ts` is a +complete, tested scaffold that nothing imports at runtime — there is no flag, +env var, or subcommand that starts it. By this project's own rule this is a +CLAIMED capability presented as shipped. Either wire it (`--http` / +`RVAGENT_HTTP_PORT` gate) or de-claim the description until it is. + +### F4 (MEDIUM, interop + inconsistency): two tool-naming conventions, one of them dot-based + +Six tools use `ruview_snake_case`; six (ADR-124 additions) use +`ruview.dotted.names`. Same interop caveat as ADR-263 F9 (host tool-name +regexes commonly `^[a-zA-Z0-9_-]{1,64}$`), plus the split convention makes the +tool surface look like two products. Standardize on underscores and accept the +dotted forms as aliases for one deprecation cycle. + +### F5 (MEDIUM, double work + drift): every tool input is validated twice from two hand-maintained schemas + +`CallToolRequestSchema` handler runs `TOOL_INPUT_SCHEMAS[name].safeParse(args)`, +then each tool handler runs its own `schema.parse(args)` again — two full Zod +passes per call. 
Separately, the `inputSchema` JSON advertised via `tools/list` +is **hand-written** and duplicates the Zod schema field-by-field (defaults, +min/max, descriptions) — schema drift between what is advertised and what is +enforced is a matter of time. Parse once at the gate, pass the typed result to +handlers, and generate the advertised JSON Schema from the Zod source +(`zod-to-json-schema` at build time, or Zod 4's native `z.toJSONSchema` when the +SDK's peer range allows). + +### F6 (MEDIUM, resource lifecycle): `train_count` leaks 2 fds per job; job registry is process-local + +`trainCount` opens `logFdOut`/`logFdErr` with `openSync` and never closes them +in the parent — the spawned cargo child inherits duplicates, but the parent's +descriptors stay open for the MCP server's lifetime: 2 leaked fds per training +job. `jobRegistry` is an in-memory `Map`, so `ruview_job_status` after a server +restart reports "not found" for a training run that is still burning GPU (the +source comments acknowledge this; the fix — persist `~/.ruview/jobs/<id>.json`, +already the documented layout — is small). Also `jobStatus` re-`import`s +`node:fs` on every poll and reads the entire log to return 20 lines. + +### F7 (MEDIUM, security/robustness of the HTTP scaffold): unbounded body + one shared session transport + +`http-transport.ts` buffers the request body with no size cap (memory DoS the +moment it is wired to a socket), reuses a **single** +`StreamableHTTPServerTransport` with `sessionIdGenerator` for all clients (the +SDK's stateful mode expects one transport per session — a second client's +`initialize` collides), and the Origin allowlist is exact-match +(`http://localhost` will not match a real browser origin `http://localhost:5173`). +Must be fixed **before** F3 wires it in; bearer-token + 127.0.0.1 defaults are +already right.
+ +### F8 (LOW, dead/misleading code): `detectCogBinary` always returns the bare name + +It builds a 4-candidate appliance-path array and then returns +`candidates[candidates.length - 1]` — i.e. always `name` — without checking +existence. The candidates are dead weight that reads as if path detection +happens. Either probe with `existsSync` or delete the array. + +### F9 (LOW, drift + hygiene): hardcoded versions, unused/mismatched devDeps, bin-name collision + +`PACKAGE_VERSION = "0.1.0"` (index.ts) duplicates package.json; +`@types/express` is unused (`http-transport` uses `node:http`); `@types/jest@30` +against `jest@29`; `ruview-cli` hardcodes `.version("0.0.1")`. And +`@ruv/ruview-cli` claims the **`ruview`** bin name, which collides with +`@ruvnet/ruview`'s bin (ADR-182) if both are ever installed globally — +ADR-263/265 give the `ruview` name to the harness; the CLI must rename or fold. + +## Decision + +- **O1 (F1):** fix `exports`: drop the `require` condition (ESM-only is fine for + a bin-first package) or add a real CJS build; put `types` first. Add a CI + smoke test that does `npm pack` + `node -e "import('')"`. +- **O2 (F2):** publish without maps: `declarationMap: false`, `sourceMap: false` + in a `tsconfig.build.json` used by `prepack` (or add `!dist/**/*.map` to + `files`). Remove the phantom `CHANGELOG.md` entry or create the file. + Acceptance: unpacked size ≤ ~125 kB (from 188 kB — MEASURED, `npm pack --dry-run`). +- **O3 (F3, F7):** wire the HTTP transport behind an explicit opt-in + (`RVAGENT_HTTP_PORT` or `--http`), after F7 fixes: per-session transport map + keyed by `mcp-session-id`, 1 MiB body cap, origin matching that honors ports + (compare `URL.origin` prefixes or document exact origins). Until then, change + the description to "stdio MCP server (Streamable HTTP scaffold, unwired)". 
+- **O4 (F4):** rename dotted tools to underscore (`ruview_bfld_last_scan`, …), + keep dotted aliases in the call router for one release, note it in the README. +- **O5 (F5):** single validation gate: the registry maps name → Zod schema → + typed handler; advertised `inputSchema` generated from Zod at build time. +- **O6 (F6):** close parent fds after spawn (`closeSync` post-`spawn` — the + child holds its own copies), persist job records to + `~/.ruview/jobs/<id>.json`, and read log tails with a bounded read. +- **O7 (F8):** make `detectCogBinary` actually probe (`existsSync` over the + candidates) — it is the entire reason the function exists. +- **O8 (F9):** single-source versions from package.json; drop `@types/express`; + align `@types/jest` with jest 29 (or move to `node:test` like the harness and + drop the jest toolchain entirely — it is the heaviest devDep in both + packages). +- **O9 (F9, scope):** fold `@ruv/ruview-cli` into `rvagent` as a second bin + (`rvagent-cli`) sharing `http/cog/config`, or keep it private-forever and say + so in its README. Its `ruview` bin name is surrendered to `@ruvnet/ruview` + either way. + +## Consequences + +- CJS consumers stop hitting a guaranteed-broken export path (F1 is the only + finding that fails for every consumer of that entry point deterministically). +- The published artifact shrinks ~33% (MEASURED, F2 tarball listing: 62,698 B + of maps in a 188 kB unpacked payload) and stops advertising files/transports + it does not contain — the package description itself passes the project's + claim-check bar. +- One schema source ends advertised-vs-enforced drift and halves per-call + validation cost; naming unification makes the 12-tool surface read as one + product and survive strict host tool-name validation. +- Long-lived MCP servers stop accumulating fds during training campaigns, and + job polling survives restarts.
+- Costs: the alias cycle (O4) briefly doubles the advertised tool count unless + aliases are router-only (recommended: router-only, advertise underscore names + exclusively); folding the CLI (O9) retires a package name already in use in + scripts, so it needs a deprecation note. +- Verification for the implementing PR: `npm pack --dry-run` asserted file list + (no `.map`, no phantom entries), pack-size budget in CI (ADR-265), jest/`node + --test` suite green, and a tarball-install smoke test for both `import` and + the `rvagent` bin. diff --git a/docs/adr/ADR-265-ruview-npm-distribution-strategy.md b/docs/adr/ADR-265-ruview-npm-distribution-strategy.md new file mode 100644 index 00000000..945626be --- /dev/null +++ b/docs/adr/ADR-265-ruview-npm-distribution-strategy.md @@ -0,0 +1,124 @@ +# ADR-265: RuView npm Distribution Strategy — CI Gate, Provenance, Version Single-Sourcing, Namespace + +| Field | Value | +|-------|-------| +| **Status** | Accepted — **D1–D4 implemented**: `.github/workflows/npm-packages.yml` (matrix gate: tests, version-literal grep, pack-content/size gate, tarball-install smoke test, README claim-check), `.github/workflows/ruview-npm-release.yml` (publish-from-CI with `npm publish --provenance`), version single-sourcing (all three packages read package.json), `ruview` bin owned by `@ruvnet/ruview` (`@ruv/ruview-cli` bin renamed `ruview-cli`), `ci.yml` NODE_VERSION 18→20. 
D5 (no workspace) stands as recorded | +| **Date** | 2026-07-02 | +| **Deciders** | ruv | +| **Codename** | **RUVIEW-NPM-DIST** | +| **Supersedes / amends** | none (cross-cutting layer above ADR-263 and ADR-264; complements ADR-182 P3/P4) | + +## Context + +The monorepo now ships (or stages) **three Node packages** with no shared +distribution engineering: + +| Package | Dir | Published | Bin(s) | Tests in CI | +|---------|-----|-----------|--------|-------------| +| `@ruvnet/ruview` | `harness/ruview/` | 0.1.0 (live) | `ruview` | **none** | +| `@ruvnet/rvagent` | `tools/ruview-mcp/` | 0.1.0 (live) | `rvagent`, `ruview-mcp` | **none** | +| `@ruv/ruview-cli` | `tools/ruview-cli/` | private | `ruview` (collides) | **none** | + +Cross-cutting facts established during the ADR-263/264 reviews: + +- **Zero CI coverage.** No workflow under `.github/workflows/` references any of + the three directories. Two of the packages are *live on the registry* and were + published from a laptop state CI never saw. Meanwhile the Rust side has a + 1,031+-test gate and a witness-bundle culture (ADR-028) — the npm surface is + the only shipped artifact class with no verification gate at all. +- **`ci.yml` pins `NODE_VERSION: '18'`** while all three packages declare + `engines.node >= 20`. +- **Version triplication.** Each package hardcodes its version in source at + least once beyond package.json (harness `SERVER_INFO`, rvagent + `PACKAGE_VERSION`, cli `.version("0.0.1")`). +- **Bin-name collision.** Two packages claim the `ruview` bin. +- **No provenance.** Neither published package carries npm provenance + attestations, in a project whose differentiator is signed, reproducible + evidence (ADR-028 witness bundles, ADR-182 P4 ed25519/SLSA design). 
+- **No pack-content gate.** ADR-264 F1/F2 (broken `require` target, 33% dead map weight — MEASURED, tarball listing — and a phantom + `CHANGELOG.md` in `files`) are exactly the defect class an + `npm pack --dry-run` assertion catches in seconds. + +## Decision + +Adopt one distribution layer for all Node packages. Per-package code fixes live +in ADR-263/264; this ADR fixes the machinery around them. + +### D1 — One `npm-packages.yml` CI workflow (the gate) + +Matrix over `[harness/ruview, tools/ruview-mcp, tools/ruview-cli]` × +Node `[20, 22]`: + +1. `npm ci` where a lockfile is committed (the TS packages); the harness + installs with `npm install` — repo policy gitignores lockfiles under + `harness/`, and the package is dependency-free after ADR-263 O3 so there is + nothing to pin. +2. `npm test` (harness: `node --test test/*.test.mjs` — pin the glob form, + the directory form fails on Node 22; TS packages: build + jest or `node:test` + per ADR-264 O8). +3. **Pack gate:** `npm pack --dry-run --json` asserted against a checked-in + expected file list + a max unpacked-size budget per package (harness ≤ 60 kB; + rvagent ≤ 130 kB post ADR-264 O2). Any new/missing/renamed shipped file is a + reviewed diff, not a surprise. +4. **Tarball smoke test:** install the packed tarball into a temp dir; run + `ruview --version`, `ruview doctor`, `rvagent` `--help`-equivalent, and a + Node `import()` of each declared export condition — this is the test that + would have caught ADR-264 F1 (`require` → nonexistent `dist/index.cjs`). +5. Bump `ci.yml` `NODE_VERSION` to `'20'` (independent of the matrix above). + +### D2 — Publish only from CI, with provenance + +Manual `npm publish` from laptops stops. A tag-triggered workflow +(`ruview-npm-release.yml`, mirroring the firmware release discipline) runs the +D1 gate, then `npm publish --provenance --access public` under the GitHub OIDC +token. 
Consequence: every published version is attested to a public commit + +workflow run — the npm-side analogue of the ADR-028 witness bundle. The +`prepublishOnly` script in each package runs the pack gate locally as a +belt-and-braces (publishing outside CI fails loudly, not silently). + +### D3 — Version single-sourcing + +Rule: **package.json is the only place a version string lives.** Runtime code +reads it (`createRequire(import.meta.url)('./package.json').version` or a +build-time define for the TS packages). CI greps for `\d+\.\d+\.\d+` literals in +`src/` of each package and fails on match (allowlist: test fixtures). This +retires ADR-263 F6 and ADR-264 F9 permanently instead of per-incident. + +### D4 — Namespace and bin ownership + +- `@ruvnet/ruview` **owns the `ruview` bin** (it is the published front door, + ADR-182). `@ruv/ruview-cli` renames its bin or folds into `rvagent` + (ADR-264 O9) — decided here so neither package ADR relitigates it. +- New Node packages in this repo use the `@ruvnet/` scope (the `@ruv/` scope + holds `rvcsi` legacies; do not grow it). +- Every package README + description must pass + `npx ruview claim-check` — enforced in the D1 gate. The guardrail package + linting its sibling packages' claims is the cheapest dogfooding we have + (ADR-264 F3 is the standing example of why). + +### D5 — Shared-code policy (bounded) + +Do **not** introduce an npm workspace or a shared runtime package yet: three +packages, two of which may merge (ADR-264 O9), do not justify workspace +machinery, and the harness's zero-dep property is load-bearing. Revisit if a +fourth package appears or if the `http/cog/config` duplication survives the +ADR-264 O9 fold. Record the duplication as intentional in each file header (the +CLI already does this). 
+ +## Consequences + +- The npm artifacts get the same class of gate the Rust workspace has had since + ADR-028: no publish without tests, no shipped file set without an asserted + manifest, no version without provenance. The two defects that reached the + registry (broken `require` condition, dead maps) become CI-impossible. +- Cold-path costs stay near zero: the D1 matrix is 6 fast jobs (the harness + suite runs in ~108 ms MEASURED; TS builds dominate at a few tens of seconds). +- Publishing gains one constraint (must go through CI) and loses one failure + mode (laptop-state publishes) — the right trade for a project whose brand is + reproducible evidence. +- D3's grep gate is blunt but cheap; if it over-fires, scope it to + `version`-adjacent identifiers before weakening it. +- Follow-ups tracked elsewhere: per-package code fixes (ADR-263 O1–O9, ADR-264 + O1–O9); ADR-182 P4 (metaharness router + ed25519 provenance chain) remains + the deeper provenance story that D2's npm attestations complement, not + replace. diff --git a/docs/adr/README.md b/docs/adr/README.md index 4826fef0..94c0f218 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -1,6 +1,6 @@ # Architecture Decision Records -This folder contains 45 Architecture Decision Records (ADRs) that document every significant technical choice in the RuView / WiFi-DensePose project. +This folder contains 182 Architecture Decision Records (ADRs) that document every significant technical choice in the RuView / WiFi-DensePose project. (The index tables below list a curated subset per domain; see the directory listing for the full set.) ## Why ADRs?
@@ -120,6 +120,9 @@ Statuses: **Proposed** (under discussion), **Accepted** (approved and/or impleme | [ADR-097](ADR-097-adopt-rvcsi-as-ruview-csi-runtime.md) | Adopt rvCSI as RuView's primary CSI runtime (phased adoption) | Proposed | | [ADR-098](ADR-098-evaluate-midstream-fit.md) | Evaluate `ruvnet/midstream` for RuView's CSI / WebSocket / mesh pipeline | Rejected | | [ADR-099](ADR-099-midstream-introspection-tap.md) | Adopt midstream as RuView's real-time introspection + low-latency tap | Proposed | +| [ADR-263](ADR-263-ruview-npm-harness-deep-review.md) | `@ruvnet/ruview` npm harness — deep review + optimization strategy | Proposed | +| [ADR-264](ADR-264-rvagent-mcp-and-cli-npm-deep-review.md) | `@ruvnet/rvagent` MCP server + `@ruv/ruview-cli` — deep review + optimization strategy | Proposed | +| [ADR-265](ADR-265-ruview-npm-distribution-strategy.md) | RuView npm distribution strategy — CI gate, provenance, version single-sourcing, namespace | Proposed | --- diff --git a/harness/ruview/.claude/skills/calibrate-room/SKILL.md b/harness/ruview/.claude/skills/calibrate-room/SKILL.md index c369974b..da10bd14 100644 --- a/harness/ruview/.claude/skills/calibrate-room/SKILL.md +++ b/harness/ruview/.claude/skills/calibrate-room/SKILL.md @@ -6,24 +6,24 @@ description: Run the ADR-151 per-room calibration pipeline — baseline → enro # calibrate-room Turn a provisioned node + sensing-server into a working room model. Pure-Rust, -edge-deployable (ADR-151). Use the `ruview.calibrate` tool (installed +edge-deployable (ADR-151). Use the `ruview_calibrate` tool (installed `wifi-densepose` binary, else `cargo run -p wifi-densepose-cli`). ## Sequence 1. **baseline** — capture the empty room (Welford amplitude + von Mises phase). Leave the room empty. - `ruview.calibrate {step: "baseline"}` + `ruview_calibrate {step: "baseline"}` 2. **enroll** — record the occupant(s) doing the target activities. - `ruview.calibrate {step: "enroll"}` + `ruview_calibrate {step: "enroll"}` 3. 
**train-room** — train the bank of small specialists from baseline + enrollment. - `ruview.calibrate {step: "train-room"}` + `ruview_calibrate {step: "train-room"}` 4. **room-watch** — live presence/posture/breathing from the trained room. - `ruview.calibrate {step: "room-watch"}` (or the `room-watch` skill) + `ruview_calibrate {step: "room-watch"}` (or the `room-watch` skill) ## Honesty The specialists are calibrated to *this* room; cross-room transfer is a separate problem (LoRA recalibration, ADR-079 P9). Report which room a number came from, and tag presence/vitals accuracy MEASURED only with a held-out check — run -`ruview.claim_check` on the writeup. +`ruview_claim_check` on the writeup. diff --git a/harness/ruview/.claude/skills/onboard/SKILL.md b/harness/ruview/.claude/skills/onboard/SKILL.md index dd248099..b0fb0596 100644 --- a/harness/ruview/.claude/skills/onboard/SKILL.md +++ b/harness/ruview/.claude/skills/onboard/SKILL.md @@ -8,12 +8,12 @@ description: Zero-to-sensing path picker for RuView (WiFi-DensePose) — pick do Get a newcomer from nothing to a working RuView setup. **First fact to set:** WiFi sensing infers *coarse* pose/presence/breathing from Channel State Information — it is **not a camera**, and any accuracy number must be MEASURED against a baseline -(use the `verify` skill / `ruview.claim_check` tool). Never present WiFi output as +(use the `verify` skill / `ruview_claim_check` tool). Never present WiFi output as camera-grade. ## Pick a path -Run `ruview.onboard {path}` or decide from: +Run `ruview_onboard {path}` or decide from: 1. **docker-demo** — fastest, no hardware. Replays sample CSI into the dashboard. `docker run -p 8000:8000 ruvnet/wifi-densepose` → open `http://localhost:8000`. @@ -26,5 +26,5 @@ Run `ruview.onboard {path}` or decide from: ## Then - Live sensing → go to **provision-node**, then **calibrate-room**. 
-- Evaluating a model/claim → go to **verify** and run `ruview.claim_check` on any +- Evaluating a model/claim → go to **verify** and run `ruview_claim_check` on any report before you quote a number. diff --git a/harness/ruview/.claude/skills/provision-node/SKILL.md b/harness/ruview/.claude/skills/provision-node/SKILL.md index 315bef39..4da128b0 100644 --- a/harness/ruview/.claude/skills/provision-node/SKILL.md +++ b/harness/ruview/.claude/skills/provision-node/SKILL.md @@ -28,7 +28,7 @@ esptool --chip esp32s3 -p -b 460800 write_flash \ 0xf000 ota_data_initial.bin 0x20000 esp32-csi-node-s3-8mb.bin ``` -(`ruview.node_flash` returns the exact pinned command rather than running an +(`ruview_node_flash` returns the exact pinned command rather than running an unattended flash.) ## 3. Provision @@ -44,6 +44,6 @@ Never echo or commit the WiFi password. ## 4. Confirm CSI is flowing -`ruview.node_monitor {port}` — PASS criteria: serial shows `CSI cb #...` callbacks and +`ruview_node_monitor {port}` — PASS criteria: serial shows `CSI cb #...` callbacks and (on a bare board) `CSI filter upgraded to MGMT+DATA`. No callbacks → the node isn't capturing; do not proceed to calibration. diff --git a/harness/ruview/.claude/skills/train-pose/SKILL.md b/harness/ruview/.claude/skills/train-pose/SKILL.md index 61d2f1f6..f393db39 100644 --- a/harness/ruview/.claude/skills/train-pose/SKILL.md +++ b/harness/ruview/.claude/skills/train-pose/SKILL.md @@ -29,5 +29,5 @@ or temporal leakage. Example honest result (ADR-181): 1. Run the mean-pose baseline on the same split. 2. Report `(model − baseline)` in pp, with the split definition (chronological / blocked-gap / grouped-bucket; no leakage). -3. `ruview.claim_check` the writeup — it flags any untagged or 100%/perfect claim. +3. `ruview_claim_check` the writeup — it flags any untagged or 100%/perfect claim. 4. If it's a benchmark vs SOTA, tag MEASURED-EQUIVALENT only with the reproducer. 
diff --git a/harness/ruview/.claude/skills/verify/SKILL.md b/harness/ruview/.claude/skills/verify/SKILL.md index 9c3f6c03..1d7a77b6 100644 --- a/harness/ruview/.claude/skills/verify/SKILL.md +++ b/harness/ruview/.claude/skills/verify/SKILL.md @@ -9,7 +9,7 @@ The "prove everything" skill. Nothing ships as validated without this. ## Deterministic proof (Trust Kill Switch) -`ruview.verify` runs `archive/v1/data/proof/verify.py`: it feeds a reference signal +`ruview_verify` runs `archive/v1/data/proof/verify.py`: it feeds a reference signal through the production pipeline and hashes the output against `expected_features.sha256`. Must print **VERDICT: PASS**. If numpy/scipy changed the hash, regenerate with `verify.py --generate-hash` then re-verify. @@ -28,7 +28,7 @@ crate versions — a recipient can re-verify with one command. ## Claim honesty -Run `ruview.claim_check {text}` on any report, README section, PR body, or model card +Run `ruview_claim_check {text}` on any report, README section, PR body, or model card before quoting accuracy. 
It flags: - untagged accuracy numbers (must be MEASURED / CLAIMED / SYNTHETIC), - MEASURED claims with no reproducer cited, diff --git a/harness/ruview/.harness/manifest.json b/harness/ruview/.harness/manifest.json index 5d358852..eb0d85a5 100644 --- a/harness/ruview/.harness/manifest.json +++ b/harness/ruview/.harness/manifest.json @@ -13,27 +13,27 @@ ], "files": { ".claude/settings.json": "b0ea971383716f18b89db73010b8f0ea0f1b16bdec4cd1068245772ba1c27bdd", - ".claude/skills/calibrate-room/SKILL.md": "6a6c8211a7109feb76620c618963c10ad9a9f633ffce7676e631a80a1181986d", - ".claude/skills/onboard/SKILL.md": "22323732fe746b38b77a7c8c052e952dff2fe87ae939ba125379125827385f21", - ".claude/skills/provision-node/SKILL.md": "5ffe5a75873e873b80758d9c81005774d4191317227f2e9aa4345cbce3f29751", - ".claude/skills/train-pose/SKILL.md": "b3ee95bfb0b678eb3d101138b9ea0e7cab3db3a9906d19c4059f9cca0598e87b", - ".claude/skills/verify/SKILL.md": "c0314d5ead465d9089b6a4917fd125051a5be20dc07ba92d5b601fcaada32e19", - "CLAUDE.md": "7ecdb2b9d9abcf4aa22dd3ce553b60216a135e147893a59fa944fc1a8c81f5ef", + ".claude/skills/calibrate-room/SKILL.md": "4b29c7c331f47acad3c0f51b3d3d8f5b5573e316e081bae71dbe21a47fa95240", + ".claude/skills/onboard/SKILL.md": "97ee71f0aa985cfc03bb8e764789bb55c4f9fd5dae10a116c1071eab85b5893f", + ".claude/skills/provision-node/SKILL.md": "5f73823794ed5f0b25c102aa8b1bf2dd534a1ec468173d8330c2af0ca24f239c", + ".claude/skills/train-pose/SKILL.md": "92aebd4423470eb10eabaee642ec3493284d98b7ae9785e0f34378c709746e65", + ".claude/skills/verify/SKILL.md": "2d38d240e9810a7827e2ebd3717dc0f85c646cc92e46c3812fe77c5b9eb40b76", + "CLAUDE.md": "1d7af0c310dd8093b4ae6c9c94a1c0cc9ff02ac9c8d5b45caba5363c3af99475", "LICENSE": "631f94984f626818d42ecf717aa6e8e0afd4f9f355ca706bd2effafbd1416d06", - "README.md": "b77d30428de8efb6758f2ca3eb22e84849013b2c0e6c601d488d2ea5a6f0da44", - "bin/cli.js": "b0d74690cff4329dfe342271fc475eaa140b767bdb66b37cf4992ad209012fe8", - "package.json": 
"2af49561ef0d59cafc4b99885816e580635b2d2ad329dfe17c69b9df6f8afceb", - "skills/calibrate-room.md": "6a6c8211a7109feb76620c618963c10ad9a9f633ffce7676e631a80a1181986d", - "skills/onboard.md": "22323732fe746b38b77a7c8c052e952dff2fe87ae939ba125379125827385f21", - "skills/provision-node.md": "5ffe5a75873e873b80758d9c81005774d4191317227f2e9aa4345cbce3f29751", - "skills/train-pose.md": "b3ee95bfb0b678eb3d101138b9ea0e7cab3db3a9906d19c4059f9cca0598e87b", - "skills/verify.md": "c0314d5ead465d9089b6a4917fd125051a5be20dc07ba92d5b601fcaada32e19", - "src/guardrails.js": "1631cea02c4354fe6126c576300faf5f8b68ae2f5e2e3a658c99eb25a7403e55", - "src/mcp-server.js": "e51379f5ebb0b7b4670c7412714e559931ef1be8df20551f8f7309b53f0fb7af", - "src/tools.js": "b558f61bb202abf5a967ce3a6ccaea351f2d186238cf49c7fc151d1de028eee8" + "README.md": "ac35157d66243a5f9eba262bdf2d593e978d935b3dde6e455b7acf650768eac6", + "bin/cli.js": "85d8394375edb1e967418451452e68bdbe26e69fc6877ed4936894f6101e1a12", + "package.json": "4509b68bb4211217f1e9f3f95f3134b326ee23a2322aef8d19b99a4b1d415b08", + "skills/calibrate-room.md": "4b29c7c331f47acad3c0f51b3d3d8f5b5573e316e081bae71dbe21a47fa95240", + "skills/onboard.md": "97ee71f0aa985cfc03bb8e764789bb55c4f9fd5dae10a116c1071eab85b5893f", + "skills/provision-node.md": "5f73823794ed5f0b25c102aa8b1bf2dd534a1ec468173d8330c2af0ca24f239c", + "skills/train-pose.md": "92aebd4423470eb10eabaee642ec3493284d98b7ae9785e0f34378c709746e65", + "skills/verify.md": "2d38d240e9810a7827e2ebd3717dc0f85c646cc92e46c3812fe77c5b9eb40b76", + "src/guardrails.js": "66407b00d31c4f7939b75ee3e29598855c36a4154ccf1436655a4e52b0d7c034", + "src/mcp-server.js": "ad0f21be65a37237b9c2aad69e6e75166e5f101d902cb986377043545a7a80fb", + "src/tools.js": "1d72377ae53ad2b0c6dc03eb66f584422d8a60e442cb0d4f08355590f3edf031" }, "meta": { "surface": "cli+mcp", "adr": "ADR-182" } -} \ No newline at end of file +} diff --git a/harness/ruview/.harness/manifest.sha256 b/harness/ruview/.harness/manifest.sha256 index 
f68a09f7..4b3d3105 100644 --- a/harness/ruview/.harness/manifest.sha256 +++ b/harness/ruview/.harness/manifest.sha256 @@ -1 +1 @@ -6c6c1431c37472494c9b309c8b5d761dd4fc41e30313baead6320831fb982e57 manifest.json +380d4bf928fd7c5fa753d11a30c1e24e2ea471caca57b439f765a9d864cef472 manifest.json diff --git a/harness/ruview/CLAUDE.md b/harness/ruview/CLAUDE.md index 4d55629f..f6872a25 100644 --- a/harness/ruview/CLAUDE.md +++ b/harness/ruview/CLAUDE.md @@ -10,15 +10,15 @@ accuracy number: 1. It must be tagged **MEASURED** (with a reproducer named), **CLAIMED**, or **SYNTHETIC**. 2. Pose PCK is quoted only as a **delta over the mean-pose baseline** on a leakage-free held-out split. (A mean-pose predictor already scores ~50% PCK.) -3. Run `ruview.claim_check` on any report/PR/model-card. It flags untagged numbers and +3. Run `ruview_claim_check` on any report/PR/model-card. It flags untagged numbers and the retracted "100%/perfect accuracy" framing. 4. Firmware is "hardware-validated" only with a captured **boot log on real silicon** — never on a build-passes signal. ## Tools -`ruview.onboard`, `ruview.claim_check`, `ruview.verify`, `ruview.node_monitor`, -`ruview.calibrate`, `ruview.node_flash`. All fail-closed. Mutating/hardware tools +`ruview_onboard`, `ruview_claim_check`, `ruview_verify`, `ruview_node_monitor`, +`ruview_calibrate`, `ruview_node_flash`. All fail-closed. Mutating/hardware tools (`node_flash`) require explicit confirmation and are Windows/ESP-IDF gated. ## Skills diff --git a/harness/ruview/README.md b/harness/ruview/README.md index 17768682..c2941b28 100644 --- a/harness/ruview/README.md +++ b/harness/ruview/README.md @@ -7,34 +7,36 @@ crucially — **refuse to overstate accuracy**. Minted from the RuView monorepo WiFi sensing infers *coarse* pose/presence/breathing from Channel State Information. It is **not a camera**. Every accuracy number this harness emits must be MEASURED -against a baseline — that rule is enforced in code (`ruview.claim_check`). 
+against a baseline — that rule is enforced in code (`ruview_claim_check`). ## Quick start ```bash npx @ruvnet/ruview # onboard — pick a setup path -npx @ruvnet/ruview claim-check --text "we hit 100% accuracy" # the honesty guardrail +npx @ruvnet/ruview claim-check --file REPORT.md # the honesty guardrail (non-zero exit on untagged claims) npx @ruvnet/ruview verify # run the deterministic proof (VERDICT: PASS) npx @ruvnet/ruview doctor # self-check (tools + optional kernel/host) npx @ruvnet/ruview --help ``` -The operator tools are pure Node and run with **zero install weight**. The -`@metaharness/kernel` + host adapter are `optionalDependencies` — only `doctor` / -`install` use them, only if present. +The operator tools are pure Node and run with **zero install weight** — the +package has no dependencies at all (ADR-263 O3). `doctor` / `install` can +additionally use `@metaharness/kernel` + a host adapter if you install them +(`npm i @metaharness/kernel @metaharness/host-claude-code`); everything else +runs without them. 
-## Tools (`ruview.*`) +## Tools (`ruview_*`) Exposed both as CLI verbs and as an MCP server (`npx @ruvnet/ruview mcp start`): | Tool | What it does | |------|--------------| -| `ruview.onboard` | Pick docker-demo / repo-build / live-esp32; print the next command | -| `ruview.claim_check` | Lint text for untagged / overstated accuracy claims (guardrail) | -| `ruview.verify` | Run `verify.py` deterministic proof → VERDICT | -| `ruview.node_monitor` | Assert CSI is flowing on an ESP32 (read-only) | -| `ruview.calibrate` | ADR-151 room pipeline (baseline→enroll→train-room→room-watch) | -| `ruview.node_flash` | Build+flash firmware (Windows/ESP-IDF; mutating, guarded) | +| `ruview_onboard` | Pick docker-demo / repo-build / live-esp32; print the next command | +| `ruview_claim_check` | Lint text for untagged / overstated accuracy claims (guardrail) | +| `ruview_verify` | Run `verify.py` deterministic proof → VERDICT | +| `ruview_node_monitor` | Assert CSI is flowing on an ESP32 (read-only) | +| `ruview_calibrate` | ADR-151 room pipeline (baseline→enroll→train-room→room-watch) | +| `ruview_node_flash` | Build+flash firmware (Windows/ESP-IDF; mutating, guarded) | Every tool is **fail-closed**: missing repo / python / binary / port → an honest negative, never a fabricated success. diff --git a/harness/ruview/bin/cli.js b/harness/ruview/bin/cli.js index 5f3af268..9fc9658c 100644 --- a/harness/ruview/bin/cli.js +++ b/harness/ruview/bin/cli.js @@ -18,14 +18,14 @@ const NAME = 'ruview'; const ROOT = dirname(dirname(fileURLToPath(import.meta.url))); const SKILLS_DIR = join(ROOT, 'skills'); -// Map friendly CLI verbs → registry tool names. +// Map friendly CLI verbs → registry tool names (underscore-canonical, ADR-263). 
const VERB_TO_TOOL = { - onboard: 'ruview.onboard', - verify: 'ruview.verify', - 'claim-check': 'ruview.claim_check', - calibrate: 'ruview.calibrate', - monitor: 'ruview.node_monitor', - flash: 'ruview.node_flash', + onboard: 'ruview_onboard', + verify: 'ruview_verify', + 'claim-check': 'ruview_claim_check', + calibrate: 'ruview_calibrate', + monitor: 'ruview_node_monitor', + flash: 'ruview_node_flash', }; function pjson(o) { console.log(JSON.stringify(o, null, 2)); } @@ -112,13 +112,18 @@ export async function run(args) { const toolArgs = { ...flags }; if (cmd === 'claim-check') { if (flags.file) toolArgs.text = readFileSync(flags.file, 'utf8'); - const res = runTool('ruview.claim_check', toolArgs); + // Fail closed (ADR-263 O1): an honesty gate must never PASS on no input. + if (typeof toolArgs.text !== 'string' || toolArgs.text.trim().length === 0) { + console.error('claim-check: no input — pass --text "..." or --file (empty input is an error, not a PASS).'); + return 2; + } + const res = await runTool('ruview_claim_check', toolArgs); pjson(res); return res.ok ? 0 : 1; } if (cmd === 'monitor' && flags.seconds) toolArgs.seconds = Number(flags.seconds); if (cmd === 'calibrate' && typeof flags.args === 'string') toolArgs.args = flags.args.split(','); - const res = runTool(VERB_TO_TOOL[cmd], toolArgs); + const res = await runTool(VERB_TO_TOOL[cmd], toolArgs); pjson(res); return res.ok ? 0 : 1; } diff --git a/harness/ruview/package.json b/harness/ruview/package.json index a0e5869f..54327b45 100644 --- a/harness/ruview/package.json +++ b/harness/ruview/package.json @@ -1,6 +1,6 @@ { "name": "@ruvnet/ruview", - "version": "0.1.0", + "version": "0.2.0", "description": "RuView WiFi-sensing operator agent harness — onboard, calibrate, train, and verify camera-free WiFi-CSI sensing, with the project's MEASURED-vs-CLAIMED honesty guardrail enforced. 
Minted via metaharness (ADR-182).", "type": "module", "bin": { @@ -23,11 +23,10 @@ "scripts": { "test": "node --test test/*.test.mjs", "doctor": "node ./bin/cli.js doctor", - "mcp": "node ./bin/cli.js mcp start" - }, - "optionalDependencies": { - "@metaharness/kernel": "^0.1.0", - "@metaharness/host-claude-code": "^0.1.0" + "mcp": "node ./bin/cli.js mcp start", + "sync-skills": "node ./scripts/sync-skills.mjs", + "prepack": "node ./scripts/sync-skills.mjs", + "prepublishOnly": "npm test" }, "keywords": [ "wifi-sensing", diff --git a/harness/ruview/scripts/sync-skills.mjs b/harness/ruview/scripts/sync-skills.mjs new file mode 100644 index 00000000..936e1313 --- /dev/null +++ b/harness/ruview/scripts/sync-skills.mjs @@ -0,0 +1,37 @@ +#!/usr/bin/env node +// SPDX-License-Identifier: MIT +// ADR-263 O7: skills/*.md is the single source of truth; the host-projected +// copies (.claude/skills/<name>/SKILL.md) are GENERATED here at pack time. +// Run with --check to verify without writing (used by tests/CI). + +import { readdirSync, readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const ROOT = dirname(dirname(fileURLToPath(import.meta.url))); +const SRC = join(ROOT, 'skills'); +const DST = join(ROOT, '.claude', 'skills'); +const checkOnly = process.argv.includes('--check'); + +let drift = 0; +for (const f of readdirSync(SRC).filter((f) => f.endsWith('.md'))) { + const name = f.replace(/\.md$/, ''); + const src = readFileSync(join(SRC, f), 'utf8'); + const dstDir = join(DST, name); + const dstFile = join(dstDir, 'SKILL.md'); + const current = existsSync(dstFile) ? 
readFileSync(dstFile, 'utf8') : null; + if (current === src) continue; + drift++; + if (checkOnly) { + console.error(`DRIFT: .claude/skills/${name}/SKILL.md != skills/${f}`); + } else { + mkdirSync(dstDir, { recursive: true }); + writeFileSync(dstFile, src); + console.error(`synced .claude/skills/${name}/SKILL.md`); + } +} +if (checkOnly && drift > 0) { + console.error(`sync-skills --check: ${drift} file(s) out of sync — run \`npm run sync-skills\`.`); + process.exit(1); +} +console.error(`sync-skills: ${drift === 0 ? 'all in sync' : `${drift} file(s) ${checkOnly ? 'OUT OF SYNC' : 'synced'}`}`); diff --git a/harness/ruview/skills/calibrate-room.md b/harness/ruview/skills/calibrate-room.md index c369974b..da10bd14 100644 --- a/harness/ruview/skills/calibrate-room.md +++ b/harness/ruview/skills/calibrate-room.md @@ -6,24 +6,24 @@ description: Run the ADR-151 per-room calibration pipeline — baseline → enro # calibrate-room Turn a provisioned node + sensing-server into a working room model. Pure-Rust, -edge-deployable (ADR-151). Use the `ruview.calibrate` tool (installed +edge-deployable (ADR-151). Use the `ruview_calibrate` tool (installed `wifi-densepose` binary, else `cargo run -p wifi-densepose-cli`). ## Sequence 1. **baseline** — capture the empty room (Welford amplitude + von Mises phase). Leave the room empty. - `ruview.calibrate {step: "baseline"}` + `ruview_calibrate {step: "baseline"}` 2. **enroll** — record the occupant(s) doing the target activities. - `ruview.calibrate {step: "enroll"}` + `ruview_calibrate {step: "enroll"}` 3. **train-room** — train the bank of small specialists from baseline + enrollment. - `ruview.calibrate {step: "train-room"}` + `ruview_calibrate {step: "train-room"}` 4. **room-watch** — live presence/posture/breathing from the trained room. 
- `ruview.calibrate {step: "room-watch"}` (or the `room-watch` skill) + `ruview_calibrate {step: "room-watch"}` (or the `room-watch` skill) ## Honesty The specialists are calibrated to *this* room; cross-room transfer is a separate problem (LoRA recalibration, ADR-079 P9). Report which room a number came from, and tag presence/vitals accuracy MEASURED only with a held-out check — run -`ruview.claim_check` on the writeup. +`ruview_claim_check` on the writeup. diff --git a/harness/ruview/skills/onboard.md b/harness/ruview/skills/onboard.md index dd248099..b0fb0596 100644 --- a/harness/ruview/skills/onboard.md +++ b/harness/ruview/skills/onboard.md @@ -8,12 +8,12 @@ description: Zero-to-sensing path picker for RuView (WiFi-DensePose) — pick do Get a newcomer from nothing to a working RuView setup. **First fact to set:** WiFi sensing infers *coarse* pose/presence/breathing from Channel State Information — it is **not a camera**, and any accuracy number must be MEASURED against a baseline -(use the `verify` skill / `ruview.claim_check` tool). Never present WiFi output as +(use the `verify` skill / `ruview_claim_check` tool). Never present WiFi output as camera-grade. ## Pick a path -Run `ruview.onboard {path}` or decide from: +Run `ruview_onboard {path}` or decide from: 1. **docker-demo** — fastest, no hardware. Replays sample CSI into the dashboard. `docker run -p 8000:8000 ruvnet/wifi-densepose` → open `http://localhost:8000`. @@ -26,5 +26,5 @@ Run `ruview.onboard {path}` or decide from: ## Then - Live sensing → go to **provision-node**, then **calibrate-room**. -- Evaluating a model/claim → go to **verify** and run `ruview.claim_check` on any +- Evaluating a model/claim → go to **verify** and run `ruview_claim_check` on any report before you quote a number. 
diff --git a/harness/ruview/skills/provision-node.md b/harness/ruview/skills/provision-node.md index 315bef39..4da128b0 100644 --- a/harness/ruview/skills/provision-node.md +++ b/harness/ruview/skills/provision-node.md @@ -28,7 +28,7 @@ esptool --chip esp32s3 -p -b 460800 write_flash \ 0xf000 ota_data_initial.bin 0x20000 esp32-csi-node-s3-8mb.bin ``` -(`ruview.node_flash` returns the exact pinned command rather than running an +(`ruview_node_flash` returns the exact pinned command rather than running an unattended flash.) ## 3. Provision @@ -44,6 +44,6 @@ Never echo or commit the WiFi password. ## 4. Confirm CSI is flowing -`ruview.node_monitor {port}` — PASS criteria: serial shows `CSI cb #...` callbacks and +`ruview_node_monitor {port}` — PASS criteria: serial shows `CSI cb #...` callbacks and (on a bare board) `CSI filter upgraded to MGMT+DATA`. No callbacks → the node isn't capturing; do not proceed to calibration. diff --git a/harness/ruview/skills/train-pose.md b/harness/ruview/skills/train-pose.md index 61d2f1f6..f393db39 100644 --- a/harness/ruview/skills/train-pose.md +++ b/harness/ruview/skills/train-pose.md @@ -29,5 +29,5 @@ or temporal leakage. Example honest result (ADR-181): 1. Run the mean-pose baseline on the same split. 2. Report `(model − baseline)` in pp, with the split definition (chronological / blocked-gap / grouped-bucket; no leakage). -3. `ruview.claim_check` the writeup — it flags any untagged or 100%/perfect claim. +3. `ruview_claim_check` the writeup — it flags any untagged or 100%/perfect claim. 4. If it's a benchmark vs SOTA, tag MEASURED-EQUIVALENT only with the reproducer. diff --git a/harness/ruview/skills/verify.md b/harness/ruview/skills/verify.md index 9c3f6c03..1d7a77b6 100644 --- a/harness/ruview/skills/verify.md +++ b/harness/ruview/skills/verify.md @@ -9,7 +9,7 @@ The "prove everything" skill. Nothing ships as validated without this. 
## Deterministic proof (Trust Kill Switch) -`ruview.verify` runs `archive/v1/data/proof/verify.py`: it feeds a reference signal +`ruview_verify` runs `archive/v1/data/proof/verify.py`: it feeds a reference signal through the production pipeline and hashes the output against `expected_features.sha256`. Must print **VERDICT: PASS**. If numpy/scipy changed the hash, regenerate with `verify.py --generate-hash` then re-verify. @@ -28,7 +28,7 @@ crate versions — a recipient can re-verify with one command. ## Claim honesty -Run `ruview.claim_check {text}` on any report, README section, PR body, or model card +Run `ruview_claim_check {text}` on any report, README section, PR body, or model card before quoting accuracy. It flags: - untagged accuracy numbers (must be MEASURED / CLAIMED / SYNTHETIC), - MEASURED claims with no reproducer cited, diff --git a/harness/ruview/src/guardrails.js b/harness/ruview/src/guardrails.js index 787f5423..a5299dd2 100644 --- a/harness/ruview/src/guardrails.js +++ b/harness/ruview/src/guardrails.js @@ -4,15 +4,44 @@ // The project was accused of AI-slop; the cultural fix is that every accuracy // number must be tagged MEASURED (with a reproducer) or CLAIMED/SYNTHETIC, and // the retracted "100% accuracy" framing must never reappear untagged. This module -// is the static enforcement of that, shared by the `ruview.claim_check` MCP tool, +// is the static enforcement of that, shared by the `ruview_claim_check` MCP tool, // the `npx ruview claim-check` CLI, and the claude-code pre-output hook. -/** Phrases that signal a quantitative accuracy claim. */ +/** Phrases that signal a quantitative accuracy claim (safe as substrings). 
*/ const METRIC_TERMS = [ - 'accuracy', 'pck', 'pck@', 'f1', 'precision', 'recall', 'map', 'auc', - 'iou', 'mpjpe', 'error rate', 'detection rate', 'true positive', + 'accuracy', 'pck', 'precision', 'recall', + 'mpjpe', 'error rate', 'detection rate', 'true positive', ]; +// Short/ambiguous metric tokens (ADR-263 F11): 'map' is usually the English +// word or a file extension, 'f1'/'o1' collide with finding/option labels. +// They only count as metric mentions when word-bounded, not a `.map` file +// reference, and the line (after scrubbing) carries a number — "mAP 62.3" is +// a claim, "F-numbers map to findings" is not. +// 'map' additionally must not be a `.map` file suffix or a hyphenated +// compound ("map-free", "map-reduce") — mAP the metric never appears as either. +const METRIC_TERMS_SHORT = [/(? lower.includes(t))) return true; + if (!HAS_NUMBER_RE.test(scrubbed)) return false; + return METRIC_TERMS_SHORT.some((re) => re.test(scrubbed)); +} + /** Tags that make a claim honest (case-insensitive). */ const HONEST_TAGS = ['measured', 'claimed', 'synthetic', 'unvalidated', 'baseline']; @@ -20,6 +49,8 @@ const HONEST_TAGS = ['measured', 'claimed', 'synthetic', 'unvalidated', 'baselin const REPRODUCER_HINTS = [ 'verify.py', 'witness', 'mean-pose', 'mean pose', 'held-out', 'held out', 'baseline', 'reproduce', 'sha256', 'boot log', 'pck@20 vs', 'expected_features', + // Packaging-claim reproducers (ADR-263/264 npm reviews): the tarball itself. 
+ 'npm pack', 'npm view', 'npm i ', 'npm install', 'tarball', 'cargo test', ]; const PERCENT_RE = /\b(\d{1,3}(?:\.\d+)?)\s?%/g; @@ -49,7 +80,8 @@ export function claimCheck(text) { const hasPercent = PERCENT_RE.test(line); PERCENT_RE.lastIndex = 0; // reset stateful global regex - const mentionsMetric = METRIC_TERMS.some((t) => lower.includes(t)); + const scrubbed = scrubLine(lower); + const mentionsMetric = mentionsMetricTerm(lower, scrubbed); if (!hasPercent && !mentionsMetric) return; const tagged = HONEST_TAGS.some((t) => lower.includes(t)); @@ -67,6 +99,15 @@ export function claimCheck(text) { return; } + // A quantitative claim needs a number. Digits hidden in a code span still + // count — "accuracy reached `0.95`" is a claim — so test the line with only + // finding/option labels stripped, NOT the code-span-scrubbed copy: scrubbing + // dropped `0.95` and wrongly short-circuited both the untagged and the + // MEASURED-without-reproducer checks below. A bare metric word in prose + // ("precision matters here", "every accuracy number must be MEASURED") has no + // number and is not a taggable claim (ADR-263 F11). + if (!hasPercent && !HAS_NUMBER_RE.test(lower.replace(LABEL_TOKEN_RE, ' '))) return; + // A metric/percent with no honesty tag at all. if (!tagged) { findings.push({ @@ -79,7 +120,8 @@ export function claimCheck(text) { return; } - // Tagged MEASURED but cites no reproducer — still a gap. + // Tagged MEASURED but cites no reproducer — still a gap (reached now even + // when the only number is inside a code span, e.g. "accuracy `0.97` (MEASURED)"). if (lower.includes('measured') && !hasReproducer) { findings.push({ severity: 'medium', diff --git a/harness/ruview/src/mcp-server.js b/harness/ruview/src/mcp-server.js index 04c1a552..a414e68b 100644 --- a/harness/ruview/src/mcp-server.js +++ b/harness/ruview/src/mcp-server.js @@ -3,14 +3,23 @@ // // Dependency-free on purpose: a published `npx ruview` must `mcp start` without // pulling the full MCP SDK. 
Implements the subset hosts use: `initialize`, -// `tools/list`, `tools/call`, and the `notifications/initialized` ack. Logs go to -// stderr ONLY — stdout is the JSON-RPC channel and must stay clean. +// `tools/list`, `tools/call`, `ping`, empty `resources/list`/`prompts/list` +// stubs, and the `notifications/initialized` ack. Logs go to stderr ONLY — +// stdout is the JSON-RPC channel and must stay clean. +// +// ADR-263 O2: `tools/call` is dispatched asynchronously — a long-running +// verify/calibrate no longer blocks ping/tools/list, so hosts that health-check +// mid-run see a live server. Responses may therefore arrive out of request +// order, which JSON-RPC permits (ids correlate them). import { createInterface } from 'node:readline'; +import { readFileSync } from 'node:fs'; import { listTools, runTool } from './tools.js'; const PROTOCOL_VERSION = '2024-11-05'; -const SERVER_INFO = { name: 'ruview', version: '0.1.0' }; +// Single-source the version from package.json (ADR-263 O6). +const PKG = JSON.parse(readFileSync(new URL('../package.json', import.meta.url), 'utf8')); +const SERVER_INFO = { name: 'ruview', version: PKG.version }; function send(msg) { process.stdout.write(JSON.stringify(msg) + '\n'); @@ -19,7 +28,7 @@ function result(id, res) { send({ jsonrpc: '2.0', id, result: res }); } function error(id, code, message) { send({ jsonrpc: '2.0', id, error: { code, message } }); } function log(...a) { process.stderr.write('[ruview-mcp] ' + a.join(' ') + '\n'); } -function handle(msg) { +async function handle(msg) { const { id, method, params } = msg; switch (method) { case 'initialize': @@ -27,19 +36,24 @@ function handle(msg) { protocolVersion: PROTOCOL_VERSION, capabilities: { tools: { listChanged: false } }, serverInfo: SERVER_INFO, - instructions: 'RuView WiFi-sensing operator tools. All results are fail-closed; accuracy claims must pass ruview.claim_check.', + instructions: 'RuView WiFi-sensing operator tools. 
All results are fail-closed; accuracy claims must pass ruview_claim_check.', }); case 'notifications/initialized': case 'initialized': - return; // notification — no response + case 'notifications/cancelled': + return; // notifications — no response case 'ping': return result(id, {}); case 'tools/list': return result(id, { tools: listTools() }); + case 'resources/list': + return result(id, { resources: [] }); + case 'prompts/list': + return result(id, { prompts: [] }); case 'tools/call': { const name = params?.name; const args = params?.arguments || {}; - const out = runTool(name, args); + const out = await runTool(name, args); // MCP content envelope: text block with the JSON, isError reflects ok=false. return result(id, { content: [{ type: 'text', text: JSON.stringify(out, null, 2) }], @@ -52,17 +66,42 @@ function handle(msg) { } export function startMcpServer() { - log(`starting (protocol ${PROTOCOL_VERSION}, ${listTools().length} tools)`); + log(`starting v${SERVER_INFO.version} (protocol ${PROTOCOL_VERSION}, ${listTools().length} tools)`); const rl = createInterface({ input: process.stdin, crlfDelay: Infinity }); + + // tools/call runs are serialized through a FIFO promise chain: hardware/mutating + // tools (calibrate, serial monitor, flash) must never overlap. ping/tools/list/ + // initialize/resources/prompts stay immediate (ADR-263 O2 — a health check must + // answer during a long tool run). `toolChain` also lets stdin-close drain the + // in-flight call so its response is flushed instead of dropped by process.exit. 
+ let toolChain = Promise.resolve(); + + const dispatch = (msg) => handle(msg).catch((err) => { + if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err)); + log('handler error:', String(err)); + }); + rl.on('line', (line) => { const s = line.trim(); if (!s) return; let msg; try { msg = JSON.parse(s); } catch { return log('bad JSON line dropped'); } - try { handle(msg); } catch (err) { - if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err)); - log('handler error:', String(err)); + if (msg && msg.method === 'tools/call') { + toolChain = toolChain.then(() => dispatch(msg)); // one tool at a time + } else { + dispatch(msg); // health/list/handshake answer immediately, even mid tool run } }); - rl.on('close', () => { log('stdin closed — exiting'); process.exit(0); }); + + rl.on('close', () => { + // Wait for any queued/in-flight tool call to settle (its response written) + // before exiting — fire-and-forget used to race this and drop the response. + toolChain.then(() => { + log('stdin closed — exiting'); + const done = () => process.exit(0); + // Pipe writes are async; flush buffered stdout before exit. + if (process.stdout.writableLength) process.stdout.once('drain', done); + else done(); + }); + }); } diff --git a/harness/ruview/src/tools.js b/harness/ruview/src/tools.js index 6465f2b2..0a0d7af8 100644 --- a/harness/ruview/src/tools.js +++ b/harness/ruview/src/tools.js @@ -7,10 +7,15 @@ // `wifi-densepose` binary, an ESP32 on a port) is absent, it returns an honest // negative — never a fabricated success. This mirrors the project's "prove // everything" rule and the RuField fail-closed posture (ADR-262 §3.3). +// +// ADR-263: handlers are async (promise-based spawn, never spawnSync) so the MCP +// server keeps answering ping/tools/list while a long verify/calibrate runs. 
+// Canonical tool names use underscores (host tool-name regexes commonly enforce +// ^[a-zA-Z0-9_-]{1,64}$); the historical dotted names are accepted as aliases. -import { spawnSync } from 'node:child_process'; -import { existsSync, readFileSync } from 'node:fs'; -import { join, dirname, resolve } from 'node:path'; +import { spawn } from 'node:child_process'; +import { existsSync, accessSync, constants } from 'node:fs'; +import { join, dirname, resolve, delimiter } from 'node:path'; import { claimCheck, summarize } from './guardrails.js'; /** Walk up from `start` to find the RuView monorepo root (or null). */ @@ -27,22 +32,75 @@ export function findRepoRoot(start = process.cwd()) { return null; } -function which(cmd) { - const probe = process.platform === 'win32' - ? spawnSync('where', [cmd], { encoding: 'utf8' }) - : spawnSync('command', ['-v', cmd], { encoding: 'utf8', shell: true }); - return probe.status === 0 ? (probe.stdout || '').trim().split(/\r?\n/)[0] : null; +// Dep-free PATH scan (ADR-263 O8) — no shell subprocess per lookup. Only hits +// are memoized: a miss can resolve later in a long-lived MCP session (the +// operator installs python/the CLI mid-run), so misses are re-probed each call. +const whichCache = new Map(); +export function which(cmd) { + if (whichCache.has(cmd)) return whichCache.get(cmd); + const isWin = process.platform === 'win32'; + const exts = isWin + ? (process.env.PATHEXT || '.COM;.EXE;.BAT;.CMD').split(';').filter(Boolean) + : ['']; + let found = null; + outer: + for (const dir of (process.env.PATH || '').split(delimiter)) { + if (!dir) continue; + for (const ext of isWin ? ['', ...exts] : exts) { + const p = join(dir, cmd + ext); + try { + accessSync(p, isWin ? 
constants.F_OK : constants.X_OK); + found = p; + break outer; + } catch { /* keep scanning */ } + } + } + if (found !== null) whichCache.set(cmd, found); + return found; } -function run(cmd, args, opts = {}) { - const r = spawnSync(cmd, args, { encoding: 'utf8', timeout: opts.timeout ?? 120000, ...opts }); - return { - status: r.status, - ok: r.status === 0, - stdout: (r.stdout || '').slice(-8000), - stderr: (r.stderr || '').slice(-4000), - error: r.error ? r.error.message : null, - }; +// Bounded output tails (ADR-263 O4): spawnSync's default 1 MiB maxBuffer killed +// chatty children with ENOBUFS; handlers only ever surface the last few kB, so +// keep rolling tails instead of the full stream. +const STDOUT_TAIL = 65536; +const STDERR_TAIL = 16384; + +/** Promise-based spawn with timeout + rolling output tails. */ +export function run(cmd, args, opts = {}) { + const timeout = opts.timeout ?? 120000; + return new Promise((resolvePromise) => { + let stdout = ''; + let stderr = ''; + let child; + try { + child = spawn(cmd, args, { cwd: opts.cwd, stdio: ['ignore', 'pipe', 'pipe'] }); + } catch (e) { + resolvePromise({ status: null, ok: false, stdout: '', stderr: '', error: e.message }); + return; + } + let timedOut = false; + const timer = setTimeout(() => { timedOut = true; child.kill('SIGKILL'); }, timeout); + child.stdout.on('data', (d) => { + stdout = (stdout + d).slice(-STDOUT_TAIL); + }); + child.stderr.on('data', (d) => { + stderr = (stderr + d).slice(-STDERR_TAIL); + }); + child.on('error', (e) => { + clearTimeout(timer); + resolvePromise({ status: null, ok: false, stdout, stderr, error: e.message }); + }); + child.on('close', (status) => { + clearTimeout(timer); + resolvePromise({ + status, + ok: status === 0, + stdout, + stderr, + error: timedOut ? `timed out after ${timeout} ms` : null, + }); + }); + }); } const ONBOARD_PATHS = { @@ -51,12 +109,36 @@ const ONBOARD_PATHS = { 'live-esp32': 'Real sensing. 
Flash an ESP32-S3 (see `provision-node` skill), point it at the sensing-server, then `calibrate → enroll → train-room → room-watch` (see `calibrate-room`). Good for an actual install.', }; +// Read-only serial monitor script; the port arrives via sys.argv (ADR-263 O5 — +// never spliced into interpreter source). +const MONITOR_SCRIPT = [ + 'import sys,time', + 'try:', + ' import serial', + 'except Exception as e:', + " print('NO_PYSERIAL'); sys.exit(3)", + 'port=sys.argv[1]', + 'dur=float(sys.argv[2])', + 'ser=serial.Serial(port,115200,timeout=1)', + 'csi=0; n=0; t=time.time()', + 'while time.time()-t 0 ? Number(args.seconds) : 12; - const script = [ - 'import sys,time', - 'try:', - ' import serial', - 'except Exception as e:', - " print('NO_PYSERIAL'); sys.exit(3)", - `ser=serial.Serial(${JSON.stringify(port)},115200,timeout=1)`, - 'csi=0; n=0; t=time.time()', - `while time.time()-t<${dur}:`, - ' ln=ser.readline()', - ' if not ln: continue', - " s=ln.decode('utf-8','replace')", - ' n+=1', - " if 'CSI cb' in s or 'csi_collector' in s: csi+=1", - " if 'MGMT+DATA' in s: print('UPGRADE_MGMT_DATA')", - 'ser.close()', - "print(f'LINES={n} CSI={csi}')", - ].join('\n'); - const r = run(py, ['-c', script], { timeout: (dur + 10) * 1000 }); + const r = await run(py, ['-c', MONITOR_SCRIPT, port, String(dur)], { timeout: (dur + 10) * 1000 }); if (r.stdout.includes('NO_PYSERIAL')) return { ok: false, reason: 'pyserial_missing', hint: 'pip install pyserial' }; if (!r.ok) return { ok: false, reason: 'port_error', stderr: r.stderr, error: r.error }; const csi = Number((r.stdout.match(/CSI=(\d+)/) || [])[1] || 0); @@ -156,7 +224,7 @@ export const TOOLS = { }, }, - 'ruview.calibrate': { + ruview_calibrate: { title: 'Calibrate room', description: 'Run the ADR-151 room pipeline via the wifi-densepose CLI (baseline→enroll→train-room). 
Fail-closed if the binary is absent.', inputSchema: { @@ -166,7 +234,7 @@ export const TOOLS = { args: { type: 'array', items: { type: 'string' }, description: 'Extra CLI args passed through.' }, }, }, - handler(args = {}) { + async handler(args = {}) { const step = args.step || 'baseline'; const bin = which('wifi-densepose'); const repo = findRepoRoot(); @@ -174,13 +242,13 @@ export const TOOLS = { const passthru = Array.isArray(args.args) ? args.args.map(String) : []; // Prefer the installed binary; otherwise cargo-run from the repo. const r = bin - ? run(bin, [step, ...passthru], { timeout: 300000 }) - : run('cargo', ['run', '-q', '-p', 'wifi-densepose-cli', '--', step, ...passthru], { cwd: repo, timeout: 600000 }); + ? await run(bin, [step, ...passthru], { timeout: 300000 }) + : await run('cargo', ['run', '-q', '-p', 'wifi-densepose-cli', '--', step, ...passthru], { cwd: repo, timeout: 600000 }); return { ok: r.ok, step, via: bin ? 'binary' : 'cargo', exit: r.status, tail: r.stdout.slice(-1500), stderr: r.stderr.slice(-500) }; }, }, - 'ruview.node_flash': { + ruview_node_flash: { title: 'Node flash', description: 'Build+flash an ESP32 firmware variant. MUTATING + hardware. Fail-closed off-Windows or without ESP-IDF. Never claims hardware validation without a boot log.', inputSchema: { @@ -203,14 +271,27 @@ export const TOOLS = { }, }; -/** Run one tool by name; returns the structured result (or an error envelope). */ -export function runTool(name, args) { - const tool = TOOLS[name]; - if (!tool) return { ok: false, reason: 'unknown_tool', name, available: Object.keys(TOOLS) }; +// Historical dotted names (pre-ADR-263) accepted as call-time aliases; the +// underscore form is what tools/list advertises. +export const TOOL_ALIASES = Object.fromEntries( + Object.keys(TOOLS).map((name) => [name.replace(/_/, '.'), name]) +); + +/** Resolve a canonical or aliased tool name (or null). 
*/ +export function resolveToolName(name) { + if (TOOLS[name]) return name; + if (TOOL_ALIASES[name]) return TOOL_ALIASES[name]; + return null; +} + +/** Run one tool by name (canonical or dotted alias); always resolves to the structured result. */ +export async function runTool(name, args) { + const canonical = resolveToolName(name); + if (!canonical) return { ok: false, reason: 'unknown_tool', name, available: Object.keys(TOOLS) }; try { - return tool.handler(args || {}); + return await TOOLS[canonical].handler(args || {}); } catch (err) { - return { ok: false, reason: 'tool_threw', name, error: String(err && err.message || err) }; + return { ok: false, reason: 'tool_threw', name: canonical, error: String(err && err.message || err) }; } } diff --git a/harness/ruview/test/mcp.test.mjs b/harness/ruview/test/mcp.test.mjs new file mode 100644 index 00000000..0b99e807 --- /dev/null +++ b/harness/ruview/test/mcp.test.mjs @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: MIT +// MCP stdio server e2e — spawns `bin/cli.js mcp start` and speaks JSON-RPC. +// Pins ADR-263 O2 (ping answered while a long tools/call runs), O6 (version +// from package.json), and O8 (underscore names advertised, dotted accepted, +// resources/prompts stubs). + +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { spawn } from 'node:child_process'; +import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, rmSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { tmpdir } from 'node:os'; +import { fileURLToPath } from 'node:url'; +import { which } from '../src/tools.js'; + +const PKG_ROOT = dirname(dirname(fileURLToPath(import.meta.url))); +const CLI = join(PKG_ROOT, 'bin', 'cli.js'); + +/** Start the MCP server; returns {send, next, close} where next(id) resolves the response with that id. 
*/ +function startServer() { + const child = spawn(process.execPath, [CLI, 'mcp', 'start'], { stdio: ['pipe', 'pipe', 'pipe'] }); + const waiters = new Map(); + let buf = ''; + child.stdout.on('data', (d) => { + buf += d; + let nl; + while ((nl = buf.indexOf('\n')) !== -1) { + const line = buf.slice(0, nl).trim(); + buf = buf.slice(nl + 1); + if (!line) continue; + const msg = JSON.parse(line); + const w = waiters.get(msg.id); + if (w) { waiters.delete(msg.id); w(msg); } + } + }); + return { + send(msg) { child.stdin.write(JSON.stringify(msg) + '\n'); }, + next(id) { return new Promise((res) => waiters.set(id, res)); }, + close() { child.stdin.end(); child.kill(); }, + }; +} + +test('MCP handshake: initialize reports the package.json version; list endpoints respond', async () => { + const pkg = JSON.parse(readFileSync(join(PKG_ROOT, 'package.json'), 'utf8')); + const s = startServer(); + try { + s.send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} }); + const init = await s.next(1); + assert.equal(init.result.serverInfo.version, pkg.version, 'ADR-263 O6: version must match package.json'); + + s.send({ jsonrpc: '2.0', id: 2, method: 'tools/list' }); + const tools = (await s.next(2)).result.tools; + assert.equal(tools.length, 6); + for (const t of tools) assert.match(t.name, /^[a-zA-Z0-9_-]{1,64}$/, `advertised name not host-safe: ${t.name}`); + + s.send({ jsonrpc: '2.0', id: 3, method: 'resources/list' }); + assert.deepEqual((await s.next(3)).result, { resources: [] }); + s.send({ jsonrpc: '2.0', id: 4, method: 'prompts/list' }); + assert.deepEqual((await s.next(4)).result, { prompts: [] }); + + // Dotted legacy name still callable (alias). 
+ s.send({ jsonrpc: '2.0', id: 5, method: 'tools/call', params: { name: 'ruview.onboard', arguments: {} } }); + const call = await s.next(5); + assert.equal(call.result.isError, false); + } finally { + s.close(); + } +}); + +test('MCP server answers ping while a long tools/call is in flight (ADR-263 O2)', { skip: !which('python') && !which('python3') ? 'python not on PATH' : false }, async () => { + // Fake RuView repo whose verify.py sleeps 3 s then passes. + const repo = mkdtempSync(join(tmpdir(), 'ruview-mcp-e2e-')); + const proofDir = join(repo, 'archive', 'v1', 'data', 'proof'); + mkdirSync(proofDir, { recursive: true }); + writeFileSync(join(proofDir, 'verify.py'), 'import time\ntime.sleep(3)\nprint("VERDICT: PASS")\n'); + + const s = startServer(); + try { + s.send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} }); + await s.next(1); + + const verifyDone = s.next(10); + s.send({ jsonrpc: '2.0', id: 10, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } }); + + // Give the server a beat to start the child, then ping. + await new Promise((r) => setTimeout(r, 300)); + const t0 = Date.now(); + const pinged = s.next(11); + s.send({ jsonrpc: '2.0', id: 11, method: 'ping' }); + await pinged; + const pingMs = Date.now() - t0; + assert.ok(pingMs < 1000, `ping took ${pingMs} ms while verify was in flight — server is blocking`); + + const verify = await verifyDone; + const payload = JSON.parse(verify.result.content[0].text); + assert.equal(payload.verdict, 'PASS'); + } finally { + s.close(); + rmSync(repo, { recursive: true, force: true }); + } +}); + +test('tools/call executions are serialized — two slow calls run sequentially', { skip: !which('python') && !which('python3') ? 'python not on PATH' : false }, async () => { + // Two verify.py that each sleep 0.8 s. Serialized ⇒ ~1.6 s+; concurrent ⇒ ~0.8 s. 
+ const repo = mkdtempSync(join(tmpdir(), 'ruview-mcp-serial-')); + const proofDir = join(repo, 'archive', 'v1', 'data', 'proof'); + mkdirSync(proofDir, { recursive: true }); + writeFileSync(join(proofDir, 'verify.py'), 'import time\ntime.sleep(0.8)\nprint("VERDICT: PASS")\n'); + + const s = startServer(); + try { + s.send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} }); + await s.next(1); + + const t0 = Date.now(); + const a = s.next(20); + const b = s.next(21); + s.send({ jsonrpc: '2.0', id: 20, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } }); + s.send({ jsonrpc: '2.0', id: 21, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } }); + const [ra, rb] = await Promise.all([a, b]); + const elapsed = Date.now() - t0; + + assert.equal(JSON.parse(ra.result.content[0].text).verdict, 'PASS'); + assert.equal(JSON.parse(rb.result.content[0].text).verdict, 'PASS'); + assert.ok(elapsed > 1400, `two 0.8 s tool calls finished in ${elapsed} ms — they overlapped instead of serializing`); + } finally { + s.close(); + rmSync(repo, { recursive: true, force: true }); + } +}); + +test('stdin close flushes an in-flight tools/call response before exit', async () => { + const child = spawn(process.execPath, [CLI, 'mcp', 'start'], { stdio: ['pipe', 'pipe', 'pipe'] }); + let out = ''; + child.stdout.on('data', (d) => { out += d; }); + const exited = new Promise((res) => child.on('exit', res)); + + // Write a tools/call then immediately close stdin. The old fire-and-forget + // dispatch raced rl 'close' → process.exit and could drop this response. 
+ child.stdin.write(JSON.stringify({ jsonrpc: '2.0', id: 42, method: 'tools/call', params: { name: 'ruview_onboard', arguments: {} } }) + '\n'); + child.stdin.end(); + + await exited; + const msgs = out.trim().split('\n').filter(Boolean).map((l) => JSON.parse(l)); + const resp = msgs.find((m) => m.id === 42); + assert.ok(resp, 'the in-flight tools/call response must be flushed to stdout before exit'); + assert.equal(resp.result.isError, false); +}); diff --git a/harness/ruview/test/tools.test.mjs b/harness/ruview/test/tools.test.mjs index a81ae131..9df16b7b 100644 --- a/harness/ruview/test/tools.test.mjs +++ b/harness/ruview/test/tools.test.mjs @@ -1,12 +1,18 @@ // SPDX-License-Identifier: MIT // RuView harness tests — Node's built-in test runner (no devDeps to install). -// Run: `node --test test/` (or `npm test`). +// Run: `node --test test/*.test.mjs` (or `npm test`). import { test } from 'node:test'; import assert from 'node:assert/strict'; +import { readdirSync, readFileSync, mkdtempSync, writeFileSync, rmSync } from 'node:fs'; +import { join, dirname, delimiter } from 'node:path'; +import { tmpdir } from 'node:os'; +import { fileURLToPath } from 'node:url'; import { claimCheck, summarize } from '../src/guardrails.js'; -import { TOOLS, runTool, listTools, findRepoRoot } from '../src/tools.js'; -import { run } from '../bin/cli.js'; +import { TOOLS, TOOL_ALIASES, runTool, listTools, findRepoRoot, run, which } from '../src/tools.js'; +import { run as cliRun } from '../bin/cli.js'; + +const PKG_ROOT = dirname(dirname(fileURLToPath(import.meta.url))); test('guardrail flags the retracted 100% framing as high severity', () => { const r = claimCheck('Our model reaches 100% accuracy on every pose.'); @@ -37,71 +43,190 @@ test('guardrail ignores non-metric prose', () => { assert.equal(claimCheck('').ok, true); }); +// ADR-263 F11/O9: precision pins — short metric tokens must not fire on prose. 
+test('guardrail does not false-positive on "map"/"F1" prose (ADR-263 F11)', () => { + assert.equal(claimCheck('F-numbers map to findings.').ok, true); + assert.equal(claimCheck('### F1 (HIGH, broken export): `require` points at a missing file').ok, true); + assert.equal(claimCheck('The 0.1.0 tarball ships 44 `.map` files = 62,698 B of dead weight.').ok, true); + assert.equal(claimCheck('the source maps can never resolve').ok, true); + assert.equal(claimCheck('- **O1 (F1):** fix `exports` (see F2 for the 33% map weight — MEASURED, tarball listing)').ok, true); + assert.equal(claimCheck('ADR-264: exports fix, map-free tarball, session-per-transport').ok, true); +}); + +test('guardrail still catches real short-token metric claims', () => { + assert.equal(claimCheck('We reach mAP 62.3 on COCO.').ok, false); + assert.equal(claimCheck('F1 score of 0.91 on the held set.').ok, false, 'f1 with a real score must still fire'); + assert.equal(claimCheck('IoU 0.75 across rooms.').ok, false); +}); + +// Digits hidden in a code span still make a claim — scrubbing must not blind the +// number gate to `0.95` (regression: code-span number bypassed the gate). +test('guardrail flags an accuracy number stated inside a code span', () => { + const r = claimCheck('Count accuracy reached `0.95` in our tests.'); + assert.equal(r.ok, false, JSON.stringify(r.findings)); + assert.ok(r.findings.some((f) => /not tagged/i.test(f.reason))); +}); + +// A MEASURED claim whose only number hides in a code span must still reach the +// missing-reproducer check (regression: the scrubbed gate short-circuited it). +// Bare metric prose with no number at all (e.g. the README rule text) stays a pass. 
+test('guardrail flags a MEASURED code-span number with no reproducer', () => { + const r = claimCheck('Detection accuracy `0.97` on the set (MEASURED).'); + assert.equal(r.ok, false, JSON.stringify(r.findings)); + assert.ok(r.findings.some((f) => /no reproducer/i.test(f.reason))); + assert.equal(claimCheck('Every accuracy number must be MEASURED against a baseline.').ok, true); +}); + +// F1-score phrasings ("F1: 0.91", "F1 reaches 0.91") were scrubbed as option +// labels and slipped through; option refs alone must still not false-positive. +test('guardrail catches F1-score claims but not bare option refs (ADR-263 F11)', () => { + assert.equal(claimCheck('F1: 0.91 on the held-out set.').ok, false, 'F1: value is a metric claim'); + assert.equal(claimCheck('F1 reaches 0.91 on the held-out set.').ok, false, 'F1 with a nearby number is a claim'); + assert.equal(claimCheck('Options O1–O9 are tracked in ADR-263 O2.').ok, true, 'option labels are not metrics'); + assert.equal(claimCheck('ADR-263 O2 lands the exports fix.').ok, true); +}); + test('summarize gives PASS/finding text', () => { assert.match(summarize(claimCheck('nothing here')), /PASS/); assert.match(summarize(claimCheck('100% accuracy')), /finding/); }); -test('registry exposes the documented tools with schemas', () => { +test('registry exposes the documented tools with schemas (underscore-canonical)', () => { const names = Object.keys(TOOLS); - for (const n of ['ruview.onboard', 'ruview.claim_check', 'ruview.verify', 'ruview.node_monitor', 'ruview.calibrate', 'ruview.node_flash']) { + for (const n of ['ruview_onboard', 'ruview_claim_check', 'ruview_verify', 'ruview_node_monitor', 'ruview_calibrate', 'ruview_node_flash']) { assert.ok(names.includes(n), `missing ${n}`); assert.equal(TOOLS[n].inputSchema.type, 'object'); + assert.match(n, /^[a-zA-Z0-9_-]{1,64}$/, 'canonical names must satisfy host tool-name regexes'); } assert.equal(listTools().length, names.length); }); -test('ruview.onboard returns paths 
and a recommendation', () => { - const r = runTool('ruview.onboard', {}); +test('dotted legacy names resolve via aliases (ADR-263 O8)', async () => { + assert.equal(TOOL_ALIASES['ruview.claim_check'], 'ruview_claim_check'); + assert.equal(TOOL_ALIASES['ruview.node_monitor'], 'ruview_node_monitor'); + const r = await runTool('ruview.onboard', {}); + assert.equal(r.ok, true); +}); + +test('ruview_onboard returns paths and a recommendation', async () => { + const r = await runTool('ruview_onboard', {}); assert.equal(r.ok, true); assert.ok(r.paths['live-esp32']); assert.ok(['repo-build', 'docker-demo'].includes(r.recommend)); }); -test('ruview.claim_check tool wraps the guardrail', () => { - const r = runTool('ruview.claim_check', { text: '100% accuracy' }); +test('ruview_claim_check tool wraps the guardrail', async () => { + const r = await runTool('ruview_claim_check', { text: '100% accuracy' }); assert.equal(r.ok, false); assert.match(r.summary, /honesty|tag|MEASURED|finding/i); }); -test('unknown tool fails closed', () => { - const r = runTool('ruview.does_not_exist', {}); +// ADR-263 F1/O1: the honesty gate must fail closed on empty input. 
+test('ruview_claim_check fails closed on empty/missing text', async () => { + const empty = await runTool('ruview_claim_check', { text: '' }); + assert.equal(empty.ok, false); + assert.equal(empty.reason, 'empty_text'); + const missing = await runTool('ruview_claim_check', {}); + assert.equal(missing.ok, false); + assert.equal(missing.reason, 'empty_text'); +}); + +test('unknown tool fails closed', async () => { + const r = await runTool('ruview_does_not_exist', {}); assert.equal(r.ok, false); assert.equal(r.reason, 'unknown_tool'); }); -test('node_monitor fails closed without a port', () => { - const r = runTool('ruview.node_monitor', {}); +test('node_monitor fails closed without a port', async () => { + const r = await runTool('ruview_node_monitor', {}); assert.equal(r.ok, false); assert.equal(r.reason, 'no_port'); }); -test('node_flash refuses without confirm (mutating guard)', () => { - const r = runTool('ruview.node_flash', { port: 'COM8', variant: 's3-8mb' }); +test('node_flash refuses without confirm (mutating guard)', async () => { + const r = await runTool('ruview_node_flash', { port: 'COM8', variant: 's3-8mb' }); assert.equal(r.ok, false); // either not-confirmed (win32) or unsupported_platform (posix) — both fail-closed assert.ok(['not_confirmed', 'unsupported_platform'].includes(r.reason)); }); -test('verify fails closed when not in a RuView repo', () => { +test('verify fails closed when not in a RuView repo', async () => { // point at a tmp dir with no repo markers - const r = runTool('ruview.verify', { repo: process.platform === 'win32' ? 'C:/Windows/Temp' : '/tmp' }); + const r = await runTool('ruview_verify', { repo: process.platform === 'win32' ? 'C:/Windows/Temp' : '/tmp' }); assert.equal(r.ok, false); assert.ok(['proof_missing', 'python_missing'].includes(r.reason), r.reason); }); +// ADR-263 F2/O2: registry-level concurrency — a slow child must not block +// other tool calls (run() is promise-based, never spawnSync). 
+test('run() is non-blocking: a fast tool completes while a slow child runs', async () => { + const slow = run('node', ['-e', 'setTimeout(() => {}, 2000)'], { timeout: 5000 }); + const t0 = Date.now(); + const fast = await runTool('ruview_onboard', {}); + const elapsed = Date.now() - t0; + assert.equal(fast.ok, true); + assert.ok(elapsed < 1000, `onboard took ${elapsed} ms while a 2 s child was running`); + const r = await slow; + assert.equal(r.ok, true); +}); + +test('run() reports a timeout as a failure, not a hang', async () => { + const r = await run('node', ['-e', 'setTimeout(() => {}, 10000)'], { timeout: 300 }); + assert.equal(r.ok, false); + assert.match(String(r.error), /timed out/); +}); + +test('run() bounds captured output instead of dying on big streams (ADR-263 O4)', async () => { + // 4 MiB of stdout would have hit spawnSync's 1 MiB default maxBuffer (ENOBUFS). + const r = await run('node', ['-e', "process.stdout.write('x'.repeat(4 * 1024 * 1024)); console.log('TAIL_MARKER')"], { timeout: 30000 }); + assert.equal(r.ok, true); + assert.ok(r.stdout.length <= 65536, `tail not bounded: ${r.stdout.length}`); + assert.ok(r.stdout.includes('TAIL_MARKER'), 'tail must keep the end of the stream'); +}); + +test('which() finds node and re-probes misses (hits are cached)', () => { + assert.ok(which('node'), 'node must be on PATH in the test env'); + assert.equal(which('definitely-not-a-binary-xyz'), null); + assert.equal(which('definitely-not-a-binary-xyz'), null); // re-probed, still absent +}); + +// ADR-263 O8: a miss must not be cached — an operator who installs a tool +// mid-session (e.g. python after a python_missing failure) must be found next call. +test('which() re-probes after a miss so a newly-installed tool is found', () => { + const dir = mkdtempSync(join(tmpdir(), 'ruview-which-')); + const name = 'ruview-probe-xyz'; + const isWin = process.platform === 'win32'; + const bin = join(dir, isWin ? 
`${name}.cmd` : name); + const prevPath = process.env.PATH; + try { + assert.equal(which(name), null, 'not on PATH yet → miss'); + writeFileSync(bin, isWin ? '@echo off\n' : '#!/bin/sh\n', { mode: 0o755 }); + process.env.PATH = dir + delimiter + prevPath; + assert.ok(which(name), 'installed mid-session → the miss must not have been cached'); + } finally { + process.env.PATH = prevPath; + rmSync(dir, { recursive: true, force: true }); + } +}); + test('CLI run(): claim-check exits non-zero on a bad claim', async () => { - const code = await run(['claim-check', '--text', '100% accuracy']); + const code = await cliRun(['claim-check', '--text', '100% accuracy']); assert.notEqual(code, 0); }); +// ADR-263 F1/O1: the CLI must not PASS silently with no input. +test('CLI run(): claim-check with no input exits 2 (fail-closed)', async () => { + assert.equal(await cliRun(['claim-check']), 2); + assert.equal(await cliRun(['claim-check', '--text', ' ']), 2); +}); + test('CLI run(): doctor exits 0 (tools-only path)', async () => { - const code = await run(['doctor']); + const code = await cliRun(['doctor']); assert.equal(code, 0); }); test('CLI run(): unknown command exits non-zero', async () => { - assert.notEqual(await run(['definitely-not-a-command']), 0); + assert.notEqual(await cliRun(['definitely-not-a-command']), 0); }); test('findRepoRoot locates this monorepo from cwd', () => { @@ -109,3 +234,23 @@ test('findRepoRoot locates this monorepo from cwd', () => { const root = findRepoRoot(); assert.ok(root === null || typeof root === 'string'); }); + +// ADR-263 F7/O7: skills ship from one source; the projected copies must match. 
+test('.claude/skills/*/SKILL.md are byte-identical to skills/*.md', () => { + const srcDir = join(PKG_ROOT, 'skills'); + for (const f of readdirSync(srcDir).filter((f) => f.endsWith('.md'))) { + const name = f.replace(/\.md$/, ''); + const src = readFileSync(join(srcDir, f), 'utf8'); + const projected = readFileSync(join(PKG_ROOT, '.claude', 'skills', name, 'SKILL.md'), 'utf8'); + assert.equal(projected, src, `skill drift: ${name} — run \`npm run sync-skills\``); + } +}); + +// ADR-263 F6/O6 + F3/O3: package hygiene pins. +test('package.json has no optionalDependencies and no hardcoded server version drift', () => { + const pkg = JSON.parse(readFileSync(join(PKG_ROOT, 'package.json'), 'utf8')); + assert.equal(pkg.optionalDependencies, undefined, 'ADR-263 O3: optional deps tripled the cold npx install'); + assert.equal(pkg.dependencies, undefined, 'the harness is dependency-free by design'); + const mcpSrc = readFileSync(join(PKG_ROOT, 'src', 'mcp-server.js'), 'utf8'); + assert.ok(!/version:\s*'\d+\.\d+\.\d+'/.test(mcpSrc), 'ADR-263 O6: server version must come from package.json'); +}); diff --git a/tools/ruview-cli/package-lock.json b/tools/ruview-cli/package-lock.json index 6a1c2905..018a2172 100644 --- a/tools/ruview-cli/package-lock.json +++ b/tools/ruview-cli/package-lock.json @@ -12,7 +12,7 @@ "yargs": "^17.7.2" }, "bin": { - "ruview": "dist/index.js" + "ruview-cli": "dist/index.js" }, "devDependencies": { "@types/node": "^20.14.0", diff --git a/tools/ruview-cli/package.json b/tools/ruview-cli/package.json index 7c09b8bb..1b88385e 100644 --- a/tools/ruview-cli/package.json +++ b/tools/ruview-cli/package.json @@ -1,13 +1,13 @@ { "name": "@ruv/ruview-cli", "version": "0.0.1", - "description": "RuView CLI — shell access to WiFi-DensePose sensing, inference, and training capabilities", + "description": "RuView CLI — shell access to WiFi-DensePose sensing, inference, and training capabilities. 
Private/unpublished; the `ruview` bin name belongs to @ruvnet/ruview (ADR-265 D4).", "private": true, "type": "module", "main": "dist/index.js", "types": "dist/index.d.ts", "bin": { - "ruview": "dist/index.js" + "ruview-cli": "dist/index.js" }, "files": [ "dist" @@ -15,7 +15,7 @@ "scripts": { "build": "tsc", "dev": "tsc --watch", - "test": "node --experimental-vm-modules node_modules/.bin/jest", + "test": "node --experimental-vm-modules node_modules/.bin/jest --passWithNoTests", "lint": "eslint src --ext .ts", "typecheck": "tsc --noEmit" }, diff --git a/tools/ruview-cli/src/index.ts b/tools/ruview-cli/src/index.ts index 7d75500b..bd9233b4 100644 --- a/tools/ruview-cli/src/index.ts +++ b/tools/ruview-cli/src/index.ts @@ -25,6 +25,7 @@ * See ADR-104 for the full design rationale and security model. */ +import { createRequire } from "node:module"; import yargs from "yargs"; import { hideBin } from "yargs/helpers"; import { csiCommand } from "./commands/csi.js"; @@ -34,9 +35,15 @@ import { cogsCommand } from "./commands/cogs.js"; import { trainCommand } from "./commands/train.js"; import { jobCommand } from "./commands/job.js"; +// Single-source the version from package.json (ADR-265 D3). +const require = createRequire(import.meta.url); +const VERSION: string = (require("../package.json") as { version: string }).version; + +// Bin name is `ruview-cli`: the bare `ruview` bin belongs to @ruvnet/ruview +// (ADR-264 O9 / ADR-265 D4). 
const cli = yargs(hideBin(process.argv)) - .scriptName("ruview") - .version("0.0.1") + .scriptName("ruview-cli") + .version(VERSION) .usage("$0 [options]") .strict() .help() diff --git a/tools/ruview-mcp/README.md b/tools/ruview-mcp/README.md index 63f2fb15..697c8205 100644 --- a/tools/ruview-mcp/README.md +++ b/tools/ruview-mcp/README.md @@ -2,52 +2,63 @@ **SENSE-BRIDGE** is a dual-transport [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that bridges the RuView WiFi-DensePose sensing stack to AI agents (Claude Code, Cursor, ruflo swarms, and any MCP-compatible client). -Install once; AI agents can then call `ruview.presence.now`, `ruview.vitals.get_heart_rate`, `ruview.bfld.last_scan`, and more — without writing HTTP or WebSocket client code. +Install once; AI agents can then call `ruview_presence_now`, `ruview_vitals_get_heart_rate`, `ruview_bfld_last_scan`, and more — without writing HTTP or WebSocket client code. ## Quickstart ```bash -# 1. Add to Claude Code -claude mcp add rvagent -- npx @ruvnet/rvagent stdio +# 1. Add to Claude Code (stdio transport — the default) +claude mcp add rvagent -- npx -y @ruvnet/rvagent # 2. Or run directly -RUVIEW_SENSING_SERVER_URL=http://cognitum-v0:3000 npx @ruvnet/rvagent stdio +RUVIEW_SENSING_SERVER_URL=http://cognitum-v0:3000 npx @ruvnet/rvagent -# 3. Streamable HTTP (remote agents, ruflo swarms) +# 3. Streamable HTTP (remote agents, ruflo swarms) — explicit opt-in RUVIEW_SENSING_SERVER_URL=http://cognitum-v0:3000 \ RVAGENT_HTTP_TOKEN=your-secret \ -npx @ruvnet/rvagent http --port 3001 -# POST JSON-RPC to http://127.0.0.1:3001/mcp +RVAGENT_HTTP_PORT=3001 npx @ruvnet/rvagent +# POST JSON-RPC to http://127.0.0.1:3001/mcp (initialize first; then send the +# returned mcp-session-id header on every request) ``` Requirements: **Node.js >= 20**. The `wifi-densepose-sensing-server` Rust binary must be reachable at `RUVIEW_SENSING_SERVER_URL` (default `http://localhost:3000`). 
-## Feature matrix +## Tools + +Canonical tool names are underscore-form (ADR-264 — host tool-name validators +commonly enforce `^[a-zA-Z0-9_-]{1,64}$`). The pre-0.1.1 dotted names +(`ruview.presence.now`, …) are still accepted at call time as deprecated +aliases; `tools/list` advertises the underscore form only. | Tool | Description | ADR | |------|-------------|-----| -| `ruview.presence.now` | Current occupancy: `present`, `n_persons`, `confidence` | ADR-124 §4.1 | -| `ruview.vitals.get_breathing` | Breathing rate bpm (null if unavailable) | ADR-124 §4.1 | -| `ruview.vitals.get_heart_rate` | Heart rate bpm (null if unavailable) | ADR-124 §4.1 | -| `ruview.vitals.get_all` | Full `EdgeVitalsMessage` surface | ADR-124 §4.1 | -| `ruview.bfld.last_scan` | Latest BFLD scan: `identity_risk_score`, `privacy_class`, `n_frames` | ADR-118/124 | -| `ruview.bfld.subscribe` | Subscribe to `ruview//bfld/*` events for `duration_s` seconds | ADR-122/124 | -| *(next iters)* | `pose.latest`, `primitives.*`, `node.*`, `vector.*`, `policy.*` | ADR-124 §4.1/4.1a | +| `ruview_csi_latest` | Latest 56×20 CSI window from the sensing-server | ADR-101/102 | +| `ruview_pose_infer` | Single-shot 17-keypoint pose inference via cog binary | ADR-101 | +| `ruview_count_infer` | Single-shot person-count inference via cog binary | ADR-103 | +| `ruview_registry_list` | Cognitum edge module registry (category/search filters) | ADR-102 | +| `ruview_train_count` | Kick off a count-cog training run (background job) | ADR-103 | +| `ruview_job_status` | Poll a training job (persists across server restarts) | ADR-103 | +| `ruview_presence_now` | Current occupancy: `present`, `n_persons`, `confidence` | ADR-124 §4.1 | +| `ruview_vitals_get_breathing` | Breathing rate bpm (null if unavailable) | ADR-124 §4.1 | +| `ruview_vitals_get_heart_rate` | Heart rate bpm (null if unavailable) | ADR-124 §4.1 | +| `ruview_vitals_get_all` | Full `EdgeVitalsMessage` surface | ADR-124 §4.1 | +| `ruview_bfld_last_scan` | 
Latest BFLD scan: `identity_risk_score`, `privacy_class`, `n_frames` | ADR-118/124 | +| `ruview_bfld_subscribe` | Subscribe to `ruview//bfld/*` events for `duration_s` seconds | ADR-122/124 | +| *(roadmap, ADR-124 §4.1/4.1a)* | `pose.latest`, `primitives.*`, `node.*`, `vector.*`, and the `policy.*` governance layer are catalogued in `src/schemas/` but **not yet implemented** | ADR-124 | -**Transport security (ADR-124 §6)**: -- **stdio**: process-level isolation — no auth needed for local Claude Code / Cursor. -- **Streamable HTTP** (`POST /mcp`): Origin header validation (cross-origin → 403), optional bearer token (`RVAGENT_HTTP_TOKEN` → 401 on mismatch), binds `127.0.0.1` by default per MCP spec. +**Transport security (ADR-124 §6, hardened per ADR-264)**: +- **stdio** (default): process-level isolation — no auth needed for local Claude Code / Cursor. +- **Streamable HTTP** (`/mcp`, opt-in via `RVAGENT_HTTP_PORT`): one transport + one MCP server per session (routed by `mcp-session-id`), Origin validation (localhost on any port allowed; anything else → 403), optional bearer token (`RVAGENT_HTTP_TOKEN` → 401 on mismatch), 1 MiB request-body cap (413), binds `127.0.0.1` by default per MCP spec. -**Schema validation**: every tool call runs `zod.safeParse` before dispatch; invalid arguments return `McpError(InvalidParams)` rather than a wrapped string. - -**Policy layer** (ADR-124 §4.1a): `ruview.policy.*` tools gate every sensing call — `vitals.*` is default-deny until a policy grant is registered via `npx @ruvnet/rvagent policy grant`. Presence and node-list are allow by default. +**Schema validation**: each tool declares one Zod schema; the CallTool gate parses exactly once and the advertised JSON Schema is generated from the same Zod source. Invalid arguments return `McpError(InvalidParams)` rather than a wrapped string. 
## ADR cross-reference | ADR | Decision | |-----|----------| | [ADR-124](../../docs/adr/ADR-124-rvagent-mcp-ruvector-npm-integration.md) | SENSE-BRIDGE: dual-transport MCP server + ruvector npm + ruflo integration | -| [ADR-118](../../docs/adr/ADR-118-bfld-beamforming-feedback-layer-for-detection.md) | BFLD pipeline — source of `bfld.last_scan` wire format | +| [ADR-264](../../docs/adr/ADR-264-rvagent-mcp-and-cli-npm-deep-review.md) | npm deep review — exports fix, map-free tarball, naming, session-per-transport | +| [ADR-118](../../docs/adr/ADR-118-bfld-beamforming-feedback-layer-for-detection.md) | BFLD pipeline — source of `bfld_last_scan` wire format | | [ADR-122](../../docs/adr/ADR-122-bfld-ruview-ha-matter-exposure.md) | MQTT topic routing `ruview//bfld/*` | | [ADR-115](../../docs/adr/ADR-115-home-assistant-integration.md) | `EdgeVitalsMessage` WebSocket surface (`ws.py:74-88` parity) | | [ADR-055](../../docs/adr/ADR-055-integrated-sensing-server.md) | Sensing-server REST API (`/api/v1/*`) | @@ -58,7 +69,7 @@ Requirements: **Node.js >= 20**. The `wifi-densepose-sensing-server` Rust binary cd tools/ruview-mcp npm install npm run build # tsc -npm test # jest — 93 tests across 7 suites +npm test # jest — 99 tests across 7 suites ``` Source: `tools/ruview-mcp/src/`. Tests: `tools/ruview-mcp/tests/`. 
diff --git a/tools/ruview-mcp/package-lock.json b/tools/ruview-mcp/package-lock.json index 94c791c3..ac7daf29 100644 --- a/tools/ruview-mcp/package-lock.json +++ b/tools/ruview-mcp/package-lock.json @@ -1,24 +1,24 @@ { "name": "@ruvnet/rvagent", - "version": "0.1.0", + "version": "0.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@ruvnet/rvagent", - "version": "0.1.0", + "version": "0.2.0", "license": "Apache-2.0", "dependencies": { "@modelcontextprotocol/sdk": "^1.0.0", - "zod": "^3.23.8" + "zod": "^3.23.8", + "zod-to-json-schema": "^3.25.2" }, "bin": { "ruview-mcp": "dist/index.js", "rvagent": "dist/index.js" }, "devDependencies": { - "@types/express": "^5.0.6", - "@types/jest": "^30.0.0", + "@types/jest": "^29.5.14", "@types/node": "^20.14.0", "jest": "^29.7.0", "ts-jest": "^29.1.0", @@ -629,16 +629,6 @@ } } }, - "node_modules/@jest/diff-sequences": { - "version": "30.4.0", - "resolved": "https://registry.npmjs.org/@jest/diff-sequences/-/diff-sequences-30.4.0.tgz", - "integrity": "sha512-zOpzlfUs45l6u7jm39qr87JCHUDsaeCtvL+kQe/Vn9jSnRB4/5IPXISm0h9I1vZW/o00Kn4UTJ2MOlhnUGwv3g==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, "node_modules/@jest/environment": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/@jest/environment/-/environment-29.7.0.tgz", @@ -700,16 +690,6 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, - "node_modules/@jest/get-type": { - "version": "30.1.0", - "resolved": "https://registry.npmjs.org/@jest/get-type/-/get-type-30.1.0.tgz", - "integrity": "sha512-eMbZE2hUnx1WV0pmURZY9XoXPkUYjpc55mb0CrhtdWLtzMQPFvu/rZkTLZFTsdaVQa+Tr4eWAteqcUzoawq/uA==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, "node_modules/@jest/globals": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/@jest/globals/-/globals-29.7.0.tgz", @@ -726,30 +706,6 @@ "node": "^14.15.0 || ^16.10.0 || 
>=18.0.0" } }, - "node_modules/@jest/pattern": { - "version": "30.4.0", - "resolved": "https://registry.npmjs.org/@jest/pattern/-/pattern-30.4.0.tgz", - "integrity": "sha512-RAWn3+f9u8BsHijKJ71uHcFp6vmyEt6VvoWXkl6hKF3qVIuWNmudVjg12DlBPGup/frIl5UcUlH5HfEuvHpEXg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/node": "*", - "jest-regex-util": "30.4.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@jest/pattern/node_modules/jest-regex-util": { - "version": "30.4.0", - "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-30.4.0.tgz", - "integrity": "sha512-mWlvLviKIgIQ8VCuM1xRdD0TWp3zlzionlmDBjuXVBs+VkmXq6FgW9T4Emr7oGz/Rk6feDCGyiugolcQEyp3mg==", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, "node_modules/@jest/reporters": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/@jest/reporters/-/reporters-29.7.0.tgz", @@ -1061,52 +1017,6 @@ "@babel/types": "^7.28.2" } }, - "node_modules/@types/body-parser": { - "version": "1.19.6", - "resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.6.tgz", - "integrity": "sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/connect": "*", - "@types/node": "*" - } - }, - "node_modules/@types/connect": { - "version": "3.4.38", - "resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.38.tgz", - "integrity": "sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/@types/express": { - "version": "5.0.6", - "resolved": "https://registry.npmjs.org/@types/express/-/express-5.0.6.tgz", - "integrity": 
"sha512-sKYVuV7Sv9fbPIt/442koC7+IIwK5olP1KWeD88e/idgoJqDm3JV/YUiPwkoKK92ylff2MGxSz1CSjsXelx0YA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/body-parser": "*", - "@types/express-serve-static-core": "^5.0.0", - "@types/serve-static": "^2" - } - }, - "node_modules/@types/express-serve-static-core": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/@types/express-serve-static-core/-/express-serve-static-core-5.1.1.tgz", - "integrity": "sha512-v4zIMr/cX7/d2BpAEX3KNKL/JrT1s43s96lLvvdTmza1oEvDudCqK9aF/djc/SWgy8Yh0h30TZx5VpzqFCxk5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/node": "*", - "@types/qs": "*", - "@types/range-parser": "*", - "@types/send": "*" - } - }, "node_modules/@types/graceful-fs": { "version": "4.1.9", "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", @@ -1117,13 +1027,6 @@ "@types/node": "*" } }, - "node_modules/@types/http-errors": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@types/http-errors/-/http-errors-2.0.5.tgz", - "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==", - "dev": true, - "license": "MIT" - }, "node_modules/@types/istanbul-lib-coverage": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", @@ -1152,229 +1055,14 @@ } }, "node_modules/@types/jest": { - "version": "30.0.0", - "resolved": "https://registry.npmjs.org/@types/jest/-/jest-30.0.0.tgz", - "integrity": "sha512-XTYugzhuwqWjws0CVz8QpM36+T+Dz5mTEBKhNs/esGLnCIlGdRy+Dq78NRjd7ls7r8BC8ZRMOrKlkO1hU0JOwA==", + "version": "29.5.14", + "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.14.tgz", + "integrity": "sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ==", "dev": true, "license": "MIT", "dependencies": { - "expect": "^30.0.0", - "pretty-format": "^30.0.0" - } - }, - 
"node_modules/@types/jest/node_modules/@jest/expect-utils": { - "version": "30.4.1", - "resolved": "https://registry.npmjs.org/@jest/expect-utils/-/expect-utils-30.4.1.tgz", - "integrity": "sha512-ZBn5CglH8fBsQsvs4VWNzD4aWfUYks+IdOOQU3MEK71ol/BcVm+P+rtb1KpiFBpSWSCE27uOahyyf1vfqOVbcQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/get-type": "30.1.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/@jest/schemas": { - "version": "30.4.1", - "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-30.4.1.tgz", - "integrity": "sha512-i6b4qw5qnP8c5FEeBJg/uZQ4ddrkN6Ca8qISJh0pr7a5hfn3h3v5x60BEbOC7OYAGZNMs1LfFLwnW2CuK8F57Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@sinclair/typebox": "^0.34.0" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/@jest/types": { - "version": "30.4.1", - "resolved": "https://registry.npmjs.org/@jest/types/-/types-30.4.1.tgz", - "integrity": "sha512-f1x/vJXIfjOlEmejYpbkbgw1gOqpPECwMvMEtBqe47j7H2Hg8h8w3o3ikhSXq3MI15kg+oQ0exWO0uCtTNJLoQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/pattern": "30.4.0", - "@jest/schemas": "30.4.1", - "@types/istanbul-lib-coverage": "^2.0.6", - "@types/istanbul-reports": "^3.0.4", - "@types/node": "*", - "@types/yargs": "^17.0.33", - "chalk": "^4.1.2" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/@sinclair/typebox": { - "version": "0.34.49", - "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.49.tgz", - "integrity": "sha512-brySQQs7Jtn0joV8Xh9ZV/hZb9Ozb0pmazDIASBkYKCjXrXU3mpcFahmK/z4YDhGkQvP9mWJbVyahdtU5wQA+A==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/jest/node_modules/ansi-styles": { - "version": "5.2.0", - "resolved": 
"https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", - "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/@types/jest/node_modules/ci-info": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-4.4.0.tgz", - "integrity": "sha512-77PSwercCZU2Fc4sX94eF8k8Pxte6JAwL4/ICZLFjJLqegs7kCuAsqqj/70NQF6TvDpgFjkubQB2FW2ZZddvQg==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/sibiraj-s" - } - ], - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/@types/jest/node_modules/expect": { - "version": "30.4.1", - "resolved": "https://registry.npmjs.org/expect/-/expect-30.4.1.tgz", - "integrity": "sha512-PMARsyh/JtqC20HoGqlFcIlQAyqUtW4PlI1rup1uhYJtKuwAjbvWi3GQMAn+STdHum/dk8xrKfUM1+5SAwpolA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/expect-utils": "30.4.1", - "@jest/get-type": "30.1.0", - "jest-matcher-utils": "30.4.1", - "jest-message-util": "30.4.1", - "jest-mock": "30.4.1", - "jest-util": "30.4.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/jest-diff": { - "version": "30.4.1", - "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-30.4.1.tgz", - "integrity": "sha512-CRpFK0RtLriVDGcPPAnR6HMVI8bSR2jnUIgralhauzYQZIb4RH9AtEInTuQr65LmmGggGcRT6HIASxwqsVsmlA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/diff-sequences": "30.4.0", - "@jest/get-type": "30.1.0", - "chalk": "^4.1.2", - "pretty-format": "30.4.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/jest-matcher-utils": { - "version": "30.4.1", - "resolved": 
"https://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-30.4.1.tgz", - "integrity": "sha512-zvYfX5CaeEkFrrLS9suWe9rvJrm9J1Iv3ua8kIBv9GEPzcnsfBf0bob37la7s67fs0nlBC3EuvkOLnXQKxtx4A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/get-type": "30.1.0", - "chalk": "^4.1.2", - "jest-diff": "30.4.1", - "pretty-format": "30.4.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/jest-message-util": { - "version": "30.4.1", - "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-30.4.1.tgz", - "integrity": "sha512-kwCKIvq0MCW1HzLoGola9Te6JUdzgV0loyKJ3Qghrkz9i5/RRIHsL95BMQc2HBBhlBKC4j22K9p11TGHH8RBpQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@jest/types": "30.4.1", - "@types/stack-utils": "^2.0.3", - "chalk": "^4.1.2", - "graceful-fs": "^4.2.11", - "jest-util": "30.4.1", - "picomatch": "^4.0.3", - "pretty-format": "30.4.1", - "slash": "^3.0.0", - "stack-utils": "^2.0.6" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/jest-mock": { - "version": "30.4.1", - "resolved": "https://registry.npmjs.org/jest-mock/-/jest-mock-30.4.1.tgz", - "integrity": "sha512-/i8SVb8/NSB7RfNi8gfqu8gxLV23KaL5EpAttyb9iz8qWRIqXRLflycz/32wXsYkOnaUlx8NAKnJYtpsmXUmfw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/types": "30.4.1", - "@types/node": "*", - "jest-util": "30.4.1" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/jest-util": { - "version": "30.4.1", - "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-30.4.1.tgz", - "integrity": "sha512-vjQb1sACEiv13DKJMDToJpzVW0joCsIQrmbg0fi7CyOOt+g9jTuQl2A216pWRBYhOVt53XbL/2LbMKg1BECWOw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/types": "30.4.1", - "@types/node": "*", - 
"chalk": "^4.1.2", - "ci-info": "^4.2.0", - "graceful-fs": "^4.2.11", - "picomatch": "^4.0.3" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" - } - }, - "node_modules/@types/jest/node_modules/picomatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", - "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/@types/jest/node_modules/pretty-format": { - "version": "30.4.1", - "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-30.4.1.tgz", - "integrity": "sha512-K6KiKMHTL4jjX4u3Kir2EW07nRfcqVTXIImx50wbjHQTcZPgg+gjVeNTIT3l3L1Rd4UefxfogquC9J37SoFyyw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jest/schemas": "30.4.1", - "ansi-styles": "^5.2.0", - "react-is-18": "npm:react-is@^18.3.1", - "react-is-19": "npm:react-is@^19.2.5" - }, - "engines": { - "node": "^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0" + "expect": "^29.0.0", + "pretty-format": "^29.0.0" } }, "node_modules/@types/node": { @@ -1387,41 +1075,6 @@ "undici-types": "~6.21.0" } }, - "node_modules/@types/qs": { - "version": "6.15.1", - "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.15.1.tgz", - "integrity": "sha512-GZHUBZR9hckSUhrxmp1nG6NwdpM9fCunJwyThLW1X3AyHgd9IlHb6VANpQQqDr2o/qQp6McZ3y/IA2rVzKzSbw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/range-parser": { - "version": "1.2.7", - "resolved": "https://registry.npmjs.org/@types/range-parser/-/range-parser-1.2.7.tgz", - "integrity": "sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/send": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/@types/send/-/send-1.2.1.tgz", - 
"integrity": "sha512-arsCikDvlU99zl1g69TcAB3mzZPpxgw0UQnaHeC1Nwb015xp8bknZv5rIfri9xTOcMuaVgvabfIRA7PSZVuZIQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/@types/serve-static": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/@types/serve-static/-/serve-static-2.2.0.tgz", - "integrity": "sha512-8mam4H1NHLtu7nmtalF7eyBH14QyOASmcxHhSfEoRyr0nP/YdoesEtU+uSRvMe96TW/HPTtkoKqQLl53N7UXMQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/http-errors": "*", - "@types/node": "*" - } - }, "node_modules/@types/stack-utils": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz", @@ -4355,22 +4008,6 @@ "dev": true, "license": "MIT" }, - "node_modules/react-is-18": { - "name": "react-is", - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", - "dev": true, - "license": "MIT" - }, - "node_modules/react-is-19": { - "name": "react-is", - "version": "19.2.6", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-19.2.6.tgz", - "integrity": "sha512-XjBR15BhXuylgWGuslhDKqlSayuqvqBX91BP8pauG8kd1zY8kotkNWbXksTCNRarse4kuGbe2kIY05ARtwNIvw==", - "dev": true, - "license": "MIT" - }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", diff --git a/tools/ruview-mcp/package.json b/tools/ruview-mcp/package.json index 1601c3e0..4b76f22d 100644 --- a/tools/ruview-mcp/package.json +++ b/tools/ruview-mcp/package.json @@ -1,15 +1,14 @@ { "name": "@ruvnet/rvagent", - "version": "0.1.0", - "description": "SENSE-BRIDGE: dual-transport MCP server (stdio + Streamable HTTP) exposing RuView WiFi-DensePose sensing primitives to AI agents", + "version": "0.2.0", + "description": "SENSE-BRIDGE: dual-transport MCP 
server (stdio default; Streamable HTTP opt-in via RVAGENT_HTTP_PORT) exposing RuView WiFi-DensePose sensing primitives to AI agents", "type": "module", "main": "dist/index.js", "types": "dist/index.d.ts", "exports": { ".": { - "import": "./dist/index.js", - "require": "./dist/index.cjs", - "types": "./dist/index.d.ts" + "types": "./dist/index.d.ts", + "import": "./dist/index.js" } }, "bin": { @@ -18,8 +17,7 @@ }, "files": [ "dist", - "README.md", - "CHANGELOG.md" + "README.md" ], "scripts": { "build": "tsc", @@ -27,7 +25,8 @@ "start": "node dist/index.js", "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --forceExit", "lint": "eslint src --ext .ts", - "typecheck": "tsc --noEmit" + "typecheck": "tsc --noEmit", + "prepublishOnly": "npm run build && npm test" }, "keywords": [ "mcp", @@ -53,11 +52,11 @@ }, "dependencies": { "@modelcontextprotocol/sdk": "^1.0.0", - "zod": "^3.23.8" + "zod": "^3.23.8", + "zod-to-json-schema": "^3.25.2" }, "devDependencies": { - "@types/express": "^5.0.6", - "@types/jest": "^30.0.0", + "@types/jest": "^29.5.14", "@types/node": "^20.14.0", "jest": "^29.7.0", "ts-jest": "^29.1.0", diff --git a/tools/ruview-mcp/src/config.ts b/tools/ruview-mcp/src/config.ts index 8fb81f6d..1c8070b2 100644 --- a/tools/ruview-mcp/src/config.ts +++ b/tools/ruview-mcp/src/config.ts @@ -8,6 +8,7 @@ import os from "node:os"; import path from "node:path"; +import { existsSync } from "node:fs"; import type { RuviewConfig } from "./types.js"; function env(key: string): string | undefined { @@ -51,17 +52,35 @@ export function loadConfig(): RuviewConfig { } /** - * Attempt to locate a cog binary on PATH or in common install locations. - * Returns the bare binary name if not found (will fail gracefully at invocation). + * Ordered cog-binary candidate paths for a host of the given CPU architecture. 
+ * The native-arch build is probed FIRST: an appliance that ships both + * `cog-<id>-arm` and `cog-<id>-x86_64` must never hand back the wrong-arch + * binary (ADR-264 F8/O7 — the pre-review order tried `-arm` unconditionally). + * The arch-agnostic `/usr/local/bin` path follows; the bare name (PATH) is the caller's fallback. + * + * Pure and arch-injectable so the ordering is unit-testable. + */ +export function cogBinaryCandidates( + name: string, + arch: string = process.arch +): string[] { + const id = name.replace("cog-", ""); + const dir = `/var/lib/cognitum/apps/${id}`; + const arm = `${dir}/cog-${id}-arm`; + const x86 = `${dir}/cog-${id}-x86_64`; + // arm64 → prefer -arm; everything else (notably x64) → prefer -x86_64. + const archOrdered = arch === "arm64" ? [arm, x86] : [x86, arm]; + return [...archOrdered, `/usr/local/bin/${name}`]; +} + +/** + * Locate a cog binary in the common appliance install locations, probing each + * candidate in native-arch-first order. Falls back to the bare name (PATH + * resolution at spawn time) when no candidate exists. */ function detectCogBinary(name: string): string { - // Common install paths for Cognitum cog binaries on Linux/macOS appliances. - const candidates = [ - `/var/lib/cognitum/apps/${name.replace("cog-", "")}/cog-${name.replace("cog-", "")}-arm`, - `/var/lib/cognitum/apps/${name.replace("cog-", "")}/cog-${name.replace("cog-", "")}-x86_64`, - `/usr/local/bin/${name}`, - name, // bare name — rely on PATH - ]; - // Return the first candidate that might exist; actual existence is checked at call time. - return candidates[candidates.length - 1] ?? 
name; + for (const candidate of cogBinaryCandidates(name)) { + if (existsSync(candidate)) return candidate; + } + return name; // bare name — rely on PATH; spawn fails gracefully if absent } diff --git a/tools/ruview-mcp/src/http-transport.ts b/tools/ruview-mcp/src/http-transport.ts index bb22a610..c336bedf 100644 --- a/tools/ruview-mcp/src/http-transport.ts +++ b/tools/ruview-mcp/src/http-transport.ts @@ -1,30 +1,44 @@ /** - * Streamable HTTP transport scaffold for @ruvnet/rvagent (ADR-124 §3). + * Streamable HTTP transport for @ruvnet/rvagent (ADR-124 §3, hardened per + * ADR-264 F7/O3). * - * Binds to 127.0.0.1 by default and mounts a POST /mcp endpoint backed by + * Binds to 127.0.0.1 by default and mounts an /mcp endpoint backed by * StreamableHTTPServerTransport from @modelcontextprotocol/sdk. * - * Security model (ADR-124 §6): - * - Origin validation: requests from origins other than the configured - * allowlist are rejected with 403 Forbidden before reaching the MCP layer. - * - Default allowlist: ['http://localhost', 'http://127.0.0.1'] — covers - * Claude Code and Cursor on the same machine. + * Session model (ADR-264 F7): the SDK's stateful mode requires ONE transport + * (and one MCP Server) per session. An `initialize` POST creates a fresh + * transport + server pair via the caller-supplied factory; follow-up + * POST/GET/DELETE requests are routed to their session by the + * `mcp-session-id` header. Transports are dropped when their session closes. + * + * Security model (ADR-124 §6 + ADR-264 F7): + * - Origin validation: browser-style requests whose Origin is not local + * are rejected with 403 before reaching the MCP layer. With NO explicit + * allowlist, localhost origins match on hostname, ANY port + * (http://localhost:5173 is local). When an explicit allowedOrigins list is + * configured, matching is exact — the any-port-localhost convenience is off, + * so a localhost peer on an unlisted port must be added to be accepted. 
* - Bearer token: when RVAGENT_HTTP_TOKEN is set, requests must carry * Authorization: Bearer <token>; missing/wrong tokens → 401. + * - Body cap: request bodies over 1 MiB are rejected with 413 (the + * unbounded-buffering DoS from the pre-ADR-264 scaffold). * - Bind address: defaults to 127.0.0.1 per MCP spec security requirement. * Set RVAGENT_HTTP_HOST=0.0.0.0 only for intentional fleet deployment. * * Usage: * import { createHttpTransport } from './http-transport.js'; - * const { server: httpServer, transport } = await createHttpTransport(mcpServer); + * const { httpServer } = await createHttpTransport(() => buildServer(config)); * // httpServer is a node:http.Server — call httpServer.close() to shut down. */ import { createServer, type Server as HttpServer, type IncomingMessage, type ServerResponse } from "node:http"; import { randomUUID } from "node:crypto"; import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; +import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js"; import type { Server as McpServer } from "@modelcontextprotocol/sdk/server/index.js"; +export type McpServerFactory = () => McpServer; + export interface HttpTransportOptions { /** TCP host to bind (default: 127.0.0.1). */ host?: string; @@ -32,8 +46,8 @@ export interface HttpTransportOptions { port?: number; /** * Allowed Origin header values. Requests with an Origin not in this list - * are rejected with 403. Use '*' to disable Origin validation entirely - * (not recommended outside of local-dev flags). + * are rejected with 403; localhost origins (any port) are exempt only when this list is empty. + * Use '*' to disable Origin validation entirely (not recommended outside of local-dev flags). */ allowedOrigins?: string[]; /** @@ -42,32 +56,51 @@ export interface HttpTransportOptions { * Defaults to process.env.RVAGENT_HTTP_TOKEN (undefined = auth disabled). */ bearerToken?: string; + /** Maximum accepted request body size in bytes (default: 1 MiB). 
*/ + maxBodyBytes?: number; + /** + * Maximum number of concurrent live sessions (default: 64). When a new + * `initialize` arrives at the cap, the oldest-idle session is evicted (its + * transport closed) to make room — bounds memory against a flaky client that + * loops `initialize` or a malicious localhost peer (ADR-264 F7). + */ + maxSessions?: number; + /** + * Idle time-to-live for a session in ms (default: 5 min). Sessions with no + * request activity for longer than this are swept and closed. + */ + sessionIdleMs?: number; + /** How often the idle-session sweeper runs, in ms (default: 60 s). */ + sweepIntervalMs?: number; } export interface HttpTransportResult { /** The raw Node.js HTTP server — call .close() to shut down. */ httpServer: HttpServer; - /** The MCP Streamable HTTP transport instance wired to the MCP server. */ - transport: StreamableHTTPServerTransport; + /** Live sessions keyed by session id (exposed for tests/observability). */ + sessions: Map; /** The bound address string (e.g. "http://127.0.0.1:3001"). */ boundAddress: string; } const DEFAULT_HOST = "127.0.0.1"; const DEFAULT_PORT = 3001; -const LOCALHOST_ORIGINS = new Set([ - "http://localhost", - "http://127.0.0.1", - "https://localhost", - "https://127.0.0.1", -]); +const DEFAULT_MAX_BODY_BYTES = 1024 * 1024; +const DEFAULT_MAX_SESSIONS = 64; +const DEFAULT_SESSION_IDLE_MS = 5 * 60 * 1000; +const DEFAULT_SWEEP_INTERVAL_MS = 60 * 1000; +const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]); /** * Validate Origin header against the allowlist. * Returns true if the request should be allowed, false if it should be rejected. * - * An absent Origin header is allowed (same-origin non-browser requests, curl, etc.). - * A present Origin that is not in the allowlist is rejected. + * An absent Origin header is allowed (same-origin non-browser requests, curl, + * etc.). 
When NO explicit allowlist was configured (empty list), a localhost + * origin is allowed on any port as a convenience — real browser origins carry + * ports (ADR-264 F7). When an explicit allowlist IS configured, matching is + * exact: the any-port-localhost shortcut is disabled so an operator who pins an + * allowlist actually gets it (a looped-back peer on an unlisted port is denied). */ export function isOriginAllowed( origin: string | undefined, @@ -75,76 +108,222 @@ export function isOriginAllowed( ): boolean { if (origin === undefined) return true; // no Origin = not a cross-origin browser request if (allowedOrigins.includes("*")) return true; - return allowedOrigins.some((o) => o === origin); + if (allowedOrigins.includes(origin)) return true; + // Explicit allowlist ⇒ exact matching only; skip the localhost convenience. + if (allowedOrigins.length > 0) return false; + try { + const u = new URL(origin); + return ( + (u.protocol === "http:" || u.protocol === "https:") && + LOCAL_HOSTNAMES.has(u.hostname === "::1" ? "[::1]" : u.hostname) + ); + } catch { + return false; + } +} + +/** Read a request body with a hard size cap; null = payload too large. 
*/ +function readBody( + req: IncomingMessage, + maxBytes: number +): Promise { + return new Promise((resolve, reject) => { + let size = 0; + let tooLarge = false; + const chunks: Buffer[] = []; + req.on("data", (chunk: Buffer) => { + if (tooLarge) return; // keep draining so the 413 response can flush + size += chunk.length; + if (size > maxBytes) { + tooLarge = true; + chunks.length = 0; + resolve(null); + return; + } + chunks.push(chunk); + }); + req.on("end", () => { + if (!tooLarge) resolve(Buffer.concat(chunks).toString("utf8")); + }); + req.on("error", reject); + }); +} + +function json(res: ServerResponse, status: number, body: object): void { + res.writeHead(status, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); } /** - * Build and wire a Streamable HTTP transport to the provided MCP server. - * Returns the Node.js HTTP server (not yet listening) plus the transport. + * Build the HTTP server around a per-session MCP transport map. + * Returns the Node.js HTTP server (not yet listening) plus the session map. * Call httpServer.listen(port, host) or rely on createHttpTransport which * does that for you. */ export function buildHttpApp( - mcpServer: McpServer, + serverFactory: McpServerFactory, opts: HttpTransportOptions = {} -): { httpServer: HttpServer; transport: StreamableHTTPServerTransport } { - const allowedOrigins: string[] = opts.allowedOrigins ?? [ - ...LOCALHOST_ORIGINS, - ]; +): { httpServer: HttpServer; sessions: Map } { + const allowedOrigins: string[] = opts.allowedOrigins ?? []; const bearerToken = opts.bearerToken ?? process.env["RVAGENT_HTTP_TOKEN"]; + const maxBodyBytes = opts.maxBodyBytes ?? DEFAULT_MAX_BODY_BYTES; + const maxSessions = opts.maxSessions ?? DEFAULT_MAX_SESSIONS; + const sessionIdleMs = opts.sessionIdleMs ?? DEFAULT_SESSION_IDLE_MS; + const sweepIntervalMs = opts.sweepIntervalMs ?? 
DEFAULT_SWEEP_INTERVAL_MS; + const sessions = new Map(); + // lastSeen tracks per-session request activity so the sweeper and the + // oldest-idle eviction can bound the session map (ADR-264 F7). + const lastSeen = new Map(); - const transport = new StreamableHTTPServerTransport({ - sessionIdGenerator: () => randomUUID(), - }); + /** Mark a session as freshly used. */ + function touch(sessionId: string): void { + lastSeen.set(sessionId, Date.now()); + } - const httpServer = createServer( - (req: IncomingMessage, res: ServerResponse) => { - // ── Origin validation ──────────────────────────────────────────────── + /** Close a session's transport and drop it from the bookkeeping maps. */ + function closeSession(id: string): void { + const transport = sessions.get(id); + sessions.delete(id); + lastSeen.delete(id); + if (transport) { + try { + void transport.close(); // onclose is idempotent against the maps above + } catch { + /* best-effort: a half-open transport must not block eviction */ + } + } + } + + /** Evict the session that has been idle longest — called when at capacity. */ + function evictOldestIdle(): void { + let oldestId: string | undefined; + let oldestSeen = Infinity; + for (const [id, seen] of lastSeen) { + if (seen < oldestSeen) { + oldestSeen = seen; + oldestId = id; + } + } + if (oldestId !== undefined) closeSession(oldestId); + } + + /** Periodic sweep: close sessions idle beyond sessionIdleMs. 
*/ + function sweepIdleSessions(): void { + const now = Date.now(); + for (const [id, seen] of lastSeen) { + if (now - seen > sessionIdleMs) closeSession(id); + } + } + const sweepTimer = setInterval(sweepIdleSessions, sweepIntervalMs); + sweepTimer.unref(); // never keep the process alive just to sweep + + const httpServer = createServer((req: IncomingMessage, res: ServerResponse) => { + void (async () => { + // ── Origin validation ────────────────────────────────────────────── const origin = req.headers["origin"] as string | undefined; if (!isOriginAllowed(origin, allowedOrigins)) { - res.writeHead(403, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Forbidden: cross-origin request rejected" })); + json(res, 403, { error: "Forbidden: cross-origin request rejected" }); return; } - // ── Bearer token auth ──────────────────────────────────────────────── + // ── Bearer token auth ────────────────────────────────────────────── if (bearerToken !== undefined && bearerToken !== "") { const authHeader = req.headers["authorization"] as string | undefined; const supplied = authHeader?.startsWith("Bearer ") ? 
authHeader.slice("Bearer ".length) : undefined; if (supplied !== bearerToken) { - res.writeHead(401, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Unauthorized: missing or invalid bearer token" })); + json(res, 401, { error: "Unauthorized: missing or invalid bearer token" }); return; } } - // ── Route: POST /mcp ───────────────────────────────────────────────── - if (req.method === "POST" && req.url === "/mcp") { - let body = ""; - req.on("data", (chunk: Buffer) => { body += chunk.toString(); }); - req.on("end", () => { - let parsed: unknown; - try { - parsed = JSON.parse(body); - } catch { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Bad Request: invalid JSON body" })); - return; - } - void transport.handleRequest(req, res, parsed); - }); + // ── Route: /mcp ──────────────────────────────────────────────────── + if (req.url !== "/mcp") { + json(res, 404, { error: "Not found. MCP endpoint: /mcp" }); return; } - // ── Fallback ───────────────────────────────────────────────────────── - res.writeHead(404, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Not found. MCP endpoint: POST /mcp" })); - } - ); + const sessionId = req.headers["mcp-session-id"] as string | undefined; - return { httpServer, transport }; + if (req.method === "POST") { + const body = await readBody(req, maxBodyBytes); + if (body === null) { + json(res, 413, { error: `Payload too large (max ${maxBodyBytes} bytes)` }); + return; + } + let parsed: unknown; + try { + parsed = JSON.parse(body); + } catch { + json(res, 400, { error: "Bad Request: invalid JSON body" }); + return; + } + + // Existing session → route to its transport. 
+ if (sessionId !== undefined) { + const transport = sessions.get(sessionId); + if (!transport) { + json(res, 404, { error: `Unknown session "${sessionId}"` }); + return; + } + touch(sessionId); + await transport.handleRequest(req, res, parsed); + return; + } + + // New session: must be an initialize request (ADR-264 F7 — one + // transport + one MCP Server per session). + if (!isInitializeRequest(parsed)) { + json(res, 400, { + error: "Bad Request: no mcp-session-id and not an initialize request", + }); + return; + } + // Bound the session map: at capacity, reclaim the oldest-idle slot + // before minting a new session (ADR-264 F7). + if (sessions.size >= maxSessions) evictOldestIdle(); + const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: () => randomUUID(), + onsessioninitialized: (id: string) => { + sessions.set(id, transport); + touch(id); + }, + }); + transport.onclose = () => { + if (transport.sessionId !== undefined) { + sessions.delete(transport.sessionId); + lastSeen.delete(transport.sessionId); + } + }; + const mcpServer = serverFactory(); + await mcpServer.connect(transport as Parameters[0]); + await transport.handleRequest(req, res, parsed); + return; + } + + // GET (SSE stream) / DELETE (session termination) — session-scoped. + if (req.method === "GET" || req.method === "DELETE") { + const transport = sessionId !== undefined ? sessions.get(sessionId) : undefined; + if (!transport) { + json(res, 400, { error: "Bad Request: missing or unknown mcp-session-id" }); + return; + } + if (sessionId !== undefined) touch(sessionId); + await transport.handleRequest(req, res); + return; + } + + json(res, 405, { error: "Method not allowed. 
Use POST/GET/DELETE on /mcp" }); + })().catch(() => { + if (!res.headersSent) json(res, 500, { error: "Internal server error" }); + else res.end(); + }); + }); + + httpServer.on("close", () => clearInterval(sweepTimer)); + + return { httpServer, sessions }; } /** @@ -152,19 +331,13 @@ export function buildHttpApp( * is bound and listening. */ export async function createHttpTransport( - mcpServer: McpServer, + serverFactory: McpServerFactory, opts: HttpTransportOptions = {} ): Promise { const host = opts.host ?? process.env["RVAGENT_HTTP_HOST"] ?? DEFAULT_HOST; const port = opts.port ?? Number(process.env["RVAGENT_HTTP_PORT"] ?? DEFAULT_PORT); - const { httpServer, transport } = buildHttpApp(mcpServer, opts); - - // Wire MCP server to the transport only after the HTTP server is built. - // Cast needed: StreamableHTTPServerTransport implements Transport but - // exactOptionalPropertyTypes causes a false incompatibility on optional - // callback properties; the cast is safe — the SDK types are consistent. - await mcpServer.connect(transport as Parameters[0]); + const { httpServer, sessions } = buildHttpApp(serverFactory, opts); await new Promise((resolve, reject) => { httpServer.once("error", reject); @@ -173,7 +346,7 @@ export async function createHttpTransport( return { httpServer, - transport, + sessions, boundAddress: `http://${host}:${port}`, }; } diff --git a/tools/ruview-mcp/src/index.ts b/tools/ruview-mcp/src/index.ts index 06a5b95a..4f8dbbb6 100644 --- a/tools/ruview-mcp/src/index.ts +++ b/tools/ruview-mcp/src/index.ts @@ -1,29 +1,39 @@ #!/usr/bin/env node /** - * @ruv/ruview-mcp — RuView MCP Server + * @ruvnet/rvagent — RuView MCP Server * * Exposes RuView's WiFi-DensePose sensing capabilities as Model Context Protocol * (MCP) tools that Claude Code, Cursor, Codex, and other MCP-compatible agents * can call directly. 
* - * Tools exposed: - * ruview_csi_latest — pull the latest CSI window from the sensing-server - * ruview_pose_infer — single-shot 17-keypoint pose estimation - * ruview_count_infer — single-shot person count with confidence interval - * ruview_registry_list — list cogs from the Cognitum edge registry (ADR-102) - * ruview_train_count — kick off a count-cog training run (returns job ID) - * ruview_job_status — poll a background training job + * Transports (ADR-264 O3): + * stdio (default) node dist/index.js + * Streamable HTTP RVAGENT_HTTP_PORT=3001 node dist/index.js + * (127.0.0.1-bound, Origin-gated, optional bearer token — + * see http-transport.ts for the security model) * - * Usage: - * node dist/index.js # stdio transport (default) - * RUVIEW_SENSING_SERVER_URL=http://cognitum-v0:3000 node dist/index.js + * Tool naming (ADR-264 O4): canonical names are underscore-form + * (host tool-name regexes commonly enforce ^[a-zA-Z0-9_-]{1,64}$). The + * pre-ADR-264 dotted names (ruview.bfld.last_scan, …) remain callable as + * router-only aliases for one deprecation cycle; tools/list advertises the + * underscore form only. + * + * Validation (ADR-264 O5): each tool declares ONE Zod schema. The CallTool + * gate parses exactly once and hands the typed result to the handler; the + * advertised JSON Schema is generated from the same Zod source, so what is + * advertised is what is enforced. * * To register with Claude Code: - * claude mcp add ruview -- node /path/to/tools/ruview-mcp/dist/index.js + * claude mcp add ruview -- npx -y @ruvnet/rvagent * - * See ADR-104 for the full design rationale and security model. + * See ADR-104 for the original design rationale and ADR-264 for the npm + * deep-review this layout implements. 
*/ +import { createRequire } from "node:module"; +import { realpathSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { argv } from "node:process"; import { Server } from "@modelcontextprotocol/sdk/server/index.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { @@ -32,6 +42,8 @@ import { McpError, ErrorCode, } from "@modelcontextprotocol/sdk/types.js"; +import type { z } from "zod"; +import { zodToJsonSchema } from "zod-to-json-schema"; import { loadConfig } from "./config.js"; import { csiLatestSchema, csiLatest } from "./tools/csi-latest.js"; @@ -44,40 +56,51 @@ import { jobStatusSchema, jobStatus, } from "./tools/train-count.js"; -import { TOOL_INPUT_SCHEMAS } from "./schemas/index.js"; -import { bfldLastScan } from "./tools/bfld-last-scan.js"; -import { bfldSubscribe } from "./tools/bfld-subscribe.js"; -import { presenceNow } from "./tools/presence-now.js"; -import { vitalsGetBreathing } from "./tools/vitals-get-breathing.js"; -import { vitalsGetHeartRate } from "./tools/vitals-get-heart-rate.js"; -import { vitalsGetAll } from "./tools/vitals-get-all.js"; +import { bfldLastScanSchema, bfldLastScan } from "./tools/bfld-last-scan.js"; +import { bfldSubscribeSchema, bfldSubscribe } from "./tools/bfld-subscribe.js"; +import { presenceNowSchema, presenceNow } from "./tools/presence-now.js"; +import { + vitalsGetBreathingSchema, + vitalsGetBreathing, +} from "./tools/vitals-get-breathing.js"; +import { + vitalsGetHeartRateSchema, + vitalsGetHeartRate, +} from "./tools/vitals-get-heart-rate.js"; +import { vitalsGetAllSchema, vitalsGetAll } from "./tools/vitals-get-all.js"; +// NOTE: ./http-transport.js is imported lazily in main() — it chain-loads the +// SDK's streamableHttp module (~48 ms MEASURED), which the default stdio path +// never uses. -const PACKAGE_VERSION = "0.1.0"; +// Single-source the version from package.json (ADR-264 O8/ADR-265 D3). 
+const require = createRequire(import.meta.url); +const PACKAGE_VERSION: string = ( + require("../package.json") as { version: string } +).version; const SERVER_NAME = "rvagent"; // ── Tool registry ────────────────────────────────────────────────────────── -const TOOLS = [ +type RuviewConfig = ReturnType; + +interface ToolDef { + name: string; + description: string; + /** The single validation source; the advertised JSON Schema derives from it. */ + schema: z.ZodTypeAny; + handler: (parsedArgs: unknown, config: RuviewConfig) => Promise; +} + +export const TOOLS: ToolDef[] = [ { name: "ruview_csi_latest", description: "Pull the latest CSI window from a running wifi-densepose-sensing-server. " + "Returns 56-subcarrier × 20-frame amplitude/phase arrays suitable for " + "downstream inference or research analysis.", - inputSchema: { - type: "object" as const, - properties: { - sensing_server_url: { - type: "string", - description: - "Base URL of the sensing-server (default: RUVIEW_SENSING_SERVER_URL or http://localhost:3000).", - }, - }, - }, - handler: async (args: unknown, config: ReturnType) => { - const input = csiLatestSchema.parse(args); - return csiLatest(input, config); - }, + schema: csiLatestSchema, + handler: (args, config) => + csiLatest(args as Parameters[0], config), }, { name: "ruview_pose_infer", @@ -86,23 +109,9 @@ const TOOLS = [ "cog-pose-estimation Cog binary (ADR-101). Accepts a CSI window JSON file " + "or uses the live sensing-server if no window is provided. " + "Returns [{keypoints: [[x,y]×17], confidence}] per detected person.", - inputSchema: { - type: "object" as const, - properties: { - window_path: { - type: "string", - description: "Path to a CSI window JSON file. 
Omit to use the live sensing-server.", - }, - cog_binary: { - type: "string", - description: "Path to cog-pose-estimation binary.", - }, - }, - }, - handler: async (args: unknown, config: ReturnType) => { - const input = poseInferSchema.parse(args); - return poseInfer(input, config); - }, + schema: poseInferSchema, + handler: (args, config) => + poseInfer(args as Parameters[0], config), }, { name: "ruview_count_infer", @@ -110,29 +119,9 @@ const TOOLS = [ "Run a single-shot person-count inference using the cog-person-count Cog " + "binary (ADR-103). Returns {count, confidence, count_p95_low, count_p95_high} " + "with a Stoer-Wagner multi-node fusion upper bound when multiple nodes are active.", - inputSchema: { - type: "object" as const, - properties: { - window_path: { - type: "string", - description: "Path to a CSI window JSON file. Omit to use the live sensing-server.", - }, - cog_binary: { - type: "string", - description: "Path to cog-person-count binary.", - }, - max_persons: { - type: "integer", - minimum: 1, - maximum: 7, - description: "Upper bound on person count (1–7). Default: 7.", - }, - }, - }, - handler: async (args: unknown, config: ReturnType) => { - const input = countInferSchema.parse(args); - return countInfer(input, config); - }, + schema: countInferSchema, + handler: (args, config) => + countInfer(args as Parameters[0], config), }, { name: "ruview_registry_list", @@ -140,33 +129,9 @@ const TOOLS = [ "List cogs from the Cognitum edge module registry (ADR-102). " + "Fetches /api/v1/edge/registry from the sensing-server, which proxies the " + "canonical GCS catalog (105 cogs, 11 categories). 
Supports category filter and search.", - inputSchema: { - type: "object" as const, - properties: { - category: { - type: "string", - description: - "Filter by category: health, security, building, retail, industrial, " + - "research, ai, swarm, signal, network, developer.", - }, - search: { - type: "string", - description: "Search substring matched against cog id and name (case-insensitive).", - }, - refresh: { - type: "boolean", - description: "Bypass the 1-hour registry cache.", - }, - sensing_server_url: { - type: "string", - description: "Override the sensing-server URL.", - }, - }, - }, - handler: async (args: unknown, config: ReturnType) => { - const input = registryListSchema.parse(args); - return registryList(input, config); - }, + schema: registryListSchema, + handler: (args, config) => + registryList(args as Parameters[0], config), }, { name: "ruview_train_count", @@ -174,211 +139,139 @@ const TOOLS = [ "Kick off a cog-person-count training run using the Candle GPU trainer " + "(ADR-103). The paired JSONL file provides CSI windows + camera-derived " + "person-count labels. 
Returns a job_id to poll with ruview_job_status.", - inputSchema: { - type: "object" as const, - required: ["paired_jsonl"], - properties: { - paired_jsonl: { - type: "string", - description: - "Path to the paired JSONL training file (produced by scripts/align-ground-truth.js).", - }, - epochs: { - type: "integer", - minimum: 1, - maximum: 10000, - description: "Training epochs (default: 400).", - }, - learning_rate: { - type: "number", - description: "Initial learning rate (default: 0.001).", - }, - output_dir: { - type: "string", - description: - "Directory for model artifacts (default: v2/crates/cog-person-count/cog/artifacts/).", - }, - }, - }, - handler: async (args: unknown, config: ReturnType) => { - const input = trainCountSchema.parse(args); - return trainCount(input, config); - }, + schema: trainCountSchema, + handler: (args, config) => + trainCount(args as Parameters[0], config), }, { name: "ruview_job_status", description: "Poll the status of a background training job started by ruview_train_count. " + "Returns {status, epochs_done, epochs_total, recent_log} for the given job_id.", - inputSchema: { - type: "object" as const, - required: ["job_id"], - properties: { - job_id: { - type: "string", - description: "UUID returned by ruview_train_count.", - }, - }, - }, - handler: async (args: unknown, config: ReturnType) => { - const input = jobStatusSchema.parse(args); - return jobStatus(input, config); - }, + schema: jobStatusSchema, + handler: (args, config) => + jobStatus(args as Parameters[0], config), }, - // ── ADR-124 BFLD tools (Phase 4 Refinement) ────────────────────────────── + // ── ADR-124 BFLD tools (Phase 4 Refinement; underscore names per ADR-264) ─ { - name: "ruview.bfld.last_scan", + name: "ruview_bfld_last_scan", description: "Return the most recent BFLD scan result for a node (ADR-118/ADR-121). " + "Fields: node_id, identity_risk_score [0,1], privacy_class, n_frames, timestamp_ms. 
" + "Proxied from sensing-server GET /api/v1/bfld//last_scan which aggregates " + "the MQTT state topics ruview//bfld/* (ADR-122 §2.2).", - inputSchema: { - type: "object" as const, - properties: { - node_id: { - type: "string", - description: "Target node id. Omit to use the single active node.", - }, - sensing_server_url: { - type: "string", - description: "Override sensing-server URL for this call only.", - }, - }, - }, - handler: async (args: unknown, config: ReturnType) => { - return bfldLastScan(args as Parameters[0], config); - }, + schema: bfldLastScanSchema, + handler: (args, config) => + bfldLastScan(args as Parameters[0], config), }, { - name: "ruview.bfld.subscribe", + name: "ruview_bfld_subscribe", description: "Subscribe to BFLD events on ruview//bfld/* for duration_s seconds (ADR-122). " + "Returns {ok, subscription_id, expires_at, topic}. When the sensing-server is unreachable, " + "returns a synthetic envelope with ok:false,warn:true so the caller can distinguish " + "a network error from an invalid request.", - inputSchema: { - type: "object" as const, - required: ["duration_s"], - properties: { - node_id: { - type: "string", - description: "Target node id. 
Omit to use the single active node.", - }, - duration_s: { - type: "number", - minimum: 0, - maximum: 3600, - description: "Subscription duration in seconds (max 3600).", - }, - sensing_server_url: { - type: "string", - description: "Override sensing-server URL for this call only.", - }, - }, - }, - handler: async (args: unknown, config: ReturnType) => { - return bfldSubscribe(args as Parameters[0], config); - }, + schema: bfldSubscribeSchema, + handler: (args, config) => + bfldSubscribe(args as Parameters[0], config), }, - // ── ADR-124 Presence + Vitals tools (Phase 4 Refinement iter 5) ────────── + // ── ADR-124 Presence + Vitals tools ─────────────────────────────────────── { - name: "ruview.presence.now", + name: "ruview_presence_now", description: "Return current occupancy for a node: present, n_persons, confidence, timestamp_ms. " + "Wraps EdgeVitalsMessage.presence + n_persons (ADR-124 §4.1, ws.py:74-88).", - inputSchema: { - type: "object" as const, - properties: { - node_id: { type: "string", description: "Target node id." }, - sensing_server_url: { type: "string", description: "Override sensing-server URL." }, - }, - }, - handler: async (args: unknown, config: ReturnType) => + schema: presenceNowSchema, + handler: (args, config) => presenceNow(args as Parameters[0], config), }, { - name: "ruview.vitals.get_breathing", + name: "ruview_vitals_get_breathing", description: "Return breathing rate for a node: breathing_rate_bpm (null if unavailable), " + "confidence, timestamp_ms. Wraps EdgeVitalsMessage.breathing_rate_bpm (ws.py:82).", - inputSchema: { - type: "object" as const, - properties: { - node_id: { type: "string", description: "Target node id." }, - window_s: { type: "number", description: "Averaging window in seconds (max 300)." }, - sensing_server_url: { type: "string", description: "Override sensing-server URL." 
}, - }, - }, - handler: async (args: unknown, config: ReturnType) => + schema: vitalsGetBreathingSchema, + handler: (args, config) => vitalsGetBreathing(args as Parameters[0], config), }, { - name: "ruview.vitals.get_heart_rate", + name: "ruview_vitals_get_heart_rate", description: "Return heart rate for a node: heartrate_bpm (null if unavailable), " + "confidence, timestamp_ms. Wraps EdgeVitalsMessage.heartrate_bpm (ws.py:83).", - inputSchema: { - type: "object" as const, - properties: { - node_id: { type: "string", description: "Target node id." }, - window_s: { type: "number", description: "Averaging window in seconds (max 300)." }, - sensing_server_url: { type: "string", description: "Override sensing-server URL." }, - }, - }, - handler: async (args: unknown, config: ReturnType) => + schema: vitalsGetHeartRateSchema, + handler: (args, config) => vitalsGetHeartRate(args as Parameters[0], config), }, { - name: "ruview.vitals.get_all", + name: "ruview_vitals_get_all", description: "Return the full EdgeVitalsMessage for a node (all fields except raw): " + "presence, n_persons, confidence, breathing_rate_bpm, heartrate_bpm, motion, zone_id. " + "Full surface of ws.py:74-88.", - inputSchema: { - type: "object" as const, - properties: { - node_id: { type: "string", description: "Target node id." }, - sensing_server_url: { type: "string", description: "Override sensing-server URL." }, - }, - }, - handler: async (args: unknown, config: ReturnType) => + schema: vitalsGetAllSchema, + handler: (args, config) => vitalsGetAll(args as Parameters[0], config), }, -] as const; +]; -// ── Server bootstrap ──────────────────────────────────────────────────────── +/** + * Pre-ADR-264 dotted tool names, accepted at call time for one deprecation + * cycle. Router-only: tools/list never advertises these. 
+ */ +export const TOOL_ALIASES: Record = { + "ruview.bfld.last_scan": "ruview_bfld_last_scan", + "ruview.bfld.subscribe": "ruview_bfld_subscribe", + "ruview.presence.now": "ruview_presence_now", + "ruview.vitals.get_breathing": "ruview_vitals_get_breathing", + "ruview.vitals.get_heart_rate": "ruview_vitals_get_heart_rate", + "ruview.vitals.get_all": "ruview_vitals_get_all", +}; -async function main(): Promise { - const config = loadConfig(); +/** + * Advertised JSON Schema, generated from the Zod source (ADR-264 O5). + * Memoized: schemas are static for the process lifetime, and tools/list is + * called once per session (per HTTP session under the session-per-server + * model) — no point re-walking the Zod tree each time. + */ +const jsonSchemaCache = new Map(); +export function toolInputJsonSchema(def: ToolDef): object { + const cached = jsonSchemaCache.get(def.name); + if (cached !== undefined) return cached; + const raw = zodToJsonSchema(def.schema, { $refStrategy: "none" }) as Record< + string, + unknown + >; + delete raw["$schema"]; + jsonSchemaCache.set(def.name, raw); + return raw; +} +// ── Server factory ────────────────────────────────────────────────────────── + +/** + * Build a fully-wired MCP Server. A factory (not a singleton) because each + * Streamable-HTTP session needs its own Server instance (ADR-264 F7/O3). + */ +export function buildServer(config: RuviewConfig = loadConfig()): Server { const server = new Server( - { - name: SERVER_NAME, - version: PACKAGE_VERSION, - }, - { - capabilities: { - tools: {}, - }, - } + { name: SERVER_NAME, version: PACKAGE_VERSION }, + { capabilities: { tools: {} } } ); - // List tools handler. server.setRequestHandler(ListToolsRequestSchema, () => ({ tools: TOOLS.map((t) => ({ name: t.name, description: t.description, - inputSchema: t.inputSchema, + inputSchema: toolInputJsonSchema(t), })), })); - // Call tool handler — uniform Zod validation gate (ADR-124 §3 Architecture). 
- // If TOOL_INPUT_SCHEMAS has a schema for the tool name, run safeParse first. - // Parse failures throw McpError(InvalidParams) so the client sees a typed - // JSON-RPC error rather than a wrapped string error. + // Call tool handler — the SINGLE Zod validation gate (ADR-264 O5): parse + // once, hand the typed result (with defaults applied) to the handler. server.setRequestHandler(CallToolRequestSchema, async (request) => { - const { name, arguments: args } = request.params; + const { name: rawName, arguments: args } = request.params; + const name = TOOL_ALIASES[rawName] ?? rawName; const tool = TOOLS.find((t) => t.name === name); if (!tool) { @@ -388,7 +281,7 @@ async function main(): Promise { type: "text" as const, text: JSON.stringify({ ok: false, - error: `Unknown tool "${name}". Available tools: ${TOOLS.map((t) => t.name).join(", ")}`, + error: `Unknown tool "${rawName}". Available tools: ${TOOLS.map((t) => t.name).join(", ")}`, }), }, ], @@ -396,22 +289,16 @@ async function main(): Promise { }; } - // Schema validation gate — applies to all tools registered in TOOL_INPUT_SCHEMAS. - const schemaEntry = Object.prototype.hasOwnProperty.call(TOOL_INPUT_SCHEMAS, name) - ? TOOL_INPUT_SCHEMAS[name as keyof typeof TOOL_INPUT_SCHEMAS] - : undefined; - if (schemaEntry !== undefined) { - const parsed = schemaEntry.safeParse(args ?? {}); - if (!parsed.success) { - throw new McpError( - ErrorCode.InvalidParams, - `Invalid arguments for tool "${name}": ${parsed.error.message}` - ); - } + const parsed = tool.schema.safeParse(args ?? {}); + if (!parsed.success) { + throw new McpError( + ErrorCode.InvalidParams, + `Invalid arguments for tool "${rawName}": ${parsed.error.message}` + ); } try { - const result = await tool.handler(args ?? {}, config); + const result = await tool.handler(parsed.data, config); return { content: [ { @@ -438,18 +325,59 @@ async function main(): Promise { } }); - // Wire up stdio transport. 
+ return server; +} + +// ── Server bootstrap ──────────────────────────────────────────────────────── + +async function main(): Promise { + const config = loadConfig(); + + // stdio transport (default, always on). + const stdioServer = buildServer(config); const transport = new StdioServerTransport(); - await server.connect(transport); + await stdioServer.connect(transport); + + // Streamable HTTP transport — explicit opt-in only (ADR-264 O3). Lazily + // imported so the stdio path never pays the streamableHttp load cost. + const httpPort = process.env["RVAGENT_HTTP_PORT"]; + let httpNote = ""; + if (httpPort !== undefined && httpPort !== "") { + const { createHttpTransport } = await import("./http-transport.js"); + const { boundAddress } = await createHttpTransport( + () => buildServer(config), + { port: Number(httpPort) } + ); + httpNote = ` HTTP: ${boundAddress}/mcp.`; + } // Log to stderr so it doesn't interfere with the MCP stdio protocol. process.stderr.write( `[@ruvnet/rvagent] Server v${PACKAGE_VERSION} started. ` + - `Sensing server: ${config.sensingServerUrl}\n` + `Sensing server: ${config.sensingServerUrl}.${httpNote}\n` ); } -main().catch((e) => { - process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`); - process.exit(1); -}); +// CLI guard: boot the server only when this module is the entrypoint — invoked +// as the `rvagent` / `ruview-mcp` bin or `node dist/index.js`. Importing it as a +// library (`import { buildServer } from "@ruvnet/rvagent"`) must NOT side-effect +// connect a StdioServerTransport to the consumer's stdin/stdout. Realpath both +// sides because npm's bin shim is a symlink and passes a non-normalized, +// possibly case-skewed argv[1] on Windows (mirrors harness/ruview/bin/cli.js). +const invokedDirectly = (() => { + if (!argv[1]) return false; + try { + const a = realpathSync(argv[1]); + const b = realpathSync(fileURLToPath(import.meta.url)); + return process.platform === "win32" ? 
a.toLowerCase() === b.toLowerCase() : a === b; + } catch { + return false; + } +})(); + +if (invokedDirectly) { + main().catch((e) => { + process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`); + process.exit(1); + }); +} diff --git a/tools/ruview-mcp/src/tools/train-count.ts b/tools/ruview-mcp/src/tools/train-count.ts index 4380862c..b3462268 100644 --- a/tools/ruview-mcp/src/tools/train-count.ts +++ b/tools/ruview-mcp/src/tools/train-count.ts @@ -17,7 +17,16 @@ import { z } from "zod"; import { randomUUID } from "node:crypto"; -import { mkdirSync, appendFileSync, openSync } from "node:fs"; +import { + mkdirSync, + appendFileSync, + openSync, + closeSync, + readFileSync, + writeFileSync, + statSync, + readSync, +} from "node:fs"; import path from "node:path"; import { spawn } from "node:child_process"; import type { RuviewConfig, TrainJobResult, JobStatusResult } from "../types.js"; @@ -66,17 +75,101 @@ export const jobStatusSchema = z.object({ export type JobStatusInput = z.infer; -// In-process job registry (survives for the lifetime of the MCP server process). -// For a production implementation, persist to ~/.ruview/jobs/.json. -const jobRegistry = new Map< - string, - { - status: "queued" | "running" | "done" | "failed"; - log_path: string; - queued_at: number; - epochs_total: number; +interface JobRecord { + status: "queued" | "running" | "done" | "failed" | "unknown"; + log_path: string; + queued_at: number; + epochs_total: number; + /** + * OS pid of the training child. Persisted so a later process (e.g. after an + * MCP server restart) can tell whether a job still marked 'running' actually + * outlived the process that spawned it (ADR-264 O6). + */ + pid?: number | undefined; + /** Human-readable explanation attached during reconciliation (unknown state). */ + reason?: string | undefined; +} + +// In-process job registry, mirrored to /.json on every state +// change so ruview_job_status survives an MCP server restart (ADR-264 O6). 
/** Every status value a persisted job record may legitimately carry. */
const KNOWN_JOB_STATUSES: ReadonlySet<string> = new Set([
  "queued",
  "running",
  "done",
  "failed",
  "unknown",
]);

/** In-process job registry, keyed by job id. */
const jobRegistry = new Map<string, JobRecord>();

/**
 * Path of the persisted JSON record for `jobId` inside `jobsDir`.
 *
 * @throws Error when `jobId` is empty or contains a path separator, so a job
 *   id can never resolve to a file outside `jobsDir`. Both callers below wrap
 *   this in try/catch, so a bad id degrades to "not persisted" / "not found".
 *   NOTE(review): the tool's input schema may already constrain job ids —
 *   it is not visible from here.
 */
function jobRecordPath(jobsDir: string, jobId: string): string {
  if (jobId === "" || path.basename(jobId) !== jobId) {
    throw new Error(`invalid job id: ${JSON.stringify(jobId)}`);
  }
  return path.join(jobsDir, `${jobId}.json`);
}

/**
 * Write `record` (plus its `job_id`) to `<jobsDir>/<jobId>.json`.
 *
 * Best-effort by design: any failure is swallowed because the in-memory
 * registry still serves the current process.
 */
function persistJob(jobsDir: string, jobId: string, record: JobRecord): void {
  try {
    writeFileSync(
      jobRecordPath(jobsDir, jobId),
      JSON.stringify({ job_id: jobId, ...record }, null, 2)
    );
  } catch {
    // Persistence is best-effort; the in-memory record still serves this process.
  }
}

/**
 * Load a persisted job record.
 *
 * @returns the record, or `undefined` when the file is missing, unreadable,
 *   not valid JSON, lacks a string `log_path`, or carries a `status` outside
 *   the known set (a corrupt or hand-edited record is treated as absent
 *   rather than surfaced with an impossible status).
 */
function loadPersistedJob(jobsDir: string, jobId: string): JobRecord | undefined {
  try {
    const raw = JSON.parse(
      readFileSync(jobRecordPath(jobsDir, jobId), "utf8")
    ) as Partial<JobRecord>;
    if (
      typeof raw.log_path !== "string" ||
      typeof raw.status !== "string" ||
      !KNOWN_JOB_STATUSES.has(raw.status)
    ) {
      return undefined;
    }
    return {
      status: raw.status,
      log_path: raw.log_path,
      queued_at: typeof raw.queued_at === "number" ? raw.queued_at : 0,
      epochs_total: typeof raw.epochs_total === "number" ? raw.epochs_total : 0,
      pid: typeof raw.pid === "number" ? raw.pid : undefined,
      reason: typeof raw.reason === "string" ? raw.reason : undefined,
    };
  } catch {
    return undefined;
  }
}

/**
 * Is `pid` still a live process?
 *
 * `process.kill(pid, 0)` sends no signal but probes existence: ESRCH ⇒ gone;
 * EPERM ⇒ alive but owned by another user (treated as alive so a running job
 * is never falsely reconciled).
 *
 * Non-positive or non-integer pids are rejected up front: to kill(2), pid 0
 * and negative pids address process groups, so a corrupt record with pid 0
 * would otherwise probe this server's own group and look alive forever.
 *
 * Limitation: a recycled pid (reused by an unrelated process) reads as alive.
 */
function isProcessAlive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (e) {
    return (e as NodeJS.ErrnoException).code === "EPERM";
  }
}

/**
 * Scan log lines, last to first, for a "# exit code: N" marker line.
 *
 * @returns `{ found: true, code }` for the last marker (`code` is `null` for
 *   the literal "null"), or `{ found: false, code: null }` when none exists.
 */
function findExitMarker(lines: string[]): { found: boolean; code: number | null } {
  for (let i = lines.length - 1; i >= 0; i--) {
    const m = /^# exit code: (-?\d+|null)$/.exec((lines[i] ?? "").trim());
    if (m) return { found: true, code: m[1] === "null" ? null : Number(m[1]) };
  }
  return { found: false, code: null };
}

/**
 * Read the last `maxLines` lines of a file without loading the whole log.
 *
 * Only the final `maxBytes` bytes are read. When that window starts mid-file,
 * its first line is a torn fragment (possibly a split UTF-8 sequence) and is
 * dropped, unless it is the only line available.
 *
 * @throws whatever `statSync` / `openSync` / `readSync` throw (e.g. ENOENT).
 */
function tailLines(filePath: string, maxLines: number, maxBytes = 64 * 1024): string[] {
  const size = statSync(filePath).size;
  const start = Math.max(0, size - maxBytes);
  const buf = Buffer.alloc(size - start);
  const fd = openSync(filePath, "r");
  let filled = 0;
  try {
    // readSync may return fewer bytes than requested, and returns 0 if the
    // file shrank after statSync. Honor the count so the zero-filled
    // remainder of the buffer never leaks into the result as NUL characters.
    while (filled < buf.length) {
      const n = readSync(fd, buf, filled, buf.length - filled, start + filled);
      if (n === 0) break;
      filled += n;
    }
  } finally {
    closeSync(fd);
  }
  const lines = buf.subarray(0, filled).toString("utf8").split("\n");
  if (start > 0 && lines.length > 1) lines.shift();
  return lines.slice(Math.max(0, lines.length - maxLines));
}
+ closeSync(logFdOut); + closeSync(logFdErr); + + // Record the child pid so a later process can reconcile a stale 'running' + // record after a server restart (child.pid is undefined only if spawn failed + // synchronously, in which case the 'error' handler flips status to 'failed'). + record.pid = child.pid; + record.status = "running"; + persistJob(logDir, jobId, record); child.on("error", (e) => { appendFileSync(logPath, `\n# ERROR: ${e.message}\n`); - const rec = jobRegistry.get(jobId); - if (rec) rec.status = "failed"; + record.status = "failed"; + persistJob(logDir, jobId, record); }); child.on("close", (code) => { appendFileSync(logPath, `\n# exit code: ${code}\n`); - const rec = jobRegistry.get(jobId); - if (rec) rec.status = code === 0 ? "done" : "failed"; + record.status = code === 0 ? "done" : "failed"; + persistJob(logDir, jobId, record); }); const result: TrainJobResult = { @@ -178,24 +282,48 @@ export async function trainCount( export async function jobStatus( input: JobStatusInput, - _config: RuviewConfig + config: RuviewConfig ): Promise { - const job = jobRegistry.get(input.job_id); + // Memory first, then the persisted record (survives server restarts). + let job = jobRegistry.get(input.job_id) ?? loadPersistedJob(config.jobsDir, input.job_id); if (!job) { return { ok: false, - error: `Job ${input.job_id} not found. ` + - "The MCP server may have restarted — check the log directory directly.", + error: `Job ${input.job_id} not found in this server or in ${config.jobsDir}.`, }; } - // Read the last 20 lines of the log file. + // Reconcile a 'running' record whose owning process is gone. The status flip + // to done/failed lives only in the spawning process's child.on('close'/'error') + // handlers; if this server restarted mid-run, the record froze at 'running' + // (ADR-264 O6). When the pid is dead, recover the true outcome from the log's + // "# exit code: N" marker, else surface an honest 'unknown'. 
+ if (job.status === "running" && typeof job.pid === "number" && !isProcessAlive(job.pid)) { + let tail: string[] = []; + try { + tail = tailLines(job.log_path, 40); + } catch { + /* log unreadable — treated as no marker below */ + } + const marker = findExitMarker(tail); + const reconciled: JobRecord = { ...job }; + if (marker.found) { + reconciled.status = marker.code === 0 ? "done" : "failed"; + reconciled.reason = undefined; + } else { + reconciled.status = "unknown"; + reconciled.reason = + "process gone, no exit marker — server likely restarted mid-run"; + } + jobRegistry.set(input.job_id, reconciled); + persistJob(config.jobsDir, input.job_id, reconciled); + job = reconciled; + } + + // Bounded tail read — never load a multi-GB training log wholesale. let recentLog: string[] = []; try { - const { readFileSync } = await import("node:fs"); - const content = readFileSync(job.log_path, "utf8"); - const lines = content.split("\n"); - recentLog = lines.slice(Math.max(0, lines.length - 20)); + recentLog = tailLines(job.log_path, 20); } catch { recentLog = ["(log not readable yet)"]; } @@ -206,6 +334,7 @@ export async function jobStatus( log_path: job.log_path, recent_log: recentLog, epochs_total: job.epochs_total, + ...(job.reason !== undefined ? { reason: job.reason } : {}), }; return { ok: true, result }; diff --git a/tools/ruview-mcp/src/types.ts b/tools/ruview-mcp/src/types.ts index 68a2a1f7..09738124 100644 --- a/tools/ruview-mcp/src/types.ts +++ b/tools/ruview-mcp/src/types.ts @@ -115,7 +115,12 @@ export interface TrainJobResult { /** Output of ruview_job_status. */ export interface JobStatusResult { job_id: string; - status: "queued" | "running" | "done" | "failed"; + /** + * 'unknown' is only ever produced by post-restart reconciliation: a record + * frozen at 'running' whose owning process is gone and whose log carries no + * exit-code marker (see reason). 
/**
 * ADR-264 F8/O7 — cog-binary detection must be architecture-aware.
 *
 * detectCogBinary() itself probes hardcoded /var/lib paths, so it is not
 * cheaply testable without fs mocking. The bug it fixes, however, lives purely
 * in the candidate ORDER, which cogBinaryCandidates() exposes as a pure,
 * arch-injectable function — that is what we pin here.
 */

import { cogBinaryCandidates } from "../src/config.js";

/**
 * Index of the candidate ending in `suffix`, asserting that it exists.
 *
 * Without the presence check an ordering assertion such as
 * `expect(x86).toBeLessThan(arm)` passes vacuously when the x86_64 candidate
 * is missing altogether (findIndex returns -1, which is less than any index).
 */
function indexOfSuffix(candidates: readonly string[], suffix: string): number {
  const i = candidates.findIndex((p) => p.endsWith(suffix));
  expect(i).toBeGreaterThanOrEqual(0);
  return i;
}

describe("cogBinaryCandidates()", () => {
  it("probes -arm before -x86_64 on arm64 hosts", () => {
    const c = cogBinaryCandidates("cog-person-count", "arm64");
    const arm = indexOfSuffix(c, "cog-person-count-arm");
    const x86 = indexOfSuffix(c, "cog-person-count-x86_64");
    expect(arm).toBeLessThan(x86);
  });

  it("probes -x86_64 before -arm on x64 hosts", () => {
    const c = cogBinaryCandidates("cog-person-count", "x64");
    const arm = indexOfSuffix(c, "cog-person-count-arm");
    const x86 = indexOfSuffix(c, "cog-person-count-x86_64");
    expect(x86).toBeLessThan(arm);
  });

  it("defaults an unknown arch to the x86_64-first order", () => {
    const c = cogBinaryCandidates("cog-pose-estimation", "riscv64");
    const arm = indexOfSuffix(c, "cog-pose-estimation-arm");
    const x86 = indexOfSuffix(c, "cog-pose-estimation-x86_64");
    expect(x86).toBeLessThan(arm);
  });

  // Title corrected: the list has exactly three entries (two arch builds and
  // the /usr/local/bin fallback), so there is no bare-name entry to pin here.
  it("keeps the /usr/local/bin fallback last, after both arch builds", () => {
    const c = cogBinaryCandidates("cog-person-count", "arm64");
    expect(c).toHaveLength(3);
    expect(c[c.length - 1]).toBe("/usr/local/bin/cog-person-count");
  });

  it("derives the id by stripping the cog- prefix once", () => {
    const c = cogBinaryCandidates("cog-person-count", "x64");
    expect(c[0]).toBe(
      "/var/lib/cognitum/apps/person-count/cog-person-count-x86_64"
    );
  });
});
+ it("allows localhost origins on any port", () => { + expect(isOriginAllowed("http://localhost:5173", [])).toBe(true); + expect(isOriginAllowed("http://127.0.0.1:8080", [])).toBe(true); + expect(isOriginAllowed("https://localhost:3001", [])).toBe(true); + }); + + it("rejects non-local origins even with a localhost-looking prefix", () => { + expect(isOriginAllowed("http://localhost.evil.example.com", [])).toBe(false); + expect(isOriginAllowed("https://evil.example.com:443", [])).toBe(false); + }); + + // ADR-264 F7 hardening: an EXPLICIT allowlist means exact matching only. The + // any-port-localhost convenience applies solely to the empty-allowlist case, + // so an operator who pins an allowlist actually gets it. + it("with an explicit allowlist, rejects a localhost origin on an unlisted port", () => { + expect(isOriginAllowed("http://localhost:5173", allow)).toBe(false); + expect(isOriginAllowed("http://127.0.0.1:8080", allow)).toBe(false); + }); + + it("with an explicit allowlist, still accepts an exactly-listed localhost origin", () => { + expect(isOriginAllowed("http://localhost", allow)).toBe(true); + expect(isOriginAllowed("http://127.0.0.1", allow)).toBe(true); + }); + + it("is case-sensitive for non-local allowlist entries per RFC 6454", () => { + expect(isOriginAllowed("HTTPS://Partner.Example.com", ["https://partner.example.com"])).toBe(false); }); }); @@ -165,3 +193,117 @@ describe("HTTP transport bearer-token auth gate", () => { expect(r.status).not.toBe(401); }); }); + +// ── 7. 
// ADR-264 F7/O3 hardening: request-body cap and per-session routing.
describe("HTTP transport session + body-cap hardening (ADR-264 F7)", () => {
  let port: number;
  // Stays undefined if beforeAll fails, so afterAll does not throw a
  // TypeError that would mask the real startup error.
  let close: (() => Promise<void>) | undefined;

  beforeAll(async () => {
    const srv = await startServer({ allowedOrigins: ["*"], maxBodyBytes: 64 * 1024 }, 49600);
    port = srv.port;
    close = srv.close;
  });

  afterAll(async () => {
    await close?.();
  });

  it("rejects oversized request bodies with 413", async () => {
    // 128 KiB of padding against a 64 KiB cap.
    const huge = JSON.stringify({
      jsonrpc: "2.0",
      id: 1,
      method: "x",
      params: { pad: "y".repeat(128 * 1024) },
    });
    const r = await post(port, "/mcp", {}, huge);
    expect(r.status).toBe(413);
  });

  it("rejects a non-initialize POST without a session id with 400 (never a shared transport)", async () => {
    const r = await post(port, "/mcp", {}, MCP_BODY); // tools/list, no mcp-session-id
    expect(r.status).toBe(400);
    const body = JSON.parse(r.body) as Record<string, unknown>;
    expect(body["error"]).toMatch(/initialize/i);
  });

  it("rejects a POST with an unknown session id with 404", async () => {
    const r = await post(port, "/mcp", { "mcp-session-id": "no-such-session" }, MCP_BODY);
    expect(r.status).toBe(404);
  });

  it("creates a fresh session (and MCP server) per initialize request", async () => {
    const init = JSON.stringify({
      jsonrpc: "2.0",
      id: 1,
      method: "initialize",
      params: {
        protocolVersion: "2024-11-05",
        capabilities: {},
        clientInfo: { name: "test-client", version: "0.0.0" },
      },
    });
    const r = await post(port, "/mcp", { Accept: "application/json, text/event-stream" }, init);
    // The former `expect([200, 406]).not.toContain(0)` "sanity" line asserted
    // nothing about the response and has been removed.
    expect(r.status).toBe(200);
  });
});
// ADR-264 F7: session-map bounds (capacity cap and idle-TTL sweep).
describe("HTTP transport session bounds (ADR-264 F7)", () => {
  const initBody = (id: number): string =>
    JSON.stringify({
      jsonrpc: "2.0",
      id,
      method: "initialize",
      params: {
        protocolVersion: "2024-11-05",
        capabilities: {},
        clientInfo: { name: "test-client", version: "0.0.0" },
      },
    });

  /**
   * Build the app directly (not via startServer) so the sessions map can be
   * inspected.
   *
   * Listens on port 0 so the OS assigns a free port. The previous
   * `basePort + random(0..99)` scheme could collide with another listener in
   * the same window and fail intermittently with EADDRINUSE.
   * NOTE(review): assumes `httpServer` is a node `http.Server` (it is already
   * used through `.once` / `.listen` / `.close`), so `.address()` is available.
   */
  async function startWithApp(
    opts: Parameters<typeof buildHttpApp>[1]
  ): Promise<{
    port: number;
    sessions: ReturnType<typeof buildHttpApp>["sessions"];
    close: () => Promise<void>;
  }> {
    const { httpServer, sessions } = buildHttpApp(() => makeMockMcpServer(), opts);
    await new Promise<void>((resolve, reject) => {
      httpServer.once("error", reject);
      httpServer.listen(0, "127.0.0.1", () => resolve());
    });
    const addr = httpServer.address();
    if (addr === null || typeof addr === "string") {
      throw new Error("test server did not bind to a TCP port");
    }
    const close = () =>
      new Promise<void>((res, rej) => httpServer.close((e) => (e ? rej(e) : res())));
    return { port: addr.port, sessions, close };
  }

  const ACCEPT = { Accept: "application/json, text/event-stream" };

  it("never exceeds maxSessions — evicts the oldest-idle session at capacity", async () => {
    const srv = await startWithApp({ allowedOrigins: ["*"], maxSessions: 2 });
    try {
      for (let i = 0; i < 5; i++) {
        await post(srv.port, "/mcp", ACCEPT, initBody(i));
      }
      // Lower bound too: an upper bound alone passes when no session was ever
      // created (e.g. every initialize failed).
      expect(srv.sessions.size).toBeGreaterThan(0);
      expect(srv.sessions.size).toBeLessThanOrEqual(2);
    } finally {
      await srv.close();
    }
  });

  it("sweeps sessions idle beyond sessionIdleMs", async () => {
    const srv = await startWithApp({
      allowedOrigins: ["*"],
      sessionIdleMs: 20,
      sweepIntervalMs: 10,
    });
    try {
      await post(srv.port, "/mcp", ACCEPT, initBody(1));
      expect(srv.sessions.size).toBe(1);
      // Several sweep intervals past the idle TTL.
      await new Promise((r) => setTimeout(r, 150));
      expect(srv.sessions.size).toBe(0);
    } finally {
      await srv.close();
    }
  });
});
/**
 * ADR-264 O6 — post-restart job reconciliation.
 *
 * When the MCP server restarts mid-run, the persisted job record stays frozen
 * at 'running' (the child.on('close') that flips it lived in the dead process).
 * ruview_job_status must reconcile such a record against the recorded pid and
 * the log's "# exit code: N" marker.
 *
 * We fabricate a persisted record pointing at a KNOWN-DEAD pid (a synchronous
 * child that has already exited) and assert the reconciled status.
 */

import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { spawnSync } from "node:child_process";
import os from "node:os";
import path from "node:path";
import { randomUUID } from "node:crypto";
import { jobStatus } from "../src/tools/train-count.js";
import type { RuviewConfig } from "../src/types.js";

/** Temp job directories created by seedRunningJob; removed in afterAll. */
const tempDirs: string[] = [];

afterAll(() => {
  for (const dir of tempDirs.splice(0)) {
    rmSync(dir, { recursive: true, force: true });
  }
});

/**
 * A pid that has certainly exited: spawnSync waits for the child to finish.
 *
 * Throws when the probe child could not be spawned (`error` set or no usable
 * pid); otherwise a bogus pid would make the reconciliation tests fail for an
 * unrelated reason. The pid could in principle be recycled before the
 * assertion runs; that window is accepted as negligible here.
 */
function deadPid(): number {
  const r = spawnSync(process.execPath, ["-e", ""]);
  if (r.error !== undefined || typeof r.pid !== "number" || r.pid <= 0) {
    throw new Error("could not spawn probe child");
  }
  return r.pid;
}

function makeConfig(jobsDir: string): RuviewConfig {
  return {
    sensingServerUrl: "http://127.0.0.1:19999",
    apiToken: undefined,
    poseCogBinary: "nonexistent",
    countCogBinary: "nonexistent",
    jobsDir,
  };
}

/** Write a fake persisted 'running' record + its log, return {jobId, config}. */
function seedRunningJob(logBody: string): { jobId: string; config: RuviewConfig } {
  const jobsDir = mkdtempSync(path.join(os.tmpdir(), "rvagent-jobs-"));
  tempDirs.push(jobsDir);
  const jobId = randomUUID();
  const logPath = path.join(jobsDir, `${jobId}.log`);
  writeFileSync(logPath, logBody);
  const record = {
    job_id: jobId,
    status: "running",
    log_path: logPath,
    queued_at: Date.now() / 1000,
    epochs_total: 5,
    pid: deadPid(),
  };
  writeFileSync(
    path.join(jobsDir, `${jobId}.json`),
    JSON.stringify(record, null, 2)
  );
  return { jobId, config: makeConfig(jobsDir) };
}

/**
 * Seed a dead 'running' job with `logBody`, query its status, and return the
 * result payload. Asserts `ok` first so a failed lookup reports as such
 * instead of as a TypeError on an undefined `result`.
 */
async function reconcile(logBody: string): Promise<Record<string, unknown>> {
  const { jobId, config } = seedRunningJob(logBody);
  const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
  expect(out["ok"]).toBe(true);
  return out["result"] as Record<string, unknown>;
}

describe("ruview_job_status reconciliation (ADR-264 O6)", () => {
  it("reconciles a dead 'running' job with exit 0 to 'done'", async () => {
    const res = await reconcile("# training...\nepoch 5/5\n# exit code: 0\n");
    expect(res["status"]).toBe("done");
  });

  it("reconciles a dead 'running' job with non-zero exit to 'failed'", async () => {
    const res = await reconcile("# training...\npanic: cuda oom\n# exit code: 101\n");
    expect(res["status"]).toBe("failed");
  });

  it("marks a dead 'running' job with no exit marker as 'unknown' with a reason", async () => {
    const res = await reconcile("# training...\nepoch 2/5\n");
    expect(res["status"]).toBe("unknown");
    expect(typeof res["reason"]).toBe("string");
    expect(res["reason"]).toMatch(/restarted/i);
  });

  it("treats a signal-killed marker (null) as 'failed'", async () => {
    const res = await reconcile("# training...\n# exit code: null\n");
    expect(res["status"]).toBe("failed");
  });
});