From d46eb8a4b2e30cd8b7526a38893c15d8685f96d6 Mon Sep 17 00:00:00 2001 From: ruv Date: Thu, 2 Jul 2026 09:58:00 -0400 Subject: [PATCH] fix(npm): address 10 verified review findings in harness + rvagent before 0.2.0 publish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit harness/ruview (@ruvnet/ruview): - guardrails: digit gate now sees numbers inside code spans; F1-style metric tokens followed by ':' or a nearby number are no longer scrubbed (fail-open regressions in the honesty gate) - mcp-server: tools/call requests serialize through a FIFO promise chain (hardware/mutating tools never overlap) while ping/tools/list stay immediate; stdin close drains in-flight responses before exit - tools: which() no longer memoizes negative lookups tools/ruview-mcp (@ruvnet/rvagent): - index: realpath invoked-directly guard — library import no longer connects a stdio transport to the consumer's process - http-transport: explicit allowedOrigins is exact-match only (localhost any-port convenience applies only with no configured allowlist); session map gains maxSessions=64 + 5min idle TTL sweep - train-count: job records persist the child pid and reconcile stale 'running' status after a server restart (exit-code marker or dead pid) - config: cog binary candidates ordered by process.arch .github/workflows/ruview-npm-release.yml: port the full ADR-265 D1 gate (version-literal check, unpacked-size budget, tarball-install smoke test) from npm-packages.yml so the publish path enforces what the header claims. Tests: harness 30→36, rvagent 99→112, all passing. Co-Authored-By: claude-flow --- .github/workflows/ruview-npm-release.yml | 63 +++++++++++- harness/ruview/src/guardrails.js | 24 +++-- harness/ruview/src/mcp-server.js | 34 ++++++- harness/ruview/src/tools.js | 7 +- harness/ruview/test/mcp.test.mjs | 47 +++++++++ harness/ruview/test/tools.test.mjs | 55 ++++++++++- tools/ruview-cli/package-lock.json | 2 +- tools/ruview-mcp/src/config.ts | 34 +++++-- tools/ruview-mcp/src/http-transport.ts | 94 +++++++++++++++++- tools/ruview-mcp/src/index.ts | 30 +++++- tools/ruview-mcp/src/tools/train-count.ts | 73 +++++++++++++- tools/ruview-mcp/src/types.ts | 9 +- tools/ruview-mcp/tests/config.test.ts | 49 ++++++++++ tools/ruview-mcp/tests/http-transport.test.ts | 78 +++++++++++++++ .../tests/train-count-reconcile.test.ts | 96 +++++++++++++++++++ 15 files changed, 651 insertions(+), 44 deletions(-) create mode 100644 tools/ruview-mcp/tests/config.test.ts create mode 100644 tools/ruview-mcp/tests/train-count-reconcile.test.ts diff --git a/.github/workflows/ruview-npm-release.yml b/.github/workflows/ruview-npm-release.yml index f8407564..ce95ce7f 100644 --- a/.github/workflows/ruview-npm-release.yml +++ b/.github/workflows/ruview-npm-release.yml @@ -54,18 +54,77 @@ jobs: - name: Test run: npm test --if-present - - name: Pack gate (no maps) + # ADR-265 D3 — package.json is the only place a version string lives. + - name: Version-literal gate run: | + set -euo pipefail + hits="" + for d in src bin; do + if [ -d "$d" ]; then + hits+=$(grep -rEn '\b[0-9]+\.[0-9]+\.[0-9]+\b' "$d" | grep -vE '127\.0\.0\.1|0\.0\.0\.0' || true) + fi + done + if [ -n "$hits" ]; then + echo "Hardcoded version-like literals found (read package.json instead — ADR-265 D3):" + echo "$hits" + exit 1 + fi + + # ADR-265 D1.3 — pack-content gate: no maps AND the per-package + # unpacked-size budget (the budgets that npm-packages.yml enforces). + - name: Pack gate (no maps + size budget) + run: | + set -euo pipefail + case "${{ inputs.package }}" in + # ADR-263: dependency-free harness; budget guards against dep creep. + harness/ruview) export UNPACKED_BUDGET=65536 ;; + # ADR-264 O2: map-free tarball (was 188 kB with maps). + tools/ruview-mcp) export UNPACKED_BUDGET=140000 ;; + *) echo "Unknown package '${{ inputs.package }}' — no budget defined"; exit 1 ;; + esac npm pack --dry-run --json 2>/dev/null | node -e " const [info] = JSON.parse(require('fs').readFileSync(0, 'utf8')); + const budget = Number(process.env.UNPACKED_BUDGET); const maps = info.files.filter((f) => f.path.endsWith('.map')); if (maps.length > 0) { console.error('Tarball contains source maps (ADR-264 F2):', maps.map((m) => m.path)); process.exit(1); } - console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked\`); + if (info.unpackedSize > budget) { + console.error(\`Unpacked size \${info.unpackedSize} B exceeds budget \${budget} B\`); + process.exit(1); + } + console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked (budget \${budget} B), 0 maps\`); " + # ADR-265 D1.4 — install the real tarball and drive each bin/export. + - name: Tarball smoke test + run: | + set -euo pipefail + TGZ="$PWD/$(npm pack --silent 2>/dev/null | tail -1)" + SMOKE="$(mktemp -d)" + cd "$SMOKE" + npm init -y > /dev/null + npm i --no-fund --no-audit "$TGZ" + case "${{ inputs.package }}" in + harness/ruview) + ./node_modules/.bin/ruview --version + ./node_modules/.bin/ruview doctor + # the honesty gate must fail closed on empty input (ADR-263 F1) + if ./node_modules/.bin/ruview claim-check; then + echo 'claim-check passed with no input — fail-open regression'; exit 1 + fi + node --input-type=module -e "const m = await import('@ruvnet/ruview'); if (!m.TOOLS) process.exit(1);" + ;; + tools/ruview-mcp) + # initialize over stdio; server must answer and exit 0 on EOF + printf '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"ci","version":"0"}}}\n' \ + | timeout 30 ./node_modules/.bin/rvagent | grep -q '"serverInfo"' + # the ESM export must resolve from the installed tarball (ADR-264 F1) + timeout 30 node --input-type=module -e "await import('@ruvnet/rvagent');" < /dev/null + ;; + esac + - name: Claim-check README run: | if [ -f README.md ]; then diff --git a/harness/ruview/src/guardrails.js b/harness/ruview/src/guardrails.js index ce034678..a5299dd2 100644 --- a/harness/ruview/src/guardrails.js +++ b/harness/ruview/src/guardrails.js @@ -21,9 +21,13 @@ const METRIC_TERMS = [ // 'map' additionally must not be a `.map` file suffix or a hyphenated // compound ("map-free", "map-reduce") — mAP the metric never appears as either. const METRIC_TERMS_SHORT = [/(? handle(msg).catch((err) => { + if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err)); + log('handler error:', String(err)); + }); + rl.on('line', (line) => { const s = line.trim(); if (!s) return; let msg; try { msg = JSON.parse(s); } catch { return log('bad JSON line dropped'); } - // Fire-and-forget: keep reading stdin while tool calls run (ADR-263 O2). - handle(msg).catch((err) => { - if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err)); - log('handler error:', String(err)); + if (msg && msg.method === 'tools/call') { + toolChain = toolChain.then(() => dispatch(msg)); // one tool at a time + } else { + dispatch(msg); // health/list/handshake answer immediately, even mid tool run + } + }); + + rl.on('close', () => { + // Wait for any queued/in-flight tool call to settle (its response written) + // before exiting — fire-and-forget used to race this and drop the response. + toolChain.then(() => { + log('stdin closed — exiting'); + const done = () => process.exit(0); + // Pipe writes are async; flush buffered stdout before exit. + if (process.stdout.writableLength) process.stdout.once('drain', done); + else done(); }); }); - rl.on('close', () => { log('stdin closed — exiting'); process.exit(0); }); } diff --git a/harness/ruview/src/tools.js b/harness/ruview/src/tools.js index c1dd9633..0a0d7af8 100644 --- a/harness/ruview/src/tools.js +++ b/harness/ruview/src/tools.js @@ -32,8 +32,9 @@ export function findRepoRoot(start = process.cwd()) { return null; } -// Dep-free PATH scan, memoized for the process lifetime (ADR-263 O8) — no -// shell subprocess per lookup. +// Dep-free PATH scan (ADR-263 O8) — no shell subprocess per lookup. Only hits +// are memoized: a miss can resolve later in a long-lived MCP session (the +// operator installs python/the CLI mid-run), so misses are re-probed each call. const whichCache = new Map(); export function which(cmd) { if (whichCache.has(cmd)) return whichCache.get(cmd); @@ -54,7 +55,7 @@ export function which(cmd) { } catch { /* keep scanning */ } } } - whichCache.set(cmd, found); + if (found !== null) whichCache.set(cmd, found); return found; } diff --git a/harness/ruview/test/mcp.test.mjs b/harness/ruview/test/mcp.test.mjs index fed0167c..0b99e807 100644 --- a/harness/ruview/test/mcp.test.mjs +++ b/harness/ruview/test/mcp.test.mjs @@ -99,3 +99,50 @@ test('MCP server answers ping while a long tools/call is in flight (ADR-263 O2)' rmSync(repo, { recursive: true, force: true }); } }); + +test('tools/call executions are serialized — two slow calls run sequentially', { skip: !which('python') && !which('python3') ? 'python not on PATH' : false }, async () => { + // Two verify.py that each sleep 0.8 s. Serialized ⇒ ~1.6 s+; concurrent ⇒ ~0.8 s. + const repo = mkdtempSync(join(tmpdir(), 'ruview-mcp-serial-')); + const proofDir = join(repo, 'archive', 'v1', 'data', 'proof'); + mkdirSync(proofDir, { recursive: true }); + writeFileSync(join(proofDir, 'verify.py'), 'import time\ntime.sleep(0.8)\nprint("VERDICT: PASS")\n'); + + const s = startServer(); + try { + s.send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} }); + await s.next(1); + + const t0 = Date.now(); + const a = s.next(20); + const b = s.next(21); + s.send({ jsonrpc: '2.0', id: 20, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } }); + s.send({ jsonrpc: '2.0', id: 21, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } }); + const [ra, rb] = await Promise.all([a, b]); + const elapsed = Date.now() - t0; + + assert.equal(JSON.parse(ra.result.content[0].text).verdict, 'PASS'); + assert.equal(JSON.parse(rb.result.content[0].text).verdict, 'PASS'); + assert.ok(elapsed > 1400, `two 0.8 s tool calls finished in ${elapsed} ms — they overlapped instead of serializing`); + } finally { + s.close(); + rmSync(repo, { recursive: true, force: true }); + } +}); + +test('stdin close flushes an in-flight tools/call response before exit', async () => { + const child = spawn(process.execPath, [CLI, 'mcp', 'start'], { stdio: ['pipe', 'pipe', 'pipe'] }); + let out = ''; + child.stdout.on('data', (d) => { out += d; }); + const exited = new Promise((res) => child.on('exit', res)); + + // Write a tools/call then immediately close stdin. The old fire-and-forget + // dispatch raced rl 'close' → process.exit and could drop this response. + child.stdin.write(JSON.stringify({ jsonrpc: '2.0', id: 42, method: 'tools/call', params: { name: 'ruview_onboard', arguments: {} } }) + '\n'); + child.stdin.end(); + + await exited; + const msgs = out.trim().split('\n').filter(Boolean).map((l) => JSON.parse(l)); + const resp = msgs.find((m) => m.id === 42); + assert.ok(resp, 'the in-flight tools/call response must be flushed to stdout before exit'); + assert.equal(resp.result.isError, false); +}); diff --git a/harness/ruview/test/tools.test.mjs b/harness/ruview/test/tools.test.mjs index 964e716e..9df16b7b 100644 --- a/harness/ruview/test/tools.test.mjs +++ b/harness/ruview/test/tools.test.mjs @@ -4,8 +4,9 @@ import { test } from 'node:test'; import assert from 'node:assert/strict'; -import { readdirSync, readFileSync } from 'node:fs'; -import { join, dirname } from 'node:path'; +import { readdirSync, readFileSync, mkdtempSync, writeFileSync, rmSync } from 'node:fs'; +import { join, dirname, delimiter } from 'node:path'; +import { tmpdir } from 'node:os'; import { fileURLToPath } from 'node:url'; import { claimCheck, summarize } from '../src/guardrails.js'; import { TOOLS, TOOL_ALIASES, runTool, listTools, findRepoRoot, run, which } from '../src/tools.js'; @@ -58,6 +59,33 @@ test('guardrail still catches real short-token metric claims', () => { assert.equal(claimCheck('IoU 0.75 across rooms.').ok, false); }); +// Digits hidden in a code span still make a claim — scrubbing must not blind the +// number gate to `0.95` (regression: code-span number bypassed the gate). +test('guardrail flags an accuracy number stated inside a code span', () => { + const r = claimCheck('Count accuracy reached `0.95` in our tests.'); + assert.equal(r.ok, false, JSON.stringify(r.findings)); + assert.ok(r.findings.some((f) => /not tagged/i.test(f.reason))); +}); + +// A MEASURED claim whose only number hides in a code span must still reach the +// missing-reproducer check (regression: the scrubbed gate short-circuited it). +// Bare metric prose with no number at all (e.g. the README rule text) stays a pass. +test('guardrail flags a MEASURED code-span number with no reproducer', () => { + const r = claimCheck('Detection accuracy `0.97` on the set (MEASURED).'); + assert.equal(r.ok, false, JSON.stringify(r.findings)); + assert.ok(r.findings.some((f) => /no reproducer/i.test(f.reason))); + assert.equal(claimCheck('Every accuracy number must be MEASURED against a baseline.').ok, true); +}); + +// F1-score phrasings ("F1: 0.91", "F1 reaches 0.91") were scrubbed as option +// labels and slipped through; option refs alone must still not false-positive. +test('guardrail catches F1-score claims but not bare option refs (ADR-263 F11)', () => { + assert.equal(claimCheck('F1: 0.91 on the held-out set.').ok, false, 'F1: value is a metric claim'); + assert.equal(claimCheck('F1 reaches 0.91 on the held-out set.').ok, false, 'F1 with a nearby number is a claim'); + assert.equal(claimCheck('Options O1–O9 are tracked in ADR-263 O2.').ok, true, 'option labels are not metrics'); + assert.equal(claimCheck('ADR-263 O2 lands the exports fix.').ok, true); +}); + test('summarize gives PASS/finding text', () => { assert.match(summarize(claimCheck('nothing here')), /PASS/); assert.match(summarize(claimCheck('100% accuracy')), /finding/); @@ -156,10 +184,29 @@ test('run() bounds captured output instead of dying on big streams (ADR-263 O4)' assert.ok(r.stdout.includes('TAIL_MARKER'), 'tail must keep the end of the stream'); }); -test('which() finds node and memoizes misses', () => { +test('which() finds node and re-probes misses (hits are cached)', () => { assert.ok(which('node'), 'node must be on PATH in the test env'); assert.equal(which('definitely-not-a-binary-xyz'), null); - assert.equal(which('definitely-not-a-binary-xyz'), null); // cached path + assert.equal(which('definitely-not-a-binary-xyz'), null); // re-probed, still absent +}); + +// ADR-263 O8: a miss must not be cached — an operator who installs a tool +// mid-session (e.g. python after a python_missing failure) must be found next call. +test('which() re-probes after a miss so a newly-installed tool is found', () => { + const dir = mkdtempSync(join(tmpdir(), 'ruview-which-')); + const name = 'ruview-probe-xyz'; + const isWin = process.platform === 'win32'; + const bin = join(dir, isWin ? `${name}.cmd` : name); + const prevPath = process.env.PATH; + try { + assert.equal(which(name), null, 'not on PATH yet → miss'); + writeFileSync(bin, isWin ? '@echo off\n' : '#!/bin/sh\n', { mode: 0o755 }); + process.env.PATH = dir + delimiter + prevPath; + assert.ok(which(name), 'installed mid-session → the miss must not have been cached'); + } finally { + process.env.PATH = prevPath; + rmSync(dir, { recursive: true, force: true }); + } }); test('CLI run(): claim-check exits non-zero on a bad claim', async () => { diff --git a/tools/ruview-cli/package-lock.json b/tools/ruview-cli/package-lock.json index 6a1c2905..018a2172 100644 --- a/tools/ruview-cli/package-lock.json +++ b/tools/ruview-cli/package-lock.json @@ -12,7 +12,7 @@ "yargs": "^17.7.2" }, "bin": { - "ruview": "dist/index.js" + "ruview-cli": "dist/index.js" }, "devDependencies": { "@types/node": "^20.14.0", diff --git a/tools/ruview-mcp/src/config.ts b/tools/ruview-mcp/src/config.ts index 72593181..1c8070b2 100644 --- a/tools/ruview-mcp/src/config.ts +++ b/tools/ruview-mcp/src/config.ts @@ -51,21 +51,35 @@ export function loadConfig(): RuviewConfig { }; } +/** + * Ordered cog-binary candidate paths for a host of the given CPU architecture. + * The native-arch build is probed FIRST: an appliance that ships both + * `cog--arm` and `cog--x86_64` must never hand back the wrong-arch + * binary (ADR-264 F8/O7 — the pre-review order tried `-arm` unconditionally). + * The `/usr/local/bin` and bare-name (PATH) fallbacks follow, arch-agnostic. + * + * Pure and arch-injectable so the ordering is unit-testable. + */ +export function cogBinaryCandidates( + name: string, + arch: string = process.arch +): string[] { + const id = name.replace("cog-", ""); + const dir = `/var/lib/cognitum/apps/${id}`; + const arm = `${dir}/cog-${id}-arm`; + const x86 = `${dir}/cog-${id}-x86_64`; + // arm64 → prefer -arm; everything else (notably x64) → prefer -x86_64. + const archOrdered = arch === "arm64" ? [arm, x86] : [x86, arm]; + return [...archOrdered, `/usr/local/bin/${name}`]; +} + /** * Locate a cog binary in the common appliance install locations, probing each - * candidate (ADR-264 F8/O7 — the pre-review version built this list and then - * unconditionally returned the bare name). Falls back to the bare name (PATH + * candidate in native-arch-first order. Falls back to the bare name (PATH * resolution at spawn time) when no candidate exists. */ function detectCogBinary(name: string): string { - const id = name.replace("cog-", ""); - // Common install paths for Cognitum cog binaries on Linux/macOS appliances. - const candidates = [ - `/var/lib/cognitum/apps/${id}/cog-${id}-arm`, - `/var/lib/cognitum/apps/${id}/cog-${id}-x86_64`, - `/usr/local/bin/${name}`, - ]; - for (const candidate of candidates) { + for (const candidate of cogBinaryCandidates(name)) { if (existsSync(candidate)) return candidate; } return name; // bare name — rely on PATH; spawn fails gracefully if absent diff --git a/tools/ruview-mcp/src/http-transport.ts b/tools/ruview-mcp/src/http-transport.ts index cd06269a..c336bedf 100644 --- a/tools/ruview-mcp/src/http-transport.ts +++ b/tools/ruview-mcp/src/http-transport.ts @@ -13,8 +13,11 @@ * * Security model (ADR-124 §6 + ADR-264 F7): * - Origin validation: browser-style requests whose Origin is not local - * are rejected with 403 before reaching the MCP layer. Localhost origins - * match on hostname, ANY port (http://localhost:5173 is local). + * are rejected with 403 before reaching the MCP layer. With NO explicit + * allowlist, localhost origins match on hostname, ANY port + * (http://localhost:5173 is local). When an explicit allowedOrigins list is + * configured, matching is exact — the any-port-localhost convenience is off, + * so a localhost peer on an unlisted port must be added to be accepted. * - Bearer token: when RVAGENT_HTTP_TOKEN is set, requests must carry * Authorization: Bearer ; missing/wrong tokens → 401. * - Body cap: request bodies over 1 MiB are rejected with 413 (the @@ -55,6 +58,20 @@ export interface HttpTransportOptions { bearerToken?: string; /** Maximum accepted request body size in bytes (default: 1 MiB). */ maxBodyBytes?: number; + /** + * Maximum number of concurrent live sessions (default: 64). When a new + * `initialize` arrives at the cap, the oldest-idle session is evicted (its + * transport closed) to make room — bounds memory against a flaky client that + * loops `initialize` or a malicious localhost peer (ADR-264 F7). + */ + maxSessions?: number; + /** + * Idle time-to-live for a session in ms (default: 5 min). Sessions with no + * request activity for longer than this are swept and closed. + */ + sessionIdleMs?: number; + /** How often the idle-session sweeper runs, in ms (default: 60 s). */ + sweepIntervalMs?: number; } export interface HttpTransportResult { @@ -69,6 +86,9 @@ export interface HttpTransportResult { const DEFAULT_HOST = "127.0.0.1"; const DEFAULT_PORT = 3001; const DEFAULT_MAX_BODY_BYTES = 1024 * 1024; +const DEFAULT_MAX_SESSIONS = 64; +const DEFAULT_SESSION_IDLE_MS = 5 * 60 * 1000; +const DEFAULT_SWEEP_INTERVAL_MS = 60 * 1000; const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]); /** @@ -76,8 +96,11 @@ const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]); * Returns true if the request should be allowed, false if it should be rejected. * * An absent Origin header is allowed (same-origin non-browser requests, curl, - * etc.). A localhost origin is allowed on any port (real browser origins carry - * ports — ADR-264 F7). Anything else must match the allowlist exactly. + * etc.). When NO explicit allowlist was configured (empty list), a localhost + * origin is allowed on any port as a convenience — real browser origins carry + * ports (ADR-264 F7). When an explicit allowlist IS configured, matching is + * exact: the any-port-localhost shortcut is disabled so an operator who pins an + * allowlist actually gets it (a looped-back peer on an unlisted port is denied). */ export function isOriginAllowed( origin: string | undefined, @@ -86,6 +109,8 @@ export function isOriginAllowed( if (origin === undefined) return true; // no Origin = not a cross-origin browser request if (allowedOrigins.includes("*")) return true; if (allowedOrigins.includes(origin)) return true; + // Explicit allowlist ⇒ exact matching only; skip the localhost convenience. + if (allowedOrigins.length > 0) return false; try { const u = new URL(origin); return ( @@ -142,7 +167,55 @@ export function buildHttpApp( const allowedOrigins: string[] = opts.allowedOrigins ?? []; const bearerToken = opts.bearerToken ?? process.env["RVAGENT_HTTP_TOKEN"]; const maxBodyBytes = opts.maxBodyBytes ?? DEFAULT_MAX_BODY_BYTES; + const maxSessions = opts.maxSessions ?? DEFAULT_MAX_SESSIONS; + const sessionIdleMs = opts.sessionIdleMs ?? DEFAULT_SESSION_IDLE_MS; + const sweepIntervalMs = opts.sweepIntervalMs ?? DEFAULT_SWEEP_INTERVAL_MS; const sessions = new Map(); + // lastSeen tracks per-session request activity so the sweeper and the + // oldest-idle eviction can bound the session map (ADR-264 F7). + const lastSeen = new Map(); + + /** Mark a session as freshly used. */ + function touch(sessionId: string): void { + lastSeen.set(sessionId, Date.now()); + } + + /** Close a session's transport and drop it from the bookkeeping maps. */ + function closeSession(id: string): void { + const transport = sessions.get(id); + sessions.delete(id); + lastSeen.delete(id); + if (transport) { + try { + void transport.close(); // onclose is idempotent against the maps above + } catch { + /* best-effort: a half-open transport must not block eviction */ + } + } + } + + /** Evict the session that has been idle longest — called when at capacity. */ + function evictOldestIdle(): void { + let oldestId: string | undefined; + let oldestSeen = Infinity; + for (const [id, seen] of lastSeen) { + if (seen < oldestSeen) { + oldestSeen = seen; + oldestId = id; + } + } + if (oldestId !== undefined) closeSession(oldestId); + } + + /** Periodic sweep: close sessions idle beyond sessionIdleMs. */ + function sweepIdleSessions(): void { + const now = Date.now(); + for (const [id, seen] of lastSeen) { + if (now - seen > sessionIdleMs) closeSession(id); + } + } + const sweepTimer = setInterval(sweepIdleSessions, sweepIntervalMs); + sweepTimer.unref(); // never keep the process alive just to sweep const httpServer = createServer((req: IncomingMessage, res: ServerResponse) => { void (async () => { @@ -194,6 +267,7 @@ export function buildHttpApp( json(res, 404, { error: `Unknown session "${sessionId}"` }); return; } + touch(sessionId); await transport.handleRequest(req, res, parsed); return; } @@ -206,14 +280,21 @@ export function buildHttpApp( }); return; } + // Bound the session map: at capacity, reclaim the oldest-idle slot + // before minting a new session (ADR-264 F7). + if (sessions.size >= maxSessions) evictOldestIdle(); const transport = new StreamableHTTPServerTransport({ sessionIdGenerator: () => randomUUID(), onsessioninitialized: (id: string) => { sessions.set(id, transport); + touch(id); }, }); transport.onclose = () => { - if (transport.sessionId !== undefined) sessions.delete(transport.sessionId); + if (transport.sessionId !== undefined) { + sessions.delete(transport.sessionId); + lastSeen.delete(transport.sessionId); + } }; const mcpServer = serverFactory(); await mcpServer.connect(transport as Parameters[0]); @@ -228,6 +309,7 @@ export function buildHttpApp( json(res, 400, { error: "Bad Request: missing or unknown mcp-session-id" }); return; } + if (sessionId !== undefined) touch(sessionId); await transport.handleRequest(req, res); return; } @@ -239,6 +321,8 @@ export function buildHttpApp( }); }); + httpServer.on("close", () => clearInterval(sweepTimer)); + return { httpServer, sessions }; } diff --git a/tools/ruview-mcp/src/index.ts b/tools/ruview-mcp/src/index.ts index b49ea8dd..4f8dbbb6 100644 --- a/tools/ruview-mcp/src/index.ts +++ b/tools/ruview-mcp/src/index.ts @@ -31,6 +31,9 @@ */ import { createRequire } from "node:module"; +import { realpathSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { argv } from "node:process"; import { Server } from "@modelcontextprotocol/sdk/server/index.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { @@ -355,7 +358,26 @@ async function main(): Promise { ); } -main().catch((e) => { - process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`); - process.exit(1); -}); +// CLI guard: boot the server only when this module is the entrypoint — invoked +// as the `rvagent` / `ruview-mcp` bin or `node dist/index.js`. Importing it as a +// library (`import { buildServer } from "@ruvnet/rvagent"`) must NOT side-effect +// connect a StdioServerTransport to the consumer's stdin/stdout. Realpath both +// sides because npm's bin shim is a symlink and passes a non-normalized, +// possibly case-skewed argv[1] on Windows (mirrors harness/ruview/bin/cli.js). +const invokedDirectly = (() => { + if (!argv[1]) return false; + try { + const a = realpathSync(argv[1]); + const b = realpathSync(fileURLToPath(import.meta.url)); + return process.platform === "win32" ? a.toLowerCase() === b.toLowerCase() : a === b; + } catch { + return false; + } +})(); + +if (invokedDirectly) { + main().catch((e) => { + process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`); + process.exit(1); + }); +} diff --git a/tools/ruview-mcp/src/tools/train-count.ts b/tools/ruview-mcp/src/tools/train-count.ts index 673ae360..b3462268 100644 --- a/tools/ruview-mcp/src/tools/train-count.ts +++ b/tools/ruview-mcp/src/tools/train-count.ts @@ -76,10 +76,18 @@ export const jobStatusSchema = z.object({ export type JobStatusInput = z.infer; interface JobRecord { - status: "queued" | "running" | "done" | "failed"; + status: "queued" | "running" | "done" | "failed" | "unknown"; log_path: string; queued_at: number; epochs_total: number; + /** + * OS pid of the training child. Persisted so a later process (e.g. after an + * MCP server restart) can tell whether a job still marked 'running' actually + * outlived the process that spawned it (ADR-264 O6). + */ + pid?: number | undefined; + /** Human-readable explanation attached during reconciliation (unknown state). */ + reason?: string | undefined; } // In-process job registry, mirrored to /.json on every state @@ -113,12 +121,41 @@ function loadPersistedJob(jobsDir: string, jobId: string): JobRecord | undefined log_path: raw.log_path, queued_at: typeof raw.queued_at === "number" ? raw.queued_at : 0, epochs_total: typeof raw.epochs_total === "number" ? raw.epochs_total : 0, + pid: typeof raw.pid === "number" ? raw.pid : undefined, + reason: typeof raw.reason === "string" ? raw.reason : undefined, }; } catch { return undefined; } } +/** + * Is `pid` still a live process? `process.kill(pid, 0)` sends no signal but + * probes existence: ESRCH ⇒ gone; EPERM ⇒ alive but owned by another user + * (treated as alive so we never falsely reconcile a still-running job). + */ +function isProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch (e) { + return (e as NodeJS.ErrnoException).code === "EPERM"; + } +} + +/** + * Scan log lines (tail) for the "# exit code: N" marker the child.on('close') + * handler appends. `found:false` means the process died without the marker — + * i.e. this server never saw the close (it restarted mid-run). + */ +function findExitMarker(lines: string[]): { found: boolean; code: number | null } { + for (let i = lines.length - 1; i >= 0; i--) { + const m = /^# exit code: (-?\d+|null)$/.exec((lines[i] ?? "").trim()); + if (m) return { found: true, code: m[1] === "null" ? null : Number(m[1]) }; + } + return { found: false, code: null }; +} + /** Read the last `maxLines` lines of a file without loading the whole log. */ function tailLines(filePath: string, maxLines: number, maxBytes = 64 * 1024): string[] { const size = statSync(filePath).size; @@ -207,6 +244,10 @@ export async function trainCount( closeSync(logFdOut); closeSync(logFdErr); + // Record the child pid so a later process can reconcile a stale 'running' + // record after a server restart (child.pid is undefined only if spawn failed + // synchronously, in which case the 'error' handler flips status to 'failed'). + record.pid = child.pid; record.status = "running"; persistJob(logDir, jobId, record); @@ -244,7 +285,7 @@ export async function jobStatus( config: RuviewConfig ): Promise { // Memory first, then the persisted record (survives server restarts). - const job = jobRegistry.get(input.job_id) ?? loadPersistedJob(config.jobsDir, input.job_id); + let job = jobRegistry.get(input.job_id) ?? loadPersistedJob(config.jobsDir, input.job_id); if (!job) { return { ok: false, @@ -252,6 +293,33 @@ export async function jobStatus( }; } + // Reconcile a 'running' record whose owning process is gone. The status flip + // to done/failed lives only in the spawning process's child.on('close'/'error') + // handlers; if this server restarted mid-run, the record froze at 'running' + // (ADR-264 O6). When the pid is dead, recover the true outcome from the log's + // "# exit code: N" marker, else surface an honest 'unknown'. + if (job.status === "running" && typeof job.pid === "number" && !isProcessAlive(job.pid)) { + let tail: string[] = []; + try { + tail = tailLines(job.log_path, 40); + } catch { + /* log unreadable — treated as no marker below */ + } + const marker = findExitMarker(tail); + const reconciled: JobRecord = { ...job }; + if (marker.found) { + reconciled.status = marker.code === 0 ? "done" : "failed"; + reconciled.reason = undefined; + } else { + reconciled.status = "unknown"; + reconciled.reason = + "process gone, no exit marker — server likely restarted mid-run"; + } + jobRegistry.set(input.job_id, reconciled); + persistJob(config.jobsDir, input.job_id, reconciled); + job = reconciled; + } + // Bounded tail read — never load a multi-GB training log wholesale. let recentLog: string[] = []; try { @@ -266,6 +334,7 @@ export async function jobStatus( log_path: job.log_path, recent_log: recentLog, epochs_total: job.epochs_total, + ...(job.reason !== undefined ? { reason: job.reason } : {}), }; return { ok: true, result }; diff --git a/tools/ruview-mcp/src/types.ts b/tools/ruview-mcp/src/types.ts index 68a2a1f7..09738124 100644 --- a/tools/ruview-mcp/src/types.ts +++ b/tools/ruview-mcp/src/types.ts @@ -115,7 +115,12 @@ export interface TrainJobResult { /** Output of ruview_job_status. */ export interface JobStatusResult { job_id: string; - status: "queued" | "running" | "done" | "failed"; + /** + * 'unknown' is only ever produced by post-restart reconciliation: a record + * frozen at 'running' whose owning process is gone and whose log carries no + * exit-code marker (see reason). + */ + status: "queued" | "running" | "done" | "failed" | "unknown"; progress_pct?: number | undefined; /** Most recent log lines (last 20). */ recent_log: string[]; @@ -124,6 +129,8 @@ export interface JobStatusResult { epochs_done?: number | undefined; /** Total epochs scheduled. */ epochs_total?: number | undefined; + /** Explanation attached when status was reconciled to 'unknown'. */ + reason?: string | undefined; } // ── Vitals (ADR-124 §6 Python surface parity: ws.py:74-88) ─────────────── diff --git a/tools/ruview-mcp/tests/config.test.ts b/tools/ruview-mcp/tests/config.test.ts new file mode 100644 index 00000000..30668aab --- /dev/null +++ b/tools/ruview-mcp/tests/config.test.ts @@ -0,0 +1,49 @@ +/** + * ADR-264 F8/O7 — cog-binary detection must be architecture-aware. + * + * detectCogBinary() itself probes hardcoded /var/lib paths, so it is not + * cheaply testable without fs mocking. The bug it fixes, however, lives purely + * in the candidate ORDER, which cogBinaryCandidates() exposes as a pure, + * arch-injectable function — that is what we pin here. + */ + +import { cogBinaryCandidates } from "../src/config.js"; + +describe("cogBinaryCandidates()", () => { + it("probes -arm before -x86_64 on arm64 hosts", () => { + const c = cogBinaryCandidates("cog-person-count", "arm64"); + const arm = c.findIndex((p) => p.endsWith("cog-person-count-arm")); + const x86 = c.findIndex((p) => p.endsWith("cog-person-count-x86_64")); + expect(arm).toBeGreaterThanOrEqual(0); + expect(x86).toBeGreaterThanOrEqual(0); + expect(arm).toBeLessThan(x86); + }); + + it("probes -x86_64 before -arm on x64 hosts", () => { + const c = cogBinaryCandidates("cog-person-count", "x64"); + const arm = c.findIndex((p) => p.endsWith("cog-person-count-arm")); + const x86 = c.findIndex((p) => p.endsWith("cog-person-count-x86_64")); + expect(x86).toBeLessThan(arm); + }); + + it("defaults an unknown arch to the x86_64-first order", () => { + const c = cogBinaryCandidates("cog-pose-estimation", "riscv64"); + const arm = c.findIndex((p) => p.endsWith("cog-pose-estimation-arm")); + const x86 = c.findIndex((p) => p.endsWith("cog-pose-estimation-x86_64")); + expect(x86).toBeLessThan(arm); + }); + + it("keeps the /usr/local/bin and bare-name PATH fallbacks last", () => { + const c = cogBinaryCandidates("cog-person-count", "arm64"); + // The two arch builds come first; the /usr/local/bin fallback follows them. + expect(c[c.length - 1]).toBe("/usr/local/bin/cog-person-count"); + expect(c).toHaveLength(3); + }); + + it("derives the id by stripping the cog- prefix once", () => { + const c = cogBinaryCandidates("cog-person-count", "x64"); + expect(c[0]).toBe( + "/var/lib/cognitum/apps/person-count/cog-person-count-x86_64" + ); + }); +}); diff --git a/tools/ruview-mcp/tests/http-transport.test.ts b/tools/ruview-mcp/tests/http-transport.test.ts index fbf09dfe..7ec1b7ab 100644 --- a/tools/ruview-mcp/tests/http-transport.test.ts +++ b/tools/ruview-mcp/tests/http-transport.test.ts @@ -110,6 +110,19 @@ describe("isOriginAllowed()", () => { expect(isOriginAllowed("https://evil.example.com:443", [])).toBe(false); }); + // ADR-264 F7 hardening: an EXPLICIT allowlist means exact matching only. The + // any-port-localhost convenience applies solely to the empty-allowlist case, + // so an operator who pins an allowlist actually gets it. + it("with an explicit allowlist, rejects a localhost origin on an unlisted port", () => { + expect(isOriginAllowed("http://localhost:5173", allow)).toBe(false); + expect(isOriginAllowed("http://127.0.0.1:8080", allow)).toBe(false); + }); + + it("with an explicit allowlist, still accepts an exactly-listed localhost origin", () => { + expect(isOriginAllowed("http://localhost", allow)).toBe(true); + expect(isOriginAllowed("http://127.0.0.1", allow)).toBe(true); + }); + it("is case-sensitive for non-local allowlist entries per RFC 6454", () => { expect(isOriginAllowed("HTTPS://Partner.Example.com", ["https://partner.example.com"])).toBe(false); }); @@ -229,3 +242,68 @@ describe("HTTP transport session + body-cap hardening (ADR-264 F7)", () => { expect(r.status).toBe(200); }); }); + +// ── 8. ADR-264 F7: session-map bounds (cap + idle TTL sweep) ─────────────── + +describe("HTTP transport session bounds (ADR-264 F7)", () => { + const initBody = (id: number): string => + JSON.stringify({ + jsonrpc: "2.0", + id, + method: "initialize", + params: { + protocolVersion: "2024-11-05", + capabilities: {}, + clientInfo: { name: "test-client", version: "0.0.0" }, + }, + }); + + // Build directly (not via startServer) so we can inspect the sessions map. + async function startWithApp( + opts: Parameters[1], + basePort: number + ): Promise<{ + port: number; + sessions: ReturnType["sessions"]; + close: () => Promise; + }> { + const { httpServer, sessions } = buildHttpApp(() => makeMockMcpServer(), opts); + const port = basePort + Math.floor(Math.random() * 100); + await new Promise((resolve, reject) => { + httpServer.once("error", reject); + httpServer.listen(port, "127.0.0.1", () => resolve()); + }); + const close = () => + new Promise((res, rej) => httpServer.close((e) => (e ? rej(e) : res()))); + return { port, sessions, close }; + } + + const ACCEPT = { Accept: "application/json, text/event-stream" }; + + it("never exceeds maxSessions — evicts the oldest-idle session at capacity", async () => { + const srv = await startWithApp({ allowedOrigins: ["*"], maxSessions: 2 }, 49800); + try { + for (let i = 0; i < 5; i++) { + await post(srv.port, "/mcp", ACCEPT, initBody(i)); + } + expect(srv.sessions.size).toBeLessThanOrEqual(2); + } finally { + await srv.close(); + } + }); + + it("sweeps sessions idle beyond sessionIdleMs", async () => { + const srv = await startWithApp( + { allowedOrigins: ["*"], sessionIdleMs: 20, sweepIntervalMs: 10 }, + 49900 + ); + try { + await post(srv.port, "/mcp", ACCEPT, initBody(1)); + expect(srv.sessions.size).toBe(1); + await new Promise((r) => setTimeout(r, 150)); + expect(srv.sessions.size).toBe(0); + } finally { + await srv.close(); + } + }); +}); diff --git a/tools/ruview-mcp/tests/train-count-reconcile.test.ts b/tools/ruview-mcp/tests/train-count-reconcile.test.ts new file mode 100644 index 00000000..9c294620 --- /dev/null +++ b/tools/ruview-mcp/tests/train-count-reconcile.test.ts @@ -0,0 +1,96 @@ +/** + * ADR-264 O6 — post-restart job reconciliation. + * + * When the MCP server restarts mid-run, the persisted job record stays frozen + * at 'running' (the child.on('close') that flips it lived in the dead process). + * ruview_job_status must reconcile such a record against the recorded pid and + * the log's "# exit code: N" marker. + * + * We fabricate a persisted record pointing at a KNOWN-DEAD pid (a synchronous + * child that has already exited) and assert the reconciled status. + */ + +import { mkdtempSync, writeFileSync } from "node:fs"; +import { spawnSync } from "node:child_process"; +import os from "node:os"; +import path from "node:path"; +import { randomUUID } from "node:crypto"; +import { jobStatus } from "../src/tools/train-count.js"; +import type { RuviewConfig } from "../src/types.js"; + +/** A pid that has certainly exited: spawnSync waits for the child to finish. */ +function deadPid(): number { + const r = spawnSync(process.execPath, ["-e", ""]); + if (typeof r.pid !== "number") throw new Error("could not spawn probe child"); + return r.pid; +} + +function makeConfig(jobsDir: string): RuviewConfig { + return { + sensingServerUrl: "http://127.0.0.1:19999", + apiToken: undefined, + poseCogBinary: "nonexistent", + countCogBinary: "nonexistent", + jobsDir, + }; +} + +/** Write a fake persisted 'running' record + its log, return {jobId, config}. */ +function seedRunningJob(logBody: string): { jobId: string; config: RuviewConfig } { + const jobsDir = mkdtempSync(path.join(os.tmpdir(), "rvagent-jobs-")); + const jobId = randomUUID(); + const logPath = path.join(jobsDir, `${jobId}.log`); + writeFileSync(logPath, logBody); + const record = { + job_id: jobId, + status: "running", + log_path: logPath, + queued_at: Date.now() / 1000, + epochs_total: 5, + pid: deadPid(), + }; + writeFileSync( + path.join(jobsDir, `${jobId}.json`), + JSON.stringify(record, null, 2) + ); + return { jobId, config: makeConfig(jobsDir) }; +} + +describe("ruview_job_status reconciliation (ADR-264 O6)", () => { + it("reconciles a dead 'running' job with exit 0 to 'done'", async () => { + const { jobId, config } = seedRunningJob( + "# training...\nepoch 5/5\n# exit code: 0\n" + ); + const out = (await jobStatus({ job_id: jobId }, config)) as Record; + expect(out["ok"]).toBe(true); + const res = out["result"] as Record; + expect(res["status"]).toBe("done"); + }); + + it("reconciles a dead 'running' job with non-zero exit to 'failed'", async () => { + const { jobId, config } = seedRunningJob( + "# training...\npanic: cuda oom\n# exit code: 101\n" + ); + const out = (await jobStatus({ job_id: jobId }, config)) as Record; + const res = out["result"] as Record; + expect(res["status"]).toBe("failed"); + }); + + it("marks a dead 'running' job with no exit marker as 'unknown' with a reason", async () => { + const { jobId, config } = seedRunningJob("# training...\nepoch 2/5\n"); + const out = (await jobStatus({ job_id: jobId }, config)) as Record; + const res = out["result"] as Record; + expect(res["status"]).toBe("unknown"); + expect(typeof res["reason"]).toBe("string"); + expect(res["reason"]).toMatch(/restarted/i); + }); + + it("treats a signal-killed marker (null) as 'failed'", async () => { + const { jobId, config } = seedRunningJob( + "# training...\n# exit code: null\n" + ); + const out = (await jobStatus({ job_id: jobId }, config)) as Record; + const res = out["result"] as Record; + expect(res["status"]).toBe("failed"); + }); +});