fix(npm): address 10 verified review findings in harness + rvagent before 0.2.0 publish

harness/ruview (@ruvnet/ruview): - guardrails: digit gate now sees numbers inside code spans; F1-style metric tokens followed by ':' or a nearby number are no longer scrubbed (fail-open regressions in the honesty gate) - mcp-server: tools/call requests serialize through a FIFO promise chain (hardware/mutating tools never overlap) while ping/tools/list stay immediate; stdin close drains in-flight responses before exit - tools: which() no longer memoizes negative lookups tools/ruview-mcp (@ruvnet/rvagent): - index: realpath invoked-directly guard — library import no longer connects a stdio transport to the consumer's process - http-transport: explicit allowedOrigins is exact-match only (localhost any-port convenience applies only with no configured allowlist); session map gains maxSessions=64 + 5min idle TTL sweep - train-count: job records persist the child pid and reconcile stale 'running' status after a server restart (exit-code marker or dead pid) - config: cog binary candidates ordered by process.arch .github/workflows/ruview-npm-release.yml: port the full ADR-265 D1 gate (version-literal check, unpacked-size budget, tarball-install smoke test) from npm-packages.yml so the publish path enforces what the header claims. Tests: harness 30→36, rvagent 99→112, all passing. Co-Authored-By: claude-flow <ruv@ruv.net>
2026-07-02 14:03:19 +00:00 · 2026-07-02 09:58:00 -04:00
parent d26fb0b8fc
commit d46eb8a4b2
15 changed files with 651 additions and 44 deletions
@@ -54,18 +54,77 @@ jobs:
      - name: Test
        run: npm test --if-present

-      - name: Pack gate (no maps)
+      # ADR-265 D3 — package.json is the only place a version string lives.
+      - name: Version-literal gate
        run: |
+          set -euo pipefail
+          hits=""
+          for d in src bin; do
+            if [ -d "$d" ]; then
+              hits+=$(grep -rEn '\b[0-9]+\.[0-9]+\.[0-9]+\b' "$d" | grep -vE '127\.0\.0\.1|0\.0\.0\.0' || true)
+            fi
+          done
+          if [ -n "$hits" ]; then
+            echo "Hardcoded version-like literals found (read package.json instead — ADR-265 D3):"
+            echo "$hits"
+            exit 1
+          fi
+
+      # ADR-265 D1.3 — pack-content gate: no maps AND the per-package
+      # unpacked-size budget (the budgets that npm-packages.yml enforces).
+      - name: Pack gate (no maps + size budget)
+        run: |
+          set -euo pipefail
+          case "${{ inputs.package }}" in
+            # ADR-263: dependency-free harness; budget guards against dep creep.
+            harness/ruview) export UNPACKED_BUDGET=65536 ;;
+            # ADR-264 O2: map-free tarball (was 188 kB with maps).
+            tools/ruview-mcp) export UNPACKED_BUDGET=140000 ;;
+            *) echo "Unknown package '${{ inputs.package }}' — no budget defined"; exit 1 ;;
+          esac
          npm pack --dry-run --json 2>/dev/null | node -e "
            const [info] = JSON.parse(require('fs').readFileSync(0, 'utf8'));
+            const budget = Number(process.env.UNPACKED_BUDGET);
            const maps = info.files.filter((f) => f.path.endsWith('.map'));
            if (maps.length > 0) {
              console.error('Tarball contains source maps (ADR-264 F2):', maps.map((m) => m.path));
              process.exit(1);
            }
-            console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked\`);
+            if (info.unpackedSize > budget) {
+              console.error(\`Unpacked size \${info.unpackedSize} B exceeds budget \${budget} B\`);
+              process.exit(1);
+            }
+            console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked (budget \${budget} B), 0 maps\`);
          "

+      # ADR-265 D1.4 — install the real tarball and drive each bin/export.
+      - name: Tarball smoke test
+        run: |
+          set -euo pipefail
+          TGZ="$PWD/$(npm pack --silent 2>/dev/null | tail -1)"
+          SMOKE="$(mktemp -d)"
+          cd "$SMOKE"
+          npm init -y > /dev/null
+          npm i --no-fund --no-audit "$TGZ"
+          case "${{ inputs.package }}" in
+            harness/ruview)
+              ./node_modules/.bin/ruview --version
+              ./node_modules/.bin/ruview doctor
+              # the honesty gate must fail closed on empty input (ADR-263 F1)
+              if ./node_modules/.bin/ruview claim-check; then
+                echo 'claim-check passed with no input — fail-open regression'; exit 1
+              fi
+              node --input-type=module -e "const m = await import('@ruvnet/ruview'); if (!m.TOOLS) process.exit(1);"
+              ;;
+            tools/ruview-mcp)
+              # initialize over stdio; server must answer and exit 0 on EOF
+              printf '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"ci","version":"0"}}}\n' \
+                | timeout 30 ./node_modules/.bin/rvagent | grep -q '"serverInfo"'
+              # the ESM export must resolve from the installed tarball (ADR-264 F1)
+              timeout 30 node --input-type=module -e "await import('@ruvnet/rvagent');" < /dev/null
+              ;;
+          esac
+
      - name: Claim-check README
        run: |
          if [ -f README.md ]; then
@@ -21,9 +21,13 @@ const METRIC_TERMS = [
 // 'map' additionally must not be a `.map` file suffix or a hyphenated
 // compound ("map-free", "map-reduce") — mAP the metric never appears as either.
 const METRIC_TERMS_SHORT = [/(?<![.\w])map\b(?!-)/, /\bf1\b/, /\bauc\b/, /\biou\b/];
-// Finding/option labels (F1, O2, …) count as labels unless followed by a
-// score context — "F1 score 0.91" stays a metric, "after F7 fixes" does not.
-const LABEL_TOKEN_RE = /\b[fo]\d+\b(?!\s*(?:score|=|\d|%))/g;
+// Finding/option labels (F1, O2, …) count as labels unless the token sits in a
+// metric context: an immediately following score/=/%/digit or colon ("F1: 0.91"),
+// or a number later in the same clause ("F1 reaches 0.91" — an F1-score claim).
+// Bare option refs ("F7 fixes", "O1–O9", "ADR-263 O2") carry no clause number of
+// their own and stay labels. (A surviving 'f1' still only fires as a metric when
+// its scrubbed line actually carries a number — see mentionsMetricTerm.)
+const LABEL_TOKEN_RE = /\b[fo]\d+\b(?!\s*(?:score|=|\d|%|:))(?![^\n.;]*\d)/g;
 const CODE_SPAN_RE = /`[^`]*`/g; // backticked identifiers are code, not claims
 const HAS_NUMBER_RE = /\d/;

@@ -95,9 +99,14 @@ export function claimCheck(text) {
      return;
    }

-    // A quantitative claim needs a number: a metric term in plain prose
-    // ("precision matters here") is not a taggable claim (ADR-263 F11).
-    if (!hasPercent && !HAS_NUMBER_RE.test(scrubbed)) return;
+    // A quantitative claim needs a number. Digits hidden in a code span still
+    // count — "accuracy reached `0.95`" is a claim — so test the line with only
+    // finding/option labels stripped, NOT the code-span-scrubbed copy: scrubbing
+    // dropped `0.95` and wrongly short-circuited both the untagged and the
+    // MEASURED-without-reproducer checks below. A bare metric word in prose
+    // ("precision matters here", "every accuracy number must be MEASURED") has no
+    // number and is not a taggable claim (ADR-263 F11).
+    if (!hasPercent && !HAS_NUMBER_RE.test(lower.replace(LABEL_TOKEN_RE, ' '))) return;

    // A metric/percent with no honesty tag at all.
    if (!tagged) {
@@ -111,7 +120,8 @@ export function claimCheck(text) {
      return;
    }

-    // Tagged MEASURED but cites no reproducer — still a gap.
+    // Tagged MEASURED but cites no reproducer — still a gap (reached now even
+    // when the only number is inside a code span, e.g. "accuracy `0.97` (MEASURED)").
    if (lower.includes('measured') && !hasReproducer) {
      findings.push({
        severity: 'medium',
@@ -68,16 +68,40 @@ async function handle(msg) {
 export function startMcpServer() {
  log(`starting v${SERVER_INFO.version} (protocol ${PROTOCOL_VERSION}, ${listTools().length} tools)`);
  const rl = createInterface({ input: process.stdin, crlfDelay: Infinity });
+
+  // tools/call runs are serialized through a FIFO promise chain: hardware/mutating
+  // tools (calibrate, serial monitor, flash) must never overlap. ping/tools/list/
+  // initialize/resources/prompts stay immediate (ADR-263 O2 — a health check must
+  // answer during a long tool run). `toolChain` also lets stdin-close drain the
+  // in-flight call so its response is flushed instead of dropped by process.exit.
+  let toolChain = Promise.resolve();
+
+  const dispatch = (msg) => handle(msg).catch((err) => {
+    if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err));
+    log('handler error:', String(err));
+  });
+
  rl.on('line', (line) => {
    const s = line.trim();
    if (!s) return;
    let msg;
    try { msg = JSON.parse(s); } catch { return log('bad JSON line dropped'); }
-    // Fire-and-forget: keep reading stdin while tool calls run (ADR-263 O2).
-    handle(msg).catch((err) => {
-      if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err));
-      log('handler error:', String(err));
+    if (msg && msg.method === 'tools/call') {
+      toolChain = toolChain.then(() => dispatch(msg)); // one tool at a time
+    } else {
+      dispatch(msg); // health/list/handshake answer immediately, even mid tool run
+    }
+  });
+
+  rl.on('close', () => {
+    // Wait for any queued/in-flight tool call to settle (its response written)
+    // before exiting — fire-and-forget used to race this and drop the response.
+    toolChain.then(() => {
+      log('stdin closed — exiting');
+      const done = () => process.exit(0);
+      // Pipe writes are async; flush buffered stdout before exit.
+      if (process.stdout.writableLength) process.stdout.once('drain', done);
+      else done();
    });
  });
-  rl.on('close', () => { log('stdin closed — exiting'); process.exit(0); });
 }
@@ -32,8 +32,9 @@ export function findRepoRoot(start = process.cwd()) {
  return null;
 }

-// Dep-free PATH scan, memoized for the process lifetime (ADR-263 O8) — no
-// shell subprocess per lookup.
+// Dep-free PATH scan (ADR-263 O8) — no shell subprocess per lookup. Only hits
+// are memoized: a miss can resolve later in a long-lived MCP session (the
+// operator installs python/the CLI mid-run), so misses are re-probed each call.
 const whichCache = new Map();
 export function which(cmd) {
  if (whichCache.has(cmd)) return whichCache.get(cmd);
@@ -54,7 +55,7 @@ export function which(cmd) {
      } catch { /* keep scanning */ }
    }
  }
-  whichCache.set(cmd, found);
+  if (found !== null) whichCache.set(cmd, found);
  return found;
 }

@@ -99,3 +99,50 @@ test('MCP server answers ping while a long tools/call is in flight (ADR-263 O2)'
    rmSync(repo, { recursive: true, force: true });
  }
 });
+
+test('tools/call executions are serialized — two slow calls run sequentially', { skip: !which('python') && !which('python3') ? 'python not on PATH' : false }, async () => {
+  // Two verify.py that each sleep 0.8 s. Serialized ⇒ ~1.6 s+; concurrent ⇒ ~0.8 s.
+  const repo = mkdtempSync(join(tmpdir(), 'ruview-mcp-serial-'));
+  const proofDir = join(repo, 'archive', 'v1', 'data', 'proof');
+  mkdirSync(proofDir, { recursive: true });
+  writeFileSync(join(proofDir, 'verify.py'), 'import time\ntime.sleep(0.8)\nprint("VERDICT: PASS")\n');
+
+  const s = startServer();
+  try {
+    s.send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} });
+    await s.next(1);
+
+    const t0 = Date.now();
+    const a = s.next(20);
+    const b = s.next(21);
+    s.send({ jsonrpc: '2.0', id: 20, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } });
+    s.send({ jsonrpc: '2.0', id: 21, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } });
+    const [ra, rb] = await Promise.all([a, b]);
+    const elapsed = Date.now() - t0;
+
+    assert.equal(JSON.parse(ra.result.content[0].text).verdict, 'PASS');
+    assert.equal(JSON.parse(rb.result.content[0].text).verdict, 'PASS');
+    assert.ok(elapsed > 1400, `two 0.8 s tool calls finished in ${elapsed} ms — they overlapped instead of serializing`);
+  } finally {
+    s.close();
+    rmSync(repo, { recursive: true, force: true });
+  }
+});
+
+test('stdin close flushes an in-flight tools/call response before exit', async () => {
+  const child = spawn(process.execPath, [CLI, 'mcp', 'start'], { stdio: ['pipe', 'pipe', 'pipe'] });
+  let out = '';
+  child.stdout.on('data', (d) => { out += d; });
+  const exited = new Promise((res) => child.on('exit', res));
+
+  // Write a tools/call then immediately close stdin. The old fire-and-forget
+  // dispatch raced rl 'close' → process.exit and could drop this response.
+  child.stdin.write(JSON.stringify({ jsonrpc: '2.0', id: 42, method: 'tools/call', params: { name: 'ruview_onboard', arguments: {} } }) + '\n');
+  child.stdin.end();
+
+  await exited;
+  const msgs = out.trim().split('\n').filter(Boolean).map((l) => JSON.parse(l));
+  const resp = msgs.find((m) => m.id === 42);
+  assert.ok(resp, 'the in-flight tools/call response must be flushed to stdout before exit');
+  assert.equal(resp.result.isError, false);
+});
@@ -4,8 +4,9 @@

 import { test } from 'node:test';
 import assert from 'node:assert/strict';
-import { readdirSync, readFileSync } from 'node:fs';
-import { join, dirname } from 'node:path';
+import { readdirSync, readFileSync, mkdtempSync, writeFileSync, rmSync } from 'node:fs';
+import { join, dirname, delimiter } from 'node:path';
+import { tmpdir } from 'node:os';
 import { fileURLToPath } from 'node:url';
 import { claimCheck, summarize } from '../src/guardrails.js';
 import { TOOLS, TOOL_ALIASES, runTool, listTools, findRepoRoot, run, which } from '../src/tools.js';
@@ -58,6 +59,33 @@ test('guardrail still catches real short-token metric claims', () => {
  assert.equal(claimCheck('IoU 0.75 across rooms.').ok, false);
 });

+// Digits hidden in a code span still make a claim — scrubbing must not blind the
+// number gate to `0.95` (regression: code-span number bypassed the gate).
+test('guardrail flags an accuracy number stated inside a code span', () => {
+  const r = claimCheck('Count accuracy reached `0.95` in our tests.');
+  assert.equal(r.ok, false, JSON.stringify(r.findings));
+  assert.ok(r.findings.some((f) => /not tagged/i.test(f.reason)));
+});
+
+// A MEASURED claim whose only number hides in a code span must still reach the
+// missing-reproducer check (regression: the scrubbed gate short-circuited it).
+// Bare metric prose with no number at all (e.g. the README rule text) stays a pass.
+test('guardrail flags a MEASURED code-span number with no reproducer', () => {
+  const r = claimCheck('Detection accuracy `0.97` on the set (MEASURED).');
+  assert.equal(r.ok, false, JSON.stringify(r.findings));
+  assert.ok(r.findings.some((f) => /no reproducer/i.test(f.reason)));
+  assert.equal(claimCheck('Every accuracy number must be MEASURED against a baseline.').ok, true);
+});
+
+// F1-score phrasings ("F1: 0.91", "F1 reaches 0.91") were scrubbed as option
+// labels and slipped through; option refs alone must still not false-positive.
+test('guardrail catches F1-score claims but not bare option refs (ADR-263 F11)', () => {
+  assert.equal(claimCheck('F1: 0.91 on the held-out set.').ok, false, 'F1: value is a metric claim');
+  assert.equal(claimCheck('F1 reaches 0.91 on the held-out set.').ok, false, 'F1 with a nearby number is a claim');
+  assert.equal(claimCheck('Options O1–O9 are tracked in ADR-263 O2.').ok, true, 'option labels are not metrics');
+  assert.equal(claimCheck('ADR-263 O2 lands the exports fix.').ok, true);
+});
+
 test('summarize gives PASS/finding text', () => {
  assert.match(summarize(claimCheck('nothing here')), /PASS/);
  assert.match(summarize(claimCheck('100% accuracy')), /finding/);
@@ -156,10 +184,29 @@ test('run() bounds captured output instead of dying on big streams (ADR-263 O4)'
  assert.ok(r.stdout.includes('TAIL_MARKER'), 'tail must keep the end of the stream');
 });

-test('which() finds node and memoizes misses', () => {
+test('which() finds node and re-probes misses (hits are cached)', () => {
  assert.ok(which('node'), 'node must be on PATH in the test env');
  assert.equal(which('definitely-not-a-binary-xyz'), null);
-  assert.equal(which('definitely-not-a-binary-xyz'), null); // cached path
+  assert.equal(which('definitely-not-a-binary-xyz'), null); // re-probed, still absent
+});
+
+// ADR-263 O8: a miss must not be cached — an operator who installs a tool
+// mid-session (e.g. python after a python_missing failure) must be found next call.
+test('which() re-probes after a miss so a newly-installed tool is found', () => {
+  const dir = mkdtempSync(join(tmpdir(), 'ruview-which-'));
+  const name = 'ruview-probe-xyz';
+  const isWin = process.platform === 'win32';
+  const bin = join(dir, isWin ? `${name}.cmd` : name);
+  const prevPath = process.env.PATH;
+  try {
+    assert.equal(which(name), null, 'not on PATH yet → miss');
+    writeFileSync(bin, isWin ? '@echo off\n' : '#!/bin/sh\n', { mode: 0o755 });
+    process.env.PATH = dir + delimiter + prevPath;
+    assert.ok(which(name), 'installed mid-session → the miss must not have been cached');
+  } finally {
+    process.env.PATH = prevPath;
+    rmSync(dir, { recursive: true, force: true });
+  }
 });

 test('CLI run(): claim-check exits non-zero on a bad claim', async () => {
@@ -12,7 +12,7 @@
        "yargs": "^17.7.2"
      },
      "bin": {
-        "ruview": "dist/index.js"
+        "ruview-cli": "dist/index.js"
      },
      "devDependencies": {
        "@types/node": "^20.14.0",
@@ -51,21 +51,35 @@ export function loadConfig(): RuviewConfig {
  };
 }

+/**
+ * Ordered cog-binary candidate paths for a host of the given CPU architecture.
+ * The native-arch build is probed FIRST: an appliance that ships both
+ * `cog-<id>-arm` and `cog-<id>-x86_64` must never hand back the wrong-arch
+ * binary (ADR-264 F8/O7 — the pre-review order tried `-arm` unconditionally).
+ * The `/usr/local/bin` and bare-name (PATH) fallbacks follow, arch-agnostic.
+ *
+ * Pure and arch-injectable so the ordering is unit-testable.
+ */
+export function cogBinaryCandidates(
+  name: string,
+  arch: string = process.arch
+): string[] {
+  const id = name.replace("cog-", "");
+  const dir = `/var/lib/cognitum/apps/${id}`;
+  const arm = `${dir}/cog-${id}-arm`;
+  const x86 = `${dir}/cog-${id}-x86_64`;
+  // arm64 → prefer -arm; everything else (notably x64) → prefer -x86_64.
+  const archOrdered = arch === "arm64" ? [arm, x86] : [x86, arm];
+  return [...archOrdered, `/usr/local/bin/${name}`];
+}
+
 /**
 * Locate a cog binary in the common appliance install locations, probing each
- * candidate (ADR-264 F8/O7 — the pre-review version built this list and then
- * unconditionally returned the bare name). Falls back to the bare name (PATH
+ * candidate in native-arch-first order. Falls back to the bare name (PATH
 * resolution at spawn time) when no candidate exists.
 */
 function detectCogBinary(name: string): string {
-  const id = name.replace("cog-", "");
-  // Common install paths for Cognitum cog binaries on Linux/macOS appliances.
-  const candidates = [
-    `/var/lib/cognitum/apps/${id}/cog-${id}-arm`,
-    `/var/lib/cognitum/apps/${id}/cog-${id}-x86_64`,
-    `/usr/local/bin/${name}`,
-  ];
-  for (const candidate of candidates) {
+  for (const candidate of cogBinaryCandidates(name)) {
    if (existsSync(candidate)) return candidate;
  }
  return name; // bare name — rely on PATH; spawn fails gracefully if absent
@@ -13,8 +13,11 @@
 *
 * Security model (ADR-124 §6 + ADR-264 F7):
 *   - Origin validation: browser-style requests whose Origin is not local
- *     are rejected with 403 before reaching the MCP layer. Localhost origins
- *     match on hostname, ANY port (http://localhost:5173 is local).
+ *     are rejected with 403 before reaching the MCP layer. With NO explicit
+ *     allowlist, localhost origins match on hostname, ANY port
+ *     (http://localhost:5173 is local). When an explicit allowedOrigins list is
+ *     configured, matching is exact — the any-port-localhost convenience is off,
+ *     so a localhost peer on an unlisted port must be added to be accepted.
 *   - Bearer token: when RVAGENT_HTTP_TOKEN is set, requests must carry
 *     Authorization: Bearer <token>; missing/wrong tokens → 401.
 *   - Body cap: request bodies over 1 MiB are rejected with 413 (the
@@ -55,6 +58,20 @@ export interface HttpTransportOptions {
  bearerToken?: string;
  /** Maximum accepted request body size in bytes (default: 1 MiB). */
  maxBodyBytes?: number;
+  /**
+   * Maximum number of concurrent live sessions (default: 64). When a new
+   * `initialize` arrives at the cap, the oldest-idle session is evicted (its
+   * transport closed) to make room — bounds memory against a flaky client that
+   * loops `initialize` or a malicious localhost peer (ADR-264 F7).
+   */
+  maxSessions?: number;
+  /**
+   * Idle time-to-live for a session in ms (default: 5 min). Sessions with no
+   * request activity for longer than this are swept and closed.
+   */
+  sessionIdleMs?: number;
+  /** How often the idle-session sweeper runs, in ms (default: 60 s). */
+  sweepIntervalMs?: number;
 }

 export interface HttpTransportResult {
@@ -69,6 +86,9 @@ export interface HttpTransportResult {
 const DEFAULT_HOST = "127.0.0.1";
 const DEFAULT_PORT = 3001;
 const DEFAULT_MAX_BODY_BYTES = 1024 * 1024;
+const DEFAULT_MAX_SESSIONS = 64;
+const DEFAULT_SESSION_IDLE_MS = 5 * 60 * 1000;
+const DEFAULT_SWEEP_INTERVAL_MS = 60 * 1000;
 const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]);

 /**
@@ -76,8 +96,11 @@ const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]);
 * Returns true if the request should be allowed, false if it should be rejected.
 *
 * An absent Origin header is allowed (same-origin non-browser requests, curl,
- * etc.). A localhost origin is allowed on any port (real browser origins carry
- * ports — ADR-264 F7). Anything else must match the allowlist exactly.
+ * etc.). When NO explicit allowlist was configured (empty list), a localhost
+ * origin is allowed on any port as a convenience — real browser origins carry
+ * ports (ADR-264 F7). When an explicit allowlist IS configured, matching is
+ * exact: the any-port-localhost shortcut is disabled so an operator who pins an
+ * allowlist actually gets it (a looped-back peer on an unlisted port is denied).
 */
 export function isOriginAllowed(
  origin: string | undefined,
@@ -86,6 +109,8 @@ export function isOriginAllowed(
  if (origin === undefined) return true; // no Origin = not a cross-origin browser request
  if (allowedOrigins.includes("*")) return true;
  if (allowedOrigins.includes(origin)) return true;
+  // Explicit allowlist ⇒ exact matching only; skip the localhost convenience.
+  if (allowedOrigins.length > 0) return false;
  try {
    const u = new URL(origin);
    return (
@@ -142,7 +167,55 @@ export function buildHttpApp(
  const allowedOrigins: string[] = opts.allowedOrigins ?? [];
  const bearerToken = opts.bearerToken ?? process.env["RVAGENT_HTTP_TOKEN"];
  const maxBodyBytes = opts.maxBodyBytes ?? DEFAULT_MAX_BODY_BYTES;
+  const maxSessions = opts.maxSessions ?? DEFAULT_MAX_SESSIONS;
+  const sessionIdleMs = opts.sessionIdleMs ?? DEFAULT_SESSION_IDLE_MS;
+  const sweepIntervalMs = opts.sweepIntervalMs ?? DEFAULT_SWEEP_INTERVAL_MS;
  const sessions = new Map<string, StreamableHTTPServerTransport>();
+  // lastSeen tracks per-session request activity so the sweeper and the
+  // oldest-idle eviction can bound the session map (ADR-264 F7).
+  const lastSeen = new Map<string, number>();
+
+  /** Mark a session as freshly used. */
+  function touch(sessionId: string): void {
+    lastSeen.set(sessionId, Date.now());
+  }
+
+  /** Close a session's transport and drop it from the bookkeeping maps. */
+  function closeSession(id: string): void {
+    const transport = sessions.get(id);
+    sessions.delete(id);
+    lastSeen.delete(id);
+    if (transport) {
+      try {
+        void transport.close(); // onclose is idempotent against the maps above
+      } catch {
+        /* best-effort: a half-open transport must not block eviction */
+      }
+    }
+  }
+
+  /** Evict the session that has been idle longest — called when at capacity. */
+  function evictOldestIdle(): void {
+    let oldestId: string | undefined;
+    let oldestSeen = Infinity;
+    for (const [id, seen] of lastSeen) {
+      if (seen < oldestSeen) {
+        oldestSeen = seen;
+        oldestId = id;
+      }
+    }
+    if (oldestId !== undefined) closeSession(oldestId);
+  }
+
+  /** Periodic sweep: close sessions idle beyond sessionIdleMs. */
+  function sweepIdleSessions(): void {
+    const now = Date.now();
+    for (const [id, seen] of lastSeen) {
+      if (now - seen > sessionIdleMs) closeSession(id);
+    }
+  }
+  const sweepTimer = setInterval(sweepIdleSessions, sweepIntervalMs);
+  sweepTimer.unref(); // never keep the process alive just to sweep

  const httpServer = createServer((req: IncomingMessage, res: ServerResponse) => {
    void (async () => {
@@ -194,6 +267,7 @@ export function buildHttpApp(
            json(res, 404, { error: `Unknown session "${sessionId}"` });
            return;
          }
+          touch(sessionId);
          await transport.handleRequest(req, res, parsed);
          return;
        }
@@ -206,14 +280,21 @@ export function buildHttpApp(
          });
          return;
        }
+        // Bound the session map: at capacity, reclaim the oldest-idle slot
+        // before minting a new session (ADR-264 F7).
+        if (sessions.size >= maxSessions) evictOldestIdle();
        const transport = new StreamableHTTPServerTransport({
          sessionIdGenerator: () => randomUUID(),
          onsessioninitialized: (id: string) => {
            sessions.set(id, transport);
+            touch(id);
          },
        });
        transport.onclose = () => {
-          if (transport.sessionId !== undefined) sessions.delete(transport.sessionId);
+          if (transport.sessionId !== undefined) {
+            sessions.delete(transport.sessionId);
+            lastSeen.delete(transport.sessionId);
+          }
        };
        const mcpServer = serverFactory();
        await mcpServer.connect(transport as Parameters<typeof mcpServer.connect>[0]);
@@ -228,6 +309,7 @@ export function buildHttpApp(
          json(res, 400, { error: "Bad Request: missing or unknown mcp-session-id" });
          return;
        }
+        if (sessionId !== undefined) touch(sessionId);
        await transport.handleRequest(req, res);
        return;
      }
@@ -239,6 +321,8 @@ export function buildHttpApp(
    });
  });

+  httpServer.on("close", () => clearInterval(sweepTimer));
+
  return { httpServer, sessions };
 }

@@ -31,6 +31,9 @@
 */

 import { createRequire } from "node:module";
+import { realpathSync } from "node:fs";
+import { fileURLToPath } from "node:url";
+import { argv } from "node:process";
 import { Server } from "@modelcontextprotocol/sdk/server/index.js";
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import {
@@ -355,7 +358,26 @@ async function main(): Promise<void> {
  );
 }

-main().catch((e) => {
-  process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`);
-  process.exit(1);
-});
+// CLI guard: boot the server only when this module is the entrypoint — invoked
+// as the `rvagent` / `ruview-mcp` bin or `node dist/index.js`. Importing it as a
+// library (`import { buildServer } from "@ruvnet/rvagent"`) must NOT side-effect
+// connect a StdioServerTransport to the consumer's stdin/stdout. Realpath both
+// sides because npm's bin shim is a symlink and passes a non-normalized,
+// possibly case-skewed argv[1] on Windows (mirrors harness/ruview/bin/cli.js).
+const invokedDirectly = (() => {
+  if (!argv[1]) return false;
+  try {
+    const a = realpathSync(argv[1]);
+    const b = realpathSync(fileURLToPath(import.meta.url));
+    return process.platform === "win32" ? a.toLowerCase() === b.toLowerCase() : a === b;
+  } catch {
+    return false;
+  }
+})();
+
+if (invokedDirectly) {
+  main().catch((e) => {
+    process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`);
+    process.exit(1);
+  });
+}
@@ -76,10 +76,18 @@ export const jobStatusSchema = z.object({
 export type JobStatusInput = z.infer<typeof jobStatusSchema>;

 interface JobRecord {
-  status: "queued" | "running" | "done" | "failed";
+  status: "queued" | "running" | "done" | "failed" | "unknown";
  log_path: string;
  queued_at: number;
  epochs_total: number;
+  /**
+   * OS pid of the training child. Persisted so a later process (e.g. after an
+   * MCP server restart) can tell whether a job still marked 'running' actually
+   * outlived the process that spawned it (ADR-264 O6).
+   */
+  pid?: number | undefined;
+  /** Human-readable explanation attached during reconciliation (unknown state). */
+  reason?: string | undefined;
 }

 // In-process job registry, mirrored to <jobsDir>/<id>.json on every state
@@ -113,12 +121,41 @@ function loadPersistedJob(jobsDir: string, jobId: string): JobRecord | undefined
      log_path: raw.log_path,
      queued_at: typeof raw.queued_at === "number" ? raw.queued_at : 0,
      epochs_total: typeof raw.epochs_total === "number" ? raw.epochs_total : 0,
+      pid: typeof raw.pid === "number" ? raw.pid : undefined,
+      reason: typeof raw.reason === "string" ? raw.reason : undefined,
    };
  } catch {
    return undefined;
  }
 }

+/**
+ * Is `pid` still a live process? `process.kill(pid, 0)` sends no signal but
+ * probes existence: ESRCH ⇒ gone; EPERM ⇒ alive but owned by another user
+ * (treated as alive so we never falsely reconcile a still-running job).
+ */
+function isProcessAlive(pid: number): boolean {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (e) {
+    return (e as NodeJS.ErrnoException).code === "EPERM";
+  }
+}
+
+/**
+ * Scan log lines (tail) for the "# exit code: N" marker the child.on('close')
+ * handler appends. `found:false` means the process died without the marker —
+ * i.e. this server never saw the close (it restarted mid-run).
+ */
+function findExitMarker(lines: string[]): { found: boolean; code: number | null } {
+  for (let i = lines.length - 1; i >= 0; i--) {
+    const m = /^# exit code: (-?\d+|null)$/.exec((lines[i] ?? "").trim());
+    if (m) return { found: true, code: m[1] === "null" ? null : Number(m[1]) };
+  }
+  return { found: false, code: null };
+}
+
 /** Read the last `maxLines` lines of a file without loading the whole log. */
 function tailLines(filePath: string, maxLines: number, maxBytes = 64 * 1024): string[] {
  const size = statSync(filePath).size;
@@ -207,6 +244,10 @@ export async function trainCount(
  closeSync(logFdOut);
  closeSync(logFdErr);

+  // Record the child pid so a later process can reconcile a stale 'running'
+  // record after a server restart (child.pid is undefined only if spawn failed
+  // synchronously, in which case the 'error' handler flips status to 'failed').
+  record.pid = child.pid;
  record.status = "running";
  persistJob(logDir, jobId, record);

@@ -244,7 +285,7 @@ export async function jobStatus(
  config: RuviewConfig
 ): Promise<object> {
  // Memory first, then the persisted record (survives server restarts).
-  const job = jobRegistry.get(input.job_id) ?? loadPersistedJob(config.jobsDir, input.job_id);
+  let job = jobRegistry.get(input.job_id) ?? loadPersistedJob(config.jobsDir, input.job_id);
  if (!job) {
    return {
      ok: false,
@@ -252,6 +293,33 @@ export async function jobStatus(
    };
  }

+  // Reconcile a 'running' record whose owning process is gone. The status flip
+  // to done/failed lives only in the spawning process's child.on('close'/'error')
+  // handlers; if this server restarted mid-run, the record froze at 'running'
+  // (ADR-264 O6). When the pid is dead, recover the true outcome from the log's
+  // "# exit code: N" marker, else surface an honest 'unknown'.
+  if (job.status === "running" && typeof job.pid === "number" && !isProcessAlive(job.pid)) {
+    let tail: string[] = [];
+    try {
+      tail = tailLines(job.log_path, 40);
+    } catch {
+      /* log unreadable — treated as no marker below */
+    }
+    const marker = findExitMarker(tail);
+    const reconciled: JobRecord = { ...job };
+    if (marker.found) {
+      reconciled.status = marker.code === 0 ? "done" : "failed";
+      reconciled.reason = undefined;
+    } else {
+      reconciled.status = "unknown";
+      reconciled.reason =
+        "process gone, no exit marker — server likely restarted mid-run";
+    }
+    jobRegistry.set(input.job_id, reconciled);
+    persistJob(config.jobsDir, input.job_id, reconciled);
+    job = reconciled;
+  }
+
  // Bounded tail read — never load a multi-GB training log wholesale.
  let recentLog: string[] = [];
  try {
@@ -266,6 +334,7 @@ export async function jobStatus(
    log_path: job.log_path,
    recent_log: recentLog,
    epochs_total: job.epochs_total,
+    ...(job.reason !== undefined ? { reason: job.reason } : {}),
  };

  return { ok: true, result };
@@ -115,7 +115,12 @@ export interface TrainJobResult {
 /** Output of ruview_job_status. */
 export interface JobStatusResult {
  job_id: string;
-  status: "queued" | "running" | "done" | "failed";
+  /**
+   * 'unknown' is only ever produced by post-restart reconciliation: a record
+   * frozen at 'running' whose owning process is gone and whose log carries no
+   * exit-code marker (see reason).
+   */
+  status: "queued" | "running" | "done" | "failed" | "unknown";
  progress_pct?: number | undefined;
  /** Most recent log lines (last 20). */
  recent_log: string[];
@@ -124,6 +129,8 @@ export interface JobStatusResult {
  epochs_done?: number | undefined;
  /** Total epochs scheduled. */
  epochs_total?: number | undefined;
+  /** Explanation attached when status was reconciled to 'unknown'. */
+  reason?: string | undefined;
 }

 // ── Vitals (ADR-124 §6 Python surface parity: ws.py:74-88) ───────────────
@@ -0,0 +1,49 @@
+/**
+ * ADR-264 F8/O7 — cog-binary detection must be architecture-aware.
+ *
+ * detectCogBinary() itself probes hardcoded /var/lib paths, so it is not
+ * cheaply testable without fs mocking. The bug it fixes, however, lives purely
+ * in the candidate ORDER, which cogBinaryCandidates() exposes as a pure,
+ * arch-injectable function — that is what we pin here.
+ */
+
+import { cogBinaryCandidates } from "../src/config.js";
+
+describe("cogBinaryCandidates()", () => {
+  it("probes -arm before -x86_64 on arm64 hosts", () => {
+    const c = cogBinaryCandidates("cog-person-count", "arm64");
+    const arm = c.findIndex((p) => p.endsWith("cog-person-count-arm"));
+    const x86 = c.findIndex((p) => p.endsWith("cog-person-count-x86_64"));
+    expect(arm).toBeGreaterThanOrEqual(0);
+    expect(x86).toBeGreaterThanOrEqual(0);
+    expect(arm).toBeLessThan(x86);
+  });
+
+  it("probes -x86_64 before -arm on x64 hosts", () => {
+    const c = cogBinaryCandidates("cog-person-count", "x64");
+    const arm = c.findIndex((p) => p.endsWith("cog-person-count-arm"));
+    const x86 = c.findIndex((p) => p.endsWith("cog-person-count-x86_64"));
+    expect(x86).toBeLessThan(arm);
+  });
+
+  it("defaults an unknown arch to the x86_64-first order", () => {
+    const c = cogBinaryCandidates("cog-pose-estimation", "riscv64");
+    const arm = c.findIndex((p) => p.endsWith("cog-pose-estimation-arm"));
+    const x86 = c.findIndex((p) => p.endsWith("cog-pose-estimation-x86_64"));
+    expect(x86).toBeLessThan(arm);
+  });
+
+  it("keeps the /usr/local/bin and bare-name PATH fallbacks last", () => {
+    const c = cogBinaryCandidates("cog-person-count", "arm64");
+    // The two arch builds come first; the /usr/local/bin fallback follows them.
+    expect(c[c.length - 1]).toBe("/usr/local/bin/cog-person-count");
+    expect(c).toHaveLength(3);
+  });
+
+  it("derives the id by stripping the cog- prefix once", () => {
+    const c = cogBinaryCandidates("cog-person-count", "x64");
+    expect(c[0]).toBe(
+      "/var/lib/cognitum/apps/person-count/cog-person-count-x86_64"
+    );
+  });
+});
@@ -110,6 +110,19 @@ describe("isOriginAllowed()", () => {
    expect(isOriginAllowed("https://evil.example.com:443", [])).toBe(false);
  });

+  // ADR-264 F7 hardening: an EXPLICIT allowlist means exact matching only. The
+  // any-port-localhost convenience applies solely to the empty-allowlist case,
+  // so an operator who pins an allowlist actually gets it.
+  it("with an explicit allowlist, rejects a localhost origin on an unlisted port", () => {
+    expect(isOriginAllowed("http://localhost:5173", allow)).toBe(false);
+    expect(isOriginAllowed("http://127.0.0.1:8080", allow)).toBe(false);
+  });
+
+  it("with an explicit allowlist, still accepts an exactly-listed localhost origin", () => {
+    expect(isOriginAllowed("http://localhost", allow)).toBe(true);
+    expect(isOriginAllowed("http://127.0.0.1", allow)).toBe(true);
+  });
+
  it("is case-sensitive for non-local allowlist entries per RFC 6454", () => {
    expect(isOriginAllowed("HTTPS://Partner.Example.com", ["https://partner.example.com"])).toBe(false);
  });
@@ -229,3 +242,68 @@ describe("HTTP transport session + body-cap hardening (ADR-264 F7)", () => {
    expect(r.status).toBe(200);
  });
 });
+
+// ── 8. ADR-264 F7: session-map bounds (cap + idle TTL sweep) ───────────────
+
+describe("HTTP transport session bounds (ADR-264 F7)", () => {
+  const initBody = (id: number): string =>
+    JSON.stringify({
+      jsonrpc: "2.0",
+      id,
+      method: "initialize",
+      params: {
+        protocolVersion: "2024-11-05",
+        capabilities: {},
+        clientInfo: { name: "test-client", version: "0.0.0" },
+      },
+    });
+
+  // Build directly (not via startServer) so we can inspect the sessions map.
+  async function startWithApp(
+    opts: Parameters<typeof buildHttpApp>[1],
+    basePort: number
+  ): Promise<{
+    port: number;
+    sessions: ReturnType<typeof buildHttpApp>["sessions"];
+    close: () => Promise<void>;
+  }> {
+    const { httpServer, sessions } = buildHttpApp(() => makeMockMcpServer(), opts);
+    const port = basePort + Math.floor(Math.random() * 100);
+    await new Promise<void>((resolve, reject) => {
+      httpServer.once("error", reject);
+      httpServer.listen(port, "127.0.0.1", () => resolve());
+    });
+    const close = () =>
+      new Promise<void>((res, rej) => httpServer.close((e) => (e ? rej(e) : res())));
+    return { port, sessions, close };
+  }
+
+  const ACCEPT = { Accept: "application/json, text/event-stream" };
+
+  it("never exceeds maxSessions — evicts the oldest-idle session at capacity", async () => {
+    const srv = await startWithApp({ allowedOrigins: ["*"], maxSessions: 2 }, 49800);
+    try {
+      for (let i = 0; i < 5; i++) {
+        await post(srv.port, "/mcp", ACCEPT, initBody(i));
+      }
+      expect(srv.sessions.size).toBeLessThanOrEqual(2);
+    } finally {
+      await srv.close();
+    }
+  });
+
+  it("sweeps sessions idle beyond sessionIdleMs", async () => {
+    const srv = await startWithApp(
+      { allowedOrigins: ["*"], sessionIdleMs: 20, sweepIntervalMs: 10 },
+      49900
+    );
+    try {
+      await post(srv.port, "/mcp", ACCEPT, initBody(1));
+      expect(srv.sessions.size).toBe(1);
+      await new Promise((r) => setTimeout(r, 150));
+      expect(srv.sessions.size).toBe(0);
+    } finally {
+      await srv.close();
+    }
+  });
+});
@@ -0,0 +1,96 @@
+/**
+ * ADR-264 O6 — post-restart job reconciliation.
+ *
+ * When the MCP server restarts mid-run, the persisted job record stays frozen
+ * at 'running' (the child.on('close') that flips it lived in the dead process).
+ * ruview_job_status must reconcile such a record against the recorded pid and
+ * the log's "# exit code: N" marker.
+ *
+ * We fabricate a persisted record pointing at a KNOWN-DEAD pid (a synchronous
+ * child that has already exited) and assert the reconciled status.
+ */
+
+import { mkdtempSync, writeFileSync } from "node:fs";
+import { spawnSync } from "node:child_process";
+import os from "node:os";
+import path from "node:path";
+import { randomUUID } from "node:crypto";
+import { jobStatus } from "../src/tools/train-count.js";
+import type { RuviewConfig } from "../src/types.js";
+
+/** A pid that has certainly exited: spawnSync waits for the child to finish. */
+function deadPid(): number {
+  const r = spawnSync(process.execPath, ["-e", ""]);
+  if (typeof r.pid !== "number") throw new Error("could not spawn probe child");
+  return r.pid;
+}
+
+function makeConfig(jobsDir: string): RuviewConfig {
+  return {
+    sensingServerUrl: "http://127.0.0.1:19999",
+    apiToken: undefined,
+    poseCogBinary: "nonexistent",
+    countCogBinary: "nonexistent",
+    jobsDir,
+  };
+}
+
+/** Write a fake persisted 'running' record + its log, return {jobId, config}. */
+function seedRunningJob(logBody: string): { jobId: string; config: RuviewConfig } {
+  const jobsDir = mkdtempSync(path.join(os.tmpdir(), "rvagent-jobs-"));
+  const jobId = randomUUID();
+  const logPath = path.join(jobsDir, `${jobId}.log`);
+  writeFileSync(logPath, logBody);
+  const record = {
+    job_id: jobId,
+    status: "running",
+    log_path: logPath,
+    queued_at: Date.now() / 1000,
+    epochs_total: 5,
+    pid: deadPid(),
+  };
+  writeFileSync(
+    path.join(jobsDir, `${jobId}.json`),
+    JSON.stringify(record, null, 2)
+  );
+  return { jobId, config: makeConfig(jobsDir) };
+}
+
+describe("ruview_job_status reconciliation (ADR-264 O6)", () => {
+  it("reconciles a dead 'running' job with exit 0 to 'done'", async () => {
+    const { jobId, config } = seedRunningJob(
+      "# training...\nepoch 5/5\n# exit code: 0\n"
+    );
+    const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
+    expect(out["ok"]).toBe(true);
+    const res = out["result"] as Record<string, unknown>;
+    expect(res["status"]).toBe("done");
+  });
+
+  it("reconciles a dead 'running' job with non-zero exit to 'failed'", async () => {
+    const { jobId, config } = seedRunningJob(
+      "# training...\npanic: cuda oom\n# exit code: 101\n"
+    );
+    const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
+    const res = out["result"] as Record<string, unknown>;
+    expect(res["status"]).toBe("failed");
+  });
+
+  it("marks a dead 'running' job with no exit marker as 'unknown' with a reason", async () => {
+    const { jobId, config } = seedRunningJob("# training...\nepoch 2/5\n");
+    const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
+    const res = out["result"] as Record<string, unknown>;
+    expect(res["status"]).toBe("unknown");
+    expect(typeof res["reason"]).toBe("string");
+    expect(res["reason"]).toMatch(/restarted/i);
+  });
+
+  it("treats a signal-killed marker (null) as 'failed'", async () => {
+    const { jobId, config } = seedRunningJob(
+      "# training...\n# exit code: null\n"
+    );
+    const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
+    const res = out["result"] as Record<string, unknown>;
+    expect(res["status"]).toBe("failed");
+  });
+});