fix(npm): address 10 verified review findings in harness + rvagent before 0.2.0 publish

harness/ruview (@ruvnet/ruview):
- guardrails: digit gate now sees numbers inside code spans; F1-style
  metric tokens followed by ':' or a nearby number are no longer scrubbed
  (fail-open regressions in the honesty gate)
- mcp-server: tools/call requests serialize through a FIFO promise chain
  (hardware/mutating tools never overlap) while ping/tools/list stay
  immediate; stdin close drains in-flight responses before exit
- tools: which() no longer memoizes negative lookups

tools/ruview-mcp (@ruvnet/rvagent):
- index: realpath invoked-directly guard — library import no longer
  connects a stdio transport to the consumer's process
- http-transport: explicit allowedOrigins is exact-match only (localhost
  any-port convenience applies only with no configured allowlist);
  session map gains maxSessions=64 + 5min idle TTL sweep
- train-count: job records persist the child pid and reconcile stale
  'running' status after a server restart (exit-code marker or dead pid)
- config: cog binary candidates ordered by process.arch

.github/workflows/ruview-npm-release.yml: port the full ADR-265 D1 gate
(version-literal check, unpacked-size budget, tarball-install smoke test)
from npm-packages.yml so the publish path enforces what the header claims.

Tests: harness 30→36, rvagent 99→112, all passing.

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
ruv
2026-07-02 09:58:00 -04:00
parent d26fb0b8fc
commit d46eb8a4b2
15 changed files with 651 additions and 44 deletions
+61 -2
View File
@@ -54,18 +54,77 @@ jobs:
- name: Test
run: npm test --if-present
- name: Pack gate (no maps)
# ADR-265 D3 — package.json is the only place a version string lives.
- name: Version-literal gate
run: |
set -euo pipefail
hits=""
for d in src bin; do
if [ -d "$d" ]; then
hits+=$(grep -rEn '\b[0-9]+\.[0-9]+\.[0-9]+\b' "$d" | grep -vE '127\.0\.0\.1|0\.0\.0\.0' || true)
fi
done
if [ -n "$hits" ]; then
echo "Hardcoded version-like literals found (read package.json instead — ADR-265 D3):"
echo "$hits"
exit 1
fi
# ADR-265 D1.3 — pack-content gate: no maps AND the per-package
# unpacked-size budget (the budgets that npm-packages.yml enforces).
- name: Pack gate (no maps + size budget)
run: |
set -euo pipefail
case "${{ inputs.package }}" in
# ADR-263: dependency-free harness; budget guards against dep creep.
harness/ruview) export UNPACKED_BUDGET=65536 ;;
# ADR-264 O2: map-free tarball (was 188 kB with maps).
tools/ruview-mcp) export UNPACKED_BUDGET=140000 ;;
*) echo "Unknown package '${{ inputs.package }}' — no budget defined"; exit 1 ;;
esac
npm pack --dry-run --json 2>/dev/null | node -e "
const [info] = JSON.parse(require('fs').readFileSync(0, 'utf8'));
const budget = Number(process.env.UNPACKED_BUDGET);
const maps = info.files.filter((f) => f.path.endsWith('.map'));
if (maps.length > 0) {
console.error('Tarball contains source maps (ADR-264 F2):', maps.map((m) => m.path));
process.exit(1);
}
console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked\`);
if (info.unpackedSize > budget) {
console.error(\`Unpacked size \${info.unpackedSize} B exceeds budget \${budget} B\`);
process.exit(1);
}
console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked (budget \${budget} B), 0 maps\`);
"
# ADR-265 D1.4 — install the real tarball and drive each bin/export.
- name: Tarball smoke test
run: |
set -euo pipefail
TGZ="$PWD/$(npm pack --silent 2>/dev/null | tail -1)"
SMOKE="$(mktemp -d)"
cd "$SMOKE"
npm init -y > /dev/null
npm i --no-fund --no-audit "$TGZ"
case "${{ inputs.package }}" in
harness/ruview)
./node_modules/.bin/ruview --version
./node_modules/.bin/ruview doctor
# the honesty gate must fail closed on empty input (ADR-263 F1)
if ./node_modules/.bin/ruview claim-check; then
echo 'claim-check passed with no input — fail-open regression'; exit 1
fi
node --input-type=module -e "const m = await import('@ruvnet/ruview'); if (!m.TOOLS) process.exit(1);"
;;
tools/ruview-mcp)
# initialize over stdio; server must answer and exit 0 on EOF
printf '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"ci","version":"0"}}}\n' \
| timeout 30 ./node_modules/.bin/rvagent | grep -q '"serverInfo"'
# the ESM export must resolve from the installed tarball (ADR-264 F1)
timeout 30 node --input-type=module -e "await import('@ruvnet/rvagent');" < /dev/null
;;
esac
- name: Claim-check README
run: |
if [ -f README.md ]; then
+17 -7
View File
@@ -21,9 +21,13 @@ const METRIC_TERMS = [
// 'map' additionally must not be a `.map` file suffix or a hyphenated
// compound ("map-free", "map-reduce") — mAP the metric never appears as either.
const METRIC_TERMS_SHORT = [/(?<![.\w])map\b(?!-)/, /\bf1\b/, /\bauc\b/, /\biou\b/];
// Finding/option labels (F1, O2, …) count as labels unless followed by a
// score context — "F1 score 0.91" stays a metric, "after F7 fixes" does not.
const LABEL_TOKEN_RE = /\b[fo]\d+\b(?!\s*(?:score|=|\d|%))/g;
// Finding/option labels (F1, O2, …) count as labels unless the token sits in a
// metric context: an immediately following score/=/%/digit or colon ("F1: 0.91"),
// or a number later in the same clause ("F1 reaches 0.91" — an F1-score claim).
// Bare option refs ("F7 fixes", "O1O9", "ADR-263 O2") carry no clause number of
// their own and stay labels. (A surviving 'f1' still only fires as a metric when
// its scrubbed line actually carries a number — see mentionsMetricTerm.)
const LABEL_TOKEN_RE = /\b[fo]\d+\b(?!\s*(?:score|=|\d|%|:))(?![^\n.;]*\d)/g;
const CODE_SPAN_RE = /`[^`]*`/g; // backticked identifiers are code, not claims
const HAS_NUMBER_RE = /\d/;
@@ -95,9 +99,14 @@ export function claimCheck(text) {
return;
}
// A quantitative claim needs a number: a metric term in plain prose
// ("precision matters here") is not a taggable claim (ADR-263 F11).
if (!hasPercent && !HAS_NUMBER_RE.test(scrubbed)) return;
// A quantitative claim needs a number. Digits hidden in a code span still
// count — "accuracy reached `0.95`" is a claim — so test the line with only
// finding/option labels stripped, NOT the code-span-scrubbed copy: scrubbing
// dropped `0.95` and wrongly short-circuited both the untagged and the
// MEASURED-without-reproducer checks below. A bare metric word in prose
// ("precision matters here", "every accuracy number must be MEASURED") has no
// number and is not a taggable claim (ADR-263 F11).
if (!hasPercent && !HAS_NUMBER_RE.test(lower.replace(LABEL_TOKEN_RE, ' '))) return;
// A metric/percent with no honesty tag at all.
if (!tagged) {
@@ -111,7 +120,8 @@ export function claimCheck(text) {
return;
}
// Tagged MEASURED but cites no reproducer — still a gap.
// Tagged MEASURED but cites no reproducer — still a gap (reached now even
// when the only number is inside a code span, e.g. "accuracy `0.97` (MEASURED)").
if (lower.includes('measured') && !hasReproducer) {
findings.push({
severity: 'medium',
+29 -5
View File
@@ -68,16 +68,40 @@ async function handle(msg) {
export function startMcpServer() {
log(`starting v${SERVER_INFO.version} (protocol ${PROTOCOL_VERSION}, ${listTools().length} tools)`);
const rl = createInterface({ input: process.stdin, crlfDelay: Infinity });
// tools/call runs are serialized through a FIFO promise chain: hardware/mutating
// tools (calibrate, serial monitor, flash) must never overlap. ping/tools/list/
// initialize/resources/prompts stay immediate (ADR-263 O2 — a health check must
// answer during a long tool run). `toolChain` also lets stdin-close drain the
// in-flight call so its response is flushed instead of dropped by process.exit.
let toolChain = Promise.resolve();
const dispatch = (msg) => handle(msg).catch((err) => {
if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err));
log('handler error:', String(err));
});
rl.on('line', (line) => {
const s = line.trim();
if (!s) return;
let msg;
try { msg = JSON.parse(s); } catch { return log('bad JSON line dropped'); }
// Fire-and-forget: keep reading stdin while tool calls run (ADR-263 O2).
handle(msg).catch((err) => {
if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err));
log('handler error:', String(err));
if (msg && msg.method === 'tools/call') {
toolChain = toolChain.then(() => dispatch(msg)); // one tool at a time
} else {
dispatch(msg); // health/list/handshake answer immediately, even mid tool run
}
});
rl.on('close', () => {
// Wait for any queued/in-flight tool call to settle (its response written)
// before exiting — fire-and-forget used to race this and drop the response.
toolChain.then(() => {
log('stdin closed — exiting');
const done = () => process.exit(0);
// Pipe writes are async; flush buffered stdout before exit.
if (process.stdout.writableLength) process.stdout.once('drain', done);
else done();
});
});
rl.on('close', () => { log('stdin closed — exiting'); process.exit(0); });
}
+4 -3
View File
@@ -32,8 +32,9 @@ export function findRepoRoot(start = process.cwd()) {
return null;
}
// Dep-free PATH scan, memoized for the process lifetime (ADR-263 O8) — no
// shell subprocess per lookup.
// Dep-free PATH scan (ADR-263 O8) — no shell subprocess per lookup. Only hits
// are memoized: a miss can resolve later in a long-lived MCP session (the
// operator installs python/the CLI mid-run), so misses are re-probed each call.
const whichCache = new Map();
export function which(cmd) {
if (whichCache.has(cmd)) return whichCache.get(cmd);
@@ -54,7 +55,7 @@ export function which(cmd) {
} catch { /* keep scanning */ }
}
}
whichCache.set(cmd, found);
if (found !== null) whichCache.set(cmd, found);
return found;
}
+47
View File
@@ -99,3 +99,50 @@ test('MCP server answers ping while a long tools/call is in flight (ADR-263 O2)'
rmSync(repo, { recursive: true, force: true });
}
});
test('tools/call executions are serialized — two slow calls run sequentially', { skip: !which('python') && !which('python3') ? 'python not on PATH' : false }, async () => {
// Two verify.py that each sleep 0.8 s. Serialized ⇒ ~1.6 s+; concurrent ⇒ ~0.8 s.
const repo = mkdtempSync(join(tmpdir(), 'ruview-mcp-serial-'));
const proofDir = join(repo, 'archive', 'v1', 'data', 'proof');
mkdirSync(proofDir, { recursive: true });
writeFileSync(join(proofDir, 'verify.py'), 'import time\ntime.sleep(0.8)\nprint("VERDICT: PASS")\n');
const s = startServer();
try {
s.send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} });
await s.next(1);
const t0 = Date.now();
const a = s.next(20);
const b = s.next(21);
s.send({ jsonrpc: '2.0', id: 20, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } });
s.send({ jsonrpc: '2.0', id: 21, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } });
const [ra, rb] = await Promise.all([a, b]);
const elapsed = Date.now() - t0;
assert.equal(JSON.parse(ra.result.content[0].text).verdict, 'PASS');
assert.equal(JSON.parse(rb.result.content[0].text).verdict, 'PASS');
assert.ok(elapsed > 1400, `two 0.8 s tool calls finished in ${elapsed} ms — they overlapped instead of serializing`);
} finally {
s.close();
rmSync(repo, { recursive: true, force: true });
}
});
test('stdin close flushes an in-flight tools/call response before exit', async () => {
const child = spawn(process.execPath, [CLI, 'mcp', 'start'], { stdio: ['pipe', 'pipe', 'pipe'] });
let out = '';
child.stdout.on('data', (d) => { out += d; });
const exited = new Promise((res) => child.on('exit', res));
// Write a tools/call then immediately close stdin. The old fire-and-forget
// dispatch raced rl 'close' → process.exit and could drop this response.
child.stdin.write(JSON.stringify({ jsonrpc: '2.0', id: 42, method: 'tools/call', params: { name: 'ruview_onboard', arguments: {} } }) + '\n');
child.stdin.end();
await exited;
const msgs = out.trim().split('\n').filter(Boolean).map((l) => JSON.parse(l));
const resp = msgs.find((m) => m.id === 42);
assert.ok(resp, 'the in-flight tools/call response must be flushed to stdout before exit');
assert.equal(resp.result.isError, false);
});
+51 -4
View File
@@ -4,8 +4,9 @@
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { readdirSync, readFileSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { readdirSync, readFileSync, mkdtempSync, writeFileSync, rmSync } from 'node:fs';
import { join, dirname, delimiter } from 'node:path';
import { tmpdir } from 'node:os';
import { fileURLToPath } from 'node:url';
import { claimCheck, summarize } from '../src/guardrails.js';
import { TOOLS, TOOL_ALIASES, runTool, listTools, findRepoRoot, run, which } from '../src/tools.js';
@@ -58,6 +59,33 @@ test('guardrail still catches real short-token metric claims', () => {
assert.equal(claimCheck('IoU 0.75 across rooms.').ok, false);
});
// Digits hidden in a code span still make a claim — scrubbing must not blind the
// number gate to `0.95` (regression: code-span number bypassed the gate).
test('guardrail flags an accuracy number stated inside a code span', () => {
const r = claimCheck('Count accuracy reached `0.95` in our tests.');
assert.equal(r.ok, false, JSON.stringify(r.findings));
assert.ok(r.findings.some((f) => /not tagged/i.test(f.reason)));
});
// A MEASURED claim whose only number hides in a code span must still reach the
// missing-reproducer check (regression: the scrubbed gate short-circuited it).
// Bare metric prose with no number at all (e.g. the README rule text) stays a pass.
test('guardrail flags a MEASURED code-span number with no reproducer', () => {
const r = claimCheck('Detection accuracy `0.97` on the set (MEASURED).');
assert.equal(r.ok, false, JSON.stringify(r.findings));
assert.ok(r.findings.some((f) => /no reproducer/i.test(f.reason)));
assert.equal(claimCheck('Every accuracy number must be MEASURED against a baseline.').ok, true);
});
// F1-score phrasings ("F1: 0.91", "F1 reaches 0.91") were scrubbed as option
// labels and slipped through; option refs alone must still not false-positive.
test('guardrail catches F1-score claims but not bare option refs (ADR-263 F11)', () => {
assert.equal(claimCheck('F1: 0.91 on the held-out set.').ok, false, 'F1: value is a metric claim');
assert.equal(claimCheck('F1 reaches 0.91 on the held-out set.').ok, false, 'F1 with a nearby number is a claim');
assert.equal(claimCheck('Options O1O9 are tracked in ADR-263 O2.').ok, true, 'option labels are not metrics');
assert.equal(claimCheck('ADR-263 O2 lands the exports fix.').ok, true);
});
test('summarize gives PASS/finding text', () => {
assert.match(summarize(claimCheck('nothing here')), /PASS/);
assert.match(summarize(claimCheck('100% accuracy')), /finding/);
@@ -156,10 +184,29 @@ test('run() bounds captured output instead of dying on big streams (ADR-263 O4)'
assert.ok(r.stdout.includes('TAIL_MARKER'), 'tail must keep the end of the stream');
});
test('which() finds node and memoizes misses', () => {
test('which() finds node and re-probes misses (hits are cached)', () => {
assert.ok(which('node'), 'node must be on PATH in the test env');
assert.equal(which('definitely-not-a-binary-xyz'), null);
assert.equal(which('definitely-not-a-binary-xyz'), null); // cached path
assert.equal(which('definitely-not-a-binary-xyz'), null); // re-probed, still absent
});
// ADR-263 O8: a miss must not be cached — an operator who installs a tool
// mid-session (e.g. python after a python_missing failure) must be found next call.
test('which() re-probes after a miss so a newly-installed tool is found', () => {
const dir = mkdtempSync(join(tmpdir(), 'ruview-which-'));
const name = 'ruview-probe-xyz';
const isWin = process.platform === 'win32';
const bin = join(dir, isWin ? `${name}.cmd` : name);
const prevPath = process.env.PATH;
try {
assert.equal(which(name), null, 'not on PATH yet → miss');
writeFileSync(bin, isWin ? '@echo off\n' : '#!/bin/sh\n', { mode: 0o755 });
process.env.PATH = dir + delimiter + prevPath;
assert.ok(which(name), 'installed mid-session → the miss must not have been cached');
} finally {
process.env.PATH = prevPath;
rmSync(dir, { recursive: true, force: true });
}
});
test('CLI run(): claim-check exits non-zero on a bad claim', async () => {
+1 -1
View File
@@ -12,7 +12,7 @@
"yargs": "^17.7.2"
},
"bin": {
"ruview": "dist/index.js"
"ruview-cli": "dist/index.js"
},
"devDependencies": {
"@types/node": "^20.14.0",
+24 -10
View File
@@ -51,21 +51,35 @@ export function loadConfig(): RuviewConfig {
};
}
/**
* Ordered cog-binary candidate paths for a host of the given CPU architecture.
* The native-arch build is probed FIRST: an appliance that ships both
* `cog-<id>-arm` and `cog-<id>-x86_64` must never hand back the wrong-arch
* binary (ADR-264 F8/O7 — the pre-review order tried `-arm` unconditionally).
* The `/usr/local/bin` and bare-name (PATH) fallbacks follow, arch-agnostic.
*
* Pure and arch-injectable so the ordering is unit-testable.
*/
export function cogBinaryCandidates(
name: string,
arch: string = process.arch
): string[] {
const id = name.replace("cog-", "");
const dir = `/var/lib/cognitum/apps/${id}`;
const arm = `${dir}/cog-${id}-arm`;
const x86 = `${dir}/cog-${id}-x86_64`;
// arm64 → prefer -arm; everything else (notably x64) → prefer -x86_64.
const archOrdered = arch === "arm64" ? [arm, x86] : [x86, arm];
return [...archOrdered, `/usr/local/bin/${name}`];
}
/**
* Locate a cog binary in the common appliance install locations, probing each
* candidate (ADR-264 F8/O7 — the pre-review version built this list and then
* unconditionally returned the bare name). Falls back to the bare name (PATH
* candidate in native-arch-first order. Falls back to the bare name (PATH
* resolution at spawn time) when no candidate exists.
*/
function detectCogBinary(name: string): string {
const id = name.replace("cog-", "");
// Common install paths for Cognitum cog binaries on Linux/macOS appliances.
const candidates = [
`/var/lib/cognitum/apps/${id}/cog-${id}-arm`,
`/var/lib/cognitum/apps/${id}/cog-${id}-x86_64`,
`/usr/local/bin/${name}`,
];
for (const candidate of candidates) {
for (const candidate of cogBinaryCandidates(name)) {
if (existsSync(candidate)) return candidate;
}
return name; // bare name — rely on PATH; spawn fails gracefully if absent
+89 -5
View File
@@ -13,8 +13,11 @@
*
* Security model (ADR-124 §6 + ADR-264 F7):
* - Origin validation: browser-style requests whose Origin is not local
* are rejected with 403 before reaching the MCP layer. Localhost origins
* match on hostname, ANY port (http://localhost:5173 is local).
* are rejected with 403 before reaching the MCP layer. With NO explicit
* allowlist, localhost origins match on hostname, ANY port
* (http://localhost:5173 is local). When an explicit allowedOrigins list is
* configured, matching is exact — the any-port-localhost convenience is off,
* so a localhost peer on an unlisted port must be added to be accepted.
* - Bearer token: when RVAGENT_HTTP_TOKEN is set, requests must carry
* Authorization: Bearer <token>; missing/wrong tokens → 401.
* - Body cap: request bodies over 1 MiB are rejected with 413 (the
@@ -55,6 +58,20 @@ export interface HttpTransportOptions {
bearerToken?: string;
/** Maximum accepted request body size in bytes (default: 1 MiB). */
maxBodyBytes?: number;
/**
* Maximum number of concurrent live sessions (default: 64). When a new
* `initialize` arrives at the cap, the oldest-idle session is evicted (its
* transport closed) to make room — bounds memory against a flaky client that
* loops `initialize` or a malicious localhost peer (ADR-264 F7).
*/
maxSessions?: number;
/**
* Idle time-to-live for a session in ms (default: 5 min). Sessions with no
* request activity for longer than this are swept and closed.
*/
sessionIdleMs?: number;
/** How often the idle-session sweeper runs, in ms (default: 60 s). */
sweepIntervalMs?: number;
}
export interface HttpTransportResult {
@@ -69,6 +86,9 @@ export interface HttpTransportResult {
const DEFAULT_HOST = "127.0.0.1";
const DEFAULT_PORT = 3001;
const DEFAULT_MAX_BODY_BYTES = 1024 * 1024;
const DEFAULT_MAX_SESSIONS = 64;
const DEFAULT_SESSION_IDLE_MS = 5 * 60 * 1000;
const DEFAULT_SWEEP_INTERVAL_MS = 60 * 1000;
const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]);
/**
@@ -76,8 +96,11 @@ const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]);
* Returns true if the request should be allowed, false if it should be rejected.
*
* An absent Origin header is allowed (same-origin non-browser requests, curl,
* etc.). A localhost origin is allowed on any port (real browser origins carry
* ports — ADR-264 F7). Anything else must match the allowlist exactly.
* etc.). When NO explicit allowlist was configured (empty list), a localhost
* origin is allowed on any port as a convenience — real browser origins carry
* ports (ADR-264 F7). When an explicit allowlist IS configured, matching is
* exact: the any-port-localhost shortcut is disabled so an operator who pins an
* allowlist actually gets it (a looped-back peer on an unlisted port is denied).
*/
export function isOriginAllowed(
origin: string | undefined,
@@ -86,6 +109,8 @@ export function isOriginAllowed(
if (origin === undefined) return true; // no Origin = not a cross-origin browser request
if (allowedOrigins.includes("*")) return true;
if (allowedOrigins.includes(origin)) return true;
// Explicit allowlist ⇒ exact matching only; skip the localhost convenience.
if (allowedOrigins.length > 0) return false;
try {
const u = new URL(origin);
return (
@@ -142,7 +167,55 @@ export function buildHttpApp(
const allowedOrigins: string[] = opts.allowedOrigins ?? [];
const bearerToken = opts.bearerToken ?? process.env["RVAGENT_HTTP_TOKEN"];
const maxBodyBytes = opts.maxBodyBytes ?? DEFAULT_MAX_BODY_BYTES;
const maxSessions = opts.maxSessions ?? DEFAULT_MAX_SESSIONS;
const sessionIdleMs = opts.sessionIdleMs ?? DEFAULT_SESSION_IDLE_MS;
const sweepIntervalMs = opts.sweepIntervalMs ?? DEFAULT_SWEEP_INTERVAL_MS;
const sessions = new Map<string, StreamableHTTPServerTransport>();
// lastSeen tracks per-session request activity so the sweeper and the
// oldest-idle eviction can bound the session map (ADR-264 F7).
const lastSeen = new Map<string, number>();
/** Mark a session as freshly used. */
function touch(sessionId: string): void {
lastSeen.set(sessionId, Date.now());
}
/** Close a session's transport and drop it from the bookkeeping maps. */
function closeSession(id: string): void {
const transport = sessions.get(id);
sessions.delete(id);
lastSeen.delete(id);
if (transport) {
try {
void transport.close(); // onclose is idempotent against the maps above
} catch {
/* best-effort: a half-open transport must not block eviction */
}
}
}
/** Evict the session that has been idle longest — called when at capacity. */
function evictOldestIdle(): void {
let oldestId: string | undefined;
let oldestSeen = Infinity;
for (const [id, seen] of lastSeen) {
if (seen < oldestSeen) {
oldestSeen = seen;
oldestId = id;
}
}
if (oldestId !== undefined) closeSession(oldestId);
}
/** Periodic sweep: close sessions idle beyond sessionIdleMs. */
function sweepIdleSessions(): void {
const now = Date.now();
for (const [id, seen] of lastSeen) {
if (now - seen > sessionIdleMs) closeSession(id);
}
}
const sweepTimer = setInterval(sweepIdleSessions, sweepIntervalMs);
sweepTimer.unref(); // never keep the process alive just to sweep
const httpServer = createServer((req: IncomingMessage, res: ServerResponse) => {
void (async () => {
@@ -194,6 +267,7 @@ export function buildHttpApp(
json(res, 404, { error: `Unknown session "${sessionId}"` });
return;
}
touch(sessionId);
await transport.handleRequest(req, res, parsed);
return;
}
@@ -206,14 +280,21 @@ export function buildHttpApp(
});
return;
}
// Bound the session map: at capacity, reclaim the oldest-idle slot
// before minting a new session (ADR-264 F7).
if (sessions.size >= maxSessions) evictOldestIdle();
const transport = new StreamableHTTPServerTransport({
sessionIdGenerator: () => randomUUID(),
onsessioninitialized: (id: string) => {
sessions.set(id, transport);
touch(id);
},
});
transport.onclose = () => {
if (transport.sessionId !== undefined) sessions.delete(transport.sessionId);
if (transport.sessionId !== undefined) {
sessions.delete(transport.sessionId);
lastSeen.delete(transport.sessionId);
}
};
const mcpServer = serverFactory();
await mcpServer.connect(transport as Parameters<typeof mcpServer.connect>[0]);
@@ -228,6 +309,7 @@ export function buildHttpApp(
json(res, 400, { error: "Bad Request: missing or unknown mcp-session-id" });
return;
}
if (sessionId !== undefined) touch(sessionId);
await transport.handleRequest(req, res);
return;
}
@@ -239,6 +321,8 @@ export function buildHttpApp(
});
});
httpServer.on("close", () => clearInterval(sweepTimer));
return { httpServer, sessions };
}
+26 -4
View File
@@ -31,6 +31,9 @@
*/
import { createRequire } from "node:module";
import { realpathSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { argv } from "node:process";
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
@@ -355,7 +358,26 @@ async function main(): Promise<void> {
);
}
main().catch((e) => {
process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`);
process.exit(1);
});
// CLI guard: boot the server only when this module is the entrypoint — invoked
// as the `rvagent` / `ruview-mcp` bin or `node dist/index.js`. Importing it as a
// library (`import { buildServer } from "@ruvnet/rvagent"`) must NOT side-effect
// connect a StdioServerTransport to the consumer's stdin/stdout. Realpath both
// sides because npm's bin shim is a symlink and passes a non-normalized,
// possibly case-skewed argv[1] on Windows (mirrors harness/ruview/bin/cli.js).
const invokedDirectly = (() => {
if (!argv[1]) return false;
try {
const a = realpathSync(argv[1]);
const b = realpathSync(fileURLToPath(import.meta.url));
return process.platform === "win32" ? a.toLowerCase() === b.toLowerCase() : a === b;
} catch {
return false;
}
})();
if (invokedDirectly) {
main().catch((e) => {
process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`);
process.exit(1);
});
}
+71 -2
View File
@@ -76,10 +76,18 @@ export const jobStatusSchema = z.object({
export type JobStatusInput = z.infer<typeof jobStatusSchema>;
interface JobRecord {
status: "queued" | "running" | "done" | "failed";
status: "queued" | "running" | "done" | "failed" | "unknown";
log_path: string;
queued_at: number;
epochs_total: number;
/**
* OS pid of the training child. Persisted so a later process (e.g. after an
* MCP server restart) can tell whether a job still marked 'running' actually
* outlived the process that spawned it (ADR-264 O6).
*/
pid?: number | undefined;
/** Human-readable explanation attached during reconciliation (unknown state). */
reason?: string | undefined;
}
// In-process job registry, mirrored to <jobsDir>/<id>.json on every state
@@ -113,12 +121,41 @@ function loadPersistedJob(jobsDir: string, jobId: string): JobRecord | undefined
log_path: raw.log_path,
queued_at: typeof raw.queued_at === "number" ? raw.queued_at : 0,
epochs_total: typeof raw.epochs_total === "number" ? raw.epochs_total : 0,
pid: typeof raw.pid === "number" ? raw.pid : undefined,
reason: typeof raw.reason === "string" ? raw.reason : undefined,
};
} catch {
return undefined;
}
}
/**
* Is `pid` still a live process? `process.kill(pid, 0)` sends no signal but
* probes existence: ESRCH ⇒ gone; EPERM ⇒ alive but owned by another user
* (treated as alive so we never falsely reconcile a still-running job).
*/
function isProcessAlive(pid: number): boolean {
try {
process.kill(pid, 0);
return true;
} catch (e) {
return (e as NodeJS.ErrnoException).code === "EPERM";
}
}
/**
* Scan log lines (tail) for the "# exit code: N" marker the child.on('close')
* handler appends. `found:false` means the process died without the marker —
* i.e. this server never saw the close (it restarted mid-run).
*/
function findExitMarker(lines: string[]): { found: boolean; code: number | null } {
for (let i = lines.length - 1; i >= 0; i--) {
const m = /^# exit code: (-?\d+|null)$/.exec((lines[i] ?? "").trim());
if (m) return { found: true, code: m[1] === "null" ? null : Number(m[1]) };
}
return { found: false, code: null };
}
/** Read the last `maxLines` lines of a file without loading the whole log. */
function tailLines(filePath: string, maxLines: number, maxBytes = 64 * 1024): string[] {
const size = statSync(filePath).size;
@@ -207,6 +244,10 @@ export async function trainCount(
closeSync(logFdOut);
closeSync(logFdErr);
// Record the child pid so a later process can reconcile a stale 'running'
// record after a server restart (child.pid is undefined only if spawn failed
// synchronously, in which case the 'error' handler flips status to 'failed').
record.pid = child.pid;
record.status = "running";
persistJob(logDir, jobId, record);
@@ -244,7 +285,7 @@ export async function jobStatus(
config: RuviewConfig
): Promise<object> {
// Memory first, then the persisted record (survives server restarts).
const job = jobRegistry.get(input.job_id) ?? loadPersistedJob(config.jobsDir, input.job_id);
let job = jobRegistry.get(input.job_id) ?? loadPersistedJob(config.jobsDir, input.job_id);
if (!job) {
return {
ok: false,
@@ -252,6 +293,33 @@ export async function jobStatus(
};
}
// Reconcile a 'running' record whose owning process is gone. The status flip
// to done/failed lives only in the spawning process's child.on('close'/'error')
// handlers; if this server restarted mid-run, the record froze at 'running'
// (ADR-264 O6). When the pid is dead, recover the true outcome from the log's
// "# exit code: N" marker, else surface an honest 'unknown'.
if (job.status === "running" && typeof job.pid === "number" && !isProcessAlive(job.pid)) {
let tail: string[] = [];
try {
tail = tailLines(job.log_path, 40);
} catch {
/* log unreadable — treated as no marker below */
}
const marker = findExitMarker(tail);
const reconciled: JobRecord = { ...job };
if (marker.found) {
reconciled.status = marker.code === 0 ? "done" : "failed";
reconciled.reason = undefined;
} else {
reconciled.status = "unknown";
reconciled.reason =
"process gone, no exit marker — server likely restarted mid-run";
}
jobRegistry.set(input.job_id, reconciled);
persistJob(config.jobsDir, input.job_id, reconciled);
job = reconciled;
}
// Bounded tail read — never load a multi-GB training log wholesale.
let recentLog: string[] = [];
try {
@@ -266,6 +334,7 @@ export async function jobStatus(
log_path: job.log_path,
recent_log: recentLog,
epochs_total: job.epochs_total,
...(job.reason !== undefined ? { reason: job.reason } : {}),
};
return { ok: true, result };
+8 -1
View File
@@ -115,7 +115,12 @@ export interface TrainJobResult {
/** Output of ruview_job_status. */
export interface JobStatusResult {
job_id: string;
status: "queued" | "running" | "done" | "failed";
/**
* 'unknown' is only ever produced by post-restart reconciliation: a record
* frozen at 'running' whose owning process is gone and whose log carries no
* exit-code marker (see reason).
*/
status: "queued" | "running" | "done" | "failed" | "unknown";
progress_pct?: number | undefined;
/** Most recent log lines (last 20). */
recent_log: string[];
@@ -124,6 +129,8 @@ export interface JobStatusResult {
epochs_done?: number | undefined;
/** Total epochs scheduled. */
epochs_total?: number | undefined;
/** Explanation attached when status was reconciled to 'unknown'. */
reason?: string | undefined;
}
// ── Vitals (ADR-124 §6 Python surface parity: ws.py:74-88) ───────────────
+49
View File
@@ -0,0 +1,49 @@
/**
* ADR-264 F8/O7 — cog-binary detection must be architecture-aware.
*
* detectCogBinary() itself probes hardcoded /var/lib paths, so it is not
* cheaply testable without fs mocking. The bug it fixes, however, lives purely
* in the candidate ORDER, which cogBinaryCandidates() exposes as a pure,
* arch-injectable function — that is what we pin here.
*/
import { cogBinaryCandidates } from "../src/config.js";
describe("cogBinaryCandidates()", () => {
it("probes -arm before -x86_64 on arm64 hosts", () => {
const c = cogBinaryCandidates("cog-person-count", "arm64");
const arm = c.findIndex((p) => p.endsWith("cog-person-count-arm"));
const x86 = c.findIndex((p) => p.endsWith("cog-person-count-x86_64"));
expect(arm).toBeGreaterThanOrEqual(0);
expect(x86).toBeGreaterThanOrEqual(0);
expect(arm).toBeLessThan(x86);
});
it("probes -x86_64 before -arm on x64 hosts", () => {
const c = cogBinaryCandidates("cog-person-count", "x64");
const arm = c.findIndex((p) => p.endsWith("cog-person-count-arm"));
const x86 = c.findIndex((p) => p.endsWith("cog-person-count-x86_64"));
expect(x86).toBeLessThan(arm);
});
it("defaults an unknown arch to the x86_64-first order", () => {
const c = cogBinaryCandidates("cog-pose-estimation", "riscv64");
const arm = c.findIndex((p) => p.endsWith("cog-pose-estimation-arm"));
const x86 = c.findIndex((p) => p.endsWith("cog-pose-estimation-x86_64"));
expect(x86).toBeLessThan(arm);
});
it("keeps the /usr/local/bin and bare-name PATH fallbacks last", () => {
const c = cogBinaryCandidates("cog-person-count", "arm64");
// The two arch builds come first; the /usr/local/bin fallback follows them.
expect(c[c.length - 1]).toBe("/usr/local/bin/cog-person-count");
expect(c).toHaveLength(3);
});
it("derives the id by stripping the cog- prefix once", () => {
const c = cogBinaryCandidates("cog-person-count", "x64");
expect(c[0]).toBe(
"/var/lib/cognitum/apps/person-count/cog-person-count-x86_64"
);
});
});
@@ -110,6 +110,19 @@ describe("isOriginAllowed()", () => {
expect(isOriginAllowed("https://evil.example.com:443", [])).toBe(false);
});
// ADR-264 F7 hardening: an EXPLICIT allowlist means exact matching only. The
// any-port-localhost convenience applies solely to the empty-allowlist case,
// so an operator who pins an allowlist actually gets it.
it("with an explicit allowlist, rejects a localhost origin on an unlisted port", () => {
expect(isOriginAllowed("http://localhost:5173", allow)).toBe(false);
expect(isOriginAllowed("http://127.0.0.1:8080", allow)).toBe(false);
});
it("with an explicit allowlist, still accepts an exactly-listed localhost origin", () => {
expect(isOriginAllowed("http://localhost", allow)).toBe(true);
expect(isOriginAllowed("http://127.0.0.1", allow)).toBe(true);
});
it("is case-sensitive for non-local allowlist entries per RFC 6454", () => {
expect(isOriginAllowed("HTTPS://Partner.Example.com", ["https://partner.example.com"])).toBe(false);
});
@@ -229,3 +242,68 @@ describe("HTTP transport session + body-cap hardening (ADR-264 F7)", () => {
expect(r.status).toBe(200);
});
});
// ── 8. ADR-264 F7: session-map bounds (cap + idle TTL sweep) ───────────────
describe("HTTP transport session bounds (ADR-264 F7)", () => {
const initBody = (id: number): string =>
JSON.stringify({
jsonrpc: "2.0",
id,
method: "initialize",
params: {
protocolVersion: "2024-11-05",
capabilities: {},
clientInfo: { name: "test-client", version: "0.0.0" },
},
});
// Build directly (not via startServer) so we can inspect the sessions map.
async function startWithApp(
opts: Parameters<typeof buildHttpApp>[1],
basePort: number
): Promise<{
port: number;
sessions: ReturnType<typeof buildHttpApp>["sessions"];
close: () => Promise<void>;
}> {
const { httpServer, sessions } = buildHttpApp(() => makeMockMcpServer(), opts);
const port = basePort + Math.floor(Math.random() * 100);
await new Promise<void>((resolve, reject) => {
httpServer.once("error", reject);
httpServer.listen(port, "127.0.0.1", () => resolve());
});
const close = () =>
new Promise<void>((res, rej) => httpServer.close((e) => (e ? rej(e) : res())));
return { port, sessions, close };
}
const ACCEPT = { Accept: "application/json, text/event-stream" };
it("never exceeds maxSessions — evicts the oldest-idle session at capacity", async () => {
const srv = await startWithApp({ allowedOrigins: ["*"], maxSessions: 2 }, 49800);
try {
for (let i = 0; i < 5; i++) {
await post(srv.port, "/mcp", ACCEPT, initBody(i));
}
expect(srv.sessions.size).toBeLessThanOrEqual(2);
} finally {
await srv.close();
}
});
it("sweeps sessions idle beyond sessionIdleMs", async () => {
const srv = await startWithApp(
{ allowedOrigins: ["*"], sessionIdleMs: 20, sweepIntervalMs: 10 },
49900
);
try {
await post(srv.port, "/mcp", ACCEPT, initBody(1));
expect(srv.sessions.size).toBe(1);
await new Promise((r) => setTimeout(r, 150));
expect(srv.sessions.size).toBe(0);
} finally {
await srv.close();
}
});
});
@@ -0,0 +1,96 @@
/**
* ADR-264 O6 — post-restart job reconciliation.
*
* When the MCP server restarts mid-run, the persisted job record stays frozen
* at 'running' (the child.on('close') that flips it lived in the dead process).
* ruview_job_status must reconcile such a record against the recorded pid and
* the log's "# exit code: N" marker.
*
* We fabricate a persisted record pointing at a KNOWN-DEAD pid (a synchronous
* child that has already exited) and assert the reconciled status.
*/
import { mkdtempSync, writeFileSync } from "node:fs";
import { spawnSync } from "node:child_process";
import os from "node:os";
import path from "node:path";
import { randomUUID } from "node:crypto";
import { jobStatus } from "../src/tools/train-count.js";
import type { RuviewConfig } from "../src/types.js";
/** A pid that has certainly exited: spawnSync waits for the child to finish. */
function deadPid(): number {
const r = spawnSync(process.execPath, ["-e", ""]);
if (typeof r.pid !== "number") throw new Error("could not spawn probe child");
return r.pid;
}
function makeConfig(jobsDir: string): RuviewConfig {
return {
sensingServerUrl: "http://127.0.0.1:19999",
apiToken: undefined,
poseCogBinary: "nonexistent",
countCogBinary: "nonexistent",
jobsDir,
};
}
/** Write a fake persisted 'running' record + its log, return {jobId, config}. */
function seedRunningJob(logBody: string): { jobId: string; config: RuviewConfig } {
const jobsDir = mkdtempSync(path.join(os.tmpdir(), "rvagent-jobs-"));
const jobId = randomUUID();
const logPath = path.join(jobsDir, `${jobId}.log`);
writeFileSync(logPath, logBody);
const record = {
job_id: jobId,
status: "running",
log_path: logPath,
queued_at: Date.now() / 1000,
epochs_total: 5,
pid: deadPid(),
};
writeFileSync(
path.join(jobsDir, `${jobId}.json`),
JSON.stringify(record, null, 2)
);
return { jobId, config: makeConfig(jobsDir) };
}
describe("ruview_job_status reconciliation (ADR-264 O6)", () => {
it("reconciles a dead 'running' job with exit 0 to 'done'", async () => {
const { jobId, config } = seedRunningJob(
"# training...\nepoch 5/5\n# exit code: 0\n"
);
const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
expect(out["ok"]).toBe(true);
const res = out["result"] as Record<string, unknown>;
expect(res["status"]).toBe("done");
});
it("reconciles a dead 'running' job with non-zero exit to 'failed'", async () => {
const { jobId, config } = seedRunningJob(
"# training...\npanic: cuda oom\n# exit code: 101\n"
);
const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
const res = out["result"] as Record<string, unknown>;
expect(res["status"]).toBe("failed");
});
it("marks a dead 'running' job with no exit marker as 'unknown' with a reason", async () => {
const { jobId, config } = seedRunningJob("# training...\nepoch 2/5\n");
const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
const res = out["result"] as Record<string, unknown>;
expect(res["status"]).toBe("unknown");
expect(typeof res["reason"]).toBe("string");
expect(res["reason"]).toMatch(/restarted/i);
});
it("treats a signal-killed marker (null) as 'failed'", async () => {
const { jobId, config } = seedRunningJob(
"# training...\n# exit code: null\n"
);
const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
const res = out["result"] as Record<string, unknown>;
expect(res["status"]).toBe("failed");
});
});