mirror of
https://github.com/ruvnet/RuView
synced 2026-07-02 14:03:19 +00:00
fix(npm): address 10 verified review findings in harness + rvagent before 0.2.0 publish
harness/ruview (@ruvnet/ruview): - guardrails: digit gate now sees numbers inside code spans; F1-style metric tokens followed by ':' or a nearby number are no longer scrubbed (fail-open regressions in the honesty gate) - mcp-server: tools/call requests serialize through a FIFO promise chain (hardware/mutating tools never overlap) while ping/tools/list stay immediate; stdin close drains in-flight responses before exit - tools: which() no longer memoizes negative lookups tools/ruview-mcp (@ruvnet/rvagent): - index: realpath invoked-directly guard — library import no longer connects a stdio transport to the consumer's process - http-transport: explicit allowedOrigins is exact-match only (localhost any-port convenience applies only with no configured allowlist); session map gains maxSessions=64 + 5min idle TTL sweep - train-count: job records persist the child pid and reconcile stale 'running' status after a server restart (exit-code marker or dead pid) - config: cog binary candidates ordered by process.arch .github/workflows/ruview-npm-release.yml: port the full ADR-265 D1 gate (version-literal check, unpacked-size budget, tarball-install smoke test) from npm-packages.yml so the publish path enforces what the header claims. Tests: harness 30→36, rvagent 99→112, all passing. Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
@@ -54,18 +54,77 @@ jobs:
|
||||
- name: Test
|
||||
run: npm test --if-present
|
||||
|
||||
- name: Pack gate (no maps)
|
||||
# ADR-265 D3 — package.json is the only place a version string lives.
|
||||
- name: Version-literal gate
|
||||
run: |
|
||||
set -euo pipefail
|
||||
hits=""
|
||||
for d in src bin; do
|
||||
if [ -d "$d" ]; then
|
||||
hits+=$(grep -rEn '\b[0-9]+\.[0-9]+\.[0-9]+\b' "$d" | grep -vE '127\.0\.0\.1|0\.0\.0\.0' || true)
|
||||
fi
|
||||
done
|
||||
if [ -n "$hits" ]; then
|
||||
echo "Hardcoded version-like literals found (read package.json instead — ADR-265 D3):"
|
||||
echo "$hits"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ADR-265 D1.3 — pack-content gate: no maps AND the per-package
|
||||
# unpacked-size budget (the budgets that npm-packages.yml enforces).
|
||||
- name: Pack gate (no maps + size budget)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
case "${{ inputs.package }}" in
|
||||
# ADR-263: dependency-free harness; budget guards against dep creep.
|
||||
harness/ruview) export UNPACKED_BUDGET=65536 ;;
|
||||
# ADR-264 O2: map-free tarball (was 188 kB with maps).
|
||||
tools/ruview-mcp) export UNPACKED_BUDGET=140000 ;;
|
||||
*) echo "Unknown package '${{ inputs.package }}' — no budget defined"; exit 1 ;;
|
||||
esac
|
||||
npm pack --dry-run --json 2>/dev/null | node -e "
|
||||
const [info] = JSON.parse(require('fs').readFileSync(0, 'utf8'));
|
||||
const budget = Number(process.env.UNPACKED_BUDGET);
|
||||
const maps = info.files.filter((f) => f.path.endsWith('.map'));
|
||||
if (maps.length > 0) {
|
||||
console.error('Tarball contains source maps (ADR-264 F2):', maps.map((m) => m.path));
|
||||
process.exit(1);
|
||||
}
|
||||
console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked\`);
|
||||
if (info.unpackedSize > budget) {
|
||||
console.error(\`Unpacked size \${info.unpackedSize} B exceeds budget \${budget} B\`);
|
||||
process.exit(1);
|
||||
}
|
||||
console.log(\`pack gate OK: \${info.files.length} files, \${info.unpackedSize} B unpacked (budget \${budget} B), 0 maps\`);
|
||||
"
|
||||
|
||||
# ADR-265 D1.4 — install the real tarball and drive each bin/export.
|
||||
- name: Tarball smoke test
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TGZ="$PWD/$(npm pack --silent 2>/dev/null | tail -1)"
|
||||
SMOKE="$(mktemp -d)"
|
||||
cd "$SMOKE"
|
||||
npm init -y > /dev/null
|
||||
npm i --no-fund --no-audit "$TGZ"
|
||||
case "${{ inputs.package }}" in
|
||||
harness/ruview)
|
||||
./node_modules/.bin/ruview --version
|
||||
./node_modules/.bin/ruview doctor
|
||||
# the honesty gate must fail closed on empty input (ADR-263 F1)
|
||||
if ./node_modules/.bin/ruview claim-check; then
|
||||
echo 'claim-check passed with no input — fail-open regression'; exit 1
|
||||
fi
|
||||
node --input-type=module -e "const m = await import('@ruvnet/ruview'); if (!m.TOOLS) process.exit(1);"
|
||||
;;
|
||||
tools/ruview-mcp)
|
||||
# initialize over stdio; server must answer and exit 0 on EOF
|
||||
printf '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"ci","version":"0"}}}\n' \
|
||||
| timeout 30 ./node_modules/.bin/rvagent | grep -q '"serverInfo"'
|
||||
# the ESM export must resolve from the installed tarball (ADR-264 F1)
|
||||
timeout 30 node --input-type=module -e "await import('@ruvnet/rvagent');" < /dev/null
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Claim-check README
|
||||
run: |
|
||||
if [ -f README.md ]; then
|
||||
|
||||
@@ -21,9 +21,13 @@ const METRIC_TERMS = [
|
||||
// 'map' additionally must not be a `.map` file suffix or a hyphenated
|
||||
// compound ("map-free", "map-reduce") — mAP the metric never appears as either.
|
||||
const METRIC_TERMS_SHORT = [/(?<![.\w])map\b(?!-)/, /\bf1\b/, /\bauc\b/, /\biou\b/];
|
||||
// Finding/option labels (F1, O2, …) count as labels unless followed by a
|
||||
// score context — "F1 score 0.91" stays a metric, "after F7 fixes" does not.
|
||||
const LABEL_TOKEN_RE = /\b[fo]\d+\b(?!\s*(?:score|=|\d|%))/g;
|
||||
// Finding/option labels (F1, O2, …) count as labels unless the token sits in a
|
||||
// metric context: an immediately following score/=/%/digit or colon ("F1: 0.91"),
|
||||
// or a number later in the same clause ("F1 reaches 0.91" — an F1-score claim).
|
||||
// Bare option refs ("F7 fixes", "O1–O9", "ADR-263 O2") carry no clause number of
|
||||
// their own and stay labels. (A surviving 'f1' still only fires as a metric when
|
||||
// its scrubbed line actually carries a number — see mentionsMetricTerm.)
|
||||
const LABEL_TOKEN_RE = /\b[fo]\d+\b(?!\s*(?:score|=|\d|%|:))(?![^\n.;]*\d)/g;
|
||||
const CODE_SPAN_RE = /`[^`]*`/g; // backticked identifiers are code, not claims
|
||||
const HAS_NUMBER_RE = /\d/;
|
||||
|
||||
@@ -95,9 +99,14 @@ export function claimCheck(text) {
|
||||
return;
|
||||
}
|
||||
|
||||
// A quantitative claim needs a number: a metric term in plain prose
|
||||
// ("precision matters here") is not a taggable claim (ADR-263 F11).
|
||||
if (!hasPercent && !HAS_NUMBER_RE.test(scrubbed)) return;
|
||||
// A quantitative claim needs a number. Digits hidden in a code span still
|
||||
// count — "accuracy reached `0.95`" is a claim — so test the line with only
|
||||
// finding/option labels stripped, NOT the code-span-scrubbed copy: scrubbing
|
||||
// dropped `0.95` and wrongly short-circuited both the untagged and the
|
||||
// MEASURED-without-reproducer checks below. A bare metric word in prose
|
||||
// ("precision matters here", "every accuracy number must be MEASURED") has no
|
||||
// number and is not a taggable claim (ADR-263 F11).
|
||||
if (!hasPercent && !HAS_NUMBER_RE.test(lower.replace(LABEL_TOKEN_RE, ' '))) return;
|
||||
|
||||
// A metric/percent with no honesty tag at all.
|
||||
if (!tagged) {
|
||||
@@ -111,7 +120,8 @@ export function claimCheck(text) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Tagged MEASURED but cites no reproducer — still a gap.
|
||||
// Tagged MEASURED but cites no reproducer — still a gap (reached now even
|
||||
// when the only number is inside a code span, e.g. "accuracy `0.97` (MEASURED)").
|
||||
if (lower.includes('measured') && !hasReproducer) {
|
||||
findings.push({
|
||||
severity: 'medium',
|
||||
|
||||
@@ -68,16 +68,40 @@ async function handle(msg) {
|
||||
export function startMcpServer() {
|
||||
log(`starting v${SERVER_INFO.version} (protocol ${PROTOCOL_VERSION}, ${listTools().length} tools)`);
|
||||
const rl = createInterface({ input: process.stdin, crlfDelay: Infinity });
|
||||
|
||||
// tools/call runs are serialized through a FIFO promise chain: hardware/mutating
|
||||
// tools (calibrate, serial monitor, flash) must never overlap. ping/tools/list/
|
||||
// initialize/resources/prompts stay immediate (ADR-263 O2 — a health check must
|
||||
// answer during a long tool run). `toolChain` also lets stdin-close drain the
|
||||
// in-flight call so its response is flushed instead of dropped by process.exit.
|
||||
let toolChain = Promise.resolve();
|
||||
|
||||
const dispatch = (msg) => handle(msg).catch((err) => {
|
||||
if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err));
|
||||
log('handler error:', String(err));
|
||||
});
|
||||
|
||||
rl.on('line', (line) => {
|
||||
const s = line.trim();
|
||||
if (!s) return;
|
||||
let msg;
|
||||
try { msg = JSON.parse(s); } catch { return log('bad JSON line dropped'); }
|
||||
// Fire-and-forget: keep reading stdin while tool calls run (ADR-263 O2).
|
||||
handle(msg).catch((err) => {
|
||||
if (msg && msg.id !== undefined) error(msg.id, -32603, String(err && err.message || err));
|
||||
log('handler error:', String(err));
|
||||
if (msg && msg.method === 'tools/call') {
|
||||
toolChain = toolChain.then(() => dispatch(msg)); // one tool at a time
|
||||
} else {
|
||||
dispatch(msg); // health/list/handshake answer immediately, even mid tool run
|
||||
}
|
||||
});
|
||||
|
||||
rl.on('close', () => {
|
||||
// Wait for any queued/in-flight tool call to settle (its response written)
|
||||
// before exiting — fire-and-forget used to race this and drop the response.
|
||||
toolChain.then(() => {
|
||||
log('stdin closed — exiting');
|
||||
const done = () => process.exit(0);
|
||||
// Pipe writes are async; flush buffered stdout before exit.
|
||||
if (process.stdout.writableLength) process.stdout.once('drain', done);
|
||||
else done();
|
||||
});
|
||||
});
|
||||
rl.on('close', () => { log('stdin closed — exiting'); process.exit(0); });
|
||||
}
|
||||
|
||||
@@ -32,8 +32,9 @@ export function findRepoRoot(start = process.cwd()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Dep-free PATH scan, memoized for the process lifetime (ADR-263 O8) — no
|
||||
// shell subprocess per lookup.
|
||||
// Dep-free PATH scan (ADR-263 O8) — no shell subprocess per lookup. Only hits
|
||||
// are memoized: a miss can resolve later in a long-lived MCP session (the
|
||||
// operator installs python/the CLI mid-run), so misses are re-probed each call.
|
||||
const whichCache = new Map();
|
||||
export function which(cmd) {
|
||||
if (whichCache.has(cmd)) return whichCache.get(cmd);
|
||||
@@ -54,7 +55,7 @@ export function which(cmd) {
|
||||
} catch { /* keep scanning */ }
|
||||
}
|
||||
}
|
||||
whichCache.set(cmd, found);
|
||||
if (found !== null) whichCache.set(cmd, found);
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
@@ -99,3 +99,50 @@ test('MCP server answers ping while a long tools/call is in flight (ADR-263 O2)'
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test('tools/call executions are serialized — two slow calls run sequentially', { skip: !which('python') && !which('python3') ? 'python not on PATH' : false }, async () => {
|
||||
// Two verify.py that each sleep 0.8 s. Serialized ⇒ ~1.6 s+; concurrent ⇒ ~0.8 s.
|
||||
const repo = mkdtempSync(join(tmpdir(), 'ruview-mcp-serial-'));
|
||||
const proofDir = join(repo, 'archive', 'v1', 'data', 'proof');
|
||||
mkdirSync(proofDir, { recursive: true });
|
||||
writeFileSync(join(proofDir, 'verify.py'), 'import time\ntime.sleep(0.8)\nprint("VERDICT: PASS")\n');
|
||||
|
||||
const s = startServer();
|
||||
try {
|
||||
s.send({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} });
|
||||
await s.next(1);
|
||||
|
||||
const t0 = Date.now();
|
||||
const a = s.next(20);
|
||||
const b = s.next(21);
|
||||
s.send({ jsonrpc: '2.0', id: 20, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } });
|
||||
s.send({ jsonrpc: '2.0', id: 21, method: 'tools/call', params: { name: 'ruview_verify', arguments: { repo } } });
|
||||
const [ra, rb] = await Promise.all([a, b]);
|
||||
const elapsed = Date.now() - t0;
|
||||
|
||||
assert.equal(JSON.parse(ra.result.content[0].text).verdict, 'PASS');
|
||||
assert.equal(JSON.parse(rb.result.content[0].text).verdict, 'PASS');
|
||||
assert.ok(elapsed > 1400, `two 0.8 s tool calls finished in ${elapsed} ms — they overlapped instead of serializing`);
|
||||
} finally {
|
||||
s.close();
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test('stdin close flushes an in-flight tools/call response before exit', async () => {
|
||||
const child = spawn(process.execPath, [CLI, 'mcp', 'start'], { stdio: ['pipe', 'pipe', 'pipe'] });
|
||||
let out = '';
|
||||
child.stdout.on('data', (d) => { out += d; });
|
||||
const exited = new Promise((res) => child.on('exit', res));
|
||||
|
||||
// Write a tools/call then immediately close stdin. The old fire-and-forget
|
||||
// dispatch raced rl 'close' → process.exit and could drop this response.
|
||||
child.stdin.write(JSON.stringify({ jsonrpc: '2.0', id: 42, method: 'tools/call', params: { name: 'ruview_onboard', arguments: {} } }) + '\n');
|
||||
child.stdin.end();
|
||||
|
||||
await exited;
|
||||
const msgs = out.trim().split('\n').filter(Boolean).map((l) => JSON.parse(l));
|
||||
const resp = msgs.find((m) => m.id === 42);
|
||||
assert.ok(resp, 'the in-flight tools/call response must be flushed to stdout before exit');
|
||||
assert.equal(resp.result.isError, false);
|
||||
});
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
|
||||
import { test } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { readdirSync, readFileSync } from 'node:fs';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { readdirSync, readFileSync, mkdtempSync, writeFileSync, rmSync } from 'node:fs';
|
||||
import { join, dirname, delimiter } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { claimCheck, summarize } from '../src/guardrails.js';
|
||||
import { TOOLS, TOOL_ALIASES, runTool, listTools, findRepoRoot, run, which } from '../src/tools.js';
|
||||
@@ -58,6 +59,33 @@ test('guardrail still catches real short-token metric claims', () => {
|
||||
assert.equal(claimCheck('IoU 0.75 across rooms.').ok, false);
|
||||
});
|
||||
|
||||
// Digits hidden in a code span still make a claim — scrubbing must not blind the
|
||||
// number gate to `0.95` (regression: code-span number bypassed the gate).
|
||||
test('guardrail flags an accuracy number stated inside a code span', () => {
|
||||
const r = claimCheck('Count accuracy reached `0.95` in our tests.');
|
||||
assert.equal(r.ok, false, JSON.stringify(r.findings));
|
||||
assert.ok(r.findings.some((f) => /not tagged/i.test(f.reason)));
|
||||
});
|
||||
|
||||
// A MEASURED claim whose only number hides in a code span must still reach the
|
||||
// missing-reproducer check (regression: the scrubbed gate short-circuited it).
|
||||
// Bare metric prose with no number at all (e.g. the README rule text) stays a pass.
|
||||
test('guardrail flags a MEASURED code-span number with no reproducer', () => {
|
||||
const r = claimCheck('Detection accuracy `0.97` on the set (MEASURED).');
|
||||
assert.equal(r.ok, false, JSON.stringify(r.findings));
|
||||
assert.ok(r.findings.some((f) => /no reproducer/i.test(f.reason)));
|
||||
assert.equal(claimCheck('Every accuracy number must be MEASURED against a baseline.').ok, true);
|
||||
});
|
||||
|
||||
// F1-score phrasings ("F1: 0.91", "F1 reaches 0.91") were scrubbed as option
|
||||
// labels and slipped through; option refs alone must still not false-positive.
|
||||
test('guardrail catches F1-score claims but not bare option refs (ADR-263 F11)', () => {
|
||||
assert.equal(claimCheck('F1: 0.91 on the held-out set.').ok, false, 'F1: value is a metric claim');
|
||||
assert.equal(claimCheck('F1 reaches 0.91 on the held-out set.').ok, false, 'F1 with a nearby number is a claim');
|
||||
assert.equal(claimCheck('Options O1–O9 are tracked in ADR-263 O2.').ok, true, 'option labels are not metrics');
|
||||
assert.equal(claimCheck('ADR-263 O2 lands the exports fix.').ok, true);
|
||||
});
|
||||
|
||||
test('summarize gives PASS/finding text', () => {
|
||||
assert.match(summarize(claimCheck('nothing here')), /PASS/);
|
||||
assert.match(summarize(claimCheck('100% accuracy')), /finding/);
|
||||
@@ -156,10 +184,29 @@ test('run() bounds captured output instead of dying on big streams (ADR-263 O4)'
|
||||
assert.ok(r.stdout.includes('TAIL_MARKER'), 'tail must keep the end of the stream');
|
||||
});
|
||||
|
||||
test('which() finds node and memoizes misses', () => {
|
||||
test('which() finds node and re-probes misses (hits are cached)', () => {
|
||||
assert.ok(which('node'), 'node must be on PATH in the test env');
|
||||
assert.equal(which('definitely-not-a-binary-xyz'), null);
|
||||
assert.equal(which('definitely-not-a-binary-xyz'), null); // cached path
|
||||
assert.equal(which('definitely-not-a-binary-xyz'), null); // re-probed, still absent
|
||||
});
|
||||
|
||||
// ADR-263 O8: a miss must not be cached — an operator who installs a tool
|
||||
// mid-session (e.g. python after a python_missing failure) must be found next call.
|
||||
test('which() re-probes after a miss so a newly-installed tool is found', () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'ruview-which-'));
|
||||
const name = 'ruview-probe-xyz';
|
||||
const isWin = process.platform === 'win32';
|
||||
const bin = join(dir, isWin ? `${name}.cmd` : name);
|
||||
const prevPath = process.env.PATH;
|
||||
try {
|
||||
assert.equal(which(name), null, 'not on PATH yet → miss');
|
||||
writeFileSync(bin, isWin ? '@echo off\n' : '#!/bin/sh\n', { mode: 0o755 });
|
||||
process.env.PATH = dir + delimiter + prevPath;
|
||||
assert.ok(which(name), 'installed mid-session → the miss must not have been cached');
|
||||
} finally {
|
||||
process.env.PATH = prevPath;
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test('CLI run(): claim-check exits non-zero on a bad claim', async () => {
|
||||
|
||||
Generated
+1
-1
@@ -12,7 +12,7 @@
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
"bin": {
|
||||
"ruview": "dist/index.js"
|
||||
"ruview-cli": "dist/index.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.14.0",
|
||||
|
||||
@@ -51,21 +51,35 @@ export function loadConfig(): RuviewConfig {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Ordered cog-binary candidate paths for a host of the given CPU architecture.
|
||||
* The native-arch build is probed FIRST: an appliance that ships both
|
||||
* `cog-<id>-arm` and `cog-<id>-x86_64` must never hand back the wrong-arch
|
||||
* binary (ADR-264 F8/O7 — the pre-review order tried `-arm` unconditionally).
|
||||
* The `/usr/local/bin` and bare-name (PATH) fallbacks follow, arch-agnostic.
|
||||
*
|
||||
* Pure and arch-injectable so the ordering is unit-testable.
|
||||
*/
|
||||
export function cogBinaryCandidates(
|
||||
name: string,
|
||||
arch: string = process.arch
|
||||
): string[] {
|
||||
const id = name.replace("cog-", "");
|
||||
const dir = `/var/lib/cognitum/apps/${id}`;
|
||||
const arm = `${dir}/cog-${id}-arm`;
|
||||
const x86 = `${dir}/cog-${id}-x86_64`;
|
||||
// arm64 → prefer -arm; everything else (notably x64) → prefer -x86_64.
|
||||
const archOrdered = arch === "arm64" ? [arm, x86] : [x86, arm];
|
||||
return [...archOrdered, `/usr/local/bin/${name}`];
|
||||
}
|
||||
|
||||
/**
|
||||
* Locate a cog binary in the common appliance install locations, probing each
|
||||
* candidate (ADR-264 F8/O7 — the pre-review version built this list and then
|
||||
* unconditionally returned the bare name). Falls back to the bare name (PATH
|
||||
* candidate in native-arch-first order. Falls back to the bare name (PATH
|
||||
* resolution at spawn time) when no candidate exists.
|
||||
*/
|
||||
function detectCogBinary(name: string): string {
|
||||
const id = name.replace("cog-", "");
|
||||
// Common install paths for Cognitum cog binaries on Linux/macOS appliances.
|
||||
const candidates = [
|
||||
`/var/lib/cognitum/apps/${id}/cog-${id}-arm`,
|
||||
`/var/lib/cognitum/apps/${id}/cog-${id}-x86_64`,
|
||||
`/usr/local/bin/${name}`,
|
||||
];
|
||||
for (const candidate of candidates) {
|
||||
for (const candidate of cogBinaryCandidates(name)) {
|
||||
if (existsSync(candidate)) return candidate;
|
||||
}
|
||||
return name; // bare name — rely on PATH; spawn fails gracefully if absent
|
||||
|
||||
@@ -13,8 +13,11 @@
|
||||
*
|
||||
* Security model (ADR-124 §6 + ADR-264 F7):
|
||||
* - Origin validation: browser-style requests whose Origin is not local
|
||||
* are rejected with 403 before reaching the MCP layer. Localhost origins
|
||||
* match on hostname, ANY port (http://localhost:5173 is local).
|
||||
* are rejected with 403 before reaching the MCP layer. With NO explicit
|
||||
* allowlist, localhost origins match on hostname, ANY port
|
||||
* (http://localhost:5173 is local). When an explicit allowedOrigins list is
|
||||
* configured, matching is exact — the any-port-localhost convenience is off,
|
||||
* so a localhost peer on an unlisted port must be added to be accepted.
|
||||
* - Bearer token: when RVAGENT_HTTP_TOKEN is set, requests must carry
|
||||
* Authorization: Bearer <token>; missing/wrong tokens → 401.
|
||||
* - Body cap: request bodies over 1 MiB are rejected with 413 (the
|
||||
@@ -55,6 +58,20 @@ export interface HttpTransportOptions {
|
||||
bearerToken?: string;
|
||||
/** Maximum accepted request body size in bytes (default: 1 MiB). */
|
||||
maxBodyBytes?: number;
|
||||
/**
|
||||
* Maximum number of concurrent live sessions (default: 64). When a new
|
||||
* `initialize` arrives at the cap, the oldest-idle session is evicted (its
|
||||
* transport closed) to make room — bounds memory against a flaky client that
|
||||
* loops `initialize` or a malicious localhost peer (ADR-264 F7).
|
||||
*/
|
||||
maxSessions?: number;
|
||||
/**
|
||||
* Idle time-to-live for a session in ms (default: 5 min). Sessions with no
|
||||
* request activity for longer than this are swept and closed.
|
||||
*/
|
||||
sessionIdleMs?: number;
|
||||
/** How often the idle-session sweeper runs, in ms (default: 60 s). */
|
||||
sweepIntervalMs?: number;
|
||||
}
|
||||
|
||||
export interface HttpTransportResult {
|
||||
@@ -69,6 +86,9 @@ export interface HttpTransportResult {
|
||||
const DEFAULT_HOST = "127.0.0.1";
|
||||
const DEFAULT_PORT = 3001;
|
||||
const DEFAULT_MAX_BODY_BYTES = 1024 * 1024;
|
||||
const DEFAULT_MAX_SESSIONS = 64;
|
||||
const DEFAULT_SESSION_IDLE_MS = 5 * 60 * 1000;
|
||||
const DEFAULT_SWEEP_INTERVAL_MS = 60 * 1000;
|
||||
const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]);
|
||||
|
||||
/**
|
||||
@@ -76,8 +96,11 @@ const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]);
|
||||
* Returns true if the request should be allowed, false if it should be rejected.
|
||||
*
|
||||
* An absent Origin header is allowed (same-origin non-browser requests, curl,
|
||||
* etc.). A localhost origin is allowed on any port (real browser origins carry
|
||||
* ports — ADR-264 F7). Anything else must match the allowlist exactly.
|
||||
* etc.). When NO explicit allowlist was configured (empty list), a localhost
|
||||
* origin is allowed on any port as a convenience — real browser origins carry
|
||||
* ports (ADR-264 F7). When an explicit allowlist IS configured, matching is
|
||||
* exact: the any-port-localhost shortcut is disabled so an operator who pins an
|
||||
* allowlist actually gets it (a looped-back peer on an unlisted port is denied).
|
||||
*/
|
||||
export function isOriginAllowed(
|
||||
origin: string | undefined,
|
||||
@@ -86,6 +109,8 @@ export function isOriginAllowed(
|
||||
if (origin === undefined) return true; // no Origin = not a cross-origin browser request
|
||||
if (allowedOrigins.includes("*")) return true;
|
||||
if (allowedOrigins.includes(origin)) return true;
|
||||
// Explicit allowlist ⇒ exact matching only; skip the localhost convenience.
|
||||
if (allowedOrigins.length > 0) return false;
|
||||
try {
|
||||
const u = new URL(origin);
|
||||
return (
|
||||
@@ -142,7 +167,55 @@ export function buildHttpApp(
|
||||
const allowedOrigins: string[] = opts.allowedOrigins ?? [];
|
||||
const bearerToken = opts.bearerToken ?? process.env["RVAGENT_HTTP_TOKEN"];
|
||||
const maxBodyBytes = opts.maxBodyBytes ?? DEFAULT_MAX_BODY_BYTES;
|
||||
const maxSessions = opts.maxSessions ?? DEFAULT_MAX_SESSIONS;
|
||||
const sessionIdleMs = opts.sessionIdleMs ?? DEFAULT_SESSION_IDLE_MS;
|
||||
const sweepIntervalMs = opts.sweepIntervalMs ?? DEFAULT_SWEEP_INTERVAL_MS;
|
||||
const sessions = new Map<string, StreamableHTTPServerTransport>();
|
||||
// lastSeen tracks per-session request activity so the sweeper and the
|
||||
// oldest-idle eviction can bound the session map (ADR-264 F7).
|
||||
const lastSeen = new Map<string, number>();
|
||||
|
||||
/** Mark a session as freshly used. */
|
||||
function touch(sessionId: string): void {
|
||||
lastSeen.set(sessionId, Date.now());
|
||||
}
|
||||
|
||||
/** Close a session's transport and drop it from the bookkeeping maps. */
|
||||
function closeSession(id: string): void {
|
||||
const transport = sessions.get(id);
|
||||
sessions.delete(id);
|
||||
lastSeen.delete(id);
|
||||
if (transport) {
|
||||
try {
|
||||
void transport.close(); // onclose is idempotent against the maps above
|
||||
} catch {
|
||||
/* best-effort: a half-open transport must not block eviction */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Evict the session that has been idle longest — called when at capacity. */
|
||||
function evictOldestIdle(): void {
|
||||
let oldestId: string | undefined;
|
||||
let oldestSeen = Infinity;
|
||||
for (const [id, seen] of lastSeen) {
|
||||
if (seen < oldestSeen) {
|
||||
oldestSeen = seen;
|
||||
oldestId = id;
|
||||
}
|
||||
}
|
||||
if (oldestId !== undefined) closeSession(oldestId);
|
||||
}
|
||||
|
||||
/** Periodic sweep: close sessions idle beyond sessionIdleMs. */
|
||||
function sweepIdleSessions(): void {
|
||||
const now = Date.now();
|
||||
for (const [id, seen] of lastSeen) {
|
||||
if (now - seen > sessionIdleMs) closeSession(id);
|
||||
}
|
||||
}
|
||||
const sweepTimer = setInterval(sweepIdleSessions, sweepIntervalMs);
|
||||
sweepTimer.unref(); // never keep the process alive just to sweep
|
||||
|
||||
const httpServer = createServer((req: IncomingMessage, res: ServerResponse) => {
|
||||
void (async () => {
|
||||
@@ -194,6 +267,7 @@ export function buildHttpApp(
|
||||
json(res, 404, { error: `Unknown session "${sessionId}"` });
|
||||
return;
|
||||
}
|
||||
touch(sessionId);
|
||||
await transport.handleRequest(req, res, parsed);
|
||||
return;
|
||||
}
|
||||
@@ -206,14 +280,21 @@ export function buildHttpApp(
|
||||
});
|
||||
return;
|
||||
}
|
||||
// Bound the session map: at capacity, reclaim the oldest-idle slot
|
||||
// before minting a new session (ADR-264 F7).
|
||||
if (sessions.size >= maxSessions) evictOldestIdle();
|
||||
const transport = new StreamableHTTPServerTransport({
|
||||
sessionIdGenerator: () => randomUUID(),
|
||||
onsessioninitialized: (id: string) => {
|
||||
sessions.set(id, transport);
|
||||
touch(id);
|
||||
},
|
||||
});
|
||||
transport.onclose = () => {
|
||||
if (transport.sessionId !== undefined) sessions.delete(transport.sessionId);
|
||||
if (transport.sessionId !== undefined) {
|
||||
sessions.delete(transport.sessionId);
|
||||
lastSeen.delete(transport.sessionId);
|
||||
}
|
||||
};
|
||||
const mcpServer = serverFactory();
|
||||
await mcpServer.connect(transport as Parameters<typeof mcpServer.connect>[0]);
|
||||
@@ -228,6 +309,7 @@ export function buildHttpApp(
|
||||
json(res, 400, { error: "Bad Request: missing or unknown mcp-session-id" });
|
||||
return;
|
||||
}
|
||||
if (sessionId !== undefined) touch(sessionId);
|
||||
await transport.handleRequest(req, res);
|
||||
return;
|
||||
}
|
||||
@@ -239,6 +321,8 @@ export function buildHttpApp(
|
||||
});
|
||||
});
|
||||
|
||||
httpServer.on("close", () => clearInterval(sweepTimer));
|
||||
|
||||
return { httpServer, sessions };
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,9 @@
|
||||
*/
|
||||
|
||||
import { createRequire } from "node:module";
|
||||
import { realpathSync } from "node:fs";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { argv } from "node:process";
|
||||
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
||||
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
||||
import {
|
||||
@@ -355,7 +358,26 @@ async function main(): Promise<void> {
|
||||
);
|
||||
}
|
||||
|
||||
main().catch((e) => {
|
||||
process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`);
|
||||
process.exit(1);
|
||||
});
|
||||
// CLI guard: boot the server only when this module is the entrypoint — invoked
|
||||
// as the `rvagent` / `ruview-mcp` bin or `node dist/index.js`. Importing it as a
|
||||
// library (`import { buildServer } from "@ruvnet/rvagent"`) must NOT side-effect
|
||||
// connect a StdioServerTransport to the consumer's stdin/stdout. Realpath both
|
||||
// sides because npm's bin shim is a symlink and passes a non-normalized,
|
||||
// possibly case-skewed argv[1] on Windows (mirrors harness/ruview/bin/cli.js).
|
||||
const invokedDirectly = (() => {
|
||||
if (!argv[1]) return false;
|
||||
try {
|
||||
const a = realpathSync(argv[1]);
|
||||
const b = realpathSync(fileURLToPath(import.meta.url));
|
||||
return process.platform === "win32" ? a.toLowerCase() === b.toLowerCase() : a === b;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
})();
|
||||
|
||||
if (invokedDirectly) {
|
||||
main().catch((e) => {
|
||||
process.stderr.write(`[ruview-mcp] Fatal: ${String(e)}\n`);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -76,10 +76,18 @@ export const jobStatusSchema = z.object({
|
||||
export type JobStatusInput = z.infer<typeof jobStatusSchema>;
|
||||
|
||||
interface JobRecord {
|
||||
status: "queued" | "running" | "done" | "failed";
|
||||
status: "queued" | "running" | "done" | "failed" | "unknown";
|
||||
log_path: string;
|
||||
queued_at: number;
|
||||
epochs_total: number;
|
||||
/**
|
||||
* OS pid of the training child. Persisted so a later process (e.g. after an
|
||||
* MCP server restart) can tell whether a job still marked 'running' actually
|
||||
* outlived the process that spawned it (ADR-264 O6).
|
||||
*/
|
||||
pid?: number | undefined;
|
||||
/** Human-readable explanation attached during reconciliation (unknown state). */
|
||||
reason?: string | undefined;
|
||||
}
|
||||
|
||||
// In-process job registry, mirrored to <jobsDir>/<id>.json on every state
|
||||
@@ -113,12 +121,41 @@ function loadPersistedJob(jobsDir: string, jobId: string): JobRecord | undefined
|
||||
log_path: raw.log_path,
|
||||
queued_at: typeof raw.queued_at === "number" ? raw.queued_at : 0,
|
||||
epochs_total: typeof raw.epochs_total === "number" ? raw.epochs_total : 0,
|
||||
pid: typeof raw.pid === "number" ? raw.pid : undefined,
|
||||
reason: typeof raw.reason === "string" ? raw.reason : undefined,
|
||||
};
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Is `pid` still a live process? `process.kill(pid, 0)` sends no signal but
|
||||
* probes existence: ESRCH ⇒ gone; EPERM ⇒ alive but owned by another user
|
||||
* (treated as alive so we never falsely reconcile a still-running job).
|
||||
*/
|
||||
function isProcessAlive(pid: number): boolean {
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch (e) {
|
||||
return (e as NodeJS.ErrnoException).code === "EPERM";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan log lines (tail) for the "# exit code: N" marker the child.on('close')
|
||||
* handler appends. `found:false` means the process died without the marker —
|
||||
* i.e. this server never saw the close (it restarted mid-run).
|
||||
*/
|
||||
function findExitMarker(lines: string[]): { found: boolean; code: number | null } {
|
||||
for (let i = lines.length - 1; i >= 0; i--) {
|
||||
const m = /^# exit code: (-?\d+|null)$/.exec((lines[i] ?? "").trim());
|
||||
if (m) return { found: true, code: m[1] === "null" ? null : Number(m[1]) };
|
||||
}
|
||||
return { found: false, code: null };
|
||||
}
|
||||
|
||||
/** Read the last `maxLines` lines of a file without loading the whole log. */
|
||||
function tailLines(filePath: string, maxLines: number, maxBytes = 64 * 1024): string[] {
|
||||
const size = statSync(filePath).size;
|
||||
@@ -207,6 +244,10 @@ export async function trainCount(
|
||||
closeSync(logFdOut);
|
||||
closeSync(logFdErr);
|
||||
|
||||
// Record the child pid so a later process can reconcile a stale 'running'
|
||||
// record after a server restart (child.pid is undefined only if spawn failed
|
||||
// synchronously, in which case the 'error' handler flips status to 'failed').
|
||||
record.pid = child.pid;
|
||||
record.status = "running";
|
||||
persistJob(logDir, jobId, record);
|
||||
|
||||
@@ -244,7 +285,7 @@ export async function jobStatus(
|
||||
config: RuviewConfig
|
||||
): Promise<object> {
|
||||
// Memory first, then the persisted record (survives server restarts).
|
||||
const job = jobRegistry.get(input.job_id) ?? loadPersistedJob(config.jobsDir, input.job_id);
|
||||
let job = jobRegistry.get(input.job_id) ?? loadPersistedJob(config.jobsDir, input.job_id);
|
||||
if (!job) {
|
||||
return {
|
||||
ok: false,
|
||||
@@ -252,6 +293,33 @@ export async function jobStatus(
|
||||
};
|
||||
}
|
||||
|
||||
// Reconcile a 'running' record whose owning process is gone. The status flip
|
||||
// to done/failed lives only in the spawning process's child.on('close'/'error')
|
||||
// handlers; if this server restarted mid-run, the record froze at 'running'
|
||||
// (ADR-264 O6). When the pid is dead, recover the true outcome from the log's
|
||||
// "# exit code: N" marker, else surface an honest 'unknown'.
|
||||
if (job.status === "running" && typeof job.pid === "number" && !isProcessAlive(job.pid)) {
|
||||
let tail: string[] = [];
|
||||
try {
|
||||
tail = tailLines(job.log_path, 40);
|
||||
} catch {
|
||||
/* log unreadable — treated as no marker below */
|
||||
}
|
||||
const marker = findExitMarker(tail);
|
||||
const reconciled: JobRecord = { ...job };
|
||||
if (marker.found) {
|
||||
reconciled.status = marker.code === 0 ? "done" : "failed";
|
||||
reconciled.reason = undefined;
|
||||
} else {
|
||||
reconciled.status = "unknown";
|
||||
reconciled.reason =
|
||||
"process gone, no exit marker — server likely restarted mid-run";
|
||||
}
|
||||
jobRegistry.set(input.job_id, reconciled);
|
||||
persistJob(config.jobsDir, input.job_id, reconciled);
|
||||
job = reconciled;
|
||||
}
|
||||
|
||||
// Bounded tail read — never load a multi-GB training log wholesale.
|
||||
let recentLog: string[] = [];
|
||||
try {
|
||||
@@ -266,6 +334,7 @@ export async function jobStatus(
|
||||
log_path: job.log_path,
|
||||
recent_log: recentLog,
|
||||
epochs_total: job.epochs_total,
|
||||
...(job.reason !== undefined ? { reason: job.reason } : {}),
|
||||
};
|
||||
|
||||
return { ok: true, result };
|
||||
|
||||
@@ -115,7 +115,12 @@ export interface TrainJobResult {
|
||||
/** Output of ruview_job_status. */
|
||||
export interface JobStatusResult {
|
||||
job_id: string;
|
||||
status: "queued" | "running" | "done" | "failed";
|
||||
/**
|
||||
* 'unknown' is only ever produced by post-restart reconciliation: a record
|
||||
* frozen at 'running' whose owning process is gone and whose log carries no
|
||||
* exit-code marker (see reason).
|
||||
*/
|
||||
status: "queued" | "running" | "done" | "failed" | "unknown";
|
||||
progress_pct?: number | undefined;
|
||||
/** Most recent log lines (last 20). */
|
||||
recent_log: string[];
|
||||
@@ -124,6 +129,8 @@ export interface JobStatusResult {
|
||||
epochs_done?: number | undefined;
|
||||
/** Total epochs scheduled. */
|
||||
epochs_total?: number | undefined;
|
||||
/** Explanation attached when status was reconciled to 'unknown'. */
|
||||
reason?: string | undefined;
|
||||
}
|
||||
|
||||
// ── Vitals (ADR-124 §6 Python surface parity: ws.py:74-88) ───────────────
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* ADR-264 F8/O7 — cog-binary detection must be architecture-aware.
|
||||
*
|
||||
* detectCogBinary() itself probes hardcoded /var/lib paths, so it is not
|
||||
* cheaply testable without fs mocking. The bug it fixes, however, lives purely
|
||||
* in the candidate ORDER, which cogBinaryCandidates() exposes as a pure,
|
||||
* arch-injectable function — that is what we pin here.
|
||||
*/
|
||||
|
||||
import { cogBinaryCandidates } from "../src/config.js";
|
||||
|
||||
describe("cogBinaryCandidates()", () => {
|
||||
it("probes -arm before -x86_64 on arm64 hosts", () => {
|
||||
const c = cogBinaryCandidates("cog-person-count", "arm64");
|
||||
const arm = c.findIndex((p) => p.endsWith("cog-person-count-arm"));
|
||||
const x86 = c.findIndex((p) => p.endsWith("cog-person-count-x86_64"));
|
||||
expect(arm).toBeGreaterThanOrEqual(0);
|
||||
expect(x86).toBeGreaterThanOrEqual(0);
|
||||
expect(arm).toBeLessThan(x86);
|
||||
});
|
||||
|
||||
it("probes -x86_64 before -arm on x64 hosts", () => {
|
||||
const c = cogBinaryCandidates("cog-person-count", "x64");
|
||||
const arm = c.findIndex((p) => p.endsWith("cog-person-count-arm"));
|
||||
const x86 = c.findIndex((p) => p.endsWith("cog-person-count-x86_64"));
|
||||
expect(x86).toBeLessThan(arm);
|
||||
});
|
||||
|
||||
it("defaults an unknown arch to the x86_64-first order", () => {
|
||||
const c = cogBinaryCandidates("cog-pose-estimation", "riscv64");
|
||||
const arm = c.findIndex((p) => p.endsWith("cog-pose-estimation-arm"));
|
||||
const x86 = c.findIndex((p) => p.endsWith("cog-pose-estimation-x86_64"));
|
||||
expect(x86).toBeLessThan(arm);
|
||||
});
|
||||
|
||||
it("keeps the /usr/local/bin and bare-name PATH fallbacks last", () => {
|
||||
const c = cogBinaryCandidates("cog-person-count", "arm64");
|
||||
// The two arch builds come first; the /usr/local/bin fallback follows them.
|
||||
expect(c[c.length - 1]).toBe("/usr/local/bin/cog-person-count");
|
||||
expect(c).toHaveLength(3);
|
||||
});
|
||||
|
||||
it("derives the id by stripping the cog- prefix once", () => {
|
||||
const c = cogBinaryCandidates("cog-person-count", "x64");
|
||||
expect(c[0]).toBe(
|
||||
"/var/lib/cognitum/apps/person-count/cog-person-count-x86_64"
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -110,6 +110,19 @@ describe("isOriginAllowed()", () => {
|
||||
expect(isOriginAllowed("https://evil.example.com:443", [])).toBe(false);
|
||||
});
|
||||
|
||||
// ADR-264 F7 hardening: an EXPLICIT allowlist means exact matching only. The
|
||||
// any-port-localhost convenience applies solely to the empty-allowlist case,
|
||||
// so an operator who pins an allowlist actually gets it.
|
||||
it("with an explicit allowlist, rejects a localhost origin on an unlisted port", () => {
|
||||
expect(isOriginAllowed("http://localhost:5173", allow)).toBe(false);
|
||||
expect(isOriginAllowed("http://127.0.0.1:8080", allow)).toBe(false);
|
||||
});
|
||||
|
||||
it("with an explicit allowlist, still accepts an exactly-listed localhost origin", () => {
|
||||
expect(isOriginAllowed("http://localhost", allow)).toBe(true);
|
||||
expect(isOriginAllowed("http://127.0.0.1", allow)).toBe(true);
|
||||
});
|
||||
|
||||
it("is case-sensitive for non-local allowlist entries per RFC 6454", () => {
|
||||
expect(isOriginAllowed("HTTPS://Partner.Example.com", ["https://partner.example.com"])).toBe(false);
|
||||
});
|
||||
@@ -229,3 +242,68 @@ describe("HTTP transport session + body-cap hardening (ADR-264 F7)", () => {
|
||||
expect(r.status).toBe(200);
|
||||
});
|
||||
});
|
||||
|
||||
// ── 8. ADR-264 F7: session-map bounds (cap + idle TTL sweep) ───────────────
|
||||
|
||||
describe("HTTP transport session bounds (ADR-264 F7)", () => {
|
||||
const initBody = (id: number): string =>
|
||||
JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
method: "initialize",
|
||||
params: {
|
||||
protocolVersion: "2024-11-05",
|
||||
capabilities: {},
|
||||
clientInfo: { name: "test-client", version: "0.0.0" },
|
||||
},
|
||||
});
|
||||
|
||||
// Build directly (not via startServer) so we can inspect the sessions map.
|
||||
async function startWithApp(
|
||||
opts: Parameters<typeof buildHttpApp>[1],
|
||||
basePort: number
|
||||
): Promise<{
|
||||
port: number;
|
||||
sessions: ReturnType<typeof buildHttpApp>["sessions"];
|
||||
close: () => Promise<void>;
|
||||
}> {
|
||||
const { httpServer, sessions } = buildHttpApp(() => makeMockMcpServer(), opts);
|
||||
const port = basePort + Math.floor(Math.random() * 100);
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
httpServer.once("error", reject);
|
||||
httpServer.listen(port, "127.0.0.1", () => resolve());
|
||||
});
|
||||
const close = () =>
|
||||
new Promise<void>((res, rej) => httpServer.close((e) => (e ? rej(e) : res())));
|
||||
return { port, sessions, close };
|
||||
}
|
||||
|
||||
const ACCEPT = { Accept: "application/json, text/event-stream" };
|
||||
|
||||
it("never exceeds maxSessions — evicts the oldest-idle session at capacity", async () => {
|
||||
const srv = await startWithApp({ allowedOrigins: ["*"], maxSessions: 2 }, 49800);
|
||||
try {
|
||||
for (let i = 0; i < 5; i++) {
|
||||
await post(srv.port, "/mcp", ACCEPT, initBody(i));
|
||||
}
|
||||
expect(srv.sessions.size).toBeLessThanOrEqual(2);
|
||||
} finally {
|
||||
await srv.close();
|
||||
}
|
||||
});
|
||||
|
||||
it("sweeps sessions idle beyond sessionIdleMs", async () => {
|
||||
const srv = await startWithApp(
|
||||
{ allowedOrigins: ["*"], sessionIdleMs: 20, sweepIntervalMs: 10 },
|
||||
49900
|
||||
);
|
||||
try {
|
||||
await post(srv.port, "/mcp", ACCEPT, initBody(1));
|
||||
expect(srv.sessions.size).toBe(1);
|
||||
await new Promise((r) => setTimeout(r, 150));
|
||||
expect(srv.sessions.size).toBe(0);
|
||||
} finally {
|
||||
await srv.close();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
/**
|
||||
* ADR-264 O6 — post-restart job reconciliation.
|
||||
*
|
||||
* When the MCP server restarts mid-run, the persisted job record stays frozen
|
||||
* at 'running' (the child.on('close') that flips it lived in the dead process).
|
||||
* ruview_job_status must reconcile such a record against the recorded pid and
|
||||
* the log's "# exit code: N" marker.
|
||||
*
|
||||
* We fabricate a persisted record pointing at a KNOWN-DEAD pid (a synchronous
|
||||
* child that has already exited) and assert the reconciled status.
|
||||
*/
|
||||
|
||||
import { mkdtempSync, writeFileSync } from "node:fs";
|
||||
import { spawnSync } from "node:child_process";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { jobStatus } from "../src/tools/train-count.js";
|
||||
import type { RuviewConfig } from "../src/types.js";
|
||||
|
||||
/** A pid that has certainly exited: spawnSync waits for the child to finish. */
|
||||
function deadPid(): number {
|
||||
const r = spawnSync(process.execPath, ["-e", ""]);
|
||||
if (typeof r.pid !== "number") throw new Error("could not spawn probe child");
|
||||
return r.pid;
|
||||
}
|
||||
|
||||
function makeConfig(jobsDir: string): RuviewConfig {
|
||||
return {
|
||||
sensingServerUrl: "http://127.0.0.1:19999",
|
||||
apiToken: undefined,
|
||||
poseCogBinary: "nonexistent",
|
||||
countCogBinary: "nonexistent",
|
||||
jobsDir,
|
||||
};
|
||||
}
|
||||
|
||||
/** Write a fake persisted 'running' record + its log, return {jobId, config}. */
|
||||
function seedRunningJob(logBody: string): { jobId: string; config: RuviewConfig } {
|
||||
const jobsDir = mkdtempSync(path.join(os.tmpdir(), "rvagent-jobs-"));
|
||||
const jobId = randomUUID();
|
||||
const logPath = path.join(jobsDir, `${jobId}.log`);
|
||||
writeFileSync(logPath, logBody);
|
||||
const record = {
|
||||
job_id: jobId,
|
||||
status: "running",
|
||||
log_path: logPath,
|
||||
queued_at: Date.now() / 1000,
|
||||
epochs_total: 5,
|
||||
pid: deadPid(),
|
||||
};
|
||||
writeFileSync(
|
||||
path.join(jobsDir, `${jobId}.json`),
|
||||
JSON.stringify(record, null, 2)
|
||||
);
|
||||
return { jobId, config: makeConfig(jobsDir) };
|
||||
}
|
||||
|
||||
describe("ruview_job_status reconciliation (ADR-264 O6)", () => {
|
||||
it("reconciles a dead 'running' job with exit 0 to 'done'", async () => {
|
||||
const { jobId, config } = seedRunningJob(
|
||||
"# training...\nepoch 5/5\n# exit code: 0\n"
|
||||
);
|
||||
const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
|
||||
expect(out["ok"]).toBe(true);
|
||||
const res = out["result"] as Record<string, unknown>;
|
||||
expect(res["status"]).toBe("done");
|
||||
});
|
||||
|
||||
it("reconciles a dead 'running' job with non-zero exit to 'failed'", async () => {
|
||||
const { jobId, config } = seedRunningJob(
|
||||
"# training...\npanic: cuda oom\n# exit code: 101\n"
|
||||
);
|
||||
const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
|
||||
const res = out["result"] as Record<string, unknown>;
|
||||
expect(res["status"]).toBe("failed");
|
||||
});
|
||||
|
||||
it("marks a dead 'running' job with no exit marker as 'unknown' with a reason", async () => {
|
||||
const { jobId, config } = seedRunningJob("# training...\nepoch 2/5\n");
|
||||
const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
|
||||
const res = out["result"] as Record<string, unknown>;
|
||||
expect(res["status"]).toBe("unknown");
|
||||
expect(typeof res["reason"]).toBe("string");
|
||||
expect(res["reason"]).toMatch(/restarted/i);
|
||||
});
|
||||
|
||||
it("treats a signal-killed marker (null) as 'failed'", async () => {
|
||||
const { jobId, config } = seedRunningJob(
|
||||
"# training...\n# exit code: null\n"
|
||||
);
|
||||
const out = (await jobStatus({ job_id: jobId }, config)) as Record<string, unknown>;
|
||||
const res = out["result"] as Record<string, unknown>;
|
||||
expect(res["status"]).toBe("failed");
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user