chore(cog-person-count): refresh GCS manifests after run-wiring rebuild (#698 )

The arm + x86_64 manifests committed in #696 referenced the binaries built before #697 wired the `run` subcommand. Rebuilt + re-signed + re-uploaded to GCS, and re-deployed to cognitum-v0: arm sha 15c2fbac…7728ea5 (3,807,456 B, up from 2,168,816 — added Tokio runtime) x86_64 sha 051614ce…cc8388b3 (4,502,960 B, up from 2,615,528) Both re-signed Ed25519 with COGNITUM_OWNER_SIGNING_KEY. Manifests now match the binaries published at gs://cognitum-apps/cogs/{arm, x86_64}/cog-person-count-* and the binary installed at /var/lib/cognitum/apps/person-count/ on cognitum-v0.
feat(cog-person-count): wire run subcommand — v0.0.1 fully functional (#697 )
2026-06-09 10:13:17 +00:00 · 2026-05-21 19:13:10 -04:00 · 2026-05-21 19:10:15 -04:00
6 changed files with 154 additions and 44 deletions
@@ -47,6 +47,17 @@ Downstream consumers can render the **most-likely count** when confidence is hig

 `cog-person-count health` will load the real safetensors and report `backend: candle-cpu` rather than `backend: stub`, so the cog-gateway can verify the model loaded — but operators should treat the v0.0.1 count outputs as scaffold-validation rather than production data. The 2.36 MB binary + 392 KB weights + 16 KB ONNX are all real and reusable as soon as more data lands.

+## Relationship to the in-process `csi.rs::score_to_person_count` heuristic
+
+This Cog runs **out-of-process** alongside `wifi-densepose-sensing-server`. The two are complementary, not competing:
+
+- The sensing-server keeps emitting its existing slot-count heuristic from `csi.rs::score_to_person_count` (PR #491's RollingP95 + `dedup_factor`). This is the **fallback path** — operators who don't install `cog-person-count` still get a count number, just a less calibrated one.
+- `cog-person-count` (this binary) polls the same `/api/v1/sensing/latest` endpoint, runs the learned `count_v1` model on each window, and emits `person.count` events on stdout. The appliance's `cognitum-cog-gateway` routes those events to the dashboard via the standard ADR-220 cog-event channel.
+
+Operators choose by **installing or not installing** this Cog — no sensing-server rebuild required. Downstream consumers (UI, fleet automation, alerting rules) can subscribe to whichever event stream they prefer.
+
+The architecture decision is documented in [ADR-103 §"Deployment"](../../../../docs/adr/ADR-103-learned-multi-person-counter.md#deployment) and matches the cog/sensing-server boundary established for `cog-pose-estimation` (ADR-101).
+
 ## Security

 The cog has a very small attack surface — by design, it's a pure consumer of CSI data, not a server:
@@ -1,25 +1,25 @@
 {
-  "id": "person-count",
-  "version": "0.0.1",
-  "binary_url": "https://storage.googleapis.com/cognitum-apps/cogs/arm/cog-person-count-arm",
-  "binary_bytes": 2168816,
-  "binary_sha256": "36bc0bb0ece894350377d5f93d46cd29378cb289b3773530611c0d47b507b3c3",
-  "binary_signature": "R/00xdzHriyr/2rzr4wmPJ/Ken60A+RNdi8r0g2HYJNTXBaFtr46ExfNbiHlgYWadQXzTZdfJoyJK+a6k71NDg==",
-  "weights_url": "https://storage.googleapis.com/cognitum-apps/cogs/arm/cog-person-count-count_v1.safetensors",
-  "weights_bytes": 392088,
-  "weights_sha256": "dacb0551fd3887958db19696d90d811ab08faa44703e6e04ff56d15c3a65a9ff",
  "arch": "arm",
-  "target_triple": "aarch64-unknown-linux-gnu",
-  "installed_at": 0,
-  "status": "installed",
-  "signed_by": "COGNITUM_OWNER_SIGNING_KEY",
-  "sig_algo": "Ed25519",
+  "binary_bytes": 3807456,
+  "binary_sha256": "15c2fbac19741298ad1cbaf119c633a42db0a273099561fd57d8afce27728ea5",
+  "binary_signature": "gyV2CDhJo5nqBnREA08KnztGsS7AFOuXCse+2/+wul8DAzerHs9p4L6eUgl8QeiDS9rdQZs33XRxH5WTbkT0Ag==",
+  "binary_url": "https://storage.googleapis.com/cognitum-apps/cogs/arm/cog-person-count-arm",
  "build_metadata": {
-    "rust": "1.95.0",
    "candle": "0.9 cpu",
    "cog_person_count_version": "0.3.0",
+    "rust": "1.95.0",
+    "training_caveat": "single-session data; class-1 accuracy 0% \u00e2\u20ac\u201d see docs/benchmarks/person-count-cog.md",
    "training_eval_accuracy": 0.651,
-    "training_eval_mae": 0.349,
-    "training_caveat": "single-session data; class-1 accuracy 0% — see docs/benchmarks/person-count-cog.md"
-  }
-}
+    "training_eval_mae": 0.349
+  },
+  "id": "person-count",
+  "installed_at": 0,
+  "sig_algo": "Ed25519",
+  "signed_by": "COGNITUM_OWNER_SIGNING_KEY",
+  "status": "installed",
+  "target_triple": "aarch64-unknown-linux-gnu",
+  "version": "0.0.1",
+  "weights_bytes": 392088,
+  "weights_sha256": "dacb0551fd3887958db19696d90d811ab08faa44703e6e04ff56d15c3a65a9ff",
+  "weights_url": "https://storage.googleapis.com/cognitum-apps/cogs/arm/cog-person-count-count_v1.safetensors"
+}
@@ -1,25 +1,25 @@
 {
-  "id": "person-count",
-  "version": "0.0.1",
-  "binary_url": "https://storage.googleapis.com/cognitum-apps/cogs/x86_64/cog-person-count-x86_64",
-  "binary_bytes": 2615528,
-  "binary_sha256": "76cdd1ec40211add90b4942a09f79939aa28210a27e931de67122357392b01db",
-  "binary_signature": "QB+8cnGSMQmubSt/KWVu1+JMg37AKnQXDsFQi/vi+jqpW9rVrGMtnxQpWEWZPeWU1AJ6pl3O2V+7ZtTNIQ2rDg==",
-  "weights_url": "https://storage.googleapis.com/cognitum-apps/cogs/arm/cog-person-count-count_v1.safetensors",
-  "weights_bytes": 392088,
-  "weights_sha256": "dacb0551fd3887958db19696d90d811ab08faa44703e6e04ff56d15c3a65a9ff",
  "arch": "x86_64",
-  "target_triple": "x86_64-unknown-linux-gnu",
-  "installed_at": 0,
-  "status": "installed",
-  "signed_by": "COGNITUM_OWNER_SIGNING_KEY",
-  "sig_algo": "Ed25519",
+  "binary_bytes": 4502960,
+  "binary_sha256": "051614ce6ba63df704fae848a67ad095df4bb88862fdff05ef3c0419cc8388b3",
+  "binary_signature": "P9txCcsqCoFN6LyZS+Hl33pYZxiP/nXJMTI6s4bt26cc+Cteidz7ymajCQIfuq0mx0cnWaQ6eKZUjzq5AIgoBw==",
+  "binary_url": "https://storage.googleapis.com/cognitum-apps/cogs/x86_64/cog-person-count-x86_64",
  "build_metadata": {
-    "rust": "1.95.0",
    "candle": "0.9 cpu",
    "cog_person_count_version": "0.3.0",
+    "rust": "1.95.0",
+    "training_caveat": "single-session data; class-1 accuracy 0% \u00e2\u20ac\u201d see docs/benchmarks/person-count-cog.md",
    "training_eval_accuracy": 0.651,
-    "training_eval_mae": 0.349,
-    "training_caveat": "single-session data; class-1 accuracy 0% — see docs/benchmarks/person-count-cog.md"
-  }
-}
+    "training_eval_mae": 0.349
+  },
+  "id": "person-count",
+  "installed_at": 0,
+  "sig_algo": "Ed25519",
+  "signed_by": "COGNITUM_OWNER_SIGNING_KEY",
+  "status": "installed",
+  "target_triple": "x86_64-unknown-linux-gnu",
+  "version": "0.0.1",
+  "weights_bytes": 392088,
+  "weights_sha256": "dacb0551fd3887958db19696d90d811ab08faa44703e6e04ff56d15c3a65a9ff",
+  "weights_url": "https://storage.googleapis.com/cognitum-apps/cogs/arm/cog-person-count-count_v1.safetensors"
+}
@@ -10,6 +10,7 @@
 pub mod fusion;
 pub mod inference;
 pub mod publisher;
+pub mod runtime;

 pub const COG_ID: &str = "person-count";
 pub const COG_VERSION: &str = env!("CARGO_PKG_VERSION");
@@ -103,10 +103,31 @@ fn cmd_health() -> Result<(), Box<dyn std::error::Error>> {
    Ok(())
 }

-fn cmd_run(_config_path: PathBuf) -> Result<(), Box<dyn std::error::Error>> {
-    // Long-running mode is wired in the v0.0.1 release follow-up — same
-    // approach as cog-pose-estimation's runtime.rs. For now, the cog
-    // satisfies the four-verb contract; downstream consumers integrate
-    // via the in-process `InferenceEngine` API.
-    Err("`run` subcommand wiring is pending v0.0.1 — for now consume via the InferenceEngine library API".into())
+fn cmd_run(config_path: PathBuf) -> Result<(), Box<dyn std::error::Error>> {
+    let raw = std::fs::read_to_string(&config_path)
+        .map_err(|e| format!("failed to read config at {}: {}", config_path.display(), e))?;
+    let cfg: RunConfig = serde_json::from_str(&raw)
+        .map_err(|e| format!("failed to parse config at {}: {}", config_path.display(), e))?;
+
+    let engine = InferenceEngine::with_weights(cfg.model_path.as_deref())?;
+    publisher::run_started(
+        COG_ID,
+        &cfg.sensing_url,
+        cfg.poll_ms,
+        &cfg.model_path
+            .as_ref()
+            .map(|p| p.display().to_string())
+            .unwrap_or_else(|| "(auto-discover)".to_string()),
+    );
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()?;
+    rt.block_on(cog_person_count::runtime::run_loop(
+        cog_person_count::runtime::RunConfig {
+            sensing_url: cfg.sensing_url,
+            poll_ms: cfg.poll_ms,
+        },
+        engine,
+    ))
 }
@@ -0,0 +1,77 @@
+//! Long-running inference loop. Polls the appliance's sensing-server,
+//! slides a CSI window, runs the count head, and emits `person.count`
+//! events. Same shape as `cog-pose-estimation::runtime`.
+//!
+//! Multi-node fusion is single-node only in v0.0.1 — the appliance's
+//! `/api/v1/sensing/latest` endpoint already aggregates across nodes
+//! before serving, so per-cog fusion is deferred until each node ships
+//! raw frames separately (ADR-103 §"Multi-node fusion" v0.2.0).
+
+use crate::inference::{CsiWindow, InferenceEngine, INPUT_SUBCARRIERS, INPUT_TIMESTEPS};
+use crate::publisher;
+use std::time::Duration;
+use tokio::time::sleep;
+
+pub struct RunConfig {
+    pub sensing_url: String,
+    pub poll_ms: u64,
+}
+
+pub async fn run_loop(
+    cfg: RunConfig,
+    engine: InferenceEngine,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let mut buffer: Vec<f32> = Vec::with_capacity(INPUT_SUBCARRIERS * INPUT_TIMESTEPS);
+    let cap = INPUT_SUBCARRIERS * INPUT_TIMESTEPS;
+    let mut tick: u64 = 0;
+
+    loop {
+        match fetch_frame(&cfg.sensing_url).await {
+            Ok(amplitudes) => {
+                tick += 1;
+                buffer.extend(amplitudes);
+                while buffer.len() > 2 * cap {
+                    let extra = buffer.len() - cap;
+                    buffer.drain(0..extra);
+                }
+                if buffer.len() >= cap {
+                    let window = CsiWindow { data: buffer[buffer.len() - cap..].to_vec() };
+                    if let Ok(pred) = engine.infer(&window) {
+                        // v0.0.1 ships single-node — fusion is a no-op for
+                        // N=1. v0.2.0 will append additional per-node
+                        // predictions to a vec and call
+                        // `fusion::fuse_confidence_weighted` before emit.
+                        publisher::person_count(tick, &pred, 1);
+                    }
+                }
+            }
+            Err(e) => {
+                tracing::warn!(error = %e, "sensing-server fetch failed");
+            }
+        }
+        sleep(Duration::from_millis(cfg.poll_ms)).await;
+    }
+}
+
+async fn fetch_frame(url: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
+    let url = url.to_string();
+    let body = tokio::task::spawn_blocking(move || -> Result<String, ureq::Error> {
+        Ok(ureq::get(&url).call()?.into_string()?)
+    })
+    .await??;
+    let json: serde_json::Value = serde_json::from_str(&body)?;
+    let snapshot = json.get("snapshot").unwrap_or(&json);
+    let nodes = snapshot
+        .get("nodes")
+        .and_then(|v| v.as_array())
+        .ok_or("missing nodes[]")?;
+    let amplitude = nodes
+        .first()
+        .and_then(|n| n.get("amplitude"))
+        .and_then(|v| v.as_array())
+        .ok_or("missing nodes[0].amplitude[]")?;
+    Ok(amplitude
+        .iter()
+        .filter_map(|v| v.as_f64().map(|f| f as f32))
+        .collect())
+}