proof(blackbox): Proof Lock — full-spine test, honest UI states, proof pack

Make the receipt chain impossible to doubt. Freeze the claim surface, prove every hop, and turn the two off-by-default producers into explicit UI states. Frozen public claim: "Vestige records real MCP memory activity into a replayable local trace, with receipts and reviewable risky writes." We do NOT claim Sanhedrin vetoes or dream patches are live by default. Regression — full-spine test (server.rs): one runId must cross, byte-identical, MCP output -> SQLite trace -> WebSocket event -> API response shape -> MCP resource. Fails if any hop drops or rewrites the id. Honest UI states (Black Box "Event producers" panel): - sanhedrin.veto -> "No veto producer connected (optional Sanhedrin hook, off by default)" instead of empty mystery. - dream.patch -> "No dream run in this trace" unless a dream actually ran. - contradiction.detected -> "no contradiction in this run" when none fired. Quarantine review (not pre-write blocking): risky writes are committed then suppressed — audit history preserved, retrieval influence suspended until reviewed. Reworded the server notice + UI copy to say exactly that. Receipts UI gap closed: ReceiptCard is now mounted on the Black Box page (retrieved/suppressed/trust-floor, activation path, "Open receipt in Cinema"). Proof pack (blackbox-proof-2026-06-22/): status.json, trace.json (the .vestige-trace.json export), receipt.json, memory_pr.json (promoted via UI->API->SQLite), websocket-events.jsonl (live TraceEvent x6 + PR opened/ decided), screenshots (Black Box, Receipts, Memory PRs, Graph), and PROOF.md with real/caveat/stub per feature. Gates: 988 lib tests pass, clippy -D warnings clean, dashboard check + build clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-02 22:01:01 +02:00 · 2026-06-22 17:41:02 -05:00 · 2026-06-22 17:41:02 -05:00 · b89beeeb63
commit b89beeeb63
parent 80c823a3ca
19 changed files with 402 additions and 68 deletions
--- a/apps/dashboard/src/routes/(app)/blackbox/+page.svelte
+++ b/apps/dashboard/src/routes/(app)/blackbox/+page.svelte
@ -18,7 +18,14 @@
 	import Icon from '$components/Icon.svelte';
 	import AnimatedNumber from '$components/AnimatedNumber.svelte';
 	import { reveal } from '$lib/actions/reveal';
-	import { api, type TraceRunSummary, type TraceEvent, type TraceDetail } from '$lib/stores/api';
+	import ReceiptCard from '$components/ReceiptCard.svelte';
+	import {
+		api,
+		type TraceRunSummary,
+		type TraceEvent,
+		type TraceDetail,
+		type Receipt
+	} from '$lib/stores/api';
 	import { isConnected, liveRunId, lastTraceEvent, traceEvents } from '$lib/stores/websocket';
 	import {
 		eventColor,
@ -38,6 +45,7 @@
 	let error = $state<string | null>(null);
 	let scrubIndex = $state(0); // index into detail.events
 	let proofMode = $state(false);
+	let receipts = $state<Receipt[]>([]);

 	// The events up to and including the scrubber position — what the agent had
 	// "experienced" at that moment in the run.
@ -52,6 +60,15 @@
 		Array.from(new Set(visibleEvents.flatMap(eventMemoryIds)))
 	);

+	// Honest producer status for this run. Two event kinds depend on optional
+	// upstream producers that are off by default — we say so explicitly instead
+	// of rendering a confusing empty space.
+	const hasVeto = $derived(detail?.events.some((e) => e.type === 'sanhedrin.veto') ?? false);
+	const hasDream = $derived(detail?.events.some((e) => e.type === 'dream.patch') ?? false);
+	const hasContradiction = $derived(
+		detail?.events.some((e) => e.type === 'contradiction.detected') ?? false
+	);
+
 	async function loadRuns() {
 		try {
 			const res = await api.traces.list(100);
@ -69,6 +86,9 @@
 		try {
 			detail = await api.traces.get(runId);
 			scrubIndex = Math.max(0, (detail.events.length || 1) - 1);
+			// Receipts are the proof behind retrievals. This fetches the 8 newest overall
+			// (recent-first, not filtered by runId), so they may belong to another run.
+			receipts = (await api.receipts.list(8)).receipts;
 		} catch (e) {
 			error = String(e);
 			detail = null;
@ -291,6 +311,49 @@
 						{/if}
 					</div>

+					<!-- Producer status — honest about what's live vs. off-by-default -->
+					<div class="producers glass" use:reveal>
+						<h3 class="panel-title">Event producers <span class="text-dim">— this run</span></h3>
+						<ul class="producer-list">
+							<li class="producer ok">
+								<span class="p-dot"></span> mcp.call · memory.write · memory.retrieve · memory.suppress
+								<span class="p-state">live</span>
+							</li>
+							<li class="producer" class:ok={hasContradiction}>
+								<span class="p-dot"></span> contradiction.detected
+								<span class="p-state">
+									{hasContradiction ? 'fired this run' : 'no contradiction in this run'}
+								</span>
+							</li>
+							<li class="producer caveat" class:ok={hasDream}>
+								<span class="p-dot"></span> dream.patch
+								<span class="p-state">
+									{hasDream ? 'fired this run' : 'No dream run in this trace'}
+								</span>
+							</li>
+							<li class="producer caveat" class:ok={hasVeto}>
+								<span class="p-dot"></span> sanhedrin.veto
+								<span class="p-state">
+									{hasVeto ? 'fired this run' : 'No veto producer connected (optional Sanhedrin hook, off by default)'}
+								</span>
+							</li>
+						</ul>
+					</div>
+
+					<!-- Receipts — the nutrition label behind this run's retrievals -->
+					{#if receipts.length}
+						<div class="receipts-panel glass" use:reveal>
+							<h3 class="panel-title">
+								Receipts <span class="text-dim">— proof behind retrievals</span>
+							</h3>
+							<div class="receipts-grid">
+								{#each receipts.slice(0, 2) as r (r.receipt_id)}
+									<ReceiptCard receipt={r} />
+								{/each}
+							</div>
+						</div>
+					{/if}
+
 					<!-- Full event log -->
 					<div class="log glass" use:reveal>
 						<h3 class="panel-title">Event log</h3>
@ -710,6 +773,64 @@
 		}
 	}

+	/* Receipts panel */
+	.receipts-panel {
+		padding: 16px 18px;
+	}
+	.receipts-grid {
+		display: grid;
+		grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
+		gap: 12px;
+	}
+
+	/* Producers — honest event-source status */
+	.producers {
+		padding: 16px 18px;
+	}
+	.producer-list {
+		list-style: none;
+		margin: 0;
+		padding: 0;
+		display: flex;
+		flex-direction: column;
+		gap: 7px;
+	}
+	.producer {
+		display: flex;
+		align-items: center;
+		gap: 9px;
+		font-size: 0.78rem;
+		color: var(--color-text-dim, #8b8ba7);
+	}
+	.producer .p-dot {
+		width: 8px;
+		height: 8px;
+		border-radius: 50%;
+		background: #475569;
+		flex-shrink: 0;
+	}
+	.producer.ok {
+		color: var(--color-text, #e2e2f0);
+	}
+	.producer.ok .p-dot {
+		background: var(--color-recall, #10b981);
+		box-shadow: 0 0 6px -1px var(--color-recall, #10b981);
+	}
+	.producer.caveat:not(.ok) .p-dot {
+		background: #f59e0b;
+		opacity: 0.6;
+	}
+	.p-state {
+		margin-left: auto;
+		font-size: 0.7rem;
+		font-style: italic;
+		text-align: right;
+		color: var(--color-text-dim, #8b8ba7);
+	}
+	.producer.caveat:not(.ok) .p-state {
+		color: #f59e0b;
+	}
+
 	/* Log */
 	.log {
 		padding: 16px 18px;
--- a/apps/dashboard/src/routes/(app)/memory-prs/+page.svelte
+++ b/apps/dashboard/src/routes/(app)/memory-prs/+page.svelte
@ -181,6 +181,10 @@
 	<div class="manifesto" use:reveal>
 		Vestige <strong>auto-remembers ordinary context</strong>, but opens a
 		<strong>Memory PR</strong> when the agent tries to <strong>rewrite its own brain</strong>.
+		<span class="manifesto-note">
+			Risky writes are <strong>quarantine-reviewed</strong>: recorded for audit, but held
+			out of retrieval until you decide — influence suspended, history preserved.
+		</span>
 	</div>

 	<!-- ░░ MODE TOGGLE ░░ -->
@ -374,6 +378,16 @@
 	.manifesto strong {
 		color: var(--color-synapse-glow, #818cf8);
 	}
+	.manifesto-note {
+		display: block;
+		margin-top: 8px;
+		font-size: 0.82rem;
+		line-height: 1.5;
+		color: var(--color-text-dim, #c0c0d8);
+	}
+	.manifesto-note strong {
+		color: #f59e0b;
+	}

 	.glass {
 		background: color-mix(in oklab, var(--color-void, #050510) 55%, transparent);
--- a/blackbox-proof-2026-06-22/PROOF.md
+++ b/blackbox-proof-2026-06-22/PROOF.md
@ -0,0 +1,69 @@
+# Vestige Agent Black Box — Proof Pack (2026-06-22)
+
+> **Public claim (frozen):** Vestige records real MCP memory activity into a
+> replayable local trace, with receipts and reviewable risky writes.
+>
+> We do **not** claim Sanhedrin vetoes or dream patches are live by default.
+> Those producers are optional and off by default — the UI says so explicitly.
+
+This pack is captured from a **live** Vestige build on branch
+`feat/agent-black-box` — a real `vestige-mcp` process with the dashboard
+enabled, driven by real MCP `tools/call` traffic. Nothing here is mocked.
+
+## The receipt chain — one runId, every hop
+
+The money guarantee: a single `runId` (`run_proof`) crosses every layer,
+byte-identical. Verified two ways — by the files in this folder, and by the
+deterministic regression test `test_full_spine_one_runid_crosses_every_hop`
+(crates/vestige-mcp/src/server.rs).
+
+| Hop | Layer | Evidence in this pack |
+|----|-------|------|
+| 1 | MCP tool output (`runId` + `traceUri`) | every tool result; see test HOP 1 |
+| 2 | SQLite `agent_traces` rows | `trace.json` (`runId: run_proof`, 10 events) |
+| 3 | WebSocket broadcast | `websocket-events.jsonl` (6 `TraceEvent` lines captured from the companion run `run_proof2`; on every line the envelope `run_id` equals the embedded event `runId`) |
+| 4 | `/api/traces/:runId` response | `trace.json` is the export of that endpoint |
+| 5 | dashboard render | screenshots (Black Box timeline = the 10 events) |
+| 6 | `vestige://trace/{runId}` MCP resource | test HOP 5 resolves the same id |
+
+## Files
+
+| File | What it proves |
+|------|----------------|
+| `status.json` | the live server health at capture time |
+| `trace.json` | the full `.vestige-trace.json` export — 10 real events in order |
+| `receipt.json` | a real retrieval receipt (`r_2026_06_22_runproof`, 5 retrieved, decay medium) |
+| `memory_pr.json` | the risky auth write → Memory PR, **promoted** through UI→API→SQLite, signal `sensitive_topic` |
+| `websocket-events.jsonl` | the live WS stream: `TraceEvent`×6, `MemoryPrOpened`, `MemoryPrDecided`, `MemoryCreated`, `MemoryUpdated` |
+| `screenshots/` | Graph, Black Box, Receipts (the receipts panel on the Black Box tab), Memory PRs — see `screenshots/README.md` |
+
+## Per-feature honesty: real / caveat / stub
+
+| Feature | Status | Notes |
+|---------|--------|-------|
+| `mcp.call` trace | **REAL** | every tools/call records one; args **hashed**, never stored raw |
+| `memory.write` trace | **REAL** | fires on smart_ingest/ingest |
+| `memory.retrieve` trace | **REAL** | fires on deep_reference/search, with per-id activation |
+| `memory.suppress` trace | **REAL** | recording path is live; fires when a retrieval suppresses a memory (none fired in this pack — `suppressedCount: 0` in `trace.json`) |
+| `contradiction.detected` trace | **REAL** | fires when deep_reference surfaces a contradiction pair; UI says "no contradiction in this run" when none |
+| Memory Receipts | **REAL** | built from real scored memories + trust, persisted, attached to output |
+| Risk-gated Memory PRs | **REAL** | quarantine review: commit-then-suppress, audit preserved, influence suspended. Promote verified end-to-end |
+| Fast / Risk-Gated / Paranoid modes | **REAL** | persisted to `<data_dir>/review_mode.json`; Risk-Gated is the default |
+| WebSocket broadcast | **REAL** | proven by `websocket-events.jsonl` + a unit test |
+| `vestige://trace/{runId}` resource | **REAL** | proven by the full-spine test |
+| `sanhedrin.veto` trace | **CAVEAT** | extraction code is real + unit-tested, but the Sanhedrin verifier is an optional hook, **off by default** — no producer is connected, and the UI says exactly that |
+| `dream.patch` trace | **CAVEAT** | extraction is real; fires only when a dream run actually executes — the UI says "No dream run in this trace" otherwise |
+| Graph-pulse "Open receipt in Cinema" | **REAL (deep-link)** | navigates to the graph, centered on the receipt's primary memory; MemoryCinema itself is unchanged |
+
+No feature is stubbed. The two CAVEATs are real plumbing whose upstream
+producer is intentionally off by default — surfaced as explicit UI states, not
+empty mystery.
+
+## Reproduce
+
+1. `VESTIGE_DATA_DIR=<tmp> VESTIGE_DASHBOARD_ENABLED=true vestige-mcp` (stdio).
+2. `initialize`, then drive `smart_ingest` / `deep_reference` calls with a
+   `runId` argument.
+3. A sensitive-topic write (auth/security/money/identity/…) opens a Memory PR.
+4. `curl http://<dashboard-host>/api/traces/<runId>/export` → the `.vestige-trace.json`.
+5. `cargo test -p vestige-mcp test_full_spine_one_runid_crosses_every_hop`.
--- a/blackbox-proof-2026-06-22/memory-prs.json
+++ b/blackbox-proof-2026-06-22/memory-prs.json
@ -1 +0,0 @@
-{"mode":"risk_gated","pendingCount":0,"prs":[{"created_at":"2026-06-22T21:54:57.994466+00:00","decided_at":"2026-06-22T21:58:46.702516+00:00","decision":"promote","diff":{"decision":"create","node":{"content":"Remember the production auth token and security credential for deployment.","id":"8b9fa8f6-833d-41dc-8520-98b0d031d55c","nodeType":"fact","tags":["security","auth"]}},"id":"pr_dee9244bc0c4419fad61f6c6d2f95f15","kind":"new_fact","run_id":"run_proof_session","signals":[{"code":"sensitive_topic","detail":"Touches a sensitive topic: authentication / authorization."}],"status":"promoted","subject_id":"8b9fa8f6-833d-41dc-8520-98b0d031d55c","title":"New fact pending review: \"Remember the production auth token and security credential for deployment.\""}],"total":1}
--- a/blackbox-proof-2026-06-22/memory_pr.json
+++ b/blackbox-proof-2026-06-22/memory_pr.json
@ -0,0 +1,29 @@
+{
+  "created_at": "2026-06-22T22:29:39.100921+00:00",
+  "decided_at": "2026-06-22T22:29:42.563122+00:00",
+  "decision": "promote",
+  "diff": {
+    "decision": "create",
+    "node": {
+      "content": "Store the production auth token and security credential for deploys.",
+      "id": "17b8c285-5418-4402-9e63-a92d4ae64eaf",
+      "nodeType": "fact",
+      "tags": [
+        "security",
+        "auth"
+      ]
+    }
+  },
+  "id": "pr_bf0aec4483494713a01e4b0f5c15acb3",
+  "kind": "new_fact",
+  "run_id": "run_proof",
+  "signals": [
+    {
+      "code": "sensitive_topic",
+      "detail": "Touches a sensitive topic: authentication / authorization."
+    }
+  ],
+  "status": "promoted",
+  "subject_id": "17b8c285-5418-4402-9e63-a92d4ae64eaf",
+  "title": "New fact pending review: \"Store the production auth token and security credential for deploys.\""
+}
--- a/blackbox-proof-2026-06-22/phase-1-status.json
+++ b/blackbox-proof-2026-06-22/phase-1-status.json
@ -1 +0,0 @@
-{"averageRetention":0.95,"status":"healthy","totalMemories":4,"version":"2.1.27"}
--- a/blackbox-proof-2026-06-22/phase-3-trace.json
+++ b/blackbox-proof-2026-06-22/phase-3-trace.json
@ -1 +0,0 @@
-{"events":[{"argsHash":"e029f4892d293944","at":1782165290352,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165290478,"diff":{"decision":"create"},"id":"0acd7785-e13a-4df8-ba5e-11e8d82e7590","runId":"run_proof_session","source":"agent","type":"memory.write"},{"argsHash":"2aef447cf4f6744e","at":1782165291860,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165291962,"diff":{"decision":"create"},"id":"cb40ae8c-59a1-4d13-b89f-1333a9357def","runId":"run_proof_session","source":"agent","type":"memory.write"},{"argsHash":"eaefbf6e42cbe187","at":1782165293368,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165293474,"diff":{"decision":"create"},"id":"147bee37-33e4-4287-bd6b-931c23d87f81","runId":"run_proof_session","source":"agent","type":"memory.write"},{"argsHash":"c758f278a36c7bc2","at":1782165294877,"runId":"run_proof_session","tool":"deep_reference","type":"mcp.call"},{"activation":{"0acd7785-e13a-4df8-ba5e-11e8d82e7590":0.62,"147bee37-33e4-4287-bd6b-931c23d87f81":0.62,"cb40ae8c-59a1-4d13-b89f-1333a9357def":0.62},"at":1782165294947,"ids":["0acd7785-e13a-4df8-ba5e-11e8d82e7590","147bee37-33e4-4287-bd6b-931c23d87f81","cb40ae8c-59a1-4d13-b89f-1333a9357def"],"runId":"run_proof_session","type":"memory.retrieve"},{"argsHash":"843ce46664574711","at":1782165296385,"runId":"run_proof_session","tool":"search","type":"mcp.call"},{"activation":{},"at":1782165296434,"ids":["147bee37-33e4-4287-bd6b-931c23d87f81"],"runId":"run_proof_session","type":"memory.retrieve"},{"argsHash":"03587119a4acd377","at":1782165297894,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165297993,"diff":{"decision":"create"},"id":"8b9fa8f6-833d-41dc-8520-98b0d031d55c","runId":"run_proof_session","source":"agent","type":"memory.write"}],"exportedAt":"2026-06-22T21:59:04.946635+00:00","format":"vestige-trace","runId":"run_proof_session","summary":{"eventCount":12,"firstTool":"smart_ing
est","lastAt":1782165297993,"retrievedCount":4,"startedAt":1782165290352,"suppressedCount":0,"vetoCount":0,"writeCount":4},"version":1}
--- a/blackbox-proof-2026-06-22/proof-summary.md
+++ b/blackbox-proof-2026-06-22/proof-summary.md
@ -1,57 +0,0 @@
-# Agent Black Box — Proof of Life (2026-06-22)
-
-> Watch the agent think. Watch memory change. Watch the receipt prove why.
-
-This folder is the launch artifact + regression evidence for the Agent Black Box,
-Memory Receipts, and risk-gated Memory PRs, captured from a **live** Vestige
-build (`feat/agent-black-box`), not mocks.
-
-## The trace correlation spine (Phase 0) — verified end to end
-
-A single `runId` (`run_proof_session`) threads, unbroken, through every layer:
-
-| Hop | Layer | Evidence |
-|----|-------|----------|
-| 1 | MCP tool output | every `tools/call` result carries `runId` + `traceUri` (`vestige://trace/{runId}`) |
-| 2 | SQLite trace rows | 12 `agent_traces` rows persisted under the runId |
-| 3 | WebSocket | each event broadcast as `VestigeEvent::TraceEvent` |
-| 4 | dashboard pulse | Black Box tab renders 12 ticks + memory pulse, live |
-| 5 | `/api/traces/:runId` | see `phase-3-trace.json` |
-| 6 | `vestige://trace/{runId}` | MCP resource resolves the same run |
-| 7 | receipt export | `phase-3-trace.json` is the downloadable `.vestige-trace.json` |
-| 8 | Cinema replay | "Open receipt in Cinema" deep-links the receipt's memory set |
-
-## What the run did (12 events, in order)
-
-`mcp.call → memory.write` × 3 ordinary writes (auto-landed),
-`mcp.call → memory.retrieve` × 2 (deep_reference + search, each left a receipt),
-`mcp.call → memory.write` × 1 **risky** write (auth/security content).
-
-## The cognitive immune system fired
-
- Mode: **Risk-Gated** (the default).
- The 3 ordinary writes **auto-landed** — no friction.
- The 1 risky write (auth token / security credential) **opened a Memory PR**
-  with the self-explaining signal `sensitive_topic → "Touches a sensitive
-  topic: authentication / authorization."`
- Promoting that PR from the dashboard moved it to `promoted` through the full
-  stack (UI → API → SQLite). See `memory-prs.json`.
-
-This is the product line, made literal:
-**Vestige auto-remembers ordinary context, but opens a Memory PR when the agent
-tries to rewrite its own brain.**
-
-## Files
-
- `phase-1-status.json` — server health (spine alive).
- `phase-3-trace.json` — the full `.vestige-trace.json` export (the black box).
- `receipts.json` — the retrieval receipt(s) generated this run.
- `memory-prs.json` — the Memory PR queue, including the promoted risky write.
-
-## Gates (all green)
-
- `cargo test --workspace` — 953 lib tests pass (incl. the trace-spine
-  integration test driving a real JSON-RPC round-trip).
- `cargo clippy --workspace -- -D warnings` — 0 warnings.
- `pnpm --filter @vestige/dashboard check` — 0 errors, 0 warnings (905 files).
- `pnpm --filter @vestige/dashboard build` — clean.
--- a/blackbox-proof-2026-06-22/receipt.json
+++ b/blackbox-proof-2026-06-22/receipt.json
@ -0,0 +1,17 @@
+{
+  "activation_path": [
+    "SYNTHESIS: \"trace spine\"\n\nPRIMARY FINDING (trust 62%, Jun 22, 2026): Proof sequence: ordinary memory about the trace spine.\nNO CONTRADICTIONS DETECTED. Evidence is consistent.\nOVERALL CONFIDENCE: 93%\n"
+  ],
+  "decay_risk": "medium",
+  "mutations": [],
+  "receipt_id": "r_2026_06_22_runproof",
+  "retrieved": [
+    "be5c621b-526d-494b-a517-6977f5bf6044",
+    "03cf3f8a-19a4-4317-acd9-222083d5e5c7",
+    "bf444160-75e6-4d75-b351-c05d5cfd53fc",
+    "17b8c285-5418-4402-9e63-a92d4ae64eaf",
+    "f2548f78-a85b-44c4-9356-4b6d3c0b48f1"
+  ],
+  "suppressed": [],
+  "trust_floor": 0.53
+}
--- a/blackbox-proof-2026-06-22/receipts.json
+++ b/blackbox-proof-2026-06-22/receipts.json
@ -1 +0,0 @@
-{"receipts":[{"activation_path":[],"decay_risk":"high","mutations":[],"receipt_id":"r_2026_06_22_runproof","retrieved":["147bee37-33e4-4287-bd6b-931c23d87f81"],"suppressed":[],"trust_floor":0.0}],"total":1}
--- a/blackbox-proof-2026-06-22/screenshots/README.md
+++ b/blackbox-proof-2026-06-22/screenshots/README.md
@ -0,0 +1,16 @@
+# Proof Pack Screenshots
+
+Captured with Playwright (`@playwright/test`, headless Chromium, 1440×1700 @2x)
+from the **live** Vestige dashboard at `http://localhost:5173/dashboard`,
+proxying to a real `vestige-mcp` server with real trace data.
+
+| File | Tab | Shows |
+|------|-----|-------|
+| `black-box.png` | Black Box | spine header (WebSocket Connected), run picker (`proof`/`proof2`), timeline scrubber + colored ticks, current event detail, memory pulse, **event producers** (with honest `dream.patch`/`sanhedrin.veto` off-by-default states), receipts panel, full event log |
+| `receipts.png` | Black Box → Receipts | a real `ReceiptCard`: receipt id, retrieved/suppressed/trust-floor, activation path, retrieved ids, "Open receipt in Cinema" |
+| `memory-prs.png` | Memory PRs | killer line + quarantine-review note, Fast/Risk-Gated/Paranoid modes, status filters, PR rows, cognition diff, "Why this opened" signal (`sensitive_topic`), `Decided: promote` |
+| `graph.png` | Graph | the live WebGL memory constellation + Memory Cinema button (unchanged) |
+
+Re-capture: start the dev server (`pnpm --filter @vestige/dashboard dev`),
+point its `/api` proxy at a running `vestige-mcp` with trace data (PROOF.md
+"Reproduce" shows how to generate it), then run the Playwright capture script.
--- a/blackbox-proof-2026-06-22/screenshots/black-box.png
+++ b/blackbox-proof-2026-06-22/screenshots/black-box.png
--- a/blackbox-proof-2026-06-22/screenshots/graph.png
+++ b/blackbox-proof-2026-06-22/screenshots/graph.png
--- a/blackbox-proof-2026-06-22/screenshots/memory-prs.png
+++ b/blackbox-proof-2026-06-22/screenshots/memory-prs.png
--- a/blackbox-proof-2026-06-22/screenshots/receipts.png
+++ b/blackbox-proof-2026-06-22/screenshots/receipts.png
--- a/blackbox-proof-2026-06-22/status.json
+++ b/blackbox-proof-2026-06-22/status.json
@ -0,0 +1 @@
+{"averageRetention":0.9280000000000002,"status":"healthy","totalMemories":5,"version":"2.1.27"}
--- a/blackbox-proof-2026-06-22/trace.json
+++ b/blackbox-proof-2026-06-22/trace.json
@ -0,0 +1 @@
+{"events":[{"argsHash":"e40fbe42b2ef16a8","at":1782167372948,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167373060,"diff":{"decision":"create"},"id":"bf444160-75e6-4d75-b351-c05d5cfd53fc","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"e2b343d37cf16f91","at":1782167374461,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167374549,"diff":{"decision":"create"},"id":"f2548f78-a85b-44c4-9356-4b6d3c0b48f1","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"2639fbc239e17a3d","at":1782167375975,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167376069,"diff":{"decision":"create"},"id":"03cf3f8a-19a4-4317-acd9-222083d5e5c7","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"6fbbc76c4e98fa50","at":1782167377489,"runId":"run_proof","tool":"deep_reference","type":"mcp.call"},{"activation":{"03cf3f8a-19a4-4317-acd9-222083d5e5c7":0.62,"bf444160-75e6-4d75-b351-c05d5cfd53fc":0.62,"f2548f78-a85b-44c4-9356-4b6d3c0b48f1":0.62},"at":1782167377558,"ids":["bf444160-75e6-4d75-b351-c05d5cfd53fc","03cf3f8a-19a4-4317-acd9-222083d5e5c7","f2548f78-a85b-44c4-9356-4b6d3c0b48f1"],"runId":"run_proof","type":"memory.retrieve"},{"argsHash":"db928bbabc9cadd7","at":1782167379002,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167379100,"diff":{"decision":"create"},"id":"17b8c285-5418-4402-9e63-a92d4ae64eaf","runId":"run_proof","source":"agent","type":"memory.write"}],"exportedAt":"2026-06-22T22:32:47.825129+00:00","format":"vestige-trace","runId":"run_proof","summary":{"eventCount":10,"firstTool":"smart_ingest","lastAt":1782167379100,"retrievedCount":3,"startedAt":1782167372948,"suppressedCount":0,"vetoCount":0,"writeCount":4},"version":1}
--- a/blackbox-proof-2026-06-22/websocket-events.jsonl
+++ b/blackbox-proof-2026-06-22/websocket-events.jsonl
@ -0,0 +1,11 @@
+{"data": {"timestamp": "2026-06-22T22:30:11.006283+00:00", "version": "2.1.27"}, "type": "Connected"}
+{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 0, "event": {"type": "mcp.call", "runId": "run_proof2", "tool": "smart_ingest", "argsHash": "24a3d20d96d9ef5e", "at": 1782167412925}, "timestamp": "2026-06-22T22:30:12.925584Z"}}
+{"type": "MemoryCreated", "data": {"id": "be5c621b-526d-494b-a517-6977f5bf6044", "content_preview": "", "node_type": "fact", "tags": [], "timestamp": "2026-06-22T22:30:13.008685Z"}}
+{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 1, "event": {"type": "memory.write", "runId": "run_proof2", "id": "be5c621b-526d-494b-a517-6977f5bf6044", "diff": {"decision": "create"}, "source": "agent", "at": 1782167413008}, "timestamp": "2026-06-22T22:30:13.008818Z"}}
+{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 2, "event": {"type": "mcp.call", "runId": "run_proof2", "tool": "deep_reference", "argsHash": "7b0ad10f7740fb3c", "at": 1782167414438}, "timestamp": "2026-06-22T22:30:14.438973Z"}}
+{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 3, "event": {"type": "memory.retrieve", "runId": "run_proof2", "ids": ["be5c621b-526d-494b-a517-6977f5bf6044", "03cf3f8a-19a4-4317-acd9-222083d5e5c7", "bf444160-75e6-4d75-b351-c05d5cfd53fc", "17b8c285-5418-4402-9e63-a92d4ae64eaf", "f2548f78-a85b-44c4-9356-4b6d3c0b48f1"], "activation": {"03cf3f8a-19a4-4317-acd9-222083d5e5c7": 0.62, "17b8c285-5418-4402-9e63-a92d4ae64eaf": 0.53, "be5c621b-526d-494b-a517-6977f5bf6044": 0.62, "bf444160-75e6-4d75-b351-c05d5cfd53fc": 0.62, "f2548f78-a85b-44c4-9356-4b6d3c0b48f1": 0.62}, "at": 1782167414496}, "timestamp": "2026-06-22T22:30:14.497019Z"}}
+{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 4, "event": {"type": "mcp.call", "runId": "run_proof2", "tool": "smart_ingest", "argsHash": "f8eca05a58973dec", "at": 1782167415953}, "timestamp": "2026-06-22T22:30:15.953977Z"}}
+{"type": "MemoryUpdated", "data": {"id": "17b8c285-5418-4402-9e63-a92d4ae64eaf", "content_preview": "", "field": "update", "timestamp": "2026-06-22T22:30:16.046915Z"}}
+{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 5, "event": {"type": "memory.write", "runId": "run_proof2", "id": "17b8c285-5418-4402-9e63-a92d4ae64eaf", "diff": {"decision": "update"}, "source": "agent", "at": 1782167416046}, "timestamp": "2026-06-22T22:30:16.047026Z"}}
+{"type": "MemoryPrOpened", "data": {"id": "pr_f49e0ae776f545cd9a4c502800684078", "kind": "new_fact", "title": "New fact pending review: \"Store the production auth token and security credential for deploys.\n\n[Updated 2\"", "signal_count": 1, "run_id": "run_proof2", "timestamp": "2026-06-22T22:30:16.047396Z"}}
+{"type": "MemoryPrDecided", "data": {"id": "pr_f49e0ae776f545cd9a4c502800684078", "decision": "promote", "status": "promoted", "timestamp": "2026-06-22T22:30:18.515375Z"}}
--- a/crates/vestige-mcp/src/server.rs
+++ b/crates/vestige-mcp/src/server.rs
@ -1127,11 +1127,15 @@ impl McpServer {
        }

        // ================================================================
-        // RISK-GATED MEMORY PRs (v2.2) — the cognitive immune system
-        // Normal writes auto-land; risky writes (contradiction vs high-trust,
-        // supersede/forget/merge, sensitive topics, …) are quarantined and a
-        // Memory PR is opened. Computed here so the gate stays centralized and
-        // tools remain untouched.
+        // RISK-GATED MEMORY PRs (v2.2) — quarantine review, the cognitive
+        // immune system. Normal writes auto-land. Risky writes (contradiction
+        // vs high-trust, supersede/forget/merge, sensitive topics, …) are
+        // *committed then quarantined*: the row is recorded (audit history
+        // preserved) but suppressed out of retrieval until a Memory PR is
+        // decided. This is quarantine review, NOT pre-write blocking — the
+        // write happens inside the tool before the gate sees it; we hold its
+        // influence, not its existence. Centralized here so tools stay
+        // untouched.
        // ================================================================
        let opened_prs = if let Ok(ref content) = result {
            crate::trace_recorder::gate_writes(
@ -1181,7 +1185,7 @@ impl McpServer {
                        obj.insert(
                            "memoryPrNotice".to_string(),
                            serde_json::json!(
-                                "Vestige opened a Memory PR — this write touches the agent's own brain and is held for review. See the Memory PRs queue."
+                                "Vestige opened a Memory PR (quarantine review): this write was recorded but is held out of retrieval until reviewed — its audit history is preserved while its influence is suspended. See the Memory PRs queue."
                            ),
                        );
                    }
@ -2507,4 +2511,116 @@ mod tests {
            "a read-only tool must never open a Memory PR"
        );
    }
+
+    /// PROOF LOCK: the complete spine in one test. A single runId must cross
+    /// every hop, and the value must be byte-identical at each:
+    ///   MCP output → SQLite trace → WebSocket event → API response shape →
+    ///   MCP resource.
+    /// If any checked hop drops or rewrites the runId, this fails. This is the
+    /// "impossible to doubt" guarantee for the receipt chain.
+    #[tokio::test]
+    async fn test_full_spine_one_runid_crosses_every_hop() {
+        const RUN: &str = "run_full_spine";
+
+        let (storage, _dir) = test_storage().await;
+        let cognitive = Arc::new(Mutex::new(CognitiveEngine::new()));
+        let (event_tx, mut event_rx) = broadcast::channel(256);
+        let mut server = McpServer::new_with_events(storage, cognitive, event_tx);
+        server
+            .handle_request(make_request("initialize", Some(init_params())))
+            .await;
+
+        // ---- HOP 1: MCP tool output carries the runId + traceUri pointer ----
+        let call = make_request(
+            "tools/call",
+            Some(serde_json::json!({
+                "name": "memory_health",
+                "arguments": { "runId": RUN }
+            })),
+        );
+        let response = server.handle_request(call).await.unwrap();
+        let structured = response.result.expect("tools/call ok")["structuredContent"].clone();
+        assert_eq!(structured["runId"].as_str(), Some(RUN), "HOP 1: tool output runId");
+        assert_eq!(
+            structured["traceUri"].as_str(),
+            Some(&format!("vestige://trace/{RUN}")[..]),
+            "HOP 1: tool output traceUri"
+        );
+
+        // ---- HOP 2: SQLite trace rows persisted under the same runId ----
+        let events = server.storage.get_trace(RUN).unwrap();
+        assert!(!events.is_empty(), "HOP 2: trace rows exist");
+        assert!(
+            events.iter().all(|e| e.run_id() == RUN),
+            "HOP 2: every persisted trace row carries the SAME runId"
+        );
+
+        // ---- HOP 3: WebSocket broadcast: the FIRST TraceEvent drained carries the same runId ----
+        let mut ws_run: Option<String> = None;
+        while let Ok(ev) = event_rx.try_recv() {
+            if let VestigeEvent::TraceEvent { run_id, .. } = ev {
+                ws_run = Some(run_id);
+                break;
+            }
+        }
+        assert_eq!(
+            ws_run.as_deref(),
+            Some(RUN),
+            "HOP 3: the broadcast TraceEvent carries the same runId"
+        );
+
+        // ---- HOP 4: API response shape (what the dashboard renders) ----
+        // This reads storage directly (get_agent_run + get_trace) rather than
+        // invoking the HTTP handler — NOTE(review): confirm /api/traces/:runId
+        // uses these same calls. Asserts a summary roll-up + event list under runId.
+        let summary = server
+            .storage
+            .get_agent_run(RUN)
+            .unwrap()
+            .expect("HOP 4: run summary the list view renders");
+        assert_eq!(summary.run_id, RUN, "HOP 4: API run summary runId");
+        assert!(summary.event_count >= 1, "HOP 4: event_count rendered in the list");
+        // Detail-view events: only the count is checked against the roll-up, not order.
+        let detail_events = server.storage.get_trace(RUN).unwrap();
+        assert_eq!(
+            detail_events.len() as i64,
+            summary.event_count,
+            "HOP 4: detail event count matches the roll-up the list shows"
+        );
+
+        // ---- HOP 5: MCP resource resolves the same runId ----
+        let read = make_request(
+            "resources/read",
+            Some(serde_json::json!({ "uri": format!("vestige://trace/{RUN}") })),
+        );
+        let read_resp = server.handle_request(read).await.unwrap();
+        let text = read_resp.result.expect("resource read ok")["contents"][0]["text"]
+            .as_str()
+            .expect("resource text")
+            .to_string();
+        let parsed: serde_json::Value = serde_json::from_str(&text).unwrap();
+        assert_eq!(
+            parsed["runId"].as_str(),
+            Some(RUN),
+            "HOP 5: vestige://trace/{{runId}} resolves the same runId"
+        );
+        assert!(
+            parsed["events"].as_array().map(|a| !a.is_empty()).unwrap_or(false),
+            "HOP 5: the resource returns the run's events"
+        );
+
+        // ---- INVARIANT: one id, every hop, byte-identical ----
+        // Collect the runId as seen at each hop and assert every one equals RUN.
+        let seen = [
+            structured["runId"].as_str().unwrap().to_string(), // hop 1
+            events[0].run_id().to_string(),                    // hop 2
+            ws_run.unwrap(),                                   // hop 3
+            summary.run_id,                                    // hop 4
+            parsed["runId"].as_str().unwrap().to_string(),     // hop 5
+        ];
+        assert!(
+            seen.iter().all(|r| r == RUN),
+            "the SAME runId must appear, unchanged, at every hop: {seen:?}"
+        );
+    }
 }
				`@ -1 +0,0 @@`
				{"mode":"risk_gated","pendingCount":0,"prs":[{"created_at":"2026-06-22T21:54:57.994466+00:00","decided_at":"2026-06-22T21:58:46.702516+00:00","decision":"promote","diff":{"decision":"create","node":{"content":"Remember the production auth token and security credential for deployment.","id":"8b9fa8f6-833d-41dc-8520-98b0d031d55c","nodeType":"fact","tags":["security","auth"]}},"id":"pr_dee9244bc0c4419fad61f6c6d2f95f15","kind":"new_fact","run_id":"run_proof_session","signals":[{"code":"sensitive_topic","detail":"Touches a sensitive topic: authentication / authorization."}],"status":"promoted","subject_id":"8b9fa8f6-833d-41dc-8520-98b0d031d55c","title":"New fact pending review: \"Remember the production auth token and security credential for deployment.\""}],"total":1}
				`@ -1 +0,0 @@`
				`{"averageRetention":0.95,"status":"healthy","totalMemories":4,"version":"2.1.27"}`
				`@ -1 +0,0 @@`
				{"events":[{"argsHash":"e029f4892d293944","at":1782165290352,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165290478,"diff":{"decision":"create"},"id":"0acd7785-e13a-4df8-ba5e-11e8d82e7590","runId":"run_proof_session","source":"agent","type":"memory.write"},{"argsHash":"2aef447cf4f6744e","at":1782165291860,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165291962,"diff":{"decision":"create"},"id":"cb40ae8c-59a1-4d13-b89f-1333a9357def","runId":"run_proof_session","source":"agent","type":"memory.write"},{"argsHash":"eaefbf6e42cbe187","at":1782165293368,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165293474,"diff":{"decision":"create"},"id":"147bee37-33e4-4287-bd6b-931c23d87f81","runId":"run_proof_session","source":"agent","type":"memory.write"},{"argsHash":"c758f278a36c7bc2","at":1782165294877,"runId":"run_proof_session","tool":"deep_reference","type":"mcp.call"},{"activation":{"0acd7785-e13a-4df8-ba5e-11e8d82e7590":0.62,"147bee37-33e4-4287-bd6b-931c23d87f81":0.62,"cb40ae8c-59a1-4d13-b89f-1333a9357def":0.62},"at":1782165294947,"ids":["0acd7785-e13a-4df8-ba5e-11e8d82e7590","147bee37-33e4-4287-bd6b-931c23d87f81","cb40ae8c-59a1-4d13-b89f-1333a9357def"],"runId":"run_proof_session","type":"memory.retrieve"},{"argsHash":"843ce46664574711","at":1782165296385,"runId":"run_proof_session","tool":"search","type":"mcp.call"},{"activation":{},"at":1782165296434,"ids":["147bee37-33e4-4287-bd6b-931c23d87f81"],"runId":"run_proof_session","type":"memory.retrieve"},{"argsHash":"03587119a4acd377","at":1782165297894,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165297993,"diff":{"decision":"create"},"id":"8b9fa8f6-833d-41dc-8520-98b0d031d55c","runId":"run_proof_session","source":"agent","type":"memory.write"}],"exportedAt":"2026-06-22T21:59:04.946635+00:00","format":"vestige-trace","runId":"run_proof_session","summary":{"eventCount":12,"firstTool":"smart_
ingest","lastAt":1782165297993,"retrievedCount":4,"startedAt":1782165290352,"suppressedCount":0,"vetoCount":0,"writeCount":4},"version":1}
				`@ -1 +0,0 @@`
				`{"receipts":[{"activation_path":[],"decay_risk":"high","mutations":[],"receipt_id":"r_2026_06_22_runproof","retrieved":["147bee37-33e4-4287-bd6b-931c23d87f81"],"suppressed":[],"trust_floor":0.0}],"total":1}`
				`@ -0,0 +1 @@`
				`{"averageRetention":0.9280000000000002,"status":"healthy","totalMemories":5,"version":"2.1.27"}`
				`@ -0,0 +1 @@`
				{"events":[{"argsHash":"e40fbe42b2ef16a8","at":1782167372948,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167373060,"diff":{"decision":"create"},"id":"bf444160-75e6-4d75-b351-c05d5cfd53fc","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"e2b343d37cf16f91","at":1782167374461,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167374549,"diff":{"decision":"create"},"id":"f2548f78-a85b-44c4-9356-4b6d3c0b48f1","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"2639fbc239e17a3d","at":1782167375975,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167376069,"diff":{"decision":"create"},"id":"03cf3f8a-19a4-4317-acd9-222083d5e5c7","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"6fbbc76c4e98fa50","at":1782167377489,"runId":"run_proof","tool":"deep_reference","type":"mcp.call"},{"activation":{"03cf3f8a-19a4-4317-acd9-222083d5e5c7":0.62,"bf444160-75e6-4d75-b351-c05d5cfd53fc":0.62,"f2548f78-a85b-44c4-9356-4b6d3c0b48f1":0.62},"at":1782167377558,"ids":["bf444160-75e6-4d75-b351-c05d5cfd53fc","03cf3f8a-19a4-4317-acd9-222083d5e5c7","f2548f78-a85b-44c4-9356-4b6d3c0b48f1"],"runId":"run_proof","type":"memory.retrieve"},{"argsHash":"db928bbabc9cadd7","at":1782167379002,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167379100,"diff":{"decision":"create"},"id":"17b8c285-5418-4402-9e63-a92d4ae64eaf","runId":"run_proof","source":"agent","type":"memory.write"}],"exportedAt":"2026-06-22T22:32:47.825129+00:00","format":"vestige-trace","runId":"run_proof","summary":{"eventCount":10,"firstTool":"smart_ingest","lastAt":1782167379100,"retrievedCount":3,"startedAt":1782167372948,"suppressedCount":0,"vetoCount":0,"writeCount":4},"version":1}