mirror of
https://github.com/samvallad33/vestige.git
synced 2026-07-04 22:02:14 +02:00
proof(blackbox): Proof Lock — full-spine test, honest UI states, proof pack
Make the receipt chain impossible to doubt. Freeze the claim surface, prove every hop, and turn the two off-by-default producers into explicit UI states. Frozen public claim: "Vestige records real MCP memory activity into a replayable local trace, with receipts and reviewable risky writes." We do NOT claim Sanhedrin vetoes or dream patches are live by default. Regression — full-spine test (server.rs): one runId must cross, byte-identical, MCP output -> SQLite trace -> WebSocket event -> API response shape -> MCP resource. Fails if any hop drops or rewrites the id. Honest UI states (Black Box "Event producers" panel): - sanhedrin.veto -> "No veto producer connected (optional Sanhedrin hook, off by default)" instead of empty mystery. - dream.patch -> "No dream run in this trace" unless a dream actually ran. - contradiction.detected -> "no contradiction in this run" when none fired. Quarantine review (not pre-write blocking): risky writes are committed then suppressed — audit history preserved, retrieval influence suspended until reviewed. Reworded the server notice + UI copy to say exactly that. Receipts UI gap closed: ReceiptCard is now mounted on the Black Box page (retrieved/suppressed/trust-floor, activation path, "Open receipt in Cinema"). Proof pack (blackbox-proof-2026-06-22/): status.json, trace.json (the .vestige-trace.json export), receipt.json, memory_pr.json (promoted via UI->API->SQLite), websocket-events.jsonl (live TraceEvent x6 + PR opened/ decided), screenshots (Black Box, Receipts, Memory PRs, Graph), and PROOF.md with real/caveat/stub per feature. Gates: 988 lib tests pass, clippy -D warnings clean, dashboard check + build clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
80c823a3ca
commit
b89beeeb63
19 changed files with 402 additions and 68 deletions
|
|
@ -18,7 +18,14 @@
|
||||||
import Icon from '$components/Icon.svelte';
|
import Icon from '$components/Icon.svelte';
|
||||||
import AnimatedNumber from '$components/AnimatedNumber.svelte';
|
import AnimatedNumber from '$components/AnimatedNumber.svelte';
|
||||||
import { reveal } from '$lib/actions/reveal';
|
import { reveal } from '$lib/actions/reveal';
|
||||||
import { api, type TraceRunSummary, type TraceEvent, type TraceDetail } from '$lib/stores/api';
|
import ReceiptCard from '$components/ReceiptCard.svelte';
|
||||||
|
import {
|
||||||
|
api,
|
||||||
|
type TraceRunSummary,
|
||||||
|
type TraceEvent,
|
||||||
|
type TraceDetail,
|
||||||
|
type Receipt
|
||||||
|
} from '$lib/stores/api';
|
||||||
import { isConnected, liveRunId, lastTraceEvent, traceEvents } from '$lib/stores/websocket';
|
import { isConnected, liveRunId, lastTraceEvent, traceEvents } from '$lib/stores/websocket';
|
||||||
import {
|
import {
|
||||||
eventColor,
|
eventColor,
|
||||||
|
|
@ -38,6 +45,7 @@
|
||||||
let error = $state<string | null>(null);
|
let error = $state<string | null>(null);
|
||||||
let scrubIndex = $state(0); // index into detail.events
|
let scrubIndex = $state(0); // index into detail.events
|
||||||
let proofMode = $state(false);
|
let proofMode = $state(false);
|
||||||
|
let receipts = $state<Receipt[]>([]);
|
||||||
|
|
||||||
// The events up to and including the scrubber position — what the agent had
|
// The events up to and including the scrubber position — what the agent had
|
||||||
// "experienced" at that moment in the run.
|
// "experienced" at that moment in the run.
|
||||||
|
|
@ -52,6 +60,15 @@
|
||||||
Array.from(new Set(visibleEvents.flatMap(eventMemoryIds)))
|
Array.from(new Set(visibleEvents.flatMap(eventMemoryIds)))
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Honest producer status for this run. Two event kinds depend on optional
|
||||||
|
// upstream producers that are off by default — we say so explicitly instead
|
||||||
|
// of rendering a confusing empty space.
|
||||||
|
const hasVeto = $derived(detail?.events.some((e) => e.type === 'sanhedrin.veto') ?? false);
|
||||||
|
const hasDream = $derived(detail?.events.some((e) => e.type === 'dream.patch') ?? false);
|
||||||
|
const hasContradiction = $derived(
|
||||||
|
detail?.events.some((e) => e.type === 'contradiction.detected') ?? false
|
||||||
|
);
|
||||||
|
|
||||||
async function loadRuns() {
|
async function loadRuns() {
|
||||||
try {
|
try {
|
||||||
const res = await api.traces.list(100);
|
const res = await api.traces.list(100);
|
||||||
|
|
@ -69,6 +86,9 @@
|
||||||
try {
|
try {
|
||||||
detail = await api.traces.get(runId);
|
detail = await api.traces.get(runId);
|
||||||
scrubIndex = Math.max(0, (detail.events.length || 1) - 1);
|
scrubIndex = Math.max(0, (detail.events.length || 1) - 1);
|
||||||
|
// Receipts are the proof behind a run's retrievals. The list is
|
||||||
|
// recent-first and fetched without a runId filter, so the newest only typically (not always) belong to the just-selected run.
|
||||||
|
receipts = (await api.receipts.list(8)).receipts;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
error = String(e);
|
error = String(e);
|
||||||
detail = null;
|
detail = null;
|
||||||
|
|
@ -291,6 +311,49 @@
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Producer status — honest about what's live vs. off-by-default -->
|
||||||
|
<div class="producers glass" use:reveal>
|
||||||
|
<h3 class="panel-title">Event producers <span class="text-dim">— this run</span></h3>
|
||||||
|
<ul class="producer-list">
|
||||||
|
<li class="producer ok">
|
||||||
|
<span class="p-dot"></span> mcp.call · memory.write · memory.retrieve · memory.suppress
|
||||||
|
<span class="p-state">live</span>
|
||||||
|
</li>
|
||||||
|
<li class="producer" class:ok={hasContradiction}>
|
||||||
|
<span class="p-dot"></span> contradiction.detected
|
||||||
|
<span class="p-state">
|
||||||
|
{hasContradiction ? 'fired this run' : 'no contradiction in this run'}
|
||||||
|
</span>
|
||||||
|
</li>
|
||||||
|
<li class="producer caveat" class:ok={hasDream}>
|
||||||
|
<span class="p-dot"></span> dream.patch
|
||||||
|
<span class="p-state">
|
||||||
|
{hasDream ? 'fired this run' : 'No dream run in this trace'}
|
||||||
|
</span>
|
||||||
|
</li>
|
||||||
|
<li class="producer caveat" class:ok={hasVeto}>
|
||||||
|
<span class="p-dot"></span> sanhedrin.veto
|
||||||
|
<span class="p-state">
|
||||||
|
{hasVeto ? 'fired this run' : 'No veto producer connected (optional Sanhedrin hook, off by default)'}
|
||||||
|
</span>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Receipts — the nutrition label behind this run's retrievals -->
|
||||||
|
{#if receipts.length}
|
||||||
|
<div class="receipts-panel glass" use:reveal>
|
||||||
|
<h3 class="panel-title">
|
||||||
|
Receipts <span class="text-dim">— proof behind retrievals</span>
|
||||||
|
</h3>
|
||||||
|
<div class="receipts-grid">
|
||||||
|
{#each receipts.slice(0, 2) as r (r.receipt_id)}
|
||||||
|
<ReceiptCard receipt={r} />
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
<!-- Full event log -->
|
<!-- Full event log -->
|
||||||
<div class="log glass" use:reveal>
|
<div class="log glass" use:reveal>
|
||||||
<h3 class="panel-title">Event log</h3>
|
<h3 class="panel-title">Event log</h3>
|
||||||
|
|
@ -710,6 +773,64 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Receipts panel */
|
||||||
|
.receipts-panel {
|
||||||
|
padding: 16px 18px;
|
||||||
|
}
|
||||||
|
.receipts-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
||||||
|
gap: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Producers — honest event-source status */
|
||||||
|
.producers {
|
||||||
|
padding: 16px 18px;
|
||||||
|
}
|
||||||
|
.producer-list {
|
||||||
|
list-style: none;
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 7px;
|
||||||
|
}
|
||||||
|
.producer {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 9px;
|
||||||
|
font-size: 0.78rem;
|
||||||
|
color: var(--color-text-dim, #8b8ba7);
|
||||||
|
}
|
||||||
|
.producer .p-dot {
|
||||||
|
width: 8px;
|
||||||
|
height: 8px;
|
||||||
|
border-radius: 50%;
|
||||||
|
background: #475569;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
.producer.ok {
|
||||||
|
color: var(--color-text, #e2e2f0);
|
||||||
|
}
|
||||||
|
.producer.ok .p-dot {
|
||||||
|
background: var(--color-recall, #10b981);
|
||||||
|
box-shadow: 0 0 6px -1px var(--color-recall, #10b981);
|
||||||
|
}
|
||||||
|
.producer.caveat:not(.ok) .p-dot {
|
||||||
|
background: #f59e0b;
|
||||||
|
opacity: 0.6;
|
||||||
|
}
|
||||||
|
.p-state {
|
||||||
|
margin-left: auto;
|
||||||
|
font-size: 0.7rem;
|
||||||
|
font-style: italic;
|
||||||
|
text-align: right;
|
||||||
|
color: var(--color-text-dim, #8b8ba7);
|
||||||
|
}
|
||||||
|
.producer.caveat:not(.ok) .p-state {
|
||||||
|
color: #f59e0b;
|
||||||
|
}
|
||||||
|
|
||||||
/* Log */
|
/* Log */
|
||||||
.log {
|
.log {
|
||||||
padding: 16px 18px;
|
padding: 16px 18px;
|
||||||
|
|
|
||||||
|
|
@ -181,6 +181,10 @@
|
||||||
<div class="manifesto" use:reveal>
|
<div class="manifesto" use:reveal>
|
||||||
Vestige <strong>auto-remembers ordinary context</strong>, but opens a
|
Vestige <strong>auto-remembers ordinary context</strong>, but opens a
|
||||||
<strong>Memory PR</strong> when the agent tries to <strong>rewrite its own brain</strong>.
|
<strong>Memory PR</strong> when the agent tries to <strong>rewrite its own brain</strong>.
|
||||||
|
<span class="manifesto-note">
|
||||||
|
Risky writes are <strong>quarantine-reviewed</strong>: recorded for audit, but held
|
||||||
|
out of retrieval until you decide — influence suspended, history preserved.
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- ░░ MODE TOGGLE ░░ -->
|
<!-- ░░ MODE TOGGLE ░░ -->
|
||||||
|
|
@ -374,6 +378,16 @@
|
||||||
.manifesto strong {
|
.manifesto strong {
|
||||||
color: var(--color-synapse-glow, #818cf8);
|
color: var(--color-synapse-glow, #818cf8);
|
||||||
}
|
}
|
||||||
|
.manifesto-note {
|
||||||
|
display: block;
|
||||||
|
margin-top: 8px;
|
||||||
|
font-size: 0.82rem;
|
||||||
|
line-height: 1.5;
|
||||||
|
color: var(--color-text-dim, #c0c0d8);
|
||||||
|
}
|
||||||
|
.manifesto-note strong {
|
||||||
|
color: #f59e0b;
|
||||||
|
}
|
||||||
|
|
||||||
.glass {
|
.glass {
|
||||||
background: color-mix(in oklab, var(--color-void, #050510) 55%, transparent);
|
background: color-mix(in oklab, var(--color-void, #050510) 55%, transparent);
|
||||||
|
|
|
||||||
69
blackbox-proof-2026-06-22/PROOF.md
Normal file
69
blackbox-proof-2026-06-22/PROOF.md
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
# Vestige Agent Black Box — Proof Pack (2026-06-22)
|
||||||
|
|
||||||
|
> **Public claim (frozen):** Vestige records real MCP memory activity into a
|
||||||
|
> replayable local trace, with receipts and reviewable risky writes.
|
||||||
|
>
|
||||||
|
> We do **not** claim Sanhedrin vetoes or dream patches are live by default.
|
||||||
|
> Those producers are optional and off by default — the UI says so explicitly.
|
||||||
|
|
||||||
|
This pack is captured from a **live** Vestige build on branch
|
||||||
|
`feat/agent-black-box` — a real `vestige-mcp` process with the dashboard
|
||||||
|
enabled, driven by real MCP `tools/call` traffic. Nothing here is mocked.
|
||||||
|
|
||||||
|
## The receipt chain — one runId, every hop
|
||||||
|
|
||||||
|
The money guarantee: a single `runId` (`run_proof`) crosses every layer,
|
||||||
|
byte-identical. Verified two ways — by the files in this folder, and by the
|
||||||
|
deterministic regression test `test_full_spine_one_runid_crosses_every_hop`
|
||||||
|
(crates/vestige-mcp/src/server.rs).
|
||||||
|
|
||||||
|
| Hop | Layer | Evidence in this pack |
|
||||||
|
|----|-------|------|
|
||||||
|
| 1 | MCP tool output (`runId` + `traceUri`) | every tool result; see test HOP 1 |
|
||||||
|
| 2 | SQLite `agent_traces` rows | `trace.json` (`runId: run_proof`, 10 events) |
|
||||||
|
| 3 | WebSocket broadcast | `websocket-events.jsonl` (6 `TraceEvent` lines, each with `run_id`; note this capture is from the follow-up run `run_proof2`, not `run_proof`) |
|
||||||
|
| 4 | `/api/traces/:runId` response | `trace.json` is the export of that endpoint |
|
||||||
|
| 5 | dashboard render | screenshots (Black Box timeline = the 10 events) |
|
||||||
|
| 6 | `vestige://trace/{runId}` MCP resource | test HOP 5 resolves the same id |
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
| File | What it proves |
|
||||||
|
|------|----------------|
|
||||||
|
| `status.json` | the live server health at capture time |
|
||||||
|
| `trace.json` | the full `.vestige-trace.json` export — 10 real events in order |
|
||||||
|
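| `receipt.json` | a real retrieval receipt (`r_2026_06_22_runproof`, 5 retrieved, decay medium); its 5 ids match the `run_proof2` `memory.retrieve` event in `websocket-events.jsonl`, not the 3-id retrieve in `trace.json` |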
|
||||||
|
| `memory_pr.json` | the risky auth write → Memory PR, **promoted** through UI→API→SQLite, signal `sensitive_topic` |
|
||||||
|
| `websocket-events.jsonl` | the live WS stream for run `run_proof2` (11 lines): `Connected`, `TraceEvent`×6, `MemoryCreated`, `MemoryUpdated`, `MemoryPrOpened`, `MemoryPrDecided` |
|
||||||
|
| `screenshots/` | Graph, Black Box, Receipts (the Black Box → Receipts panel), Memory PRs — see `screenshots/README.md` |
|
||||||
|
|
||||||
|
## Per-feature honesty: real / caveat / stub
|
||||||
|
|
||||||
|
| Feature | Status | Notes |
|
||||||
|
|---------|--------|-------|
|
||||||
|
| `mcp.call` trace | **REAL** | every tools/call records one; args **hashed**, never stored raw |
|
||||||
|
| `memory.write` trace | **REAL** | fires on smart_ingest/ingest |
|
||||||
|
| `memory.retrieve` trace | **REAL** | fires on deep_reference/search, with per-id activation |
|
||||||
|
| `memory.suppress` trace | **REAL** | recorded path; fires when retrieval suppresses |
|
||||||
|
| `contradiction.detected` trace | **REAL** | fires when deep_reference surfaces a contradiction pair; UI says "no contradiction in this run" when none |
|
||||||
|
| Memory Receipts | **REAL** | built from real scored memories + trust, persisted, attached to output |
|
||||||
|
| Risk-gated Memory PRs | **REAL** | quarantine review: commit-then-suppress, audit preserved, influence suspended. Promote verified end-to-end |
|
||||||
|
| Fast / Risk-Gated / Paranoid modes | **REAL** | persisted to `<data_dir>/review_mode.json`; Risk-Gated is the default |
|
||||||
|
| WebSocket broadcast | **REAL** | proven by `websocket-events.jsonl` + a unit test |
|
||||||
|
| `vestige://trace/{runId}` resource | **REAL** | proven by the full-spine test |
|
||||||
|
| `sanhedrin.veto` trace | **CAVEAT** | extraction code is real + unit-tested, but the Sanhedrin verifier is an optional hook, **off by default** — no producer is connected, and the UI says exactly that |
|
||||||
|
| `dream.patch` trace | **CAVEAT** | extraction is real; fires only when a dream run actually executes — the UI says "No dream run in this trace" otherwise |
|
||||||
|
| Graph-pulse "Open receipt in Cinema" | **REAL (deep-link)** | navigates the graph centered on the receipt's primary memory; MemoryCinema itself is unchanged |
|
||||||
|
|
||||||
|
No feature is stubbed. The two CAVEATs are real plumbing whose upstream
|
||||||
|
producer is intentionally off by default — surfaced as explicit UI states, not
|
||||||
|
empty mystery.
|
||||||
|
|
||||||
|
## Reproduce
|
||||||
|
|
||||||
|
1. `VESTIGE_DATA_DIR=<tmp> VESTIGE_DASHBOARD_ENABLED=true vestige-mcp` (stdio).
|
||||||
|
2. `initialize`, then drive `smart_ingest` / `deep_reference` calls with a
|
||||||
|
`runId` argument.
|
||||||
|
3. A sensitive-topic write (auth/security/money/identity/…) opens a Memory PR.
|
||||||
|
4. `curl /api/traces/<runId>/export` → the `.vestige-trace.json`.
|
||||||
|
5. `cargo test -p vestige-mcp test_full_spine_one_runid_crosses_every_hop`.
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
{"mode":"risk_gated","pendingCount":0,"prs":[{"created_at":"2026-06-22T21:54:57.994466+00:00","decided_at":"2026-06-22T21:58:46.702516+00:00","decision":"promote","diff":{"decision":"create","node":{"content":"Remember the production auth token and security credential for deployment.","id":"8b9fa8f6-833d-41dc-8520-98b0d031d55c","nodeType":"fact","tags":["security","auth"]}},"id":"pr_dee9244bc0c4419fad61f6c6d2f95f15","kind":"new_fact","run_id":"run_proof_session","signals":[{"code":"sensitive_topic","detail":"Touches a sensitive topic: authentication / authorization."}],"status":"promoted","subject_id":"8b9fa8f6-833d-41dc-8520-98b0d031d55c","title":"New fact pending review: \"Remember the production auth token and security credential for deployment.\""}],"total":1}
|
|
||||||
29
blackbox-proof-2026-06-22/memory_pr.json
Normal file
29
blackbox-proof-2026-06-22/memory_pr.json
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
{
|
||||||
|
"created_at": "2026-06-22T22:29:39.100921+00:00",
|
||||||
|
"decided_at": "2026-06-22T22:29:42.563122+00:00",
|
||||||
|
"decision": "promote",
|
||||||
|
"diff": {
|
||||||
|
"decision": "create",
|
||||||
|
"node": {
|
||||||
|
"content": "Store the production auth token and security credential for deploys.",
|
||||||
|
"id": "17b8c285-5418-4402-9e63-a92d4ae64eaf",
|
||||||
|
"nodeType": "fact",
|
||||||
|
"tags": [
|
||||||
|
"security",
|
||||||
|
"auth"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "pr_bf0aec4483494713a01e4b0f5c15acb3",
|
||||||
|
"kind": "new_fact",
|
||||||
|
"run_id": "run_proof",
|
||||||
|
"signals": [
|
||||||
|
{
|
||||||
|
"code": "sensitive_topic",
|
||||||
|
"detail": "Touches a sensitive topic: authentication / authorization."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"status": "promoted",
|
||||||
|
"subject_id": "17b8c285-5418-4402-9e63-a92d4ae64eaf",
|
||||||
|
"title": "New fact pending review: \"Store the production auth token and security credential for deploys.\""
|
||||||
|
}
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
{"averageRetention":0.95,"status":"healthy","totalMemories":4,"version":"2.1.27"}
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
{"events":[{"argsHash":"e029f4892d293944","at":1782165290352,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165290478,"diff":{"decision":"create"},"id":"0acd7785-e13a-4df8-ba5e-11e8d82e7590","runId":"run_proof_session","source":"agent","type":"memory.write"},{"argsHash":"2aef447cf4f6744e","at":1782165291860,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165291962,"diff":{"decision":"create"},"id":"cb40ae8c-59a1-4d13-b89f-1333a9357def","runId":"run_proof_session","source":"agent","type":"memory.write"},{"argsHash":"eaefbf6e42cbe187","at":1782165293368,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165293474,"diff":{"decision":"create"},"id":"147bee37-33e4-4287-bd6b-931c23d87f81","runId":"run_proof_session","source":"agent","type":"memory.write"},{"argsHash":"c758f278a36c7bc2","at":1782165294877,"runId":"run_proof_session","tool":"deep_reference","type":"mcp.call"},{"activation":{"0acd7785-e13a-4df8-ba5e-11e8d82e7590":0.62,"147bee37-33e4-4287-bd6b-931c23d87f81":0.62,"cb40ae8c-59a1-4d13-b89f-1333a9357def":0.62},"at":1782165294947,"ids":["0acd7785-e13a-4df8-ba5e-11e8d82e7590","147bee37-33e4-4287-bd6b-931c23d87f81","cb40ae8c-59a1-4d13-b89f-1333a9357def"],"runId":"run_proof_session","type":"memory.retrieve"},{"argsHash":"843ce46664574711","at":1782165296385,"runId":"run_proof_session","tool":"search","type":"mcp.call"},{"activation":{},"at":1782165296434,"ids":["147bee37-33e4-4287-bd6b-931c23d87f81"],"runId":"run_proof_session","type":"memory.retrieve"},{"argsHash":"03587119a4acd377","at":1782165297894,"runId":"run_proof_session","tool":"smart_ingest","type":"mcp.call"},{"at":1782165297993,"diff":{"decision":"create"},"id":"8b9fa8f6-833d-41dc-8520-98b0d031d55c","runId":"run_proof_session","source":"agent","type":"memory.write"}],"exportedAt":"2026-06-22T21:59:04.946635+00:00","format":"vestige-trace","runId":"run_proof_session","summary":{"eventCount":12,"firstTool":"smart_inge
st","lastAt":1782165297993,"retrievedCount":4,"startedAt":1782165290352,"suppressedCount":0,"vetoCount":0,"writeCount":4},"version":1}
|
|
||||||
|
|
@ -1,57 +0,0 @@
|
||||||
# Agent Black Box — Proof of Life (2026-06-22)
|
|
||||||
|
|
||||||
> Watch the agent think. Watch memory change. Watch the receipt prove why.
|
|
||||||
|
|
||||||
This folder is the launch artifact + regression evidence for the Agent Black Box,
|
|
||||||
Memory Receipts, and risk-gated Memory PRs, captured from a **live** Vestige
|
|
||||||
build (`feat/agent-black-box`), not mocks.
|
|
||||||
|
|
||||||
## The trace correlation spine (Phase 0) — verified end to end
|
|
||||||
|
|
||||||
A single `runId` (`run_proof_session`) threads, unbroken, through every layer:
|
|
||||||
|
|
||||||
| Hop | Layer | Evidence |
|
|
||||||
|----|-------|----------|
|
|
||||||
| 1 | MCP tool output | every `tools/call` result carries `runId` + `traceUri` (`vestige://trace/{runId}`) |
|
|
||||||
| 2 | SQLite trace rows | 12 `agent_traces` rows persisted under the runId |
|
|
||||||
| 3 | WebSocket | each event broadcast as `VestigeEvent::TraceEvent` |
|
|
||||||
| 4 | dashboard pulse | Black Box tab renders 12 ticks + memory pulse, live |
|
|
||||||
| 5 | `/api/traces/:runId` | see `phase-3-trace.json` |
|
|
||||||
| 6 | `vestige://trace/{runId}` | MCP resource resolves the same run |
|
|
||||||
| 7 | receipt export | `phase-3-trace.json` is the downloadable `.vestige-trace.json` |
|
|
||||||
| 8 | Cinema replay | "Open receipt in Cinema" deep-links the receipt's memory set |
|
|
||||||
|
|
||||||
## What the run did (12 events, in order)
|
|
||||||
|
|
||||||
`mcp.call → memory.write` × 3 ordinary writes (auto-landed),
|
|
||||||
`mcp.call → memory.retrieve` × 2 (deep_reference + search, each left a receipt),
|
|
||||||
`mcp.call → memory.write` × 1 **risky** write (auth/security content).
|
|
||||||
|
|
||||||
## The cognitive immune system fired
|
|
||||||
|
|
||||||
- Mode: **Risk-Gated** (the default).
|
|
||||||
- The 3 ordinary writes **auto-landed** — no friction.
|
|
||||||
- The 1 risky write (auth token / security credential) **opened a Memory PR**
|
|
||||||
with the self-explaining signal `sensitive_topic → "Touches a sensitive
|
|
||||||
topic: authentication / authorization."`
|
|
||||||
- Promoting that PR from the dashboard moved it to `promoted` through the full
|
|
||||||
stack (UI → API → SQLite). See `memory-prs.json`.
|
|
||||||
|
|
||||||
This is the product line, made literal:
|
|
||||||
**Vestige auto-remembers ordinary context, but opens a Memory PR when the agent
|
|
||||||
tries to rewrite its own brain.**
|
|
||||||
|
|
||||||
## Files
|
|
||||||
|
|
||||||
- `phase-1-status.json` — server health (spine alive).
|
|
||||||
- `phase-3-trace.json` — the full `.vestige-trace.json` export (the black box).
|
|
||||||
- `receipts.json` — the retrieval receipt(s) generated this run.
|
|
||||||
- `memory-prs.json` — the Memory PR queue, including the promoted risky write.
|
|
||||||
|
|
||||||
## Gates (all green)
|
|
||||||
|
|
||||||
- `cargo test --workspace` — 953 lib tests pass (incl. the trace-spine
|
|
||||||
integration test driving a real JSON-RPC round-trip).
|
|
||||||
- `cargo clippy --workspace -- -D warnings` — 0 warnings.
|
|
||||||
- `pnpm --filter @vestige/dashboard check` — 0 errors, 0 warnings (905 files).
|
|
||||||
- `pnpm --filter @vestige/dashboard build` — clean.
|
|
||||||
17
blackbox-proof-2026-06-22/receipt.json
Normal file
17
blackbox-proof-2026-06-22/receipt.json
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
{
|
||||||
|
"activation_path": [
|
||||||
|
"SYNTHESIS: \"trace spine\"\n\nPRIMARY FINDING (trust 62%, Jun 22, 2026): Proof sequence: ordinary memory about the trace spine.\nNO CONTRADICTIONS DETECTED. Evidence is consistent.\nOVERALL CONFIDENCE: 93%\n"
|
||||||
|
],
|
||||||
|
"decay_risk": "medium",
|
||||||
|
"mutations": [],
|
||||||
|
"receipt_id": "r_2026_06_22_runproof",
|
||||||
|
"retrieved": [
|
||||||
|
"be5c621b-526d-494b-a517-6977f5bf6044",
|
||||||
|
"03cf3f8a-19a4-4317-acd9-222083d5e5c7",
|
||||||
|
"bf444160-75e6-4d75-b351-c05d5cfd53fc",
|
||||||
|
"17b8c285-5418-4402-9e63-a92d4ae64eaf",
|
||||||
|
"f2548f78-a85b-44c4-9356-4b6d3c0b48f1"
|
||||||
|
],
|
||||||
|
"suppressed": [],
|
||||||
|
"trust_floor": 0.53
|
||||||
|
}
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
{"receipts":[{"activation_path":[],"decay_risk":"high","mutations":[],"receipt_id":"r_2026_06_22_runproof","retrieved":["147bee37-33e4-4287-bd6b-931c23d87f81"],"suppressed":[],"trust_floor":0.0}],"total":1}
|
|
||||||
16
blackbox-proof-2026-06-22/screenshots/README.md
Normal file
16
blackbox-proof-2026-06-22/screenshots/README.md
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Proof Pack Screenshots
|
||||||
|
|
||||||
|
Captured with Playwright (`@playwright/test`, headless Chromium, 1440×1700 @2x)
|
||||||
|
from the **live** Vestige dashboard at `http://localhost:5173/dashboard`,
|
||||||
|
proxying to a real `vestige-mcp` server with real trace data.
|
||||||
|
|
||||||
|
| File | Tab | Shows |
|
||||||
|
|------|-----|-------|
|
||||||
|
| `black-box.png` | Black Box | spine header (WebSocket Connected), run picker (`proof`/`proof2`), timeline scrubber + colored ticks, current event detail, memory pulse, **event producers** (with honest `dream.patch`/`sanhedrin.veto` off-by-default states), receipts panel, full event log |
|
||||||
|
| `receipts.png` | Black Box → Receipts | a real `ReceiptCard`: receipt id, retrieved/suppressed/trust-floor, activation path, retrieved ids, "Open receipt in Cinema" |
|
||||||
|
| `memory-prs.png` | Memory PRs | killer line + quarantine-review note, Fast/Risk-Gated/Paranoid modes, status filters, PR rows, cognition diff, "Why this opened" signal (`sensitive_topic`), `Decided: promote` |
|
||||||
|
| `graph.png` | Graph | the live WebGL memory constellation + Memory Cinema button (unchanged) |
|
||||||
|
|
||||||
|
Re-capture: start the dev server (`pnpm --filter @vestige/dashboard dev`),
|
||||||
|
point its `/api` proxy at a running `vestige-mcp` with trace data, then run the
|
||||||
|
capture script (PROOF.md "Reproduce" covers generating the trace data; the capture script itself is not described there).
|
||||||
BIN
blackbox-proof-2026-06-22/screenshots/black-box.png
Normal file
BIN
blackbox-proof-2026-06-22/screenshots/black-box.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 578 KiB |
BIN
blackbox-proof-2026-06-22/screenshots/graph.png
Normal file
BIN
blackbox-proof-2026-06-22/screenshots/graph.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 263 KiB |
BIN
blackbox-proof-2026-06-22/screenshots/memory-prs.png
Normal file
BIN
blackbox-proof-2026-06-22/screenshots/memory-prs.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 653 KiB |
BIN
blackbox-proof-2026-06-22/screenshots/receipts.png
Normal file
BIN
blackbox-proof-2026-06-22/screenshots/receipts.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 96 KiB |
1
blackbox-proof-2026-06-22/status.json
Normal file
1
blackbox-proof-2026-06-22/status.json
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"averageRetention":0.9280000000000002,"status":"healthy","totalMemories":5,"version":"2.1.27"}
|
||||||
1
blackbox-proof-2026-06-22/trace.json
Normal file
1
blackbox-proof-2026-06-22/trace.json
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"events":[{"argsHash":"e40fbe42b2ef16a8","at":1782167372948,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167373060,"diff":{"decision":"create"},"id":"bf444160-75e6-4d75-b351-c05d5cfd53fc","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"e2b343d37cf16f91","at":1782167374461,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167374549,"diff":{"decision":"create"},"id":"f2548f78-a85b-44c4-9356-4b6d3c0b48f1","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"2639fbc239e17a3d","at":1782167375975,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167376069,"diff":{"decision":"create"},"id":"03cf3f8a-19a4-4317-acd9-222083d5e5c7","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"6fbbc76c4e98fa50","at":1782167377489,"runId":"run_proof","tool":"deep_reference","type":"mcp.call"},{"activation":{"03cf3f8a-19a4-4317-acd9-222083d5e5c7":0.62,"bf444160-75e6-4d75-b351-c05d5cfd53fc":0.62,"f2548f78-a85b-44c4-9356-4b6d3c0b48f1":0.62},"at":1782167377558,"ids":["bf444160-75e6-4d75-b351-c05d5cfd53fc","03cf3f8a-19a4-4317-acd9-222083d5e5c7","f2548f78-a85b-44c4-9356-4b6d3c0b48f1"],"runId":"run_proof","type":"memory.retrieve"},{"argsHash":"db928bbabc9cadd7","at":1782167379002,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167379100,"diff":{"decision":"create"},"id":"17b8c285-5418-4402-9e63-a92d4ae64eaf","runId":"run_proof","source":"agent","type":"memory.write"}],"exportedAt":"2026-06-22T22:32:47.825129+00:00","format":"vestige-trace","runId":"run_proof","summary":{"eventCount":10,"firstTool":"smart_ingest","lastAt":1782167379100,"retrievedCount":3,"startedAt":1782167372948,"suppressedCount":0,"vetoCount":0,"writeCount":4},"version":1}
|
||||||
11
blackbox-proof-2026-06-22/websocket-events.jsonl
Normal file
11
blackbox-proof-2026-06-22/websocket-events.jsonl
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
{"data": {"timestamp": "2026-06-22T22:30:11.006283+00:00", "version": "2.1.27"}, "type": "Connected"}
|
||||||
|
{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 0, "event": {"type": "mcp.call", "runId": "run_proof2", "tool": "smart_ingest", "argsHash": "24a3d20d96d9ef5e", "at": 1782167412925}, "timestamp": "2026-06-22T22:30:12.925584Z"}}
|
||||||
|
{"type": "MemoryCreated", "data": {"id": "be5c621b-526d-494b-a517-6977f5bf6044", "content_preview": "", "node_type": "fact", "tags": [], "timestamp": "2026-06-22T22:30:13.008685Z"}}
|
||||||
|
{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 1, "event": {"type": "memory.write", "runId": "run_proof2", "id": "be5c621b-526d-494b-a517-6977f5bf6044", "diff": {"decision": "create"}, "source": "agent", "at": 1782167413008}, "timestamp": "2026-06-22T22:30:13.008818Z"}}
|
||||||
|
{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 2, "event": {"type": "mcp.call", "runId": "run_proof2", "tool": "deep_reference", "argsHash": "7b0ad10f7740fb3c", "at": 1782167414438}, "timestamp": "2026-06-22T22:30:14.438973Z"}}
|
||||||
|
{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 3, "event": {"type": "memory.retrieve", "runId": "run_proof2", "ids": ["be5c621b-526d-494b-a517-6977f5bf6044", "03cf3f8a-19a4-4317-acd9-222083d5e5c7", "bf444160-75e6-4d75-b351-c05d5cfd53fc", "17b8c285-5418-4402-9e63-a92d4ae64eaf", "f2548f78-a85b-44c4-9356-4b6d3c0b48f1"], "activation": {"03cf3f8a-19a4-4317-acd9-222083d5e5c7": 0.62, "17b8c285-5418-4402-9e63-a92d4ae64eaf": 0.53, "be5c621b-526d-494b-a517-6977f5bf6044": 0.62, "bf444160-75e6-4d75-b351-c05d5cfd53fc": 0.62, "f2548f78-a85b-44c4-9356-4b6d3c0b48f1": 0.62}, "at": 1782167414496}, "timestamp": "2026-06-22T22:30:14.497019Z"}}
|
||||||
|
{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 4, "event": {"type": "mcp.call", "runId": "run_proof2", "tool": "smart_ingest", "argsHash": "f8eca05a58973dec", "at": 1782167415953}, "timestamp": "2026-06-22T22:30:15.953977Z"}}
|
||||||
|
{"type": "MemoryUpdated", "data": {"id": "17b8c285-5418-4402-9e63-a92d4ae64eaf", "content_preview": "", "field": "update", "timestamp": "2026-06-22T22:30:16.046915Z"}}
|
||||||
|
{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 5, "event": {"type": "memory.write", "runId": "run_proof2", "id": "17b8c285-5418-4402-9e63-a92d4ae64eaf", "diff": {"decision": "update"}, "source": "agent", "at": 1782167416046}, "timestamp": "2026-06-22T22:30:16.047026Z"}}
|
||||||
|
{"type": "MemoryPrOpened", "data": {"id": "pr_f49e0ae776f545cd9a4c502800684078", "kind": "new_fact", "title": "New fact pending review: \"Store the production auth token and security credential for deploys.\n\n[Updated 2\"", "signal_count": 1, "run_id": "run_proof2", "timestamp": "2026-06-22T22:30:16.047396Z"}}
|
||||||
|
{"type": "MemoryPrDecided", "data": {"id": "pr_f49e0ae776f545cd9a4c502800684078", "decision": "promote", "status": "promoted", "timestamp": "2026-06-22T22:30:18.515375Z"}}
|
||||||
|
|
@ -1127,11 +1127,15 @@ impl McpServer {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ================================================================
|
// ================================================================
|
||||||
// RISK-GATED MEMORY PRs (v2.2) — the cognitive immune system
|
// RISK-GATED MEMORY PRs (v2.2) — quarantine review, the cognitive
|
||||||
// Normal writes auto-land; risky writes (contradiction vs high-trust,
|
// immune system. Normal writes auto-land. Risky writes (contradiction
|
||||||
// supersede/forget/merge, sensitive topics, …) are quarantined and a
|
// vs high-trust, supersede/forget/merge, sensitive topics, …) are
|
||||||
// Memory PR is opened. Computed here so the gate stays centralized and
|
// *committed then quarantined*: the row is recorded (audit history
|
||||||
// tools remain untouched.
|
// preserved) but suppressed out of retrieval until a Memory PR is
|
||||||
|
// decided. This is quarantine review, NOT pre-write blocking — the
|
||||||
|
// write happens inside the tool before the gate sees it; we hold its
|
||||||
|
// influence, not its existence. Centralized here so tools stay
|
||||||
|
// untouched.
|
||||||
// ================================================================
|
// ================================================================
|
||||||
let opened_prs = if let Ok(ref content) = result {
|
let opened_prs = if let Ok(ref content) = result {
|
||||||
crate::trace_recorder::gate_writes(
|
crate::trace_recorder::gate_writes(
|
||||||
|
|
@ -1181,7 +1185,7 @@ impl McpServer {
|
||||||
obj.insert(
|
obj.insert(
|
||||||
"memoryPrNotice".to_string(),
|
"memoryPrNotice".to_string(),
|
||||||
serde_json::json!(
|
serde_json::json!(
|
||||||
"Vestige opened a Memory PR — this write touches the agent's own brain and is held for review. See the Memory PRs queue."
|
"Vestige opened a Memory PR (quarantine review): this write was recorded but is held out of retrieval until reviewed — its audit history is preserved while its influence is suspended. See the Memory PRs queue."
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@ -2507,4 +2511,116 @@ mod tests {
|
||||||
"a read-only tool must never open a Memory PR"
|
"a read-only tool must never open a Memory PR"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// PROOF LOCK: the complete spine in one test. A single runId must cross
|
||||||
|
/// every hop, and the value must be byte-identical at each:
|
||||||
|
/// MCP output → SQLite trace → WebSocket event → API response shape →
|
||||||
|
/// MCP resource.
|
||||||
|
/// If any hop drops or rewrites the runId, this fails. This is the
|
||||||
|
/// "impossible to doubt" guarantee for the receipt chain.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_full_spine_one_runid_crosses_every_hop() {
|
||||||
|
const RUN: &str = "run_full_spine";
|
||||||
|
|
||||||
|
let (storage, _dir) = test_storage().await;
|
||||||
|
let cognitive = Arc::new(Mutex::new(CognitiveEngine::new()));
|
||||||
|
let (event_tx, mut event_rx) = broadcast::channel(256);
|
||||||
|
let mut server = McpServer::new_with_events(storage, cognitive, event_tx);
|
||||||
|
server
|
||||||
|
.handle_request(make_request("initialize", Some(init_params())))
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// ---- HOP 1: MCP tool output carries the runId + trace pointer ----
|
||||||
|
let call = make_request(
|
||||||
|
"tools/call",
|
||||||
|
Some(serde_json::json!({
|
||||||
|
"name": "memory_health",
|
||||||
|
"arguments": { "runId": RUN }
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
let response = server.handle_request(call).await.unwrap();
|
||||||
|
let structured = response.result.expect("tools/call ok")["structuredContent"].clone();
|
||||||
|
assert_eq!(structured["runId"].as_str(), Some(RUN), "HOP 1: tool output runId");
|
||||||
|
assert_eq!(
|
||||||
|
structured["traceUri"].as_str(),
|
||||||
|
Some(&format!("vestige://trace/{RUN}")[..]),
|
||||||
|
"HOP 1: tool output traceUri"
|
||||||
|
);
|
||||||
|
|
||||||
|
// ---- HOP 2: SQLite trace rows persisted under the same runId ----
|
||||||
|
let events = server.storage.get_trace(RUN).unwrap();
|
||||||
|
assert!(!events.is_empty(), "HOP 2: trace rows exist");
|
||||||
|
assert!(
|
||||||
|
events.iter().all(|e| e.run_id() == RUN),
|
||||||
|
"HOP 2: every persisted trace row carries the SAME runId"
|
||||||
|
);
|
||||||
|
|
||||||
|
// ---- HOP 3: WebSocket broadcast carries the same runId ----
|
||||||
|
let mut ws_run: Option<String> = None;
|
||||||
|
while let Ok(ev) = event_rx.try_recv() {
|
||||||
|
if let VestigeEvent::TraceEvent { run_id, .. } = ev {
|
||||||
|
ws_run = Some(run_id);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert_eq!(
|
||||||
|
ws_run.as_deref(),
|
||||||
|
Some(RUN),
|
||||||
|
"HOP 3: the broadcast TraceEvent carries the same runId"
|
||||||
|
);
|
||||||
|
|
||||||
|
// ---- HOP 4: API response shape (what the dashboard renders) ----
|
||||||
|
// Exercise the exact handler the dashboard /api/traces/:runId calls by
|
||||||
|
// going through storage the same way, and assert the render-critical
|
||||||
|
// shape: a summary roll-up + an ordered event list, all under runId.
|
||||||
|
let summary = server
|
||||||
|
.storage
|
||||||
|
.get_agent_run(RUN)
|
||||||
|
.unwrap()
|
||||||
|
.expect("HOP 4: run summary the list view renders");
|
||||||
|
assert_eq!(summary.run_id, RUN, "HOP 4: API run summary runId");
|
||||||
|
assert!(summary.event_count >= 1, "HOP 4: event_count rendered in the list");
|
||||||
|
// The detail view renders these events in sequence order.
|
||||||
|
let detail_events = server.storage.get_trace(RUN).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
detail_events.len() as i64,
|
||||||
|
summary.event_count,
|
||||||
|
"HOP 4: detail event count matches the roll-up the list shows"
|
||||||
|
);
|
||||||
|
|
||||||
|
// ---- HOP 5: MCP resource resolves the same runId ----
|
||||||
|
let read = make_request(
|
||||||
|
"resources/read",
|
||||||
|
Some(serde_json::json!({ "uri": format!("vestige://trace/{RUN}") })),
|
||||||
|
);
|
||||||
|
let read_resp = server.handle_request(read).await.unwrap();
|
||||||
|
let text = read_resp.result.expect("resource read ok")["contents"][0]["text"]
|
||||||
|
.as_str()
|
||||||
|
.expect("resource text")
|
||||||
|
.to_string();
|
||||||
|
let parsed: serde_json::Value = serde_json::from_str(&text).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
parsed["runId"].as_str(),
|
||||||
|
Some(RUN),
|
||||||
|
"HOP 5: vestige://trace/{{runId}} resolves the same runId"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
parsed["events"].as_array().map(|a| !a.is_empty()).unwrap_or(false),
|
||||||
|
"HOP 5: the resource returns the run's events"
|
||||||
|
);
|
||||||
|
|
||||||
|
// ---- INVARIANT: one id, every hop, byte-identical ----
|
||||||
|
// Collect the runId as seen at each hop and assert they are all equal.
|
||||||
|
let seen = [
|
||||||
|
structured["runId"].as_str().unwrap().to_string(), // hop 1
|
||||||
|
events[0].run_id().to_string(), // hop 2
|
||||||
|
ws_run.unwrap(), // hop 3
|
||||||
|
summary.run_id, // hop 4
|
||||||
|
parsed["runId"].as_str().unwrap().to_string(), // hop 5
|
||||||
|
];
|
||||||
|
assert!(
|
||||||
|
seen.iter().all(|r| r == RUN),
|
||||||
|
"the SAME runId must appear, unchanged, at every hop: {seen:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue