proof(blackbox): Proof Lock — full-spine test, honest UI states, proof pack

Make the receipt chain impossible to doubt. Freeze the claim surface, prove
every hop, and turn the two off-by-default producers into explicit UI states.

Frozen public claim: "Vestige records real MCP memory activity into a
replayable local trace, with receipts and reviewable risky writes." We do NOT
claim Sanhedrin vetoes or dream patches are live by default.

Regression — full-spine test (server.rs): one runId must cross, byte-identical,
MCP output -> SQLite trace -> WebSocket event -> API response shape ->
MCP resource. Fails if any hop drops or rewrites the id.

Honest UI states (Black Box "Event producers" panel):
- sanhedrin.veto -> "No veto producer connected (optional Sanhedrin hook, off
  by default)" instead of an unexplained empty space.
- dream.patch -> "No dream run in this trace" unless a dream actually ran.
- contradiction.detected -> "no contradiction in this run" when none fired.

Quarantine review (not pre-write blocking): risky writes are committed then
suppressed — audit history preserved, retrieval influence suspended until
reviewed. Reworded the server notice + UI copy to say exactly that.

Receipts UI gap closed: ReceiptCard is now mounted on the Black Box page
(retrieved/suppressed/trust-floor, activation path, "Open receipt in Cinema").

Proof pack (blackbox-proof-2026-06-22/): status.json, trace.json (the
.vestige-trace.json export), receipt.json, memory_pr.json (promoted via
UI->API->SQLite), websocket-events.jsonl (live TraceEvent x6 + PR opened/
decided), screenshots (Black Box, Receipts, Memory PRs, Graph), and PROOF.md
with real/caveat/stub per feature.

Gates: 988 lib tests pass, clippy -D warnings clean, dashboard check + build
clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sam Valladares 2026-06-22 17:41:02 -05:00
parent 80c823a3ca
commit b89beeeb63
19 changed files with 402 additions and 68 deletions

View file

@ -18,7 +18,14 @@
import Icon from '$components/Icon.svelte';
import AnimatedNumber from '$components/AnimatedNumber.svelte';
import { reveal } from '$lib/actions/reveal';
import { api, type TraceRunSummary, type TraceEvent, type TraceDetail } from '$lib/stores/api';
import ReceiptCard from '$components/ReceiptCard.svelte';
import {
api,
type TraceRunSummary,
type TraceEvent,
type TraceDetail,
type Receipt
} from '$lib/stores/api';
import { isConnected, liveRunId, lastTraceEvent, traceEvents } from '$lib/stores/websocket';
import {
eventColor,
@ -38,6 +45,7 @@
let error = $state<string | null>(null);
let scrubIndex = $state(0); // index into detail.events
let proofMode = $state(false);
let receipts = $state<Receipt[]>([]);
// The events up to and including the scrubber position — what the agent had
// "experienced" at that moment in the run.
@ -52,6 +60,15 @@
Array.from(new Set(visibleEvents.flatMap(eventMemoryIds)))
);
// Producer status for the selected run. Some event kinds only appear when an
// optional upstream producer is wired in (off by default), so the UI reports
// "did this kind fire?" explicitly rather than leaving a blank area.
// Every event type present in the loaded run is collected once; the set is
// empty while no run detail is loaded.
const emittedEventTypes = $derived(new Set(detail?.events.map((e) => e.type) ?? []));
const hasVeto = $derived(emittedEventTypes.has('sanhedrin.veto'));
const hasDream = $derived(emittedEventTypes.has('dream.patch'));
const hasContradiction = $derived(emittedEventTypes.has('contradiction.detected'));
async function loadRuns() {
try {
const res = await api.traces.list(100);
@ -69,6 +86,9 @@
try {
detail = await api.traces.get(runId);
scrubIndex = Math.max(0, (detail.events.length || 1) - 1);
// Receipts are the proof behind a run's retrievals. The list is
// recent-first; the newest typically belong to the just-selected run.
receipts = (await api.receipts.list(8)).receipts;
} catch (e) {
error = String(e);
detail = null;
@ -291,6 +311,49 @@
{/if}
</div>
<!--
Producer status — honest about what's live vs. off-by-default.
One list row per event source. A row carries the `ok` class when its
`has*` flag is true (the flag is derived from the loaded run's events);
rows marked `caveat` depend on optional producers and get the warning
styling while they have not fired in this run.
-->
<div class="producers glass" use:reveal>
<h3 class="panel-title">Event producers <span class="text-dim">— this run</span></h3>
<ul class="producer-list">
<!-- Core trace events. NOTE(review): this row is static markup, always rendered as "live" regardless of the run's events. -->
<li class="producer ok">
<span class="p-dot"></span> mcp.call · memory.write · memory.retrieve · memory.suppress
<span class="p-state">live</span>
</li>
<!-- Contradiction detection: no `caveat` class, so it stays neutral (not a warning) when nothing fired. -->
<li class="producer" class:ok={hasContradiction}>
<span class="p-dot"></span> contradiction.detected
<span class="p-state">
{hasContradiction ? 'fired this run' : 'no contradiction in this run'}
</span>
</li>
<!-- Dream patches: caveat row, shown as `ok` only when a dream.patch event exists in this run. -->
<li class="producer caveat" class:ok={hasDream}>
<span class="p-dot"></span> dream.patch
<span class="p-state">
{hasDream ? 'fired this run' : 'No dream run in this trace'}
</span>
</li>
<!-- Sanhedrin veto: caveat row, shown as `ok` only when a sanhedrin.veto event exists in this run. -->
<li class="producer caveat" class:ok={hasVeto}>
<span class="p-dot"></span> sanhedrin.veto
<span class="p-state">
{hasVeto ? 'fired this run' : 'No veto producer connected (optional Sanhedrin hook, off by default)'}
</span>
</li>
</ul>
</div>
<!--
Receipts — the nutrition label behind this run's retrievals.
The whole panel is omitted when `receipts` is empty. Only the first two
entries are rendered, keyed by `receipt_id`.
NOTE(review): `receipts` is filled from a recent-first list requested when
a run is selected; the request carries no run id, so the cards shown may
not all belong to the selected run — confirm before treating them as
proof for this specific run.
-->
{#if receipts.length}
<div class="receipts-panel glass" use:reveal>
<h3 class="panel-title">
Receipts <span class="text-dim">— proof behind retrievals</span>
</h3>
<div class="receipts-grid">
{#each receipts.slice(0, 2) as r (r.receipt_id)}
<ReceiptCard receipt={r} />
{/each}
</div>
</div>
{/if}
<!-- Full event log -->
<div class="log glass" use:reveal>
<h3 class="panel-title">Event log</h3>
@ -710,6 +773,64 @@
}
}
/* Receipts panel — padded container; same padding as the producers and log panels */
.receipts-panel {
padding: 16px 18px;
}
/* Card grid: fits as many equal-width columns as the panel allows, each at
   least 260px wide. `auto-fit` collapses unused tracks, so the cards stretch
   to fill the row. */
.receipts-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
gap: 12px;
}
/* Producers — honest event-source status.
   Row states are expressed with classes on `.producer`:
   - (none)              neutral: dim text, grey dot
   - `.ok`               fired/live: normal text, glowing recall-coloured dot
   - `.caveat:not(.ok)`  optional producer that has not fired: amber dot + amber state text
   The state selectors have higher specificity than the base rules, so they
   win regardless of source order. */
.producers {
padding: 16px 18px;
}
/* Plain vertical stack of rows with the default list styling removed */
.producer-list {
list-style: none;
margin: 0;
padding: 0;
display: flex;
flex-direction: column;
gap: 7px;
}
/* One row: dot, label, then the state text pushed to the right edge */
.producer {
display: flex;
align-items: center;
gap: 9px;
font-size: 0.78rem;
color: var(--color-text-dim, #8b8ba7);
}
/* Status dot — neutral grey by default; never shrinks when the label is long */
.producer .p-dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: #475569;
flex-shrink: 0;
}
/* Fired/live row: full-strength text colour */
.producer.ok {
color: var(--color-text, #e2e2f0);
}
/* Fired/live dot: recall colour with a soft glow */
.producer.ok .p-dot {
background: var(--color-recall, #10b981);
box-shadow: 0 0 6px -1px var(--color-recall, #10b981);
}
/* Optional producer that has not fired: muted amber dot */
.producer.caveat:not(.ok) .p-dot {
background: #f59e0b;
opacity: 0.6;
}
/* State text: `margin-left: auto` absorbs the free space so it sits flush right */
.p-state {
margin-left: auto;
font-size: 0.7rem;
font-style: italic;
text-align: right;
color: var(--color-text-dim, #8b8ba7);
}
/* Optional producer that has not fired: amber state text to match its dot */
.producer.caveat:not(.ok) .p-state {
color: #f59e0b;
}
/* Log */
.log {
padding: 16px 18px;

View file

@ -181,6 +181,10 @@
<!--
Manifesto: one-line statement of the Memory PR model, followed by a note
spelling out that risky writes are quarantine-reviewed (recorded, but held
out of retrieval until decided) rather than blocked before they are written.
-->
<div class="manifesto" use:reveal>
Vestige <strong>auto-remembers ordinary context</strong>, but opens a
<strong>Memory PR</strong> when the agent tries to <strong>rewrite its own brain</strong>.
<span class="manifesto-note">
Risky writes are <strong>quarantine-reviewed</strong>: recorded for audit, but held
out of retrieval until you decide — influence suspended, history preserved.
</span>
</div>
<!-- ░░ MODE TOGGLE ░░ -->
@ -374,6 +378,16 @@
/* Emphasised phrases in the manifesto use the synapse glow colour */
.manifesto strong {
color: var(--color-synapse-glow, #818cf8);
}
/* Quarantine note: rendered on its own line below the manifesto sentence,
   smaller and dimmer than the main text */
.manifesto-note {
display: block;
margin-top: 8px;
font-size: 0.82rem;
line-height: 1.5;
color: var(--color-text-dim, #c0c0d8);
}
/* Emphasis inside the note is amber instead of the manifesto colour.
   This selector has the same specificity as `.manifesto strong`, so it only
   wins because it comes later in the stylesheet — keep it below that rule. */
.manifesto-note strong {
color: #f59e0b;
}
.glass {
background: color-mix(in oklab, var(--color-void, #050510) 55%, transparent);