diff --git a/apps/dashboard/src/lib/stores/api.ts b/apps/dashboard/src/lib/stores/api.ts index cb9d79b..950ec99 100644 --- a/apps/dashboard/src/lib/stores/api.ts +++ b/apps/dashboard/src/lib/stores/api.ts @@ -146,6 +146,11 @@ export const api = { // Memory Receipts (v2.2): the nutrition label for a retrieval. receipts: { list: (limit = 50) => fetcher(`/receipts?limit=${limit}`), + // B5: scope to one run so the Black Box panel shows that run's receipts. + listForRun: (runId: string, limit = 50) => + fetcher( + `/receipts?run=${encodeURIComponent(runId)}&limit=${limit}` + ), get: (receiptId: string) => fetcher(`/receipts/${encodeURIComponent(receiptId)}`) }, diff --git a/apps/dashboard/src/routes/(app)/blackbox/+page.svelte b/apps/dashboard/src/routes/(app)/blackbox/+page.svelte index 45eaced..3187bf8 100644 --- a/apps/dashboard/src/routes/(app)/blackbox/+page.svelte +++ b/apps/dashboard/src/routes/(app)/blackbox/+page.svelte @@ -86,9 +86,9 @@ try { detail = await api.traces.get(runId); scrubIndex = Math.max(0, (detail.events.length || 1) - 1); - // Receipts are the proof behind a run's retrievals. The list is - // recent-first; the newest typically belong to the just-selected run. - receipts = (await api.receipts.list(8)).receipts; + // Receipts are the proof behind THIS run's retrievals — scoped to + // the selected run (B5), not the global latest. 
+ receipts = (await api.receipts.listForRun(runId, 8)).receipts; } catch (e) { error = String(e); detail = null; diff --git a/blackbox-proof-2026-06-22/REVIEW.md b/blackbox-proof-2026-06-22/REVIEW.md index 3fdd632..6cb4662 100644 --- a/blackbox-proof-2026-06-22/REVIEW.md +++ b/blackbox-proof-2026-06-22/REVIEW.md @@ -1,16 +1,20 @@ # Agent Black Box — Review Bundle -**Branch:** `feat/agent-black-box` -**Head:** `140b15f59fd496988ade57792bfc8b9a6acba70c` +**Branch:** `feat/agent-black-box` (head = branch tip) **Base (review against):** `9e92a5999ada37bed9b4820bb25b7748b417411c` (the `feat/dashboard-bleeding-edge` tip this branched from) -**Packaged:** 2026-06-22T22:57:59Z **Status:** feature work **frozen**. No quarantine-constellation work has -started. This branch is ready for a full review before anything else lands. +started. Start here, then read `PROOF.md` (the per-feature real/caveat/stub ledger) and open the screenshots. +> **Update — review findings addressed.** A full multi-agent review found 7 +> real issues (4 blockers). All are fixed and tested; this bundle was +> **re-captured from a single run (`run_proof`)** so trace.json, +> websocket-events.jsonl, and memory_pr.json now all carry the same runId. See +> "Review findings addressed" below. + --- ## Frozen public claim @@ -146,6 +150,28 @@ Key files to review: --- +## Review findings addressed (2026-06-22) + +A full read-only review (multiple parallel agents, both Rust and dashboard) +found 7 real issues — 4 blockers. 
All fixed and tested: + +| # | Severity | Finding | Fix | Proof | +|---|----------|---------|-----|-------| +| B1 | blocker | Promoting a Memory PR didn't unsuppress the quarantined memory — UI said "promoted" while the memory stayed out of retrieval | `act_on_memory_pr` now calls `reverse_suppression(subject_id)` on accept actions (promote/merge/supersede); `MemoryPrAction::releases_memory()` encodes the rule | live: PR response `subjectReleased: true`, SQLite `suppression_count: 0`; tests `promote_releases_a_quarantined_memory_end_to_end`, `only_accept_actions_release_the_memory` | +| B2 | blocker | memory promote/demote (returns `action`, not `decision`) and `codebase` writes bypassed the write-trace + gate | `extract_writes` reads `action` too, filtered by `is_write_decision`; `is_write_tool` includes `codebase` | tests `extract_writes_recognizes_action_shape_b2`, `extract_writes_ignores_read_actions_b2`, `write_tool_set_includes_codebase_b2` | +| B3 | blocker | Receipt ids collided within a run (`r_{date}_{run}` + `INSERT OR REPLACE`) — later receipt overwrote earlier | id is now `r_{date}_{run}_{unique}` | live: two receipts in `run_proof` have distinct ids; test `receipt_ids_unique_within_a_run_b3` | +| B4 | blocker | Proof bundle mis-assembled: `trace.json`=`run_proof` but `websocket-events.jsonl`=`run_proof2` | re-captured the whole bundle from one run | all artifacts now carry `run_proof` (verified) | +| B5 | P2 | Black Box receipts panel showed global latest, not the selected run's | `list_receipts_for_run` + `/api/receipts?run=` + page uses `listForRun(runId)` | live: `?run=run_proof` returns only that run; test `receipts_are_listable_per_run_b5` | +| B6 | P2 | `SENSITIVE_TOPICS` substring match false-fired ("tokenizer"→token, "author"→auth) | word-boundary matching | tests `sensitive_topic_word_boundary_no_false_positives_b6`, `..._still_catches_real_b6` | +| B7 | P3 | `set_review_mode` non-atomic write; export filename used raw `run_id` | `write_atomic` (temp+rename); filename 
sanitized; static routes declared before dynamic | covered by build + the atomic-write helper's existing use | + +One earlier (self-)review claim was **withdrawn**: the `/api/memory-prs/mode` +vs `/{id}` route order is *not* a functional bug — axum 0.8 / matchit gives +static segments priority. Reordered for clarity only. + +Net after fixes: **999 lib tests pass, clippy `-D warnings` clean, dashboard +check + build clean.** + ## Reproduce (any reviewer, locally) ```sh diff --git a/blackbox-proof-2026-06-22/memory_pr.json b/blackbox-proof-2026-06-22/memory_pr.json index 596bcfa..b03c13b 100644 --- a/blackbox-proof-2026-06-22/memory_pr.json +++ b/blackbox-proof-2026-06-22/memory_pr.json @@ -1,12 +1,12 @@ { - "created_at": "2026-06-22T22:29:39.100921+00:00", - "decided_at": "2026-06-22T22:29:42.563122+00:00", + "created_at": "2026-06-22T23:39:30.596744+00:00", + "decided_at": "2026-06-22T23:39:44.258862+00:00", "decision": "promote", "diff": { "decision": "create", "node": { "content": "Store the production auth token and security credential for deploys.", - "id": "17b8c285-5418-4402-9e63-a92d4ae64eaf", + "id": "e22e83f3-2c18-4e33-93f4-558d91009505", "nodeType": "fact", "tags": [ "security", @@ -14,7 +14,7 @@ ] } }, - "id": "pr_bf0aec4483494713a01e4b0f5c15acb3", + "id": "pr_3c5b4b2852e74f1ab7c325a7e9cb6e1f", "kind": "new_fact", "run_id": "run_proof", "signals": [ @@ -24,6 +24,6 @@ } ], "status": "promoted", - "subject_id": "17b8c285-5418-4402-9e63-a92d4ae64eaf", + "subject_id": "e22e83f3-2c18-4e33-93f4-558d91009505", "title": "New fact pending review: \"Store the production auth token and security credential for deploys.\"" } diff --git a/blackbox-proof-2026-06-22/receipt.json b/blackbox-proof-2026-06-22/receipt.json index aee5329..664fd04 100644 --- a/blackbox-proof-2026-06-22/receipt.json +++ b/blackbox-proof-2026-06-22/receipt.json @@ -1,17 +1,11 @@ { - "activation_path": [ - "SYNTHESIS: \"trace spine\"\n\nPRIMARY FINDING (trust 62%, Jun 22, 2026): Proof sequence: 
ordinary memory about the trace spine.\nNO CONTRADICTIONS DETECTED. Evidence is consistent.\nOVERALL CONFIDENCE: 93%\n" - ], - "decay_risk": "medium", + "activation_path": [], + "decay_risk": "high", "mutations": [], - "receipt_id": "r_2026_06_22_runproof", + "receipt_id": "r_2026_06_22_runproof_7f144c", "retrieved": [ - "be5c621b-526d-494b-a517-6977f5bf6044", - "03cf3f8a-19a4-4317-acd9-222083d5e5c7", - "bf444160-75e6-4d75-b351-c05d5cfd53fc", - "17b8c285-5418-4402-9e63-a92d4ae64eaf", - "f2548f78-a85b-44c4-9356-4b6d3c0b48f1" + "9d975b31-e4b2-425c-902a-c17fef9dd4cb" ], "suppressed": [], - "trust_floor": 0.53 + "trust_floor": 0.0 } diff --git a/blackbox-proof-2026-06-22/screenshots/black-box.png b/blackbox-proof-2026-06-22/screenshots/black-box.png index 0700273..25d524e 100644 Binary files a/blackbox-proof-2026-06-22/screenshots/black-box.png and b/blackbox-proof-2026-06-22/screenshots/black-box.png differ diff --git a/blackbox-proof-2026-06-22/screenshots/memory-prs.png b/blackbox-proof-2026-06-22/screenshots/memory-prs.png index 48298a2..5d1e54a 100644 Binary files a/blackbox-proof-2026-06-22/screenshots/memory-prs.png and b/blackbox-proof-2026-06-22/screenshots/memory-prs.png differ diff --git a/blackbox-proof-2026-06-22/screenshots/receipts.png b/blackbox-proof-2026-06-22/screenshots/receipts.png index eb84e8f..e443946 100644 Binary files a/blackbox-proof-2026-06-22/screenshots/receipts.png and b/blackbox-proof-2026-06-22/screenshots/receipts.png differ diff --git a/blackbox-proof-2026-06-22/status.json b/blackbox-proof-2026-06-22/status.json index 67ebaad..11e1fbb 100644 --- a/blackbox-proof-2026-06-22/status.json +++ b/blackbox-proof-2026-06-22/status.json @@ -1 +1 @@ -{"averageRetention":0.9280000000000002,"status":"healthy","totalMemories":5,"version":"2.1.27"} \ No newline at end of file +{"averageRetention":0.99,"status":"healthy","totalMemories":4,"version":"2.1.27"} \ No newline at end of file diff --git a/blackbox-proof-2026-06-22/trace.json 
b/blackbox-proof-2026-06-22/trace.json index d1a8954..604c25b 100644 --- a/blackbox-proof-2026-06-22/trace.json +++ b/blackbox-proof-2026-06-22/trace.json @@ -1 +1 @@ -{"events":[{"argsHash":"e40fbe42b2ef16a8","at":1782167372948,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167373060,"diff":{"decision":"create"},"id":"bf444160-75e6-4d75-b351-c05d5cfd53fc","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"e2b343d37cf16f91","at":1782167374461,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167374549,"diff":{"decision":"create"},"id":"f2548f78-a85b-44c4-9356-4b6d3c0b48f1","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"2639fbc239e17a3d","at":1782167375975,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167376069,"diff":{"decision":"create"},"id":"03cf3f8a-19a4-4317-acd9-222083d5e5c7","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"6fbbc76c4e98fa50","at":1782167377489,"runId":"run_proof","tool":"deep_reference","type":"mcp.call"},{"activation":{"03cf3f8a-19a4-4317-acd9-222083d5e5c7":0.62,"bf444160-75e6-4d75-b351-c05d5cfd53fc":0.62,"f2548f78-a85b-44c4-9356-4b6d3c0b48f1":0.62},"at":1782167377558,"ids":["bf444160-75e6-4d75-b351-c05d5cfd53fc","03cf3f8a-19a4-4317-acd9-222083d5e5c7","f2548f78-a85b-44c4-9356-4b6d3c0b48f1"],"runId":"run_proof","type":"memory.retrieve"},{"argsHash":"db928bbabc9cadd7","at":1782167379002,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782167379100,"diff":{"decision":"create"},"id":"17b8c285-5418-4402-9e63-a92d4ae64eaf","runId":"run_proof","source":"agent","type":"memory.write"}],"exportedAt":"2026-06-22T22:32:47.825129+00:00","format":"vestige-trace","runId":"run_proof","summary":{"eventCount":10,"firstTool":"smart_ingest","lastAt":1782167379100,"retrievedCount":3,"startedAt":1782167372948,"suppressedCount":0,"vetoCount":0,"writeCount":4},"version":1} \ No newline at end of file 
+{"events":[{"argsHash":"13a481da3e53d0fd","at":1782171564842,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782171564940,"diff":{"decision":"create"},"id":"9d975b31-e4b2-425c-902a-c17fef9dd4cb","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"52d9b2533542a2eb","at":1782171566254,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782171566340,"diff":{"decision":"create"},"id":"bef6710c-a1ee-4cb3-8a33-82aac2fdaee6","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"2639fbc239e17a3d","at":1782171567668,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782171567761,"diff":{"decision":"create"},"id":"923709a5-cc60-4f41-b8b1-ef1a635fe6aa","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"6fbbc76c4e98fa50","at":1782171569082,"runId":"run_proof","tool":"deep_reference","type":"mcp.call"},{"activation":{"923709a5-cc60-4f41-b8b1-ef1a635fe6aa":0.62,"9d975b31-e4b2-425c-902a-c17fef9dd4cb":0.62,"bef6710c-a1ee-4cb3-8a33-82aac2fdaee6":0.62},"at":1782171569148,"ids":["9d975b31-e4b2-425c-902a-c17fef9dd4cb","923709a5-cc60-4f41-b8b1-ef1a635fe6aa","bef6710c-a1ee-4cb3-8a33-82aac2fdaee6"],"runId":"run_proof","type":"memory.retrieve"},{"argsHash":"db928bbabc9cadd7","at":1782171570495,"runId":"run_proof","tool":"smart_ingest","type":"mcp.call"},{"at":1782171570596,"diff":{"decision":"create"},"id":"e22e83f3-2c18-4e33-93f4-558d91009505","runId":"run_proof","source":"agent","type":"memory.write"},{"argsHash":"78a1e9038e3e5136","at":1782171606233,"runId":"run_proof","tool":"deep_reference","type":"mcp.call"},{"activation":{"923709a5-cc60-4f41-b8b1-ef1a635fe6aa":0.62,"9d975b31-e4b2-425c-902a-c17fef9dd4cb":0.62,"bef6710c-a1ee-4cb3-8a33-82aac2fdaee6":0.62,"e22e83f3-2c18-4e33-93f4-558d91009505":0.57},"at":1782171606293,"ids":["bef6710c-a1ee-4cb3-8a33-82aac2fdaee6","e22e83f3-2c18-4e33-93f4-558d91009505","9d975b31-e4b2-425c-902a-c17fef9dd4cb","923709a5-cc60-4f41-b8b1-ef1a635f
e6aa"],"runId":"run_proof","type":"memory.retrieve"},{"argsHash":"13a31297fe007a2e","at":1782171625380,"runId":"run_proof","tool":"deep_reference","type":"mcp.call"},{"activation":{"923709a5-cc60-4f41-b8b1-ef1a635fe6aa":0.62,"9d975b31-e4b2-425c-902a-c17fef9dd4cb":0.62,"bef6710c-a1ee-4cb3-8a33-82aac2fdaee6":0.62,"e22e83f3-2c18-4e33-93f4-558d91009505":0.58},"at":1782171625436,"ids":["923709a5-cc60-4f41-b8b1-ef1a635fe6aa","e22e83f3-2c18-4e33-93f4-558d91009505","9d975b31-e4b2-425c-902a-c17fef9dd4cb","bef6710c-a1ee-4cb3-8a33-82aac2fdaee6"],"runId":"run_proof","type":"memory.retrieve"},{"argsHash":"ac19c646baf0673d","at":1782171626392,"runId":"run_proof","tool":"search","type":"mcp.call"},{"activation":{},"at":1782171626402,"ids":["9d975b31-e4b2-425c-902a-c17fef9dd4cb"],"runId":"run_proof","type":"memory.retrieve"}],"exportedAt":"2026-06-22T23:42:58.560420+00:00","format":"vestige-trace","runId":"run_proof","summary":{"eventCount":16,"firstTool":"smart_ingest","lastAt":1782171626402,"retrievedCount":12,"startedAt":1782171564842,"suppressedCount":0,"vetoCount":0,"writeCount":4},"version":1} \ No newline at end of file diff --git a/blackbox-proof-2026-06-22/websocket-events.jsonl b/blackbox-proof-2026-06-22/websocket-events.jsonl index 7b774a0..5c003a7 100644 --- a/blackbox-proof-2026-06-22/websocket-events.jsonl +++ b/blackbox-proof-2026-06-22/websocket-events.jsonl @@ -1,11 +1,7 @@ -{"data": {"timestamp": "2026-06-22T22:30:11.006283+00:00", "version": "2.1.27"}, "type": "Connected"} -{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 0, "event": {"type": "mcp.call", "runId": "run_proof2", "tool": "smart_ingest", "argsHash": "24a3d20d96d9ef5e", "at": 1782167412925}, "timestamp": "2026-06-22T22:30:12.925584Z"}} -{"type": "MemoryCreated", "data": {"id": "be5c621b-526d-494b-a517-6977f5bf6044", "content_preview": "", "node_type": "fact", "tags": [], "timestamp": "2026-06-22T22:30:13.008685Z"}} -{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 1, 
"event": {"type": "memory.write", "runId": "run_proof2", "id": "be5c621b-526d-494b-a517-6977f5bf6044", "diff": {"decision": "create"}, "source": "agent", "at": 1782167413008}, "timestamp": "2026-06-22T22:30:13.008818Z"}} -{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 2, "event": {"type": "mcp.call", "runId": "run_proof2", "tool": "deep_reference", "argsHash": "7b0ad10f7740fb3c", "at": 1782167414438}, "timestamp": "2026-06-22T22:30:14.438973Z"}} -{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 3, "event": {"type": "memory.retrieve", "runId": "run_proof2", "ids": ["be5c621b-526d-494b-a517-6977f5bf6044", "03cf3f8a-19a4-4317-acd9-222083d5e5c7", "bf444160-75e6-4d75-b351-c05d5cfd53fc", "17b8c285-5418-4402-9e63-a92d4ae64eaf", "f2548f78-a85b-44c4-9356-4b6d3c0b48f1"], "activation": {"03cf3f8a-19a4-4317-acd9-222083d5e5c7": 0.62, "17b8c285-5418-4402-9e63-a92d4ae64eaf": 0.53, "be5c621b-526d-494b-a517-6977f5bf6044": 0.62, "bf444160-75e6-4d75-b351-c05d5cfd53fc": 0.62, "f2548f78-a85b-44c4-9356-4b6d3c0b48f1": 0.62}, "at": 1782167414496}, "timestamp": "2026-06-22T22:30:14.497019Z"}} -{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 4, "event": {"type": "mcp.call", "runId": "run_proof2", "tool": "smart_ingest", "argsHash": "f8eca05a58973dec", "at": 1782167415953}, "timestamp": "2026-06-22T22:30:15.953977Z"}} -{"type": "MemoryUpdated", "data": {"id": "17b8c285-5418-4402-9e63-a92d4ae64eaf", "content_preview": "", "field": "update", "timestamp": "2026-06-22T22:30:16.046915Z"}} -{"type": "TraceEvent", "data": {"run_id": "run_proof2", "seq": 5, "event": {"type": "memory.write", "runId": "run_proof2", "id": "17b8c285-5418-4402-9e63-a92d4ae64eaf", "diff": {"decision": "update"}, "source": "agent", "at": 1782167416046}, "timestamp": "2026-06-22T22:30:16.047026Z"}} -{"type": "MemoryPrOpened", "data": {"id": "pr_f49e0ae776f545cd9a4c502800684078", "kind": "new_fact", "title": "New fact pending review: \"Store the production auth token and security 
credential for deploys.\n\n[Updated 2\"", "signal_count": 1, "run_id": "run_proof2", "timestamp": "2026-06-22T22:30:16.047396Z"}} -{"type": "MemoryPrDecided", "data": {"id": "pr_f49e0ae776f545cd9a4c502800684078", "decision": "promote", "status": "promoted", "timestamp": "2026-06-22T22:30:18.515375Z"}} +{"data": {"timestamp": "2026-06-22T23:40:23.469437+00:00", "version": "2.1.27"}, "type": "Connected"} +{"type": "TraceEvent", "data": {"run_id": "run_proof", "seq": 12, "event": {"type": "mcp.call", "runId": "run_proof", "tool": "deep_reference", "argsHash": "13a31297fe007a2e", "at": 1782171625380}, "timestamp": "2026-06-22T23:40:25.381237Z"}} +{"type": "TraceEvent", "data": {"run_id": "run_proof", "seq": 13, "event": {"type": "memory.retrieve", "runId": "run_proof", "ids": ["923709a5-cc60-4f41-b8b1-ef1a635fe6aa", "e22e83f3-2c18-4e33-93f4-558d91009505", "9d975b31-e4b2-425c-902a-c17fef9dd4cb", "bef6710c-a1ee-4cb3-8a33-82aac2fdaee6"], "activation": {"923709a5-cc60-4f41-b8b1-ef1a635fe6aa": 0.62, "9d975b31-e4b2-425c-902a-c17fef9dd4cb": 0.62, "bef6710c-a1ee-4cb3-8a33-82aac2fdaee6": 0.62, "e22e83f3-2c18-4e33-93f4-558d91009505": 0.58}, "at": 1782171625436}, "timestamp": "2026-06-22T23:40:25.436463Z"}} +{"type": "TraceEvent", "data": {"run_id": "run_proof", "seq": 14, "event": {"type": "mcp.call", "runId": "run_proof", "tool": "search", "argsHash": "ac19c646baf0673d", "at": 1782171626392}, "timestamp": "2026-06-22T23:40:26.392602Z"}} +{"type": "SearchPerformed", "data": {"query": "dashboard", "result_count": 1, "result_ids": ["9d975b31-e4b2-425c-902a-c17fef9dd4cb"], "duration_ms": 0, "timestamp": "2026-06-22T23:40:26.402765Z"}} +{"type": "TraceEvent", "data": {"run_id": "run_proof", "seq": 15, "event": {"type": "memory.retrieve", "runId": "run_proof", "ids": ["9d975b31-e4b2-425c-902a-c17fef9dd4cb"], "activation": {}, "at": 1782171626402}, "timestamp": "2026-06-22T23:40:26.402863Z"}} +{"type": "DeepReferenceCompleted", "data": {"query": "Composing: 
`2be410de-0bc7-4242-b640-8f859b43efe3`, `5107e14e-19ac-4ed0-8add-18e7b2977723`, `3c083fb3-61e1-4aa0-b6a8-0973e0f3bb57`. These connect the real v2.1.27 public baseline, the Black Box review findings, and the older dashboard graph/live-memory evolution.\n\nNever-composed detected: None.\n\nRecommendation: treat this as a major product-generation jump, but harden the receipt semantics before launch footage.\n\nYes. This is a **major step-up** from the current GitHub `v2.1.27` dashboard.\n\nThe cleanest way to say it:\n\n`v2.1.27` dashboard = **inspectable local memory system** \nBlack Box branch = **agent cognition flight recorder + governance layer**\n\nThat is not just \u201cnew tabs.\u201d It changes the product category.\n\nIn `v2.1.27`, the dashboard proves Vestige has local memory, graph visibility, dreams/reasoning surfaces, source-aware connector work, and inspectable state. Strong, but mostly it shows **what exists in memory**.\n\nThis branch shows **what the agent did with memory during a run**:\n\n- every MCP call gets a `runId`\n- retrievals become replayable trace events\n- receipts show what memories influenced the answer\n- risky writes open Memory PRs\n- WebSocket events make the graph/dashboard pulse live\n- traces export as artifacts\n- `vestige://trace/{runId}` turns the trace into an MCP-readable receipt\n\nThat is a different league. 
It moves Vestige from \u201cmemory dashboard\u201d to **black box recorder for agents**.\n\nMy honest rating:\n\n- UI/product experience: **2-3x more advanced**\n- de", "intent": "Comparison", "status": "partial_evidence", "confidence": 0.52, "primary_id": "923709a5-cc60-4f41-b8b1-ef1a635fe6aa", "supporting_ids": ["923709a5-cc60-4f41-b8b1-ef1a635fe6aa", "9d975b31-e4b2-425c-902a-c17fef9dd4cb", "e22e83f3-2c18-4e33-93f4-558d91009505", "bef6710c-a1ee-4cb3-8a33-82aac2fdaee6"], "contradicting_ids": [], "contradiction_pairs": [], "memories_analyzed": 4, "duration_ms": 980, "timestamp": "2026-06-22T23:42:50.152360Z"}} diff --git a/crates/vestige-core/src/storage/trace_store.rs b/crates/vestige-core/src/storage/trace_store.rs index 92d2db2..5ee2258 100644 --- a/crates/vestige-core/src/storage/trace_store.rs +++ b/crates/vestige-core/src/storage/trace_store.rs @@ -282,6 +282,31 @@ impl SqliteMemoryStore { Ok(out) } + /// List the receipts belonging to one run, newest first (B5). The Black Box + /// receipts panel uses this so the receipts it shows actually belong to the + /// selected run, not the global latest. + pub fn list_receipts_for_run(&self, run_id: &str, limit: usize) -> Result> { + let reader = self + .reader + .lock() + .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?; + let mut stmt = reader.prepare( + "SELECT payload FROM memory_receipts WHERE run_id = ?1 + ORDER BY created_at DESC LIMIT ?2", + )?; + let rows = stmt.query_map(params![run_id, limit as i64], |row| { + let p: String = row.get(0)?; + Ok(p) + })?; + let mut out = Vec::new(); + for r in rows { + if let Ok(rc) = serde_json::from_str::(&r?) 
{ + out.push(rc); + } + } + Ok(out) + } + // ======================================================================== // MEMORY PRs — the risk-gated review queue // ======================================================================== @@ -534,6 +559,37 @@ mod tests { assert_eq!(s.list_receipts(10).unwrap().len(), 1); } + #[test] + fn receipts_are_listable_per_run_b5() { + let s = store(); + let mk = |id: &str| Receipt { + receipt_id: id.into(), + retrieved: vec!["m1".into()], + suppressed: vec![], + activation_path: vec![], + trust_floor: 0.9, + decay_risk: DecayRisk::Low, + mutations: vec![], + }; + s.save_receipt(&mk("r_a1"), Some("run_a"), Some("search"), None) + .unwrap(); + s.save_receipt(&mk("r_a2"), Some("run_a"), Some("search"), None) + .unwrap(); + s.save_receipt(&mk("r_b1"), Some("run_b"), Some("search"), None) + .unwrap(); + + let run_a = s.list_receipts_for_run("run_a", 10).unwrap(); + assert_eq!(run_a.len(), 2, "run_a has exactly its 2 receipts"); + assert!(run_a.iter().all(|r| r.receipt_id.starts_with("r_a"))); + + let run_b = s.list_receipts_for_run("run_b", 10).unwrap(); + assert_eq!(run_b.len(), 1, "run_b has only its own receipt"); + assert_eq!(run_b[0].receipt_id, "r_b1"); + + // Global list still sees all three. + assert_eq!(s.list_receipts(10).unwrap().len(), 3); + } + #[test] fn memory_pr_lifecycle() { let s = store(); @@ -569,6 +625,39 @@ mod tests { assert_eq!(s.count_pending_memory_prs().unwrap(), 0); } + #[test] + fn promote_releases_a_quarantined_memory_end_to_end() { + // B1 regression: the full quarantine→release cycle at the storage layer. + // gate_writes suppresses a risky write; an accept action must reverse it. 
+ let s = store(); + let node = s + .ingest(crate::IngestInput { + content: "Risky write that got quarantined.".to_string(), + node_type: "fact".to_string(), + ..Default::default() + }) + .expect("ingest"); + assert_eq!(node.suppression_count, 0, "fresh node not suppressed"); + + // Quarantine it (what gate_writes does for a risky write). + let suppressed = s.suppress_memory(&node.id).expect("suppress"); + assert_eq!( + suppressed.suppression_count, 1, + "quarantined write is suppressed (held out of retrieval)" + ); + + // Promote = release. (The action releases_memory() == true; the handler + // calls reverse_suppression on the subject.) + assert!(crate::MemoryPrAction::Promote.releases_memory()); + let released = s + .reverse_suppression(&node.id, 24) + .expect("reverse suppression within labile window"); + assert_eq!( + released.suppression_count, 0, + "promoting the PR must release the memory — not leave it suppressed" + ); + } + #[test] fn ask_agent_why_is_not_a_decision() { let s = store(); diff --git a/crates/vestige-core/src/trace/receipt.rs b/crates/vestige-core/src/trace/receipt.rs index 7501d14..1ac3fbf 100644 --- a/crates/vestige-core/src/trace/receipt.rs +++ b/crates/vestige-core/src/trace/receipt.rs @@ -59,9 +59,13 @@ pub struct Receipt { impl Receipt { /// Build a receipt from already-computed retrieval signals. /// - /// `receipt_id` is derived from `now` + a short discriminator so it is both - /// human-legible and collision-resistant within a day. `trust_scores` is the - /// per-id FSRS retrievability/trust the pipeline already produced. + /// `receipt_id` is `r_{date}_{discriminator}_{unique}` — human-legible + /// and dated, with a short random suffix so that multiple retrievals in + /// the same run get distinct ids (B3). The discriminator (usually the runId) + /// keeps receipts from one run visually grouped; the 6-character random suffix + /// makes an `INSERT OR REPLACE` overwrite very unlikely (not impossible). 
+ /// `trust_scores` is the per-id FSRS retrievability/trust the pipeline + /// already produced. pub fn build( now: chrono::DateTime, discriminator: &str, @@ -70,6 +74,32 @@ impl Receipt { activation_path: Vec, trust_scores: &[f64], mutations: Vec, + ) -> Self { + Self::build_with_unique( + now, + discriminator, + &uuid::Uuid::new_v4().simple().to_string()[..6], + retrieved, + suppressed, + activation_path, + trust_scores, + mutations, + ) + } + + /// Like [`Receipt::build`] but with a caller-supplied uniqueness token, + /// so the id is fully deterministic for tests. Production uses + /// [`Receipt::build`] which mints a random token. + #[allow(clippy::too_many_arguments)] + pub fn build_with_unique( + now: chrono::DateTime, + discriminator: &str, + unique: &str, + retrieved: Vec, + suppressed: Vec, + activation_path: Vec, + trust_scores: &[f64], + mutations: Vec, ) -> Self { let trust_floor = trust_scores .iter() @@ -87,7 +117,17 @@ impl Receipt { .filter(|c| c.is_ascii_alphanumeric()) .take(8) .collect(); - let receipt_id = format!("r_{}_{}", now.format("%Y_%m_%d"), short); + let unique_clean: String = unique + .chars() + .filter(|c| c.is_ascii_alphanumeric()) + .take(6) + .collect(); + let receipt_id = format!( + "r_{}_{}_{}", + now.format("%Y_%m_%d"), + short, + unique_clean + ); Self { receipt_id, @@ -182,16 +222,31 @@ mod tests { #[test] fn receipt_id_is_human_legible_and_dated() { - let r = Receipt::build( + let r = Receipt::build_with_unique( fixed_now(), "abc123!!", + "u1u2u3", vec!["mem_1".into()], vec![], vec![], &[0.9], vec![], ); - assert_eq!(r.receipt_id, "r_2026_06_22_abc123"); + assert_eq!(r.receipt_id, "r_2026_06_22_abc123_u1u2u3"); + } + + #[test] + fn receipt_ids_unique_within_a_run_b3() { + // B3: two retrievals in the SAME run (same date + discriminator) must + // get DISTINCT ids so INSERT OR REPLACE can't overwrite the first. 
+ let a = Receipt::build(fixed_now(), "run_x", vec![], vec![], vec![], &[], vec![]); + let b = Receipt::build(fixed_now(), "run_x", vec![], vec![], vec![], &[], vec![]); + assert_ne!( + a.receipt_id, b.receipt_id, + "same-run receipts must not collide" + ); + assert!(a.receipt_id.starts_with("r_2026_06_22_runx_")); + assert!(b.receipt_id.starts_with("r_2026_06_22_runx_")); } #[test] diff --git a/crates/vestige-core/src/trace/review.rs b/crates/vestige-core/src/trace/review.rs index 49955f5..7c63f55 100644 --- a/crates/vestige-core/src/trace/review.rs +++ b/crates/vestige-core/src/trace/review.rs @@ -183,6 +183,11 @@ const SENSITIVE_TOPICS: &[(&str, &str)] = &[ ("api key", "credential / API key"), ("security", "security-relevant fact"), ("vuln", "security vulnerability"), + ("vulnerability", "security vulnerability"), + ("credential", "credential material"), + ("credentials", "credential material"), + ("api key", "credential / API key"), + ("apikey", "credential / API key"), // money / bounty / legal ("money", "financial fact"), ("payment", "financial fact"), @@ -345,21 +350,50 @@ fn collect_signals(ctx: &WriteContext) -> Vec { } /// Return the human label of the first sensitive topic found in content/tags. +/// +/// B6: matches on WORD BOUNDARIES, not substrings — so "tokenizer" no longer +/// trips "token", "author" no longer trips "auth", "secretary" no longer trips +/// "secret". Multi-word needles (e.g. "api key") match a consecutive run of +/// words. The text is lowercased and split on any non-alphanumeric char. fn first_sensitive_topic(content: &str, tags: &[String]) -> Option<&'static str> { - let haystack = { - let mut s = content.to_ascii_lowercase(); - for t in tags { - s.push(' '); - s.push_str(&t.to_ascii_lowercase()); + // Tokenize content + tags into lowercased alphanumeric words. 
+ let mut words: Vec = Vec::new(); + let mut push_words = |s: &str| { + for w in s + .to_ascii_lowercase() + .split(|c: char| !c.is_ascii_alphanumeric()) + { + if !w.is_empty() { + words.push(w.to_string()); + } } - s }; + push_words(content); + for t in tags { + push_words(t); + } + SENSITIVE_TOPICS .iter() - .find(|(needle, _)| haystack.contains(needle)) + .find(|(needle, _)| matches_word_sequence(&words, needle)) .map(|(_, label)| *label) } +/// Whether `needle` (one or more space-separated words) appears as a consecutive +/// whole-word run in `words`. +fn matches_word_sequence(words: &[String], needle: &str) -> bool { + let needle_words: Vec<&str> = needle.split_whitespace().collect(); + if needle_words.is_empty() { + return false; + } + if needle_words.len() == 1 { + return words.iter().any(|w| w == needle_words[0]); + } + words + .windows(needle_words.len()) + .any(|win| win.iter().zip(&needle_words).all(|(w, n)| w == n)) +} + // ============================================================================ // MEMORY PR DATA MODEL // ============================================================================ @@ -490,6 +524,21 @@ impl MemoryPrAction { MemoryPrAction::AskAgentWhy => return None, }) } + + /// Whether deciding the PR with this action should **release** the subject + /// memory from quarantine (reverse the suppression that gate_writes applied). + /// + /// A risky write is committed-then-suppressed; approving it must restore its + /// retrieval influence, otherwise the UI says "promoted" while the memory + /// stays held out — the bug this guards against. Accept actions release; + /// `Quarantine` keeps it held; `Forget` rejects it (stays suppressed); + /// `AskAgentWhy` is read-only. + pub fn releases_memory(&self) -> bool { + matches!( + self, + MemoryPrAction::Promote | MemoryPrAction::Merge | MemoryPrAction::Supersede + ) + } } /// A reviewable change to the agent's brain — the persisted Memory PR record. 
@@ -609,6 +658,55 @@ mod tests { } } + #[test] + fn sensitive_topic_word_boundary_no_false_positives_b6() { + // B6: these ordinary technical writes must NOT gate — they only CONTAIN + // a sensitive substring, they don't USE the sensitive word. + // These each only CONTAIN a sensitive substring; the word-boundary fix + // means they no longer gate. (Note: bare "license"/"contract"/"legal" + // ARE kept as gating words — a license/contract fact is legitimately + // legal-relevant — so they're intentionally not in this benign set.) + for benign in [ + "The tokenizer converts input strings to embeddings.", + "The author of this module is documented in the header.", + "The secretary pattern coordinates the worker pool.", + "Contraction of the array happens during compaction.", + "The authority record links to the canonical node.", + "The authentication-free endpoint is for health checks.", // "authentication" != "auth" + ] { + let mut ctx = ordinary(); + ctx.content = benign.into(); + ctx.node_type = "fact".into(); + ctx.tags = vec![]; + let (class, _) = classify_write(&ctx, ReviewMode::RiskGated); + assert_eq!( + class, + RiskClass::AutoCommit, + "must NOT gate ordinary write: {benign}" + ); + } + } + + #[test] + fn sensitive_topic_word_boundary_still_catches_real_b6() { + // The real sensitive phrasings must still gate. 
+ for risky in [ + "store the auth token for the deploy", + "this is a security vulnerability in the parser", + "the api key for the service", + "remember the user preference for dark mode", + "the bounty payout is configured", + ] { + let mut ctx = ordinary(); + ctx.content = risky.into(); + ctx.node_type = "fact".into(); + ctx.tags = vec![]; + let (class, signals) = classify_write(&ctx, ReviewMode::RiskGated); + assert_eq!(class, RiskClass::Review, "must gate: {risky}"); + assert!(signals.iter().any(|s| s.code == "sensitive_topic")); + } + } + #[test] fn sensitive_node_type_gates() { let mut ctx = ordinary(); @@ -689,4 +787,16 @@ mod tests { ); assert_eq!(MemoryPrAction::AskAgentWhy.resulting_status(), None); } + + #[test] + fn only_accept_actions_release_the_memory() { + // B1: accepting a risky write must release it from quarantine. + assert!(MemoryPrAction::Promote.releases_memory()); + assert!(MemoryPrAction::Merge.releases_memory()); + assert!(MemoryPrAction::Supersede.releases_memory()); + // Rejecting / holding keeps it suppressed. + assert!(!MemoryPrAction::Forget.releases_memory()); + assert!(!MemoryPrAction::Quarantine.releases_memory()); + assert!(!MemoryPrAction::AskAgentWhy.releases_memory()); + } } diff --git a/crates/vestige-mcp/src/dashboard/handlers.rs b/crates/vestige-mcp/src/dashboard/handlers.rs index 0d6af3c..a7665b9 100644 --- a/crates/vestige-mcp/src/dashboard/handlers.rs +++ b/crates/vestige-mcp/src/dashboard/handlers.rs @@ -1990,6 +1990,8 @@ pub async fn deep_reference_query( #[derive(Debug, Deserialize)] pub struct TraceListParams { pub limit: Option, + /// Optional run filter — receipts/traces scoped to one run (B5). + pub run: Option, } /// List recent agent runs (newest activity first) for the Black Box run picker. 
@@ -2083,6 +2085,18 @@ pub async fn export_trace( })), "events": events, }); + // B7: sanitize the run_id before putting it in the download filename so a + // crafted run_id (quotes, path separators, control chars) can't break the + // Content-Disposition header or the filename. Falls back to "trace". + let safe: String = run_id + .chars() + .map(|c| if c.is_ascii_alphanumeric() || c == '_' || c == '-' { c } else { '_' }) + .collect(); + let safe = if safe.trim_matches('_').is_empty() { + "trace".to_string() + } else { + safe + }; let headers = [ ( axum::http::header::CONTENT_TYPE, @@ -2090,7 +2104,7 @@ pub async fn export_trace( ), ( axum::http::header::CONTENT_DISPOSITION, - format!("attachment; filename=\"{run_id}.vestige-trace.json\""), + format!("attachment; filename=\"{safe}.vestige-trace.json\""), ), ]; Ok((headers, Json(body))) @@ -2106,10 +2120,13 @@ pub async fn list_receipts( Query(params): Query, ) -> Result, StatusCode> { let limit = params.limit.unwrap_or(50).clamp(1, 500); - let receipts = state - .storage - .list_receipts(limit) - .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + // B5: when a run is given, scope to that run's receipts so the Black Box + // panel shows only receipts that actually belong to the selected run. + let receipts = match params.run.as_deref().filter(|r| !r.is_empty()) { + Some(run_id) => state.storage.list_receipts_for_run(run_id, limit), + None => state.storage.list_receipts(limit), + } + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; Ok(Json(serde_json::json!({ "total": receipts.len(), "receipts": receipts, @@ -2207,6 +2224,39 @@ pub async fn act_on_memory_pr( .decide_memory_pr(&id, action) .map_err(|_| StatusCode::NOT_FOUND)?; + // B1: an accept action (promote/merge/supersede) must RELEASE the subject + // memory from quarantine — gate_writes suppressed it, so deciding the PR + // without un-suppressing would leave it "promoted" yet still held out of + // retrieval. 
Forget/Quarantine intentionally keep it suppressed. + let mut released = false; + if action.releases_memory() + && let Some(subject_id) = decided.subject_id.as_deref() + { + use vestige_core::neuroscience::active_forgetting::ActiveForgettingSystem; + let labile_hours = ActiveForgettingSystem::new().labile_hours; + match state.storage.reverse_suppression(subject_id, labile_hours) { + Ok(node) => { + released = true; + state.emit(VestigeEvent::MemoryUnsuppressed { + id: node.id.clone(), + remaining_count: node.suppression_count, + timestamp: Utc::now(), + }); + } + Err(e) => { + // Best-effort: the PR is decided regardless, but surface the + // failure so a stuck-suppressed memory isn't silent. + tracing::warn!( + "memory PR {} {}d but failed to release subject {}: {}", + id, + action_label(action), + subject_id, + e + ); + } + } + } + state.emit(VestigeEvent::MemoryPrDecided { id: decided.id.clone(), decision: decided @@ -2218,7 +2268,24 @@ pub async fn act_on_memory_pr( timestamp: Utc::now(), }); - Ok(Json(serde_json::to_value(&decided).unwrap_or_default())) + let mut out = serde_json::to_value(&decided).unwrap_or_default(); + if let Some(obj) = out.as_object_mut() { + obj.insert("subjectReleased".to_string(), serde_json::json!(released)); + } + Ok(Json(out)) +} + +/// Short label for a Memory PR action, for log lines. 
+fn action_label(action: vestige_core::MemoryPrAction) -> &'static str { + use vestige_core::MemoryPrAction::*; + match action { + Promote => "promote", + Merge => "merge", + Supersede => "supersede", + Quarantine => "quarantine", + Forget => "forget", + AskAgentWhy => "ask_agent_why", + } } #[derive(Debug, Deserialize)] @@ -2244,8 +2311,10 @@ pub async fn set_review_mode( let mode = vestige_core::ReviewMode::from_label(&body.mode); let path = review_mode_path(&state); let payload = serde_json::json!({ "mode": mode.as_str() }); - fs::write(&path, serde_json::to_vec_pretty(&payload).unwrap_or_default()) - .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + // B7: atomic write (temp + rename) so a concurrent read can never see a + // partially-written / corrupt review_mode.json, reusing the same helper the + // Sanhedrin receipt path uses. + write_atomic(&path, &serde_json::to_vec_pretty(&payload).unwrap_or_default())?; Ok(Json(serde_json::json!({ "mode": mode.as_str() }))) } diff --git a/crates/vestige-mcp/src/dashboard/mod.rs b/crates/vestige-mcp/src/dashboard/mod.rs index 55c9662..8f0c77f 100644 --- a/crates/vestige-mcp/src/dashboard/mod.rs +++ b/crates/vestige-mcp/src/dashboard/mod.rs @@ -198,13 +198,17 @@ fn build_router_inner(state: AppState, port: u16) -> (Router, AppState) { // MEMORY PRs (v2.2) — risk-gated brain-change review queue // ============================================================ .route("/api/memory-prs", get(handlers::list_memory_prs)) + // Static `/mode` routes declared BEFORE the dynamic `/{id}` route (B7 + // hygiene). axum 0.8/matchit already prioritizes static segments, but + // declaring them first makes the intent unambiguous and guards against + // a future router that doesn't. 
+ .route("/api/memory-prs/mode", get(handlers::get_review_mode)) + .route("/api/memory-prs/mode", post(handlers::set_review_mode)) .route("/api/memory-prs/{id}", get(handlers::get_memory_pr)) .route( "/api/memory-prs/{id}/{action}", post(handlers::act_on_memory_pr), ) - .route("/api/memory-prs/mode", get(handlers::get_review_mode)) - .route("/api/memory-prs/mode", post(handlers::set_review_mode)) .layer( ServiceBuilder::new() .concurrency_limit(50) diff --git a/crates/vestige-mcp/src/trace_recorder.rs b/crates/vestige-mcp/src/trace_recorder.rs index 62a076c..860bc66 100644 --- a/crates/vestige-mcp/src/trace_recorder.rs +++ b/crates/vestige-mcp/src/trace_recorder.rs @@ -29,10 +29,45 @@ use vestige_core::{ }; /// Tools that write to memory and are therefore subject to risk-gated review. +/// +/// Includes `codebase` (its `remember_pattern` / `remember_decision` actions +/// write durable architectural-decision memories) so those brain mutations are +/// traced and gated like any other write (B2). Read-only actions on these tools +/// are filtered out downstream by [`is_write_decision`]. fn is_write_tool(tool: &str) -> bool { matches!( tool, - "smart_ingest" | "ingest" | "session_checkpoint" | "memory" + "smart_ingest" | "ingest" | "session_checkpoint" | "memory" | "codebase" + ) +} + +/// Whether a tool's `decision`/`action` label denotes an actual memory write +/// (vs. a read like `get`/`state`). Used to keep reads out of the write trace. 
+fn is_write_decision(label: &str) -> bool { + matches!( + label, + "create" + | "created" + | "update" + | "updated" + | "supersede" + | "superseded" + | "reinforce" + | "reinforced" + | "merge" + | "merged" + | "replace" + | "replaced" + | "add_context" + | "edit" + | "edited" + | "promote" + | "promoted" + | "demote" + | "demoted" + | "remember_pattern" + | "remember_decision" + | "remembered" ) } @@ -529,13 +564,21 @@ fn parse_suppress_reason(s: &str) -> SuppressReason { fn extract_writes(result: &Value) -> Vec<(String, String)> { let mut out = Vec::new(); let push = |out: &mut Vec<(String, String)>, item: &Value| { - let decision = item.get("decision").and_then(|v| v.as_str()); + // B2: accept either `decision` (smart_ingest) or `action` + // (memory promote/demote/edit, codebase remember_*). Read-only labels + // (get/state/...) are filtered out so reads never trace as writes. + let label = item + .get("decision") + .or_else(|| item.get("action")) + .and_then(|v| v.as_str()); let id = item .get("nodeId") .or_else(|| item.get("id")) .and_then(|v| v.as_str()); - if let (Some(d), Some(id)) = (decision, id) { - out.push((id.to_string(), d.to_string())); + if let (Some(label), Some(id)) = (label, id) + && is_write_decision(label) + { + out.push((id.to_string(), label.to_string())); } }; push(&mut out, result); @@ -766,4 +809,36 @@ mod tests { }); assert_eq!(extract_writes(&batch), vec![("n2".into(), "update".into())]); } + + #[test] + fn extract_writes_recognizes_action_shape_b2() { + // B2: memory promote/demote return `action` + `nodeId`, not `decision`. + let promoted = serde_json::json!({ "action": "promoted", "nodeId": "m1" }); + assert_eq!(extract_writes(&promoted), vec![("m1".into(), "promoted".into())]); + let demoted = serde_json::json!({ "action": "demoted", "nodeId": "m2" }); + assert_eq!(extract_writes(&demoted), vec![("m2".into(), "demoted".into())]); + // codebase remember_decision returns action + nodeId. 
+ let decision = serde_json::json!({ "action": "remember_decision", "nodeId": "c1" }); + assert_eq!( + extract_writes(&decision), + vec![("c1".into(), "remember_decision".into())] + ); + } + + #[test] + fn extract_writes_ignores_read_actions_b2() { + // A read (memory get / get_batch / state) carries nodeId but is NOT a write. + let read = serde_json::json!({ "action": "get", "nodeId": "m1" }); + assert!(extract_writes(&read).is_empty(), "get is not a write"); + let state = serde_json::json!({ "action": "state", "nodeId": "m2" }); + assert!(extract_writes(&state).is_empty(), "state is not a write"); + } + + #[test] + fn write_tool_set_includes_codebase_b2() { + assert!(is_write_tool("codebase")); + assert!(is_write_tool("memory")); + assert!(!is_write_tool("search")); + assert!(!is_write_tool("deep_reference")); + } }