From 6a0173dc7bd5f265acb2b276d1168f2548fc7e4e Mon Sep 17 00:00:00 2001 From: Sam Valladares Date: Mon, 22 Jun 2026 19:07:32 -0500 Subject: [PATCH] fix(blackbox): C1 unconditional quarantine release + C2 trace destructive writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two more review findings — both real, both blockers — plus stale-doc cleanup. C1: the B1 release used reverse_suppression(subject_id, labile_hours), which REFUSES once the 24h active-forgetting labile window has passed. So a Memory PR reviewed late could be marked "promoted" while its memory stayed suppressed. Approving a quarantined write is an explicit reviewer decision and must release the memory regardless of elapsed time. New SqliteMemoryStore::release_quarantine fully clears the suppression (count→0, suppressed_at→NULL) with NO time-window limit; the PR handler now uses it. Proven: a test backdates suppressed_at to +100h, shows reverse_suppression refuses, and release_quarantine still releases. C2: memory(action="purge"|"delete") returns `action` + nodeId but those labels weren't in is_write_decision, so destructive removal bypassed the memory.write trace and the PR gate. Added purge/purged/delete/deleted/forget/forgotten. Proven live: purging a node now records a second memory.write event ({"decision":"purge"}) under the run. Docs: REVIEW.md de-staled — removed the fixed 140b15f diff-stat / "3 commits" prose (it moved with each fix), listed all commits, added C1/C2 to the findings table, updated the test count. Gates: 1002 lib tests pass (+3 new regressions), clippy -D warnings clean, dashboard check + build clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- blackbox-proof-2026-06-22/REVIEW.md | 18 ++++-- crates/vestige-core/src/storage/sqlite.rs | 55 +++++++++++++++++++ .../vestige-core/src/storage/trace_store.rs | 54 +++++++++++++++++- crates/vestige-mcp/src/dashboard/handlers.rs | 7 ++- crates/vestige-mcp/src/trace_recorder.rs | 21 +++++++ 5 files changed, 143 insertions(+), 12 deletions(-) diff --git a/blackbox-proof-2026-06-22/REVIEW.md b/blackbox-proof-2026-06-22/REVIEW.md index 6cb4662..93d9ec7 100644 --- a/blackbox-proof-2026-06-22/REVIEW.md +++ b/blackbox-proof-2026-06-22/REVIEW.md @@ -105,18 +105,22 @@ Only statuses with a receipt above are credited. Nothing is claimed from memory. ## Review surface (what changed) -3 commits on top of the base, **27 source files, +5830 / -18** (build artifacts -and this proof bundle excluded): +The Black Box work sits in a series of commits on top of the base. To see the +exact, current diff (build artifacts + this proof bundle excluded): ``` -$ git diff --stat 9e92a59..140b15f -- ':!apps/dashboard/build' ':!blackbox-proof-2026-06-22' -27 files changed, 5830 insertions(+), 18 deletions(-) +$ git diff --stat 9e92a59..HEAD -- ':!apps/dashboard/build' ':!blackbox-proof-2026-06-22' +# ~27 source files (Rust + SvelteKit). Run this against the branch tip for the +# live count — it grows as review fixes land. ``` -Commits: +Commits (oldest first): - `80c823a` feat: Agent Black Box + Receipts + risk-gated Memory PRs - `b89beee` proof: Proof Lock — full-spine test, honest UI states, proof pack - `140b15f` proof: dream.patch proven live with a real dream run +- `cadffb4` docs: package the review bundle — REVIEW.md entry point +- `8f7bed0` fix: address review blockers B1–B7 + re-capture proof bundle +- (+ a follow-up fix commit for C1/C2 — see "Review findings addressed") Key files to review: - **Core (pure logic):** `crates/vestige-core/src/trace/{mod,receipt,review}.rs` @@ -164,12 +168,14 @@ found 7 real issues — 4 blockers. All fixed and tested: | B5 | P2 | Black Box receipts panel showed global latest, not the selected run's | `list_receipts_for_run` + `/api/receipts?run=` + page uses `listForRun(runId)` | live: `?run=run_proof` returns only that run; test `receipts_are_listable_per_run_b5` | | B6 | P2 | `SENSITIVE_TOPICS` substring match false-fired ("tokenizer"→token, "author"→auth) | word-boundary matching | tests `sensitive_topic_word_boundary_no_false_positives_b6`, `..._still_catches_real_b6` | | B7 | P3 | `set_review_mode` non-atomic write; export filename used raw `run_id` | `write_atomic` (temp+rename); filename sanitized; static routes declared before dynamic | covered by build + the atomic-write helper's existing use | +| C1 | blocker | B1's release used `reverse_suppression`, which **refuses past the 24h labile window** — a PR promoted late stayed suppressed | new `release_quarantine(id)`: unconditional release (no time limit), used by the PR handler instead | test `release_quarantine_works_past_the_labile_window_c1` (proves reverse_suppression refuses but release_quarantine succeeds at +100h) | +| C2 | blocker | `memory` `purge`/`delete` (destructive removal) bypassed the write-trace + gate | added purge/purged/delete/deleted/forget/forgotten to `is_write_decision` | test `extract_writes_recognizes_destructive_actions_c2` | One earlier (self-)review claim was **withdrawn**: the `/api/memory-prs/mode` vs `/{id}` route order is *not* a functional bug — axum 0.8 / matchit gives static segments priority. Reordered for clarity only. -Net after fixes: **999 lib tests pass, clippy `-D warnings` clean, dashboard +Net after fixes (B1–B7 + C1/C2): **1002 lib tests pass, clippy `-D warnings` clean, dashboard check + build clean.** ## Reproduce (any reviewer, locally) diff --git a/crates/vestige-core/src/storage/sqlite.rs b/crates/vestige-core/src/storage/sqlite.rs index 786295f..05fe6aa 100644 --- a/crates/vestige-core/src/storage/sqlite.rs +++ b/crates/vestige-core/src/storage/sqlite.rs @@ -1785,6 +1785,61 @@ impl SqliteMemoryStore { .ok_or_else(|| StorageError::NotFound(id.to_string())) } + /// Release a memory from quarantine **unconditionally** (no labile-window + /// limit), used when a Memory PR is approved. + /// + /// Unlike [`Self::reverse_suppression`] (which models a time-bounded "undo" + /// of an active-forgetting decision and refuses after the labile window), + /// approving a quarantined risky write is an explicit reviewer decision that + /// must always restore the memory's retrieval influence — even days later. + /// Fully clears the suppression (count → 0, `suppressed_at` → NULL) and + /// restores strengths. A no-op (returns the node) if it isn't suppressed. + pub fn release_quarantine(&self, id: &str) -> Result { + let node = self + .get_node(id)? + .ok_or_else(|| StorageError::NotFound(id.to_string()))?; + + if node.suppression_count == 0 && node.suppressed_at.is_none() { + // Nothing to release — idempotent. + return Ok(node); + } + + { + let writer = self + .writer + .lock() + .map_err(|_| StorageError::Init("Writer lock poisoned".into()))?; + writer.execute( + "UPDATE knowledge_nodes SET + suppression_count = 0, + suppressed_at = NULL, + retrieval_strength = MIN(1.0, retrieval_strength + 0.15), + retention_strength = MIN(1.0, retention_strength + 0.10), + stability = stability * 1.25 + WHERE id = ?1", + params![id], + )?; + } + + let _ = self.log_access(id, "release_quarantine"); + + self.get_node(id)? + .ok_or_else(|| StorageError::NotFound(id.to_string())) + } + + /// Test-only: backdate a node's `suppressed_at` to simulate a suppression + /// that happened long ago (e.g. to verify release works past the labile + /// window). `pub(crate)` so sibling test modules can reach it. + #[cfg(test)] + pub(crate) fn set_suppressed_at_for_test(&self, id: &str, when: DateTime) { + if let Ok(writer) = self.writer.lock() { + let _ = writer.execute( + "UPDATE knowledge_nodes SET suppressed_at = ?1 WHERE id = ?2", + params![when.to_rfc3339(), id], + ); + } + } + /// Count memories currently in a suppressed state (suppression_count > 0). pub fn count_suppressed(&self) -> Result { let reader = self diff --git a/crates/vestige-core/src/storage/trace_store.rs b/crates/vestige-core/src/storage/trace_store.rs index 5ee2258..ee39751 100644 --- a/crates/vestige-core/src/storage/trace_store.rs +++ b/crates/vestige-core/src/storage/trace_store.rs @@ -647,15 +647,63 @@ mod tests { ); // Promote = release. (The action releases_memory() == true; the handler - // calls reverse_suppression on the subject.) + // calls release_quarantine on the subject.) assert!(crate::MemoryPrAction::Promote.releases_memory()); let released = s - .reverse_suppression(&node.id, 24) - .expect("reverse suppression within labile window"); + .release_quarantine(&node.id) + .expect("release quarantine"); assert_eq!( released.suppression_count, 0, "promoting the PR must release the memory — not leave it suppressed" ); + assert!( + released.suppressed_at.is_none(), + "release must clear suppressed_at" + ); + } + + #[test] + fn release_quarantine_works_past_the_labile_window_c1() { + // C1: a PR reviewed LATE (past the 24h active-forgetting labile window) + // must still release the memory. reverse_suppression refuses after the + // window; release_quarantine must not. + let s = store(); + let node = s + .ingest(crate::IngestInput { + content: "Risky write quarantined and reviewed days later.".to_string(), + node_type: "fact".to_string(), + ..Default::default() + }) + .expect("ingest"); + s.suppress_memory(&node.id).expect("suppress"); + + // Backdate suppressed_at to 100h ago — well past any labile window. + s.set_suppressed_at_for_test(&node.id, chrono::Utc::now() - chrono::Duration::hours(100)); + + // reverse_suppression refuses (window expired)... + assert!( + s.reverse_suppression(&node.id, 24).is_err(), + "reverse_suppression must refuse past the labile window" + ); + // ...but release_quarantine still releases it. + let released = s.release_quarantine(&node.id).expect("release past window"); + assert_eq!(released.suppression_count, 0); + assert!(released.suppressed_at.is_none()); + } + + #[test] + fn release_quarantine_is_idempotent_on_unsuppressed() { + let s = store(); + let node = s + .ingest(crate::IngestInput { + content: "Never suppressed.".to_string(), + node_type: "fact".to_string(), + ..Default::default() + }) + .expect("ingest"); + // No-op when not suppressed — must not error. + let same = s.release_quarantine(&node.id).expect("idempotent release"); + assert_eq!(same.suppression_count, 0); } #[test] diff --git a/crates/vestige-mcp/src/dashboard/handlers.rs b/crates/vestige-mcp/src/dashboard/handlers.rs index a7665b9..6bff8f4 100644 --- a/crates/vestige-mcp/src/dashboard/handlers.rs +++ b/crates/vestige-mcp/src/dashboard/handlers.rs @@ -2232,9 +2232,10 @@ pub async fn act_on_memory_pr( if action.releases_memory() && let Some(subject_id) = decided.subject_id.as_deref() { - use vestige_core::neuroscience::active_forgetting::ActiveForgettingSystem; - let labile_hours = ActiveForgettingSystem::new().labile_hours; - match state.storage.reverse_suppression(subject_id, labile_hours) { + // Use the UNCONDITIONAL quarantine release, not reverse_suppression: + // approving a PR must restore the memory even if reviewed days later, + // past the active-forgetting labile window (the C1 fix). + match state.storage.release_quarantine(subject_id) { Ok(node) => { released = true; state.emit(VestigeEvent::MemoryUnsuppressed { diff --git a/crates/vestige-mcp/src/trace_recorder.rs b/crates/vestige-mcp/src/trace_recorder.rs index 860bc66..c49f100 100644 --- a/crates/vestige-mcp/src/trace_recorder.rs +++ b/crates/vestige-mcp/src/trace_recorder.rs @@ -68,6 +68,14 @@ fn is_write_decision(label: &str) -> bool { | "remember_pattern" | "remember_decision" | "remembered" + // C2: destructive removals are brain mutations too — they must + // trace as memory.write and be gateable, not bypass review. + | "purge" + | "purged" + | "delete" + | "deleted" + | "forget" + | "forgotten" ) } @@ -834,6 +842,19 @@ mod tests { assert!(extract_writes(&state).is_empty(), "state is not a write"); } + #[test] + fn extract_writes_recognizes_destructive_actions_c2() { + // C2: purge/delete are brain mutations and must trace + be gateable. + for act in ["purge", "delete"] { + let r = serde_json::json!({ "action": act, "nodeId": "m1", "success": true }); + assert_eq!( + extract_writes(&r), + vec![("m1".into(), act.to_string())], + "{act} must be traced as a write" + ); + } + } + #[test] fn write_tool_set_includes_codebase_b2() { assert!(is_write_tool("codebase"));