fix(audit): error-propagation, races, dedup, decay bugs (swarm, moderate tier 2)

All verified against real code: - github fetch_updated: propagate fetch_comments errors instead of unwrap_or_default() (silent failure stored a comment-less record with a corrupted hash AND advanced the cursor past it = permanent gap) - relationships load_relationships: advance next_id past loaded rel-N ids so new_id() can't collide with persisted ids - speculative store_pending_predictions: merge instead of clear (two predict() calls without an intervening record_usage() wiped the first batch's accounting) - embeddings/code is_comment_only: stop treating leading '#'/'*' as comments (was deleting #include, #define, *ptr, CSS '*', multiplication) - importance_signals: reconcile total_count to the surviving pattern sum after decay/prune (was inflated, driving novelty toward zero over time) - importance on_retrieved_with_context: set context in the SAME critical section as the event push via record_retrieval() (two-lock approach raced; last_mut() could land on a concurrently-pushed event) core 535/0, clippy clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-02 22:01:01 +02:00 · 2026-06-28 14:15:04 -05:00 · 2026-06-28 14:15:04 -05:00 · dcd536ee86
commit dcd536ee86
parent 70dae339fb
6 changed files with 46 additions and 17 deletions
--- a/crates/vestige-core/src/advanced/importance.rs
+++ b/crates/vestige-core/src/advanced/importance.rs
@ -183,6 +183,14 @@ impl ImportanceTracker {

    /// Update importance when a memory is retrieved
    pub fn on_retrieved(&self, memory_id: &str, was_helpful: bool) {
+        self.record_retrieval(memory_id, was_helpful, None);
+    }
+
+    /// Push a usage event (with its context already populated) and update the
+    /// importance score. Context is set in the SAME critical section as the push
+    /// so a concurrent on_retrieved cannot slip an event in between and steal the
+    /// context via last_mut() (the previous two-lock approach raced).
+    fn record_retrieval(&self, memory_id: &str, was_helpful: bool, context: Option<String>) {
        let now = Utc::now();

        // Record the event
@ -190,7 +198,7 @@ impl ImportanceTracker {
            events.push(UsageEvent {
                memory_id: memory_id.to_string(),
                was_helpful,
-                context: None,
+                context,
                timestamp: now,
            });

@ -227,15 +235,7 @@ impl ImportanceTracker {

    /// Update importance with additional context
    pub fn on_retrieved_with_context(&self, memory_id: &str, was_helpful: bool, context: &str) {
-        self.on_retrieved(memory_id, was_helpful);
-
-        // Store context with event
-        if let Ok(mut events) = self.recent_events.write()
-            && let Some(event) = events.last_mut()
-            && event.memory_id == memory_id
-        {
-            event.context = Some(context.to_string());
-        }
+        self.record_retrieval(memory_id, was_helpful, Some(context.to_string()));
    }

    /// Apply importance decay to all memories
--- a/crates/vestige-core/src/advanced/speculative.rs
+++ b/crates/vestige-core/src/advanced/speculative.rs
@ -476,7 +476,11 @@ impl SpeculativeRetriever {

    fn store_pending_predictions(&self, predictions: &[PredictedMemory]) {
        if let Ok(mut pending) = self.pending_predictions.write() {
-            pending.clear();
+            // Merge (do NOT clear): two predict() calls without an intervening
+            // record_usage() would otherwise wipe the first batch's pending
+            // entries, destroying the right/wrong accounting record_usage relies
+            // on. Newer predictions overwrite same-id entries; older survive
+            // until consumed.
            for pred in predictions {
                pending.insert(pred.memory_id.clone(), pred.clone());
            }
--- a/crates/vestige-core/src/codebase/relationships.rs
+++ b/crates/vestige-core/src/codebase/relationships.rs
@ -567,6 +567,16 @@ impl RelationshipTracker {
    /// Load relationships from storage
    pub fn load_relationships(&mut self, relationships: Vec<FileRelationship>) -> Result<()> {
        for relationship in relationships {
+            // Advance next_id past any loaded "rel-N" id so a later new_id() can't
+            // collide with a persisted one (next_id starts at 1 and is otherwise
+            // never reconciled with loaded data).
+            if let Some(n) = relationship
+                .id
+                .strip_prefix("rel-")
+                .and_then(|s| s.parse::<u32>().ok())
+            {
+                self.next_id = self.next_id.max(n + 1);
+            }
            self.add_relationship(relationship)?;
        }
        Ok(())
--- a/crates/vestige-core/src/connectors/github.rs
+++ b/crates/vestige-core/src/connectors/github.rs
@ -381,9 +381,13 @@ impl Connector for GithubConnector {
            if issue.pull_request.is_some() {
                continue;
            }
-            // Fetch comments only when the issue has any.
+            // Fetch comments only when the issue has any. Propagate failures
+            // instead of swallowing them: a silent unwrap_or_default() stored a
+            // comment-less record with a corrupted content hash AND let the
+            // cursor advance past it, so the issue would never be re-synced. A
+            // propagated error keeps the issue in the next window (cursor clamp).
            let comments = if issue.comments > 0 {
-                self.fetch_comments(issue.number).await.unwrap_or_default()
+                self.fetch_comments(issue.number).await?
            } else {
                Vec::new()
            };
--- a/crates/vestige-core/src/embeddings/code.rs
+++ b/crates/vestige-core/src/embeddings/code.rs
@ -85,13 +85,21 @@ impl CodeEmbedding {
        lines.join(" ")
    }

-    /// Check if a line is only a comment
+    /// Check if a line is only a comment.
+    ///
+    /// Conservative on purpose: the previous version treated any line starting
+    /// with `#` or `*` as a comment, which deleted real code — C preprocessor
+    /// directives (`#include`, `#define`), pointer derefs / multiplication
+    /// (`*ptr = 5;`), CSS `* { ... }`, etc. We only strip unambiguous comment
+    /// markers; a leading `*` counts only as a block-comment continuation
+    /// (`* ...`), never a bare `*expr`.
    fn is_comment_only(&self, line: &str) -> bool {
        let trimmed = line.trim();
        trimmed.starts_with("//")
-            || trimmed.starts_with('#')
            || trimmed.starts_with("/*")
-            || trimmed.starts_with('*')
+            || trimmed == "*"
+            || trimmed.starts_with("* ")
+            || trimmed.starts_with("*/")
    }

    /// Extract semantic chunks from code
--- a/crates/vestige-core/src/neuroscience/importance_signals.rs
+++ b/crates/vestige-core/src/neuroscience/importance_signals.rs
@ -366,9 +366,12 @@ impl PredictionModel {
                *total += 1;
            }

-            // Prune if too large
+            // Prune if too large, then reconcile total_count to the surviving
+            // sum. The old code left total_count inflated after pruning/decay,
+            // which drove computed novelty (count/total) toward zero over time.
            if patterns.len() > MAX_PREDICTION_PATTERNS {
                self.apply_decay(&mut patterns);
+                *total = patterns.values().map(|c| *c as u64).sum();
            }
        }
    }