fix(audit): error-propagation, races, dedup, decay bugs (swarm, moderate tier 2)

All verified against real code:
- github fetch_updated: propagate fetch_comments errors instead of
  unwrap_or_default() (silent failure stored a comment-less record with a
  corrupted hash AND advanced the cursor past it = permanent gap)
- relationships load_relationships: advance next_id past loaded rel-N ids so
  new_id() can't collide with persisted ids
- speculative store_pending_predictions: merge instead of clear (two predict()
  calls without an intervening record_usage() wiped the first batch's accounting)
- embeddings/code is_comment_only: stop treating leading '#'/'*' as comments
  (was deleting #include, #define, *ptr, CSS '*', multiplication)
- importance_signals: reconcile total_count to the surviving pattern sum after
  decay/prune (was inflated, driving novelty toward zero over time)
- importance on_retrieved_with_context: set context in the SAME critical section
  as the event push via record_retrieval() (two-lock approach raced; last_mut()
  could land on a concurrently-pushed event)

core 535/0, clippy clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sam Valladares 2026-06-28 14:15:04 -05:00
parent 70dae339fb
commit dcd536ee86
6 changed files with 46 additions and 17 deletions

View file

@ -183,6 +183,14 @@ impl ImportanceTracker {
/// Update importance when a memory is retrieved
pub fn on_retrieved(&self, memory_id: &str, was_helpful: bool) {
self.record_retrieval(memory_id, was_helpful, None);
}
/// Push a usage event (with its context already populated) and update the
/// importance score. Context is set in the SAME critical section as the push
/// so a concurrent on_retrieved cannot slip an event in between and steal the
/// context via last_mut() (the previous two-lock approach raced).
fn record_retrieval(&self, memory_id: &str, was_helpful: bool, context: Option<String>) {
let now = Utc::now();
// Record the event
@ -190,7 +198,7 @@ impl ImportanceTracker {
events.push(UsageEvent {
memory_id: memory_id.to_string(),
was_helpful,
context: None,
context,
timestamp: now,
});
@ -227,15 +235,7 @@ impl ImportanceTracker {
/// Update importance with additional context
pub fn on_retrieved_with_context(&self, memory_id: &str, was_helpful: bool, context: &str) {
self.on_retrieved(memory_id, was_helpful);
// Store context with event
if let Ok(mut events) = self.recent_events.write()
&& let Some(event) = events.last_mut()
&& event.memory_id == memory_id
{
event.context = Some(context.to_string());
}
self.record_retrieval(memory_id, was_helpful, Some(context.to_string()));
}
/// Apply importance decay to all memories

View file

@ -476,7 +476,11 @@ impl SpeculativeRetriever {
fn store_pending_predictions(&self, predictions: &[PredictedMemory]) {
if let Ok(mut pending) = self.pending_predictions.write() {
pending.clear();
// Merge (do NOT clear): two predict() calls without an intervening
// record_usage() would otherwise wipe the first batch's pending
// entries, destroying the right/wrong accounting record_usage relies
// on. Newer predictions overwrite same-id entries; older survive
// until consumed.
for pred in predictions {
pending.insert(pred.memory_id.clone(), pred.clone());
}

View file

@ -567,6 +567,16 @@ impl RelationshipTracker {
/// Load relationships from storage
pub fn load_relationships(&mut self, relationships: Vec<FileRelationship>) -> Result<()> {
for relationship in relationships {
// Advance next_id past any loaded "rel-N" id so a later new_id() can't
// collide with a persisted one (next_id starts at 1 and is otherwise
// never reconciled with loaded data).
if let Some(n) = relationship
.id
.strip_prefix("rel-")
.and_then(|s| s.parse::<u32>().ok())
{
self.next_id = self.next_id.max(n + 1);
}
self.add_relationship(relationship)?;
}
Ok(())

View file

@ -381,9 +381,13 @@ impl Connector for GithubConnector {
if issue.pull_request.is_some() {
continue;
}
// Fetch comments only when the issue has any.
// Fetch comments only when the issue has any. Propagate failures
// instead of swallowing them: a silent unwrap_or_default() stored a
// comment-less record with a corrupted content hash AND let the
// cursor advance past it, so the issue would never be re-synced. A
// propagated error keeps the issue in the next window (cursor clamp).
let comments = if issue.comments > 0 {
self.fetch_comments(issue.number).await.unwrap_or_default()
self.fetch_comments(issue.number).await?
} else {
Vec::new()
};

View file

@ -85,13 +85,21 @@ impl CodeEmbedding {
lines.join(" ")
}
/// Check if a line is only a comment
/// Check if a line is only a comment.
///
/// Conservative on purpose: the previous version treated any line starting
/// with `#` or `*` as a comment, which deleted real code — C preprocessor
/// directives (`#include`, `#define`), pointer derefs / multiplication
/// (`*ptr = 5;`), CSS `* { ... }`, etc. We only strip unambiguous comment
/// markers; a leading `*` counts only as a block-comment continuation
/// (`* ...`), never a bare `*expr`.
fn is_comment_only(&self, line: &str) -> bool {
let trimmed = line.trim();
trimmed.starts_with("//")
|| trimmed.starts_with('#')
|| trimmed.starts_with("/*")
|| trimmed.starts_with('*')
|| trimmed == "*"
|| trimmed.starts_with("* ")
|| trimmed.starts_with("*/")
}
/// Extract semantic chunks from code

View file

@ -366,9 +366,12 @@ impl PredictionModel {
*total += 1;
}
// Prune if too large
// Prune if too large, then reconcile total_count to the surviving
// sum. The old code left total_count inflated after pruning/decay,
// which drove computed novelty (count/total) toward zero over time.
if patterns.len() > MAX_PREDICTION_PATTERNS {
self.apply_decay(&mut patterns);
*total = patterns.values().map(|c| *c as u64).sum();
}
}
}