diff --git a/crates/vestige-core/src/advanced/chains.rs b/crates/vestige-core/src/advanced/chains.rs index 3b92bd3..6926c56 100644 --- a/crates/vestige-core/src/advanced/chains.rs +++ b/crates/vestige-core/src/advanced/chains.rs @@ -491,49 +491,45 @@ impl MemoryChainBuilder { let mut steps = Vec::new(); - for (i, (mem_id, conn)) in path - .memories - .iter() - .zip(path.connections.iter().chain(std::iter::once(&Connection { - from_id: path.memories.last().cloned().unwrap_or_default(), - to_id: to.to_string(), - connection_type: ConnectionType::SemanticSimilarity, - strength: 1.0, - created_at: Utc::now(), - }))) - .enumerate() - { + for (i, mem_id) in path.memories.iter().enumerate() { let preview = self .graph .get(mem_id) .map(|n| n.content_preview.clone()) .unwrap_or_default(); + // The connection that LED INTO memories[i] is connections[i-1] (the + // edge memories[i-1] -> memories[i]). The start step (i==0) has no + // incoming edge. The old code paired memories[i] with connections[i] + // (the OUTGOING edge) and appended a synthetic connection to mask the + // resulting off-by-one, mislabeling every step's connection type. + let incoming = if i == 0 { + None + } else { + path.connections.get(i - 1) + }; + let reasoning = if i == 0 { format!("Starting from '{}'", preview) } else { - format!( - "'{}' {} '{}'", - self.graph - .get( - &path - .memories - .get(i.saturating_sub(1)) - .cloned() - .unwrap_or_default() - ) - .map(|n| n.content_preview.as_str()) - .unwrap_or(""), - conn.connection_type.description(), - preview - ) + let prev_preview = self + .graph + .get(&path.memories[i - 1]) + .map(|n| n.content_preview.as_str()) + .unwrap_or(""); + let rel = incoming + .map(|c| c.connection_type.description()) + .unwrap_or("relates to"); + format!("'{}' {} '{}'", prev_preview, rel, preview) }; steps.push(ChainStep { memory_id: mem_id.clone(), memory_preview: preview, - connection_type: conn.connection_type.clone(), - connection_strength: conn.strength, + connection_type: incoming + .map(|c| c.connection_type.clone()) + .unwrap_or(ConnectionType::SemanticSimilarity), + connection_strength: incoming.map(|c| c.strength).unwrap_or(1.0), reasoning, }); } diff --git a/crates/vestige-core/src/advanced/merge_supersede.rs b/crates/vestige-core/src/advanced/merge_supersede.rs index cf206b4..ccc5b4b 100644 --- a/crates/vestige-core/src/advanced/merge_supersede.rs +++ b/crates/vestige-core/src/advanced/merge_supersede.rs @@ -315,11 +315,19 @@ pub fn compose_merged_content(members: &[(String, String)]) -> String { if members.is_empty() { return String::new(); } - let mut out = members[0].1.trim().to_string(); + // Dedup on EXACT normalized content, not substring containment. The old + // `out.contains(c)` test silently dropped a distinct member whose text merely + // appeared as a substring of the accumulated output (e.g. "cat" inside + // "cathedral"), losing its content and provenance. + let norm = |s: &str| s.trim().to_lowercase(); + let first = members[0].1.trim().to_string(); + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); + seen.insert(norm(&first)); + let mut out = first; for (id, content) in &members[1..] { let c = content.trim(); - if c.is_empty() || out.contains(c) { - continue; + if c.is_empty() || !seen.insert(norm(c)) { + continue; // empty or an exact (normalized) duplicate already present } out.push_str("\n\n[merged from "); out.push_str(id); diff --git a/crates/vestige-core/src/connectors/mod.rs b/crates/vestige-core/src/connectors/mod.rs index e82a5ab..e87c26b 100644 --- a/crates/vestige-core/src/connectors/mod.rs +++ b/crates/vestige-core/src/connectors/mod.rs @@ -258,6 +258,16 @@ pub async fn run_sync( let mut reconciled = false; if reconcile { match connector.list_live_ids().await { + // CATASTROPHIC-DATA-LOSS GUARD: an empty live-id set would tombstone + // EVERY stored memory for this source (none of them appear in the + // empty list). An empty result almost always means a transient/auth + // failure or an over-narrow scope, not "the source truly has zero + // issues". Treat it like None (cannot safely enumerate) and skip. + Ok(Some(live_ids)) if live_ids.is_empty() => report.warnings.push( + "list_live_ids returned an empty set; skipping reconcile to avoid \ + mass-tombstoning the entire source" + .to_string(), + ), Ok(Some(live_ids)) => { match store.reconcile_source_tombstones(&source_system, &scope, &live_ids) { Ok(r) => { diff --git a/crates/vestige-core/src/connectors/redmine.rs b/crates/vestige-core/src/connectors/redmine.rs index df4369d..03c4012 100644 --- a/crates/vestige-core/src/connectors/redmine.rs +++ b/crates/vestige-core/src/connectors/redmine.rs @@ -104,11 +104,57 @@ impl RedmineConnector { if config.project.trim().is_empty() { return Err(ConnectorError::Config("project is required".to_string())); } - if reqwest::Url::parse(&config.root()).is_err() { - return Err(ConnectorError::Config(format!( - "base_url is not a valid URL: {}", - config.base_url - ))); + // SSRF guard: require http(s) and reject internal/reserved hosts so a + // misconfigured/hostile base_url cannot point the authenticated client at + // localhost, link-local metadata endpoints (169.254.169.254), or private + // ranges. Gated off only when explicitly allowed (for local tests). + match reqwest::Url::parse(&config.root()) { + Ok(url) => { + let scheme = url.scheme(); + if scheme != "http" && scheme != "https" { + return Err(ConnectorError::Config(format!( + "base_url scheme must be http or https, got {scheme}" + ))); + } + if std::env::var("VESTIGE_ALLOW_PRIVATE_CONNECTOR_HOSTS").is_err() { + match url.host() { + None => { + return Err(ConnectorError::Config( + "base_url has no host".to_string(), + )); + } + Some(url::Host::Ipv4(ip)) + if ip.is_loopback() + || ip.is_private() + || ip.is_link_local() + || ip.is_unspecified() => + { + return Err(ConnectorError::Config(format!( + "base_url host {ip} is a reserved/internal address (SSRF guard)" + ))); + } + Some(url::Host::Ipv6(ip)) if ip.is_loopback() || ip.is_unspecified() => { + return Err(ConnectorError::Config(format!( + "base_url host {ip} is a reserved/internal address (SSRF guard)" + ))); + } + Some(url::Host::Domain(d)) + if d.eq_ignore_ascii_case("localhost") => + { + return Err(ConnectorError::Config( + "base_url host localhost is blocked (SSRF guard)".to_string(), + )); + } + _ => {} + } + } + } + Err(_) => { + return Err(ConnectorError::Config(format!( + "base_url is not a valid URL: {}", + config.base_url + ))); + } } let client = reqwest::Client::builder() .user_agent(USER_AGENT) diff --git a/crates/vestige-core/src/memory/mod.rs b/crates/vestige-core/src/memory/mod.rs index af9daeb..ab11ff7 100644 --- a/crates/vestige-core/src/memory/mod.rs +++ b/crates/vestige-core/src/memory/mod.rs @@ -209,9 +209,12 @@ impl KnowledgeEdge { } } - /// Check if the edge is currently valid + /// Check if the edge is currently valid (within its [valid_from, valid_until) + /// window). Delegates to the bi-temporal check so a not-yet-active edge + /// (future valid_from) or an expired one is correctly reported invalid — + /// the old version only checked valid_until and ignored valid_from. pub fn is_valid(&self) -> bool { - self.valid_until.is_none() + self.was_valid_at(chrono::Utc::now()) } /// Check if the edge was valid at a given time @@ -359,6 +362,10 @@ pub struct ConsolidationResult { pub activations_computed: i64, /// Personalized w20 if optimized this cycle pub w20_optimized: Option, + /// Retroactive Salience Backfill: number of quiet earlier *causes* promoted + /// because a recent salient failure reached backward and surfaced them + /// (root-cause memories a semantic search would have missed). + pub backfilled_causes: i64, } // ============================================================================ diff --git a/crates/vestige-core/src/neuroscience/emotional_memory.rs b/crates/vestige-core/src/neuroscience/emotional_memory.rs index 2f7dc86..63dab5e 100644 --- a/crates/vestige-core/src/neuroscience/emotional_memory.rs +++ b/crates/vestige-core/src/neuroscience/emotional_memory.rs @@ -302,6 +302,9 @@ impl EmotionalMemory { arousal_score: f64, ) -> EmotionalEvaluation { let mut eval = self.evaluate_content(content); + // evaluate_content already counted a flashbulb if IT detected one. Capture + // that so we can reconcile after the importance-based override below. + let inner_flashbulb = eval.is_flashbulb; // Override flashbulb detection with real importance scores eval.is_flashbulb = novelty_score >= FLASHBULB_NOVELTY_THRESHOLD @@ -310,8 +313,12 @@ impl EmotionalMemory { // Blend arousal from lexicon with importance arousal eval.arousal = (eval.arousal * 0.4 + arousal_score * 0.6).clamp(0.0, 1.0); - if eval.is_flashbulb && self.flashbulbs_detected == 0 { - self.flashbulbs_detected += 1; + // Reconcile the counter to the FINAL decision (the old code only ever + // incremented once, when the counter happened to be 0 — undercounting). + match (inner_flashbulb, eval.is_flashbulb) { + (false, true) => self.flashbulbs_detected += 1, // override promoted it + (true, false) => self.flashbulbs_detected = self.flashbulbs_detected.saturating_sub(1), // override demoted the inner count + _ => {} // unchanged: inner count already correct } eval