mirror of
https://github.com/samvallad33/vestige.git
synced 2026-07-02 22:01:01 +02:00
fix(audit): data-loss, SSRF, off-by-one, dedup bugs (swarm, moderate tier 1)
All verified against real code: - connectors: empty list_live_ids() no longer mass-tombstones the entire source (treat empty as "cannot enumerate", like None) — was catastrophic data loss - redmine: SSRF guard — require http(s), reject loopback/private/link-local hosts + localhost (escape hatch VESTIGE_ALLOW_PRIVATE_CONNECTOR_HOSTS for local tests) - KnowledgeEdge::is_valid now honors valid_from via was_valid_at(now) (was ignoring it; a future-dated edge read as valid). No production callers. - emotional_memory: flashbulb counter reconciles to the FINAL is_flashbulb decision after the importance override (was undercounting via ==0 guard) - chains path_to_chain: fixed off-by-one — step i now uses the INCOMING edge connections[i-1], not the outgoing connections[i]; removed the synthetic connection hack that masked the misalignment - merge_supersede compose_merged_content: exact normalized dedup instead of substring containment (was silently dropping "cat" inside "cathedral") core 535/0, clippy clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a3750378bd
commit
70dae339fb
6 changed files with 115 additions and 41 deletions
|
|
@ -491,49 +491,45 @@ impl MemoryChainBuilder {
|
|||
|
||||
let mut steps = Vec::new();
|
||||
|
||||
for (i, (mem_id, conn)) in path
|
||||
.memories
|
||||
.iter()
|
||||
.zip(path.connections.iter().chain(std::iter::once(&Connection {
|
||||
from_id: path.memories.last().cloned().unwrap_or_default(),
|
||||
to_id: to.to_string(),
|
||||
connection_type: ConnectionType::SemanticSimilarity,
|
||||
strength: 1.0,
|
||||
created_at: Utc::now(),
|
||||
})))
|
||||
.enumerate()
|
||||
{
|
||||
for (i, mem_id) in path.memories.iter().enumerate() {
|
||||
let preview = self
|
||||
.graph
|
||||
.get(mem_id)
|
||||
.map(|n| n.content_preview.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
// The connection that LED INTO memories[i] is connections[i-1] (the
|
||||
// edge memories[i-1] -> memories[i]). The start step (i==0) has no
|
||||
// incoming edge. The old code paired memories[i] with connections[i]
|
||||
// (the OUTGOING edge) and appended a synthetic connection to mask the
|
||||
// resulting off-by-one, mislabeling every step's connection type.
|
||||
let incoming = if i == 0 {
|
||||
None
|
||||
} else {
|
||||
path.connections.get(i - 1)
|
||||
};
|
||||
|
||||
let reasoning = if i == 0 {
|
||||
format!("Starting from '{}'", preview)
|
||||
} else {
|
||||
format!(
|
||||
"'{}' {} '{}'",
|
||||
self.graph
|
||||
.get(
|
||||
&path
|
||||
.memories
|
||||
.get(i.saturating_sub(1))
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
)
|
||||
.map(|n| n.content_preview.as_str())
|
||||
.unwrap_or(""),
|
||||
conn.connection_type.description(),
|
||||
preview
|
||||
)
|
||||
let prev_preview = self
|
||||
.graph
|
||||
.get(&path.memories[i - 1])
|
||||
.map(|n| n.content_preview.as_str())
|
||||
.unwrap_or("");
|
||||
let rel = incoming
|
||||
.map(|c| c.connection_type.description())
|
||||
.unwrap_or("relates to");
|
||||
format!("'{}' {} '{}'", prev_preview, rel, preview)
|
||||
};
|
||||
|
||||
steps.push(ChainStep {
|
||||
memory_id: mem_id.clone(),
|
||||
memory_preview: preview,
|
||||
connection_type: conn.connection_type.clone(),
|
||||
connection_strength: conn.strength,
|
||||
connection_type: incoming
|
||||
.map(|c| c.connection_type.clone())
|
||||
.unwrap_or(ConnectionType::SemanticSimilarity),
|
||||
connection_strength: incoming.map(|c| c.strength).unwrap_or(1.0),
|
||||
reasoning,
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -315,11 +315,19 @@ pub fn compose_merged_content(members: &[(String, String)]) -> String {
|
|||
if members.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
let mut out = members[0].1.trim().to_string();
|
||||
// Dedup on EXACT normalized content, not substring containment. The old
|
||||
// `out.contains(c)` test silently dropped a distinct member whose text merely
|
||||
// appeared as a substring of the accumulated output (e.g. "cat" inside
|
||||
// "cathedral"), losing its content and provenance.
|
||||
let norm = |s: &str| s.trim().to_lowercase();
|
||||
let first = members[0].1.trim().to_string();
|
||||
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
seen.insert(norm(&first));
|
||||
let mut out = first;
|
||||
for (id, content) in &members[1..] {
|
||||
let c = content.trim();
|
||||
if c.is_empty() || out.contains(c) {
|
||||
continue;
|
||||
if c.is_empty() || !seen.insert(norm(c)) {
|
||||
continue; // empty or an exact (normalized) duplicate already present
|
||||
}
|
||||
out.push_str("\n\n[merged from ");
|
||||
out.push_str(id);
|
||||
|
|
|
|||
|
|
@ -258,6 +258,16 @@ pub async fn run_sync<C: Connector>(
|
|||
let mut reconciled = false;
|
||||
if reconcile {
|
||||
match connector.list_live_ids().await {
|
||||
// CATASTROPHIC-DATA-LOSS GUARD: an empty live-id set would tombstone
|
||||
// EVERY stored memory for this source (none of them appear in the
|
||||
// empty list). An empty result almost always means a transient/auth
|
||||
// failure or an over-narrow scope, not "the source truly has zero
|
||||
// issues". Treat it like None (cannot safely enumerate) and skip.
|
||||
Ok(Some(live_ids)) if live_ids.is_empty() => report.warnings.push(
|
||||
"list_live_ids returned an empty set; skipping reconcile to avoid \
|
||||
mass-tombstoning the entire source"
|
||||
.to_string(),
|
||||
),
|
||||
Ok(Some(live_ids)) => {
|
||||
match store.reconcile_source_tombstones(&source_system, &scope, &live_ids) {
|
||||
Ok(r) => {
|
||||
|
|
|
|||
|
|
@ -104,11 +104,57 @@ impl RedmineConnector {
|
|||
if config.project.trim().is_empty() {
|
||||
return Err(ConnectorError::Config("project is required".to_string()));
|
||||
}
|
||||
if reqwest::Url::parse(&config.root()).is_err() {
|
||||
return Err(ConnectorError::Config(format!(
|
||||
"base_url is not a valid URL: {}",
|
||||
config.base_url
|
||||
)));
|
||||
// SSRF guard: require http(s) and reject internal/reserved hosts so a
|
||||
// misconfigured/hostile base_url cannot point the authenticated client at
|
||||
// localhost, link-local metadata endpoints (169.254.169.254), or private
|
||||
// ranges. Gated off only when explicitly allowed (for local tests).
|
||||
match reqwest::Url::parse(&config.root()) {
|
||||
Ok(url) => {
|
||||
let scheme = url.scheme();
|
||||
if scheme != "http" && scheme != "https" {
|
||||
return Err(ConnectorError::Config(format!(
|
||||
"base_url scheme must be http or https, got {scheme}"
|
||||
)));
|
||||
}
|
||||
if std::env::var("VESTIGE_ALLOW_PRIVATE_CONNECTOR_HOSTS").is_err() {
|
||||
match url.host() {
|
||||
None => {
|
||||
return Err(ConnectorError::Config(
|
||||
"base_url has no host".to_string(),
|
||||
));
|
||||
}
|
||||
Some(url::Host::Ipv4(ip))
|
||||
if ip.is_loopback()
|
||||
|| ip.is_private()
|
||||
|| ip.is_link_local()
|
||||
|| ip.is_unspecified() =>
|
||||
{
|
||||
return Err(ConnectorError::Config(format!(
|
||||
"base_url host {ip} is a reserved/internal address (SSRF guard)"
|
||||
)));
|
||||
}
|
||||
Some(url::Host::Ipv6(ip)) if ip.is_loopback() || ip.is_unspecified() => {
|
||||
return Err(ConnectorError::Config(format!(
|
||||
"base_url host {ip} is a reserved/internal address (SSRF guard)"
|
||||
)));
|
||||
}
|
||||
Some(url::Host::Domain(d))
|
||||
if d.eq_ignore_ascii_case("localhost") =>
|
||||
{
|
||||
return Err(ConnectorError::Config(
|
||||
"base_url host localhost is blocked (SSRF guard)".to_string(),
|
||||
));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
return Err(ConnectorError::Config(format!(
|
||||
"base_url is not a valid URL: {}",
|
||||
config.base_url
|
||||
)));
|
||||
}
|
||||
}
|
||||
let client = reqwest::Client::builder()
|
||||
.user_agent(USER_AGENT)
|
||||
|
|
|
|||
|
|
@ -209,9 +209,12 @@ impl KnowledgeEdge {
|
|||
}
|
||||
}
|
||||
|
||||
/// Check if the edge is currently valid
|
||||
/// Check if the edge is currently valid (within its [valid_from, valid_until)
|
||||
/// window). Delegates to the bi-temporal check so a not-yet-active edge
|
||||
/// (future valid_from) or an expired one is correctly reported invalid —
|
||||
/// the old version only checked valid_until and ignored valid_from.
|
||||
pub fn is_valid(&self) -> bool {
|
||||
self.valid_until.is_none()
|
||||
self.was_valid_at(chrono::Utc::now())
|
||||
}
|
||||
|
||||
/// Check if the edge was valid at a given time
|
||||
|
|
@ -359,6 +362,10 @@ pub struct ConsolidationResult {
|
|||
pub activations_computed: i64,
|
||||
/// Personalized w20 if optimized this cycle
|
||||
pub w20_optimized: Option<f64>,
|
||||
/// Retroactive Salience Backfill: number of quiet earlier *causes* promoted
|
||||
/// because a recent salient failure reached backward and surfaced them
|
||||
/// (root-cause memories a semantic search would have missed).
|
||||
pub backfilled_causes: i64,
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
|
|
|||
|
|
@ -302,6 +302,9 @@ impl EmotionalMemory {
|
|||
arousal_score: f64,
|
||||
) -> EmotionalEvaluation {
|
||||
let mut eval = self.evaluate_content(content);
|
||||
// evaluate_content already counted a flashbulb if IT detected one. Capture
|
||||
// that so we can reconcile after the importance-based override below.
|
||||
let inner_flashbulb = eval.is_flashbulb;
|
||||
|
||||
// Override flashbulb detection with real importance scores
|
||||
eval.is_flashbulb = novelty_score >= FLASHBULB_NOVELTY_THRESHOLD
|
||||
|
|
@ -310,8 +313,12 @@ impl EmotionalMemory {
|
|||
// Blend arousal from lexicon with importance arousal
|
||||
eval.arousal = (eval.arousal * 0.4 + arousal_score * 0.6).clamp(0.0, 1.0);
|
||||
|
||||
if eval.is_flashbulb && self.flashbulbs_detected == 0 {
|
||||
self.flashbulbs_detected += 1;
|
||||
// Reconcile the counter to the FINAL decision (the old code only ever
|
||||
// incremented once, when the counter happened to be 0 — undercounting).
|
||||
match (inner_flashbulb, eval.is_flashbulb) {
|
||||
(false, true) => self.flashbulbs_detected += 1, // override promoted it
|
||||
(true, false) => self.flashbulbs_detected = self.flashbulbs_detected.saturating_sub(1), // override demoted the inner count
|
||||
_ => {} // unchanged: inner count already correct
|
||||
}
|
||||
|
||||
eval
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue