feat(deep_reference): fold engine upgrades into v2.2.0

Applies commit 02056c6 onto the backfill+consolidation base: - vector.rs: I8->F32 quantization (2 sites) for paraphrase-band recall lift. - sqlite.rs: RRF hybrid fusion + never-composed semantic-band gate, applied via 3-way patch that preserves all 18 retroactive-salience-backfill refs. - cross_reference.rs: Stage 5b claim-vs-memory contradiction (claim_conflicts). - cli.rs: recall + compose commands, 3-way merged alongside #99's backfill + cloud-sync CLI (both command sets coexist). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-02 22:01:01 +02:00 · 2026-06-29 15:06:32 -05:00 · 2026-06-29 15:06:32 -05:00 · 81e808dfcb
commit 81e808dfcb
parent 29658d24f6
4 changed files with 382 additions and 13 deletions
--- a/crates/vestige-core/src/search/vector.rs
+++ b/crates/vestige-core/src/search/vector.rs
@ -137,7 +137,7 @@ impl VectorIndex {
        let options = IndexOptions {
            dimensions: config.dimensions,
            metric: config.metric,
-            quantization: ScalarKind::I8,
+            quantization: ScalarKind::F32,
            connectivity: config.connectivity,
            expansion_add: config.expansion_add,
            expansion_search: config.expansion_search,
@ -325,7 +325,7 @@ impl VectorIndex {
        let options = IndexOptions {
            dimensions: config.dimensions,
            metric: config.metric,
-            quantization: ScalarKind::I8,
+            quantization: ScalarKind::F32,
            connectivity: config.connectivity,
            expansion_add: config.expansion_add,
            expansion_search: config.expansion_search,
--- a/crates/vestige-core/src/storage/sqlite.rs
+++ b/crates/vestige-core/src/storage/sqlite.rs
@ -37,7 +37,7 @@ use crate::embeddings::EmbeddingService;
 use crate::embeddings::{EMBEDDING_DIMENSIONS, Embedding, matryoshka_truncate};

 #[cfg(feature = "vector-search")]
-use crate::search::{VectorIndex, linear_combination};
+use crate::search::{VectorIndex, reciprocal_rank_fusion};

 #[cfg(all(feature = "embeddings", feature = "vector-search"))]
 use crate::search::hyde;
@ -2896,13 +2896,15 @@ impl SqliteMemoryStore {
                vec![]
            };

+        // Reciprocal Rank Fusion (k=60) whenever semantic results exist: it is scale-free
+        // and rewards a memory that appears in BOTH the keyword and semantic lists —
+        // exactly the structurally-similar-different-words paraphrase that linear
+        // max-norm fusion buried. With no semantic results, the keyword list is used as-is.
+        // (keyword_weight/semantic_weight retained in the signature for compatibility;
+        // RRF is rank-based so the weights no longer scale the fused score.)
+        let _ = (keyword_weight, semantic_weight);
        let combined = if !semantic_results.is_empty() {
-            linear_combination(
-                &keyword_results,
-                &semantic_results,
-                keyword_weight,
-                semantic_weight,
-            )
+            reciprocal_rank_fusion(&keyword_results, &semantic_results, 60.0)
        } else {
            keyword_results.clone()
        };
@ -4713,6 +4715,22 @@ impl SqliteMemoryStore {
        let composed_pairs = self.composed_pair_set()?;
        let composition_degrees = self.composition_degree_map()?;
        let outcome_map = self.composition_outcome_map()?;
+
+        // SEMANTIC-BAND GATE (the composition generativity unlock): load embeddings so a pair
+        // that shares NO literal tag/word but lives in the "distant-but-relatable" cosine band
+        // can still surface as a never-composed insight — exactly the non-obvious combination
+        // a keyword/exact-overlap gate (and cosine-NN search) can never return. The band excludes
+        // near-duplicates (>= 0.85, those are the same idea) and unrelated noise (< 0.45).
+        #[cfg(all(feature = "embeddings", feature = "vector-search"))]
+        let embedding_map: std::collections::HashMap<String, Vec<f32>> = self
+            .get_all_embeddings()
+            .map(|v| v.into_iter().collect())
+            .unwrap_or_default();
+        #[cfg(all(feature = "embeddings", feature = "vector-search"))]
+        const COMPOSE_BAND_LO: f32 = 0.45;
+        #[cfg(all(feature = "embeddings", feature = "vector-search"))]
+        const COMPOSE_BAND_HI: f32 = 0.85;
+
        let mut candidates = Vec::new();

        for i in 0..nodes.len() {
@ -4733,7 +4751,27 @@ impl SqliteMemoryStore {

                let shared_tags = Self::shared_tags(&a.tags, &b.tags);
                let shared_terms = Self::shared_content_terms(&a.content, &b.content, 8);
-                if shared_tags.is_empty() && shared_terms.is_empty() {
+
+                // Semantic-band cosine: lets a pair with NO shared surface tokens but a
+                // related MEANING through the gate (the generative cross-domain combination).
+                #[cfg(all(feature = "embeddings", feature = "vector-search"))]
+                let band_cos: Option<f32> = match (embedding_map.get(&a.id), embedding_map.get(&b.id))
+                {
+                    (Some(ea), Some(eb)) => {
+                        let c = crate::embeddings::cosine_similarity(ea, eb);
+                        if (COMPOSE_BAND_LO..COMPOSE_BAND_HI).contains(&c) {
+                            Some(c)
+                        } else {
+                            None
+                        }
+                    }
+                    _ => None,
+                };
+                #[cfg(not(all(feature = "embeddings", feature = "vector-search")))]
+                let band_cos: Option<f32> = None;
+
+                // Admit the pair if it shares surface signal OR it sits in the semantic band.
+                if shared_tags.is_empty() && shared_terms.is_empty() && band_cos.is_none() {
                    continue;
                }

@ -4752,10 +4790,14 @@ impl SqliteMemoryStore {
                );
                let anchor_score =
                    (shared_tags.len() as f64 * 0.45) + (shared_terms.len().min(5) as f64 * 0.25);
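+                // Every pair inside the semantic band gets an anchor from cosine: 1.0 at the
+                // 0.45 floor, rising linearly toward 1.8 at the 0.85 ceiling. This lets band-only
+                // pairs (no surface overlap) clear the cutoff; pairs that also share tags or
+                // terms receive it on top of anchor_score.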
+                let band_anchor = band_cos.map(|c| 1.0 + (c as f64 - 0.45) * 2.0).unwrap_or(0.0);
                let prior_outcomes = Self::pair_prior_outcomes(&outcome_map, &a.id, &b.id);
                let outcome_signal = Self::outcome_signal(&prior_outcomes);
                let outcome_score_adjustment = Self::outcome_score_adjustment(&prior_outcomes);
                let score = anchor_score
+                    + band_anchor
                    + (bridge_score * 2.0)
                    + (novelty_score * 1.5)
                    + trust_score
--- a/crates/vestige-mcp/src/bin/cli.rs
+++ b/crates/vestige-mcp/src/bin/cli.rs
@ -268,6 +268,35 @@ enum Commands {
        json: bool,
    },

+    /// Recall + reason across memories (deep_reference): hybrid search, FSRS-6 trust,
+    /// spreading activation, supersession + contradiction analysis. Returns the
+    /// synthesized answer, evidence, and confidence.
+    Recall {
+        /// The query / claim to reason about
+        query: String,
+        /// How many memories to analyze (candidate depth)
+        #[arg(long, default_value = "20")]
+        depth: i64,
+        /// Output raw JSON instead of the human-readable summary
+        #[arg(long)]
+        json: bool,
+    },
+
+    /// Compose: surface NEVER-COMPOSED memory pairs — two memories you wrote that nobody
+    /// (including you) ever connected — and the testable question they imply. The insight
+    /// generator: semantic-band + structural-bridge ranking over your cross-domain memory.
+    Compose {
+        /// How many candidate insight pairs to surface
+        #[arg(long, default_value = "5")]
+        limit: i32,
+        /// Optional tag filter (comma-separated) to focus a domain
+        #[arg(long)]
+        tags: Option<String>,
+        /// Output raw JSON instead of the human-readable summary
+        #[arg(long)]
+        json: bool,
+    },
+
    /// Start standalone HTTP MCP server (no stdio, for remote access)
    Serve {
        /// HTTP transport port
@ -353,6 +382,8 @@ fn main() -> anyhow::Result<()> {
            contrast,
            json,
        } => run_backfill(failure_id, manual, lookback_days, !no_promote, contrast, json),
+        Commands::Recall { query, depth, json } => run_recall(query, depth, json),
+        Commands::Compose { limit, tags, json } => run_compose(limit, tags, json),
        Commands::Serve {
            port,
            dashboard,
@ -2778,6 +2809,172 @@ fn run_backfill(
    Ok(())
 }

+/// Recall + reason across memories using the real deep_reference engine.
+///
+/// Opens the store, attempts to initialize embeddings (a failure is reported on stderr and
+/// is not fatal), hydrates a fresh `CognitiveEngine`, and runs
+/// `tools::cross_reference::execute` with `{ "query", "depth" }` on a dedicated Tokio runtime.
+///
+/// With `json` set, the raw engine response is pretty-printed and nothing else is shown.
+/// Otherwise a short summary is printed: a header line, any `claim_conflicts` the engine
+/// reported, up to 6 lines of the recommended answer, and up to 5 evidence previews.
+///
+/// # Errors
+/// Returns an error if the store cannot be opened, the runtime cannot be created, the
+/// engine call fails, or the JSON response cannot be serialized.
+fn run_recall(query: String, depth: i64, json: bool) -> anyhow::Result<()> {
+    use vestige_mcp::cognitive::CognitiveEngine;
+
+    let storage = open_storage()?;
+
+    // Embeddings are best-effort: report the failure and carry on.
+    #[cfg(feature = "embeddings")]
+    {
+        if let Err(e) = storage.init_embeddings() {
+            eprintln!(
+                "  {} Embeddings unavailable: {} (recall will use keyword-only)",
+                "!".yellow(),
+                e
+            );
+        }
+    }
+
+    let storage = Arc::new(storage);
+
+    let rt = tokio::runtime::Runtime::new()?;
+    let result = rt.block_on(async move {
+        let cognitive = Arc::new(tokio::sync::Mutex::new(CognitiveEngine::new()));
+        {
+            // Scope the guard so the lock is released before `execute` runs.
+            let mut cog = cognitive.lock().await;
+            cog.hydrate(&storage);
+        }
+        let args = serde_json::json!({ "query": query, "depth": depth });
+        vestige_mcp::tools::cross_reference::execute(&storage, &cognitive, Some(args)).await
+    });
+
+    let value = result.map_err(|e| anyhow::anyhow!("recall error: {}", e))?;
+
+    if json {
+        println!("{}", serde_json::to_string_pretty(&value)?);
+        return Ok(());
+    }
+
+    // Human-readable summary of the real engine output.
+    let conf = value
+        .get("confidence")
+        .and_then(|v| v.as_f64())
+        .unwrap_or(0.0);
+    let intent = value
+        .get("intent")
+        .and_then(|v| v.as_str())
+        .unwrap_or("Synthesis");
+    let analyzed = value
+        .get("memoriesAnalyzed")
+        .and_then(|v| v.as_i64())
+        .unwrap_or(0);
+
+    println!(
+        "{}  intent={}  confidence={:.0}%  memories_analyzed={}",
+        "Recall".cyan().bold(),
+        intent,
+        conf * 100.0,
+        analyzed
+    );
+
+    // Surface claim-vs-memory conflicts before anything else: without this the summary
+    // would show a confidence figure while hiding that the claim contradicts a memory.
+    if let Some(conflicts) = value
+        .get("claim_conflicts")
+        .and_then(|v| v.as_array())
+        .filter(|c| !c.is_empty())
+    {
+        println!(
+            "\n{} ({})",
+            "Claim conflicts with stored memory".yellow().bold(),
+            conflicts.len()
+        );
+        for (i, c) in conflicts.iter().take(5).enumerate() {
+            let pv = c
+                .get("conflicting_memory")
+                .and_then(|m| m.get("preview"))
+                .and_then(|v| v.as_str())
+                .unwrap_or("")
+                .replace('\n', " ");
+            let pv: String = pv.chars().take(78).collect();
+            println!("  {}. {}", i + 1, pv);
+        }
+    }
+
+    if let Some(rec) = value.get("recommended") {
+        let ans = rec
+            .get("answer_preview")
+            .or_else(|| rec.get("preview"))
+            .and_then(|v| v.as_str())
+            .unwrap_or("");
+        if !ans.is_empty() {
+            println!("\n{}", "Recommended:".white().bold());
+            for line in ans.lines().take(6) {
+                println!("  {}", line);
+            }
+        }
+    }
+
+    if let Some(ev) = value.get("evidence").and_then(|v| v.as_array()) {
+        println!("\n{} ({})", "Evidence".white().bold(), ev.len());
+        for (i, e) in ev.iter().take(5).enumerate() {
+            let pv = e
+                .get("preview")
+                .and_then(|v| v.as_str())
+                .unwrap_or("")
+                .replace('\n', " ");
+            // Truncate by chars, not bytes, so multi-byte text is never split mid-codepoint.
+            let pv: String = pv.chars().take(78).collect();
+            println!("  {}. {}", i + 1, pv);
+        }
+    }
+
+    Ok(())
+}
+
+/// Compose: surface never-composed memory pairs + the testable question they imply.
+///
+/// Opens the store, attempts to initialize embeddings (a failure is reported on stderr and
+/// is not fatal), and asks the store for up to `limit` never-composed candidates.
+///
+/// `tags` is an optional comma-separated list; each entry is trimmed and empty entries are
+/// dropped before it is passed to the store as a filter.
+///
+/// With `json` set, a JSON array of candidates is printed (an empty array when there are
+/// none). Otherwise a human-readable listing is printed, or a hint when nothing surfaced.
+///
+/// # Errors
+/// Returns an error if the store cannot be opened, the candidate query fails, or the JSON
+/// output cannot be serialized.
+fn run_compose(limit: i32, tags: Option<String>, json: bool) -> anyhow::Result<()> {
+    let storage = open_storage()?;
+
+    // Best-effort, but not silent: warn the same way `run_recall` does so the user knows
+    // why results may be thinner than expected.
+    #[cfg(feature = "embeddings")]
+    {
+        if let Err(e) = storage.init_embeddings() {
+            eprintln!(
+                "  {} Embeddings unavailable: {} (compose may miss semantic-band pairs)",
+                "!".yellow(),
+                e
+            );
+        }
+    }
+
+    let tag_vec: Option<Vec<String>> = tags.map(|t| {
+        t.split(',')
+            .map(|s| s.trim().to_string())
+            .filter(|s| !s.is_empty())
+            .collect()
+    });
+
+    let candidates = storage
+        .get_never_composed_candidates(limit, tag_vec.as_deref())
+        .map_err(|e| anyhow::anyhow!("compose error: {}", e))?;
+
+    if json {
+        let arr: Vec<_> = candidates
+            .iter()
+            .map(|c| {
+                serde_json::json!({
+                    "score": c.score,
+                    "novelty": c.novelty_score,
+                    "bridge": c.bridge_score,
+                    "trust": c.trust_score,
+                    "a": c.first_preview,
+                    "b": c.second_preview,
+                    "shared_tags": c.shared_tags,
+                    "question": c.composition_question,
+                    "reason": c.reason,
+                })
+            })
+            .collect();
+        println!("{}", serde_json::to_string_pretty(&arr)?);
+        return Ok(());
+    }
+
+    if candidates.is_empty() {
+        println!(
+            "{}  no never-composed candidates surfaced (try a wider --limit or remove --tags)",
+            "Compose".magenta().bold()
+        );
+        return Ok(());
+    }
+
+    println!(
+        "{}  {} never-composed insight{} — pairs you wrote that were never connected:\n",
+        "Compose".magenta().bold(),
+        candidates.len(),
+        if candidates.len() == 1 { "" } else { "s" }
+    );
+
+    for (i, c) in candidates.iter().enumerate() {
+        // Flatten newlines and truncate by chars so each preview stays on one line.
+        let a: String = c.first_preview.replace('\n', " ").chars().take(70).collect();
+        let b: String = c.second_preview.replace('\n', " ").chars().take(70).collect();
+        let idx = format!("{}.", i + 1).cyan().bold();
+        let metrics = format!(
+            "{:.2}  (novelty {:.2}, bridge {:.2})",
+            c.score, c.novelty_score, c.bridge_score
+        );
+        println!("{} {} {}", idx, "score".white(), metrics);
+        println!("   A: {}", a);
+        println!("   B: {}", b);
+        let q: String = c.composition_question.replace('\n', " ").chars().take(120).collect();
+        if !q.is_empty() {
+            println!("   {} {}", "?".yellow().bold(), q.yellow());
+        }
+        println!();
+    }
+
+    Ok(())
+}
+
 /// Run the dashboard web server
 fn run_dashboard(port: u16, open_browser: bool) -> anyhow::Result<()> {
    use vestige_mcp::cognitive::CognitiveEngine;
--- a/crates/vestige-mcp/src/tools/cross_reference.rs
+++ b/crates/vestige-mcp/src/tools/cross_reference.rs
@ -660,6 +660,36 @@ pub async fn execute(
        }
    }

+    // ====================================================================
+    // STAGE 5b: CLAIM-vs-MEMORY contradiction (the structural fix).
+    // The original engine only compared stored memory PAIRS — it never tested
+    // the user's QUERY against memory, so "your claim X contradicts stored
+    // memory Y" was invisible (confident silence, the dangerous failure). Here
+    // we test args.query against each analyzed memory so a claim that conflicts
+    // with a high-trust memory surfaces and lowers confidence.
+    let mut claim_conflicts: Vec<Value> = Vec::new();
+    for m in scored.iter() {
+        if m.trust < 0.3 {
+            continue;
+        }
+        let overlap = topic_overlap(&args.query, &m.content);
+        if overlap < 0.4 {
+            continue;
+        }
+        if appears_contradictory(&args.query, &m.content) {
+            claim_conflicts.push(serde_json::json!({
+                "claim": args.query.chars().take(160).collect::<String>(),
+                "conflicting_memory": {
+                    "id": m.id,
+                    "preview": m.content.chars().take(150).collect::<String>(),
+                    "trust": (m.trust * 100.0).round() / 100.0,
+                    "date": m.updated_at.to_rfc3339(),
+                },
+                "topic_overlap": overlap,
+            }));
+        }
+    }
+
    // ====================================================================
    // STAGE 6: Dream Insight Integration
    // ====================================================================
@ -848,10 +878,16 @@ pub async fn execute(
    // function of trust + corpus size alone.
    let base_confidence = recommended.map(composite).unwrap_or(0.0);
    let agreement_boost = (evidence.len() as f64 * 0.03).min(0.2);
-    let contradiction_penalty = contradictions.len() as f64 * 0.1;
+    // A claim that conflicts with a stored memory is the strongest possible signal
+    // to lower confidence (heavier penalty than an inter-memory disagreement).
+    let contradiction_penalty =
+        (contradictions.len() as f64 * 0.1) + (claim_conflicts.len() as f64 * 0.2);
    let confidence = (base_confidence + agreement_boost - contradiction_penalty).clamp(0.0, 1.0);

-    let status = if contradictions.is_empty() && confidence > 0.7 {
+    let status = if !claim_conflicts.is_empty() {
+        // The claim itself conflicts with stored memory — never report "resolved".
+        "claim_contradicts_memory"
+    } else if contradictions.is_empty() && confidence > 0.7 {
        "resolved"
    } else if !contradictions.is_empty() {
        "contradictions_found"
@ -861,7 +897,13 @@ pub async fn execute(
        "partial_evidence"
    };

-    let guidance = if let Some(rec) = recommended {
+    let guidance = if !claim_conflicts.is_empty() {
+        format!(
+            "CAUTION: your claim conflicts with {} stored memor{}. Do NOT treat this as resolved — review the conflicting memory(ies) below before acting.",
+            claim_conflicts.len(),
+            if claim_conflicts.len() == 1 { "y" } else { "ies" }
+        )
+    } else if let Some(rec) = recommended {
        if contradictions.is_empty() {
            format!(
                "High confidence ({:.0}%). Recommended memory (trust {:.0}%, {}) is the most reliable source.",
@ -903,6 +945,10 @@ pub async fn execute(
        "activationExpanded": activation_expanded,
    });

+    if !claim_conflicts.is_empty() {
+        response["claim_conflicts"] = serde_json::json!(claim_conflicts);
+    }
+
    if let Some(rec) = recommended {
        response["recommended"] = serde_json::json!({
            "answer_preview": rec.content.chars().take(300).collect::<String>(),
@ -1366,6 +1412,90 @@ mod tests {
        ));
    }

+    // ========================================================================
+    // STAGE 5b AUDIT: a NON-contradicting claim must NOT set
+    // status=claim_contradicts_memory; a contradicting claim MUST.
+    // ========================================================================
+    #[tokio::test]
+    async fn audit_stage5b_noncontradicting_claim_is_not_flagged() {
+        // Keep the second tuple element bound so it is not dropped before the test ends.
+        let (storage, _tempdir) = test_storage().await;
+
+        // Seed a single memory whose subject and stance both match the claim below.
+        let seed_tags = ["vestige", "vector-search"];
+        ingest_one(
+            &storage,
+            "Vestige uses USearch HNSW for vector search with cosine similarity \
+             and Matryoshka truncation to 256 dimensions for storage savings.",
+            &seed_tags,
+        )
+        .await;
+
+        // The claim restates the memory: no negation, no correction marker.
+        let claim = serde_json::json!({
+            "query": "Vestige uses USearch HNSW for vector search with cosine \
+                      similarity and Matryoshka truncation to 256 dimensions"
+        });
+        let result = execute(&storage, &test_cognitive(), Some(claim))
+            .await
+            .expect("execute should succeed");
+
+        // Guard against a vacuous pass: the memory has to have been retrieved.
+        let analyzed = result["memoriesAnalyzed"].as_i64().unwrap_or(0);
+        assert!(
+            analyzed >= 1,
+            "Expected the agreeing memory to be retrieved (memoriesAnalyzed>=1). Got {:?}",
+            result["memoriesAnalyzed"]
+        );
+
+        let conflicts = result.get("claim_conflicts");
+        assert_ne!(
+            result["status"].as_str(),
+            Some("claim_contradicts_memory"),
+            "A NON-contradicting (agreeing) claim must not be flagged. Got status={:?}, claim_conflicts={:?}",
+            result["status"],
+            conflicts
+        );
+        assert!(
+            conflicts.is_none(),
+            "No claim_conflicts array should be present for an agreeing claim. Got {:?}",
+            conflicts
+        );
+    }
+
+    // STAGE 5b gate, exercised directly rather than through `execute`.
+    // Per the original author's note, the end-to-end path cannot surface a
+    // genuinely-contradicting claim in a test env with no embeddings model
+    // loaded: keyword retrieval is implicit-AND and a contradicting claim
+    // carries a stance word the memory lacks. This test therefore checks the
+    // two conditions STAGE 5b applies to a retrieved memory:
+    // topic_overlap >= 0.4 AND appears_contradictory(query, memory).
+    #[test]
+    fn audit_stage5b_gate_predicate_distinguishes_agree_vs_contradict() {
+        let memory = "USearch HNSW vector search Vestige production cosine similarity \
+                      recall correct should always be enabled because it is fast";
+
+        // Case 1 — agreement: the claim is word-for-word the memory, so overlap is
+        // high and there is no stance flip. The gate must stay closed.
+        let agree = memory;
+        let agree_overlap = topic_overlap(agree, memory);
+        assert!(agree_overlap >= 0.4, "agree/memory should share topic");
+        let agree_flagged = appears_contradictory(agree, memory);
+        assert!(
+            !agree_flagged,
+            "An agreeing claim must NOT be flagged as contradictory (false-positive guard)"
+        );
+
+        // Case 2 — contradiction: same subject, but negation markers
+        // ("never"/"avoid") appear on the claim side only. The gate must open.
+        let contradict = "USearch HNSW vector search Vestige production cosine similarity \
+                          recall avoid never enabled";
+        let contradict_overlap = topic_overlap(contradict, memory);
+        assert!(
+            contradict_overlap >= 0.4,
+            "contradict/memory should share topic"
+        );
+        let contradict_flagged = appears_contradictory(contradict, memory);
+        assert!(
+            contradict_flagged,
+            "A same-subject negated claim MUST be flagged as contradictory"
+        );
+    }
+
    #[test]
    fn test_topic_overlap_similar() {
        let overlap = topic_overlap(