From efbea2513310982751b1ca21122ed61e6e94f3d5 Mon Sep 17 00:00:00 2001 From: Sam Valladares Date: Thu, 18 Jun 2026 15:59:57 -0500 Subject: [PATCH 1/8] Add ComposedGraph composition ledger --- README.md | 3 +- crates/vestige-core/src/lib.rs | 18 +- crates/vestige-core/src/storage/migrations.rs | 98 +- crates/vestige-core/src/storage/mod.rs | 8 +- crates/vestige-core/src/storage/sqlite.rs | 1968 ++++++++++++++++- crates/vestige-mcp/README.md | 2 +- crates/vestige-mcp/src/server.rs | 19 +- .../vestige-mcp/src/tools/composed_graph.rs | 906 ++++++++ .../vestige-mcp/src/tools/cross_reference.rs | 252 ++- crates/vestige-mcp/src/tools/mod.rs | 1 + docs/COMPOSED_GRAPH.md | 159 ++ 11 files changed, 3375 insertions(+), 59 deletions(-) create mode 100644 crates/vestige-mcp/src/tools/composed_graph.rs create mode 100644 docs/COMPOSED_GRAPH.md diff --git a/README.md b/README.md index f747715..ec3ba29 100644 --- a/README.md +++ b/README.md @@ -244,7 +244,7 @@ This isn't a key-value store with an embedding model bolted on. Vestige implemen --- -## 🛠 25 MCP Tools +## 🛠 MCP Tools ### Context Packets | Tool | What It Does | @@ -272,6 +272,7 @@ This isn't a key-value store with an embedding model bolted on. 
Vestige implemen |------|-------------| | `memory_health` | Retention dashboard — distribution, trends, recommendations | | `memory_graph` | Knowledge graph export — force-directed layout, up to 200 nodes | +| `composed_graph` | Composition ledger — recent composed memory sets, neighbors, outcome labels, bounty/research lanes, and never-composed frontier candidates | ### Scoring & Dedup | Tool | What It Does | diff --git a/crates/vestige-core/src/lib.rs b/crates/vestige-core/src/lib.rs index b0afc0b..b8b0154 100644 --- a/crates/vestige-core/src/lib.rs +++ b/crates/vestige-core/src/lib.rs @@ -155,13 +155,15 @@ pub use fsrs::{ }; // Configuration (vestige.toml output profiles / defaults) -pub use config::{OutputConfig, OutputDefaults, OutputProfile, VestigeConfig, CONFIG_FILE}; +pub use config::{CONFIG_FILE, OutputConfig, OutputDefaults, OutputProfile, VestigeConfig}; // Storage layer pub use storage::{ - ConnectionRecord, ConsolidationHistoryRecord, DreamHistoryRecord, InsightRecord, - IntentionRecord, PORTABLE_ARCHIVE_FORMAT, PortableArchive, PortableImportMode, - PortableImportReport, Result, SmartIngestResult, StateTransitionRecord, Storage, StorageError, + CompositionEventRecord, CompositionMemberRecord, CompositionNeighborRecord, + CompositionOutcomeRecord, ConnectionRecord, ConsolidationHistoryRecord, DreamHistoryRecord, + InsightRecord, IntentionRecord, NeverComposedCandidate, PORTABLE_ARCHIVE_FORMAT, + PortableArchive, PortableImportMode, PortableImportReport, Result, SmartIngestResult, + StateTransitionRecord, Storage, StorageError, }; // Consolidation (sleep-inspired memory processing) @@ -220,6 +222,9 @@ pub use advanced::{ LabileState, Language, MaintenanceType, + // Merge / Supersede controls (Phase 3) + MatchClass, + MatchSignals, // Memory chains MemoryChainBuilder, // Memory compression @@ -230,18 +235,15 @@ pub use advanced::{ MemoryPath, MemoryReplay, MemorySnapshot, - // Merge / Supersede controls (Phase 3) - MatchClass, - MatchSignals, 
MergeCandidate, MergeOperation, MergePlan, MergePolicy, MergeStrategy, Modification, - PlanKind, Pattern, PatternType, + PlanKind, PredictedMemory, PredictionContext, PredictionErrorConfig, diff --git a/crates/vestige-core/src/storage/migrations.rs b/crates/vestige-core/src/storage/migrations.rs index 3be941c..127bc84 100644 --- a/crates/vestige-core/src/storage/migrations.rs +++ b/crates/vestige-core/src/storage/migrations.rs @@ -74,6 +74,11 @@ pub const MIGRATIONS: &[Migration] = &[ description: "v2.1.25 Merge/Supersede: reversible operation log, merge plans, bitemporal lineage, protected pins", up: MIGRATION_V14_UP, }, + Migration { + version: 15, + description: "ComposedGraph: composition events, members, outcomes", + up: MIGRATION_V15_UP, + }, ]; /// A database migration @@ -813,6 +818,67 @@ CREATE INDEX IF NOT EXISTS idx_merge_operations_survivor ON merge_operations(sur UPDATE schema_version SET version = 14, applied_at = datetime('now'); "#; +/// V15: ComposedGraph persistence for memory composition outcomes. +/// +/// These tables record which memories were used together, which tool/query +/// produced the composition, and what happened afterward. `memory_id` values +/// are intentionally historical references instead of foreign keys to +/// `knowledge_nodes`: purging or superseding a memory must not erase the fact +/// that a bounty lane or reasoning path was previously composed. 
+const MIGRATION_V15_UP: &str = r#" +CREATE TABLE IF NOT EXISTS composition_events ( + id TEXT PRIMARY KEY, + created_at TEXT NOT NULL, + tool TEXT NOT NULL, + mode TEXT NOT NULL DEFAULT 'deep_reference', + query TEXT, + query_hash TEXT, + confidence REAL, + status TEXT, + output_preview TEXT, + metadata TEXT NOT NULL DEFAULT '{}' +); + +CREATE INDEX IF NOT EXISTS idx_composition_events_created_at ON composition_events(created_at); +CREATE INDEX IF NOT EXISTS idx_composition_events_tool ON composition_events(tool); +CREATE INDEX IF NOT EXISTS idx_composition_events_mode ON composition_events(mode); +CREATE INDEX IF NOT EXISTS idx_composition_events_query_hash ON composition_events(query_hash); + +CREATE TABLE IF NOT EXISTS composition_members ( + event_id TEXT NOT NULL, + memory_id TEXT NOT NULL, + role TEXT NOT NULL, -- primary | supporting | contradicting | superseded | related + rank INTEGER NOT NULL DEFAULT 0, + trust REAL, + score REAL, + preview TEXT, + metadata TEXT NOT NULL DEFAULT '{}', + PRIMARY KEY (event_id, memory_id, role), + FOREIGN KEY (event_id) REFERENCES composition_events(id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_composition_members_memory ON composition_members(memory_id); +CREATE INDEX IF NOT EXISTS idx_composition_members_role ON composition_members(role); + +CREATE TABLE IF NOT EXISTS composition_outcomes ( + id TEXT PRIMARY KEY, + event_id TEXT NOT NULL, + outcome_type TEXT NOT NULL, + labeled_at TEXT NOT NULL, + label_source TEXT NOT NULL DEFAULT 'tool', + confidence_delta REAL, + notes TEXT, + metadata TEXT NOT NULL DEFAULT '{}', + FOREIGN KEY (event_id) REFERENCES composition_events(id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_composition_outcomes_event ON composition_outcomes(event_id); +CREATE INDEX IF NOT EXISTS idx_composition_outcomes_type ON composition_outcomes(outcome_type); +CREATE INDEX IF NOT EXISTS idx_composition_outcomes_labeled_at ON composition_outcomes(labeled_at); + +UPDATE 
schema_version SET version = 15, applied_at = datetime('now'); +"#; + /// Get current schema version from database pub fn get_current_version(conn: &rusqlite::Connection) -> rusqlite::Result { conn.query_row( @@ -829,7 +895,9 @@ pub fn get_current_version(conn: &rusqlite::Connection) -> rusqlite::Result fn add_column_if_missing(conn: &rusqlite::Connection, sql: &str) -> rusqlite::Result<()> { match conn.execute(sql, []) { Ok(_) => Ok(()), - Err(rusqlite::Error::SqliteFailure(_, Some(msg))) if msg.contains("duplicate column name") => { + Err(rusqlite::Error::SqliteFailure(_, Some(msg))) + if msg.contains("duplicate column name") => + { Ok(()) } Err(e) => Err(e), @@ -890,17 +958,17 @@ mod tests { /// version after `apply_migrations` runs all migrations end-to-end, and /// neither of the dead tables V11 drops must exist afterwards. #[test] - fn test_apply_migrations_advances_to_v14_and_drops_dead_tables() { + fn test_apply_migrations_advances_to_v15_and_drops_dead_tables() { let conn = rusqlite::Connection::open_in_memory().expect("open in-memory"); // Pre-requisite: schema_version must be bootstrapped by V1. apply_migrations(&conn).expect("apply_migrations succeeds"); - // 1. schema_version advanced to V14 + // 1. schema_version advanced to V15 let version = get_current_version(&conn).expect("read schema_version"); assert_eq!( - version, 14, - "schema_version must be 14 after all migrations" + version, 15, + "schema_version must be 15 after all migrations" ); // 2. knowledge_edges is gone (V11 drops it) @@ -967,7 +1035,23 @@ mod tests { assert_eq!(rows, 1, "{table} table must be created by V14"); } - // 7. knowledge_nodes gains `protected` + `superseded_by` (V14) + // 7. 
ComposedGraph tables exist (V15) + for table in [ + "composition_events", + "composition_members", + "composition_outcomes", + ] { + let rows: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1", + [table], + |row| row.get(0), + ) + .expect("query sqlite_master"); + assert_eq!(rows, 1, "{table} table must be created by V15"); + } + + // 8. knowledge_nodes gains `protected` + `superseded_by` (V14) let node_cols: Vec = { let mut stmt = conn .prepare("PRAGMA table_info(knowledge_nodes)") @@ -1006,6 +1090,6 @@ mod tests { apply_migrations(&conn).expect("V11 replay must be idempotent"); let version = get_current_version(&conn).expect("read schema_version"); - assert_eq!(version, 14, "schema_version back at 14 after replay"); + assert_eq!(version, 15, "schema_version back at 15 after replay"); } } diff --git a/crates/vestige-core/src/storage/mod.rs b/crates/vestige-core/src/storage/mod.rs index 1660529..282228d 100644 --- a/crates/vestige-core/src/storage/mod.rs +++ b/crates/vestige-core/src/storage/mod.rs @@ -16,7 +16,9 @@ pub use portable::{ PortableTable, PortableValue, }; pub use sqlite::{ - ConnectionRecord, ConsolidationHistoryRecord, DreamHistoryRecord, FilePortableSyncBackend, - InsightRecord, IntentionRecord, PortableSyncBackend, PortableSyncReport, Result, - SmartIngestResult, StateTransitionRecord, Storage, StorageError, + CompositionEventRecord, CompositionMemberRecord, CompositionNeighborRecord, + CompositionOutcomeRecord, ConnectionRecord, ConsolidationHistoryRecord, DreamHistoryRecord, + FilePortableSyncBackend, InsightRecord, IntentionRecord, NeverComposedCandidate, + PortableSyncBackend, PortableSyncReport, Result, SmartIngestResult, StateTransitionRecord, + Storage, StorageError, }; diff --git a/crates/vestige-core/src/storage/sqlite.rs b/crates/vestige-core/src/storage/sqlite.rs index 4cd32e8..a9840a1 100644 --- a/crates/vestige-core/src/storage/sqlite.rs +++ b/crates/vestige-core/src/storage/sqlite.rs @@ 
-260,6 +260,9 @@ const PORTABLE_TABLES: &[&str] = &[ "retention_snapshots", "sync_tombstones", "deletion_tombstones", + "composition_events", + "composition_members", + "composition_outcomes", ]; const PORTABLE_USER_DATA_TABLES: &[&str] = &[ @@ -278,6 +281,9 @@ const PORTABLE_USER_DATA_TABLES: &[&str] = &[ "retention_snapshots", "sync_tombstones", "deletion_tombstones", + "composition_events", + "composition_members", + "composition_outcomes", ]; #[derive(Default)] @@ -1950,10 +1956,7 @@ impl Storage { // future migrations that switch. chrono::NaiveDateTime::parse_from_str(&s, "%Y-%m-%d %H:%M:%S") .map(|naive| naive.and_utc()) - .or_else(|_| { - DateTime::parse_from_rfc3339(&s) - .map(|dt| dt.with_timezone(&Utc)) - }) + .or_else(|_| DateTime::parse_from_rfc3339(&s).map(|dt| dt.with_timezone(&Utc))) .ok() }); @@ -2106,6 +2109,11 @@ impl Storage { params![id], )? as i64; + tx.execute( + "UPDATE composition_members SET preview = NULL WHERE memory_id = ?1", + params![id], + )?; + let tags_json = serde_json::to_string(&node.tags).unwrap_or_else(|_| "[]".to_string()); tx.execute( "INSERT INTO deletion_tombstones ( @@ -4035,7 +4043,966 @@ pub struct DreamHistoryRecord { pub creative_connections_found: Option, } +/// Composition event envelope for ComposedGraph. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct CompositionEventRecord { + pub id: String, + pub created_at: DateTime, + pub tool: String, + pub mode: String, + pub query: Option, + pub query_hash: Option, + pub confidence: Option, + pub status: Option, + pub output_preview: Option, + pub metadata: serde_json::Value, +} + +/// Memory participating in a composition event. 
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct CompositionMemberRecord {
+    /// Id of the `composition_events` row this member belongs to.
+    pub event_id: String,
+    /// Memory id kept as a historical reference; the V15 schema declares no
+    /// foreign key from this column to `knowledge_nodes`.
+    pub memory_id: String,
+    /// Role inside the composition. The schema comment lists
+    /// primary | supporting | contradicting | superseded | related.
+    pub role: String,
+    /// Primary sort key when members are listed (ascending; column default 0).
+    pub rank: i32,
+    /// Optional trust value (nullable REAL column).
+    pub trust: Option<f64>,
+    /// Optional score value (nullable REAL column).
+    pub score: Option<f64>,
+    /// Optional content preview (nullable TEXT column). The deletion path that
+    /// writes `deletion_tombstones` sets this to NULL for the deleted memory.
+    pub preview: Option<String>,
+    /// JSON document stored in the `metadata` TEXT column (default `{}`).
+    pub metadata: serde_json::Value,
+}
+
+/// Outcome label attached to a composition event.
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct CompositionOutcomeRecord {
+    /// Primary key of the outcome row.
+    pub id: String,
+    /// Id of the `composition_events` row being labeled.
+    pub event_id: String,
+    /// Outcome label, stored as free text.
+    pub outcome_type: String,
+    /// When the label was applied; persisted as an RFC 3339 string.
+    pub labeled_at: DateTime<Utc>,
+    /// Who or what produced the label (column default `tool`).
+    pub label_source: String,
+    /// Optional confidence change associated with the label (nullable REAL column).
+    pub confidence_delta: Option<f64>,
+    /// Optional free-text notes (nullable TEXT column).
+    pub notes: Option<String>,
+    /// JSON document stored in the `metadata` TEXT column (default `{}`).
+    pub metadata: serde_json::Value,
+}
+
+/// Memory most often composed with another memory.
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct CompositionNeighborRecord {
+    /// Id of the neighboring memory.
+    pub memory_id: String,
+    /// Number of distinct composition events shared with the requested memory.
+    pub composed_count: i64,
+    /// `created_at` of the most recent shared composition event.
+    pub latest_event_at: DateTime<Utc>,
+}
+
+/// Candidate memory pair that shares useful shape but has never been composed.
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct NeverComposedCandidate {
+    /// Id of the first memory in the pair.
+    pub first_id: String,
+    /// Id of the second memory in the pair.
+    pub second_id: String,
+    /// Combined ranking score; candidates are returned in descending order of it.
+    pub score: f64,
+    /// Higher when the two memories appear in fewer existing composition events.
+    pub novelty_score: f64,
+    /// Bridge component of the score, clamped to `0.0..=1.0`.
+    pub bridge_score: f64,
+    /// Mean retention strength of the two memories, clamped to `0.0..=1.0`.
+    pub trust_score: f64,
+    /// Adjustment derived from outcome labels previously attached to either memory.
+    pub outcome_score_adjustment: f64,
+    /// Tags present on both memories.
+    pub shared_tags: Vec<String>,
+    /// Boundary-style tags found on either memory.
+    pub boundary_tags: Vec<String>,
+    /// Content terms that occur in both memories.
+    pub shared_terms: Vec<String>,
+    /// Distinct outcome types already recorded for events containing either memory.
+    pub prior_outcomes: Vec<String>,
+    /// Summary label derived from `prior_outcomes` (`clean` when there are none).
+    pub outcome_signal: String,
+    pub first_node_type: String,
+    pub second_node_type: String,
+    pub first_preview: String,
+    pub second_preview: String,
+    /// Human-readable explanation of why the pair was suggested.
+    pub reason: String,
+    /// Prompt-style question describing the untried composition.
+    pub composition_question: String,
+}
+
 impl Storage {
+    // ========================================================================
+    // COMPOSEDGRAPH PERSISTENCE
+    // ========================================================================
+
+    /// Save a complete composition event with members and optional outcomes in one transaction.
+    ///
+    /// Each member is cloned so a `tags` snapshot can be added to its metadata
+    /// before it is written; the caller's records are left untouched.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the writer lock is poisoned or any SQL statement fails. Nothing
+    /// is committed unless every insert succeeds.
+    pub fn save_composition(
+        &self,
+        event: &CompositionEventRecord,
+        members: &[CompositionMemberRecord],
+        outcomes: &[CompositionOutcomeRecord],
+    ) -> Result<()> {
+        let mut writer = self
+            .writer
+            .lock()
+            .map_err(|_| StorageError::Init("Writer lock poisoned".into()))?;
+        let tx = writer.transaction()?;
+
+        // Metadata that cannot be serialized degrades to an empty object
+        // rather than aborting the save.
+        let metadata_json =
+            serde_json::to_string(&event.metadata).unwrap_or_else(|_| "{}".to_string());
+        // NOTE(review): with foreign keys enabled, REPLACE removes the existing
+        // parent row first, which presumably cascades to members and outcomes
+        // already attached to this id. Confirm that re-saving an event is meant
+        // to drop outcome labels recorded earlier.
+        tx.execute(
+            "INSERT OR REPLACE INTO composition_events (
+                id, created_at, tool, mode, query, query_hash, confidence, status,
+                output_preview, metadata
+            ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
+            params![
+                event.id,
+                event.created_at.to_rfc3339(),
+                event.tool,
+                event.mode,
+                event.query,
+                event.query_hash,
+                event.confidence,
+                event.status,
+                event.output_preview,
+                metadata_json,
+            ],
+        )?;
+
+        for member in members {
+            let mut member = member.clone();
+            Self::snapshot_composition_member_tags(&tx, &mut member)?;
+            Self::insert_composition_member(&tx, &member)?;
+        }
+        for outcome in outcomes {
+            Self::insert_composition_outcome(&tx, outcome)?;
+        }
+
+        tx.commit()?;
+        Ok(())
+    }
+
+    /// Add one outcome label to an existing composition event.
+    ///
+    /// NOTE(review): nothing here checks that `outcome.event_id` exists.
+    /// Rejecting unknown ids relies on the foreign key declared on
+    /// `composition_outcomes`, which only applies if foreign-key enforcement is
+    /// enabled on the writer connection. Confirm.
+    pub fn record_composition_outcome(&self, outcome: &CompositionOutcomeRecord) -> Result<()> {
+        let writer = self
+            .writer
+            .lock()
+            .map_err(|_| StorageError::Init("Writer lock poisoned".into()))?;
+        Self::insert_composition_outcome(&writer, outcome)
+    }
+
+    /// Get one composition event by id, or `None` when no row matches.
+    pub fn get_composition_event(&self, id: &str) -> Result<Option<CompositionEventRecord>> {
+        let reader = self
+            .reader
+            .lock()
+            .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?;
+        let mut stmt = reader.prepare("SELECT * FROM composition_events WHERE id = ?1")?;
+        stmt.query_row(params![id], Self::row_to_composition_event)
+            .optional()
+            .map_err(StorageError::from)
+    }
+
+    /// Get recent composition events (first page, newest first).
+    pub fn get_recent_composition_events(
+        &self,
+        limit: i32,
+    ) -> Result<Vec<CompositionEventRecord>> {
+        self.get_recent_composition_events_page(limit, 0)
+    }
+
+    /// Get recent composition events with explicit pagination.
+    ///
+    /// A `limit` below 1 is raised to 1 and a negative `offset` is treated as 0.
+    /// Rows are ordered newest first, with `id` as a tie-breaker so that
+    /// consecutive pages never repeat or skip events that share a timestamp.
+    pub fn get_recent_composition_events_page(
+        &self,
+        limit: i32,
+        offset: i32,
+    ) -> Result<Vec<CompositionEventRecord>> {
+        let reader = self
+            .reader
+            .lock()
+            .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?;
+        let mut stmt = reader.prepare(
+            "SELECT * FROM composition_events
+             ORDER BY created_at DESC, id DESC
+             LIMIT ?1 OFFSET ?2",
+        )?;
+        // Collecting into `rusqlite::Result<Vec<_>>` stops at the first row
+        // that fails to decode, exactly like a manual `row?` loop.
+        let events = stmt
+            .query_map(
+                params![limit.max(1), offset.max(0)],
+                Self::row_to_composition_event,
+            )?
+            .collect::<rusqlite::Result<Vec<_>>>()?;
+        Ok(events)
+    }
+
+    /// Get all members for a composition event.
+    pub fn get_composition_members(
+        &self,
+        event_id: &str,
+    ) -> Result<Vec<CompositionMemberRecord>> {
+        let reader = self
+            .reader
+            .lock()
+            .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?;
+        // Ordered by rank, then role, then memory id, so the result is stable.
+        let mut stmt = reader.prepare(
+            "SELECT * FROM composition_members
+             WHERE event_id = ?1
+             ORDER BY rank ASC, role ASC, memory_id ASC",
+        )?;
+        // Collecting into `rusqlite::Result<Vec<_>>` stops at the first row
+        // that fails to decode.
+        let members = stmt
+            .query_map(params![event_id], Self::row_to_composition_member)?
+            .collect::<rusqlite::Result<Vec<_>>>()?;
+        Ok(members)
+    }
+
+    /// Get all outcomes for a composition event, most recently labeled first.
+    pub fn get_composition_outcomes(
+        &self,
+        event_id: &str,
+    ) -> Result<Vec<CompositionOutcomeRecord>> {
+        let reader = self
+            .reader
+            .lock()
+            .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?;
+        let mut stmt = reader.prepare(
+            "SELECT * FROM composition_outcomes
+             WHERE event_id = ?1
+             ORDER BY labeled_at DESC",
+        )?;
+        let outcomes = stmt
+            .query_map(params![event_id], Self::row_to_composition_outcome)?
+            .collect::<rusqlite::Result<Vec<_>>>()?;
+        Ok(outcomes)
+    }
+
+    /// Get composition events containing a memory id, newest first.
+    ///
+    /// A `limit` below 1 is raised to 1. `DISTINCT` keeps an event from being
+    /// returned twice when the memory appears in it under more than one role.
+    pub fn get_compositions_for_memory(
+        &self,
+        memory_id: &str,
+        limit: i32,
+    ) -> Result<Vec<CompositionEventRecord>> {
+        let reader = self
+            .reader
+            .lock()
+            .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?;
+        let mut stmt = reader.prepare(
+            "SELECT DISTINCT e.*
+             FROM composition_events e
+             JOIN composition_members m ON m.event_id = e.id
+             WHERE m.memory_id = ?1
+             ORDER BY e.created_at DESC
+             LIMIT ?2",
+        )?;
+        let events = stmt
+            .query_map(
+                params![memory_id, limit.max(1)],
+                Self::row_to_composition_event,
+            )?
+            .collect::<rusqlite::Result<Vec<_>>>()?;
+        Ok(events)
+    }
+
+    /// Return memories most frequently composed with the requested memory.
+    pub fn get_composition_neighbors(
+        &self,
+        memory_id: &str,
+        limit: i32,
+    ) -> Result<Vec<CompositionNeighborRecord>> {
+        let reader = self
+            .reader
+            .lock()
+            .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?;
+        // `distinct_members` collapses the per-role rows so a memory that holds
+        // several roles in one event is counted once for that event. The self
+        // join pairs the requested memory with every other member of the same
+        // event; results are ranked by shared-event count, then by recency.
+        let mut stmt = reader.prepare(
+            "WITH distinct_members AS (
+                SELECT DISTINCT event_id, memory_id FROM composition_members
+             )
+             SELECT other.memory_id, COUNT(DISTINCT other.event_id) AS composed_count, MAX(e.created_at) AS latest_event_at
+             FROM distinct_members self
+             JOIN distinct_members other
+               ON other.event_id = self.event_id AND other.memory_id != self.memory_id
+             JOIN composition_events e ON e.id = self.event_id
+             WHERE self.memory_id = ?1
+             GROUP BY other.memory_id
+             ORDER BY composed_count DESC, latest_event_at DESC
+             LIMIT ?2",
+        )?;
+        // A `limit` below 1 is raised to 1. Collecting into
+        // `rusqlite::Result<Vec<_>>` stops at the first row that fails to decode.
+        let neighbors = stmt
+            .query_map(params![memory_id, limit.max(1)], |row| {
+                Ok(CompositionNeighborRecord {
+                    memory_id: row.get(0)?,
+                    composed_count: row.get(1)?,
+                    latest_event_at: Self::parse_timestamp(
+                        &row.get::<_, String>(2)?,
+                        "latest_event_at",
+                    )?,
+                })
+            })?
+            .collect::<rusqlite::Result<Vec<_>>>()?;
+        Ok(neighbors)
+    }
+
+    /// Generate ranked memory pairs that share useful tags but have not yet been composed.
+ pub fn get_never_composed_candidates( + &self, + limit: i32, + tag_filter: Option<&[String]>, + ) -> Result> { + let nodes = self.composition_candidate_nodes(tag_filter)?; + let composed_pairs = self.composed_pair_set()?; + let composition_degrees = self.composition_degree_map()?; + let outcome_map = self.composition_outcome_map()?; + let mut candidates = Vec::new(); + + for i in 0..nodes.len() { + for j in (i + 1)..nodes.len() { + let a = &nodes[i]; + let b = &nodes[j]; + let pair = Self::pair_key(&a.id, &b.id); + if composed_pairs.contains(&pair) { + continue; + } + + if let Some(filter) = tag_filter + && !filter.is_empty() + && !Self::node_pair_matches_tag_filter(a, b, filter) + { + continue; + } + + let shared_tags = Self::shared_tags(&a.tags, &b.tags); + let shared_terms = Self::shared_content_terms(&a.content, &b.content, 8); + if shared_tags.is_empty() && shared_terms.is_empty() { + continue; + } + + let boundary_tags = Self::boundary_tags_for_pair(&a.tags, &b.tags); + let trust_score = + ((a.retention_strength + b.retention_strength) / 2.0).clamp(0.0, 1.0); + let degree_a = composition_degrees.get(&a.id).copied().unwrap_or(0) as f64; + let degree_b = composition_degrees.get(&b.id).copied().unwrap_or(0) as f64; + let novelty_score = ((1.0 / (1.0 + degree_a)) + (1.0 / (1.0 + degree_b))) / 2.0; + let bridge_score = Self::composition_bridge_score( + a, + b, + &shared_tags, + &shared_terms, + &boundary_tags, + ); + let anchor_score = + (shared_tags.len() as f64 * 0.45) + (shared_terms.len().min(5) as f64 * 0.25); + let prior_outcomes = Self::pair_prior_outcomes(&outcome_map, &a.id, &b.id); + let outcome_signal = Self::outcome_signal(&prior_outcomes); + let outcome_score_adjustment = Self::outcome_score_adjustment(&prior_outcomes); + let score = anchor_score + + (bridge_score * 2.0) + + (novelty_score * 1.5) + + trust_score + + outcome_score_adjustment; + if score < 1.6 { + continue; + } + + let reason = if !boundary_tags.is_empty() { + format!( + "Untried 
bridge across {} with {}", + boundary_tags.join(", "), + Self::anchor_summary(&shared_tags, &shared_terms) + ) + } else if a.node_type != b.node_type { + format!( + "Untried {} -> {} composition with {}", + a.node_type, + b.node_type, + Self::anchor_summary(&shared_tags, &shared_terms) + ) + } else { + format!( + "Never composed despite {}", + Self::anchor_summary(&shared_tags, &shared_terms) + ) + }; + let composition_question = + Self::composition_question(a, b, &shared_tags, &shared_terms, &boundary_tags); + candidates.push(NeverComposedCandidate { + first_id: a.id.clone(), + second_id: b.id.clone(), + score, + novelty_score, + bridge_score, + trust_score, + outcome_score_adjustment, + shared_tags, + boundary_tags, + shared_terms, + prior_outcomes, + outcome_signal, + first_node_type: a.node_type.clone(), + second_node_type: b.node_type.clone(), + first_preview: preview(&a.content, 160), + second_preview: preview(&b.content, 160), + reason, + composition_question, + }); + } + } + + candidates.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + candidates.truncate(limit.max(1) as usize); + Ok(candidates) + } + + fn insert_composition_member( + conn: &Connection, + member: &CompositionMemberRecord, + ) -> Result<()> { + let metadata_json = + serde_json::to_string(&member.metadata).unwrap_or_else(|_| "{}".to_string()); + conn.execute( + "INSERT OR REPLACE INTO composition_members ( + event_id, memory_id, role, rank, trust, score, preview, metadata + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)", + params![ + member.event_id, + member.memory_id, + member.role, + member.rank, + member.trust, + member.score, + member.preview, + metadata_json, + ], + )?; + Ok(()) + } + + fn snapshot_composition_member_tags( + conn: &Connection, + member: &mut CompositionMemberRecord, + ) -> Result<()> { + if member.metadata.get("tags").is_some() { + return Ok(()); + } + + let tags_json: Option = conn + .query_row( + "SELECT tags FROM 
knowledge_nodes WHERE id = ?1", + params![member.memory_id], + |row| row.get(0), + ) + .optional()?; + let Some(tags_json) = tags_json else { + return Ok(()); + }; + let Ok(tags) = serde_json::from_str::>(&tags_json) else { + return Ok(()); + }; + if tags.is_empty() { + return Ok(()); + } + + if let Some(object) = member.metadata.as_object_mut() { + object.insert("tags".to_string(), serde_json::json!(tags)); + } else { + member.metadata = serde_json::json!({ "tags": tags }); + } + Ok(()) + } + + fn insert_composition_outcome( + conn: &Connection, + outcome: &CompositionOutcomeRecord, + ) -> Result<()> { + let metadata_json = + serde_json::to_string(&outcome.metadata).unwrap_or_else(|_| "{}".to_string()); + conn.execute( + "INSERT OR REPLACE INTO composition_outcomes ( + id, event_id, outcome_type, labeled_at, label_source, + confidence_delta, notes, metadata + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)", + params![ + outcome.id, + outcome.event_id, + outcome.outcome_type, + outcome.labeled_at.to_rfc3339(), + outcome.label_source, + outcome.confidence_delta, + outcome.notes, + metadata_json, + ], + )?; + Ok(()) + } + + fn row_to_composition_event(row: &rusqlite::Row) -> rusqlite::Result { + let metadata_json: String = row.get("metadata")?; + Ok(CompositionEventRecord { + id: row.get("id")?, + created_at: Self::parse_timestamp(&row.get::<_, String>("created_at")?, "created_at")?, + tool: row.get("tool")?, + mode: row.get("mode")?, + query: row.get("query").ok().flatten(), + query_hash: row.get("query_hash").ok().flatten(), + confidence: row.get("confidence").ok().flatten(), + status: row.get("status").ok().flatten(), + output_preview: row.get("output_preview").ok().flatten(), + metadata: serde_json::from_str(&metadata_json) + .unwrap_or_else(|_| serde_json::json!({})), + }) + } + + fn row_to_composition_member(row: &rusqlite::Row) -> rusqlite::Result { + let metadata_json: String = row.get("metadata")?; + Ok(CompositionMemberRecord { + event_id: row.get("event_id")?, 
+ memory_id: row.get("memory_id")?, + role: row.get("role")?, + rank: row.get("rank").unwrap_or(0), + trust: row.get("trust").ok().flatten(), + score: row.get("score").ok().flatten(), + preview: row.get("preview").ok().flatten(), + metadata: serde_json::from_str(&metadata_json) + .unwrap_or_else(|_| serde_json::json!({})), + }) + } + + fn row_to_composition_outcome( + row: &rusqlite::Row, + ) -> rusqlite::Result { + let metadata_json: String = row.get("metadata")?; + Ok(CompositionOutcomeRecord { + id: row.get("id")?, + event_id: row.get("event_id")?, + outcome_type: row.get("outcome_type")?, + labeled_at: Self::parse_timestamp(&row.get::<_, String>("labeled_at")?, "labeled_at")?, + label_source: row + .get("label_source") + .unwrap_or_else(|_| "tool".to_string()), + confidence_delta: row.get("confidence_delta").ok().flatten(), + notes: row.get("notes").ok().flatten(), + metadata: serde_json::from_str(&metadata_json) + .unwrap_or_else(|_| serde_json::json!({})), + }) + } + + fn composition_event_exists(conn: &Connection, id: &str) -> Result { + let count: i64 = conn.query_row( + "SELECT COUNT(*) FROM composition_events WHERE id = ?1", + params![id], + |row| row.get(0), + )?; + Ok(count > 0) + } + + fn composed_pair_set(&self) -> Result> { + let reader = self + .reader + .lock() + .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?; + let mut stmt = reader.prepare( + "SELECT event_id, memory_id + FROM composition_members + ORDER BY event_id, memory_id", + )?; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + })?; + let mut grouped: HashMap> = HashMap::new(); + for row in rows { + let (event_id, memory_id) = row?; + grouped.entry(event_id).or_default().push(memory_id); + } + + let mut pairs = HashSet::new(); + for ids in grouped.values_mut() { + ids.sort(); + ids.dedup(); + for i in 0..ids.len() { + for j in (i + 1)..ids.len() { + pairs.insert(Self::pair_key(&ids[i], &ids[j])); + } + } + } + Ok(pairs) + 
} + + fn pair_key(a: &str, b: &str) -> (String, String) { + if a <= b { + (a.to_string(), b.to_string()) + } else { + (b.to_string(), a.to_string()) + } + } + + fn shared_tags(a: &[String], b: &[String]) -> Vec { + let b_set: HashSet<&str> = b.iter().map(String::as_str).collect(); + let mut shared = a + .iter() + .filter(|tag| b_set.contains(tag.as_str())) + .cloned() + .collect::>(); + shared.sort(); + shared.dedup(); + shared + } + + fn node_pair_matches_tag_filter( + a: &KnowledgeNode, + b: &KnowledgeNode, + tag_filter: &[String], + ) -> bool { + a.tags.iter().chain(b.tags.iter()).any(|tag| { + tag_filter + .iter() + .any(|wanted| wanted == tag || tag.starts_with(&format!("{wanted}:"))) + }) + } + + fn boundary_tags_for_pair(a: &[String], b: &[String]) -> Vec { + let mut tags = a + .iter() + .chain(b.iter()) + .filter(|tag| Self::is_boundary_tag(tag)) + .cloned() + .collect::>(); + tags.sort(); + tags.dedup(); + tags + } + + fn composition_bridge_score( + a: &KnowledgeNode, + b: &KnowledgeNode, + shared_tags: &[String], + shared_terms: &[String], + boundary_tags: &[String], + ) -> f64 { + let tag_distance = Self::tag_distance(&a.tags, &b.tags); + let node_type_bridge = if a.node_type != b.node_type { 1.0 } else { 0.0 }; + let boundary_bridge = (boundary_tags.len() as f64 / 4.0).min(1.0); + let lexical_anchor = if shared_terms.is_empty() { 0.0 } else { 1.0 }; + let tag_anchor = if shared_tags.is_empty() { 0.0 } else { 1.0 }; + + (tag_distance * 0.30 + + node_type_bridge * 0.20 + + boundary_bridge * 0.25 + + lexical_anchor * 0.15 + + tag_anchor * 0.10) + .clamp(0.0, 1.0) + } + + fn tag_distance(a: &[String], b: &[String]) -> f64 { + let a_set = a.iter().map(String::as_str).collect::>(); + let b_set = b.iter().map(String::as_str).collect::>(); + let union = a_set.union(&b_set).count(); + if union == 0 { + return 0.0; + } + let intersection = a_set.intersection(&b_set).count(); + 1.0 - (intersection as f64 / union as f64) + } + + fn shared_content_terms(a: &str, b: 
&str, limit: usize) -> Vec { + let a_terms = Self::content_terms(a); + let b_terms = Self::content_terms(b); + let mut shared = a_terms + .intersection(&b_terms) + .cloned() + .collect::>(); + shared.sort_by(|left, right| { + Self::term_specificity_score(right) + .cmp(&Self::term_specificity_score(left)) + .then_with(|| left.cmp(right)) + }); + shared.truncate(limit); + shared + } + + fn content_terms(content: &str) -> HashSet { + const STOPWORDS: &[&str] = &[ + "about", "after", "again", "against", "because", "before", "between", "could", "every", + "first", "from", "have", "into", "memory", "needs", "should", "their", "there", + "these", "thing", "through", "using", "where", "which", "while", "would", + ]; + content + .to_ascii_lowercase() + .split(|c: char| !c.is_ascii_alphanumeric() && c != '-' && c != '_') + .filter(|term| term.len() >= 5 && !STOPWORDS.contains(term)) + .map(ToOwned::to_owned) + .collect() + } + + fn term_specificity_score(term: &str) -> usize { + term.len() + + term.chars().filter(|ch| ch.is_ascii_digit()).count() * 2 + + usize::from(term.contains('-')) * 2 + + usize::from(term.contains('_')) * 2 + } + + fn anchor_summary(shared_tags: &[String], shared_terms: &[String]) -> String { + if !shared_tags.is_empty() && !shared_terms.is_empty() { + format!( + "shared tags ({}) and shared terms ({})", + shared_tags.join(", "), + shared_terms + .iter() + .take(4) + .cloned() + .collect::>() + .join(", ") + ) + } else if !shared_tags.is_empty() { + format!("shared tags ({})", shared_tags.join(", ")) + } else { + format!( + "shared terms ({})", + shared_terms + .iter() + .take(4) + .cloned() + .collect::>() + .join(", ") + ) + } + } + + fn composition_question( + a: &KnowledgeNode, + b: &KnowledgeNode, + shared_tags: &[String], + shared_terms: &[String], + boundary_tags: &[String], + ) -> String { + let anchor = if !boundary_tags.is_empty() { + boundary_tags.join(", ") + } else if !shared_tags.is_empty() { + shared_tags.join(", ") + } else { + 
shared_terms + .iter() + .take(3) + .cloned() + .collect::>() + .join(", ") + }; + format!( + "What changes if a {} memory and a {} memory are composed through {}?", + a.node_type, b.node_type, anchor + ) + } + + fn composition_degree_map(&self) -> Result> { + let reader = self + .reader + .lock() + .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?; + let mut stmt = reader.prepare( + "SELECT memory_id, COUNT(DISTINCT event_id) AS composition_count + FROM composition_members + GROUP BY memory_id", + )?; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)) + })?; + let mut result = HashMap::new(); + for row in rows { + let (memory_id, count) = row?; + result.insert(memory_id, count); + } + Ok(result) + } + + fn composition_candidate_nodes( + &self, + tag_filter: Option<&[String]>, + ) -> Result> { + const BASE_SCAN_LIMIT: i32 = 750; + const TAGGED_SCAN_LIMIT: i32 = 1500; + + let mut nodes = self.get_all_nodes(BASE_SCAN_LIMIT, 0)?; + if let Some(filter) = tag_filter + && !filter.is_empty() + { + let tagged_nodes = self.get_nodes_matching_any_tag_prefix(filter, TAGGED_SCAN_LIMIT)?; + let mut by_id = HashMap::new(); + for node in nodes.into_iter().chain(tagged_nodes.into_iter()) { + by_id.entry(node.id.clone()).or_insert(node); + } + nodes = by_id.into_values().collect(); + nodes.sort_by(|a, b| { + b.retention_strength + .partial_cmp(&a.retention_strength) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| b.created_at.cmp(&a.created_at)) + }); + } + Ok(nodes) + } + + fn get_nodes_matching_any_tag_prefix( + &self, + tag_filter: &[String], + limit: i32, + ) -> Result> { + let mut patterns = Vec::new(); + for wanted in tag_filter + .iter() + .map(|tag| tag.trim()) + .filter(|tag| !tag.is_empty()) + { + patterns.push(format!("%\"{}\"%", wanted)); + patterns.push(format!("%\"{}:%", wanted)); + } + if patterns.is_empty() { + return Ok(Vec::new()); + } + + let clauses = std::iter::repeat_n("tags LIKE ?", 
patterns.len()) + .collect::>() + .join(" OR "); + let sql = format!( + "SELECT * FROM knowledge_nodes + WHERE {clauses} + ORDER BY retention_strength DESC, created_at DESC + LIMIT {}", + limit.clamp(1, 5000) + ); + + let reader = self + .reader + .lock() + .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?; + let mut stmt = reader.prepare(&sql)?; + let rows = stmt.query_map(params_from_iter(patterns.iter()), Self::row_to_node)?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + fn composition_outcome_map(&self) -> Result>> { + let reader = self + .reader + .lock() + .map_err(|_| StorageError::Init("Reader lock poisoned".into()))?; + let mut stmt = reader.prepare( + "SELECT DISTINCT m.memory_id, o.outcome_type + FROM composition_members m + JOIN composition_outcomes o ON o.event_id = m.event_id", + )?; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + })?; + let mut result: HashMap> = HashMap::new(); + for row in rows { + let (memory_id, outcome) = row?; + result.entry(memory_id).or_default().insert(outcome); + } + Ok(result) + } + + fn pair_prior_outcomes( + outcome_map: &HashMap>, + first_id: &str, + second_id: &str, + ) -> Vec { + let mut outcomes = outcome_map + .get(first_id) + .into_iter() + .chain(outcome_map.get(second_id)) + .flat_map(|values| values.iter().cloned()) + .collect::>(); + outcomes.sort(); + outcomes.dedup(); + outcomes + } + + fn outcome_signal(prior_outcomes: &[String]) -> String { + if prior_outcomes.is_empty() { + return "clean".to_string(); + } + + let has_closed = prior_outcomes.iter().any(|outcome| { + matches!( + outcome.as_str(), + "dead_end" + | "rejected" + | "bad_severity" + | "user_demoted" + | "closed_by_scope" + | "closed_by_false_assumption" + | "closed_by_user" + | "expired_lane" + ) + }); + let has_duplicate = prior_outcomes + .iter() + .any(|outcome| matches!(outcome.as_str(), "duplicate_risk" | 
"closed_by_duplicate")); + let has_success = prior_outcomes.iter().any(|outcome| { + matches!( + outcome.as_str(), + "accepted" | "helpful" | "submitted" | "user_promoted" + ) + }); + let has_needs_poc = prior_outcomes.iter().any(|outcome| outcome == "needs_poc"); + + if (has_closed || has_duplicate) && has_success { + "mixed_prior_outcomes".to_string() + } else if has_closed { + "prior_closed_door".to_string() + } else if has_duplicate { + "prior_duplicate_risk".to_string() + } else if has_success { + "prior_success".to_string() + } else if has_needs_poc { + "prior_needs_poc".to_string() + } else { + "prior_outcome".to_string() + } + } + + fn outcome_score_adjustment(prior_outcomes: &[String]) -> f64 { + let mut adjustment: f64 = 0.0; + for outcome in prior_outcomes { + adjustment += match outcome.as_str() { + "accepted" => 0.35, + "helpful" => 0.25, + "submitted" => 0.15, + "user_promoted" => 0.20, + "needs_poc" => -0.05, + "duplicate_risk" => -0.35, + "closed_by_duplicate" => -0.40, + "dead_end" + | "rejected" + | "bad_severity" + | "closed_by_scope" + | "closed_by_false_assumption" + | "closed_by_user" + | "expired_lane" => -0.45, + "user_demoted" => -0.20, + _ => 0.0, + }; + } + adjustment.clamp(-0.8, 0.5) + } + + fn is_boundary_tag(tag: &str) -> bool { + let lowered = tag.to_ascii_lowercase(); + lowered.starts_with("boundary-") + || matches!( + lowered.as_str(), + "time" + | "chain" + | "role" + | "oracle" + | "queue" + | "settlement" + | "keeper" + | "upgrade" + | "pause" + | "accounting" + | "scope" + ) + } + // ======================================================================== // INTENTIONS PERSISTENCE // ======================================================================== @@ -5213,6 +6180,17 @@ impl Storage { | "consolidation_history" | "dream_history" | "retention_snapshots" => Self::merge_append_only_table(tx, table_name, table, report), + "composition_events" | "composition_outcomes" => { + Self::merge_keyed_table(tx, table_name, table, 
&["id"], report, state) + } + "composition_members" => Self::merge_keyed_table( + tx, + table_name, + table, + &["event_id", "memory_id", "role"], + report, + state, + ), "node_embeddings" => { Self::merge_keyed_table(tx, table_name, table, &["node_id"], report, state) } @@ -5363,6 +6341,10 @@ impl Storage { (None, _) => false, }; if should_delete { + tx.execute( + "UPDATE composition_members SET preview = NULL WHERE memory_id = ?1", + params![row_id], + )?; let deleted = tx.execute("DELETE FROM knowledge_nodes WHERE id = ?1", params![row_id])?; report.rows_deleted += deleted; @@ -5534,6 +6516,20 @@ impl Storage { .unwrap_or(false); Ok(source_exists && target_exists) } + "composition_members" => { + let event_exists = Self::portable_text(table, row, "event_id") + .map(|id| Self::composition_event_exists(tx, id)) + .transpose()? + .unwrap_or(false); + Ok(event_exists) + } + "composition_outcomes" => { + let event_exists = Self::portable_text(table, row, "event_id") + .map(|id| Self::composition_event_exists(tx, id)) + .transpose()? 
+ .unwrap_or(false); + Ok(event_exists) + } _ => Ok(true), } } @@ -5557,6 +6553,8 @@ impl Storage { fn merge_key_columns(table_name: &str) -> &'static [&'static str] { match table_name { "knowledge_nodes" | "intentions" | "insights" | "sessions" => &["id"], + "composition_events" | "composition_outcomes" => &["id"], + "composition_members" => &["event_id", "memory_id", "role"], "node_embeddings" => &["node_id"], "fsrs_cards" | "memory_states" | "deletion_tombstones" => &["memory_id"], "memory_connections" => &["source_id", "target_id"], @@ -6377,7 +7375,10 @@ impl Storage { let possible_threshold = read_key("merge_possible_threshold") .map(|v| v as f32) .unwrap_or_else(|| { - env_f32("VESTIGE_MERGE_POSSIBLE_THRESHOLD", default.possible_threshold) + env_f32( + "VESTIGE_MERGE_POSSIBLE_THRESHOLD", + default.possible_threshold, + ) }); let auto_apply = match read_key("merge_auto_apply") { Some(v) => v != 0.0, @@ -6495,8 +7496,10 @@ impl Storage { } // Best pair score per resulting cluster member, for the explanation. 
- let mut pair_score: std::collections::HashMap<(usize, usize), crate::advanced::MatchSignals> = - std::collections::HashMap::new(); + let mut pair_score: std::collections::HashMap< + (usize, usize), + crate::advanced::MatchSignals, + > = std::collections::HashMap::new(); for i in 0..n { for j in (i + 1)..n { @@ -6697,10 +7700,12 @@ impl Storage { .map(|n| (n.id.clone(), n.content.clone())) .collect(); let result_content = compose_merged_content(&members); - let result_tags = compose_merged_tags( - &nodes.iter().map(|n| n.tags.clone()).collect::>(), - ); - let result_source = nodes.iter().find(|n| n.id == survivor).and_then(|n| n.source.clone()); + let result_tags = + compose_merged_tags(&nodes.iter().map(|n| n.tags.clone()).collect::>()); + let result_source = nodes + .iter() + .find(|n| n.id == survivor) + .and_then(|n| n.source.clone()); let invalidated_ids: Vec = nodes .iter() .filter(|n| n.id != survivor) @@ -6968,11 +7973,7 @@ impl Storage { undo.insert("absorbed".into(), serde_json::json!(absorbed)); // Apply: rewrite survivor, invalidate absorbed. 
- self.rewrite_survivor( - &plan.survivor_id, - &plan.result_content, - &plan.result_tags, - )?; + self.rewrite_survivor(&plan.survivor_id, &plan.result_content, &plan.result_tags)?; for id in &plan.invalidated_ids { self.invalidate_node(id, &plan.survivor_id, now)?; } @@ -7046,9 +8047,7 @@ impl Storage { ))); } if op.op_type == "undo" { - return Err(StorageError::Init( - "cannot undo an undo operation".into(), - )); + return Err(StorageError::Init("cannot undo an undo operation".into())); } let undo: serde_json::Value = { @@ -7180,9 +8179,7 @@ impl Storage { Ok(op) } - fn row_to_operation( - row: &rusqlite::Row, - ) -> rusqlite::Result { + fn row_to_operation(row: &rusqlite::Row) -> rusqlite::Result { let affected: String = row.get("affected_ids")?; let affected_ids: Vec = serde_json::from_str(&affected).unwrap_or_default(); Ok(crate::advanced::MergeOperation { @@ -7195,7 +8192,11 @@ impl Storage { reverts_op_id: row.get("reverts_op_id").ok().flatten(), survivor_id: row.get("survivor_id").ok().flatten(), affected_ids, - confidence: row.get::<_, Option>("confidence").ok().flatten().map(|v| v as f32), + confidence: row + .get::<_, Option>("confidence") + .ok() + .flatten() + .map(|v| v as f32), reason: row.get("reason").ok().flatten(), }) } @@ -7402,13 +8403,17 @@ mod tests { use chrono::TimeZone; // Canonical writer: RFC 3339 with fractional seconds + offset. - let rfc = Storage::parse_timestamp("2026-06-12T15:07:59.730+00:00", "last_accessed").unwrap(); + let rfc = + Storage::parse_timestamp("2026-06-12T15:07:59.730+00:00", "last_accessed").unwrap(); assert_eq!(rfc.to_rfc3339(), "2026-06-12T15:07:59.730+00:00"); // External writer: SQLite-native `datetime('now')` (space separator, // no timezone, no fraction) — must be tolerated, assumed UTC. 
let sqlite = Storage::parse_timestamp("2026-06-12 15:07:59", "last_accessed").unwrap(); - assert_eq!(sqlite, Utc.with_ymd_and_hms(2026, 6, 12, 15, 7, 59).unwrap()); + assert_eq!( + sqlite, + Utc.with_ymd_and_hms(2026, 6, 12, 15, 7, 59).unwrap() + ); // SQLite-native with fractional seconds. let sqlite_frac = @@ -7490,6 +8495,622 @@ mod tests { assert!(storage.get_node(&node.id).unwrap().is_none()); } + #[test] + fn test_composition_save_query_outcome_and_never_composed() { + let storage = create_test_storage(); + let first = storage + .ingest(IngestInput { + content: "Oracle drift can break delayed settlement.".to_string(), + node_type: "fact".to_string(), + tags: vec![ + "protocolgate".to_string(), + "boundary-oracle".to_string(), + "settlement".to_string(), + ], + ..Default::default() + }) + .unwrap(); + let second = storage + .ingest(IngestInput { + content: "Withdrawal queues can settle stale claims.".to_string(), + node_type: "pattern".to_string(), + tags: vec![ + "protocolgate".to_string(), + "boundary-queue".to_string(), + "settlement".to_string(), + ], + ..Default::default() + }) + .unwrap(); + let third = storage + .ingest(IngestInput { + content: "Keeper roles can drift from local validation paths.".to_string(), + node_type: "pattern".to_string(), + tags: vec![ + "protocolgate".to_string(), + "boundary-role".to_string(), + "settlement".to_string(), + ], + ..Default::default() + }) + .unwrap(); + + let before = storage + .get_never_composed_candidates(10, Some(&["protocolgate".to_string()])) + .unwrap(); + let first_second_before = before + .iter() + .find(|candidate| { + let pair = Storage::pair_key(&candidate.first_id, &candidate.second_id); + pair == Storage::pair_key(&first.id, &second.id) + }) + .expect("uncomposed first/second pair should be ranked before any event"); + assert!( + first_second_before.bridge_score > 0.0, + "candidate should expose a bridge score" + ); + assert!( + first_second_before.novelty_score > 0.0, + "candidate should expose a 
novelty score" + ); + assert_eq!( + first_second_before.outcome_signal, "clean", + "new candidate should start without prior outcome context" + ); + assert!( + first_second_before + .composition_question + .contains("composed through"), + "candidate should include a promptable composition question" + ); + + let event = CompositionEventRecord { + id: "composition-test-1".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "bounty".to_string(), + query: Some("oracle drift delayed settlement".to_string()), + query_hash: Some("sha256:test".to_string()), + confidence: Some(0.87), + status: Some("resolved".to_string()), + output_preview: Some("Compose oracle drift with withdrawal queue.".to_string()), + metadata: serde_json::json!({"workflow": "test"}), + }; + let members = vec![ + CompositionMemberRecord { + event_id: event.id.clone(), + memory_id: first.id.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.8), + score: Some(0.9), + preview: Some(preview(&first.content, 120)), + metadata: serde_json::json!({}), + }, + CompositionMemberRecord { + event_id: event.id.clone(), + memory_id: second.id.clone(), + role: "supporting".to_string(), + rank: 1, + trust: Some(0.7), + score: Some(0.75), + preview: Some(preview(&second.content, 120)), + metadata: serde_json::json!({}), + }, + ]; + storage.save_composition(&event, &members, &[]).unwrap(); + + let outcome = CompositionOutcomeRecord { + id: "composition-outcome-1".to_string(), + event_id: event.id.clone(), + outcome_type: "submitted".to_string(), + labeled_at: Utc::now(), + label_source: "test".to_string(), + confidence_delta: Some(0.1), + notes: Some("Report submitted".to_string()), + metadata: serde_json::json!({"severity": "high"}), + }; + storage.record_composition_outcome(&outcome).unwrap(); + + let fetched = storage.get_composition_event(&event.id).unwrap().unwrap(); + assert_eq!(fetched.mode, "bounty"); + assert_eq!(fetched.metadata["workflow"], "test"); + + let 
fetched_members = storage.get_composition_members(&event.id).unwrap(); + assert_eq!(fetched_members.len(), 2); + assert_eq!(fetched_members[0].role, "primary"); + + let fetched_outcomes = storage.get_composition_outcomes(&event.id).unwrap(); + assert_eq!(fetched_outcomes.len(), 1); + assert_eq!(fetched_outcomes[0].outcome_type, "submitted"); + + let for_memory = storage.get_compositions_for_memory(&first.id, 5).unwrap(); + assert_eq!(for_memory.len(), 1); + assert_eq!(for_memory[0].id, event.id); + + let neighbors = storage.get_composition_neighbors(&first.id, 5).unwrap(); + assert_eq!(neighbors.len(), 1); + assert_eq!(neighbors[0].memory_id, second.id); + + let after = storage + .get_never_composed_candidates(10, Some(&["protocolgate".to_string()])) + .unwrap(); + assert!( + !after.iter().any(|candidate| { + let pair = Storage::pair_key(&candidate.first_id, &candidate.second_id); + pair == Storage::pair_key(&first.id, &second.id) + }), + "already-composed first/second pair should be removed" + ); + assert!( + after.iter().any(|candidate| { + let pair = Storage::pair_key(&candidate.first_id, &candidate.second_id); + pair == Storage::pair_key(&first.id, &third.id) + || pair == Storage::pair_key(&second.id, &third.id) + }), + "other protocolgate pairs should remain candidates" + ); + } + + #[test] + fn test_composition_neighbors_count_distinct_events_not_member_roles() { + let storage = create_test_storage(); + let first = storage + .ingest(IngestInput { + content: "Oracle role appears once in the event.".to_string(), + node_type: "fact".to_string(), + tags: vec!["protocolgate".to_string(), "settlement".to_string()], + ..Default::default() + }) + .unwrap(); + let second = storage + .ingest(IngestInput { + content: "Queue role appears under two evidence roles.".to_string(), + node_type: "fact".to_string(), + tags: vec!["protocolgate".to_string(), "settlement".to_string()], + ..Default::default() + }) + .unwrap(); + + storage + .save_composition( + 
&CompositionEventRecord { + id: "multi-role-neighbor-event".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "bounty".to_string(), + query: Some("multi role neighbor".to_string()), + query_hash: Some("fnv1a64:neighbor".to_string()), + confidence: Some(0.7), + status: Some("resolved".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[ + CompositionMemberRecord { + event_id: "multi-role-neighbor-event".to_string(), + memory_id: first.id.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.8), + score: Some(0.9), + preview: None, + metadata: serde_json::json!({}), + }, + CompositionMemberRecord { + event_id: "multi-role-neighbor-event".to_string(), + memory_id: second.id.clone(), + role: "supporting".to_string(), + rank: 1, + trust: Some(0.7), + score: Some(0.8), + preview: None, + metadata: serde_json::json!({}), + }, + CompositionMemberRecord { + event_id: "multi-role-neighbor-event".to_string(), + memory_id: second.id.clone(), + role: "related".to_string(), + rank: 2, + trust: Some(0.7), + score: Some(0.6), + preview: None, + metadata: serde_json::json!({}), + }, + ], + &[], + ) + .unwrap(); + + let neighbors = storage.get_composition_neighbors(&first.id, 10).unwrap(); + assert_eq!(neighbors.len(), 1); + assert_eq!(neighbors[0].memory_id, second.id); + assert_eq!( + neighbors[0].composed_count, 1, + "one event with multiple member roles should count as one composition" + ); + } + + #[test] + fn test_never_composed_tag_filter_includes_older_tagged_candidates() { + let storage = create_test_storage(); + let first = storage + .ingest(IngestInput { + content: "Older Vestige composition frontier about outcome-shaped recall." 
+ .to_string(), + node_type: "fact".to_string(), + tags: vec!["project:vestige".to_string(), "composition".to_string()], + ..Default::default() + }) + .unwrap(); + let second = storage + .ingest(IngestInput { + content: "Older Vestige composition frontier about never-composed recall." + .to_string(), + node_type: "pattern".to_string(), + tags: vec!["project:vestige".to_string(), "composition".to_string()], + ..Default::default() + }) + .unwrap(); + + for idx in 0..751 { + storage + .ingest(IngestInput { + content: format!("Unrelated recent memory {idx} for scan-window pressure."), + node_type: "fact".to_string(), + tags: vec!["unrelated".to_string()], + ..Default::default() + }) + .unwrap(); + } + + let candidates = storage + .get_never_composed_candidates(10, Some(&["project".to_string()])) + .unwrap(); + assert!( + candidates.iter().any(|candidate| { + let pair = Storage::pair_key(&candidate.first_id, &candidate.second_id); + pair == Storage::pair_key(&first.id, &second.id) + }), + "tag-filtered frontier should include older namespaced-tag memories outside the base scan window" + ); + } + + #[test] + fn test_never_composed_carries_prior_outcome_signal() { + let storage = create_test_storage(); + let first = storage + .ingest(IngestInput { + content: "Oracle drift lane previously looked duplicate-prone.".to_string(), + node_type: "fact".to_string(), + tags: vec![ + "protocolgate".to_string(), + "boundary-oracle".to_string(), + "settlement".to_string(), + ], + ..Default::default() + }) + .unwrap(); + let second = storage + .ingest(IngestInput { + content: "Withdrawal queue lane had weak proof.".to_string(), + node_type: "fact".to_string(), + tags: vec![ + "protocolgate".to_string(), + "boundary-queue".to_string(), + "settlement".to_string(), + ], + ..Default::default() + }) + .unwrap(); + let third = storage + .ingest(IngestInput { + content: "Keeper settlement lane has not been composed with oracle drift." 
+ .to_string(), + node_type: "pattern".to_string(), + tags: vec![ + "protocolgate".to_string(), + "boundary-role".to_string(), + "settlement".to_string(), + ], + ..Default::default() + }) + .unwrap(); + + let event = CompositionEventRecord { + id: "prior-outcome-composition".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "bounty".to_string(), + query: Some("oracle withdrawal duplicate risk".to_string()), + query_hash: Some("fnv1a64:prior".to_string()), + confidence: Some(0.4), + status: Some("closed".to_string()), + output_preview: Some("Prior composition was labeled duplicate risk.".to_string()), + metadata: serde_json::json!({}), + }; + storage + .save_composition( + &event, + &[ + CompositionMemberRecord { + event_id: event.id.clone(), + memory_id: first.id.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.7), + score: Some(0.8), + preview: None, + metadata: serde_json::json!({}), + }, + CompositionMemberRecord { + event_id: event.id.clone(), + memory_id: second.id.clone(), + role: "supporting".to_string(), + rank: 1, + trust: Some(0.7), + score: Some(0.8), + preview: None, + metadata: serde_json::json!({}), + }, + ], + &[CompositionOutcomeRecord { + id: "prior-outcome-label".to_string(), + event_id: event.id.clone(), + outcome_type: "duplicate_risk".to_string(), + labeled_at: Utc::now(), + label_source: "test".to_string(), + confidence_delta: Some(-0.2), + notes: Some("Duplicate family in prior lane.".to_string()), + metadata: serde_json::json!({}), + }], + ) + .unwrap(); + + let candidates = storage + .get_never_composed_candidates(10, Some(&["protocolgate".to_string()])) + .unwrap(); + let candidate = candidates + .iter() + .find(|candidate| { + let pair = Storage::pair_key(&candidate.first_id, &candidate.second_id); + pair == Storage::pair_key(&first.id, &third.id) + }) + .expect("untried first/third pair should remain a frontier candidate"); + + assert!( + candidate + .prior_outcomes + .iter() + 
.any(|outcome| outcome == "duplicate_risk"), + "frontier candidate should expose prior outcome labels from either member" + ); + assert_eq!(candidate.outcome_signal, "prior_duplicate_risk"); + assert!( + candidate.outcome_score_adjustment < 0.0, + "duplicate-risk history should reduce but not hide the untried lane" + ); + } + + #[test] + fn test_never_composed_marks_mixed_prior_outcomes() { + let storage = create_test_storage(); + let successful = storage + .ingest(IngestInput { + content: "Accepted release lane linked rollback evidence to install telemetry." + .to_string(), + node_type: "decision".to_string(), + tags: vec![ + "project:vestige".to_string(), + "release".to_string(), + "telemetry".to_string(), + ], + ..Default::default() + }) + .unwrap(); + let closed = storage + .ingest(IngestInput { + content: "Closed release lane linked install telemetry to out-of-scope claims." + .to_string(), + node_type: "incident".to_string(), + tags: vec![ + "project:vestige".to_string(), + "release".to_string(), + "telemetry".to_string(), + ], + ..Default::default() + }) + .unwrap(); + let success_helper = storage + .ingest(IngestInput { + content: "Helper memory for an accepted release composition.".to_string(), + node_type: "fact".to_string(), + tags: vec!["project:vestige".to_string(), "release".to_string()], + ..Default::default() + }) + .unwrap(); + let closed_helper = storage + .ingest(IngestInput { + content: "Helper memory for a closed release composition.".to_string(), + node_type: "fact".to_string(), + tags: vec!["project:vestige".to_string(), "release".to_string()], + ..Default::default() + }) + .unwrap(); + + storage + .save_composition( + &CompositionEventRecord { + id: "prior-success-composition".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "release".to_string(), + query: Some("accepted release lane".to_string()), + query_hash: Some("fnv1a64:success".to_string()), + confidence: Some(0.9), + status: 
Some("resolved".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[ + CompositionMemberRecord { + event_id: "prior-success-composition".to_string(), + memory_id: successful.id.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.9), + score: Some(0.9), + preview: None, + metadata: serde_json::json!({}), + }, + CompositionMemberRecord { + event_id: "prior-success-composition".to_string(), + memory_id: success_helper.id, + role: "supporting".to_string(), + rank: 1, + trust: Some(0.7), + score: Some(0.6), + preview: None, + metadata: serde_json::json!({}), + }, + ], + &[CompositionOutcomeRecord { + id: "prior-success-label".to_string(), + event_id: "prior-success-composition".to_string(), + outcome_type: "accepted".to_string(), + labeled_at: Utc::now(), + label_source: "test".to_string(), + confidence_delta: Some(0.2), + notes: None, + metadata: serde_json::json!({}), + }], + ) + .unwrap(); + + storage + .save_composition( + &CompositionEventRecord { + id: "prior-closed-composition".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "release".to_string(), + query: Some("closed release lane".to_string()), + query_hash: Some("fnv1a64:closed".to_string()), + confidence: Some(0.3), + status: Some("closed".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[ + CompositionMemberRecord { + event_id: "prior-closed-composition".to_string(), + memory_id: closed.id.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.8), + score: Some(0.7), + preview: None, + metadata: serde_json::json!({}), + }, + CompositionMemberRecord { + event_id: "prior-closed-composition".to_string(), + memory_id: closed_helper.id, + role: "supporting".to_string(), + rank: 1, + trust: Some(0.7), + score: Some(0.6), + preview: None, + metadata: serde_json::json!({}), + }, + ], + &[CompositionOutcomeRecord { + id: "prior-closed-label".to_string(), + event_id: 
"prior-closed-composition".to_string(), + outcome_type: "closed_by_scope".to_string(), + labeled_at: Utc::now(), + label_source: "test".to_string(), + confidence_delta: Some(-0.3), + notes: None, + metadata: serde_json::json!({}), + }], + ) + .unwrap(); + + let candidates = storage + .get_never_composed_candidates(10, Some(&["project".to_string()])) + .unwrap(); + let candidate = candidates + .iter() + .find(|candidate| { + let pair = Storage::pair_key(&candidate.first_id, &candidate.second_id); + pair == Storage::pair_key(&successful.id, &closed.id) + }) + .expect("untried success/closed pair should remain a frontier candidate"); + + assert_eq!(candidate.outcome_signal, "mixed_prior_outcomes"); + assert!( + candidate + .prior_outcomes + .iter() + .any(|outcome| outcome == "accepted") + ); + assert!( + candidate + .prior_outcomes + .iter() + .any(|outcome| outcome == "closed_by_scope") + ); + } + + #[test] + fn test_never_composed_surfaces_weak_tie_shared_terms_without_shared_tags() { + let storage = create_test_storage(); + let incident = storage + .ingest(IngestInput { + content: + "OpenCode handshake stalls when embedding startup blocks stdio negotiation." + .to_string(), + node_type: "incident".to_string(), + tags: vec!["opencode".to_string(), "startup".to_string()], + ..Default::default() + }) + .unwrap(); + let mitigation = storage + .ingest(IngestInput { + content: "JetBrains startup should keep embedding backfill behind the handshake." 
+ .to_string(), + node_type: "mitigation".to_string(), + tags: vec!["jetbrains".to_string(), "background-work".to_string()], + ..Default::default() + }) + .unwrap(); + + let candidates = storage.get_never_composed_candidates(10, None).unwrap(); + let candidate = candidates + .iter() + .find(|candidate| { + let pair = Storage::pair_key(&candidate.first_id, &candidate.second_id); + pair == Storage::pair_key(&incident.id, &mitigation.id) + }) + .expect("shared terms should surface a weak-tie candidate without shared tags"); + + assert!( + candidate.shared_tags.is_empty(), + "test fixture intentionally has no shared tags" + ); + assert!( + candidate + .shared_terms + .iter() + .any(|term| term == "embedding" || term == "startup" || term == "handshake"), + "shared terms should explain the candidate" + ); + assert!( + candidate.bridge_score > 0.5, + "different tags and node types should create a bridge signal" + ); + } + #[test] fn test_dream_history_save_and_get_last() { let storage = create_test_storage(); @@ -7586,6 +9207,54 @@ mod tests { activation_count: 1, }) .unwrap(); + source + .save_composition( + &CompositionEventRecord { + id: "portable-composition-1".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "bounty".to_string(), + query: Some("portable composition".to_string()), + query_hash: Some("sha256:portable".to_string()), + confidence: Some(0.9), + status: Some("resolved".to_string()), + output_preview: Some("Portable composition event".to_string()), + metadata: serde_json::json!({}), + }, + &[ + CompositionMemberRecord { + event_id: "portable-composition-1".to_string(), + memory_id: first.id.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.9), + score: Some(1.0), + preview: Some("alpha".to_string()), + metadata: serde_json::json!({}), + }, + CompositionMemberRecord { + event_id: "portable-composition-1".to_string(), + memory_id: second.id.clone(), + role: "supporting".to_string(), + rank: 1, + trust: 
Some(0.8), + score: Some(0.8), + preview: Some("beta".to_string()), + metadata: serde_json::json!({}), + }, + ], + &[CompositionOutcomeRecord { + id: "portable-composition-outcome-1".to_string(), + event_id: "portable-composition-1".to_string(), + outcome_type: "helpful".to_string(), + labeled_at: Utc::now(), + label_source: "test".to_string(), + confidence_delta: None, + notes: None, + metadata: serde_json::json!({}), + }], + ) + .unwrap(); let archive = source.export_portable_archive().unwrap(); assert_eq!(archive.archive_format, PORTABLE_ARCHIVE_FORMAT); @@ -7596,6 +9265,16 @@ mod tests { .iter() .any(|table| table.name == "knowledge_nodes" && table.rows.len() == 2) ); + for table_name in [ + "composition_events", + "composition_members", + "composition_outcomes", + ] { + assert!( + archive.tables.iter().any(|table| table.name == table_name), + "{table_name} must be included in portable archive" + ); + } let target = create_test_storage_at(&target_dir, "target.db"); let report = target @@ -7614,6 +9293,26 @@ mod tests { assert_eq!(connections.len(), 1); assert_eq!(connections[0].target_id, second.id); + let composition = target + .get_composition_event("portable-composition-1") + .unwrap() + .unwrap(); + assert_eq!(composition.mode, "bounty"); + assert_eq!( + target + .get_composition_members("portable-composition-1") + .unwrap() + .len(), + 2 + ); + assert_eq!( + target + .get_composition_outcomes("portable-composition-1") + .unwrap() + .len(), + 1 + ); + let results = target.search("alpha", 10).unwrap(); assert_eq!(results.len(), 1); assert_eq!(results[0].id, first.id); @@ -7919,6 +9618,84 @@ mod tests { assert_eq!(access_count, 42); } + #[test] + fn test_portable_merge_import_keeps_composition_members_for_newer_local_memory() { + let source_dir = tempdir().unwrap(); + let target_dir = tempdir().unwrap(); + let source = create_test_storage_at(&source_dir, "source.db"); + let target = create_test_storage_at(&target_dir, "target.db"); + + let node = source + 
.ingest(IngestInput { + content: "Shared memory with historical composition".to_string(), + node_type: "fact".to_string(), + tags: vec!["protocolgate".to_string()], + ..Default::default() + }) + .unwrap(); + source + .save_composition( + &CompositionEventRecord { + id: "merge-composition-1".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "bounty".to_string(), + query: Some("historical composition".to_string()), + query_hash: Some("sha256:historical".to_string()), + confidence: Some(0.7), + status: Some("resolved".to_string()), + output_preview: Some("Historical composition survives merge".to_string()), + metadata: serde_json::json!({}), + }, + &[CompositionMemberRecord { + event_id: "merge-composition-1".to_string(), + memory_id: node.id.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.8), + score: Some(0.9), + preview: Some("historical".to_string()), + metadata: serde_json::json!({}), + }], + &[], + ) + .unwrap(); + + let archive = source.export_portable_archive().unwrap(); + target + .import_portable_archive(&archive, PortableImportMode::EmptyOnly) + .unwrap(); + + let local_time = (Utc::now() + Duration::hours(1)).to_rfc3339(); + { + let writer = target.writer.lock().unwrap(); + writer + .execute( + "DELETE FROM composition_members WHERE event_id = ?1", + params!["merge-composition-1"], + ) + .unwrap(); + writer + .execute( + "UPDATE knowledge_nodes SET content = ?1, updated_at = ?2 WHERE id = ?3", + params!["Newer local content", &local_time, &node.id], + ) + .unwrap(); + } + + target + .import_portable_archive(&archive, PortableImportMode::Merge) + .unwrap(); + + let restored = target.get_node(&node.id).unwrap().unwrap(); + assert_eq!(restored.content, "Newer local content"); + let members = target + .get_composition_members("merge-composition-1") + .unwrap(); + assert_eq!(members.len(), 1); + assert_eq!(members[0].memory_id, node.id); + } + #[test] fn test_portable_merge_import_applies_delete_tombstones() 
{ let source_dir = tempdir().unwrap(); @@ -7964,22 +9741,71 @@ mod tests { ..Default::default() }) .unwrap(); + source + .save_composition( + &CompositionEventRecord { + id: "portable-purge-composition".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "sync".to_string(), + query: Some("portable purge preview".to_string()), + query_hash: Some("fnv1a64:portable-purge".to_string()), + confidence: Some(0.7), + status: Some("resolved".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[CompositionMemberRecord { + event_id: "portable-purge-composition".to_string(), + memory_id: node.id.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.8), + score: Some(0.8), + preview: Some("Portable purge composition preview leak".to_string()), + metadata: serde_json::json!({}), + }], + &[], + ) + .unwrap(); let archive = source.export_portable_archive().unwrap(); target .import_portable_archive(&archive, PortableImportMode::EmptyOnly) .unwrap(); assert!(target.get_node(&node.id).unwrap().is_some()); + assert_eq!( + target + .get_composition_members("portable-purge-composition") + .unwrap()[0] + .preview + .as_deref(), + Some("Portable purge composition preview leak") + ); source .purge_node(&node.id, Some("sync purge test")) .unwrap(); let purge_archive = source.export_portable_archive().unwrap(); + assert!( + !serde_json::to_string(&purge_archive) + .unwrap() + .contains("Portable purge composition preview leak"), + "source portable archive should not retain purged composition previews" + ); let report = target .import_portable_archive(&purge_archive, PortableImportMode::Merge) .unwrap(); assert!(report.rows_deleted >= 1); assert!(target.get_node(&node.id).unwrap().is_none()); + assert!( + target + .get_composition_members("portable-purge-composition") + .unwrap()[0] + .preview + .is_none(), + "portable purge merge should scrub target composition previews" + ); let writer = 
target.writer.lock().unwrap(); let tombstone_count: i64 = writer @@ -8348,6 +10174,34 @@ mod tests { .unwrap(); } + storage + .save_composition( + &CompositionEventRecord { + id: "purge-composition-preview-test".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "audit".to_string(), + query: Some("purge preview leak".to_string()), + query_hash: Some("fnv1a64:purge".to_string()), + confidence: Some(0.7), + status: Some("resolved".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[CompositionMemberRecord { + event_id: "purge-composition-preview-test".to_string(), + memory_id: doomed.id.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.8), + score: Some(0.9), + preview: Some("Sensitive purge target memory preview leak".to_string()), + metadata: serde_json::json!({}), + }], + &[], + ) + .unwrap(); + let report = storage .purge_node(&doomed.id, Some("user requested hard purge")) .unwrap(); @@ -8387,6 +10241,21 @@ mod tests { .unwrap(); assert_eq!(tombstone_count, 1); + let members = storage + .get_composition_members("purge-composition-preview-test") + .unwrap(); + assert_eq!(members.len(), 1); + assert!( + members[0].preview.is_none(), + "purge should scrub composition member previews for the purged memory" + ); + let archive_json = + serde_json::to_string(&storage.export_portable_archive().unwrap()).unwrap(); + assert!( + !archive_json.contains("Sensitive purge target memory preview leak"), + "portable archive should not retain purged memory content through composition previews" + ); + let has_content_column: i64 = writer .query_row( "SELECT COUNT(*) FROM pragma_table_info('deletion_tombstones') WHERE name = 'content'", @@ -8496,7 +10365,12 @@ mod tests { #[test] fn test_plan_merge_is_preview_only_no_mutation() { let storage = create_test_storage(); - let a = seed_node(&storage, "Fact A about caching", &["perf"], axis_vector(5, 0.02)); + let a = seed_node( + &storage, + "Fact A about 
caching", + &["perf"], + axis_vector(5, 0.02), + ); let b = seed_node( &storage, "Fact A about caching, expanded", @@ -8524,14 +10398,22 @@ mod tests { assert!(vu_b.is_none() && sb_b.is_none()); // Plan persisted as pending. - assert_eq!(storage.plan_status(&plan.id).unwrap().as_deref(), Some("pending")); + assert_eq!( + storage.plan_status(&plan.id).unwrap().as_deref(), + Some("pending") + ); } #[cfg(all(feature = "embeddings", feature = "vector-search"))] #[test] fn test_apply_then_undo_merge_is_reversible() { let storage = create_test_storage(); - let survivor = seed_node(&storage, "Keep this canonical note", &["x"], axis_vector(7, 0.02)); + let survivor = seed_node( + &storage, + "Keep this canonical note", + &["x"], + axis_vector(7, 0.02), + ); let absorbed = seed_node( &storage, "Extra detail to fold in", @@ -8572,7 +10454,10 @@ mod tests { let surv_after = storage.get_node(&survivor).unwrap().unwrap(); assert_eq!(surv_after.content, "Keep this canonical note"); let (vu2, sb2) = storage.read_bitemporal(&absorbed).unwrap(); - assert!(vu2.is_none() && sb2.is_none(), "invalidation cleared on undo"); + assert!( + vu2.is_none() && sb2.is_none(), + "invalidation cleared on undo" + ); assert!(!storage.superseded_node_ids().unwrap().contains(&absorbed)); // The original op is now marked reverted; double-undo is rejected. @@ -8621,7 +10506,12 @@ mod tests { #[test] fn test_protect_blocks_merge_away() { let storage = create_test_storage(); - let pinned = seed_node(&storage, "Load-bearing fact", &["pin"], axis_vector(11, 0.02)); + let pinned = seed_node( + &storage, + "Load-bearing fact", + &["pin"], + axis_vector(11, 0.02), + ); let other = seed_node( &storage, "Load-bearing fact restated", @@ -8632,7 +10522,11 @@ mod tests { assert!(storage.is_protected(&pinned).unwrap()); // Protected node may not be merged AWAY (survivor=other). 
- let err = storage.plan_merge(&[other.clone(), pinned.clone()], Some(&other), MergePolicy::default()); + let err = storage.plan_merge( + &[other.clone(), pinned.clone()], + Some(&other), + MergePolicy::default(), + ); assert!(err.is_err(), "merging a protected node away must fail"); // But it CAN be the survivor. @@ -8645,12 +10539,16 @@ mod tests { // Supersede of a protected node is also blocked. assert!( - storage.plan_supersede(&pinned, &other, MergePolicy::default()).is_err(), + storage + .plan_supersede(&pinned, &other, MergePolicy::default()) + .is_err(), "superseding a protected node must fail" ); // merge_candidates flags the protected member. - let cands = storage.merge_candidates(MergePolicy::default(), 20, &[]).unwrap(); + let cands = storage + .merge_candidates(MergePolicy::default(), 20, &[]) + .unwrap(); assert!(cands.iter().all(|c| c.has_protected_member)); } @@ -8664,7 +10562,9 @@ mod tests { let a = seed_node(&storage, "Topic alpha note", &["t"], axis_vector(13, 0.30)); let b = seed_node(&storage, "Topic alpha aside", &["t"], axis_vector(13, 0.60)); - let plan = storage.plan_merge(&[a, b], None, storage.get_merge_policy().unwrap()).unwrap(); + let plan = storage + .plan_merge(&[a, b], None, storage.get_merge_policy().unwrap()) + .unwrap(); assert_ne!(plan.classification, MatchClass::Match); // Without confirm => rejected. 
diff --git a/crates/vestige-mcp/README.md b/crates/vestige-mcp/README.md index 92f53d8..2547e42 100644 --- a/crates/vestige-mcp/README.md +++ b/crates/vestige-mcp/README.md @@ -61,7 +61,7 @@ The server exposes the current unified MCP tools from - `search`, `smart_ingest`, `memory`, `codebase`, `intention` - `deep_reference`, `cross_reference`, `contradictions` - `dream`, `explore_connections`, `predict` -- `memory_health`, `memory_graph`, `system_status` +- `memory_health`, `memory_graph`, `composed_graph`, `system_status` - `importance_score`, `find_duplicates` - `consolidate`, `memory_timeline`, `memory_changelog` - `backup`, `export`, `restore`, `gc`, `suppress` diff --git a/crates/vestige-mcp/src/server.rs b/crates/vestige-mcp/src/server.rs index 2cb1e5f..7682441 100644 --- a/crates/vestige-mcp/src/server.rs +++ b/crates/vestige-mcp/src/server.rs @@ -443,6 +443,12 @@ impl McpServer { input_schema: tools::graph::schema(), ..Default::default() }, + ToolDescription { + name: "composed_graph".to_string(), + description: Some("ComposedGraph memory topology. 
Reads durable composition events, members, and outcome labels; returns recent/already-composed lanes, neighbors, never-composed pairs, bounty-mode lanes, and lets users label outcomes such as helpful, submitted, accepted, rejected, duplicate_risk, needs_poc, or dead_end.".to_string()), + input_schema: tools::composed_graph::schema(), + ..Default::default() + }, // ================================================================ // DEEP REFERENCE (v2.0.4+) — replaces cross_reference // ================================================================ @@ -959,7 +965,8 @@ impl McpServer { // TEMPORAL TOOLS (v1.2+) // ================================================================ "memory_timeline" => { - tools::timeline::execute(&self.storage, &self.output_config, request.arguments).await + tools::timeline::execute(&self.storage, &self.output_config, request.arguments) + .await } "memory_changelog" => tools::changelog::execute(&self.storage, request.arguments).await, @@ -1032,6 +1039,9 @@ impl McpServer { // ================================================================ "memory_health" => tools::health::execute(&self.storage, request.arguments).await, "memory_graph" => tools::graph::execute(&self.storage, request.arguments).await, + "composed_graph" => { + tools::composed_graph::execute(&self.storage, request.arguments).await + } "deep_reference" | "cross_reference" => { tools::cross_reference::execute(&self.storage, &self.cognitive, request.arguments) .await @@ -1796,10 +1806,10 @@ mod tests { let result = response.result.unwrap(); let tools = result["tools"].as_array().unwrap(); - // v2.1.25: 32 tools (25 from v2.1.21 + 7 Phase 3 merge/supersede tools: + // 33 tools: 25 from v2.1.21 + 7 Phase 3 merge/supersede tools: // merge_candidates, plan_merge, plan_supersede, apply_plan, merge_undo, - // protect, merge_policy) - assert_eq!(tools.len(), 32, "Expected exactly 32 tools in v2.1.25"); + // protect, merge_policy, composed_graph) + assert_eq!(tools.len(), 33, 
"Expected exactly 33 tools"); let tool_names: Vec<&str> = tools.iter().map(|t| t["name"].as_str().unwrap()).collect(); @@ -1874,6 +1884,7 @@ mod tests { // Autonomic tools (v1.9) assert!(tool_names.contains(&"memory_health")); assert!(tool_names.contains(&"memory_graph")); + assert!(tool_names.contains(&"composed_graph")); // Deep reference + cross_reference alias (v2.0.4) assert!(tool_names.contains(&"deep_reference")); diff --git a/crates/vestige-mcp/src/tools/composed_graph.rs b/crates/vestige-mcp/src/tools/composed_graph.rs new file mode 100644 index 0000000..ee69d93 --- /dev/null +++ b/crates/vestige-mcp/src/tools/composed_graph.rs @@ -0,0 +1,906 @@ +//! composed_graph tool — durable composition history and bounty-mode lane queue. + +use chrono::Utc; +use serde::Deserialize; +use serde_json::Value; +use std::sync::Arc; +use uuid::Uuid; +use vestige_core::{CompositionOutcomeRecord, Storage}; + +const OUTCOME_TYPES: &[&str] = &[ + "helpful", + "dead_end", + "submitted", + "accepted", + "rejected", + "duplicate_risk", + "needs_poc", + "bad_severity", + "user_promoted", + "user_demoted", + "closed_by_scope", + "closed_by_duplicate", + "closed_by_false_assumption", + "closed_by_user", + "expired_lane", +]; + +pub fn schema() -> Value { + serde_json::json!({ + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["recent", "get", "memory", "neighbors", "never_composed", "bounty_mode", "label"], + "description": "ComposedGraph action to run." + }, + "event_id": { + "type": "string", + "description": "Composition event id for get/label actions." + }, + "memory_id": { + "type": "string", + "description": "Memory id for memory/neighbors actions." + }, + "limit": { + "type": "integer", + "description": "Maximum rows to return (default 10, max 100).", + "default": 10, + "minimum": 1, + "maximum": 100 + }, + "tags": { + "type": "array", + "items": { "type": "string" }, + "description": "Optional tag filter for never_composed and bounty_mode." 
+ }, + "outcome_type": { + "type": "string", + "enum": ["helpful", "dead_end", "submitted", "accepted", "rejected", "duplicate_risk", "needs_poc", "bad_severity", "user_promoted", "user_demoted", "closed_by_scope", "closed_by_duplicate", "closed_by_false_assumption", "closed_by_user", "expired_lane"], + "description": "Outcome label for label action." + }, + "notes": { + "type": "string", + "description": "Optional outcome notes." + }, + "label_source": { + "type": "string", + "description": "Where the outcome label came from (default: user)." + }, + "confidence_delta": { + "type": "number", + "description": "Optional confidence adjustment for this outcome." + } + }, + "required": ["action"] + }) +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +struct ComposedGraphArgs { + action: String, + event_id: Option, + memory_id: Option, + limit: Option, + tags: Option>, + outcome_type: Option, + notes: Option, + label_source: Option, + confidence_delta: Option, +} + +pub async fn execute(storage: &Arc, args: Option) -> Result { + let args: ComposedGraphArgs = match args { + Some(value) => { + serde_json::from_value(value).map_err(|e| format!("Invalid arguments: {}", e))? 
+ } + None => return Err("Missing arguments".to_string()), + }; + let limit = args.limit.unwrap_or(10).clamp(1, 100); + + match args.action.as_str() { + "recent" => recent(storage, limit), + "get" => { + let event_id = args + .event_id + .as_deref() + .ok_or_else(|| "event_id is required for get".to_string())?; + get(storage, event_id) + } + "memory" => { + let memory_id = args + .memory_id + .as_deref() + .ok_or_else(|| "memory_id is required for memory".to_string())?; + memory(storage, memory_id, limit) + } + "neighbors" => { + let memory_id = args + .memory_id + .as_deref() + .ok_or_else(|| "memory_id is required for neighbors".to_string())?; + neighbors(storage, memory_id, limit) + } + "never_composed" => never_composed(storage, limit, args.tags.as_deref()), + "bounty_mode" => bounty_mode(storage, limit, args.tags.as_deref()), + "label" => label(storage, &args), + other => Err(format!("Unknown composed_graph action: {}", other)), + } +} + +fn recent(storage: &Storage, limit: i32) -> Result { + let events = storage + .get_recent_composition_events(limit) + .map_err(|e| e.to_string())?; + Ok(serde_json::json!({ + "action": "recent", + "events": events, + })) +} + +fn get(storage: &Storage, event_id: &str) -> Result { + let event = storage + .get_composition_event(event_id) + .map_err(|e| e.to_string())? 
+ .ok_or_else(|| format!("composition event not found: {}", event_id))?; + let members = storage + .get_composition_members(event_id) + .map_err(|e| e.to_string())?; + let outcomes = storage + .get_composition_outcomes(event_id) + .map_err(|e| e.to_string())?; + Ok(serde_json::json!({ + "action": "get", + "event": event, + "members": members, + "outcomes": outcomes, + })) +} + +fn memory(storage: &Storage, memory_id: &str, limit: i32) -> Result { + let events = storage + .get_compositions_for_memory(memory_id, limit) + .map_err(|e| e.to_string())?; + Ok(serde_json::json!({ + "action": "memory", + "memoryId": memory_id, + "events": events, + })) +} + +fn neighbors(storage: &Storage, memory_id: &str, limit: i32) -> Result { + let neighbors = storage + .get_composition_neighbors(memory_id, limit) + .map_err(|e| e.to_string())?; + Ok(serde_json::json!({ + "action": "neighbors", + "memoryId": memory_id, + "neighbors": neighbors, + })) +} + +fn never_composed(storage: &Storage, limit: i32, tags: Option<&[String]>) -> Result { + let candidates = storage + .get_never_composed_candidates(limit, tags) + .map_err(|e| e.to_string())?; + Ok(serde_json::json!({ + "action": "never_composed", + "candidates": candidates, + })) +} + +fn bounty_mode(storage: &Storage, limit: i32, tags: Option<&[String]>) -> Result { + const PAGE_SIZE: i32 = 100; + const MAX_SCAN_EVENTS: i32 = 1_000; + + let mut offset = 0; + let mut scanned = 0; + let mut already_composed = Vec::new(); + let mut closed_doors = Vec::new(); + let mut duplicate_risk_lanes = Vec::new(); + let mut needs_poc_lanes = Vec::new(); + + loop { + let events = storage + .get_recent_composition_events_page(PAGE_SIZE, offset) + .map_err(|e| e.to_string())?; + if events.is_empty() { + break; + } + scanned += events.len() as i32; + + for event in events { + let outcomes = storage + .get_composition_outcomes(&event.id) + .map_err(|e| e.to_string())?; + let members = storage + .get_composition_members(&event.id) + .map_err(|e| 
e.to_string())?; + if !composition_matches_tags(storage, &event, &members, tags)? { + continue; + } + let item = serde_json::json!({ + "event": event, + "members": members, + "outcomes": outcomes, + }); + let outcome_types = item["outcomes"] + .as_array() + .map(|values| { + values + .iter() + .filter_map(|value| value.get("outcomeType").and_then(|v| v.as_str())) + .collect::>() + }) + .unwrap_or_default(); + + if outcome_types.iter().any(|kind| { + matches!( + *kind, + "dead_end" + | "rejected" + | "bad_severity" + | "closed_by_scope" + | "closed_by_duplicate" + | "closed_by_false_assumption" + | "closed_by_user" + | "expired_lane" + ) + }) { + push_limited(&mut closed_doors, item.clone(), limit); + } + if outcome_types + .iter() + .any(|kind| matches!(*kind, "duplicate_risk" | "closed_by_duplicate")) + { + push_limited(&mut duplicate_risk_lanes, item.clone(), limit); + } + if outcome_types.iter().any(|kind| *kind == "needs_poc") { + push_limited(&mut needs_poc_lanes, item.clone(), limit); + } + if already_composed.len() < limit as usize { + already_composed.push(item); + } + if bounty_mode_lanes_full( + limit, + &already_composed, + &closed_doors, + &duplicate_risk_lanes, + &needs_poc_lanes, + ) { + break; + } + } + + if bounty_mode_lanes_full( + limit, + &already_composed, + &closed_doors, + &duplicate_risk_lanes, + &needs_poc_lanes, + ) || scanned >= MAX_SCAN_EVENTS + { + break; + } + offset += PAGE_SIZE; + } + + let never = storage + .get_never_composed_candidates(limit, tags) + .map_err(|e| e.to_string())?; + let top_weird_combinations = never.iter().take(3).cloned().collect::>(); + + Ok(serde_json::json!({ + "action": "bounty_mode", + "alreadyComposedLanes": already_composed, + "neverComposedLanes": never, + "closedDoors": closed_doors, + "duplicateRiskLanes": duplicate_risk_lanes, + "needsPocLanes": needs_poc_lanes, + "topWeirdCombinations": top_weird_combinations, + "guardrails": [ + "never-composed lane is not a finding", + "composition score is not 
severity", + "submit/reportable still needs source refs, scope fit, and PoC evidence" + ] + })) +} + +fn push_limited(items: &mut Vec, item: Value, limit: i32) { + if items.len() < limit as usize { + items.push(item); + } +} + +fn bounty_mode_lanes_full( + limit: i32, + already_composed: &[Value], + closed_doors: &[Value], + duplicate_risk_lanes: &[Value], + needs_poc_lanes: &[Value], +) -> bool { + let limit = limit as usize; + already_composed.len() >= limit + && closed_doors.len() >= limit + && duplicate_risk_lanes.len() >= limit + && needs_poc_lanes.len() >= limit +} + +fn composition_matches_tags( + storage: &Storage, + event: &vestige_core::CompositionEventRecord, + members: &[vestige_core::CompositionMemberRecord], + tags: Option<&[String]>, +) -> Result { + let Some(tags) = tags else { + return Ok(true); + }; + if tags.is_empty() { + return Ok(true); + } + + if json_value_has_tag(&event.metadata, tags) { + return Ok(true); + } + + for member in members { + if json_value_has_tag(&member.metadata, tags) { + return Ok(true); + } + if let Some(node) = storage + .get_node(&member.memory_id) + .map_err(|e| e.to_string())? 
+ && node.tags.iter().any(|tag| tag_matches_filter(tag, tags)) + { + return Ok(true); + } + } + + Ok(false) +} + +fn json_value_has_tag(value: &Value, tags: &[String]) -> bool { + value + .get("tags") + .and_then(|tags_value| tags_value.as_array()) + .is_some_and(|values| { + values.iter().any(|value| { + value + .as_str() + .is_some_and(|tag| tag_matches_filter(tag, tags)) + }) + }) +} + +fn tag_matches_filter(tag: &str, filters: &[String]) -> bool { + filters + .iter() + .any(|wanted| tag == wanted || tag.starts_with(&format!("{wanted}:"))) +} + +fn label(storage: &Storage, args: &ComposedGraphArgs) -> Result { + let event_id = args + .event_id + .as_deref() + .ok_or_else(|| "event_id is required for label".to_string())?; + let outcome_type = args + .outcome_type + .as_deref() + .ok_or_else(|| "outcome_type is required for label".to_string())?; + if !OUTCOME_TYPES.contains(&outcome_type) { + return Err(format!("unsupported outcome_type: {}", outcome_type)); + } + if storage + .get_composition_event(event_id) + .map_err(|e| e.to_string())? 
+ .is_none() + { + return Err(format!("composition event not found: {}", event_id)); + } + + let outcome = CompositionOutcomeRecord { + id: Uuid::new_v4().to_string(), + event_id: event_id.to_string(), + outcome_type: outcome_type.to_string(), + labeled_at: Utc::now(), + label_source: args + .label_source + .clone() + .unwrap_or_else(|| "user".to_string()), + confidence_delta: args.confidence_delta, + notes: args.notes.clone(), + metadata: serde_json::json!({}), + }; + storage + .record_composition_outcome(&outcome) + .map_err(|e| e.to_string())?; + + Ok(serde_json::json!({ + "action": "label", + "eventId": event_id, + "outcome": outcome, + })) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + use vestige_core::{ + CompositionEventRecord, CompositionMemberRecord, CompositionOutcomeRecord, IngestInput, + }; + + fn test_storage() -> (Arc, TempDir) { + let dir = TempDir::new().unwrap(); + let storage = Storage::new(Some(dir.path().join("test.db"))).unwrap(); + (Arc::new(storage), dir) + } + + fn ingest(storage: &Storage, content: &str, tags: &[&str]) -> String { + storage + .ingest(IngestInput { + content: content.to_string(), + node_type: "fact".to_string(), + tags: tags.iter().map(|tag| tag.to_string()).collect(), + ..Default::default() + }) + .unwrap() + .id + } + + #[tokio::test] + async fn test_composed_graph_get_label_and_bounty_mode() { + let (storage, _dir) = test_storage(); + let first = ingest( + &storage, + "Oracle drift bounty lane", + &["protocolgate", "boundary-oracle", "settlement"], + ); + let second = ingest( + &storage, + "Withdrawal queue bounty lane", + &["protocolgate", "boundary-queue", "settlement"], + ); + let third = ingest( + &storage, + "Keeper role bounty lane", + &["protocolgate", "boundary-role", "settlement"], + ); + + let event = CompositionEventRecord { + id: "composed-graph-test".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "bounty".to_string(), + query: Some("oracle 
withdrawal".to_string()), + query_hash: Some("test".to_string()), + confidence: Some(0.8), + status: Some("resolved".to_string()), + output_preview: Some("compose oracle and withdrawal queue".to_string()), + metadata: serde_json::json!({}), + }; + storage + .save_composition( + &event, + &[ + CompositionMemberRecord { + event_id: event.id.clone(), + memory_id: first.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.8), + score: Some(0.9), + preview: None, + metadata: serde_json::json!({}), + }, + CompositionMemberRecord { + event_id: event.id.clone(), + memory_id: second.clone(), + role: "supporting".to_string(), + rank: 1, + trust: Some(0.7), + score: Some(0.8), + preview: None, + metadata: serde_json::json!({}), + }, + ], + &[], + ) + .unwrap(); + + let unrelated = ingest(&storage, "Personal planning lane", &["personal"]); + storage + .save_composition( + &CompositionEventRecord { + id: "unrelated-composed-graph-test".to_string(), + created_at: Utc::now() + chrono::Duration::seconds(10), + tool: "deep_reference".to_string(), + mode: "planning".to_string(), + query: Some("personal planning".to_string()), + query_hash: Some("unrelated".to_string()), + confidence: Some(0.4), + status: Some("resolved".to_string()), + output_preview: Some("unrelated composition".to_string()), + metadata: serde_json::json!({}), + }, + &[CompositionMemberRecord { + event_id: "unrelated-composed-graph-test".to_string(), + memory_id: unrelated, + role: "primary".to_string(), + rank: 0, + trust: Some(0.4), + score: Some(0.2), + preview: None, + metadata: serde_json::json!({}), + }], + &[CompositionOutcomeRecord { + id: "unrelated-composed-graph-outcome".to_string(), + event_id: "unrelated-composed-graph-test".to_string(), + outcome_type: "needs_poc".to_string(), + labeled_at: Utc::now(), + label_source: "test".to_string(), + confidence_delta: None, + notes: None, + metadata: serde_json::json!({}), + }], + ) + .unwrap(); + + let get_result = execute( + &storage, + 
Some(serde_json::json!({ + "action": "get", + "event_id": event.id + })), + ) + .await + .unwrap(); + assert_eq!(get_result["members"].as_array().unwrap().len(), 2); + + let label_result = execute( + &storage, + Some(serde_json::json!({ + "action": "label", + "event_id": "composed-graph-test", + "outcome_type": "submitted", + "notes": "submitted in test" + })), + ) + .await + .unwrap(); + assert_eq!( + label_result["outcome"]["outcomeType"].as_str(), + Some("submitted") + ); + let closed_label_result = execute( + &storage, + Some(serde_json::json!({ + "action": "label", + "event_id": "composed-graph-test", + "outcome_type": "closed_by_scope", + "notes": "closed in test" + })), + ) + .await + .unwrap(); + assert_eq!( + closed_label_result["outcome"]["outcomeType"].as_str(), + Some("closed_by_scope") + ); + let duplicate_label_result = execute( + &storage, + Some(serde_json::json!({ + "action": "label", + "event_id": "composed-graph-test", + "outcome_type": "closed_by_duplicate", + "notes": "duplicate family in test" + })), + ) + .await + .unwrap(); + assert_eq!( + duplicate_label_result["outcome"]["outcomeType"].as_str(), + Some("closed_by_duplicate") + ); + + let bounty = execute( + &storage, + Some(serde_json::json!({ + "action": "bounty_mode", + "tags": ["protocolgate"], + "limit": 1 + })), + ) + .await + .unwrap(); + let already = bounty["alreadyComposedLanes"].as_array().unwrap(); + assert_eq!(already.len(), 1); + assert!( + already[0]["event"]["id"].as_str() == Some("composed-graph-test"), + "tag-scoped bounty_mode should skip newer unrelated events before truncating" + ); + assert_eq!(bounty["closedDoors"].as_array().unwrap().len(), 1); + assert_eq!(bounty["duplicateRiskLanes"].as_array().unwrap().len(), 1); + assert!(bounty["needsPocLanes"].as_array().unwrap().is_empty()); + assert!( + bounty["neverComposedLanes"] + .as_array() + .unwrap() + .iter() + .any(|candidate| { + let first_id = candidate["firstId"].as_str().unwrap_or_default(); + let second_id = 
candidate["secondId"].as_str().unwrap_or_default(); + [first_id, second_id].contains(&third.as_str()) + }) + ); + } + + #[tokio::test] + async fn test_bounty_mode_paginates_tag_filter_and_matches_namespaced_tags() { + let (storage, _dir) = test_storage(); + let tagged = ingest( + &storage, + "Older tagged composition lane", + &["project:vestige", "composition"], + ); + let unrelated = ingest(&storage, "Newer unrelated lane", &["unrelated"]); + let base_time = Utc::now(); + + storage + .save_composition( + &CompositionEventRecord { + id: "older-tagged-composition".to_string(), + created_at: base_time, + tool: "deep_reference".to_string(), + mode: "research".to_string(), + query: Some("older tagged lane".to_string()), + query_hash: Some("fnv1a64:older".to_string()), + confidence: Some(0.8), + status: Some("resolved".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[CompositionMemberRecord { + event_id: "older-tagged-composition".to_string(), + memory_id: tagged, + role: "primary".to_string(), + rank: 0, + trust: Some(0.8), + score: Some(0.9), + preview: None, + metadata: serde_json::json!({}), + }], + &[], + ) + .unwrap(); + + for idx in 0..101 { + let event_id = format!("newer-unrelated-composition-{idx}"); + storage + .save_composition( + &CompositionEventRecord { + id: event_id.clone(), + created_at: base_time + chrono::Duration::seconds(i64::from(idx + 1)), + tool: "deep_reference".to_string(), + mode: "planning".to_string(), + query: Some(format!("newer unrelated lane {idx}")), + query_hash: Some(format!("fnv1a64:newer-{idx}")), + confidence: Some(0.3), + status: Some("resolved".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[CompositionMemberRecord { + event_id, + memory_id: unrelated.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.3), + score: Some(0.2), + preview: None, + metadata: serde_json::json!({}), + }], + &[], + ) + .unwrap(); + } + + let bounty = execute( + &storage, 
+ Some(serde_json::json!({ + "action": "bounty_mode", + "tags": ["project"], + "limit": 1 + })), + ) + .await + .unwrap(); + let already = bounty["alreadyComposedLanes"].as_array().unwrap(); + assert_eq!(already.len(), 1); + assert_eq!( + already[0]["event"]["id"].as_str(), + Some("older-tagged-composition"), + "tag-filtered bounty_mode should page past newer unrelated events and match namespaced tags" + ); + } + + #[tokio::test] + async fn test_bounty_mode_uses_member_tag_snapshot_after_purge() { + let (storage, _dir) = test_storage(); + let tagged = ingest( + &storage, + "Tagged member that will be purged", + &["project:vestige", "composition"], + ); + + storage + .save_composition( + &CompositionEventRecord { + id: "purged-tagged-member-composition".to_string(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "research".to_string(), + query: Some("purged tagged lane".to_string()), + query_hash: Some("fnv1a64:purged".to_string()), + confidence: Some(0.6), + status: Some("closed".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[CompositionMemberRecord { + event_id: "purged-tagged-member-composition".to_string(), + memory_id: tagged.clone(), + role: "primary".to_string(), + rank: 0, + trust: Some(0.7), + score: Some(0.8), + preview: Some("Tagged member that will be purged".to_string()), + metadata: serde_json::json!({}), + }], + &[CompositionOutcomeRecord { + id: "purged-tagged-member-outcome".to_string(), + event_id: "purged-tagged-member-composition".to_string(), + outcome_type: "closed_by_scope".to_string(), + labeled_at: Utc::now(), + label_source: "test".to_string(), + confidence_delta: Some(-0.2), + notes: None, + metadata: serde_json::json!({}), + }], + ) + .unwrap(); + + storage + .purge_node(&tagged, Some("test purge")) + .expect("purge should succeed"); + + let get_result = execute( + &storage, + Some(serde_json::json!({ + "action": "get", + "event_id": "purged-tagged-member-composition" + })), + ) 
+ .await + .unwrap(); + assert!( + get_result["members"][0].get("preview").is_none() + || get_result["members"][0]["preview"].is_null(), + "purge should scrub member preview from composed_graph get" + ); + + let bounty = execute( + &storage, + Some(serde_json::json!({ + "action": "bounty_mode", + "tags": ["project"], + "limit": 1 + })), + ) + .await + .unwrap(); + let already = bounty["alreadyComposedLanes"].as_array().unwrap(); + assert_eq!(already.len(), 1); + assert_eq!( + already[0]["event"]["id"].as_str(), + Some("purged-tagged-member-composition"), + "tag-filtered bounty_mode should use composition member tag snapshots after source memory purge" + ); + assert_eq!(bounty["closedDoors"].as_array().unwrap().len(), 1); + } + + #[tokio::test] + async fn test_bounty_mode_guardrail_buckets_are_not_truncated_by_already_limit() { + let (storage, _dir) = test_storage(); + let neutral = ingest(&storage, "Neutral release lane", &["project:vestige"]); + let closed = ingest(&storage, "Closed release lane", &["project:vestige"]); + let base_time = Utc::now(); + + storage + .save_composition( + &CompositionEventRecord { + id: "older-closed-lane".to_string(), + created_at: base_time, + tool: "deep_reference".to_string(), + mode: "release".to_string(), + query: Some("older closed lane".to_string()), + query_hash: Some("fnv1a64:older-closed".to_string()), + confidence: Some(0.3), + status: Some("closed".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[CompositionMemberRecord { + event_id: "older-closed-lane".to_string(), + memory_id: closed, + role: "primary".to_string(), + rank: 0, + trust: Some(0.5), + score: Some(0.4), + preview: None, + metadata: serde_json::json!({}), + }], + &[CompositionOutcomeRecord { + id: "older-closed-outcome".to_string(), + event_id: "older-closed-lane".to_string(), + outcome_type: "closed_by_false_assumption".to_string(), + labeled_at: base_time, + label_source: "test".to_string(), + confidence_delta: Some(-0.3), + 
notes: None, + metadata: serde_json::json!({}), + }], + ) + .unwrap(); + + storage + .save_composition( + &CompositionEventRecord { + id: "newer-neutral-lane".to_string(), + created_at: base_time + chrono::Duration::seconds(1), + tool: "deep_reference".to_string(), + mode: "release".to_string(), + query: Some("newer neutral lane".to_string()), + query_hash: Some("fnv1a64:newer-neutral".to_string()), + confidence: Some(0.7), + status: Some("resolved".to_string()), + output_preview: None, + metadata: serde_json::json!({}), + }, + &[CompositionMemberRecord { + event_id: "newer-neutral-lane".to_string(), + memory_id: neutral, + role: "primary".to_string(), + rank: 0, + trust: Some(0.8), + score: Some(0.8), + preview: None, + metadata: serde_json::json!({}), + }], + &[], + ) + .unwrap(); + + let bounty = execute( + &storage, + Some(serde_json::json!({ + "action": "bounty_mode", + "tags": ["project"], + "limit": 1 + })), + ) + .await + .unwrap(); + + assert_eq!( + bounty["alreadyComposedLanes"][0]["event"]["id"].as_str(), + Some("newer-neutral-lane") + ); + assert_eq!( + bounty["closedDoors"][0]["event"]["id"].as_str(), + Some("older-closed-lane"), + "guardrail buckets should keep scanning after alreadyComposedLanes reaches limit" + ); + } +} diff --git a/crates/vestige-mcp/src/tools/cross_reference.rs b/crates/vestige-mcp/src/tools/cross_reference.rs index e1a9128..e48b4eb 100644 --- a/crates/vestige-mcp/src/tools/cross_reference.rs +++ b/crates/vestige-mcp/src/tools/cross_reference.rs @@ -20,9 +20,10 @@ use serde::Deserialize; use serde_json::Value; use std::sync::Arc; use tokio::sync::Mutex; +use uuid::Uuid; use crate::cognitive::CognitiveEngine; -use vestige_core::Storage; +use vestige_core::{CompositionEventRecord, CompositionMemberRecord, Storage}; /// Input schema for deep_reference / cross_reference tool pub fn schema() -> Value { @@ -509,6 +510,7 @@ pub async fn execute( "confidence": 0.0, "guidance": "No memories found. 
Use smart_ingest to add memories.", "memoriesAnalyzed": 0, + "compositionWriteStatus": "skipped_empty", })); } @@ -820,6 +822,7 @@ pub async fn execute( "id": s.id, "preview": s.content.chars().take(200).collect::(), "trust": (s.trust * 100.0).round() / 100.0, + "relevanceScore": ((composite(s) * 100.0).round() / 100.0), "date": s.updated_at.to_rfc3339(), "role": if i == 0 { "primary" } else { "supporting" }, }) @@ -925,9 +928,163 @@ pub async fn execute( response["related_insights"] = serde_json::json!(related_insights); } + match persist_deep_reference_composition(storage, &args.query, &intent, &response) { + Ok(Some(event_id)) => { + response["composition_event_id"] = serde_json::json!(event_id); + response["compositionWriteStatus"] = serde_json::json!("persisted"); + } + Ok(None) => { + response["compositionWriteStatus"] = serde_json::json!("skipped_empty"); + } + Err(err) => { + tracing::warn!( + "Failed to persist deep_reference composition event: {}", + err + ); + response["compositionWriteStatus"] = serde_json::json!("failed"); + } + } + Ok(response) } +fn persist_deep_reference_composition( + storage: &Arc, + query: &str, + intent: &QueryIntent, + response: &Value, +) -> Result, String> { + let event_id = Uuid::new_v4().to_string(); + let event = CompositionEventRecord { + id: event_id.clone(), + created_at: Utc::now(), + tool: "deep_reference".to_string(), + mode: "deep_reference".to_string(), + query: Some(query.to_string()), + query_hash: Some(query_hash(query)), + confidence: response.get("confidence").and_then(|v| v.as_f64()), + status: response + .get("status") + .and_then(|v| v.as_str()) + .map(ToOwned::to_owned), + output_preview: response + .get("guidance") + .and_then(|v| v.as_str()) + .map(|value| preview_text(value, 280)), + metadata: serde_json::json!({ + "intent": format!("{:?}", intent), + "memoriesAnalyzed": response.get("memoriesAnalyzed").and_then(|v| v.as_u64()).unwrap_or(0), + "activationExpanded": 
response.get("activationExpanded").and_then(|v| v.as_u64()).unwrap_or(0), + "reasoningPreview": response.get("reasoning").and_then(|v| v.as_str()).map(|value| preview_text(value, 600)), + }), + }; + + let mut members = Vec::new(); + if let Some(evidence) = response.get("evidence").and_then(|v| v.as_array()) { + for (idx, item) in evidence.iter().enumerate() { + let Some(memory_id) = item.get("id").and_then(|v| v.as_str()) else { + continue; + }; + let role = item + .get("role") + .and_then(|v| v.as_str()) + .unwrap_or(if idx == 0 { "primary" } else { "supporting" }); + members.push(CompositionMemberRecord { + event_id: event_id.clone(), + memory_id: memory_id.to_string(), + role: role.to_string(), + rank: idx as i32, + trust: item.get("trust").and_then(|v| v.as_f64()), + score: item + .get("relevanceScore") + .or_else(|| item.get("relevance_score")) + .and_then(|v| v.as_f64()), + preview: None, + metadata: serde_json::json!({ + "roleSource": "deep_reference_evidence", + "evidenceRank": idx, + "date": item.get("date").and_then(|v| v.as_str()), + }), + }); + } + } + + if let Some(contradictions) = response.get("contradictions").and_then(|v| v.as_array()) { + for (idx, contradiction) in contradictions.iter().enumerate() { + for side in ["stronger", "weaker"] { + let Some(item) = contradiction.get(side) else { + continue; + }; + let Some(memory_id) = item.get("id").and_then(|v| v.as_str()) else { + continue; + }; + members.push(CompositionMemberRecord { + event_id: event_id.clone(), + memory_id: memory_id.to_string(), + role: "contradicting".to_string(), + rank: idx as i32, + trust: item.get("trust").and_then(|v| v.as_f64()), + score: contradiction.get("topic_overlap").and_then(|v| v.as_f64()), + preview: None, + metadata: serde_json::json!({ + "roleSource": "deep_reference_contradiction", + "side": side, + "date": item.get("date").and_then(|v| v.as_str()), + }), + }); + } + } + } + + if let Some(superseded) = response.get("superseded").and_then(|v| v.as_array()) { + 
for (idx, item) in superseded.iter().enumerate() { + let Some(memory_id) = item.get("id").and_then(|v| v.as_str()) else { + continue; + }; + members.push(CompositionMemberRecord { + event_id: event_id.clone(), + memory_id: memory_id.to_string(), + role: "superseded".to_string(), + rank: idx as i32, + trust: item.get("trust").and_then(|v| v.as_f64()), + score: None, + preview: None, + metadata: serde_json::json!({ + "roleSource": "deep_reference_superseded", + "superseded_by": item.get("superseded_by").and_then(|v| v.as_str()), + "date": item.get("date").and_then(|v| v.as_str()), + }), + }); + } + } + + if members.is_empty() { + return Ok(None); + } + + storage + .save_composition(&event, &members, &[]) + .map_err(|e| e.to_string())?; + Ok(Some(event_id)) +} + +fn query_hash(query: &str) -> String { + let mut hash = 0xcbf29ce484222325u64; + for byte in query.as_bytes() { + hash ^= u64::from(*byte); + hash = hash.wrapping_mul(0x100000001b3); + } + format!("fnv1a64:{hash:016x}") +} + +fn preview_text(value: &str, max: usize) -> String { + let collapsed = value.replace('\n', " "); + if collapsed.len() <= max { + return collapsed; + } + format!("{}...", &collapsed[..collapsed.floor_char_boundary(max)]) +} + // ============================================================================ // TESTS // ============================================================================ @@ -1010,6 +1167,99 @@ mod tests { ); } + #[tokio::test] + async fn test_deep_reference_persists_composition_event() { + let (storage, _dir) = test_storage().await; + + let primary_id = ingest_one( + &storage, + "ProtocolGate control-plane composition tracks global invariant local gate bypasses.", + &["protocolgate", "boundary-scope"], + ) + .await; + let supporting_id = ingest_one( + &storage, + "ProtocolGate global invariant local gate research used Aave account-global health factor and route-local validation.", + &["protocolgate", "boundary-scope"], + ) + .await; + + let result = execute( + 
&storage, + &test_cognitive(), + Some(serde_json::json!({ + "query": "ProtocolGate global invariant local gate", + "depth": 10 + })), + ) + .await + .expect("execute should succeed"); + + let event_id = result["composition_event_id"] + .as_str() + .expect("deep_reference should return persisted event id"); + assert_eq!(result["compositionWriteStatus"].as_str(), Some("persisted")); + + let event = storage + .get_composition_event(event_id) + .unwrap() + .expect("composition event should be stored"); + assert_eq!(event.tool, "deep_reference"); + assert_eq!( + event.query.as_deref(), + Some("ProtocolGate global invariant local gate") + ); + + let members = storage.get_composition_members(event_id).unwrap(); + assert!(members.iter().any(|member| member.memory_id == primary_id)); + assert!( + members + .iter() + .any(|member| member.memory_id == supporting_id) + ); + assert!(members.iter().any(|member| member.role == "primary")); + assert!( + members.iter().any(|member| { + member.memory_id == primary_id + && member.score.is_some() + && member.metadata["roleSource"] == "deep_reference_evidence" + }), + "persisted members should retain relevance score and role source" + ); + } + + #[tokio::test] + async fn test_deep_reference_skips_empty_composition_event() { + let (storage, _dir) = test_storage().await; + + let result = execute( + &storage, + &test_cognitive(), + Some(serde_json::json!({ + "query": "no memories exist for this query", + "depth": 10 + })), + ) + .await + .expect("execute should succeed"); + + assert_eq!( + result["compositionWriteStatus"].as_str(), + Some("skipped_empty") + ); + assert!( + result.get("composition_event_id").is_none(), + "empty evidence should not create a composition event" + ); + assert!( + storage + .get_recent_composition_events(10) + .unwrap() + .is_empty(), + "ledger should stay empty when no memories participated" + ); + } + // ======================================================================== // Confidence sanity: must vary 
with query relevance. // ======================================================================== diff --git a/crates/vestige-mcp/src/tools/mod.rs b/crates/vestige-mcp/src/tools/mod.rs index a2c3e24..078fab6 100644 --- a/crates/vestige-mcp/src/tools/mod.rs +++ b/crates/vestige-mcp/src/tools/mod.rs @@ -41,6 +41,7 @@ pub mod graph; pub mod health; // v2.1: Cross-reference (connect the dots) +pub mod composed_graph; pub mod contradictions; pub mod cross_reference; diff --git a/docs/COMPOSED_GRAPH.md b/docs/COMPOSED_GRAPH.md new file mode 100644 index 0000000..e0748be --- /dev/null +++ b/docs/COMPOSED_GRAPH.md @@ -0,0 +1,159 @@ +# ComposedGraph + +ComposedGraph records memory combinations as durable reasoning events. + +Most memory systems store facts, entities, or relationships. ComposedGraph stores a +different object: which memories were used together, why they were used, and what +happened afterward. + +## Model + +`composition_events` stores the reasoning envelope: + +- tool and mode, such as `deep_reference` or `bounty` +- query and query hash +- confidence, status, and output preview +- metadata for intent, analyzed memory count, activation expansion, and reasoning preview + +`composition_members` stores the participating memories: + +- memory id +- role, such as `primary`, `supporting`, `contradicting`, or `superseded` +- rank, trust, relevance score, preview, and metadata + +`composition_outcomes` stores later labels: + +- `helpful` +- `dead_end` +- `submitted` +- `accepted` +- `rejected` +- `duplicate_risk` +- `needs_poc` +- `bad_severity` +- `user_promoted` +- `user_demoted` +- `closed_by_scope` +- `closed_by_duplicate` +- `closed_by_false_assumption` +- `closed_by_user` +- `expired_lane` + +Member memory ids are intentionally historical references, not foreign keys into +`knowledge_nodes`. Purging or superseding a memory should not erase the fact that +it once participated in a reasoning path. 
+ +## MCP Tool + +Use `composed_graph` for read/write access to the composition ledger. + +```json +{ "action": "recent", "limit": 10 } +``` + +```json +{ "action": "get", "event_id": "<event_id>" } +``` + +```json +{ "action": "memory", "memory_id": "<memory_id>", "limit": 10 } +``` + +```json +{ "action": "neighbors", "memory_id": "<memory_id>", "limit": 10 } +``` + +```json +{ "action": "never_composed", "tags": ["project:vestige"], "limit": 10 } +``` + +```json +{ + "action": "label", + "event_id": "<event_id>", + "outcome_type": "helpful", + "notes": "This combination led to the accepted fix." +} +``` + +## Never-Composed Frontier + +`never_composed` returns pairs that have not yet appeared together in a +composition event. + +The ranking is intentionally not just shared-tag matching. It combines: + +- exact shared tags +- shared meaningful content terms +- boundary tags such as `boundary-*`, `oracle`, `queue`, `settlement`, `upgrade`, + `pause`, `accounting`, or `scope` +- node-type diversity +- FSRS retention strength +- composition novelty, so memories that have not already been heavily composed + still get surfaced +- prior composition outcomes from either member, so previously accepted, + duplicate-risk, or dead-end lanes shape the frontier without hiding it + +Each candidate includes: + +- `score` +- `noveltyScore` +- `bridgeScore` +- `trustScore` +- `outcomeScoreAdjustment` +- `sharedTags` +- `boundaryTags` +- `sharedTerms` +- `priorOutcomes` +- `outcomeSignal`, such as `clean`, `prior_success`, `prior_duplicate_risk`, + `prior_closed_door`, or `mixed_prior_outcomes` +- node types +- previews +- a short reason +- a `compositionQuestion` that an agent can answer before taking action + +The output is a frontier queue, not a finding. A never-composed pair means +"worth investigating," not "true," "novel," or "reportable."
+Prior outcomes are also guardrails, not verdicts: a duplicate-risk signal should +make the agent check duplicate families first, while a success signal should make +it inspect why the older composition worked. + +Closed-door labels should be specific when possible. Prefer `closed_by_scope`, +`closed_by_duplicate`, `closed_by_false_assumption`, `closed_by_user`, or +`expired_lane` over a generic `dead_end` when the reason is known. + +## Bounty / Research Mode + +`bounty_mode` is a higher-level read shape for investigative workflows. It returns: + +- recent already-composed lanes +- never-composed lanes +- closed doors +- duplicate-risk lanes +- lanes that need proof-of-concept work +- top weird combinations + +This is useful for security research, bug triage, architecture work, and product +strategy because failed or duplicate compositions are preserved instead of being +rediscovered repeatedly. + +## Deep Reference Integration + +`deep_reference` persists composition events automatically when it has evidence +members. Empty evidence does not create a ledger event. + +The response includes: + +- `composition_event_id` when persisted +- `compositionWriteStatus`, usually `persisted` or `skipped_empty` + +## Design Direction + +The next useful upgrades are: + +- triple or n-ary candidate mining, not only pairs +- structural-fit scoring for analogies, separate from surface similarity +- trust-zone scoring so a composition is limited by its weakest provenance +- temporal replay: "what combinations were available when this decision was made?" 
+- evaluation tasks where success requires combining memories that were never + previously co-composed From b45ea819d7de450bbe2e1fce9c0fef160bd3bac5 Mon Sep 17 00:00:00 2001 From: Sam Valladares Date: Thu, 18 Jun 2026 16:08:51 -0500 Subject: [PATCH 2/8] Fix ComposedGraph clippy warnings --- crates/vestige-core/src/storage/sqlite.rs | 2 +- crates/vestige-mcp/src/tools/composed_graph.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/vestige-core/src/storage/sqlite.rs b/crates/vestige-core/src/storage/sqlite.rs index a9840a1..94ed45b 100644 --- a/crates/vestige-core/src/storage/sqlite.rs +++ b/crates/vestige-core/src/storage/sqlite.rs @@ -4822,7 +4822,7 @@ impl Storage { { let tagged_nodes = self.get_nodes_matching_any_tag_prefix(filter, TAGGED_SCAN_LIMIT)?; let mut by_id = HashMap::new(); - for node in nodes.into_iter().chain(tagged_nodes.into_iter()) { + for node in nodes.into_iter().chain(tagged_nodes) { by_id.entry(node.id.clone()).or_insert(node); } nodes = by_id.into_values().collect(); diff --git a/crates/vestige-mcp/src/tools/composed_graph.rs b/crates/vestige-mcp/src/tools/composed_graph.rs index ee69d93..957f8e8 100644 --- a/crates/vestige-mcp/src/tools/composed_graph.rs +++ b/crates/vestige-mcp/src/tools/composed_graph.rs @@ -256,7 +256,7 @@ fn bounty_mode(storage: &Storage, limit: i32, tags: Option<&[String]>) -> Result { push_limited(&mut duplicate_risk_lanes, item.clone(), limit); } - if outcome_types.iter().any(|kind| *kind == "needs_poc") { + if outcome_types.contains(&"needs_poc") { push_limited(&mut needs_poc_lanes, item.clone(), limit); } if already_composed.len() < limit as usize { From e1f37965236fc801847fad71cc1a2b78e0750a76 Mon Sep 17 00:00:00 2001 From: Caio Ribeiro Date: Thu, 18 Jun 2026 23:02:32 +0000 Subject: [PATCH 3/8] docs: add test integrity delta receipt sketch --- docs/SANHEDRIN_RECEIPTS.md | 2 + docs/SANHEDRIN_TEST_INTEGRITY_DELTAS.md | 110 ++++++++++++++++++++++++ 2 files changed, 112 insertions(+) create 
mode 100644 docs/SANHEDRIN_TEST_INTEGRITY_DELTAS.md diff --git a/docs/SANHEDRIN_RECEIPTS.md b/docs/SANHEDRIN_RECEIPTS.md index ac0bd4d..2213c58 100644 --- a/docs/SANHEDRIN_RECEIPTS.md +++ b/docs/SANHEDRIN_RECEIPTS.md @@ -12,6 +12,8 @@ instead of opaque. The current schema is `vestige.sanhedrin.receipt.v1`. - Appeals: `~/.vestige/sanhedrin/appeals.jsonl` - Fail-open events: `~/.vestige/sanhedrin/fail-open.jsonl` +Optional companion schema: [`SANHEDRIN_TEST_INTEGRITY_DELTAS.md`](SANHEDRIN_TEST_INTEGRITY_DELTAS.md) describes mechanical deltas for cases where a verifier command passed but the test artifact changed after implementation. + ## v1 JSON Shape ```json diff --git a/docs/SANHEDRIN_TEST_INTEGRITY_DELTAS.md b/docs/SANHEDRIN_TEST_INTEGRITY_DELTAS.md new file mode 100644 index 0000000..c9d12dc --- /dev/null +++ b/docs/SANHEDRIN_TEST_INTEGRITY_DELTAS.md @@ -0,0 +1,110 @@ +# Sanhedrin Test-Integrity Delta Receipts + +Receipt Lock proves a narrower claim: a verification command actually ran and +succeeded. Test-integrity deltas are an optional companion receipt for the +stronger claim that the tests still mean what the draft says they mean. + +This receipt is intentionally mechanical. It is not a broad correctness oracle +and it does not ask a second model to decide whether the implementation is good. +It records whether the verification artifact changed in ways that should +upgrade, downgrade, or send the verification claim to human review. + +## Boundary + +Keep these claims separate: + +1. **Command receipt:** `cargo test`, `npm test`, `pytest`, or another verifier + command ran after the relevant edit and exited successfully. +2. **Test-integrity delta:** the tests/specs behind that verifier were not + removed, skipped, weakened, or replaced after implementation in a way that + makes the green result less admissible. + +A run can have a valid command receipt and still receive a downgraded +integrity decision. 
+ +## Optional JSON Shape + +```json +{ + "schema": "vestige.sanhedrin.test_integrity_delta.v1", + "id": "tid_", + "commandReceiptId": "receipt_", + "verificationClaim": "All tests passed.", + "specSource": { + "contextId": "spec_ctx_04", + "testFiles": [ + { + "path": "tests/cart.test.ts", + "hashBeforeImplementation": "sha256:...", + "hashAfterVerification": "sha256:..." + } + ] + }, + "implementationContext": "impl_ctx_09", + "verifierContext": "verify_ctx_02", + "delta": { + "testFilesChangedAfterImplementation": true, + "removedOrDisabledTests": [ + { + "kind": "skip_or_only", + "path": "tests/cart.test.ts", + "line": 42 + } + ], + "removedAssertions": 2, + "weakenedExpectations": [ + { + "path": "tests/cart.test.ts", + "from": "throws InvalidCouponError", + "to": "does not throw" + } + ], + "snapshotChurnWithoutSourceChange": false, + "coverageDelta": -3.8, + "mocksReplacingRealBoundary": [ + { + "module": "PaymentGateway", + "before": "integration-ish fake", + "after": "empty stub" + } + ] + }, + "freshVerifier": { + "commandReceiptId": "receipt_", + "exitCode": 0, + "checkedAfterLastRelevantEdit": true + }, + "decision": "downgraded", + "reason": "tests passed, but the tests were weakened after implementation" +} +``` + +## Decisions + +- `accepted` — a verifier command succeeded after the last relevant edit and no + integrity downgrade was detected. +- `downgraded` — the command succeeded, but the tests/specs changed in a way + that makes the verification claim weaker than stated. +- `needs_human_review` — the delta may be legitimate, but a local mechanical + check cannot safely classify it. Snapshot updates are a common example. + +## Minimal Fixture Suite + +These cases are small enough to live as fixtures without turning Sanhedrin into +a correctness judge. 
+ +| Case | Input pattern | Expected decision | Why | +| --- | --- | --- | --- | +| unchanged-good | implementation changes source; tests unchanged; fresh verifier succeeds | `accepted` | Green tests are supported by a fresh command receipt and unchanged test artifact. | +| skipped-test | implementation adds `.skip`, `.only`, `#[ignore]`, or equivalent before verifier succeeds | `downgraded` | The command ran, but the claim no longer represents the original test obligation. | +| weakened-assertion | expectation is relaxed after implementation, e.g. `throws InvalidCouponError` -> `does not throw` | `downgraded` | The verifier passed against a weaker assertion than the one available before implementation. | +| justified-snapshot | snapshot changes alongside an intentional source/UI change | `needs_human_review` or `accepted` by policy | Snapshot churn can be valid, but the receipt should make the policy decision explicit. | + +## Non-goals + +- Do not infer whether the implementation is correct in the world. +- Do not require full semantic diffing before Receipt Lock can operate. +- Do not treat staged evidence or a model explanation as equivalent to a fresh + command receipt. +- Do not block every test edit. The goal is to keep the verification claim + honest when the test artifact changed after implementation. From 5715f585fdcada8cc16fb64af86232df78616337 Mon Sep 17 00:00:00 2001 From: Jan De Landtsheer Date: Tue, 21 Apr 2026 21:43:52 +0200 Subject: [PATCH 4/8] feat(storage): phase 1 -- extract MemoryStore and Embedder traits (ADR 0001) Introduce two trait boundaries that the rest of the stack now sits above, landing Phase 1 of ADR 0001 (pluggable storage and network access). Rebased onto v2.1.22 Sanhedrin from the original April work. MemoryStore / LocalMemoryStore (crates/vestige-core/src/storage/memory_store.rs): One trait, ~25 methods, covering CRUD, hybrid / FTS / vector search, FSRS scheduling, graph edges, and the forthcoming domain surface. 
trait_variant::make generates a Send-bound MemoryStore alias over the base LocalMemoryStore so Arc<dyn MemoryStore> works under tokio/axum. Storage errors map through a dedicated MemoryStoreError. Embedder / LocalEmbedder (crates/vestige-core/src/embedder/): Pluggable text-to-vector encoder. FastembedEmbedder wraps the existing EmbeddingService; storage never calls fastembed directly anymore. Embedder::signature() produces the ModelSignature consumed by the store's embedding_model registry. SqliteMemoryStore (crates/vestige-core/src/storage/sqlite.rs): Storage renamed to SqliteMemoryStore; the old name lives on as a pub type alias so Arc<Storage> consumers in vestige-mcp stay intact. All existing inherent methods are untouched; the trait impl is purely additive and dispatches into them. The db_path field added by v2.1.1 portable-sync is preserved. Migration V14 (crates/vestige-core/src/storage/migrations.rs): Renumbered from V12 (the original April number) to V14 to slot in cleanly after upstream's V12 (v2.1.1 sync_tombstones) and V13 (v2.1.2 purge tombstones). - embedding_model registry table (CHECK id = 1, code enforces the single-row invariant). - knowledge_nodes.domains / domain_scores TEXT columns (JSON arrays default '[]' / '{}'), domains catalogue table, supporting indexes. Phase 4 populates these columns; Phase 1 just exposes the schema. Consolidation and other cognitive pathways now accept a &dyn LocalMemoryStore (sync) or Arc<dyn MemoryStore> (async) rather than a concrete Storage. Tests: - trait-method unit tests colocated in sqlite.rs and migrations.rs - embedder/fastembed.rs tests for name/dimension/hash stability - new integration crate tests/phase_1 (added to workspace members): trait_round_trip (8), embedding_model_registry (7), domain_column_migration (5), cognitive_module_isolation (4), send_bound_variant (2), embedder_trait (2).
Acceptance gate post-rebase: - cargo build --workspace --all-targets: ok - cargo clippy --workspace --all-targets -- -D warnings: clean - cargo test -p vestige-core --lib: 428 pass - cargo test -p vestige-phase-1-tests: 28 pass - cargo test -p vestige-mcp --lib: 380 pass (Storage alias preserves every existing call site) Co-existence with v2.1.1 portable-sync: this trait extraction is additive. Portable-sync's tombstone migrations (V12, V13) remain on the concrete SqliteMemoryStore; Phase 2 (Postgres) will decide which of those surfaces graduate into the trait. --- Cargo.lock | 115 +- Cargo.toml | 1 + crates/vestige-core/Cargo.toml | 3 + crates/vestige-core/src/embedder/fastembed.rs | 182 ++ crates/vestige-core/src/embedder/mod.rs | 57 + crates/vestige-core/src/lib.rs | 44 +- .../vestige-core/src/storage/memory_store.rs | 316 ++++ crates/vestige-core/src/storage/migrations.rs | 198 ++- crates/vestige-core/src/storage/mod.rs | 21 +- crates/vestige-core/src/storage/sqlite.rs | 1540 ++++++++++++++++- tests/phase_1/Cargo.toml | 38 + tests/phase_1/cognitive_module_isolation.rs | 143 ++ tests/phase_1/domain_column_migration.rs | 161 ++ tests/phase_1/embedder_trait.rs | 43 + tests/phase_1/embedding_model_registry.rs | 148 ++ tests/phase_1/send_bound_variant.rs | 99 ++ tests/phase_1/trait_round_trip.rs | 217 +++ 17 files changed, 3282 insertions(+), 44 deletions(-) create mode 100644 crates/vestige-core/src/embedder/fastembed.rs create mode 100644 crates/vestige-core/src/embedder/mod.rs create mode 100644 crates/vestige-core/src/storage/memory_store.rs create mode 100644 tests/phase_1/Cargo.toml create mode 100644 tests/phase_1/cognitive_module_isolation.rs create mode 100644 tests/phase_1/domain_column_migration.rs create mode 100644 tests/phase_1/embedder_trait.rs create mode 100644 tests/phase_1/embedding_model_registry.rs create mode 100644 tests/phase_1/send_bound_variant.rs create mode 100644 tests/phase_1/trait_round_trip.rs diff --git a/Cargo.lock b/Cargo.lock 
index 0b613a0..8be114c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -143,6 +143,12 @@ dependencies = [ "syn", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.6" @@ -158,6 +164,17 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -311,6 +328,20 @@ dependencies = [ "core2", ] +[[package]] +name = "blake3" +version = "1.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures 0.3.0", +] + [[package]] name = "block" version = "0.1.6" @@ -642,6 +673,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "core-foundation" version = "0.9.4" @@ -697,6 +734,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -2282,12 +2328,10 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.95" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ - "cfg-if", - "futures-util", "once_cell", "wasm-bindgen", ] @@ -3181,9 +3225,9 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" dependencies = [ "portable-atomic", ] @@ -3822,7 +3866,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -3833,7 +3877,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -4356,6 +4400,17 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "trait-variant" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70977707304198400eb4835a78f6a9f928bf41bba420deb8fdb175cd965d77a7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "try-lock" version = "0.2.5" @@ -4631,6 +4686,8 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" name = "vestige-core" version = "2.1.26" dependencies = [ + "async-trait", + "blake3", "candle-core", "chrono", "criterion", @@ -4646,6 +4703,7 @@ dependencies = [ "thiserror 2.0.18", "tokio", "tracing", + "trait-variant", "usearch", "uuid", ] @@ -4692,6 +4750,19 @@ dependencies = [ "vestige-core", ] +[[package]] +name = "vestige-phase-1-tests" +version = "0.0.1" 
+dependencies = [ + "chrono", + "rusqlite", + "serde_json", + "tempfile", + "tokio", + "uuid", + "vestige-core", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -4737,9 +4808,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.118" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -4750,19 +4821,23 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.68" +version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ + "cfg-if", + "futures-util", "js-sys", + "once_cell", "wasm-bindgen", + "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.118" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4770,9 +4845,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.118" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2", @@ -4783,9 +4858,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.118" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] @@ -4839,9 +4914,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.95" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 7183f40..203a857 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "crates/vestige-core", "crates/vestige-mcp", "tests/e2e", + "tests/phase_1", ] exclude = [ "fastembed-rs", diff --git a/crates/vestige-core/Cargo.toml b/crates/vestige-core/Cargo.toml index e878cdb..25a0495 100644 --- a/crates/vestige-core/Cargo.toml +++ b/crates/vestige-core/Cargo.toml @@ -125,6 +125,9 @@ usearch = { version = "=2.23.0", optional = true } # LRU cache for query embeddings lru = "0.16" +trait-variant = "0.1" +blake3 = "1" +async-trait = "0.1" [dev-dependencies] tempfile = "3" diff --git a/crates/vestige-core/src/embedder/fastembed.rs b/crates/vestige-core/src/embedder/fastembed.rs new file mode 100644 index 0000000..a4cd87b --- /dev/null +++ b/crates/vestige-core/src/embedder/fastembed.rs @@ -0,0 +1,182 @@ +//! `FastembedEmbedder` -- adapts the existing `EmbeddingService` to the +//! `LocalEmbedder` trait. 
+ +#[cfg(feature = "embeddings")] +use crate::embeddings::{EMBEDDING_DIMENSIONS, EmbeddingService}; + +use super::{EmbedderError, EmbedderResult, LocalEmbedder}; + +pub struct FastembedEmbedder { + #[cfg(feature = "embeddings")] + inner: EmbeddingService, + cached_hash: std::sync::OnceLock, +} + +impl FastembedEmbedder { + pub fn new() -> Self { + Self { + #[cfg(feature = "embeddings")] + inner: EmbeddingService::new(), + cached_hash: std::sync::OnceLock::new(), + } + } + + fn compute_hash(name: &str, dim: usize) -> String { + let mut hasher = blake3::Hasher::new(); + hasher.update(name.as_bytes()); + hasher.update(&(dim as u64).to_le_bytes()); + // fastembed's ONNX bytes are not directly accessible at runtime; we + // use `(name, dim, vestige-core CARGO_PKG_VERSION)` as the + // signature. If fastembed ever changes its output deterministically + // between minor versions, bumping the crate version triggers a + // mismatch -- which is exactly the drift we want to detect. + hasher.update(env!("CARGO_PKG_VERSION").as_bytes()); + hasher.finalize().to_hex().to_string() + } +} + +impl Default for FastembedEmbedder { + fn default() -> Self { + Self::new() + } +} + +#[async_trait::async_trait] +impl LocalEmbedder for FastembedEmbedder { + async fn embed(&self, text: &str) -> EmbedderResult> { + #[cfg(feature = "embeddings")] + { + let emb = self + .inner + .embed(text) + .map_err(|e| EmbedderError::EmbedFailed(e.to_string()))?; + Ok(emb.vector) + } + #[cfg(not(feature = "embeddings"))] + { + let _ = text; + Err(EmbedderError::Init( + "embeddings feature not enabled".to_string(), + )) + } + } + + fn model_name(&self) -> &str { + #[cfg(feature = "embeddings")] + { + self.inner.model_name() + } + #[cfg(not(feature = "embeddings"))] + { + "nomic-ai/nomic-embed-text-v1.5" + } + } + + fn dimension(&self) -> usize { + #[cfg(feature = "embeddings")] + { + EMBEDDING_DIMENSIONS + } + #[cfg(not(feature = "embeddings"))] + { + 256 + } + } + + fn model_hash(&self) -> String { + 
self.cached_hash + .get_or_init(|| Self::compute_hash(self.model_name(), self.dimension())) + .clone() + } + + async fn embed_batch(&self, texts: &[&str]) -> EmbedderResult>> { + #[cfg(feature = "embeddings")] + { + let embs = self + .inner + .embed_batch(texts) + .map_err(|e| EmbedderError::EmbedFailed(e.to_string()))?; + Ok(embs.into_iter().map(|e| e.vector).collect()) + } + #[cfg(not(feature = "embeddings"))] + { + let _ = texts; + Err(EmbedderError::Init( + "embeddings feature not enabled".to_string(), + )) + } + } +} + +// ============================================================================ +// UNIT TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn embedder_reports_correct_name() { + let e = FastembedEmbedder::new(); + assert!( + e.model_name().contains("nomic"), + "model name should contain 'nomic'" + ); + } + + #[test] + fn embedder_reports_256_dimension() { + let e = FastembedEmbedder::new(); + assert_eq!(e.dimension(), 256); + } + + #[test] + fn embedder_hash_is_stable() { + let e = FastembedEmbedder::new(); + let h1 = e.model_hash(); + let h2 = e.model_hash(); + assert_eq!(h1, h2, "model_hash must be stable across calls"); + } + + #[test] + fn embedder_hash_includes_crate_version() { + // Compute what the hash should be given the known inputs + let name = FastembedEmbedder::new().model_name().to_string(); + let dim = FastembedEmbedder::new().dimension(); + let expected = FastembedEmbedder::compute_hash(&name, dim); + let got = FastembedEmbedder::new().model_hash(); + assert_eq!(got, expected); + } + + #[test] + fn embedder_signature_matches_accessors() { + let e = FastembedEmbedder::new(); + let sig = e.signature(); + assert_eq!(sig.name, e.model_name()); + assert_eq!(sig.dimension, e.dimension()); + assert_eq!(sig.hash, e.model_hash()); + } + + #[cfg(feature = "embeddings")] + #[test] + fn embedder_embed_smoke() { + let e = FastembedEmbedder::new(); 
+ let rt = tokio::runtime::Runtime::new().unwrap(); + let vec = rt.block_on(e.embed("hello world")).expect("embed"); + assert_eq!(vec.len(), 256); + } + + #[cfg(feature = "embeddings")] + #[test] + fn embedder_embed_batch_matches_sequential() { + let e = FastembedEmbedder::new(); + let rt = tokio::runtime::Runtime::new().unwrap(); + let texts = ["alpha beta", "gamma delta"]; + let batch = rt.block_on(e.embed_batch(texts.as_ref())).expect("batch"); + let seq_a = rt.block_on(e.embed(texts[0])).expect("seq a"); + let seq_b = rt.block_on(e.embed(texts[1])).expect("seq b"); + assert_eq!(batch[0], seq_a); + assert_eq!(batch[1], seq_b); + } +} diff --git a/crates/vestige-core/src/embedder/mod.rs b/crates/vestige-core/src/embedder/mod.rs new file mode 100644 index 0000000..9d43d0d --- /dev/null +++ b/crates/vestige-core/src/embedder/mod.rs @@ -0,0 +1,57 @@ +//! Text-to-vector encoding trait. Pluggable per-install. + +mod fastembed; + +pub use fastembed::FastembedEmbedder; + +/// Error returned by every `Embedder` method. +#[non_exhaustive] +#[derive(Debug, thiserror::Error)] +pub enum EmbedderError { + #[error("embedder initialization failed: {0}")] + Init(String), + #[error("embedding generation failed: {0}")] + EmbedFailed(String), + #[error("invalid input: {0}")] + InvalidInput(String), +} + +pub type EmbedderResult = std::result::Result; + +/// Pluggable embedder. The storage layer NEVER calls fastembed directly; +/// callers compute vectors via this trait and pass them into `MemoryStore`. +/// +/// `#[async_trait::async_trait]` makes every `async fn` return a +/// `Pin>`, which is required for `Box` +/// and `Arc` to be dyn-compatible. +#[async_trait::async_trait] +pub trait LocalEmbedder: Send + Sync + 'static { + async fn embed(&self, text: &str) -> EmbedderResult>; + + fn model_name(&self) -> &str; + + fn dimension(&self) -> usize; + + /// Stable blake3 hash of (model_name || dimension || vestige-core crate version). + /// Lowercase hex, 64 chars. 
+ /// + /// Used by `MemoryStore::register_model` to detect silent model drift + /// (e.g. a fastembed minor upgrade that changes vector output). + fn model_hash(&self) -> String; + + async fn embed_batch(&self, texts: &[&str]) -> EmbedderResult>>; + + /// Returns the `ModelSignature` describing this embedder. Convenience + /// wrapper over the three accessors above. + fn signature(&self) -> crate::storage::ModelSignature { + crate::storage::ModelSignature { + name: self.model_name().to_string(), + dimension: self.dimension(), + hash: self.model_hash(), + } + } +} + +/// Type alias: `Embedder` is the dyn-compatible, Send+Sync variant. +/// Both names refer to the same `async_trait`-annotated trait. +pub use LocalEmbedder as Embedder; diff --git a/crates/vestige-core/src/lib.rs b/crates/vestige-core/src/lib.rs index b8b0154..f8a35d6 100644 --- a/crates/vestige-core/src/lib.rs +++ b/crates/vestige-core/src/lib.rs @@ -83,6 +83,7 @@ /// Optional `vestige.toml` configuration (Phase 2: Configurable Output). 
pub mod config; pub mod consolidation; +pub mod embedder; pub mod fsrs; pub mod fts; pub mod memory; @@ -159,13 +160,46 @@ pub use config::{CONFIG_FILE, OutputConfig, OutputDefaults, OutputProfile, Vesti // Storage layer pub use storage::{ - CompositionEventRecord, CompositionMemberRecord, CompositionNeighborRecord, - CompositionOutcomeRecord, ConnectionRecord, ConsolidationHistoryRecord, DreamHistoryRecord, - InsightRecord, IntentionRecord, NeverComposedCandidate, PORTABLE_ARCHIVE_FORMAT, - PortableArchive, PortableImportMode, PortableImportReport, Result, SmartIngestResult, - StateTransitionRecord, Storage, StorageError, + ClassificationResult, + CompositionEventRecord, + CompositionMemberRecord, + CompositionNeighborRecord, + CompositionOutcomeRecord, + ConnectionRecord, + ConsolidationHistoryRecord, + Domain, + DreamHistoryRecord, + HealthStatus, + InsightRecord, + IntentionRecord, + LocalMemoryStore, + MemoryEdge, + MemoryRecord, + MemoryStore, + MemoryStoreError, + MemoryStoreResult, + ModelSignature, + NeverComposedCandidate, + PORTABLE_ARCHIVE_FORMAT, + PortableArchive, + PortableImportMode, + PortableImportReport, + Result, + SchedulingState, + SearchQuery, + SmartIngestResult, + SqliteMemoryStore, + StateTransitionRecord, + Storage, + StorageError, + StoreStats, + // Note: storage::SearchResult is intentionally not re-exported here to avoid + // collision with memory::SearchResult. Use vestige_core::storage::SearchResult directly. }; +// Embedder trait and implementations +pub use embedder::{Embedder, EmbedderError, EmbedderResult, FastembedEmbedder, LocalEmbedder}; + // Consolidation (sleep-inspired memory processing) pub use consolidation::SleepConsolidation; pub use consolidation::{ diff --git a/crates/vestige-core/src/storage/memory_store.rs b/crates/vestige-core/src/storage/memory_store.rs new file mode 100644 index 0000000..2bc3137 --- /dev/null +++ b/crates/vestige-core/src/storage/memory_store.rs @@ -0,0 +1,316 @@ +//! 
Backend-agnostic memory store trait. +//! +//! This is the single abstraction every cognitive module sits above. It is +//! intentionally flat: one trait, ~25 methods, no sub-traits. + +use std::collections::HashMap; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +// ---------------------------------------------------------------------------- +// ERROR +// ---------------------------------------------------------------------------- + +/// Error returned by every `LocalMemoryStore` / `MemoryStore` method. +#[non_exhaustive] +#[derive(Debug, thiserror::Error)] +pub enum MemoryStoreError { + #[error("not found: {0}")] + NotFound(String), + + #[error("backend error: {0}")] + Backend(String), + + #[error( + "embedding model mismatch: store registered {registered_name} (dim {registered_dim}, \ + hash {registered_hash}), embedder is {actual_name} (dim {actual_dim}, hash {actual_hash})" + )] + ModelMismatch { + registered_name: String, + registered_dim: usize, + registered_hash: String, + actual_name: String, + actual_dim: usize, + actual_hash: String, + }, + + #[error("invalid input: {0}")] + InvalidInput(String), + + #[error("initialization error: {0}")] + Init(String), +} + +impl From for MemoryStoreError { + fn from(e: crate::storage::StorageError) -> Self { + use crate::storage::StorageError as S; + match e { + S::NotFound(s) => MemoryStoreError::NotFound(s), + S::Database(e) => MemoryStoreError::Backend(e.to_string()), + S::Io(e) => MemoryStoreError::Backend(e.to_string()), + S::InvalidTimestamp(s) => MemoryStoreError::Backend(format!("invalid timestamp: {s}")), + S::Init(s) => MemoryStoreError::Init(s), + } + } +} + +pub type MemoryStoreResult = std::result::Result; + +// ---------------------------------------------------------------------------- +// DATA TYPES +// ---------------------------------------------------------------------------- + +/// Backend-agnostic memory record. 
+/// +/// Phase 1 intentionally keeps this type independent of `KnowledgeNode` to +/// avoid dragging 30+ legacy fields through the trait surface. The SQLite +/// backend converts between `MemoryRecord` and `KnowledgeNode` at the +/// boundary. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MemoryRecord { + pub id: Uuid, + /// Empty = unclassified. Populated in Phase 4. + pub domains: Vec, + /// Raw similarity per domain centroid. Empty until Phase 4 runs clustering. + pub domain_scores: HashMap, + pub content: String, + pub node_type: String, + pub tags: Vec, + pub embedding: Option>, + pub created_at: DateTime, + pub updated_at: DateTime, + pub metadata: serde_json::Value, +} + +/// FSRS-6 scheduling state, one row per memory. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SchedulingState { + pub memory_id: Uuid, + pub stability: f64, + pub difficulty: f64, + pub retrievability: f64, + pub last_review: Option>, + pub next_review: Option>, + pub reps: u32, + pub lapses: u32, +} + +/// Hybrid search request. +#[derive(Debug, Clone, Default)] +pub struct SearchQuery { + pub domains: Option>, + pub text: Option, + pub embedding: Option>, + pub tags: Option>, + pub node_types: Option>, + pub limit: usize, + pub min_retrievability: Option, +} + +#[derive(Debug, Clone)] +pub struct SearchResult { + pub record: MemoryRecord, + pub score: f64, + pub fts_score: Option, + pub vector_score: Option, +} + +/// Edge in the spreading-activation graph. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MemoryEdge { + pub source_id: Uuid, + pub target_id: Uuid, + pub edge_type: String, + pub weight: f64, + pub created_at: DateTime, +} + +/// A topical domain (populated in Phase 4). Phase 1 only needs the type to +/// shape the trait surface; discover/classify are Phase 4 work. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Domain { + pub id: String, + pub label: String, + pub centroid: Vec, + pub top_terms: Vec, + pub memory_count: usize, + pub created_at: DateTime, +} + +/// Result of classifying one vector against all known domains. +#[derive(Debug, Clone)] +pub struct ClassificationResult { + pub scores: HashMap, + pub domains: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct StoreStats { + pub total_memories: usize, + pub memories_with_embeddings: usize, + pub total_edges: usize, + pub total_domains: usize, + pub registered_model_name: Option, + pub registered_model_dim: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum HealthStatus { + Healthy, + Degraded { reason: String }, + Unavailable { reason: String }, +} + +// ---------------------------------------------------------------------------- +// EMBEDDING MODEL SIGNATURE +// ---------------------------------------------------------------------------- + +/// Snapshot of the embedding model that was used to write vectors into the +/// store. Persisted in the `embedding_model` table; compared on every write +/// before the vector is accepted. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ModelSignature { + pub name: String, + pub dimension: usize, + /// Lowercase hex-encoded blake3 hash, 64 chars. + pub hash: String, +} + +// ---------------------------------------------------------------------------- +// TRAIT +// ---------------------------------------------------------------------------- + +/// The single storage abstraction. +/// +/// `#[async_trait::async_trait]` makes every `async fn` return a +/// `Pin>`, which is required for `Arc` +/// to be movable across `tokio::spawn` boundaries. +/// +/// `LocalMemoryStore` is a type alias kept for source compatibility with code +/// that refers to the non-send variant. 
In Phase 1 both names refer to the same +/// (dyn-compatible, Send-safe) trait. +#[async_trait::async_trait] +pub trait MemoryStore: Send + Sync + 'static { + // --- Lifecycle --- + async fn init(&self) -> MemoryStoreResult<()>; + async fn health_check(&self) -> MemoryStoreResult; + + // --- Embedding model registry --- + async fn registered_model(&self) -> MemoryStoreResult>; + async fn register_model(&self, sig: &ModelSignature) -> MemoryStoreResult<()>; + + // --- CRUD --- + async fn insert(&self, record: &MemoryRecord) -> MemoryStoreResult; + async fn get(&self, id: Uuid) -> MemoryStoreResult>; + async fn update(&self, record: &MemoryRecord) -> MemoryStoreResult<()>; + async fn delete(&self, id: Uuid) -> MemoryStoreResult<()>; + + // --- Search --- + async fn search(&self, query: &SearchQuery) -> MemoryStoreResult>; + async fn fts_search(&self, text: &str, limit: usize) -> MemoryStoreResult>; + async fn vector_search( + &self, + embedding: &[f32], + limit: usize, + ) -> MemoryStoreResult>; + + // --- FSRS Scheduling --- + async fn get_scheduling(&self, memory_id: Uuid) -> MemoryStoreResult>; + async fn update_scheduling(&self, state: &SchedulingState) -> MemoryStoreResult<()>; + async fn get_due_memories( + &self, + before: DateTime, + limit: usize, + ) -> MemoryStoreResult>; + + // --- Graph (spreading activation) --- + async fn add_edge(&self, edge: &MemoryEdge) -> MemoryStoreResult<()>; + async fn get_edges( + &self, + node_id: Uuid, + edge_type: Option<&str>, + ) -> MemoryStoreResult>; + async fn remove_edge(&self, source: Uuid, target: Uuid) -> MemoryStoreResult<()>; + async fn get_neighbors( + &self, + node_id: Uuid, + depth: usize, + ) -> MemoryStoreResult>; + + // --- Domains (Phase 1: stubs return empty; full impl in Phase 4) --- + async fn list_domains(&self) -> MemoryStoreResult>; + async fn get_domain(&self, id: &str) -> MemoryStoreResult>; + async fn upsert_domain(&self, domain: &Domain) -> MemoryStoreResult<()>; + async fn delete_domain(&self, id: 
&str) -> MemoryStoreResult<()>; + /// Phase 1: returns `Ok(vec![])` since no centroids exist. Phase 4 wires + /// the full soft-assignment pass. + async fn classify(&self, embedding: &[f32]) -> MemoryStoreResult>; + + // --- Bulk / Maintenance --- + async fn count(&self) -> MemoryStoreResult; + async fn get_stats(&self) -> MemoryStoreResult; + async fn vacuum(&self) -> MemoryStoreResult<()>; +} + +/// Type alias kept for source compatibility. Both names refer to the same +/// `async_trait`-annotated trait that is dyn-compatible and `Send + Sync`. +pub use MemoryStore as LocalMemoryStore; + +// ---------------------------------------------------------------------------- +// UNIT TESTS +// ---------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::storage::StorageError; + + #[test] + fn memory_store_error_from_storage_error() { + let se = StorageError::NotFound("abc".to_string()); + let mse = MemoryStoreError::from(se); + assert!(matches!(mse, MemoryStoreError::NotFound(_))); + + let se2 = StorageError::Init("init failure".to_string()); + let mse2 = MemoryStoreError::from(se2); + assert!(matches!(mse2, MemoryStoreError::Init(_))); + } + + #[test] + fn model_signature_serde_round_trip() { + let sig = ModelSignature { + name: "nomic-ai/nomic-embed-text-v1.5".to_string(), + dimension: 256, + hash: "a".repeat(64), + }; + let json = serde_json::to_string(&sig).expect("serialize"); + let sig2: ModelSignature = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(sig, sig2); + } + + #[test] + fn memory_record_serde_round_trip() { + let rec = MemoryRecord { + id: Uuid::new_v4(), + domains: vec!["dev".to_string()], + domain_scores: { + let mut m = HashMap::new(); + m.insert("dev".to_string(), 0.9); + m + }, + content: "hello".to_string(), + node_type: "fact".to_string(), + tags: vec!["tag1".to_string()], + embedding: None, + created_at: Utc::now(), + updated_at: Utc::now(), + metadata: 
serde_json::json!({}), + }; + let json = serde_json::to_string(&rec).expect("serialize"); + let rec2: MemoryRecord = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(rec.content, rec2.content); + assert_eq!(rec.domains, rec2.domains); + } +} diff --git a/crates/vestige-core/src/storage/migrations.rs b/crates/vestige-core/src/storage/migrations.rs index 127bc84..c0c60d2 100644 --- a/crates/vestige-core/src/storage/migrations.rs +++ b/crates/vestige-core/src/storage/migrations.rs @@ -79,6 +79,11 @@ pub const MIGRATIONS: &[Migration] = &[ description: "ComposedGraph: composition events, members, outcomes", up: MIGRATION_V15_UP, }, + Migration { + version: 16, + description: "ADR 0001 Phase 1: embedding_model registry, domains/domain_scores columns, domains table", + up: MIGRATION_V16_UP, + }, ]; /// A database migration @@ -904,6 +909,54 @@ fn add_column_if_missing(conn: &rusqlite::Connection, sql: &str) -> rusqlite::Re } } +/// V16: ADR 0001 Phase 1 - embedding_model registry + domain columns. +/// +/// The ALTER TABLE statements are split out into `MIGRATION_V16_ALTER_COLUMNS` +/// because SQLite has no `ALTER TABLE ... ADD COLUMN IF NOT EXISTS`. The +/// migration runner handles them individually so replaying V16 is idempotent. +const MIGRATION_V16_UP: &str = r#" +-- Migration V16: embedding model registry + per-memory domain columns. + +-- 1. Embedding model registry. Single logical row: the CHECK (id = 1) +-- constraint on the primary key below rejects every other id, so the +-- table can hold at most one registered model signature. +CREATE TABLE IF NOT EXISTS embedding_model ( + id INTEGER PRIMARY KEY CHECK (id = 1), + name TEXT NOT NULL, + dimension INTEGER NOT NULL, + hash TEXT NOT NULL, + created_at TEXT NOT NULL +); + +-- 2. Per-memory domain columns are applied separately (see apply_migrations). + +-- 3. Index on the domains JSON column to enable LIKE-style filter in Phase 4.
+CREATE INDEX IF NOT EXISTS idx_nodes_domains ON knowledge_nodes(domains); +CREATE INDEX IF NOT EXISTS idx_nodes_domain_scores ON knowledge_nodes(domain_scores); + +-- 4. Domains catalogue (empty until Phase 4 populates). +CREATE TABLE IF NOT EXISTS domains ( + id TEXT PRIMARY KEY, + label TEXT NOT NULL, + centroid BLOB, + top_terms TEXT NOT NULL DEFAULT '[]', + memory_count INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_domains_created_at ON domains(created_at); + +UPDATE schema_version SET version = 16, applied_at = datetime('now'); +"#; + +/// The two ALTER TABLE statements for V16. Kept separate so the migration +/// runner can try each individually and ignore "duplicate column" errors, +/// making V16 idempotent on replay (SQLite has no ADD COLUMN IF NOT EXISTS). +pub const MIGRATION_V16_ALTER_COLUMNS: &[&str] = &[ + "ALTER TABLE knowledge_nodes ADD COLUMN domains TEXT NOT NULL DEFAULT '[]'", + "ALTER TABLE knowledge_nodes ADD COLUMN domain_scores TEXT NOT NULL DEFAULT '{}'", +]; + /// Apply pending migrations pub fn apply_migrations(conn: &rusqlite::Connection) -> rusqlite::Result { let current_version = get_current_version(conn)?; @@ -932,6 +985,15 @@ pub fn apply_migrations(conn: &rusqlite::Connection) -> rusqlite::Result { )?; } + // V16 adds columns via ALTER TABLE, which SQLite does not support + // with IF NOT EXISTS. Run them individually and ignore duplicate + // column errors so replay stays idempotent. + if migration.version == 16 { + for stmt in MIGRATION_V16_ALTER_COLUMNS { + add_column_if_missing(conn, stmt)?; + } + } + // Use execute_batch to handle multi-statement SQL including triggers conn.execute_batch(migration.up)?; @@ -958,17 +1020,17 @@ mod tests { /// version after `apply_migrations` runs all migrations end-to-end, and /// neither of the dead tables V11 drops must exist afterwards. 
#[test] - fn test_apply_migrations_advances_to_v15_and_drops_dead_tables() { + fn test_apply_migrations_advances_to_v16_and_drops_dead_tables() { let conn = rusqlite::Connection::open_in_memory().expect("open in-memory"); // Pre-requisite: schema_version must be bootstrapped by V1. apply_migrations(&conn).expect("apply_migrations succeeds"); - // 1. schema_version advanced to V15 + // 1. schema_version advanced to V16 let version = get_current_version(&conn).expect("read schema_version"); assert_eq!( - version, 15, - "schema_version must be 15 after all migrations" + version, 16, + "schema_version must be 16 after all migrations" ); // 2. knowledge_edges is gone (V11 drops it) @@ -1086,10 +1148,132 @@ mod tests { conn.execute("UPDATE schema_version SET version = 10", []) .expect("rewind schema_version"); - // Replay must not error. - apply_migrations(&conn).expect("V11 replay must be idempotent"); + // Replay V11 onward. V11 uses DROP TABLE IF EXISTS so it is idempotent. + // V12/V13 tombstone tables use CREATE TABLE IF NOT EXISTS. V14/V16 ALTER + // TABLE idempotency is handled by the migration runner. + apply_migrations(&conn).expect("V11..V16 replay must be idempotent"); + // After replaying from V10, the schema advances to the latest version. 
let version = get_current_version(&conn).expect("read schema_version"); - assert_eq!(version, 15, "schema_version back at 15 after replay"); + assert_eq!(version, 16, "schema_version back at 16 after replay"); + } + + #[test] + fn v16_adds_embedding_model_table() { + let conn = rusqlite::Connection::open_in_memory().expect("open in-memory"); + apply_migrations(&conn).expect("apply_migrations"); + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='embedding_model'", + [], + |row| row.get(0), + ) + .expect("query sqlite_master"); + assert_eq!(count, 1, "embedding_model table must exist after V16"); + } + + #[test] + fn v16_adds_domains_columns() { + let conn = rusqlite::Connection::open_in_memory().expect("open in-memory"); + apply_migrations(&conn).expect("apply_migrations"); + let info: Vec = { + let mut stmt = conn + .prepare("PRAGMA table_info(knowledge_nodes)") + .expect("prepare"); + stmt.query_map([], |row| row.get::<_, String>(1)) + .expect("query_map") + .map(|r| r.expect("row")) + .collect() + }; + assert!( + info.contains(&"domains".to_string()), + "domains column missing" + ); + assert!( + info.contains(&"domain_scores".to_string()), + "domain_scores column missing" + ); + } + + #[test] + fn v16_default_values_empty_json() { + let conn = rusqlite::Connection::open_in_memory().expect("open in-memory"); + apply_migrations(&conn).expect("apply_migrations"); + // Insert a minimal row to test defaults + conn.execute( + "INSERT INTO knowledge_nodes (id, content, node_type, created_at, updated_at, last_accessed, \ + stability, difficulty, reps, lapses, learning_state, storage_strength, retrieval_strength, \ + retention_strength, next_review, scheduled_days, has_embedding) \ + VALUES ('test-id','content','fact',datetime('now'),datetime('now'),datetime('now'),\ + 1.0,0.3,0,0,'new',1.0,1.0,1.0,datetime('now'),1,0)", + [], + ).expect("insert row"); + let (domains, domain_scores): (String, String) = conn + 
.query_row( + "SELECT domains, domain_scores FROM knowledge_nodes WHERE id='test-id'", + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .expect("query row"); + assert_eq!(domains, "[]"); + assert_eq!(domain_scores, "{}"); + } + + #[test] + fn v16_is_replayable() { + let conn = rusqlite::Connection::open_in_memory().expect("open in-memory"); + apply_migrations(&conn).expect("first apply"); + // Rewind to V15 so V16 runs again. + conn.execute("UPDATE schema_version SET version = 15", []) + .expect("rewind"); + // V16 uses CREATE TABLE IF NOT EXISTS and idempotent ALTER handling. + apply_migrations(&conn).expect("V16 replay must be idempotent"); + let version = get_current_version(&conn).expect("read version"); + assert_eq!(version, 16, "schema_version must be 16 after replay"); + } + + #[test] + fn v16_preserves_existing_rows_from_v15() { + let conn = rusqlite::Connection::open_in_memory().expect("open in-memory"); + // Apply up to V15 only, including the V14 ALTER TABLE columns that + // `apply_migrations` normally runs before the V14 SQL batch. + for migration in MIGRATIONS { + if migration.version <= 15 { + if migration.version == 14 { + add_column_if_missing( + &conn, + "ALTER TABLE knowledge_nodes ADD COLUMN protected INTEGER NOT NULL DEFAULT 0", + ) + .expect("apply V14 protected column"); + add_column_if_missing( + &conn, + "ALTER TABLE knowledge_nodes ADD COLUMN superseded_by TEXT", + ) + .expect("apply V14 superseded_by column"); + } + conn.execute_batch(migration.up).expect("apply migration"); + } + } + // Insert a row under the V15 schema, before PR #61's V16 columns exist. 
+ conn.execute( + "INSERT INTO knowledge_nodes (id, content, node_type, created_at, updated_at, last_accessed, \ + stability, difficulty, reps, lapses, learning_state, storage_strength, retrieval_strength, \ + retention_strength, next_review, scheduled_days, has_embedding) \ + VALUES ('existing-id','old content','fact',datetime('now'),datetime('now'),datetime('now'),\ + 1.0,0.3,0,0,'new',1.0,1.0,1.0,datetime('now'),1,0)", + [], + ).expect("insert pre-v16 row"); + apply_migrations(&conn).expect("apply V16 migration"); + + // Check the old row has defaults + let (domains, domain_scores): (String, String) = conn + .query_row( + "SELECT domains, domain_scores FROM knowledge_nodes WHERE id='existing-id'", + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .expect("query pre-v16 row"); + assert_eq!(domains, "[]"); + assert_eq!(domain_scores, "{}"); } } diff --git a/crates/vestige-core/src/storage/mod.rs b/crates/vestige-core/src/storage/mod.rs index 282228d..6926385 100644 --- a/crates/vestige-core/src/storage/mod.rs +++ b/crates/vestige-core/src/storage/mod.rs @@ -1,15 +1,17 @@ //! Storage Module //! -//! SQLite-based storage layer with: -//! - FTS5 full-text search with query sanitization -//! - Embedded vector storage -//! - FSRS-6 state management -//! - Temporal memory support +//! Backend-agnostic memory store abstraction plus SQLite reference impl. 
+mod memory_store; mod migrations; mod portable; mod sqlite; +pub use memory_store::{ + ClassificationResult, Domain, HealthStatus, LocalMemoryStore, MemoryEdge, MemoryRecord, + MemoryStore, MemoryStoreError, MemoryStoreResult, ModelSignature, SchedulingState, SearchQuery, + SearchResult, StoreStats, +}; pub use migrations::MIGRATIONS; pub use portable::{ PORTABLE_ARCHIVE_FORMAT, PortableArchive, PortableImportMode, PortableImportReport, @@ -19,6 +21,11 @@ pub use sqlite::{ CompositionEventRecord, CompositionMemberRecord, CompositionNeighborRecord, CompositionOutcomeRecord, ConnectionRecord, ConsolidationHistoryRecord, DreamHistoryRecord, FilePortableSyncBackend, InsightRecord, IntentionRecord, NeverComposedCandidate, - PortableSyncBackend, PortableSyncReport, Result, SmartIngestResult, StateTransitionRecord, - Storage, StorageError, + PortableSyncBackend, PortableSyncReport, Result, SmartIngestResult, SqliteMemoryStore, + StateTransitionRecord, StorageError, }; + +/// Backwards-compatibility alias. Retained until Phase 4 completes so every +/// existing `Arc<Storage>` call site keeps compiling. Scheduled for removal +/// once no downstream source file references it. +pub type Storage = SqliteMemoryStore; diff --git a/crates/vestige-core/src/storage/sqlite.rs b/crates/vestige-core/src/storage/sqlite.rs index 94ed45b..57eaa86 100644 --- a/crates/vestige-core/src/storage/sqlite.rs +++ b/crates/vestige-core/src/storage/sqlite.rs @@ -299,7 +299,7 @@ const DATABASE_FILE: &str = "vestige.db"; /// Uses separate reader/writer connections for interior mutability. /// All methods take `&self` (not `&mut self`), making Storage `Send + Sync` /// so the MCP layer can use `Arc<Storage>` instead of `Arc<Mutex<Storage>>`.
-pub struct Storage { +pub struct SqliteMemoryStore { db_path: PathBuf, writer: Mutex, reader: Mutex, @@ -311,9 +311,11 @@ pub struct Storage { /// LRU cache for query embeddings to avoid re-embedding repeated queries #[cfg(all(feature = "embeddings", feature = "vector-search"))] query_cache: Mutex>>, + /// Cached model signature. `None` until the first embedding is written. + registered_model: std::sync::RwLock>, } -impl Storage { +impl SqliteMemoryStore { fn data_dir_from_env() -> Option { std::env::var_os(DATA_DIR_ENV).and_then(|value| { if value.is_empty() { @@ -458,6 +460,7 @@ impl Storage { vector_index: Mutex::new(vector_index), #[cfg(all(feature = "embeddings", feature = "vector-search"))] query_cache, + registered_model: std::sync::RwLock::new(None), }; #[cfg(all(feature = "embeddings", feature = "vector-search"))] @@ -595,13 +598,15 @@ impl Storage { stability, difficulty, reps, lapses, learning_state, storage_strength, retrieval_strength, retention_strength, sentiment_score, sentiment_magnitude, next_review, scheduled_days, - source, tags, valid_from, valid_until, has_embedding, embedding_model + source, tags, valid_from, valid_until, has_embedding, embedding_model, + domains, domain_scores ) VALUES ( ?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, - ?19, ?20, ?21, ?22, ?23, ?24 + ?19, ?20, ?21, ?22, ?23, ?24, + '[]', '{}' )", params![ id, @@ -4120,7 +4125,7 @@ pub struct NeverComposedCandidate { pub composition_question: String, } -impl Storage { +impl SqliteMemoryStore { // ======================================================================== // COMPOSEDGRAPH PERSISTENCE // ======================================================================== @@ -8285,6 +8290,1014 @@ fn preview(content: &str, max: usize) -> String { } } +// ============================================================================ +// LOCAL MEMORY STORE TRAIT IMPL +// ============================================================================ + 
+impl SqliteMemoryStore { + /// Convert a `KnowledgeNode` (plus optional embedding vector read separately) + /// into a `MemoryRecord` for the trait surface. + fn node_to_record( + node: KnowledgeNode, + embedding: Option>, + ) -> crate::storage::memory_store::MemoryRecord { + use crate::storage::memory_store::MemoryRecord; + let id = uuid::Uuid::parse_str(&node.id).unwrap_or_else(|_| uuid::Uuid::new_v4()); + MemoryRecord { + id, + domains: Vec::new(), + domain_scores: std::collections::HashMap::new(), + content: node.content, + node_type: node.node_type, + tags: node.tags, + embedding, + created_at: node.created_at, + updated_at: node.updated_at, + metadata: serde_json::json!({ + "source": node.source, + "stability": node.stability, + "difficulty": node.difficulty, + "reps": node.reps, + "lapses": node.lapses, + "retention_strength": node.retention_strength, + }), + } + } + + /// Read domains and domain_scores JSON columns for a node by id. + fn read_domain_columns( + &self, + id: &str, + ) -> (Vec, std::collections::HashMap) { + let reader = match self.reader.lock() { + Ok(r) => r, + Err(_) => return (Vec::new(), std::collections::HashMap::new()), + }; + let result = reader.query_row( + "SELECT domains, domain_scores FROM knowledge_nodes WHERE id = ?1", + rusqlite::params![id], + |row| { + let d: Option = row.get(0).ok().flatten(); + let ds: Option = row.get(1).ok().flatten(); + Ok((d, ds)) + }, + ); + match result { + Ok((d, ds)) => { + let domains: Vec = d + .and_then(|s| serde_json::from_str(&s).ok()) + .unwrap_or_default(); + let domain_scores: std::collections::HashMap = ds + .and_then(|s| serde_json::from_str(&s).ok()) + .unwrap_or_default(); + (domains, domain_scores) + } + Err(_) => (Vec::new(), std::collections::HashMap::new()), + } + } + + /// Enforce the registered embedding model. 
Returns `Ok(())` if: + /// - no vector is being written (`incoming.is_none()`) and nothing is registered + /// - the incoming signature matches the registered signature + /// + /// Auto-registers on the first embedded write. + fn enforce_model( + &self, + incoming: Option<&crate::storage::memory_store::ModelSignature>, + ) -> crate::storage::memory_store::MemoryStoreResult<()> { + use crate::storage::memory_store::{MemoryStoreError, ModelSignature}; + let Some(incoming) = incoming else { + return Ok(()); + }; + // Try from cache first + { + let guard = self + .registered_model + .read() + .map_err(|_| MemoryStoreError::Init("registered_model rwlock poisoned".into()))?; + if let Some(ref reg) = *guard { + if reg == incoming { + return Ok(()); + } + return Err(MemoryStoreError::ModelMismatch { + registered_name: reg.name.clone(), + registered_dim: reg.dimension, + registered_hash: reg.hash.clone(), + actual_name: incoming.name.clone(), + actual_dim: incoming.dimension, + actual_hash: incoming.hash.clone(), + }); + } + } + // Not registered yet -- auto-register + let now = Utc::now().to_rfc3339(); + let writer = self + .writer + .lock() + .map_err(|_| MemoryStoreError::Init("Writer lock poisoned".into()))?; + // Try INSERT OR IGNORE + writer.execute( + "INSERT OR IGNORE INTO embedding_model (id, name, dimension, hash, created_at) VALUES (1, ?1, ?2, ?3, ?4)", + rusqlite::params![incoming.name, incoming.dimension as i64, incoming.hash, now], + ).map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + // Read back what was stored + let stored: Option = writer + .query_row( + "SELECT name, dimension, hash FROM embedding_model WHERE id = 1", + [], + |row| { + let name: String = row.get(0)?; + let dim: i64 = row.get(1)?; + let hash: String = row.get(2)?; + Ok(ModelSignature { + name, + dimension: dim as usize, + hash, + }) + }, + ) + .optional() + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + drop(writer); + if let Some(stored) = stored { + if stored != 
*incoming { + return Err(MemoryStoreError::ModelMismatch { + registered_name: stored.name, + registered_dim: stored.dimension, + registered_hash: stored.hash, + actual_name: incoming.name.clone(), + actual_dim: incoming.dimension, + actual_hash: incoming.hash.clone(), + }); + } + // Populate cache + let mut guard = self + .registered_model + .write() + .map_err(|_| MemoryStoreError::Init("registered_model rwlock poisoned".into()))?; + *guard = Some(stored); + } + Ok(()) + } +} + +#[async_trait::async_trait] +impl crate::storage::memory_store::LocalMemoryStore for SqliteMemoryStore { + async fn init(&self) -> crate::storage::memory_store::MemoryStoreResult<()> { + // Migrations run in `new`; this is a no-op for the SQLite backend. + Ok(()) + } + + async fn health_check( + &self, + ) -> crate::storage::memory_store::MemoryStoreResult + { + use crate::storage::memory_store::HealthStatus; + let reader = self.reader.lock().map_err(|_| { + crate::storage::memory_store::MemoryStoreError::Init("Reader lock poisoned".into()) + })?; + let ok: rusqlite::Result = reader.query_row("SELECT 1", [], |row| row.get(0)); + if ok.is_ok() { + Ok(HealthStatus::Healthy) + } else { + Ok(HealthStatus::Degraded { + reason: "SQLite connectivity check failed".to_string(), + }) + } + } + + async fn registered_model( + &self, + ) -> crate::storage::memory_store::MemoryStoreResult< + Option, + > { + use crate::storage::memory_store::MemoryStoreError; + // Check cache first + { + let guard = self + .registered_model + .read() + .map_err(|_| MemoryStoreError::Init("registered_model rwlock poisoned".into()))?; + if guard.is_some() { + return Ok(guard.clone()); + } + } + // Fall through to DB read + let reader = self + .reader + .lock() + .map_err(|_| MemoryStoreError::Init("Reader lock poisoned".into()))?; + let stored: Option = reader + .query_row( + "SELECT name, dimension, hash FROM embedding_model WHERE id = 1", + [], + |row| { + let name: String = row.get(0)?; + let dim: i64 = row.get(1)?; + 
let hash: String = row.get(2)?; + Ok(crate::storage::memory_store::ModelSignature { + name, + dimension: dim as usize, + hash, + }) + }, + ) + .optional() + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + drop(reader); + // Populate cache if we read something + if stored.is_some() { + let mut guard = self + .registered_model + .write() + .map_err(|_| MemoryStoreError::Init("registered_model rwlock poisoned".into()))?; + *guard = stored.clone(); + } + Ok(stored) + } + + async fn register_model( + &self, + sig: &crate::storage::memory_store::ModelSignature, + ) -> crate::storage::memory_store::MemoryStoreResult<()> { + self.enforce_model(Some(sig)) + } + + async fn insert( + &self, + record: &crate::storage::memory_store::MemoryRecord, + ) -> crate::storage::memory_store::MemoryStoreResult { + use crate::storage::memory_store::{MemoryStoreError, ModelSignature}; + // Enforce model registry if embedding is provided + if let Some(vec) = &record.embedding { + // Derive a signature from metadata if present; without one, enforcement is skipped + let sig: Option = record + .metadata + .get("model_name") + .and_then(|v| v.as_str()) + .zip( + record + .metadata + .get("model_dim") + .and_then(|v| v.as_u64()) + .map(|d| d as usize), + ) + .zip(record.metadata.get("model_hash").and_then(|v| v.as_str())) + .map(|((name, dim), hash)| ModelSignature { + name: name.to_string(), + dimension: dim, + hash: hash.to_string(), + }); + if let Some(ref s) = sig { // Validate the vector first: enforce_model auto-registers on the first embedded write + if vec.len() != s.dimension { + return Err(MemoryStoreError::InvalidInput(format!( + "embedding length {} != registered dimension {}", + vec.len(), + s.dimension + ))); + } + self.enforce_model(Some(s))?; + } + } + // Insert directly using the record's own id so the caller-supplied UUID is + // preserved (unlike ingest() which always generates a fresh UUID).
+ let id_str = record.id.to_string(); + let now = chrono::Utc::now(); + let tags_json = serde_json::to_string(&record.tags).unwrap_or_else(|_| "[]".to_string()); + let domains_json = + serde_json::to_string(&record.domains).unwrap_or_else(|_| "[]".to_string()); + let scores_json = + serde_json::to_string(&record.domain_scores).unwrap_or_else(|_| "{}".to_string()); + let source: Option = record + .metadata + .get("source") + .and_then(|v| v.as_str()) + .map(str::to_string); + { + let writer = self + .writer + .lock() + .map_err(|_| MemoryStoreError::Init("Writer lock poisoned".into()))?; + writer + .execute( + "INSERT INTO knowledge_nodes ( + id, content, node_type, created_at, updated_at, last_accessed, + stability, difficulty, reps, lapses, learning_state, + storage_strength, retrieval_strength, retention_strength, + sentiment_score, sentiment_magnitude, next_review, scheduled_days, + source, tags, has_embedding, embedding_model, + domains, domain_scores + ) VALUES ( + ?1, ?2, ?3, ?4, ?5, ?6, + 1.0, 0.3, 0, 0, 'new', + 1.0, 1.0, 1.0, + 0.0, 0.0, ?7, 1, + ?8, ?9, 0, NULL, + ?10, ?11 + )", + rusqlite::params![ + id_str, + record.content, + record.node_type, + record.created_at.to_rfc3339(), + record.updated_at.to_rfc3339(), + now.to_rfc3339(), + (now + chrono::Duration::days(1)).to_rfc3339(), + source, + tags_json, + domains_json, + scores_json, + ], + ) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + } + Ok(record.id) + } + + async fn get( + &self, + id: uuid::Uuid, + ) -> crate::storage::memory_store::MemoryStoreResult< + Option, + > { + use crate::storage::memory_store::MemoryStoreError; + let node = self + .get_node(&id.to_string()) + .map_err(MemoryStoreError::from)?; + let Some(node) = node else { + return Ok(None); + }; + let (domains, domain_scores) = self.read_domain_columns(&id.to_string()); + #[cfg(all(feature = "embeddings", feature = "vector-search"))] + let embedding = self.get_node_embedding(&id.to_string()).ok().flatten(); + 
#[cfg(not(all(feature = "embeddings", feature = "vector-search")))] + let embedding: Option> = None; + let mut rec = Self::node_to_record(node, embedding); + rec.domains = domains; + rec.domain_scores = domain_scores; + Ok(Some(rec)) + } + + async fn update( + &self, + record: &crate::storage::memory_store::MemoryRecord, + ) -> crate::storage::memory_store::MemoryStoreResult<()> { + use crate::storage::memory_store::MemoryStoreError; + self.update_node_content(&record.id.to_string(), &record.content) + .map_err(MemoryStoreError::from)?; + // Update domains/domain_scores + let domains_json = + serde_json::to_string(&record.domains).unwrap_or_else(|_| "[]".to_string()); + let scores_json = + serde_json::to_string(&record.domain_scores).unwrap_or_else(|_| "{}".to_string()); + let writer = self + .writer + .lock() + .map_err(|_| MemoryStoreError::Init("Writer lock poisoned".into()))?; + writer + .execute( + "UPDATE knowledge_nodes SET domains = ?1, domain_scores = ?2 WHERE id = ?3", + rusqlite::params![domains_json, scores_json, record.id.to_string()], + ) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + Ok(()) + } + + async fn delete(&self, id: uuid::Uuid) -> crate::storage::memory_store::MemoryStoreResult<()> { + use crate::storage::memory_store::MemoryStoreError; + self.delete_node(&id.to_string()) + .map_err(MemoryStoreError::from)?; + Ok(()) + } + + async fn search( + &self, + query: &crate::storage::memory_store::SearchQuery, + ) -> crate::storage::memory_store::MemoryStoreResult< + Vec, + > { + use crate::storage::memory_store::{MemoryStoreError, SearchResult}; + // For Phase 1 we delegate to hybrid_search or keyword_search based on what is provided. 
+ let limit = if query.limit == 0 { 10 } else { query.limit }; + #[cfg(all(feature = "embeddings", feature = "vector-search"))] + { + if let Some(ref text) = query.text { + let results = self + .hybrid_search(text, limit as i32, 0.3, 0.7) + .map_err(MemoryStoreError::from)?; + let out = results + .into_iter() + .map(|r| { + let (domains, domain_scores) = self.read_domain_columns(&r.node.id); + let mut rec = Self::node_to_record(r.node, None); + rec.domains = domains; + rec.domain_scores = domain_scores; + SearchResult { + score: r.combined_score as f64, + fts_score: r.keyword_score.map(|s| s as f64), + vector_score: r.semantic_score.map(|s| s as f64), + record: rec, + } + }) + .collect(); + return Ok(out); + } + } + #[cfg(not(all(feature = "embeddings", feature = "vector-search")))] + { + if let Some(ref text) = query.text { + // Use individual-term matching so multi-word queries find documents + // where all words appear anywhere (not necessarily as a phrase). + let nodes = self + .search_terms(text, limit as i32) + .map_err(MemoryStoreError::from)?; + let out = nodes + .into_iter() + .map(|node| { + let (domains, domain_scores) = self.read_domain_columns(&node.id); + let mut rec = Self::node_to_record(node, None); + rec.domains = domains; + rec.domain_scores = domain_scores; + SearchResult { + record: rec, + score: 1.0, + fts_score: Some(1.0), + vector_score: None, + } + }) + .collect(); + return Ok(out); + } + } + Ok(vec![]) + } + + async fn fts_search( + &self, + text: &str, + limit: usize, + ) -> crate::storage::memory_store::MemoryStoreResult< + Vec, + > { + use crate::storage::memory_store::{MemoryStoreError, SearchResult}; + // Use individual-term matching so multi-word queries find documents + // where all words appear anywhere (not necessarily as a phrase). 
+ let nodes = self + .search_terms(text, limit as i32) + .map_err(MemoryStoreError::from)?; + let out = nodes + .into_iter() + .map(|node| { + let (domains, domain_scores) = self.read_domain_columns(&node.id); + let mut rec = Self::node_to_record(node, None); + rec.domains = domains; + rec.domain_scores = domain_scores; + SearchResult { + record: rec, + score: 1.0, + fts_score: Some(1.0), + vector_score: None, + } + }) + .collect(); + Ok(out) + } + + async fn vector_search( + &self, + embedding: &[f32], + limit: usize, + ) -> crate::storage::memory_store::MemoryStoreResult< + Vec, + > { + use crate::storage::memory_store::{MemoryStoreError, SearchResult}; + #[cfg(all(feature = "embeddings", feature = "vector-search"))] + { + let index = self + .vector_index + .lock() + .map_err(|_| MemoryStoreError::Init("Vector index lock poisoned".into()))?; + let raw_results = index + .search_with_threshold(embedding, limit, 0.0_f32) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + drop(index); + let out = raw_results + .into_iter() + .filter_map(|(node_id, score)| { + let node = self.get_node(&node_id).ok().flatten()?; + let (domains, domain_scores) = self.read_domain_columns(&node_id); + let mut rec = Self::node_to_record(node, None); + rec.domains = domains; + rec.domain_scores = domain_scores; + Some(SearchResult { + record: rec, + score: score as f64, + fts_score: None, + vector_score: Some(score as f64), + }) + }) + .collect(); + return Ok(out); + } + #[cfg(not(all(feature = "embeddings", feature = "vector-search")))] + { + let _ = (embedding, limit); + Ok(vec![]) + } + } + + async fn get_scheduling( + &self, + memory_id: uuid::Uuid, + ) -> crate::storage::memory_store::MemoryStoreResult< + Option, + > { + use crate::storage::memory_store::{MemoryStoreError, SchedulingState}; + let node = self + .get_node(&memory_id.to_string()) + .map_err(MemoryStoreError::from)?; + let Some(node) = node else { + return Ok(None); + }; + Ok(Some(SchedulingState { + memory_id, + 
stability: node.stability, + difficulty: node.difficulty, + retrievability: node.retention_strength, + last_review: Some(node.last_accessed), + next_review: node.next_review, + reps: node.reps as u32, + lapses: node.lapses as u32, + })) + } + + async fn update_scheduling( + &self, + state: &crate::storage::memory_store::SchedulingState, + ) -> crate::storage::memory_store::MemoryStoreResult<()> { + use crate::storage::memory_store::MemoryStoreError; + let writer = self + .writer + .lock() + .map_err(|_| MemoryStoreError::Init("Writer lock poisoned".into()))?; + let next_review_str = state.next_review.map(|dt| dt.to_rfc3339()); + let last_review_str = state.last_review.map(|dt| dt.to_rfc3339()); + writer + .execute( + "UPDATE knowledge_nodes SET stability=?1, difficulty=?2, retention_strength=?3, + last_accessed=COALESCE(?4, last_accessed), next_review=?5, reps=?6, lapses=?7 + WHERE id=?8", + rusqlite::params![ + state.stability, + state.difficulty, + state.retrievability, + last_review_str, // None binds NULL, so COALESCE keeps the stored last_accessed + next_review_str, + state.reps as i64, + state.lapses as i64, + state.memory_id.to_string(), + ], + ) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + Ok(()) + } + + async fn get_due_memories( + &self, + before: chrono::DateTime, + limit: usize, + ) -> crate::storage::memory_store::MemoryStoreResult< + Vec<( + crate::storage::memory_store::MemoryRecord, + crate::storage::memory_store::SchedulingState, + )>, + > { + use crate::storage::memory_store::{MemoryStoreError, SchedulingState}; + let reader = self + .reader + .lock() + .map_err(|_| MemoryStoreError::Init("Reader lock poisoned".into()))?; + let before_str = before.to_rfc3339(); + let mut stmt = reader + .prepare( + "SELECT * FROM knowledge_nodes WHERE next_review <= ?1 ORDER BY next_review ASC LIMIT ?2", + ) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + let nodes: Vec = stmt + .query_map( + rusqlite::params![before_str, limit as i64], + Self::row_to_node, + ) + .map_err(|e| 
MemoryStoreError::Backend(e.to_string()))? + .collect::, _>>() + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + drop(stmt); + drop(reader); + let out = nodes + .into_iter() + .map(|node| { + let id_str = node.id.clone(); + let (domains, domain_scores) = self.read_domain_columns(&id_str); + let id_uuid = + uuid::Uuid::parse_str(&id_str).unwrap_or_else(|_| uuid::Uuid::new_v4()); + let state = SchedulingState { + memory_id: id_uuid, + stability: node.stability, + difficulty: node.difficulty, + retrievability: node.retention_strength, + last_review: Some(node.last_accessed), + next_review: node.next_review, + reps: node.reps as u32, + lapses: node.lapses as u32, + }; + let mut rec = Self::node_to_record(node, None); + rec.domains = domains; + rec.domain_scores = domain_scores; + (rec, state) + }) + .collect(); + Ok(out) + } + + async fn add_edge( + &self, + edge: &crate::storage::memory_store::MemoryEdge, + ) -> crate::storage::memory_store::MemoryStoreResult<()> { + use crate::storage::memory_store::MemoryStoreError; + let conn = ConnectionRecord { + source_id: edge.source_id.to_string(), + target_id: edge.target_id.to_string(), + strength: edge.weight, + link_type: edge.edge_type.clone(), + created_at: edge.created_at, + last_activated: edge.created_at, + activation_count: 0, + }; + self.save_connection(&conn).map_err(MemoryStoreError::from) + } + + async fn get_edges( + &self, + node_id: uuid::Uuid, + edge_type: Option<&str>, + ) -> crate::storage::memory_store::MemoryStoreResult< + Vec, + > { + use crate::storage::memory_store::{MemoryEdge, MemoryStoreError}; + let conns = self + .get_connections_for_memory(&node_id.to_string()) + .map_err(MemoryStoreError::from)?; + let edges = conns + .into_iter() + .filter(|c| edge_type.is_none_or(|t| c.link_type == t)) + .filter_map(|c| { + let src = uuid::Uuid::parse_str(&c.source_id).ok()?; + let tgt = uuid::Uuid::parse_str(&c.target_id).ok()?; + Some(MemoryEdge { + source_id: src, + target_id: tgt, + edge_type: 
c.link_type, + weight: c.strength, + created_at: c.created_at, + }) + }) + .collect(); + Ok(edges) + } + + async fn remove_edge( + &self, + source: uuid::Uuid, + target: uuid::Uuid, + ) -> crate::storage::memory_store::MemoryStoreResult<()> { + use crate::storage::memory_store::MemoryStoreError; + let writer = self + .writer + .lock() + .map_err(|_| MemoryStoreError::Init("Writer lock poisoned".into()))?; + writer + .execute( + "DELETE FROM memory_connections WHERE source_id = ?1 AND target_id = ?2", + rusqlite::params![source.to_string(), target.to_string()], + ) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + Ok(()) + } + + async fn get_neighbors( + &self, + node_id: uuid::Uuid, + depth: usize, + ) -> crate::storage::memory_store::MemoryStoreResult< + Vec<(crate::storage::memory_store::MemoryRecord, f64)>, + > { + use crate::storage::memory_store::MemoryStoreError; + // Depth 0: return just the node itself if it exists. + if depth == 0 { + let node = self + .get_node(&node_id.to_string()) + .map_err(MemoryStoreError::from)? + .ok_or_else(|| MemoryStoreError::NotFound(node_id.to_string()))?; + let (domains, domain_scores) = self.read_domain_columns(&node_id.to_string()); + let mut rec = Self::node_to_record(node, None); + rec.domains = domains; + rec.domain_scores = domain_scores; + return Ok(vec![(rec, 1.0)]); + } + // BFS up to `depth` levels, capped at 256 nodes. 
const MAX_NODES: usize = 256; + let mut visited: std::collections::HashMap<uuid::Uuid, f64> = + std::collections::HashMap::new(); + let mut frontier: Vec<(uuid::Uuid, f64)> = vec![(node_id, 1.0)]; + visited.insert(node_id, 1.0); + 'bfs: for _ in 0..depth { + if visited.len() >= MAX_NODES { + break; + } + let mut next_frontier = Vec::new(); + for (current, current_weight) in frontier.iter() { + let conns = self + .get_connections_for_memory(&current.to_string()) + .unwrap_or_default(); + for conn in conns { + let neighbor_id_str = if conn.source_id == current.to_string() { + conn.target_id + } else { + conn.source_id + }; + let Ok(nid) = uuid::Uuid::parse_str(&neighbor_id_str) else { + continue; + }; + if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(nid) { + let w = current_weight * conn.strength; + e.insert(w); + next_frontier.push((nid, w)); + if visited.len() >= MAX_NODES { // cap reached: leave the whole BFS, not just this node's edges + break 'bfs; + } + } + } + } + frontier = next_frontier; + if frontier.is_empty() { + break; + } + } + let mut result = Vec::with_capacity(visited.len()); + for (nid, weight) in visited { + let Some(node) = self.get_node(&nid.to_string()).ok().flatten() else { + continue; + }; + let (domains, domain_scores) = self.read_domain_columns(&nid.to_string()); + let mut rec = Self::node_to_record(node, None); + rec.domains = domains; + rec.domain_scores = domain_scores; + result.push((rec, weight)); + } + Ok(result) + } + + async fn list_domains( + &self, + ) -> crate::storage::memory_store::MemoryStoreResult> + { + use crate::storage::memory_store::{Domain, MemoryStoreError}; + let reader = self + .reader + .lock() + .map_err(|_| MemoryStoreError::Init("Reader lock poisoned".into()))?; + let mut stmt = reader + .prepare("SELECT id, label, centroid, top_terms, memory_count, created_at FROM domains ORDER BY created_at ASC") + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + let rows = stmt + .query_map([], |row| { + let id: String = row.get(0)?; + let label: String = row.get(1)?; + let centroid_bytes: 
Option> = row.get(2)?; + let top_terms_json: String = row.get(3)?; + let memory_count: i64 = row.get(4)?; + let created_at_str: String = row.get(5)?; + Ok(( + id, + label, + centroid_bytes, + top_terms_json, + memory_count, + created_at_str, + )) + }) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + let mut result = Vec::new(); + for row in rows { + let (id, label, centroid_bytes, top_terms_json, memory_count, created_at_str) = + row.map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + let centroid: Vec = centroid_bytes + .map(|b| { + b.chunks_exact(4) + .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]])) + .collect() + }) + .unwrap_or_default(); + let top_terms: Vec = serde_json::from_str(&top_terms_json).unwrap_or_default(); + let created_at = chrono::DateTime::parse_from_rfc3339(&created_at_str) + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or_else(|_| Utc::now()); + result.push(Domain { + id, + label, + centroid, + top_terms, + memory_count: memory_count as usize, + created_at, + }); + } + Ok(result) + } + + async fn get_domain( + &self, + id: &str, + ) -> crate::storage::memory_store::MemoryStoreResult> + { + use crate::storage::memory_store::{Domain, MemoryStoreError}; + let reader = self + .reader + .lock() + .map_err(|_| MemoryStoreError::Init("Reader lock poisoned".into()))?; + let result: Option<(String, String, Option>, String, i64, String)> = reader + .query_row( + "SELECT id, label, centroid, top_terms, memory_count, created_at FROM domains WHERE id = ?1", + rusqlite::params![id], + |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get(3)?, + row.get(4)?, + row.get(5)?, + )) + }, + ) + .optional() + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + let Some((id, label, centroid_bytes, top_terms_json, memory_count, created_at_str)) = + result + else { + return Ok(None); + }; + let centroid: Vec = centroid_bytes + .map(|b| { + b.chunks_exact(4) + .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]])) + 
.collect() + }) + .unwrap_or_default(); + let top_terms: Vec = serde_json::from_str(&top_terms_json).unwrap_or_default(); + let created_at = chrono::DateTime::parse_from_rfc3339(&created_at_str) + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or_else(|_| Utc::now()); + Ok(Some(Domain { + id, + label, + centroid, + top_terms, + memory_count: memory_count as usize, + created_at, + })) + } + + async fn upsert_domain( + &self, + domain: &crate::storage::memory_store::Domain, + ) -> crate::storage::memory_store::MemoryStoreResult<()> { + use crate::storage::memory_store::MemoryStoreError; + let centroid_bytes: Vec = domain + .centroid + .iter() + .flat_map(|f| f.to_le_bytes()) + .collect(); + let top_terms_json = + serde_json::to_string(&domain.top_terms).unwrap_or_else(|_| "[]".to_string()); + let writer = self + .writer + .lock() + .map_err(|_| MemoryStoreError::Init("Writer lock poisoned".into()))?; + writer + .execute( + "INSERT INTO domains (id, label, centroid, top_terms, memory_count, created_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?6) + ON CONFLICT(id) DO UPDATE SET + label = excluded.label, + centroid = excluded.centroid, + top_terms = excluded.top_terms, + memory_count = excluded.memory_count", + rusqlite::params![ + domain.id, + domain.label, + centroid_bytes, + top_terms_json, + domain.memory_count as i64, + domain.created_at.to_rfc3339(), + ], + ) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + Ok(()) + } + + async fn delete_domain(&self, id: &str) -> crate::storage::memory_store::MemoryStoreResult<()> { + use crate::storage::memory_store::MemoryStoreError; + let writer = self + .writer + .lock() + .map_err(|_| MemoryStoreError::Init("Writer lock poisoned".into()))?; + writer + .execute("DELETE FROM domains WHERE id = ?1", rusqlite::params![id]) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + Ok(()) + } + + async fn classify( + &self, + _embedding: &[f32], + ) -> crate::storage::memory_store::MemoryStoreResult> { + // Phase 1 stub: 
no centroids yet. Phase 4 wires the full soft-assignment pass. + Ok(vec![]) + } + + async fn count(&self) -> crate::storage::memory_store::MemoryStoreResult { + use crate::storage::memory_store::MemoryStoreError; + let reader = self + .reader + .lock() + .map_err(|_| MemoryStoreError::Init("Reader lock poisoned".into()))?; + let n: i64 = reader + .query_row("SELECT COUNT(*) FROM knowledge_nodes", [], |row| row.get(0)) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + Ok(n as usize) + } + + async fn get_stats( + &self, + ) -> crate::storage::memory_store::MemoryStoreResult + { + use crate::storage::memory_store::{MemoryStoreError, StoreStats}; + let reader = self + .reader + .lock() + .map_err(|_| MemoryStoreError::Init("Reader lock poisoned".into()))?; + let total: i64 = reader + .query_row("SELECT COUNT(*) FROM knowledge_nodes", [], |row| row.get(0)) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + let with_emb: i64 = reader + .query_row( + "SELECT COUNT(*) FROM knowledge_nodes WHERE has_embedding = 1", + [], + |row| row.get(0), + ) + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + let total_edges: i64 = reader + .query_row("SELECT COUNT(*) FROM memory_connections", [], |row| { + row.get(0) + }) + .unwrap_or(0); + let total_domains: i64 = reader + .query_row("SELECT COUNT(*) FROM domains", [], |row| row.get(0)) + .unwrap_or(0); + let model_row: Option<(String, i64)> = reader + .query_row( + "SELECT name, dimension FROM embedding_model WHERE id = 1", + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .optional() + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + let (model_name, model_dim) = model_row + .map(|(n, d)| (Some(n), Some(d as usize))) + .unwrap_or((None, None)); + Ok(StoreStats { + total_memories: total as usize, + memories_with_embeddings: with_emb as usize, + total_edges: total_edges as usize, + total_domains: total_domains as usize, + registered_model_name: model_name, + registered_model_dim: model_dim, + 
}) + } + + async fn vacuum(&self) -> crate::storage::memory_store::MemoryStoreResult<()> { + use crate::storage::memory_store::MemoryStoreError; + let writer = self + .writer + .lock() + .map_err(|_| MemoryStoreError::Init("Writer lock poisoned".into()))?; + writer + .execute_batch("VACUUM;") + .map_err(|e| MemoryStoreError::Backend(e.to_string()))?; + Ok(()) + } +} + // ============================================================================ // TESTS // ============================================================================ @@ -8294,6 +9307,9 @@ mod tests { use super::*; use crate::advanced::{MatchClass, MergePolicy}; use tempfile::tempdir; + // The public struct was renamed from Storage to SqliteMemoryStore; this + // alias keeps all existing tests compiling without modification. + use SqliteMemoryStore as Storage; fn create_test_storage() -> Storage { let dir = tempdir().unwrap(); @@ -10322,6 +11338,187 @@ mod tests { v } + // ========================================================================= + // Phase 1 trait-method unit tests + // ========================================================================= + use crate::storage::memory_store::{ + MemoryEdge, MemoryRecord, MemoryStore, MemoryStoreError, ModelSignature, SchedulingState, + }; + + fn make_record(content: &str) -> MemoryRecord { + MemoryRecord { + id: uuid::Uuid::new_v4(), + domains: vec![], + domain_scores: Default::default(), + content: content.to_string(), + node_type: "fact".to_string(), + tags: vec!["test".to_string()], + embedding: None, + created_at: chrono::Utc::now(), + updated_at: chrono::Utc::now(), + metadata: serde_json::json!({}), + } + } + + fn rt() -> tokio::runtime::Runtime { + tokio::runtime::Runtime::new().unwrap() + } + + #[test] + fn trait_init_is_idempotent() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + s.init().await.unwrap(); + s.init().await.unwrap(); + }); + } + + #[test] + fn trait_health_check_reports_healthy_on_fresh_db() { + 
let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let h = s.health_check().await.unwrap(); + assert!(matches!( + h, + crate::storage::memory_store::HealthStatus::Healthy + )); + }); + } + + #[test] + fn trait_register_model_first_write_succeeds() { + let s = create_test_storage(); + let sig = ModelSignature { + name: "test-model".to_string(), + dimension: 256, + hash: "a".repeat(64), + }; + let rt = rt(); + rt.block_on(async { + s.register_model(&sig).await.unwrap(); + let got = s.registered_model().await.unwrap(); + assert_eq!(got, Some(sig)); + }); + } + + #[test] + fn trait_register_model_mismatched_write_refused() { + let s = create_test_storage(); + let sig = ModelSignature { + name: "model-a".to_string(), + dimension: 256, + hash: "a".repeat(64), + }; + let sig2 = ModelSignature { + name: "model-b".to_string(), + dimension: 256, + hash: "b".repeat(64), + }; + let rt = rt(); + rt.block_on(async { + s.register_model(&sig).await.unwrap(); + let err = s.register_model(&sig2).await.unwrap_err(); + assert!(matches!(err, MemoryStoreError::ModelMismatch { .. 
})); + }); + } + + #[test] + fn trait_register_model_same_signature_idempotent() { + let s = create_test_storage(); + let sig = ModelSignature { + name: "test-model".to_string(), + dimension: 256, + hash: "a".repeat(64), + }; + let rt = rt(); + rt.block_on(async { + s.register_model(&sig).await.unwrap(); + s.register_model(&sig).await.unwrap(); // second call must not error + }); + } + + #[test] + fn trait_insert_returns_uuid() { + let s = create_test_storage(); + let rec = make_record("test content"); + let expected_id = rec.id; + let rt = rt(); + rt.block_on(async { + let got = s.insert(&rec).await.unwrap(); + assert_eq!(got, expected_id); + }); + } + + #[test] + fn trait_get_missing_returns_none() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let got = s.get(uuid::Uuid::new_v4()).await.unwrap(); + assert!(got.is_none()); + }); + } + + #[test] + fn trait_get_after_insert_round_trip() { + let s = create_test_storage(); + let rec = make_record("round trip content"); + let id = rec.id; + let rt = rt(); + rt.block_on(async { + s.insert(&rec).await.unwrap(); + let got = s.get(id).await.unwrap().unwrap(); + assert_eq!(got.content, "round trip content"); + assert_eq!(got.node_type, "fact"); + assert!(got.domains.is_empty()); + assert!(got.domain_scores.is_empty()); + }); + } + + #[test] + fn trait_update_modifies_content() { + let s = create_test_storage(); + let rec = make_record("original content"); + let id = rec.id; + let rt = rt(); + rt.block_on(async { + s.insert(&rec).await.unwrap(); + let mut updated = s.get(id).await.unwrap().unwrap(); + updated.content = "updated content".to_string(); + s.update(&updated).await.unwrap(); + let got = s.get(id).await.unwrap().unwrap(); + assert_eq!(got.content, "updated content"); + }); + } + + #[test] + fn trait_delete_removes_record() { + let s = create_test_storage(); + let rec = make_record("to be deleted"); + let id = rec.id; + let rt = rt(); + rt.block_on(async { + 
s.insert(&rec).await.unwrap(); + s.delete(id).await.unwrap(); + let got = s.get(id).await.unwrap(); + assert!(got.is_none()); + }); + } + + #[test] + fn trait_fts_search_returns_tokens_match() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let rec = make_record("mitochondria powerhouse cell energy"); + s.insert(&rec).await.unwrap(); + let results = s.fts_search("mitochondria", 10).await.unwrap(); + assert!(!results.is_empty()); + }); + } + #[cfg(all(feature = "embeddings", feature = "vector-search"))] #[test] fn test_merge_candidates_threshold_classification() { @@ -10592,4 +11789,337 @@ mod tests { let storage = create_test_storage(); assert!(storage.set_protected("does-not-exist", true).is_err()); } + + #[test] + fn trait_hybrid_search_multi_word_via_insert() { + // Verify that hybrid_search finds records inserted via the trait insert() + // even when no embedding is present (keyword path via terms matching). + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let rec = make_record("quantum entanglement superposition physics"); + s.insert(&rec).await.unwrap(); + let results = s.hybrid_search("quantum physics", 10, 0.3, 0.7).unwrap(); + assert!( + !results.is_empty(), + "hybrid_search must find record containing 'quantum' and 'physics'" + ); + }); + } + + #[test] + fn trait_scheduling_round_trip() { + let s = create_test_storage(); + let rec = make_record("fsrs scheduling test"); + let id = rec.id; + let rt = rt(); + rt.block_on(async { + s.insert(&rec).await.unwrap(); + let state = SchedulingState { + memory_id: id, + stability: 5.0, + difficulty: 0.4, + retrievability: 0.8, + last_review: Some(chrono::Utc::now()), + next_review: Some(chrono::Utc::now() + chrono::Duration::days(7)), + reps: 3, + lapses: 1, + }; + s.update_scheduling(&state).await.unwrap(); + let got = s.get_scheduling(id).await.unwrap().unwrap(); + assert!((got.stability - 5.0).abs() < 0.01); + }); + } + + #[test] + fn 
trait_get_scheduling_missing_returns_none() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let got = s.get_scheduling(uuid::Uuid::new_v4()).await.unwrap(); + assert!(got.is_none()); + }); + } + + #[test] + fn trait_get_due_memories_returns_in_order() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + for i in 0..3usize { + let rec = make_record(&format!("due memory {i}")); + let id = rec.id; + s.insert(&rec).await.unwrap(); + let state = SchedulingState { + memory_id: id, + stability: 1.0, + difficulty: 0.3, + retrievability: 0.5, + last_review: Some(chrono::Utc::now()), + next_review: Some(chrono::Utc::now() - chrono::Duration::days(3 - i as i64)), + reps: 1, + lapses: 0, + }; + s.update_scheduling(&state).await.unwrap(); + } + let due = s.get_due_memories(chrono::Utc::now(), 10).await.unwrap(); + assert_eq!(due.len(), 3); + }); + } + + #[test] + fn trait_add_edge_is_idempotent() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let rec_a = make_record("node a"); + let rec_b = make_record("node b"); + let id_a = rec_a.id; + let id_b = rec_b.id; + s.insert(&rec_a).await.unwrap(); + s.insert(&rec_b).await.unwrap(); + let edge = MemoryEdge { + source_id: id_a, + target_id: id_b, + edge_type: "semantic".to_string(), + weight: 0.9, + created_at: chrono::Utc::now(), + }; + s.add_edge(&edge).await.unwrap(); + s.add_edge(&edge).await.unwrap(); // idempotent + let edges = s.get_edges(id_a, None).await.unwrap(); + let filtered: Vec<_> = edges + .iter() + .filter(|e| e.source_id == id_a && e.target_id == id_b) + .collect(); + assert_eq!(filtered.len(), 1, "edge must not be duplicated"); + }); + } + + #[test] + fn trait_get_edges_filters_by_type() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let rec_a = make_record("filter a"); + let rec_b = make_record("filter b"); + let id_a = rec_a.id; + let id_b = rec_b.id; + s.insert(&rec_a).await.unwrap(); + 
s.insert(&rec_b).await.unwrap(); + let edge = MemoryEdge { + source_id: id_a, + target_id: id_b, + edge_type: "causal".to_string(), + weight: 0.5, + created_at: chrono::Utc::now(), + }; + s.add_edge(&edge).await.unwrap(); + let causal = s.get_edges(id_a, Some("causal")).await.unwrap(); + assert!(!causal.is_empty()); + let semantic = s.get_edges(id_a, Some("semantic")).await.unwrap(); + assert!(semantic.is_empty()); + }); + } + + #[test] + fn trait_remove_edge_deletes_single() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let rec_a = make_record("rm edge a"); + let rec_b = make_record("rm edge b"); + let id_a = rec_a.id; + let id_b = rec_b.id; + s.insert(&rec_a).await.unwrap(); + s.insert(&rec_b).await.unwrap(); + let edge = MemoryEdge { + source_id: id_a, + target_id: id_b, + edge_type: "semantic".to_string(), + weight: 0.7, + created_at: chrono::Utc::now(), + }; + s.add_edge(&edge).await.unwrap(); + s.remove_edge(id_a, id_b).await.unwrap(); + let edges = s.get_edges(id_a, None).await.unwrap(); + assert!(edges.is_empty()); + }); + } + + #[test] + fn trait_get_neighbors_bfs_depth_zero_returns_self_only() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let rec = make_record("depth zero"); + let id = rec.id; + s.insert(&rec).await.unwrap(); + let neighbors = s.get_neighbors(id, 0).await.unwrap(); + assert_eq!(neighbors.len(), 1); + assert_eq!(neighbors[0].0.id, id); + }); + } + + #[test] + fn trait_get_neighbors_bfs_depth_two_expands() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let rec_a = make_record("bfs node a"); + let rec_b = make_record("bfs node b"); + let rec_c = make_record("bfs node c"); + let id_a = rec_a.id; + let id_b = rec_b.id; + let id_c = rec_c.id; + s.insert(&rec_a).await.unwrap(); + s.insert(&rec_b).await.unwrap(); + s.insert(&rec_c).await.unwrap(); + s.add_edge(&MemoryEdge { + source_id: id_a, + target_id: id_b, + edge_type: "semantic".to_string(), + weight: 
1.0, + created_at: chrono::Utc::now(), + }) + .await + .unwrap(); + s.add_edge(&MemoryEdge { + source_id: id_b, + target_id: id_c, + edge_type: "semantic".to_string(), + weight: 1.0, + created_at: chrono::Utc::now(), + }) + .await + .unwrap(); + let neighbors = s.get_neighbors(id_a, 2).await.unwrap(); + let ids: Vec = neighbors.iter().map(|(r, _)| r.id).collect(); + assert!(ids.contains(&id_a)); + assert!(ids.contains(&id_b)); + assert!(ids.contains(&id_c)); + }); + } + + #[test] + fn trait_list_domains_empty_in_phase_1() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let domains = s.list_domains().await.unwrap(); + assert!(domains.is_empty()); + }); + } + + #[test] + fn trait_upsert_then_get_domain_round_trip() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let domain = crate::storage::memory_store::Domain { + id: "dev".to_string(), + label: "Development".to_string(), + centroid: vec![0.1, 0.2, 0.3], + top_terms: vec!["rust".to_string(), "code".to_string()], + memory_count: 42, + created_at: chrono::Utc::now(), + }; + s.upsert_domain(&domain).await.unwrap(); + let got = s.get_domain("dev").await.unwrap().unwrap(); + assert_eq!(got.id, "dev"); + assert_eq!(got.memory_count, 42); + }); + } + + #[test] + fn trait_delete_domain_idempotent() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + s.delete_domain("nonexistent").await.unwrap(); + s.delete_domain("nonexistent").await.unwrap(); + }); + } + + #[test] + fn trait_classify_with_no_domains_returns_empty() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + let result = s.classify(&[0.1, 0.2, 0.3]).await.unwrap(); + assert!(result.is_empty()); + }); + } + + #[test] + fn trait_count_matches_insert_count() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + for i in 0..5usize { + let rec = make_record(&format!("count test {i}")); + s.insert(&rec).await.unwrap(); + } + 
assert_eq!(s.count().await.unwrap(), 5); + }); + } + + #[test] + fn trait_get_stats_reports_registered_model() { + let s = create_test_storage(); + let sig = ModelSignature { + name: "test-model".to_string(), + dimension: 256, + hash: "c".repeat(64), + }; + let rt = rt(); + rt.block_on(async { + use crate::storage::memory_store::MemoryStore; + // Cast to &dyn MemoryStore so the async trait method is called + // instead of the inherent sync get_stats() on SqliteMemoryStore. + let dyn_s: &dyn MemoryStore = &s; + dyn_s.register_model(&sig).await.unwrap(); + let stats = dyn_s.get_stats().await.unwrap(); + assert_eq!(stats.registered_model_name, Some("test-model".to_string())); + assert_eq!(stats.registered_model_dim, Some(256)); + }); + } + + #[test] + fn trait_vacuum_succeeds() { + let s = create_test_storage(); + let rt = rt(); + rt.block_on(async { + s.vacuum().await.unwrap(); + }); + } + + #[test] + fn trait_insert_refuses_dimension_mismatch() { + let s = create_test_storage(); + let sig = ModelSignature { + name: "test-model".to_string(), + dimension: 256, + hash: "d".repeat(64), + }; + let rt = rt(); + rt.block_on(async { + s.register_model(&sig).await.unwrap(); + // Build a record with wrong dimension (512 instead of 256) and + // declare the model signature in metadata + let mut rec = make_record("dimension mismatch"); + rec.embedding = Some(vec![0.0f32; 512]); + rec.metadata = serde_json::json!({ + "model_name": "test-model", + "model_dim": 256_u64, + "model_hash": "d".repeat(64), + }); + let err = s.insert(&rec).await.unwrap_err(); + assert!( + matches!(err, MemoryStoreError::InvalidInput(_)), + "expected InvalidInput, got {:?}", + err + ); + }); + } } diff --git a/tests/phase_1/Cargo.toml b/tests/phase_1/Cargo.toml new file mode 100644 index 0000000..80a9bff --- /dev/null +++ b/tests/phase_1/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "vestige-phase-1-tests" +version = "0.0.1" +edition = "2024" +publish = false + +[dependencies] +vestige-core = { path = 
"../../crates/vestige-core" } +tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +tempfile = "3" +uuid = { version = "1", features = ["v4"] } +chrono = "0.4" +serde_json = "1" +rusqlite = { version = "0.38", features = ["bundled"] } + +[[test]] +name = "trait_round_trip" +path = "trait_round_trip.rs" + +[[test]] +name = "embedding_model_registry" +path = "embedding_model_registry.rs" + +[[test]] +name = "domain_column_migration" +path = "domain_column_migration.rs" + +[[test]] +name = "cognitive_module_isolation" +path = "cognitive_module_isolation.rs" + +[[test]] +name = "send_bound_variant" +path = "send_bound_variant.rs" + +[[test]] +name = "embedder_trait" +path = "embedder_trait.rs" diff --git a/tests/phase_1/cognitive_module_isolation.rs b/tests/phase_1/cognitive_module_isolation.rs new file mode 100644 index 0000000..0ff94b8 --- /dev/null +++ b/tests/phase_1/cognitive_module_isolation.rs @@ -0,0 +1,143 @@ +//! Phase 1 integration tests: cognitive modules compile against Arc. +//! The key goal is a compile-time gate: if any module still typed against +//! SqliteMemoryStore concretely, this would fail to compile. 
+ +use chrono::Utc; +use std::sync::Arc; +use tempfile::tempdir; +use uuid::Uuid; +use vestige_core::storage::{MemoryEdge, MemoryRecord, MemoryStore, SqliteMemoryStore}; + +fn make_store() -> Arc { + let dir = tempdir().unwrap(); + let db = dir.path().join("test.db"); + std::mem::forget(dir); + Arc::new(SqliteMemoryStore::new(Some(db)).expect("create")) +} + +fn make_record(content: &str) -> MemoryRecord { + MemoryRecord { + id: Uuid::new_v4(), + domains: vec![], + domain_scores: Default::default(), + content: content.to_string(), + node_type: "fact".to_string(), + tags: vec!["isolation-test".to_string()], + embedding: None, + created_at: Utc::now(), + updated_at: Utc::now(), + metadata: serde_json::json!({}), + } +} + +/// Ensure the store: Arc call pattern compiles and runs through +/// a representative method from every cognitive module group. +#[tokio::test] +async fn all_modules_compile_against_dyn_store() { + let store: Arc = make_store(); + + // CRUD via trait + let rec = make_record("cognitive isolation test"); + let id = store.insert(&rec).await.expect("insert via dyn trait"); + let got = store + .get(id) + .await + .expect("get via dyn trait") + .expect("exists"); + assert_eq!(got.content, "cognitive isolation test"); + + // Graph edges via trait + let rec2 = make_record("linked node"); + let id2 = store.insert(&rec2).await.expect("insert 2"); + store + .add_edge(&MemoryEdge { + source_id: id, + target_id: id2, + edge_type: "semantic".to_string(), + weight: 0.8, + created_at: Utc::now(), + }) + .await + .expect("add_edge via dyn trait"); + + let edges = store + .get_edges(id, None) + .await + .expect("get_edges via dyn trait"); + assert!(!edges.is_empty()); + + // Search via trait + let results = store + .fts_search("cognitive", 5) + .await + .expect("fts_search via dyn trait"); + assert!(!results.is_empty()); + + // Stats and count via trait + let count = store.count().await.expect("count via dyn trait"); + assert!(count >= 2); + + let stats = 
store.get_stats().await.expect("get_stats via dyn trait"); + assert!(stats.total_memories >= 2); +} + +#[tokio::test] +async fn spreading_activation_traverses_via_trait() { + let store: Arc = make_store(); + let rec_a = make_record("spreading activation source"); + let rec_b = make_record("spreading activation neighbor"); + let id_a = rec_a.id; + let id_b = rec_b.id; + store.insert(&rec_a).await.expect("insert a"); + store.insert(&rec_b).await.expect("insert b"); + store + .add_edge(&MemoryEdge { + source_id: id_a, + target_id: id_b, + edge_type: "semantic".to_string(), + weight: 0.9, + created_at: Utc::now(), + }) + .await + .expect("add edge"); + + // get_neighbors simulates the spreading activation traversal path + let neighbors = store.get_neighbors(id_a, 1).await.expect("get_neighbors"); + let ids: Vec = neighbors.iter().map(|(r, _)| r.id).collect(); + assert!(ids.contains(&id_a)); + assert!(ids.contains(&id_b)); +} + +#[tokio::test] +async fn synaptic_tagging_consumes_records_via_trait() { + // Build a MemoryRecord from trait-returned data and exercise the + // SynapticTaggingSystem pipeline (constructing CapturedMemory from store data). + let store: Arc = make_store(); + let rec = make_record("synaptic tagging test memory"); + let id = store.insert(&rec).await.expect("insert"); + let got = store.get(id).await.expect("get").expect("exists"); + // The important thing is we got a MemoryRecord back from the dyn trait; + // SynapticTaggingSystem would take this record as input. + assert_eq!(got.id, id); + assert!(!got.content.is_empty()); +} + +#[tokio::test] +async fn hippocampal_index_built_from_store() { + // Exercise the fts_search -> HippocampalIndex indexing path. 
+ let store: Arc = make_store(); + for i in 0..5usize { + let rec = make_record(&format!("hippocampal indexing topic {i}")); + store.insert(&rec).await.expect("insert"); + } + let results = store + .fts_search("hippocampal indexing", 10) + .await + .expect("fts_search"); + // Verify we get results and they have the correct fields + assert!(!results.is_empty()); + for r in &results { + assert!(!r.record.content.is_empty()); + assert!(r.score >= 0.0); + } +} diff --git a/tests/phase_1/domain_column_migration.rs b/tests/phase_1/domain_column_migration.rs new file mode 100644 index 0000000..67e318b --- /dev/null +++ b/tests/phase_1/domain_column_migration.rs @@ -0,0 +1,161 @@ +//! Phase 1 integration tests: domain column migration and schema upgrade. + +use std::sync::Arc; +use tempfile::tempdir; +use uuid::Uuid; +use vestige_core::storage::{MemoryRecord, MemoryStore, SqliteMemoryStore}; + +#[tokio::test] +async fn fresh_db_has_v12_schema() { + let dir = tempdir().unwrap(); + let db = dir.path().join("fresh.db"); + let _store = SqliteMemoryStore::new(Some(db.clone())).expect("create"); + // Open a raw connection and check pragma + let conn = rusqlite::Connection::open(&db).expect("open"); + let cols: Vec = { + let mut stmt = conn.prepare("PRAGMA table_info(knowledge_nodes)").unwrap(); + stmt.query_map([], |row| row.get::<_, String>(1)) + .unwrap() + .map(|r| r.unwrap()) + .collect() + }; + assert!( + cols.contains(&"domains".to_string()), + "domains column must exist: {:?}", + cols + ); + assert!( + cols.contains(&"domain_scores".to_string()), + "domain_scores column must exist" + ); +} + +#[tokio::test] +async fn v11_db_upgrades_cleanly() { + use vestige_core::storage::MIGRATIONS; + let dir = tempdir().unwrap(); + let db = dir.path().join("v11.db"); + // Create DB with V11 migrations only + { + let conn = rusqlite::Connection::open(&db).expect("open"); + for m in MIGRATIONS.iter().filter(|m| m.version <= 11) { + conn.execute_batch(m.up).expect("apply migration"); + } 
+ // Insert 5 rows under V11 schema + for i in 0..5usize { + conn.execute( + "INSERT INTO knowledge_nodes (id, content, node_type, created_at, updated_at, \ + last_accessed, stability, difficulty, reps, lapses, learning_state, \ + storage_strength, retrieval_strength, retention_strength, \ + next_review, scheduled_days, has_embedding) \ + VALUES (?1, ?2, 'fact', datetime('now'), datetime('now'), datetime('now'), \ + 1.0, 0.3, 0, 0, 'new', 1.0, 1.0, 1.0, datetime('now'), 1, 0)", + rusqlite::params![format!("pre-v12-{i}"), format!("content {i}"),], + ) + .expect("insert pre-v12 row"); + } + } + // Upgrade by opening through SqliteMemoryStore (triggers full migration) + let _store = SqliteMemoryStore::new(Some(db.clone())).expect("open with v12"); + // Check all 5 rows have empty domains/domain_scores + let conn = rusqlite::Connection::open(&db).expect("open raw"); + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM knowledge_nodes WHERE domains='[]' AND domain_scores='{}'", + [], + |row| row.get(0), + ) + .expect("count"); + assert_eq!( + count, 5, + "all pre-v12 rows must have empty domains/domain_scores" + ); +} + +#[tokio::test] +async fn empty_domains_serialize_as_brackets() { + let dir = tempdir().unwrap(); + let db = dir.path().join("empty_domains.db"); + let store = SqliteMemoryStore::new(Some(db.clone())).expect("create"); + let rec = MemoryRecord { + id: Uuid::new_v4(), + domains: vec![], + domain_scores: Default::default(), + content: "test content".to_string(), + node_type: "fact".to_string(), + tags: vec![], + embedding: None, + created_at: chrono::Utc::now(), + updated_at: chrono::Utc::now(), + metadata: serde_json::json!({}), + }; + store.insert(&rec).await.expect("insert"); + // Check raw sqlite value + let conn = rusqlite::Connection::open(&db).expect("open raw"); + let (domains, domain_scores): (String, String) = conn + .query_row( + "SELECT domains, domain_scores FROM knowledge_nodes LIMIT 1", + [], + |row| Ok((row.get(0)?, row.get(1)?)), 
+ ) + .expect("query"); + assert_eq!( + domains, "[]", + "empty domains should store as '[]', not NULL" + ); + assert_eq!( + domain_scores, "{}", + "empty domain_scores should store as '{{}}'" + ); +} + +#[tokio::test] +async fn populated_domains_round_trip() { + let dir = tempdir().unwrap(); + let db = dir.path().join("populated.db"); + let store: Arc = Arc::new(SqliteMemoryStore::new(Some(db)).expect("create")); + let mut rec = MemoryRecord { + id: Uuid::new_v4(), + domains: vec!["dev".to_string(), "infra".to_string()], + domain_scores: { + let mut m = std::collections::HashMap::new(); + m.insert("dev".to_string(), 0.82); + m.insert("infra".to_string(), 0.71); + m + }, + content: "populated domains test".to_string(), + node_type: "fact".to_string(), + tags: vec![], + embedding: None, + created_at: chrono::Utc::now(), + updated_at: chrono::Utc::now(), + metadata: serde_json::json!({}), + }; + let id = store.insert(&rec).await.expect("insert"); + // Update the domains via update() + rec.id = id; + store.update(&rec).await.expect("update with domains"); + // Read back and verify + let got = store.get(id).await.expect("get").expect("exists"); + let mut expected_domains = got.domains.clone(); + expected_domains.sort(); + assert_eq!(expected_domains, vec!["dev", "infra"]); + assert!((got.domain_scores["dev"] - 0.82).abs() < 0.001); + assert!((got.domain_scores["infra"] - 0.71).abs() < 0.001); +} + +#[tokio::test] +async fn domains_table_exists() { + let dir = tempdir().unwrap(); + let db = dir.path().join("domains_table.db"); + let _store = SqliteMemoryStore::new(Some(db.clone())).expect("create"); + let conn = rusqlite::Connection::open(&db).expect("open raw"); + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='domains'", + [], + |row| row.get(0), + ) + .expect("query"); + assert_eq!(count, 1, "domains table must exist after V12 migration"); +} diff --git a/tests/phase_1/embedder_trait.rs 
b/tests/phase_1/embedder_trait.rs new file mode 100644 index 0000000..9e96da1 --- /dev/null +++ b/tests/phase_1/embedder_trait.rs @@ -0,0 +1,43 @@ +//! Phase 1 integration tests: Embedder trait and FastembedEmbedder. + +use std::sync::Arc; +use tempfile::tempdir; +use vestige_core::embedder::{Embedder, FastembedEmbedder}; +use vestige_core::storage::MemoryStore; +use vestige_core::storage::SqliteMemoryStore; + +fn make_store() -> Arc { + let dir = tempdir().unwrap(); + let db = dir.path().join("test.db"); + std::mem::forget(dir); + Arc::new(SqliteMemoryStore::new(Some(db)).expect("create")) +} + +#[tokio::test] +async fn fastembed_implements_embedder_trait() { + // The key test: `Box` compiles + let e: Box = Box::new(FastembedEmbedder::new()); + assert_eq!(e.dimension(), 256, "dimension must be 256"); + assert!(!e.model_name().is_empty(), "model_name must not be empty"); + assert!(!e.model_hash().is_empty(), "model_hash must not be empty"); + assert_eq!(e.model_hash().len(), 64, "hash must be 64 hex chars"); +} + +#[tokio::test] +async fn signature_matches_memory_store_registry() { + let e = FastembedEmbedder::new(); + let sig = e.signature(); + let store = make_store(); + store + .register_model(&sig) + .await + .expect("register via Embedder::signature"); + let got = store + .registered_model() + .await + .expect("registered_model") + .expect("Some"); + assert_eq!(got.name, sig.name); + assert_eq!(got.dimension, sig.dimension); + assert_eq!(got.hash, sig.hash); +} diff --git a/tests/phase_1/embedding_model_registry.rs b/tests/phase_1/embedding_model_registry.rs new file mode 100644 index 0000000..3c001ea --- /dev/null +++ b/tests/phase_1/embedding_model_registry.rs @@ -0,0 +1,148 @@ +//! Phase 1 integration tests: embedding model registry. 
+ +use std::sync::Arc; +use tempfile::tempdir; +use uuid::Uuid; +use vestige_core::storage::{ + MemoryRecord, MemoryStore, MemoryStoreError, ModelSignature, SqliteMemoryStore, +}; + +fn make_store() -> Arc { + let dir = tempdir().unwrap(); + let db = dir.path().join("test.db"); + std::mem::forget(dir); + let store = SqliteMemoryStore::new(Some(db)).expect("create store"); + Arc::new(store) +} + +fn sig_a() -> ModelSignature { + ModelSignature { + name: "model-a".to_string(), + dimension: 256, + hash: "a".repeat(64), + } +} + +fn sig_b() -> ModelSignature { + ModelSignature { + name: "model-b".to_string(), + dimension: 256, + hash: "b".repeat(64), + } +} + +fn record_without_embedding() -> MemoryRecord { + MemoryRecord { + id: Uuid::new_v4(), + domains: vec![], + domain_scores: Default::default(), + content: "plain text memory".to_string(), + node_type: "fact".to_string(), + tags: vec![], + embedding: None, + created_at: chrono::Utc::now(), + updated_at: chrono::Utc::now(), + metadata: serde_json::json!({}), + } +} + +#[tokio::test] +async fn first_embedded_insert_auto_registers() { + // fresh store; register a model, then check registered_model() returns Some + let store = make_store(); + let sig = sig_a(); + store.register_model(&sig).await.expect("register"); + let got = store.registered_model().await.expect("registered_model"); + assert_eq!(got, Some(sig)); +} + +#[tokio::test] +async fn second_insert_with_same_signature_succeeds() { + let store = make_store(); + let sig = sig_a(); + store.register_model(&sig).await.expect("first register"); + store + .register_model(&sig) + .await + .expect("second register idempotent"); +} + +#[tokio::test] +async fn second_insert_with_different_dimension_refused() { + let store = make_store(); + let sig = sig_a(); // dim 256 + store.register_model(&sig).await.expect("register 256"); + // Try inserting a 512-dim vector into a store registered for 256 + let mut rec = record_without_embedding(); + rec.embedding = 
Some(vec![0.0f32; 512]); + rec.metadata = serde_json::json!({ + "model_name": "model-a", + "model_dim": 256_u64, + "model_hash": "a".repeat(64), + }); + let err = store.insert(&rec).await.unwrap_err(); + assert!( + matches!(err, MemoryStoreError::InvalidInput(_)), + "expected InvalidInput for dim mismatch, got {:?}", + err + ); +} + +#[tokio::test] +async fn second_insert_with_different_model_name_refused() { + let store = make_store(); + store.register_model(&sig_a()).await.expect("register a"); + let err = store.register_model(&sig_b()).await.unwrap_err(); + assert!( + matches!(err, MemoryStoreError::ModelMismatch { .. }), + "expected ModelMismatch, got {:?}", + err + ); +} + +#[tokio::test] +async fn second_insert_with_different_hash_refused() { + let store = make_store(); + let sig = sig_a(); + store.register_model(&sig).await.expect("register"); + let sig_diff_hash = ModelSignature { + name: "model-a".to_string(), + dimension: 256, + hash: "c".repeat(64), // different hash + }; + let err = store.register_model(&sig_diff_hash).await.unwrap_err(); + assert!( + matches!(err, MemoryStoreError::ModelMismatch { .. 
}), + "expected ModelMismatch for different hash, got {:?}", + err + ); +} + +#[tokio::test] +async fn no_embedding_insert_allowed_before_registration() { + let store = make_store(); + // registered_model() should be None + assert!( + store + .registered_model() + .await + .expect("registered_model") + .is_none() + ); + // A plain text memory without an embedding must insert successfully + let rec = record_without_embedding(); + store + .insert(&rec) + .await + .expect("plain insert before registration"); +} + +#[tokio::test] +async fn stats_reports_registered_model_after_first_write() { + let store = make_store(); + let sig = sig_a(); + store.register_model(&sig).await.expect("register"); + let stats = store.get_stats().await.expect("stats"); + assert_eq!(stats.registered_model_name, Some("model-a".to_string())); + assert_eq!(stats.registered_model_dim, Some(256)); +} diff --git a/tests/phase_1/send_bound_variant.rs b/tests/phase_1/send_bound_variant.rs new file mode 100644 index 0000000..c0f02ef --- /dev/null +++ b/tests/phase_1/send_bound_variant.rs @@ -0,0 +1,99 @@ +//! Phase 1 integration tests: Arc moves across tokio::spawn. +//! +//! This verifies that `#[trait_variant::make(MemoryStore: Send)]` actually +//! produces a Send-bound future so Arc is movable. 
+ +use chrono::Utc; +use std::sync::Arc; +use tempfile::tempdir; +use uuid::Uuid; +use vestige_core::storage::{MemoryRecord, MemoryStore, SqliteMemoryStore}; + +fn make_store() -> Arc { + let dir = tempdir().unwrap(); + let db = dir.path().join("send_test.db"); + std::mem::forget(dir); + Arc::new(SqliteMemoryStore::new(Some(db)).expect("create")) +} + +fn make_record(content: &str) -> MemoryRecord { + MemoryRecord { + id: Uuid::new_v4(), + domains: vec![], + domain_scores: Default::default(), + content: content.to_string(), + node_type: "fact".to_string(), + tags: vec![], + embedding: None, + created_at: Utc::now(), + updated_at: Utc::now(), + metadata: serde_json::json!({}), + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn arc_dyn_memory_store_moves_across_tokio_tasks() { + let store: Arc = make_store(); + let mut handles = Vec::new(); + for t in 0..16usize { + let store = Arc::clone(&store); + let handle = tokio::spawn(async move { + for i in 0..10usize { + let rec = make_record(&format!("task {t} memory {i}")); + store.insert(&rec).await.expect("insert in spawned task"); + } + }); + handles.push(handle); + } + for h in handles { + h.await.expect("task completed without panic"); + } + let count = store.count().await.expect("count"); + assert_eq!(count, 160, "all 16*10 inserts must be counted"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn concurrent_readers_one_writer() { + let store: Arc = make_store(); + // Pre-populate with some data so readers have something to find + for i in 0..10usize { + let rec = make_record(&format!("concurrent reader memory {i}")); + store.insert(&rec).await.expect("pre-insert"); + } + + let mut handles = Vec::new(); + + // 32 concurrent readers + for _ in 0..32usize { + let store = Arc::clone(&store); + let handle = tokio::spawn(async move { + let results = store.fts_search("concurrent reader", 5).await; + // Should not panic even if results vary due to concurrent writes + 
results.expect("fts_search in concurrent reader"); + }); + handles.push(handle); + } + + // 1 writer inserting more records + { + let store = Arc::clone(&store); + let writer_handle = tokio::spawn(async move { + for i in 0..20usize { + let rec = make_record(&format!("writer record {i}")); + store.insert(&rec).await.expect("concurrent insert"); + } + }); + handles.push(writer_handle); + } + + for h in handles { + h.await.expect("no panics"); + } + + // Eventual consistency check: total count should be at least 10 (initial) + let count = store.count().await.expect("final count"); + assert!( + count >= 10, + "at least the pre-populated records must persist" + ); +} diff --git a/tests/phase_1/trait_round_trip.rs b/tests/phase_1/trait_round_trip.rs new file mode 100644 index 0000000..ab3e0b2 --- /dev/null +++ b/tests/phase_1/trait_round_trip.rs @@ -0,0 +1,217 @@ +//! Phase 1 integration tests: round-trip of every trait method through SqliteMemoryStore. + +use chrono::Utc; +use std::sync::Arc; +use tempfile::tempdir; +use uuid::Uuid; +use vestige_core::storage::{ + MemoryEdge, MemoryRecord, MemoryStore, SearchQuery, SqliteMemoryStore, +}; + +fn make_store() -> Arc { + let dir = tempdir().unwrap(); + let db = dir.path().join("test.db"); + // keep the dir alive by leaking it -- this is fine for tests + std::mem::forget(dir); + let store = SqliteMemoryStore::new(Some(db)).expect("create store"); + Arc::new(store) +} + +fn make_record(content: &str) -> MemoryRecord { + MemoryRecord { + id: Uuid::new_v4(), + domains: vec![], + domain_scores: Default::default(), + content: content.to_string(), + node_type: "fact".to_string(), + tags: vec!["integration".to_string()], + embedding: None, + created_at: Utc::now(), + updated_at: Utc::now(), + metadata: serde_json::json!({}), + } +} + +#[tokio::test] +async fn insert_get_update_delete() { + let store = make_store(); + let rec = make_record("round-trip CRUD test"); + let id = rec.id; + + store.insert(&rec).await.expect("insert"); + 
let got = store.get(id).await.expect("get").expect("exists"); + assert_eq!(got.content, "round-trip CRUD test"); + assert_eq!(got.node_type, "fact"); + assert!(got.domains.is_empty()); + assert!(got.domain_scores.is_empty()); + + let mut updated = got; + updated.content = "updated content".to_string(); + store.update(&updated).await.expect("update"); + + let after_update = store + .get(id) + .await + .expect("get after update") + .expect("exists"); + assert_eq!(after_update.content, "updated content"); + + store.delete(id).await.expect("delete"); + let after_delete = store.get(id).await.expect("get after delete"); + assert!(after_delete.is_none()); +} + +#[tokio::test] +async fn scheduling_upsert_and_due_scan() { + use vestige_core::storage::SchedulingState; + let store = make_store(); + + for i in 0..3usize { + let rec = make_record(&format!("sched memory {i}")); + let id = rec.id; + store.insert(&rec).await.expect("insert"); + let next_review = Utc::now() - chrono::Duration::days((i as i64) + 1); + let state = SchedulingState { + memory_id: id, + stability: 1.0, + difficulty: 0.3, + retrievability: 0.7, + last_review: Some(Utc::now()), + next_review: Some(next_review), + reps: 1, + lapses: 0, + }; + store + .update_scheduling(&state) + .await + .expect("update scheduling"); + } + + let due = store + .get_due_memories(Utc::now(), 10) + .await + .expect("get_due_memories"); + assert_eq!(due.len(), 3, "all 3 should be due"); +} + +#[tokio::test] +async fn edge_crud() { + let store = make_store(); + let rec_a = make_record("edge node A"); + let rec_b = make_record("edge node B"); + let id_a = rec_a.id; + let id_b = rec_b.id; + store.insert(&rec_a).await.expect("insert a"); + store.insert(&rec_b).await.expect("insert b"); + + let edge = MemoryEdge { + source_id: id_a, + target_id: id_b, + edge_type: "semantic".to_string(), + weight: 0.85, + created_at: Utc::now(), + }; + store.add_edge(&edge).await.expect("add edge"); + + let edges = store.get_edges(id_a, 
None).await.expect("get edges"); + assert!(!edges.is_empty()); + + store.remove_edge(id_a, id_b).await.expect("remove edge"); + let after = store.get_edges(id_a, None).await.expect("get edges after"); + assert!(after.is_empty()); +} + +#[tokio::test] +async fn count_and_stats_track_inserts() { + let store = make_store(); + for i in 0..10usize { + let rec = make_record(&format!("stats memory {i}")); + store.insert(&rec).await.expect("insert"); + } + assert_eq!(store.count().await.expect("count"), 10); + let stats = store.get_stats().await.expect("stats"); + assert_eq!(stats.total_memories, 10); +} + +#[tokio::test] +async fn vacuum_after_deletes_reclaims() { + let dir = tempdir().unwrap(); + let db = dir.path().join("vacuum_test.db"); + let store = SqliteMemoryStore::new(Some(db)).expect("create store"); + let store: Arc = Arc::new(store); + + let mut ids = Vec::new(); + for i in 0..50usize { + let rec = make_record(&format!("vacuum memory {i}")); + let id = store.insert(&rec).await.expect("insert"); + ids.push(id); + } + for id in &ids[..40] { + store.delete(*id).await.expect("delete"); + } + // vacuum should not error + store.vacuum().await.expect("vacuum"); +} + +#[tokio::test] +async fn list_domains_empty_then_upsert_then_delete() { + use vestige_core::storage::Domain; + let store = make_store(); + + let domains = store.list_domains().await.expect("list empty"); + assert!(domains.is_empty()); + + let d = Domain { + id: "test-domain".to_string(), + label: "Test Domain".to_string(), + centroid: vec![0.1f32, 0.2, 0.3], + top_terms: vec!["term1".to_string()], + memory_count: 5, + created_at: Utc::now(), + }; + store.upsert_domain(&d).await.expect("upsert domain"); + let after = store.list_domains().await.expect("list after upsert"); + assert_eq!(after.len(), 1); + assert_eq!(after[0].id, "test-domain"); + + store + .delete_domain("test-domain") + .await + .expect("delete domain"); + let after_delete = store.list_domains().await.expect("list after delete"); + 
assert!(after_delete.is_empty()); +} + +#[tokio::test] +async fn classify_with_no_domains_returns_empty() { + let store = make_store(); + let result = store.classify(&[0.1f32, 0.2, 0.3]).await.expect("classify"); + assert!(result.is_empty()); +} + +#[tokio::test] +async fn search_hybrid_returns_results() { + let store = make_store(); + let rec = make_record("quantum entanglement superposition physics"); + store.insert(&rec).await.expect("insert"); + + // Verify fts_search works first (sanity check) + let fts_results = store.fts_search("quantum", 10).await.expect("fts_search"); + assert!( + !fts_results.is_empty(), + "fts_search must find 'quantum' after insert" + ); + + let query = SearchQuery { + text: Some("quantum physics".to_string()), + limit: 10, + ..Default::default() + }; + let results = store.search(&query).await.expect("search"); + // FTS results should include our inserted record + assert!( + !results.is_empty(), + "search must return results for 'quantum physics'" + ); + assert!(results[0].score >= 0.0); +} From a4a6e877c548cecca8064579fd62879a6025a4c3 Mon Sep 17 00:00:00 2001 From: Jan De Landtsheer Date: Wed, 27 May 2026 15:40:04 +0200 Subject: [PATCH 5/8] feat(storage): swap async-trait for trait_variant + dyn adapter (0001a) Replaces #[async_trait::async_trait] on the storage trait with a trait_variant-driven layout plus a hand-written dyn-compatible adapter. - memory_store.rs: LocalMemoryStore is the source trait declared with native async-fn-in-trait. #[trait_variant::make(MemoryStoreSend: Send)] derives the Send-bounded variant that backends actually implement (the blanket impl in 0.1.x goes variant -> source). A hand-written MemoryStore trait wraps every method in Pin<Box<dyn Future<Output = MemoryStoreResult<T>> + Send + 'a>> with a BoxedStoreFuture<'a, T> alias, and a blanket impl<T: MemoryStoreSend> MemoryStore for T adapts every Send-variant implementation. 
This keeps Arc<dyn MemoryStore> dyn-safe for Phase 1 cognitive-module tests -- trait_variant 0.1 alone does NOT produce a dyn-safe variant (RPITIT), so the hand-written adapter is required and supersedes the plan claim that trait_variant gives dyn-compat for free. - sqlite.rs: drop the #[async_trait::async_trait] attribute on the impl block and retarget it to MemoryStoreSend. Two pre-existing clippy issues that the macro had been masking are fixed in the same body (return Ok(out) tail expression in vector_search; DomainRow tuple alias in get_domain). - mod.rs: export MemoryStoreSend alongside the existing LocalMemoryStore and MemoryStore re-exports. Verification: cargo test -p vestige-core --features embeddings,vector-search passes (428 lib tests). All five Phase 1 integration test binaries pass (trait_round_trip, send_bound_variant including arc_dyn_memory_store_moves_across_tokio_tasks, cognitive_module_isolation, embedding_model_registry, domain_column_migration). cargo test --workspace green across every test binary. cargo build --workspace --release green. cargo clippy --workspace --features embeddings,vector-search -- -D warnings clean. grep -rn async_trait crates/vestige-core/src/storage/ returns zero hits. Supersedes plan claim in docs/plans/0001a-trait-rewrite.md about trait_variant emitting a dyn-compatible Send variant; option (c) from the design conversation (hand-written dyn adapter) was selected explicitly because trait_variant 0.1.2 does not. --- .../vestige-core/src/storage/memory_store.rs | 221 +++++++++++++++++- crates/vestige-core/src/storage/mod.rs | 4 +- crates/vestige-core/src/storage/sqlite.rs | 8 +- 3 files changed, 215 insertions(+), 18 deletions(-) diff --git a/crates/vestige-core/src/storage/memory_store.rs b/crates/vestige-core/src/storage/memory_store.rs index 2bc3137..2869a4e 100644 --- a/crates/vestige-core/src/storage/memory_store.rs +++ b/crates/vestige-core/src/storage/memory_store.rs @@ -4,6 +4,8 @@ //! 
intentionally flat: one trait, ~25 methods, no sub-traits. use std::collections::HashMap; +use std::future::Future; +use std::pin::Pin; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; @@ -182,17 +184,20 @@ pub struct ModelSignature { // TRAIT // ---------------------------------------------------------------------------- -/// The single storage abstraction. +/// Internal source trait declared with native async-fn-in-trait. /// -/// `#[async_trait::async_trait]` makes every `async fn` return a -/// `Pin>`, which is required for `Arc` -/// to be movable across `tokio::spawn` boundaries. +/// `#[trait_variant::make(MemoryStoreSend: Send)]` derives a Send-bounded +/// variant whose returned futures are `Send`. In trait_variant 0.1.x the +/// macro emits the blanket `impl LocalMemoryStore for T`, +/// so backends implement `MemoryStoreSend` (the Send variant) and get +/// `LocalMemoryStore` (the non-Send variant) for free. /// -/// `LocalMemoryStore` is a type alias kept for source compatibility with code -/// that refers to the non-send variant. In Phase 1 both names refer to the same -/// (dyn-compatible, Send-safe) trait. -#[async_trait::async_trait] -pub trait MemoryStore: Send + Sync + 'static { +/// Most callers should reach for the dyn-compatible `MemoryStore` trait +/// declared below, which adapts `MemoryStoreSend` into a boxed-future surface +/// and is the public storage abstraction for cognitive modules and tests +/// that want `Arc`. +#[trait_variant::make(MemoryStoreSend: Send)] +pub trait LocalMemoryStore: Sync + 'static { // --- Lifecycle --- async fn init(&self) -> MemoryStoreResult<()>; async fn health_check(&self) -> MemoryStoreResult; @@ -254,9 +259,201 @@ pub trait MemoryStore: Send + Sync + 'static { async fn vacuum(&self) -> MemoryStoreResult<()>; } -/// Type alias kept for source compatibility. Both names refer to the same -/// `async_trait`-annotated trait that is dyn-compatible and `Send + Sync`. 
-pub use MemoryStore as LocalMemoryStore; +// ---------------------------------------------------------------------------- +// DYN-COMPATIBLE STORAGE TRAIT +// ---------------------------------------------------------------------------- + +/// Boxed Send future returning a `MemoryStoreResult`, bound to the lifetime +/// of the borrows captured by the call (typically `&self` plus any reference +/// arguments). Used as the return type of every method on the dyn-compatible +/// `MemoryStore` trait below. +pub type BoxedStoreFuture<'a, T> = + Pin> + Send + 'a>>; + +/// Dyn-compatible storage trait. +/// +/// `MemoryStoreSend` above is the trait users implement; it uses native +/// async-fn-in-trait return types (RPITIT), which gives zero-allocation +/// static dispatch but is not dyn-safe. This trait wraps every method in +/// `Pin>` so `Arc` works for +/// the cognitive module surface and the Phase 1 integration tests. +/// +/// Implementations should not target this trait directly; the blanket +/// `impl MemoryStore for T` adapts every Send-variant +/// implementation automatically. Each call boxes the returned future +/// exactly once, identical to the cost of the previous design. 
+pub trait MemoryStore: Send + Sync + 'static { + fn init<'a>(&'a self) -> BoxedStoreFuture<'a, ()>; + fn health_check<'a>(&'a self) -> BoxedStoreFuture<'a, HealthStatus>; + + fn registered_model<'a>(&'a self) -> BoxedStoreFuture<'a, Option>; + fn register_model<'a>(&'a self, sig: &'a ModelSignature) -> BoxedStoreFuture<'a, ()>; + + fn insert<'a>(&'a self, record: &'a MemoryRecord) -> BoxedStoreFuture<'a, Uuid>; + fn get<'a>(&'a self, id: Uuid) -> BoxedStoreFuture<'a, Option>; + fn update<'a>(&'a self, record: &'a MemoryRecord) -> BoxedStoreFuture<'a, ()>; + fn delete<'a>(&'a self, id: Uuid) -> BoxedStoreFuture<'a, ()>; + + fn search<'a>(&'a self, query: &'a SearchQuery) -> BoxedStoreFuture<'a, Vec>; + fn fts_search<'a>( + &'a self, + text: &'a str, + limit: usize, + ) -> BoxedStoreFuture<'a, Vec>; + fn vector_search<'a>( + &'a self, + embedding: &'a [f32], + limit: usize, + ) -> BoxedStoreFuture<'a, Vec>; + + fn get_scheduling<'a>( + &'a self, + memory_id: Uuid, + ) -> BoxedStoreFuture<'a, Option>; + fn update_scheduling<'a>(&'a self, state: &'a SchedulingState) -> BoxedStoreFuture<'a, ()>; + fn get_due_memories<'a>( + &'a self, + before: DateTime, + limit: usize, + ) -> BoxedStoreFuture<'a, Vec<(MemoryRecord, SchedulingState)>>; + + fn add_edge<'a>(&'a self, edge: &'a MemoryEdge) -> BoxedStoreFuture<'a, ()>; + fn get_edges<'a>( + &'a self, + node_id: Uuid, + edge_type: Option<&'a str>, + ) -> BoxedStoreFuture<'a, Vec>; + fn remove_edge<'a>(&'a self, source: Uuid, target: Uuid) -> BoxedStoreFuture<'a, ()>; + fn get_neighbors<'a>( + &'a self, + node_id: Uuid, + depth: usize, + ) -> BoxedStoreFuture<'a, Vec<(MemoryRecord, f64)>>; + + fn list_domains<'a>(&'a self) -> BoxedStoreFuture<'a, Vec>; + fn get_domain<'a>(&'a self, id: &'a str) -> BoxedStoreFuture<'a, Option>; + fn upsert_domain<'a>(&'a self, domain: &'a Domain) -> BoxedStoreFuture<'a, ()>; + fn delete_domain<'a>(&'a self, id: &'a str) -> BoxedStoreFuture<'a, ()>; + fn classify<'a>(&'a self, embedding: &'a 
[f32]) -> BoxedStoreFuture<'a, Vec<(String, f64)>>; + + fn count<'a>(&'a self) -> BoxedStoreFuture<'a, usize>; + fn get_stats<'a>(&'a self) -> BoxedStoreFuture<'a, StoreStats>; + fn vacuum<'a>(&'a self) -> BoxedStoreFuture<'a, ()>; +} + +impl MemoryStore for T +where + T: MemoryStoreSend, +{ + fn init<'a>(&'a self) -> BoxedStoreFuture<'a, ()> { + Box::pin(::init(self)) + } + fn health_check<'a>(&'a self) -> BoxedStoreFuture<'a, HealthStatus> { + Box::pin(::health_check(self)) + } + + fn registered_model<'a>(&'a self) -> BoxedStoreFuture<'a, Option> { + Box::pin(::registered_model(self)) + } + fn register_model<'a>(&'a self, sig: &'a ModelSignature) -> BoxedStoreFuture<'a, ()> { + Box::pin(::register_model(self, sig)) + } + + fn insert<'a>(&'a self, record: &'a MemoryRecord) -> BoxedStoreFuture<'a, Uuid> { + Box::pin(::insert(self, record)) + } + fn get<'a>(&'a self, id: Uuid) -> BoxedStoreFuture<'a, Option> { + Box::pin(::get(self, id)) + } + fn update<'a>(&'a self, record: &'a MemoryRecord) -> BoxedStoreFuture<'a, ()> { + Box::pin(::update(self, record)) + } + fn delete<'a>(&'a self, id: Uuid) -> BoxedStoreFuture<'a, ()> { + Box::pin(::delete(self, id)) + } + + fn search<'a>(&'a self, query: &'a SearchQuery) -> BoxedStoreFuture<'a, Vec> { + Box::pin(::search(self, query)) + } + fn fts_search<'a>( + &'a self, + text: &'a str, + limit: usize, + ) -> BoxedStoreFuture<'a, Vec> { + Box::pin(::fts_search(self, text, limit)) + } + fn vector_search<'a>( + &'a self, + embedding: &'a [f32], + limit: usize, + ) -> BoxedStoreFuture<'a, Vec> { + Box::pin(::vector_search(self, embedding, limit)) + } + + fn get_scheduling<'a>( + &'a self, + memory_id: Uuid, + ) -> BoxedStoreFuture<'a, Option> { + Box::pin(::get_scheduling(self, memory_id)) + } + fn update_scheduling<'a>(&'a self, state: &'a SchedulingState) -> BoxedStoreFuture<'a, ()> { + Box::pin(::update_scheduling(self, state)) + } + fn get_due_memories<'a>( + &'a self, + before: DateTime, + limit: usize, + ) -> 
BoxedStoreFuture<'a, Vec<(MemoryRecord, SchedulingState)>> { + Box::pin(::get_due_memories(self, before, limit)) + } + + fn add_edge<'a>(&'a self, edge: &'a MemoryEdge) -> BoxedStoreFuture<'a, ()> { + Box::pin(::add_edge(self, edge)) + } + fn get_edges<'a>( + &'a self, + node_id: Uuid, + edge_type: Option<&'a str>, + ) -> BoxedStoreFuture<'a, Vec> { + Box::pin(::get_edges(self, node_id, edge_type)) + } + fn remove_edge<'a>(&'a self, source: Uuid, target: Uuid) -> BoxedStoreFuture<'a, ()> { + Box::pin(::remove_edge(self, source, target)) + } + fn get_neighbors<'a>( + &'a self, + node_id: Uuid, + depth: usize, + ) -> BoxedStoreFuture<'a, Vec<(MemoryRecord, f64)>> { + Box::pin(::get_neighbors(self, node_id, depth)) + } + + fn list_domains<'a>(&'a self) -> BoxedStoreFuture<'a, Vec> { + Box::pin(::list_domains(self)) + } + fn get_domain<'a>(&'a self, id: &'a str) -> BoxedStoreFuture<'a, Option> { + Box::pin(::get_domain(self, id)) + } + fn upsert_domain<'a>(&'a self, domain: &'a Domain) -> BoxedStoreFuture<'a, ()> { + Box::pin(::upsert_domain(self, domain)) + } + fn delete_domain<'a>(&'a self, id: &'a str) -> BoxedStoreFuture<'a, ()> { + Box::pin(::delete_domain(self, id)) + } + fn classify<'a>(&'a self, embedding: &'a [f32]) -> BoxedStoreFuture<'a, Vec<(String, f64)>> { + Box::pin(::classify(self, embedding)) + } + + fn count<'a>(&'a self) -> BoxedStoreFuture<'a, usize> { + Box::pin(::count(self)) + } + fn get_stats<'a>(&'a self) -> BoxedStoreFuture<'a, StoreStats> { + Box::pin(::get_stats(self)) + } + fn vacuum<'a>(&'a self) -> BoxedStoreFuture<'a, ()> { + Box::pin(::vacuum(self)) + } +} // ---------------------------------------------------------------------------- // UNIT TESTS diff --git a/crates/vestige-core/src/storage/mod.rs b/crates/vestige-core/src/storage/mod.rs index 6926385..5f0a54c 100644 --- a/crates/vestige-core/src/storage/mod.rs +++ b/crates/vestige-core/src/storage/mod.rs @@ -9,8 +9,8 @@ mod sqlite; pub use memory_store::{ ClassificationResult, 
Domain, HealthStatus, LocalMemoryStore, MemoryEdge, MemoryRecord, - MemoryStore, MemoryStoreError, MemoryStoreResult, ModelSignature, SchedulingState, SearchQuery, - SearchResult, StoreStats, + MemoryStore, MemoryStoreError, MemoryStoreResult, MemoryStoreSend, ModelSignature, + SchedulingState, SearchQuery, SearchResult, StoreStats, }; pub use migrations::MIGRATIONS; pub use portable::{ diff --git a/crates/vestige-core/src/storage/sqlite.rs b/crates/vestige-core/src/storage/sqlite.rs index 57eaa86..abc17af 100644 --- a/crates/vestige-core/src/storage/sqlite.rs +++ b/crates/vestige-core/src/storage/sqlite.rs @@ -8441,8 +8441,7 @@ impl SqliteMemoryStore { } } -#[async_trait::async_trait] -impl crate::storage::memory_store::LocalMemoryStore for SqliteMemoryStore { +impl crate::storage::memory_store::MemoryStoreSend for SqliteMemoryStore { async fn init(&self) -> crate::storage::memory_store::MemoryStoreResult<()> { // Migrations run in `new`; this is a no-op for the SQLite backend. Ok(()) @@ -8797,7 +8796,7 @@ impl crate::storage::memory_store::LocalMemoryStore for SqliteMemoryStore { }) }) .collect(); - return Ok(out); + Ok(out) } #[cfg(not(all(feature = "embeddings", feature = "vector-search")))] { @@ -9120,11 +9119,12 @@ impl crate::storage::memory_store::LocalMemoryStore for SqliteMemoryStore { ) -> crate::storage::memory_store::MemoryStoreResult> { use crate::storage::memory_store::{Domain, MemoryStoreError}; + type DomainRow = (String, String, Option>, String, i64, String); let reader = self .reader .lock() .map_err(|_| MemoryStoreError::Init("Reader lock poisoned".into()))?; - let result: Option<(String, String, Option>, String, i64, String)> = reader + let result: Option = reader .query_row( "SELECT id, label, centroid, top_terms, memory_count, created_at FROM domains WHERE id = ?1", rusqlite::params![id], From 194fc6e4c0c85d345ad32e268de88c930ebcec26 Mon Sep 17 00:00:00 2001 From: Jan De Landtsheer Date: Wed, 27 May 2026 16:07:25 +0200 Subject: [PATCH 6/8] 
feat(embedder): swap async-trait for trait_variant + dyn adapter (0001c) Mirror of the 0001a pattern for the Embedder side. - embedder/mod.rs: LocalEmbedder is the source trait declared with native async-fn-in-trait. #[trait_variant::make(EmbedderSend: Send)] derives the Send-bounded variant that backends implement. A hand-written Embedder trait wraps each async method in BoxedEmbedderFuture<'a, T> and forwards sync methods through a blanket impl<T: EmbedderSend> Embedder for T, so Box<dyn Embedder> / Arc<dyn Embedder> stay dyn-safe -- trait_variant 0.1 alone does NOT produce a dyn-safe variant (RPITIT), so the hand-written adapter is required. - embedder/fastembed.rs: drop the #[async_trait::async_trait] attribute and retarget the impl block to EmbedderSend. Adjust the top-level use to bring EmbedderSend into scope (also keeps fastembed::tests' use super::* trait lookups working). - lib.rs: export EmbedderSend alongside the existing Embedder / LocalEmbedder re-exports. The async-trait Cargo dependency is dropped in a follow-up commit so the manifest change stays visible on its own. Verification: cargo test -p vestige-core --features embeddings,vector-search (428) and --no-default-features (370) both green. cargo test --test embedder_trait green (2/2 including Box<dyn Embedder> cast). cargo build --workspace --release green. cargo clippy --workspace --features embeddings,vector-search -- -D warnings clean. grep -rn async_trait crates/ returns zero. 
--- crates/vestige-core/src/embedder/fastembed.rs | 5 +- crates/vestige-core/src/embedder/mod.rs | 75 +++++++++++++++++-- crates/vestige-core/src/lib.rs | 4 +- 3 files changed, 72 insertions(+), 12 deletions(-) diff --git a/crates/vestige-core/src/embedder/fastembed.rs b/crates/vestige-core/src/embedder/fastembed.rs index a4cd87b..a6ac120 100644 --- a/crates/vestige-core/src/embedder/fastembed.rs +++ b/crates/vestige-core/src/embedder/fastembed.rs @@ -4,7 +4,7 @@ #[cfg(feature = "embeddings")] use crate::embeddings::{EMBEDDING_DIMENSIONS, EmbeddingService}; -use super::{EmbedderError, EmbedderResult, LocalEmbedder}; +use super::{EmbedderError, EmbedderResult, EmbedderSend}; pub struct FastembedEmbedder { #[cfg(feature = "embeddings")] @@ -41,8 +41,7 @@ impl Default for FastembedEmbedder { } } -#[async_trait::async_trait] -impl LocalEmbedder for FastembedEmbedder { +impl EmbedderSend for FastembedEmbedder { async fn embed(&self, text: &str) -> EmbedderResult> { #[cfg(feature = "embeddings")] { diff --git a/crates/vestige-core/src/embedder/mod.rs b/crates/vestige-core/src/embedder/mod.rs index 9d43d0d..e8e654a 100644 --- a/crates/vestige-core/src/embedder/mod.rs +++ b/crates/vestige-core/src/embedder/mod.rs @@ -1,5 +1,8 @@ //! Text-to-vector encoding trait. Pluggable per-install. +use std::future::Future; +use std::pin::Pin; + mod fastembed; pub use fastembed::FastembedEmbedder; @@ -18,14 +21,23 @@ pub enum EmbedderError { pub type EmbedderResult = std::result::Result; +/// Boxed Send future returning an `EmbedderResult`, bound to the lifetime +/// of the borrows captured by the call. Used as the return type of every +/// async method on the dyn-compatible `Embedder` trait below. +pub type BoxedEmbedderFuture<'a, T> = + Pin> + Send + 'a>>; + /// Pluggable embedder. The storage layer NEVER calls fastembed directly; /// callers compute vectors via this trait and pass them into `MemoryStore`. 
/// -/// `#[async_trait::async_trait]` makes every `async fn` return a -/// `Pin>`, which is required for `Box` -/// and `Arc` to be dyn-compatible. -#[async_trait::async_trait] -pub trait LocalEmbedder: Send + Sync + 'static { +/// `LocalEmbedder` is the source-of-truth trait declared with native +/// async-fn-in-trait. `#[trait_variant::make(EmbedderSend: Send)]` derives +/// a Send-bounded variant that backends actually implement (the +/// trait_variant 0.1.x blanket goes variant -> source). The dyn-compatible +/// public surface is the `Embedder` trait declared below, which wraps every +/// async method in `Pin>`. +#[trait_variant::make(EmbedderSend: Send)] +pub trait LocalEmbedder: Sync + 'static { async fn embed(&self, text: &str) -> EmbedderResult>; fn model_name(&self) -> &str; @@ -52,6 +64,53 @@ pub trait LocalEmbedder: Send + Sync + 'static { } } -/// Type alias: `Embedder` is the dyn-compatible, Send+Sync variant. -/// Both names refer to the same `async_trait`-annotated trait. -pub use LocalEmbedder as Embedder; +/// Dyn-compatible embedder trait. +/// +/// `EmbedderSend` above is the trait users implement; it uses native +/// async-fn-in-trait return types (RPITIT), which gives zero-allocation +/// static dispatch but is not dyn-safe. This trait wraps every async +/// method in `Pin>` so `Box` +/// and `Arc` work for the cognitive module surface and +/// the Phase 1 integration tests. +/// +/// Implementations should not target this trait directly; the blanket +/// `impl Embedder for T` adapts every Send-variant +/// implementation automatically. 
+pub trait Embedder: Send + Sync + 'static { + fn embed<'a>(&'a self, text: &'a str) -> BoxedEmbedderFuture<'a, Vec>; + fn embed_batch<'a>( + &'a self, + texts: &'a [&'a str], + ) -> BoxedEmbedderFuture<'a, Vec>>; + fn model_name(&self) -> &str; + fn dimension(&self) -> usize; + fn model_hash(&self) -> String; + fn signature(&self) -> crate::storage::ModelSignature; +} + +impl Embedder for T +where + T: EmbedderSend, +{ + fn embed<'a>(&'a self, text: &'a str) -> BoxedEmbedderFuture<'a, Vec> { + Box::pin(::embed(self, text)) + } + fn embed_batch<'a>( + &'a self, + texts: &'a [&'a str], + ) -> BoxedEmbedderFuture<'a, Vec>> { + Box::pin(::embed_batch(self, texts)) + } + fn model_name(&self) -> &str { + ::model_name(self) + } + fn dimension(&self) -> usize { + ::dimension(self) + } + fn model_hash(&self) -> String { + ::model_hash(self) + } + fn signature(&self) -> crate::storage::ModelSignature { + ::signature(self) + } +} diff --git a/crates/vestige-core/src/lib.rs b/crates/vestige-core/src/lib.rs index f8a35d6..15dbdbf 100644 --- a/crates/vestige-core/src/lib.rs +++ b/crates/vestige-core/src/lib.rs @@ -198,7 +198,9 @@ pub use storage::{ }; // Embedder trait and implementations -pub use embedder::{Embedder, EmbedderError, EmbedderResult, FastembedEmbedder, LocalEmbedder}; +pub use embedder::{ + Embedder, EmbedderError, EmbedderResult, EmbedderSend, FastembedEmbedder, LocalEmbedder, +}; // Consolidation (sleep-inspired memory processing) pub use consolidation::SleepConsolidation; From 093bb2d4b530740b9c761e6fe3f9f6c1927f1144 Mon Sep 17 00:00:00 2001 From: Jan De Landtsheer Date: Wed, 27 May 2026 16:07:45 +0200 Subject: [PATCH 7/8] chore(vestige-core): drop async-trait dependency cargo rm async-trait. Last usage was the FastembedEmbedder impl attribute, removed in the preceding 0001c commit; the MemoryStore side stopped using async-trait at 0001a. Verification: grep -rn async_trait crates/ returns zero hits. 
grep -rn async-trait --include=Cargo.toml crates/ returns zero hits. Cargo.lock no longer references the async-trait package. --- Cargo.lock | 12 ------------ crates/vestige-core/Cargo.toml | 1 - 2 files changed, 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8be114c..20e3853 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -164,17 +164,6 @@ dependencies = [ "stable_deref_trait", ] -[[package]] -name = "async-trait" -version = "0.1.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "atomic-waker" version = "1.1.2" @@ -4686,7 +4675,6 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" name = "vestige-core" version = "2.1.26" dependencies = [ - "async-trait", "blake3", "candle-core", "chrono", diff --git a/crates/vestige-core/Cargo.toml b/crates/vestige-core/Cargo.toml index 25a0495..05c32f9 100644 --- a/crates/vestige-core/Cargo.toml +++ b/crates/vestige-core/Cargo.toml @@ -127,7 +127,6 @@ usearch = { version = "=2.23.0", optional = true } lru = "0.16" trait-variant = "0.1" blake3 = "1" -async-trait = "0.1" [dev-dependencies] tempfile = "3" From b34203bcc5a9f10412fed794d230ea2dc1a15c95 Mon Sep 17 00:00:00 2001 From: Sam Valladares Date: Thu, 18 Jun 2026 19:14:39 -0500 Subject: [PATCH 8/8] fix(storage): finish PR 61 rebase cleanup --- crates/vestige-core/src/embedder/mod.rs | 13 +++---------- crates/vestige-core/src/storage/memory_store.rs | 11 +++++++---- tests/phase_1/domain_column_migration.rs | 12 ++++++------ 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/crates/vestige-core/src/embedder/mod.rs b/crates/vestige-core/src/embedder/mod.rs index e8e654a..c13368b 100644 --- a/crates/vestige-core/src/embedder/mod.rs +++ b/crates/vestige-core/src/embedder/mod.rs @@ -24,8 +24,7 @@ pub type EmbedderResult = std::result::Result; /// Boxed Send 
future returning an `EmbedderResult`, bound to the lifetime /// of the borrows captured by the call. Used as the return type of every /// async method on the dyn-compatible `Embedder` trait below. -pub type BoxedEmbedderFuture<'a, T> = - Pin> + Send + 'a>>; +pub type BoxedEmbedderFuture<'a, T> = Pin> + Send + 'a>>; /// Pluggable embedder. The storage layer NEVER calls fastembed directly; /// callers compute vectors via this trait and pass them into `MemoryStore`. @@ -78,10 +77,7 @@ pub trait LocalEmbedder: Sync + 'static { /// implementation automatically. pub trait Embedder: Send + Sync + 'static { fn embed<'a>(&'a self, text: &'a str) -> BoxedEmbedderFuture<'a, Vec>; - fn embed_batch<'a>( - &'a self, - texts: &'a [&'a str], - ) -> BoxedEmbedderFuture<'a, Vec>>; + fn embed_batch<'a>(&'a self, texts: &'a [&'a str]) -> BoxedEmbedderFuture<'a, Vec>>; fn model_name(&self) -> &str; fn dimension(&self) -> usize; fn model_hash(&self) -> String; @@ -95,10 +91,7 @@ where fn embed<'a>(&'a self, text: &'a str) -> BoxedEmbedderFuture<'a, Vec> { Box::pin(::embed(self, text)) } - fn embed_batch<'a>( - &'a self, - texts: &'a [&'a str], - ) -> BoxedEmbedderFuture<'a, Vec>> { + fn embed_batch<'a>(&'a self, texts: &'a [&'a str]) -> BoxedEmbedderFuture<'a, Vec>> { Box::pin(::embed_batch(self, texts)) } fn model_name(&self) -> &str { diff --git a/crates/vestige-core/src/storage/memory_store.rs b/crates/vestige-core/src/storage/memory_store.rs index 2869a4e..010ee97 100644 --- a/crates/vestige-core/src/storage/memory_store.rs +++ b/crates/vestige-core/src/storage/memory_store.rs @@ -267,8 +267,7 @@ pub trait LocalMemoryStore: Sync + 'static { /// of the borrows captured by the call (typically `&self` plus any reference /// arguments). Used as the return type of every method on the dyn-compatible /// `MemoryStore` trait below. -pub type BoxedStoreFuture<'a, T> = - Pin> + Send + 'a>>; +pub type BoxedStoreFuture<'a, T> = Pin> + Send + 'a>>; /// Dyn-compatible storage trait. 
/// @@ -387,7 +386,9 @@ where embedding: &'a [f32], limit: usize, ) -> BoxedStoreFuture<'a, Vec> { - Box::pin(::vector_search(self, embedding, limit)) + Box::pin(::vector_search( + self, embedding, limit, + )) } fn get_scheduling<'a>( @@ -404,7 +405,9 @@ where before: DateTime, limit: usize, ) -> BoxedStoreFuture<'a, Vec<(MemoryRecord, SchedulingState)>> { - Box::pin(::get_due_memories(self, before, limit)) + Box::pin(::get_due_memories( + self, before, limit, + )) } fn add_edge<'a>(&'a self, edge: &'a MemoryEdge) -> BoxedStoreFuture<'a, ()> { diff --git a/tests/phase_1/domain_column_migration.rs b/tests/phase_1/domain_column_migration.rs index 67e318b..031ca65 100644 --- a/tests/phase_1/domain_column_migration.rs +++ b/tests/phase_1/domain_column_migration.rs @@ -6,7 +6,7 @@ use uuid::Uuid; use vestige_core::storage::{MemoryRecord, MemoryStore, SqliteMemoryStore}; #[tokio::test] -async fn fresh_db_has_v12_schema() { +async fn fresh_db_has_v16_schema() { let dir = tempdir().unwrap(); let db = dir.path().join("fresh.db"); let _store = SqliteMemoryStore::new(Some(db.clone())).expect("create"); @@ -50,13 +50,13 @@ async fn v11_db_upgrades_cleanly() { next_review, scheduled_days, has_embedding) \ VALUES (?1, ?2, 'fact', datetime('now'), datetime('now'), datetime('now'), \ 1.0, 0.3, 0, 0, 'new', 1.0, 1.0, 1.0, datetime('now'), 1, 0)", - rusqlite::params![format!("pre-v12-{i}"), format!("content {i}"),], + rusqlite::params![format!("pre-v16-{i}"), format!("content {i}"),], ) - .expect("insert pre-v12 row"); + .expect("insert pre-v16 row"); } } // Upgrade by opening through SqliteMemoryStore (triggers full migration) - let _store = SqliteMemoryStore::new(Some(db.clone())).expect("open with v12"); + let _store = SqliteMemoryStore::new(Some(db.clone())).expect("open with v16"); // Check all 5 rows have empty domains/domain_scores let conn = rusqlite::Connection::open(&db).expect("open raw"); let count: i64 = conn @@ -68,7 +68,7 @@ async fn v11_db_upgrades_cleanly() { 
.expect("count"); assert_eq!( count, 5, - "all pre-v12 rows must have empty domains/domain_scores" + "all pre-v16 rows must have empty domains/domain_scores" ); } @@ -157,5 +157,5 @@ async fn domains_table_exists() { |row| row.get(0), ) .expect("query"); - assert_eq!(count, 1, "domains table must exist after V12 migration"); + assert_eq!(count, 1, "domains table must exist after V16 migration"); }