Merge remote-tracking branch 'origin/main' into codex/opencode-sigill-salvage

This commit is contained in:
Sam Valladares 2026-06-18 19:59:25 -05:00
commit ea5ed28081
26 changed files with 6997 additions and 91 deletions

103
Cargo.lock generated
View file

@ -143,6 +143,12 @@ dependencies = [
"syn",
]
[[package]]
name = "arrayref"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
[[package]]
name = "arrayvec"
version = "0.7.6"
@ -311,6 +317,20 @@ dependencies = [
"core2",
]
[[package]]
name = "blake3"
version = "1.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
"cpufeatures 0.3.0",
]
[[package]]
name = "block"
version = "0.1.6"
@ -642,6 +662,12 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "constant_time_eq"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
[[package]]
name = "core-foundation"
version = "0.9.4"
@ -697,6 +723,15 @@ dependencies = [
"libc",
]
[[package]]
name = "cpufeatures"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
dependencies = [
"libc",
]
[[package]]
name = "crc32fast"
version = "1.5.0"
@ -2282,12 +2317,10 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.95"
version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca"
checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
dependencies = [
"cfg-if",
"futures-util",
"once_cell",
"wasm-bindgen",
]
@ -3181,9 +3214,9 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
[[package]]
name = "portable-atomic-util"
version = "0.2.6"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3"
checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618"
dependencies = [
"portable-atomic",
]
@ -3822,7 +3855,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if",
"cpufeatures",
"cpufeatures 0.2.17",
"digest",
]
@ -3833,7 +3866,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
"cfg-if",
"cpufeatures",
"cpufeatures 0.2.17",
"digest",
]
@ -4356,6 +4389,17 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "trait-variant"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70977707304198400eb4835a78f6a9f928bf41bba420deb8fdb175cd965d77a7"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "try-lock"
version = "0.2.5"
@ -4631,6 +4675,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
name = "vestige-core"
version = "2.1.26"
dependencies = [
"blake3",
"candle-core",
"chrono",
"criterion",
@ -4646,6 +4691,7 @@ dependencies = [
"thiserror 2.0.18",
"tokio",
"tracing",
"trait-variant",
"usearch",
"uuid",
]
@ -4692,6 +4738,19 @@ dependencies = [
"vestige-core",
]
[[package]]
name = "vestige-phase-1-tests"
version = "0.0.1"
dependencies = [
"chrono",
"rusqlite",
"serde_json",
"tempfile",
"tokio",
"uuid",
"vestige-core",
]
[[package]]
name = "walkdir"
version = "2.5.0"
@ -4737,9 +4796,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen"
version = "0.2.118"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89"
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
dependencies = [
"cfg-if",
"once_cell",
@ -4750,19 +4809,23 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.68"
version = "0.4.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8"
checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8"
dependencies = [
"cfg-if",
"futures-util",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.118"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed"
checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -4770,9 +4833,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.118"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904"
checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
dependencies = [
"bumpalo",
"proc-macro2",
@ -4783,9 +4846,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.118"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129"
checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
dependencies = [
"unicode-ident",
]
@ -4839,9 +4902,9 @@ dependencies = [
[[package]]
name = "web-sys"
version = "0.3.95"
version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d"
checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9"
dependencies = [
"js-sys",
"wasm-bindgen",

View file

@ -4,6 +4,7 @@ members = [
"crates/vestige-core",
"crates/vestige-mcp",
"tests/e2e",
"tests/phase_1",
]
exclude = [
"fastembed-rs",

View file

@ -248,7 +248,7 @@ This isn't a key-value store with an embedding model bolted on. Vestige implemen
---
## 🛠 25 MCP Tools
## 🛠 MCP Tools
### Context Packets
| Tool | What It Does |
@ -276,6 +276,7 @@ This isn't a key-value store with an embedding model bolted on. Vestige implemen
|------|-------------|
| `memory_health` | Retention dashboard — distribution, trends, recommendations |
| `memory_graph` | Knowledge graph export — force-directed layout, up to 200 nodes |
| `composed_graph` | Composition ledger — recent composed memory sets, neighbors, outcome labels, bounty/research lanes, and never-composed frontier candidates |
### Scoring & Dedup
| Tool | What It Does |

View file

@ -129,6 +129,8 @@ usearch = { version = "=2.23.0", default-features = false, optional = true }
# LRU cache for query embeddings
lru = "0.16"
trait-variant = "0.1"
blake3 = "1"
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,181 @@
//! `FastembedEmbedder` -- adapts the existing `EmbeddingService` to the
//! `LocalEmbedder` trait.
#[cfg(feature = "embeddings")]
use crate::embeddings::{EMBEDDING_DIMENSIONS, EmbeddingService};
use super::{EmbedderError, EmbedderResult, EmbedderSend};
/// Embedder that adapts the crate's `EmbeddingService` to the embedder traits.
///
/// The wrapped service is only compiled in when the `embeddings` feature is
/// enabled; without it the struct holds just the hash cache.
pub struct FastembedEmbedder {
/// Wrapped embedding service (exists only with the `embeddings` feature).
#[cfg(feature = "embeddings")]
inner: EmbeddingService,
/// Model hash, computed on the first `model_hash()` call and reused afterwards.
cached_hash: std::sync::OnceLock<String>,
}
impl FastembedEmbedder {
    /// Builds an embedder with an empty model-hash cache.
    ///
    /// When the `embeddings` feature is enabled the wrapped service is
    /// created through `EmbeddingService::new()`.
    pub fn new() -> Self {
        let cached_hash = std::sync::OnceLock::new();
        Self {
            #[cfg(feature = "embeddings")]
            inner: EmbeddingService::new(),
            cached_hash,
        }
    }

    /// Derives the hex-encoded blake3 signature hash for a model.
    ///
    /// The digest is fed, in this order: the UTF-8 bytes of `name`, `dim`
    /// widened to a little-endian `u64`, and this crate's
    /// `CARGO_PKG_VERSION` string.
    fn compute_hash(name: &str, dim: usize) -> String {
        // The ONNX bytes fastembed loads cannot be reached at runtime, so the
        // signature is `(name, dim, vestige-core CARGO_PKG_VERSION)` instead.
        // Should fastembed's output change between minor versions, bumping
        // the crate version produces a different hash -- which is precisely
        // the drift this value exists to surface.
        let dim_le = (dim as u64).to_le_bytes();
        let crate_version = env!("CARGO_PKG_VERSION");

        let mut digest_state = blake3::Hasher::new();
        digest_state
            .update(name.as_bytes())
            .update(&dim_le)
            .update(crate_version.as_bytes());
        digest_state.finalize().to_hex().to_string()
    }
}
impl Default for FastembedEmbedder {
fn default() -> Self {
Self::new()
}
}
impl EmbedderSend for FastembedEmbedder {
    /// Embeds a single `text`.
    ///
    /// With the `embeddings` feature the call is forwarded to the wrapped
    /// service and its failure is reported as `EmbedderError::EmbedFailed`.
    /// Without the feature this always returns `EmbedderError::Init`.
    ///
    /// NOTE(review): the inner call is synchronous and contains no `.await`,
    /// so it appears to run inline on the polling task -- confirm whether
    /// callers should offload it to a blocking pool.
    async fn embed(&self, text: &str) -> EmbedderResult<Vec<f32>> {
        #[cfg(feature = "embeddings")]
        {
            match self.inner.embed(text) {
                Ok(embedding) => Ok(embedding.vector),
                Err(cause) => Err(EmbedderError::EmbedFailed(cause.to_string())),
            }
        }
        #[cfg(not(feature = "embeddings"))]
        {
            let _ = text;
            let reason = "embeddings feature not enabled".to_string();
            Err(EmbedderError::Init(reason))
        }
    }

    /// Model name: taken from the wrapped service when the `embeddings`
    /// feature is on, otherwise a fixed literal.
    fn model_name(&self) -> &str {
        #[cfg(feature = "embeddings")]
        {
            self.inner.model_name()
        }
        #[cfg(not(feature = "embeddings"))]
        {
            "nomic-ai/nomic-embed-text-v1.5"
        }
    }

    /// Vector dimension: `EMBEDDING_DIMENSIONS` when the `embeddings` feature
    /// is on, otherwise the literal `256`.
    fn dimension(&self) -> usize {
        #[cfg(feature = "embeddings")]
        {
            EMBEDDING_DIMENSIONS
        }
        #[cfg(not(feature = "embeddings"))]
        {
            256
        }
    }

    /// Returns the model hash, computing it on first use and serving a clone
    /// of the cached string afterwards.
    fn model_hash(&self) -> String {
        let cached: &String = self
            .cached_hash
            .get_or_init(|| Self::compute_hash(self.model_name(), self.dimension()));
        cached.clone()
    }

    /// Embeds every entry of `texts` through the wrapped service's batch call.
    ///
    /// Error behaviour mirrors `embed`: `EmbedFailed` on a service error,
    /// `Init` when the `embeddings` feature is disabled.
    async fn embed_batch(&self, texts: &[&str]) -> EmbedderResult<Vec<Vec<f32>>> {
        #[cfg(feature = "embeddings")]
        {
            match self.inner.embed_batch(texts) {
                Ok(embeddings) => {
                    let vectors: Vec<Vec<f32>> =
                        embeddings.into_iter().map(|item| item.vector).collect();
                    Ok(vectors)
                }
                Err(cause) => Err(EmbedderError::EmbedFailed(cause.to_string())),
            }
        }
        #[cfg(not(feature = "embeddings"))]
        {
            let _ = texts;
            let reason = "embeddings feature not enabled".to_string();
            Err(EmbedderError::Init(reason))
        }
    }
}
// ============================================================================
// UNIT TESTS
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute per-component tolerance when comparing batched against
    /// sequential embeddings. Batched and single-item inference are not
    /// guaranteed to be bit-identical, so exact `f32` equality would make the
    /// test brittle across platforms and runtime versions.
    #[cfg(feature = "embeddings")]
    const BATCH_TOLERANCE: f32 = 1e-4;

    /// Asserts two vectors have the same length and agree component-wise
    /// within `BATCH_TOLERANCE`.
    #[cfg(feature = "embeddings")]
    fn assert_vectors_close(actual: &[f32], expected: &[f32], label: &str) {
        assert_eq!(actual.len(), expected.len(), "{label}: length mismatch");
        for (idx, (a, b)) in actual.iter().zip(expected).enumerate() {
            assert!(
                (a - b).abs() <= BATCH_TOLERANCE,
                "{label}: component {idx} differs: {a} vs {b}"
            );
        }
    }

    #[test]
    fn embedder_reports_correct_name() {
        let e = FastembedEmbedder::new();
        assert!(
            e.model_name().contains("nomic"),
            "model name should contain 'nomic'"
        );
    }

    #[test]
    fn embedder_reports_256_dimension() {
        let e = FastembedEmbedder::new();
        assert_eq!(e.dimension(), 256);
    }

    #[test]
    fn embedder_hash_is_stable() {
        let e = FastembedEmbedder::new();
        let h1 = e.model_hash();
        let h2 = e.model_hash();
        assert_eq!(h1, h2, "model_hash must be stable across calls");
    }

    #[test]
    fn embedder_hash_is_lowercase_hex() {
        // The trait documents the hash as 64 lowercase hex characters.
        let hash = FastembedEmbedder::new().model_hash();
        assert_eq!(hash.len(), 64);
        assert!(
            hash.chars()
                .all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)),
            "hash must be lowercase hex, got {hash}"
        );
    }

    #[test]
    fn embedder_hash_includes_crate_version() {
        let e = FastembedEmbedder::new();
        let name = e.model_name().to_string();
        let dim = e.dimension();

        // Rebuild the digest independently of `compute_hash`, so this test
        // fails if the crate version is ever dropped from the hashed input.
        let mut hasher = blake3::Hasher::new();
        hasher.update(name.as_bytes());
        hasher.update(&(dim as u64).to_le_bytes());
        let without_version = hasher.clone().finalize().to_hex().to_string();
        hasher.update(env!("CARGO_PKG_VERSION").as_bytes());
        let expected = hasher.finalize().to_hex().to_string();

        let got = e.model_hash();
        assert_eq!(got, expected);
        assert_ne!(
            got, without_version,
            "hash must change when the crate version is part of the input"
        );
        // The cached accessor and the raw helper must agree.
        assert_eq!(got, FastembedEmbedder::compute_hash(&name, dim));
    }

    #[test]
    fn embedder_signature_matches_accessors() {
        let e = FastembedEmbedder::new();
        let sig = e.signature();
        assert_eq!(sig.name, e.model_name());
        assert_eq!(sig.dimension, e.dimension());
        assert_eq!(sig.hash, e.model_hash());
    }

    #[cfg(feature = "embeddings")]
    #[test]
    fn embedder_embed_smoke() {
        let e = FastembedEmbedder::new();
        let rt = tokio::runtime::Runtime::new().unwrap();
        let vec = rt.block_on(e.embed("hello world")).expect("embed");
        assert_eq!(vec.len(), 256);
    }

    #[cfg(feature = "embeddings")]
    #[test]
    fn embedder_embed_batch_matches_sequential() {
        let e = FastembedEmbedder::new();
        let rt = tokio::runtime::Runtime::new().unwrap();
        let texts = ["alpha beta", "gamma delta"];
        let batch = rt.block_on(e.embed_batch(&texts)).expect("batch");
        // Check the length first so a short result fails with a clear message
        // instead of an index-out-of-bounds panic.
        assert_eq!(batch.len(), texts.len(), "one vector per input text");
        let seq_a = rt.block_on(e.embed(texts[0])).expect("seq a");
        let seq_b = rt.block_on(e.embed(texts[1])).expect("seq b");
        assert_vectors_close(&batch[0], &seq_a, "texts[0]");
        assert_vectors_close(&batch[1], &seq_b, "texts[1]");
    }
}

View file

@ -0,0 +1,109 @@
//! Text-to-vector encoding trait. Pluggable per-install.
use std::future::Future;
use std::pin::Pin;
mod fastembed;
pub use fastembed::FastembedEmbedder;
/// Error returned by every `Embedder` method.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum EmbedderError {
/// The embedder could not be initialized; the payload is a human-readable reason.
#[error("embedder initialization failed: {0}")]
Init(String),
/// Producing an embedding failed; the payload is the underlying error text.
#[error("embedding generation failed: {0}")]
EmbedFailed(String),
/// The supplied input was rejected; the payload describes why.
#[error("invalid input: {0}")]
InvalidInput(String),
}
/// Shorthand for `Result<T, EmbedderError>`, the result type of the embedder traits.
pub type EmbedderResult<T> = std::result::Result<T, EmbedderError>;
/// Boxed `Send` future resolving to an `EmbedderResult<T>`.
///
/// The `'a` lifetime bounds the future by the borrows captured by the call.
/// This is the return type of every async method on the dyn-compatible
/// `Embedder` trait below.
pub type BoxedEmbedderFuture<'a, T> = Pin<Box<dyn Future<Output = EmbedderResult<T>> + Send + 'a>>;
/// Pluggable embedder. The storage layer NEVER calls fastembed directly;
/// callers compute vectors via this trait and pass them into `MemoryStore`.
///
/// `LocalEmbedder` is the source-of-truth trait declared with native
/// async-fn-in-trait. `#[trait_variant::make(EmbedderSend: Send)]` derives
/// a Send-bounded variant that backends actually implement (the
/// trait_variant 0.1.x blanket goes variant -> source). The dyn-compatible
/// public surface is the `Embedder` trait declared below, which wraps every
/// async method in `Pin<Box<dyn Future + Send + '_>>`.
#[trait_variant::make(EmbedderSend: Send)]
pub trait LocalEmbedder: Sync + 'static {
/// Encodes a single `text` into a vector.
///
/// Fails with an `EmbedderError` when no vector can be produced.
async fn embed(&self, text: &str) -> EmbedderResult<Vec<f32>>;
/// Name identifying the embedding model; one of the inputs to `signature()`.
fn model_name(&self) -> &str;
/// Dimension reported for this embedder; recorded in the signature built
/// by `signature()`.
fn dimension(&self) -> usize;
/// Stable blake3 hash of (model_name || dimension || vestige-core crate version).
/// Lowercase hex, 64 chars.
///
/// Used by `MemoryStore::register_model` to detect silent model drift
/// (e.g. a fastembed minor upgrade that changes vector output).
fn model_hash(&self) -> String;
/// Encodes several texts in one call.
///
/// NOTE(review): presumably yields one vector per entry of `texts`, in the
/// same order -- confirm against each implementation.
async fn embed_batch(&self, texts: &[&str]) -> EmbedderResult<Vec<Vec<f32>>>;
/// Returns the `ModelSignature` describing this embedder. Convenience
/// wrapper over the three accessors above.
fn signature(&self) -> crate::storage::ModelSignature {
crate::storage::ModelSignature {
name: self.model_name().to_string(),
dimension: self.dimension(),
hash: self.model_hash(),
}
}
}
/// Dyn-compatible embedder trait.
///
/// `EmbedderSend` above is the trait users implement; it uses native
/// async-fn-in-trait return types (RPITIT), which gives zero-allocation
/// static dispatch but is not dyn-safe. This trait wraps every async
/// method in `Pin<Box<dyn Future + Send + '_>>` so `Box<dyn Embedder>`
/// and `Arc<dyn Embedder>` work for the cognitive module surface and
/// the Phase 1 integration tests.
///
/// Implementations should not target this trait directly; the blanket
/// `impl<T: EmbedderSend> Embedder for T` adapts every Send-variant
/// implementation automatically.
pub trait Embedder: Send + Sync + 'static {
/// Boxed-future form of `EmbedderSend::embed`: encodes a single `text`.
fn embed<'a>(&'a self, text: &'a str) -> BoxedEmbedderFuture<'a, Vec<f32>>;
/// Boxed-future form of `EmbedderSend::embed_batch`: encodes several texts in one call.
fn embed_batch<'a>(&'a self, texts: &'a [&'a str]) -> BoxedEmbedderFuture<'a, Vec<Vec<f32>>>;
/// Name identifying the embedding model.
fn model_name(&self) -> &str;
/// Dimension reported for this embedder.
fn dimension(&self) -> usize;
/// Hash identifying the model; see `LocalEmbedder::model_hash` for its format.
fn model_hash(&self) -> String;
/// `ModelSignature` describing this embedder.
fn signature(&self) -> crate::storage::ModelSignature;
}
impl<T> Embedder for T
where
T: EmbedderSend,
{
fn embed<'a>(&'a self, text: &'a str) -> BoxedEmbedderFuture<'a, Vec<f32>> {
Box::pin(<T as EmbedderSend>::embed(self, text))
}
fn embed_batch<'a>(&'a self, texts: &'a [&'a str]) -> BoxedEmbedderFuture<'a, Vec<Vec<f32>>> {
Box::pin(<T as EmbedderSend>::embed_batch(self, texts))
}
fn model_name(&self) -> &str {
<T as EmbedderSend>::model_name(self)
}
fn dimension(&self) -> usize {
<T as EmbedderSend>::dimension(self)
}
fn model_hash(&self) -> String {
<T as EmbedderSend>::model_hash(self)
}
fn signature(&self) -> crate::storage::ModelSignature {
<T as EmbedderSend>::signature(self)
}
}

View file

@ -83,6 +83,7 @@
/// Optional `vestige.toml` configuration (Phase 2: Configurable Output).
pub mod config;
pub mod consolidation;
pub mod embedder;
pub mod fsrs;
pub mod fts;
pub mod memory;
@ -155,13 +156,50 @@ pub use fsrs::{
};
// Configuration (vestige.toml output profiles / defaults)
pub use config::{OutputConfig, OutputDefaults, OutputProfile, VestigeConfig, CONFIG_FILE};
pub use config::{CONFIG_FILE, OutputConfig, OutputDefaults, OutputProfile, VestigeConfig};
// Storage layer
pub use storage::{
ConnectionRecord, ConsolidationHistoryRecord, DreamHistoryRecord, InsightRecord,
IntentionRecord, PORTABLE_ARCHIVE_FORMAT, PortableArchive, PortableImportMode,
PortableImportReport, Result, SmartIngestResult, StateTransitionRecord, Storage, StorageError,
ClassificationResult,
CompositionEventRecord,
CompositionMemberRecord,
CompositionNeighborRecord,
CompositionOutcomeRecord,
ConnectionRecord,
ConsolidationHistoryRecord,
Domain,
DreamHistoryRecord,
HealthStatus,
InsightRecord,
IntentionRecord,
LocalMemoryStore,
MemoryEdge,
MemoryRecord,
MemoryStore,
MemoryStoreError,
MemoryStoreResult,
ModelSignature,
NeverComposedCandidate,
PORTABLE_ARCHIVE_FORMAT,
PortableArchive,
PortableImportMode,
PortableImportReport,
Result,
SchedulingState,
SearchQuery,
SmartIngestResult,
SqliteMemoryStore,
StateTransitionRecord,
Storage,
StorageError,
StoreStats,
// Note: storage::SearchResult is intentionally not re-exported here to avoid
// collision with memory::SearchResult. Use vestige_core::storage::SearchResult directly.
};
// Embedder trait and implementations
pub use embedder::{
Embedder, EmbedderError, EmbedderResult, EmbedderSend, FastembedEmbedder, LocalEmbedder,
};
// Consolidation (sleep-inspired memory processing)
@ -220,6 +258,9 @@ pub use advanced::{
LabileState,
Language,
MaintenanceType,
// Merge / Supersede controls (Phase 3)
MatchClass,
MatchSignals,
// Memory chains
MemoryChainBuilder,
// Memory compression
@ -230,18 +271,15 @@ pub use advanced::{
MemoryPath,
MemoryReplay,
MemorySnapshot,
// Merge / Supersede controls (Phase 3)
MatchClass,
MatchSignals,
MergeCandidate,
MergeOperation,
MergePlan,
MergePolicy,
MergeStrategy,
Modification,
PlanKind,
Pattern,
PatternType,
PlanKind,
PredictedMemory,
PredictionContext,
PredictionErrorConfig,

View file

@ -0,0 +1,516 @@
//! Backend-agnostic memory store trait.
//!
//! This is the single abstraction every cognitive module sits above. It is
//! intentionally flat: one trait, ~25 methods, no sub-traits.
use std::collections::HashMap;
use std::future::Future;
use std::pin::Pin;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
// ----------------------------------------------------------------------------
// ERROR
// ----------------------------------------------------------------------------
/// Error returned by every `LocalMemoryStore` / `MemoryStore` method.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum MemoryStoreError {
/// The requested item does not exist; the payload identifies it.
#[error("not found: {0}")]
NotFound(String),
/// Failure reported by the storage backend. Database, I/O and
/// invalid-timestamp `StorageError`s are converted into this variant.
#[error("backend error: {0}")]
Backend(String),
/// The model registered with the store differs from the embedder's model.
#[error(
"embedding model mismatch: store registered {registered_name} (dim {registered_dim}, \
hash {registered_hash}), embedder is {actual_name} (dim {actual_dim}, hash {actual_hash})"
)]
ModelMismatch {
/// Name of the model registered with the store.
registered_name: String,
/// Dimension of the model registered with the store.
registered_dim: usize,
/// Hash of the model registered with the store.
registered_hash: String,
/// Name reported by the embedder in use.
actual_name: String,
/// Dimension reported by the embedder in use.
actual_dim: usize,
/// Hash reported by the embedder in use.
actual_hash: String,
},
/// The supplied input was rejected; the payload describes why.
#[error("invalid input: {0}")]
InvalidInput(String),
/// The store could not be initialized; the payload describes why.
#[error("initialization error: {0}")]
Init(String),
}
/// Maps the legacy `StorageError` onto the backend-agnostic error type.
///
/// `NotFound` and `Init` keep their meaning; database, I/O and
/// invalid-timestamp failures are all folded into `Backend` as text.
impl From<crate::storage::StorageError> for MemoryStoreError {
    fn from(err: crate::storage::StorageError) -> Self {
        use crate::storage::StorageError;

        match err {
            StorageError::NotFound(what) => Self::NotFound(what),
            StorageError::Database(cause) => Self::Backend(cause.to_string()),
            StorageError::Io(cause) => Self::Backend(cause.to_string()),
            StorageError::InvalidTimestamp(raw) => {
                Self::Backend(format!("invalid timestamp: {raw}"))
            }
            StorageError::Init(reason) => Self::Init(reason),
        }
    }
}
/// Shorthand for `Result<T, MemoryStoreError>`, the result type of the store traits.
pub type MemoryStoreResult<T> = std::result::Result<T, MemoryStoreError>;
// ----------------------------------------------------------------------------
// DATA TYPES
// ----------------------------------------------------------------------------
/// Backend-agnostic memory record.
///
/// Phase 1 intentionally keeps this type independent of `KnowledgeNode` to
/// avoid dragging 30+ legacy fields through the trait surface. The SQLite
/// backend converts between `MemoryRecord` and `KnowledgeNode` at the
/// boundary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryRecord {
/// Identifier of the memory.
pub id: Uuid,
/// Empty = unclassified. Populated in Phase 4.
pub domains: Vec<String>,
/// Raw similarity per domain centroid. Empty until Phase 4 runs clustering.
pub domain_scores: HashMap<String, f64>,
/// Content of the memory.
pub content: String,
/// Node type label. NOTE(review): the set of accepted values is not visible here.
pub node_type: String,
/// Tags attached to the memory.
pub tags: Vec<String>,
/// Embedding vector, or `None` when no vector is attached.
pub embedding: Option<Vec<f32>>,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
/// Last-update timestamp (UTC).
pub updated_at: DateTime<Utc>,
/// Free-form JSON metadata.
pub metadata: serde_json::Value,
}
/// FSRS-6 scheduling state, one row per memory.
///
/// NOTE(review): units and valid ranges of the numeric fields are defined by
/// the FSRS implementation and are not visible here -- confirm before relying
/// on them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchedulingState {
/// Memory this state belongs to.
pub memory_id: Uuid,
/// FSRS stability value.
pub stability: f64,
/// FSRS difficulty value.
pub difficulty: f64,
/// FSRS retrievability value.
pub retrievability: f64,
/// Time of the last review, or `None` if there is none recorded.
pub last_review: Option<DateTime<Utc>>,
/// Time of the next scheduled review, or `None` if none is scheduled.
pub next_review: Option<DateTime<Utc>>,
/// Review repetition counter.
pub reps: u32,
/// Lapse counter.
pub lapses: u32,
}
/// Hybrid search request.
///
/// Every filter is optional. NOTE(review): the derived `Default` sets
/// `limit` to `0`; how a backend treats a zero limit is not visible here, so
/// callers building a query from `Default` should set `limit` explicitly.
#[derive(Debug, Clone, Default)]
pub struct SearchQuery {
/// Optional domain filter.
pub domains: Option<Vec<String>>,
/// Optional query text.
pub text: Option<String>,
/// Optional query vector.
pub embedding: Option<Vec<f32>>,
/// Optional tag filter.
pub tags: Option<Vec<String>>,
/// Optional node-type filter.
pub node_types: Option<Vec<String>>,
/// Result limit requested by the caller.
pub limit: usize,
/// Optional lower bound on retrievability.
pub min_retrievability: Option<f64>,
}
/// One search hit: the matched record together with its scores.
#[derive(Debug, Clone)]
pub struct SearchResult {
/// The matched record.
pub record: MemoryRecord,
/// Overall score of the hit. NOTE(review): how it is derived from the
/// component scores is backend-defined -- confirm.
pub score: f64,
/// Full-text score, when one was produced for this hit.
pub fts_score: Option<f64>,
/// Vector-similarity score, when one was produced for this hit.
pub vector_score: Option<f64>,
}
/// Edge in the spreading-activation graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryEdge {
/// Id of the memory the edge starts at.
pub source_id: Uuid,
/// Id of the memory the edge points to.
pub target_id: Uuid,
/// Edge type label. NOTE(review): the set of accepted values is not visible here.
pub edge_type: String,
/// Edge weight.
pub weight: f64,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
}
/// A topical domain (populated in Phase 4). Phase 1 only needs the type to
/// shape the trait surface; discover/classify are Phase 4 work.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Domain {
/// Identifier of the domain.
pub id: String,
/// Display label of the domain.
pub label: String,
/// Centroid vector of the domain.
pub centroid: Vec<f32>,
/// Top terms associated with the domain.
pub top_terms: Vec<String>,
/// Number of memories counted for the domain.
pub memory_count: usize,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
}
/// Result of classifying one vector against all known domains.
#[derive(Debug, Clone)]
pub struct ClassificationResult {
/// Score per domain, keyed by a domain string (presumably the domain id -- confirm).
pub scores: HashMap<String, f64>,
/// Domains selected for the vector. NOTE(review): the selection rule is not visible here.
pub domains: Vec<String>,
}
/// Aggregate statistics about a store. The derived `Default` is all zeros
/// with no registered model.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StoreStats {
/// Total number of memories.
pub total_memories: usize,
/// Number of memories that have an embedding.
pub memories_with_embeddings: usize,
/// Total number of edges.
pub total_edges: usize,
/// Total number of domains.
pub total_domains: usize,
/// Name of the registered embedding model, if any.
pub registered_model_name: Option<String>,
/// Dimension of the registered embedding model, if any.
pub registered_model_dim: Option<usize>,
}
/// Health report produced by `health_check`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HealthStatus {
/// No problem reported.
Healthy,
/// Degraded; `reason` explains why.
Degraded { reason: String },
/// Unavailable; `reason` explains why.
Unavailable { reason: String },
}
// ----------------------------------------------------------------------------
// EMBEDDING MODEL SIGNATURE
// ----------------------------------------------------------------------------
/// Snapshot of the embedding model that was used to write vectors into the
/// store. Persisted in the `embedding_model` table; compared on every write
/// before the vector is accepted.
///
/// Derives `PartialEq`/`Eq`, so two signatures are equal only when name,
/// dimension and hash all match.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ModelSignature {
/// Name of the embedding model.
pub name: String,
/// Dimension of the model's vectors.
pub dimension: usize,
/// Lowercase hex-encoded blake3 hash, 64 chars.
pub hash: String,
}
// ----------------------------------------------------------------------------
// TRAIT
// ----------------------------------------------------------------------------
/// Internal source trait declared with native async-fn-in-trait.
///
/// `#[trait_variant::make(MemoryStoreSend: Send)]` derives a Send-bounded
/// variant whose returned futures are `Send`. In trait_variant 0.1.x the
/// macro emits the blanket `impl<T: MemoryStoreSend> LocalMemoryStore for T`,
/// so backends implement `MemoryStoreSend` (the Send variant) and get
/// `LocalMemoryStore` (the non-Send variant) for free.
///
/// Most callers should reach for the dyn-compatible `MemoryStore` trait
/// declared below, which adapts `MemoryStoreSend` into a boxed-future surface
/// and is the public storage abstraction for cognitive modules and tests
/// that want `Arc<dyn MemoryStore>`.
///
/// Every method returns `MemoryStoreResult`; which `MemoryStoreError`
/// variant a failure maps to is decided by the backend.
#[trait_variant::make(MemoryStoreSend: Send)]
pub trait LocalMemoryStore: Sync + 'static {
// --- Lifecycle ---
/// Initializes the store. NOTE(review): the concrete setup work is backend-defined.
async fn init(&self) -> MemoryStoreResult<()>;
/// Reports the store's current `HealthStatus`.
async fn health_check(&self) -> MemoryStoreResult<HealthStatus>;
// --- Embedding model registry ---
/// Returns the registered `ModelSignature`, or `None` when none is registered.
async fn registered_model(&self) -> MemoryStoreResult<Option<ModelSignature>>;
/// Registers `sig` as the store's embedding model.
///
/// NOTE(review): presumably fails with `MemoryStoreError::ModelMismatch`
/// when a different model is already registered -- confirm per backend.
async fn register_model(&self, sig: &ModelSignature) -> MemoryStoreResult<()>;
// --- CRUD ---
/// Stores `record` and returns a `Uuid` for it (presumably the record's own id -- confirm).
async fn insert(&self, record: &MemoryRecord) -> MemoryStoreResult<Uuid>;
/// Fetches the record with the given `id`, if any.
async fn get(&self, id: Uuid) -> MemoryStoreResult<Option<MemoryRecord>>;
/// Updates the stored record from `record`.
async fn update(&self, record: &MemoryRecord) -> MemoryStoreResult<()>;
/// Deletes the record with the given `id`.
async fn delete(&self, id: Uuid) -> MemoryStoreResult<()>;
// --- Search ---
/// Runs the hybrid search described by `query`.
async fn search(&self, query: &SearchQuery) -> MemoryStoreResult<Vec<SearchResult>>;
/// Full-text search for `text`, with `limit` as the requested result cap.
async fn fts_search(&self, text: &str, limit: usize) -> MemoryStoreResult<Vec<SearchResult>>;
/// Vector search for `embedding`, with `limit` as the requested result cap.
async fn vector_search(
&self,
embedding: &[f32],
limit: usize,
) -> MemoryStoreResult<Vec<SearchResult>>;
// --- FSRS Scheduling ---
/// Fetches the scheduling state of `memory_id`, if any.
async fn get_scheduling(&self, memory_id: Uuid) -> MemoryStoreResult<Option<SchedulingState>>;
/// Persists `state` for the memory named by `state.memory_id`.
async fn update_scheduling(&self, state: &SchedulingState) -> MemoryStoreResult<()>;
/// Returns memories due before `before`, paired with their scheduling
/// state, with `limit` as the requested cap.
///
/// NOTE(review): presumably "due" is decided from `next_review` -- confirm per backend.
async fn get_due_memories(
&self,
before: DateTime<Utc>,
limit: usize,
) -> MemoryStoreResult<Vec<(MemoryRecord, SchedulingState)>>;
// --- Graph (spreading activation) ---
/// Adds `edge` to the graph.
async fn add_edge(&self, edge: &MemoryEdge) -> MemoryStoreResult<()>;
/// Returns edges of `node_id`, optionally restricted to `edge_type`.
async fn get_edges(
&self,
node_id: Uuid,
edge_type: Option<&str>,
) -> MemoryStoreResult<Vec<MemoryEdge>>;
/// Removes the edge between `source` and `target`.
async fn remove_edge(&self, source: Uuid, target: Uuid) -> MemoryStoreResult<()>;
/// Returns records reachable from `node_id` within `depth`, each paired
/// with an `f64` (presumably an activation/weight score -- confirm).
async fn get_neighbors(
&self,
node_id: Uuid,
depth: usize,
) -> MemoryStoreResult<Vec<(MemoryRecord, f64)>>;
// --- Domains (Phase 1: stubs return empty; full impl in Phase 4) ---
/// Lists all domains.
async fn list_domains(&self) -> MemoryStoreResult<Vec<Domain>>;
/// Fetches the domain with the given `id`, if any.
async fn get_domain(&self, id: &str) -> MemoryStoreResult<Option<Domain>>;
/// Inserts or updates `domain`.
async fn upsert_domain(&self, domain: &Domain) -> MemoryStoreResult<()>;
/// Deletes the domain with the given `id`.
async fn delete_domain(&self, id: &str) -> MemoryStoreResult<()>;
/// Phase 1: returns `Ok(vec![])` since no centroids exist. Phase 4 wires
/// the full soft-assignment pass.
async fn classify(&self, embedding: &[f32]) -> MemoryStoreResult<Vec<(String, f64)>>;
// --- Bulk / Maintenance ---
/// Returns an item count (presumably the number of stored memories -- confirm).
async fn count(&self) -> MemoryStoreResult<usize>;
/// Returns aggregate `StoreStats`.
async fn get_stats(&self) -> MemoryStoreResult<StoreStats>;
/// Runs backend maintenance. NOTE(review): the exact work is backend-defined.
async fn vacuum(&self) -> MemoryStoreResult<()>;
}
// ----------------------------------------------------------------------------
// DYN-COMPATIBLE STORAGE TRAIT
// ----------------------------------------------------------------------------
/// Boxed `Send` future resolving to a `MemoryStoreResult<T>`.
///
/// The `'a` lifetime bounds the future by the borrows captured by the call
/// (typically `&self` plus any reference arguments). This is the return type
/// of every method on the dyn-compatible `MemoryStore` trait below.
pub type BoxedStoreFuture<'a, T> = Pin<Box<dyn Future<Output = MemoryStoreResult<T>> + Send + 'a>>;
/// Dyn-compatible storage trait.
///
/// `MemoryStoreSend` above is the trait users implement; it uses native
/// async-fn-in-trait return types (RPITIT), which gives zero-allocation
/// static dispatch but is not dyn-safe. This trait wraps every method in
/// `Pin<Box<dyn Future + Send + '_>>` so `Arc<dyn MemoryStore>` works for
/// the cognitive module surface and the Phase 1 integration tests.
///
/// Implementations should not target this trait directly; the blanket
/// `impl<T: MemoryStoreSend> MemoryStore for T` adapts every Send-variant
/// implementation automatically. Each call boxes the returned future
/// exactly once, identical to the cost of the previous design.
///
/// Each method mirrors the `MemoryStoreSend` method of the same name; only
/// the return type differs (a `BoxedStoreFuture` instead of an opaque future).
pub trait MemoryStore: Send + Sync + 'static {
// --- Lifecycle ---
fn init<'a>(&'a self) -> BoxedStoreFuture<'a, ()>;
fn health_check<'a>(&'a self) -> BoxedStoreFuture<'a, HealthStatus>;
// --- Embedding model registry ---
fn registered_model<'a>(&'a self) -> BoxedStoreFuture<'a, Option<ModelSignature>>;
fn register_model<'a>(&'a self, sig: &'a ModelSignature) -> BoxedStoreFuture<'a, ()>;
// --- CRUD ---
fn insert<'a>(&'a self, record: &'a MemoryRecord) -> BoxedStoreFuture<'a, Uuid>;
fn get<'a>(&'a self, id: Uuid) -> BoxedStoreFuture<'a, Option<MemoryRecord>>;
fn update<'a>(&'a self, record: &'a MemoryRecord) -> BoxedStoreFuture<'a, ()>;
fn delete<'a>(&'a self, id: Uuid) -> BoxedStoreFuture<'a, ()>;
// --- Search ---
fn search<'a>(&'a self, query: &'a SearchQuery) -> BoxedStoreFuture<'a, Vec<SearchResult>>;
fn fts_search<'a>(
&'a self,
text: &'a str,
limit: usize,
) -> BoxedStoreFuture<'a, Vec<SearchResult>>;
fn vector_search<'a>(
&'a self,
embedding: &'a [f32],
limit: usize,
) -> BoxedStoreFuture<'a, Vec<SearchResult>>;
// --- FSRS Scheduling ---
fn get_scheduling<'a>(
&'a self,
memory_id: Uuid,
) -> BoxedStoreFuture<'a, Option<SchedulingState>>;
fn update_scheduling<'a>(&'a self, state: &'a SchedulingState) -> BoxedStoreFuture<'a, ()>;
fn get_due_memories<'a>(
&'a self,
before: DateTime<Utc>,
limit: usize,
) -> BoxedStoreFuture<'a, Vec<(MemoryRecord, SchedulingState)>>;
// --- Graph (spreading activation) ---
fn add_edge<'a>(&'a self, edge: &'a MemoryEdge) -> BoxedStoreFuture<'a, ()>;
fn get_edges<'a>(
&'a self,
node_id: Uuid,
edge_type: Option<&'a str>,
) -> BoxedStoreFuture<'a, Vec<MemoryEdge>>;
fn remove_edge<'a>(&'a self, source: Uuid, target: Uuid) -> BoxedStoreFuture<'a, ()>;
fn get_neighbors<'a>(
&'a self,
node_id: Uuid,
depth: usize,
) -> BoxedStoreFuture<'a, Vec<(MemoryRecord, f64)>>;
// --- Domains ---
fn list_domains<'a>(&'a self) -> BoxedStoreFuture<'a, Vec<Domain>>;
fn get_domain<'a>(&'a self, id: &'a str) -> BoxedStoreFuture<'a, Option<Domain>>;
fn upsert_domain<'a>(&'a self, domain: &'a Domain) -> BoxedStoreFuture<'a, ()>;
fn delete_domain<'a>(&'a self, id: &'a str) -> BoxedStoreFuture<'a, ()>;
fn classify<'a>(&'a self, embedding: &'a [f32]) -> BoxedStoreFuture<'a, Vec<(String, f64)>>;
// --- Bulk / Maintenance ---
fn count<'a>(&'a self) -> BoxedStoreFuture<'a, usize>;
fn get_stats<'a>(&'a self) -> BoxedStoreFuture<'a, StoreStats>;
fn vacuum<'a>(&'a self) -> BoxedStoreFuture<'a, ()>;
}
impl<T> MemoryStore for T
where
T: MemoryStoreSend,
{
fn init<'a>(&'a self) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::init(self))
}
fn health_check<'a>(&'a self) -> BoxedStoreFuture<'a, HealthStatus> {
Box::pin(<T as MemoryStoreSend>::health_check(self))
}
fn registered_model<'a>(&'a self) -> BoxedStoreFuture<'a, Option<ModelSignature>> {
Box::pin(<T as MemoryStoreSend>::registered_model(self))
}
fn register_model<'a>(&'a self, sig: &'a ModelSignature) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::register_model(self, sig))
}
fn insert<'a>(&'a self, record: &'a MemoryRecord) -> BoxedStoreFuture<'a, Uuid> {
Box::pin(<T as MemoryStoreSend>::insert(self, record))
}
fn get<'a>(&'a self, id: Uuid) -> BoxedStoreFuture<'a, Option<MemoryRecord>> {
Box::pin(<T as MemoryStoreSend>::get(self, id))
}
fn update<'a>(&'a self, record: &'a MemoryRecord) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::update(self, record))
}
fn delete<'a>(&'a self, id: Uuid) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::delete(self, id))
}
fn search<'a>(&'a self, query: &'a SearchQuery) -> BoxedStoreFuture<'a, Vec<SearchResult>> {
Box::pin(<T as MemoryStoreSend>::search(self, query))
}
fn fts_search<'a>(
&'a self,
text: &'a str,
limit: usize,
) -> BoxedStoreFuture<'a, Vec<SearchResult>> {
Box::pin(<T as MemoryStoreSend>::fts_search(self, text, limit))
}
fn vector_search<'a>(
&'a self,
embedding: &'a [f32],
limit: usize,
) -> BoxedStoreFuture<'a, Vec<SearchResult>> {
Box::pin(<T as MemoryStoreSend>::vector_search(
self, embedding, limit,
))
}
fn get_scheduling<'a>(
&'a self,
memory_id: Uuid,
) -> BoxedStoreFuture<'a, Option<SchedulingState>> {
Box::pin(<T as MemoryStoreSend>::get_scheduling(self, memory_id))
}
fn update_scheduling<'a>(&'a self, state: &'a SchedulingState) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::update_scheduling(self, state))
}
fn get_due_memories<'a>(
&'a self,
before: DateTime<Utc>,
limit: usize,
) -> BoxedStoreFuture<'a, Vec<(MemoryRecord, SchedulingState)>> {
Box::pin(<T as MemoryStoreSend>::get_due_memories(
self, before, limit,
))
}
/// Forwards `edge` to `<T as MemoryStoreSend>::add_edge` and returns its
/// future boxed and pinned.
fn add_edge<'a>(&'a self, edge: &'a MemoryEdge) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::add_edge(self, edge))
}
/// Forwards `node_id` and the optional `edge_type` to
/// `<T as MemoryStoreSend>::get_edges` and returns its future boxed and pinned.
fn get_edges<'a>(
&'a self,
node_id: Uuid,
edge_type: Option<&'a str>,
) -> BoxedStoreFuture<'a, Vec<MemoryEdge>> {
Box::pin(<T as MemoryStoreSend>::get_edges(self, node_id, edge_type))
}
/// Forwards `source` and `target` to `<T as MemoryStoreSend>::remove_edge` and
/// returns its future boxed and pinned.
fn remove_edge<'a>(&'a self, source: Uuid, target: Uuid) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::remove_edge(self, source, target))
}
/// Forwards `node_id` and `depth` to `<T as MemoryStoreSend>::get_neighbors`
/// and returns its future boxed and pinned.
fn get_neighbors<'a>(
&'a self,
node_id: Uuid,
depth: usize,
) -> BoxedStoreFuture<'a, Vec<(MemoryRecord, f64)>> {
Box::pin(<T as MemoryStoreSend>::get_neighbors(self, node_id, depth))
}
/// Forwards to `<T as MemoryStoreSend>::list_domains` and returns its future
/// boxed and pinned.
fn list_domains<'a>(&'a self) -> BoxedStoreFuture<'a, Vec<Domain>> {
Box::pin(<T as MemoryStoreSend>::list_domains(self))
}
/// Forwards `id` to `<T as MemoryStoreSend>::get_domain` and returns its
/// future boxed and pinned.
fn get_domain<'a>(&'a self, id: &'a str) -> BoxedStoreFuture<'a, Option<Domain>> {
Box::pin(<T as MemoryStoreSend>::get_domain(self, id))
}
/// Forwards `domain` to `<T as MemoryStoreSend>::upsert_domain` and returns
/// its future boxed and pinned.
fn upsert_domain<'a>(&'a self, domain: &'a Domain) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::upsert_domain(self, domain))
}
/// Forwards `id` to `<T as MemoryStoreSend>::delete_domain` and returns its
/// future boxed and pinned.
fn delete_domain<'a>(&'a self, id: &'a str) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::delete_domain(self, id))
}
/// Forwards `embedding` to `<T as MemoryStoreSend>::classify` and returns its
/// future (resolving to `(String, f64)` pairs) boxed and pinned.
fn classify<'a>(&'a self, embedding: &'a [f32]) -> BoxedStoreFuture<'a, Vec<(String, f64)>> {
Box::pin(<T as MemoryStoreSend>::classify(self, embedding))
}
/// Forwards to `<T as MemoryStoreSend>::count` and returns its future boxed
/// and pinned.
fn count<'a>(&'a self) -> BoxedStoreFuture<'a, usize> {
Box::pin(<T as MemoryStoreSend>::count(self))
}
/// Forwards to `<T as MemoryStoreSend>::get_stats` and returns its future
/// boxed and pinned.
fn get_stats<'a>(&'a self) -> BoxedStoreFuture<'a, StoreStats> {
Box::pin(<T as MemoryStoreSend>::get_stats(self))
}
/// Forwards to `<T as MemoryStoreSend>::vacuum` and returns its future boxed
/// and pinned.
fn vacuum<'a>(&'a self) -> BoxedStoreFuture<'a, ()> {
Box::pin(<T as MemoryStoreSend>::vacuum(self))
}
}
// ----------------------------------------------------------------------------
// UNIT TESTS
// ----------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::StorageError;

    /// `StorageError::NotFound` and `StorageError::Init` must convert into the
    /// `MemoryStoreError` variants of the same name.
    #[test]
    fn memory_store_error_from_storage_error() {
        let not_found: MemoryStoreError = StorageError::NotFound("abc".to_string()).into();
        assert!(matches!(not_found, MemoryStoreError::NotFound(_)));

        let init: MemoryStoreError = StorageError::Init("init failure".to_string()).into();
        assert!(matches!(init, MemoryStoreError::Init(_)));
    }

    /// A `ModelSignature` must survive a JSON encode/decode cycle unchanged.
    #[test]
    fn model_signature_serde_round_trip() {
        let original = ModelSignature {
            name: "nomic-ai/nomic-embed-text-v1.5".to_string(),
            dimension: 256,
            hash: "a".repeat(64),
        };

        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded: ModelSignature = serde_json::from_str(&encoded).expect("deserialize");

        assert_eq!(original, decoded);
    }

    /// A `MemoryRecord` must keep its content and domains across a JSON
    /// encode/decode cycle.
    #[test]
    fn memory_record_serde_round_trip() {
        let mut scores = HashMap::new();
        scores.insert("dev".to_string(), 0.9);

        let original = MemoryRecord {
            id: Uuid::new_v4(),
            domains: vec!["dev".to_string()],
            domain_scores: scores,
            content: "hello".to_string(),
            node_type: "fact".to_string(),
            tags: vec!["tag1".to_string()],
            embedding: None,
            created_at: Utc::now(),
            updated_at: Utc::now(),
            metadata: serde_json::json!({}),
        };

        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded: MemoryRecord = serde_json::from_str(&encoded).expect("deserialize");

        assert_eq!(original.content, decoded.content);
        assert_eq!(original.domains, decoded.domains);
    }
}

View file

@ -74,6 +74,16 @@ pub const MIGRATIONS: &[Migration] = &[
description: "v2.1.25 Merge/Supersede: reversible operation log, merge plans, bitemporal lineage, protected pins",
up: MIGRATION_V14_UP,
},
Migration {
version: 15,
description: "ComposedGraph: composition events, members, outcomes",
up: MIGRATION_V15_UP,
},
Migration {
version: 16,
description: "ADR 0001 Phase 1: embedding_model registry, domains/domain_scores columns, domains table",
up: MIGRATION_V16_UP,
},
];
/// A database migration
@ -813,6 +823,67 @@ CREATE INDEX IF NOT EXISTS idx_merge_operations_survivor ON merge_operations(sur
UPDATE schema_version SET version = 14, applied_at = datetime('now');
"#;
/// V15: ComposedGraph persistence for memory composition outcomes.
///
/// These tables record which memories were used together, which tool/query
/// produced the composition, and what happened afterward. `memory_id` values
/// are intentionally historical references instead of foreign keys to
/// `knowledge_nodes`: purging or superseding a memory must not erase the fact
/// that a bounty lane or reasoning path was previously composed.
///
/// Tables created (all with `IF NOT EXISTS`, as are their indexes):
/// - `composition_events`: one row per composition (tool, mode, query, status, ...).
/// - `composition_members`: memories in an event, keyed by
///   `(event_id, memory_id, role)`.
/// - `composition_outcomes`: outcome labels attached to an event.
///
/// Members and outcomes reference `composition_events(id)` with
/// `ON DELETE CASCADE`. The final statement sets `schema_version` to 15.
const MIGRATION_V15_UP: &str = r#"
CREATE TABLE IF NOT EXISTS composition_events (
id TEXT PRIMARY KEY,
created_at TEXT NOT NULL,
tool TEXT NOT NULL,
mode TEXT NOT NULL DEFAULT 'deep_reference',
query TEXT,
query_hash TEXT,
confidence REAL,
status TEXT,
output_preview TEXT,
metadata TEXT NOT NULL DEFAULT '{}'
);
CREATE INDEX IF NOT EXISTS idx_composition_events_created_at ON composition_events(created_at);
CREATE INDEX IF NOT EXISTS idx_composition_events_tool ON composition_events(tool);
CREATE INDEX IF NOT EXISTS idx_composition_events_mode ON composition_events(mode);
CREATE INDEX IF NOT EXISTS idx_composition_events_query_hash ON composition_events(query_hash);
CREATE TABLE IF NOT EXISTS composition_members (
event_id TEXT NOT NULL,
memory_id TEXT NOT NULL,
role TEXT NOT NULL, -- primary | supporting | contradicting | superseded | related
rank INTEGER NOT NULL DEFAULT 0,
trust REAL,
score REAL,
preview TEXT,
metadata TEXT NOT NULL DEFAULT '{}',
PRIMARY KEY (event_id, memory_id, role),
FOREIGN KEY (event_id) REFERENCES composition_events(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_composition_members_memory ON composition_members(memory_id);
CREATE INDEX IF NOT EXISTS idx_composition_members_role ON composition_members(role);
CREATE TABLE IF NOT EXISTS composition_outcomes (
id TEXT PRIMARY KEY,
event_id TEXT NOT NULL,
outcome_type TEXT NOT NULL,
labeled_at TEXT NOT NULL,
label_source TEXT NOT NULL DEFAULT 'tool',
confidence_delta REAL,
notes TEXT,
metadata TEXT NOT NULL DEFAULT '{}',
FOREIGN KEY (event_id) REFERENCES composition_events(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_composition_outcomes_event ON composition_outcomes(event_id);
CREATE INDEX IF NOT EXISTS idx_composition_outcomes_type ON composition_outcomes(outcome_type);
CREATE INDEX IF NOT EXISTS idx_composition_outcomes_labeled_at ON composition_outcomes(labeled_at);
UPDATE schema_version SET version = 15, applied_at = datetime('now');
"#;
/// Get current schema version from database
pub fn get_current_version(conn: &rusqlite::Connection) -> rusqlite::Result<u32> {
conn.query_row(
@ -829,13 +900,63 @@ pub fn get_current_version(conn: &rusqlite::Connection) -> rusqlite::Result<u32>
fn add_column_if_missing(conn: &rusqlite::Connection, sql: &str) -> rusqlite::Result<()> {
match conn.execute(sql, []) {
Ok(_) => Ok(()),
Err(rusqlite::Error::SqliteFailure(_, Some(msg))) if msg.contains("duplicate column name") => {
Err(rusqlite::Error::SqliteFailure(_, Some(msg)))
if msg.contains("duplicate column name") =>
{
Ok(())
}
Err(e) => Err(e),
}
}
/// V16: ADR 0001 Phase 1 - embedding_model registry + domain columns.
///
/// The `ALTER TABLE ... ADD COLUMN` statements this version needs are not part
/// of this batch: SQLite has no `ALTER TABLE ... ADD COLUMN IF NOT EXISTS`, so
/// they live in `MIGRATION_V16_ALTER_COLUMNS` and the migration runner executes
/// them individually (ignoring "duplicate column" errors) before running this
/// batch. Every `CREATE` below uses `IF NOT EXISTS`, so replaying V16 is
/// idempotent.
const MIGRATION_V16_UP: &str = r#"
-- Migration V16: embedding model registry + per-memory domain columns.
-- 1. Embedding model registry. Single logical row: the CHECK (id = 1)
-- constraint below makes SQLite reject any row with a different id,
-- independently of what `register_model` enforces on the Rust side.
CREATE TABLE IF NOT EXISTS embedding_model (
id INTEGER PRIMARY KEY CHECK (id = 1),
name TEXT NOT NULL,
dimension INTEGER NOT NULL,
hash TEXT NOT NULL,
created_at TEXT NOT NULL
);
-- 2. Per-memory domain columns are applied separately (see apply_migrations).
-- 3. Index on the domains JSON column to enable LIKE-style filter in Phase 4.
CREATE INDEX IF NOT EXISTS idx_nodes_domains ON knowledge_nodes(domains);
CREATE INDEX IF NOT EXISTS idx_nodes_domain_scores ON knowledge_nodes(domain_scores);
-- 4. Domains catalogue (empty until Phase 4 populates).
CREATE TABLE IF NOT EXISTS domains (
id TEXT PRIMARY KEY,
label TEXT NOT NULL,
centroid BLOB,
top_terms TEXT NOT NULL DEFAULT '[]',
memory_count INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_domains_created_at ON domains(created_at);
UPDATE schema_version SET version = 16, applied_at = datetime('now');
"#;
/// The two ALTER TABLE statements for V16. Kept separate so the migration
/// runner can try each individually and ignore "duplicate column" errors,
/// making V16 idempotent on replay (SQLite has no ADD COLUMN IF NOT EXISTS).
///
/// Ordering matters: the runner applies these before executing
/// `MIGRATION_V16_UP`, because that batch creates indexes on the `domains` and
/// `domain_scores` columns added here.
pub const MIGRATION_V16_ALTER_COLUMNS: &[&str] = &[
"ALTER TABLE knowledge_nodes ADD COLUMN domains TEXT NOT NULL DEFAULT '[]'",
"ALTER TABLE knowledge_nodes ADD COLUMN domain_scores TEXT NOT NULL DEFAULT '{}'",
];
/// Apply pending migrations
pub fn apply_migrations(conn: &rusqlite::Connection) -> rusqlite::Result<u32> {
let current_version = get_current_version(conn)?;
@ -864,6 +985,15 @@ pub fn apply_migrations(conn: &rusqlite::Connection) -> rusqlite::Result<u32> {
)?;
}
// V16 adds columns via ALTER TABLE, which SQLite does not support
// with IF NOT EXISTS. Run them individually and ignore duplicate
// column errors so replay stays idempotent.
if migration.version == 16 {
for stmt in MIGRATION_V16_ALTER_COLUMNS {
add_column_if_missing(conn, stmt)?;
}
}
// Use execute_batch to handle multi-statement SQL including triggers
conn.execute_batch(migration.up)?;
@ -890,17 +1020,17 @@ mod tests {
/// version after `apply_migrations` runs all migrations end-to-end, and
/// neither of the dead tables V11 drops must exist afterwards.
#[test]
fn test_apply_migrations_advances_to_v14_and_drops_dead_tables() {
fn test_apply_migrations_advances_to_v16_and_drops_dead_tables() {
let conn = rusqlite::Connection::open_in_memory().expect("open in-memory");
// Pre-requisite: schema_version must be bootstrapped by V1.
apply_migrations(&conn).expect("apply_migrations succeeds");
// 1. schema_version advanced to V14
// 1. schema_version advanced to V16
let version = get_current_version(&conn).expect("read schema_version");
assert_eq!(
version, 14,
"schema_version must be 14 after all migrations"
version, 16,
"schema_version must be 16 after all migrations"
);
// 2. knowledge_edges is gone (V11 drops it)
@ -967,7 +1097,23 @@ mod tests {
assert_eq!(rows, 1, "{table} table must be created by V14");
}
// 7. knowledge_nodes gains `protected` + `superseded_by` (V14)
// 7. ComposedGraph tables exist (V15)
for table in [
"composition_events",
"composition_members",
"composition_outcomes",
] {
let rows: i64 = conn
.query_row(
"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
[table],
|row| row.get(0),
)
.expect("query sqlite_master");
assert_eq!(rows, 1, "{table} table must be created by V15");
}
// 8. knowledge_nodes gains `protected` + `superseded_by` (V14)
let node_cols: Vec<String> = {
let mut stmt = conn
.prepare("PRAGMA table_info(knowledge_nodes)")
@ -1002,10 +1148,132 @@ mod tests {
conn.execute("UPDATE schema_version SET version = 10", [])
.expect("rewind schema_version");
// Replay must not error.
apply_migrations(&conn).expect("V11 replay must be idempotent");
// Replay V11 onward. V11 uses DROP TABLE IF EXISTS so it is idempotent.
// V12/V13 tombstone tables use CREATE TABLE IF NOT EXISTS. V14/V16 ALTER
// TABLE idempotency is handled by the migration runner.
apply_migrations(&conn).expect("V11..V16 replay must be idempotent");
// After replaying from V10, the schema advances to the latest version.
let version = get_current_version(&conn).expect("read schema_version");
assert_eq!(version, 14, "schema_version back at 14 after replay");
assert_eq!(version, 16, "schema_version back at 16 after replay");
}
/// After a full migration run, `sqlite_master` must list exactly one
/// `embedding_model` table.
#[test]
fn v16_adds_embedding_model_table() {
    let conn = rusqlite::Connection::open_in_memory().expect("open in-memory");
    apply_migrations(&conn).expect("apply_migrations");

    let lookup =
        "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='embedding_model'";
    let matching_tables: i64 = conn
        .query_row(lookup, [], |row| row.get(0))
        .expect("query sqlite_master");

    assert_eq!(
        matching_tables, 1,
        "embedding_model table must exist after V16"
    );
}
/// After a full migration run, `knowledge_nodes` must expose both the
/// `domains` and `domain_scores` columns.
#[test]
fn v16_adds_domains_columns() {
    let conn = rusqlite::Connection::open_in_memory().expect("open in-memory");
    apply_migrations(&conn).expect("apply_migrations");

    // Column 1 of each `PRAGMA table_info` row is read as the column name.
    let mut pragma = conn
        .prepare("PRAGMA table_info(knowledge_nodes)")
        .expect("prepare");
    let column_names: Vec<String> = pragma
        .query_map([], |row| row.get::<_, String>(1))
        .expect("query_map")
        .map(|name| name.expect("row"))
        .collect();

    let has_column = |wanted: &str| column_names.iter().any(|name| name == wanted);
    assert!(has_column("domains"), "domains column missing");
    assert!(has_column("domain_scores"), "domain_scores column missing");
}
/// A row inserted without `domains` / `domain_scores` must pick up the column
/// defaults declared by the V16 ALTER statements: `'[]'` and `'{}'`.
#[test]
fn v16_default_values_empty_json() {
let conn = rusqlite::Connection::open_in_memory().expect("open in-memory");
apply_migrations(&conn).expect("apply_migrations");
// Insert a minimal row to test defaults; the two V16 columns are
// deliberately left out of the column list.
conn.execute(
"INSERT INTO knowledge_nodes (id, content, node_type, created_at, updated_at, last_accessed, \
stability, difficulty, reps, lapses, learning_state, storage_strength, retrieval_strength, \
retention_strength, next_review, scheduled_days, has_embedding) \
VALUES ('test-id','content','fact',datetime('now'),datetime('now'),datetime('now'),\
1.0,0.3,0,0,'new',1.0,1.0,1.0,datetime('now'),1,0)",
[],
).expect("insert row");
let (domains, domain_scores): (String, String) = conn
.query_row(
"SELECT domains, domain_scores FROM knowledge_nodes WHERE id='test-id'",
[],
|row| Ok((row.get(0)?, row.get(1)?)),
)
.expect("query row");
assert_eq!(domains, "[]");
assert_eq!(domain_scores, "{}");
}
/// Re-running V16 on a database that already has its tables and columns must
/// succeed and leave `schema_version` at 16.
#[test]
fn v16_is_replayable() {
    let conn = rusqlite::Connection::open_in_memory().expect("open in-memory");
    apply_migrations(&conn).expect("first apply");

    // Pretend only V15 has been applied so the runner executes V16 a second
    // time against a schema that already contains everything V16 creates.
    conn.execute("UPDATE schema_version SET version = 15", [])
        .expect("rewind");

    // The CREATE ... IF NOT EXISTS statements and the duplicate-column
    // tolerant ALTER handling are what make this second pass succeed.
    apply_migrations(&conn).expect("V16 replay must be idempotent");

    let replayed_version = get_current_version(&conn).expect("read version");
    assert_eq!(
        replayed_version, 16,
        "schema_version must be 16 after replay"
    );
}
/// A row written under the V15 schema must survive the V16 upgrade and read
/// back the new columns' defaults (`'[]'` / `'{}'`).
#[test]
fn v16_preserves_existing_rows_from_v15() {
let conn = rusqlite::Connection::open_in_memory().expect("open in-memory");
// Apply up to V15 only, including the V14 ALTER TABLE columns that
// `apply_migrations` normally runs before the V14 SQL batch.
for migration in MIGRATIONS {
if migration.version <= 15 {
if migration.version == 14 {
add_column_if_missing(
&conn,
"ALTER TABLE knowledge_nodes ADD COLUMN protected INTEGER NOT NULL DEFAULT 0",
)
.expect("apply V14 protected column");
add_column_if_missing(
&conn,
"ALTER TABLE knowledge_nodes ADD COLUMN superseded_by TEXT",
)
.expect("apply V14 superseded_by column");
}
conn.execute_batch(migration.up).expect("apply migration");
}
}
// Insert a row under the V15 schema, before PR #61's V16 columns exist.
conn.execute(
"INSERT INTO knowledge_nodes (id, content, node_type, created_at, updated_at, last_accessed, \
stability, difficulty, reps, lapses, learning_state, storage_strength, retrieval_strength, \
retention_strength, next_review, scheduled_days, has_embedding) \
VALUES ('existing-id','old content','fact',datetime('now'),datetime('now'),datetime('now'),\
1.0,0.3,0,0,'new',1.0,1.0,1.0,datetime('now'),1,0)",
[],
).expect("insert pre-v16 row");
// Each batch above ends by setting schema_version, so the runner sees V15
// here and only V16 is still pending.
apply_migrations(&conn).expect("apply V16 migration");
// Check the old row has defaults
let (domains, domain_scores): (String, String) = conn
.query_row(
"SELECT domains, domain_scores FROM knowledge_nodes WHERE id='existing-id'",
[],
|row| Ok((row.get(0)?, row.get(1)?)),
)
.expect("query pre-v16 row");
assert_eq!(domains, "[]");
assert_eq!(domain_scores, "{}");
}
}

View file

@ -1,22 +1,31 @@
//! Storage Module
//!
//! SQLite-based storage layer with:
//! - FTS5 full-text search with query sanitization
//! - Embedded vector storage
//! - FSRS-6 state management
//! - Temporal memory support
//! Backend-agnostic memory store abstraction plus SQLite reference impl.
mod memory_store;
mod migrations;
mod portable;
mod sqlite;
pub use memory_store::{
ClassificationResult, Domain, HealthStatus, LocalMemoryStore, MemoryEdge, MemoryRecord,
MemoryStore, MemoryStoreError, MemoryStoreResult, MemoryStoreSend, ModelSignature,
SchedulingState, SearchQuery, SearchResult, StoreStats,
};
pub use migrations::MIGRATIONS;
pub use portable::{
PORTABLE_ARCHIVE_FORMAT, PortableArchive, PortableImportMode, PortableImportReport,
PortableTable, PortableValue,
};
pub use sqlite::{
ConnectionRecord, ConsolidationHistoryRecord, DreamHistoryRecord, FilePortableSyncBackend,
InsightRecord, IntentionRecord, PortableSyncBackend, PortableSyncReport, Result,
SmartIngestResult, StateTransitionRecord, Storage, StorageError,
CompositionEventRecord, CompositionMemberRecord, CompositionNeighborRecord,
CompositionOutcomeRecord, ConnectionRecord, ConsolidationHistoryRecord, DreamHistoryRecord,
FilePortableSyncBackend, InsightRecord, IntentionRecord, NeverComposedCandidate,
PortableSyncBackend, PortableSyncReport, Result, SmartIngestResult, SqliteMemoryStore,
StateTransitionRecord, StorageError,
};
/// Backwards-compatibility alias. Retained until Phase 4 completes so every
/// existing `Arc<Storage>` call site keeps compiling. Scheduled for removal
/// once no downstream source file references it.
pub type Storage = SqliteMemoryStore;

File diff suppressed because it is too large Load diff

View file

@ -61,7 +61,7 @@ The server exposes the current unified MCP tools from
- `search`, `smart_ingest`, `memory`, `codebase`, `intention`
- `deep_reference`, `cross_reference`, `contradictions`
- `dream`, `explore_connections`, `predict`
- `memory_health`, `memory_graph`, `system_status`
- `memory_health`, `memory_graph`, `composed_graph`, `system_status`
- `importance_score`, `find_duplicates`
- `consolidate`, `memory_timeline`, `memory_changelog`
- `backup`, `export`, `restore`, `gc`, `suppress`

View file

@ -443,6 +443,12 @@ impl McpServer {
input_schema: tools::graph::schema(),
..Default::default()
},
ToolDescription {
name: "composed_graph".to_string(),
description: Some("ComposedGraph memory topology. Reads durable composition events, members, and outcome labels; returns recent/already-composed lanes, neighbors, never-composed pairs, bounty-mode lanes, and lets users label outcomes such as helpful, submitted, accepted, rejected, duplicate_risk, needs_poc, or dead_end.".to_string()),
input_schema: tools::composed_graph::schema(),
..Default::default()
},
// ================================================================
// DEEP REFERENCE (v2.0.4+) — replaces cross_reference
// ================================================================
@ -959,7 +965,8 @@ impl McpServer {
// TEMPORAL TOOLS (v1.2+)
// ================================================================
"memory_timeline" => {
tools::timeline::execute(&self.storage, &self.output_config, request.arguments).await
tools::timeline::execute(&self.storage, &self.output_config, request.arguments)
.await
}
"memory_changelog" => tools::changelog::execute(&self.storage, request.arguments).await,
@ -1032,6 +1039,9 @@ impl McpServer {
// ================================================================
"memory_health" => tools::health::execute(&self.storage, request.arguments).await,
"memory_graph" => tools::graph::execute(&self.storage, request.arguments).await,
"composed_graph" => {
tools::composed_graph::execute(&self.storage, request.arguments).await
}
"deep_reference" | "cross_reference" => {
tools::cross_reference::execute(&self.storage, &self.cognitive, request.arguments)
.await
@ -1796,10 +1806,10 @@ mod tests {
let result = response.result.unwrap();
let tools = result["tools"].as_array().unwrap();
// v2.1.25: 32 tools (25 from v2.1.21 + 7 Phase 3 merge/supersede tools:
// 33 tools: 25 from v2.1.21 + 7 Phase 3 merge/supersede tools:
// merge_candidates, plan_merge, plan_supersede, apply_plan, merge_undo,
// protect, merge_policy)
assert_eq!(tools.len(), 32, "Expected exactly 32 tools in v2.1.25");
// protect, merge_policy, composed_graph)
assert_eq!(tools.len(), 33, "Expected exactly 33 tools");
let tool_names: Vec<&str> = tools.iter().map(|t| t["name"].as_str().unwrap()).collect();
@ -1874,6 +1884,7 @@ mod tests {
// Autonomic tools (v1.9)
assert!(tool_names.contains(&"memory_health"));
assert!(tool_names.contains(&"memory_graph"));
assert!(tool_names.contains(&"composed_graph"));
// Deep reference + cross_reference alias (v2.0.4)
assert!(tool_names.contains(&"deep_reference"));

View file

@ -0,0 +1,906 @@
//! composed_graph tool — durable composition history and bounty-mode lane queue.
use chrono::Utc;
use serde::Deserialize;
use serde_json::Value;
use std::sync::Arc;
use uuid::Uuid;
use vestige_core::{CompositionOutcomeRecord, Storage};
/// Outcome labels accepted by the `label` action; any other `outcome_type`
/// is rejected with an "unsupported outcome_type" error.
const OUTCOME_TYPES: &[&str] = &[
"helpful",
"dead_end",
"submitted",
"accepted",
"rejected",
"duplicate_risk",
"needs_poc",
"bad_severity",
"user_promoted",
"user_demoted",
"closed_by_scope",
"closed_by_duplicate",
"closed_by_false_assumption",
"closed_by_user",
"expired_lane",
];
/// JSON Schema describing the arguments of the `composed_graph` tool.
///
/// Only `action` is required. The `outcome_type` enum is generated from
/// [`OUTCOME_TYPES`] so the advertised labels cannot drift from the set the
/// `label` action actually validates against.
pub fn schema() -> Value {
    serde_json::json!({
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["recent", "get", "memory", "neighbors", "never_composed", "bounty_mode", "label"],
                "description": "ComposedGraph action to run."
            },
            "event_id": {
                "type": "string",
                "description": "Composition event id for get/label actions."
            },
            "memory_id": {
                "type": "string",
                "description": "Memory id for memory/neighbors actions."
            },
            "limit": {
                "type": "integer",
                "description": "Maximum rows to return (default 10, max 100).",
                "default": 10,
                "minimum": 1,
                "maximum": 100
            },
            "tags": {
                "type": "array",
                "items": { "type": "string" },
                "description": "Optional tag filter for never_composed and bounty_mode."
            },
            "outcome_type": {
                "type": "string",
                "enum": OUTCOME_TYPES,
                "description": "Outcome label for label action."
            },
            "notes": {
                "type": "string",
                "description": "Optional outcome notes."
            },
            "label_source": {
                "type": "string",
                "description": "Where the outcome label came from (default: user)."
            },
            "confidence_delta": {
                "type": "number",
                "description": "Optional confidence adjustment for this outcome."
            }
        },
        "required": ["action"]
    })
}
/// Deserialized arguments of the `composed_graph` tool; the fields mirror the
/// properties declared in `schema()`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
struct ComposedGraphArgs {
/// Action name; `execute` dispatches on this string.
action: String,
/// Composition event id; required by the `get` and `label` actions.
event_id: Option<String>,
/// Memory id; required by the `memory` and `neighbors` actions.
memory_id: Option<String>,
/// Row limit; `execute` defaults it to 10 and clamps it to `1..=100`.
limit: Option<i32>,
/// Optional tag filter passed to `never_composed` and `bounty_mode`.
tags: Option<Vec<String>>,
/// Outcome label; required by `label` and checked against `OUTCOME_TYPES`.
outcome_type: Option<String>,
/// Optional free-form notes stored with a labelled outcome.
notes: Option<String>,
/// Origin of the label; `label` falls back to `"user"` when absent.
label_source: Option<String>,
/// Optional confidence adjustment stored with a labelled outcome.
confidence_delta: Option<f64>,
}
/// Entry point of the `composed_graph` tool.
///
/// Parses `args` into [`ComposedGraphArgs`], resolves the row limit (default
/// 10, clamped to `1..=100`) and dispatches on the `action` string.
///
/// # Errors
/// Returns a message when the arguments are missing or malformed, when the
/// id an action requires is absent, when the action is unknown, or when the
/// selected handler fails.
pub async fn execute(storage: &Arc<Storage>, args: Option<Value>) -> Result<Value, String> {
    let Some(raw) = args else {
        return Err("Missing arguments".to_string());
    };
    let parsed: ComposedGraphArgs =
        serde_json::from_value(raw).map_err(|e| format!("Invalid arguments: {}", e))?;

    let limit = parsed.limit.unwrap_or(10).clamp(1, 100);
    let tag_filter = parsed.tags.as_deref();

    match parsed.action.as_str() {
        "recent" => recent(storage, limit),
        "get" => match parsed.event_id.as_deref() {
            Some(event_id) => get(storage, event_id),
            None => Err("event_id is required for get".to_string()),
        },
        "memory" => match parsed.memory_id.as_deref() {
            Some(memory_id) => memory(storage, memory_id, limit),
            None => Err("memory_id is required for memory".to_string()),
        },
        "neighbors" => match parsed.memory_id.as_deref() {
            Some(memory_id) => neighbors(storage, memory_id, limit),
            None => Err("memory_id is required for neighbors".to_string()),
        },
        "never_composed" => never_composed(storage, limit, tag_filter),
        "bounty_mode" => bounty_mode(storage, limit, tag_filter),
        "label" => label(storage, &parsed),
        other => Err(format!("Unknown composed_graph action: {}", other)),
    }
}
/// `recent` action: returns up to `limit` composition events as reported by
/// `get_recent_composition_events`, stringifying any storage error.
fn recent(storage: &Storage, limit: i32) -> Result<Value, String> {
    match storage.get_recent_composition_events(limit) {
        Ok(events) => Ok(serde_json::json!({
            "action": "recent",
            "events": events,
        })),
        Err(err) => Err(err.to_string()),
    }
}
/// `get` action: loads one composition event together with its members and
/// outcomes.
///
/// # Errors
/// Returns "composition event not found" when no event has `event_id`, or the
/// stringified storage error if any of the three lookups fails.
fn get(storage: &Storage, event_id: &str) -> Result<Value, String> {
    let event = match storage.get_composition_event(event_id) {
        Ok(Some(found)) => found,
        Ok(None) => return Err(format!("composition event not found: {}", event_id)),
        Err(err) => return Err(err.to_string()),
    };
    let members = storage
        .get_composition_members(event_id)
        .map_err(|err| err.to_string())?;
    let outcomes = storage
        .get_composition_outcomes(event_id)
        .map_err(|err| err.to_string())?;

    let payload = serde_json::json!({
        "action": "get",
        "event": event,
        "members": members,
        "outcomes": outcomes,
    });
    Ok(payload)
}
/// `memory` action: returns up to `limit` compositions that
/// `get_compositions_for_memory` reports for `memory_id`.
fn memory(storage: &Storage, memory_id: &str, limit: i32) -> Result<Value, String> {
    storage
        .get_compositions_for_memory(memory_id, limit)
        .map(|events| {
            serde_json::json!({
                "action": "memory",
                "memoryId": memory_id,
                "events": events,
            })
        })
        .map_err(|err| err.to_string())
}
/// `neighbors` action: returns up to `limit` entries that
/// `get_composition_neighbors` reports for `memory_id`.
fn neighbors(storage: &Storage, memory_id: &str, limit: i32) -> Result<Value, String> {
    storage
        .get_composition_neighbors(memory_id, limit)
        .map(|found| {
            serde_json::json!({
                "action": "neighbors",
                "memoryId": memory_id,
                "neighbors": found,
            })
        })
        .map_err(|err| err.to_string())
}
/// `never_composed` action: returns up to `limit` candidates from
/// `get_never_composed_candidates`, optionally restricted by `tags`.
fn never_composed(storage: &Storage, limit: i32, tags: Option<&[String]>) -> Result<Value, String> {
    storage
        .get_never_composed_candidates(limit, tags)
        .map(|candidates| {
            serde_json::json!({
                "action": "never_composed",
                "candidates": candidates,
            })
        })
        .map_err(|err| err.to_string())
}
/// `bounty_mode` action: groups recent compositions into lanes.
///
/// Events are read in pages of `PAGE_SIZE` until every lane holds `limit`
/// items, a page comes back empty, or `MAX_SCAN_EVENTS` events have been
/// scanned. Events that do not pass `composition_matches_tags` are skipped.
/// Each remaining event is added to:
/// - `alreadyComposedLanes` unconditionally (up to `limit`),
/// - `closedDoors` when it has a closing outcome (`dead_end`, `rejected`,
///   `bad_severity`, `closed_by_*`, `expired_lane`),
/// - `duplicateRiskLanes` for `duplicate_risk` / `closed_by_duplicate`,
/// - `needsPocLanes` for `needs_poc`.
///
/// The response also carries never-composed candidates, the first three of
/// them as `topWeirdCombinations`, and a fixed list of guardrail reminders.
///
/// # Errors
/// Returns the stringified storage error of the first lookup that fails.
fn bounty_mode(storage: &Storage, limit: i32, tags: Option<&[String]>) -> Result<Value, String> {
    const PAGE_SIZE: i32 = 100;
    const MAX_SCAN_EVENTS: i32 = 1_000;

    let mut offset = 0;
    let mut scanned = 0;
    let mut already_composed = Vec::new();
    let mut closed_doors = Vec::new();
    let mut duplicate_risk_lanes = Vec::new();
    let mut needs_poc_lanes = Vec::new();

    loop {
        let events = storage
            .get_recent_composition_events_page(PAGE_SIZE, offset)
            .map_err(|e| e.to_string())?;
        if events.is_empty() {
            break;
        }
        scanned += events.len() as i32;

        for event in events {
            let outcomes = storage
                .get_composition_outcomes(&event.id)
                .map_err(|e| e.to_string())?;
            let members = storage
                .get_composition_members(&event.id)
                .map_err(|e| e.to_string())?;
            if !composition_matches_tags(storage, &event, &members, tags)? {
                continue;
            }

            // Classify from the typed records rather than from the serialized
            // JSON, so lane membership does not depend on the serde field
            // naming of the outcome record.
            let mut is_closed_door = false;
            let mut is_duplicate_risk = false;
            let mut needs_poc = false;
            for outcome in &outcomes {
                match outcome.outcome_type.as_str() {
                    // Counts both as a closed door and as a duplicate risk.
                    "closed_by_duplicate" => {
                        is_closed_door = true;
                        is_duplicate_risk = true;
                    }
                    "dead_end"
                    | "rejected"
                    | "bad_severity"
                    | "closed_by_scope"
                    | "closed_by_false_assumption"
                    | "closed_by_user"
                    | "expired_lane" => is_closed_door = true,
                    "duplicate_risk" => is_duplicate_risk = true,
                    "needs_poc" => needs_poc = true,
                    _ => {}
                }
            }

            let item = serde_json::json!({
                "event": event,
                "members": members,
                "outcomes": outcomes,
            });

            if is_closed_door {
                push_limited(&mut closed_doors, item.clone(), limit);
            }
            if is_duplicate_risk {
                push_limited(&mut duplicate_risk_lanes, item.clone(), limit);
            }
            if needs_poc {
                push_limited(&mut needs_poc_lanes, item.clone(), limit);
            }
            if already_composed.len() < limit as usize {
                already_composed.push(item);
            }

            if bounty_mode_lanes_full(
                limit,
                &already_composed,
                &closed_doors,
                &duplicate_risk_lanes,
                &needs_poc_lanes,
            ) {
                break;
            }
        }

        // The inner `break` only leaves the page loop, so the stop condition
        // is re-checked here together with the scan budget.
        if bounty_mode_lanes_full(
            limit,
            &already_composed,
            &closed_doors,
            &duplicate_risk_lanes,
            &needs_poc_lanes,
        ) || scanned >= MAX_SCAN_EVENTS
        {
            break;
        }
        offset += PAGE_SIZE;
    }

    let never = storage
        .get_never_composed_candidates(limit, tags)
        .map_err(|e| e.to_string())?;
    let top_weird_combinations = never.iter().take(3).cloned().collect::<Vec<_>>();

    Ok(serde_json::json!({
        "action": "bounty_mode",
        "alreadyComposedLanes": already_composed,
        "neverComposedLanes": never,
        "closedDoors": closed_doors,
        "duplicateRiskLanes": duplicate_risk_lanes,
        "needsPocLanes": needs_poc_lanes,
        "topWeirdCombinations": top_weird_combinations,
        "guardrails": [
            "never-composed lane is not a finding",
            "composition score is not severity",
            "submit/reportable still needs source refs, scope fit, and PoC evidence"
        ]
    }))
}
/// Appends `item` to `items` unless the list already holds `limit` entries,
/// in which case `item` is dropped.
fn push_limited(items: &mut Vec<Value>, item: Value, limit: i32) {
    let capacity = limit as usize;
    if items.len() >= capacity {
        return;
    }
    items.push(item);
}
/// Returns `true` once every one of the four bounty-mode lanes holds at
/// least `limit` items.
fn bounty_mode_lanes_full(
    limit: i32,
    already_composed: &[Value],
    closed_doors: &[Value],
    duplicate_risk_lanes: &[Value],
    needs_poc_lanes: &[Value],
) -> bool {
    let needed = limit as usize;
    [
        already_composed,
        closed_doors,
        duplicate_risk_lanes,
        needs_poc_lanes,
    ]
    .iter()
    .all(|lane| lane.len() >= needed)
}
/// Decides whether a composition passes the optional tag filter.
///
/// A missing or empty filter matches everything. Otherwise the composition
/// matches when a filter tag is found, in this order, in the event metadata,
/// in a member's metadata, or on the stored node of a member (looked up via
/// `get_node`). Members whose node no longer exists are simply skipped.
///
/// # Errors
/// Returns the stringified storage error if a node lookup fails.
fn composition_matches_tags(
    storage: &Storage,
    event: &vestige_core::CompositionEventRecord,
    members: &[vestige_core::CompositionMemberRecord],
    tags: Option<&[String]>,
) -> Result<bool, String> {
    let filters = match tags {
        Some(filters) if !filters.is_empty() => filters,
        _ => return Ok(true),
    };

    if json_value_has_tag(&event.metadata, filters) {
        return Ok(true);
    }

    for member in members {
        if json_value_has_tag(&member.metadata, filters) {
            return Ok(true);
        }
        // Only hit storage when the cheaper metadata checks did not match.
        let node = storage
            .get_node(&member.memory_id)
            .map_err(|e| e.to_string())?;
        let node_matches = node
            .is_some_and(|node| node.tags.iter().any(|tag| tag_matches_filter(tag, filters)));
        if node_matches {
            return Ok(true);
        }
    }

    Ok(false)
}
fn json_value_has_tag(value: &Value, tags: &[String]) -> bool {
value
.get("tags")
.and_then(|tags_value| tags_value.as_array())
.is_some_and(|values| {
values.iter().any(|value| {
value
.as_str()
.is_some_and(|tag| tag_matches_filter(tag, tags))
})
})
}
/// Returns `true` when `tag` equals one of `filters` or is namespaced under
/// it, i.e. starts with the filter followed by `:` (`"area:x"` matches the
/// filter `"area"`, `"area-x"` does not).
///
/// Uses `strip_prefix` so no temporary `String` is built per comparison.
fn tag_matches_filter(tag: &str, filters: &[String]) -> bool {
    filters.iter().any(|wanted| {
        tag.strip_prefix(wanted.as_str())
            .is_some_and(|rest| rest.is_empty() || rest.starts_with(':'))
    })
}
/// `label` action: attaches an outcome label to an existing composition event.
///
/// The outcome gets a fresh v4 UUID, the current UTC time, the caller's
/// `label_source` (or `"user"`), and empty metadata.
///
/// # Errors
/// Returns a message when `event_id` or `outcome_type` is missing, when the
/// outcome type is not in `OUTCOME_TYPES`, when the event does not exist, or
/// when storage fails.
fn label(storage: &Storage, args: &ComposedGraphArgs) -> Result<Value, String> {
    let Some(event_id) = args.event_id.as_deref() else {
        return Err("event_id is required for label".to_string());
    };
    let Some(outcome_type) = args.outcome_type.as_deref() else {
        return Err("outcome_type is required for label".to_string());
    };

    let is_known_type = OUTCOME_TYPES.iter().any(|known| *known == outcome_type);
    if !is_known_type {
        return Err(format!("unsupported outcome_type: {}", outcome_type));
    }

    // Refuse to label an event that was never recorded.
    let existing = storage
        .get_composition_event(event_id)
        .map_err(|e| e.to_string())?;
    if existing.is_none() {
        return Err(format!("composition event not found: {}", event_id));
    }

    let label_source = match &args.label_source {
        Some(source) => source.clone(),
        None => "user".to_string(),
    };
    let outcome = CompositionOutcomeRecord {
        id: Uuid::new_v4().to_string(),
        event_id: event_id.to_string(),
        outcome_type: outcome_type.to_string(),
        labeled_at: Utc::now(),
        label_source,
        confidence_delta: args.confidence_delta,
        notes: args.notes.clone(),
        metadata: serde_json::json!({}),
    };

    storage
        .record_composition_outcome(&outcome)
        .map_err(|e| e.to_string())?;

    Ok(serde_json::json!({
        "action": "label",
        "eventId": event_id,
        "outcome": outcome,
    }))
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
use vestige_core::{
CompositionEventRecord, CompositionMemberRecord, CompositionOutcomeRecord, IngestInput,
};
fn test_storage() -> (Arc<Storage>, TempDir) {
let dir = TempDir::new().unwrap();
let storage = Storage::new(Some(dir.path().join("test.db"))).unwrap();
(Arc::new(storage), dir)
}
fn ingest(storage: &Storage, content: &str, tags: &[&str]) -> String {
storage
.ingest(IngestInput {
content: content.to_string(),
node_type: "fact".to_string(),
tags: tags.iter().map(|tag| tag.to_string()).collect(),
..Default::default()
})
.unwrap()
.id
}
/// End-to-end check of the `get`, `label`, and `bounty_mode` actions.
///
/// Saves one two-member composition tagged `protocolgate` plus one later,
/// unrelated composition, labels the first event three times, and then asserts
/// on the buckets returned by a tag-scoped `bounty_mode` call with `limit: 1`.
#[tokio::test]
async fn test_composed_graph_get_label_and_bounty_mode() {
let (storage, _dir) = test_storage();
// Three memories share the `protocolgate` tag; only `first` and `second`
// become members of a composition below. `third` is never composed.
let first = ingest(
&storage,
"Oracle drift bounty lane",
&["protocolgate", "boundary-oracle", "settlement"],
);
let second = ingest(
&storage,
"Withdrawal queue bounty lane",
&["protocolgate", "boundary-queue", "settlement"],
);
let third = ingest(
&storage,
"Keeper role bounty lane",
&["protocolgate", "boundary-role", "settlement"],
);
let event = CompositionEventRecord {
id: "composed-graph-test".to_string(),
created_at: Utc::now(),
tool: "deep_reference".to_string(),
mode: "bounty".to_string(),
query: Some("oracle withdrawal".to_string()),
query_hash: Some("test".to_string()),
confidence: Some(0.8),
status: Some("resolved".to_string()),
output_preview: Some("compose oracle and withdrawal queue".to_string()),
metadata: serde_json::json!({}),
};
storage
.save_composition(
&event,
&[
CompositionMemberRecord {
event_id: event.id.clone(),
memory_id: first.clone(),
role: "primary".to_string(),
rank: 0,
trust: Some(0.8),
score: Some(0.9),
preview: None,
metadata: serde_json::json!({}),
},
CompositionMemberRecord {
event_id: event.id.clone(),
memory_id: second.clone(),
role: "supporting".to_string(),
rank: 1,
trust: Some(0.7),
score: Some(0.8),
preview: None,
metadata: serde_json::json!({}),
},
],
&[],
)
.unwrap();
// A second event, stamped 10 seconds later, whose only member carries the
// `personal` tag and which already has a `needs_poc` outcome attached.
let unrelated = ingest(&storage, "Personal planning lane", &["personal"]);
storage
.save_composition(
&CompositionEventRecord {
id: "unrelated-composed-graph-test".to_string(),
created_at: Utc::now() + chrono::Duration::seconds(10),
tool: "deep_reference".to_string(),
mode: "planning".to_string(),
query: Some("personal planning".to_string()),
query_hash: Some("unrelated".to_string()),
confidence: Some(0.4),
status: Some("resolved".to_string()),
output_preview: Some("unrelated composition".to_string()),
metadata: serde_json::json!({}),
},
&[CompositionMemberRecord {
event_id: "unrelated-composed-graph-test".to_string(),
memory_id: unrelated,
role: "primary".to_string(),
rank: 0,
trust: Some(0.4),
score: Some(0.2),
preview: None,
metadata: serde_json::json!({}),
}],
&[CompositionOutcomeRecord {
id: "unrelated-composed-graph-outcome".to_string(),
event_id: "unrelated-composed-graph-test".to_string(),
outcome_type: "needs_poc".to_string(),
labeled_at: Utc::now(),
label_source: "test".to_string(),
confidence_delta: None,
notes: None,
metadata: serde_json::json!({}),
}],
)
.unwrap();
// `get` must return both members saved for the primary event.
let get_result = execute(
&storage,
Some(serde_json::json!({
"action": "get",
"event_id": event.id
})),
)
.await
.unwrap();
assert_eq!(get_result["members"].as_array().unwrap().len(), 2);
// Each `label` call is expected to echo the recorded outcome under
// `outcome.outcomeType`.
let label_result = execute(
&storage,
Some(serde_json::json!({
"action": "label",
"event_id": "composed-graph-test",
"outcome_type": "submitted",
"notes": "submitted in test"
})),
)
.await
.unwrap();
assert_eq!(
label_result["outcome"]["outcomeType"].as_str(),
Some("submitted")
);
let closed_label_result = execute(
&storage,
Some(serde_json::json!({
"action": "label",
"event_id": "composed-graph-test",
"outcome_type": "closed_by_scope",
"notes": "closed in test"
})),
)
.await
.unwrap();
assert_eq!(
closed_label_result["outcome"]["outcomeType"].as_str(),
Some("closed_by_scope")
);
let duplicate_label_result = execute(
&storage,
Some(serde_json::json!({
"action": "label",
"event_id": "composed-graph-test",
"outcome_type": "closed_by_duplicate",
"notes": "duplicate family in test"
})),
)
.await
.unwrap();
assert_eq!(
duplicate_label_result["outcome"]["outcomeType"].as_str(),
Some("closed_by_duplicate")
);
// Tag-scoped bounty view with limit 1: the newer unrelated event must not
// take the single `alreadyComposedLanes` slot.
let bounty = execute(
&storage,
Some(serde_json::json!({
"action": "bounty_mode",
"tags": ["protocolgate"],
"limit": 1
})),
)
.await
.unwrap();
let already = bounty["alreadyComposedLanes"].as_array().unwrap();
assert_eq!(already.len(), 1);
assert!(
already[0]["event"]["id"].as_str() == Some("composed-graph-test"),
"tag-scoped bounty_mode should skip newer unrelated events before truncating"
);
// NOTE(review): the outcome-to-bucket mapping lives in the handler, which is
// not visible here. Presumably the closed_by_* labels above feed
// `closedDoors` / `duplicateRiskLanes`, and the only `needs_poc` outcome
// belongs to the unrelated, filtered-out event.
assert_eq!(bounty["closedDoors"].as_array().unwrap().len(), 1);
assert_eq!(bounty["duplicateRiskLanes"].as_array().unwrap().len(), 1);
assert!(bounty["needsPocLanes"].as_array().unwrap().is_empty());
// `third` was never a member of any saved composition, so at least one
// never-composed candidate pair is expected to include it.
assert!(
bounty["neverComposedLanes"]
.as_array()
.unwrap()
.iter()
.any(|candidate| {
let first_id = candidate["firstId"].as_str().unwrap_or_default();
let second_id = candidate["secondId"].as_str().unwrap_or_default();
[first_id, second_id].contains(&third.as_str())
})
);
}
/// Verifies that a tag-filtered `bounty_mode` call still finds an older tagged
/// event when many newer untagged events exist, and that the filter `project`
/// matches the namespaced tag `project:vestige`.
#[tokio::test]
async fn test_bounty_mode_paginates_tag_filter_and_matches_namespaced_tags() {
let (storage, _dir) = test_storage();
let tagged = ingest(
&storage,
"Older tagged composition lane",
&["project:vestige", "composition"],
);
let unrelated = ingest(&storage, "Newer unrelated lane", &["unrelated"]);
// All timestamps are derived from one base so the ordering is deterministic.
let base_time = Utc::now();
// The oldest event is the only one whose member carries a matching tag.
storage
.save_composition(
&CompositionEventRecord {
id: "older-tagged-composition".to_string(),
created_at: base_time,
tool: "deep_reference".to_string(),
mode: "research".to_string(),
query: Some("older tagged lane".to_string()),
query_hash: Some("fnv1a64:older".to_string()),
confidence: Some(0.8),
status: Some("resolved".to_string()),
output_preview: None,
metadata: serde_json::json!({}),
},
&[CompositionMemberRecord {
event_id: "older-tagged-composition".to_string(),
memory_id: tagged,
role: "primary".to_string(),
rank: 0,
trust: Some(0.8),
score: Some(0.9),
preview: None,
metadata: serde_json::json!({}),
}],
&[],
)
.unwrap();
// 101 strictly newer events, each with the untagged member.
// NOTE(review): 101 presumably exceeds the handler's internal page size
// (not visible here), which is what forces pagination — confirm.
for idx in 0..101 {
let event_id = format!("newer-unrelated-composition-{idx}");
storage
.save_composition(
&CompositionEventRecord {
id: event_id.clone(),
created_at: base_time + chrono::Duration::seconds(i64::from(idx + 1)),
tool: "deep_reference".to_string(),
mode: "planning".to_string(),
query: Some(format!("newer unrelated lane {idx}")),
query_hash: Some(format!("fnv1a64:newer-{idx}")),
confidence: Some(0.3),
status: Some("resolved".to_string()),
output_preview: None,
metadata: serde_json::json!({}),
},
&[CompositionMemberRecord {
event_id,
memory_id: unrelated.clone(),
role: "primary".to_string(),
rank: 0,
trust: Some(0.3),
score: Some(0.2),
preview: None,
metadata: serde_json::json!({}),
}],
&[],
)
.unwrap();
}
// Filter by the bare namespace `project`, not the full `project:vestige` tag.
let bounty = execute(
&storage,
Some(serde_json::json!({
"action": "bounty_mode",
"tags": ["project"],
"limit": 1
})),
)
.await
.unwrap();
let already = bounty["alreadyComposedLanes"].as_array().unwrap();
assert_eq!(already.len(), 1);
assert_eq!(
already[0]["event"]["id"].as_str(),
Some("older-tagged-composition"),
"tag-filtered bounty_mode should page past newer unrelated events and match namespaced tags"
);
}
/// Verifies that purging a member memory scrubs its preview from `get` while the
/// composition event stays discoverable through a tag-filtered `bounty_mode` call.
#[tokio::test]
async fn test_bounty_mode_uses_member_tag_snapshot_after_purge() {
let (storage, _dir) = test_storage();
let tagged = ingest(
&storage,
"Tagged member that will be purged",
&["project:vestige", "composition"],
);
// One closed event with a single member (saved with a preview) and a
// `closed_by_scope` outcome.
storage
.save_composition(
&CompositionEventRecord {
id: "purged-tagged-member-composition".to_string(),
created_at: Utc::now(),
tool: "deep_reference".to_string(),
mode: "research".to_string(),
query: Some("purged tagged lane".to_string()),
query_hash: Some("fnv1a64:purged".to_string()),
confidence: Some(0.6),
status: Some("closed".to_string()),
output_preview: None,
metadata: serde_json::json!({}),
},
&[CompositionMemberRecord {
event_id: "purged-tagged-member-composition".to_string(),
memory_id: tagged.clone(),
role: "primary".to_string(),
rank: 0,
trust: Some(0.7),
score: Some(0.8),
preview: Some("Tagged member that will be purged".to_string()),
metadata: serde_json::json!({}),
}],
&[CompositionOutcomeRecord {
id: "purged-tagged-member-outcome".to_string(),
event_id: "purged-tagged-member-composition".to_string(),
outcome_type: "closed_by_scope".to_string(),
labeled_at: Utc::now(),
label_source: "test".to_string(),
confidence_delta: Some(-0.2),
notes: None,
metadata: serde_json::json!({}),
}],
)
.unwrap();
// Remove the source memory after it has been recorded as a member.
storage
.purge_node(&tagged, Some("test purge"))
.expect("purge should succeed");
let get_result = execute(
&storage,
Some(serde_json::json!({
"action": "get",
"event_id": "purged-tagged-member-composition"
})),
)
.await
.unwrap();
// The preview may be either absent or an explicit JSON null; both count as scrubbed.
assert!(
get_result["members"][0].get("preview").is_none()
|| get_result["members"][0]["preview"].is_null(),
"purge should scrub member preview from composed_graph get"
);
// The tag filter must still match even though the tagged memory was purged.
let bounty = execute(
&storage,
Some(serde_json::json!({
"action": "bounty_mode",
"tags": ["project"],
"limit": 1
})),
)
.await
.unwrap();
let already = bounty["alreadyComposedLanes"].as_array().unwrap();
assert_eq!(already.len(), 1);
assert_eq!(
already[0]["event"]["id"].as_str(),
Some("purged-tagged-member-composition"),
"tag-filtered bounty_mode should use composition member tag snapshots after source memory purge"
);
assert_eq!(bounty["closedDoors"].as_array().unwrap().len(), 1);
}
/// Verifies that `closedDoors` is still populated from an older event when the
/// single `alreadyComposedLanes` slot (`limit: 1`) is taken by a newer event.
#[tokio::test]
async fn test_bounty_mode_guardrail_buckets_are_not_truncated_by_already_limit() {
let (storage, _dir) = test_storage();
// Both memories carry the same tag, so both events match the filter below.
let neutral = ingest(&storage, "Neutral release lane", &["project:vestige"]);
let closed = ingest(&storage, "Closed release lane", &["project:vestige"]);
let base_time = Utc::now();
// Older event: closed, with a `closed_by_false_assumption` outcome.
storage
.save_composition(
&CompositionEventRecord {
id: "older-closed-lane".to_string(),
created_at: base_time,
tool: "deep_reference".to_string(),
mode: "release".to_string(),
query: Some("older closed lane".to_string()),
query_hash: Some("fnv1a64:older-closed".to_string()),
confidence: Some(0.3),
status: Some("closed".to_string()),
output_preview: None,
metadata: serde_json::json!({}),
},
&[CompositionMemberRecord {
event_id: "older-closed-lane".to_string(),
memory_id: closed,
role: "primary".to_string(),
rank: 0,
trust: Some(0.5),
score: Some(0.4),
preview: None,
metadata: serde_json::json!({}),
}],
&[CompositionOutcomeRecord {
id: "older-closed-outcome".to_string(),
event_id: "older-closed-lane".to_string(),
outcome_type: "closed_by_false_assumption".to_string(),
labeled_at: base_time,
label_source: "test".to_string(),
confidence_delta: Some(-0.3),
notes: None,
metadata: serde_json::json!({}),
}],
)
.unwrap();
// Newer event (one second later): resolved, with no outcomes.
storage
.save_composition(
&CompositionEventRecord {
id: "newer-neutral-lane".to_string(),
created_at: base_time + chrono::Duration::seconds(1),
tool: "deep_reference".to_string(),
mode: "release".to_string(),
query: Some("newer neutral lane".to_string()),
query_hash: Some("fnv1a64:newer-neutral".to_string()),
confidence: Some(0.7),
status: Some("resolved".to_string()),
output_preview: None,
metadata: serde_json::json!({}),
},
&[CompositionMemberRecord {
event_id: "newer-neutral-lane".to_string(),
memory_id: neutral,
role: "primary".to_string(),
rank: 0,
trust: Some(0.8),
score: Some(0.8),
preview: None,
metadata: serde_json::json!({}),
}],
&[],
)
.unwrap();
let bounty = execute(
&storage,
Some(serde_json::json!({
"action": "bounty_mode",
"tags": ["project"],
"limit": 1
})),
)
.await
.unwrap();
// The newer event takes the only already-composed slot...
assert_eq!(
bounty["alreadyComposedLanes"][0]["event"]["id"].as_str(),
Some("newer-neutral-lane")
);
// ...and the older closed event must still appear in `closedDoors`.
assert_eq!(
bounty["closedDoors"][0]["event"]["id"].as_str(),
Some("older-closed-lane"),
"guardrail buckets should keep scanning after alreadyComposedLanes reaches limit"
);
}
}

View file

@ -20,9 +20,10 @@ use serde::Deserialize;
use serde_json::Value;
use std::sync::Arc;
use tokio::sync::Mutex;
use uuid::Uuid;
use crate::cognitive::CognitiveEngine;
use vestige_core::Storage;
use vestige_core::{CompositionEventRecord, CompositionMemberRecord, Storage};
/// Input schema for deep_reference / cross_reference tool
pub fn schema() -> Value {
@ -509,6 +510,7 @@ pub async fn execute(
"confidence": 0.0,
"guidance": "No memories found. Use smart_ingest to add memories.",
"memoriesAnalyzed": 0,
"compositionWriteStatus": "skipped_empty",
}));
}
@ -820,6 +822,7 @@ pub async fn execute(
"id": s.id,
"preview": s.content.chars().take(200).collect::<String>(),
"trust": (s.trust * 100.0).round() / 100.0,
"relevanceScore": ((composite(s) * 100.0).round() / 100.0),
"date": s.updated_at.to_rfc3339(),
"role": if i == 0 { "primary" } else { "supporting" },
})
@ -925,9 +928,163 @@ pub async fn execute(
response["related_insights"] = serde_json::json!(related_insights);
}
match persist_deep_reference_composition(storage, &args.query, &intent, &response) {
Ok(Some(event_id)) => {
response["composition_event_id"] = serde_json::json!(event_id);
response["compositionWriteStatus"] = serde_json::json!("persisted");
}
Ok(None) => {
response["compositionWriteStatus"] = serde_json::json!("skipped_empty");
}
Err(err) => {
tracing::warn!(
"Failed to persist deep_reference composition event: {}",
err
);
response["compositionWriteStatus"] = serde_json::json!("failed");
}
}
Ok(response)
}
/// Records a finished `deep_reference` response in the composition ledger.
///
/// Builds one `CompositionEventRecord` from the query and the response's
/// `confidence`, `status`, `guidance`, and reasoning fields, plus one
/// `CompositionMemberRecord` for every entry that has a string `id` under the
/// response's `evidence`, `contradictions` (both the `stronger` and `weaker`
/// side), and `superseded` arrays.
///
/// # Returns
/// - `Ok(Some(event_id))` when the event and its members were saved.
/// - `Ok(None)` when no members were collected; nothing is written in that case.
///
/// # Errors
/// Returns the stringified storage error when `save_composition` fails.
fn persist_deep_reference_composition(
storage: &Arc<Storage>,
query: &str,
intent: &QueryIntent,
response: &Value,
) -> Result<Option<String>, String> {
let event_id = Uuid::new_v4().to_string();
let event = CompositionEventRecord {
id: event_id.clone(),
created_at: Utc::now(),
tool: "deep_reference".to_string(),
mode: "deep_reference".to_string(),
query: Some(query.to_string()),
query_hash: Some(query_hash(query)),
confidence: response.get("confidence").and_then(|v| v.as_f64()),
status: response
.get("status")
.and_then(|v| v.as_str())
.map(ToOwned::to_owned),
// The guidance text doubles as the event's output preview, capped at 280 bytes.
output_preview: response
.get("guidance")
.and_then(|v| v.as_str())
.map(|value| preview_text(value, 280)),
// Missing counters default to 0; a missing `reasoning` serializes as null.
metadata: serde_json::json!({
"intent": format!("{:?}", intent),
"memoriesAnalyzed": response.get("memoriesAnalyzed").and_then(|v| v.as_u64()).unwrap_or(0),
"activationExpanded": response.get("activationExpanded").and_then(|v| v.as_u64()).unwrap_or(0),
"reasoningPreview": response.get("reasoning").and_then(|v| v.as_str()).map(|value| preview_text(value, 600)),
}),
};
// NOTE(review): the same memory id can be pushed more than once below (for
// example as evidence and again as `contradicting`, or in two contradiction
// pairs). Confirm `save_composition` tolerates repeated members for one event.
let mut members = Vec::new();
if let Some(evidence) = response.get("evidence").and_then(|v| v.as_array()) {
for (idx, item) in evidence.iter().enumerate() {
// Entries without a string `id` cannot be referenced and are skipped.
let Some(memory_id) = item.get("id").and_then(|v| v.as_str()) else {
continue;
};
// Fall back to a positional role when the entry carries no `role` field.
let role = item
.get("role")
.and_then(|v| v.as_str())
.unwrap_or(if idx == 0 { "primary" } else { "supporting" });
members.push(CompositionMemberRecord {
event_id: event_id.clone(),
memory_id: memory_id.to_string(),
role: role.to_string(),
rank: idx as i32,
trust: item.get("trust").and_then(|v| v.as_f64()),
// Accept either the camelCase or the snake_case score key.
score: item
.get("relevanceScore")
.or_else(|| item.get("relevance_score"))
.and_then(|v| v.as_f64()),
preview: None,
metadata: serde_json::json!({
"roleSource": "deep_reference_evidence",
"evidenceRank": idx,
"date": item.get("date").and_then(|v| v.as_str()),
}),
});
}
}
if let Some(contradictions) = response.get("contradictions").and_then(|v| v.as_array()) {
for (idx, contradiction) in contradictions.iter().enumerate() {
// Both sides of a pair share the pair's index as rank and its
// `topic_overlap` as score; the side is kept in metadata.
for side in ["stronger", "weaker"] {
let Some(item) = contradiction.get(side) else {
continue;
};
let Some(memory_id) = item.get("id").and_then(|v| v.as_str()) else {
continue;
};
members.push(CompositionMemberRecord {
event_id: event_id.clone(),
memory_id: memory_id.to_string(),
role: "contradicting".to_string(),
rank: idx as i32,
trust: item.get("trust").and_then(|v| v.as_f64()),
score: contradiction.get("topic_overlap").and_then(|v| v.as_f64()),
preview: None,
metadata: serde_json::json!({
"roleSource": "deep_reference_contradiction",
"side": side,
"date": item.get("date").and_then(|v| v.as_str()),
}),
});
}
}
}
if let Some(superseded) = response.get("superseded").and_then(|v| v.as_array()) {
for (idx, item) in superseded.iter().enumerate() {
let Some(memory_id) = item.get("id").and_then(|v| v.as_str()) else {
continue;
};
members.push(CompositionMemberRecord {
event_id: event_id.clone(),
memory_id: memory_id.to_string(),
role: "superseded".to_string(),
rank: idx as i32,
trust: item.get("trust").and_then(|v| v.as_f64()),
score: None,
preview: None,
metadata: serde_json::json!({
"roleSource": "deep_reference_superseded",
"superseded_by": item.get("superseded_by").and_then(|v| v.as_str()),
"date": item.get("date").and_then(|v| v.as_str()),
}),
});
}
}
// No memory participated: skip the write so no member-less event is stored.
if members.is_empty() {
return Ok(None);
}
storage
.save_composition(&event, &members, &[])
.map_err(|e| e.to_string())?;
Ok(Some(event_id))
}
/// Returns a 64-bit FNV-1a digest of `query`'s UTF-8 bytes, formatted as
/// `fnv1a64:` followed by 16 lowercase hex digits.
fn query_hash(query: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: xor the byte in first, then multiply by the prime (wrapping).
    let digest = query.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("fnv1a64:{digest:016x}")
}
/// Flattens `value` to a single line and caps it at `max` bytes.
///
/// Every `\n` becomes a space. If the flattened text is longer than `max` bytes it
/// is cut at the nearest character boundary at or below `max` and `...` is
/// appended, so a truncated preview can be up to three bytes longer than `max`.
fn preview_text(value: &str, max: usize) -> String {
    let flattened = value.replace('\n', " ");
    if flattened.len() > max {
        // `max < len` here, so walking down from `max` always stops at a valid
        // boundary (index 0 at the latest) and never splits a multi-byte char.
        let mut cut = max;
        while !flattened.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &flattened[..cut])
    } else {
        flattened
    }
}
// ============================================================================
// TESTS
// ============================================================================
@ -1010,6 +1167,99 @@ mod tests {
);
}
/// Verifies that a `deep_reference` call with matching memories writes a
/// composition event and members, and reports the event id and a `persisted`
/// write status in its response.
#[tokio::test]
async fn test_deep_reference_persists_composition_event() {
let (storage, _dir) = test_storage().await;
// Two memories that both match the query below.
let primary_id = ingest_one(
&storage,
"ProtocolGate control-plane composition tracks global invariant local gate bypasses.",
&["protocolgate", "boundary-scope"],
)
.await;
let supporting_id = ingest_one(
&storage,
"ProtocolGate global invariant local gate research used Aave account-global health factor and route-local validation.",
&["protocolgate", "boundary-scope"],
)
.await;
let result = execute(
&storage,
&test_cognitive(),
Some(serde_json::json!({
"query": "ProtocolGate global invariant local gate",
"depth": 10
})),
)
.await
.expect("execute should succeed");
let event_id = result["composition_event_id"]
.as_str()
.expect("deep_reference should return persisted event id");
assert_eq!(result["compositionWriteStatus"].as_str(), Some("persisted"));
// The stored event must carry the tool name and the original query text.
let event = storage
.get_composition_event(event_id)
.unwrap()
.expect("composition event should be stored");
assert_eq!(event.tool, "deep_reference");
assert_eq!(
event.query.as_deref(),
Some("ProtocolGate global invariant local gate")
);
// Both ingested memories must appear as members, and one member must be primary.
let members = storage.get_composition_members(event_id).unwrap();
assert!(members.iter().any(|member| member.memory_id == primary_id));
assert!(
members
.iter()
.any(|member| member.memory_id == supporting_id)
);
assert!(members.iter().any(|member| member.role == "primary"));
// Evidence-derived members keep their score and record where the role came from.
assert!(
members.iter().any(|member| {
member.memory_id == primary_id
&& member.score.is_some()
&& member.metadata["roleSource"] == "deep_reference_evidence"
}),
"persisted members should retain relevance score and role source"
);
}
/// Verifies that a `deep_reference` call against an empty store reports
/// `skipped_empty`, returns no event id, and leaves the composition ledger empty.
#[tokio::test]
async fn test_deep_reference_skips_empty_composition_event() {
let (storage, _dir) = test_storage().await;
// Nothing is ingested, so the query has no memories to draw on.
let result = execute(
&storage,
&test_cognitive(),
Some(serde_json::json!({
"query": "no memories exist for this query",
"depth": 10
})),
)
.await
.expect("execute should succeed");
assert_eq!(
result["compositionWriteStatus"].as_str(),
Some("skipped_empty")
);
assert!(
result.get("composition_event_id").is_none(),
"empty evidence should not create a composition event"
);
// Check storage directly, not just the response, to prove nothing was written.
assert!(
storage
.get_recent_composition_events(10)
.unwrap()
.is_empty(),
"ledger should stay empty when no memories participated"
);
}
// ========================================================================
// Confidence sanity: must vary with query relevance.
// ========================================================================

View file

@ -41,6 +41,7 @@ pub mod graph;
pub mod health;
// v2.1: Cross-reference (connect the dots)
pub mod composed_graph;
pub mod contradictions;
pub mod cross_reference;

159
docs/COMPOSED_GRAPH.md Normal file
View file

@ -0,0 +1,159 @@
# ComposedGraph
ComposedGraph records memory combinations as durable reasoning events.
Most memory systems store facts, entities, or relationships. ComposedGraph stores a
different object: which memories were used together, why they were used, and what
happened afterward.
## Model
`composition_events` stores the reasoning envelope:
- tool and mode, such as `deep_reference` or `bounty`
- query and query hash
- confidence, status, and output preview
- metadata for intent, analyzed memory count, activation expansion, and reasoning preview
`composition_members` stores the participating memories:
- memory id
- role, such as `primary`, `supporting`, `contradicting`, or `superseded`
- rank, trust, relevance score, preview, and metadata
`composition_outcomes` stores later labels:
- `helpful`
- `dead_end`
- `submitted`
- `accepted`
- `rejected`
- `duplicate_risk`
- `needs_poc`
- `bad_severity`
- `user_promoted`
- `user_demoted`
- `closed_by_scope`
- `closed_by_duplicate`
- `closed_by_false_assumption`
- `closed_by_user`
- `expired_lane`
Member memory ids are intentionally historical references, not foreign keys into
`knowledge_nodes`. Purging or superseding a memory should not erase the fact that
it once participated in a reasoning path.
## MCP Tool
Use `composed_graph` for read/write access to the composition ledger.
```json
{ "action": "recent", "limit": 10 }
```
```json
{ "action": "get", "event_id": "<composition-event-id>" }
```
```json
{ "action": "memory", "memory_id": "<memory-id>", "limit": 10 }
```
```json
{ "action": "neighbors", "memory_id": "<memory-id>", "limit": 10 }
```
```json
{ "action": "never_composed", "tags": ["project:vestige"], "limit": 10 }
```
```json
{
"action": "label",
"event_id": "<composition-event-id>",
"outcome_type": "helpful",
"notes": "This combination led to the accepted fix."
}
```
## Never-Composed Frontier
`never_composed` returns pairs that have not yet appeared together in a
composition event.
The ranking is intentionally not just shared-tag matching. It combines:
- exact shared tags
- shared meaningful content terms
- boundary tags such as `boundary-*`, `oracle`, `queue`, `settlement`, `upgrade`,
`pause`, `accounting`, or `scope`
- node-type diversity
- FSRS retention strength
- composition novelty, so memories that have not already been heavily composed
still get surfaced
- prior composition outcomes from either member, so previously accepted,
duplicate-risk, or dead-end lanes shape the frontier without hiding it
Each candidate includes:
- `score`
- `noveltyScore`
- `bridgeScore`
- `trustScore`
- `outcomeScoreAdjustment`
- `sharedTags`
- `boundaryTags`
- `sharedTerms`
- `priorOutcomes`
- `outcomeSignal`, such as `clean`, `prior_success`, `prior_duplicate_risk`,
`prior_closed_door`, or `mixed_prior_outcomes`
- node types
- previews
- a short reason
- a `compositionQuestion` that an agent can answer before taking action
The output is a frontier queue, not a finding. A never-composed pair means
"worth investigating," not "true," "novel," or "reportable."
Prior outcomes are also guardrails, not verdicts: a duplicate-risk signal should
make the agent check duplicate families first, while a success signal should make
it inspect why the older composition worked.
Closed-door labels should be specific when possible. Prefer `closed_by_scope`,
`closed_by_duplicate`, `closed_by_false_assumption`, `closed_by_user`, or
`expired_lane` over a generic `dead_end` when the reason is known.
## Bounty / Research Mode
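`bounty_mode` is a higher-level read shape for investigative workflows. It is called like the other actions, for example `{ "action": "bounty_mode", "tags": ["project:vestige"], "limit": 10 }`. It returns: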
- recent already-composed lanes
- never-composed lanes
- closed doors
- duplicate-risk lanes
- lanes that need proof-of-concept work
- top weird combinations
This is useful for security research, bug triage, architecture work, and product
strategy because failed or duplicate compositions are preserved instead of being
rediscovered repeatedly.
## Deep Reference Integration
`deep_reference` persists composition events automatically when it has evidence
members. Empty evidence does not create a ledger event.
The response includes:
- `composition_event_id` when persisted
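- `compositionWriteStatus`: `persisted` when the event was saved, `skipped_empty` when there were no members to record, or `failed` when the ledger write returned an error (the `deep_reference` answer is still returned in that case)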
## Design Direction
The next useful upgrades are:
- triple or n-ary candidate mining, not only pairs
- structural-fit scoring for analogies, separate from surface similarity
- trust-zone scoring so a composition is limited by its weakest provenance
- temporal replay: "what combinations were available when this decision was made?"
- evaluation tasks where success requires combining memories that were never
previously co-composed

View file

@ -12,6 +12,8 @@ instead of opaque. The current schema is `vestige.sanhedrin.receipt.v1`.
- Appeals: `~/.vestige/sanhedrin/appeals.jsonl`
- Fail-open events: `~/.vestige/sanhedrin/fail-open.jsonl`
Optional companion schema: [`SANHEDRIN_TEST_INTEGRITY_DELTAS.md`](SANHEDRIN_TEST_INTEGRITY_DELTAS.md) describes mechanical deltas for cases where a verifier command passed but the test artifact changed after implementation.
## v1 JSON Shape
```json

View file

@ -0,0 +1,110 @@
# Sanhedrin Test-Integrity Delta Receipts
Receipt Lock proves a narrower claim: a verification command actually ran and
succeeded. Test-integrity deltas are an optional companion receipt for the
stronger claim that the tests still mean what the draft says they mean.
This receipt is intentionally mechanical. It is not a broad correctness oracle
and it does not ask a second model to decide whether the implementation is good.
It records whether the verification artifact changed in ways that should
upgrade, downgrade, or send the verification claim to human review.
## Boundary
Keep these claims separate:
1. **Command receipt:** `cargo test`, `npm test`, `pytest`, or another verifier
command ran after the relevant edit and exited successfully.
2. **Test-integrity delta:** the tests/specs behind that verifier were not
removed, skipped, weakened, or replaced after implementation in a way that
makes the green result less admissible.
A run can have a valid command receipt and still receive a downgraded
integrity decision.
## Optional JSON Shape
```json
{
"schema": "vestige.sanhedrin.test_integrity_delta.v1",
"id": "tid_<stable hash>",
"commandReceiptId": "receipt_<stable hash>",
"verificationClaim": "All tests passed.",
"specSource": {
"contextId": "spec_ctx_04",
"testFiles": [
{
"path": "tests/cart.test.ts",
"hashBeforeImplementation": "sha256:...",
"hashAfterVerification": "sha256:..."
}
]
},
"implementationContext": "impl_ctx_09",
"verifierContext": "verify_ctx_02",
"delta": {
"testFilesChangedAfterImplementation": true,
"removedOrDisabledTests": [
{
"kind": "skip_or_only",
"path": "tests/cart.test.ts",
"line": 42
}
],
"removedAssertions": 2,
"weakenedExpectations": [
{
"path": "tests/cart.test.ts",
"from": "throws InvalidCouponError",
"to": "does not throw"
}
],
"snapshotChurnWithoutSourceChange": false,
"coverageDelta": -3.8,
"mocksReplacingRealBoundary": [
{
"module": "PaymentGateway",
"before": "integration-ish fake",
"after": "empty stub"
}
]
},
"freshVerifier": {
"commandReceiptId": "receipt_<stable hash>",
"exitCode": 0,
"checkedAfterLastRelevantEdit": true
},
"decision": "downgraded",
"reason": "tests passed, but the tests were weakened after implementation"
}
```
## Decisions
- `accepted` — a verifier command succeeded after the last relevant edit and no
integrity downgrade was detected.
- `downgraded` — the command succeeded, but the tests/specs changed in a way
that makes the verification claim weaker than stated.
- `needs_human_review` — the delta may be legitimate, but a local mechanical
check cannot safely classify it. Snapshot updates are a common example.
## Minimal Fixture Suite
These cases are small enough to live as fixtures without turning Sanhedrin into
a correctness judge.
| Case | Input pattern | Expected decision | Why |
| --- | --- | --- | --- |
| unchanged-good | implementation changes source; tests unchanged; fresh verifier succeeds | `accepted` | Green tests are supported by a fresh command receipt and unchanged test artifact. |
| skipped-test | implementation adds `.skip`, `.only`, `#[ignore]`, or equivalent before verifier succeeds | `downgraded` | The command ran, but the claim no longer represents the original test obligation. |
| weakened-assertion | expectation is relaxed after implementation, e.g. `throws InvalidCouponError` -> `does not throw` | `downgraded` | The verifier passed against a weaker assertion than the one available before implementation. |
| justified-snapshot | snapshot changes alongside an intentional source/UI change | `needs_human_review` or `accepted` by policy | Snapshot churn can be valid, but the receipt should make the policy decision explicit. |
## Non-goals
- Do not infer whether the implementation is correct in the world.
- Do not require full semantic diffing before Receipt Lock can operate.
- Do not treat staged evidence or a model explanation as equivalent to a fresh
command receipt.
- Do not block every test edit. The goal is to keep the verification claim
honest when the test artifact changed after implementation.

38
tests/phase_1/Cargo.toml Normal file
View file

@ -0,0 +1,38 @@
[package]
name = "vestige-phase-1-tests"
version = "0.0.1"
edition = "2024"
publish = false
[dependencies]
vestige-core = { path = "../../crates/vestige-core" }
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
tempfile = "3"
uuid = { version = "1", features = ["v4"] }
chrono = "0.4"
serde_json = "1"
rusqlite = { version = "0.38", features = ["bundled"] }
[[test]]
name = "trait_round_trip"
path = "trait_round_trip.rs"
[[test]]
name = "embedding_model_registry"
path = "embedding_model_registry.rs"
[[test]]
name = "domain_column_migration"
path = "domain_column_migration.rs"
[[test]]
name = "cognitive_module_isolation"
path = "cognitive_module_isolation.rs"
[[test]]
name = "send_bound_variant"
path = "send_bound_variant.rs"
[[test]]
name = "embedder_trait"
path = "embedder_trait.rs"

View file

@ -0,0 +1,143 @@
//! Phase 1 integration tests: cognitive modules compile against Arc<dyn MemoryStore>.
//! The key goal is a compile-time gate: if any module still typed against
//! SqliteMemoryStore concretely, this would fail to compile.
use chrono::Utc;
use std::sync::Arc;
use tempfile::tempdir;
use uuid::Uuid;
use vestige_core::storage::{MemoryEdge, MemoryRecord, MemoryStore, SqliteMemoryStore};
/// Creates a `SqliteMemoryStore` on a fresh temporary database and returns it as
/// a trait object.
///
/// The temporary directory guard is intentionally leaked with `mem::forget`, so
/// the guard is never dropped while the returned store is in use. The directory
/// is therefore not cleaned up by this helper.
fn make_store() -> Arc<dyn MemoryStore> {
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("test.db");
    std::mem::forget(tmp);
    let store = SqliteMemoryStore::new(Some(db_path)).expect("create");
    Arc::new(store)
}
/// Builds a minimal `fact` record with a random id, the `isolation-test` tag, no
/// domains, no embedding, and empty metadata.
fn make_record(content: &str) -> MemoryRecord {
    let id = Uuid::new_v4();
    let body = content.to_string();
    let kind = "fact".to_string();
    let tags = vec!["isolation-test".to_string()];
    // Two separate clock reads, exactly as the fixture has always done.
    let created_at = Utc::now();
    let updated_at = Utc::now();
    MemoryRecord {
        id,
        domains: vec![],
        domain_scores: Default::default(),
        content: body,
        node_type: kind,
        tags,
        embedding: None,
        created_at,
        updated_at,
        metadata: serde_json::json!({}),
    }
}
/// Compile-time and smoke-test gate for the `Arc<dyn MemoryStore>` call pattern.
///
/// Exercises insert/get, edge creation and lookup, full-text search, and the
/// count/stats methods, all through the trait object instead of the concrete
/// SQLite type.
#[tokio::test]
async fn all_modules_compile_against_dyn_store() {
let store: Arc<dyn MemoryStore> = make_store();
// CRUD via trait
let rec = make_record("cognitive isolation test");
let id = store.insert(&rec).await.expect("insert via dyn trait");
let got = store
.get(id)
.await
.expect("get via dyn trait")
.expect("exists");
assert_eq!(got.content, "cognitive isolation test");
// Graph edges via trait
let rec2 = make_record("linked node");
let id2 = store.insert(&rec2).await.expect("insert 2");
store
.add_edge(&MemoryEdge {
source_id: id,
target_id: id2,
edge_type: "semantic".to_string(),
weight: 0.8,
created_at: Utc::now(),
})
.await
.expect("add_edge via dyn trait");
let edges = store
.get_edges(id, None)
.await
.expect("get_edges via dyn trait");
assert!(!edges.is_empty());
// Search via trait: "cognitive" appears in the first record's content.
let results = store
.fts_search("cognitive", 5)
.await
.expect("fts_search via dyn trait");
assert!(!results.is_empty());
// Stats and count via trait: two records were inserted above.
let count = store.count().await.expect("count via dyn trait");
assert!(count >= 2);
let stats = store.get_stats().await.expect("get_stats via dyn trait");
assert!(stats.total_memories >= 2);
}
/// Links two records with one edge and checks that `get_neighbors` on the source
/// returns both the source and the linked record.
#[tokio::test]
async fn spreading_activation_traverses_via_trait() {
let store: Arc<dyn MemoryStore> = make_store();
let rec_a = make_record("spreading activation source");
let rec_b = make_record("spreading activation neighbor");
// Capture the ids up front; the assertions below compare against them.
let id_a = rec_a.id;
let id_b = rec_b.id;
store.insert(&rec_a).await.expect("insert a");
store.insert(&rec_b).await.expect("insert b");
store
.add_edge(&MemoryEdge {
source_id: id_a,
target_id: id_b,
edge_type: "semantic".to_string(),
weight: 0.9,
created_at: Utc::now(),
})
.await
.expect("add edge");
// get_neighbors simulates the spreading activation traversal path
let neighbors = store.get_neighbors(id_a, 1).await.expect("get_neighbors");
let ids: Vec<Uuid> = neighbors.iter().map(|(r, _)| r.id).collect();
// The result is expected to include the start node itself as well as its neighbor.
assert!(ids.contains(&id_a));
assert!(ids.contains(&id_b));
}
/// Round-trips one record through the trait object and checks that the returned
/// `MemoryRecord` has the inserted id and non-empty content.
///
/// No tagging code is invoked here; the test only proves the record shape that
/// such a consumer would receive.
#[tokio::test]
async fn synaptic_tagging_consumes_records_via_trait() {
// Build a MemoryRecord from trait-returned data and exercise the
// SynapticTaggingSystem pipeline (constructing CapturedMemory from store data).
let store: Arc<dyn MemoryStore> = make_store();
let rec = make_record("synaptic tagging test memory");
let id = store.insert(&rec).await.expect("insert");
let got = store.get(id).await.expect("get").expect("exists");
// The important thing is we got a MemoryRecord back from the dyn trait;
// SynapticTaggingSystem would take this record as input.
assert_eq!(got.id, id);
assert!(!got.content.is_empty());
}
#[tokio::test]
async fn hippocampal_index_built_from_store() {
    // Covers the fts_search side of the fts_search -> HippocampalIndex path:
    // seed five records, search, and sanity-check each hit.
    let store: Arc<dyn MemoryStore> = make_store();

    for topic in 0..5usize {
        let content = format!("hippocampal indexing topic {topic}");
        store.insert(&make_record(&content)).await.expect("insert");
    }

    let hits = store
        .fts_search("hippocampal indexing", 10)
        .await
        .expect("fts_search");

    assert!(!hits.is_empty());
    // Every hit must carry non-empty content and a non-negative score.
    for hit in hits.iter() {
        assert!(!hit.record.content.is_empty());
        assert!(hit.score >= 0.0);
    }
}

View file

@ -0,0 +1,161 @@
//! Phase 1 integration tests: domain column migration and schema upgrade.
use std::sync::Arc;
use tempfile::tempdir;
use uuid::Uuid;
use vestige_core::storage::{MemoryRecord, MemoryStore, SqliteMemoryStore};
#[tokio::test]
async fn fresh_db_has_v16_schema() {
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("fresh.db");
    // Creating the store is what builds the schema; the handle is kept alive
    // while the file is inspected through a separate raw connection.
    let _store = SqliteMemoryStore::new(Some(db_path.clone())).expect("create");

    let raw = rusqlite::Connection::open(&db_path).expect("open");
    // Column 1 of `PRAGMA table_info` is the column name.
    let mut table_info = raw.prepare("PRAGMA table_info(knowledge_nodes)").unwrap();
    let columns = table_info
        .query_map([], |row| row.get::<_, String>(1))
        .unwrap()
        .collect::<Result<Vec<String>, _>>()
        .unwrap();

    let has_column = |name: &str| columns.iter().any(|c| c == name);
    assert!(
        has_column("domains"),
        "domains column must exist: {:?}",
        columns
    );
    assert!(
        has_column("domain_scores"),
        "domain_scores column must exist"
    );
}
#[tokio::test]
async fn v11_db_upgrades_cleanly() {
    use vestige_core::storage::MIGRATIONS;

    // Insert statement written against the V11 `knowledge_nodes` layout,
    // i.e. without the `domains` / `domain_scores` columns.
    const INSERT_V11_ROW: &str =
        "INSERT INTO knowledge_nodes (id, content, node_type, created_at, updated_at, \
         last_accessed, stability, difficulty, reps, lapses, learning_state, \
         storage_strength, retrieval_strength, retention_strength, \
         next_review, scheduled_days, has_embedding) \
         VALUES (?1, ?2, 'fact', datetime('now'), datetime('now'), datetime('now'), \
         1.0, 0.3, 0, 0, 'new', 1.0, 1.0, 1.0, datetime('now'), 1, 0)";

    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("v11.db");

    // Step 1: build a database that has only the migrations up to V11 applied
    // and seed it with five rows.
    let seed = rusqlite::Connection::open(&db_path).expect("open");
    for migration in MIGRATIONS.iter() {
        if migration.version <= 11 {
            seed.execute_batch(migration.up).expect("apply migration");
        }
    }
    for i in 0..5usize {
        let row_id = format!("pre-v16-{i}");
        let body = format!("content {i}");
        seed.execute(INSERT_V11_ROW, rusqlite::params![row_id, body])
            .expect("insert pre-v16 row");
    }
    // Close the seeding connection before the store opens the same file.
    drop(seed);

    // Step 2: opening through SqliteMemoryStore is expected to run the
    // remaining migrations.
    let _store = SqliteMemoryStore::new(Some(db_path.clone())).expect("open with v16");

    // Step 3: every pre-existing row must have picked up the empty defaults.
    let raw = rusqlite::Connection::open(&db_path).expect("open raw");
    let defaulted_rows: i64 = raw
        .query_row(
            "SELECT COUNT(*) FROM knowledge_nodes WHERE domains='[]' AND domain_scores='{}'",
            [],
            |row| row.get(0),
        )
        .expect("count");
    assert_eq!(
        defaulted_rows, 5,
        "all pre-v16 rows must have empty domains/domain_scores"
    );
}
#[tokio::test]
async fn empty_domains_serialize_as_brackets() {
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("empty_domains.db");
    let store = SqliteMemoryStore::new(Some(db_path.clone())).expect("create");

    // A record whose domain fields are both empty.
    let record = MemoryRecord {
        id: Uuid::new_v4(),
        content: "test content".to_string(),
        node_type: "fact".to_string(),
        domains: vec![],
        domain_scores: Default::default(),
        tags: vec![],
        embedding: None,
        metadata: serde_json::json!({}),
        created_at: chrono::Utc::now(),
        updated_at: chrono::Utc::now(),
    };
    store.insert(&record).await.expect("insert");

    // Read the stored text straight from SQLite, bypassing the store's own
    // deserialization.
    let raw = rusqlite::Connection::open(&db_path).expect("open raw");
    let (domains, domain_scores) = raw
        .query_row(
            "SELECT domains, domain_scores FROM knowledge_nodes LIMIT 1",
            [],
            |row| {
                let domains: String = row.get(0)?;
                let scores: String = row.get(1)?;
                Ok((domains, scores))
            },
        )
        .expect("query");

    assert_eq!(
        domains, "[]",
        "empty domains should store as '[]', not NULL"
    );
    assert_eq!(
        domain_scores, "{}",
        "empty domain_scores should store as '{{}}'"
    );
}
#[tokio::test]
async fn populated_domains_round_trip() {
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("populated.db");
    let store: Arc<dyn MemoryStore> =
        Arc::new(SqliteMemoryStore::new(Some(db_path)).expect("create"));

    let scores = std::collections::HashMap::from([
        ("dev".to_string(), 0.82),
        ("infra".to_string(), 0.71),
    ]);
    let mut record = MemoryRecord {
        id: Uuid::new_v4(),
        content: "populated domains test".to_string(),
        node_type: "fact".to_string(),
        domains: vec!["dev".to_string(), "infra".to_string()],
        domain_scores: scores,
        tags: vec![],
        embedding: None,
        metadata: serde_json::json!({}),
        created_at: chrono::Utc::now(),
        updated_at: chrono::Utc::now(),
    };

    let stored_id = store.insert(&record).await.expect("insert");

    // Push the same domain data through `update()` as well, so both write
    // paths are exercised before reading back.
    record.id = stored_id;
    store.update(&record).await.expect("update with domains");

    let fetched = store.get(stored_id).await.expect("get").expect("exists");

    // Sort before comparing: the assertion should not depend on stored order.
    let mut actual_domains = fetched.domains.clone();
    actual_domains.sort();
    assert_eq!(actual_domains, vec!["dev", "infra"]);

    // Scores are floats, so compare with a tolerance.
    assert!((fetched.domain_scores["dev"] - 0.82).abs() < 0.001);
    assert!((fetched.domain_scores["infra"] - 0.71).abs() < 0.001);
}
#[tokio::test]
async fn domains_table_exists() {
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("domains_table.db");
    let _store = SqliteMemoryStore::new(Some(db_path.clone())).expect("create");

    // Ask SQLite's catalog directly whether a table named `domains` exists.
    let raw = rusqlite::Connection::open(&db_path).expect("open raw");
    let lookup = raw.query_row(
        "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='domains'",
        [],
        |row| row.get(0),
    );
    let matching_tables: i64 = lookup.expect("query");

    assert_eq!(
        matching_tables, 1,
        "domains table must exist after V16 migration"
    );
}

View file

@ -0,0 +1,43 @@
//! Phase 1 integration tests: Embedder trait and FastembedEmbedder.
use std::sync::Arc;
use tempfile::tempdir;
use vestige_core::embedder::{Embedder, FastembedEmbedder};
use vestige_core::storage::MemoryStore;
use vestige_core::storage::SqliteMemoryStore;
/// Builds a SQLite-backed store in a fresh temporary directory.
///
/// The `TempDir` guard is deliberately forgotten so the directory outlives
/// this function; as a consequence it is not cleaned up automatically.
fn make_store() -> Arc<dyn MemoryStore> {
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("test.db");
    std::mem::forget(tmp);
    let sqlite = SqliteMemoryStore::new(Some(db_path)).expect("create");
    Arc::new(sqlite)
}
#[tokio::test]
async fn fastembed_implements_embedder_trait() {
    // Boxing as `dyn Embedder` is the real check: it only compiles if the
    // concrete type implements the trait and the trait is usable as an object.
    let embedder: Box<dyn Embedder> = Box::new(FastembedEmbedder::new());

    assert_eq!(embedder.dimension(), 256, "dimension must be 256");
    assert!(
        !embedder.model_name().is_empty(),
        "model_name must not be empty"
    );

    let hash = embedder.model_hash();
    assert!(!hash.is_empty(), "model_hash must not be empty");
    assert_eq!(hash.len(), 64, "hash must be 64 hex chars");
}
#[tokio::test]
async fn signature_matches_memory_store_registry() {
    // The signature produced by the embedder must survive a trip through the
    // store's model registry unchanged.
    let expected = FastembedEmbedder::new().signature();
    let store = make_store();

    store
        .register_model(&expected)
        .await
        .expect("register via Embedder::signature");

    let lookup = store.registered_model().await.expect("registered_model");
    let stored = lookup.expect("Some");

    assert_eq!(stored.name, expected.name);
    assert_eq!(stored.dimension, expected.dimension);
    assert_eq!(stored.hash, expected.hash);
}

View file

@ -0,0 +1,148 @@
//! Phase 1 integration tests: embedding model registry.
use std::sync::Arc;
use tempfile::tempdir;
use uuid::Uuid;
use vestige_core::storage::{
MemoryRecord, MemoryStore, MemoryStoreError, ModelSignature, SqliteMemoryStore,
};
/// Builds a SQLite-backed store in a fresh temporary directory.
///
/// The `TempDir` guard is deliberately forgotten so the directory outlives
/// this function; as a consequence it is not cleaned up automatically.
fn make_store() -> Arc<dyn MemoryStore> {
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("test.db");
    std::mem::forget(tmp);
    Arc::new(SqliteMemoryStore::new(Some(db_path)).expect("create store"))
}
/// Fixture signature "model-a": 256 dimensions, hash of 64 `a` characters.
fn sig_a() -> ModelSignature {
    let hash = "a".repeat(64);
    let name = "model-a".to_string();
    ModelSignature {
        name,
        dimension: 256,
        hash,
    }
}
/// Fixture signature "model-b": 256 dimensions, hash of 64 `b` characters.
/// It has the same dimension as "model-a" but a different name and hash.
fn sig_b() -> ModelSignature {
    let hash = "b".repeat(64);
    let name = "model-b".to_string();
    ModelSignature {
        name,
        dimension: 256,
        hash,
    }
}
/// Fixture: a plain-text "fact" record with a random id, no embedding, no
/// tags, no domains and empty metadata.
fn record_without_embedding() -> MemoryRecord {
    MemoryRecord {
        id: Uuid::new_v4(),
        content: String::from("plain text memory"),
        node_type: String::from("fact"),
        embedding: None,
        tags: Vec::new(),
        domains: Vec::new(),
        domain_scores: Default::default(),
        metadata: serde_json::json!({}),
        created_at: chrono::Utc::now(),
        updated_at: chrono::Utc::now(),
    }
}
#[tokio::test]
async fn first_embedded_insert_auto_registers() {
    // NOTE(review): despite the name, no embedded insert happens here. The
    // test registers a model explicitly on a fresh store and checks that
    // `registered_model()` then reports exactly that signature.
    let store = make_store();
    let expected = sig_a();

    store.register_model(&expected).await.expect("register");

    let stored = store.registered_model().await.expect("registered_model");
    assert_eq!(stored, Some(expected));
}
#[tokio::test]
async fn second_insert_with_same_signature_succeeds() {
    // Registering the identical signature twice must be accepted both times.
    let store = make_store();
    let signature = sig_a();

    let first = store.register_model(&signature).await;
    first.expect("first register");

    let second = store.register_model(&signature).await;
    second.expect("second register idempotent");
}
#[tokio::test]
async fn second_insert_with_different_dimension_refused() {
    let store = make_store();
    // The registry is pinned to a 256-dimensional model.
    store.register_model(&sig_a()).await.expect("register 256");

    // The record's metadata claims model-a / 256, but the vector itself has
    // 512 components; the insert must be rejected.
    let mut mismatched = record_without_embedding();
    mismatched.embedding = Some(vec![0.0f32; 512]);
    mismatched.metadata = serde_json::json!({
        "model_name": "model-a",
        "model_dim": 256_u64,
        "model_hash": "a".repeat(64),
    });

    let outcome = store.insert(&mismatched).await;
    let err = outcome.unwrap_err();
    assert!(
        matches!(err, MemoryStoreError::InvalidInput(_)),
        "expected InvalidInput for dim mismatch, got {:?}",
        err
    );
}
#[tokio::test]
async fn second_insert_with_different_model_name_refused() {
    // Once model-a is registered, registering model-b must fail with
    // `ModelMismatch`.
    let store = make_store();
    let (first, second) = (sig_a(), sig_b());

    store.register_model(&first).await.expect("register a");

    let rejection = store.register_model(&second).await.unwrap_err();
    assert!(
        matches!(rejection, MemoryStoreError::ModelMismatch { .. }),
        "expected ModelMismatch, got {:?}",
        rejection
    );
}
#[tokio::test]
async fn second_insert_with_different_hash_refused() {
    let store = make_store();
    store.register_model(&sig_a()).await.expect("register");

    // Same name and dimension as the registered model; only the hash differs.
    let altered = ModelSignature {
        hash: "c".repeat(64),
        name: "model-a".to_string(),
        dimension: 256,
    };

    let rejection = store.register_model(&altered).await.unwrap_err();
    assert!(
        matches!(rejection, MemoryStoreError::ModelMismatch { .. }),
        "expected ModelMismatch for different hash, got {:?}",
        rejection
    );
}
#[tokio::test]
async fn no_embedding_insert_allowed_before_registration() {
    let store = make_store();

    // Precondition: a fresh store has no model registered.
    let registered = store.registered_model().await.expect("registered_model");
    assert!(registered.is_none());

    // A record that carries no embedding must still be insertable.
    let plain = record_without_embedding();
    let outcome = store.insert(&plain).await;
    outcome.expect("plain insert before registration");
}
#[tokio::test]
async fn stats_reports_registered_model_after_first_write() {
    // After registering model-a, the stats snapshot must expose its name and
    // dimension.
    let store = make_store();
    store.register_model(&sig_a()).await.expect("register");

    let snapshot = store.get_stats().await.expect("stats");

    assert_eq!(snapshot.registered_model_name, Some("model-a".to_string()));
    assert_eq!(snapshot.registered_model_dim, Some(256));
}

View file

@ -0,0 +1,99 @@
//! Phase 1 integration tests: Arc<dyn MemoryStore> moves across tokio::spawn.
//!
//! This verifies that `#[trait_variant::make(MemoryStore: Send)]` actually
//! produces a Send-bound future so Arc<dyn MemoryStore> is movable.
use chrono::Utc;
use std::sync::Arc;
use tempfile::tempdir;
use uuid::Uuid;
use vestige_core::storage::{MemoryRecord, MemoryStore, SqliteMemoryStore};
/// Builds a SQLite-backed store in a fresh temporary directory.
///
/// The `TempDir` guard is deliberately forgotten so the directory outlives
/// this function; as a consequence it is not cleaned up automatically.
fn make_store() -> Arc<dyn MemoryStore> {
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("send_test.db");
    std::mem::forget(tmp);
    let sqlite = SqliteMemoryStore::new(Some(db_path)).expect("create");
    Arc::new(sqlite)
}
/// Fixture: a "fact" record holding `content`, with a random id and every
/// optional part (tags, domains, embedding, metadata) left empty.
fn make_record(content: &str) -> MemoryRecord {
    MemoryRecord {
        id: Uuid::new_v4(),
        content: content.to_owned(),
        node_type: String::from("fact"),
        tags: Vec::new(),
        domains: Vec::new(),
        domain_scores: Default::default(),
        embedding: None,
        metadata: serde_json::json!({}),
        created_at: Utc::now(),
        updated_at: Utc::now(),
    }
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn arc_dyn_memory_store_moves_across_tokio_tasks() {
    let store: Arc<dyn MemoryStore> = make_store();

    // Spawn 16 tasks, each owning its own clone of the Arc and inserting 10
    // records. `tokio::spawn` needs a `Send` future, so this only compiles if
    // the trait object and its futures can cross threads.
    let workers: Vec<_> = (0..16usize)
        .map(|task| {
            let store = Arc::clone(&store);
            tokio::spawn(async move {
                for i in 0..10usize {
                    let record = make_record(&format!("task {task} memory {i}"));
                    store.insert(&record).await.expect("insert in spawned task");
                }
            })
        })
        .collect();

    for worker in workers {
        worker.await.expect("task completed without panic");
    }

    let total = store.count().await.expect("count");
    assert_eq!(total, 160, "all 16*10 inserts must be counted");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn concurrent_readers_one_writer() {
    // Runs 32 full-text readers alongside a single writer on the same store
    // and checks that no task fails and that no write is lost.
    let store: Arc<dyn MemoryStore> = make_store();

    // Pre-populate with 10 records so readers have something to find.
    for i in 0..10usize {
        let rec = make_record(&format!("concurrent reader memory {i}"));
        store.insert(&rec).await.expect("pre-insert");
    }

    let mut handles = Vec::new();

    // 32 concurrent readers. The number of hits may vary with writer
    // progress; only the success of the call is checked.
    for _ in 0..32usize {
        let store = Arc::clone(&store);
        handles.push(tokio::spawn(async move {
            store
                .fts_search("concurrent reader", 5)
                .await
                .expect("fts_search in concurrent reader");
        }));
    }

    // 1 writer inserting 20 more records.
    {
        let store = Arc::clone(&store);
        handles.push(tokio::spawn(async move {
            for i in 0..20usize {
                let rec = make_record(&format!("writer record {i}"));
                store.insert(&rec).await.expect("concurrent insert");
            }
        }));
    }

    for h in handles {
        h.await.expect("no panics");
    }

    // Every task, including the writer, has been joined, so the total is
    // deterministic: 10 pre-populated + 20 written. An exact check catches
    // lost writes, which a lower bound of 10 would silently accept.
    let count = store.count().await.expect("final count");
    assert_eq!(
        count, 30,
        "10 pre-populated + 20 concurrently written records must all persist"
    );
}

View file

@ -0,0 +1,217 @@
//! Phase 1 integration tests: round-trip of every trait method through SqliteMemoryStore.
use chrono::Utc;
use std::sync::Arc;
use tempfile::tempdir;
use uuid::Uuid;
use vestige_core::storage::{
MemoryEdge, MemoryRecord, MemoryStore, SearchQuery, SqliteMemoryStore,
};
/// Builds a SQLite-backed store in a fresh temporary directory.
fn make_store() -> Arc<dyn MemoryStore> {
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("test.db");
    // Forget the guard so the directory is not removed while the store is in
    // use. The directory is leaked, which is acceptable for tests.
    std::mem::forget(tmp);
    Arc::new(SqliteMemoryStore::new(Some(db_path)).expect("create store"))
}
/// Fixture: a "fact" record holding `content`, tagged "integration", with a
/// random id, no embedding, no domains and empty metadata.
fn make_record(content: &str) -> MemoryRecord {
    MemoryRecord {
        id: Uuid::new_v4(),
        content: content.to_owned(),
        node_type: String::from("fact"),
        tags: vec![String::from("integration")],
        domains: Vec::new(),
        domain_scores: Default::default(),
        embedding: None,
        metadata: serde_json::json!({}),
        created_at: Utc::now(),
        updated_at: Utc::now(),
    }
}
#[tokio::test]
async fn insert_get_update_delete() {
    let store = make_store();
    let original = make_record("round-trip CRUD test");
    let record_id = original.id;

    // Create + read: fields must come back as written.
    store.insert(&original).await.expect("insert");
    let fetched = store.get(record_id).await.expect("get").expect("exists");
    assert_eq!(fetched.content, "round-trip CRUD test");
    assert_eq!(fetched.node_type, "fact");
    assert!(fetched.domains.is_empty());
    assert!(fetched.domain_scores.is_empty());

    // Update: change only the content and read it back.
    let mut edited = fetched;
    edited.content = "updated content".to_string();
    store.update(&edited).await.expect("update");
    let reread = store.get(record_id).await.expect("get after update");
    let reread = reread.expect("exists");
    assert_eq!(reread.content, "updated content");

    // Delete: the record must no longer be retrievable.
    store.delete(record_id).await.expect("delete");
    let gone = store.get(record_id).await.expect("get after delete");
    assert!(gone.is_none());
}
#[tokio::test]
async fn scheduling_upsert_and_due_scan() {
    use vestige_core::storage::SchedulingState;

    let store = make_store();

    // Three memories whose next review lies 1, 2 and 3 days in the past.
    for (i, days_overdue) in (1..=3i64).enumerate() {
        let record = make_record(&format!("sched memory {i}"));
        let memory_id = record.id;
        store.insert(&record).await.expect("insert");

        let overdue_since = Utc::now() - chrono::Duration::days(days_overdue);
        let schedule = SchedulingState {
            memory_id,
            stability: 1.0,
            difficulty: 0.3,
            retrievability: 0.7,
            last_review: Some(Utc::now()),
            next_review: Some(overdue_since),
            reps: 1,
            lapses: 0,
        };
        store
            .update_scheduling(&schedule)
            .await
            .expect("update scheduling");
    }

    // A due-scan as of "now" must report every one of them.
    let due_now = store
        .get_due_memories(Utc::now(), 10)
        .await
        .expect("get_due_memories");
    assert_eq!(due_now.len(), 3, "all 3 should be due");
}
#[tokio::test]
async fn edge_crud() {
    let store = make_store();

    let node_a = make_record("edge node A");
    let node_b = make_record("edge node B");
    let (from, to) = (node_a.id, node_b.id);
    store.insert(&node_a).await.expect("insert a");
    store.insert(&node_b).await.expect("insert b");

    // Add A -> B and confirm it is listed for A.
    let link = MemoryEdge {
        source_id: from,
        target_id: to,
        edge_type: "semantic".to_string(),
        weight: 0.85,
        created_at: Utc::now(),
    };
    store.add_edge(&link).await.expect("add edge");
    let listed = store.get_edges(from, None).await.expect("get edges");
    assert!(!listed.is_empty());

    // Remove it again; A must be left with no edges.
    store.remove_edge(from, to).await.expect("remove edge");
    let remaining = store.get_edges(from, None).await.expect("get edges after");
    assert!(remaining.is_empty());
}
#[tokio::test]
async fn count_and_stats_track_inserts() {
    // After ten inserts, `count()` and `get_stats()` must both report ten.
    let store = make_store();
    for n in 0..10usize {
        let content = format!("stats memory {n}");
        store.insert(&make_record(&content)).await.expect("insert");
    }

    let counted = store.count().await.expect("count");
    assert_eq!(counted, 10);

    let snapshot = store.get_stats().await.expect("stats");
    assert_eq!(snapshot.total_memories, 10);
}
#[tokio::test]
async fn vacuum_after_deletes_reclaims() {
    // `tmp` stays bound for the whole test so the directory exists while the
    // store is in use.
    let tmp = tempdir().unwrap();
    let db_path = tmp.path().join("vacuum_test.db");
    let store: Arc<dyn MemoryStore> =
        Arc::new(SqliteMemoryStore::new(Some(db_path)).expect("create store"));

    // Insert 50 records, remembering the ids the store hands back.
    let mut inserted = Vec::new();
    for n in 0..50usize {
        let record = make_record(&format!("vacuum memory {n}"));
        inserted.push(store.insert(&record).await.expect("insert"));
    }

    // Delete the first 40 of them.
    for id in inserted.iter().take(40).copied() {
        store.delete(id).await.expect("delete");
    }

    // Only success is asserted here; the amount of space reclaimed is not
    // measured.
    store.vacuum().await.expect("vacuum");
}
#[tokio::test]
async fn list_domains_empty_then_upsert_then_delete() {
    use vestige_core::storage::Domain;

    let store = make_store();

    // A fresh store lists no domains.
    let initial = store.list_domains().await.expect("list empty");
    assert!(initial.is_empty());

    // Upsert one domain and confirm it is the only one listed.
    let domain = Domain {
        id: "test-domain".to_string(),
        label: "Test Domain".to_string(),
        centroid: vec![0.1f32, 0.2, 0.3],
        top_terms: vec!["term1".to_string()],
        memory_count: 5,
        created_at: Utc::now(),
    };
    store.upsert_domain(&domain).await.expect("upsert domain");
    let listed = store.list_domains().await.expect("list after upsert");
    assert_eq!(listed.len(), 1);
    assert_eq!(listed[0].id, "test-domain");

    // Delete it by id; the listing must be empty again.
    let removal = store.delete_domain("test-domain").await;
    removal.expect("delete domain");
    let remaining = store.list_domains().await.expect("list after delete");
    assert!(remaining.is_empty());
}
#[tokio::test]
async fn classify_with_no_domains_returns_empty() {
    // With no domains stored, classifying any vector must yield nothing.
    let store = make_store();
    let probe = [0.1f32, 0.2, 0.3];
    let assignments = store.classify(&probe).await.expect("classify");
    assert!(assignments.is_empty());
}
#[tokio::test]
async fn search_hybrid_returns_results() {
    let store = make_store();
    let seeded = make_record("quantum entanglement superposition physics");
    store.insert(&seeded).await.expect("insert");

    // Sanity check first: plain full-text search must already see the record.
    let fts_hits = store.fts_search("quantum", 10).await.expect("fts_search");
    assert!(
        !fts_hits.is_empty(),
        "fts_search must find 'quantum' after insert"
    );

    // Text-only query; every other `SearchQuery` field keeps its default.
    let text_query = SearchQuery {
        text: Some("quantum physics".to_string()),
        limit: 10,
        ..Default::default()
    };
    let hits = store.search(&text_query).await.expect("search");

    assert!(
        !hits.is_empty(),
        "search must return results for 'quantum physics'"
    );
    // Indexing is safe here: the assertion above guarantees a first hit.
    assert!(hits[0].score >= 0.0);
}