mirror of
https://github.com/samvallad33/vestige.git
synced 2026-06-20 21:18:08 +02:00
Introduce two trait boundaries that the rest of the stack now sits above,
landing Phase 1 of ADR 0001 (pluggable storage and network access).
Rebased onto v2.1.22 Sanhedrin from the original April work.
MemoryStore / LocalMemoryStore (crates/vestige-core/src/storage/memory_store.rs):
One trait, ~25 methods, covering CRUD, hybrid / FTS / vector search,
FSRS scheduling, graph edges, and the forthcoming domain surface.
trait_variant::make generates a Send-bound MemoryStore alias over the
base LocalMemoryStore so Arc<dyn MemoryStore> works under tokio/axum.
Storage errors map through a dedicated MemoryStoreError.
Embedder / LocalEmbedder (crates/vestige-core/src/embedder/):
Pluggable text-to-vector encoder. FastembedEmbedder wraps the existing
EmbeddingService; storage never calls fastembed directly anymore.
Embedder::signature() produces the ModelSignature consumed by the
store's embedding_model registry.
SqliteMemoryStore (crates/vestige-core/src/storage/sqlite.rs):
Storage renamed to SqliteMemoryStore; the old name lives on as a
pub type alias so Arc<Storage> consumers in vestige-mcp stay intact.
All existing inherent methods are untouched; the trait impl is
purely additive and dispatches into them. The db_path field added
by v2.1.1 portable-sync is preserved.
Migration V14 (crates/vestige-core/src/storage/migrations.rs):
Renumbered from V12 (the original April number) to V14 to slot in
cleanly after upstream's V12 (v2.1.1 sync_tombstones) and V13
(v2.1.2 purge tombstones).
- embedding_model registry table (CHECK id = 1, code enforces the
single-row invariant).
- knowledge_nodes.domains / domain_scores TEXT columns (JSON array /
JSON object, defaulting to '[]' / '{}'), domains catalogue table, supporting indexes.
Phase 4 populates these columns; Phase 1 just exposes the schema.
Consolidation and other cognitive pathways now accept a
&dyn LocalMemoryStore (sync) or Arc<dyn MemoryStore> (async) rather
than a concrete Storage.
Tests:
- trait-method unit tests colocated in sqlite.rs and migrations.rs
- embedder/fastembed.rs tests for name/dimension/hash stability
- new integration crate tests/phase_1 (added to workspace members):
trait_round_trip (8), embedding_model_registry (7),
domain_column_migration (5), cognitive_module_isolation (4),
send_bound_variant (2), embedder_trait (2).
Acceptance gate post-rebase:
- cargo build --workspace --all-targets: ok
- cargo clippy --workspace --all-targets -- -D warnings: clean
- cargo test -p vestige-core --lib: 428 pass
- cargo test -p vestige-phase-1-tests: 28 pass
- cargo test -p vestige-mcp --lib: 380 pass (Storage alias preserves
every existing call site)
Co-existence with v2.1.1 portable-sync: this trait extraction is
additive. Portable-sync's tombstone migrations (V12, V13) remain
on the concrete SqliteMemoryStore; Phase 2 (Postgres) will decide
which of those surfaces graduate into the trait.
148 lines
4.3 KiB
Rust
148 lines
4.3 KiB
Rust
//! Phase 1 integration tests: embedding model registry.
|
|
|
|
use std::sync::Arc;
|
|
use tempfile::tempdir;
|
|
use uuid::Uuid;
|
|
use vestige_core::storage::{
|
|
MemoryRecord, MemoryStore, MemoryStoreError, ModelSignature, SqliteMemoryStore,
|
|
};
|
|
|
|
fn make_store() -> Arc<dyn MemoryStore> {
|
|
let dir = tempdir().unwrap();
|
|
let db = dir.path().join("test.db");
|
|
std::mem::forget(dir);
|
|
let store = SqliteMemoryStore::new(Some(db)).expect("create store");
|
|
Arc::new(store)
|
|
}
|
|
|
|
fn sig_a() -> ModelSignature {
|
|
ModelSignature {
|
|
name: "model-a".to_string(),
|
|
dimension: 256,
|
|
hash: "a".repeat(64),
|
|
}
|
|
}
|
|
|
|
fn sig_b() -> ModelSignature {
|
|
ModelSignature {
|
|
name: "model-b".to_string(),
|
|
dimension: 256,
|
|
hash: "b".repeat(64),
|
|
}
|
|
}
|
|
|
|
fn record_without_embedding() -> MemoryRecord {
|
|
MemoryRecord {
|
|
id: Uuid::new_v4(),
|
|
domains: vec![],
|
|
domain_scores: Default::default(),
|
|
content: "plain text memory".to_string(),
|
|
node_type: "fact".to_string(),
|
|
tags: vec![],
|
|
embedding: None,
|
|
created_at: chrono::Utc::now(),
|
|
updated_at: chrono::Utc::now(),
|
|
metadata: serde_json::json!({}),
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn first_embedded_insert_auto_registers() {
|
|
// fresh store; register a model, then check registered_model() returns Some
|
|
let store = make_store();
|
|
let sig = sig_a();
|
|
store.register_model(&sig).await.expect("register");
|
|
let got = store.registered_model().await.expect("registered_model");
|
|
assert_eq!(got, Some(sig));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn second_insert_with_same_signature_succeeds() {
|
|
let store = make_store();
|
|
let sig = sig_a();
|
|
store.register_model(&sig).await.expect("first register");
|
|
store
|
|
.register_model(&sig)
|
|
.await
|
|
.expect("second register idempotent");
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn second_insert_with_different_dimension_refused() {
|
|
let store = make_store();
|
|
let sig = sig_a(); // dim 256
|
|
store.register_model(&sig).await.expect("register 256");
|
|
// Try inserting a 512-dim vector into a store registered for 256
|
|
let mut rec = record_without_embedding();
|
|
rec.embedding = Some(vec![0.0f32; 512]);
|
|
rec.metadata = serde_json::json!({
|
|
"model_name": "model-a",
|
|
"model_dim": 256_u64,
|
|
"model_hash": "a".repeat(64),
|
|
});
|
|
let err = store.insert(&rec).await.unwrap_err();
|
|
assert!(
|
|
matches!(err, MemoryStoreError::InvalidInput(_)),
|
|
"expected InvalidInput for dim mismatch, got {:?}",
|
|
err
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn second_insert_with_different_model_name_refused() {
|
|
let store = make_store();
|
|
store.register_model(&sig_a()).await.expect("register a");
|
|
let err = store.register_model(&sig_b()).await.unwrap_err();
|
|
assert!(
|
|
matches!(err, MemoryStoreError::ModelMismatch { .. }),
|
|
"expected ModelMismatch, got {:?}",
|
|
err
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn second_insert_with_different_hash_refused() {
|
|
let store = make_store();
|
|
let sig = sig_a();
|
|
store.register_model(&sig).await.expect("register");
|
|
let sig_diff_hash = ModelSignature {
|
|
name: "model-a".to_string(),
|
|
dimension: 256,
|
|
hash: "c".repeat(64), // different hash
|
|
};
|
|
let err = store.register_model(&sig_diff_hash).await.unwrap_err();
|
|
assert!(
|
|
matches!(err, MemoryStoreError::ModelMismatch { .. }),
|
|
"expected ModelMismatch for different hash, got {:?}",
|
|
err
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn no_embedding_insert_allowed_before_registration() {
|
|
let store = make_store();
|
|
// registered_model() should be None
|
|
assert!(
|
|
store
|
|
.registered_model()
|
|
.await
|
|
.expect("registered_model")
|
|
.is_none()
|
|
);
|
|
// A plain text memory without an embedding must insert successfully
|
|
let rec = record_without_embedding();
|
|
store
|
|
.insert(&rec)
|
|
.await
|
|
.expect("plain insert before registration");
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn stats_reports_registered_model_after_first_write() {
|
|
let store = make_store();
|
|
let sig = sig_a();
|
|
store.register_model(&sig).await.expect("register");
|
|
let stats = store.get_stats().await.expect("stats");
|
|
assert_eq!(stats.registered_model_name, Some("model-a".to_string()));
|
|
assert_eq!(stats.registered_model_dim, Some(256));
|
|
}
|