Initial commit: Vestige v1.0.0 - Cognitive memory MCP server

FSRS-6 spaced repetition, spreading activation, synaptic tagging,
hippocampal indexing, and 130 years of memory research.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Sam Valladares 2026-01-25 01:31:03 -06:00
commit f9c60eb5a7
169 changed files with 97206 additions and 0 deletions

View file

@ -0,0 +1,521 @@
//! Custom Test Assertions
//!
//! Provides domain-specific assertions for memory testing:
//! - Retention and decay assertions
//! - Scheduling assertions
//! - State transition assertions
//! - Search result assertions
use vestige_core::{KnowledgeNode, Storage};
// ============================================================================
// RETENTION ASSERTIONS
// ============================================================================
/// Assert that retention has decreased relative to an earlier value.
///
/// Two forms are supported:
///
/// - `assert_retention_decreased!(actual, original)` passes when
///   `actual < original`.
/// - `assert_retention_decreased!(actual, original, min_decrease)` passes when
///   `original - actual >= min_decrease`.
///
/// Every argument is evaluated exactly once and each form expands to a single
/// block, so the macro works in expression position (for example as a `match`
/// arm) as well as in statement position.
///
/// # Panics
/// Panics with a message describing the observed values when the condition
/// does not hold.
///
/// # Example
/// ```rust,ignore
/// assert_retention_decreased!(node.retention_strength, 1.0);
/// assert_retention_decreased!(node.retention_strength, 1.0, 0.1);
/// ```
#[macro_export]
macro_rules! assert_retention_decreased {
    ($actual:expr, $original:expr) => {{
        let actual = $actual;
        let original = $original;
        assert!(
            actual < original,
            "Expected retention to decrease: {} should be less than {}",
            actual,
            original
        );
    }};
    ($actual:expr, $original:expr, $min_decrease:expr) => {{
        let original = $original;
        let actual = $actual;
        let min_decrease = $min_decrease;
        // How far retention actually fell; must reach the requested minimum.
        let decrease = original - actual;
        assert!(
            decrease >= min_decrease,
            "Expected retention to decrease by at least {}: actual decrease was {} ({} -> {})",
            min_decrease,
            decrease,
            original,
            actual
        );
    }};
}
/// Assert that a retention value lies in the inclusive range `[min, max]`.
///
/// Each argument is evaluated exactly once, so passing an expression with
/// side effects or a non-trivial cost (for example a function call) is safe.
///
/// # Panics
/// Panics with the range and the observed value when `actual` is outside
/// `[min, max]`.
///
/// # Example
/// ```rust,ignore
/// assert_retention_in_range!(node.retention_strength, 0.8, 0.9);
/// ```
#[macro_export]
macro_rules! assert_retention_in_range {
    ($actual:expr, $min:expr, $max:expr) => {{
        let (actual, min, max) = ($actual, $min, $max);
        assert!(
            actual >= min && actual <= max,
            "Expected retention in range [{}, {}], got {}",
            min,
            max,
            actual
        );
    }};
}
/// Assert that a node's `retrieval_strength` is plausible after `elapsed_days`.
///
/// The check is deliberately loose:
///
/// - when `elapsed_days > 0.0`, the strength must lie in `[0.0, 1.0]`;
/// - otherwise (no time elapsed) the strength must be exactly `1.0`.
///
/// Note that a positive elapsed time does not require the strength to be
/// strictly below `1.0`; this macro only validates the range.
///
/// The node field and the elapsed time are each read once, and the expansion
/// is a single block, so the macro can be used in expression position.
///
/// # Panics
/// Panics with the observed strength, the expected range and the elapsed days
/// when the strength is outside the expected range.
#[macro_export]
macro_rules! assert_retrieval_decayed {
    ($node:expr, $elapsed_days:expr) => {{
        let retrieval_strength = $node.retrieval_strength;
        let elapsed_days = $elapsed_days;
        let expected_max = 1.0; // Can't exceed 1.0
        let expected_min = if elapsed_days > 0.0 {
            0.0 // Any value down to zero is accepted once time has passed
        } else {
            1.0 // No elapsed time: strength must still be at its maximum
        };
        assert!(
            retrieval_strength >= expected_min && retrieval_strength <= expected_max,
            "Retrieval strength {} out of expected range [{}, {}] after {} days",
            retrieval_strength,
            expected_min,
            expected_max,
            elapsed_days
        );
    }};
}
// ============================================================================
// SCHEDULING ASSERTIONS
// ============================================================================
/// Assert that a memory is due for review.
///
/// Calls `is_due()` on the given expression. When that returns `false` the
/// macro panics and reports the memory's `next_review` value.
#[macro_export]
macro_rules! assert_is_due {
    ($memory:expr) => {
        assert!(
            $memory.is_due(),
            "Expected memory to be due for review, but next_review is {:?}",
            $memory.next_review
        );
    };
}
/// Assert that a memory is NOT yet due for review.
///
/// Calls `is_due()` on the given expression. When that returns `true` the
/// macro panics and reports the memory's `next_review` value.
#[macro_export]
macro_rules! assert_not_due {
    ($memory:expr) => {
        assert!(
            !$memory.is_due(),
            "Expected memory to NOT be due for review, but it is (next_review: {:?})",
            $memory.next_review
        );
    };
}
/// Assert that the review interval did not shrink between two node states.
///
/// For each node the interval is `next_review - last_accessed` expressed in
/// whole days (via `num_days()`); a node without a `next_review` counts as an
/// interval of `0`. The assertion passes when the `after` interval is greater
/// than **or equal to** the `before` interval.
///
/// The expansion is a single block, so the macro can be used in expression
/// position as well as in statement position.
///
/// # Panics
/// Panics with both intervals when the `after` interval is smaller.
#[macro_export]
macro_rules! assert_interval_increased {
    ($before:expr, $after:expr) => {{
        let before_interval = $before
            .next_review
            .map(|t| (t - $before.last_accessed).num_days())
            .unwrap_or(0);
        let after_interval = $after
            .next_review
            .map(|t| (t - $after.last_accessed).num_days())
            .unwrap_or(0);
        assert!(
            after_interval >= before_interval,
            "Expected interval to increase: {} days -> {} days",
            before_interval,
            after_interval
        );
    }};
}
/// Assert that `stability` did not drop between two node states.
///
/// Compares the `stability` field of both expressions; an unchanged value is
/// accepted because the comparison is `>=`.
#[macro_export]
macro_rules! assert_stability_increased {
    ($earlier:expr, $later:expr) => {
        assert!(
            $later.stability >= $earlier.stability,
            "Expected stability to increase: {} -> {}",
            $earlier.stability,
            $later.stability
        );
    };
}
// ============================================================================
// STATE ASSERTIONS
// ============================================================================
/// Assert that `storage_strength` did not drop between two node states.
///
/// Compares the `storage_strength` field of both expressions; an unchanged
/// value is accepted because the comparison is `>=`.
#[macro_export]
macro_rules! assert_storage_strength_increased {
    ($earlier:expr, $later:expr) => {
        assert!(
            $later.storage_strength >= $earlier.storage_strength,
            "Expected storage strength to increase: {} -> {}",
            $earlier.storage_strength,
            $later.storage_strength
        );
    };
}
/// Assert that the `reps` counter strictly grew between two node states.
///
/// Unlike the stability/strength assertions, an unchanged count fails here
/// because the comparison is `>`.
#[macro_export]
macro_rules! assert_reps_increased {
    ($earlier:expr, $later:expr) => {
        assert!(
            $later.reps > $earlier.reps,
            "Expected reps to increase: {} -> {}",
            $earlier.reps,
            $later.reps
        );
    };
}
/// Assert that the `lapses` counter strictly grew between two node states.
///
/// An unchanged count fails because the comparison is `>`.
#[macro_export]
macro_rules! assert_lapses_increased {
    ($earlier:expr, $later:expr) => {
        assert!(
            $later.lapses > $earlier.lapses,
            "Expected lapses to increase: {} -> {}",
            $earlier.lapses,
            $later.lapses
        );
    };
}
// ============================================================================
// TEMPORAL ASSERTIONS
// ============================================================================
/// Assert that a memory reports itself as currently valid.
///
/// Calls `is_currently_valid()` on the expression; on failure the panic
/// message includes the memory's `valid_from` and `valid_until` fields.
#[macro_export]
macro_rules! assert_currently_valid {
    ($memory:expr) => {
        assert!(
            $memory.is_currently_valid(),
            "Expected memory to be currently valid, but valid_from={:?}, valid_until={:?}",
            $memory.valid_from,
            $memory.valid_until
        );
    };
}
/// Assert that a memory reports itself as NOT currently valid.
///
/// Calls `is_currently_valid()` on the expression; if it returns `true` the
/// panic message includes the memory's `valid_from` and `valid_until` fields.
#[macro_export]
macro_rules! assert_not_currently_valid {
    ($memory:expr) => {
        assert!(
            !$memory.is_currently_valid(),
            "Expected memory to NOT be currently valid, but it is (valid_from={:?}, valid_until={:?})",
            $memory.valid_from,
            $memory.valid_until
        );
    };
}
/// Assert that a memory is valid at a given point in time.
///
/// Calls `is_valid_at(time)` on the expression; on failure the panic message
/// includes the queried time plus the memory's `valid_from` / `valid_until`.
#[macro_export]
macro_rules! assert_valid_at {
    ($memory:expr, $when:expr) => {
        assert!(
            $memory.is_valid_at($when),
            "Expected memory to be valid at {:?}, but valid_from={:?}, valid_until={:?}",
            $when,
            $memory.valid_from,
            $memory.valid_until
        );
    };
}
// ============================================================================
// SEARCH ASSERTIONS
// ============================================================================
/// Assert that at least one search result has the given ID.
///
/// Iterates the results with `.iter()` and compares each element's `id`
/// field against the expected ID.
#[macro_export]
macro_rules! assert_search_contains {
    ($hits:expr, $wanted:expr) => {
        assert!(
            $hits.iter().any(|hit| hit.id == $wanted),
            "Expected search results to contain ID {}, but it was not found",
            $wanted
        );
    };
}
/// Assert that no search result has the given ID.
///
/// Iterates the results with `.iter()` and compares each element's `id`
/// field against the ID that must be absent.
#[macro_export]
macro_rules! assert_search_not_contains {
    ($hits:expr, $unwanted:expr) => {
        assert!(
            !$hits.iter().any(|hit| hit.id == $unwanted),
            "Expected search results to NOT contain ID {}, but it was found",
            $unwanted
        );
    };
}
/// Assert that a result collection holds exactly the expected number of items.
///
/// Compares `results.len()` with the expected count using `assert_eq!`.
#[macro_export]
macro_rules! assert_search_count {
    ($hits:expr, $wanted:expr) => {
        assert_eq!(
            $hits.len(),
            $wanted,
            "Expected {} search results, got {}",
            $wanted,
            $hits.len()
        );
    };
}
/// Assert that the first search result has the expected ID.
///
/// The results expression is evaluated once and borrowed. The macro first
/// checks that the results are non-empty, then compares the `id` of element
/// `0` with `expected_first`.
///
/// The expansion is a single block, so the macro can be used in expression
/// position as well as in statement position.
///
/// # Panics
/// Panics when the results are empty or when the first result's ID differs
/// from `expected_first`.
#[macro_export]
macro_rules! assert_search_order {
    ($results:expr, $expected_first:expr) => {{
        let results = &$results;
        assert!(
            !results.is_empty(),
            "Expected non-empty search results"
        );
        assert_eq!(
            results[0].id,
            $expected_first,
            "Expected first result to be {}, got {}",
            $expected_first,
            results[0].id
        );
    }};
}
// ============================================================================
// EMBEDDING ASSERTIONS
// ============================================================================
/// Assert that two `f32` embeddings have cosine similarity `>= threshold`.
///
/// The similarity is `dot(a, b) / (|a| * |b|)`. If either vector has a norm
/// that is not greater than zero, the similarity is treated as `0.0`.
/// Pairs are formed with `zip`, so extra trailing elements of the longer
/// vector do not contribute to the dot product.
#[macro_export]
macro_rules! assert_embeddings_similar {
    ($emb1:expr, $emb2:expr, $threshold:expr) => {{
        let dot_product: f32 = $emb1.iter().zip($emb2.iter()).map(|(a, b)| a * b).sum();
        let magnitude_a: f32 = $emb1.iter().map(|x| x * x).sum::<f32>().sqrt();
        let magnitude_b: f32 = $emb2.iter().map(|x| x * x).sum::<f32>().sqrt();
        let both_nonzero = magnitude_a > 0.0 && magnitude_b > 0.0;
        let cosine = if both_nonzero {
            dot_product / (magnitude_a * magnitude_b)
        } else {
            0.0
        };
        assert!(
            cosine >= $threshold,
            "Expected embeddings to be similar (>= {}), got similarity {}",
            $threshold,
            cosine
        );
    }};
}
/// Assert that two `f32` embeddings have cosine similarity `< threshold`.
///
/// The similarity is `dot(a, b) / (|a| * |b|)`. If either vector has a norm
/// that is not greater than zero, the similarity is treated as `0.0`.
/// Pairs are formed with `zip`, so extra trailing elements of the longer
/// vector do not contribute to the dot product.
#[macro_export]
macro_rules! assert_embeddings_different {
    ($emb1:expr, $emb2:expr, $threshold:expr) => {{
        let dot_product: f32 = $emb1.iter().zip($emb2.iter()).map(|(a, b)| a * b).sum();
        let magnitude_a: f32 = $emb1.iter().map(|x| x * x).sum::<f32>().sqrt();
        let magnitude_b: f32 = $emb2.iter().map(|x| x * x).sum::<f32>().sqrt();
        let both_nonzero = magnitude_a > 0.0 && magnitude_b > 0.0;
        let cosine = if both_nonzero {
            dot_product / (magnitude_a * magnitude_b)
        } else {
            0.0
        };
        assert!(
            cosine < $threshold,
            "Expected embeddings to be different (< {}), got similarity {}",
            $threshold,
            cosine
        );
    }};
}
// ============================================================================
// HELPER FUNCTIONS
// ============================================================================
/// Verify that a node exists in storage.
///
/// Looks the node up with `Storage::get_node` and passes only when the lookup
/// succeeds and yields `Some(_)`. A lookup error is treated the same as a
/// missing node.
///
/// # Panics
/// Panics with the node ID when the node is missing or the lookup fails.
pub fn assert_node_exists(storage: &Storage, id: &str) {
    // Pattern-match the lookup instead of `is_ok()` followed by `unwrap()`.
    assert!(
        matches!(storage.get_node(id), Ok(Some(_))),
        "Expected node {} to exist in storage",
        id
    );
}
/// Verify that a node does not exist in storage.
///
/// Looks the node up with `Storage::get_node` and passes only when the lookup
/// succeeds and yields `None`. A lookup error therefore fails the assertion
/// rather than counting as "absent".
///
/// # Panics
/// Panics with the node ID when the node is present or the lookup fails.
pub fn assert_node_not_exists(storage: &Storage, id: &str) {
    // Pattern-match the lookup instead of `is_ok()` followed by `unwrap()`.
    assert!(
        matches!(storage.get_node(id), Ok(None)),
        "Expected node {} to NOT exist in storage",
        id
    );
}
/// Verify that storage reports exactly `expected` nodes.
///
/// Reads `total_nodes` from `Storage::get_stats`.
///
/// # Panics
/// Panics if the stats cannot be read or the count differs from `expected`.
pub fn assert_node_count(storage: &Storage, expected: i64) {
    let actual = storage.get_stats().expect("Failed to get stats").total_nodes;
    assert_eq!(
        actual, expected,
        "Expected {} nodes, got {}",
        expected, actual
    );
}
/// Verify that a node's `content` equals `expected_content`.
///
/// # Panics
/// Panics with both strings when they differ.
pub fn assert_node_content(node: &KnowledgeNode, expected_content: &str) {
    let actual_content = node.content.as_str();
    assert_eq!(
        actual_content, expected_content,
        "Expected content '{}', got '{}'",
        expected_content, actual_content
    );
}
/// Verify that a node's `node_type` equals `expected_type`.
///
/// # Panics
/// Panics with both type names when they differ.
pub fn assert_node_type(node: &KnowledgeNode, expected_type: &str) {
    let actual_type = node.node_type.as_str();
    assert_eq!(
        actual_type, expected_type,
        "Expected type '{}', got '{}'",
        expected_type, actual_type
    );
}
/// Verify that a node carries every tag in `expected_tags`.
///
/// The node may have additional tags; only the listed ones are required.
/// An empty `expected_tags` slice always passes.
///
/// # Panics
/// Panics on the first expected tag that is missing, listing the node's tags.
pub fn assert_has_tags(node: &KnowledgeNode, expected_tags: &[&str]) {
    for tag in expected_tags {
        let is_present = node.tags.iter().any(|owned| owned.as_str() == *tag);
        assert!(
            is_present,
            "Expected node to have tag '{}', but tags are {:?}",
            tag,
            node.tags
        );
    }
}
/// Verify that a node's `difficulty` lies in the inclusive range `[1.0, 10.0]`.
///
/// # Panics
/// Panics with the observed difficulty when it is outside that range.
pub fn assert_difficulty_valid(node: &KnowledgeNode) {
    let in_bounds = (1.0..=10.0).contains(&node.difficulty);
    assert!(
        in_bounds,
        "Difficulty {} is out of valid range [1.0, 10.0]",
        node.difficulty
    );
}
/// Verify that a node's `stability` is strictly greater than zero.
///
/// # Panics
/// Panics with the observed stability when it is zero, negative or NaN.
pub fn assert_stability_valid(node: &KnowledgeNode) {
    let stability = node.stability;
    assert!(
        stability > 0.0,
        "Stability {} should be positive",
        stability
    );
}
/// Assert that two `f64` values differ by strictly less than `epsilon`.
///
/// # Panics
/// Panics with both values and the tolerance when
/// `|actual - expected| < epsilon` does not hold (including when the
/// difference is NaN).
pub fn assert_approx_eq(actual: f64, expected: f64, epsilon: f64) {
    let distance = (actual - expected).abs();
    assert!(
        distance < epsilon,
        "Expected {} to be approximately equal to {} (epsilon: {})",
        actual,
        expected,
        epsilon
    );
}
/// Assert that two `f32` values differ by strictly less than `epsilon`.
///
/// # Panics
/// Panics with both values and the tolerance when
/// `|actual - expected| < epsilon` does not hold (including when the
/// difference is NaN).
pub fn assert_approx_eq_f32(actual: f32, expected: f32, epsilon: f32) {
    let distance = (actual - expected).abs();
    assert!(
        distance < epsilon,
        "Expected {} to be approximately equal to {} (epsilon: {})",
        actual,
        expected,
        epsilon
    );
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{Duration, Utc};

    /// Build a fully populated node used by the assertion self-tests.
    ///
    /// The node is scheduled five days in the future and has no validity
    /// bounds, so it is "not due" and "currently valid" by construction.
    fn create_test_node() -> KnowledgeNode {
        let mut fixture = KnowledgeNode::default();

        // Identity and payload.
        fixture.id = "test-id".to_string();
        fixture.content = "test content".to_string();
        fixture.node_type = "fact".to_string();

        // Timestamps.
        fixture.created_at = Utc::now();
        fixture.updated_at = Utc::now();
        fixture.last_accessed = Utc::now();

        // Scheduling state.
        fixture.stability = 5.0;
        fixture.difficulty = 5.0;
        fixture.reps = 3;
        fixture.lapses = 0;

        // Memory strengths and sentiment.
        fixture.storage_strength = 2.0;
        fixture.retrieval_strength = 0.9;
        fixture.retention_strength = 0.85;
        fixture.sentiment_score = 0.0;
        fixture.sentiment_magnitude = 0.0;

        fixture.next_review = Some(Utc::now() + Duration::days(5));

        // Provenance, tags, validity window and embedding metadata.
        fixture.source = None;
        fixture.tags = vec!["test".to_string(), "example".to_string()];
        fixture.valid_from = None;
        fixture.valid_until = None;
        fixture.has_embedding = None;
        fixture.embedding_model = None;

        fixture
    }

    /// Retention macros accept plain float literals in all three forms.
    #[test]
    fn test_retention_assertions() {
        assert_retention_decreased!(0.7, 1.0);
        assert_retention_decreased!(0.5, 1.0, 0.3);
        assert_retention_in_range!(0.85, 0.8, 0.9);
    }

    /// A node flips from "not due" to "due" once `next_review` is in the past.
    #[test]
    fn test_scheduling_assertions() {
        let mut memory = create_test_node();

        // The fixture is scheduled in the future, so it is not due yet.
        assert_not_due!(memory);

        // Move the review time one hour into the past to make it due.
        memory.next_review = Some(Utc::now() - Duration::hours(1));
        assert_is_due!(memory);
    }

    /// A node without validity bounds counts as currently valid.
    #[test]
    fn test_temporal_assertions() {
        let memory = create_test_node();
        assert_currently_valid!(memory);
    }

    /// The plain helper functions agree with the fixture's field values.
    #[test]
    fn test_helper_functions() {
        let memory = create_test_node();
        assert_node_content(&memory, "test content");
        assert_node_type(&memory, "fact");
        assert_has_tags(&memory, &["test", "example"]);
        assert_difficulty_valid(&memory);
        assert_stability_valid(&memory);
    }

    /// Both float widths tolerate a difference below epsilon.
    #[test]
    fn test_approx_eq() {
        assert_approx_eq(0.90001, 0.9, 0.001);
        assert_approx_eq_f32(0.90001, 0.9, 0.001);
    }

    /// Nearly parallel vectors are similar; orthogonal vectors are different.
    #[test]
    fn test_embedding_assertions() {
        let emb1 = vec![1.0f32, 0.0, 0.0];
        let emb2 = vec![0.9, 0.1, 0.0];
        let emb3 = vec![0.0, 1.0, 0.0];
        assert_embeddings_similar!(emb1, emb2, 0.8);
        assert_embeddings_different!(emb1, emb3, 0.5);
    }
}

View file

@ -0,0 +1,390 @@
//! Test Database Manager
//!
//! Provides isolated database instances for testing:
//! - Temporary databases that are automatically cleaned up
//! - Pre-seeded databases with test data
//! - Database snapshots and restoration
//! - Concurrent test isolation
use vestige_core::{KnowledgeNode, Rating, Storage};
use std::path::PathBuf;
use tempfile::TempDir;
/// Build an `IngestInput` from its individual fields.
///
/// `IngestInput` is `#[non_exhaustive]`, so it cannot be created with a struct
/// literal outside its crate. This helper starts from `Default::default()` and
/// assigns each supplied field instead.
fn make_ingest_input(
    content: String,
    node_type: String,
    tags: Vec<String>,
    sentiment_score: f64,
    sentiment_magnitude: f64,
    source: Option<String>,
    valid_from: Option<chrono::DateTime<chrono::Utc>>,
    valid_until: Option<chrono::DateTime<chrono::Utc>>,
) -> vestige_core::IngestInput {
    let mut ingest = vestige_core::IngestInput::default();

    // What the memory is.
    ingest.content = content;
    ingest.node_type = node_type;
    ingest.tags = tags;
    ingest.source = source;

    // Emotional weighting.
    ingest.sentiment_score = sentiment_score;
    ingest.sentiment_magnitude = sentiment_magnitude;

    // Optional validity window.
    ingest.valid_from = valid_from;
    ingest.valid_until = valid_until;

    ingest
}
/// Manager for test databases
///
/// Creates isolated database instances for each test to prevent interference.
/// When created with `new_temp`, the backing temporary directory is removed
/// when the manager is dropped; a manager created with `new_at_path` leaves
/// its database file in place.
///
/// # Example
///
/// ```rust,ignore
/// let mut db = TestDatabaseManager::new_temp();
///
/// // Populate and inspect the storage
/// let ids = db.seed_nodes(3);
/// assert_eq!(db.node_count(), 3);
///
/// // The temporary database is deleted when `db` goes out of scope
/// ```
pub struct TestDatabaseManager {
/// The storage instance.
///
/// Declared before `_temp_dir`: struct fields are dropped in declaration
/// order, so the storage is dropped before the temporary directory.
pub storage: Storage,
/// Temporary directory (kept alive to prevent premature deletion).
/// `None` when the manager was created at a caller-supplied path.
_temp_dir: Option<TempDir>,
/// Path to the database file
db_path: PathBuf,
/// Nodes captured by `take_snapshot`, consumed by `restore_snapshot`
snapshot: Option<Vec<KnowledgeNode>>,
}
impl TestDatabaseManager {
/// Create a new test database in a temporary directory
///
/// The database is automatically deleted when the manager is dropped.
pub fn new_temp() -> Self {
let temp_dir = TempDir::new().expect("Failed to create temp directory");
let db_path = temp_dir.path().join("test_vestige.db");
let storage = Storage::new(Some(db_path.clone())).expect("Failed to create test storage");
Self {
storage,
_temp_dir: Some(temp_dir),
db_path,
snapshot: None,
}
}
/// Create a test database at a specific path
///
/// The database is NOT automatically deleted.
pub fn new_at_path(path: PathBuf) -> Self {
let storage = Storage::new(Some(path.clone())).expect("Failed to create test storage");
Self {
storage,
_temp_dir: None,
db_path: path,
snapshot: None,
}
}
/// Get the database path
pub fn path(&self) -> &PathBuf {
&self.db_path
}
/// Check if the database is empty
pub fn is_empty(&self) -> bool {
self.storage
.get_stats()
.map(|s| s.total_nodes == 0)
.unwrap_or(true)
}
/// Get the number of nodes in the database
pub fn node_count(&self) -> i64 {
self.storage
.get_stats()
.map(|s| s.total_nodes)
.unwrap_or(0)
}
// ========================================================================
// SEEDING METHODS
// ========================================================================
/// Seed the database with a specified number of test nodes
pub fn seed_nodes(&mut self, count: usize) -> Vec<String> {
let mut ids = Vec::with_capacity(count);
for i in 0..count {
let input = make_ingest_input(
format!("Test memory content {}", i),
"fact".to_string(),
vec![format!("test-{}", i % 5)],
0.0,
0.0,
None,
None,
None,
);
if let Ok(node) = self.storage.ingest(input) {
ids.push(node.id);
}
}
ids
}
/// Seed with diverse node types
pub fn seed_diverse(&mut self, count_per_type: usize) -> Vec<String> {
let types = ["fact", "concept", "procedure", "event", "code"];
let mut ids = Vec::with_capacity(count_per_type * types.len());
for node_type in types {
for i in 0..count_per_type {
let input = make_ingest_input(
format!("Test {} content {}", node_type, i),
node_type.to_string(),
vec![node_type.to_string()],
0.0,
0.0,
None,
None,
None,
);
if let Ok(node) = self.storage.ingest(input) {
ids.push(node.id);
}
}
}
ids
}
/// Seed with nodes having various retention states
pub fn seed_with_retention_states(&mut self) -> Vec<String> {
let mut ids = Vec::new();
// New node (never reviewed)
let input = make_ingest_input(
"New memory - never reviewed".to_string(),
"fact".to_string(),
vec!["new".to_string()],
0.0,
0.0,
None,
None,
None,
);
if let Ok(node) = self.storage.ingest(input) {
ids.push(node.id);
}
// Well-learned node (multiple good reviews)
let input = make_ingest_input(
"Well-learned memory - reviewed multiple times".to_string(),
"fact".to_string(),
vec!["learned".to_string()],
0.0,
0.0,
None,
None,
None,
);
if let Ok(node) = self.storage.ingest(input) {
let _ = self.storage.mark_reviewed(&node.id, Rating::Good);
let _ = self.storage.mark_reviewed(&node.id, Rating::Good);
let _ = self.storage.mark_reviewed(&node.id, Rating::Easy);
ids.push(node.id);
}
// Struggling node (multiple lapses)
let input = make_ingest_input(
"Struggling memory - has lapses".to_string(),
"fact".to_string(),
vec!["struggling".to_string()],
0.0,
0.0,
None,
None,
None,
);
if let Ok(node) = self.storage.ingest(input) {
let _ = self.storage.mark_reviewed(&node.id, Rating::Again);
let _ = self.storage.mark_reviewed(&node.id, Rating::Hard);
let _ = self.storage.mark_reviewed(&node.id, Rating::Again);
ids.push(node.id);
}
ids
}
/// Seed with emotional memories (different sentiment magnitudes)
pub fn seed_emotional(&mut self, count: usize) -> Vec<String> {
let mut ids = Vec::with_capacity(count);
for i in 0..count {
let magnitude = (i as f64) / (count as f64);
let input = make_ingest_input(
format!("Emotional memory with magnitude {:.2}", magnitude),
"event".to_string(),
vec!["emotional".to_string()],
if i % 2 == 0 { 0.8 } else { -0.8 },
magnitude,
None,
None,
None,
);
if let Ok(node) = self.storage.ingest(input) {
ids.push(node.id);
}
}
ids
}
// ========================================================================
// SNAPSHOT/RESTORE
// ========================================================================
/// Take a snapshot of current database state
pub fn take_snapshot(&mut self) {
let nodes = self
.storage
.get_all_nodes(10000, 0)
.unwrap_or_default();
self.snapshot = Some(nodes);
}
/// Restore from the last snapshot
///
/// Note: This clears the database and re-inserts all nodes from snapshot.
/// IDs will NOT be preserved (new UUIDs are generated).
pub fn restore_snapshot(&mut self) -> bool {
if let Some(nodes) = self.snapshot.take() {
// Clear current data by recreating storage
// Delete the database file first
let _ = std::fs::remove_file(&self.db_path);
self.storage = Storage::new(Some(self.db_path.clone()))
.expect("Failed to recreate storage for restore");
// Re-insert nodes
for node in nodes {
let input = make_ingest_input(
node.content,
node.node_type,
node.tags,
node.sentiment_score,
node.sentiment_magnitude,
node.source,
node.valid_from,
node.valid_until,
);
let _ = self.storage.ingest(input);
}
true
} else {
false
}
}
/// Check if a snapshot exists
pub fn has_snapshot(&self) -> bool {
self.snapshot.is_some()
}
// ========================================================================
// CLEANUP
// ========================================================================
/// Clear all data from the database
pub fn clear(&mut self) {
// Get all node IDs and delete them
if let Ok(nodes) = self.storage.get_all_nodes(10000, 0) {
for node in nodes {
let _ = self.storage.delete_node(&node.id);
}
}
}
/// Recreate the database (useful for testing migrations)
pub fn recreate(&mut self) {
// Delete the database file
let _ = std::fs::remove_file(&self.db_path);
// Recreate storage
self.storage = Storage::new(Some(self.db_path.clone()))
.expect("Failed to recreate storage");
}
}
/// Drop hook for the manager.
///
/// The body is intentionally empty: cleanup is performed by the fields' own
/// destructors, which run after this method returns.
impl Drop for TestDatabaseManager {
fn drop(&mut self) {
// Storage is dropped automatically
// TempDir (if Some) will clean up the temp directory
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created temporary database exists on disk and holds no nodes.
    #[test]
    fn test_temp_database_creation() {
        let manager = TestDatabaseManager::new_temp();
        assert!(manager.is_empty());
        assert!(manager.path().exists());
    }

    /// `seed_nodes` returns one ID per requested node and stores them all.
    #[test]
    fn test_seed_nodes() {
        let mut manager = TestDatabaseManager::new_temp();
        let seeded = manager.seed_nodes(10);
        assert_eq!(seeded.len(), 10);
        assert_eq!(manager.node_count(), 10);
    }

    /// `seed_diverse` creates `count_per_type` nodes for each of the 5 types.
    #[test]
    fn test_seed_diverse() {
        let mut manager = TestDatabaseManager::new_temp();
        let seeded = manager.seed_diverse(3);
        // 5 types * 3 each = 15
        assert_eq!(seeded.len(), 15);
        assert_eq!(manager.node_count(), 15);
    }

    /// `clear` removes every previously seeded node.
    #[test]
    fn test_clear_database() {
        let mut manager = TestDatabaseManager::new_temp();
        manager.seed_nodes(5);
        assert_eq!(manager.node_count(), 5);
        manager.clear();
        assert!(manager.is_empty());
    }

    /// A snapshot taken before `clear` brings the node count back on restore.
    #[test]
    fn test_snapshot_restore() {
        let mut manager = TestDatabaseManager::new_temp();
        manager.seed_nodes(5);

        manager.take_snapshot();
        assert!(manager.has_snapshot());

        manager.clear();
        assert!(manager.is_empty());

        manager.restore_snapshot();
        assert_eq!(manager.node_count(), 5);
    }
}

View file

@ -0,0 +1,11 @@
//! Test Harness Module
//!
//! Provides test setup utilities:
//! - `TimeTravelEnvironment` for testing time-dependent behavior (decay, scheduling)
//! - `TestDatabaseManager` for isolated test databases
mod db_manager;
mod time_travel;
pub use db_manager::TestDatabaseManager;
pub use time_travel::TimeTravelEnvironment;

View file

@ -0,0 +1,342 @@
//! Time Travel Environment for Testing Decay
//!
//! Enables testing of time-dependent memory behavior:
//! - FSRS-6 scheduling and intervals
//! - Memory decay (retrieval strength degradation)
//! - Temporal validity periods
//! - Consolidation timing
//!
//! Uses a virtual clock that can be advanced without waiting.
use chrono::{DateTime, Duration, Utc};
use std::cell::RefCell;
/// Environment for testing time-dependent memory behavior
///
/// Provides a virtual clock that can be advanced to test:
/// - Memory decay over time
/// - FSRS-6 scheduling calculations
/// - Temporal validity windows
/// - Consolidation cycles
///
/// The clock and its history use interior mutability (`RefCell`), so every
/// method takes `&self` and the environment does not need to be `mut`.
///
/// # Example
///
/// ```rust,ignore
/// let env = TimeTravelEnvironment::new();
/// let original_time = env.now();
///
/// // Advance 30 days to test decay
/// env.advance_days(30);
///
/// // Check how much virtual time has passed
/// let elapsed = env.days_since(original_time);
/// ```
pub struct TimeTravelEnvironment {
/// Current virtual time
current_time: RefCell<DateTime<Utc>>,
/// Original start time for reference (the target of `reset`)
start_time: DateTime<Utc>,
/// History of time jumps for debugging
time_history: RefCell<Vec<TimeJump>>,
}
/// Record of a time jump for debugging
///
/// One entry is appended to the environment's history every time the virtual
/// clock is changed.
#[derive(Debug, Clone)]
pub struct TimeJump {
/// Virtual time immediately before the jump
pub from: DateTime<Utc>,
/// Virtual time immediately after the jump
pub to: DateTime<Utc>,
/// Label of the operation that caused the jump (e.g. `"set_time"`, `"reset"`)
pub reason: String,
}
/// The default environment is the same as [`TimeTravelEnvironment::new`].
impl Default for TimeTravelEnvironment {
/// Delegates to `Self::new()`.
fn default() -> Self {
Self::new()
}
}
impl TimeTravelEnvironment {
/// Create a new time travel environment starting at the current time
pub fn new() -> Self {
let now = Utc::now();
Self {
current_time: RefCell::new(now),
start_time: now,
time_history: RefCell::new(Vec::new()),
}
}
/// Create environment at a specific starting time
pub fn at(time: DateTime<Utc>) -> Self {
Self {
current_time: RefCell::new(time),
start_time: time,
time_history: RefCell::new(Vec::new()),
}
}
/// Get the current virtual time
pub fn now(&self) -> DateTime<Utc> {
*self.current_time.borrow()
}
/// Get the original start time
pub fn start_time(&self) -> DateTime<Utc> {
self.start_time
}
/// Set the current time to a specific point
pub fn set_time(&self, time: DateTime<Utc>) {
let from = *self.current_time.borrow();
self.time_history.borrow_mut().push(TimeJump {
from,
to: time,
reason: "set_time".to_string(),
});
*self.current_time.borrow_mut() = time;
}
/// Advance time by a duration
pub fn advance(&self, duration: Duration) {
let from = *self.current_time.borrow();
let to = from + duration;
self.time_history.borrow_mut().push(TimeJump {
from,
to,
reason: format!("advance {:?}", duration),
});
*self.current_time.borrow_mut() = to;
}
/// Advance time by the specified number of days
pub fn advance_days(&self, days: i64) {
self.advance(Duration::days(days));
}
/// Advance time by the specified number of hours
pub fn advance_hours(&self, hours: i64) {
self.advance(Duration::hours(hours));
}
/// Advance time by the specified number of minutes
pub fn advance_minutes(&self, minutes: i64) {
self.advance(Duration::minutes(minutes));
}
/// Advance time by the specified number of seconds
pub fn advance_seconds(&self, seconds: i64) {
self.advance(Duration::seconds(seconds));
}
/// Calculate days elapsed since a reference time
pub fn days_since(&self, reference: DateTime<Utc>) -> f64 {
let current = *self.current_time.borrow();
(current - reference).num_seconds() as f64 / 86400.0
}
/// Calculate days elapsed since the start time
pub fn days_since_start(&self) -> f64 {
self.days_since(self.start_time)
}
/// Calculate hours elapsed since a reference time
pub fn hours_since(&self, reference: DateTime<Utc>) -> f64 {
let current = *self.current_time.borrow();
(current - reference).num_seconds() as f64 / 3600.0
}
/// Get time history for debugging
pub fn get_history(&self) -> Vec<TimeJump> {
self.time_history.borrow().clone()
}
/// Clear time history
pub fn clear_history(&self) {
self.time_history.borrow_mut().clear();
}
/// Reset to start time
pub fn reset(&self) {
let from = *self.current_time.borrow();
self.time_history.borrow_mut().push(TimeJump {
from,
to: self.start_time,
reason: "reset".to_string(),
});
*self.current_time.borrow_mut() = self.start_time;
}
// ========================================================================
// DECAY TESTING HELPERS
// ========================================================================
/// Calculate expected retrievability at current time
///
/// Uses FSRS-6 power forgetting curve:
/// R = (1 + factor * t / S)^(-w20)
pub fn expected_retrievability(&self, stability: f64, last_review: DateTime<Utc>) -> f64 {
let elapsed_days = self.days_since(last_review);
vestige_core::retrievability(stability, elapsed_days)
}
/// Calculate expected retrievability with custom decay
pub fn expected_retrievability_with_decay(
&self,
stability: f64,
last_review: DateTime<Utc>,
w20: f64,
) -> f64 {
let elapsed_days = self.days_since(last_review);
vestige_core::retrievability_with_decay(stability, elapsed_days, w20)
}
/// Check if a memory would be due for review at current time
pub fn is_due(&self, next_review: DateTime<Utc>) -> bool {
*self.current_time.borrow() >= next_review
}
/// Calculate how overdue a memory is (negative if not yet due)
pub fn days_overdue(&self, next_review: DateTime<Utc>) -> f64 {
self.days_since(next_review)
}
// ========================================================================
// SCHEDULING HELPERS
// ========================================================================
/// Advance to when a memory would be due
pub fn advance_to_due(&self, next_review: DateTime<Utc>) {
let from = *self.current_time.borrow();
self.time_history.borrow_mut().push(TimeJump {
from,
to: next_review,
reason: "advance_to_due".to_string(),
});
*self.current_time.borrow_mut() = next_review;
}
/// Advance past due date by specified days
pub fn advance_past_due(&self, next_review: DateTime<Utc>, days_overdue: i64) {
let target = next_review + Duration::days(days_overdue);
let from = *self.current_time.borrow();
self.time_history.borrow_mut().push(TimeJump {
from,
to: target,
reason: format!("advance_past_due +{} days", days_overdue),
});
*self.current_time.borrow_mut() = target;
}
// ========================================================================
// TEMPORAL VALIDITY HELPERS
// ========================================================================
/// Check if a time is within a validity window
pub fn is_within_validity(
&self,
valid_from: Option<DateTime<Utc>>,
valid_until: Option<DateTime<Utc>>,
) -> bool {
let current = *self.current_time.borrow();
let after_start = valid_from.map(|t| current >= t).unwrap_or(true);
let before_end = valid_until.map(|t| current <= t).unwrap_or(true);
after_start && before_end
}
/// Advance to just before validity starts
pub fn advance_to_before_validity(&self, valid_from: DateTime<Utc>) {
let target = valid_from - Duration::seconds(1);
self.set_time(target);
}
/// Advance to just after validity ends
pub fn advance_to_after_validity(&self, valid_until: DateTime<Utc>) {
let target = valid_until + Duration::seconds(1);
self.set_time(target);
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Advancing by ten days is reported as roughly ten elapsed days.
    #[test]
    fn test_time_travel_basic() {
        let clock = TimeTravelEnvironment::new();
        let origin = clock.now();

        clock.advance_days(10);

        assert!(clock.days_since(origin) >= 9.99);
        assert!(clock.days_since(origin) <= 10.01);
    }

    /// `reset` returns the clock to the recorded start time.
    #[test]
    fn test_time_travel_reset() {
        let clock = TimeTravelEnvironment::new();
        let origin = clock.start_time();

        clock.advance_days(100);
        clock.reset();

        assert_eq!(clock.now(), origin);
    }

    /// Expected retrievability falls monotonically as virtual time passes.
    #[test]
    fn test_retrievability_decay() {
        let clock = TimeTravelEnvironment::new();
        let stability = 10.0;
        let last_review = clock.now();

        // At t=0, retrievability should be ~1.0
        let at_start = clock.expected_retrievability(stability, last_review);
        assert!(at_start > 0.99);

        // After 10 days with stability=10, retrievability should be ~0.9
        clock.advance_days(10);
        let after_ten = clock.expected_retrievability(stability, last_review);
        assert!(after_ten < at_start);
        assert!(after_ten > 0.85 && after_ten < 0.95);

        // After 30 days, retrievability should be lower still
        clock.advance_days(20);
        let after_thirty = clock.expected_retrievability(stability, last_review);
        assert!(after_thirty < after_ten);
    }

    /// Due-ness and overdue days track the clock before, at and after the due date.
    #[test]
    fn test_due_date_checking() {
        let clock = TimeTravelEnvironment::new();
        let next_review = clock.now() + Duration::days(5);

        // Not due yet
        assert!(!clock.is_due(next_review));
        assert!(clock.days_overdue(next_review) < 0.0);

        // Advance to due
        clock.advance_to_due(next_review);
        assert!(clock.is_due(next_review));
        assert!(clock.days_overdue(next_review).abs() < 0.01);

        // Advance past due
        clock.advance_days(3);
        assert!(clock.is_due(next_review));
        assert!(clock.days_overdue(next_review) > 2.99);
    }

    /// Every advance is recorded, and `clear_history` empties the log.
    #[test]
    fn test_history_tracking() {
        let clock = TimeTravelEnvironment::new();

        clock.advance_days(1);
        clock.advance_hours(12);
        clock.advance_minutes(30);

        let jumps = clock.get_history();
        assert_eq!(jumps.len(), 3);

        clock.clear_history();
        assert!(clock.get_history().is_empty());
    }
}

49
tests/e2e/src/lib.rs Normal file
View file

@ -0,0 +1,49 @@
//! E2E Test Infrastructure for Vestige
//!
//! Provides comprehensive testing utilities for 250+ end-to-end tests:
//!
//! - **Harness**: Test setup, time travel, database management
//! - **Mocks**: MockEmbeddingService (FxHash-based), test fixtures
//! - **Assertions**: Custom assertions for memory states, decay, etc.
//!
//! ## Quick Start
//!
//! ```rust,ignore
//! use vestige_e2e_tests::prelude::*;
//!
//! #[test]
//! fn test_memory_decay() {
//! let env = TimeTravelEnvironment::new();
//! let mut db = TestDatabaseManager::new_temp();
//!
//! // Create test data
//! let node = TestDataFactory::create_memory(&mut db.storage, "test content");
//!
//! // Time travel to test decay
//! env.advance_days(30);
//!
//! // Assert decay occurred
//! let stored = db.storage.get_node(&node.id).unwrap().unwrap();
//! assert_retention_decreased!(stored.retention_strength, 0.9);
//! }
//! ```
pub mod assertions;
pub mod harness;
pub mod mocks;
// Re-export commonly used items
pub use harness::{TestDatabaseManager, TimeTravelEnvironment};
pub use mocks::{MockEmbeddingService, TestDataFactory};
/// Convenient imports for tests
pub mod prelude {
pub use crate::assertions::*;
pub use crate::harness::{TestDatabaseManager, TimeTravelEnvironment};
pub use crate::mocks::{MockEmbeddingService, TestDataFactory};
// Re-export vestige-core essentials
pub use vestige_core::{
FSRSScheduler, FSRSState, IngestInput, KnowledgeNode, NodeType, Rating, RecallInput,
Result, SearchMode, Storage, StorageError,
};
}

View file

@ -0,0 +1,573 @@
//! Test Data Factory
//!
//! Provides utilities for generating realistic test data:
//! - Memory nodes with various properties
//! - Batch generation for stress testing
//! - Pre-built scenarios for common test cases
use chrono::{DateTime, Duration, Utc};
use vestige_core::{KnowledgeNode, Rating, Storage};
/// Build an `IngestInput` from individual field values.
///
/// `IngestInput` is `non_exhaustive`, so it cannot be written as a struct
/// literal from this crate. The helper therefore starts from
/// `IngestInput::default()` and overwrites the fields one by one; any field
/// not listed here keeps its default.
fn make_ingest_input(
    content: String,
    node_type: String,
    tags: Vec<String>,
    sentiment_score: f64,
    sentiment_magnitude: f64,
    source: Option<String>,
    valid_from: Option<DateTime<Utc>>,
    valid_until: Option<DateTime<Utc>>,
) -> vestige_core::IngestInput {
    let mut ingest = vestige_core::IngestInput::default();

    // Temporal validity window.
    ingest.valid_from = valid_from;
    ingest.valid_until = valid_until;

    // Sentiment.
    ingest.sentiment_score = sentiment_score;
    ingest.sentiment_magnitude = sentiment_magnitude;

    // Payload and classification.
    ingest.source = source;
    ingest.tags = tags;
    ingest.node_type = node_type;
    ingest.content = content;

    ingest
}
/// Factory for creating test data
///
/// A stateless unit struct used purely as a namespace: every helper is an
/// associated function that ingests data into a caller-supplied `Storage`.
/// Single-memory helpers return `Option<KnowledgeNode>` (`None` when the
/// ingest failed), batch helpers return the IDs of the nodes that were
/// stored, and scenario helpers return a `TestScenario`.
///
/// # Example
///
/// ```rust,ignore
/// let mut storage = Storage::new(Some(path))?;
///
/// // Create a single memory (`None` if ingestion failed)
/// let node = TestDataFactory::create_memory(&mut storage, "test content");
///
/// // Create a batch; yields the IDs of the stored nodes
/// let ids = TestDataFactory::create_batch(&mut storage, 100);
///
/// // Create a specific scenario
/// let scenario = TestDataFactory::create_decay_scenario(&mut storage);
/// ```
pub struct TestDataFactory;
/// Configuration for batch memory generation
///
/// Consumed by `TestDataFactory::create_batch_with_config`. The `Default`
/// implementation yields 10 plain memories with the prefix "Test memory",
/// no tags, no sentiment and no temporal bounds.
#[derive(Debug, Clone)]
pub struct BatchConfig {
/// Number of memories to create
pub count: usize,
/// Node type to use for every memory. `None` does not pick at random: the
/// batch cycles through a fixed list ("fact", "concept", "procedure",
/// "event", "code") by item index.
pub node_type: Option<String>,
/// Base content prefix; each memory's content is "<prefix> <index>"
pub content_prefix: String,
/// Tags to apply (the same list is copied onto every memory)
pub tags: Vec<String>,
/// Whether to add sentiment. When set, the score ramps from -1.0 upwards
/// and the magnitude from 0.0 upwards as the item index grows.
pub with_sentiment: bool,
/// Whether to add temporal validity. When set, items alternate by
/// `index % 3` between a currently valid window, an already expired
/// window, and no bounds at all.
pub with_temporal: bool,
}
impl Default for BatchConfig {
fn default() -> Self {
Self {
count: 10,
node_type: None,
content_prefix: "Test memory".to_string(),
tags: vec![],
with_sentiment: false,
with_temporal: false,
}
}
}
/// Scenario containing related test data
///
/// Returned by the `TestDataFactory::create_*_scenario` helpers. Nodes whose
/// ingest failed are simply absent from both `node_ids` and `metadata`.
#[derive(Debug)]
pub struct TestScenario {
/// IDs of created nodes, in creation order
pub node_ids: Vec<String>,
/// Human-readable description of the scenario
pub description: String,
/// Metadata for test assertions. Depending on the scenario this maps a
/// role label (e.g. "high_stability", "expired") to a node ID, or a
/// category label (e.g. "science_count") to a count rendered as a string.
pub metadata: std::collections::HashMap<String, String>,
}
impl TestDataFactory {
// ========================================================================
// SINGLE MEMORY CREATION
// ========================================================================
/// Create a plain "fact" memory holding `content`.
///
/// The memory has no tags, no source, neutral sentiment and no temporal
/// bounds. Returns `None` when the ingest fails.
pub fn create_memory(storage: &mut Storage, content: &str) -> Option<KnowledgeNode> {
    let node_type = String::from("fact");
    let no_tags: Vec<String> = Vec::new();
    let input = make_ingest_input(
        content.to_owned(),
        node_type,
        no_tags,
        0.0,
        0.0,
        None,
        None,
        None,
    );
    let outcome = storage.ingest(input);
    outcome.ok()
}
/// Create a memory with an explicit node type, source, tags and sentiment.
///
/// No temporal bounds are set. Returns `None` when the ingest fails.
pub fn create_memory_full(
    storage: &mut Storage,
    content: &str,
    node_type: &str,
    source: Option<&str>,
    tags: Vec<&str>,
    sentiment_score: f64,
    sentiment_magnitude: f64,
) -> Option<KnowledgeNode> {
    // Convert the borrowed arguments into the owned values the input expects.
    let owned_tags: Vec<String> = tags.into_iter().map(String::from).collect();
    let owned_source = source.map(|s| s.to_string());

    let input = make_ingest_input(
        content.to_owned(),
        node_type.to_owned(),
        owned_tags,
        sentiment_score,
        sentiment_magnitude,
        owned_source,
        None,
        None,
    );
    let outcome = storage.ingest(input);
    outcome.ok()
}
/// Create a "fact" memory restricted to a validity window.
///
/// Either bound may be `None` to leave that side open. The memory is
/// untagged, has no source and neutral sentiment. Returns `None` when the
/// ingest fails.
pub fn create_temporal_memory(
    storage: &mut Storage,
    content: &str,
    valid_from: Option<DateTime<Utc>>,
    valid_until: Option<DateTime<Utc>>,
) -> Option<KnowledgeNode> {
    let node_type = String::from("fact");
    let no_tags: Vec<String> = Vec::new();
    let input = make_ingest_input(
        content.to_owned(),
        node_type,
        no_tags,
        0.0,
        0.0,
        None,
        valid_from,
        valid_until,
    );
    let outcome = storage.ingest(input);
    outcome.ok()
}
/// Create an "event" memory carrying the given sentiment score and magnitude.
///
/// The memory is untagged, has no source and no temporal bounds. Returns
/// `None` when the ingest fails.
pub fn create_emotional_memory(
    storage: &mut Storage,
    content: &str,
    sentiment: f64,
    magnitude: f64,
) -> Option<KnowledgeNode> {
    let node_type = String::from("event");
    let no_tags: Vec<String> = Vec::new();
    let input = make_ingest_input(
        content.to_owned(),
        node_type,
        no_tags,
        sentiment,
        magnitude,
        None,
        None,
        None,
    );
    let outcome = storage.ingest(input);
    outcome.ok()
}
// ========================================================================
// BATCH CREATION
// ========================================================================
/// Create a batch of memories
pub fn create_batch(storage: &mut Storage, count: usize) -> Vec<String> {
Self::create_batch_with_config(storage, BatchConfig { count, ..Default::default() })
}
/// Create a batch of memories as described by `config`.
///
/// Item `i` gets the content "<prefix> <i>". Without a fixed node type the
/// batch cycles through a built-in list; sentiment and temporal bounds are
/// derived from the item index when enabled. Items whose ingest fails are
/// skipped, so the returned ID list can be shorter than `config.count`.
pub fn create_batch_with_config(storage: &mut Storage, config: BatchConfig) -> Vec<String> {
    const CYCLED_TYPES: [&str; 5] = ["fact", "concept", "procedure", "event", "code"];

    let mut created = Vec::with_capacity(config.count);

    for index in 0..config.count {
        let node_type = match &config.node_type {
            Some(fixed) => fixed.clone(),
            None => CYCLED_TYPES[index % CYCLED_TYPES.len()].to_string(),
        };

        // Position of this item within the batch, in [0, 1).
        let (sentiment_score, sentiment_magnitude) = if config.with_sentiment {
            let fraction = (index as f64) / (config.count as f64);
            (fraction * 2.0 - 1.0, fraction)
        } else {
            (0.0, 0.0)
        };

        let (valid_from, valid_until) = if config.with_temporal {
            let now = Utc::now();
            match index % 3 {
                // Currently valid window.
                0 => (Some(now - Duration::days(30)), Some(now + Duration::days(30))),
                // Window that has already closed.
                1 => (Some(now - Duration::days(60)), Some(now - Duration::days(30))),
                // Unbounded.
                _ => (None, None),
            }
        } else {
            (None, None)
        };

        let input = make_ingest_input(
            format!("{} {}", config.content_prefix, index),
            node_type,
            config.tags.clone(),
            sentiment_score,
            sentiment_magnitude,
            None,
            valid_from,
            valid_until,
        );

        match storage.ingest(input) {
            Ok(node) => created.push(node.id),
            Err(_) => {}
        }
    }

    created
}
// ========================================================================
// SCENARIO CREATION
// ========================================================================
/// Create a scenario for testing memory decay.
///
/// Ingests three memories and records their IDs in the metadata under
/// "high_stability", "low_stability" and "emotional". A memory whose ingest
/// fails is left out of both the ID list and the metadata.
pub fn create_decay_scenario(storage: &mut Storage) -> TestScenario {
    // Array elements are evaluated in order, so ingestion order is fixed.
    let created = [
        (
            // Intended to decay slowly.
            "high_stability",
            Self::create_memory_full(
                storage,
                "Well-learned fact about photosynthesis",
                "fact",
                Some("biology textbook"),
                vec!["biology", "science"],
                0.3,
                0.5,
            ),
        ),
        (
            // Intended to decay quickly.
            "low_stability",
            Self::create_memory(storage, "Random fact I just learned"),
        ),
        (
            // Strong sentiment, for testing its effect on decay.
            "emotional",
            Self::create_emotional_memory(storage, "Important life event", 0.9, 0.95),
        ),
    ];

    let mut node_ids = Vec::new();
    let mut metadata = std::collections::HashMap::new();
    for (label, maybe_node) in created {
        if let Some(node) = maybe_node {
            metadata.insert(label.to_string(), node.id.clone());
            node_ids.push(node.id);
        }
    }

    TestScenario {
        node_ids,
        description: "Decay testing scenario with varied stability".to_string(),
        metadata,
    }
}
/// Create a scenario for testing review scheduling
pub fn create_scheduling_scenario(storage: &mut Storage) -> TestScenario {
let mut ids = Vec::new();
let mut metadata = std::collections::HashMap::new();
// New card (never reviewed)
let new_card = Self::create_memory(storage, "Brand new memory");
if let Some(node) = new_card {
metadata.insert("new".to_string(), node.id.clone());
ids.push(node.id);
}
// Learning card (few reviews)
if let Some(node) = Self::create_memory(storage, "Learning memory") {
let _ = storage.mark_reviewed(&node.id, Rating::Good);
metadata.insert("learning".to_string(), node.id.clone());
ids.push(node.id);
}
// Review card (many reviews)
if let Some(node) = Self::create_memory(storage, "Well-reviewed memory") {
for _ in 0..5 {
let _ = storage.mark_reviewed(&node.id, Rating::Good);
}
metadata.insert("review".to_string(), node.id.clone());
ids.push(node.id);
}
// Relearning card (had lapses)
if let Some(node) = Self::create_memory(storage, "Struggling memory") {
let _ = storage.mark_reviewed(&node.id, Rating::Good);
let _ = storage.mark_reviewed(&node.id, Rating::Again);
metadata.insert("relearning".to_string(), node.id.clone());
ids.push(node.id);
}
TestScenario {
node_ids: ids,
description: "Scheduling scenario with cards in different learning states".to_string(),
metadata,
}
}
/// Create a scenario for testing search.
///
/// Ingests three groups of memories, in this order: programming facts,
/// science facts and cooking procedures. For each group the metadata holds
/// the number of memories that were *actually stored* under
/// "programming_count", "science_count" and "recipe_count". When every
/// ingest succeeds these are "3", "3" and "2"; if an ingest fails the count
/// reflects that, so it always agrees with `node_ids`.
pub fn create_search_scenario(storage: &mut Storage) -> TestScenario {
    // (metadata key, node type, source, tags, contents)
    let groups: [(&str, &str, &str, &[&str], &[&str]); 3] = [
        (
            "programming_count",
            "fact",
            "programming docs",
            &["programming", "code"],
            &[
                "Rust programming language uses ownership for memory safety",
                "Python is great for data science and machine learning",
                "JavaScript runs in web browsers and Node.js",
            ],
        ),
        (
            "science_count",
            "fact",
            "science textbook",
            &["science"],
            &[
                "Mitochondria is the powerhouse of the cell",
                "DNA contains genetic information",
                "Gravity is the force of attraction between masses",
            ],
        ),
        (
            "recipe_count",
            "procedure",
            "cookbook",
            &["cooking", "recipes"],
            &[
                "To make pasta, boil water and add salt",
                "Chocolate cake requires cocoa powder and eggs",
            ],
        ),
    ];

    let mut ids = Vec::new();
    let mut metadata = std::collections::HashMap::new();

    for (count_key, node_type, source, tags, contents) in groups {
        // Count successes rather than assuming every ingest worked.
        let mut stored = 0usize;
        for &content in contents {
            if let Some(node) = Self::create_memory_full(
                storage,
                content,
                node_type,
                Some(source),
                tags.to_vec(),
                0.0,
                0.0,
            ) {
                ids.push(node.id);
                stored += 1;
            }
        }
        metadata.insert(count_key.to_string(), stored.to_string());
    }

    TestScenario {
        node_ids: ids,
        description: "Search scenario with categorized content".to_string(),
        metadata,
    }
}
/// Create a scenario for testing temporal queries.
///
/// Ingests four memories whose validity windows are all measured from one
/// `Utc::now()` reading, and records their IDs under "current", "expired",
/// "future" and "always_valid". A memory whose ingest fails is left out.
pub fn create_temporal_scenario(storage: &mut Storage) -> TestScenario {
    let now = Utc::now();

    // (metadata key, content, valid_from, valid_until)
    let windows = [
        (
            "current",
            "Currently valid memory",
            Some(now - Duration::days(10)),
            Some(now + Duration::days(10)),
        ),
        (
            "expired",
            "Expired memory",
            Some(now - Duration::days(60)),
            Some(now - Duration::days(30)),
        ),
        (
            "future",
            "Future memory",
            Some(now + Duration::days(30)),
            Some(now + Duration::days(60)),
        ),
        // No bounds on either side.
        ("always_valid", "Always valid memory", None, None),
    ];

    let mut node_ids = Vec::new();
    let mut metadata = std::collections::HashMap::new();

    for (label, content, valid_from, valid_until) in windows {
        let created = Self::create_temporal_memory(storage, content, valid_from, valid_until);
        if let Some(node) = created {
            metadata.insert(label.to_string(), node.id.clone());
            node_ids.push(node.id);
        }
    }

    TestScenario {
        node_ids,
        description: "Temporal scenario with different validity periods".to_string(),
        metadata,
    }
}
// ========================================================================
// UTILITY METHODS
// ========================================================================
/// Pick a node type from a fixed list of nine, selected by `seed`.
///
/// Despite the name the choice is fully deterministic: the same seed always
/// yields the same type, and seeds that differ by a multiple of nine agree.
pub fn random_node_type(seed: usize) -> &'static str {
    const NODE_TYPES: [&str; 9] = [
        "fact",
        "concept",
        "procedure",
        "event",
        "relationship",
        "quote",
        "code",
        "question",
        "insight",
    ];
    let slot = seed % NODE_TYPES.len();
    NODE_TYPES[slot]
}
/// Generate lorem ipsum-like content.
///
/// Returns `words` space-separated words drawn from a fixed 20-word
/// vocabulary. Word `i` is the vocabulary entry at `(seed + 7 * i) mod 20`,
/// so the output is fully determined by the two arguments. `words == 0`
/// yields an empty string.
///
/// The index is computed on values already reduced modulo the vocabulary
/// size, so arbitrarily large seeds (e.g. hash values) cannot overflow.
pub fn lorem_content(words: usize, seed: usize) -> String {
    const WORDS: [&str; 20] = [
        "the", "memory", "learning", "knowledge", "algorithm",
        "data", "system", "process", "function", "method",
        "class", "object", "variable", "constant", "type",
        "structure", "pattern", "design", "architecture", "code",
    ];
    // Step between consecutive words.
    const STRIDE: usize = 7;

    let len = WORDS.len();
    let start = seed % len;
    (0..words)
        // (start + (i % len) * STRIDE) is congruent to (seed + i * STRIDE)
        // modulo len, but stays far below usize::MAX.
        .map(|i| WORDS[(start + (i % len) * STRIDE) % len])
        .collect::<Vec<_>>()
        .join(" ")
}
/// Generate `count` tags from a fixed list of ten.
///
/// Tag `i` is the list entry at `(seed + i) mod 10`, so consecutive tags
/// walk the list from the seed position and wrap around; asking for more
/// than ten tags repeats entries. `count == 0` yields an empty vector.
///
/// The index is computed on values already reduced modulo the list length,
/// so arbitrarily large seeds cannot overflow.
pub fn generate_tags(count: usize, seed: usize) -> Vec<String> {
    const TAGS: [&str; 10] = [
        "important", "review", "todo", "concept", "fact",
        "code", "note", "idea", "question", "reference",
    ];

    let len = TAGS.len();
    let start = seed % len;
    (0..count)
        // Congruent to (seed + i) modulo len without the risk of overflow.
        .map(|i| TAGS[(start + i % len) % len].to_string())
        .collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::{tempdir, TempDir};

    /// Open a fresh `Storage` backed by a database file in its own temporary
    /// directory.
    ///
    /// The `TempDir` guard is returned alongside the storage because the
    /// directory is deleted as soon as the guard is dropped. Callers must
    /// keep it bound for as long as they use the storage. It comes first in
    /// the tuple so that a `let (_dir, mut storage) = ...` binding drops the
    /// storage before the directory is removed.
    fn create_test_storage() -> (TempDir, Storage) {
        let dir = tempdir().expect("failed to create temporary directory");
        let db_path = dir.path().join("test.db");
        let storage = Storage::new(Some(db_path)).expect("failed to open test storage");
        (dir, storage)
    }

    #[test]
    fn test_create_memory() {
        let (_dir, mut storage) = create_test_storage();
        let node = TestDataFactory::create_memory(&mut storage, "test content");
        assert!(node.is_some());
        assert_eq!(node.unwrap().content, "test content");
    }

    #[test]
    fn test_create_batch() {
        let (_dir, mut storage) = create_test_storage();
        let ids = TestDataFactory::create_batch(&mut storage, 10);
        assert_eq!(ids.len(), 10);
        let stats = storage.get_stats().unwrap();
        assert_eq!(stats.total_nodes, 10);
    }

    #[test]
    fn test_create_decay_scenario() {
        let (_dir, mut storage) = create_test_storage();
        let scenario = TestDataFactory::create_decay_scenario(&mut storage);
        assert!(!scenario.node_ids.is_empty());
        assert!(scenario.metadata.contains_key("high_stability"));
        assert!(scenario.metadata.contains_key("low_stability"));
        assert!(scenario.metadata.contains_key("emotional"));
    }

    #[test]
    fn test_create_scheduling_scenario() {
        let (_dir, mut storage) = create_test_storage();
        let scenario = TestDataFactory::create_scheduling_scenario(&mut storage);
        assert!(!scenario.node_ids.is_empty());
        assert!(scenario.metadata.contains_key("new"));
        assert!(scenario.metadata.contains_key("learning"));
        assert!(scenario.metadata.contains_key("review"));
        assert!(scenario.metadata.contains_key("relearning"));
    }

    #[test]
    fn test_lorem_content() {
        let content = TestDataFactory::lorem_content(10, 42);
        let words: Vec<_> = content.split_whitespace().collect();
        assert_eq!(words.len(), 10);
    }

    #[test]
    fn test_generate_tags() {
        let tags = TestDataFactory::generate_tags(5, 0);
        assert_eq!(tags.len(), 5);
        assert!(tags.iter().all(|t| !t.is_empty()));
    }
}

View file

@ -0,0 +1,377 @@
//! Mock Embedding Service using FxHash
//!
//! Provides deterministic embeddings for testing without requiring
//! the actual fastembed model. Uses FxHash for fast, consistent hashing.
//!
//! Key properties:
//! - Deterministic: Same input always produces same embedding
//! - Fast: No ML model loading/inference
//! - Semantic similarity: Similar strings produce similar embeddings
//! - Normalized: All embeddings have unit length
use std::collections::HashMap;
/// Dimensions for mock embeddings (matches BGE-base-en-v1.5)
///
/// Every vector returned by `MockEmbeddingService::embed` has exactly this
/// many components.
pub const MOCK_EMBEDDING_DIM: usize = 768;
/// FxHash-style hash (fast, non-cryptographic), applied one byte at a time.
/// Based on Firefox's hash function.
///
/// The state starts at the multiplier constant; for each input byte it is
/// rotated left by 5 bits, XORed with the byte and multiplied (wrapping) by
/// the constant. Empty input therefore hashes to the constant itself.
fn fx_hash(data: &[u8]) -> u64 {
    const SEED: u64 = 0x517cc1b727220a95;
    data.iter().fold(SEED, |state, &byte| {
        let mixed = state.rotate_left(5) ^ u64::from(byte);
        mixed.wrapping_mul(SEED)
    })
}
/// Mock embedding service for testing
///
/// Produces deterministic embeddings based on text content using FxHash.
/// Designed to approximate real embedding behavior:
/// - Similar texts produce similar embeddings
/// - Different texts produce different embeddings
/// - Embeddings are normalized to unit length
///
/// Computed embeddings are cached per input string, which is why `embed`
/// takes `&mut self`.
///
/// # Example
///
/// ```rust,ignore
/// // `embed` updates the cache, so the service must be mutable.
/// let mut service = MockEmbeddingService::new();
///
/// let emb1 = service.embed("hello world");
/// let emb2 = service.embed("hello world");
/// let emb3 = service.embed("goodbye world");
///
/// // Same input = same output
/// assert_eq!(emb1, emb2);
///
/// // Different input = different output
/// assert_ne!(emb1, emb3);
///
/// // But similar inputs have higher similarity
/// let sim_same = service.cosine_similarity(&emb1, &emb2);
/// let sim_diff = service.cosine_similarity(&emb1, &emb3);
/// assert!(sim_same > sim_diff);
/// ```
pub struct MockEmbeddingService {
/// Cache for computed embeddings, keyed by the exact input text
cache: HashMap<String, Vec<f32>>,
/// Whether to use word-level hashing for better semantic similarity
/// (`true`) or a pure hash of the whole text (`false`)
semantic_mode: bool,
}
impl Default for MockEmbeddingService {
fn default() -> Self {
Self::new()
}
}
impl MockEmbeddingService {
/// Create a mock embedding service with an empty cache and semantic
/// (word-level) hashing enabled.
pub fn new() -> Self {
    let cache = HashMap::new();
    Self {
        semantic_mode: true,
        cache,
    }
}
/// Create a service with an empty cache and semantic mode switched off, so
/// embeddings are derived purely from a hash of the whole text.
pub fn new_simple() -> Self {
    let cache = HashMap::new();
    Self {
        semantic_mode: false,
        cache,
    }
}
/// Embed `text` into a vector of `MOCK_EMBEDDING_DIM` components.
///
/// A previously computed embedding for the exact same text is served from
/// the cache. Otherwise the embedding is computed with the semantic or the
/// simple strategy, depending on how the service was constructed, and
/// stored in the cache before being returned.
pub fn embed(&mut self, text: &str) -> Vec<f32> {
    if let Some(hit) = self.cache.get(text).cloned() {
        return hit;
    }

    let fresh = match self.semantic_mode {
        true => self.semantic_embed(text),
        false => self.simple_embed(text),
    };
    self.cache.insert(text.to_owned(), fresh.clone());
    fresh
}
/// Simple hash-based embedding of the lowercased text.
///
/// The vector is filled in blocks of 64 components. Each block hashes
/// "<block index>:<lowercased text>" once, and every component within the
/// block is derived from a different rotation of that hash, scaled into
/// roughly [-1, 1]. The result is normalized to unit length.
fn simple_embed(&self, text: &str) -> Vec<f32> {
    let lowered = text.to_lowercase();
    let mut vector = vec![0.0f32; MOCK_EMBEDDING_DIM];

    for (block_index, block) in vector.chunks_mut(64).enumerate() {
        // One hash per block; the block index acts as the seed.
        let block_hash = fx_hash(format!("{}:{}", block_index, lowered).as_bytes());

        for (offset, slot) in block.iter_mut().enumerate() {
            // Rotating by a multiple of 5 gives each slot its own bit pattern.
            let rotated = block_hash.rotate_left((offset * 5) as u32);
            let unit = rotated as f32 / u64::MAX as f32;
            *slot = (unit * 2.0) - 1.0;
        }
    }

    normalize(&mut vector);
    vector
}
/// Semantic-aware embedding (word-level hashing)
///
/// Builds the vector from three additive feature groups computed on the
/// lowercased text, then normalizes it:
/// 1. per-word features: each word adds signed weights to 16 dimensions
///    chosen from its hash, independent of where the word appears;
/// 2. positional features: each (position, word) pair adds a weight of
///    `1 / (position + 1)` to one dimension;
/// 3. character trigrams: every 3-character window adds 0.1 to one dimension.
///
/// Texts that share words or trigrams therefore share non-zero dimensions.
/// Text without any alphanumeric character falls back to `simple_embed`.
fn semantic_embed(&self, text: &str) -> Vec<f32> {
let mut embedding = vec![0.0f32; MOCK_EMBEDDING_DIM];
let normalized = text.to_lowercase();
// Tokenize into words: split on every non-alphanumeric character and
// drop the empty pieces produced by consecutive separators
let words: Vec<&str> = normalized
.split(|c: char| !c.is_alphanumeric())
.filter(|w| !w.is_empty())
.collect();
if words.is_empty() {
// Fall back to simple embedding when there are no words (empty text
// or text made up only of separators)
return self.simple_embed(text);
}
// Each word contributes to the embedding
for word in &words {
let word_hash = fx_hash(word.as_bytes());
// Map word to a sparse set of dimensions: 16 contributions per word,
// each derived from a different slice of the same 64-bit hash
for i in 0..16 {
// Target dimension: the hash shifted right by 4 bits per step
let dim = ((word_hash >> (i * 4)) as usize) % MOCK_EMBEDDING_DIM;
// Sign: one of the hash bits 48..=63, a different bit per step
let sign = if (word_hash >> (i + 48)) & 1 == 0 { 1.0 } else { -1.0 };
// Magnitude in [0.5, 1.5): remainder modulo 100 scaled to [0, 1), plus 0.5
let magnitude = ((word_hash >> (i * 2)) as f32 % 100.0) / 100.0 + 0.5;
embedding[dim] += sign * magnitude;
}
}
// Add position-aware component for word order sensitivity
for (pos, word) in words.iter().enumerate() {
// Hashing "<pos>:<word>" sends the same word at a different position
// to a (most likely) different dimension
let pos_hash = fx_hash(format!("{}:{}", pos, word).as_bytes());
let dim = (pos_hash as usize) % MOCK_EMBEDDING_DIM;
// Earlier words weigh more: 1, 1/2, 1/3, ...
let weight = 1.0 / (pos as f32 + 1.0);
embedding[dim] += weight;
}
// Add character n-gram features for subword similarity. Trigrams are
// taken over the whole lowercased text, separators included.
let chars: Vec<char> = normalized.chars().collect();
// saturating_sub keeps the range empty for texts shorter than 3 characters
for i in 0..chars.len().saturating_sub(2) {
let trigram: String = chars[i..i + 3].iter().collect();
let hash = fx_hash(trigram.as_bytes());
let dim = (hash as usize) % MOCK_EMBEDDING_DIM;
embedding[dim] += 0.1;
}
// Scale to unit length (normalize leaves an all-zero vector untouched)
normalize(&mut embedding);
embedding
}
/// Calculate the cosine similarity between two embeddings.
///
/// Returns `0.0` when the slices differ in length or when either of them
/// has zero magnitude (which includes empty slices).
pub fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    let magnitude = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    let (norm_a, norm_b) = (magnitude(a), magnitude(b));
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    let dot = a.iter().zip(b).map(|(x, y)| x * y).sum::<f32>();
    dot / (norm_a * norm_b)
}
/// Calculate the euclidean distance between two embeddings.
///
/// Returns `f32::MAX` when the slices differ in length.
pub fn euclidean_distance(&self, a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return f32::MAX;
    }

    let squared_sum: f32 = a
        .iter()
        .zip(b)
        .map(|(x, y)| (x - y).powi(2))
        .sum();
    squared_sum.sqrt()
}
/// Find the candidate whose embedding has the highest cosine similarity to
/// `query`.
///
/// Returns the candidate's ID together with its similarity, or `None` when
/// `candidates` is empty. Scores that cannot be ordered (NaN) are treated
/// as equal.
pub fn find_most_similar<'a>(
    &self,
    query: &[f32],
    candidates: &'a [(String, Vec<f32>)],
) -> Option<(&'a str, f32)> {
    let scored = candidates.iter().map(|(id, embedding)| {
        let score = self.cosine_similarity(query, embedding);
        (id.as_str(), score)
    });

    scored.max_by(|lhs, rhs| {
        lhs.1
            .partial_cmp(&rhs.1)
            .unwrap_or(std::cmp::Ordering::Equal)
    })
}
/// Clear the embedding cache
///
/// Subsequent `embed` calls recompute their result. Because embeddings are
/// deterministic, the recomputed values equal the ones that were dropped.
pub fn clear_cache(&mut self) {
self.cache.clear();
}
/// Get cache size: the number of distinct texts with a cached embedding
pub fn cache_size(&self) -> usize {
self.cache.len()
}
/// Check if service is ready (always true for mock)
///
/// The mock has no model to load, so it unconditionally returns `true`.
pub fn is_ready(&self) -> bool {
true
}
}
/// Scale a vector in place so that its euclidean length becomes 1.
///
/// A vector whose length is not strictly positive (all zeros, empty, or a
/// NaN length) is left untouched to avoid dividing by zero.
fn normalize(v: &mut [f32]) {
    let length = v.iter().map(|x| x * x).sum::<f32>().sqrt();
    if length > 0.0 {
        v.iter_mut().for_each(|component| *component /= length);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Same input twice must give bit-identical vectors.
    #[test]
    fn test_deterministic_embedding() {
        let mut svc = MockEmbeddingService::new();
        let first = svc.embed("hello world");
        let second = svc.embed("hello world");
        assert_eq!(first, second);
    }

    /// Unrelated texts must not collapse onto the same vector.
    #[test]
    fn test_different_texts_different_embeddings() {
        let mut svc = MockEmbeddingService::new();
        let hello = svc.embed("hello world");
        let goodbye = svc.embed("goodbye universe");
        assert_ne!(hello, goodbye);
    }

    /// Every embedding has the advertised number of components.
    #[test]
    fn test_embedding_dimension() {
        let mut svc = MockEmbeddingService::new();
        let vector = svc.embed("test text");
        assert_eq!(vector.len(), MOCK_EMBEDDING_DIM);
    }

    /// Semantic embeddings come out with unit length.
    #[test]
    fn test_normalized_embeddings() {
        let mut svc = MockEmbeddingService::new();
        let vector = svc.embed("test normalization");
        let length: f32 = vector.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((length - 1.0).abs() < 0.001);
    }

    /// Sentences sharing most words score higher than unrelated ones.
    #[test]
    fn test_semantic_similarity() {
        let mut svc = MockEmbeddingService::new();
        let dog = svc.embed("the dog runs fast");
        let cat = svc.embed("the cat runs fast");
        let unrelated = svc.embed("machine learning algorithms");

        let close = svc.cosine_similarity(&dog, &cat);
        let far = svc.cosine_similarity(&dog, &unrelated);
        // Similar sentences should have higher similarity
        assert!(close > far);
    }

    /// Cosine similarity stays within its mathematical bounds.
    #[test]
    fn test_cosine_similarity_range() {
        let mut svc = MockEmbeddingService::new();
        let one = svc.embed("test one");
        let two = svc.embed("test two");
        let sim = svc.cosine_similarity(&one, &two);
        // Cosine similarity should be in [-1, 1]
        assert!((-1.0..=1.0).contains(&sim));
    }

    /// A vector compared with itself scores (approximately) 1.
    #[test]
    fn test_self_similarity() {
        let mut svc = MockEmbeddingService::new();
        let vector = svc.embed("self similarity test");
        let sim = svc.cosine_similarity(&vector, &vector);
        assert!((sim - 1.0).abs() < 0.001);
    }

    /// The cache grows once per distinct text and can be emptied.
    #[test]
    fn test_caching() {
        let mut svc = MockEmbeddingService::new();
        assert_eq!(svc.cache_size(), 0);

        svc.embed("text one");
        assert_eq!(svc.cache_size(), 1);

        // Repeating a text must hit the cache rather than add an entry.
        svc.embed("text one");
        assert_eq!(svc.cache_size(), 1);

        svc.embed("text two");
        assert_eq!(svc.cache_size(), 2);

        svc.clear_cache();
        assert_eq!(svc.cache_size(), 0);
    }

    /// The best match for a programming query is a programming document.
    #[test]
    fn test_find_most_similar() {
        let mut svc = MockEmbeddingService::new();
        let query = svc.embed("programming code");

        let mut candidates = Vec::new();
        candidates.push(("doc1".to_string(), svc.embed("python programming language")));
        candidates.push(("doc2".to_string(), svc.embed("cooking recipes")));
        candidates.push(("doc3".to_string(), svc.embed("software development code")));

        let best = svc.find_most_similar(&query, &candidates);
        assert!(best.is_some());
        // Should find a programming-related document
        let (winner, _) = best.unwrap();
        assert!(winner == "doc1" || winner == "doc3");
    }

    /// Empty input still yields a full-size vector.
    #[test]
    fn test_empty_text() {
        let mut svc = MockEmbeddingService::new();
        let vector = svc.embed("");
        assert_eq!(vector.len(), MOCK_EMBEDDING_DIM);
    }

    /// Simple (non-semantic) mode produces full-size, unit-length vectors.
    #[test]
    fn test_simple_mode() {
        let mut svc = MockEmbeddingService::new_simple();
        let vector = svc.embed("test simple mode");
        assert_eq!(vector.len(), MOCK_EMBEDDING_DIM);
        // Verify normalization
        let length: f32 = vector.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((length - 1.0).abs() < 0.001);
    }
}

View file

@ -0,0 +1,11 @@
//! Mock Services Module
//!
//! Provides mock implementations for testing:
//! - `MockEmbeddingService` - Deterministic embeddings using FxHash
//! - `TestDataFactory` - Generate test data with realistic properties
mod fixtures;
mod mock_embedding;
pub use fixtures::TestDataFactory;
pub use mock_embedding::MockEmbeddingService;