mirror of
https://github.com/samvallad33/vestige.git
synced 2026-04-30 19:36:22 +02:00
Initial commit: Vestige v1.0.0 - Cognitive memory MCP server
FSRS-6 spaced repetition, spreading activation, synaptic tagging, hippocampal indexing, and 130 years of memory research. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
commit
f9c60eb5a7
169 changed files with 97206 additions and 0 deletions
521
tests/e2e/src/assertions/mod.rs
Normal file
521
tests/e2e/src/assertions/mod.rs
Normal file
|
|
@ -0,0 +1,521 @@
|
|||
//! Custom Test Assertions
|
||||
//!
|
||||
//! Provides domain-specific assertions for memory testing:
|
||||
//! - Retention and decay assertions
|
||||
//! - Scheduling assertions
|
||||
//! - State transition assertions
|
||||
//! - Search result assertions
|
||||
|
||||
use vestige_core::{KnowledgeNode, Storage};
|
||||
|
||||
// ============================================================================
|
||||
// RETENTION ASSERTIONS
|
||||
// ============================================================================
|
||||
|
||||
/// Assert that retention has decreased from an expected value
///
/// Two forms: `(actual, original)` requires any strict decrease;
/// `(actual, original, min_decrease)` requires the drop to be at least
/// `min_decrease`.
///
/// # Example
/// ```rust,ignore
/// assert_retention_decreased!(node.retention_strength, 1.0, 0.1);
/// ```
#[macro_export]
macro_rules! assert_retention_decreased {
    // Any strict decrease is accepted.
    ($actual:expr, $original:expr) => {{
        assert!(
            $actual < $original,
            "Expected retention to decrease: {} should be less than {}",
            $actual,
            $original
        );
    }};
    // The drop must be at least `$min_decrease`.
    ($actual:expr, $original:expr, $min_decrease:expr) => {{
        let observed_drop = $original - $actual;
        assert!(
            observed_drop >= $min_decrease,
            "Expected retention to decrease by at least {}: actual decrease was {} ({} -> {})",
            $min_decrease,
            observed_drop,
            $original,
            $actual
        );
    }};
}
|
||||
|
||||
/// Assert that retention is within expected range
///
/// Bounds are inclusive on both ends.
#[macro_export]
macro_rules! assert_retention_in_range {
    ($actual:expr, $min:expr, $max:expr) => {{
        let within = ($min..=$max).contains(&$actual);
        assert!(
            within,
            "Expected retention in range [{}, {}], got {}",
            $min,
            $max,
            $actual
        );
    }};
}
|
||||
|
||||
/// Assert that retrieval strength has decayed properly
///
/// Checks that `$node.retrieval_strength` lies in `[expected_min, 1.0]`
/// after `$elapsed_days` of (virtual) elapsed time.
///
/// NOTE(review): when `$elapsed_days > 0.0` the lower bound is 0.0, so this
/// only verifies the value is a valid strength in [0, 1] — it does NOT
/// verify that any decay actually occurred, despite the inline comment.
/// When `$elapsed_days == 0.0` (or negative) it requires retrieval_strength
/// to be exactly 1.0. Confirm both behaviors are intended.
#[macro_export]
macro_rules! assert_retrieval_decayed {
    ($node:expr, $elapsed_days:expr) => {
        let expected_max = 1.0; // Can't exceed 1.0
        let expected_min = if $elapsed_days > 0.0 {
            0.0 // vacuous lower bound — see NOTE(review) above
        } else {
            1.0
        };
        assert!(
            $node.retrieval_strength >= expected_min && $node.retrieval_strength <= expected_max,
            "Retrieval strength {} out of expected range [{}, {}] after {} days",
            $node.retrieval_strength,
            expected_min,
            expected_max,
            $elapsed_days
        );
    };
}
|
||||
|
||||
// ============================================================================
|
||||
// SCHEDULING ASSERTIONS
|
||||
// ============================================================================
|
||||
|
||||
/// Assert that a memory is due for review
///
/// Works with any value exposing `is_due()` and a `Debug`-printable
/// `next_review` field.
#[macro_export]
macro_rules! assert_is_due {
    ($node:expr) => {{
        let due_now = $node.is_due();
        assert!(
            due_now,
            "Expected memory to be due for review, but next_review is {:?}",
            $node.next_review
        );
    }};
}
|
||||
|
||||
/// Assert that a memory is not due for review
///
/// Counterpart of `assert_is_due!`: fails when `is_due()` reports true.
#[macro_export]
macro_rules! assert_not_due {
    ($node:expr) => {{
        let due_now = $node.is_due();
        assert!(
            !due_now,
            "Expected memory to NOT be due for review, but it is (next_review: {:?})",
            $node.next_review
        );
    }};
}
|
||||
|
||||
/// Assert that interval increased after review
///
/// The "interval" is the number of whole days between `last_accessed` and
/// `next_review`; a node with no `next_review` counts as a 0-day interval.
/// Requires chrono-style timestamps (`num_days()` on the difference).
///
/// NOTE(review): the check uses `>=`, so an unchanged interval also passes
/// even though the failure message says "increase" — presumably to tolerate
/// same-day re-reviews where `num_days()` truncates to the same value.
/// Confirm whether a strict `>` is wanted.
#[macro_export]
macro_rules! assert_interval_increased {
    ($before:expr, $after:expr) => {
        let before_interval = $before
            .next_review
            .map(|t| (t - $before.last_accessed).num_days())
            .unwrap_or(0);
        let after_interval = $after
            .next_review
            .map(|t| (t - $after.last_accessed).num_days())
            .unwrap_or(0);
        assert!(
            after_interval >= before_interval,
            "Expected interval to increase: {} days -> {} days",
            before_interval,
            after_interval
        );
    };
}
|
||||
|
||||
/// Assert that stability increased after successful review
///
/// Equality is accepted: stability must simply not have gone down.
#[macro_export]
macro_rules! assert_stability_increased {
    ($before:expr, $after:expr) => {{
        let earlier = $before.stability;
        let later = $after.stability;
        assert!(
            later >= earlier,
            "Expected stability to increase: {} -> {}",
            earlier,
            later
        );
    }};
}
|
||||
|
||||
// ============================================================================
|
||||
// STATE ASSERTIONS
|
||||
// ============================================================================
|
||||
|
||||
/// Assert that storage strength increased
///
/// Equality is accepted: storage strength must simply not have gone down.
#[macro_export]
macro_rules! assert_storage_strength_increased {
    ($before:expr, $after:expr) => {{
        let earlier = $before.storage_strength;
        let later = $after.storage_strength;
        assert!(
            later >= earlier,
            "Expected storage strength to increase: {} -> {}",
            earlier,
            later
        );
    }};
}
|
||||
|
||||
/// Assert that reps count increased
///
/// Strict: the after-count must be greater than the before-count.
#[macro_export]
macro_rules! assert_reps_increased {
    ($before:expr, $after:expr) => {{
        let earlier = $before.reps;
        let later = $after.reps;
        assert!(
            later > earlier,
            "Expected reps to increase: {} -> {}",
            earlier,
            later
        );
    }};
}
|
||||
|
||||
/// Assert that lapses count increased
///
/// Strict: the after-count must be greater than the before-count.
#[macro_export]
macro_rules! assert_lapses_increased {
    ($before:expr, $after:expr) => {{
        let earlier = $before.lapses;
        let later = $after.lapses;
        assert!(
            later > earlier,
            "Expected lapses to increase: {} -> {}",
            earlier,
            later
        );
    }};
}
|
||||
|
||||
// ============================================================================
|
||||
// TEMPORAL ASSERTIONS
|
||||
// ============================================================================
|
||||
|
||||
/// Assert that a memory is currently valid
///
/// Delegates to the node's `is_currently_valid()`; on failure prints the
/// node's validity window for diagnosis.
#[macro_export]
macro_rules! assert_currently_valid {
    ($node:expr) => {
        assert!(
            $node.is_currently_valid(),
            "Expected memory to be currently valid, but valid_from={:?}, valid_until={:?}",
            $node.valid_from,
            $node.valid_until
        );
    };
}

/// Assert that a memory is not currently valid
///
/// Negated counterpart of `assert_currently_valid!`.
#[macro_export]
macro_rules! assert_not_currently_valid {
    ($node:expr) => {
        assert!(
            !$node.is_currently_valid(),
            "Expected memory to NOT be currently valid, but it is (valid_from={:?}, valid_until={:?})",
            $node.valid_from,
            $node.valid_until
        );
    };
}

/// Assert that a memory is valid at a specific time
///
/// Delegates to `is_valid_at($time)`; `$time` must be `Debug`-printable.
#[macro_export]
macro_rules! assert_valid_at {
    ($node:expr, $time:expr) => {
        assert!(
            $node.is_valid_at($time),
            "Expected memory to be valid at {:?}, but valid_from={:?}, valid_until={:?}",
            $time,
            $node.valid_from,
            $node.valid_until
        );
    };
}
|
||||
|
||||
// ============================================================================
|
||||
// SEARCH ASSERTIONS
|
||||
// ============================================================================
|
||||
|
||||
/// Assert that search results contain a specific ID
///
/// `$results` is any iterable of items with an `id` field comparable to `$id`.
#[macro_export]
macro_rules! assert_search_contains {
    ($results:expr, $id:expr) => {{
        let found = $results.iter().any(|n| n.id == $id);
        assert!(
            found,
            "Expected search results to contain ID {}, but it was not found",
            $id
        );
    }};
}
|
||||
|
||||
/// Assert that search results do not contain a specific ID
///
/// Negated counterpart of `assert_search_contains!`.
#[macro_export]
macro_rules! assert_search_not_contains {
    ($results:expr, $id:expr) => {{
        let found = $results.iter().any(|n| n.id == $id);
        assert!(
            !found,
            "Expected search results to NOT contain ID {}, but it was found",
            $id
        );
    }};
}
|
||||
|
||||
/// Assert search result count
///
/// Compares `$results.len()` against the expected count.
#[macro_export]
macro_rules! assert_search_count {
    ($results:expr, $expected:expr) => {{
        let actual = $results.len();
        assert_eq!(
            actual,
            $expected,
            "Expected {} search results, got {}",
            $expected,
            actual
        );
    }};
}
|
||||
|
||||
/// Assert that search results are ordered by relevance (first result is most relevant)
///
/// Fails on empty results, then checks that the first element's `id` matches.
#[macro_export]
macro_rules! assert_search_order {
    ($results:expr, $expected_first:expr) => {{
        assert!(!$results.is_empty(), "Expected non-empty search results");
        let first_id = &$results[0].id;
        assert_eq!(
            *first_id,
            $expected_first,
            "Expected first result to be {}, got {}",
            $expected_first,
            first_id
        );
    }};
}
|
||||
|
||||
// ============================================================================
|
||||
// EMBEDDING ASSERTIONS
|
||||
// ============================================================================
|
||||
|
||||
/// Assert that embeddings are similar (cosine similarity > threshold)
///
/// Computes cosine similarity of the two f32 vectors; a zero-norm vector
/// yields a similarity of 0.0.
#[macro_export]
macro_rules! assert_embeddings_similar {
    ($emb1:expr, $emb2:expr, $threshold:expr) => {{
        let dot: f32 = $emb1.iter().zip($emb2.iter()).map(|(a, b)| a * b).sum();
        let norm1 = $emb1.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
        let norm2 = $emb2.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
        // Guard each norm separately so degenerate vectors score 0.
        let similarity = match (norm1 > 0.0, norm2 > 0.0) {
            (true, true) => dot / (norm1 * norm2),
            _ => 0.0,
        };
        assert!(
            similarity >= $threshold,
            "Expected embeddings to be similar (>= {}), got similarity {}",
            $threshold,
            similarity
        );
    }};
}
|
||||
|
||||
/// Assert that embeddings are different (cosine similarity < threshold)
///
/// Computes cosine similarity of the two f32 vectors; a zero-norm vector
/// yields a similarity of 0.0.
#[macro_export]
macro_rules! assert_embeddings_different {
    ($emb1:expr, $emb2:expr, $threshold:expr) => {{
        let dot: f32 = $emb1.iter().zip($emb2.iter()).map(|(a, b)| a * b).sum();
        let norm1 = $emb1.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
        let norm2 = $emb2.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
        // Guard each norm separately so degenerate vectors score 0.
        let similarity = match (norm1 > 0.0, norm2 > 0.0) {
            (true, true) => dot / (norm1 * norm2),
            _ => 0.0,
        };
        assert!(
            similarity < $threshold,
            "Expected embeddings to be different (< {}), got similarity {}",
            $threshold,
            similarity
        );
    }};
}
|
||||
|
||||
// ============================================================================
|
||||
// HELPER FUNCTIONS
|
||||
// ============================================================================
|
||||
|
||||
/// Verify that a node exists in storage
///
/// Panics if the lookup errored or returned `None`.
pub fn assert_node_exists(storage: &Storage, id: &str) {
    let node = storage.get_node(id);
    // `is_ok()` short-circuits, so `unwrap()` only runs on Ok.
    assert!(
        node.is_ok() && node.unwrap().is_some(),
        "Expected node {} to exist in storage",
        id
    );
}

/// Verify that a node does not exist in storage
///
/// Panics if the lookup errored or returned `Some`.
pub fn assert_node_not_exists(storage: &Storage, id: &str) {
    let node = storage.get_node(id);
    assert!(
        node.is_ok() && node.unwrap().is_none(),
        "Expected node {} to NOT exist in storage",
        id
    );
}

/// Verify that storage has expected node count
///
/// # Panics
/// Panics if stats cannot be read, or the count differs from `expected`.
pub fn assert_node_count(storage: &Storage, expected: i64) {
    let stats = storage.get_stats().expect("Failed to get stats");
    assert_eq!(
        stats.total_nodes, expected,
        "Expected {} nodes, got {}",
        expected, stats.total_nodes
    );
}
|
||||
|
||||
/// Verify that a node has the expected content
pub fn assert_node_content(node: &KnowledgeNode, expected_content: &str) {
    assert_eq!(
        node.content, expected_content,
        "Expected content '{}', got '{}'",
        expected_content, node.content
    );
}

/// Verify that a node has the expected type
pub fn assert_node_type(node: &KnowledgeNode, expected_type: &str) {
    assert_eq!(
        node.node_type, expected_type,
        "Expected type '{}', got '{}'",
        expected_type, node.node_type
    );
}

/// Verify that a node has specific tags
///
/// Checks that every tag in `expected_tags` is present; extra tags on the
/// node are allowed (subset check, not equality).
pub fn assert_has_tags(node: &KnowledgeNode, expected_tags: &[&str]) {
    for tag in expected_tags {
        assert!(
            node.tags.contains(&tag.to_string()),
            "Expected node to have tag '{}', but tags are {:?}",
            tag,
            node.tags
        );
    }
}
|
||||
|
||||
/// Verify difficulty is within valid range
///
/// Bounds are inclusive. The [1.0, 10.0] range presumably mirrors the
/// FSRS difficulty domain — TODO confirm against vestige_core.
pub fn assert_difficulty_valid(node: &KnowledgeNode) {
    assert!(
        node.difficulty >= 1.0 && node.difficulty <= 10.0,
        "Difficulty {} is out of valid range [1.0, 10.0]",
        node.difficulty
    );
}

/// Verify stability is positive
///
/// Strictly positive: a stability of exactly 0.0 fails.
pub fn assert_stability_valid(node: &KnowledgeNode) {
    assert!(
        node.stability > 0.0,
        "Stability {} should be positive",
        node.stability
    );
}
|
||||
|
||||
/// Approximate equality for floating point
///
/// Panics unless |actual - expected| < epsilon. NaN inputs always fail,
/// since the comparison is false for NaN.
pub fn assert_approx_eq(actual: f64, expected: f64, epsilon: f64) {
    let difference = (actual - expected).abs();
    assert!(
        difference < epsilon,
        "Expected {} to be approximately equal to {} (epsilon: {})",
        actual,
        expected,
        epsilon
    );
}

/// Approximate equality for f32
///
/// Same contract as [`assert_approx_eq`], for single-precision values.
pub fn assert_approx_eq_f32(actual: f32, expected: f32, epsilon: f32) {
    let difference = (actual - expected).abs();
    assert!(
        difference < epsilon,
        "Expected {} to be approximately equal to {} (epsilon: {})",
        actual,
        expected,
        epsilon
    );
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{Duration, Utc};

    /// Build a fully-populated node for exercising the assertion helpers.
    /// Starts from `Default` and assigns fields individually — presumably
    /// because `KnowledgeNode` is `#[non_exhaustive]`; TODO confirm.
    fn create_test_node() -> KnowledgeNode {
        let mut node = KnowledgeNode::default();
        node.id = "test-id".to_string();
        node.content = "test content".to_string();
        node.node_type = "fact".to_string();
        node.created_at = Utc::now();
        node.updated_at = Utc::now();
        node.last_accessed = Utc::now();
        node.stability = 5.0;
        node.difficulty = 5.0;
        node.reps = 3;
        node.lapses = 0;
        node.storage_strength = 2.0;
        node.retrieval_strength = 0.9;
        node.retention_strength = 0.85;
        node.sentiment_score = 0.0;
        node.sentiment_magnitude = 0.0;
        // Due 5 days from now, so the node starts out "not due".
        node.next_review = Some(Utc::now() + Duration::days(5));
        node.source = None;
        node.tags = vec!["test".to_string(), "example".to_string()];
        node.valid_from = None;
        node.valid_until = None;
        node.has_embedding = None;
        node.embedding_model = None;
        node
    }

    // Smoke tests: each assertion macro/helper passes on a clearly valid input.

    #[test]
    fn test_retention_assertions() {
        assert_retention_decreased!(0.7, 1.0);
        assert_retention_decreased!(0.5, 1.0, 0.3);
        assert_retention_in_range!(0.85, 0.8, 0.9);
    }

    #[test]
    fn test_scheduling_assertions() {
        let mut node = create_test_node();

        // Not due yet (next_review is in the future)
        assert_not_due!(node);

        // Make it due
        node.next_review = Some(Utc::now() - Duration::hours(1));
        assert_is_due!(node);
    }

    #[test]
    fn test_temporal_assertions() {
        // No validity window set, so the node counts as currently valid.
        let node = create_test_node();
        assert_currently_valid!(node);
    }

    #[test]
    fn test_helper_functions() {
        let node = create_test_node();

        assert_node_content(&node, "test content");
        assert_node_type(&node, "fact");
        assert_has_tags(&node, &["test", "example"]);
        assert_difficulty_valid(&node);
        assert_stability_valid(&node);
    }

    #[test]
    fn test_approx_eq() {
        assert_approx_eq(0.90001, 0.9, 0.001);
        assert_approx_eq_f32(0.90001, 0.9, 0.001);
    }

    #[test]
    fn test_embedding_assertions() {
        let emb1 = vec![1.0f32, 0.0, 0.0];
        let emb2 = vec![0.9, 0.1, 0.0];
        let emb3 = vec![0.0, 1.0, 0.0];

        // emb1/emb2 nearly parallel; emb1/emb3 orthogonal.
        assert_embeddings_similar!(emb1, emb2, 0.8);
        assert_embeddings_different!(emb1, emb3, 0.5);
    }
}
|
||||
390
tests/e2e/src/harness/db_manager.rs
Normal file
390
tests/e2e/src/harness/db_manager.rs
Normal file
|
|
@ -0,0 +1,390 @@
|
|||
//! Test Database Manager
|
||||
//!
|
||||
//! Provides isolated database instances for testing:
|
||||
//! - Temporary databases that are automatically cleaned up
|
||||
//! - Pre-seeded databases with test data
|
||||
//! - Database snapshots and restoration
|
||||
//! - Concurrent test isolation
|
||||
|
||||
use vestige_core::{KnowledgeNode, Rating, Storage};
|
||||
use std::path::PathBuf;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Helper to create IngestInput (works around non_exhaustive)
///
/// `IngestInput` cannot be built with a struct literal from outside its
/// crate (non_exhaustive), so start from `Default` and assign each field.
fn make_ingest_input(
    content: String,
    node_type: String,
    tags: Vec<String>,
    sentiment_score: f64,
    sentiment_magnitude: f64,
    source: Option<String>,
    valid_from: Option<chrono::DateTime<chrono::Utc>>,
    valid_until: Option<chrono::DateTime<chrono::Utc>>,
) -> vestige_core::IngestInput {
    let mut input = vestige_core::IngestInput::default();
    input.content = content;
    input.node_type = node_type;
    input.tags = tags;
    input.sentiment_score = sentiment_score;
    input.sentiment_magnitude = sentiment_magnitude;
    input.source = source;
    input.valid_from = valid_from;
    input.valid_until = valid_until;
    input
}
|
||||
|
||||
/// Manager for test databases
|
||||
///
|
||||
/// Creates isolated database instances for each test to prevent interference.
|
||||
/// Automatically cleans up temporary databases when dropped.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// let mut db = TestDatabaseManager::new_temp();
|
||||
///
|
||||
/// // Use the storage
|
||||
/// db.storage.ingest(IngestInput { ... });
|
||||
///
|
||||
/// // Database is automatically deleted when `db` goes out of scope
|
||||
/// ```
|
||||
pub struct TestDatabaseManager {
    /// The storage instance
    pub storage: Storage,
    /// Temporary directory (kept alive to prevent premature deletion;
    /// `None` when the caller supplied an explicit path)
    _temp_dir: Option<TempDir>,
    /// Path to the database file
    db_path: PathBuf,
    /// Snapshot data for restore operations (consumed on restore)
    snapshot: Option<Vec<KnowledgeNode>>,
}
|
||||
|
||||
impl TestDatabaseManager {
|
||||
    /// Create a new test database in a temporary directory
    ///
    /// The database is automatically deleted when the manager is dropped.
    ///
    /// # Panics
    /// Panics if the temp directory or the storage cannot be created.
    pub fn new_temp() -> Self {
        let temp_dir = TempDir::new().expect("Failed to create temp directory");
        let db_path = temp_dir.path().join("test_vestige.db");

        let storage = Storage::new(Some(db_path.clone())).expect("Failed to create test storage");

        Self {
            storage,
            // Keep the TempDir alive; dropping it deletes the directory.
            _temp_dir: Some(temp_dir),
            db_path,
            snapshot: None,
        }
    }

    /// Create a test database at a specific path
    ///
    /// The database is NOT automatically deleted.
    ///
    /// # Panics
    /// Panics if the storage cannot be created at `path`.
    pub fn new_at_path(path: PathBuf) -> Self {
        let storage = Storage::new(Some(path.clone())).expect("Failed to create test storage");

        Self {
            storage,
            // No TempDir: the caller owns the path's lifetime.
            _temp_dir: None,
            db_path: path,
            snapshot: None,
        }
    }

    /// Get the database path
    // NOTE(review): returning `&Path` would be more idiomatic than `&PathBuf`,
    // but the signature is kept for caller compatibility.
    pub fn path(&self) -> &PathBuf {
        &self.db_path
    }

    /// Check if the database is empty
    // A failed stats read is treated as "empty" (`unwrap_or(true)`).
    pub fn is_empty(&self) -> bool {
        self.storage
            .get_stats()
            .map(|s| s.total_nodes == 0)
            .unwrap_or(true)
    }

    /// Get the number of nodes in the database
    // A failed stats read is reported as 0 nodes.
    pub fn node_count(&self) -> i64 {
        self.storage
            .get_stats()
            .map(|s| s.total_nodes)
            .unwrap_or(0)
    }
|
||||
|
||||
    // ========================================================================
    // SEEDING METHODS
    // ========================================================================

    /// Seed the database with a specified number of test nodes
    ///
    /// Returns the IDs of the successfully ingested nodes. Failed ingests
    /// are silently skipped, so the result may be shorter than `count`.
    pub fn seed_nodes(&mut self, count: usize) -> Vec<String> {
        let mut ids = Vec::with_capacity(count);

        for i in 0..count {
            let input = make_ingest_input(
                format!("Test memory content {}", i),
                "fact".to_string(),
                // Cycle through 5 tags so tag-based queries get groups.
                vec![format!("test-{}", i % 5)],
                0.0,
                0.0,
                None,
                None,
                None,
            );

            if let Ok(node) = self.storage.ingest(input) {
                ids.push(node.id);
            }
        }

        ids
    }

    /// Seed with diverse node types
    ///
    /// Inserts `count_per_type` nodes for each of the 5 built-in types and
    /// returns all successfully ingested IDs.
    pub fn seed_diverse(&mut self, count_per_type: usize) -> Vec<String> {
        let types = ["fact", "concept", "procedure", "event", "code"];
        let mut ids = Vec::with_capacity(count_per_type * types.len());

        for node_type in types {
            for i in 0..count_per_type {
                let input = make_ingest_input(
                    format!("Test {} content {}", node_type, i),
                    node_type.to_string(),
                    vec![node_type.to_string()],
                    0.0,
                    0.0,
                    None,
                    None,
                    None,
                );

                if let Ok(node) = self.storage.ingest(input) {
                    ids.push(node.id);
                }
            }
        }

        ids
    }

    /// Seed with nodes having various retention states
    ///
    /// Creates three nodes: never-reviewed, well-learned (Good/Good/Easy)
    /// and struggling (Again/Hard/Again). Review calls are best-effort:
    /// `mark_reviewed` errors are ignored.
    pub fn seed_with_retention_states(&mut self) -> Vec<String> {
        let mut ids = Vec::new();

        // New node (never reviewed)
        let input = make_ingest_input(
            "New memory - never reviewed".to_string(),
            "fact".to_string(),
            vec!["new".to_string()],
            0.0,
            0.0,
            None,
            None,
            None,
        );
        if let Ok(node) = self.storage.ingest(input) {
            ids.push(node.id);
        }

        // Well-learned node (multiple good reviews)
        let input = make_ingest_input(
            "Well-learned memory - reviewed multiple times".to_string(),
            "fact".to_string(),
            vec!["learned".to_string()],
            0.0,
            0.0,
            None,
            None,
            None,
        );
        if let Ok(node) = self.storage.ingest(input) {
            let _ = self.storage.mark_reviewed(&node.id, Rating::Good);
            let _ = self.storage.mark_reviewed(&node.id, Rating::Good);
            let _ = self.storage.mark_reviewed(&node.id, Rating::Easy);
            ids.push(node.id);
        }

        // Struggling node (multiple lapses)
        let input = make_ingest_input(
            "Struggling memory - has lapses".to_string(),
            "fact".to_string(),
            vec!["struggling".to_string()],
            0.0,
            0.0,
            None,
            None,
            None,
        );
        if let Ok(node) = self.storage.ingest(input) {
            let _ = self.storage.mark_reviewed(&node.id, Rating::Again);
            let _ = self.storage.mark_reviewed(&node.id, Rating::Hard);
            let _ = self.storage.mark_reviewed(&node.id, Rating::Again);
            ids.push(node.id);
        }

        ids
    }

    /// Seed with emotional memories (different sentiment magnitudes)
    ///
    /// Magnitudes ramp linearly from 0 towards 1; scores alternate between
    /// +0.8 and -0.8. With `count == 0` the loop never runs, so the
    /// `i / count` division below is never evaluated with a zero divisor.
    pub fn seed_emotional(&mut self, count: usize) -> Vec<String> {
        let mut ids = Vec::with_capacity(count);

        for i in 0..count {
            let magnitude = (i as f64) / (count as f64);
            let input = make_ingest_input(
                format!("Emotional memory with magnitude {:.2}", magnitude),
                "event".to_string(),
                vec!["emotional".to_string()],
                if i % 2 == 0 { 0.8 } else { -0.8 },
                magnitude,
                None,
                None,
                None,
            );

            if let Ok(node) = self.storage.ingest(input) {
                ids.push(node.id);
            }
        }

        ids
    }
|
||||
|
||||
    // ========================================================================
    // SNAPSHOT/RESTORE
    // ========================================================================

    /// Take a snapshot of current database state
    // NOTE(review): fetches a single page of at most 10,000 nodes; larger
    // databases are silently truncated in the snapshot. Confirm this is
    // acceptable for the test workloads that use it.
    pub fn take_snapshot(&mut self) {
        let nodes = self
            .storage
            .get_all_nodes(10000, 0)
            .unwrap_or_default();
        self.snapshot = Some(nodes);
    }

    /// Restore from the last snapshot
    ///
    /// Note: This clears the database and re-inserts all nodes from snapshot.
    /// IDs will NOT be preserved (new UUIDs are generated).
    ///
    /// Returns `true` if a snapshot existed and was restored. The snapshot
    /// is consumed (`take`), so a second call returns `false`. Only
    /// ingest-level fields survive; review state (stability, reps, lapses)
    /// is reset by re-ingestion.
    pub fn restore_snapshot(&mut self) -> bool {
        if let Some(nodes) = self.snapshot.take() {
            // Clear current data by recreating storage
            // Delete the database file first
            let _ = std::fs::remove_file(&self.db_path);
            self.storage = Storage::new(Some(self.db_path.clone()))
                .expect("Failed to recreate storage for restore");

            // Re-insert nodes (ingest errors are ignored, best-effort)
            for node in nodes {
                let input = make_ingest_input(
                    node.content,
                    node.node_type,
                    node.tags,
                    node.sentiment_score,
                    node.sentiment_magnitude,
                    node.source,
                    node.valid_from,
                    node.valid_until,
                );
                let _ = self.storage.ingest(input);
            }

            true
        } else {
            false
        }
    }

    /// Check if a snapshot exists
    pub fn has_snapshot(&self) -> bool {
        self.snapshot.is_some()
    }
|
||||
|
||||
// ========================================================================
|
||||
// CLEANUP
|
||||
// ========================================================================
|
||||
|
||||
/// Clear all data from the database
|
||||
pub fn clear(&mut self) {
|
||||
// Get all node IDs and delete them
|
||||
if let Ok(nodes) = self.storage.get_all_nodes(10000, 0) {
|
||||
for node in nodes {
|
||||
let _ = self.storage.delete_node(&node.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
    /// Recreate the database (useful for testing migrations)
    ///
    /// Deletes the underlying file and opens a fresh `Storage` at the same
    /// path; all data is lost. Any in-memory snapshot is left untouched.
    ///
    /// # Panics
    /// Panics if the storage cannot be recreated.
    pub fn recreate(&mut self) {
        // Delete the database file (ignore errors, e.g. file already gone)
        let _ = std::fs::remove_file(&self.db_path);

        // Recreate storage
        self.storage = Storage::new(Some(self.db_path.clone()))
            .expect("Failed to recreate storage");
    }
|
||||
}
|
||||
|
||||
impl Drop for TestDatabaseManager {
    // NOTE(review): intentionally a no-op — `Storage` and the optional
    // `TempDir` already clean up in their own Drop impls, so this empty
    // impl could be removed entirely without changing behavior.
    fn drop(&mut self) {
        // Storage is dropped automatically
        // TempDir (if Some) will clean up the temp directory
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_temp_database_creation() {
        let db = TestDatabaseManager::new_temp();
        assert!(db.is_empty());
        // Storage::new must have created the file on disk.
        assert!(db.path().exists());
    }

    #[test]
    fn test_seed_nodes() {
        let mut db = TestDatabaseManager::new_temp();
        let ids = db.seed_nodes(10);

        assert_eq!(ids.len(), 10);
        assert_eq!(db.node_count(), 10);
    }

    #[test]
    fn test_seed_diverse() {
        let mut db = TestDatabaseManager::new_temp();
        let ids = db.seed_diverse(3);

        // 5 types * 3 each = 15
        assert_eq!(ids.len(), 15);
        assert_eq!(db.node_count(), 15);
    }

    #[test]
    fn test_clear_database() {
        let mut db = TestDatabaseManager::new_temp();
        db.seed_nodes(5);
        assert_eq!(db.node_count(), 5);

        db.clear();
        assert!(db.is_empty());
    }

    #[test]
    fn test_snapshot_restore() {
        let mut db = TestDatabaseManager::new_temp();
        db.seed_nodes(5);

        db.take_snapshot();
        assert!(db.has_snapshot());

        db.clear();
        assert!(db.is_empty());

        // Restore re-ingests the snapshotted nodes (with fresh IDs).
        db.restore_snapshot();
        assert_eq!(db.node_count(), 5);
    }
}
|
||||
11
tests/e2e/src/harness/mod.rs
Normal file
11
tests/e2e/src/harness/mod.rs
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
//! Test Harness Module
|
||||
//!
|
||||
//! Provides test setup utilities:
|
||||
//! - `TimeTravelEnvironment` for testing time-dependent behavior (decay, scheduling)
|
||||
//! - `TestDatabaseManager` for isolated test databases
|
||||
|
||||
mod db_manager;
|
||||
mod time_travel;
|
||||
|
||||
pub use db_manager::TestDatabaseManager;
|
||||
pub use time_travel::TimeTravelEnvironment;
|
||||
342
tests/e2e/src/harness/time_travel.rs
Normal file
342
tests/e2e/src/harness/time_travel.rs
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
//! Time Travel Environment for Testing Decay
|
||||
//!
|
||||
//! Enables testing of time-dependent memory behavior:
|
||||
//! - FSRS-6 scheduling and intervals
|
||||
//! - Memory decay (retrieval strength degradation)
|
||||
//! - Temporal validity periods
|
||||
//! - Consolidation timing
|
||||
//!
|
||||
//! Uses a virtual clock that can be advanced without waiting.
|
||||
|
||||
use chrono::{DateTime, Duration, Utc};
|
||||
use std::cell::RefCell;
|
||||
|
||||
/// Environment for testing time-dependent memory behavior
|
||||
///
|
||||
/// Provides a virtual clock that can be advanced to test:
|
||||
/// - Memory decay over time
|
||||
/// - FSRS-6 scheduling calculations
|
||||
/// - Temporal validity windows
|
||||
/// - Consolidation cycles
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// let mut env = TimeTravelEnvironment::new();
|
||||
///
|
||||
/// // Start at a known time
|
||||
/// env.set_time(Utc::now());
|
||||
///
|
||||
/// // Advance 30 days to test decay
|
||||
/// env.advance_days(30);
|
||||
///
|
||||
/// // Check retrievability at this point
|
||||
/// let elapsed = env.days_since(original_time);
|
||||
/// ```
|
||||
pub struct TimeTravelEnvironment {
    /// Current virtual time. `RefCell` gives interior mutability so the
    /// clock can be advanced through `&self`; this makes the type
    /// single-threaded (not `Sync`).
    current_time: RefCell<DateTime<Utc>>,
    /// Original start time for reference (immutable after construction)
    start_time: DateTime<Utc>,
    /// History of time jumps for debugging
    time_history: RefCell<Vec<TimeJump>>,
}
|
||||
|
||||
/// Record of a time jump for debugging
#[derive(Debug, Clone)]
pub struct TimeJump {
    /// Virtual time before the jump
    pub from: DateTime<Utc>,
    /// Virtual time after the jump
    pub to: DateTime<Utc>,
    /// Human-readable cause, e.g. "set_time" or "advance ..."
    pub reason: String,
}
|
||||
|
||||
impl Default for TimeTravelEnvironment {
    /// Equivalent to `TimeTravelEnvironment::new()`: starts at the real
    /// current time with an empty jump history.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl TimeTravelEnvironment {
|
||||
    /// Create a new time travel environment starting at the current time
    pub fn new() -> Self {
        // Capture `now` once so current_time and start_time agree exactly.
        let now = Utc::now();
        Self {
            current_time: RefCell::new(now),
            start_time: now,
            time_history: RefCell::new(Vec::new()),
        }
    }

    /// Create environment at a specific starting time
    pub fn at(time: DateTime<Utc>) -> Self {
        Self {
            current_time: RefCell::new(time),
            start_time: time,
            time_history: RefCell::new(Vec::new()),
        }
    }
|
||||
|
||||
    /// Get the current virtual time
    pub fn now(&self) -> DateTime<Utc> {
        *self.current_time.borrow()
    }

    /// Get the original start time
    pub fn start_time(&self) -> DateTime<Utc> {
        self.start_time
    }
|
||||
|
||||
    /// Set the current time to a specific point
    ///
    /// Records the jump in the history log. Takes `&self`: mutation goes
    /// through the interior `RefCell`s.
    pub fn set_time(&self, time: DateTime<Utc>) {
        // Read the old value first so the borrow ends before borrow_mut.
        let from = *self.current_time.borrow();
        self.time_history.borrow_mut().push(TimeJump {
            from,
            to: time,
            reason: "set_time".to_string(),
        });
        *self.current_time.borrow_mut() = time;
    }

    /// Advance time by a duration
    ///
    /// Negative durations move time backwards; the jump is still recorded.
    pub fn advance(&self, duration: Duration) {
        let from = *self.current_time.borrow();
        let to = from + duration;
        self.time_history.borrow_mut().push(TimeJump {
            from,
            to,
            reason: format!("advance {:?}", duration),
        });
        *self.current_time.borrow_mut() = to;
    }

    /// Advance time by the specified number of days
    pub fn advance_days(&self, days: i64) {
        self.advance(Duration::days(days));
    }

    /// Advance time by the specified number of hours
    pub fn advance_hours(&self, hours: i64) {
        self.advance(Duration::hours(hours));
    }

    /// Advance time by the specified number of minutes
    pub fn advance_minutes(&self, minutes: i64) {
        self.advance(Duration::minutes(minutes));
    }

    /// Advance time by the specified number of seconds
    pub fn advance_seconds(&self, seconds: i64) {
        self.advance(Duration::seconds(seconds));
    }
|
||||
|
||||
/// Calculate days elapsed since a reference time
|
||||
pub fn days_since(&self, reference: DateTime<Utc>) -> f64 {
|
||||
let current = *self.current_time.borrow();
|
||||
(current - reference).num_seconds() as f64 / 86400.0
|
||||
}
|
||||
|
||||
/// Calculate days elapsed since the start time
|
||||
pub fn days_since_start(&self) -> f64 {
|
||||
self.days_since(self.start_time)
|
||||
}
|
||||
|
||||
/// Calculate hours elapsed since a reference time
|
||||
pub fn hours_since(&self, reference: DateTime<Utc>) -> f64 {
|
||||
let current = *self.current_time.borrow();
|
||||
(current - reference).num_seconds() as f64 / 3600.0
|
||||
}
|
||||
|
||||
/// Get time history for debugging
|
||||
pub fn get_history(&self) -> Vec<TimeJump> {
|
||||
self.time_history.borrow().clone()
|
||||
}
|
||||
|
||||
/// Clear time history
|
||||
pub fn clear_history(&self) {
|
||||
self.time_history.borrow_mut().clear();
|
||||
}
|
||||
|
||||
/// Reset to start time
|
||||
pub fn reset(&self) {
|
||||
let from = *self.current_time.borrow();
|
||||
self.time_history.borrow_mut().push(TimeJump {
|
||||
from,
|
||||
to: self.start_time,
|
||||
reason: "reset".to_string(),
|
||||
});
|
||||
*self.current_time.borrow_mut() = self.start_time;
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// DECAY TESTING HELPERS
|
||||
// ========================================================================
|
||||
|
||||
/// Calculate expected retrievability at current time
|
||||
///
|
||||
/// Uses FSRS-6 power forgetting curve:
|
||||
/// R = (1 + factor * t / S)^(-w20)
|
||||
pub fn expected_retrievability(&self, stability: f64, last_review: DateTime<Utc>) -> f64 {
|
||||
let elapsed_days = self.days_since(last_review);
|
||||
vestige_core::retrievability(stability, elapsed_days)
|
||||
}
|
||||
|
||||
/// Calculate expected retrievability with custom decay
|
||||
pub fn expected_retrievability_with_decay(
|
||||
&self,
|
||||
stability: f64,
|
||||
last_review: DateTime<Utc>,
|
||||
w20: f64,
|
||||
) -> f64 {
|
||||
let elapsed_days = self.days_since(last_review);
|
||||
vestige_core::retrievability_with_decay(stability, elapsed_days, w20)
|
||||
}
|
||||
|
||||
/// Check if a memory would be due for review at current time
|
||||
pub fn is_due(&self, next_review: DateTime<Utc>) -> bool {
|
||||
*self.current_time.borrow() >= next_review
|
||||
}
|
||||
|
||||
/// Calculate how overdue a memory is (negative if not yet due)
|
||||
pub fn days_overdue(&self, next_review: DateTime<Utc>) -> f64 {
|
||||
self.days_since(next_review)
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// SCHEDULING HELPERS
|
||||
// ========================================================================
|
||||
|
||||
/// Advance to when a memory would be due
|
||||
pub fn advance_to_due(&self, next_review: DateTime<Utc>) {
|
||||
let from = *self.current_time.borrow();
|
||||
self.time_history.borrow_mut().push(TimeJump {
|
||||
from,
|
||||
to: next_review,
|
||||
reason: "advance_to_due".to_string(),
|
||||
});
|
||||
*self.current_time.borrow_mut() = next_review;
|
||||
}
|
||||
|
||||
/// Advance past due date by specified days
|
||||
pub fn advance_past_due(&self, next_review: DateTime<Utc>, days_overdue: i64) {
|
||||
let target = next_review + Duration::days(days_overdue);
|
||||
let from = *self.current_time.borrow();
|
||||
self.time_history.borrow_mut().push(TimeJump {
|
||||
from,
|
||||
to: target,
|
||||
reason: format!("advance_past_due +{} days", days_overdue),
|
||||
});
|
||||
*self.current_time.borrow_mut() = target;
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// TEMPORAL VALIDITY HELPERS
|
||||
// ========================================================================
|
||||
|
||||
/// Check if a time is within a validity window
|
||||
pub fn is_within_validity(
|
||||
&self,
|
||||
valid_from: Option<DateTime<Utc>>,
|
||||
valid_until: Option<DateTime<Utc>>,
|
||||
) -> bool {
|
||||
let current = *self.current_time.borrow();
|
||||
let after_start = valid_from.map(|t| current >= t).unwrap_or(true);
|
||||
let before_end = valid_until.map(|t| current <= t).unwrap_or(true);
|
||||
after_start && before_end
|
||||
}
|
||||
|
||||
/// Advance to just before validity starts
|
||||
pub fn advance_to_before_validity(&self, valid_from: DateTime<Utc>) {
|
||||
let target = valid_from - Duration::seconds(1);
|
||||
self.set_time(target);
|
||||
}
|
||||
|
||||
/// Advance to just after validity ends
|
||||
pub fn advance_to_after_validity(&self, valid_until: DateTime<Utc>) {
|
||||
let target = valid_until + Duration::seconds(1);
|
||||
self.set_time(target);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Advancing 10 virtual days must read back as ~10 elapsed days.
    #[test]
    fn test_time_travel_basic() {
        let env = TimeTravelEnvironment::new();
        let start = env.now();

        env.advance_days(10);

        assert!(env.days_since(start) >= 9.99);
        assert!(env.days_since(start) <= 10.01);
    }

    // reset() must return the clock exactly to the original start time.
    #[test]
    fn test_time_travel_reset() {
        let env = TimeTravelEnvironment::new();
        let start = env.start_time();

        env.advance_days(100);
        env.reset();

        assert_eq!(env.now(), start);
    }

    // Retrievability must decrease monotonically as virtual time advances.
    // With stability=10, FSRS expects ~0.9 retention at t=10 days.
    #[test]
    fn test_retrievability_decay() {
        let env = TimeTravelEnvironment::new();
        let stability = 10.0;
        let last_review = env.now();

        // At t=0, retrievability should be ~1.0
        let r0 = env.expected_retrievability(stability, last_review);
        assert!(r0 > 0.99);

        // After 10 days with stability=10, retrievability should be ~0.9
        env.advance_days(10);
        let r10 = env.expected_retrievability(stability, last_review);
        assert!(r10 < r0);
        assert!(r10 > 0.85 && r10 < 0.95);

        // After 30 days, retrievability should be much lower
        env.advance_days(20);
        let r30 = env.expected_retrievability(stability, last_review);
        assert!(r30 < r10);
    }

    // is_due/days_overdue must flip sign exactly at the scheduled review time.
    #[test]
    fn test_due_date_checking() {
        let env = TimeTravelEnvironment::new();
        let next_review = env.now() + Duration::days(5);

        // Not due yet
        assert!(!env.is_due(next_review));
        assert!(env.days_overdue(next_review) < 0.0);

        // Advance to due
        env.advance_to_due(next_review);
        assert!(env.is_due(next_review));
        assert!(env.days_overdue(next_review).abs() < 0.01);

        // Advance past due
        env.advance_days(3);
        assert!(env.is_due(next_review));
        assert!(env.days_overdue(next_review) > 2.99);
    }

    // Every advance_* call must append exactly one entry to the jump history.
    #[test]
    fn test_history_tracking() {
        let env = TimeTravelEnvironment::new();

        env.advance_days(1);
        env.advance_hours(12);
        env.advance_minutes(30);

        let history = env.get_history();
        assert_eq!(history.len(), 3);

        env.clear_history();
        assert!(env.get_history().is_empty());
    }
}
|
||||
49
tests/e2e/src/lib.rs
Normal file
49
tests/e2e/src/lib.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
//! E2E Test Infrastructure for Vestige
|
||||
//!
|
||||
//! Provides comprehensive testing utilities for 250+ end-to-end tests:
|
||||
//!
|
||||
//! - **Harness**: Test setup, time travel, database management
|
||||
//! - **Mocks**: MockEmbeddingService (FxHash-based), test fixtures
|
||||
//! - **Assertions**: Custom assertions for memory states, decay, etc.
|
||||
//!
|
||||
//! ## Quick Start
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! use vestige_e2e_tests::prelude::*;
|
||||
//!
|
||||
//! #[test]
|
||||
//! fn test_memory_decay() {
|
||||
//! let mut env = TimeTravelEnvironment::new();
|
||||
//! let mut db = TestDatabaseManager::new_temp();
|
||||
//!
|
||||
//! // Create test data
|
||||
//! let node = TestDataFactory::create_memory(&mut db.storage, "test content");
|
||||
//!
|
||||
//! // Time travel to test decay
|
||||
//! env.advance_days(30);
|
||||
//!
|
||||
//! // Assert decay occurred
|
||||
//! assert_retention_decreased!(db.storage.get_node(&node.id), 0.9);
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
pub mod assertions;
|
||||
pub mod harness;
|
||||
pub mod mocks;
|
||||
|
||||
// Re-export commonly used items
|
||||
pub use harness::{TestDatabaseManager, TimeTravelEnvironment};
|
||||
pub use mocks::{MockEmbeddingService, TestDataFactory};
|
||||
|
||||
/// Convenient imports for tests: `use vestige_e2e_tests::prelude::*;`
/// brings in the harness, mocks, assertion macros, and the commonly used
/// vestige-core types in a single line.
pub mod prelude {
    pub use crate::assertions::*;
    pub use crate::harness::{TestDatabaseManager, TimeTravelEnvironment};
    pub use crate::mocks::{MockEmbeddingService, TestDataFactory};

    // Re-export vestige-core essentials so tests need only this prelude.
    pub use vestige_core::{
        FSRSScheduler, FSRSState, IngestInput, KnowledgeNode, NodeType, Rating, RecallInput,
        Result, SearchMode, Storage, StorageError,
    };
}
|
||||
573
tests/e2e/src/mocks/fixtures.rs
Normal file
573
tests/e2e/src/mocks/fixtures.rs
Normal file
|
|
@ -0,0 +1,573 @@
|
|||
//! Test Data Factory
|
||||
//!
|
||||
//! Provides utilities for generating realistic test data:
|
||||
//! - Memory nodes with various properties
|
||||
//! - Batch generation for stress testing
|
||||
//! - Pre-built scenarios for common test cases
|
||||
|
||||
use chrono::{DateTime, Duration, Utc};
|
||||
use vestige_core::{KnowledgeNode, Rating, Storage};
|
||||
|
||||
/// Helper to create IngestInput (works around non_exhaustive)
|
||||
fn make_ingest_input(
|
||||
content: String,
|
||||
node_type: String,
|
||||
tags: Vec<String>,
|
||||
sentiment_score: f64,
|
||||
sentiment_magnitude: f64,
|
||||
source: Option<String>,
|
||||
valid_from: Option<DateTime<Utc>>,
|
||||
valid_until: Option<DateTime<Utc>>,
|
||||
) -> vestige_core::IngestInput {
|
||||
let mut input = vestige_core::IngestInput::default();
|
||||
input.content = content;
|
||||
input.node_type = node_type;
|
||||
input.tags = tags;
|
||||
input.sentiment_score = sentiment_score;
|
||||
input.sentiment_magnitude = sentiment_magnitude;
|
||||
input.source = source;
|
||||
input.valid_from = valid_from;
|
||||
input.valid_until = valid_until;
|
||||
input
|
||||
}
|
||||
|
||||
/// Factory for creating test data
|
||||
///
|
||||
/// Generates realistic test data with configurable properties.
|
||||
/// Designed for creating comprehensive test scenarios.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// let mut storage = Storage::new(Some(path))?;
|
||||
///
|
||||
/// // Create a single memory
|
||||
/// let node = TestDataFactory::create_memory(&mut storage, "test content");
|
||||
///
|
||||
/// // Create a batch
|
||||
/// let nodes = TestDataFactory::create_batch(&mut storage, 100);
|
||||
///
|
||||
/// // Create a specific scenario
|
||||
/// let scenario = TestDataFactory::create_decay_scenario(&mut storage);
|
||||
/// ```
|
||||
pub struct TestDataFactory;
|
||||
|
||||
/// Configuration for batch memory generation via
/// `TestDataFactory::create_batch_with_config`.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Number of memories to create.
    pub count: usize,
    /// Node type to use; `None` cycles through a fixed list of types.
    pub node_type: Option<String>,
    /// Base content prefix; each memory's content is "{prefix} {index}".
    pub content_prefix: String,
    /// Tags applied to every generated memory.
    pub tags: Vec<String>,
    /// Whether to ramp sentiment score/magnitude across the batch.
    pub with_sentiment: bool,
    /// Whether to cycle temporal validity windows across the batch.
    pub with_temporal: bool,
}
|
||||
|
||||
impl Default for BatchConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
count: 10,
|
||||
node_type: None,
|
||||
content_prefix: "Test memory".to_string(),
|
||||
tags: vec![],
|
||||
with_sentiment: false,
|
||||
with_temporal: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scenario containing related test data created by one of the
/// `TestDataFactory::create_*_scenario` builders.
#[derive(Debug)]
pub struct TestScenario {
    /// IDs of all nodes created for this scenario.
    pub node_ids: Vec<String>,
    /// Human-readable description of what the scenario exercises.
    pub description: String,
    /// Named lookups for assertions (e.g. role name -> node id, or counts).
    pub metadata: std::collections::HashMap<String, String>,
}
|
||||
|
||||
impl TestDataFactory {
    // ========================================================================
    // SINGLE MEMORY CREATION
    // ========================================================================

    /// Create a simple "fact" memory with the given content.
    ///
    /// Returns `None` if ingestion fails (errors are swallowed via `.ok()`;
    /// factory methods are best-effort by design).
    pub fn create_memory(storage: &mut Storage, content: &str) -> Option<KnowledgeNode> {
        let input = make_ingest_input(
            content.to_string(),
            "fact".to_string(),
            vec![],
            0.0,
            0.0,
            None,
            None,
            None,
        );
        storage.ingest(input).ok()
    }

    /// Create a memory with full configuration: node type, source, tags,
    /// and sentiment (no temporal bounds).
    pub fn create_memory_full(
        storage: &mut Storage,
        content: &str,
        node_type: &str,
        source: Option<&str>,
        tags: Vec<&str>,
        sentiment_score: f64,
        sentiment_magnitude: f64,
    ) -> Option<KnowledgeNode> {
        let input = make_ingest_input(
            content.to_string(),
            node_type.to_string(),
            tags.iter().map(|s| s.to_string()).collect(),
            sentiment_score,
            sentiment_magnitude,
            source.map(String::from),
            None,
            None,
        );
        storage.ingest(input).ok()
    }

    /// Create a "fact" memory with an optional temporal validity window.
    pub fn create_temporal_memory(
        storage: &mut Storage,
        content: &str,
        valid_from: Option<DateTime<Utc>>,
        valid_until: Option<DateTime<Utc>>,
    ) -> Option<KnowledgeNode> {
        let input = make_ingest_input(
            content.to_string(),
            "fact".to_string(),
            vec![],
            0.0,
            0.0,
            None,
            valid_from,
            valid_until,
        );
        storage.ingest(input).ok()
    }

    /// Create an "event" memory carrying sentiment score and magnitude.
    pub fn create_emotional_memory(
        storage: &mut Storage,
        content: &str,
        sentiment: f64,
        magnitude: f64,
    ) -> Option<KnowledgeNode> {
        let input = make_ingest_input(
            content.to_string(),
            "event".to_string(),
            vec![],
            sentiment,
            magnitude,
            None,
            None,
            None,
        );
        storage.ingest(input).ok()
    }

    // ========================================================================
    // BATCH CREATION
    // ========================================================================

    /// Create `count` memories with the default batch configuration.
    pub fn create_batch(storage: &mut Storage, count: usize) -> Vec<String> {
        Self::create_batch_with_config(storage, BatchConfig { count, ..Default::default() })
    }

    /// Create a batch with custom configuration; returns the IDs of the
    /// successfully ingested nodes (failed ingests are skipped, so the
    /// result may hold fewer than `config.count` IDs).
    pub fn create_batch_with_config(storage: &mut Storage, config: BatchConfig) -> Vec<String> {
        let node_types = ["fact", "concept", "procedure", "event", "code"];
        let mut ids = Vec::with_capacity(config.count);

        for i in 0..config.count {
            // Pinned node type, or cycle through the fixed list.
            let node_type = config
                .node_type
                .clone()
                .unwrap_or_else(|| node_types[i % node_types.len()].to_string());

            // Sentiment score ramps linearly from -1.0 toward +1.0 across the batch.
            let sentiment_score = if config.with_sentiment {
                ((i as f64) / (config.count as f64) * 2.0) - 1.0
            } else {
                0.0
            };

            // Magnitude ramps linearly from 0.0 toward 1.0 across the batch.
            let sentiment_magnitude = if config.with_sentiment {
                (i as f64) / (config.count as f64)
            } else {
                0.0
            };

            // Temporal bounds cycle every 3 items: currently-valid, expired, unbounded.
            let (valid_from, valid_until) = if config.with_temporal {
                let now = Utc::now();
                if i % 3 == 0 {
                    (Some(now - Duration::days(30)), Some(now + Duration::days(30)))
                } else if i % 3 == 1 {
                    (Some(now - Duration::days(60)), Some(now - Duration::days(30)))
                } else {
                    (None, None)
                }
            } else {
                (None, None)
            };

            let input = make_ingest_input(
                format!("{} {}", config.content_prefix, i),
                node_type,
                config.tags.clone(),
                sentiment_score,
                sentiment_magnitude,
                None,
                valid_from,
                valid_until,
            );

            if let Ok(node) = storage.ingest(input) {
                ids.push(node.id);
            }
        }

        ids
    }

    // ========================================================================
    // SCENARIO CREATION
    // ========================================================================

    /// Create a scenario for testing memory decay: one richly-sourced memory,
    /// one bare memory, and one highly emotional memory. Metadata keys:
    /// "high_stability", "low_stability", "emotional".
    ///
    /// NOTE(review): "stability" here refers to the ingest inputs (tags,
    /// source, sentiment) — whether the scheduler actually assigns different
    /// initial stability from these is decided in vestige-core; verify there.
    pub fn create_decay_scenario(storage: &mut Storage) -> TestScenario {
        let mut ids = Vec::new();
        let mut metadata = std::collections::HashMap::new();

        // High stability memory (should decay slowly)
        let high_stab = Self::create_memory_full(
            storage,
            "Well-learned fact about photosynthesis",
            "fact",
            Some("biology textbook"),
            vec!["biology", "science"],
            0.3,
            0.5,
        );
        if let Some(node) = high_stab {
            metadata.insert("high_stability".to_string(), node.id.clone());
            ids.push(node.id);
        }

        // Low stability memory (should decay quickly)
        let low_stab = Self::create_memory(storage, "Random fact I just learned");
        if let Some(node) = low_stab {
            metadata.insert("low_stability".to_string(), node.id.clone());
            ids.push(node.id);
        }

        // Emotional memory (decay should be affected by sentiment)
        let emotional = Self::create_emotional_memory(
            storage,
            "Important life event",
            0.9,
            0.95,
        );
        if let Some(node) = emotional {
            metadata.insert("emotional".to_string(), node.id.clone());
            ids.push(node.id);
        }

        TestScenario {
            node_ids: ids,
            description: "Decay testing scenario with varied stability".to_string(),
            metadata,
        }
    }

    /// Create a scenario with cards in different learning states, driven by
    /// review counts and ratings. Metadata keys: "new", "learning", "review",
    /// "relearning".
    pub fn create_scheduling_scenario(storage: &mut Storage) -> TestScenario {
        let mut ids = Vec::new();
        let mut metadata = std::collections::HashMap::new();

        // New card (never reviewed)
        let new_card = Self::create_memory(storage, "Brand new memory");
        if let Some(node) = new_card {
            metadata.insert("new".to_string(), node.id.clone());
            ids.push(node.id);
        }

        // Learning card (one Good review)
        if let Some(node) = Self::create_memory(storage, "Learning memory") {
            let _ = storage.mark_reviewed(&node.id, Rating::Good);
            metadata.insert("learning".to_string(), node.id.clone());
            ids.push(node.id);
        }

        // Review card (five Good reviews)
        if let Some(node) = Self::create_memory(storage, "Well-reviewed memory") {
            for _ in 0..5 {
                let _ = storage.mark_reviewed(&node.id, Rating::Good);
            }
            metadata.insert("review".to_string(), node.id.clone());
            ids.push(node.id);
        }

        // Relearning card (a Good review followed by a lapse)
        if let Some(node) = Self::create_memory(storage, "Struggling memory") {
            let _ = storage.mark_reviewed(&node.id, Rating::Good);
            let _ = storage.mark_reviewed(&node.id, Rating::Again);
            metadata.insert("relearning".to_string(), node.id.clone());
            ids.push(node.id);
        }

        TestScenario {
            node_ids: ids,
            description: "Scheduling scenario with cards in different learning states".to_string(),
            metadata,
        }
    }

    /// Create a scenario with categorized content for search tests:
    /// 3 programming facts, 3 science facts, 2 cooking procedures.
    /// Metadata records the intended count per category as strings.
    pub fn create_search_scenario(storage: &mut Storage) -> TestScenario {
        let mut ids = Vec::new();
        let mut metadata = std::collections::HashMap::new();

        // Programming memories
        for content in [
            "Rust programming language uses ownership for memory safety",
            "Python is great for data science and machine learning",
            "JavaScript runs in web browsers and Node.js",
        ] {
            if let Some(node) = Self::create_memory_full(
                storage,
                content,
                "fact",
                Some("programming docs"),
                vec!["programming", "code"],
                0.0,
                0.0,
            ) {
                ids.push(node.id);
            }
        }
        metadata.insert("programming_count".to_string(), "3".to_string());

        // Science memories
        for content in [
            "Mitochondria is the powerhouse of the cell",
            "DNA contains genetic information",
            "Gravity is the force of attraction between masses",
        ] {
            if let Some(node) = Self::create_memory_full(
                storage,
                content,
                "fact",
                Some("science textbook"),
                vec!["science"],
                0.0,
                0.0,
            ) {
                ids.push(node.id);
            }
        }
        metadata.insert("science_count".to_string(), "3".to_string());

        // Recipe memories
        for content in [
            "To make pasta, boil water and add salt",
            "Chocolate cake requires cocoa powder and eggs",
        ] {
            if let Some(node) = Self::create_memory_full(
                storage,
                content,
                "procedure",
                Some("cookbook"),
                vec!["cooking", "recipes"],
                0.0,
                0.0,
            ) {
                ids.push(node.id);
            }
        }
        metadata.insert("recipe_count".to_string(), "2".to_string());

        TestScenario {
            node_ids: ids,
            description: "Search scenario with categorized content".to_string(),
            metadata,
        }
    }

    /// Create a scenario with different validity periods relative to "now":
    /// currently valid, expired, future-only, and unbounded. Metadata keys:
    /// "current", "expired", "future", "always_valid".
    pub fn create_temporal_scenario(storage: &mut Storage) -> TestScenario {
        let now = Utc::now();
        let mut ids = Vec::new();
        let mut metadata = std::collections::HashMap::new();

        // Currently valid: window straddles now (+/- 10 days)
        if let Some(node) = Self::create_temporal_memory(
            storage,
            "Currently valid memory",
            Some(now - Duration::days(10)),
            Some(now + Duration::days(10)),
        ) {
            metadata.insert("current".to_string(), node.id.clone());
            ids.push(node.id);
        }

        // Expired: window ended 30 days ago
        if let Some(node) = Self::create_temporal_memory(
            storage,
            "Expired memory",
            Some(now - Duration::days(60)),
            Some(now - Duration::days(30)),
        ) {
            metadata.insert("expired".to_string(), node.id.clone());
            ids.push(node.id);
        }

        // Future: window opens 30 days from now
        if let Some(node) = Self::create_temporal_memory(
            storage,
            "Future memory",
            Some(now + Duration::days(30)),
            Some(now + Duration::days(60)),
        ) {
            metadata.insert("future".to_string(), node.id.clone());
            ids.push(node.id);
        }

        // No bounds (always valid)
        if let Some(node) = Self::create_temporal_memory(
            storage,
            "Always valid memory",
            None,
            None,
        ) {
            metadata.insert("always_valid".to_string(), node.id.clone());
            ids.push(node.id);
        }

        TestScenario {
            node_ids: ids,
            description: "Temporal scenario with different validity periods".to_string(),
            metadata,
        }
    }

    // ========================================================================
    // UTILITY METHODS
    // ========================================================================

    /// Deterministically pick a node type from a fixed list of nine,
    /// based on `seed` (same seed -> same type).
    pub fn random_node_type(seed: usize) -> &'static str {
        const TYPES: [&str; 9] = [
            "fact", "concept", "procedure", "event", "relationship",
            "quote", "code", "question", "insight",
        ];
        TYPES[seed % TYPES.len()]
    }

    /// Generate deterministic lorem-ipsum-like content of exactly `words`
    /// words, drawn from a fixed 20-word vocabulary. The stride of 7 is
    /// coprime with the vocabulary size so successive words vary.
    pub fn lorem_content(words: usize, seed: usize) -> String {
        const WORDS: [&str; 20] = [
            "the", "memory", "learning", "knowledge", "algorithm",
            "data", "system", "process", "function", "method",
            "class", "object", "variable", "constant", "type",
            "structure", "pattern", "design", "architecture", "code",
        ];

        (0..words)
            .map(|i| WORDS[(seed + i * 7) % WORDS.len()])
            .collect::<Vec<_>>()
            .join(" ")
    }

    /// Generate `count` deterministic tags from a fixed 10-tag vocabulary.
    /// Tags may repeat once `count` exceeds the vocabulary size.
    pub fn generate_tags(count: usize, seed: usize) -> Vec<String> {
        const TAGS: [&str; 10] = [
            "important", "review", "todo", "concept", "fact",
            "code", "note", "idea", "question", "reference",
        ];

        (0..count)
            .map(|i| TAGS[(seed + i) % TAGS.len()].to_string())
            .collect()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // Fresh Storage backed by a SQLite file in a temp directory.
    //
    // NOTE(review): the TempDir guard `dir` is dropped when this function
    // returns, which deletes the directory while Storage still points into
    // it. This relies on the file handle staying valid after unlink (POSIX
    // semantics) — consider returning `(Storage, TempDir)` to keep the
    // directory alive for the test's duration.
    fn create_test_storage() -> Storage {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("test.db");
        Storage::new(Some(db_path)).unwrap()
    }

    // create_memory stores the content verbatim and returns the node.
    #[test]
    fn test_create_memory() {
        let mut storage = create_test_storage();
        let node = TestDataFactory::create_memory(&mut storage, "test content");

        assert!(node.is_some());
        assert_eq!(node.unwrap().content, "test content");
    }

    // A batch of 10 yields 10 IDs and 10 stored nodes.
    #[test]
    fn test_create_batch() {
        let mut storage = create_test_storage();
        let ids = TestDataFactory::create_batch(&mut storage, 10);

        assert_eq!(ids.len(), 10);

        let stats = storage.get_stats().unwrap();
        assert_eq!(stats.total_nodes, 10);
    }

    // Decay scenario records all three named roles in its metadata.
    #[test]
    fn test_create_decay_scenario() {
        let mut storage = create_test_storage();
        let scenario = TestDataFactory::create_decay_scenario(&mut storage);

        assert!(!scenario.node_ids.is_empty());
        assert!(scenario.metadata.contains_key("high_stability"));
        assert!(scenario.metadata.contains_key("low_stability"));
        assert!(scenario.metadata.contains_key("emotional"));
    }

    // Scheduling scenario records the learning-state roles in its metadata.
    #[test]
    fn test_create_scheduling_scenario() {
        let mut storage = create_test_storage();
        let scenario = TestDataFactory::create_scheduling_scenario(&mut storage);

        assert!(!scenario.node_ids.is_empty());
        assert!(scenario.metadata.contains_key("new"));
        assert!(scenario.metadata.contains_key("learning"));
        assert!(scenario.metadata.contains_key("review"));
    }

    // lorem_content produces exactly the requested word count.
    #[test]
    fn test_lorem_content() {
        let content = TestDataFactory::lorem_content(10, 42);
        let words: Vec<_> = content.split_whitespace().collect();

        assert_eq!(words.len(), 10);
    }

    // generate_tags produces the requested count of non-empty tags.
    #[test]
    fn test_generate_tags() {
        let tags = TestDataFactory::generate_tags(5, 0);

        assert_eq!(tags.len(), 5);
        assert!(tags.iter().all(|t| !t.is_empty()));
    }
}
|
||||
377
tests/e2e/src/mocks/mock_embedding.rs
Normal file
377
tests/e2e/src/mocks/mock_embedding.rs
Normal file
|
|
@ -0,0 +1,377 @@
|
|||
//! Mock Embedding Service using FxHash
|
||||
//!
|
||||
//! Provides deterministic embeddings for testing without requiring
|
||||
//! the actual fastembed model. Uses FxHash for fast, consistent hashing.
|
||||
//!
|
||||
//! Key properties:
|
||||
//! - Deterministic: Same input always produces same embedding
|
||||
//! - Fast: No ML model loading/inference
|
||||
//! - Semantic similarity: Similar strings produce similar embeddings
|
||||
//! - Normalized: All embeddings have unit length
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Dimensionality of mock embeddings (768, matching BGE-base-en-v1.5
/// so mocks are drop-in compatible with real embedding shapes).
pub const MOCK_EMBEDDING_DIM: usize = 768;
|
||||
|
||||
/// FxHash-style fast, non-cryptographic hash (after Firefox's hasher).
///
/// Every byte is folded into the accumulator with a 5-bit left rotate,
/// an XOR, and a wrapping multiply by the seed constant. Deterministic
/// across runs; not suitable for security-sensitive hashing.
fn fx_hash(data: &[u8]) -> u64 {
    const SEED: u64 = 0x517cc1b727220a95;
    data.iter().fold(SEED, |acc, &byte| {
        (acc.rotate_left(5) ^ u64::from(byte)).wrapping_mul(SEED)
    })
}
|
||||
|
||||
/// Mock embedding service for testing
|
||||
///
|
||||
/// Produces deterministic embeddings based on text content using FxHash.
|
||||
/// Designed to approximate real embedding behavior:
|
||||
/// - Similar texts produce similar embeddings
|
||||
/// - Different texts produce different embeddings
|
||||
/// - Embeddings are normalized to unit length
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// let service = MockEmbeddingService::new();
|
||||
///
|
||||
/// let emb1 = service.embed("hello world");
|
||||
/// let emb2 = service.embed("hello world");
|
||||
/// let emb3 = service.embed("goodbye world");
|
||||
///
|
||||
/// // Same input = same output
|
||||
/// assert_eq!(emb1, emb2);
|
||||
///
|
||||
/// // Different input = different output
|
||||
/// assert_ne!(emb1, emb3);
|
||||
///
|
||||
/// // But similar inputs have higher similarity
|
||||
/// let sim_same = service.cosine_similarity(&emb1, &emb2);
|
||||
/// let sim_diff = service.cosine_similarity(&emb1, &emb3);
|
||||
/// assert!(sim_same > sim_diff);
|
||||
/// ```
|
||||
pub struct MockEmbeddingService {
    /// Cache of previously computed embeddings, keyed by the exact input text.
    cache: HashMap<String, Vec<f32>>,
    /// When true, embeddings are built from word-level hashes (better
    /// semantic similarity); when false, from whole-string hashing.
    semantic_mode: bool,
}
|
||||
|
||||
impl Default for MockEmbeddingService {
    /// Equivalent to [`MockEmbeddingService::new`] (semantic mode enabled).
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl MockEmbeddingService {
    /// Create a new mock embedding service
    ///
    /// Uses semantic mode (word- and trigram-level hashing), so texts that
    /// share words produce embeddings with higher cosine similarity.
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
            semantic_mode: true,
        }
    }

    /// Create a service without semantic mode (pure hash-based)
    ///
    /// Embeddings derive from whole-string hashes only, so any two distinct
    /// inputs are roughly uncorrelated regardless of shared words.
    pub fn new_simple() -> Self {
        Self {
            cache: HashMap::new(),
            semantic_mode: false,
        }
    }

    /// Embed text into a vector
    ///
    /// Deterministic: the first call computes and caches the embedding; every
    /// later call with the same text returns a clone of the cached vector.
    /// The result has length `MOCK_EMBEDDING_DIM` and unit L2 norm (unless
    /// the raw vector is all zeros, which `normalize` leaves untouched).
    pub fn embed(&mut self, text: &str) -> Vec<f32> {
        // Check cache first
        if let Some(cached) = self.cache.get(text) {
            return cached.clone();
        }

        let embedding = if self.semantic_mode {
            self.semantic_embed(text)
        } else {
            self.simple_embed(text)
        };

        self.cache.insert(text.to_string(), embedding.clone());
        embedding
    }

    /// Simple hash-based embedding
    ///
    /// Fills the vector 64 dimensions at a time; each chunk is seeded with a
    /// hash of `"<chunk index>:<lowercased text>"` so chunks differ while the
    /// overall mapping stays deterministic.
    fn simple_embed(&self, text: &str) -> Vec<f32> {
        let mut embedding = vec![0.0f32; MOCK_EMBEDDING_DIM];
        let normalized = text.to_lowercase();

        // Use multiple hash seeds for different dimensions
        for (i, chunk) in embedding.chunks_mut(64).enumerate() {
            let seed_text = format!("{}:{}", i, normalized);
            let hash = fx_hash(seed_text.as_bytes());

            for (j, val) in chunk.iter_mut().enumerate() {
                // Generate pseudo-random float from hash.
                // rotate_left wraps shift amounts >= 64, so j * 5 (up to 315)
                // is well-defined.
                let shifted = hash.rotate_left((j * 5) as u32);
                // Map the u64 into [-1.0, 1.0].
                *val = ((shifted as f32 / u64::MAX as f32) * 2.0) - 1.0;
            }
        }

        normalize(&mut embedding);
        embedding
    }

    /// Semantic-aware embedding (word-level hashing)
    ///
    /// Builds the vector from three deterministic feature families:
    /// per-word sparse dimensions, position-weighted word features (earlier
    /// words weigh more), and character-trigram features for subword overlap.
    fn semantic_embed(&self, text: &str) -> Vec<f32> {
        let mut embedding = vec![0.0f32; MOCK_EMBEDDING_DIM];
        let normalized = text.to_lowercase();

        // Tokenize into words (split on any non-alphanumeric character)
        let words: Vec<&str> = normalized
            .split(|c: char| !c.is_alphanumeric())
            .filter(|w| !w.is_empty())
            .collect();

        if words.is_empty() {
            // Fall back to simple embedding for empty text
            return self.simple_embed(text);
        }

        // Each word contributes to the embedding
        for word in &words {
            let word_hash = fx_hash(word.as_bytes());

            // Map word to a sparse set of dimensions (16 per word); sign and
            // magnitude are carved from different bit ranges of the same hash.
            // NOTE: the u64 -> f32 cast before `% 100.0` loses precision, but
            // the mock only needs determinism, not statistical quality.
            for i in 0..16 {
                let dim = ((word_hash >> (i * 4)) as usize) % MOCK_EMBEDDING_DIM;
                let sign = if (word_hash >> (i + 48)) & 1 == 0 { 1.0 } else { -1.0 };
                let magnitude = ((word_hash >> (i * 2)) as f32 % 100.0) / 100.0 + 0.5;
                embedding[dim] += sign * magnitude;
            }
        }

        // Add position-aware component for word order sensitivity
        // (weight decays as 1 / (position + 1))
        for (pos, word) in words.iter().enumerate() {
            let pos_hash = fx_hash(format!("{}:{}", pos, word).as_bytes());
            let dim = (pos_hash as usize) % MOCK_EMBEDDING_DIM;
            let weight = 1.0 / (pos as f32 + 1.0);
            embedding[dim] += weight;
        }

        // Add character n-gram features for subword similarity
        let chars: Vec<char> = normalized.chars().collect();
        for i in 0..chars.len().saturating_sub(2) {
            let trigram: String = chars[i..i + 3].iter().collect();
            let hash = fx_hash(trigram.as_bytes());
            let dim = (hash as usize) % MOCK_EMBEDDING_DIM;
            embedding[dim] += 0.1;
        }

        normalize(&mut embedding);
        embedding
    }

    /// Calculate cosine similarity between two embeddings
    ///
    /// Returns 0.0 as a sentinel when the slices differ in length or either
    /// vector has zero norm; otherwise a value in roughly [-1, 1] (f32
    /// rounding may nudge it slightly past the bounds).
    pub fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
        if a.len() != b.len() {
            return 0.0;
        }

        let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();

        if norm_a == 0.0 || norm_b == 0.0 {
            return 0.0;
        }

        dot / (norm_a * norm_b)
    }

    /// Calculate euclidean distance between two embeddings
    ///
    /// Returns `f32::MAX` as a sentinel when the slices differ in length.
    pub fn euclidean_distance(&self, a: &[f32], b: &[f32]) -> f32 {
        if a.len() != b.len() {
            return f32::MAX;
        }

        a.iter()
            .zip(b.iter())
            .map(|(x, y)| (x - y).powi(2))
            .sum::<f32>()
            .sqrt()
    }

    /// Find most similar embedding from a set
    ///
    /// Returns the candidate id with the highest cosine similarity to
    /// `query`, or `None` if `candidates` is empty. NaN similarities are
    /// treated as equal (compare falls back to `Ordering::Equal`).
    pub fn find_most_similar<'a>(
        &self,
        query: &[f32],
        candidates: &'a [(String, Vec<f32>)],
    ) -> Option<(&'a str, f32)> {
        candidates
            .iter()
            .map(|(id, emb)| (id.as_str(), self.cosine_similarity(query, emb)))
            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
    }

    /// Clear the embedding cache
    pub fn clear_cache(&mut self) {
        self.cache.clear();
    }

    /// Get cache size (number of distinct texts embedded so far)
    pub fn cache_size(&self) -> usize {
        self.cache.len()
    }

    /// Check if service is ready (always true for mock)
    ///
    /// Kept so the mock can stand in for a real service with a readiness probe.
    pub fn is_ready(&self) -> bool {
        true
    }
}
|
||||
|
||||
/// Scale `v` in place to unit L2 length.
///
/// A zero vector is left untouched, avoiding a division by zero.
fn normalize(v: &mut [f32]) {
    let magnitude = v.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
    if magnitude > 0.0 {
        v.iter_mut().for_each(|x| *x /= magnitude);
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Same text must always produce the same vector.
    #[test]
    fn test_deterministic_embedding() {
        let mut service = MockEmbeddingService::new();

        let emb1 = service.embed("hello world");
        let emb2 = service.embed("hello world");

        assert_eq!(emb1, emb2);
    }

    /// Distinct texts must produce distinct vectors.
    #[test]
    fn test_different_texts_different_embeddings() {
        let mut service = MockEmbeddingService::new();

        let emb1 = service.embed("hello world");
        let emb2 = service.embed("goodbye universe");

        assert_ne!(emb1, emb2);
    }

    /// Output dimension is fixed at MOCK_EMBEDDING_DIM.
    #[test]
    fn test_embedding_dimension() {
        let mut service = MockEmbeddingService::new();
        let emb = service.embed("test text");

        assert_eq!(emb.len(), MOCK_EMBEDDING_DIM);
    }

    /// Embeddings come back L2-normalized.
    #[test]
    fn test_normalized_embeddings() {
        let mut service = MockEmbeddingService::new();
        let emb = service.embed("test normalization");

        let norm: f32 = emb.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - 1.0).abs() < 0.001);
    }

    /// Sentences sharing words score higher than unrelated sentences.
    #[test]
    fn test_semantic_similarity() {
        let mut service = MockEmbeddingService::new();

        let emb_dog = service.embed("the dog runs fast");
        let emb_cat = service.embed("the cat runs fast");
        let emb_car = service.embed("machine learning algorithms");

        let sim_animals = service.cosine_similarity(&emb_dog, &emb_cat);
        let sim_different = service.cosine_similarity(&emb_dog, &emb_car);

        // Similar sentences should have higher similarity
        assert!(sim_animals > sim_different);
    }

    /// Cosine similarity stays within its mathematical range.
    #[test]
    fn test_cosine_similarity_range() {
        let mut service = MockEmbeddingService::new();

        let emb1 = service.embed("test one");
        let emb2 = service.embed("test two");

        let sim = service.cosine_similarity(&emb1, &emb2);

        // Cosine similarity should be in [-1, 1]
        assert!((-1.0..=1.0).contains(&sim));
    }

    /// A vector compared with itself has similarity ~1.
    #[test]
    fn test_self_similarity() {
        let mut service = MockEmbeddingService::new();
        let emb = service.embed("self similarity test");

        let sim = service.cosine_similarity(&emb, &emb);
        assert!((sim - 1.0).abs() < 0.001);
    }

    /// Cache grows per distinct text, hits on repeats, and clears fully.
    #[test]
    fn test_caching() {
        let mut service = MockEmbeddingService::new();
        assert_eq!(service.cache_size(), 0);

        service.embed("text one");
        assert_eq!(service.cache_size(), 1);

        service.embed("text one"); // Should use cache
        assert_eq!(service.cache_size(), 1);

        service.embed("text two");
        assert_eq!(service.cache_size(), 2);

        service.clear_cache();
        assert_eq!(service.cache_size(), 0);
    }

    /// Nearest-neighbour lookup prefers topically related candidates.
    #[test]
    fn test_find_most_similar() {
        let mut service = MockEmbeddingService::new();

        let query = service.embed("programming code");
        let candidates = vec![
            ("doc1".to_string(), service.embed("python programming language")),
            ("doc2".to_string(), service.embed("cooking recipes")),
            ("doc3".to_string(), service.embed("software development code")),
        ];

        let result = service.find_most_similar(&query, &candidates);
        assert!(result.is_some());

        // Should find a programming-related document
        let (id, _) = result.unwrap();
        assert!(id == "doc1" || id == "doc3");
    }

    /// Empty input still yields a full-dimension vector (simple fallback).
    #[test]
    fn test_empty_text() {
        let mut service = MockEmbeddingService::new();
        let emb = service.embed("");

        assert_eq!(emb.len(), MOCK_EMBEDDING_DIM);
    }

    /// Simple (non-semantic) mode also yields normalized, full-size vectors.
    #[test]
    fn test_simple_mode() {
        let mut service = MockEmbeddingService::new_simple();
        let emb = service.embed("test simple mode");

        assert_eq!(emb.len(), MOCK_EMBEDDING_DIM);

        // Verify normalization
        let norm: f32 = emb.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - 1.0).abs() < 0.001);
    }
}
|
||||
11
tests/e2e/src/mocks/mod.rs
Normal file
11
tests/e2e/src/mocks/mod.rs
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
//! Mock Services Module
|
||||
//!
|
||||
//! Provides mock implementations for testing:
|
||||
//! - `MockEmbeddingService` - Deterministic embeddings using FxHash
|
||||
//! - `TestDataFactory` - Generate test data with realistic properties
|
||||
|
||||
mod fixtures;
|
||||
mod mock_embedding;
|
||||
|
||||
pub use fixtures::TestDataFactory;
|
||||
pub use mock_embedding::MockEmbeddingService;
|
||||
Loading…
Add table
Add a link
Reference in a new issue