Initial commit: Vestige v1.0.0 - Cognitive memory MCP server

FSRS-6 spaced repetition, spreading activation, synaptic tagging,
hippocampal indexing, and 130 years of memory research.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Sam Valladares 2026-01-25 01:31:03 -06:00
commit f9c60eb5a7
169 changed files with 97206 additions and 0 deletions

View file

@ -0,0 +1,573 @@
//! Test Data Factory
//!
//! Provides utilities for generating realistic test data:
//! - Memory nodes with various properties
//! - Batch generation for stress testing
//! - Pre-built scenarios for common test cases
use chrono::{DateTime, Duration, Utc};
use vestige_core::{KnowledgeNode, Rating, Storage};
/// Assemble a `vestige_core::IngestInput` from individual field values.
///
/// The value is built by taking `IngestInput::default()` and assigning each
/// field afterwards; the original note explains this is a workaround for the
/// type being `non_exhaustive`.
fn make_ingest_input(
    content: String,
    node_type: String,
    tags: Vec<String>,
    sentiment_score: f64,
    sentiment_magnitude: f64,
    source: Option<String>,
    valid_from: Option<DateTime<Utc>>,
    valid_until: Option<DateTime<Utc>>,
) -> vestige_core::IngestInput {
    let mut ingest = vestige_core::IngestInput::default();

    // Temporal validity window.
    ingest.valid_from = valid_from;
    ingest.valid_until = valid_until;

    // Sentiment.
    ingest.sentiment_magnitude = sentiment_magnitude;
    ingest.sentiment_score = sentiment_score;

    // Payload and classification.
    ingest.source = source;
    ingest.tags = tags;
    ingest.node_type = node_type;
    ingest.content = content;

    ingest
}
/// Factory for creating test data
///
/// A unit struct used purely as a namespace: every helper is an associated
/// function that takes the `Storage` to populate as its first argument.
/// Single-memory helpers return `Option<KnowledgeNode>`, batch helpers return
/// the IDs of the created nodes, and scenario helpers return a `TestScenario`.
///
/// # Example
///
/// ```rust,ignore
/// let mut storage = Storage::new(Some(path))?;
///
/// // Create a single memory (None if ingestion failed)
/// let node = TestDataFactory::create_memory(&mut storage, "test content");
///
/// // Create a batch; the result is the list of created node IDs
/// let ids = TestDataFactory::create_batch(&mut storage, 100);
///
/// // Create a specific scenario
/// let scenario = TestDataFactory::create_decay_scenario(&mut storage);
/// ```
pub struct TestDataFactory;
/// Configuration for batch memory generation
///
/// Consumed by `TestDataFactory::create_batch_with_config`.
#[derive(Debug, Clone)]
pub struct BatchConfig {
/// Number of memories to attempt to create
pub count: usize,
/// Node type applied to every memory. When `None`, the type is chosen
/// deterministically by cycling through a fixed list by index (it is not
/// actually random).
pub node_type: Option<String>,
/// Base content prefix; each memory's content is "<prefix> <index>"
pub content_prefix: String,
/// Tags to apply (cloned onto every memory in the batch)
pub tags: Vec<String>,
/// Whether to add sentiment. When set, the score starts at -1.0 and the
/// magnitude at 0.0, both increasing with the index; otherwise both are 0.0.
pub with_sentiment: bool,
/// Whether to add temporal validity. When set, memories rotate by index
/// between a window around now, a window entirely in the past, and no
/// bounds; otherwise no bounds are set.
pub with_temporal: bool,
}
impl Default for BatchConfig {
fn default() -> Self {
Self {
count: 10,
node_type: None,
content_prefix: "Test memory".to_string(),
tags: vec![],
with_sentiment: false,
with_temporal: false,
}
}
}
/// Scenario containing related test data
///
/// Returned by the `TestDataFactory::create_*_scenario` helpers.
#[derive(Debug)]
pub struct TestScenario {
/// IDs of created nodes, in creation order (nodes whose ingestion failed
/// are simply absent)
pub node_ids: Vec<String>,
/// Description of the scenario
pub description: String,
/// Metadata for test assertions: string keys mapped to string values.
/// Depending on the scenario, a value is either a node ID stored under a
/// role name or a count rendered as text.
pub metadata: std::collections::HashMap<String, String>,
}
impl TestDataFactory {
// ========================================================================
// SINGLE MEMORY CREATION
// ========================================================================
/// Ingest `content` as a bare "fact" node: no tags, no source, zero
/// sentiment, and no validity bounds.
///
/// Returns the stored node, or `None` when `Storage::ingest` reports an
/// error (the error itself is discarded).
pub fn create_memory(storage: &mut Storage, content: &str) -> Option<KnowledgeNode> {
    let no_tags: Vec<String> = Vec::new();
    let (score, magnitude) = (0.0, 0.0);

    storage
        .ingest(make_ingest_input(
            content.to_owned(),
            String::from("fact"),
            no_tags,
            score,
            magnitude,
            None,
            None,
            None,
        ))
        .ok()
}
/// Ingest a memory with caller-chosen node type, source, tags, and sentiment.
///
/// Temporal validity is left unset. Returns the stored node, or `None` when
/// `Storage::ingest` reports an error (the error itself is discarded).
pub fn create_memory_full(
    storage: &mut Storage,
    content: &str,
    node_type: &str,
    source: Option<&str>,
    tags: Vec<&str>,
    sentiment_score: f64,
    sentiment_magnitude: f64,
) -> Option<KnowledgeNode> {
    // Convert the borrowed inputs into the owned forms the ingest input holds.
    let owned_tags: Vec<String> = tags.into_iter().map(String::from).collect();
    let owned_source: Option<String> = source.map(str::to_owned);

    let request = make_ingest_input(
        content.to_owned(),
        node_type.to_owned(),
        owned_tags,
        sentiment_score,
        sentiment_magnitude,
        owned_source,
        None,
        None,
    );

    storage.ingest(request).ok()
}
/// Ingest `content` as a "fact" node carrying the given validity bounds.
///
/// Either bound may be `None`. Tags, source, and sentiment are left empty or
/// zero. Returns the stored node, or `None` when `Storage::ingest` reports an
/// error (the error itself is discarded).
pub fn create_temporal_memory(
    storage: &mut Storage,
    content: &str,
    valid_from: Option<DateTime<Utc>>,
    valid_until: Option<DateTime<Utc>>,
) -> Option<KnowledgeNode> {
    let request = make_ingest_input(
        content.to_owned(),
        String::from("fact"),
        Vec::new(),
        0.0,
        0.0,
        None,
        valid_from,
        valid_until,
    );

    storage.ingest(request).ok()
}
/// Ingest `content` as an "event" node with the given sentiment score and
/// magnitude.
///
/// Tags, source, and validity bounds are left empty. Returns the stored node,
/// or `None` when `Storage::ingest` reports an error (the error itself is
/// discarded).
pub fn create_emotional_memory(
    storage: &mut Storage,
    content: &str,
    sentiment: f64,
    magnitude: f64,
) -> Option<KnowledgeNode> {
    let request = make_ingest_input(
        content.to_owned(),
        String::from("event"),
        Vec::new(),
        sentiment,
        magnitude,
        None,
        None,
        None,
    );

    storage.ingest(request).ok()
}
// ========================================================================
// BATCH CREATION
// ========================================================================
/// Create a batch of memories
pub fn create_batch(storage: &mut Storage, count: usize) -> Vec<String> {
Self::create_batch_with_config(storage, BatchConfig { count, ..Default::default() })
}
/// Create a batch of memories as described by `config`.
///
/// Memory `i` gets the content "<prefix> <i>". When no node type is
/// configured, the type cycles through a fixed five-entry list by index.
/// Returns the IDs of the nodes that were ingested successfully; failed
/// ingests are skipped, so the result may be shorter than `config.count`.
pub fn create_batch_with_config(storage: &mut Storage, config: BatchConfig) -> Vec<String> {
    const CYCLED_TYPES: [&str; 5] = ["fact", "concept", "procedure", "event", "code"];

    let total = config.count;
    let mut created = Vec::with_capacity(total);

    for index in 0..total {
        let node_type = match &config.node_type {
            Some(fixed) => fixed.clone(),
            None => CYCLED_TYPES[index % CYCLED_TYPES.len()].to_string(),
        };

        // Score ramps from -1.0 upward and magnitude from 0.0 upward as the
        // index grows; both stay at 0.0 when sentiment is disabled.
        let (sentiment_score, sentiment_magnitude) = if config.with_sentiment {
            let fraction = (index as f64) / (total as f64);
            (fraction * 2.0 - 1.0, fraction)
        } else {
            (0.0, 0.0)
        };

        // Rotate between a window around now, a past window, and no bounds.
        let (valid_from, valid_until) = if config.with_temporal {
            let now = Utc::now();
            match index % 3 {
                0 => (Some(now - Duration::days(30)), Some(now + Duration::days(30))),
                1 => (Some(now - Duration::days(60)), Some(now - Duration::days(30))),
                _ => (None, None),
            }
        } else {
            (None, None)
        };

        let request = make_ingest_input(
            format!("{} {}", config.content_prefix, index),
            node_type,
            config.tags.clone(),
            sentiment_score,
            sentiment_magnitude,
            None,
            valid_from,
            valid_until,
        );

        // Failed ingests contribute nothing.
        created.extend(storage.ingest(request).ok().map(|node| node.id));
    }

    created
}
// ========================================================================
// SCENARIO CREATION
// ========================================================================
/// Build the decay-testing scenario: three memories whose IDs are recorded in
/// the metadata under "high_stability", "low_stability", and "emotional".
///
/// A memory that fails to ingest is omitted from both `node_ids` and the
/// metadata.
///
/// NOTE(review): this function only ingests the memories; it performs no
/// reviews, so the "high"/"low" stability labels describe intent rather than
/// anything this code sets up — confirm against how the scenario is consumed.
pub fn create_decay_scenario(storage: &mut Storage) -> TestScenario {
    // Ingest in a fixed order: tagged textbook fact, plain fact, emotional event.
    let well_learned = Self::create_memory_full(
        storage,
        "Well-learned fact about photosynthesis",
        "fact",
        Some("biology textbook"),
        vec!["biology", "science"],
        0.3,
        0.5,
    );
    let just_learned = Self::create_memory(storage, "Random fact I just learned");
    let life_event = Self::create_emotional_memory(storage, "Important life event", 0.9, 0.95);

    let mut node_ids = Vec::new();
    let mut metadata = std::collections::HashMap::new();

    let labelled = [
        ("high_stability", well_learned),
        ("low_stability", just_learned),
        ("emotional", life_event),
    ];
    for (label, created) in labelled {
        if let Some(node) = created {
            metadata.insert(label.to_string(), node.id.clone());
            node_ids.push(node.id);
        }
    }

    TestScenario {
        node_ids,
        description: "Decay testing scenario with varied stability".to_string(),
        metadata,
    }
}
/// Build the scheduling scenario: four memories with different review
/// histories, recorded in the metadata under "new", "learning", "review",
/// and "relearning".
///
/// - "new": never reviewed
/// - "learning": one `Good` review
/// - "review": five `Good` reviews
/// - "relearning": one `Good` review followed by one `Again`
///
/// Results of `mark_reviewed` are deliberately ignored, and a memory that
/// fails to ingest is omitted from both `node_ids` and the metadata.
pub fn create_scheduling_scenario(storage: &mut Storage) -> TestScenario {
    let mut node_ids = Vec::new();
    let mut metadata = std::collections::HashMap::new();

    // Registers a created node under its role name.
    let mut record = |role: &str, id: String| {
        metadata.insert(role.to_string(), id.clone());
        node_ids.push(id);
    };

    if let Some(node) = Self::create_memory(storage, "Brand new memory") {
        record("new", node.id);
    }

    if let Some(node) = Self::create_memory(storage, "Learning memory") {
        let _ = storage.mark_reviewed(&node.id, Rating::Good);
        record("learning", node.id);
    }

    if let Some(node) = Self::create_memory(storage, "Well-reviewed memory") {
        (0..5).for_each(|_| {
            let _ = storage.mark_reviewed(&node.id, Rating::Good);
        });
        record("review", node.id);
    }

    if let Some(node) = Self::create_memory(storage, "Struggling memory") {
        for rating in [Rating::Good, Rating::Again] {
            let _ = storage.mark_reviewed(&node.id, rating);
        }
        record("relearning", node.id);
    }

    TestScenario {
        node_ids,
        description: "Scheduling scenario with cards in different learning states".to_string(),
        metadata,
    }
}
/// Build the search scenario: three groups of memories (programming facts,
/// science facts, cooking procedures), each with its own source and tags.
///
/// The metadata holds one count per group under "programming_count",
/// "science_count", and "recipe_count". Each count is the number of memories
/// in that group that were actually ingested, so it always agrees with
/// `node_ids` even when an ingest fails (3, 3, and 2 when everything
/// succeeds).
pub fn create_search_scenario(storage: &mut Storage) -> TestScenario {
    // Ingests one group of memories sharing type, source, and tags; appends
    // the created IDs to `ids` and returns how many were created.
    let ingest_group = |storage: &mut Storage,
                        ids: &mut Vec<String>,
                        contents: &[&str],
                        node_type: &str,
                        source: &str,
                        tags: &[&str]|
     -> usize {
        let before = ids.len();
        for &content in contents {
            if let Some(node) = Self::create_memory_full(
                storage,
                content,
                node_type,
                Some(source),
                tags.to_vec(),
                0.0,
                0.0,
            ) {
                ids.push(node.id);
            }
        }
        ids.len() - before
    };

    let mut ids = Vec::new();
    let mut metadata = std::collections::HashMap::new();

    // Programming memories
    let programming = ingest_group(
        storage,
        &mut ids,
        &[
            "Rust programming language uses ownership for memory safety",
            "Python is great for data science and machine learning",
            "JavaScript runs in web browsers and Node.js",
        ],
        "fact",
        "programming docs",
        &["programming", "code"],
    );
    metadata.insert("programming_count".to_string(), programming.to_string());

    // Science memories
    let science = ingest_group(
        storage,
        &mut ids,
        &[
            "Mitochondria is the powerhouse of the cell",
            "DNA contains genetic information",
            "Gravity is the force of attraction between masses",
        ],
        "fact",
        "science textbook",
        &["science"],
    );
    metadata.insert("science_count".to_string(), science.to_string());

    // Recipe memories
    let recipes = ingest_group(
        storage,
        &mut ids,
        &[
            "To make pasta, boil water and add salt",
            "Chocolate cake requires cocoa powder and eggs",
        ],
        "procedure",
        "cookbook",
        &["cooking", "recipes"],
    );
    metadata.insert("recipe_count".to_string(), recipes.to_string());

    TestScenario {
        node_ids: ids,
        description: "Search scenario with categorized content".to_string(),
        metadata,
    }
}
/// Create a scenario for testing temporal queries
pub fn create_temporal_scenario(storage: &mut Storage) -> TestScenario {
let now = Utc::now();
let mut ids = Vec::new();
let mut metadata = std::collections::HashMap::new();
// Currently valid
if let Some(node) = Self::create_temporal_memory(
storage,
"Currently valid memory",
Some(now - Duration::days(10)),
Some(now + Duration::days(10)),
) {
metadata.insert("current".to_string(), node.id.clone());
ids.push(node.id);
}
// Expired
if let Some(node) = Self::create_temporal_memory(
storage,
"Expired memory",
Some(now - Duration::days(60)),
Some(now - Duration::days(30)),
) {
metadata.insert("expired".to_string(), node.id.clone());
ids.push(node.id);
}
// Future
if let Some(node) = Self::create_temporal_memory(
storage,
"Future memory",
Some(now + Duration::days(30)),
Some(now + Duration::days(60)),
) {
metadata.insert("future".to_string(), node.id.clone());
ids.push(node.id);
}
// No bounds (always valid)
if let Some(node) = Self::create_temporal_memory(
storage,
"Always valid memory",
None,
None,
) {
metadata.insert("always_valid".to_string(), node.id.clone());
ids.push(node.id);
}
TestScenario {
node_ids: ids,
description: "Temporal scenario with different validity periods".to_string(),
metadata,
}
}
// ========================================================================
// UTILITY METHODS
// ========================================================================
/// Pick a node type name from a fixed nine-entry table.
///
/// Despite the name, the choice is fully determined by `seed`: the entry at
/// index `seed` modulo the table length is returned.
pub fn random_node_type(seed: usize) -> &'static str {
    const TYPES: [&str; 9] = [
        "fact",
        "concept",
        "procedure",
        "event",
        "relationship",
        "quote",
        "code",
        "question",
        "insight",
    ];

    let index = seed % TYPES.len();
    TYPES[index]
}
/// Generate lorem-ipsum-like filler text made of `words` space-separated
/// words drawn from a fixed twenty-entry vocabulary.
///
/// The output is fully determined by the arguments: word `i` is the entry at
/// index `(seed + 7 * i)` modulo the vocabulary size. Each term is reduced
/// modulo the vocabulary size before being combined, so arbitrarily large
/// seeds (up to `usize::MAX`) cannot overflow.
///
/// Returns an empty string when `words` is 0.
pub fn lorem_content(words: usize, seed: usize) -> String {
    const WORDS: [&str; 20] = [
        "the", "memory", "learning", "knowledge", "algorithm",
        "data", "system", "process", "function", "method",
        "class", "object", "variable", "constant", "type",
        "structure", "pattern", "design", "architecture", "code",
    ];
    // Step between consecutive words; coprime with the vocabulary size.
    const STRIDE: usize = 7;

    let len = WORDS.len();
    let start = seed % len;

    (0..words)
        .map(|i| WORDS[(start + (i % len) * STRIDE) % len])
        .collect::<Vec<_>>()
        .join(" ")
}
/// Generate `count` tags drawn from a fixed ten-entry list.
///
/// The output is fully determined by the arguments: tag `i` is the entry at
/// index `(seed + i)` modulo the list length, so tags repeat once `count`
/// exceeds ten. Each term is reduced modulo the list length before being
/// combined, so arbitrarily large seeds (up to `usize::MAX`) cannot overflow.
///
/// Returns an empty vector when `count` is 0.
pub fn generate_tags(count: usize, seed: usize) -> Vec<String> {
    const TAGS: [&str; 10] = [
        "important", "review", "todo", "concept", "fact",
        "code", "note", "idea", "question", "reference",
    ];

    let len = TAGS.len();
    let start = seed % len;

    (0..count)
        .map(|i| TAGS[(start + i % len) % len].to_string())
        .collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::{tempdir, TempDir};

    /// Open a `Storage` backed by a database file inside a fresh temporary
    /// directory.
    ///
    /// The `TempDir` guard is returned alongside the storage because dropping
    /// it deletes the directory; callers must keep it alive for as long as
    /// they use the storage. Bind it before the storage
    /// (`let (_dir, mut storage) = ...`) so the storage is dropped first.
    fn create_test_storage() -> (TempDir, Storage) {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("test.db");
        let storage = Storage::new(Some(db_path)).unwrap();
        (dir, storage)
    }

    #[test]
    fn test_create_memory() {
        let (_dir, mut storage) = create_test_storage();
        let node = TestDataFactory::create_memory(&mut storage, "test content");
        assert!(node.is_some());
        assert_eq!(node.unwrap().content, "test content");
    }

    #[test]
    fn test_create_batch() {
        let (_dir, mut storage) = create_test_storage();
        let ids = TestDataFactory::create_batch(&mut storage, 10);
        assert_eq!(ids.len(), 10);
        let stats = storage.get_stats().unwrap();
        assert_eq!(stats.total_nodes, 10);
    }

    #[test]
    fn test_create_decay_scenario() {
        let (_dir, mut storage) = create_test_storage();
        let scenario = TestDataFactory::create_decay_scenario(&mut storage);
        assert!(!scenario.node_ids.is_empty());
        assert!(scenario.metadata.contains_key("high_stability"));
        assert!(scenario.metadata.contains_key("low_stability"));
        assert!(scenario.metadata.contains_key("emotional"));
    }

    #[test]
    fn test_create_scheduling_scenario() {
        let (_dir, mut storage) = create_test_storage();
        let scenario = TestDataFactory::create_scheduling_scenario(&mut storage);
        assert!(!scenario.node_ids.is_empty());
        assert!(scenario.metadata.contains_key("new"));
        assert!(scenario.metadata.contains_key("learning"));
        assert!(scenario.metadata.contains_key("review"));
        assert!(scenario.metadata.contains_key("relearning"));
    }

    #[test]
    fn test_lorem_content() {
        let content = TestDataFactory::lorem_content(10, 42);
        let words: Vec<_> = content.split_whitespace().collect();
        assert_eq!(words.len(), 10);
    }

    #[test]
    fn test_generate_tags() {
        let tags = TestDataFactory::generate_tags(5, 0);
        assert_eq!(tags.len(), 5);
        assert!(tags.iter().all(|t| !t.is_empty()));
    }
}

View file

@ -0,0 +1,377 @@
//! Mock Embedding Service using FxHash
//!
//! Provides deterministic embeddings for testing without requiring
//! the actual fastembed model. Uses FxHash for fast, consistent hashing.
//!
//! Key properties:
//! - Deterministic: Same input always produces same embedding
//! - Fast: No ML model loading/inference
//! - Semantic similarity: Similar strings produce similar embeddings
//! - Normalized: All embeddings have unit length
use std::collections::HashMap;
/// Number of components in every mock embedding vector (768, chosen to match
/// the BGE-base-en-v1.5 model)
pub const MOCK_EMBEDDING_DIM: usize = 768;
/// FxHash-style, non-cryptographic hash over a byte slice.
///
/// Starting from a fixed seed, each byte is mixed in by rotating the running
/// hash left by 5 bits, XOR-ing in the byte, and multiplying (wrapping) by
/// the seed. Empty input yields the seed unchanged.
fn fx_hash(data: &[u8]) -> u64 {
    const SEED: u64 = 0x517cc1b727220a95;

    data.iter().fold(SEED, |state, &byte| {
        (state.rotate_left(5) ^ u64::from(byte)).wrapping_mul(SEED)
    })
}
/// Mock embedding service for testing
///
/// Produces deterministic embeddings based on text content using FxHash.
/// Designed to approximate real embedding behavior:
/// - Similar texts produce similar embeddings
/// - Different texts produce different embeddings
/// - Embeddings are normalized to unit length
///
/// `embed` takes `&mut self` because results are cached, so the service must
/// be bound mutably.
///
/// # Example
///
/// ```rust,ignore
/// let mut service = MockEmbeddingService::new();
///
/// let emb1 = service.embed("hello world");
/// let emb2 = service.embed("hello world");
/// let emb3 = service.embed("goodbye world");
///
/// // Same input = same output
/// assert_eq!(emb1, emb2);
///
/// // Different input = different output
/// assert_ne!(emb1, emb3);
///
/// // But similar inputs have higher similarity
/// let sim_same = service.cosine_similarity(&emb1, &emb2);
/// let sim_diff = service.cosine_similarity(&emb1, &emb3);
/// assert!(sim_same > sim_diff);
/// ```
pub struct MockEmbeddingService {
/// Cache for computed embeddings, keyed by the exact input text
cache: HashMap<String, Vec<f32>>,
/// Whether to use word-level hashing for better semantic similarity
/// (when false, the whole text is hashed instead)
semantic_mode: bool,
}
impl Default for MockEmbeddingService {
fn default() -> Self {
Self::new()
}
}
impl MockEmbeddingService {
/// Create a service with an empty cache and semantic (word-level) mode
/// enabled.
pub fn new() -> Self {
    let cache = HashMap::new();
    Self {
        semantic_mode: true,
        cache,
    }
}
/// Create a service with an empty cache and semantic mode disabled, so
/// embeddings come from hashing the whole text.
pub fn new_simple() -> Self {
    let cache = HashMap::new();
    Self {
        semantic_mode: false,
        cache,
    }
}
/// Return the embedding for `text`, computing and caching it on first use.
///
/// The cache is keyed by the exact input string. On a miss the embedding is
/// produced by the semantic or the simple strategy, depending on the mode
/// the service was created with, and a copy is stored for later calls.
pub fn embed(&mut self, text: &str) -> Vec<f32> {
    match self.cache.get(text) {
        Some(hit) => hit.clone(),
        None => {
            let fresh = if self.semantic_mode {
                self.semantic_embed(text)
            } else {
                self.simple_embed(text)
            };
            self.cache.insert(text.to_string(), fresh.clone());
            fresh
        }
    }
}
/// Whole-text, hash-based embedding.
///
/// The lowercased text is hashed once per 64-component block, with the block
/// index prefixed so every block gets a different hash. Within a block,
/// component `j` is derived from that hash rotated left by `5 * j` bits and
/// scaled into roughly [-1, 1]. The finished vector is passed through
/// `normalize`.
fn simple_embed(&self, text: &str) -> Vec<f32> {
    let lowered = text.to_lowercase();
    let mut vector = vec![0.0f32; MOCK_EMBEDDING_DIM];

    for (block_index, block) in vector.chunks_mut(64).enumerate() {
        let block_hash = fx_hash(format!("{}:{}", block_index, lowered).as_bytes());

        for (offset, slot) in block.iter_mut().enumerate() {
            // Spread one 64-bit hash over the block by rotating it.
            let rotated = block_hash.rotate_left((offset * 5) as u32);
            let unit = rotated as f32 / u64::MAX as f32;
            *slot = unit * 2.0 - 1.0;
        }
    }

    normalize(&mut vector);
    vector
}
/// Word-level embedding, so that texts sharing words land closer together.
///
/// The lowercased text is split on non-alphanumeric characters. If that
/// leaves no words, the result of `simple_embed` is returned instead.
/// Otherwise three kinds of features are accumulated, in this order, into a
/// zero vector that is finally passed through `normalize`:
///
/// 1. per word: 16 signed contributions at dimensions derived from the
///    word's hash;
/// 2. per (position, word) pair: one contribution weighted `1 / (pos + 1)`;
/// 3. per character trigram of the lowercased text: a fixed `0.1`.
fn semantic_embed(&self, text: &str) -> Vec<f32> {
    let lowered = text.to_lowercase();

    let tokens: Vec<&str> = lowered
        .split(|c: char| !c.is_alphanumeric())
        .filter(|token| !token.is_empty())
        .collect();
    if tokens.is_empty() {
        // Nothing to tokenize: fall back to hashing the raw text.
        return self.simple_embed(text);
    }

    let mut vector = vec![0.0f32; MOCK_EMBEDDING_DIM];

    // Pass 1: bag-of-words features, independent of word position.
    for token in tokens.iter() {
        let token_hash = fx_hash(token.as_bytes());
        for i in 0..16 {
            let slot = ((token_hash >> (i * 4)) as usize) % MOCK_EMBEDDING_DIM;
            let negative = (token_hash >> (i + 48)) & 1 != 0;
            let magnitude = ((token_hash >> (i * 2)) as f32 % 100.0) / 100.0 + 0.5;
            vector[slot] += if negative { -magnitude } else { magnitude };
        }
    }

    // Pass 2: position-aware features; earlier words weigh more.
    for (position, token) in tokens.iter().enumerate() {
        let position_hash = fx_hash(format!("{}:{}", position, token).as_bytes());
        let slot = (position_hash as usize) % MOCK_EMBEDDING_DIM;
        vector[slot] += 1.0 / (position as f32 + 1.0);
    }

    // Pass 3: character trigrams, for subword overlap.
    let characters: Vec<char> = lowered.chars().collect();
    for window in characters.windows(3) {
        let trigram: String = window.iter().collect();
        let slot = (fx_hash(trigram.as_bytes()) as usize) % MOCK_EMBEDDING_DIM;
        vector[slot] += 0.1;
    }

    normalize(&mut vector);
    vector
}
/// Cosine similarity of two vectors: their dot product divided by the
/// product of their Euclidean norms.
///
/// Returns `0.0` when the slices differ in length or when either norm is
/// zero.
pub fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    let magnitude = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();

    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let norm_a = magnitude(a);
    let norm_b = magnitude(b);

    if norm_a == 0.0 || norm_b == 0.0 {
        0.0
    } else {
        dot / (norm_a * norm_b)
    }
}
/// Euclidean distance between two vectors: the square root of the sum of
/// squared component differences.
///
/// Returns `f32::MAX` when the slices differ in length.
pub fn euclidean_distance(&self, a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return f32::MAX;
    }

    let squared_total: f32 = a
        .iter()
        .zip(b.iter())
        .map(|(x, y)| (x - y).powi(2))
        .sum::<f32>();

    squared_total.sqrt()
}
/// Find the candidate whose embedding has the highest cosine similarity to
/// `query`.
///
/// Returns the candidate's ID together with its similarity, or `None` when
/// `candidates` is empty. Scores that cannot be ordered against each other
/// are treated as equal.
pub fn find_most_similar<'a>(
    &self,
    query: &[f32],
    candidates: &'a [(String, Vec<f32>)],
) -> Option<(&'a str, f32)> {
    let by_score = |left: &(&'a str, f32), right: &(&'a str, f32)| {
        left.1
            .partial_cmp(&right.1)
            .unwrap_or(std::cmp::Ordering::Equal)
    };

    let scored = candidates
        .iter()
        .map(|(id, embedding)| (id.as_str(), self.cosine_similarity(query, embedding)));

    scored.max_by(by_score)
}
/// Remove every cached embedding.
pub fn clear_cache(&mut self) {
    let cache = &mut self.cache;
    cache.clear();
}
/// Number of distinct texts whose embeddings are currently cached.
pub fn cache_size(&self) -> usize {
    let cached_entries = self.cache.len();
    cached_entries
}
/// Report whether the service can be used.
///
/// The mock has nothing to load, so this unconditionally returns `true`.
pub fn is_ready(&self) -> bool {
    const ALWAYS_READY: bool = true;
    ALWAYS_READY
}
}
/// Scale `v` in place so that its Euclidean norm becomes 1.
///
/// A vector whose norm is not strictly positive (for example all zeros, or
/// an empty slice) is left untouched.
fn normalize(v: &mut [f32]) {
    let squared_total: f32 = v.iter().map(|x| x * x).sum::<f32>();
    let norm = squared_total.sqrt();

    if norm > 0.0 {
        v.iter_mut().for_each(|component| *component /= norm);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Euclidean norm of an embedding, used by the normalization checks.
    fn length_of(embedding: &[f32]) -> f32 {
        embedding.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    #[test]
    fn test_deterministic_embedding() {
        let mut service = MockEmbeddingService::new();
        let first = service.embed("hello world");
        let second = service.embed("hello world");
        assert_eq!(first, second);
    }

    #[test]
    fn test_different_texts_different_embeddings() {
        let mut service = MockEmbeddingService::new();
        let hello = service.embed("hello world");
        let goodbye = service.embed("goodbye universe");
        assert_ne!(hello, goodbye);
    }

    #[test]
    fn test_embedding_dimension() {
        let mut service = MockEmbeddingService::new();
        let embedding = service.embed("test text");
        assert_eq!(embedding.len(), MOCK_EMBEDDING_DIM);
    }

    #[test]
    fn test_normalized_embeddings() {
        let mut service = MockEmbeddingService::new();
        let embedding = service.embed("test normalization");
        assert!((length_of(&embedding) - 1.0).abs() < 0.001);
    }

    #[test]
    fn test_semantic_similarity() {
        let mut service = MockEmbeddingService::new();
        let dog = service.embed("the dog runs fast");
        let cat = service.embed("the cat runs fast");
        let unrelated = service.embed("machine learning algorithms");

        let near = service.cosine_similarity(&dog, &cat);
        let far = service.cosine_similarity(&dog, &unrelated);

        // Sentences sharing most of their words must score higher.
        assert!(near > far);
    }

    #[test]
    fn test_cosine_similarity_range() {
        let mut service = MockEmbeddingService::new();
        let one = service.embed("test one");
        let two = service.embed("test two");
        let similarity = service.cosine_similarity(&one, &two);

        // Cosine similarity is bounded by [-1, 1].
        assert!((-1.0..=1.0).contains(&similarity));
    }

    #[test]
    fn test_self_similarity() {
        let mut service = MockEmbeddingService::new();
        let embedding = service.embed("self similarity test");
        let similarity = service.cosine_similarity(&embedding, &embedding);
        assert!((similarity - 1.0).abs() < 0.001);
    }

    #[test]
    fn test_caching() {
        let mut service = MockEmbeddingService::new();
        assert_eq!(service.cache_size(), 0);

        service.embed("text one");
        assert_eq!(service.cache_size(), 1);

        // A repeated text is served from the cache and adds no entry.
        service.embed("text one");
        assert_eq!(service.cache_size(), 1);

        service.embed("text two");
        assert_eq!(service.cache_size(), 2);

        service.clear_cache();
        assert_eq!(service.cache_size(), 0);
    }

    #[test]
    fn test_find_most_similar() {
        let mut service = MockEmbeddingService::new();
        let query = service.embed("programming code");

        let candidates = vec![
            ("doc1".to_string(), service.embed("python programming language")),
            ("doc2".to_string(), service.embed("cooking recipes")),
            ("doc3".to_string(), service.embed("software development code")),
        ];

        let best = service.find_most_similar(&query, &candidates);
        assert!(best.is_some());

        // The winner must be one of the programming-related documents.
        let (id, _) = best.unwrap();
        assert!(id == "doc1" || id == "doc3");
    }

    #[test]
    fn test_empty_text() {
        let mut service = MockEmbeddingService::new();
        let embedding = service.embed("");
        assert_eq!(embedding.len(), MOCK_EMBEDDING_DIM);
    }

    #[test]
    fn test_simple_mode() {
        let mut service = MockEmbeddingService::new_simple();
        let embedding = service.embed("test simple mode");
        assert_eq!(embedding.len(), MOCK_EMBEDDING_DIM);

        // Simple-mode embeddings are normalized too.
        assert!((length_of(&embedding) - 1.0).abs() < 0.001);
    }
}

View file

@ -0,0 +1,11 @@
//! Mock Services Module
//!
//! Provides mock implementations for testing:
//! - `MockEmbeddingService` - Deterministic embeddings using FxHash
//!   (`MOCK_EMBEDDING_DIM` is the length of every embedding)
//! - `TestDataFactory` - Generate test data with realistic properties
//!   (`BatchConfig` configures batch generation; `TestScenario` is what the
//!   scenario helpers return)
//!
//! The submodules are private, so every public type that appears in the
//! signature of a re-exported item is re-exported here as well; otherwise
//! callers could not name it.
mod fixtures;
mod mock_embedding;

pub use fixtures::{BatchConfig, TestDataFactory, TestScenario};
pub use mock_embedding::{MockEmbeddingService, MOCK_EMBEDDING_DIM};