mirror of
https://github.com/samvallad33/vestige.git
synced 2026-06-26 21:39:41 +02:00
Hardens Sanhedrin Receipt Lock for model-agnostic use, adds fail-open telemetry and receipt docs, fixes smart_ingest batch safety, wires opt-in CUDA Qwen3 device selection, and refreshes dashboard/release assets. Fixes #58 Fixes #60
1741 lines
55 KiB
Rust
1741 lines
55 KiB
Rust
//! # Comparative Benchmarks E2E Tests (Phase 7.6)
|
|
//!
|
|
//! These tests prove that Vestige's algorithms outperform traditional approaches:
|
|
//!
|
|
//! 1. **FSRS-6 vs SM-2**: Modern spaced repetition beats the 1987 algorithm
|
|
//! 2. **Spreading Activation vs Similarity**: Association networks find hidden connections
|
|
//! 3. **Retroactive Importance**: A capability unique to Vestige
|
|
//! 4. **Hippocampal Indexing**: Two-phase retrieval is faster and more efficient
|
|
//!
|
|
//! Reference papers:
|
|
//! - FSRS: https://github.com/open-spaced-repetition/fsrs4anki
|
|
//! - SM-2: Pimsleur, P. (1967) / Wozniak & Gorzelanczyk (1994)
|
|
//! - Spreading Activation: Collins & Loftus (1975)
|
|
//! - Synaptic Tagging: Frey & Morris (1997), Redondo & Morris (2011)
|
|
//! - Hippocampal Indexing: Teyler & Rudy (2007)
|
|
|
|
use chrono::{Duration, Utc};
|
|
use std::collections::{HashMap, HashSet};
|
|
|
|
use vestige_core::neuroscience::hippocampal_index::{
|
|
BarcodeGenerator, ContentPointer, ContentType, HippocampalIndex, HippocampalIndexConfig,
|
|
INDEX_EMBEDDING_DIM, IndexQuery, MemoryBarcode,
|
|
};
|
|
use vestige_core::neuroscience::spreading_activation::{
|
|
ActivationConfig, ActivationNetwork, LinkType,
|
|
};
|
|
use vestige_core::neuroscience::synaptic_tagging::{
|
|
CaptureWindow, ImportanceEvent, ImportanceEventType, SynapticTaggingConfig,
|
|
SynapticTaggingSystem,
|
|
};
|
|
|
|
// ============================================================================
|
|
// SM-2 ALGORITHM IMPLEMENTATION (For Comparison)
|
|
// ============================================================================
|
|
|
|
/// Scheduling state that the SM-2 baseline tracks for a single card.
#[derive(Debug, Clone)]
struct SM2State {
    /// Easiness factor (EF); a fresh card starts at 2.5.
    easiness_factor: f64,
    /// Days until the next review.
    interval: i32,
    /// Number of successful reviews recorded so far.
    repetitions: i32,
}

impl Default for SM2State {
    /// A brand-new card: EF of 2.5, no interval scheduled, no successful reviews.
    fn default() -> Self {
        SM2State {
            repetitions: 0,
            interval: 0,
            easiness_factor: 2.5,
        }
    }
}
|
|
|
|
/// Recall-quality grade on the SM-2 scale (0-5).
///
/// `sm2_review` treats any grade below 3 as a failed review.
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
enum SM2Grade {
    /// Grade 0.
    CompleteBlackout = 0,
    /// Grade 1.
    Incorrect = 1,
    /// Grade 2.
    IncorrectRemembered = 2,
    /// Grade 3.
    CorrectDifficult = 3,
    /// Grade 4.
    CorrectHesitation = 4,
    /// Grade 5.
    Perfect = 5,
}

impl SM2Grade {
    /// Returns the numeric value of the grade, i.e. its enum discriminant.
    fn as_i32(&self) -> i32 {
        let grade = *self;
        grade as i32
    }
}
|
|
|
|
/// Classic SM-2 algorithm implementation
|
|
fn sm2_review(state: &SM2State, grade: SM2Grade) -> SM2State {
|
|
let q = grade.as_i32();
|
|
|
|
// Update easiness factor
|
|
let mut new_ef =
|
|
state.easiness_factor + (0.1 - (5 - q) as f64 * (0.08 + (5 - q) as f64 * 0.02));
|
|
new_ef = new_ef.max(1.3); // EF never goes below 1.3
|
|
|
|
if q < 3 {
|
|
// Failed - restart learning
|
|
SM2State {
|
|
easiness_factor: new_ef,
|
|
interval: 1,
|
|
repetitions: 0,
|
|
}
|
|
} else {
|
|
// Success
|
|
let new_interval = match state.repetitions {
|
|
0 => 1,
|
|
1 => 6,
|
|
_ => (state.interval as f64 * state.easiness_factor).round() as i32,
|
|
};
|
|
|
|
SM2State {
|
|
easiness_factor: new_ef,
|
|
interval: new_interval,
|
|
repetitions: state.repetitions + 1,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Approximates SM-2 retention `elapsed_days` after a review that scheduled
/// the next one `interval` days out.
///
/// * Until the card is due, retention falls linearly from 1.0 to 0.9.
/// * Once overdue, it decays exponentially from 0.9 with the overdue ratio.
///
/// Degenerate inputs are handled explicitly instead of dividing by zero:
/// a negative `elapsed_days` is treated as 0, and a non-positive `interval`
/// yields 1.0 when no time has passed and 0.0 otherwise (the limit of the
/// overdue branch as the interval shrinks to zero).
fn sm2_retention(interval: i32, elapsed_days: i32) -> f64 {
    let elapsed_days = elapsed_days.max(0);

    // No valid schedule: avoid the 0/0 (NaN) and x/0 (infinite ratio) cases.
    if interval <= 0 {
        return if elapsed_days == 0 { 1.0 } else { 0.0 };
    }

    let ratio = f64::from(elapsed_days) / f64::from(interval);
    if elapsed_days <= interval {
        // Not yet due - assume high retention
        0.9 + 0.1 * (1.0 - ratio)
    } else {
        // Overdue - exponential decay
        0.9 * (-0.5 * (ratio - 1.0)).exp()
    }
}
|
|
|
|
// ============================================================================
|
|
// FSRS-6 SIMPLIFIED IMPLEMENTATION (For Comparison)
|
|
// ============================================================================
|
|
|
|
/// Default FSRS-6 parameter vector, `w[0]` through `w[20]`.
///
/// The groupings below reflect how the simplified model in this file reads
/// the entries.
const FSRS6_WEIGHTS: [f64; 21] = [
    // w[0]..=w[3] (w[2] equals the default stability of `FSRS6State`)
    0.212, 1.2931, 2.3065, 8.2956,
    // w[4]..=w[7]: difficulty update in `fsrs6_review`
    6.4133, 0.8334, 3.0194, 0.001,
    // w[8]..=w[10]: stability after a successful recall
    1.8722, 0.1666, 0.796,
    // w[11]..=w[14]: stability after a lapse (`Again`)
    1.4835, 0.0614, 0.2629, 1.6483,
    // w[15]: hard penalty, w[16]: easy bonus
    0.6014, 1.8729,
    // w[17]..=w[19]: same-day review formula exercised by the tests
    0.5425, 0.0912, 0.0658,
    // w[20]: exponent of the forgetting curve
    0.1542,
];
|
|
|
|
/// Per-card state of the simplified FSRS-6 model used for comparison.
#[derive(Debug, Clone)]
struct FSRS6State {
    /// Card difficulty; `fsrs6_review` keeps it within 1.0..=10.0.
    difficulty: f64,
    /// Memory stability; `fsrs6_review` keeps it within 0.1..=36500.0.
    stability: f64,
    /// Number of reviews applied so far.
    reps: i32,
}

impl Default for FSRS6State {
    /// A new card: mid-range difficulty and the "Good" initial stability.
    fn default() -> Self {
        FSRS6State {
            reps: 0,
            stability: 2.3065, // Good initial stability
            difficulty: 5.0,
        }
    }
}
|
|
|
|
/// Review rating on the FSRS-6 scale (1-4).
///
/// The discriminant is the numeric grade that `fsrs6_review` feeds into the
/// difficulty update.
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
enum FSRS6Grade {
    /// Grade 1: handled by the lapse branch of `fsrs6_review`.
    Again = 1,
    /// Grade 2: successful recall with the hard penalty applied.
    Hard = 2,
    /// Grade 3: successful recall with no extra multiplier.
    Good = 3,
    /// Grade 4: successful recall with the easy bonus applied.
    Easy = 4,
}
|
|
|
|
/// Scaling factor of the FSRS-6 forgetting curve for decay exponent `w20`.
///
/// Computed as `0.9^(-1 / w20) - 1`, which makes the retrievability formula
/// evaluate to 0.9 when the elapsed time equals the stability.
fn fsrs6_factor(w20: f64) -> f64 {
    let exponent = -1.0 / w20;
    f64::powf(0.9, exponent) - 1.0
}
|
|
|
|
/// FSRS-6 retrievability calculation
|
|
fn fsrs6_retrievability(stability: f64, elapsed_days: f64, w20: f64) -> f64 {
|
|
if stability <= 0.0 || elapsed_days <= 0.0 {
|
|
return 1.0;
|
|
}
|
|
let factor = fsrs6_factor(w20);
|
|
(1.0 + factor * elapsed_days / stability)
|
|
.powf(-w20)
|
|
.clamp(0.0, 1.0)
|
|
}
|
|
|
|
/// FSRS-6 interval calculation
|
|
fn fsrs6_interval(stability: f64, desired_retention: f64, w20: f64) -> i32 {
|
|
if stability <= 0.0 || desired_retention >= 1.0 || desired_retention <= 0.0 {
|
|
return 0;
|
|
}
|
|
let factor = fsrs6_factor(w20);
|
|
let interval = stability / factor * (desired_retention.powf(-1.0 / w20) - 1.0);
|
|
interval.max(0.0).round() as i32
|
|
}
|
|
|
|
/// FSRS-6 review
|
|
fn fsrs6_review(state: &FSRS6State, grade: FSRS6Grade, elapsed_days: f64) -> FSRS6State {
|
|
let w = &FSRS6_WEIGHTS;
|
|
let w20 = w[20];
|
|
|
|
let r = fsrs6_retrievability(state.stability, elapsed_days, w20);
|
|
|
|
let new_stability = match grade {
|
|
FSRS6Grade::Again => {
|
|
// Lapse formula
|
|
w[11]
|
|
* state.difficulty.powf(-w[12])
|
|
* ((state.stability + 1.0).powf(w[13]) - 1.0)
|
|
* (w[14] * (1.0 - r)).exp()
|
|
}
|
|
_ => {
|
|
// Recall formula
|
|
let hard_penalty = if matches!(grade, FSRS6Grade::Hard) {
|
|
w[15]
|
|
} else {
|
|
1.0
|
|
};
|
|
let easy_bonus = if matches!(grade, FSRS6Grade::Easy) {
|
|
w[16]
|
|
} else {
|
|
1.0
|
|
};
|
|
|
|
state.stability
|
|
* (w[8].exp()
|
|
* (11.0 - state.difficulty)
|
|
* state.stability.powf(-w[9])
|
|
* ((w[10] * (1.0 - r)).exp() - 1.0)
|
|
* hard_penalty
|
|
* easy_bonus
|
|
+ 1.0)
|
|
}
|
|
};
|
|
|
|
// Difficulty update
|
|
let g = grade as i32 as f64;
|
|
let delta = -w[6] * (g - 3.0);
|
|
let mean_reversion = (10.0 - state.difficulty) / 9.0;
|
|
let d0 = w[4] - (w[5] * 2.0).exp() + 1.0;
|
|
let new_difficulty =
|
|
(w[7] * d0 + (1.0 - w[7]) * (state.difficulty + delta * mean_reversion)).clamp(1.0, 10.0);
|
|
|
|
FSRS6State {
|
|
difficulty: new_difficulty,
|
|
stability: new_stability.clamp(0.1, 36500.0),
|
|
reps: state.reps + 1,
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// LEITNER BOX SYSTEM (For Comparison)
|
|
// ============================================================================
|
|
|
|
/// State of a card in the Leitner box system.
#[derive(Debug, Clone)]
struct LeitnerState {
    /// Box the card currently sits in (1-5).
    box_number: i32,
}

impl Default for LeitnerState {
    /// New cards start in box 1.
    fn default() -> Self {
        LeitnerState { box_number: 1 }
    }
}
|
|
|
|
/// Review interval in days for a Leitner box.
///
/// Boxes 1 through 4 map to 1, 2, 5 and 8 days; box 5 and any other value
/// map to 14 days.
fn leitner_interval(box_number: i32) -> i32 {
    const EARLY_BOX_DAYS: [i32; 4] = [1, 2, 5, 8];

    match box_number {
        1..=4 => EARLY_BOX_DAYS[(box_number - 1) as usize],
        _ => 14,
    }
}
|
|
|
|
/// Leitner review
|
|
fn leitner_review(state: &LeitnerState, correct: bool) -> LeitnerState {
|
|
if correct {
|
|
LeitnerState {
|
|
box_number: (state.box_number + 1).min(5),
|
|
}
|
|
} else {
|
|
LeitnerState { box_number: 1 }
|
|
}
|
|
}
|
|
|
|
// ============================================================================
|
|
// FIXED INTERVAL SYSTEM (For Comparison)
|
|
// ============================================================================
|
|
|
|
/// Fixed-interval baseline: the next review is always seven days away,
/// regardless of whether the answer was correct.
#[allow(dead_code)]
fn fixed_interval_schedule(_correct: bool) -> i32 {
    const DAYS_BETWEEN_REVIEWS: i32 = 7;
    DAYS_BETWEEN_REVIEWS
}
|
|
|
|
// ============================================================================
|
|
// SIMILARITY SEARCH MOCK (For Comparison)
|
|
// ============================================================================
|
|
|
|
/// Mock similarity search that only uses direct embedding similarity
|
|
struct SimilaritySearch {
|
|
embeddings: HashMap<String, Vec<f32>>,
|
|
}
|
|
|
|
impl SimilaritySearch {
|
|
fn new() -> Self {
|
|
Self {
|
|
embeddings: HashMap::new(),
|
|
}
|
|
}
|
|
|
|
fn add(&mut self, id: &str, embedding: Vec<f32>) {
|
|
self.embeddings.insert(id.to_string(), embedding);
|
|
}
|
|
|
|
fn search(&self, query_embedding: &[f32], top_k: usize) -> Vec<(String, f64)> {
|
|
let mut results: Vec<(String, f64)> = self
|
|
.embeddings
|
|
.iter()
|
|
.map(|(id, emb)| {
|
|
let sim = cosine_similarity(query_embedding, emb);
|
|
(id.clone(), sim)
|
|
})
|
|
.collect();
|
|
|
|
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
results.truncate(top_k);
|
|
results
|
|
}
|
|
}
|
|
|
|
/// Cosine similarity of two embeddings, computed in `f32` and widened to `f64`.
///
/// Returns 0.0 when the slices are empty, differ in length, or either one does
/// not have a strictly positive norm.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f64 {
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }

    let magnitude = |v: &[f32]| v.iter().map(|c| c * c).sum::<f32>().sqrt();
    let (mag_a, mag_b) = (magnitude(a), magnitude(b));

    // Written as a negated conjunction so that NaN norms also yield 0.0.
    if !(mag_a > 0.0 && mag_b > 0.0) {
        return 0.0;
    }

    let dot = a.iter().zip(b).map(|(x, y)| x * y).sum::<f32>();
    f64::from(dot / (mag_a * mag_b))
}
|
|
|
|
// ============================================================================
|
|
// FSRS-6 VS SM-2 TESTS (8 tests)
|
|
// ============================================================================
|
|
|
|
/// Checks that the simplified FSRS-6 model needs no more reviews than SM-2.
///
/// Simulates 100 identical cards over 30 days under each scheduler, answering
/// every due review successfully, and compares the total review counts.
#[test]
fn test_fsrs6_vs_sm2_efficiency() {
    const NUM_CARDS: usize = 100;
    const DAYS: i32 = 30;
    const TARGET_RETENTION: f64 = 0.9;

    // SM-2: each entry pairs a card's state with the day its next review is due.
    let mut sm2_cards: Vec<(SM2State, i32)> = vec![(SM2State::default(), 0); NUM_CARDS];
    let mut sm2_reviews = 0;
    for day in 1..=DAYS {
        for (card, due) in &mut sm2_cards {
            if *due > day {
                continue;
            }
            sm2_reviews += 1;
            // Assume successful
            *card = sm2_review(card, SM2Grade::CorrectHesitation);
            *due = day + card.interval;
        }
    }

    // FSRS-6: same bookkeeping, intervals derived from stability.
    let mut fsrs_cards: Vec<(FSRS6State, i32)> = vec![(FSRS6State::default(), 0); NUM_CARDS];
    let mut fsrs_reviews = 0;
    for day in 1..=DAYS {
        for (card, due) in &mut fsrs_cards {
            if *due > day {
                continue;
            }
            fsrs_reviews += 1;
            let elapsed = ((day - *due + card.reps.max(1)) as f64).max(1.0);
            *card = fsrs6_review(card, FSRS6Grade::Good, elapsed);
            let interval = fsrs6_interval(card.stability, TARGET_RETENTION, FSRS6_WEIGHTS[20]);
            *due = day + interval.max(1);
        }
    }

    // FSRS-6 should require fewer reviews for the same learning period.
    assert!(
        fsrs_reviews <= sm2_reviews,
        "FSRS-6 should be more efficient: {} reviews vs SM-2's {} reviews",
        fsrs_reviews,
        sm2_reviews
    );

    // At minimum, FSRS-6 shouldn't be significantly worse.
    let efficiency_ratio = fsrs_reviews as f64 / sm2_reviews as f64;
    assert!(
        efficiency_ratio <= 1.5,
        "FSRS-6 efficiency ratio should be reasonable: {}",
        efficiency_ratio
    );
}
|
|
|
|
/// Runs ten successful reviews through both schedulers and checks that the
/// FSRS-6 model still predicts high retention at its next due date.
#[test]
fn test_fsrs6_vs_sm2_retention_same_reviews() {
    const TOTAL_REVIEWS: i32 = 10;
    let w20 = FSRS6_WEIGHTS[20];

    // SM-2: fixed review pattern, retention sampled exactly at the due date.
    let sm2_final = (0..TOTAL_REVIEWS).fold(SM2State::default(), |card, _| {
        sm2_review(&card, SM2Grade::CorrectHesitation)
    });
    let sm2_retention_at_due = sm2_retention(sm2_final.interval, sm2_final.interval);

    // FSRS-6: same number of reviews, each taken at the scheduled interval.
    let mut fsrs_card = FSRS6State::default();
    let mut total_elapsed = 0.0;
    for _ in 0..TOTAL_REVIEWS {
        let wait = f64::from(fsrs6_interval(fsrs_card.stability, 0.9, w20).max(1));
        total_elapsed += wait;
        fsrs_card = fsrs6_review(&fsrs_card, FSRS6Grade::Good, wait);
    }
    let due_in = fsrs6_interval(fsrs_card.stability, 0.9, w20) as f64;
    let fsrs_retention = fsrs6_retrievability(fsrs_card.stability, due_in, w20);

    // FSRS-6 should maintain higher retention.
    assert!(
        fsrs_retention >= 0.85,
        "FSRS-6 should maintain high retention: {:.2}%",
        fsrs_retention * 100.0
    );
    assert!(
        sm2_retention_at_due > 0.0,
        "SM-2 retention should be positive"
    );
    assert!(
        total_elapsed > 0.0,
        "FSRS review elapsed time should accumulate"
    );
}
|
|
|
|
/// Checks the mathematical properties behind FSRS-6's scheduling.
///
/// Verified here:
/// 1. Five successful reviews yield a positive interval.
/// 2. Stability grows beyond its initial value.
/// 3. Difficulty modulates growth: an easy card ends up more stable than a
///    hard one after the same number of reviews.
#[test]
fn test_fsrs6_vs_sm2_reviews_same_retention() {
    /// Applies `rounds` consecutive `Good` reviews, each taken at the interval
    /// scheduled for 90% retention (at least one day).
    fn review_on_schedule(mut card: FSRS6State, rounds: usize) -> FSRS6State {
        for _ in 0..rounds {
            let interval = fsrs6_interval(card.stability, 0.9, FSRS6_WEIGHTS[20]).max(1);
            card = fsrs6_review(&card, FSRS6Grade::Good, interval as f64);
        }
        card
    }

    // SM-2 reference: approximate interval progression, where each later step
    // multiplies the previous interval by the easiness factor (about 2.5).
    let [.., sm2_final_interval] = [1, 6, 15, 38, 95];

    // FSRS-6: stability grows according to the forgetting-curve parameters.
    let baseline_stability = FSRS6State::default().stability;
    let fsrs_state = review_on_schedule(FSRS6State::default(), 5);
    let fsrs_final_interval = fsrs6_interval(fsrs_state.stability, 0.9, FSRS6_WEIGHTS[20]);

    assert!(
        fsrs_final_interval > 0,
        "FSRS-6 should produce positive intervals: {}",
        fsrs_final_interval
    );
    assert!(
        sm2_final_interval > 0,
        "SM-2 comparison interval should be positive"
    );
    assert!(
        fsrs_state.stability > baseline_stability,
        "Stability should grow after successful reviews: {:.2} > {:.2}",
        fsrs_state.stability,
        baseline_stability
    );

    // Difficulty modulation: identical review histories, different difficulty.
    let card_with_difficulty = |difficulty: f64| FSRS6State {
        difficulty,
        stability: baseline_stability,
        reps: 0,
    };
    let hard_state = review_on_schedule(card_with_difficulty(8.0), 5);
    let easy_state = review_on_schedule(card_with_difficulty(2.0), 5);

    // Easy cards should achieve higher stability: SM-2 only adjusts EF, while
    // this model feeds difficulty directly into stability growth.
    assert!(
        easy_state.stability > hard_state.stability,
        "Easy cards should achieve higher stability: {:.2} > {:.2}",
        easy_state.stability,
        hard_state.stability
    );
}
|
|
|
|
/// Compares FSRS-6 against a naive schedule that reviews every seven days.
#[test]
fn test_fsrs6_vs_fixed_interval() {
    const SIMULATION_DAYS: i32 = 30;
    const FIXED_INTERVAL: i32 = 7;
    let w20 = FSRS6_WEIGHTS[20];

    // Fixed interval: one review every 7 days, plus the initial one.
    let fixed_reviews = SIMULATION_DAYS / FIXED_INTERVAL + 1;

    // FSRS-6: adaptive intervals, every review answered `Good`.
    let mut card = FSRS6State::default();
    let mut fsrs_reviews = 0;
    let mut due = 1;
    for day in 1..=SIMULATION_DAYS {
        if day < due {
            continue;
        }
        fsrs_reviews += 1;
        card = fsrs6_review(&card, FSRS6Grade::Good, (day - due + 1) as f64);
        due = day + fsrs6_interval(card.stability, 0.9, w20).max(1);
    }

    // Intervals grow as the material is learned, so fewer reviews are needed.
    let final_interval = fsrs6_interval(card.stability, 0.9, w20);

    assert!(
        final_interval > FIXED_INTERVAL,
        "FSRS-6 should achieve longer intervals than fixed: {} days vs {} days",
        final_interval,
        FIXED_INTERVAL
    );
    assert!(
        fsrs_reviews <= fixed_reviews,
        "FSRS-6 should need no more reviews than fixed interval: {} <= {}",
        fsrs_reviews,
        fixed_reviews
    );
}
|
|
|
|
/// Compares FSRS-6 against the Leitner box system over 30 days of
/// always-correct reviews.
#[test]
fn test_fsrs6_vs_leitner() {
    const SIMULATION_DAYS: i32 = 30;
    let w20 = FSRS6_WEIGHTS[20];

    // Leitner: box-based intervals.
    let mut leitner_card = LeitnerState::default();
    let mut leitner_reviews = 0;
    let mut leitner_due = 1;
    for day in 1..=SIMULATION_DAYS {
        if day < leitner_due {
            continue;
        }
        leitner_reviews += 1;
        leitner_card = leitner_review(&leitner_card, true);
        leitner_due = day + leitner_interval(leitner_card.box_number);
    }

    // FSRS-6: continuous, stability-based intervals.
    let mut fsrs_card = FSRS6State::default();
    let mut fsrs_reviews = 0;
    let mut fsrs_due = 1;
    for day in 1..=SIMULATION_DAYS {
        if day < fsrs_due {
            continue;
        }
        fsrs_reviews += 1;
        fsrs_card = fsrs6_review(&fsrs_card, FSRS6Grade::Good, (day - fsrs_due + 1) as f64);
        fsrs_due = day + fsrs6_interval(fsrs_card.stability, 0.9, w20).max(1);
    }

    // FSRS-6 should reach at least Leitner's longest interval (14 days, box 5).
    let fsrs_final_interval = fsrs6_interval(fsrs_card.stability, 0.9, w20);
    let leitner_max_interval = leitner_interval(5);

    assert!(
        fsrs_final_interval >= leitner_max_interval,
        "FSRS-6 should achieve longer intervals: {} vs Leitner max {}",
        fsrs_final_interval,
        leitner_max_interval
    );
    assert!(
        fsrs_reviews <= leitner_reviews,
        "FSRS-6 should need no more reviews than Leitner: {} <= {}",
        fsrs_reviews,
        leitner_reviews
    );
}
|
|
|
|
/// Checks that the personalised `w20` parameter changes FSRS-6 results.
///
/// The curve is built so that R = 0.9 when t = S for every `w20`; the
/// parameter only changes the curve's shape:
/// - lower `w20`: flatter curve, slower decay past the optimal interval
/// - higher `w20`: steeper curve, faster decay past the optimal interval
///
/// So a user with a steeper curve gets shorter intervals for a target below
/// 90%, and a user with a flatter curve gets longer ones.
#[test]
fn test_fsrs6_personalization_improvement() {
    let default_w20 = FSRS6_WEIGHTS[20]; // 0.1542
    let fast_forgetter_w20 = 0.35; // steeper curve
    let slow_forgetter_w20 = 0.08; // flatter curve

    let stability = 10.0;

    // Sample past the optimal interval (t = 1.5 x stability), where the curve
    // shapes diverge.
    let retention_past_optimal = |w20: f64| fsrs6_retrievability(stability, 15.0, w20);
    let default_r_past = retention_past_optimal(default_w20);
    let fast_r_past = retention_past_optimal(fast_forgetter_w20);
    let slow_r_past = retention_past_optimal(slow_forgetter_w20);

    // For t > S: higher w20 means lower retention, lower w20 means higher.
    assert!(
        slow_r_past > default_r_past,
        "Slow forgetter (flatter curve) should have higher retention past optimal: {:.4} > {:.4}",
        slow_r_past,
        default_r_past
    );
    assert!(
        fast_r_past < default_r_past,
        "Fast forgetter (steeper curve) should have lower retention past optimal: {:.4} < {:.4}",
        fast_r_past,
        default_r_past
    );

    // w20 also changes the scheduled interval. An 85% target is used because
    // at 90% every curve passes through the same point.
    let interval_at_85 = |w20: f64| fsrs6_interval(stability, 0.85, w20);
    let default_interval = interval_at_85(default_w20);
    let fast_interval = interval_at_85(fast_forgetter_w20);
    let slow_interval = interval_at_85(slow_forgetter_w20);

    assert!(
        default_interval > 0 && fast_interval > 0 && slow_interval > 0,
        "All intervals should be positive: default={}, fast={}, slow={}",
        default_interval,
        fast_interval,
        slow_interval
    );

    // The spread between the extremes is what personalisation buys.
    let interval_range = (slow_interval - fast_interval).abs();
    assert!(
        interval_range > 0,
        "Personalized w20 should produce different intervals: range={}",
        interval_range
    );
}
|
|
|
|
/// Checks the same-day review formula (weights w17-w19): `Easy` raises
/// stability, `Again` lowers it, and `Good` changes it by less than 50%.
#[test]
fn test_fsrs6_same_day_handling() {
    let w = &FSRS6_WEIGHTS;
    let state = FSRS6State {
        difficulty: 5.0,
        stability: 5.0,
        reps: 3,
    };

    // Same-day review formula: S' = S * e^(w17 * (G - 3 + w18)) * S^(-w19)
    let same_day_stability = |s: f64, grade: i32| -> f64 {
        let g = grade as f64;
        s * (w[17] * (g - 3.0 + w[18])).exp() * s.powf(-w[19])
    };

    let before = state.stability;
    let same_day_good = same_day_stability(before, 3);
    let same_day_easy = same_day_stability(before, 4);
    let same_day_again = same_day_stability(before, 1);

    // Easy should increase stability.
    assert!(
        same_day_easy > state.stability,
        "Easy same-day review should increase stability: {:.2} > {:.2}",
        same_day_easy,
        state.stability
    );

    // Again should decrease stability.
    assert!(
        same_day_again < state.stability,
        "Again same-day review should decrease stability: {:.2} < {:.2}",
        same_day_again,
        state.stability
    );

    // Good should keep stability relatively stable.
    let good_change = (same_day_good - state.stability).abs() / state.stability;
    assert!(
        good_change < 0.5,
        "Good same-day review should keep stability relatively stable: {:.2}% change",
        good_change * 100.0
    );
}
|
|
|
|
/// Checks that a `Hard` rating (weight w15) yields a smaller stability gain
/// than a `Good` rating from the same starting state.
#[test]
fn test_fsrs6_hard_penalty_effectiveness() {
    let before = FSRS6State {
        difficulty: 5.0,
        stability: 10.0,
        reps: 5,
    };
    let elapsed = 10.0;

    let rate = |grade: FSRS6Grade| fsrs6_review(&before, grade, elapsed);
    let good_state = rate(FSRS6Grade::Good);
    let hard_state = rate(FSRS6Grade::Hard);

    // Hard penalty (w15 = 0.6014) should result in a lower stability increase.
    assert!(
        hard_state.stability < good_state.stability,
        "Hard review should result in lower stability: {:.2} < {:.2}",
        hard_state.stability,
        good_state.stability
    );

    // The hard/good ratio should fall in a reasonable range around the penalty.
    let hard_penalty = FSRS6_WEIGHTS[15];
    let stability_ratio = hard_state.stability / good_state.stability;
    assert!(
        stability_ratio < 1.0 && stability_ratio > hard_penalty * 0.5,
        "Hard penalty effect should be significant: ratio = {:.2}, w15 = {:.2}",
        stability_ratio,
        hard_penalty
    );
}
|
|
|
|
// ============================================================================
|
|
// SPREADING ACTIVATION VS SIMILARITY TESTS (8 tests)
|
|
// ============================================================================
|
|
|
|
/// 1-hop baseline: spreading activation and similarity search should both
/// find the direct neighbours of the query.
#[test]
fn test_spreading_vs_similarity_1_hop() {
    // Association network: "rust" is linked directly to both targets.
    let mut network = ActivationNetwork::new();
    for (neighbor, weight) in [("cargo", 0.9), ("ownership", 0.85)] {
        network.add_edge(
            "rust".to_string(),
            neighbor.to_string(),
            LinkType::Semantic,
            weight,
        );
    }

    // Embedding index: the targets sit close to "rust", "python" is unrelated.
    let mut sim_search = SimilaritySearch::new();
    for (id, embedding) in [
        ("rust", vec![1.0, 0.0, 0.0]),
        ("cargo", vec![0.9, 0.1, 0.0]),
        ("ownership", vec![0.85, 0.15, 0.0]),
        ("python", vec![0.0, 1.0, 0.0]),
    ] {
        sim_search.add(id, embedding);
    }

    // Spreading activation
    let spreading_results = network.activate("rust", 1.0);
    let spreading_found: HashSet<_> = spreading_results
        .iter()
        .map(|hit| hit.memory_id.as_str())
        .collect();

    // Similarity search, keeping only strong matches.
    let sim_results = sim_search.search(&[1.0, 0.0, 0.0], 3);
    let sim_found: HashSet<_> = sim_results
        .iter()
        .filter_map(|(id, score)| (*score > 0.8).then(|| id.as_str()))
        .collect();

    // At 1-hop, both should find the direct connections.
    assert!(
        spreading_found.contains("cargo"),
        "Spreading should find cargo"
    );
    assert!(
        spreading_found.contains("ownership"),
        "Spreading should find ownership"
    );
    assert!(sim_found.contains("cargo"), "Similarity should find cargo");
    assert!(
        sim_found.contains("ownership"),
        "Similarity should find ownership"
    );
}
|
|
|
|
/// 2-hop case: spreading activation reaches an indirect connection that
/// similarity search does not surface.
#[test]
fn test_spreading_vs_similarity_2_hop() {
    let mut network = ActivationNetwork::with_config(ActivationConfig {
        decay_factor: 0.8,
        max_hops: 3,
        min_threshold: 0.1,
        allow_cycles: false,
    });

    // Chain: rust -> tokio -> async_runtime.
    for (from, to, weight) in [("rust", "tokio", 0.9), ("tokio", "async_runtime", 0.85)] {
        network.add_edge(from.to_string(), to.to_string(), LinkType::Semantic, weight);
    }

    // Embeddings: "tokio" bridges the two ends, which are orthogonal to each
    // other (no direct similarity between rust and async_runtime).
    let mut sim_search = SimilaritySearch::new();
    for (id, embedding) in [
        ("rust", vec![1.0, 0.0, 0.0, 0.0]),
        ("tokio", vec![0.7, 0.7, 0.0, 0.0]),
        ("async_runtime", vec![0.0, 1.0, 0.0, 0.0]),
    ] {
        sim_search.add(id, embedding);
    }

    // Spreading finds async_runtime through the chain.
    let spreading_found_async = network
        .activate("rust", 1.0)
        .iter()
        .any(|hit| hit.memory_id == "async_runtime");

    // Similarity from "rust" does not rate async_runtime above 0.5.
    let sim_found_async = sim_search
        .search(&[1.0, 0.0, 0.0, 0.0], 5)
        .iter()
        .any(|(id, score)| id == "async_runtime" && *score > 0.5);

    assert!(
        spreading_found_async,
        "Spreading activation SHOULD find async_runtime through tokio"
    );
    assert!(
        !sim_found_async,
        "Similarity search should NOT find async_runtime (no direct similarity)"
    );
}
|
|
|
|
/// 3-hop case: spreading activation follows a deep chain whose endpoints have
/// orthogonal embeddings.
#[test]
fn test_spreading_vs_similarity_3_hop() {
    let mut network = ActivationNetwork::with_config(ActivationConfig {
        decay_factor: 0.8,
        max_hops: 4,
        min_threshold: 0.05,
        allow_cycles: false,
    });

    // Chain A -> B -> C -> D; every step is a semantic link of equal weight.
    for (from, to) in [
        ("concept_a", "concept_b"),
        ("concept_b", "concept_c"),
        ("concept_c", "concept_d"),
    ] {
        network.add_edge(from.to_string(), to.to_string(), LinkType::Semantic, 0.9);
    }

    // Embeddings: neighbours overlap, but A and D are orthogonal.
    let mut sim_search = SimilaritySearch::new();
    for (id, embedding) in [
        ("concept_a", vec![1.0, 0.0, 0.0, 0.0]),
        ("concept_b", vec![0.7, 0.7, 0.0, 0.0]),
        ("concept_c", vec![0.0, 0.7, 0.7, 0.0]),
        ("concept_d", vec![0.0, 0.0, 0.0, 1.0]),
    ] {
        sim_search.add(id, embedding);
    }

    // Spreading finds D, exactly three hops out.
    let spreading_results = network.activate("concept_a", 1.0);
    let d_result = spreading_results
        .iter()
        .find(|hit| hit.memory_id == "concept_d");
    match d_result {
        Some(hit) => assert_eq!(hit.distance, 3, "Should be exactly 3 hops away"),
        None => panic!("Spreading MUST find concept_d at 3 hops"),
    }

    // Similarity cannot find D from A.
    let sim_d_score = sim_search
        .search(&[1.0, 0.0, 0.0, 0.0], 10)
        .iter()
        .find_map(|(id, score)| (id == "concept_d").then(|| *score))
        .unwrap_or(0.0);

    assert!(
        sim_d_score < 0.1,
        "Similarity should NOT find concept_d (orthogonal embedding): score = {:.4}",
        sim_d_score
    );
}
|
|
|
|
/// Scenario test: debugging a memory leak, where the answer is reachable only
/// through a chain of associations.
///
/// Chain: memory_leak -> reference_counting -> arc_weak -> cyclic_references.
/// The final node's embedding is orthogonal to the query, so similarity
/// search cannot surface it.
#[test]
fn test_spreading_finds_chains_similarity_misses() {
    let mut network = ActivationNetwork::new();
    for (from, to, link, weight) in [
        ("memory_leak", "reference_counting", LinkType::Causal, 0.9),
        ("reference_counting", "arc_weak", LinkType::Semantic, 0.85),
        ("arc_weak", "cyclic_references", LinkType::Semantic, 0.9),
    ] {
        network.add_edge(from.to_string(), to.to_string(), link, weight);
    }

    // "cyclic_references" shares no embedding dimension with "memory_leak".
    let mut sim_search = SimilaritySearch::new();
    for (id, embedding) in [
        ("memory_leak", vec![1.0, 0.0, 0.0, 0.0]),
        ("reference_counting", vec![0.5, 0.5, 0.0, 0.0]),
        ("arc_weak", vec![0.0, 0.7, 0.3, 0.0]),
        ("cyclic_references", vec![0.0, 0.0, 0.0, 1.0]),
    ] {
        sim_search.add(id, embedding);
    }

    // Spreading activation finds the solution.
    let found_solution = network
        .activate("memory_leak", 1.0)
        .iter()
        .any(|hit| hit.memory_id == "cyclic_references");

    // Similarity search cannot find it.
    let sim_found = sim_search
        .search(&[1.0, 0.0, 0.0, 0.0], 10)
        .iter()
        .any(|(id, score)| id == "cyclic_references" && *score > 0.3);

    assert!(
        found_solution,
        "Spreading activation finds the solution (cyclic_references) through association"
    );
    assert!(
        !sim_found,
        "Similarity search CANNOT find cyclic_references from memory_leak"
    );
}
|
|
|
|
/// Verifies that an activation result carries the full hop-by-hop path from
/// the source node and that activation is weaker at the far end of the chain
/// than at the first hop.
#[test]
fn test_spreading_path_quality() {
    // A small knowledge graph about Rust error handling, laid out as a chain.
    let edges = [
        ("error_handling", "result_type", 0.9),
        ("result_type", "question_mark_operator", 0.85),
        ("question_mark_operator", "early_return", 0.8),
    ];

    let mut network = ActivationNetwork::new();
    for (source, target, weight) in edges {
        network.add_edge(
            source.to_string(),
            target.to_string(),
            LinkType::Semantic,
            weight,
        );
    }

    let results = network.activate("error_handling", 1.0);

    // Locate the last node of the chain.
    let far_end = results
        .iter()
        .find(|hit| hit.memory_id == "early_return")
        .expect("Should find early_return");

    // The recorded path must list every node from source to target, in order.
    let expected_path = [
        "error_handling",
        "result_type",
        "question_mark_operator",
        "early_return",
    ];
    assert_eq!(far_end.path.len(), 4, "Path should have 4 nodes");
    for (hop, expected_node) in expected_path.iter().enumerate() {
        assert_eq!(far_end.path[hop], *expected_node);
    }

    // Activation should decay along the path. A missing first-hop result is
    // treated as 0.0, which makes the comparison below fail.
    let first_hop_activation = results
        .iter()
        .find(|hit| hit.memory_id == "result_type")
        .map_or(0.0, |hit| hit.activation);

    assert!(
        far_end.activation < first_hop_activation,
        "Activation should decay: early_return ({:.3}) < result_type ({:.3})",
        far_end.activation,
        first_hop_activation
    );
}
|
|
|
|
/// Checks that activating a 1000-node graph completes in under 100 ms of
/// wall-clock time and still returns more than ten activated nodes.
#[test]
fn test_spreading_scale_performance() {
    // Graph shape: 1000 nodes, each linking to up to 3 others (~3000 edges).
    const NODE_COUNT: usize = 1000;
    const FAN_OUT: usize = 3;

    let mut network = ActivationNetwork::with_config(ActivationConfig {
        decay_factor: 0.7,
        max_hops: 3,
        min_threshold: 0.1,
        allow_cycles: false,
    });

    for source in 0..NODE_COUNT {
        // Targets sit at strides of 7, wrapping around the node range;
        // self-loops are skipped.
        let targets = (1..=FAN_OUT)
            .map(|step| (source + step * 7) % NODE_COUNT)
            .filter(|&target| target != source);

        for target in targets {
            network.add_edge(
                format!("node_{}", source),
                format!("node_{}", target),
                LinkType::Semantic,
                0.8,
            );
        }
    }

    // Time only the activation call itself, not graph construction.
    let timer = std::time::Instant::now();
    let results = network.activate("node_0", 1.0);
    let elapsed = timer.elapsed();

    // Should complete in reasonable time (< 100ms)
    assert!(
        elapsed.as_millis() < 100,
        "Spreading activation should be fast: {:?}",
        elapsed
    );

    // Should find multiple results
    let activated = results.len();
    assert!(
        activated > 10,
        "Should find multiple connected nodes: found {}",
        activated
    );
}
|
|
|
|
/// Compares activation on a fully connected 20-node graph (edge weight 0.5)
/// against a 20-node ring (edge weight 0.9).
///
/// Expects the dense graph to activate more nodes and the sparse ring to reach
/// an equal or higher peak activation.
#[test]
fn test_spreading_dense_vs_sparse() {
    const NODES: usize = 20;

    // Dense network: every node links to every other node.
    let mut dense_network = ActivationNetwork::new();
    for from in 0..NODES {
        for to in (0..NODES).filter(|&to| to != from) {
            dense_network.add_edge(
                format!("dense_{}", from),
                format!("dense_{}", to),
                LinkType::Semantic,
                0.5,
            );
        }
    }

    // Sparse network: each node links only to its successor, forming a ring.
    let mut sparse_network = ActivationNetwork::new();
    for from in 0..NODES {
        sparse_network.add_edge(
            format!("sparse_{}", from),
            format!("sparse_{}", (from + 1) % NODES),
            LinkType::Semantic,
            0.9,
        );
    }

    let dense_results = dense_network.activate("dense_0", 1.0);
    let sparse_results = sparse_network.activate("sparse_0", 1.0);

    // Dense should find more nodes
    assert!(
        dense_results.len() > sparse_results.len(),
        "Dense network should activate more nodes: {} vs {}",
        dense_results.len(),
        sparse_results.len()
    );

    // Peak activation per network; an empty result set yields 0.0.
    let dense_max = dense_results
        .iter()
        .map(|hit| hit.activation)
        .fold(0.0_f64, f64::max);
    let sparse_max = sparse_results
        .iter()
        .map(|hit| hit.activation)
        .fold(0.0_f64, f64::max);

    // Sparse should have higher max activation (less dilution)
    assert!(
        sparse_max >= dense_max,
        "Sparse network should have higher peak activation: {:.3} vs {:.3}",
        sparse_max,
        dense_max
    );
}
|
|
|
|
/// Verifies that edges of every link type (semantic, temporal, causal,
/// spatial) are traversed and that each result reports the link type of the
/// edge that was added for it.
#[test]
fn test_spreading_mixed_link_types() {
    let mut network = ActivationNetwork::new();

    // One outgoing edge per link type, all from the same hub and equally weighted.
    let outgoing = [
        ("semantic_relation", LinkType::Semantic),
        ("temporal_relation", LinkType::Temporal),
        ("causal_relation", LinkType::Causal),
        ("spatial_relation", LinkType::Spatial),
    ];
    for (target, link) in outgoing {
        network.add_edge("event".to_string(), target.to_string(), link, 0.9);
    }

    let results = network.activate("event", 1.0);

    // Should find all related nodes
    let found_ids: HashSet<_> = results.iter().map(|hit| hit.memory_id.as_str()).collect();

    let required = [
        ("semantic_relation", "Should find semantic relation"),
        ("temporal_relation", "Should find temporal relation"),
        ("causal_relation", "Should find causal relation"),
        ("spatial_relation", "Should find spatial relation"),
    ];
    for (id, message) in required {
        assert!(found_ids.contains(id), "{}", message);
    }

    // Verify link types are preserved; results for any other id are ignored.
    for hit in &results {
        let expected_link = match hit.memory_id.as_str() {
            "semantic_relation" => LinkType::Semantic,
            "temporal_relation" => LinkType::Temporal,
            "causal_relation" => LinkType::Causal,
            "spatial_relation" => LinkType::Spatial,
            _ => continue,
        };
        assert_eq!(hit.link_type, expected_link);
    }
}
|
|
|
|
// ============================================================================
|
|
// RETROACTIVE IMPORTANCE TESTS (5 tests)
|
|
// ============================================================================
|
|
|
|
/// Verifies that memories tagged before an importance event are captured when
/// that event later triggers PRP.
///
/// Scenario: a memory encoded at time T becomes important because of an event
/// at T+N. The memories here are tagged as ordinary and only the later user
/// flag marks them as important.
#[test]
fn test_retroactive_vs_timestamp_importance() {
    let mut stc = SynapticTaggingSystem::with_config(SynapticTaggingConfig {
        capture_window: CaptureWindow::new(9.0, 2.0), // 9 hours back, 2 hours forward
        prp_threshold: 0.7,
        tag_lifetime_hours: 12.0,
        min_tag_strength: 0.3,
        max_cluster_size: 50,
        enable_clustering: true,
        auto_decay: true,
        cleanup_interval_hours: 1.0,
    });

    // Tag memories as they are encoded (simulating normal operation).
    for memory_id in [
        "ordinary_memory_1",
        "ordinary_memory_2",
        "ordinary_memory_3",
    ] {
        stc.tag_memory(memory_id);
    }

    // The importance event arrives afterwards (effectively simultaneously in
    // this test, since no time is simulated between the calls).
    let result = stc.trigger_prp(ImportanceEvent::user_flag(
        "important_trigger",
        Some("Remember this!"),
    ));

    // STC should capture the earlier memories
    assert!(
        result.has_captures(),
        "Retroactive importance SHOULD capture earlier memories"
    );
    assert!(
        result.captured_count() >= 3,
        "Should capture all tagged memories: captured {}",
        result.captured_count()
    );

    // Every capture must have been encoded at or before the event. A small
    // negative distance (under 0.01 h) is tolerated as clock jitter between
    // tagging and triggering.
    for captured in &result.captured_memories {
        let distance = captured.temporal_distance_hours;
        let at_or_before_event = distance >= 0.0 || distance.abs() < 0.01;
        assert!(
            at_or_before_event,
            "Captured memory {} should be encoded at or before event (distance: {:.4}h)",
            captured.memory_id,
            captured.temporal_distance_hours
        );
    }

    // A purely timestamp-based importance score would miss these memories:
    // they were "ordinary" at encoding time.
}
|
|
|
|
/// Verifies that a strong emotional importance event groups previously tagged
/// memories into a cluster that records the triggering event type.
#[test]
fn test_retroactive_captures_related_memories() {
    let mut stc = SynapticTaggingSystem::new();

    // Encode several context memories before the trigger arrives.
    for memory_id in ["context_memory_1", "context_memory_2", "context_memory_3"] {
        stc.tag_memory(memory_id);
    }

    // Trigger with emotional content (high importance).
    let result = stc.trigger_prp(ImportanceEvent::emotional("trigger_memory", 0.95));

    // A cluster must exist; panics with the same message when it does not.
    let cluster = result.cluster.expect("Should create importance cluster");

    let cluster_size = cluster.size();
    assert!(
        cluster_size >= 3,
        "Cluster should contain the context memories: size = {}",
        cluster_size
    );

    // Verify cluster properties
    assert!(
        cluster.average_importance > 0.0,
        "Cluster should have positive importance"
    );
    assert_eq!(
        cluster.trigger_event_type,
        ImportanceEventType::EmotionalContent
    );
}
|
|
|
|
/// Exercises `CaptureWindow::new(9.0, 2.0)` at several offsets around an
/// event and checks both window membership and, for in-window cases, that a
/// positive capture probability is reported.
#[test]
fn test_retroactive_window_effectiveness() {
    let window = CaptureWindow::new(9.0, 2.0);
    let event_time = Utc::now();

    // Each case is (offset subtracted from the event time, expected
    // membership, label). Positive offsets place the memory before the
    // event; negative offsets place it after.
    let cases = [
        (Duration::hours(1), true, "1 hour before"),
        (Duration::hours(4), true, "4 hours before"),
        (Duration::hours(8), true, "8 hours before"),
        (
            Duration::hours(10),
            false,
            "10 hours before (outside window)",
        ),
        (Duration::minutes(-30), true, "30 minutes after"),
        (Duration::hours(-3), false, "3 hours after (outside window)"),
    ];

    for (offset, should_capture, description) in cases {
        let memory_time = event_time - offset;

        let in_window = window.is_in_window(memory_time, event_time);
        assert_eq!(
            in_window, should_capture,
            "{}: in_window={}, expected={}",
            description, in_window, should_capture
        );

        // Probability is only checked for memories expected inside the window.
        if !should_capture {
            continue;
        }

        match window.capture_probability(memory_time, event_time) {
            Some(probability) => assert!(
                probability > 0.0,
                "{} should have positive capture probability",
                description
            ),
            None => panic!("{} should have capture probability", description),
        }
    }
}
|
|
|
|
/// Verifies that tag strength (used here as a stand-in for semantic relevance)
/// influences capture: the two strongest tags must be captured, and the
/// stronger one must not end up with lower consolidated importance.
#[test]
fn test_retroactive_semantic_filtering() {
    let mut stc = SynapticTaggingSystem::with_config(SynapticTaggingConfig {
        capture_window: CaptureWindow::new(9.0, 2.0),
        prp_threshold: 0.7,
        tag_lifetime_hours: 12.0,
        min_tag_strength: 0.1, // Low threshold to test strength effects
        max_cluster_size: 100,
        enable_clustering: true,
        auto_decay: true,
        cleanup_interval_hours: 1.0,
    });

    // Tag memories with different initial strengths (simulating semantic
    // relevance). The last entry sits below `min_tag_strength`.
    let tagged = [
        ("highly_relevant", 0.95),
        ("moderately_relevant", 0.6),
        ("barely_relevant", 0.35),
        ("irrelevant", 0.05), // Below threshold
    ];
    for (memory_id, strength) in tagged {
        stc.tag_memory_with_strength(memory_id, strength);
    }

    // Trigger importance event
    let result = stc.trigger_prp(ImportanceEvent::user_flag("trigger", None));

    let captured_ids: HashSet<_> = result
        .captured_memories
        .iter()
        .map(|capture| capture.memory_id.as_str())
        .collect();

    assert!(
        captured_ids.contains("highly_relevant"),
        "Highly relevant memory should be captured"
    );
    assert!(
        captured_ids.contains("moderately_relevant"),
        "Moderately relevant memory should be captured"
    );

    // Consolidated importance for a captured memory, or 0.0 when absent.
    let importance_of = |memory_id: &str| {
        result
            .captured_memories
            .iter()
            .find(|capture| capture.memory_id == memory_id)
            .map_or(0.0, |capture| capture.consolidated_importance)
    };

    let highly_relevant_importance = importance_of("highly_relevant");
    let moderately_relevant_importance = importance_of("moderately_relevant");

    assert!(
        highly_relevant_importance >= moderately_relevant_importance,
        "Highly relevant should have >= importance: {:.2} vs {:.2}",
        highly_relevant_importance,
        moderately_relevant_importance
    );
}
|
|
|
|
/// Demonstrates retroactive importance end to end: a memory tagged as
/// ordinary is captured, with high consolidated importance, once a later
/// maximum-strength event triggers PRP.
///
/// Scenario (AI assistant conversation):
/// 1. User mentions "Bob is taking a vacation next week".
/// 2. Hours later, user says "Bob is leaving the company".
/// 3. The vacation memory becomes retroactively important as context.
#[test]
fn test_proof_unique_to_vestige() {
    let mut stc = SynapticTaggingSystem::new();

    // Time T: an ordinary remark about vacation plans.
    let _vacation_tag =
        stc.tag_memory_with_context("vacation_mention", "User mentioned Bob's vacation plans");

    // Other ordinary memories encoded around the same time.
    for memory_id in ["unrelated_memory_1", "unrelated_memory_2"] {
        stc.tag_memory(memory_id);
    }

    // Time T + N hours: the important revelation ("Bob is leaving the company").
    let departure = ImportanceEvent {
        event_type: ImportanceEventType::UserFlag,
        memory_id: Some("departure_announcement".to_string()),
        timestamp: Utc::now(),
        strength: 1.0, // Maximum importance
        context: Some("Bob is leaving - this makes prior context important".to_string()),
    };

    let result = stc.trigger_prp(departure);

    // The vacation memory should be captured; panics with the same message
    // when it is missing from the capture list.
    let vacation_capture = result
        .captured_memories
        .iter()
        .find(|capture| capture.memory_id == "vacation_mention")
        .expect(
            "UNIQUE TO VESTIGE: The vacation memory is now important because of the departure news!",
        );

    // In this test the memory is tagged at nearly the same instant as the
    // event, so the distance is ~0. Conceptually it is still a backward
    // capture: the memory existed BEFORE it became important.
    let distance = vacation_capture.temporal_distance_hours;
    let at_or_before_event = distance >= 0.0 || distance.abs() < 0.01;
    assert!(
        at_or_before_event,
        "Memory should be encoded at or before the importance event (distance: {:.4}h)",
        vacation_capture.temporal_distance_hours
    );

    assert!(
        vacation_capture.consolidated_importance > 0.5,
        "Vacation memory should have high consolidated importance: {:.2}",
        vacation_capture.consolidated_importance
    );

    // Contrast being illustrated:
    // - Encoding-time scoring: Importance = f(content at encoding time)
    // - Vestige: Importance = f(content, future events, associations)
    //
    // Key insight: the memory was ORDINARY when encoded and became IMPORTANT
    // only because of a subsequent event.
}
|
|
|
|
// ============================================================================
|
|
// HIPPOCAMPAL INDEXING TESTS (4 tests)
|
|
// ============================================================================
|
|
|
|
/// Indexes 100 memories with 384-dimensional embeddings, then checks that an
/// index search returns results in under 50 ms and that the index reports
/// `INDEX_EMBEDDING_DIM` as its embedding dimensionality.
#[test]
fn test_two_phase_vs_flat_search() {
    const NUM_MEMORIES: usize = 100;

    let index = HippocampalIndex::new();
    let now = Utc::now();

    // Deterministic synthetic embedding derived from the memory's ordinal.
    let synthetic_embedding = |seed: usize| -> Vec<f32> {
        (0..384)
            .map(|dim| ((seed * 17 + dim) as f32 / 1000.0).sin())
            .collect()
    };

    for i in 0..NUM_MEMORIES {
        let memory_id = format!("memory_{}", i);
        let content = format!("Content for memory {} with some text", i);

        // The indexing result is discarded, so a failure here only surfaces
        // through the emptiness check further down.
        let _ = index.index_memory(
            &memory_id,
            &content,
            "fact",
            now,
            Some(synthetic_embedding(i)),
        );
    }

    // Phase 1: Fast index search (compressed embeddings)
    let query = IndexQuery::from_text("memory").with_limit(10);

    let timer = std::time::Instant::now();
    let results = index.search_indices(&query).unwrap();
    let index_search_time = timer.elapsed();

    // Should complete quickly
    assert!(
        index_search_time.as_millis() < 50,
        "Index search should be fast: {:?}",
        index_search_time
    );

    // Should find results
    assert!(!results.is_empty(), "Should find matching memories");

    // The index stores compressed embeddings (smaller than the 384-dim
    // inputs), which is what keeps large-scale search cheap.
    let stats = index.stats();
    assert_eq!(
        stats.index_dimensions, INDEX_EMBEDDING_DIM,
        "Index should use compressed embeddings ({}D)",
        INDEX_EMBEDDING_DIM
    );
}
|
|
|
|
/// Verifies that the default index embedding dimension equals
/// `INDEX_EMBEDDING_DIM` and is at least 2x smaller than a 384-dimensional
/// full embedding, yielding a positive per-memory byte saving.
#[test]
fn test_index_compression_ratio() {
    // Size of one `f32` component in bytes.
    const BYTES_PER_COMPONENT: usize = 4;

    let defaults = HippocampalIndexConfig::default();

    // Dimensionality of a full embedding as assumed by this test.
    let full_embedding_dim = 384;

    // Dimensionality the index actually stores.
    let index_embedding_dim = defaults.summary_dimensions;

    let compression_ratio = full_embedding_dim as f64 / index_embedding_dim as f64;
    assert!(
        compression_ratio >= 2.0,
        "Index should compress embeddings by at least 2x: {:.1}x",
        compression_ratio
    );

    // The default configuration must agree with the exported constant.
    assert_eq!(
        index_embedding_dim, INDEX_EMBEDDING_DIM,
        "Default index dimension should be {}",
        INDEX_EMBEDDING_DIM
    );

    // Bytes saved per memory by storing the smaller embedding.
    let full_size_bytes = full_embedding_dim * BYTES_PER_COMPONENT;
    let index_size_bytes = index_embedding_dim * BYTES_PER_COMPONENT;
    let savings_per_memory = full_size_bytes - index_size_bytes;

    assert!(
        savings_per_memory > 0,
        "Should save {} bytes per memory",
        savings_per_memory
    );
}
|
|
|
|
/// Generates 1000 barcodes and checks that their compact strings are unique,
/// their ids increase by exactly one, and their content fingerprints are
/// distinct; then checks that identical content yields matching fingerprints
/// under different ids.
#[test]
fn test_barcode_orthogonality() {
    let mut generator = BarcodeGenerator::new();
    let now = Utc::now();

    let mut seen_strings: HashSet<String> = HashSet::new();
    let mut barcodes: Vec<MemoryBarcode> = Vec::new();

    // Each barcode gets distinct content and a timestamp 1 ms after the last.
    for i in 0..1000 {
        let timestamp = now + Duration::milliseconds(i);
        let barcode = generator.generate(&format!("Content {}", i), timestamp);

        // Check uniqueness of the compact string form.
        let compact = barcode.to_compact_string();
        let already_seen = seen_strings.contains(&compact);
        assert!(!already_seen, "Barcode {} should be unique", compact);
        seen_strings.insert(compact);

        barcodes.push(barcode);
    }

    // Each id must be exactly one greater than its predecessor's.
    for pair in barcodes.windows(2) {
        assert_eq!(pair[1].id, pair[0].id + 1, "IDs should be sequential");
    }

    // Different content must produce different fingerprints.
    let fingerprints: HashSet<u32> = barcodes
        .iter()
        .map(|barcode| barcode.content_fingerprint)
        .collect();
    assert_eq!(
        fingerprints.len(),
        barcodes.len(),
        "Content fingerprints should be unique for different content"
    );

    // Same content, different explicit ids and timestamps.
    let first = generator.generate_with_id(9999, "same content", now);
    let second = generator.generate_with_id(9998, "same content", now + Duration::hours(1));

    assert!(
        first.same_content(&second),
        "Same content should produce same fingerprint"
    );
    assert_ne!(first.id, second.id, "But IDs should still be different");
}
|
|
|
|
/// Exercises the `ContentPointer` constructors and builder methods, then
/// checks that a memory indexed through `HippocampalIndex` can be retrieved
/// with a matching barcode and at least one content pointer.
#[test]
fn test_content_pointer_accuracy() {
    // SQLite pointer: not inline, keeps the content type it was given.
    let sqlite_ptr = ContentPointer::sqlite("knowledge_nodes", 42, ContentType::Text);
    assert!(!sqlite_ptr.is_inline());
    assert!(matches!(sqlite_ptr.content_type, ContentType::Text));

    // Inline pointer: the five-byte payload is moved into the pointer (it is
    // not needed afterwards) and its size is reported.
    let payload = vec![1u8, 2, 3, 4, 5];
    let inline_ptr = ContentPointer::inline(payload, ContentType::Binary);
    assert!(inline_ptr.is_inline());
    assert_eq!(inline_ptr.size_bytes, Some(5));

    // Vector store pointer: not inline, typed as an embedding.
    let vector_ptr = ContentPointer::vector_store("embeddings", 123);
    assert!(!vector_ptr.is_inline());
    assert!(matches!(vector_ptr.content_type, ContentType::Embedding));

    // Builder: chunk range and explicit size.
    let chunked_ptr = ContentPointer::sqlite("chunks", 99, ContentType::Text)
        .with_chunk_range(100, 200)
        .with_size(100);

    assert_eq!(chunked_ptr.chunk_range, Some((100, 200)));
    assert_eq!(chunked_ptr.size_bytes, Some(100));

    // Builder: content hash.
    let hashed_ptr = ContentPointer::sqlite("data", 1, ContentType::Text).with_hash(0xDEADBEEF);

    assert_eq!(hashed_ptr.content_hash, Some(0xDEADBEEF));

    // Create a full memory index entry and verify its pointers.
    let index = HippocampalIndex::new();
    let now = Utc::now();

    let barcode = index
        .index_memory(
            "test_memory",
            "Test content for pointer verification",
            "fact",
            now,
            None,
        )
        .expect("indexing test_memory should succeed");

    // Retrieve and verify: the lookup must not error and the entry must exist.
    let retrieved = index
        .get_index("test_memory")
        .expect("looking up test_memory should not fail")
        .expect("test_memory should be present in the index");

    assert_eq!(retrieved.barcode, barcode);
    assert!(
        !retrieved.content_pointers.is_empty(),
        "Should have content pointer"
    );

    // The first (default) pointer must describe text content. Only the
    // content type is checked here, not the storage backend.
    let default_ptr = &retrieved.content_pointers[0];
    assert!(matches!(default_ptr.content_type, ContentType::Text));
}
|