feat: P0/P1 fixes — backup, export, gc, performance, auto-consolidation, encryption

P0 fixes:
- Add `vestige backup <path>` — full DB copy with WAL checkpoint flush
  (sketch after this list)
- Add `vestige export --format json|jsonl [--tags] [--since] <path>` —
  paginated memory export with tag/date filtering
- Add `vestige gc --min-retention 0.1 [--max-age-days] [--dry-run] [--yes]`
  — bulk cleanup of stale memories with safety prompts
- Fix apply_decay() scaling: batched pagination (500 rows/batch) with
  explicit transactions instead of loading all nodes into memory
- Fix hidden MCP resources: memory://insights and memory://consolidation-log
  now listed in resources/list (were implemented but undiscoverable)
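
For the backup command, a minimal sketch of the checkpoint-then-copy flow,
assuming rusqlite; the helper name `backup_db` and the error type are
illustrative, not the committed implementation:

    use std::{error::Error, fs, path::Path};
    use rusqlite::Connection;

    fn backup_db(conn: &Connection, db_path: &Path, dest: &Path) -> Result<(), Box<dyn Error>> {
        // Flush every WAL frame into the main database file and truncate
        // the WAL, so a plain file copy captures a consistent snapshot.
        conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |_| Ok(()))?;
        fs::copy(db_path, dest)?;
        Ok(())
    }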

P1 fixes:
- Add auto-consolidation on server startup: FSRS-6 decay runs in the
  background after a 2s delay, only if the last consolidation was >6 hours
  ago (sketch after this list)
- Add encryption at rest via SQLCipher feature flag: use --features encryption
  with VESTIGE_ENCRYPTION_KEY env var (bundled-sqlite and encryption are
  mutually exclusive)
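
The startup hook could look roughly like this; a sketch assuming a tokio
runtime, with `last_consolidation` and the decay call site as illustrative
names rather than the committed code:

    use std::time::Duration;
    use chrono::{DateTime, Utc};

    fn spawn_auto_consolidation(last_consolidation: Option<DateTime<Utc>>) {
        tokio::spawn(async move {
            // The 2s delay keeps the decay pass off the client's startup path.
            tokio::time::sleep(Duration::from_secs(2)).await;
            let due = match last_consolidation {
                Some(ts) => Utc::now() - ts > chrono::Duration::hours(6),
                None => true,
            };
            if due {
                // run the FSRS-6 decay / consolidation pass here
            }
        });
    }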

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Sam Valladares 2026-02-12 03:02:32 -06:00
parent a680fa7d2f
commit 6a5c3771fb
7 changed files with 565 additions and 50 deletions


@@ -11,7 +11,15 @@ keywords = ["memory", "spaced-repetition", "fsrs", "embeddings", "knowledge-grap
 categories = ["science", "database"]
 
 [features]
-default = ["embeddings", "vector-search"]
+default = ["embeddings", "vector-search", "bundled-sqlite"]
+# SQLite backend (default, unencrypted)
+bundled-sqlite = ["rusqlite/bundled"]
+# Encrypted SQLite via SQLCipher (mutually exclusive with bundled-sqlite)
+# Use: --no-default-features --features encryption,embeddings,vector-search
+# Set VESTIGE_ENCRYPTION_KEY env var to enable encryption
+encryption = ["rusqlite/bundled-sqlcipher"]
 # Core embeddings with fastembed (ONNX-based, local inference)
 embeddings = ["dep:fastembed"]
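
Because the two backends link different SQLite libraries, enabling both
features at once would otherwise fail at link time with opaque
duplicate-symbol errors. One way to enforce the exclusivity at compile time,
sketched here as an assumption rather than something this diff shows, is a
guard in the crate root:

    // lib.rs (illustrative): reject the invalid feature combination early.
    #[cfg(all(feature = "bundled-sqlite", feature = "encryption"))]
    compile_error!(
        "`bundled-sqlite` and `encryption` are mutually exclusive; build with \
         --no-default-features --features encryption,embeddings,vector-search"
    );
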
@@ -40,7 +48,8 @@ uuid = { version = "1", features = ["v4", "serde"] }
 thiserror = "2"
 
 # Database - SQLite with FTS5 full-text search and JSON
-rusqlite = { version = "0.38", features = ["bundled", "chrono", "serde_json"] }
+# Note: "bundled" or "bundled-sqlcipher" added via feature flags above
+rusqlite = { version = "0.38", features = ["chrono", "serde_json"] }
 # Platform-specific directories
 directories = "6"


@@ -105,6 +105,16 @@ impl Storage {
         let conn = Connection::open(&path)?;
 
+        // Apply encryption key if SQLCipher is enabled and key is provided
+        #[cfg(feature = "encryption")]
+        {
+            if let Ok(key) = std::env::var("VESTIGE_ENCRYPTION_KEY") {
+                if !key.is_empty() {
+                    conn.pragma_update(None, "key", &key)?;
+                }
+            }
+        }
+
         // Configure SQLite for performance
         conn.execute_batch(
             "PRAGMA journal_mode = WAL;
@@ -1431,61 +1441,81 @@ impl Storage {
         Ok(result)
     }
 
-    /// Apply decay to all memories
+    /// Apply decay to all memories using batched pagination to avoid OOM.
+    ///
+    /// Instead of loading all knowledge_nodes into memory at once, this
+    /// processes rows in fixed-size batches (BATCH_SIZE = 500) using
+    /// LIMIT/OFFSET pagination. Each batch runs inside its own transaction
+    /// for atomicity without holding a giant write-lock.
     pub fn apply_decay(&mut self) -> Result<i32> {
         const FSRS_DECAY: f64 = 0.5;
         const FSRS_FACTOR: f64 = 9.0;
+        const BATCH_SIZE: i64 = 500;
 
         let now = Utc::now();
+        let mut count = 0i32;
+        let mut offset = 0i64;
 
-        let mut stmt = self.conn.prepare(
-            "SELECT id, last_accessed, storage_strength, retrieval_strength,
-                    sentiment_magnitude, stability
-             FROM knowledge_nodes",
-        )?;
-
-        let nodes: Vec<(String, String, f64, f64, f64, f64)> = stmt
-            .query_map([], |row| {
-                Ok((
-                    row.get(0)?,
-                    row.get(1)?,
-                    row.get(2)?,
-                    row.get(3)?,
-                    row.get(4)?,
-                    row.get(5)?,
-                ))
-            })?
-            .filter_map(|r| r.ok())
-            .collect();
-
-        let mut count = 0;
-        for (id, last_accessed, storage_strength, _, sentiment_mag, stability) in nodes {
-            let last = DateTime::parse_from_rfc3339(&last_accessed)
-                .map(|dt| dt.with_timezone(&Utc))
-                .unwrap_or(now);
-            let days_since = (now - last).num_seconds() as f64 / 86400.0;
-            if days_since > 0.0 {
-                let effective_stability = stability * (1.0 + sentiment_mag * 0.5);
-                let new_retrieval = (1.0 + days_since / (FSRS_FACTOR * effective_stability))
-                    .powf(-1.0 / FSRS_DECAY);
-                let new_retention =
-                    (new_retrieval * 0.7) + ((storage_strength / 10.0).min(1.0) * 0.3);
-                self.conn.execute(
-                    "UPDATE knowledge_nodes SET
-                        retrieval_strength = ?1,
-                        retention_strength = ?2
-                     WHERE id = ?3",
-                    params![new_retrieval, new_retention, id],
-                )?;
-                count += 1;
-            }
-        }
+        loop {
+            let batch: Vec<(String, String, f64, f64, f64, f64)> = self
+                .conn
+                .prepare(
+                    "SELECT id, last_accessed, storage_strength, retrieval_strength,
+                            sentiment_magnitude, stability
+                     FROM knowledge_nodes
+                     ORDER BY id
+                     LIMIT ?1 OFFSET ?2",
+                )?
+                .query_map(params![BATCH_SIZE, offset], |row| {
+                    Ok((
+                        row.get(0)?,
+                        row.get(1)?,
+                        row.get(2)?,
+                        row.get(3)?,
+                        row.get(4)?,
+                        row.get(5)?,
+                    ))
+                })?
+                .filter_map(|r| r.ok())
+                .collect();
+
+            if batch.is_empty() {
+                break;
+            }
+            let batch_len = batch.len() as i64;
+
+            // Use a transaction for the batch
+            let tx = self.conn.transaction()?;
+            for (id, last_accessed, storage_strength, _, sentiment_mag, stability) in &batch {
+                let last = DateTime::parse_from_rfc3339(last_accessed)
+                    .map(|dt| dt.with_timezone(&Utc))
+                    .unwrap_or(now);
+                let days_since = (now - last).num_seconds() as f64 / 86400.0;
+                if days_since > 0.0 {
+                    let effective_stability = stability * (1.0 + sentiment_mag * 0.5);
+                    let new_retrieval =
+                        (1.0 + days_since / (FSRS_FACTOR * effective_stability))
+                            .powf(-1.0 / FSRS_DECAY);
+                    let new_retention =
+                        (new_retrieval * 0.7) + ((storage_strength / 10.0).min(1.0) * 0.3);
+                    tx.execute(
+                        "UPDATE knowledge_nodes SET retrieval_strength = ?1, retention_strength = ?2 WHERE id = ?3",
+                        params![new_retrieval, new_retention, id],
+                    )?;
+                    count += 1;
+                }
+            }
+            tx.commit()?;
+            offset += batch_len;
+        }
 
         Ok(count)
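
For intuition about what the loop computes: with FSRS_DECAY = 0.5 the
exponent -1/FSRS_DECAY is -2, so retrievability follows
R(t) = (1 + t / (9 * S_eff))^-2, where S_eff = stability * (1 + 0.5 *
sentiment_magnitude). A pure-function restatement with a worked check (a
sketch for illustration; the committed code inlines this math):

    fn retrievability(days: f64, stability: f64, sentiment_mag: f64) -> f64 {
        let s_eff = stability * (1.0 + sentiment_mag * 0.5);
        (1.0 + days / (9.0 * s_eff)).powf(-2.0)
    }

    fn main() {
        // stability = 1.0, neutral sentiment: R(1 day) = (10/9)^-2 = 0.81
        // and R(9 days) = 2^-2 = 0.25, so most of the decay happens early.
        assert!((retrievability(1.0, 1.0, 0.0) - 0.81).abs() < 1e-9);
        assert!((retrievability(9.0, 1.0, 0.0) - 0.25).abs() < 1e-12);
    }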