feat: P0/P1 fixes — backup, export, gc, performance, auto-consolidation, encryption

P0 fixes:
- Add `vestige backup <path>` — full DB copy with WAL checkpoint flush
  (sketch after this list)
- Add `vestige export --format json|jsonl [--tags] [--since] <path>` —
  paginated memory export with tag/date filtering
- Add `vestige gc --min-retention 0.1 [--max-age-days] [--dry-run] [--yes]`
  — bulk cleanup of stale memories with safety prompts
- Fix apply_decay() scaling: batched pagination (500 rows/batch) with
  explicit transactions instead of loading all nodes into memory
- Fix hidden MCP resources: memory://insights and memory://consolidation-log
  now listed in resources/list (were implemented but undiscoverable)
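
For the backup command, a minimal sketch of the checkpoint-then-copy flow,
assuming rusqlite; the helper name `backup_db` and the error type are
illustrative, not the committed implementation:

    use std::{error::Error, fs, path::Path};
    use rusqlite::Connection;

    fn backup_db(conn: &Connection, db_path: &Path, dest: &Path) -> Result<(), Box<dyn Error>> {
        // Flush every WAL frame into the main database file and truncate
        // the WAL, so a plain file copy captures a consistent snapshot.
        conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |_| Ok(()))?;
        fs::copy(db_path, dest)?;
        Ok(())
    }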

P1 fixes:
- Add auto-consolidation on server startup: FSRS-6 decay runs in the
  background after a 2s delay, only if the last consolidation was >6 hours
  ago (sketch after this list)
- Add encryption at rest via SQLCipher feature flag: use --features encryption
  with VESTIGE_ENCRYPTION_KEY env var (bundled-sqlite and encryption are
  mutually exclusive)
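
The startup hook could look roughly like this; a sketch assuming a tokio
runtime, with `last_consolidation` and the decay call site as illustrative
names rather than the committed code:

    use std::time::Duration;
    use chrono::{DateTime, Utc};

    fn spawn_auto_consolidation(last_consolidation: Option<DateTime<Utc>>) {
        tokio::spawn(async move {
            // The 2s delay keeps the decay pass off the client's startup path.
            tokio::time::sleep(Duration::from_secs(2)).await;
            let due = match last_consolidation {
                Some(ts) => Utc::now() - ts > chrono::Duration::hours(6),
                None => true,
            };
            if due {
                // run the FSRS-6 decay / consolidation pass here
            }
        });
    }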

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Sam Valladares 2026-02-12 03:02:32 -06:00
parent a680fa7d2f
commit 6a5c3771fb
7 changed files with 565 additions and 50 deletions


@@ -11,7 +11,15 @@ keywords = ["memory", "spaced-repetition", "fsrs", "embeddings", "knowledge-grap
 categories = ["science", "database"]
 
 [features]
-default = ["embeddings", "vector-search"]
+default = ["embeddings", "vector-search", "bundled-sqlite"]
+# SQLite backend (default, unencrypted)
+bundled-sqlite = ["rusqlite/bundled"]
+# Encrypted SQLite via SQLCipher (mutually exclusive with bundled-sqlite)
+# Use: --no-default-features --features encryption,embeddings,vector-search
+# Set VESTIGE_ENCRYPTION_KEY env var to enable encryption
+encryption = ["rusqlite/bundled-sqlcipher"]
 # Core embeddings with fastembed (ONNX-based, local inference)
 embeddings = ["dep:fastembed"]
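
Because the two backends link different SQLite libraries, enabling both
features at once would otherwise fail at link time with opaque
duplicate-symbol errors. One way to enforce the exclusivity at compile time,
sketched here as an assumption rather than something this diff shows, is a
guard in the crate root:

    // lib.rs (illustrative): reject the invalid feature combination early.
    #[cfg(all(feature = "bundled-sqlite", feature = "encryption"))]
    compile_error!(
        "`bundled-sqlite` and `encryption` are mutually exclusive; build with \
         --no-default-features --features encryption,embeddings,vector-search"
    );
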
@@ -40,7 +48,8 @@ uuid = { version = "1", features = ["v4", "serde"] }
 thiserror = "2"
 
 # Database - SQLite with FTS5 full-text search and JSON
-rusqlite = { version = "0.38", features = ["bundled", "chrono", "serde_json"] }
+# Note: "bundled" or "bundled-sqlcipher" added via feature flags above
+rusqlite = { version = "0.38", features = ["chrono", "serde_json"] }
 # Platform-specific directories
 directories = "6"


@@ -105,6 +105,16 @@ impl Storage {
         let conn = Connection::open(&path)?;
 
+        // Apply encryption key if SQLCipher is enabled and key is provided
+        #[cfg(feature = "encryption")]
+        {
+            if let Ok(key) = std::env::var("VESTIGE_ENCRYPTION_KEY") {
+                if !key.is_empty() {
+                    conn.pragma_update(None, "key", &key)?;
+                }
+            }
+        }
+
         // Configure SQLite for performance
         conn.execute_batch(
             "PRAGMA journal_mode = WAL;
@@ -1431,61 +1441,81 @@ impl Storage {
         Ok(result)
     }
 
-    /// Apply decay to all memories
+    /// Apply decay to all memories using batched pagination to avoid OOM.
+    ///
+    /// Instead of loading all knowledge_nodes into memory at once, this
+    /// processes rows in fixed-size batches (BATCH_SIZE = 500) using
+    /// LIMIT/OFFSET pagination. Each batch runs inside its own transaction
+    /// for atomicity without holding a giant write-lock.
     pub fn apply_decay(&mut self) -> Result<i32> {
         const FSRS_DECAY: f64 = 0.5;
         const FSRS_FACTOR: f64 = 9.0;
+        const BATCH_SIZE: i64 = 500;
 
         let now = Utc::now();
+        let mut count = 0i32;
+        let mut offset = 0i64;
 
-        let mut stmt = self.conn.prepare(
-            "SELECT id, last_accessed, storage_strength, retrieval_strength,
-                    sentiment_magnitude, stability
-             FROM knowledge_nodes",
-        )?;
-
-        let nodes: Vec<(String, String, f64, f64, f64, f64)> = stmt
-            .query_map([], |row| {
-                Ok((
-                    row.get(0)?,
-                    row.get(1)?,
-                    row.get(2)?,
-                    row.get(3)?,
-                    row.get(4)?,
-                    row.get(5)?,
-                ))
-            })?
-            .filter_map(|r| r.ok())
-            .collect();
-
-        let mut count = 0;
-        for (id, last_accessed, storage_strength, _, sentiment_mag, stability) in nodes {
-            let last = DateTime::parse_from_rfc3339(&last_accessed)
-                .map(|dt| dt.with_timezone(&Utc))
-                .unwrap_or(now);
-            let days_since = (now - last).num_seconds() as f64 / 86400.0;
-            if days_since > 0.0 {
-                let effective_stability = stability * (1.0 + sentiment_mag * 0.5);
-                let new_retrieval = (1.0 + days_since / (FSRS_FACTOR * effective_stability))
-                    .powf(-1.0 / FSRS_DECAY);
-                let new_retention =
-                    (new_retrieval * 0.7) + ((storage_strength / 10.0).min(1.0) * 0.3);
-                self.conn.execute(
-                    "UPDATE knowledge_nodes SET
-                        retrieval_strength = ?1,
-                        retention_strength = ?2
-                     WHERE id = ?3",
-                    params![new_retrieval, new_retention, id],
-                )?;
-                count += 1;
-            }
-        }
+        loop {
+            let batch: Vec<(String, String, f64, f64, f64, f64)> = self
+                .conn
+                .prepare(
+                    "SELECT id, last_accessed, storage_strength, retrieval_strength,
+                            sentiment_magnitude, stability
+                     FROM knowledge_nodes
+                     ORDER BY id
+                     LIMIT ?1 OFFSET ?2",
+                )?
+                .query_map(params![BATCH_SIZE, offset], |row| {
+                    Ok((
+                        row.get(0)?,
+                        row.get(1)?,
+                        row.get(2)?,
+                        row.get(3)?,
+                        row.get(4)?,
+                        row.get(5)?,
+                    ))
+                })?
+                .filter_map(|r| r.ok())
+                .collect();
+
+            if batch.is_empty() {
+                break;
+            }
+            let batch_len = batch.len() as i64;
+
+            // Use a transaction for the batch
+            let tx = self.conn.transaction()?;
+            for (id, last_accessed, storage_strength, _, sentiment_mag, stability) in &batch {
+                let last = DateTime::parse_from_rfc3339(last_accessed)
+                    .map(|dt| dt.with_timezone(&Utc))
+                    .unwrap_or(now);
+                let days_since = (now - last).num_seconds() as f64 / 86400.0;
+                if days_since > 0.0 {
+                    let effective_stability = stability * (1.0 + sentiment_mag * 0.5);
+                    let new_retrieval =
+                        (1.0 + days_since / (FSRS_FACTOR * effective_stability))
+                            .powf(-1.0 / FSRS_DECAY);
+                    let new_retention =
+                        (new_retrieval * 0.7) + ((storage_strength / 10.0).min(1.0) * 0.3);
+                    tx.execute(
+                        "UPDATE knowledge_nodes SET retrieval_strength = ?1, retention_strength = ?2 WHERE id = ?3",
+                        params![new_retrieval, new_retention, id],
+                    )?;
+                    count += 1;
+                }
+            }
+            tx.commit()?;
+            offset += batch_len;
+        }
 
         Ok(count)
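
For intuition about what the loop computes: with FSRS_DECAY = 0.5 the
exponent -1/FSRS_DECAY is -2, so retrievability follows
R(t) = (1 + t / (9 * S_eff))^-2, where S_eff = stability * (1 + 0.5 *
sentiment_magnitude). A pure-function restatement with a worked check (a
sketch for illustration; the committed code inlines this math):

    fn retrievability(days: f64, stability: f64, sentiment_mag: f64) -> f64 {
        let s_eff = stability * (1.0 + sentiment_mag * 0.5);
        (1.0 + days / (9.0 * s_eff)).powf(-2.0)
    }

    fn main() {
        // stability = 1.0, neutral sentiment: R(1 day) = (10/9)^-2 = 0.81
        // and R(9 days) = 2^-2 = 0.25, so most of the decay happens early.
        assert!((retrievability(1.0, 1.0, 0.0) - 0.81).abs() < 1e-9);
        assert!((retrievability(9.0, 1.0, 0.0) - 0.25).abs() < 1e-12);
    }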