Mirror of https://github.com/samvallad33/vestige.git, synced 2026-05-09 07:42:37 +02:00.
feat: P0/P1 fixes — backup, export, gc, performance, auto-consolidation, encryption

P0 fixes:
- Add `vestige backup <path>` — full DB copy with WAL checkpoint flush
- Add `vestige export --format json|jsonl [--tags] [--since] <path>` — paginated memory export with tag/date filtering
- Add `vestige gc --min-retention 0.1 [--max-age-days] [--dry-run] [--yes]` — bulk cleanup of stale memories with safety prompts
- Fix apply_decay() scaling: batched pagination (500 rows/batch) with explicit transactions instead of loading all nodes into memory
- Fix hidden MCP resources: memory://insights and memory://consolidation-log now listed in resources/list (were implemented but undiscoverable)

P1 fixes:
- Add auto-consolidation on server startup: FSRS-6 decay runs in background after 2s delay, only if last consolidation was >6 hours ago
- Add encryption at rest via SQLCipher feature flag: use --features encryption with VESTIGE_ENCRYPTION_KEY env var (bundled-sqlite and encryption are mutually exclusive)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a680fa7d2f
commit
6a5c3771fb
7 changed files with 565 additions and 50 deletions
|
|
@ -11,7 +11,15 @@ keywords = ["memory", "spaced-repetition", "fsrs", "embeddings", "knowledge-grap
|
|||
categories = ["science", "database"]
|
||||
|
||||
[features]
|
||||
default = ["embeddings", "vector-search"]
|
||||
default = ["embeddings", "vector-search", "bundled-sqlite"]
|
||||
|
||||
# SQLite backend (default, unencrypted)
|
||||
bundled-sqlite = ["rusqlite/bundled"]
|
||||
|
||||
# Encrypted SQLite via SQLCipher (mutually exclusive with bundled-sqlite)
|
||||
# Use: --no-default-features --features encryption,embeddings,vector-search
|
||||
# Set VESTIGE_ENCRYPTION_KEY env var to enable encryption
|
||||
encryption = ["rusqlite/bundled-sqlcipher"]
|
||||
|
||||
# Core embeddings with fastembed (ONNX-based, local inference)
|
||||
embeddings = ["dep:fastembed"]
|
||||
|
|
@ -40,7 +48,8 @@ uuid = { version = "1", features = ["v4", "serde"] }
|
|||
thiserror = "2"
|
||||
|
||||
# Database - SQLite with FTS5 full-text search and JSON
|
||||
rusqlite = { version = "0.38", features = ["bundled", "chrono", "serde_json"] }
|
||||
# Note: "bundled" or "bundled-sqlcipher" added via feature flags above
|
||||
rusqlite = { version = "0.38", features = ["chrono", "serde_json"] }
|
||||
|
||||
# Platform-specific directories
|
||||
directories = "6"
|
||||
|
|
|
|||
|
|
@ -105,6 +105,16 @@ impl Storage {
|
|||
|
||||
let conn = Connection::open(&path)?;
|
||||
|
||||
// Apply encryption key if SQLCipher is enabled and key is provided
|
||||
#[cfg(feature = "encryption")]
|
||||
{
|
||||
if let Ok(key) = std::env::var("VESTIGE_ENCRYPTION_KEY") {
|
||||
if !key.is_empty() {
|
||||
conn.pragma_update(None, "key", &key)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Configure SQLite for performance
|
||||
conn.execute_batch(
|
||||
"PRAGMA journal_mode = WAL;
|
||||
|
|
@ -1431,61 +1441,81 @@ impl Storage {
|
|||
Ok(result)
|
||||
}
|
||||
|
||||
/// Apply decay to all memories
|
||||
/// Apply decay to all memories using batched pagination to avoid OOM.
|
||||
///
|
||||
/// Instead of loading all knowledge_nodes into memory at once, this
|
||||
/// processes rows in fixed-size batches (BATCH_SIZE = 500) using
|
||||
/// LIMIT/OFFSET pagination. Each batch runs inside its own transaction
|
||||
/// for atomicity without holding a giant write-lock.
|
||||
pub fn apply_decay(&mut self) -> Result<i32> {
|
||||
const FSRS_DECAY: f64 = 0.5;
|
||||
const FSRS_FACTOR: f64 = 9.0;
|
||||
const BATCH_SIZE: i64 = 500;
|
||||
|
||||
let now = Utc::now();
|
||||
let mut count = 0i32;
|
||||
let mut offset = 0i64;
|
||||
|
||||
let mut stmt = self.conn.prepare(
|
||||
"SELECT id, last_accessed, storage_strength, retrieval_strength,
|
||||
sentiment_magnitude, stability
|
||||
FROM knowledge_nodes",
|
||||
)?;
|
||||
loop {
|
||||
let batch: Vec<(String, String, f64, f64, f64, f64)> = self
|
||||
.conn
|
||||
.prepare(
|
||||
"SELECT id, last_accessed, storage_strength, retrieval_strength,
|
||||
sentiment_magnitude, stability
|
||||
FROM knowledge_nodes
|
||||
ORDER BY id
|
||||
LIMIT ?1 OFFSET ?2",
|
||||
)?
|
||||
.query_map(params![BATCH_SIZE, offset], |row| {
|
||||
Ok((
|
||||
row.get(0)?,
|
||||
row.get(1)?,
|
||||
row.get(2)?,
|
||||
row.get(3)?,
|
||||
row.get(4)?,
|
||||
row.get(5)?,
|
||||
))
|
||||
})?
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
|
||||
let nodes: Vec<(String, String, f64, f64, f64, f64)> = stmt
|
||||
.query_map([], |row| {
|
||||
Ok((
|
||||
row.get(0)?,
|
||||
row.get(1)?,
|
||||
row.get(2)?,
|
||||
row.get(3)?,
|
||||
row.get(4)?,
|
||||
row.get(5)?,
|
||||
))
|
||||
})?
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
|
||||
let mut count = 0;
|
||||
|
||||
for (id, last_accessed, storage_strength, _, sentiment_mag, stability) in nodes {
|
||||
let last = DateTime::parse_from_rfc3339(&last_accessed)
|
||||
.map(|dt| dt.with_timezone(&Utc))
|
||||
.unwrap_or(now);
|
||||
|
||||
let days_since = (now - last).num_seconds() as f64 / 86400.0;
|
||||
|
||||
if days_since > 0.0 {
|
||||
let effective_stability = stability * (1.0 + sentiment_mag * 0.5);
|
||||
|
||||
let new_retrieval = (1.0 + days_since / (FSRS_FACTOR * effective_stability))
|
||||
.powf(-1.0 / FSRS_DECAY);
|
||||
|
||||
let new_retention =
|
||||
(new_retrieval * 0.7) + ((storage_strength / 10.0).min(1.0) * 0.3);
|
||||
|
||||
self.conn.execute(
|
||||
"UPDATE knowledge_nodes SET
|
||||
retrieval_strength = ?1,
|
||||
retention_strength = ?2
|
||||
WHERE id = ?3",
|
||||
params![new_retrieval, new_retention, id],
|
||||
)?;
|
||||
|
||||
count += 1;
|
||||
if batch.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
let batch_len = batch.len() as i64;
|
||||
|
||||
// Use a transaction for the batch
|
||||
let tx = self.conn.transaction()?;
|
||||
|
||||
for (id, last_accessed, storage_strength, _, sentiment_mag, stability) in &batch {
|
||||
let last = DateTime::parse_from_rfc3339(last_accessed)
|
||||
.map(|dt| dt.with_timezone(&Utc))
|
||||
.unwrap_or(now);
|
||||
|
||||
let days_since = (now - last).num_seconds() as f64 / 86400.0;
|
||||
|
||||
if days_since > 0.0 {
|
||||
let effective_stability = stability * (1.0 + sentiment_mag * 0.5);
|
||||
|
||||
let new_retrieval =
|
||||
(1.0 + days_since / (FSRS_FACTOR * effective_stability))
|
||||
.powf(-1.0 / FSRS_DECAY);
|
||||
|
||||
let new_retention =
|
||||
(new_retrieval * 0.7) + ((storage_strength / 10.0).min(1.0) * 0.3);
|
||||
|
||||
tx.execute(
|
||||
"UPDATE knowledge_nodes SET retrieval_strength = ?1, retention_strength = ?2 WHERE id = ?3",
|
||||
params![new_retrieval, new_retention, id],
|
||||
)?;
|
||||
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
tx.commit()?;
|
||||
offset += batch_len;
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue