mirror of
https://github.com/samvallad33/vestige.git
synced 2026-04-25 00:36:22 +02:00
feat: Vestige v1.3.0 — importance scoring, session checkpoints, duplicate detection
3 new MCP tools (16 → 19 total):
- importance_score: 4-channel neuroscience importance scoring (novelty/arousal/reward/attention)
- session_checkpoint: batch smart_ingest of up to 20 items with PE Gating
- find_duplicates: cosine-similarity clustering with union-find for dedup

CLI: new `vestige ingest` command for memory ingestion from the command line.
Core: made `get_node_embedding` public; added `get_all_embeddings` for dedup scanning.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5cca386d6b
commit
04a3062328
9 changed files with 848 additions and 5 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -3689,7 +3689,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "vestige-mcp"
|
||||
version = "1.2.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"axum",
|
||||
|
|
|
|||
|
|
@ -478,7 +478,7 @@ impl Storage {
|
|||
|
||||
/// Get the embedding vector for a node
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
fn get_node_embedding(&self, node_id: &str) -> Result<Option<Vec<f32>>> {
|
||||
pub fn get_node_embedding(&self, node_id: &str) -> Result<Option<Vec<f32>>> {
|
||||
let mut stmt = self.conn.prepare(
|
||||
"SELECT embedding FROM node_embeddings WHERE node_id = ?1"
|
||||
)?;
|
||||
|
|
@ -492,6 +492,29 @@ impl Storage {
|
|||
}))
|
||||
}
|
||||
|
||||
/// Get all embedding vectors for duplicate detection
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
pub fn get_all_embeddings(&self) -> Result<Vec<(String, Vec<f32>)>> {
    let mut stmt = self
        .conn
        .prepare("SELECT node_id, embedding FROM node_embeddings")?;

    // Pull each (node_id, blob) row; rows that fail to read or whose
    // blob does not decode into an Embedding are silently skipped so a
    // single corrupt row cannot abort the whole scan.
    let rows = stmt.query_map([], |row| {
        let node_id: String = row.get(0)?;
        let blob: Vec<u8> = row.get(1)?;
        Ok((node_id, blob))
    })?;

    let mut results: Vec<(String, Vec<f32>)> = Vec::new();
    for row in rows.flatten() {
        let (id, bytes) = row;
        if let Some(embedding) = crate::embeddings::Embedding::from_bytes(&bytes) {
            results.push((id, embedding.vector));
        }
    }

    Ok(results)
}
|
||||
|
||||
/// Update the content of an existing node
|
||||
pub fn update_node_content(&mut self, id: &str, new_content: &str) -> Result<()> {
|
||||
let now = Utc::now();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "vestige-mcp"
|
||||
version = "1.2.0"
|
||||
version = "1.3.0"
|
||||
edition = "2024"
|
||||
description = "Cognitive memory MCP server for Claude - FSRS-6, spreading activation, synaptic tagging, and 130 years of memory research"
|
||||
authors = ["samvallad33"]
|
||||
|
|
|
|||
|
|
@ -94,6 +94,21 @@ enum Commands {
|
|||
#[arg(long)]
|
||||
no_open: bool,
|
||||
},
|
||||
|
||||
/// Ingest a memory (routes through Prediction Error Gating)
|
||||
Ingest {
|
||||
/// Content to remember
|
||||
content: String,
|
||||
/// Tags (comma-separated)
|
||||
#[arg(long)]
|
||||
tags: Option<String>,
|
||||
/// Node type (fact, concept, event, person, place, note, pattern, decision)
|
||||
#[arg(long, default_value = "fact")]
|
||||
node_type: String,
|
||||
/// Source reference
|
||||
#[arg(long)]
|
||||
source: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
|
|
@ -118,6 +133,12 @@ fn main() -> anyhow::Result<()> {
|
|||
yes,
|
||||
} => run_gc(min_retention, max_age_days, dry_run, yes),
|
||||
Commands::Dashboard { port, no_open } => run_dashboard(port, !no_open),
|
||||
Commands::Ingest {
|
||||
content,
|
||||
tags,
|
||||
node_type,
|
||||
source,
|
||||
} => run_ingest(content, tags, node_type, source),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -842,6 +863,83 @@ fn run_gc(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Ingest a memory via CLI (routes through smart_ingest / PE Gating)
|
||||
fn run_ingest(
|
||||
content: String,
|
||||
tags: Option<String>,
|
||||
node_type: String,
|
||||
source: Option<String>,
|
||||
) -> anyhow::Result<()> {
|
||||
if content.trim().is_empty() {
|
||||
anyhow::bail!("Content cannot be empty");
|
||||
}
|
||||
|
||||
let tag_list: Vec<String> = tags
|
||||
.as_deref()
|
||||
.map(|t| {
|
||||
t.split(',')
|
||||
.map(|s| s.trim().to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let input = IngestInput {
|
||||
content: content.clone(),
|
||||
node_type,
|
||||
source,
|
||||
sentiment_score: 0.0,
|
||||
sentiment_magnitude: 0.0,
|
||||
tags: tag_list,
|
||||
valid_from: None,
|
||||
valid_until: None,
|
||||
};
|
||||
|
||||
let mut storage = Storage::new(None)?;
|
||||
|
||||
// Try smart_ingest (PE Gating) if available, otherwise regular ingest
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
{
|
||||
let result = storage.smart_ingest(input)?;
|
||||
println!("{}", "=== Vestige Ingest ===".cyan().bold());
|
||||
println!();
|
||||
println!("{}: {}", "Decision".white().bold(), result.decision.green());
|
||||
println!("{}: {}", "Node ID".white().bold(), result.node.id);
|
||||
if let Some(sim) = result.similarity {
|
||||
println!("{}: {:.3}", "Similarity".white().bold(), sim);
|
||||
}
|
||||
if let Some(pe) = result.prediction_error {
|
||||
println!("{}: {:.3}", "Prediction Error".white().bold(), pe);
|
||||
}
|
||||
println!("{}: {}", "Reason".white().bold(), result.reason);
|
||||
println!();
|
||||
println!(
|
||||
"{}",
|
||||
format!("Memory {} ({})", result.decision, truncate(&content, 60))
|
||||
.green()
|
||||
.bold()
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(all(feature = "embeddings", feature = "vector-search")))]
|
||||
{
|
||||
let node = storage.ingest(input)?;
|
||||
println!("{}", "=== Vestige Ingest ===".cyan().bold());
|
||||
println!();
|
||||
println!("{}: create", "Decision".white().bold());
|
||||
println!("{}: {}", "Node ID".white().bold(), node.id);
|
||||
println!();
|
||||
println!(
|
||||
"{}",
|
||||
format!("Memory created ({})", truncate(&content, 60))
|
||||
.green()
|
||||
.bold()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run the dashboard web server
|
||||
fn run_dashboard(port: u16, open_browser: bool) -> anyhow::Result<()> {
|
||||
println!("{}", "=== Vestige Dashboard ===".cyan().bold());
|
||||
|
|
|
|||
|
|
@ -222,6 +222,24 @@ impl McpServer {
|
|||
description: Some("Garbage collect stale memories below retention threshold. Defaults to dry_run=true for safety.".to_string()),
|
||||
input_schema: tools::maintenance::gc_schema(),
|
||||
},
|
||||
// ================================================================
|
||||
// AUTO-SAVE & DEDUP TOOLS (v1.3+)
|
||||
// ================================================================
|
||||
ToolDescription {
|
||||
name: "importance_score".to_string(),
|
||||
description: Some("Score content importance using 4-channel neuroscience model (novelty/arousal/reward/attention). Returns composite score, channel breakdown, encoding boost, and explanations.".to_string()),
|
||||
input_schema: tools::importance::schema(),
|
||||
},
|
||||
ToolDescription {
|
||||
name: "session_checkpoint".to_string(),
|
||||
description: Some("Batch save up to 20 items in one call. Each item routes through Prediction Error Gating (smart_ingest). Use at session end or before context compaction to save all unsaved work.".to_string()),
|
||||
input_schema: tools::checkpoint::schema(),
|
||||
},
|
||||
ToolDescription {
|
||||
name: "find_duplicates".to_string(),
|
||||
description: Some("Find duplicate and near-duplicate memory clusters using cosine similarity on embeddings. Returns clusters with suggested actions (merge/review). Use to clean up redundant memories.".to_string()),
|
||||
input_schema: tools::dedup::schema(),
|
||||
},
|
||||
];
|
||||
|
||||
let result = ListToolsResult { tools };
|
||||
|
|
@ -485,6 +503,13 @@ impl McpServer {
|
|||
"export" => tools::maintenance::execute_export(&self.storage, request.arguments).await,
|
||||
"gc" => tools::maintenance::execute_gc(&self.storage, request.arguments).await,
|
||||
|
||||
// ================================================================
|
||||
// AUTO-SAVE & DEDUP TOOLS (v1.3+)
|
||||
// ================================================================
|
||||
"importance_score" => tools::importance::execute(&self.storage, request.arguments).await,
|
||||
"session_checkpoint" => tools::checkpoint::execute(&self.storage, request.arguments).await,
|
||||
"find_duplicates" => tools::dedup::execute(&self.storage, request.arguments).await,
|
||||
|
||||
name => {
|
||||
return Err(JsonRpcError::method_not_found_with_message(&format!(
|
||||
"Unknown tool: {}",
|
||||
|
|
@ -788,8 +813,8 @@ mod tests {
|
|||
let result = response.result.unwrap();
|
||||
let tools = result["tools"].as_array().unwrap();
|
||||
|
||||
// v1.2+: 16 tools (8 unified + 2 temporal + 6 maintenance)
|
||||
assert_eq!(tools.len(), 16, "Expected exactly 16 tools in v1.2+");
|
||||
// v1.3+: 19 tools (8 unified + 2 temporal + 6 maintenance + 3 auto-save/dedup)
|
||||
assert_eq!(tools.len(), 19, "Expected exactly 19 tools in v1.3+");
|
||||
|
||||
let tool_names: Vec<&str> = tools
|
||||
.iter()
|
||||
|
|
@ -821,6 +846,11 @@ mod tests {
|
|||
assert!(tool_names.contains(&"backup"));
|
||||
assert!(tool_names.contains(&"export"));
|
||||
assert!(tool_names.contains(&"gc"));
|
||||
|
||||
// Auto-save & dedup tools (v1.3)
|
||||
assert!(tool_names.contains(&"importance_score"));
|
||||
assert!(tool_names.contains(&"session_checkpoint"));
|
||||
assert!(tool_names.contains(&"find_duplicates"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|
|||
240
crates/vestige-mcp/src/tools/checkpoint.rs
Normal file
240
crates/vestige-mcp/src/tools/checkpoint.rs
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
//! Session Checkpoint Tool
|
||||
//!
|
||||
//! Batch smart_ingest for session-end saves. Accepts up to 20 items
|
||||
//! in a single call, routing each through Prediction Error Gating.
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use vestige_core::{IngestInput, Storage};
|
||||
|
||||
/// Input schema for session_checkpoint tool
|
||||
pub fn schema() -> Value {
|
||||
serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"items": {
|
||||
"type": "array",
|
||||
"description": "Array of items to save (max 20). Each goes through Prediction Error Gating.",
|
||||
"maxItems": 20,
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The content to remember"
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "Tags for categorization"
|
||||
},
|
||||
"node_type": {
|
||||
"type": "string",
|
||||
"description": "Type: fact, concept, event, person, place, note, pattern, decision",
|
||||
"default": "fact"
|
||||
},
|
||||
"source": {
|
||||
"type": "string",
|
||||
"description": "Source reference"
|
||||
}
|
||||
},
|
||||
"required": ["content"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["items"]
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct CheckpointArgs {
|
||||
items: Vec<CheckpointItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct CheckpointItem {
|
||||
content: String,
|
||||
tags: Option<Vec<String>>,
|
||||
node_type: Option<String>,
|
||||
source: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn execute(
|
||||
storage: &Arc<Mutex<Storage>>,
|
||||
args: Option<Value>,
|
||||
) -> Result<Value, String> {
|
||||
let args: CheckpointArgs = match args {
|
||||
Some(v) => serde_json::from_value(v).map_err(|e| format!("Invalid arguments: {}", e))?,
|
||||
None => return Err("Missing arguments".to_string()),
|
||||
};
|
||||
|
||||
if args.items.is_empty() {
|
||||
return Err("Items array cannot be empty".to_string());
|
||||
}
|
||||
|
||||
if args.items.len() > 20 {
|
||||
return Err("Maximum 20 items per checkpoint".to_string());
|
||||
}
|
||||
|
||||
let mut storage = storage.lock().await;
|
||||
let mut results = Vec::new();
|
||||
let mut created = 0u32;
|
||||
let mut updated = 0u32;
|
||||
let mut skipped = 0u32;
|
||||
let mut errors = 0u32;
|
||||
|
||||
for (i, item) in args.items.into_iter().enumerate() {
|
||||
if item.content.trim().is_empty() {
|
||||
results.push(serde_json::json!({
|
||||
"index": i,
|
||||
"status": "skipped",
|
||||
"reason": "Empty content"
|
||||
}));
|
||||
skipped += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let input = IngestInput {
|
||||
content: item.content,
|
||||
node_type: item.node_type.unwrap_or_else(|| "fact".to_string()),
|
||||
source: item.source,
|
||||
sentiment_score: 0.0,
|
||||
sentiment_magnitude: 0.0,
|
||||
tags: item.tags.unwrap_or_default(),
|
||||
valid_from: None,
|
||||
valid_until: None,
|
||||
};
|
||||
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
{
|
||||
match storage.smart_ingest(input) {
|
||||
Ok(result) => {
|
||||
match result.decision.as_str() {
|
||||
"create" | "supersede" | "replace" => created += 1,
|
||||
"update" | "reinforce" | "merge" | "add_context" => updated += 1,
|
||||
_ => created += 1,
|
||||
}
|
||||
results.push(serde_json::json!({
|
||||
"index": i,
|
||||
"status": "saved",
|
||||
"decision": result.decision,
|
||||
"nodeId": result.node.id,
|
||||
"similarity": result.similarity,
|
||||
"reason": result.reason
|
||||
}));
|
||||
}
|
||||
Err(e) => {
|
||||
errors += 1;
|
||||
results.push(serde_json::json!({
|
||||
"index": i,
|
||||
"status": "error",
|
||||
"reason": e.to_string()
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(all(feature = "embeddings", feature = "vector-search")))]
|
||||
{
|
||||
match storage.ingest(input) {
|
||||
Ok(node) => {
|
||||
created += 1;
|
||||
results.push(serde_json::json!({
|
||||
"index": i,
|
||||
"status": "saved",
|
||||
"decision": "create",
|
||||
"nodeId": node.id,
|
||||
"reason": "Embeddings not available - used regular ingest"
|
||||
}));
|
||||
}
|
||||
Err(e) => {
|
||||
errors += 1;
|
||||
results.push(serde_json::json!({
|
||||
"index": i,
|
||||
"status": "error",
|
||||
"reason": e.to_string()
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"success": errors == 0,
|
||||
"summary": {
|
||||
"total": results.len(),
|
||||
"created": created,
|
||||
"updated": updated,
|
||||
"skipped": skipped,
|
||||
"errors": errors
|
||||
},
|
||||
"results": results
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use tempfile::TempDir;
|
||||
|
||||
async fn test_storage() -> (Arc<Mutex<Storage>>, TempDir) {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let storage = Storage::new(Some(dir.path().join("test.db"))).unwrap();
|
||||
(Arc::new(Mutex::new(storage)), dir)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_schema_has_required_fields() {
|
||||
let schema = schema();
|
||||
assert_eq!(schema["type"], "object");
|
||||
assert!(schema["properties"]["items"].is_object());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_empty_items_fails() {
|
||||
let (storage, _dir) = test_storage().await;
|
||||
let result = execute(&storage, Some(serde_json::json!({ "items": [] }))).await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_ingest() {
|
||||
let (storage, _dir) = test_storage().await;
|
||||
let result = execute(
|
||||
&storage,
|
||||
Some(serde_json::json!({
|
||||
"items": [
|
||||
{ "content": "First checkpoint item", "tags": ["test"] },
|
||||
{ "content": "Second checkpoint item", "tags": ["test"] }
|
||||
]
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
assert!(result.is_ok());
|
||||
let value = result.unwrap();
|
||||
assert_eq!(value["summary"]["total"], 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_skips_empty_content() {
|
||||
let (storage, _dir) = test_storage().await;
|
||||
let result = execute(
|
||||
&storage,
|
||||
Some(serde_json::json!({
|
||||
"items": [
|
||||
{ "content": "Valid item" },
|
||||
{ "content": "" },
|
||||
{ "content": "Another valid item" }
|
||||
]
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
assert!(result.is_ok());
|
||||
let value = result.unwrap();
|
||||
assert_eq!(value["summary"]["skipped"], 1);
|
||||
}
|
||||
}
|
||||
307
crates/vestige-mcp/src/tools/dedup.rs
Normal file
307
crates/vestige-mcp/src/tools/dedup.rs
Normal file
|
|
@ -0,0 +1,307 @@
|
|||
//! Find Duplicates Tool
|
||||
//!
|
||||
//! Detects duplicate and near-duplicate memory clusters using
|
||||
//! cosine similarity on stored embeddings. Uses union-find for
|
||||
//! efficient clustering.
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use vestige_core::Storage;
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
use vestige_core::cosine_similarity;
|
||||
|
||||
/// Input schema for find_duplicates tool
|
||||
pub fn schema() -> Value {
|
||||
serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"similarity_threshold": {
|
||||
"type": "number",
|
||||
"description": "Minimum cosine similarity to consider as duplicate (0.0-1.0, default: 0.80)",
|
||||
"default": 0.80,
|
||||
"minimum": 0.5,
|
||||
"maximum": 1.0
|
||||
},
|
||||
"limit": {
|
||||
"type": "integer",
|
||||
"description": "Maximum number of duplicate clusters to return (default: 20)",
|
||||
"default": 20,
|
||||
"minimum": 1,
|
||||
"maximum": 100
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "Optional: only check memories with these tags (ANY match)"
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct DedupArgs {
|
||||
similarity_threshold: Option<f64>,
|
||||
limit: Option<usize>,
|
||||
tags: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// Simple union-find (disjoint-set) for clustering.
///
/// Union by rank plus path compression gives effectively constant
/// amortized time per `find`/`union` (inverse Ackermann).
struct UnionFind {
    /// parent[i] == i means i is a set root.
    parent: Vec<usize>,
    /// Upper bound on tree height; keeps merged trees shallow.
    rank: Vec<usize>,
}

impl UnionFind {
    /// Create `n` singleton sets, each element its own root.
    fn new(n: usize) -> Self {
        Self {
            parent: (0..n).collect(),
            rank: vec![0; n],
        }
    }

    /// Return the root representative of `x`'s set.
    ///
    /// Iterative two-pass path compression: the previous recursive
    /// implementation could overflow the stack on a long parent chain.
    fn find(&mut self, x: usize) -> usize {
        // Pass 1: walk up to the root.
        let mut root = x;
        while self.parent[root] != root {
            root = self.parent[root];
        }
        // Pass 2: repoint every node on the path directly at the root.
        let mut cur = x;
        while self.parent[cur] != root {
            let next = self.parent[cur];
            self.parent[cur] = root;
            cur = next;
        }
        root
    }

    /// Merge the sets containing `x` and `y` (no-op if already joined).
    fn union(&mut self, x: usize, y: usize) {
        let rx = self.find(x);
        let ry = self.find(y);
        if rx == ry {
            return;
        }
        // Attach the shorter tree beneath the taller (union by rank).
        if self.rank[rx] < self.rank[ry] {
            self.parent[rx] = ry;
        } else if self.rank[rx] > self.rank[ry] {
            self.parent[ry] = rx;
        } else {
            self.parent[ry] = rx;
            self.rank[rx] += 1;
        }
    }
}
|
||||
|
||||
pub async fn execute(
|
||||
storage: &Arc<Mutex<Storage>>,
|
||||
args: Option<Value>,
|
||||
) -> Result<Value, String> {
|
||||
let args: DedupArgs = match args {
|
||||
Some(v) => serde_json::from_value(v).map_err(|e| format!("Invalid arguments: {}", e))?,
|
||||
None => DedupArgs {
|
||||
similarity_threshold: None,
|
||||
limit: None,
|
||||
tags: None,
|
||||
},
|
||||
};
|
||||
|
||||
let threshold = args.similarity_threshold.unwrap_or(0.80) as f32;
|
||||
let limit = args.limit.unwrap_or(20);
|
||||
let tag_filter = args.tags.unwrap_or_default();
|
||||
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
{
|
||||
let storage = storage.lock().await;
|
||||
|
||||
// Load all embeddings
|
||||
let all_embeddings = storage
|
||||
.get_all_embeddings()
|
||||
.map_err(|e| format!("Failed to load embeddings: {}", e))?;
|
||||
|
||||
if all_embeddings.is_empty() {
|
||||
return Ok(serde_json::json!({
|
||||
"clusters": [],
|
||||
"totalMemories": 0,
|
||||
"totalWithEmbeddings": 0,
|
||||
"message": "No embeddings found. Run consolidation first."
|
||||
}));
|
||||
}
|
||||
|
||||
// Load nodes for metadata (content preview, retention, tags)
|
||||
let mut all_nodes = Vec::new();
|
||||
let mut offset = 0;
|
||||
loop {
|
||||
let batch = storage
|
||||
.get_all_nodes(500, offset)
|
||||
.map_err(|e| format!("Failed to load nodes: {}", e))?;
|
||||
let batch_len = batch.len();
|
||||
all_nodes.extend(batch);
|
||||
if batch_len < 500 {
|
||||
break;
|
||||
}
|
||||
offset += 500;
|
||||
}
|
||||
|
||||
// Build node lookup
|
||||
let node_map: HashMap<String, &vestige_core::KnowledgeNode> =
|
||||
all_nodes.iter().map(|n| (n.id.clone(), n)).collect();
|
||||
|
||||
// Filter by tags if specified
|
||||
let filtered_embeddings: Vec<(usize, &String, &Vec<f32>)> = all_embeddings
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, (id, _))| {
|
||||
if tag_filter.is_empty() {
|
||||
return true;
|
||||
}
|
||||
if let Some(node) = node_map.get(id) {
|
||||
tag_filter.iter().any(|t| node.tags.contains(t))
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
.map(|(i, (id, vec))| (i, id, vec))
|
||||
.collect();
|
||||
|
||||
let n = filtered_embeddings.len();
|
||||
|
||||
if n > 2000 {
|
||||
return Ok(serde_json::json!({
|
||||
"warning": format!("Too many memories to scan ({} with embeddings). Filter by tags to reduce scope.", n),
|
||||
"totalMemories": all_nodes.len(),
|
||||
"totalWithEmbeddings": n
|
||||
}));
|
||||
}
|
||||
|
||||
// O(n^2) pairwise similarity + union-find clustering
|
||||
let mut uf = UnionFind::new(n);
|
||||
let mut similarities: Vec<(usize, usize, f32)> = Vec::new();
|
||||
|
||||
for i in 0..n {
|
||||
for j in (i + 1)..n {
|
||||
let sim = cosine_similarity(&filtered_embeddings[i].2, &filtered_embeddings[j].2);
|
||||
if sim >= threshold {
|
||||
uf.union(i, j);
|
||||
similarities.push((i, j, sim));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Group into clusters
|
||||
let mut cluster_map: HashMap<usize, Vec<usize>> = HashMap::new();
|
||||
for i in 0..n {
|
||||
let root = uf.find(i);
|
||||
cluster_map.entry(root).or_default().push(i);
|
||||
}
|
||||
|
||||
// Only keep clusters with >1 member, sorted by size descending
|
||||
let mut clusters: Vec<Vec<usize>> = cluster_map
|
||||
.into_values()
|
||||
.filter(|c| c.len() > 1)
|
||||
.collect();
|
||||
clusters.sort_by(|a, b| b.len().cmp(&a.len()));
|
||||
clusters.truncate(limit);
|
||||
|
||||
// Build similarity lookup for formatting
|
||||
let mut sim_lookup: HashMap<(usize, usize), f32> = HashMap::new();
|
||||
for &(i, j, sim) in &similarities {
|
||||
sim_lookup.insert((i, j), sim);
|
||||
sim_lookup.insert((j, i), sim);
|
||||
}
|
||||
|
||||
// Format output
|
||||
let cluster_results: Vec<Value> = clusters
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(ci, members)| {
|
||||
let anchor = members[0];
|
||||
let member_results: Vec<Value> = members
|
||||
.iter()
|
||||
.map(|&idx| {
|
||||
let id = &filtered_embeddings[idx].1;
|
||||
let node = node_map.get(id.as_str());
|
||||
let content_preview = node
|
||||
.map(|n| {
|
||||
let c = n.content.replace('\n', " ");
|
||||
if c.len() > 120 {
|
||||
format!("{}...", &c[..120])
|
||||
} else {
|
||||
c
|
||||
}
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let sim_to_anchor = if idx == anchor {
|
||||
1.0
|
||||
} else {
|
||||
sim_lookup
|
||||
.get(&(anchor, idx))
|
||||
.copied()
|
||||
.unwrap_or(0.0)
|
||||
};
|
||||
|
||||
serde_json::json!({
|
||||
"id": id,
|
||||
"contentPreview": content_preview,
|
||||
"retention": node.map(|n| n.retention_strength).unwrap_or(0.0),
|
||||
"createdAt": node.map(|n| n.created_at.to_rfc3339()).unwrap_or_default(),
|
||||
"tags": node.map(|n| &n.tags).unwrap_or(&vec![]),
|
||||
"similarityToAnchor": format!("{:.3}", sim_to_anchor)
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_json::json!({
|
||||
"clusterId": ci,
|
||||
"size": members.len(),
|
||||
"members": member_results,
|
||||
"suggestedAction": if members.len() > 3 { "review" } else { "merge" }
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"clusters": cluster_results,
|
||||
"totalClusters": cluster_results.len(),
|
||||
"totalMemories": all_nodes.len(),
|
||||
"totalWithEmbeddings": n,
|
||||
"threshold": threshold,
|
||||
"pairsChecked": n * (n - 1) / 2
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(not(all(feature = "embeddings", feature = "vector-search")))]
|
||||
{
|
||||
Ok(serde_json::json!({
|
||||
"error": "Embeddings feature not enabled. Cannot compute similarities.",
|
||||
"clusters": []
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_schema() {
|
||||
let schema = schema();
|
||||
assert_eq!(schema["type"], "object");
|
||||
assert!(schema["properties"]["similarity_threshold"].is_object());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_union_find() {
|
||||
let mut uf = UnionFind::new(5);
|
||||
uf.union(0, 1);
|
||||
uf.union(2, 3);
|
||||
uf.union(1, 3);
|
||||
assert_eq!(uf.find(0), uf.find(3));
|
||||
assert_ne!(uf.find(0), uf.find(4));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_empty_storage() {
|
||||
let dir = tempfile::TempDir::new().unwrap();
|
||||
let storage = Storage::new(Some(dir.path().join("test.db"))).unwrap();
|
||||
let storage = Arc::new(Mutex::new(storage));
|
||||
let result = execute(&storage, None).await;
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
}
|
||||
140
crates/vestige-mcp/src/tools/importance.rs
Normal file
140
crates/vestige-mcp/src/tools/importance.rs
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
//! Importance Score Tool
|
||||
//!
|
||||
//! Exposes the 4-channel importance signaling system as an MCP tool.
|
||||
//! Wraps ImportanceSignals::compute_importance() from vestige-core's
|
||||
//! neuroscience module (dopamine/norepinephrine/acetylcholine/serotonin model).
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use vestige_core::{ImportanceContext, ImportanceSignals, Storage};
|
||||
|
||||
/// Input schema for importance_score tool
|
||||
pub fn schema() -> Value {
|
||||
serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The content to score for importance"
|
||||
},
|
||||
"context_topics": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "Optional topics for novelty detection context"
|
||||
},
|
||||
"project": {
|
||||
"type": "string",
|
||||
"description": "Optional project/codebase name for context"
|
||||
}
|
||||
},
|
||||
"required": ["content"]
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct ImportanceArgs {
|
||||
content: String,
|
||||
context_topics: Option<Vec<String>>,
|
||||
project: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn execute(
|
||||
_storage: &Arc<Mutex<Storage>>,
|
||||
args: Option<Value>,
|
||||
) -> Result<Value, String> {
|
||||
let args: ImportanceArgs = match args {
|
||||
Some(v) => serde_json::from_value(v).map_err(|e| format!("Invalid arguments: {}", e))?,
|
||||
None => return Err("Missing arguments".to_string()),
|
||||
};
|
||||
|
||||
if args.content.trim().is_empty() {
|
||||
return Err("Content cannot be empty".to_string());
|
||||
}
|
||||
|
||||
let signals = ImportanceSignals::new();
|
||||
|
||||
let mut context = ImportanceContext::current();
|
||||
if let Some(project) = args.project {
|
||||
context = context.with_project(project);
|
||||
}
|
||||
if let Some(topics) = args.context_topics {
|
||||
context = context.with_tags(topics);
|
||||
}
|
||||
|
||||
let score = signals.compute_importance(&args.content, &context);
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"composite": score.composite,
|
||||
"channels": {
|
||||
"novelty": score.novelty,
|
||||
"arousal": score.arousal,
|
||||
"reward": score.reward,
|
||||
"attention": score.attention
|
||||
},
|
||||
"encodingBoost": score.encoding_boost,
|
||||
"consolidationPriority": format!("{:?}", score.consolidation_priority),
|
||||
"weightsUsed": {
|
||||
"novelty": score.weights_used.novelty,
|
||||
"arousal": score.weights_used.arousal,
|
||||
"reward": score.weights_used.reward,
|
||||
"attention": score.weights_used.attention
|
||||
},
|
||||
"explanations": {
|
||||
"novelty": score.novelty_explanation.as_ref().map(|e| format!("{:?}", e)),
|
||||
"arousal": score.arousal_explanation.as_ref().map(|e| format!("{:?}", e)),
|
||||
"reward": score.reward_explanation.as_ref().map(|e| format!("{:?}", e)),
|
||||
"attention": score.attention_explanation.as_ref().map(|e| format!("{:?}", e))
|
||||
},
|
||||
"summary": score.summary(),
|
||||
"dominantSignal": score.dominant_signal()
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_schema_has_required_fields() {
|
||||
let schema = schema();
|
||||
assert_eq!(schema["type"], "object");
|
||||
assert!(schema["properties"]["content"].is_object());
|
||||
assert!(schema["required"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.contains(&serde_json::json!("content")));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_empty_content_fails() {
|
||||
let storage = Arc::new(Mutex::new(
|
||||
Storage::new(Some(std::path::PathBuf::from("/tmp/test_importance.db"))).unwrap(),
|
||||
));
|
||||
let result = execute(&storage, Some(serde_json::json!({ "content": "" }))).await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_basic_importance_score() {
|
||||
let storage = Arc::new(Mutex::new(
|
||||
Storage::new(Some(std::path::PathBuf::from("/tmp/test_importance2.db"))).unwrap(),
|
||||
));
|
||||
let result = execute(
|
||||
&storage,
|
||||
Some(serde_json::json!({
|
||||
"content": "CRITICAL: Production database migration failed with data loss!"
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
assert!(result.is_ok());
|
||||
let value = result.unwrap();
|
||||
assert!(value["composite"].as_f64().is_some());
|
||||
assert!(value["channels"]["novelty"].as_f64().is_some());
|
||||
assert!(value["channels"]["arousal"].as_f64().is_some());
|
||||
assert!(value["dominantSignal"].is_string());
|
||||
}
|
||||
}
|
||||
|
|
@ -21,6 +21,11 @@ pub mod timeline;
|
|||
// v1.2: Maintenance tools
|
||||
pub mod maintenance;
|
||||
|
||||
// v1.3: Auto-save and dedup tools
|
||||
pub mod checkpoint;
|
||||
pub mod dedup;
|
||||
pub mod importance;
|
||||
|
||||
// Deprecated tools - kept for internal backwards compatibility
|
||||
// These modules are intentionally unused in the public API
|
||||
#[allow(dead_code)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue