vestige/crates/vestige-mcp/src/tools/ingest.rs
Sam Valladares 5b90a73055 feat: Vestige v1.9.1 AUTONOMIC — self-regulating memory with graph visualization
Retention Target System: auto-GC low-retention memories during consolidation
(VESTIGE_RETENTION_TARGET env var, default 0.8). Auto-Promote: memories
accessed 3+ times in 24h get frequency-dependent potentiation. Waking SWR
Tagging: promoted memories get preferential 70/30 dream replay. Improved
Consolidation Scheduler: triggers on 6h staleness or 2h active use.

New tools: memory_health (retention dashboard with distribution buckets,
trend tracking, recommendations) and memory_graph (subgraph export with
Fruchterman-Reingold force-directed layout, up to 200 nodes).

Dream connections now persist to database via save_connection(), enabling
memory_graph traversal. Schema Migration V8 adds waking_tag, utility_score,
times_retrieved/useful columns and retention_snapshots table. 21 MCP tools.

v1.9.1 fixes: ConnectionRecord export, UTF-8 safe truncation, link_type
normalization, utility_score clamping, only-new-connections persistence,
70/30 split capacity fill, nonexistent center_id error handling.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 02:02:06 -06:00

434 lines
16 KiB
Rust

//! Ingest Tool
//!
//! Add new knowledge to memory.
//!
//! v1.5.0: Enhanced with same cognitive pipeline as smart_ingest:
//! Pre-ingest: importance scoring + intent detection
//! Post-ingest: synaptic tagging + novelty model update + hippocampal indexing
use chrono::Utc;
use serde::Deserialize;
use serde_json::Value;
use std::sync::Arc;
use tokio::sync::Mutex;
use crate::cognitive::CognitiveEngine;
use vestige_core::{
ContentType, ImportanceContext, ImportanceEvent, ImportanceEventType, IngestInput, Storage,
};
/// Input schema for ingest tool
pub fn schema() -> Value {
serde_json::json!({
"type": "object",
"properties": {
"content": {
"type": "string",
"description": "The content to remember"
},
"node_type": {
"type": "string",
"description": "Type of knowledge: fact, concept, event, person, place, note, pattern, decision",
"default": "fact"
},
"tags": {
"type": "array",
"items": { "type": "string" },
"description": "Tags for categorization"
},
"source": {
"type": "string",
"description": "Source or reference for this knowledge"
}
},
"required": ["content"]
})
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct IngestArgs {
content: String,
node_type: Option<String>,
tags: Option<Vec<String>>,
source: Option<String>,
}
pub async fn execute(
storage: &Arc<Storage>,
cognitive: &Arc<Mutex<CognitiveEngine>>,
args: Option<Value>,
) -> Result<Value, String> {
let args: IngestArgs = match args {
Some(v) => serde_json::from_value(v).map_err(|e| format!("Invalid arguments: {}", e))?,
None => return Err("Missing arguments".to_string()),
};
// Validate content
if args.content.trim().is_empty() {
return Err("Content cannot be empty".to_string());
}
if args.content.len() > 1_000_000 {
return Err("Content too large (max 1MB)".to_string());
}
// ====================================================================
// COGNITIVE PRE-INGEST: importance scoring + intent detection
// ====================================================================
let mut importance_composite = 0.0_f64;
let mut tags = args.tags.unwrap_or_default();
let mut is_novel = false;
let mut embedding_strategy = String::new();
if let Ok(cog) = cognitive.try_lock() {
// Full 4-channel importance scoring
let context = ImportanceContext::current();
let importance = cog.importance_signals.compute_importance(&args.content, &context);
importance_composite = importance.composite;
// Standalone novelty check (dopaminergic signal)
let novelty_ctx = vestige_core::neuroscience::importance_signals::Context::default();
is_novel = cog.novelty_signal.is_novel(&args.content, &novelty_ctx);
// Intent detection → auto-tag
let intent_result = cog.intent_detector.detect_intent();
if intent_result.confidence > 0.5 {
let intent_tag = format!("intent:{:?}", intent_result.primary_intent);
let intent_tag = if intent_tag.len() > 50 {
format!("{}...", &intent_tag[..47])
} else {
intent_tag
};
tags.push(intent_tag);
}
// Detect content type → select adaptive embedding strategy
let content_type = ContentType::detect(&args.content);
let strategy = cog.adaptive_embedder.select_strategy(&content_type);
embedding_strategy = format!("{:?}", strategy);
}
let input = IngestInput {
content: args.content.clone(),
node_type: args.node_type.unwrap_or_else(|| "fact".to_string()),
source: args.source,
sentiment_score: 0.0,
sentiment_magnitude: importance_composite,
tags,
valid_from: None,
valid_until: None,
};
// ====================================================================
// INGEST (storage lock)
// ====================================================================
// Route through smart_ingest when embeddings are available to prevent duplicates.
// Falls back to raw ingest only when embeddings aren't ready.
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
{
let fallback_input = input.clone();
match storage.smart_ingest(input) {
Ok(result) => {
let node_id = result.node.id.clone();
let node_content = result.node.content.clone();
let node_type = result.node.node_type.clone();
let has_embedding = result.node.has_embedding.unwrap_or(false);
run_post_ingest(cognitive, &node_id, &node_content, &node_type, importance_composite);
Ok(serde_json::json!({
"success": true,
"nodeId": node_id,
"decision": result.decision,
"message": format!("Knowledge ingested successfully. Node ID: {} ({})", node_id, result.decision),
"hasEmbedding": has_embedding,
"similarity": result.similarity,
"reason": result.reason,
"isNovel": is_novel,
"embeddingStrategy": embedding_strategy,
}))
}
Err(_) => {
let node = storage.ingest(fallback_input).map_err(|e| e.to_string())?;
let node_id = node.id.clone();
let node_content = node.content.clone();
let node_type = node.node_type.clone();
let has_embedding = node.has_embedding.unwrap_or(false);
run_post_ingest(cognitive, &node_id, &node_content, &node_type, importance_composite);
Ok(serde_json::json!({
"success": true,
"nodeId": node_id,
"decision": "create",
"message": format!("Knowledge ingested successfully. Node ID: {}", node_id),
"hasEmbedding": has_embedding,
"isNovel": is_novel,
"embeddingStrategy": embedding_strategy,
}))
}
}
}
// Fallback for builds without embedding features
#[cfg(not(all(feature = "embeddings", feature = "vector-search")))]
{
let node = storage.ingest(input).map_err(|e| e.to_string())?;
let node_id = node.id.clone();
let node_content = node.content.clone();
let node_type = node.node_type.clone();
let has_embedding = node.has_embedding.unwrap_or(false);
run_post_ingest(cognitive, &node_id, &node_content, &node_type, importance_composite);
Ok(serde_json::json!({
"success": true,
"nodeId": node_id,
"decision": "create",
"message": format!("Knowledge ingested successfully. Node ID: {}", node_id),
"hasEmbedding": has_embedding,
"isNovel": is_novel,
"embeddingStrategy": embedding_strategy,
}))
}
}
/// Cognitive post-ingest side effects: synaptic tagging, novelty update, hippocampal indexing.
fn run_post_ingest(
cognitive: &Arc<Mutex<CognitiveEngine>>,
node_id: &str,
content: &str,
node_type: &str,
importance_composite: f64,
) {
if let Ok(mut cog) = cognitive.try_lock() {
// Synaptic tagging for retroactive capture
if importance_composite > 0.3 {
cog.synaptic_tagging.tag_memory(node_id);
if importance_composite > 0.7 {
let event = ImportanceEvent::for_memory(node_id, ImportanceEventType::NoveltySpike);
let _capture = cog.synaptic_tagging.trigger_prp(event);
}
}
// Update novelty model
cog.importance_signals.learn_content(content);
// Record in hippocampal index
let _ = cog.hippocampal_index.index_memory(
node_id,
content,
node_type,
Utc::now(),
None,
);
// Cross-project pattern recording
cog.cross_project.record_project_memory(node_id, "default", None);
}
}
// ============================================================================
// TESTS
// ============================================================================
#[cfg(test)]
mod tests {
use super::*;
use crate::cognitive::CognitiveEngine;
use tempfile::TempDir;
fn test_cognitive() -> Arc<Mutex<CognitiveEngine>> {
Arc::new(Mutex::new(CognitiveEngine::new()))
}
/// Create a test storage instance with a temporary database
async fn test_storage() -> (Arc<Storage>, TempDir) {
let dir = TempDir::new().unwrap();
let storage = Storage::new(Some(dir.path().join("test.db"))).unwrap();
(Arc::new(storage), dir)
}
// ========================================================================
// INPUT VALIDATION TESTS
// ========================================================================
#[tokio::test]
async fn test_ingest_empty_content_fails() {
let (storage, _dir) = test_storage().await;
let args = serde_json::json!({ "content": "" });
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("empty"));
}
#[tokio::test]
async fn test_ingest_whitespace_only_content_fails() {
let (storage, _dir) = test_storage().await;
let args = serde_json::json!({ "content": " \n\t " });
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("empty"));
}
#[tokio::test]
async fn test_ingest_missing_arguments_fails() {
let (storage, _dir) = test_storage().await;
let result = execute(&storage, &test_cognitive(), None).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("Missing arguments"));
}
#[tokio::test]
async fn test_ingest_missing_content_field_fails() {
let (storage, _dir) = test_storage().await;
let args = serde_json::json!({ "node_type": "fact" });
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("Invalid arguments"));
}
// ========================================================================
// LARGE CONTENT TESTS
// ========================================================================
#[tokio::test]
async fn test_ingest_large_content_fails() {
let (storage, _dir) = test_storage().await;
// Create content larger than 1MB
let large_content = "x".repeat(1_000_001);
let args = serde_json::json!({ "content": large_content });
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("too large"));
}
#[tokio::test]
async fn test_ingest_exactly_1mb_succeeds() {
let (storage, _dir) = test_storage().await;
// Create content exactly 1MB
let exact_content = "x".repeat(1_000_000);
let args = serde_json::json!({ "content": exact_content });
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_ok());
}
// ========================================================================
// SUCCESSFUL INGEST TESTS
// ========================================================================
#[tokio::test]
async fn test_ingest_basic_content_succeeds() {
let (storage, _dir) = test_storage().await;
let args = serde_json::json!({
"content": "This is a test fact to remember."
});
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_ok());
let value = result.unwrap();
assert_eq!(value["success"], true);
assert!(value["nodeId"].is_string());
assert!(value["message"].as_str().unwrap().contains("successfully"));
}
#[tokio::test]
async fn test_ingest_with_node_type() {
let (storage, _dir) = test_storage().await;
let args = serde_json::json!({
"content": "Error handling should use Result<T, E> pattern.",
"node_type": "pattern"
});
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_ok());
let value = result.unwrap();
assert_eq!(value["success"], true);
}
#[tokio::test]
async fn test_ingest_with_tags() {
let (storage, _dir) = test_storage().await;
let args = serde_json::json!({
"content": "The Rust programming language emphasizes safety.",
"tags": ["rust", "programming", "safety"]
});
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_ok());
let value = result.unwrap();
assert_eq!(value["success"], true);
}
#[tokio::test]
async fn test_ingest_with_source() {
let (storage, _dir) = test_storage().await;
let args = serde_json::json!({
"content": "MCP protocol version 2024-11-05 is the current standard.",
"source": "https://modelcontextprotocol.io/spec"
});
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_ok());
let value = result.unwrap();
assert_eq!(value["success"], true);
}
#[tokio::test]
async fn test_ingest_with_all_optional_fields() {
let (storage, _dir) = test_storage().await;
let args = serde_json::json!({
"content": "Complex memory with all metadata.",
"node_type": "decision",
"tags": ["architecture", "design"],
"source": "team meeting notes"
});
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_ok());
let value = result.unwrap();
assert_eq!(value["success"], true);
assert!(value["nodeId"].is_string());
}
// ========================================================================
// NODE TYPE DEFAULTS
// ========================================================================
#[tokio::test]
async fn test_ingest_default_node_type_is_fact() {
let (storage, _dir) = test_storage().await;
let args = serde_json::json!({
"content": "Default type test content."
});
let result = execute(&storage, &test_cognitive(), Some(args)).await;
assert!(result.is_ok());
// Verify node was created - the default type is "fact"
let node_id = result.unwrap()["nodeId"].as_str().unwrap().to_string();
let node = storage.get_node(&node_id).unwrap().unwrap();
assert_eq!(node.node_type, "fact");
}
// ========================================================================
// SCHEMA TESTS
// ========================================================================
#[test]
fn test_schema_has_required_fields() {
let schema_value = schema();
assert_eq!(schema_value["type"], "object");
assert!(schema_value["properties"]["content"].is_object());
assert!(schema_value["required"].as_array().unwrap().contains(&serde_json::json!("content")));
}
#[test]
fn test_schema_has_optional_fields() {
let schema_value = schema();
assert!(schema_value["properties"]["node_type"].is_object());
assert!(schema_value["properties"]["tags"].is_object());
assert!(schema_value["properties"]["source"].is_object());
}
}