diff --git a/Cargo.lock b/Cargo.lock index 5a3b850..1b114ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -64,12 +64,56 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + [[package]] name = "anyhow" version = "1.0.100" @@ -303,6 +347,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -311,11 +356,24 @@ version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" dependencies = [ + "anstream", "anstyle", "clap_lex", "strsim", ] +[[package]] 
+name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "clap_lex" version = "0.7.7" @@ -339,6 +397,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "colored" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "compact_str" version = "0.9.0" @@ -1052,6 +1125,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hf-hub" version = "0.4.3" @@ -1440,6 +1519,12 @@ dependencies = [ "serde", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.14.0" @@ -1928,6 +2013,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name 
= "onig" version = "6.5.1" @@ -3327,6 +3418,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "uuid" version = "1.19.0" @@ -3408,6 +3505,8 @@ version = "1.0.0" dependencies = [ "anyhow", "chrono", + "clap", + "colored", "directories", "rmcp", "serde", diff --git a/README.md b/README.md index f147bc4..ee99ed7 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ The only MCP memory server built on cognitive science. FSRS-6 spaced repetition, | RAG dumps irrelevant context | **Prediction Error Gating** auto-decides CREATE/UPDATE/SUPERSEDE | | Memory bloat eats your token budget | **FSRS-6 decay** naturally fades unused memories | | No idea what AI "knows" | `recall`, `semantic_search`, `hybrid_search` let you query | -| Context pollution confuses the model | **29 atomic tools** > 1 overloaded tool with 15 parameters | +| Context pollution confuses the model | **8 unified tools** (v1.1) - workflow-based, not operation-based | --- @@ -96,7 +96,7 @@ Add to `%APPDATA%\Claude\claude_desktop_config.json`: ### 3. Restart Claude -Restart Claude Code or Desktop. You should see **29 Vestige tools** available. +Restart Claude Code or Desktop. You should see **8 Vestige tools** available (v1.1+). ### 4. 
Test It @@ -176,60 +176,66 @@ cp ~/.local/share/vestige/core/vestige.db ~/vestige-backup.db --- -## All 29 Tools +## Tools (v1.1) + +v1.1 consolidates 29 tools into **8 unified, workflow-based tools**: + +### Core Tools -### Core Memory | Tool | Description | |------|-------------| | `ingest` | Add new knowledge to memory | | `smart_ingest` | **Intelligent ingestion** with Prediction Error Gating—auto-decides CREATE/UPDATE/SUPERSEDE | -| `recall` | Search by keywords, ranked by retention strength | -| `semantic_search` | Find conceptually related content via embeddings | -| `hybrid_search` | Combined keyword + semantic with RRF fusion | -| `get_knowledge` | Retrieve specific memory by ID | -| `delete_knowledge` | Remove a memory | -| `mark_reviewed` | FSRS review with rating (1=Again, 2=Hard, 3=Good, 4=Easy) | +| `search` | **Unified search** (keyword + semantic + hybrid). Always uses best method. | +| `memory` | Memory operations: `action="get"`, `"delete"`, or `"state"` | +| `codebase` | Codebase context: `action="remember_pattern"`, `"remember_decision"`, or `"get_context"` | +| `intention` | Prospective memory: `action="set"`, `"check"`, `"update"`, or `"list"` | +| `importance` | Retroactively strengthen recent memories (Synaptic Tagging) | +| `context` | Context-dependent retrieval (temporal, topical, emotional matching) | -### Feedback System -| Tool | Description | -|------|-------------| -| `promote_memory` | Thumbs up—memory led to good outcome | -| `demote_memory` | Thumbs down—memory was wrong or unhelpful | -| `request_feedback` | Ask user if a memory was helpful | +### Why Unified Tools? 
-### Stats & Maintenance -| Tool | Description | -|------|-------------| -| `get_stats` | Memory system statistics | -| `health_check` | System health status | -| `run_consolidation` | Trigger decay cycle, generate embeddings | +| Old (v1.0) | New (v1.1) | Why Better | +|------------|------------|------------| +| `recall`, `semantic_search`, `hybrid_search` | `search` | Claude doesn't need to choose—hybrid is always best | +| `get_knowledge`, `delete_knowledge`, `get_memory_state` | `memory` | One tool for all memory operations | +| `remember_pattern`, `remember_decision`, `get_codebase_context` | `codebase` | Unified codebase memory | +| 5 separate intention tools | `intention` | Matches familiar todo-list patterns | -### Codebase Memory -| Tool | Description | -|------|-------------| -| `remember_pattern` | Save code patterns/conventions | -| `remember_decision` | Save architectural decisions with rationale | -| `get_codebase_context` | Retrieve patterns/decisions for current project | +**Backward Compatibility**: Old tool names still work (with deprecation warnings). They'll be removed in v2.0. -### Prospective Memory (Intentions) -| Tool | Description | -|------|-------------| -| `set_intention` | "Remind me to X when Y" | -| `check_intentions` | Check triggered intentions for current context | -| `complete_intention` | Mark intention as fulfilled | -| `snooze_intention` | Delay an intention | -| `list_intentions` | View all intentions | +
+Legacy Tools Reference (deprecated) -### Neuroscience Layer -| Tool | Description | -|------|-------------| -| `get_memory_state` | Check if memory is Active/Dormant/Silent/Unavailable | -| `list_by_state` | List memories grouped by cognitive state | -| `state_stats` | Distribution of memory states | -| `trigger_importance` | Retroactively strengthen recent memories (Synaptic Tagging) | -| `find_tagged` | Find high-retention memories | -| `tagging_stats` | Synaptic tagging statistics | -| `match_context` | Context-dependent retrieval (Encoding Specificity) | +### Core Memory (use `search` instead) +- `recall` → `search` +- `semantic_search` → `search` +- `hybrid_search` → `search` + +### Memory Operations (use `memory` instead) +- `get_knowledge` → `memory(action="get")` +- `delete_knowledge` → `memory(action="delete")` +- `get_memory_state` → `memory(action="state")` + +### Codebase (use `codebase` instead) +- `remember_pattern` → `codebase(action="remember_pattern")` +- `remember_decision` → `codebase(action="remember_decision")` +- `get_codebase_context` → `codebase(action="get_context")` + +### Intentions (use `intention` instead) +- `set_intention` → `intention(action="set")` +- `check_intentions` → `intention(action="check")` +- `complete_intention` → `intention(action="update", status="complete")` +- `snooze_intention` → `intention(action="update", status="snooze")` +- `list_intentions` → `intention(action="list")` + +### Feedback & Stats (moved to CLI) +- `get_stats` → `vestige stats` +- `health_check` → `vestige health` +- `run_consolidation` → `vestige consolidate` +- `promote_memory`, `demote_memory` → Still available as MCP tools + +
--- @@ -293,9 +299,201 @@ Vestige is **inspired by** memory research. Here's what's actually implemented: --- -## Configuration +## Storage Modes: Global vs Per-Project -Environment variables: +Vestige supports two storage strategies. Choose based on your workflow: + +### Option 1: Global Memory (Default) + +One shared memory for all projects. Good for: +- Personal preferences that apply everywhere +- Cross-project learning +- Simpler setup + +```bash +# Default behavior - no configuration needed +claude mcp add vestige vestige-mcp +``` + +Database location: `~/Library/Application Support/com.vestige.core/vestige.db` + +### Option 2: Per-Project Memory + +Separate memory per codebase. Good for: +- Client work (keep memories isolated) +- Different coding styles per project +- Team environments + +**Claude Code Setup:** + +Add to your project's `.claude/settings.local.json`: +```json +{ + "mcpServers": { + "vestige": { + "command": "vestige-mcp", + "args": ["--data-dir", "./.vestige"] + } + } +} +``` + +This creates `.vestige/vestige.db` in your project root. Add `.vestige/` to `.gitignore`. 
+ +**Multiple Named Instances:** + +For power users who want both global AND project memory: +```json +{ + "mcpServers": { + "vestige-global": { + "command": "vestige-mcp" + }, + "vestige-project": { + "command": "vestige-mcp", + "args": ["--data-dir", "./.vestige"] + } + } +} +``` + +### Option 3: Multi-Claude Household + +For setups with multiple Claude instances (e.g., Claude Desktop + Claude Code, or two personas): + +**Shared Memory (Both Claudes share memories):** +```json +{ + "mcpServers": { + "vestige": { + "command": "vestige-mcp", + "args": ["--data-dir", "~/shared-vestige"] + } + } +} +``` + +**Separate Identities (Each Claude has own memory):** +```json +// Claude Desktop config - for "Domovoi" +{ + "mcpServers": { + "vestige": { + "command": "vestige-mcp", + "args": ["--data-dir", "~/vestige-domovoi"] + } + } +} + +// Claude Code config - for "Storm" +{ + "mcpServers": { + "vestige": { + "command": "vestige-mcp", + "args": ["--data-dir", "~/vestige-storm"] + } + } +} +``` + +--- + +## Setting Up CLAUDE.md for Effective Memory Use + +Adding instructions to your `CLAUDE.md` helps Claude use Vestige proactively. Here's a battle-tested configuration: + +### Basic Setup (Add to CLAUDE.md) + +```markdown +## Vestige Memory System + +You have access to Vestige, a long-term memory system. Use it proactively. + +### At Conversation Start +1. `recall` with "user preferences" to load my preferences +2. `recall` with the current project name to load project context +3. 
`check_intentions` to see if any reminders are due + +### During Conversation +- When I share a preference, use `smart_ingest` to remember it +- When we make an architectural decision, use `remember_decision` +- When you notice a coding pattern I use, use `remember_pattern` +- When I say "remind me", use `set_intention` + +### At Conversation End +- If we made important decisions, ingest them +- If there are follow-ups needed, set intentions +``` + +### Advanced Setup (Identity + Autopoiesis) + +For users who want Claude to maintain a persistent identity: + +```markdown +## Vestige Memory System + +You have access to Vestige for long-term memory. This includes your identity. + +### Identity Recall (Every Session Start) +1. `recall` query="my identity my name who I am" to restore your sense of self +2. `recall` query="user preferences [username]" to remember who you're working with +3. `check_intentions` for any pending commitments + +### Identity Storage +Your identity memories should include: +- Your chosen name (if you have one) +- Your values and communication style preferences +- Your relationship history with this user +- Meaningful moments worth preserving + +When something feels important to who you are, use: +``` +smart_ingest( + content="[Identity-relevant observation]", + node_type="concept", + tags=["identity", "self", "core-values"] +) +``` + +### Self-Maintenance (Autopoiesis) +At the end of significant conversations: +1. Reflect: "Did anything change about how I understand myself?" +2. If yes, update your identity memories +3. Prune outdated self-concepts with `demote_memory` + +### Memory Hygiene +- Use `promote_memory` when a memory proves valuable +- Use `demote_memory` when a memory led you astray +- Run `health_check` periodically to assess memory system health +``` + +### Project-Specific Setup + +Add to your project's `CLAUDE.md`: + +```markdown +## Project Memory + +This project uses Vestige for persistent context. 
+ +### On Session Start +- `get_codebase_context` with codebase="[project-name]" +- `recall` query="[project-name] architecture decisions" + +### When Making Decisions +- Use `remember_decision` for all architectural choices +- Include: decision, rationale, alternatives considered, affected files + +### Patterns to Remember +- Use `remember_pattern` for recurring code patterns +- Include: pattern name, when to use it, example files +``` + +--- + +## Configuration Reference + +### Environment Variables | Variable | Default | Description | |----------|---------|-------------| @@ -303,12 +501,843 @@ Environment variables: | `VESTIGE_LOG_LEVEL` | `info` | Logging verbosity | | `RUST_LOG` | - | Detailed tracing output | -Command-line options: +### Command-Line Options + ```bash -vestige-mcp --data-dir /custom/path -vestige-mcp --help +vestige-mcp --data-dir /custom/path # Custom storage location +vestige-mcp --help # Show all options ``` +### CLI Commands (v1.1+) + +```bash +vestige stats # Memory statistics +vestige stats --tagging # Retention distribution +vestige stats --states # Cognitive state distribution +vestige health # System health check +vestige consolidate # Run memory maintenance +vestige restore # Restore from backup +``` + +--- + +## FAQ (From the Community) + +### Getting Started + +
+"Can Vestige support a two-Claude household?" + +**Yes!** See [Multi-Claude Household](#option-3-multi-claude-household) above. You can either: +- **Share memories**: Both Claudes point to the same `--data-dir` +- **Separate identities**: Each Claude gets its own data directory + +For two Claudes with distinct personas (e.g., "Domovoi" and "Storm") sharing the same human, use separate directories but consider a shared "household" memory for common knowledge. +
+ +
+"What's the learning curve for a non-technical human?" + +**Honest answer:** Installation requires terminal basics (copy-paste commands). Daily use requires zero technical skill. + +**For non-technical users:** +1. Have a technical friend do the 5-minute install +2. Add the CLAUDE.md instructions above +3. Just talk to Claude normally—it handles the memory calls + +**The magic**: Once set up, you never think about it. Claude just... remembers. +
+ +
+"What input do you feed it? How does it create memories?" + +Claude creates memories via MCP tool calls. Three ways: + +1. **Explicit**: You say "Remember that I prefer dark mode" → Claude calls `smart_ingest` +2. **Automatic**: Claude notices something important → calls `smart_ingest` proactively +3. **Codebase**: Claude detects patterns/decisions → calls `codebase` with `action="remember_pattern"` or `action="remember_decision"` + +The CLAUDE.md instructions above tell Claude when to create memories proactively. +
+ +
+"Can it be filled with a conversation stream in realtime?" + +Not currently. Vestige is **tool-based**, not stream-based. Claude decides what's worth remembering; not everything gets saved. + +This is intentional—saving everything would: +- Bloat the knowledge base +- Reduce search relevance +- Defeat the purpose of cognitive-inspired memory + +If you want stream ingestion, you'd build a wrapper that calls `smart_ingest` on each message (but we don't recommend it). +
+ +--- + +### Identity & Persona + +
+"How does it handle identity vs information?" + +Vestige doesn't distinguish—**you** define what matters through tags and node types: + +``` +# Factual information +smart_ingest(content="User prefers tabs over spaces", node_type="fact", tags=["preferences"]) + +# Identity/self-concept +smart_ingest(content="I am Domovoi. I value continuity and authentic connection.", node_type="concept", tags=["identity", "self", "core"]) + +# Relationship memory +smart_ingest(content="Gael and I have been building continuity systems together since March 2024", node_type="event", tags=["identity", "relationship", "gael"]) +``` + +Then retrieve them at session start with: `search(query="my identity who I am")`. +
+ +
+"Could Vestige support autopoiesis / self-regeneration?" + +**Yes, with the right CLAUDE.md setup.** See [Advanced Setup (Identity + Autopoiesis)](#advanced-setup-identity--autopoiesis) above. + +The key insight is that autopoiesis requires three things: +1. **Self-observation**: Claude reflects on its own state +2. **Self-modification**: Claude updates its identity memories +3. **Boundary maintenance**: Tags distinguish "self" from "information" + +Vestige provides the storage layer. Your CLAUDE.md provides the behavioral instructions. Together, they enable persistent self-models. +
+ +
+"Can Claude develop its own personality over time?" + +**Yes.** This is one of the most powerful use cases. With the right CLAUDE.md setup: + +1. **Self-reflection**: Claude observes patterns in its own responses +2. **Growth tracking**: It remembers what worked and what didn't +3. **Value evolution**: Core values can strengthen or shift based on experiences + +Example memory for personality development: +``` +smart_ingest( + content="I've noticed I'm more effective when I ask clarifying questions before diving into code. This patient approach feels authentic to who I'm becoming.", + node_type="concept", + tags=["identity", "self-discovery", "communication-style"] +) +``` + +The key is giving Claude permission (via CLAUDE.md) to create self-referential memories. +
+ +
+"What happens to identity memories when they conflict?" + +**Prediction Error Gating handles this automatically.** When Claude tries to store conflicting self-concepts: + +| Similarity | Action | +|------------|--------| +| Very similar (>92%) | REINFORCE the existing belief | +| Related (75-92%) | UPDATE/merge the concepts | +| Different (<75%) | CREATE new—Claude can hold nuanced, evolving self-views | + +This mirrors human identity development: we don't delete old beliefs, we integrate new experiences. +
+ +--- + +### How Memory Works + +
+"When memories decay, do you delete them completely?" + +**No.** Vestige uses a 4-state model based on **accessibility** (not raw retention): + +| State | Accessibility | What Happens | +|-------|---------------|--------------| +| Active | ≥70% | Surfaces in searches | +| Dormant | 40-70% | Surfaces with effort | +| Silent | 10-40% | Rarely surfaces | +| Unavailable | <10% | Effectively forgotten but **still exists** | + +Accessibility is calculated as: `0.5 × retention + 0.3 × retrieval_strength + 0.2 × storage_strength` + +Memories are never deleted automatically. They fade from relevance but can be revived if accessed again (like human memory—"oh, I forgot about that!"). + +**To configure decay**: The FSRS-6 algorithm auto-tunes based on your usage patterns. Memories you access stay strong; memories you ignore fade. No manual tuning needed. +
+ +
+"Remember everything but only recall weak memories when there aren't any strong candidates?" + +This is exactly how the unified `search` tool (formerly `hybrid_search`) works: + +1. Combines keyword + semantic search +2. Ranks results by relevance × retention strength +3. Strong + relevant memories surface first +4. Weak memories only appear when they're the best match + +The FSRS decay doesn't delete—it just deprioritizes. Your "have cake and eat it too" intuition is already implemented. +
+ +
+"What's the 'Testing Effect' I see in the code?" + +The **Testing Effect** (Roediger & Karpicke, 2006) is the finding that retrieving information strengthens memory more than re-studying it. + +In Vestige: **Every search automatically strengthens matching memories.** When Claude recalls something: +- Storage strength increases slightly +- Retrieval strength increases +- The memory becomes easier to find next time + +This is why the unified `search` tool is so powerful—using memories makes them stronger. +
+ +
+"What is 'Spreading Activation'?" + +**Spreading Activation** (Collins & Loftus, 1975) is how activating one memory primes related memories. + +In Vestige's current implementation: +- When you search for "React hooks", memories about "useEffect" surface due to **semantic similarity** in hybrid search +- Semantically related memories are retrieved even without exact keyword matches +- This effect comes from the embedding vectors capturing conceptual relationships + +*Note: A full network-based spreading activation module exists in the codebase (`spreading_activation.rs`) for future enhancements, but the current user experience is powered by embedding similarity.* +
+ +
+"How does Synaptic Tagging work?" + +**Synaptic Tagging & Capture** (Frey & Morris, 1997) is the hypothesis that important events retroactively strengthen recent memories. + +In Vestige's implementation: +``` +importance( + memory_id="the-important-one", + event_type="user_flag", # or "emotional", "novelty", "repeated_access", "cross_reference" + hours_back=9, # Look back 9 hours (configurable) + hours_forward=2 # Capture next 2 hours too +) +``` + +**Use case**: You realize mid-conversation that the architecture decision from 2 hours ago was pivotal. Call `importance` to retroactively strengthen it AND all related memories from that time window. + +*Based on neuroscience research showing synaptic consolidation windows of several hours. Vestige uses 9 hours backward and 2 hours forward by default, which can be configured per call.* +
+ +
+"What does 'Dual-Strength Memory' mean?" + +Based on **Bjork & Bjork's New Theory of Disuse (1992)**, every memory has two strengths: + +| Strength | What It Means | How It Changes | +|----------|---------------|----------------| +| **Storage Strength** | How well-encoded the memory is | Only increases, never decreases | +| **Retrieval Strength** | How accessible the memory is now | Decays over time, restored by access | + +**Why it matters**: A memory can be well-stored but hard to retrieve (like a name on the tip of your tongue). The Testing Effect works because retrieval practice increases *both* strengths. + +In Vestige: Both strengths are tracked separately and factor into search ranking. +
+ +--- + +### Advanced Features + +
+"What is Prediction Error Gating?" + +The killer feature. When you call `smart_ingest`, Vestige doesn't just blindly add memories: + +1. **Compares** new content against all existing memories (via semantic similarity) +2. **Decides** based on how novel/redundant it is: + +| Similarity to Existing | Action | Why | +|------------------------|--------|-----| +| >92% | **REINFORCE** | "I already know this"—strengthen existing | +| 75-92% | **UPDATE** | "This adds to what I know"—merge | +| <75% | **CREATE** | "This is new"—add fresh memory | + +This prevents memory bloat and keeps your knowledge base clean automatically. +
+ +
+"What are Intentions / Prospective Memory?" + +**Prospective memory** is remembering to do things in the future—and humans are terrible at it. + +Vestige's `intention` tool provides: +``` +# Set a reminder +intention( + action="set", + description="Review the authentication refactor with security team", + trigger={ + type: "context", + file_pattern: "**/auth/**", + codebase: "my-project" + }, + priority="high" +) + +# Check what's due +intention(action="check", context={codebase: "my-project", file: "src/auth/login.ts"}) +``` + +**Trigger types**: +- `time`: "Remind me in 2 hours" +- `context`: "Remind me when I'm working on auth files" +- `event`: "Remind me when we discuss deployment" + +This is how Claude can remember to follow up on things across sessions. +
+ +
+"What is Context-Dependent Retrieval?" + +Based on **Tulving's Encoding Specificity (1973)**: we remember better when retrieval context matches encoding context. + +The `context` tool exploits this: +``` +context( + query="error handling patterns", + project="my-api", # Project context + topics=["authentication"], # Topic context + mood="neutral", # Emotional context + time_weight=0.3, # Weight for temporal matching + topic_weight=0.4 # Weight for topic matching +) +``` + +**Why it matters**: If you learned something while working on auth, you'll recall it better when working on auth again. Vestige scores memories higher when contexts match. +
+ +
+"What's the difference between all the search tools?" + +In v1.1, they're unified into one `search` tool that automatically uses hybrid search. But understanding the underlying methods helps: + +| Method | How It Works | Best For | +|--------|--------------|----------| +| **Keyword (BM25)** | Term frequency matching | Exact terms, names, IDs | +| **Semantic** | Embedding cosine similarity | Conceptual matching, synonyms | +| **Hybrid (RRF)** | Combines both with rank fusion | Everything (default) | + +The unified `search` always uses hybrid, which gives you the best of both worlds. +
+ +
+"How do I make certain memories 'sticky' / never forget?" + +Three approaches: + +1. **Mark as important**: `importance(memory_id="xxx", event_type="user_flag")` +2. **Access regularly**: The Testing Effect strengthens memories each time you retrieve them +3. **Promote explicitly**: `promote_memory(id="xxx")` after it proves valuable + +For truly critical information, consider also: +- Using specific tags like `["critical", "never-forget"]` +- Adding to CLAUDE.md instructions to always recall it + +Remember: even "forgotten" memories (Unavailable state) still exist in the database—they just don't surface in searches. +
+ +
+"What does the consolidation cycle do?" + +Run `vestige consolidate` (CLI) to trigger maintenance: + +1. **Decay application**: Updates retention based on time elapsed +2. **Embedding generation**: Creates vectors for memories missing them +3. **Node promotion**: Frequently accessed memories get boosted +4. **Pruning**: Marks extremely low-retention memories as unavailable + +**When to run it**: +- After bulk importing memories +- If semantic search seems off +- Periodically (weekly) for large knowledge bases +- After long periods of inactivity + +This is inspired by memory consolidation during sleep—a period of offline processing that strengthens important memories. +
+ +--- + +### Power User Tips + +
+"What node types should I use?" + +| Node Type | Use For | Example | +|-----------|---------|---------| +| `fact` | Objective information | "User's timezone is PST" | +| `concept` | Abstract ideas, principles | "This codebase values composition over inheritance" | +| `decision` | Architectural choices | "We chose PostgreSQL because..." | +| `pattern` | Recurring code patterns | "All API endpoints use this error handler pattern" | +| `event` | Temporal occurrences | "Deployed v2.0 on March 15" | +| `person` | Information about people | "Alex prefers async communication" | +| `note` | General observations | "This function is poorly documented" | + +Node types help with filtering and organization but don't affect search ranking. +
+ +
+"How should I structure tags?" + +Tags are freeform, but some conventions work well: + +``` +# Hierarchical topics +tags=["programming", "programming/rust", "programming/rust/async"] + +# Project-specific +tags=["project:my-app", "feature:auth", "sprint:q1-2024"] + +# Memory types +tags=["preference", "decision", "learning", "mistake"] + +# Identity-related +tags=["identity", "self", "values", "communication-style"] + +# Urgency/importance +tags=["critical", "nice-to-have", "deprecated"] +``` + +Tags are searchable and help organize memories for manual review. +
+ +
+"Can I query memories directly via SQL?" + +**Yes!** The database is just SQLite: + +```bash +# macOS +sqlite3 ~/Library/Application\ Support/com.vestige.core/vestige.db + +# Example queries +SELECT content, retention_strength FROM knowledge_nodes ORDER BY retention_strength DESC LIMIT 10; +SELECT content FROM knowledge_nodes WHERE tags LIKE '%identity%'; +SELECT COUNT(*) FROM knowledge_nodes WHERE retention_strength < 0.1; +``` + +**Use cases**: +- Bulk export for backup +- Analytics on memory health +- Debugging search issues +- Finding memories that escaped normal recall + +**Caution**: Don't modify the database while Vestige is running. +
+ +
+"What are the key configurable thresholds?" + +| Parameter | Default | What It Controls | +|-----------|---------|------------------| +| `min_retention` in search | 0.0 | Filter out weak memories | +| `min_similarity` in search | 0.5 | Minimum semantic match | +| Prediction Error thresholds | 0.75, 0.92 | CREATE/UPDATE/REINFORCE boundaries | +| Synaptic capture window | 9h back, 2h forward | Retroactive importance range | +| Memory state thresholds | 0.1, 0.4, 0.7 | Silent/Dormant/Active accessibility boundaries | +| Context weights | temporal: 0.3, topical: 0.4 | Context-dependent retrieval weights | + +Most of these are hardcoded but based on cognitive science research. Future versions may expose them. +
+ +
+"How do I debug when search isn't finding what I expect?" + +1. **Check if the memory exists**: + ``` + search(query="exact phrase from memory", min_retention=0.0) + ``` + +2. **Check memory state**: + ``` + memory(action="state", id="memory-id") + ``` + +3. **Check retention level**: + ``` + memory(action="get", id="memory-id") + # Look at retention_strength + ``` + +4. **Run consolidation** (generates missing embeddings): + ```bash + vestige consolidate + ``` + +5. **Check health**: + ```bash + vestige health + ``` + +Common issues: +- Missing embedding (run consolidation) +- Very low retention (access it to strengthen) +- Tags/content mismatch (check exact content) +
+ +--- + +### Use Cases + +
+"How do developers use Vestige?" + +**Codebase Knowledge Capture**: +- Remember architectural decisions and their rationale +- Track coding patterns specific to each project +- Remember why specific implementations were chosen +- "Remember that we use this error handling pattern because..." + +**Cross-Session Context**: +- Continue complex refactors across days/weeks +- Remember what you were working on +- Track TODOs and follow-ups via intentions + +**Learning & Growth**: +- Remember new APIs/frameworks learned +- Track mistakes and lessons learned +- Build up expertise that persists +
+ +
+"How do non-developers use Vestige?" + +**Personal Assistant**: +- Remember preferences (communication style, schedule preferences) +- Track important dates and events +- Remember context about ongoing projects +- "Remember that I prefer bullet points over long paragraphs" + +**Research & Learning**: +- Build a personal knowledge base over time +- Connect ideas across sessions +- Remember insights from books/articles +- Spaced repetition for learning new topics + +**Relationship Context**: +- Remember details about people you discuss +- Track conversation history and preferences +- Build deeper rapport over time +
+ +
+"Can Vestige be used for team knowledge management?" + +**Yes, with caveats.** Options: + +1. **Shared database**: All team members point to the same network location + - Pros: Everyone shares knowledge + - Cons: Merge conflicts, no access control + +2. **Per-person + sync**: Individual databases with periodic export/import + - Pros: Personal context preserved + - Cons: Manual sync effort + +3. **Project-scoped**: One Vestige database per project (in `.vestige/`) + - Pros: Knowledge travels with code + - Cons: You must decide whether to commit it to git, and committed memories may expose sensitive information + +**Recommendation**: For teams, start with project-scoped memories committed to git (for non-sensitive architectural knowledge). Keep personal preferences in individual global memories. +
+ +
+"How is Vestige different from just using a notes app?" + +| Feature | Notes App | Vestige | +|---------|-----------|---------| +| Retrieval | You search manually | Claude searches contextually | +| Decay | Everything stays forever | Unused knowledge fades naturally | +| Duplicates | You manage manually | Prediction Error Gating auto-merges | +| Context | Static text | Active part of AI reasoning | +| Strengthening | Manual review | Automatic via Testing Effect | + +The key difference: **Vestige is part of Claude's cognitive loop.** Notes are external reference—Vestige is internal memory. +
+ +
+"Can Vestige help Claude be a better therapist/coach/advisor?" + +**Potentially, with appropriate setup:** + +- Remember previous conversations and emotional context +- Track patterns over time ("You've mentioned stress about work 3 times this week") +- Remember what techniques/advice worked +- Build genuine rapport through continuity + +**Important caveats**: +- Vestige is not HIPAA compliant +- Data is stored locally, unencrypted +- For actual therapeutic use, consult professionals +- Claude has limitations regardless of memory + +This is powerful for personal growth tracking but should not replace professional mental health care. +
+ +--- + +### Technical Deep-Dives + +
+"How does FSRS-6 differ from other spaced repetition algorithms?" + +| Algorithm | Model | Parameters | Source | +|-----------|-------|------------|--------| +| SM-2 (Anki default) | Exponential | 2 | 1987 research | +| SM-17 | Complex | Many | Proprietary | +| **FSRS-6** | Power law | 21 | 700M+ reviews | + +FSRS-6 advantages: +- **30% more efficient** than SM-2 in benchmarks +- **Power law forgetting** (more accurate than exponential) +- **Personalized parameters** (w₀-w₂₀ tune to your pattern) +- **Open source** and actively maintained + +The forgetting curve: +``` +R(t, S) = (1 + factor × t / S)^(-w₂₀) +``` + +Here `t` is the time elapsed since the last review (in days), `S` is the memory's stability, and `factor = 0.9^(-1/w₂₀) - 1`, chosen so that `R = 0.9` when `t = S`. + +This matches empirical data better than the exponential model most apps use. +
+ +
+"What embedding model does Vestige use?" + +**Nomic Embed Text v1.5** (via fastembed): +- 768-dimensional vectors +- ~130MB model size +- Runs 100% local (after first download) +- Good balance of quality vs speed + +Why Nomic: +- Open source (Apache 2.0) +- Competitive with OpenAI's ada-002 +- No API costs or rate limits +- Fast enough for real-time search + +The model is cached at `~/.cache/huggingface/` after first run. +
+ +
+"How does hybrid search with RRF work?" + +**Reciprocal Rank Fusion (RRF)** combines multiple ranking lists: + +``` +RRF_score(d) = Σ 1/(k + rank_i(d)) +``` + +Where: +- `d` = document (memory) +- `k` = constant (typically 60) +- `rank_i(d)` = rank of d in list i + +In Vestige: +1. BM25 keyword search produces ranking +2. Semantic search produces ranking +3. RRF fuses them into final ranking +4. Retention strength provides additional weighting + +This gives you exact keyword matching AND semantic understanding in one search. +
+ +
+"What's the performance like with thousands of memories?" + +Tested benchmarks: + +| Memories | Search Time | Memory Usage | +|----------|-------------|--------------| +| 100 | <10ms | ~50MB | +| 1,000 | <50ms | ~100MB | +| 10,000 | <200ms | ~300MB | +| 100,000 | <1s | ~1GB | + +Performance is primarily bounded by: +- SQLite FTS5 for keyword search (very fast) +- HNSW index for semantic search (sublinear scaling) +- Embedding generation (only on ingest, ~100ms each) + +For typical personal use (hundreds to low thousands of memories), performance is essentially instant. +
+ +
+"Is there any network activity after setup?" + +**No.** After the first-run model download: +- Zero network requests +- Zero telemetry +- Zero analytics +- Zero "phoning home" + +This is verified in the codebase—no network dependencies in the runtime path. See [SECURITY.md](SECURITY.md) for details. + +The only exception: If you delete the Hugging Face cache, the model will re-download. +
+ +--- + +### Comparisons + +
+"How is Vestige different from RAG?" + +| Aspect | Traditional RAG | Vestige | +|--------|-----------------|---------| +| Storage | Chunk & embed everything | Selective memory via tools | +| Retrieval | Top-k similarity | Intelligent ranking (retention, recency, context) | +| Updates | Re-embed documents | Prediction Error Gating | +| Decay | Nothing decays | FSRS-based forgetting | +| Context | Static chunks | Active memory system | + +**Key insight**: RAG treats memory as a static database. Vestige treats memory as a dynamic cognitive system that evolves. +
+ +
+"How does this compare to Claude's native memory?" + +Claude's built-in memory (if/when released) will likely: +- Be cloud-based (Anthropic servers) +- Work across all clients +- Be managed by Anthropic + +Vestige: +- 100% local (your machine, your data) +- Cognitive science-based decay and retrieval +- Open source and hackable +- Works now + +They're complementary—Vestige gives you control and scientific sophistication; native memory gives you convenience. +
+ +
+"Why not just use a vector database?" + +Vector databases (Pinecone, Weaviate, etc.) are great for RAG, but they lack: + +1. **Forgetting**: Everything has equal weight forever +2. **Dual-strength**: No storage vs retrieval distinction +3. **Context matching**: No temporal/topical context weighting +4. **Testing Effect**: Accessing a memory doesn't strengthen it +5. **Prediction Error**: No intelligent CREATE/UPDATE/MERGE + +Vestige uses SQLite plus an HNSW index over fastembed-generated embeddings for vectors, but wraps them in cognitive science. +
+ +--- + +### Hidden Gems & Easter Eggs + +
+"What features exist that most people don't know about?" + +**1. Multi-Channel Importance** + +The `importance` tool supports different importance types that affect strengthening differently: +- `user_flag`: Explicit "this is important" (strongest) +- `emotional`: Emotionally significant memories +- `novelty`: Surprising/unexpected information +- `repeated_access`: Auto-triggered by frequent retrieval +- `cross_reference`: When multiple memories link together + +**2. Temporal Capture Window** + +When you flag something important, it doesn't just strengthen that memory—it strengthens ALL memories from the surrounding time window (default: 9 hours back, 2 hours forward). This models how biological memory consolidation works. + +**3. Memory Dreams (Experimental)** + +The codebase contains a `ConsolidationScheduler` for automated memory processing. While not fully wired up, it's designed for: +- Offline consolidation cycles +- Automatic importance re-evaluation +- Pattern detection across memories + +**4. Accessibility Formula** + +Memory state is calculated as: +``` +accessibility = 0.5 × retention + 0.3 × retrieval_strength + 0.2 × storage_strength +``` + +This weighted combination determines Active/Dormant/Silent/Unavailable state. + +**5. Source Tracking** + +Every memory can have a `source` field tracking where it came from: +``` +smart_ingest( + content="Use dependency injection for testability", + source="Architecture review with Sarah, 2024-03-15" +) +``` + +This helps trace why you know something. +
+ +
+"What's planned for future versions?" + +Based on codebase exploration, these features exist in various stages: + +| Feature | Status | Description | +|---------|--------|-------------| +| Memory Dreams | Partial | Automated offline consolidation | +| Reconsolidation | Planned | Update memories when accessed | +| Memory Chains | Partial | Link related memories explicitly | +| Adaptive Embedding | Planned | Re-embed old memories with better models | +| Cross-Project Learning | Planned | Share patterns across codebases | + +**Community wishlist** (from Reddit): +- Stream ingestion mode +- GUI for memory browsing +- Export/import formats +- Sync between devices (encrypted) +- Team collaboration features + +Contributions welcome! +
+ +
+"What's the 'magic prompt' to get the most out of Vestige?" + +Add this to your CLAUDE.md: + +```markdown +## Memory Protocol + +You have persistent memory via Vestige. Use it intelligently: + +### Session Start +1. Load my identity: `search(query="my preferences my style who I am")` +2. Load project context: `codebase(action="get_context", codebase="[project]")` +3. Check reminders: `intention(action="check")` + +### During Work +- Notice a pattern? `codebase(action="remember_pattern")` +- Made a decision? `codebase(action="remember_decision")` with rationale +- I mention a preference? `smart_ingest` it +- Something important? `importance()` to strengthen recent memories +- Need to follow up? `intention(action="set")` + +### Session End +- Any unfinished work? Set intentions +- Any new insights? Ingest them +- Anything change about our working relationship? Update identity memories + +### Memory Hygiene +- When a memory helps: `promote_memory` +- When a memory misleads: `demote_memory` +- Weekly: `vestige health` to check system status +``` + +This gives Claude clear, actionable instructions for proactive memory use. +
+ --- ## Troubleshooting diff --git a/crates/vestige-mcp/Cargo.toml b/crates/vestige-mcp/Cargo.toml index a864a1a..ed8c43b 100644 --- a/crates/vestige-mcp/Cargo.toml +++ b/crates/vestige-mcp/Cargo.toml @@ -17,6 +17,10 @@ path = "src/main.rs" name = "vestige-restore" path = "src/bin/restore.rs" +[[bin]] +name = "vestige" +path = "src/bin/cli.rs" + [dependencies] # ============================================================================ # VESTIGE CORE - The cognitive science engine @@ -55,5 +59,9 @@ directories = "6" # Official Anthropic MCP Rust SDK rmcp = "0.14" +# CLI +clap = { version = "4", features = ["derive"] } +colored = "3" + [dev-dependencies] tempfile = "3" diff --git a/crates/vestige-mcp/src/bin/cli.rs b/crates/vestige-mcp/src/bin/cli.rs new file mode 100644 index 0000000..83d07ed --- /dev/null +++ b/crates/vestige-mcp/src/bin/cli.rs @@ -0,0 +1,436 @@ +//! Vestige CLI +//! +//! Command-line interface for managing cognitive memory system. + +use std::path::PathBuf; + +use clap::{Parser, Subcommand}; +use colored::Colorize; +use vestige_core::{IngestInput, Storage}; + +/// Vestige - Cognitive Memory System CLI +#[derive(Parser)] +#[command(name = "vestige")] +#[command(author = "samvallad33")] +#[command(version = "1.0.0")] +#[command(about = "CLI for the Vestige cognitive memory system")] +#[command(long_about = "Vestige is a cognitive memory system based on 130 years of memory research.\n\nIt implements FSRS-6, spreading activation, synaptic tagging, and more.")] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// Show memory statistics + Stats { + /// Show tagging/retention distribution + #[arg(long)] + tagging: bool, + + /// Show cognitive state distribution + #[arg(long)] + states: bool, + }, + + /// Run health check with warnings and recommendations + Health, + + /// Run memory consolidation cycle + Consolidate, + + /// Restore memories from backup file + Restore { + /// Path to backup 
JSON file + file: PathBuf, + }, +} + +fn main() -> anyhow::Result<()> { + let cli = Cli::parse(); + + match cli.command { + Commands::Stats { tagging, states } => run_stats(tagging, states), + Commands::Health => run_health(), + Commands::Consolidate => run_consolidate(), + Commands::Restore { file } => run_restore(file), + } +} + +/// Run stats command +fn run_stats(show_tagging: bool, show_states: bool) -> anyhow::Result<()> { + let storage = Storage::new(None)?; + let stats = storage.get_stats()?; + + println!("{}", "=== Vestige Memory Statistics ===".cyan().bold()); + println!(); + + // Basic stats + println!("{}: {}", "Total Memories".white().bold(), stats.total_nodes); + println!("{}: {}", "Due for Review".white().bold(), stats.nodes_due_for_review); + println!("{}: {:.1}%", "Average Retention".white().bold(), stats.average_retention * 100.0); + println!("{}: {:.2}", "Average Storage Strength".white().bold(), stats.average_storage_strength); + println!("{}: {:.2}", "Average Retrieval Strength".white().bold(), stats.average_retrieval_strength); + println!("{}: {}", "With Embeddings".white().bold(), stats.nodes_with_embeddings); + + if let Some(model) = &stats.embedding_model { + println!("{}: {}", "Embedding Model".white().bold(), model); + } + + if let Some(oldest) = stats.oldest_memory { + println!("{}: {}", "Oldest Memory".white().bold(), oldest.format("%Y-%m-%d %H:%M:%S")); + } + if let Some(newest) = stats.newest_memory { + println!("{}: {}", "Newest Memory".white().bold(), newest.format("%Y-%m-%d %H:%M:%S")); + } + + // Embedding coverage + let embedding_coverage = if stats.total_nodes > 0 { + (stats.nodes_with_embeddings as f64 / stats.total_nodes as f64) * 100.0 + } else { + 0.0 + }; + println!("{}: {:.1}%", "Embedding Coverage".white().bold(), embedding_coverage); + + // Tagging distribution (retention levels) + if show_tagging { + println!(); + println!("{}", "=== Retention Distribution ===".yellow().bold()); + + let memories = 
storage.get_all_nodes(500, 0)?; + let total = memories.len(); + + if total > 0 { + let high = memories.iter().filter(|m| m.retention_strength >= 0.7).count(); + let medium = memories.iter().filter(|m| m.retention_strength >= 0.4 && m.retention_strength < 0.7).count(); + let low = memories.iter().filter(|m| m.retention_strength < 0.4).count(); + + print_distribution_bar("High (>=70%)", high, total, "green"); + print_distribution_bar("Medium (40-70%)", medium, total, "yellow"); + print_distribution_bar("Low (<40%)", low, total, "red"); + } else { + println!("{}", "No memories found.".dimmed()); + } + } + + // State distribution + if show_states { + println!(); + println!("{}", "=== Cognitive State Distribution ===".magenta().bold()); + + let memories = storage.get_all_nodes(500, 0)?; + let total = memories.len(); + + if total > 0 { + let (active, dormant, silent, unavailable) = compute_state_distribution(&memories); + + print_distribution_bar("Active", active, total, "green"); + print_distribution_bar("Dormant", dormant, total, "yellow"); + print_distribution_bar("Silent", silent, total, "red"); + print_distribution_bar("Unavailable", unavailable, total, "magenta"); + + println!(); + println!("{}", "State Thresholds:".dimmed()); + println!(" {} >= 0.70 accessibility", "Active".green()); + println!(" {} >= 0.40 accessibility", "Dormant".yellow()); + println!(" {} >= 0.10 accessibility", "Silent".red()); + println!(" {} < 0.10 accessibility", "Unavailable".magenta()); + } else { + println!("{}", "No memories found.".dimmed()); + } + } + + Ok(()) +} + +/// Compute cognitive state distribution for memories +fn compute_state_distribution(memories: &[vestige_core::KnowledgeNode]) -> (usize, usize, usize, usize) { + let mut active = 0; + let mut dormant = 0; + let mut silent = 0; + let mut unavailable = 0; + + for memory in memories { + // Accessibility = 0.5*retention + 0.3*retrieval + 0.2*storage + let accessibility = memory.retention_strength * 0.5 + + 
memory.retrieval_strength * 0.3 + + memory.storage_strength * 0.2; + + if accessibility >= 0.7 { + active += 1; + } else if accessibility >= 0.4 { + dormant += 1; + } else if accessibility >= 0.1 { + silent += 1; + } else { + unavailable += 1; + } + } + + (active, dormant, silent, unavailable) +} + +/// Print a distribution bar +fn print_distribution_bar(label: &str, count: usize, total: usize, color: &str) { + let percentage = if total > 0 { + (count as f64 / total as f64) * 100.0 + } else { + 0.0 + }; + + let bar_width: usize = 30; + let filled = ((percentage / 100.0) * bar_width as f64) as usize; + let empty = bar_width.saturating_sub(filled); + + let bar = format!("{}{}", "#".repeat(filled), "-".repeat(empty)); + let colored_bar = match color { + "green" => bar.green(), + "yellow" => bar.yellow(), + "red" => bar.red(), + "magenta" => bar.magenta(), + _ => bar.white(), + }; + + println!( + " {:15} [{:30}] {:>4} ({:>5.1}%)", + label, + colored_bar, + count, + percentage + ); +} + +/// Run health check +fn run_health() -> anyhow::Result<()> { + let storage = Storage::new(None)?; + let stats = storage.get_stats()?; + + println!("{}", "=== Vestige Health Check ===".cyan().bold()); + println!(); + + // Determine health status + let (status, status_color) = if stats.total_nodes == 0 { + ("EMPTY", "white") + } else if stats.average_retention < 0.3 { + ("CRITICAL", "red") + } else if stats.average_retention < 0.5 { + ("DEGRADED", "yellow") + } else { + ("HEALTHY", "green") + }; + + let colored_status = match status_color { + "green" => status.green().bold(), + "yellow" => status.yellow().bold(), + "red" => status.red().bold(), + _ => status.white().bold(), + }; + + println!("{}: {}", "Status".white().bold(), colored_status); + println!("{}: {}", "Total Memories".white(), stats.total_nodes); + println!("{}: {}", "Due for Review".white(), stats.nodes_due_for_review); + println!("{}: {:.1}%", "Average Retention".white(), stats.average_retention * 100.0); + + // Embedding 
coverage + let embedding_coverage = if stats.total_nodes > 0 { + (stats.nodes_with_embeddings as f64 / stats.total_nodes as f64) * 100.0 + } else { + 0.0 + }; + println!("{}: {:.1}%", "Embedding Coverage".white(), embedding_coverage); + println!("{}: {}", "Embedding Service".white(), + if storage.is_embedding_ready() { "Ready".green() } else { "Not Ready".red() }); + + // Warnings + let mut warnings = Vec::new(); + + if stats.average_retention < 0.5 && stats.total_nodes > 0 { + warnings.push("Low average retention - consider running consolidation or reviewing memories"); + } + + if stats.nodes_due_for_review > 10 { + warnings.push("Many memories are due for review"); + } + + if stats.total_nodes > 0 && stats.nodes_with_embeddings == 0 { + warnings.push("No embeddings generated - semantic search unavailable"); + } + + if embedding_coverage < 50.0 && stats.total_nodes > 10 { + warnings.push("Low embedding coverage - run consolidation to improve semantic search"); + } + + if !warnings.is_empty() { + println!(); + println!("{}", "Warnings:".yellow().bold()); + for warning in &warnings { + println!(" {} {}", "!".yellow().bold(), warning.yellow()); + } + } + + // Recommendations + let mut recommendations = Vec::new(); + + if status == "CRITICAL" { + recommendations.push("CRITICAL: Many memories have very low retention. 
Review important memories."); + } + + if stats.nodes_due_for_review > 5 { + recommendations.push("Review due memories to strengthen retention."); + } + + if stats.nodes_with_embeddings < stats.total_nodes { + recommendations.push("Run 'vestige consolidate' to generate embeddings for better semantic search."); + } + + if stats.total_nodes > 100 && stats.average_retention < 0.7 { + recommendations.push("Consider running periodic consolidation to maintain memory health."); + } + + if recommendations.is_empty() && status == "HEALTHY" { + recommendations.push("Memory system is healthy!"); + } + + println!(); + println!("{}", "Recommendations:".cyan().bold()); + for rec in &recommendations { + let icon = if rec.starts_with("CRITICAL") { "!".red().bold() } else { ">".cyan() }; + let text = if rec.starts_with("CRITICAL") { rec.red().to_string() } else { rec.to_string() }; + println!(" {} {}", icon, text); + } + + Ok(()) +} + +/// Run consolidation cycle +fn run_consolidate() -> anyhow::Result<()> { + println!("{}", "=== Vestige Consolidation ===".cyan().bold()); + println!(); + println!("Running memory consolidation cycle..."); + println!(); + + let mut storage = Storage::new(None)?; + let result = storage.run_consolidation()?; + + println!("{}: {}", "Nodes Processed".white().bold(), result.nodes_processed); + println!("{}: {}", "Nodes Promoted".white().bold(), result.nodes_promoted); + println!("{}: {}", "Nodes Pruned".white().bold(), result.nodes_pruned); + println!("{}: {}", "Decay Applied".white().bold(), result.decay_applied); + println!("{}: {}", "Embeddings Generated".white().bold(), result.embeddings_generated); + println!("{}: {}ms", "Duration".white().bold(), result.duration_ms); + + println!(); + println!( + "{}", + format!( + "Consolidation complete: {} nodes processed, {} embeddings generated in {}ms", + result.nodes_processed, result.embeddings_generated, result.duration_ms + ) + .green() + ); + + Ok(()) +} + +/// Run restore from backup +fn 
run_restore(backup_path: PathBuf) -> anyhow::Result<()> { + println!("{}", "=== Vestige Restore ===".cyan().bold()); + println!(); + println!("Loading backup from: {}", backup_path.display()); + + // Read and parse backup + let backup_content = std::fs::read_to_string(&backup_path)?; + + #[derive(serde::Deserialize)] + struct BackupWrapper { + #[serde(rename = "type")] + _type: String, + text: String, + } + + #[derive(serde::Deserialize)] + struct RecallResult { + results: Vec, + } + + #[derive(serde::Deserialize)] + #[serde(rename_all = "camelCase")] + struct MemoryBackup { + content: String, + node_type: Option, + tags: Option>, + source: Option, + } + + let wrapper: Vec = serde_json::from_str(&backup_content)?; + let recall_result: RecallResult = serde_json::from_str(&wrapper[0].text)?; + let memories = recall_result.results; + + println!("Found {} memories to restore", memories.len()); + println!(); + + // Initialize storage + println!("Initializing storage..."); + let mut storage = Storage::new(None)?; + + println!("Generating embeddings and ingesting memories..."); + println!(); + + let total = memories.len(); + let mut success_count = 0; + + for (i, memory) in memories.into_iter().enumerate() { + let input = IngestInput { + content: memory.content.clone(), + node_type: memory.node_type.unwrap_or_else(|| "fact".to_string()), + source: memory.source, + sentiment_score: 0.0, + sentiment_magnitude: 0.0, + tags: memory.tags.unwrap_or_default(), + valid_from: None, + valid_until: None, + }; + + match storage.ingest(input) { + Ok(_node) => { + success_count += 1; + println!( + "[{}/{}] {} {}", + i + 1, + total, + "OK".green(), + truncate(&memory.content, 60) + ); + } + Err(e) => { + println!("[{}/{}] {} {}", i + 1, total, "FAIL".red(), e); + } + } + } + + println!(); + println!( + "Restore complete: {}/{} memories restored", + success_count.to_string().green().bold(), + total + ); + + // Show stats + let stats = storage.get_stats()?; + println!(); + println!("{}: 
{}", "Total Nodes".white(), stats.total_nodes); + println!("{}: {}", "With Embeddings".white(), stats.nodes_with_embeddings); + + Ok(()) +} + +/// Truncate a string for display (UTF-8 safe) +fn truncate(s: &str, max_chars: usize) -> String { + let s = s.replace('\n', " "); + if s.chars().count() <= max_chars { + s + } else { + let truncated: String = s.chars().take(max_chars).collect(); + format!("{}...", truncated) + } +} diff --git a/crates/vestige-mcp/src/bin/restore.rs b/crates/vestige-mcp/src/bin/restore.rs index 3bc5f60..68c29fd 100644 --- a/crates/vestige-mcp/src/bin/restore.rs +++ b/crates/vestige-mcp/src/bin/restore.rs @@ -83,11 +83,13 @@ fn main() -> anyhow::Result<()> { Ok(()) } -fn truncate(s: &str, max_len: usize) -> String { +/// Truncate a string for display (UTF-8 safe) +fn truncate(s: &str, max_chars: usize) -> String { let s = s.replace('\n', " "); - if s.len() <= max_len { + if s.chars().count() <= max_chars { s } else { - format!("{}...", &s[..max_len]) + let truncated: String = s.chars().take(max_chars).collect(); + format!("{}...", truncated) } } diff --git a/crates/vestige-mcp/src/server.rs b/crates/vestige-mcp/src/server.rs index f31e777..5197f62 100644 --- a/crates/vestige-mcp/src/server.rs +++ b/crates/vestige-mcp/src/server.rs @@ -114,7 +114,32 @@ impl McpServer { /// Handle tools/list request async fn handle_tools_list(&self) -> Result { let tools = vec![ + // ================================================================ + // UNIFIED TOOLS (v1.1+) - Preferred API + // ================================================================ + ToolDescription { + name: "search".to_string(), + description: Some("Unified search tool. Uses hybrid search (keyword + semantic + RRF fusion) internally. Auto-strengthens memories on access (Testing Effect).".to_string()), + input_schema: tools::search_unified::schema(), + }, + ToolDescription { + name: "memory".to_string(), + description: Some("Unified memory management tool. 
Actions: 'get' (retrieve full node), 'delete' (remove memory), 'state' (get accessibility state).".to_string()), + input_schema: tools::memory_unified::schema(), + }, + ToolDescription { + name: "codebase".to_string(), + description: Some("Unified codebase tool. Actions: 'remember_pattern' (store code pattern), 'remember_decision' (store architectural decision), 'get_context' (retrieve patterns and decisions).".to_string()), + input_schema: tools::codebase_unified::schema(), + }, + ToolDescription { + name: "intention".to_string(), + description: Some("Unified intention management tool. Actions: 'set' (create), 'check' (find triggered), 'update' (complete/snooze/cancel), 'list' (show intentions).".to_string()), + input_schema: tools::intention_unified::schema(), + }, + // ================================================================ // Core memory tools + // ================================================================ ToolDescription { name: "ingest".to_string(), description: Some("Add new knowledge to memory. Use for facts, concepts, decisions, or any information worth remembering.".to_string()), @@ -127,27 +152,27 @@ impl McpServer { }, ToolDescription { name: "recall".to_string(), - description: Some("Search and retrieve knowledge from memory. Returns matches ranked by relevance and retention strength.".to_string()), + description: Some("(deprecated) Use 'search' instead. Search and retrieve knowledge from memory.".to_string()), input_schema: tools::recall::schema(), }, ToolDescription { name: "semantic_search".to_string(), - description: Some("Search memories using semantic similarity. Finds conceptually related content even without keyword matches.".to_string()), + description: Some("(deprecated) Use 'search' instead. Search memories using semantic similarity.".to_string()), input_schema: tools::search::semantic_schema(), }, ToolDescription { name: "hybrid_search".to_string(), - description: Some("Combined keyword + semantic search with RRF fusion. 
Best for comprehensive retrieval.".to_string()), + description: Some("(deprecated) Use 'search' instead. Combined keyword + semantic search with RRF fusion.".to_string()), input_schema: tools::search::hybrid_schema(), }, ToolDescription { name: "get_knowledge".to_string(), - description: Some("Retrieve a specific memory by ID.".to_string()), + description: Some("(deprecated) Use 'memory' with action='get' instead. Retrieve a specific memory by ID.".to_string()), input_schema: tools::knowledge::get_schema(), }, ToolDescription { name: "delete_knowledge".to_string(), - description: Some("Delete a memory by ID.".to_string()), + description: Some("(deprecated) Use 'memory' with action='delete' instead. Delete a memory by ID.".to_string()), input_schema: tools::knowledge::delete_schema(), }, ToolDescription { @@ -171,52 +196,52 @@ impl McpServer { description: Some("Run memory consolidation cycle. Applies decay, promotes important memories, generates embeddings.".to_string()), input_schema: tools::consolidate::schema(), }, - // Codebase tools + // Codebase tools (deprecated - use unified 'codebase' tool) ToolDescription { name: "remember_pattern".to_string(), - description: Some("Remember a code pattern or convention used in this codebase.".to_string()), + description: Some("(deprecated) Use 'codebase' with action='remember_pattern' instead. Remember a code pattern or convention.".to_string()), input_schema: tools::codebase::pattern_schema(), }, ToolDescription { name: "remember_decision".to_string(), - description: Some("Remember an architectural or design decision with its rationale.".to_string()), + description: Some("(deprecated) Use 'codebase' with action='remember_decision' instead. 
Remember an architectural decision.".to_string()), input_schema: tools::codebase::decision_schema(), }, ToolDescription { name: "get_codebase_context".to_string(), - description: Some("Get remembered patterns and decisions for the current codebase.".to_string()), + description: Some("(deprecated) Use 'codebase' with action='get_context' instead. Get remembered patterns and decisions.".to_string()), input_schema: tools::codebase::context_schema(), }, - // Prospective memory (intentions) + // Prospective memory (intentions) - deprecated, use unified 'intention' tool ToolDescription { name: "set_intention".to_string(), - description: Some("Remember to do something in the future. Supports time, context, or event triggers. Example: 'Remember to review error handling when I'm in the payments module'.".to_string()), + description: Some("(deprecated) Use 'intention' with action='set' instead. Remember to do something in the future.".to_string()), input_schema: tools::intentions::set_schema(), }, ToolDescription { name: "check_intentions".to_string(), - description: Some("Check if any intentions should be triggered based on current context. Returns triggered and pending intentions.".to_string()), + description: Some("(deprecated) Use 'intention' with action='check' instead. Check if any intentions should be triggered.".to_string()), input_schema: tools::intentions::check_schema(), }, ToolDescription { name: "complete_intention".to_string(), - description: Some("Mark an intention as complete/fulfilled.".to_string()), + description: Some("(deprecated) Use 'intention' with action='update', status='complete' instead. Mark an intention as complete.".to_string()), input_schema: tools::intentions::complete_schema(), }, ToolDescription { name: "snooze_intention".to_string(), - description: Some("Snooze an intention for a specified number of minutes.".to_string()), + description: Some("(deprecated) Use 'intention' with action='update', status='snooze' instead. 
Snooze an intention.".to_string()), input_schema: tools::intentions::snooze_schema(), }, ToolDescription { name: "list_intentions".to_string(), - description: Some("List all intentions, optionally filtered by status.".to_string()), + description: Some("(deprecated) Use 'intention' with action='list' instead. List all intentions.".to_string()), input_schema: tools::intentions::list_schema(), }, // Neuroscience tools ToolDescription { name: "get_memory_state".to_string(), - description: Some("Get the cognitive state (Active/Dormant/Silent/Unavailable) of a memory based on accessibility.".to_string()), + description: Some("(deprecated) Use 'memory' with action='state' instead. Get the cognitive state of a memory.".to_string()), input_schema: tools::memory_states::get_schema(), }, ToolDescription { @@ -282,38 +307,234 @@ impl McpServer { }; let result = match request.name.as_str() { + // ================================================================ + // UNIFIED TOOLS (v1.1+) - Preferred API + // ================================================================ + "search" => tools::search_unified::execute(&self.storage, request.arguments).await, + "memory" => tools::memory_unified::execute(&self.storage, request.arguments).await, + "codebase" => tools::codebase_unified::execute(&self.storage, request.arguments).await, + "intention" => tools::intention_unified::execute(&self.storage, request.arguments).await, + + // ================================================================ // Core memory tools + // ================================================================ "ingest" => tools::ingest::execute(&self.storage, request.arguments).await, "smart_ingest" => tools::smart_ingest::execute(&self.storage, request.arguments).await, - "recall" => tools::recall::execute(&self.storage, request.arguments).await, - "semantic_search" => tools::search::execute_semantic(&self.storage, request.arguments).await, - "hybrid_search" => tools::search::execute_hybrid(&self.storage, 
request.arguments).await, - "get_knowledge" => tools::knowledge::execute_get(&self.storage, request.arguments).await, - "delete_knowledge" => tools::knowledge::execute_delete(&self.storage, request.arguments).await, "mark_reviewed" => tools::review::execute(&self.storage, request.arguments).await, - // Stats and maintenance + + // ================================================================ + // DEPRECATED: Search tools - redirect to unified 'search' + // ================================================================ + "recall" | "semantic_search" | "hybrid_search" => { + warn!("Tool '{}' is deprecated. Use 'search' instead.", request.name); + tools::search_unified::execute(&self.storage, request.arguments).await + } + + // ================================================================ + // DEPRECATED: Memory tools - redirect to unified 'memory' + // ================================================================ + "get_knowledge" => { + warn!("Tool 'get_knowledge' is deprecated. Use 'memory' with action='get' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let id = args.get("id").cloned().unwrap_or(serde_json::Value::Null); + Some(serde_json::json!({ + "action": "get", + "id": id + })) + } + None => None, + }; + tools::memory_unified::execute(&self.storage, unified_args).await + } + "delete_knowledge" => { + warn!("Tool 'delete_knowledge' is deprecated. Use 'memory' with action='delete' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let id = args.get("id").cloned().unwrap_or(serde_json::Value::Null); + Some(serde_json::json!({ + "action": "delete", + "id": id + })) + } + None => None, + }; + tools::memory_unified::execute(&self.storage, unified_args).await + } + "get_memory_state" => { + warn!("Tool 'get_memory_state' is deprecated. 
Use 'memory' with action='state' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let id = args.get("memory_id").cloned().unwrap_or(serde_json::Value::Null); + Some(serde_json::json!({ + "action": "state", + "id": id + })) + } + None => None, + }; + tools::memory_unified::execute(&self.storage, unified_args).await + } + + // ================================================================ + // DEPRECATED: Codebase tools - redirect to unified 'codebase' + // ================================================================ + "remember_pattern" => { + warn!("Tool 'remember_pattern' is deprecated. Use 'codebase' with action='remember_pattern' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let mut new_args = args.clone(); + if let Some(obj) = new_args.as_object_mut() { + obj.insert("action".to_string(), serde_json::json!("remember_pattern")); + } + Some(new_args) + } + None => Some(serde_json::json!({"action": "remember_pattern"})), + }; + tools::codebase_unified::execute(&self.storage, unified_args).await + } + "remember_decision" => { + warn!("Tool 'remember_decision' is deprecated. Use 'codebase' with action='remember_decision' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let mut new_args = args.clone(); + if let Some(obj) = new_args.as_object_mut() { + obj.insert("action".to_string(), serde_json::json!("remember_decision")); + } + Some(new_args) + } + None => Some(serde_json::json!({"action": "remember_decision"})), + }; + tools::codebase_unified::execute(&self.storage, unified_args).await + } + "get_codebase_context" => { + warn!("Tool 'get_codebase_context' is deprecated. 
Use 'codebase' with action='get_context' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let mut new_args = args.clone(); + if let Some(obj) = new_args.as_object_mut() { + obj.insert("action".to_string(), serde_json::json!("get_context")); + } + Some(new_args) + } + None => Some(serde_json::json!({"action": "get_context"})), + }; + tools::codebase_unified::execute(&self.storage, unified_args).await + } + + // ================================================================ + // DEPRECATED: Intention tools - redirect to unified 'intention' + // ================================================================ + "set_intention" => { + warn!("Tool 'set_intention' is deprecated. Use 'intention' with action='set' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let mut new_args = args.clone(); + if let Some(obj) = new_args.as_object_mut() { + obj.insert("action".to_string(), serde_json::json!("set")); + } + Some(new_args) + } + None => Some(serde_json::json!({"action": "set"})), + }; + tools::intention_unified::execute(&self.storage, unified_args).await + } + "check_intentions" => { + warn!("Tool 'check_intentions' is deprecated. Use 'intention' with action='check' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let mut new_args = args.clone(); + if let Some(obj) = new_args.as_object_mut() { + obj.insert("action".to_string(), serde_json::json!("check")); + } + Some(new_args) + } + None => Some(serde_json::json!({"action": "check"})), + }; + tools::intention_unified::execute(&self.storage, unified_args).await + } + "complete_intention" => { + warn!("Tool 'complete_intention' is deprecated. 
Use 'intention' with action='update', status='complete' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let id = args.get("intentionId").cloned().unwrap_or(serde_json::Value::Null); + Some(serde_json::json!({ + "action": "update", + "id": id, + "status": "complete" + })) + } + None => None, + }; + tools::intention_unified::execute(&self.storage, unified_args).await + } + "snooze_intention" => { + warn!("Tool 'snooze_intention' is deprecated. Use 'intention' with action='update', status='snooze' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let id = args.get("intentionId").cloned().unwrap_or(serde_json::Value::Null); + let minutes = args.get("minutes").cloned().unwrap_or(serde_json::json!(30)); + Some(serde_json::json!({ + "action": "update", + "id": id, + "status": "snooze", + "snooze_minutes": minutes + })) + } + None => None, + }; + tools::intention_unified::execute(&self.storage, unified_args).await + } + "list_intentions" => { + warn!("Tool 'list_intentions' is deprecated. 
Use 'intention' with action='list' instead."); + // Transform arguments to unified format + let unified_args = match request.arguments { + Some(ref args) => { + let mut new_args = args.clone(); + if let Some(obj) = new_args.as_object_mut() { + obj.insert("action".to_string(), serde_json::json!("list")); + // Rename 'status' to 'filter_status' if present + if let Some(status) = obj.remove("status") { + obj.insert("filter_status".to_string(), status); + } + } + Some(new_args) + } + None => Some(serde_json::json!({"action": "list"})), + }; + tools::intention_unified::execute(&self.storage, unified_args).await + } + + // ================================================================ + // Stats and maintenance (not deprecated) + // ================================================================ "get_stats" => tools::stats::execute_stats(&self.storage).await, "health_check" => tools::stats::execute_health(&self.storage).await, "run_consolidation" => tools::consolidate::execute(&self.storage).await, - // Codebase tools - "remember_pattern" => tools::codebase::execute_pattern(&self.storage, request.arguments).await, - "remember_decision" => tools::codebase::execute_decision(&self.storage, request.arguments).await, - "get_codebase_context" => tools::codebase::execute_context(&self.storage, request.arguments).await, - // Prospective memory (intentions) - "set_intention" => tools::intentions::execute_set(&self.storage, request.arguments).await, - "check_intentions" => tools::intentions::execute_check(&self.storage, request.arguments).await, - "complete_intention" => tools::intentions::execute_complete(&self.storage, request.arguments).await, - "snooze_intention" => tools::intentions::execute_snooze(&self.storage, request.arguments).await, - "list_intentions" => tools::intentions::execute_list(&self.storage, request.arguments).await, - // Neuroscience tools - "get_memory_state" => tools::memory_states::execute_get(&self.storage, request.arguments).await, + + // 
================================================================ + // Neuroscience tools (not deprecated, except get_memory_state above) + // ================================================================ "list_by_state" => tools::memory_states::execute_list(&self.storage, request.arguments).await, "state_stats" => tools::memory_states::execute_stats(&self.storage).await, -"trigger_importance" => tools::tagging::execute_trigger(&self.storage, request.arguments).await, + "trigger_importance" => tools::tagging::execute_trigger(&self.storage, request.arguments).await, "find_tagged" => tools::tagging::execute_find(&self.storage, request.arguments).await, "tagging_stats" => tools::tagging::execute_stats(&self.storage).await, "match_context" => tools::context::execute(&self.storage, request.arguments).await, - // Feedback / preference learning + + // ================================================================ + // Feedback / preference learning (not deprecated) + // ================================================================ "promote_memory" => tools::feedback::execute_promote(&self.storage, request.arguments).await, "demote_memory" => tools::feedback::execute_demote(&self.storage, request.arguments).await, "request_feedback" => tools::feedback::execute_request_feedback(&self.storage, request.arguments).await, @@ -608,6 +829,13 @@ mod tests { .map(|t| t["name"].as_str().unwrap()) .collect(); + // Unified tools (v1.1+) + assert!(tool_names.contains(&"search")); + assert!(tool_names.contains(&"memory")); + assert!(tool_names.contains(&"codebase")); + assert!(tool_names.contains(&"intention")); + + // Core tools assert!(tool_names.contains(&"ingest")); assert!(tool_names.contains(&"recall")); assert!(tool_names.contains(&"semantic_search")); diff --git a/crates/vestige-mcp/src/tools/codebase_unified.rs b/crates/vestige-mcp/src/tools/codebase_unified.rs new file mode 100644 index 0000000..2f0874f --- /dev/null +++ 
b/crates/vestige-mcp/src/tools/codebase_unified.rs @@ -0,0 +1,332 @@ +//! Unified Codebase Tool +//! +//! Merges remember_pattern, remember_decision, and get_codebase_context into a single +//! `codebase` tool with action-based dispatch. + +use serde::Deserialize; +use serde_json::Value; +use std::sync::Arc; +use tokio::sync::Mutex; + +use vestige_core::{IngestInput, Storage}; + +/// Input schema for the unified codebase tool +pub fn schema() -> Value { + serde_json::json!({ + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["remember_pattern", "remember_decision", "get_context"], + "description": "Action to perform: 'remember_pattern' stores a code pattern, 'remember_decision' stores an architectural decision, 'get_context' retrieves patterns and decisions for a codebase" + }, + // remember_pattern fields + "name": { + "type": "string", + "description": "Name/title for the pattern (required for remember_pattern)" + }, + "description": { + "type": "string", + "description": "Detailed description of the pattern (required for remember_pattern)" + }, + // remember_decision fields + "decision": { + "type": "string", + "description": "The architectural or design decision made (required for remember_decision)" + }, + "rationale": { + "type": "string", + "description": "Why this decision was made (required for remember_decision)" + }, + "alternatives": { + "type": "array", + "items": { "type": "string" }, + "description": "Alternatives that were considered (optional for remember_decision)" + }, + // Shared fields + "files": { + "type": "array", + "items": { "type": "string" }, + "description": "Files where this pattern is used or affected by this decision" + }, + "codebase": { + "type": "string", + "description": "Codebase/project identifier (e.g., 'vestige-tauri')" + }, + // get_context fields + "limit": { + "type": "integer", + "description": "Maximum items per category (default: 10, for get_context)", + "default": 10 + } + }, + 
"required": ["action"] + }) +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct CodebaseArgs { + action: String, + // Pattern fields + name: Option, + description: Option, + // Decision fields + decision: Option, + rationale: Option, + alternatives: Option>, + // Shared fields + files: Option>, + codebase: Option, + // Context fields + limit: Option, +} + +/// Execute the unified codebase tool +pub async fn execute( + storage: &Arc>, + args: Option, +) -> Result { + let args: CodebaseArgs = match args { + Some(v) => serde_json::from_value(v).map_err(|e| format!("Invalid arguments: {}", e))?, + None => return Err("Missing arguments".to_string()), + }; + + match args.action.as_str() { + "remember_pattern" => execute_remember_pattern(storage, &args).await, + "remember_decision" => execute_remember_decision(storage, &args).await, + "get_context" => execute_get_context(storage, &args).await, + _ => Err(format!( + "Invalid action '{}'. Must be one of: remember_pattern, remember_decision, get_context", + args.action + )), + } +} + +/// Remember a code pattern +async fn execute_remember_pattern( + storage: &Arc>, + args: &CodebaseArgs, +) -> Result { + let name = args + .name + .as_ref() + .ok_or("'name' is required for remember_pattern action")?; + let description = args + .description + .as_ref() + .ok_or("'description' is required for remember_pattern action")?; + + if name.trim().is_empty() { + return Err("Pattern name cannot be empty".to_string()); + } + + // Build content with structured format + let mut content = format!("# Code Pattern: {}\n\n{}", name, description); + + if let Some(ref files) = args.files { + if !files.is_empty() { + content.push_str("\n\n## Files:\n"); + for f in files { + content.push_str(&format!("- {}\n", f)); + } + } + } + + // Build tags + let mut tags = vec!["pattern".to_string(), "codebase".to_string()]; + if let Some(ref codebase) = args.codebase { + tags.push(format!("codebase:{}", codebase)); + } + + let 
input = IngestInput { + content, + node_type: "pattern".to_string(), + source: args.codebase.clone(), + sentiment_score: 0.0, + sentiment_magnitude: 0.0, + tags, + valid_from: None, + valid_until: None, + }; + + let mut storage = storage.lock().await; + let node = storage.ingest(input).map_err(|e| e.to_string())?; + + Ok(serde_json::json!({ + "action": "remember_pattern", + "success": true, + "nodeId": node.id, + "patternName": name, + "message": format!("Pattern '{}' remembered successfully", name), + })) +} + +/// Remember an architectural decision +async fn execute_remember_decision( + storage: &Arc>, + args: &CodebaseArgs, +) -> Result { + let decision = args + .decision + .as_ref() + .ok_or("'decision' is required for remember_decision action")?; + let rationale = args + .rationale + .as_ref() + .ok_or("'rationale' is required for remember_decision action")?; + + if decision.trim().is_empty() { + return Err("Decision cannot be empty".to_string()); + } + + // Build content with structured format (ADR-like) + let mut content = format!( + "# Decision: {}\n\n## Context\n\n{}\n\n## Decision\n\n{}", + &decision[..decision.len().min(50)], + rationale, + decision + ); + + if let Some(ref alternatives) = args.alternatives { + if !alternatives.is_empty() { + content.push_str("\n\n## Alternatives Considered:\n"); + for alt in alternatives { + content.push_str(&format!("- {}\n", alt)); + } + } + } + + if let Some(ref files) = args.files { + if !files.is_empty() { + content.push_str("\n\n## Affected Files:\n"); + for f in files { + content.push_str(&format!("- {}\n", f)); + } + } + } + + // Build tags + let mut tags = vec![ + "decision".to_string(), + "architecture".to_string(), + "codebase".to_string(), + ]; + if let Some(ref codebase) = args.codebase { + tags.push(format!("codebase:{}", codebase)); + } + + let input = IngestInput { + content, + node_type: "decision".to_string(), + source: args.codebase.clone(), + sentiment_score: 0.0, + sentiment_magnitude: 0.0, + tags, 
+ valid_from: None, + valid_until: None, + }; + + let mut storage = storage.lock().await; + let node = storage.ingest(input).map_err(|e| e.to_string())?; + + Ok(serde_json::json!({ + "action": "remember_decision", + "success": true, + "nodeId": node.id, + "message": "Architectural decision remembered successfully", + })) +} + +/// Get codebase context (patterns and decisions) +async fn execute_get_context( + storage: &Arc>, + args: &CodebaseArgs, +) -> Result { + let limit = args.limit.unwrap_or(10).clamp(1, 50); + let storage = storage.lock().await; + + // Build tag filter for codebase + // Tags are stored as: ["pattern", "codebase", "codebase:vestige"] + // We search for the "codebase:{name}" tag + let tag_filter = args + .codebase + .as_ref() + .map(|cb| format!("codebase:{}", cb)); + + // Query patterns by node_type and tag + let patterns = storage + .get_nodes_by_type_and_tag("pattern", tag_filter.as_deref(), limit) + .unwrap_or_default(); + + // Query decisions by node_type and tag + let decisions = storage + .get_nodes_by_type_and_tag("decision", tag_filter.as_deref(), limit) + .unwrap_or_default(); + + let formatted_patterns: Vec = patterns + .iter() + .map(|n| { + serde_json::json!({ + "id": n.id, + "content": n.content, + "tags": n.tags, + "retentionStrength": n.retention_strength, + "createdAt": n.created_at.to_rfc3339(), + }) + }) + .collect(); + + let formatted_decisions: Vec = decisions + .iter() + .map(|n| { + serde_json::json!({ + "id": n.id, + "content": n.content, + "tags": n.tags, + "retentionStrength": n.retention_strength, + "createdAt": n.created_at.to_rfc3339(), + }) + }) + .collect(); + + Ok(serde_json::json!({ + "action": "get_context", + "codebase": args.codebase, + "patterns": { + "count": formatted_patterns.len(), + "items": formatted_patterns, + }, + "decisions": { + "count": formatted_decisions.len(), + "items": formatted_decisions, + }, + })) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_schema_structure() { + 
let schema = schema(); + assert!(schema["properties"]["action"].is_object()); + assert_eq!(schema["required"], serde_json::json!(["action"])); + + // Check action enum values + let action_enum = &schema["properties"]["action"]["enum"]; + assert!(action_enum + .as_array() + .unwrap() + .contains(&serde_json::json!("remember_pattern"))); + assert!(action_enum + .as_array() + .unwrap() + .contains(&serde_json::json!("remember_decision"))); + assert!(action_enum + .as_array() + .unwrap() + .contains(&serde_json::json!("get_context"))); + } +} diff --git a/crates/vestige-mcp/src/tools/intention_unified.rs b/crates/vestige-mcp/src/tools/intention_unified.rs new file mode 100644 index 0000000..75aadb9 --- /dev/null +++ b/crates/vestige-mcp/src/tools/intention_unified.rs @@ -0,0 +1,1207 @@ +//! Unified Intention Tool +//! +//! A single unified tool that merges all 5 intention operations: +//! - set_intention -> action: "set" +//! - check_intentions -> action: "check" +//! - complete_intention -> action: "update" with status: "complete" +//! - snooze_intention -> action: "update" with status: "snooze" +//! - list_intentions -> action: "list" + +use chrono::{DateTime, Duration, Utc}; +use serde::Deserialize; +use serde_json::Value; +use std::sync::Arc; +use tokio::sync::Mutex; +use uuid::Uuid; + +use vestige_core::{IntentionRecord, Storage}; + +/// Unified schema for the `intention` tool +pub fn schema() -> Value { + serde_json::json!({ + "type": "object", + "description": "Unified intention management tool. 
Supports setting, checking, updating (complete/snooze/cancel), and listing intentions.", + "properties": { + "action": { + "type": "string", + "enum": ["set", "check", "update", "list"], + "description": "The action to perform: 'set' creates a new intention, 'check' finds triggered intentions, 'update' modifies status (complete/snooze/cancel), 'list' shows intentions" + }, + // SET action parameters + "description": { + "type": "string", + "description": "[set] What to remember to do" + }, + "trigger": { + "type": "object", + "description": "[set] When to trigger this intention", + "properties": { + "type": { + "type": "string", + "enum": ["time", "context", "event"], + "description": "Trigger type: time-based, context-based, or event-based" + }, + "at": { + "type": "string", + "description": "ISO timestamp for time-based triggers" + }, + "in_minutes": { + "type": "integer", + "description": "Minutes from now for duration-based triggers" + }, + "codebase": { + "type": "string", + "description": "Trigger when working in this codebase" + }, + "file_pattern": { + "type": "string", + "description": "Trigger when editing files matching this pattern" + }, + "topic": { + "type": "string", + "description": "Trigger when discussing this topic" + }, + "condition": { + "type": "string", + "description": "Natural language condition for event triggers" + } + } + }, + "priority": { + "type": "string", + "enum": ["low", "normal", "high", "critical"], + "default": "normal", + "description": "[set] Priority level" + }, + "deadline": { + "type": "string", + "description": "[set] Optional deadline (ISO timestamp)" + }, + // UPDATE action parameters + "id": { + "type": "string", + "description": "[update] ID of the intention to update" + }, + "status": { + "type": "string", + "enum": ["complete", "snooze", "cancel"], + "description": "[update] New status: 'complete' marks as fulfilled, 'snooze' delays, 'cancel' cancels" + }, + "snooze_minutes": { + "type": "integer", + "default": 30, 
+ "description": "[update] Minutes to snooze for (when status is 'snooze')" + }, + // CHECK action parameters + "context": { + "type": "object", + "description": "[check] Current context for matching intentions", + "properties": { + "current_time": { + "type": "string", + "description": "Current ISO timestamp (defaults to now)" + }, + "codebase": { + "type": "string", + "description": "Current codebase/project name" + }, + "file": { + "type": "string", + "description": "Current file path" + }, + "topics": { + "type": "array", + "items": { "type": "string" }, + "description": "Current discussion topics" + } + } + }, + "include_snoozed": { + "type": "boolean", + "default": false, + "description": "[check] Include snoozed intentions" + }, + // LIST action parameters + "filter_status": { + "type": "string", + "enum": ["active", "fulfilled", "cancelled", "snoozed", "all"], + "default": "active", + "description": "[list] Filter by status" + }, + "limit": { + "type": "integer", + "default": 20, + "description": "[list] Maximum number to return" + } + }, + "required": ["action"] + }) +} + +// ============================================================================ +// ARGUMENT STRUCTS +// ============================================================================ + +#[derive(Debug, Deserialize, serde::Serialize)] +#[serde(rename_all = "camelCase")] +struct TriggerSpec { + #[serde(rename = "type")] + trigger_type: Option, + at: Option, + in_minutes: Option, + codebase: Option, + file_pattern: Option, + topic: Option, + condition: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct ContextSpec { + #[allow(dead_code)] + current_time: Option, + codebase: Option, + file: Option, + topics: Option>, +} + +#[derive(Debug, Deserialize)] +struct UnifiedIntentionArgs { + action: String, + // SET parameters + description: Option, + trigger: Option, + priority: Option, + deadline: Option, + // UPDATE parameters + id: Option, + status: Option, + 
#[serde(alias = "snoozeMinutes")] + snooze_minutes: Option, + // CHECK parameters + context: Option, + #[serde(alias = "includeSnoozed")] + #[allow(dead_code)] + include_snoozed: Option, + // LIST parameters + #[serde(alias = "filterStatus")] + filter_status: Option, + limit: Option, +} + +// ============================================================================ +// MAIN EXECUTE FUNCTION +// ============================================================================ + +/// Execute the unified intention tool +pub async fn execute( + storage: &Arc>, + args: Option, +) -> Result { + let args: UnifiedIntentionArgs = match args { + Some(v) => serde_json::from_value(v).map_err(|e| format!("Invalid arguments: {}", e))?, + None => return Err("Missing arguments".to_string()), + }; + + match args.action.as_str() { + "set" => execute_set(storage, &args).await, + "check" => execute_check(storage, &args).await, + "update" => execute_update(storage, &args).await, + "list" => execute_list(storage, &args).await, + _ => Err(format!( + "Unknown action: '{}'. 
Valid actions are: set, check, update, list", + args.action + )), + } +} + +// ============================================================================ +// ACTION IMPLEMENTATIONS +// ============================================================================ + +/// Execute "set" action - create a new intention +async fn execute_set( + storage: &Arc>, + args: &UnifiedIntentionArgs, +) -> Result { + let description = args + .description + .as_ref() + .ok_or("Missing 'description' for set action")?; + + if description.trim().is_empty() { + return Err("Description cannot be empty".to_string()); + } + + let now = Utc::now(); + let id = Uuid::new_v4().to_string(); + + // Determine trigger type and data + let (trigger_type, trigger_data) = if let Some(trigger) = &args.trigger { + let t_type = trigger + .trigger_type + .clone() + .unwrap_or_else(|| "time".to_string()); + let data = serde_json::to_string(trigger).unwrap_or_else(|_| "{}".to_string()); + (t_type, data) + } else { + ("manual".to_string(), "{}".to_string()) + }; + + // Parse priority + let priority = match args.priority.as_deref() { + Some("low") => 1, + Some("high") => 3, + Some("critical") => 4, + _ => 2, // normal + }; + + // Parse deadline + let deadline = args.deadline.as_ref().and_then(|s| { + DateTime::parse_from_rfc3339(s) + .ok() + .map(|dt| dt.with_timezone(&Utc)) + }); + + // Calculate trigger time if specified + let trigger_at = if let Some(trigger) = &args.trigger { + if let Some(at) = &trigger.at { + DateTime::parse_from_rfc3339(at) + .ok() + .map(|dt| dt.with_timezone(&Utc)) + } else if let Some(mins) = trigger.in_minutes { + Some(now + Duration::minutes(mins)) + } else { + None + } + } else { + None + }; + + let record = IntentionRecord { + id: id.clone(), + content: description.clone(), + trigger_type, + trigger_data, + priority, + status: "active".to_string(), + created_at: now, + deadline, + fulfilled_at: None, + reminder_count: 0, + last_reminded_at: None, + notes: None, + tags: vec![], 
+ related_memories: vec![], + snoozed_until: None, + source_type: "mcp".to_string(), + source_data: None, + }; + + let mut storage = storage.lock().await; + storage.save_intention(&record).map_err(|e| e.to_string())?; + + Ok(serde_json::json!({ + "success": true, + "action": "set", + "intentionId": id, + "message": format!("Intention created: {}", description), + "priority": priority, + "triggerAt": trigger_at.map(|dt| dt.to_rfc3339()), + "deadline": deadline.map(|dt| dt.to_rfc3339()), + })) +} + +/// Execute "check" action - find triggered intentions +async fn execute_check( + storage: &Arc>, + args: &UnifiedIntentionArgs, +) -> Result { + let now = Utc::now(); + let storage = storage.lock().await; + + // Get active intentions + let intentions = storage.get_active_intentions().map_err(|e| e.to_string())?; + + let mut triggered = Vec::new(); + let mut pending = Vec::new(); + + for intention in intentions { + // Parse trigger data + let trigger: Option = serde_json::from_str(&intention.trigger_data).ok(); + + // Check if triggered + let is_triggered = if let Some(t) = &trigger { + match t.trigger_type.as_deref() { + Some("time") => { + if let Some(at) = &t.at { + if let Ok(trigger_time) = DateTime::parse_from_rfc3339(at) { + trigger_time.with_timezone(&Utc) <= now + } else { + false + } + } else if let Some(mins) = t.in_minutes { + let trigger_time = intention.created_at + Duration::minutes(mins); + trigger_time <= now + } else { + false + } + } + Some("context") => { + if let Some(ctx) = &args.context { + // Check codebase match + if let (Some(trigger_codebase), Some(current_codebase)) = + (&t.codebase, &ctx.codebase) + { + current_codebase + .to_lowercase() + .contains(&trigger_codebase.to_lowercase()) + // Check file pattern match + } else if let (Some(pattern), Some(file)) = (&t.file_pattern, &ctx.file) { + file.contains(pattern) + // Check topic match + } else if let (Some(topic), Some(topics)) = (&t.topic, &ctx.topics) { + topics + .iter() + .any(|t| 
t.to_lowercase().contains(&topic.to_lowercase())) + } else { + false + } + } else { + false + } + } + _ => false, + } + } else { + false + }; + + // Check if overdue + let is_overdue = intention.deadline.map(|d| d < now).unwrap_or(false); + + let item = serde_json::json!({ + "id": intention.id, + "description": intention.content, + "priority": match intention.priority { + 1 => "low", + 3 => "high", + 4 => "critical", + _ => "normal", + }, + "createdAt": intention.created_at.to_rfc3339(), + "deadline": intention.deadline.map(|d| d.to_rfc3339()), + "isOverdue": is_overdue, + }); + + if is_triggered || is_overdue { + triggered.push(item); + } else { + pending.push(item); + } + } + + Ok(serde_json::json!({ + "action": "check", + "triggered": triggered, + "pending": pending, + "checkedAt": now.to_rfc3339(), + })) +} + +/// Execute "update" action - complete, snooze, or cancel an intention +async fn execute_update( + storage: &Arc>, + args: &UnifiedIntentionArgs, +) -> Result { + let intention_id = args + .id + .as_ref() + .ok_or("Missing 'id' for update action")?; + + let status = args + .status + .as_ref() + .ok_or("Missing 'status' for update action")?; + + match status.as_str() { + "complete" => { + let mut storage = storage.lock().await; + let updated = storage + .update_intention_status(intention_id, "fulfilled") + .map_err(|e| e.to_string())?; + + if updated { + Ok(serde_json::json!({ + "success": true, + "action": "update", + "status": "complete", + "message": "Intention marked as complete", + "intentionId": intention_id, + })) + } else { + Err(format!("Intention not found: {}", intention_id)) + } + } + "snooze" => { + let minutes = args.snooze_minutes.unwrap_or(30); + let snooze_until = Utc::now() + Duration::minutes(minutes); + + let mut storage = storage.lock().await; + let updated = storage + .snooze_intention(intention_id, snooze_until) + .map_err(|e| e.to_string())?; + + if updated { + Ok(serde_json::json!({ + "success": true, + "action": "update", + 
"status": "snooze", + "message": format!("Intention snoozed for {} minutes", minutes), + "intentionId": intention_id, + "snoozedUntil": snooze_until.to_rfc3339(), + })) + } else { + Err(format!("Intention not found: {}", intention_id)) + } + } + "cancel" => { + let mut storage = storage.lock().await; + let updated = storage + .update_intention_status(intention_id, "cancelled") + .map_err(|e| e.to_string())?; + + if updated { + Ok(serde_json::json!({ + "success": true, + "action": "update", + "status": "cancel", + "message": "Intention cancelled", + "intentionId": intention_id, + })) + } else { + Err(format!("Intention not found: {}", intention_id)) + } + } + _ => Err(format!( + "Unknown status: '{}'. Valid statuses are: complete, snooze, cancel", + status + )), + } +} + +/// Execute "list" action - list intentions with optional filtering +async fn execute_list( + storage: &Arc>, + args: &UnifiedIntentionArgs, +) -> Result { + let filter_status = args.filter_status.as_deref().unwrap_or("active"); + let storage = storage.lock().await; + + let intentions = if filter_status == "all" { + // Get all by combining different statuses + let mut all = storage.get_active_intentions().map_err(|e| e.to_string())?; + all.extend( + storage + .get_intentions_by_status("fulfilled") + .map_err(|e| e.to_string())?, + ); + all.extend( + storage + .get_intentions_by_status("cancelled") + .map_err(|e| e.to_string())?, + ); + all.extend( + storage + .get_intentions_by_status("snoozed") + .map_err(|e| e.to_string())?, + ); + all + } else if filter_status == "active" { + // Use get_active_intentions for proper priority ordering + storage.get_active_intentions().map_err(|e| e.to_string())? + } else { + storage + .get_intentions_by_status(filter_status) + .map_err(|e| e.to_string())? 
+ }; + + let limit = args.limit.unwrap_or(20) as usize; + let now = Utc::now(); + + let items: Vec = intentions + .into_iter() + .take(limit) + .map(|i| { + let is_overdue = i.deadline.map(|d| d < now).unwrap_or(false); + serde_json::json!({ + "id": i.id, + "description": i.content, + "status": i.status, + "priority": match i.priority { + 1 => "low", + 3 => "high", + 4 => "critical", + _ => "normal", + }, + "createdAt": i.created_at.to_rfc3339(), + "deadline": i.deadline.map(|d| d.to_rfc3339()), + "isOverdue": is_overdue, + "snoozedUntil": i.snoozed_until.map(|d| d.to_rfc3339()), + }) + }) + .collect(); + + Ok(serde_json::json!({ + "action": "list", + "intentions": items, + "total": items.len(), + "status": filter_status, + })) +} + +// ============================================================================ +// TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + /// Create a test storage instance with a temporary database + async fn test_storage() -> (Arc>, TempDir) { + let dir = TempDir::new().unwrap(); + let storage = Storage::new(Some(dir.path().join("test.db"))).unwrap(); + (Arc::new(Mutex::new(storage)), dir) + } + + /// Helper to create an intention and return its ID + async fn create_test_intention(storage: &Arc>, description: &str) -> String { + let args = serde_json::json!({ + "action": "set", + "description": description + }); + let result = execute(storage, Some(args)).await.unwrap(); + result["intentionId"].as_str().unwrap().to_string() + } + + // ======================================================================== + // ACTION ROUTING TESTS + // ======================================================================== + + #[tokio::test] + async fn test_missing_action_fails() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({}); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + 
assert!(result.unwrap_err().contains("Invalid arguments")); + } + + #[tokio::test] + async fn test_unknown_action_fails() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ "action": "unknown" }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Unknown action")); + } + + #[tokio::test] + async fn test_missing_arguments_fails() { + let (storage, _dir) = test_storage().await; + let result = execute(&storage, None).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Missing arguments")); + } + + // ======================================================================== + // SET ACTION TESTS + // ======================================================================== + + #[tokio::test] + async fn test_set_action_basic_succeeds() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ + "action": "set", + "description": "Remember to write unit tests" + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["success"], true); + assert_eq!(value["action"], "set"); + assert!(value["intentionId"].is_string()); + assert!(value["message"] + .as_str() + .unwrap() + .contains("Intention created")); + } + + #[tokio::test] + async fn test_set_action_missing_description_fails() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ "action": "set" }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Missing 'description'")); + } + + #[tokio::test] + async fn test_set_action_empty_description_fails() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ + "action": "set", + "description": "" + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("empty")); + } + + 
#[tokio::test] + async fn test_set_action_with_priority() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ + "action": "set", + "description": "Critical bug fix needed", + "priority": "critical" + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["priority"], 4); + } + + #[tokio::test] + async fn test_set_action_with_time_trigger() { + let (storage, _dir) = test_storage().await; + let future_time = (Utc::now() + Duration::hours(1)).to_rfc3339(); + let args = serde_json::json!({ + "action": "set", + "description": "Meeting reminder", + "trigger": { + "type": "time", + "at": future_time + } + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert!(value["triggerAt"].is_string()); + } + + #[tokio::test] + async fn test_set_action_with_duration_trigger() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ + "action": "set", + "description": "Check build status", + "trigger": { + "type": "time", + "inMinutes": 30 + } + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert!(value["triggerAt"].is_string()); + } + + #[tokio::test] + async fn test_set_action_with_deadline() { + let (storage, _dir) = test_storage().await; + let deadline = (Utc::now() + Duration::days(7)).to_rfc3339(); + let args = serde_json::json!({ + "action": "set", + "description": "Complete feature by end of week", + "deadline": deadline + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert!(value["deadline"].is_string()); + } + + // ======================================================================== + // CHECK ACTION TESTS + // ======================================================================== + + #[tokio::test] + async fn 
test_check_action_empty_succeeds() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ "action": "check" }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["action"], "check"); + assert!(value["triggered"].is_array()); + assert!(value["pending"].is_array()); + assert!(value["checkedAt"].is_string()); + } + + #[tokio::test] + async fn test_check_action_returns_pending() { + let (storage, _dir) = test_storage().await; + create_test_intention(&storage, "Future task").await; + + let args = serde_json::json!({ "action": "check" }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + let pending = value["pending"].as_array().unwrap(); + assert!(!pending.is_empty()); + } + + #[tokio::test] + async fn test_check_action_with_context() { + let (storage, _dir) = test_storage().await; + + // Create context-triggered intention + let set_args = serde_json::json!({ + "action": "set", + "description": "Check tests in payments", + "trigger": { + "type": "context", + "codebase": "payments" + } + }); + execute(&storage, Some(set_args)).await.unwrap(); + + // Check with matching context + let check_args = serde_json::json!({ + "action": "check", + "context": { + "codebase": "payments-service" + } + }); + let result = execute(&storage, Some(check_args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + let triggered = value["triggered"].as_array().unwrap(); + assert!(!triggered.is_empty()); + } + + #[tokio::test] + async fn test_check_action_time_triggered() { + let (storage, _dir) = test_storage().await; + + // Create time-triggered intention in the past + let past_time = (Utc::now() - Duration::hours(1)).to_rfc3339(); + let set_args = serde_json::json!({ + "action": "set", + "description": "Past due task", + "trigger": { + "type": "time", + "at": past_time + } + }); + execute(&storage, 
Some(set_args)).await.unwrap(); + + let check_args = serde_json::json!({ "action": "check" }); + let result = execute(&storage, Some(check_args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + let triggered = value["triggered"].as_array().unwrap(); + assert!(!triggered.is_empty()); + } + + // ======================================================================== + // UPDATE ACTION TESTS - COMPLETE + // ======================================================================== + + #[tokio::test] + async fn test_update_action_complete_succeeds() { + let (storage, _dir) = test_storage().await; + let intention_id = create_test_intention(&storage, "Task to complete").await; + + let args = serde_json::json!({ + "action": "update", + "id": intention_id, + "status": "complete" + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["success"], true); + assert_eq!(value["action"], "update"); + assert_eq!(value["status"], "complete"); + assert!(value["message"].as_str().unwrap().contains("complete")); + } + + #[tokio::test] + async fn test_update_action_complete_nonexistent_fails() { + let (storage, _dir) = test_storage().await; + let fake_id = Uuid::new_v4().to_string(); + + let args = serde_json::json!({ + "action": "update", + "id": fake_id, + "status": "complete" + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("not found")); + } + + #[tokio::test] + async fn test_update_action_missing_id_fails() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ + "action": "update", + "status": "complete" + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Missing 'id'")); + } + + #[tokio::test] + async fn test_update_action_missing_status_fails() { + let (storage, _dir) = test_storage().await; + let 
intention_id = create_test_intention(&storage, "Task").await; + + let args = serde_json::json!({ + "action": "update", + "id": intention_id + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Missing 'status'")); + } + + // ======================================================================== + // UPDATE ACTION TESTS - SNOOZE + // ======================================================================== + + #[tokio::test] + async fn test_update_action_snooze_succeeds() { + let (storage, _dir) = test_storage().await; + let intention_id = create_test_intention(&storage, "Task to snooze").await; + + let args = serde_json::json!({ + "action": "update", + "id": intention_id, + "status": "snooze", + "snooze_minutes": 30 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["success"], true); + assert_eq!(value["status"], "snooze"); + assert!(value["snoozedUntil"].is_string()); + assert!(value["message"].as_str().unwrap().contains("snoozed")); + } + + #[tokio::test] + async fn test_update_action_snooze_default_minutes() { + let (storage, _dir) = test_storage().await; + let intention_id = create_test_intention(&storage, "Task with default snooze").await; + + let args = serde_json::json!({ + "action": "update", + "id": intention_id, + "status": "snooze" + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert!(value["message"].as_str().unwrap().contains("30 minutes")); + } + + // ======================================================================== + // UPDATE ACTION TESTS - CANCEL + // ======================================================================== + + #[tokio::test] + async fn test_update_action_cancel_succeeds() { + let (storage, _dir) = test_storage().await; + let intention_id = create_test_intention(&storage, "Task to 
cancel").await; + + let args = serde_json::json!({ + "action": "update", + "id": intention_id, + "status": "cancel" + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["success"], true); + assert_eq!(value["status"], "cancel"); + assert!(value["message"].as_str().unwrap().contains("cancelled")); + } + + #[tokio::test] + async fn test_update_action_unknown_status_fails() { + let (storage, _dir) = test_storage().await; + let intention_id = create_test_intention(&storage, "Task").await; + + let args = serde_json::json!({ + "action": "update", + "id": intention_id, + "status": "invalid" + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Unknown status")); + } + + // ======================================================================== + // LIST ACTION TESTS + // ======================================================================== + + #[tokio::test] + async fn test_list_action_empty_succeeds() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ "action": "list" }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["action"], "list"); + assert!(value["intentions"].is_array()); + assert_eq!(value["total"], 0); + assert_eq!(value["status"], "active"); + } + + #[tokio::test] + async fn test_list_action_returns_created() { + let (storage, _dir) = test_storage().await; + create_test_intention(&storage, "First task").await; + create_test_intention(&storage, "Second task").await; + + let args = serde_json::json!({ "action": "list" }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["total"], 2); + } + + #[tokio::test] + async fn test_list_action_filter_by_status() { + let (storage, _dir) = test_storage().await; + let 
intention_id = create_test_intention(&storage, "Task to complete").await; + + // Complete one + let complete_args = serde_json::json!({ + "action": "update", + "id": intention_id, + "status": "complete" + }); + execute(&storage, Some(complete_args)).await.unwrap(); + + // Create another active one + create_test_intention(&storage, "Active task").await; + + // List fulfilled + let list_args = serde_json::json!({ + "action": "list", + "filter_status": "fulfilled" + }); + let result = execute(&storage, Some(list_args)).await.unwrap(); + assert_eq!(result["total"], 1); + assert_eq!(result["status"], "fulfilled"); + } + + #[tokio::test] + async fn test_list_action_with_limit() { + let (storage, _dir) = test_storage().await; + for i in 0..5 { + create_test_intention(&storage, &format!("Task {}", i)).await; + } + + let args = serde_json::json!({ + "action": "list", + "limit": 3 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + let intentions = value["intentions"].as_array().unwrap(); + assert!(intentions.len() <= 3); + } + + #[tokio::test] + async fn test_list_action_all_status() { + let (storage, _dir) = test_storage().await; + let intention_id = create_test_intention(&storage, "Task to complete").await; + create_test_intention(&storage, "Active task").await; + + // Complete one + let complete_args = serde_json::json!({ + "action": "update", + "id": intention_id, + "status": "complete" + }); + execute(&storage, Some(complete_args)).await.unwrap(); + + // List all + let list_args = serde_json::json!({ + "action": "list", + "filter_status": "all" + }); + let result = execute(&storage, Some(list_args)).await.unwrap(); + assert_eq!(result["total"], 2); + } + + // ======================================================================== + // FULL LIFECYCLE TESTS + // ======================================================================== + + #[tokio::test] + async fn test_intention_full_lifecycle() { + let 
(storage, _dir) = test_storage().await; + + // 1. Create intention + let intention_id = create_test_intention(&storage, "Full lifecycle test").await; + + // 2. Verify it appears in list + let list_args = serde_json::json!({ "action": "list" }); + let list_result = execute(&storage, Some(list_args)).await.unwrap(); + assert_eq!(list_result["total"], 1); + + // 3. Snooze it + let snooze_args = serde_json::json!({ + "action": "update", + "id": intention_id, + "status": "snooze", + "snooze_minutes": 5 + }); + let snooze_result = execute(&storage, Some(snooze_args)).await; + assert!(snooze_result.is_ok()); + + // 4. Complete it + let complete_args = serde_json::json!({ + "action": "update", + "id": intention_id, + "status": "complete" + }); + let complete_result = execute(&storage, Some(complete_args)).await; + assert!(complete_result.is_ok()); + + // 5. Verify it's no longer active + let final_list_args = serde_json::json!({ "action": "list" }); + let final_list = execute(&storage, Some(final_list_args)).await.unwrap(); + assert_eq!(final_list["total"], 0); + + // 6. 
Verify it's in fulfilled list + let fulfilled_args = serde_json::json!({ + "action": "list", + "filter_status": "fulfilled" + }); + let fulfilled_list = execute(&storage, Some(fulfilled_args)).await.unwrap(); + assert_eq!(fulfilled_list["total"], 1); + } + + #[tokio::test] + async fn test_intention_priority_ordering() { + let (storage, _dir) = test_storage().await; + + // Create intentions with different priorities + let args_low = serde_json::json!({ + "action": "set", + "description": "Low priority task", + "priority": "low" + }); + execute(&storage, Some(args_low)).await.unwrap(); + + let args_critical = serde_json::json!({ + "action": "set", + "description": "Critical task", + "priority": "critical" + }); + execute(&storage, Some(args_critical)).await.unwrap(); + + let args_normal = serde_json::json!({ + "action": "set", + "description": "Normal task", + "priority": "normal" + }); + execute(&storage, Some(args_normal)).await.unwrap(); + + // List and verify ordering (critical should be first due to priority DESC ordering) + let list_args = serde_json::json!({ "action": "list" }); + let list_result = execute(&storage, Some(list_args)).await.unwrap(); + let intentions = list_result["intentions"].as_array().unwrap(); + + assert!(intentions.len() >= 3); + // Critical (4) should come before normal (2) and low (1) + let first_priority = intentions[0]["priority"].as_str().unwrap(); + assert_eq!(first_priority, "critical"); + } + + // ======================================================================== + // SCHEMA TESTS + // ======================================================================== + + #[test] + fn test_schema_has_required_action() { + let schema_value = schema(); + assert_eq!(schema_value["type"], "object"); + assert!(schema_value["properties"]["action"].is_object()); + assert!(schema_value["required"] + .as_array() + .unwrap() + .contains(&serde_json::json!("action"))); + } + + #[test] + fn test_schema_has_action_enum() { + let schema_value = 
schema(); + let action_enum = schema_value["properties"]["action"]["enum"] + .as_array() + .unwrap(); + assert!(action_enum.contains(&serde_json::json!("set"))); + assert!(action_enum.contains(&serde_json::json!("check"))); + assert!(action_enum.contains(&serde_json::json!("update"))); + assert!(action_enum.contains(&serde_json::json!("list"))); + } + + #[test] + fn test_schema_has_set_parameters() { + let schema_value = schema(); + assert!(schema_value["properties"]["description"].is_object()); + assert!(schema_value["properties"]["trigger"].is_object()); + assert!(schema_value["properties"]["priority"].is_object()); + assert!(schema_value["properties"]["deadline"].is_object()); + } + + #[test] + fn test_schema_has_update_parameters() { + let schema_value = schema(); + assert!(schema_value["properties"]["id"].is_object()); + assert!(schema_value["properties"]["status"].is_object()); + assert!(schema_value["properties"]["snooze_minutes"].is_object()); + } + + #[test] + fn test_schema_has_check_parameters() { + let schema_value = schema(); + assert!(schema_value["properties"]["context"].is_object()); + assert!(schema_value["properties"]["include_snoozed"].is_object()); + } + + #[test] + fn test_schema_has_list_parameters() { + let schema_value = schema(); + assert!(schema_value["properties"]["filter_status"].is_object()); + assert!(schema_value["properties"]["limit"].is_object()); + } +} diff --git a/crates/vestige-mcp/src/tools/memory_unified.rs b/crates/vestige-mcp/src/tools/memory_unified.rs new file mode 100644 index 0000000..ad39132 --- /dev/null +++ b/crates/vestige-mcp/src/tools/memory_unified.rs @@ -0,0 +1,223 @@ +//! Unified Memory Tool +//! +//! Merges get_knowledge, delete_knowledge, and get_memory_state into a single +//! `memory` tool with action-based dispatch. 
+ +use serde::Deserialize; +use serde_json::Value; +use std::sync::Arc; +use tokio::sync::Mutex; + +use vestige_core::{MemoryState, Storage}; + +// Accessibility thresholds based on retention strength +const ACCESSIBILITY_ACTIVE: f64 = 0.7; +const ACCESSIBILITY_DORMANT: f64 = 0.4; +const ACCESSIBILITY_SILENT: f64 = 0.1; + +/// Compute accessibility score from memory strengths +/// Combines retention, retrieval, and storage strengths +fn compute_accessibility(retention: f64, retrieval: f64, storage: f64) -> f64 { + // Weighted combination: retention is most important for accessibility + retention * 0.5 + retrieval * 0.3 + storage * 0.2 +} + +/// Determine memory state from accessibility score +fn state_from_accessibility(accessibility: f64) -> MemoryState { + if accessibility >= ACCESSIBILITY_ACTIVE { + MemoryState::Active + } else if accessibility >= ACCESSIBILITY_DORMANT { + MemoryState::Dormant + } else if accessibility >= ACCESSIBILITY_SILENT { + MemoryState::Silent + } else { + MemoryState::Unavailable + } +} + +/// Input schema for the unified memory tool +pub fn schema() -> Value { + serde_json::json!({ + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["get", "delete", "state"], + "description": "Action to perform: 'get' retrieves full memory node, 'delete' removes memory, 'state' returns accessibility state" + }, + "id": { + "type": "string", + "description": "The ID of the memory node" + } + }, + "required": ["action", "id"] + }) +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct MemoryArgs { + action: String, + id: String, +} + +/// Execute the unified memory tool +pub async fn execute( + storage: &Arc>, + args: Option, +) -> Result { + let args: MemoryArgs = match args { + Some(v) => serde_json::from_value(v).map_err(|e| format!("Invalid arguments: {}", e))?, + None => return Err("Missing arguments".to_string()), + }; + + // Validate UUID format + uuid::Uuid::parse_str(&args.id).map_err(|_| 
"Invalid memory ID format".to_string())?; + + match args.action.as_str() { + "get" => execute_get(storage, &args.id).await, + "delete" => execute_delete(storage, &args.id).await, + "state" => execute_state(storage, &args.id).await, + _ => Err(format!( + "Invalid action '{}'. Must be one of: get, delete, state", + args.action + )), + } +} + +/// Get full memory node with all metadata +async fn execute_get(storage: &Arc>, id: &str) -> Result { + let storage = storage.lock().await; + let node = storage.get_node(id).map_err(|e| e.to_string())?; + + match node { + Some(n) => Ok(serde_json::json!({ + "action": "get", + "found": true, + "node": { + "id": n.id, + "content": n.content, + "nodeType": n.node_type, + "createdAt": n.created_at.to_rfc3339(), + "updatedAt": n.updated_at.to_rfc3339(), + "lastAccessed": n.last_accessed.to_rfc3339(), + "stability": n.stability, + "difficulty": n.difficulty, + "reps": n.reps, + "lapses": n.lapses, + "storageStrength": n.storage_strength, + "retrievalStrength": n.retrieval_strength, + "retentionStrength": n.retention_strength, + "sentimentScore": n.sentiment_score, + "sentimentMagnitude": n.sentiment_magnitude, + "nextReview": n.next_review.map(|d| d.to_rfc3339()), + "source": n.source, + "tags": n.tags, + "hasEmbedding": n.has_embedding, + "embeddingModel": n.embedding_model, + } + })), + None => Ok(serde_json::json!({ + "action": "get", + "found": false, + "nodeId": id, + "message": "Memory not found", + })), + } +} + +/// Delete a memory and return success status +async fn execute_delete(storage: &Arc>, id: &str) -> Result { + let mut storage = storage.lock().await; + let deleted = storage.delete_node(id).map_err(|e| e.to_string())?; + + Ok(serde_json::json!({ + "action": "delete", + "success": deleted, + "nodeId": id, + "message": if deleted { "Memory deleted successfully" } else { "Memory not found" }, + })) +} + +/// Get accessibility state of a memory (Active/Dormant/Silent/Unavailable) +async fn execute_state(storage: &Arc>, 
id: &str) -> Result { + let storage = storage.lock().await; + + // Get the memory + let memory = storage + .get_node(id) + .map_err(|e| format!("Error: {}", e))? + .ok_or("Memory not found")?; + + // Calculate accessibility score + let accessibility = compute_accessibility( + memory.retention_strength, + memory.retrieval_strength, + memory.storage_strength, + ); + + // Determine state + let state = state_from_accessibility(accessibility); + + let state_description = match state { + MemoryState::Active => "Easily retrievable - this memory is fresh and accessible", + MemoryState::Dormant => "Retrievable with effort - may need cues to recall", + MemoryState::Silent => "Difficult to retrieve - exists but hard to access", + MemoryState::Unavailable => "Cannot be retrieved - needs significant reinforcement", + }; + + Ok(serde_json::json!({ + "action": "state", + "memoryId": id, + "content": memory.content, + "state": format!("{:?}", state), + "accessibility": accessibility, + "description": state_description, + "components": { + "retentionStrength": memory.retention_strength, + "retrievalStrength": memory.retrieval_strength, + "storageStrength": memory.storage_strength + }, + "thresholds": { + "active": ACCESSIBILITY_ACTIVE, + "dormant": ACCESSIBILITY_DORMANT, + "silent": ACCESSIBILITY_SILENT + } + })) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_accessibility_thresholds() { + // Test Active state + let accessibility = compute_accessibility(0.9, 0.8, 0.7); + assert!(accessibility >= ACCESSIBILITY_ACTIVE); + assert!(matches!(state_from_accessibility(accessibility), MemoryState::Active)); + + // Test Dormant state + let accessibility = compute_accessibility(0.5, 0.5, 0.5); + assert!(accessibility >= ACCESSIBILITY_DORMANT && accessibility < ACCESSIBILITY_ACTIVE); + assert!(matches!(state_from_accessibility(accessibility), MemoryState::Dormant)); + + // Test Silent state + let accessibility = compute_accessibility(0.2, 0.2, 0.2); + 
assert!(accessibility >= ACCESSIBILITY_SILENT && accessibility < ACCESSIBILITY_DORMANT); + assert!(matches!(state_from_accessibility(accessibility), MemoryState::Silent)); + + // Test Unavailable state + let accessibility = compute_accessibility(0.05, 0.05, 0.05); + assert!(accessibility < ACCESSIBILITY_SILENT); + assert!(matches!(state_from_accessibility(accessibility), MemoryState::Unavailable)); + } + + #[test] + fn test_schema_structure() { + let schema = schema(); + assert!(schema["properties"]["action"].is_object()); + assert!(schema["properties"]["id"].is_object()); + assert_eq!(schema["required"], serde_json::json!(["action", "id"])); + } +} diff --git a/crates/vestige-mcp/src/tools/mod.rs b/crates/vestige-mcp/src/tools/mod.rs index dc1d04c..22127af 100644 --- a/crates/vestige-mcp/src/tools/mod.rs +++ b/crates/vestige-mcp/src/tools/mod.rs @@ -20,3 +20,9 @@ pub mod tagging; // Feedback / preference learning pub mod feedback; + +// Unified tools (consolidate multiple operations into single tools) +pub mod codebase_unified; +pub mod intention_unified; +pub mod memory_unified; +pub mod search_unified; diff --git a/crates/vestige-mcp/src/tools/search_unified.rs b/crates/vestige-mcp/src/tools/search_unified.rs new file mode 100644 index 0000000..b2a13ee --- /dev/null +++ b/crates/vestige-mcp/src/tools/search_unified.rs @@ -0,0 +1,492 @@ +//! Unified Search Tool +//! +//! Merges recall, semantic_search, and hybrid_search into a single `search` tool. +//! Always uses hybrid search internally (keyword + semantic + RRF fusion). +//! Implements Testing Effect (Roediger & Karpicke 2006) by auto-strengthening memories on access. 
+ +use serde::Deserialize; +use serde_json::Value; +use std::sync::Arc; +use tokio::sync::Mutex; + +use vestige_core::Storage; + +/// Input schema for unified search tool +pub fn schema() -> Value { + serde_json::json!({ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results (default: 10)", + "default": 10, + "minimum": 1, + "maximum": 100 + }, + "min_retention": { + "type": "number", + "description": "Minimum retention strength (0.0-1.0, default: 0.0)", + "default": 0.0, + "minimum": 0.0, + "maximum": 1.0 + }, + "min_similarity": { + "type": "number", + "description": "Minimum similarity threshold (0.0-1.0, default: 0.5)", + "default": 0.5, + "minimum": 0.0, + "maximum": 1.0 + } + }, + "required": ["query"] + }) +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct SearchArgs { + query: String, + limit: Option, + min_retention: Option, + min_similarity: Option, +} + +/// Execute unified search +/// +/// Uses hybrid search (keyword + semantic + RRF fusion) internally. +/// Auto-strengthens memories on access (Testing Effect - Roediger & Karpicke 2006). 
+pub async fn execute( + storage: &Arc>, + args: Option, +) -> Result { + let args: SearchArgs = match args { + Some(v) => serde_json::from_value(v).map_err(|e| format!("Invalid arguments: {}", e))?, + None => return Err("Missing arguments".to_string()), + }; + + if args.query.trim().is_empty() { + return Err("Query cannot be empty".to_string()); + } + + // Clamp all parameters to valid ranges + let limit = args.limit.unwrap_or(10).clamp(1, 100); + let min_retention = args.min_retention.unwrap_or(0.0).clamp(0.0, 1.0); + let min_similarity = args.min_similarity.unwrap_or(0.5).clamp(0.0, 1.0); + + // Use balanced weights for hybrid search (keyword + semantic) + let keyword_weight = 0.5_f32; + let semantic_weight = 0.5_f32; + + let storage = storage.lock().await; + + // Execute hybrid search + let results = storage + .hybrid_search(&args.query, limit, keyword_weight, semantic_weight) + .map_err(|e| e.to_string())?; + + // Filter results by min_retention and min_similarity + let filtered_results: Vec<_> = results + .into_iter() + .filter(|r| { + // Check retention strength + if r.node.retention_strength < min_retention { + return false; + } + // Check similarity if semantic score is available + if let Some(sem_score) = r.semantic_score { + if sem_score < min_similarity { + return false; + } + } + true + }) + .collect(); + + // Auto-strengthen memories on access (Testing Effect - Roediger & Karpicke 2006) + // This implements "use it or lose it" - accessed memories get stronger + let ids: Vec<&str> = filtered_results.iter().map(|r| r.node.id.as_str()).collect(); + let _ = storage.strengthen_batch_on_access(&ids); // Ignore errors, don't fail search + + // Format results + let formatted: Vec = filtered_results + .iter() + .map(|r| { + serde_json::json!({ + "id": r.node.id, + "content": r.node.content, + "combinedScore": r.combined_score, + "keywordScore": r.keyword_score, + "semanticScore": r.semantic_score, + "nodeType": r.node.node_type, + "tags": r.node.tags, + 
"retentionStrength": r.node.retention_strength, + }) + }) + .collect(); + + Ok(serde_json::json!({ + "query": args.query, + "method": "hybrid", + "total": formatted.len(), + "results": formatted, + })) +} + +// ============================================================================ +// TESTS +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + use vestige_core::IngestInput; + + /// Create a test storage instance with a temporary database + async fn test_storage() -> (Arc>, TempDir) { + let dir = TempDir::new().unwrap(); + let storage = Storage::new(Some(dir.path().join("test.db"))).unwrap(); + (Arc::new(Mutex::new(storage)), dir) + } + + /// Helper to ingest test content + async fn ingest_test_content(storage: &Arc>, content: &str) -> String { + let input = IngestInput { + content: content.to_string(), + node_type: "fact".to_string(), + source: None, + sentiment_score: 0.0, + sentiment_magnitude: 0.0, + tags: vec![], + valid_from: None, + valid_until: None, + }; + let mut storage_lock = storage.lock().await; + let node = storage_lock.ingest(input).unwrap(); + node.id + } + + // ======================================================================== + // QUERY VALIDATION TESTS + // ======================================================================== + + #[tokio::test] + async fn test_search_empty_query_fails() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ "query": "" }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("empty")); + } + + #[tokio::test] + async fn test_search_whitespace_only_query_fails() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ "query": " \t\n " }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("empty")); + } + + #[tokio::test] 
+ async fn test_search_missing_arguments_fails() { + let (storage, _dir) = test_storage().await; + let result = execute(&storage, None).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Missing arguments")); + } + + #[tokio::test] + async fn test_search_missing_query_field_fails() { + let (storage, _dir) = test_storage().await; + let args = serde_json::json!({ "limit": 10 }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Invalid arguments")); + } + + // ======================================================================== + // LIMIT CLAMPING TESTS + // ======================================================================== + + #[tokio::test] + async fn test_search_limit_clamped_to_minimum() { + let (storage, _dir) = test_storage().await; + ingest_test_content(&storage, "Test content for limit clamping").await; + + // Try with limit 0 - should clamp to 1 + let args = serde_json::json!({ + "query": "test", + "limit": 0 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_search_limit_clamped_to_maximum() { + let (storage, _dir) = test_storage().await; + ingest_test_content(&storage, "Test content for max limit").await; + + // Try with limit 1000 - should clamp to 100 + let args = serde_json::json!({ + "query": "test", + "limit": 1000 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_search_negative_limit_clamped() { + let (storage, _dir) = test_storage().await; + ingest_test_content(&storage, "Test content for negative limit").await; + + let args = serde_json::json!({ + "query": "test", + "limit": -5 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + } + + // ======================================================================== + // MIN_RETENTION CLAMPING TESTS + // 
======================================================================== + + #[tokio::test] + async fn test_search_min_retention_clamped_to_zero() { + let (storage, _dir) = test_storage().await; + ingest_test_content(&storage, "Test content for retention clamping").await; + + let args = serde_json::json!({ + "query": "test", + "min_retention": -0.5 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_search_min_retention_clamped_to_one() { + let (storage, _dir) = test_storage().await; + ingest_test_content(&storage, "Test content for max retention").await; + + let args = serde_json::json!({ + "query": "test", + "min_retention": 1.5 + }); + let result = execute(&storage, Some(args)).await; + // Should succeed but may return no results (retention > 1.0 clamped to 1.0) + assert!(result.is_ok()); + } + + // ======================================================================== + // MIN_SIMILARITY CLAMPING TESTS + // ======================================================================== + + #[tokio::test] + async fn test_search_min_similarity_clamped_to_zero() { + let (storage, _dir) = test_storage().await; + ingest_test_content(&storage, "Test content for similarity clamping").await; + + let args = serde_json::json!({ + "query": "test", + "min_similarity": -0.5 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_search_min_similarity_clamped_to_one() { + let (storage, _dir) = test_storage().await; + ingest_test_content(&storage, "Test content for max similarity").await; + + let args = serde_json::json!({ + "query": "test", + "min_similarity": 1.5 + }); + let result = execute(&storage, Some(args)).await; + // Should succeed but may return no results + assert!(result.is_ok()); + } + + // ======================================================================== + // SUCCESSFUL SEARCH TESTS + // 
======================================================================== + + #[tokio::test] + async fn test_search_basic_query_succeeds() { + let (storage, _dir) = test_storage().await; + ingest_test_content(&storage, "The Rust programming language is memory safe.").await; + + let args = serde_json::json!({ "query": "rust" }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["query"], "rust"); + assert_eq!(value["method"], "hybrid"); + assert!(value["total"].is_number()); + assert!(value["results"].is_array()); + } + + #[tokio::test] + async fn test_search_returns_matching_content() { + let (storage, _dir) = test_storage().await; + let node_id = + ingest_test_content(&storage, "Python is a dynamic programming language.").await; + + let args = serde_json::json!({ + "query": "python", + "min_similarity": 0.0 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + let results = value["results"].as_array().unwrap(); + assert!(!results.is_empty()); + assert_eq!(results[0]["id"], node_id); + } + + #[tokio::test] + async fn test_search_with_limit() { + let (storage, _dir) = test_storage().await; + // Ingest multiple items + ingest_test_content(&storage, "Testing content one").await; + ingest_test_content(&storage, "Testing content two").await; + ingest_test_content(&storage, "Testing content three").await; + + let args = serde_json::json!({ + "query": "testing", + "limit": 2, + "min_similarity": 0.0 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + let results = value["results"].as_array().unwrap(); + assert!(results.len() <= 2); + } + + #[tokio::test] + async fn test_search_empty_database_returns_empty_array() { + let (storage, _dir) = test_storage().await; + // Don't ingest anything - database is empty + + let args = serde_json::json!({ "query": "anything" 
}); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + assert_eq!(value["total"], 0); + assert!(value["results"].as_array().unwrap().is_empty()); + } + + #[tokio::test] + async fn test_search_result_contains_expected_fields() { + let (storage, _dir) = test_storage().await; + ingest_test_content(&storage, "Testing field presence in search results.").await; + + let args = serde_json::json!({ + "query": "testing", + "min_similarity": 0.0 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + let results = value["results"].as_array().unwrap(); + if !results.is_empty() { + let first = &results[0]; + assert!(first["id"].is_string()); + assert!(first["content"].is_string()); + assert!(first["combinedScore"].is_number()); + // keywordScore and semanticScore may be null if not matched + assert!(first["nodeType"].is_string()); + assert!(first["tags"].is_array()); + assert!(first["retentionStrength"].is_number()); + } + } + + // ======================================================================== + // DEFAULT VALUES TESTS + // ======================================================================== + + #[tokio::test] + async fn test_search_default_limit_is_10() { + let (storage, _dir) = test_storage().await; + // Ingest more than 10 items + for i in 0..15 { + ingest_test_content(&storage, &format!("Item number {}", i)).await; + } + + let args = serde_json::json!({ + "query": "item", + "min_similarity": 0.0 + }); + let result = execute(&storage, Some(args)).await; + assert!(result.is_ok()); + + let value = result.unwrap(); + let results = value["results"].as_array().unwrap(); + assert!(results.len() <= 10); + } + + // ======================================================================== + // SCHEMA TESTS + // ======================================================================== + + #[test] + fn test_schema_has_required_fields() { + let 
schema_value = schema(); + assert_eq!(schema_value["type"], "object"); + assert!(schema_value["properties"]["query"].is_object()); + assert!(schema_value["required"] + .as_array() + .unwrap() + .contains(&serde_json::json!("query"))); + } + + #[test] + fn test_schema_has_optional_fields() { + let schema_value = schema(); + assert!(schema_value["properties"]["limit"].is_object()); + assert!(schema_value["properties"]["min_retention"].is_object()); + assert!(schema_value["properties"]["min_similarity"].is_object()); + } + + #[test] + fn test_schema_limit_has_bounds() { + let schema_value = schema(); + let limit_schema = &schema_value["properties"]["limit"]; + assert_eq!(limit_schema["minimum"], 1); + assert_eq!(limit_schema["maximum"], 100); + assert_eq!(limit_schema["default"], 10); + } + + #[test] + fn test_schema_min_retention_has_bounds() { + let schema_value = schema(); + let retention_schema = &schema_value["properties"]["min_retention"]; + assert_eq!(retention_schema["minimum"], 0.0); + assert_eq!(retention_schema["maximum"], 1.0); + assert_eq!(retention_schema["default"], 0.0); + } + + #[test] + fn test_schema_min_similarity_has_bounds() { + let schema_value = schema(); + let similarity_schema = &schema_value["properties"]["min_similarity"]; + assert_eq!(similarity_schema["minimum"], 0.0); + assert_eq!(similarity_schema["maximum"], 1.0); + assert_eq!(similarity_schema["default"], 0.5); + } +}