feat(mcp): add per-tool _meta["anthropic/maxResultSizeChars"] annotation (#56)

Claude Code v2.1.91+ honors the per-tool annotation `_meta["anthropic/maxResultSizeChars"]` (up to 500_000) to override its 50K default truncation of `CallToolResult`. Without it, large Vestige payloads are silently truncated and spilled to disk, forcing the parent agent to chunk-read them.

Empirically observed truncation under realistic default parameters (measured on v1.3.0 against ~3,300 memories; v2.x tool surface preserves the same names + payload shapes):

    search(detail_level="full", limit=20)    -> 134,824 chars -> truncated
    search(detail_level="summary", limit=10) ->  71,318 chars -> truncated
    memory_timeline(limit=30)                ->  83,626 chars -> truncated

This patch:

1. Adds `meta: Option<serde_json::Value>` to `ToolDescription` with `#[serde(rename = "_meta")]` so the wire shape matches the MCP spec. Backwards-compatible (the field is optional + `skip_serializing_if`; older MCP clients ignore unknown JSON keys per the spec).

2. Derives `Default` on `ToolDescription` so existing call sites can adopt the new field via struct-update syntax (`..Default::default()`) without restating it.

3. Annotates the four high-payload tools per measurement-driven discipline; the other 21 tools deliberately do NOT carry the annotation (cargo-cult prevention — a generous cap on every tool dilutes the signal and trains future maintainers that the value is arbitrary):
   - search -> 300_000 (2.2x headroom over observed peak)
   - memory_timeline -> 200_000 (2.4x headroom over observed peak)
   - memory -> 100_000 (single-record bounded)
   - codebase -> 100_000 (future-growth bounded)

   Tools that COULD plausibly grow into the annotated set with future workload (`deep_reference`, `cross_reference`, `memory_graph`, `explore_connections`, `session_context`) are left unannotated until empirical measurement shows truncation under realistic use.

4. Adds three regression tests in `server::tests`:
   - test_high_payload_tools_have_max_result_size_annotation: pins each cap value + asserts <= 500K Anthropic ceiling
   - test_other_tools_do_not_carry_max_result_size_annotation: cargo-cult prevention; dynamically iterates `tools/list` and asserts every tool NOT in the discipline-prescribed set lacks the annotation (robust to new tools being added by future PRs)
   - test_meta_wire_shape_uses_underscore_meta_field: pins the serde rename to `_meta` (the spec'd wire name) so a refactor of `ToolDescription` cannot silently drop the rename

All 22 `server::tests` pass on v2.1.22 base (19 pre-existing + 3 new). Full lib test suite: 379/380 pass; the 1 unrelated failure (`tools::maintenance::tests::test_portable_export_writes_archive_to_storage_exports_dir`) is a pre-existing Windows path-separator assertion bug in `tools/maintenance.rs:823` (`path.ends_with("exports/portable-test.json")` fails on Windows where the path uses `\`) — unaffected by this PR.

References:
- Anthropic CC v2.1.91 release notes (April 2026): "Added MCP tool result persistence override via _meta['anthropic/maxResultSizeChars'] annotation (up to 500K), allowing larger results like DB schemas to pass through without truncation"
- claude-agent-sdk-python v0.1.55 #756: forward bookkeeping establishing the on-Tool-definition (not on-CallToolResult) semantics for this annotation

Co-authored-by: Peter Lauzon <inbijiburu@protonmail.com>
2026-07-22 23:31:02 +02:00 · 2026-05-25 12:49:51 -06:00 · 2026-05-25 12:49:51 -06:00 · a8550410b0
commit a8550410b0
parent 1399329810
2 changed files with 214 additions and 2 deletions
--- a/crates/vestige-mcp/src/protocol/messages.rs
+++ b/crates/vestige-mcp/src/protocol/messages.rs
@ -82,13 +82,25 @@ pub struct ServerCapabilities {
 // ============================================================================

 /// Tool description for tools/list
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct ToolDescription {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    pub input_schema: Value,
+    /// Per-tool `_meta` annotations from the MCP wire spec. Serialized under
+    /// the key `_meta`; omitted from the output entirely when `None`.
+    ///
+    /// Notable keys recognized by Claude Code (v2.1.91+):
+    /// - `anthropic/maxResultSizeChars` (integer, up to 500_000):
+    ///   per-tool override of the 50K default `CallToolResult` truncation
+    ///   ceiling. Pinned on the Tool definition; applies to every invocation.
+    ///
+    /// Free-form `serde_json::Value` (typically an object) so additional
+    /// vendor-specific `_meta` keys can be added without further schema changes.
+    #[serde(rename = "_meta", skip_serializing_if = "Option::is_none")]
+    pub meta: Option<Value>,
 }

 /// Result of tools/list
--- a/crates/vestige-mcp/src/server.rs
+++ b/crates/vestige-mcp/src/server.rs
@ -227,7 +227,7 @@ impl McpServer {
        // v2.1.21: 25 tools (verified by the `tools.len() == 25` assertion in the
        // handle_tools_list test below — the `suppress` tool landed in v2.0.5).
        // Deprecated tools still work via redirects in handle_tools_call.
-        let tools = vec![
+        let mut tools = vec![
            // ================================================================
            // UNIFIED TOOLS (v1.1+)
            // ================================================================
@ -235,21 +235,25 @@ impl McpServer {
                name: "search".to_string(),
                description: Some("Unified search tool. Uses hybrid search (keyword + semantic + convex combination fusion) internally. Auto-strengthens memories on access (Testing Effect).".to_string()),
                input_schema: tools::search_unified::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "memory".to_string(),
                description: Some("Unified memory management tool. Actions: 'get' (retrieve full node), 'purge' (irreversibly remove content/embeddings with confirm=true), 'delete' (legacy alias for purge), 'state' (get accessibility state), 'promote' (thumbs up — increases retrieval strength), 'demote' (thumbs down — decreases retrieval strength, does NOT delete), 'edit' (update content in-place, preserves FSRS state).".to_string()),
                input_schema: tools::memory_unified::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "codebase".to_string(),
                description: Some("Unified codebase tool. Actions: 'remember_pattern' (store code pattern), 'remember_decision' (store architectural decision), 'get_context' (retrieve patterns and decisions).".to_string()),
                input_schema: tools::codebase_unified::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "intention".to_string(),
                description: Some("Unified intention management tool. Actions: 'set' (create), 'check' (find triggered), 'update' (complete/snooze/cancel), 'list' (show intentions).".to_string()),
                input_schema: tools::intention_unified::schema(),
+                ..Default::default()
            },
            // ================================================================
            // CORE MEMORY (v1.7: smart_ingest absorbs ingest + checkpoint)
@ -258,6 +262,7 @@ impl McpServer {
                name: "smart_ingest".to_string(),
                description: Some("INTELLIGENT memory ingestion with Prediction Error Gating. Single mode: provide 'content' to auto-decide CREATE/UPDATE/SUPERSEDE. Batch mode: provide 'items' array (max 20) for session-end saves — each item runs the full cognitive pipeline (importance scoring, intent detection, synaptic tagging).".to_string()),
                input_schema: tools::smart_ingest::schema(),
+                ..Default::default()
            },
            // ================================================================
            // TEMPORAL TOOLS (v1.2+)
@ -266,11 +271,13 @@ impl McpServer {
                name: "memory_timeline".to_string(),
                description: Some("Browse memories chronologically. Returns memories in a time range, grouped by day. Defaults to last 7 days.".to_string()),
                input_schema: tools::timeline::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "memory_changelog".to_string(),
                description: Some("View audit trail of memory changes. Per-memory: state transitions. System-wide: consolidations + recent state changes.".to_string()),
                input_schema: tools::changelog::schema(),
+                ..Default::default()
            },
            // ================================================================
            // MAINTENANCE TOOLS (v1.7: system_status replaces health_check + stats)
@ -279,26 +286,31 @@ impl McpServer {
                name: "system_status".to_string(),
                description: Some("Combined system health and statistics. Returns status (healthy/degraded/critical/empty), full stats, FSRS preview, cognitive module health, state distribution, warnings, and recommendations.".to_string()),
                input_schema: tools::maintenance::system_status_schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "consolidate".to_string(),
                description: Some("Run FSRS-6 memory consolidation cycle. Applies decay, generates embeddings, and performs maintenance. Use when memories seem stale.".to_string()),
                input_schema: tools::maintenance::consolidate_schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "backup".to_string(),
                description: Some("Create a SQLite database backup. Returns the backup file path.".to_string()),
                input_schema: tools::maintenance::backup_schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "export".to_string(),
                description: Some("Export memories as JSON or JSONL. Supports tag and date filters.".to_string()),
                input_schema: tools::maintenance::export_schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "gc".to_string(),
                description: Some("Garbage collect stale memories below retention threshold. Defaults to dry_run=true for safety.".to_string()),
                input_schema: tools::maintenance::gc_schema(),
+                ..Default::default()
            },
            // ================================================================
            // AUTO-SAVE & DEDUP TOOLS (v1.3+)
@ -307,11 +319,13 @@ impl McpServer {
                name: "importance_score".to_string(),
                description: Some("Score content importance using 4-channel neuroscience model (novelty/arousal/reward/attention). Returns composite score, channel breakdown, encoding boost, and explanations.".to_string()),
                input_schema: tools::importance::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "find_duplicates".to_string(),
                description: Some("Find duplicate and near-duplicate memory clusters using cosine similarity on embeddings. Returns clusters with suggested actions (merge/review). Use to clean up redundant memories.".to_string()),
                input_schema: tools::dedup::schema(),
+                ..Default::default()
            },
            // ================================================================
            // COGNITIVE TOOLS (v1.5+)
@ -320,16 +334,19 @@ impl McpServer {
                name: "dream".to_string(),
                description: Some("Trigger memory dreaming — replays recent memories to discover hidden connections, synthesize insights, and strengthen important patterns. Returns insights, connections, and dream stats.".to_string()),
                input_schema: tools::dream::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "explore_connections".to_string(),
                description: Some("Graph exploration tool for memory connections. Actions: 'chain' (build reasoning path between memories), 'associations' (find related memories via spreading activation + hippocampal index), 'bridges' (find connecting memories between two nodes).".to_string()),
                input_schema: tools::explore::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "predict".to_string(),
                description: Some("Proactive memory prediction — predicts what memories you'll need next based on context, recent activity, and learned patterns. Returns predictions, suggestions, and speculative retrievals.".to_string()),
                input_schema: tools::predict::schema(),
+                ..Default::default()
            },
            // ================================================================
            // RESTORE TOOL (v1.5+)
@ -338,6 +355,7 @@ impl McpServer {
                name: "restore".to_string(),
                description: Some("Restore memories from a JSON backup file. Supports MCP wrapper format, RecallResult format, and direct memory array format.".to_string()),
                input_schema: tools::restore::schema(),
+                ..Default::default()
            },
            // ================================================================
            // CONTEXT PACKETS (v1.8+)
@ -346,6 +364,7 @@ impl McpServer {
                name: "session_context".to_string(),
                description: Some("One-call session initialization. Combines search, intentions, status, predictions, and codebase context into a single token-budgeted response. Replaces 5 separate calls at session start.".to_string()),
                input_schema: tools::session_context::schema(),
+                ..Default::default()
            },
            // ================================================================
            // AUTONOMIC TOOLS (v1.9+)
@ -354,11 +373,13 @@ impl McpServer {
                name: "memory_health".to_string(),
                description: Some("Retention dashboard. Returns avg retention, retention distribution (buckets: 0-20%, 20-40%, etc.), trend (improving/declining/stable), and recommendation. Lightweight alternative to full system_status focused on memory quality.".to_string()),
                input_schema: tools::health::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "memory_graph".to_string(),
                description: Some("Subgraph export for visualization. Input: center_id or query, depth (1-3), max_nodes. Returns nodes with force-directed layout positions and edges with weights. Powers memory graph visualization.".to_string()),
                input_schema: tools::graph::schema(),
+                ..Default::default()
            },
            // ================================================================
            // DEEP REFERENCE (v2.0.4+) — replaces cross_reference
@ -367,16 +388,19 @@ impl McpServer {
                name: "deep_reference".to_string(),
                description: Some("Deep cognitive reasoning across memories. Combines FSRS-6 trust scoring, spreading activation, temporal supersession, dream insights, and contradiction analysis to build a complete understanding of a topic. Returns trust-scored evidence, fact evolution timeline, and a recommended answer. Use this when accuracy matters.".to_string()),
                input_schema: tools::cross_reference::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "cross_reference".to_string(),
                description: Some("Alias for deep_reference. Connect the dots across memories with cognitive reasoning.".to_string()),
                input_schema: tools::cross_reference::schema(),
+                ..Default::default()
            },
            ToolDescription {
                name: "contradictions".to_string(),
                description: Some("Inspect memory disagreements directly. Scans a topic or recent memories for trust-weighted contradiction pairs using the same local logic as deep_reference.".to_string()),
                input_schema: tools::contradictions::schema(),
+                ..Default::default()
            },
            // ================================================================
            // ACTIVE FORGETTING (v2.0.5) — top-down suppression
@ -386,9 +410,47 @@ impl McpServer {
                name: "suppress".to_string(),
                description: Some("Actively suppress a memory via top-down inhibitory control (Anderson 2025 SIF + Davis Rac1). Distinct from delete: the memory persists but is inhibited from retrieval and actively decays. Each call compounds. A background Rac1 worker cascades decay to co-activated neighbors. Reversible within 24 hours via reverse=true.".to_string()),
                input_schema: tools::suppress::schema(),
+                ..Default::default()
            },
        ];

+        // Per-tool result-size annotation `_meta["anthropic/maxResultSizeChars"]`.
+        //
+        // Claude Code v2.1.91+ honors this annotation to override its 50K default
+        // `CallToolResult` truncation. Without it, large Vestige payloads
+        // (`search` with `detail_level="full"` at `limit=20` has been observed
+        // at ~135K chars; `memory_timeline` at `limit=30` at ~84K chars) are
+        // silently truncated and spilled to disk, forcing the parent agent to
+        // chunk-read them.
+        //
+        // `search` and `memory_timeline` are sized at ~2× their observed peak;
+        // `memory` and `codebase` get a flat 100_000 bound (no measured peak
+        // is cited for them). Max permitted by Anthropic is 500_000. The
+        // remaining 21 tools deliberately carry NO annotation (cargo-cult
+        // prevention — annotating a small-payload tool dilutes the signal).
+        //
+        // Other tools that COULD plausibly grow into the annotated set with
+        // future workload (`deep_reference`, `cross_reference`, `memory_graph`,
+        // `explore_connections`, `session_context`) are left unannotated until
+        // empirical measurement shows truncation under realistic use.
+        for tool in tools.iter_mut() {
+            let max_chars: Option<u64> = match tool.name.as_str() {
+                "search" => Some(300_000),
+                "memory_timeline" => Some(200_000),
+                "memory" => Some(100_000),
+                "codebase" => Some(100_000),
+                _ => None,
+            };
+            if let Some(n) = max_chars {
+                let mut meta = serde_json::Map::new();
+                meta.insert(
+                    "anthropic/maxResultSizeChars".to_string(),
+                    serde_json::Value::from(n),
+                );
+                tool.meta = Some(serde_json::Value::Object(meta));
+            }
+        }
+
        let result = ListToolsResult { tools };
        serde_json::to_value(result).map_err(|e| JsonRpcError::internal_error(&e.to_string()))
    }
@ -1899,4 +1961,142 @@ mod tests {
        assert!(response.error.is_some());
        assert_eq!(response.error.unwrap().code, -32602);
    }
+
+    // ========================================================================
+    // Per-tool result-size annotation tests
+    // (`_meta["anthropic/maxResultSizeChars"]`, CC v2.1.91+)
+    //
+    // The annotation lives on the Tool definition in `tools/list`, so CC reads
+    // it once when the MCP session opens and applies the override to every
+    // invocation of that tool. These tests pin the wire-form so a future
+    // refactor of `ToolDescription` cannot silently drop the annotation.
+    // ========================================================================
+
+    /// Expected cap per tool name, kept as an independent copy of the table in
+    /// `handle_tools_list` so drift fails a test. `Some(cap)` = annotated tool;
+    /// `None` = tool that MUST NOT carry the annotation (cargo-cult prevention).
+    fn expected_max_result_size(name: &str) -> Option<u64> {
+        match name {
+            "search" => Some(300_000),
+            "memory_timeline" => Some(200_000),
+            "memory" => Some(100_000),
+            "codebase" => Some(100_000),
+            _ => None,
+        }
+    }
+
+    #[tokio::test]
+    async fn test_high_payload_tools_have_max_result_size_annotation() {
+        let (mut server, _dir) = test_server().await;
+        let init_request = make_request("initialize", Some(init_params()));
+        server.handle_request(init_request).await; // response deliberately unused
+
+        let request = make_request("tools/list", None);
+        let response = server.handle_request(request).await.unwrap();
+        let result = response.result.unwrap();
+        let tools = result["tools"].as_array().unwrap();
+
+        for name in ["search", "memory_timeline", "memory", "codebase"] {
+            let tool = tools
+                .iter()
+                .find(|t| t["name"].as_str() == Some(name))
+                .unwrap_or_else(|| panic!("Tool '{}' missing from tools/list", name));
+
+            let expected = expected_max_result_size(name).unwrap(); // panics if name has no cap
+            let meta = tool.get("_meta").unwrap_or_else(|| {
+                panic!("Tool '{}' is missing the `_meta` field on the wire", name)
+            });
+            let actual = meta
+                .get("anthropic/maxResultSizeChars")
+                .and_then(|v| v.as_u64()) // a non-integer value is treated as missing
+                .unwrap_or_else(|| {
+                    panic!(
+                        "Tool '{}' _meta lacks integer 'anthropic/maxResultSizeChars'",
+                        name
+                    )
+                });
+            assert_eq!(
+                actual, expected,
+                "Tool '{}' cap drift: expected {} got {}",
+                name, expected, actual
+            );
+            assert!(
+                actual <= 500_000, // ceiling guard, separate from the pinned value above
+                "Tool '{}' cap {} exceeds Anthropic 500K ceiling",
+                name,
+                actual
+            );
+        }
+    }
+
+    #[tokio::test]
+    async fn test_other_tools_do_not_carry_max_result_size_annotation() {
+        // Cargo-cult prevention. Dynamically derived from tools/list so this
+        // test is robust to new tools being added: any tool that is NOT in
+        // the discipline-prescribed set MUST NOT carry the annotation.
+        // Adding the annotation to a small-payload tool dilutes the signal
+        // and trains future maintainers that the value is arbitrary.
+        let (mut server, _dir) = test_server().await;
+        let init_request = make_request("initialize", Some(init_params()));
+        server.handle_request(init_request).await;
+
+        let request = make_request("tools/list", None);
+        let response = server.handle_request(request).await.unwrap();
+        let result = response.result.unwrap();
+        let tools = result["tools"].as_array().unwrap();
+
+        for tool in tools {
+            let name = tool["name"].as_str().unwrap();
+            if expected_max_result_size(name).is_some() {
+                continue; // covered by the annotated-tools test
+            }
+
+            // Passes when `_meta` is absent or lacks the anthropic key (a
+            // non-object `_meta` also yields `None` from `Value::get`). The
+            // forbidden case is the anthropic key present on this tool.
+            let has_max_size = tool
+                .get("_meta")
+                .and_then(|m| m.get("anthropic/maxResultSizeChars"))
+                .is_some();
+            assert!(
+                !has_max_size,
+                "Tool '{}' should NOT carry maxResultSizeChars annotation \
+                 (not in the discipline-prescribed set: search, memory_timeline, \
+                 memory, codebase). If this tool's realistic max-payload now \
+                 routinely exceeds 50K, update expected_max_result_size() + the \
+                 annotation loop in handle_tools_list together.",
+                name
+            );
+        }
+    }
+
+    #[tokio::test]
+    async fn test_meta_wire_shape_uses_underscore_meta_field() {
+        // On the wire the field must be `_meta`, NOT `meta`. The struct's
+        // `rename_all = "camelCase"` alone would emit `meta`; only the explicit
+        // `#[serde(rename = "_meta")]` on the field produces the right key.
+        let (mut server, _dir) = test_server().await;
+        let init_request = make_request("initialize", Some(init_params()));
+        server.handle_request(init_request).await;
+
+        let request = make_request("tools/list", None);
+        let response = server.handle_request(request).await.unwrap();
+        let result = response.result.unwrap();
+        let tools = result["tools"].as_array().unwrap();
+
+        let search_tool = tools
+            .iter()
+            .find(|t| t["name"].as_str() == Some("search"))
+            .expect("'search' tool present");
+
+        // Wire-form: `_meta` must exist; `meta` (un-renamed) must NOT exist.
+        assert!(
+            search_tool.get("_meta").is_some(),
+            "search tool missing `_meta` key (serde rename to _meta did not apply)"
+        );
+        assert!(
+            search_tool.get("meta").is_none(),
+            "search tool has un-renamed `meta` key (regression — serde rename broke)"
+        );
+    }
 }