feat(mcp): add per-tool _meta["anthropic/maxResultSizeChars"] annotation (#56)

Claude Code v2.1.91+ honors the per-tool annotation
`_meta["anthropic/maxResultSizeChars"]` (up to 500_000) to override
its 50K default truncation of `CallToolResult`. Without it, large
Vestige payloads are silently truncated and spilled to disk, forcing
the parent agent to chunk-read them.

Empirically observed truncation under realistic default parameters
(measured on v1.3.0 against ~3,300 memories; v2.x tool surface
preserves the same names + payload shapes):

  search(detail_level="full", limit=20)  -> 134,824 chars  -> truncated
  search(detail_level="summary", limit=10) ->  71,318 chars -> truncated
  memory_timeline(limit=30)              ->  83,626 chars  -> truncated

This patch:

1. Adds `meta: Option<serde_json::Value>` to `ToolDescription` with
   `#[serde(rename = "_meta")]` so the wire shape matches the MCP
   spec. Backwards-compatible (the field is optional +
   `skip_serializing_if`; older MCP clients ignore unknown JSON keys
   per the spec).

2. Derives `Default` on `ToolDescription` so existing call sites can
   adopt the new field via struct-update syntax
   (`..Default::default()`) without restating it.

3. Annotates the four high-payload tools per measurement-driven
   discipline; the other 21 tools deliberately do NOT carry the
   annotation (cargo-cult prevention — a generous cap on every tool
   dilutes the signal and trains future maintainers that the value
   is arbitrary):

   - search           -> 300_000 (2.2x headroom over observed peak)
   - memory_timeline  -> 200_000 (2.4x headroom over observed peak)
   - memory           -> 100_000 (single-record bounded)
   - codebase         -> 100_000 (future-growth bounded)

   Tools that COULD plausibly grow into the annotated set with future
   workload (`deep_reference`, `cross_reference`, `memory_graph`,
   `explore_connections`, `session_context`) are left unannotated
   until empirical measurement shows truncation under realistic use.

4. Adds three regression tests in `server::tests`:
   - test_high_payload_tools_have_max_result_size_annotation:
     pins each cap value + asserts <= 500K Anthropic ceiling
   - test_other_tools_do_not_carry_max_result_size_annotation:
     cargo-cult prevention; dynamically iterates `tools/list` and
     asserts every tool NOT in the discipline-prescribed set lacks
     the annotation (robust to new tools being added by future PRs)
   - test_meta_wire_shape_uses_underscore_meta_field:
     pins the serde rename to `_meta` (the spec'd wire name) so a
     refactor of `ToolDescription` cannot silently drop the rename

All 22 `server::tests` pass on v2.1.22 base (19 pre-existing + 3 new).
Full lib test suite: 379/380 pass; the 1 unrelated failure
(`tools::maintenance::tests::test_portable_export_writes_archive_to_storage_exports_dir`)
is a pre-existing Windows path-separator assertion bug in
`tools/maintenance.rs:823` (`path.ends_with("exports/portable-test.json")`
fails on Windows where the path uses `\`) — unaffected by this PR.

References:
- Anthropic CC v2.1.91 release notes (April 2026): "Added MCP tool
  result persistence override via _meta['anthropic/maxResultSizeChars']
  annotation (up to 500K), allowing larger results like DB schemas
  to pass through without truncation"
- claude-agent-sdk-python v0.1.55 #756: forward bookkeeping
  establishing the on-Tool-definition (not on-CallToolResult)
  semantics for this annotation

Co-authored-by: Peter Lauzon <inbijiburu@protonmail.com>
This commit is contained in:
Luc Lauzon 2026-05-25 12:49:51 -06:00 committed by GitHub
parent 1399329810
commit a8550410b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 214 additions and 2 deletions

View file

@ -82,13 +82,25 @@ pub struct ServerCapabilities {
// ============================================================================
/// Tool description for tools/list
///
/// One entry of the `tools` list returned by `tools/list`. Field names are
/// serialized in camelCase (see `rename_all` below), except `meta`, which is
/// explicitly renamed to `_meta`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ToolDescription {
/// Tool name reported in the `tools/list` result.
pub name: String,
/// Optional human-readable description; the key is omitted from the
/// serialized output when this is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
/// Input schema value for the tool (populated from each tool module's
/// `schema()` function in `handle_tools_list`). Serialized under the
/// camelCase key `inputSchema`.
pub input_schema: Value,
/// Per-tool `_meta` annotations from the MCP wire spec.
///
/// Notable keys recognized by Claude Code (v2.1.91+):
/// - `anthropic/maxResultSizeChars` (integer, up to 500_000):
/// per-tool override of the 50K default `CallToolResult` truncation
/// ceiling. Pinned on the Tool definition; applies to every invocation.
///
/// Free-form `serde_json::Value` (typically an object) so additional
/// vendor-specific `_meta` keys can be added without further schema
/// changes.
///
/// Serialized as `_meta`; the key is omitted entirely when this is `None`.
#[serde(rename = "_meta", skip_serializing_if = "Option::is_none")]
pub meta: Option<Value>,
}
/// Result of tools/list

View file

@ -227,7 +227,7 @@ impl McpServer {
// v2.1.21: 25 tools (verified by the `tools.len() == 25` assertion in the
// handle_tools_list test below — the `suppress` tool landed in v2.0.5).
// Deprecated tools still work via redirects in handle_tools_call.
let tools = vec![
let mut tools = vec![
// ================================================================
// UNIFIED TOOLS (v1.1+)
// ================================================================
@ -235,21 +235,25 @@ impl McpServer {
name: "search".to_string(),
description: Some("Unified search tool. Uses hybrid search (keyword + semantic + convex combination fusion) internally. Auto-strengthens memories on access (Testing Effect).".to_string()),
input_schema: tools::search_unified::schema(),
..Default::default()
},
ToolDescription {
name: "memory".to_string(),
description: Some("Unified memory management tool. Actions: 'get' (retrieve full node), 'purge' (irreversibly remove content/embeddings with confirm=true), 'delete' (legacy alias for purge), 'state' (get accessibility state), 'promote' (thumbs up — increases retrieval strength), 'demote' (thumbs down — decreases retrieval strength, does NOT delete), 'edit' (update content in-place, preserves FSRS state).".to_string()),
input_schema: tools::memory_unified::schema(),
..Default::default()
},
ToolDescription {
name: "codebase".to_string(),
description: Some("Unified codebase tool. Actions: 'remember_pattern' (store code pattern), 'remember_decision' (store architectural decision), 'get_context' (retrieve patterns and decisions).".to_string()),
input_schema: tools::codebase_unified::schema(),
..Default::default()
},
ToolDescription {
name: "intention".to_string(),
description: Some("Unified intention management tool. Actions: 'set' (create), 'check' (find triggered), 'update' (complete/snooze/cancel), 'list' (show intentions).".to_string()),
input_schema: tools::intention_unified::schema(),
..Default::default()
},
// ================================================================
// CORE MEMORY (v1.7: smart_ingest absorbs ingest + checkpoint)
@ -258,6 +262,7 @@ impl McpServer {
name: "smart_ingest".to_string(),
description: Some("INTELLIGENT memory ingestion with Prediction Error Gating. Single mode: provide 'content' to auto-decide CREATE/UPDATE/SUPERSEDE. Batch mode: provide 'items' array (max 20) for session-end saves — each item runs the full cognitive pipeline (importance scoring, intent detection, synaptic tagging).".to_string()),
input_schema: tools::smart_ingest::schema(),
..Default::default()
},
// ================================================================
// TEMPORAL TOOLS (v1.2+)
@ -266,11 +271,13 @@ impl McpServer {
name: "memory_timeline".to_string(),
description: Some("Browse memories chronologically. Returns memories in a time range, grouped by day. Defaults to last 7 days.".to_string()),
input_schema: tools::timeline::schema(),
..Default::default()
},
ToolDescription {
name: "memory_changelog".to_string(),
description: Some("View audit trail of memory changes. Per-memory: state transitions. System-wide: consolidations + recent state changes.".to_string()),
input_schema: tools::changelog::schema(),
..Default::default()
},
// ================================================================
// MAINTENANCE TOOLS (v1.7: system_status replaces health_check + stats)
@ -279,26 +286,31 @@ impl McpServer {
name: "system_status".to_string(),
description: Some("Combined system health and statistics. Returns status (healthy/degraded/critical/empty), full stats, FSRS preview, cognitive module health, state distribution, warnings, and recommendations.".to_string()),
input_schema: tools::maintenance::system_status_schema(),
..Default::default()
},
ToolDescription {
name: "consolidate".to_string(),
description: Some("Run FSRS-6 memory consolidation cycle. Applies decay, generates embeddings, and performs maintenance. Use when memories seem stale.".to_string()),
input_schema: tools::maintenance::consolidate_schema(),
..Default::default()
},
ToolDescription {
name: "backup".to_string(),
description: Some("Create a SQLite database backup. Returns the backup file path.".to_string()),
input_schema: tools::maintenance::backup_schema(),
..Default::default()
},
ToolDescription {
name: "export".to_string(),
description: Some("Export memories as JSON or JSONL. Supports tag and date filters.".to_string()),
input_schema: tools::maintenance::export_schema(),
..Default::default()
},
ToolDescription {
name: "gc".to_string(),
description: Some("Garbage collect stale memories below retention threshold. Defaults to dry_run=true for safety.".to_string()),
input_schema: tools::maintenance::gc_schema(),
..Default::default()
},
// ================================================================
// AUTO-SAVE & DEDUP TOOLS (v1.3+)
@ -307,11 +319,13 @@ impl McpServer {
name: "importance_score".to_string(),
description: Some("Score content importance using 4-channel neuroscience model (novelty/arousal/reward/attention). Returns composite score, channel breakdown, encoding boost, and explanations.".to_string()),
input_schema: tools::importance::schema(),
..Default::default()
},
ToolDescription {
name: "find_duplicates".to_string(),
description: Some("Find duplicate and near-duplicate memory clusters using cosine similarity on embeddings. Returns clusters with suggested actions (merge/review). Use to clean up redundant memories.".to_string()),
input_schema: tools::dedup::schema(),
..Default::default()
},
// ================================================================
// COGNITIVE TOOLS (v1.5+)
@ -320,16 +334,19 @@ impl McpServer {
name: "dream".to_string(),
description: Some("Trigger memory dreaming — replays recent memories to discover hidden connections, synthesize insights, and strengthen important patterns. Returns insights, connections, and dream stats.".to_string()),
input_schema: tools::dream::schema(),
..Default::default()
},
ToolDescription {
name: "explore_connections".to_string(),
description: Some("Graph exploration tool for memory connections. Actions: 'chain' (build reasoning path between memories), 'associations' (find related memories via spreading activation + hippocampal index), 'bridges' (find connecting memories between two nodes).".to_string()),
input_schema: tools::explore::schema(),
..Default::default()
},
ToolDescription {
name: "predict".to_string(),
description: Some("Proactive memory prediction — predicts what memories you'll need next based on context, recent activity, and learned patterns. Returns predictions, suggestions, and speculative retrievals.".to_string()),
input_schema: tools::predict::schema(),
..Default::default()
},
// ================================================================
// RESTORE TOOL (v1.5+)
@ -338,6 +355,7 @@ impl McpServer {
name: "restore".to_string(),
description: Some("Restore memories from a JSON backup file. Supports MCP wrapper format, RecallResult format, and direct memory array format.".to_string()),
input_schema: tools::restore::schema(),
..Default::default()
},
// ================================================================
// CONTEXT PACKETS (v1.8+)
@ -346,6 +364,7 @@ impl McpServer {
name: "session_context".to_string(),
description: Some("One-call session initialization. Combines search, intentions, status, predictions, and codebase context into a single token-budgeted response. Replaces 5 separate calls at session start.".to_string()),
input_schema: tools::session_context::schema(),
..Default::default()
},
// ================================================================
// AUTONOMIC TOOLS (v1.9+)
@ -354,11 +373,13 @@ impl McpServer {
name: "memory_health".to_string(),
description: Some("Retention dashboard. Returns avg retention, retention distribution (buckets: 0-20%, 20-40%, etc.), trend (improving/declining/stable), and recommendation. Lightweight alternative to full system_status focused on memory quality.".to_string()),
input_schema: tools::health::schema(),
..Default::default()
},
ToolDescription {
name: "memory_graph".to_string(),
description: Some("Subgraph export for visualization. Input: center_id or query, depth (1-3), max_nodes. Returns nodes with force-directed layout positions and edges with weights. Powers memory graph visualization.".to_string()),
input_schema: tools::graph::schema(),
..Default::default()
},
// ================================================================
// DEEP REFERENCE (v2.0.4+) — replaces cross_reference
@ -367,16 +388,19 @@ impl McpServer {
name: "deep_reference".to_string(),
description: Some("Deep cognitive reasoning across memories. Combines FSRS-6 trust scoring, spreading activation, temporal supersession, dream insights, and contradiction analysis to build a complete understanding of a topic. Returns trust-scored evidence, fact evolution timeline, and a recommended answer. Use this when accuracy matters.".to_string()),
input_schema: tools::cross_reference::schema(),
..Default::default()
},
ToolDescription {
name: "cross_reference".to_string(),
description: Some("Alias for deep_reference. Connect the dots across memories with cognitive reasoning.".to_string()),
input_schema: tools::cross_reference::schema(),
..Default::default()
},
ToolDescription {
name: "contradictions".to_string(),
description: Some("Inspect memory disagreements directly. Scans a topic or recent memories for trust-weighted contradiction pairs using the same local logic as deep_reference.".to_string()),
input_schema: tools::contradictions::schema(),
..Default::default()
},
// ================================================================
// ACTIVE FORGETTING (v2.0.5) — top-down suppression
@ -386,9 +410,47 @@ impl McpServer {
name: "suppress".to_string(),
description: Some("Actively suppress a memory via top-down inhibitory control (Anderson 2025 SIF + Davis Rac1). Distinct from delete: the memory persists but is inhibited from retrieval and actively decays. Each call compounds. A background Rac1 worker cascades decay to co-activated neighbors. Reversible within 24 hours via reverse=true.".to_string()),
input_schema: tools::suppress::schema(),
..Default::default()
},
];
// Per-tool result-size annotation `_meta["anthropic/maxResultSizeChars"]`.
//
// Claude Code v2.1.91+ honors this annotation to override its 50K default
// `CallToolResult` truncation. Without it, large Vestige payloads
// (`search` with `detail_level="full"` at `limit=20` has been observed
// at ~135K chars; `memory_timeline` at `limit=30` at ~84K chars) are
// silently truncated and spilled to disk, forcing the parent agent to
// chunk-read them.
//
// Caps for `search` and `memory_timeline` are sized at ~2× observed peak
// with growth headroom; `memory` and `codebase` carry a 100_000 bound
// (single-record / future-growth bounds rather than an observed peak).
// Max permitted by Anthropic is 500_000. Only these four high-payload
// tools carry the annotation today; the remaining 21 tools deliberately
// do NOT (cargo-cult prevention — annotating a small-payload tool
// dilutes the signal).
//
// Other tools that COULD plausibly grow into the annotated set with
// future workload (`deep_reference`, `cross_reference`, `memory_graph`,
// `explore_connections`, `session_context`) are left unannotated until
// empirical measurement shows truncation under realistic use.
for tool in tools.iter_mut() {
let max_chars: Option<u64> = match tool.name.as_str() {
"search" => Some(300_000),
"memory_timeline" => Some(200_000),
"memory" => Some(100_000),
"codebase" => Some(100_000),
_ => None,
};
if let Some(n) = max_chars {
let mut meta = serde_json::Map::new();
meta.insert(
"anthropic/maxResultSizeChars".to_string(),
serde_json::Value::from(n),
);
tool.meta = Some(serde_json::Value::Object(meta));
}
}
let result = ListToolsResult { tools };
serde_json::to_value(result).map_err(|e| JsonRpcError::internal_error(&e.to_string()))
}
@ -1899,4 +1961,142 @@ mod tests {
assert!(response.error.is_some());
assert_eq!(response.error.unwrap().code, -32602);
}
// ========================================================================
// Per-tool result-size annotation tests
// (`_meta["anthropic/maxResultSizeChars"]`, CC v2.1.91+)
//
// The annotation lives on the Tool definition in `tools/list`, so CC reads
// it once when the MCP session opens and applies the override to every
// invocation of that tool. These tests pin the wire-form so a future
// refactor of `ToolDescription` cannot silently drop the annotation.
// ========================================================================
/// Test-side source of truth for the `anthropic/maxResultSizeChars` caps.
///
/// Yields `Some(cap)` when `name` is one of the annotated tools and `None`
/// for every other name, i.e. for tools that MUST NOT carry the annotation
/// (cargo-cult prevention).
fn expected_max_result_size(name: &str) -> Option<u64> {
    // (tool name, expected cap in characters)
    const ANNOTATED_CAPS: [(&str, u64); 4] = [
        ("search", 300_000),
        ("memory_timeline", 200_000),
        ("memory", 100_000),
        ("codebase", 100_000),
    ];

    ANNOTATED_CAPS
        .iter()
        .find(|(tool, _)| *tool == name)
        .map(|&(_, cap)| cap)
}
/// Every annotated tool must expose `_meta["anthropic/maxResultSizeChars"]`
/// in `tools/list`, with exactly the cap pinned by
/// `expected_max_result_size` and never above the 500K ceiling.
#[tokio::test]
async fn test_high_payload_tools_have_max_result_size_annotation() {
    let (mut server, _dir) = test_server().await;
    server
        .handle_request(make_request("initialize", Some(init_params())))
        .await;

    let listing = server
        .handle_request(make_request("tools/list", None))
        .await
        .unwrap()
        .result
        .unwrap();
    let tools = listing["tools"].as_array().unwrap();

    for name in ["search", "memory_timeline", "memory", "codebase"] {
        // Locate the tool's entry in the serialized listing.
        let entry = match tools.iter().find(|t| t["name"].as_str() == Some(name)) {
            Some(entry) => entry,
            None => panic!("Tool '{}' missing from tools/list", name),
        };
        let expected = expected_max_result_size(name).unwrap();

        // The annotation must be present on the wire under `_meta`...
        let meta = match entry.get("_meta") {
            Some(meta) => meta,
            None => panic!("Tool '{}' is missing the `_meta` field on the wire", name),
        };
        // ...and must be an unsigned integer.
        let actual = match meta
            .get("anthropic/maxResultSizeChars")
            .and_then(|cap| cap.as_u64())
        {
            Some(cap) => cap,
            None => panic!(
                "Tool '{}' _meta lacks integer 'anthropic/maxResultSizeChars'",
                name
            ),
        };

        assert_eq!(
            actual, expected,
            "Tool '{}' cap drift: expected {} got {}",
            name, expected, actual
        );
        assert!(
            actual <= 500_000,
            "Tool '{}' cap {} exceeds Anthropic 500K ceiling",
            name,
            actual
        );
    }
}
/// Cargo-cult prevention: any tool in `tools/list` that
/// `expected_max_result_size` does not know about MUST NOT carry the
/// `anthropic/maxResultSizeChars` annotation.
#[tokio::test]
async fn test_other_tools_do_not_carry_max_result_size_annotation() {
    let (mut server, _dir) = test_server().await;
    server
        .handle_request(make_request("initialize", Some(init_params())))
        .await;

    let listing = server
        .handle_request(make_request("tools/list", None))
        .await
        .unwrap()
        .result
        .unwrap();
    let tools = listing["tools"].as_array().unwrap();

    // The set under test is derived from the live listing, so tools added
    // later are checked automatically. Annotated tools are filtered out
    // here; the annotated-tools test covers them.
    let unannotated = tools
        .iter()
        .map(|tool| (tool["name"].as_str().unwrap(), tool))
        .filter(|(name, _)| expected_max_result_size(name).is_none());

    for (name, tool) in unannotated {
        // A missing `_meta` key and a `_meta` object without the anthropic
        // key are both fine; only the anthropic key itself is forbidden.
        let carries_cap = tool
            .get("_meta")
            .map_or(false, |meta| meta.get("anthropic/maxResultSizeChars").is_some());
        assert!(
            !carries_cap,
            "Tool '{}' should NOT carry maxResultSizeChars annotation \
            (not in the discipline-prescribed set: search, memory_timeline, \
            memory, codebase). If this tool's realistic max-payload now \
            routinely exceeds 50K, update expected_max_result_size() + the \
            annotation loop in handle_tools_list together.",
            name
        );
    }
}
/// Pins the serialized key name: the struct field is `meta`, but
/// `#[serde(rename = "_meta")]` must put it on the wire as `_meta`.
#[tokio::test]
async fn test_meta_wire_shape_uses_underscore_meta_field() {
    let (mut server, _dir) = test_server().await;
    server
        .handle_request(make_request("initialize", Some(init_params())))
        .await;

    let listing = server
        .handle_request(make_request("tools/list", None))
        .await
        .unwrap()
        .result
        .unwrap();
    let search_entry = listing["tools"]
        .as_array()
        .unwrap()
        .iter()
        .find(|entry| entry["name"].as_str() == Some("search"))
        .expect("'search' tool present");

    // The renamed key must be present on the wire...
    let renamed = search_entry.get("_meta");
    assert!(
        renamed.is_some(),
        "search tool missing `_meta` key (serde rename to _meta did not apply)"
    );
    // ...and the raw Rust field name must not leak through.
    let unrenamed = search_entry.get("meta");
    assert!(
        unrenamed.is_none(),
        "search tool has un-renamed `meta` key (regression — serde rename broke)"
    );
}
}