feat(mcp): consolidate dedup+merge into one dedup action-tool (8→1)

Tool Consolidation v2.2.0, Layer 1 commit 1/6. Advertised tools 34 → 27.

Folds find_duplicates + the 7 Phase-3 merge/supersede tools
(merge_candidates, plan_merge, plan_supersede, apply_plan, merge_undo,
protect, merge_policy) into a single action-dispatched `dedup` tool:

  action = scan (default) | plan_merge | plan_supersede | apply | undo
         | protect | policy

- `scan` combines cosine-similarity duplicate clusters with Fellegi-Sunter
  merge candidates in one read-only payload.
- The mutate/preview/reverse actions delegate to merge::execute verbatim,
  preserving plan_id → apply → undo, confirm-gating, and
  bitemporal-never-delete byte-for-byte.
- All 8 old names remain dispatchable as hidden warn!+redirect aliases
  (≥1 minor release, removed v2.3.0) but drop off the advertised tools/list.
- Size annotation: dedup=150_000 (scan payload). expected_max_result_size
  + both annotation tests kept in sync.
- Tests: count assert 34→27, 8 negative asserts, new
  test_deprecated_dedup_aliases_redirect verifying old names still dispatch.

Gates: cargo test --workspace (all green), cargo clippy -D warnings (clean).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sam Valladares 2026-06-28 17:28:04 -05:00
parent ef4cefaa40
commit ba85a06eac
2 changed files with 209 additions and 66 deletions

View file

@ -346,56 +346,16 @@ impl McpServer {
input_schema: tools::importance::schema(),
..Default::default()
},
ToolDescription {
name: "find_duplicates".to_string(),
description: Some("Find duplicate and near-duplicate memory clusters using cosine similarity on embeddings. Returns clusters with suggested actions (merge/review). Use to clean up redundant memories.".to_string()),
input_schema: tools::dedup::schema(),
..Default::default()
},
// ================================================================
// MERGE / SUPERSEDE CONTROLS (v2.1.25 — Phase 3)
// Diff-previewed, confidence-gated, reversible, never silent.
// DEDUP / MERGE / SUPERSEDE — unified `dedup` tool (v2.2)
// Folds find_duplicates + the 7 Phase-3 merge tools into one
// action-dispatched surface. Diff-previewed, confidence-gated,
// reversible, never silent; bitemporal-never-delete preserved.
// ================================================================
ToolDescription {
name: "merge_candidates".to_string(),
description: Some("Surface likely duplicate/overlapping memory clusters with confidence scores and the signals behind each (Fellegi-Sunter match/possible/non-match). Read-only — nothing is changed.".to_string()),
input_schema: tools::merge::merge_candidates_schema(),
..Default::default()
},
ToolDescription {
name: "plan_merge".to_string(),
description: Some("Produce a previewable MERGE plan (a diff: combined content/tags/provenance) for 2+ memories WITHOUT applying it. Returns a plan_id for apply_plan. Protected members block the merge.".to_string()),
input_schema: tools::merge::plan_merge_schema(),
..Default::default()
},
ToolDescription {
name: "plan_supersede".to_string(),
description: Some("Preview superseding memory A with B — bitemporal invalidation (stamps valid_until, keeps A queryable for audit) WITHOUT applying. Returns a plan_id for apply_plan.".to_string()),
input_schema: tools::merge::plan_supersede_schema(),
..Default::default()
},
ToolDescription {
name: "apply_plan".to_string(),
description: Some("Execute a previously-generated merge/supersede plan by id. Recorded as a reversible operation. Old memories are invalidated (never deleted). 'possible'/'non_match' plans require confirm=true.".to_string()),
input_schema: tools::merge::apply_plan_schema(),
..Default::default()
},
ToolDescription {
name: "merge_undo".to_string(),
description: Some("Reverse a prior merge/supersede operation (the 'git reflog for your agent's memory'). With no operation_id, lists the reversible operation log so you can pick one.".to_string()),
input_schema: tools::merge::merge_undo_schema(),
..Default::default()
},
ToolDescription {
name: "protect".to_string(),
description: Some("Pin a memory so it can never be auto-merged, superseded, or garbage-collected. Pass protected=false to unpin.".to_string()),
input_schema: tools::merge::protect_schema(),
..Default::default()
},
ToolDescription {
name: "merge_policy".to_string(),
description: Some("Get or set the per-project merge policy: the two Fellegi-Sunter thresholds (match_threshold, possible_threshold) and auto_apply. No args returns the current policy.".to_string()),
input_schema: tools::merge::merge_policy_schema(),
name: "dedup".to_string(),
description: Some("Deduplication & merge/supersede. Actions: 'scan' (default — surface duplicate clusters via cosine + merge candidates via Fellegi-Sunter, read-only), 'plan_merge' (preview a reversible merge plan for 2+ member_ids → plan_id), 'plan_supersede' (preview superseding old_id with new_id → plan_id), 'apply' (execute a plan_id; 'possible'/'non_match' need confirm=true), 'undo' (reverse an operation_id, or omit to list the reflog), 'protect' (pin a memory against auto-merge/supersede/forget), 'policy' (get/set Fellegi-Sunter thresholds). Old memories are invalidated, never deleted.".to_string()),
input_schema: tools::dedup::unified_schema(),
..Default::default()
},
// ================================================================
@ -516,6 +476,9 @@ impl McpServer {
"memory_timeline" => Some(200_000),
"memory" => Some(100_000),
"codebase" => Some(100_000),
// v2.2: dedup action='scan' returns duplicate clusters +
// merge candidates + policy in one payload.
"dedup" => Some(150_000),
_ => None,
};
if let Some(n) = max_chars {
@ -1003,13 +966,31 @@ impl McpServer {
"importance_score" => {
tools::importance::execute(&self.storage, &self.cognitive, request.arguments).await
}
"find_duplicates" => tools::dedup::execute(&self.storage, request.arguments).await,
// ================================================================
// DEDUP / MERGE / SUPERSEDE — unified `dedup` tool (v2.2)
// ================================================================
"dedup" => tools::dedup::execute_unified(&self.storage, request.arguments).await,
// ================================================================
// MERGE / SUPERSEDE CONTROLS (v2.1.25 — Phase 3)
// ================================================================
// DEPRECATED (v2.2): folded into `dedup`. Kept as hidden back-compat
// aliases (≥1 minor release) — they call the same underlying handlers
// verbatim, so envelopes/plan_id/confirm-gating/bitemporal are intact.
"find_duplicates" => {
warn!("Tool 'find_duplicates' is deprecated in v2.2. Use 'dedup' with action='scan'.");
tools::dedup::execute(&self.storage, request.arguments).await
}
"merge_candidates" | "plan_merge" | "plan_supersede" | "apply_plan" | "merge_undo"
| "protect" | "merge_policy" => {
warn!(
"Tool '{}' is deprecated in v2.2. Use 'dedup' (action={}).",
request.name,
match request.name.as_str() {
"merge_candidates" => "scan",
"apply_plan" => "apply",
"merge_undo" => "undo",
"merge_policy" => "policy",
other => other,
}
);
tools::merge::execute(&self.storage, request.name.as_str(), request.arguments).await
}
@ -1820,10 +1801,10 @@ mod tests {
let result = response.result.unwrap();
let tools = result["tools"].as_array().unwrap();
// 34 tools: 25 from v2.1.21 + 7 Phase 3 merge/supersede tools
// (merge_candidates, plan_merge, plan_supersede, apply_plan, merge_undo,
// protect, merge_policy, composed_graph) + 1 connector tool (source_sync, #57).
assert_eq!(tools.len(), 34, "Expected exactly 34 tools");
// v2.2 Tool Consolidation (Layer 1): 34 → 27 after `dedup` folds
// find_duplicates + the 7 Phase-3 merge tools (8 → 1). Old names remain
// dispatchable as hidden back-compat aliases but drop off the advertised list.
assert_eq!(tools.len(), 27, "Expected exactly 27 tools after dedup consolidation");
let tool_names: Vec<&str> = tools.iter().map(|t| t["name"].as_str().unwrap()).collect();
@ -1876,18 +1857,28 @@ mod tests {
assert!(tool_names.contains(&"export"));
assert!(tool_names.contains(&"gc"));
// Auto-save & dedup tools (v1.3)
// Auto-save tool (v1.3)
assert!(tool_names.contains(&"importance_score"));
assert!(tool_names.contains(&"find_duplicates"));
// Merge / Supersede controls (v2.1.25 — Phase 3)
assert!(tool_names.contains(&"merge_candidates"));
assert!(tool_names.contains(&"plan_merge"));
assert!(tool_names.contains(&"plan_supersede"));
assert!(tool_names.contains(&"apply_plan"));
assert!(tool_names.contains(&"merge_undo"));
assert!(tool_names.contains(&"protect"));
assert!(tool_names.contains(&"merge_policy"));
// Dedup / merge / supersede — unified `dedup` tool (v2.2).
// find_duplicates + the 7 Phase-3 merge tools folded in; still
// dispatchable as hidden back-compat aliases, but off the advertised list.
assert!(tool_names.contains(&"dedup"));
for old in [
"find_duplicates",
"merge_candidates",
"plan_merge",
"plan_supersede",
"apply_plan",
"merge_undo",
"protect",
"merge_policy",
] {
assert!(
!tool_names.contains(&old),
"{old} should be folded into 'dedup' in v2.2"
);
}
// Cognitive tools (v1.5)
assert!(tool_names.contains(&"dream"));
@ -1912,6 +1903,44 @@ mod tests {
assert!(tool_names.contains(&"suppress"));
}
/// v2.2: the 8 tools folded into `dedup` must still dispatch (hidden
/// back-compat aliases), i.e. they must NOT return the "Unknown tool"
/// InvalidParams (-32602) error. Read-only/list-style actions are used so
/// the call resolves without mutating or requiring extra setup.
#[tokio::test]
async fn test_deprecated_dedup_aliases_redirect() {
let (mut server, _dir) = test_server().await;
let init_request = make_request("initialize", Some(init_params()));
server.handle_request(init_request).await;
// (tool name, args) — all read-only / list-style so they resolve cleanly.
let calls: Vec<(&str, serde_json::Value)> = vec![
("find_duplicates", serde_json::json!({})),
("merge_candidates", serde_json::json!({})),
("merge_undo", serde_json::json!({})), // no operation_id => lists the reflog
("merge_policy", serde_json::json!({})), // no args => returns current policy
("dedup", serde_json::json!({"action": "policy"})),
("dedup", serde_json::json!({})), // default action = scan
];
for (name, args) in calls {
let request = make_request(
"tools/call",
Some(serde_json::json!({ "name": name, "arguments": args })),
);
let response = server.handle_request(request).await.unwrap();
// The call may succeed (result) or fail for a domain reason, but it
// must NOT be the unknown-tool InvalidParams error.
if let Some(err) = response.error {
assert_ne!(
err.code, -32602,
"'{name}' should still dispatch (hidden alias), got unknown-tool error: {}",
err.message
);
}
}
}
#[tokio::test]
async fn test_tools_have_descriptions_and_schemas() {
let (mut server, _dir) = test_server().await;
@ -2130,6 +2159,8 @@ mod tests {
"memory_timeline" => Some(200_000),
"memory" => Some(100_000),
"codebase" => Some(100_000),
// v2.2: dedup action='scan' returns clusters + candidates + policy.
"dedup" => Some(150_000),
_ => None,
}
}
@ -2145,7 +2176,7 @@ mod tests {
let result = response.result.unwrap();
let tools = result["tools"].as_array().unwrap();
for name in ["search", "memory_timeline", "memory", "codebase"] {
for name in ["search", "memory_timeline", "memory", "codebase", "dedup"] {
let tool = tools
.iter()
.find(|t| t["name"].as_str() == Some(name))

View file

@ -276,6 +276,99 @@ pub async fn execute(storage: &Arc<Storage>, args: Option<Value>) -> Result<Valu
}
}
// ============================================================================
// UNIFIED `dedup` TOOL (v2.2 — Tool Consolidation)
//
// Folds the 8 former dedup/merge tools into a single action-dispatched surface:
// action = scan (default) | plan_merge | plan_supersede | apply | undo
// | protect | policy
//
// `scan` combines cosine-similarity duplicate clusters (this module's
// `execute`) with Fellegi-Sunter merge candidates (`merge::merge_candidates`),
// returning both in separate fields. The mutate/preview/reverse actions delegate
// to `super::merge::execute` verbatim, preserving plan_id → apply → undo,
// confirm-gating, and bitemporal-never-delete byte-for-byte.
// ============================================================================
/// Discriminated-union schema for the unified `dedup` tool.
pub fn unified_schema() -> Value {
serde_json::json!({
"type": "object",
"properties": {
"action": {
"type": "string",
"enum": ["scan", "plan_merge", "plan_supersede", "apply", "undo", "protect", "policy"],
"default": "scan",
"description": "What to do. 'scan' (default): surface duplicate clusters (cosine) AND merge candidates (Fellegi-Sunter), read-only. 'plan_merge'/'plan_supersede': preview a reversible plan without applying (returns plan_id). 'apply': execute a plan_id. 'undo': reverse a prior operation (omit operation_id to list the reflog). 'protect': pin a memory against auto-merge/supersede/forget. 'policy': get/set Fellegi-Sunter thresholds."
},
"similarity_threshold": {
"type": "number",
"description": "[scan] Minimum cosine similarity for duplicate clusters (0.5-1.0, default 0.80).",
"minimum": 0.5, "maximum": 1.0
},
"limit": {
"type": "integer",
"description": "[scan] Max clusters/candidates to return (default 20).",
"minimum": 1, "maximum": 100
},
"tags": {
"type": "array", "items": { "type": "string" },
"description": "[scan] Optional: only consider memories with these tags (ANY match)."
},
"member_ids": {
"type": "array", "items": { "type": "string" },
"description": "[plan_merge] IDs of memories to merge (>= 2). Survivor kept; rest bitemporally invalidated."
},
"survivor_id": { "type": "string", "description": "[plan_merge] Optional: which member to keep (defaults to highest-retention)." },
"old_id": { "type": "string", "description": "[plan_supersede] Memory being superseded (kept, marked invalid)." },
"new_id": { "type": "string", "description": "[plan_supersede] Memory that supersedes the old one." },
"plan_id": { "type": "string", "description": "[apply] ID of a plan produced by plan_merge/plan_supersede." },
"confirm": { "type": "boolean", "default": false, "description": "[apply] Required true for 'possible'/'non_match' plans." },
"operation_id": { "type": "string", "description": "[undo] Operation to reverse. Omit to list the reflog." },
"id": { "type": "string", "description": "[protect] Memory id to protect/unprotect." },
"protected": { "type": "boolean", "default": true, "description": "[protect] true to pin, false to unpin." },
"match_threshold": { "type": "number", "minimum": 0.0, "maximum": 1.0, "description": "[policy] Score >= this => 'match'." },
"possible_threshold": { "type": "number", "minimum": 0.0, "maximum": 1.0, "description": "[policy] Score in [possible, match) => review." },
"auto_apply": { "type": "boolean", "description": "[policy] Allow 'match' plans to apply without confirm. Default false." }
}
})
}
/// Unified dispatcher for the `dedup` tool. Routes on `action` (default `scan`).
pub async fn execute_unified(storage: &Arc<Storage>, args: Option<Value>) -> Result<Value, String> {
let action = args
.as_ref()
.and_then(|a| a.get("action"))
.and_then(|v| v.as_str())
.unwrap_or("scan")
.to_string();
match action.as_str() {
"scan" => {
// Cosine-similarity duplicate clusters (this module).
let clusters = execute(storage, args.clone()).await?;
// Fellegi-Sunter merge candidates (merge module, name-dispatched).
let candidates =
super::merge::execute(storage, "merge_candidates", args.clone()).await?;
Ok(serde_json::json!({
"action": "scan",
"duplicateClusters": clusters,
"mergeCandidates": candidates,
"nextStep": "Use action='plan_merge' (member_ids) or action='plan_supersede' (old_id,new_id) to preview a reversible plan, then action='apply' (plan_id)."
}))
}
"plan_merge" => super::merge::execute(storage, "plan_merge", args).await,
"plan_supersede" => super::merge::execute(storage, "plan_supersede", args).await,
"apply" => super::merge::execute(storage, "apply_plan", args).await,
"undo" => super::merge::execute(storage, "merge_undo", args).await,
"protect" => super::merge::execute(storage, "protect", args).await,
"policy" => super::merge::execute(storage, "merge_policy", args).await,
other => Err(format!(
"Unknown dedup action '{other}'. Use scan|plan_merge|plan_supersede|apply|undo|protect|policy."
)),
}
}
#[cfg(test)]
mod tests {
use super::*;
@ -287,6 +380,25 @@ mod tests {
assert!(schema["properties"]["similarity_threshold"].is_object());
}
#[test]
fn test_unified_schema() {
let schema = unified_schema();
assert_eq!(schema["type"], "object");
let actions = schema["properties"]["action"]["enum"].as_array().unwrap();
assert_eq!(actions.len(), 7);
assert_eq!(schema["properties"]["action"]["default"], "scan");
}
#[tokio::test]
async fn test_unified_scan_empty_storage() {
let dir = tempfile::TempDir::new().unwrap();
let storage = Storage::new(Some(dir.path().join("test.db"))).unwrap();
let storage = Arc::new(storage);
// Default action (scan) on empty storage must not error.
let result = execute_unified(&storage, None).await;
assert!(result.is_ok());
}
#[test]
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
fn test_union_find() {