fix: push type filters into SQL WHERE clause + expose in MCP search

Type filtering (include_types/exclude_types) was applied post-fetch, after the database LIMIT, so a search could return zero results when all of the top-N rows were of a filtered-out type. This pushes the type filters into the SQL WHERE clause in keyword_search_with_scores() so they apply before the limit. Semantic results still get post-fetch filtering as a safety net, since the vector index cannot filter by type. Also adds hybrid_search_filtered() as the new primary method; the original hybrid_search() delegates to it with no filters for backward compatibility. The MCP search tool now exposes include_types and exclude_types parameters. Includes 5 new test cases covering include, exclude, precedence, empty results, and backward compatibility.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-08 15:22:37 +02:00 · 2026-04-07 15:32:28 -05:00 · 2026-04-07 15:32:28 -05:00 · f3e25f7503
commit f3e25f7503
parent 16fe2674ed
3 changed files with 565 additions and 314 deletions
--- a/crates/vestige-mcp/src/tools/search_unified.rs
+++ b/crates/vestige-mcp/src/tools/search_unified.rs
@@ -66,6 +66,16 @@ pub fn schema() -> Value {
                "items": { "type": "string" },
                "description": "Optional topics for context-dependent retrieval boosting"
            },
+            "exclude_types": {
+                "type": "array",
+                "items": { "type": "string" },
+                "description": "Node types to exclude from results (e.g., ['reflection']). Reflections are excluded by default to prevent polluting factual queries."
+            },
+            "include_types": {
+                "type": "array",
+                "items": { "type": "string" },
+                "description": "If set, only return nodes of these types. Overrides exclude_types."
+            },
            "token_budget": {
                "type": "integer",
                "description": "Max tokens for response. Server truncates content to fit budget. Use memory(action='get') for full content of specific IDs. With 1M context models, budgets up to 100K are practical.",
@@ -96,6 +106,10 @@ struct SearchArgs {
    detail_level: Option<String>,
    #[serde(alias = "context_topics")]
    context_topics: Option<Vec<String>>,
+    #[serde(alias = "exclude_types")]
+    exclude_types: Option<Vec<String>>,
+    #[serde(alias = "include_types")]
+    include_types: Option<Vec<String>>,
    #[serde(alias = "token_budget")]
    token_budget: Option<i32>,
    #[serde(alias = "retrieval_mode")]
@@ -174,7 +188,14 @@ pub async fn execute(
    let overfetch_limit = (limit * overfetch_multiplier).min(100); // Cap at 100 to avoid excessive DB load

    let results = storage
-        .hybrid_search(&args.query, overfetch_limit, keyword_weight, semantic_weight)
+        .hybrid_search_filtered(
+            &args.query,
+            overfetch_limit,
+            keyword_weight,
+            semantic_weight,
+            args.include_types.as_deref(),
+            args.exclude_types.as_deref(),
+        )
        .map_err(|e| e.to_string())?;

    // Filter by min_retention and min_similarity first (cheap filters)