mirror of
https://github.com/samvallad33/vestige.git
synced 2026-05-08 07:12:37 +02:00
fix: push type filters into SQL WHERE clause + expose in MCP search
Type filtering (include_types/exclude_types) was applied post-fetch, after the database LIMIT, so a search could return zero results whenever every one of the top-N rows had a type that the filter removed. This commit pushes the type filters into the SQL WHERE clause in keyword_search_with_scores() so they apply before the limit. Semantic results still get post-fetch filtering as a safety net, since the vector index cannot filter by type. It also adds hybrid_search_filtered() as the new primary method; the original hybrid_search() delegates to it with no filters for backward compatibility. The MCP search tool now exposes include_types and exclude_types parameters. Includes 5 new test cases covering include, exclude, precedence, empty results, and backward compatibility. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
16fe2674ed
commit
f3e25f7503
3 changed files with 565 additions and 314 deletions
|
|
@@ -66,6 +66,16 @@ pub fn schema() -> Value {
|
|||
"items": { "type": "string" },
|
||||
"description": "Optional topics for context-dependent retrieval boosting"
|
||||
},
|
||||
"exclude_types": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "Node types to exclude from results (e.g., ['reflection']). Reflections are excluded by default to prevent polluting factual queries."
|
||||
},
|
||||
"include_types": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "If set, only return nodes of these types. Overrides exclude_types."
|
||||
},
|
||||
"token_budget": {
|
||||
"type": "integer",
|
||||
"description": "Max tokens for response. Server truncates content to fit budget. Use memory(action='get') for full content of specific IDs. With 1M context models, budgets up to 100K are practical.",
|
||||
|
|
@@ -96,6 +106,10 @@ struct SearchArgs {
|
|||
detail_level: Option<String>,
|
||||
#[serde(alias = "context_topics")]
|
||||
context_topics: Option<Vec<String>>,
|
||||
#[serde(alias = "exclude_types")]
|
||||
exclude_types: Option<Vec<String>>,
|
||||
#[serde(alias = "include_types")]
|
||||
include_types: Option<Vec<String>>,
|
||||
#[serde(alias = "token_budget")]
|
||||
token_budget: Option<i32>,
|
||||
#[serde(alias = "retrieval_mode")]
|
||||
|
|
@@ -174,7 +188,14 @@ pub async fn execute(
|
|||
let overfetch_limit = (limit * overfetch_multiplier).min(100); // Cap at 100 to avoid excessive DB load
|
||||
|
||||
let results = storage
|
||||
.hybrid_search(&args.query, overfetch_limit, keyword_weight, semantic_weight)
|
||||
.hybrid_search_filtered(
|
||||
&args.query,
|
||||
overfetch_limit,
|
||||
keyword_weight,
|
||||
semantic_weight,
|
||||
args.include_types.as_deref(),
|
||||
args.exclude_types.as_deref(),
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
// Filter by min_retention and min_similarity first (cheap filters)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue