perf(kb-planner): route internal planner calls to dedicated small/fast LLM

Adds an optional planner LLM role wired through KnowledgePriorityMiddleware
so KB query rewriting, date extraction, and recency classification run on a
cheap model (e.g. gpt-4o-mini, Haiku, Azure nano) instead of the user's
chat LLM. Operators opt in by setting `is_planner: true` on exactly one
global config; if no config sets the flag, behavior is unchanged.
This commit is contained in:
CREDO23 2026-05-20 11:42:52 +02:00
parent c3db25302b
commit 71dead0406
6 changed files with 123 additions and 10 deletions

View file

@@ -102,6 +102,7 @@ from app.agents.new_chat.tools.registry import (
)
from app.db import ChatVisibility
from app.services.connector_service import ConnectorService
from app.services.llm_service import get_planner_llm
from app.utils.perf import get_perf_logger
_perf_log = get_perf_logger()
@@ -1077,6 +1078,7 @@ def _build_compiled_agent_blocking(
else None,
KnowledgePriorityMiddleware(
llm=llm,
planner_llm=get_planner_llm(),
search_space_id=search_space_id,
filesystem_mode=filesystem_mode,
available_connectors=available_connectors,