perf(kb-planner): route internal planner calls to dedicated small/fast LLM

Adds an optional planner LLM role wired through KnowledgePriorityMiddleware
so KB query rewriting, date extraction, and recency classification run on a
cheap model (e.g. gpt-4o-mini, Haiku, Azure nano) instead of the user's
chat LLM. Operators opt in by setting is_planner: true on exactly one
global config; without it, behavior is unchanged.
This commit is contained in:
CREDO23 2026-05-20 11:42:52 +02:00
parent c3db25302b
commit 71dead0406
6 changed files with 123 additions and 10 deletions

View file

@@ -6,6 +6,7 @@ from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgePriorityMiddleware
from app.services.llm_service import get_planner_llm
def build_knowledge_priority_mw(
@@ -19,6 +20,7 @@ def build_knowledge_priority_mw(
) -> KnowledgePriorityMiddleware:
return KnowledgePriorityMiddleware(
llm=llm,
planner_llm=get_planner_llm(),
search_space_id=search_space_id,
filesystem_mode=filesystem_mode,
available_connectors=available_connectors,