mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-27 19:25:15 +02:00
perf(kb-planner): route internal planner calls to dedicated small/fast LLM
Adds an optional planner LLM role wired through KnowledgePriorityMiddleware so KB query rewriting, date extraction, and recency classification run on a cheap model (e.g. gpt-4o-mini, Haiku, Azure nano) instead of the user's chat LLM. Operators opt in by setting is_planner: true on exactly one global config; without it, behavior is unchanged.
This commit is contained in:
parent
c3db25302b
commit
71dead0406
6 changed files with 123 additions and 10 deletions
|
|
@ -102,6 +102,7 @@ from app.agents.new_chat.tools.registry import (
|
|||
)
|
||||
from app.db import ChatVisibility
|
||||
from app.services.connector_service import ConnectorService
|
||||
from app.services.llm_service import get_planner_llm
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
_perf_log = get_perf_logger()
|
||||
|
|
@ -1077,6 +1078,7 @@ def _build_compiled_agent_blocking(
|
|||
else None,
|
||||
KnowledgePriorityMiddleware(
|
||||
llm=llm,
|
||||
planner_llm=get_planner_llm(),
|
||||
search_space_id=search_space_id,
|
||||
filesystem_mode=filesystem_mode,
|
||||
available_connectors=available_connectors,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue