mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-27 19:25:15 +02:00
perf(kb-planner): route internal planner calls to dedicated small/fast LLM
Adds an optional planner LLM role wired through KnowledgePriorityMiddleware so KB query rewriting, date extraction, and recency classification run on a cheap model (e.g. gpt-4o-mini, Haiku, Azure nano) instead of the user's chat LLM. Operators opt in by setting is_planner: true on exactly one global config; without it, behavior is unchanged.
This commit is contained in:
parent
c3db25302b
commit
71dead0406
6 changed files with 123 additions and 10 deletions
|
|
@@ -6,6 +6,7 @@ from langchain_core.language_models import BaseChatModel
|
|||
|
||||
from app.agents.new_chat.filesystem_selection import FilesystemMode
|
||||
from app.agents.new_chat.middleware import KnowledgePriorityMiddleware
|
||||
from app.services.llm_service import get_planner_llm
|
||||
|
||||
|
||||
def build_knowledge_priority_mw(
|
||||
|
|
@@ -19,6 +20,7 @@ def build_knowledge_priority_mw(
|
|||
) -> KnowledgePriorityMiddleware:
|
||||
return KnowledgePriorityMiddleware(
|
||||
llm=llm,
|
||||
planner_llm=get_planner_llm(),
|
||||
search_space_id=search_space_id,
|
||||
filesystem_mode=filesystem_mode,
|
||||
available_connectors=available_connectors,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue