mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-27 19:25:15 +02:00
perf(kb-planner): route internal planner calls to dedicated small/fast LLM
Adds an optional planner LLM role wired through KnowledgePriorityMiddleware so KB query rewriting, date extraction, and recency classification run on a cheap model (e.g. gpt-4o-mini, Haiku, Azure nano) instead of the user's chat LLM. Operators opt in by setting is_planner: true on exactly one global config; without it, behavior is unchanged.
This commit is contained in:
parent
c3db25302b
commit
71dead0406
6 changed files with 123 additions and 10 deletions
|
|
@@ -110,6 +110,19 @@ def load_global_llm_configs():
|
|||
except Exception as e:
|
||||
print(f"Warning: Failed to score global LLM configs: {e}")
|
||||
|
||||
# Planner LLM is a singleton role. If an operator accidentally
|
||||
# marks multiple configs ``is_planner: true``, only the first one
|
||||
# is used at runtime — surface the others at startup so the
|
||||
# mistake is caught before traffic, not silently buried.
|
||||
planner_cfgs = [c for c in configs if c.get("is_planner") is True]
|
||||
if len(planner_cfgs) > 1:
|
||||
extra_ids = [c.get("id") for c in planner_cfgs[1:]]
|
||||
print(
|
||||
"Warning: Multiple global LLM configs marked is_planner=true "
|
||||
f"(ids {[c.get('id') for c in planner_cfgs]}); using id "
|
||||
f"{planner_cfgs[0].get('id')} and ignoring {extra_ids}"
|
||||
)
|
||||
|
||||
return configs
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to load global LLM configs: {e}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue