mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-05 13:52:40 +02:00
feat: enhance knowledge base search and document retrieval
- Introduced a mechanism to identify degenerate queries that lack meaningful search signals, improving search accuracy.
- Implemented a fallback method for browsing recent documents when queries are degenerate, ensuring relevant results are returned.
- Added limits on the number of chunks fetched per document to optimize performance and prevent excessive data loading.
- Updated the ConnectorService to allow reusable query embeddings, enhancing efficiency in search operations.
- Enhanced the LLM router service to support context-window fallbacks, improving robustness when context-window limits are hit.
This commit is contained in:
parent
b08e8da40c
commit
40a091f8cc
7 changed files with 476 additions and 100 deletions
|
|
@ -159,26 +159,95 @@ class LLMRouterService:
|
|||
# Merge with provided settings
|
||||
final_settings = {**default_settings, **instance._router_settings}
|
||||
|
||||
# Build an "auto-large" fallback group with deployments whose context
|
||||
# window exceeds the smallest deployment. This lets the router
|
||||
# automatically fall back to a bigger-context model when gpt-4o (128K)
|
||||
# hits ContextWindowExceededError.
|
||||
full_model_list, ctx_fallbacks = cls._build_context_fallback_groups(model_list)
|
||||
|
||||
try:
|
||||
instance._router = Router(
|
||||
model_list=model_list,
|
||||
routing_strategy=final_settings.get(
|
||||
router_kwargs: dict[str, Any] = {
|
||||
"model_list": full_model_list,
|
||||
"routing_strategy": final_settings.get(
|
||||
"routing_strategy", "usage-based-routing"
|
||||
),
|
||||
num_retries=final_settings.get("num_retries", 3),
|
||||
allowed_fails=final_settings.get("allowed_fails", 3),
|
||||
cooldown_time=final_settings.get("cooldown_time", 60),
|
||||
set_verbose=False, # Disable verbose logging in production
|
||||
)
|
||||
"num_retries": final_settings.get("num_retries", 3),
|
||||
"allowed_fails": final_settings.get("allowed_fails", 3),
|
||||
"cooldown_time": final_settings.get("cooldown_time", 60),
|
||||
"set_verbose": False,
|
||||
}
|
||||
if ctx_fallbacks:
|
||||
router_kwargs["context_window_fallbacks"] = ctx_fallbacks
|
||||
|
||||
instance._router = Router(**router_kwargs)
|
||||
instance._initialized = True
|
||||
logger.info(
|
||||
f"LLM Router initialized with {len(model_list)} deployments, "
|
||||
f"strategy: {final_settings.get('routing_strategy')}"
|
||||
"LLM Router initialized with %d deployments, "
|
||||
"strategy: %s, context_window_fallbacks: %s",
|
||||
len(model_list),
|
||||
final_settings.get("routing_strategy"),
|
||||
ctx_fallbacks or "none",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize LLM Router: {e}")
|
||||
instance._router = None
|
||||
|
||||
@classmethod
def _build_context_fallback_groups(
    cls, model_list: list[dict]
) -> tuple[list[dict], list[dict[str, list[str]]] | None]:
    """Create an ``auto-large`` model group for context-window fallbacks.

    Queries ``litellm.get_model_info`` for each deployment's context
    window. Any deployment whose ``max_input_tokens`` is larger than the
    smallest discovered window is duplicated under the ``auto-large``
    model-group name. The returned fallback config instructs the Router:
    on ``ContextWindowExceededError`` for ``auto``, retry the request
    against ``auto-large``.

    Returns:
        (full_model_list, context_window_fallbacks) — ``full_model_list``
        is the original entries plus any ``auto-large`` duplicates;
        ``context_window_fallbacks`` is ``None`` when all deployments
        share one context size (nothing bigger to fall back to).
    """
    from litellm import get_model_info

    def _base_model_of(entry: dict) -> str:
        # Prefer the explicit base_model hint; fall back to the model id.
        params = entry.get("litellm_params", {})
        return params.get("base_model") or params.get("model", "")

    # Map each deployment's base model to its max input-token window.
    window_by_model: dict[str, int] = {}
    for entry in model_list:
        name = _base_model_of(entry)
        try:
            tokens = get_model_info(name).get("max_input_tokens")
        except Exception:
            # Models unknown to litellm simply don't join the group.
            continue
        if isinstance(tokens, int) and tokens > 0:
            window_by_model[name] = tokens

    if not window_by_model:
        return model_list, None

    smallest = min(window_by_model.values())

    # Duplicate every bigger-window deployment under "auto-large".
    oversized = [
        {**entry, "model_name": "auto-large"}
        for entry in model_list
        if window_by_model.get(_base_model_of(entry), 0) > smallest
    ]

    if not oversized:
        return model_list, None

    logger.info(
        "Context-window fallback: %d large-context deployments "
        "(min_ctx=%d) added to 'auto-large' group",
        len(oversized),
        smallest,
    )
    return model_list + oversized, [{"auto": ["auto-large"]}]
|
||||
|
||||
@classmethod
|
||||
def _config_to_deployment(cls, config: dict) -> dict | None:
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue