mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-24 21:38:09 +02:00
feat(database-migrations): add migration to remove legacy model config tables and remove stale model connection code
This commit is contained in:
parent
50668775f8
commit
bd4a04f2e7
93 changed files with 956 additions and 11442 deletions
|
|
@ -1,13 +1,13 @@
|
|||
"""Resolve and persist Auto (Fastest) model pins per chat thread.
|
||||
"""Resolve and persist Auto model pins per chat thread.
|
||||
|
||||
Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we
|
||||
resolve that virtual mode to one concrete global LLM config exactly once and
|
||||
Auto is represented by ``chat_model_id == 0``. For chat threads we
|
||||
resolve that virtual mode to one concrete global model exactly once and
|
||||
persist the chosen config id on ``new_chat_threads.pinned_llm_config_id`` so
|
||||
subsequent turns are stable.
|
||||
|
||||
Single-writer invariant: this module is the only writer of
|
||||
``NewChatThread.pinned_llm_config_id`` (aside from the bulk clear in
|
||||
``search_spaces_routes`` when a search space's ``agent_llm_id`` changes).
|
||||
``model_connections_routes`` when a search space's ``chat_model_id`` changes).
|
||||
Therefore a non-NULL value unambiguously means "this thread has an
|
||||
Auto-resolved pin"; no separate source/policy column is needed.
|
||||
"""
|
||||
|
|
@ -33,8 +33,10 @@ from app.services.token_quota_service import TokenQuotaService
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
AUTO_FASTEST_ID = 0
|
||||
AUTO_FASTEST_MODE = "auto_fastest"
|
||||
AUTO_MODE_ID = 0
|
||||
# Stable internal hash namespace for deterministic per-thread selection.
|
||||
# Do not rename: changing this rebalances Auto's model choice for new pins.
|
||||
AUTO_PIN_HASH_NAMESPACE = "auto_fastest"
|
||||
_RUNTIME_COOLDOWN_SECONDS = 600
|
||||
_HEALTHY_TTL_SECONDS = 45
|
||||
|
||||
|
|
@ -383,7 +385,7 @@ def _select_pin(eligible: list[dict], thread_id: int) -> tuple[dict, int]:
|
|||
pool = tier_a if tier_a else eligible
|
||||
pool = sorted(pool, key=lambda c: -int(c.get("quality_score") or 0))
|
||||
top_k = pool[:_QUALITY_TOP_K]
|
||||
digest = hashlib.sha256(f"{AUTO_FASTEST_MODE}:{thread_id}".encode()).digest()
|
||||
digest = hashlib.sha256(f"{AUTO_PIN_HASH_NAMESPACE}:{thread_id}".encode()).digest()
|
||||
idx = int.from_bytes(digest[:8], "big") % len(top_k)
|
||||
return top_k[idx], len(top_k)
|
||||
|
||||
|
|
@ -425,7 +427,7 @@ async def resolve_or_get_pinned_llm_config_id(
|
|||
exclude_config_ids: set[int] | None = None,
|
||||
requires_image_input: bool = False,
|
||||
) -> AutoPinResolution:
|
||||
"""Resolve Auto (Fastest) to one concrete config id and persist the pin.
|
||||
"""Resolve Auto to one concrete config id and persist the pin.
|
||||
|
||||
For non-auto selections, this function clears any existing pin and returns
|
||||
the selected id as-is.
|
||||
|
|
@ -457,7 +459,7 @@ async def resolve_or_get_pinned_llm_config_id(
|
|||
)
|
||||
|
||||
# Explicit model selected: clear any stale pin.
|
||||
if selected_llm_config_id != AUTO_FASTEST_ID:
|
||||
if selected_llm_config_id != AUTO_MODE_ID:
|
||||
if thread.pinned_llm_config_id is not None:
|
||||
thread.pinned_llm_config_id = None
|
||||
await session.commit()
|
||||
|
|
|
|||
|
|
@ -450,10 +450,10 @@ async def _resolve_agent_billing_for_search_space(
|
|||
Used by Celery tasks (podcast generation, video presentation) to bill the
|
||||
search-space owner's premium credit pool when the chat model is premium.
|
||||
|
||||
Resolution rules mirror chat at ``stream_new_chat.py:2294-2351``:
|
||||
Resolution rules mirror the chat model role resolver:
|
||||
|
||||
- Search space not found / no ``agent_llm_id``: raise ``ValueError``.
|
||||
- **Auto mode** (``id == AUTO_FASTEST_ID == 0``):
|
||||
- Search space not found / no ``chat_model_id``: raise ``ValueError``.
|
||||
- **Auto mode** (``id == AUTO_MODE_ID == 0``):
|
||||
* ``thread_id`` is set: delegate to
|
||||
``resolve_or_get_pinned_llm_config_id`` (the same call chat uses) and
|
||||
recurse into the resolved id. Reuses chat's existing pin if present
|
||||
|
|
@ -469,9 +469,8 @@ async def _resolve_agent_billing_for_search_space(
|
|||
(defaults to ``"free"`` via ``app/config/__init__.py:52`` setdefault),
|
||||
``base_model = litellm_params.get("base_model") or model_name`` —
|
||||
NOT provider-prefixed, matching chat's cost-map lookup convention.
|
||||
- **Positive id** (user BYOK ``NewLLMConfig``): always free (matches
|
||||
``AgentConfig.from_new_llm_config`` which hard-codes ``billing_tier="free"``);
|
||||
``base_model`` from ``litellm_params`` or ``model_name``.
|
||||
- **Positive id** (user BYOK ``Model``): always free; ``base_model`` from
|
||||
the model catalog override or the upstream ``model_id``.
|
||||
|
||||
Note on imports: ``llm_service``, ``auto_model_pin_service``, and
|
||||
``llm_router_service`` are imported lazily inside the function body to
|
||||
|
|
@ -480,8 +479,9 @@ async def _resolve_agent_billing_for_search_space(
|
|||
``billable_calls.py``'s module load path.
|
||||
"""
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.db import NewLLMConfig, SearchSpace
|
||||
from app.db import Model, SearchSpace
|
||||
|
||||
result = await session.execute(
|
||||
select(SearchSpace).where(SearchSpace.id == search_space_id)
|
||||
|
|
@ -490,20 +490,20 @@ async def _resolve_agent_billing_for_search_space(
|
|||
if search_space is None:
|
||||
raise ValueError(f"Search space {search_space_id} not found")
|
||||
|
||||
agent_llm_id = search_space.agent_llm_id
|
||||
if agent_llm_id is None:
|
||||
chat_model_id = search_space.chat_model_id
|
||||
if chat_model_id is None:
|
||||
raise ValueError(
|
||||
f"Search space {search_space_id} has no agent_llm_id configured"
|
||||
f"Search space {search_space_id} has no chat_model_id configured"
|
||||
)
|
||||
|
||||
owner_user_id: UUID = search_space.user_id
|
||||
|
||||
from app.services.auto_model_pin_service import (
|
||||
AUTO_FASTEST_ID,
|
||||
AUTO_MODE_ID,
|
||||
resolve_or_get_pinned_llm_config_id,
|
||||
)
|
||||
|
||||
if agent_llm_id == AUTO_FASTEST_ID:
|
||||
if chat_model_id == AUTO_MODE_ID:
|
||||
if thread_id is None:
|
||||
return owner_user_id, "free", "auto"
|
||||
try:
|
||||
|
|
@ -512,7 +512,7 @@ async def _resolve_agent_billing_for_search_space(
|
|||
thread_id=thread_id,
|
||||
search_space_id=search_space_id,
|
||||
user_id=str(owner_user_id),
|
||||
selected_llm_config_id=AUTO_FASTEST_ID,
|
||||
selected_llm_config_id=AUTO_MODE_ID,
|
||||
)
|
||||
except ValueError:
|
||||
logger.warning(
|
||||
|
|
@ -523,28 +523,35 @@ async def _resolve_agent_billing_for_search_space(
|
|||
exc_info=True,
|
||||
)
|
||||
return owner_user_id, "free", "auto"
|
||||
agent_llm_id = resolution.resolved_llm_config_id
|
||||
chat_model_id = resolution.resolved_llm_config_id
|
||||
|
||||
if agent_llm_id < 0:
|
||||
if chat_model_id < 0:
|
||||
from app.services.llm_service import get_global_llm_config
|
||||
|
||||
cfg = get_global_llm_config(agent_llm_id) or {}
|
||||
cfg = get_global_llm_config(chat_model_id) or {}
|
||||
billing_tier = str(cfg.get("billing_tier", "free")).lower()
|
||||
litellm_params = cfg.get("litellm_params") or {}
|
||||
base_model = litellm_params.get("base_model") or cfg.get("model_name") or ""
|
||||
return owner_user_id, billing_tier, base_model
|
||||
|
||||
nlc_result = await session.execute(
|
||||
select(NewLLMConfig).where(
|
||||
NewLLMConfig.id == agent_llm_id,
|
||||
NewLLMConfig.search_space_id == search_space_id,
|
||||
)
|
||||
model_result = await session.execute(
|
||||
select(Model)
|
||||
.options(selectinload(Model.connection))
|
||||
.where(Model.id == chat_model_id, Model.enabled.is_(True))
|
||||
)
|
||||
nlc = nlc_result.scalars().first()
|
||||
model = model_result.scalars().first()
|
||||
base_model = ""
|
||||
if nlc is not None:
|
||||
litellm_params = nlc.litellm_params or {}
|
||||
base_model = litellm_params.get("base_model") or nlc.model_name or ""
|
||||
if (
|
||||
model is not None
|
||||
and model.connection is not None
|
||||
and model.connection.enabled
|
||||
and (
|
||||
model.connection.search_space_id in (None, search_space_id)
|
||||
and model.connection.user_id in (None, owner_user_id)
|
||||
)
|
||||
):
|
||||
catalog = model.catalog or {}
|
||||
base_model = catalog.get("base_model") or model.model_id or ""
|
||||
return owner_user_id, "free", base_model
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,11 @@ from app.services.auto_model_pin_service import (
|
|||
auto_model_candidates,
|
||||
choose_auto_model_candidate,
|
||||
)
|
||||
from app.services.llm_router_service import AUTO_MODE_ID, ChatLiteLLMRouter, is_auto_mode
|
||||
from app.services.llm_router_service import (
|
||||
AUTO_MODE_ID,
|
||||
ChatLiteLLMRouter,
|
||||
is_auto_mode,
|
||||
)
|
||||
from app.services.model_capabilities import has_capability
|
||||
from app.services.model_resolver import native_connection_from_config, to_litellm
|
||||
from app.services.token_tracking_service import token_tracker
|
||||
|
|
@ -96,26 +100,16 @@ class LLMRole:
|
|||
def get_global_llm_config(llm_config_id: int) -> dict | None:
|
||||
"""
|
||||
Get a global LLM configuration by ID.
|
||||
Global configs have negative IDs. ID 0 is reserved for Auto mode.
|
||||
Global configs have negative IDs. Auto mode (ID 0) is resolved through the
|
||||
model-candidate pipeline, not this legacy config lookup.
|
||||
|
||||
Args:
|
||||
llm_config_id: The ID of the global config (should be negative or 0 for Auto)
|
||||
llm_config_id: The ID of the global config (must be negative)
|
||||
|
||||
Returns:
|
||||
dict: Global config dictionary or None if not found
|
||||
"""
|
||||
# Auto mode (ID 0) is handled separately via the router
|
||||
if llm_config_id == AUTO_MODE_ID:
|
||||
return {
|
||||
"id": AUTO_MODE_ID,
|
||||
"name": "Auto (Fastest)",
|
||||
"description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling",
|
||||
"provider": "AUTO",
|
||||
"model_name": "auto",
|
||||
"is_auto_mode": True,
|
||||
}
|
||||
|
||||
if llm_config_id > 0:
|
||||
if llm_config_id >= 0:
|
||||
return None
|
||||
|
||||
for cfg in config.GLOBAL_LLM_CONFIGS:
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ CACHE_TTL_SECONDS = 86400 # 24 hours
|
|||
_cache: list[dict] | None = None
|
||||
_cache_timestamp: float = 0
|
||||
|
||||
# Maps OpenRouter provider slug → our LiteLLMProvider enum value.
|
||||
# Maps OpenRouter provider slug to native LiteLLM provider prefixes.
|
||||
# Only providers where the model-name part (after the slash) can be
|
||||
# used directly with the native provider's litellm prefix are listed.
|
||||
#
|
||||
|
|
|
|||
|
|
@ -281,7 +281,7 @@ def _generate_configs(
|
|||
|
||||
OpenRouter's own ``openrouter/free`` meta-router is filtered out upstream
|
||||
via ``_EXCLUDED_MODEL_IDS``; we don't expose a redundant auto-select layer
|
||||
because our own Auto (Fastest) pin + 24 h refresh + repair logic already
|
||||
because our own Auto pin + 24 h refresh + repair logic already
|
||||
cover the catalogue-churn case.
|
||||
"""
|
||||
id_offset: int = settings.get("id_offset", -10000)
|
||||
|
|
@ -346,7 +346,7 @@ def _generate_configs(
|
|||
# ``"No endpoints found that support image input"``.
|
||||
"supports_image_input": bool(normalized.get("supports_image_input")),
|
||||
_OPENROUTER_DYNAMIC_MARKER: True,
|
||||
# Auto (Fastest) ranking metadata. ``quality_score`` is initialised
|
||||
# Auto ranking metadata. ``quality_score`` is initialised
|
||||
# to the static score and gets re-blended with health on the next
|
||||
# ``_enrich_health`` pass (synchronous on refresh, deferred on cold
|
||||
# start so startup latency is unchanged).
|
||||
|
|
@ -361,11 +361,7 @@ def _generate_configs(
|
|||
return configs
|
||||
|
||||
|
||||
# ID-offset bands used to keep dynamic OpenRouter configs in their own
|
||||
# namespace per surface. Image / vision get separate bands so a single
|
||||
# Postgres-INTEGER cfg ID is unambiguous about which selector it belongs to.
|
||||
_OPENROUTER_IMAGE_ID_OFFSET_DEFAULT = -20000
|
||||
_OPENROUTER_VISION_ID_OFFSET_DEFAULT = -30000
|
||||
|
||||
|
||||
def _generate_image_gen_configs(
|
||||
|
|
@ -431,89 +427,6 @@ def _generate_image_gen_configs(
|
|||
return configs
|
||||
|
||||
|
||||
def _generate_vision_llm_configs(
    raw_models: list[dict], settings: dict[str, Any]
) -> list[dict]:
    """Build global vision-LLM config dicts from the raw OpenRouter catalogue.

    The output matches the YAML shape consumed by ``vision_llm_routes``.

    A model is kept when all of the following hold:
    - it accepts image input (``supports_image_input``)
    - its provider is compatible (excluded slugs blocked)
    - its model id is allowed (excluded list blocked)
    - its id contains a ``/``

    Vision-LLM is invoked from the indexer (image extraction during document
    upload) via ``langchain_litellm.ChatLiteLLM.ainvoke``, so the chat-only
    ``_supports_tool_calling`` and ``_has_sufficient_context`` filters do not
    apply: a small-context vision model that doesn't advertise tool-calling
    is still viable for "describe this image" prompts.

    Args:
        raw_models: Raw model dicts from the OpenRouter catalogue.
        settings: Integration settings (id offset, API key, rate limits,
            extra ``litellm_params``); every key is optional.

    Returns:
        One config dict per retained model, in catalogue order.
    """

    def _price(price_table: dict, key: str) -> float:
        # Missing, empty or unparsable prices collapse to 0.0.
        try:
            return float(price_table.get(key, 0) or 0)
        except (TypeError, ValueError):
            return 0.0

    id_offset: int = int(
        settings.get("vision_id_offset") or _OPENROUTER_VISION_ID_OFFSET_DEFAULT
    )
    api_key: str = settings.get("api_key", "")
    rpm: int = settings.get("rpm", 200)
    tpm: int = settings.get("tpm", 1_000_000)
    free_rpm: int = settings.get("free_rpm", 20)
    free_tpm: int = settings.get("free_tpm", 100_000)
    quota_reserve_tokens: int = settings.get("quota_reserve_tokens", 4000)
    litellm_params: dict = settings.get("litellm_params") or {}

    # Filter the whole catalogue first, then materialise configs.
    eligible: list[dict] = []
    for candidate in raw_models:
        if not supports_image_input(candidate):
            continue
        if not _shared_is_compatible_provider(candidate):
            continue
        if not _shared_is_allowed_model(candidate):
            continue
        if "/" not in candidate.get("id", ""):
            continue
        eligible.append(candidate)

    configs: list[dict] = []
    taken: set[int] = set()
    for model in eligible:
        model_id: str = model["id"]
        name: str = model.get("name", model_id)
        tier = _openrouter_tier(model)
        is_free = tier == "free"

        # Per-token prices are captured so ``pricing_registration`` can
        # register them with LiteLLM at startup (and so the cost estimator
        # in ``estimate_call_reserve_micros`` can resolve them at reserve
        # time).
        pricing = model.get("pricing") or {}
        input_cost = _price(pricing, "prompt")
        output_cost = _price(pricing, "completion")

        configs.append(
            {
                "id": _stable_config_id(model_id, id_offset, taken),
                "name": name,
                "description": f"{name} via OpenRouter (vision)",
                "provider": "openrouter",
                "model_name": model_id,
                "api_key": api_key,
                "api_base": "https://openrouter.ai/api/v1",
                "api_version": None,
                "rpm": free_rpm if is_free else rpm,
                "tpm": free_tpm if is_free else tpm,
                # Copy so per-config mutation cannot leak into the settings.
                "litellm_params": dict(litellm_params),
                "billing_tier": tier,
                "quota_reserve_tokens": quota_reserve_tokens,
                # A zero price is stored as None rather than 0.0.
                "input_cost_per_token": input_cost or None,
                "output_cost_per_token": output_cost or None,
                _OPENROUTER_DYNAMIC_MARKER: True,
            }
        )

    return configs
|
||||
|
||||
|
||||
class OpenRouterIntegrationService:
|
||||
"""Singleton that manages the dynamic OpenRouter model catalogue."""
|
||||
|
||||
|
|
@ -724,7 +637,7 @@ class OpenRouterIntegrationService:
|
|||
return counts
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Auto (Fastest) health enrichment
|
||||
# Auto health enrichment
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _enrich_health_safely(
|
||||
|
|
|
|||
|
|
@ -154,10 +154,8 @@ def _register_chat_shape_configs(
|
|||
input_cost = _safe_float(entry.get("prompt"))
|
||||
output_cost = _safe_float(entry.get("completion"))
|
||||
else:
|
||||
# Vision configs from ``_generate_vision_llm_configs``
|
||||
# carry their pricing inline because the OpenRouter
|
||||
# raw-pricing cache is keyed by chat-catalogue model_id;
|
||||
# vision flows pick up the inline values here.
|
||||
# Some dynamically materialized configs can carry pricing
|
||||
# inline when the raw OpenRouter cache has no matching entry.
|
||||
input_cost = _safe_float(cfg.get("input_cost_per_token"))
|
||||
output_cost = _safe_float(cfg.get("output_cost_per_token"))
|
||||
if input_cost == 0.0 and output_cost == 0.0:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Pure-function quality scoring for Auto (Fastest) model selection.
|
||||
"""Pure-function quality scoring for Auto model selection.
|
||||
|
||||
This module is import-free of any service / request-path dependencies. All
|
||||
numbers are computed once during the OpenRouter refresh tick (or YAML load)
|
||||
|
|
|
|||
|
|
@ -1,160 +0,0 @@
|
|||
import logging
|
||||
from typing import Any
|
||||
|
||||
from litellm import Router
|
||||
|
||||
from app.services.model_resolver import native_connection_from_config, to_litellm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Sentinel config id that stands for "Auto" in the vision LLM selector.
VISION_AUTO_MODE_ID = 0
|
||||
|
||||
class VisionLLMRouterService:
    """Singleton holding a LiteLLM ``Router`` built from global vision configs.

    Every deployment is registered under the shared model name ``"auto"``.
    All public entry points are classmethods that operate on the single
    shared instance.
    """

    _instance = None
    _router: Router | None = None
    _model_list: list[dict] = []
    _router_settings: dict = {}
    _initialized: bool = False

    def __new__(cls):
        # Reuse the one shared instance instead of allocating a new object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @classmethod
    def get_instance(cls) -> "VisionLLMRouterService":
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def initialize(
        cls,
        global_configs: list[dict],
        router_settings: dict | None = None,
    ) -> None:
        """Build the router from ``global_configs``; a no-op once initialised.

        Args:
            global_configs: Global vision LLM config dicts. Entries that
                cannot be converted to a deployment are skipped.
            router_settings: Optional overrides merged on top of the default
                router settings.

        Failures while constructing the router are logged (with traceback)
        and leave the service uninitialised so a later call can retry.
        """
        instance = cls.get_instance()

        if instance._initialized:
            logger.debug("Vision LLM Router already initialized, skipping")
            return

        model_list = [
            deployment
            for cfg in global_configs
            if (deployment := cls._config_to_deployment(cfg))
        ]

        if not model_list:
            logger.warning(
                "No valid vision LLM configs found for router initialization"
            )
            return

        instance._model_list = model_list
        instance._router_settings = router_settings or {}

        # NOTE(review): ``retry_after`` is part of the defaults but is not
        # forwarded to ``Router`` below — confirm whether that is intended.
        default_settings = {
            "routing_strategy": "usage-based-routing",
            "num_retries": 3,
            "allowed_fails": 3,
            "cooldown_time": 60,
            "retry_after": 5,
        }

        final_settings = {**default_settings, **instance._router_settings}

        try:
            instance._router = Router(
                model_list=model_list,
                routing_strategy=final_settings.get(
                    "routing_strategy", "usage-based-routing"
                ),
                num_retries=final_settings.get("num_retries", 3),
                allowed_fails=final_settings.get("allowed_fails", 3),
                cooldown_time=final_settings.get("cooldown_time", 60),
                set_verbose=False,
            )
            instance._initialized = True
            logger.info(
                "Vision LLM Router initialized with %d deployments, strategy: %s",
                len(model_list),
                final_settings.get("routing_strategy"),
            )
        except Exception as e:
            # Best-effort startup: keep the traceback so the root cause is
            # visible, but do not propagate.
            logger.exception("Failed to initialize Vision LLM Router: %s", e)
            instance._router = None

    @classmethod
    def _config_to_deployment(cls, config: dict) -> dict | None:
        """Convert one global config dict into a Router deployment entry.

        Returns ``None`` when the config lacks ``model_name`` or ``api_key``
        or when the conversion raises.
        """
        try:
            if not config.get("model_name") or not config.get("api_key"):
                return None

            model_string, resolved_kwargs = to_litellm(
                native_connection_from_config(config),
                config["model_name"],
            )
            litellm_params: dict[str, Any] = {"model": model_string, **resolved_kwargs}

            deployment: dict[str, Any] = {
                "model_name": "auto",
                "litellm_params": litellm_params,
            }

            # Rate limits are only attached when configured (truthy).
            if config.get("rpm"):
                deployment["rpm"] = config["rpm"]
            if config.get("tpm"):
                deployment["tpm"] = config["tpm"]

            return deployment

        except Exception as e:
            logger.warning("Failed to convert vision config to deployment: %s", e)
            return None

    @classmethod
    def get_router(cls) -> Router | None:
        """Return the underlying router, or ``None`` if none was built."""
        instance = cls.get_instance()
        return instance._router

    @classmethod
    def is_initialized(cls) -> bool:
        """Return True when initialisation succeeded and a router exists."""
        instance = cls.get_instance()
        return instance._initialized and instance._router is not None

    @classmethod
    def get_model_count(cls) -> int:
        """Return the number of deployments recorded for the router."""
        instance = cls.get_instance()
        return len(instance._model_list)
|
||||
|
||||
|
||||
def is_vision_auto_mode(config_id: int | None) -> bool:
    """Return True when ``config_id`` equals the vision Auto-mode sentinel id."""
    matches_auto = config_id == VISION_AUTO_MODE_ID
    return matches_auto
|
||||
|
||||
|
||||
def build_vision_model_string(
|
||||
litellm_provider: str, model_name: str, custom_provider: str | None
|
||||
) -> str:
|
||||
if custom_provider:
|
||||
return f"{custom_provider}/{model_name}"
|
||||
return f"{litellm_provider}/{model_name}"
|
||||
|
||||
|
||||
def get_global_vision_llm_config(config_id: int) -> dict | None:
    """Look up a global vision LLM config by id.

    The Auto-mode sentinel id yields a synthetic descriptor, positive ids
    are never global and yield ``None``, and any other id is searched for in
    ``config.GLOBAL_VISION_LLM_CONFIGS``.

    Returns:
        The matching config dict, or ``None`` when nothing matches.
    """
    from app.config import config

    if config_id == VISION_AUTO_MODE_ID:
        auto_entry = {
            "id": VISION_AUTO_MODE_ID,
            "name": "Auto (Fastest)",
            "provider": "AUTO",
            "model_name": "auto",
            "is_auto_mode": True,
        }
        return auto_entry

    if config_id > 0:
        return None

    return next(
        (
            entry
            for entry in config.GLOBAL_VISION_LLM_CONFIGS
            if entry.get("id") == config_id
        ),
        None,
    )
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
"""
|
||||
Service for fetching and caching the vision-capable model list.
|
||||
|
||||
Reuses the same OpenRouter public API and local fallback as the LLM model
|
||||
list service, but filters for models that accept image input.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
|
||||
FALLBACK_FILE = (
|
||||
Path(__file__).parent.parent / "config" / "vision_model_list_fallback.json"
|
||||
)
|
||||
CACHE_TTL_SECONDS = 86400 # 24 hours
|
||||
|
||||
_cache: list[dict] | None = None
|
||||
_cache_timestamp: float = 0
|
||||
|
||||
OPENROUTER_SLUG_TO_VISION_PROVIDER: dict[str, str] = {
|
||||
"openai": "OPENAI",
|
||||
"anthropic": "ANTHROPIC",
|
||||
"google": "GOOGLE",
|
||||
"mistralai": "MISTRAL",
|
||||
"x-ai": "XAI",
|
||||
}
|
||||
|
||||
|
||||
def _format_context_length(length: int | None) -> str | None:
|
||||
if not length:
|
||||
return None
|
||||
if length >= 1_000_000:
|
||||
return f"{length / 1_000_000:g}M"
|
||||
if length >= 1_000:
|
||||
return f"{length / 1_000:g}K"
|
||||
return str(length)
|
||||
|
||||
|
||||
async def _fetch_from_openrouter() -> list[dict] | None:
    """Fetch the raw model list from the OpenRouter models endpoint.

    Returns:
        The value of the response's ``"data"`` key (an empty list when the
        key is absent), or ``None`` when the request or decoding fails.
    """
    try:
        async with httpx.AsyncClient(timeout=15) as http:
            reply = await http.get(OPENROUTER_API_URL)
            reply.raise_for_status()
            return reply.json().get("data", [])
    except Exception as exc:
        # Any failure is reported as None so the caller can fall back.
        logger.warning("Failed to fetch from OpenRouter API for vision models: %s", exc)
        return None
|
||||
|
||||
|
||||
def _load_fallback() -> list[dict]:
    """Load the fallback model list from ``FALLBACK_FILE``.

    Returns:
        The decoded JSON content, or an empty list when the file cannot be
        read or parsed (the error is logged).
    """
    try:
        raw_text = FALLBACK_FILE.read_text(encoding="utf-8")
        return json.loads(raw_text)
    except Exception as exc:
        logger.error("Failed to load vision model fallback list: %s", exc)
        return []
|
||||
|
||||
|
||||
def _is_vision_model(model: dict) -> bool:
|
||||
"""Return True if the model accepts image input and outputs text."""
|
||||
arch = model.get("architecture", {})
|
||||
input_mods = arch.get("input_modalities", [])
|
||||
output_mods = arch.get("output_modalities", [])
|
||||
return "image" in input_mods and "text" in output_mods
|
||||
|
||||
|
||||
def _process_vision_models(raw_models: list[dict]) -> list[dict]:
    """Turn raw OpenRouter models into selectable vision model options.

    Each vision-capable model whose id has the form ``<slug>/<name>`` yields
    an ``OPENROUTER`` option keyed by the full id. When the slug maps to a
    direct provider, a second option keyed by the bare model name is added
    (for ``GOOGLE`` only when the name starts with ``gemini-``).

    Args:
        raw_models: Raw model dicts from the OpenRouter catalogue.

    Returns:
        Option dicts with ``value``, ``label``, ``provider`` and
        ``context_window`` keys, in catalogue order.
    """
    processed: list[dict] = []

    for model in raw_models:
        # A null or non-string id must be skipped, not crash the whole pass.
        model_id = model.get("id") or ""
        if not isinstance(model_id, str) or "/" not in model_id:
            continue

        if not _is_vision_model(model):
            continue

        # Fall back to the id so an option never has an empty label.
        label = model.get("name") or model_id
        provider_slug, model_name = model_id.split("/", 1)
        context_window = _format_context_length(model.get("context_length"))

        processed.append(
            {
                "value": model_id,
                "label": label,
                "provider": "OPENROUTER",
                "context_window": context_window,
            }
        )

        direct_provider = OPENROUTER_SLUG_TO_VISION_PROVIDER.get(provider_slug)
        if not direct_provider:
            continue
        if direct_provider == "GOOGLE" and not model_name.startswith("gemini-"):
            continue

        processed.append(
            {
                "value": model_name,
                "label": label,
                "provider": direct_provider,
                "context_window": context_window,
            }
        )

    return processed
|
||||
|
||||
|
||||
async def get_vision_model_list() -> list[dict]:
    """Return the processed vision model list, cached for ``CACHE_TTL_SECONDS``.

    A fresh cache is returned directly. Otherwise the list is re-fetched
    from OpenRouter, processed and cached. When the fetch fails, a
    previously cached (now expired) list is preferred over the fallback
    file; the fallback file is used only when nothing was ever cached.

    Returns:
        Option dicts as produced by ``_process_vision_models``, or the
        content of the fallback file.
    """
    global _cache, _cache_timestamp

    if _cache is not None and (time.time() - _cache_timestamp) < CACHE_TTL_SECONDS:
        return _cache

    raw_models = await _fetch_from_openrouter()

    if raw_models is None:
        if _cache is not None:
            # The timestamp is left untouched so the next call retries the
            # fetch instead of treating the stale data as fresh.
            logger.info("OpenRouter fetch failed; serving stale vision model cache")
            return _cache
        logger.info("Using fallback vision model list")
        return _load_fallback()

    processed = _process_vision_models(raw_models)

    _cache = processed
    _cache_timestamp = time.time()

    return processed
|
||||
Loading…
Add table
Add a link
Reference in a new issue