feat(models): add provider catalog and resolver

This commit is contained in:
Anish Sarkar 2026-06-10 21:47:42 +05:30
parent adb857925b
commit 8b59ca59c1
6 changed files with 355 additions and 56 deletions

View file

@ -389,10 +389,28 @@ def initialize_openrouter_integration():
)
except Exception as e:
print(f"Warning: Failed to inject OpenRouter vision-LLM configs: {e}")
refresh_global_model_catalog()
except Exception as e:
print(f"Warning: Failed to initialize OpenRouter integration: {e}")
def materialize_global_configs():
    """Build the GLOBAL connection/model catalog from the live ``config`` lists.

    Reads ``GLOBAL_LLM_CONFIGS``, ``GLOBAL_VISION_LLM_CONFIGS`` and
    ``GLOBAL_IMAGE_GEN_CONFIGS`` from ``config`` (a missing attribute is
    treated as an empty list) and returns whatever
    ``materialize_global_model_catalog`` produces for them.
    """
    # Kept as a function-scope import, so the catalog module is only loaded
    # when this function is actually called.
    from app.services.global_model_catalog import materialize_global_model_catalog

    # Catalog keyword argument -> name of the ``config`` attribute feeding it.
    attribute_by_kwarg = {
        "chat_configs": "GLOBAL_LLM_CONFIGS",
        "vision_configs": "GLOBAL_VISION_LLM_CONFIGS",
        "image_configs": "GLOBAL_IMAGE_GEN_CONFIGS",
    }
    catalog_inputs = {
        kwarg: getattr(config, attribute, [])
        for kwarg, attribute in attribute_by_kwarg.items()
    }
    return materialize_global_model_catalog(**catalog_inputs)
def refresh_global_model_catalog():
    """Recompute the GLOBAL catalog and publish it on ``config``.

    Overwrites ``config.GLOBAL_CONNECTIONS`` and ``config.GLOBAL_MODELS`` with
    the pair returned by ``materialize_global_configs``.
    """
    # The right-hand side is fully evaluated and unpacked before either
    # attribute is stored, so a failed rebuild leaves the old catalog intact.
    config.GLOBAL_CONNECTIONS, config.GLOBAL_MODELS = materialize_global_configs()
def initialize_pricing_registration():
"""
Teach LiteLLM the per-token cost of every deployment in
@ -723,7 +741,7 @@ class Config:
os.getenv("QUOTA_DEFAULT_IMAGE_RESERVE_MICROS", "50000")
)
# Per-podcast reservation (in micro-USD). One agent LLM call generating
# Per-podcast reservation (in micro-USD). One chat model call generating
# a transcript, typically 5k-20k completion tokens. $0.20 covers a long
# premium-model run. Tune via env.
QUOTA_DEFAULT_PODCAST_RESERVE_MICROS = int(
@ -849,6 +867,19 @@ class Config:
# Router settings for Vision LLM Auto mode
VISION_LLM_ROUTER_SETTINGS = load_vision_llm_router_settings()
# Virtual GLOBAL connection/model catalog. This is server-only metadata
# derived from global_llm_config.yaml; GLOBAL keys are not stored in DB.
from app.services.global_model_catalog import (
materialize_global_model_catalog as _materialize_global_model_catalog,
)
GLOBAL_CONNECTIONS, GLOBAL_MODELS = _materialize_global_model_catalog(
chat_configs=GLOBAL_LLM_CONFIGS,
vision_configs=GLOBAL_VISION_LLM_CONFIGS,
image_configs=GLOBAL_IMAGE_GEN_CONFIGS,
)
del _materialize_global_model_catalog
# OpenRouter Integration settings (optional)
OPENROUTER_INTEGRATION_SETTINGS = load_openrouter_integration_settings()

View file

@ -7,8 +7,9 @@
# NOTE: The example API keys below are placeholders and won't work.
# Replace them with your actual API keys to enable global configurations.
#
# These configurations will be available to all users as a convenient option
# Users can choose to use these global configs or add their own
# These configurations are materialized as server-owned GLOBAL connections/models
# and become available on the Models page. Users can choose hosted/global models
# or add their own BYOK/local connections.
#
# AUTO MODE (Recommended):
# - Auto mode (ID: 0) uses LiteLLM Router to automatically load balance across all global configs
@ -16,9 +17,12 @@
# - New users are automatically assigned Auto mode by default
# - Configure router_settings below to customize the load balancing behavior
#
# Structure matches NewLLMConfig:
# - Model configuration (provider, model_name, api_key, etc.)
# - Prompt configuration (system_instructions, citations_enabled)
# Static config shape:
# - Connection fields: provider, api_key, api_base, api_version
# - Model fields: model_name, billing_tier, rpm/tpm, litellm_params
# - Prompt defaults: system_instructions, citations_enabled
# IDs share one GLOBAL model namespace across chat, vision, and image generation.
# Suggested ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999.
#
# COST-BASED PREMIUM CREDITS:
# Each premium config bills the user's USD-credit balance based on the
@ -327,7 +331,7 @@ openrouter_integration:
quota_reserve_tokens: 4000
# id_offset: base negative ID for dynamically generated configs.
# Model IDs are derived deterministically via BLAKE2b so they survive
# catalogue churn. Must not overlap with your static global_llm_configs IDs.
# catalogue churn. Must not overlap with any static GLOBAL model IDs.
id_offset: -10000
# refresh_interval_hours: how often to re-fetch models from OpenRouter (0 = startup only)
refresh_interval_hours: 24
@ -351,8 +355,8 @@ openrouter_integration:
# Image generation + vision LLM emission are OPT-IN. OpenRouter's catalogue
# contains hundreds of image- and vision-capable models; turning these on
# injects them into the global Image-Generation / Vision-LLM model
# selectors alongside any static configs. Tier (free/premium) is derived
# injects them into the global image-generation / vision model lists
# alongside any static configs. Tier (free/premium) is derived
# per model the same way it is for chat (`:free` suffix or zero pricing).
# When a user picks a premium image/vision model the call debits the
# shared $5 USD-cost-based premium credit pool — so leaving these off
@ -384,7 +388,7 @@ image_generation_router_settings:
global_image_generation_configs:
# Example: OpenAI DALL-E 3
- id: -1
- id: -2001
name: "Global DALL-E 3"
description: "OpenAI's DALL-E 3 for high-quality image generation"
provider: "OPENAI"
@ -395,7 +399,7 @@ global_image_generation_configs:
litellm_params: {}
# Example: OpenAI GPT Image 1
- id: -2
- id: -2002
name: "Global GPT Image 1"
description: "OpenAI's GPT Image 1 model"
provider: "OPENAI"
@ -406,7 +410,7 @@ global_image_generation_configs:
litellm_params: {}
# Example: Azure OpenAI DALL-E 3
- id: -3
- id: -2003
name: "Global Azure DALL-E 3"
description: "Azure-hosted DALL-E 3 deployment"
provider: "AZURE_OPENAI"
@ -419,7 +423,7 @@ global_image_generation_configs:
base_model: "dall-e-3"
# Example: OpenRouter Gemini Image Generation
# - id: -4
# - id: -2004
# name: "Global Gemini Image Gen"
# description: "Google Gemini image generation via OpenRouter"
# provider: "OPENROUTER"
@ -448,7 +452,7 @@ vision_llm_router_settings:
global_vision_llm_configs:
# Example: OpenAI GPT-4o (recommended for vision)
- id: -1
- id: -1001
name: "Global GPT-4o Vision"
description: "OpenAI's GPT-4o with strong vision capabilities"
provider: "OPENAI"
@ -462,7 +466,7 @@ global_vision_llm_configs:
max_tokens: 1000
# Example: Google Gemini 2.0 Flash
- id: -2
- id: -1002
name: "Global Gemini 2.0 Flash"
description: "Google's fast vision model with large context"
provider: "GOOGLE"
@ -476,7 +480,7 @@ global_vision_llm_configs:
max_tokens: 1000
# Example: Anthropic Claude 3.5 Sonnet
- id: -3
- id: -1003
name: "Global Claude 3.5 Sonnet Vision"
description: "Anthropic's Claude 3.5 Sonnet with vision support"
provider: "ANTHROPIC"
@ -490,7 +494,7 @@ global_vision_llm_configs:
max_tokens: 1000
# Example: Azure OpenAI GPT-4o
# - id: -4
# - id: -1004
# name: "Global Azure GPT-4o Vision"
# description: "Azure-hosted GPT-4o for vision analysis"
# provider: "AZURE_OPENAI"
@ -507,8 +511,9 @@ global_vision_llm_configs:
# Notes:
# - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing
# - Use negative IDs to distinguish global configs from user configs (NewLLMConfig in DB)
# - IDs should be unique and sequential (e.g., -1, -2, -3, etc.)
# - Use negative IDs to distinguish global models from BYOK/local DB models
# - IDs must be unique across chat, vision, and image generation configs
# - Suggested static ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999
# - The 'api_key' field will not be exposed to users via API
# - system_instructions: Custom prompt or empty string to use defaults
# - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS when system_instructions is empty
@ -519,7 +524,7 @@ global_vision_llm_configs:
#
#
# IMAGE GENERATION NOTES:
# - Image generation configs use the same ID scheme as LLM configs (negative for global)
# - Image generation configs use the shared GLOBAL ID namespace
# - Supported models: dall-e-2, dall-e-3, gpt-image-1 (OpenAI), azure/* (Azure),
# bedrock/* (AWS), vertex_ai/* (Google), recraft/* (Recraft), openrouter/* (OpenRouter)
# - The router uses litellm.aimage_generation() for async image generation
@ -527,7 +532,7 @@ global_vision_llm_configs:
# TPM (tokens per minute) does not apply since image APIs are billed/rate-limited per request, not per token.
#
# VISION LLM NOTES:
# - Vision configs use the same ID scheme (negative for global, positive for user DB)
# - Vision configs use the shared GLOBAL ID namespace
# - Only use vision-capable models (GPT-4o, Gemini, Claude 3, etc.)
# - Lower temperature (0.3) is recommended for accurate screenshot analysis
# - Lower max_tokens (1000) is sufficient since autocomplete produces short suggestions

View file

@ -229,7 +229,7 @@ class LLMPreferencesRead(BaseModel):
description="ID of the vision LLM config to use for vision/screenshot analysis",
)
agent_llm: dict[str, Any] | None = Field(
None, description="Full config for agent LLM"
None, description="Full config for chat model"
)
image_generation_config: dict[str, Any] | None = Field(
None, description="Full config for image generation"

View file

@ -0,0 +1,142 @@
"""Materialize server-owned GLOBAL YAML configs as virtual connections/models."""
from __future__ import annotations
from typing import Any
from app.services.model_resolver import native_connection_from_config
def _base_model(config: dict[str, Any]) -> str | None:
    """Return ``litellm_params["base_model"]`` from a config, if available.

    Yields ``None`` when ``litellm_params`` is missing, empty, not a dict, or
    has no ``base_model`` entry.
    """
    params = config.get("litellm_params")
    if not params or not isinstance(params, dict):
        return None
    return params.get("base_model")
def _connection_key(conn: dict[str, Any]) -> tuple[Any, ...]:
    """Return the hashable identity tuple used to de-duplicate connections.

    The tuple holds the connection's protocol, native provider, base URL and
    API key, followed by a frozen (hashable) copy of its ``extra`` mapping.
    """
    # Deliberately includes api_key because two operator-owned credentials for
    # the same provider/base can have different quota/rate limits upstream.
    identity = [
        conn.get(field)
        for field in ("protocol", "native_provider", "base_url", "api_key")
    ]
    identity.append(_freeze(conn.get("extra") or {}))
    return tuple(identity)
def _freeze(value: Any) -> Any:
    """Recursively convert dicts and lists into nested tuples.

    A list becomes a tuple of its frozen items; a dict becomes a tuple of
    ``(key, frozen_value)`` pairs in sorted order. Every other value is
    returned unchanged.
    """
    if isinstance(value, list):
        return tuple(map(_freeze, value))
    if isinstance(value, dict):
        pairs = [(name, _freeze(item)) for name, item in value.items()]
        pairs.sort()
        return tuple(pairs)
    return value
def _capabilities_for(role: str, config: dict[str, Any]) -> dict[str, bool]:
    """Return the capability flags for a config materialized under ``role``.

    ``chat`` and ``image_gen`` follow the role alone. ``vision`` is set for
    the vision role or when the config has a truthy ``supports_image_input``.
    ``tools`` mirrors ``supports_tools`` and ``embedding`` is always False.
    """
    # A new dict is built on every call, so callers never share flag objects.
    sees_images = role == "vision" or bool(config.get("supports_image_input"))
    uses_tools = bool(config.get("supports_tools", False))
    return dict(
        chat=(role == "chat"),
        vision=sees_images,
        image_gen=(role == "image_gen"),
        embedding=False,
        tools=uses_tools,
    )
def _catalog_metadata(config: dict[str, Any]) -> dict[str, Any]:
    """Collect the billing, quota, rate-limit and SEO metadata of a config.

    Per-token costs are read from ``litellm_params`` and reported as ``None``
    when that entry is not a dict. All other fields are read directly from
    ``config`` with the defaults spelled out below.
    """
    # Resolve litellm_params once; anything that is not a dict acts as empty.
    raw_params = config.get("litellm_params")
    pricing = raw_params if isinstance(raw_params, dict) else {}

    metadata: dict[str, Any] = {}
    metadata["billing_tier"] = config.get("billing_tier", "free")
    metadata["quota_reserve_tokens"] = config.get("quota_reserve_tokens")
    metadata["rpm"] = config.get("rpm")
    metadata["tpm"] = config.get("tpm")
    metadata["anonymous_enabled"] = config.get("anonymous_enabled", False)
    metadata["seo_enabled"] = config.get("seo_enabled", False)
    metadata["seo_slug"] = config.get("seo_slug")
    metadata["input_cost_per_token"] = pricing.get("input_cost_per_token")
    metadata["output_cost_per_token"] = pricing.get("output_cost_per_token")
    metadata["is_planner"] = config.get("is_planner", False)
    metadata["base_model"] = _base_model(config)
    metadata["router_pool_eligible"] = config.get("router_pool_eligible", True)
    return metadata
def materialize_global_model_catalog(
    *,
    chat_configs: list[dict[str, Any]],
    vision_configs: list[dict[str, Any]],
    image_configs: list[dict[str, Any]],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Turn the static GLOBAL config lists into virtual connection/model records.

    Configs are processed role by role: chat, then vision, then image
    generation. Entries flagged ``is_auto_mode`` and entries without a truthy
    ``id`` or ``model_name`` are ignored. Configs whose derived connection
    identity (see ``_connection_key``) matches share one virtual connection;
    connection ids are handed out as -1, -2, ... in first-seen order.

    Args:
        chat_configs: Global chat model configs.
        vision_configs: Global vision model configs.
        image_configs: Global image-generation configs.

    Returns:
        ``(connections, models)``. Each model record carries the config's own
        ``id`` (coerced with ``int``) and the ``connection_id`` of the virtual
        connection it belongs to.
    """
    connections: list[dict[str, Any]] = []
    models: list[dict[str, Any]] = []
    connection_ids: dict[tuple[Any, ...], int] = {}

    role_sources = (
        ("chat", chat_configs),
        ("vision", vision_configs),
        ("image_gen", image_configs),
    )

    for role, role_configs in role_sources:
        for cfg in role_configs:
            if cfg.get("is_auto_mode"):
                continue
            if not (cfg.get("id") and cfg.get("model_name")):
                continue

            conn = native_connection_from_config(cfg)
            conn.update(scope="GLOBAL", enabled=True, last_status="OK")

            key = _connection_key(conn)
            if key not in connection_ids:
                # Virtual connection ids count down from -1, one per distinct key.
                connection_ids[key] = -(len(connection_ids) + 1)
                connections.append({"id": connection_ids[key], **conn})

            models.append(
                {
                    "id": int(cfg["id"]),
                    "connection_id": connection_ids[key],
                    "model_id": cfg["model_name"],
                    "display_name": cfg.get("name") or cfg["model_name"],
                    "source": "MANUAL",
                    # Three separate calls on purpose: each field gets its own
                    # dict rather than three references to a single object.
                    "capabilities": _capabilities_for(role, cfg),
                    "capabilities_declared": _capabilities_for(role, cfg),
                    "capabilities_verified": _capabilities_for(role, cfg),
                    "capabilities_override": {},
                    "embedding_dimension": None,
                    "enabled": True,
                    "billing_tier": cfg.get("billing_tier", "free"),
                    "catalog": _catalog_metadata(cfg),
                    "role": role,
                }
            )

    # Each virtual connection is server-only. Callers that serialize these
    # must strip api_key before returning data to clients.
    return connections, models
__all__ = ["materialize_global_model_catalog"]

View file

@ -0,0 +1,152 @@
"""Single model-to-LiteLLM resolver.
All chat, vision, image-generation, validation, and Auto routing paths should
turn a Connection + Model into LiteLLM input through this module.
"""
from __future__ import annotations
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any
from app.services.provider_api_base import resolve_api_base
if TYPE_CHECKING:
from app.db import Connection
# Connection protocol identifiers that ``to_litellm`` branches on.
PROTOCOL_OLLAMA = "OLLAMA"
PROTOCOL_OPENAI_COMPATIBLE = "OPENAI_COMPATIBLE"
PROTOCOL_NATIVE = "NATIVE"

# Upper-case native provider name -> prefix placed in front of the model id
# when building the ``<prefix>/<model_id>`` string. Providers missing from
# this table fall back to their lower-cased name in ``to_litellm``.
NATIVE_PROVIDER_PREFIX: dict[str, str] = {
    "OPENAI": "openai",
    "ANTHROPIC": "anthropic",
    "GROQ": "groq",
    "COHERE": "cohere",
    "GOOGLE": "gemini",
    # Legacy/global configs can name OLLAMA as a plain provider. Keep them on
    # the same ``ollama_chat`` prefix the OLLAMA protocol branch uses, rather
    # than letting them drop to the lower-cased ``ollama`` fallback.
    "OLLAMA": "ollama_chat",
    "MISTRAL": "mistral",
    "AZURE_OPENAI": "azure",
    "AZURE": "azure",
    "OPENROUTER": "openrouter",
    "COMETAPI": "cometapi",
    "XAI": "xai",
    "BEDROCK": "bedrock",
    "AWS_BEDROCK": "bedrock",
    "VERTEX_AI": "vertex_ai",
    "TOGETHER_AI": "together_ai",
    "FIREWORKS_AI": "fireworks_ai",
    # NOTE(review): the next four map to "openai", presumably because they are
    # reached through OpenAI-compatible endpoints; confirm before changing.
    "DEEPSEEK": "openai",
    "ALIBABA_QWEN": "openai",
    "MOONSHOT": "openai",
    "ZHIPU": "openai",
    "GITHUB_MODELS": "github",
    "REPLICATE": "replicate",
    "PERPLEXITY": "perplexity",
    "ANYSCALE": "anyscale",
    "DEEPINFRA": "deepinfra",
    "CEREBRAS": "cerebras",
    "SAMBANOVA": "sambanova",
    "AI21": "ai21",
    "CLOUDFLARE": "cloudflare",
    "DATABRICKS": "databricks",
    "HUGGINGFACE": "huggingface",
    "MINIMAX": "openai",
    "RECRAFT": "recraft",
    "XINFERENCE": "xinference",
    "NSCALE": "nscale",
    # "custom" is a sentinel: ``to_litellm`` swaps it for the connection's
    # custom provider name instead of using it as a literal prefix.
    "CUSTOM": "custom",
}
def ensure_v1(base_url: str | None) -> str | None:
    """Return ``base_url`` without trailing slashes and ending in ``/v1``.

    An empty or missing URL yields ``None``. A URL that already ends in
    ``/v1`` once its trailing slashes are removed is returned in that
    trimmed form; otherwise ``/v1`` is appended.
    """
    if not base_url:
        return None
    trimmed = base_url.rstrip("/")
    return trimmed if trimmed.endswith("/v1") else f"{trimmed}/v1"
def _conn_value(conn: Connection | Mapping[str, Any], key: str) -> Any:
    """Read ``key`` from a connection given as a mapping or as an object.

    Mappings are read with ``.get`` (a missing key yields ``None``); anything
    else is read with ``getattr``, which raises ``AttributeError`` when the
    attribute does not exist.
    """
    return conn.get(key) if isinstance(conn, Mapping) else getattr(conn, key)
def _protocol_value(protocol: Any) -> str:
    """Return ``protocol.value`` when present, otherwise ``str(protocol)``."""
    # The string form is computed up front, exactly as an eagerly evaluated
    # ``getattr`` default would be.
    as_text = str(protocol)
    return getattr(protocol, "value", as_text)
def to_litellm(
    conn: Connection | Mapping[str, Any],
    model_id: str,
) -> tuple[str, dict[str, Any]]:
    """Return ``(model_string, litellm_kwargs)`` for any model role.

    Args:
        conn: A connection, either an object with ``protocol``, ``base_url``,
            ``api_key``, ``native_provider`` and ``extra`` attributes or a
            mapping with those keys.
        model_id: Provider-side model identifier, without any prefix.

    Returns:
        The prefixed model string and the keyword arguments to pass with it.

    Protocol handling:
        * ``OLLAMA``: ``ollama_chat/<model_id>``; the base URL is used with
          trailing slashes removed.
        * ``OPENAI_COMPATIBLE``: ``openai/<model_id>``; the base URL is
          normalised through ``ensure_v1``.
        * anything else is treated as a native provider: the prefix comes
          from ``NATIVE_PROVIDER_PREFIX`` (falling back to the lower-cased
          provider name) and the API base from ``resolve_api_base``.

    ``extra["litellm_params"]`` and then ``extra["kwargs"]`` are merged last,
    so they override any value derived above.
    """
    protocol = _protocol_value(_conn_value(conn, "protocol"))
    base_url = _conn_value(conn, "base_url")
    api_key = _conn_value(conn, "api_key")
    native_provider = _conn_value(conn, "native_provider")
    extra = _conn_value(conn, "extra") or {}

    kwargs: dict[str, Any] = {}
    if api_key:
        kwargs["api_key"] = api_key

    if protocol == PROTOCOL_OLLAMA:
        model_string = f"ollama_chat/{model_id}"
        if base_url:
            kwargs["api_base"] = base_url.rstrip("/")
    elif protocol == PROTOCOL_OPENAI_COMPATIBLE:
        model_string = f"openai/{model_id}"
        api_base = ensure_v1(base_url)
        if api_base:
            kwargs["api_base"] = api_base
    else:
        provider_key = (native_provider or "").upper()
        prefix = NATIVE_PROVIDER_PREFIX.get(provider_key, provider_key.lower())
        if prefix == "custom":
            custom_provider = extra.get("custom_provider") or native_provider
            model_string = (
                f"{custom_provider}/{model_id}" if custom_provider else model_id
            )
        elif prefix:
            model_string = f"{prefix}/{model_id}"
        else:
            # No provider at all: use the bare model id, as the custom branch
            # does, instead of emitting a malformed "/<model_id>".
            model_string = model_id
        api_base = resolve_api_base(
            provider=provider_key,
            provider_prefix=prefix,
            config_api_base=base_url,
        )
        if api_base:
            kwargs["api_base"] = api_base

    if api_version := extra.get("api_version"):
        kwargs["api_version"] = api_version

    # ``or {}`` rather than a ``.get`` default: a key stored with a null value
    # returns None, and ``dict.update(None)`` raises TypeError.
    kwargs.update(extra.get("litellm_params") or {})
    kwargs.update(extra.get("kwargs") or {})
    return model_string, kwargs
def native_connection_from_config(config: Mapping[str, Any]) -> dict[str, Any]:
    """Build an in-memory NATIVE connection mapping from a legacy/global config.

    The provider is the config's ``provider``, else its ``custom_provider``,
    else ``"CUSTOM"``, always converted to ``str``. ``extra`` always carries
    ``litellm_params`` (an empty dict when unset) and additionally
    ``api_version`` / ``custom_provider`` when the config sets them to a
    truthy value. Empty ``api_base`` / ``api_key`` values become ``None``.
    """
    extra: dict[str, Any] = {"litellm_params": config.get("litellm_params") or {}}
    for optional_field in ("api_version", "custom_provider"):
        if field_value := config.get(optional_field):
            extra[optional_field] = field_value

    provider_name = (
        config.get("provider") or config.get("custom_provider") or "CUSTOM"
    )
    connection: dict[str, Any] = {"protocol": PROTOCOL_NATIVE}
    connection["native_provider"] = str(provider_name)
    connection["base_url"] = config.get("api_base") or None
    connection["api_key"] = config.get("api_key") or None
    connection["extra"] = extra
    return connection
__all__ = [
"NATIVE_PROVIDER_PREFIX",
"ensure_v1",
"native_connection_from_config",
"to_litellm",
]

View file

@ -46,6 +46,8 @@ from collections.abc import Iterable
import litellm
from app.services.model_resolver import NATIVE_PROVIDER_PREFIX
logger = logging.getLogger(__name__)
@ -58,40 +60,7 @@ logger = logging.getLogger(__name__)
# map there directly would re-introduce the
# ``app.config -> ... -> deliverables/tools/generate_image ->
# app.config`` cycle that prompted the move.
_PROVIDER_PREFIX_MAP: dict[str, str] = {
"OPENAI": "openai",
"ANTHROPIC": "anthropic",
"GROQ": "groq",
"COHERE": "cohere",
"GOOGLE": "gemini",
"OLLAMA": "ollama_chat",
"MISTRAL": "mistral",
"AZURE_OPENAI": "azure",
"OPENROUTER": "openrouter",
"XAI": "xai",
"BEDROCK": "bedrock",
"VERTEX_AI": "vertex_ai",
"TOGETHER_AI": "together_ai",
"FIREWORKS_AI": "fireworks_ai",
"DEEPSEEK": "openai",
"ALIBABA_QWEN": "openai",
"MOONSHOT": "openai",
"ZHIPU": "openai",
"GITHUB_MODELS": "github",
"REPLICATE": "replicate",
"PERPLEXITY": "perplexity",
"ANYSCALE": "anyscale",
"DEEPINFRA": "deepinfra",
"CEREBRAS": "cerebras",
"SAMBANOVA": "sambanova",
"AI21": "ai21",
"CLOUDFLARE": "cloudflare",
"DATABRICKS": "databricks",
"COMETAPI": "cometapi",
"HUGGINGFACE": "huggingface",
"MINIMAX": "openai",
"CUSTOM": "custom",
}
_PROVIDER_PREFIX_MAP = NATIVE_PROVIDER_PREFIX
def _candidate_model_strings(