mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-12 20:45:20 +02:00
feat(models): add provider catalog and resolver
This commit is contained in:
parent
adb857925b
commit
8b59ca59c1
6 changed files with 355 additions and 56 deletions
|
|
@ -389,10 +389,28 @@ def initialize_openrouter_integration():
|
|||
)
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to inject OpenRouter vision-LLM configs: {e}")
|
||||
|
||||
refresh_global_model_catalog()
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to initialize OpenRouter integration: {e}")
|
||||
|
||||
|
||||
def materialize_global_configs():
    """Build the GLOBAL connection/model catalog from the current config lists.

    Reads ``GLOBAL_LLM_CONFIGS``, ``GLOBAL_VISION_LLM_CONFIGS`` and
    ``GLOBAL_IMAGE_GEN_CONFIGS`` from the module-level ``config`` object
    (each falling back to an empty list when the attribute is missing) and
    returns whatever ``materialize_global_model_catalog`` produces for them.
    """
    # Imported at call time rather than at module top.
    # NOTE(review): presumably to avoid an import cycle with app.config — confirm.
    from app.services.global_model_catalog import materialize_global_model_catalog

    chat = getattr(config, "GLOBAL_LLM_CONFIGS", [])
    vision = getattr(config, "GLOBAL_VISION_LLM_CONFIGS", [])
    image = getattr(config, "GLOBAL_IMAGE_GEN_CONFIGS", [])
    return materialize_global_model_catalog(
        chat_configs=chat,
        vision_configs=vision,
        image_configs=image,
    )
|
||||
|
||||
|
||||
def refresh_global_model_catalog():
    """Recompute the GLOBAL catalog and publish it on the shared ``config`` object.

    Overwrites ``config.GLOBAL_CONNECTIONS`` and ``config.GLOBAL_MODELS`` with
    the pair returned by ``materialize_global_configs()``.
    """
    config.GLOBAL_CONNECTIONS, config.GLOBAL_MODELS = materialize_global_configs()
|
||||
|
||||
|
||||
def initialize_pricing_registration():
|
||||
"""
|
||||
Teach LiteLLM the per-token cost of every deployment in
|
||||
|
|
@ -723,7 +741,7 @@ class Config:
|
|||
os.getenv("QUOTA_DEFAULT_IMAGE_RESERVE_MICROS", "50000")
|
||||
)
|
||||
|
||||
# Per-podcast reservation (in micro-USD). One agent LLM call generating
|
||||
# Per-podcast reservation (in micro-USD). One chat model call generating
|
||||
# a transcript, typically 5k-20k completion tokens. $0.20 covers a long
|
||||
# premium-model run. Tune via env.
|
||||
QUOTA_DEFAULT_PODCAST_RESERVE_MICROS = int(
|
||||
|
|
@ -849,6 +867,19 @@ class Config:
|
|||
# Router settings for Vision LLM Auto mode
|
||||
VISION_LLM_ROUTER_SETTINGS = load_vision_llm_router_settings()
|
||||
|
||||
# Virtual GLOBAL connection/model catalog. This is server-only metadata
|
||||
# derived from global_llm_config.yaml; GLOBAL keys are not stored in DB.
|
||||
from app.services.global_model_catalog import (
|
||||
materialize_global_model_catalog as _materialize_global_model_catalog,
|
||||
)
|
||||
|
||||
GLOBAL_CONNECTIONS, GLOBAL_MODELS = _materialize_global_model_catalog(
|
||||
chat_configs=GLOBAL_LLM_CONFIGS,
|
||||
vision_configs=GLOBAL_VISION_LLM_CONFIGS,
|
||||
image_configs=GLOBAL_IMAGE_GEN_CONFIGS,
|
||||
)
|
||||
del _materialize_global_model_catalog
|
||||
|
||||
# OpenRouter Integration settings (optional)
|
||||
OPENROUTER_INTEGRATION_SETTINGS = load_openrouter_integration_settings()
|
||||
|
||||
|
|
|
|||
|
|
@ -7,8 +7,9 @@
|
|||
# NOTE: The example API keys below are placeholders and won't work.
|
||||
# Replace them with your actual API keys to enable global configurations.
|
||||
#
|
||||
# These configurations will be available to all users as a convenient option
|
||||
# Users can choose to use these global configs or add their own
|
||||
# These configurations are materialized as server-owned GLOBAL connections/models
|
||||
# and become available on the Models page. Users can choose hosted/global models
|
||||
# or add their own BYOK/local connections.
|
||||
#
|
||||
# AUTO MODE (Recommended):
|
||||
# - Auto mode (ID: 0) uses LiteLLM Router to automatically load balance across all global configs
|
||||
|
|
@ -16,9 +17,12 @@
|
|||
# - New users are automatically assigned Auto mode by default
|
||||
# - Configure router_settings below to customize the load balancing behavior
|
||||
#
|
||||
# Structure matches NewLLMConfig:
|
||||
# - Model configuration (provider, model_name, api_key, etc.)
|
||||
# - Prompt configuration (system_instructions, citations_enabled)
|
||||
# Static config shape:
|
||||
# - Connection fields: provider, api_key, api_base, api_version
|
||||
# - Model fields: model_name, billing_tier, rpm/tpm, litellm_params
|
||||
# - Prompt defaults: system_instructions, citations_enabled
|
||||
# IDs share one GLOBAL model namespace across chat, vision, and image generation.
|
||||
# Suggested ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999.
|
||||
#
|
||||
# COST-BASED PREMIUM CREDITS:
|
||||
# Each premium config bills the user's USD-credit balance based on the
|
||||
|
|
@ -327,7 +331,7 @@ openrouter_integration:
|
|||
quota_reserve_tokens: 4000
|
||||
# id_offset: base negative ID for dynamically generated configs.
|
||||
# Model IDs are derived deterministically via BLAKE2b so they survive
|
||||
# catalogue churn. Must not overlap with your static global_llm_configs IDs.
|
||||
# catalogue churn. Must not overlap with any static GLOBAL model IDs.
|
||||
id_offset: -10000
|
||||
# refresh_interval_hours: how often to re-fetch models from OpenRouter (0 = startup only)
|
||||
refresh_interval_hours: 24
|
||||
|
|
@ -351,8 +355,8 @@ openrouter_integration:
|
|||
|
||||
# Image generation + vision LLM emission are OPT-IN. OpenRouter's catalogue
|
||||
# contains hundreds of image- and vision-capable models; turning these on
|
||||
# injects them into the global Image-Generation / Vision-LLM model
|
||||
# selectors alongside any static configs. Tier (free/premium) is derived
|
||||
# injects them into the global image-generation / vision model lists
|
||||
# alongside any static configs. Tier (free/premium) is derived
|
||||
# per model the same way it is for chat (`:free` suffix or zero pricing).
|
||||
# When a user picks a premium image/vision model the call debits the
|
||||
# shared $5 USD-cost-based premium credit pool — so leaving these off
|
||||
|
|
@ -384,7 +388,7 @@ image_generation_router_settings:
|
|||
|
||||
global_image_generation_configs:
|
||||
# Example: OpenAI DALL-E 3
|
||||
- id: -1
|
||||
- id: -2001
|
||||
name: "Global DALL-E 3"
|
||||
description: "OpenAI's DALL-E 3 for high-quality image generation"
|
||||
provider: "OPENAI"
|
||||
|
|
@ -395,7 +399,7 @@ global_image_generation_configs:
|
|||
litellm_params: {}
|
||||
|
||||
# Example: OpenAI GPT Image 1
|
||||
- id: -2
|
||||
- id: -2002
|
||||
name: "Global GPT Image 1"
|
||||
description: "OpenAI's GPT Image 1 model"
|
||||
provider: "OPENAI"
|
||||
|
|
@ -406,7 +410,7 @@ global_image_generation_configs:
|
|||
litellm_params: {}
|
||||
|
||||
# Example: Azure OpenAI DALL-E 3
|
||||
- id: -3
|
||||
- id: -2003
|
||||
name: "Global Azure DALL-E 3"
|
||||
description: "Azure-hosted DALL-E 3 deployment"
|
||||
provider: "AZURE_OPENAI"
|
||||
|
|
@ -419,7 +423,7 @@ global_image_generation_configs:
|
|||
base_model: "dall-e-3"
|
||||
|
||||
# Example: OpenRouter Gemini Image Generation
|
||||
# - id: -4
|
||||
# - id: -2004
|
||||
# name: "Global Gemini Image Gen"
|
||||
# description: "Google Gemini image generation via OpenRouter"
|
||||
# provider: "OPENROUTER"
|
||||
|
|
@ -448,7 +452,7 @@ vision_llm_router_settings:
|
|||
|
||||
global_vision_llm_configs:
|
||||
# Example: OpenAI GPT-4o (recommended for vision)
|
||||
- id: -1
|
||||
- id: -1001
|
||||
name: "Global GPT-4o Vision"
|
||||
description: "OpenAI's GPT-4o with strong vision capabilities"
|
||||
provider: "OPENAI"
|
||||
|
|
@ -462,7 +466,7 @@ global_vision_llm_configs:
|
|||
max_tokens: 1000
|
||||
|
||||
# Example: Google Gemini 2.0 Flash
|
||||
- id: -2
|
||||
- id: -1002
|
||||
name: "Global Gemini 2.0 Flash"
|
||||
description: "Google's fast vision model with large context"
|
||||
provider: "GOOGLE"
|
||||
|
|
@ -476,7 +480,7 @@ global_vision_llm_configs:
|
|||
max_tokens: 1000
|
||||
|
||||
# Example: Anthropic Claude 3.5 Sonnet
|
||||
- id: -3
|
||||
- id: -1003
|
||||
name: "Global Claude 3.5 Sonnet Vision"
|
||||
description: "Anthropic's Claude 3.5 Sonnet with vision support"
|
||||
provider: "ANTHROPIC"
|
||||
|
|
@ -490,7 +494,7 @@ global_vision_llm_configs:
|
|||
max_tokens: 1000
|
||||
|
||||
# Example: Azure OpenAI GPT-4o
|
||||
# - id: -4
|
||||
# - id: -1004
|
||||
# name: "Global Azure GPT-4o Vision"
|
||||
# description: "Azure-hosted GPT-4o for vision analysis"
|
||||
# provider: "AZURE_OPENAI"
|
||||
|
|
@ -507,8 +511,9 @@ global_vision_llm_configs:
|
|||
|
||||
# Notes:
|
||||
# - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing
|
||||
# - Use negative IDs to distinguish global configs from user configs (NewLLMConfig in DB)
|
||||
# - IDs should be unique and sequential (e.g., -1, -2, -3, etc.)
|
||||
# - Use negative IDs to distinguish global models from BYOK/local DB models
|
||||
# - IDs must be unique across chat, vision, and image generation configs
|
||||
# - Suggested static ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999
|
||||
# - The 'api_key' field will not be exposed to users via API
|
||||
# - system_instructions: Custom prompt or empty string to use defaults
|
||||
# - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS when system_instructions is empty
|
||||
|
|
@ -519,7 +524,7 @@ global_vision_llm_configs:
|
|||
#
|
||||
#
|
||||
# IMAGE GENERATION NOTES:
|
||||
# - Image generation configs use the same ID scheme as LLM configs (negative for global)
|
||||
# - Image generation configs use the shared GLOBAL ID namespace
|
||||
# - Supported models: dall-e-2, dall-e-3, gpt-image-1 (OpenAI), azure/* (Azure),
|
||||
# bedrock/* (AWS), vertex_ai/* (Google), recraft/* (Recraft), openrouter/* (OpenRouter)
|
||||
# - The router uses litellm.aimage_generation() for async image generation
|
||||
|
|
@ -527,7 +532,7 @@ global_vision_llm_configs:
|
|||
# TPM (tokens per minute) does not apply since image APIs are billed/rate-limited per request, not per token.
|
||||
#
|
||||
# VISION LLM NOTES:
|
||||
# - Vision configs use the same ID scheme (negative for global, positive for user DB)
|
||||
# - Vision configs use the shared GLOBAL ID namespace
|
||||
# - Only use vision-capable models (GPT-4o, Gemini, Claude 3, etc.)
|
||||
# - Lower temperature (0.3) is recommended for accurate screenshot analysis
|
||||
# - Lower max_tokens (1000) is sufficient since autocomplete produces short suggestions
|
||||
|
|
|
|||
|
|
@ -229,7 +229,7 @@ class LLMPreferencesRead(BaseModel):
|
|||
description="ID of the vision LLM config to use for vision/screenshot analysis",
|
||||
)
|
||||
agent_llm: dict[str, Any] | None = Field(
|
||||
None, description="Full config for agent LLM"
|
||||
None, description="Full config for chat model"
|
||||
)
|
||||
image_generation_config: dict[str, Any] | None = Field(
|
||||
None, description="Full config for image generation"
|
||||
|
|
|
|||
142
surfsense_backend/app/services/global_model_catalog.py
Normal file
142
surfsense_backend/app/services/global_model_catalog.py
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
"""Materialize server-owned GLOBAL YAML configs as virtual connections/models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from app.services.model_resolver import native_connection_from_config
|
||||
|
||||
|
||||
def _base_model(config: dict[str, Any]) -> str | None:
|
||||
litellm_params = config.get("litellm_params") or {}
|
||||
if isinstance(litellm_params, dict):
|
||||
return litellm_params.get("base_model")
|
||||
return None
|
||||
|
||||
|
||||
def _connection_key(conn: dict[str, Any]) -> tuple[Any, ...]:
    """Return the tuple used to decide whether two configs share a connection.

    The key is ``(protocol, native_provider, base_url, api_key, frozen extra)``.
    """
    # api_key is part of the key on purpose: two operator-owned credentials
    # for the same provider/base can have different quota/rate limits upstream.
    identity_fields = ("protocol", "native_provider", "base_url", "api_key")
    frozen_extra = _freeze(conn.get("extra") or {})
    return (*(conn.get(field) for field in identity_fields), frozen_extra)
|
||||
|
||||
|
||||
def _freeze(value: Any) -> Any:
|
||||
if isinstance(value, dict):
|
||||
return tuple(sorted((key, _freeze(val)) for key, val in value.items()))
|
||||
if isinstance(value, list):
|
||||
return tuple(_freeze(item) for item in value)
|
||||
return value
|
||||
|
||||
|
||||
def _capabilities_for(role: str, config: dict[str, Any]) -> dict[str, bool]:
|
||||
return {
|
||||
"chat": role == "chat",
|
||||
"vision": role == "vision" or bool(config.get("supports_image_input")),
|
||||
"image_gen": role == "image_gen",
|
||||
"embedding": False,
|
||||
"tools": bool(config.get("supports_tools", False)),
|
||||
}
|
||||
|
||||
|
||||
def _catalog_metadata(config: dict[str, Any]) -> dict[str, Any]:
    """Collect the billing/quota/SEO/routing metadata stored with a GLOBAL model.

    Most fields are copied straight from *config* with the defaults shown
    below. The per-token costs come from ``litellm_params`` and are ``None``
    whenever ``litellm_params`` is not a dict.
    """
    raw_params = config.get("litellm_params")
    # A non-dict litellm_params contributes no pricing information.
    pricing = raw_params if isinstance(raw_params, dict) else {}

    metadata: dict[str, Any] = {}
    metadata["billing_tier"] = config.get("billing_tier", "free")
    metadata["quota_reserve_tokens"] = config.get("quota_reserve_tokens")
    metadata["rpm"] = config.get("rpm")
    metadata["tpm"] = config.get("tpm")
    metadata["anonymous_enabled"] = config.get("anonymous_enabled", False)
    metadata["seo_enabled"] = config.get("seo_enabled", False)
    metadata["seo_slug"] = config.get("seo_slug")
    metadata["input_cost_per_token"] = pricing.get("input_cost_per_token")
    metadata["output_cost_per_token"] = pricing.get("output_cost_per_token")
    metadata["is_planner"] = config.get("is_planner", False)
    metadata["base_model"] = _base_model(config)
    metadata["router_pool_eligible"] = config.get("router_pool_eligible", True)
    return metadata
|
||||
|
||||
|
||||
def materialize_global_model_catalog(
    *,
    chat_configs: list[dict[str, Any]],
    vision_configs: list[dict[str, Any]],
    image_configs: list[dict[str, Any]],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Turn the static GLOBAL config lists into virtual connections and models.

    Configs flagged ``is_auto_mode`` and configs without a truthy ``id`` or
    ``model_name`` are skipped. Configs whose derived connection (protocol,
    provider, base URL, API key and extras) is identical share one virtual
    connection; connection IDs are handed out as -1, -2, ... in first-seen
    order (chat, then vision, then image generation).

    Model IDs live in one namespace shared by all three roles. A colliding ID
    is still emitted (so existing behaviour is preserved) but a warning is
    logged, because the resulting catalog then holds two models with the
    same ID.

    Args:
        chat_configs: Static chat LLM config dicts.
        vision_configs: Static vision LLM config dicts.
        image_configs: Static image-generation config dicts.

    Returns:
        ``(connections, models)``. Connection dicts still carry ``api_key``:
        they are server-only, and callers that serialize them must strip
        ``api_key`` before returning data to clients.

    Raises:
        ValueError, TypeError: If a config's ``id`` cannot be converted by
            ``int()``.
    """
    # Function-scope import: this module has no module-level logger.
    import logging

    log = logging.getLogger(__name__)

    connections: list[dict[str, Any]] = []
    models: list[dict[str, Any]] = []
    connection_id_by_key: dict[tuple[Any, ...], int] = {}
    role_by_model_id: dict[int, str] = {}
    next_connection_id = -1

    def add_config(config: dict[str, Any], role: str) -> None:
        nonlocal next_connection_id
        # ID 0 is falsy as well, so it never becomes a catalog model.
        if not config.get("id") or not config.get("model_name"):
            return

        conn = native_connection_from_config(config)
        conn["scope"] = "GLOBAL"
        conn["enabled"] = True
        conn["last_status"] = "OK"

        key = _connection_key(conn)
        connection_id = connection_id_by_key.get(key)
        if connection_id is None:
            connection_id = next_connection_id
            next_connection_id -= 1
            connection_id_by_key[key] = connection_id
            connections.append({"id": connection_id, **conn})

        model_id = int(config["id"])
        if model_id in role_by_model_id:
            log.warning(
                "Duplicate GLOBAL model id %s: %s config collides with an "
                "earlier %s config; IDs must be unique across chat, vision, "
                "and image generation configs",
                model_id,
                role,
                role_by_model_id[model_id],
            )
        else:
            role_by_model_id[model_id] = role

        models.append(
            {
                "id": model_id,
                "connection_id": connection_id,
                "model_id": config["model_name"],
                "display_name": config.get("name") or config["model_name"],
                "source": "MANUAL",
                # Three separate calls so each field owns an independent dict.
                "capabilities": _capabilities_for(role, config),
                "capabilities_declared": _capabilities_for(role, config),
                "capabilities_verified": _capabilities_for(role, config),
                "capabilities_override": {},
                "embedding_dimension": None,
                "enabled": True,
                "billing_tier": config.get("billing_tier", "free"),
                "catalog": _catalog_metadata(config),
                "role": role,
            }
        )

    configs_by_role = (
        ("chat", chat_configs),
        ("vision", vision_configs),
        ("image_gen", image_configs),
    )
    for role, role_configs in configs_by_role:
        for cfg in role_configs:
            if cfg.get("is_auto_mode"):
                continue
            add_config(cfg, role)

    # Each virtual connection is server-only. Callers that serialize these
    # must strip api_key before returning data to clients.
    return connections, models
|
||||
|
||||
|
||||
__all__ = ["materialize_global_model_catalog"]
|
||||
152
surfsense_backend/app/services/model_resolver.py
Normal file
152
surfsense_backend/app/services/model_resolver.py
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
"""Single model-to-LiteLLM resolver.
|
||||
|
||||
All chat, vision, image-generation, validation, and Auto routing paths should
|
||||
turn a Connection + Model into LiteLLM input through this module.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from app.services.provider_api_base import resolve_api_base
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.db import Connection
|
||||
|
||||
# Connection protocol identifiers understood by ``to_litellm``.
PROTOCOL_OLLAMA = "OLLAMA"
PROTOCOL_OPENAI_COMPATIBLE = "OPENAI_COMPATIBLE"
PROTOCOL_NATIVE = "NATIVE"

# Maps an upper-case provider name to the LiteLLM model-string prefix used
# for NATIVE connections. Several providers map to "openai" here.
# NOTE(review): presumably because they are reached through OpenAI-compatible
# endpoints — confirm.
NATIVE_PROVIDER_PREFIX: dict[str, str] = {
    "OPENAI": "openai",
    "ANTHROPIC": "anthropic",
    "GROQ": "groq",
    "COHERE": "cohere",
    "GOOGLE": "gemini",
    # Kept from the provider map this table replaces: legacy/global configs
    # that declare provider "OLLAMA" are materialized as NATIVE connections
    # and would otherwise fall back to the lower-cased name ("ollama")
    # instead of the "ollama_chat" prefix used everywhere else.
    "OLLAMA": "ollama_chat",
    "MISTRAL": "mistral",
    "AZURE_OPENAI": "azure",
    "AZURE": "azure",
    "OPENROUTER": "openrouter",
    "COMETAPI": "cometapi",
    "XAI": "xai",
    "BEDROCK": "bedrock",
    "AWS_BEDROCK": "bedrock",
    "VERTEX_AI": "vertex_ai",
    "TOGETHER_AI": "together_ai",
    "FIREWORKS_AI": "fireworks_ai",
    "DEEPSEEK": "openai",
    "ALIBABA_QWEN": "openai",
    "MOONSHOT": "openai",
    "ZHIPU": "openai",
    "GITHUB_MODELS": "github",
    "REPLICATE": "replicate",
    "PERPLEXITY": "perplexity",
    "ANYSCALE": "anyscale",
    "DEEPINFRA": "deepinfra",
    "CEREBRAS": "cerebras",
    "SAMBANOVA": "sambanova",
    "AI21": "ai21",
    "CLOUDFLARE": "cloudflare",
    "DATABRICKS": "databricks",
    "HUGGINGFACE": "huggingface",
    "MINIMAX": "openai",
    "RECRAFT": "recraft",
    "XINFERENCE": "xinference",
    "NSCALE": "nscale",
    "CUSTOM": "custom",
}
|
||||
|
||||
|
||||
def ensure_v1(base_url: str | None) -> str | None:
    """Normalize *base_url* so that it ends in ``/v1``.

    Trailing slashes are dropped first. Returns ``None`` when *base_url* is
    ``None`` or empty.
    """
    if not base_url:
        return None
    trimmed = base_url.rstrip("/")
    return trimmed if trimmed.endswith("/v1") else f"{trimmed}/v1"
|
||||
|
||||
|
||||
def _conn_value(conn: Connection | Mapping[str, Any], key: str) -> Any:
|
||||
if isinstance(conn, Mapping):
|
||||
return conn.get(key)
|
||||
return getattr(conn, key)
|
||||
|
||||
|
||||
def _protocol_value(protocol: Any) -> str:
|
||||
return getattr(protocol, "value", str(protocol))
|
||||
|
||||
|
||||
def to_litellm(
    conn: Connection | Mapping[str, Any],
    model_id: str,
) -> tuple[str, dict[str, Any]]:
    """Return ``(model_string, litellm_kwargs)`` for any model role.

    Args:
        conn: A connection, either a mapping or an object, exposing
            ``protocol``, ``base_url``, ``api_key``, ``native_provider`` and
            ``extra``.
        model_id: Provider-side model identifier; it is prefixed according
            to the connection's protocol/provider.

    Returns:
        The LiteLLM model string and the keyword arguments to send with it.
        The kwargs contain ``api_key``/``api_base``/``api_version`` when
        available, then everything from ``extra["litellm_params"]``, then
        everything from ``extra["kwargs"]`` (later entries override earlier
        ones).
    """
    protocol = _protocol_value(_conn_value(conn, "protocol"))
    base_url = _conn_value(conn, "base_url")
    api_key = _conn_value(conn, "api_key")
    native_provider = _conn_value(conn, "native_provider")
    extra = _conn_value(conn, "extra") or {}

    kwargs: dict[str, Any] = {}
    if api_key:
        kwargs["api_key"] = api_key

    if protocol == PROTOCOL_OLLAMA:
        model_string = f"ollama_chat/{model_id}"
        if base_url:
            kwargs["api_base"] = base_url.rstrip("/")
    elif protocol == PROTOCOL_OPENAI_COMPATIBLE:
        model_string = f"openai/{model_id}"
        api_base = ensure_v1(base_url)
        if api_base:
            kwargs["api_base"] = api_base
    else:
        # Anything else is treated as a NATIVE provider connection.
        provider_key = (native_provider or "").upper()
        # Unknown providers fall back to their lower-cased name as prefix.
        prefix = NATIVE_PROVIDER_PREFIX.get(provider_key, provider_key.lower())
        if prefix == "custom":
            custom_provider = extra.get("custom_provider") or native_provider
            model_string = f"{custom_provider}/{model_id}" if custom_provider else model_id
        else:
            model_string = f"{prefix}/{model_id}"

        api_base = resolve_api_base(
            provider=provider_key,
            provider_prefix=prefix,
            config_api_base=base_url,
        )
        if api_base:
            kwargs["api_base"] = api_base

    if api_version := extra.get("api_version"):
        kwargs["api_version"] = api_version
    # ``or {}`` rather than a .get() default: a key stored with an explicit
    # null value would otherwise make dict.update(None) raise TypeError.
    kwargs.update(extra.get("litellm_params") or {})
    kwargs.update(extra.get("kwargs") or {})
    return model_string, kwargs
|
||||
|
||||
|
||||
def native_connection_from_config(config: Mapping[str, Any]) -> dict[str, Any]:
    """Build an in-memory NATIVE connection mapping from a legacy/global config.

    The provider is taken from ``provider``, then ``custom_provider``, and
    finally defaults to ``"CUSTOM"``. ``api_version`` and ``custom_provider``
    are copied into ``extra`` only when they are truthy; empty ``api_base``
    and ``api_key`` values are normalized to ``None``.
    """
    provider_name = str(
        config.get("provider") or config.get("custom_provider") or "CUSTOM"
    )

    extras: dict[str, Any] = {"litellm_params": config.get("litellm_params") or {}}
    for optional_field in ("api_version", "custom_provider"):
        optional_value = config.get(optional_field)
        if optional_value:
            extras[optional_field] = optional_value

    return dict(
        protocol=PROTOCOL_NATIVE,
        native_provider=provider_name,
        base_url=config.get("api_base") or None,
        api_key=config.get("api_key") or None,
        extra=extras,
    )
|
||||
|
||||
|
||||
__all__ = [
|
||||
"NATIVE_PROVIDER_PREFIX",
|
||||
"ensure_v1",
|
||||
"native_connection_from_config",
|
||||
"to_litellm",
|
||||
]
|
||||
|
|
@ -46,6 +46,8 @@ from collections.abc import Iterable
|
|||
|
||||
import litellm
|
||||
|
||||
from app.services.model_resolver import NATIVE_PROVIDER_PREFIX
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
|
@ -58,40 +60,7 @@ logger = logging.getLogger(__name__)
|
|||
# map there directly would re-introduce the
|
||||
# ``app.config -> ... -> deliverables/tools/generate_image ->
|
||||
# app.config`` cycle that prompted the move.
|
||||
_PROVIDER_PREFIX_MAP: dict[str, str] = {
|
||||
"OPENAI": "openai",
|
||||
"ANTHROPIC": "anthropic",
|
||||
"GROQ": "groq",
|
||||
"COHERE": "cohere",
|
||||
"GOOGLE": "gemini",
|
||||
"OLLAMA": "ollama_chat",
|
||||
"MISTRAL": "mistral",
|
||||
"AZURE_OPENAI": "azure",
|
||||
"OPENROUTER": "openrouter",
|
||||
"XAI": "xai",
|
||||
"BEDROCK": "bedrock",
|
||||
"VERTEX_AI": "vertex_ai",
|
||||
"TOGETHER_AI": "together_ai",
|
||||
"FIREWORKS_AI": "fireworks_ai",
|
||||
"DEEPSEEK": "openai",
|
||||
"ALIBABA_QWEN": "openai",
|
||||
"MOONSHOT": "openai",
|
||||
"ZHIPU": "openai",
|
||||
"GITHUB_MODELS": "github",
|
||||
"REPLICATE": "replicate",
|
||||
"PERPLEXITY": "perplexity",
|
||||
"ANYSCALE": "anyscale",
|
||||
"DEEPINFRA": "deepinfra",
|
||||
"CEREBRAS": "cerebras",
|
||||
"SAMBANOVA": "sambanova",
|
||||
"AI21": "ai21",
|
||||
"CLOUDFLARE": "cloudflare",
|
||||
"DATABRICKS": "databricks",
|
||||
"COMETAPI": "cometapi",
|
||||
"HUGGINGFACE": "huggingface",
|
||||
"MINIMAX": "openai",
|
||||
"CUSTOM": "custom",
|
||||
}
|
||||
_PROVIDER_PREFIX_MAP = NATIVE_PROVIDER_PREFIX
|
||||
|
||||
|
||||
def _candidate_model_strings(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue