feat(database-migrations): add migration to remove legacy model config tables and remove stale model connection code

This commit is contained in:
Anish Sarkar 2026-06-13 12:45:43 +05:30
parent 50668775f8
commit bd4a04f2e7
93 changed files with 956 additions and 11442 deletions

View file

@ -215,7 +215,7 @@ def create_generate_image_tool(
prompt=prompt,
model=getattr(response, "_hidden_params", {}).get("model"),
n=n,
image_generation_config_id=config_id,
image_gen_model_id=config_id,
response_data=response_dict,
search_space_id=search_space_id,
access_token=access_token,

View file

@ -24,8 +24,6 @@ from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGenerationChunk, ChatResult
from langchain_litellm import ChatLiteLLM
from litellm import get_model_info
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.chat.runtime.prompt_caching import (
apply_litellm_prompt_caching,
@ -34,7 +32,6 @@ from app.services.llm_router_service import (
AUTO_MODE_ID,
ChatLiteLLMRouter,
_sanitize_content,
is_auto_mode,
)
@ -130,7 +127,7 @@ class AgentConfig:
"""
Complete configuration for the SurfSense agent.
This combines LLM settings with prompt configuration from NewLLMConfig.
This combines resolved model settings with prompt configuration.
Supports Auto mode metadata (ID 0). Runtime callers must resolve Auto to
a concrete global or BYOK model before constructing ChatLiteLLM.
"""
@ -180,7 +177,7 @@ class AgentConfig:
use_default_system_instructions=True,
citations_enabled=True,
config_id=AUTO_MODE_ID,
config_name="Auto (Fastest)",
config_name="Auto",
is_auto_mode=True,
billing_tier="free",
is_premium=False,
@ -191,57 +188,12 @@ class AgentConfig:
supports_image_input=True,
)
@classmethod
def from_new_llm_config(cls, config) -> "AgentConfig":
"""Build an AgentConfig from a NewLLMConfig database model."""
# Lazy import: keeps provider_capabilities (and litellm) out of init order.
from app.services.provider_capabilities import derive_supports_image_input
provider_value = (
config.provider.value
if hasattr(config.provider, "value")
else str(config.provider)
)
litellm_params = config.litellm_params or {}
base_model = (
litellm_params.get("base_model")
if isinstance(litellm_params, dict)
else None
)
return cls(
provider=provider_value,
model_name=config.model_name,
api_key=config.api_key,
api_base=config.api_base,
custom_provider=config.custom_provider,
litellm_params=config.litellm_params,
system_instructions=config.system_instructions,
use_default_system_instructions=config.use_default_system_instructions,
citations_enabled=config.citations_enabled,
config_id=config.id,
config_name=config.name,
is_auto_mode=False,
billing_tier="free",
is_premium=False,
anonymous_enabled=False,
quota_reserve_tokens=None,
# BYOK rows have no curated flag; ask LiteLLM (default-allow on
# unknown). The streaming safety net still blocks explicit text-only.
supports_image_input=derive_supports_image_input(
provider=provider_value.lower(),
model_name=config.model_name,
base_model=base_model,
custom_provider=config.custom_provider,
),
)
@classmethod
def from_yaml_config(cls, yaml_config: dict) -> "AgentConfig":
"""Build an AgentConfig from a YAML configuration dictionary.
Supports the same prompt fields as NewLLMConfig (system_instructions,
use_default_system_instructions, citations_enabled).
Supports prompt fields such as system_instructions,
use_default_system_instructions, and citations_enabled.
"""
# Lazy import: keeps provider_capabilities (and litellm) out of init order.
from app.services.provider_capabilities import derive_supports_image_input
@ -334,82 +286,6 @@ def load_global_llm_config_by_id(llm_config_id: int) -> dict | None:
return load_llm_config_from_yaml(llm_config_id)
async def load_new_llm_config_from_db(
session: AsyncSession,
config_id: int,
) -> "AgentConfig | None":
"""Load a NewLLMConfig from the database by ID."""
from app.db import NewLLMConfig
try:
result = await session.execute(
select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
)
config = result.scalars().first()
if not config:
print(f"Error: NewLLMConfig with id {config_id} not found")
return None
return AgentConfig.from_new_llm_config(config)
except Exception as e:
print(f"Error loading NewLLMConfig from database: {e}")
return None
async def load_agent_llm_config_for_search_space(
session: AsyncSession,
search_space_id: int,
) -> "AgentConfig | None":
"""Load the chat model config for a search space via its agent_llm_id.
Positive id -> DB; negative -> YAML; None -> first global config (-1).
"""
from app.db import SearchSpace
try:
result = await session.execute(
select(SearchSpace).filter(SearchSpace.id == search_space_id)
)
search_space = result.scalars().first()
if not search_space:
print(f"Error: SearchSpace with id {search_space_id} not found")
return None
config_id = (
search_space.agent_llm_id if search_space.agent_llm_id is not None else -1
)
return await load_agent_config(session, config_id, search_space_id)
except Exception as e:
print(f"Error loading chat model config for search space {search_space_id}: {e}")
return None
async def load_agent_config(
session: AsyncSession,
config_id: int,
search_space_id: int | None = None,
) -> "AgentConfig | None":
"""Main config loader: id 0 -> Auto mode; negative -> YAML; positive -> DB."""
if is_auto_mode(config_id):
return AgentConfig.from_auto_mode()
if config_id < 0:
# In-memory covers static YAML + dynamic OpenRouter configs.
from app.config import config as app_config
for cfg in app_config.GLOBAL_LLM_CONFIGS:
if cfg.get("id") == config_id:
return AgentConfig.from_yaml_config(cfg)
yaml_config = load_llm_config_from_yaml(config_id)
if yaml_config:
return AgentConfig.from_yaml_config(yaml_config)
return None
else:
return await load_new_llm_config_from_db(session, config_id)
def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None:
"""Create a ChatLiteLLM instance from a global LLM config dictionary."""
if llm_config.get("custom_provider"):

View file

@ -2,11 +2,11 @@
Automations run unattended, so every run must be **billable**: it may only use
either a premium global model (``billing_tier == "premium"``) or a user-provided
BYOK model (a positive config id pointing at a per-user/per-space DB row). Free
BYOK model (a positive model id pointing at a per-user/per-space DB row). Free
global models and Auto mode are blocked, because Auto can dispatch to a free
deployment and free models aren't metered in premium credits.
Config id conventions (shared across chat / image / vision):
Model id conventions (shared across chat / image / vision):
- ``id == 0`` Auto mode (``AUTO_MODE_ID`` / ``IMAGE_GEN_AUTO_MODE_ID`` /
``VISION_AUTO_MODE_ID``). Blocked.
- ``id < 0`` global YAML/OpenRouter config. Allowed only if premium.
@ -82,7 +82,7 @@ def get_model_eligibility(
The ID-based core shared by both the search-space path (creation/eligibility)
and the captured-snapshot path (runtime backstop). Each violation is
``{"kind", "config_id", "reason"}``.
``{"kind", "model_id", "reason"}``.
"""
checks: list[tuple[ModelKind, int | None]] = [
("chat", chat_model_id),
@ -91,10 +91,10 @@ def get_model_eligibility(
]
violations: list[dict] = []
for kind, config_id in checks:
allowed, reason = _classify(kind, config_id)
for kind, model_id in checks:
allowed, reason = _classify(kind, model_id)
if not allowed:
violations.append({"kind": kind, "model_id": config_id, "reason": reason})
violations.append({"kind": kind, "model_id": model_id, "reason": reason})
return {"allowed": not violations, "violations": violations}

View file

@ -119,7 +119,7 @@ def load_global_llm_configs():
else:
seen_slugs[slug] = cfg.get("id", 0)
# Stamp Auto (Fastest) ranking metadata. YAML configs are always
# Stamp Auto ranking metadata. YAML configs are always
# Tier A — operator-curated, locked first when premium-eligible.
# The OpenRouter refresh tick later re-stamps health for any cfg
# whose provider == "openrouter" via _enrich_health.
@ -210,42 +210,6 @@ def load_global_image_gen_configs():
return []
def load_global_vision_llm_configs():
data = _global_config_data()
if not data:
return []
try:
configs = copy.deepcopy(data.get("global_vision_llm_configs", []) or [])
for cfg in configs:
if isinstance(cfg, dict):
cfg.setdefault("billing_tier", "free")
return configs
except Exception as e:
print(f"Warning: Failed to load global vision LLM configs: {e}")
return []
def load_vision_llm_router_settings():
default_settings = {
"routing_strategy": "usage-based-routing",
"num_retries": 3,
"allowed_fails": 3,
"cooldown_time": 60,
}
data = _global_config_data()
if not data:
return default_settings
try:
settings = data.get("vision_llm_router_settings", {})
return {**default_settings, **settings}
except Exception as e:
print(f"Warning: Failed to load vision LLM router settings: {e}")
return default_settings
def load_image_gen_router_settings():
"""
Load router settings for image generation Auto mode from YAML file.
@ -482,12 +446,6 @@ def initialize_image_gen_router():
print(f"Warning: Failed to initialize Image Generation Router: {e}")
def initialize_vision_llm_router():
# Retired: vision Auto now uses shared capability-filtered model selection
# over GLOBAL/BYOK chat models with supports_image_input=true.
return
class Config:
# Check if ffmpeg is installed
if not is_ffmpeg_installed():
@ -869,12 +827,6 @@ class Config:
# Router settings for Image Generation Auto mode
IMAGE_GEN_ROUTER_SETTINGS = load_image_gen_router_settings()
# Global Vision LLM Configurations (optional)
GLOBAL_VISION_LLM_CONFIGS = load_global_vision_llm_configs()
# Router settings for Vision LLM Auto mode
VISION_LLM_ROUTER_SETTINGS = load_vision_llm_router_settings()
# Virtual GLOBAL connection/model catalog. This is server-only metadata
# derived from global_llm_config.yaml; GLOBAL keys are not stored in DB.
from app.services.global_model_catalog import (

View file

@ -433,87 +433,11 @@ global_image_generation_configs:
# rpm: 30
# litellm_params: {}
# =============================================================================
# Vision LLM Configuration
# =============================================================================
# These configurations power the vision autocomplete feature (screenshot analysis).
# Only vision-capable models should be used here (e.g. GPT-4o, Gemini Pro, Claude 3).
# Supported providers: OpenAI, Anthropic, Google, Azure OpenAI, Vertex AI, Bedrock,
# xAI, OpenRouter, Ollama, Groq, Together AI, Fireworks AI, DeepSeek, Mistral, Custom
#
# Auto mode (ID 0) uses LiteLLM Router for load balancing across all vision configs.
# Router Settings for Vision LLM Auto Mode
vision_llm_router_settings:
routing_strategy: "usage-based-routing"
num_retries: 3
allowed_fails: 3
cooldown_time: 60
global_vision_llm_configs:
# Example: OpenAI GPT-4o (recommended for vision)
- id: -1001
name: "Global GPT-4o Vision"
description: "OpenAI's GPT-4o with strong vision capabilities"
litellm_provider: "openai"
model_name: "gpt-4o"
api_key: "sk-your-openai-api-key-here"
api_base: "https://api.openai.com/v1"
rpm: 500
tpm: 100000
litellm_params:
temperature: 0.3
max_tokens: 1000
# Example: Google Gemini 2.0 Flash
- id: -1002
name: "Global Gemini 2.0 Flash"
description: "Google's fast vision model with large context"
litellm_provider: "gemini"
model_name: "gemini-2.0-flash"
api_key: "your-google-ai-api-key-here"
api_base: "https://generativelanguage.googleapis.com/v1beta"
rpm: 1000
tpm: 200000
litellm_params:
temperature: 0.3
max_tokens: 1000
# Example: Anthropic Claude 3.5 Sonnet
- id: -1003
name: "Global Claude 3.5 Sonnet Vision"
description: "Anthropic's Claude 3.5 Sonnet with vision support"
litellm_provider: "anthropic"
model_name: "claude-3-5-sonnet-20241022"
api_key: "sk-ant-your-anthropic-api-key-here"
api_base: "https://api.anthropic.com/v1"
rpm: 1000
tpm: 100000
litellm_params:
temperature: 0.3
max_tokens: 1000
# Example: Azure OpenAI GPT-4o
# - id: -1004
# name: "Global Azure GPT-4o Vision"
# description: "Azure-hosted GPT-4o for vision analysis"
# litellm_provider: "azure"
# model_name: "azure/gpt-4o-deployment"
# api_key: "your-azure-api-key-here"
# api_base: "https://your-resource.openai.azure.com"
# api_version: "2024-02-15-preview"
# rpm: 500
# tpm: 100000
# litellm_params:
# temperature: 0.3
# max_tokens: 1000
# base_model: "gpt-4o"
# Notes:
# - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing
# - Use negative IDs to distinguish global models from BYOK/local DB models
# - IDs must be unique across chat, vision, and image generation configs
# - Suggested static ranges: chat -1..-999, vision -1001..-1999, image -2001..-2999
# - IDs must be unique across chat and image generation configs
# - Suggested static ranges: chat -1..-999, image -2001..-2999
# - The 'api_key' field will not be exposed to users via API
# - system_instructions: Custom prompt or empty string to use defaults
# - use_default_system_instructions: true = use SURFSENSE_SYSTEM_INSTRUCTIONS when system_instructions is empty

View file

@ -198,81 +198,6 @@ class DocumentStatus:
return None
class LiteLLMProvider(StrEnum):
"""
Enum for LLM providers supported by LiteLLM.
"""
OPENAI = "OPENAI"
ANTHROPIC = "ANTHROPIC"
GOOGLE = "GOOGLE"
AZURE_OPENAI = "AZURE_OPENAI"
BEDROCK = "BEDROCK"
VERTEX_AI = "VERTEX_AI"
GROQ = "GROQ"
COHERE = "COHERE"
MISTRAL = "MISTRAL"
DEEPSEEK = "DEEPSEEK"
XAI = "XAI"
OPENROUTER = "OPENROUTER"
TOGETHER_AI = "TOGETHER_AI"
FIREWORKS_AI = "FIREWORKS_AI"
REPLICATE = "REPLICATE"
PERPLEXITY = "PERPLEXITY"
OLLAMA = "OLLAMA"
ALIBABA_QWEN = "ALIBABA_QWEN"
MOONSHOT = "MOONSHOT"
ZHIPU = "ZHIPU"
ANYSCALE = "ANYSCALE"
DEEPINFRA = "DEEPINFRA"
CEREBRAS = "CEREBRAS"
SAMBANOVA = "SAMBANOVA"
AI21 = "AI21"
CLOUDFLARE = "CLOUDFLARE"
DATABRICKS = "DATABRICKS"
COMETAPI = "COMETAPI"
HUGGINGFACE = "HUGGINGFACE"
GITHUB_MODELS = "GITHUB_MODELS"
MINIMAX = "MINIMAX"
CUSTOM = "CUSTOM"
class ImageGenProvider(StrEnum):
"""
Enum for image generation providers supported by LiteLLM.
This is a subset of LLM providers only those that support image generation.
See: https://docs.litellm.ai/docs/image_generation#supported-providers
"""
OPENAI = "OPENAI"
AZURE_OPENAI = "AZURE_OPENAI"
GOOGLE = "GOOGLE" # Google AI Studio
VERTEX_AI = "VERTEX_AI"
BEDROCK = "BEDROCK" # AWS Bedrock
RECRAFT = "RECRAFT"
OPENROUTER = "OPENROUTER"
XINFERENCE = "XINFERENCE"
NSCALE = "NSCALE"
class VisionProvider(StrEnum):
OPENAI = "OPENAI"
ANTHROPIC = "ANTHROPIC"
GOOGLE = "GOOGLE"
AZURE_OPENAI = "AZURE_OPENAI"
VERTEX_AI = "VERTEX_AI"
BEDROCK = "BEDROCK"
XAI = "XAI"
OPENROUTER = "OPENROUTER"
OLLAMA = "OLLAMA"
GROQ = "GROQ"
TOGETHER_AI = "TOGETHER_AI"
FIREWORKS_AI = "FIREWORKS_AI"
DEEPSEEK = "DEEPSEEK"
MISTRAL = "MISTRAL"
CUSTOM = "CUSTOM"
class ConnectionScope(StrEnum):
GLOBAL = "GLOBAL"
SEARCH_SPACE = "SEARCH_SPACE"
@ -710,11 +635,11 @@ class NewChatThread(BaseModel, TimestampMixin):
default=False,
server_default="false",
)
# Auto (Fastest) model pin for this thread: concrete resolved global LLM
# Auto model pin for this thread: concrete resolved global LLM
# config id. NULL means no pin; Auto will resolve on the next turn.
# Single-writer invariant: only app.services.auto_model_pin_service sets
# or clears this column (plus bulk clears when a search space's
# agent_llm_id changes). Unindexed: all reads are by primary key.
# chat_model_id changes). Unindexed: all reads are by primary key.
pinned_llm_config_id = Column(Integer, nullable=True)
# Surface metadata for first-party SurfSense and external chat threads.
@ -1686,75 +1611,6 @@ class Model(BaseModel, TimestampMixin):
)
class ImageGenerationConfig(BaseModel, TimestampMixin):
"""
Dedicated configuration table for image generation models.
Separate from NewLLMConfig because image generation models don't need
system_instructions, citations_enabled, or use_default_system_instructions.
They only need provider credentials and model parameters.
"""
__tablename__ = "image_generation_configs"
name = Column(String(100), nullable=False, index=True)
description = Column(String(500), nullable=True)
# Provider & model (uses ImageGenProvider, NOT LiteLLMProvider)
provider = Column(SQLAlchemyEnum(ImageGenProvider), nullable=False)
custom_provider = Column(String(100), nullable=True)
model_name = Column(String(100), nullable=False)
# Credentials
api_key = Column(String, nullable=False)
api_base = Column(String(500), nullable=True)
api_version = Column(String(50), nullable=True) # Azure-specific
# Additional litellm parameters
litellm_params = Column(JSON, nullable=True, default={})
# Relationships
search_space_id = Column(
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
)
search_space = relationship(
"SearchSpace", back_populates="image_generation_configs"
)
# User who created this config
user_id = Column(
UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
)
user = relationship("User", back_populates="image_generation_configs")
class VisionLLMConfig(BaseModel, TimestampMixin):
__tablename__ = "vision_llm_configs"
name = Column(String(100), nullable=False, index=True)
description = Column(String(500), nullable=True)
provider = Column(SQLAlchemyEnum(VisionProvider), nullable=False)
custom_provider = Column(String(100), nullable=True)
model_name = Column(String(100), nullable=False)
api_key = Column(String, nullable=False)
api_base = Column(String(500), nullable=True)
api_version = Column(String(50), nullable=True)
litellm_params = Column(JSON, nullable=True, default={})
search_space_id = Column(
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
)
search_space = relationship("SearchSpace", back_populates="vision_llm_configs")
user_id = Column(
UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
)
user = relationship("User", back_populates="vision_llm_configs")
class ImageGeneration(BaseModel, TimestampMixin):
"""
Stores image generation requests and results using litellm.aimage_generation().
@ -1786,10 +1642,9 @@ class ImageGeneration(BaseModel, TimestampMixin):
style = Column(String(50), nullable=True) # Model-specific style parameter
response_format = Column(String(50), nullable=True) # "url" or "b64_json"
# Image generation config reference
# 0 = Auto mode (router), negative IDs = global configs from YAML,
# positive IDs = ImageGenerationConfig records in DB
image_generation_config_id = Column(Integer, nullable=True)
# Image generation model provenance.
# 0 = Auto mode, negative IDs = GLOBAL models, positive IDs = Model records.
image_gen_model_id = Column(Integer, nullable=True)
# Response data (full litellm response as JSONB) — present on success
response_data = Column(JSONB, nullable=True)
@ -1831,23 +1686,7 @@ class SearchSpace(BaseModel, TimestampMixin):
shared_memory_md = Column(Text, nullable=True, server_default="")
# Search space-level LLM preferences (shared by all members)
# Note: ID values:
# - 0: Auto mode (uses LiteLLM Router for load balancing) - default for new search spaces
# - Negative IDs: Global configs from YAML
# - Positive IDs: Custom configs from DB (NewLLMConfig table)
agent_llm_id = Column(
Integer, nullable=True, default=0
) # For chat operations, defaults to Auto mode
image_generation_config_id = Column(
Integer, nullable=True, default=0
) # For image generation, defaults to Auto mode
vision_llm_config_id = Column(
Integer, nullable=True, default=0
) # For vision/screenshot analysis, defaults to Auto mode
# New connection/model role bindings. These supersede the legacy config
# columns above without removing them in this PR.
# Connection/model role bindings.
# Note: ID values preserve the existing convention:
# - 0: Auto mode
# - Negative IDs: Global virtual models from global_llm_config.yaml
@ -1931,24 +1770,6 @@ class SearchSpace(BaseModel, TimestampMixin):
order_by="SearchSourceConnector.id",
cascade="all, delete-orphan",
)
new_llm_configs = relationship(
"NewLLMConfig",
back_populates="search_space",
order_by="NewLLMConfig.id",
cascade="all, delete-orphan",
)
image_generation_configs = relationship(
"ImageGenerationConfig",
back_populates="search_space",
order_by="ImageGenerationConfig.id",
cascade="all, delete-orphan",
)
vision_llm_configs = relationship(
"VisionLLMConfig",
back_populates="search_space",
order_by="VisionLLMConfig.id",
cascade="all, delete-orphan",
)
connections = relationship(
"Connection",
back_populates="search_space",
@ -2057,64 +1878,6 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
documents = relationship("Document", back_populates="connector")
class NewLLMConfig(BaseModel, TimestampMixin):
"""
New LLM configuration table that combines model settings with prompt configuration.
This table provides:
- LLM model configuration (provider, model_name, api_key, etc.)
- Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
- Citation toggle (enable/disable citation instructions)
Note: Tools instructions are built by get_tools_instructions(thread_visibility) (personal vs shared memory).
"""
__tablename__ = "new_llm_configs"
name = Column(String(100), nullable=False, index=True)
description = Column(String(500), nullable=True)
# === LLM Model Configuration (from original LLMConfig, excluding 'language') ===
# Provider from the enum
provider = Column(SQLAlchemyEnum(LiteLLMProvider), nullable=False)
# Custom provider name when provider is CUSTOM
custom_provider = Column(String(100), nullable=True)
# Just the model name without provider prefix
model_name = Column(String(100), nullable=False)
# API Key should be encrypted before storing
api_key = Column(String, nullable=False)
api_base = Column(String(500), nullable=True)
# For any other parameters that litellm supports
litellm_params = Column(JSON, nullable=True, default={})
# === Prompt Configuration ===
# Configurable system instructions (defaults to SURFSENSE_SYSTEM_INSTRUCTIONS)
# Users can customize this from the UI
system_instructions = Column(
Text,
nullable=False,
default="", # Empty string means use default SURFSENSE_SYSTEM_INSTRUCTIONS
)
# Whether to use the default system instructions when system_instructions is empty
use_default_system_instructions = Column(Boolean, nullable=False, default=True)
# Citation toggle - when enabled, SURFSENSE_CITATION_INSTRUCTIONS is injected
# When disabled, an anti-citation prompt is injected instead
citations_enabled = Column(Boolean, nullable=False, default=True)
# === Relationships ===
search_space_id = Column(
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
)
search_space = relationship("SearchSpace", back_populates="new_llm_configs")
# User who created this config
user_id = Column(
UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
)
user = relationship("User", back_populates="new_llm_configs")
class Log(BaseModel, TimestampMixin):
__tablename__ = "logs"
@ -2481,25 +2244,6 @@ if config.AUTH_TYPE == "GOOGLE":
passive_deletes=True,
)
# LLM configs created by this user
new_llm_configs = relationship(
"NewLLMConfig",
back_populates="user",
passive_deletes=True,
)
# Image generation configs created by this user
image_generation_configs = relationship(
"ImageGenerationConfig",
back_populates="user",
passive_deletes=True,
)
vision_llm_configs = relationship(
"VisionLLMConfig",
back_populates="user",
passive_deletes=True,
)
connections = relationship(
"Connection",
back_populates="user",
@ -2632,25 +2376,6 @@ else:
passive_deletes=True,
)
# LLM configs created by this user
new_llm_configs = relationship(
"NewLLMConfig",
back_populates="user",
passive_deletes=True,
)
# Image generation configs created by this user
image_generation_configs = relationship(
"ImageGenerationConfig",
back_populates="user",
passive_deletes=True,
)
vision_llm_configs = relationship(
"VisionLLMConfig",
back_populates="user",
passive_deletes=True,
)
connections = relationship(
"Connection",
back_populates="user",

View file

@ -82,7 +82,7 @@ def build_configurable_system_prompt(
*,
model_name: str | None = None,
) -> str:
"""Build a configurable SurfSense system prompt (NewLLMConfig path).
"""Build a configurable SurfSense system prompt.
See :func:`app.prompts.system_prompt_composer.composer.compose_system_prompt`
for full parameter docs.
@ -104,7 +104,7 @@ def build_configurable_system_prompt(
def get_default_system_instructions() -> str:
"""Return the default ``<system_instruction>`` block (no tools / citations).
Useful for populating the UI when seeding ``NewLLMConfig.system_instructions``.
Useful for populating the UI when editing custom system instructions.
The output reflects the current fragment tree, not a baked-in constant.
"""
resolved_today = datetime.now(UTC).date().isoformat()

View file

@ -348,8 +348,7 @@ def compose_system_prompt(
mcp_connector_tools: ``{server_name: [tool_names...]}`` to inject
an explicit MCP routing block.
custom_system_instructions: Free-form instructions that override
the default ``<system_instruction>`` block (legacy support
for ``NewLLMConfig.system_instructions``).
the default ``<system_instruction>`` block.
use_default_system_instructions: When ``custom_system_instructions``
is empty/None, fall back to defaults (legacy semantics).
citations_enabled: Include ``citations_on.md`` (true) or

View file

@ -47,7 +47,6 @@ from .model_connections_routes import router as model_connections_router
from .memory_routes import router as memory_router
from .model_list_routes import router as model_list_router
from .new_chat_routes import router as new_chat_router
from .new_llm_config_routes import router as new_llm_config_router
from .notes_routes import router as notes_router
from .notion_add_connector_route import router as notion_add_connector_router
from .obsidian_plugin_routes import router as obsidian_plugin_router
@ -64,7 +63,6 @@ from .stripe_routes import router as stripe_router
from .team_memory_routes import router as team_memory_router
from .teams_add_connector_route import router as teams_add_connector_router
from .video_presentations_routes import router as video_presentations_router
from .vision_llm_routes import router as vision_llm_router
from .youtube_routes import router as youtube_router
router = APIRouter()
@ -99,7 +97,6 @@ router.include_router(
) # Video presentation status and streaming
router.include_router(reports_router) # Report CRUD and multi-format export
router.include_router(image_generation_router) # Image generation via litellm
router.include_router(vision_llm_router) # Vision LLM configs for screenshot analysis
router.include_router(search_source_connectors_router)
router.include_router(google_calendar_add_connector_router)
router.include_router(google_gmail_add_connector_router)
@ -117,7 +114,6 @@ router.include_router(jira_add_connector_router)
router.include_router(confluence_add_connector_router)
router.include_router(clickup_add_connector_router)
router.include_router(dropbox_add_connector_router)
router.include_router(new_llm_config_router) # LLM configs with prompt configuration
router.include_router(model_connections_router) # Connection-centric model catalog
router.include_router(model_list_router) # Dynamic model catalogue from OpenRouter
router.include_router(logs_router)

View file

@ -1,7 +1,5 @@
"""
Image Generation routes:
- CRUD for ImageGenerationConfig (user-created image model configs)
- Global image gen configs endpoint (from YAML)
- Image generation execution (calls litellm.aimage_generation())
- CRUD for ImageGeneration records (results)
- Image serving endpoint (serves b64_json images from DB, protected by signed tokens)
@ -21,7 +19,6 @@ from sqlalchemy.orm import selectinload
from app.config import config
from app.db import (
ImageGeneration,
ImageGenerationConfig,
Model,
Permission,
SearchSpace,
@ -30,14 +27,14 @@ from app.db import (
get_async_session,
)
from app.schemas import (
GlobalImageGenConfigRead,
ImageGenerationConfigCreate,
ImageGenerationConfigRead,
ImageGenerationConfigUpdate,
ImageGenerationCreate,
ImageGenerationListRead,
ImageGenerationRead,
)
from app.services.auto_model_pin_service import (
auto_model_candidates,
choose_auto_model_candidate,
)
from app.services.billable_calls import (
DEFAULT_IMAGE_RESERVE_MICROS,
QuotaInsufficientError,
@ -47,12 +44,8 @@ from app.services.image_gen_router_service import (
IMAGE_GEN_AUTO_MODE_ID,
is_image_gen_auto_mode,
)
from app.services.auto_model_pin_service import (
auto_model_candidates,
choose_auto_model_candidate,
)
from app.services.model_resolver import to_litellm
from app.services.model_capabilities import has_capability
from app.services.model_resolver import to_litellm
from app.users import current_active_user
from app.utils.rbac import check_permission
from app.utils.signed_image_urls import verify_image_token
@ -131,14 +124,14 @@ async def _execute_image_generation(
Call litellm.aimage_generation() with the appropriate config.
Resolution order:
1. Explicit image_generation_config_id on the request
2. Search space's image_generation_config_id preference
1. Explicit image_gen_model_id on the request
2. Search space's image_gen_model_id preference
3. Falls back to Auto mode if available
"""
config_id = image_gen.image_generation_config_id
config_id = image_gen.image_gen_model_id
if config_id is None:
config_id = search_space.image_gen_model_id or IMAGE_GEN_AUTO_MODE_ID
image_gen.image_generation_config_id = config_id
image_gen.image_gen_model_id = config_id
# Build kwargs
gen_kwargs = {}
@ -163,7 +156,7 @@ async def _execute_image_generation(
if not candidates:
raise ValueError("No image-generation models are available for Auto mode")
config_id = int(choose_auto_model_candidate(candidates, search_space.id)["id"])
image_gen.image_generation_config_id = config_id
image_gen.image_gen_model_id = config_id
if config_id < 0:
global_model = _get_global_model(config_id)
@ -228,266 +221,6 @@ async def _execute_image_generation(
image_gen.model = hidden["model"]
# =============================================================================
# Global Image Generation Configs (from YAML)
# =============================================================================
@router.get(
"/global-image-generation-configs",
response_model=list[GlobalImageGenConfigRead],
)
async def get_global_image_gen_configs(
user: User = Depends(current_active_user),
):
"""Get all global image generation configs. API keys are hidden."""
try:
global_configs = config.GLOBAL_IMAGE_GEN_CONFIGS
safe_configs = []
if global_configs and len(global_configs) > 0:
safe_configs.append(
{
"id": 0,
"name": "Auto (Fastest)",
"description": "Automatically routes across available image generation providers.",
"provider": "AUTO",
"custom_provider": None,
"model_name": "auto",
"api_base": None,
"api_version": None,
"litellm_params": {},
"is_global": True,
"is_auto_mode": True,
# Auto mode currently treated as free until per-deployment
# billing-tier surfacing lands (see _resolve_billing_for_image_gen).
"billing_tier": "free",
"is_premium": False,
}
)
for cfg in global_configs:
billing_tier = str(cfg.get("billing_tier", "free")).lower()
safe_configs.append(
{
"id": cfg.get("id"),
"name": cfg.get("name"),
"description": cfg.get("description"),
"provider": cfg.get("provider") or cfg.get("litellm_provider"),
"custom_provider": cfg.get("custom_provider"),
"model_name": cfg.get("model_name"),
"api_base": cfg.get("api_base") or None,
"api_version": cfg.get("api_version") or None,
"litellm_params": cfg.get("litellm_params", {}),
"is_global": True,
"billing_tier": billing_tier,
# Mirror chat (``new_llm_config_routes``) so the new-chat
# selector's premium badge logic keys off the same
# field across chat / image / vision tabs.
"is_premium": billing_tier == "premium",
"quota_reserve_micros": cfg.get("quota_reserve_micros"),
}
)
return safe_configs
except Exception as e:
logger.exception("Failed to fetch global image generation configs")
raise HTTPException(
status_code=500, detail=f"Failed to fetch configs: {e!s}"
) from e
# =============================================================================
# ImageGenerationConfig CRUD
# =============================================================================
@router.post("/image-generation-configs", response_model=ImageGenerationConfigRead)
async def create_image_gen_config(
config_data: ImageGenerationConfigCreate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""Create a new image generation config for a search space."""
try:
await check_permission(
session,
user,
config_data.search_space_id,
Permission.IMAGE_GENERATIONS_CREATE.value,
"You don't have permission to create image generation configs in this search space",
)
db_config = ImageGenerationConfig(**config_data.model_dump(), user_id=user.id)
session.add(db_config)
await session.commit()
await session.refresh(db_config)
return db_config
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to create ImageGenerationConfig")
raise HTTPException(
status_code=500, detail=f"Failed to create config: {e!s}"
) from e
@router.get("/image-generation-configs", response_model=list[ImageGenerationConfigRead])
async def list_image_gen_configs(
search_space_id: int,
skip: int = 0,
limit: int = 100,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""List image generation configs for a search space."""
try:
await check_permission(
session,
user,
search_space_id,
Permission.IMAGE_GENERATIONS_READ.value,
"You don't have permission to view image generation configs in this search space",
)
result = await session.execute(
select(ImageGenerationConfig)
.filter(ImageGenerationConfig.search_space_id == search_space_id)
.order_by(ImageGenerationConfig.created_at.desc())
.offset(skip)
.limit(limit)
)
return result.scalars().all()
except HTTPException:
raise
except Exception as e:
logger.exception("Failed to list ImageGenerationConfigs")
raise HTTPException(
status_code=500, detail=f"Failed to fetch configs: {e!s}"
) from e
@router.get(
"/image-generation-configs/{config_id}", response_model=ImageGenerationConfigRead
)
async def get_image_gen_config(
config_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""Get a specific image generation config by ID."""
try:
result = await session.execute(
select(ImageGenerationConfig).filter(ImageGenerationConfig.id == config_id)
)
db_config = result.scalars().first()
if not db_config:
raise HTTPException(status_code=404, detail="Config not found")
await check_permission(
session,
user,
db_config.search_space_id,
Permission.IMAGE_GENERATIONS_READ.value,
"You don't have permission to view image generation configs in this search space",
)
return db_config
except HTTPException:
raise
except Exception as e:
logger.exception("Failed to get ImageGenerationConfig")
raise HTTPException(
status_code=500, detail=f"Failed to fetch config: {e!s}"
) from e
@router.put(
"/image-generation-configs/{config_id}", response_model=ImageGenerationConfigRead
)
async def update_image_gen_config(
config_id: int,
update_data: ImageGenerationConfigUpdate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""Update an existing image generation config."""
try:
result = await session.execute(
select(ImageGenerationConfig).filter(ImageGenerationConfig.id == config_id)
)
db_config = result.scalars().first()
if not db_config:
raise HTTPException(status_code=404, detail="Config not found")
await check_permission(
session,
user,
db_config.search_space_id,
Permission.IMAGE_GENERATIONS_CREATE.value,
"You don't have permission to update image generation configs in this search space",
)
for key, value in update_data.model_dump(exclude_unset=True).items():
setattr(db_config, key, value)
await session.commit()
await session.refresh(db_config)
return db_config
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to update ImageGenerationConfig")
raise HTTPException(
status_code=500, detail=f"Failed to update config: {e!s}"
) from e
@router.delete("/image-generation-configs/{config_id}", response_model=dict)
async def delete_image_gen_config(
config_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""Delete an image generation config."""
try:
result = await session.execute(
select(ImageGenerationConfig).filter(ImageGenerationConfig.id == config_id)
)
db_config = result.scalars().first()
if not db_config:
raise HTTPException(status_code=404, detail="Config not found")
await check_permission(
session,
user,
db_config.search_space_id,
Permission.IMAGE_GENERATIONS_DELETE.value,
"You don't have permission to delete image generation configs in this search space",
)
await session.delete(db_config)
await session.commit()
return {
"message": "Image generation config deleted successfully",
"id": config_id,
}
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to delete ImageGenerationConfig")
raise HTTPException(
status_code=500, detail=f"Failed to delete config: {e!s}"
) from e
# =============================================================================
# Image Generation Execution + Results CRUD
# =============================================================================
@ -536,7 +269,7 @@ async def create_image_generation(
raise HTTPException(status_code=404, detail="Search space not found")
billing_tier, base_model, reserve_micros = await _resolve_billing_for_image_gen(
session, data.image_generation_config_id, search_space
session, data.image_gen_model_id, search_space
)
# billable_call runs OUTSIDE the inner try/except so QuotaInsufficientError
@ -562,7 +295,7 @@ async def create_image_generation(
size=data.size,
style=data.style,
response_format=data.response_format,
image_generation_config_id=data.image_generation_config_id,
image_gen_model_id=data.image_gen_model_id,
search_space_id=data.search_space_id,
created_by_id=user.id,
)

View file

@ -11,6 +11,7 @@ from app.db import (
ConnectionScope,
Model,
ModelSource,
NewChatThread,
Permission,
SearchSpace,
User,
@ -708,12 +709,26 @@ async def update_model_roles(
search_space = await _get_search_space(session, search_space_id)
updates = data.model_dump(exclude_unset=True)
if "chat_model_id" in updates:
search_space.chat_model_id = await _validate_role_model_id(
previous_chat_model_id = search_space.chat_model_id
next_chat_model_id = await _validate_role_model_id(
session,
search_space_id=search_space_id,
model_id=updates["chat_model_id"],
capability="chat",
)
search_space.chat_model_id = next_chat_model_id
if next_chat_model_id != previous_chat_model_id:
await session.execute(
update(NewChatThread)
.where(NewChatThread.search_space_id == search_space_id)
.values(pinned_llm_config_id=None)
)
logger.info(
"Cleared auto model pins for search_space_id=%s after chat_model_id change (%s -> %s)",
search_space_id,
previous_chat_model_id,
next_chat_model_id,
)
if "vision_model_id" in updates:
search_space.vision_model_id = await _validate_role_model_id(
session,

View file

@ -1,480 +0,0 @@
"""
API routes for NewLLMConfig CRUD operations.
NewLLMConfig combines model settings with prompt configuration:
- LLM provider, model, API key, etc.
- Configurable system instructions
- Citation toggle
"""
import logging
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.config import config
from app.db import (
NewLLMConfig,
Permission,
User,
get_async_session,
)
from app.prompts.default_system_instructions import get_default_system_instructions
from app.schemas import (
DefaultSystemInstructionsResponse,
GlobalNewLLMConfigRead,
NewLLMConfigCreate,
NewLLMConfigRead,
NewLLMConfigUpdate,
)
from app.services.llm_service import validate_llm_config
from app.services.provider_capabilities import derive_supports_image_input
from app.users import current_active_user
from app.utils.rbac import check_permission
router = APIRouter()
logger = logging.getLogger(__name__)
def _serialize_byok_config(config: NewLLMConfig) -> NewLLMConfigRead:
"""Augment a BYOK chat config row with the derived ``supports_image_input``.
There is no DB column for ``supports_image_input`` the value is
resolved at the API boundary from LiteLLM's authoritative model map
(default-allow on unknown). Returning ``NewLLMConfigRead`` here keeps
the response shape consistent across list / detail / create / update
endpoints without having to remember to set the field at every call
site.
"""
provider_value = (
config.provider.value
if hasattr(config.provider, "value")
else str(config.provider)
)
litellm_params = config.litellm_params or {}
base_model = (
litellm_params.get("base_model") if isinstance(litellm_params, dict) else None
)
supports_image_input = derive_supports_image_input(
provider=provider_value.lower(),
model_name=config.model_name,
base_model=base_model,
custom_provider=config.custom_provider,
)
# ``model_validate`` runs the Pydantic conversion using the ORM
# attribute access path enabled by ``ConfigDict(from_attributes=True)``,
# then we layer the derived field on. ``model_copy(update=...)`` keeps
# the surface immutable from the caller's perspective.
base_read = NewLLMConfigRead.model_validate(config)
return base_read.model_copy(update={"supports_image_input": supports_image_input})
# =============================================================================
# Global Configs Routes
# =============================================================================
@router.get("/global-new-llm-configs", response_model=list[GlobalNewLLMConfigRead])
async def get_global_new_llm_configs(
user: User = Depends(current_active_user),
):
"""
Get all available global NewLLMConfig configurations.
These are pre-configured by the system administrator and available to all users.
API keys are not exposed through this endpoint.
Includes:
- Auto mode (ID 0): Uses LiteLLM Router for automatic load balancing
- Global configs (negative IDs): Individual pre-configured LLM providers
"""
try:
global_configs = config.GLOBAL_LLM_CONFIGS
safe_configs = []
# Only include Auto mode if there are actual global configs to route to
# Auto mode requires at least one global config with valid API key
if global_configs and len(global_configs) > 0:
safe_configs.append(
{
"id": 0,
"name": "Auto (Fastest)",
"description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling. Recommended for most users.",
"provider": "AUTO",
"custom_provider": None,
"model_name": "auto",
"api_base": None,
"litellm_params": {},
"system_instructions": "",
"use_default_system_instructions": True,
"citations_enabled": True,
"is_global": True,
"is_auto_mode": True,
"billing_tier": "free",
"is_premium": False,
"anonymous_enabled": False,
"seo_enabled": False,
"seo_slug": None,
"seo_title": None,
"seo_description": None,
"quota_reserve_tokens": None,
# Auto routes across the configured pool, which usually
# includes at least one vision-capable deployment, so
# treat Auto as image-capable. The router itself will
# still pick a vision-capable deployment for messages
# carrying image_url blocks (LiteLLM Router falls back
# on ``404`` per its ``allowed_fails`` policy).
"supports_image_input": True,
}
)
# Add individual global configs
for cfg in global_configs:
# Capability resolution: explicit value (YAML override or OR
# `_supports_image_input(model)` payload baked in by the
# OpenRouter integration service) wins. Fall back to the
# LiteLLM-driven helper which default-allows on unknown so
# we don't hide vision-capable models that happen to lack a
# YAML annotation. The streaming task safety net is the
# only place a False ever blocks.
if "supports_image_input" in cfg:
supports_image_input = bool(cfg.get("supports_image_input"))
else:
cfg_litellm_params = cfg.get("litellm_params") or {}
cfg_base_model = (
cfg_litellm_params.get("base_model")
if isinstance(cfg_litellm_params, dict)
else None
)
supports_image_input = derive_supports_image_input(
provider=cfg.get("provider") or cfg.get("litellm_provider"),
model_name=cfg.get("model_name"),
base_model=cfg_base_model,
custom_provider=cfg.get("custom_provider"),
)
safe_config = {
"id": cfg.get("id"),
"name": cfg.get("name"),
"description": cfg.get("description"),
"provider": cfg.get("provider") or cfg.get("litellm_provider"),
"custom_provider": cfg.get("custom_provider"),
"model_name": cfg.get("model_name"),
"api_base": cfg.get("api_base") or None,
"litellm_params": cfg.get("litellm_params", {}),
# New prompt configuration fields
"system_instructions": cfg.get("system_instructions", ""),
"use_default_system_instructions": cfg.get(
"use_default_system_instructions", True
),
"citations_enabled": cfg.get("citations_enabled", True),
"is_global": True,
"billing_tier": cfg.get("billing_tier", "free"),
"is_premium": cfg.get("billing_tier", "free") == "premium",
"anonymous_enabled": cfg.get("anonymous_enabled", False),
"seo_enabled": cfg.get("seo_enabled", False),
"seo_slug": cfg.get("seo_slug"),
"seo_title": cfg.get("seo_title"),
"seo_description": cfg.get("seo_description"),
"quota_reserve_tokens": cfg.get("quota_reserve_tokens"),
"supports_image_input": supports_image_input,
}
safe_configs.append(safe_config)
return safe_configs
except Exception as e:
logger.exception("Failed to fetch global NewLLMConfigs")
raise HTTPException(
status_code=500, detail=f"Failed to fetch global configurations: {e!s}"
) from e
# =============================================================================
# CRUD Routes
# =============================================================================
@router.post("/new-llm-configs", response_model=NewLLMConfigRead)
async def create_new_llm_config(
config_data: NewLLMConfigCreate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Create a new NewLLMConfig for a search space.
Requires LLM_CONFIGS_CREATE permission.
"""
try:
# Verify user has permission
await check_permission(
session,
user,
config_data.search_space_id,
Permission.LLM_CONFIGS_CREATE.value,
"You don't have permission to create LLM configurations in this search space",
)
# Validate the LLM configuration by making a test API call
is_valid, error_message = await validate_llm_config(
provider=config_data.provider.value,
model_name=config_data.model_name,
api_key=config_data.api_key,
api_base=config_data.api_base,
custom_provider=config_data.custom_provider,
litellm_params=config_data.litellm_params,
)
if not is_valid:
raise HTTPException(
status_code=400,
detail=f"Invalid LLM configuration: {error_message}",
)
# Create the config with user association
db_config = NewLLMConfig(**config_data.model_dump(), user_id=user.id)
session.add(db_config)
await session.commit()
await session.refresh(db_config)
return _serialize_byok_config(db_config)
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to create NewLLMConfig")
raise HTTPException(
status_code=500, detail=f"Failed to create configuration: {e!s}"
) from e
@router.get("/new-llm-configs", response_model=list[NewLLMConfigRead])
async def list_new_llm_configs(
search_space_id: int,
skip: int = 0,
limit: int = 100,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Get all NewLLMConfigs for a search space.
Requires LLM_CONFIGS_READ permission.
"""
try:
# Verify user has permission
await check_permission(
session,
user,
search_space_id,
Permission.LLM_CONFIGS_READ.value,
"You don't have permission to view LLM configurations in this search space",
)
result = await session.execute(
select(NewLLMConfig)
.filter(NewLLMConfig.search_space_id == search_space_id)
.order_by(NewLLMConfig.created_at.desc())
.offset(skip)
.limit(limit)
)
return [_serialize_byok_config(cfg) for cfg in result.scalars().all()]
except HTTPException:
raise
except Exception as e:
logger.exception("Failed to list NewLLMConfigs")
raise HTTPException(
status_code=500, detail=f"Failed to fetch configurations: {e!s}"
) from e
@router.get(
"/new-llm-configs/default-system-instructions",
response_model=DefaultSystemInstructionsResponse,
)
async def get_default_system_instructions_endpoint(
user: User = Depends(current_active_user),
):
"""
Get the default SURFSENSE_SYSTEM_INSTRUCTIONS template.
Useful for pre-populating the UI when creating a new configuration.
"""
return DefaultSystemInstructionsResponse(
default_system_instructions=get_default_system_instructions()
)
@router.get("/new-llm-configs/{config_id}", response_model=NewLLMConfigRead)
async def get_new_llm_config(
config_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Get a specific NewLLMConfig by ID.
Requires LLM_CONFIGS_READ permission.
"""
try:
result = await session.execute(
select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
)
config = result.scalars().first()
if not config:
raise HTTPException(status_code=404, detail="Configuration not found")
# Verify user has permission
await check_permission(
session,
user,
config.search_space_id,
Permission.LLM_CONFIGS_READ.value,
"You don't have permission to view LLM configurations in this search space",
)
return _serialize_byok_config(config)
except HTTPException:
raise
except Exception as e:
logger.exception("Failed to get NewLLMConfig")
raise HTTPException(
status_code=500, detail=f"Failed to fetch configuration: {e!s}"
) from e
@router.put("/new-llm-configs/{config_id}", response_model=NewLLMConfigRead)
async def update_new_llm_config(
config_id: int,
update_data: NewLLMConfigUpdate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Update an existing NewLLMConfig.
Requires LLM_CONFIGS_UPDATE permission.
"""
try:
result = await session.execute(
select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
)
config = result.scalars().first()
if not config:
raise HTTPException(status_code=404, detail="Configuration not found")
# Verify user has permission
await check_permission(
session,
user,
config.search_space_id,
Permission.LLM_CONFIGS_UPDATE.value,
"You don't have permission to update LLM configurations in this search space",
)
update_dict = update_data.model_dump(exclude_unset=True)
# If updating LLM settings, validate them
if any(
key in update_dict
for key in [
"provider",
"model_name",
"api_key",
"api_base",
"custom_provider",
"litellm_params",
]
):
# Build the validation config from existing + updates
validation_config = {
"provider": update_dict.get("provider", config.provider).value
if hasattr(update_dict.get("provider", config.provider), "value")
else update_dict.get("provider", config.provider.value),
"model_name": update_dict.get("model_name", config.model_name),
"api_key": update_dict.get("api_key", config.api_key),
"api_base": update_dict.get("api_base", config.api_base),
"custom_provider": update_dict.get(
"custom_provider", config.custom_provider
),
"litellm_params": update_dict.get(
"litellm_params", config.litellm_params
),
}
is_valid, error_message = await validate_llm_config(
provider=validation_config["provider"],
model_name=validation_config["model_name"],
api_key=validation_config["api_key"],
api_base=validation_config["api_base"],
custom_provider=validation_config["custom_provider"],
litellm_params=validation_config["litellm_params"],
)
if not is_valid:
raise HTTPException(
status_code=400,
detail=f"Invalid LLM configuration: {error_message}",
)
# Apply updates
for key, value in update_dict.items():
setattr(config, key, value)
await session.commit()
await session.refresh(config)
return _serialize_byok_config(config)
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to update NewLLMConfig")
raise HTTPException(
status_code=500, detail=f"Failed to update configuration: {e!s}"
) from e
@router.delete("/new-llm-configs/{config_id}", response_model=dict)
async def delete_new_llm_config(
config_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Delete a NewLLMConfig.
Requires LLM_CONFIGS_DELETE permission.
"""
try:
result = await session.execute(
select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
)
config = result.scalars().first()
if not config:
raise HTTPException(status_code=404, detail="Configuration not found")
# Verify user has permission
await check_permission(
session,
user,
config.search_space_id,
Permission.LLM_CONFIGS_DELETE.value,
"You don't have permission to delete LLM configurations in this search space",
)
await session.delete(config)
await session.commit()
return {"message": "Configuration deleted successfully", "id": config_id}
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to delete NewLLMConfig")
raise HTTPException(
status_code=500, detail=f"Failed to delete configuration: {e!s}"
) from e

View file

@ -1,27 +1,20 @@
import logging
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import func, update
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.config import config
from app.db import (
ImageGenerationConfig,
NewChatThread,
NewLLMConfig,
Permission,
SearchSpace,
SearchSpaceMembership,
SearchSpaceRole,
User,
VisionLLMConfig,
get_async_session,
get_default_roles_config,
)
from app.schemas import (
LLMPreferencesRead,
LLMPreferencesUpdate,
SearchSpaceCreate,
SearchSpaceRead,
SearchSpaceUpdate,
@ -377,357 +370,6 @@ async def delete_search_space(
) from e
# =============================================================================
# LLM Preferences Routes
# =============================================================================
async def _get_llm_config_by_id(
session: AsyncSession, config_id: int | None
) -> dict | None:
"""
Get an LLM config by ID as a dictionary. Returns database config for positive IDs,
global config for negative IDs, Auto mode config for ID 0, or None if ID is None.
"""
if config_id is None:
return None
# Auto mode (ID 0) - uses LiteLLM Router for load balancing
if config_id == 0:
return {
"id": 0,
"name": "Auto (Fastest)",
"description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling",
"provider": "AUTO",
"custom_provider": None,
"model_name": "auto",
"api_base": None,
"litellm_params": {},
"system_instructions": "",
"use_default_system_instructions": True,
"citations_enabled": True,
"is_global": True,
"is_auto_mode": True,
}
if config_id < 0:
# Global config - find from YAML
global_configs = config.GLOBAL_LLM_CONFIGS
for cfg in global_configs:
if cfg.get("id") == config_id:
return {
"id": cfg.get("id"),
"name": cfg.get("name"),
"description": cfg.get("description"),
"provider": cfg.get("provider") or cfg.get("litellm_provider"),
"custom_provider": cfg.get("custom_provider"),
"model_name": cfg.get("model_name"),
"api_base": cfg.get("api_base"),
"litellm_params": cfg.get("litellm_params", {}),
"system_instructions": cfg.get("system_instructions", ""),
"use_default_system_instructions": cfg.get(
"use_default_system_instructions", True
),
"citations_enabled": cfg.get("citations_enabled", True),
"is_global": True,
}
return None
else:
# Database config - convert to dict
result = await session.execute(
select(NewLLMConfig).filter(NewLLMConfig.id == config_id)
)
db_config = result.scalars().first()
if db_config:
return {
"id": db_config.id,
"name": db_config.name,
"description": db_config.description,
"provider": db_config.provider.value if db_config.provider else None,
"custom_provider": db_config.custom_provider,
"model_name": db_config.model_name,
"api_key": db_config.api_key,
"api_base": db_config.api_base,
"litellm_params": db_config.litellm_params or {},
"system_instructions": db_config.system_instructions or "",
"use_default_system_instructions": db_config.use_default_system_instructions,
"citations_enabled": db_config.citations_enabled,
"created_at": db_config.created_at.isoformat()
if db_config.created_at
else None,
"search_space_id": db_config.search_space_id,
}
return None
async def _get_image_gen_config_by_id(
session: AsyncSession, config_id: int | None
) -> dict | None:
"""
Get an image generation config by ID as a dictionary.
Returns Auto mode for ID 0, global config for negative IDs,
DB ImageGenerationConfig for positive IDs, or None.
"""
if config_id is None:
return None
if config_id == 0:
return {
"id": 0,
"name": "Auto (Fastest)",
"description": "Automatically routes requests across available image generation providers",
"provider": "AUTO",
"model_name": "auto",
"is_global": True,
"is_auto_mode": True,
"billing_tier": "free",
}
if config_id < 0:
for cfg in config.GLOBAL_IMAGE_GEN_CONFIGS:
if cfg.get("id") == config_id:
return {
"id": cfg.get("id"),
"name": cfg.get("name"),
"description": cfg.get("description"),
"provider": cfg.get("provider") or cfg.get("litellm_provider"),
"custom_provider": cfg.get("custom_provider"),
"model_name": cfg.get("model_name"),
"api_base": cfg.get("api_base") or None,
"api_version": cfg.get("api_version") or None,
"litellm_params": cfg.get("litellm_params", {}),
"is_global": True,
"billing_tier": cfg.get("billing_tier", "free"),
}
return None
# Positive ID: query ImageGenerationConfig table
result = await session.execute(
select(ImageGenerationConfig).filter(ImageGenerationConfig.id == config_id)
)
db_config = result.scalars().first()
if db_config:
return {
"id": db_config.id,
"name": db_config.name,
"description": db_config.description,
"provider": db_config.provider.value if db_config.provider else None,
"custom_provider": db_config.custom_provider,
"model_name": db_config.model_name,
"api_base": db_config.api_base,
"api_version": db_config.api_version,
"litellm_params": db_config.litellm_params or {},
"created_at": db_config.created_at.isoformat()
if db_config.created_at
else None,
"search_space_id": db_config.search_space_id,
}
return None
async def _get_vision_llm_config_by_id(
session: AsyncSession, config_id: int | None
) -> dict | None:
if config_id is None:
return None
if config_id == 0:
return {
"id": 0,
"name": "Auto (Fastest)",
"description": "Automatically routes requests across available vision LLM providers",
"provider": "AUTO",
"model_name": "auto",
"is_global": True,
"is_auto_mode": True,
"billing_tier": "free",
}
if config_id < 0:
for cfg in config.GLOBAL_VISION_LLM_CONFIGS:
if cfg.get("id") == config_id:
return {
"id": cfg.get("id"),
"name": cfg.get("name"),
"description": cfg.get("description"),
"provider": cfg.get("provider") or cfg.get("litellm_provider"),
"custom_provider": cfg.get("custom_provider"),
"model_name": cfg.get("model_name"),
"api_base": cfg.get("api_base") or None,
"api_version": cfg.get("api_version") or None,
"litellm_params": cfg.get("litellm_params", {}),
"is_global": True,
"billing_tier": cfg.get("billing_tier", "free"),
}
return None
result = await session.execute(
select(VisionLLMConfig).filter(VisionLLMConfig.id == config_id)
)
db_config = result.scalars().first()
if db_config:
return {
"id": db_config.id,
"name": db_config.name,
"description": db_config.description,
"provider": db_config.provider.value if db_config.provider else None,
"custom_provider": db_config.custom_provider,
"model_name": db_config.model_name,
"api_base": db_config.api_base,
"api_version": db_config.api_version,
"litellm_params": db_config.litellm_params or {},
"created_at": db_config.created_at.isoformat()
if db_config.created_at
else None,
"search_space_id": db_config.search_space_id,
}
return None
@router.get(
"/search-spaces/{search_space_id}/llm-preferences",
response_model=LLMPreferencesRead,
)
async def get_llm_preferences(
search_space_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Get LLM preferences (role assignments) for a search space.
Requires LLM_CONFIGS_READ permission.
"""
try:
# Check permission
await check_permission(
session,
user,
search_space_id,
Permission.LLM_CONFIGS_READ.value,
"You don't have permission to view LLM preferences",
)
result = await session.execute(
select(SearchSpace).filter(SearchSpace.id == search_space_id)
)
search_space = result.scalars().first()
if not search_space:
raise HTTPException(status_code=404, detail="Search space not found")
# Get full config objects for each role
agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
image_generation_config = await _get_image_gen_config_by_id(
session, search_space.image_generation_config_id
)
vision_llm_config = await _get_vision_llm_config_by_id(
session, search_space.vision_llm_config_id
)
return LLMPreferencesRead(
agent_llm_id=search_space.agent_llm_id,
image_generation_config_id=search_space.image_generation_config_id,
vision_llm_config_id=search_space.vision_llm_config_id,
agent_llm=agent_llm,
image_generation_config=image_generation_config,
vision_llm_config=vision_llm_config,
)
except HTTPException:
raise
except Exception as e:
logger.exception("Failed to get LLM preferences")
raise HTTPException(
status_code=500, detail=f"Failed to get LLM preferences: {e!s}"
) from e
@router.put(
"/search-spaces/{search_space_id}/llm-preferences",
response_model=LLMPreferencesRead,
)
async def update_llm_preferences(
search_space_id: int,
preferences: LLMPreferencesUpdate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Update LLM preferences (role assignments) for a search space.
Requires LLM_CONFIGS_UPDATE permission.
"""
try:
# Check permission
await check_permission(
session,
user,
search_space_id,
Permission.LLM_CONFIGS_UPDATE.value,
"You don't have permission to update LLM preferences",
)
result = await session.execute(
select(SearchSpace).filter(SearchSpace.id == search_space_id)
)
search_space = result.scalars().first()
if not search_space:
raise HTTPException(status_code=404, detail="Search space not found")
# Update preferences
update_data = preferences.model_dump(exclude_unset=True)
previous_agent_llm_id = search_space.agent_llm_id
for key, value in update_data.items():
setattr(search_space, key, value)
agent_llm_changed = (
"agent_llm_id" in update_data
and update_data["agent_llm_id"] != previous_agent_llm_id
)
if agent_llm_changed:
await session.execute(
update(NewChatThread)
.where(NewChatThread.search_space_id == search_space_id)
.values(pinned_llm_config_id=None)
)
logger.info(
"Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)",
search_space_id,
previous_agent_llm_id,
update_data["agent_llm_id"],
)
await session.commit()
await session.refresh(search_space)
# Get full config objects for response
agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
image_generation_config = await _get_image_gen_config_by_id(
session, search_space.image_generation_config_id
)
vision_llm_config = await _get_vision_llm_config_by_id(
session, search_space.vision_llm_config_id
)
return LLMPreferencesRead(
agent_llm_id=search_space.agent_llm_id,
image_generation_config_id=search_space.image_generation_config_id,
vision_llm_config_id=search_space.vision_llm_config_id,
agent_llm=agent_llm,
image_generation_config=image_generation_config,
vision_llm_config=vision_llm_config,
)
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to update LLM preferences")
raise HTTPException(
status_code=500, detail=f"Failed to update LLM preferences: {e!s}"
) from e
@router.get("/searchspaces/{search_space_id}/snapshots")
async def list_search_space_snapshots(
search_space_id: int,

View file

@ -1,304 +0,0 @@
import logging
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.db import (
Permission,
User,
VisionLLMConfig,
get_async_session,
)
from app.schemas import (
GlobalVisionLLMConfigRead,
VisionLLMConfigCreate,
VisionLLMConfigRead,
VisionLLMConfigUpdate,
)
from app.services.vision_model_list_service import get_vision_model_list
from app.users import current_active_user
from app.utils.rbac import check_permission
router = APIRouter()
logger = logging.getLogger(__name__)
# =============================================================================
# Vision Model Catalogue (from OpenRouter, filtered for image-input models)
# =============================================================================
class VisionModelListItem(BaseModel):
value: str
label: str
provider: str
context_window: str | None = None
@router.get("/vision-models", response_model=list[VisionModelListItem])
async def list_vision_models(
user: User = Depends(current_active_user),
):
"""Return vision-capable models sourced from OpenRouter (filtered by image input)."""
try:
return await get_vision_model_list()
except Exception as e:
logger.exception("Failed to fetch vision model list")
raise HTTPException(
status_code=500, detail=f"Failed to fetch vision model list: {e!s}"
) from e
# =============================================================================
# Global Vision LLM Configs (from YAML)
# =============================================================================
@router.get(
"/global-vision-llm-configs",
response_model=list[GlobalVisionLLMConfigRead],
)
async def get_global_vision_llm_configs(
user: User = Depends(current_active_user),
):
try:
global_configs = config.GLOBAL_VISION_LLM_CONFIGS
safe_configs = []
if global_configs and len(global_configs) > 0:
safe_configs.append(
{
"id": 0,
"name": "Auto (Fastest)",
"description": "Automatically routes across available vision LLM providers.",
"provider": "AUTO",
"custom_provider": None,
"model_name": "auto",
"api_base": None,
"api_version": None,
"litellm_params": {},
"is_global": True,
"is_auto_mode": True,
# Auto mode treated as free until per-deployment billing-tier
# surfacing lands; see ``get_vision_llm`` for parity.
"billing_tier": "free",
"is_premium": False,
}
)
for cfg in global_configs:
billing_tier = str(cfg.get("billing_tier", "free")).lower()
safe_configs.append(
{
"id": cfg.get("id"),
"name": cfg.get("name"),
"description": cfg.get("description"),
"provider": cfg.get("provider") or cfg.get("litellm_provider"),
"custom_provider": cfg.get("custom_provider"),
"model_name": cfg.get("model_name"),
"api_base": cfg.get("api_base") or None,
"api_version": cfg.get("api_version") or None,
"litellm_params": cfg.get("litellm_params", {}),
"is_global": True,
"billing_tier": billing_tier,
# Mirror chat (``new_llm_config_routes``) so the new-chat
# selector's premium badge logic keys off the same
# field across chat / image / vision tabs.
"is_premium": billing_tier == "premium",
"quota_reserve_tokens": cfg.get("quota_reserve_tokens"),
"input_cost_per_token": cfg.get("input_cost_per_token"),
"output_cost_per_token": cfg.get("output_cost_per_token"),
}
)
return safe_configs
except Exception as e:
logger.exception("Failed to fetch global vision LLM configs")
raise HTTPException(
status_code=500, detail=f"Failed to fetch configs: {e!s}"
) from e
# =============================================================================
# VisionLLMConfig CRUD
# =============================================================================
@router.post("/vision-llm-configs", response_model=VisionLLMConfigRead)
async def create_vision_llm_config(
config_data: VisionLLMConfigCreate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
try:
await check_permission(
session,
user,
config_data.search_space_id,
Permission.VISION_CONFIGS_CREATE.value,
"You don't have permission to create vision LLM configs in this search space",
)
db_config = VisionLLMConfig(**config_data.model_dump(), user_id=user.id)
session.add(db_config)
await session.commit()
await session.refresh(db_config)
return db_config
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to create VisionLLMConfig")
raise HTTPException(
status_code=500, detail=f"Failed to create config: {e!s}"
) from e
@router.get("/vision-llm-configs", response_model=list[VisionLLMConfigRead])
async def list_vision_llm_configs(
search_space_id: int,
skip: int = 0,
limit: int = 100,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
try:
await check_permission(
session,
user,
search_space_id,
Permission.VISION_CONFIGS_READ.value,
"You don't have permission to view vision LLM configs in this search space",
)
result = await session.execute(
select(VisionLLMConfig)
.filter(VisionLLMConfig.search_space_id == search_space_id)
.order_by(VisionLLMConfig.created_at.desc())
.offset(skip)
.limit(limit)
)
return result.scalars().all()
except HTTPException:
raise
except Exception as e:
logger.exception("Failed to list VisionLLMConfigs")
raise HTTPException(
status_code=500, detail=f"Failed to fetch configs: {e!s}"
) from e
@router.get("/vision-llm-configs/{config_id}", response_model=VisionLLMConfigRead)
async def get_vision_llm_config(
config_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
try:
result = await session.execute(
select(VisionLLMConfig).filter(VisionLLMConfig.id == config_id)
)
db_config = result.scalars().first()
if not db_config:
raise HTTPException(status_code=404, detail="Config not found")
await check_permission(
session,
user,
db_config.search_space_id,
Permission.VISION_CONFIGS_READ.value,
"You don't have permission to view vision LLM configs in this search space",
)
return db_config
except HTTPException:
raise
except Exception as e:
logger.exception("Failed to get VisionLLMConfig")
raise HTTPException(
status_code=500, detail=f"Failed to fetch config: {e!s}"
) from e
@router.put("/vision-llm-configs/{config_id}", response_model=VisionLLMConfigRead)
async def update_vision_llm_config(
config_id: int,
update_data: VisionLLMConfigUpdate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
try:
result = await session.execute(
select(VisionLLMConfig).filter(VisionLLMConfig.id == config_id)
)
db_config = result.scalars().first()
if not db_config:
raise HTTPException(status_code=404, detail="Config not found")
await check_permission(
session,
user,
db_config.search_space_id,
Permission.VISION_CONFIGS_CREATE.value,
"You don't have permission to update vision LLM configs in this search space",
)
for key, value in update_data.model_dump(exclude_unset=True).items():
setattr(db_config, key, value)
await session.commit()
await session.refresh(db_config)
return db_config
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to update VisionLLMConfig")
raise HTTPException(
status_code=500, detail=f"Failed to update config: {e!s}"
) from e
@router.delete("/vision-llm-configs/{config_id}", response_model=dict)
async def delete_vision_llm_config(
config_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
try:
result = await session.execute(
select(VisionLLMConfig).filter(VisionLLMConfig.id == config_id)
)
db_config = result.scalars().first()
if not db_config:
raise HTTPException(status_code=404, detail="Config not found")
await check_permission(
session,
user,
db_config.search_space_id,
Permission.VISION_CONFIGS_DELETE.value,
"You don't have permission to delete vision LLM configs in this search space",
)
await session.delete(db_config)
await session.commit()
return {
"message": "Vision LLM config deleted successfully",
"id": config_id,
}
except HTTPException:
raise
except Exception as e:
await session.rollback()
logger.exception("Failed to delete VisionLLMConfig")
raise HTTPException(
status_code=500, detail=f"Failed to delete config: {e!s}"
) from e

View file

@ -34,11 +34,6 @@ from .folders import (
)
from .google_drive import DriveItem, GoogleDriveIndexingOptions, GoogleDriveIndexRequest
from .image_generation import (
GlobalImageGenConfigRead,
ImageGenerationConfigCreate,
ImageGenerationConfigPublic,
ImageGenerationConfigRead,
ImageGenerationConfigUpdate,
ImageGenerationCreate,
ImageGenerationListRead,
ImageGenerationRead,
@ -74,16 +69,6 @@ from .new_chat import (
ThreadListItem,
ThreadListResponse,
)
from .new_llm_config import (
DefaultSystemInstructionsResponse,
GlobalNewLLMConfigRead,
LLMPreferencesRead,
LLMPreferencesUpdate,
NewLLMConfigCreate,
NewLLMConfigPublic,
NewLLMConfigRead,
NewLLMConfigUpdate,
)
from .rbac_schemas import (
InviteAcceptRequest,
InviteAcceptResponse,
@ -142,14 +127,6 @@ from .video_presentations import (
VideoPresentationRead,
VideoPresentationUpdate,
)
from .vision_llm import (
GlobalVisionLLMConfigRead,
VisionLLMConfigCreate,
VisionLLMConfigPublic,
VisionLLMConfigRead,
VisionLLMConfigUpdate,
)
__all__ = [
# Folder schemas
"BulkDocumentMove",
@ -169,7 +146,6 @@ __all__ = [
"CreditPurchaseHistoryResponse",
"CreditPurchaseRead",
"CreditStripeStatusResponse",
"DefaultSystemInstructionsResponse",
# Document schemas
"DocumentBase",
"DocumentMove",
@ -192,19 +168,10 @@ __all__ = [
"FolderRead",
"FolderReorder",
"FolderUpdate",
"GlobalImageGenConfigRead",
"GlobalNewLLMConfigRead",
# Vision LLM Config schemas
"GlobalVisionLLMConfigRead",
"GoogleDriveIndexRequest",
"GoogleDriveIndexingOptions",
# Base schemas
"IDModel",
# Image Generation Config schemas
"ImageGenerationConfigCreate",
"ImageGenerationConfigPublic",
"ImageGenerationConfigRead",
"ImageGenerationConfigUpdate",
# Image Generation schemas
"ImageGenerationCreate",
"ImageGenerationListRead",
@ -216,9 +183,6 @@ __all__ = [
"InviteInfoResponse",
"InviteRead",
"InviteUpdate",
# LLM Preferences schemas
"LLMPreferencesRead",
"LLMPreferencesUpdate",
# Log schemas
"LogBase",
"LogCreate",
@ -255,11 +219,6 @@ __all__ = [
"NewChatThreadRead",
"NewChatThreadUpdate",
"NewChatThreadWithMessages",
# NewLLMConfig schemas
"NewLLMConfigCreate",
"NewLLMConfigPublic",
"NewLLMConfigRead",
"NewLLMConfigUpdate",
"PagePurchaseHistoryResponse",
"PagePurchaseRead",
"PaginatedResponse",
@ -303,8 +262,4 @@ __all__ = [
"VideoPresentationCreate",
"VideoPresentationRead",
"VideoPresentationUpdate",
"VisionLLMConfigCreate",
"VisionLLMConfigPublic",
"VisionLLMConfigRead",
"VisionLLMConfigUpdate",
]

View file

@ -1,109 +1,10 @@
"""
Pydantic schemas for Image Generation configs and generation requests.
"""Pydantic schemas for image generation requests/results."""
ImageGenerationConfig: CRUD schemas for user-created image gen model configs.
ImageGeneration: Schemas for the actual image generation requests/results.
GlobalImageGenConfigRead: Schema for admin-configured YAML configs.
"""
import uuid
from datetime import datetime
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from app.db import ImageGenProvider
# =============================================================================
# ImageGenerationConfig CRUD Schemas
# =============================================================================
class ImageGenerationConfigBase(BaseModel):
"""Base schema with fields for ImageGenerationConfig."""
name: str = Field(
..., max_length=100, description="User-friendly name for the config"
)
description: str | None = Field(
None, max_length=500, description="Optional description"
)
provider: ImageGenProvider = Field(
...,
description="Image generation provider (OpenAI, Azure, Google AI Studio, Vertex AI, Bedrock, Recraft, OpenRouter, Xinference, Nscale)",
)
custom_provider: str | None = Field(
None, max_length=100, description="Custom provider name"
)
model_name: str = Field(
..., max_length=100, description="Model name (e.g., dall-e-3, gpt-image-1)"
)
api_key: str = Field(..., description="API key for the provider")
api_base: str | None = Field(
None, max_length=500, description="Optional API base URL"
)
api_version: str | None = Field(
None,
max_length=50,
description="Azure-specific API version (e.g., '2024-02-15-preview')",
)
litellm_params: dict[str, Any] | None = Field(
default=None, description="Additional LiteLLM parameters"
)
class ImageGenerationConfigCreate(ImageGenerationConfigBase):
"""Schema for creating a new ImageGenerationConfig."""
search_space_id: int = Field(
..., description="Search space ID to associate the config with"
)
class ImageGenerationConfigUpdate(BaseModel):
"""Schema for updating an existing ImageGenerationConfig. All fields optional."""
name: str | None = Field(None, max_length=100)
description: str | None = Field(None, max_length=500)
provider: ImageGenProvider | None = None
custom_provider: str | None = Field(None, max_length=100)
model_name: str | None = Field(None, max_length=100)
api_key: str | None = None
api_base: str | None = Field(None, max_length=500)
api_version: str | None = Field(None, max_length=50)
litellm_params: dict[str, Any] | None = None
class ImageGenerationConfigRead(ImageGenerationConfigBase):
"""Schema for reading an ImageGenerationConfig (includes id and timestamps)."""
id: int
created_at: datetime
search_space_id: int
user_id: uuid.UUID
model_config = ConfigDict(from_attributes=True)
class ImageGenerationConfigPublic(BaseModel):
"""Public schema that hides the API key (for list views)."""
id: int
name: str
description: str | None = None
provider: ImageGenProvider
custom_provider: str | None = None
model_name: str
api_base: str | None = None
api_version: str | None = None
litellm_params: dict[str, Any] | None = None
created_at: datetime
search_space_id: int
user_id: uuid.UUID
model_config = ConfigDict(from_attributes=True)
# =============================================================================
# ImageGeneration (request/result) Schemas
# =============================================================================
@ -136,12 +37,12 @@ class ImageGenerationCreate(BaseModel):
search_space_id: int = Field(
..., description="Search space ID to associate the generation with"
)
image_generation_config_id: int | None = Field(
image_gen_model_id: int | None = Field(
None,
description=(
"Image generation config ID. "
"0 = Auto mode (router), negative = global YAML config, positive = DB config. "
"If not provided, uses the search space's image_generation_config_id preference."
"Image generation model ID. "
"0 = Auto mode, negative = GLOBAL model, positive = BYOK Model row. "
"If not provided, uses the search space's image_gen_model_id preference."
),
)
@ -157,7 +58,7 @@ class ImageGenerationRead(BaseModel):
size: str | None = None
style: str | None = None
response_format: str | None = None
image_generation_config_id: int | None = None
image_gen_model_id: int | None = None
response_data: dict[str, Any] | None = None
error_message: str | None = None
search_space_id: int
@ -204,57 +105,3 @@ class ImageGenerationListRead(BaseModel):
image_count=image_count,
)
# =============================================================================
# Global Image Gen Config (from YAML)
# =============================================================================
class GlobalImageGenConfigRead(BaseModel):
"""
Schema for reading global image generation configs from YAML.
Global configs have negative IDs. API key is hidden.
ID 0 is reserved for Auto mode (LiteLLM Router load balancing).
The ``billing_tier`` field allows the frontend to show a Premium/Free
badge and (more importantly) tells the backend whether to debit the
user's premium credit pool when this config is used. ``"free"`` is
the default for backward compatibility admins must explicitly opt
a global config into ``"premium"``.
"""
id: int = Field(
...,
description="Config ID: 0 for Auto mode, negative for global configs",
)
name: str
description: str | None = None
provider: str
custom_provider: str | None = None
model_name: str
api_base: str | None = None
api_version: str | None = None
litellm_params: dict[str, Any] | None = None
is_global: bool = True
is_auto_mode: bool = False
billing_tier: str = Field(
default="free",
description="'free' or 'premium'. Premium debits the user's premium credit pool (USD-cost-based).",
)
is_premium: bool = Field(
default=False,
description=(
"Convenience boolean derived server-side from "
"``billing_tier == 'premium'``. The new-chat model selector "
"keys its Free/Premium badge off this field for parity with "
"chat (`GlobalLLMConfigRead.is_premium`)."
),
)
quota_reserve_micros: int | None = Field(
default=None,
description=(
"Optional override for the reservation amount (in micro-USD) used when "
"this image generation is premium. Falls back to "
"QUOTA_DEFAULT_IMAGE_RESERVE_MICROS when omitted."
),
)

View file

@ -1,256 +0,0 @@
"""
Pydantic schemas for the NewLLMConfig API.
NewLLMConfig combines model settings with prompt configuration:
- LLM provider, model, API key, etc.
- Configurable system instructions
- Citation toggle
"""
import uuid
from datetime import datetime
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from app.db import LiteLLMProvider
class NewLLMConfigBase(BaseModel):
"""Base schema with common fields for NewLLMConfig."""
name: str = Field(
..., max_length=100, description="User-friendly name for the configuration"
)
description: str | None = Field(
None, max_length=500, description="Optional description"
)
# Model Configuration
provider: LiteLLMProvider = Field(..., description="LiteLLM provider type")
custom_provider: str | None = Field(
None, max_length=100, description="Custom provider name when provider is CUSTOM"
)
model_name: str = Field(
..., max_length=100, description="Model name without provider prefix"
)
api_key: str = Field(..., description="API key for the provider")
api_base: str | None = Field(
None, max_length=500, description="Optional API base URL"
)
litellm_params: dict[str, Any] | None = Field(
default=None, description="Additional LiteLLM parameters"
)
# Prompt Configuration
system_instructions: str = Field(
default="",
description="Custom system instructions. Empty string uses default SURFSENSE_SYSTEM_INSTRUCTIONS.",
)
use_default_system_instructions: bool = Field(
default=True,
description="Whether to use default instructions when system_instructions is empty",
)
citations_enabled: bool = Field(
default=True,
description="Whether to include citation instructions in the system prompt",
)
class NewLLMConfigCreate(NewLLMConfigBase):
"""Schema for creating a new NewLLMConfig."""
search_space_id: int = Field(
..., description="Search space ID to associate the config with"
)
class NewLLMConfigUpdate(BaseModel):
"""Schema for updating an existing NewLLMConfig. All fields are optional."""
name: str | None = Field(None, max_length=100)
description: str | None = Field(None, max_length=500)
# Model Configuration
provider: LiteLLMProvider | None = None
custom_provider: str | None = Field(None, max_length=100)
model_name: str | None = Field(None, max_length=100)
api_key: str | None = None
api_base: str | None = Field(None, max_length=500)
litellm_params: dict[str, Any] | None = None
# Prompt Configuration
system_instructions: str | None = None
use_default_system_instructions: bool | None = None
citations_enabled: bool | None = None
class NewLLMConfigRead(NewLLMConfigBase):
"""Schema for reading a NewLLMConfig (includes id and timestamps)."""
id: int
created_at: datetime
search_space_id: int
user_id: uuid.UUID
# Capability flag derived at the API boundary (no DB column). Default
# True matches the conservative-allow stance — a BYOK row that the
# route forgot to augment is not pre-judged. The streaming-task
# safety net is the only place a False actually blocks a request.
supports_image_input: bool = Field(
default=True,
description=(
"Whether the BYOK chat config can accept image inputs. Derived "
"at the route boundary from LiteLLM's authoritative model map "
"(``litellm.supports_vision``) — there is no DB column. "
"Default True is the conservative-allow stance for unknown / "
"unmapped models."
),
)
model_config = ConfigDict(from_attributes=True)
class NewLLMConfigPublic(BaseModel):
"""
Public schema for NewLLMConfig that hides the API key.
Used when returning configs in list views or to users who shouldn't see keys.
"""
id: int
name: str
description: str | None = None
# Model Configuration (no api_key)
provider: LiteLLMProvider
custom_provider: str | None = None
model_name: str
api_base: str | None = None
litellm_params: dict[str, Any] | None = None
# Prompt Configuration
system_instructions: str
use_default_system_instructions: bool
citations_enabled: bool
created_at: datetime
search_space_id: int
user_id: uuid.UUID
# Capability flag derived at the API boundary (see NewLLMConfigRead).
supports_image_input: bool = Field(
default=True,
description=(
"Whether the BYOK chat config can accept image inputs. Derived "
"at the route boundary from LiteLLM's authoritative model map. "
"Default True is the conservative-allow stance."
),
)
model_config = ConfigDict(from_attributes=True)
class DefaultSystemInstructionsResponse(BaseModel):
"""Response schema for getting default system instructions."""
default_system_instructions: str = Field(
..., description="The default SURFSENSE_SYSTEM_INSTRUCTIONS template"
)
class GlobalNewLLMConfigRead(BaseModel):
"""
Schema for reading global LLM configs from YAML.
Global configs have negative IDs and no search_space_id.
API key is hidden for security.
ID 0 is reserved for Auto mode which uses LiteLLM Router for load balancing.
"""
id: int = Field(
...,
description="Config ID: 0 for Auto mode, negative for global configs",
)
name: str
description: str | None = None
# Model Configuration (no api_key)
provider: str # String because YAML doesn't enforce enum, "AUTO" for Auto mode
custom_provider: str | None = None
model_name: str
api_base: str | None = None
litellm_params: dict[str, Any] | None = None
# Prompt Configuration
system_instructions: str = ""
use_default_system_instructions: bool = True
citations_enabled: bool = True
is_global: bool = True # Always true for global configs
is_auto_mode: bool = False # True only for Auto mode (ID 0)
billing_tier: str = "free"
is_premium: bool = False
anonymous_enabled: bool = False
seo_enabled: bool = False
seo_slug: str | None = None
seo_title: str | None = None
seo_description: str | None = None
quota_reserve_tokens: int | None = None
supports_image_input: bool = Field(
default=True,
description=(
"Whether the model accepts image inputs (multimodal vision). "
"Derived server-side: OpenRouter dynamic configs use "
"``architecture.input_modalities``; YAML / BYOK use LiteLLM's "
"authoritative model map (``litellm.supports_vision``). The "
"new-chat selector hints with a 'No image' badge when this is "
"False and there are pending image attachments. The streaming "
"task fails fast only when LiteLLM *explicitly* marks a model "
"as text-only — unknown / unmapped models default-allow."
),
)
# =============================================================================
# LLM Preferences Schemas (for role assignments)
# =============================================================================
class LLMPreferencesRead(BaseModel):
"""Schema for reading LLM preferences (role assignments) for a search space."""
agent_llm_id: int | None = Field(
None, description="ID of the LLM config to use for agent/chat tasks"
)
image_generation_config_id: int | None = Field(
None, description="ID of the image generation config to use"
)
vision_llm_config_id: int | None = Field(
None,
description="ID of the vision LLM config to use for vision/screenshot analysis",
)
agent_llm: dict[str, Any] | None = Field(
None, description="Full config for chat model"
)
image_generation_config: dict[str, Any] | None = Field(
None, description="Full config for image generation"
)
vision_llm_config: dict[str, Any] | None = Field(
None, description="Full config for vision LLM"
)
model_config = ConfigDict(from_attributes=True)
class LLMPreferencesUpdate(BaseModel):
"""Schema for updating LLM preferences."""
agent_llm_id: int | None = Field(
None, description="ID of the LLM config to use for agent/chat tasks"
)
image_generation_config_id: int | None = Field(
None, description="ID of the image generation config to use"
)
vision_llm_config_id: int | None = Field(
None,
description="ID of the vision LLM config to use for vision/screenshot analysis",
)

View file

@ -1,116 +0,0 @@
import uuid
from datetime import datetime
from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from app.db import VisionProvider
class VisionLLMConfigBase(BaseModel):
name: str = Field(..., max_length=100)
description: str | None = Field(None, max_length=500)
provider: VisionProvider = Field(...)
custom_provider: str | None = Field(None, max_length=100)
model_name: str = Field(..., max_length=100)
api_key: str = Field(...)
api_base: str | None = Field(None, max_length=500)
api_version: str | None = Field(None, max_length=50)
litellm_params: dict[str, Any] | None = Field(default=None)
class VisionLLMConfigCreate(VisionLLMConfigBase):
search_space_id: int = Field(...)
class VisionLLMConfigUpdate(BaseModel):
name: str | None = Field(None, max_length=100)
description: str | None = Field(None, max_length=500)
provider: VisionProvider | None = None
custom_provider: str | None = Field(None, max_length=100)
model_name: str | None = Field(None, max_length=100)
api_key: str | None = None
api_base: str | None = Field(None, max_length=500)
api_version: str | None = Field(None, max_length=50)
litellm_params: dict[str, Any] | None = None
class VisionLLMConfigRead(VisionLLMConfigBase):
id: int
created_at: datetime
search_space_id: int
user_id: uuid.UUID
model_config = ConfigDict(from_attributes=True)
class VisionLLMConfigPublic(BaseModel):
id: int
name: str
description: str | None = None
provider: VisionProvider
custom_provider: str | None = None
model_name: str
api_base: str | None = None
api_version: str | None = None
litellm_params: dict[str, Any] | None = None
created_at: datetime
search_space_id: int
user_id: uuid.UUID
model_config = ConfigDict(from_attributes=True)
class GlobalVisionLLMConfigRead(BaseModel):
"""Schema for reading global vision LLM configs from YAML.
The ``billing_tier`` field allows the frontend to show a Premium/Free
badge and (more importantly) tells the backend whether to debit the
user's premium credit pool when this config is used. ``"free"`` is
the default for backward compatibility admins must explicitly opt
a global config into ``"premium"``.
"""
id: int = Field(...)
name: str
description: str | None = None
provider: str
custom_provider: str | None = None
model_name: str
api_base: str | None = None
api_version: str | None = None
litellm_params: dict[str, Any] | None = None
is_global: bool = True
is_auto_mode: bool = False
billing_tier: str = Field(
default="free",
description="'free' or 'premium'. Premium debits the user's premium credit pool (USD-cost-based).",
)
is_premium: bool = Field(
default=False,
description=(
"Convenience boolean derived server-side from "
"``billing_tier == 'premium'``. The new-chat model selector "
"keys its Free/Premium badge off this field for parity with "
"chat (`GlobalLLMConfigRead.is_premium`)."
),
)
quota_reserve_tokens: int | None = Field(
default=None,
description=(
"Optional override for the per-call reservation in *tokens* — "
"converted to micro-USD via the model's input/output prices at "
"reservation time. Falls back to QUOTA_DEFAULT_RESERVE_TOKENS."
),
)
input_cost_per_token: float | None = Field(
default=None,
description=(
"Optional input price in USD/token. Used by pricing_registration to "
"register custom Azure / OpenRouter aliases with LiteLLM at startup."
),
)
output_cost_per_token: float | None = Field(
default=None,
description="Optional output price in USD/token. Pair with input_cost_per_token.",
)

View file

@ -1,13 +1,13 @@
"""Resolve and persist Auto (Fastest) model pins per chat thread.
"""Resolve and persist Auto model pins per chat thread.
Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we
resolve that virtual mode to one concrete global LLM config exactly once and
Auto is represented by ``chat_model_id == 0``. For chat threads we
resolve that virtual mode to one concrete global model exactly once and
persist the chosen config id on ``new_chat_threads.pinned_llm_config_id`` so
subsequent turns are stable.
Single-writer invariant: this module is the only writer of
``NewChatThread.pinned_llm_config_id`` (aside from the bulk clear in
``search_spaces_routes`` when a search space's ``agent_llm_id`` changes).
``model_connections_routes`` when a search space's ``chat_model_id`` changes).
Therefore a non-NULL value unambiguously means "this thread has an
Auto-resolved pin"; no separate source/policy column is needed.
"""
@ -33,8 +33,10 @@ from app.services.token_quota_service import TokenQuotaService
logger = logging.getLogger(__name__)
AUTO_FASTEST_ID = 0
AUTO_FASTEST_MODE = "auto_fastest"
AUTO_MODE_ID = 0
# Stable internal hash namespace for deterministic per-thread selection.
# Do not rename: changing this rebalances Auto's model choice for new pins.
AUTO_PIN_HASH_NAMESPACE = "auto_fastest"
_RUNTIME_COOLDOWN_SECONDS = 600
_HEALTHY_TTL_SECONDS = 45
@ -383,7 +385,7 @@ def _select_pin(eligible: list[dict], thread_id: int) -> tuple[dict, int]:
pool = tier_a if tier_a else eligible
pool = sorted(pool, key=lambda c: -int(c.get("quality_score") or 0))
top_k = pool[:_QUALITY_TOP_K]
digest = hashlib.sha256(f"{AUTO_FASTEST_MODE}:{thread_id}".encode()).digest()
digest = hashlib.sha256(f"{AUTO_PIN_HASH_NAMESPACE}:{thread_id}".encode()).digest()
idx = int.from_bytes(digest[:8], "big") % len(top_k)
return top_k[idx], len(top_k)
@ -425,7 +427,7 @@ async def resolve_or_get_pinned_llm_config_id(
exclude_config_ids: set[int] | None = None,
requires_image_input: bool = False,
) -> AutoPinResolution:
"""Resolve Auto (Fastest) to one concrete config id and persist the pin.
"""Resolve Auto to one concrete config id and persist the pin.
For non-auto selections, this function clears any existing pin and returns
the selected id as-is.
@ -457,7 +459,7 @@ async def resolve_or_get_pinned_llm_config_id(
)
# Explicit model selected: clear any stale pin.
if selected_llm_config_id != AUTO_FASTEST_ID:
if selected_llm_config_id != AUTO_MODE_ID:
if thread.pinned_llm_config_id is not None:
thread.pinned_llm_config_id = None
await session.commit()

View file

@ -450,10 +450,10 @@ async def _resolve_agent_billing_for_search_space(
Used by Celery tasks (podcast generation, video presentation) to bill the
search-space owner's premium credit pool when the chat model is premium.
Resolution rules mirror chat at ``stream_new_chat.py:2294-2351``:
Resolution rules mirror the chat model role resolver:
- Search space not found / no ``agent_llm_id``: raise ``ValueError``.
- **Auto mode** (``id == AUTO_FASTEST_ID == 0``):
- Search space not found / no ``chat_model_id``: raise ``ValueError``.
- **Auto mode** (``id == AUTO_MODE_ID == 0``):
* ``thread_id`` is set: delegate to
``resolve_or_get_pinned_llm_config_id`` (the same call chat uses) and
recurse into the resolved id. Reuses chat's existing pin if present
@ -469,9 +469,8 @@ async def _resolve_agent_billing_for_search_space(
(defaults to ``"free"`` via ``app/config/__init__.py:52`` setdefault),
``base_model = litellm_params.get("base_model") or model_name``
NOT provider-prefixed, matching chat's cost-map lookup convention.
- **Positive id** (user BYOK ``NewLLMConfig``): always free (matches
``AgentConfig.from_new_llm_config`` which hard-codes ``billing_tier="free"``);
``base_model`` from ``litellm_params`` or ``model_name``.
- **Positive id** (user BYOK ``Model``): always free; ``base_model`` from
the model catalog override or the upstream ``model_id``.
Note on imports: ``llm_service``, ``auto_model_pin_service``, and
``llm_router_service`` are imported lazily inside the function body to
@ -480,8 +479,9 @@ async def _resolve_agent_billing_for_search_space(
``billable_calls.py``'s module load path.
"""
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from app.db import NewLLMConfig, SearchSpace
from app.db import Model, SearchSpace
result = await session.execute(
select(SearchSpace).where(SearchSpace.id == search_space_id)
@ -490,20 +490,20 @@ async def _resolve_agent_billing_for_search_space(
if search_space is None:
raise ValueError(f"Search space {search_space_id} not found")
agent_llm_id = search_space.agent_llm_id
if agent_llm_id is None:
chat_model_id = search_space.chat_model_id
if chat_model_id is None:
raise ValueError(
f"Search space {search_space_id} has no agent_llm_id configured"
f"Search space {search_space_id} has no chat_model_id configured"
)
owner_user_id: UUID = search_space.user_id
from app.services.auto_model_pin_service import (
AUTO_FASTEST_ID,
AUTO_MODE_ID,
resolve_or_get_pinned_llm_config_id,
)
if agent_llm_id == AUTO_FASTEST_ID:
if chat_model_id == AUTO_MODE_ID:
if thread_id is None:
return owner_user_id, "free", "auto"
try:
@ -512,7 +512,7 @@ async def _resolve_agent_billing_for_search_space(
thread_id=thread_id,
search_space_id=search_space_id,
user_id=str(owner_user_id),
selected_llm_config_id=AUTO_FASTEST_ID,
selected_llm_config_id=AUTO_MODE_ID,
)
except ValueError:
logger.warning(
@ -523,28 +523,35 @@ async def _resolve_agent_billing_for_search_space(
exc_info=True,
)
return owner_user_id, "free", "auto"
agent_llm_id = resolution.resolved_llm_config_id
chat_model_id = resolution.resolved_llm_config_id
if agent_llm_id < 0:
if chat_model_id < 0:
from app.services.llm_service import get_global_llm_config
cfg = get_global_llm_config(agent_llm_id) or {}
cfg = get_global_llm_config(chat_model_id) or {}
billing_tier = str(cfg.get("billing_tier", "free")).lower()
litellm_params = cfg.get("litellm_params") or {}
base_model = litellm_params.get("base_model") or cfg.get("model_name") or ""
return owner_user_id, billing_tier, base_model
nlc_result = await session.execute(
select(NewLLMConfig).where(
NewLLMConfig.id == agent_llm_id,
NewLLMConfig.search_space_id == search_space_id,
)
model_result = await session.execute(
select(Model)
.options(selectinload(Model.connection))
.where(Model.id == chat_model_id, Model.enabled.is_(True))
)
nlc = nlc_result.scalars().first()
model = model_result.scalars().first()
base_model = ""
if nlc is not None:
litellm_params = nlc.litellm_params or {}
base_model = litellm_params.get("base_model") or nlc.model_name or ""
if (
model is not None
and model.connection is not None
and model.connection.enabled
and (
model.connection.search_space_id in (None, search_space_id)
and model.connection.user_id in (None, owner_user_id)
)
):
catalog = model.catalog or {}
base_model = catalog.get("base_model") or model.model_id or ""
return owner_user_id, "free", base_model

View file

@ -14,7 +14,11 @@ from app.services.auto_model_pin_service import (
auto_model_candidates,
choose_auto_model_candidate,
)
from app.services.llm_router_service import AUTO_MODE_ID, ChatLiteLLMRouter, is_auto_mode
from app.services.llm_router_service import (
AUTO_MODE_ID,
ChatLiteLLMRouter,
is_auto_mode,
)
from app.services.model_capabilities import has_capability
from app.services.model_resolver import native_connection_from_config, to_litellm
from app.services.token_tracking_service import token_tracker
@ -96,26 +100,16 @@ class LLMRole:
def get_global_llm_config(llm_config_id: int) -> dict | None:
"""
Get a global LLM configuration by ID.
Global configs have negative IDs. ID 0 is reserved for Auto mode.
Global configs have negative IDs. Auto mode (ID 0) is resolved through the
model-candidate pipeline, not this legacy config lookup.
Args:
llm_config_id: The ID of the global config (should be negative or 0 for Auto)
llm_config_id: The ID of the global config (must be negative)
Returns:
dict: Global config dictionary or None if not found
"""
# Auto mode (ID 0) is handled separately via the router
if llm_config_id == AUTO_MODE_ID:
return {
"id": AUTO_MODE_ID,
"name": "Auto (Fastest)",
"description": "Automatically routes requests across available LLM providers for optimal performance and rate limit handling",
"provider": "AUTO",
"model_name": "auto",
"is_auto_mode": True,
}
if llm_config_id > 0:
if llm_config_id >= 0:
return None
for cfg in config.GLOBAL_LLM_CONFIGS:

View file

@ -24,7 +24,7 @@ CACHE_TTL_SECONDS = 86400 # 24 hours
_cache: list[dict] | None = None
_cache_timestamp: float = 0
# Maps OpenRouter provider slug → our LiteLLMProvider enum value.
# Maps OpenRouter provider slug to native LiteLLM provider prefixes.
# Only providers where the model-name part (after the slash) can be
# used directly with the native provider's litellm prefix are listed.
#

View file

@ -281,7 +281,7 @@ def _generate_configs(
OpenRouter's own ``openrouter/free`` meta-router is filtered out upstream
via ``_EXCLUDED_MODEL_IDS``; we don't expose a redundant auto-select layer
because our own Auto (Fastest) pin + 24 h refresh + repair logic already
because our own Auto pin + 24 h refresh + repair logic already
cover the catalogue-churn case.
"""
id_offset: int = settings.get("id_offset", -10000)
@ -346,7 +346,7 @@ def _generate_configs(
# ``"No endpoints found that support image input"``.
"supports_image_input": bool(normalized.get("supports_image_input")),
_OPENROUTER_DYNAMIC_MARKER: True,
# Auto (Fastest) ranking metadata. ``quality_score`` is initialised
# Auto ranking metadata. ``quality_score`` is initialised
# to the static score and gets re-blended with health on the next
# ``_enrich_health`` pass (synchronous on refresh, deferred on cold
# start so startup latency is unchanged).
@ -361,11 +361,7 @@ def _generate_configs(
return configs
# ID-offset bands used to keep dynamic OpenRouter configs in their own
# namespace per surface. Image / vision get separate bands so a single
# Postgres-INTEGER cfg ID is unambiguous about which selector it belongs to.
_OPENROUTER_IMAGE_ID_OFFSET_DEFAULT = -20000
_OPENROUTER_VISION_ID_OFFSET_DEFAULT = -30000
def _generate_image_gen_configs(
@ -431,89 +427,6 @@ def _generate_image_gen_configs(
return configs
def _generate_vision_llm_configs(
raw_models: list[dict], settings: dict[str, Any]
) -> list[dict]:
"""Convert OpenRouter vision-capable LLMs into global vision-LLM config
dicts (matches the YAML shape consumed by ``vision_llm_routes``).
Filter:
- architecture.input_modalities contains "image"
- architecture.output_modalities contains "text"
- compatible provider (excluded slugs blocked)
- allowed model id (excluded list blocked)
Vision-LLM is invoked from the indexer (image extraction during
document upload) via ``langchain_litellm.ChatLiteLLM.ainvoke``, so
the chat-only ``_supports_tool_calling`` and ``_has_sufficient_context``
filters do not apply: a small-context vision model that doesn't
advertise tool-calling is still perfectly viable for "describe this
image" prompts.
"""
id_offset: int = int(
settings.get("vision_id_offset") or _OPENROUTER_VISION_ID_OFFSET_DEFAULT
)
api_key: str = settings.get("api_key", "")
rpm: int = settings.get("rpm", 200)
tpm: int = settings.get("tpm", 1_000_000)
free_rpm: int = settings.get("free_rpm", 20)
free_tpm: int = settings.get("free_tpm", 100_000)
quota_reserve_tokens: int = settings.get("quota_reserve_tokens", 4000)
litellm_params: dict = settings.get("litellm_params") or {}
vision_models = [
m
for m in raw_models
if supports_image_input(m)
and _shared_is_compatible_provider(m)
and _shared_is_allowed_model(m)
and "/" in m.get("id", "")
]
configs: list[dict] = []
taken: set[int] = set()
for model in vision_models:
model_id: str = model["id"]
name: str = model.get("name", model_id)
tier = _openrouter_tier(model)
pricing = model.get("pricing") or {}
# Capture per-token prices so ``pricing_registration`` can
# register them with LiteLLM at startup (and so the cost
# estimator in ``estimate_call_reserve_micros`` can resolve
# them at reserve time).
try:
input_cost = float(pricing.get("prompt", 0) or 0)
except (TypeError, ValueError):
input_cost = 0.0
try:
output_cost = float(pricing.get("completion", 0) or 0)
except (TypeError, ValueError):
output_cost = 0.0
cfg: dict[str, Any] = {
"id": _stable_config_id(model_id, id_offset, taken),
"name": name,
"description": f"{name} via OpenRouter (vision)",
"provider": "openrouter",
"model_name": model_id,
"api_key": api_key,
"api_base": "https://openrouter.ai/api/v1",
"api_version": None,
"rpm": free_rpm if tier == "free" else rpm,
"tpm": free_tpm if tier == "free" else tpm,
"litellm_params": dict(litellm_params),
"billing_tier": tier,
"quota_reserve_tokens": quota_reserve_tokens,
"input_cost_per_token": input_cost or None,
"output_cost_per_token": output_cost or None,
_OPENROUTER_DYNAMIC_MARKER: True,
}
configs.append(cfg)
return configs
class OpenRouterIntegrationService:
"""Singleton that manages the dynamic OpenRouter model catalogue."""
@ -724,7 +637,7 @@ class OpenRouterIntegrationService:
return counts
# ------------------------------------------------------------------
# Auto (Fastest) health enrichment
# Auto health enrichment
# ------------------------------------------------------------------
async def _enrich_health_safely(

View file

@ -154,10 +154,8 @@ def _register_chat_shape_configs(
input_cost = _safe_float(entry.get("prompt"))
output_cost = _safe_float(entry.get("completion"))
else:
# Vision configs from ``_generate_vision_llm_configs``
# carry their pricing inline because the OpenRouter
# raw-pricing cache is keyed by chat-catalogue model_id;
# vision flows pick up the inline values here.
# Some dynamically materialized configs can carry pricing
# inline when the raw OpenRouter cache has no matching entry.
input_cost = _safe_float(cfg.get("input_cost_per_token"))
output_cost = _safe_float(cfg.get("output_cost_per_token"))
if input_cost == 0.0 and output_cost == 0.0:

View file

@ -1,4 +1,4 @@
"""Pure-function quality scoring for Auto (Fastest) model selection.
"""Pure-function quality scoring for Auto model selection.
This module is import-free of any service / request-path dependencies. All
numbers are computed once during the OpenRouter refresh tick (or YAML load)

View file

@ -1,160 +0,0 @@
import logging
from typing import Any
from litellm import Router
from app.services.model_resolver import native_connection_from_config, to_litellm
logger = logging.getLogger(__name__)
VISION_AUTO_MODE_ID = 0
class VisionLLMRouterService:
_instance = None
_router: Router | None = None
_model_list: list[dict] = []
_router_settings: dict = {}
_initialized: bool = False
def __new__(cls):
if cls._instance is None:
cls._instance = super().__new__(cls)
return cls._instance
@classmethod
def get_instance(cls) -> "VisionLLMRouterService":
if cls._instance is None:
cls._instance = cls()
return cls._instance
@classmethod
def initialize(
cls,
global_configs: list[dict],
router_settings: dict | None = None,
) -> None:
instance = cls.get_instance()
if instance._initialized:
logger.debug("Vision LLM Router already initialized, skipping")
return
model_list = []
for config in global_configs:
deployment = cls._config_to_deployment(config)
if deployment:
model_list.append(deployment)
if not model_list:
logger.warning(
"No valid vision LLM configs found for router initialization"
)
return
instance._model_list = model_list
instance._router_settings = router_settings or {}
default_settings = {
"routing_strategy": "usage-based-routing",
"num_retries": 3,
"allowed_fails": 3,
"cooldown_time": 60,
"retry_after": 5,
}
final_settings = {**default_settings, **instance._router_settings}
try:
instance._router = Router(
model_list=model_list,
routing_strategy=final_settings.get(
"routing_strategy", "usage-based-routing"
),
num_retries=final_settings.get("num_retries", 3),
allowed_fails=final_settings.get("allowed_fails", 3),
cooldown_time=final_settings.get("cooldown_time", 60),
set_verbose=False,
)
instance._initialized = True
logger.info(
"Vision LLM Router initialized with %d deployments, strategy: %s",
len(model_list),
final_settings.get("routing_strategy"),
)
except Exception as e:
logger.error(f"Failed to initialize Vision LLM Router: {e}")
instance._router = None
@classmethod
def _config_to_deployment(cls, config: dict) -> dict | None:
try:
if not config.get("model_name") or not config.get("api_key"):
return None
model_string, resolved_kwargs = to_litellm(
native_connection_from_config(config),
config["model_name"],
)
litellm_params: dict[str, Any] = {"model": model_string, **resolved_kwargs}
deployment: dict[str, Any] = {
"model_name": "auto",
"litellm_params": litellm_params,
}
if config.get("rpm"):
deployment["rpm"] = config["rpm"]
if config.get("tpm"):
deployment["tpm"] = config["tpm"]
return deployment
except Exception as e:
logger.warning(f"Failed to convert vision config to deployment: {e}")
return None
@classmethod
def get_router(cls) -> Router | None:
instance = cls.get_instance()
return instance._router
@classmethod
def is_initialized(cls) -> bool:
instance = cls.get_instance()
return instance._initialized and instance._router is not None
@classmethod
def get_model_count(cls) -> int:
instance = cls.get_instance()
return len(instance._model_list)
def is_vision_auto_mode(config_id: int | None) -> bool:
return config_id == VISION_AUTO_MODE_ID
def build_vision_model_string(
litellm_provider: str, model_name: str, custom_provider: str | None
) -> str:
if custom_provider:
return f"{custom_provider}/{model_name}"
return f"{litellm_provider}/{model_name}"
def get_global_vision_llm_config(config_id: int) -> dict | None:
from app.config import config
if config_id == VISION_AUTO_MODE_ID:
return {
"id": VISION_AUTO_MODE_ID,
"name": "Auto (Fastest)",
"provider": "AUTO",
"model_name": "auto",
"is_auto_mode": True,
}
if config_id > 0:
return None
for cfg in config.GLOBAL_VISION_LLM_CONFIGS:
if cfg.get("id") == config_id:
return cfg
return None

View file

@ -1,134 +0,0 @@
"""
Service for fetching and caching the vision-capable model list.
Reuses the same OpenRouter public API and local fallback as the LLM model
list service, but filters for models that accept image input.
"""
import json
import logging
import time
from pathlib import Path
import httpx
logger = logging.getLogger(__name__)
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
FALLBACK_FILE = (
Path(__file__).parent.parent / "config" / "vision_model_list_fallback.json"
)
CACHE_TTL_SECONDS = 86400 # 24 hours
_cache: list[dict] | None = None
_cache_timestamp: float = 0
OPENROUTER_SLUG_TO_VISION_PROVIDER: dict[str, str] = {
"openai": "OPENAI",
"anthropic": "ANTHROPIC",
"google": "GOOGLE",
"mistralai": "MISTRAL",
"x-ai": "XAI",
}
def _format_context_length(length: int | None) -> str | None:
if not length:
return None
if length >= 1_000_000:
return f"{length / 1_000_000:g}M"
if length >= 1_000:
return f"{length / 1_000:g}K"
return str(length)
async def _fetch_from_openrouter() -> list[dict] | None:
try:
async with httpx.AsyncClient(timeout=15) as client:
response = await client.get(OPENROUTER_API_URL)
response.raise_for_status()
data = response.json()
return data.get("data", [])
except Exception as e:
logger.warning("Failed to fetch from OpenRouter API for vision models: %s", e)
return None
def _load_fallback() -> list[dict]:
try:
with open(FALLBACK_FILE, encoding="utf-8") as f:
return json.load(f)
except Exception as e:
logger.error("Failed to load vision model fallback list: %s", e)
return []
def _is_vision_model(model: dict) -> bool:
"""Return True if the model accepts image input and outputs text."""
arch = model.get("architecture", {})
input_mods = arch.get("input_modalities", [])
output_mods = arch.get("output_modalities", [])
return "image" in input_mods and "text" in output_mods
def _process_vision_models(raw_models: list[dict]) -> list[dict]:
processed: list[dict] = []
for model in raw_models:
model_id: str = model.get("id", "")
name: str = model.get("name", "")
context_length = model.get("context_length")
if "/" not in model_id:
continue
if not _is_vision_model(model):
continue
provider_slug, model_name = model_id.split("/", 1)
context_window = _format_context_length(context_length)
processed.append(
{
"value": model_id,
"label": name,
"provider": "OPENROUTER",
"context_window": context_window,
}
)
direct_provider = OPENROUTER_SLUG_TO_VISION_PROVIDER.get(provider_slug)
if direct_provider:
if direct_provider == "GOOGLE" and not model_name.startswith("gemini-"):
continue
processed.append(
{
"value": model_name,
"label": name,
"provider": direct_provider,
"context_window": context_window,
}
)
return processed
async def get_vision_model_list() -> list[dict]:
global _cache, _cache_timestamp
if _cache is not None and (time.time() - _cache_timestamp) < CACHE_TTL_SECONDS:
return _cache
raw_models = await _fetch_from_openrouter()
if raw_models is None:
logger.info("Using fallback vision model list")
return _load_fallback()
processed = _process_vision_models(raw_models)
_cache = processed
_cache_timestamp = time.time()
return processed