2026-04-22 04:04:08 -07:00
|
|
|
import asyncio
|
2025-07-24 14:43:48 -07:00
|
|
|
import logging
|
|
|
|
|
|
2025-10-10 00:50:29 -07:00
|
|
|
import litellm
|
2025-11-05 12:15:05 -08:00
|
|
|
from langchain_core.messages import HumanMessage
|
2025-10-04 12:07:36 -07:00
|
|
|
from langchain_litellm import ChatLiteLLM
|
2025-06-09 15:50:15 -07:00
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
from sqlalchemy.future import select
|
2026-06-10 21:48:23 +05:30
|
|
|
from sqlalchemy.orm import selectinload
|
2025-06-09 15:50:15 -07:00
|
|
|
|
2025-11-14 21:53:46 -08:00
|
|
|
from app.config import config
|
2026-06-10 21:48:23 +05:30
|
|
|
from app.db import Model, SearchSpace
|
2026-06-11 18:22:23 +05:30
|
|
|
from app.services.auto_model_pin_service import (
|
|
|
|
|
auto_model_candidates,
|
|
|
|
|
choose_auto_model_candidate,
|
2026-01-29 15:28:31 -08:00
|
|
|
)
|
2026-06-13 12:45:43 +05:30
|
|
|
from app.services.llm_router_service import (
|
|
|
|
|
AUTO_MODE_ID,
|
|
|
|
|
ChatLiteLLMRouter,
|
|
|
|
|
is_auto_mode,
|
|
|
|
|
)
|
2026-06-12 02:17:22 +05:30
|
|
|
from app.services.model_capabilities import has_capability
|
2026-06-10 21:48:23 +05:30
|
|
|
from app.services.model_resolver import native_connection_from_config, to_litellm
|
2026-04-14 21:52:26 +05:30
|
|
|
from app.services.token_tracking_service import token_tracker
|
2025-10-10 00:50:29 -07:00
|
|
|
|
|
|
|
|
# Configure litellm to automatically drop unsupported parameters
# instead of failing the request when a provider does not accept them.
litellm.drop_params = True

# Memory controls: prevent unbounded internal accumulation.
# Telemetry and the response cache are switched off, and the failure/input
# callback lists are emptied so nothing is registered on those hooks here.
litellm.telemetry = False
litellm.cache = None
litellm.failure_callback = []
litellm.input_callback = []
# The token tracker is the only callback registered on the generic hook.
litellm.callbacks = [token_tracker]

# Module-level logger shared by every helper in this file.
logger = logging.getLogger(__name__)

# Providers that require an interactive OAuth / device-flow login before
# issuing any completion. LiteLLM implements these with blocking sync polling
# (requests + time.sleep), which would freeze the FastAPI event loop if
# invoked from validation. They are never usable from a headless backend,
# so we reject them at the edge.
# Entries are compared against a lower-cased, stripped provider name with
# spaces turned into underscores, hence the several spellings listed here.
_INTERACTIVE_AUTH_PROVIDERS: frozenset[str] = frozenset(
    {
        "github_copilot",
        "github-copilot",
        "githubcopilot",
        "copilot",
    }
)

# Hard upper bound for a single validation call. Must exceed the ChatLiteLLM
# request timeout (30s) by a small margin so a well-behaved provider never
# trips the watchdog, while any pathological/blocking provider is killed.
_VALIDATION_TIMEOUT_SECONDS: float = 35.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_interactive_auth_provider(
|
|
|
|
|
provider: str | None, custom_provider: str | None
|
|
|
|
|
) -> bool:
|
|
|
|
|
"""Return True if the given provider triggers interactive OAuth in LiteLLM."""
|
|
|
|
|
for raw in (custom_provider, provider):
|
|
|
|
|
if not raw:
|
|
|
|
|
continue
|
|
|
|
|
normalized = raw.strip().lower().replace(" ", "_")
|
|
|
|
|
if normalized in _INTERACTIVE_AUTH_PROVIDERS:
|
|
|
|
|
return True
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
2026-06-10 21:48:23 +05:30
|
|
|
def _legacy_config_connection(
    *,
    provider: str,
    model_name: str,
    api_key: str | None,
    api_base: str | None,
    custom_provider: str | None = None,
    litellm_params: dict | None = None,
    api_version: str | None = None,
) -> tuple[str, dict]:
    """Resolve flat, legacy-style provider settings into LiteLLM arguments.

    The keyword arguments are packed into a config dict (provider name
    lower-cased, missing ``litellm_params`` replaced by an empty dict),
    turned into a connection with ``native_connection_from_config`` and then
    handed to ``to_litellm`` together with ``model_name``.

    Returns:
        Whatever ``to_litellm`` returns; callers in this module unpack it as
        ``(model_string, kwargs)``.
    """
    extra_params = litellm_params if litellm_params else {}
    legacy_cfg = dict(
        provider=provider.lower(),
        model_name=model_name,
        api_key=api_key,
        api_base=api_base,
        custom_provider=custom_provider,
        api_version=api_version,
        litellm_params=extra_params,
    )
    return to_litellm(native_connection_from_config(legacy_cfg), model_name)
|
|
|
|
|
|
|
|
|
|
|
2025-06-09 15:50:15 -07:00
|
|
|
class LLMRole:
    """String constants naming the LLM roles a search space can bind."""

    # For agent/chat operations
    AGENT = "agent"
|
2025-06-09 15:50:15 -07:00
|
|
|
|
2025-07-24 14:43:48 -07:00
|
|
|
|
2025-11-14 21:53:46 -08:00
|
|
|
def get_global_llm_config(llm_config_id: int) -> dict | None:
|
|
|
|
|
"""
|
|
|
|
|
Get a global LLM configuration by ID.
|
2026-06-13 12:45:43 +05:30
|
|
|
Global configs have negative IDs. Auto mode (ID 0) is resolved through the
|
|
|
|
|
model-candidate pipeline, not this legacy config lookup.
|
2025-11-14 21:53:46 -08:00
|
|
|
|
|
|
|
|
Args:
|
2026-06-13 12:45:43 +05:30
|
|
|
llm_config_id: The ID of the global config (must be negative)
|
2025-11-14 21:53:46 -08:00
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
dict: Global config dictionary or None if not found
|
|
|
|
|
"""
|
2026-06-13 12:45:43 +05:30
|
|
|
if llm_config_id >= 0:
|
2025-11-14 21:53:46 -08:00
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
for cfg in config.GLOBAL_LLM_CONFIGS:
|
|
|
|
|
if cfg.get("id") == llm_config_id:
|
|
|
|
|
return cfg
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
2026-06-10 21:48:23 +05:30
|
|
|
def get_global_model(model_id: int) -> dict | None:
    """Return the first ``config.GLOBAL_MODELS`` entry whose ``id`` equals
    *model_id*, or ``None`` when no entry matches."""
    for entry in config.GLOBAL_MODELS:
        if entry.get("id") == model_id:
            return entry
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_global_connection(connection_id: int) -> dict | None:
    """Return the first ``config.GLOBAL_CONNECTIONS`` entry whose ``id``
    equals *connection_id*, or ``None`` when no entry matches."""
    for entry in config.GLOBAL_CONNECTIONS:
        if entry.get("id") == connection_id:
            return entry
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _has_capability(model: dict | Model, capability: str) -> bool:
    """Tell whether *model* supports *capability*.

    Thin module-local alias that forwards both arguments unchanged to
    ``has_capability`` from ``app.services.model_capabilities``.
    """
    supported = has_capability(model, capability)
    return supported
|
2026-06-10 21:48:23 +05:30
|
|
|
|
|
|
|
|
|
|
|
|
|
def _chat_litellm_from_resolved(
    *,
    conn: dict | object,
    model_id: str,
    disable_streaming: bool = False,
) -> tuple[str, dict]:
    """Build the constructor kwargs for a chat LLM from a connection/model pair.

    ``to_litellm`` supplies the model string and provider kwargs. The model
    string is stored under ``"model"`` first, then the provider kwargs are
    merged on top (so they win on a key clash). ``disable_streaming`` is only
    added to the kwargs when it is truthy.

    Returns:
        ``(model_string, kwargs)``.
    """
    model_string, provider_kwargs = to_litellm(conn, model_id)

    kwargs: dict = {"model": model_string}
    kwargs.update(provider_kwargs)
    if disable_streaming:
        kwargs["disable_streaming"] = True

    return model_string, kwargs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _get_db_model(
    session: AsyncSession,
    model_id: int,
    search_space: SearchSpace,
) -> Model | None:
    """Load an enabled DB model that *search_space* is allowed to use.

    The model is fetched together with its connection. ``None`` is returned
    when the model is missing or disabled, when it has no connection or the
    connection is disabled, when the connection is tied to a different
    search space, or when it is tied to a user other than the search space's
    ``user_id``. A connection with no search space / user set passes the
    respective check.
    """
    stmt = (
        select(Model)
        .options(selectinload(Model.connection))
        .where(Model.id == model_id, Model.enabled.is_(True))
    )
    row = (await session.execute(stmt)).scalars().first()
    if not row:
        return None

    connection = row.connection
    if not connection or not connection.enabled:
        return None

    # Scope checks only apply when the connection actually carries a scope.
    bound_space = connection.search_space_id
    if bound_space and bound_space != search_space.id:
        return None

    bound_user = connection.user_id
    if bound_user and bound_user != search_space.user_id:
        return None

    return row
|
|
|
|
|
|
|
|
|
|
|
2025-11-05 12:15:05 -08:00
|
|
|
async def validate_llm_config(
    provider: str,
    model_name: str,
    api_key: str,
    api_base: str | None = None,
    custom_provider: str | None = None,
    litellm_params: dict | None = None,
) -> tuple[bool, str]:
    """
    Validate an LLM configuration by attempting to make a test API call.

    The function never raises for provider/config problems: every failure is
    reported through the returned tuple.

    Args:
        provider: LLM provider (e.g., 'OPENAI', 'ANTHROPIC')
        model_name: Model identifier
        api_key: API key for the provider
        api_base: Optional custom API base URL
        custom_provider: Optional custom provider string
        litellm_params: Optional additional litellm parameters

    Returns:
        Tuple of (is_valid, error_message)
        - is_valid: True if config works, False otherwise
        - error_message: Empty string if valid, error description if invalid
    """
    # Reject providers that require interactive OAuth/device-flow auth.
    # LiteLLM's github_copilot provider (and similar) uses a blocking sync
    # Authenticator that polls GitHub for up to several minutes and prints a
    # device code to stdout. Running it on the FastAPI event loop will freeze
    # the entire backend, so we refuse them up front.
    if _is_interactive_auth_provider(provider, custom_provider):
        msg = (
            "Provider requires interactive OAuth/device-flow authentication "
            "(e.g. github_copilot) and cannot be used in a hosted backend. "
            "Please choose a provider that authenticates via API key."
        )
        logger.warning(
            "Rejected LLM config validation for interactive-auth provider "
            "(provider=%r, custom_provider=%r)",
            provider,
            custom_provider,
        )
        return False, msg

    try:
        model_string, resolved_kwargs = _legacy_config_connection(
            provider=provider,
            model_name=model_name,
            api_key=api_key,
            api_base=api_base,
            custom_provider=custom_provider,
            litellm_params=litellm_params,
        )
        # The per-request timeout (30s) stays below _VALIDATION_TIMEOUT_SECONDS
        # so a well-behaved provider fails on its own before the watchdog fires.
        litellm_kwargs = {"model": model_string, **resolved_kwargs, "timeout": 30}

        # Imported at call time rather than at module import.
        # NOTE(review): presumably to avoid a circular import — confirm.
        from app.agents.chat.runtime.llm_config import (
            SanitizedChatLiteLLM,
        )

        llm = SanitizedChatLiteLLM(**litellm_kwargs)

        # Run the test call in a worker thread with a hard timeout. Some
        # LiteLLM providers have synchronous blocking code paths (e.g. OAuth
        # authenticators that call time.sleep and requests.post) that would
        # otherwise freeze the asyncio event loop. Offloading to a thread and
        # bounding the wait keeps the server responsive even if a provider
        # misbehaves.
        # NOTE(review): a timed-out worker thread cannot be cancelled; it
        # keeps running until llm.invoke returns on its own.
        test_message = HumanMessage(content="Hello")
        try:
            response = await asyncio.wait_for(
                asyncio.to_thread(llm.invoke, [test_message]),
                timeout=_VALIDATION_TIMEOUT_SECONDS,
            )
        except TimeoutError:
            logger.warning(
                "LLM config validation timed out after %ss for model: %s",
                _VALIDATION_TIMEOUT_SECONDS,
                model_string,
            )
            return (
                False,
                f"Validation timed out after {int(_VALIDATION_TIMEOUT_SECONDS)}s. "
                "The provider is unreachable or requires interactive "
                "authentication that is not supported by the backend.",
            )

        # If we got here without exception, the call itself succeeded; the
        # config only counts as valid when the reply has content.
        if response and response.content:
            logger.info(
                "Successfully validated LLM config for model: %s", model_string
            )
            return True, ""

        logger.warning(
            "LLM config validation returned empty response for model: %s",
            model_string,
        )
        return False, "LLM returned an empty response"

    except Exception as e:
        # Boundary handler: any resolution/provider error becomes a
        # (False, message) result instead of propagating to the caller.
        error_msg = f"Failed to validate LLM configuration: {e!s}"
        logger.error(error_msg)
        return False, error_msg
|
|
|
|
|
|
|
|
|
|
|
2025-11-27 22:45:04 -08:00
|
|
|
async def get_search_space_llm_instance(
    session: AsyncSession,
    search_space_id: int,
    role: str,
    disable_streaming: bool = False,
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """
    Get a chat LLM instance for a specific search space and role.

    LLM preferences are stored at the search space level and shared by all members.

    The model bound to the role is resolved as follows:
      - Auto mode (ID 0): one concrete model is picked from the chat-capable
        candidates via ``choose_auto_model_candidate`` and then resolved by
        one of the two rules below.
      - Negative ID: a global virtual model and its global connection.
      - Any other ID: a DB ``Model`` (with its connection) that the search
        space is allowed to use.

    Args:
        session: Database session
        search_space_id: Search Space ID
        role: LLM role ('agent')
        disable_streaming: When True, ``disable_streaming=True`` is passed to
            the LLM constructor

    Returns:
        A ``SanitizedChatLiteLLM`` instance, or None if the search space,
        role binding, model or connection cannot be resolved, the model is
        not chat-capable, or any error occurs (errors are logged, not raised)
    """
    try:
        # Imported at call time rather than at module import.
        # NOTE(review): presumably to avoid a circular import — confirm.
        from app.agents.chat.runtime.llm_config import (
            SanitizedChatLiteLLM,
        )

        # Get the search space with its LLM preferences
        result = await session.execute(
            select(SearchSpace).where(SearchSpace.id == search_space_id)
        )
        search_space = result.scalars().first()

        if not search_space:
            logger.error("Search space %s not found", search_space_id)
            return None

        # Get the appropriate model binding ID based on role
        if role == LLMRole.AGENT:
            llm_config_id = search_space.chat_model_id
        else:
            logger.error("Invalid LLM role: %s", role)
            return None

        if llm_config_id is None:
            logger.error(
                "No %s LLM configured for search space %s", role, search_space_id
            )
            return None

        # Auto mode resolves to one concrete global or BYOK model from the
        # unified model-connections catalog.
        if is_auto_mode(llm_config_id):
            candidates = await auto_model_candidates(
                session,
                search_space_id=search_space_id,
                user_id=search_space.user_id,
                capability="chat",
            )
            if not candidates:
                logger.error("No chat-capable models available for Auto mode")
                return None
            llm_config_id = int(
                choose_auto_model_candidate(candidates, search_space_id)["id"]
            )

        # Check if this is a global virtual model (negative ID)
        if llm_config_id < 0:
            global_model = get_global_model(llm_config_id)
            if not global_model or not _has_capability(global_model, "chat"):
                logger.error("Global chat model %s not found", llm_config_id)
                return None

            global_connection = get_global_connection(global_model["connection_id"])
            if not global_connection:
                logger.error(
                    "Global connection %s not found for model %s",
                    global_model["connection_id"],
                    llm_config_id,
                )
                return None

            _, litellm_kwargs = _chat_litellm_from_resolved(
                conn=global_connection,
                model_id=global_model["model_id"],
                disable_streaming=disable_streaming,
            )
            return SanitizedChatLiteLLM(**litellm_kwargs)

        # Otherwise the ID refers to a DB model scoped to this search space.
        model = await _get_db_model(session, llm_config_id, search_space)
        if not model or not _has_capability(model, "chat"):
            logger.error(
                "Chat model %s not found in search space %s",
                llm_config_id,
                search_space_id,
            )
            return None

        _, litellm_kwargs = _chat_litellm_from_resolved(
            conn=model.connection,
            model_id=model.model_id,
            disable_streaming=disable_streaming,
        )
        return SanitizedChatLiteLLM(**litellm_kwargs)

    except Exception as e:
        # Boundary handler: callers receive None rather than an exception.
        logger.error(
            "Error getting LLM instance for search space %s, role %s: %s",
            search_space_id,
            role,
            e,
        )
        return None
|
|
|
|
|
|
2025-07-24 14:43:48 -07:00
|
|
|
|
2025-12-23 01:16:25 -08:00
|
|
|
async def get_agent_llm(
    session: AsyncSession, search_space_id: int, disable_streaming: bool = False
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """Return the chat model instance bound to the search space.

    Convenience wrapper around ``get_search_space_llm_instance`` with the
    role fixed to ``LLMRole.AGENT``; ``disable_streaming`` is forwarded
    unchanged.
    """
    agent_llm = await get_search_space_llm_instance(
        session,
        search_space_id,
        LLMRole.AGENT,
        disable_streaming=disable_streaming,
    )
    return agent_llm
|
|
|
|
|
|
|
|
|
|
|
2026-04-03 17:40:27 +02:00
|
|
|
async def get_vision_llm(
    session: AsyncSession, search_space_id: int
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """Get the search space's vision LLM instance for screenshot analysis.

    Resolution order:

    1. The search space's selected chat model, when it is a concrete model
       (not Auto) and is vision-capable.
    2. Otherwise the search space's vision model binding:
       - Auto mode (ID 0): unified global/BYOK model candidate selection
       - Global (negative ID): virtual GLOBAL models from YAML
       - DB (positive ID): Model + Connection tables

    Premium global configs are wrapped in :class:`QuotaCheckedVisionLLM`
    so each ``ainvoke`` debits the search-space owner's premium credit
    pool. This applies to every global model returned from here, including
    a global chat model picked in step 1. User-owned BYOK configs and free
    global configs are returned unwrapped — they don't consume premium
    credit (issue M).

    Returns:
        The resolved LLM, or ``None`` when nothing usable is configured or
        an error occurs (errors are logged, not raised).
    """
    from app.services.quota_checked_vision_llm import QuotaCheckedVisionLLM

    try:
        # Imported at call time rather than at module import.
        # NOTE(review): presumably to avoid a circular import — confirm.
        from app.agents.chat.runtime.llm_config import (
            SanitizedChatLiteLLM,
        )

        def _global_vision_llm(global_model, global_connection, owner_user_id):
            """Build the LLM for a global model, quota-wrapped when premium."""
            model_string, litellm_kwargs = _chat_litellm_from_resolved(
                conn=global_connection,
                model_id=global_model["model_id"],
            )
            inner_llm = SanitizedChatLiteLLM(**litellm_kwargs)

            billing_tier = str(global_model.get("billing_tier", "free")).lower()
            if billing_tier != "premium":
                return inner_llm

            # `or {}` also covers a catalog key that is present but null.
            catalog = global_model.get("catalog") or {}
            return QuotaCheckedVisionLLM(
                inner_llm,
                user_id=owner_user_id,
                search_space_id=search_space_id,
                billing_tier=billing_tier,
                base_model=model_string,
                quota_reserve_tokens=catalog.get("quota_reserve_tokens"),
            )

        result = await session.execute(
            select(SearchSpace).where(SearchSpace.id == search_space_id)
        )
        search_space = result.scalars().first()
        if not search_space:
            logger.error("Search space %s not found", search_space_id)
            return None

        owner_user_id = search_space.user_id

        # Prefer the selected chat model when it is vision-capable.
        chat_model_id = search_space.chat_model_id
        if chat_model_id and chat_model_id != AUTO_MODE_ID:
            if chat_model_id < 0:
                chat_model = get_global_model(chat_model_id)
                if chat_model and _has_capability(chat_model, "vision"):
                    global_connection = get_global_connection(
                        chat_model["connection_id"]
                    )
                    if global_connection:
                        # Same premium wrapping as the dedicated vision
                        # binding, so a premium chat model cannot be used
                        # for vision without debiting quota.
                        return _global_vision_llm(
                            chat_model, global_connection, owner_user_id
                        )
            else:
                chat_model = await _get_db_model(session, chat_model_id, search_space)
                if chat_model and _has_capability(chat_model, "vision"):
                    _, litellm_kwargs = _chat_litellm_from_resolved(
                        conn=chat_model.connection,
                        model_id=chat_model.model_id,
                    )
                    return SanitizedChatLiteLLM(**litellm_kwargs)

        # Fall back to the dedicated vision model binding.
        config_id = search_space.vision_model_id
        if config_id is None:
            logger.error(
                "No vision LLM configured for search space %s", search_space_id
            )
            return None

        if config_id == AUTO_MODE_ID:
            candidates = await auto_model_candidates(
                session,
                search_space_id=search_space_id,
                user_id=owner_user_id,
                capability="vision",
            )
            if not candidates:
                logger.error("No vision-capable models available for Auto mode")
                return None
            config_id = int(
                choose_auto_model_candidate(candidates, search_space_id)["id"]
            )

        if config_id < 0:
            global_model = get_global_model(config_id)
            if not global_model or not _has_capability(global_model, "vision"):
                logger.error("Global vision model %s not found", config_id)
                return None

            global_connection = get_global_connection(global_model["connection_id"])
            if not global_connection:
                logger.error(
                    "Global connection %s not found for model %s",
                    global_model["connection_id"],
                    config_id,
                )
                return None

            return _global_vision_llm(global_model, global_connection, owner_user_id)

        model = await _get_db_model(session, config_id, search_space)
        if not model or not _has_capability(model, "vision"):
            logger.error(
                "Vision model %s not found in search space %s",
                config_id,
                search_space_id,
            )
            return None

        _, litellm_kwargs = _chat_litellm_from_resolved(
            conn=model.connection,
            model_id=model.model_id,
        )
        return SanitizedChatLiteLLM(**litellm_kwargs)

    except Exception as e:
        # Boundary handler: callers receive None rather than an exception.
        logger.error(
            "Error getting vision LLM for search space %s: %s", search_space_id, e
        )
        return None
|
2026-04-03 17:40:27 +02:00
|
|
|
|
|
|
|
|
|
2026-05-20 11:42:52 +02:00
|
|
|
def get_planner_llm() -> ChatLiteLLM | None:
    """Build the planner LLM from the first global config flagged as planner.

    Scans ``config.GLOBAL_LLM_CONFIGS`` in order and uses the first entry
    whose ``is_planner`` value is exactly ``True``; returns ``None`` when no
    entry qualifies.

    The planner role covers short, structured internal tasks (KB search
    planning: query rewriting, date extraction, recency classification).
    Small/fast models (e.g. gpt-4o-mini, Claude Haiku, Azure gpt-5.x-nano)
    serve these well, whereas routing them through the user's chat LLM is
    unnecessarily expensive and slow.

    Only the in-memory global config list is consulted, so there is no DB
    cost and the helper can be called synchronously from middleware/factory
    code. The instance is created with ``create_chat_litellm_from_config``.

    Callers MUST fall back to their chat LLM when this returns ``None`` so
    deployments without a planner config keep working unchanged.
    """
    from app.agents.chat.runtime.llm_config import (
        create_chat_litellm_from_config,
    )

    for candidate in config.GLOBAL_LLM_CONFIGS:
        if candidate.get("is_planner") is True:
            return create_chat_litellm_from_config(candidate)
    return None
|