SurfSense/surfsense_backend/app/services/openrouter_integration_service.py

"""
OpenRouter Integration Service

Dynamically fetches all available models from the OpenRouter public API
and generates virtual global LLM config entries. These entries are injected
into config.GLOBAL_LLM_CONFIGS so they appear alongside static YAML configs
in the model selector.

All actual LLM calls go through LiteLLM with the ``openrouter/`` prefix --
this service only manages the catalogue, not the inference path.
"""

import asyncio
import logging
import threading
from typing import Any

import httpx

logger = logging.getLogger(__name__)

OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"

# Sentinel value stored on each generated config so we can distinguish
# dynamic OpenRouter entries from hand-written YAML entries during refresh.
_OPENROUTER_DYNAMIC_MARKER = "__openrouter_dynamic__"


def _is_text_output_model(model: dict) -> bool:
    """Return True if the model produces text output only (skip image/audio generators)."""
    output_mods = model.get("architecture", {}).get("output_modalities", [])
    return output_mods == ["text"]


def _supports_tool_calling(model: dict) -> bool:
    """Return True if the model supports function/tool calling."""
    supported = model.get("supported_parameters") or []
    return "tools" in supported


MIN_CONTEXT_LENGTH = 100_000

# Provider slugs whose backend is fundamentally incompatible with our agent's
# tool-call message flow (e.g. Amazon Bedrock requires toolConfig alongside
# tool history which OpenRouter doesn't relay).
_EXCLUDED_PROVIDER_SLUGS = {"amazon"}

_EXCLUDED_MODEL_IDS: set[str] = {
    # Deprecated / removed upstream
    "openai/gpt-4-1106-preview",
    "openai/gpt-4-turbo-preview",
    # Permanently no-capacity variant
    "openai/gpt-4o:extended",
    # Non-serverless model that requires a dedicated endpoint
    "arcee-ai/virtuoso-large",
    # Deep-research models reject standard params (temperature, etc.)
    "openai/o3-deep-research",
    "openai/o4-mini-deep-research",
}

_EXCLUDED_MODEL_SUFFIXES: tuple[str, ...] = ("-deep-research",)


def _has_sufficient_context(model: dict) -> bool:
    """Return True if the model's context window is at least MIN_CONTEXT_LENGTH."""
    ctx = model.get("context_length") or 0
    return ctx >= MIN_CONTEXT_LENGTH


def _is_compatible_provider(model: dict) -> bool:
    """Return False for models from providers known to be incompatible."""
    model_id = model.get("id", "")
    slug = model_id.split("/", 1)[0] if "/" in model_id else ""
    return slug not in _EXCLUDED_PROVIDER_SLUGS


def _is_allowed_model(model: dict) -> bool:
    """Return False for specific model IDs known to be broken or incompatible."""
    model_id = model.get("id", "")
    if model_id in _EXCLUDED_MODEL_IDS:
        return False
    base_id = model_id.split(":")[0]
    return not base_id.endswith(_EXCLUDED_MODEL_SUFFIXES)


def _fetch_models_sync() -> list[dict] | None:
    """Synchronous fetch for use during startup (before the event loop is running)."""
    try:
        with httpx.Client(timeout=20) as client:
            response = client.get(OPENROUTER_API_URL)
            response.raise_for_status()
            data = response.json()
            return data.get("data", [])
    except Exception as e:
        logger.warning("Failed to fetch OpenRouter models (sync): %s", e)
        return None


async def _fetch_models_async() -> list[dict] | None:
    """Async fetch for background refresh."""
    try:
        async with httpx.AsyncClient(timeout=20) as client:
            response = await client.get(OPENROUTER_API_URL)
            response.raise_for_status()
            data = response.json()
            return data.get("data", [])
    except Exception as e:
        logger.warning("Failed to fetch OpenRouter models (async): %s", e)
        return None


def _generate_configs(
    raw_models: list[dict],
    settings: dict[str, Any],
) -> list[dict]:
    """
    Convert raw OpenRouter model entries into global LLM config dicts.

    Models are sorted by ID for deterministic, stable ID assignment across
    restarts and refreshes.
    """
    id_offset: int = settings.get("id_offset", -10000)
    api_key: str = settings.get("api_key", "")
    billing_tier: str = settings.get("billing_tier", "premium")
    anonymous_enabled: bool = settings.get("anonymous_enabled", False)
    seo_enabled: bool = settings.get("seo_enabled", False)
    quota_reserve_tokens: int = settings.get("quota_reserve_tokens", 4000)
    rpm: int = settings.get("rpm", 200)
    tpm: int = settings.get("tpm", 1000000)
    litellm_params: dict = settings.get("litellm_params") or {}
    system_instructions: str = settings.get("system_instructions", "")
    use_default: bool = settings.get("use_default_system_instructions", True)
    citations_enabled: bool = settings.get("citations_enabled", True)

    text_models = [
        m
        for m in raw_models
        if _is_text_output_model(m)
        and _supports_tool_calling(m)
        and _has_sufficient_context(m)
        and _is_compatible_provider(m)
        and _is_allowed_model(m)
        and "/" in m.get("id", "")
    ]
    text_models.sort(key=lambda m: m["id"])

    configs: list[dict] = []
    for idx, model in enumerate(text_models):
        model_id: str = model["id"]
        name: str = model.get("name", model_id)

        cfg: dict[str, Any] = {
            "id": id_offset - idx,
            "name": name,
            "description": f"{name} via OpenRouter",
            "billing_tier": billing_tier,
            "anonymous_enabled": anonymous_enabled,
            "seo_enabled": seo_enabled,
            "seo_slug": None,
            "quota_reserve_tokens": quota_reserve_tokens,
            "provider": "OPENROUTER",
            "model_name": model_id,
            "api_key": api_key,
            "api_base": "",
            "rpm": rpm,
            "tpm": tpm,
            "litellm_params": dict(litellm_params),
            "system_instructions": system_instructions,
            "use_default_system_instructions": use_default,
            "citations_enabled": citations_enabled,
            _OPENROUTER_DYNAMIC_MARKER: True,
        }
        configs.append(cfg)

    return configs


class OpenRouterIntegrationService:
    """Singleton that manages the dynamic OpenRouter model catalogue."""

    _instance: "OpenRouterIntegrationService | None" = None
    _lock = threading.Lock()

    def __init__(self) -> None:
        self._settings: dict[str, Any] = {}
        self._configs: list[dict] = []
        self._configs_by_id: dict[int, dict] = {}
        self._initialized = False
        self._refresh_task: asyncio.Task | None = None

    @classmethod
    def get_instance(cls) -> "OpenRouterIntegrationService":
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    @classmethod
    def is_initialized(cls) -> bool:
        return cls._instance is not None and cls._instance._initialized

    # ------------------------------------------------------------------
    # Initialisation (called at startup, before event loop for Celery)
    # ------------------------------------------------------------------

    def initialize(self, settings: dict[str, Any]) -> list[dict]:
        """
        Fetch models synchronously and generate configs.
        Returns the generated configs list.
        """
        self._settings = settings
        raw_models = _fetch_models_sync()
        if raw_models is None:
            logger.warning("OpenRouter integration: could not fetch models at startup")
            self._initialized = True
            return []

        self._configs = _generate_configs(raw_models, settings)
        self._configs_by_id = {c["id"]: c for c in self._configs}
        self._initialized = True

        logger.info(
            "OpenRouter integration: loaded %d models (IDs %d to %d)",
            len(self._configs),
            self._configs[0]["id"] if self._configs else 0,
            self._configs[-1]["id"] if self._configs else 0,
        )
        return self._configs

    # ------------------------------------------------------------------
    # Background refresh
    # ------------------------------------------------------------------

    async def refresh(self) -> None:
        """Re-fetch from OpenRouter and atomically swap configs in GLOBAL_LLM_CONFIGS."""
        raw_models = await _fetch_models_async()
        if raw_models is None:
            logger.warning("OpenRouter refresh: fetch failed, keeping stale list")
            return

        new_configs = _generate_configs(raw_models, self._settings)
        new_by_id = {c["id"]: c for c in new_configs}

        from app.config import config as app_config

        static_configs = [
            c
            for c in app_config.GLOBAL_LLM_CONFIGS
            if not c.get(_OPENROUTER_DYNAMIC_MARKER)
        ]
        app_config.GLOBAL_LLM_CONFIGS = static_configs + new_configs

        self._configs = new_configs
        self._configs_by_id = new_by_id

        logger.info("OpenRouter refresh: updated to %d models", len(new_configs))

    async def _refresh_loop(self, interval_hours: float) -> None:
        interval_sec = interval_hours * 3600
        while True:
            await asyncio.sleep(interval_sec)
            try:
                await self.refresh()
            except Exception:
                logger.exception("OpenRouter background refresh failed")

    def start_background_refresh(self, interval_hours: float) -> None:
        if interval_hours <= 0:
            return
        loop = asyncio.get_event_loop()
        self._refresh_task = loop.create_task(self._refresh_loop(interval_hours))
        logger.info(
            "OpenRouter background refresh started (every %.1fh)", interval_hours
        )

    def stop_background_refresh(self) -> None:
        if self._refresh_task is not None and not self._refresh_task.done():
            self._refresh_task.cancel()
            self._refresh_task = None
            logger.info("OpenRouter background refresh stopped")

    # ------------------------------------------------------------------
    # Accessors
    # ------------------------------------------------------------------

    def get_configs(self) -> list[dict]:
        return self._configs

    def get_config_by_id(self, config_id: int) -> dict | None:
        return self._configs_by_id.get(config_id)