feat: added improved llm model selector

2026-06-22 21:28:12 +02:00 · 2026-02-20 14:28:01 -08:00 · 2026-02-20 14:28:01 -08:00 · a3cd598e01
commit a3cd598e01
parent dc19b43967
23 changed files with 14733 additions and 126 deletions
--- a/surfsense_backend/app/agents/new_chat/tools/linear/create_issue.py
+++ b/surfsense_backend/app/agents/new_chat/tools/linear/create_issue.py
@ -230,10 +230,12 @@ def create_create_linear_issue_tool(
                raise

            logger.error(f"Error creating Linear issue: {e}", exc_info=True)
-            if isinstance(e, (ValueError, LinearAPIError)):
+            if isinstance(e, ValueError | LinearAPIError):
                message = str(e)
            else:
-                message = "Something went wrong while creating the issue. Please try again."
+                message = (
+                    "Something went wrong while creating the issue. Please try again."
+                )
            return {"status": "error", "message": message}

    return create_linear_issue
--- a/surfsense_backend/app/agents/new_chat/tools/linear/delete_issue.py
+++ b/surfsense_backend/app/agents/new_chat/tools/linear/delete_issue.py
@ -238,7 +238,9 @@ def create_delete_linear_issue_tool(
            if result.get("status") == "success":
                result["deleted_from_kb"] = deleted_from_kb
                if issue_identifier:
-                    result["message"] = f"Issue {issue_identifier} archived successfully."
+                    result["message"] = (
+                        f"Issue {issue_identifier} archived successfully."
+                    )
                if deleted_from_kb:
                    result["message"] = (
                        f"{result.get('message', '')} Also removed from the knowledge base."
@ -253,10 +255,12 @@ def create_delete_linear_issue_tool(
                raise

            logger.error(f"Error deleting Linear issue: {e}", exc_info=True)
-            if isinstance(e, (ValueError, LinearAPIError)):
+            if isinstance(e, ValueError | LinearAPIError):
                message = str(e)
            else:
-                message = "Something went wrong while deleting the issue. Please try again."
+                message = (
+                    "Something went wrong while deleting the issue. Please try again."
+                )
            return {"status": "error", "message": message}

    return delete_linear_issue
--- a/surfsense_backend/app/agents/new_chat/tools/linear/update_issue.py
+++ b/surfsense_backend/app/agents/new_chat/tools/linear/update_issue.py
@ -290,10 +290,12 @@ def create_update_linear_issue_tool(
                raise

            logger.error(f"Error updating Linear issue: {e}", exc_info=True)
-            if isinstance(e, (ValueError, LinearAPIError)):
+            if isinstance(e, ValueError | LinearAPIError):
                message = str(e)
            else:
-                message = "Something went wrong while updating the issue. Please try again."
+                message = (
+                    "Something went wrong while updating the issue. Please try again."
+                )
            return {"status": "error", "message": message}

    return update_linear_issue
--- a/surfsense_backend/app/agents/new_chat/tools/notion/create_page.py
+++ b/surfsense_backend/app/agents/new_chat/tools/notion/create_page.py
@ -224,10 +224,12 @@ def create_create_notion_page_tool(
                raise

            logger.error(f"Error creating Notion page: {e}", exc_info=True)
-            if isinstance(e, (ValueError, NotionAPIError)):
+            if isinstance(e, ValueError | NotionAPIError):
                message = str(e)
            else:
-                message = "Something went wrong while creating the page. Please try again."
+                message = (
+                    "Something went wrong while creating the page. Please try again."
+                )
            return {"status": "error", "message": message}

    return create_notion_page
--- a/surfsense_backend/app/agents/new_chat/tools/notion/delete_page.py
+++ b/surfsense_backend/app/agents/new_chat/tools/notion/delete_page.py
@ -262,10 +262,12 @@ def create_delete_notion_page_tool(
                raise

            logger.error(f"Error deleting Notion page: {e}", exc_info=True)
-            if isinstance(e, (ValueError, NotionAPIError)):
+            if isinstance(e, ValueError | NotionAPIError):
                message = str(e)
            else:
-                message = "Something went wrong while deleting the page. Please try again."
+                message = (
+                    "Something went wrong while deleting the page. Please try again."
+                )
            return {"status": "error", "message": message}

    return delete_notion_page
--- a/surfsense_backend/app/agents/new_chat/tools/notion/update_page.py
+++ b/surfsense_backend/app/agents/new_chat/tools/notion/update_page.py
@ -261,10 +261,12 @@ def create_update_notion_page_tool(
                raise

            logger.error(f"Error updating Notion page: {e}", exc_info=True)
-            if isinstance(e, (ValueError, NotionAPIError)):
+            if isinstance(e, ValueError | NotionAPIError):
                message = str(e)
            else:
-                message = "Something went wrong while updating the page. Please try again."
+                message = (
+                    "Something went wrong while updating the page. Please try again."
+                )
            return {"status": "error", "message": message}

    return update_notion_page
--- a/surfsense_backend/app/config/model_list_fallback.json
+++ b/surfsense_backend/app/config/model_list_fallback.json
--- a/surfsense_backend/app/connectors/linear_connector.py
+++ b/surfsense_backend/app/connectors/linear_connector.py
@ -30,6 +30,7 @@ class LinearAPIError(Exception):
    without any additional prefix or wrapping.
    """

+
 ORGANIZATION_QUERY = """
 query {
    organization {
@ -267,7 +268,10 @@ class LinearConnector:
            if errors:
                ext = errors[0].get("extensions", {})
                code = ext.get("code", "")
-                if code == "INPUT_ERROR" and "too complex" in errors[0].get("message", "").lower():
+                if (
+                    code == "INPUT_ERROR"
+                    and "too complex" in errors[0].get("message", "").lower()
+                ):
                    friendly = (
                        "Linear rejected the request because the workspace is too large "
                        "to fetch in one query. Please try again — if the problem persists, "
--- a/surfsense_backend/app/routes/init.py
+++ b/surfsense_backend/app/routes/init.py
@ -26,6 +26,7 @@ from .jira_add_connector_route import router as jira_add_connector_router
 from .linear_add_connector_route import router as linear_add_connector_router
 from .logs_routes import router as logs_router
 from .luma_add_connector_route import router as luma_add_connector_router
+from .model_list_routes import router as model_list_router
 from .new_chat_routes import router as new_chat_router
 from .new_llm_config_routes import router as new_llm_config_router
 from .notes_routes import router as notes_router
@ -68,6 +69,7 @@ router.include_router(jira_add_connector_router)
 router.include_router(confluence_add_connector_router)
 router.include_router(clickup_add_connector_router)
 router.include_router(new_llm_config_router)  # LLM configs with prompt configuration
+router.include_router(model_list_router)  # Dynamic LLM model catalogue from OpenRouter
 router.include_router(logs_router)
 router.include_router(circleback_webhook_router)  # Circleback meeting webhooks
 router.include_router(surfsense_docs_router)  # Surfsense documentation for citations
--- a/surfsense_backend/app/routes/model_list_routes.py
+++ b/surfsense_backend/app/routes/model_list_routes.py
@ -0,0 +1,44 @@
+"""
+API route for fetching the available LLM models catalogue.
+
+Serves a dynamically-updated list sourced from the OpenRouter public API,
+with a local JSON fallback when the API is unreachable.
+"""
+
+import logging
+
+from fastapi import APIRouter, Depends, HTTPException
+from pydantic import BaseModel
+
+from app.db import User
+from app.services.model_list_service import get_model_list
+from app.users import current_active_user
+
+router = APIRouter()
+logger = logging.getLogger(__name__)
+
+
+class ModelListItem(BaseModel):
+    value: str
+    label: str
+    provider: str
+    context_window: str | None = None
+
+
+@router.get("/models", response_model=list[ModelListItem])
+async def list_available_models(
+    user: User = Depends(current_active_user),
+):
+    """
+    Return all available LLM models as a flat list, one entry per model/provider pair.
+
+    The list is sourced from the OpenRouter public API and cached for 24 hours.
+    If the API is unreachable, a local fallback file is used instead.
+    """
+    try:
+        return await get_model_list()
+    except Exception as e:
+        logger.exception("Failed to fetch model list")
+        raise HTTPException(
+            status_code=500, detail=f"Failed to fetch model list: {e!s}"
+        ) from e
--- a/surfsense_backend/app/services/linear/tool_metadata_service.py
+++ b/surfsense_backend/app/services/linear/tool_metadata_service.py
@ -110,13 +110,15 @@ class LinearToolMetadataService:
                teams = await self._fetch_teams_context(linear_client)
            except Exception as e:
                return {"error": f"Failed to fetch Linear context: {e!s}"}
-            workspaces.append({
-                "id": workspace.id,
-                "name": workspace.name,
-                "organization_name": workspace.organization_name,
-                "teams": teams,
-                "priorities": priorities,
-            })
+            workspaces.append(
+                {
+                    "id": workspace.id,
+                    "name": workspace.name,
+                    "organization_name": workspace.organization_name,
+                    "teams": teams,
+                    "priorities": priorities,
+                }
+            )

        return {"workspaces": workspaces}

@ -307,16 +309,10 @@ class LinearToolMetadataService:
                    Document.document_type == DocumentType.LINEAR_CONNECTOR,
                    SearchSourceConnector.user_id == user_id,
                    or_(
-                        func.lower(
-                            Document.document_metadata.op("->>")(
-                                "issue_title"
-                            )
-                        )
+                        func.lower(Document.document_metadata.op("->>")("issue_title"))
                        == ref_lower,
                        func.lower(
-                            Document.document_metadata.op("->>")(
-                                "issue_identifier"
-                            )
+                            Document.document_metadata.op("->>")("issue_identifier")
                        )
                        == ref_lower,
                        func.lower(Document.title) == ref_lower,
--- a/surfsense_backend/app/services/llm_service.py
+++ b/surfsense_backend/app/services/llm_service.py
@ -162,7 +162,10 @@ async def validate_llm_config(


 async def get_search_space_llm_instance(
-    session: AsyncSession, search_space_id: int, role: str, disable_streaming: bool = False
+    session: AsyncSession,
+    search_space_id: int,
+    role: str,
+    disable_streaming: bool = False,
 ) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """
    Get a ChatLiteLLM instance for a specific search space and role.
@ -384,16 +387,24 @@ async def get_document_summary_llm(
 ) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """Get the search space's document summary LLM instance."""
    return await get_search_space_llm_instance(
-        session, search_space_id, LLMRole.DOCUMENT_SUMMARY, disable_streaming=disable_streaming
+        session,
+        search_space_id,
+        LLMRole.DOCUMENT_SUMMARY,
+        disable_streaming=disable_streaming,
    )


 # Backward-compatible alias (LLM preferences are now per-search-space, not per-user)
 async def get_user_long_context_llm(
-    session: AsyncSession, user_id: str, search_space_id: int, disable_streaming: bool = False
+    session: AsyncSession,
+    user_id: str,
+    search_space_id: int,
+    disable_streaming: bool = False,
 ) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """
    Deprecated: Use get_document_summary_llm instead.
    The user_id parameter is ignored as LLM preferences are now per-search-space.
    """
-    return await get_document_summary_llm(session, search_space_id, disable_streaming=disable_streaming)
+    return await get_document_summary_llm(
+        session, search_space_id, disable_streaming=disable_streaming
+    )
--- a/surfsense_backend/app/services/model_list_service.py
+++ b/surfsense_backend/app/services/model_list_service.py
@ -0,0 +1,167 @@
+"""
+Service for fetching and caching the available LLM model list.
+
+Uses the OpenRouter public API as the primary source, with a local
+fallback JSON file when the API is unreachable.
+"""
+
+import json
+import logging
+import time
+from pathlib import Path
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
+FALLBACK_FILE = Path(__file__).parent.parent / "config" / "model_list_fallback.json"
+CACHE_TTL_SECONDS = 86400  # 24 hours
+
+# In-memory cache
+_cache: list[dict] | None = None
+_cache_timestamp: float = 0
+
+# Maps OpenRouter provider slug → our LiteLLMProvider enum value.
+# Only providers where the model-name part (after the slash) can be
+# used directly with the native provider's litellm prefix are listed.
+#
+# Excluded slugs and why:
+#   "deepseek"  - Native API only accepts "deepseek-chat" / "deepseek-reasoner";
+#                 OpenRouter uses different names (deepseek-v3.2, deepseek-r1, ...).
+#   "qwen"      - Most OpenRouter Qwen entries are open-source models (qwen3-32b, ...)
+#                 that are NOT available on the Dashscope API.
+#   "ai21"      - OpenRouter name "jamba-large-1.7" != AI21 API name "jamba-1.5-large".
+#   "microsoft" - OpenRouter "microsoft/" = open-source Phi/WizardLM, NOT Azure
+#                 OpenAI deployments (which require deployment names, not model ids).
+OPENROUTER_SLUG_TO_PROVIDER: dict[str, str] = {
+    "openai": "OPENAI",
+    "anthropic": "ANTHROPIC",
+    "google": "GOOGLE",
+    "mistralai": "MISTRAL",
+    "cohere": "COHERE",
+    "x-ai": "XAI",
+    "perplexity": "PERPLEXITY",
+}
+
+
+def _format_context_length(length: int | None) -> str | None:
+    """Convert a raw token count to a human-readable string (e.g. 128K, 1M)."""
+    if not length:
+        return None
+    if length >= 1_000_000:
+        return f"{length / 1_000_000:g}M"
+    if length >= 1_000:
+        return f"{length / 1_000:g}K"
+    return str(length)
+
+
+async def _fetch_from_openrouter() -> list[dict] | None:
+    """Try fetching the model catalogue from the OpenRouter public API."""
+    try:
+        async with httpx.AsyncClient(timeout=15) as client:
+            response = await client.get(OPENROUTER_API_URL)
+            response.raise_for_status()
+            data = response.json()
+            return data.get("data", [])
+    except Exception as e:
+        logger.warning("Failed to fetch from OpenRouter API: %s", e)
+        return None
+
+
+def _load_fallback() -> list[dict]:
+    """Load the local fallback model list."""
+    try:
+        with open(FALLBACK_FILE, encoding="utf-8") as f:
+            data = json.load(f)
+            return data.get("data", [])
+    except Exception as e:
+        logger.error("Failed to load fallback model list: %s", e)
+        return []
+
+
+def _is_text_output_model(model: dict) -> bool:
+    """Return True if the model's output is text-only (no audio/image generation)."""
+    output_mods = model.get("architecture", {}).get("output_modalities", [])
+    return output_mods == ["text"]
+
+
+def _process_models(raw_models: list[dict]) -> list[dict]:
+    """
+    Transform raw OpenRouter model entries into a flat list of
+    {value, label, provider, context_window} dicts.
+
+    Only text-output models are included (audio/image generators are skipped).
+
+    Each OpenRouter model is emitted once for OPENROUTER (full id) and,
+    when the slug maps to a native provider, once more with just the
+    model-name portion (for GOOGLE, only "gemini-*" model names qualify).
+    """
+    processed: list[dict] = []
+
+    for model in raw_models:
+        model_id: str = model.get("id", "")
+        name: str = model.get("name", "")
+        context_length = model.get("context_length")
+
+        if "/" not in model_id:
+            continue
+
+        if not _is_text_output_model(model):
+            continue
+
+        provider_slug, model_name = model_id.split("/", 1)
+        context_window = _format_context_length(context_length)
+
+        # 1) Always emit for OPENROUTER (value = full OpenRouter id)
+        processed.append(
+            {
+                "value": model_id,
+                "label": name,
+                "provider": "OPENROUTER",
+                "context_window": context_window,
+            }
+        )
+
+        # 2) Emit for the native provider when we have a mapping
+        native_provider = OPENROUTER_SLUG_TO_PROVIDER.get(provider_slug)
+        if native_provider:
+            # Google's Gemini API only serves gemini-* models.
+            # Open-source models like gemma-* are NOT available through it.
+            if native_provider == "GOOGLE" and not model_name.startswith("gemini-"):
+                continue
+
+            processed.append(
+                {
+                    "value": model_name,
+                    "label": name,
+                    "provider": native_provider,
+                    "context_window": context_window,
+                }
+            )
+
+    return processed
+
+
+async def get_model_list() -> list[dict]:
+    """
+    Return the processed model list, using in-memory cache when fresh.
+    Tries the OpenRouter API first, falls back to the local JSON file.
+    """
+    global _cache, _cache_timestamp
+
+    if _cache is not None and (time.time() - _cache_timestamp) < CACHE_TTL_SECONDS:
+        return _cache
+
+    raw_models = await _fetch_from_openrouter()
+
+    if raw_models is None:
+        logger.info("Using fallback model list")
+        raw_models = _load_fallback()
+
+    processed = _process_models(raw_models)
+
+    _cache = processed
+    _cache_timestamp = time.time()
+
+    return processed
--- a/surfsense_backend/app/services/notion/kb_sync_service.py
+++ b/surfsense_backend/app/services/notion/kb_sync_service.py
@ -57,7 +57,9 @@ class NotionKBSyncService:
            )

            logger.debug(f"Fetching page content from Notion for page {page_id}")
-            blocks, _ = await notion_connector.get_page_content(page_id, page_title=None)
+            blocks, _ = await notion_connector.get_page_content(
+                page_id, page_title=None
+            )

            from app.utils.notion_utils import extract_all_block_ids, process_blocks

@ -100,11 +102,16 @@ class NotionKBSyncService:
                full_content = fetched_content
                content_verified = False

-            logger.debug(f"Final content length: {len(full_content)} chars, verified={content_verified}")
+            logger.debug(
+                f"Final content length: {len(full_content)} chars, verified={content_verified}"
+            )

            logger.debug("Generating summary and embeddings")
            user_llm = await get_user_long_context_llm(
-                self.db_session, user_id, search_space_id, disable_streaming=True # disable streaming to avoid leaking into the chat
+                self.db_session,
+                user_id,
+                search_space_id,
+                disable_streaming=True,  # disable streaming to avoid leaking into the chat
            )

            if user_llm: