Merge pull request #2 from nomyo-ai/dev-v.0.2.x

Add files via upload
2025-09-01 13:59:18 +02:00 · 2025-09-01 13:59:18 +02:00 · 40ef8ec0c2
commit 40ef8ec0c2
parent 038e1b0f9a caca498f49
2 changed files with 46 additions and 12 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -1,4 +1,3 @@
 aiocache==0.12.3
 annotated-types==0.7.0
 anyio==4.10.0
 certifi==2025.8.3
--- a/router.py
+++ b/router.py
@ -6,7 +6,7 @@ version: 0.1
 license: AGPL
 """
 # -------------------------------------------------------------
-import json, random, asyncio, yaml, httpx, ollama, openai
+import json, time, asyncio, yaml, httpx, ollama, openai
 from pathlib import Path
 from typing import Dict, Set, List
 from fastapi import FastAPI, Request, HTTPException
@ -15,7 +15,15 @@ from starlette.responses import StreamingResponse, JSONResponse, Response, HTMLR
 from pydantic import Field
 from pydantic_settings import BaseSettings
 from collections import defaultdict
-from aiocache import cached, Cache
+
 # ------------------------------------------------------------------
 # In‑memory caches
 # ------------------------------------------------------------------
 # Successful results are cached for 300 s
 _models_cache: dict[str, tuple[Set[str], float]] = {}
 # Transient errors are cached for 30 s – the key stays until the
 # timeout expires, after which the endpoint will be queried again.
 _error_cache: dict[str, float] = {}
 # -------------------------------------------------------------
 # 1. Configuration loader
@ -61,6 +69,9 @@ usage_lock = asyncio.Lock()  # protects access to usage_counts
 # -------------------------------------------------------------
 # 4. Helperfunctions 
 # -------------------------------------------------------------
 def _is_fresh(cached_at: float, ttl: int) -> bool:
    return (time.time() - cached_at) < ttl
 def get_httpx_client(endpoint: str) -> httpx.AsyncClient:
    """
    Use persistent connections to request endpoint info for reliable results
@ -75,7 +86,7 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient:
        )
    )
-@cached(cache=Cache.MEMORY, ttl=300)
+#@cached(cache=Cache.MEMORY, ttl=300)
 async def fetch_available_models(endpoint: str) -> Set[str]:
    """
    Query <endpoint>/api/tags and return a set of all model names that the
@ -86,6 +97,22 @@ async def fetch_available_models(endpoint: str) -> Set[str]:
    If the request fails (e.g. timeout, 5xx, or malformed response), an empty
    set is returned.
    """
    if endpoint in _models_cache:
        models, cached_at = _models_cache[endpoint]
        if _is_fresh(cached_at, 300):
            return models
        else:
            # stale entry – drop it
            del _models_cache[endpoint]
    if endpoint in _error_cache:
        if _is_fresh(_error_cache[endpoint], 1):
            # Still within the short error TTL – pretend nothing is available
            return set()
        else:
            # Error expired – remove it
            del _error_cache[endpoint]
    client = get_httpx_client(endpoint)
    try:
        if "/v1" in endpoint:
@ -100,10 +127,18 @@ async def fetch_available_models(endpoint: str) -> Set[str]:
            models = {m.get("id") for m in data.get("data", []) if m.get("name")}
        else:
            models = {m.get("name") for m in data.get("models", []) if m.get("name")}
-        return models
+
        if models:
            _models_cache[endpoint] = (models, time.time())
            return models
        else:
            # Empty list – treat as “no models”, but still cache for 300 s
            _models_cache[endpoint] = (models, time.time())
            return models
    except Exception as e:
        # Treat any error as if the endpoint offers no models
-        print(e)
+        print(f"[fetch_available_models] {endpoint} error: {e}")
        _error_cache[endpoint] = time.time()
        return set()
@ -131,13 +166,13 @@ async def fetch_endpoint_details(endpoint: str, route: str, detail: str) -> List
    Query <endpoint>/<route> to fetch <detail> and return a List of dicts with details
    for the corresponding Ollama endpoint. If the request fails we respond with "N/A" for detail.
    """
    client = get_httpx_client(endpoint)
    try:
-        async with httpx.AsyncClient(timeout=1.0) as client:
+        resp = await client.get(f"{route}")
-            resp = await client.get(f"{endpoint}{route}")
+        resp.raise_for_status()
-            resp.raise_for_status()
+        data = resp.json()
-            data = resp.json()
+        detail = data.get(detail, [])
-            detail = data.get(detail, [])
+        return detail
            return detail
    except Exception as e:
        # If anything goes wrong we cannot reply details
        print(e)