diff --git a/requirements.txt b/requirements.txt index 4d43ce4..e39b50c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -aiocache==0.12.3 annotated-types==0.7.0 anyio==4.10.0 certifi==2025.8.3 diff --git a/router.py b/router.py index 9173976..0a3c3d6 100644 --- a/router.py +++ b/router.py @@ -6,7 +6,7 @@ version: 0.1 license: AGPL """ # ------------------------------------------------------------- -import json, random, asyncio, yaml, httpx, ollama, openai +import json, time, asyncio, yaml, httpx, ollama, openai from pathlib import Path from typing import Dict, Set, List from fastapi import FastAPI, Request, HTTPException @@ -15,7 +15,15 @@ from starlette.responses import StreamingResponse, JSONResponse, Response, HTMLR from pydantic import Field from pydantic_settings import BaseSettings from collections import defaultdict -from aiocache import cached, Cache + +# ------------------------------------------------------------------ +# In‑memory caches +# ------------------------------------------------------------------ +# Successful results are cached for 300 s +_models_cache: dict[str, tuple[Set[str], float]] = {} +# Transient errors are cached for 30 s – the key stays until the +# timeout expires, after which the endpoint will be queried again. +_error_cache: dict[str, float] = {} # ------------------------------------------------------------- # 1. Configuration loader @@ -61,6 +69,9 @@ usage_lock = asyncio.Lock() # protects access to usage_counts # ------------------------------------------------------------- # 4. 
Helperfunctions
 # -------------------------------------------------------------
+def _is_fresh(cached_at: float, ttl: int) -> bool:
+    return (time.time() - cached_at) < ttl  # True while the entry is younger than ttl seconds
+
 def get_httpx_client(endpoint: str) -> httpx.AsyncClient:
     """
     Use persistent connections to request endpoint info for reliable results
@@ -75,7 +86,6 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient:
         )
     )
 
-@cached(cache=Cache.MEMORY, ttl=300)
 async def fetch_available_models(endpoint: str) -> Set[str]:
     """
     Query /api/tags and return a set of all model names that the
@@ -86,6 +96,22 @@ async def fetch_available_models(endpoint: str) -> Set[str]:
     If the request fails (e.g. timeout, 5xx, or malformed response), an empty
     set is returned.
     """
+    if endpoint in _models_cache:
+        models, cached_at = _models_cache[endpoint]
+        if _is_fresh(cached_at, 300):
+            return models
+        else:
+            # stale entry – drop it
+            del _models_cache[endpoint]
+
+    if endpoint in _error_cache:
+        if _is_fresh(_error_cache[endpoint], 30):
+            # Still within the 30 s error TTL – pretend nothing is available
+            return set()
+        else:
+            # Error expired – remove it
+            del _error_cache[endpoint]
+
     client = get_httpx_client(endpoint)
     try:
         if "/v1" in endpoint:
@@ -100,10 +126,14 @@ async def fetch_available_models(endpoint: str) -> Set[str]:
-            models = {m.get("id") for m in data.get("data", []) if m.get("name")}
+            models = {m.get("id") for m in data.get("data", []) if m.get("id")}
         else:
             models = {m.get("name") for m in data.get("models", []) if m.get("name")}
-        return models
+
+        # Cache the result even when it is empty, so the endpoint is not re-queried for 300 s
+        _models_cache[endpoint] = (models, time.time())
+        return models
     except Exception as e:
         # Treat any error as if the endpoint offers no models
-        print(e)
+        print(f"[fetch_available_models] {endpoint} error: {e}")
+        _error_cache[endpoint] = time.time()
         return set()
 
 
@@ -131,13 +161,13 @@ async def fetch_endpoint_details(endpoint: str, route: str, 
detail: str) -> List
     Query / to fetch and return a List of dicts with details
     for the corresponding Ollama endpoint. If the request fails we respond with "N/A" for detail.
     """
+    client = get_httpx_client(endpoint)
     try:
-        async with httpx.AsyncClient(timeout=1.0) as client:
-            resp = await client.get(f"{endpoint}{route}")
-            resp.raise_for_status()
-            data = resp.json()
-            detail = data.get(detail, [])
-            return detail
+        # NOTE(review): a relative route assumes the shared client carries the endpoint as base_url — confirm in get_httpx_client
+        resp = await client.get(route)
+        resp.raise_for_status()
+        data = resp.json()
+        return data.get(detail, [])
     except Exception as e:
         # If anything goes wrong we cannot reply details
-        print(e)
+        print(f"[fetch_endpoint_details] {endpoint}{route} error: {e}")