diff --git a/config.yaml b/config.yaml index a0d14ed..692c00e 100644 --- a/config.yaml +++ b/config.yaml @@ -3,6 +3,8 @@ endpoints: - http://192.168.0.50:11434 - http://192.168.0.51:11434 - http://192.168.0.52:11434 + - https://openrouter.ai/api/v1 + - https://api.inceptionlabs.ai/v1 -# Maximum concurrent connections *per endpoint‑model pair* -max_concurrent_connections: 2 \ No newline at end of file +# Maximum concurrent connections *per endpoint‑model pair* (equal to OLLAMA_NUM_PARALLEL) +max_concurrent_connections: 2 diff --git a/router.py b/router.py index 0a3c3d6..f58157a 100644 --- a/router.py +++ b/router.py @@ -2,7 +2,7 @@ title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing author: alpha-nerd-nomyo author_url: https://github.com/nomyo-ai -version: 0.1 +version: 0.2.1 license: AGPL """ # ------------------------------------------------------------- @@ -19,9 +19,9 @@ from collections import defaultdict # ------------------------------------------------------------------ # In‑memory caches # ------------------------------------------------------------------ -# Successful results are cached for 300 s +# Successful results are cached for 300s _models_cache: dict[str, tuple[Set[str], float]] = {} -# Transient errors are cached for 30 s – the key stays until the +# Transient errors are cached for 1s – the key stays until the # timeout expires, after which the endpoint will be queried again. 
_error_cache: dict[str, float] = {} @@ -86,7 +86,6 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient: ) ) -#@cached(cache=Cache.MEMORY, ttl=300) async def fetch_available_models(endpoint: str) -> Set[str]: """ Query /api/tags and return a set of all model names that the @@ -132,7 +131,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]: _models_cache[endpoint] = (models, time.time()) return models else: - # Empty list – treat as “no models”, but still cache for 300 s + # Empty list – treat as “no models”, but still cache for 300s _models_cache[endpoint] = (models, time.time()) return models except Exception as e: