Add files via upload

minor updates
This commit is contained in:
Alpha Nerd 2025-09-01 16:35:22 +02:00 committed by GitHub
parent caca498f49
commit 2c82e5964f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 8 additions and 7 deletions

View file

@ -3,6 +3,8 @@ endpoints:
- http://192.168.0.50:11434
- http://192.168.0.51:11434
- http://192.168.0.52:11434
- https://openrouter.ai/api/v1
- https://api.inceptionlabs.ai/v1
# Maximum concurrent connections *per endpoint-model pair*
max_concurrent_connections: 2
# Maximum concurrent connections *per endpoint-model pair* (equal to OLLAMA_NUM_PARALLEL)
max_concurrent_connections: 2

View file

@ -2,7 +2,7 @@
title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing
author: alpha-nerd-nomyo
author_url: https://github.com/nomyo-ai
version: 0.1
version: 0.2.1
license: AGPL
"""
# -------------------------------------------------------------
@ -19,9 +19,9 @@ from collections import defaultdict
# ------------------------------------------------------------------
# In-memory caches
# ------------------------------------------------------------------
# Successful results are cached for 300s
# Successful results are cached for 300s
_models_cache: dict[str, tuple[Set[str], float]] = {}
# Transient errors are cached for 30s – the key stays until the
# Transient errors are cached for 1s – the key stays until the
# timeout expires, after which the endpoint will be queried again.
_error_cache: dict[str, float] = {}
@ -86,7 +86,6 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient:
)
)
#@cached(cache=Cache.MEMORY, ttl=300)
async def fetch_available_models(endpoint: str) -> Set[str]:
"""
Query <endpoint>/api/tags and return a set of all model names that the
@ -132,7 +131,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]:
_models_cache[endpoint] = (models, time.time())
return models
else:
# Empty list – treat as “no models”, but still cache for 300s
# Empty list – treat as “no models”, but still cache for 300s
_models_cache[endpoint] = (models, time.time())
return models
except Exception as e: