Add files via upload
minor updates
This commit is contained in:
parent
caca498f49
commit
2c82e5964f
2 changed files with 8 additions and 7 deletions
|
|
@ -3,6 +3,8 @@ endpoints:
|
|||
- http://192.168.0.50:11434
|
||||
- http://192.168.0.51:11434
|
||||
- http://192.168.0.52:11434
|
||||
- https://openrouter.ai/api/v1
|
||||
- https://api.inceptionlabs.ai/v1
|
||||
|
||||
# Maximum concurrent connections *per endpoint‑model pair*
|
||||
max_concurrent_connections: 2
|
||||
# Maximum concurrent connections *per endpoint‑model pair* (equal to OLLAMA_NUM_PARALLEL)
|
||||
max_concurrent_connections: 2
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing
|
||||
author: alpha-nerd-nomyo
|
||||
author_url: https://github.com/nomyo-ai
|
||||
version: 0.1
|
||||
version: 0.2.1
|
||||
license: AGPL
|
||||
"""
|
||||
# -------------------------------------------------------------
|
||||
|
|
@ -19,9 +19,9 @@ from collections import defaultdict
|
|||
# ------------------------------------------------------------------
|
||||
# In‑memory caches
|
||||
# ------------------------------------------------------------------
|
||||
# Successful results are cached for 300 s
|
||||
# Successful results are cached for 300s
|
||||
_models_cache: dict[str, tuple[Set[str], float]] = {}
|
||||
# Transient errors are cached for 30 s – the key stays until the
|
||||
# Transient errors are cached for 1s – the key stays until the
|
||||
# timeout expires, after which the endpoint will be queried again.
|
||||
_error_cache: dict[str, float] = {}
|
||||
|
||||
|
|
@ -86,7 +86,6 @@ def get_httpx_client(endpoint: str) -> httpx.AsyncClient:
|
|||
)
|
||||
)
|
||||
|
||||
#@cached(cache=Cache.MEMORY, ttl=300)
|
||||
async def fetch_available_models(endpoint: str) -> Set[str]:
|
||||
"""
|
||||
Query <endpoint>/api/tags and return a set of all model names that the
|
||||
|
|
@ -132,7 +131,7 @@ async def fetch_available_models(endpoint: str) -> Set[str]:
|
|||
_models_cache[endpoint] = (models, time.time())
|
||||
return models
|
||||
else:
|
||||
# Empty list – treat as “no models”, but still cache for 300 s
|
||||
# Empty list – treat as “no models”, but still cache for 300s
|
||||
_models_cache[endpoint] = (models, time.time())
|
||||
return models
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue