commit
4e0b2f9fee
5 changed files with 243 additions and 24 deletions
17
Dockerfile
Normal file
17
Dockerfile
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
FROM python:3.13-slim

# PYTHONUNBUFFERED: write stdout/stderr straight through so `docker logs`
# shows router output as it happens.
# PYTHONDONTWRITEBYTECODE: do not write .pyc files into the image.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /app

# Install dependencies before copying the source so this layer is reused
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Dedicated unprivileged account with a fixed UID/GID. It owns /app so the
# service keeps write access to its working directory after dropping root.
RUN groupadd --gid 10001 app \
    && useradd --uid 10001 --gid app --home-dir /app --no-create-home \
        --shell /usr/sbin/nologin app \
    && chown app:app /app

COPY --chown=app:app . .

# The executable bit may be missing depending on how the repo was checked out.
RUN chmod +x /app/entrypoint.sh

# Everything that needed root is done; run the service unprivileged.
USER app

# Documentation only: the entrypoint's default uvicorn port.
EXPOSE 12434

ENTRYPOINT ["/app/entrypoint.sh"]
|
||||||
24
README.md
24
README.md
|
|
@ -53,6 +53,30 @@ finally you can
|
||||||
uvicorn router:app --host 127.0.0.1 --port 12434
|
uvicorn router:app --host 127.0.0.1 --port 12434
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Docker Deployment
|
||||||
|
|
||||||
|
Build the container image locally:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker build -t nomyo-router .
|
||||||
|
```
|
||||||
|
|
||||||
|
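Run the router in Docker with your own configuration file mounted from the host. The entrypoint script reads the config location from the `CONFIG_PATH` environment variable (or from a `--config-path` argument passed after the image name), so you can point to a file anywhere inside the container: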
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker run -d \
|
||||||
|
--name nomyo-router \
|
||||||
|
-p 12434:12434 \
|
||||||
|
-v /absolute/path/to/config_folder:/app/config/ \
|
||||||
|
-e CONFIG_PATH=/app/config/config.yaml \
|
||||||
|
nomyo-router
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
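- The entrypoint exports the value of `CONFIG_PATH` as `NOMYO_ROUTER_CONFIG_PATH`, which is the variable the router actually reads; you can set `NOMYO_ROUTER_CONFIG_PATH` directly with `-e` instead if you prefer (it takes precedence over `CONFIG_PATH`).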
|
||||||
|
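- To override the bind address or port, set `UVICORN_HOST` or `UVICORN_PORT` with `-e`, or pass the corresponding uvicorn flags after `--` at the end of the `docker run` command, e.g. `nomyo-router --config-path /app/config/config.yaml -- --port 9000`. Remember to adjust the `-p` port mapping to match.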
|
||||||
|
- Use `docker logs nomyo-router` to confirm the loaded endpoints and concurrency settings at startup.
|
||||||
|
|
||||||
# Routing
|
# Routing
|
||||||
|
|
||||||
NOMYO Router accepts any Ollama request on the configured port for any Ollama endpoint from your frontend application. It then checks the available backends for the specific request.
|
NOMYO Router accepts any Ollama request on the configured port for any Ollama endpoint from your frontend application. It then checks the available backends for the specific request.
|
||||||
|
|
|
||||||
107
entrypoint.sh
Normal file
107
entrypoint.sh
Normal file
|
|
@ -0,0 +1,107 @@
|
||||||
|
#!/usr/bin/env sh
# Container entrypoint for NOMYO Router.
#
# 1. Consumes its own leading options (--config-path, -h/--help, --).
# 2. Resolves the config file path and exports it as NOMYO_ROUTER_CONFIG_PATH.
# 3. Builds a uvicorn command line (or passes an arbitrary command through)
#    and replaces this shell with it.
set -e

# Print the usage text to stdout.
print_usage() {
  cat <<'EOF'
Usage: entrypoint.sh [--config-path /path/to/config.yaml] [uvicorn options...]

Options:
--config-path PATH Absolute or relative path to a NOMYO Router YAML config file.
-h, --help Show this help message and exit.

Any arguments that remain after the options above are passed directly to uvicorn.

Environment variables:
CONFIG_PATH Alternative way to specify the config path.
NOMYO_ROUTER_CONFIG_PATH Overrides the config path (same as --config-path).
UVICORN_HOST Host interface to bind to (default: 0.0.0.0).
UVICORN_PORT Port to listen on (default: 12434).
UVICORN_RELOAD If set, enables --reload for uvicorn (useful for local dev).
UVICORN_BIN Path to the uvicorn executable (default: uvicorn).
EOF
}

config_file=""
want_help=0

# Parse only the leading options that belong to this script. Parsing stops at
# `--` (which is dropped) or at the first argument that is not one of ours.
# Help is deferred until after the loop so that a later malformed
# --config-path is still reported as an error first.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --config-path=*)
      config_file="${1#*=}"
      shift 1
      ;;
    --config-path)
      if [ -z "${2:-}" ]; then
        echo "Error: --config-path requires a value." >&2
        exit 1
      fi
      config_file="$2"
      shift 2
      ;;
    --help|-h)
      want_help=1
      shift 1
      ;;
    --)
      shift 1
      break
      ;;
    *)
      break
      ;;
  esac
done

if [ "$want_help" -eq 1 ]; then
  print_usage
  exit 0
fi

# Precedence: --config-path, then NOMYO_ROUTER_CONFIG_PATH, then CONFIG_PATH.
config_file="${config_file:-${NOMYO_ROUTER_CONFIG_PATH:-${CONFIG_PATH:-}}}"
if [ -n "$config_file" ]; then
  export NOMYO_ROUTER_CONFIG_PATH="$config_file"
fi

# Fill in defaults for anything the environment left unset or empty.
: "${UVICORN_BIN:=uvicorn}"
: "${UVICORN_HOST:=0.0.0.0}"
: "${UVICORN_PORT:=12434}"

# Decide what to run:
#   - no arguments, or arguments starting with an option -> uvicorn router:app
#   - first argument is the uvicorn binary itself        -> keep the command
#   - anything else                                      -> run it untouched
uvicorn_mode=0
if [ "$#" -eq 0 ]; then
  set -- "$UVICORN_BIN" "router:app"
  uvicorn_mode=1
else
  case "$1" in
    -*)
      set -- "$UVICORN_BIN" "router:app" "$@"
      uvicorn_mode=1
      ;;
    "$UVICORN_BIN")
      uvicorn_mode=1
      ;;
  esac
fi

if [ "$uvicorn_mode" -eq 1 ]; then
  # Only append --host/--port when the caller did not already supply them.
  has_host=0
  has_port=0
  for arg do
    case "$arg" in
      --host|--host=*) has_host=1 ;;
      --port|--port=*) has_port=1 ;;
    esac
  done
  [ "$has_host" -eq 1 ] || set -- "$@" "--host" "$UVICORN_HOST"
  [ "$has_port" -eq 1 ] || set -- "$@" "--port" "$UVICORN_PORT"
  if [ -n "${UVICORN_RELOAD:-}" ]; then
    set -- "$@" "--reload"
  fi
fi

# Replace the shell with the final command.
exec "$@"
|
||||||
|
|
@ -9,7 +9,7 @@ certifi==2025.8.3
|
||||||
click==8.2.1
|
click==8.2.1
|
||||||
distro==1.9.0
|
distro==1.9.0
|
||||||
exceptiongroup==1.3.0
|
exceptiongroup==1.3.0
|
||||||
fastapi==0.116.1
|
fastapi==0.121.0
|
||||||
fastapi-sse==1.1.1
|
fastapi-sse==1.1.1
|
||||||
frozenlist==1.7.0
|
frozenlist==1.7.0
|
||||||
h11==0.16.0
|
h11==0.16.0
|
||||||
|
|
@ -18,19 +18,19 @@ httpx==0.28.1
|
||||||
idna==3.10
|
idna==3.10
|
||||||
jiter==0.10.0
|
jiter==0.10.0
|
||||||
multidict==6.6.4
|
multidict==6.6.4
|
||||||
ollama==0.5.3
|
ollama==0.6.0
|
||||||
openai==1.102.0
|
openai==1.102.0
|
||||||
pillow==11.3.0
|
pillow==11.3.0
|
||||||
propcache==0.3.2
|
propcache==0.3.2
|
||||||
pydantic==2.11.7
|
pydantic==2.11.7
|
||||||
pydantic-settings==2.10.1
|
pydantic-settings==2.10.1
|
||||||
pydantic_core==2.33.2
|
pydantic_core==2.33.2
|
||||||
python-dotenv==1.1.1
|
python-dotenv==1.2.1
|
||||||
PyYAML==6.0.2
|
PyYAML==6.0.3
|
||||||
sniffio==1.3.1
|
sniffio==1.3.1
|
||||||
starlette==0.49.1
|
starlette==0.49.1
|
||||||
tqdm==4.67.1
|
tqdm==4.67.1
|
||||||
typing-inspection==0.4.1
|
typing-inspection==0.4.1
|
||||||
typing_extensions==4.14.1
|
typing_extensions==4.14.1
|
||||||
uvicorn==0.35.0
|
uvicorn==0.38.0
|
||||||
yarl==1.20.1
|
yarl==1.20.1
|
||||||
|
|
|
||||||
109
router.py
109
router.py
|
|
@ -9,6 +9,7 @@ license: AGPL
|
||||||
import json, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, datetime, random, base64, io
|
import json, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, datetime, random, base64, io
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Set, List, Optional
|
from typing import Dict, Set, List, Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
from fastapi import FastAPI, Request, HTTPException
|
from fastapi import FastAPI, Request, HTTPException
|
||||||
from fastapi_sse import sse_handler
|
from fastapi_sse import sse_handler
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
@ -86,8 +87,20 @@ class Config(BaseSettings):
|
||||||
return cls(**cleaned)
|
return cls(**cleaned)
|
||||||
return cls()
|
return cls()
|
||||||
|
|
||||||
|
def _config_path_from_env() -> Path:
|
||||||
|
"""
|
||||||
|
Resolve the configuration file path. Defaults to `config.yaml`
|
||||||
|
in the current working directory unless NOMYO_ROUTER_CONFIG_PATH
|
||||||
|
is set.
|
||||||
|
"""
|
||||||
|
candidate = os.getenv("NOMYO_ROUTER_CONFIG_PATH")
|
||||||
|
if candidate:
|
||||||
|
return Path(candidate).expanduser()
|
||||||
|
return Path("config.yaml")
|
||||||
|
|
||||||
|
|
||||||
# Create the global config object – it will be overwritten on startup
|
# Create the global config object – it will be overwritten on startup
|
||||||
config = Config()
|
config = Config.from_yaml(_config_path_from_env())
|
||||||
|
|
||||||
# -------------------------------------------------------------
|
# -------------------------------------------------------------
|
||||||
# 2. FastAPI application
|
# 2. FastAPI application
|
||||||
|
|
@ -123,6 +136,47 @@ async def _ensure_success(resp: aiohttp.ClientResponse) -> None:
|
||||||
text = await resp.text()
|
text = await resp.text()
|
||||||
raise HTTPException(status_code=resp.status, detail=text)
|
raise HTTPException(status_code=resp.status, detail=text)
|
||||||
|
|
||||||
|
def _format_connection_issue(url: str, error: Exception) -> str:
|
||||||
|
"""
|
||||||
|
Provide a human-friendly error string for connection failures so operators
|
||||||
|
know which endpoint and address failed from inside the container.
|
||||||
|
"""
|
||||||
|
parsed = urlparse(url)
|
||||||
|
host_hint = parsed.hostname or ""
|
||||||
|
port_hint = parsed.port or ""
|
||||||
|
|
||||||
|
if isinstance(error, aiohttp.ClientConnectorError):
|
||||||
|
resolved_host = getattr(error, "host", host_hint) or host_hint or "?"
|
||||||
|
resolved_port = getattr(error, "port", port_hint) or port_hint or "?"
|
||||||
|
parts = [
|
||||||
|
f"Failed to connect to {url} (resolved: {resolved_host}:{resolved_port}).",
|
||||||
|
"Ensure the endpoint address is reachable from within the container.",
|
||||||
|
]
|
||||||
|
if resolved_host in {"localhost", "127.0.0.1"}:
|
||||||
|
parts.append(
|
||||||
|
"Inside Docker, 'localhost' refers to the container itself; use "
|
||||||
|
"'host.docker.internal' or a Docker network alias if the service "
|
||||||
|
"runs on the host machine."
|
||||||
|
)
|
||||||
|
os_error = getattr(error, "os_error", None)
|
||||||
|
if isinstance(os_error, OSError):
|
||||||
|
errno = getattr(os_error, "errno", None)
|
||||||
|
strerror = os_error.strerror or str(os_error)
|
||||||
|
if errno is not None or strerror:
|
||||||
|
parts.append(f"OS error [{errno}]: {strerror}.")
|
||||||
|
elif os_error:
|
||||||
|
parts.append(f"OS error: {os_error}.")
|
||||||
|
parts.append(f"Original error: {error}.")
|
||||||
|
return " ".join(parts)
|
||||||
|
|
||||||
|
if isinstance(error, asyncio.TimeoutError):
|
||||||
|
return (
|
||||||
|
f"Timed out waiting for {url}. "
|
||||||
|
"The remote endpoint may be offline or slow to respond."
|
||||||
|
)
|
||||||
|
|
||||||
|
return f"Error while contacting {url}: {error}"
|
||||||
|
|
||||||
def is_ext_openai_endpoint(endpoint: str) -> bool:
|
def is_ext_openai_endpoint(endpoint: str) -> bool:
|
||||||
if "/v1" not in endpoint:
|
if "/v1" not in endpoint:
|
||||||
return False
|
return False
|
||||||
|
|
@ -192,7 +246,8 @@ class fetch:
|
||||||
return models
|
return models
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Treat any error as if the endpoint offers no models
|
# Treat any error as if the endpoint offers no models
|
||||||
print(f"[fetch.available_models] {endpoint} error: {e}")
|
message = _format_connection_issue(endpoint_url, e)
|
||||||
|
print(f"[fetch.available_models] {message}")
|
||||||
_error_cache[endpoint] = time.time()
|
_error_cache[endpoint] = time.time()
|
||||||
return set()
|
return set()
|
||||||
|
|
||||||
|
|
@ -212,8 +267,10 @@ class fetch:
|
||||||
# {"models": [{"name": "model1"}, {"name": "model2"}]}
|
# {"models": [{"name": "model1"}, {"name": "model2"}]}
|
||||||
models = {m.get("name") for m in data.get("models", []) if m.get("name")}
|
models = {m.get("name") for m in data.get("models", []) if m.get("name")}
|
||||||
return models
|
return models
|
||||||
except Exception:
|
except Exception as e:
|
||||||
# If anything goes wrong we simply assume the endpoint has no models
|
# If anything goes wrong we simply assume the endpoint has no models
|
||||||
|
message = _format_connection_issue(f"{endpoint}/api/ps", e)
|
||||||
|
print(f"[fetch.loaded_models] {message}")
|
||||||
return set()
|
return set()
|
||||||
|
|
||||||
async def endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None) -> List[dict]:
|
async def endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None) -> List[dict]:
|
||||||
|
|
@ -226,15 +283,17 @@ class fetch:
|
||||||
if api_key is not None:
|
if api_key is not None:
|
||||||
headers = {"Authorization": "Bearer " + api_key}
|
headers = {"Authorization": "Bearer " + api_key}
|
||||||
|
|
||||||
|
request_url = f"{endpoint}{route}"
|
||||||
try:
|
try:
|
||||||
async with client.get(f"{endpoint}{route}", headers=headers) as resp:
|
async with client.get(request_url, headers=headers) as resp:
|
||||||
await _ensure_success(resp)
|
await _ensure_success(resp)
|
||||||
data = await resp.json()
|
data = await resp.json()
|
||||||
detail = data.get(detail, [])
|
detail = data.get(detail, [])
|
||||||
return detail
|
return detail
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# If anything goes wrong we cannot reply details
|
# If anything goes wrong we cannot reply details
|
||||||
print(e)
|
message = _format_connection_issue(request_url, e)
|
||||||
|
print(f"[fetch.endpoint_details] {message}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def ep2base(ep):
|
def ep2base(ep):
|
||||||
|
|
@ -1269,23 +1328,25 @@ async def config_proxy(request: Request):
|
||||||
which endpoints are being proxied.
|
which endpoints are being proxied.
|
||||||
"""
|
"""
|
||||||
async def check_endpoint(url: str):
|
async def check_endpoint(url: str):
|
||||||
|
client: aiohttp.ClientSession = app_state["session"]
|
||||||
|
headers = None
|
||||||
|
if "/v1" in url:
|
||||||
|
headers = {"Authorization": "Bearer " + config.api_keys[url]}
|
||||||
|
target_url = f"{url}/models"
|
||||||
|
else:
|
||||||
|
target_url = f"{url}/api/version"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client: aiohttp.ClientSession = app_state["session"]
|
async with client.get(target_url, headers=headers) as resp:
|
||||||
if "/v1" in url:
|
await _ensure_success(resp)
|
||||||
headers = {"Authorization": "Bearer " + config.api_keys[url]}
|
data = await resp.json()
|
||||||
async with client.get(f"{url}/models", headers=headers) as resp:
|
|
||||||
await _ensure_success(resp)
|
|
||||||
data = await resp.json()
|
|
||||||
else:
|
|
||||||
async with client.get(f"{url}/api/version") as resp:
|
|
||||||
await _ensure_success(resp)
|
|
||||||
data = await resp.json()
|
|
||||||
if "/v1" in url:
|
if "/v1" in url:
|
||||||
return {"url": url, "status": "ok", "version": "latest"}
|
return {"url": url, "status": "ok", "version": "latest"}
|
||||||
else:
|
else:
|
||||||
return {"url": url, "status": "ok", "version": data.get("version")}
|
return {"url": url, "status": "ok", "version": data.get("version")}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"url": url, "status": "error", "detail": str(e)}
|
detail = _format_connection_issue(target_url, e)
|
||||||
|
return {"url": url, "status": "error", "detail": detail}
|
||||||
|
|
||||||
results = await asyncio.gather(*[check_endpoint(ep) for ep in config.endpoints])
|
results = await asyncio.gather(*[check_endpoint(ep) for ep in config.endpoints])
|
||||||
return {"endpoints": results}
|
return {"endpoints": results}
|
||||||
|
|
@ -1664,9 +1725,19 @@ async def usage_stream(request: Request):
|
||||||
async def startup_event() -> None:
|
async def startup_event() -> None:
|
||||||
global config
|
global config
|
||||||
# Load YAML config (or use defaults if not present)
|
# Load YAML config (or use defaults if not present)
|
||||||
config = Config.from_yaml(Path("config.yaml"))
|
config_path = _config_path_from_env()
|
||||||
print(f"Loaded configuration:\n endpoints={config.endpoints},\n "
|
config = Config.from_yaml(config_path)
|
||||||
f"max_concurrent_connections={config.max_concurrent_connections}")
|
if config_path.exists():
|
||||||
|
print(
|
||||||
|
f"Loaded configuration from {config_path}:\n"
|
||||||
|
f" endpoints={config.endpoints},\n"
|
||||||
|
f" max_concurrent_connections={config.max_concurrent_connections}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
f"No configuration file found at {config_path}. "
|
||||||
|
"Falling back to default settings."
|
||||||
|
)
|
||||||
|
|
||||||
ssl_context = ssl.create_default_context()
|
ssl_context = ssl.create_default_context()
|
||||||
connector = aiohttp.TCPConnector(limit=0, limit_per_host=512, ssl=ssl_context)
|
connector = aiohttp.TCPConnector(limit=0, limit_per_host=512, ssl=ssl_context)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue