diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e456af3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.13-slim + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r requirements.txt + +COPY . . + +RUN chmod +x /app/entrypoint.sh + +EXPOSE 12434 + +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/README.md b/README.md index f6cc58f..37e1686 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,30 @@ finally you can uvicorn router:app --host 127.0.0.1 --port 12434 ``` +## Docker Deployment + +Build the container image locally: + +```sh +docker build -t nomyo-router . +``` + +Run the router in Docker with your own configuration file mounted from the host. The entrypoint script reads the config location from the `CONFIG_PATH` environment variable (or from a `--config-path` argument), so you can point to a file anywhere inside the container: + +```sh +docker run -d \ + --name nomyo-router \ + -p 12434:12434 \ + -v /absolute/path/to/config_folder:/app/config/ \ + -e CONFIG_PATH=/app/config/config.yaml \ + nomyo-router +``` + +Notes: +- The entrypoint copies `CONFIG_PATH` into the `NOMYO_ROUTER_CONFIG_PATH` environment variable that the router reads; you can set `NOMYO_ROUTER_CONFIG_PATH` directly instead if you prefer. +- To override the bind address or port, set `UVICORN_HOST` or `UVICORN_PORT` (e.g. with `-e`), or pass the corresponding uvicorn flags after `--` following the image name, e.g. `nomyo-router --config-path /app/config/config.yaml -- --port 9000`. +- Use `docker logs nomyo-router` to confirm the loaded endpoints and concurrency settings at startup. + # Routing NOMYO Router accepts any Ollama request on the configured port for any Ollama endpoint from your frontend application. It then checks the available backends for the specific request.
diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 0000000..cee2f17 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env sh +set -e + +CONFIG_PATH_ARG="" +SHOW_HELP=0 + +while [ "$#" -gt 0 ]; do + case "$1" in + --config-path) + if [ -z "${2:-}" ]; then + echo "Error: --config-path requires a value." >&2 + exit 1 + fi + CONFIG_PATH_ARG="$2" + shift 2 + ;; + --config-path=*) + CONFIG_PATH_ARG="${1#*=}" + shift 1 + ;; + -h|--help) + SHOW_HELP=1 + shift 1 + ;; + --) + shift 1 + break + ;; + *) + break + ;; + esac +done + +if [ "$SHOW_HELP" -eq 1 ]; then + cat <<'EOF' +Usage: entrypoint.sh [--config-path /path/to/config.yaml] [uvicorn options...] + +Options: + --config-path PATH Absolute or relative path to a NOMYO Router YAML config file. + -h, --help Show this help message and exit. + +Any arguments that remain after the options above are passed directly to uvicorn. + +Environment variables: + CONFIG_PATH Alternative way to specify the config path. + NOMYO_ROUTER_CONFIG_PATH Overrides the config path (same as --config-path). + UVICORN_HOST Host interface to bind to (default: 0.0.0.0). + UVICORN_PORT Port to listen on (default: 12434). + UVICORN_RELOAD If set, enables --reload for uvicorn (useful for local dev). + UVICORN_BIN Path to the uvicorn executable (default: uvicorn). 
+EOF + exit 0 +fi + +if [ -z "$CONFIG_PATH_ARG" ] && [ -n "${NOMYO_ROUTER_CONFIG_PATH:-}" ]; then + CONFIG_PATH_ARG="$NOMYO_ROUTER_CONFIG_PATH" +fi + +if [ -z "$CONFIG_PATH_ARG" ] && [ -n "${CONFIG_PATH:-}" ]; then + CONFIG_PATH_ARG="$CONFIG_PATH" +fi + +if [ -n "$CONFIG_PATH_ARG" ]; then + export NOMYO_ROUTER_CONFIG_PATH="$CONFIG_PATH_ARG" +fi + +UVICORN_BIN="${UVICORN_BIN:-uvicorn}" +UVICORN_HOST="${UVICORN_HOST:-0.0.0.0}" +UVICORN_PORT="${UVICORN_PORT:-12434}" + +ADD_DEFAULTS=0 +if [ "$#" -eq 0 ]; then + set -- "$UVICORN_BIN" "router:app" + ADD_DEFAULTS=1 +elif [ "${1#-}" != "$1" ]; then + set -- "$UVICORN_BIN" "router:app" "$@" + ADD_DEFAULTS=1 +elif [ "$1" = "$UVICORN_BIN" ]; then + ADD_DEFAULTS=1 +fi + +if [ "$ADD_DEFAULTS" -eq 1 ]; then + NEED_HOST=1 + NEED_PORT=1 + for arg in "$@"; do + case "$arg" in + --host|--host=*) + NEED_HOST=0 + ;; + --port|--port=*) + NEED_PORT=0 + ;; + esac + done + if [ "$NEED_HOST" -eq 1 ]; then + set -- "$@" "--host" "$UVICORN_HOST" + fi + if [ "$NEED_PORT" -eq 1 ]; then + set -- "$@" "--port" "$UVICORN_PORT" + fi + if [ -n "${UVICORN_RELOAD:-}" ]; then + set -- "$@" "--reload" + fi +fi + +exec "$@" diff --git a/requirements.txt b/requirements.txt index 83ac385..f3ad896 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ certifi==2025.8.3 click==8.2.1 distro==1.9.0 exceptiongroup==1.3.0 -fastapi==0.116.1 +fastapi==0.121.0 fastapi-sse==1.1.1 frozenlist==1.7.0 h11==0.16.0 @@ -18,19 +18,19 @@ httpx==0.28.1 idna==3.10 jiter==0.10.0 multidict==6.6.4 -ollama==0.5.3 +ollama==0.6.0 openai==1.102.0 pillow==11.3.0 propcache==0.3.2 pydantic==2.11.7 pydantic-settings==2.10.1 pydantic_core==2.33.2 -python-dotenv==1.1.1 -PyYAML==6.0.2 +python-dotenv==1.2.1 +PyYAML==6.0.3 sniffio==1.3.1 starlette==0.49.1 tqdm==4.67.1 typing-inspection==0.4.1 typing_extensions==4.14.1 -uvicorn==0.35.0 +uvicorn==0.38.0 yarl==1.20.1 diff --git a/router.py b/router.py index 4f6503f..7cc24ac 100644 --- a/router.py +++ b/router.py @@ -9,6 +9,7 @@ 
license: AGPL import json, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, datetime, random, base64, io from pathlib import Path from typing import Dict, Set, List, Optional +from urllib.parse import urlparse from fastapi import FastAPI, Request, HTTPException from fastapi_sse import sse_handler from fastapi.staticfiles import StaticFiles @@ -86,8 +87,20 @@ class Config(BaseSettings): return cls(**cleaned) return cls() +def _config_path_from_env() -> Path: + """ + Resolve the configuration file path. Defaults to `config.yaml` + in the current working directory unless NOMYO_ROUTER_CONFIG_PATH + is set. + """ + candidate = os.getenv("NOMYO_ROUTER_CONFIG_PATH") + if candidate: + return Path(candidate).expanduser() + return Path("config.yaml") + + # Create the global config object – it will be overwritten on startup -config = Config() +config = Config.from_yaml(_config_path_from_env()) # ------------------------------------------------------------- # 2. FastAPI application @@ -123,6 +136,47 @@ async def _ensure_success(resp: aiohttp.ClientResponse) -> None: text = await resp.text() raise HTTPException(status_code=resp.status, detail=text) +def _format_connection_issue(url: str, error: Exception) -> str: + """ + Provide a human-friendly error string for connection failures so operators + know which endpoint and address failed from inside the container. + """ + parsed = urlparse(url) + host_hint = parsed.hostname or "" + port_hint = parsed.port or "" + + if isinstance(error, aiohttp.ClientConnectorError): + resolved_host = getattr(error, "host", host_hint) or host_hint or "?" + resolved_port = getattr(error, "port", port_hint) or port_hint or "?" 
+ parts = [ + f"Failed to connect to {url} (resolved: {resolved_host}:{resolved_port}).", + "Ensure the endpoint address is reachable from within the container.", + ] + if resolved_host in {"localhost", "127.0.0.1"}: + parts.append( + "Inside Docker, 'localhost' refers to the container itself; use " + "'host.docker.internal' or a Docker network alias if the service " + "runs on the host machine." + ) + os_error = getattr(error, "os_error", None) + if isinstance(os_error, OSError): + errno = getattr(os_error, "errno", None) + strerror = os_error.strerror or str(os_error) + if errno is not None or strerror: + parts.append(f"OS error [{errno}]: {strerror}.") + elif os_error: + parts.append(f"OS error: {os_error}.") + parts.append(f"Original error: {error}.") + return " ".join(parts) + + if isinstance(error, asyncio.TimeoutError): + return ( + f"Timed out waiting for {url}. " + "The remote endpoint may be offline or slow to respond." + ) + + return f"Error while contacting {url}: {error}" + def is_ext_openai_endpoint(endpoint: str) -> bool: if "/v1" not in endpoint: return False @@ -192,7 +246,8 @@ class fetch: return models except Exception as e: # Treat any error as if the endpoint offers no models - print(f"[fetch.available_models] {endpoint} error: {e}") + message = _format_connection_issue(endpoint_url, e) + print(f"[fetch.available_models] {message}") _error_cache[endpoint] = time.time() return set() @@ -212,8 +267,10 @@ class fetch: # {"models": [{"name": "model1"}, {"name": "model2"}]} models = {m.get("name") for m in data.get("models", []) if m.get("name")} return models - except Exception: + except Exception as e: # If anything goes wrong we simply assume the endpoint has no models + message = _format_connection_issue(f"{endpoint}/api/ps", e) + print(f"[fetch.loaded_models] {message}") return set() async def endpoint_details(endpoint: str, route: str, detail: str, api_key: Optional[str] = None) -> List[dict]: @@ -226,15 +283,17 @@ class fetch: if api_key is 
not None: headers = {"Authorization": "Bearer " + api_key} + request_url = f"{endpoint}{route}" try: - async with client.get(f"{endpoint}{route}", headers=headers) as resp: + async with client.get(request_url, headers=headers) as resp: await _ensure_success(resp) data = await resp.json() detail = data.get(detail, []) return detail except Exception as e: # If anything goes wrong we cannot reply details - print(e) + message = _format_connection_issue(request_url, e) + print(f"[fetch.endpoint_details] {message}") return [] def ep2base(ep): @@ -1269,23 +1328,25 @@ async def config_proxy(request: Request): which endpoints are being proxied. """ async def check_endpoint(url: str): + """Probe one endpoint and return its status entry for the report.""" + client: aiohttp.ClientSession = app_state["session"] + headers = None + target_url = f"{url}/models" if "/v1" in url else f"{url}/api/version" + try: - client: aiohttp.ClientSession = app_state["session"] - if "/v1" in url: - headers = {"Authorization": "Bearer " + config.api_keys[url]} - async with client.get(f"{url}/models", headers=headers) as resp: - await _ensure_success(resp) - data = await resp.json() - else: - async with client.get(f"{url}/api/version") as resp: - await _ensure_success(resp) - data = await resp.json() + if "/v1" in url: + # Look the key up inside the try: a missing key must yield an error entry for this endpoint, not abort the whole gather. + headers = {"Authorization": "Bearer " + config.api_keys[url]} + async with client.get(target_url, headers=headers) as resp: + await _ensure_success(resp) + data = await resp.json() if "/v1" in url: return {"url": url, "status": "ok", "version": "latest"} else: return {"url": url, "status": "ok", "version": data.get("version")} except Exception as e: - return {"url": url, "status": "error", "detail": str(e)} + detail = _format_connection_issue(target_url, e) + return {"url": url, "status": "error", "detail": detail} results = await asyncio.gather(*[check_endpoint(ep) for ep in config.endpoints]) return {"endpoints": results} @@ -1664,9 +1725,19 @@ async def usage_stream(request: Request): async def startup_event() -> None: global config # Load YAML
config (or use defaults if not present) - config = Config.from_yaml(Path("config.yaml")) - print(f"Loaded configuration:\n endpoints={config.endpoints},\n " - f"max_concurrent_connections={config.max_concurrent_connections}") + config_path = _config_path_from_env() + config = Config.from_yaml(config_path) + if config_path.exists(): + print( + f"Loaded configuration from {config_path}:\n" + f" endpoints={config.endpoints},\n" + f" max_concurrent_connections={config.max_concurrent_connections}" + ) + else: + print( + f"No configuration file found at {config_path}. " + "Falling back to default settings." + ) ssl_context = ssl.create_default_context() connector = aiohttp.TCPConnector(limit=0, limit_per_host=512, ssl=ssl_context)