---
# config.yaml

# Ollama endpoints
endpoints:
  - http://192.168.0.50:11434
  - http://192.168.0.51:11434
  - http://192.168.0.52:11434
  # External OpenAI-compatible endpoints (will NOT be queried for /api/ps /api/ps_details)
  - https://api.openai.com/v1

# llama-server endpoints (OpenAI-compatible with /v1/models status info)
# These endpoints will be queried for /api/tags, /api/ps, /api/ps_details
# and included in the model selection pool for inference routing
llama_server_endpoints:
  - http://localhost:8080/v1
  - http://localhost:8081/v1

# Maximum concurrent connections *per endpoint-model pair* (equal to OLLAMA_NUM_PARALLEL)
max_concurrent_connections: 2

# Optional router-level API key that gates router/API/web UI access (leave empty to disable)
nomyo-router-api-key: ""

# API keys for remote endpoints
# Set an environment variable like OPENAI_KEY
# Confirm endpoints are exactly as in the endpoints block
api_keys:
  "http://192.168.0.50:11434": "ollama"
  "http://192.168.0.51:11434": "ollama"
  "http://192.168.0.52:11434": "ollama"
  "https://api.openai.com/v1": "${OPENAI_KEY}"
  "http://localhost:8080/v1": "llama-server"  # Optional API key for llama-server
  "http://localhost:8081/v1": "llama-server"