# config.yaml

# Ollama endpoints (local + remote)
endpoints:
  - http://localhost:11434
  - http://192.168.0.51:11434
  - http://192.168.0.52:11434

  # External OpenAI-compatible endpoints (will NOT be queried for /api/ps or /api/ps_details)
  - https://api.openai.com/v1
# llama-server endpoints (OpenAI-compatible with /v1/models status info)
# These endpoints will be queried for /api/tags, /api/ps, /api/ps_details
# and included in the model selection pool for inference routing
llama_server_endpoints:
  - http://localhost:8080/v1
  - http://192.168.0.33:8081/v1
# Maximum concurrent connections per endpoint-model pair (equivalent to OLLAMA_NUM_PARALLEL)
max_concurrent_connections: 2
# Optional router-level API key that gates router/API/web UI access (leave empty to disable)
nomyo-router-api-key: ""
# API keys for remote endpoints
# Set an environment variable like OPENAI_KEY
# Ensure the endpoint URLs here match those in the endpoints block exactly
api_keys:
  # NOTE(review): http://192.168.0.50:11434 is not listed in the endpoints block above — confirm this entry is intentional
  "http://192.168.0.50:11434": "ollama"
  "http://192.168.0.51:11434": "ollama"
  "http://192.168.0.52:11434": "ollama"
  "https://api.openai.com/v1": "${OPENAI_KEY}"
  "http://localhost:8080/v1": "llama-server"  # Optional API key for llama-server - depends on llama_server config
  "http://192.168.0.33:8081/v1": "llama-server"