feat: add llama-swap as a backend
All checks were successful
PR Tests / test (pull_request) Successful in 1m18s
NYX Security Scan / nyx-scan (pull_request) Successful in 6m19s

This commit is contained in:
Alpha Nerd 2026-06-14 16:34:31 +02:00
parent c8da58430a
commit aa8baebac5
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M
17 changed files with 544 additions and 52 deletions

View file

@ -23,6 +23,10 @@ class Config(BaseSettings):
)
# List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
llama_server_endpoints: List[str] = Field(default_factory=list)
# List of llama-swap endpoints (OpenAI-compatible front for multiple llama-server
# workers). Same surface as llama_server_endpoints, but loaded models are read from
# /running (not /v1/models status) and unload uses POST /api/models/unload/:model_id.
llama_swap_endpoints: List[str] = Field(default_factory=list)
# Max concurrent connections per endpointmodel pair, see OLLAMA_NUM_PARALLEL
max_concurrent_connections: int = 1
# Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}