feat: add llama-swap as a backend
This commit is contained in:
parent
c8da58430a
commit
aa8baebac5
17 changed files with 544 additions and 52 deletions
|
|
@@ -23,6 +23,10 @@ class Config(BaseSettings):
|
|||
)
|
||||
# List of llama-server endpoints (OpenAI-compatible with /v1/models status info)
|
||||
llama_server_endpoints: List[str] = Field(default_factory=list)
|
||||
# List of llama-swap endpoints (OpenAI-compatible front for multiple llama-server
|
||||
# workers). Same surface as llama_server_endpoints, but loaded models are read from
|
||||
# /running (not /v1/models status) and unload uses POST /api/models/unload/:model_id.
|
||||
llama_swap_endpoints: List[str] = Field(default_factory=list)
|
||||
# Max concurrent connections per endpoint-model pair, see OLLAMA_NUM_PARALLEL
|
||||
max_concurrent_connections: int = 1
|
||||
# Per-endpoint overrides: {endpoint_url: {max_concurrent_connections: N}}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue