feat: add llama-swap as a backend
All checks were successful
PR Tests / test (pull_request) Successful in 1m18s
NYX Security Scan / nyx-scan (pull_request) Successful in 6m19s

This commit is contained in:
Alpha Nerd 2026-06-14 16:34:31 +02:00
parent c8da58430a
commit aa8baebac5
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M
17 changed files with 544 additions and 52 deletions

View file

@ -50,27 +50,46 @@ def dedupe_on_keys(dicts, key_fields):
return out
def is_llama_swap(endpoint: str) -> bool:
    """Report whether *endpoint* is one of the configured llama-swap endpoints."""
    swap_endpoints = get_config().llama_swap_endpoints
    return endpoint in swap_endpoints
def is_llama_server(endpoint: str) -> bool:
    """Report whether *endpoint* is a llama.cpp llama-server or a llama-swap front.

    Both kinds expose the same OpenAI-compatible surface, so the router handles
    them the same way everywhere except loaded-model detection and model unload.
    """
    settings = get_config()
    # Plain llama-server endpoints are checked first; llama-swap fronts second.
    if endpoint in settings.llama_server_endpoints:
        return True
    return endpoint in settings.llama_swap_endpoints
def llama_endpoints(cfg) -> list:
    """Return llama-server endpoints followed by llama-swap endpoints, without repeats.

    Each endpoint keeps the position of its first occurrence.
    """
    seen = {}
    # A dict keeps insertion order, so it doubles as an ordered set here.
    for candidate in (*cfg.llama_server_endpoints, *cfg.llama_swap_endpoints):
        seen.setdefault(candidate, None)
    return list(seen)
def is_ext_openai_endpoint(endpoint: str) -> bool:
"""
Determine if an endpoint is an external OpenAI-compatible endpoint (not Ollama or llama-server).
Determine if an endpoint is an external OpenAI-compatible endpoint (not Ollama, llama-server or llama-swap).
Returns True for:
- External services like OpenAI.com, Groq, etc.
Returns False for:
- Ollama endpoints (without /v1, or with /v1 but default port 11434)
- llama-server endpoints (explicitly configured in llama_server_endpoints)
- llama-server / llama-swap endpoints (explicitly configured)
"""
cfg = get_config()
# Check if it's a llama-server endpoint (has /v1 and is in the configured list)
if endpoint in cfg.llama_server_endpoints:
# Check if it's a llama-server / llama-swap endpoint (has /v1 and is in a configured list)
if is_llama_server(endpoint):
return False
if "/v1" not in endpoint:
return False
base_endpoint = endpoint.replace('/v1', '')
if base_endpoint in cfg.endpoints:
if base_endpoint in get_config().endpoints:
return False # It's Ollama's /v1
# Check for default Ollama port
@ -83,9 +102,9 @@ def is_ext_openai_endpoint(endpoint: str) -> bool:
def is_openai_compatible(endpoint: str) -> bool:
    """
    Return True if the endpoint speaks the OpenAI API (not native Ollama).

    This includes external OpenAI endpoints AND llama-server / llama-swap endpoints.
    """
    # Any URL containing "/v1" is treated as OpenAI-style; configured
    # llama-server and llama-swap endpoints qualify even without that segment.
    return "/v1" in endpoint or is_llama_server(endpoint)
def get_tracking_model(endpoint: str, model: str) -> str:
@ -102,8 +121,8 @@ def get_tracking_model(endpoint: str, model: str) -> str:
if is_ext_openai_endpoint(endpoint):
return model
# llama-server endpoints use normalized names in PS
if endpoint in get_config().llama_server_endpoints:
# llama-server / llama-swap endpoints use normalized names in PS
if is_llama_server(endpoint):
return _normalize_llama_model_name(model)
# Ollama endpoints: append ":latest" if no version suffix