feat: add llama-swap as a backend
All checks were successful
PR Tests / test (pull_request) Successful in 1m18s
NYX Security Scan / nyx-scan (pull_request) Successful in 6m19s

This commit is contained in:
Alpha Nerd 2026-06-14 16:34:31 +02:00
parent c8da58430a
commit aa8baebac5
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M
17 changed files with 544 additions and 52 deletions

View file

@@ -32,6 +32,8 @@ from backends.health import _is_fresh
from backends.normalize import (
is_ext_openai_endpoint,
is_openai_compatible,
is_llama_server,
llama_endpoints,
get_tracking_model,
)
from backends.probe import fetch
@@ -93,8 +95,8 @@ async def choose_endpoint(model: str, reserve: bool = True,
"""
config = get_config()
# 1️⃣ Gather advertised-model sets for all endpoints concurrently
# Include both config.endpoints and config.llama_server_endpoints
llama_eps_extra = [ep for ep in config.llama_server_endpoints if ep not in config.endpoints]
# Include config.endpoints plus any llama-server / llama-swap endpoints
llama_eps_extra = [ep for ep in llama_endpoints(config) if ep not in config.endpoints]
all_endpoints = config.endpoints + llama_eps_extra
tag_tasks = [fetch.available_models(ep) for ep in config.endpoints if not is_openai_compatible(ep)]
@@ -114,7 +116,7 @@ async def choose_endpoint(model: str, reserve: bool = True,
model_without_latest = model.split(":latest")[0]
candidate_endpoints = [
ep for ep, models in zip(all_endpoints, advertised_sets)
if model_without_latest in models and (is_ext_openai_endpoint(ep) or ep in config.llama_server_endpoints)
if model_without_latest in models and (is_ext_openai_endpoint(ep) or is_llama_server(ep))
]
if not candidate_endpoints:
# Only add :latest suffix if model doesn't already have a version suffix