feat: add llama-swap as a backend
This commit is contained in:
parent
c8da58430a
commit
aa8baebac5
17 changed files with 544 additions and 52 deletions
|
|
@@ -32,6 +32,8 @@ from backends.health import _is_fresh
|
|||
from backends.normalize import (
|
||||
is_ext_openai_endpoint,
|
||||
is_openai_compatible,
|
||||
is_llama_server,
|
||||
llama_endpoints,
|
||||
get_tracking_model,
|
||||
)
|
||||
from backends.probe import fetch
|
||||
|
|
@@ -93,8 +95,8 @@ async def choose_endpoint(model: str, reserve: bool = True,
|
|||
"""
|
||||
config = get_config()
|
||||
# 1️⃣ Gather advertised‑model sets for all endpoints concurrently
|
||||
# Include both config.endpoints and config.llama_server_endpoints
|
||||
llama_eps_extra = [ep for ep in config.llama_server_endpoints if ep not in config.endpoints]
|
||||
# Include config.endpoints plus any llama-server / llama-swap endpoints
|
||||
llama_eps_extra = [ep for ep in llama_endpoints(config) if ep not in config.endpoints]
|
||||
all_endpoints = config.endpoints + llama_eps_extra
|
||||
|
||||
tag_tasks = [fetch.available_models(ep) for ep in config.endpoints if not is_openai_compatible(ep)]
|
||||
|
|
@@ -114,7 +116,7 @@ async def choose_endpoint(model: str, reserve: bool = True,
|
|||
model_without_latest = model.split(":latest")[0]
|
||||
candidate_endpoints = [
|
||||
ep for ep, models in zip(all_endpoints, advertised_sets)
|
||||
if model_without_latest in models and (is_ext_openai_endpoint(ep) or ep in config.llama_server_endpoints)
|
||||
if model_without_latest in models and (is_ext_openai_endpoint(ep) or is_llama_server(ep))
|
||||
]
|
||||
if not candidate_endpoints:
|
||||
# Only add :latest suffix if model doesn't already have a version suffix
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue