fix: routing error for openai compatible endpoints #128
This commit is contained in:
parent
4f42f350a3
commit
1b3ad057a6
2 changed files with 64 additions and 3 deletions
16
routing.py
16
routing.py
|
|
@ -99,9 +99,19 @@ async def choose_endpoint(model: str, reserve: bool = True,
|
|||
llama_eps_extra = [ep for ep in llama_endpoints(config) if ep not in config.endpoints]
|
||||
all_endpoints = config.endpoints + llama_eps_extra
|
||||
|
||||
tag_tasks = [fetch.available_models(ep) for ep in config.endpoints if not is_openai_compatible(ep)]
|
||||
tag_tasks += [fetch.available_models(ep, config.api_keys.get(ep)) for ep in config.endpoints if is_openai_compatible(ep)]
|
||||
tag_tasks += [fetch.available_models(ep, config.api_keys.get(ep)) for ep in llama_eps_extra]
|
||||
# Build the probe tasks in the SAME order as ``all_endpoints`` so the
|
||||
# gathered results stay aligned for the ``zip(all_endpoints, advertised_sets)``
|
||||
# below. (A previous partition into non-OpenAI / OpenAI groups reordered the
|
||||
# tasks relative to ``all_endpoints``, pairing each endpoint with another
|
||||
# endpoint's advertised models — so an OpenAI-compatible backend listed
|
||||
# before an Ollama one would inherit the Ollama model set and get a request
|
||||
# for a model it cannot serve, 404ing. See issue #128.)
|
||||
def _advertised_task(ep: str):
    """Build the advertised-models probe for endpoint ``ep``.

    Endpoints that are not OpenAI-compatible are probed with the endpoint
    alone; OpenAI-compatible ones are probed together with whatever
    ``config.api_keys`` holds for them (``None`` when no key is configured).
    The result of ``fetch.available_models`` is returned without being
    awaited here.
    """
    if not is_openai_compatible(ep):
        return fetch.available_models(ep)
    api_key = config.api_keys.get(ep)
    return fetch.available_models(ep, api_key)
|
||||
|
||||
tag_tasks = [_advertised_task(ep) for ep in all_endpoints]
|
||||
advertised_sets = await asyncio.gather(*tag_tasks)
|
||||
|
||||
# 2️⃣ Filter endpoints that advertise the requested model
|
||||
|
|
|
|||
|
|
@ -279,6 +279,57 @@ class TestChooseEndpointModelNaming:
|
|||
assert ep == EP1
|
||||
|
||||
|
||||
class TestChooseEndpointAdvertisedAlignment:
    """Regression tests for issue #128.

    Each endpoint must be matched with its *own* advertised-model set, no
    matter how the endpoints are ordered in the config. An earlier
    implementation built the probe tasks partitioned (non-OpenAI first, then
    OpenAI-compatible) while the endpoint list kept config order, so an
    OpenAI-compatible endpoint listed *before* an Ollama one inherited the
    Ollama model set and received requests for models it does not serve (404).
    """

    OPENAI_EP = "http://vllm:8000/v1"

    async def _route(self, model):
        """Run ``router.choose_endpoint(model)`` against a two-endpoint config.

        The OpenAI-compatible endpoint is deliberately listed FIRST, and the
        ``fetch`` probes are stubbed so each endpoint advertises a disjoint
        model set. Returns the endpoint chosen by the router.
        """
        cfg = _make_cfg([self.OPENAI_EP, EP1])

        async def fake_available(endpoint, *_):
            # Only the Ollama box advertises the qwen model.
            if endpoint != self.OPENAI_EP:
                return {"qwen2.5-coder:1.5b-base"}
            return {"vllm-only-model"}

        async def fake_loaded(endpoint):
            if endpoint == EP1:
                return {"qwen2.5-coder:1.5b-base"}
            return set()

        with (
            patch.object(router, "config", cfg),
            patch.object(router.fetch, "available_models", side_effect=fake_available),
            patch.object(router.fetch, "loaded_models", side_effect=fake_loaded),
        ):
            chosen, _ = await router.choose_endpoint(model)
        return chosen

    async def test_openai_endpoint_before_ollama_does_not_misroute(self):
        chosen = await self._route("qwen2.5-coder:1.5b-base")
        # Must route to the endpoint that actually advertises the model.
        assert chosen == EP1

    async def test_openai_endpoint_before_ollama_still_routes_its_own_model(self):
        chosen = await self._route("vllm-only-model")
        assert chosen == self.OPENAI_EP
|
||||
|
||||
|
||||
class TestChooseEndpointLoadBalancing:
|
||||
async def test_random_selection_among_idle(self):
|
||||
cfg = _make_cfg([EP1, EP2, EP3])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue