Added endpoint differentiation for the models PS board

Added endpoint differentiation to the models PS board, showing which endpoint each model is loaded on and for how long. This makes it easier to view multiple instances of the same model deployed for load balancing.
This commit is contained in:
YetheSamartaka 2026-01-27 13:29:54 +01:00
parent bdd4dd45d9
commit d3aa87ca15
2 changed files with 124 additions and 12 deletions

View file

@ -1878,6 +1878,28 @@ async def ps_proxy(request: Request):
status_code=200,
)
# -------------------------------------------------------------
# 18b. API route ps details (backwards compatible)
# -------------------------------------------------------------
@app.get("/api/ps_details")
async def ps_details_proxy(request: Request):
    """
    Collect the /api/ps model lists from the configured endpoints and return
    them as one flat list in which every entry names the endpoint it came from.

    Endpoints whose URL contains "/v1" are not queried. The plain /api/ps
    route is not modified by this handler; this route serves the same kind of
    data with an extra "endpoint" key on each model entry.

    Returns a JSON response of the form {"models": [...]} with status 200.
    """
    # Skip endpoints containing "/v1"
    # NOTE(review): presumably these are OpenAI-compatible endpoints without /api/ps — confirm.
    endpoints = [ep for ep in config.endpoints if "/v1" not in ep]

    # Query every remaining endpoint concurrently; results come back in the
    # same order as `endpoints`, so they can be paired up positionally.
    per_endpoint = await asyncio.gather(
        *(fetch.endpoint_details(ep, "/api/ps", "models") for ep in endpoints)
    )

    models: list[dict] = []
    for ep, entries in zip(endpoints, per_endpoint):
        # Copy each dict entry and tag the copy; non-dict entries are dropped.
        models.extend(
            {**entry, "endpoint": ep}
            for entry in entries
            if isinstance(entry, dict)
        )

    return JSONResponse(content={"models": models}, status_code=200)
# -------------------------------------------------------------
# 19. Proxy usage route for monitoring
# -------------------------------------------------------------