From d3aa87ca15736d04bb3ab24e909fd1783283b99e Mon Sep 17 00:00:00 2001 From: YetheSamartaka <55753928+YetheSamartaka@users.noreply.github.com> Date: Tue, 27 Jan 2026 13:29:54 +0100 Subject: [PATCH] Added endpoint differentiation for models ps board Added endpoint differentiation for the models PS board to show where each model is loaded and for how long, making it easier to view multiple instances of the same model deployed for load balancing --- router.py | 22 +++++++++ static/index.html | 114 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 124 insertions(+), 12 deletions(-) diff --git a/router.py b/router.py index 39b9498..8ba1d50 100644 --- a/router.py +++ b/router.py @@ -1878,6 +1878,28 @@ async def ps_proxy(request: Request): status_code=200, ) +# ------------------------------------------------------------- +# 18b. API route – ps details (backwards compatible) +# ------------------------------------------------------------- +@app.get("/api/ps_details") +async def ps_details_proxy(request: Request): + """ + Proxy a ps request to all Ollama endpoints and reply with per-endpoint instances. + This keeps /api/ps backward compatible while providing richer data. + """ + tasks = [(ep, fetch.endpoint_details(ep, "/api/ps", "models")) for ep in config.endpoints if "/v1" not in ep] + loaded_models = await asyncio.gather(*[task for _, task in tasks]) + + models: list[dict] = [] + for (endpoint, modellist) in zip([ep for ep, _ in tasks], loaded_models): + for model in modellist: + if isinstance(model, dict): + model_with_endpoint = dict(model) + model_with_endpoint["endpoint"] = endpoint + models.append(model_with_endpoint) + + return JSONResponse(content={"models": models}, status_code=200) + # ------------------------------------------------------------- # 19. 
Proxy usage route – for monitoring # ------------------------------------------------------------- diff --git a/static/index.html b/static/index.html index 043649e..a8409f0 100644 --- a/static/index.html +++ b/static/index.html @@ -1,4 +1,4 @@ - +
@@ -42,6 +42,7 @@ background: white; padding: 1rem; border-radius: 6px; + overflow-x: auto; } .endpoints-container { flex: 1; @@ -114,6 +115,21 @@ th { background: #e0e0e0; } + .ps-subrow { + display: block; + } + .ps-subrow + .ps-subrow { + margin-top: 2px; + } + #ps-table { + width: max-content; + min-width: 100%; + } + #ps-table th.model-col, + #ps-table td.model { + min-width: 340px; + white-space: nowrap; + } .loading { color: #777; font-style: italic; @@ -346,10 +362,14 @@| Model | +Model | +Endpoint | +Instance count | Params | Quant | Ctx | +Size | +Until | Digest | Token |
|---|---|---|---|---|---|---|---|---|---|---|
| ${m.name} stats | -${m.details.parameter_size} | -${m.details.quantization_level} | -${m.context_length} | + const params = modelInstances[0]?.details?.parameter_size ?? ""; + const quant = modelInstances[0]?.details?.quantization_level ?? ""; + const ctx = modelInstances[0]?.context_length ?? ""; + const uniqueEndpoints = Array.from(new Set(endpoints)); + const endpointsData = encodeURIComponent(JSON.stringify(uniqueEndpoints)); + return `|||||||
| ${modelName} stats | +${renderInstanceList(endpoints)} | +${instanceCount} | +${params} | +${quant} | +${ctx} | +${renderInstanceList(sizes)} | +${renderInstanceList(untils)} | ${shortDigest} | ${tokenValue} |