# -------------------------------------------------------------
# 18b. API route – ps details (backwards compatible)
# -------------------------------------------------------------
@app.get("/api/ps_details")
async def ps_details_proxy(request: Request):
    """
    Fan a ps query out to every configured endpoint and list each running
    model once per endpoint it was reported by.

    Endpoints whose URL contains "/v1" are skipped (presumably the
    OpenAI-compatible ones, which have no /api/ps route - confirm against
    the config). Every remaining endpoint is asked for the "models" detail
    of its "/api/ps" route concurrently. Each dict entry that comes back is
    shallow-copied and tagged with an "endpoint" key naming its source;
    entries that are not dicts are dropped.

    The response is always HTTP 200 with a body of the form
    {"models": [...]}. /api/ps itself is untouched, so existing clients keep
    working while newer ones can use this richer view.
    """
    # Fix the endpoint order once so results can be matched back by position.
    ollama_endpoints = [ep for ep in config.endpoints if "/v1" not in ep]

    # gather() returns results in the order the awaitables were passed in.
    per_endpoint = await asyncio.gather(
        *(fetch.endpoint_details(ep, "/api/ps", "models") for ep in ollama_endpoints)
    )

    models: list[dict] = []
    for endpoint, entries in zip(ollama_endpoints, per_endpoint):
        # Copy rather than mutate: the fetched dicts may be shared elsewhere.
        models.extend(
            {**entry, "endpoint": endpoint}
            for entry in entries
            if isinstance(entry, dict)
        )

    return JSONResponse(content={"models": models}, status_code=200)

# -------------------------------------------------------------
# 19. Proxy usage route – for monitoring
# -------------------------------------------------------------
@@ -42,6 +42,7 @@ background: white; padding: 1rem; border-radius: 6px; + overflow-x: auto; } .endpoints-container { flex: 1; @@ -114,6 +115,32 @@ th { background: #e0e0e0; } + .ps-subrow { + display: block; + } + .ps-subrow + .ps-subrow { + margin-top: 2px; + } + #ps-table { + width: max-content; + min-width: 100%; + } + #ps-table th.model-col, + #ps-table td.model { + min-width: 200px; + max-width: 300px; + white-space: nowrap; + } + /* Optimize narrow columns */ + #ps-table th:nth-child(3), + #ps-table td:nth-child(3), + #ps-table th:nth-child(4), + #ps-table td:nth-child(4), + #ps-table th:nth-child(5), + #ps-table td:nth-child(5) { + width: 80px; + text-align: center; + } .loading { color: #777; font-style: italic; @@ -346,12 +373,15 @@| Model | +Model | +Endpoint | Params | Quant | Ctx | +Size | +Until | Digest | -Token | +Tokens |
|---|---|---|---|---|---|---|---|---|---|---|
| ${m.name} stats | -${m.details.parameter_size} | -${m.details.quantization_level} | -${m.context_length} | + const instanceCount = modelInstances.length; + const endpoints = modelInstances.map((m) => m.endpoint || "unknown"); + const sizes = modelInstances.map((m) => formatBytes(m.size ?? m.size_vram ?? m.details?.size)); + const untils = modelInstances.map((m) => + formatUntil(m.until ?? m.expires_at ?? m.expiresAt ?? m.expire_at), + ); + const digest = modelInstances[0]?.digest || ""; + const shortDigest = digest ? digest.slice(-6) : ""; + const params = modelInstances[0]?.details?.parameter_size ?? ""; + const quant = modelInstances[0]?.details?.quantization_level ?? ""; + const ctx = modelInstances[0]?.context_length ?? ""; + const uniqueEndpoints = Array.from(new Set(endpoints)); + const endpointsData = encodeURIComponent(JSON.stringify(uniqueEndpoints)); + return `|||||||
| ${modelName} stats | +${renderInstanceList(endpoints)} | +${params} | +${quant} | +${ctx} | +${renderInstanceList(sizes)} | +${renderInstanceList(untils)} | ${shortDigest} | ${tokenValue} |