# Provenance (original patch header, kept verbatim as comments):
#   From d3aa87ca15736d04bb3ab24e909fd1783283b99e Mon Sep 17 00:00:00 2001
#   From: YetheSamartaka <55753928+YetheSamartaka@users.noreply.github.com>
#   Date: Tue, 27 Jan 2026 13:29:54 +0100
#   Subject: [PATCH] Added endpoint differentiation for models ps board
#
#   Added endpoint differentiation for models PS board to see where which model
#   is loaded and for how long to ease the viewing of multiple same models
#   deployed for load balancing
#   ---
#    router.py         |  22 +++++++++
#    static/index.html | 114 +++++++++++++++++++++++++++++++++++++++++-----
#    2 files changed, 124 insertions(+), 12 deletions(-)
#
#   diff --git a/router.py b/router.py
#   index 39b9498..8ba1d50 100644
#   --- a/router.py
#   +++ b/router.py
#   @@ -1878,6 +1878,28 @@ async def ps_proxy(request: Request):
#   (context: tail of ps_proxy)          status_code=200,
#                                    )

# -------------------------------------------------------------
# 18b. API route – ps details (backwards compatible)
# -------------------------------------------------------------
@app.get("/api/ps_details")
async def ps_details_proxy(request: Request):
    """
    Proxy a ps request to all Ollama endpoints and reply with per-endpoint instances.

    This keeps /api/ps backward compatible while providing richer data: every
    model entry in the response is a shallow copy of the entry reported by an
    endpoint, with an added "endpoint" key holding that endpoint's configured
    value.

    Args:
        request: The incoming request. It is not read by this handler and is
            kept only so the route signature stays unchanged.

    Returns:
        A JSONResponse with status 200 and the body ``{"models": [...]}``.
    """
    # Endpoints containing "/v1" are excluded from the fan-out.
    # NOTE(review): presumably these are non-Ollama (OpenAI-compatible)
    # endpoints without an /api/ps route -- confirm against the config.
    endpoints = [ep for ep in config.endpoints if "/v1" not in ep]

    # Query every remaining endpoint concurrently; gather() returns results in
    # the same order as `endpoints`, so the two can be zipped back together.
    loaded_models = await asyncio.gather(
        *(fetch.endpoint_details(ep, "/api/ps", "models") for ep in endpoints)
    )

    models: list[dict] = []
    for endpoint, modellist in zip(endpoints, loaded_models):
        # Skip an endpoint whose payload is missing or not a sequence instead
        # of failing the whole aggregated response with a TypeError.
        if not isinstance(modellist, (list, tuple)):
            continue
        # Copy each entry so the fetched data is not mutated when tagging it.
        models.extend(
            {**model, "endpoint": endpoint}
            for model in modellist
            if isinstance(model, dict)
        )

    return JSONResponse(content={"models": models}, status_code=200)

# -------------------------------------------------------------
# 19.
Proxy usage route – for monitoring # ------------------------------------------------------------- diff --git a/static/index.html b/static/index.html index 043649e..a8409f0 100644 --- a/static/index.html +++ b/static/index.html @@ -1,4 +1,4 @@ - + @@ -42,6 +42,7 @@ background: white; padding: 1rem; border-radius: 6px; + overflow-x: auto; } .endpoints-container { flex: 1; @@ -114,6 +115,21 @@ th { background: #e0e0e0; } + .ps-subrow { + display: block; + } + .ps-subrow + .ps-subrow { + margin-top: 2px; + } + #ps-table { + width: max-content; + min-width: 100%; + } + #ps-table th.model-col, + #ps-table td.model { + min-width: 340px; + white-space: nowrap; + } .loading { color: #777; font-style: italic; @@ -346,10 +362,14 @@ - + + + + + @@ -698,6 +718,7 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) { document.getElementById("tags-count").textContent = `${data.models.length}`; + /* copy logic */ document.querySelectorAll(".copy-link").forEach((link) => { link.addEventListener("click", async (e) => { @@ -769,23 +790,92 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) { /* ---------- PS ---------- */ async function loadPS() { try { - const data = await fetchJSON("/api/ps"); + let instances = []; + try { + const detailed = await fetchJSON("/api/ps_details"); + instances = Array.isArray(detailed.models) ? 
detailed.models : []; + } catch (err) { + console.error("Failed to load ps_details, falling back to /api/ps", err); + const fallback = await fetchJSON("/api/ps"); + instances = (fallback.models || []).map((m) => ({ + ...m, + endpoint: "unknown", + })); + } const body = document.getElementById("ps-body"); - body.innerHTML = data.models - .map(m => { - const existingRow = psRows.get(m.name); + const grouped = new Map(); + for (const instance of instances) { + if (!instance || !instance.name) continue; + if (!grouped.has(instance.name)) grouped.set(instance.name, []); + grouped.get(instance.name).push(instance); + } + + const formatBytes = (value) => { + if (value === null || value === undefined || value === "") return ""; + if (typeof value === "string") return value; + if (typeof value !== "number" || Number.isNaN(value)) return ""; + const units = ["B", "KB", "MB", "GB", "TB"]; + let size = value; + let unitIndex = 0; + while (size >= 1024 && unitIndex < units.length - 1) { + size /= 1024; + unitIndex += 1; + } + const precision = size >= 10 || unitIndex == 0 ? 0 : 1; + return `${size.toFixed(precision)} ${units[unitIndex]}`; + }; + + const formatUntil = (value) => { + if (value === null || value === undefined || value === "") { + return "Forever"; + } + if (typeof value === "number") { + const ms = value > 1e12 ? value : value * 1000; + const date = new Date(ms); + return Number.isNaN(date.getTime()) ? String(value) : date.toLocaleString(); + } + if (typeof value === "string") { + const date = new Date(value); + return Number.isNaN(date.getTime()) ? value : date.toLocaleString(); + } + return String(value); + }; + + const renderInstanceList = (items) => { + if (!items.length) return ""; + return items.map((item) => `
${item || ""}
`).join(""); + }; + + body.innerHTML = Array.from(grouped.entries()) + .map(([modelName, modelInstances]) => { + const existingRow = psRows.get(modelName); const tokenValue = existingRow ? existingRow.querySelector(".token-usage")?.textContent ?? 0 : 0; - const digest = m.digest || ""; + const instanceCount = modelInstances.length; + const endpoints = modelInstances.map((m) => m.endpoint || "unknown"); + const sizes = modelInstances.map((m) => formatBytes(m.size ?? m.size_vram ?? m.details?.size)); + const untils = modelInstances.map((m) => + formatUntil(m.until ?? m.expires_at ?? m.expiresAt ?? m.expire_at), + ); + const digest = modelInstances[0]?.digest || ""; const shortDigest = digest.length > 24 ? `${digest.slice(0, 12)}...${digest.slice(-12)}` : digest; - return ` - - - - + const params = modelInstances[0]?.details?.parameter_size ?? ""; + const quant = modelInstances[0]?.details?.quantization_level ?? ""; + const ctx = modelInstances[0]?.context_length ?? ""; + const uniqueEndpoints = Array.from(new Set(endpoints)); + const endpointsData = encodeURIComponent(JSON.stringify(uniqueEndpoints)); + return ` + + + + + + + + `;
ModelModelEndpointInstance count Params Quant CtxSizeUntil Digest Token
${m.name} stats${m.details.parameter_size}${m.details.quantization_level}${m.context_length}
${modelName} stats${renderInstanceList(endpoints)}${instanceCount}${params}${quant}${ctx}${renderInstanceList(sizes)}${renderInstanceList(untils)} ${shortDigest} ${tokenValue}