From d3aa87ca15736d04bb3ab24e909fd1783283b99e Mon Sep 17 00:00:00 2001
From: YetheSamartaka <55753928+YetheSamartaka@users.noreply.github.com>
Date: Tue, 27 Jan 2026 13:29:54 +0100
Subject: [PATCH 1/2] Added endpoint differentiation for models ps board
Added endpoint differentiation to the models PS board so it shows which model is loaded on which endpoint and for how long. This makes it easier to view multiple instances of the same model deployed for load balancing.
---
router.py | 22 +++++++++
static/index.html | 114 +++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 124 insertions(+), 12 deletions(-)
diff --git a/router.py b/router.py
index 39b9498..8ba1d50 100644
--- a/router.py
+++ b/router.py
@@ -1878,6 +1878,28 @@ async def ps_proxy(request: Request):
status_code=200,
)
+# -------------------------------------------------------------
+# 18b. API route – ps details (backwards compatible)
+# -------------------------------------------------------------
+@app.get("/api/ps_details")
+async def ps_details_proxy(request: Request):
+    """
+    Return every model instance reported by /api/ps on each endpoint whose URL lacks "/v1".
+    Each entry is a copy of the upstream dict plus an "endpoint" key naming its source.
+    """
+    endpoints = [ep for ep in config.endpoints if "/v1" not in ep]
+    pending = (fetch.endpoint_details(ep, "/api/ps", "models") for ep in endpoints)
+    per_endpoint = await asyncio.gather(*pending)
+
+    models: list[dict] = []
+    for endpoint, reported in zip(endpoints, per_endpoint):
+        # Copy each dict entry and tag it with its source endpoint; non-dict entries are skipped.
+        tagged = ({**entry, "endpoint": endpoint} for entry in reported if isinstance(entry, dict))
+        models.extend(tagged)
+
+    payload = {"models": models}
+    return JSONResponse(content=payload, status_code=200)
+
# -------------------------------------------------------------
# 19. Proxy usage route – for monitoring
# -------------------------------------------------------------
diff --git a/static/index.html b/static/index.html
index 043649e..a8409f0 100644
--- a/static/index.html
+++ b/static/index.html
@@ -1,4 +1,4 @@
-
+
@@ -42,6 +42,7 @@
background: white;
padding: 1rem;
border-radius: 6px;
+ overflow-x: auto;
}
.endpoints-container {
flex: 1;
@@ -114,6 +115,21 @@
th {
background: #e0e0e0;
}
+ .ps-subrow {
+ display: block;
+ }
+ .ps-subrow + .ps-subrow {
+ margin-top: 2px;
+ }
+ #ps-table {
+ width: max-content;
+ min-width: 100%;
+ }
+ #ps-table th.model-col,
+ #ps-table td.model {
+ min-width: 340px;
+ white-space: nowrap;
+ }
.loading {
color: #777;
font-style: italic;
@@ -346,10 +362,14 @@
- | Model |
+ Model |
+ Endpoint |
+ Instance count |
Params |
Quant |
Ctx |
+ Size |
+ Until |
Digest |
Token |
@@ -698,6 +718,7 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
document.getElementById("tags-count").textContent =
`${data.models.length}`;
+
/* copy logic */
document.querySelectorAll(".copy-link").forEach((link) => {
link.addEventListener("click", async (e) => {
@@ -769,23 +790,92 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
/* ---------- PS ---------- */
async function loadPS() {
try {
- const data = await fetchJSON("/api/ps");
+ let instances = [];
+ try {
+ const detailed = await fetchJSON("/api/ps_details");
+ instances = Array.isArray(detailed.models) ? detailed.models : [];
+ } catch (err) {
+ console.error("Failed to load ps_details, falling back to /api/ps", err);
+ const fallback = await fetchJSON("/api/ps");
+ instances = (fallback.models || []).map((m) => ({
+ ...m,
+ endpoint: "unknown",
+ }));
+ }
const body = document.getElementById("ps-body");
- body.innerHTML = data.models
- .map(m => {
- const existingRow = psRows.get(m.name);
+ const grouped = new Map();
+ for (const instance of instances) {
+ if (!instance || !instance.name) continue;
+ if (!grouped.has(instance.name)) grouped.set(instance.name, []);
+ grouped.get(instance.name).push(instance);
+ }
+
+ // Format a byte count as a short size label; strings pass through, other non-numbers (and NaN) give "".
+ const formatBytes = (value) => {
+   if (typeof value === "string") return value;
+   if (typeof value !== "number" || Number.isNaN(value)) return "";
+   const labels = ["B", "KB", "MB", "GB", "TB"];
+   const lastLabel = labels.length - 1;
+   let scaled = value;
+   let step = 0;
+   for (; scaled >= 1024 && step < lastLabel; step += 1) {
+     scaled /= 1024;
+   }
+   const digits = step === 0 || scaled >= 10 ? 0 : 1; // no decimals for plain bytes or values >= 10
+   return `${scaled.toFixed(digits)} ${labels[step]}`;
+ };
+ // Format an expiry value for display: null/undefined/"" show as "Forever"; numbers above 1e12 are read as ms, smaller ones as seconds.
+ const formatUntil = (value) => {
+ if (value === null || value === undefined || value === "") {
+ return "Forever";
+ }
+ if (typeof value === "number") {
+ const ms = value > 1e12 ? value : value * 1000;
+ const date = new Date(ms);
+ return Number.isNaN(date.getTime()) ? String(value) : date.toLocaleString();
+ }
+ if (typeof value === "string") {
+ const date = new Date(value);
+ return Number.isNaN(date.getTime()) ? value : date.toLocaleString();
+ }
+ return String(value);
+ };
+
+ const renderInstanceList = (items) => {
+ if (!items.length) return ""; // empty list renders as an empty string; otherwise one fragment per item, joined with no separator
+ return items.map((item) => `${item || ""}
`).join("");
+ };
+
+ body.innerHTML = Array.from(grouped.entries())
+ .map(([modelName, modelInstances]) => {
+ const existingRow = psRows.get(modelName);
const tokenValue = existingRow
? existingRow.querySelector(".token-usage")?.textContent ?? 0
: 0;
- const digest = m.digest || "";
+ const instanceCount = modelInstances.length;
+ const endpoints = modelInstances.map((m) => m.endpoint || "unknown");
+ const sizes = modelInstances.map((m) => formatBytes(m.size ?? m.size_vram ?? m.details?.size));
+ const untils = modelInstances.map((m) =>
+ formatUntil(m.until ?? m.expires_at ?? m.expiresAt ?? m.expire_at),
+ );
+ const digest = modelInstances[0]?.digest || "";
const shortDigest = digest.length > 24
? `${digest.slice(0, 12)}...${digest.slice(-12)}`
: digest;
- return `
- | ${m.name} stats |
- ${m.details.parameter_size} |
- ${m.details.quantization_level} |
- ${m.context_length} |
+ const params = modelInstances[0]?.details?.parameter_size ?? "";
+ const quant = modelInstances[0]?.details?.quantization_level ?? "";
+ const ctx = modelInstances[0]?.context_length ?? "";
+ const uniqueEndpoints = Array.from(new Set(endpoints));
+ const endpointsData = encodeURIComponent(JSON.stringify(uniqueEndpoints));
+ return `
+ | ${modelName} stats |
+ ${renderInstanceList(endpoints)} |
+ ${instanceCount} |
+ ${params} |
+ ${quant} |
+ ${ctx} |
+ ${renderInstanceList(sizes)} |
+ ${renderInstanceList(untils)} |
${shortDigest} |
${tokenValue} |
`;
From efdf14a207fae5a38f4aab89899ec6d44255260b Mon Sep 17 00:00:00 2001
From: alpha-nerd-nomyo
Date: Thu, 29 Jan 2026 10:54:43 +0100
Subject: [PATCH 2/2] fix: optimize table column widths and improve time
formatting for responsive layout
- Reduced the model column min-width from 340px to 200px and added a max-width of 300px
- Added specific styling for narrow columns (3rd-5th) with fixed width and center alignment
- Removed the "Instance count" column because its information is redundant
- Enhanced time formatting logic to show relative time instead of absolute dates
- Simplified the digest display to show only the last 6 characters instead of the first-12/last-12 truncated format
- Added proper handling for various time value types (number, string, null)
---
static/index.html | 58 ++++++++++++++++++++++++++++++++++++-----------
1 file changed, 45 insertions(+), 13 deletions(-)
diff --git a/static/index.html b/static/index.html
index a8409f0..09cc6fe 100644
--- a/static/index.html
+++ b/static/index.html
@@ -127,9 +127,20 @@
}
#ps-table th.model-col,
#ps-table td.model {
- min-width: 340px;
+ min-width: 200px;
+ max-width: 300px;
white-space: nowrap;
}
+ /* Optimize narrow columns */
+ #ps-table th:nth-child(3),
+ #ps-table td:nth-child(3),
+ #ps-table th:nth-child(4),
+ #ps-table td:nth-child(4),
+ #ps-table th:nth-child(5),
+ #ps-table td:nth-child(5) {
+ width: 80px;
+ text-align: center;
+ }
.loading {
color: #777;
font-style: italic;
@@ -364,14 +375,13 @@
| Model |
Endpoint |
- Instance count |
Params |
Quant |
Ctx |
Size |
Until |
Digest |
- Token |
+ Tokens |
@@ -829,16 +839,41 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
if (value === null || value === undefined || value === "") {
return "Forever";
}
+
+ let targetTime;
if (typeof value === "number") {
const ms = value > 1e12 ? value : value * 1000;
- const date = new Date(ms);
- return Number.isNaN(date.getTime()) ? String(value) : date.toLocaleString();
+ targetTime = new Date(ms);
+ } else if (typeof value === "string") {
+ targetTime = new Date(value);
+ } else {
+ return String(value);
}
- if (typeof value === "string") {
- const date = new Date(value);
- return Number.isNaN(date.getTime()) ? value : date.toLocaleString();
+
+ if (Number.isNaN(targetTime.getTime())) {
+ return String(value);
+ }
+
+ const now = new Date();
+ const diffMs = targetTime - now;
+ const diffSec = Math.floor(Math.abs(diffMs) / 1000);
+ const diffMin = Math.floor(diffSec / 60);
+ const diffHours = Math.floor(diffMin / 60);
+ const diffDays = Math.floor(diffHours / 24);
+
+ if (diffMs < 0) {
+ return "expired";
+ }
+
+ if (diffMin < 1) {
+ return `in ${diffSec} sec`;
+ } else if (diffMin < 60) {
+ return `in ${diffMin} min`;
+ } else if (diffHours < 24) {
+ return `in ${diffHours} hr`;
+ } else {
+ return `in ${diffDays} days`;
}
- return String(value);
};
const renderInstanceList = (items) => {
@@ -859,9 +894,7 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
formatUntil(m.until ?? m.expires_at ?? m.expiresAt ?? m.expire_at),
);
const digest = modelInstances[0]?.digest || "";
- const shortDigest = digest.length > 24
- ? `${digest.slice(0, 12)}...${digest.slice(-12)}`
- : digest;
+ const shortDigest = digest ? digest.slice(-6) : "";
const params = modelInstances[0]?.details?.parameter_size ?? "";
const quant = modelInstances[0]?.details?.quantization_level ?? "";
const ctx = modelInstances[0]?.context_length ?? "";
@@ -870,7 +903,6 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
return `
| ${modelName} stats |
${renderInstanceList(endpoints)} |
- ${instanceCount} |
${params} |
${quant} |
${ctx} |