diff --git a/router.py b/router.py
index 39b9498..8ba1d50 100644
--- a/router.py
+++ b/router.py
@@ -1878,6 +1878,28 @@ async def ps_proxy(request: Request):
status_code=200,
)
+# -------------------------------------------------------------
+# 18b. API route – ps details (backwards compatible)
+# -------------------------------------------------------------
+@app.get("/api/ps_details")
+async def ps_details_proxy(request: Request):
+    """
+    Query /api/ps on every configured endpoint whose URL does not contain "/v1"
+    and return one flat "models" list, each entry tagged with its "endpoint".
+    """
+    endpoints = [ep for ep in config.endpoints if "/v1" not in ep]
+    per_endpoint = await asyncio.gather(
+        *(fetch.endpoint_details(ep, "/api/ps", "models") for ep in endpoints)
+    )
+
+    models: list[dict] = [
+        {**entry, "endpoint": ep}  # copy, so the fetched dict is not mutated
+        for ep, entries in zip(endpoints, per_endpoint)
+        for entry in entries
+        if isinstance(entry, dict)
+    ]
+    return JSONResponse(content={"models": models}, status_code=200)
+
# -------------------------------------------------------------
# 19. Proxy usage route – for monitoring
# -------------------------------------------------------------
diff --git a/static/index.html b/static/index.html
index 043649e..a8409f0 100644
--- a/static/index.html
+++ b/static/index.html
@@ -1,4 +1,4 @@
-
+
@@ -42,6 +42,7 @@
background: white;
padding: 1rem;
border-radius: 6px;
+ overflow-x: auto;
}
.endpoints-container {
flex: 1;
@@ -114,6 +115,21 @@
th {
background: #e0e0e0;
}
+ .ps-subrow {
+ display: block;
+ }
+ .ps-subrow + .ps-subrow {
+ margin-top: 2px;
+ }
+ #ps-table {
+ width: max-content;
+ min-width: 100%;
+ }
+ #ps-table th.model-col,
+ #ps-table td.model {
+ min-width: 340px;
+ white-space: nowrap;
+ }
.loading {
color: #777;
font-style: italic;
@@ -346,10 +362,14 @@
- | Model |
+ Model |
+ Endpoint |
+ Instance count |
Params |
Quant |
Ctx |
+ Size |
+ Until |
Digest |
Token |
@@ -698,6 +718,7 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
document.getElementById("tags-count").textContent =
`${data.models.length}`;
+
/* copy logic */
document.querySelectorAll(".copy-link").forEach((link) => {
link.addEventListener("click", async (e) => {
@@ -769,23 +790,92 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
/* ---------- PS ---------- */
async function loadPS() {
try {
- const data = await fetchJSON("/api/ps");
+ let instances = [];
+ try {
+ const detailed = await fetchJSON("/api/ps_details");
+ instances = Array.isArray(detailed.models) ? detailed.models : [];
+ } catch (err) {
+ console.error("Failed to load ps_details, falling back to /api/ps", err);
+ const fallback = await fetchJSON("/api/ps");
+ instances = (fallback.models || []).map((m) => ({
+ ...m,
+ endpoint: "unknown",
+ }));
+ }
const body = document.getElementById("ps-body");
- body.innerHTML = data.models
- .map(m => {
- const existingRow = psRows.get(m.name);
+ const grouped = new Map();
+ for (const instance of instances) {
+ if (!instance || !instance.name) continue;
+ if (!grouped.has(instance.name)) grouped.set(instance.name, []);
+ grouped.get(instance.name).push(instance);
+ }
+
+ const formatBytes = (value) => {
+   // Render a byte count as a short human-readable string such as "1.5 KB".
+   // Strings pass through untouched; anything that is not a finite number yields "".
+   if (value === null || value === undefined || value === "") return "";
+   if (typeof value === "string") return value;
+   // isFinite rejects NaN and also keeps +/-Infinity from rendering as "Infinity TB".
+   if (typeof value !== "number" || !Number.isFinite(value)) return "";
+   const units = ["B", "KB", "MB", "GB", "TB"];
+   let size = value;
+   let unitIndex = 0;
+   // Divide by 1024 until the value fits the unit or the largest unit is reached.
+   while (size >= 1024 && unitIndex < units.length - 1) {
+     size /= 1024;
+     unitIndex += 1;
+   }
+   // Whole numbers for plain bytes and for values >= 10; one decimal otherwise.
+   const precision = size >= 10 || unitIndex === 0 ? 0 : 1;
+   return `${size.toFixed(precision)} ${units[unitIndex]}`;
+ };
+
+ const formatUntil = (value) => {
+   // Turn an expiry value into display text; a missing expiry is shown as "Forever".
+   if (value == null || value === "") return "Forever";
+   const kind = typeof value;
+   // Only numbers and strings are parsed as dates; everything else is stringified.
+   if (kind !== "number" && kind !== "string") return String(value);
+   // Numbers above 1e12 are used as-is (milliseconds); smaller ones are multiplied by 1000.
+   const dateInput = kind === "number" && !(value > 1e12) ? value * 1000 : value;
+   const parsed = new Date(dateInput);
+   // Unparseable input falls back to the raw value rather than "Invalid Date".
+   if (Number.isNaN(parsed.getTime())) return String(value);
+   return parsed.toLocaleString();
+ };
+
+ const renderInstanceList = (items) => {
+ if (!items.length) return "";
+ return items.map((item) => `${item || ""}
`).join("");
+ };
+
+ body.innerHTML = Array.from(grouped.entries())
+ .map(([modelName, modelInstances]) => {
+ const existingRow = psRows.get(modelName);
const tokenValue = existingRow
? existingRow.querySelector(".token-usage")?.textContent ?? 0
: 0;
- const digest = m.digest || "";
+ const instanceCount = modelInstances.length;
+ const endpoints = modelInstances.map((m) => m.endpoint || "unknown");
+ const sizes = modelInstances.map((m) => formatBytes(m.size ?? m.size_vram ?? m.details?.size));
+ const untils = modelInstances.map((m) =>
+ formatUntil(m.until ?? m.expires_at ?? m.expiresAt ?? m.expire_at),
+ );
+ const digest = modelInstances[0]?.digest || "";
const shortDigest = digest.length > 24
? `${digest.slice(0, 12)}...${digest.slice(-12)}`
: digest;
- return `
- | ${m.name} stats |
- ${m.details.parameter_size} |
- ${m.details.quantization_level} |
- ${m.context_length} |
+ const params = modelInstances[0]?.details?.parameter_size ?? "";
+ const quant = modelInstances[0]?.details?.quantization_level ?? "";
+ const ctx = modelInstances[0]?.context_length ?? "";
+ const uniqueEndpoints = Array.from(new Set(endpoints));
+ const endpointsData = encodeURIComponent(JSON.stringify(uniqueEndpoints));
+ return `
+ | ${modelName} stats |
+ ${renderInstanceList(endpoints)} |
+ ${instanceCount} |
+ ${params} |
+ ${quant} |
+ ${ctx} |
+ ${renderInstanceList(sizes)} |
+ ${renderInstanceList(untils)} |
${shortDigest} |
${tokenValue} |
`;