Added endpoint differentiation for models PS board

Added endpoint differentiation to the models PS board, showing which model is loaded on which endpoint and for how long. This makes it easier to view multiple instances of the same model deployed for load balancing.
2026-01-27 13:29:54 +01:00 · 2026-01-27 13:29:54 +01:00 · d3aa87ca15
commit d3aa87ca15
parent bdd4dd45d9
2 changed files with 124 additions and 12 deletions
--- a/router.py
+++ b/router.py
@@ -1878,6 +1878,28 @@ async def ps_proxy(request: Request):
        status_code=200,
    )

+# -------------------------------------------------------------
+# 18b. API route – ps details (backwards compatible)
+# -------------------------------------------------------------
+@app.get("/api/ps_details")
+async def ps_details_proxy(request: Request):
+    """
+    Query /api/ps on every configured endpoint whose URL lacks "/v1" and return
+    each loaded-model entry tagged with the endpoint it was reported by.
+    /api/ps itself is left untouched, so existing clients stay compatible.
+    """
+    endpoints = [ep for ep in config.endpoints if "/v1" not in ep]
+    per_endpoint = await asyncio.gather(*(fetch.endpoint_details(ep, "/api/ps", "models") for ep in endpoints))
+
+    # One flat list: a copy of every dict entry, with its source endpoint added.
+    models: list[dict] = [
+        {**entry, "endpoint": ep}
+        for ep, entries in zip(endpoints, per_endpoint)
+        for entry in entries
+        if isinstance(entry, dict)
+    ]
+    return JSONResponse(content={"models": models}, status_code=200)
+
 # -------------------------------------------------------------
 # 19. Proxy usage route – for monitoring
 # -------------------------------------------------------------