Merge pull request #25 from nomyo-ai/dev-v0.6

- updated reasoning handling
- improved model and error caches
- fixed openai tool calling incl. ollama translations
- direct support for llama.cpp's llama_server via llama_server_endpoint config
- basic llama_server model info in dashboard
- improved endpoint info fetching behaviour in error cases
This commit is contained in:
Alpha Nerd 2026-02-13 10:34:42 +01:00 committed by GitHub
commit 9ef1b770ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 528 additions and 164 deletions

View file

@ -1,40 +1,32 @@
# Sample NOMYO Router Configuration
# Basic single endpoint configuration
# config.yaml
# Ollama endpoints
endpoints:
- http://localhost:11434
- http://192.168.0.50:11434
- http://192.168.0.51:11434
- http://192.168.0.52:11434
# External OpenAI-compatible endpoints (will NOT be queried for /api/ps, /api/ps_details)
- https://api.openai.com/v1
# llama-server endpoints (OpenAI-compatible with /v1/models status info)
# These endpoints will be queried for /api/tags, /api/ps, /api/ps_details
# and included in the model selection pool for inference routing
llama_server_endpoints:
- http://localhost:8080/v1
- http://localhost:8081/v1
# Maximum concurrent connections *per endpoint/model pair* (equals to OLLAMA_NUM_PARALLEL)
max_concurrent_connections: 2
# Optional router-level API key to secure the router and dashboard (leave blank to disable)
# Optional router-level API key that gates router/API/web UI access (leave empty to disable)
nomyo-router-api-key: ""
# Multi-endpoint configuration with local Ollama instances
# endpoints:
# - http://ollama-worker1:11434
# - http://ollama-worker2:11434
# - http://ollama-worker3:11434
# Mixed configuration with Ollama and OpenAI endpoints
# endpoints:
# - http://localhost:11434
# - https://api.openai.com/v1
# API keys for remote endpoints
# Use ${VAR_NAME} syntax to reference environment variables
# Set an environment variable like OPENAI_KEY
# Ensure each key below matches an endpoint URL exactly as written in the endpoints block
api_keys:
# Local Ollama instances typically don't require authentication
"http://localhost:11434": "ollama"
# Remote Ollama instances
# "http://remote-ollama:11434": "ollama"
# OpenAI API
# "https://api.openai.com/v1": "${OPENAI_KEY}"
# Anthropic API
# "https://api.anthropic.com/v1": "${ANTHROPIC_KEY}"
# Other OpenAI-compatible endpoints
# "https://api.mistral.ai/v1": "${MISTRAL_KEY}"
"http://192.168.0.50:11434": "ollama"
"http://192.168.0.51:11434": "ollama"
"http://192.168.0.52:11434": "ollama"
"https://api.openai.com/v1": "${OPENAI_KEY}"
"http://localhost:8080/v1": "llama-server" # Optional API key for llama-server
"http://localhost:8081/v1": "llama-server"

601
router.py

File diff suppressed because it is too large Load diff

View file

@ -379,7 +379,7 @@
<th>Quant</th>
<th>Ctx</th>
<th>Size</th>
<th>Until</th>
<th>Unload</th>
<th>Digest</th>
<th>Tokens</th>
</tr>
@ -683,7 +683,12 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
showApiKeyModal("Enter the NOMYO Router API key to load the dashboard.");
}
const body = document.getElementById("endpoints-body");
body.innerHTML = data.endpoints
// Build HTML for both endpoints and llama_server_endpoints
let html = "";
// Add Ollama endpoints
html += data.endpoints
.map((e) => {
const statusClass =
e.status === "ok"
@ -698,6 +703,27 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
</tr>`;
})
.join("");
// Add llama-server endpoints
if (data.llama_server_endpoints && data.llama_server_endpoints.length > 0) {
html += data.llama_server_endpoints
.map((e) => {
const statusClass =
e.status === "ok"
? "status-ok"
: "status-error";
const version = e.version || "N/A";
return `
<tr>
<td class="endpoint">${e.url}</td>
<td class="status ${statusClass}">${e.status}</td>
<td class="version">${version}</td>
</tr>`;
})
.join("");
}
body.innerHTML = html;
} catch (e) {
console.error(e);
const body = document.getElementById("endpoints-body");
@ -837,7 +863,7 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
const formatUntil = (value) => {
if (value === null || value === undefined || value === "") {
return "Forever";
return "";
}
let targetTime;
@ -898,10 +924,11 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
const params = modelInstances[0]?.details?.parameter_size ?? "";
const quant = modelInstances[0]?.details?.quantization_level ?? "";
const ctx = modelInstances[0]?.context_length ?? "";
const originalName = modelInstances[0]?.original_name || modelName;
const uniqueEndpoints = Array.from(new Set(endpoints));
const endpointsData = encodeURIComponent(JSON.stringify(uniqueEndpoints));
return `<tr data-model="${modelName}" data-endpoints="${endpointsData}">
<td class="model">${modelName} <a href="#" class="stats-link" data-model="${modelName}">stats</a></td>
<td class="model">${modelName} <a href="#" class="stats-link" data-model="${originalName}">stats</a></td>
<td>${renderInstanceList(endpoints)}</td>
<td>${params}</td>
<td>${quant}</td>