Merge pull request #25 from nomyo-ai/dev-v0.6

- updated reasoning handling
- improved model and error caches
- fixed openai tool calling incl. ollama translations
- direct support for llama.cpp's llama_server via llama_server_endpoint config
- basic llama_server model info in dashboard
- improved endpoint info fetching behaviour in error cases
This commit is contained in:
Alpha Nerd 2026-02-13 10:34:42 +01:00 committed by GitHub
commit 9ef1b770ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 528 additions and 164 deletions

View file

@ -1,40 +1,32 @@
# Sample NOMYO Router Configuration
# Basic single endpoint configuration
# config.yaml
# Ollama endpoints
endpoints:
- http://localhost:11434
- http://192.168.0.50:11434
- http://192.168.0.51:11434
- http://192.168.0.52:11434
# External OpenAI-compatible endpoints (will NOT be queried for /api/ps, /api/ps_details)
- https://api.openai.com/v1
# llama-server endpoints (OpenAI-compatible with /v1/models status info)
# These endpoints will be queried for /api/tags, /api/ps, /api/ps_details
# and included in the model selection pool for inference routing
llama_server_endpoints:
- http://localhost:8080/v1
- http://localhost:8081/v1
# Maximum concurrent connections *per endpoint/model pair* (equals to OLLAMA_NUM_PARALLEL)
max_concurrent_connections: 2
# Optional router-level API key to secure the router and dashboard (leave blank to disable)
# Optional router-level API key that gates router/API/web UI access (leave empty to disable)
nomyo-router-api-key: ""
# Multi-endpoint configuration with local Ollama instances
# endpoints:
# - http://ollama-worker1:11434
# - http://ollama-worker2:11434
# - http://ollama-worker3:11434
# Mixed configuration with Ollama and OpenAI endpoints
# endpoints:
# - http://localhost:11434
# - https://api.openai.com/v1
# API keys for remote endpoints
# Use ${VAR_NAME} syntax to reference environment variables
# Set an environment variable like OPENAI_KEY
# Ensure each key below matches an endpoint URL exactly as written in the endpoints block
api_keys:
# Local Ollama instances typically don't require authentication
"http://localhost:11434": "ollama"
# Remote Ollama instances
# "http://remote-ollama:11434": "ollama"
# OpenAI API
# "https://api.openai.com/v1": "${OPENAI_KEY}"
# Anthropic API
# "https://api.anthropic.com/v1": "${ANTHROPIC_KEY}"
# Other OpenAI-compatible endpoints
# "https://api.mistral.ai/v1": "${MISTRAL_KEY}"
"http://192.168.0.50:11434": "ollama"
"http://192.168.0.51:11434": "ollama"
"http://192.168.0.52:11434": "ollama"
"https://api.openai.com/v1": "${OPENAI_KEY}"
"http://localhost:8080/v1": "llama-server" # Optional API key for llama-server
"http://localhost:8081/v1": "llama-server"

601
router.py

File diff suppressed because it is too large Load diff

View file

@ -379,7 +379,7 @@
<th>Quant</th>
<th>Ctx</th>
<th>Size</th>
<th>Until</th>
<th>Unload</th>
<th>Digest</th>
<th>Tokens</th>
</tr>
@ -683,7 +683,12 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
showApiKeyModal("Enter the NOMYO Router API key to load the dashboard.");
}
const body = document.getElementById("endpoints-body");
body.innerHTML = data.endpoints
// Build HTML for both endpoints and llama_server_endpoints
let html = "";
// Add Ollama endpoints
html += data.endpoints
.map((e) => {
const statusClass =
e.status === "ok"
@ -698,6 +703,27 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
</tr>`;
})
.join("");
// Add llama-server endpoints
if (data.llama_server_endpoints && data.llama_server_endpoints.length > 0) {
html += data.llama_server_endpoints
.map((e) => {
const statusClass =
e.status === "ok"
? "status-ok"
: "status-error";
const version = e.version || "N/A";
return `
<tr>
<td class="endpoint">${e.url}</td>
<td class="status ${statusClass}">${e.status}</td>
<td class="version">${version}</td>
</tr>`;
})
.join("");
}
body.innerHTML = html;
} catch (e) {
console.error(e);
const body = document.getElementById("endpoints-body");
@ -837,7 +863,7 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
const formatUntil = (value) => {
if (value === null || value === undefined || value === "") {
return "Forever";
return "";
}
let targetTime;
@ -898,10 +924,11 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
const params = modelInstances[0]?.details?.parameter_size ?? "";
const quant = modelInstances[0]?.details?.quantization_level ?? "";
const ctx = modelInstances[0]?.context_length ?? "";
const originalName = modelInstances[0]?.original_name || modelName;
const uniqueEndpoints = Array.from(new Set(endpoints));
const endpointsData = encodeURIComponent(JSON.stringify(uniqueEndpoints));
return `<tr data-model="${modelName}" data-endpoints="${endpointsData}">
<td class="model">${modelName} <a href="#" class="stats-link" data-model="${modelName}">stats</a></td>
<td class="model">${modelName} <a href="#" class="stats-link" data-model="${originalName}">stats</a></td>
<td>${renderInstanceList(endpoints)}</td>
<td>${params}</td>
<td>${quant}</td>