Merge pull request #25 from nomyo-ai/dev-v0.6
- updated reasoning handling
- improved model and error caches
- fixed OpenAI tool calling incl. Ollama translations
- direct support for llama.cpp's llama-server via the llama_server_endpoints config option
- basic llama-server model info in the dashboard
- improved endpoint info fetching behaviour in error cases
commit 9ef1b770ba
3 changed files with 528 additions and 164 deletions
@@ -1,40 +1,32 @@
# Sample NOMYO Router Configuration

# Basic single endpoint configuration
# config.yaml
# Ollama endpoints
endpoints:
  - http://localhost:11434
  - http://192.168.0.50:11434
  - http://192.168.0.51:11434
  - http://192.168.0.52:11434
  # External OpenAI-compatible endpoints (will NOT be queried for /api/ps or /api/ps_details)
  - https://api.openai.com/v1

# llama-server endpoints (OpenAI-compatible with /v1/models status info)
# These endpoints will be queried for /api/tags, /api/ps, /api/ps_details
# and included in the model selection pool for inference routing
llama_server_endpoints:
  - http://localhost:8080/v1
  - http://localhost:8081/v1

# Maximum concurrent connections *per endpoint-model pair* (corresponds to OLLAMA_NUM_PARALLEL)
max_concurrent_connections: 2

# Optional router-level API key to secure the router and dashboard (leave blank to disable)
# Optional router-level API key that gates router/API/web UI access (leave empty to disable)
nomyo-router-api-key: ""

# Multi-endpoint configuration with local Ollama instances
# endpoints:
#   - http://ollama-worker1:11434
#   - http://ollama-worker2:11434
#   - http://ollama-worker3:11434

# Mixed configuration with Ollama and OpenAI endpoints
# endpoints:
#   - http://localhost:11434
#   - https://api.openai.com/v1

# API keys for remote endpoints
# Use ${VAR_NAME} syntax to reference environment variables
# Set an environment variable such as OPENAI_KEY
# Endpoint URLs must match the endpoints block exactly
api_keys:
  # Local Ollama instances typically don't require authentication
  "http://localhost:11434": "ollama"

  # Remote Ollama instances
  # "http://remote-ollama:11434": "ollama"

  # OpenAI API
  # "https://api.openai.com/v1": "${OPENAI_KEY}"

  # Anthropic API
  # "https://api.anthropic.com/v1": "${ANTHROPIC_KEY}"

  # Other OpenAI-compatible endpoints
  # "https://api.mistral.ai/v1": "${MISTRAL_KEY}"

  "http://192.168.0.50:11434": "ollama"
  "http://192.168.0.51:11434": "ollama"
  "http://192.168.0.52:11434": "ollama"
  "https://api.openai.com/v1": "${OPENAI_KEY}"
  "http://localhost:8080/v1": "llama-server"  # Optional API key for llama-server
  "http://localhost:8081/v1": "llama-server"

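A note on the ${VAR_NAME} references in the api_keys block above: the referenced variable just needs to exist in the environment the router is started from (that the router expands it at load time is inferred from the comments in this sample, not shown in the diff). For example:

    # In the shell that launches the router (example value, not a real key):
    #   export OPENAI_KEY="sk-..."
    api_keys:
      "https://api.openai.com/v1": "${OPENAI_KEY}"
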
@@ -379,7 +379,7 @@
  <th>Quant</th>
  <th>Ctx</th>
  <th>Size</th>
  <th>Until</th>
  <th>Unload</th>
  <th>Digest</th>
  <th>Tokens</th>
</tr>

@@ -683,7 +683,12 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
      showApiKeyModal("Enter the NOMYO Router API key to load the dashboard.");
    }
    const body = document.getElementById("endpoints-body");
    body.innerHTML = data.endpoints

    // Build HTML for both endpoints and llama_server_endpoints
    let html = "";

    // Add Ollama endpoints
    html += data.endpoints
      .map((e) => {
        const statusClass =
          e.status === "ok"

@@ -698,6 +703,27 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
          </tr>`;
      })
      .join("");

    // Add llama-server endpoints
    if (data.llama_server_endpoints && data.llama_server_endpoints.length > 0) {
      html += data.llama_server_endpoints
        .map((e) => {
          const statusClass =
            e.status === "ok"
              ? "status-ok"
              : "status-error";
          const version = e.version || "N/A";
          return `
            <tr>
              <td class="endpoint">${e.url}</td>
              <td class="status ${statusClass}">${e.status}</td>
              <td class="version">${version}</td>
            </tr>`;
        })
        .join("");
    }

    body.innerHTML = html;
  } catch (e) {
    console.error(e);
    const body = document.getElementById("endpoints-body");

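For reference, the llama-server rows above only read three fields from each entry of data.llama_server_endpoints, so an entry shaped like the sketch below is enough to render a row (field names are taken from the code; the concrete values are invented for illustration):

    // Hypothetical entry in data.llama_server_endpoints, as consumed by the dashboard code above.
    const exampleLlamaServerEndpoint = {
      url: "http://localhost:8080/v1", // rendered in the endpoint column
      status: "ok",                    // anything other than "ok" gets the status-error class
      version: "b4689",                // optional; missing values fall back to "N/A"
    };
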
@@ -837,7 +863,7 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {

  const formatUntil = (value) => {
    if (value === null || value === undefined || value === "") {
      return "Forever";
      return "∞";
    }

    let targetTime;

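The only behavioural change in this hunk is the sentinel for models that stay loaded indefinitely: an empty keep-alive value now renders as the infinity symbol instead of the word "Forever". Illustrative calls, assuming the rest of formatUntil is unchanged:

    formatUntil(null);       // "∞"  (previously "Forever")
    formatUntil(undefined);  // "∞"
    formatUntil("");         // "∞"
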
@@ -898,10 +924,11 @@ function renderTimeSeriesChart(timeSeriesData, chart, minutes) {
      const params = modelInstances[0]?.details?.parameter_size ?? "";
      const quant = modelInstances[0]?.details?.quantization_level ?? "";
      const ctx = modelInstances[0]?.context_length ?? "";
      const originalName = modelInstances[0]?.original_name || modelName;
      const uniqueEndpoints = Array.from(new Set(endpoints));
      const endpointsData = encodeURIComponent(JSON.stringify(uniqueEndpoints));
      return `<tr data-model="${modelName}" data-endpoints="${endpointsData}">
        <td class="model">${modelName} <a href="#" class="stats-link" data-model="${modelName}">stats</a></td>
        <td class="model">${modelName} <a href="#" class="stats-link" data-model="${originalName}">stats</a></td>
        <td>${renderInstanceList(endpoints)}</td>
        <td>${params}</td>
        <td>${quant}</td>

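Each row above stores its endpoint list URI-encoded in data-endpoints, and the stats link now carries the model's original name in data-model rather than the display name. A minimal sketch of how a click handler could read those attributes back; the handler itself and its wiring are assumptions, not part of this diff:

    // Sketch only: reads the attributes written by the row template above.
    document.querySelectorAll(".stats-link").forEach((link) => {
      link.addEventListener("click", (event) => {
        event.preventDefault();
        const row = link.closest("tr");
        const endpoints = JSON.parse(decodeURIComponent(row.dataset.endpoints));
        console.log(link.dataset.model, endpoints); // original model name + its unique endpoints
      });
    });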