Fix token_queue updates (skip empty counts); prepare per-model stats chart view

This commit is contained in:
Alpha Nerd 2025-11-18 19:02:36 +01:00
parent baf5d98318
commit 541f2826e0
3 changed files with 137 additions and 8 deletions

14
db.py
View file

@ -132,3 +132,17 @@ class TokenDatabase:
'total_tokens': row[4], 'total_tokens': row[4],
'timestamp': row[5] 'timestamp': row[5]
} }
async def get_token_counts_for_model(self, model):
    """Return stored token counts for *model*, or None if no row exists.

    Args:
        model: Model name to look up in the ``token_counts`` table.

    Returns:
        dict with keys ``endpoint``, ``model``, ``input_tokens``,
        ``output_tokens`` and ``total_tokens`` for the first matching
        row, or ``None`` when the model has no recorded counts.
        NOTE(review): if several endpoints share a model name only one
        row is returned — confirm that is intended.
    """
    async with aiosqlite.connect(self.db_path) as db:
        async with db.execute(
            'SELECT endpoint, model, input_tokens, output_tokens, total_tokens '
            'FROM token_counts WHERE model = ?',
            (model,),
        ) as cursor:
            # fetchone() replaces the original `async for ... return`
            # pattern: same semantics (first row or None), clearer intent.
            row = await cursor.fetchone()
    if row is None:
        return None
    return {
        'endpoint': row[0],
        'model': row[1],
        'input_tokens': row[2],
        'output_tokens': row[3],
        'total_tokens': row[4],
    }

View file

@ -797,6 +797,7 @@ async def proxy(request: Request):
chunk = rechunk.openai_completion2ollama(chunk, stream, start_ts) chunk = rechunk.openai_completion2ollama(chunk, stream, start_ts)
prompt_tok = chunk.prompt_eval_count or 0 prompt_tok = chunk.prompt_eval_count or 0
comp_tok = chunk.eval_count or 0 comp_tok = chunk.eval_count or 0
if prompt_tok != 0 or comp_tok != 0:
await token_queue.put((endpoint, model, prompt_tok, comp_tok)) await token_queue.put((endpoint, model, prompt_tok, comp_tok))
if hasattr(chunk, "model_dump_json"): if hasattr(chunk, "model_dump_json"):
json_line = chunk.model_dump_json() json_line = chunk.model_dump_json()
@ -811,6 +812,7 @@ async def proxy(request: Request):
response = async_gen.model_dump_json() response = async_gen.model_dump_json()
prompt_tok = async_gen.prompt_eval_count or 0 prompt_tok = async_gen.prompt_eval_count or 0
comp_tok = async_gen.eval_count or 0 comp_tok = async_gen.eval_count or 0
if prompt_tok != 0 or comp_tok != 0:
await token_queue.put((endpoint, model, prompt_tok, comp_tok)) await token_queue.put((endpoint, model, prompt_tok, comp_tok))
json_line = ( json_line = (
response response
@ -913,6 +915,7 @@ async def chat_proxy(request: Request):
# `chunk` can be a dict or a pydantic model dump to JSON safely # `chunk` can be a dict or a pydantic model dump to JSON safely
prompt_tok = chunk.prompt_eval_count or 0 prompt_tok = chunk.prompt_eval_count or 0
comp_tok = chunk.eval_count or 0 comp_tok = chunk.eval_count or 0
if prompt_tok != 0 or comp_tok != 0:
await token_queue.put((endpoint, model, prompt_tok, comp_tok)) await token_queue.put((endpoint, model, prompt_tok, comp_tok))
if hasattr(chunk, "model_dump_json"): if hasattr(chunk, "model_dump_json"):
json_line = chunk.model_dump_json() json_line = chunk.model_dump_json()
@ -927,6 +930,7 @@ async def chat_proxy(request: Request):
response = async_gen.model_dump_json() response = async_gen.model_dump_json()
prompt_tok = async_gen.prompt_eval_count or 0 prompt_tok = async_gen.prompt_eval_count or 0
comp_tok = async_gen.eval_count or 0 comp_tok = async_gen.eval_count or 0
if prompt_tok != 0 or comp_tok != 0:
await token_queue.put((endpoint, model, prompt_tok, comp_tok)) await token_queue.put((endpoint, model, prompt_tok, comp_tok))
json_line = ( json_line = (
response response
@ -1159,6 +1163,55 @@ async def show_proxy(request: Request, model: Optional[str] = None):
# 4. Return ShowResponse # 4. Return ShowResponse
return show return show
# -------------------------------------------------------------
# 12. API route Stats
# -------------------------------------------------------------
@app.post("/api/stats")
async def stats_proxy(request: Request, model: Optional[str] = None):
    """
    Return token usage statistics for a specific model.

    The model may be given either as a query parameter (``?model=...``,
    which is what the dashboard's stats link sends) or as a JSON body
    field ``{"model": ...}``.

    Raises:
        HTTPException(400): malformed JSON body or missing ``model``.
        HTTPException(404): no token data recorded for the model.
    """
    if not model:
        # Only touch the request body when the query parameter is absent;
        # the web UI POSTs with an empty body and ?model=... in the URL.
        try:
            # orjson.loads accepts bytes directly — avoids a manual
            # .decode("utf-8") whose UnicodeDecodeError would otherwise
            # escape as a 500.
            payload = orjson.loads(await request.body())
        except orjson.JSONDecodeError as e:
            raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e
        model = payload.get("model")
        if not model:
            raise HTTPException(
                status_code=400, detail="Missing required field 'model'"
            )
    # Get token counts from database
    token_data = await db.get_token_counts_for_model(model)
    if not token_data:
        raise HTTPException(
            status_code=404, detail="No token data found for this model"
        )
    # Get time series data; get_latest_time_series yields rows for all
    # models, so filter client-side for the requested one.
    time_series = []
    async for entry in db.get_latest_time_series(limit=10):
        if entry['model'] == model:
            time_series.append({
                'timestamp': entry['timestamp'],
                'input_tokens': entry['input_tokens'],
                'output_tokens': entry['output_tokens'],
                'total_tokens': entry['total_tokens'],
            })
    return {
        'model': model,
        'input_tokens': token_data['input_tokens'],
        'output_tokens': token_data['output_tokens'],
        'total_tokens': token_data['total_tokens'],
        'time_series': time_series,
    }
# ------------------------------------------------------------- # -------------------------------------------------------------
# 12. API route Copy # 12. API route Copy
# ------------------------------------------------------------- # -------------------------------------------------------------
@ -1584,6 +1637,7 @@ async def openai_chat_completions_proxy(request: Request):
else: else:
prompt_tok = async_gen.usage.prompt_tokens or 0 prompt_tok = async_gen.usage.prompt_tokens or 0
comp_tok = async_gen.usage.completion_tokens or 0 comp_tok = async_gen.usage.completion_tokens or 0
if prompt_tok != 0 or comp_tok != 0:
await token_queue.put((endpoint, model, prompt_tok, comp_tok)) await token_queue.put((endpoint, model, prompt_tok, comp_tok))
json_line = ( json_line = (
async_gen.model_dump_json() async_gen.model_dump_json()
@ -1690,6 +1744,7 @@ async def openai_completions_proxy(request: Request):
else: else:
prompt_tok = async_gen.usage.prompt_tokens or 0 prompt_tok = async_gen.usage.prompt_tokens or 0
comp_tok = async_gen.usage.completion_tokens or 0 comp_tok = async_gen.usage.completion_tokens or 0
if prompt_tok != 0 or comp_tok != 0:
await token_queue.put((endpoint, model, prompt_tok, comp_tok)) await token_queue.put((endpoint, model, prompt_tok, comp_tok))
json_line = ( json_line = (
async_gen.model_dump_json() async_gen.model_dump_json()

View file

@ -447,7 +447,7 @@
? `${digest.slice(0, 12)}...${digest.slice(-12)}` ? `${digest.slice(0, 12)}...${digest.slice(-12)}`
: digest; : digest;
return `<tr data-model="${m.name}"> return `<tr data-model="${m.name}">
<td class="model">${m.name}</td> <td class="model">${m.name} <a href="#" class="stats-link" data-model="${m.name}">stats</a></td>
<td>${m.details.parameter_size}</td> <td>${m.details.parameter_size}</td>
<td>${m.details.quantization_level}</td> <td>${m.details.quantization_level}</td>
<td>${m.context_length}</td> <td>${m.context_length}</td>
@ -636,6 +636,56 @@
modal.style.display = "none"; modal.style.display = "none";
} }
}); });
/* stats logic: delegated click handler so links added after re-render
   still work; fetches /api/stats (model passed as a query parameter,
   POST with empty body) and fills the stats modal. */
document.body.addEventListener("click", async (e) => {
    if (!e.target.matches(".stats-link")) return;
    e.preventDefault();
    const model = e.target.dataset.model;
    try {
        const resp = await fetch(
            `/api/stats?model=${encodeURIComponent(model)}`,
            { method: "POST" },
        );
        if (!resp.ok)
            throw new Error(`Status ${resp.status}`);
        const data = await resp.json();
        const content = document.getElementById("stats-content");
        // NOTE(review): values are interpolated into innerHTML unescaped;
        // counts are numeric, but confirm timestamps/model data stay
        // server-controlled before trusting this with user input.
        content.innerHTML = `
            <h3>Token Usage</h3>
            <p>Input tokens: ${data.input_tokens}</p>
            <p>Output tokens: ${data.output_tokens}</p>
            <p>Total tokens: ${data.total_tokens}</p>
            <h3>Usage Over Time</h3>
            <div id="time-series-chart">
                ${data.time_series.length > 0 ?
                    data.time_series.map(ts => `
                        <div>
                            <strong>${new Date(ts.timestamp * 1000).toLocaleString()}</strong>
                            <p>Input: ${ts.input_tokens}, Output: ${ts.output_tokens}, Total: ${ts.total_tokens}</p>
                        </div>
                    `).join('') :
                    '<p>No time series data available</p>'
                }
            </div>
        `;
        document.getElementById("stats-modal").style.display = "flex";
    } catch (err) {
        console.error(err);
        alert(`Could not load model stats: ${err.message}`);
    }
});
/* stats modal close: dismiss on backdrop click or the × button */
const statsModal = document.getElementById("stats-modal");
statsModal.addEventListener("click", (e) => {
    if (
        e.target === statsModal ||
        e.target.matches(".close-btn")
    ) {
        statsModal.style.display = "none";
    }
});
}); });
</script> </script>
@ -646,5 +696,15 @@
<pre id="json-output"></pre> <pre id="json-output"></pre>
</div> </div>
</div> </div>
<!-- Per-model token stats modal; content is replaced by the
     .stats-link click handler, which also toggles its display. -->
<div id="stats-modal" class="modal">
    <div class="modal-content">
        <span class="close-btn">&times;</span>
        <h2>Model Stats</h2>
        <div id="stats-content">
            <p>Loading stats...</p>
        </div>
    </div>
</div>
</body> </body>
</html> </html>