fixing token_queue, prepping chart view
This commit is contained in:
parent
baf5d98318
commit
541f2826e0
3 changed files with 137 additions and 8 deletions
14
db.py
14
db.py
|
|
@ -132,3 +132,17 @@ class TokenDatabase:
|
||||||
'total_tokens': row[4],
|
'total_tokens': row[4],
|
||||||
'timestamp': row[5]
|
'timestamp': row[5]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def get_token_counts_for_model(self, model):
    """Return the stored token counts for *model*, or ``None`` if absent.

    Reads the first matching row of ``token_counts`` and maps it to a dict
    with ``endpoint``, ``model``, ``input_tokens``, ``output_tokens`` and
    ``total_tokens`` keys.

    NOTE(review): when one model has rows for several endpoints, only the
    first row found is reported — confirm that is the intended behaviour.
    """
    query = (
        'SELECT endpoint, model, input_tokens, output_tokens, total_tokens '
        'FROM token_counts WHERE model = ?'
    )
    async with aiosqlite.connect(self.db_path) as db:
        async with db.execute(query, (model,)) as cursor:
            row = await cursor.fetchone()
    if row is None:
        return None
    keys = ('endpoint', 'model', 'input_tokens', 'output_tokens', 'total_tokens')
    return dict(zip(keys, row))
|
||||||
|
|
|
||||||
67
router.py
67
router.py
|
|
@ -797,7 +797,8 @@ async def proxy(request: Request):
|
||||||
chunk = rechunk.openai_completion2ollama(chunk, stream, start_ts)
|
chunk = rechunk.openai_completion2ollama(chunk, stream, start_ts)
|
||||||
prompt_tok = chunk.prompt_eval_count or 0
|
prompt_tok = chunk.prompt_eval_count or 0
|
||||||
comp_tok = chunk.eval_count or 0
|
comp_tok = chunk.eval_count or 0
|
||||||
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
if prompt_tok != 0 or comp_tok != 0:
|
||||||
|
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
||||||
if hasattr(chunk, "model_dump_json"):
|
if hasattr(chunk, "model_dump_json"):
|
||||||
json_line = chunk.model_dump_json()
|
json_line = chunk.model_dump_json()
|
||||||
else:
|
else:
|
||||||
|
|
@ -811,7 +812,8 @@ async def proxy(request: Request):
|
||||||
response = async_gen.model_dump_json()
|
response = async_gen.model_dump_json()
|
||||||
prompt_tok = async_gen.prompt_eval_count or 0
|
prompt_tok = async_gen.prompt_eval_count or 0
|
||||||
comp_tok = async_gen.eval_count or 0
|
comp_tok = async_gen.eval_count or 0
|
||||||
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
if prompt_tok != 0 or comp_tok != 0:
|
||||||
|
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
||||||
json_line = (
|
json_line = (
|
||||||
response
|
response
|
||||||
if hasattr(async_gen, "model_dump_json")
|
if hasattr(async_gen, "model_dump_json")
|
||||||
|
|
@ -913,7 +915,8 @@ async def chat_proxy(request: Request):
|
||||||
# `chunk` can be a dict or a pydantic model – dump to JSON safely
|
# `chunk` can be a dict or a pydantic model – dump to JSON safely
|
||||||
prompt_tok = chunk.prompt_eval_count or 0
|
prompt_tok = chunk.prompt_eval_count or 0
|
||||||
comp_tok = chunk.eval_count or 0
|
comp_tok = chunk.eval_count or 0
|
||||||
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
if prompt_tok != 0 or comp_tok != 0:
|
||||||
|
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
||||||
if hasattr(chunk, "model_dump_json"):
|
if hasattr(chunk, "model_dump_json"):
|
||||||
json_line = chunk.model_dump_json()
|
json_line = chunk.model_dump_json()
|
||||||
else:
|
else:
|
||||||
|
|
@ -927,7 +930,8 @@ async def chat_proxy(request: Request):
|
||||||
response = async_gen.model_dump_json()
|
response = async_gen.model_dump_json()
|
||||||
prompt_tok = async_gen.prompt_eval_count or 0
|
prompt_tok = async_gen.prompt_eval_count or 0
|
||||||
comp_tok = async_gen.eval_count or 0
|
comp_tok = async_gen.eval_count or 0
|
||||||
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
if prompt_tok != 0 or comp_tok != 0:
|
||||||
|
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
||||||
json_line = (
|
json_line = (
|
||||||
response
|
response
|
||||||
if hasattr(async_gen, "model_dump_json")
|
if hasattr(async_gen, "model_dump_json")
|
||||||
|
|
@ -1159,6 +1163,55 @@ async def show_proxy(request: Request, model: Optional[str] = None):
|
||||||
# 4. Return ShowResponse
|
# 4. Return ShowResponse
|
||||||
return show
|
return show
|
||||||
|
|
||||||
|
# -------------------------------------------------------------
# 12. API route – Stats
# -------------------------------------------------------------
@app.post("/api/stats")
async def stats_proxy(request: Request, model: Optional[str] = None):
    """
    Return token usage statistics for a specific model.

    The model may be supplied either as the ``model`` query parameter or as
    a ``{"model": ...}`` JSON body; the query parameter wins when present.

    Returns a dict with the aggregate ``input_tokens`` / ``output_tokens`` /
    ``total_tokens`` for the model plus a short ``time_series`` list.

    Raises:
        HTTPException: 400 on malformed JSON or a missing model name,
            404 when no token data is stored for the model.
    """
    try:
        body_bytes = await request.body()

        # Only parse the body when the query parameter was not given.
        # NOTE(review): with no ?model= and an empty body this surfaces as a
        # 400 "Invalid JSON" rather than "missing model" — confirm acceptable.
        if not model:
            payload = orjson.loads(body_bytes.decode("utf-8"))
            model = payload.get("model")

        if not model:
            raise HTTPException(
                status_code=400, detail="Missing required field 'model'"
            )
    except orjson.JSONDecodeError as e:
        raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e

    # Get token counts from database
    token_data = await db.get_token_counts_for_model(model)

    if not token_data:
        raise HTTPException(
            status_code=404, detail="No token data found for this model"
        )

    # Get time series data.
    # NOTE(review): the limit is applied before the per-model filter, so a
    # busy deployment may yield fewer than 10 (possibly zero) entries for
    # this model even when data exists — consider filtering in the query.
    time_series = []
    async for entry in db.get_latest_time_series(limit=10):
        if entry['model'] == model:
            time_series.append({
                'timestamp': entry['timestamp'],
                'input_tokens': entry['input_tokens'],
                'output_tokens': entry['output_tokens'],
                'total_tokens': entry['total_tokens']
            })

    return {
        'model': model,
        'input_tokens': token_data['input_tokens'],
        'output_tokens': token_data['output_tokens'],
        'total_tokens': token_data['total_tokens'],
        'time_series': time_series
    }
|
||||||
|
|
||||||
# -------------------------------------------------------------
|
# -------------------------------------------------------------
|
||||||
# 12. API route – Copy
|
# 12. API route – Copy
|
||||||
# -------------------------------------------------------------
|
# -------------------------------------------------------------
|
||||||
|
|
@ -1584,7 +1637,8 @@ async def openai_chat_completions_proxy(request: Request):
|
||||||
else:
|
else:
|
||||||
prompt_tok = async_gen.usage.prompt_tokens or 0
|
prompt_tok = async_gen.usage.prompt_tokens or 0
|
||||||
comp_tok = async_gen.usage.completion_tokens or 0
|
comp_tok = async_gen.usage.completion_tokens or 0
|
||||||
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
if prompt_tok != 0 or comp_tok != 0:
|
||||||
|
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
||||||
json_line = (
|
json_line = (
|
||||||
async_gen.model_dump_json()
|
async_gen.model_dump_json()
|
||||||
if hasattr(async_gen, "model_dump_json")
|
if hasattr(async_gen, "model_dump_json")
|
||||||
|
|
@ -1690,7 +1744,8 @@ async def openai_completions_proxy(request: Request):
|
||||||
else:
|
else:
|
||||||
prompt_tok = async_gen.usage.prompt_tokens or 0
|
prompt_tok = async_gen.usage.prompt_tokens or 0
|
||||||
comp_tok = async_gen.usage.completion_tokens or 0
|
comp_tok = async_gen.usage.completion_tokens or 0
|
||||||
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
if prompt_tok != 0 or comp_tok != 0:
|
||||||
|
await token_queue.put((endpoint, model, prompt_tok, comp_tok))
|
||||||
json_line = (
|
json_line = (
|
||||||
async_gen.model_dump_json()
|
async_gen.model_dump_json()
|
||||||
if hasattr(async_gen, "model_dump_json")
|
if hasattr(async_gen, "model_dump_json")
|
||||||
|
|
|
||||||
|
|
@ -447,7 +447,7 @@
|
||||||
? `${digest.slice(0, 12)}...${digest.slice(-12)}`
|
? `${digest.slice(0, 12)}...${digest.slice(-12)}`
|
||||||
: digest;
|
: digest;
|
||||||
return `<tr data-model="${m.name}">
|
return `<tr data-model="${m.name}">
|
||||||
<td class="model">${m.name}</td>
|
<td class="model">${m.name} <a href="#" class="stats-link" data-model="${m.name}">stats</a></td>
|
||||||
<td>${m.details.parameter_size}</td>
|
<td>${m.details.parameter_size}</td>
|
||||||
<td>${m.details.quantization_level}</td>
|
<td>${m.details.quantization_level}</td>
|
||||||
<td>${m.context_length}</td>
|
<td>${m.context_length}</td>
|
||||||
|
|
@ -636,6 +636,56 @@
|
||||||
modal.style.display = "none";
|
modal.style.display = "none";
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/* stats logic */
// Delegated click handler: any "stats" link in the model table opens the
// stats modal, populated from POST /api/stats?model=<name>.
document.body.addEventListener("click", async (e) => {
  if (!e.target.matches(".stats-link")) return;
  e.preventDefault();
  // Model name is carried on the link itself via data-model.
  const model = e.target.dataset.model;
  try {
    // The route accepts the model as a query parameter, so no body is sent.
    const resp = await fetch(
      `/api/stats?model=${encodeURIComponent(model)}`,
      { method: "POST" },
    );
    if (!resp.ok)
      throw new Error(`Status ${resp.status}`);
    const data = await resp.json();
    const content = document.getElementById("stats-content");
    // NOTE(review): values are interpolated into innerHTML unescaped;
    // token counts are numeric, but confirm timestamps/model-derived
    // strings cannot carry markup.
    content.innerHTML = `
      <h3>Token Usage</h3>
      <p>Input tokens: ${data.input_tokens}</p>
      <p>Output tokens: ${data.output_tokens}</p>
      <p>Total tokens: ${data.total_tokens}</p>
      <h3>Usage Over Time</h3>
      <div id="time-series-chart">
        ${data.time_series.length > 0 ?
          data.time_series.map(ts => `
            <div>
              <strong>${new Date(ts.timestamp * 1000).toLocaleString()}</strong>
              <p>Input: ${ts.input_tokens}, Output: ${ts.output_tokens}, Total: ${ts.total_tokens}</p>
            </div>
          `).join('') :
          '<p>No time series data available</p>'
        }
      </div>
    `;
    document.getElementById("stats-modal").style.display = "flex";
  } catch (err) {
    console.error(err);
    alert(`Could not load model stats: ${err.message}`);
  }
});

/* stats modal close */
// Close the stats modal on a backdrop click or via the close button.
const statsModal = document.getElementById("stats-modal");
statsModal.addEventListener("click", (e) => {
  if (
    e.target === statsModal ||
    e.target.matches(".close-btn")
  ) {
    statsModal.style.display = "none";
  }
});
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
|
@ -646,5 +696,15 @@
|
||||||
<pre id="json-output"></pre>
|
<pre id="json-output"></pre>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div id="stats-modal" class="modal">
|
||||||
|
<div class="modal-content">
|
||||||
|
<span class="close-btn">&times;</span>
|
||||||
|
<h2>Model Stats</h2>
|
||||||
|
<div id="stats-content">
|
||||||
|
<p>Loading stats...</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue