fix(db.py): remove full table scans by using proper WHERE clauses for dashboard statistics, and aggregate in the DB rather than in Python

This commit is contained in:
Alpha Nerd 2026-03-03 17:20:33 +01:00
parent 45315790d1
commit 8037706f0b
2 changed files with 46 additions and 17 deletions

41
db.py
View file

@@ -63,6 +63,7 @@ class TokenDatabase:
) )
''') ''')
await db.execute('CREATE INDEX IF NOT EXISTS idx_token_time_series_timestamp ON token_time_series(timestamp)') await db.execute('CREATE INDEX IF NOT EXISTS idx_token_time_series_timestamp ON token_time_series(timestamp)')
await db.execute('CREATE INDEX IF NOT EXISTS idx_token_time_series_model_ts ON token_time_series(model, timestamp)')
await db.commit() await db.commit()
async def update_token_counts(self, endpoint: str, model: str, input_tokens: int, output_tokens: int): async def update_token_counts(self, endpoint: str, model: str, input_tokens: int, output_tokens: int):
@@ -178,6 +179,46 @@ class TokenDatabase:
'timestamp': row[5] 'timestamp': row[5]
} }
async def get_time_series_for_model(self, model: str, limit: int = 50000):
    """Yield time series entries for *model*, newest first.

    Filtering happens in SQL (using the (model, timestamp) composite
    index) instead of fetching every row and discarding non-matches in
    Python.

    All rows are fetched while the operation lock is held, but yielded
    only AFTER the lock is released: holding the lock across ``yield``
    would keep it locked for as long as the consumer iterates — or
    forever, if the generator is abandoned — blocking every other
    database operation.

    Args:
        model: Model name to filter on.
        limit: Maximum number of rows to yield (newest first).

    Yields:
        dict with keys 'endpoint', 'input_tokens', 'output_tokens',
        'total_tokens', 'timestamp'.
    """
    db = await self._get_connection()
    async with self._operation_lock:
        async with db.execute('''
            SELECT endpoint, input_tokens, output_tokens, total_tokens, timestamp
            FROM token_time_series
            WHERE model = ?
            ORDER BY timestamp DESC
            LIMIT ?
        ''', (model, limit)) as cursor:
            # Materialize under the lock so iteration below is lock-free.
            rows = await cursor.fetchall()
    for row in rows:
        yield {
            'endpoint': row[0],
            'input_tokens': row[1],
            'output_tokens': row[2],
            'total_tokens': row[3],
            'timestamp': row[4],
        }
async def get_endpoint_distribution_for_model(self, model: str) -> dict:
    """Aggregate total tokens per endpoint for *model*.

    The GROUP BY / SUM runs entirely inside SQLite, so Python never
    touches individual rows.

    Args:
        model: Model name to filter on.

    Returns:
        Mapping of endpoint name -> summed total_tokens.
    """
    db = await self._get_connection()
    distribution = {}
    async with self._operation_lock:
        async with db.execute('''
            SELECT endpoint, SUM(total_tokens)
            FROM token_time_series
            WHERE model = ?
            GROUP BY endpoint
        ''', (model,)) as cursor:
            async for endpoint, token_sum in cursor:
                distribution[endpoint] = token_sum
    return distribution
async def get_token_counts_for_model(self, model): async def get_token_counts_for_model(self, model):
"""Get token counts for a specific model, aggregated across all endpoints.""" """Get token counts for a specific model, aggregated across all endpoints."""
db = await self._get_connection() db = await self._get_connection()

View file

@@ -2086,22 +2086,10 @@ async def stats_proxy(request: Request, model: Optional[str] = None):
status_code=404, detail="No token data found for this model" status_code=404, detail="No token data found for this model"
) )
# Get time series data for the last 30 days (43200 minutes = 30 days) time_series = [
# Assuming entries are grouped by minute, 30 days = 43200 entries max entry async for entry in db.get_time_series_for_model(model)
time_series = [] ]
endpoint_totals = defaultdict(int) # Track tokens per endpoint endpoint_distribution = await db.get_endpoint_distribution_for_model(model)
async for entry in db.get_latest_time_series(limit=50000):
if entry['model'] == model:
time_series.append({
'endpoint': entry['endpoint'],
'timestamp': entry['timestamp'],
'input_tokens': entry['input_tokens'],
'output_tokens': entry['output_tokens'],
'total_tokens': entry['total_tokens']
})
# Accumulate total tokens per endpoint
endpoint_totals[entry['endpoint']] += entry['total_tokens']
return { return {
'model': model, 'model': model,
@ -2109,7 +2097,7 @@ async def stats_proxy(request: Request, model: Optional[str] = None):
'output_tokens': token_data['output_tokens'], 'output_tokens': token_data['output_tokens'],
'total_tokens': token_data['total_tokens'], 'total_tokens': token_data['total_tokens'],
'time_series': time_series, 'time_series': time_series,
'endpoint_distribution': dict(endpoint_totals) 'endpoint_distribution': endpoint_distribution,
} }
# ------------------------------------------------------------- # -------------------------------------------------------------