mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
feat: implement token usage tracking for LLM calls with new accumulator and callback
This commit is contained in:
parent
917f35eb33
commit
3cfe53fb7f
6 changed files with 223 additions and 4 deletions
|
|
@@ -1170,6 +1170,10 @@ async def stream_new_chat(
|
|||
_t_total = time.perf_counter()
|
||||
log_system_snapshot("stream_new_chat_START")
|
||||
|
||||
from app.services.token_tracking_service import start_turn
|
||||
|
||||
accumulator = start_turn()
|
||||
|
||||
session = async_session_maker()
|
||||
try:
|
||||
# Mark AI as responding to this user for live collaboration
|
||||
|
|
@@ -1527,6 +1531,17 @@ async def stream_new_chat(
|
|||
if stream_result.is_interrupted:
|
||||
if title_task is not None and not title_task.done():
|
||||
title_task.cancel()
|
||||
|
||||
usage_summary = accumulator.per_message_summary()
|
||||
if usage_summary:
|
||||
yield streaming_service.format_data("token-usage", {
|
||||
"usage": usage_summary,
|
||||
"prompt_tokens": accumulator.total_prompt_tokens,
|
||||
"completion_tokens": accumulator.total_completion_tokens,
|
||||
"total_tokens": accumulator.grand_total,
|
||||
"call_details": accumulator.serialized_calls(),
|
||||
})
|
||||
|
||||
yield streaming_service.format_finish_step()
|
||||
yield streaming_service.format_finish()
|
||||
yield streaming_service.format_done()
|
||||
|
|
@@ -1548,6 +1563,16 @@ async def stream_new_chat(
|
|||
chat_id, generated_title
|
||||
)
|
||||
|
||||
usage_summary = accumulator.per_message_summary()
|
||||
if usage_summary:
|
||||
yield streaming_service.format_data("token-usage", {
|
||||
"usage": usage_summary,
|
||||
"prompt_tokens": accumulator.total_prompt_tokens,
|
||||
"completion_tokens": accumulator.total_completion_tokens,
|
||||
"total_tokens": accumulator.grand_total,
|
||||
"call_details": accumulator.serialized_calls(),
|
||||
})
|
||||
|
||||
# Fire background memory extraction if the agent didn't handle it.
|
||||
# Shared threads write to team memory; private threads write to user memory.
|
||||
if not stream_result.agent_called_update_memory:
|
||||
|
|
@@ -1646,6 +1671,10 @@ async def stream_resume_chat(
|
|||
stream_result = StreamResult()
|
||||
_t_total = time.perf_counter()
|
||||
|
||||
from app.services.token_tracking_service import start_turn
|
||||
|
||||
accumulator = start_turn()
|
||||
|
||||
session = async_session_maker()
|
||||
try:
|
||||
if user_id:
|
||||
|
|
@@ -1769,11 +1798,31 @@ async def stream_resume_chat(
|
|||
chat_id,
|
||||
)
|
||||
if stream_result.is_interrupted:
|
||||
usage_summary = accumulator.per_message_summary()
|
||||
if usage_summary:
|
||||
yield streaming_service.format_data("token-usage", {
|
||||
"usage": usage_summary,
|
||||
"prompt_tokens": accumulator.total_prompt_tokens,
|
||||
"completion_tokens": accumulator.total_completion_tokens,
|
||||
"total_tokens": accumulator.grand_total,
|
||||
"call_details": accumulator.serialized_calls(),
|
||||
})
|
||||
|
||||
yield streaming_service.format_finish_step()
|
||||
yield streaming_service.format_finish()
|
||||
yield streaming_service.format_done()
|
||||
return
|
||||
|
||||
usage_summary = accumulator.per_message_summary()
|
||||
if usage_summary:
|
||||
yield streaming_service.format_data("token-usage", {
|
||||
"usage": usage_summary,
|
||||
"prompt_tokens": accumulator.total_prompt_tokens,
|
||||
"completion_tokens": accumulator.total_completion_tokens,
|
||||
"total_tokens": accumulator.grand_total,
|
||||
"call_details": accumulator.serialized_calls(),
|
||||
})
|
||||
|
||||
yield streaming_service.format_finish_step()
|
||||
yield streaming_service.format_finish()
|
||||
yield streaming_service.format_done()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue