From 917f35eb333dcf8d7aafe137aed45c3446449c60 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:26:53 +0530 Subject: [PATCH 01/14] feat: add token_usage table and relationships for tracking LLM token consumption --- .../versions/124_add_token_usage_table.py | 81 +++++++++++++++++++ surfsense_backend/app/db.py | 62 ++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 surfsense_backend/alembic/versions/124_add_token_usage_table.py diff --git a/surfsense_backend/alembic/versions/124_add_token_usage_table.py b/surfsense_backend/alembic/versions/124_add_token_usage_table.py new file mode 100644 index 000000000..2c88e1f51 --- /dev/null +++ b/surfsense_backend/alembic/versions/124_add_token_usage_table.py @@ -0,0 +1,81 @@ +"""124_add_token_usage_table + +Revision ID: 124 +Revises: 123 +Create Date: 2026-04-14 + +Adds token_usage table for tracking LLM token consumption per message. +Supports future extension via usage_type for indexing, image gen, etc. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import JSONB, UUID + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "124" +down_revision: str | None = "123" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + conn = op.get_bind() + if sa.inspect(conn).has_table("token_usage"): + return + + op.create_table( + "token_usage", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("prompt_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column("completion_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column("total_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column("model_breakdown", JSONB, nullable=True), + sa.Column("call_details", JSONB, nullable=True), + sa.Column("usage_type", sa.String(50), nullable=False, server_default="chat"), + sa.Column( + "thread_id", + sa.Integer(), + sa.ForeignKey("new_chat_threads.id", ondelete="CASCADE"), + nullable=True, + ), + sa.Column( + "message_id", + sa.Integer(), + sa.ForeignKey("new_chat_messages.id", ondelete="SET NULL"), + nullable=True, + ), + sa.Column( + "search_space_id", + sa.Integer(), + sa.ForeignKey("searchspaces.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "user_id", + UUID(as_uuid=True), + sa.ForeignKey("user.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + nullable=False, + server_default=sa.func.now(), + ), + ) + + op.create_index("ix_token_usage_thread_id", "token_usage", ["thread_id"]) + op.create_index("ix_token_usage_message_id", "token_usage", ["message_id"]) + op.create_index("ix_token_usage_search_space_id", "token_usage", ["search_space_id"]) + op.create_index("ix_token_usage_user_id", "token_usage", ["user_id"]) + op.create_index("ix_token_usage_usage_type", "token_usage", ["usage_type"]) + + +def downgrade() -> None: + op.drop_table("token_usage") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index e69d28ac2..b9fbe8845 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -647,6 +647,11 @@ class NewChatThread(BaseModel, TimestampMixin): cascade="all, delete-orphan", foreign_keys="[PublicChatSnapshot.thread_id]", ) + token_usages = relationship( + "TokenUsage", + back_populates="thread", + cascade="all, delete-orphan", + ) class NewChatMessage(BaseModel, TimestampMixin): @@ -685,6 +690,63 @@ class NewChatMessage(BaseModel, TimestampMixin): back_populates="message", cascade="all, delete-orphan", ) + token_usage = relationship( + "TokenUsage", + back_populates="message", + uselist=False, + cascade="all, delete-orphan", + ) + + +class TokenUsage(BaseModel, TimestampMixin): + """ + Tracks LLM token consumption per assistant turn. + + One row per usage event. For chat, linked to a specific message via message_id. + The usage_type column enables future extension to track non-chat usage + (indexing, image generation, podcasts, etc.) without schema changes. + """ + + __tablename__ = "token_usage" + + prompt_tokens = Column(Integer, nullable=False, default=0) + completion_tokens = Column(Integer, nullable=False, default=0) + total_tokens = Column(Integer, nullable=False, default=0) + model_breakdown = Column(JSONB, nullable=True) + call_details = Column(JSONB, nullable=True) + + usage_type = Column(String(50), nullable=False, default="chat", index=True) + + thread_id = Column( + Integer, + ForeignKey("new_chat_threads.id", ondelete="CASCADE"), + nullable=True, + index=True, + ) + message_id = Column( + Integer, + ForeignKey("new_chat_messages.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + search_space_id = Column( + Integer, + ForeignKey("searchspaces.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + user_id = Column( + UUID(as_uuid=True), + ForeignKey("user.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + + # Relationships + thread = relationship("NewChatThread", back_populates="token_usages") + message = relationship("NewChatMessage", back_populates="token_usage") + search_space = relationship("SearchSpace") + user = relationship("User") class PublicChatSnapshot(BaseModel, TimestampMixin): From 3cfe53fb7f1ba327edbfdbbeeeef59fb833546de Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:40:32 +0530 Subject: [PATCH 02/14] feat: implement token usage tracking for LLM calls with new accumulator and callback --- .../app/routes/new_chat_routes.py | 34 ++++- surfsense_backend/app/schemas/new_chat.py | 9 ++ .../app/services/llm_router_service.py | 1 + surfsense_backend/app/services/llm_service.py | 5 +- .../app/services/token_tracking_service.py | 129 ++++++++++++++++++ .../app/tasks/chat/stream_new_chat.py | 49 +++++++ 6 files changed, 223 insertions(+), 4 deletions(-) create mode 100644 surfsense_backend/app/services/token_tracking_service.py diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 10a6951fa..a5245456e 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -30,6 +30,7 @@ from app.db import ( NewChatThread, Permission, SearchSpace, + TokenUsage, User, get_async_session, shielded_async_session, @@ -45,6 +46,7 @@ from app.schemas.new_chat import ( NewChatThreadWithMessages, PublicChatSnapshotCreateResponse, PublicChatSnapshotListResponse, + TokenUsageSummary, RegenerateRequest, ResumeRequest, ThreadHistoryLoadResponse, @@ -473,10 +475,13 @@ async def get_thread_messages( # Check thread-level access based on visibility await check_thread_access(session, thread, user) - # Get messages with their authors loaded + # Get messages with their authors and token usage loaded messages_result = await session.execute( select(NewChatMessage) - .options(selectinload(NewChatMessage.author)) + .options( + selectinload(NewChatMessage.author), + selectinload(NewChatMessage.token_usage), + ) .filter(NewChatMessage.thread_id == thread_id) .order_by(NewChatMessage.created_at) ) @@ -493,6 +498,7 @@ async def get_thread_messages( author_id=msg.author_id, author_display_name=msg.author.display_name if msg.author else None, author_avatar_url=msg.author.avatar_url if msg.author else None, + token_usage=TokenUsageSummary.model_validate(msg.token_usage) if msg.token_usage else None, ) for msg in db_messages ] @@ -530,7 +536,11 @@ async def get_thread_full( try: result = await session.execute( select(NewChatThread) - .options(selectinload(NewChatThread.messages)) + .options( + selectinload(NewChatThread.messages).selectinload( + NewChatMessage.token_usage + ), + ) .filter(NewChatThread.id == thread_id) ) thread = result.scalars().first() @@ -935,6 +945,24 @@ async def append_message( # flush assigns the PK/defaults without a round-trip SELECT await session.flush() + + # Persist token usage if provided (for assistant messages) + token_usage_data = raw_body.get("token_usage") + if token_usage_data and message_role == NewChatMessageRole.ASSISTANT: + token_usage_record = TokenUsage( + prompt_tokens=token_usage_data.get("prompt_tokens", 0), + completion_tokens=token_usage_data.get("completion_tokens", 0), + total_tokens=token_usage_data.get("total_tokens", 0), + model_breakdown=token_usage_data.get("usage"), + call_details=token_usage_data.get("call_details"), + usage_type="chat", + thread_id=thread_id, + message_id=db_message.id, + search_space_id=thread.search_space_id, + user_id=user.id, + ) + session.add(token_usage_record) + await session.commit() # Return the in-memory object (already has id from flush) instead of diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 5d8ae207e..e523657a4 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -34,6 +34,14 @@ class NewChatMessageCreate(NewChatMessageBase): thread_id: int +class TokenUsageSummary(BaseModel): + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + model_breakdown: dict | None = None + model_config = ConfigDict(from_attributes=True) + + class NewChatMessageRead(NewChatMessageBase, IDModel, TimestampModel): """Schema for reading a message.""" @@ -41,6 +49,7 @@ class NewChatMessageRead(NewChatMessageBase, IDModel, TimestampModel): author_id: UUID | None = None author_display_name: str | None = None author_avatar_url: str | None = None + token_usage: TokenUsageSummary | None = None model_config = ConfigDict(from_attributes=True) diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py index 63d8d10b9..d97665f7a 100644 --- a/surfsense_backend/app/services/llm_router_service.py +++ b/surfsense_backend/app/services/llm_router_service.py @@ -970,6 +970,7 @@ class ChatLiteLLMRouter(BaseChatModel): messages=formatted_messages, stop=stop, stream=True, + stream_options={"include_usage": True}, **call_kwargs, ) except ContextWindowExceededError as e: diff --git a/surfsense_backend/app/services/llm_service.py b/surfsense_backend/app/services/llm_service.py index 723b17607..c90bdfce3 100644 --- a/surfsense_backend/app/services/llm_service.py +++ b/surfsense_backend/app/services/llm_service.py @@ -22,10 +22,13 @@ litellm.drop_params = True # Memory controls: prevent unbounded internal accumulation litellm.telemetry = False litellm.cache = None -litellm.success_callback = [] litellm.failure_callback = [] litellm.input_callback = [] +from app.services.token_tracking_service import token_tracker + +litellm.callbacks = [token_tracker] + logger = logging.getLogger(__name__) diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py new file mode 100644 index 000000000..434a55ae0 --- /dev/null +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -0,0 +1,129 @@ +""" +Token usage tracking via LiteLLM custom callback. + +Uses a ContextVar-scoped accumulator to group all LLM calls within a single +async request/turn. The accumulated data is emitted via SSE and persisted +when the frontend calls appendMessage. +""" + +from __future__ import annotations + +import dataclasses +import logging +from contextvars import ContextVar +from dataclasses import dataclass, field +from typing import Any + +from litellm.integrations.custom_logger import CustomLogger + +logger = logging.getLogger(__name__) + + +@dataclass +class TokenCallRecord: + model: str + prompt_tokens: int + completion_tokens: int + total_tokens: int + + +@dataclass +class TurnTokenAccumulator: + """Accumulates token usage across all LLM calls within a single user turn.""" + + calls: list[TokenCallRecord] = field(default_factory=list) + + def add( + self, + model: str, + prompt_tokens: int, + completion_tokens: int, + total_tokens: int, + ) -> None: + self.calls.append( + TokenCallRecord( + model=model, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + ) + ) + + def per_message_summary(self) -> dict[str, dict[str, int]]: + """Return token counts grouped by model name.""" + by_model: dict[str, dict[str, int]] = {} + for c in self.calls: + entry = by_model.setdefault( + c.model, + {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}, + ) + entry["prompt_tokens"] += c.prompt_tokens + entry["completion_tokens"] += c.completion_tokens + entry["total_tokens"] += c.total_tokens + return by_model + + @property + def grand_total(self) -> int: + return sum(c.total_tokens for c in self.calls) + + @property + def total_prompt_tokens(self) -> int: + return sum(c.prompt_tokens for c in self.calls) + + @property + def total_completion_tokens(self) -> int: + return sum(c.completion_tokens for c in self.calls) + + def serialized_calls(self) -> list[dict[str, Any]]: + return [dataclasses.asdict(c) for c in self.calls] + + +_turn_accumulator: ContextVar[TurnTokenAccumulator | None] = ContextVar( + "_turn_accumulator", default=None +) + + +def start_turn() -> TurnTokenAccumulator: + """Create a fresh accumulator for the current async context and return it.""" + acc = TurnTokenAccumulator() + _turn_accumulator.set(acc) + return acc + + +def get_current_accumulator() -> TurnTokenAccumulator | None: + return _turn_accumulator.get() + + +class TokenTrackingCallback(CustomLogger): + """LiteLLM callback that captures token usage into the turn accumulator.""" + + async def async_log_success_event( + self, + kwargs: dict[str, Any], + response_obj: Any, + start_time: Any, + end_time: Any, + ) -> None: + acc = _turn_accumulator.get() + if acc is None: + return + + usage = getattr(response_obj, "usage", None) + if not usage: + return + + prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0 + completion_tokens = getattr(usage, "completion_tokens", 0) or 0 + total_tokens = getattr(usage, "total_tokens", 0) or 0 + + model = kwargs.get("model", "unknown") + + acc.add( + model=model, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + ) + + +token_tracker = TokenTrackingCallback() diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index fd118528e..4459b9c06 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -1170,6 +1170,10 @@ async def stream_new_chat( _t_total = time.perf_counter() log_system_snapshot("stream_new_chat_START") + from app.services.token_tracking_service import start_turn + + accumulator = start_turn() + session = async_session_maker() try: # Mark AI as responding to this user for live collaboration @@ -1527,6 +1531,17 @@ async def stream_new_chat( if stream_result.is_interrupted: if title_task is not None and not title_task.done(): title_task.cancel() + + usage_summary = accumulator.per_message_summary() + if usage_summary: + yield streaming_service.format_data("token-usage", { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }) + yield streaming_service.format_finish_step() yield streaming_service.format_finish() yield streaming_service.format_done() @@ -1548,6 +1563,16 @@ async def stream_new_chat( chat_id, generated_title ) + usage_summary = accumulator.per_message_summary() + if usage_summary: + yield streaming_service.format_data("token-usage", { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }) + # Fire background memory extraction if the agent didn't handle it. # Shared threads write to team memory; private threads write to user memory. if not stream_result.agent_called_update_memory: @@ -1646,6 +1671,10 @@ async def stream_resume_chat( stream_result = StreamResult() _t_total = time.perf_counter() + from app.services.token_tracking_service import start_turn + + accumulator = start_turn() + session = async_session_maker() try: if user_id: @@ -1769,11 +1798,31 @@ async def stream_resume_chat( chat_id, ) if stream_result.is_interrupted: + usage_summary = accumulator.per_message_summary() + if usage_summary: + yield streaming_service.format_data("token-usage", { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }) + yield streaming_service.format_finish_step() yield streaming_service.format_finish() yield streaming_service.format_done() return + usage_summary = accumulator.per_message_summary() + if usage_summary: + yield streaming_service.format_data("token-usage", { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }) + yield streaming_service.format_finish_step() yield streaming_service.format_finish() yield streaming_service.format_done() From 55099a20acebcff99cd0561e9b1e4bea963f2dbe Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:40:46 +0530 Subject: [PATCH 03/14] feat: enhance token usage tracking in chat messages with UI integration and dropdown display --- .../new-chat/[[...chat_id]]/page.tsx | 48 ++++++++++++++ .../assistant-ui/assistant-message.tsx | 62 ++++++++++++++++++- surfsense_web/lib/chat/message-utils.ts | 13 ++-- surfsense_web/lib/chat/streaming-state.ts | 10 +++ surfsense_web/lib/chat/thread-persistence.ts | 12 +++- 5 files changed, 137 insertions(+), 8 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 58eb58f4b..34bf0c09e 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -624,6 +624,7 @@ export default function NewChatPage() { }; const { contentParts, toolCallIndices } = contentPartsState; let wasInterrupted = false; + let tokenUsageData: Record | null = null; // Add placeholder assistant message setMessages((prev) => [ @@ -821,6 +822,10 @@ export default function NewChatPage() { break; } + case "data-token-usage": + tokenUsageData = parsed.data; + break; + case "error": throw new Error(parsed.errorText || "Server error"); } @@ -828,6 +833,16 @@ export default function NewChatPage() { batcher.flush(); + if (tokenUsageData) { + setMessages((prev) => + prev.map((m) => + m.id === assistantMsgId + ? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record ?? {}), usage: tokenUsageData } } } + : m + ) + ); + } + // Skip persistence for interrupted messages -- handleResume will persist the final version const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI); if (contentParts.length > 0 && !wasInterrupted) { @@ -835,6 +850,7 @@ export default function NewChatPage() { const savedMessage = await appendMessage(currentThreadId, { role: "assistant", content: finalContent, + token_usage: tokenUsageData ?? undefined, }); // Update message ID from temporary to database ID so comments work immediately @@ -965,6 +981,7 @@ export default function NewChatPage() { toolCallIndices: new Map(), }; const { contentParts, toolCallIndices } = contentPartsState; + let tokenUsageData: Record | null = null; const existingMsg = messages.find((m) => m.id === assistantMsgId); if (existingMsg && Array.isArray(existingMsg.content)) { @@ -1149,6 +1166,10 @@ export default function NewChatPage() { break; } + case "data-token-usage": + tokenUsageData = parsed.data; + break; + case "error": throw new Error(parsed.errorText || "Server error"); } @@ -1156,12 +1177,23 @@ export default function NewChatPage() { batcher.flush(); + if (tokenUsageData) { + setMessages((prev) => + prev.map((m) => + m.id === assistantMsgId + ? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record ?? {}), usage: tokenUsageData } } } + : m + ) + ); + } + const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI); if (contentParts.length > 0) { try { const savedMessage = await appendMessage(resumeThreadId, { role: "assistant", content: finalContent, + token_usage: tokenUsageData ?? undefined, }); const newMsgId = `msg-${savedMessage.id}`; setMessages((prev) => @@ -1319,6 +1351,7 @@ export default function NewChatPage() { }; const { contentParts, toolCallIndices } = contentPartsState; const batcher = new FrameBatchedUpdater(); + let tokenUsageData: Record | null = null; // Add placeholder messages to UI // Always add back the user message (with new query for edit, or original content for reload) @@ -1428,6 +1461,10 @@ export default function NewChatPage() { break; } + case "data-token-usage": + tokenUsageData = parsed.data; + break; + case "error": throw new Error(parsed.errorText || "Server error"); } @@ -1435,6 +1472,16 @@ export default function NewChatPage() { batcher.flush(); + if (tokenUsageData) { + setMessages((prev) => + prev.map((m) => + m.id === assistantMsgId + ? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record ?? {}), usage: tokenUsageData } } } + : m + ) + ); + } + // Persist messages after streaming completes const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI); if (contentParts.length > 0) { @@ -1459,6 +1506,7 @@ export default function NewChatPage() { const savedMessage = await appendMessage(threadId, { role: "assistant", content: finalContent, + token_usage: tokenUsageData ?? undefined, }); // Update assistant message ID to database ID diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index 764acabba..25a579947 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -15,6 +15,7 @@ import { ExternalLink, Globe, MessageSquare, + MoreHorizontalIcon, RefreshCwIcon, } from "lucide-react"; import dynamic from "next/dynamic"; @@ -39,6 +40,14 @@ import { DrawerHeader, DrawerTitle, } from "@/components/ui/drawer"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuLabel, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; +import { Button } from "@/components/ui/button"; import { useComments } from "@/hooks/use-comments"; import { useMediaQuery } from "@/hooks/use-media-query"; import { useElectronAPI } from "@/hooks/use-platform"; @@ -366,6 +375,56 @@ export const MessageError: FC = () => { ); }; +const TokenUsageDropdown: FC = () => { + const usage = useAuiState(({ message }) => { + const custom = message?.metadata?.custom as Record | undefined; + return custom?.usage as Record | undefined; + }); + + if (!usage) return null; + + const totalTokens = (usage.total_tokens as number) ?? 0; + if (totalTokens === 0) return null; + + const modelBreakdown = (usage.usage ?? usage.model_breakdown) as + | Record + | undefined; + + const models = modelBreakdown ? Object.entries(modelBreakdown) : []; + + return ( + + + + + + + Token Usage + + {models.length > 0 ? ( + models.map(([model, counts]) => ( + e.preventDefault()}> + {model} + + {counts.total_tokens.toLocaleString()} tokens + + + )) + ) : ( + e.preventDefault()}> + + {totalTokens.toLocaleString()} tokens + + + )} + + + ); +}; + const AssistantMessageInner: FC = () => { const isMobile = !useMediaQuery("(min-width: 768px)"); @@ -427,7 +486,7 @@ const AssistantMessageInner: FC = () => { )} -
+
@@ -624,6 +683,7 @@ const AssistantActionBar: FC = () => { )} + ); }; diff --git a/surfsense_web/lib/chat/message-utils.ts b/surfsense_web/lib/chat/message-utils.ts index 7c0da03c4..6ec5bd53d 100644 --- a/surfsense_web/lib/chat/message-utils.ts +++ b/surfsense_web/lib/chat/message-utils.ts @@ -39,13 +39,16 @@ export function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { content = [{ type: "text", text: String(msg.content) }]; } - const metadata = msg.author_id + const metadata = (msg.author_id || msg.token_usage) ? { custom: { - author: { - displayName: msg.author_display_name ?? null, - avatarUrl: msg.author_avatar_url ?? null, - }, + ...(msg.author_id && { + author: { + displayName: msg.author_display_name ?? null, + avatarUrl: msg.author_avatar_url ?? null, + }, + }), + ...(msg.token_usage && { usage: msg.token_usage }), }, } : undefined; diff --git a/surfsense_web/lib/chat/streaming-state.ts b/surfsense_web/lib/chat/streaming-state.ts index d54650d40..e5d77672f 100644 --- a/surfsense_web/lib/chat/streaming-state.ts +++ b/surfsense_web/lib/chat/streaming-state.ts @@ -238,6 +238,16 @@ export type SSEEvent = | { type: "data-thread-title-update"; data: { threadId: number; title: string } } | { type: "data-interrupt-request"; data: Record } | { type: "data-documents-updated"; data: Record } + | { + type: "data-token-usage"; + data: { + usage: Record; + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + call_details: Array<{ model: string; prompt_tokens: number; completion_tokens: number; total_tokens: number }>; + }; + } | { type: "error"; errorText: string }; /** diff --git a/surfsense_web/lib/chat/thread-persistence.ts b/surfsense_web/lib/chat/thread-persistence.ts index 08c08ba78..de9827c32 100644 --- a/surfsense_web/lib/chat/thread-persistence.ts +++ b/surfsense_web/lib/chat/thread-persistence.ts @@ -26,6 +26,13 @@ export interface ThreadRecord { has_comments?: boolean; } +export interface TokenUsageSummary { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + model_breakdown?: Record | null; +} + export interface MessageRecord { id: number; thread_id: number; @@ -35,6 +42,7 @@ export interface MessageRecord { author_id?: string | null; author_display_name?: string | null; author_avatar_url?: string | null; + token_usage?: TokenUsageSummary | null; } export interface ThreadListResponse { @@ -111,11 +119,11 @@ export async function getThreadMessages(threadId: number): Promise { return baseApiService.post(`/api/v1/threads/${threadId}/messages`, undefined, { body: message, From 5af6005163d1b9b749ae8e32f25c87a0234768e6 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:28:31 +0530 Subject: [PATCH 04/14] feat: improve token usage tracking and response handling in chat routes and services --- .../app/routes/new_chat_routes.py | 17 +++++++++++++---- .../app/services/token_tracking_service.py | 7 +++++++ .../app/tasks/chat/stream_new_chat.py | 19 +++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index a5245456e..fe79c7c06 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -46,12 +46,12 @@ from app.schemas.new_chat import ( NewChatThreadWithMessages, PublicChatSnapshotCreateResponse, PublicChatSnapshotListResponse, - TokenUsageSummary, RegenerateRequest, ResumeRequest, ThreadHistoryLoadResponse, ThreadListItem, ThreadListResponse, + TokenUsageSummary, ) from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat from app.users import current_active_user @@ -965,9 +965,17 @@ async def append_message( await session.commit() - # Return the in-memory object (already has id from flush) instead of - # doing an extra refresh() SELECT. - return db_message + # Build response manually to avoid lazy-loading the token_usage + # relationship after commit (which would trigger MissingGreenlet). + return NewChatMessageRead( + id=db_message.id, + thread_id=db_message.thread_id, + role=db_message.role, + content=db_message.content, + created_at=db_message.created_at, + author_id=db_message.author_id, + token_usage=None, + ) except HTTPException: raise @@ -1031,6 +1039,7 @@ async def list_messages( # Get messages query = ( select(NewChatMessage) + .options(selectinload(NewChatMessage.token_usage)) .filter(NewChatMessage.thread_id == thread_id) .order_by(NewChatMessage.created_at) .offset(skip) diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py index 434a55ae0..98cb13bb8 100644 --- a/surfsense_backend/app/services/token_tracking_service.py +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -87,6 +87,7 @@ def start_turn() -> TurnTokenAccumulator: """Create a fresh accumulator for the current async context and return it.""" acc = TurnTokenAccumulator() _turn_accumulator.set(acc) + logger.info("[TokenTracking] start_turn: new accumulator created (id=%s)", id(acc)) return acc @@ -106,10 +107,12 @@ class TokenTrackingCallback(CustomLogger): ) -> None: acc = _turn_accumulator.get() if acc is None: + logger.debug("[TokenTracking] async_log_success_event fired but no accumulator in context") return usage = getattr(response_obj, "usage", None) if not usage: + logger.debug("[TokenTracking] async_log_success_event fired but response has no usage data") return prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0 @@ -124,6 +127,10 @@ class TokenTrackingCallback(CustomLogger): completion_tokens=completion_tokens, total_tokens=total_tokens, ) + logger.info( + "[TokenTracking] Captured: model=%s prompt=%d completion=%d total=%d (accumulator now has %d calls)", + model, prompt_tokens, completion_tokens, total_tokens, len(acc.calls), + ) token_tracker = TokenTrackingCallback() diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 4459b9c06..2002e1585 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -1532,7 +1532,12 @@ async def stream_new_chat( if title_task is not None and not title_task.done(): title_task.cancel() + await asyncio.sleep(0.2) usage_summary = accumulator.per_message_summary() + _perf_log.info( + "[token_usage] interrupted new_chat: calls=%d total=%d summary=%s", + len(accumulator.calls), accumulator.grand_total, usage_summary, + ) if usage_summary: yield streaming_service.format_data("token-usage", { "usage": usage_summary, @@ -1563,7 +1568,12 @@ async def stream_new_chat( chat_id, generated_title ) + await asyncio.sleep(0.2) usage_summary = accumulator.per_message_summary() + _perf_log.info( + "[token_usage] normal new_chat: calls=%d total=%d summary=%s", + len(accumulator.calls), accumulator.grand_total, usage_summary, + ) if usage_summary: yield streaming_service.format_data("token-usage", { "usage": usage_summary, @@ -1797,8 +1807,13 @@ async def stream_resume_chat( time.perf_counter() - _t_stream_start, chat_id, ) + await asyncio.sleep(0.2) if stream_result.is_interrupted: usage_summary = accumulator.per_message_summary() + _perf_log.info( + "[token_usage] interrupted resume_chat: calls=%d total=%d summary=%s", + len(accumulator.calls), accumulator.grand_total, usage_summary, + ) if usage_summary: yield streaming_service.format_data("token-usage", { "usage": usage_summary, @@ -1814,6 +1829,10 @@ async def stream_resume_chat( return usage_summary = accumulator.per_message_summary() + _perf_log.info( + "[token_usage] normal resume_chat: calls=%d total=%d summary=%s", + len(accumulator.calls), accumulator.grand_total, usage_summary, + ) if usage_summary: yield streaming_service.format_data("token-usage", { "usage": usage_summary, From 5510c1de033777890af2706a91ca9b831d537c10 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:47:59 +0530 Subject: [PATCH 05/14] feat: integrate token usage context and enhance message info display in chat UI --- .../new-chat/[[...chat_id]]/page.tsx | 60 ++++++-------- .../assistant-ui/assistant-message.tsx | 73 +++++++++-------- .../assistant-ui/token-usage-context.tsx | 79 +++++++++++++++++++ 3 files changed, 145 insertions(+), 67 deletions(-) create mode 100644 surfsense_web/components/assistant-ui/token-usage-context.tsx diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 34bf0c09e..ff953eaf9 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -42,6 +42,7 @@ import { useChatSessionStateSync } from "@/hooks/use-chat-session-state"; import { useMessagesSync } from "@/hooks/use-messages-sync"; import { documentsApiService } from "@/lib/apis/documents-api.service"; import { getBearerToken } from "@/lib/auth-utils"; +import { createTokenUsageStore, TokenUsageProvider, type TokenUsageData } from "@/components/assistant-ui/token-usage-context"; import { convertToThreadMessage } from "@/lib/chat/message-utils"; import { isPodcastGenerating, @@ -195,6 +196,7 @@ export default function NewChatPage() { const [currentThread, setCurrentThread] = useState(null); const [messages, setMessages] = useState([]); const [isRunning, setIsRunning] = useState(false); + const [tokenUsageStore] = useState(() => createTokenUsageStore()); const abortControllerRef = useRef(null); const [pendingInterrupt, setPendingInterrupt] = useState<{ threadId: number; @@ -307,6 +309,7 @@ export default function NewChatPage() { setThreadId(null); setCurrentThread(null); setMentionedDocuments([]); + tokenUsageStore.clear(); setSidebarDocuments([]); setMessageDocumentsMap({}); clearPlanOwnerRegistry(); @@ -330,6 +333,12 @@ export default function NewChatPage() { const loadedMessages = messagesResponse.messages.map(convertToThreadMessage); setMessages(loadedMessages); + for (const msg of messagesResponse.messages) { + if (msg.token_usage) { + tokenUsageStore.set(`msg-${msg.id}`, msg.token_usage as TokenUsageData); + } + } + const restoredDocsMap: Record = {}; for (const msg of messagesResponse.messages) { if (msg.role === "user") { @@ -374,6 +383,7 @@ export default function NewChatPage() { closeEditorPanel, removeChatTab, searchSpaceId, + tokenUsageStore, ]); // Initialize on mount, and re-init when switching search spaces (even if urlChatId is the same) @@ -824,6 +834,7 @@ export default function NewChatPage() { case "data-token-usage": tokenUsageData = parsed.data; + tokenUsageStore.set(assistantMsgId, parsed.data as TokenUsageData); break; case "error": @@ -833,16 +844,6 @@ export default function NewChatPage() { batcher.flush(); - if (tokenUsageData) { - setMessages((prev) => - prev.map((m) => - m.id === assistantMsgId - ? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record ?? {}), usage: tokenUsageData } } } - : m - ) - ); - } - // Skip persistence for interrupted messages -- handleResume will persist the final version const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI); if (contentParts.length > 0 && !wasInterrupted) { @@ -855,8 +856,9 @@ export default function NewChatPage() { // Update message ID from temporary to database ID so comments work immediately const newMsgId = `msg-${savedMessage.id}`; + tokenUsageStore.rename(assistantMsgId, newMsgId); setMessages((prev) => - prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)) + prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)), ); // Update pending interrupt with the new persisted message ID @@ -946,6 +948,7 @@ export default function NewChatPage() { currentUser, disabledTools, updateChatTabTitle, + tokenUsageStore, ] ); @@ -1168,6 +1171,7 @@ export default function NewChatPage() { case "data-token-usage": tokenUsageData = parsed.data; + tokenUsageStore.set(assistantMsgId, parsed.data as TokenUsageData); break; case "error": @@ -1177,16 +1181,6 @@ export default function NewChatPage() { batcher.flush(); - if (tokenUsageData) { - setMessages((prev) => - prev.map((m) => - m.id === assistantMsgId - ? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record ?? {}), usage: tokenUsageData } } } - : m - ) - ); - } - const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI); if (contentParts.length > 0) { try { @@ -1196,8 +1190,9 @@ export default function NewChatPage() { token_usage: tokenUsageData ?? undefined, }); const newMsgId = `msg-${savedMessage.id}`; + tokenUsageStore.rename(assistantMsgId, newMsgId); setMessages((prev) => - prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)) + prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)), ); } catch (err) { console.error("Failed to persist resumed assistant message:", err); @@ -1215,7 +1210,7 @@ export default function NewChatPage() { abortControllerRef.current = null; } }, - [pendingInterrupt, messages, searchSpaceId] + [pendingInterrupt, messages, searchSpaceId, tokenUsageStore] ); useEffect(() => { @@ -1463,6 +1458,7 @@ export default function NewChatPage() { case "data-token-usage": tokenUsageData = parsed.data; + tokenUsageStore.set(assistantMsgId, parsed.data as TokenUsageData); break; case "error": @@ -1472,16 +1468,6 @@ export default function NewChatPage() { batcher.flush(); - if (tokenUsageData) { - setMessages((prev) => - prev.map((m) => - m.id === assistantMsgId - ? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record ?? {}), usage: tokenUsageData } } } - : m - ) - ); - } - // Persist messages after streaming completes const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI); if (contentParts.length > 0) { @@ -1509,10 +1495,10 @@ export default function NewChatPage() { token_usage: tokenUsageData ?? undefined, }); - // Update assistant message ID to database ID const newMsgId = `msg-${savedMessage.id}`; + tokenUsageStore.rename(assistantMsgId, newMsgId); setMessages((prev) => - prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)) + prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)), ); trackChatResponseReceived(searchSpaceId, threadId); @@ -1547,7 +1533,7 @@ export default function NewChatPage() { abortControllerRef.current = null; } }, - [threadId, searchSpaceId, messages, disabledTools] + [threadId, searchSpaceId, messages, disabledTools, tokenUsageStore] ); // Handle editing a message - truncates history and regenerates with new query @@ -1616,6 +1602,7 @@ export default function NewChatPage() { } return ( +
@@ -1627,5 +1614,6 @@ export default function NewChatPage() {
+
); } diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index 25a579947..dff52c3f5 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -45,12 +45,14 @@ import { DropdownMenuContent, DropdownMenuItem, DropdownMenuLabel, + DropdownMenuSeparator, DropdownMenuTrigger, } from "@/components/ui/dropdown-menu"; import { Button } from "@/components/ui/button"; import { useComments } from "@/hooks/use-comments"; import { useMediaQuery } from "@/hooks/use-media-query"; import { useElectronAPI } from "@/hooks/use-platform"; +import { useTokenUsage } from "@/components/assistant-ui/token-usage-context"; import { cn } from "@/lib/utils"; // Captured once at module load — survives client-side navigations that strip the query param. @@ -375,22 +377,24 @@ export const MessageError: FC = () => { ); }; -const TokenUsageDropdown: FC = () => { - const usage = useAuiState(({ message }) => { - const custom = message?.metadata?.custom as Record | undefined; - return custom?.usage as Record | undefined; +function formatMessageDate(date: Date): string { + return date.toLocaleDateString(undefined, { + month: "short", + day: "numeric", + hour: "numeric", + minute: "2-digit", + hour12: true, }); +} - if (!usage) return null; - - const totalTokens = (usage.total_tokens as number) ?? 0; - if (totalTokens === 0) return null; - - const modelBreakdown = (usage.usage ?? usage.model_breakdown) as - | Record - | undefined; +const MessageInfoDropdown: FC = () => { + const messageId = useAuiState(({ message }) => message?.id); + const createdAt = useAuiState(({ message }) => message?.createdAt); + const usage = useTokenUsage(messageId); + const modelBreakdown = usage ? (usage.usage ?? usage.model_breakdown) : undefined; const models = modelBreakdown ? Object.entries(modelBreakdown) : []; + const hasUsage = usage && usage.total_tokens > 0; return ( @@ -401,24 +405,31 @@ const TokenUsageDropdown: FC = () => { - - Token Usage - - {models.length > 0 ? ( - models.map(([model, counts]) => ( - e.preventDefault()}> - {model} - - {counts.total_tokens.toLocaleString()} tokens - - - )) - ) : ( - e.preventDefault()}> - - {totalTokens.toLocaleString()} tokens - - + {createdAt && ( + + {formatMessageDate(createdAt)} + + )} + {hasUsage && ( + <> + + {models.length > 0 ? ( + models.map(([model, counts]) => ( + e.preventDefault()}> + {model} + + {counts.total_tokens.toLocaleString()} tokens + + + )) + ) : ( + e.preventDefault()}> + + {usage.total_tokens.toLocaleString()} tokens + + + )} + )} @@ -683,7 +694,7 @@ const AssistantActionBar: FC = () => { )} - + ); }; diff --git a/surfsense_web/components/assistant-ui/token-usage-context.tsx b/surfsense_web/components/assistant-ui/token-usage-context.tsx new file mode 100644 index 000000000..8b82f33ff --- /dev/null +++ b/surfsense_web/components/assistant-ui/token-usage-context.tsx @@ -0,0 +1,79 @@ +"use client"; + +import { createContext, useContext, useCallback, useSyncExternalStore, type FC, type ReactNode } from "react"; + +export interface TokenUsageData { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + usage?: Record; + model_breakdown?: Record; +} + +type Listener = () => void; + +class TokenUsageStore { + private data = new Map(); + private listeners = new Set(); + + get(messageId: string): TokenUsageData | undefined { + return this.data.get(messageId); + } + + set(messageId: string, usage: TokenUsageData): void { + this.data.set(messageId, usage); + this.notify(); + } + + rename(oldId: string, newId: string): void { + const usage = this.data.get(oldId); + if (usage) { + this.data.delete(oldId); + this.data.set(newId, usage); + this.notify(); + } + } + + clear(): void { + this.data.clear(); + this.notify(); + } + + subscribe = (listener: Listener): (() => void) => { + this.listeners.add(listener); + return () => this.listeners.delete(listener); + }; + + private notify(): void { + for (const l of this.listeners) l(); + } +} + +const TokenUsageContext = createContext(null); + +export const TokenUsageProvider: FC<{ store: TokenUsageStore; children: ReactNode }> = ({ store, children }) => ( + {children} +); + +export function useTokenUsageStore(): TokenUsageStore { + const store = useContext(TokenUsageContext); + if (!store) throw new Error("useTokenUsageStore must be used within TokenUsageProvider"); + return store; +} + +export function useTokenUsage(messageId: string | undefined): TokenUsageData | undefined { + const store = useContext(TokenUsageContext); + const getSnapshot = useCallback( + () => (store && messageId ? store.get(messageId) : undefined), + [store, messageId], + ); + const subscribe = useCallback( + (onStoreChange: () => void) => (store ? store.subscribe(onStoreChange) : () => {}), + [store], + ); + return useSyncExternalStore(subscribe, getSnapshot, getSnapshot); +} + +export function createTokenUsageStore(): TokenUsageStore { + return new TokenUsageStore(); +} From f21bdc0668e94f70f6cb67659a3d0b59572e3317 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 15:29:02 +0530 Subject: [PATCH 06/14] feat: enhance LLM response handling and token usage tracking in chat services and UI components --- .../app/services/llm_router_service.py | 29 ++++++++++++--- .../app/services/token_tracking_service.py | 4 +++ .../app/tasks/chat/stream_new_chat.py | 36 ++++++++++++------- .../assistant-ui/assistant-message.tsx | 35 +++++++++--------- 4 files changed, 67 insertions(+), 37 deletions(-) diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py index d97665f7a..1bf9e2386 100644 --- a/surfsense_backend/app/services/llm_router_service.py +++ b/surfsense_backend/app/services/llm_router_service.py @@ -820,7 +820,9 @@ class ChatLiteLLMRouter(BaseChatModel): ) # Convert response to ChatResult with potential tool calls - message = self._convert_response_to_message(response.choices[0].message) + message = self._convert_response_to_message( + response.choices[0].message, response=response + ) generation = ChatGeneration(message=message) return ChatResult(generations=[generation]) @@ -886,7 +888,9 @@ class ChatLiteLLMRouter(BaseChatModel): ) # Convert response to ChatResult with potential tool calls - message = self._convert_response_to_message(response.choices[0].message) + message = self._convert_response_to_message( + response.choices[0].message, response=response + ) generation = ChatGeneration(message=message) return ChatResult(generations=[generation]) @@ -1076,7 +1080,9 @@ class ChatLiteLLMRouter(BaseChatModel): return result - def _convert_response_to_message(self, response_message: Any) -> AIMessage: + def _convert_response_to_message( + self, response_message: Any, response: Any = None + ) -> AIMessage: """Convert a LiteLLM response message to a LangChain AIMessage.""" import json @@ -1099,9 +1105,22 @@ class ChatLiteLLMRouter(BaseChatModel): tool_call["args"] = tc.function.arguments tool_calls.append(tool_call) + extra_kwargs: dict[str, Any] = {} + if response: + usage = getattr(response, "usage", None) + if usage: + extra_kwargs["usage_metadata"] = { + "input_tokens": getattr(usage, "prompt_tokens", 0) or 0, + "output_tokens": getattr(usage, "completion_tokens", 0) or 0, + "total_tokens": getattr(usage, "total_tokens", 0) or 0, + } + extra_kwargs["response_metadata"] = { + "model_name": getattr(response, "model", "unknown"), + } + if tool_calls: - return AIMessage(content=content, tool_calls=tool_calls) - return AIMessage(content=content) + return AIMessage(content=content, tool_calls=tool_calls, **extra_kwargs) + return AIMessage(content=content, **extra_kwargs) def _convert_delta_to_chunk(self, delta: Any) -> AIMessageChunk | None: """Convert a streaming delta to an AIMessageChunk.""" diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py index 98cb13bb8..6a5b3793f 100644 --- a/surfsense_backend/app/services/token_tracking_service.py +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -4,6 +4,10 @@ Token usage tracking via LiteLLM custom callback. Uses a ContextVar-scoped accumulator to group all LLM calls within a single async request/turn. The accumulated data is emitted via SSE and persisted when the frontend calls appendMessage. + +Agent LLM calls are captured automatically via the async callback. +Title-generation usage is added explicitly from the LangChain response +metadata to avoid callback-timing issues. """ from __future__ import annotations diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 2002e1585..364a14bad 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -1459,22 +1459,35 @@ async def stream_new_chat( ) is_first_response = (assistant_count_result.scalar() or 0) == 0 - title_task: asyncio.Task[str | None] | None = None + title_task: asyncio.Task[tuple[str | None, dict[str, int] | None]] | None = None if is_first_response: - async def _generate_title() -> str | None: + async def _generate_title() -> tuple[str | None, dict[str, int] | None]: + """Return (title, usage_dict) where usage_dict has model/prompt/completion/total.""" try: title_chain = TITLE_GENERATION_PROMPT_TEMPLATE | llm title_result = await title_chain.ainvoke( {"user_query": user_query[:500]} ) - if title_result and hasattr(title_result, "content"): - raw_title = title_result.content.strip() - if raw_title and len(raw_title) <= 100: - return raw_title.strip("\"'") + usage_dict: dict[str, int] | None = None + if title_result: + um = getattr(title_result, "usage_metadata", None) + if um: + rm = getattr(title_result, "response_metadata", None) or {} + raw_model = rm.get("model_name", "unknown") + usage_dict = { + "model": raw_model.split("/", 1)[-1] if "/" in raw_model else raw_model, + "prompt_tokens": um.get("input_tokens", 0), + "completion_tokens": um.get("output_tokens", 0), + "total_tokens": um.get("total_tokens", 0), + } + if hasattr(title_result, "content"): + raw_title = title_result.content.strip() + if raw_title and len(raw_title) <= 100: + return raw_title.strip("\"'"), usage_dict + return None, usage_dict except Exception: - pass - return None + return None, None title_task = asyncio.create_task(_generate_title()) @@ -1506,7 +1519,7 @@ async def stream_new_chat( # Inject title update mid-stream as soon as the background task finishes if title_task is not None and title_task.done() and not title_emitted: - generated_title = title_task.result() + generated_title, _title_usage = title_task.result() if generated_title: async with shielded_async_session() as title_session: title_thread_result = await title_session.execute( @@ -1532,7 +1545,6 @@ async def stream_new_chat( if title_task is not None and not title_task.done(): title_task.cancel() - await asyncio.sleep(0.2) usage_summary = accumulator.per_message_summary() _perf_log.info( "[token_usage] interrupted new_chat: calls=%d total=%d summary=%s", @@ -1554,7 +1566,7 @@ async def stream_new_chat( # If the title task didn't finish during streaming, await it now if title_task is not None and not title_emitted: - generated_title = await title_task + generated_title, _title_usage = await title_task if generated_title: async with shielded_async_session() as title_session: title_thread_result = await title_session.execute( @@ -1568,7 +1580,6 @@ async def stream_new_chat( chat_id, generated_title ) - await asyncio.sleep(0.2) usage_summary = accumulator.per_message_summary() _perf_log.info( "[token_usage] normal new_chat: calls=%d total=%d summary=%s", @@ -1807,7 +1818,6 @@ async def stream_resume_chat( time.perf_counter() - _t_stream_start, chat_id, ) - await asyncio.sleep(0.2) if stream_result.is_interrupted: usage_summary = accumulator.per_message_summary() _perf_log.info( diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index dff52c3f5..7a357dc85 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -1,4 +1,5 @@ import { + ActionBarMorePrimitive, ActionBarPrimitive, AuiIf, ErrorPrimitive, @@ -40,14 +41,7 @@ import { DrawerHeader, DrawerTitle, } from "@/components/ui/drawer"; -import { - DropdownMenu, - DropdownMenuContent, - DropdownMenuItem, - DropdownMenuLabel, - DropdownMenuSeparator, - DropdownMenuTrigger, -} from "@/components/ui/dropdown-menu"; +import { DropdownMenuLabel } from "@/components/ui/dropdown-menu"; import { Button } from "@/components/ui/button"; import { useComments } from "@/hooks/use-comments"; import { useMediaQuery } from "@/hooks/use-media-query"; @@ -397,14 +391,17 @@ const MessageInfoDropdown: FC = () => { const hasUsage = usage && usage.total_tokens > 0; return ( - - + + - - + + {createdAt && ( {formatMessageDate(createdAt)} @@ -412,27 +409,27 @@ const MessageInfoDropdown: FC = () => { )} {hasUsage && ( <> - + {models.length > 0 ? ( models.map(([model, counts]) => ( - e.preventDefault()}> + e.preventDefault()}> {model} {counts.total_tokens.toLocaleString()} tokens - + )) ) : ( - e.preventDefault()}> + e.preventDefault()}> {usage.total_tokens.toLocaleString()} tokens - + )} )} - - + + ); }; From 833799457bdba15927915ca874de5d688efb10f7 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 15:35:04 +0530 Subject: [PATCH 07/14] feat: integrate new LLM configuration handling and enhance model display in message info dropdown --- .../assistant-ui/assistant-message.tsx | 47 +++++++++++++++---- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index 7a357dc85..4f3dd7c00 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -23,6 +23,10 @@ import dynamic from "next/dynamic"; import type { FC } from "react"; import { useEffect, useMemo, useRef, useState } from "react"; import { commentsEnabledAtom, targetCommentIdAtom } from "@/atoms/chat/current-thread.atom"; +import { + globalNewLLMConfigsAtom, + newLLMConfigsAtom, +} from "@/atoms/new-llm-config/new-llm-config-query.atoms"; import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; import { CitationMetadataProvider, @@ -47,6 +51,7 @@ import { useComments } from "@/hooks/use-comments"; import { useMediaQuery } from "@/hooks/use-media-query"; import { useElectronAPI } from "@/hooks/use-platform"; import { useTokenUsage } from "@/components/assistant-ui/token-usage-context"; +import { getProviderIcon } from "@/lib/provider-icons"; import { cn } from "@/lib/utils"; // Captured once at module load — survives client-side navigations that strip the query param. @@ -386,6 +391,26 @@ const MessageInfoDropdown: FC = () => { const createdAt = useAuiState(({ message }) => message?.createdAt); const usage = useTokenUsage(messageId); + const { data: localConfigs } = useAtomValue(newLLMConfigsAtom); + const { data: globalConfigs } = useAtomValue(globalNewLLMConfigsAtom); + + const configByModel = useMemo(() => { + const map = new Map(); + for (const c of [...(globalConfigs ?? []), ...(localConfigs ?? [])]) { + map.set(c.model_name, { name: c.name, provider: c.provider }); + } + return map; + }, [localConfigs, globalConfigs]); + + const resolveModel = (modelKey: string) => { + const parts = modelKey.split("/"); + const bare = parts[parts.length - 1] ?? modelKey; + const config = configByModel.get(modelKey) ?? configByModel.get(bare); + return config + ? { name: config.name, icon: getProviderIcon(config.provider, { className: "size-3.5" }) } + : { name: modelKey, icon: null }; + }; + const modelBreakdown = usage ? (usage.usage ?? usage.model_breakdown) : undefined; const models = modelBreakdown ? Object.entries(modelBreakdown) : []; const hasUsage = usage && usage.total_tokens > 0; @@ -411,14 +436,20 @@ const MessageInfoDropdown: FC = () => { <> {models.length > 0 ? ( - models.map(([model, counts]) => ( - e.preventDefault()}> - {model} - - {counts.total_tokens.toLocaleString()} tokens - - - )) + models.map(([model, counts]) => { + const { name, icon } = resolveModel(model); + return ( + e.preventDefault()}> + + {icon} + {name} + + + {counts.total_tokens.toLocaleString()} tokens + + + ); + }) ) : ( e.preventDefault()}> From 1f9840d4a45a42230d84bcf5d64a9aa8f5363c9d Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 15:49:33 +0530 Subject: [PATCH 08/14] feat: update alembic migration number --- ...ken_usage_table.py => 125_add_token_usage_table.py} | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) rename surfsense_backend/alembic/versions/{124_add_token_usage_table.py => 125_add_token_usage_table.py} (95%) diff --git a/surfsense_backend/alembic/versions/124_add_token_usage_table.py b/surfsense_backend/alembic/versions/125_add_token_usage_table.py similarity index 95% rename from surfsense_backend/alembic/versions/124_add_token_usage_table.py rename to surfsense_backend/alembic/versions/125_add_token_usage_table.py index 2c88e1f51..c08280487 100644 --- a/surfsense_backend/alembic/versions/124_add_token_usage_table.py +++ b/surfsense_backend/alembic/versions/125_add_token_usage_table.py @@ -1,7 +1,7 @@ -"""124_add_token_usage_table +"""125_add_token_usage_table -Revision ID: 124 -Revises: 123 +Revision ID: 125 +Revises: 124 Create Date: 2026-04-14 Adds token_usage table for tracking LLM token consumption per message. @@ -18,8 +18,8 @@ from sqlalchemy.dialects.postgresql import JSONB, UUID from alembic import op # revision identifiers, used by Alembic. -revision: str = "124" -down_revision: str | None = "123" +revision: str = "125" +down_revision: str | None = "124" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None From 292fcb1a2c3b06e28ec7a66e335b9ca9aa13b02a Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 20:35:16 +0530 Subject: [PATCH 09/14] feat: enhance model selector UI abd added github models icon --- .../components/icons/providers/github.svg | 1 + .../components/icons/providers/index.ts | 1 + .../components/new-chat/model-selector.tsx | 163 ++++++++++++------ surfsense_web/lib/provider-icons.tsx | 3 + 4 files changed, 118 insertions(+), 50 deletions(-) create mode 100644 surfsense_web/components/icons/providers/github.svg diff --git a/surfsense_web/components/icons/providers/github.svg b/surfsense_web/components/icons/providers/github.svg new file mode 100644 index 000000000..7a51b8e0e --- /dev/null +++ b/surfsense_web/components/icons/providers/github.svg @@ -0,0 +1 @@ +Github \ No newline at end of file diff --git a/surfsense_web/components/icons/providers/index.ts b/surfsense_web/components/icons/providers/index.ts index 2afed7fa5..aefa2a053 100644 --- a/surfsense_web/components/icons/providers/index.ts +++ b/surfsense_web/components/icons/providers/index.ts @@ -10,6 +10,7 @@ export { default as DeepInfraIcon } from "./deepinfra.svg"; export { default as DeepSeekIcon } from "./deepseek.svg"; export { default as FireworksAiIcon } from "./fireworksai.svg"; export { default as GeminiIcon } from "./gemini.svg"; +export { default as GitHubModelsIcon } from "./github.svg"; export { default as GroqIcon } from "./groq.svg"; export { default as HuggingFaceIcon } from "./huggingface.svg"; export { default as MiniMaxIcon } from "./minimax.svg"; diff --git a/surfsense_web/components/new-chat/model-selector.tsx b/surfsense_web/components/new-chat/model-selector.tsx index 26937e18b..0b8708269 100644 --- a/surfsense_web/components/new-chat/model-selector.tsx +++ b/surfsense_web/components/new-chat/model-selector.tsx @@ -6,9 +6,12 @@ import { Bot, Check, ChevronDown, + ChevronLeft, + ChevronRight, + ChevronUp, Edit3, - Eye, ImageIcon, + ScanEye, Layers, Plus, Search, @@ -69,6 +72,7 @@ const PROVIDER_NAMES: Record = { DEEPSEEK: "DeepSeek", MISTRAL: "Mistral", COHERE: "Cohere", + GITHUB_MODELS: "GitHub Models", GROQ: "Groq", OLLAMA: "Ollama", TOGETHER_AI: "Together AI", @@ -274,17 +278,40 @@ export function ModelSelector({ const [searchQuery, setSearchQuery] = useState(""); const [selectedProvider, setSelectedProvider] = useState("all"); const [focusedIndex, setFocusedIndex] = useState(-1); - const [showScrollIndicator, setShowScrollIndicator] = useState(true); + const [modelScrollPos, setModelScrollPos] = useState<"top" | "middle" | "bottom">("top"); + const [sidebarScrollPos, setSidebarScrollPos] = useState<"top" | "middle" | "bottom">("top"); const providerSidebarRef = useRef(null); const modelListRef = useRef(null); const searchInputRef = useRef(null); const isMobile = useIsMobile(); + const handleModelListScroll = useCallback((e: React.UIEvent) => { + const el = e.currentTarget; + const atTop = el.scrollTop <= 2; + const atBottom = el.scrollHeight - el.scrollTop - el.clientHeight <= 2; + setModelScrollPos(atTop ? "top" : atBottom ? "bottom" : "middle"); + }, []); + + const handleSidebarScroll = useCallback((e: React.UIEvent) => { + const el = e.currentTarget; + if (isMobile) { + const atStart = el.scrollLeft <= 2; + const atEnd = el.scrollWidth - el.scrollLeft - el.clientWidth <= 2; + setSidebarScrollPos(atStart ? "top" : atEnd ? "bottom" : "middle"); + } else { + const atTop = el.scrollTop <= 2; + const atBottom = el.scrollHeight - el.scrollTop - el.clientHeight <= 2; + setSidebarScrollPos(atTop ? "top" : atBottom ? "bottom" : "middle"); + } + }, [isMobile]); + // Reset search + provider when tab changes + // biome-ignore lint/correctness/useExhaustiveDependencies: activeTab is intentionally used as a trigger useEffect(() => { setSelectedProvider("all"); setSearchQuery(""); setFocusedIndex(-1); + setModelScrollPos("top"); }, [activeTab]); // Reset on open @@ -295,8 +322,9 @@ export function ModelSelector({ } }, [open]); - // Cmd/Ctrl+M shortcut + // Cmd/Ctrl+M shortcut (desktop only) useEffect(() => { + if (isMobile) return; const handler = (e: KeyboardEvent) => { if ((e.metaKey || e.ctrlKey) && e.key === "m") { e.preventDefault(); @@ -305,9 +333,10 @@ export function ModelSelector({ }; document.addEventListener("keydown", handler); return () => document.removeEventListener("keydown", handler); - }, []); + }, [isMobile]); // Focus search input on open + // biome-ignore lint/correctness/useExhaustiveDependencies: activeTab is intentionally used as a trigger to re-focus on tab switch useEffect(() => { if (open && !isMobile) { requestAnimationFrame(() => searchInputRef.current?.focus()); @@ -677,6 +706,7 @@ export function ModelSelector({ ); // ─── Keyboard navigation ─── + // biome-ignore lint/correctness/useExhaustiveDependencies: searchQuery and selectedProvider are intentional triggers to reset focus useEffect(() => { setFocusedIndex(-1); }, [searchQuery, selectedProvider]); @@ -767,24 +797,35 @@ export function ModelSelector({ return (
+ {!isMobile && sidebarScrollPos !== "top" && ( +
+ +
+ )} + {isMobile && sidebarScrollPos !== "top" && ( +
+ +
+ )}
{ - const t = e.currentTarget; - setShowScrollIndicator( - t.scrollHeight - t.scrollTop > - t.clientHeight + 10, - ); - }} + onScroll={handleSidebarScroll} className={cn( isMobile - ? "flex flex-row gap-0.5 px-2 py-1.5 overflow-x-auto border-b border-border/40" + ? "flex flex-row gap-0.5 px-1 py-1.5 overflow-x-auto [&::-webkit-scrollbar]:h-0 [&::-webkit-scrollbar-track]:bg-transparent" : "flex flex-col gap-0.5 p-1 overflow-y-auto flex-1 [&::-webkit-scrollbar]:w-0 [&::-webkit-scrollbar-track]:bg-transparent", )} + style={isMobile ? { + maskImage: `linear-gradient(to right, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 24px, black calc(100% - 24px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`, + WebkitMaskImage: `linear-gradient(to right, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 24px, black calc(100% - 24px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`, + } : { + maskImage: `linear-gradient(to bottom, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 32px, black calc(100% - 32px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`, + WebkitMaskImage: `linear-gradient(to bottom, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 32px, black calc(100% - 32px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`, + }} > {activeProviders.map((provider, idx) => { const isAll = provider === "all"; @@ -849,18 +890,23 @@ export function ModelSelector({ )} {isConfigured ? ` (${count})` - : " — not configured"} + : " (not configured)"} ); })}
- {!isMobile && showScrollIndicator && ( -
+ {!isMobile && sidebarScrollPos !== "bottom" && ( +
)} + {isMobile && sidebarScrollPos !== "bottom" && ( +
+ +
+ )}
); }; @@ -889,19 +935,26 @@ export function ModelSelector({ key={`${activeTab}-${item.isGlobal ? "g" : "u"}-${config.id}`} data-model-index={index} role="option" + tabIndex={isMobile ? -1 : 0} aria-selected={isSelected} onClick={() => handleSelectItem(item)} + onKeyDown={isMobile ? undefined : (e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + handleSelectItem(item); + } + }} onMouseEnter={() => setFocusedIndex(index)} className={cn( - "group flex items-start gap-2.5 px-2.5 py-2 rounded-lg cursor-pointer", - "transition-all duration-150 mx-1", - "hover:bg-accent/40 active:scale-[0.99]", + "group flex items-center gap-2.5 px-3 py-2 rounded-xl cursor-pointer", + "transition-all duration-150 mx-2", + "hover:bg-accent/40", isSelected && "bg-primary/6 dark:bg-primary/8", - isFocused && "bg-accent/50 ring-1 ring-primary/20", + isFocused && "bg-accent/50", )} > {/* Provider icon */} -
+
{getProviderIcon(config.provider as string, { isAutoMode, className: "size-5", @@ -931,8 +984,8 @@ export function ModelSelector({ {!isAutoMode && hasCitations && ( Citations @@ -981,7 +1034,7 @@ export function ModelSelector({ : "Add Vision Model"; return ( -
+
{/* Tab header */}
@@ -999,7 +1052,7 @@ export function ModelSelector({ }, { value: "vision" as const, - icon: Eye, + icon: ScanEye, label: "Vision", }, ] as const @@ -1028,7 +1081,7 @@ export function ModelSelector({ "flex", isMobile ? "flex-col h-[60vh]" - : "flex-row h-[420px]", + : "flex-row h-[380px]", )} > {/* Provider sidebar */} @@ -1037,33 +1090,30 @@ export function ModelSelector({ {/* Main content */}
{/* Search */} -
- +
+ setSearchQuery(e.target.value) } - onKeyDown={handleKeyDown} - autoFocus={!isMobile} + onKeyDown={isMobile ? undefined : handleKeyDown} role="combobox" aria-expanded={true} aria-controls="model-selector-list" className={cn( - "w-full pl-8 pr-3 py-1.5 text-xs rounded-lg", - "bg-secondary/30 border border-border/40", - "focus:outline-none focus:ring-2 focus:ring-primary/20 focus:border-primary/40", - "placeholder:text-muted-foreground/50", - "transition-[box-shadow,border-color] duration-200", + "w-full pl-8 pr-3 py-2.5 text-sm bg-transparent", + "focus:outline-none", + "placeholder:text-muted-foreground", )} />
{/* Provider header when filtered */} {selectedProvider !== "all" && ( -
+
{getProviderIcon(selectedProvider, { className: "size-4", })} @@ -1085,10 +1135,15 @@ export function ModelSelector({ id="model-selector-list" ref={modelListRef} role="listbox" - className="overflow-y-auto flex-1 py-1" + className="overflow-y-auto flex-1 py-1 space-y-1 flex flex-col" + onScroll={handleModelListScroll} + style={{ + maskImage: `linear-gradient(to bottom, ${modelScrollPos === "top" ? "black" : "transparent"}, black 16px, black calc(100% - 16px), ${modelScrollPos === "bottom" ? "black" : "transparent"})`, + WebkitMaskImage: `linear-gradient(to bottom, ${modelScrollPos === "top" ? "black" : "transparent"}, black 16px, black calc(100% - 16px), ${modelScrollPos === "bottom" ? "black" : "transparent"})`, + }} > {currentDisplayItems.length === 0 ? ( -
+
{selectedProvider !== "all" && !configuredProviderSet.has( selectedProvider, @@ -1116,22 +1171,21 @@ export function ModelSelector({

{addHandler && ( )} - ) : ( + ) : searchQuery ? ( <> - +

No models found

@@ -1140,13 +1194,22 @@ export function ModelSelector({ term

+ ) : ( + <> +

+ No models configured +

+

+ Configure models in your search space settings +

+ )}
) : ( <> {globalItems.length > 0 && ( <> -
+
Global Models
{globalItems.map((item, i) => @@ -1163,7 +1226,7 @@ export function ModelSelector({ )} {userItems.length > 0 && ( <> -
+
Your Configurations
{userItems.map((item, i) => @@ -1180,7 +1243,7 @@ export function ModelSelector({ {/* Add model button */} {addHandler && ( -
+
-
+
+ setPassword(e.target.value)} + className={`block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} + disabled={isLoggingIn} + /> + +
- - - {aiSortBusy - ? "AI sort in progress..." - : aiSortEnabled - ? "AI sort active — click to disable" - : "Enable AI sort"} - - - )} - {/* Search Input */}
From a74ed014cc4ba250df61fa3e67fbdddcbabcb0aa Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 21:50:34 +0530 Subject: [PATCH 13/14] refactor: update UI components for better accessibility and styling consistency --- .../components/ApiKeyContent.tsx | 7 +- .../components/ProfileContent.tsx | 6 +- .../components/new-chat/model-selector.tsx | 2 +- .../public-chat-snapshot-row.tsx | 145 ++++++++---------- .../settings/general-settings-manager.tsx | 6 +- .../components/settings/llm-role-manager.tsx | 3 +- surfsense_web/messages/en.json | 1 - surfsense_web/messages/es.json | 1 - surfsense_web/messages/hi.json | 1 - surfsense_web/messages/pt.json | 1 - surfsense_web/messages/zh.json | 1 - 11 files changed, 72 insertions(+), 102 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx index 223a6e3e7..3600d30db 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx @@ -27,10 +27,9 @@ export function ApiKeyContent() { return (
- - - {t("api_key_warning_title")} - + + + {t("api_key_warning_description")} diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx index 32377194a..6d9ce22be 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx @@ -113,10 +113,10 @@ export function ProfileContent() { type="submit" variant="outline" disabled={isPending || !hasChanges} - className="gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200" + className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200" > - {isPending && } - {t("profile_save")} + {t("profile_save")} + {isPending && }
diff --git a/surfsense_web/components/new-chat/model-selector.tsx b/surfsense_web/components/new-chat/model-selector.tsx index 0b8708269..8fec4cc93 100644 --- a/surfsense_web/components/new-chat/model-selector.tsx +++ b/surfsense_web/components/new-chat/model-selector.tsx @@ -970,7 +970,7 @@ export function ModelSelector({ {isAutoMode && ( Recommended diff --git a/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx b/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx index 4bb49c48d..fbc5820c7 100644 --- a/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx +++ b/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx @@ -1,14 +1,22 @@ "use client"; -import { Check, Copy, Dot, ExternalLink, MessageSquare, Trash2 } from "lucide-react"; -import { useCallback, useRef, useState } from "react"; +import { Copy, Dot, ExternalLink, MessageSquare, MoreHorizontal, Trash2 } from "lucide-react"; +import { useCallback, useState } from "react"; +import { toast } from "sonner"; import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Card, CardContent } from "@/components/ui/card"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; import type { PublicChatSnapshotDetail } from "@/contracts/types/chat-threads.types"; import { useMediaQuery } from "@/hooks/use-media-query"; +import { cn } from "@/lib/utils"; function getInitials(name: string): string { const parts = name.trim().split(/\s+/); @@ -35,15 +43,12 @@ export function PublicChatSnapshotRow({ isDeleting = false, memberMap, }: PublicChatSnapshotRowProps) { - const [copied, setCopied] = useState(false); - const copyTimeoutRef = useRef>(null); + const [dropdownOpen, setDropdownOpen] = useState(false); const isDesktop = useMediaQuery("(min-width: 768px)"); const handleCopyClick = useCallback(() => { onCopy(snapshot); - setCopied(true); - if (copyTimeoutRef.current) clearTimeout(copyTimeoutRef.current); - copyTimeoutRef.current = setTimeout(() => setCopied(false), 2000); + toast.success("Link copied to clipboard"); }, [onCopy, snapshot]); const formattedDate = new Date(snapshot.created_at).toLocaleDateString(undefined, { @@ -58,96 +63,66 @@ export function PublicChatSnapshotRow({ {/* Header: Title + Actions */} -
-
-

- {snapshot.thread_title} -

-
-
- - - - - - Open link - - - {canDelete && ( - - - - - - Delete - - +
+

+ title={snapshot.thread_title} + > + {snapshot.thread_title} +

+ + + + + + + + Copy link + + + + + Open link + + + {canDelete && ( + onDelete(snapshot)} + disabled={isDeleting} + > + + Delete + + )} + +
{/* Message count badge */}
{snapshot.message_count} messages
- {/* Public URL – selectable fallback for manual copy */} -
-
-

- {snapshot.public_url} -

-
- - - - - - {copied ? "Copied!" : "Copy link"} - - -
- {/* Footer: Date + Creator */}
{formattedDate} diff --git a/surfsense_web/components/settings/general-settings-manager.tsx b/surfsense_web/components/settings/general-settings-manager.tsx index 15d44906b..59aaa129e 100644 --- a/surfsense_web/components/settings/general-settings-manager.tsx +++ b/surfsense_web/components/settings/general-settings-manager.tsx @@ -181,10 +181,10 @@ export function GeneralSettingsManager({ searchSpaceId }: GeneralSettingsManager type="submit" variant="outline" disabled={!hasChanges || saving || !name.trim()} - className="gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200" + className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200" > - {saving ? : null} - {saving ? t("general_saving") : t("general_save")} + {t("general_save")} + {saving && }
diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx index d6eb7c64d..aa2749967 100644 --- a/surfsense_web/components/settings/llm-role-manager.tsx +++ b/surfsense_web/components/settings/llm-role-manager.tsx @@ -395,6 +395,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { key={config.id} value={config.id.toString()} className="text-xs md:text-sm py-1.5 md:py-2" + textValue={config.name} >
@@ -403,7 +404,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { {isAuto && ( Recommended diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json index 701cb086d..2e42451cd 100644 --- a/surfsense_web/messages/en.json +++ b/surfsense_web/messages/en.json @@ -123,7 +123,6 @@ "api_key_nav_description": "Manage your API access token", "api_key_title": "API Key", "api_key_description": "Use this key to authenticate API requests", - "api_key_warning_title": "Keep it secret", "api_key_warning_description": "Your API key grants full access to your account. Never share it publicly or commit it to version control.", "your_api_key": "Your API Key", "copied": "Copied!", diff --git a/surfsense_web/messages/es.json b/surfsense_web/messages/es.json index 9c872e701..e017b0bba 100644 --- a/surfsense_web/messages/es.json +++ b/surfsense_web/messages/es.json @@ -123,7 +123,6 @@ "api_key_nav_description": "Administra tu token de acceso a la API", "api_key_title": "Clave API", "api_key_description": "Usa esta clave para autenticar las solicitudes de la API", - "api_key_warning_title": "Mantenla en secreto", "api_key_warning_description": "Tu clave API otorga acceso completo a tu cuenta. Nunca la compartas públicamente ni la incluyas en el control de versiones.", "your_api_key": "Tu clave API", "copied": "¡Copiado!", diff --git a/surfsense_web/messages/hi.json b/surfsense_web/messages/hi.json index 3b6975bf5..e60e92f03 100644 --- a/surfsense_web/messages/hi.json +++ b/surfsense_web/messages/hi.json @@ -123,7 +123,6 @@ "api_key_nav_description": "अपना API एक्सेस टोकन प्रबंधित करें", "api_key_title": "API कुंजी", "api_key_description": "API अनुरोधों को प्रमाणित करने के लिए इस कुंजी का उपयोग करें", - "api_key_warning_title": "इसे गुप्त रखें", "api_key_warning_description": "आपकी API कुंजी आपके खाते तक पूर्ण पहुंच प्रदान करती है। इसे कभी सार्वजनिक रूप से साझा न करें या संस्करण नियंत्रण में शामिल न करें।", "your_api_key": "आपकी API कुंजी", "copied": "कॉपी किया गया!", diff --git a/surfsense_web/messages/pt.json b/surfsense_web/messages/pt.json index e92dff375..acece5391 100644 --- a/surfsense_web/messages/pt.json +++ b/surfsense_web/messages/pt.json @@ -123,7 +123,6 @@ "api_key_nav_description": "Gerencie seu token de acesso à API", "api_key_title": "Chave API", "api_key_description": "Use esta chave para autenticar solicitações da API", - "api_key_warning_title": "Mantenha em segredo", "api_key_warning_description": "Sua chave API concede acesso total à sua conta. Nunca a compartilhe publicamente nem a inclua no controle de versão.", "your_api_key": "Sua chave API", "copied": "Copiado!", diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json index c53155edf..6a59115ed 100644 --- a/surfsense_web/messages/zh.json +++ b/surfsense_web/messages/zh.json @@ -108,7 +108,6 @@ "api_key_nav_description": "管理您的API访问令牌", "api_key_title": "API密钥", "api_key_description": "使用此密钥验证API请求", - "api_key_warning_title": "请保密", "api_key_warning_description": "您的API密钥可以完全访问您的账户。请勿公开分享或提交到版本控制。", "your_api_key": "您的API密钥", "copied": "已复制!", From 9fc0976d5eb7d6593d9c9c63577981fa8d398029 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 14 Apr 2026 21:52:26 +0530 Subject: [PATCH 14/14] chore: ran linting --- .../versions/125_add_token_usage_table.py | 8 +- .../app/routes/new_chat_routes.py | 4 +- surfsense_backend/app/services/llm_service.py | 3 +- .../app/services/token_tracking_service.py | 23 +++- .../app/tasks/chat/stream_new_chat.py | 106 ++++++++++++------ 5 files changed, 97 insertions(+), 47 deletions(-) diff --git a/surfsense_backend/alembic/versions/125_add_token_usage_table.py b/surfsense_backend/alembic/versions/125_add_token_usage_table.py index c08280487..915561c8c 100644 --- a/surfsense_backend/alembic/versions/125_add_token_usage_table.py +++ b/surfsense_backend/alembic/versions/125_add_token_usage_table.py @@ -33,7 +33,9 @@ def upgrade() -> None: "token_usage", sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), sa.Column("prompt_tokens", sa.Integer(), nullable=False, server_default="0"), - sa.Column("completion_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column( + "completion_tokens", sa.Integer(), nullable=False, server_default="0" + ), sa.Column("total_tokens", sa.Integer(), nullable=False, server_default="0"), sa.Column("model_breakdown", JSONB, nullable=True), sa.Column("call_details", JSONB, nullable=True), @@ -72,7 +74,9 @@ def upgrade() -> None: op.create_index("ix_token_usage_thread_id", "token_usage", ["thread_id"]) op.create_index("ix_token_usage_message_id", "token_usage", ["message_id"]) - op.create_index("ix_token_usage_search_space_id", "token_usage", ["search_space_id"]) + op.create_index( + "ix_token_usage_search_space_id", "token_usage", ["search_space_id"] + ) op.create_index("ix_token_usage_user_id", "token_usage", ["user_id"]) op.create_index("ix_token_usage_usage_type", "token_usage", ["usage_type"]) diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 55302b873..b914b297e 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -498,7 +498,9 @@ async def get_thread_messages( author_id=msg.author_id, author_display_name=msg.author.display_name if msg.author else None, author_avatar_url=msg.author.avatar_url if msg.author else None, - token_usage=TokenUsageSummary.model_validate(msg.token_usage) if msg.token_usage else None, + token_usage=TokenUsageSummary.model_validate(msg.token_usage) + if msg.token_usage + else None, ) for msg in db_messages ] diff --git a/surfsense_backend/app/services/llm_service.py b/surfsense_backend/app/services/llm_service.py index c90bdfce3..d31e19ed3 100644 --- a/surfsense_backend/app/services/llm_service.py +++ b/surfsense_backend/app/services/llm_service.py @@ -15,6 +15,7 @@ from app.services.llm_router_service import ( get_auto_mode_llm, is_auto_mode, ) +from app.services.token_tracking_service import token_tracker # Configure litellm to automatically drop unsupported parameters litellm.drop_params = True @@ -25,8 +26,6 @@ litellm.cache = None litellm.failure_callback = [] litellm.input_callback = [] -from app.services.token_tracking_service import token_tracker - litellm.callbacks = [token_tracker] logger = logging.getLogger(__name__) diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py index 5d69e6870..9aa8c6e70 100644 --- a/surfsense_backend/app/services/token_tracking_service.py +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -117,12 +117,16 @@ class TokenTrackingCallback(CustomLogger): ) -> None: acc = _turn_accumulator.get() if acc is None: - logger.debug("[TokenTracking] async_log_success_event fired but no accumulator in context") + logger.debug( + "[TokenTracking] async_log_success_event fired but no accumulator in context" + ) return usage = getattr(response_obj, "usage", None) if not usage: - logger.debug("[TokenTracking] async_log_success_event fired but response has no usage data") + logger.debug( + "[TokenTracking] async_log_success_event fired but response has no usage data" + ) return prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0 @@ -139,7 +143,11 @@ class TokenTrackingCallback(CustomLogger): ) logger.info( "[TokenTracking] Captured: model=%s prompt=%d completion=%d total=%d (accumulator now has %d calls)", - model, prompt_tokens, completion_tokens, total_tokens, len(acc.calls), + model, + prompt_tokens, + completion_tokens, + total_tokens, + len(acc.calls), ) @@ -187,11 +195,16 @@ async def record_token_usage( session.add(record) logger.debug( "[TokenTracking] recorded %s usage: prompt=%d completion=%d total=%d", - usage_type, prompt_tokens, completion_tokens, total_tokens, + usage_type, + prompt_tokens, + completion_tokens, + total_tokens, ) return record except Exception: logger.warning( - "[TokenTracking] failed to record %s token usage", usage_type, exc_info=True, + "[TokenTracking] failed to record %s token usage", + usage_type, + exc_info=True, ) return None diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index e87a1b791..478aa3671 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -1475,17 +1475,22 @@ async def stream_new_chat( """ try: from litellm import acompletion + from app.services.llm_router_service import LLMRouterService from app.services.token_tracking_service import _turn_accumulator _turn_accumulator.set(None) - prompt = TITLE_GENERATION_PROMPT.replace("{user_query}", user_query[:500]) + prompt = TITLE_GENERATION_PROMPT.replace( + "{user_query}", user_query[:500] + ) messages = [{"role": "user", "content": prompt}] if getattr(llm, "model", None) == "auto": router = LLMRouterService.get_router() - response = await router.acompletion(model="auto", messages=messages) + response = await router.acompletion( + model="auto", messages=messages + ) else: response = await acompletion( model=llm.model, @@ -1498,11 +1503,16 @@ async def stream_new_chat( usage = getattr(response, "usage", None) if usage: raw_model = getattr(llm, "model", "") or "" - model_name = raw_model.split("/", 1)[-1] if "/" in raw_model else (raw_model or response.model or "unknown") + model_name = ( + raw_model.split("/", 1)[-1] + if "/" in raw_model + else (raw_model or response.model or "unknown") + ) usage_info = { "model": model_name, "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, - "completion_tokens": getattr(usage, "completion_tokens", 0) or 0, + "completion_tokens": getattr(usage, "completion_tokens", 0) + or 0, "total_tokens": getattr(usage, "total_tokens", 0) or 0, } @@ -1511,7 +1521,9 @@ async def stream_new_chat( return raw_title.strip("\"'"), usage_info return None, usage_info except Exception: - logging.getLogger(__name__).exception("[TitleGen] _generate_title failed") + logging.getLogger(__name__).exception( + "[TitleGen] _generate_title failed" + ) return None, None title_task = asyncio.create_task(_generate_title()) @@ -1575,16 +1587,21 @@ async def stream_new_chat( usage_summary = accumulator.per_message_summary() _perf_log.info( "[token_usage] interrupted new_chat: calls=%d total=%d summary=%s", - len(accumulator.calls), accumulator.grand_total, usage_summary, + len(accumulator.calls), + accumulator.grand_total, + usage_summary, ) if usage_summary: - yield streaming_service.format_data("token-usage", { - "usage": usage_summary, - "prompt_tokens": accumulator.total_prompt_tokens, - "completion_tokens": accumulator.total_completion_tokens, - "total_tokens": accumulator.grand_total, - "call_details": accumulator.serialized_calls(), - }) + yield streaming_service.format_data( + "token-usage", + { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }, + ) yield streaming_service.format_finish_step() yield streaming_service.format_finish() @@ -1612,16 +1629,21 @@ async def stream_new_chat( usage_summary = accumulator.per_message_summary() _perf_log.info( "[token_usage] normal new_chat: calls=%d total=%d summary=%s", - len(accumulator.calls), accumulator.grand_total, usage_summary, + len(accumulator.calls), + accumulator.grand_total, + usage_summary, ) if usage_summary: - yield streaming_service.format_data("token-usage", { - "usage": usage_summary, - "prompt_tokens": accumulator.total_prompt_tokens, - "completion_tokens": accumulator.total_completion_tokens, - "total_tokens": accumulator.grand_total, - "call_details": accumulator.serialized_calls(), - }) + yield streaming_service.format_data( + "token-usage", + { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }, + ) # Fire background memory extraction if the agent didn't handle it. # Shared threads write to team memory; private threads write to user memory. @@ -1870,16 +1892,21 @@ async def stream_resume_chat( usage_summary = accumulator.per_message_summary() _perf_log.info( "[token_usage] interrupted resume_chat: calls=%d total=%d summary=%s", - len(accumulator.calls), accumulator.grand_total, usage_summary, + len(accumulator.calls), + accumulator.grand_total, + usage_summary, ) if usage_summary: - yield streaming_service.format_data("token-usage", { - "usage": usage_summary, - "prompt_tokens": accumulator.total_prompt_tokens, - "completion_tokens": accumulator.total_completion_tokens, - "total_tokens": accumulator.grand_total, - "call_details": accumulator.serialized_calls(), - }) + yield streaming_service.format_data( + "token-usage", + { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }, + ) yield streaming_service.format_finish_step() yield streaming_service.format_finish() @@ -1889,16 +1916,21 @@ async def stream_resume_chat( usage_summary = accumulator.per_message_summary() _perf_log.info( "[token_usage] normal resume_chat: calls=%d total=%d summary=%s", - len(accumulator.calls), accumulator.grand_total, usage_summary, + len(accumulator.calls), + accumulator.grand_total, + usage_summary, ) if usage_summary: - yield streaming_service.format_data("token-usage", { - "usage": usage_summary, - "prompt_tokens": accumulator.total_prompt_tokens, - "completion_tokens": accumulator.total_completion_tokens, - "total_tokens": accumulator.grand_total, - "call_details": accumulator.serialized_calls(), - }) + yield streaming_service.format_data( + "token-usage", + { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }, + ) yield streaming_service.format_finish_step() yield streaming_service.format_finish()