From 917f35eb333dcf8d7aafe137aed45c3446449c60 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 13:26:53 +0530
Subject: [PATCH 01/14] feat: add token_usage table and relationships for
 tracking LLM token consumption

---
 .../versions/124_add_token_usage_table.py     | 81 +++++++++++++++++++
 surfsense_backend/app/db.py                   | 62 ++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100644 surfsense_backend/alembic/versions/124_add_token_usage_table.py

diff --git a/surfsense_backend/alembic/versions/124_add_token_usage_table.py b/surfsense_backend/alembic/versions/124_add_token_usage_table.py
new file mode 100644
index 000000000..2c88e1f51
--- /dev/null
+++ b/surfsense_backend/alembic/versions/124_add_token_usage_table.py
@@ -0,0 +1,81 @@
+"""124_add_token_usage_table
+
+Revision ID: 124
+Revises: 123
+Create Date: 2026-04-14
+
+Adds token_usage table for tracking LLM token consumption per message.
+Supports future extension via usage_type for indexing, image gen, etc.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "124"
+down_revision: str | None = "123"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:  # create token_usage + its indexes; no-op when the table exists
+    conn = op.get_bind()
+    if sa.inspect(conn).has_table("token_usage"):
+        return  # table already present: the index creation below is skipped as well
+
+    op.create_table(
+        "token_usage",
+        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+        sa.Column("prompt_tokens", sa.Integer(), nullable=False, server_default="0"),
+        sa.Column("completion_tokens", sa.Integer(), nullable=False, server_default="0"),
+        sa.Column("total_tokens", sa.Integer(), nullable=False, server_default="0"),
+        sa.Column("model_breakdown", JSONB, nullable=True),
+        sa.Column("call_details", JSONB, nullable=True),
+        sa.Column("usage_type", sa.String(50), nullable=False, server_default="chat"),
+        sa.Column(
+            "thread_id",
+            sa.Integer(),
+            sa.ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
+            nullable=True,
+        ),
+        sa.Column(
+            "message_id",
+            sa.Integer(),
+            sa.ForeignKey("new_chat_messages.id", ondelete="SET NULL"),
+            nullable=True,  # FK above is SET NULL: the usage row outlives its message
+        ),
+        sa.Column(
+            "search_space_id",
+            sa.Integer(),
+            sa.ForeignKey("searchspaces.id", ondelete="CASCADE"),
+            nullable=False,
+        ),
+        sa.Column(
+            "user_id",
+            UUID(as_uuid=True),
+            sa.ForeignKey("user.id", ondelete="CASCADE"),
+            nullable=False,
+        ),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+    )
+
+    op.create_index("ix_token_usage_thread_id", "token_usage", ["thread_id"])  # one index per FK
+    op.create_index("ix_token_usage_message_id", "token_usage", ["message_id"])
+    op.create_index("ix_token_usage_search_space_id", "token_usage", ["search_space_id"])
+    op.create_index("ix_token_usage_user_id", "token_usage", ["user_id"])
+    op.create_index("ix_token_usage_usage_type", "token_usage", ["usage_type"])
+
+
+def downgrade() -> None:  # idempotent like upgrade(): an absent table is not an error
+    op.execute("DROP TABLE IF EXISTS token_usage")  # its indexes are dropped with the table
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index e69d28ac2..b9fbe8845 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -647,6 +647,11 @@ class NewChatThread(BaseModel, TimestampMixin):
         cascade="all, delete-orphan",
         foreign_keys="[PublicChatSnapshot.thread_id]",
     )
+    token_usages = relationship(
+        "TokenUsage",
+        back_populates="thread",
+        cascade="all, delete-orphan",
+    )
 
 
 class NewChatMessage(BaseModel, TimestampMixin):
@@ -685,6 +690,63 @@ class NewChatMessage(BaseModel, TimestampMixin):
         back_populates="message",
         cascade="all, delete-orphan",
     )
+    token_usage = relationship(
+        "TokenUsage",
+        back_populates="message",
+        uselist=False,
+        cascade="all, delete-orphan",
+    )
+
+
+class TokenUsage(BaseModel, TimestampMixin):
+    """
+    One row of LLM token usage (prompt/completion/total counts) per usage event.
+
+    Chat rows reference their message through message_id; that FK is ON DELETE
+    SET NULL, while thread, search space and user FKs are ON DELETE CASCADE.
+    usage_type (default "chat") lets non-chat usage share the table without schema changes.
+    """
+
+    __tablename__ = "token_usage"
+
+    prompt_tokens = Column(Integer, nullable=False, default=0)
+    completion_tokens = Column(Integer, nullable=False, default=0)
+    total_tokens = Column(Integer, nullable=False, default=0)
+    model_breakdown = Column(JSONB, nullable=True)  # free-form JSON; presumably per-model totals
+    call_details = Column(JSONB, nullable=True)  # free-form JSON; presumably one entry per call
+
+    usage_type = Column(String(50), nullable=False, default="chat", index=True)
+
+    thread_id = Column(
+        Integer,
+        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
+        nullable=True,
+        index=True,
+    )
+    message_id = Column(
+        Integer,
+        ForeignKey("new_chat_messages.id", ondelete="SET NULL"),
+        nullable=True,
+        index=True,
+    )
+    search_space_id = Column(
+        Integer,
+        ForeignKey("searchspaces.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    user_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("user.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+
+    # Relationships (search_space and user are one-directional: no back_populates)
+    thread = relationship("NewChatThread", back_populates="token_usages")
+    message = relationship("NewChatMessage", back_populates="token_usage")
+    search_space = relationship("SearchSpace")
+    user = relationship("User")
 
 
 class PublicChatSnapshot(BaseModel, TimestampMixin):

From 3cfe53fb7f1ba327edbfdbbeeeef59fb833546de Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 13:40:32 +0530
Subject: [PATCH 02/14] feat: implement token usage tracking for LLM calls with
 new accumulator and callback

---
 .../app/routes/new_chat_routes.py             |  34 ++++-
 surfsense_backend/app/schemas/new_chat.py     |   9 ++
 .../app/services/llm_router_service.py        |   1 +
 surfsense_backend/app/services/llm_service.py |   5 +-
 .../app/services/token_tracking_service.py    | 129 ++++++++++++++++++
 .../app/tasks/chat/stream_new_chat.py         |  49 +++++++
 6 files changed, 223 insertions(+), 4 deletions(-)
 create mode 100644 surfsense_backend/app/services/token_tracking_service.py

diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py
index 10a6951fa..a5245456e 100644
--- a/surfsense_backend/app/routes/new_chat_routes.py
+++ b/surfsense_backend/app/routes/new_chat_routes.py
@@ -30,6 +30,7 @@ from app.db import (
     NewChatThread,
     Permission,
     SearchSpace,
+    TokenUsage,
     User,
     get_async_session,
     shielded_async_session,
@@ -45,6 +46,7 @@ from app.schemas.new_chat import (
     NewChatThreadWithMessages,
     PublicChatSnapshotCreateResponse,
     PublicChatSnapshotListResponse,
+    TokenUsageSummary,
     RegenerateRequest,
     ResumeRequest,
     ThreadHistoryLoadResponse,
@@ -473,10 +475,13 @@ async def get_thread_messages(
         # Check thread-level access based on visibility
         await check_thread_access(session, thread, user)
 
-        # Get messages with their authors loaded
+        # Get messages with their authors and token usage loaded
         messages_result = await session.execute(
             select(NewChatMessage)
-            .options(selectinload(NewChatMessage.author))
+            .options(
+                selectinload(NewChatMessage.author),
+                selectinload(NewChatMessage.token_usage),
+            )
             .filter(NewChatMessage.thread_id == thread_id)
             .order_by(NewChatMessage.created_at)
         )
@@ -493,6 +498,7 @@ async def get_thread_messages(
                 author_id=msg.author_id,
                 author_display_name=msg.author.display_name if msg.author else None,
                 author_avatar_url=msg.author.avatar_url if msg.author else None,
+                token_usage=TokenUsageSummary.model_validate(msg.token_usage) if msg.token_usage else None,
             )
             for msg in db_messages
         ]
@@ -530,7 +536,11 @@ async def get_thread_full(
     try:
         result = await session.execute(
             select(NewChatThread)
-            .options(selectinload(NewChatThread.messages))
+            .options(
+                selectinload(NewChatThread.messages).selectinload(
+                    NewChatMessage.token_usage
+                ),
+            )
             .filter(NewChatThread.id == thread_id)
         )
         thread = result.scalars().first()
@@ -935,6 +945,24 @@ async def append_message(
 
         # flush assigns the PK/defaults without a round-trip SELECT
         await session.flush()
+
+        # Persist token usage for assistant messages; client-supplied, so only a non-empty dict is used
+        token_usage_data = raw_body.get("token_usage")
+        if isinstance(token_usage_data, dict) and token_usage_data and message_role == NewChatMessageRole.ASSISTANT:
+            token_usage_record = TokenUsage(
+                prompt_tokens=token_usage_data.get("prompt_tokens") or 0,  # explicit null -> 0
+                completion_tokens=token_usage_data.get("completion_tokens") or 0,
+                total_tokens=token_usage_data.get("total_tokens") or 0,
+                model_breakdown=token_usage_data.get("usage"),  # payload key is "usage"
+                call_details=token_usage_data.get("call_details"),
+                usage_type="chat",
+                thread_id=thread_id,
+                message_id=db_message.id,
+                search_space_id=thread.search_space_id,
+                user_id=user.id,
+            )
+            session.add(token_usage_record)
+
         await session.commit()
 
         # Return the in-memory object (already has id from flush) instead of
diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py
index 5d8ae207e..e523657a4 100644
--- a/surfsense_backend/app/schemas/new_chat.py
+++ b/surfsense_backend/app/schemas/new_chat.py
@@ -34,6 +34,14 @@ class NewChatMessageCreate(NewChatMessageBase):
     thread_id: int
 
 
+class TokenUsageSummary(BaseModel):  # read-side token totals for a message (no call_details)
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+    model_breakdown: dict | None = None  # presumably keyed by model name; shape not validated
+    model_config = ConfigDict(from_attributes=True)  # lets model_validate() read an ORM object
+
+
 class NewChatMessageRead(NewChatMessageBase, IDModel, TimestampModel):
     """Schema for reading a message."""
 
@@ -41,6 +49,7 @@ class NewChatMessageRead(NewChatMessageBase, IDModel, TimestampModel):
     author_id: UUID | None = None
     author_display_name: str | None = None
     author_avatar_url: str | None = None
+    token_usage: TokenUsageSummary | None = None
     model_config = ConfigDict(from_attributes=True)
 
 
diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py
index 63d8d10b9..d97665f7a 100644
--- a/surfsense_backend/app/services/llm_router_service.py
+++ b/surfsense_backend/app/services/llm_router_service.py
@@ -970,6 +970,7 @@ class ChatLiteLLMRouter(BaseChatModel):
                 messages=formatted_messages,
                 stop=stop,
                 stream=True,
+                stream_options={"include_usage": True},
                 **call_kwargs,
             )
         except ContextWindowExceededError as e:
diff --git a/surfsense_backend/app/services/llm_service.py b/surfsense_backend/app/services/llm_service.py
index 723b17607..c90bdfce3 100644
--- a/surfsense_backend/app/services/llm_service.py
+++ b/surfsense_backend/app/services/llm_service.py
@@ -22,10 +22,13 @@ litellm.drop_params = True
 # Memory controls: prevent unbounded internal accumulation
 litellm.telemetry = False
 litellm.cache = None
-litellm.success_callback = []
 litellm.failure_callback = []
 litellm.input_callback = []
 
+from app.services.token_tracking_service import token_tracker
+
+litellm.callbacks = [token_tracker]
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py
new file mode 100644
index 000000000..434a55ae0
--- /dev/null
+++ b/surfsense_backend/app/services/token_tracking_service.py
@@ -0,0 +1,129 @@
+"""
+Token usage tracking via LiteLLM custom callback.
+
+Uses a ContextVar-scoped accumulator to group all LLM calls within a single
+async request/turn. The accumulated data is emitted via SSE and persisted
+when the frontend calls appendMessage.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import logging
+from contextvars import ContextVar
+from dataclasses import dataclass, field
+from typing import Any
+
+from litellm.integrations.custom_logger import CustomLogger
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TokenCallRecord:  # token counts recorded for a single LLM call
+    model: str
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+@dataclass
+class TurnTokenAccumulator:
+    """Collects one TokenCallRecord per LLM call and derives per-model and overall totals."""
+
+    calls: list[TokenCallRecord] = field(default_factory=list)  # kept in insertion order
+
+    def add(
+        self,
+        model: str,
+        prompt_tokens: int,
+        completion_tokens: int,
+        total_tokens: int,
+    ) -> None:  # appends a record; the counts are stored as given, without validation
+        self.calls.append(
+            TokenCallRecord(
+                model=model,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+        )
+
+    def per_message_summary(self) -> dict[str, dict[str, int]]:
+        """Return prompt/completion/total sums keyed by model name ({} when no calls)."""
+        by_model: dict[str, dict[str, int]] = {}
+        for c in self.calls:
+            entry = by_model.setdefault(
+                c.model,
+                {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+            )
+            entry["prompt_tokens"] += c.prompt_tokens
+            entry["completion_tokens"] += c.completion_tokens
+            entry["total_tokens"] += c.total_tokens
+        return by_model
+
+    @property
+    def grand_total(self) -> int:  # sum of total_tokens over every recorded call
+        return sum(c.total_tokens for c in self.calls)
+
+    @property
+    def total_prompt_tokens(self) -> int:  # sum of prompt_tokens over every recorded call
+        return sum(c.prompt_tokens for c in self.calls)
+
+    @property
+    def total_completion_tokens(self) -> int:  # sum of completion_tokens over every call
+        return sum(c.completion_tokens for c in self.calls)
+
+    def serialized_calls(self) -> list[dict[str, Any]]:  # one plain dict per recorded call
+        return [dataclasses.asdict(c) for c in self.calls]
+
+
+_turn_accumulator: ContextVar[TurnTokenAccumulator | None] = ContextVar(
+    "_turn_accumulator", default=None
+)  # None until start_turn() binds an accumulator in the current context
+
+
+def start_turn() -> TurnTokenAccumulator:
+    """Bind a new, empty accumulator to the current context (replacing any) and return it."""
+    acc = TurnTokenAccumulator()
+    _turn_accumulator.set(acc)
+    return acc
+
+
+def get_current_accumulator() -> TurnTokenAccumulator | None:
+    return _turn_accumulator.get()  # None when no turn was started in this context
+
+
+class TokenTrackingCallback(CustomLogger):
+    """LiteLLM callback that records each successful call's usage in the active turn."""
+
+    async def async_log_success_event(
+        self,
+        kwargs: dict[str, Any],
+        response_obj: Any,
+        start_time: Any,
+        end_time: Any,
+    ) -> None:
+        acc = _turn_accumulator.get()
+        if acc is None:
+            return  # no turn started in this context: nothing to attribute the usage to
+
+        usage = getattr(response_obj, "usage", None)
+        if not usage:
+            return  # response carries no usage information
+        # Missing/None counters count as 0; a missing total falls back to prompt + completion.
+        prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
+        completion_tokens = getattr(usage, "completion_tokens", 0) or 0
+        total_tokens = getattr(usage, "total_tokens", 0) or (prompt_tokens + completion_tokens)
+
+        model = kwargs.get("model") or "unknown"  # also covers an explicit None/empty model
+
+        acc.add(
+            model=model,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+        )
+
+
+token_tracker = TokenTrackingCallback()
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index fd118528e..4459b9c06 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -1170,6 +1170,10 @@ async def stream_new_chat(
     _t_total = time.perf_counter()
     log_system_snapshot("stream_new_chat_START")
 
+    from app.services.token_tracking_service import start_turn
+
+    accumulator = start_turn()
+
     session = async_session_maker()
     try:
         # Mark AI as responding to this user for live collaboration
@@ -1527,6 +1531,17 @@ async def stream_new_chat(
         if stream_result.is_interrupted:
             if title_task is not None and not title_task.done():
                 title_task.cancel()
+
+            usage_summary = accumulator.per_message_summary()
+            if usage_summary:
+                yield streaming_service.format_data("token-usage", {
+                    "usage": usage_summary,
+                    "prompt_tokens": accumulator.total_prompt_tokens,
+                    "completion_tokens": accumulator.total_completion_tokens,
+                    "total_tokens": accumulator.grand_total,
+                    "call_details": accumulator.serialized_calls(),
+                })
+
             yield streaming_service.format_finish_step()
             yield streaming_service.format_finish()
             yield streaming_service.format_done()
@@ -1548,6 +1563,16 @@ async def stream_new_chat(
                     chat_id, generated_title
                 )
 
+        usage_summary = accumulator.per_message_summary()
+        if usage_summary:
+            yield streaming_service.format_data("token-usage", {
+                "usage": usage_summary,
+                "prompt_tokens": accumulator.total_prompt_tokens,
+                "completion_tokens": accumulator.total_completion_tokens,
+                "total_tokens": accumulator.grand_total,
+                "call_details": accumulator.serialized_calls(),
+            })
+
         # Fire background memory extraction if the agent didn't handle it.
         # Shared threads write to team memory; private threads write to user memory.
         if not stream_result.agent_called_update_memory:
@@ -1646,6 +1671,10 @@ async def stream_resume_chat(
     stream_result = StreamResult()
     _t_total = time.perf_counter()
 
+    from app.services.token_tracking_service import start_turn
+
+    accumulator = start_turn()
+
     session = async_session_maker()
     try:
         if user_id:
@@ -1769,11 +1798,31 @@ async def stream_resume_chat(
             chat_id,
         )
         if stream_result.is_interrupted:
+            usage_summary = accumulator.per_message_summary()
+            if usage_summary:
+                yield streaming_service.format_data("token-usage", {
+                    "usage": usage_summary,
+                    "prompt_tokens": accumulator.total_prompt_tokens,
+                    "completion_tokens": accumulator.total_completion_tokens,
+                    "total_tokens": accumulator.grand_total,
+                    "call_details": accumulator.serialized_calls(),
+                })
+
             yield streaming_service.format_finish_step()
             yield streaming_service.format_finish()
             yield streaming_service.format_done()
             return
 
+        usage_summary = accumulator.per_message_summary()
+        if usage_summary:
+            yield streaming_service.format_data("token-usage", {
+                "usage": usage_summary,
+                "prompt_tokens": accumulator.total_prompt_tokens,
+                "completion_tokens": accumulator.total_completion_tokens,
+                "total_tokens": accumulator.grand_total,
+                "call_details": accumulator.serialized_calls(),
+            })
+
         yield streaming_service.format_finish_step()
         yield streaming_service.format_finish()
         yield streaming_service.format_done()

From 55099a20acebcff99cd0561e9b1e4bea963f2dbe Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 13:40:46 +0530
Subject: [PATCH 03/14] feat: enhance token usage tracking in chat messages
 with UI integration and dropdown display

---
 .../new-chat/[[...chat_id]]/page.tsx          | 48 ++++++++++++++
 .../assistant-ui/assistant-message.tsx        | 62 ++++++++++++++++++-
 surfsense_web/lib/chat/message-utils.ts       | 13 ++--
 surfsense_web/lib/chat/streaming-state.ts     | 10 +++
 surfsense_web/lib/chat/thread-persistence.ts  | 12 +++-
 5 files changed, 137 insertions(+), 8 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
index 58eb58f4b..34bf0c09e 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@@ -624,6 +624,7 @@ export default function NewChatPage() {
 			};
 			const { contentParts, toolCallIndices } = contentPartsState;
 			let wasInterrupted = false;
+			let tokenUsageData: Record<string, unknown> | null = null;
 
 			// Add placeholder assistant message
 			setMessages((prev) => [
@@ -821,6 +822,10 @@ export default function NewChatPage() {
 							break;
 						}
 
+						case "data-token-usage":
+							tokenUsageData = parsed.data;
+							break;
+
 						case "error":
 							throw new Error(parsed.errorText || "Server error");
 					}
@@ -828,6 +833,16 @@ export default function NewChatPage() {
 
 				batcher.flush();
 
+				if (tokenUsageData) {
+					setMessages((prev) =>
+						prev.map((m) =>
+							m.id === assistantMsgId
+								? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record<string, unknown> ?? {}), usage: tokenUsageData } } }
+								: m
+						)
+					);
+				}
+
 				// Skip persistence for interrupted messages -- handleResume will persist the final version
 				const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI);
 				if (contentParts.length > 0 && !wasInterrupted) {
@@ -835,6 +850,7 @@ export default function NewChatPage() {
 						const savedMessage = await appendMessage(currentThreadId, {
 							role: "assistant",
 							content: finalContent,
+							token_usage: tokenUsageData ?? undefined,
 						});
 
 						// Update message ID from temporary to database ID so comments work immediately
@@ -965,6 +981,7 @@ export default function NewChatPage() {
 				toolCallIndices: new Map(),
 			};
 			const { contentParts, toolCallIndices } = contentPartsState;
+			let tokenUsageData: Record<string, unknown> | null = null;
 
 			const existingMsg = messages.find((m) => m.id === assistantMsgId);
 			if (existingMsg && Array.isArray(existingMsg.content)) {
@@ -1149,6 +1166,10 @@ export default function NewChatPage() {
 							break;
 						}
 
+						case "data-token-usage":
+							tokenUsageData = parsed.data;
+							break;
+
 						case "error":
 							throw new Error(parsed.errorText || "Server error");
 					}
@@ -1156,12 +1177,23 @@ export default function NewChatPage() {
 
 				batcher.flush();
 
+				if (tokenUsageData) {
+					setMessages((prev) =>
+						prev.map((m) =>
+							m.id === assistantMsgId
+								? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record<string, unknown> ?? {}), usage: tokenUsageData } } }
+								: m
+						)
+					);
+				}
+
 				const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI);
 				if (contentParts.length > 0) {
 					try {
 						const savedMessage = await appendMessage(resumeThreadId, {
 							role: "assistant",
 							content: finalContent,
+							token_usage: tokenUsageData ?? undefined,
 						});
 						const newMsgId = `msg-${savedMessage.id}`;
 						setMessages((prev) =>
@@ -1319,6 +1351,7 @@ export default function NewChatPage() {
 			};
 			const { contentParts, toolCallIndices } = contentPartsState;
 			const batcher = new FrameBatchedUpdater();
+			let tokenUsageData: Record<string, unknown> | null = null;
 
 			// Add placeholder messages to UI
 			// Always add back the user message (with new query for edit, or original content for reload)
@@ -1428,6 +1461,10 @@ export default function NewChatPage() {
 							break;
 						}
 
+						case "data-token-usage":
+							tokenUsageData = parsed.data;
+							break;
+
 						case "error":
 							throw new Error(parsed.errorText || "Server error");
 					}
@@ -1435,6 +1472,16 @@ export default function NewChatPage() {
 
 				batcher.flush();
 
+				if (tokenUsageData) {
+					setMessages((prev) =>
+						prev.map((m) =>
+							m.id === assistantMsgId
+								? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record<string, unknown> ?? {}), usage: tokenUsageData } } }
+								: m
+						)
+					);
+				}
+
 				// Persist messages after streaming completes
 				const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI);
 				if (contentParts.length > 0) {
@@ -1459,6 +1506,7 @@ export default function NewChatPage() {
 						const savedMessage = await appendMessage(threadId, {
 							role: "assistant",
 							content: finalContent,
+							token_usage: tokenUsageData ?? undefined,
 						});
 
 						// Update assistant message ID to database ID
diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx
index 764acabba..25a579947 100644
--- a/surfsense_web/components/assistant-ui/assistant-message.tsx
+++ b/surfsense_web/components/assistant-ui/assistant-message.tsx
@@ -15,6 +15,7 @@ import {
 	ExternalLink,
 	Globe,
 	MessageSquare,
+	MoreHorizontalIcon,
 	RefreshCwIcon,
 } from "lucide-react";
 import dynamic from "next/dynamic";
@@ -39,6 +40,14 @@ import {
 	DrawerHeader,
 	DrawerTitle,
 } from "@/components/ui/drawer";
+import {
+	DropdownMenu,
+	DropdownMenuContent,
+	DropdownMenuItem,
+	DropdownMenuLabel,
+	DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
+import { Button } from "@/components/ui/button";
 import { useComments } from "@/hooks/use-comments";
 import { useMediaQuery } from "@/hooks/use-media-query";
 import { useElectronAPI } from "@/hooks/use-platform";
@@ -366,6 +375,56 @@ export const MessageError: FC = () => {
 	);
 };
 
+const TokenUsageDropdown: FC = () => { // per-model token usage menu; renders nothing without usage
+	const usage = useAuiState(({ message }) => {
+		const custom = message?.metadata?.custom as Record<string, unknown> | undefined;
+		return custom?.usage as Record<string, unknown> | undefined;
+	});
+
+	if (!usage) return null;
+
+	const totalTokens = typeof usage.total_tokens === "number" ? usage.total_tokens : 0; // untyped JSON
+	if (totalTokens === 0) return null;
+
+	const modelBreakdown = (usage.usage ?? usage.model_breakdown) as // SSE key vs. persisted key
+		| Record<string, { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number } | null>
+		| undefined;
+
+	const models = modelBreakdown ? Object.entries(modelBreakdown) : [];
+
+	return (
+		<DropdownMenu>
+			<DropdownMenuTrigger asChild>
+				<Button variant="ghost" size="icon" className="aui-button-icon size-6 p-1">
+					<MoreHorizontalIcon className="size-4" />
+					<span className="sr-only">More</span>
+				</Button>
+			</DropdownMenuTrigger>
+			<DropdownMenuContent align="start" className="min-w-[180px]">
+				<DropdownMenuLabel className="text-xs text-muted-foreground font-normal">
+					Token Usage
+				</DropdownMenuLabel>
+				{models.length > 0 ? (
+					models.map(([model, counts]) => (
+						<DropdownMenuItem key={model} className="flex-col items-start gap-0.5 cursor-default" onSelect={(e) => e.preventDefault()}>
+							<span className="text-xs font-medium">{model}</span>
+							<span className="text-xs text-muted-foreground">
+								{(counts?.total_tokens ?? 0).toLocaleString()} tokens
+							</span>
+						</DropdownMenuItem>
+					))
+				) : (
+					<DropdownMenuItem className="flex-col items-start gap-0.5 cursor-default" onSelect={(e) => e.preventDefault()}>
+						<span className="text-xs text-muted-foreground">
+							{totalTokens.toLocaleString()} tokens
+						</span>
+					</DropdownMenuItem>
+				)}
+			</DropdownMenuContent>
+		</DropdownMenu>
+	);
+};
+
 const AssistantMessageInner: FC = () => {
 	const isMobile = !useMediaQuery("(min-width: 768px)");
 
@@ -427,7 +486,7 @@ const AssistantMessageInner: FC = () => {
 				</div>
 			)}
 
-			<div className="aui-assistant-message-footer mt-1 mb-5 ml-2 flex">
+			<div className="aui-assistant-message-footer mt-1 mb-5 ml-2 flex items-center gap-2">
 				<AssistantActionBar />
 			</div>
 		</CitationMetadataProvider>
@@ -624,6 +683,7 @@ const AssistantActionBar: FC = () => {
 					<ClipboardPaste />
 				</TooltipIconButton>
 			)}
+			<TokenUsageDropdown />
 		</ActionBarPrimitive.Root>
 	);
 };
diff --git a/surfsense_web/lib/chat/message-utils.ts b/surfsense_web/lib/chat/message-utils.ts
index 7c0da03c4..6ec5bd53d 100644
--- a/surfsense_web/lib/chat/message-utils.ts
+++ b/surfsense_web/lib/chat/message-utils.ts
@@ -39,13 +39,16 @@ export function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
 		content = [{ type: "text", text: String(msg.content) }];
 	}
 
-	const metadata = msg.author_id
+	const metadata = (msg.author_id || msg.token_usage)
 		? {
 				custom: {
-					author: {
-						displayName: msg.author_display_name ?? null,
-						avatarUrl: msg.author_avatar_url ?? null,
-					},
+					...(msg.author_id && {
+						author: {
+							displayName: msg.author_display_name ?? null,
+							avatarUrl: msg.author_avatar_url ?? null,
+						},
+					}),
+					...(msg.token_usage && { usage: msg.token_usage }),
 				},
 			}
 		: undefined;
diff --git a/surfsense_web/lib/chat/streaming-state.ts b/surfsense_web/lib/chat/streaming-state.ts
index d54650d40..e5d77672f 100644
--- a/surfsense_web/lib/chat/streaming-state.ts
+++ b/surfsense_web/lib/chat/streaming-state.ts
@@ -238,6 +238,16 @@ export type SSEEvent =
 	| { type: "data-thread-title-update"; data: { threadId: number; title: string } }
 	| { type: "data-interrupt-request"; data: Record<string, unknown> }
 	| { type: "data-documents-updated"; data: Record<string, unknown> }
+	| {
+			type: "data-token-usage";
+			data: {
+				usage: Record<string, { prompt_tokens: number; completion_tokens: number; total_tokens: number }>;
+				prompt_tokens: number;
+				completion_tokens: number;
+				total_tokens: number;
+				call_details: Array<{ model: string; prompt_tokens: number; completion_tokens: number; total_tokens: number }>;
+			};
+	  }
 	| { type: "error"; errorText: string };
 
 /**
diff --git a/surfsense_web/lib/chat/thread-persistence.ts b/surfsense_web/lib/chat/thread-persistence.ts
index 08c08ba78..de9827c32 100644
--- a/surfsense_web/lib/chat/thread-persistence.ts
+++ b/surfsense_web/lib/chat/thread-persistence.ts
@@ -26,6 +26,13 @@ export interface ThreadRecord {
 	has_comments?: boolean;
 }
 
+export interface TokenUsageSummary { // token totals attached to a message record
+	prompt_tokens: number;
+	completion_tokens: number;
+	total_tokens: number;
+	model_breakdown?: Record<string, { prompt_tokens: number; completion_tokens: number; total_tokens: number }> | null;
+}
+
 export interface MessageRecord {
 	id: number;
 	thread_id: number;
@@ -35,6 +42,7 @@ export interface MessageRecord {
 	author_id?: string | null;
 	author_display_name?: string | null;
 	author_avatar_url?: string | null;
+	token_usage?: TokenUsageSummary | null;
 }
 
 export interface ThreadListResponse {
@@ -111,11 +119,11 @@ export async function getThreadMessages(threadId: number): Promise<ThreadHistory
 }
 
 /**
- * Append a message to a thread
+ * Append a message to a thread.
  */
 export async function appendMessage(
 	threadId: number,
-	message: { role: "user" | "assistant" | "system"; content: unknown }
+	message: { role: "user" | "assistant" | "system"; content: unknown; token_usage?: unknown }
 ): Promise<MessageRecord> {
 	return baseApiService.post<MessageRecord>(`/api/v1/threads/${threadId}/messages`, undefined, {
 		body: message,

From 5af6005163d1b9b749ae8e32f25c87a0234768e6 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 14:28:31 +0530
Subject: [PATCH 04/14] feat: improve token usage tracking and response
 handling in chat routes and services

---
 .../app/routes/new_chat_routes.py             | 17 +++++++++++++----
 .../app/services/token_tracking_service.py    |  7 +++++++
 .../app/tasks/chat/stream_new_chat.py         | 19 +++++++++++++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py
index a5245456e..fe79c7c06 100644
--- a/surfsense_backend/app/routes/new_chat_routes.py
+++ b/surfsense_backend/app/routes/new_chat_routes.py
@@ -46,12 +46,12 @@ from app.schemas.new_chat import (
     NewChatThreadWithMessages,
     PublicChatSnapshotCreateResponse,
     PublicChatSnapshotListResponse,
-    TokenUsageSummary,
     RegenerateRequest,
     ResumeRequest,
     ThreadHistoryLoadResponse,
     ThreadListItem,
     ThreadListResponse,
+    TokenUsageSummary,
 )
 from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat
 from app.users import current_active_user
@@ -965,9 +965,17 @@ async def append_message(
 
         await session.commit()
 
-        # Return the in-memory object (already has id from flush) instead of
-        # doing an extra refresh() SELECT.
-        return db_message
+        # Build response manually to avoid lazy-loading the token_usage
+        # relationship after commit (which would trigger MissingGreenlet).
+        return NewChatMessageRead(
+            id=db_message.id,
+            thread_id=db_message.thread_id,
+            role=db_message.role,
+            content=db_message.content,
+            created_at=db_message.created_at,
+            author_id=db_message.author_id,
+            token_usage=None,
+        )
 
     except HTTPException:
         raise
@@ -1031,6 +1039,7 @@ async def list_messages(
         # Get messages
         query = (
             select(NewChatMessage)
+            .options(selectinload(NewChatMessage.token_usage))
             .filter(NewChatMessage.thread_id == thread_id)
             .order_by(NewChatMessage.created_at)
             .offset(skip)
diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py
index 434a55ae0..98cb13bb8 100644
--- a/surfsense_backend/app/services/token_tracking_service.py
+++ b/surfsense_backend/app/services/token_tracking_service.py
@@ -87,6 +87,7 @@ def start_turn() -> TurnTokenAccumulator:
     """Create a fresh accumulator for the current async context and return it."""
     acc = TurnTokenAccumulator()
     _turn_accumulator.set(acc)
+    logger.info("[TokenTracking] start_turn: new accumulator created (id=%s)", id(acc))
     return acc
 
 
@@ -106,10 +107,12 @@ class TokenTrackingCallback(CustomLogger):
     ) -> None:
         acc = _turn_accumulator.get()
         if acc is None:
+            logger.debug("[TokenTracking] async_log_success_event fired but no accumulator in context")
             return
 
         usage = getattr(response_obj, "usage", None)
         if not usage:
+            logger.debug("[TokenTracking] async_log_success_event fired but response has no usage data")
             return
 
         prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
@@ -124,6 +127,10 @@ class TokenTrackingCallback(CustomLogger):
             completion_tokens=completion_tokens,
             total_tokens=total_tokens,
         )
+        logger.info(
+            "[TokenTracking] Captured: model=%s prompt=%d completion=%d total=%d (accumulator now has %d calls)",
+            model, prompt_tokens, completion_tokens, total_tokens, len(acc.calls),
+        )
 
 
 token_tracker = TokenTrackingCallback()
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index 4459b9c06..2002e1585 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -1532,7 +1532,12 @@ async def stream_new_chat(
             if title_task is not None and not title_task.done():
                 title_task.cancel()
 
+            await asyncio.sleep(0.2)
             usage_summary = accumulator.per_message_summary()
+            _perf_log.info(
+                "[token_usage] interrupted new_chat: calls=%d total=%d summary=%s",
+                len(accumulator.calls), accumulator.grand_total, usage_summary,
+            )
             if usage_summary:
                 yield streaming_service.format_data("token-usage", {
                     "usage": usage_summary,
@@ -1563,7 +1568,12 @@ async def stream_new_chat(
                     chat_id, generated_title
                 )
 
+        await asyncio.sleep(0.2)
         usage_summary = accumulator.per_message_summary()
+        _perf_log.info(
+            "[token_usage] normal new_chat: calls=%d total=%d summary=%s",
+            len(accumulator.calls), accumulator.grand_total, usage_summary,
+        )
         if usage_summary:
             yield streaming_service.format_data("token-usage", {
                 "usage": usage_summary,
@@ -1797,8 +1807,13 @@ async def stream_resume_chat(
             time.perf_counter() - _t_stream_start,
             chat_id,
         )
+        await asyncio.sleep(0.2)
         if stream_result.is_interrupted:
             usage_summary = accumulator.per_message_summary()
+            _perf_log.info(
+                "[token_usage] interrupted resume_chat: calls=%d total=%d summary=%s",
+                len(accumulator.calls), accumulator.grand_total, usage_summary,
+            )
             if usage_summary:
                 yield streaming_service.format_data("token-usage", {
                     "usage": usage_summary,
@@ -1814,6 +1829,10 @@ async def stream_resume_chat(
             return
 
         usage_summary = accumulator.per_message_summary()
+        _perf_log.info(
+            "[token_usage] normal resume_chat: calls=%d total=%d summary=%s",
+            len(accumulator.calls), accumulator.grand_total, usage_summary,
+        )
         if usage_summary:
             yield streaming_service.format_data("token-usage", {
                 "usage": usage_summary,

From 5510c1de033777890af2706a91ca9b831d537c10 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 14:47:59 +0530
Subject: [PATCH 05/14] feat: integrate token usage context and enhance message
 info display in chat UI

---
 .../new-chat/[[...chat_id]]/page.tsx          | 60 ++++++--------
 .../assistant-ui/assistant-message.tsx        | 73 +++++++++--------
 .../assistant-ui/token-usage-context.tsx      | 79 +++++++++++++++++++
 3 files changed, 145 insertions(+), 67 deletions(-)
 create mode 100644 surfsense_web/components/assistant-ui/token-usage-context.tsx

diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
index 34bf0c09e..ff953eaf9 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@@ -42,6 +42,7 @@ import { useChatSessionStateSync } from "@/hooks/use-chat-session-state";
 import { useMessagesSync } from "@/hooks/use-messages-sync";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { getBearerToken } from "@/lib/auth-utils";
+import { createTokenUsageStore, TokenUsageProvider, type TokenUsageData } from "@/components/assistant-ui/token-usage-context";
 import { convertToThreadMessage } from "@/lib/chat/message-utils";
 import {
 	isPodcastGenerating,
@@ -195,6 +196,7 @@ export default function NewChatPage() {
 	const [currentThread, setCurrentThread] = useState<ThreadRecord | null>(null);
 	const [messages, setMessages] = useState<ThreadMessageLike[]>([]);
 	const [isRunning, setIsRunning] = useState(false);
+	const [tokenUsageStore] = useState(() => createTokenUsageStore());
 	const abortControllerRef = useRef<AbortController | null>(null);
 	const [pendingInterrupt, setPendingInterrupt] = useState<{
 		threadId: number;
@@ -307,6 +309,7 @@ export default function NewChatPage() {
 		setThreadId(null);
 		setCurrentThread(null);
 		setMentionedDocuments([]);
+		tokenUsageStore.clear();
 		setSidebarDocuments([]);
 		setMessageDocumentsMap({});
 		clearPlanOwnerRegistry();
@@ -330,6 +333,12 @@ export default function NewChatPage() {
 					const loadedMessages = messagesResponse.messages.map(convertToThreadMessage);
 					setMessages(loadedMessages);
 
+					for (const msg of messagesResponse.messages) {
+						if (msg.token_usage) {
+							tokenUsageStore.set(`msg-${msg.id}`, msg.token_usage as TokenUsageData);
+						}
+					}
+
 					const restoredDocsMap: Record<string, MentionedDocumentInfo[]> = {};
 					for (const msg of messagesResponse.messages) {
 						if (msg.role === "user") {
@@ -374,6 +383,7 @@ export default function NewChatPage() {
 		closeEditorPanel,
 		removeChatTab,
 		searchSpaceId,
+		tokenUsageStore,
 	]);
 
 	// Initialize on mount, and re-init when switching search spaces (even if urlChatId is the same)
@@ -824,6 +834,7 @@ export default function NewChatPage() {
 
 						case "data-token-usage":
 							tokenUsageData = parsed.data;
+							tokenUsageStore.set(assistantMsgId, parsed.data as TokenUsageData);
 							break;
 
 						case "error":
@@ -833,16 +844,6 @@ export default function NewChatPage() {
 
 				batcher.flush();
 
-				if (tokenUsageData) {
-					setMessages((prev) =>
-						prev.map((m) =>
-							m.id === assistantMsgId
-								? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record<string, unknown> ?? {}), usage: tokenUsageData } } }
-								: m
-						)
-					);
-				}
-
 				// Skip persistence for interrupted messages -- handleResume will persist the final version
 				const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI);
 				if (contentParts.length > 0 && !wasInterrupted) {
@@ -855,8 +856,9 @@ export default function NewChatPage() {
 
 						// Update message ID from temporary to database ID so comments work immediately
 						const newMsgId = `msg-${savedMessage.id}`;
+						tokenUsageStore.rename(assistantMsgId, newMsgId);
 						setMessages((prev) =>
-							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m))
+							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)),
 						);
 
 						// Update pending interrupt with the new persisted message ID
@@ -946,6 +948,7 @@ export default function NewChatPage() {
 			currentUser,
 			disabledTools,
 			updateChatTabTitle,
+			tokenUsageStore,
 		]
 	);
 
@@ -1168,6 +1171,7 @@ export default function NewChatPage() {
 
 						case "data-token-usage":
 							tokenUsageData = parsed.data;
+							tokenUsageStore.set(assistantMsgId, parsed.data as TokenUsageData);
 							break;
 
 						case "error":
@@ -1177,16 +1181,6 @@ export default function NewChatPage() {
 
 				batcher.flush();
 
-				if (tokenUsageData) {
-					setMessages((prev) =>
-						prev.map((m) =>
-							m.id === assistantMsgId
-								? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record<string, unknown> ?? {}), usage: tokenUsageData } } }
-								: m
-						)
-					);
-				}
-
 				const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI);
 				if (contentParts.length > 0) {
 					try {
@@ -1196,8 +1190,9 @@ export default function NewChatPage() {
 							token_usage: tokenUsageData ?? undefined,
 						});
 						const newMsgId = `msg-${savedMessage.id}`;
+						tokenUsageStore.rename(assistantMsgId, newMsgId);
 						setMessages((prev) =>
-							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m))
+							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)),
 						);
 					} catch (err) {
 						console.error("Failed to persist resumed assistant message:", err);
@@ -1215,7 +1210,7 @@ export default function NewChatPage() {
 				abortControllerRef.current = null;
 			}
 		},
-		[pendingInterrupt, messages, searchSpaceId]
+		[pendingInterrupt, messages, searchSpaceId, tokenUsageStore]
 	);
 
 	useEffect(() => {
@@ -1463,6 +1458,7 @@ export default function NewChatPage() {
 
 						case "data-token-usage":
 							tokenUsageData = parsed.data;
+							tokenUsageStore.set(assistantMsgId, parsed.data as TokenUsageData);
 							break;
 
 						case "error":
@@ -1472,16 +1468,6 @@ export default function NewChatPage() {
 
 				batcher.flush();
 
-				if (tokenUsageData) {
-					setMessages((prev) =>
-						prev.map((m) =>
-							m.id === assistantMsgId
-								? { ...m, metadata: { ...m.metadata, custom: { ...(m.metadata?.custom as Record<string, unknown> ?? {}), usage: tokenUsageData } } }
-								: m
-						)
-					);
-				}
-
 				// Persist messages after streaming completes
 				const finalContent = buildContentForPersistence(contentPartsState, TOOLS_WITH_UI);
 				if (contentParts.length > 0) {
@@ -1509,10 +1495,10 @@ export default function NewChatPage() {
 							token_usage: tokenUsageData ?? undefined,
 						});
 
-						// Update assistant message ID to database ID
 						const newMsgId = `msg-${savedMessage.id}`;
+						tokenUsageStore.rename(assistantMsgId, newMsgId);
 						setMessages((prev) =>
-							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m))
+							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)),
 						);
 
 						trackChatResponseReceived(searchSpaceId, threadId);
@@ -1547,7 +1533,7 @@ export default function NewChatPage() {
 				abortControllerRef.current = null;
 			}
 		},
-		[threadId, searchSpaceId, messages, disabledTools]
+		[threadId, searchSpaceId, messages, disabledTools, tokenUsageStore]
 	);
 
 	// Handle editing a message - truncates history and regenerates with new query
@@ -1616,6 +1602,7 @@ export default function NewChatPage() {
 	}
 
 	return (
+		<TokenUsageProvider store={tokenUsageStore}>
 		<AssistantRuntimeProvider runtime={runtime}>
 			<ThinkingStepsDataUI />
 			<div key={searchSpaceId} className="flex h-full overflow-hidden">
@@ -1627,5 +1614,6 @@ export default function NewChatPage() {
 				<MobileHitlEditPanel />
 			</div>
 		</AssistantRuntimeProvider>
+		</TokenUsageProvider>
 	);
 }
diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx
index 25a579947..dff52c3f5 100644
--- a/surfsense_web/components/assistant-ui/assistant-message.tsx
+++ b/surfsense_web/components/assistant-ui/assistant-message.tsx
@@ -45,12 +45,14 @@ import {
 	DropdownMenuContent,
 	DropdownMenuItem,
 	DropdownMenuLabel,
+	DropdownMenuSeparator,
 	DropdownMenuTrigger,
 } from "@/components/ui/dropdown-menu";
 import { Button } from "@/components/ui/button";
 import { useComments } from "@/hooks/use-comments";
 import { useMediaQuery } from "@/hooks/use-media-query";
 import { useElectronAPI } from "@/hooks/use-platform";
+import { useTokenUsage } from "@/components/assistant-ui/token-usage-context";
 import { cn } from "@/lib/utils";
 
 // Captured once at module load — survives client-side navigations that strip the query param.
@@ -375,22 +377,24 @@ export const MessageError: FC = () => {
 	);
 };
 
-const TokenUsageDropdown: FC = () => {
-	const usage = useAuiState(({ message }) => {
-		const custom = message?.metadata?.custom as Record<string, unknown> | undefined;
-		return custom?.usage as Record<string, unknown> | undefined;
+function formatMessageDate(date: Date): string {
+	return date.toLocaleString(undefined, { // date + time fields, so toLocaleString rather than toLocaleDateString
+		month: "short",
+		day: "numeric",
+		hour: "numeric",
+		minute: "2-digit",
+		hour12: true,
 	});
+}
 
-	if (!usage) return null;
-
-	const totalTokens = (usage.total_tokens as number) ?? 0;
-	if (totalTokens === 0) return null;
-
-	const modelBreakdown = (usage.usage ?? usage.model_breakdown) as
-		| Record<string, { prompt_tokens: number; completion_tokens: number; total_tokens: number }>
-		| undefined;
+const MessageInfoDropdown: FC = () => {
+	const messageId = useAuiState(({ message }) => message?.id);
+	const createdAt = useAuiState(({ message }) => message?.createdAt);
+	const usage = useTokenUsage(messageId);
 
+	const modelBreakdown = usage ? (usage.usage ?? usage.model_breakdown) : undefined;
 	const models = modelBreakdown ? Object.entries(modelBreakdown) : [];
+	const hasUsage = usage && usage.total_tokens > 0;
 
 	return (
 		<DropdownMenu>
@@ -401,24 +405,31 @@ const TokenUsageDropdown: FC = () => {
 				</Button>
 			</DropdownMenuTrigger>
 			<DropdownMenuContent align="start" className="min-w-[180px]">
-				<DropdownMenuLabel className="text-xs text-muted-foreground font-normal">
-					Token Usage
-				</DropdownMenuLabel>
-				{models.length > 0 ? (
-					models.map(([model, counts]) => (
-						<DropdownMenuItem key={model} className="flex-col items-start gap-0.5 cursor-default" onSelect={(e) => e.preventDefault()}>
-							<span className="text-xs font-medium">{model}</span>
-							<span className="text-xs text-muted-foreground">
-								{counts.total_tokens.toLocaleString()} tokens
-							</span>
-						</DropdownMenuItem>
-					))
-				) : (
-					<DropdownMenuItem className="flex-col items-start gap-0.5 cursor-default" onSelect={(e) => e.preventDefault()}>
-						<span className="text-xs text-muted-foreground">
-							{totalTokens.toLocaleString()} tokens
-						</span>
-					</DropdownMenuItem>
+				{createdAt && (
+					<DropdownMenuLabel className="text-xs text-muted-foreground font-normal select-none">
+						{formatMessageDate(createdAt)}
+					</DropdownMenuLabel>
+				)}
+				{hasUsage && (
+					<>
+						<DropdownMenuSeparator />
+						{models.length > 0 ? (
+							models.map(([model, counts]) => (
+								<DropdownMenuItem key={model} className="flex-col items-start gap-0.5 cursor-default" onSelect={(e) => e.preventDefault()}>
+									<span className="text-xs font-medium">{model}</span>
+									<span className="text-xs text-muted-foreground">
+										{counts.total_tokens.toLocaleString()} tokens
+									</span>
+								</DropdownMenuItem>
+							))
+						) : (
+							<DropdownMenuItem className="flex-col items-start gap-0.5 cursor-default" onSelect={(e) => e.preventDefault()}>
+								<span className="text-xs text-muted-foreground">
+									{usage.total_tokens.toLocaleString()} tokens
+								</span>
+							</DropdownMenuItem>
+						)}
+					</>
 				)}
 			</DropdownMenuContent>
 		</DropdownMenu>
@@ -683,7 +694,7 @@ const AssistantActionBar: FC = () => {
 					<ClipboardPaste />
 				</TooltipIconButton>
 			)}
-			<TokenUsageDropdown />
+			<MessageInfoDropdown />
 		</ActionBarPrimitive.Root>
 	);
 };
diff --git a/surfsense_web/components/assistant-ui/token-usage-context.tsx b/surfsense_web/components/assistant-ui/token-usage-context.tsx
new file mode 100644
index 000000000..8b82f33ff
--- /dev/null
+++ b/surfsense_web/components/assistant-ui/token-usage-context.tsx
@@ -0,0 +1,103 @@
+"use client";
+
+import { createContext, useContext, useCallback, useSyncExternalStore, type FC, type ReactNode } from "react";
+
+/**
+ * Token counts attached to a single chat message.
+ *
+ * The per-model breakdown is keyed `usage` on the streamed `data-token-usage`
+ * event and `model_breakdown` on persisted messages, where it may be `null`.
+ */
+export interface TokenUsageData {
+	prompt_tokens: number;
+	completion_tokens: number;
+	total_tokens: number;
+	usage?: Record<string, { prompt_tokens: number; completion_tokens: number; total_tokens: number }>;
+	model_breakdown?: Record<string, { prompt_tokens: number; completion_tokens: number; total_tokens: number }> | null;
+}
+
+type Listener = () => void;
+
+/**
+ * In-memory map from message id to token usage that notifies subscribers on
+ * every mutation, so it can back `useSyncExternalStore`.
+ */
+class TokenUsageStore {
+	private data = new Map<string, TokenUsageData>();
+	private listeners = new Set<Listener>();
+
+	/** Return the usage recorded for `messageId`, or `undefined` when there is none. */
+	get(messageId: string): TokenUsageData | undefined {
+		return this.data.get(messageId);
+	}
+
+	/** Record (or overwrite) the usage for `messageId` and notify subscribers. */
+	set(messageId: string, usage: TokenUsageData): void {
+		this.data.set(messageId, usage);
+		this.notify();
+	}
+
+	/** Move the usage stored under `oldId` to `newId`; does nothing when `oldId` has no entry. */
+	rename(oldId: string, newId: string): void {
+		const usage = this.data.get(oldId);
+		if (usage) {
+			this.data.delete(oldId);
+			this.data.set(newId, usage);
+			this.notify();
+		}
+	}
+
+	/** Remove every entry and notify subscribers. */
+	clear(): void {
+		this.data.clear();
+		this.notify();
+	}
+
+	/** Register a change listener; the returned function unregisters it. */
+	subscribe = (listener: Listener): (() => void) => {
+		this.listeners.add(listener);
+		return () => this.listeners.delete(listener);
+	};
+
+	/** Invoke every registered listener. */
+	private notify(): void {
+		for (const l of this.listeners) l();
+	}
+}
+
+const TokenUsageContext = createContext<TokenUsageStore | null>(null);
+
+/** Expose `store` to `useTokenUsage` / `useTokenUsageStore` callers in the subtree. */
+export const TokenUsageProvider: FC<{ store: TokenUsageStore; children: ReactNode }> = ({ store, children }) => (
+	<TokenUsageContext.Provider value={store}>{children}</TokenUsageContext.Provider>
+);
+
+/** Return the store from context; throws when used outside a `TokenUsageProvider`. */
+export function useTokenUsageStore(): TokenUsageStore {
+	const store = useContext(TokenUsageContext);
+	if (!store) throw new Error("useTokenUsageStore must be used within TokenUsageProvider");
+	return store;
+}
+
+/**
+ * Subscribe to the token usage of one message.
+ *
+ * Returns `undefined` when there is no provider, no `messageId`, or no stored entry.
+ */
+export function useTokenUsage(messageId: string | undefined): TokenUsageData | undefined {
+	const store = useContext(TokenUsageContext);
+	const getSnapshot = useCallback(
+		() => (store && messageId ? store.get(messageId) : undefined),
+		[store, messageId],
+	);
+	const subscribe = useCallback(
+		(onStoreChange: () => void) => (store ? store.subscribe(onStoreChange) : () => {}),
+		[store],
+	);
+	return useSyncExternalStore(subscribe, getSnapshot, getSnapshot);
+}
+
+/** Create a new, empty token usage store. */
+export function createTokenUsageStore(): TokenUsageStore {
+	return new TokenUsageStore();
+}

From f21bdc0668e94f70f6cb67659a3d0b59572e3317 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 15:29:02 +0530
Subject: [PATCH 06/14] feat: enhance LLM response handling and token usage
 tracking in chat services and UI components

---
 .../app/services/llm_router_service.py        | 29 ++++++++++++---
 .../app/services/token_tracking_service.py    |  4 +++
 .../app/tasks/chat/stream_new_chat.py         | 36 ++++++++++++-------
 .../assistant-ui/assistant-message.tsx        | 35 +++++++++---------
 4 files changed, 67 insertions(+), 37 deletions(-)

diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py
index d97665f7a..1bf9e2386 100644
--- a/surfsense_backend/app/services/llm_router_service.py
+++ b/surfsense_backend/app/services/llm_router_service.py
@@ -820,7 +820,9 @@ class ChatLiteLLMRouter(BaseChatModel):
         )
 
         # Convert response to ChatResult with potential tool calls
-        message = self._convert_response_to_message(response.choices[0].message)
+        message = self._convert_response_to_message(
+            response.choices[0].message, response=response
+        )
         generation = ChatGeneration(message=message)
 
         return ChatResult(generations=[generation])
@@ -886,7 +888,9 @@ class ChatLiteLLMRouter(BaseChatModel):
         )
 
         # Convert response to ChatResult with potential tool calls
-        message = self._convert_response_to_message(response.choices[0].message)
+        message = self._convert_response_to_message(
+            response.choices[0].message, response=response
+        )
         generation = ChatGeneration(message=message)
 
         return ChatResult(generations=[generation])
@@ -1076,7 +1080,9 @@ class ChatLiteLLMRouter(BaseChatModel):
 
         return result
 
-    def _convert_response_to_message(self, response_message: Any) -> AIMessage:
+    def _convert_response_to_message(
+        self, response_message: Any, response: Any = None
+    ) -> AIMessage:
         """Convert a LiteLLM response message to a LangChain AIMessage."""
         import json
 
@@ -1099,9 +1105,22 @@ class ChatLiteLLMRouter(BaseChatModel):
                         tool_call["args"] = tc.function.arguments
                 tool_calls.append(tool_call)
 
+        extra_kwargs: dict[str, Any] = {}
+        if response:
+            usage = getattr(response, "usage", None)
+            if usage:
+                extra_kwargs["usage_metadata"] = {
+                    "input_tokens": getattr(usage, "prompt_tokens", 0) or 0,
+                    "output_tokens": getattr(usage, "completion_tokens", 0) or 0,
+                    "total_tokens": getattr(usage, "total_tokens", 0) or 0,
+                }
+            extra_kwargs["response_metadata"] = {
+                "model_name": getattr(response, "model", "unknown"),
+            }
+
         if tool_calls:
-            return AIMessage(content=content, tool_calls=tool_calls)
-        return AIMessage(content=content)
+            return AIMessage(content=content, tool_calls=tool_calls, **extra_kwargs)
+        return AIMessage(content=content, **extra_kwargs)
 
     def _convert_delta_to_chunk(self, delta: Any) -> AIMessageChunk | None:
         """Convert a streaming delta to an AIMessageChunk."""
diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py
index 98cb13bb8..6a5b3793f 100644
--- a/surfsense_backend/app/services/token_tracking_service.py
+++ b/surfsense_backend/app/services/token_tracking_service.py
@@ -4,6 +4,10 @@ Token usage tracking via LiteLLM custom callback.
 Uses a ContextVar-scoped accumulator to group all LLM calls within a single
 async request/turn. The accumulated data is emitted via SSE and persisted
 when the frontend calls appendMessage.
+
+Agent LLM calls are captured automatically via the async callback.
+Title-generation usage is added explicitly from the LangChain response
+metadata to avoid callback-timing issues.
 """
 
 from __future__ import annotations
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index 2002e1585..364a14bad 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -1459,22 +1459,35 @@ async def stream_new_chat(
         )
         is_first_response = (assistant_count_result.scalar() or 0) == 0
 
-        title_task: asyncio.Task[str | None] | None = None
+        title_task: asyncio.Task[tuple[str | None, dict[str, str | int] | None]] | None = None
         if is_first_response:
 
-            async def _generate_title() -> str | None:
+            async def _generate_title() -> tuple[str | None, dict[str, str | int] | None]:
+                """Return (title, usage); usage holds the model name plus prompt/completion/total token counts, or None."""
                 try:
                     title_chain = TITLE_GENERATION_PROMPT_TEMPLATE | llm
                     title_result = await title_chain.ainvoke(
                         {"user_query": user_query[:500]}
                     )
-                    if title_result and hasattr(title_result, "content"):
-                        raw_title = title_result.content.strip()
-                        if raw_title and len(raw_title) <= 100:
-                            return raw_title.strip("\"'")
+                    usage_dict: dict[str, str | int] | None = None
+                    if title_result:
+                        um = getattr(title_result, "usage_metadata", None)
+                        if um:
+                            rm = getattr(title_result, "response_metadata", None) or {}
+                            raw_model = rm.get("model_name") or "unknown"  # key may be absent or None
+                            usage_dict = {
+                                "model": raw_model.split("/", 1)[-1],  # text after the first "/", whole name if none
+                                "prompt_tokens": um.get("input_tokens", 0),
+                                "completion_tokens": um.get("output_tokens", 0),
+                                "total_tokens": um.get("total_tokens", 0),
+                            }
+                        if hasattr(title_result, "content"):
+                            raw_title = title_result.content.strip()
+                            if raw_title and len(raw_title) <= 100:
+                                return raw_title.strip("\"'"), usage_dict
+                    return None, usage_dict
                 except Exception:
-                    pass
-                return None
+                    return None, None
 
             title_task = asyncio.create_task(_generate_title())
 
@@ -1506,7 +1519,7 @@ async def stream_new_chat(
 
             # Inject title update mid-stream as soon as the background task finishes
             if title_task is not None and title_task.done() and not title_emitted:
-                generated_title = title_task.result()
+                generated_title, _title_usage = title_task.result()
                 if generated_title:
                     async with shielded_async_session() as title_session:
                         title_thread_result = await title_session.execute(
@@ -1532,7 +1545,6 @@ async def stream_new_chat(
             if title_task is not None and not title_task.done():
                 title_task.cancel()
 
-            await asyncio.sleep(0.2)
             usage_summary = accumulator.per_message_summary()
             _perf_log.info(
                 "[token_usage] interrupted new_chat: calls=%d total=%d summary=%s",
@@ -1554,7 +1566,7 @@ async def stream_new_chat(
 
         # If the title task didn't finish during streaming, await it now
         if title_task is not None and not title_emitted:
-            generated_title = await title_task
+            generated_title, _title_usage = await title_task
             if generated_title:
                 async with shielded_async_session() as title_session:
                     title_thread_result = await title_session.execute(
@@ -1568,7 +1580,6 @@ async def stream_new_chat(
                     chat_id, generated_title
                 )
 
-        await asyncio.sleep(0.2)
         usage_summary = accumulator.per_message_summary()
         _perf_log.info(
             "[token_usage] normal new_chat: calls=%d total=%d summary=%s",
@@ -1807,7 +1818,6 @@ async def stream_resume_chat(
             time.perf_counter() - _t_stream_start,
             chat_id,
         )
-        await asyncio.sleep(0.2)
         if stream_result.is_interrupted:
             usage_summary = accumulator.per_message_summary()
             _perf_log.info(
diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx
index dff52c3f5..7a357dc85 100644
--- a/surfsense_web/components/assistant-ui/assistant-message.tsx
+++ b/surfsense_web/components/assistant-ui/assistant-message.tsx
@@ -1,4 +1,5 @@
 import {
+	ActionBarMorePrimitive,
 	ActionBarPrimitive,
 	AuiIf,
 	ErrorPrimitive,
@@ -40,14 +41,7 @@ import {
 	DrawerHeader,
 	DrawerTitle,
 } from "@/components/ui/drawer";
-import {
-	DropdownMenu,
-	DropdownMenuContent,
-	DropdownMenuItem,
-	DropdownMenuLabel,
-	DropdownMenuSeparator,
-	DropdownMenuTrigger,
-} from "@/components/ui/dropdown-menu";
+import { DropdownMenuLabel } from "@/components/ui/dropdown-menu";
 import { Button } from "@/components/ui/button";
 import { useComments } from "@/hooks/use-comments";
 import { useMediaQuery } from "@/hooks/use-media-query";
@@ -397,14 +391,17 @@ const MessageInfoDropdown: FC = () => {
 	const hasUsage = usage && usage.total_tokens > 0;
 
 	return (
-		<DropdownMenu>
-			<DropdownMenuTrigger asChild>
+		<ActionBarMorePrimitive.Root>
+			<ActionBarMorePrimitive.Trigger asChild>
 				<Button variant="ghost" size="icon" className="aui-button-icon size-6 p-1">
 					<MoreHorizontalIcon className="size-4" />
 					<span className="sr-only">More</span>
 				</Button>
-			</DropdownMenuTrigger>
-			<DropdownMenuContent align="start" className="min-w-[180px]">
+			</ActionBarMorePrimitive.Trigger>
+			<ActionBarMorePrimitive.Content
+				align="start"
+				className="bg-muted text-popover-foreground z-50 max-h-(--radix-dropdown-menu-content-available-height) min-w-[180px] origin-(--radix-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border dark:border-neutral-700 p-1 shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2"
+			>
 				{createdAt && (
 					<DropdownMenuLabel className="text-xs text-muted-foreground font-normal select-none">
 						{formatMessageDate(createdAt)}
@@ -412,27 +409,27 @@ const MessageInfoDropdown: FC = () => {
 				)}
 				{hasUsage && (
 					<>
-						<DropdownMenuSeparator />
+						<ActionBarMorePrimitive.Separator className="bg-border mx-2 my-1 h-px" />
 						{models.length > 0 ? (
 							models.map(([model, counts]) => (
-								<DropdownMenuItem key={model} className="flex-col items-start gap-0.5 cursor-default" onSelect={(e) => e.preventDefault()}>
+								<ActionBarMorePrimitive.Item key={model} className="focus:bg-neutral-200 dark:focus:bg-neutral-700 relative flex cursor-default flex-col items-start gap-0.5 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none" onSelect={(e) => e.preventDefault()}>
 									<span className="text-xs font-medium">{model}</span>
 									<span className="text-xs text-muted-foreground">
 										{counts.total_tokens.toLocaleString()} tokens
 									</span>
-								</DropdownMenuItem>
+								</ActionBarMorePrimitive.Item>
 							))
 						) : (
-							<DropdownMenuItem className="flex-col items-start gap-0.5 cursor-default" onSelect={(e) => e.preventDefault()}>
+							<ActionBarMorePrimitive.Item className="focus:bg-neutral-200 dark:focus:bg-neutral-700 relative flex cursor-default flex-col items-start gap-0.5 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none" onSelect={(e) => e.preventDefault()}>
 								<span className="text-xs text-muted-foreground">
 									{usage.total_tokens.toLocaleString()} tokens
 								</span>
-							</DropdownMenuItem>
+							</ActionBarMorePrimitive.Item>
 						)}
 					</>
 				)}
-			</DropdownMenuContent>
-		</DropdownMenu>
+			</ActionBarMorePrimitive.Content>
+		</ActionBarMorePrimitive.Root>
 	);
 };
 

From 833799457bdba15927915ca874de5d688efb10f7 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 15:35:04 +0530
Subject: [PATCH 07/14] feat: integrate new LLM configuration handling and
 enhance model display in message info dropdown

---
 .../assistant-ui/assistant-message.tsx        | 47 +++++++++++++++----
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx
index 7a357dc85..4f3dd7c00 100644
--- a/surfsense_web/components/assistant-ui/assistant-message.tsx
+++ b/surfsense_web/components/assistant-ui/assistant-message.tsx
@@ -23,6 +23,10 @@ import dynamic from "next/dynamic";
 import type { FC } from "react";
 import { useEffect, useMemo, useRef, useState } from "react";
 import { commentsEnabledAtom, targetCommentIdAtom } from "@/atoms/chat/current-thread.atom";
+import {
+	globalNewLLMConfigsAtom,
+	newLLMConfigsAtom,
+} from "@/atoms/new-llm-config/new-llm-config-query.atoms";
 import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
 import {
 	CitationMetadataProvider,
@@ -47,6 +51,7 @@ import { useComments } from "@/hooks/use-comments";
 import { useMediaQuery } from "@/hooks/use-media-query";
 import { useElectronAPI } from "@/hooks/use-platform";
 import { useTokenUsage } from "@/components/assistant-ui/token-usage-context";
+import { getProviderIcon } from "@/lib/provider-icons";
 import { cn } from "@/lib/utils";
 
 // Captured once at module load — survives client-side navigations that strip the query param.
@@ -386,6 +391,26 @@ const MessageInfoDropdown: FC = () => {
 	const createdAt = useAuiState(({ message }) => message?.createdAt);
 	const usage = useTokenUsage(messageId);
 
+	const { data: localConfigs } = useAtomValue(newLLMConfigsAtom);
+	const { data: globalConfigs } = useAtomValue(globalNewLLMConfigsAtom);
+
+	const configByModel = useMemo(() => {
+		const map = new Map<string, { name: string; provider: string }>();
+		for (const c of [...(globalConfigs ?? []), ...(localConfigs ?? [])]) {
+			map.set(c.model_name, { name: c.name, provider: c.provider });
+		}
+		return map;
+	}, [localConfigs, globalConfigs]);
+
+	const resolveModel = (modelKey: string) => {
+		const parts = modelKey.split("/");
+		const bare = parts[parts.length - 1] ?? modelKey;
+		const config = configByModel.get(modelKey) ?? configByModel.get(bare);
+		return config
+			? { name: config.name, icon: getProviderIcon(config.provider, { className: "size-3.5" }) }
+			: { name: modelKey, icon: null };
+	};
+
 	const modelBreakdown = usage ? (usage.usage ?? usage.model_breakdown) : undefined;
 	const models = modelBreakdown ? Object.entries(modelBreakdown) : [];
 	const hasUsage = usage && usage.total_tokens > 0;
@@ -411,14 +436,20 @@ const MessageInfoDropdown: FC = () => {
 					<>
 						<ActionBarMorePrimitive.Separator className="bg-border mx-2 my-1 h-px" />
 						{models.length > 0 ? (
-							models.map(([model, counts]) => (
-								<ActionBarMorePrimitive.Item key={model} className="focus:bg-neutral-200 dark:focus:bg-neutral-700 relative flex cursor-default flex-col items-start gap-0.5 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none" onSelect={(e) => e.preventDefault()}>
-									<span className="text-xs font-medium">{model}</span>
-									<span className="text-xs text-muted-foreground">
-										{counts.total_tokens.toLocaleString()} tokens
-									</span>
-								</ActionBarMorePrimitive.Item>
-							))
+							models.map(([model, counts]) => {
+								const { name, icon } = resolveModel(model);
+								return (
+									<ActionBarMorePrimitive.Item key={model} className="focus:bg-neutral-200 dark:focus:bg-neutral-700 relative flex cursor-default flex-col items-start gap-0.5 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none" onSelect={(e) => e.preventDefault()}>
+										<span className="flex items-center gap-1.5 text-xs font-medium">
+											{icon}
+											{name}
+										</span>
+										<span className="text-xs text-muted-foreground">
+											{counts.total_tokens.toLocaleString()} tokens
+										</span>
+									</ActionBarMorePrimitive.Item>
+								);
+							})
 						) : (
 							<ActionBarMorePrimitive.Item className="focus:bg-neutral-200 dark:focus:bg-neutral-700 relative flex cursor-default flex-col items-start gap-0.5 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none" onSelect={(e) => e.preventDefault()}>
 								<span className="text-xs text-muted-foreground">

From 1f9840d4a45a42230d84bcf5d64a9aa8f5363c9d Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 15:49:33 +0530
Subject: [PATCH 08/14] feat: update alembic migration number

---
 ...ken_usage_table.py => 125_add_token_usage_table.py} | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
 rename surfsense_backend/alembic/versions/{124_add_token_usage_table.py => 125_add_token_usage_table.py} (95%)

diff --git a/surfsense_backend/alembic/versions/124_add_token_usage_table.py b/surfsense_backend/alembic/versions/125_add_token_usage_table.py
similarity index 95%
rename from surfsense_backend/alembic/versions/124_add_token_usage_table.py
rename to surfsense_backend/alembic/versions/125_add_token_usage_table.py
index 2c88e1f51..c08280487 100644
--- a/surfsense_backend/alembic/versions/124_add_token_usage_table.py
+++ b/surfsense_backend/alembic/versions/125_add_token_usage_table.py
@@ -1,7 +1,7 @@
-"""124_add_token_usage_table
+"""125_add_token_usage_table
 
-Revision ID: 124
-Revises: 123
+Revision ID: 125
+Revises: 124
 Create Date: 2026-04-14
 
 Adds token_usage table for tracking LLM token consumption per message.
@@ -18,8 +18,8 @@ from sqlalchemy.dialects.postgresql import JSONB, UUID
 from alembic import op
 
 # revision identifiers, used by Alembic.
-revision: str = "124"
-down_revision: str | None = "123"
+revision: str = "125"
+down_revision: str | None = "124"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 

From 292fcb1a2c3b06e28ec7a66e335b9ca9aa13b02a Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 20:35:16 +0530
Subject: [PATCH 09/14] feat: enhance model selector UI and added github models
 icon

---
 .../components/icons/providers/github.svg     |   1 +
 .../components/icons/providers/index.ts       |   1 +
 .../components/new-chat/model-selector.tsx    | 163 ++++++++++++------
 surfsense_web/lib/provider-icons.tsx          |   3 +
 4 files changed, 118 insertions(+), 50 deletions(-)
 create mode 100644 surfsense_web/components/icons/providers/github.svg

diff --git a/surfsense_web/components/icons/providers/github.svg b/surfsense_web/components/icons/providers/github.svg
new file mode 100644
index 000000000..7a51b8e0e
--- /dev/null
+++ b/surfsense_web/components/icons/providers/github.svg
@@ -0,0 +1 @@
+<svg fill="currentColor" fill-rule="evenodd" height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Github</title><path d="M12 0c6.63 0 12 5.276 12 11.79-.001 5.067-3.29 9.567-8.175 11.187-.6.118-.825-.25-.825-.56 0-.398.015-1.665.015-3.242 0-1.105-.375-1.813-.81-2.181 2.67-.295 5.475-1.297 5.475-5.822 0-1.297-.465-2.344-1.23-3.169.12-.295.54-1.503-.12-3.125 0 0-1.005-.324-3.3 1.209a11.32 11.32 0 00-3-.398c-1.02 0-2.04.133-3 .398-2.295-1.518-3.3-1.209-3.3-1.209-.66 1.622-.24 2.83-.12 3.125-.765.825-1.23 1.887-1.23 3.169 0 4.51 2.79 5.527 5.46 5.822-.345.294-.66.81-.765 1.577-.69.31-2.415.81-3.495-.973-.225-.354-.9-1.223-1.845-1.209-1.005.015-.405.56.015.781.51.28 1.095 1.327 1.23 1.666.24.663 1.02 1.93 4.035 1.385 0 .988.015 1.916.015 2.196 0 .31-.225.664-.825.56C3.303 21.374-.003 16.867 0 11.791 0 5.276 5.37 0 12 0z"></path></svg>
\ No newline at end of file
diff --git a/surfsense_web/components/icons/providers/index.ts b/surfsense_web/components/icons/providers/index.ts
index 2afed7fa5..aefa2a053 100644
--- a/surfsense_web/components/icons/providers/index.ts
+++ b/surfsense_web/components/icons/providers/index.ts
@@ -10,6 +10,7 @@ export { default as DeepInfraIcon } from "./deepinfra.svg";
 export { default as DeepSeekIcon } from "./deepseek.svg";
 export { default as FireworksAiIcon } from "./fireworksai.svg";
 export { default as GeminiIcon } from "./gemini.svg";
+export { default as GitHubModelsIcon } from "./github.svg";
 export { default as GroqIcon } from "./groq.svg";
 export { default as HuggingFaceIcon } from "./huggingface.svg";
 export { default as MiniMaxIcon } from "./minimax.svg";
diff --git a/surfsense_web/components/new-chat/model-selector.tsx b/surfsense_web/components/new-chat/model-selector.tsx
index 26937e18b..0b8708269 100644
--- a/surfsense_web/components/new-chat/model-selector.tsx
+++ b/surfsense_web/components/new-chat/model-selector.tsx
@@ -6,9 +6,12 @@ import {
 	Bot,
 	Check,
 	ChevronDown,
+	ChevronLeft,
+	ChevronRight,
+	ChevronUp,
 	Edit3,
-	Eye,
 	ImageIcon,
+	ScanEye,
 	Layers,
 	Plus,
 	Search,
@@ -69,6 +72,7 @@ const PROVIDER_NAMES: Record<string, string> = {
 	DEEPSEEK: "DeepSeek",
 	MISTRAL: "Mistral",
 	COHERE: "Cohere",
+	GITHUB_MODELS: "GitHub Models",
 	GROQ: "Groq",
 	OLLAMA: "Ollama",
 	TOGETHER_AI: "Together AI",
@@ -274,17 +278,40 @@ export function ModelSelector({
 	const [searchQuery, setSearchQuery] = useState("");
 	const [selectedProvider, setSelectedProvider] = useState<string>("all");
 	const [focusedIndex, setFocusedIndex] = useState(-1);
-	const [showScrollIndicator, setShowScrollIndicator] = useState(true);
+	const [modelScrollPos, setModelScrollPos] = useState<"top" | "middle" | "bottom">("top");
+	const [sidebarScrollPos, setSidebarScrollPos] = useState<"top" | "middle" | "bottom">("top");
 	const providerSidebarRef = useRef<HTMLDivElement>(null);
 	const modelListRef = useRef<HTMLDivElement>(null);
 	const searchInputRef = useRef<HTMLInputElement>(null);
 	const isMobile = useIsMobile();
 
+	const handleModelListScroll = useCallback((e: React.UIEvent<HTMLDivElement>) => {
+		const el = e.currentTarget;
+		const atTop = el.scrollTop <= 2;
+		const atBottom = el.scrollHeight - el.scrollTop - el.clientHeight <= 2;
+		setModelScrollPos(atTop ? "top" : atBottom ? "bottom" : "middle");
+	}, []);
+
+	const handleSidebarScroll = useCallback((e: React.UIEvent<HTMLDivElement>) => {
+		const el = e.currentTarget;
+		if (isMobile) {
+			const atStart = el.scrollLeft <= 2;
+			const atEnd = el.scrollWidth - el.scrollLeft - el.clientWidth <= 2;
+			setSidebarScrollPos(atStart ? "top" : atEnd ? "bottom" : "middle");
+		} else {
+			const atTop = el.scrollTop <= 2;
+			const atBottom = el.scrollHeight - el.scrollTop - el.clientHeight <= 2;
+			setSidebarScrollPos(atTop ? "top" : atBottom ? "bottom" : "middle");
+		}
+	}, [isMobile]);
+
 	// Reset search + provider when tab changes
+	// biome-ignore lint/correctness/useExhaustiveDependencies: activeTab is intentionally used as a trigger
 	useEffect(() => {
 		setSelectedProvider("all");
 		setSearchQuery("");
 		setFocusedIndex(-1);
+		setModelScrollPos("top");
 	}, [activeTab]);
 
 	// Reset on open
@@ -295,8 +322,9 @@ export function ModelSelector({
 		}
 	}, [open]);
 
-	// Cmd/Ctrl+M shortcut
+	// Cmd/Ctrl+M shortcut (desktop only)
 	useEffect(() => {
+		if (isMobile) return;
 		const handler = (e: KeyboardEvent) => {
 			if ((e.metaKey || e.ctrlKey) && e.key === "m") {
 				e.preventDefault();
@@ -305,9 +333,10 @@ export function ModelSelector({
 		};
 		document.addEventListener("keydown", handler);
 		return () => document.removeEventListener("keydown", handler);
-	}, []);
+	}, [isMobile]);
 
 	// Focus search input on open
+	// biome-ignore lint/correctness/useExhaustiveDependencies: activeTab is intentionally used as a trigger to re-focus on tab switch
 	useEffect(() => {
 		if (open && !isMobile) {
 			requestAnimationFrame(() => searchInputRef.current?.focus());
@@ -677,6 +706,7 @@ export function ModelSelector({
 	);
 
 	// ─── Keyboard navigation ───
+	// biome-ignore lint/correctness/useExhaustiveDependencies: searchQuery and selectedProvider are intentional triggers to reset focus
 	useEffect(() => {
 		setFocusedIndex(-1);
 	}, [searchQuery, selectedProvider]);
@@ -767,24 +797,35 @@ export function ModelSelector({
 		return (
 			<div
 				className={cn(
-					"shrink-0 border-border/50 relative flex flex-col",
-					!isMobile && "w-10 border-r",
+					"shrink-0 border-border/50 flex",
+					isMobile ? "flex-row items-center border-b border-border/40" : "flex-col w-10 border-r",
 				)}
 			>
+				{!isMobile && sidebarScrollPos !== "top" && (
+					<div className="flex items-center justify-center py-0.5 pointer-events-none">
+						<ChevronUp className="size-3 text-muted-foreground" />
+					</div>
+				)}
+				{isMobile && sidebarScrollPos !== "top" && (
+					<div className="flex items-center justify-center px-0.5 shrink-0 pointer-events-none">
+						<ChevronLeft className="size-3 text-muted-foreground" />
+					</div>
+				)}
 				<div
 					ref={providerSidebarRef}
-					onScroll={(e) => {
-						const t = e.currentTarget;
-						setShowScrollIndicator(
-							t.scrollHeight - t.scrollTop >
-								t.clientHeight + 10,
-						);
-					}}
+					onScroll={handleSidebarScroll}
 					className={cn(
 						isMobile
-							? "flex flex-row gap-0.5 px-2 py-1.5 overflow-x-auto border-b border-border/40"
+							? "flex flex-row gap-0.5 px-1 py-1.5 overflow-x-auto [&::-webkit-scrollbar]:h-0 [&::-webkit-scrollbar-track]:bg-transparent"
 							: "flex flex-col gap-0.5 p-1 overflow-y-auto flex-1 [&::-webkit-scrollbar]:w-0 [&::-webkit-scrollbar-track]:bg-transparent",
 					)}
+					style={isMobile ? {
+						maskImage: `linear-gradient(to right, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 24px, black calc(100% - 24px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`,
+						WebkitMaskImage: `linear-gradient(to right, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 24px, black calc(100% - 24px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`,
+					} : {
+						maskImage: `linear-gradient(to bottom, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 32px, black calc(100% - 32px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`,
+						WebkitMaskImage: `linear-gradient(to bottom, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 32px, black calc(100% - 32px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`,
+					}}
 				>
 					{activeProviders.map((provider, idx) => {
 						const isAll = provider === "all";
@@ -849,18 +890,23 @@ export function ModelSelector({
 												)}
 										{isConfigured
 											? ` (${count})`
-											: " — not configured"}
+											: " (not configured)"}
 									</TooltipContent>
 								</Tooltip>
 							</Fragment>
 						);
 					})}
 				</div>
-				{!isMobile && showScrollIndicator && (
-					<div className="absolute bottom-0 left-0 right-0 h-6 bg-gradient-to-t from-background to-transparent pointer-events-none flex items-end justify-center pb-0.5">
+				{!isMobile && sidebarScrollPos !== "bottom" && (
+					<div className="flex items-center justify-center py-0.5 pointer-events-none">
 						<ChevronDown className="size-3 text-muted-foreground" />
 					</div>
 				)}
+				{isMobile && sidebarScrollPos !== "bottom" && (
+					<div className="flex items-center justify-center px-0.5 shrink-0 pointer-events-none">
+						<ChevronRight className="size-3 text-muted-foreground" />
+					</div>
+				)}
 			</div>
 		);
 	};
@@ -889,19 +935,26 @@ export function ModelSelector({
 				key={`${activeTab}-${item.isGlobal ? "g" : "u"}-${config.id}`}
 				data-model-index={index}
 				role="option"
+				tabIndex={isMobile ? -1 : 0}
 				aria-selected={isSelected}
 				onClick={() => handleSelectItem(item)}
+				onKeyDown={isMobile ? undefined : (e) => {
+					if (e.key === "Enter" || e.key === " ") {
+						e.preventDefault();
+						handleSelectItem(item);
+					}
+				}}
 				onMouseEnter={() => setFocusedIndex(index)}
 				className={cn(
-					"group flex items-start gap-2.5 px-2.5 py-2 rounded-lg cursor-pointer",
-					"transition-all duration-150 mx-1",
-					"hover:bg-accent/40 active:scale-[0.99]",
+					"group flex items-center gap-2.5 px-3 py-2 rounded-xl cursor-pointer",
+					"transition-all duration-150 mx-2",
+					"hover:bg-accent/40",
 					isSelected && "bg-primary/6 dark:bg-primary/8",
-					isFocused && "bg-accent/50 ring-1 ring-primary/20",
+					isFocused && "bg-accent/50",
 				)}
 			>
 				{/* Provider icon */}
-				<div className="shrink-0 mt-0.5">
+				<div className="shrink-0">
 					{getProviderIcon(config.provider as string, {
 						isAutoMode,
 						className: "size-5",
@@ -931,8 +984,8 @@ export function ModelSelector({
 						</span>
 						{!isAutoMode && hasCitations && (
 							<Badge
-								variant="outline"
-								className="text-[9px] px-1 py-0 h-3.5 bg-primary/10 text-primary border-primary/20"
+								variant="secondary"
+								className="text-[10px] px-1.5 py-0.5 border-0 text-muted-foreground bg-muted"
 							>
 								Citations
 							</Badge>
@@ -981,7 +1034,7 @@ export function ModelSelector({
 					: "Add Vision Model";
 
 		return (
-			<div className="flex flex-col w-full">
+			<div className="flex flex-col w-full overflow-hidden">
 				{/* Tab header */}
 				<div className="border-b border-border/80 dark:border-neutral-800">
 					<div className="w-full grid grid-cols-3 h-11">
@@ -999,7 +1052,7 @@ export function ModelSelector({
 								},
 								{
 									value: "vision" as const,
-									icon: Eye,
+									icon: ScanEye,
 									label: "Vision",
 								},
 							] as const
@@ -1028,7 +1081,7 @@ export function ModelSelector({
 						"flex",
 						isMobile
 							? "flex-col h-[60vh]"
-							: "flex-row h-[420px]",
+							: "flex-row h-[380px]",
 					)}
 				>
 					{/* Provider sidebar */}
@@ -1037,33 +1090,30 @@ export function ModelSelector({
 					{/* Main content */}
 					<div className="flex flex-col min-w-0 min-h-0 flex-1 overflow-hidden">
 						{/* Search */}
-						<div className="relative px-3 py-2">
-							<Search className="absolute left-5 top-1/2 -translate-y-1/2 size-3.5 text-muted-foreground/50 pointer-events-none" />
+						<div className="relative">
+							<Search className="absolute left-3 top-1/2 -translate-y-1/2 size-3.5 text-muted-foreground/100 pointer-events-none" />
 							<input
 								ref={searchInputRef}
-								placeholder="Search models..."
+								placeholder="Search models"
 								value={searchQuery}
 								onChange={(e) =>
 									setSearchQuery(e.target.value)
 								}
-								onKeyDown={handleKeyDown}
-								autoFocus={!isMobile}
+								onKeyDown={isMobile ? undefined : handleKeyDown}
 								role="combobox"
 								aria-expanded={true}
 								aria-controls="model-selector-list"
 								className={cn(
-									"w-full pl-8 pr-3 py-1.5 text-xs rounded-lg",
-									"bg-secondary/30 border border-border/40",
-									"focus:outline-none focus:ring-2 focus:ring-primary/20 focus:border-primary/40",
-									"placeholder:text-muted-foreground/50",
-									"transition-[box-shadow,border-color] duration-200",
+									"w-full pl-8 pr-3 py-2.5 text-sm bg-transparent",
+									"focus:outline-none",
+									"placeholder:text-muted-foreground",
 								)}
 							/>
 						</div>
 
 						{/* Provider header when filtered */}
 						{selectedProvider !== "all" && (
-							<div className="flex items-center gap-2 px-3 py-1.5 border-b border-border/40">
+							<div className="flex items-center gap-2 px-3 py-1.5">
 								{getProviderIcon(selectedProvider, {
 									className: "size-4",
 								})}
@@ -1085,10 +1135,15 @@ export function ModelSelector({
 							id="model-selector-list"
 							ref={modelListRef}
 							role="listbox"
-							className="overflow-y-auto flex-1 py-1"
+							className="overflow-y-auto flex-1 py-1 space-y-1 flex flex-col"
+							onScroll={handleModelListScroll}
+							style={{
+								maskImage: `linear-gradient(to bottom, ${modelScrollPos === "top" ? "black" : "transparent"}, black 16px, black calc(100% - 16px), ${modelScrollPos === "bottom" ? "black" : "transparent"})`,
+								WebkitMaskImage: `linear-gradient(to bottom, ${modelScrollPos === "top" ? "black" : "transparent"}, black 16px, black calc(100% - 16px), ${modelScrollPos === "bottom" ? "black" : "transparent"})`,
+							}}
 						>
 							{currentDisplayItems.length === 0 ? (
-								<div className="py-8 flex flex-col items-center gap-3 px-4">
+								<div className="flex-1 flex flex-col items-center justify-center gap-3 px-4">
 									{selectedProvider !== "all" &&
 									!configuredProviderSet.has(
 										selectedProvider,
@@ -1116,22 +1171,21 @@ export function ModelSelector({
 											</p>
 											{addHandler && (
 												<Button
-													variant="outline"
+													variant="secondary"
 													size="sm"
-													className="mt-1 gap-2"
+													className="mt-1"
 													onClick={() => {
 														setOpen(false);
 														addHandler(selectedProvider !== "all" ? selectedProvider : undefined);
 													}}
 												>
-													<Plus className="size-3.5" />
 													{addLabel}
 												</Button>
 											)}
 										</>
-									) : (
+									) : searchQuery ? (
 										<>
-											<Search className="size-8 text-muted-foreground/40" />
+											<Search className="size-8 text-muted-foreground" />
 											<p className="text-sm text-muted-foreground">
 												No models found
 											</p>
@@ -1140,13 +1194,22 @@ export function ModelSelector({
 												term
 											</p>
 										</>
+									) : (
+										<>
+											<p className="text-sm font-medium text-muted-foreground">
+												No models configured
+											</p>
+											<p className="text-xs text-muted-foreground/60 text-center">
+												Configure models in your search space settings
+											</p>
+										</>
 									)}
 								</div>
 							) : (
 								<>
 									{globalItems.length > 0 && (
 										<>
-											<div className="flex items-center gap-2 px-3 py-1.5 text-[10px] font-semibold text-muted-foreground/70 uppercase tracking-wider">
+											<div className="flex items-center gap-2 px-3 py-1.5 text-[12px] font-semibold text-muted-foreground tracking-wider">
 												Global Models
 											</div>
 											{globalItems.map((item, i) =>
@@ -1163,7 +1226,7 @@ export function ModelSelector({
 										)}
 									{userItems.length > 0 && (
 										<>
-											<div className="flex items-center gap-2 px-3 py-1.5 text-[10px] font-semibold text-muted-foreground/70 uppercase tracking-wider">
+											<div className="flex items-center gap-2 px-3 py-1.5 text-[12px] font-semibold text-muted-foreground tracking-wider">
 												Your Configurations
 											</div>
 											{userItems.map((item, i) =>
@@ -1180,7 +1243,7 @@ export function ModelSelector({
 
 						{/* Add model button */}
 						{addHandler && (
-							<div className="p-2 border-t border-border/40 bg-muted/20 dark:bg-neutral-900">
+							<div className="p-2">
 								<Button
 									variant="ghost"
 									size="sm"
@@ -1271,7 +1334,7 @@ export function ModelSelector({
 							</span>
 						</>
 					) : (
-						<Eye className="size-4 text-muted-foreground" />
+						<ScanEye className="size-4 text-muted-foreground" />
 					)}
 				</>
 			)}
@@ -1301,7 +1364,7 @@ export function ModelSelector({
 		<Popover open={open} onOpenChange={setOpen}>
 			<PopoverTrigger asChild>{triggerButton}</PopoverTrigger>
 			<PopoverContent
-				className="w-[340px] md:w-[440px] p-0 rounded-lg shadow-lg bg-white border-border/60 dark:bg-neutral-900 dark:border dark:border-white/5 select-none"
+				className="w-[300px] md:w-[380px] p-0 rounded-lg shadow-lg overflow-hidden bg-white border-border/60 dark:bg-neutral-900 dark:border dark:border-white/5 select-none"
 				align="start"
 				sideOffset={8}
 				onCloseAutoFocus={(e) => e.preventDefault()}
diff --git a/surfsense_web/lib/provider-icons.tsx b/surfsense_web/lib/provider-icons.tsx
index d017d9aa2..e63c5eb2f 100644
--- a/surfsense_web/lib/provider-icons.tsx
+++ b/surfsense_web/lib/provider-icons.tsx
@@ -13,6 +13,7 @@ import {
 	DeepSeekIcon,
 	FireworksAiIcon,
 	GeminiIcon,
+	GitHubModelsIcon,
 	GroqIcon,
 	HuggingFaceIcon,
 	MiniMaxIcon,
@@ -82,6 +83,8 @@ export function getProviderIcon(
 			return <FireworksAiIcon className={cn(className)} />;
 		case "GOOGLE":
 			return <GeminiIcon className={cn(className)} />;
+		case "GITHUB_MODELS":
+			return <GitHubModelsIcon className={cn(className)} />;
 		case "GROQ":
 			return <GroqIcon className={cn(className)} />;
 		case "HUGGINGFACE":

From f01ddf3f0a153cdc5685e1739366a58737193add Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 20:56:07 +0530
Subject: [PATCH 10/14] feat: implement token usage recording in chat routes
 and enhance title generation handling

---
 .../app/routes/new_chat_routes.py             | 12 +--
 .../app/services/token_tracking_service.py    | 63 +++++++++++++-
 .../app/tasks/chat/stream_new_chat.py         | 82 +++++++++++++------
 3 files changed, 121 insertions(+), 36 deletions(-)

diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py
index fe79c7c06..55302b873 100644
--- a/surfsense_backend/app/routes/new_chat_routes.py
+++ b/surfsense_backend/app/routes/new_chat_routes.py
@@ -30,7 +30,6 @@ from app.db import (
     NewChatThread,
     Permission,
     SearchSpace,
-    TokenUsage,
     User,
     get_async_session,
     shielded_async_session,
@@ -53,6 +52,7 @@ from app.schemas.new_chat import (
     ThreadListResponse,
     TokenUsageSummary,
 )
+from app.services.token_tracking_service import record_token_usage
 from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat
 from app.users import current_active_user
 from app.utils.rbac import check_permission
@@ -949,19 +949,19 @@ async def append_message(
         # Persist token usage if provided (for assistant messages)
         token_usage_data = raw_body.get("token_usage")
         if token_usage_data and message_role == NewChatMessageRole.ASSISTANT:
-            token_usage_record = TokenUsage(
+            await record_token_usage(
+                session,
+                usage_type="chat",
+                search_space_id=thread.search_space_id,
+                user_id=user.id,
                 prompt_tokens=token_usage_data.get("prompt_tokens", 0),
                 completion_tokens=token_usage_data.get("completion_tokens", 0),
                 total_tokens=token_usage_data.get("total_tokens", 0),
                 model_breakdown=token_usage_data.get("usage"),
                 call_details=token_usage_data.get("call_details"),
-                usage_type="chat",
                 thread_id=thread_id,
                 message_id=db_message.id,
-                search_space_id=thread.search_space_id,
-                user_id=user.id,
             )
-            session.add(token_usage_record)
 
         await session.commit()
 
diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py
index 6a5b3793f..5d69e6870 100644
--- a/surfsense_backend/app/services/token_tracking_service.py
+++ b/surfsense_backend/app/services/token_tracking_service.py
@@ -5,9 +5,11 @@ Uses a ContextVar-scoped accumulator to group all LLM calls within a single
 async request/turn. The accumulated data is emitted via SSE and persisted
 when the frontend calls appendMessage.
 
-Agent LLM calls are captured automatically via the async callback.
-Title-generation usage is added explicitly from the LangChain response
-metadata to avoid callback-timing issues.
+The module also provides ``record_token_usage``, a thin async helper that
+creates a ``TokenUsage`` row for *any* usage type (chat, indexing, image
+generation, podcasts, …).  Call sites should prefer this helper over
+constructing ``TokenUsage`` manually so that logging and error handling
+stay consistent.
 """
 
 from __future__ import annotations
@@ -17,8 +19,12 @@ import logging
 from contextvars import ContextVar
 from dataclasses import dataclass, field
 from typing import Any
+from uuid import UUID
 
 from litellm.integrations.custom_logger import CustomLogger
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import TokenUsage
 
 logger = logging.getLogger(__name__)
 
@@ -138,3 +144,54 @@ class TokenTrackingCallback(CustomLogger):
 
 
 token_tracker = TokenTrackingCallback()
+
+
+# ---------------------------------------------------------------------------
+# Persistence helper
+# ---------------------------------------------------------------------------
+
+
+async def record_token_usage(
+    session: AsyncSession,
+    *,
+    usage_type: str,
+    search_space_id: int,
+    user_id: UUID,
+    prompt_tokens: int = 0,
+    completion_tokens: int = 0,
+    total_tokens: int = 0,
+    model_breakdown: dict[str, Any] | None = None,
+    call_details: dict[str, Any] | None = None,
+    thread_id: int | None = None,
+    message_id: int | None = None,
+) -> TokenUsage | None:
+    """Stage a single ``TokenUsage`` row on ``session``.
+
+    Only ``session.add`` is called: nothing is flushed or committed here, so
+    the row is written only when the caller commits.  Returns the record, or
+    ``None`` if building or adding it raised (logged, never propagated).
+    """
+    try:
+        record = TokenUsage(
+            usage_type=usage_type,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+            model_breakdown=model_breakdown,
+            call_details=call_details,
+            thread_id=thread_id,
+            message_id=message_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+        )
+        session.add(record)
+        logger.debug(
+            "[TokenTracking] recorded %s usage: prompt=%d completion=%d total=%d",
+            usage_type, prompt_tokens, completion_tokens, total_tokens,
+        )
+        return record
+    except Exception:
+        logger.warning(
+            "[TokenTracking] failed to record %s token usage", usage_type, exc_info=True,
+        )
+        return None
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index 4530f5046..e87a1b791 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -51,7 +51,7 @@ from app.db import (
     async_session_maker,
     shielded_async_session,
 )
-from app.prompts import TITLE_GENERATION_PROMPT_TEMPLATE
+from app.prompts import TITLE_GENERATION_PROMPT
 from app.services.chat_session_state_service import (
     clear_ai_responding,
     set_ai_responding,
@@ -1460,34 +1460,58 @@ async def stream_new_chat(
         )
         is_first_response = (assistant_count_result.scalar() or 0) == 0
 
-        title_task: asyncio.Task[tuple[str | None, dict[str, int] | None]] | None = None
+        title_task: asyncio.Task[tuple[str | None, dict | None]] | None = None
         if is_first_response:
 
-            async def _generate_title() -> tuple[str | None, dict[str, int] | None]:
-                """Return (title, usage_dict) where usage_dict has model/prompt/completion/total."""
+            async def _generate_title() -> tuple[str | None, dict | None]:
+                """Generate a short title via litellm.acompletion.
+
+                Returns (title, usage_dict).  Usage is extracted directly from
+                the response object because litellm fires its async callback
+                via fire-and-forget ``create_task``, so the
+                ``TokenTrackingCallback`` would run too late.  We also blank
+                the accumulator in this child-task context so the late callback
+                doesn't double-count.
+                """
                 try:
-                    title_chain = TITLE_GENERATION_PROMPT_TEMPLATE | llm
-                    title_result = await title_chain.ainvoke(
-                        {"user_query": user_query[:500]}
-                    )
-                    usage_dict: dict[str, int] | None = None
-                    if title_result:
-                        um = getattr(title_result, "usage_metadata", None)
-                        if um:
-                            rm = getattr(title_result, "response_metadata", None) or {}
-                            raw_model = rm.get("model_name", "unknown")
-                            usage_dict = {
-                                "model": raw_model.split("/", 1)[-1] if "/" in raw_model else raw_model,
-                                "prompt_tokens": um.get("input_tokens", 0),
-                                "completion_tokens": um.get("output_tokens", 0),
-                                "total_tokens": um.get("total_tokens", 0),
-                            }
-                        if hasattr(title_result, "content"):
-                            raw_title = title_result.content.strip()
-                            if raw_title and len(raw_title) <= 100:
-                                return raw_title.strip("\"'"), usage_dict
-                    return None, usage_dict
+                    from litellm import acompletion
+                    from app.services.llm_router_service import LLMRouterService
+                    from app.services.token_tracking_service import _turn_accumulator
+
+                    _turn_accumulator.set(None)
+
+                    prompt = TITLE_GENERATION_PROMPT.replace("{user_query}", user_query[:500])
+                    messages = [{"role": "user", "content": prompt}]
+
+                    if getattr(llm, "model", None) == "auto":
+                        router = LLMRouterService.get_router()
+                        response = await router.acompletion(model="auto", messages=messages)
+                    else:
+                        response = await acompletion(
+                            model=llm.model,
+                            messages=messages,
+                            api_key=getattr(llm, "api_key", None),
+                            api_base=getattr(llm, "api_base", None),
+                        )
+
+                    usage_info = None
+                    usage = getattr(response, "usage", None)
+                    if usage:
+                        raw_model = getattr(llm, "model", "") or ""
+                        model_name = raw_model.split("/", 1)[-1] if "/" in raw_model else (raw_model or response.model or "unknown")
+                        usage_info = {
+                            "model": model_name,
+                            "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0,
+                            "completion_tokens": getattr(usage, "completion_tokens", 0) or 0,
+                            "total_tokens": getattr(usage, "total_tokens", 0) or 0,
+                        }
+
+                    raw_title = (response.choices[0].message.content or "").strip()  # content may be None; don't lose usage_info
+                    if raw_title and len(raw_title) <= 100:
+                        return raw_title.strip("\"'"), usage_info
+                    return None, usage_info
                 except Exception:
+                    logging.getLogger(__name__).exception("[TitleGen] _generate_title failed")
                     return None, None
 
             title_task = asyncio.create_task(_generate_title())
@@ -1520,7 +1544,9 @@ async def stream_new_chat(
 
             # Inject title update mid-stream as soon as the background task finishes
             if title_task is not None and title_task.done() and not title_emitted:
-                generated_title, _title_usage = title_task.result()
+                generated_title, title_usage = title_task.result()
+                if title_usage:
+                    accumulator.add(**title_usage)
                 if generated_title:
                     async with shielded_async_session() as title_session:
                         title_thread_result = await title_session.execute(
@@ -1567,7 +1593,9 @@ async def stream_new_chat(
 
         # If the title task didn't finish during streaming, await it now
         if title_task is not None and not title_emitted:
-            generated_title, _title_usage = await title_task
+            generated_title, title_usage = await title_task
+            if title_usage:
+                accumulator.add(**title_usage)
             if generated_title:
                 async with shielded_async_session() as title_session:
                     title_thread_result = await title_session.execute(

From 17149228304229b5bb32c3021baa9f5de039209e Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 21:13:01 +0530
Subject: [PATCH 11/14] refactor: improve password input layout and enhance
 chat thread title update logic

---
 .../app/(home)/login/LocalLoginForm.tsx       | 50 +++++++++----------
 .../new-chat/[[...chat_id]]/page.tsx          | 20 ++++++--
 surfsense_web/hooks/use-typewriter.ts         |  1 -
 3 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/surfsense_web/app/(home)/login/LocalLoginForm.tsx b/surfsense_web/app/(home)/login/LocalLoginForm.tsx
index 07a4db4d3..e3c34306f 100644
--- a/surfsense_web/app/(home)/login/LocalLoginForm.tsx
+++ b/surfsense_web/app/(home)/login/LocalLoginForm.tsx
@@ -174,31 +174,31 @@ export function LocalLoginForm() {
 					<label htmlFor="password" className="block text-sm font-medium text-foreground">
 						{t("password")}
 					</label>
-					<div className="relative">
-						<input
-							id="password"
-							type={showPassword ? "text" : "password"}
-							autoComplete="current-password"
-							required
-							placeholder="Enter your password"
-							value={password}
-							onChange={(e) => setPassword(e.target.value)}
-							className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${
-								error.title
-									? "border-destructive focus:border-destructive focus:ring-destructive/40"
-									: "border-border focus:border-primary focus:ring-primary/40"
-							}`}
-							disabled={isLoggingIn}
-						/>
-						<button
-							type="button"
-							onClick={() => setShowPassword((prev) => !prev)}
-							className="absolute inset-y-0 right-0 flex items-center pr-3 mt-1 text-muted-foreground hover:text-foreground"
-							aria-label={showPassword ? t("hide_password") : t("show_password")}
-						>
-							{showPassword ? <EyeOff className="h-4 w-4" /> : <Eye className="h-4 w-4" />}
-						</button>
-					</div>
+				<div className="relative mt-1">
+					<input
+						id="password"
+						type={showPassword ? "text" : "password"}
+						autoComplete="current-password"
+						required
+						placeholder="Enter your password"
+						value={password}
+						onChange={(e) => setPassword(e.target.value)}
+						className={`block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${
+							error.title
+								? "border-destructive focus:border-destructive focus:ring-destructive/40"
+								: "border-border focus:border-primary focus:ring-primary/40"
+						}`}
+						disabled={isLoggingIn}
+					/>
+					<button
+						type="button"
+						onClick={() => setShowPassword((prev) => !prev)}
+						className="absolute inset-y-0 right-0 flex items-center pr-3 text-muted-foreground hover:text-foreground"
+						aria-label={showPassword ? t("hide_password") : t("show_password")}
+					>
+						{showPassword ? <EyeOff className="h-4 w-4" /> : <Eye className="h-4 w-4" />}
+					</button>
+				</div>
 				</div>
 
 				<button
diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
index ff953eaf9..d5bffb836 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@@ -67,6 +67,8 @@ import {
 	getRegenerateUrl,
 	getThreadFull,
 	getThreadMessages,
+	type ThreadListItem,
+	type ThreadListResponse,
 	type ThreadRecord,
 } from "@/lib/chat/thread-persistence";
 import { NotFoundError } from "@/lib/error";
@@ -770,9 +772,21 @@ export default function NewChatPage() {
 							if (titleData?.title && titleData?.threadId === currentThreadId) {
 								setCurrentThread((prev) => (prev ? { ...prev, title: titleData.title } : prev));
 								updateChatTabTitle({ chatId: currentThreadId, title: titleData.title });
-								queryClient.invalidateQueries({
-									queryKey: ["threads", String(searchSpaceId)],
-								});
+								queryClient.setQueriesData<ThreadListResponse>(
+									{ queryKey: ["threads", String(searchSpaceId)] },
+									(old) => {
+										if (!old) return old;
+										const updateTitle = (list: ThreadListItem[]) =>
+											list.map((t) =>
+												t.id === titleData.threadId ? { ...t, title: titleData.title } : t
+											);
+										return {
+											...old,
+											threads: updateTitle(old.threads),
+											archived_threads: updateTitle(old.archived_threads),
+										};
+									}
+								);
 							}
 							break;
 						}
diff --git a/surfsense_web/hooks/use-typewriter.ts b/surfsense_web/hooks/use-typewriter.ts
index 1e1ce8b83..54d33e08d 100644
--- a/surfsense_web/hooks/use-typewriter.ts
+++ b/surfsense_web/hooks/use-typewriter.ts
@@ -27,7 +27,6 @@ export function useTypewriter(text: string, speed = 35, skipFor = "New Chat"): s
 		}
 
 		let i = 0;
-		setDisplayed("");
 		intervalRef.current = setInterval(() => {
 			i++;
 			setDisplayed(text.slice(0, i));

From 2021f6c4b7f883cb0f985493a186e8e13db9ba62 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 21:39:11 +0530
Subject: [PATCH 12/14] refactor: simplify AI sort button implementation and
 improve styling

---
 .../components/documents/DocumentsFilters.tsx | 117 +++++++-----------
 1 file changed, 46 insertions(+), 71 deletions(-)

diff --git a/surfsense_web/components/documents/DocumentsFilters.tsx b/surfsense_web/components/documents/DocumentsFilters.tsx
index 2b7cf0f10..a5ee57703 100644
--- a/surfsense_web/components/documents/DocumentsFilters.tsx
+++ b/surfsense_web/components/documents/DocumentsFilters.tsx
@@ -2,7 +2,6 @@
 
 import { IconBinaryTree, IconBinaryTreeFilled } from "@tabler/icons-react";
 import { FolderPlus, ListFilter, Search, Upload, X } from "lucide-react";
-import { AnimatePresence, motion } from "motion/react";
 import { useTranslations } from "next-intl";
 import React, { useCallback, useMemo, useRef, useState } from "react";
 import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
@@ -10,6 +9,7 @@ import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
 import { Input } from "@/components/ui/input";
 import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
+import { Spinner } from "@/components/ui/spinner";
 import { ToggleGroup, ToggleGroupItem } from "@/components/ui/toggle-group";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import { cn } from "@/lib/utils";
@@ -74,17 +74,17 @@ export function DocumentsFilters({
 	return (
 		<div className="flex select-none">
 			<div className="flex items-center gap-2 w-full">
-				{/* New Folder + Filter Toggle Group */}
+				{/* New Folder + AI Sort + Filter Toggle Group */}
 				<ToggleGroup type="multiple" variant="outline" value={[]} className="overflow-visible">
 					{onCreateFolder && (
 						<Tooltip>
 							<TooltipTrigger asChild>
 								<ToggleGroupItem
 									value="folder"
-									className="h-9 w-9 shrink-0 border-sidebar-border text-sidebar-foreground/60 hover:text-sidebar-foreground hover:border-sidebar-border bg-sidebar"
-									onClick={(e) => {
-										e.preventDefault();
-										onCreateFolder();
+								className="h-9 w-9 shrink-0 border-sidebar-border text-muted-foreground hover:text-foreground hover:border-sidebar-border bg-sidebar"
+								onClick={(e) => {
+									e.preventDefault();
+									onCreateFolder();
 									}}
 								>
 									<FolderPlus size={14} />
@@ -94,13 +94,52 @@ export function DocumentsFilters({
 					</Tooltip>
 				)}
 
+					{onToggleAiSort && (
+						<Tooltip>
+							<TooltipTrigger asChild>
+								<ToggleGroupItem
+									value="ai-sort"
+									disabled={aiSortBusy}
+									className={cn(
+										"h-9 w-9 shrink-0 border-sidebar-border bg-sidebar",
+										"disabled:pointer-events-none disabled:opacity-50",
+										aiSortEnabled
+											? "bg-accent text-accent-foreground"
+											: "text-muted-foreground hover:text-foreground hover:border-sidebar-border"
+									)}
+									onClick={(e) => {
+										e.preventDefault();
+										onToggleAiSort();
+									}}
+									aria-label={aiSortEnabled ? "Disable AI sort" : "Enable AI sort"}
+									aria-pressed={aiSortEnabled}
+								>
+								{aiSortBusy ? (
+									<Spinner size="xs" />
+								) : aiSortEnabled ? (
+										<IconBinaryTreeFilled size={16} />
+									) : (
+										<IconBinaryTree size={16} />
+									)}
+								</ToggleGroupItem>
+							</TooltipTrigger>
+							<TooltipContent>
+								{aiSortBusy
+									? "AI sort in progress..."
+									: aiSortEnabled
+										? "AI sort active — click to disable"
+										: "Enable AI sort"}
+							</TooltipContent>
+						</Tooltip>
+					)}
+
 					<Popover>
 						<Tooltip>
 							<TooltipTrigger asChild>
 								<PopoverTrigger asChild>
 									<ToggleGroupItem
 										value="filter"
-										className="relative h-9 w-9 shrink-0 border-sidebar-border text-sidebar-foreground/60 hover:text-sidebar-foreground hover:border-sidebar-border bg-sidebar overflow-visible"
+										className="relative h-9 w-9 shrink-0 border-sidebar-border text-muted-foreground hover:text-foreground hover:border-sidebar-border bg-sidebar overflow-visible"
 									>
 										<ListFilter size={14} />
 										{activeTypes.length > 0 && (
@@ -182,70 +221,6 @@ export function DocumentsFilters({
 					</Popover>
 				</ToggleGroup>
 
-				{/* AI Sort Toggle */}
-				{onToggleAiSort && (
-					<Tooltip>
-						<TooltipTrigger asChild>
-							<button
-								type="button"
-								disabled={aiSortBusy}
-								onClick={onToggleAiSort}
-								className={cn(
-									"relative h-9 w-9 shrink-0 rounded-md border inline-flex items-center justify-center transition-all duration-300 ease-out",
-									"focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 outline-none",
-									"disabled:pointer-events-none disabled:opacity-50",
-									aiSortEnabled
-										? "border-violet-400/60 bg-violet-50 text-violet-600 shadow-[0_0_8px_-1px_rgba(139,92,246,0.3)] hover:bg-violet-100 dark:border-violet-500/40 dark:bg-violet-500/15 dark:text-violet-400 dark:shadow-[0_0_8px_-1px_rgba(139,92,246,0.2)] dark:hover:bg-violet-500/25"
-										: "border-sidebar-border bg-sidebar text-sidebar-foreground/60 hover:text-sidebar-foreground hover:border-sidebar-border hover:bg-accent"
-								)}
-								aria-label={aiSortEnabled ? "Disable AI sort" : "Enable AI sort"}
-								aria-pressed={aiSortEnabled}
-							>
-								<AnimatePresence mode="wait" initial={false}>
-									{aiSortBusy ? (
-										<motion.div
-											key="busy"
-											initial={{ opacity: 0, scale: 0.6, rotate: -90 }}
-											animate={{ opacity: 1, scale: 1, rotate: 0 }}
-											exit={{ opacity: 0, scale: 0.6, rotate: 90 }}
-											transition={{ duration: 0.2, ease: "easeInOut" }}
-										>
-											<IconBinaryTree size={16} className="animate-pulse" />
-										</motion.div>
-									) : aiSortEnabled ? (
-										<motion.div
-											key="on"
-											initial={{ opacity: 0, scale: 0.6, rotate: -90 }}
-											animate={{ opacity: 1, scale: 1, rotate: 0 }}
-											exit={{ opacity: 0, scale: 0.6, rotate: 90 }}
-											transition={{ duration: 0.25, ease: "easeInOut" }}
-										>
-											<IconBinaryTreeFilled size={16} />
-										</motion.div>
-									) : (
-										<motion.div
-											key="off"
-											initial={{ opacity: 0, scale: 0.6, rotate: 90 }}
-											animate={{ opacity: 1, scale: 1, rotate: 0 }}
-											exit={{ opacity: 0, scale: 0.6, rotate: -90 }}
-											transition={{ duration: 0.25, ease: "easeInOut" }}
-										>
-											<IconBinaryTree size={16} />
-										</motion.div>
-									)}
-								</AnimatePresence>
-							</button>
-						</TooltipTrigger>
-						<TooltipContent>
-							{aiSortBusy
-								? "AI sort in progress..."
-								: aiSortEnabled
-									? "AI sort active — click to disable"
-									: "Enable AI sort"}
-						</TooltipContent>
-					</Tooltip>
-				)}
-
 				{/* Search Input */}
 				<div className="relative flex-1 min-w-0">
 					<div className="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3 text-muted-foreground">

From a74ed014cc4ba250df61fa3e67fbdddcbabcb0aa Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 21:50:34 +0530
Subject: [PATCH 13/14] refactor: update UI components for better accessibility
 and styling consistency

---
 .../components/ApiKeyContent.tsx              |   7 +-
 .../components/ProfileContent.tsx             |   6 +-
 .../components/new-chat/model-selector.tsx    |   2 +-
 .../public-chat-snapshot-row.tsx              | 145 ++++++++----------
 .../settings/general-settings-manager.tsx     |   6 +-
 .../components/settings/llm-role-manager.tsx  |   3 +-
 surfsense_web/messages/en.json                |   1 -
 surfsense_web/messages/es.json                |   1 -
 surfsense_web/messages/hi.json                |   1 -
 surfsense_web/messages/pt.json                |   1 -
 surfsense_web/messages/zh.json                |   1 -
 11 files changed, 72 insertions(+), 102 deletions(-)

diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx
index 223a6e3e7..3600d30db 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx
@@ -27,10 +27,9 @@ export function ApiKeyContent() {
 
 	return (
 		<div className="space-y-6 min-w-0 overflow-hidden">
-			<Alert className="border-border/60 bg-muted/30 text-muted-foreground">
-				<Info className="h-4 w-4 text-muted-foreground" />
-				<AlertTitle className="text-muted-foreground">{t("api_key_warning_title")}</AlertTitle>
-				<AlertDescription className="text-muted-foreground/60">
+			<Alert className="bg-muted/50 py-3 md:py-4">
+				<Info className="h-3 w-3 md:h-4 md:w-4 shrink-0" />
+				<AlertDescription className="text-xs md:text-sm">
 					{t("api_key_warning_description")}
 				</AlertDescription>
 			</Alert>
diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx
index 32377194a..6d9ce22be 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx
@@ -113,10 +113,10 @@ export function ProfileContent() {
 							type="submit"
 							variant="outline"
 							disabled={isPending || !hasChanges}
-							className="gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200"
+							className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200"
 						>
-							{isPending && <Spinner size="sm" className="mr-2" />}
-							{t("profile_save")}
+							<span className={isPending ? "opacity-0" : ""}>{t("profile_save")}</span>
+							{isPending && <Spinner size="sm" className="absolute" />}
 						</Button>
 					</div>
 				</form>
diff --git a/surfsense_web/components/new-chat/model-selector.tsx b/surfsense_web/components/new-chat/model-selector.tsx
index 0b8708269..8fec4cc93 100644
--- a/surfsense_web/components/new-chat/model-selector.tsx
+++ b/surfsense_web/components/new-chat/model-selector.tsx
@@ -970,7 +970,7 @@ export function ModelSelector({
 						{isAutoMode && (
 							<Badge
 								variant="secondary"
-								className="text-[9px] px-1 py-0 h-3.5 bg-violet-800 text-white dark:bg-violet-800 dark:text-white border-0"
+								className="text-[9px] px-1 py-0 h-3.5 bg-zinc-200 text-zinc-600 dark:bg-zinc-700 dark:text-zinc-300 border-0"
 							>
 								Recommended
 							</Badge>
diff --git a/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx b/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx
index 4bb49c48d..fbc5820c7 100644
--- a/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx
+++ b/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx
@@ -1,14 +1,22 @@
 "use client";
 
-import { Check, Copy, Dot, ExternalLink, MessageSquare, Trash2 } from "lucide-react";
-import { useCallback, useRef, useState } from "react";
+import { Copy, Dot, ExternalLink, MessageSquare, MoreHorizontal, Trash2 } from "lucide-react";
+import { useCallback, useState } from "react";
+import { toast } from "sonner";
 import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar";
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Card, CardContent } from "@/components/ui/card";
+import {
+	DropdownMenu,
+	DropdownMenuContent,
+	DropdownMenuItem,
+	DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
 import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip";
 import type { PublicChatSnapshotDetail } from "@/contracts/types/chat-threads.types";
 import { useMediaQuery } from "@/hooks/use-media-query";
+import { cn } from "@/lib/utils";
 
 function getInitials(name: string): string {
 	const parts = name.trim().split(/\s+/);
@@ -35,15 +43,12 @@ export function PublicChatSnapshotRow({
 	isDeleting = false,
 	memberMap,
 }: PublicChatSnapshotRowProps) {
-	const [copied, setCopied] = useState(false);
-	const copyTimeoutRef = useRef<ReturnType<typeof setTimeout>>(null);
+	const [dropdownOpen, setDropdownOpen] = useState(false);
 	const isDesktop = useMediaQuery("(min-width: 768px)");
 
 	const handleCopyClick = useCallback(() => {
 		onCopy(snapshot);
-		setCopied(true);
-		if (copyTimeoutRef.current) clearTimeout(copyTimeoutRef.current);
-		copyTimeoutRef.current = setTimeout(() => setCopied(false), 2000);
+		toast.success("Link copied to clipboard");
 	}, [onCopy, snapshot]);
 
 	const formattedDate = new Date(snapshot.created_at).toLocaleDateString(undefined, {
@@ -58,96 +63,66 @@ export function PublicChatSnapshotRow({
 		<Card className="group relative overflow-hidden transition-all duration-200 border-border/60 hover:shadow-md h-full">
 			<CardContent className="p-4 flex flex-col gap-3 h-full">
 				{/* Header: Title + Actions */}
-				<div className="relative">
-					<div className="min-w-0 pr-16 sm:pr-0 sm:group-hover:pr-16">
-						<h4
-							className="text-sm font-semibold tracking-tight truncate"
-							title={snapshot.thread_title}
-						>
-							{snapshot.thread_title}
-						</h4>
-					</div>
-					<div className="flex items-center gap-0.5 shrink-0 sm:hidden sm:group-hover:flex absolute right-0 top-0">
-						<TooltipProvider>
-							<Tooltip open={isDesktop ? undefined : false}>
-								<TooltipTrigger asChild>
-									<Button
-										variant="ghost"
-										size="icon"
-										asChild
-										className="h-7 w-7 text-muted-foreground hover:text-foreground"
-									>
-										<a href={snapshot.public_url} target="_blank" rel="noopener noreferrer">
-											<ExternalLink className="h-3 w-3" />
-										</a>
-									</Button>
-								</TooltipTrigger>
-								<TooltipContent>Open link</TooltipContent>
-							</Tooltip>
-						</TooltipProvider>
-						{canDelete && (
-							<TooltipProvider>
-								<Tooltip open={isDesktop ? undefined : false}>
-									<TooltipTrigger asChild>
-										<Button
-											variant="ghost"
-											size="icon"
-											onClick={() => onDelete(snapshot)}
-											disabled={isDeleting}
-											className="h-7 w-7 text-muted-foreground hover:text-destructive"
-										>
-											<Trash2 className="h-3 w-3" />
-										</Button>
-									</TooltipTrigger>
-									<TooltipContent>Delete</TooltipContent>
-								</Tooltip>
-							</TooltipProvider>
+				<div className="relative flex items-center">
+					<h4
+						className={cn(
+							"text-sm font-semibold tracking-tight truncate",
+							dropdownOpen ? "pr-8" : "sm:group-hover:pr-8"
 						)}
-					</div>
+						title={snapshot.thread_title}
+					>
+						{snapshot.thread_title}
+					</h4>
+					<DropdownMenu open={dropdownOpen} onOpenChange={setDropdownOpen}>
+						<DropdownMenuTrigger asChild>
+							<Button
+								variant="ghost"
+								size="icon"
+								className={cn(
+									"absolute right-0 h-6 w-6 shrink-0 hover:bg-transparent",
+									dropdownOpen
+										? "opacity-100"
+										: "sm:opacity-0 sm:group-hover:opacity-100"
+								)}
+							>
+								<MoreHorizontal className="h-3.5 w-3.5 text-muted-foreground" />
+							</Button>
+						</DropdownMenuTrigger>
+						<DropdownMenuContent align="end" className="w-40">
+							<DropdownMenuItem onClick={handleCopyClick}>
+								<Copy className="mr-2 h-4 w-4" />
+								Copy link
+							</DropdownMenuItem>
+							<DropdownMenuItem asChild>
+								<a href={snapshot.public_url} target="_blank" rel="noopener noreferrer">
+									<ExternalLink className="mr-2 h-4 w-4" />
+									Open link
+								</a>
+							</DropdownMenuItem>
+							{canDelete && (
+								<DropdownMenuItem
+									onClick={() => onDelete(snapshot)}
+									disabled={isDeleting}
+								>
+									<Trash2 className="mr-2 h-4 w-4" />
+									Delete
+								</DropdownMenuItem>
+							)}
+						</DropdownMenuContent>
+					</DropdownMenu>
 				</div>
 
 				{/* Message count badge */}
 				<div className="flex items-center gap-1.5">
 					<Badge
-						variant="outline"
-						className="text-[10px] px-1.5 py-0.5 border-muted-foreground/20 text-muted-foreground"
+						variant="secondary"
+						className="text-[10px] px-1.5 py-0.5 border-0 text-muted-foreground bg-muted"
 					>
 						<MessageSquare className="h-2.5 w-2.5 mr-1" />
 						{snapshot.message_count} messages
 					</Badge>
 				</div>
 
-				{/* Public URL – selectable fallback for manual copy */}
-				<div className="flex items-center gap-2 rounded-md border border-border/60 bg-muted/30 px-2.5 py-1.5">
-					<div className="min-w-0 flex-1 overflow-x-auto scrollbar-hide">
-						<p
-							className="text-[10px] font-mono text-muted-foreground whitespace-nowrap select-all cursor-text"
-							title={snapshot.public_url}
-						>
-							{snapshot.public_url}
-						</p>
-					</div>
-					<TooltipProvider>
-						<Tooltip open={isDesktop ? undefined : false}>
-							<TooltipTrigger asChild>
-								<Button
-									variant="ghost"
-									size="icon"
-									onClick={handleCopyClick}
-									className="h-6 w-6 shrink-0 text-muted-foreground hover:text-foreground"
-								>
-									{copied ? (
-										<Check className="h-3 w-3 text-green-500" />
-									) : (
-										<Copy className="h-3 w-3" />
-									)}
-								</Button>
-							</TooltipTrigger>
-							<TooltipContent>{copied ? "Copied!" : "Copy link"}</TooltipContent>
-						</Tooltip>
-					</TooltipProvider>
-				</div>
-
 				{/* Footer: Date + Creator */}
 				<div className="flex items-center gap-2 pt-2 border-t border-border/40 mt-auto">
 					<span className="text-[11px] text-muted-foreground/60">{formattedDate}</span>
diff --git a/surfsense_web/components/settings/general-settings-manager.tsx b/surfsense_web/components/settings/general-settings-manager.tsx
index 15d44906b..59aaa129e 100644
--- a/surfsense_web/components/settings/general-settings-manager.tsx
+++ b/surfsense_web/components/settings/general-settings-manager.tsx
@@ -181,10 +181,10 @@ export function GeneralSettingsManager({ searchSpaceId }: GeneralSettingsManager
 						type="submit"
 						variant="outline"
 						disabled={!hasChanges || saving || !name.trim()}
-						className="gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200"
+						className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200"
 					>
-						{saving ? <Spinner size="sm" /> : null}
-						{saving ? t("general_saving") : t("general_save")}
+						<span className={saving ? "opacity-0" : ""}>{t("general_save")}</span>
+						{saving && <Spinner size="sm" className="absolute" />}
 					</Button>
 				</div>
 			</form>
diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx
index d6eb7c64d..aa2749967 100644
--- a/surfsense_web/components/settings/llm-role-manager.tsx
+++ b/surfsense_web/components/settings/llm-role-manager.tsx
@@ -395,6 +395,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
 																		key={config.id}
 																		value={config.id.toString()}
 																		className="text-xs md:text-sm py-1.5 md:py-2"
+																		textValue={config.name}
 																	>
 																		<div className="flex items-center gap-1 md:gap-1.5 flex-wrap min-w-0">
 																			<span className="truncate text-xs md:text-sm">
@@ -403,7 +404,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
 																			{isAuto && (
 																				<Badge
 																					variant="secondary"
-																					className="text-[8px] md:text-[9px] shrink-0 bg-violet-100 text-violet-700 dark:bg-violet-900/30 dark:text-violet-300"
+																					className="text-[8px] md:text-[9px] shrink-0 bg-zinc-200 text-zinc-600 dark:bg-zinc-700 dark:text-zinc-300 [[data-slot=select-trigger]_&]:hidden"
 																				>
 																					Recommended
 																				</Badge>
diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json
index 701cb086d..2e42451cd 100644
--- a/surfsense_web/messages/en.json
+++ b/surfsense_web/messages/en.json
@@ -123,7 +123,6 @@
 		"api_key_nav_description": "Manage your API access token",
 		"api_key_title": "API Key",
 		"api_key_description": "Use this key to authenticate API requests",
-		"api_key_warning_title": "Keep it secret",
 		"api_key_warning_description": "Your API key grants full access to your account. Never share it publicly or commit it to version control.",
 		"your_api_key": "Your API Key",
 		"copied": "Copied!",
diff --git a/surfsense_web/messages/es.json b/surfsense_web/messages/es.json
index 9c872e701..e017b0bba 100644
--- a/surfsense_web/messages/es.json
+++ b/surfsense_web/messages/es.json
@@ -123,7 +123,6 @@
 		"api_key_nav_description": "Administra tu token de acceso a la API",
 		"api_key_title": "Clave API",
 		"api_key_description": "Usa esta clave para autenticar las solicitudes de la API",
-		"api_key_warning_title": "Mantenla en secreto",
 		"api_key_warning_description": "Tu clave API otorga acceso completo a tu cuenta. Nunca la compartas públicamente ni la incluyas en el control de versiones.",
 		"your_api_key": "Tu clave API",
 		"copied": "¡Copiado!",
diff --git a/surfsense_web/messages/hi.json b/surfsense_web/messages/hi.json
index 3b6975bf5..e60e92f03 100644
--- a/surfsense_web/messages/hi.json
+++ b/surfsense_web/messages/hi.json
@@ -123,7 +123,6 @@
 		"api_key_nav_description": "अपना API एक्सेस टोकन प्रबंधित करें",
 		"api_key_title": "API कुंजी",
 		"api_key_description": "API अनुरोधों को प्रमाणित करने के लिए इस कुंजी का उपयोग करें",
-		"api_key_warning_title": "इसे गुप्त रखें",
 		"api_key_warning_description": "आपकी API कुंजी आपके खाते तक पूर्ण पहुंच प्रदान करती है। इसे कभी सार्वजनिक रूप से साझा न करें या संस्करण नियंत्रण में शामिल न करें।",
 		"your_api_key": "आपकी API कुंजी",
 		"copied": "कॉपी किया गया!",
diff --git a/surfsense_web/messages/pt.json b/surfsense_web/messages/pt.json
index e92dff375..acece5391 100644
--- a/surfsense_web/messages/pt.json
+++ b/surfsense_web/messages/pt.json
@@ -123,7 +123,6 @@
 		"api_key_nav_description": "Gerencie seu token de acesso à API",
 		"api_key_title": "Chave API",
 		"api_key_description": "Use esta chave para autenticar solicitações da API",
-		"api_key_warning_title": "Mantenha em segredo",
 		"api_key_warning_description": "Sua chave API concede acesso total à sua conta. Nunca a compartilhe publicamente nem a inclua no controle de versão.",
 		"your_api_key": "Sua chave API",
 		"copied": "Copiado!",
diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json
index c53155edf..6a59115ed 100644
--- a/surfsense_web/messages/zh.json
+++ b/surfsense_web/messages/zh.json
@@ -108,7 +108,6 @@
 		"api_key_nav_description": "管理您的API访问令牌",
 		"api_key_title": "API密钥",
 		"api_key_description": "使用此密钥验证API请求",
-		"api_key_warning_title": "请保密",
 		"api_key_warning_description": "您的API密钥可以完全访问您的账户。请勿公开分享或提交到版本控制。",
 		"your_api_key": "您的API密钥",
 		"copied": "已复制！",

From 9fc0976d5eb7d6593d9c9c63577981fa8d398029 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Tue, 14 Apr 2026 21:52:26 +0530
Subject: [PATCH 14/14] chore: ran linting

---
 .../versions/125_add_token_usage_table.py     |   8 +-
 .../app/routes/new_chat_routes.py             |   4 +-
 surfsense_backend/app/services/llm_service.py |   3 +-
 .../app/services/token_tracking_service.py    |  23 +++-
 .../app/tasks/chat/stream_new_chat.py         | 106 ++++++++++++------
 5 files changed, 97 insertions(+), 47 deletions(-)

diff --git a/surfsense_backend/alembic/versions/125_add_token_usage_table.py b/surfsense_backend/alembic/versions/125_add_token_usage_table.py
index c08280487..915561c8c 100644
--- a/surfsense_backend/alembic/versions/125_add_token_usage_table.py
+++ b/surfsense_backend/alembic/versions/125_add_token_usage_table.py
@@ -33,7 +33,9 @@ def upgrade() -> None:
         "token_usage",
         sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
         sa.Column("prompt_tokens", sa.Integer(), nullable=False, server_default="0"),
-        sa.Column("completion_tokens", sa.Integer(), nullable=False, server_default="0"),
+        sa.Column(
+            "completion_tokens", sa.Integer(), nullable=False, server_default="0"
+        ),
         sa.Column("total_tokens", sa.Integer(), nullable=False, server_default="0"),
         sa.Column("model_breakdown", JSONB, nullable=True),
         sa.Column("call_details", JSONB, nullable=True),
@@ -72,7 +74,9 @@ def upgrade() -> None:
 
     op.create_index("ix_token_usage_thread_id", "token_usage", ["thread_id"])
     op.create_index("ix_token_usage_message_id", "token_usage", ["message_id"])
-    op.create_index("ix_token_usage_search_space_id", "token_usage", ["search_space_id"])
+    op.create_index(
+        "ix_token_usage_search_space_id", "token_usage", ["search_space_id"]
+    )
     op.create_index("ix_token_usage_user_id", "token_usage", ["user_id"])
     op.create_index("ix_token_usage_usage_type", "token_usage", ["usage_type"])
 
diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py
index 55302b873..b914b297e 100644
--- a/surfsense_backend/app/routes/new_chat_routes.py
+++ b/surfsense_backend/app/routes/new_chat_routes.py
@@ -498,7 +498,9 @@ async def get_thread_messages(
                 author_id=msg.author_id,
                 author_display_name=msg.author.display_name if msg.author else None,
                 author_avatar_url=msg.author.avatar_url if msg.author else None,
-                token_usage=TokenUsageSummary.model_validate(msg.token_usage) if msg.token_usage else None,
+                token_usage=TokenUsageSummary.model_validate(msg.token_usage)
+                if msg.token_usage
+                else None,
             )
             for msg in db_messages
         ]
diff --git a/surfsense_backend/app/services/llm_service.py b/surfsense_backend/app/services/llm_service.py
index c90bdfce3..d31e19ed3 100644
--- a/surfsense_backend/app/services/llm_service.py
+++ b/surfsense_backend/app/services/llm_service.py
@@ -15,6 +15,7 @@ from app.services.llm_router_service import (
     get_auto_mode_llm,
     is_auto_mode,
 )
+from app.services.token_tracking_service import token_tracker
 
 # Configure litellm to automatically drop unsupported parameters
 litellm.drop_params = True
@@ -25,8 +26,6 @@ litellm.cache = None
 litellm.failure_callback = []
 litellm.input_callback = []
 
-from app.services.token_tracking_service import token_tracker
-
 litellm.callbacks = [token_tracker]
 
 logger = logging.getLogger(__name__)
diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py
index 5d69e6870..9aa8c6e70 100644
--- a/surfsense_backend/app/services/token_tracking_service.py
+++ b/surfsense_backend/app/services/token_tracking_service.py
@@ -117,12 +117,16 @@ class TokenTrackingCallback(CustomLogger):
     ) -> None:
         acc = _turn_accumulator.get()
         if acc is None:
-            logger.debug("[TokenTracking] async_log_success_event fired but no accumulator in context")
+            logger.debug(
+                "[TokenTracking] async_log_success_event fired but no accumulator in context"
+            )
             return
 
         usage = getattr(response_obj, "usage", None)
         if not usage:
-            logger.debug("[TokenTracking] async_log_success_event fired but response has no usage data")
+            logger.debug(
+                "[TokenTracking] async_log_success_event fired but response has no usage data"
+            )
             return
 
         prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
@@ -139,7 +143,11 @@ class TokenTrackingCallback(CustomLogger):
         )
         logger.info(
             "[TokenTracking] Captured: model=%s prompt=%d completion=%d total=%d (accumulator now has %d calls)",
-            model, prompt_tokens, completion_tokens, total_tokens, len(acc.calls),
+            model,
+            prompt_tokens,
+            completion_tokens,
+            total_tokens,
+            len(acc.calls),
         )
 
 
@@ -187,11 +195,16 @@ async def record_token_usage(
         session.add(record)
         logger.debug(
             "[TokenTracking] recorded %s usage: prompt=%d completion=%d total=%d",
-            usage_type, prompt_tokens, completion_tokens, total_tokens,
+            usage_type,
+            prompt_tokens,
+            completion_tokens,
+            total_tokens,
         )
         return record
     except Exception:
         logger.warning(
-            "[TokenTracking] failed to record %s token usage", usage_type, exc_info=True,
+            "[TokenTracking] failed to record %s token usage",
+            usage_type,
+            exc_info=True,
         )
         return None
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index e87a1b791..478aa3671 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -1475,17 +1475,22 @@ async def stream_new_chat(
                 """
                 try:
                     from litellm import acompletion
+
                     from app.services.llm_router_service import LLMRouterService
                     from app.services.token_tracking_service import _turn_accumulator
 
                     _turn_accumulator.set(None)
 
-                    prompt = TITLE_GENERATION_PROMPT.replace("{user_query}", user_query[:500])
+                    prompt = TITLE_GENERATION_PROMPT.replace(
+                        "{user_query}", user_query[:500]
+                    )
                     messages = [{"role": "user", "content": prompt}]
 
                     if getattr(llm, "model", None) == "auto":
                         router = LLMRouterService.get_router()
-                        response = await router.acompletion(model="auto", messages=messages)
+                        response = await router.acompletion(
+                            model="auto", messages=messages
+                        )
                     else:
                         response = await acompletion(
                             model=llm.model,
@@ -1498,11 +1503,16 @@ async def stream_new_chat(
                     usage = getattr(response, "usage", None)
                     if usage:
                         raw_model = getattr(llm, "model", "") or ""
-                        model_name = raw_model.split("/", 1)[-1] if "/" in raw_model else (raw_model or response.model or "unknown")
+                        model_name = (
+                            raw_model.split("/", 1)[-1]
+                            if "/" in raw_model
+                            else (raw_model or response.model or "unknown")
+                        )
                         usage_info = {
                             "model": model_name,
                             "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0,
-                            "completion_tokens": getattr(usage, "completion_tokens", 0) or 0,
+                            "completion_tokens": getattr(usage, "completion_tokens", 0)
+                            or 0,
                             "total_tokens": getattr(usage, "total_tokens", 0) or 0,
                         }
 
@@ -1511,7 +1521,9 @@ async def stream_new_chat(
                         return raw_title.strip("\"'"), usage_info
                     return None, usage_info
                 except Exception:
-                    logging.getLogger(__name__).exception("[TitleGen] _generate_title failed")
+                    logging.getLogger(__name__).exception(
+                        "[TitleGen] _generate_title failed"
+                    )
                     return None, None
 
             title_task = asyncio.create_task(_generate_title())
@@ -1575,16 +1587,21 @@ async def stream_new_chat(
             usage_summary = accumulator.per_message_summary()
             _perf_log.info(
                 "[token_usage] interrupted new_chat: calls=%d total=%d summary=%s",
-                len(accumulator.calls), accumulator.grand_total, usage_summary,
+                len(accumulator.calls),
+                accumulator.grand_total,
+                usage_summary,
             )
             if usage_summary:
-                yield streaming_service.format_data("token-usage", {
-                    "usage": usage_summary,
-                    "prompt_tokens": accumulator.total_prompt_tokens,
-                    "completion_tokens": accumulator.total_completion_tokens,
-                    "total_tokens": accumulator.grand_total,
-                    "call_details": accumulator.serialized_calls(),
-                })
+                yield streaming_service.format_data(
+                    "token-usage",
+                    {
+                        "usage": usage_summary,
+                        "prompt_tokens": accumulator.total_prompt_tokens,
+                        "completion_tokens": accumulator.total_completion_tokens,
+                        "total_tokens": accumulator.grand_total,
+                        "call_details": accumulator.serialized_calls(),
+                    },
+                )
 
             yield streaming_service.format_finish_step()
             yield streaming_service.format_finish()
@@ -1612,16 +1629,21 @@ async def stream_new_chat(
         usage_summary = accumulator.per_message_summary()
         _perf_log.info(
             "[token_usage] normal new_chat: calls=%d total=%d summary=%s",
-            len(accumulator.calls), accumulator.grand_total, usage_summary,
+            len(accumulator.calls),
+            accumulator.grand_total,
+            usage_summary,
         )
         if usage_summary:
-            yield streaming_service.format_data("token-usage", {
-                "usage": usage_summary,
-                "prompt_tokens": accumulator.total_prompt_tokens,
-                "completion_tokens": accumulator.total_completion_tokens,
-                "total_tokens": accumulator.grand_total,
-                "call_details": accumulator.serialized_calls(),
-            })
+            yield streaming_service.format_data(
+                "token-usage",
+                {
+                    "usage": usage_summary,
+                    "prompt_tokens": accumulator.total_prompt_tokens,
+                    "completion_tokens": accumulator.total_completion_tokens,
+                    "total_tokens": accumulator.grand_total,
+                    "call_details": accumulator.serialized_calls(),
+                },
+            )
 
         # Fire background memory extraction if the agent didn't handle it.
         # Shared threads write to team memory; private threads write to user memory.
@@ -1870,16 +1892,21 @@ async def stream_resume_chat(
             usage_summary = accumulator.per_message_summary()
             _perf_log.info(
                 "[token_usage] interrupted resume_chat: calls=%d total=%d summary=%s",
-                len(accumulator.calls), accumulator.grand_total, usage_summary,
+                len(accumulator.calls),
+                accumulator.grand_total,
+                usage_summary,
             )
             if usage_summary:
-                yield streaming_service.format_data("token-usage", {
-                    "usage": usage_summary,
-                    "prompt_tokens": accumulator.total_prompt_tokens,
-                    "completion_tokens": accumulator.total_completion_tokens,
-                    "total_tokens": accumulator.grand_total,
-                    "call_details": accumulator.serialized_calls(),
-                })
+                yield streaming_service.format_data(
+                    "token-usage",
+                    {
+                        "usage": usage_summary,
+                        "prompt_tokens": accumulator.total_prompt_tokens,
+                        "completion_tokens": accumulator.total_completion_tokens,
+                        "total_tokens": accumulator.grand_total,
+                        "call_details": accumulator.serialized_calls(),
+                    },
+                )
 
             yield streaming_service.format_finish_step()
             yield streaming_service.format_finish()
@@ -1889,16 +1916,21 @@ async def stream_resume_chat(
         usage_summary = accumulator.per_message_summary()
         _perf_log.info(
             "[token_usage] normal resume_chat: calls=%d total=%d summary=%s",
-            len(accumulator.calls), accumulator.grand_total, usage_summary,
+            len(accumulator.calls),
+            accumulator.grand_total,
+            usage_summary,
         )
         if usage_summary:
-            yield streaming_service.format_data("token-usage", {
-                "usage": usage_summary,
-                "prompt_tokens": accumulator.total_prompt_tokens,
-                "completion_tokens": accumulator.total_completion_tokens,
-                "total_tokens": accumulator.grand_total,
-                "call_details": accumulator.serialized_calls(),
-            })
+            yield streaming_service.format_data(
+                "token-usage",
+                {
+                    "usage": usage_summary,
+                    "prompt_tokens": accumulator.total_prompt_tokens,
+                    "completion_tokens": accumulator.total_completion_tokens,
+                    "total_tokens": accumulator.grand_total,
+                    "call_details": accumulator.serialized_calls(),
+                },
+            )
 
         yield streaming_service.format_finish_step()
         yield streaming_service.format_finish()