Merge pull request #1224 from AnishSarkar22/feat/token-calculation

feat: track and display per-message LLM token usage & UI/UX changes
2026-05-27 19:25:15 +02:00 · 2026-04-14 13:10:05 -07:00 · 2026-04-14 13:10:05 -07:00 · ae0df70880
commit ae0df70880
parent 91c2c06108 9fc0976d5e
31 changed files with 1118 additions and 292 deletions
--- a/surfsense_backend/alembic/versions/125_add_token_usage_table.py
+++ b/surfsense_backend/alembic/versions/125_add_token_usage_table.py
@ -0,0 +1,85 @@
+"""125_add_token_usage_table
+
+Revision ID: 125
+Revises: 124
+Create Date: 2026-04-14
+
+Adds token_usage table for tracking LLM token consumption per message.
+Supports future extension via usage_type for indexing, image gen, etc.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "125"
+down_revision: str | None = "124"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the ``token_usage`` table and its single-column indexes.
+
+    Does nothing when a table named ``token_usage`` already exists; in that
+    case the index creation at the end of this function is skipped too.
+    """
+    conn = op.get_bind()
+    if sa.inspect(conn).has_table("token_usage"):
+        return
+
+    op.create_table(
+        "token_usage",
+        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+        # Token counters are NOT NULL with a database-side default of 0.
+        sa.Column("prompt_tokens", sa.Integer(), nullable=False, server_default="0"),
+        sa.Column(
+            "completion_tokens", sa.Integer(), nullable=False, server_default="0"
+        ),
+        sa.Column("total_tokens", sa.Integer(), nullable=False, server_default="0"),
+        # Optional JSONB payloads.
+        sa.Column("model_breakdown", JSONB, nullable=True),
+        sa.Column("call_details", JSONB, nullable=True),
+        # Usage category; rows inserted without a value get "chat".
+        sa.Column("usage_type", sa.String(50), nullable=False, server_default="chat"),
+        # Optional link to a thread; the row is deleted with the thread.
+        sa.Column(
+            "thread_id",
+            sa.Integer(),
+            sa.ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
+            nullable=True,
+        ),
+        # Optional link to a message; deleting the message keeps the row and
+        # nulls this column.
+        sa.Column(
+            "message_id",
+            sa.Integer(),
+            sa.ForeignKey("new_chat_messages.id", ondelete="SET NULL"),
+            nullable=True,
+        ),
+        # Required owner references; the row is deleted with either parent.
+        sa.Column(
+            "search_space_id",
+            sa.Integer(),
+            sa.ForeignKey("searchspaces.id", ondelete="CASCADE"),
+            nullable=False,
+        ),
+        sa.Column(
+            "user_id",
+            UUID(as_uuid=True),
+            sa.ForeignKey("user.id", ondelete="CASCADE"),
+            nullable=False,
+        ),
+        # Timezone-aware creation timestamp filled in by the database.
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+    )
+
+    # One non-unique index per foreign-key column, plus one on usage_type.
+    op.create_index("ix_token_usage_thread_id", "token_usage", ["thread_id"])
+    op.create_index("ix_token_usage_message_id", "token_usage", ["message_id"])
+    op.create_index(
+        "ix_token_usage_search_space_id", "token_usage", ["search_space_id"]
+    )
+    op.create_index("ix_token_usage_user_id", "token_usage", ["user_id"])
+    op.create_index("ix_token_usage_usage_type", "token_usage", ["usage_type"])
+
+
+def downgrade() -> None:
+    """Drop the ``token_usage`` table.
+
+    Unlike ``upgrade``, this is unconditional: there is no ``has_table`` guard.
+    """
+    # PostgreSQL drops a table's indexes together with the table, so the
+    # create_index calls in upgrade() need no explicit counterpart here.
+    op.drop_table("token_usage")
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -647,6 +647,11 @@ class NewChatThread(BaseModel, TimestampMixin):
        cascade="all, delete-orphan",
        foreign_keys="[PublicChatSnapshot.thread_id]",
    )
+    token_usages = relationship(
+        "TokenUsage",
+        back_populates="thread",
+        cascade="all, delete-orphan",
+    )


 class NewChatMessage(BaseModel, TimestampMixin):
@ -685,6 +690,63 @@ class NewChatMessage(BaseModel, TimestampMixin):
        back_populates="message",
        cascade="all, delete-orphan",
    )
+    token_usage = relationship(
+        "TokenUsage",
+        back_populates="message",
+        uselist=False,
+        cascade="all, delete-orphan",
+    )
+
+
+class TokenUsage(BaseModel, TimestampMixin):
+    """
+    Tracks LLM token consumption per assistant turn.
+
+    One row per usage event. For chat, linked to a specific message via message_id.
+    The usage_type column enables future extension to track non-chat usage
+    (indexing, image generation, podcasts, etc.) without schema changes.
+    """
+
+    __tablename__ = "token_usage"
+
+    # Token counters; the ORM supplies 0 when a value is not given.
+    prompt_tokens = Column(Integer, nullable=False, default=0)
+    completion_tokens = Column(Integer, nullable=False, default=0)
+    total_tokens = Column(Integer, nullable=False, default=0)
+    # Optional JSONB payloads. The chat route stores the "usage" and
+    # "call_details" entries of the client-supplied token_usage payload here.
+    model_breakdown = Column(JSONB, nullable=True)
+    call_details = Column(JSONB, nullable=True)
+
+    # Usage category; defaults to "chat" and is indexed for filtering.
+    usage_type = Column(String(50), nullable=False, default="chat", index=True)
+
+    # Optional thread link; the database deletes this row with the thread.
+    thread_id = Column(
+        Integer,
+        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
+        nullable=True,
+        index=True,
+    )
+    # Optional message link; a database-level delete of the message only nulls
+    # this column.
+    # NOTE(review): NewChatMessage.token_usage is declared with
+    # cascade="all, delete-orphan" and uselist=False, so an ORM-level delete of
+    # the message removes this row instead, and the index on this column is not
+    # unique - confirm that both behaviours are intended.
+    message_id = Column(
+        Integer,
+        ForeignKey("new_chat_messages.id", ondelete="SET NULL"),
+        nullable=True,
+        index=True,
+    )
+    # Required owner references; the database deletes this row with either one.
+    search_space_id = Column(
+        Integer,
+        ForeignKey("searchspaces.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    user_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("user.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+
+    # Relationships
+    # thread/message are bidirectional (back_populates); search_space and user
+    # are one-way references with no reverse attribute declared here.
+    thread = relationship("NewChatThread", back_populates="token_usages")
+    message = relationship("NewChatMessage", back_populates="token_usage")
+    search_space = relationship("SearchSpace")
+    user = relationship("User")


 class PublicChatSnapshot(BaseModel, TimestampMixin):
--- a/surfsense_backend/app/routes/new_chat_routes.py
+++ b/surfsense_backend/app/routes/new_chat_routes.py
@ -50,7 +50,9 @@ from app.schemas.new_chat import (
    ThreadHistoryLoadResponse,
    ThreadListItem,
    ThreadListResponse,
+    TokenUsageSummary,
 )
+from app.services.token_tracking_service import record_token_usage
 from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat
 from app.users import current_active_user
 from app.utils.rbac import check_permission
@ -473,10 +475,13 @@ async def get_thread_messages(
        # Check thread-level access based on visibility
        await check_thread_access(session, thread, user)

-        # Get messages with their authors loaded
+        # Get messages with their authors and token usage loaded
        messages_result = await session.execute(
            select(NewChatMessage)
-            .options(selectinload(NewChatMessage.author))
+            .options(
+                selectinload(NewChatMessage.author),
+                selectinload(NewChatMessage.token_usage),
+            )
            .filter(NewChatMessage.thread_id == thread_id)
            .order_by(NewChatMessage.created_at)
        )
@ -493,6 +498,9 @@ async def get_thread_messages(
                author_id=msg.author_id,
                author_display_name=msg.author.display_name if msg.author else None,
                author_avatar_url=msg.author.avatar_url if msg.author else None,
+                token_usage=TokenUsageSummary.model_validate(msg.token_usage)
+                if msg.token_usage
+                else None,
            )
            for msg in db_messages
        ]
@ -530,7 +538,11 @@ async def get_thread_full(
    try:
        result = await session.execute(
            select(NewChatThread)
-            .options(selectinload(NewChatThread.messages))
+            .options(
+                selectinload(NewChatThread.messages).selectinload(
+                    NewChatMessage.token_usage
+                ),
+            )
            .filter(NewChatThread.id == thread_id)
        )
        thread = result.scalars().first()
@ -935,11 +947,37 @@ async def append_message(

        # flush assigns the PK/defaults without a round-trip SELECT
        await session.flush()
+
+        # Persist token usage if provided (for assistant messages)
+        token_usage_data = raw_body.get("token_usage")
+        if token_usage_data and message_role == NewChatMessageRole.ASSISTANT:
+            await record_token_usage(
+                session,
+                usage_type="chat",
+                search_space_id=thread.search_space_id,
+                user_id=user.id,
+                prompt_tokens=token_usage_data.get("prompt_tokens", 0),
+                completion_tokens=token_usage_data.get("completion_tokens", 0),
+                total_tokens=token_usage_data.get("total_tokens", 0),
+                model_breakdown=token_usage_data.get("usage"),
+                call_details=token_usage_data.get("call_details"),
+                thread_id=thread_id,
+                message_id=db_message.id,
+            )
+
        await session.commit()

-        # Return the in-memory object (already has id from flush) instead of
-        # doing an extra refresh() SELECT.
-        return db_message
+        # Build response manually to avoid lazy-loading the token_usage
+        # relationship after commit (which would trigger MissingGreenlet).
+        return NewChatMessageRead(
+            id=db_message.id,
+            thread_id=db_message.thread_id,
+            role=db_message.role,
+            content=db_message.content,
+            created_at=db_message.created_at,
+            author_id=db_message.author_id,
+            token_usage=None,
+        )

    except HTTPException:
        raise
@ -1003,6 +1041,7 @@ async def list_messages(
        # Get messages
        query = (
            select(NewChatMessage)
+            .options(selectinload(NewChatMessage.token_usage))
            .filter(NewChatMessage.thread_id == thread_id)
            .order_by(NewChatMessage.created_at)
            .offset(skip)
--- a/surfsense_backend/app/schemas/new_chat.py
+++ b/surfsense_backend/app/schemas/new_chat.py
@ -34,6 +34,14 @@ class NewChatMessageCreate(NewChatMessageBase):
    thread_id: int


+class TokenUsageSummary(BaseModel):
+    """Token counts attached to a message in API responses.
+
+    Carries the three counters plus the optional per-model breakdown;
+    ``call_details`` is not part of this schema. ``from_attributes`` is
+    enabled so the summary can be built from an attribute-bearing object
+    with ``model_validate``.
+    """
+
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+    model_breakdown: dict | None = None
+    model_config = ConfigDict(from_attributes=True)
+
+
 class NewChatMessageRead(NewChatMessageBase, IDModel, TimestampModel):
    """Schema for reading a message."""

@ -41,6 +49,7 @@ class NewChatMessageRead(NewChatMessageBase, IDModel, TimestampModel):
    author_id: UUID | None = None
    author_display_name: str | None = None
    author_avatar_url: str | None = None
+    token_usage: TokenUsageSummary | None = None
    model_config = ConfigDict(from_attributes=True)


--- a/surfsense_backend/app/services/llm_router_service.py
+++ b/surfsense_backend/app/services/llm_router_service.py
@ -820,7 +820,9 @@ class ChatLiteLLMRouter(BaseChatModel):
        )

        # Convert response to ChatResult with potential tool calls
-        message = self._convert_response_to_message(response.choices[0].message)
+        message = self._convert_response_to_message(
+            response.choices[0].message, response=response
+        )
        generation = ChatGeneration(message=message)

        return ChatResult(generations=[generation])
@ -886,7 +888,9 @@ class ChatLiteLLMRouter(BaseChatModel):
        )

        # Convert response to ChatResult with potential tool calls
-        message = self._convert_response_to_message(response.choices[0].message)
+        message = self._convert_response_to_message(
+            response.choices[0].message, response=response
+        )
        generation = ChatGeneration(message=message)

        return ChatResult(generations=[generation])
@ -970,6 +974,7 @@ class ChatLiteLLMRouter(BaseChatModel):
                messages=formatted_messages,
                stop=stop,
                stream=True,
+                stream_options={"include_usage": True},
                **call_kwargs,
            )
        except ContextWindowExceededError as e:
@ -1075,7 +1080,9 @@ class ChatLiteLLMRouter(BaseChatModel):

        return result

-    def _convert_response_to_message(self, response_message: Any) -> AIMessage:
+    def _convert_response_to_message(
+        self, response_message: Any, response: Any = None
+    ) -> AIMessage:
        """Convert a LiteLLM response message to a LangChain AIMessage."""
        import json

@ -1098,9 +1105,22 @@ class ChatLiteLLMRouter(BaseChatModel):
                        tool_call["args"] = tc.function.arguments
                tool_calls.append(tool_call)

+        extra_kwargs: dict[str, Any] = {}
+        if response:
+            usage = getattr(response, "usage", None)
+            if usage:
+                extra_kwargs["usage_metadata"] = {
+                    "input_tokens": getattr(usage, "prompt_tokens", 0) or 0,
+                    "output_tokens": getattr(usage, "completion_tokens", 0) or 0,
+                    "total_tokens": getattr(usage, "total_tokens", 0) or 0,
+                }
+            extra_kwargs["response_metadata"] = {
+                "model_name": getattr(response, "model", "unknown"),
+            }
+
        if tool_calls:
-            return AIMessage(content=content, tool_calls=tool_calls)
-        return AIMessage(content=content)
+            return AIMessage(content=content, tool_calls=tool_calls, **extra_kwargs)
+        return AIMessage(content=content, **extra_kwargs)

    def _convert_delta_to_chunk(self, delta: Any) -> AIMessageChunk | None:
        """Convert a streaming delta to an AIMessageChunk."""
--- a/surfsense_backend/app/services/llm_service.py
+++ b/surfsense_backend/app/services/llm_service.py
@ -15,6 +15,7 @@ from app.services.llm_router_service import (
    get_auto_mode_llm,
    is_auto_mode,
 )
+from app.services.token_tracking_service import token_tracker

 # Configure litellm to automatically drop unsupported parameters
 litellm.drop_params = True
@ -22,10 +23,11 @@ litellm.drop_params = True
 # Memory controls: prevent unbounded internal accumulation
 litellm.telemetry = False
 litellm.cache = None
-litellm.success_callback = []
 litellm.failure_callback = []
 litellm.input_callback = []

+litellm.callbacks = [token_tracker]
+
 logger = logging.getLogger(__name__)


--- a/surfsense_backend/app/services/token_tracking_service.py
+++ b/surfsense_backend/app/services/token_tracking_service.py
@ -0,0 +1,210 @@
+"""
+Token usage tracking via LiteLLM custom callback.
+
+Uses a ContextVar-scoped accumulator to group all LLM calls within a single
+async request/turn. The accumulated data is emitted via SSE and persisted
+when the frontend calls appendMessage.
+
+The module also provides ``record_token_usage``, a thin async helper that
+creates a ``TokenUsage`` row for *any* usage type (chat, indexing, image
+generation, podcasts, …).  Call sites should prefer this helper over
+constructing ``TokenUsage`` manually so that logging and error handling
+stay consistent.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import logging
+from contextvars import ContextVar
+from dataclasses import dataclass, field
+from typing import Any
+from uuid import UUID
+
+from litellm.integrations.custom_logger import CustomLogger
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import TokenUsage
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TokenCallRecord:
+    """One recorded LLM call: the model name and its token counts."""
+
+    model: str
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+@dataclass
+class TurnTokenAccumulator:
+    """Accumulates token usage across all LLM calls within a single user turn."""
+
+    # Recorded calls, in the order they were added.
+    calls: list[TokenCallRecord] = field(default_factory=list)
+
+    def add(
+        self,
+        model: str,
+        prompt_tokens: int,
+        completion_tokens: int,
+        total_tokens: int,
+    ) -> None:
+        """Append one call record built from the given values."""
+        self.calls.append(
+            TokenCallRecord(
+                model=model,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+        )
+
+    def per_message_summary(self) -> dict[str, dict[str, int]]:
+        """Return token counts grouped by model name.
+
+        Each value is a dict with ``prompt_tokens``, ``completion_tokens`` and
+        ``total_tokens`` summed over every recorded call for that model. The
+        result is an empty dict when no calls were recorded.
+        """
+        by_model: dict[str, dict[str, int]] = {}
+        for c in self.calls:
+            # First call for a model seeds a zeroed entry; later calls reuse it.
+            entry = by_model.setdefault(
+                c.model,
+                {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+            )
+            entry["prompt_tokens"] += c.prompt_tokens
+            entry["completion_tokens"] += c.completion_tokens
+            entry["total_tokens"] += c.total_tokens
+        return by_model
+
+    @property
+    def grand_total(self) -> int:
+        """Sum of ``total_tokens`` over all recorded calls."""
+        return sum(c.total_tokens for c in self.calls)
+
+    @property
+    def total_prompt_tokens(self) -> int:
+        """Sum of ``prompt_tokens`` over all recorded calls."""
+        return sum(c.prompt_tokens for c in self.calls)
+
+    @property
+    def total_completion_tokens(self) -> int:
+        """Sum of ``completion_tokens`` over all recorded calls."""
+        return sum(c.completion_tokens for c in self.calls)
+
+    def serialized_calls(self) -> list[dict[str, Any]]:
+        """Return the recorded calls as a list of plain dicts, one per call."""
+        return [dataclasses.asdict(c) for c in self.calls]
+
+
+# Accumulator for the current context; None until start_turn() sets one.
+_turn_accumulator: ContextVar[TurnTokenAccumulator | None] = ContextVar(
+    "_turn_accumulator", default=None
+)
+
+
+def start_turn() -> TurnTokenAccumulator:
+    """Create a fresh accumulator for the current async context and return it.
+
+    The new accumulator replaces whatever ``_turn_accumulator`` held in this
+    context, and its creation is logged at INFO level.
+    """
+    acc = TurnTokenAccumulator()
+    _turn_accumulator.set(acc)
+    logger.info("[TokenTracking] start_turn: new accumulator created (id=%s)", id(acc))
+    return acc
+
+
+def get_current_accumulator() -> TurnTokenAccumulator | None:
+    """Return the accumulator set in the current context, or ``None`` if unset."""
+    return _turn_accumulator.get()
+
+
+class TokenTrackingCallback(CustomLogger):
+    """LiteLLM callback that captures token usage into the turn accumulator."""
+
+    async def async_log_success_event(
+        self,
+        kwargs: dict[str, Any],
+        response_obj: Any,
+        start_time: Any,
+        end_time: Any,
+    ) -> None:
+        """Record the usage of one LLM call in the current accumulator.
+
+        Returns without recording when no accumulator is set in the current
+        context or when ``response_obj`` has no truthy ``usage`` attribute.
+        ``start_time`` and ``end_time`` are accepted but not used.
+        """
+        acc = _turn_accumulator.get()
+        if acc is None:
+            logger.debug(
+                "[TokenTracking] async_log_success_event fired but no accumulator in context"
+            )
+            return
+
+        usage = getattr(response_obj, "usage", None)
+        if not usage:
+            logger.debug(
+                "[TokenTracking] async_log_success_event fired but response has no usage data"
+            )
+            return
+
+        # "or 0" turns a missing or None counter into 0.
+        prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
+        completion_tokens = getattr(usage, "completion_tokens", 0) or 0
+        total_tokens = getattr(usage, "total_tokens", 0) or 0
+
+        # The model name comes from the call kwargs, not from the response.
+        model = kwargs.get("model", "unknown")
+
+        acc.add(
+            model=model,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+        )
+        logger.info(
+            "[TokenTracking] Captured: model=%s prompt=%d completion=%d total=%d (accumulator now has %d calls)",
+            model,
+            prompt_tokens,
+            completion_tokens,
+            total_tokens,
+            len(acc.calls),
+        )
+
+
+# Module-level callback instance, imported by other modules for registration.
+token_tracker = TokenTrackingCallback()
+
+
+# ---------------------------------------------------------------------------
+# Persistence helper
+# ---------------------------------------------------------------------------
+
+
+async def record_token_usage(
+    session: AsyncSession,
+    *,
+    usage_type: str,
+    search_space_id: int,
+    user_id: UUID,
+    prompt_tokens: int = 0,
+    completion_tokens: int = 0,
+    total_tokens: int = 0,
+    model_breakdown: dict[str, Any] | None = None,
+    call_details: dict[str, Any] | None = None,
+    thread_id: int | None = None,
+    message_id: int | None = None,
+) -> TokenUsage | None:
+    """Build a ``TokenUsage`` row and add it to ``session``.
+
+    The row is only staged with ``session.add``; this function neither
+    flushes nor commits, so the caller must commit the session for the row
+    to be written. Errors raised at flush/commit time therefore surface in
+    the caller, not here.
+
+    Returns the staged record, or ``None`` if constructing or adding it
+    raised (that failure is logged with a traceback and not propagated).
+    """
+    # NOTE(review): the streaming code emits "call_details" as a list of
+    # per-call dicts and the chat route forwards that value here, while the
+    # parameter is annotated as a dict - confirm and widen the annotation.
+    try:
+        record = TokenUsage(
+            usage_type=usage_type,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+            model_breakdown=model_breakdown,
+            call_details=call_details,
+            thread_id=thread_id,
+            message_id=message_id,
+            search_space_id=search_space_id,
+            user_id=user_id,
+        )
+        session.add(record)
+        logger.debug(
+            "[TokenTracking] recorded %s usage: prompt=%d completion=%d total=%d",
+            usage_type,
+            prompt_tokens,
+            completion_tokens,
+            total_tokens,
+        )
+        return record
+    except Exception:
+        logger.warning(
+            "[TokenTracking] failed to record %s token usage",
+            usage_type,
+            exc_info=True,
+        )
+        return None
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -51,7 +51,7 @@ from app.db import (
    async_session_maker,
    shielded_async_session,
 )
-from app.prompts import TITLE_GENERATION_PROMPT_TEMPLATE
+from app.prompts import TITLE_GENERATION_PROMPT
 from app.services.chat_session_state_service import (
    clear_ai_responding,
    set_ai_responding,
@ -1171,6 +1171,10 @@ async def stream_new_chat(
    _t_total = time.perf_counter()
    log_system_snapshot("stream_new_chat_START")

+    from app.services.token_tracking_service import start_turn
+
+    accumulator = start_turn()
+
    session = async_session_maker()
    try:
        # Mark AI as responding to this user for live collaboration
@ -1456,22 +1460,71 @@ async def stream_new_chat(
        )
        is_first_response = (assistant_count_result.scalar() or 0) == 0

-        title_task: asyncio.Task[str | None] | None = None
+        title_task: asyncio.Task[tuple[str | None, dict | None]] | None = None
        if is_first_response:

-            async def _generate_title() -> str | None:
+            async def _generate_title() -> tuple[str | None, dict | None]:
+                """Generate a short title via litellm.acompletion.
+
+                Returns (title, usage_dict).  Usage is extracted directly from
+                the response object because litellm fires its async callback
+                via fire-and-forget ``create_task``, so the
+                ``TokenTrackingCallback`` would run too late.  We also blank
+                the accumulator in this child-task context so the late callback
+                doesn't double-count.
+                """
                try:
-                    title_chain = TITLE_GENERATION_PROMPT_TEMPLATE | llm
-                    title_result = await title_chain.ainvoke(
-                        {"user_query": user_query[:500]}
+                    from litellm import acompletion
+
+                    from app.services.llm_router_service import LLMRouterService
+                    from app.services.token_tracking_service import _turn_accumulator
+
+                    _turn_accumulator.set(None)
+
+                    prompt = TITLE_GENERATION_PROMPT.replace(
+                        "{user_query}", user_query[:500]
                    )
-                    if title_result and hasattr(title_result, "content"):
-                        raw_title = title_result.content.strip()
+                    messages = [{"role": "user", "content": prompt}]
+
+                    if getattr(llm, "model", None) == "auto":
+                        router = LLMRouterService.get_router()
+                        response = await router.acompletion(
+                            model="auto", messages=messages
+                        )
+                    else:
+                        response = await acompletion(
+                            model=llm.model,
+                            messages=messages,
+                            api_key=getattr(llm, "api_key", None),
+                            api_base=getattr(llm, "api_base", None),
+                        )
+
+                    usage_info = None
+                    usage = getattr(response, "usage", None)
+                    if usage:
+                        raw_model = getattr(llm, "model", "") or ""
+                        model_name = (
+                            raw_model.split("/", 1)[-1]
+                            if "/" in raw_model
+                            else (raw_model or response.model or "unknown")
+                        )
+                        usage_info = {
+                            "model": model_name,
+                            "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0,
+                            "completion_tokens": getattr(usage, "completion_tokens", 0)
+                            or 0,
+                            "total_tokens": getattr(usage, "total_tokens", 0) or 0,
+                        }
+
+                    raw_title = response.choices[0].message.content.strip()
                    if raw_title and len(raw_title) <= 100:
-                            return raw_title.strip("\"'")
+                        return raw_title.strip("\"'"), usage_info
+                    return None, usage_info
                except Exception:
-                    pass
-                return None
+                    logging.getLogger(__name__).exception(
+                        "[TitleGen] _generate_title failed"
+                    )
+                    return None, None

            title_task = asyncio.create_task(_generate_title())

@ -1503,7 +1556,9 @@ async def stream_new_chat(

            # Inject title update mid-stream as soon as the background task finishes
            if title_task is not None and title_task.done() and not title_emitted:
-                generated_title = title_task.result()
+                generated_title, title_usage = title_task.result()
+                if title_usage:
+                    accumulator.add(**title_usage)
                if generated_title:
                    async with shielded_async_session() as title_session:
                        title_thread_result = await title_session.execute(
@ -1528,6 +1583,26 @@ async def stream_new_chat(
        if stream_result.is_interrupted:
            if title_task is not None and not title_task.done():
                title_task.cancel()
+
+            usage_summary = accumulator.per_message_summary()
+            _perf_log.info(
+                "[token_usage] interrupted new_chat: calls=%d total=%d summary=%s",
+                len(accumulator.calls),
+                accumulator.grand_total,
+                usage_summary,
+            )
+            if usage_summary:
+                yield streaming_service.format_data(
+                    "token-usage",
+                    {
+                        "usage": usage_summary,
+                        "prompt_tokens": accumulator.total_prompt_tokens,
+                        "completion_tokens": accumulator.total_completion_tokens,
+                        "total_tokens": accumulator.grand_total,
+                        "call_details": accumulator.serialized_calls(),
+                    },
+                )
+
            yield streaming_service.format_finish_step()
            yield streaming_service.format_finish()
            yield streaming_service.format_done()
@ -1535,7 +1610,9 @@ async def stream_new_chat(

        # If the title task didn't finish during streaming, await it now
        if title_task is not None and not title_emitted:
-            generated_title = await title_task
+            generated_title, title_usage = await title_task
+            if title_usage:
+                accumulator.add(**title_usage)
            if generated_title:
                async with shielded_async_session() as title_session:
                    title_thread_result = await title_session.execute(
@ -1549,6 +1626,25 @@ async def stream_new_chat(
                    chat_id, generated_title
                )

+        usage_summary = accumulator.per_message_summary()
+        _perf_log.info(
+            "[token_usage] normal new_chat: calls=%d total=%d summary=%s",
+            len(accumulator.calls),
+            accumulator.grand_total,
+            usage_summary,
+        )
+        if usage_summary:
+            yield streaming_service.format_data(
+                "token-usage",
+                {
+                    "usage": usage_summary,
+                    "prompt_tokens": accumulator.total_prompt_tokens,
+                    "completion_tokens": accumulator.total_completion_tokens,
+                    "total_tokens": accumulator.grand_total,
+                    "call_details": accumulator.serialized_calls(),
+                },
+            )
+
        # Fire background memory extraction if the agent didn't handle it.
        # Shared threads write to team memory; private threads write to user memory.
        if not stream_result.agent_called_update_memory:
@ -1666,6 +1762,10 @@ async def stream_resume_chat(
    stream_result = StreamResult()
    _t_total = time.perf_counter()

+    from app.services.token_tracking_service import start_turn
+
+    accumulator = start_turn()
+
    session = async_session_maker()
    try:
        if user_id:
@ -1789,11 +1889,49 @@ async def stream_resume_chat(
            chat_id,
        )
        if stream_result.is_interrupted:
+            usage_summary = accumulator.per_message_summary()
+            _perf_log.info(
+                "[token_usage] interrupted resume_chat: calls=%d total=%d summary=%s",
+                len(accumulator.calls),
+                accumulator.grand_total,
+                usage_summary,
+            )
+            if usage_summary:
+                yield streaming_service.format_data(
+                    "token-usage",
+                    {
+                        "usage": usage_summary,
+                        "prompt_tokens": accumulator.total_prompt_tokens,
+                        "completion_tokens": accumulator.total_completion_tokens,
+                        "total_tokens": accumulator.grand_total,
+                        "call_details": accumulator.serialized_calls(),
+                    },
+                )
+
            yield streaming_service.format_finish_step()
            yield streaming_service.format_finish()
            yield streaming_service.format_done()
            return

+        usage_summary = accumulator.per_message_summary()
+        _perf_log.info(
+            "[token_usage] normal resume_chat: calls=%d total=%d summary=%s",
+            len(accumulator.calls),
+            accumulator.grand_total,
+            usage_summary,
+        )
+        if usage_summary:
+            yield streaming_service.format_data(
+                "token-usage",
+                {
+                    "usage": usage_summary,
+                    "prompt_tokens": accumulator.total_prompt_tokens,
+                    "completion_tokens": accumulator.total_completion_tokens,
+                    "total_tokens": accumulator.grand_total,
+                    "call_details": accumulator.serialized_calls(),
+                },
+            )
+
        yield streaming_service.format_finish_step()
        yield streaming_service.format_finish()
        yield streaming_service.format_done()
--- a/surfsense_web/app/(home)/login/LocalLoginForm.tsx
+++ b/surfsense_web/app/(home)/login/LocalLoginForm.tsx
@ -174,7 +174,7 @@ export function LocalLoginForm() {
 					<label htmlFor="password" className="block text-sm font-medium text-foreground">
 						{t("password")}
 					</label>
-					<div className="relative">
+				<div className="relative mt-1">
 					<input
 						id="password"
 						type={showPassword ? "text" : "password"}
@ -183,7 +183,7 @@ export function LocalLoginForm() {
 						placeholder="Enter your password"
 						value={password}
 						onChange={(e) => setPassword(e.target.value)}
-							className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${
+						className={`block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${
 							error.title
 								? "border-destructive focus:border-destructive focus:ring-destructive/40"
 								: "border-border focus:border-primary focus:ring-primary/40"
@ -193,7 +193,7 @@ export function LocalLoginForm() {
 					<button
 						type="button"
 						onClick={() => setShowPassword((prev) => !prev)}
-							className="absolute inset-y-0 right-0 flex items-center pr-3 mt-1 text-muted-foreground hover:text-foreground"
+						className="absolute inset-y-0 right-0 flex items-center pr-3 text-muted-foreground hover:text-foreground"
 						aria-label={showPassword ? t("hide_password") : t("show_password")}
 					>
 						{showPassword ? <EyeOff className="h-4 w-4" /> : <Eye className="h-4 w-4" />}
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@ -42,6 +42,7 @@ import { useChatSessionStateSync } from "@/hooks/use-chat-session-state";
 import { useMessagesSync } from "@/hooks/use-messages-sync";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { getBearerToken } from "@/lib/auth-utils";
+import { createTokenUsageStore, TokenUsageProvider, type TokenUsageData } from "@/components/assistant-ui/token-usage-context";
 import { convertToThreadMessage } from "@/lib/chat/message-utils";
 import {
 	isPodcastGenerating,
@ -66,6 +67,8 @@ import {
 	getRegenerateUrl,
 	getThreadFull,
 	getThreadMessages,
+	type ThreadListItem,
+	type ThreadListResponse,
 	type ThreadRecord,
 } from "@/lib/chat/thread-persistence";
 import { NotFoundError } from "@/lib/error";
@ -195,6 +198,7 @@ export default function NewChatPage() {
 	const [currentThread, setCurrentThread] = useState<ThreadRecord | null>(null);
 	const [messages, setMessages] = useState<ThreadMessageLike[]>([]);
 	const [isRunning, setIsRunning] = useState(false);
+	const [tokenUsageStore] = useState(() => createTokenUsageStore());
 	const abortControllerRef = useRef<AbortController | null>(null);
 	const [pendingInterrupt, setPendingInterrupt] = useState<{
 		threadId: number;
@ -307,6 +311,7 @@ export default function NewChatPage() {
 		setThreadId(null);
 		setCurrentThread(null);
 		setMentionedDocuments([]);
+		tokenUsageStore.clear();
 		setSidebarDocuments([]);
 		setMessageDocumentsMap({});
 		clearPlanOwnerRegistry();
@ -330,6 +335,12 @@ export default function NewChatPage() {
 					const loadedMessages = messagesResponse.messages.map(convertToThreadMessage);
 					setMessages(loadedMessages);

+					for (const msg of messagesResponse.messages) {
+						if (msg.token_usage) {
+							tokenUsageStore.set(`msg-${msg.id}`, msg.token_usage as TokenUsageData);
+						}
+					}
+
 					const restoredDocsMap: Record<string, MentionedDocumentInfo[]> = {};
 					for (const msg of messagesResponse.messages) {
 						if (msg.role === "user") {
@ -374,6 +385,7 @@ export default function NewChatPage() {
 		closeEditorPanel,
 		removeChatTab,
 		searchSpaceId,
+		tokenUsageStore,
 	]);

 	// Initialize on mount, and re-init when switching search spaces (even if urlChatId is the same)
@ -624,6 +636,7 @@ export default function NewChatPage() {
 			};
 			const { contentParts, toolCallIndices } = contentPartsState;
 			let wasInterrupted = false;
+			let tokenUsageData: Record<string, unknown> | null = null;

 			// Add placeholder assistant message
 			setMessages((prev) => [
@ -759,9 +772,21 @@ export default function NewChatPage() {
 							if (titleData?.title && titleData?.threadId === currentThreadId) {
 								setCurrentThread((prev) => (prev ? { ...prev, title: titleData.title } : prev));
 								updateChatTabTitle({ chatId: currentThreadId, title: titleData.title });
-								queryClient.invalidateQueries({
-									queryKey: ["threads", String(searchSpaceId)],
-								});
+								queryClient.setQueriesData<ThreadListResponse>(
+									{ queryKey: ["threads", String(searchSpaceId)] },
+									(old) => {
+										if (!old) return old;
+										const updateTitle = (list: ThreadListItem[]) =>
+											list.map((t) =>
+												t.id === titleData.threadId ? { ...t, title: titleData.title } : t
+											);
+										return {
+											...old,
+											threads: updateTitle(old.threads),
+											archived_threads: updateTitle(old.archived_threads),
+										};
+									}
+								);
 							}
 							break;
 						}
@ -821,6 +846,11 @@ export default function NewChatPage() {
 							break;
 						}

+						case "data-token-usage":
+							tokenUsageData = parsed.data;
+							tokenUsageStore.set(assistantMsgId, parsed.data as TokenUsageData);
+							break;
+
 						case "error":
 							throw new Error(parsed.errorText || "Server error");
 					}
@ -835,12 +865,14 @@ export default function NewChatPage() {
 						const savedMessage = await appendMessage(currentThreadId, {
 							role: "assistant",
 							content: finalContent,
+							token_usage: tokenUsageData ?? undefined,
 						});

 						// Update message ID from temporary to database ID so comments work immediately
 						const newMsgId = `msg-${savedMessage.id}`;
+						tokenUsageStore.rename(assistantMsgId, newMsgId);
 						setMessages((prev) =>
-							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m))
+							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)),
 						);

 						// Update pending interrupt with the new persisted message ID
@ -930,6 +962,7 @@ export default function NewChatPage() {
 			currentUser,
 			disabledTools,
 			updateChatTabTitle,
+			tokenUsageStore,
 		]
 	);

@ -965,6 +998,7 @@ export default function NewChatPage() {
 				toolCallIndices: new Map(),
 			};
 			const { contentParts, toolCallIndices } = contentPartsState;
+			let tokenUsageData: Record<string, unknown> | null = null;

 			const existingMsg = messages.find((m) => m.id === assistantMsgId);
 			if (existingMsg && Array.isArray(existingMsg.content)) {
@ -1149,6 +1183,11 @@ export default function NewChatPage() {
 							break;
 						}

+						case "data-token-usage":
+							tokenUsageData = parsed.data;
+							tokenUsageStore.set(assistantMsgId, parsed.data as TokenUsageData);
+							break;
+
 						case "error":
 							throw new Error(parsed.errorText || "Server error");
 					}
@ -1162,10 +1201,12 @@ export default function NewChatPage() {
 						const savedMessage = await appendMessage(resumeThreadId, {
 							role: "assistant",
 							content: finalContent,
+							token_usage: tokenUsageData ?? undefined,
 						});
 						const newMsgId = `msg-${savedMessage.id}`;
+						tokenUsageStore.rename(assistantMsgId, newMsgId);
 						setMessages((prev) =>
-							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m))
+							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)),
 						);
 					} catch (err) {
 						console.error("Failed to persist resumed assistant message:", err);
@ -1183,7 +1224,7 @@ export default function NewChatPage() {
 				abortControllerRef.current = null;
 			}
 		},
-		[pendingInterrupt, messages, searchSpaceId]
+		[pendingInterrupt, messages, searchSpaceId, tokenUsageStore]
 	);

 	useEffect(() => {
@ -1319,6 +1360,7 @@ export default function NewChatPage() {
 			};
 			const { contentParts, toolCallIndices } = contentPartsState;
 			const batcher = new FrameBatchedUpdater();
+			let tokenUsageData: Record<string, unknown> | null = null;

 			// Add placeholder messages to UI
 			// Always add back the user message (with new query for edit, or original content for reload)
@ -1428,6 +1470,11 @@ export default function NewChatPage() {
 							break;
 						}

+						case "data-token-usage":
+							tokenUsageData = parsed.data;
+							tokenUsageStore.set(assistantMsgId, parsed.data as TokenUsageData);
+							break;
+
 						case "error":
 							throw new Error(parsed.errorText || "Server error");
 					}
@ -1459,12 +1506,13 @@ export default function NewChatPage() {
 						const savedMessage = await appendMessage(threadId, {
 							role: "assistant",
 							content: finalContent,
+							token_usage: tokenUsageData ?? undefined,
 						});

-						// Update assistant message ID to database ID
 						const newMsgId = `msg-${savedMessage.id}`;
+						tokenUsageStore.rename(assistantMsgId, newMsgId);
 						setMessages((prev) =>
-							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m))
+							prev.map((m) => (m.id === assistantMsgId ? { ...m, id: newMsgId } : m)),
 						);

 						trackChatResponseReceived(searchSpaceId, threadId);
@ -1499,7 +1547,7 @@ export default function NewChatPage() {
 				abortControllerRef.current = null;
 			}
 		},
-		[threadId, searchSpaceId, messages, disabledTools]
+		[threadId, searchSpaceId, messages, disabledTools, tokenUsageStore]
 	);

 	// Handle editing a message - truncates history and regenerates with new query
@ -1568,6 +1616,7 @@ export default function NewChatPage() {
 	}

 	return (
+		<TokenUsageProvider store={tokenUsageStore}>
 		<AssistantRuntimeProvider runtime={runtime}>
 			<ThinkingStepsDataUI />
 			<div key={searchSpaceId} className="flex h-full overflow-hidden">
@ -1579,5 +1628,6 @@ export default function NewChatPage() {
 				<MobileHitlEditPanel />
 			</div>
 		</AssistantRuntimeProvider>
+		</TokenUsageProvider>
 	);
 }
--- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent.tsx
@ -27,10 +27,9 @@ export function ApiKeyContent() {

 	return (
 		<div className="space-y-6 min-w-0 overflow-hidden">
-			<Alert className="border-border/60 bg-muted/30 text-muted-foreground">
-				<Info className="h-4 w-4 text-muted-foreground" />
-				<AlertTitle className="text-muted-foreground">{t("api_key_warning_title")}</AlertTitle>
-				<AlertDescription className="text-muted-foreground/60">
+			<Alert className="bg-muted/50 py-3 md:py-4">
+				<Info className="h-3 w-3 md:h-4 md:w-4 shrink-0" />
+				<AlertDescription className="text-xs md:text-sm">
 					{t("api_key_warning_description")}
 				</AlertDescription>
 			</Alert>
--- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/ProfileContent.tsx
@ -113,10 +113,10 @@ export function ProfileContent() {
 							type="submit"
 							variant="outline"
 							disabled={isPending || !hasChanges}
-							className="gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200"
+							className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200"
 						>
-							{isPending && <Spinner size="sm" className="mr-2" />}
-							{t("profile_save")}
+							<span className={isPending ? "opacity-0" : ""}>{t("profile_save")}</span>
+							{isPending && <Spinner size="sm" className="absolute" />}
 						</Button>
 					</div>
 				</form>
--- a/surfsense_web/components/assistant-ui/assistant-message.tsx
+++ b/surfsense_web/components/assistant-ui/assistant-message.tsx
@ -1,4 +1,5 @@
 import {
+	ActionBarMorePrimitive,
 	ActionBarPrimitive,
 	AuiIf,
 	ErrorPrimitive,
@ -15,12 +16,17 @@ import {
 	ExternalLink,
 	Globe,
 	MessageSquare,
+	MoreHorizontalIcon,
 	RefreshCwIcon,
 } from "lucide-react";
 import dynamic from "next/dynamic";
 import type { FC } from "react";
 import { useEffect, useMemo, useRef, useState } from "react";
 import { commentsEnabledAtom, targetCommentIdAtom } from "@/atoms/chat/current-thread.atom";
+import {
+	globalNewLLMConfigsAtom,
+	newLLMConfigsAtom,
+} from "@/atoms/new-llm-config/new-llm-config-query.atoms";
 import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
 import {
 	CitationMetadataProvider,
@ -39,9 +45,13 @@ import {
 	DrawerHeader,
 	DrawerTitle,
 } from "@/components/ui/drawer";
+import { DropdownMenuLabel } from "@/components/ui/dropdown-menu";
+import { Button } from "@/components/ui/button";
 import { useComments } from "@/hooks/use-comments";
 import { useMediaQuery } from "@/hooks/use-media-query";
 import { useElectronAPI } from "@/hooks/use-platform";
+import { useTokenUsage } from "@/components/assistant-ui/token-usage-context";
+import { getProviderIcon } from "@/lib/provider-icons";
 import { cn } from "@/lib/utils";
 import { openSafeNavigationHref, resolveSafeNavigationHref } from "@/components/tool-ui/shared/media";

@ -367,6 +377,94 @@ export const MessageError: FC = () => {
 	);
 };

+/**
+ * Format a message timestamp as a short label (abbreviated month, day, and
+ * 12-hour time) using the runtime's default locale.
+ */
+function formatMessageDate(date: Date): string {
+	const labelOptions: Intl.DateTimeFormatOptions = {
+		month: "short",
+		day: "numeric",
+		hour: "numeric",
+		minute: "2-digit",
+		hour12: true,
+	};
+	// `undefined` locale defers to the environment's default.
+	return date.toLocaleDateString(undefined, labelOptions);
+}
+
+/**
+ * "More" dropdown for an assistant message's action bar.
+ *
+ * Shows the message timestamp (when available) and, when token usage with a
+ * positive total is recorded for the message, either one row per model from
+ * the usage breakdown or a single row with the overall total.
+ */
+const MessageInfoDropdown: FC = () => {
+	const messageId = useAuiState(({ message }) => message?.id);
+	const createdAt = useAuiState(({ message }) => message?.createdAt);
+	const usage = useTokenUsage(messageId);
+
+	const { data: localConfigs } = useAtomValue(newLLMConfigsAtom);
+	const { data: globalConfigs } = useAtomValue(globalNewLLMConfigsAtom);
+
+	// Index configs by model_name. Global configs are inserted first, so a local
+	// config with the same model_name overwrites the global entry.
+	const configByModel = useMemo(() => {
+		const map = new Map<string, { name: string; provider: string }>();
+		for (const c of [...(globalConfigs ?? []), ...(localConfigs ?? [])]) {
+			map.set(c.model_name, { name: c.name, provider: c.provider });
+		}
+		return map;
+	}, [localConfigs, globalConfigs]);
+
+	// Map a usage key to a display name and provider icon. The full key is tried
+	// first, then its last "/"-separated segment; an unknown key is shown as-is
+	// with no icon.
+	const resolveModel = (modelKey: string) => {
+		const parts = modelKey.split("/");
+		const bare = parts[parts.length - 1] ?? modelKey;
+		const config = configByModel.get(modelKey) ?? configByModel.get(bare);
+		return config
+			? { name: config.name, icon: getProviderIcon(config.provider, { className: "size-3.5" }) }
+			: { name: modelKey, icon: null };
+	};
+
+	// The per-model breakdown may be under `usage` or `model_breakdown`; `usage` wins when both exist.
+	const modelBreakdown = usage ? (usage.usage ?? usage.model_breakdown) : undefined;
+	const models = modelBreakdown ? Object.entries(modelBreakdown) : [];
+	// Usage with a zero total is treated the same as no usage at all.
+	const hasUsage = usage && usage.total_tokens > 0;
+
+	// NOTE(review): the items call preventDefault() in onSelect, presumably so that
+	// selecting an informational row does not close the menu — confirm.
+	return (
+		<ActionBarMorePrimitive.Root>
+			<ActionBarMorePrimitive.Trigger asChild>
+				<Button variant="ghost" size="icon" className="aui-button-icon size-6 p-1">
+					<MoreHorizontalIcon className="size-4" />
+					<span className="sr-only">More</span>
+				</Button>
+			</ActionBarMorePrimitive.Trigger>
+			<ActionBarMorePrimitive.Content
+				align="start"
+				className="bg-muted text-popover-foreground z-50 max-h-(--radix-dropdown-menu-content-available-height) min-w-[180px] origin-(--radix-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border dark:border-neutral-700 p-1 shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2"
+			>
+				{createdAt && (
+					<DropdownMenuLabel className="text-xs text-muted-foreground font-normal select-none">
+						{formatMessageDate(createdAt)}
+					</DropdownMenuLabel>
+				)}
+				{hasUsage && (
+					<>
+						<ActionBarMorePrimitive.Separator className="bg-border mx-2 my-1 h-px" />
+						{models.length > 0 ? (
+							models.map(([model, counts]) => {
+								const { name, icon } = resolveModel(model);
+								return (
+									<ActionBarMorePrimitive.Item key={model} className="focus:bg-neutral-200 dark:focus:bg-neutral-700 relative flex cursor-default flex-col items-start gap-0.5 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none" onSelect={(e) => e.preventDefault()}>
+										<span className="flex items-center gap-1.5 text-xs font-medium">
+											{icon}
+											{name}
+										</span>
+										<span className="text-xs text-muted-foreground">
+											{counts.total_tokens.toLocaleString()} tokens
+										</span>
+									</ActionBarMorePrimitive.Item>
+								);
+							})
+						) : (
+							<ActionBarMorePrimitive.Item className="focus:bg-neutral-200 dark:focus:bg-neutral-700 relative flex cursor-default flex-col items-start gap-0.5 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none" onSelect={(e) => e.preventDefault()}>
+								<span className="text-xs text-muted-foreground">
+									{usage.total_tokens.toLocaleString()} tokens
+								</span>
+							</ActionBarMorePrimitive.Item>
+						)}
+					</>
+				)}
+			</ActionBarMorePrimitive.Content>
+		</ActionBarMorePrimitive.Root>
+	);
+};
+
 const AssistantMessageInner: FC = () => {
 	const isMobile = !useMediaQuery("(min-width: 768px)");

@ -429,7 +527,7 @@ const AssistantMessageInner: FC = () => {
 				</div>
 			)}

-			<div className="aui-assistant-message-footer mt-1 mb-5 ml-2 flex">
+			<div className="aui-assistant-message-footer mt-1 mb-5 ml-2 flex items-center gap-2">
 				<AssistantActionBar />
 			</div>
 		</CitationMetadataProvider>
@ -626,6 +724,7 @@ const AssistantActionBar: FC = () => {
 					<ClipboardPaste />
 				</TooltipIconButton>
 			)}
+			<MessageInfoDropdown />
 		</ActionBarPrimitive.Root>
 	);
 };
--- a/surfsense_web/components/assistant-ui/token-usage-context.tsx
+++ b/surfsense_web/components/assistant-ui/token-usage-context.tsx
@ -0,0 +1,79 @@
+"use client";
+
+import { createContext, useContext, useCallback, useSyncExternalStore, type FC, type ReactNode } from "react";
+
+/** Prompt / completion / total counters, used for the message totals and for each per-model entry. */
+interface TokenCounts {
+	prompt_tokens: number;
+	completion_tokens: number;
+	total_tokens: number;
+}
+
+/**
+ * Token usage recorded for a single message.
+ *
+ * The top-level counters are the message totals. The optional per-model
+ * breakdown is keyed by model identifier and may be supplied under either
+ * `usage` or `model_breakdown`.
+ */
+export interface TokenUsageData extends TokenCounts {
+	usage?: Record<string, TokenCounts>;
+	model_breakdown?: Record<string, TokenCounts>;
+}
+
+/** Callback invoked after the store's contents change. */
+type Listener = () => void;
+
+/**
+ * In-memory map from message id to token usage, with a subscribe/notify API
+ * in the shape `useSyncExternalStore` expects.
+ */
+class TokenUsageStore {
+	private data = new Map<string, TokenUsageData>();
+	private listeners = new Set<Listener>();
+
+	/** Return the usage stored under `messageId`, or `undefined` when there is none. */
+	get(messageId: string): TokenUsageData | undefined {
+		return this.data.get(messageId);
+	}
+
+	/** Store (or overwrite) the usage for `messageId`, then notify subscribers. */
+	set(messageId: string, usage: TokenUsageData): void {
+		this.data.set(messageId, usage);
+		this.notify();
+	}
+
+	/**
+	 * Move the entry stored under `oldId` to `newId` and notify subscribers.
+	 * Does nothing (and does not notify) when `oldId` has no entry.
+	 */
+	rename(oldId: string, newId: string): void {
+		const existing = this.data.get(oldId);
+		if (!existing) return;
+		this.data.delete(oldId);
+		this.data.set(newId, existing);
+		this.notify();
+	}
+
+	/** Remove every entry, then notify subscribers. */
+	clear(): void {
+		this.data.clear();
+		this.notify();
+	}
+
+	// Declared as an arrow property so `this` stays bound when the function is
+	// handed off as a bare reference. Returns the matching unsubscribe function.
+	subscribe = (listener: Listener): (() => void) => {
+		this.listeners.add(listener);
+		const unsubscribe = () => this.listeners.delete(listener);
+		return unsubscribe;
+	};
+
+	/** Invoke every registered listener. */
+	private notify(): void {
+		this.listeners.forEach((listener) => listener());
+	}
+}
+
+/** Context carrying the token-usage store; `null` when no provider is mounted above. */
+const TokenUsageContext = createContext<TokenUsageStore | null>(null);
+
+/** Exposes `store` to the token-usage hooks used by descendants. */
+export const TokenUsageProvider: FC<{ store: TokenUsageStore; children: ReactNode }> = (props) => {
+	const { store, children } = props;
+	return <TokenUsageContext.Provider value={store}>{children}</TokenUsageContext.Provider>;
+};
+
+/**
+ * Return the token-usage store from context.
+ *
+ * @throws Error when called outside of a `TokenUsageProvider`.
+ */
+export function useTokenUsageStore(): TokenUsageStore {
+	const contextStore = useContext(TokenUsageContext);
+	if (contextStore) {
+		return contextStore;
+	}
+	throw new Error("useTokenUsageStore must be used within TokenUsageProvider");
+}
+
+/**
+ * Subscribe to the token usage recorded for one message.
+ *
+ * Returns `undefined` when `messageId` is empty or missing, when nothing is
+ * stored for it, or when no provider is mounted (this hook does not throw in
+ * that case).
+ */
+export function useTokenUsage(messageId: string | undefined): TokenUsageData | undefined {
+	const store = useContext(TokenUsageContext);
+	const readUsage = useCallback(() => {
+		if (!store || !messageId) return undefined;
+		return store.get(messageId);
+	}, [store, messageId]);
+	const subscribe = useCallback(
+		(onStoreChange: () => void) => {
+			// Without a store there is nothing to listen to; hand back a no-op unsubscribe.
+			if (!store) return () => {};
+			return store.subscribe(onStoreChange);
+		},
+		[store],
+	);
+	// The same reader is used for the client and the server snapshot.
+	return useSyncExternalStore(subscribe, readUsage, readUsage);
+}
+
+/** Create a fresh, empty token-usage store. */
+export function createTokenUsageStore(): TokenUsageStore {
+	const store = new TokenUsageStore();
+	return store;
+}
--- a/surfsense_web/components/documents/DocumentsFilters.tsx
+++ b/surfsense_web/components/documents/DocumentsFilters.tsx
@ -2,7 +2,6 @@

 import { IconBinaryTree, IconBinaryTreeFilled } from "@tabler/icons-react";
 import { FolderPlus, ListFilter, Search, Upload, X } from "lucide-react";
-import { AnimatePresence, motion } from "motion/react";
 import { useTranslations } from "next-intl";
 import React, { useCallback, useMemo, useRef, useState } from "react";
 import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup";
@ -10,6 +9,7 @@ import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
 import { Input } from "@/components/ui/input";
 import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
+import { Spinner } from "@/components/ui/spinner";
 import { ToggleGroup, ToggleGroupItem } from "@/components/ui/toggle-group";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import { cn } from "@/lib/utils";
@ -74,14 +74,14 @@ export function DocumentsFilters({
 	return (
 		<div className="flex select-none">
 			<div className="flex items-center gap-2 w-full">
-				{/* New Folder + Filter Toggle Group */}
+				{/* New Folder + AI Sort + Filter Toggle Group */}
 				<ToggleGroup type="multiple" variant="outline" value={[]} className="overflow-visible">
 					{onCreateFolder && (
 						<Tooltip>
 							<TooltipTrigger asChild>
 								<ToggleGroupItem
 									value="folder"
-									className="h-9 w-9 shrink-0 border-sidebar-border text-sidebar-foreground/60 hover:text-sidebar-foreground hover:border-sidebar-border bg-sidebar"
+								className="h-9 w-9 shrink-0 border-sidebar-border text-muted-foreground hover:text-foreground hover:border-sidebar-border bg-sidebar"
 								onClick={(e) => {
 									e.preventDefault();
 									onCreateFolder();
@ -94,13 +94,52 @@ export function DocumentsFilters({
 					</Tooltip>
 				)}

+					{onToggleAiSort && (
+						<Tooltip>
+							<TooltipTrigger asChild>
+								<ToggleGroupItem
+									value="ai-sort"
+									disabled={aiSortBusy}
+									className={cn(
+										"h-9 w-9 shrink-0 border-sidebar-border bg-sidebar",
+										"disabled:pointer-events-none disabled:opacity-50",
+										aiSortEnabled
+											? "bg-accent text-accent-foreground"
+											: "text-muted-foreground hover:text-foreground hover:border-sidebar-border"
+									)}
+									onClick={(e) => {
+										e.preventDefault();
+										onToggleAiSort();
+									}}
+									aria-label={aiSortEnabled ? "Disable AI sort" : "Enable AI sort"}
+									aria-pressed={aiSortEnabled}
+								>
+								{aiSortBusy ? (
+									<Spinner size="xs" />
+								) : aiSortEnabled ? (
+										<IconBinaryTreeFilled size={16} />
+									) : (
+										<IconBinaryTree size={16} />
+									)}
+								</ToggleGroupItem>
+							</TooltipTrigger>
+							<TooltipContent>
+								{aiSortBusy
+									? "AI sort in progress..."
+									: aiSortEnabled
+										? "AI sort active — click to disable"
+										: "Enable AI sort"}
+							</TooltipContent>
+						</Tooltip>
+					)}
+
 					<Popover>
 						<Tooltip>
 							<TooltipTrigger asChild>
 								<PopoverTrigger asChild>
 									<ToggleGroupItem
 										value="filter"
-										className="relative h-9 w-9 shrink-0 border-sidebar-border text-sidebar-foreground/60 hover:text-sidebar-foreground hover:border-sidebar-border bg-sidebar overflow-visible"
+										className="relative h-9 w-9 shrink-0 border-sidebar-border text-muted-foreground hover:text-foreground hover:border-sidebar-border bg-sidebar overflow-visible"
 									>
 										<ListFilter size={14} />
 										{activeTypes.length > 0 && (
@ -182,70 +221,6 @@ export function DocumentsFilters({
 					</Popover>
 				</ToggleGroup>

-				{/* AI Sort Toggle */}
-				{onToggleAiSort && (
-					<Tooltip>
-						<TooltipTrigger asChild>
-							<button
-								type="button"
-								disabled={aiSortBusy}
-								onClick={onToggleAiSort}
-								className={cn(
-									"relative h-9 w-9 shrink-0 rounded-md border inline-flex items-center justify-center transition-all duration-300 ease-out",
-									"focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 outline-none",
-									"disabled:pointer-events-none disabled:opacity-50",
-									aiSortEnabled
-										? "border-violet-400/60 bg-violet-50 text-violet-600 shadow-[0_0_8px_-1px_rgba(139,92,246,0.3)] hover:bg-violet-100 dark:border-violet-500/40 dark:bg-violet-500/15 dark:text-violet-400 dark:shadow-[0_0_8px_-1px_rgba(139,92,246,0.2)] dark:hover:bg-violet-500/25"
-										: "border-sidebar-border bg-sidebar text-sidebar-foreground/60 hover:text-sidebar-foreground hover:border-sidebar-border hover:bg-accent"
-								)}
-								aria-label={aiSortEnabled ? "Disable AI sort" : "Enable AI sort"}
-								aria-pressed={aiSortEnabled}
-							>
-								<AnimatePresence mode="wait" initial={false}>
-									{aiSortBusy ? (
-										<motion.div
-											key="busy"
-											initial={{ opacity: 0, scale: 0.6, rotate: -90 }}
-											animate={{ opacity: 1, scale: 1, rotate: 0 }}
-											exit={{ opacity: 0, scale: 0.6, rotate: 90 }}
-											transition={{ duration: 0.2, ease: "easeInOut" }}
-										>
-											<IconBinaryTree size={16} className="animate-pulse" />
-										</motion.div>
-									) : aiSortEnabled ? (
-										<motion.div
-											key="on"
-											initial={{ opacity: 0, scale: 0.6, rotate: -90 }}
-											animate={{ opacity: 1, scale: 1, rotate: 0 }}
-											exit={{ opacity: 0, scale: 0.6, rotate: 90 }}
-											transition={{ duration: 0.25, ease: "easeInOut" }}
-										>
-											<IconBinaryTreeFilled size={16} />
-										</motion.div>
-									) : (
-										<motion.div
-											key="off"
-											initial={{ opacity: 0, scale: 0.6, rotate: 90 }}
-											animate={{ opacity: 1, scale: 1, rotate: 0 }}
-											exit={{ opacity: 0, scale: 0.6, rotate: -90 }}
-											transition={{ duration: 0.25, ease: "easeInOut" }}
-										>
-											<IconBinaryTree size={16} />
-										</motion.div>
-									)}
-								</AnimatePresence>
-							</button>
-						</TooltipTrigger>
-						<TooltipContent>
-							{aiSortBusy
-								? "AI sort in progress..."
-								: aiSortEnabled
-									? "AI sort active — click to disable"
-									: "Enable AI sort"}
-						</TooltipContent>
-					</Tooltip>
-				)}
-
 				{/* Search Input */}
 				<div className="relative flex-1 min-w-0">
 					<div className="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3 text-muted-foreground">
--- a/surfsense_web/components/icons/providers/github.svg
+++ b/surfsense_web/components/icons/providers/github.svg
@ -0,0 +1 @@
+<svg fill="currentColor" fill-rule="evenodd" height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Github</title><path d="M12 0c6.63 0 12 5.276 12 11.79-.001 5.067-3.29 9.567-8.175 11.187-.6.118-.825-.25-.825-.56 0-.398.015-1.665.015-3.242 0-1.105-.375-1.813-.81-2.181 2.67-.295 5.475-1.297 5.475-5.822 0-1.297-.465-2.344-1.23-3.169.12-.295.54-1.503-.12-3.125 0 0-1.005-.324-3.3 1.209a11.32 11.32 0 00-3-.398c-1.02 0-2.04.133-3 .398-2.295-1.518-3.3-1.209-3.3-1.209-.66 1.622-.24 2.83-.12 3.125-.765.825-1.23 1.887-1.23 3.169 0 4.51 2.79 5.527 5.46 5.822-.345.294-.66.81-.765 1.577-.69.31-2.415.81-3.495-.973-.225-.354-.9-1.223-1.845-1.209-1.005.015-.405.56.015.781.51.28 1.095 1.327 1.23 1.666.24.663 1.02 1.93 4.035 1.385 0 .988.015 1.916.015 2.196 0 .31-.225.664-.825.56C3.303 21.374-.003 16.867 0 11.791 0 5.276 5.37 0 12 0z"></path></svg>
--- a/surfsense_web/components/icons/providers/index.ts
+++ b/surfsense_web/components/icons/providers/index.ts
@ -10,6 +10,7 @@ export { default as DeepInfraIcon } from "./deepinfra.svg";
 export { default as DeepSeekIcon } from "./deepseek.svg";
 export { default as FireworksAiIcon } from "./fireworksai.svg";
 export { default as GeminiIcon } from "./gemini.svg";
+export { default as GitHubModelsIcon } from "./github.svg";
 export { default as GroqIcon } from "./groq.svg";
 export { default as HuggingFaceIcon } from "./huggingface.svg";
 export { default as MiniMaxIcon } from "./minimax.svg";
--- a/surfsense_web/components/new-chat/model-selector.tsx
+++ b/surfsense_web/components/new-chat/model-selector.tsx
@ -6,9 +6,12 @@ import {
 	Bot,
 	Check,
 	ChevronDown,
+	ChevronLeft,
+	ChevronRight,
+	ChevronUp,
 	Edit3,
-	Eye,
 	ImageIcon,
+	ScanEye,
 	Layers,
 	Plus,
 	Search,
@ -69,6 +72,7 @@ const PROVIDER_NAMES: Record<string, string> = {
 	DEEPSEEK: "DeepSeek",
 	MISTRAL: "Mistral",
 	COHERE: "Cohere",
+	GITHUB_MODELS: "GitHub Models",
 	GROQ: "Groq",
 	OLLAMA: "Ollama",
 	TOGETHER_AI: "Together AI",
@ -274,17 +278,40 @@ export function ModelSelector({
 	const [searchQuery, setSearchQuery] = useState("");
 	const [selectedProvider, setSelectedProvider] = useState<string>("all");
 	const [focusedIndex, setFocusedIndex] = useState(-1);
-	const [showScrollIndicator, setShowScrollIndicator] = useState(true);
+	const [modelScrollPos, setModelScrollPos] = useState<"top" | "middle" | "bottom">("top");
+	const [sidebarScrollPos, setSidebarScrollPos] = useState<"top" | "middle" | "bottom">("top");
 	const providerSidebarRef = useRef<HTMLDivElement>(null);
 	const modelListRef = useRef<HTMLDivElement>(null);
 	const searchInputRef = useRef<HTMLInputElement>(null);
 	const isMobile = useIsMobile();

+	// Track whether the model list is scrolled to its top, bottom, or somewhere
+	// in between. A 2px tolerance is applied at both ends; "top" wins when both match.
+	const handleModelListScroll = useCallback((e: React.UIEvent<HTMLDivElement>) => {
+		const { scrollTop, scrollHeight, clientHeight } = e.currentTarget;
+		if (scrollTop <= 2) {
+			setModelScrollPos("top");
+		} else if (scrollHeight - scrollTop - clientHeight <= 2) {
+			setModelScrollPos("bottom");
+		} else {
+			setModelScrollPos("middle");
+		}
+	}, []);
+
+	// Track the provider sidebar's scroll position. On mobile the horizontal
+	// axis is measured (start/end are reported as "top"/"bottom"); otherwise the
+	// vertical axis is used. A 2px tolerance is applied at both ends.
+	const handleSidebarScroll = useCallback((e: React.UIEvent<HTMLDivElement>) => {
+		const el = e.currentTarget;
+		const [offset, total, visible] = isMobile
+			? [el.scrollLeft, el.scrollWidth, el.clientWidth]
+			: [el.scrollTop, el.scrollHeight, el.clientHeight];
+		let position: "top" | "middle" | "bottom" = "middle";
+		if (offset <= 2) {
+			position = "top";
+		} else if (total - offset - visible <= 2) {
+			position = "bottom";
+		}
+		setSidebarScrollPos(position);
+	}, [isMobile]);
+
 	// Reset search + provider when tab changes
+	// biome-ignore lint/correctness/useExhaustiveDependencies: activeTab is intentionally used as a trigger
 	useEffect(() => {
 		setSelectedProvider("all");
 		setSearchQuery("");
 		setFocusedIndex(-1);
+		setModelScrollPos("top");
 	}, [activeTab]);

 	// Reset on open
@ -295,8 +322,9 @@ export function ModelSelector({
 		}
 	}, [open]);

-	// Cmd/Ctrl+M shortcut
+	// Cmd/Ctrl+M shortcut (desktop only)
 	useEffect(() => {
+		if (isMobile) return;
 		const handler = (e: KeyboardEvent) => {
 			if ((e.metaKey || e.ctrlKey) && e.key === "m") {
 				e.preventDefault();
@ -305,9 +333,10 @@ export function ModelSelector({
 		};
 		document.addEventListener("keydown", handler);
 		return () => document.removeEventListener("keydown", handler);
-	}, []);
+	}, [isMobile]);

 	// Focus search input on open
+	// biome-ignore lint/correctness/useExhaustiveDependencies: activeTab is intentionally used as a trigger to re-focus on tab switch
 	useEffect(() => {
 		if (open && !isMobile) {
 			requestAnimationFrame(() => searchInputRef.current?.focus());
@ -677,6 +706,7 @@ export function ModelSelector({
 	);

 	// ─── Keyboard navigation ───
+	// biome-ignore lint/correctness/useExhaustiveDependencies: searchQuery and selectedProvider are intentional triggers to reset focus
 	useEffect(() => {
 		setFocusedIndex(-1);
 	}, [searchQuery, selectedProvider]);
@ -767,24 +797,35 @@ export function ModelSelector({
 		return (
 			<div
 				className={cn(
-					"shrink-0 border-border/50 relative flex flex-col",
-					!isMobile && "w-10 border-r",
+					"shrink-0 border-border/50 flex",
+					isMobile ? "flex-row items-center border-b border-border/40" : "flex-col w-10 border-r",
 				)}
 			>
+				{!isMobile && sidebarScrollPos !== "top" && (
+					<div className="flex items-center justify-center py-0.5 pointer-events-none">
+						<ChevronUp className="size-3 text-muted-foreground" />
+					</div>
+				)}
+				{isMobile && sidebarScrollPos !== "top" && (
+					<div className="flex items-center justify-center px-0.5 shrink-0 pointer-events-none">
+						<ChevronLeft className="size-3 text-muted-foreground" />
+					</div>
+				)}
 				<div
 					ref={providerSidebarRef}
-					onScroll={(e) => {
-						const t = e.currentTarget;
-						setShowScrollIndicator(
-							t.scrollHeight - t.scrollTop >
-								t.clientHeight + 10,
-						);
-					}}
+					onScroll={handleSidebarScroll}
 					className={cn(
 						isMobile
-							? "flex flex-row gap-0.5 px-2 py-1.5 overflow-x-auto border-b border-border/40"
+							? "flex flex-row gap-0.5 px-1 py-1.5 overflow-x-auto [&::-webkit-scrollbar]:h-0 [&::-webkit-scrollbar-track]:bg-transparent"
 							: "flex flex-col gap-0.5 p-1 overflow-y-auto flex-1 [&::-webkit-scrollbar]:w-0 [&::-webkit-scrollbar-track]:bg-transparent",
 					)}
+					style={isMobile ? {
+						maskImage: `linear-gradient(to right, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 24px, black calc(100% - 24px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`,
+						WebkitMaskImage: `linear-gradient(to right, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 24px, black calc(100% - 24px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`,
+					} : {
+						maskImage: `linear-gradient(to bottom, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 32px, black calc(100% - 32px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`,
+						WebkitMaskImage: `linear-gradient(to bottom, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 32px, black calc(100% - 32px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`,
+					}}
 				>
 					{activeProviders.map((provider, idx) => {
 						const isAll = provider === "all";
@ -849,18 +890,23 @@ export function ModelSelector({
 												)}
 										{isConfigured
 											? ` (${count})`
-											: " — not configured"}
+											: " (not configured)"}
 									</TooltipContent>
 								</Tooltip>
 							</Fragment>
 						);
 					})}
 				</div>
-				{!isMobile && showScrollIndicator && (
-					<div className="absolute bottom-0 left-0 right-0 h-6 bg-gradient-to-t from-background to-transparent pointer-events-none flex items-end justify-center pb-0.5">
+				{!isMobile && sidebarScrollPos !== "bottom" && (
+					<div className="flex items-center justify-center py-0.5 pointer-events-none">
 						<ChevronDown className="size-3 text-muted-foreground" />
 					</div>
 				)}
+				{isMobile && sidebarScrollPos !== "bottom" && (
+					<div className="flex items-center justify-center px-0.5 shrink-0 pointer-events-none">
+						<ChevronRight className="size-3 text-muted-foreground" />
+					</div>
+				)}
 			</div>
 		);
 	};
@ -889,19 +935,26 @@ export function ModelSelector({
 				key={`${activeTab}-${item.isGlobal ? "g" : "u"}-${config.id}`}
 				data-model-index={index}
 				role="option"
+				tabIndex={isMobile ? -1 : 0}
 				aria-selected={isSelected}
 				onClick={() => handleSelectItem(item)}
+				onKeyDown={isMobile ? undefined : (e) => {
+					if (e.key === "Enter" || e.key === " ") {
+						e.preventDefault();
+						handleSelectItem(item);
+					}
+				}}
 				onMouseEnter={() => setFocusedIndex(index)}
 				className={cn(
-					"group flex items-start gap-2.5 px-2.5 py-2 rounded-lg cursor-pointer",
-					"transition-all duration-150 mx-1",
-					"hover:bg-accent/40 active:scale-[0.99]",
+					"group flex items-center gap-2.5 px-3 py-2 rounded-xl cursor-pointer",
+					"transition-all duration-150 mx-2",
+					"hover:bg-accent/40",
 					isSelected && "bg-primary/6 dark:bg-primary/8",
-					isFocused && "bg-accent/50 ring-1 ring-primary/20",
+					isFocused && "bg-accent/50",
 				)}
 			>
 				{/* Provider icon */}
-				<div className="shrink-0 mt-0.5">
+				<div className="shrink-0">
 					{getProviderIcon(config.provider as string, {
 						isAutoMode,
 						className: "size-5",
@ -917,7 +970,7 @@ export function ModelSelector({
 						{isAutoMode && (
 							<Badge
 								variant="secondary"
-								className="text-[9px] px-1 py-0 h-3.5 bg-violet-800 text-white dark:bg-violet-800 dark:text-white border-0"
+								className="text-[9px] px-1 py-0 h-3.5 bg-zinc-200 text-zinc-600 dark:bg-zinc-700 dark:text-zinc-300 border-0"
 							>
 								Recommended
 							</Badge>
@ -931,8 +984,8 @@ export function ModelSelector({
 						</span>
 						{!isAutoMode && hasCitations && (
 							<Badge
-								variant="outline"
-								className="text-[9px] px-1 py-0 h-3.5 bg-primary/10 text-primary border-primary/20"
+								variant="secondary"
+								className="text-[10px] px-1.5 py-0.5 border-0 text-muted-foreground bg-muted"
 							>
 								Citations
 							</Badge>
@ -981,7 +1034,7 @@ export function ModelSelector({
 					: "Add Vision Model";

 		return (
-			<div className="flex flex-col w-full">
+			<div className="flex flex-col w-full overflow-hidden">
 				{/* Tab header */}
 				<div className="border-b border-border/80 dark:border-neutral-800">
 					<div className="w-full grid grid-cols-3 h-11">
@ -999,7 +1052,7 @@ export function ModelSelector({
 								},
 								{
 									value: "vision" as const,
-									icon: Eye,
+									icon: ScanEye,
 									label: "Vision",
 								},
 							] as const
@ -1028,7 +1081,7 @@ export function ModelSelector({
 						"flex",
 						isMobile
 							? "flex-col h-[60vh]"
-							: "flex-row h-[420px]",
+							: "flex-row h-[380px]",
 					)}
 				>
 					{/* Provider sidebar */}
@ -1037,33 +1090,30 @@ export function ModelSelector({
 					{/* Main content */}
 					<div className="flex flex-col min-w-0 min-h-0 flex-1 overflow-hidden">
 						{/* Search */}
-						<div className="relative px-3 py-2">
-							<Search className="absolute left-5 top-1/2 -translate-y-1/2 size-3.5 text-muted-foreground/50 pointer-events-none" />
+						<div className="relative">
+							<Search className="absolute left-3 top-1/2 -translate-y-1/2 size-3.5 text-muted-foreground/100 pointer-events-none" />
 							<input
 								ref={searchInputRef}
-								placeholder="Search models..."
+								placeholder="Search models"
 								value={searchQuery}
 								onChange={(e) =>
 									setSearchQuery(e.target.value)
 								}
-								onKeyDown={handleKeyDown}
-								autoFocus={!isMobile}
+								onKeyDown={isMobile ? undefined : handleKeyDown}
 								role="combobox"
 								aria-expanded={true}
 								aria-controls="model-selector-list"
 								className={cn(
-									"w-full pl-8 pr-3 py-1.5 text-xs rounded-lg",
-									"bg-secondary/30 border border-border/40",
-									"focus:outline-none focus:ring-2 focus:ring-primary/20 focus:border-primary/40",
-									"placeholder:text-muted-foreground/50",
-									"transition-[box-shadow,border-color] duration-200",
+									"w-full pl-8 pr-3 py-2.5 text-sm bg-transparent",
+									"focus:outline-none",
+									"placeholder:text-muted-foreground",
 								)}
 							/>
 						</div>

 						{/* Provider header when filtered */}
 						{selectedProvider !== "all" && (
-							<div className="flex items-center gap-2 px-3 py-1.5 border-b border-border/40">
+							<div className="flex items-center gap-2 px-3 py-1.5">
 								{getProviderIcon(selectedProvider, {
 									className: "size-4",
 								})}
@ -1085,10 +1135,15 @@ export function ModelSelector({
 							id="model-selector-list"
 							ref={modelListRef}
 							role="listbox"
-							className="overflow-y-auto flex-1 py-1"
+							className="overflow-y-auto flex-1 py-1 space-y-1 flex flex-col"
+							onScroll={handleModelListScroll}
+							style={{
+								maskImage: `linear-gradient(to bottom, ${modelScrollPos === "top" ? "black" : "transparent"}, black 16px, black calc(100% - 16px), ${modelScrollPos === "bottom" ? "black" : "transparent"})`,
+								WebkitMaskImage: `linear-gradient(to bottom, ${modelScrollPos === "top" ? "black" : "transparent"}, black 16px, black calc(100% - 16px), ${modelScrollPos === "bottom" ? "black" : "transparent"})`,
+							}}
 						>
 							{currentDisplayItems.length === 0 ? (
-								<div className="py-8 flex flex-col items-center gap-3 px-4">
+								<div className="flex-1 flex flex-col items-center justify-center gap-3 px-4">
 									{selectedProvider !== "all" &&
 									!configuredProviderSet.has(
 										selectedProvider,
@ -1116,22 +1171,21 @@ export function ModelSelector({
 											</p>
 											{addHandler && (
 												<Button
-													variant="outline"
+													variant="secondary"
 													size="sm"
-													className="mt-1 gap-2"
+													className="mt-1"
 													onClick={() => {
 														setOpen(false);
 														addHandler(selectedProvider !== "all" ? selectedProvider : undefined);
 													}}
 												>
-													<Plus className="size-3.5" />
 													{addLabel}
 												</Button>
 											)}
 										</>
-									) : (
+									) : searchQuery ? (
 										<>
-											<Search className="size-8 text-muted-foreground/40" />
+											<Search className="size-8 text-muted-foreground" />
 											<p className="text-sm text-muted-foreground">
 												No models found
 											</p>
@ -1140,13 +1194,22 @@ export function ModelSelector({
 												term
 											</p>
 										</>
+									) : (
+										<>
+											<p className="text-sm font-medium text-muted-foreground">
+												No models configured
+											</p>
+											<p className="text-xs text-muted-foreground/60 text-center">
+												Configure models in your search space settings
+											</p>
+										</>
 									)}
 								</div>
 							) : (
 								<>
 									{globalItems.length > 0 && (
 										<>
-											<div className="flex items-center gap-2 px-3 py-1.5 text-[10px] font-semibold text-muted-foreground/70 uppercase tracking-wider">
+											<div className="flex items-center gap-2 px-3 py-1.5 text-[12px] font-semibold text-muted-foreground tracking-wider">
 												Global Models
 											</div>
 											{globalItems.map((item, i) =>
@ -1163,7 +1226,7 @@ export function ModelSelector({
 										)}
 									{userItems.length > 0 && (
 										<>
-											<div className="flex items-center gap-2 px-3 py-1.5 text-[10px] font-semibold text-muted-foreground/70 uppercase tracking-wider">
+											<div className="flex items-center gap-2 px-3 py-1.5 text-[12px] font-semibold text-muted-foreground tracking-wider">
 												Your Configurations
 											</div>
 											{userItems.map((item, i) =>
@ -1180,7 +1243,7 @@ export function ModelSelector({

 						{/* Add model button */}
 						{addHandler && (
-							<div className="p-2 border-t border-border/40 bg-muted/20 dark:bg-neutral-900">
+							<div className="p-2">
 								<Button
 									variant="ghost"
 									size="sm"
@ -1271,7 +1334,7 @@ export function ModelSelector({
 							</span>
 						</>
 					) : (
-						<Eye className="size-4 text-muted-foreground" />
+						<ScanEye className="size-4 text-muted-foreground" />
 					)}
 				</>
 			)}
@ -1301,7 +1364,7 @@ export function ModelSelector({
 		<Popover open={open} onOpenChange={setOpen}>
 			<PopoverTrigger asChild>{triggerButton}</PopoverTrigger>
 			<PopoverContent
-				className="w-[340px] md:w-[440px] p-0 rounded-lg shadow-lg bg-white border-border/60 dark:bg-neutral-900 dark:border dark:border-white/5 select-none"
+				className="w-[300px] md:w-[380px] p-0 rounded-lg shadow-lg overflow-hidden bg-white border-border/60 dark:bg-neutral-900 dark:border dark:border-white/5 select-none"
 				align="start"
 				sideOffset={8}
 				onCloseAutoFocus={(e) => e.preventDefault()}
--- a/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx
+++ b/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx
@ -1,14 +1,22 @@
 "use client";

-import { Check, Copy, Dot, ExternalLink, MessageSquare, Trash2 } from "lucide-react";
-import { useCallback, useRef, useState } from "react";
+import { Copy, Dot, ExternalLink, MessageSquare, MoreHorizontal, Trash2 } from "lucide-react";
+import { useCallback, useState } from "react";
+import { toast } from "sonner";
 import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar";
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Card, CardContent } from "@/components/ui/card";
+import {
+	DropdownMenu,
+	DropdownMenuContent,
+	DropdownMenuItem,
+	DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
 import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip";
 import type { PublicChatSnapshotDetail } from "@/contracts/types/chat-threads.types";
 import { useMediaQuery } from "@/hooks/use-media-query";
+import { cn } from "@/lib/utils";

 function getInitials(name: string): string {
 	const parts = name.trim().split(/\s+/);
@ -35,15 +43,12 @@ export function PublicChatSnapshotRow({
 	isDeleting = false,
 	memberMap,
 }: PublicChatSnapshotRowProps) {
-	const [copied, setCopied] = useState(false);
-	const copyTimeoutRef = useRef<ReturnType<typeof setTimeout>>(null);
+	const [dropdownOpen, setDropdownOpen] = useState(false);
 	const isDesktop = useMediaQuery("(min-width: 768px)");

 	const handleCopyClick = useCallback(() => {
 		onCopy(snapshot);
-		setCopied(true);
-		if (copyTimeoutRef.current) clearTimeout(copyTimeoutRef.current);
-		copyTimeoutRef.current = setTimeout(() => setCopied(false), 2000);
+		toast.success("Link copied to clipboard");
 	}, [onCopy, snapshot]);

 	const formattedDate = new Date(snapshot.created_at).toLocaleDateString(undefined, {
@ -58,96 +63,66 @@ export function PublicChatSnapshotRow({
 		<Card className="group relative overflow-hidden transition-all duration-200 border-border/60 hover:shadow-md h-full">
 			<CardContent className="p-4 flex flex-col gap-3 h-full">
 				{/* Header: Title + Actions */}
-				<div className="relative">
-					<div className="min-w-0 pr-16 sm:pr-0 sm:group-hover:pr-16">
+				<div className="relative flex items-center">
 					<h4
-							className="text-sm font-semibold tracking-tight truncate"
+						className={cn(
+							"text-sm font-semibold tracking-tight truncate",
+							dropdownOpen ? "pr-8" : "sm:group-hover:pr-8"
+						)}
 						title={snapshot.thread_title}
 					>
 						{snapshot.thread_title}
 					</h4>
-					</div>
-					<div className="flex items-center gap-0.5 shrink-0 sm:hidden sm:group-hover:flex absolute right-0 top-0">
-						<TooltipProvider>
-							<Tooltip open={isDesktop ? undefined : false}>
-								<TooltipTrigger asChild>
+					<DropdownMenu open={dropdownOpen} onOpenChange={setDropdownOpen}>
+						<DropdownMenuTrigger asChild>
 							<Button
 								variant="ghost"
 								size="icon"
-										asChild
-										className="h-7 w-7 text-muted-foreground hover:text-foreground"
+								className={cn(
+									"absolute right-0 h-6 w-6 shrink-0 hover:bg-transparent",
+									dropdownOpen
+										? "opacity-100"
+										: "sm:opacity-0 sm:group-hover:opacity-100"
+								)}
 							>
-										<a href={snapshot.public_url} target="_blank" rel="noopener noreferrer">
-											<ExternalLink className="h-3 w-3" />
-										</a>
+								<MoreHorizontal className="h-3.5 w-3.5 text-muted-foreground" />
 							</Button>
-								</TooltipTrigger>
-								<TooltipContent>Open link</TooltipContent>
-							</Tooltip>
-						</TooltipProvider>
+						</DropdownMenuTrigger>
+						<DropdownMenuContent align="end" className="w-40">
+							<DropdownMenuItem onClick={handleCopyClick}>
+								<Copy className="mr-2 h-4 w-4" />
+								Copy link
+							</DropdownMenuItem>
+							<DropdownMenuItem asChild>
+								<a href={snapshot.public_url} target="_blank" rel="noopener noreferrer">
+									<ExternalLink className="mr-2 h-4 w-4" />
+									Open link
+								</a>
+							</DropdownMenuItem>
 							{canDelete && (
-							<TooltipProvider>
-								<Tooltip open={isDesktop ? undefined : false}>
-									<TooltipTrigger asChild>
-										<Button
-											variant="ghost"
-											size="icon"
+								<DropdownMenuItem
 									onClick={() => onDelete(snapshot)}
 									disabled={isDeleting}
-											className="h-7 w-7 text-muted-foreground hover:text-destructive"
 								>
-											<Trash2 className="h-3 w-3" />
-										</Button>
-									</TooltipTrigger>
-									<TooltipContent>Delete</TooltipContent>
-								</Tooltip>
-							</TooltipProvider>
+									<Trash2 className="mr-2 h-4 w-4" />
+									Delete
+								</DropdownMenuItem>
 							)}
-					</div>
+						</DropdownMenuContent>
+					</DropdownMenu>
 				</div>

 				{/* Message count badge */}
 				<div className="flex items-center gap-1.5">
 					<Badge
-						variant="outline"
-						className="text-[10px] px-1.5 py-0.5 border-muted-foreground/20 text-muted-foreground"
+						variant="secondary"
+						className="text-[10px] px-1.5 py-0.5 border-0 text-muted-foreground bg-muted"
 					>
 						<MessageSquare className="h-2.5 w-2.5 mr-1" />
 						{snapshot.message_count} messages
 					</Badge>
 				</div>

-				{/* Public URL – selectable fallback for manual copy */}
-				<div className="flex items-center gap-2 rounded-md border border-border/60 bg-muted/30 px-2.5 py-1.5">
-					<div className="min-w-0 flex-1 overflow-x-auto scrollbar-hide">
-						<p
-							className="text-[10px] font-mono text-muted-foreground whitespace-nowrap select-all cursor-text"
-							title={snapshot.public_url}
-						>
-							{snapshot.public_url}
-						</p>
-					</div>
-					<TooltipProvider>
-						<Tooltip open={isDesktop ? undefined : false}>
-							<TooltipTrigger asChild>
-								<Button
-									variant="ghost"
-									size="icon"
-									onClick={handleCopyClick}
-									className="h-6 w-6 shrink-0 text-muted-foreground hover:text-foreground"
-								>
-									{copied ? (
-										<Check className="h-3 w-3 text-green-500" />
-									) : (
-										<Copy className="h-3 w-3" />
-									)}
-								</Button>
-							</TooltipTrigger>
-							<TooltipContent>{copied ? "Copied!" : "Copy link"}</TooltipContent>
-						</Tooltip>
-					</TooltipProvider>
-				</div>
-
 				{/* Footer: Date + Creator */}
 				<div className="flex items-center gap-2 pt-2 border-t border-border/40 mt-auto">
 					<span className="text-[11px] text-muted-foreground/60">{formattedDate}</span>
--- a/surfsense_web/components/settings/general-settings-manager.tsx
+++ b/surfsense_web/components/settings/general-settings-manager.tsx
@ -181,10 +181,10 @@ export function GeneralSettingsManager({ searchSpaceId }: GeneralSettingsManager
 						type="submit"
 						variant="outline"
 						disabled={!hasChanges || saving || !name.trim()}
-						className="gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200"
+						className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200"
 					>
-						{saving ? <Spinner size="sm" /> : null}
-						{saving ? t("general_saving") : t("general_save")}
+						<span className={saving ? "opacity-0" : ""}>{t("general_save")}</span>
+						{saving && <Spinner size="sm" className="absolute" />}
 					</Button>
 				</div>
 			</form>
--- a/surfsense_web/components/settings/llm-role-manager.tsx
+++ b/surfsense_web/components/settings/llm-role-manager.tsx
@ -395,6 +395,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
 																		key={config.id}
 																		value={config.id.toString()}
 																		className="text-xs md:text-sm py-1.5 md:py-2"
+																		textValue={config.name}
 																	>
 																		<div className="flex items-center gap-1 md:gap-1.5 flex-wrap min-w-0">
 																			<span className="truncate text-xs md:text-sm">
@ -403,7 +404,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
 																			{isAuto && (
 																				<Badge
 																					variant="secondary"
-																					className="text-[8px] md:text-[9px] shrink-0 bg-violet-100 text-violet-700 dark:bg-violet-900/30 dark:text-violet-300"
+																					className="text-[8px] md:text-[9px] shrink-0 bg-zinc-200 text-zinc-600 dark:bg-zinc-700 dark:text-zinc-300 [[data-slot=select-trigger]_&]:hidden"
 																				>
 																					Recommended
 																				</Badge>
--- a/surfsense_web/hooks/use-typewriter.ts
+++ b/surfsense_web/hooks/use-typewriter.ts
@ -27,7 +27,6 @@ export function useTypewriter(text: string, speed = 35, skipFor = "New Chat"): s
 		}

 		let i = 0;
-		setDisplayed("");
 		intervalRef.current = setInterval(() => {
 			i++;
 			setDisplayed(text.slice(0, i));
--- a/surfsense_web/lib/chat/message-utils.ts
+++ b/surfsense_web/lib/chat/message-utils.ts
@ -39,13 +39,16 @@ export function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
 		content = [{ type: "text", text: String(msg.content) }];
 	}

-	const metadata = msg.author_id
+	const metadata = (msg.author_id || msg.token_usage)
 		? {
 				custom: {
+					...(msg.author_id && {
 						author: {
 							displayName: msg.author_display_name ?? null,
 							avatarUrl: msg.author_avatar_url ?? null,
 						},
+					}),
+					...(msg.token_usage && { usage: msg.token_usage }),
 				},
 			}
 		: undefined;
--- a/surfsense_web/lib/chat/streaming-state.ts
+++ b/surfsense_web/lib/chat/streaming-state.ts
@ -238,6 +238,16 @@ export type SSEEvent =
 	| { type: "data-thread-title-update"; data: { threadId: number; title: string } }
 	| { type: "data-interrupt-request"; data: Record<string, unknown> }
 	| { type: "data-documents-updated"; data: Record<string, unknown> }
+	| {
+			type: "data-token-usage";
+			data: {
+				usage: Record<string, { prompt_tokens: number; completion_tokens: number; total_tokens: number }>;
+				prompt_tokens: number;
+				completion_tokens: number;
+				total_tokens: number;
+				call_details: Array<{ model: string; prompt_tokens: number; completion_tokens: number; total_tokens: number }>;
+			};
+	  }
 	| { type: "error"; errorText: string };

 /**
--- a/surfsense_web/lib/chat/thread-persistence.ts
+++ b/surfsense_web/lib/chat/thread-persistence.ts
@ -26,6 +26,13 @@ export interface ThreadRecord {
 	has_comments?: boolean;
 }

+export interface TokenUsageSummary {
+	prompt_tokens: number;
+	completion_tokens: number;
+	total_tokens: number;
+	model_breakdown?: Record<string, { prompt_tokens: number; completion_tokens: number; total_tokens: number }> | null;
+}
+
 export interface MessageRecord {
 	id: number;
 	thread_id: number;
@ -35,6 +42,7 @@ export interface MessageRecord {
 	author_id?: string | null;
 	author_display_name?: string | null;
 	author_avatar_url?: string | null;
+	token_usage?: TokenUsageSummary | null;
 }

 export interface ThreadListResponse {
@ -111,11 +119,11 @@ export async function getThreadMessages(threadId: number): Promise<ThreadHistory
 }

 /**
- * Append a message to a thread
+ * Append a message to a thread.
 */
 export async function appendMessage(
 	threadId: number,
-	message: { role: "user" | "assistant" | "system"; content: unknown }
+	message: { role: "user" | "assistant" | "system"; content: unknown; token_usage?: unknown }
 ): Promise<MessageRecord> {
 	return baseApiService.post<MessageRecord>(`/api/v1/threads/${threadId}/messages`, undefined, {
 		body: message,
--- a/surfsense_web/lib/provider-icons.tsx
+++ b/surfsense_web/lib/provider-icons.tsx
@ -13,6 +13,7 @@ import {
 	DeepSeekIcon,
 	FireworksAiIcon,
 	GeminiIcon,
+	GitHubModelsIcon,
 	GroqIcon,
 	HuggingFaceIcon,
 	MiniMaxIcon,
@ -82,6 +83,8 @@ export function getProviderIcon(
 			return <FireworksAiIcon className={cn(className)} />;
 		case "GOOGLE":
 			return <GeminiIcon className={cn(className)} />;
+		case "GITHUB_MODELS":
+			return <GitHubModelsIcon className={cn(className)} />;
 		case "GROQ":
 			return <GroqIcon className={cn(className)} />;
 		case "HUGGINGFACE":
--- a/surfsense_web/messages/en.json
+++ b/surfsense_web/messages/en.json
@ -123,7 +123,6 @@
 		"api_key_nav_description": "Manage your API access token",
 		"api_key_title": "API Key",
 		"api_key_description": "Use this key to authenticate API requests",
-		"api_key_warning_title": "Keep it secret",
 		"api_key_warning_description": "Your API key grants full access to your account. Never share it publicly or commit it to version control.",
 		"your_api_key": "Your API Key",
 		"copied": "Copied!",
--- a/surfsense_web/messages/es.json
+++ b/surfsense_web/messages/es.json
@ -123,7 +123,6 @@
 		"api_key_nav_description": "Administra tu token de acceso a la API",
 		"api_key_title": "Clave API",
 		"api_key_description": "Usa esta clave para autenticar las solicitudes de la API",
-		"api_key_warning_title": "Mantenla en secreto",
 		"api_key_warning_description": "Tu clave API otorga acceso completo a tu cuenta. Nunca la compartas públicamente ni la incluyas en el control de versiones.",
 		"your_api_key": "Tu clave API",
 		"copied": "¡Copiado!",
--- a/surfsense_web/messages/hi.json
+++ b/surfsense_web/messages/hi.json
@ -123,7 +123,6 @@
 		"api_key_nav_description": "अपना API एक्सेस टोकन प्रबंधित करें",
 		"api_key_title": "API कुंजी",
 		"api_key_description": "API अनुरोधों को प्रमाणित करने के लिए इस कुंजी का उपयोग करें",
-		"api_key_warning_title": "इसे गुप्त रखें",
 		"api_key_warning_description": "आपकी API कुंजी आपके खाते तक पूर्ण पहुंच प्रदान करती है। इसे कभी सार्वजनिक रूप से साझा न करें या संस्करण नियंत्रण में शामिल न करें।",
 		"your_api_key": "आपकी API कुंजी",
 		"copied": "कॉपी किया गया!",
--- a/surfsense_web/messages/pt.json
+++ b/surfsense_web/messages/pt.json
@ -123,7 +123,6 @@
 		"api_key_nav_description": "Gerencie seu token de acesso à API",
 		"api_key_title": "Chave API",
 		"api_key_description": "Use esta chave para autenticar solicitações da API",
-		"api_key_warning_title": "Mantenha em segredo",
 		"api_key_warning_description": "Sua chave API concede acesso total à sua conta. Nunca a compartilhe publicamente nem a inclua no controle de versão.",
 		"your_api_key": "Sua chave API",
 		"copied": "Copiado!",
--- a/surfsense_web/messages/zh.json
+++ b/surfsense_web/messages/zh.json
@ -108,7 +108,6 @@
 		"api_key_nav_description": "管理您的API访问令牌",
 		"api_key_title": "API密钥",
 		"api_key_description": "使用此密钥验证API请求",
-		"api_key_warning_title": "请保密",
 		"api_key_warning_description": "您的API密钥可以完全访问您的账户。请勿公开分享或提交到版本控制。",
 		"your_api_key": "您的API密钥",
 		"copied": "已复制！",
@ -0,0 +1 @@
+<svg fill="currentColor" fill-rule="evenodd" height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Github</title><path d="M12 0c6.63 0 12 5.276 12 11.79-.001 5.067-3.29 9.567-8.175 11.187-.6.118-.825-.25-.825-.56 0-.398.015-1.665.015-3.242 0-1.105-.375-1.813-.81-2.181 2.67-.295 5.475-1.297 5.475-5.822 0-1.297-.465-2.344-1.23-3.169.12-.295.54-1.503-.12-3.125 0 0-1.005-.324-3.3 1.209a11.32 11.32 0 00-3-.398c-1.02 0-2.04.133-3 .398-2.295-1.518-3.3-1.209-3.3-1.209-.66 1.622-.24 2.83-.12 3.125-.765.825-1.23 1.887-1.23 3.169 0 4.51 2.79 5.527 5.46 5.822-.345.294-.66.81-.765 1.577-.69.31-2.415.81-3.495-.973-.225-.354-.9-1.223-1.845-1.209-1.005.015-.405.56.015.781.51.28 1.095 1.327 1.23 1.666.24.663 1.02 1.93 4.035 1.385 0 .988.015 1.916.015 2.196 0 .31-.225.664-.825.56C3.303 21.374-.003 16.867 0 11.791 0 5.276 5.37 0 12 0z"></path></svg>