diff --git a/surfsense_backend/alembic/versions/125_add_token_usage_table.py b/surfsense_backend/alembic/versions/125_add_token_usage_table.py new file mode 100644 index 000000000..915561c8c --- /dev/null +++ b/surfsense_backend/alembic/versions/125_add_token_usage_table.py @@ -0,0 +1,85 @@ +"""125_add_token_usage_table + +Revision ID: 125 +Revises: 124 +Create Date: 2026-04-14 + +Adds token_usage table for tracking LLM token consumption per message. +Supports future extension via usage_type for indexing, image gen, etc. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import JSONB, UUID + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "125" +down_revision: str | None = "124" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + conn = op.get_bind() + if sa.inspect(conn).has_table("token_usage"): + return + + op.create_table( + "token_usage", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("prompt_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column( + "completion_tokens", sa.Integer(), nullable=False, server_default="0" + ), + sa.Column("total_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column("model_breakdown", JSONB, nullable=True), + sa.Column("call_details", JSONB, nullable=True), + sa.Column("usage_type", sa.String(50), nullable=False, server_default="chat"), + sa.Column( + "thread_id", + sa.Integer(), + sa.ForeignKey("new_chat_threads.id", ondelete="CASCADE"), + nullable=True, + ), + sa.Column( + "message_id", + sa.Integer(), + sa.ForeignKey("new_chat_messages.id", ondelete="SET NULL"), + nullable=True, + ), + sa.Column( + "search_space_id", + sa.Integer(), + sa.ForeignKey("searchspaces.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "user_id", + UUID(as_uuid=True), + sa.ForeignKey("user.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + nullable=False, + server_default=sa.func.now(), + ), + ) + + op.create_index("ix_token_usage_thread_id", "token_usage", ["thread_id"]) + op.create_index("ix_token_usage_message_id", "token_usage", ["message_id"]) + op.create_index( + "ix_token_usage_search_space_id", "token_usage", ["search_space_id"] + ) + op.create_index("ix_token_usage_user_id", "token_usage", ["user_id"]) + op.create_index("ix_token_usage_usage_type", "token_usage", ["usage_type"]) + + +def downgrade() -> None: + op.drop_table("token_usage") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 82d77f847..61bdd65cb 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -647,6 +647,11 @@ class NewChatThread(BaseModel, TimestampMixin): cascade="all, delete-orphan", foreign_keys="[PublicChatSnapshot.thread_id]", ) + token_usages = relationship( + "TokenUsage", + back_populates="thread", + cascade="all, delete-orphan", + ) class NewChatMessage(BaseModel, TimestampMixin): @@ -685,6 +690,63 @@ class NewChatMessage(BaseModel, TimestampMixin): back_populates="message", cascade="all, delete-orphan", ) + token_usage = relationship( + "TokenUsage", + back_populates="message", + uselist=False, + cascade="all, delete-orphan", + ) + + +class TokenUsage(BaseModel, TimestampMixin): + """ + Tracks LLM token consumption per assistant turn. + + One row per usage event. For chat, linked to a specific message via message_id. + The usage_type column enables future extension to track non-chat usage + (indexing, image generation, podcasts, etc.) without schema changes. + """ + + __tablename__ = "token_usage" + + prompt_tokens = Column(Integer, nullable=False, default=0) + completion_tokens = Column(Integer, nullable=False, default=0) + total_tokens = Column(Integer, nullable=False, default=0) + model_breakdown = Column(JSONB, nullable=True) + call_details = Column(JSONB, nullable=True) + + usage_type = Column(String(50), nullable=False, default="chat", index=True) + + thread_id = Column( + Integer, + ForeignKey("new_chat_threads.id", ondelete="CASCADE"), + nullable=True, + index=True, + ) + message_id = Column( + Integer, + ForeignKey("new_chat_messages.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + search_space_id = Column( + Integer, + ForeignKey("searchspaces.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + user_id = Column( + UUID(as_uuid=True), + ForeignKey("user.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + + # Relationships + thread = relationship("NewChatThread", back_populates="token_usages") + message = relationship("NewChatMessage", back_populates="token_usage") + search_space = relationship("SearchSpace") + user = relationship("User") class PublicChatSnapshot(BaseModel, TimestampMixin): diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 10a6951fa..b914b297e 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -50,7 +50,9 @@ from app.schemas.new_chat import ( ThreadHistoryLoadResponse, ThreadListItem, ThreadListResponse, + TokenUsageSummary, ) +from app.services.token_tracking_service import record_token_usage from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat from app.users import current_active_user from app.utils.rbac import check_permission @@ -473,10 +475,13 @@ async def get_thread_messages( # Check thread-level access based on visibility await check_thread_access(session, thread, user) - # Get messages with their authors loaded + # Get messages with their authors and token usage loaded messages_result = await session.execute( select(NewChatMessage) - .options(selectinload(NewChatMessage.author)) + .options( + selectinload(NewChatMessage.author), + selectinload(NewChatMessage.token_usage), + ) .filter(NewChatMessage.thread_id == thread_id) .order_by(NewChatMessage.created_at) ) @@ -493,6 +498,9 @@ async def get_thread_messages( author_id=msg.author_id, author_display_name=msg.author.display_name if msg.author else None, author_avatar_url=msg.author.avatar_url if msg.author else None, + token_usage=TokenUsageSummary.model_validate(msg.token_usage) + if msg.token_usage + else None, ) for msg in db_messages ] @@ -530,7 +538,11 @@ async def get_thread_full( try: result = await session.execute( select(NewChatThread) - .options(selectinload(NewChatThread.messages)) + .options( + selectinload(NewChatThread.messages).selectinload( + NewChatMessage.token_usage + ), + ) .filter(NewChatThread.id == thread_id) ) thread = result.scalars().first() @@ -935,11 +947,37 @@ async def append_message( # flush assigns the PK/defaults without a round-trip SELECT await session.flush() + + # Persist token usage if provided (for assistant messages) + token_usage_data = raw_body.get("token_usage") + if token_usage_data and message_role == NewChatMessageRole.ASSISTANT: + await record_token_usage( + session, + usage_type="chat", + search_space_id=thread.search_space_id, + user_id=user.id, + prompt_tokens=token_usage_data.get("prompt_tokens", 0), + completion_tokens=token_usage_data.get("completion_tokens", 0), + total_tokens=token_usage_data.get("total_tokens", 0), + model_breakdown=token_usage_data.get("usage"), + call_details=token_usage_data.get("call_details"), + thread_id=thread_id, + message_id=db_message.id, + ) + await session.commit() - # Return the in-memory object (already has id from flush) instead of - # doing an extra refresh() SELECT. - return db_message + # Build response manually to avoid lazy-loading the token_usage + # relationship after commit (which would trigger MissingGreenlet). + return NewChatMessageRead( + id=db_message.id, + thread_id=db_message.thread_id, + role=db_message.role, + content=db_message.content, + created_at=db_message.created_at, + author_id=db_message.author_id, + token_usage=None, + ) except HTTPException: raise @@ -1003,6 +1041,7 @@ async def list_messages( # Get messages query = ( select(NewChatMessage) + .options(selectinload(NewChatMessage.token_usage)) .filter(NewChatMessage.thread_id == thread_id) .order_by(NewChatMessage.created_at) .offset(skip) diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 5d8ae207e..e523657a4 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -34,6 +34,14 @@ class NewChatMessageCreate(NewChatMessageBase): thread_id: int +class TokenUsageSummary(BaseModel): + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + model_breakdown: dict | None = None + model_config = ConfigDict(from_attributes=True) + + class NewChatMessageRead(NewChatMessageBase, IDModel, TimestampModel): """Schema for reading a message.""" @@ -41,6 +49,7 @@ class NewChatMessageRead(NewChatMessageBase, IDModel, TimestampModel): author_id: UUID | None = None author_display_name: str | None = None author_avatar_url: str | None = None + token_usage: TokenUsageSummary | None = None model_config = ConfigDict(from_attributes=True) diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py index 63d8d10b9..1bf9e2386 100644 --- a/surfsense_backend/app/services/llm_router_service.py +++ b/surfsense_backend/app/services/llm_router_service.py @@ -820,7 +820,9 @@ class ChatLiteLLMRouter(BaseChatModel): ) # Convert response to ChatResult with potential tool calls - message = self._convert_response_to_message(response.choices[0].message) + message = self._convert_response_to_message( + response.choices[0].message, response=response + ) generation = ChatGeneration(message=message) return ChatResult(generations=[generation]) @@ -886,7 +888,9 @@ class ChatLiteLLMRouter(BaseChatModel): ) # Convert response to ChatResult with potential tool calls - message = self._convert_response_to_message(response.choices[0].message) + message = self._convert_response_to_message( + response.choices[0].message, response=response + ) generation = ChatGeneration(message=message) return ChatResult(generations=[generation]) @@ -970,6 +974,7 @@ class ChatLiteLLMRouter(BaseChatModel): messages=formatted_messages, stop=stop, stream=True, + stream_options={"include_usage": True}, **call_kwargs, ) except ContextWindowExceededError as e: @@ -1075,7 +1080,9 @@ class ChatLiteLLMRouter(BaseChatModel): return result - def _convert_response_to_message(self, response_message: Any) -> AIMessage: + def _convert_response_to_message( + self, response_message: Any, response: Any = None + ) -> AIMessage: """Convert a LiteLLM response message to a LangChain AIMessage.""" import json @@ -1098,9 +1105,22 @@ class ChatLiteLLMRouter(BaseChatModel): tool_call["args"] = tc.function.arguments tool_calls.append(tool_call) + extra_kwargs: dict[str, Any] = {} + if response: + usage = getattr(response, "usage", None) + if usage: + extra_kwargs["usage_metadata"] = { + "input_tokens": getattr(usage, "prompt_tokens", 0) or 0, + "output_tokens": getattr(usage, "completion_tokens", 0) or 0, + "total_tokens": getattr(usage, "total_tokens", 0) or 0, + } + extra_kwargs["response_metadata"] = { + "model_name": getattr(response, "model", "unknown"), + } + if tool_calls: - return AIMessage(content=content, tool_calls=tool_calls) - return AIMessage(content=content) + return AIMessage(content=content, tool_calls=tool_calls, **extra_kwargs) + return AIMessage(content=content, **extra_kwargs) def _convert_delta_to_chunk(self, delta: Any) -> AIMessageChunk | None: """Convert a streaming delta to an AIMessageChunk.""" diff --git a/surfsense_backend/app/services/llm_service.py b/surfsense_backend/app/services/llm_service.py index 723b17607..d31e19ed3 100644 --- a/surfsense_backend/app/services/llm_service.py +++ b/surfsense_backend/app/services/llm_service.py @@ -15,6 +15,7 @@ from app.services.llm_router_service import ( get_auto_mode_llm, is_auto_mode, ) +from app.services.token_tracking_service import token_tracker # Configure litellm to automatically drop unsupported parameters litellm.drop_params = True @@ -22,10 +23,11 @@ litellm.drop_params = True # Memory controls: prevent unbounded internal accumulation litellm.telemetry = False litellm.cache = None -litellm.success_callback = [] litellm.failure_callback = [] litellm.input_callback = [] +litellm.callbacks = [token_tracker] + logger = logging.getLogger(__name__) diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py new file mode 100644 index 000000000..9aa8c6e70 --- /dev/null +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -0,0 +1,210 @@ +""" +Token usage tracking via LiteLLM custom callback. + +Uses a ContextVar-scoped accumulator to group all LLM calls within a single +async request/turn. The accumulated data is emitted via SSE and persisted +when the frontend calls appendMessage. + +The module also provides ``record_token_usage``, a thin async helper that +creates a ``TokenUsage`` row for *any* usage type (chat, indexing, image +generation, podcasts, …). Call sites should prefer this helper over +constructing ``TokenUsage`` manually so that logging and error handling +stay consistent. +""" + +from __future__ import annotations + +import dataclasses +import logging +from contextvars import ContextVar +from dataclasses import dataclass, field +from typing import Any +from uuid import UUID + +from litellm.integrations.custom_logger import CustomLogger +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import TokenUsage + +logger = logging.getLogger(__name__) + + +@dataclass +class TokenCallRecord: + model: str + prompt_tokens: int + completion_tokens: int + total_tokens: int + + +@dataclass +class TurnTokenAccumulator: + """Accumulates token usage across all LLM calls within a single user turn.""" + + calls: list[TokenCallRecord] = field(default_factory=list) + + def add( + self, + model: str, + prompt_tokens: int, + completion_tokens: int, + total_tokens: int, + ) -> None: + self.calls.append( + TokenCallRecord( + model=model, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + ) + ) + + def per_message_summary(self) -> dict[str, dict[str, int]]: + """Return token counts grouped by model name.""" + by_model: dict[str, dict[str, int]] = {} + for c in self.calls: + entry = by_model.setdefault( + c.model, + {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}, + ) + entry["prompt_tokens"] += c.prompt_tokens + entry["completion_tokens"] += c.completion_tokens + entry["total_tokens"] += c.total_tokens + return by_model + + @property + def grand_total(self) -> int: + return sum(c.total_tokens for c in self.calls) + + @property + def total_prompt_tokens(self) -> int: + return sum(c.prompt_tokens for c in self.calls) + + @property + def total_completion_tokens(self) -> int: + return sum(c.completion_tokens for c in self.calls) + + def serialized_calls(self) -> list[dict[str, Any]]: + return [dataclasses.asdict(c) for c in self.calls] + + +_turn_accumulator: ContextVar[TurnTokenAccumulator | None] = ContextVar( + "_turn_accumulator", default=None +) + + +def start_turn() -> TurnTokenAccumulator: + """Create a fresh accumulator for the current async context and return it.""" + acc = TurnTokenAccumulator() + _turn_accumulator.set(acc) + logger.info("[TokenTracking] start_turn: new accumulator created (id=%s)", id(acc)) + return acc + + +def get_current_accumulator() -> TurnTokenAccumulator | None: + return _turn_accumulator.get() + + +class TokenTrackingCallback(CustomLogger): + """LiteLLM callback that captures token usage into the turn accumulator.""" + + async def async_log_success_event( + self, + kwargs: dict[str, Any], + response_obj: Any, + start_time: Any, + end_time: Any, + ) -> None: + acc = _turn_accumulator.get() + if acc is None: + logger.debug( + "[TokenTracking] async_log_success_event fired but no accumulator in context" + ) + return + + usage = getattr(response_obj, "usage", None) + if not usage: + logger.debug( + "[TokenTracking] async_log_success_event fired but response has no usage data" + ) + return + + prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0 + completion_tokens = getattr(usage, "completion_tokens", 0) or 0 + total_tokens = getattr(usage, "total_tokens", 0) or 0 + + model = kwargs.get("model", "unknown") + + acc.add( + model=model, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + ) + logger.info( + "[TokenTracking] Captured: model=%s prompt=%d completion=%d total=%d (accumulator now has %d calls)", + model, + prompt_tokens, + completion_tokens, + total_tokens, + len(acc.calls), + ) + + +token_tracker = TokenTrackingCallback() + + +# --------------------------------------------------------------------------- +# Persistence helper +# --------------------------------------------------------------------------- + + +async def record_token_usage( + session: AsyncSession, + *, + usage_type: str, + search_space_id: int, + user_id: UUID, + prompt_tokens: int = 0, + completion_tokens: int = 0, + total_tokens: int = 0, + model_breakdown: dict[str, Any] | None = None, + call_details: dict[str, Any] | None = None, + thread_id: int | None = None, + message_id: int | None = None, +) -> TokenUsage | None: + """Persist a single ``TokenUsage`` row. + + Returns the record on success, ``None`` if persistence failed (the + failure is logged but never propagated so callers don't need to + wrap this in try/except). + """ + try: + record = TokenUsage( + usage_type=usage_type, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + model_breakdown=model_breakdown, + call_details=call_details, + thread_id=thread_id, + message_id=message_id, + search_space_id=search_space_id, + user_id=user_id, + ) + session.add(record) + logger.debug( + "[TokenTracking] recorded %s usage: prompt=%d completion=%d total=%d", + usage_type, + prompt_tokens, + completion_tokens, + total_tokens, + ) + return record + except Exception: + logger.warning( + "[TokenTracking] failed to record %s token usage", + usage_type, + exc_info=True, + ) + return None diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 47a270568..478aa3671 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -51,7 +51,7 @@ from app.db import ( async_session_maker, shielded_async_session, ) -from app.prompts import TITLE_GENERATION_PROMPT_TEMPLATE +from app.prompts import TITLE_GENERATION_PROMPT from app.services.chat_session_state_service import ( clear_ai_responding, set_ai_responding, @@ -1171,6 +1171,10 @@ async def stream_new_chat( _t_total = time.perf_counter() log_system_snapshot("stream_new_chat_START") + from app.services.token_tracking_service import start_turn + + accumulator = start_turn() + session = async_session_maker() try: # Mark AI as responding to this user for live collaboration @@ -1456,22 +1460,71 @@ async def stream_new_chat( ) is_first_response = (assistant_count_result.scalar() or 0) == 0 - title_task: asyncio.Task[str | None] | None = None + title_task: asyncio.Task[tuple[str | None, dict | None]] | None = None if is_first_response: - async def _generate_title() -> str | None: + async def _generate_title() -> tuple[str | None, dict | None]: + """Generate a short title via litellm.acompletion. + + Returns (title, usage_dict). Usage is extracted directly from + the response object because litellm fires its async callback + via fire-and-forget ``create_task``, so the + ``TokenTrackingCallback`` would run too late. We also blank + the accumulator in this child-task context so the late callback + doesn't double-count. + """ try: - title_chain = TITLE_GENERATION_PROMPT_TEMPLATE | llm - title_result = await title_chain.ainvoke( - {"user_query": user_query[:500]} + from litellm import acompletion + + from app.services.llm_router_service import LLMRouterService + from app.services.token_tracking_service import _turn_accumulator + + _turn_accumulator.set(None) + + prompt = TITLE_GENERATION_PROMPT.replace( + "{user_query}", user_query[:500] ) - if title_result and hasattr(title_result, "content"): - raw_title = title_result.content.strip() - if raw_title and len(raw_title) <= 100: - return raw_title.strip("\"'") + messages = [{"role": "user", "content": prompt}] + + if getattr(llm, "model", None) == "auto": + router = LLMRouterService.get_router() + response = await router.acompletion( + model="auto", messages=messages + ) + else: + response = await acompletion( + model=llm.model, + messages=messages, + api_key=getattr(llm, "api_key", None), + api_base=getattr(llm, "api_base", None), + ) + + usage_info = None + usage = getattr(response, "usage", None) + if usage: + raw_model = getattr(llm, "model", "") or "" + model_name = ( + raw_model.split("/", 1)[-1] + if "/" in raw_model + else (raw_model or response.model or "unknown") + ) + usage_info = { + "model": model_name, + "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, + "completion_tokens": getattr(usage, "completion_tokens", 0) + or 0, + "total_tokens": getattr(usage, "total_tokens", 0) or 0, + } + + raw_title = response.choices[0].message.content.strip() + if raw_title and len(raw_title) <= 100: + return raw_title.strip("\"'"), usage_info + return None, usage_info except Exception: - pass - return None + logging.getLogger(__name__).exception( + "[TitleGen] _generate_title failed" + ) + return None, None title_task = asyncio.create_task(_generate_title()) @@ -1503,7 +1556,9 @@ async def stream_new_chat( # Inject title update mid-stream as soon as the background task finishes if title_task is not None and title_task.done() and not title_emitted: - generated_title = title_task.result() + generated_title, title_usage = title_task.result() + if title_usage: + accumulator.add(**title_usage) if generated_title: async with shielded_async_session() as title_session: title_thread_result = await title_session.execute( @@ -1528,6 +1583,26 @@ async def stream_new_chat( if stream_result.is_interrupted: if title_task is not None and not title_task.done(): title_task.cancel() + + usage_summary = accumulator.per_message_summary() + _perf_log.info( + "[token_usage] interrupted new_chat: calls=%d total=%d summary=%s", + len(accumulator.calls), + accumulator.grand_total, + usage_summary, + ) + if usage_summary: + yield streaming_service.format_data( + "token-usage", + { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }, + ) + yield streaming_service.format_finish_step() yield streaming_service.format_finish() yield streaming_service.format_done() @@ -1535,7 +1610,9 @@ async def stream_new_chat( # If the title task didn't finish during streaming, await it now if title_task is not None and not title_emitted: - generated_title = await title_task + generated_title, title_usage = await title_task + if title_usage: + accumulator.add(**title_usage) if generated_title: async with shielded_async_session() as title_session: title_thread_result = await title_session.execute( @@ -1549,6 +1626,25 @@ async def stream_new_chat( chat_id, generated_title ) + usage_summary = accumulator.per_message_summary() + _perf_log.info( + "[token_usage] normal new_chat: calls=%d total=%d summary=%s", + len(accumulator.calls), + accumulator.grand_total, + usage_summary, + ) + if usage_summary: + yield streaming_service.format_data( + "token-usage", + { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }, + ) + # Fire background memory extraction if the agent didn't handle it. # Shared threads write to team memory; private threads write to user memory. if not stream_result.agent_called_update_memory: @@ -1666,6 +1762,10 @@ async def stream_resume_chat( stream_result = StreamResult() _t_total = time.perf_counter() + from app.services.token_tracking_service import start_turn + + accumulator = start_turn() + session = async_session_maker() try: if user_id: @@ -1789,11 +1889,49 @@ async def stream_resume_chat( chat_id, ) if stream_result.is_interrupted: + usage_summary = accumulator.per_message_summary() + _perf_log.info( + "[token_usage] interrupted resume_chat: calls=%d total=%d summary=%s", + len(accumulator.calls), + accumulator.grand_total, + usage_summary, + ) + if usage_summary: + yield streaming_service.format_data( + "token-usage", + { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }, + ) + yield streaming_service.format_finish_step() yield streaming_service.format_finish() yield streaming_service.format_done() return + usage_summary = accumulator.per_message_summary() + _perf_log.info( + "[token_usage] normal resume_chat: calls=%d total=%d summary=%s", + len(accumulator.calls), + accumulator.grand_total, + usage_summary, + ) + if usage_summary: + yield streaming_service.format_data( + "token-usage", + { + "usage": usage_summary, + "prompt_tokens": accumulator.total_prompt_tokens, + "completion_tokens": accumulator.total_completion_tokens, + "total_tokens": accumulator.grand_total, + "call_details": accumulator.serialized_calls(), + }, + ) + yield streaming_service.format_finish_step() yield streaming_service.format_finish() yield streaming_service.format_done() diff --git a/surfsense_web/app/(home)/login/LocalLoginForm.tsx b/surfsense_web/app/(home)/login/LocalLoginForm.tsx index 07a4db4d3..e3c34306f 100644 --- a/surfsense_web/app/(home)/login/LocalLoginForm.tsx +++ b/surfsense_web/app/(home)/login/LocalLoginForm.tsx @@ -174,31 +174,31 @@ export function LocalLoginForm() { -
- setPassword(e.target.value)} - className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ - error.title - ? "border-destructive focus:border-destructive focus:ring-destructive/40" - : "border-border focus:border-primary focus:ring-primary/40" - }`} - disabled={isLoggingIn} - /> - -
+
+ setPassword(e.target.value)} + className={`block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ + error.title + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" + }`} + disabled={isLoggingIn} + /> + +
diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index d0d7616d1..8d3e90b7d 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -1,4 +1,5 @@ import { + ActionBarMorePrimitive, ActionBarPrimitive, AuiIf, ErrorPrimitive, @@ -15,12 +16,17 @@ import { ExternalLink, Globe, MessageSquare, + MoreHorizontalIcon, RefreshCwIcon, } from "lucide-react"; import dynamic from "next/dynamic"; import type { FC } from "react"; import { useEffect, useMemo, useRef, useState } from "react"; import { commentsEnabledAtom, targetCommentIdAtom } from "@/atoms/chat/current-thread.atom"; +import { + globalNewLLMConfigsAtom, + newLLMConfigsAtom, +} from "@/atoms/new-llm-config/new-llm-config-query.atoms"; import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; import { CitationMetadataProvider, @@ -39,9 +45,13 @@ import { DrawerHeader, DrawerTitle, } from "@/components/ui/drawer"; +import { DropdownMenuLabel } from "@/components/ui/dropdown-menu"; +import { Button } from "@/components/ui/button"; import { useComments } from "@/hooks/use-comments"; import { useMediaQuery } from "@/hooks/use-media-query"; import { useElectronAPI } from "@/hooks/use-platform"; +import { useTokenUsage } from "@/components/assistant-ui/token-usage-context"; +import { getProviderIcon } from "@/lib/provider-icons"; import { cn } from "@/lib/utils"; import { openSafeNavigationHref, resolveSafeNavigationHref } from "@/components/tool-ui/shared/media"; @@ -367,6 +377,94 @@ export const MessageError: FC = () => { ); }; +function formatMessageDate(date: Date): string { + return date.toLocaleDateString(undefined, { + month: "short", + day: "numeric", + hour: "numeric", + minute: "2-digit", + hour12: true, + }); +} + +const MessageInfoDropdown: FC = () => { + const messageId = useAuiState(({ message }) => message?.id); + const createdAt = useAuiState(({ message }) => message?.createdAt); + const usage = useTokenUsage(messageId); + + const { data: localConfigs } = useAtomValue(newLLMConfigsAtom); + const { data: globalConfigs } = useAtomValue(globalNewLLMConfigsAtom); + + const configByModel = useMemo(() => { + const map = new Map(); + for (const c of [...(globalConfigs ?? []), ...(localConfigs ?? [])]) { + map.set(c.model_name, { name: c.name, provider: c.provider }); + } + return map; + }, [localConfigs, globalConfigs]); + + const resolveModel = (modelKey: string) => { + const parts = modelKey.split("/"); + const bare = parts[parts.length - 1] ?? modelKey; + const config = configByModel.get(modelKey) ?? configByModel.get(bare); + return config + ? { name: config.name, icon: getProviderIcon(config.provider, { className: "size-3.5" }) } + : { name: modelKey, icon: null }; + }; + + const modelBreakdown = usage ? (usage.usage ?? usage.model_breakdown) : undefined; + const models = modelBreakdown ? Object.entries(modelBreakdown) : []; + const hasUsage = usage && usage.total_tokens > 0; + + return ( + + + + + + {createdAt && ( + + {formatMessageDate(createdAt)} + + )} + {hasUsage && ( + <> + + {models.length > 0 ? ( + models.map(([model, counts]) => { + const { name, icon } = resolveModel(model); + return ( + e.preventDefault()}> + + {icon} + {name} + + + {counts.total_tokens.toLocaleString()} tokens + + + ); + }) + ) : ( + e.preventDefault()}> + + {usage.total_tokens.toLocaleString()} tokens + + + )} + + )} + + + ); +}; + const AssistantMessageInner: FC = () => { const isMobile = !useMediaQuery("(min-width: 768px)"); @@ -429,7 +527,7 @@ const AssistantMessageInner: FC = () => { )} -
+
@@ -626,6 +724,7 @@ const AssistantActionBar: FC = () => { )} + ); }; diff --git a/surfsense_web/components/assistant-ui/token-usage-context.tsx b/surfsense_web/components/assistant-ui/token-usage-context.tsx new file mode 100644 index 000000000..8b82f33ff --- /dev/null +++ b/surfsense_web/components/assistant-ui/token-usage-context.tsx @@ -0,0 +1,79 @@ +"use client"; + +import { createContext, useContext, useCallback, useSyncExternalStore, type FC, type ReactNode } from "react"; + +export interface TokenUsageData { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + usage?: Record; + model_breakdown?: Record; +} + +type Listener = () => void; + +class TokenUsageStore { + private data = new Map(); + private listeners = new Set(); + + get(messageId: string): TokenUsageData | undefined { + return this.data.get(messageId); + } + + set(messageId: string, usage: TokenUsageData): void { + this.data.set(messageId, usage); + this.notify(); + } + + rename(oldId: string, newId: string): void { + const usage = this.data.get(oldId); + if (usage) { + this.data.delete(oldId); + this.data.set(newId, usage); + this.notify(); + } + } + + clear(): void { + this.data.clear(); + this.notify(); + } + + subscribe = (listener: Listener): (() => void) => { + this.listeners.add(listener); + return () => this.listeners.delete(listener); + }; + + private notify(): void { + for (const l of this.listeners) l(); + } +} + +const TokenUsageContext = createContext(null); + +export const TokenUsageProvider: FC<{ store: TokenUsageStore; children: ReactNode }> = ({ store, children }) => ( + {children} +); + +export function useTokenUsageStore(): TokenUsageStore { + const store = useContext(TokenUsageContext); + if (!store) throw new Error("useTokenUsageStore must be used within TokenUsageProvider"); + return store; +} + +export function useTokenUsage(messageId: string | undefined): TokenUsageData | undefined { + const store = useContext(TokenUsageContext); + const getSnapshot = useCallback( + () => (store && messageId ? store.get(messageId) : undefined), + [store, messageId], + ); + const subscribe = useCallback( + (onStoreChange: () => void) => (store ? store.subscribe(onStoreChange) : () => {}), + [store], + ); + return useSyncExternalStore(subscribe, getSnapshot, getSnapshot); +} + +export function createTokenUsageStore(): TokenUsageStore { + return new TokenUsageStore(); +} diff --git a/surfsense_web/components/documents/DocumentsFilters.tsx b/surfsense_web/components/documents/DocumentsFilters.tsx index 2b7cf0f10..a5ee57703 100644 --- a/surfsense_web/components/documents/DocumentsFilters.tsx +++ b/surfsense_web/components/documents/DocumentsFilters.tsx @@ -2,7 +2,6 @@ import { IconBinaryTree, IconBinaryTreeFilled } from "@tabler/icons-react"; import { FolderPlus, ListFilter, Search, Upload, X } from "lucide-react"; -import { AnimatePresence, motion } from "motion/react"; import { useTranslations } from "next-intl"; import React, { useCallback, useMemo, useRef, useState } from "react"; import { useDocumentUploadDialog } from "@/components/assistant-ui/document-upload-popup"; @@ -10,6 +9,7 @@ import { Button } from "@/components/ui/button"; import { Checkbox } from "@/components/ui/checkbox"; import { Input } from "@/components/ui/input"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { Spinner } from "@/components/ui/spinner"; import { ToggleGroup, ToggleGroupItem } from "@/components/ui/toggle-group"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { cn } from "@/lib/utils"; @@ -74,17 +74,17 @@ export function DocumentsFilters({ return (
- {/* New Folder + Filter Toggle Group */} + {/* New Folder + AI Sort + Filter Toggle Group */} {onCreateFolder && ( { - e.preventDefault(); - onCreateFolder(); + className="h-9 w-9 shrink-0 border-sidebar-border text-muted-foreground hover:text-foreground hover:border-sidebar-border bg-sidebar" + onClick={(e) => { + e.preventDefault(); + onCreateFolder(); }} > @@ -94,13 +94,52 @@ export function DocumentsFilters({ )} + {onToggleAiSort && ( + + + { + e.preventDefault(); + onToggleAiSort(); + }} + aria-label={aiSortEnabled ? "Disable AI sort" : "Enable AI sort"} + aria-pressed={aiSortEnabled} + > + {aiSortBusy ? ( + + ) : aiSortEnabled ? ( + + ) : ( + + )} + + + + {aiSortBusy + ? "AI sort in progress..." + : aiSortEnabled + ? "AI sort active — click to disable" + : "Enable AI sort"} + + + )} + {activeTypes.length > 0 && ( @@ -182,70 +221,6 @@ export function DocumentsFilters({ - {/* AI Sort Toggle */} - {onToggleAiSort && ( - - - - - - {aiSortBusy - ? "AI sort in progress..." - : aiSortEnabled - ? "AI sort active — click to disable" - : "Enable AI sort"} - - - )} - {/* Search Input */}
diff --git a/surfsense_web/components/icons/providers/github.svg b/surfsense_web/components/icons/providers/github.svg new file mode 100644 index 000000000..7a51b8e0e --- /dev/null +++ b/surfsense_web/components/icons/providers/github.svg @@ -0,0 +1 @@ +Github \ No newline at end of file diff --git a/surfsense_web/components/icons/providers/index.ts b/surfsense_web/components/icons/providers/index.ts index 2afed7fa5..aefa2a053 100644 --- a/surfsense_web/components/icons/providers/index.ts +++ b/surfsense_web/components/icons/providers/index.ts @@ -10,6 +10,7 @@ export { default as DeepInfraIcon } from "./deepinfra.svg"; export { default as DeepSeekIcon } from "./deepseek.svg"; export { default as FireworksAiIcon } from "./fireworksai.svg"; export { default as GeminiIcon } from "./gemini.svg"; +export { default as GitHubModelsIcon } from "./github.svg"; export { default as GroqIcon } from "./groq.svg"; export { default as HuggingFaceIcon } from "./huggingface.svg"; export { default as MiniMaxIcon } from "./minimax.svg"; diff --git a/surfsense_web/components/new-chat/model-selector.tsx b/surfsense_web/components/new-chat/model-selector.tsx index 26937e18b..8fec4cc93 100644 --- a/surfsense_web/components/new-chat/model-selector.tsx +++ b/surfsense_web/components/new-chat/model-selector.tsx @@ -6,9 +6,12 @@ import { Bot, Check, ChevronDown, + ChevronLeft, + ChevronRight, + ChevronUp, Edit3, - Eye, ImageIcon, + ScanEye, Layers, Plus, Search, @@ -69,6 +72,7 @@ const PROVIDER_NAMES: Record = { DEEPSEEK: "DeepSeek", MISTRAL: "Mistral", COHERE: "Cohere", + GITHUB_MODELS: "GitHub Models", GROQ: "Groq", OLLAMA: "Ollama", TOGETHER_AI: "Together AI", @@ -274,17 +278,40 @@ export function ModelSelector({ const [searchQuery, setSearchQuery] = useState(""); const [selectedProvider, setSelectedProvider] = useState("all"); const [focusedIndex, setFocusedIndex] = useState(-1); - const [showScrollIndicator, setShowScrollIndicator] = useState(true); + const [modelScrollPos, setModelScrollPos] = useState<"top" | "middle" | "bottom">("top"); + const [sidebarScrollPos, setSidebarScrollPos] = useState<"top" | "middle" | "bottom">("top"); const providerSidebarRef = useRef(null); const modelListRef = useRef(null); const searchInputRef = useRef(null); const isMobile = useIsMobile(); + const handleModelListScroll = useCallback((e: React.UIEvent) => { + const el = e.currentTarget; + const atTop = el.scrollTop <= 2; + const atBottom = el.scrollHeight - el.scrollTop - el.clientHeight <= 2; + setModelScrollPos(atTop ? "top" : atBottom ? "bottom" : "middle"); + }, []); + + const handleSidebarScroll = useCallback((e: React.UIEvent) => { + const el = e.currentTarget; + if (isMobile) { + const atStart = el.scrollLeft <= 2; + const atEnd = el.scrollWidth - el.scrollLeft - el.clientWidth <= 2; + setSidebarScrollPos(atStart ? "top" : atEnd ? "bottom" : "middle"); + } else { + const atTop = el.scrollTop <= 2; + const atBottom = el.scrollHeight - el.scrollTop - el.clientHeight <= 2; + setSidebarScrollPos(atTop ? "top" : atBottom ? "bottom" : "middle"); + } + }, [isMobile]); + // Reset search + provider when tab changes + // biome-ignore lint/correctness/useExhaustiveDependencies: activeTab is intentionally used as a trigger useEffect(() => { setSelectedProvider("all"); setSearchQuery(""); setFocusedIndex(-1); + setModelScrollPos("top"); }, [activeTab]); // Reset on open @@ -295,8 +322,9 @@ export function ModelSelector({ } }, [open]); - // Cmd/Ctrl+M shortcut + // Cmd/Ctrl+M shortcut (desktop only) useEffect(() => { + if (isMobile) return; const handler = (e: KeyboardEvent) => { if ((e.metaKey || e.ctrlKey) && e.key === "m") { e.preventDefault(); @@ -305,9 +333,10 @@ export function ModelSelector({ }; document.addEventListener("keydown", handler); return () => document.removeEventListener("keydown", handler); - }, []); + }, [isMobile]); // Focus search input on open + // biome-ignore lint/correctness/useExhaustiveDependencies: activeTab is intentionally used as a trigger to re-focus on tab switch useEffect(() => { if (open && !isMobile) { requestAnimationFrame(() => searchInputRef.current?.focus()); @@ -677,6 +706,7 @@ export function ModelSelector({ ); // ─── Keyboard navigation ─── + // biome-ignore lint/correctness/useExhaustiveDependencies: searchQuery and selectedProvider are intentional triggers to reset focus useEffect(() => { setFocusedIndex(-1); }, [searchQuery, selectedProvider]); @@ -767,24 +797,35 @@ export function ModelSelector({ return (
+ {!isMobile && sidebarScrollPos !== "top" && ( +
+ +
+ )} + {isMobile && sidebarScrollPos !== "top" && ( +
+ +
+ )}
{ - const t = e.currentTarget; - setShowScrollIndicator( - t.scrollHeight - t.scrollTop > - t.clientHeight + 10, - ); - }} + onScroll={handleSidebarScroll} className={cn( isMobile - ? "flex flex-row gap-0.5 px-2 py-1.5 overflow-x-auto border-b border-border/40" + ? "flex flex-row gap-0.5 px-1 py-1.5 overflow-x-auto [&::-webkit-scrollbar]:h-0 [&::-webkit-scrollbar-track]:bg-transparent" : "flex flex-col gap-0.5 p-1 overflow-y-auto flex-1 [&::-webkit-scrollbar]:w-0 [&::-webkit-scrollbar-track]:bg-transparent", )} + style={isMobile ? { + maskImage: `linear-gradient(to right, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 24px, black calc(100% - 24px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`, + WebkitMaskImage: `linear-gradient(to right, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 24px, black calc(100% - 24px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`, + } : { + maskImage: `linear-gradient(to bottom, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 32px, black calc(100% - 32px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`, + WebkitMaskImage: `linear-gradient(to bottom, ${sidebarScrollPos === "top" ? "black" : "transparent"}, black 32px, black calc(100% - 32px), ${sidebarScrollPos === "bottom" ? "black" : "transparent"})`, + }} > {activeProviders.map((provider, idx) => { const isAll = provider === "all"; @@ -849,18 +890,23 @@ export function ModelSelector({ )} {isConfigured ? ` (${count})` - : " — not configured"} + : " (not configured)"} ); })}
- {!isMobile && showScrollIndicator && ( -
+ {!isMobile && sidebarScrollPos !== "bottom" && ( +
)} + {isMobile && sidebarScrollPos !== "bottom" && ( +
+ +
+ )}
); }; @@ -889,19 +935,26 @@ export function ModelSelector({ key={`${activeTab}-${item.isGlobal ? "g" : "u"}-${config.id}`} data-model-index={index} role="option" + tabIndex={isMobile ? -1 : 0} aria-selected={isSelected} onClick={() => handleSelectItem(item)} + onKeyDown={isMobile ? undefined : (e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + handleSelectItem(item); + } + }} onMouseEnter={() => setFocusedIndex(index)} className={cn( - "group flex items-start gap-2.5 px-2.5 py-2 rounded-lg cursor-pointer", - "transition-all duration-150 mx-1", - "hover:bg-accent/40 active:scale-[0.99]", + "group flex items-center gap-2.5 px-3 py-2 rounded-xl cursor-pointer", + "transition-all duration-150 mx-2", + "hover:bg-accent/40", isSelected && "bg-primary/6 dark:bg-primary/8", - isFocused && "bg-accent/50 ring-1 ring-primary/20", + isFocused && "bg-accent/50", )} > {/* Provider icon */} -
+
{getProviderIcon(config.provider as string, { isAutoMode, className: "size-5", @@ -917,7 +970,7 @@ export function ModelSelector({ {isAutoMode && ( Recommended @@ -931,8 +984,8 @@ export function ModelSelector({ {!isAutoMode && hasCitations && ( Citations @@ -981,7 +1034,7 @@ export function ModelSelector({ : "Add Vision Model"; return ( -
+
{/* Tab header */}
@@ -999,7 +1052,7 @@ export function ModelSelector({ }, { value: "vision" as const, - icon: Eye, + icon: ScanEye, label: "Vision", }, ] as const @@ -1028,7 +1081,7 @@ export function ModelSelector({ "flex", isMobile ? "flex-col h-[60vh]" - : "flex-row h-[420px]", + : "flex-row h-[380px]", )} > {/* Provider sidebar */} @@ -1037,33 +1090,30 @@ export function ModelSelector({ {/* Main content */}
{/* Search */} -
- +
+ setSearchQuery(e.target.value) } - onKeyDown={handleKeyDown} - autoFocus={!isMobile} + onKeyDown={isMobile ? undefined : handleKeyDown} role="combobox" aria-expanded={true} aria-controls="model-selector-list" className={cn( - "w-full pl-8 pr-3 py-1.5 text-xs rounded-lg", - "bg-secondary/30 border border-border/40", - "focus:outline-none focus:ring-2 focus:ring-primary/20 focus:border-primary/40", - "placeholder:text-muted-foreground/50", - "transition-[box-shadow,border-color] duration-200", + "w-full pl-8 pr-3 py-2.5 text-sm bg-transparent", + "focus:outline-none", + "placeholder:text-muted-foreground", )} />
{/* Provider header when filtered */} {selectedProvider !== "all" && ( -
+
{getProviderIcon(selectedProvider, { className: "size-4", })} @@ -1085,10 +1135,15 @@ export function ModelSelector({ id="model-selector-list" ref={modelListRef} role="listbox" - className="overflow-y-auto flex-1 py-1" + className="overflow-y-auto flex-1 py-1 space-y-1 flex flex-col" + onScroll={handleModelListScroll} + style={{ + maskImage: `linear-gradient(to bottom, ${modelScrollPos === "top" ? "black" : "transparent"}, black 16px, black calc(100% - 16px), ${modelScrollPos === "bottom" ? "black" : "transparent"})`, + WebkitMaskImage: `linear-gradient(to bottom, ${modelScrollPos === "top" ? "black" : "transparent"}, black 16px, black calc(100% - 16px), ${modelScrollPos === "bottom" ? "black" : "transparent"})`, + }} > {currentDisplayItems.length === 0 ? ( -
+
{selectedProvider !== "all" && !configuredProviderSet.has( selectedProvider, @@ -1116,22 +1171,21 @@ export function ModelSelector({

{addHandler && ( )} - ) : ( + ) : searchQuery ? ( <> - +

No models found

@@ -1140,13 +1194,22 @@ export function ModelSelector({ term

+ ) : ( + <> +

+ No models configured +

+

+ Configure models in your search space settings +

+ )}
) : ( <> {globalItems.length > 0 && ( <> -
+
Global Models
{globalItems.map((item, i) => @@ -1163,7 +1226,7 @@ export function ModelSelector({ )} {userItems.length > 0 && ( <> -
+
Your Configurations
{userItems.map((item, i) => @@ -1180,7 +1243,7 @@ export function ModelSelector({ {/* Add model button */} {addHandler && ( -
+
- - Open link - - - {canDelete && ( - - - - - - Delete - - +
+

+ title={snapshot.thread_title} + > + {snapshot.thread_title} +

+ + + + + + + + Copy link + + + + + Open link + + + {canDelete && ( + onDelete(snapshot)} + disabled={isDeleting} + > + + Delete + + )} + +
{/* Message count badge */}
{snapshot.message_count} messages
- {/* Public URL – selectable fallback for manual copy */} -
-
-

- {snapshot.public_url} -

-
- - - - - - {copied ? "Copied!" : "Copy link"} - - -
- {/* Footer: Date + Creator */}
{formattedDate} diff --git a/surfsense_web/components/settings/general-settings-manager.tsx b/surfsense_web/components/settings/general-settings-manager.tsx index 15d44906b..59aaa129e 100644 --- a/surfsense_web/components/settings/general-settings-manager.tsx +++ b/surfsense_web/components/settings/general-settings-manager.tsx @@ -181,10 +181,10 @@ export function GeneralSettingsManager({ searchSpaceId }: GeneralSettingsManager type="submit" variant="outline" disabled={!hasChanges || saving || !name.trim()} - className="gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200" + className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200" > - {saving ? : null} - {saving ? t("general_saving") : t("general_save")} + {t("general_save")} + {saving && }
diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx index d6eb7c64d..aa2749967 100644 --- a/surfsense_web/components/settings/llm-role-manager.tsx +++ b/surfsense_web/components/settings/llm-role-manager.tsx @@ -395,6 +395,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { key={config.id} value={config.id.toString()} className="text-xs md:text-sm py-1.5 md:py-2" + textValue={config.name} >
@@ -403,7 +404,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { {isAuto && ( Recommended diff --git a/surfsense_web/hooks/use-typewriter.ts b/surfsense_web/hooks/use-typewriter.ts index 1e1ce8b83..54d33e08d 100644 --- a/surfsense_web/hooks/use-typewriter.ts +++ b/surfsense_web/hooks/use-typewriter.ts @@ -27,7 +27,6 @@ export function useTypewriter(text: string, speed = 35, skipFor = "New Chat"): s } let i = 0; - setDisplayed(""); intervalRef.current = setInterval(() => { i++; setDisplayed(text.slice(0, i)); diff --git a/surfsense_web/lib/chat/message-utils.ts b/surfsense_web/lib/chat/message-utils.ts index 7c0da03c4..6ec5bd53d 100644 --- a/surfsense_web/lib/chat/message-utils.ts +++ b/surfsense_web/lib/chat/message-utils.ts @@ -39,13 +39,16 @@ export function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { content = [{ type: "text", text: String(msg.content) }]; } - const metadata = msg.author_id + const metadata = (msg.author_id || msg.token_usage) ? { custom: { - author: { - displayName: msg.author_display_name ?? null, - avatarUrl: msg.author_avatar_url ?? null, - }, + ...(msg.author_id && { + author: { + displayName: msg.author_display_name ?? null, + avatarUrl: msg.author_avatar_url ?? null, + }, + }), + ...(msg.token_usage && { usage: msg.token_usage }), }, } : undefined; diff --git a/surfsense_web/lib/chat/streaming-state.ts b/surfsense_web/lib/chat/streaming-state.ts index d54650d40..e5d77672f 100644 --- a/surfsense_web/lib/chat/streaming-state.ts +++ b/surfsense_web/lib/chat/streaming-state.ts @@ -238,6 +238,16 @@ export type SSEEvent = | { type: "data-thread-title-update"; data: { threadId: number; title: string } } | { type: "data-interrupt-request"; data: Record } | { type: "data-documents-updated"; data: Record } + | { + type: "data-token-usage"; + data: { + usage: Record; + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + call_details: Array<{ model: string; prompt_tokens: number; completion_tokens: number; total_tokens: number }>; + }; + } | { type: "error"; errorText: string }; /** diff --git a/surfsense_web/lib/chat/thread-persistence.ts b/surfsense_web/lib/chat/thread-persistence.ts index 08c08ba78..de9827c32 100644 --- a/surfsense_web/lib/chat/thread-persistence.ts +++ b/surfsense_web/lib/chat/thread-persistence.ts @@ -26,6 +26,13 @@ export interface ThreadRecord { has_comments?: boolean; } +export interface TokenUsageSummary { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + model_breakdown?: Record | null; +} + export interface MessageRecord { id: number; thread_id: number; @@ -35,6 +42,7 @@ export interface MessageRecord { author_id?: string | null; author_display_name?: string | null; author_avatar_url?: string | null; + token_usage?: TokenUsageSummary | null; } export interface ThreadListResponse { @@ -111,11 +119,11 @@ export async function getThreadMessages(threadId: number): Promise { return baseApiService.post(`/api/v1/threads/${threadId}/messages`, undefined, { body: message, diff --git a/surfsense_web/lib/provider-icons.tsx b/surfsense_web/lib/provider-icons.tsx index d017d9aa2..e63c5eb2f 100644 --- a/surfsense_web/lib/provider-icons.tsx +++ b/surfsense_web/lib/provider-icons.tsx @@ -13,6 +13,7 @@ import { DeepSeekIcon, FireworksAiIcon, GeminiIcon, + GitHubModelsIcon, GroqIcon, HuggingFaceIcon, MiniMaxIcon, @@ -82,6 +83,8 @@ export function getProviderIcon( return ; case "GOOGLE": return ; + case "GITHUB_MODELS": + return ; case "GROQ": return ; case "HUGGINGFACE": diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json index 701cb086d..2e42451cd 100644 --- a/surfsense_web/messages/en.json +++ b/surfsense_web/messages/en.json @@ -123,7 +123,6 @@ "api_key_nav_description": "Manage your API access token", "api_key_title": "API Key", "api_key_description": "Use this key to authenticate API requests", - "api_key_warning_title": "Keep it secret", "api_key_warning_description": "Your API key grants full access to your account. Never share it publicly or commit it to version control.", "your_api_key": "Your API Key", "copied": "Copied!", diff --git a/surfsense_web/messages/es.json b/surfsense_web/messages/es.json index 9c872e701..e017b0bba 100644 --- a/surfsense_web/messages/es.json +++ b/surfsense_web/messages/es.json @@ -123,7 +123,6 @@ "api_key_nav_description": "Administra tu token de acceso a la API", "api_key_title": "Clave API", "api_key_description": "Usa esta clave para autenticar las solicitudes de la API", - "api_key_warning_title": "Mantenla en secreto", "api_key_warning_description": "Tu clave API otorga acceso completo a tu cuenta. Nunca la compartas públicamente ni la incluyas en el control de versiones.", "your_api_key": "Tu clave API", "copied": "¡Copiado!", diff --git a/surfsense_web/messages/hi.json b/surfsense_web/messages/hi.json index 3b6975bf5..e60e92f03 100644 --- a/surfsense_web/messages/hi.json +++ b/surfsense_web/messages/hi.json @@ -123,7 +123,6 @@ "api_key_nav_description": "अपना API एक्सेस टोकन प्रबंधित करें", "api_key_title": "API कुंजी", "api_key_description": "API अनुरोधों को प्रमाणित करने के लिए इस कुंजी का उपयोग करें", - "api_key_warning_title": "इसे गुप्त रखें", "api_key_warning_description": "आपकी API कुंजी आपके खाते तक पूर्ण पहुंच प्रदान करती है। इसे कभी सार्वजनिक रूप से साझा न करें या संस्करण नियंत्रण में शामिल न करें।", "your_api_key": "आपकी API कुंजी", "copied": "कॉपी किया गया!", diff --git a/surfsense_web/messages/pt.json b/surfsense_web/messages/pt.json index e92dff375..acece5391 100644 --- a/surfsense_web/messages/pt.json +++ b/surfsense_web/messages/pt.json @@ -123,7 +123,6 @@ "api_key_nav_description": "Gerencie seu token de acesso à API", "api_key_title": "Chave API", "api_key_description": "Use esta chave para autenticar solicitações da API", - "api_key_warning_title": "Mantenha em segredo", "api_key_warning_description": "Sua chave API concede acesso total à sua conta. Nunca a compartilhe publicamente nem a inclua no controle de versão.", "your_api_key": "Sua chave API", "copied": "Copiado!", diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json index c53155edf..6a59115ed 100644 --- a/surfsense_web/messages/zh.json +++ b/surfsense_web/messages/zh.json @@ -108,7 +108,6 @@ "api_key_nav_description": "管理您的API访问令牌", "api_key_title": "API密钥", "api_key_description": "使用此密钥验证API请求", - "api_key_warning_title": "请保密", "api_key_warning_description": "您的API密钥可以完全访问您的账户。请勿公开分享或提交到版本控制。", "your_api_key": "您的API密钥", "copied": "已复制!",