From 345cb88224803a5fb0b18340882409a046e02ff9 Mon Sep 17 00:00:00 2001 From: guangyang1206 Date: Wed, 29 Apr 2026 12:14:08 +0800 Subject: [PATCH 01/77] refactor(settings): use key prop to reset LLM role manager form state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #1018 Remove the sync useEffect that copied preferences into local state, along with the savingRef guard that prevented mid-save overwrites. Instead, pass key={searchSpaceId} on the LLMRoleManager component so React remounts the form with correct initial state whenever the search space changes — no extra re-render, no effect dependency array. Changes: - llm-role-manager.tsx: remove useEffect + useRef + savingRef pattern; drop useEffect and useRef from imports (now only useCallback, useState) - search-space-settings-dialog.tsx: add key={searchSpaceId} to the LLMRoleManager element so the component remounts on search-space change Before: useEffect synced preferences → assignments on each preference update, with savingRef to avoid overwriting an in-flight save. After: React remounts the component with correct initial state from the preferences selector; no mid-save race possible. 
--- .../components/settings/llm-role-manager.tsx | 21 +------------------ .../settings/search-space-settings-dialog.tsx | 2 +- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx index 015027111..e21dc9028 100644 --- a/surfsense_web/components/settings/llm-role-manager.tsx +++ b/surfsense_web/components/settings/llm-role-manager.tsx @@ -11,7 +11,7 @@ import { RefreshCw, ScanEye, } from "lucide-react"; -import { useCallback, useEffect, useRef, useState } from "react"; +import { useCallback, useState } from "react"; import { toast } from "sonner"; import { globalImageGenConfigsAtom, @@ -143,23 +143,6 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { })); const [savingRole, setSavingRole] = useState(null); - const savingRef = useRef(false); - - useEffect(() => { - if (!savingRef.current) { - setAssignments({ - agent_llm_id: preferences.agent_llm_id ?? "", - document_summary_llm_id: preferences.document_summary_llm_id ?? "", - image_generation_config_id: preferences.image_generation_config_id ?? "", - vision_llm_config_id: preferences.vision_llm_config_id ?? 
"", - }); - } - }, [ - preferences?.agent_llm_id, - preferences?.document_summary_llm_id, - preferences?.image_generation_config_id, - preferences?.vision_llm_config_id, - ]); const handleRoleAssignment = useCallback( async (prefKey: string, configId: string) => { @@ -167,7 +150,6 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { setAssignments((prev) => ({ ...prev, [prefKey]: value })); setSavingRole(prefKey); - savingRef.current = true; try { await updatePreferences({ @@ -177,7 +159,6 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { toast.success("Role assignment updated"); } finally { setSavingRole(null); - savingRef.current = false; } }, [updatePreferences, searchSpaceId] diff --git a/surfsense_web/components/settings/search-space-settings-dialog.tsx b/surfsense_web/components/settings/search-space-settings-dialog.tsx index aefe1efd2..2a7ba82b6 100644 --- a/surfsense_web/components/settings/search-space-settings-dialog.tsx +++ b/surfsense_web/components/settings/search-space-settings-dialog.tsx @@ -116,7 +116,7 @@ export function SearchSpaceSettingsDialog({ searchSpaceId }: SearchSpaceSettings const content: Record = { general: , models: , - roles: , + roles: , "image-models": , "vision-models": , "team-roles": , From 57db198919bbd1e7da8d8364aa90eba01525e7d0 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 29 Apr 2026 19:14:56 +0530 Subject: [PATCH 02/77] feat(chat): add thread-level auto model pinning fields --- ...34_add_thread_auto_model_pinning_fields.py | 63 +++++++++++++++++++ surfsense_backend/app/db.py | 7 +++ 2 files changed, 70 insertions(+) create mode 100644 surfsense_backend/alembic/versions/134_add_thread_auto_model_pinning_fields.py diff --git a/surfsense_backend/alembic/versions/134_add_thread_auto_model_pinning_fields.py b/surfsense_backend/alembic/versions/134_add_thread_auto_model_pinning_fields.py new file mode 100644 index 
000000000..ab1643b02 --- /dev/null +++ b/surfsense_backend/alembic/versions/134_add_thread_auto_model_pinning_fields.py @@ -0,0 +1,63 @@ +"""134_add_thread_auto_model_pinning_fields + +Revision ID: 134 +Revises: 133 +Create Date: 2026-04-29 + +Add thread-level fields to persist Auto (Fastest) model pinning metadata: +- pinned_llm_config_id: concrete resolved config id used for this thread +- pinned_auto_mode: auto policy identifier (currently "auto_fastest") +- pinned_at: timestamp when the pin was created/refreshed +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +revision: str = "134" +down_revision: str | None = "133" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.add_column( + "new_chat_threads", + sa.Column("pinned_llm_config_id", sa.Integer(), nullable=True), + ) + op.add_column( + "new_chat_threads", + sa.Column("pinned_auto_mode", sa.String(length=32), nullable=True), + ) + op.add_column( + "new_chat_threads", + sa.Column("pinned_at", sa.TIMESTAMP(timezone=True), nullable=True), + ) + + op.create_index( + "ix_new_chat_threads_pinned_llm_config_id", + "new_chat_threads", + ["pinned_llm_config_id"], + unique=False, + ) + op.create_index( + "ix_new_chat_threads_pinned_auto_mode", + "new_chat_threads", + ["pinned_auto_mode"], + unique=False, + ) + + +def downgrade() -> None: + op.drop_index("ix_new_chat_threads_pinned_auto_mode", table_name="new_chat_threads") + op.drop_index( + "ix_new_chat_threads_pinned_llm_config_id", table_name="new_chat_threads" + ) + + op.drop_column("new_chat_threads", "pinned_at") + op.drop_column("new_chat_threads", "pinned_auto_mode") + op.drop_column("new_chat_threads", "pinned_llm_config_id") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 75342a8e1..f8b1390d9 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py 
@@ -638,6 +638,13 @@ class NewChatThread(BaseModel, TimestampMixin): default=False, server_default="false", ) + # Auto model pinning metadata: + # - pinned_llm_config_id stores the concrete resolved model config id. + # - pinned_auto_mode indicates which auto policy produced the pin. + # This allows Auto (Fastest) to resolve once per thread and stay stable. + pinned_llm_config_id = Column(Integer, nullable=True, index=True) + pinned_auto_mode = Column(String(32), nullable=True, index=True) + pinned_at = Column(TIMESTAMP(timezone=True), nullable=True) # Relationships search_space = relationship("SearchSpace", back_populates="new_chat_threads") From 41849fe10f5fbe9a4792ad665308f5fea4c37721 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 29 Apr 2026 19:15:15 +0530 Subject: [PATCH 03/77] feat(chat): add auto model pin resolution service --- .../app/services/auto_model_pin_service.py | 205 ++++++++++++ .../services/test_auto_model_pin_service.py | 291 ++++++++++++++++++ 2 files changed, 496 insertions(+) create mode 100644 surfsense_backend/app/services/auto_model_pin_service.py create mode 100644 surfsense_backend/tests/unit/services/test_auto_model_pin_service.py diff --git a/surfsense_backend/app/services/auto_model_pin_service.py b/surfsense_backend/app/services/auto_model_pin_service.py new file mode 100644 index 000000000..ce417a26d --- /dev/null +++ b/surfsense_backend/app/services/auto_model_pin_service.py @@ -0,0 +1,205 @@ +"""Resolve and persist Auto (Fastest) model pins per chat thread. + +Auto (Fastest) is represented by ``agent_llm_id == 0``. For chat threads we +resolve that virtual mode to one concrete global LLM config exactly once and +persist the chosen config id on ``new_chat_threads`` so subsequent turns are +stable. 
+""" + +from __future__ import annotations + +import hashlib +import logging +from dataclasses import dataclass +from datetime import UTC, datetime +from uuid import UUID + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.config import config +from app.db import NewChatThread +from app.services.token_quota_service import TokenQuotaService + +logger = logging.getLogger(__name__) + +AUTO_FASTEST_ID = 0 +AUTO_FASTEST_MODE = "auto_fastest" + + +@dataclass +class AutoPinResolution: + resolved_llm_config_id: int + resolved_tier: str + from_existing_pin: bool + + +def _is_usable_global_config(cfg: dict) -> bool: + return bool( + cfg.get("id") is not None + and cfg.get("model_name") + and cfg.get("provider") + and cfg.get("api_key") + ) + + +def _global_candidates() -> list[dict]: + candidates = [cfg for cfg in config.GLOBAL_LLM_CONFIGS if _is_usable_global_config(cfg)] + return sorted(candidates, key=lambda c: int(c.get("id", 0))) + + +def _tier_of(cfg: dict) -> str: + return str(cfg.get("billing_tier", "free")).lower() + + +def _deterministic_pick(candidates: list[dict], thread_id: int) -> dict: + digest = hashlib.sha256(f"{AUTO_FASTEST_MODE}:{thread_id}".encode()).digest() + idx = int.from_bytes(digest[:8], "big") % len(candidates) + return candidates[idx] + + +def _to_uuid(user_id: str | UUID | None) -> UUID | None: + if user_id is None: + return None + if isinstance(user_id, UUID): + return user_id + try: + return UUID(str(user_id)) + except Exception: + return None + + +async def _is_premium_eligible(session: AsyncSession, user_id: str | UUID | None) -> bool: + parsed = _to_uuid(user_id) + if parsed is None: + return False + usage = await TokenQuotaService.premium_get_usage(session, parsed) + return bool(usage.allowed) + + +async def resolve_or_get_pinned_llm_config_id( + session: AsyncSession, + *, + thread_id: int, + search_space_id: int, + user_id: str | UUID | None, + selected_llm_config_id: int, +) -> AutoPinResolution: 
+ """Resolve Auto (Fastest) to one concrete config id and persist pin metadata. + + For non-auto selections, this function clears existing auto pin metadata and + returns the selected id as-is. + """ + thread = ( + ( + await session.execute( + select(NewChatThread) + .where(NewChatThread.id == thread_id) + .with_for_update(of=NewChatThread) + ) + ) + .unique() + .scalar_one_or_none() + ) + if thread is None: + raise ValueError(f"Thread {thread_id} not found") + if thread.search_space_id != search_space_id: + raise ValueError( + f"Thread {thread_id} does not belong to search space {search_space_id}" + ) + + # Explicit model selected: clear stale auto pin metadata. + if selected_llm_config_id != AUTO_FASTEST_ID: + if ( + thread.pinned_llm_config_id is not None + or thread.pinned_auto_mode is not None + or thread.pinned_at is not None + ): + thread.pinned_llm_config_id = None + thread.pinned_auto_mode = None + thread.pinned_at = None + await session.commit() + return AutoPinResolution( + resolved_llm_config_id=selected_llm_config_id, + resolved_tier="explicit", + from_existing_pin=False, + ) + + candidates = _global_candidates() + if not candidates: + raise ValueError("No usable global LLM configs are available for Auto mode") + candidate_by_id = {int(c["id"]): c for c in candidates} + + # Reuse existing valid pin without re-checking current quota (no silent tier switch). 
+ pinned_id = thread.pinned_llm_config_id + if ( + thread.pinned_auto_mode == AUTO_FASTEST_MODE + and pinned_id is not None + and int(pinned_id) in candidate_by_id + ): + pinned_cfg = candidate_by_id[int(pinned_id)] + logger.info( + "auto_pin_reused thread_id=%s search_space_id=%s resolved_config_id=%s tier=%s", + thread_id, + search_space_id, + pinned_id, + _tier_of(pinned_cfg), + ) + return AutoPinResolution( + resolved_llm_config_id=int(pinned_id), + resolved_tier=_tier_of(pinned_cfg), + from_existing_pin=True, + ) + if pinned_id is not None: + logger.info( + "auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s pinned_auto_mode=%s", + thread_id, + search_space_id, + pinned_id, + thread.pinned_auto_mode, + ) + + premium_eligible = await _is_premium_eligible(session, user_id) + if premium_eligible: + eligible = candidates + else: + eligible = [c for c in candidates if _tier_of(c) != "premium"] + + if not eligible: + raise ValueError( + "Auto mode could not find an eligible LLM config for this user and quota state" + ) + + selected_cfg = _deterministic_pick(eligible, thread_id) + selected_id = int(selected_cfg["id"]) + selected_tier = _tier_of(selected_cfg) + + thread.pinned_llm_config_id = selected_id + thread.pinned_auto_mode = AUTO_FASTEST_MODE + thread.pinned_at = datetime.now(UTC) + await session.commit() + + if pinned_id is None: + logger.info( + "auto_pin_created thread_id=%s search_space_id=%s resolved_config_id=%s tier=%s premium_eligible=%s", + thread_id, + search_space_id, + selected_id, + selected_tier, + premium_eligible, + ) + else: + logger.info( + "auto_pin_repaired thread_id=%s search_space_id=%s previous_config_id=%s resolved_config_id=%s tier=%s premium_eligible=%s", + thread_id, + search_space_id, + pinned_id, + selected_id, + selected_tier, + premium_eligible, + ) + return AutoPinResolution( + resolved_llm_config_id=selected_id, + resolved_tier=selected_tier, + from_existing_pin=False, + ) diff --git 
a/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py new file mode 100644 index 000000000..a9853c980 --- /dev/null +++ b/surfsense_backend/tests/unit/services/test_auto_model_pin_service.py @@ -0,0 +1,291 @@ +from __future__ import annotations + +from dataclasses import dataclass +from types import SimpleNamespace + +import pytest + +from app.services.auto_model_pin_service import ( + AUTO_FASTEST_MODE, + resolve_or_get_pinned_llm_config_id, +) + +pytestmark = pytest.mark.unit + + +@dataclass +class _FakeQuotaResult: + allowed: bool + + +class _FakeExecResult: + def __init__(self, thread): + self._thread = thread + + def unique(self): + return self + + def scalar_one_or_none(self): + return self._thread + + +class _FakeSession: + def __init__(self, thread): + self.thread = thread + self.commit_count = 0 + + async def execute(self, _stmt): + return _FakeExecResult(self.thread) + + async def commit(self): + self.commit_count += 1 + + +def _thread( + *, + search_space_id: int = 10, + pinned_llm_config_id: int | None = None, + pinned_auto_mode: str | None = None, +): + return SimpleNamespace( + id=1, + search_space_id=search_space_id, + pinned_llm_config_id=pinned_llm_config_id, + pinned_auto_mode=pinned_auto_mode, + pinned_at=None, + ) + + +@pytest.mark.asyncio +async def test_auto_first_turn_pins_one_model(monkeypatch): + from app.config import config + + session = _FakeSession(_thread()) + monkeypatch.setattr( + config, + "GLOBAL_LLM_CONFIGS", + [ + {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"}, + {"id": -1, "provider": "OPENAI", "model_name": "gpt-prem", "api_key": "k2", "billing_tier": "premium"}, + ], + ) + + async def _allowed(*_args, **_kwargs): + return _FakeQuotaResult(allowed=True) + + monkeypatch.setattr( + "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage", + _allowed, + ) + + result = await 
resolve_or_get_pinned_llm_config_id( + session, + thread_id=1, + search_space_id=10, + user_id="00000000-0000-0000-0000-000000000001", + selected_llm_config_id=0, + ) + assert result.resolved_llm_config_id in {-1, -2} + assert session.thread.pinned_llm_config_id == result.resolved_llm_config_id + assert session.thread.pinned_auto_mode == AUTO_FASTEST_MODE + assert session.thread.pinned_at is not None + assert session.commit_count == 1 + + +@pytest.mark.asyncio +async def test_next_turn_reuses_existing_pin(monkeypatch): + from app.config import config + + session = _FakeSession( + _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE) + ) + monkeypatch.setattr( + config, + "GLOBAL_LLM_CONFIGS", + [ + {"id": -1, "provider": "OPENAI", "model_name": "gpt-prem", "api_key": "k2", "billing_tier": "premium"}, + ], + ) + + async def _must_not_call(*_args, **_kwargs): + raise AssertionError("premium_get_usage should not be called for valid pin reuse") + + monkeypatch.setattr( + "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage", + _must_not_call, + ) + + result = await resolve_or_get_pinned_llm_config_id( + session, + thread_id=1, + search_space_id=10, + user_id="00000000-0000-0000-0000-000000000001", + selected_llm_config_id=0, + ) + assert result.resolved_llm_config_id == -1 + assert result.from_existing_pin is True + assert session.commit_count == 0 + + +@pytest.mark.asyncio +async def test_premium_eligible_auto_can_pin_premium(monkeypatch): + from app.config import config + + session = _FakeSession(_thread()) + monkeypatch.setattr( + config, + "GLOBAL_LLM_CONFIGS", + [ + {"id": -1, "provider": "OPENAI", "model_name": "gpt-prem", "api_key": "k2", "billing_tier": "premium"}, + ], + ) + + async def _allowed(*_args, **_kwargs): + return _FakeQuotaResult(allowed=True) + + monkeypatch.setattr( + "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage", + _allowed, + ) + + result = await resolve_or_get_pinned_llm_config_id( 
+ session, + thread_id=1, + search_space_id=10, + user_id="00000000-0000-0000-0000-000000000001", + selected_llm_config_id=0, + ) + assert result.resolved_llm_config_id == -1 + assert result.resolved_tier == "premium" + + +@pytest.mark.asyncio +async def test_premium_ineligible_auto_pins_free_only(monkeypatch): + from app.config import config + + session = _FakeSession(_thread()) + monkeypatch.setattr( + config, + "GLOBAL_LLM_CONFIGS", + [ + {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1", "billing_tier": "free"}, + {"id": -1, "provider": "OPENAI", "model_name": "gpt-prem", "api_key": "k2", "billing_tier": "premium"}, + ], + ) + + async def _blocked(*_args, **_kwargs): + return _FakeQuotaResult(allowed=False) + + monkeypatch.setattr( + "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage", + _blocked, + ) + + result = await resolve_or_get_pinned_llm_config_id( + session, + thread_id=1, + search_space_id=10, + user_id="00000000-0000-0000-0000-000000000001", + selected_llm_config_id=0, + ) + assert result.resolved_llm_config_id == -2 + assert result.resolved_tier == "free" + + +@pytest.mark.asyncio +async def test_pinned_premium_stays_premium_after_quota_exhaustion(monkeypatch): + from app.config import config + + session = _FakeSession( + _thread(pinned_llm_config_id=-1, pinned_auto_mode=AUTO_FASTEST_MODE) + ) + monkeypatch.setattr( + config, + "GLOBAL_LLM_CONFIGS", + [ + {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1", "billing_tier": "free"}, + {"id": -1, "provider": "OPENAI", "model_name": "gpt-prem", "api_key": "k2", "billing_tier": "premium"}, + ], + ) + + async def _blocked(*_args, **_kwargs): + return _FakeQuotaResult(allowed=False) + + monkeypatch.setattr( + "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage", + _blocked, + ) + + result = await resolve_or_get_pinned_llm_config_id( + session, + thread_id=1, + search_space_id=10, + 
user_id="00000000-0000-0000-0000-000000000001", + selected_llm_config_id=0, + ) + assert result.resolved_llm_config_id == -1 + assert result.from_existing_pin is True + + +@pytest.mark.asyncio +async def test_explicit_user_model_change_clears_pin(monkeypatch): + from app.config import config + + session = _FakeSession( + _thread(pinned_llm_config_id=-2, pinned_auto_mode=AUTO_FASTEST_MODE) + ) + monkeypatch.setattr( + config, + "GLOBAL_LLM_CONFIGS", + [ + {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"}, + ], + ) + + result = await resolve_or_get_pinned_llm_config_id( + session, + thread_id=1, + search_space_id=10, + user_id="00000000-0000-0000-0000-000000000001", + selected_llm_config_id=7, + ) + assert result.resolved_llm_config_id == 7 + assert session.thread.pinned_llm_config_id is None + assert session.thread.pinned_auto_mode is None + assert session.thread.pinned_at is None + assert session.commit_count == 1 + + +@pytest.mark.asyncio +async def test_invalid_pinned_config_repairs_with_new_pin(monkeypatch): + from app.config import config + + session = _FakeSession( + _thread(pinned_llm_config_id=-999, pinned_auto_mode=AUTO_FASTEST_MODE) + ) + monkeypatch.setattr( + config, + "GLOBAL_LLM_CONFIGS", + [ + {"id": -2, "provider": "OPENAI", "model_name": "gpt-free", "api_key": "k1"}, + ], + ) + + async def _allowed(*_args, **_kwargs): + return _FakeQuotaResult(allowed=True) + + monkeypatch.setattr( + "app.services.auto_model_pin_service.TokenQuotaService.premium_get_usage", + _allowed, + ) + + result = await resolve_or_get_pinned_llm_config_id( + session, + thread_id=1, + search_space_id=10, + user_id="00000000-0000-0000-0000-000000000001", + selected_llm_config_id=0, + ) + assert result.resolved_llm_config_id == -2 + assert session.thread.pinned_llm_config_id == -2 + assert session.commit_count == 1 From 835bd9f65df2abfd80ecd8def501b2db8595c326 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> 
Date: Wed, 29 Apr 2026 19:15:36 +0530 Subject: [PATCH 04/77] fix(chat): enforce pinned model quota flow and reset stale pins --- .../app/routes/search_spaces_routes.py | 25 +++- .../app/tasks/chat/stream_new_chat.py | 107 +++++++++++------- 2 files changed, 88 insertions(+), 44 deletions(-) diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py index 828137518..7944e7d66 100644 --- a/surfsense_backend/app/routes/search_spaces_routes.py +++ b/surfsense_backend/app/routes/search_spaces_routes.py @@ -3,7 +3,7 @@ import logging from fastapi import APIRouter, Depends, HTTPException from langchain_core.messages import HumanMessage from pydantic import BaseModel as PydanticBaseModel -from sqlalchemy import func +from sqlalchemy import func, update from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select @@ -15,6 +15,7 @@ from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_mem from app.config import config from app.db import ( ImageGenerationConfig, + NewChatThread, NewLLMConfig, Permission, SearchSpace, @@ -790,9 +791,31 @@ async def update_llm_preferences( # Update preferences update_data = preferences.model_dump(exclude_unset=True) + previous_agent_llm_id = search_space.agent_llm_id for key, value in update_data.items(): setattr(search_space, key, value) + agent_llm_changed = ( + "agent_llm_id" in update_data + and update_data["agent_llm_id"] != previous_agent_llm_id + ) + if agent_llm_changed: + await session.execute( + update(NewChatThread) + .where(NewChatThread.search_space_id == search_space_id) + .values( + pinned_llm_config_id=None, + pinned_auto_mode=None, + pinned_at=None, + ) + ) + logger.info( + "Cleared auto model pins for search_space_id=%s after agent_llm_id change (%s -> %s)", + search_space_id, + previous_agent_llm_id, + update_data["agent_llm_id"], + ) + await session.commit() await session.refresh(search_space) diff --git 
a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index c254e66e2..1a56547ca 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -56,6 +56,7 @@ from app.db import ( shielded_async_session, ) from app.prompts import TITLE_GENERATION_PROMPT +from app.services.auto_model_pin_service import resolve_or_get_pinned_llm_config_id from app.services.chat_session_state_service import ( clear_ai_responding, set_ai_responding, @@ -1456,6 +1457,21 @@ async def stream_new_chat( agent_config: AgentConfig | None = None _t0 = time.perf_counter() + try: + llm_config_id = ( + await resolve_or_get_pinned_llm_config_id( + session, + thread_id=chat_id, + search_space_id=search_space_id, + user_id=user_id, + selected_llm_config_id=llm_config_id, + ) + ).resolved_llm_config_id + except ValueError as pin_error: + yield streaming_service.format_error(str(pin_error)) + yield streaming_service.format_done() + return + if llm_config_id >= 0: # Positive ID: Load from NewLLMConfig database table agent_config = await load_agent_config( @@ -1491,12 +1507,11 @@ async def stream_new_chat( llm_config_id, ) - # Premium quota reservation — applies to explicitly premium configs - # AND Auto mode (which may route to premium models). + # Premium quota reservation for pinned premium model only. _needs_premium_quota = ( agent_config is not None and user_id - and (agent_config.is_premium or agent_config.is_auto_mode) + and agent_config.is_premium ) if _needs_premium_quota: import uuid as _uuid @@ -1519,16 +1534,18 @@ async def stream_new_chat( ) _premium_reserved = reserve_amount if not quota_result.allowed: - if agent_config.is_premium: - yield streaming_service.format_error( - "Premium token quota exceeded. Please purchase more tokens to continue using premium models." 
- ) - yield streaming_service.format_done() - return - # Auto mode: quota exhausted but we can still proceed - # (the router may pick a free model). Reset reservation. - _premium_request_id = None - _premium_reserved = 0 + logging.getLogger(__name__).info( + "premium_quota_blocked_pinned_model thread_id=%s search_space_id=%s user_id=%s resolved_config_id=%s", + chat_id, + search_space_id, + user_id, + llm_config_id, + ) + yield streaming_service.format_error( + "Premium token quota exceeded for this pinned model. Select a free model or re-select Auto (Fastest) to repin." + ) + yield streaming_service.format_done() + return if not llm: yield streaming_service.format_error("Failed to create LLM instance") @@ -1961,28 +1978,20 @@ async def stream_new_chat( ) # Finalize premium quota with actual tokens. - # For Auto mode, only count tokens from calls that used premium models. if _premium_request_id and user_id: try: from app.services.token_quota_service import TokenQuotaService - if agent_config and agent_config.is_auto_mode: - from app.services.llm_router_service import LLMRouterService - - actual_premium_tokens = LLMRouterService.compute_premium_tokens( - accumulator.calls - ) - else: - actual_premium_tokens = accumulator.grand_total - async with shielded_async_session() as quota_session: await TokenQuotaService.premium_finalize( db_session=quota_session, user_id=UUID(user_id), request_id=_premium_request_id, - actual_tokens=actual_premium_tokens, + actual_tokens=accumulator.grand_total, reserved_tokens=_premium_reserved, ) + _premium_request_id = None + _premium_reserved = 0 except Exception: logging.getLogger(__name__).warning( "Failed to finalize premium quota for user %s", @@ -2175,6 +2184,21 @@ async def stream_resume_chat( agent_config: AgentConfig | None = None _t0 = time.perf_counter() + try: + llm_config_id = ( + await resolve_or_get_pinned_llm_config_id( + session, + thread_id=chat_id, + search_space_id=search_space_id, + user_id=user_id, + 
selected_llm_config_id=llm_config_id, + ) + ).resolved_llm_config_id + except ValueError as pin_error: + yield streaming_service.format_error(str(pin_error)) + yield streaming_service.format_done() + return + if llm_config_id >= 0: agent_config = await load_agent_config( session=session, @@ -2208,7 +2232,7 @@ async def stream_resume_chat( _resume_needs_premium = ( agent_config is not None and user_id - and (agent_config.is_premium or agent_config.is_auto_mode) + and agent_config.is_premium ) if _resume_needs_premium: import uuid as _uuid @@ -2231,14 +2255,18 @@ async def stream_resume_chat( ) _resume_premium_reserved = reserve_amount if not quota_result.allowed: - if agent_config.is_premium: - yield streaming_service.format_error( - "Premium token quota exceeded. Please purchase more tokens to continue using premium models." - ) - yield streaming_service.format_done() - return - _resume_premium_request_id = None - _resume_premium_reserved = 0 + logging.getLogger(__name__).info( + "premium_quota_blocked_pinned_model thread_id=%s search_space_id=%s user_id=%s resolved_config_id=%s", + chat_id, + search_space_id, + user_id, + llm_config_id, + ) + yield streaming_service.format_error( + "Premium token quota exceeded for this pinned model. Select a free model or re-select Auto (Fastest) to repin." 
+ ) + yield streaming_service.format_done() + return if not llm: yield streaming_service.format_error("Failed to create LLM instance") @@ -2370,23 +2398,16 @@ async def stream_resume_chat( try: from app.services.token_quota_service import TokenQuotaService - if agent_config and agent_config.is_auto_mode: - from app.services.llm_router_service import LLMRouterService - - actual_premium_tokens = LLMRouterService.compute_premium_tokens( - accumulator.calls - ) - else: - actual_premium_tokens = accumulator.grand_total - async with shielded_async_session() as quota_session: await TokenQuotaService.premium_finalize( db_session=quota_session, user_id=UUID(user_id), request_id=_resume_premium_request_id, - actual_tokens=actual_premium_tokens, + actual_tokens=accumulator.grand_total, reserved_tokens=_resume_premium_reserved, ) + _resume_premium_request_id = None + _resume_premium_reserved = 0 except Exception: logging.getLogger(__name__).warning( "Failed to finalize premium quota for user %s (resume)", From d5ef0d2598573578d3abf0140c58da6d4e63401d Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 29 Apr 2026 19:15:46 +0530 Subject: [PATCH 05/77] feat(ui): surface pinned premium quota alerts in chat thread --- .../new-chat/[[...chat_id]]/page.tsx | 81 +++++++++++++++++-- .../atoms/chat/premium-alert.atom.ts | 33 ++++++++ .../components/assistant-ui/thread.tsx | 44 +++++++++- 3 files changed, 148 insertions(+), 10 deletions(-) create mode 100644 surfsense_web/atoms/chat/premium-alert.atom.ts diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 7773a438a..a5461e17f 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -19,6 +19,7 @@ import { currentThreadAtom, 
setTargetCommentIdAtom, } from "@/atoms/chat/current-thread.atom"; +import { setPremiumAlertForThreadAtom } from "@/atoms/chat/premium-alert.atom"; import { type MentionedDocumentInfo, mentionedDocumentIdsAtom, @@ -200,6 +201,19 @@ const BASE_TOOLS_WITH_UI = new Set([ // "write_todos", // Disabled for now ]); +const PINNED_PREMIUM_QUOTA_MESSAGE = "Premium token quota exceeded for this pinned model."; + +function getPinnedPremiumQuotaErrorMessage(error: unknown): string | null { + if (!(error instanceof Error)) return null; + if (!error.message.toLowerCase().includes("premium token quota exceeded")) { + return null; + } + if (!error.message.toLowerCase().includes("pinned model")) { + return null; + } + return error.message || PINNED_PREMIUM_QUOTA_MESSAGE; +} + export default function NewChatPage() { const params = useParams(); const queryClient = useQueryClient(); @@ -226,6 +240,7 @@ export default function NewChatPage() { const setMentionedDocuments = useSetAtom(mentionedDocumentsAtom); const setMessageDocumentsMap = useSetAtom(messageDocumentsMapAtom); const setCurrentThreadState = useSetAtom(currentThreadAtom); + const setPremiumAlertForThread = useSetAtom(setPremiumAlertForThreadAtom); const setTargetCommentId = useSetAtom(setTargetCommentIdAtom); const clearTargetCommentId = useSetAtom(clearTargetCommentIdAtom); const closeReportPanel = useSetAtom(closeReportPanelAtom); @@ -951,6 +966,7 @@ export default function NewChatPage() { return; } console.error("[NewChatPage] Chat error:", error); + const premiumQuotaAlertMessage = getPinnedPremiumQuotaErrorMessage(error); // Track chat error trackChatError( @@ -959,7 +975,15 @@ export default function NewChatPage() { error instanceof Error ? error.message : "Unknown error" ); - toast.error("Failed to get response. 
Please try again."); + if (premiumQuotaAlertMessage) { + setPremiumAlertForThread({ + threadId: currentThreadId, + message: premiumQuotaAlertMessage, + }); + toast.error(PINNED_PREMIUM_QUOTA_MESSAGE); + } else { + toast.error("Failed to get response. Please try again."); + } // Update assistant message with error setMessages((prev) => prev.map((m) => @@ -969,7 +993,9 @@ export default function NewChatPage() { content: [ { type: "text", - text: "Sorry, there was an error. Please try again.", + text: + premiumQuotaAlertMessage ?? + "Sorry, there was an error. Please try again.", }, ], } @@ -998,6 +1024,7 @@ export default function NewChatPage() { pendingUserImageUrls, setPendingUserImageUrls, toolsWithUI, + setPremiumAlertForThread, ] ); @@ -1257,13 +1284,29 @@ export default function NewChatPage() { return; } console.error("[NewChatPage] Resume error:", error); - toast.error("Failed to resume. Please try again."); + const premiumQuotaAlertMessage = getPinnedPremiumQuotaErrorMessage(error); + if (premiumQuotaAlertMessage) { + setPremiumAlertForThread({ + threadId: resumeThreadId, + message: premiumQuotaAlertMessage, + }); + toast.error(PINNED_PREMIUM_QUOTA_MESSAGE); + } else { + toast.error("Failed to resume. Please try again."); + } } finally { setIsRunning(false); abortControllerRef.current = null; } }, - [pendingInterrupt, messages, searchSpaceId, tokenUsageStore, toolsWithUI] + [ + pendingInterrupt, + messages, + searchSpaceId, + tokenUsageStore, + toolsWithUI, + setPremiumAlertForThread, + ] ); useEffect(() => { @@ -1584,18 +1627,34 @@ export default function NewChatPage() { } batcher.dispose(); console.error("[NewChatPage] Regeneration error:", error); + const premiumQuotaAlertMessage = getPinnedPremiumQuotaErrorMessage(error); trackChatError( searchSpaceId, threadId, error instanceof Error ? error.message : "Unknown error" ); - toast.error("Failed to regenerate response. 
Please try again."); + if (premiumQuotaAlertMessage) { + setPremiumAlertForThread({ + threadId, + message: premiumQuotaAlertMessage, + }); + toast.error(PINNED_PREMIUM_QUOTA_MESSAGE); + } else { + toast.error("Failed to regenerate response. Please try again."); + } setMessages((prev) => prev.map((m) => m.id === assistantMsgId ? { ...m, - content: [{ type: "text", text: "Sorry, there was an error. Please try again." }], + content: [ + { + type: "text", + text: + premiumQuotaAlertMessage ?? + "Sorry, there was an error. Please try again.", + }, + ], } : m ) @@ -1605,7 +1664,15 @@ export default function NewChatPage() { abortControllerRef.current = null; } }, - [threadId, searchSpaceId, messages, disabledTools, tokenUsageStore, toolsWithUI] + [ + threadId, + searchSpaceId, + messages, + disabledTools, + tokenUsageStore, + toolsWithUI, + setPremiumAlertForThread, + ] ); // Handle editing a message - truncates history and regenerates with new query diff --git a/surfsense_web/atoms/chat/premium-alert.atom.ts b/surfsense_web/atoms/chat/premium-alert.atom.ts new file mode 100644 index 000000000..c0efc174f --- /dev/null +++ b/surfsense_web/atoms/chat/premium-alert.atom.ts @@ -0,0 +1,33 @@ +import { atom } from "jotai"; + +export type PremiumAlertState = { + message: string; +}; + +export const premiumAlertByThreadAtom = atom>({}); + +export const setPremiumAlertForThreadAtom = atom( + null, + ( + get, + set, + payload: { + threadId: number; + message: string; + } + ) => { + const current = get(premiumAlertByThreadAtom); + set(premiumAlertByThreadAtom, { + ...current, + [payload.threadId]: { message: payload.message }, + }); + } +); + +export const clearPremiumAlertForThreadAtom = atom(null, (get, set, threadId: number) => { + const current = get(premiumAlertByThreadAtom); + if (!(threadId in current)) return; + const next = { ...current }; + delete next[threadId]; + set(premiumAlertByThreadAtom, next); +}); diff --git a/surfsense_web/components/assistant-ui/thread.tsx 
b/surfsense_web/components/assistant-ui/thread.tsx index cf99598f1..06f25f5fb 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -37,10 +37,13 @@ import { toggleToolAtom, } from "@/atoms/agent-tools/agent-tools.atoms"; import { chatSessionStateAtom } from "@/atoms/chat/chat-session-state.atom"; -import { - mentionedDocumentsAtom, -} from "@/atoms/chat/mentioned-documents.atom"; +import { currentThreadAtom } from "@/atoms/chat/current-thread.atom"; +import { mentionedDocumentsAtom } from "@/atoms/chat/mentioned-documents.atom"; import { pendingUserImageDataUrlsAtom } from "@/atoms/chat/pending-user-images.atom"; +import { + clearPremiumAlertForThreadAtom, + premiumAlertByThreadAtom, +} from "@/atoms/chat/premium-alert.atom"; import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms"; import { connectorsAtom } from "@/atoms/connectors/connector-query.atoms"; import { membersAtom } from "@/atoms/members/members-query.atoms"; @@ -134,6 +137,9 @@ const ThreadContent: FC = () => { style={{ paddingBottom: "max(1rem, env(safe-area-inset-bottom))" }} > + !thread.isEmpty}> + + !thread.isEmpty}> @@ -143,6 +149,38 @@ const ThreadContent: FC = () => { ); }; +const PremiumQuotaPinnedAlert: FC = () => { + const currentThreadState = useAtomValue(currentThreadAtom); + const alertsByThread = useAtomValue(premiumAlertByThreadAtom); + const clearPremiumAlertForThread = useSetAtom(clearPremiumAlertForThreadAtom); + + const currentThreadId = currentThreadState?.id; + if (!currentThreadId) return null; + + const alert = alertsByThread[currentThreadId]; + if (!alert) return null; + + return ( +
+
+ +
+

Premium quota exhausted

+

{alert.message}

+
+ +
+
+ ); +}; + const ThreadScrollToBottom: FC = () => { return ( From c598d7038f4f2766e37b9e9dc3e037b07fc1938b Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 29 Apr 2026 20:17:45 +0530 Subject: [PATCH 06/77] refactor(chat): update premium token error messages for clarity and consistency --- .../app/tasks/chat/stream_new_chat.py | 4 ++-- .../new-chat/[[...chat_id]]/page.tsx | 16 ++++++---------- surfsense_web/components/assistant-ui/thread.tsx | 7 +++---- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 1a56547ca..233b45396 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -1542,7 +1542,7 @@ async def stream_new_chat( llm_config_id, ) yield streaming_service.format_error( - "Premium token quota exceeded for this pinned model. Select a free model or re-select Auto (Fastest) to repin." + "Premium tokens exhausted. Buy more tokens to continue with this model, or switch to a free model." ) yield streaming_service.format_done() return @@ -2263,7 +2263,7 @@ async def stream_resume_chat( llm_config_id, ) yield streaming_service.format_error( - "Premium token quota exceeded for this pinned model. Select a free model or re-select Auto (Fastest) to repin." + "Premium tokens exhausted. Buy more tokens to continue with this model, or switch to a free model." 
) yield streaming_service.format_done() return diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index a5461e17f..05621419d 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -201,17 +201,16 @@ const BASE_TOOLS_WITH_UI = new Set([ // "write_todos", // Disabled for now ]); -const PINNED_PREMIUM_QUOTA_MESSAGE = "Premium token quota exceeded for this pinned model."; - function getPinnedPremiumQuotaErrorMessage(error: unknown): string | null { if (!(error instanceof Error)) return null; - if (!error.message.toLowerCase().includes("premium token quota exceeded")) { + const normalized = error.message.toLowerCase(); + if ( + !normalized.includes("premium tokens exhausted") + && !normalized.includes("premium token quota exceeded") + ) { return null; } - if (!error.message.toLowerCase().includes("pinned model")) { - return null; - } - return error.message || PINNED_PREMIUM_QUOTA_MESSAGE; + return error.message; } export default function NewChatPage() { @@ -980,7 +979,6 @@ export default function NewChatPage() { threadId: currentThreadId, message: premiumQuotaAlertMessage, }); - toast.error(PINNED_PREMIUM_QUOTA_MESSAGE); } else { toast.error("Failed to get response. Please try again."); } @@ -1290,7 +1288,6 @@ export default function NewChatPage() { threadId: resumeThreadId, message: premiumQuotaAlertMessage, }); - toast.error(PINNED_PREMIUM_QUOTA_MESSAGE); } else { toast.error("Failed to resume. Please try again."); } @@ -1638,7 +1635,6 @@ export default function NewChatPage() { threadId, message: premiumQuotaAlertMessage, }); - toast.error(PINNED_PREMIUM_QUOTA_MESSAGE); } else { toast.error("Failed to regenerate response. 
Please try again."); } diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 06f25f5fb..cb063fac3 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -161,16 +161,15 @@ const PremiumQuotaPinnedAlert: FC = () => { if (!alert) return null; return ( -
+
-

Premium quota exhausted

-

{alert.message}

+

{alert.message}

-
- )} + {/* Fixed trigger slot prevents any vertical reflow when visibility changes */} +
+ +
{/* Desktop floating comment panel — overlays on top of chat content */} {showCommentTrigger && isDesktop && isInlineOpen && dbMessageId && ( From 5826e5264d68595fcf7b0e67c03739109ae05e50 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 1 May 2026 04:39:33 +0530 Subject: [PATCH 32/77] refactor(chat): add TruncatedNameWithTooltip component in model selector --- .../components/new-chat/model-selector.tsx | 93 ++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/surfsense_web/components/new-chat/model-selector.tsx b/surfsense_web/components/new-chat/model-selector.tsx index 9fe9dd8da..1a0f8c5ba 100644 --- a/surfsense_web/components/new-chat/model-selector.tsx +++ b/surfsense_web/components/new-chat/model-selector.tsx @@ -236,6 +236,93 @@ interface DisplayItem { isAutoMode: boolean; } +const TruncatedNameWithTooltip: React.FC<{ + text: string; + className?: string; + enableTooltip: boolean; +}> = ({ text, className, enableTooltip }) => { + const textRef = useRef(null); + const openTimerRef = useRef(undefined); + const [isTruncated, setIsTruncated] = useState(false); + const [open, setOpen] = useState(false); + + const recalcTruncation = useCallback(() => { + const el = textRef.current; + if (!el) return; + setIsTruncated(el.scrollWidth > el.clientWidth + 1); + }, []); + + useEffect(() => { + if (!enableTooltip) return; + const el = textRef.current; + if (!el) return; + + const raf = requestAnimationFrame(recalcTruncation); + recalcTruncation(); + + const observer = new ResizeObserver(recalcTruncation); + observer.observe(el); + if (el.parentElement) observer.observe(el.parentElement); + window.addEventListener("resize", recalcTruncation); + + return () => { + cancelAnimationFrame(raf); + observer.disconnect(); + window.removeEventListener("resize", recalcTruncation); + }; + }, [enableTooltip, recalcTruncation]); + + useEffect(() => { + // Recompute when row text changes. 
+ void text; + requestAnimationFrame(recalcTruncation); + }, [text, recalcTruncation]); + + useEffect( + () => () => { + if (openTimerRef.current) window.clearTimeout(openTimerRef.current); + }, + [] + ); + + if (!enableTooltip) { + return ( + + {text} + + ); + } + + const handleOpenChange = (nextOpen: boolean) => { + if (openTimerRef.current) { + window.clearTimeout(openTimerRef.current); + openTimerRef.current = undefined; + } + if (!nextOpen) { + setOpen(false); + return; + } + if (!isTruncated) return; + openTimerRef.current = window.setTimeout(() => { + setOpen(true); + openTimerRef.current = undefined; + }, 220); + }; + + return ( + + + + {text} + + + + {text} + + + ); +}; + // ─── Component ────────────────────────────────────────────────────── interface ModelSelectorProps { @@ -936,7 +1023,11 @@ export function ModelSelector({ {/* Model info */}
- {config.name} + {isAutoMode && ( Date: Thu, 30 Apr 2026 18:40:55 -0700 Subject: [PATCH 33/77] feat(markdown): enable citation rendering in MarkdownViewer and related components - Added `enableCitations` prop to `MarkdownViewer` to support interactive citation badges. - Updated instances of `MarkdownViewer` across various components to utilize the new citation feature. - Enhanced citation processing in `PlateEditor` for read-only views, ensuring citations are rendered correctly without affecting markdown serialization. - Refactored citation handling in `InlineCitation` and `MarkdownText` to improve citation context management. --- .../assistant-ui/inline-citation.tsx | 18 +- .../components/assistant-ui/markdown-text.tsx | 428 ++++++++---------- .../citation-panel/citation-panel.tsx | 2 +- .../citations/citation-renderer.tsx | 79 ++++ surfsense_web/components/document-viewer.tsx | 2 +- .../components/editor-panel/editor-panel.tsx | 9 +- .../components/editor/plate-editor.tsx | 56 ++- .../editor/plugins/citation-kit.tsx | 222 +++++++++ .../components/editor/utils/escape-mdx.ts | 2 +- .../layout/ui/tabs/DocumentTabContent.tsx | 4 +- surfsense_web/components/markdown-viewer.tsx | 100 +++- .../components/report-panel/report-panel.tsx | 5 +- .../lib/citations/citation-parser.ts | 134 ++++++ surfsense_web/lib/markdown/code-regions.ts | 8 + 14 files changed, 809 insertions(+), 260 deletions(-) create mode 100644 surfsense_web/components/citations/citation-renderer.tsx create mode 100644 surfsense_web/components/editor/plugins/citation-kit.tsx create mode 100644 surfsense_web/lib/citations/citation-parser.ts create mode 100644 surfsense_web/lib/markdown/code-regions.ts diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx index 2aeba89ca..e299f2373 100644 --- a/surfsense_web/components/assistant-ui/inline-citation.tsx +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -3,11 +3,11 
@@ import { useQuery } from "@tanstack/react-query"; import { useSetAtom } from "jotai"; import { ExternalLink, FileText } from "lucide-react"; +import dynamic from "next/dynamic"; import type { FC } from "react"; import { useCallback, useEffect, useRef, useState } from "react"; import { openCitationPanelAtom } from "@/atoms/citation/citation-panel.atom"; import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context"; -import { MarkdownViewer } from "@/components/markdown-viewer"; import { Citation } from "@/components/tool-ui/citation"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; import { Spinner } from "@/components/ui/spinner"; @@ -15,6 +15,16 @@ import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip import { documentsApiService } from "@/lib/apis/documents-api.service"; import { cacheKeys } from "@/lib/query-client/cache-keys"; +// Lazily load MarkdownViewer here to break the static import cycle: +// `markdown-viewer.tsx` → `citation-renderer.tsx` → `inline-citation.tsx` +// would otherwise pull `markdown-viewer.tsx` back in at module-init time. +// Only `SurfsenseDocCitation` (popover body) ever renders this viewer, so +// the lazy boundary is invisible to most call paths. +const MarkdownViewer = dynamic( + () => import("@/components/markdown-viewer").then((m) => m.MarkdownViewer), + { ssr: false, loading: () => } +); + interface InlineCitationProps { chunkId: number; isDocsChunk?: boolean; @@ -172,7 +182,11 @@ const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => {

)} {!isLoading && !error && citedChunk?.content && ( - + )} {!isLoading && !error && !citedChunk?.content && (

No content available.

diff --git a/surfsense_web/components/assistant-ui/markdown-text.tsx b/surfsense_web/components/assistant-ui/markdown-text.tsx index 7655e10cc..2b788e88b 100644 --- a/surfsense_web/components/assistant-ui/markdown-text.tsx +++ b/surfsense_web/components/assistant-ui/markdown-text.tsx @@ -12,15 +12,26 @@ import { ExternalLinkIcon } from "lucide-react"; import dynamic from "next/dynamic"; import { useParams } from "next/navigation"; import { useTheme } from "next-themes"; -import { memo, type ReactNode } from "react"; +import { + createContext, + memo, + type ReactNode, + useCallback, + useContext, + useRef, +} from "react"; import rehypeKatex from "rehype-katex"; import remarkGfm from "remark-gfm"; import remarkMath from "remark-math"; import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom"; import { ImagePreview, ImageRoot, ImageZoom } from "@/components/assistant-ui/image"; import "katex/dist/katex.min.css"; -import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation"; +import { processChildrenWithCitations } from "@/components/citations/citation-renderer"; import { Skeleton } from "@/components/ui/skeleton"; +import { + type CitationUrlMap, + preprocessCitationMarkdown, +} from "@/lib/citations/citation-parser"; import { Table, TableBody, @@ -59,31 +70,30 @@ const LazyMarkdownCodeBlock = dynamic( } ); -// Storage for URL citations replaced during preprocess to avoid GFM autolink interference. -// Populated in preprocessMarkdown, consumed in parseTextWithCitations. -let _pendingUrlCitations = new Map(); -let _urlCiteIdx = 0; +// Per-render URL placeholder map propagated to component overrides via +// React Context. Replaces the previous module-level `_pendingUrlCitations` +// state, which was unsafe under concurrent renders / SSR. 
+type CitationUrlMapRef = { current: CitationUrlMap }; +const EMPTY_URL_MAP: CitationUrlMap = new Map(); +const CitationUrlMapContext = createContext({ current: EMPTY_URL_MAP }); + +function useCitationUrlMap(): CitationUrlMap { + return useContext(CitationUrlMapContext).current; +} /** * Preprocess raw markdown before it reaches the remark/rehype pipeline. * - Replaces URL-based citations with safe placeholders (prevents GFM autolinks) * - Normalises LaTeX delimiters to dollar-sign syntax for remark-math */ -function preprocessMarkdown(content: string): string { +function preprocessMarkdown(content: string, urlMapRef: CitationUrlMapRef): string { // Replace URL-based citations with safe placeholders BEFORE markdown parsing. // GFM autolinks would otherwise convert the https://... inside [citation:URL] // into an element, splitting the text and preventing our citation regex // from matching the full pattern. - _pendingUrlCitations = new Map(); - _urlCiteIdx = 0; - content = content.replace( - /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+)\s*\u200B?[\]】]/g, - (_, url) => { - const key = `urlcite${_urlCiteIdx++}`; - _pendingUrlCitations.set(key, url.trim()); - return `[citation:${key}]`; - } - ); + const { content: rewritten, urlMap } = preprocessCitationMarkdown(content); + urlMapRef.current = urlMap; + content = rewritten; // All math forms are normalised to $$...$$ so we can disable single-dollar // inline math in remark-math (otherwise currency like "$3,120.00 and $0.00" @@ -116,113 +126,28 @@ function preprocessMarkdown(content: string): string { return content; } -// Matches [citation:...] with numeric IDs (incl. negative, doc- prefix, comma-separated), -// URL-based IDs from live web search, or urlciteN placeholders from preprocess. -// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts. 
-const CITATION_REGEX = - /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g; - -/** - * Parses text and replaces [citation:XXX] patterns with citation components. - * Supports: - * - Numeric chunk IDs: [citation:123] - * - Doc-prefixed IDs: [citation:doc-123] - * - Comma-separated IDs: [citation:4149, 4150, 4151] - * - URL-based citations from live search: [citation:https://example.com/page] - */ -function parseTextWithCitations(text: string): ReactNode[] { - const parts: ReactNode[] = []; - let lastIndex = 0; - let match: RegExpExecArray | null; - let instanceIndex = 0; - - CITATION_REGEX.lastIndex = 0; - - match = CITATION_REGEX.exec(text); - while (match !== null) { - if (match.index > lastIndex) { - parts.push(text.substring(lastIndex, match.index)); - } - - const captured = match[1]; - - if (captured.startsWith("http://") || captured.startsWith("https://")) { - parts.push(); - instanceIndex++; - } else if (captured.startsWith("urlcite")) { - const url = _pendingUrlCitations.get(captured); - if (url) { - parts.push(); - } - instanceIndex++; - } else { - const rawIds = captured.split(",").map((s) => s.trim()); - for (const rawId of rawIds) { - const isDocsChunk = rawId.startsWith("doc-"); - const chunkId = Number.parseInt(isDocsChunk ? rawId.slice(4) : rawId, 10); - parts.push( - - ); - instanceIndex++; - } - } - - lastIndex = match.index + match[0].length; - match = CITATION_REGEX.exec(text); - } - - if (lastIndex < text.length) { - parts.push(text.substring(lastIndex)); - } - - return parts.length > 0 ? 
parts : [text]; -} - const MarkdownTextImpl = () => { + const urlMapRef = useRef(EMPTY_URL_MAP); + const preprocess = useCallback( + (content: string) => preprocessMarkdown(content, urlMapRef), + [] + ); return ( - + + + ); }; export const MarkdownText = memo(MarkdownTextImpl); -/** - * Helper to process children and replace citation patterns with components - */ -function processChildrenWithCitations(children: ReactNode): ReactNode { - if (typeof children === "string") { - const parsed = parseTextWithCitations(children); - return parsed.length === 1 && typeof parsed[0] === "string" ? children : parsed; - } - - if (Array.isArray(children)) { - return children.map((child) => { - if (typeof child === "string") { - const parsed = parseTextWithCitations(child); - return parsed.length === 1 && typeof parsed[0] === "string" ? ( - child - ) : ( - {parsed} - ); - } - return child; - }); - } - - return children; -} - function extractDomain(url: string): string { try { const parsed = new URL(url); @@ -322,92 +247,125 @@ function MarkdownImage({ src, alt }: { src?: string; alt?: string }) { } const defaultComponents = memoizeMarkdownComponents({ - h1: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - h2: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - h3: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - h4: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - h5: ({ className, children, ...props }) => ( -
- {processChildrenWithCitations(children)} -
- ), - h6: ({ className, children, ...props }) => ( -
- {processChildrenWithCitations(children)} -
- ), - p: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - a: ({ className, children, ...props }) => ( -
- {processChildrenWithCitations(children)} - - ), - blockquote: ({ className, children, ...props }) => ( -
- {processChildrenWithCitations(children)} -
- ), + h1: function H1({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + h2: function H2({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + h3: function H3({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + h4: function H4({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + h5: function H5({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +
+ {processChildrenWithCitations(children, urlMap)} +
+ ); + }, + h6: function H6({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +
+ {processChildrenWithCitations(children, urlMap)} +
+ ); + }, + p: function P({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + a: function A({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( + + {processChildrenWithCitations(children, urlMap)} + + ); + }, + blockquote: function Blockquote({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +
+ {processChildrenWithCitations(children, urlMap)} +
+ ); + }, ul: ({ className, ...props }) => (
    li]:mt-2", className)} {...props} /> ), ol: ({ className, ...props }) => (
      li]:mt-2", className)} {...props} /> ), - li: ({ className, children, ...props }) => ( -
    1. - {processChildrenWithCitations(children)} -
    2. - ), + li: function Li({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +
    3. + {processChildrenWithCitations(children, urlMap)} +
    4. + ); + }, hr: ({ className, ...props }) => (
      ), @@ -422,28 +380,34 @@ const defaultComponents = memoizeMarkdownComponents({ tbody: ({ className, ...props }) => ( ), - th: ({ className, children, ...props }) => ( - - {processChildrenWithCitations(children)} - - ), - td: ({ className, children, ...props }) => ( - - {processChildrenWithCitations(children)} - - ), + th: function Th({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( + + {processChildrenWithCitations(children, urlMap)} + + ); + }, + td: function Td({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( + + {processChildrenWithCitations(children, urlMap)} + + ); + }, tr: ({ className, ...props }) => , sup: ({ className, ...props }) => ( a]:text-xs [&>a]:no-underline", className)} {...props} /> @@ -552,16 +516,22 @@ const defaultComponents = memoizeMarkdownComponents({ /> ); }, - strong: ({ className, children, ...props }) => ( - - {processChildrenWithCitations(children)} - - ), - em: ({ className, children, ...props }) => ( - - {processChildrenWithCitations(children)} - - ), + strong: function Strong({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( + + {processChildrenWithCitations(children, urlMap)} + + ); + }, + em: function Em({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( + + {processChildrenWithCitations(children, urlMap)} + + ); + }, img: ({ src, alt }) => ( ), diff --git a/surfsense_web/components/citation-panel/citation-panel.tsx b/surfsense_web/components/citation-panel/citation-panel.tsx index cec07b9cf..ed8acd656 100644 --- a/surfsense_web/components/citation-panel/citation-panel.tsx +++ b/surfsense_web/components/citation-panel/citation-panel.tsx @@ -169,7 +169,7 @@ export const CitationPanelContent: FC = ({ chunkId, o )}
- +
); diff --git a/surfsense_web/components/citations/citation-renderer.tsx b/surfsense_web/components/citations/citation-renderer.tsx new file mode 100644 index 000000000..bf877f03f --- /dev/null +++ b/surfsense_web/components/citations/citation-renderer.tsx @@ -0,0 +1,79 @@ +"use client"; + +import type { ReactNode } from "react"; +import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation"; +import { + type CitationToken, + type CitationUrlMap, + parseTextWithCitations, +} from "@/lib/citations/citation-parser"; + +/** + * Render a single parsed citation token as JSX. + * + * `ordinalKey` should be a stable per-render counter so duplicate identical + * citations within the same parent don't collide on `key`. The previous + * implementation in `markdown-text.tsx` used the source string itself as + * the key, which produced React warnings when two segments rendered the + * same `[citation:N]` text. + */ +export function renderCitationToken(token: CitationToken, ordinalKey: number): ReactNode { + if (token.kind === "url") { + return ; + } + return ( + + ); +} + +/** + * Walk a `ReactNode` (string, array, or arbitrary node) and replace any + * `[citation:...]` tokens inside string children with citation badges. + * + * Designed for use inside `Streamdown`/`react-markdown` `components` + * overrides where the renderer hands you `children`. Non-string children + * are returned untouched so block/phrasing structure is preserved. + */ +export function processChildrenWithCitations( + children: ReactNode, + urlMap: CitationUrlMap +): ReactNode { + if (typeof children === "string") { + const segments = parseTextWithCitations(children, urlMap); + if (segments.length === 1 && typeof segments[0] === "string") { + return children; + } + let ordinal = 0; + return segments.map((segment) => + typeof segment === "string" ? 
segment : renderCitationToken(segment, ordinal++) + ); + } + + if (Array.isArray(children)) { + let ordinal = 0; + return children.map((child, childIndex) => { + if (typeof child === "string") { + const segments = parseTextWithCitations(child, urlMap); + if (segments.length === 1 && typeof segments[0] === "string") { + return child; + } + return ( + + {segments.map((segment) => + typeof segment === "string" + ? segment + : renderCitationToken(segment, ordinal++) + )} + + ); + } + return child; + }); + } + + return children; +} diff --git a/surfsense_web/components/document-viewer.tsx b/surfsense_web/components/document-viewer.tsx index 0f283e567..710a04ba3 100644 --- a/surfsense_web/components/document-viewer.tsx +++ b/surfsense_web/components/document-viewer.tsx @@ -32,7 +32,7 @@ export function DocumentViewer({ title, content, trigger }: DocumentViewerProps) {title}
- +
diff --git a/surfsense_web/components/editor-panel/editor-panel.tsx b/surfsense_web/components/editor-panel/editor-panel.tsx index df138e97e..eab07a91b 100644 --- a/surfsense_web/components/editor-panel/editor-panel.tsx +++ b/surfsense_web/components/editor-panel/editor-panel.tsx @@ -652,7 +652,7 @@ export function EditorPanelContent({ // Plate is heavy on multi-MB docs.
{largeDocAlert} - +
) : renderInPlateEditor ? ( // Editable doc (FILE/NOTE) — Plate editing UX. @@ -670,12 +670,17 @@ export function EditorPanelContent({ reserveToolbarSpace defaultEditing={isEditing} className="**:[[role=toolbar]]:bg-sidebar!" + // Render `[citation:N]` badges in view mode only. + // Edit mode keeps raw text so the user can edit/delete + // tokens directly. `local_file` never reaches this branch + // (handled by the source_code editor above). + enableCitations={!isEditing && !isLocalFileMode} />
) : (
- +
)} diff --git a/surfsense_web/components/editor/plate-editor.tsx b/surfsense_web/components/editor/plate-editor.tsx index 7f12d3cae..c42cb991e 100644 --- a/surfsense_web/components/editor/plate-editor.tsx +++ b/surfsense_web/components/editor/plate-editor.tsx @@ -8,9 +8,11 @@ import { useEffect, useMemo, useRef } from "react"; import remarkGfm from "remark-gfm"; import remarkMath from "remark-math"; import { EditorSaveContext } from "@/components/editor/editor-save-context"; +import { CitationKit, injectCitationNodes } from "@/components/editor/plugins/citation-kit"; import { type EditorPreset, presetMap } from "@/components/editor/presets"; import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx"; import { Editor, EditorContainer } from "@/components/ui/editor"; +import { preprocessCitationMarkdown } from "@/lib/citations/citation-parser"; /** Live editor instance returned by `usePlateEditor`. */ export type PlateEditorInstance = ReturnType; @@ -65,6 +67,14 @@ export interface PlateEditorProps { * without modifying the core editor component. */ extraPlugins?: AnyPluginConfig[]; + /** + * Render `[citation:N]` and `[citation:URL]` tokens in the deserialized + * markdown as interactive citation badges/popovers (mirrors chat). Only + * meant for read-only views — when true, `onMarkdownChange` is suppressed + * because the in-memory tree contains custom inline-void elements that + * have no markdown serialize rule. + */ + enableCitations?: boolean; } function PlateEditorContent({ @@ -103,6 +113,7 @@ export function PlateEditor({ defaultEditing = false, preset = "full", extraPlugins = [], + enableCitations = false, }: PlateEditorProps) { const lastMarkdownRef = useRef(markdown); const lastHtmlRef = useRef(html); @@ -145,6 +156,8 @@ export function PlateEditor({ ...(onSave ? [SaveShortcutPlugin] : []), // Consumer-provided extra plugins ...extraPlugins, + // Citation void inline element (read-only document viewer). + ...(enableCitations ? 
CitationKit : []), MarkdownPlugin.configure({ options: { remarkPlugins: [remarkGfm, remarkMath, remarkMdx], @@ -154,8 +167,18 @@ export function PlateEditor({ value: html ? (editor) => editor.api.html.deserialize({ element: html }) as Value : markdown - ? (editor) => - editor.getApi(MarkdownPlugin).markdown.deserialize(escapeMdxExpressions(markdown)) + ? (editor) => { + if (!enableCitations) { + return editor + .getApi(MarkdownPlugin) + .markdown.deserialize(escapeMdxExpressions(markdown)); + } + const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown); + const value = editor + .getApi(MarkdownPlugin) + .markdown.deserialize(escapeMdxExpressions(rewritten)); + return injectCitationNodes(value as Descendant[], urlMap) as Value; + } : undefined, }); @@ -174,13 +197,22 @@ export function PlateEditor({ useEffect(() => { if (!html && markdown !== undefined && markdown !== lastMarkdownRef.current) { lastMarkdownRef.current = markdown; - const newValue = editor - .getApi(MarkdownPlugin) - .markdown.deserialize(escapeMdxExpressions(markdown)); + let newValue: Descendant[]; + if (enableCitations) { + const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown); + const deserialized = editor + .getApi(MarkdownPlugin) + .markdown.deserialize(escapeMdxExpressions(rewritten)) as Descendant[]; + newValue = injectCitationNodes(deserialized, urlMap); + } else { + newValue = editor + .getApi(MarkdownPlugin) + .markdown.deserialize(escapeMdxExpressions(markdown)) as Descendant[]; + } editor.tf.reset(); - editor.tf.setValue(newValue); + editor.tf.setValue(newValue as Value); } - }, [html, markdown, editor]); + }, [html, markdown, editor, enableCitations]); // When not forced read-only, the user can toggle between editing/viewing. const canToggleMode = !readOnly && allowModeToggle; @@ -205,6 +237,16 @@ export function PlateEditor({ // (initialized to true via usePlateEditor, toggled via ModeToolbarButton). {...(readOnly ? 
{ readOnly: true } : {})} onChange={({ value }) => { + // View-only citation mode: skip serialization. The custom + // `citation` inline-void element has no markdown serialize + // rule, so emitting changes here would overwrite + // `lastMarkdownRef.current` (and downstream copy-to-clipboard + // state in EditorPanelContent) with a tree that loses every + // citation token. `enableCitations` is only ever set in + // read-only paths, so user input cannot reach this branch + // in practice — the guard exists for the initial Plate + // normalize emit. + if (enableCitations) return; if (onHtmlChange && html) { const serialized = slateToHtml(value as Descendant[]); onHtmlChange(serialized); diff --git a/surfsense_web/components/editor/plugins/citation-kit.tsx b/surfsense_web/components/editor/plugins/citation-kit.tsx new file mode 100644 index 000000000..c90cb5e28 --- /dev/null +++ b/surfsense_web/components/editor/plugins/citation-kit.tsx @@ -0,0 +1,222 @@ +"use client"; + +import { type FC } from "react"; +import { KEYS, type Descendant } from "platejs"; +import { createPlatePlugin, type PlateElementProps } from "platejs/react"; +import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation"; +import { + CITATION_REGEX, + type CitationUrlMap, + parseTextWithCitations, +} from "@/lib/citations/citation-parser"; + +/** + * Plate inline-void node modeling a single `[citation:...]` reference. + * + * Modeled after the existing `MentionPlugin` pattern in + * `inline-mention-editor.tsx` — the only confirmed pattern in this repo + * for non-text inline UI. Inline-void elements satisfy Slate's invariant + * that the editor renders both atomic widgets and surrounding text + * cleanly without breaking selection / caret semantics. + */ +export type CitationElementNode = { + type: "citation"; + kind: "chunk" | "doc" | "url"; + chunkId?: number; + url?: string; + /** Original `[citation:...]` substring for traceability/debugging. 
*/ + rawText: string; + children: [{ text: "" }]; +}; + +const CITATION_TYPE = "citation"; + +const CitationElement: FC> = ({ + attributes, + children, + element, +}) => { + const isUrl = element.kind === "url"; + return ( + + + {isUrl && element.url ? ( + + ) : element.chunkId !== undefined ? ( + + ) : null} + + {children} + + ); +}; + +const CitationPlugin = createPlatePlugin({ + key: CITATION_TYPE, + node: { + isElement: true, + isInline: true, + isVoid: true, + type: CITATION_TYPE, + component: CitationElement, + }, +}); + +/** Plugin kit shape used elsewhere in the editor. */ +export const CitationKit = [CitationPlugin]; + +// --------------------------------------------------------------------------- +// Slate value transform — runs after MarkdownPlugin.deserialize +// --------------------------------------------------------------------------- + +// Structural shapes used by the value transform. We cannot use Plate's +// generic Element / Text type predicates directly because `Descendant` is a +// constrained union and our predicates would over-narrow. Casting through +// these row types keeps the walker readable without fighting the types. +type SlateText = { text: string } & Record; +type SlateElement = { type?: string; children: Descendant[] } & Record; + +function isText(node: Descendant): boolean { + return typeof (node as { text?: unknown }).text === "string"; +} + +function asText(node: Descendant): SlateText { + return node as unknown as SlateText; +} + +function asElement(node: Descendant): SlateElement { + return node as unknown as SlateElement; +} + +/** + * Element types whose subtrees we MUST NOT inject citation void elements + * into. Each rationale documented in the citation plan: + * - `KEYS.codeBlock` / `code_line` — Plate's schema rejects inline elements + * inside code containers; the user expects literal text inside code. 
+ * - `KEYS.link` — ` - + ) : ( - + )} diff --git a/surfsense_web/components/markdown-viewer.tsx b/surfsense_web/components/markdown-viewer.tsx index c4d73e30b..b2420711a 100644 --- a/surfsense_web/components/markdown-viewer.tsx +++ b/surfsense_web/components/markdown-viewer.tsx @@ -3,6 +3,12 @@ import { createMathPlugin } from "@streamdown/math"; import { Streamdown, type StreamdownProps } from "streamdown"; import "katex/dist/katex.min.css"; import Image from "next/image"; +import { useMemo } from "react"; +import { processChildrenWithCitations } from "@/components/citations/citation-renderer"; +import { + type CitationUrlMap, + preprocessCitationMarkdown, +} from "@/lib/citations/citation-parser"; import { cn } from "@/lib/utils"; const code = createCodePlugin({ @@ -21,8 +27,21 @@ interface MarkdownViewerProps { content: string; className?: string; maxLength?: number; + /** + * When true, render `[citation:N]` / `[citation:URL]` tokens as the + * interactive citation badges/popovers used in chat. Default `false` + * so callers that don't need citations are unchanged. + * + * Note: we deliberately do NOT override `` to inject citations into + * link text — that would produce `