From be33b8920e60ada6515c4aaa0b868542b31a18a9 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 23 Jan 2026 19:52:15 +0200 Subject: [PATCH 01/69] add public_share_token column to NewChatThread --- surfsense_backend/app/db.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 771f956b3..1084319cd 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -358,6 +358,14 @@ class NewChatThread(BaseModel, TimestampMixin): index=True, ) + # Public sharing - cryptographic token for public URL access + public_share_token = Column( + String(64), + nullable=True, + unique=True, + index=True, + ) + # Relationships search_space = relationship("SearchSpace", back_populates="new_chat_threads") created_by = relationship("User", back_populates="new_chat_threads") From d16b086adae565cfd2a53684c59ac7b57cbc09cb Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 23 Jan 2026 19:53:02 +0200 Subject: [PATCH 02/69] add public_share_enabled column to NewChatThread --- surfsense_backend/app/db.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 1084319cd..d13e0cbdd 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -365,6 +365,13 @@ class NewChatThread(BaseModel, TimestampMixin): unique=True, index=True, ) + # Whether public sharing is currently enabled for this thread + public_share_enabled = Column( + Boolean, + nullable=False, + default=False, + server_default="false", + ) # Relationships search_space = relationship("SearchSpace", back_populates="new_chat_threads") From f42a11023fbd02fac296809197981efd00f26aa4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 23 Jan 2026 19:55:53 +0200 Subject: [PATCH 03/69] create migration skeleton for public chat sharing --- .../79_add_public_share_to_chat_threads.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py diff --git a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py new file mode 100644 index 000000000..a9cf085d3 --- /dev/null +++ b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py @@ -0,0 +1,27 @@ +"""Add public sharing columns to new_chat_threads + +Revision ID: 79 +Revises: 78 +Create Date: 2026-01-23 + +Adds public_share_token and public_share_enabled columns to enable +public sharing of chat threads via secure tokenized URLs. +""" + +from collections.abc import Sequence + +# revision identifiers, used by Alembic. +revision: str = "79" +down_revision: str | None = "78" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add public sharing columns to new_chat_threads.""" + pass + + +def downgrade() -> None: + """Remove public sharing columns from new_chat_threads.""" + pass From 87183d1eb7732877dc0afc88d1813a5f8bf2c48b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 23 Jan 2026 19:57:08 +0200 Subject: [PATCH 04/69] add public_share_token column to migration --- .../versions/79_add_public_share_to_chat_threads.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py index a9cf085d3..70e262b9c 100644 --- a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py +++ b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py @@ -10,6 +10,10 @@ public sharing of chat threads via secure tokenized URLs. from collections.abc import Sequence +import sqlalchemy as sa + +from alembic import op + # revision identifiers, used by Alembic. revision: str = "79" down_revision: str | None = "78" @@ -19,9 +23,13 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: """Add public sharing columns to new_chat_threads.""" - pass + # Add public_share_token column + op.add_column( + "new_chat_threads", + sa.Column("public_share_token", sa.String(64), nullable=True), + ) def downgrade() -> None: """Remove public sharing columns from new_chat_threads.""" - pass + op.drop_column("new_chat_threads", "public_share_token") From b61d96fb85453b7109a52b6b8b5b97cb43b0887b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 23 Jan 2026 19:57:53 +0200 Subject: [PATCH 05/69] add public_share_enabled column to migration --- .../versions/79_add_public_share_to_chat_threads.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py index 70e262b9c..d82f8f56f 100644 --- a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py +++ b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py @@ -29,7 +29,19 @@ def upgrade() -> None: sa.Column("public_share_token", sa.String(64), nullable=True), ) + # Add public_share_enabled column + op.add_column( + "new_chat_threads", + sa.Column( + "public_share_enabled", + sa.Boolean(), + nullable=False, + server_default="false", + ), + ) + def downgrade() -> None: """Remove public sharing columns from new_chat_threads.""" + op.drop_column("new_chat_threads", "public_share_enabled") op.drop_column("new_chat_threads", "public_share_token") From 036104e4c845b05fb034eb38e932cadbbb4cdd8f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 23 Jan 2026 19:58:34 +0200 Subject: [PATCH 06/69] add unique index on public_share_token --- .../versions/79_add_public_share_to_chat_threads.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py index d82f8f56f..25195d6ee 100644 --- a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py +++ b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py @@ -40,8 +40,20 @@ def upgrade() -> None: ), ) + # Add unique partial index on public_share_token (only non-null values) + op.execute( + """ + CREATE UNIQUE INDEX ix_new_chat_threads_public_share_token + ON new_chat_threads(public_share_token) + WHERE public_share_token IS NOT NULL + """ + ) + def downgrade() -> None: """Remove public sharing columns from new_chat_threads.""" + op.drop_index( + "ix_new_chat_threads_public_share_token", table_name="new_chat_threads" + ) op.drop_column("new_chat_threads", "public_share_enabled") op.drop_column("new_chat_threads", "public_share_token") From a627cc709e95fb0aa409bdff906930c6204b54d4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 23 Jan 2026 19:59:12 +0200 Subject: [PATCH 07/69] add index on public_share_enabled --- .../versions/79_add_public_share_to_chat_threads.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py index 25195d6ee..ce02064c0 100644 --- a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py +++ b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py @@ -49,9 +49,21 @@ def upgrade() -> None: """ ) + # Add partial index on public_share_enabled for fast public chat queries + op.execute( + """ + CREATE INDEX ix_new_chat_threads_public_share_enabled + ON new_chat_threads(public_share_enabled) + WHERE public_share_enabled = TRUE + """ + ) + def downgrade() -> None: """Remove public sharing columns from new_chat_threads.""" + op.drop_index( + "ix_new_chat_threads_public_share_enabled", table_name="new_chat_threads" + ) op.drop_index( "ix_new_chat_threads_public_share_token", table_name="new_chat_threads" ) From 68dc7723af527684cc12a0e006c1a2f860f28259 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 23 Jan 2026 20:00:18 +0200 Subject: [PATCH 08/69] make migration idempotent --- .../versions/79_add_public_share_to_chat_threads.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py index ce02064c0..cb7ba555f 100644 --- a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py +++ b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py @@ -43,7 +43,7 @@ def upgrade() -> None: # Add unique partial index on public_share_token (only non-null values) op.execute( """ - CREATE UNIQUE INDEX ix_new_chat_threads_public_share_token + CREATE UNIQUE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_token ON new_chat_threads(public_share_token) WHERE public_share_token IS NOT NULL """ @@ -52,7 +52,7 @@ def upgrade() -> None: # Add partial index on public_share_enabled for fast public chat queries op.execute( """ - CREATE INDEX ix_new_chat_threads_public_share_enabled + CREATE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_enabled ON new_chat_threads(public_share_enabled) WHERE public_share_enabled = TRUE """ @@ -61,11 +61,7 @@ def upgrade() -> None: def downgrade() -> None: """Remove public sharing columns from new_chat_threads.""" - op.drop_index( - "ix_new_chat_threads_public_share_enabled", table_name="new_chat_threads" - ) - op.drop_index( - "ix_new_chat_threads_public_share_token", table_name="new_chat_threads" - ) + op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_enabled") + op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_token") op.drop_column("new_chat_threads", "public_share_enabled") op.drop_column("new_chat_threads", "public_share_token") From 3a8a9734d6eda24c80abc895b80454080f4f7ca3 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 23 Jan 2026 20:02:42 +0200 Subject: [PATCH 09/69] refactor migration to use op.execute pattern --- .../79_add_public_share_to_chat_threads.py | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py index cb7ba555f..e1c21a353 100644 --- a/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py +++ b/surfsense_backend/alembic/versions/79_add_public_share_to_chat_threads.py @@ -10,8 +10,6 @@ public sharing of chat threads via secure tokenized URLs. from collections.abc import Sequence -import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. @@ -24,20 +22,19 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: """Add public sharing columns to new_chat_threads.""" # Add public_share_token column - op.add_column( - "new_chat_threads", - sa.Column("public_share_token", sa.String(64), nullable=True), + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS public_share_token VARCHAR(64); + """ ) - # Add public_share_enabled column - op.add_column( - "new_chat_threads", - sa.Column( - "public_share_enabled", - sa.Boolean(), - nullable=False, - server_default="false", - ), + # Add public_share_enabled column with default false + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS public_share_enabled BOOLEAN NOT NULL DEFAULT FALSE; + """ ) # Add unique partial index on public_share_token (only non-null values) @@ -45,7 +42,7 @@ def upgrade() -> None: """ CREATE UNIQUE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_token ON new_chat_threads(public_share_token) - WHERE public_share_token IS NOT NULL + WHERE public_share_token IS NOT NULL; """ ) @@ -54,7 +51,7 @@ def upgrade() -> None: """ CREATE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_enabled ON new_chat_threads(public_share_enabled) - WHERE public_share_enabled = TRUE + WHERE public_share_enabled = TRUE; """ ) @@ -63,5 +60,7 @@ def downgrade() -> None: """Remove public sharing columns from new_chat_threads.""" op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_enabled") op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_token") - op.drop_column("new_chat_threads", "public_share_enabled") - op.drop_column("new_chat_threads", "public_share_token") + op.execute( + "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_enabled" + ) + op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_token") From 91543f7a7341be1ef8cb05f94c5bf32c253ca6cb Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 12:50:56 +0200 Subject: [PATCH 10/69] add public chat schemas --- surfsense_backend/app/schemas/new_chat.py | 52 +++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 7a29fc678..5062dd846 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -204,3 +204,55 @@ class RegenerateRequest(BaseModel): attachments: list[ChatAttachment] | None = None mentioned_document_ids: list[int] | None = None mentioned_surfsense_doc_ids: list[int] | None = None + + +# ============================================================================= +# Public Sharing Schemas +# ============================================================================= + + +class PublicShareToggleRequest(BaseModel): + """Request to enable/disable public sharing for a thread.""" + + enabled: bool + + +class PublicShareToggleResponse(BaseModel): + """Response after toggling public sharing.""" + + enabled: bool + public_url: str | None = None + share_token: str | None = None + + +# ============================================================================= +# Public Chat View Schemas (for unauthenticated access) +# ============================================================================= + + +class PublicAuthor(BaseModel): + display_name: str | None = None + avatar_url: str | None = None + + +class PublicChatMessage(BaseModel): + role: NewChatMessageRole + content: Any + author: PublicAuthor | None = None + created_at: datetime + + +class PublicChatThread(BaseModel): + title: str + created_at: datetime + + +class PublicChatResponse(BaseModel): + thread: PublicChatThread + messages: list[PublicChatMessage] + + +class CloneInitiatedResponse(BaseModel): + status: str = "processing" + task_id: str + message: str = "Copying chat to your account..." From 5fae0f5a584a6a947ad9add9aed76ea99c6cee0f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 13:07:46 +0200 Subject: [PATCH 11/69] add public chat service --- .../app/services/public_chat_service.py | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 surfsense_backend/app/services/public_chat_service.py diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py new file mode 100644 index 000000000..af35834b0 --- /dev/null +++ b/surfsense_backend/app/services/public_chat_service.py @@ -0,0 +1,199 @@ +""" +Service layer for public chat sharing and cloning. +""" + +import re +import secrets +from uuid import UUID + +from fastapi import HTTPException +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload + +from app.db import NewChatThread, User + +UI_TOOLS = { + "display_image", + "link_preview", + "generate_podcast", + "scrape_webpage", + "multi_link_preview", +} + + +def strip_citations(text: str) -> str: + """Remove [citation:X] and [citation:doc-X] patterns from text.""" + text = re.sub(r"\[citation:(doc-)?\d+\]", "", text) + text = re.sub(r"\s+", " ", text) + return text.strip() + + +def sanitize_content_for_public(content: list | str | None) -> list: + """Filter message content for public view.""" + if content is None: + return [] + + if isinstance(content, str): + clean_text = strip_citations(content) + return [{"type": "text", "text": clean_text}] if clean_text else [] + + if not isinstance(content, list): + return [] + + sanitized = [] + for part in content: + if not isinstance(part, dict): + continue + + part_type = part.get("type") + + if part_type == "text": + clean_text = strip_citations(part.get("text", "")) + if clean_text: + sanitized.append({"type": "text", "text": clean_text}) + + elif part_type == "tool-call": + if part.get("toolName") in UI_TOOLS: + sanitized.append(part) + + return sanitized + + +async def get_author_display( + session: AsyncSession, + author_id: UUID | None, + user_cache: dict[UUID, dict], +) -> dict | None: + """Transform author UUID to display info.""" + if author_id is None: + return None + + if author_id not in user_cache: + result = await session.execute(select(User).filter(User.id == author_id)) + user = result.scalars().first() + if user: + user_cache[author_id] = { + "display_name": user.display_name or "User", + "avatar_url": user.avatar_url, + } + else: + user_cache[author_id] = { + "display_name": "Unknown User", + "avatar_url": None, + } + + return user_cache[author_id] + + +async def toggle_public_share( + session: AsyncSession, + thread_id: int, + enabled: bool, + user: User, + base_url: str, +) -> dict: + """ + Enable or disable public sharing for a thread. + + Only the thread owner can toggle public sharing. + When enabling, generates a new token if one doesn't exist. + When disabling, keeps the token for potential re-enable. + """ + result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == thread_id) + ) + thread = result.scalars().first() + + if not thread: + raise HTTPException(status_code=404, detail="Thread not found") + + if thread.created_by_id != user.id: + raise HTTPException( + status_code=403, + detail="Only the creator of this chat can manage public sharing", + ) + + if enabled and not thread.public_share_token: + thread.public_share_token = secrets.token_urlsafe(48) + + thread.public_share_enabled = enabled + + await session.commit() + await session.refresh(thread) + + if enabled: + return { + "enabled": True, + "public_url": f"{base_url}/public/{thread.public_share_token}", + "share_token": thread.public_share_token, + } + + return { + "enabled": False, + "public_url": None, + "share_token": None, + } + + +async def get_public_chat( + session: AsyncSession, + share_token: str, +) -> dict: + """ + Get a public chat by share token. + + Returns sanitized content suitable for public viewing. + """ + result = await session.execute( + select(NewChatThread) + .options(selectinload(NewChatThread.messages)) + .filter( + NewChatThread.public_share_token == share_token, + NewChatThread.public_share_enabled.is_(True), + ) + ) + thread = result.scalars().first() + + if not thread: + raise HTTPException(status_code=404, detail="Not found") + + user_cache: dict[UUID, dict] = {} + + messages = [] + for msg in sorted(thread.messages, key=lambda m: m.created_at): + author = await get_author_display(session, msg.author_id, user_cache) + sanitized_content = sanitize_content_for_public(msg.content) + + messages.append( + { + "role": msg.role, + "content": sanitized_content, + "author": author, + "created_at": msg.created_at, + } + ) + + return { + "thread": { + "title": thread.title, + "created_at": thread.created_at, + }, + "messages": messages, + } + + +async def get_thread_by_share_token( + session: AsyncSession, + share_token: str, +) -> NewChatThread | None: + """Get a thread by its public share token if sharing is enabled.""" + result = await session.execute( + select(NewChatThread) + .options(selectinload(NewChatThread.messages)) + .filter( + NewChatThread.public_share_token == share_token, + NewChatThread.public_share_enabled.is_(True), + ) + ) + return result.scalars().first() From 88a9e623ba5b32ada9b3eccf698a975806f2f74d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 13:18:21 +0200 Subject: [PATCH 12/69] add public chat routes --- surfsense_backend/app/routes/__init__.py | 2 + .../app/routes/public_chat_routes.py | 63 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 surfsense_backend/app/routes/public_chat_routes.py diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index 76bb5101a..81bd887a5 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -30,6 +30,7 @@ from .notes_routes import router as notes_router from .notifications_routes import router as notifications_router from .notion_add_connector_route import router as notion_add_connector_router from .podcasts_routes import router as podcasts_router +from .public_chat_routes import router as public_chat_router from .rbac_routes import router as rbac_router from .search_source_connectors_routes import router as search_source_connectors_router from .search_spaces_routes import router as search_spaces_router @@ -67,3 +68,4 @@ router.include_router(circleback_webhook_router) # Circleback meeting webhooks router.include_router(surfsense_docs_router) # Surfsense documentation for citations router.include_router(notifications_router) # Notifications with Electric SQL sync router.include_router(composio_router) # Composio OAuth and toolkit management +router.include_router(public_chat_router) # Public chat sharing and cloning diff --git a/surfsense_backend/app/routes/public_chat_routes.py b/surfsense_backend/app/routes/public_chat_routes.py new file mode 100644 index 000000000..916a53249 --- /dev/null +++ b/surfsense_backend/app/routes/public_chat_routes.py @@ -0,0 +1,63 @@ +""" +Routes for public chat access (unauthenticated and mixed-auth endpoints). +""" + +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import User, get_async_session +from app.schemas.new_chat import ( + CloneInitiatedResponse, + PublicChatResponse, +) +from app.services.public_chat_service import ( + get_public_chat, + get_thread_by_share_token, +) +from app.users import current_active_user + +router = APIRouter(prefix="/public", tags=["public"]) + + +@router.get("/{share_token}", response_model=PublicChatResponse) +async def read_public_chat( + share_token: str, + session: AsyncSession = Depends(get_async_session), +): + """ + Get a public chat by share token. + + No authentication required. + Returns sanitized content (citations stripped, non-UI tools removed). + """ + return await get_public_chat(session, share_token) + + +@router.post("/{share_token}/clone", response_model=CloneInitiatedResponse) +async def clone_public_chat( + share_token: str, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Clone a public chat to the user's account. + + Requires authentication. + Initiates a background job to copy the chat. + """ + thread = await get_thread_by_share_token(session, share_token) + + if not thread: + raise HTTPException(status_code=404, detail="Not found") + + # TODO: Implement Celery task for cloning + # For now, return a placeholder response + # The actual implementation will: + # 1. Get user's default search space + # 2. Queue Celery task to clone thread, messages, and podcasts + # 3. Create notification on completion + + raise HTTPException( + status_code=501, + detail="Clone functionality not yet implemented", + ) From 90cf6d4b1b6236c76fd424d76b7f89f5413e9a95 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 13:22:47 +0200 Subject: [PATCH 13/69] add public share toggle endpoint --- .../app/routes/new_chat_routes.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 7631ec7eb..a619b8892 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -45,11 +45,14 @@ from app.schemas.new_chat import ( NewChatThreadUpdate, NewChatThreadVisibilityUpdate, NewChatThreadWithMessages, + PublicShareToggleRequest, + PublicShareToggleResponse, RegenerateRequest, ThreadHistoryLoadResponse, ThreadListItem, ThreadListResponse, ) +from app.services.public_chat_service import toggle_public_share from app.tasks.chat.stream_new_chat import stream_new_chat from app.users import current_active_user from app.utils.rbac import check_permission @@ -729,6 +732,32 @@ async def update_thread_visibility( ) from None +@router.patch( + "/threads/{thread_id}/public-share", response_model=PublicShareToggleResponse +) +async def update_thread_public_share( + thread_id: int, + request: Request, + toggle_request: PublicShareToggleRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Enable or disable public sharing for a thread. + + Only the creator of the thread can manage public sharing. + When enabled, returns a public URL that anyone can use to view the chat. + """ + base_url = str(request.base_url).rstrip("/") + return await toggle_public_share( + session=session, + thread_id=thread_id, + enabled=toggle_request.enabled, + user=user, + base_url=base_url, + ) + + # ============================================================================= # Message Endpoints # ============================================================================= From 37526b74a95e843afc3fc5de6fb9541c8d8954e7 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 14:36:52 +0200 Subject: [PATCH 14/69] add public_share_enabled to thread response schemas --- surfsense_backend/app/routes/new_chat_routes.py | 2 ++ surfsense_backend/app/schemas/new_chat.py | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index a619b8892..4571e9051 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -218,6 +218,7 @@ async def list_threads( visibility=thread.visibility, created_by_id=thread.created_by_id, is_own_thread=is_own_thread, + public_share_enabled=thread.public_share_enabled, created_at=thread.created_at, updated_at=thread.updated_at, ) @@ -319,6 +320,7 @@ async def search_threads( thread.created_by_id == user.id or (thread.created_by_id is None and is_search_space_owner) ), + public_share_enabled=thread.public_share_enabled, created_at=thread.created_at, updated_at=thread.updated_at, ) diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 5062dd846..ef2868495 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -95,6 +95,7 @@ class NewChatThreadRead(NewChatThreadBase, IDModel): search_space_id: int visibility: ChatVisibility created_by_id: UUID | None = None + public_share_enabled: bool = False created_at: datetime updated_at: datetime @@ -133,7 +134,8 @@ class ThreadListItem(BaseModel): archived: bool visibility: ChatVisibility created_by_id: UUID | None = None - is_own_thread: bool = False # True if the current user created this thread + is_own_thread: bool = False + public_share_enabled: bool = False created_at: datetime = Field(alias="createdAt") updated_at: datetime = Field(alias="updatedAt") From 1ab084aa3136a98b35d3275d63439e31a1f93eda Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 15:03:28 +0200 Subject: [PATCH 15/69] add clone public chat service logic --- .../app/services/public_chat_service.py | 215 ++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index af35834b0..5c2793451 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -197,3 +197,218 @@ async def get_thread_by_share_token( ) ) return result.scalars().first() + + +async def get_user_default_search_space( + session: AsyncSession, + user_id: UUID, +) -> int | None: + """ + Get user's default search space for cloning. + + Returns the first search space where user is owner, or None if not found. + """ + from app.db import SearchSpaceMembership + + result = await session.execute( + select(SearchSpaceMembership) + .filter( + SearchSpaceMembership.user_id == user_id, + SearchSpaceMembership.is_owner.is_(True), + ) + .limit(1) + ) + membership = result.scalars().first() + + if membership: + return membership.search_space_id + + return None + + +async def clone_public_chat( + session: AsyncSession, + share_token: str, + user_id: UUID, +) -> dict: + """ + Clone a public chat to user's account. + + Creates a new private thread with all messages and podcasts. + """ + import copy + + from app.db import ( + ChatVisibility, + NewChatMessage, + ) + + source_thread = await get_thread_by_share_token(session, share_token) + if not source_thread: + await _create_clone_failure_notification( + session, user_id, share_token, "Chat not found or no longer public" + ) + return {"status": "error", "error": "Chat not found or no longer public"} + + try: + target_search_space_id = await get_user_default_search_space(session, user_id) + + if target_search_space_id is None: + await _create_clone_failure_notification( + session, user_id, share_token, "No search space found" + ) + return {"status": "error", "error": "No search space found"} + + new_thread = NewChatThread( + title=source_thread.title, + archived=False, + visibility=ChatVisibility.PRIVATE, + search_space_id=target_search_space_id, + created_by_id=user_id, + public_share_enabled=False, + ) + session.add(new_thread) + await session.flush() + + podcast_id_map: dict[int, int] = {} + + for msg in sorted(source_thread.messages, key=lambda m: m.created_at): + new_content = copy.deepcopy(msg.content) + + if isinstance(new_content, list): + for part in new_content: + if ( + isinstance(part, dict) + and part.get("type") == "tool-call" + and part.get("toolName") == "generate_podcast" + ): + result = part.get("result", {}) + old_podcast_id = result.get("podcast_id") + if old_podcast_id and old_podcast_id not in podcast_id_map: + new_podcast_id = await _clone_podcast( + session, + old_podcast_id, + target_search_space_id, + ) + if new_podcast_id: + podcast_id_map[old_podcast_id] = new_podcast_id + + if old_podcast_id and old_podcast_id in podcast_id_map: + result["podcast_id"] = podcast_id_map[old_podcast_id] + + new_message = NewChatMessage( + thread_id=new_thread.id, + role=msg.role, + content=new_content, + author_id=msg.author_id, + created_at=msg.created_at, + ) + session.add(new_message) + + await session.commit() + + await _create_clone_success_notification( + session, + user_id, + new_thread.id, + target_search_space_id, + source_thread.title, + ) + + return { + "status": "success", + "thread_id": new_thread.id, + "search_space_id": target_search_space_id, + } + + except Exception as e: + await session.rollback() + await _create_clone_failure_notification(session, user_id, share_token, str(e)) + return {"status": "error", "error": str(e)} + + +async def _clone_podcast( + session: AsyncSession, + podcast_id: int, + target_search_space_id: int, +) -> int | None: + """Clone a podcast record and its audio file.""" + import shutil + import uuid + from pathlib import Path + + from app.db import Podcast + + result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) + original = result.scalars().first() + if not original: + return None + + new_file_path = None + if original.file_location: + original_path = Path(original.file_location) + if original_path.exists(): + new_filename = f"{uuid.uuid4()}_podcast.mp3" + new_dir = Path("podcasts") + new_dir.mkdir(parents=True, exist_ok=True) + new_file_path = str(new_dir / new_filename) + shutil.copy2(original.file_location, new_file_path) + + new_podcast = Podcast( + title=original.title, + podcast_transcript=original.podcast_transcript, + file_location=new_file_path, + search_space_id=target_search_space_id, + ) + session.add(new_podcast) + await session.flush() + + return new_podcast.id + + +async def _create_clone_success_notification( + session: AsyncSession, + user_id: UUID, + thread_id: int, + search_space_id: int, + original_title: str, +) -> None: + """Create success notification for clone operation.""" + from app.db import Notification + + notification = Notification( + user_id=user_id, + search_space_id=search_space_id, + type="chat_cloned", + title="Chat copied successfully", + message=f"Your copy of '{original_title}' is ready", + notification_metadata={ + "thread_id": thread_id, + "search_space_id": search_space_id, + }, + ) + session.add(notification) + await session.commit() + + +async def _create_clone_failure_notification( + session: AsyncSession, + user_id: UUID, + share_token: str, + error: str, +) -> None: + """Create failure notification for clone operation.""" + from app.db import Notification + + notification = Notification( + user_id=user_id, + type="chat_clone_failed", + title="Failed to copy chat", + message="Could not copy the chat. Please try again.", + notification_metadata={ + "share_token": share_token, + "error": error, + }, + ) + session.add(notification) + await session.commit() From 272e67566905ef227faaddb863a79c19a917a5eb Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 15:03:35 +0200 Subject: [PATCH 16/69] add clone celery task and update route --- .../app/routes/public_chat_routes.py | 25 +++---- .../tasks/celery_tasks/clone_chat_tasks.py | 66 +++++++++++++++++++ 2 files changed, 79 insertions(+), 12 deletions(-) create mode 100644 surfsense_backend/app/tasks/celery_tasks/clone_chat_tasks.py diff --git a/surfsense_backend/app/routes/public_chat_routes.py b/surfsense_backend/app/routes/public_chat_routes.py index 916a53249..ca70e911a 100644 --- a/surfsense_backend/app/routes/public_chat_routes.py +++ b/surfsense_backend/app/routes/public_chat_routes.py @@ -28,13 +28,13 @@ async def read_public_chat( Get a public chat by share token. No authentication required. - Returns sanitized content (citations stripped, non-UI tools removed). + Returns sanitized content (citations stripped). """ return await get_public_chat(session, share_token) @router.post("/{share_token}/clone", response_model=CloneInitiatedResponse) -async def clone_public_chat( +async def clone_public_chat_endpoint( share_token: str, session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), @@ -45,19 +45,20 @@ async def clone_public_chat( Requires authentication. Initiates a background job to copy the chat. """ + from app.tasks.celery_tasks.clone_chat_tasks import clone_public_chat_task + thread = await get_thread_by_share_token(session, share_token) if not thread: raise HTTPException(status_code=404, detail="Not found") - # TODO: Implement Celery task for cloning - # For now, return a placeholder response - # The actual implementation will: - # 1. Get user's default search space - # 2. Queue Celery task to clone thread, messages, and podcasts - # 3. Create notification on completion - - raise HTTPException( - status_code=501, - detail="Clone functionality not yet implemented", + task_result = clone_public_chat_task.delay( + share_token=share_token, + user_id=str(user.id), + ) + + return CloneInitiatedResponse( + status="processing", + task_id=task_result.id, + message="Copying chat to your account...", ) diff --git a/surfsense_backend/app/tasks/celery_tasks/clone_chat_tasks.py b/surfsense_backend/app/tasks/celery_tasks/clone_chat_tasks.py new file mode 100644 index 000000000..b846ee555 --- /dev/null +++ b/surfsense_backend/app/tasks/celery_tasks/clone_chat_tasks.py @@ -0,0 +1,66 @@ +"""Celery tasks for cloning public chats.""" + +import asyncio +import logging + +from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine +from sqlalchemy.pool import NullPool + +from app.celery_app import celery_app +from app.config import config + +logger = logging.getLogger(__name__) + + +def get_celery_session_maker(): + """Create a new async session maker for Celery tasks.""" + engine = create_async_engine( + config.DATABASE_URL, + poolclass=NullPool, + echo=False, + ) + return async_sessionmaker(engine, expire_on_commit=False) + + +@celery_app.task(name="clone_public_chat", bind=True) +def clone_public_chat_task( + self, + share_token: str, + user_id: str, +) -> dict: + """ + Celery task to clone a public chat to user's account. + + Args: + share_token: Public share token of the chat to clone + user_id: UUID string of the user cloning the chat + + Returns: + dict with status and thread_id on success, or error info on failure + """ + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + try: + result = loop.run_until_complete(_run_clone(share_token, user_id)) + return result + except Exception as e: + logger.error(f"Error cloning public chat: {e!s}") + return {"status": "error", "error": str(e)} + finally: + asyncio.set_event_loop(None) + loop.close() + + +async def _run_clone(share_token: str, user_id: str) -> dict: + """Run the clone operation with a fresh database session.""" + from uuid import UUID + + from app.services.public_chat_service import clone_public_chat + + async with get_celery_session_maker()() as session: + return await clone_public_chat( + session=session, + share_token=share_token, + user_id=UUID(user_id), + ) From 062998738a3c5326d9fe83d7ffd359acd47990d1 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 15:56:15 +0200 Subject: [PATCH 17/69] feat: add thread_id column to Podcast model --- .../versions/80_add_thread_id_to_podcasts.py | 40 +++++++++++++++++++ surfsense_backend/app/db.py | 8 ++++ 2 files changed, 48 insertions(+) create mode 100644 surfsense_backend/alembic/versions/80_add_thread_id_to_podcasts.py diff --git a/surfsense_backend/alembic/versions/80_add_thread_id_to_podcasts.py b/surfsense_backend/alembic/versions/80_add_thread_id_to_podcasts.py new file mode 100644 index 000000000..ea66a09a1 --- /dev/null +++ b/surfsense_backend/alembic/versions/80_add_thread_id_to_podcasts.py @@ -0,0 +1,40 @@ +"""Add thread_id to podcasts + +Revision ID: 80 +Revises: 79 +Create Date: 2026-01-23 + +""" + +from collections.abc import Sequence + +from alembic import op + +revision: str = "80" +down_revision: str | None = "79" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add thread_id column to podcasts.""" + op.execute( + """ + ALTER TABLE podcasts + ADD COLUMN IF NOT EXISTS thread_id INTEGER + REFERENCES new_chat_threads(id) ON DELETE SET NULL; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_podcasts_thread_id + ON podcasts(thread_id); + """ + ) + + +def downgrade() -> None: + """Remove thread_id column from podcasts.""" + op.execute("DROP INDEX IF EXISTS ix_podcasts_thread_id") + op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS thread_id") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index d13e0cbdd..7018e613c 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -693,6 +693,14 @@ class Podcast(BaseModel, TimestampMixin): ) search_space = relationship("SearchSpace", back_populates="podcasts") + thread_id = Column( + Integer, + ForeignKey("new_chat_threads.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + thread = relationship("NewChatThread") + class SearchSpace(BaseModel, TimestampMixin): __tablename__ = "searchspaces" From 7017a14107e8e06f2d16d74470f225cdc3ff3741 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 15:56:34 +0200 Subject: [PATCH 18/69] feat: pass thread_id through podcast generation chain --- surfsense_backend/app/agents/new_chat/chat_deepagent.py | 2 ++ surfsense_backend/app/agents/new_chat/tools/podcast.py | 3 +++ surfsense_backend/app/agents/new_chat/tools/registry.py | 3 ++- surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py | 5 +++++ surfsense_backend/app/tasks/chat/stream_new_chat.py | 1 + 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index 5bc6ac2e2..1a2029c42 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -35,6 +35,7 @@ async def create_surfsense_deep_agent( connector_service: ConnectorService, checkpointer: Checkpointer, user_id: str | None = None, + thread_id: int | None = None, agent_config: AgentConfig | None = None, enabled_tools: list[str] | None = None, disabled_tools: list[str] | None = None, @@ -123,6 +124,7 @@ async def create_surfsense_deep_agent( "connector_service": connector_service, "firecrawl_api_key": firecrawl_api_key, "user_id": user_id, # Required for memory tools + "thread_id": thread_id, # For podcast tool } # Build tools using the async registry (includes MCP tools) diff --git a/surfsense_backend/app/agents/new_chat/tools/podcast.py b/surfsense_backend/app/agents/new_chat/tools/podcast.py index ff567bf73..d4e023f6f 100644 --- a/surfsense_backend/app/agents/new_chat/tools/podcast.py +++ b/surfsense_backend/app/agents/new_chat/tools/podcast.py @@ -69,6 +69,7 @@ def clear_active_podcast_task(search_space_id: int) -> None: def create_generate_podcast_tool( search_space_id: int, db_session: AsyncSession, + thread_id: int | None = None, ): """ Factory function to create the generate_podcast tool with injected dependencies. @@ -76,6 +77,7 @@ def create_generate_podcast_tool( Args: search_space_id: The user's search space ID db_session: Database session (not used - Celery creates its own) + thread_id: The chat thread ID for associating the podcast Returns: A configured tool function for generating podcasts @@ -145,6 +147,7 @@ def create_generate_podcast_tool( search_space_id=search_space_id, podcast_title=podcast_title, user_prompt=user_prompt, + thread_id=thread_id, ) # Mark this task as active diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py index e4ce7a6b7..8eeff18b8 100644 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -102,8 +102,9 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ factory=lambda deps: create_generate_podcast_tool( search_space_id=deps["search_space_id"], db_session=deps["db_session"], + thread_id=deps["thread_id"], ), - requires=["search_space_id", "db_session"], + requires=["search_space_id", "db_session", "thread_id"], ), # Link preview tool - fetches Open Graph metadata for URLs ToolDefinition( diff --git a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py index 34b9b827c..862234b46 100644 --- a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py @@ -67,6 +67,7 @@ def generate_content_podcast_task( search_space_id: int, podcast_title: str = "SurfSense Podcast", user_prompt: str | None = None, + thread_id: int | None = None, ) -> dict: """ Celery task to generate podcast from source content (for new-chat). @@ -78,6 +79,7 @@ def generate_content_podcast_task( search_space_id: ID of the search space podcast_title: Title for the podcast user_prompt: Optional instructions for podcast style/tone + thread_id: Optional ID of the chat thread that generated this podcast Returns: dict with podcast_id on success, or error info on failure @@ -92,6 +94,7 @@ def generate_content_podcast_task( search_space_id, podcast_title, user_prompt, + thread_id, ) ) loop.run_until_complete(loop.shutdown_asyncgens()) @@ -111,6 +114,7 @@ async def _generate_content_podcast( search_space_id: int, podcast_title: str = "SurfSense Podcast", user_prompt: str | None = None, + thread_id: int | None = None, ) -> dict: """Generate content-based podcast with new session.""" async with get_celery_session_maker()() as session: @@ -158,6 +162,7 @@ async def _generate_content_podcast( podcast_transcript=serializable_transcript, file_location=file_path, search_space_id=search_space_id, + thread_id=thread_id, ) session.add(podcast) await session.commit() diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index a49c244eb..af09c4702 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -255,6 +255,7 @@ async def stream_new_chat( connector_service=connector_service, checkpointer=checkpointer, user_id=user_id, # Pass user ID for memory tools + thread_id=chat_id, # Pass chat ID for podcast association agent_config=agent_config, # Pass prompt configuration firecrawl_api_key=firecrawl_api_key, # Pass Firecrawl API key if configured ) From aeb0deb21eab21b62f9d6300b26d7cfa196386f9 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 15:56:49 +0200 Subject: [PATCH 19/69] feat: enable public access for podcasts in shared chats --- .../app/routes/podcasts_routes.py | 41 ++++++++++--------- .../app/services/public_chat_service.py | 27 ++++++++++++ surfsense_backend/app/users.py | 1 + 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/surfsense_backend/app/routes/podcasts_routes.py b/surfsense_backend/app/routes/podcasts_routes.py index ef362edb5..467ef8d23 100644 --- a/surfsense_backend/app/routes/podcasts_routes.py +++ b/surfsense_backend/app/routes/podcasts_routes.py @@ -25,7 +25,7 @@ from app.db import ( get_async_session, ) from app.schemas import PodcastRead -from app.users import current_active_user +from app.users import current_active_user, current_optional_user from app.utils.rbac import check_permission router = APIRouter() @@ -161,46 +161,49 @@ async def delete_podcast( async def stream_podcast( podcast_id: int, session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), + user: User | None = Depends(current_optional_user), ): """ Stream a podcast audio file. - Requires PODCASTS_READ permission for the search space. + + Access is allowed if: + - User is authenticated with PODCASTS_READ permission, OR + - Podcast belongs to a publicly shared thread Note: Both /stream and /audio endpoints are supported for compatibility. """ + from app.services.public_chat_service import is_podcast_publicly_accessible + try: result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) podcast = result.scalars().first() if not podcast: - raise HTTPException( - status_code=404, - detail="Podcast not found", + raise HTTPException(status_code=404, detail="Podcast not found") + + is_public = await is_podcast_publicly_accessible(session, podcast_id) + + if not is_public: + if not user: + raise HTTPException(status_code=401, detail="Authentication required") + + await check_permission( + session, + user, + podcast.search_space_id, + Permission.PODCASTS_READ.value, + "You don't have permission to access podcasts in this search space", ) - # Check permission for the search space - await check_permission( - session, - user, - podcast.search_space_id, - Permission.PODCASTS_READ.value, - "You don't have permission to access podcasts in this search space", - ) - - # Get the file path file_path = podcast.file_location - # Check if the file exists if not file_path or not os.path.isfile(file_path): raise HTTPException(status_code=404, detail="Podcast audio file not found") - # Define a generator function to stream the file def iterfile(): with open(file_path, mode="rb") as file_like: yield from file_like - # Return a streaming response with appropriate headers return StreamingResponse( iterfile(), media_type="audio/mpeg", diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index 5c2793451..08523c1f2 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -289,6 +289,7 @@ async def clone_public_chat( session, old_podcast_id, target_search_space_id, + new_thread.id, ) if new_podcast_id: podcast_id_map[old_podcast_id] = new_podcast_id @@ -331,6 +332,7 @@ async def _clone_podcast( session: AsyncSession, podcast_id: int, target_search_space_id: int, + target_thread_id: int, ) -> int | None: """Clone a podcast record and its audio file.""" import shutil @@ -359,6 +361,7 @@ async def _clone_podcast( podcast_transcript=original.podcast_transcript, file_location=new_file_path, search_space_id=target_search_space_id, + thread_id=target_thread_id, ) session.add(new_podcast) await session.flush() @@ -412,3 +415,27 @@ async def _create_clone_failure_notification( ) session.add(notification) await session.commit() + + +async def is_podcast_publicly_accessible( + session: AsyncSession, + podcast_id: int, +) -> bool: + """ + Check if a podcast belongs to a publicly shared thread. + + Uses the thread_id foreign key for efficient lookup. + """ + from app.db import Podcast + + result = await session.execute( + select(Podcast) + .options(selectinload(Podcast.thread)) + .filter(Podcast.id == podcast_id) + ) + podcast = result.scalars().first() + + if not podcast or not podcast.thread: + return False + + return podcast.thread.public_share_enabled diff --git a/surfsense_backend/app/users.py b/surfsense_backend/app/users.py index e86eb752b..4be2fe525 100644 --- a/surfsense_backend/app/users.py +++ b/surfsense_backend/app/users.py @@ -229,3 +229,4 @@ auth_backend = AuthenticationBackend( fastapi_users = FastAPIUsers[User, uuid.UUID](get_user_manager, [auth_backend]) current_active_user = fastapi_users.current_user(active=True) +current_optional_user = fastapi_users.current_user(active=True, optional=True) From 9d7259aab941c5c8b32334a3aa96732221d04ead Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 16:11:55 +0200 Subject: [PATCH 20/69] feat(web): add public chat and thread API types and services --- .../contracts/types/chat-threads.types.ts | 19 ++++++ .../contracts/types/public-chat.types.ts | 61 +++++++++++++++++++ surfsense_web/lib/apis/base-api.service.ts | 10 ++- .../lib/apis/chat-threads-api.service.ts | 33 ++++++++++ .../lib/apis/public-chat-api.service.ts | 49 +++++++++++++++ 5 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 surfsense_web/contracts/types/chat-threads.types.ts create mode 100644 surfsense_web/contracts/types/public-chat.types.ts create mode 100644 surfsense_web/lib/apis/chat-threads-api.service.ts create mode 100644 surfsense_web/lib/apis/public-chat-api.service.ts diff --git a/surfsense_web/contracts/types/chat-threads.types.ts b/surfsense_web/contracts/types/chat-threads.types.ts new file mode 100644 index 000000000..e5ca183bd --- /dev/null +++ b/surfsense_web/contracts/types/chat-threads.types.ts @@ -0,0 +1,19 @@ +import { z } from "zod"; + +/** + * Toggle public share + */ +export const togglePublicShareRequest = z.object({ + thread_id: z.number(), + enabled: z.boolean(), +}); + +export const togglePublicShareResponse = z.object({ + enabled: z.boolean(), + public_url: z.string().nullable(), + share_token: z.string().nullable(), +}); + +// Type exports +export type TogglePublicShareRequest = z.infer; +export type TogglePublicShareResponse = z.infer; diff --git a/surfsense_web/contracts/types/public-chat.types.ts b/surfsense_web/contracts/types/public-chat.types.ts new file mode 100644 index 000000000..709bedcb7 --- /dev/null +++ b/surfsense_web/contracts/types/public-chat.types.ts @@ -0,0 +1,61 @@ +import { z } from "zod"; + +/** + * Author info for public chat + */ +export const publicAuthor = z.object({ + display_name: z.string().nullable(), + avatar_url: z.string().nullable(), +}); + +/** + * Message in a public chat + */ +export const publicChatMessage = z.object({ + role: z.string(), + content: z.unknown(), + author: publicAuthor.nullable(), + created_at: z.string(), +}); + +/** + * Thread info for public chat + */ +export const publicChatThread = z.object({ + title: z.string(), + created_at: z.string(), +}); + +/** + * Get public chat + */ +export const getPublicChatRequest = z.object({ + share_token: z.string(), +}); + +export const getPublicChatResponse = z.object({ + thread: publicChatThread, + messages: z.array(publicChatMessage), +}); + +/** + * Clone public chat + */ +export const clonePublicChatRequest = z.object({ + share_token: z.string(), +}); + +export const clonePublicChatResponse = z.object({ + status: z.string(), + task_id: z.string(), + message: z.string(), +}); + +// Type exports +export type PublicAuthor = z.infer; +export type PublicChatMessage = z.infer; +export type PublicChatThread = z.infer; +export type GetPublicChatRequest = z.infer; +export type GetPublicChatResponse = z.infer; +export type ClonePublicChatRequest = z.infer; +export type ClonePublicChatResponse = z.infer; diff --git a/surfsense_web/lib/apis/base-api.service.ts b/surfsense_web/lib/apis/base-api.service.ts index dcff4768b..a87d4deaf 100644 --- a/surfsense_web/lib/apis/base-api.service.ts +++ b/surfsense_web/lib/apis/base-api.service.ts @@ -23,7 +23,10 @@ export type RequestOptions = { class BaseApiService { baseUrl: string; - noAuthEndpoints: string[] = ["/auth/jwt/login", "/auth/register", "/auth/refresh"]; // Add more endpoints as needed + noAuthEndpoints: string[] = ["/auth/jwt/login", "/auth/register", "/auth/refresh"]; + + // Prefixes that don't require auth (checked with startsWith) + noAuthPrefixes: string[] = ["/api/v1/public/"]; // Use a getter to always read fresh token from localStorage // This ensures the token is always up-to-date after login/logout @@ -84,7 +87,10 @@ class BaseApiService { } // Validate the bearer token - if (!this.bearerToken && !this.noAuthEndpoints.includes(url)) { + const isNoAuthEndpoint = + this.noAuthEndpoints.includes(url) || + this.noAuthPrefixes.some((prefix) => url.startsWith(prefix)); + if (!this.bearerToken && !isNoAuthEndpoint) { throw new AuthenticationError("You are not authenticated. Please login again."); } diff --git a/surfsense_web/lib/apis/chat-threads-api.service.ts b/surfsense_web/lib/apis/chat-threads-api.service.ts new file mode 100644 index 000000000..9ad241c42 --- /dev/null +++ b/surfsense_web/lib/apis/chat-threads-api.service.ts @@ -0,0 +1,33 @@ +import { + type TogglePublicShareRequest, + type TogglePublicShareResponse, + togglePublicShareRequest, + togglePublicShareResponse, +} from "@/contracts/types/chat-threads.types"; +import { ValidationError } from "../error"; +import { baseApiService } from "./base-api.service"; + +class ChatThreadsApiService { + /** + * Toggle public sharing for a thread. + * Requires authentication. + */ + togglePublicShare = async ( + request: TogglePublicShareRequest + ): Promise => { + const parsed = togglePublicShareRequest.safeParse(request); + + if (!parsed.success) { + const errorMessage = parsed.error.issues.map((issue) => issue.message).join(", "); + throw new ValidationError(`Invalid request: ${errorMessage}`); + } + + return baseApiService.patch( + `/api/v1/threads/${parsed.data.thread_id}/public-share`, + togglePublicShareResponse, + { body: { enabled: parsed.data.enabled } } + ); + }; +} + +export const chatThreadsApiService = new ChatThreadsApiService(); diff --git a/surfsense_web/lib/apis/public-chat-api.service.ts b/surfsense_web/lib/apis/public-chat-api.service.ts new file mode 100644 index 000000000..52a7c1363 --- /dev/null +++ b/surfsense_web/lib/apis/public-chat-api.service.ts @@ -0,0 +1,49 @@ +import { + type ClonePublicChatRequest, + type ClonePublicChatResponse, + clonePublicChatRequest, + clonePublicChatResponse, + type GetPublicChatRequest, + type GetPublicChatResponse, + getPublicChatRequest, + getPublicChatResponse, +} from "@/contracts/types/public-chat.types"; +import { ValidationError } from "../error"; +import { baseApiService } from "./base-api.service"; + +class PublicChatApiService { + /** + * Get a public chat by share token. + * No authentication required. + */ + getPublicChat = async (request: GetPublicChatRequest): Promise => { + const parsed = getPublicChatRequest.safeParse(request); + + if (!parsed.success) { + const errorMessage = parsed.error.issues.map((issue) => issue.message).join(", "); + throw new ValidationError(`Invalid request: ${errorMessage}`); + } + + return baseApiService.get(`/api/v1/public/${parsed.data.share_token}`, getPublicChatResponse); + }; + + /** + * Clone a public chat to the user's account. + * Requires authentication. + */ + clonePublicChat = async (request: ClonePublicChatRequest): Promise => { + const parsed = clonePublicChatRequest.safeParse(request); + + if (!parsed.success) { + const errorMessage = parsed.error.issues.map((issue) => issue.message).join(", "); + throw new ValidationError(`Invalid request: ${errorMessage}`); + } + + return baseApiService.post( + `/api/v1/public/${parsed.data.share_token}/clone`, + clonePublicChatResponse + ); + }; +} + +export const publicChatApiService = new PublicChatApiService(); From 37adc54d6a9ea85010df8a19a703e21126594bdd Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 17:08:26 +0200 Subject: [PATCH 21/69] feat: add public chat frontend --- surfsense_web/app/public/[token]/page.tsx | 11 ++ surfsense_web/components/homepage/navbar.tsx | 8 +- .../public-chat/public-chat-footer.tsx | 56 ++++++ .../public-chat/public-chat-header.tsx | 34 ++++ .../public-chat/public-chat-view.tsx | 58 ++++++ .../components/public-chat/public-thread.tsx | 179 ++++++++++++++++++ .../hooks/use-public-chat-runtime.ts | 53 ++++++ surfsense_web/hooks/use-public-chat.ts | 14 ++ surfsense_web/lib/query-client/cache-keys.ts | 3 + 9 files changed, 415 insertions(+), 1 deletion(-) create mode 100644 surfsense_web/app/public/[token]/page.tsx create mode 100644 surfsense_web/components/public-chat/public-chat-footer.tsx create mode 100644 surfsense_web/components/public-chat/public-chat-header.tsx create mode 100644 surfsense_web/components/public-chat/public-chat-view.tsx create mode 100644 surfsense_web/components/public-chat/public-thread.tsx create mode 100644 surfsense_web/hooks/use-public-chat-runtime.ts create mode 100644 surfsense_web/hooks/use-public-chat.ts diff --git a/surfsense_web/app/public/[token]/page.tsx b/surfsense_web/app/public/[token]/page.tsx new file mode 100644 index 000000000..530664ac6 --- /dev/null +++ b/surfsense_web/app/public/[token]/page.tsx @@ -0,0 +1,11 @@ +"use client"; + +import { useParams } from "next/navigation"; +import { PublicChatView } from "@/components/public-chat/public-chat-view"; + +export default function PublicChatPage() { + const params = useParams(); + const token = params.token as string; + + return ; +} diff --git a/surfsense_web/components/homepage/navbar.tsx b/surfsense_web/components/homepage/navbar.tsx index 2a8820bd6..c83d3556a 100644 --- a/surfsense_web/components/homepage/navbar.tsx +++ b/surfsense_web/components/homepage/navbar.tsx @@ -1,5 +1,11 @@ "use client"; -import { IconBrandDiscord, IconBrandGithub, IconBrandReddit, IconMenu2, IconX } from "@tabler/icons-react"; +import { + IconBrandDiscord, + IconBrandGithub, + IconBrandReddit, + IconMenu2, + IconX, +} from "@tabler/icons-react"; import { AnimatePresence, motion } from "motion/react"; import Link from "next/link"; import { useEffect, useState } from "react"; diff --git a/surfsense_web/components/public-chat/public-chat-footer.tsx b/surfsense_web/components/public-chat/public-chat-footer.tsx new file mode 100644 index 000000000..06e3d9975 --- /dev/null +++ b/surfsense_web/components/public-chat/public-chat-footer.tsx @@ -0,0 +1,56 @@ +"use client"; + +import { Copy, Loader2 } from "lucide-react"; +import { useRouter } from "next/navigation"; +import { useState } from "react"; +import { toast } from "sonner"; +import { Button } from "@/components/ui/button"; +import { publicChatApiService } from "@/lib/apis/public-chat-api.service"; +import { getBearerToken } from "@/lib/auth-utils"; + +interface PublicChatFooterProps { + shareToken: string; +} + +export function PublicChatFooter({ shareToken }: PublicChatFooterProps) { + const router = useRouter(); + const [isCloning, setIsCloning] = useState(false); + + const handleCopyAndContinue = async () => { + const token = getBearerToken(); + + if (!token) { + const returnUrl = encodeURIComponent(`/public/${shareToken}`); + router.push(`/login?returnUrl=${returnUrl}&action=clone`); + return; + } + + setIsCloning(true); + + try { + await publicChatApiService.clonePublicChat({ + share_token: shareToken, + }); + + toast.success("Copying chat to your account...", { + description: "You'll be notified when it's ready.", + }); + + router.push("/dashboard"); + } catch (error) { + const message = error instanceof Error ? error.message : "Failed to copy chat"; + toast.error(message); + } finally { + setIsCloning(false); + } + }; + + return ( +
+ +
+ ); +} diff --git a/surfsense_web/components/public-chat/public-chat-header.tsx b/surfsense_web/components/public-chat/public-chat-header.tsx new file mode 100644 index 000000000..6f6e40a52 --- /dev/null +++ b/surfsense_web/components/public-chat/public-chat-header.tsx @@ -0,0 +1,34 @@ +import { formatDistanceToNow } from "date-fns"; +import Image from "next/image"; +import Link from "next/link"; + +interface PublicChatHeaderProps { + title: string; + createdAt: string; +} + +export function PublicChatHeader({ title, createdAt }: PublicChatHeaderProps) { + const timeAgo = formatDistanceToNow(new Date(createdAt), { addSuffix: true }); + + return ( +
+
+
+ + SurfSense + +
+

{title}

+

{timeAgo}

+
+
+
+
+ ); +} diff --git a/surfsense_web/components/public-chat/public-chat-view.tsx b/surfsense_web/components/public-chat/public-chat-view.tsx new file mode 100644 index 000000000..1b7543712 --- /dev/null +++ b/surfsense_web/components/public-chat/public-chat-view.tsx @@ -0,0 +1,58 @@ +"use client"; + +import { AssistantRuntimeProvider } from "@assistant-ui/react"; +import { Loader2 } from "lucide-react"; +import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; +import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; +import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; +import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage"; +import { usePublicChat } from "@/hooks/use-public-chat"; +import { usePublicChatRuntime } from "@/hooks/use-public-chat-runtime"; +import { PublicChatFooter } from "./public-chat-footer"; +import { PublicChatHeader } from "./public-chat-header"; +import { PublicThread } from "./public-thread"; + +interface PublicChatViewProps { + shareToken: string; +} + +export function PublicChatView({ shareToken }: PublicChatViewProps) { + const { data, isLoading, error } = usePublicChat(shareToken); + const runtime = usePublicChatRuntime({ data }); + + if (isLoading) { + return ( +
+ +
+ ); + } + + if (error || !data) { + return ( +
+

Chat not found

+

+ This chat may have been removed or is no longer public. +

+
+ ); + } + + return ( + + {/* Tool UIs for rendering tool results */} + + + + + +
+ } + footer={} + /> +
+
+ ); +} diff --git a/surfsense_web/components/public-chat/public-thread.tsx b/surfsense_web/components/public-chat/public-thread.tsx new file mode 100644 index 000000000..2fe1ecff6 --- /dev/null +++ b/surfsense_web/components/public-chat/public-thread.tsx @@ -0,0 +1,179 @@ +"use client"; + +import { + ActionBarPrimitive, + AssistantIf, + MessagePrimitive, + ThreadPrimitive, + useAssistantState, +} from "@assistant-ui/react"; +import { CheckIcon, CopyIcon } from "lucide-react"; +import { type FC, type ReactNode, useState } from "react"; +import { MarkdownText } from "@/components/assistant-ui/markdown-text"; +import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; +import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; +import { cn } from "@/lib/utils"; + +interface PublicThreadProps { + header?: ReactNode; + footer?: ReactNode; +} + +/** + * Read-only thread component for public chat viewing. + * No composer, no edit capabilities - just message display. + */ +export const PublicThread: FC = ({ header, footer }) => { + return ( + + + {header} + + + + {/* Spacer to ensure footer doesn't overlap last message */} +
+ + + {footer && ( +
+ {footer} +
+ )} + + ); +}; + +/** + * User avatar component with fallback to initials + */ +interface AuthorMetadata { + displayName: string | null; + avatarUrl: string | null; +} + +const UserAvatar: FC void }> = ({ + displayName, + avatarUrl, + hasError, + onError, +}) => { + const initials = displayName + ? displayName + .split(" ") + .map((n) => n[0]) + .join("") + .toUpperCase() + .slice(0, 2) + : "U"; + + if (avatarUrl && !hasError) { + return ( + {displayName + ); + } + + return ( +
+ {initials} +
+ ); +}; + +const PublicUserMessage: FC = () => { + const metadata = useAssistantState(({ message }) => message?.metadata); + const author = metadata?.custom?.author as AuthorMetadata | undefined; + + return ( + +
+
+
+ +
+
+ {author && ( +
+ +
+ )} +
+
+ ); +}; + +const UserAvatarWithState: FC = ({ displayName, avatarUrl }) => { + const [hasError, setHasError] = useState(false); + return ( + setHasError(true)} + /> + ); +}; + +const PublicAssistantMessage: FC = () => { + return ( + +
+ +
+ +
+ +
+
+ ); +}; + +const PublicAssistantActionBar: FC = () => { + return ( + + + + message.isCopied}> + + + !message.isCopied}> + + + + + + ); +}; diff --git a/surfsense_web/hooks/use-public-chat-runtime.ts b/surfsense_web/hooks/use-public-chat-runtime.ts new file mode 100644 index 000000000..cc7e95fdc --- /dev/null +++ b/surfsense_web/hooks/use-public-chat-runtime.ts @@ -0,0 +1,53 @@ +"use client"; + +import { + type AppendMessage, + type ThreadMessageLike, + useExternalStoreRuntime, +} from "@assistant-ui/react"; +import { useCallback, useMemo } from "react"; +import type { GetPublicChatResponse, PublicChatMessage } from "@/contracts/types/public-chat.types"; + +interface UsePublicChatRuntimeOptions { + data: GetPublicChatResponse | undefined; +} + +/** + * Creates a read-only runtime for public chat viewing. + */ +export function usePublicChatRuntime({ data }: UsePublicChatRuntimeOptions) { + const messages = useMemo(() => data?.messages ?? [], [data?.messages]); + + // No-op - public chat is read-only + const onNew = useCallback(async (_message: AppendMessage) => {}, []); + + // Convert PublicChatMessage to ThreadMessageLike + const convertMessage = useCallback( + (msg: PublicChatMessage, idx: number): ThreadMessageLike => ({ + id: `public-msg-${idx}`, + role: msg.role as "user" | "assistant", + content: msg.content as ThreadMessageLike["content"], + createdAt: new Date(msg.created_at), + metadata: msg.author + ? { + custom: { + author: { + displayName: msg.author.display_name, + avatarUrl: msg.author.avatar_url, + }, + }, + } + : undefined, + }), + [] + ); + + const runtime = useExternalStoreRuntime({ + isRunning: false, + messages, + onNew, + convertMessage, + }); + + return runtime; +} diff --git a/surfsense_web/hooks/use-public-chat.ts b/surfsense_web/hooks/use-public-chat.ts new file mode 100644 index 000000000..83f34712e --- /dev/null +++ b/surfsense_web/hooks/use-public-chat.ts @@ -0,0 +1,14 @@ +import { useQuery } from "@tanstack/react-query"; +import type { GetPublicChatResponse } from "@/contracts/types/public-chat.types"; +import { publicChatApiService } from "@/lib/apis/public-chat-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; + +export function usePublicChat(shareToken: string) { + return useQuery({ + queryKey: cacheKeys.publicChat.byToken(shareToken), + queryFn: () => publicChatApiService.getPublicChat({ share_token: shareToken }), + enabled: !!shareToken, + staleTime: 30_000, + retry: false, + }); +} diff --git a/surfsense_web/lib/query-client/cache-keys.ts b/surfsense_web/lib/query-client/cache-keys.ts index 72f2bbd54..19ddbce7b 100644 --- a/surfsense_web/lib/query-client/cache-keys.ts +++ b/surfsense_web/lib/query-client/cache-keys.ts @@ -75,4 +75,7 @@ export const cacheKeys = { comments: { byMessage: (messageId: number) => ["comments", "message", messageId] as const, }, + publicChat: { + byToken: (shareToken: string) => ["public-chat", shareToken] as const, + }, }; From ee65e1377f3fb9142b9b88291042f2aae096cdba Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 18:39:59 +0200 Subject: [PATCH 22/69] feat: improve public chat UI and shared components --- .vscode/settings.json | 3 +- .../app/services/public_chat_service.py | 15 ++- .../new-chat/[[...chat_id]]/page.tsx | 108 +---------------- .../atoms/chat/chat-thread-mutation.atoms.ts | 28 +++++ .../atoms/chat/current-thread.atom.ts | 4 + .../components/auth/sign-in-button.tsx | 88 ++++++++++++++ surfsense_web/components/homepage/navbar.tsx | 68 +---------- .../components/new-chat/chat-share-button.tsx | 114 ++++++++++++++++-- .../public-chat/public-chat-header.tsx | 34 ------ .../public-chat/public-chat-view.tsx | 52 ++++---- .../components/public-chat/public-thread.tsx | 12 +- .../hooks/use-public-chat-runtime.ts | 42 +++---- surfsense_web/lib/chat/message-utils.ts | 109 +++++++++++++++++ surfsense_web/lib/chat/thread-persistence.ts | 1 + 14 files changed, 403 insertions(+), 275 deletions(-) create mode 100644 surfsense_web/atoms/chat/chat-thread-mutation.atoms.ts create mode 100644 surfsense_web/components/auth/sign-in-button.tsx delete mode 100644 surfsense_web/components/public-chat/public-chat-header.tsx create mode 100644 surfsense_web/lib/chat/message-utils.ts diff --git a/.vscode/settings.json b/.vscode/settings.json index f134660b6..05bd30702 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,4 @@ { - "biome.configurationPath": "./surfsense_web/biome.json" + "biome.configurationPath": "./surfsense_web/biome.json", + "deepscan.ignoreConfirmWarning": true } \ No newline at end of file diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index 08523c1f2..42a26c403 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -23,9 +23,18 @@ UI_TOOLS = { def strip_citations(text: str) -> str: - """Remove [citation:X] and [citation:doc-X] patterns from text.""" - text = re.sub(r"\[citation:(doc-)?\d+\]", "", text) - text = re.sub(r"\s+", " ", text) + """ + Remove [citation:X] and [citation:doc-X] patterns from text. + Preserves newlines to maintain markdown formatting. + """ + # Remove citation patterns (including Chinese brackets 【】) + text = re.sub(r"[\[【]citation:(doc-)?\d+[\]】]", "", text) + # Collapse multiple spaces/tabs (but NOT newlines) into single space + text = re.sub(r"[^\S\n]+", " ", text) + # Normalize excessive blank lines (3+ newlines → 2) + text = re.sub(r"\n{3,}", "\n\n", text) + # Clean up spaces around newlines + text = re.sub(r" *\n *", "\n", text) return text.strip() diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 59e7878c4..2af50f8e2 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -44,6 +44,7 @@ import { looksLikePodcastRequest, setActivePodcastTaskId, } from "@/lib/chat/podcast-state"; +import { convertToThreadMessage } from "@/lib/chat/message-utils"; import { appendMessage, type ChatVisibility, @@ -108,111 +109,6 @@ function extractMentionedDocuments(content: unknown): MentionedDocumentInfo[] { return []; } -/** - * Zod schema for persisted attachment info - */ -const PersistedAttachmentSchema = z.object({ - id: z.string(), - name: z.string(), - type: z.string(), - contentType: z.string().optional(), - imageDataUrl: z.string().optional(), - extractedContent: z.string().optional(), -}); - -const AttachmentsPartSchema = z.object({ - type: z.literal("attachments"), - items: z.array(PersistedAttachmentSchema), -}); - -type PersistedAttachment = z.infer; - -/** - * Extract persisted attachments from message content (type-safe with Zod) - */ -function extractPersistedAttachments(content: unknown): PersistedAttachment[] { - if (!Array.isArray(content)) return []; - - for (const part of content) { - const result = AttachmentsPartSchema.safeParse(part); - if (result.success) { - return result.data.items; - } - } - - return []; -} - -/** - * Convert backend message to assistant-ui ThreadMessageLike format - * Filters out 'thinking-steps' part as it's handled separately via messageThinkingSteps - * Restores attachments for user messages from persisted data - */ -function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { - let content: ThreadMessageLike["content"]; - - if (typeof msg.content === "string") { - content = [{ type: "text", text: msg.content }]; - } else if (Array.isArray(msg.content)) { - // Filter out custom metadata parts - they're handled separately - const filteredContent = msg.content.filter((part: unknown) => { - if (typeof part !== "object" || part === null || !("type" in part)) return true; - const partType = (part as { type: string }).type; - // Filter out thinking-steps, mentioned-documents, and attachments - return ( - partType !== "thinking-steps" && - partType !== "mentioned-documents" && - partType !== "attachments" - ); - }); - content = - filteredContent.length > 0 - ? (filteredContent as ThreadMessageLike["content"]) - : [{ type: "text", text: "" }]; - } else { - content = [{ type: "text", text: String(msg.content) }]; - } - - // Restore attachments for user messages - let attachments: ThreadMessageLike["attachments"]; - if (msg.role === "user") { - const persistedAttachments = extractPersistedAttachments(msg.content); - if (persistedAttachments.length > 0) { - attachments = persistedAttachments.map((att) => ({ - id: att.id, - name: att.name, - type: att.type as "document" | "image" | "file", - contentType: att.contentType || "application/octet-stream", - status: { type: "complete" as const }, - content: [], - // Custom fields for our ChatAttachment interface - imageDataUrl: att.imageDataUrl, - extractedContent: att.extractedContent, - })); - } - } - - // Build metadata.custom for author display in shared chats - const metadata = msg.author_id - ? { - custom: { - author: { - displayName: msg.author_display_name ?? null, - avatarUrl: msg.author_avatar_url ?? null, - }, - }, - } - : undefined; - - return { - id: `msg-${msg.id}`, - role: msg.role, - content, - createdAt: new Date(msg.created_at), - attachments, - metadata, - }; -} /** * Tools that should render custom UI in the chat. @@ -458,6 +354,8 @@ export default function NewChatPage() { visibility: currentThread?.visibility ?? null, hasComments: currentThread?.has_comments ?? false, addingCommentToMessageId: null, + publicShareEnabled: currentThread?.public_share_enabled ?? false, + publicShareToken: null, }); }, [currentThread, setCurrentThreadState]); diff --git a/surfsense_web/atoms/chat/chat-thread-mutation.atoms.ts b/surfsense_web/atoms/chat/chat-thread-mutation.atoms.ts new file mode 100644 index 000000000..a844a45fb --- /dev/null +++ b/surfsense_web/atoms/chat/chat-thread-mutation.atoms.ts @@ -0,0 +1,28 @@ +import { atomWithMutation } from "jotai-tanstack-query"; +import { toast } from "sonner"; +import type { + TogglePublicShareRequest, + TogglePublicShareResponse, +} from "@/contracts/types/chat-threads.types"; +import { chatThreadsApiService } from "@/lib/apis/chat-threads-api.service"; + +export const togglePublicShareMutationAtom = atomWithMutation(() => ({ + mutationFn: async (request: TogglePublicShareRequest) => { + return chatThreadsApiService.togglePublicShare(request); + }, + onSuccess: (response: TogglePublicShareResponse) => { + if (response.enabled && response.share_token) { + const publicUrl = `${window.location.origin}/public/${response.share_token}`; + navigator.clipboard.writeText(publicUrl); + toast.success("Public link copied to clipboard", { + description: "Anyone with this link can view the chat", + }); + } else { + toast.success("Public sharing disabled"); + } + }, + onError: (error: Error) => { + console.error("Failed to toggle public share:", error); + toast.error("Failed to update public sharing"); + }, +})); diff --git a/surfsense_web/atoms/chat/current-thread.atom.ts b/surfsense_web/atoms/chat/current-thread.atom.ts index c19b2638c..7d6ccb0db 100644 --- a/surfsense_web/atoms/chat/current-thread.atom.ts +++ b/surfsense_web/atoms/chat/current-thread.atom.ts @@ -17,6 +17,8 @@ interface CurrentThreadState { visibility: ChatVisibility | null; hasComments: boolean; addingCommentToMessageId: number | null; + publicShareEnabled: boolean; + publicShareToken: string | null; } const initialState: CurrentThreadState = { @@ -24,6 +26,8 @@ const initialState: CurrentThreadState = { visibility: null, hasComments: false, addingCommentToMessageId: null, + publicShareEnabled: false, + publicShareToken: null, }; export const currentThreadAtom = atom(initialState); diff --git a/surfsense_web/components/auth/sign-in-button.tsx b/surfsense_web/components/auth/sign-in-button.tsx new file mode 100644 index 000000000..f7270df9a --- /dev/null +++ b/surfsense_web/components/auth/sign-in-button.tsx @@ -0,0 +1,88 @@ +"use client"; + +import { motion } from "motion/react"; +import Link from "next/link"; +import { AUTH_TYPE, BACKEND_URL } from "@/lib/env-config"; +import { trackLoginAttempt } from "@/lib/posthog/events"; +import { cn } from "@/lib/utils"; + +// Official Google "G" logo with brand colors +const GoogleLogo = ({ className }: { className?: string }) => ( + + + + + + +); + +interface SignInButtonProps { + /** + * - "desktop": Hidden on mobile, visible on md+ (for navbar with separate mobile menu) + * - "mobile": Full width, always visible (for mobile menu) + * - "compact": Always visible, compact size (for headers) + */ + variant?: "desktop" | "mobile" | "compact"; +} + +export const SignInButton = ({ variant = "desktop" }: SignInButtonProps) => { + const isGoogleAuth = AUTH_TYPE === "GOOGLE"; + + const handleGoogleLogin = () => { + trackLoginAttempt("google"); + window.location.href = `${BACKEND_URL}/auth/google/authorize-redirect`; + }; + + const getClassName = () => { + if (variant === "desktop") { + return isGoogleAuth + ? "hidden rounded-full bg-white px-5 py-2 text-sm text-neutral-700 shadow-md ring-1 ring-neutral-200/50 hover:shadow-lg md:flex dark:bg-neutral-900 dark:text-neutral-200 dark:ring-neutral-700/50" + : "hidden rounded-full bg-black px-8 py-2 text-sm font-bold text-white shadow-[0px_-2px_0px_0px_rgba(255,255,255,0.4)_inset] md:block dark:bg-white dark:text-black"; + } + if (variant === "compact") { + return isGoogleAuth + ? "rounded-full bg-white px-4 py-1.5 text-sm text-neutral-700 shadow-md ring-1 ring-neutral-200/50 hover:shadow-lg dark:bg-neutral-900 dark:text-neutral-200 dark:ring-neutral-700/50" + : "rounded-full bg-black px-6 py-1.5 text-sm font-bold text-white shadow-[0px_-2px_0px_0px_rgba(255,255,255,0.4)_inset] dark:bg-white dark:text-black"; + } + // mobile + return isGoogleAuth + ? "w-full rounded-lg bg-white px-8 py-2.5 text-neutral-700 shadow-md ring-1 ring-neutral-200/50 dark:bg-neutral-900 dark:text-neutral-200 dark:ring-neutral-700/50 touch-manipulation" + : "w-full rounded-lg bg-black px-8 py-2 font-medium text-white shadow-[0px_-2px_0px_0px_rgba(255,255,255,0.4)_inset] dark:bg-white dark:text-black text-center touch-manipulation"; + }; + + if (isGoogleAuth) { + return ( + + + Sign In + + ); + } + + return ( + + Sign In + + ); +}; diff --git a/surfsense_web/components/homepage/navbar.tsx b/surfsense_web/components/homepage/navbar.tsx index c83d3556a..4c66ac759 100644 --- a/surfsense_web/components/homepage/navbar.tsx +++ b/surfsense_web/components/homepage/navbar.tsx @@ -9,78 +9,12 @@ import { import { AnimatePresence, motion } from "motion/react"; import Link from "next/link"; import { useEffect, useState } from "react"; +import { SignInButton } from "@/components/auth/sign-in-button"; import { Logo } from "@/components/Logo"; import { ThemeTogglerComponent } from "@/components/theme/theme-toggle"; import { useGithubStars } from "@/hooks/use-github-stars"; -import { AUTH_TYPE, BACKEND_URL } from "@/lib/env-config"; -import { trackLoginAttempt } from "@/lib/posthog/events"; import { cn } from "@/lib/utils"; -// Official Google "G" logo with brand colors -const GoogleLogo = ({ className }: { className?: string }) => ( - - - - - - -); - -// Sign in button component that handles both Google OAuth and local auth -const SignInButton = ({ variant = "desktop" }: { variant?: "desktop" | "mobile" }) => { - const isGoogleAuth = AUTH_TYPE === "GOOGLE"; - - const handleGoogleLogin = () => { - trackLoginAttempt("google"); - window.location.href = `${BACKEND_URL}/auth/google/authorize-redirect`; - }; - - if (isGoogleAuth) { - return ( - - - Sign In - - ); - } - - return ( - - Sign In - - ); -}; - export const Navbar = () => { const [isScrolled, setIsScrolled] = useState(false); diff --git a/surfsense_web/components/new-chat/chat-share-button.tsx b/surfsense_web/components/new-chat/chat-share-button.tsx index fcace2572..4e811779f 100644 --- a/surfsense_web/components/new-chat/chat-share-button.tsx +++ b/surfsense_web/components/new-chat/chat-share-button.tsx @@ -2,18 +2,15 @@ import { useQueryClient } from "@tanstack/react-query"; import { useAtomValue, useSetAtom } from "jotai"; -import { User, Users } from "lucide-react"; +import { Globe, Link2, User, Users } from "lucide-react"; import { useCallback, useState } from "react"; import { toast } from "sonner"; +import { togglePublicShareMutationAtom } from "@/atoms/chat/chat-thread-mutation.atoms"; import { currentThreadAtom, setThreadVisibilityAtom } from "@/atoms/chat/current-thread.atom"; import { Button } from "@/components/ui/button"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; -import { - type ChatVisibility, - type ThreadRecord, - updateThreadVisibility, -} from "@/lib/chat/thread-persistence"; +import { type ChatVisibility, type ThreadRecord, updateThreadVisibility } from "@/lib/chat/thread-persistence"; import { cn } from "@/lib/utils"; interface ChatShareButtonProps { @@ -48,11 +45,19 @@ export function ChatShareButton({ thread, onVisibilityChange, className }: ChatS // Use Jotai atom for visibility (single source of truth) const currentThreadState = useAtomValue(currentThreadAtom); + const setCurrentThreadState = useSetAtom(currentThreadAtom); const setThreadVisibility = useSetAtom(setThreadVisibilityAtom); + // Public share mutation + const { mutateAsync: togglePublicShare, isPending: isTogglingPublic } = useAtomValue( + togglePublicShareMutationAtom + ); + // Use Jotai visibility if available (synced from chat page), otherwise fall back to thread prop const currentVisibility = currentThreadState.visibility ?? thread?.visibility ?? "PRIVATE"; - const isOwnThread = thread?.created_by_id !== null; // If we have the thread, we can modify it + const isPublicEnabled = + currentThreadState.publicShareEnabled ?? thread?.public_share_enabled ?? false; + const publicShareToken = currentThreadState.publicShareToken ?? null; const handleVisibilityChange = useCallback( async (newVisibility: ChatVisibility) => { @@ -87,12 +92,41 @@ export function ChatShareButton({ thread, onVisibilityChange, className }: ChatS [thread, currentVisibility, onVisibilityChange, queryClient, setThreadVisibility] ); + const handlePublicShareToggle = useCallback(async () => { + if (!thread) return; + + try { + const response = await togglePublicShare({ + thread_id: thread.id, + enabled: !isPublicEnabled, + }); + + // Update atom state with response + setCurrentThreadState((prev) => ({ + ...prev, + publicShareEnabled: response.enabled, + publicShareToken: response.share_token, + })); + } catch(error) { + console.error("Failed to toggle public share:", error); + } + }, [thread, isPublicEnabled, togglePublicShare, setCurrentThreadState]); + + const handleCopyPublicLink = useCallback(async () => { + if (!publicShareToken) return; + + const publicUrl = `${window.location.origin}/public/${publicShareToken}`; + await navigator.clipboard.writeText(publicUrl); + toast.success("Public link copied to clipboard"); + }, [publicShareToken]); + // Don't show if no thread (new chat that hasn't been created yet) if (!thread) { return null; } - const CurrentIcon = currentVisibility === "PRIVATE" ? User : Users; + const CurrentIcon = isPublicEnabled ? Globe : currentVisibility === "PRIVATE" ? User : Users; + const buttonLabel = isPublicEnabled ? "Public" : currentVisibility === "PRIVATE" ? "Private" : "Shared"; return ( @@ -108,9 +142,7 @@ export function ChatShareButton({ thread, onVisibilityChange, className }: ChatS )} > - - {currentVisibility === "PRIVATE" ? "Private" : "Shared"} - + {buttonLabel} @@ -124,6 +156,7 @@ export function ChatShareButton({ thread, onVisibilityChange, className }: ChatS onCloseAutoFocus={(e) => e.preventDefault()} >
+ {/* Visibility Options */} {visibilityOptions.map((option) => { const isSelected = currentVisibility === option.value; const Icon = option.icon; @@ -166,6 +199,65 @@ export function ChatShareButton({ thread, onVisibilityChange, className }: ChatS ); })} + + {/* Divider */} +
+ + {/* Public Share Option */} + + )} +
diff --git a/surfsense_web/components/public-chat/public-chat-header.tsx b/surfsense_web/components/public-chat/public-chat-header.tsx deleted file mode 100644 index 6f6e40a52..000000000 --- a/surfsense_web/components/public-chat/public-chat-header.tsx +++ /dev/null @@ -1,34 +0,0 @@ -import { formatDistanceToNow } from "date-fns"; -import Image from "next/image"; -import Link from "next/link"; - -interface PublicChatHeaderProps { - title: string; - createdAt: string; -} - -export function PublicChatHeader({ title, createdAt }: PublicChatHeaderProps) { - const timeAgo = formatDistanceToNow(new Date(createdAt), { addSuffix: true }); - - return ( -
-
-
- - SurfSense - -
-

{title}

-

{timeAgo}

-
-
-
-
- ); -} diff --git a/surfsense_web/components/public-chat/public-chat-view.tsx b/surfsense_web/components/public-chat/public-chat-view.tsx index 1b7543712..8b21fede1 100644 --- a/surfsense_web/components/public-chat/public-chat-view.tsx +++ b/surfsense_web/components/public-chat/public-chat-view.tsx @@ -2,6 +2,7 @@ import { AssistantRuntimeProvider } from "@assistant-ui/react"; import { Loader2 } from "lucide-react"; +import { Navbar } from "@/components/homepage/navbar"; import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; @@ -9,7 +10,6 @@ import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage"; import { usePublicChat } from "@/hooks/use-public-chat"; import { usePublicChatRuntime } from "@/hooks/use-public-chat-runtime"; import { PublicChatFooter } from "./public-chat-footer"; -import { PublicChatHeader } from "./public-chat-header"; import { PublicThread } from "./public-thread"; interface PublicChatViewProps { @@ -22,37 +22,43 @@ export function PublicChatView({ shareToken }: PublicChatViewProps) { if (isLoading) { return ( -
- -
+
+ +
+ +
+
); } if (error || !data) { return ( -
-

Chat not found

-

- This chat may have been removed or is no longer public. -

-
+
+ +
+

Chat not found

+

+ This chat may have been removed or is no longer public. +

+
+
); } return ( - - {/* Tool UIs for rendering tool results */} - - - - +
+ + + {/* Tool UIs for rendering tool results */} + + + + -
- } - footer={} - /> -
-
+
+ } /> +
+ +
); } diff --git a/surfsense_web/components/public-chat/public-thread.tsx b/surfsense_web/components/public-chat/public-thread.tsx index 2fe1ecff6..e88e5aae7 100644 --- a/surfsense_web/components/public-chat/public-thread.tsx +++ b/surfsense_web/components/public-chat/public-thread.tsx @@ -12,10 +12,8 @@ import { type FC, type ReactNode, useState } from "react"; import { MarkdownText } from "@/components/assistant-ui/markdown-text"; import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; -import { cn } from "@/lib/utils"; interface PublicThreadProps { - header?: ReactNode; footer?: ReactNode; } @@ -23,7 +21,7 @@ interface PublicThreadProps { * Read-only thread component for public chat viewing. * No composer, no edit capabilities - just message display. */ -export const PublicThread: FC = ({ header, footer }) => { +export const PublicThread: FC = ({ footer }) => { return ( = ({ header, footer }) => { }} > - {header} - { return ( diff --git a/surfsense_web/hooks/use-public-chat-runtime.ts b/surfsense_web/hooks/use-public-chat-runtime.ts index cc7e95fdc..2e79e0e1b 100644 --- a/surfsense_web/hooks/use-public-chat-runtime.ts +++ b/surfsense_web/hooks/use-public-chat-runtime.ts @@ -1,17 +1,31 @@ "use client"; -import { - type AppendMessage, - type ThreadMessageLike, - useExternalStoreRuntime, -} from "@assistant-ui/react"; +import { type AppendMessage, useExternalStoreRuntime } from "@assistant-ui/react"; import { useCallback, useMemo } from "react"; import type { GetPublicChatResponse, PublicChatMessage } from "@/contracts/types/public-chat.types"; +import { convertToThreadMessage } from "@/lib/chat/message-utils"; +import type { MessageRecord } from "@/lib/chat/thread-persistence"; interface UsePublicChatRuntimeOptions { data: GetPublicChatResponse | undefined; } +/** + * Map PublicChatMessage to MessageRecord shape for reuse of convertToThreadMessage + */ +function toMessageRecord(msg: PublicChatMessage, idx: number): MessageRecord { + return { + id: idx, + thread_id: 0, + role: msg.role as "user" | "assistant" | "system", + content: msg.content, + created_at: msg.created_at, + author_id: msg.author ? "public" : null, + author_display_name: msg.author?.display_name ?? null, + author_avatar_url: msg.author?.avatar_url ?? null, + }; +} + /** * Creates a read-only runtime for public chat viewing. */ @@ -21,24 +35,8 @@ export function usePublicChatRuntime({ data }: UsePublicChatRuntimeOptions) { // No-op - public chat is read-only const onNew = useCallback(async (_message: AppendMessage) => {}, []); - // Convert PublicChatMessage to ThreadMessageLike const convertMessage = useCallback( - (msg: PublicChatMessage, idx: number): ThreadMessageLike => ({ - id: `public-msg-${idx}`, - role: msg.role as "user" | "assistant", - content: msg.content as ThreadMessageLike["content"], - createdAt: new Date(msg.created_at), - metadata: msg.author - ? { - custom: { - author: { - displayName: msg.author.display_name, - avatarUrl: msg.author.avatar_url, - }, - }, - } - : undefined, - }), + (msg: PublicChatMessage, idx: number) => convertToThreadMessage(toMessageRecord(msg, idx)), [] ); diff --git a/surfsense_web/lib/chat/message-utils.ts b/surfsense_web/lib/chat/message-utils.ts new file mode 100644 index 000000000..868ed28eb --- /dev/null +++ b/surfsense_web/lib/chat/message-utils.ts @@ -0,0 +1,109 @@ +import type { ThreadMessageLike } from "@assistant-ui/react"; +import { z } from "zod"; +import type { MessageRecord } from "./thread-persistence"; + +/** + * Zod schema for persisted attachment info + */ +const PersistedAttachmentSchema = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), + contentType: z.string().optional(), + imageDataUrl: z.string().optional(), + extractedContent: z.string().optional(), +}); + +const AttachmentsPartSchema = z.object({ + type: z.literal("attachments"), + items: z.array(PersistedAttachmentSchema), +}); + +type PersistedAttachment = z.infer; + +/** + * Extract persisted attachments from message content (type-safe with Zod) + */ +function extractPersistedAttachments(content: unknown): PersistedAttachment[] { + if (!Array.isArray(content)) return []; + + for (const part of content) { + const result = AttachmentsPartSchema.safeParse(part); + if (result.success) { + return result.data.items; + } + } + + return []; +} + +/** + * Convert backend message to assistant-ui ThreadMessageLike format + * Filters out 'thinking-steps' part as it's handled separately via messageThinkingSteps + * Restores attachments for user messages from persisted data + */ +export function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { + let content: ThreadMessageLike["content"]; + + if (typeof msg.content === "string") { + content = [{ type: "text", text: msg.content }]; + } else if (Array.isArray(msg.content)) { + // Filter out custom metadata parts - they're handled separately + const filteredContent = msg.content.filter((part: unknown) => { + if (typeof part !== "object" || part === null || !("type" in part)) return true; + const partType = (part as { type: string }).type; + // Filter out thinking-steps, mentioned-documents, and attachments + return ( + partType !== "thinking-steps" && + partType !== "mentioned-documents" && + partType !== "attachments" + ); + }); + content = + filteredContent.length > 0 + ? (filteredContent as ThreadMessageLike["content"]) + : [{ type: "text", text: "" }]; + } else { + content = [{ type: "text", text: String(msg.content) }]; + } + + // Restore attachments for user messages + let attachments: ThreadMessageLike["attachments"]; + if (msg.role === "user") { + const persistedAttachments = extractPersistedAttachments(msg.content); + if (persistedAttachments.length > 0) { + attachments = persistedAttachments.map((att) => ({ + id: att.id, + name: att.name, + type: att.type as "document" | "image" | "file", + contentType: att.contentType || "application/octet-stream", + status: { type: "complete" as const }, + content: [], + // Custom fields for our ChatAttachment interface + imageDataUrl: att.imageDataUrl, + extractedContent: att.extractedContent, + })); + } + } + + // Build metadata.custom for author display in shared chats + const metadata = msg.author_id + ? { + custom: { + author: { + displayName: msg.author_display_name ?? null, + avatarUrl: msg.author_avatar_url ?? null, + }, + }, + } + : undefined; + + return { + id: `msg-${msg.id}`, + role: msg.role, + content, + createdAt: new Date(msg.created_at), + attachments, + metadata, + }; +} diff --git a/surfsense_web/lib/chat/thread-persistence.ts b/surfsense_web/lib/chat/thread-persistence.ts index 08c08ba78..6990ff582 100644 --- a/surfsense_web/lib/chat/thread-persistence.ts +++ b/surfsense_web/lib/chat/thread-persistence.ts @@ -24,6 +24,7 @@ export interface ThreadRecord { created_at: string; updated_at: string; has_comments?: boolean; + public_share_enabled?: boolean; } export interface MessageRecord { From 271de96cce05d94b6c9ab082ef3c87735b008c1d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 20:10:03 +0200 Subject: [PATCH 23/69] fix: public chat copy link button and podcast access --- .../app/routes/podcasts_routes.py | 29 ++++++++++++------- surfsense_backend/app/schemas/new_chat.py | 1 + .../app/services/public_chat_service.py | 17 ++++++++--- .../new-chat/[[...chat_id]]/page.tsx | 2 +- .../components/new-chat/chat-share-button.tsx | 15 +++++++--- surfsense_web/lib/apis/base-api.service.ts | 2 +- surfsense_web/lib/chat/thread-persistence.ts | 1 + 7 files changed, 47 insertions(+), 20 deletions(-) diff --git a/surfsense_backend/app/routes/podcasts_routes.py b/surfsense_backend/app/routes/podcasts_routes.py index 467ef8d23..27970b707 100644 --- a/surfsense_backend/app/routes/podcasts_routes.py +++ b/surfsense_backend/app/routes/podcasts_routes.py @@ -84,12 +84,17 @@ async def read_podcasts( async def read_podcast( podcast_id: int, session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), + user: User | None = Depends(current_optional_user), ): """ Get a specific podcast by ID. - Requires PODCASTS_READ permission for the search space. + + Access is allowed if: + - User is authenticated with PODCASTS_READ permission, OR + - Podcast belongs to a publicly shared thread """ + from app.services.public_chat_service import is_podcast_publicly_accessible + try: result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) podcast = result.scalars().first() @@ -100,14 +105,18 @@ async def read_podcast( detail="Podcast not found", ) - # Check permission for the search space - await check_permission( - session, - user, - podcast.search_space_id, - Permission.PODCASTS_READ.value, - "You don't have permission to read podcasts in this search space", - ) + is_public = await is_podcast_publicly_accessible(session, podcast_id) + + if not is_public: + if not user: + raise HTTPException(status_code=401, detail="Authentication required") + await check_permission( + session, + user, + podcast.search_space_id, + Permission.PODCASTS_READ.value, + "You don't have permission to read podcasts in this search space", + ) return podcast except HTTPException as he: diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index ef2868495..5e9d44beb 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -96,6 +96,7 @@ class NewChatThreadRead(NewChatThreadBase, IDModel): visibility: ChatVisibility created_by_id: UUID | None = None public_share_enabled: bool = False + public_share_token: str | None = None created_at: datetime updated_at: datetime diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index 42a26c403..62fd4f923 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -27,8 +27,8 @@ def strip_citations(text: str) -> str: Remove [citation:X] and [citation:doc-X] patterns from text. Preserves newlines to maintain markdown formatting. """ - # Remove citation patterns (including Chinese brackets 【】) - text = re.sub(r"[\[【]citation:(doc-)?\d+[\]】]", "", text) + # Remove citation patterns + text = re.sub(r"[\[【]\u200B?citation:(doc-)?\d+\u200B?[\]】]", "", text) # Collapse multiple spaces/tabs (but NOT newlines) into single space text = re.sub(r"[^\S\n]+", " ", text) # Normalize excessive blank lines (3+ newlines → 2) @@ -63,8 +63,17 @@ def sanitize_content_for_public(content: list | str | None) -> list: sanitized.append({"type": "text", "text": clean_text}) elif part_type == "tool-call": - if part.get("toolName") in UI_TOOLS: - sanitized.append(part) + tool_name = part.get("toolName") + if tool_name not in UI_TOOLS: + continue + + # Skip podcasts that are still processing (would cause auth errors) + if tool_name == "generate_podcast": + result = part.get("result", {}) + if result.get("status") in ("processing", "already_generating"): + continue + + sanitized.append(part) return sanitized diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 2af50f8e2..9b45d4d62 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -355,7 +355,7 @@ export default function NewChatPage() { hasComments: currentThread?.has_comments ?? false, addingCommentToMessageId: null, publicShareEnabled: currentThread?.public_share_enabled ?? false, - publicShareToken: null, + publicShareToken: currentThread?.public_share_token ?? null, }); }, [currentThread, setCurrentThreadState]); diff --git a/surfsense_web/components/new-chat/chat-share-button.tsx b/surfsense_web/components/new-chat/chat-share-button.tsx index 4e811779f..2df363203 100644 --- a/surfsense_web/components/new-chat/chat-share-button.tsx +++ b/surfsense_web/components/new-chat/chat-share-button.tsx @@ -245,17 +245,24 @@ export function ChatShareButton({ thread, onVisibilityChange, className }: ChatS

{isPublicEnabled && publicShareToken && ( - +
)} diff --git a/surfsense_web/lib/apis/base-api.service.ts b/surfsense_web/lib/apis/base-api.service.ts index a87d4deaf..b14818ac1 100644 --- a/surfsense_web/lib/apis/base-api.service.ts +++ b/surfsense_web/lib/apis/base-api.service.ts @@ -26,7 +26,7 @@ class BaseApiService { noAuthEndpoints: string[] = ["/auth/jwt/login", "/auth/register", "/auth/refresh"]; // Prefixes that don't require auth (checked with startsWith) - noAuthPrefixes: string[] = ["/api/v1/public/"]; + noAuthPrefixes: string[] = ["/api/v1/public/", "/api/v1/podcasts/"]; // Use a getter to always read fresh token from localStorage // This ensures the token is always up-to-date after login/logout diff --git a/surfsense_web/lib/chat/thread-persistence.ts b/surfsense_web/lib/chat/thread-persistence.ts index 6990ff582..2188d9cec 100644 --- a/surfsense_web/lib/chat/thread-persistence.ts +++ b/surfsense_web/lib/chat/thread-persistence.ts @@ -25,6 +25,7 @@ export interface ThreadRecord { updated_at: string; has_comments?: boolean; public_share_enabled?: boolean; + public_share_token?: string | null; } export interface MessageRecord { From 1c98ba989daa785a0059bee74101c2fef2de2a37 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 26 Jan 2026 21:01:36 +0200 Subject: [PATCH 24/69] fix: register clone task and sanitize cloned content --- surfsense_backend/app/celery_app.py | 1 + surfsense_backend/app/services/public_chat_service.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index f7bea8cc3..b4869d23f 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -65,6 +65,7 @@ celery_app = Celery( "app.tasks.celery_tasks.schedule_checker_task", "app.tasks.celery_tasks.blocknote_migration_tasks", "app.tasks.celery_tasks.document_reindex_tasks", + "app.tasks.celery_tasks.clone_chat_tasks", ], ) diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index 62fd4f923..85f1a9572 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -253,6 +253,7 @@ async def clone_public_chat( Clone a public chat to user's account. Creates a new private thread with all messages and podcasts. + Citations are stripped since they reference the original user's documents. """ import copy @@ -291,7 +292,7 @@ async def clone_public_chat( podcast_id_map: dict[int, int] = {} for msg in sorted(source_thread.messages, key=lambda m: m.created_at): - new_content = copy.deepcopy(msg.content) + new_content = sanitize_content_for_public(msg.content) if isinstance(new_content, list): for part in new_content: From 0ad59edda0df0728697fd1145b9616bfb1dfc341 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 27 Jan 2026 09:38:52 +0200 Subject: [PATCH 25/69] feat: auto-trigger clone after login redirect --- surfsense_web/app/(home)/login/page.tsx | 7 ++++ .../public-chat/public-chat-footer.tsx | 41 ++++++++++++++----- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/surfsense_web/app/(home)/login/page.tsx b/surfsense_web/app/(home)/login/page.tsx index 7aade8427..a2dadd70c 100644 --- a/surfsense_web/app/(home)/login/page.tsx +++ b/surfsense_web/app/(home)/login/page.tsx @@ -27,6 +27,13 @@ function LoginContent() { const error = searchParams.get("error"); const message = searchParams.get("message"); const logout = searchParams.get("logout"); + const returnUrl = searchParams.get("returnUrl"); + + // Save returnUrl to localStorage so it persists through OAuth flows (e.g., Google) + // This is read by TokenHandler after successful authentication + if (returnUrl) { + localStorage.setItem("surfsense_redirect_path", decodeURIComponent(returnUrl)); + } // Show registration success message if (registered === "true") { diff --git a/surfsense_web/components/public-chat/public-chat-footer.tsx b/surfsense_web/components/public-chat/public-chat-footer.tsx index 06e3d9975..80779b4e6 100644 --- a/surfsense_web/components/public-chat/public-chat-footer.tsx +++ b/surfsense_web/components/public-chat/public-chat-footer.tsx @@ -1,8 +1,8 @@ "use client"; import { Copy, Loader2 } from "lucide-react"; -import { useRouter } from "next/navigation"; -import { useState } from "react"; +import { useRouter, useSearchParams } from "next/navigation"; +import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { Button } from "@/components/ui/button"; import { publicChatApiService } from "@/lib/apis/public-chat-api.service"; @@ -14,17 +14,11 @@ interface PublicChatFooterProps { export function PublicChatFooter({ shareToken }: PublicChatFooterProps) { const router = useRouter(); + const searchParams = useSearchParams(); const [isCloning, setIsCloning] = useState(false); + const hasAutoCloned = useRef(false); - const handleCopyAndContinue = async () => { - const token = getBearerToken(); - - if (!token) { - const returnUrl = encodeURIComponent(`/public/${shareToken}`); - router.push(`/login?returnUrl=${returnUrl}&action=clone`); - return; - } - + const triggerClone = useCallback(async () => { setIsCloning(true); try { @@ -43,6 +37,31 @@ export function PublicChatFooter({ shareToken }: PublicChatFooterProps) { } finally { setIsCloning(false); } + }, [shareToken, router]); + + // Auto-trigger clone if user just logged in with action=clone + useEffect(() => { + const action = searchParams.get("action"); + const token = getBearerToken(); + + // Only auto-clone once, if authenticated and action=clone is present + if (action === "clone" && token && !hasAutoCloned.current && !isCloning) { + hasAutoCloned.current = true; + triggerClone(); + } + }, [searchParams, isCloning, triggerClone]); + + const handleCopyAndContinue = async () => { + const token = getBearerToken(); + + if (!token) { + // Include action=clone in the returnUrl so it persists after login + const returnUrl = encodeURIComponent(`/public/${shareToken}?action=clone`); + router.push(`/login?returnUrl=${returnUrl}`); + return; + } + + await triggerClone(); }; return ( From a42780a2ec1735cf437c58332b81cb35337e8240 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 27 Jan 2026 09:39:03 +0200 Subject: [PATCH 26/69] feat: add chat_cloned notification types to inbox schema --- surfsense_web/contracts/types/inbox.types.ts | 58 +++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/surfsense_web/contracts/types/inbox.types.ts b/surfsense_web/contracts/types/inbox.types.ts index 0983bbc55..9269a6935 100644 --- a/surfsense_web/contracts/types/inbox.types.ts +++ b/surfsense_web/contracts/types/inbox.types.ts @@ -9,6 +9,8 @@ export const inboxItemTypeEnum = z.enum([ "connector_indexing", "document_processing", "new_mention", + "chat_cloned", + "chat_clone_failed", ]); /** @@ -88,6 +90,22 @@ export const newMentionMetadata = z.object({ content_preview: z.string(), }); +/** + * Chat cloned success metadata schema + */ +export const chatClonedMetadata = z.object({ + thread_id: z.number(), + search_space_id: z.number(), +}); + +/** + * Chat clone failed metadata schema + */ +export const chatCloneFailedMetadata = z.object({ + share_token: z.string(), + error: z.string(), +}); + /** * Union of all inbox item metadata types * Use this when the inbox item type is unknown @@ -96,6 +114,8 @@ export const inboxItemMetadata = z.union([ connectorIndexingMetadata, documentProcessingMetadata, newMentionMetadata, + chatClonedMetadata, + chatCloneFailedMetadata, baseInboxItemMetadata, ]); @@ -133,6 +153,16 @@ export const newMentionInboxItem = inboxItem.extend({ metadata: newMentionMetadata, }); +export const chatClonedInboxItem = inboxItem.extend({ + type: z.literal("chat_cloned"), + metadata: chatClonedMetadata, +}); + +export const chatCloneFailedInboxItem = inboxItem.extend({ + type: z.literal("chat_clone_failed"), + metadata: chatCloneFailedMetadata, +}); + // ============================================================================= // API Request/Response Schemas // ============================================================================= @@ -229,13 +259,27 @@ export function isNewMentionMetadata(metadata: unknown): metadata is NewMentionM return newMentionMetadata.safeParse(metadata).success; } +/** + * Type guard for ChatClonedMetadata + */ +export function isChatClonedMetadata(metadata: unknown): metadata is ChatClonedMetadata { + return chatClonedMetadata.safeParse(metadata).success; +} + +/** + * Type guard for ChatCloneFailedMetadata + */ +export function isChatCloneFailedMetadata(metadata: unknown): metadata is ChatCloneFailedMetadata { + return chatCloneFailedMetadata.safeParse(metadata).success; +} + /** * Safe metadata parser - returns typed metadata or null */ export function parseInboxItemMetadata( type: InboxItemTypeEnum, metadata: unknown -): ConnectorIndexingMetadata | DocumentProcessingMetadata | NewMentionMetadata | null { +): ConnectorIndexingMetadata | DocumentProcessingMetadata | NewMentionMetadata | ChatClonedMetadata | ChatCloneFailedMetadata | null { switch (type) { case "connector_indexing": { const result = connectorIndexingMetadata.safeParse(metadata); @@ -249,6 +293,14 @@ export function parseInboxItemMetadata( const result = newMentionMetadata.safeParse(metadata); return result.success ? result.data : null; } + case "chat_cloned": { + const result = chatClonedMetadata.safeParse(metadata); + return result.success ? result.data : null; + } + case "chat_clone_failed": { + const result = chatCloneFailedMetadata.safeParse(metadata); + return result.success ? result.data : null; + } default: return null; } @@ -265,11 +317,15 @@ export type BaseInboxItemMetadata = z.infer; export type ConnectorIndexingMetadata = z.infer; export type DocumentProcessingMetadata = z.infer; export type NewMentionMetadata = z.infer; +export type ChatClonedMetadata = z.infer; +export type ChatCloneFailedMetadata = z.infer; export type InboxItemMetadata = z.infer; export type InboxItem = z.infer; export type ConnectorIndexingInboxItem = z.infer; export type DocumentProcessingInboxItem = z.infer; export type NewMentionInboxItem = z.infer; +export type ChatClonedInboxItem = z.infer; +export type ChatCloneFailedInboxItem = z.infer; // API Request/Response types export type GetNotificationsRequest = z.infer; From 988847922f5b5157e676ed18ca4148fa527ce237 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 27 Jan 2026 10:22:38 +0200 Subject: [PATCH 27/69] feat: clone notifications UI and PGlite resync on clone --- .../layout/ui/sidebar/InboxSidebar.tsx | 38 ++++++++++++++++++- .../public-chat/public-chat-footer.tsx | 3 ++ surfsense_web/lib/electric/client.ts | 10 +++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx index 810e3a22e..eacae6e49 100644 --- a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx @@ -7,6 +7,7 @@ import { Check, CheckCheck, CheckCircle2, + Copy, History, Inbox, LayoutGrid, @@ -43,6 +44,8 @@ import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import { type ConnectorIndexingMetadata, + isChatClonedMetadata, + isChatCloneFailedMetadata, isConnectorIndexingMetadata, isNewMentionMetadata, type NewMentionMetadata, @@ -196,10 +199,15 @@ export function InboxSidebar({ [inboxItems] ); + // Status tab includes: connector indexing, document processing, chat clone notifications const statusItems = useMemo( () => inboxItems.filter( - (item) => item.type === "connector_indexing" || item.type === "document_processing" + (item) => + item.type === "connector_indexing" || + item.type === "document_processing" || + item.type === "chat_cloned" || + item.type === "chat_clone_failed" ), [inboxItems] ); @@ -320,7 +328,17 @@ export function InboxSidebar({ router.push(url); } } + } else if (item.type === "chat_cloned") { + // Navigate to the cloned chat + if (isChatClonedMetadata(item.metadata)) { + const { search_space_id, thread_id } = item.metadata; + const url = `/dashboard/${search_space_id}/new-chat/${thread_id}`; + onOpenChange(false); + onCloseMobileSidebar?.(); + router.push(url); + } } + // chat_clone_failed: just mark as read, no navigation }, [markAsRead, router, onOpenChange, onCloseMobileSidebar] ); @@ -380,6 +398,24 @@ export function InboxSidebar({ ); } + // For chat cloned success, show green copy icon + if (item.type === "chat_cloned") { + return ( +
+ +
+ ); + } + + // For chat clone failed, show red alert icon + if (item.type === "chat_clone_failed") { + return ( +
+ +
+ ); + } + // For status items (connector/document), show status icons // Safely access status from metadata const metadata = item.metadata as Record; diff --git a/surfsense_web/components/public-chat/public-chat-footer.tsx b/surfsense_web/components/public-chat/public-chat-footer.tsx index 80779b4e6..cc54d4150 100644 --- a/surfsense_web/components/public-chat/public-chat-footer.tsx +++ b/surfsense_web/components/public-chat/public-chat-footer.tsx @@ -26,6 +26,9 @@ export function PublicChatFooter({ shareToken }: PublicChatFooterProps) { share_token: shareToken, }); + // Force PGlite to resync notifications on next dashboard load + localStorage.setItem("surfsense_force_notif_resync", "true"); + toast.success("Copying chat to your account...", { description: "You'll be notified when it's ready.", }); diff --git a/surfsense_web/lib/electric/client.ts b/surfsense_web/lib/electric/client.ts index 148da58ec..4e7ff87e7 100644 --- a/surfsense_web/lib/electric/client.ts +++ b/surfsense_web/lib/electric/client.ts @@ -274,6 +274,16 @@ export async function initElectric(userId: string): Promise { CREATE INDEX IF NOT EXISTS idx_new_chat_messages_created_at ON new_chat_messages(created_at); `); + // Force resync notifications if flagged (e.g., after clone from public page) + if ( + typeof window !== "undefined" && + localStorage.getItem("surfsense_force_notif_resync") === "true" + ) { + console.log("[Electric] Force resync flag detected, clearing notifications table"); + await db.exec("DELETE FROM notifications"); + localStorage.removeItem("surfsense_force_notif_resync"); + } + const electricUrl = getElectricUrl(); // STEP 4: Create the client wrapper From 24472c0ea6137b3e787a92c584978f265cca2d8e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 27 Jan 2026 10:50:37 +0200 Subject: [PATCH 28/69] fix: preserve author info in cloned chats and force PGlite resync after clone --- .../[search_space_id]/new-chat/[[...chat_id]]/page.tsx | 10 ++++++++-- surfsense_web/hooks/use-inbox.ts | 9 +++++++++ surfsense_web/lib/electric/client.ts | 10 ---------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 9b45d4d62..a56cd84ce 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -187,6 +187,12 @@ export default function NewChatPage() { ? membersData?.find((m) => m.user_id === msg.author_id) : null; + // Preserve existing author info if member lookup fails (e.g., cloned chats) + const existingMsg = prev.find((m) => m.id === `msg-${msg.id}`); + const existingAuthor = existingMsg?.metadata?.custom?.author as + | { displayName?: string | null; avatarUrl?: string | null } + | undefined; + return convertToThreadMessage({ id: msg.id, thread_id: msg.thread_id, @@ -194,8 +200,8 @@ export default function NewChatPage() { content: msg.content, author_id: msg.author_id, created_at: msg.created_at, - author_display_name: member?.user_display_name ?? null, - author_avatar_url: member?.user_avatar_url ?? null, + author_display_name: member?.user_display_name ?? existingAuthor?.displayName ?? null, + author_avatar_url: member?.user_avatar_url ?? existingAuthor?.avatarUrl ?? null, }); }); }); diff --git a/surfsense_web/hooks/use-inbox.ts b/surfsense_web/hooks/use-inbox.ts index 4c26ddcb9..656de18a8 100644 --- a/surfsense_web/hooks/use-inbox.ts +++ b/surfsense_web/hooks/use-inbox.ts @@ -119,6 +119,15 @@ export function useInbox( async function startSync() { try { + // Check for force resync flag (e.g., after clone from public page) + if (localStorage.getItem("surfsense_force_notif_resync") === "true") { + console.log("[useInbox] Force resync flag detected, clearing notifications"); + await client.db.exec("DELETE FROM notifications"); + localStorage.removeItem("surfsense_force_notif_resync"); + // Reset sync key to force a fresh sync + userSyncKeyRef.current = null; + } + const cutoffDate = getSyncCutoffDate(); const userSyncKey = `inbox_${userId}_${cutoffDate}`; diff --git a/surfsense_web/lib/electric/client.ts b/surfsense_web/lib/electric/client.ts index 4e7ff87e7..148da58ec 100644 --- a/surfsense_web/lib/electric/client.ts +++ b/surfsense_web/lib/electric/client.ts @@ -274,16 +274,6 @@ export async function initElectric(userId: string): Promise { CREATE INDEX IF NOT EXISTS idx_new_chat_messages_created_at ON new_chat_messages(created_at); `); - // Force resync notifications if flagged (e.g., after clone from public page) - if ( - typeof window !== "undefined" && - localStorage.getItem("surfsense_force_notif_resync") === "true" - ) { - console.log("[Electric] Force resync flag detected, clearing notifications table"); - await db.exec("DELETE FROM notifications"); - localStorage.removeItem("surfsense_force_notif_resync"); - } - const electricUrl = getElectricUrl(); // STEP 4: Create the client wrapper From bd921a8ec8d2b8e297989c3f8d63182ba2b79725 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 27 Jan 2026 10:58:03 +0200 Subject: [PATCH 29/69] fix: handle abort error in useGithubStars hook --- surfsense_web/hooks/use-github-stars.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/surfsense_web/hooks/use-github-stars.ts b/surfsense_web/hooks/use-github-stars.ts index a4d4f80fd..aa2bad1b9 100644 --- a/surfsense_web/hooks/use-github-stars.ts +++ b/surfsense_web/hooks/use-github-stars.ts @@ -25,6 +25,10 @@ export const useGithubStars = () => { setStars(data?.stargazers_count); } catch (err) { + // Ignore abort errors (expected on unmount) + if (err instanceof Error && err.name === "AbortError") { + return; + } if (err instanceof Error) { console.error("Error fetching stars:", err); setError(err.message); @@ -37,7 +41,7 @@ export const useGithubStars = () => { getStars(); return () => { - abortController.abort(); + abortController.abort("Component unmounted"); }; }, []); From 6091e070f333f718dfa4b044829b21e3e41823be Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 27 Jan 2026 11:11:08 +0200 Subject: [PATCH 30/69] chore: remove unused import and restrict noAuthPrefixes --- surfsense_backend/app/services/public_chat_service.py | 2 -- surfsense_web/lib/apis/base-api.service.ts | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index 85f1a9572..a6434a611 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -255,8 +255,6 @@ async def clone_public_chat( Creates a new private thread with all messages and podcasts. Citations are stripped since they reference the original user's documents. """ - import copy - from app.db import ( ChatVisibility, NewChatMessage, diff --git a/surfsense_web/lib/apis/base-api.service.ts b/surfsense_web/lib/apis/base-api.service.ts index b14818ac1..a87d4deaf 100644 --- a/surfsense_web/lib/apis/base-api.service.ts +++ b/surfsense_web/lib/apis/base-api.service.ts @@ -26,7 +26,7 @@ class BaseApiService { noAuthEndpoints: string[] = ["/auth/jwt/login", "/auth/register", "/auth/refresh"]; // Prefixes that don't require auth (checked with startsWith) - noAuthPrefixes: string[] = ["/api/v1/public/", "/api/v1/podcasts/"]; + noAuthPrefixes: string[] = ["/api/v1/public/"]; // Use a getter to always read fresh token from localStorage // This ensures the token is always up-to-date after login/logout From ba304be9777c12ef93fc5eb80b257d0d6e3e1ec2 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 27 Jan 2026 15:28:30 +0530 Subject: [PATCH 31/69] fix: remove message from loading UI --- surfsense_web/app/(home)/login/page.tsx | 2 +- surfsense_web/app/auth/callback/loading.tsx | 5 +-- .../[search_space_id]/client-layout.tsx | 6 +-- surfsense_web/app/dashboard/layout.tsx | 4 +- surfsense_web/app/dashboard/loading.tsx | 5 +-- surfsense_web/app/dashboard/page.tsx | 2 +- surfsense_web/atoms/ui/loading.atoms.ts | 19 ++------- surfsense_web/components/TokenHandler.tsx | 4 +- .../components/providers/ElectricProvider.tsx | 4 +- .../providers/GlobalLoadingProvider.tsx | 36 +++-------------- surfsense_web/hooks/use-global-loading.ts | 39 +++++++------------ surfsense_web/messages/en.json | 9 +---- surfsense_web/messages/zh.json | 9 +---- 13 files changed, 34 insertions(+), 110 deletions(-) diff --git a/surfsense_web/app/(home)/login/page.tsx b/surfsense_web/app/(home)/login/page.tsx index 0dc9c445f..a3ef7cd8f 100644 --- a/surfsense_web/app/(home)/login/page.tsx +++ b/surfsense_web/app/(home)/login/page.tsx @@ -93,7 +93,7 @@ function LoginContent() { }, [searchParams, t, tCommon]); // Use global loading screen for auth type determination - spinner animation won't reset - useGlobalLoadingEffect(isLoading, tCommon("loading"), "login"); + useGlobalLoadingEffect(isLoading); // Show nothing while loading - the GlobalLoadingProvider handles the loading UI if (isLoading) { diff --git a/surfsense_web/app/auth/callback/loading.tsx b/surfsense_web/app/auth/callback/loading.tsx index 0c94e1ee0..f12b3847d 100644 --- a/surfsense_web/app/auth/callback/loading.tsx +++ b/surfsense_web/app/auth/callback/loading.tsx @@ -1,13 +1,10 @@ "use client"; -import { useTranslations } from "next-intl"; import { useGlobalLoadingEffect } from "@/hooks/use-global-loading"; export default function AuthCallbackLoading() { - const t = useTranslations("auth"); - // Use global loading - spinner animation won't reset when page transitions - useGlobalLoadingEffect(true, t("processing_authentication"), "default"); + useGlobalLoadingEffect(true); // Return null - the GlobalLoadingProvider handles the loading UI return null; diff --git a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx index e6730d8d1..8418d4719 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx @@ -154,11 +154,7 @@ export function DashboardClientLayout({ isAutoConfiguring; // Use global loading screen - spinner animation won't reset - useGlobalLoadingEffect( - shouldShowLoading, - isAutoConfiguring ? t("setting_up_ai") : t("checking_llm_prefs"), - "default" - ); + useGlobalLoadingEffect(shouldShowLoading); if (shouldShowLoading) { return null; diff --git a/surfsense_web/app/dashboard/layout.tsx b/surfsense_web/app/dashboard/layout.tsx index 889b823d6..4a32c2147 100644 --- a/surfsense_web/app/dashboard/layout.tsx +++ b/surfsense_web/app/dashboard/layout.tsx @@ -1,6 +1,5 @@ "use client"; -import { useTranslations } from "next-intl"; import { useEffect, useState } from "react"; import { useGlobalLoadingEffect } from "@/hooks/use-global-loading"; import { getBearerToken, redirectToLogin } from "@/lib/auth-utils"; @@ -10,11 +9,10 @@ interface DashboardLayoutProps { } export default function DashboardLayout({ children }: DashboardLayoutProps) { - const t = useTranslations("dashboard"); const [isCheckingAuth, setIsCheckingAuth] = useState(true); // Use the global loading screen - spinner animation won't reset - useGlobalLoadingEffect(isCheckingAuth, t("checking_auth"), "default"); + useGlobalLoadingEffect(isCheckingAuth); useEffect(() => { // Check if user is authenticated diff --git a/surfsense_web/app/dashboard/loading.tsx b/surfsense_web/app/dashboard/loading.tsx index 2eee93651..ca6b05de0 100644 --- a/surfsense_web/app/dashboard/loading.tsx +++ b/surfsense_web/app/dashboard/loading.tsx @@ -1,13 +1,10 @@ "use client"; -import { useTranslations } from "next-intl"; import { useGlobalLoadingEffect } from "@/hooks/use-global-loading"; export default function DashboardLoading() { - const t = useTranslations("common"); - // Use global loading - spinner animation won't reset when page transitions - useGlobalLoadingEffect(true, t("loading"), "default"); + useGlobalLoadingEffect(true); // Return null - the GlobalLoadingProvider handles the loading UI return null; diff --git a/surfsense_web/app/dashboard/page.tsx b/surfsense_web/app/dashboard/page.tsx index 504d172c3..2bd8f4462 100644 --- a/surfsense_web/app/dashboard/page.tsx +++ b/surfsense_web/app/dashboard/page.tsx @@ -106,7 +106,7 @@ export default function DashboardPage() { const shouldShowLoading = isLoading || searchSpaces.length > 0; // Use global loading screen - spinner animation won't reset - useGlobalLoadingEffect(shouldShowLoading, t("fetching_spaces"), "default"); + useGlobalLoadingEffect(shouldShowLoading); if (error) return ; diff --git a/surfsense_web/atoms/ui/loading.atoms.ts b/surfsense_web/atoms/ui/loading.atoms.ts index f10d9247b..ca37e1cdc 100644 --- a/surfsense_web/atoms/ui/loading.atoms.ts +++ b/surfsense_web/atoms/ui/loading.atoms.ts @@ -2,29 +2,18 @@ import { atom } from "jotai"; interface GlobalLoadingState { isLoading: boolean; - message?: string; - variant: "login" | "default"; } export const globalLoadingAtom = atom({ isLoading: false, - message: undefined, - variant: "default", }); // Helper atom for showing global loading -export const showGlobalLoadingAtom = atom( - null, - ( - get, - set, - { message, variant = "default" }: { message?: string; variant?: "login" | "default" } - ) => { - set(globalLoadingAtom, { isLoading: true, message, variant }); - } -); +export const showGlobalLoadingAtom = atom(null, (get, set) => { + set(globalLoadingAtom, { isLoading: true }); +}); // Helper atom for hiding global loading export const hideGlobalLoadingAtom = atom(null, (get, set) => { - set(globalLoadingAtom, { isLoading: false, message: undefined, variant: "default" }); + set(globalLoadingAtom, { isLoading: false }); }); diff --git a/surfsense_web/components/TokenHandler.tsx b/surfsense_web/components/TokenHandler.tsx index 35408c1b2..e3295df7c 100644 --- a/surfsense_web/components/TokenHandler.tsx +++ b/surfsense_web/components/TokenHandler.tsx @@ -1,7 +1,6 @@ "use client"; import { useSearchParams } from "next/navigation"; -import { useTranslations } from "next-intl"; import { useEffect } from "react"; import { useGlobalLoadingEffect } from "@/hooks/use-global-loading"; import { getAndClearRedirectPath, setBearerToken } from "@/lib/auth-utils"; @@ -27,11 +26,10 @@ const TokenHandler = ({ tokenParamName = "token", storageKey = "surfsense_bearer_token", }: TokenHandlerProps) => { - const t = useTranslations("auth"); const searchParams = useSearchParams(); // Always show loading for this component - spinner animation won't reset - useGlobalLoadingEffect(true, t("processing_authentication"), "default"); + useGlobalLoadingEffect(true); useEffect(() => { // Only run on client-side diff --git a/surfsense_web/components/providers/ElectricProvider.tsx b/surfsense_web/components/providers/ElectricProvider.tsx index 07d736c64..4aa83b304 100644 --- a/surfsense_web/components/providers/ElectricProvider.tsx +++ b/surfsense_web/components/providers/ElectricProvider.tsx @@ -1,7 +1,6 @@ "use client"; import { useAtomValue } from "jotai"; -import { useTranslations } from "next-intl"; import { useEffect, useRef, useState } from "react"; import { currentUserAtom } from "@/atoms/user/user-query.atoms"; import { useGlobalLoadingEffect } from "@/hooks/use-global-loading"; @@ -30,7 +29,6 @@ interface ElectricProviderProps { * 5. Provides client via context - hooks should use useElectricClient() */ export function ElectricProvider({ children }: ElectricProviderProps) { - const t = useTranslations("common"); const [electricClient, setElectricClient] = useState(null); const [error, setError] = useState(null); const { @@ -117,7 +115,7 @@ export function ElectricProvider({ children }: ElectricProviderProps) { const shouldShowLoading = hasToken && isUserLoaded && !!user?.id && !electricClient && !error; // Use global loading hook with ownership tracking - prevents flash during transitions - useGlobalLoadingEffect(shouldShowLoading, t("initializing"), "default"); + useGlobalLoadingEffect(shouldShowLoading); // For non-authenticated pages (like landing page), render immediately with null context // Also render immediately if user query failed (e.g., token expired) diff --git a/surfsense_web/components/providers/GlobalLoadingProvider.tsx b/surfsense_web/components/providers/GlobalLoadingProvider.tsx index db66b9a64..08c888954 100644 --- a/surfsense_web/components/providers/GlobalLoadingProvider.tsx +++ b/surfsense_web/components/providers/GlobalLoadingProvider.tsx @@ -3,9 +3,7 @@ import { useAtomValue } from "jotai"; import { useEffect, useState } from "react"; import { createPortal } from "react-dom"; -import { AmbientBackground } from "@/app/(home)/login/AmbientBackground"; import { globalLoadingAtom } from "@/atoms/ui/loading.atoms"; -import { Logo } from "@/components/Logo"; import { Spinner } from "@/components/ui/spinner"; import { cn } from "@/lib/utils"; @@ -18,7 +16,7 @@ import { cn } from "@/lib/utils"; */ export function GlobalLoadingProvider({ children }: { children: React.ReactNode }) { const [mounted, setMounted] = useState(false); - const { isLoading, message, variant } = useAtomValue(globalLoadingAtom); + const { isLoading } = useAtomValue(globalLoadingAtom); useEffect(() => { setMounted(true); @@ -36,35 +34,11 @@ export function GlobalLoadingProvider({ children }: { children: React.ReactNode )} aria-hidden={!isLoading} > - {variant === "login" ? ( -
- -
- -
-
- {/* Spinner is always mounted, animation never resets */} - -
- - {message} - -
-
+
+
+
- ) : ( -
-
-
- {/* Spinner is always mounted, animation never resets */} - -
- - {message} - -
-
- )} +
); diff --git a/surfsense_web/hooks/use-global-loading.ts b/surfsense_web/hooks/use-global-loading.ts index baaa1f089..fee8ae18e 100644 --- a/surfsense_web/hooks/use-global-loading.ts +++ b/surfsense_web/hooks/use-global-loading.ts @@ -20,21 +20,18 @@ let pendingHideTimeout: ReturnType | null = null; export function useGlobalLoading() { const [loading, setLoading] = useAtom(globalLoadingAtom); - const show = useCallback( - (message?: string, variant: "login" | "default" = "default") => { - // Cancel any pending hide - new loading request takes over - if (pendingHideTimeout) { - clearTimeout(pendingHideTimeout); - pendingHideTimeout = null; - } + const show = useCallback(() => { + // Cancel any pending hide - new loading request takes over + if (pendingHideTimeout) { + clearTimeout(pendingHideTimeout); + pendingHideTimeout = null; + } - const id = ++loadingIdCounter; - currentLoadingId = id; - setLoading({ isLoading: true, message, variant }); - return id; - }, - [setLoading] - ); + const id = ++loadingIdCounter; + currentLoadingId = id; + setLoading({ isLoading: true }); + return id; + }, [setLoading]); const hide = useCallback( (id?: number) => { @@ -50,7 +47,7 @@ export function useGlobalLoading() { // Double-check we're still the current loading after the delay if (id === undefined || id === currentLoadingId) { currentLoadingId = null; - setLoading({ isLoading: false, message: undefined, variant: "default" }); + setLoading({ isLoading: false }); } pendingHideTimeout = null; }, 50); // Small delay to allow next component to mount and show loading @@ -70,27 +67,21 @@ export function useGlobalLoading() { * transition loading states (e.g., layout → page). * * @param shouldShow - Whether the loading screen should be visible - * @param message - Optional message to display - * @param variant - Visual style variant ("login" or "default") */ -export function useGlobalLoadingEffect( - shouldShow: boolean, - message?: string, - variant: "login" | "default" = "default" -) { +export function useGlobalLoadingEffect(shouldShow: boolean) { const { show, hide } = useGlobalLoading(); const loadingIdRef = useRef(null); useEffect(() => { if (shouldShow) { // Show loading and store the ID - loadingIdRef.current = show(message, variant); + loadingIdRef.current = show(); } else if (loadingIdRef.current !== null) { // Only hide if we were the ones showing loading hide(loadingIdRef.current); loadingIdRef.current = null; } - }, [shouldShow, message, variant, show, hide]); + }, [shouldShow, show, hide]); // Cleanup on unmount - only hide if we're still the active loading useEffect(() => { diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json index f14c73ddc..578bb9002 100644 --- a/surfsense_web/messages/en.json +++ b/surfsense_web/messages/en.json @@ -2,8 +2,6 @@ "common": { "app_name": "SurfSense", "welcome": "Welcome", - "loading": "Loading", - "initializing": "Initializing", "save": "Save", "cancel": "Cancel", "delete": "Delete", @@ -80,8 +78,7 @@ "passwords_no_match_desc": "The passwords you entered do not match", "creating_account": "Creating your account", "creating_account_btn": "Creating account", - "redirecting_login": "Redirecting to login page", - "processing_authentication": "Processing authentication" + "redirecting_login": "Redirecting to login page" }, "searchSpace": { "create_title": "Create Search Space", @@ -146,10 +143,7 @@ "api_keys": "API Keys", "profile": "Profile", "loading_dashboard": "Loading Dashboard", - "checking_auth": "Checking authentication", "loading_config": "Loading Configuration", - "checking_llm_prefs": "Checking your LLM preferences", - "setting_up_ai": "Setting up AI", "config_error": "Configuration Error", "failed_load_llm_config": "Failed to load your LLM configuration", "error_loading_chats": "Error loading chats", @@ -171,7 +165,6 @@ "create_search_space": "Create Search Space", "add_new_search_space": "Add New Search Space", "loading": "Loading", - "fetching_spaces": "Fetching your search spaces", "may_take_moment": "This may take a moment", "error": "Error", "something_wrong": "Something went wrong", diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json index 6838b0f52..9bbbe1ecf 100644 --- a/surfsense_web/messages/zh.json +++ b/surfsense_web/messages/zh.json @@ -2,8 +2,6 @@ "common": { "app_name": "SurfSense", "welcome": "欢迎", - "loading": "加载中...", - "initializing": "正在初始化", "save": "保存", "cancel": "取消", "delete": "删除", @@ -80,8 +78,7 @@ "passwords_no_match_desc": "您输入的密码不一致", "creating_account": "正在创建您的账户", "creating_account_btn": "创建中", - "redirecting_login": "正在跳转到登录页面", - "processing_authentication": "正在处理身份验证" + "redirecting_login": "正在跳转到登录页面" }, "searchSpace": { "create_title": "创建搜索空间", @@ -131,10 +128,7 @@ "api_keys": "API 密钥", "profile": "个人资料", "loading_dashboard": "正在加载仪表盘", - "checking_auth": "正在检查身份验证", "loading_config": "正在加载配置", - "checking_llm_prefs": "正在检查您的 LLM 偏好设置", - "setting_up_ai": "正在设置 AI", "config_error": "配置错误", "failed_load_llm_config": "无法加载您的 LLM 配置", "error_loading_chats": "加载对话失败", @@ -156,7 +150,6 @@ "create_search_space": "创建搜索空间", "add_new_search_space": "添加新的搜索空间", "loading": "加载中", - "fetching_spaces": "正在获取您的搜索空间", "may_take_moment": "这可能需要一些时间", "error": "错误", "something_wrong": "出现错误", From b148731fda4f3c6098ce4ec6c000ca4ab9609d06 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 27 Jan 2026 16:32:05 +0530 Subject: [PATCH 32/69] feat(sidebar): enhance chat sections layout and functionality with improved height management --- .../layout/providers/LayoutDataProvider.tsx | 6 +- .../components/layout/ui/sidebar/Sidebar.tsx | 213 +++++++++--------- .../layout/ui/sidebar/SidebarSection.tsx | 28 ++- 3 files changed, 139 insertions(+), 108 deletions(-) diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx index 702014050..0baf1dcfa 100644 --- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx +++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx @@ -87,10 +87,10 @@ export function LayoutDataProvider({ enabled: !!searchSpaceId, }); - // Fetch threads + // Fetch threads (40 total to allow up to 20 per section - shared/private) const { data: threadsData } = useQuery({ - queryKey: ["threads", searchSpaceId, { limit: 4 }], - queryFn: () => fetchThreads(Number(searchSpaceId), 4), + queryKey: ["threads", searchSpaceId, { limit: 40 }], + queryFn: () => fetchThreads(Number(searchSpaceId), 40), enabled: !!searchSpaceId, }); diff --git a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx index d05f21096..6db23a5c5 100644 --- a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx @@ -3,7 +3,6 @@ import { FolderOpen, MessageSquare, PenSquare } from "lucide-react"; import { useTranslations } from "next-intl"; import { Button } from "@/components/ui/button"; -import { ScrollArea } from "@/components/ui/scroll-area"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { cn } from "@/lib/utils"; import type { ChatItem, NavItem, PageUsage, SearchSpace, User } from "../../types/layout.types"; @@ -121,101 +120,109 @@ export function Sidebar({ )} - {/* Scrollable content */} - - {isCollapsed ? ( -
- {(chats.length > 0 || sharedChats.length > 0) && ( - - - - - - {t("chats")} ({chats.length + sharedChats.length}) - - + {/* Chat sections - fills available space */} + {isCollapsed ? ( +
+ {(chats.length > 0 || sharedChats.length > 0) && ( + + + + + + {t("chats")} ({chats.length + sharedChats.length}) + + + )} +
+ ) : ( +
+ {/* Shared Chats Section - takes half the space */} + + + + + + {t("view_all_shared_chats") || "View all shared chats"} + + + ) : undefined + } + > + {sharedChats.length > 0 ? ( +
+
4 ? 'pb-8' : ''}`}> + {sharedChats.slice(0, 20).map((chat) => ( + onChatSelect(chat)} + onArchive={() => onChatArchive?.(chat)} + onDelete={() => onChatDelete?.(chat)} + /> + ))} +
+ {/* Gradient fade indicator when more than 4 items */} + {sharedChats.length > 4 && ( +
+ )} +
+ ) : ( +

{t("no_shared_chats")}

)} -
- ) : ( -
- {/* Shared Chats Section */} - - - - - - {t("view_all_shared_chats") || "View all shared chats"} - - - ) : undefined - } - > - {sharedChats.length > 0 ? ( -
- {sharedChats.map((chat) => ( - onChatSelect(chat)} - onArchive={() => onChatArchive?.(chat)} - onDelete={() => onChatDelete?.(chat)} - /> - ))} -
- ) : ( -

{t("no_shared_chats")}

- )} -
+ - {/* Private Chats Section */} - - - - - - {t("view_all_private_chats") || "View all private chats"} - - - ) : undefined - } - > - {chats.length > 0 ? ( -
- {chats.map((chat) => ( + {/* Private Chats Section - takes half the space */} + + + + + + {t("view_all_private_chats") || "View all private chats"} + + + ) : undefined + } + > + {chats.length > 0 ? ( +
+
4 ? 'pb-8' : ''}`}> + {chats.slice(0, 20).map((chat) => ( ))}
- ) : ( -

{t("no_chats")}

- )} - -
- )} - + {/* Gradient fade indicator when more than 4 items */} + {chats.length > 4 && ( +
+ )} +
+ ) : ( +

{t("no_chats")}

+ )} +
+
+ )} {/* Footer */}
diff --git a/surfsense_web/components/layout/ui/sidebar/SidebarSection.tsx b/surfsense_web/components/layout/ui/sidebar/SidebarSection.tsx index 0ceafc113..e296ed3d4 100644 --- a/surfsense_web/components/layout/ui/sidebar/SidebarSection.tsx +++ b/surfsense_web/components/layout/ui/sidebar/SidebarSection.tsx @@ -11,6 +11,8 @@ interface SidebarSectionProps { children: React.ReactNode; action?: React.ReactNode; persistentAction?: React.ReactNode; + className?: string; + fillHeight?: boolean; } export function SidebarSection({ @@ -19,12 +21,22 @@ export function SidebarSection({ children, action, persistentAction, + className, + fillHeight = false, }: SidebarSectionProps) { const [isOpen, setIsOpen] = useState(defaultOpen); return ( - -
+ +
- -
{children}
+ +
+ {children} +
); From 3604a0bbf3ccfcdf9d6495b79e44c0654369e2eb Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 27 Jan 2026 16:34:35 +0530 Subject: [PATCH 33/69] chore: ran frontend linting --- .../components/layout/ui/sidebar/Sidebar.tsx | 8 ++++-- .../layout/ui/sidebar/SidebarSection.tsx | 26 +++++++------------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx index 6db23a5c5..4a587cd58 100644 --- a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx @@ -171,7 +171,9 @@ export function Sidebar({ > {sharedChats.length > 0 ? (
-
4 ? 'pb-8' : ''}`}> +
4 ? "pb-8" : ""}`} + > {sharedChats.slice(0, 20).map((chat) => ( {chats.length > 0 ? (
-
4 ? 'pb-8' : ''}`}> +
4 ? "pb-8" : ""}`} + > {chats.slice(0, 20).map((chat) => (
@@ -60,14 +56,12 @@ export function SidebarSection({ )}
- -
+ +
{children}
From 3c40c6e3659b0de848a28d845444ce26e41b842c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 27 Jan 2026 13:33:36 +0200 Subject: [PATCH 34/69] feat: add clone tracking and history bootstrap for cloned chats --- .vscode/settings.json | 3 +- .../versions/81_add_public_chat_features.py | 105 ++++++++++++++++++ .../81_add_public_share_to_chat_threads.py | 66 ----------- surfsense_backend/app/db.py | 19 ++++ .../app/routes/new_chat_routes.py | 2 + .../app/services/public_chat_service.py | 4 + .../app/tasks/chat/stream_new_chat.py | 21 +++- surfsense_backend/app/utils/content_utils.py | 75 +++++++++++++ 8 files changed, 225 insertions(+), 70 deletions(-) create mode 100644 surfsense_backend/alembic/versions/81_add_public_chat_features.py delete mode 100644 surfsense_backend/alembic/versions/81_add_public_share_to_chat_threads.py create mode 100644 surfsense_backend/app/utils/content_utils.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 05bd30702..f134660b6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,3 @@ { - "biome.configurationPath": "./surfsense_web/biome.json", - "deepscan.ignoreConfirmWarning": true + "biome.configurationPath": "./surfsense_web/biome.json" } \ No newline at end of file diff --git a/surfsense_backend/alembic/versions/81_add_public_chat_features.py b/surfsense_backend/alembic/versions/81_add_public_chat_features.py new file mode 100644 index 000000000..ab73b06bb --- /dev/null +++ b/surfsense_backend/alembic/versions/81_add_public_chat_features.py @@ -0,0 +1,105 @@ +"""Add public chat sharing and cloning features to new_chat_threads + +Revision ID: 81 +Revises: 80 +Create Date: 2026-01-23 + +Adds columns for: +1. Public sharing via tokenized URLs (public_share_token, public_share_enabled) +2. Clone tracking for audit (cloned_from_thread_id, cloned_at) +3. History bootstrap flag for cloned chats (needs_history_bootstrap) +""" + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "81" +down_revision: str | None = "80" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add public sharing and cloning columns to new_chat_threads.""" + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS public_share_token VARCHAR(64); + """ + ) + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS public_share_enabled BOOLEAN NOT NULL DEFAULT FALSE; + """ + ) + + op.execute( + """ + CREATE UNIQUE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_token + ON new_chat_threads(public_share_token) + WHERE public_share_token IS NOT NULL; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_enabled + ON new_chat_threads(public_share_enabled) + WHERE public_share_enabled = TRUE; + """ + ) + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS cloned_from_thread_id INTEGER + REFERENCES new_chat_threads(id) ON DELETE SET NULL; + """ + ) + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS cloned_at TIMESTAMP WITH TIME ZONE; + """ + ) + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS needs_history_bootstrap BOOLEAN NOT NULL DEFAULT FALSE; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_new_chat_threads_cloned_from_thread_id + ON new_chat_threads(cloned_from_thread_id) + WHERE cloned_from_thread_id IS NOT NULL; + """ + ) + + +def downgrade() -> None: + """Remove public sharing and cloning columns from new_chat_threads.""" + + op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_cloned_from_thread_id") + op.execute( + "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS needs_history_bootstrap" + ) + op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS cloned_at") + op.execute( + "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS cloned_from_thread_id" + ) + + op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_enabled") + op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_token") + op.execute( + "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_enabled" + ) + op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_token") diff --git a/surfsense_backend/alembic/versions/81_add_public_share_to_chat_threads.py b/surfsense_backend/alembic/versions/81_add_public_share_to_chat_threads.py deleted file mode 100644 index 33e1a88e9..000000000 --- a/surfsense_backend/alembic/versions/81_add_public_share_to_chat_threads.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Add public sharing columns to new_chat_threads - -Revision ID: 81 -Revises: 80 -Create Date: 2026-01-23 - -Adds public_share_token and public_share_enabled columns to enable -public sharing of chat threads via secure tokenized URLs. -""" - -from collections.abc import Sequence - -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "81" -down_revision: str | None = "80" -branch_labels: str | Sequence[str] | None = None -depends_on: str | Sequence[str] | None = None - - -def upgrade() -> None: - """Add public sharing columns to new_chat_threads.""" - # Add public_share_token column - op.execute( - """ - ALTER TABLE new_chat_threads - ADD COLUMN IF NOT EXISTS public_share_token VARCHAR(64); - """ - ) - - # Add public_share_enabled column with default false - op.execute( - """ - ALTER TABLE new_chat_threads - ADD COLUMN IF NOT EXISTS public_share_enabled BOOLEAN NOT NULL DEFAULT FALSE; - """ - ) - - # Add unique partial index on public_share_token (only non-null values) - op.execute( - """ - CREATE UNIQUE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_token - ON new_chat_threads(public_share_token) - WHERE public_share_token IS NOT NULL; - """ - ) - - # Add partial index on public_share_enabled for fast public chat queries - op.execute( - """ - CREATE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_enabled - ON new_chat_threads(public_share_enabled) - WHERE public_share_enabled = TRUE; - """ - ) - - -def downgrade() -> None: - """Remove public sharing columns from new_chat_threads.""" - op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_enabled") - op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_token") - op.execute( - "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_enabled" - ) - op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_token") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index e3b077ff0..0182d2c53 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -412,6 +412,25 @@ class NewChatThread(BaseModel, TimestampMixin): server_default="false", ) + # Clone tracking - for audit and history bootstrap + cloned_from_thread_id = Column( + Integer, + ForeignKey("new_chat_threads.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + cloned_at = Column( + TIMESTAMP(timezone=True), + nullable=True, + ) + # Flag to bootstrap LangGraph checkpointer with DB messages on first message + needs_history_bootstrap = Column( + Boolean, + nullable=False, + default=False, + server_default="false", + ) + # Relationships search_space = relationship("SearchSpace", back_populates="new_chat_threads") created_by = relationship("User", back_populates="new_chat_threads") diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 4571e9051..db371a81c 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -1027,6 +1027,7 @@ async def handle_new_chat( attachments=request.attachments, mentioned_document_ids=request.mentioned_document_ids, mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, + needs_history_bootstrap=thread.needs_history_bootstrap, ), media_type="text/event-stream", headers={ @@ -1254,6 +1255,7 @@ async def regenerate_response( mentioned_document_ids=request.mentioned_document_ids, mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, checkpoint_id=target_checkpoint_id, + needs_history_bootstrap=thread.needs_history_bootstrap, ): yield chunk # If we get here, streaming completed successfully diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index a6434a611..7c3b89af9 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -4,6 +4,7 @@ Service layer for public chat sharing and cloning. import re import secrets +from datetime import UTC, datetime from uuid import UUID from fastapi import HTTPException @@ -283,6 +284,9 @@ async def clone_public_chat( search_space_id=target_search_space_id, created_by_id=user_id, public_share_enabled=False, + cloned_from_thread_id=source_thread.id, + cloned_at=datetime.now(UTC), + needs_history_bootstrap=True, ) session.add(new_thread) await session.flush() diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 39d85f0c6..875b7e95a 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -18,6 +18,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent +from app.utils.content_utils import bootstrap_history_from_db from app.agents.new_chat.checkpointer import get_checkpointer from app.agents.new_chat.llm_config import ( AgentConfig, @@ -205,13 +206,13 @@ async def stream_new_chat( mentioned_document_ids: list[int] | None = None, mentioned_surfsense_doc_ids: list[int] | None = None, checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, ) -> AsyncGenerator[str, None]: """ Stream chat responses from the new SurfSense deep agent. This uses the Vercel AI SDK Data Stream Protocol (SSE format) for streaming. The chat_id is used as LangGraph's thread_id for memory/checkpointing. - Message history can be passed from the frontend for context. Args: user_query: The user's query @@ -221,6 +222,7 @@ async def stream_new_chat( user_id: The current user's UUID string (for memory tools and session state) llm_config_id: The LLM configuration ID (default: -1 for first global config) attachments: Optional attachments with extracted content + needs_history_bootstrap: If True, load message history from DB (for cloned chats) mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat checkpoint_id: Optional checkpoint ID to rewind/fork from (for edit/reload operations) @@ -305,9 +307,24 @@ async def stream_new_chat( firecrawl_api_key=firecrawl_api_key, # Pass Firecrawl API key if configured ) - # Build input with message history from frontend + # Build input with message history langchain_messages = [] + # Bootstrap history for cloned chats (no LangGraph checkpoint exists yet) + if needs_history_bootstrap: + langchain_messages = await bootstrap_history_from_db(session, chat_id) + + # Clear the flag so we don't bootstrap again on next message + from app.db import NewChatThread + + thread_result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == chat_id) + ) + thread = thread_result.scalars().first() + if thread: + thread.needs_history_bootstrap = False + await session.commit() + # Fetch mentioned documents if any (with chunks for proper citations) mentioned_documents: list[Document] = [] if mentioned_document_ids: diff --git a/surfsense_backend/app/utils/content_utils.py b/surfsense_backend/app/utils/content_utils.py new file mode 100644 index 000000000..d2342b79e --- /dev/null +++ b/surfsense_backend/app/utils/content_utils.py @@ -0,0 +1,75 @@ +""" +Utilities for working with message content. + +Message content in new_chat_messages can be stored in various formats: +- String: Simple text content +- List: Array of content parts [{"type": "text", "text": "..."}, {"type": "tool-call", ...}] +- Dict: Single content object + +These utilities help extract and transform content for different use cases. +""" + +from langchain_core.messages import AIMessage, HumanMessage +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + + +def extract_text_content(content: str | dict | list) -> str: + """Extract plain text content from various message formats.""" + if isinstance(content, str): + return content + if isinstance(content, dict): + # Handle dict with 'text' key + if "text" in content: + return content["text"] + return str(content) + if isinstance(content, list): + # Handle list of parts (e.g., [{"type": "text", "text": "..."}]) + texts = [] + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + texts.append(part.get("text", "")) + elif isinstance(part, str): + texts.append(part) + return "\n".join(texts) if texts else "" + return "" + + +async def bootstrap_history_from_db( + session: AsyncSession, + thread_id: int, +) -> list[HumanMessage | AIMessage]: + """ + Load message history from database and convert to LangChain format. + + Used for cloned chats where the LangGraph checkpointer has no state, + but we have messages in the database that should be used as context. + + Args: + session: Database session + thread_id: The chat thread ID + + Returns: + List of LangChain messages (HumanMessage/AIMessage) + """ + from app.db import NewChatMessage + + result = await session.execute( + select(NewChatMessage) + .filter(NewChatMessage.thread_id == thread_id) + .order_by(NewChatMessage.created_at) + ) + db_messages = result.scalars().all() + + langchain_messages: list[HumanMessage | AIMessage] = [] + + for msg in db_messages: + text_content = extract_text_content(msg.content) + if not text_content: + continue + if msg.role == "user": + langchain_messages.append(HumanMessage(content=text_content)) + elif msg.role == "assistant": + langchain_messages.append(AIMessage(content=text_content)) + + return langchain_messages From c65cda24d78b35e7efd03db600f260086539b556 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 27 Jan 2026 13:49:46 +0200 Subject: [PATCH 35/69] style: fix formatting issues --- surfsense_backend/app/routes/rbac_routes.py | 4 ++- .../app/tasks/chat/stream_new_chat.py | 2 +- .../new-chat/[[...chat_id]]/page.tsx | 3 +- .../dashboard/[search_space_id]/team/page.tsx | 34 ++++++++----------- .../components/auth/sign-in-button.tsx | 2 +- .../components/new-chat/chat-share-button.tsx | 14 ++++++-- surfsense_web/contracts/types/inbox.types.ts | 8 ++++- 7 files changed, 39 insertions(+), 28 deletions(-) diff --git a/surfsense_backend/app/routes/rbac_routes.py b/surfsense_backend/app/routes/rbac_routes.py index 5070a2724..7d2cc5c77 100644 --- a/surfsense_backend/app/routes/rbac_routes.py +++ b/surfsense_backend/app/routes/rbac_routes.py @@ -123,7 +123,9 @@ async def list_all_permissions( for perm in Permission: # Extract category from permission value (e.g., "documents:read" -> "documents") category = perm.value.split(":")[0] if ":" in perm.value else "general" - description = PERMISSION_DESCRIPTIONS.get(perm.value, f"Permission for {perm.value}") + description = PERMISSION_DESCRIPTIONS.get( + perm.value, f"Permission for {perm.value}" + ) permissions.append( PermissionInfo( diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 875b7e95a..12d7cbd4e 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -18,7 +18,6 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent -from app.utils.content_utils import bootstrap_history_from_db from app.agents.new_chat.checkpointer import get_checkpointer from app.agents.new_chat.llm_config import ( AgentConfig, @@ -35,6 +34,7 @@ from app.services.chat_session_state_service import ( ) from app.services.connector_service import ConnectorService from app.services.new_streaming_service import VercelStreamingService +from app.utils.content_utils import bootstrap_history_from_db def format_attachments_as_context(attachments: list[ChatAttachment]) -> str: diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index b5e63ca80..d025bceab 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -41,12 +41,12 @@ import { useMessagesElectric } from "@/hooks/use-messages-electric"; // import { WriteTodosToolUI } from "@/components/tool-ui/write-todos"; import { getBearerToken } from "@/lib/auth-utils"; import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter"; +import { convertToThreadMessage } from "@/lib/chat/message-utils"; import { isPodcastGenerating, looksLikePodcastRequest, setActivePodcastTaskId, } from "@/lib/chat/podcast-state"; -import { convertToThreadMessage } from "@/lib/chat/message-utils"; import { appendMessage, type ChatVisibility, @@ -111,7 +111,6 @@ function extractMentionedDocuments(content: unknown): MentionedDocumentInfo[] { return []; } - /** * Tools that should render custom UI in the chat. */ diff --git a/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx index 298871cf7..87e4281ae 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx @@ -115,13 +115,13 @@ import type { Membership, UpdateMembershipRequest, } from "@/contracts/types/members.types"; +import type { PermissionInfo } from "@/contracts/types/permissions.types"; import type { CreateRoleRequest, DeleteRoleRequest, Role, UpdateRoleRequest, } from "@/contracts/types/roles.types"; -import type { PermissionInfo } from "@/contracts/types/permissions.types"; import { invitesApiService } from "@/lib/apis/invites-api.service"; import { rolesApiService } from "@/lib/apis/roles-api.service"; import { trackSearchSpaceInviteSent, trackSearchSpaceUsersViewed } from "@/lib/posthog/events"; @@ -980,11 +980,7 @@ function RolesTab({ > {/* Create Role Button / Section */} {canCreate && !showCreateRole && ( - + + + + {isDocked ? "Close inbox" : "Dock inbox"} + + + )}
@@ -858,11 +891,70 @@ export function InboxSidebar({

)} -
- +
+ + ); + + // DOCKED MODE: Render as a static flex child (no animation, no click-away) + if (isDocked && open && !isMobile) { + return ( + + ); + } + + // FLOATING MODE: Render with animation and click-away layer + return ( + + {open && ( + <> + {/* Click-away layer - only covers the content area, not the sidebar */} + onOpenChange(false)} + aria-hidden="true" + /> + + {/* Clip container - positioned at sidebar edge with overflow hidden */} +
+ + {inboxContent} + +
)} -
, - document.body + ); } diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json index 6838b0f52..b11e43bbc 100644 --- a/surfsense_web/messages/zh.json +++ b/surfsense_web/messages/zh.json @@ -686,7 +686,7 @@ "system": "系统", "logout": "退出登录", "inbox": "收件箱", - "search_inbox": "搜索收件箱...", + "search_inbox": "搜索收件箱", "mark_all_read": "全部标记为已读", "mark_as_read": "标记为已读", "mentions": "提及", From a3b6012fb2f74c2b97e9c8c0206d164d6fe036a8 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 27 Jan 2026 19:47:07 +0530 Subject: [PATCH 37/69] feat: podcast and audio UI is now mobile responsive --- surfsense_web/components/tool-ui/audio.tsx | 52 +++++------ .../components/tool-ui/generate-podcast.tsx | 86 +++++++++---------- 2 files changed, 69 insertions(+), 69 deletions(-) diff --git a/surfsense_web/components/tool-ui/audio.tsx b/surfsense_web/components/tool-ui/audio.tsx index 4b7679cd6..24f7734d7 100644 --- a/surfsense_web/components/tool-ui/audio.tsx +++ b/surfsense_web/components/tool-ui/audio.tsx @@ -149,16 +149,16 @@ export function Audio({ id, src, title, description, artwork, durationMs, classN return (
-
- +
+
-
-

{title}

-

{error}

+
+

{title}

+

{error}

); @@ -168,7 +168,7 @@ export function Audio({ id, src, title, description, artwork, durationMs, classN
@@ -177,15 +177,15 @@ export function Audio({ id, src, title, description, artwork, durationMs, classN -
+
{/* Artwork */}
-
+
{artwork ? ( {title} ) : (
- +
)}
@@ -195,14 +195,14 @@ export function Audio({ id, src, title, description, artwork, durationMs, classN
{/* Title and description */}
-

{title}

+

{title}

{description && ( -

{description}

+

{description}

)}
{/* Progress bar */} -
+
-
+
{formatTime(currentTime)} {formatTime(duration)}
@@ -220,33 +220,33 @@ export function Audio({ id, src, title, description, artwork, durationMs, classN
{/* Controls */} -
-
+
+
{/* Play/Pause button */} {/* Volume control */} -
- {/* Custom volume bar - visually distinct from progress slider */} -
+
{/* Download button */} -
diff --git a/surfsense_web/components/tool-ui/generate-podcast.tsx b/surfsense_web/components/tool-ui/generate-podcast.tsx index c76d7ce5a..513853c1a 100644 --- a/surfsense_web/components/tool-ui/generate-podcast.tsx +++ b/surfsense_web/components/tool-ui/generate-podcast.tsx @@ -86,23 +86,23 @@ function parsePodcastDetails(data: unknown): { podcast_transcript?: PodcastTrans */ function PodcastGeneratingState({ title }: { title: string }) { return ( -
-
-
-
- +
+
+
+
+
{/* Animated rings */}
-
-

{title}

-
- - Generating podcast. This may take a few minutes. +
+

{title}

+
+ + Generating podcast. This may take a few minutes.
-
-
+
+
@@ -117,15 +117,15 @@ function PodcastGeneratingState({ title }: { title: string }) { */ function PodcastErrorState({ title, error }: { title: string; error: string }) { return ( -
-
-
- +
+
+
+
-
-

{title}

-

Failed to generate podcast

-

{error}

+
+

{title}

+

Failed to generate podcast

+

{error}

@@ -137,16 +137,16 @@ function PodcastErrorState({ title, error }: { title: string; error: string }) { */ function AudioLoadingState({ title }: { title: string }) { return ( -
-
-
- +
+
+
+
-
-

{title}

-
- - Loading audio... +
+

{title}

+
+ + Loading audio...
@@ -264,13 +264,13 @@ function PodcastPlayer({ /> {/* Transcript section */} {transcript && transcript.length > 0 && ( -
- +
+ View transcript ({transcript.length} entries) -
+
{transcript.map((entry, idx) => ( -
+
Speaker {entry.speaker_id + 1}:{" "} {entry.dialog}
@@ -392,9 +392,9 @@ export const GeneratePodcastToolUI = makeAssistantToolUI< if (status.type === "incomplete") { if (status.reason === "cancelled") { return ( -
-

- +

+

+ Podcast generation cancelled

@@ -424,16 +424,16 @@ export const GeneratePodcastToolUI = makeAssistantToolUI< // The FIRST tool call will display the podcast when ready if (result.status === "already_generating") { return ( -
-
-
- +
+
+
+
-
-

+

+

Podcast already in progress

-

+

Please wait for the current podcast to complete.

From 6d05a13167989f5d5c30947336cd8c933bb61cf2 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 27 Jan 2026 20:59:03 +0530 Subject: [PATCH 38/69] feat(chat): add comments panel collapse functionality and integrate with inbox sidebar --- .../atoms/chat/current-thread.atom.ts | 21 +++++++ .../assistant-ui/assistant-message.tsx | 6 +- .../layout/ui/shell/LayoutShell.tsx | 38 ++++++------- .../layout/ui/sidebar/InboxSidebar.tsx | 57 ++++++++++++++----- 4 files changed, 86 insertions(+), 36 deletions(-) diff --git a/surfsense_web/atoms/chat/current-thread.atom.ts b/surfsense_web/atoms/chat/current-thread.atom.ts index c19b2638c..dea926633 100644 --- a/surfsense_web/atoms/chat/current-thread.atom.ts +++ b/surfsense_web/atoms/chat/current-thread.atom.ts @@ -17,6 +17,8 @@ interface CurrentThreadState { visibility: ChatVisibility | null; hasComments: boolean; addingCommentToMessageId: number | null; + /** Whether the right-side comments panel is collapsed (desktop only) */ + commentsCollapsed: boolean; } const initialState: CurrentThreadState = { @@ -24,6 +26,7 @@ const initialState: CurrentThreadState = { visibility: null, hasComments: false, addingCommentToMessageId: null, + commentsCollapsed: false, }; export const currentThreadAtom = atom(initialState); @@ -34,6 +37,8 @@ export const commentsEnabledAtom = atom( export const showCommentsGutterAtom = atom((get) => { const thread = get(currentThreadAtom); + // Hide gutter if comments are collapsed + if (thread.commentsCollapsed) return false; return ( thread.visibility === "SEARCH_SPACE" && (thread.hasComments || thread.addingCommentToMessageId !== null) @@ -55,3 +60,19 @@ export const setThreadVisibilityAtom = atom(null, (get, set, newVisibility: Chat export const resetCurrentThreadAtom = atom(null, (_, set) => { set(currentThreadAtom, initialState); }); + +/** Atom to read whether comments panel is collapsed */ +export const commentsCollapsedAtom = atom( + (get) => get(currentThreadAtom).commentsCollapsed +); + +/** Atom to toggle the comments collapsed state */ +export const toggleCommentsCollapsedAtom = atom(null, (get, set) => { + const current = get(currentThreadAtom); + set(currentThreadAtom, { ...current, commentsCollapsed: !current.commentsCollapsed }); +}); + +/** Atom to explicitly set the comments collapsed state */ +export const setCommentsCollapsedAtom = atom(null, (get, set, collapsed: boolean) => { + set(currentThreadAtom, { ...get(currentThreadAtom), commentsCollapsed: collapsed }); +}); diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index b3cfc4476..4fb8d8393 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -11,6 +11,7 @@ import type { FC } from "react"; import { useContext, useEffect, useRef, useState } from "react"; import { addingCommentToMessageIdAtom, + commentsCollapsedAtom, commentsEnabledAtom, } from "@/atoms/chat/current-thread.atom"; import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; @@ -102,6 +103,7 @@ export const AssistantMessage: FC = () => { const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom); const dbMessageId = parseMessageId(messageId); const commentsEnabled = useAtomValue(commentsEnabledAtom); + const commentsCollapsed = useAtomValue(commentsCollapsedAtom); const [addingCommentToMessageId, setAddingCommentToMessageId] = useAtom( addingCommentToMessageIdAtom ); @@ -157,8 +159,8 @@ export const AssistantMessage: FC = () => { > - {/* Desktop comment panel - only on lg screens and above */} - {searchSpaceId && commentsEnabled && !isMessageStreaming && ( + {/* Desktop comment panel - only on lg screens and above, hidden when collapsed */} + {searchSpaceId && commentsEnabled && !isMessageStreaming && !commentsCollapsed && (
-
- {children} -
+
+ {children} +
- {/* Mobile Inbox Sidebar */} - {inbox && ( - setMobileMenuOpen(false)} - /> - )} + {/* Mobile Inbox Sidebar - only render when open to avoid scroll blocking */} + {inbox?.isOpen && ( + setMobileMenuOpen(false)} + /> + )}
diff --git a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx index db6d22cba..69ab714d8 100644 --- a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx @@ -7,19 +7,21 @@ import { Check, CheckCheck, CheckCircle2, + ChevronLeft, + ChevronRight, History, Inbox, LayoutGrid, ListFilter, - PanelLeftClose, - PanelLeft, Search, X, } from "lucide-react"; +import { useAtom } from "jotai"; import { AnimatePresence, motion } from "motion/react"; import { useRouter } from "next/navigation"; import { useTranslations } from "next-intl"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { setCommentsCollapsedAtom } from "@/atoms/chat/current-thread.atom"; import { convertRenderedToDisplay } from "@/components/chat-comments/comment-item/comment-item"; import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; import { Button } from "@/components/ui/button"; @@ -171,6 +173,9 @@ export function InboxSidebar({ const router = useRouter(); const isMobile = !useMediaQuery("(min-width: 640px)"); + // Comments collapsed state (desktop only, when docked) + const [, setCommentsCollapsed] = useAtom(setCommentsCollapsedAtom); + const [searchQuery, setSearchQuery] = useState(""); const [activeTab, setActiveTab] = useState("mentions"); const [activeFilter, setActiveFilter] = useState("all"); @@ -199,15 +204,16 @@ export function InboxSidebar({ return () => document.removeEventListener("keydown", handleEscape); }, [open, onOpenChange]); - // Only lock body scroll on mobile (Notion-style keeps desktop content scrollable) + // Only lock body scroll on mobile when inbox is open useEffect(() => { - if (open && isMobile) { - document.body.style.overflow = "hidden"; - } else { - document.body.style.overflow = ""; - } + if (!open || !isMobile) return; + + // Store original overflow to restore on cleanup + const originalOverflow = document.body.style.overflow; + document.body.style.overflow = "hidden"; + return () => { - document.body.style.overflow = ""; + document.body.style.overflow = originalOverflow; }; }, [open, isMobile]); @@ -702,6 +708,25 @@ export function InboxSidebar({ {t("mark_all_read") || "Mark all as read"} + {/* Close button - mobile only */} + {isMobile && ( + + + + + + {t("close") || "Close"} + + + )} {/* Dock/Undock button - desktop only */} {!isMobile && onDockedChange && ( @@ -712,27 +737,29 @@ export function InboxSidebar({ className="h-8 w-8 rounded-full" onClick={() => { if (isDocked) { - // Undocking: close the inbox completely + // Collapse: show comments immediately, then close inbox + setCommentsCollapsed(false); onDockedChange(false); onOpenChange(false); } else { - // Docking: keep open and dock + // Expand: hide comments immediately + setCommentsCollapsed(true); onDockedChange(true); } }} > {isDocked ? ( - + ) : ( - + )} - {isDocked ? "Close inbox" : "Dock inbox"} + {isDocked ? "Collapse panel" : "Expand panel"} - {isDocked ? "Close inbox" : "Dock inbox"} + {isDocked ? "Collapse panel" : "Expand panel"} )} From 6eedce839a757963d47e98b4530f3a63ac3968ca Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 27 Jan 2026 21:13:16 +0530 Subject: [PATCH 39/69] feat(markdown): wrap table component in a responsive container for improved layout and mobile responsive --- .../components/assistant-ui/markdown-text.tsx | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/surfsense_web/components/assistant-ui/markdown-text.tsx b/surfsense_web/components/assistant-ui/markdown-text.tsx index 5700fbc2e..dd4ce6b75 100644 --- a/surfsense_web/components/assistant-ui/markdown-text.tsx +++ b/surfsense_web/components/assistant-ui/markdown-text.tsx @@ -252,13 +252,15 @@ const defaultComponents = memoizeMarkdownComponents({
), table: ({ className, ...props }) => ( - +
+
+ ), th: ({ className, children, ...props }) => (
Date: Tue, 27 Jan 2026 17:51:36 +0200 Subject: [PATCH 40/69] feat: add podcast status tracking --- ...py => 82_add_podcast_status_and_thread.py} | 30 ++++- .../app/agents/new_chat/tools/podcast.py | 98 +++++++-------- surfsense_backend/app/db.py | 18 ++- .../app/routes/podcasts_routes.py | 65 +--------- surfsense_backend/app/schemas/podcasts.py | 9 ++ .../app/services/public_chat_service.py | 19 ++- .../app/tasks/celery_tasks/podcast_tasks.py | 119 +++++++++--------- 7 files changed, 165 insertions(+), 193 deletions(-) rename surfsense_backend/alembic/versions/{82_add_thread_id_to_podcasts.py => 82_add_podcast_status_and_thread.py} (51%) diff --git a/surfsense_backend/alembic/versions/82_add_thread_id_to_podcasts.py b/surfsense_backend/alembic/versions/82_add_podcast_status_and_thread.py similarity index 51% rename from surfsense_backend/alembic/versions/82_add_thread_id_to_podcasts.py rename to surfsense_backend/alembic/versions/82_add_podcast_status_and_thread.py index f08fe32d8..fd4eed89f 100644 --- a/surfsense_backend/alembic/versions/82_add_thread_id_to_podcasts.py +++ b/surfsense_backend/alembic/versions/82_add_podcast_status_and_thread.py @@ -1,9 +1,10 @@ -"""Add thread_id to podcasts +"""Add status and thread_id to podcasts Revision ID: 82 Revises: 81 -Create Date: 2026-01-23 +Create Date: 2026-01-27 +Adds status enum and thread_id FK to podcasts. """ from collections.abc import Sequence @@ -17,7 +18,19 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: - """Add thread_id column to podcasts.""" + op.execute( + """ + CREATE TYPE podcast_status AS ENUM ('pending', 'generating', 'ready', 'failed'); + """ + ) + + op.execute( + """ + ALTER TABLE podcasts + ADD COLUMN IF NOT EXISTS status podcast_status NOT NULL DEFAULT 'ready'; + """ + ) + op.execute( """ ALTER TABLE podcasts @@ -33,8 +46,17 @@ def upgrade() -> None: """ ) + op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_podcasts_status + ON podcasts(status); + """ + ) + def downgrade() -> None: - """Remove thread_id column from podcasts.""" + op.execute("DROP INDEX IF EXISTS ix_podcasts_status") op.execute("DROP INDEX IF EXISTS ix_podcasts_thread_id") op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS thread_id") + op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS status") + op.execute("DROP TYPE IF EXISTS podcast_status") diff --git a/surfsense_backend/app/agents/new_chat/tools/podcast.py b/surfsense_backend/app/agents/new_chat/tools/podcast.py index d4e023f6f..424b04f77 100644 --- a/surfsense_backend/app/agents/new_chat/tools/podcast.py +++ b/surfsense_backend/app/agents/new_chat/tools/podcast.py @@ -18,6 +18,8 @@ import redis from langchain_core.tools import tool from sqlalchemy.ext.asyncio import AsyncSession +from app.db import Podcast, PodcastStatus + # Redis connection for tracking active podcast tasks # Uses the same Redis instance as Celery REDIS_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") @@ -32,38 +34,27 @@ def get_redis_client() -> redis.Redis: return _redis_client -def get_active_podcast_key(search_space_id: int) -> str: - """Generate Redis key for tracking active podcast task.""" - return f"podcast:active:{search_space_id}" +def _redis_key(search_space_id: int) -> str: + return f"podcast:generating:{search_space_id}" -def get_active_podcast_task(search_space_id: int) -> str | None: - """Check if there's an active podcast task for this search space.""" +def get_generating_podcast_id(search_space_id: int) -> int | None: + """Get the podcast ID currently being generated for this search space.""" try: client = get_redis_client() - return client.get(get_active_podcast_key(search_space_id)) + value = client.get(_redis_key(search_space_id)) + return int(value) if value else None except Exception: - # If Redis is unavailable, allow the request (fail open) return None -def set_active_podcast_task(search_space_id: int, task_id: str) -> None: - """Mark a podcast task as active for this search space.""" +def set_generating_podcast(search_space_id: int, podcast_id: int) -> None: + """Mark a podcast as currently generating for this search space.""" try: client = get_redis_client() - # Set with 30-minute expiry as safety net (podcast should complete before this) - client.setex(get_active_podcast_key(search_space_id), 1800, task_id) + client.setex(_redis_key(search_space_id), 1800, str(podcast_id)) except Exception as e: - print(f"[generate_podcast] Warning: Could not set active task in Redis: {e}") - - -def clear_active_podcast_task(search_space_id: int) -> None: - """Clear the active podcast task for this search space.""" - try: - client = get_redis_client() - client.delete(get_active_podcast_key(search_space_id)) - except Exception as e: - print(f"[generate_podcast] Warning: Could not clear active task in Redis: {e}") + print(f"[generate_podcast] Warning: Could not set generating podcast in Redis: {e}") def create_generate_podcast_tool( @@ -74,9 +65,12 @@ def create_generate_podcast_tool( """ Factory function to create the generate_podcast tool with injected dependencies. + Pre-creates podcast record with pending status so podcast_id is available + immediately for frontend polling. + Args: search_space_id: The user's search space ID - db_session: Database session (not used - Celery creates its own) + db_session: Database session for creating the podcast record thread_id: The chat thread ID for associating the podcast Returns: @@ -100,77 +94,71 @@ def create_generate_podcast_tool( - "Make a podcast about..." - "Turn this into a podcast" - The tool will start generating a podcast in the background. - The podcast will be available once generation completes. - - IMPORTANT: Only one podcast can be generated at a time. If a podcast - is already being generated, this tool will return a message asking - the user to wait. - Args: source_content: The text content to convert into a podcast. - This can be a summary, research findings, or any text - the user wants transformed into an audio podcast. podcast_title: Title for the podcast (default: "SurfSense Podcast") user_prompt: Optional instructions for podcast style, tone, or format. - For example: "Make it casual and fun" or "Focus on the key insights" Returns: A dictionary containing: - - status: "processing" (task submitted), "already_generating", or "error" - - task_id: The Celery task ID for polling status (if processing) + - status: PodcastStatus value (pending, generating, or failed) + - podcast_id: The podcast ID for polling (when status is pending or generating) - title: The podcast title - - message: Status message for the user + - message: Status message (or "error" field if status is failed) """ try: - # Check if a podcast is already being generated for this search space - active_task_id = get_active_podcast_task(search_space_id) - if active_task_id: + generating_podcast_id = get_generating_podcast_id(search_space_id) + if generating_podcast_id: print( - f"[generate_podcast] Blocked duplicate request. Active task: {active_task_id}" + f"[generate_podcast] Blocked duplicate request. Generating podcast: {generating_podcast_id}" ) return { - "status": "already_generating", - "task_id": active_task_id, + "status": PodcastStatus.GENERATING.value, + "podcast_id": generating_podcast_id, "title": podcast_title, - "message": "A podcast is already being generated. Please wait for it to complete before requesting another one.", + "message": "A podcast is already being generated. Please wait for it to complete.", } - # Import Celery task here to avoid circular imports + podcast = Podcast( + title=podcast_title, + status=PodcastStatus.PENDING, + search_space_id=search_space_id, + thread_id=thread_id, + ) + db_session.add(podcast) + await db_session.commit() + await db_session.refresh(podcast) + from app.tasks.celery_tasks.podcast_tasks import ( generate_content_podcast_task, ) - # Submit Celery task for background processing task = generate_content_podcast_task.delay( + podcast_id=podcast.id, source_content=source_content, search_space_id=search_space_id, - podcast_title=podcast_title, user_prompt=user_prompt, - thread_id=thread_id, ) - # Mark this task as active - set_active_podcast_task(search_space_id, task.id) + set_generating_podcast(search_space_id, podcast.id) - print(f"[generate_podcast] Submitted Celery task: {task.id}") + print(f"[generate_podcast] Created podcast {podcast.id}, task: {task.id}") - # Return immediately with task_id for polling return { - "status": "processing", - "task_id": task.id, + "status": PodcastStatus.PENDING.value, + "podcast_id": podcast.id, "title": podcast_title, "message": "Podcast generation started. This may take a few minutes.", } except Exception as e: error_message = str(e) - print(f"[generate_podcast] Error submitting task: {error_message}") + print(f"[generate_podcast] Error: {error_message}") return { - "status": "error", + "status": PodcastStatus.FAILED.value, "error": error_message, "title": podcast_title, - "task_id": None, + "podcast_id": None, } return generate_podcast diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 0182d2c53..41962b769 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -93,6 +93,13 @@ class SearchSourceConnectorType(str, Enum): COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" +class PodcastStatus(str, Enum): + PENDING = "pending" + GENERATING = "generating" + READY = "ready" + FAILED = "failed" + + class LiteLLMProvider(str, Enum): """ Enum for LLM providers supported by LiteLLM. @@ -743,8 +750,15 @@ class Podcast(BaseModel, TimestampMixin): __tablename__ = "podcasts" title = Column(String(500), nullable=False) - podcast_transcript = Column(JSONB, nullable=True) # List of transcript entries - file_location = Column(Text, nullable=True) # Path to the audio file + podcast_transcript = Column(JSONB, nullable=True) + file_location = Column(Text, nullable=True) + status = Column( + SQLAlchemyEnum(PodcastStatus, name="podcast_status", create_type=False), + nullable=False, + default=PodcastStatus.READY, + server_default="ready", + index=True, + ) search_space_id = Column( Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False diff --git a/surfsense_backend/app/routes/podcasts_routes.py b/surfsense_backend/app/routes/podcasts_routes.py index 27970b707..041dd80ee 100644 --- a/surfsense_backend/app/routes/podcasts_routes.py +++ b/surfsense_backend/app/routes/podcasts_routes.py @@ -1,21 +1,19 @@ """ -Podcast routes for task status polling and audio retrieval. +Podcast routes for CRUD operations and audio streaming. These routes support the podcast generation feature in new-chat. -Note: The old Chat-based podcast generation has been removed. +Frontend polls GET /podcasts/{podcast_id} to check status field. """ import os from pathlib import Path -from celery.result import AsyncResult from fastapi import APIRouter, Depends, HTTPException from fastapi.responses import StreamingResponse from sqlalchemy import select from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession -from app.celery_app import celery_app from app.db import ( Permission, Podcast, @@ -228,62 +226,3 @@ async def stream_podcast( raise HTTPException( status_code=500, detail=f"Error streaming podcast: {e!s}" ) from e - - -@router.get("/podcasts/task/{task_id}/status") -async def get_podcast_task_status( - task_id: str, - user: User = Depends(current_active_user), -): - """ - Get the status of a podcast generation task. - Used by new-chat frontend to poll for completion. - - Returns: - - status: "processing" | "success" | "error" - - podcast_id: (only if status == "success") - - title: (only if status == "success") - - error: (only if status == "error") - """ - try: - result = AsyncResult(task_id, app=celery_app) - - if result.ready(): - # Task completed - if result.successful(): - task_result = result.result - if isinstance(task_result, dict): - if task_result.get("status") == "success": - return { - "status": "success", - "podcast_id": task_result.get("podcast_id"), - "title": task_result.get("title"), - "transcript_entries": task_result.get("transcript_entries"), - } - else: - return { - "status": "error", - "error": task_result.get("error", "Unknown error"), - } - else: - return { - "status": "error", - "error": "Unexpected task result format", - } - else: - # Task failed - return { - "status": "error", - "error": str(result.result) if result.result else "Task failed", - } - else: - # Task still processing - return { - "status": "processing", - "state": result.state, - } - - except Exception as e: - raise HTTPException( - status_code=500, detail=f"Error checking task status: {e!s}" - ) from e diff --git a/surfsense_backend/app/schemas/podcasts.py b/surfsense_backend/app/schemas/podcasts.py index 72c915d88..ad77c27f8 100644 --- a/surfsense_backend/app/schemas/podcasts.py +++ b/surfsense_backend/app/schemas/podcasts.py @@ -1,11 +1,19 @@ """Podcast schemas for API responses.""" from datetime import datetime +from enum import Enum from typing import Any from pydantic import BaseModel +class PodcastStatusEnum(str, Enum): + PENDING = "pending" + GENERATING = "generating" + READY = "ready" + FAILED = "failed" + + class PodcastBase(BaseModel): """Base podcast schema.""" @@ -33,6 +41,7 @@ class PodcastRead(PodcastBase): """Schema for reading a podcast.""" id: int + status: PodcastStatusEnum = PodcastStatusEnum.READY created_at: datetime class Config: diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index 7c3b89af9..1dcc97a11 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -40,7 +40,10 @@ def strip_citations(text: str) -> str: def sanitize_content_for_public(content: list | str | None) -> list: - """Filter message content for public view.""" + """ + Filter message content for public view. + Strips citations and filters to UI-relevant tools. + """ if content is None: return [] @@ -67,13 +70,6 @@ def sanitize_content_for_public(content: list | str | None) -> list: tool_name = part.get("toolName") if tool_name not in UI_TOOLS: continue - - # Skip podcasts that are still processing (would cause auth errors) - if tool_name == "generate_podcast": - result = part.get("result", {}) - if result.get("status") in ("processing", "already_generating"): - continue - sanitized.append(part) return sanitized @@ -355,16 +351,16 @@ async def _clone_podcast( target_search_space_id: int, target_thread_id: int, ) -> int | None: - """Clone a podcast record and its audio file.""" + """Clone a podcast record and its audio file. Only clones ready podcasts.""" import shutil import uuid from pathlib import Path - from app.db import Podcast + from app.db import Podcast, PodcastStatus result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) original = result.scalars().first() - if not original: + if not original or original.status != PodcastStatus.READY: return None new_file_path = None @@ -381,6 +377,7 @@ async def _clone_podcast( title=original.title, podcast_transcript=original.podcast_transcript, file_location=new_file_path, + status=PodcastStatus.READY, search_space_id=target_search_space_id, thread_id=target_thread_id, ) diff --git a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py index 862234b46..0ce714cdc 100644 --- a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py @@ -4,15 +4,15 @@ import asyncio import logging import sys +from sqlalchemy import select from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.pool import NullPool -# Import for content-based podcast (new-chat) from app.agents.podcaster.graph import graph as podcaster_graph from app.agents.podcaster.state import State as PodcasterState from app.celery_app import celery_app from app.config import config -from app.db import Podcast +from app.db import Podcast, PodcastStatus logger = logging.getLogger(__name__) @@ -44,8 +44,8 @@ def get_celery_session_maker(): # ============================================================================= -def _clear_active_podcast_redis_key(search_space_id: int) -> None: - """Clear the active podcast task key from Redis when task completes.""" +def _clear_generating_podcast(search_space_id: int) -> None: + """Clear the generating podcast marker from Redis when task completes.""" import os import redis @@ -53,36 +53,24 @@ def _clear_active_podcast_redis_key(search_space_id: int) -> None: try: redis_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") client = redis.from_url(redis_url, decode_responses=True) - key = f"podcast:active:{search_space_id}" + key = f"podcast:generating:{search_space_id}" client.delete(key) - logger.info(f"Cleared active podcast key for search_space_id={search_space_id}") + logger.info(f"Cleared generating podcast key for search_space_id={search_space_id}") except Exception as e: - logger.warning(f"Could not clear active podcast key: {e}") + logger.warning(f"Could not clear generating podcast key: {e}") @celery_app.task(name="generate_content_podcast", bind=True) def generate_content_podcast_task( self, + podcast_id: int, source_content: str, search_space_id: int, - podcast_title: str = "SurfSense Podcast", user_prompt: str | None = None, - thread_id: int | None = None, ) -> dict: """ - Celery task to generate podcast from source content (for new-chat). - - This task generates a podcast directly from provided content. - - Args: - source_content: The text content to convert into a podcast - search_space_id: ID of the search space - podcast_title: Title for the podcast - user_prompt: Optional instructions for podcast style/tone - thread_id: Optional ID of the chat thread that generated this podcast - - Returns: - dict with podcast_id on success, or error info on failure + Celery task to generate podcast from source content. + Updates existing podcast record created by the tool. """ loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) @@ -90,58 +78,79 @@ def generate_content_podcast_task( try: result = loop.run_until_complete( _generate_content_podcast( + podcast_id, source_content, search_space_id, - podcast_title, user_prompt, - thread_id, ) ) loop.run_until_complete(loop.shutdown_asyncgens()) return result except Exception as e: logger.error(f"Error generating content podcast: {e!s}") - return {"status": "error", "error": str(e)} + loop.run_until_complete(_mark_podcast_failed(podcast_id)) + return {"status": "failed", "podcast_id": podcast_id} finally: - # Always clear the active podcast key when task completes (success or failure) - _clear_active_podcast_redis_key(search_space_id) + _clear_generating_podcast(search_space_id) asyncio.set_event_loop(None) loop.close() -async def _generate_content_podcast( - source_content: str, - search_space_id: int, - podcast_title: str = "SurfSense Podcast", - user_prompt: str | None = None, - thread_id: int | None = None, -) -> dict: - """Generate content-based podcast with new session.""" +async def _mark_podcast_failed(podcast_id: int) -> None: + """Mark a podcast as failed in the database.""" async with get_celery_session_maker()() as session: try: - # Configure the podcaster graph + result = await session.execute( + select(Podcast).filter(Podcast.id == podcast_id) + ) + podcast = result.scalars().first() + if podcast: + podcast.status = PodcastStatus.FAILED + await session.commit() + except Exception as e: + logger.error(f"Failed to mark podcast as failed: {e}") + + +async def _generate_content_podcast( + podcast_id: int, + source_content: str, + search_space_id: int, + user_prompt: str | None = None, +) -> dict: + """Generate content-based podcast and update existing record.""" + async with get_celery_session_maker()() as session: + result = await session.execute( + select(Podcast).filter(Podcast.id == podcast_id) + ) + podcast = result.scalars().first() + + if not podcast: + raise ValueError(f"Podcast {podcast_id} not found") + + try: + podcast.status = PodcastStatus.GENERATING + await session.commit() + graph_config = { "configurable": { - "podcast_title": podcast_title, + "podcast_title": podcast.title, "search_space_id": search_space_id, "user_prompt": user_prompt, } } - # Initialize the podcaster state with the source content initial_state = PodcasterState( source_content=source_content, db_session=session, ) - # Run the podcaster graph - result = await podcaster_graph.ainvoke(initial_state, config=graph_config) + graph_result = await podcaster_graph.ainvoke( + initial_state, config=graph_config + ) - # Extract results - podcast_transcript = result.get("podcast_transcript", []) - file_path = result.get("final_podcast_file_path", "") + podcast_transcript = graph_result.get("podcast_transcript", []) + file_path = graph_result.get("final_podcast_file_path", "") - # Convert transcript to serializable format serializable_transcript = [] for entry in podcast_transcript: if hasattr(entry, "speaker_id"): @@ -156,28 +165,22 @@ async def _generate_content_podcast( } ) - # Save podcast to database - podcast = Podcast( - title=podcast_title, - podcast_transcript=serializable_transcript, - file_location=file_path, - search_space_id=search_space_id, - thread_id=thread_id, - ) - session.add(podcast) + podcast.podcast_transcript = serializable_transcript + podcast.file_location = file_path + podcast.status = PodcastStatus.READY await session.commit() - await session.refresh(podcast) - logger.info(f"Successfully generated content podcast: {podcast.id}") + logger.info(f"Successfully generated podcast: {podcast.id}") return { - "status": "success", + "status": "ready", "podcast_id": podcast.id, - "title": podcast_title, + "title": podcast.title, "transcript_entries": len(serializable_transcript), } except Exception as e: logger.error(f"Error in _generate_content_podcast: {e!s}") - await session.rollback() + podcast.status = PodcastStatus.FAILED + await session.commit() raise From 72c421eeb1fe270a551ff0defdd9f18f135facd6 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 27 Jan 2026 22:14:02 +0530 Subject: [PATCH 41/69] feat(chat): implement target comment navigation and highlight functionality in chat components --- .../new-chat/[[...chat_id]]/page.tsx | 49 ++++++++----------- .../atoms/chat/current-thread.atom.ts | 17 +++++++ .../assistant-ui/assistant-message.tsx | 38 ++++++++++++-- .../comment-item/comment-item.tsx | 46 ++++++++++++++++- .../layout/ui/sidebar/InboxSidebar.tsx | 12 ++++- 5 files changed, 128 insertions(+), 34 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 4509a44a7..1a00873a5 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -13,7 +13,11 @@ import { useTranslations } from "next-intl"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { toast } from "sonner"; import { z } from "zod"; -import { currentThreadAtom } from "@/atoms/chat/current-thread.atom"; +import { + clearTargetCommentIdAtom, + currentThreadAtom, + setTargetCommentIdAtom, +} from "@/atoms/chat/current-thread.atom"; import { type MentionedDocumentInfo, mentionedDocumentIdsAtom, @@ -261,6 +265,8 @@ export default function NewChatPage() { const setMessageDocumentsMap = useSetAtom(messageDocumentsMapAtom); const hydratePlanState = useSetAtom(hydratePlanStateAtom); const setCurrentThreadState = useSetAtom(currentThreadAtom); + const setTargetCommentId = useSetAtom(setTargetCommentIdAtom); + const clearTargetCommentId = useSetAtom(clearTargetCommentIdAtom); // Get current user for author info in shared chats const { data: currentUser } = useAtomValue(currentUserAtom); @@ -424,44 +430,31 @@ export default function NewChatPage() { // Handle scroll to comment from URL query params (e.g., from inbox item click) const searchParams = useSearchParams(); - const targetCommentId = searchParams.get("commentId"); + const targetCommentIdParam = searchParams.get("commentId"); + // Set target comment ID from URL param - the AssistantMessage and CommentItem + // components will handle scrolling and highlighting once comments are loaded useEffect(() => { - if (!targetCommentId || isInitializing || messages.length === 0) return; - - const tryScroll = () => { - const el = document.querySelector(`[data-comment-id="${targetCommentId}"]`); - if (el) { - el.scrollIntoView({ behavior: "smooth", block: "center" }); - return true; + if (targetCommentIdParam && !isInitializing) { + const commentId = Number.parseInt(targetCommentIdParam, 10); + if (!Number.isNaN(commentId)) { + setTargetCommentId(commentId); } - return false; - }; + } - // Try immediately - if (tryScroll()) return; - - // Retry every 200ms for up to 10 seconds - const intervalId = setInterval(() => { - if (tryScroll()) clearInterval(intervalId); - }, 200); - - const timeoutId = setTimeout(() => clearInterval(intervalId), 10000); - - return () => { - clearInterval(intervalId); - clearTimeout(timeoutId); - }; - }, [targetCommentId, isInitializing, messages.length]); + // Cleanup on unmount or when navigating away + return () => clearTargetCommentId(); + }, [targetCommentIdParam, isInitializing, setTargetCommentId, clearTargetCommentId]); // Sync current thread state to atom useEffect(() => { - setCurrentThreadState({ + setCurrentThreadState((prev) => ({ + ...prev, id: currentThread?.id ?? null, visibility: currentThread?.visibility ?? null, hasComments: currentThread?.has_comments ?? false, addingCommentToMessageId: null, - }); + })); }, [currentThread, setCurrentThreadState]); // Cancel ongoing request diff --git a/surfsense_web/atoms/chat/current-thread.atom.ts b/surfsense_web/atoms/chat/current-thread.atom.ts index dea926633..18afb8ff3 100644 --- a/surfsense_web/atoms/chat/current-thread.atom.ts +++ b/surfsense_web/atoms/chat/current-thread.atom.ts @@ -76,3 +76,20 @@ export const toggleCommentsCollapsedAtom = atom(null, (get, set) => { export const setCommentsCollapsedAtom = atom(null, (get, set, collapsed: boolean) => { set(currentThreadAtom, { ...get(currentThreadAtom), commentsCollapsed: collapsed }); }); + +/** Target comment ID to scroll to (from URL navigation or inbox click) */ +export const targetCommentIdAtom = atom(null); + +/** Setter for target comment ID - also ensures comments are not collapsed */ +export const setTargetCommentIdAtom = atom(null, (get, set, commentId: number | null) => { + // Ensure comments are not collapsed when navigating to a comment + if (commentId !== null) { + set(currentThreadAtom, { ...get(currentThreadAtom), commentsCollapsed: false }); + } + set(targetCommentIdAtom, commentId); +}); + +/** Clear target after navigation completes */ +export const clearTargetCommentIdAtom = atom(null, (_, set) => { + set(targetCommentIdAtom, null); +}); diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index 4fb8d8393..4fd2446c3 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -5,14 +5,16 @@ import { MessagePrimitive, useAssistantState, } from "@assistant-ui/react"; -import { useAtom, useAtomValue } from "jotai"; +import { useAtom, useAtomValue, useSetAtom } from "jotai"; import { CheckIcon, CopyIcon, DownloadIcon, MessageSquare, RefreshCwIcon } from "lucide-react"; import type { FC } from "react"; -import { useContext, useEffect, useRef, useState } from "react"; +import { useContext, useEffect, useMemo, useRef, useState } from "react"; import { addingCommentToMessageIdAtom, + clearTargetCommentIdAtom, commentsCollapsedAtom, commentsEnabledAtom, + targetCommentIdAtom, } from "@/atoms/chat/current-thread.atom"; import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; import { BranchPicker } from "@/components/assistant-ui/branch-picker"; @@ -117,11 +119,23 @@ export const AssistantMessage: FC = () => { const isLastMessage = useAssistantState(({ message }) => message?.isLast ?? false); const isMessageStreaming = isThreadRunning && isLastMessage; - const { data: commentsData } = useComments({ + const { data: commentsData, isSuccess: commentsLoaded } = useComments({ messageId: dbMessageId ?? 0, enabled: !!dbMessageId, }); + // Target comment navigation - read target from global atom + const targetCommentId = useAtomValue(targetCommentIdAtom); + const clearTargetCommentId = useSetAtom(clearTargetCommentIdAtom); + + // Check if target comment belongs to this message (including replies) + const hasTargetComment = useMemo(() => { + if (!targetCommentId || !commentsData?.comments) return false; + return commentsData.comments.some( + (c) => c.id === targetCommentId || c.replies?.some((r) => r.id === targetCommentId) + ); + }, [targetCommentId, commentsData]); + const commentCount = commentsData?.total_count ?? 0; const hasComments = commentCount > 0; const isAddingComment = dbMessageId !== null && addingCommentToMessageId === dbMessageId; @@ -146,6 +160,24 @@ export const AssistantMessage: FC = () => { return () => observer.disconnect(); }, []); + // Auto-open sheet on mobile/tablet when this message has the target comment + useEffect(() => { + if (hasTargetComment && !isDesktop && commentsLoaded) { + setIsSheetOpen(true); + } + }, [hasTargetComment, isDesktop, commentsLoaded]); + + // Scroll message into view when it contains target comment (desktop) + useEffect(() => { + if (hasTargetComment && isDesktop && commentsLoaded && messageRef.current) { + // Small delay to ensure DOM is ready after comments render + const timeoutId = setTimeout(() => { + messageRef.current?.scrollIntoView({ behavior: "smooth", block: "center" }); + }, 100); + return () => clearTimeout(timeoutId); + } + }, [hasTargetComment, isDesktop, commentsLoaded]); + const showCommentTrigger = searchSpaceId && commentsEnabled && !isMessageStreaming && dbMessageId; // Determine sheet side based on screen size diff --git a/surfsense_web/components/chat-comments/comment-item/comment-item.tsx b/surfsense_web/components/chat-comments/comment-item/comment-item.tsx index 8b2503e8c..847886fc8 100644 --- a/surfsense_web/components/chat-comments/comment-item/comment-item.tsx +++ b/surfsense_web/components/chat-comments/comment-item/comment-item.tsx @@ -1,6 +1,12 @@ "use client"; +import { useAtomValue, useSetAtom } from "jotai"; import { MessageSquare } from "lucide-react"; +import { useEffect, useRef, useState } from "react"; +import { + clearTargetCommentIdAtom, + targetCommentIdAtom, +} from "@/atoms/chat/current-thread.atom"; import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; import { Button } from "@/components/ui/button"; import { cn } from "@/lib/utils"; @@ -113,6 +119,37 @@ export function CommentItem({ members = [], membersLoading = false, }: CommentItemProps) { + const commentRef = useRef(null); + const [isHighlighted, setIsHighlighted] = useState(false); + + // Target comment navigation + const targetCommentId = useAtomValue(targetCommentIdAtom); + const clearTargetCommentId = useSetAtom(clearTargetCommentIdAtom); + + const isTarget = targetCommentId === comment.id; + + // Scroll into view and highlight when this is the target comment + useEffect(() => { + if (isTarget && commentRef.current) { + // Small delay to ensure DOM is ready + const scrollTimeoutId = setTimeout(() => { + commentRef.current?.scrollIntoView({ behavior: "smooth", block: "center" }); + setIsHighlighted(true); + }, 150); + + // Remove highlight and clear target after delay + const clearTimeoutId = setTimeout(() => { + setIsHighlighted(false); + clearTargetCommentId(); + }, 3000); + + return () => { + clearTimeout(scrollTimeoutId); + clearTimeout(clearTimeoutId); + }; + } + }, [isTarget, clearTargetCommentId]); + const displayName = comment.author?.displayName || comment.author?.email.split("@")[0] || "Unknown"; const email = comment.author?.email || ""; @@ -122,7 +159,14 @@ export function CommentItem({ }; return ( -
+
{comment.author?.avatarUrl && ( diff --git a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx index 69ab714d8..1a488c6b0 100644 --- a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx @@ -21,7 +21,7 @@ import { AnimatePresence, motion } from "motion/react"; import { useRouter } from "next/navigation"; import { useTranslations } from "next-intl"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; -import { setCommentsCollapsedAtom } from "@/atoms/chat/current-thread.atom"; +import { setCommentsCollapsedAtom, setTargetCommentIdAtom } from "@/atoms/chat/current-thread.atom"; import { convertRenderedToDisplay } from "@/components/chat-comments/comment-item/comment-item"; import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; import { Button } from "@/components/ui/button"; @@ -175,6 +175,8 @@ export function InboxSidebar({ // Comments collapsed state (desktop only, when docked) const [, setCommentsCollapsed] = useAtom(setCommentsCollapsedAtom); + // Target comment for navigation - also ensures comments panel is visible + const [, setTargetCommentId] = useAtom(setTargetCommentIdAtom); const [searchQuery, setSearchQuery] = useState(""); const [activeTab, setActiveTab] = useState("mentions"); @@ -346,6 +348,12 @@ export function InboxSidebar({ const commentId = item.metadata.comment_id; if (searchSpaceId && threadId) { + // Pre-set target comment ID before navigation + // This also ensures comments panel is not collapsed + if (commentId) { + setTargetCommentId(commentId); + } + const url = commentId ? `/dashboard/${searchSpaceId}/new-chat/${threadId}?commentId=${commentId}` : `/dashboard/${searchSpaceId}/new-chat/${threadId}`; @@ -356,7 +364,7 @@ export function InboxSidebar({ } } }, - [markAsRead, router, onOpenChange, onCloseMobileSidebar] + [markAsRead, router, onOpenChange, onCloseMobileSidebar, setTargetCommentId] ); const handleMarkAllAsRead = useCallback(async () => { From 0fbf5d5bdd1f844cf7c3949f5258607f4b5130f9 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 27 Jan 2026 19:07:46 +0200 Subject: [PATCH 42/69] fix: podcast status enum serialization and frontend polling --- surfsense_backend/app/db.py | 7 +- .../app/routes/podcasts_routes.py | 2 +- surfsense_backend/app/schemas/podcasts.py | 16 ++ .../new-chat/[[...chat_id]]/page.tsx | 8 +- .../components/tool-ui/generate-podcast.tsx | 143 +++++++++++------- 5 files changed, 116 insertions(+), 60 deletions(-) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 41962b769..5a74cddeb 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -753,7 +753,12 @@ class Podcast(BaseModel, TimestampMixin): podcast_transcript = Column(JSONB, nullable=True) file_location = Column(Text, nullable=True) status = Column( - SQLAlchemyEnum(PodcastStatus, name="podcast_status", create_type=False), + SQLAlchemyEnum( + PodcastStatus, + name="podcast_status", + create_type=False, + values_callable=lambda x: [e.value for e in x], + ), nullable=False, default=PodcastStatus.READY, server_default="ready", diff --git a/surfsense_backend/app/routes/podcasts_routes.py b/surfsense_backend/app/routes/podcasts_routes.py index 041dd80ee..fa8326096 100644 --- a/surfsense_backend/app/routes/podcasts_routes.py +++ b/surfsense_backend/app/routes/podcasts_routes.py @@ -116,7 +116,7 @@ async def read_podcast( "You don't have permission to read podcasts in this search space", ) - return podcast + return PodcastRead.from_orm_with_entries(podcast) except HTTPException as he: raise he except SQLAlchemyError: diff --git a/surfsense_backend/app/schemas/podcasts.py b/surfsense_backend/app/schemas/podcasts.py index ad77c27f8..9e5cb0262 100644 --- a/surfsense_backend/app/schemas/podcasts.py +++ b/surfsense_backend/app/schemas/podcasts.py @@ -43,6 +43,22 @@ class PodcastRead(PodcastBase): id: int status: PodcastStatusEnum = PodcastStatusEnum.READY created_at: datetime + transcript_entries: int | None = None class Config: from_attributes = True + + @classmethod + def from_orm_with_entries(cls, obj): + """Create PodcastRead with transcript_entries computed.""" + data = { + "id": obj.id, + "title": obj.title, + "podcast_transcript": obj.podcast_transcript, + "file_location": obj.file_location, + "search_space_id": obj.search_space_id, + "status": obj.status, + "created_at": obj.created_at, + "transcript_entries": len(obj.podcast_transcript) if obj.podcast_transcript else None, + } + return cls(**data) diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index d025bceab..33ec64696 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -790,13 +790,13 @@ export default function NewChatPage() { // Update the tool call with its result updateToolCall(parsed.toolCallId, { result: parsed.output }); // Handle podcast-specific logic - if (parsed.output?.status === "processing" && parsed.output?.task_id) { + if (parsed.output?.status === "pending" && parsed.output?.podcast_id) { // Check if this is a podcast tool by looking at the content part const idx = toolCallIndices.get(parsed.toolCallId); if (idx !== undefined) { const part = contentParts[idx]; if (part?.type === "tool-call" && part.toolName === "generate_podcast") { - setActivePodcastTaskId(parsed.output.task_id); + setActivePodcastTaskId(String(parsed.output.podcast_id)); } } } @@ -1210,12 +1210,12 @@ export default function NewChatPage() { case "tool-output-available": updateToolCall(parsed.toolCallId, { result: parsed.output }); - if (parsed.output?.status === "processing" && parsed.output?.task_id) { + if (parsed.output?.status === "pending" && parsed.output?.podcast_id) { const idx = toolCallIndices.get(parsed.toolCallId); if (idx !== undefined) { const part = contentParts[idx]; if (part?.type === "tool-call" && part.toolName === "generate_podcast") { - setActivePodcastTaskId(parsed.output.task_id); + setActivePodcastTaskId(String(parsed.output.podcast_id)); } } } diff --git a/surfsense_web/components/tool-ui/generate-podcast.tsx b/surfsense_web/components/tool-ui/generate-podcast.tsx index c76d7ce5a..67eabbc90 100644 --- a/surfsense_web/components/tool-ui/generate-podcast.tsx +++ b/surfsense_web/components/tool-ui/generate-podcast.tsx @@ -20,21 +20,31 @@ const GeneratePodcastArgsSchema = z.object({ }); const GeneratePodcastResultSchema = z.object({ - status: z.enum(["processing", "already_generating", "success", "error"]), - task_id: z.string().nullish(), + // Support both old and new status values for backwards compatibility + status: z.enum([ + "pending", + "generating", + "ready", + "failed", + // Legacy values from old saved chats + "processing", + "already_generating", + "success", + "error", + ]), podcast_id: z.number().nullish(), + task_id: z.string().nullish(), // Legacy field for old saved chats title: z.string().nullish(), transcript_entries: z.number().nullish(), message: z.string().nullish(), error: z.string().nullish(), }); -const TaskStatusResponseSchema = z.object({ - status: z.enum(["processing", "success", "error"]), - podcast_id: z.number().nullish(), - title: z.string().nullish(), +const PodcastStatusResponseSchema = z.object({ + status: z.enum(["pending", "generating", "ready", "failed"]), + id: z.number(), + title: z.string(), transcript_entries: z.number().nullish(), - state: z.string().nullish(), error: z.string().nullish(), }); @@ -52,17 +62,17 @@ const PodcastDetailsSchema = z.object({ */ type GeneratePodcastArgs = z.infer; type GeneratePodcastResult = z.infer; -type TaskStatusResponse = z.infer; +type PodcastStatusResponse = z.infer; type PodcastTranscriptEntry = z.infer; /** - * Parse and validate task status response + * Parse and validate podcast status response */ -function parseTaskStatusResponse(data: unknown): TaskStatusResponse { - const result = TaskStatusResponseSchema.safeParse(data); +function parsePodcastStatusResponse(data: unknown): PodcastStatusResponse | null { + const result = PodcastStatusResponseSchema.safeParse(data); if (!result.success) { - console.warn("Invalid task status response:", result.error.issues); - return { status: "error", error: "Invalid response from server" }; + console.warn("Invalid podcast status response:", result.error.issues); + return null; } return result.data; } @@ -283,44 +293,42 @@ function PodcastPlayer({ } /** - * Polling component that checks task status and shows player when complete + * Polling component that checks podcast status and shows player when ready */ -function PodcastTaskPoller({ taskId, title }: { taskId: string; title: string }) { - const [taskStatus, setTaskStatus] = useState({ status: "processing" }); +function PodcastStatusPoller({ podcastId, title }: { podcastId: number; title: string }) { + const [podcastStatus, setPodcastStatus] = useState(null); const pollingRef = useRef(null); // Set active podcast state when this component mounts useEffect(() => { - setActivePodcastTaskId(taskId); + setActivePodcastTaskId(String(podcastId)); // Clear when component unmounts return () => { - // Only clear if this task is still the active one clearActivePodcastTaskId(); }; - }, [taskId]); + }, [podcastId]); - // Poll for task status + // Poll for podcast status useEffect(() => { const pollStatus = async () => { try { - const rawResponse = await baseApiService.get( - `/api/v1/podcasts/task/${taskId}/status` - ); - const response = parseTaskStatusResponse(rawResponse); - setTaskStatus(response); + const rawResponse = await baseApiService.get(`/api/v1/podcasts/${podcastId}`); + const response = parsePodcastStatusResponse(rawResponse); + if (response) { + setPodcastStatus(response); - // Stop polling if task is complete or errored - if (response.status !== "processing") { - if (pollingRef.current) { - clearInterval(pollingRef.current); - pollingRef.current = null; + // Stop polling if podcast is ready or failed + if (response.status === "ready" || response.status === "failed") { + if (pollingRef.current) { + clearInterval(pollingRef.current); + pollingRef.current = null; + } + clearActivePodcastTaskId(); } - // Clear the active podcast state when task completes - clearActivePodcastTaskId(); } } catch (err) { - console.error("Error polling task status:", err); + console.error("Error polling podcast status:", err); // Don't stop polling on network errors, continue polling } }; @@ -336,27 +344,31 @@ function PodcastTaskPoller({ taskId, title }: { taskId: string; title: string }) clearInterval(pollingRef.current); } }; - }, [taskId]); + }, [podcastId]); - // Show loading state while processing - if (taskStatus.status === "processing") { + // Show loading state while pending or generating + if ( + !podcastStatus || + podcastStatus.status === "pending" || + podcastStatus.status === "generating" + ) { return ; } // Show error state - if (taskStatus.status === "error") { - return ; + if (podcastStatus.status === "failed") { + return ; } - // Show player when complete - if (taskStatus.status === "success" && taskStatus.podcast_id) { + // Show player when ready + if (podcastStatus.status === "ready") { return ( @@ -415,14 +427,15 @@ export const GeneratePodcastToolUI = makeAssistantToolUI< return ; } - // Error result - if (result.status === "error") { - return ; + // Failed result (new: "failed", legacy: "error") + if (result.status === "failed" || result.status === "error") { + return ; } // Already generating - show simple warning, don't create another poller // The FIRST tool call will display the podcast when ready - if (result.status === "already_generating") { + // (new: "generating", legacy: "already_generating") + if (result.status === "generating" || result.status === "already_generating") { return (
@@ -442,13 +455,13 @@ export const GeneratePodcastToolUI = makeAssistantToolUI< ); } - // Processing - poll for completion - if (result.status === "processing" && result.task_id) { - return ; + // Pending - poll for completion (new: "pending" with podcast_id) + if (result.status === "pending" && result.podcast_id) { + return ; } - // Success with podcast_id (direct result, not via polling) - if (result.status === "success" && result.podcast_id) { + // Ready with podcast_id (new: "ready", legacy: "success") + if ((result.status === "ready" || result.status === "success") && result.podcast_id) { return ( +
+
+ +
+
+

+ This podcast was generated with an older version and cannot be displayed. +

+

+ Please generate a new podcast to listen. +

+
+
+
+ ); + } + // Fallback - missing required data - return ; + return ; }, }); From 8e556d8b8063b91795da79bc57fbad89bb133dcd Mon Sep 17 00:00:00 2001 From: Eric Lammertsma Date: Tue, 27 Jan 2026 13:23:45 -0500 Subject: [PATCH 43/69] feat: Implemented hero A/B test, added Contact Sales button and fixed PH toolbar compatibility --- .../components/homepage/hero-section.tsx | 51 ++++++++++++++----- .../components/providers/PostHogProvider.tsx | 5 +- surfsense_web/instrumentation-client.ts | 12 +++++ surfsense_web/types/window.d.ts | 9 ++++ 4 files changed, 61 insertions(+), 16 deletions(-) create mode 100644 surfsense_web/types/window.d.ts diff --git a/surfsense_web/components/homepage/hero-section.tsx b/surfsense_web/components/homepage/hero-section.tsx index 7ccdd850c..64e5eb4bd 100644 --- a/surfsense_web/components/homepage/hero-section.tsx +++ b/surfsense_web/components/homepage/hero-section.tsx @@ -1,4 +1,5 @@ "use client"; +import { useFeatureFlagVariantKey } from "@posthog/react"; import { AnimatePresence, motion } from "motion/react"; import Image from "next/image"; import Link from "next/link"; @@ -33,6 +34,8 @@ const GoogleLogo = ({ className }: { className?: string }) => ( export function HeroSection() { const containerRef = useRef(null); const parentRef = useRef(null); + const heroVariant = useFeatureFlagVariantKey("notebooklm_flag"); + const isNotebookLMVariant = heroVariant === "notebooklm"; return (
- The AI Workspace{" "} -
-
- Built for Teams + {isNotebookLMVariant ? ( +
+
+ NotebookLM for Teams +
-
+ ) : ( + <> + The AI Workspace{" "} +
+
+ Built for Teams +
+
+ + )} {/* // TODO:aCTUAL DESCRITION */} @@ -96,15 +109,10 @@ export function HeroSection() { Connect any LLM to your internal knowledge sources and chat with it in real time alongside your team.

-
- - {/* - Start Free Trial - */} -
+
+ + +
+ + Contact Sales + + + ); +} + const BackgroundGrids = () => { return (
diff --git a/surfsense_web/components/providers/PostHogProvider.tsx b/surfsense_web/components/providers/PostHogProvider.tsx index 2fcca1f9d..1216730f3 100644 --- a/surfsense_web/components/providers/PostHogProvider.tsx +++ b/surfsense_web/components/providers/PostHogProvider.tsx @@ -3,6 +3,7 @@ import { PostHogProvider as PHProvider } from "@posthog/react"; import posthog from "posthog-js"; import type { ReactNode } from "react"; +import "../../instrumentation-client"; import { PostHogIdentify } from "./PostHogIdentify"; interface PostHogProviderProps { @@ -10,8 +11,8 @@ interface PostHogProviderProps { } export function PostHogProvider({ children }: PostHogProviderProps) { - // posthog-js is already initialized in instrumentation-client.ts - // We just need to wrap the app with the PostHogProvider for hook access + // posthog-js is initialized by importing instrumentation-client.ts above + // We wrap the app with the PostHogProvider for hook access return ( diff --git a/surfsense_web/instrumentation-client.ts b/surfsense_web/instrumentation-client.ts index 15f989bb4..e6b346073 100644 --- a/surfsense_web/instrumentation-client.ts +++ b/surfsense_web/instrumentation-client.ts @@ -12,5 +12,17 @@ if (process.env.NEXT_PUBLIC_POSTHOG_KEY) { capture_pageview: "history_change", // Enable session recording capture_pageleave: true, + loaded: (posthog) => { + // Expose PostHog to window for console access and toolbar + if (typeof window !== "undefined") { + window.posthog = posthog; + } + }, }); } + +// Always expose posthog to window for debugging/toolbar access +// This allows testing feature flags even without POSTHOG_KEY configured +if (typeof window !== "undefined") { + window.posthog = posthog; +} diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts new file mode 100644 index 000000000..fcb6878e3 --- /dev/null +++ b/surfsense_web/types/window.d.ts @@ -0,0 +1,9 @@ +import type { PostHog } from "posthog-js"; + +declare global { + interface Window { + posthog?: PostHog; + } +} + +export {}; From 5690ac09ec70f3ef6cc466305b0b3d975dd0243d Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 27 Jan 2026 23:58:08 +0530 Subject: [PATCH 44/69] feat(translations): add translsation --- surfsense_web/messages/en.json | 3 ++- surfsense_web/messages/zh.json | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json index f14c73ddc..48f32466b 100644 --- a/surfsense_web/messages/en.json +++ b/surfsense_web/messages/en.json @@ -715,7 +715,8 @@ "all": "All", "unread": "Unread", "connectors": "Connectors", - "all_connectors": "All connectors" + "all_connectors": "All connectors", + "close": "Close" }, "errors": { "something_went_wrong": "Something went wrong", diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json index b11e43bbc..051327668 100644 --- a/surfsense_web/messages/zh.json +++ b/surfsense_web/messages/zh.json @@ -700,7 +700,8 @@ "all": "全部", "unread": "未读", "connectors": "连接器", - "all_connectors": "所有连接器" + "all_connectors": "所有连接器", + "close": "关闭" }, "errors": { "something_went_wrong": "出错了", From 79f7dfbbed00ff76c1983766b4207d7a647a099b Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 28 Jan 2026 02:14:36 +0530 Subject: [PATCH 45/69] feat(notifications): enhance inbox functionality with type filtering and independent pagination for mentions and status notifications --- .../app/routes/notifications_routes.py | 13 +- .../atoms/chat/current-thread.atom.ts | 4 +- .../components/assistant-ui/markdown-text.tsx | 5 +- .../comment-item/comment-item.tsx | 12 +- .../layout/hooks/SidebarContext.tsx | 1 - .../layout/providers/LayoutDataProvider.tsx | 82 +- .../layout/ui/shell/LayoutShell.tsx | 75 +- .../layout/ui/sidebar/InboxSidebar.tsx | 888 +++++++++--------- surfsense_web/components/tool-ui/audio.tsx | 17 +- .../components/tool-ui/generate-podcast.tsx | 16 +- surfsense_web/hooks/use-inbox.ts | 8 +- .../lib/apis/notifications-api.service.ts | 11 +- 12 files changed, 600 insertions(+), 532 deletions(-) diff --git a/surfsense_backend/app/routes/notifications_routes.py b/surfsense_backend/app/routes/notifications_routes.py index 6bc945643..84591f001 100644 --- a/surfsense_backend/app/routes/notifications_routes.py +++ b/surfsense_backend/app/routes/notifications_routes.py @@ -6,6 +6,7 @@ For older items (beyond the sync window), use the list endpoint. """ from datetime import UTC, datetime, timedelta +from typing import Literal from fastapi import APIRouter, Depends, HTTPException, Query, status from pydantic import BaseModel @@ -20,6 +21,9 @@ router = APIRouter(prefix="/notifications", tags=["notifications"]) # Must match frontend SYNC_WINDOW_DAYS in use-inbox.ts SYNC_WINDOW_DAYS = 14 +# Valid notification types - must match frontend InboxItemTypeEnum +NotificationType = Literal["connector_indexing", "document_processing", "new_mention"] + class NotificationResponse(BaseModel): """Response model for a single notification.""" @@ -73,6 +77,9 @@ class UnreadCountResponse(BaseModel): @router.get("/unread-count", response_model=UnreadCountResponse) async def get_unread_count( search_space_id: int | None = Query(None, description="Filter by search space ID"), + type_filter: NotificationType | None = Query( + None, alias="type", description="Filter by notification type" + ), user: User = Depends(current_active_user), session: AsyncSession = Depends(get_async_session), ) -> UnreadCountResponse: @@ -103,6 +110,10 @@ async def get_unread_count( | (Notification.search_space_id.is_(None)) ) + # Filter by notification type if provided + if type_filter: + base_filter.append(Notification.type == type_filter) + # Total unread count (all time) total_query = select(func.count(Notification.id)).where(*base_filter) total_result = await session.execute(total_query) @@ -125,7 +136,7 @@ async def get_unread_count( @router.get("", response_model=NotificationListResponse) async def list_notifications( search_space_id: int | None = Query(None, description="Filter by search space ID"), - type_filter: str | None = Query( + type_filter: NotificationType | None = Query( None, alias="type", description="Filter by notification type" ), before_date: str | None = Query( diff --git a/surfsense_web/atoms/chat/current-thread.atom.ts b/surfsense_web/atoms/chat/current-thread.atom.ts index 18afb8ff3..5de11eb92 100644 --- a/surfsense_web/atoms/chat/current-thread.atom.ts +++ b/surfsense_web/atoms/chat/current-thread.atom.ts @@ -62,9 +62,7 @@ export const resetCurrentThreadAtom = atom(null, (_, set) => { }); /** Atom to read whether comments panel is collapsed */ -export const commentsCollapsedAtom = atom( - (get) => get(currentThreadAtom).commentsCollapsed -); +export const commentsCollapsedAtom = atom((get) => get(currentThreadAtom).commentsCollapsed); /** Atom to toggle the comments collapsed state */ export const toggleCommentsCollapsedAtom = atom(null, (get, set) => { diff --git a/surfsense_web/components/assistant-ui/markdown-text.tsx b/surfsense_web/components/assistant-ui/markdown-text.tsx index dd4ce6b75..cc8cec5d9 100644 --- a/surfsense_web/components/assistant-ui/markdown-text.tsx +++ b/surfsense_web/components/assistant-ui/markdown-text.tsx @@ -254,10 +254,7 @@ const defaultComponents = memoizeMarkdownComponents({ table: ({ className, ...props }) => (
diff --git a/surfsense_web/components/chat-comments/comment-item/comment-item.tsx b/surfsense_web/components/chat-comments/comment-item/comment-item.tsx index 847886fc8..4996fe01b 100644 --- a/surfsense_web/components/chat-comments/comment-item/comment-item.tsx +++ b/surfsense_web/components/chat-comments/comment-item/comment-item.tsx @@ -3,10 +3,7 @@ import { useAtomValue, useSetAtom } from "jotai"; import { MessageSquare } from "lucide-react"; import { useEffect, useRef, useState } from "react"; -import { - clearTargetCommentIdAtom, - targetCommentIdAtom, -} from "@/atoms/chat/current-thread.atom"; +import { clearTargetCommentIdAtom, targetCommentIdAtom } from "@/atoms/chat/current-thread.atom"; import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; import { Button } from "@/components/ui/button"; import { cn } from "@/lib/utils"; @@ -82,10 +79,9 @@ function renderMentions(content: string): React.ReactNode { const mentionPattern = /@\{([^}]+)\}/g; const parts: React.ReactNode[] = []; let lastIndex = 0; - let match: RegExpExecArray | null; - while ((match = mentionPattern.exec(content)) !== null) { - if (match.index > lastIndex) { + for (const match of content.matchAll(mentionPattern)) { + if (match.index !== undefined && match.index > lastIndex) { parts.push(content.slice(lastIndex, match.index)); } @@ -96,7 +92,7 @@ function renderMentions(content: string): React.ReactNode { ); - lastIndex = match.index + match[0].length; + lastIndex = (match.index ?? 0) + match[0].length; } if (lastIndex < content.length) { diff --git a/surfsense_web/components/layout/hooks/SidebarContext.tsx b/surfsense_web/components/layout/hooks/SidebarContext.tsx index 70e9311f9..7aa24d5d0 100644 --- a/surfsense_web/components/layout/hooks/SidebarContext.tsx +++ b/surfsense_web/components/layout/hooks/SidebarContext.tsx @@ -34,4 +34,3 @@ export function useSidebarContext(): SidebarContextValue { export function useSidebarContextSafe(): SidebarContextValue | null { return useContext(SidebarContext); } - diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx index 8710fdb79..ed8f28916 100644 --- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx +++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx @@ -104,19 +104,55 @@ export function LayoutDataProvider({ // Search space dialog state const [isCreateSearchSpaceDialogOpen, setIsCreateSearchSpaceDialogOpen] = useState(false); - // Inbox hook + // Inbox hooks - separate data sources for mentions and status tabs + // This ensures each tab has independent pagination and data loading const userId = user?.id ? String(user.id) : null; + + // Mentions: Only fetch "new_mention" type notifications const { - inboxItems, - unreadCount, - loading: inboxLoading, - loadingMore: inboxLoadingMore, - hasMore: inboxHasMore, - loadMore: inboxLoadMore, - markAsRead, - markAllAsRead, + inboxItems: mentionItems, + unreadCount: mentionUnreadCount, + loading: mentionLoading, + loadingMore: mentionLoadingMore, + hasMore: mentionHasMore, + loadMore: mentionLoadMore, + markAsRead: markMentionAsRead, + markAllAsRead: markAllMentionsAsRead, + } = useInbox(userId, Number(searchSpaceId) || null, "new_mention"); + + // Status: Fetch all types (will be filtered client-side to status types) + // We pass null to get all, then InboxSidebar filters to status types + const { + inboxItems: statusItems, + unreadCount: statusUnreadCount, + loading: statusLoading, + loadingMore: statusLoadingMore, + hasMore: statusHasMore, + loadMore: statusLoadMore, + markAsRead: markStatusAsRead, + markAllAsRead: markAllStatusAsRead, } = useInbox(userId, Number(searchSpaceId) || null, null); + // Combined unread count for nav badge (mentions take priority for visibility) + const totalUnreadCount = mentionUnreadCount + statusUnreadCount; + + // Unified mark as read that delegates to the correct hook + const markAsRead = useCallback( + async (id: number) => { + // Try both - one will succeed based on which list has the item + const mentionResult = await markMentionAsRead(id); + if (mentionResult) return true; + return markStatusAsRead(id); + }, + [markMentionAsRead, markStatusAsRead] + ); + + // Mark all as read for both types + const markAllAsRead = useCallback(async () => { + await Promise.all([markAllMentionsAsRead(), markAllStatusAsRead()]); + return true; + }, [markAllMentionsAsRead, markAllStatusAsRead]); + // Delete dialogs state const [showDeleteChatDialog, setShowDeleteChatDialog] = useState(false); const [chatToDelete, setChatToDelete] = useState<{ id: number; name: string } | null>(null); @@ -197,7 +233,7 @@ export function LayoutDataProvider({ url: "#inbox", // Special URL to indicate this is handled differently icon: Inbox, isActive: isInboxSidebarOpen, - badge: unreadCount > 0 ? formatInboxCount(unreadCount) : undefined, + badge: totalUnreadCount > 0 ? formatInboxCount(totalUnreadCount) : undefined, }, { title: "Documents", @@ -206,7 +242,7 @@ export function LayoutDataProvider({ isActive: pathname?.includes("/documents"), }, ], - [searchSpaceId, pathname, isInboxSidebarOpen, unreadCount] + [searchSpaceId, pathname, isInboxSidebarOpen, totalUnreadCount] ); // Handlers @@ -465,12 +501,24 @@ export function LayoutDataProvider({ inbox={{ isOpen: isInboxSidebarOpen, onOpenChange: setIsInboxSidebarOpen, - items: inboxItems, - unreadCount, - loading: inboxLoading, - loadingMore: inboxLoadingMore, - hasMore: inboxHasMore, - loadMore: inboxLoadMore, + // Separate data sources for each tab + mentions: { + items: mentionItems, + unreadCount: mentionUnreadCount, + loading: mentionLoading, + loadingMore: mentionLoadingMore, + hasMore: mentionHasMore, + loadMore: mentionLoadMore, + }, + status: { + items: statusItems, + unreadCount: statusUnreadCount, + loading: statusLoading, + loadingMore: statusLoadingMore, + hasMore: statusHasMore, + loadMore: statusLoadMore, + }, + totalUnreadCount, markAsRead, markAllAsRead, isDocked: isInboxDocked, diff --git a/surfsense_web/components/layout/ui/shell/LayoutShell.tsx b/surfsense_web/components/layout/ui/shell/LayoutShell.tsx index d84b9cdce..3624c90a3 100644 --- a/surfsense_web/components/layout/ui/shell/LayoutShell.tsx +++ b/surfsense_web/components/layout/ui/shell/LayoutShell.tsx @@ -11,16 +11,26 @@ import { Header } from "../header"; import { IconRail } from "../icon-rail"; import { InboxSidebar, MobileSidebar, MobileSidebarTrigger, Sidebar } from "../sidebar"; -// Inbox-related props -interface InboxProps { - isOpen: boolean; - onOpenChange: (open: boolean) => void; +// Tab-specific data source props +interface TabDataSource { items: InboxItem[]; unreadCount: number; loading: boolean; loadingMore?: boolean; hasMore?: boolean; loadMore?: () => void; +} + +// Inbox-related props with separate data sources per tab +interface InboxProps { + isOpen: boolean; + onOpenChange: (open: boolean) => void; + /** Mentions tab data source with independent pagination */ + mentions: TabDataSource; + /** Status tab data source with independent pagination */ + status: TabDataSource; + /** Combined unread count for nav badge */ + totalUnreadCount: number; markAsRead: (id: number) => Promise; markAllAsRead: () => Promise; /** Whether the inbox is docked (permanent) */ @@ -151,26 +161,23 @@ export function LayoutShell({ setTheme={setTheme} /> -
- {children} -
+
+ {children} +
- {/* Mobile Inbox Sidebar - only render when open to avoid scroll blocking */} - {inbox?.isOpen && ( - setMobileMenuOpen(false)} - /> - )} + {/* Mobile Inbox Sidebar - only render when open to avoid scroll blocking */} + {inbox?.isOpen && ( + setMobileMenuOpen(false)} + /> + )} @@ -181,7 +188,9 @@ export function LayoutShell({ return ( -
+
void; - inboxItems: InboxItem[]; +// Tab-specific data source with independent pagination +interface TabDataSource { + items: InboxItem[]; unreadCount: number; loading: boolean; loadingMore?: boolean; hasMore?: boolean; loadMore?: () => void; +} + +interface InboxSidebarProps { + open: boolean; + onOpenChange: (open: boolean) => void; + /** Mentions tab data source with independent pagination */ + mentions: TabDataSource; + /** Status tab data source with independent pagination */ + status: TabDataSource; + /** Combined unread count for mark all as read */ + totalUnreadCount: number; markAsRead: (id: number) => Promise; markAllAsRead: () => Promise; onCloseMobileSidebar?: () => void; @@ -157,12 +162,9 @@ interface InboxSidebarProps { export function InboxSidebar({ open, onOpenChange, - inboxItems, - unreadCount, - loading, - loadingMore = false, - hasMore = false, - loadMore, + mentions, + status, + totalUnreadCount, markAsRead, markAllAsRead, onCloseMobileSidebar, @@ -209,11 +211,11 @@ export function InboxSidebar({ // Only lock body scroll on mobile when inbox is open useEffect(() => { if (!open || !isMobile) return; - + // Store original overflow to restore on cleanup const originalOverflow = document.body.style.overflow; document.body.style.overflow = "hidden"; - + return () => { document.body.style.overflow = originalOverflow; }; @@ -226,18 +228,18 @@ export function InboxSidebar({ } }, [activeTab]); - // Split items by type - const mentionItems = useMemo( - () => inboxItems.filter((item) => item.type === "new_mention"), - [inboxItems] - ); + // Get current tab's data source - each tab has independent data and pagination + const currentDataSource = activeTab === "mentions" ? mentions : status; + const { loading, loadingMore = false, hasMore = false, loadMore } = currentDataSource; + // For status items, filter to only show status notification types + // (the status data source may include all types from API) const statusItems = useMemo( () => - inboxItems.filter( + status.items.filter( (item) => item.type === "connector_indexing" || item.type === "document_processing" ), - [inboxItems] + [status.items] ); // Get unique connector types from status items for filtering @@ -259,12 +261,12 @@ export function InboxSidebar({ })); }, [statusItems]); - // Get items for current tab - const currentTabItems = activeTab === "mentions" ? mentionItems : statusItems; + // Get items for current tab - mentions use their source directly, status uses filtered items + const displayItems = activeTab === "mentions" ? mentions.items : statusItems; // Filter items based on filter type, connector filter, and search query const filteredItems = useMemo(() => { - let items = currentTabItems; + let items = displayItems; // Apply read/unread filter if (activeFilter === "unread") { @@ -295,7 +297,7 @@ export function InboxSidebar({ } return items; - }, [currentTabItems, activeFilter, activeTab, selectedConnector, searchQuery]); + }, [displayItems, activeFilter, activeTab, selectedConnector, searchQuery]); // Intersection Observer for infinite scroll with prefetching // Only active when not searching (search results are client-side filtered) @@ -321,16 +323,11 @@ export function InboxSidebar({ } return () => observer.disconnect(); - }, [loadMore, hasMore, loadingMore, open, searchQuery, filteredItems.length]); + }, [loadMore, hasMore, loadingMore, open, searchQuery]); - // Count unread items per tab - const unreadMentionsCount = useMemo(() => { - return mentionItems.filter((item) => !item.read).length; - }, [mentionItems]); - - const unreadStatusCount = useMemo(() => { - return statusItems.filter((item) => !item.read).length; - }, [statusItems]); + // Use unread counts from data sources (more accurate than client-side counting) + const unreadMentionsCount = mentions.unreadCount; + const unreadStatusCount = status.unreadCount; const handleItemClick = useCallback( async (item: InboxItem) => { @@ -481,209 +478,128 @@ export function InboxSidebar({ const inboxContent = ( <>
-
-
-

{t("inbox") || "Inbox"}

-
-
- {/* Mobile: Button that opens bottom drawer */} - {isMobile ? ( - <> - - - - - {t("filter") || "Filter"} - - - - - - - - {t("filter") || "Filter"} - - -
- {/* Filter section */} -
-

- {t("filter") || "Filter"} -

-
- - -
-
- {/* Connectors section - only for status tab */} - {activeTab === "status" && uniqueConnectorTypes.length > 0 && ( -
-

- {t("connectors") || "Connectors"} -

-
- - {uniqueConnectorTypes.map((connector) => ( - - ))} -
-
- )} -
-
-
- - ) : ( - /* Desktop: Dropdown menu */ - setOpenDropdown(isOpen ? "filter" : null)} +
+
+

{t("inbox") || "Inbox"}

+
+
+ {/* Mobile: Button that opens bottom drawer */} + {isMobile ? ( + <> + + + - - - {t("filter") || "Filter"} - - - + + {t("filter") || "Filter"} + + + {t("filter") || "Filter"} + + + + + + + + {t("filter") || "Filter"} + + +
+ {/* Filter section */} +
+

{t("filter") || "Filter"} - - setActiveFilter("all")} - className="flex items-center justify-between" - > - - - {t("all") || "All"} - - {activeFilter === "all" && } - - setActiveFilter("unread")} - className="flex items-center justify-between" - > - - - {t("unread") || "Unread"} - - {activeFilter === "unread" && } - - {activeTab === "status" && uniqueConnectorTypes.length > 0 && ( - <> - - {t("connectors") || "Connectors"} - - setSelectedConnector(null)} - className="flex items-center justify-between" +

+
+ + +
+
+ {/* Connectors section - only for status tab */} + {activeTab === "status" && uniqueConnectorTypes.length > 0 && ( +
+

+ {t("connectors") || "Connectors"} +

+
+ {uniqueConnectorTypes.map((connector) => ( - setSelectedConnector(connector.type)} - className="flex items-center justify-between" + type="button" + onClick={() => { + setSelectedConnector(connector.type); + setFilterDrawerOpen(false); + }} + className={cn( + "flex w-full items-center justify-between rounded-lg px-3 py-2.5 text-sm transition-colors", + selectedConnector === connector.type + ? "bg-primary/10 text-primary" + : "hover:bg-muted" + )} > {getConnectorIcon(connector.type, "h-4 w-4")} @@ -692,240 +608,311 @@ export function InboxSidebar({ {selectedConnector === connector.type && ( )} - + ))} - - )} - - - )} - - -
+
+ )} +
+
+
+ + ) : ( + /* Desktop: Dropdown menu */ + setOpenDropdown(isOpen ? "filter" : null)} + > + + + + - - - {t("mark_all_read") || "Mark all as read"} - - - {/* Close button - mobile only */} - {isMobile && ( - - - - - - {t("close") || "Close"} - - - )} - {/* Dock/Undock button - desktop only */} - {!isMobile && onDockedChange && ( - - - - - - {isDocked ? "Collapse panel" : "Expand panel"} - - + {selectedConnector === connector.type && } + + ))} + )} -
-
- -
- - setSearchQuery(e.target.value)} - className="pl-9 pr-8 h-9" - /> - {searchQuery && ( + + + )} + + + + + + {t("mark_all_read") || "Mark all as read"} + + + {/* Close button - mobile only */} + {isMobile && ( + + - )} -
-
+ + {t("close") || "Close"} + + )} + {/* Dock/Undock button - desktop only */} + {!isMobile && onDockedChange && ( + + + + + + {isDocked ? "Collapse panel" : "Expand panel"} + + + )} +
+
- setActiveTab(value as InboxTab)} - className="shrink-0 mx-4" +
+ + setSearchQuery(e.target.value)} + className="pl-9 pr-8 h-9" + /> + {searchQuery && ( + + )} +
+
-
- {loading ? ( -
- -
- ) : filteredItems.length > 0 ? ( -
- {filteredItems.map((item, index) => { - const isMarkingAsRead = markingAsReadId === item.id; - // Place prefetch trigger on 5th item from end (only if not searching) - const isPrefetchTrigger = - !searchQuery && hasMore && index === filteredItems.length - 5; + setActiveTab(value as InboxTab)} + className="shrink-0 mx-4" + > + + + + + {t("mentions") || "Mentions"} + + {formatInboxCount(unreadMentionsCount)} + + + + + + + {t("status") || "Status"} + + {formatInboxCount(unreadStatusCount)} + + + + + - return ( -
+ {loading ? ( +
+ +
+ ) : filteredItems.length > 0 ? ( +
+ {filteredItems.map((item, index) => { + const isMarkingAsRead = markingAsReadId === item.id; + // Place prefetch trigger on 5th item from end (only if not searching) + const isPrefetchTrigger = + !searchQuery && hasMore && index === filteredItems.length - 5; + + return ( +
+ + + - - -

{item.title}

-

- {convertRenderedToDisplay(item.message)} -

-
-
- - {/* Time and unread dot - fixed width to prevent content shift */} -
- - {formatTime(item.created_at)} - - {!item.read && ( - - )} +
{getStatusIcon(item)}
+
+

+ {item.title} +

+

+ {convertRenderedToDisplay(item.message)} +

-
- ); - })} - {/* Fallback trigger at the very end if less than 5 items and not searching */} - {!searchQuery && filteredItems.length < 5 && hasMore && ( -
- )} + + + +

{item.title}

+

+ {convertRenderedToDisplay(item.message)} +

+
+ + + {/* Time and unread dot - fixed width to prevent content shift */} +
+ + {formatTime(item.created_at)} + + {!item.read && } +
- ) : searchQuery ? ( -
- -

- {t("no_results_found") || "No results found"} -

-

- {t("try_different_search") || "Try a different search term"} -

-
- ) : ( -
- {activeTab === "mentions" ? ( - - ) : ( - - )} -

{getEmptyStateMessage().title}

-

- {getEmptyStateMessage().hint} -

-
- )} + ); + })} + {/* Fallback trigger at the very end if less than 5 items and not searching */} + {!searchQuery && filteredItems.length < 5 && hasMore && ( +
+ )} +
+ ) : searchQuery ? ( +
+ +

+ {t("no_results_found") || "No results found"} +

+

+ {t("try_different_search") || "Try a different search term"} +

+
+ ) : ( +
+ {activeTab === "mentions" ? ( + + ) : ( + + )} +

{getEmptyStateMessage().title}

+

{getEmptyStateMessage().hint}

+
+ )}
); @@ -967,10 +954,7 @@ export function InboxSidebar({ left: isMobile ? 0 : sidebarWidth, width: isMobile ? "100%" : 360, }} - className={cn( - "absolute z-10 overflow-hidden pointer-events-none", - "inset-y-0" - )} + className={cn("absolute z-10 overflow-hidden pointer-events-none", "inset-y-0")} >

{title}

{description && ( -

{description}

+

+ {description} +

)}
@@ -243,7 +245,11 @@ export function Audio({ id, src, title, description, artwork, durationMs, classN {/* Volume control */}
{/* Custom volume bar - visually distinct from progress slider */}
@@ -268,7 +274,12 @@ export function Audio({ id, src, title, description, artwork, durationMs, classN
{/* Download button */} - diff --git a/surfsense_web/components/tool-ui/generate-podcast.tsx b/surfsense_web/components/tool-ui/generate-podcast.tsx index 513853c1a..d40024b7c 100644 --- a/surfsense_web/components/tool-ui/generate-podcast.tsx +++ b/surfsense_web/components/tool-ui/generate-podcast.tsx @@ -96,10 +96,14 @@ function PodcastGeneratingState({ title }: { title: string }) {
-

{title}

+

+ {title} +

- Generating podcast. This may take a few minutes. + + Generating podcast. This may take a few minutes. +
@@ -123,7 +127,9 @@ function PodcastErrorState({ title, error }: { title: string; error: string }) {
-

{title}

+

+ {title} +

Failed to generate podcast

{error}

@@ -143,7 +149,9 @@ function AudioLoadingState({ title }: { title: string }) {
-

{title}

+

+ {title} +

Loading audio... diff --git a/surfsense_web/hooks/use-inbox.ts b/surfsense_web/hooks/use-inbox.ts index 4c26ddcb9..362feb747 100644 --- a/surfsense_web/hooks/use-inbox.ts +++ b/surfsense_web/hooks/use-inbox.ts @@ -318,9 +318,13 @@ export function useInbox( try { // STEP 1: Fetch server counts (total and recent) - guaranteed accurate - console.log("[useInbox] Fetching unread count from server"); + console.log( + "[useInbox] Fetching unread count from server", + typeFilter ? `for type: ${typeFilter}` : "for all types" + ); const serverCounts = await notificationsApiService.getUnreadCount( - searchSpaceId ?? undefined + searchSpaceId ?? undefined, + typeFilter ?? undefined ); if (mounted) { diff --git a/surfsense_web/lib/apis/notifications-api.service.ts b/surfsense_web/lib/apis/notifications-api.service.ts index a9e81a81f..941a347db 100644 --- a/surfsense_web/lib/apis/notifications-api.service.ts +++ b/surfsense_web/lib/apis/notifications-api.service.ts @@ -2,6 +2,7 @@ import { type GetNotificationsRequest, type GetNotificationsResponse, type GetUnreadCountResponse, + type InboxItemTypeEnum, getNotificationsRequest, getNotificationsResponse, getUnreadCountResponse, @@ -92,12 +93,20 @@ class NotificationsApiService { * Get unread notification count with split between total and recent * - total_unread: All unread notifications * - recent_unread: Unread within sync window (last 14 days) + * @param searchSpaceId - Optional search space ID to filter by + * @param type - Optional notification type to filter by (type-safe enum) */ - getUnreadCount = async (searchSpaceId?: number): Promise => { + getUnreadCount = async ( + searchSpaceId?: number, + type?: InboxItemTypeEnum + ): Promise => { const params = new URLSearchParams(); if (searchSpaceId !== undefined) { params.append("search_space_id", String(searchSpaceId)); } + if (type) { + params.append("type", type); + } const queryString = params.toString(); return baseApiService.get( From 0c8d1f3fef57fcdffff316bac329e40fa2431078 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 00:17:29 +0200 Subject: [PATCH 46/69] refactor(backend): two-phase synchronous cloning --- .../versions/81_add_public_chat_features.py | 9 + surfsense_backend/app/celery_app.py | 1 - surfsense_backend/app/db.py | 7 + .../app/routes/new_chat_routes.py | 57 +++++ .../app/routes/public_chat_routes.py | 52 +++-- surfsense_backend/app/schemas/new_chat.py | 17 +- .../app/services/public_chat_service.py | 197 +++++------------- .../tasks/celery_tasks/clone_chat_tasks.py | 66 ------ 8 files changed, 178 insertions(+), 228 deletions(-) delete mode 100644 surfsense_backend/app/tasks/celery_tasks/clone_chat_tasks.py diff --git a/surfsense_backend/alembic/versions/81_add_public_chat_features.py b/surfsense_backend/alembic/versions/81_add_public_chat_features.py index ab73b06bb..8d7e95df7 100644 --- a/surfsense_backend/alembic/versions/81_add_public_chat_features.py +++ b/surfsense_backend/alembic/versions/81_add_public_chat_features.py @@ -8,6 +8,7 @@ Adds columns for: 1. Public sharing via tokenized URLs (public_share_token, public_share_enabled) 2. Clone tracking for audit (cloned_from_thread_id, cloned_at) 3. History bootstrap flag for cloned chats (needs_history_bootstrap) +4. Clone pending flag for two-phase clone (clone_pending) """ from collections.abc import Sequence @@ -76,6 +77,13 @@ def upgrade() -> None: """ ) + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS clone_pending BOOLEAN NOT NULL DEFAULT FALSE; + """ + ) + op.execute( """ CREATE INDEX IF NOT EXISTS ix_new_chat_threads_cloned_from_thread_id @@ -89,6 +97,7 @@ def downgrade() -> None: """Remove public sharing and cloning columns from new_chat_threads.""" op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_cloned_from_thread_id") + op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS clone_pending") op.execute( "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS needs_history_bootstrap" ) diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index b4869d23f..f7bea8cc3 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -65,7 +65,6 @@ celery_app = Celery( "app.tasks.celery_tasks.schedule_checker_task", "app.tasks.celery_tasks.blocknote_migration_tasks", "app.tasks.celery_tasks.document_reindex_tasks", - "app.tasks.celery_tasks.clone_chat_tasks", ], ) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 5a74cddeb..8c6942e44 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -437,6 +437,13 @@ class NewChatThread(BaseModel, TimestampMixin): default=False, server_default="false", ) + # Flag indicating content clone is pending (two-phase clone) + clone_pending = Column( + Boolean, + nullable=False, + default=False, + server_default="false", + ) # Relationships search_space = relationship("SearchSpace", back_populates="new_chat_threads") diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index db371a81c..541e25a75 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -37,6 +37,7 @@ from app.db import ( get_async_session, ) from app.schemas.new_chat import ( + CompleteCloneResponse, NewChatMessageAppend, NewChatMessageRead, NewChatRequest, @@ -669,6 +670,62 @@ async def delete_thread( ) from None +@router.post("/threads/{thread_id}/complete-clone", response_model=CompleteCloneResponse) +async def complete_clone( + thread_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Complete the cloning process for a thread. + + Copies messages and podcasts from the source thread. + Sets clone_pending=False and needs_history_bootstrap=True when done. + + Requires authentication and ownership of the thread. + """ + from app.services.public_chat_service import complete_clone_content + + try: + result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == thread_id) + ) + thread = result.scalars().first() + + if not thread: + raise HTTPException(status_code=404, detail="Thread not found") + + if thread.created_by_id != user.id: + raise HTTPException(status_code=403, detail="Not authorized") + + if not thread.clone_pending: + raise HTTPException(status_code=400, detail="Clone already completed") + + if not thread.cloned_from_thread_id: + raise HTTPException(status_code=400, detail="No source thread to clone from") + + message_count = await complete_clone_content( + session=session, + target_thread=thread, + source_thread_id=thread.cloned_from_thread_id, + target_search_space_id=thread.search_space_id, + ) + + return CompleteCloneResponse( + status="success", + message_count=message_count, + ) + + except HTTPException: + raise + except Exception as e: + await session.rollback() + raise HTTPException( + status_code=500, + detail=f"An unexpected error occurred while completing clone: {e!s}", + ) from None + + @router.patch("/threads/{thread_id}/visibility", response_model=NewChatThreadRead) async def update_thread_visibility( thread_id: int, diff --git a/surfsense_backend/app/routes/public_chat_routes.py b/surfsense_backend/app/routes/public_chat_routes.py index ca70e911a..8b4f42559 100644 --- a/surfsense_backend/app/routes/public_chat_routes.py +++ b/surfsense_backend/app/routes/public_chat_routes.py @@ -2,17 +2,20 @@ Routes for public chat access (unauthenticated and mixed-auth endpoints). """ +from datetime import UTC, datetime + from fastapi import APIRouter, Depends, HTTPException from sqlalchemy.ext.asyncio import AsyncSession -from app.db import User, get_async_session +from app.db import ChatVisibility, NewChatThread, User, get_async_session from app.schemas.new_chat import ( - CloneInitiatedResponse, + CloneInitResponse, PublicChatResponse, ) from app.services.public_chat_service import ( get_public_chat, get_thread_by_share_token, + get_user_default_search_space, ) from app.users import current_active_user @@ -33,32 +36,47 @@ async def read_public_chat( return await get_public_chat(session, share_token) -@router.post("/{share_token}/clone", response_model=CloneInitiatedResponse) +@router.post("/{share_token}/clone", response_model=CloneInitResponse) async def clone_public_chat_endpoint( share_token: str, session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), ): """ - Clone a public chat to the user's account. + Initialize cloning a public chat to the user's account. + + Creates an empty thread with clone_pending=True. + Frontend should redirect to the new thread and call /complete-clone. Requires authentication. - Initiates a background job to copy the chat. """ - from app.tasks.celery_tasks.clone_chat_tasks import clone_public_chat_task + source_thread = await get_thread_by_share_token(session, share_token) - thread = await get_thread_by_share_token(session, share_token) + if not source_thread: + raise HTTPException(status_code=404, detail="Chat not found or no longer public") - if not thread: - raise HTTPException(status_code=404, detail="Not found") + target_search_space_id = await get_user_default_search_space(session, user.id) - task_result = clone_public_chat_task.delay( + if target_search_space_id is None: + raise HTTPException(status_code=400, detail="No search space found for user") + + new_thread = NewChatThread( + title=source_thread.title, + archived=False, + visibility=ChatVisibility.PRIVATE, + search_space_id=target_search_space_id, + created_by_id=user.id, + public_share_enabled=False, + cloned_from_thread_id=source_thread.id, + cloned_at=datetime.now(UTC), + clone_pending=True, + ) + session.add(new_thread) + await session.commit() + await session.refresh(new_thread) + + return CloneInitResponse( + thread_id=new_thread.id, + search_space_id=target_search_space_id, share_token=share_token, - user_id=str(user.id), - ) - - return CloneInitiatedResponse( - status="processing", - task_id=task_result.id, - message="Copying chat to your account...", ) diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 5e9d44beb..b420b1b91 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -97,6 +97,7 @@ class NewChatThreadRead(NewChatThreadBase, IDModel): created_by_id: UUID | None = None public_share_enabled: bool = False public_share_token: str | None = None + clone_pending: bool = False created_at: datetime updated_at: datetime @@ -255,7 +256,15 @@ class PublicChatResponse(BaseModel): messages: list[PublicChatMessage] -class CloneInitiatedResponse(BaseModel): - status: str = "processing" - task_id: str - message: str = "Copying chat to your account..." +class CloneInitResponse(BaseModel): + + + thread_id: int + search_space_id: int + share_token: str + + +class CompleteCloneResponse(BaseModel): + + status: str + message_count: int diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index 1dcc97a11..79618974f 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -4,7 +4,6 @@ Service layer for public chat sharing and cloning. import re import secrets -from datetime import UTC, datetime from uuid import UUID from fastapi import HTTPException @@ -241,108 +240,74 @@ async def get_user_default_search_space( return None -async def clone_public_chat( +async def complete_clone_content( session: AsyncSession, - share_token: str, - user_id: UUID, -) -> dict: + target_thread: NewChatThread, + source_thread_id: int, + target_search_space_id: int, +) -> int: """ - Clone a public chat to user's account. + Copy messages and podcasts from source thread to target thread. - Creates a new private thread with all messages and podcasts. - Citations are stripped since they reference the original user's documents. + Sets clone_pending=False and needs_history_bootstrap=True when done. + Returns the number of messages copied. """ - from app.db import ( - ChatVisibility, - NewChatMessage, + from app.db import NewChatMessage + + result = await session.execute( + select(NewChatThread) + .options(selectinload(NewChatThread.messages)) + .filter(NewChatThread.id == source_thread_id) ) + source_thread = result.scalars().first() - source_thread = await get_thread_by_share_token(session, share_token) if not source_thread: - await _create_clone_failure_notification( - session, user_id, share_token, "Chat not found or no longer public" + raise ValueError("Source thread not found") + + podcast_id_map: dict[int, int] = {} + message_count = 0 + + for msg in sorted(source_thread.messages, key=lambda m: m.created_at): + new_content = sanitize_content_for_public(msg.content) + + if isinstance(new_content, list): + for part in new_content: + if ( + isinstance(part, dict) + and part.get("type") == "tool-call" + and part.get("toolName") == "generate_podcast" + ): + result_data = part.get("result", {}) + old_podcast_id = result_data.get("podcast_id") + if old_podcast_id and old_podcast_id not in podcast_id_map: + new_podcast_id = await _clone_podcast( + session, + old_podcast_id, + target_search_space_id, + target_thread.id, + ) + if new_podcast_id: + podcast_id_map[old_podcast_id] = new_podcast_id + + if old_podcast_id and old_podcast_id in podcast_id_map: + result_data["podcast_id"] = podcast_id_map[old_podcast_id] + + new_message = NewChatMessage( + thread_id=target_thread.id, + role=msg.role, + content=new_content, + author_id=msg.author_id, + created_at=msg.created_at, ) - return {"status": "error", "error": "Chat not found or no longer public"} + session.add(new_message) + message_count += 1 - try: - target_search_space_id = await get_user_default_search_space(session, user_id) + target_thread.clone_pending = False + target_thread.needs_history_bootstrap = True - if target_search_space_id is None: - await _create_clone_failure_notification( - session, user_id, share_token, "No search space found" - ) - return {"status": "error", "error": "No search space found"} + await session.commit() - new_thread = NewChatThread( - title=source_thread.title, - archived=False, - visibility=ChatVisibility.PRIVATE, - search_space_id=target_search_space_id, - created_by_id=user_id, - public_share_enabled=False, - cloned_from_thread_id=source_thread.id, - cloned_at=datetime.now(UTC), - needs_history_bootstrap=True, - ) - session.add(new_thread) - await session.flush() - - podcast_id_map: dict[int, int] = {} - - for msg in sorted(source_thread.messages, key=lambda m: m.created_at): - new_content = sanitize_content_for_public(msg.content) - - if isinstance(new_content, list): - for part in new_content: - if ( - isinstance(part, dict) - and part.get("type") == "tool-call" - and part.get("toolName") == "generate_podcast" - ): - result = part.get("result", {}) - old_podcast_id = result.get("podcast_id") - if old_podcast_id and old_podcast_id not in podcast_id_map: - new_podcast_id = await _clone_podcast( - session, - old_podcast_id, - target_search_space_id, - new_thread.id, - ) - if new_podcast_id: - podcast_id_map[old_podcast_id] = new_podcast_id - - if old_podcast_id and old_podcast_id in podcast_id_map: - result["podcast_id"] = podcast_id_map[old_podcast_id] - - new_message = NewChatMessage( - thread_id=new_thread.id, - role=msg.role, - content=new_content, - author_id=msg.author_id, - created_at=msg.created_at, - ) - session.add(new_message) - - await session.commit() - - await _create_clone_success_notification( - session, - user_id, - new_thread.id, - target_search_space_id, - source_thread.title, - ) - - return { - "status": "success", - "thread_id": new_thread.id, - "search_space_id": target_search_space_id, - } - - except Exception as e: - await session.rollback() - await _create_clone_failure_notification(session, user_id, share_token, str(e)) - return {"status": "error", "error": str(e)} + return message_count async def _clone_podcast( @@ -387,54 +352,6 @@ async def _clone_podcast( return new_podcast.id -async def _create_clone_success_notification( - session: AsyncSession, - user_id: UUID, - thread_id: int, - search_space_id: int, - original_title: str, -) -> None: - """Create success notification for clone operation.""" - from app.db import Notification - - notification = Notification( - user_id=user_id, - search_space_id=search_space_id, - type="chat_cloned", - title="Chat copied successfully", - message=f"Your copy of '{original_title}' is ready", - notification_metadata={ - "thread_id": thread_id, - "search_space_id": search_space_id, - }, - ) - session.add(notification) - await session.commit() - - -async def _create_clone_failure_notification( - session: AsyncSession, - user_id: UUID, - share_token: str, - error: str, -) -> None: - """Create failure notification for clone operation.""" - from app.db import Notification - - notification = Notification( - user_id=user_id, - type="chat_clone_failed", - title="Failed to copy chat", - message="Could not copy the chat. Please try again.", - notification_metadata={ - "share_token": share_token, - "error": error, - }, - ) - session.add(notification) - await session.commit() - - async def is_podcast_publicly_accessible( session: AsyncSession, podcast_id: int, diff --git a/surfsense_backend/app/tasks/celery_tasks/clone_chat_tasks.py b/surfsense_backend/app/tasks/celery_tasks/clone_chat_tasks.py deleted file mode 100644 index b846ee555..000000000 --- a/surfsense_backend/app/tasks/celery_tasks/clone_chat_tasks.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Celery tasks for cloning public chats.""" - -import asyncio -import logging - -from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine -from sqlalchemy.pool import NullPool - -from app.celery_app import celery_app -from app.config import config - -logger = logging.getLogger(__name__) - - -def get_celery_session_maker(): - """Create a new async session maker for Celery tasks.""" - engine = create_async_engine( - config.DATABASE_URL, - poolclass=NullPool, - echo=False, - ) - return async_sessionmaker(engine, expire_on_commit=False) - - -@celery_app.task(name="clone_public_chat", bind=True) -def clone_public_chat_task( - self, - share_token: str, - user_id: str, -) -> dict: - """ - Celery task to clone a public chat to user's account. - - Args: - share_token: Public share token of the chat to clone - user_id: UUID string of the user cloning the chat - - Returns: - dict with status and thread_id on success, or error info on failure - """ - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - result = loop.run_until_complete(_run_clone(share_token, user_id)) - return result - except Exception as e: - logger.error(f"Error cloning public chat: {e!s}") - return {"status": "error", "error": str(e)} - finally: - asyncio.set_event_loop(None) - loop.close() - - -async def _run_clone(share_token: str, user_id: str) -> dict: - """Run the clone operation with a fresh database session.""" - from uuid import UUID - - from app.services.public_chat_service import clone_public_chat - - async with get_celery_session_maker()() as session: - return await clone_public_chat( - session=session, - share_token=share_token, - user_id=UUID(user_id), - ) From 9a4da10b12767cc9abb9b3f6f7cd4433713706b0 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 00:17:44 +0200 Subject: [PATCH 47/69] feat(frontend): two-phase cloning with loading state --- .../new-chat/[[...chat_id]]/page.tsx | 40 +++++++++++++++++++ .../public-chat/public-chat-footer.tsx | 13 ++---- .../contracts/types/public-chat.types.ts | 20 ++++++++-- .../lib/apis/public-chat-api.service.ts | 24 +++++++++++ surfsense_web/lib/chat/thread-persistence.ts | 1 + 5 files changed, 85 insertions(+), 13 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 33ec64696..38501fcab 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -38,6 +38,7 @@ import { RecallMemoryToolUI, SaveMemoryToolUI } from "@/components/tool-ui/user- import { Spinner } from "@/components/ui/spinner"; import { useChatSessionStateSync } from "@/hooks/use-chat-session-state"; import { useMessagesElectric } from "@/hooks/use-messages-electric"; +import { publicChatApiService } from "@/lib/apis/public-chat-api.service"; // import { WriteTodosToolUI } from "@/components/tool-ui/write-todos"; import { getBearerToken } from "@/lib/auth-utils"; import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter"; @@ -137,6 +138,7 @@ export default function NewChatPage() { const params = useParams(); const queryClient = useQueryClient(); const [isInitializing, setIsInitializing] = useState(true); + const [isCompletingClone, setIsCompletingClone] = useState(false); const [threadId, setThreadId] = useState(null); const [currentThread, setCurrentThread] = useState(null); const [messages, setMessages] = useState([]); @@ -323,6 +325,34 @@ export default function NewChatPage() { initializeThread(); }, [initializeThread]); + // Handle clone completion when thread has clone_pending flag + useEffect(() => { + if (!currentThread?.clone_pending || isCompletingClone) return; + + const completeClone = async () => { + setIsCompletingClone(true); + + try { + await publicChatApiService.completeClone({ thread_id: currentThread.id }); + + // Re-initialize thread to fetch cloned content using existing logic + await initializeThread(); + + // Invalidate threads query to update sidebar + queryClient.invalidateQueries({ + predicate: (query) => Array.isArray(query.queryKey) && query.queryKey[0] === "threads", + }); + } catch (error) { + console.error("[NewChatPage] Failed to complete clone:", error); + toast.error("Failed to copy chat content. Please try again."); + } finally { + setIsCompletingClone(false); + } + }; + + completeClone(); + }, [currentThread?.clone_pending, currentThread?.id, isCompletingClone, initializeThread, queryClient]); + // Handle scroll to comment from URL query params (e.g., from inbox item click) const searchParams = useSearchParams(); const targetCommentId = searchParams.get("commentId"); @@ -1388,6 +1418,16 @@ export default function NewChatPage() { ); } + // Show loading state while completing clone + if (isCompletingClone) { + return ( +
+ +
Copying chat content...
+
+ ); + } + // Show error state only if we tried to load an existing thread but failed // For new chats (urlChatId === 0), threadId being null is expected (lazy creation) if (!threadId && urlChatId > 0) { diff --git a/surfsense_web/components/public-chat/public-chat-footer.tsx b/surfsense_web/components/public-chat/public-chat-footer.tsx index cc54d4150..cf4501c23 100644 --- a/surfsense_web/components/public-chat/public-chat-footer.tsx +++ b/surfsense_web/components/public-chat/public-chat-footer.tsx @@ -22,22 +22,15 @@ export function PublicChatFooter({ shareToken }: PublicChatFooterProps) { setIsCloning(true); try { - await publicChatApiService.clonePublicChat({ + const response = await publicChatApiService.clonePublicChat({ share_token: shareToken, }); - // Force PGlite to resync notifications on next dashboard load - localStorage.setItem("surfsense_force_notif_resync", "true"); - - toast.success("Copying chat to your account...", { - description: "You'll be notified when it's ready.", - }); - - router.push("/dashboard"); + // Redirect to the new chat page (content will be loaded there) + router.push(`/dashboard/${response.search_space_id}/new-chat/${response.thread_id}`); } catch (error) { const message = error instanceof Error ? error.message : "Failed to copy chat"; toast.error(message); - } finally { setIsCloning(false); } }, [shareToken, router]); diff --git a/surfsense_web/contracts/types/public-chat.types.ts b/surfsense_web/contracts/types/public-chat.types.ts index 709bedcb7..f7aea5969 100644 --- a/surfsense_web/contracts/types/public-chat.types.ts +++ b/surfsense_web/contracts/types/public-chat.types.ts @@ -39,16 +39,28 @@ export const getPublicChatResponse = z.object({ }); /** - * Clone public chat + * Clone public chat (init) */ export const clonePublicChatRequest = z.object({ share_token: z.string(), }); export const clonePublicChatResponse = z.object({ + thread_id: z.number(), + search_space_id: z.number(), + share_token: z.string(), +}); + +/** + * Complete clone + */ +export const completeCloneRequest = z.object({ + thread_id: z.number(), +}); + +export const completeCloneResponse = z.object({ status: z.string(), - task_id: z.string(), - message: z.string(), + message_count: z.number(), }); // Type exports @@ -59,3 +71,5 @@ export type GetPublicChatRequest = z.infer; export type GetPublicChatResponse = z.infer; export type ClonePublicChatRequest = z.infer; export type ClonePublicChatResponse = z.infer; +export type CompleteCloneRequest = z.infer; +export type CompleteCloneResponse = z.infer; diff --git a/surfsense_web/lib/apis/public-chat-api.service.ts b/surfsense_web/lib/apis/public-chat-api.service.ts index 52a7c1363..49b1bd686 100644 --- a/surfsense_web/lib/apis/public-chat-api.service.ts +++ b/surfsense_web/lib/apis/public-chat-api.service.ts @@ -1,8 +1,12 @@ import { type ClonePublicChatRequest, type ClonePublicChatResponse, + type CompleteCloneRequest, + type CompleteCloneResponse, clonePublicChatRequest, clonePublicChatResponse, + completeCloneRequest, + completeCloneResponse, type GetPublicChatRequest, type GetPublicChatResponse, getPublicChatRequest, @@ -29,6 +33,7 @@ class PublicChatApiService { /** * Clone a public chat to the user's account. + * Creates an empty thread and returns thread_id for redirect. * Requires authentication. */ clonePublicChat = async (request: ClonePublicChatRequest): Promise => { @@ -44,6 +49,25 @@ class PublicChatApiService { clonePublicChatResponse ); }; + + /** + * Complete the clone by copying messages and podcasts. + * Called from the chat page after redirect. + * Requires authentication. + */ + completeClone = async (request: CompleteCloneRequest): Promise => { + const parsed = completeCloneRequest.safeParse(request); + + if (!parsed.success) { + const errorMessage = parsed.error.issues.map((issue) => issue.message).join(", "); + throw new ValidationError(`Invalid request: ${errorMessage}`); + } + + return baseApiService.post( + `/api/v1/threads/${parsed.data.thread_id}/complete-clone`, + completeCloneResponse + ); + }; } export const publicChatApiService = new PublicChatApiService(); diff --git a/surfsense_web/lib/chat/thread-persistence.ts b/surfsense_web/lib/chat/thread-persistence.ts index 2188d9cec..540fbdc70 100644 --- a/surfsense_web/lib/chat/thread-persistence.ts +++ b/surfsense_web/lib/chat/thread-persistence.ts @@ -26,6 +26,7 @@ export interface ThreadRecord { has_comments?: boolean; public_share_enabled?: boolean; public_share_token?: string | null; + clone_pending?: boolean; } export interface MessageRecord { From 3c835bdf7ed0c7b84a57a3aa13575fff242d4d84 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 00:17:54 +0200 Subject: [PATCH 48/69] fix(frontend): remove clone notifications, allow public podcast access --- .../layout/ui/sidebar/InboxSidebar.tsx | 46 +------------ surfsense_web/contracts/types/inbox.types.ts | 64 +------------------ surfsense_web/hooks/use-inbox.ts | 9 --- surfsense_web/lib/apis/base-api.service.ts | 2 +- 4 files changed, 5 insertions(+), 116 deletions(-) diff --git a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx index 0a749f71c..b47634b8d 100644 --- a/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/InboxSidebar.tsx @@ -7,7 +7,6 @@ import { Check, CheckCheck, CheckCircle2, - Copy, History, Inbox, LayoutGrid, @@ -42,14 +41,7 @@ import { Spinner } from "@/components/ui/spinner"; import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; -import { - type ConnectorIndexingMetadata, - isChatClonedMetadata, - isChatCloneFailedMetadata, - isConnectorIndexingMetadata, - isNewMentionMetadata, - type NewMentionMetadata, -} from "@/contracts/types/inbox.types"; +import { isConnectorIndexingMetadata, isNewMentionMetadata } from "@/contracts/types/inbox.types"; import type { InboxItem } from "@/hooks/use-inbox"; import { useMediaQuery } from "@/hooks/use-media-query"; import { cn } from "@/lib/utils"; @@ -213,15 +205,11 @@ export function InboxSidebar({ [inboxItems] ); - // Status tab includes: connector indexing, document processing, chat clone notifications + // Status tab includes: connector indexing, document processing const statusItems = useMemo( () => inboxItems.filter( - (item) => - item.type === "connector_indexing" || - item.type === "document_processing" || - item.type === "chat_cloned" || - item.type === "chat_clone_failed" + (item) => item.type === "connector_indexing" || item.type === "document_processing" ), [inboxItems] ); @@ -342,17 +330,7 @@ export function InboxSidebar({ router.push(url); } } - } else if (item.type === "chat_cloned") { - // Navigate to the cloned chat - if (isChatClonedMetadata(item.metadata)) { - const { search_space_id, thread_id } = item.metadata; - const url = `/dashboard/${search_space_id}/new-chat/${thread_id}`; - onOpenChange(false); - onCloseMobileSidebar?.(); - router.push(url); - } } - // chat_clone_failed: just mark as read, no navigation }, [markAsRead, router, onOpenChange, onCloseMobileSidebar] ); @@ -412,24 +390,6 @@ export function InboxSidebar({ ); } - // For chat cloned success, show green copy icon - if (item.type === "chat_cloned") { - return ( -
- -
- ); - } - - // For chat clone failed, show red alert icon - if (item.type === "chat_clone_failed") { - return ( -
- -
- ); - } - // For status items (connector/document), show status icons // Safely access status from metadata const metadata = item.metadata as Record; diff --git a/surfsense_web/contracts/types/inbox.types.ts b/surfsense_web/contracts/types/inbox.types.ts index 4240700dc..0983bbc55 100644 --- a/surfsense_web/contracts/types/inbox.types.ts +++ b/surfsense_web/contracts/types/inbox.types.ts @@ -9,8 +9,6 @@ export const inboxItemTypeEnum = z.enum([ "connector_indexing", "document_processing", "new_mention", - "chat_cloned", - "chat_clone_failed", ]); /** @@ -90,22 +88,6 @@ export const newMentionMetadata = z.object({ content_preview: z.string(), }); -/** - * Chat cloned success metadata schema - */ -export const chatClonedMetadata = z.object({ - thread_id: z.number(), - search_space_id: z.number(), -}); - -/** - * Chat clone failed metadata schema - */ -export const chatCloneFailedMetadata = z.object({ - share_token: z.string(), - error: z.string(), -}); - /** * Union of all inbox item metadata types * Use this when the inbox item type is unknown @@ -114,8 +96,6 @@ export const inboxItemMetadata = z.union([ connectorIndexingMetadata, documentProcessingMetadata, newMentionMetadata, - chatClonedMetadata, - chatCloneFailedMetadata, baseInboxItemMetadata, ]); @@ -153,16 +133,6 @@ export const newMentionInboxItem = inboxItem.extend({ metadata: newMentionMetadata, }); -export const chatClonedInboxItem = inboxItem.extend({ - type: z.literal("chat_cloned"), - metadata: chatClonedMetadata, -}); - -export const chatCloneFailedInboxItem = inboxItem.extend({ - type: z.literal("chat_clone_failed"), - metadata: chatCloneFailedMetadata, -}); - // ============================================================================= // API Request/Response Schemas // ============================================================================= @@ -259,33 +229,13 @@ export function isNewMentionMetadata(metadata: unknown): metadata is NewMentionM return newMentionMetadata.safeParse(metadata).success; } -/** - * Type guard for ChatClonedMetadata - */ -export function isChatClonedMetadata(metadata: unknown): metadata is ChatClonedMetadata { - return chatClonedMetadata.safeParse(metadata).success; -} - -/** - * Type guard for ChatCloneFailedMetadata - */ -export function isChatCloneFailedMetadata(metadata: unknown): metadata is ChatCloneFailedMetadata { - return chatCloneFailedMetadata.safeParse(metadata).success; -} - /** * Safe metadata parser - returns typed metadata or null */ export function parseInboxItemMetadata( type: InboxItemTypeEnum, metadata: unknown -): - | ConnectorIndexingMetadata - | DocumentProcessingMetadata - | NewMentionMetadata - | ChatClonedMetadata - | ChatCloneFailedMetadata - | null { +): ConnectorIndexingMetadata | DocumentProcessingMetadata | NewMentionMetadata | null { switch (type) { case "connector_indexing": { const result = connectorIndexingMetadata.safeParse(metadata); @@ -299,14 +249,6 @@ export function parseInboxItemMetadata( const result = newMentionMetadata.safeParse(metadata); return result.success ? result.data : null; } - case "chat_cloned": { - const result = chatClonedMetadata.safeParse(metadata); - return result.success ? result.data : null; - } - case "chat_clone_failed": { - const result = chatCloneFailedMetadata.safeParse(metadata); - return result.success ? result.data : null; - } default: return null; } @@ -323,15 +265,11 @@ export type BaseInboxItemMetadata = z.infer; export type ConnectorIndexingMetadata = z.infer; export type DocumentProcessingMetadata = z.infer; export type NewMentionMetadata = z.infer; -export type ChatClonedMetadata = z.infer; -export type ChatCloneFailedMetadata = z.infer; export type InboxItemMetadata = z.infer; export type InboxItem = z.infer; export type ConnectorIndexingInboxItem = z.infer; export type DocumentProcessingInboxItem = z.infer; export type NewMentionInboxItem = z.infer; -export type ChatClonedInboxItem = z.infer; -export type ChatCloneFailedInboxItem = z.infer; // API Request/Response types export type GetNotificationsRequest = z.infer; diff --git a/surfsense_web/hooks/use-inbox.ts b/surfsense_web/hooks/use-inbox.ts index 656de18a8..4c26ddcb9 100644 --- a/surfsense_web/hooks/use-inbox.ts +++ b/surfsense_web/hooks/use-inbox.ts @@ -119,15 +119,6 @@ export function useInbox( async function startSync() { try { - // Check for force resync flag (e.g., after clone from public page) - if (localStorage.getItem("surfsense_force_notif_resync") === "true") { - console.log("[useInbox] Force resync flag detected, clearing notifications"); - await client.db.exec("DELETE FROM notifications"); - localStorage.removeItem("surfsense_force_notif_resync"); - // Reset sync key to force a fresh sync - userSyncKeyRef.current = null; - } - const cutoffDate = getSyncCutoffDate(); const userSyncKey = `inbox_${userId}_${cutoffDate}`; diff --git a/surfsense_web/lib/apis/base-api.service.ts b/surfsense_web/lib/apis/base-api.service.ts index a87d4deaf..b14818ac1 100644 --- a/surfsense_web/lib/apis/base-api.service.ts +++ b/surfsense_web/lib/apis/base-api.service.ts @@ -26,7 +26,7 @@ class BaseApiService { noAuthEndpoints: string[] = ["/auth/jwt/login", "/auth/register", "/auth/refresh"]; // Prefixes that don't require auth (checked with startsWith) - noAuthPrefixes: string[] = ["/api/v1/public/"]; + noAuthPrefixes: string[] = ["/api/v1/public/", "/api/v1/podcasts/"]; // Use a getter to always read fresh token from localStorage // This ensures the token is always up-to-date after login/logout From 3af4fd05333a2700e9fbdfef3ee7fb2407394669 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 28 Jan 2026 03:55:25 +0530 Subject: [PATCH 49/69] feat(indexing): add content hash check to prevent duplicate indexing and update return values for indexing functions --- .../composio_google_drive_connector.py | 41 ++++++++++++++++--- .../routes/search_source_connectors_routes.py | 16 +++++++- .../app/services/notification_service.py | 21 ++++++++-- .../app/tasks/composio_indexer.py | 18 ++++---- .../config/connector-status-config.json | 5 --- 5 files changed, 76 insertions(+), 25 deletions(-) diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py index e3b988676..9a1937d6b 100644 --- a/surfsense_backend/app/connectors/composio_google_drive_connector.py +++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py @@ -464,6 +464,22 @@ async def check_document_by_unique_identifier( return existing_doc_result.scalars().first() +async def check_document_by_content_hash( + session: AsyncSession, content_hash: str +) -> Document | None: + """Check if a document with the given content hash already exists. + + This is used to prevent duplicate content from being indexed, regardless + of which connector originally indexed it. + """ + from sqlalchemy.future import select + + existing_doc_result = await session.execute( + select(Document).where(Document.content_hash == content_hash) + ) + return existing_doc_result.scalars().first() + + async def update_connector_last_indexed( session: AsyncSession, connector, @@ -487,8 +503,11 @@ async def index_composio_google_drive( log_entry, update_last_indexed: bool = True, max_items: int = 1000, -) -> tuple[int, str]: +) -> tuple[int, int, str | None]: """Index Google Drive files via Composio with delta sync support. + + Returns: + Tuple of (documents_indexed, documents_skipped, error_message or None) Delta Sync Flow: 1. First sync: Full scan + get initial page token @@ -628,11 +647,11 @@ async def index_composio_google_drive( }, ) - return documents_indexed, error_message + return documents_indexed, documents_skipped, error_message except Exception as e: logger.error(f"Failed to index Google Drive via Composio: {e!s}", exc_info=True) - return 0, f"Failed to index Google Drive via Composio: {e!s}" + return 0, 0, f"Failed to index Google Drive via Composio: {e!s}" async def _index_composio_drive_delta_sync( @@ -1000,7 +1019,7 @@ async def _process_single_drive_file( if existing_document: if existing_document.content_hash == content_hash: - return 0, 1, processing_errors # Skipped + return 0, 1, processing_errors # Skipped - unchanged # Update existing document user_llm = await get_user_long_context_llm(session, user_id, search_space_id) @@ -1039,7 +1058,17 @@ async def _process_single_drive_file( existing_document.chunks = chunks existing_document.updated_at = get_current_timestamp() - return 1, 0, processing_errors # Indexed + return 1, 0, processing_errors # Indexed - updated + + # Check if content_hash already exists (from any connector) + # This prevents duplicate content and avoids IntegrityError on unique constraint + existing_by_content_hash = await check_document_by_content_hash(session, content_hash) + if existing_by_content_hash: + logger.info( + f"Skipping file {file_name} (file_id={file_id}): identical content " + f"already indexed as '{existing_by_content_hash.title}'" + ) + return 0, 1, processing_errors # Skipped - duplicate content # Create new document user_llm = await get_user_long_context_llm(session, user_id, search_space_id) @@ -1085,7 +1114,7 @@ async def _process_single_drive_file( ) session.add(document) - return 1, 0, processing_errors # Indexed + return 1, 0, processing_errors # Indexed - new async def _fetch_folder_files_recursively( diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 191c6f954..9815ad827 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -1180,7 +1180,8 @@ async def _run_indexing_with_notifications( ) # Run the indexing function - documents_processed, error_or_warning = await indexing_function( + # Some indexers return (indexed, error), others return (indexed, skipped, error) + result = await indexing_function( session=session, connector_id=connector_id, search_space_id=search_space_id, @@ -1189,6 +1190,13 @@ async def _run_indexing_with_notifications( end_date=end_date, update_last_indexed=False, ) + + # Handle both 2-tuple and 3-tuple returns for backwards compatibility + if len(result) == 3: + documents_processed, documents_skipped, error_or_warning = result + else: + documents_processed, error_or_warning = result + documents_skipped = None # Update connector timestamp if function provided and indexing was successful if documents_processed > 0 and update_timestamp_func: @@ -1216,6 +1224,7 @@ async def _run_indexing_with_notifications( notification=notification, indexed_count=documents_processed, error_message=error_or_warning, # Show errors even if some documents were indexed + skipped_count=documents_skipped, ) await ( session.commit() @@ -1242,6 +1251,7 @@ async def _run_indexing_with_notifications( notification=notification, indexed_count=documents_processed, error_message=error_or_warning, # Show errors even if some documents were indexed + skipped_count=documents_skipped, ) await ( session.commit() @@ -1283,6 +1293,7 @@ async def _run_indexing_with_notifications( indexed_count=0, error_message=notification_message, # Pass as warning, not error is_warning=True, # Flag to indicate this is a warning, not an error + skipped_count=documents_skipped, ) await ( session.commit() @@ -1298,6 +1309,7 @@ async def _run_indexing_with_notifications( notification=notification, indexed_count=0, error_message=error_or_warning, + skipped_count=documents_skipped, ) await ( session.commit() @@ -1319,6 +1331,7 @@ async def _run_indexing_with_notifications( notification=notification, indexed_count=0, error_message=None, # No error - sync succeeded + skipped_count=documents_skipped, ) await ( session.commit() @@ -1336,6 +1349,7 @@ async def _run_indexing_with_notifications( notification=notification, indexed_count=0, error_message=str(e), + skipped_count=None, # Unknown on exception ) except Exception as notif_error: logger.error(f"Failed to update notification: {notif_error!s}") diff --git a/surfsense_backend/app/services/notification_service.py b/surfsense_backend/app/services/notification_service.py index 04f39d8ef..ab0fcbfd4 100644 --- a/surfsense_backend/app/services/notification_service.py +++ b/surfsense_backend/app/services/notification_service.py @@ -336,6 +336,7 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler): indexed_count: int, error_message: str | None = None, is_warning: bool = False, + skipped_count: int | None = None, ) -> Notification: """ Update notification when connector indexing completes. @@ -346,6 +347,7 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler): indexed_count: Total number of items indexed error_message: Error message if indexing failed, or warning message (optional) is_warning: If True, treat error_message as a warning (success case) rather than an error + skipped_count: Number of items skipped (e.g., duplicates) - optional Returns: Updated notification @@ -354,6 +356,12 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler): "connector_name", "Connector" ) + # Build the skipped text if there are skipped items + skipped_text = "" + if skipped_count and skipped_count > 0: + skipped_item_text = "item" if skipped_count == 1 else "items" + skipped_text = f" ({skipped_count} {skipped_item_text} skipped - already indexed)" + # If there's an error message but items were indexed, treat it as a warning (partial success) # If is_warning is True, treat it as success even with 0 items (e.g., duplicates found) # Otherwise, treat it as a failure @@ -362,12 +370,12 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler): # Partial success with warnings (e.g., duplicate content from other connectors) title = f"Ready: {connector_name}" item_text = "item" if indexed_count == 1 else "items" - message = f"Now searchable! {indexed_count} {item_text} synced. Note: {error_message}" + message = f"Now searchable! {indexed_count} {item_text} synced{skipped_text}. Note: {error_message}" status = "completed" elif is_warning: # Warning case (e.g., duplicates found) - treat as success title = f"Ready: {connector_name}" - message = f"Sync completed. {error_message}" + message = f"Sync completed{skipped_text}. {error_message}" status = "completed" else: # Complete failure @@ -377,14 +385,19 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler): else: title = f"Ready: {connector_name}" if indexed_count == 0: - message = "Already up to date! No new items to sync." + if skipped_count and skipped_count > 0: + skipped_item_text = "item" if skipped_count == 1 else "items" + message = f"Already up to date! {skipped_count} {skipped_item_text} skipped (already indexed)." + else: + message = "Already up to date! No new items to sync." else: item_text = "item" if indexed_count == 1 else "items" - message = f"Now searchable! {indexed_count} {item_text} synced." + message = f"Now searchable! {indexed_count} {item_text} synced{skipped_text}." status = "completed" metadata_updates = { "indexed_count": indexed_count, + "skipped_count": skipped_count or 0, "sync_stage": "completed" if (not error_message or is_warning or indexed_count > 0) else "failed", diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py index f97652114..ffc4a1f27 100644 --- a/surfsense_backend/app/tasks/composio_indexer.py +++ b/surfsense_backend/app/tasks/composio_indexer.py @@ -86,7 +86,7 @@ async def index_composio_connector( end_date: str | None = None, update_last_indexed: bool = True, max_items: int = 1000, -) -> tuple[int, str]: +) -> tuple[int, int, str | None]: """ Index content from a Composio connector. @@ -104,7 +104,7 @@ async def index_composio_connector( max_items: Maximum number of items to fetch Returns: - Tuple of (number_of_indexed_items, error_message or None) + Tuple of (number_of_indexed_items, number_of_skipped_items, error_message or None) """ task_logger = TaskLoggingService(session, search_space_id) @@ -132,14 +132,14 @@ async def index_composio_connector( await task_logger.log_task_failure( log_entry, error_msg, {"error_type": "InvalidConnectorType"} ) - return 0, error_msg + return 0, 0, error_msg if not connector: error_msg = f"Composio connector with ID {connector_id} not found" await task_logger.log_task_failure( log_entry, error_msg, {"error_type": "ConnectorNotFound"} ) - return 0, error_msg + return 0, 0, error_msg # Get toolkit ID from config toolkit_id = connector.config.get("toolkit_id") @@ -150,7 +150,7 @@ async def index_composio_connector( await task_logger.log_task_failure( log_entry, error_msg, {"error_type": "MissingToolkitId"} ) - return 0, error_msg + return 0, 0, error_msg # Check if toolkit is indexable if toolkit_id not in INDEXABLE_TOOLKITS: @@ -158,7 +158,7 @@ async def index_composio_connector( await task_logger.log_task_failure( log_entry, error_msg, {"error_type": "ToolkitNotIndexable"} ) - return 0, error_msg + return 0, 0, error_msg # Get indexer function from registry try: @@ -167,7 +167,7 @@ async def index_composio_connector( await task_logger.log_task_failure( log_entry, str(e), {"error_type": "NoIndexerImplemented"} ) - return 0, str(e) + return 0, 0, str(e) # Build kwargs for the indexer function kwargs = { @@ -199,7 +199,7 @@ async def index_composio_connector( {"error_type": "SQLAlchemyError"}, ) logger.error(f"Database error: {db_error!s}", exc_info=True) - return 0, f"Database error: {db_error!s}" + return 0, 0, f"Database error: {db_error!s}" except Exception as e: await session.rollback() await task_logger.log_task_failure( @@ -209,4 +209,4 @@ async def index_composio_connector( {"error_type": type(e).__name__}, ) logger.error(f"Failed to index Composio connector: {e!s}", exc_info=True) - return 0, f"Failed to index Composio connector: {e!s}" + return 0, 0, f"Failed to index Composio connector: {e!s}" diff --git a/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json b/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json index 9c8585a0f..b729c3f8b 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json +++ b/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json @@ -24,11 +24,6 @@ "enabled": true, "status": "warning", "statusMessage": "Some requests may be blocked if not using Firecrawl." - }, - "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": { - "enabled": false, - "status": "disabled", - "statusMessage": "Not available yet." } }, "globalSettings": { From b598cbeac33c45780f8179ace71f905e4e2f698e Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Tue, 27 Jan 2026 17:50:45 -0800 Subject: [PATCH 50/69] feat(backend): Enhance LlamaCloud upload resilience with dynamic timeout calculations and increased retry settings --- .../document_processors/file_processors.py | 141 +++++++++++++++--- surfsense_web/package.json | 4 +- 2 files changed, 119 insertions(+), 26 deletions(-) diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 0a22c20c2..5161fb569 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -37,18 +37,30 @@ from .base import ( from .markdown_processor import add_received_markdown_file_document # Constants for LlamaCloud retry configuration -LLAMACLOUD_MAX_RETRIES = 3 -LLAMACLOUD_BASE_DELAY = 5 # Base delay in seconds for exponential backoff +LLAMACLOUD_MAX_RETRIES = 5 # Increased from 3 for large file resilience +LLAMACLOUD_BASE_DELAY = 10 # Base delay in seconds for exponential backoff +LLAMACLOUD_MAX_DELAY = 120 # Maximum delay between retries (2 minutes) LLAMACLOUD_RETRYABLE_EXCEPTIONS = ( ssl.SSLError, httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.WriteTimeout, + httpx.RemoteProtocolError, + httpx.LocalProtocolError, ConnectionError, + ConnectionResetError, TimeoutError, + OSError, # Catches various network-level errors ) +# Timeout calculation constants +UPLOAD_BYTES_PER_SECOND_SLOW = 100 * 1024 # 100 KB/s (conservative for slow connections) +MIN_UPLOAD_TIMEOUT = 120 # Minimum 2 minutes for any file +MAX_UPLOAD_TIMEOUT = 1800 # Maximum 30 minutes for very large files +BASE_JOB_TIMEOUT = 600 # 10 minutes base for job processing +PER_PAGE_JOB_TIMEOUT = 60 # 1 minute per page for processing + def get_google_drive_unique_identifier( connector: dict | None, @@ -204,6 +216,48 @@ async def find_existing_document_with_migration( return existing_document +def calculate_upload_timeout(file_size_bytes: int) -> float: + """ + Calculate appropriate upload timeout based on file size. + + Assumes a conservative slow connection speed to handle worst-case scenarios. + + Args: + file_size_bytes: Size of the file in bytes + + Returns: + Timeout in seconds + """ + # Calculate time needed at slow connection speed + # Add 50% buffer for network variability and SSL overhead + estimated_time = (file_size_bytes / UPLOAD_BYTES_PER_SECOND_SLOW) * 1.5 + + # Clamp to reasonable bounds + return max(MIN_UPLOAD_TIMEOUT, min(estimated_time, MAX_UPLOAD_TIMEOUT)) + + +def calculate_job_timeout(estimated_pages: int, file_size_bytes: int) -> float: + """ + Calculate job processing timeout based on page count and file size. + + Args: + estimated_pages: Estimated number of pages + file_size_bytes: Size of the file in bytes + + Returns: + Timeout in seconds + """ + # Base timeout + time per page + page_based_timeout = BASE_JOB_TIMEOUT + (estimated_pages * PER_PAGE_JOB_TIMEOUT) + + # Also consider file size (large images take longer to process) + # ~1 minute per 10MB of file size + size_based_timeout = BASE_JOB_TIMEOUT + (file_size_bytes / (10 * 1024 * 1024)) * 60 + + # Use the larger of the two estimates + return max(page_based_timeout, size_based_timeout) + + async def parse_with_llamacloud_retry( file_path: str, estimated_pages: int, @@ -213,6 +267,9 @@ async def parse_with_llamacloud_retry( """ Parse a file with LlamaCloud with retry logic for transient SSL/connection errors. + Uses dynamic timeout calculations based on file size and page count to handle + very large files reliably. + Args: file_path: Path to the file to parse estimated_pages: Estimated number of pages for timeout calculation @@ -225,25 +282,37 @@ async def parse_with_llamacloud_retry( Raises: Exception: If all retries fail """ + import os + import random + from llama_cloud_services import LlamaParse from llama_cloud_services.parse.utils import ResultType - # Calculate timeouts based on estimated pages - # Base timeout of 300 seconds + 30 seconds per page for large documents - base_timeout = 300 - per_page_timeout = 30 - job_timeout = base_timeout + (estimated_pages * per_page_timeout) - - # Create custom httpx client with larger timeouts for file uploads - # The SSL error often occurs during large file uploads, so we need generous timeouts + # Get file size for timeout calculations + file_size_bytes = os.path.getsize(file_path) + file_size_mb = file_size_bytes / (1024 * 1024) + + # Calculate dynamic timeouts based on file size and page count + upload_timeout = calculate_upload_timeout(file_size_bytes) + job_timeout = calculate_job_timeout(estimated_pages, file_size_bytes) + + # HTTP client timeouts - scaled based on file size + # Write timeout is critical for large file uploads custom_timeout = httpx.Timeout( - connect=60.0, # 60 seconds to establish connection - read=300.0, # 5 minutes to read response - write=300.0, # 5 minutes to write/upload (important for large files) - pool=60.0, # 60 seconds to acquire connection from pool + connect=120.0, # 2 minutes to establish connection (handles slow DNS, etc.) + read=upload_timeout, # Dynamic based on file size + write=upload_timeout, # Dynamic based on file size (upload time) + pool=120.0, # 2 minutes to acquire connection from pool + ) + + logging.info( + f"LlamaCloud upload configured: file_size={file_size_mb:.1f}MB, " + f"pages={estimated_pages}, upload_timeout={upload_timeout:.0f}s, " + f"job_timeout={job_timeout:.0f}s" ) last_exception = None + attempt_errors = [] for attempt in range(1, LLAMACLOUD_MAX_RETRIES + 1): try: @@ -257,46 +326,67 @@ async def parse_with_llamacloud_retry( language="en", result_type=ResultType.MD, # Timeout settings for large files - max_timeout=max(2000, job_timeout), # Overall max timeout + max_timeout=int(max(2000, job_timeout + upload_timeout)), job_timeout_in_seconds=job_timeout, - job_timeout_extra_time_per_page_in_seconds=per_page_timeout, + job_timeout_extra_time_per_page_in_seconds=PER_PAGE_JOB_TIMEOUT, # Use our custom client with larger timeouts custom_client=custom_client, ) # Parse the file asynchronously result = await parser.aparse(file_path) + + # Success - log if we had previous failures + if attempt > 1: + logging.info( + f"LlamaCloud upload succeeded on attempt {attempt} after " + f"{len(attempt_errors)} failures" + ) + return result except LLAMACLOUD_RETRYABLE_EXCEPTIONS as e: last_exception = e error_type = type(e).__name__ + error_msg = str(e)[:200] + attempt_errors.append(f"Attempt {attempt}: {error_type} - {error_msg}") if attempt < LLAMACLOUD_MAX_RETRIES: - # Calculate exponential backoff delay - delay = LLAMACLOUD_BASE_DELAY * (2 ** (attempt - 1)) + # Calculate exponential backoff with jitter + # Base delay doubles each attempt, capped at max delay + base_delay = min( + LLAMACLOUD_BASE_DELAY * (2 ** (attempt - 1)), + LLAMACLOUD_MAX_DELAY + ) + # Add random jitter (±25%) to prevent thundering herd + jitter = base_delay * 0.25 * (2 * random.random() - 1) + delay = base_delay + jitter if task_logger and log_entry: await task_logger.log_task_progress( log_entry, - f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}), retrying in {delay}s", + f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}), retrying in {delay:.0f}s", { "error_type": error_type, - "error_message": str(e)[:200], + "error_message": error_msg, "attempt": attempt, "retry_delay": delay, + "file_size_mb": round(file_size_mb, 1), + "upload_timeout": upload_timeout, }, ) else: logging.warning( - f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}): {error_type}. " - f"Retrying in {delay}s..." + f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}): " + f"{error_type}. File: {file_size_mb:.1f}MB. Retrying in {delay:.0f}s..." ) await asyncio.sleep(delay) else: logging.error( - f"LlamaCloud upload failed after {LLAMACLOUD_MAX_RETRIES} attempts: {error_type} - {e}" + f"LlamaCloud upload failed after {LLAMACLOUD_MAX_RETRIES} attempts. " + f"File size: {file_size_mb:.1f}MB, Pages: {estimated_pages}. " + f"Errors: {'; '.join(attempt_errors)}" ) except Exception: @@ -304,7 +394,10 @@ async def parse_with_llamacloud_retry( raise # All retries exhausted - raise last_exception or RuntimeError("LlamaCloud parsing failed after all retries") + raise last_exception or RuntimeError( + f"LlamaCloud parsing failed after {LLAMACLOUD_MAX_RETRIES} retries. " + f"File size: {file_size_mb:.1f}MB" + ) async def add_received_file_document_using_unstructured( diff --git a/surfsense_web/package.json b/surfsense_web/package.json index 17dee6251..9c7d25378 100644 --- a/surfsense_web/package.json +++ b/surfsense_web/package.json @@ -86,8 +86,8 @@ "next-themes": "^0.4.6", "pg": "^8.16.3", "postgres": "^3.4.7", - "posthog-js": "^1.335.3", - "posthog-node": "^5.24.2", + "posthog-js": "^1.335.5", + "posthog-node": "^5.24.3", "react": "^19.2.3", "react-day-picker": "^9.8.1", "react-dom": "^19.2.3", From aab547264eeaded07f536fffbad25caa621787e4 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 28 Jan 2026 09:09:58 +0530 Subject: [PATCH 51/69] feat(connector): implement duplicate detection by Google Drive file ID and generate settings hash for indexing configuration changes --- .../composio_google_drive_connector.py | 125 +++++++++++++++++- 1 file changed, 120 insertions(+), 5 deletions(-) diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py index 9a1937d6b..912f63d54 100644 --- a/surfsense_backend/app/connectors/composio_google_drive_connector.py +++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py @@ -4,6 +4,8 @@ Composio Google Drive Connector Module. Provides Google Drive specific methods for data retrieval and indexing via Composio. """ +import hashlib +import json import logging import os import tempfile @@ -480,6 +482,38 @@ async def check_document_by_content_hash( return existing_doc_result.scalars().first() +async def check_document_by_google_drive_file_id( + session: AsyncSession, file_id: str, search_space_id: int +) -> Document | None: + """Check if a document with this Google Drive file ID exists (from any connector). + + This checks both metadata key formats: + - 'google_drive_file_id' (normal Google Drive connector) + - 'file_id' (Composio Google Drive connector) + + This allows detecting duplicates BEFORE downloading/ETL, saving expensive API calls. + """ + from sqlalchemy import String, cast, or_ + from sqlalchemy.future import select + + # When casting JSON to String, the result includes quotes: "value" instead of value + # So we need to compare with the quoted version + quoted_file_id = f'"{file_id}"' + + existing_doc_result = await session.execute( + select(Document).where( + Document.search_space_id == search_space_id, + or_( + # Normal Google Drive connector format + cast(Document.document_metadata["google_drive_file_id"], String) == quoted_file_id, + # Composio Google Drive connector format + cast(Document.document_metadata["file_id"], String) == quoted_file_id, + ) + ) + ) + return existing_doc_result.scalars().first() + + async def update_connector_last_indexed( session: AsyncSession, connector, @@ -493,6 +527,33 @@ async def update_connector_last_indexed( logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") +def generate_indexing_settings_hash( + selected_folders: list[dict], + selected_files: list[dict], + indexing_options: dict, +) -> str: + """Generate a hash of indexing settings to detect configuration changes. + + This hash is used to determine if indexing settings have changed since + the last index, which would require a full re-scan instead of delta sync. + + Args: + selected_folders: List of {id, name} for folders to index + selected_files: List of {id, name} for individual files to index + indexing_options: Dict with max_files_per_folder, include_subfolders, etc. + + Returns: + MD5 hash string of the settings + """ + settings = { + "folders": sorted([f.get("id", "") for f in selected_folders]), + "files": sorted([f.get("id", "") for f in selected_files]), + "include_subfolders": indexing_options.get("include_subfolders", True), + "max_files_per_folder": indexing_options.get("max_files_per_folder", 100), + } + return hashlib.md5(json.dumps(settings, sort_keys=True).encode()).hexdigest() + + async def index_composio_google_drive( session: AsyncSession, connector, @@ -512,6 +573,7 @@ async def index_composio_google_drive( Delta Sync Flow: 1. First sync: Full scan + get initial page token 2. Subsequent syncs: Use LIST_CHANGES to process only changed files + (unless settings changed or incremental_sync is disabled) Supports folder/file selection via connector config: - selected_folders: List of {id, name} for folders to index @@ -527,12 +589,42 @@ async def index_composio_google_drive( selected_files = connector_config.get("selected_files", []) indexing_options = connector_config.get("indexing_options", {}) - # Check for stored page token for delta sync - stored_page_token = connector_config.get("drive_page_token") - use_delta_sync = stored_page_token and connector.last_indexed_at - max_files_per_folder = indexing_options.get("max_files_per_folder", 100) include_subfolders = indexing_options.get("include_subfolders", True) + incremental_sync = indexing_options.get("incremental_sync", True) + + # Generate current settings hash to detect configuration changes + current_settings_hash = generate_indexing_settings_hash( + selected_folders, selected_files, indexing_options + ) + last_settings_hash = connector_config.get("last_indexed_settings_hash") + + # Detect if settings changed since last index + settings_changed = ( + last_settings_hash is not None and + current_settings_hash != last_settings_hash + ) + + if settings_changed: + logger.info( + f"Indexing settings changed for connector {connector_id}. " + f"Will perform full re-scan to apply new configuration." + ) + + # Check for stored page token for delta sync + stored_page_token = connector_config.get("drive_page_token") + + # Determine whether to use delta sync: + # - Must have a stored page token + # - Must have been indexed before (last_indexed_at exists) + # - User must have incremental_sync enabled + # - Settings must not have changed (folder/subfolder config) + use_delta_sync = ( + incremental_sync and + stored_page_token and + connector.last_indexed_at and + not settings_changed + ) # Route to delta sync or full scan if use_delta_sync: @@ -607,6 +699,14 @@ async def index_composio_google_drive( elif token_error: logger.warning(f"Failed to get new page token: {token_error}") + # Save current settings hash for future change detection + # This allows detecting when folder/subfolder settings change + if not connector.config: + connector.config = {} + connector.config["last_indexed_settings_hash"] = current_settings_hash + flag_modified(connector, "config") + logger.info(f"Saved indexing settings hash for connector {connector_id}") + # CRITICAL: Always update timestamp so Electric SQL syncs and UI shows indexed status await update_connector_last_indexed(session, connector, update_last_indexed) @@ -972,13 +1072,28 @@ async def _process_single_drive_file( """ processing_errors = [] + # ========== EARLY DUPLICATE CHECK BY FILE ID ========== + # Check if this Google Drive file was already indexed by ANY connector + # This happens BEFORE download/ETL to save expensive API calls + existing_by_file_id = await check_document_by_google_drive_file_id( + session, file_id, search_space_id + ) + if existing_by_file_id: + logger.info( + f"Skipping file {file_name} (file_id={file_id}): already indexed " + f"by {existing_by_file_id.document_type.value} as '{existing_by_file_id.title}' " + f"(saved download & ETL cost)" + ) + return 0, 1, processing_errors # Skip - NO download, NO ETL! + # ====================================================== + # Generate unique identifier hash document_type = DocumentType(TOOLKIT_TO_DOCUMENT_TYPE["googledrive"]) unique_identifier_hash = generate_unique_identifier_hash( document_type, f"drive_{file_id}", search_space_id ) - # Check if document exists + # Check if document exists by unique identifier (same connector, same file) existing_document = await check_document_by_unique_identifier( session, unique_identifier_hash ) From c125c9e87f24dd1965a925f355f3b4fd8ca95755 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 28 Jan 2026 09:10:37 +0530 Subject: [PATCH 52/69] chore: ran backend linting --- .../composio_google_drive_connector.py | 41 ++++++++++--------- surfsense_backend/app/routes/rbac_routes.py | 4 +- .../routes/search_source_connectors_routes.py | 2 +- .../app/services/notification_service.py | 8 +++- 4 files changed, 32 insertions(+), 23 deletions(-) diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py index 912f63d54..5b8c4b993 100644 --- a/surfsense_backend/app/connectors/composio_google_drive_connector.py +++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py @@ -470,7 +470,7 @@ async def check_document_by_content_hash( session: AsyncSession, content_hash: str ) -> Document | None: """Check if a document with the given content hash already exists. - + This is used to prevent duplicate content from being indexed, regardless of which connector originally indexed it. """ @@ -486,11 +486,11 @@ async def check_document_by_google_drive_file_id( session: AsyncSession, file_id: str, search_space_id: int ) -> Document | None: """Check if a document with this Google Drive file ID exists (from any connector). - + This checks both metadata key formats: - 'google_drive_file_id' (normal Google Drive connector) - 'file_id' (Composio Google Drive connector) - + This allows detecting duplicates BEFORE downloading/ETL, saving expensive API calls. """ from sqlalchemy import String, cast, or_ @@ -505,10 +505,11 @@ async def check_document_by_google_drive_file_id( Document.search_space_id == search_space_id, or_( # Normal Google Drive connector format - cast(Document.document_metadata["google_drive_file_id"], String) == quoted_file_id, + cast(Document.document_metadata["google_drive_file_id"], String) + == quoted_file_id, # Composio Google Drive connector format cast(Document.document_metadata["file_id"], String) == quoted_file_id, - ) + ), ) ) return existing_doc_result.scalars().first() @@ -533,15 +534,15 @@ def generate_indexing_settings_hash( indexing_options: dict, ) -> str: """Generate a hash of indexing settings to detect configuration changes. - + This hash is used to determine if indexing settings have changed since the last index, which would require a full re-scan instead of delta sync. - + Args: selected_folders: List of {id, name} for folders to index selected_files: List of {id, name} for individual files to index indexing_options: Dict with max_files_per_folder, include_subfolders, etc. - + Returns: MD5 hash string of the settings """ @@ -566,7 +567,7 @@ async def index_composio_google_drive( max_items: int = 1000, ) -> tuple[int, int, str | None]: """Index Google Drive files via Composio with delta sync support. - + Returns: Tuple of (documents_indexed, documents_skipped, error_message or None) @@ -598,13 +599,13 @@ async def index_composio_google_drive( selected_folders, selected_files, indexing_options ) last_settings_hash = connector_config.get("last_indexed_settings_hash") - + # Detect if settings changed since last index settings_changed = ( - last_settings_hash is not None and - current_settings_hash != last_settings_hash + last_settings_hash is not None + and current_settings_hash != last_settings_hash ) - + if settings_changed: logger.info( f"Indexing settings changed for connector {connector_id}. " @@ -613,17 +614,17 @@ async def index_composio_google_drive( # Check for stored page token for delta sync stored_page_token = connector_config.get("drive_page_token") - + # Determine whether to use delta sync: # - Must have a stored page token # - Must have been indexed before (last_indexed_at exists) # - User must have incremental_sync enabled # - Settings must not have changed (folder/subfolder config) use_delta_sync = ( - incremental_sync and - stored_page_token and - connector.last_indexed_at and - not settings_changed + incremental_sync + and stored_page_token + and connector.last_indexed_at + and not settings_changed ) # Route to delta sync or full scan @@ -1177,7 +1178,9 @@ async def _process_single_drive_file( # Check if content_hash already exists (from any connector) # This prevents duplicate content and avoids IntegrityError on unique constraint - existing_by_content_hash = await check_document_by_content_hash(session, content_hash) + existing_by_content_hash = await check_document_by_content_hash( + session, content_hash + ) if existing_by_content_hash: logger.info( f"Skipping file {file_name} (file_id={file_id}): identical content " diff --git a/surfsense_backend/app/routes/rbac_routes.py b/surfsense_backend/app/routes/rbac_routes.py index 5070a2724..7d2cc5c77 100644 --- a/surfsense_backend/app/routes/rbac_routes.py +++ b/surfsense_backend/app/routes/rbac_routes.py @@ -123,7 +123,9 @@ async def list_all_permissions( for perm in Permission: # Extract category from permission value (e.g., "documents:read" -> "documents") category = perm.value.split(":")[0] if ":" in perm.value else "general" - description = PERMISSION_DESCRIPTIONS.get(perm.value, f"Permission for {perm.value}") + description = PERMISSION_DESCRIPTIONS.get( + perm.value, f"Permission for {perm.value}" + ) permissions.append( PermissionInfo( diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 9815ad827..2237ddfa8 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -1190,7 +1190,7 @@ async def _run_indexing_with_notifications( end_date=end_date, update_last_indexed=False, ) - + # Handle both 2-tuple and 3-tuple returns for backwards compatibility if len(result) == 3: documents_processed, documents_skipped, error_or_warning = result diff --git a/surfsense_backend/app/services/notification_service.py b/surfsense_backend/app/services/notification_service.py index ab0fcbfd4..34acbad88 100644 --- a/surfsense_backend/app/services/notification_service.py +++ b/surfsense_backend/app/services/notification_service.py @@ -360,7 +360,9 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler): skipped_text = "" if skipped_count and skipped_count > 0: skipped_item_text = "item" if skipped_count == 1 else "items" - skipped_text = f" ({skipped_count} {skipped_item_text} skipped - already indexed)" + skipped_text = ( + f" ({skipped_count} {skipped_item_text} skipped - already indexed)" + ) # If there's an error message but items were indexed, treat it as a warning (partial success) # If is_warning is True, treat it as success even with 0 items (e.g., duplicates found) @@ -392,7 +394,9 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler): message = "Already up to date! No new items to sync." else: item_text = "item" if indexed_count == 1 else "items" - message = f"Now searchable! {indexed_count} {item_text} synced{skipped_text}." + message = ( + f"Now searchable! {indexed_count} {item_text} synced{skipped_text}." + ) status = "completed" metadata_updates = { From 5eca07f24fc77c00cb0402b945160fbec33a4fd6 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Tue, 27 Jan 2026 23:14:04 -0800 Subject: [PATCH 53/69] chore: updated lock file --- surfsense_web/pnpm-lock.yaml | 52 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/surfsense_web/pnpm-lock.yaml b/surfsense_web/pnpm-lock.yaml index 0461da320..903773831 100644 --- a/surfsense_web/pnpm-lock.yaml +++ b/surfsense_web/pnpm-lock.yaml @@ -52,7 +52,7 @@ importers: version: 0.5.10(react-dom@19.2.3(react@19.2.3))(react@19.2.3) '@posthog/react': specifier: ^1.7.0 - version: 1.7.0(@types/react@19.2.7)(posthog-js@1.335.3)(react@19.2.3) + version: 1.7.0(@types/react@19.2.7)(posthog-js@1.335.5)(react@19.2.3) '@radix-ui/react-accordion': specifier: ^1.2.11 version: 1.2.12(@types/react-dom@19.2.3(@types/react@19.2.7))(@types/react@19.2.7)(react-dom@19.2.3(react@19.2.3))(react@19.2.3) @@ -204,11 +204,11 @@ importers: specifier: ^3.4.7 version: 3.4.7 posthog-js: - specifier: ^1.335.3 - version: 1.335.3 + specifier: ^1.335.5 + version: 1.335.5 posthog-node: - specifier: ^5.24.2 - version: 5.24.2 + specifier: ^5.24.3 + version: 5.24.3 react: specifier: ^19.2.3 version: 19.2.3 @@ -1605,8 +1605,8 @@ packages: resolution: {integrity: sha512-dfUnCxiN9H4ap84DvD2ubjw+3vUNpstxa0TneY/Paat8a3R4uQZDLSvWjmznAY/DoahqTHl9V46HF/Zs3F29pg==} engines: {node: '>= 10.0.0'} - '@posthog/core@1.14.0': - resolution: {integrity: sha512-havjGYHwL8Gy6LXIR911h+M/sYlJLQbepxP/cc1M7Cp3v8F92bzpqkbuvUIUyb7/izkxfGwc9wMqKAo0QxMTrg==} + '@posthog/core@1.14.1': + resolution: {integrity: sha512-DtmJ1y1IDauX8yAZtIotRAYDRkgCCMLk5S9vFFRX7vufhWblQuRUOgn9WYSJrocJlZKm1aEjDzGQ0uyL7HcdLw==} '@posthog/react@1.7.0': resolution: {integrity: sha512-pM7GL7z/rKjiIwosbRiQA3buhLI6vUo+wg+T/ZrVZC7O5bVU07TfgNZTcuOj8E9dx7vDbfNrc1kjDN7PKMM8ug==} @@ -1618,8 +1618,8 @@ packages: '@types/react': optional: true - '@posthog/types@1.335.3': - resolution: {integrity: sha512-mReFmfI+ep5sH3cnFhjvWfOcl3j6olKpN5lHFbOomLGxYTHMXcyMUBE3/o8WfrAgR1qxKQUsWMNcv6BhLr/GKA==} + '@posthog/types@1.335.5': + resolution: {integrity: sha512-QYj5c8wSaXGvV4ugEN65GHD0sIXRveGiZxV4tqpyoP7YIAvAwwA0do0yNfTrEjDXucCQn25pMbCqO25hJrMi5w==} '@prisma/client@4.8.1': resolution: {integrity: sha512-d4xhZhETmeXK/yZ7K0KcVOzEfI5YKGGEr4F5SBV04/MU4ncN/HcE28sy3e4Yt8UFW0ZuImKFQJE+9rWt9WbGSQ==} @@ -5601,15 +5601,15 @@ packages: resolution: {integrity: sha512-Jtc2612XINuBjIl/QTWsV5UvE8UHuNblcO3vVADSrKsrc6RqGX6lOW1cEo3CM2v0XG4Nat8nI+YM7/f26VxXLw==} engines: {node: '>=12'} - posthog-js@1.335.3: - resolution: {integrity: sha512-ZQg3ozgsPom+SZtAxMN97Zx9Vqkdsv1D4TZU/OqbAZdm27PswV6+ShBurm3nKm9jrlUU1cGHMRn2ZJZf249znQ==} + posthog-js@1.335.5: + resolution: {integrity: sha512-1zCEdn7bc1mQ/jpd62YY8U1CyNiftIBE6uKqE2L+mjZ5aJyB2rtUAXefaTbaR/3A98tItjSej4aIa8FBN+O1fw==} - posthog-node@5.24.2: - resolution: {integrity: sha512-cywIUYtSIC9BilgLlZd1R2xNk6omKL6tywG/SCPmUJKeG2jhjvJHSrHXYx4x3uQsUjn8aB9UVI8km+W326Zm8g==} + posthog-node@5.24.3: + resolution: {integrity: sha512-RpjccR8k/GHjtIzRbtlS/Ipw+GvJLJCicJW6L4IZm7gXXNjdyW26x0ba0kvUtWS6mcgx8EBbgXERd5eNsXSjlQ==} engines: {node: ^20.20.0 || >=22.22.0} - preact@10.28.1: - resolution: {integrity: sha512-u1/ixq/lVQI0CakKNvLDEcW5zfCjUQfZdK9qqWuIJtsezuyG6pk9TWj75GMuI/EzRSZB/VAE43sNWWZfiy8psw==} + preact@10.28.2: + resolution: {integrity: sha512-lbteaWGzGHdlIuiJ0l2Jq454m6kcpI1zNje6d8MlGAFlYvP2GO4ibnat7P74Esfz4sPTdM6UxtTwh/d3pwM9JA==} prebuild-install@7.1.3: resolution: {integrity: sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==} @@ -7860,18 +7860,18 @@ snapshots: '@parcel/watcher-win32-ia32': 2.5.1 '@parcel/watcher-win32-x64': 2.5.1 - '@posthog/core@1.14.0': + '@posthog/core@1.14.1': dependencies: cross-spawn: 7.0.6 - '@posthog/react@1.7.0(@types/react@19.2.7)(posthog-js@1.335.3)(react@19.2.3)': + '@posthog/react@1.7.0(@types/react@19.2.7)(posthog-js@1.335.5)(react@19.2.3)': dependencies: - posthog-js: 1.335.3 + posthog-js: 1.335.5 react: 19.2.3 optionalDependencies: '@types/react': 19.2.7 - '@posthog/types@1.335.3': {} + '@posthog/types@1.335.5': {} '@prisma/client@4.8.1': dependencies: @@ -12484,27 +12484,27 @@ snapshots: postgres@3.4.7: {} - posthog-js@1.335.3: + posthog-js@1.335.5: dependencies: '@opentelemetry/api': 1.9.0 '@opentelemetry/api-logs': 0.208.0 '@opentelemetry/exporter-logs-otlp-http': 0.208.0(@opentelemetry/api@1.9.0) '@opentelemetry/resources': 2.5.0(@opentelemetry/api@1.9.0) '@opentelemetry/sdk-logs': 0.208.0(@opentelemetry/api@1.9.0) - '@posthog/core': 1.14.0 - '@posthog/types': 1.335.3 + '@posthog/core': 1.14.1 + '@posthog/types': 1.335.5 core-js: 3.47.0 dompurify: 3.3.1 fflate: 0.4.8 - preact: 10.28.1 + preact: 10.28.2 query-selector-shadow-dom: 1.0.1 web-vitals: 5.1.0 - posthog-node@5.24.2: + posthog-node@5.24.3: dependencies: - '@posthog/core': 1.14.0 + '@posthog/core': 1.14.1 - preact@10.28.1: {} + preact@10.28.2: {} prebuild-install@7.1.3: dependencies: From 41ebe162b0b386440683358c61a44de0bce48517 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 28 Jan 2026 17:43:45 +0530 Subject: [PATCH 54/69] feat(backend): Implement handling of unsupported Notion block types and track skipped content, add documentation for it --- .../app/connectors/notion_history.py | 195 +++++++++++++++--- .../connector_indexers/notion_indexer.py | 32 ++- .../views/indexing-configuration-view.tsx | 2 +- .../content/docs/connectors/notion.mdx | 23 +++ 4 files changed, 218 insertions(+), 34 deletions(-) diff --git a/surfsense_backend/app/connectors/notion_history.py b/surfsense_backend/app/connectors/notion_history.py index e38218a6e..a79168fdf 100644 --- a/surfsense_backend/app/connectors/notion_history.py +++ b/surfsense_backend/app/connectors/notion_history.py @@ -1,6 +1,7 @@ import logging from notion_client import AsyncClient +from notion_client.errors import APIResponseError from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select @@ -12,6 +13,17 @@ from app.utils.oauth_security import TokenEncryption logger = logging.getLogger(__name__) +# Known unsupported block types that Notion API doesn't expose +# These will be skipped gracefully instead of failing the entire sync +UNSUPPORTED_BLOCK_TYPE_ERRORS = [ + "transcription is not supported", + "ai_block is not supported", + "is not supported via the API", +] + +# Known unsupported block types to check before API calls +UNSUPPORTED_BLOCK_TYPES = ["transcription", "ai_block"] + class NotionHistoryConnector: def __init__( @@ -32,6 +44,8 @@ class NotionHistoryConnector: self._connector_id = connector_id self._credentials = credentials self._notion_client: AsyncClient | None = None + # Track pages with skipped unsupported content (for user notifications) + self._pages_with_skipped_content: list[str] = [] async def _get_valid_token(self) -> str: """ @@ -163,6 +177,34 @@ class NotionHistoryConnector: await self._notion_client.aclose() self._notion_client = None + def get_pages_with_skipped_content(self) -> list[str]: + """ + Get list of page titles that had unsupported content skipped. + + Returns: + List of page titles with skipped content + """ + return self._pages_with_skipped_content + + def get_skipped_content_count(self) -> int: + """ + Get count of pages that had unsupported content skipped. + + Returns: + Number of pages with skipped content + """ + return len(self._pages_with_skipped_content) + + def _record_skipped_content(self, page_title: str): + """ + Record that a page had unsupported content skipped. + + Args: + page_title: Title of the page with skipped content + """ + if page_title not in self._pages_with_skipped_content: + self._pages_with_skipped_content.append(page_title) + async def __aenter__(self): """Async context manager entry.""" return self @@ -229,14 +271,21 @@ class NotionHistoryConnector: for page in pages: page_id = page["id"] + page_title = self.get_page_title(page) - # Get detailed page information - page_content = await self.get_page_content(page_id) + # Get detailed page information (pass title for skip tracking) + page_content, had_skipped_content = await self.get_page_content( + page_id, page_title + ) + + # Record if this page had skipped content + if had_skipped_content: + self._record_skipped_content(page_title) all_page_data.append( { "page_id": page_id, - "title": self.get_page_title(page), + "title": page_title, "content": page_content, } ) @@ -265,46 +314,85 @@ class NotionHistoryConnector: # If no title found, return the page ID as fallback return f"Untitled page ({page['id']})" - async def get_page_content(self, page_id): + async def get_page_content( + self, page_id: str, page_title: str | None = None + ) -> tuple[list, bool]: """ Fetches the content (blocks) of a specific page. Args: page_id (str): The ID of the page to fetch + page_title (str, optional): Title of the page (for logging) Returns: - list: List of processed blocks from the page + tuple: (List of processed blocks, bool indicating if content was skipped) """ notion = await self._get_client() blocks = [] has_more = True cursor = None + skipped_blocks_count = 0 + had_skipped_content = False # Paginate through all blocks while has_more: - if cursor: - response = await notion.blocks.children.list( - block_id=page_id, start_cursor=cursor - ) - else: - response = await notion.blocks.children.list(block_id=page_id) + try: + if cursor: + response = await notion.blocks.children.list( + block_id=page_id, start_cursor=cursor + ) + else: + response = await notion.blocks.children.list(block_id=page_id) - blocks.extend(response["results"]) - has_more = response["has_more"] + blocks.extend(response["results"]) + has_more = response["has_more"] - if has_more: - cursor = response["next_cursor"] + if has_more: + cursor = response["next_cursor"] + + except APIResponseError as e: + error_message = str(e) + # Check if this is an unsupported block type error + if any( + err in error_message for err in UNSUPPORTED_BLOCK_TYPE_ERRORS + ): + logger.warning( + f"Skipping page blocks due to unsupported block type in page {page_id}: {error_message}" + ) + skipped_blocks_count += 1 + had_skipped_content = True + # If we haven't fetched any blocks yet, return empty + # If we have some blocks, continue with what we have + has_more = False + continue + elif "Could not find block" in error_message: + logger.warning( + f"Block not found in page {page_id}, continuing with available blocks: {error_message}" + ) + has_more = False + continue + # Re-raise other API errors + raise + + if skipped_blocks_count > 0: + logger.info( + f"Page {page_id}: Skipped {skipped_blocks_count} unsupported block sections, " + f"successfully processed {len(blocks)} blocks" + ) # Process nested blocks recursively processed_blocks = [] for block in blocks: - processed_block = await self.process_block(block) - processed_blocks.append(processed_block) + processed_block, block_had_skips = await self.process_block(block) + if processed_block: # Only add if block was processed successfully + processed_blocks.append(processed_block) + if block_had_skips: + had_skipped_content = True - return processed_blocks + return processed_blocks, had_skipped_content - async def process_block(self, block): + async def process_block(self, block) -> tuple[dict | None, bool]: """ Processes a block and recursively fetches any child blocks. @@ -312,12 +400,28 @@ class NotionHistoryConnector: block (dict): The block to process Returns: - dict: Processed block with content and children + tuple: (Processed block dict or None, bool indicating if content was skipped) """ notion = await self._get_client() block_id = block["id"] block_type = block["type"] + had_skipped_content = False + + # Check if this is a known unsupported block type before processing + if block_type in UNSUPPORTED_BLOCK_TYPES: + logger.debug( + f"Skipping unsupported block type: {block_type} (block_id: {block_id})" + ) + return ( + { + "id": block_id, + "type": block_type, + "content": f"[{block_type} block - not supported by Notion API]", + "children": [], + }, + True, # Content was skipped + ) # Extract block content based on its type content = self.extract_block_content(block) @@ -327,17 +431,48 @@ class NotionHistoryConnector: child_blocks = [] if has_children: - # Fetch and process child blocks - children_response = await notion.blocks.children.list(block_id=block_id) - for child_block in children_response["results"]: - child_blocks.append(await self.process_block(child_block)) + try: + # Fetch and process child blocks + children_response = await notion.blocks.children.list( + block_id=block_id + ) + for child_block in children_response["results"]: + processed_child, child_had_skips = await self.process_block( + child_block + ) + if processed_child: + child_blocks.append(processed_child) + if child_had_skips: + had_skipped_content = True + except APIResponseError as e: + error_message = str(e) + # Check if this is an unsupported block type error + if any( + err in error_message for err in UNSUPPORTED_BLOCK_TYPE_ERRORS + ): + logger.warning( + f"Skipping children of block {block_id} due to unsupported block type: {error_message}" + ) + had_skipped_content = True + # Continue without children instead of failing + elif "Could not find block" in error_message: + logger.warning( + f"Block {block_id} children not accessible, skipping: {error_message}" + ) + # Continue without children + else: + # Re-raise other API errors + raise - return { - "id": block_id, - "type": block_type, - "content": content, - "children": child_blocks, - } + return ( + { + "id": block_id, + "type": block_type, + "content": content, + "children": child_blocks, + }, + had_skipped_content, + ) def extract_block_content(self, block): """ diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index 2d36351fa..eee668198 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -156,6 +156,13 @@ async def index_notion_pages( start_date=start_date_iso, end_date=end_date_iso ) logger.info(f"Found {len(pages)} Notion pages") + + # Get count of pages that had unsupported content skipped + pages_with_skipped_content = notion_client.get_skipped_content_count() + if pages_with_skipped_content > 0: + logger.info( + f"{pages_with_skipped_content} pages had Notion AI content skipped (not available via API)" + ) except Exception as e: await task_logger.log_task_failure( log_entry, @@ -437,13 +444,23 @@ async def index_notion_pages( logger.info(f"Final commit: Total {documents_indexed} documents processed") await session.commit() - # Prepare result message + # Get final count of pages with skipped Notion AI content + pages_with_skipped_ai_content = notion_client.get_skipped_content_count() + + # Prepare result message with user-friendly notification about skipped content result_message = None if skipped_pages: result_message = f"Processed {total_processed} pages. Skipped {len(skipped_pages)} pages: {', '.join(skipped_pages)}" else: result_message = f"Processed {total_processed} pages." + # Add user-friendly message about skipped Notion AI content + if pages_with_skipped_ai_content > 0: + result_message += ( + f" Audio transcriptions and AI summaries from Notion aren't accessible " + f"via their API — all other content was saved." + ) + # Log success await task_logger.log_task_success( log_entry, @@ -453,6 +470,7 @@ async def index_notion_pages( "documents_indexed": documents_indexed, "documents_skipped": documents_skipped, "skipped_pages_count": len(skipped_pages), + "pages_with_skipped_ai_content": pages_with_skipped_ai_content, "result_message": result_message, }, ) @@ -464,10 +482,18 @@ async def index_notion_pages( # Clean up the async client await notion_client.close() + # Return user-friendly message about skipped AI content (if any) + # This will be shown in the notification to inform users + user_notification_message = None + if pages_with_skipped_ai_content > 0: + user_notification_message = ( + "Some Notion AI content couldn't be synced (Notion API limitation)" + ) + return ( total_processed, - None, - ) # Return None on success (result_message is for logging only) + user_notification_message, + ) # Return message about skipped AI content if any except SQLAlchemyError as db_error: await session.rollback() diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx index 35815b0b7..72069441a 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/indexing-configuration-view.tsx @@ -218,7 +218,7 @@ export const IndexingConfigurationView: FC = ({ {isStartingIndexing ? ( <> - Starting... + Starting ) : ( "Start Indexing" diff --git a/surfsense_web/content/docs/connectors/notion.mdx b/surfsense_web/content/docs/connectors/notion.mdx index 0612c4f4f..6fcda8dae 100644 --- a/surfsense_web/content/docs/connectors/notion.mdx +++ b/surfsense_web/content/docs/connectors/notion.mdx @@ -66,6 +66,29 @@ Click **Save** to apply the capabilities. --- +## Limitations & Unsupported Content + +Notion's API has limitations on certain block types that cannot be retrieved. SurfSense will automatically skip these unsupported blocks and continue syncing all other content. + +### Unsupported Block Types + +The following Notion features are **not accessible via the Notion API** and will be skipped during sync: + +- **Transcription blocks** - Audio/video transcriptions from Notion AI +- **AI blocks** - AI-generated content blocks + +### Learn More + +The Notion API only supports specific block types for retrieval. The official list of **supported block types** is documented in Notion's Block reference: + +- **[Block Object Reference](https://developers.notion.com/reference/block)** - Official documentation listing all supported block types. Any block type not listed here (such as `transcription` and `ai_block`) is not accessible via the Notion API. + +For additional information: +- [Working with Page Content](https://developers.notion.com/docs/working-with-page-content) - Guide on how the Notion API handles page content +- [Notion API Reference](https://developers.notion.com/reference) - Complete API documentation + +--- + ## Running SurfSense with Notion Connector Add the Notion environment variables to your Docker run command: From a9d393327d6751db8a2b88e10aa50d6a0afa8fbd Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 14:51:54 +0200 Subject: [PATCH 55/69] fix(backend): Add duplicate content_hash check to connector indexers Prevent UniqueViolationError on ix_documents_content_hash constraint by adding check_duplicate_document_by_hash() before inserting new documents in 15 connector indexers that were missing this check. Affected: clickup, luma, linear, jira, google_gmail, confluence, bookstack, github, webcrawler, teams, slack, notion, discord, airtable, obsidian indexers. --- .../connector_indexers/airtable_indexer.py | 19 ++++++++++++++++ .../connector_indexers/bookstack_indexer.py | 17 ++++++++++++++ .../connector_indexers/clickup_indexer.py | 17 ++++++++++++++ .../connector_indexers/confluence_indexer.py | 17 ++++++++++++++ .../connector_indexers/discord_indexer.py | 19 ++++++++++++++++ .../connector_indexers/github_indexer.py | 16 ++++++++++++++ .../google_gmail_indexer.py | 17 ++++++++++++++ .../tasks/connector_indexers/jira_indexer.py | 17 ++++++++++++++ .../connector_indexers/linear_indexer.py | 17 ++++++++++++++ .../tasks/connector_indexers/luma_indexer.py | 17 ++++++++++++++ .../connector_indexers/notion_indexer.py | 17 ++++++++++++++ .../connector_indexers/obsidian_indexer.py | 17 ++++++++++++++ .../tasks/connector_indexers/slack_indexer.py | 17 ++++++++++++++ .../tasks/connector_indexers/teams_indexer.py | 22 +++++++++++++++++++ .../connector_indexers/webcrawler_indexer.py | 17 ++++++++++++++ 15 files changed, 263 insertions(+) diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py index 4d5a33b79..6bb62d716 100644 --- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py @@ -20,6 +20,7 @@ from app.utils.document_converters import ( from .base import ( calculate_date_range, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -317,6 +318,24 @@ async def index_airtable_records( ) continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = ( + await check_duplicate_document_by_hash( + session, content_hash + ) + ) + + if duplicate_by_content: + logger.info( + f"Airtable record {record_id} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Generate document summary user_llm = await get_user_long_context_llm( diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py index a1067255d..e183ab333 100644 --- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py @@ -22,6 +22,7 @@ from app.utils.document_converters import ( from .base import ( calculate_date_range, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -308,6 +309,22 @@ async def index_bookstack_pages( logger.info(f"Successfully updated BookStack page {page_name}") continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"BookStack page {page_name} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Generate summary with metadata user_llm = await get_user_long_context_llm( diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py index e459584f8..887c3e2e5 100644 --- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py @@ -22,6 +22,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -302,6 +303,22 @@ async def index_clickup_tasks( ) continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"ClickUp task {task_name} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Generate summary with metadata user_llm = await get_user_long_context_llm( diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index ddbefafb9..5673839bb 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -23,6 +23,7 @@ from app.utils.document_converters import ( from .base import ( calculate_date_range, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -306,6 +307,22 @@ async def index_confluence_pages( ) continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"Confluence page {page_title} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Generate summary with metadata user_llm = await get_user_long_context_llm( diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index 8f0c76e53..9e401b335 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -21,6 +21,7 @@ from app.utils.document_converters import ( from .base import ( build_document_metadata_markdown, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -454,6 +455,24 @@ async def index_discord_messages( ) continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = ( + await check_duplicate_document_by_hash( + session, content_hash + ) + ) + + if duplicate_by_content: + logger.info( + f"Discord message {msg_id} in {guild_name}#{channel_name} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Process chunks chunks = await create_document_chunks( diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py index 4a8df4918..fb6989bb9 100644 --- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py @@ -24,6 +24,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -319,6 +320,21 @@ async def _process_repository_digest( # Delete existing document to replace with new one await session.delete(existing_document) await session.flush() + else: + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"Repository {repo_full_name} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + return 0 # Generate summary using LLM (ONE call per repository!) user_llm = await get_user_long_context_llm(session, user_id, search_space_id) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index 08d2904d6..e832997d0 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -25,6 +25,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -316,6 +317,22 @@ async def index_google_gmail_messages( logger.info(f"Successfully updated Gmail message {subject}") continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"Gmail message {subject} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Generate summary with metadata user_llm = await get_user_long_context_llm( diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py index 4851a6466..d6095d20e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py @@ -23,6 +23,7 @@ from app.utils.document_converters import ( from .base import ( calculate_date_range, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -284,6 +285,22 @@ async def index_jira_issues( ) continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"Jira issue {issue_identifier} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Generate summary with metadata user_llm = await get_user_long_context_llm( diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index 7d8e0c30e..d00a39160 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -22,6 +22,7 @@ from app.utils.document_converters import ( from .base import ( calculate_date_range, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -315,6 +316,22 @@ async def index_linear_issues( ) continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"Linear issue {issue_identifier} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Generate summary with metadata user_llm = await get_user_long_context_llm( diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index ead259a44..59890dbe4 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -21,6 +21,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -363,6 +364,22 @@ async def index_luma_events( logger.info(f"Successfully updated Luma event {event_name}") continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"Luma event {event_name} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Generate summary with metadata user_llm = await get_user_long_context_llm( diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index 2d36351fa..70c4917da 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -22,6 +22,7 @@ from .base import ( build_document_metadata_string, calculate_date_range, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -360,6 +361,22 @@ async def index_notion_pages( continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"Notion page {page_title} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Get user's long context LLM user_llm = await get_user_long_context_llm( diff --git a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py index 4c4dab4c2..a603d3fba 100644 --- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py @@ -28,6 +28,7 @@ from app.utils.document_converters import ( from .base import ( build_document_metadata_string, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -426,6 +427,22 @@ async def index_obsidian_vault( indexed_count += 1 else: + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"Obsidian note {title} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + skipped_count += 1 + continue + # Create new document logger.info(f"Indexing new note: {title}") diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py index f6ed4f567..f244c97f8 100644 --- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py @@ -22,6 +22,7 @@ from .base import ( build_document_metadata_markdown, calculate_date_range, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -325,6 +326,22 @@ async def index_slack_messages( logger.info(f"Successfully updated Slack message {msg_ts}") continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"Slack message {msg_ts} in channel {channel_name} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Process chunks chunks = await create_document_chunks(combined_document_string) diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py index b879ddfcb..66b709ddc 100644 --- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py @@ -21,6 +21,7 @@ from .base import ( build_document_metadata_markdown, calculate_date_range, check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -354,6 +355,27 @@ async def index_teams_messages( ) continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = ( + await check_duplicate_document_by_hash( + session, content_hash + ) + ) + + if duplicate_by_content: + logger.info( + "Teams message %s in channel %s already indexed by another connector " + "(existing document ID: %s, type: %s). Skipping.", + message_id, + channel_name, + duplicate_by_content.id, + duplicate_by_content.document_type, + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Process chunks chunks = await create_document_chunks( diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index fb1aae5f2..6ae070c06 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -21,6 +21,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, + check_duplicate_document_by_hash, get_connector_by_id, get_current_timestamp, logger, @@ -281,6 +282,22 @@ async def index_crawled_urls( logger.info(f"Successfully updated URL {url}") continue + # Document doesn't exist by unique_identifier_hash + # Check if a document with the same content_hash exists (from another connector) + with session.no_autoflush: + duplicate_by_content = await check_duplicate_document_by_hash( + session, content_hash + ) + + if duplicate_by_content: + logger.info( + f"URL {url} already indexed by another connector " + f"(existing document ID: {duplicate_by_content.id}, " + f"type: {duplicate_by_content.document_type}). Skipping." + ) + documents_skipped += 1 + continue + # Document doesn't exist - create new one # Generate summary with metadata user_llm = await get_user_long_context_llm( From 33316fa6db417c64bddb7025349757e9b004100c Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 28 Jan 2026 18:36:42 +0530 Subject: [PATCH 56/69] feat(backend): Add retry logic for Notion API calls with user notifications on rate limits and errors --- .../app/connectors/notion_history.py | 194 +++++++++++++++++- .../routes/search_source_connectors_routes.py | 56 ++++- .../app/services/notification_service.py | 86 ++++++++ .../connector_indexers/notion_indexer.py | 13 ++ 4 files changed, 330 insertions(+), 19 deletions(-) diff --git a/surfsense_backend/app/connectors/notion_history.py b/surfsense_backend/app/connectors/notion_history.py index a79168fdf..def86d721 100644 --- a/surfsense_backend/app/connectors/notion_history.py +++ b/surfsense_backend/app/connectors/notion_history.py @@ -1,4 +1,7 @@ +import asyncio import logging +from collections.abc import Awaitable, Callable +from typing import Any, TypeVar from notion_client import AsyncClient from notion_client.errors import APIResponseError @@ -13,6 +16,32 @@ from app.utils.oauth_security import TokenEncryption logger = logging.getLogger(__name__) +# Type variable for generic return type +T = TypeVar("T") + +# ============================================================================ +# Retry Configuration (per Notion API docs) +# https://developers.notion.com/reference/request-limits +# https://developers.notion.com/reference/status-codes +# ============================================================================ +MAX_RETRIES = 5 +BASE_RETRY_DELAY = 1.0 # seconds +MAX_RETRY_DELAY = 60.0 # seconds (Notion's max request timeout) + +# Type alias for retry callback function +# Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds) -> None +# retry_reason: 'rate_limit', 'server_error', 'timeout' +# This callback can be used to update notifications during retries +RetryCallbackType = Callable[[str, int, int, float], Awaitable[None]] + +# HTTP status codes that should trigger a retry +# 429: rate_limited - Use Retry-After header +# 500: internal_server_error - Unexpected error +# 502: bad_gateway - Failed upstream connection +# 503: service_unavailable - Notion unavailable or timeout +# 504: gateway_timeout - Notion timed out +RETRYABLE_STATUS_CODES = frozenset({429, 500, 502, 503, 504}) + # Known unsupported block types that Notion API doesn't expose # These will be skipped gracefully instead of failing the entire sync UNSUPPORTED_BLOCK_TYPE_ERRORS = [ @@ -46,6 +75,24 @@ class NotionHistoryConnector: self._notion_client: AsyncClient | None = None # Track pages with skipped unsupported content (for user notifications) self._pages_with_skipped_content: list[str] = [] + # Optional callback to notify about retry progress (for user notifications) + self._on_retry_callback: RetryCallbackType | None = None + + def set_retry_callback(self, callback: RetryCallbackType | None) -> None: + """ + Set a callback function to be called when API calls are retried. + + This allows the indexer to receive notifications about rate limits + and other transient errors, which can be used to update user-facing + notifications. + + Args: + callback: Async function with signature: + callback(retry_reason, attempt, max_attempts, wait_seconds) -> None + retry_reason: 'rate_limit', 'server_error', or 'timeout' + Set to None to disable callbacks. + """ + self._on_retry_callback = callback async def _get_valid_token(self) -> str: """ @@ -171,6 +218,120 @@ class NotionHistoryConnector: self._notion_client = AsyncClient(auth=token) return self._notion_client + async def _api_call_with_retry( + self, + api_func: Callable[..., Awaitable[T]], + *args: Any, + on_retry: RetryCallbackType | None = None, + **kwargs: Any, + ) -> T: + """ + Execute Notion API call with retry logic and exponential backoff. + + Handles retryable errors per Notion API documentation: + - 429 rate_limited: Uses Retry-After header value + - 500 internal_server_error: Retries with exponential backoff + - 502 bad_gateway: Retries with exponential backoff + - 503 service_unavailable: Retries with exponential backoff + - 504 gateway_timeout: Retries with exponential backoff + + Args: + api_func: The async Notion API function to call + *args: Positional arguments to pass to the API function + on_retry: Optional callback to notify about retry progress. + Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds) + retry_reason is one of: 'rate_limit', 'server_error', 'timeout' + **kwargs: Keyword arguments to pass to the API function + + Returns: + The result from the API call + + Raises: + APIResponseError: If all retries are exhausted or error is not retryable + """ + last_exception: APIResponseError | None = None + retry_delay = BASE_RETRY_DELAY + + for attempt in range(MAX_RETRIES): + try: + return await api_func(*args, **kwargs) + + except APIResponseError as e: + last_exception = e + + # Check if this error is retryable + if e.status not in RETRYABLE_STATUS_CODES: + # Not retryable (e.g., 400, 401, 403, 404) - raise immediately + raise + + # Check if we've exhausted retries + if attempt == MAX_RETRIES - 1: + logger.error( + f"Notion API call failed after {MAX_RETRIES} retries. " + f"Last error: {e.status} {e.code}" + ) + raise + + # Determine retry reason and wait time based on status code + if e.status == 429: + # Rate limited - use Retry-After header if available + retry_reason = "rate_limit" + retry_after = e.headers.get("Retry-After") if e.headers else None + if retry_after: + try: + wait_time = float(retry_after) + except (ValueError, TypeError): + wait_time = retry_delay + else: + wait_time = retry_delay + logger.warning( + f"Notion API rate limited (429). " + f"Waiting {wait_time}s. Attempt {attempt + 1}/{MAX_RETRIES}" + ) + elif e.status == 504: + # Gateway timeout + retry_reason = "timeout" + wait_time = min(retry_delay, MAX_RETRY_DELAY) + logger.warning( + f"Notion API timeout ({e.status}). " + f"Retrying in {wait_time}s. Attempt {attempt + 1}/{MAX_RETRIES}" + ) + else: + # Server error (500/502/503) - use exponential backoff + retry_reason = "server_error" + wait_time = min(retry_delay, MAX_RETRY_DELAY) + logger.warning( + f"Notion API error {e.status} ({e.code}). " + f"Retrying in {wait_time}s. Attempt {attempt + 1}/{MAX_RETRIES}" + ) + + # Notify about retry via callback (for user notifications) + # Call before sleeping so user sees the message while we wait + if on_retry: + try: + await on_retry( + retry_reason, + attempt + 1, # 1-based for display + MAX_RETRIES, + wait_time, + ) + except Exception as callback_error: + # Don't let callback errors break the retry logic + logger.warning( + f"Retry callback failed: {callback_error}" + ) + + # Wait before retrying + await asyncio.sleep(wait_time) + + # Exponential backoff for next attempt + retry_delay = min(retry_delay * 2, MAX_RETRY_DELAY) + + # This should not be reached, but just in case + if last_exception: + raise last_exception + raise RuntimeError("Unexpected state in retry logic") + async def close(self): """Close the async client connection.""" if self._notion_client: @@ -228,7 +389,7 @@ class NotionHistoryConnector: # Build the filter for the search # Note: Notion API requires specific filter structure - search_params = {} + search_params: dict[str, Any] = {} # Filter for pages only (not databases) search_params["filter"] = {"value": "page", "property": "object"} @@ -259,7 +420,10 @@ class NotionHistoryConnector: if cursor: search_params["start_cursor"] = cursor - search_results = await notion.search(**search_params) + # Use retry wrapper for search API call + search_results = await self._api_call_with_retry( + notion.search, on_retry=self._on_retry_callback, **search_params + ) pages.extend(search_results["results"]) has_more = search_results.get("has_more", False) @@ -338,12 +502,20 @@ class NotionHistoryConnector: # Paginate through all blocks while has_more: try: + # Use retry wrapper for blocks.children.list API call if cursor: - response = await notion.blocks.children.list( - block_id=page_id, start_cursor=cursor + response = await self._api_call_with_retry( + notion.blocks.children.list, + on_retry=self._on_retry_callback, + block_id=page_id, + start_cursor=cursor, ) else: - response = await notion.blocks.children.list(block_id=page_id) + response = await self._api_call_with_retry( + notion.blocks.children.list, + on_retry=self._on_retry_callback, + block_id=page_id, + ) blocks.extend(response["results"]) has_more = response["has_more"] @@ -372,7 +544,7 @@ class NotionHistoryConnector: ) has_more = False continue - # Re-raise other API errors + # Re-raise other API errors (after retry exhaustion) raise if skipped_blocks_count > 0: @@ -432,9 +604,11 @@ class NotionHistoryConnector: if has_children: try: - # Fetch and process child blocks - children_response = await notion.blocks.children.list( - block_id=block_id + # Use retry wrapper for blocks.children.list API call + children_response = await self._api_call_with_retry( + notion.blocks.children.list, + on_retry=self._on_retry_callback, + block_id=block_id, ) for child_block in children_response["results"]: processed_child, child_had_skips = await self.process_block( @@ -461,7 +635,7 @@ class NotionHistoryConnector: ) # Continue without children else: - # Re-raise other API errors + # Re-raise other API errors (after retry exhaustion) raise return ( diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 191c6f954..535f579a5 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -1129,6 +1129,7 @@ async def _run_indexing_with_notifications( end_date: str, indexing_function, update_timestamp_func=None, + supports_retry_callback: bool = False, ): """ Generic helper to run indexing with real-time notifications. @@ -1142,10 +1143,14 @@ async def _run_indexing_with_notifications( end_date: End date for indexing indexing_function: Async function that performs the indexing update_timestamp_func: Optional function to update connector timestamp + supports_retry_callback: Whether the indexing function supports on_retry_callback """ from uuid import UUID notification = None + # Track indexed count for retry notifications + current_indexed_count = 0 + try: # Get connector info for notification connector_result = await session.execute( @@ -1179,16 +1184,47 @@ async def _run_indexing_with_notifications( stage="fetching", ) + # Create retry callback for connectors that support it + async def on_retry_callback( + retry_reason: str, attempt: int, max_attempts: int, wait_seconds: float + ) -> None: + """Callback to update notification during API retries (rate limits, etc.)""" + nonlocal notification + if notification: + try: + await session.refresh(notification) + await NotificationService.connector_indexing.notify_retry_progress( + session=session, + notification=notification, + indexed_count=current_indexed_count, + retry_reason=retry_reason, + attempt=attempt, + max_attempts=max_attempts, + wait_seconds=wait_seconds, + ) + await session.commit() + except Exception as e: + # Don't let notification errors break the indexing + logger.warning(f"Failed to update retry notification: {e}") + + # Build kwargs for indexing function + indexing_kwargs = { + "session": session, + "connector_id": connector_id, + "search_space_id": search_space_id, + "user_id": user_id, + "start_date": start_date, + "end_date": end_date, + "update_last_indexed": False, + } + + # Add retry callback for connectors that support it + if supports_retry_callback: + indexing_kwargs["on_retry_callback"] = on_retry_callback + # Run the indexing function - documents_processed, error_or_warning = await indexing_function( - session=session, - connector_id=connector_id, - search_space_id=search_space_id, - user_id=user_id, - start_date=start_date, - end_date=end_date, - update_last_indexed=False, - ) + documents_processed, error_or_warning = await indexing_function(**indexing_kwargs) + current_indexed_count = documents_processed # Update connector timestamp if function provided and indexing was successful if documents_processed > 0 and update_timestamp_func: @@ -1362,6 +1398,7 @@ async def run_notion_indexing_with_new_session( end_date=end_date, indexing_function=index_notion_pages, update_timestamp_func=_update_connector_timestamp_by_id, + supports_retry_callback=True, # Notion connector supports retry notifications ) @@ -1393,6 +1430,7 @@ async def run_notion_indexing( end_date=end_date, indexing_function=index_notion_pages, update_timestamp_func=_update_connector_timestamp_by_id, + supports_retry_callback=True, # Notion connector supports retry notifications ) diff --git a/surfsense_backend/app/services/notification_service.py b/surfsense_backend/app/services/notification_service.py index 04f39d8ef..6a3db566b 100644 --- a/surfsense_backend/app/services/notification_service.py +++ b/surfsense_backend/app/services/notification_service.py @@ -329,6 +329,92 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler): metadata_updates=metadata_updates, ) + async def notify_retry_progress( + self, + session: AsyncSession, + notification: Notification, + indexed_count: int, + retry_reason: str, + attempt: int, + max_attempts: int, + wait_seconds: float | None = None, + service_name: str | None = None, + ) -> Notification: + """ + Update notification when a connector is retrying due to rate limits or errors. + + This method provides user-friendly feedback when external service limitations + (rate limits, temporary outages) cause delays. Users see that the delay is + not our fault and the sync is still progressing. + + This method can be used by ANY connector (Notion, Slack, Airtable, etc.) + when they hit rate limits or transient errors. + + Args: + session: Database session + notification: Notification to update + indexed_count: Number of items indexed so far + retry_reason: Reason for retry ('rate_limit', 'server_error', 'timeout') + attempt: Current retry attempt number (1-based) + max_attempts: Maximum number of retry attempts + wait_seconds: Seconds to wait before retry (optional, for display) + service_name: Name of the external service (e.g., 'Notion', 'Slack') + If not provided, extracts from notification metadata + + Returns: + Updated notification + """ + # Get service name from notification if not provided + if not service_name: + service_name = notification.notification_metadata.get( + "connector_name", "Service" + ) + # Extract just the service name if it's "Notion - My Workspace" + if " - " in service_name: + service_name = service_name.split(" - ")[0] + + # User-friendly messages for different retry reasons + # These make it clear the delay is due to the external service, not SurfSense + retry_messages = { + "rate_limit": f"{service_name} rate limit reached", + "server_error": f"{service_name} is slow to respond", + "timeout": f"{service_name} took too long", + "temporary_error": f"{service_name} temporarily unavailable", + } + + base_message = retry_messages.get( + retry_reason, f"Waiting for {service_name}" + ) + + # Add wait time and progress info + if wait_seconds and wait_seconds > 5: + # Only show wait time if it's significant + message = f"{base_message}. Retrying in {int(wait_seconds)}s..." + else: + message = f"{base_message}. Retrying..." + + # Add progress count if we have any + if indexed_count > 0: + item_text = "item" if indexed_count == 1 else "items" + message = f"{message} ({indexed_count} {item_text} synced so far)" + + metadata_updates = { + "indexed_count": indexed_count, + "sync_stage": "waiting_retry", + "retry_attempt": attempt, + "retry_max_attempts": max_attempts, + "retry_reason": retry_reason, + "retry_wait_seconds": wait_seconds, + } + + return await self.update_notification( + session=session, + notification=notification, + message=message, + status="in_progress", + metadata_updates=metadata_updates, + ) + async def notify_indexing_completed( self, session: AsyncSession, diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index eee668198..b2ab37685 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -2,6 +2,7 @@ Notion connector indexer. """ +from collections.abc import Awaitable, Callable from datetime import datetime from sqlalchemy.exc import SQLAlchemyError @@ -28,6 +29,10 @@ from .base import ( update_connector_last_indexed, ) +# Type alias for retry callback +# Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds) -> None +RetryCallbackType = Callable[[str, int, int, float], Awaitable[None]] + async def index_notion_pages( session: AsyncSession, @@ -37,6 +42,7 @@ async def index_notion_pages( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_retry_callback: RetryCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Notion pages from all accessible pages. @@ -49,6 +55,9 @@ async def index_notion_pages( start_date: Start date for indexing (YYYY-MM-DD format) end_date: End date for indexing (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_retry_callback: Optional callback for retry progress notifications. + Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds) + retry_reason is one of: 'rate_limit', 'server_error', 'timeout' Returns: Tuple containing (number of documents indexed, error message or None) @@ -138,6 +147,10 @@ async def index_notion_pages( session=session, connector_id=connector_id ) + # Set retry callback if provided (for user notifications during rate limits) + if on_retry_callback: + notion_client.set_retry_callback(on_retry_callback) + logger.info(f"Fetching Notion pages from {start_date_iso} to {end_date_iso}") await task_logger.log_task_progress( From 4f7ed8439f648001c4fb430b78b7905e3f73dea4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 15:20:07 +0200 Subject: [PATCH 57/69] fix(backend): Use calculate_date_range for Gmail indexer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gmail indexer was using a hardcoded 30-day default instead of respecting last_indexed_at like other connectors. Now uses calculate_date_range() for consistent behavior (last_indexed_at → now, or 365 days for first run). --- .../connector_indexers/google_gmail_indexer.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index e832997d0..e599abd22 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -24,6 +24,7 @@ from app.utils.document_converters import ( ) from .base import ( + calculate_date_range, check_document_by_unique_identifier, check_duplicate_document_by_hash, get_connector_by_id, @@ -164,10 +165,22 @@ async def index_google_gmail_messages( credentials, session, user_id, connector_id ) + # Calculate date range using last_indexed_at if dates not provided + # This ensures Gmail uses the same date logic as other connectors + # (uses last_indexed_at → now, or 365 days back for first-time indexing) + calculated_start_date, calculated_end_date = calculate_date_range( + connector, start_date, end_date, default_days_back=365 + ) + # Fetch recent Google gmail messages - logger.info(f"Fetching recent emails for connector {connector_id}") + logger.info( + f"Fetching emails for connector {connector_id} " + f"from {calculated_start_date} to {calculated_end_date}" + ) messages, error = await gmail_connector.get_recent_messages( - max_results=max_messages, start_date=start_date, end_date=end_date + max_results=max_messages, + start_date=calculated_start_date, + end_date=calculated_end_date, ) if error: From b3f553802c4347be33e1e9dc71c2162997e5b987 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 28 Jan 2026 18:58:57 +0530 Subject: [PATCH 58/69] fix(backend): Update Notion page indexing log message to clarify sharing requirements and adjust return value for no pages found --- .../app/tasks/connector_indexers/notion_indexer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index b2ab37685..5308720d9 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -190,12 +190,13 @@ async def index_notion_pages( if not pages: await task_logger.log_task_success( log_entry, - f"No Notion pages found for connector {connector_id}", + f"No Notion pages found for connector {connector_id}. " + "Ensure pages are shared with the Notion integration.", {"pages_found": 0}, ) logger.info("No Notion pages found to index") await notion_client.close() - return 0, "No Notion pages found" + return 0, None # Success with 0 pages, not an error # Track the number of documents indexed documents_indexed = 0 From b20fbaca4b008ca484b5bb2978a63ceb1437809b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 17:54:46 +0200 Subject: [PATCH 59/69] fix: skip webcrawler indexing gracefully when no URLs configured --- .../routes/search_source_connectors_routes.py | 30 +++++++++++++---- .../celery_tasks/schedule_checker_task.py | 33 +++++++++++++++++++ .../connector_indexers/webcrawler_indexer.py | 9 ++--- .../app/utils/periodic_scheduler.py | 16 +++++++++ .../app/utils/webcrawler_utils.py | 29 ++++++++++++++++ 5 files changed, 103 insertions(+), 14 deletions(-) create mode 100644 surfsense_backend/app/utils/webcrawler_utils.py diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 191c6f954..6ba67fb69 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -187,6 +187,7 @@ async def create_search_source_connector( user_id=str(user.id), connector_type=db_connector.connector_type, frequency_minutes=db_connector.indexing_frequency_minutes, + connector_config=db_connector.config, ) if not success: logger.warning( @@ -646,6 +647,7 @@ async def index_connector_content( # Handle different connector types response_message = "" + indexing_started = True # Use UTC for consistency with last_indexed_at storage today_str = datetime.now(UTC).strftime("%Y-%m-%d") @@ -921,14 +923,27 @@ async def index_connector_content( elif connector.connector_type == SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: from app.tasks.celery_tasks.connector_tasks import index_crawled_urls_task + from app.utils.webcrawler_utils import parse_webcrawler_urls - logger.info( - f"Triggering web pages indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}" - ) - index_crawled_urls_task.delay( - connector_id, search_space_id, str(user.id), indexing_from, indexing_to - ) - response_message = "Web page indexing started in the background." + # Check if URLs are configured before triggering indexing + connector_config = connector.config or {} + urls = parse_webcrawler_urls(connector_config.get("INITIAL_URLS")) + + if not urls: + # URLs are optional - skip indexing gracefully + logger.info( + f"Webcrawler connector {connector_id} has no URLs configured, skipping indexing" + ) + response_message = "No URLs configured for this connector. Add URLs in the connector settings to enable indexing." + indexing_started = False + else: + logger.info( + f"Triggering web pages indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}" + ) + index_crawled_urls_task.delay( + connector_id, search_space_id, str(user.id), indexing_from, indexing_to + ) + response_message = "Web page indexing started in the background." elif connector.connector_type == SearchSourceConnectorType.OBSIDIAN_CONNECTOR: from app.config import config as app_config @@ -1025,6 +1040,7 @@ async def index_connector_content( return { "message": response_message, + "indexing_started": indexing_started, "connector_id": connector_id, "search_space_id": search_space_id, "indexing_from": indexing_from, diff --git a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py index bf80cbe78..22d45af21 100644 --- a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py +++ b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py @@ -156,6 +156,39 @@ async def _check_and_trigger_schedules(): ) await session.commit() continue + + # Special handling for Webcrawler - skip if no URLs configured + elif ( + connector.connector_type + == SearchSourceConnectorType.WEBCRAWLER_CONNECTOR + ): + from app.utils.webcrawler_utils import parse_webcrawler_urls + + connector_config = connector.config or {} + urls = parse_webcrawler_urls(connector_config.get("INITIAL_URLS")) + + if urls: + task.delay( + connector.id, + connector.search_space_id, + str(connector.user_id), + None, # start_date + None, # end_date + ) + else: + # No URLs configured - skip indexing but still update next_scheduled_at + logger.info( + f"Webcrawler connector {connector.id} has no URLs configured, " + "skipping periodic indexing (will check again at next scheduled time)" + ) + from datetime import timedelta + + connector.next_scheduled_at = now + timedelta( + minutes=connector.indexing_frequency_minutes + ) + await session.commit() + continue + else: task.delay( connector.id, diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index 6ae070c06..0c63fd2f0 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -18,6 +18,7 @@ from app.utils.document_converters import ( generate_document_summary, generate_unique_identifier_hash, ) +from app.utils.webcrawler_utils import parse_webcrawler_urls from .base import ( check_document_by_unique_identifier, @@ -97,13 +98,7 @@ async def index_crawled_urls( api_key = connector.config.get("FIRECRAWL_API_KEY") # Get URLs from connector config - initial_urls = connector.config.get("INITIAL_URLS", "") - if isinstance(initial_urls, str): - urls = [url.strip() for url in initial_urls.split("\n") if url.strip()] - elif isinstance(initial_urls, list): - urls = [url.strip() for url in initial_urls if url.strip()] - else: - urls = [] + urls = parse_webcrawler_urls(connector.config.get("INITIAL_URLS")) logger.info( f"Starting crawled web page indexing for connector {connector_id} with {len(urls)} URLs" diff --git a/surfsense_backend/app/utils/periodic_scheduler.py b/surfsense_backend/app/utils/periodic_scheduler.py index 219641933..aa8c07ce4 100644 --- a/surfsense_backend/app/utils/periodic_scheduler.py +++ b/surfsense_backend/app/utils/periodic_scheduler.py @@ -43,6 +43,7 @@ def create_periodic_schedule( user_id: str, connector_type: SearchSourceConnectorType, frequency_minutes: int, + connector_config: dict | None = None, ) -> bool: """ Trigger the first indexing run immediately when periodic indexing is enabled. @@ -57,11 +58,26 @@ def create_periodic_schedule( user_id: User ID connector_type: Type of connector frequency_minutes: Frequency in minutes (used for logging) + connector_config: Optional connector config dict for validation Returns: True if successful, False otherwise """ try: + # Special handling for connectors that require config validation + if connector_type == SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: + from app.utils.webcrawler_utils import parse_webcrawler_urls + + config = connector_config or {} + urls = parse_webcrawler_urls(config.get("INITIAL_URLS")) + + if not urls: + logger.info( + f"Webcrawler connector {connector_id} has no URLs configured, " + "skipping first indexing run (will run when URLs are added)" + ) + return True # Return success - schedule is created, just no first run + logger.info( f"Periodic indexing enabled for connector {connector_id} " f"(frequency: {frequency_minutes} minutes). Triggering first run..." diff --git a/surfsense_backend/app/utils/webcrawler_utils.py b/surfsense_backend/app/utils/webcrawler_utils.py new file mode 100644 index 000000000..d6baf6d73 --- /dev/null +++ b/surfsense_backend/app/utils/webcrawler_utils.py @@ -0,0 +1,29 @@ +""" +Utility functions for webcrawler connector. + +This module is intentionally kept separate from the connector_indexers package +to avoid circular import issues. +""" + + +def parse_webcrawler_urls(initial_urls: str | list | None) -> list[str]: + """ + Parse URLs from webcrawler INITIAL_URLS value. + + Handles both string (newline-separated) and list formats. + + Args: + initial_urls: The INITIAL_URLS value (string, list, or None) + + Returns: + List of parsed, stripped, non-empty URLs + """ + if initial_urls is None: + return [] + + if isinstance(initial_urls, str): + return [url.strip() for url in initial_urls.split("\n") if url.strip()] + elif isinstance(initial_urls, list): + return [url.strip() for url in initial_urls if url.strip()] + else: + return [] From 22943972c21e607819df2487674d691f10532c13 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 19:15:25 +0200 Subject: [PATCH 60/69] fix(frontend): prevent infinite retry loop when chat clone fails Add cloneError state to track clone failures and prevent the useEffect from continuously retrying when completeClone() fails. --- .../[search_space_id]/new-chat/[[...chat_id]]/page.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index f6f70f83b..1f9dd433d 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -143,6 +143,7 @@ export default function NewChatPage() { const queryClient = useQueryClient(); const [isInitializing, setIsInitializing] = useState(true); const [isCompletingClone, setIsCompletingClone] = useState(false); + const [cloneError, setCloneError] = useState(false); const [threadId, setThreadId] = useState(null); const [currentThread, setCurrentThread] = useState(null); const [messages, setMessages] = useState([]); @@ -333,7 +334,7 @@ export default function NewChatPage() { // Handle clone completion when thread has clone_pending flag useEffect(() => { - if (!currentThread?.clone_pending || isCompletingClone) return; + if (!currentThread?.clone_pending || isCompletingClone || cloneError) return; const completeClone = async () => { setIsCompletingClone(true); @@ -351,13 +352,14 @@ export default function NewChatPage() { } catch (error) { console.error("[NewChatPage] Failed to complete clone:", error); toast.error("Failed to copy chat content. Please try again."); + setCloneError(true); } finally { setIsCompletingClone(false); } }; completeClone(); - }, [currentThread?.clone_pending, currentThread?.id, isCompletingClone, initializeThread, queryClient]); + }, [currentThread?.clone_pending, currentThread?.id, isCompletingClone, cloneError, initializeThread, queryClient]); // Handle scroll to comment from URL query params (e.g., from inbox item click) const searchParams = useSearchParams(); From c6d25ed7d88c9563cabbfd3d35aedccfdee57fc6 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 28 Jan 2026 22:53:34 +0530 Subject: [PATCH 61/69] feat(backend): Add legacy token handling in NotionHistoryConnector and log warnings for legacy usage in indexing --- .../app/connectors/notion_history.py | 52 ++++++++++++++++++- .../connector_indexers/notion_indexer.py | 29 ++++++++--- 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/surfsense_backend/app/connectors/notion_history.py b/surfsense_backend/app/connectors/notion_history.py index def86d721..af28249ee 100644 --- a/surfsense_backend/app/connectors/notion_history.py +++ b/surfsense_backend/app/connectors/notion_history.py @@ -77,6 +77,8 @@ class NotionHistoryConnector: self._pages_with_skipped_content: list[str] = [] # Optional callback to notify about retry progress (for user notifications) self._on_retry_callback: RetryCallbackType | None = None + # Track if using legacy integration token (for upgrade notification) + self._using_legacy_token: bool = False def set_retry_callback(self, callback: RetryCallbackType | None) -> None: """ @@ -119,6 +121,18 @@ class NotionHistoryConnector: config_data = connector.config.copy() + # Check for legacy integration token format first + # (for connectors created before OAuth was implemented) + legacy_token = config_data.get("NOTION_INTEGRATION_TOKEN") + raw_access_token = config_data.get("access_token") + + # Validate that we have some form of token + if not raw_access_token and not legacy_token: + raise ValueError( + "Notion integration not properly connected. " + "Please remove and re-add the Notion connector." + ) + # Decrypt credentials if they are encrypted token_encrypted = config_data.get("_token_encrypted", False) if token_encrypted and config.SECRET_KEY: @@ -143,13 +157,38 @@ class NotionHistoryConnector: f"Failed to decrypt Notion credentials for connector {self._connector_id}: {e!s}" ) raise ValueError( - f"Failed to decrypt Notion credentials: {e!s}" + "Notion credentials could not be decrypted. " + "Please remove and re-add the Notion connector." ) from e + # Handle legacy format: convert NOTION_INTEGRATION_TOKEN to access_token + if not config_data.get("access_token") and legacy_token: + config_data["access_token"] = legacy_token + self._using_legacy_token = True + logger.info( + f"Using legacy NOTION_INTEGRATION_TOKEN for connector {self._connector_id}" + ) + + # Final validation: ensure we have a valid access_token after all processing + final_token = config_data.get("access_token") + if not final_token or (isinstance(final_token, str) and not final_token.strip()): + raise ValueError( + "Notion access token is invalid or empty. " + "Please remove and re-add the Notion connector." + ) + try: self._credentials = NotionAuthCredentialsBase.from_dict(config_data) + except KeyError as e: + raise ValueError( + f"Notion credentials are incomplete (missing {e}). " + "Please reconnect your Notion account." + ) from e except Exception as e: - raise ValueError(f"Invalid Notion credentials: {e!s}") from e + raise ValueError( + f"Notion credentials format error: {e!s}. " + "Please reconnect your Notion account." + ) from e # Check if token is expired and refreshable if self._credentials.is_expired and self._credentials.is_refreshable: @@ -356,6 +395,15 @@ class NotionHistoryConnector: """ return len(self._pages_with_skipped_content) + def is_using_legacy_token(self) -> bool: + """ + Check if connector is using legacy integration token format. + + Returns: + True if using legacy NOTION_INTEGRATION_TOKEN, False if using OAuth + """ + return self._using_legacy_token + def _record_skipped_content(self, page_title: str): """ Record that a page had unsupported content skipped. diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index 5308720d9..7704dec95 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -176,6 +176,13 @@ async def index_notion_pages( logger.info( f"{pages_with_skipped_content} pages had Notion AI content skipped (not available via API)" ) + + # Check if using legacy integration token and log warning + if notion_client.is_using_legacy_token(): + logger.warning( + f"Connector {connector_id} is using legacy integration token. " + "Recommend reconnecting with OAuth." + ) except Exception as e: await task_logger.log_task_failure( log_entry, @@ -471,8 +478,8 @@ async def index_notion_pages( # Add user-friendly message about skipped Notion AI content if pages_with_skipped_ai_content > 0: result_message += ( - f" Audio transcriptions and AI summaries from Notion aren't accessible " - f"via their API — all other content was saved." + " Audio transcriptions and AI summaries from Notion aren't accessible " + "via their API - all other content was saved." ) # Log success @@ -496,18 +503,26 @@ async def index_notion_pages( # Clean up the async client await notion_client.close() - # Return user-friendly message about skipped AI content (if any) + # Build user-friendly notification messages # This will be shown in the notification to inform users - user_notification_message = None + notification_parts = [] + if pages_with_skipped_ai_content > 0: - user_notification_message = ( - "Some Notion AI content couldn't be synced (Notion API limitation)" + notification_parts.append( + "Some Notion AI content couldn't be synced (API limitation)" ) + if notion_client.is_using_legacy_token(): + notification_parts.append( + "Using legacy token. Reconnect with OAuth for better reliability." + ) + + user_notification_message = " ".join(notification_parts) if notification_parts else None + return ( total_processed, user_notification_message, - ) # Return message about skipped AI content if any + ) except SQLAlchemyError as db_error: await session.rollback() From ecb5572e693a8ab9fa411d30455c51cf80879b6a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 19:25:15 +0200 Subject: [PATCH 62/69] fix(backend): remove inaccessible podcast references when cloning chats When a podcast can't be cloned (not READY), remove the podcast_id from the cloned message to prevent 403 errors when users try to access it. --- surfsense_backend/app/services/public_chat_service.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index 79618974f..a5b8c9ffe 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -291,6 +291,9 @@ async def complete_clone_content( if old_podcast_id and old_podcast_id in podcast_id_map: result_data["podcast_id"] = podcast_id_map[old_podcast_id] + elif old_podcast_id: + # Podcast couldn't be cloned (not ready), remove reference + result_data.pop("podcast_id", None) new_message = NewChatMessage( thread_id=target_thread.id, From 59d5bf9aa5068b12e33cc166fdca3036da29e1c1 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 28 Jan 2026 23:18:10 +0530 Subject: [PATCH 63/69] fix(backend): Add error handling for invalid pagination cursor in NotionHistoryConnector to ensure graceful continuation of data fetching --- .../app/connectors/notion_history.py | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/surfsense_backend/app/connectors/notion_history.py b/surfsense_backend/app/connectors/notion_history.py index af28249ee..b347eb9c6 100644 --- a/surfsense_backend/app/connectors/notion_history.py +++ b/surfsense_backend/app/connectors/notion_history.py @@ -465,19 +465,33 @@ class NotionHistoryConnector: cursor = None while has_more: - if cursor: - search_params["start_cursor"] = cursor + try: + if cursor: + search_params["start_cursor"] = cursor - # Use retry wrapper for search API call - search_results = await self._api_call_with_retry( - notion.search, on_retry=self._on_retry_callback, **search_params - ) + # Use retry wrapper for search API call + search_results = await self._api_call_with_retry( + notion.search, on_retry=self._on_retry_callback, **search_params + ) - pages.extend(search_results["results"]) - has_more = search_results.get("has_more", False) + pages.extend(search_results["results"]) + has_more = search_results.get("has_more", False) - if has_more: - cursor = search_results.get("next_cursor") + if has_more: + cursor = search_results.get("next_cursor") + + except APIResponseError as e: + error_message = str(e) + # Handle invalid cursor - stop pagination gracefully + if "start_cursor provided is invalid" in error_message: + logger.warning( + f"Invalid pagination cursor encountered. " + f"Continuing with {len(pages)} pages already fetched." + ) + has_more = False + continue + # Re-raise other errors + raise all_page_data = [] From 20b8a17254b4a705d11326e87e1b8f22acc319c5 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 22:16:58 +0200 Subject: [PATCH 64/69] fix(backend): handle non-string elements in webcrawler URL list Add isinstance check to prevent AttributeError when INITIAL_URLS list contains non-string elements (None, int, dict) from malformed config data. --- surfsense_backend/app/utils/webcrawler_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/surfsense_backend/app/utils/webcrawler_utils.py b/surfsense_backend/app/utils/webcrawler_utils.py index d6baf6d73..05633de4f 100644 --- a/surfsense_backend/app/utils/webcrawler_utils.py +++ b/surfsense_backend/app/utils/webcrawler_utils.py @@ -1,8 +1,5 @@ """ Utility functions for webcrawler connector. - -This module is intentionally kept separate from the connector_indexers package -to avoid circular import issues. """ @@ -24,6 +21,6 @@ def parse_webcrawler_urls(initial_urls: str | list | None) -> list[str]: if isinstance(initial_urls, str): return [url.strip() for url in initial_urls.split("\n") if url.strip()] elif isinstance(initial_urls, list): - return [url.strip() for url in initial_urls if url.strip()] + return [url.strip() for url in initial_urls if isinstance(url, str) and url.strip()] else: return [] From 949ec949f6c2feeeac2a9bbbb1d161f7d8dcd568 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 28 Jan 2026 22:20:02 +0200 Subject: [PATCH 65/69] style(backend): run ruff format on 10 files --- .../app/agents/new_chat/tools/podcast.py | 4 ++- .../app/routes/new_chat_routes.py | 8 +++-- .../app/routes/public_chat_routes.py | 4 ++- .../routes/search_source_connectors_routes.py | 6 +++- surfsense_backend/app/schemas/new_chat.py | 3 -- surfsense_backend/app/schemas/podcasts.py | 4 ++- .../app/tasks/celery_tasks/podcast_tasks.py | 8 ++--- .../celery_tasks/schedule_checker_task.py | 4 ++- .../document_processors/file_processors.py | 35 ++++++++++--------- .../app/utils/webcrawler_utils.py | 4 ++- 10 files changed, 48 insertions(+), 32 deletions(-) diff --git a/surfsense_backend/app/agents/new_chat/tools/podcast.py b/surfsense_backend/app/agents/new_chat/tools/podcast.py index 424b04f77..1048ed881 100644 --- a/surfsense_backend/app/agents/new_chat/tools/podcast.py +++ b/surfsense_backend/app/agents/new_chat/tools/podcast.py @@ -54,7 +54,9 @@ def set_generating_podcast(search_space_id: int, podcast_id: int) -> None: client = get_redis_client() client.setex(_redis_key(search_space_id), 1800, str(podcast_id)) except Exception as e: - print(f"[generate_podcast] Warning: Could not set generating podcast in Redis: {e}") + print( + f"[generate_podcast] Warning: Could not set generating podcast in Redis: {e}" + ) def create_generate_podcast_tool( diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 541e25a75..38352d348 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -670,7 +670,9 @@ async def delete_thread( ) from None -@router.post("/threads/{thread_id}/complete-clone", response_model=CompleteCloneResponse) +@router.post( + "/threads/{thread_id}/complete-clone", response_model=CompleteCloneResponse +) async def complete_clone( thread_id: int, session: AsyncSession = Depends(get_async_session), @@ -702,7 +704,9 @@ async def complete_clone( raise HTTPException(status_code=400, detail="Clone already completed") if not thread.cloned_from_thread_id: - raise HTTPException(status_code=400, detail="No source thread to clone from") + raise HTTPException( + status_code=400, detail="No source thread to clone from" + ) message_count = await complete_clone_content( session=session, diff --git a/surfsense_backend/app/routes/public_chat_routes.py b/surfsense_backend/app/routes/public_chat_routes.py index 8b4f42559..4676f2ad0 100644 --- a/surfsense_backend/app/routes/public_chat_routes.py +++ b/surfsense_backend/app/routes/public_chat_routes.py @@ -53,7 +53,9 @@ async def clone_public_chat_endpoint( source_thread = await get_thread_by_share_token(session, share_token) if not source_thread: - raise HTTPException(status_code=404, detail="Chat not found or no longer public") + raise HTTPException( + status_code=404, detail="Chat not found or no longer public" + ) target_search_space_id = await get_user_default_search_space(session, user.id) diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 6ba67fb69..edb1760f3 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -941,7 +941,11 @@ async def index_connector_content( f"Triggering web pages indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}" ) index_crawled_urls_task.delay( - connector_id, search_space_id, str(user.id), indexing_from, indexing_to + connector_id, + search_space_id, + str(user.id), + indexing_from, + indexing_to, ) response_message = "Web page indexing started in the background." diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index b420b1b91..ab6be9c9f 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -257,14 +257,11 @@ class PublicChatResponse(BaseModel): class CloneInitResponse(BaseModel): - - thread_id: int search_space_id: int share_token: str class CompleteCloneResponse(BaseModel): - status: str message_count: int diff --git a/surfsense_backend/app/schemas/podcasts.py b/surfsense_backend/app/schemas/podcasts.py index 9e5cb0262..60f9d7dc0 100644 --- a/surfsense_backend/app/schemas/podcasts.py +++ b/surfsense_backend/app/schemas/podcasts.py @@ -59,6 +59,8 @@ class PodcastRead(PodcastBase): "search_space_id": obj.search_space_id, "status": obj.status, "created_at": obj.created_at, - "transcript_entries": len(obj.podcast_transcript) if obj.podcast_transcript else None, + "transcript_entries": len(obj.podcast_transcript) + if obj.podcast_transcript + else None, } return cls(**data) diff --git a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py index 0ce714cdc..2ce8716e0 100644 --- a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py @@ -55,7 +55,9 @@ def _clear_generating_podcast(search_space_id: int) -> None: client = redis.from_url(redis_url, decode_responses=True) key = f"podcast:generating:{search_space_id}" client.delete(key) - logger.info(f"Cleared generating podcast key for search_space_id={search_space_id}") + logger.info( + f"Cleared generating podcast key for search_space_id={search_space_id}" + ) except Exception as e: logger.warning(f"Could not clear generating podcast key: {e}") @@ -119,9 +121,7 @@ async def _generate_content_podcast( ) -> dict: """Generate content-based podcast and update existing record.""" async with get_celery_session_maker()() as session: - result = await session.execute( - select(Podcast).filter(Podcast.id == podcast_id) - ) + result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) podcast = result.scalars().first() if not podcast: diff --git a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py index 22d45af21..b33e25170 100644 --- a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py +++ b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py @@ -165,7 +165,9 @@ async def _check_and_trigger_schedules(): from app.utils.webcrawler_utils import parse_webcrawler_urls connector_config = connector.config or {} - urls = parse_webcrawler_urls(connector_config.get("INITIAL_URLS")) + urls = parse_webcrawler_urls( + connector_config.get("INITIAL_URLS") + ) if urls: task.delay( diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 5161fb569..6c4be0cb8 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -55,7 +55,9 @@ LLAMACLOUD_RETRYABLE_EXCEPTIONS = ( ) # Timeout calculation constants -UPLOAD_BYTES_PER_SECOND_SLOW = 100 * 1024 # 100 KB/s (conservative for slow connections) +UPLOAD_BYTES_PER_SECOND_SLOW = ( + 100 * 1024 +) # 100 KB/s (conservative for slow connections) MIN_UPLOAD_TIMEOUT = 120 # Minimum 2 minutes for any file MAX_UPLOAD_TIMEOUT = 1800 # Maximum 30 minutes for very large files BASE_JOB_TIMEOUT = 600 # 10 minutes base for job processing @@ -219,19 +221,19 @@ async def find_existing_document_with_migration( def calculate_upload_timeout(file_size_bytes: int) -> float: """ Calculate appropriate upload timeout based on file size. - + Assumes a conservative slow connection speed to handle worst-case scenarios. - + Args: file_size_bytes: Size of the file in bytes - + Returns: Timeout in seconds """ # Calculate time needed at slow connection speed # Add 50% buffer for network variability and SSL overhead estimated_time = (file_size_bytes / UPLOAD_BYTES_PER_SECOND_SLOW) * 1.5 - + # Clamp to reasonable bounds return max(MIN_UPLOAD_TIMEOUT, min(estimated_time, MAX_UPLOAD_TIMEOUT)) @@ -239,21 +241,21 @@ def calculate_upload_timeout(file_size_bytes: int) -> float: def calculate_job_timeout(estimated_pages: int, file_size_bytes: int) -> float: """ Calculate job processing timeout based on page count and file size. - + Args: estimated_pages: Estimated number of pages file_size_bytes: Size of the file in bytes - + Returns: Timeout in seconds """ # Base timeout + time per page page_based_timeout = BASE_JOB_TIMEOUT + (estimated_pages * PER_PAGE_JOB_TIMEOUT) - + # Also consider file size (large images take longer to process) # ~1 minute per 10MB of file size size_based_timeout = BASE_JOB_TIMEOUT + (file_size_bytes / (10 * 1024 * 1024)) * 60 - + # Use the larger of the two estimates return max(page_based_timeout, size_based_timeout) @@ -284,18 +286,18 @@ async def parse_with_llamacloud_retry( """ import os import random - + from llama_cloud_services import LlamaParse from llama_cloud_services.parse.utils import ResultType # Get file size for timeout calculations file_size_bytes = os.path.getsize(file_path) file_size_mb = file_size_bytes / (1024 * 1024) - + # Calculate dynamic timeouts based on file size and page count upload_timeout = calculate_upload_timeout(file_size_bytes) job_timeout = calculate_job_timeout(estimated_pages, file_size_bytes) - + # HTTP client timeouts - scaled based on file size # Write timeout is critical for large file uploads custom_timeout = httpx.Timeout( @@ -304,7 +306,7 @@ async def parse_with_llamacloud_retry( write=upload_timeout, # Dynamic based on file size (upload time) pool=120.0, # 2 minutes to acquire connection from pool ) - + logging.info( f"LlamaCloud upload configured: file_size={file_size_mb:.1f}MB, " f"pages={estimated_pages}, upload_timeout={upload_timeout:.0f}s, " @@ -335,14 +337,14 @@ async def parse_with_llamacloud_retry( # Parse the file asynchronously result = await parser.aparse(file_path) - + # Success - log if we had previous failures if attempt > 1: logging.info( f"LlamaCloud upload succeeded on attempt {attempt} after " f"{len(attempt_errors)} failures" ) - + return result except LLAMACLOUD_RETRYABLE_EXCEPTIONS as e: @@ -355,8 +357,7 @@ async def parse_with_llamacloud_retry( # Calculate exponential backoff with jitter # Base delay doubles each attempt, capped at max delay base_delay = min( - LLAMACLOUD_BASE_DELAY * (2 ** (attempt - 1)), - LLAMACLOUD_MAX_DELAY + LLAMACLOUD_BASE_DELAY * (2 ** (attempt - 1)), LLAMACLOUD_MAX_DELAY ) # Add random jitter (±25%) to prevent thundering herd jitter = base_delay * 0.25 * (2 * random.random() - 1) diff --git a/surfsense_backend/app/utils/webcrawler_utils.py b/surfsense_backend/app/utils/webcrawler_utils.py index 05633de4f..31d2ebe50 100644 --- a/surfsense_backend/app/utils/webcrawler_utils.py +++ b/surfsense_backend/app/utils/webcrawler_utils.py @@ -21,6 +21,8 @@ def parse_webcrawler_urls(initial_urls: str | list | None) -> list[str]: if isinstance(initial_urls, str): return [url.strip() for url in initial_urls.split("\n") if url.strip()] elif isinstance(initial_urls, list): - return [url.strip() for url in initial_urls if isinstance(url, str) and url.strip()] + return [ + url.strip() for url in initial_urls if isinstance(url, str) and url.strip() + ] else: return [] From 076270ab8678faa0dab66aafc97544ef16c0cceb Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Thu, 29 Jan 2026 10:54:15 +0530 Subject: [PATCH 66/69] fix(backend): enhance indexing success logging to include informational warnings about API limitations --- .../app/routes/search_source_connectors_routes.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 26cf82e81..a27c2125c 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -1325,8 +1325,15 @@ async def _run_indexing_with_notifications( "no " in error_or_warning_lower and "found" in error_or_warning_lower ) + # Informational warnings - sync succeeded but some content couldn't be synced + # These are NOT errors, just notifications about API limitations or recommendations + is_info_warning = ( + "couldn't be synced" in error_or_warning_lower + or "using legacy token" in error_or_warning_lower + or "(api limitation)" in error_or_warning_lower + ) - if is_duplicate_warning or is_empty_result: + if is_duplicate_warning or is_empty_result or is_info_warning: # These are success cases - sync worked, just found nothing new logger.info(f"Indexing completed successfully: {error_or_warning}") # Still update timestamp so ElectricSQL syncs and clears "Syncing" UI From 16b839138d4ec085a8a2e579ce6e14a435cd1163 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Wed, 28 Jan 2026 21:58:49 -0800 Subject: [PATCH 67/69] feat(backend): add new incentive task type for Reddit follow Introduce a new incentive task type, REDDIT_FOLLOW, to encourage users to join the SurfSense community on Reddit. This includes a title, description, pages reward, and action URL for the task. --- .../83_add_reddit_follow_incentive_task.py | 35 +++++++++++++++++++ surfsense_backend/app/db.py | 7 ++++ 2 files changed, 42 insertions(+) create mode 100644 surfsense_backend/alembic/versions/83_add_reddit_follow_incentive_task.py diff --git a/surfsense_backend/alembic/versions/83_add_reddit_follow_incentive_task.py b/surfsense_backend/alembic/versions/83_add_reddit_follow_incentive_task.py new file mode 100644 index 000000000..a9fbb77ae --- /dev/null +++ b/surfsense_backend/alembic/versions/83_add_reddit_follow_incentive_task.py @@ -0,0 +1,35 @@ +"""Add REDDIT_FOLLOW to incentive task type enum + +Revision ID: 83 +Revises: 82 + +Changes: +1. Add REDDIT_FOLLOW value to incentivetasktype enum +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "83" +down_revision: str | None = "82" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add REDDIT_FOLLOW to incentivetasktype enum.""" + op.execute("ALTER TYPE incentivetasktype ADD VALUE IF NOT EXISTS 'REDDIT_FOLLOW'") + + +def downgrade() -> None: + """Remove REDDIT_FOLLOW from incentivetasktype enum. + + Note: PostgreSQL doesn't support removing values from enums directly. + This would require recreating the enum type, which is complex and risky. + For safety, we leave the enum value in place during downgrade. + """ + pass diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 8c6942e44..876bc1d3c 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -163,6 +163,7 @@ class IncentiveTaskType(str, Enum): """ GITHUB_STAR = "GITHUB_STAR" + REDDIT_FOLLOW = "REDDIT_FOLLOW" # Future tasks can be added here: # GITHUB_ISSUE = "GITHUB_ISSUE" # SOCIAL_SHARE = "SOCIAL_SHARE" @@ -178,6 +179,12 @@ INCENTIVE_TASKS_CONFIG = { "pages_reward": 100, "action_url": "https://github.com/MODSetter/SurfSense", }, + IncentiveTaskType.REDDIT_FOLLOW: { + "title": "Join our Subreddit", + "description": "Join the SurfSense community on Reddit", + "pages_reward": 100, + "action_url": "https://www.reddit.com/r/SurfSense/", + }, # Future tasks can be configured here: # IncentiveTaskType.GITHUB_ISSUE: { # "title": "Create an issue", From 70fcb2d0550945666a46828d2091a429c61b7758 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Wed, 28 Jan 2026 22:15:43 -0800 Subject: [PATCH 68/69] chore: linting --- .../83_add_reddit_follow_incentive_task.py | 4 +- .../new-chat/[[...chat_id]]/page.tsx | 9 +++- .../components/homepage/hero-section.tsx | 8 ++-- .../layout/hooks/SidebarContext.tsx | 2 +- .../components/layout/hooks/index.ts | 2 +- .../layout/ui/sidebar/InboxSidebar.tsx | 2 +- .../lib/apis/notifications-api.service.ts | 2 +- surfsense_web/package.json | 4 +- surfsense_web/pnpm-lock.yaml | 44 +++++++++---------- surfsense_web/types/window.d.ts | 2 - 10 files changed, 41 insertions(+), 38 deletions(-) diff --git a/surfsense_backend/alembic/versions/83_add_reddit_follow_incentive_task.py b/surfsense_backend/alembic/versions/83_add_reddit_follow_incentive_task.py index a9fbb77ae..52ab77446 100644 --- a/surfsense_backend/alembic/versions/83_add_reddit_follow_incentive_task.py +++ b/surfsense_backend/alembic/versions/83_add_reddit_follow_incentive_task.py @@ -9,8 +9,6 @@ Changes: from collections.abc import Sequence -import sqlalchemy as sa - from alembic import op # revision identifiers, used by Alembic. @@ -27,7 +25,7 @@ def upgrade() -> None: def downgrade() -> None: """Remove REDDIT_FOLLOW from incentivetasktype enum. - + Note: PostgreSQL doesn't support removing values from enums directly. This would require recreating the enum type, which is complex and risky. For safety, we leave the enum value in place during downgrade. diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 1f9dd433d..803bd6661 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -359,7 +359,14 @@ export default function NewChatPage() { }; completeClone(); - }, [currentThread?.clone_pending, currentThread?.id, isCompletingClone, cloneError, initializeThread, queryClient]); + }, [ + currentThread?.clone_pending, + currentThread?.id, + isCompletingClone, + cloneError, + initializeThread, + queryClient, + ]); // Handle scroll to comment from URL query params (e.g., from inbox item click) const searchParams = useSearchParams(); diff --git a/surfsense_web/components/homepage/hero-section.tsx b/surfsense_web/components/homepage/hero-section.tsx index 64e5eb4bd..b1ed38c07 100644 --- a/surfsense_web/components/homepage/hero-section.tsx +++ b/surfsense_web/components/homepage/hero-section.tsx @@ -109,10 +109,10 @@ export function HeroSection() { Connect any LLM to your internal knowledge sources and chat with it in real time alongside your team.

-
- - -
+
+ + +
= 10.0.0'} - '@posthog/core@1.14.1': - resolution: {integrity: sha512-DtmJ1y1IDauX8yAZtIotRAYDRkgCCMLk5S9vFFRX7vufhWblQuRUOgn9WYSJrocJlZKm1aEjDzGQ0uyL7HcdLw==} + '@posthog/core@1.15.0': + resolution: {integrity: sha512-n2/Yy0+qc8xhmlcOFiYqTcGHBZuuaQjVolfFXk7yTCynzdMe8Fx1zYvPPUrbdQK5tWwXyilkzybpqhK6I7aV4Q==} '@posthog/react@1.7.0': resolution: {integrity: sha512-pM7GL7z/rKjiIwosbRiQA3buhLI6vUo+wg+T/ZrVZC7O5bVU07TfgNZTcuOj8E9dx7vDbfNrc1kjDN7PKMM8ug==} @@ -1618,8 +1618,8 @@ packages: '@types/react': optional: true - '@posthog/types@1.335.5': - resolution: {integrity: sha512-QYj5c8wSaXGvV4ugEN65GHD0sIXRveGiZxV4tqpyoP7YIAvAwwA0do0yNfTrEjDXucCQn25pMbCqO25hJrMi5w==} + '@posthog/types@1.336.1': + resolution: {integrity: sha512-KSGst/a/HK7GhfLSbwAy35HtU3KjDqjLtq3+PoDlGfbz9SbO0owjc6jo6hAHnMz67QTSvrn/r0xgimDO4NQ+rA==} '@prisma/client@4.8.1': resolution: {integrity: sha512-d4xhZhETmeXK/yZ7K0KcVOzEfI5YKGGEr4F5SBV04/MU4ncN/HcE28sy3e4Yt8UFW0ZuImKFQJE+9rWt9WbGSQ==} @@ -5601,11 +5601,11 @@ packages: resolution: {integrity: sha512-Jtc2612XINuBjIl/QTWsV5UvE8UHuNblcO3vVADSrKsrc6RqGX6lOW1cEo3CM2v0XG4Nat8nI+YM7/f26VxXLw==} engines: {node: '>=12'} - posthog-js@1.335.5: - resolution: {integrity: sha512-1zCEdn7bc1mQ/jpd62YY8U1CyNiftIBE6uKqE2L+mjZ5aJyB2rtUAXefaTbaR/3A98tItjSej4aIa8FBN+O1fw==} + posthog-js@1.336.1: + resolution: {integrity: sha512-YphbVhXnImmZoALvf2oh129Cxu6IRQ9P9sWhuyY+dGe7jqt1jBp6Dg7QEK39stB4rzxmT/N3OLFcWZM7ZYQzCg==} - posthog-node@5.24.3: - resolution: {integrity: sha512-RpjccR8k/GHjtIzRbtlS/Ipw+GvJLJCicJW6L4IZm7gXXNjdyW26x0ba0kvUtWS6mcgx8EBbgXERd5eNsXSjlQ==} + posthog-node@5.24.4: + resolution: {integrity: sha512-U90zdez3jbqAZ4HNxCM/n6SK9h2W59DPS0l2rboWiaKiBi47rN+YJmFQqx1rQsQA47JhNsmAAh2iwqYQ+VTjow==} engines: {node: ^20.20.0 || >=22.22.0} preact@10.28.2: @@ -7860,18 +7860,18 @@ snapshots: '@parcel/watcher-win32-ia32': 2.5.1 '@parcel/watcher-win32-x64': 2.5.1 - '@posthog/core@1.14.1': + '@posthog/core@1.15.0': dependencies: cross-spawn: 7.0.6 - '@posthog/react@1.7.0(@types/react@19.2.7)(posthog-js@1.335.5)(react@19.2.3)': + '@posthog/react@1.7.0(@types/react@19.2.7)(posthog-js@1.336.1)(react@19.2.3)': dependencies: - posthog-js: 1.335.5 + posthog-js: 1.336.1 react: 19.2.3 optionalDependencies: '@types/react': 19.2.7 - '@posthog/types@1.335.5': {} + '@posthog/types@1.336.1': {} '@prisma/client@4.8.1': dependencies: @@ -12484,15 +12484,15 @@ snapshots: postgres@3.4.7: {} - posthog-js@1.335.5: + posthog-js@1.336.1: dependencies: '@opentelemetry/api': 1.9.0 '@opentelemetry/api-logs': 0.208.0 '@opentelemetry/exporter-logs-otlp-http': 0.208.0(@opentelemetry/api@1.9.0) '@opentelemetry/resources': 2.5.0(@opentelemetry/api@1.9.0) '@opentelemetry/sdk-logs': 0.208.0(@opentelemetry/api@1.9.0) - '@posthog/core': 1.14.1 - '@posthog/types': 1.335.5 + '@posthog/core': 1.15.0 + '@posthog/types': 1.336.1 core-js: 3.47.0 dompurify: 3.3.1 fflate: 0.4.8 @@ -12500,9 +12500,9 @@ snapshots: query-selector-shadow-dom: 1.0.1 web-vitals: 5.1.0 - posthog-node@5.24.3: + posthog-node@5.24.4: dependencies: - '@posthog/core': 1.14.1 + '@posthog/core': 1.15.0 preact@10.28.2: {} diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index fcb6878e3..4d4abc9c1 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -5,5 +5,3 @@ declare global { posthog?: PostHog; } } - -export {}; From d39bf3510f8f05cd03606d665f66587fb74ec0f5 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Wed, 28 Jan 2026 22:20:23 -0800 Subject: [PATCH 69/69] chore: linting --- .../app/connectors/notion_history.py | 16 ++++++---------- .../app/services/notification_service.py | 4 +--- .../tasks/connector_indexers/notion_indexer.py | 4 +++- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/surfsense_backend/app/connectors/notion_history.py b/surfsense_backend/app/connectors/notion_history.py index b347eb9c6..ff8478905 100644 --- a/surfsense_backend/app/connectors/notion_history.py +++ b/surfsense_backend/app/connectors/notion_history.py @@ -171,7 +171,9 @@ class NotionHistoryConnector: # Final validation: ensure we have a valid access_token after all processing final_token = config_data.get("access_token") - if not final_token or (isinstance(final_token, str) and not final_token.strip()): + if not final_token or ( + isinstance(final_token, str) and not final_token.strip() + ): raise ValueError( "Notion access token is invalid or empty. " "Please remove and re-add the Notion connector." @@ -356,9 +358,7 @@ class NotionHistoryConnector: ) except Exception as callback_error: # Don't let callback errors break the retry logic - logger.warning( - f"Retry callback failed: {callback_error}" - ) + logger.warning(f"Retry callback failed: {callback_error}") # Wait before retrying await asyncio.sleep(wait_time) @@ -588,9 +588,7 @@ class NotionHistoryConnector: except APIResponseError as e: error_message = str(e) # Check if this is an unsupported block type error - if any( - err in error_message for err in UNSUPPORTED_BLOCK_TYPE_ERRORS - ): + if any(err in error_message for err in UNSUPPORTED_BLOCK_TYPE_ERRORS): logger.warning( f"Skipping page blocks due to unsupported block type in page {page_id}: {error_message}" ) @@ -683,9 +681,7 @@ class NotionHistoryConnector: except APIResponseError as e: error_message = str(e) # Check if this is an unsupported block type error - if any( - err in error_message for err in UNSUPPORTED_BLOCK_TYPE_ERRORS - ): + if any(err in error_message for err in UNSUPPORTED_BLOCK_TYPE_ERRORS): logger.warning( f"Skipping children of block {block_id} due to unsupported block type: {error_message}" ) diff --git a/surfsense_backend/app/services/notification_service.py b/surfsense_backend/app/services/notification_service.py index 1a91d000f..e0385b91c 100644 --- a/surfsense_backend/app/services/notification_service.py +++ b/surfsense_backend/app/services/notification_service.py @@ -382,9 +382,7 @@ class ConnectorIndexingNotificationHandler(BaseNotificationHandler): "temporary_error": f"{service_name} temporarily unavailable", } - base_message = retry_messages.get( - retry_reason, f"Waiting for {service_name}" - ) + base_message = retry_messages.get(retry_reason, f"Waiting for {service_name}") # Add wait time and progress info if wait_seconds and wait_seconds > 5: diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index b8d2297c5..a65bf84a7 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -534,7 +534,9 @@ async def index_notion_pages( "Using legacy token. Reconnect with OAuth for better reliability." ) - user_notification_message = " ".join(notification_parts) if notification_parts else None + user_notification_message = ( + " ".join(notification_parts) if notification_parts else None + ) return ( total_processed,