diff --git a/.vscode/settings.json b/.vscode/settings.json index 05bd30702..f134660b6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,3 @@ { - "biome.configurationPath": "./surfsense_web/biome.json", - "deepscan.ignoreConfirmWarning": true + "biome.configurationPath": "./surfsense_web/biome.json" } \ No newline at end of file diff --git a/surfsense_backend/alembic/versions/81_add_public_chat_features.py b/surfsense_backend/alembic/versions/81_add_public_chat_features.py new file mode 100644 index 000000000..ab73b06bb --- /dev/null +++ b/surfsense_backend/alembic/versions/81_add_public_chat_features.py @@ -0,0 +1,105 @@ +"""Add public chat sharing and cloning features to new_chat_threads + +Revision ID: 81 +Revises: 80 +Create Date: 2026-01-23 + +Adds columns for: +1. Public sharing via tokenized URLs (public_share_token, public_share_enabled) +2. Clone tracking for audit (cloned_from_thread_id, cloned_at) +3. History bootstrap flag for cloned chats (needs_history_bootstrap) +""" + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "81" +down_revision: str | None = "80" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add public sharing and cloning columns to new_chat_threads.""" + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS public_share_token VARCHAR(64); + """ + ) + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS public_share_enabled BOOLEAN NOT NULL DEFAULT FALSE; + """ + ) + + op.execute( + """ + CREATE UNIQUE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_token + ON new_chat_threads(public_share_token) + WHERE public_share_token IS NOT NULL; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_enabled + ON new_chat_threads(public_share_enabled) + WHERE public_share_enabled = TRUE; + """ + ) + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS cloned_from_thread_id INTEGER + REFERENCES new_chat_threads(id) ON DELETE SET NULL; + """ + ) + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS cloned_at TIMESTAMP WITH TIME ZONE; + """ + ) + + op.execute( + """ + ALTER TABLE new_chat_threads + ADD COLUMN IF NOT EXISTS needs_history_bootstrap BOOLEAN NOT NULL DEFAULT FALSE; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS ix_new_chat_threads_cloned_from_thread_id + ON new_chat_threads(cloned_from_thread_id) + WHERE cloned_from_thread_id IS NOT NULL; + """ + ) + + +def downgrade() -> None: + """Remove public sharing and cloning columns from new_chat_threads.""" + + op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_cloned_from_thread_id") + op.execute( + "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS needs_history_bootstrap" + ) + op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS cloned_at") + op.execute( + "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS cloned_from_thread_id" + ) + + op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_enabled") + op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_token") + op.execute( + "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_enabled" + ) + op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_token") diff --git a/surfsense_backend/alembic/versions/81_add_public_share_to_chat_threads.py b/surfsense_backend/alembic/versions/81_add_public_share_to_chat_threads.py deleted file mode 100644 index 33e1a88e9..000000000 --- a/surfsense_backend/alembic/versions/81_add_public_share_to_chat_threads.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Add public sharing columns to new_chat_threads - -Revision ID: 81 -Revises: 80 -Create Date: 2026-01-23 - -Adds public_share_token and public_share_enabled columns to enable -public sharing of chat threads via secure tokenized URLs. -""" - -from collections.abc import Sequence - -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "81" -down_revision: str | None = "80" -branch_labels: str | Sequence[str] | None = None -depends_on: str | Sequence[str] | None = None - - -def upgrade() -> None: - """Add public sharing columns to new_chat_threads.""" - # Add public_share_token column - op.execute( - """ - ALTER TABLE new_chat_threads - ADD COLUMN IF NOT EXISTS public_share_token VARCHAR(64); - """ - ) - - # Add public_share_enabled column with default false - op.execute( - """ - ALTER TABLE new_chat_threads - ADD COLUMN IF NOT EXISTS public_share_enabled BOOLEAN NOT NULL DEFAULT FALSE; - """ - ) - - # Add unique partial index on public_share_token (only non-null values) - op.execute( - """ - CREATE UNIQUE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_token - ON new_chat_threads(public_share_token) - WHERE public_share_token IS NOT NULL; - """ - ) - - # Add partial index on public_share_enabled for fast public chat queries - op.execute( - """ - CREATE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_enabled - ON new_chat_threads(public_share_enabled) - WHERE public_share_enabled = TRUE; - """ - ) - - -def downgrade() -> None: - """Remove public sharing columns from new_chat_threads.""" - op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_enabled") - op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_token") - op.execute( - "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_enabled" - ) - op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_token") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index e3b077ff0..0182d2c53 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -412,6 +412,25 @@ class NewChatThread(BaseModel, TimestampMixin): server_default="false", ) + # Clone tracking - for audit and history bootstrap + cloned_from_thread_id = Column( + Integer, + ForeignKey("new_chat_threads.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + cloned_at = Column( + TIMESTAMP(timezone=True), + nullable=True, + ) + # Flag to bootstrap LangGraph checkpointer with DB messages on first message + needs_history_bootstrap = Column( + Boolean, + nullable=False, + default=False, + server_default="false", + ) + # Relationships search_space = relationship("SearchSpace", back_populates="new_chat_threads") created_by = relationship("User", back_populates="new_chat_threads") diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 4571e9051..db371a81c 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -1027,6 +1027,7 @@ async def handle_new_chat( attachments=request.attachments, mentioned_document_ids=request.mentioned_document_ids, mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, + needs_history_bootstrap=thread.needs_history_bootstrap, ), media_type="text/event-stream", headers={ @@ -1254,6 +1255,7 @@ async def regenerate_response( mentioned_document_ids=request.mentioned_document_ids, mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids, checkpoint_id=target_checkpoint_id, + needs_history_bootstrap=thread.needs_history_bootstrap, ): yield chunk # If we get here, streaming completed successfully diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index a6434a611..7c3b89af9 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -4,6 +4,7 @@ Service layer for public chat sharing and cloning. import re import secrets +from datetime import UTC, datetime from uuid import UUID from fastapi import HTTPException @@ -283,6 +284,9 @@ async def clone_public_chat( search_space_id=target_search_space_id, created_by_id=user_id, public_share_enabled=False, + cloned_from_thread_id=source_thread.id, + cloned_at=datetime.now(UTC), + needs_history_bootstrap=True, ) session.add(new_thread) await session.flush() diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 39d85f0c6..875b7e95a 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -18,6 +18,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent +from app.utils.content_utils import bootstrap_history_from_db from app.agents.new_chat.checkpointer import get_checkpointer from app.agents.new_chat.llm_config import ( AgentConfig, @@ -205,13 +206,13 @@ async def stream_new_chat( mentioned_document_ids: list[int] | None = None, mentioned_surfsense_doc_ids: list[int] | None = None, checkpoint_id: str | None = None, + needs_history_bootstrap: bool = False, ) -> AsyncGenerator[str, None]: """ Stream chat responses from the new SurfSense deep agent. This uses the Vercel AI SDK Data Stream Protocol (SSE format) for streaming. The chat_id is used as LangGraph's thread_id for memory/checkpointing. - Message history can be passed from the frontend for context. Args: user_query: The user's query @@ -221,6 +222,7 @@ async def stream_new_chat( user_id: The current user's UUID string (for memory tools and session state) llm_config_id: The LLM configuration ID (default: -1 for first global config) attachments: Optional attachments with extracted content + needs_history_bootstrap: If True, load message history from DB (for cloned chats) mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat checkpoint_id: Optional checkpoint ID to rewind/fork from (for edit/reload operations) @@ -305,9 +307,24 @@ async def stream_new_chat( firecrawl_api_key=firecrawl_api_key, # Pass Firecrawl API key if configured ) - # Build input with message history from frontend + # Build input with message history langchain_messages = [] + # Bootstrap history for cloned chats (no LangGraph checkpoint exists yet) + if needs_history_bootstrap: + langchain_messages = await bootstrap_history_from_db(session, chat_id) + + # Clear the flag so we don't bootstrap again on next message + from app.db import NewChatThread + + thread_result = await session.execute( + select(NewChatThread).filter(NewChatThread.id == chat_id) + ) + thread = thread_result.scalars().first() + if thread: + thread.needs_history_bootstrap = False + await session.commit() + # Fetch mentioned documents if any (with chunks for proper citations) mentioned_documents: list[Document] = [] if mentioned_document_ids: diff --git a/surfsense_backend/app/utils/content_utils.py b/surfsense_backend/app/utils/content_utils.py new file mode 100644 index 000000000..d2342b79e --- /dev/null +++ b/surfsense_backend/app/utils/content_utils.py @@ -0,0 +1,75 @@ +""" +Utilities for working with message content. + +Message content in new_chat_messages can be stored in various formats: +- String: Simple text content +- List: Array of content parts [{"type": "text", "text": "..."}, {"type": "tool-call", ...}] +- Dict: Single content object + +These utilities help extract and transform content for different use cases. +""" + +from langchain_core.messages import AIMessage, HumanMessage +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + + +def extract_text_content(content: str | dict | list) -> str: + """Extract plain text content from various message formats.""" + if isinstance(content, str): + return content + if isinstance(content, dict): + # Handle dict with 'text' key + if "text" in content: + return content["text"] + return str(content) + if isinstance(content, list): + # Handle list of parts (e.g., [{"type": "text", "text": "..."}]) + texts = [] + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + texts.append(part.get("text", "")) + elif isinstance(part, str): + texts.append(part) + return "\n".join(texts) if texts else "" + return "" + + +async def bootstrap_history_from_db( + session: AsyncSession, + thread_id: int, +) -> list[HumanMessage | AIMessage]: + """ + Load message history from database and convert to LangChain format. + + Used for cloned chats where the LangGraph checkpointer has no state, + but we have messages in the database that should be used as context. + + Args: + session: Database session + thread_id: The chat thread ID + + Returns: + List of LangChain messages (HumanMessage/AIMessage) + """ + from app.db import NewChatMessage + + result = await session.execute( + select(NewChatMessage) + .filter(NewChatMessage.thread_id == thread_id) + .order_by(NewChatMessage.created_at) + ) + db_messages = result.scalars().all() + + langchain_messages: list[HumanMessage | AIMessage] = [] + + for msg in db_messages: + text_content = extract_text_content(msg.content) + if not text_content: + continue + if msg.role == "user": + langchain_messages.append(HumanMessage(content=text_content)) + elif msg.role == "assistant": + langchain_messages.append(AIMessage(content=text_content)) + + return langchain_messages