Merge remote-tracking branch 'upstream/dev' into feat/inbox

2026-06-28 21:49:40 +02:00 · 2026-01-28 09:26:04 +05:30 · 2026-01-28 09:26:04 +05:30 · 614761bb17
commit 614761bb17
parent 79f7dfbbed b598cbeac3
64 changed files with 2604 additions and 730 deletions
--- a/surfsense_backend/alembic/versions/81_add_public_chat_features.py
+++ b/surfsense_backend/alembic/versions/81_add_public_chat_features.py
@ -0,0 +1,114 @@
+"""Add public chat sharing and cloning features to new_chat_threads
+
+Revision ID: 81
+Revises: 80
+Create Date: 2026-01-23
+
+Adds columns for:
+1. Public sharing via tokenized URLs (public_share_token, public_share_enabled)
+2. Clone tracking for audit (cloned_from_thread_id, cloned_at)
+3. History bootstrap flag for cloned chats (needs_history_bootstrap)
+4. Clone pending flag for two-phase clone (clone_pending)
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "81"
+down_revision: str | None = "80"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add public sharing and cloning columns to new_chat_threads."""
+
+    op.execute(
+        """
+        ALTER TABLE new_chat_threads
+        ADD COLUMN IF NOT EXISTS public_share_token VARCHAR(64);
+        """
+    )
+
+    op.execute(
+        """
+        ALTER TABLE new_chat_threads
+        ADD COLUMN IF NOT EXISTS public_share_enabled BOOLEAN NOT NULL DEFAULT FALSE;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE UNIQUE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_token
+        ON new_chat_threads(public_share_token)
+        WHERE public_share_token IS NOT NULL;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS ix_new_chat_threads_public_share_enabled
+        ON new_chat_threads(public_share_enabled)
+        WHERE public_share_enabled = TRUE;
+        """
+    )
+
+    op.execute(
+        """
+        ALTER TABLE new_chat_threads
+        ADD COLUMN IF NOT EXISTS cloned_from_thread_id INTEGER
+        REFERENCES new_chat_threads(id) ON DELETE SET NULL;
+        """
+    )
+
+    op.execute(
+        """
+        ALTER TABLE new_chat_threads
+        ADD COLUMN IF NOT EXISTS cloned_at TIMESTAMP WITH TIME ZONE;
+        """
+    )
+
+    op.execute(
+        """
+        ALTER TABLE new_chat_threads
+        ADD COLUMN IF NOT EXISTS needs_history_bootstrap BOOLEAN NOT NULL DEFAULT FALSE;
+        """
+    )
+
+    op.execute(
+        """
+        ALTER TABLE new_chat_threads
+        ADD COLUMN IF NOT EXISTS clone_pending BOOLEAN NOT NULL DEFAULT FALSE;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS ix_new_chat_threads_cloned_from_thread_id
+        ON new_chat_threads(cloned_from_thread_id)
+        WHERE cloned_from_thread_id IS NOT NULL;
+        """
+    )
+
+
+def downgrade() -> None:
+    """Remove public sharing and cloning columns from new_chat_threads."""
+
+    op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_cloned_from_thread_id")
+    op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS clone_pending")
+    op.execute(
+        "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS needs_history_bootstrap"
+    )
+    op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS cloned_at")
+    op.execute(
+        "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS cloned_from_thread_id"
+    )
+
+    op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_enabled")
+    op.execute("DROP INDEX IF EXISTS ix_new_chat_threads_public_share_token")
+    op.execute(
+        "ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_enabled"
+    )
+    op.execute("ALTER TABLE new_chat_threads DROP COLUMN IF EXISTS public_share_token")
--- a/surfsense_backend/alembic/versions/82_add_podcast_status_and_thread.py
+++ b/surfsense_backend/alembic/versions/82_add_podcast_status_and_thread.py
@ -0,0 +1,62 @@
+"""Add status and thread_id to podcasts
+
+Revision ID: 82
+Revises: 81
+Create Date: 2026-01-27
+
+Adds status enum and thread_id FK to podcasts.
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+revision: str = "82"
+down_revision: str | None = "81"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    op.execute(
+        """
+        CREATE TYPE podcast_status AS ENUM ('pending', 'generating', 'ready', 'failed');
+        """
+    )
+
+    op.execute(
+        """
+        ALTER TABLE podcasts
+        ADD COLUMN IF NOT EXISTS status podcast_status NOT NULL DEFAULT 'ready';
+        """
+    )
+
+    op.execute(
+        """
+        ALTER TABLE podcasts
+        ADD COLUMN IF NOT EXISTS thread_id INTEGER
+        REFERENCES new_chat_threads(id) ON DELETE SET NULL;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS ix_podcasts_thread_id
+        ON podcasts(thread_id);
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS ix_podcasts_status
+        ON podcasts(status);
+        """
+    )
+
+
+def downgrade() -> None:
+    op.execute("DROP INDEX IF EXISTS ix_podcasts_status")
+    op.execute("DROP INDEX IF EXISTS ix_podcasts_thread_id")
+    op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS thread_id")
+    op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS status")
+    op.execute("DROP TYPE IF EXISTS podcast_status")
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@ -120,6 +120,7 @@ async def create_surfsense_deep_agent(
    connector_service: ConnectorService,
    checkpointer: Checkpointer,
    user_id: str | None = None,
+    thread_id: int | None = None,
    agent_config: AgentConfig | None = None,
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
@ -232,6 +233,7 @@ async def create_surfsense_deep_agent(
        "connector_service": connector_service,
        "firecrawl_api_key": firecrawl_api_key,
        "user_id": user_id,  # Required for memory tools
+        "thread_id": thread_id,  # For podcast tool
        # Dynamic connector/document type discovery for knowledge base tool
        "available_connectors": available_connectors,
        "available_document_types": available_document_types,
--- a/surfsense_backend/app/agents/new_chat/tools/podcast.py
+++ b/surfsense_backend/app/agents/new_chat/tools/podcast.py
@ -18,6 +18,8 @@ import redis
 from langchain_core.tools import tool
 from sqlalchemy.ext.asyncio import AsyncSession

+from app.db import Podcast, PodcastStatus
+
 # Redis connection for tracking active podcast tasks
 # Uses the same Redis instance as Celery
 REDIS_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
@ -32,50 +34,44 @@ def get_redis_client() -> redis.Redis:
    return _redis_client


-def get_active_podcast_key(search_space_id: int) -> str:
-    """Generate Redis key for tracking active podcast task."""
-    return f"podcast:active:{search_space_id}"
+def _redis_key(search_space_id: int) -> str:
+    return f"podcast:generating:{search_space_id}"


-def get_active_podcast_task(search_space_id: int) -> str | None:
-    """Check if there's an active podcast task for this search space."""
+def get_generating_podcast_id(search_space_id: int) -> int | None:
+    """Get the podcast ID currently being generated for this search space."""
    try:
        client = get_redis_client()
-        return client.get(get_active_podcast_key(search_space_id))
+        value = client.get(_redis_key(search_space_id))
+        return int(value) if value else None
    except Exception:
-        # If Redis is unavailable, allow the request (fail open)
        return None


-def set_active_podcast_task(search_space_id: int, task_id: str) -> None:
-    """Mark a podcast task as active for this search space."""
+def set_generating_podcast(search_space_id: int, podcast_id: int) -> None:
+    """Mark a podcast as currently generating for this search space."""
    try:
        client = get_redis_client()
-        # Set with 30-minute expiry as safety net (podcast should complete before this)
-        client.setex(get_active_podcast_key(search_space_id), 1800, task_id)
+        client.setex(_redis_key(search_space_id), 1800, str(podcast_id))
    except Exception as e:
-        print(f"[generate_podcast] Warning: Could not set active task in Redis: {e}")
-
-
-def clear_active_podcast_task(search_space_id: int) -> None:
-    """Clear the active podcast task for this search space."""
-    try:
-        client = get_redis_client()
-        client.delete(get_active_podcast_key(search_space_id))
-    except Exception as e:
-        print(f"[generate_podcast] Warning: Could not clear active task in Redis: {e}")
+        print(f"[generate_podcast] Warning: Could not set generating podcast in Redis: {e}")


 def create_generate_podcast_tool(
    search_space_id: int,
    db_session: AsyncSession,
+    thread_id: int | None = None,
 ):
    """
    Factory function to create the generate_podcast tool with injected dependencies.

+    Pre-creates podcast record with pending status so podcast_id is available
+    immediately for frontend polling.
+
    Args:
        search_space_id: The user's search space ID
-        db_session: Database session (not used - Celery creates its own)
+        db_session: Database session for creating the podcast record
+        thread_id: The chat thread ID for associating the podcast

    Returns:
        A configured tool function for generating podcasts
@ -98,76 +94,71 @@ def create_generate_podcast_tool(
        - "Make a podcast about..."
        - "Turn this into a podcast"

-        The tool will start generating a podcast in the background.
-        The podcast will be available once generation completes.
-
-        IMPORTANT: Only one podcast can be generated at a time. If a podcast
-        is already being generated, this tool will return a message asking
-        the user to wait.
-
        Args:
            source_content: The text content to convert into a podcast.
-                           This can be a summary, research findings, or any text
-                           the user wants transformed into an audio podcast.
            podcast_title: Title for the podcast (default: "SurfSense Podcast")
            user_prompt: Optional instructions for podcast style, tone, or format.
-                        For example: "Make it casual and fun" or "Focus on the key insights"

        Returns:
            A dictionary containing:
-            - status: "processing" (task submitted), "already_generating", or "error"
-            - task_id: The Celery task ID for polling status (if processing)
+            - status: PodcastStatus value (pending, generating, or failed)
+            - podcast_id: The podcast ID for polling (when status is pending or generating)
            - title: The podcast title
-            - message: Status message for the user
+            - message: Status message (or "error" field if status is failed)
        """
        try:
-            # Check if a podcast is already being generated for this search space
-            active_task_id = get_active_podcast_task(search_space_id)
-            if active_task_id:
+            generating_podcast_id = get_generating_podcast_id(search_space_id)
+            if generating_podcast_id:
                print(
-                    f"[generate_podcast] Blocked duplicate request. Active task: {active_task_id}"
+                    f"[generate_podcast] Blocked duplicate request. Generating podcast: {generating_podcast_id}"
                )
                return {
-                    "status": "already_generating",
-                    "task_id": active_task_id,
+                    "status": PodcastStatus.GENERATING.value,
+                    "podcast_id": generating_podcast_id,
                    "title": podcast_title,
-                    "message": "A podcast is already being generated. Please wait for it to complete before requesting another one.",
+                    "message": "A podcast is already being generated. Please wait for it to complete.",
                }

-            # Import Celery task here to avoid circular imports
+            podcast = Podcast(
+                title=podcast_title,
+                status=PodcastStatus.PENDING,
+                search_space_id=search_space_id,
+                thread_id=thread_id,
+            )
+            db_session.add(podcast)
+            await db_session.commit()
+            await db_session.refresh(podcast)
+
            from app.tasks.celery_tasks.podcast_tasks import (
                generate_content_podcast_task,
            )

-            # Submit Celery task for background processing
            task = generate_content_podcast_task.delay(
+                podcast_id=podcast.id,
                source_content=source_content,
                search_space_id=search_space_id,
-                podcast_title=podcast_title,
                user_prompt=user_prompt,
            )

-            # Mark this task as active
-            set_active_podcast_task(search_space_id, task.id)
+            set_generating_podcast(search_space_id, podcast.id)

-            print(f"[generate_podcast] Submitted Celery task: {task.id}")
+            print(f"[generate_podcast] Created podcast {podcast.id}, task: {task.id}")

-            # Return immediately with task_id for polling
            return {
-                "status": "processing",
-                "task_id": task.id,
+                "status": PodcastStatus.PENDING.value,
+                "podcast_id": podcast.id,
                "title": podcast_title,
                "message": "Podcast generation started. This may take a few minutes.",
            }

        except Exception as e:
            error_message = str(e)
-            print(f"[generate_podcast] Error submitting task: {error_message}")
+            print(f"[generate_podcast] Error: {error_message}")
            return {
-                "status": "error",
+                "status": PodcastStatus.FAILED.value,
                "error": error_message,
                "title": podcast_title,
-                "task_id": None,
+                "podcast_id": None,
            }

    return generate_podcast
--- a/surfsense_backend/app/agents/new_chat/tools/registry.py
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@ -107,8 +107,9 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
        factory=lambda deps: create_generate_podcast_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
+            thread_id=deps["thread_id"],
        ),
-        requires=["search_space_id", "db_session"],
+        requires=["search_space_id", "db_session", "thread_id"],
    ),
    # Link preview tool - fetches Open Graph metadata for URLs
    ToolDefinition(
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -93,6 +93,13 @@ class SearchSourceConnectorType(str, Enum):
    COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"


+class PodcastStatus(str, Enum):
+    PENDING = "pending"
+    GENERATING = "generating"
+    READY = "ready"
+    FAILED = "failed"
+
+
 class LiteLLMProvider(str, Enum):
    """
    Enum for LLM providers supported by LiteLLM.
@ -397,6 +404,47 @@ class NewChatThread(BaseModel, TimestampMixin):
        index=True,
    )

+    # Public sharing - cryptographic token for public URL access
+    public_share_token = Column(
+        String(64),
+        nullable=True,
+        unique=True,
+        index=True,
+    )
+    # Whether public sharing is currently enabled for this thread
+    public_share_enabled = Column(
+        Boolean,
+        nullable=False,
+        default=False,
+        server_default="false",
+    )
+
+    # Clone tracking - for audit and history bootstrap
+    cloned_from_thread_id = Column(
+        Integer,
+        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
+        nullable=True,
+        index=True,
+    )
+    cloned_at = Column(
+        TIMESTAMP(timezone=True),
+        nullable=True,
+    )
+    # Flag to bootstrap LangGraph checkpointer with DB messages on first message
+    needs_history_bootstrap = Column(
+        Boolean,
+        nullable=False,
+        default=False,
+        server_default="false",
+    )
+    # Flag indicating content clone is pending (two-phase clone)
+    clone_pending = Column(
+        Boolean,
+        nullable=False,
+        default=False,
+        server_default="false",
+    )
+
    # Relationships
    search_space = relationship("SearchSpace", back_populates="new_chat_threads")
    created_by = relationship("User", back_populates="new_chat_threads")
@ -709,14 +757,34 @@ class Podcast(BaseModel, TimestampMixin):
    __tablename__ = "podcasts"

    title = Column(String(500), nullable=False)
-    podcast_transcript = Column(JSONB, nullable=True)  # List of transcript entries
-    file_location = Column(Text, nullable=True)  # Path to the audio file
+    podcast_transcript = Column(JSONB, nullable=True)
+    file_location = Column(Text, nullable=True)
+    status = Column(
+        SQLAlchemyEnum(
+            PodcastStatus,
+            name="podcast_status",
+            create_type=False,
+            values_callable=lambda x: [e.value for e in x],
+        ),
+        nullable=False,
+        default=PodcastStatus.READY,
+        server_default="ready",
+        index=True,
+    )

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="podcasts")

+    thread_id = Column(
+        Integer,
+        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
+        nullable=True,
+        index=True,
+    )
+    thread = relationship("NewChatThread")
+

 class SearchSpace(BaseModel, TimestampMixin):
    __tablename__ = "searchspaces"
--- a/surfsense_backend/app/routes/init.py
+++ b/surfsense_backend/app/routes/init.py
@ -31,6 +31,7 @@ from .notes_routes import router as notes_router
 from .notifications_routes import router as notifications_router
 from .notion_add_connector_route import router as notion_add_connector_router
 from .podcasts_routes import router as podcasts_router
+from .public_chat_routes import router as public_chat_router
 from .rbac_routes import router as rbac_router
 from .search_source_connectors_routes import router as search_source_connectors_router
 from .search_spaces_routes import router as search_spaces_router
@ -68,4 +69,5 @@ router.include_router(circleback_webhook_router)  # Circleback meeting webhooks
 router.include_router(surfsense_docs_router)  # Surfsense documentation for citations
 router.include_router(notifications_router)  # Notifications with Electric SQL sync
 router.include_router(composio_router)  # Composio OAuth and toolkit management
+router.include_router(public_chat_router)  # Public chat sharing and cloning
 router.include_router(incentive_tasks_router)  # Incentive tasks for earning free pages
--- a/surfsense_backend/app/routes/new_chat_routes.py
+++ b/surfsense_backend/app/routes/new_chat_routes.py
@ -37,6 +37,7 @@ from app.db import (
    get_async_session,
 )
 from app.schemas.new_chat import (
+    CompleteCloneResponse,
    NewChatMessageAppend,
    NewChatMessageRead,
    NewChatRequest,
@ -45,11 +46,14 @@ from app.schemas.new_chat import (
    NewChatThreadUpdate,
    NewChatThreadVisibilityUpdate,
    NewChatThreadWithMessages,
+    PublicShareToggleRequest,
+    PublicShareToggleResponse,
    RegenerateRequest,
    ThreadHistoryLoadResponse,
    ThreadListItem,
    ThreadListResponse,
 )
+from app.services.public_chat_service import toggle_public_share
 from app.tasks.chat.stream_new_chat import stream_new_chat
 from app.users import current_active_user
 from app.utils.rbac import check_permission
@ -215,6 +219,7 @@ async def list_threads(
                visibility=thread.visibility,
                created_by_id=thread.created_by_id,
                is_own_thread=is_own_thread,
+                public_share_enabled=thread.public_share_enabled,
                created_at=thread.created_at,
                updated_at=thread.updated_at,
            )
@ -316,6 +321,7 @@ async def search_threads(
                    thread.created_by_id == user.id
                    or (thread.created_by_id is None and is_search_space_owner)
                ),
+                public_share_enabled=thread.public_share_enabled,
                created_at=thread.created_at,
                updated_at=thread.updated_at,
            )
@ -664,6 +670,62 @@ async def delete_thread(
        ) from None


+@router.post("/threads/{thread_id}/complete-clone", response_model=CompleteCloneResponse)
+async def complete_clone(
+    thread_id: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Complete the cloning process for a thread.
+
+    Copies messages and podcasts from the source thread.
+    Sets clone_pending=False and needs_history_bootstrap=True when done.
+
+    Requires authentication and ownership of the thread.
+    """
+    from app.services.public_chat_service import complete_clone_content
+
+    try:
+        result = await session.execute(
+            select(NewChatThread).filter(NewChatThread.id == thread_id)
+        )
+        thread = result.scalars().first()
+
+        if not thread:
+            raise HTTPException(status_code=404, detail="Thread not found")
+
+        if thread.created_by_id != user.id:
+            raise HTTPException(status_code=403, detail="Not authorized")
+
+        if not thread.clone_pending:
+            raise HTTPException(status_code=400, detail="Clone already completed")
+
+        if not thread.cloned_from_thread_id:
+            raise HTTPException(status_code=400, detail="No source thread to clone from")
+
+        message_count = await complete_clone_content(
+            session=session,
+            target_thread=thread,
+            source_thread_id=thread.cloned_from_thread_id,
+            target_search_space_id=thread.search_space_id,
+        )
+
+        return CompleteCloneResponse(
+            status="success",
+            message_count=message_count,
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        await session.rollback()
+        raise HTTPException(
+            status_code=500,
+            detail=f"An unexpected error occurred while completing clone: {e!s}",
+        ) from None
+
+
@router.patch("/threads/{thread_id}/visibility", response_model=NewChatThreadRead)
 async def update_thread_visibility(
    thread_id: int,
@ -729,6 +791,32 @@ async def update_thread_visibility(
        ) from None


+@router.patch(
+    "/threads/{thread_id}/public-share", response_model=PublicShareToggleResponse
+)
+async def update_thread_public_share(
+    thread_id: int,
+    request: Request,
+    toggle_request: PublicShareToggleRequest,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Enable or disable public sharing for a thread.
+
+    Only the creator of the thread can manage public sharing.
+    When enabled, returns a public URL that anyone can use to view the chat.
+    """
+    base_url = str(request.base_url).rstrip("/")
+    return await toggle_public_share(
+        session=session,
+        thread_id=thread_id,
+        enabled=toggle_request.enabled,
+        user=user,
+        base_url=base_url,
+    )
+
+
 # =============================================================================
 # Message Endpoints
 # =============================================================================
@ -996,6 +1084,7 @@ async def handle_new_chat(
                attachments=request.attachments,
                mentioned_document_ids=request.mentioned_document_ids,
                mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
+                needs_history_bootstrap=thread.needs_history_bootstrap,
            ),
            media_type="text/event-stream",
            headers={
@ -1223,6 +1312,7 @@ async def regenerate_response(
                    mentioned_document_ids=request.mentioned_document_ids,
                    mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
                    checkpoint_id=target_checkpoint_id,
+                    needs_history_bootstrap=thread.needs_history_bootstrap,
                ):
                    yield chunk
                # If we get here, streaming completed successfully
--- a/surfsense_backend/app/routes/podcasts_routes.py
+++ b/surfsense_backend/app/routes/podcasts_routes.py
@ -1,21 +1,19 @@
 """
-Podcast routes for task status polling and audio retrieval.
+Podcast routes for CRUD operations and audio streaming.

 These routes support the podcast generation feature in new-chat.
-Note: The old Chat-based podcast generation has been removed.
+Frontend polls GET /podcasts/{podcast_id} to check status field.
 """

 import os
 from pathlib import Path

-from celery.result import AsyncResult
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import StreamingResponse
 from sqlalchemy import select
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession

-from app.celery_app import celery_app
 from app.db import (
    Permission,
    Podcast,
@ -25,7 +23,7 @@ from app.db import (
    get_async_session,
 )
 from app.schemas import PodcastRead
-from app.users import current_active_user
+from app.users import current_active_user, current_optional_user
 from app.utils.rbac import check_permission

 router = APIRouter()
@ -84,12 +82,17 @@ async def read_podcasts(
 async def read_podcast(
    podcast_id: int,
    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
+    user: User | None = Depends(current_optional_user),
 ):
    """
    Get a specific podcast by ID.
-    Requires PODCASTS_READ permission for the search space.
+
+    Access is allowed if:
+    - User is authenticated with PODCASTS_READ permission, OR
+    - Podcast belongs to a publicly shared thread
    """
+    from app.services.public_chat_service import is_podcast_publicly_accessible
+
    try:
        result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
        podcast = result.scalars().first()
@ -100,16 +103,20 @@ async def read_podcast(
                detail="Podcast not found",
            )

-        # Check permission for the search space
-        await check_permission(
-            session,
-            user,
-            podcast.search_space_id,
-            Permission.PODCASTS_READ.value,
-            "You don't have permission to read podcasts in this search space",
-        )
+        is_public = await is_podcast_publicly_accessible(session, podcast_id)

-        return podcast
+        if not is_public:
+            if not user:
+                raise HTTPException(status_code=401, detail="Authentication required")
+            await check_permission(
+                session,
+                user,
+                podcast.search_space_id,
+                Permission.PODCASTS_READ.value,
+                "You don't have permission to read podcasts in this search space",
+            )
+
+        return PodcastRead.from_orm_with_entries(podcast)
    except HTTPException as he:
        raise he
    except SQLAlchemyError:
@ -161,46 +168,49 @@ async def delete_podcast(
 async def stream_podcast(
    podcast_id: int,
    session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user),
+    user: User | None = Depends(current_optional_user),
 ):
    """
    Stream a podcast audio file.
-    Requires PODCASTS_READ permission for the search space.
+
+    Access is allowed if:
+    - User is authenticated with PODCASTS_READ permission, OR
+    - Podcast belongs to a publicly shared thread

    Note: Both /stream and /audio endpoints are supported for compatibility.
    """
+    from app.services.public_chat_service import is_podcast_publicly_accessible
+
    try:
        result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
        podcast = result.scalars().first()

        if not podcast:
-            raise HTTPException(
-                status_code=404,
-                detail="Podcast not found",
+            raise HTTPException(status_code=404, detail="Podcast not found")
+
+        is_public = await is_podcast_publicly_accessible(session, podcast_id)
+
+        if not is_public:
+            if not user:
+                raise HTTPException(status_code=401, detail="Authentication required")
+
+            await check_permission(
+                session,
+                user,
+                podcast.search_space_id,
+                Permission.PODCASTS_READ.value,
+                "You don't have permission to access podcasts in this search space",
            )

-        # Check permission for the search space
-        await check_permission(
-            session,
-            user,
-            podcast.search_space_id,
-            Permission.PODCASTS_READ.value,
-            "You don't have permission to access podcasts in this search space",
-        )
-
-        # Get the file path
        file_path = podcast.file_location

-        # Check if the file exists
        if not file_path or not os.path.isfile(file_path):
            raise HTTPException(status_code=404, detail="Podcast audio file not found")

-        # Define a generator function to stream the file
        def iterfile():
            with open(file_path, mode="rb") as file_like:
                yield from file_like

-        # Return a streaming response with appropriate headers
        return StreamingResponse(
            iterfile(),
            media_type="audio/mpeg",
@ -216,62 +226,3 @@ async def stream_podcast(
        raise HTTPException(
            status_code=500, detail=f"Error streaming podcast: {e!s}"
        ) from e
-
-
-@router.get("/podcasts/task/{task_id}/status")
-async def get_podcast_task_status(
-    task_id: str,
-    user: User = Depends(current_active_user),
-):
-    """
-    Get the status of a podcast generation task.
-    Used by new-chat frontend to poll for completion.
-
-    Returns:
-    - status: "processing" | "success" | "error"
-    - podcast_id: (only if status == "success")
-    - title: (only if status == "success")
-    - error: (only if status == "error")
-    """
-    try:
-        result = AsyncResult(task_id, app=celery_app)
-
-        if result.ready():
-            # Task completed
-            if result.successful():
-                task_result = result.result
-                if isinstance(task_result, dict):
-                    if task_result.get("status") == "success":
-                        return {
-                            "status": "success",
-                            "podcast_id": task_result.get("podcast_id"),
-                            "title": task_result.get("title"),
-                            "transcript_entries": task_result.get("transcript_entries"),
-                        }
-                    else:
-                        return {
-                            "status": "error",
-                            "error": task_result.get("error", "Unknown error"),
-                        }
-                else:
-                    return {
-                        "status": "error",
-                        "error": "Unexpected task result format",
-                    }
-            else:
-                # Task failed
-                return {
-                    "status": "error",
-                    "error": str(result.result) if result.result else "Task failed",
-                }
-        else:
-            # Task still processing
-            return {
-                "status": "processing",
-                "state": result.state,
-            }
-
-    except Exception as e:
-        raise HTTPException(
-            status_code=500, detail=f"Error checking task status: {e!s}"
-        ) from e
--- a/surfsense_backend/app/routes/public_chat_routes.py
+++ b/surfsense_backend/app/routes/public_chat_routes.py
@ -0,0 +1,82 @@
+"""
+Routes for public chat access (unauthenticated and mixed-auth endpoints).
+"""
+
+from datetime import UTC, datetime
+
+from fastapi import APIRouter, Depends, HTTPException
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import ChatVisibility, NewChatThread, User, get_async_session
+from app.schemas.new_chat import (
+    CloneInitResponse,
+    PublicChatResponse,
+)
+from app.services.public_chat_service import (
+    get_public_chat,
+    get_thread_by_share_token,
+    get_user_default_search_space,
+)
+from app.users import current_active_user
+
+router = APIRouter(prefix="/public", tags=["public"])
+
+
+@router.get("/{share_token}", response_model=PublicChatResponse)
+async def read_public_chat(
+    share_token: str,
+    session: AsyncSession = Depends(get_async_session),
+):
+    """
+    Get a public chat by share token.
+
+    No authentication required.
+    Returns sanitized content (citations stripped).
+    """
+    return await get_public_chat(session, share_token)
+
+
+@router.post("/{share_token}/clone", response_model=CloneInitResponse)
+async def clone_public_chat_endpoint(
+    share_token: str,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Initialize cloning a public chat to the user's account.
+
+    Creates an empty thread with clone_pending=True.
+    Frontend should redirect to the new thread and call /complete-clone.
+
+    Requires authentication.
+    """
+    source_thread = await get_thread_by_share_token(session, share_token)
+
+    if not source_thread:
+        raise HTTPException(status_code=404, detail="Chat not found or no longer public")
+
+    target_search_space_id = await get_user_default_search_space(session, user.id)
+
+    if target_search_space_id is None:
+        raise HTTPException(status_code=400, detail="No search space found for user")
+
+    new_thread = NewChatThread(
+        title=source_thread.title,
+        archived=False,
+        visibility=ChatVisibility.PRIVATE,
+        search_space_id=target_search_space_id,
+        created_by_id=user.id,
+        public_share_enabled=False,
+        cloned_from_thread_id=source_thread.id,
+        cloned_at=datetime.now(UTC),
+        clone_pending=True,
+    )
+    session.add(new_thread)
+    await session.commit()
+    await session.refresh(new_thread)
+
+    return CloneInitResponse(
+        thread_id=new_thread.id,
+        search_space_id=target_search_space_id,
+        share_token=share_token,
+    )
--- a/surfsense_backend/app/routes/rbac_routes.py
+++ b/surfsense_backend/app/routes/rbac_routes.py
@ -123,7 +123,9 @@ async def list_all_permissions(
    for perm in Permission:
        # Extract category from permission value (e.g., "documents:read" -> "documents")
        category = perm.value.split(":")[0] if ":" in perm.value else "general"
-        description = PERMISSION_DESCRIPTIONS.get(perm.value, f"Permission for {perm.value}")
+        description = PERMISSION_DESCRIPTIONS.get(
+            perm.value, f"Permission for {perm.value}"
+        )

        permissions.append(
            PermissionInfo(
--- a/surfsense_backend/app/schemas/new_chat.py
+++ b/surfsense_backend/app/schemas/new_chat.py
@ -95,6 +95,9 @@ class NewChatThreadRead(NewChatThreadBase, IDModel):
    search_space_id: int
    visibility: ChatVisibility
    created_by_id: UUID | None = None
+    public_share_enabled: bool = False
+    public_share_token: str | None = None
+    clone_pending: bool = False
    created_at: datetime
    updated_at: datetime

@ -133,7 +136,8 @@ class ThreadListItem(BaseModel):
    archived: bool
    visibility: ChatVisibility
    created_by_id: UUID | None = None
-    is_own_thread: bool = False  # True if the current user created this thread
+    is_own_thread: bool = False
+    public_share_enabled: bool = False
    created_at: datetime = Field(alias="createdAt")
    updated_at: datetime = Field(alias="updatedAt")

@ -204,3 +208,63 @@ class RegenerateRequest(BaseModel):
    attachments: list[ChatAttachment] | None = None
    mentioned_document_ids: list[int] | None = None
    mentioned_surfsense_doc_ids: list[int] | None = None
+
+
+# =============================================================================
+# Public Sharing Schemas
+# =============================================================================
+
+
+class PublicShareToggleRequest(BaseModel):
+    """Request to enable/disable public sharing for a thread."""
+
+    enabled: bool
+
+
+class PublicShareToggleResponse(BaseModel):
+    """Response after toggling public sharing."""
+
+    enabled: bool
+    public_url: str | None = None
+    share_token: str | None = None
+
+
+# =============================================================================
+# Public Chat View Schemas (for unauthenticated access)
+# =============================================================================
+
+
+class PublicAuthor(BaseModel):
+    display_name: str | None = None
+    avatar_url: str | None = None
+
+
+class PublicChatMessage(BaseModel):
+    role: NewChatMessageRole
+    content: Any
+    author: PublicAuthor | None = None
+    created_at: datetime
+
+
+class PublicChatThread(BaseModel):
+    title: str
+    created_at: datetime
+
+
+class PublicChatResponse(BaseModel):
+    thread: PublicChatThread
+    messages: list[PublicChatMessage]
+
+
+class CloneInitResponse(BaseModel):
+
+
+    thread_id: int
+    search_space_id: int
+    share_token: str
+
+
+class CompleteCloneResponse(BaseModel):
+
+    status: str
+    message_count: int
--- a/surfsense_backend/app/schemas/podcasts.py
+++ b/surfsense_backend/app/schemas/podcasts.py
@ -1,11 +1,19 @@
 """Podcast schemas for API responses."""

 from datetime import datetime
+from enum import Enum
 from typing import Any

 from pydantic import BaseModel


+class PodcastStatusEnum(str, Enum):
+    PENDING = "pending"
+    GENERATING = "generating"
+    READY = "ready"
+    FAILED = "failed"
+
+
 class PodcastBase(BaseModel):
    """Base podcast schema."""

@ -33,7 +41,24 @@ class PodcastRead(PodcastBase):
    """Schema for reading a podcast."""

    id: int
+    status: PodcastStatusEnum = PodcastStatusEnum.READY
    created_at: datetime
+    transcript_entries: int | None = None

    class Config:
        from_attributes = True
+
+    @classmethod
+    def from_orm_with_entries(cls, obj):
+        """Create PodcastRead with transcript_entries computed."""
+        data = {
+            "id": obj.id,
+            "title": obj.title,
+            "podcast_transcript": obj.podcast_transcript,
+            "file_location": obj.file_location,
+            "search_space_id": obj.search_space_id,
+            "status": obj.status,
+            "created_at": obj.created_at,
+            "transcript_entries": len(obj.podcast_transcript) if obj.podcast_transcript else None,
+        }
+        return cls(**data)
--- a/surfsense_backend/app/services/public_chat_service.py
+++ b/surfsense_backend/app/services/public_chat_service.py
@ -0,0 +1,376 @@
+"""
+Service layer for public chat sharing and cloning.
+"""
+
+import re
+import secrets
+from uuid import UUID
+
+from fastapi import HTTPException
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import selectinload
+
+from app.db import NewChatThread, User
+
+UI_TOOLS = {
+    "display_image",
+    "link_preview",
+    "generate_podcast",
+    "scrape_webpage",
+    "multi_link_preview",
+}
+
+
+def strip_citations(text: str) -> str:
+    """
+    Remove [citation:X] and [citation:doc-X] patterns from text.
+    Preserves newlines to maintain markdown formatting.
+    """
+    # Remove citation patterns
+    text = re.sub(r"[\[【]\u200B?citation:(doc-)?\d+\u200B?[\]】]", "", text)
+    # Collapse multiple spaces/tabs (but NOT newlines) into single space
+    text = re.sub(r"[^\S\n]+", " ", text)
+    # Normalize excessive blank lines (3+ newlines → 2)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    # Clean up spaces around newlines
+    text = re.sub(r" *\n *", "\n", text)
+    return text.strip()
+
+
+def sanitize_content_for_public(content: list | str | None) -> list:
+    """
+    Filter message content for public view.
+    Strips citations and filters to UI-relevant tools.
+    """
+    if content is None:
+        return []
+
+    if isinstance(content, str):
+        clean_text = strip_citations(content)
+        return [{"type": "text", "text": clean_text}] if clean_text else []
+
+    if not isinstance(content, list):
+        return []
+
+    sanitized = []
+    for part in content:
+        if not isinstance(part, dict):
+            continue
+
+        part_type = part.get("type")
+
+        if part_type == "text":
+            clean_text = strip_citations(part.get("text", ""))
+            if clean_text:
+                sanitized.append({"type": "text", "text": clean_text})
+
+        elif part_type == "tool-call":
+            tool_name = part.get("toolName")
+            if tool_name not in UI_TOOLS:
+                continue
+            sanitized.append(part)
+
+    return sanitized
+
+
+async def get_author_display(
+    session: AsyncSession,
+    author_id: UUID | None,
+    user_cache: dict[UUID, dict],
+) -> dict | None:
+    """Transform author UUID to display info."""
+    if author_id is None:
+        return None
+
+    if author_id not in user_cache:
+        result = await session.execute(select(User).filter(User.id == author_id))
+        user = result.scalars().first()
+        if user:
+            user_cache[author_id] = {
+                "display_name": user.display_name or "User",
+                "avatar_url": user.avatar_url,
+            }
+        else:
+            user_cache[author_id] = {
+                "display_name": "Unknown User",
+                "avatar_url": None,
+            }
+
+    return user_cache[author_id]
+
+
+async def toggle_public_share(
+    session: AsyncSession,
+    thread_id: int,
+    enabled: bool,
+    user: User,
+    base_url: str,
+) -> dict:
+    """
+    Enable or disable public sharing for a thread.
+
+    Only the thread owner can toggle public sharing.
+    When enabling, generates a new token if one doesn't exist.
+    When disabling, keeps the token for potential re-enable.
+    """
+    result = await session.execute(
+        select(NewChatThread).filter(NewChatThread.id == thread_id)
+    )
+    thread = result.scalars().first()
+
+    if not thread:
+        raise HTTPException(status_code=404, detail="Thread not found")
+
+    if thread.created_by_id != user.id:
+        raise HTTPException(
+            status_code=403,
+            detail="Only the creator of this chat can manage public sharing",
+        )
+
+    if enabled and not thread.public_share_token:
+        thread.public_share_token = secrets.token_urlsafe(48)
+
+    thread.public_share_enabled = enabled
+
+    await session.commit()
+    await session.refresh(thread)
+
+    if enabled:
+        return {
+            "enabled": True,
+            "public_url": f"{base_url}/public/{thread.public_share_token}",
+            "share_token": thread.public_share_token,
+        }
+
+    return {
+        "enabled": False,
+        "public_url": None,
+        "share_token": None,
+    }
+
+
+async def get_public_chat(
+    session: AsyncSession,
+    share_token: str,
+) -> dict:
+    """
+    Get a public chat by share token.
+
+    Returns sanitized content suitable for public viewing.
+    """
+    result = await session.execute(
+        select(NewChatThread)
+        .options(selectinload(NewChatThread.messages))
+        .filter(
+            NewChatThread.public_share_token == share_token,
+            NewChatThread.public_share_enabled.is_(True),
+        )
+    )
+    thread = result.scalars().first()
+
+    if not thread:
+        raise HTTPException(status_code=404, detail="Not found")
+
+    user_cache: dict[UUID, dict] = {}
+
+    messages = []
+    for msg in sorted(thread.messages, key=lambda m: m.created_at):
+        author = await get_author_display(session, msg.author_id, user_cache)
+        sanitized_content = sanitize_content_for_public(msg.content)
+
+        messages.append(
+            {
+                "role": msg.role,
+                "content": sanitized_content,
+                "author": author,
+                "created_at": msg.created_at,
+            }
+        )
+
+    return {
+        "thread": {
+            "title": thread.title,
+            "created_at": thread.created_at,
+        },
+        "messages": messages,
+    }
+
+
+async def get_thread_by_share_token(
+    session: AsyncSession,
+    share_token: str,
+) -> NewChatThread | None:
+    """Get a thread by its public share token if sharing is enabled."""
+    result = await session.execute(
+        select(NewChatThread)
+        .options(selectinload(NewChatThread.messages))
+        .filter(
+            NewChatThread.public_share_token == share_token,
+            NewChatThread.public_share_enabled.is_(True),
+        )
+    )
+    return result.scalars().first()
+
+
+async def get_user_default_search_space(
+    session: AsyncSession,
+    user_id: UUID,
+) -> int | None:
+    """
+    Get user's default search space for cloning.
+
+    Returns the first search space where user is owner, or None if not found.
+    """
+    from app.db import SearchSpaceMembership
+
+    result = await session.execute(
+        select(SearchSpaceMembership)
+        .filter(
+            SearchSpaceMembership.user_id == user_id,
+            SearchSpaceMembership.is_owner.is_(True),
+        )
+        .limit(1)
+    )
+    membership = result.scalars().first()
+
+    if membership:
+        return membership.search_space_id
+
+    return None
+
+
+async def complete_clone_content(
+    session: AsyncSession,
+    target_thread: NewChatThread,
+    source_thread_id: int,
+    target_search_space_id: int,
+) -> int:
+    """
+    Copy messages and podcasts from source thread to target thread.
+
+    Sets clone_pending=False and needs_history_bootstrap=True when done.
+    Returns the number of messages copied.
+    """
+    from app.db import NewChatMessage
+
+    result = await session.execute(
+        select(NewChatThread)
+        .options(selectinload(NewChatThread.messages))
+        .filter(NewChatThread.id == source_thread_id)
+    )
+    source_thread = result.scalars().first()
+
+    if not source_thread:
+        raise ValueError("Source thread not found")
+
+    podcast_id_map: dict[int, int] = {}
+    message_count = 0
+
+    for msg in sorted(source_thread.messages, key=lambda m: m.created_at):
+        new_content = sanitize_content_for_public(msg.content)
+
+        if isinstance(new_content, list):
+            for part in new_content:
+                if (
+                    isinstance(part, dict)
+                    and part.get("type") == "tool-call"
+                    and part.get("toolName") == "generate_podcast"
+                ):
+                    result_data = part.get("result", {})
+                    old_podcast_id = result_data.get("podcast_id")
+                    if old_podcast_id and old_podcast_id not in podcast_id_map:
+                        new_podcast_id = await _clone_podcast(
+                            session,
+                            old_podcast_id,
+                            target_search_space_id,
+                            target_thread.id,
+                        )
+                        if new_podcast_id:
+                            podcast_id_map[old_podcast_id] = new_podcast_id
+
+                    if old_podcast_id and old_podcast_id in podcast_id_map:
+                        result_data["podcast_id"] = podcast_id_map[old_podcast_id]
+
+        new_message = NewChatMessage(
+            thread_id=target_thread.id,
+            role=msg.role,
+            content=new_content,
+            author_id=msg.author_id,
+            created_at=msg.created_at,
+        )
+        session.add(new_message)
+        message_count += 1
+
+    target_thread.clone_pending = False
+    target_thread.needs_history_bootstrap = True
+
+    await session.commit()
+
+    return message_count
+
+
+async def _clone_podcast(
+    session: AsyncSession,
+    podcast_id: int,
+    target_search_space_id: int,
+    target_thread_id: int,
+) -> int | None:
+    """Clone a podcast record and its audio file. Only clones ready podcasts."""
+    import shutil
+    import uuid
+    from pathlib import Path
+
+    from app.db import Podcast, PodcastStatus
+
+    result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
+    original = result.scalars().first()
+    if not original or original.status != PodcastStatus.READY:
+        return None
+
+    new_file_path = None
+    if original.file_location:
+        original_path = Path(original.file_location)
+        if original_path.exists():
+            new_filename = f"{uuid.uuid4()}_podcast.mp3"
+            new_dir = Path("podcasts")
+            new_dir.mkdir(parents=True, exist_ok=True)
+            new_file_path = str(new_dir / new_filename)
+            shutil.copy2(original.file_location, new_file_path)
+
+    new_podcast = Podcast(
+        title=original.title,
+        podcast_transcript=original.podcast_transcript,
+        file_location=new_file_path,
+        status=PodcastStatus.READY,
+        search_space_id=target_search_space_id,
+        thread_id=target_thread_id,
+    )
+    session.add(new_podcast)
+    await session.flush()
+
+    return new_podcast.id
+
+
+async def is_podcast_publicly_accessible(
+    session: AsyncSession,
+    podcast_id: int,
+) -> bool:
+    """
+    Check if a podcast belongs to a publicly shared thread.
+
+    Uses the thread_id foreign key for efficient lookup.
+    """
+    from app.db import Podcast
+
+    result = await session.execute(
+        select(Podcast)
+        .options(selectinload(Podcast.thread))
+        .filter(Podcast.id == podcast_id)
+    )
+    podcast = result.scalars().first()
+
+    if not podcast or not podcast.thread:
+        return False
+
+    return podcast.thread.public_share_enabled
--- a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py
@ -4,15 +4,15 @@ import asyncio
 import logging
 import sys

+from sqlalchemy import select
 from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
 from sqlalchemy.pool import NullPool

-# Import for content-based podcast (new-chat)
 from app.agents.podcaster.graph import graph as podcaster_graph
 from app.agents.podcaster.state import State as PodcasterState
 from app.celery_app import celery_app
 from app.config import config
-from app.db import Podcast
+from app.db import Podcast, PodcastStatus

 logger = logging.getLogger(__name__)

@ -44,8 +44,8 @@ def get_celery_session_maker():
 # =============================================================================


-def _clear_active_podcast_redis_key(search_space_id: int) -> None:
-    """Clear the active podcast task key from Redis when task completes."""
+def _clear_generating_podcast(search_space_id: int) -> None:
+    """Clear the generating podcast marker from Redis when task completes."""
    import os

    import redis
@ -53,34 +53,24 @@ def _clear_active_podcast_redis_key(search_space_id: int) -> None:
    try:
        redis_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
        client = redis.from_url(redis_url, decode_responses=True)
-        key = f"podcast:active:{search_space_id}"
+        key = f"podcast:generating:{search_space_id}"
        client.delete(key)
-        logger.info(f"Cleared active podcast key for search_space_id={search_space_id}")
+        logger.info(f"Cleared generating podcast key for search_space_id={search_space_id}")
    except Exception as e:
-        logger.warning(f"Could not clear active podcast key: {e}")
+        logger.warning(f"Could not clear generating podcast key: {e}")


@celery_app.task(name="generate_content_podcast", bind=True)
 def generate_content_podcast_task(
    self,
+    podcast_id: int,
    source_content: str,
    search_space_id: int,
-    podcast_title: str = "SurfSense Podcast",
    user_prompt: str | None = None,
 ) -> dict:
    """
-    Celery task to generate podcast from source content (for new-chat).
-
-    This task generates a podcast directly from provided content.
-
-    Args:
-        source_content: The text content to convert into a podcast
-        search_space_id: ID of the search space
-        podcast_title: Title for the podcast
-        user_prompt: Optional instructions for podcast style/tone
-
-    Returns:
-        dict with podcast_id on success, or error info on failure
+    Celery task to generate podcast from source content.
+    Updates existing podcast record created by the tool.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
@ -88,9 +78,9 @@ def generate_content_podcast_task(
    try:
        result = loop.run_until_complete(
            _generate_content_podcast(
+                podcast_id,
                source_content,
                search_space_id,
-                podcast_title,
                user_prompt,
            )
        )
@ -98,46 +88,69 @@ def generate_content_podcast_task(
        return result
    except Exception as e:
        logger.error(f"Error generating content podcast: {e!s}")
-        return {"status": "error", "error": str(e)}
+        loop.run_until_complete(_mark_podcast_failed(podcast_id))
+        return {"status": "failed", "podcast_id": podcast_id}
    finally:
-        # Always clear the active podcast key when task completes (success or failure)
-        _clear_active_podcast_redis_key(search_space_id)
+        _clear_generating_podcast(search_space_id)
        asyncio.set_event_loop(None)
        loop.close()


-async def _generate_content_podcast(
-    source_content: str,
-    search_space_id: int,
-    podcast_title: str = "SurfSense Podcast",
-    user_prompt: str | None = None,
-) -> dict:
-    """Generate content-based podcast with new session."""
+async def _mark_podcast_failed(podcast_id: int) -> None:
+    """Mark a podcast as failed in the database."""
    async with get_celery_session_maker()() as session:
        try:
-            # Configure the podcaster graph
+            result = await session.execute(
+                select(Podcast).filter(Podcast.id == podcast_id)
+            )
+            podcast = result.scalars().first()
+            if podcast:
+                podcast.status = PodcastStatus.FAILED
+                await session.commit()
+        except Exception as e:
+            logger.error(f"Failed to mark podcast as failed: {e}")
+
+
+async def _generate_content_podcast(
+    podcast_id: int,
+    source_content: str,
+    search_space_id: int,
+    user_prompt: str | None = None,
+) -> dict:
+    """Generate content-based podcast and update existing record."""
+    async with get_celery_session_maker()() as session:
+        result = await session.execute(
+            select(Podcast).filter(Podcast.id == podcast_id)
+        )
+        podcast = result.scalars().first()
+
+        if not podcast:
+            raise ValueError(f"Podcast {podcast_id} not found")
+
+        try:
+            podcast.status = PodcastStatus.GENERATING
+            await session.commit()
+
            graph_config = {
                "configurable": {
-                    "podcast_title": podcast_title,
+                    "podcast_title": podcast.title,
                    "search_space_id": search_space_id,
                    "user_prompt": user_prompt,
                }
            }

-            # Initialize the podcaster state with the source content
            initial_state = PodcasterState(
                source_content=source_content,
                db_session=session,
            )

-            # Run the podcaster graph
-            result = await podcaster_graph.ainvoke(initial_state, config=graph_config)
+            graph_result = await podcaster_graph.ainvoke(
+                initial_state, config=graph_config
+            )

-            # Extract results
-            podcast_transcript = result.get("podcast_transcript", [])
-            file_path = result.get("final_podcast_file_path", "")
+            podcast_transcript = graph_result.get("podcast_transcript", [])
+            file_path = graph_result.get("final_podcast_file_path", "")

-            # Convert transcript to serializable format
            serializable_transcript = []
            for entry in podcast_transcript:
                if hasattr(entry, "speaker_id"):
@ -152,27 +165,22 @@ async def _generate_content_podcast(
                        }
                    )

-            # Save podcast to database
-            podcast = Podcast(
-                title=podcast_title,
-                podcast_transcript=serializable_transcript,
-                file_location=file_path,
-                search_space_id=search_space_id,
-            )
-            session.add(podcast)
+            podcast.podcast_transcript = serializable_transcript
+            podcast.file_location = file_path
+            podcast.status = PodcastStatus.READY
            await session.commit()
-            await session.refresh(podcast)

-            logger.info(f"Successfully generated content podcast: {podcast.id}")
+            logger.info(f"Successfully generated podcast: {podcast.id}")

            return {
-                "status": "success",
+                "status": "ready",
                "podcast_id": podcast.id,
-                "title": podcast_title,
+                "title": podcast.title,
                "transcript_entries": len(serializable_transcript),
            }

        except Exception as e:
            logger.error(f"Error in _generate_content_podcast: {e!s}")
-            await session.rollback()
+            podcast.status = PodcastStatus.FAILED
+            await session.commit()
            raise
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -34,6 +34,7 @@ from app.services.chat_session_state_service import (
 )
 from app.services.connector_service import ConnectorService
 from app.services.new_streaming_service import VercelStreamingService
+from app.utils.content_utils import bootstrap_history_from_db


 def format_attachments_as_context(attachments: list[ChatAttachment]) -> str:
@ -205,13 +206,13 @@ async def stream_new_chat(
    mentioned_document_ids: list[int] | None = None,
    mentioned_surfsense_doc_ids: list[int] | None = None,
    checkpoint_id: str | None = None,
+    needs_history_bootstrap: bool = False,
 ) -> AsyncGenerator[str, None]:
    """
    Stream chat responses from the new SurfSense deep agent.

    This uses the Vercel AI SDK Data Stream Protocol (SSE format) for streaming.
    The chat_id is used as LangGraph's thread_id for memory/checkpointing.
-    Message history can be passed from the frontend for context.

    Args:
        user_query: The user's query
@ -221,6 +222,7 @@ async def stream_new_chat(
        user_id: The current user's UUID string (for memory tools and session state)
        llm_config_id: The LLM configuration ID (default: -1 for first global config)
        attachments: Optional attachments with extracted content
+        needs_history_bootstrap: If True, load message history from DB (for cloned chats)
        mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat
        mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat
        checkpoint_id: Optional checkpoint ID to rewind/fork from (for edit/reload operations)
@ -300,13 +302,29 @@ async def stream_new_chat(
            connector_service=connector_service,
            checkpointer=checkpointer,
            user_id=user_id,  # Pass user ID for memory tools
+            thread_id=chat_id,  # Pass chat ID for podcast association
            agent_config=agent_config,  # Pass prompt configuration
            firecrawl_api_key=firecrawl_api_key,  # Pass Firecrawl API key if configured
        )

-        # Build input with message history from frontend
+        # Build input with message history
        langchain_messages = []

+        # Bootstrap history for cloned chats (no LangGraph checkpoint exists yet)
+        if needs_history_bootstrap:
+            langchain_messages = await bootstrap_history_from_db(session, chat_id)
+
+            # Clear the flag so we don't bootstrap again on next message
+            from app.db import NewChatThread
+
+            thread_result = await session.execute(
+                select(NewChatThread).filter(NewChatThread.id == chat_id)
+            )
+            thread = thread_result.scalars().first()
+            if thread:
+                thread.needs_history_bootstrap = False
+                await session.commit()
+
        # Fetch mentioned documents if any (with chunks for proper citations)
        mentioned_documents: list[Document] = []
        if mentioned_document_ids:
--- a/surfsense_backend/app/tasks/document_processors/file_processors.py
+++ b/surfsense_backend/app/tasks/document_processors/file_processors.py
@ -37,18 +37,30 @@ from .base import (
 from .markdown_processor import add_received_markdown_file_document

 # Constants for LlamaCloud retry configuration
-LLAMACLOUD_MAX_RETRIES = 3
-LLAMACLOUD_BASE_DELAY = 5  # Base delay in seconds for exponential backoff
+LLAMACLOUD_MAX_RETRIES = 5  # Increased from 3 for large file resilience
+LLAMACLOUD_BASE_DELAY = 10  # Base delay in seconds for exponential backoff
+LLAMACLOUD_MAX_DELAY = 120  # Maximum delay between retries (2 minutes)
 LLAMACLOUD_RETRYABLE_EXCEPTIONS = (
    ssl.SSLError,
    httpx.ConnectError,
    httpx.ConnectTimeout,
    httpx.ReadTimeout,
    httpx.WriteTimeout,
+    httpx.RemoteProtocolError,
+    httpx.LocalProtocolError,
    ConnectionError,
+    ConnectionResetError,
    TimeoutError,
+    OSError,  # Catches various network-level errors
 )

+# Timeout calculation constants
+UPLOAD_BYTES_PER_SECOND_SLOW = 100 * 1024  # 100 KB/s (conservative for slow connections)
+MIN_UPLOAD_TIMEOUT = 120  # Minimum 2 minutes for any file
+MAX_UPLOAD_TIMEOUT = 1800  # Maximum 30 minutes for very large files
+BASE_JOB_TIMEOUT = 600  # 10 minutes base for job processing
+PER_PAGE_JOB_TIMEOUT = 60  # 1 minute per page for processing
+

 def get_google_drive_unique_identifier(
    connector: dict | None,
@ -204,6 +216,48 @@ async def find_existing_document_with_migration(
    return existing_document


+def calculate_upload_timeout(file_size_bytes: int) -> float:
+    """
+    Calculate appropriate upload timeout based on file size.
+    
+    Assumes a conservative slow connection speed to handle worst-case scenarios.
+    
+    Args:
+        file_size_bytes: Size of the file in bytes
+        
+    Returns:
+        Timeout in seconds
+    """
+    # Calculate time needed at slow connection speed
+    # Add 50% buffer for network variability and SSL overhead
+    estimated_time = (file_size_bytes / UPLOAD_BYTES_PER_SECOND_SLOW) * 1.5
+    
+    # Clamp to reasonable bounds
+    return max(MIN_UPLOAD_TIMEOUT, min(estimated_time, MAX_UPLOAD_TIMEOUT))
+
+
+def calculate_job_timeout(estimated_pages: int, file_size_bytes: int) -> float:
+    """
+    Calculate job processing timeout based on page count and file size.
+    
+    Args:
+        estimated_pages: Estimated number of pages
+        file_size_bytes: Size of the file in bytes
+        
+    Returns:
+        Timeout in seconds
+    """
+    # Base timeout + time per page
+    page_based_timeout = BASE_JOB_TIMEOUT + (estimated_pages * PER_PAGE_JOB_TIMEOUT)
+    
+    # Also consider file size (large images take longer to process)
+    # ~1 minute per 10MB of file size
+    size_based_timeout = BASE_JOB_TIMEOUT + (file_size_bytes / (10 * 1024 * 1024)) * 60
+    
+    # Use the larger of the two estimates
+    return max(page_based_timeout, size_based_timeout)
+
+
 async def parse_with_llamacloud_retry(
    file_path: str,
    estimated_pages: int,
@ -213,6 +267,9 @@ async def parse_with_llamacloud_retry(
    """
    Parse a file with LlamaCloud with retry logic for transient SSL/connection errors.

+    Uses dynamic timeout calculations based on file size and page count to handle
+    very large files reliably.
+
    Args:
        file_path: Path to the file to parse
        estimated_pages: Estimated number of pages for timeout calculation
@ -225,25 +282,37 @@ async def parse_with_llamacloud_retry(
    Raises:
        Exception: If all retries fail
    """
+    import os
+    import random
+    
    from llama_cloud_services import LlamaParse
    from llama_cloud_services.parse.utils import ResultType

-    # Calculate timeouts based on estimated pages
-    # Base timeout of 300 seconds + 30 seconds per page for large documents
-    base_timeout = 300
-    per_page_timeout = 30
-    job_timeout = base_timeout + (estimated_pages * per_page_timeout)
-
-    # Create custom httpx client with larger timeouts for file uploads
-    # The SSL error often occurs during large file uploads, so we need generous timeouts
+    # Get file size for timeout calculations
+    file_size_bytes = os.path.getsize(file_path)
+    file_size_mb = file_size_bytes / (1024 * 1024)
+    
+    # Calculate dynamic timeouts based on file size and page count
+    upload_timeout = calculate_upload_timeout(file_size_bytes)
+    job_timeout = calculate_job_timeout(estimated_pages, file_size_bytes)
+    
+    # HTTP client timeouts - scaled based on file size
+    # Write timeout is critical for large file uploads
    custom_timeout = httpx.Timeout(
-        connect=60.0,  # 60 seconds to establish connection
-        read=300.0,  # 5 minutes to read response
-        write=300.0,  # 5 minutes to write/upload (important for large files)
-        pool=60.0,  # 60 seconds to acquire connection from pool
+        connect=120.0,  # 2 minutes to establish connection (handles slow DNS, etc.)
+        read=upload_timeout,  # Dynamic based on file size
+        write=upload_timeout,  # Dynamic based on file size (upload time)
+        pool=120.0,  # 2 minutes to acquire connection from pool
+    )
+    
+    logging.info(
+        f"LlamaCloud upload configured: file_size={file_size_mb:.1f}MB, "
+        f"pages={estimated_pages}, upload_timeout={upload_timeout:.0f}s, "
+        f"job_timeout={job_timeout:.0f}s"
    )

    last_exception = None
+    attempt_errors = []

    for attempt in range(1, LLAMACLOUD_MAX_RETRIES + 1):
        try:
@ -257,46 +326,67 @@ async def parse_with_llamacloud_retry(
                    language="en",
                    result_type=ResultType.MD,
                    # Timeout settings for large files
-                    max_timeout=max(2000, job_timeout),  # Overall max timeout
+                    max_timeout=int(max(2000, job_timeout + upload_timeout)),
                    job_timeout_in_seconds=job_timeout,
-                    job_timeout_extra_time_per_page_in_seconds=per_page_timeout,
+                    job_timeout_extra_time_per_page_in_seconds=PER_PAGE_JOB_TIMEOUT,
                    # Use our custom client with larger timeouts
                    custom_client=custom_client,
                )

                # Parse the file asynchronously
                result = await parser.aparse(file_path)
+                
+                # Success - log if we had previous failures
+                if attempt > 1:
+                    logging.info(
+                        f"LlamaCloud upload succeeded on attempt {attempt} after "
+                        f"{len(attempt_errors)} failures"
+                    )
+                
                return result

        except LLAMACLOUD_RETRYABLE_EXCEPTIONS as e:
            last_exception = e
            error_type = type(e).__name__
+            error_msg = str(e)[:200]
+            attempt_errors.append(f"Attempt {attempt}: {error_type} - {error_msg}")

            if attempt < LLAMACLOUD_MAX_RETRIES:
-                # Calculate exponential backoff delay
-                delay = LLAMACLOUD_BASE_DELAY * (2 ** (attempt - 1))
+                # Calculate exponential backoff with jitter
+                # Base delay doubles each attempt, capped at max delay
+                base_delay = min(
+                    LLAMACLOUD_BASE_DELAY * (2 ** (attempt - 1)),
+                    LLAMACLOUD_MAX_DELAY
+                )
+                # Add random jitter (±25%) to prevent thundering herd
+                jitter = base_delay * 0.25 * (2 * random.random() - 1)
+                delay = base_delay + jitter

                if task_logger and log_entry:
                    await task_logger.log_task_progress(
                        log_entry,
-                        f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}), retrying in {delay}s",
+                        f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}), retrying in {delay:.0f}s",
                        {
                            "error_type": error_type,
-                            "error_message": str(e)[:200],
+                            "error_message": error_msg,
                            "attempt": attempt,
                            "retry_delay": delay,
+                            "file_size_mb": round(file_size_mb, 1),
+                            "upload_timeout": upload_timeout,
                        },
                    )
                else:
                    logging.warning(
-                        f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}): {error_type}. "
-                        f"Retrying in {delay}s..."
+                        f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}): "
+                        f"{error_type}. File: {file_size_mb:.1f}MB. Retrying in {delay:.0f}s..."
                    )

                await asyncio.sleep(delay)
            else:
                logging.error(
-                    f"LlamaCloud upload failed after {LLAMACLOUD_MAX_RETRIES} attempts: {error_type} - {e}"
+                    f"LlamaCloud upload failed after {LLAMACLOUD_MAX_RETRIES} attempts. "
+                    f"File size: {file_size_mb:.1f}MB, Pages: {estimated_pages}. "
+                    f"Errors: {'; '.join(attempt_errors)}"
                )

        except Exception:
@ -304,7 +394,10 @@ async def parse_with_llamacloud_retry(
            raise

    # All retries exhausted
-    raise last_exception or RuntimeError("LlamaCloud parsing failed after all retries")
+    raise last_exception or RuntimeError(
+        f"LlamaCloud parsing failed after {LLAMACLOUD_MAX_RETRIES} retries. "
+        f"File size: {file_size_mb:.1f}MB"
+    )


 async def add_received_file_document_using_unstructured(
--- a/surfsense_backend/app/users.py
+++ b/surfsense_backend/app/users.py
@ -229,3 +229,4 @@ auth_backend = AuthenticationBackend(
 fastapi_users = FastAPIUsers[User, uuid.UUID](get_user_manager, [auth_backend])

 current_active_user = fastapi_users.current_user(active=True)
+current_optional_user = fastapi_users.current_user(active=True, optional=True)
--- a/surfsense_backend/app/utils/content_utils.py
+++ b/surfsense_backend/app/utils/content_utils.py
@ -0,0 +1,75 @@
+"""
+Utilities for working with message content.
+
+Message content in new_chat_messages can be stored in various formats:
+- String: Simple text content
+- List: Array of content parts [{"type": "text", "text": "..."}, {"type": "tool-call", ...}]
+- Dict: Single content object
+
+These utilities help extract and transform content for different use cases.
+"""
+
+from langchain_core.messages import AIMessage, HumanMessage
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
+def extract_text_content(content: str | dict | list) -> str:
+    """Extract plain text content from various message formats."""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, dict):
+        # Handle dict with 'text' key
+        if "text" in content:
+            return content["text"]
+        return str(content)
+    if isinstance(content, list):
+        # Handle list of parts (e.g., [{"type": "text", "text": "..."}])
+        texts = []
+        for part in content:
+            if isinstance(part, dict) and part.get("type") == "text":
+                texts.append(part.get("text", ""))
+            elif isinstance(part, str):
+                texts.append(part)
+        return "\n".join(texts) if texts else ""
+    return ""
+
+
+async def bootstrap_history_from_db(
+    session: AsyncSession,
+    thread_id: int,
+) -> list[HumanMessage | AIMessage]:
+    """
+    Load message history from database and convert to LangChain format.
+
+    Used for cloned chats where the LangGraph checkpointer has no state,
+    but we have messages in the database that should be used as context.
+
+    Args:
+        session: Database session
+        thread_id: The chat thread ID
+
+    Returns:
+        List of LangChain messages (HumanMessage/AIMessage)
+    """
+    from app.db import NewChatMessage
+
+    result = await session.execute(
+        select(NewChatMessage)
+        .filter(NewChatMessage.thread_id == thread_id)
+        .order_by(NewChatMessage.created_at)
+    )
+    db_messages = result.scalars().all()
+
+    langchain_messages: list[HumanMessage | AIMessage] = []
+
+    for msg in db_messages:
+        text_content = extract_text_content(msg.content)
+        if not text_content:
+            continue
+        if msg.role == "user":
+            langchain_messages.append(HumanMessage(content=text_content))
+        elif msg.role == "assistant":
+            langchain_messages.append(AIMessage(content=text_content))
+
+    return langchain_messages