Merge pull request #1463 from CREDO23/feat/blob-storage

[Feat] Add Document Blob Storage, Segregate Connectors, and Jira Cleanup
2026-07-22 23:31:12 +02:00 · 2026-06-02 12:15:48 -07:00 · 2026-06-02 12:15:48 -07:00 · 309bd9a2dd
commit 309bd9a2dd
parent a80a9cb87c 10a2f1ec2f
42 changed files with 5354 additions and 7404 deletions
--- a/docker/docker-compose.deps-only.yml
+++ b/docker/docker-compose.deps-only.yml
@ -128,6 +128,23 @@ services:
      timeout: 5s
      retries: 5

+  # OPTIONAL — Azurite emulates Azure Blob Storage for testing the Azure
+  # original-file backend. The default filesystem backend needs none of this.
+  # To exercise it, set in surfsense_backend/.env:
+  #   FILE_STORAGE_BACKEND=azure
+  #   AZURE_STORAGE_CONTAINER=surfsense-documents
+  #   AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:${AZURITE_BLOB_PORT:-10000}/devstoreaccount1;
+  # The backend creates blobs on upload; create the container once first
+  # (Azure CLI / Storage Explorer), then upload a document.
+  azurite:
+    image: mcr.microsoft.com/azure-storage/azurite:3.33.0
+    command: azurite-blob --blobHost 0.0.0.0 --blobPort 10000
+    ports:
+      - "${AZURITE_BLOB_PORT:-10000}:10000"
+    volumes:
+      - azurite_data:/data
+    restart: unless-stopped
+
 volumes:
  postgres_data:
    name: surfsense-deps-postgres
@ -137,3 +154,5 @@ volumes:
    name: surfsense-deps-redis
  zero_cache_data:
    name: surfsense-deps-zero-cache
+  azurite_data:
+    name: surfsense-deps-azurite
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@ -293,6 +293,16 @@ LLAMA_CLOUD_API_KEY=llx-nnn
 # AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
 # AZURE_DI_KEY=your-key

+# Original File Storage
+# Where to persist the original bytes of uploaded documents (for download today,
+# redaction / form-filling later). "local" needs no cloud creds and is the dev default.
+FILE_STORAGE_BACKEND=local
+# Local backend: directory for stored files (defaults to surfsense_backend/.local_object_store)
+# FILE_STORAGE_LOCAL_PATH=/var/lib/surfsense/object-store
+# Azure Blob backend (set FILE_STORAGE_BACKEND=azure):
+# AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net
+# AZURE_STORAGE_CONTAINER=surfsense-documents
+
 # Daytona Sandbox (isolated code execution)
 # DAYTONA_SANDBOX_ENABLED=FALSE
 # DAYTONA_API_KEY=your-daytona-api-key
--- a/surfsense_backend/.gitignore
+++ b/surfsense_backend/.gitignore
@ -2,6 +2,7 @@
 .venv
 venv/
 data/
+.local_object_store/
 __pycache__/
 .flashrank_cache
 surf_new_backend.egg-info/
--- a/surfsense_backend/alembic/versions/152_add_document_files.py
+++ b/surfsense_backend/alembic/versions/152_add_document_files.py
@ -0,0 +1,86 @@
+"""add document_files table for stored original uploads
+
+Revision ID: 152
+Revises: 151
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+revision: str = "152"
+down_revision: str | None = "151"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    # The enum type must precede the table that references it.
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF NOT EXISTS (
+                SELECT 1 FROM pg_type WHERE typname = 'document_file_kind'
+            ) THEN
+                CREATE TYPE document_file_kind AS ENUM (
+                    'ORIGINAL', 'REDACTED', 'FILLED_FORM'
+                );
+            END IF;
+        END
+        $$;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS document_files (
+            id SERIAL PRIMARY KEY,
+            document_id INTEGER NOT NULL
+                REFERENCES documents(id) ON DELETE CASCADE,
+            search_space_id INTEGER NOT NULL
+                REFERENCES searchspaces(id) ON DELETE CASCADE,
+            kind document_file_kind NOT NULL DEFAULT 'ORIGINAL',
+            storage_backend VARCHAR(32) NOT NULL,
+            storage_key TEXT NOT NULL,
+            original_filename TEXT NOT NULL,
+            mime_type TEXT,
+            size_bytes BIGINT NOT NULL,
+            checksum_sha256 VARCHAR(64),
+            created_by_id UUID
+                REFERENCES "user"(id) ON DELETE SET NULL,
+            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+        );
+        """
+    )
+
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS ix_document_files_document_id "
+        "ON document_files(document_id);"
+    )
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS ix_document_files_search_space_id "
+        "ON document_files(search_space_id);"
+    )
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS ix_document_files_kind "
+        "ON document_files(kind);"
+    )
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS ix_document_files_created_by_id "
+        "ON document_files(created_by_id);"
+    )
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS ix_document_files_created_at "
+        "ON document_files(created_at);"
+    )
+
+
+def downgrade() -> None:
+    op.execute("DROP INDEX IF EXISTS ix_document_files_created_at;")
+    op.execute("DROP INDEX IF EXISTS ix_document_files_created_by_id;")
+    op.execute("DROP INDEX IF EXISTS ix_document_files_kind;")
+    op.execute("DROP INDEX IF EXISTS ix_document_files_search_space_id;")
+    op.execute("DROP INDEX IF EXISTS ix_document_files_document_id;")
+    op.execute("DROP TABLE IF EXISTS document_files;")
+    op.execute("DROP TYPE IF EXISTS document_file_kind;")
--- a/surfsense_backend/alembic/versions/153_restore_automation_runs_to_zero_publication.py
+++ b/surfsense_backend/alembic/versions/153_restore_automation_runs_to_zero_publication.py
@ -0,0 +1,109 @@
+"""restore automation_runs to zero_publication
+
+Migration 149's ``SET TABLE`` dropped ``automation_runs`` (added in 148),
+breaking the dashboard live run ticker with a SchemaVersionNotSupported
+reload loop. Re-emit the publication with ``automation_runs`` using the
+``COMMENT`` bookend pattern so zero-cache fires its schema-change hook.
+
+Revision ID: 153
+Revises: 152
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+revision: str = "153"
+down_revision: str | None = "152"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+PUBLICATION_NAME = "zero_publication"
+
+DOCUMENT_COLS = [
+    "id",
+    "title",
+    "document_type",
+    "search_space_id",
+    "folder_id",
+    "created_by_id",
+    "status",
+    "created_at",
+    "updated_at",
+]
+
+USER_COLS = [
+    "id",
+    "pages_limit",
+    "pages_used",
+    "premium_credit_micros_limit",
+    "premium_credit_micros_used",
+]
+
+AUTOMATION_RUN_COLS = [
+    "id",
+    "automation_id",
+    "trigger_id",
+    "status",
+    "step_results",
+    "started_at",
+    "finished_at",
+    "created_at",
+]
+
+
+def _has_zero_version(conn, table: str) -> bool:
+    return (
+        conn.execute(
+            sa.text(
+                "SELECT 1 FROM information_schema.columns "
+                "WHERE table_name = :tbl AND column_name = '_0_version'"
+            ),
+            {"tbl": table},
+        ).fetchone()
+        is not None
+    )
+
+
+def _set_table_ddl(*, with_automation_runs: bool, conn) -> str:
+    doc_cols = DOCUMENT_COLS + (['"_0_version"'] if _has_zero_version(conn, "documents") else [])
+    user_cols = USER_COLS + (['"_0_version"'] if _has_zero_version(conn, "user") else [])
+    tables = [
+        "notifications",
+        f"documents ({', '.join(doc_cols)})",
+        "folders",
+        "search_source_connectors",
+        "new_chat_messages",
+        "chat_comments",
+        "chat_session_state",
+        f'"user" ({", ".join(user_cols)})',
+    ]
+    if with_automation_runs:
+        tables.append(f"automation_runs ({', '.join(AUTOMATION_RUN_COLS)})")
+    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + ", ".join(tables)
+
+
+def _resync(*, with_automation_runs: bool, tag: str) -> None:
+    conn = op.get_bind()
+    exists = conn.execute(
+        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
+        {"name": PUBLICATION_NAME},
+    ).fetchone()
+    if not exists:
+        return
+
+    tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
+    with tx:
+        conn.execute(sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-{tag}'"))
+        conn.execute(sa.text(_set_table_ddl(with_automation_runs=with_automation_runs, conn=conn)))
+        conn.execute(sa.text(f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-{tag}'"))
+
+
+def upgrade() -> None:
+    _resync(with_automation_runs=True, tag="153-resync")
+
+
+def downgrade() -> None:
+    _resync(with_automation_runs=False, tag="153-downgrade")
--- a/surfsense_backend/app/agents/new_chat/tools/jira/init.py
+++ b/surfsense_backend/app/agents/new_chat/tools/jira/init.py
@ -1,11 +0,0 @@
-"""Jira tools for creating, updating, and deleting issues."""
-
-from .create_issue import create_create_jira_issue_tool
-from .delete_issue import create_delete_jira_issue_tool
-from .update_issue import create_update_jira_issue_tool
-
-__all__ = [
-    "create_create_jira_issue_tool",
-    "create_delete_jira_issue_tool",
-    "create_update_jira_issue_tool",
-]
--- a/surfsense_backend/app/agents/new_chat/tools/jira/create_issue.py
+++ b/surfsense_backend/app/agents/new_chat/tools/jira/create_issue.py
@ -1,248 +0,0 @@
-import asyncio
-import logging
-from typing import Any
-
-from langchain_core.tools import tool
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm.attributes import flag_modified
-
-from app.agents.new_chat.tools.hitl import request_approval
-from app.connectors.jira_history import JiraHistoryConnector
-from app.db import async_session_maker
-from app.services.jira import JiraToolMetadataService
-
-logger = logging.getLogger(__name__)
-
-
-def create_create_jira_issue_tool(
-    db_session: AsyncSession | None = None,
-    search_space_id: int | None = None,
-    user_id: str | None = None,
-    connector_id: int | None = None,
-):
-    """Factory function to create the create_jira_issue tool.
-
-    The tool acquires its own short-lived ``AsyncSession`` per call via
-    :data:`async_session_maker`. This is critical for the compiled-agent
-    cache: the compiled graph (and therefore this closure) is reused
-    across HTTP requests, so capturing a per-request session here would
-    surface stale/closed sessions on cache hits. Per-call sessions also
-    keep the request's outer transaction free of long-running Jira API
-    blocking.
-
-    Args:
-        db_session: Reserved for registry compatibility. Per-call sessions
-            are opened via :data:`async_session_maker` inside the tool body.
-        search_space_id: Search space ID to find the Jira connector
-        user_id: User ID for fetching user-specific context
-        connector_id: Optional specific connector ID (if known)
-
-    Returns:
-        Configured create_jira_issue tool
-    """
-    del db_session  # per-call session — see docstring
-
-    @tool
-    async def create_jira_issue(
-        project_key: str,
-        summary: str,
-        issue_type: str = "Task",
-        description: str | None = None,
-        priority: str | None = None,
-    ) -> dict[str, Any]:
-        """Create a new issue in Jira.
-
-        Use this tool when the user explicitly asks to create a new Jira issue/ticket.
-
-        Args:
-            project_key: The Jira project key (e.g. "PROJ", "ENG").
-            summary: Short, descriptive issue title.
-            issue_type: Issue type (default "Task"). Others: "Bug", "Story", "Epic".
-            description: Optional description body for the issue.
-            priority: Optional priority name (e.g. "High", "Medium", "Low").
-
-        Returns:
-            Dictionary with status, issue_key, and message.
-
-            IMPORTANT:
-            - If status is "rejected", the user declined. Do NOT retry.
-            - If status is "insufficient_permissions", inform user to re-authenticate.
-        """
-        logger.info(
-            f"create_jira_issue called: project_key='{project_key}', summary='{summary}'"
-        )
-
-        if search_space_id is None or user_id is None:
-            return {"status": "error", "message": "Jira tool not properly configured."}
-
-        try:
-            async with async_session_maker() as db_session:
-                metadata_service = JiraToolMetadataService(db_session)
-                context = await metadata_service.get_creation_context(
-                    search_space_id, user_id
-                )
-
-                if "error" in context:
-                    return {"status": "error", "message": context["error"]}
-
-                accounts = context.get("accounts", [])
-                if accounts and all(a.get("auth_expired") for a in accounts):
-                    return {
-                        "status": "auth_error",
-                        "message": "All connected Jira accounts need re-authentication.",
-                        "connector_type": "jira",
-                    }
-
-                result = request_approval(
-                    action_type="jira_issue_creation",
-                    tool_name="create_jira_issue",
-                    params={
-                        "project_key": project_key,
-                        "summary": summary,
-                        "issue_type": issue_type,
-                        "description": description,
-                        "priority": priority,
-                        "connector_id": connector_id,
-                    },
-                    context=context,
-                )
-
-                if result.rejected:
-                    return {
-                        "status": "rejected",
-                        "message": "User declined. Do not retry or suggest alternatives.",
-                    }
-
-                final_project_key = result.params.get("project_key", project_key)
-                final_summary = result.params.get("summary", summary)
-                final_issue_type = result.params.get("issue_type", issue_type)
-                final_description = result.params.get("description", description)
-                final_priority = result.params.get("priority", priority)
-                final_connector_id = result.params.get("connector_id", connector_id)
-
-                if not final_summary or not final_summary.strip():
-                    return {
-                        "status": "error",
-                        "message": "Issue summary cannot be empty.",
-                    }
-                if not final_project_key:
-                    return {"status": "error", "message": "A project must be selected."}
-
-                from sqlalchemy.future import select
-
-                from app.db import SearchSourceConnector, SearchSourceConnectorType
-
-                actual_connector_id = final_connector_id
-                if actual_connector_id is None:
-                    result = await db_session.execute(
-                        select(SearchSourceConnector).filter(
-                            SearchSourceConnector.search_space_id == search_space_id,
-                            SearchSourceConnector.user_id == user_id,
-                            SearchSourceConnector.connector_type
-                            == SearchSourceConnectorType.JIRA_CONNECTOR,
-                        )
-                    )
-                    connector = result.scalars().first()
-                    if not connector:
-                        return {
-                            "status": "error",
-                            "message": "No Jira connector found.",
-                        }
-                    actual_connector_id = connector.id
-                else:
-                    result = await db_session.execute(
-                        select(SearchSourceConnector).filter(
-                            SearchSourceConnector.id == actual_connector_id,
-                            SearchSourceConnector.search_space_id == search_space_id,
-                            SearchSourceConnector.user_id == user_id,
-                            SearchSourceConnector.connector_type
-                            == SearchSourceConnectorType.JIRA_CONNECTOR,
-                        )
-                    )
-                    connector = result.scalars().first()
-                    if not connector:
-                        return {
-                            "status": "error",
-                            "message": "Selected Jira connector is invalid.",
-                        }
-
-                try:
-                    jira_history = JiraHistoryConnector(
-                        session=db_session, connector_id=actual_connector_id
-                    )
-                    jira_client = await jira_history._get_jira_client()
-                    api_result = await asyncio.to_thread(
-                        jira_client.create_issue,
-                        project_key=final_project_key,
-                        summary=final_summary,
-                        issue_type=final_issue_type,
-                        description=final_description,
-                        priority=final_priority,
-                    )
-                except Exception as api_err:
-                    if "status code 403" in str(api_err).lower():
-                        try:
-                            _conn = connector
-                            _conn.config = {**_conn.config, "auth_expired": True}
-                            flag_modified(_conn, "config")
-                            await db_session.commit()
-                        except Exception:
-                            pass
-                        return {
-                            "status": "insufficient_permissions",
-                            "connector_id": actual_connector_id,
-                            "message": "This Jira account needs additional permissions. Please re-authenticate in connector settings.",
-                        }
-                    raise
-
-                issue_key = api_result.get("key", "")
-                issue_url = (
-                    f"{jira_history._base_url}/browse/{issue_key}"
-                    if jira_history._base_url and issue_key
-                    else ""
-                )
-
-                kb_message_suffix = ""
-                try:
-                    from app.services.jira import JiraKBSyncService
-
-                    kb_service = JiraKBSyncService(db_session)
-                    kb_result = await kb_service.sync_after_create(
-                        issue_id=issue_key,
-                        issue_identifier=issue_key,
-                        issue_title=final_summary,
-                        description=final_description,
-                        state="To Do",
-                        connector_id=actual_connector_id,
-                        search_space_id=search_space_id,
-                        user_id=user_id,
-                    )
-                    if kb_result["status"] == "success":
-                        kb_message_suffix = (
-                            " Your knowledge base has also been updated."
-                        )
-                    else:
-                        kb_message_suffix = " This issue will be added to your knowledge base in the next scheduled sync."
-                except Exception as kb_err:
-                    logger.warning(f"KB sync after create failed: {kb_err}")
-                    kb_message_suffix = " This issue will be added to your knowledge base in the next scheduled sync."
-
-                return {
-                    "status": "success",
-                    "issue_key": issue_key,
-                    "issue_url": issue_url,
-                    "message": f"Jira issue {issue_key} created successfully.{kb_message_suffix}",
-                }
-
-        except Exception as e:
-            from langgraph.errors import GraphInterrupt
-
-            if isinstance(e, GraphInterrupt):
-                raise
-            logger.error(f"Error creating Jira issue: {e}", exc_info=True)
-            return {
-                "status": "error",
-                "message": "Something went wrong while creating the issue.",
-            }
-
-    return create_jira_issue
--- a/surfsense_backend/app/agents/new_chat/tools/jira/delete_issue.py
+++ b/surfsense_backend/app/agents/new_chat/tools/jira/delete_issue.py
@ -1,210 +0,0 @@
-import asyncio
-import logging
-from typing import Any
-
-from langchain_core.tools import tool
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm.attributes import flag_modified
-
-from app.agents.new_chat.tools.hitl import request_approval
-from app.connectors.jira_history import JiraHistoryConnector
-from app.db import async_session_maker
-from app.services.jira import JiraToolMetadataService
-
-logger = logging.getLogger(__name__)
-
-
-def create_delete_jira_issue_tool(
-    db_session: AsyncSession | None = None,
-    search_space_id: int | None = None,
-    user_id: str | None = None,
-    connector_id: int | None = None,
-):
-    """Factory function to create the delete_jira_issue tool.
-
-    The tool acquires its own short-lived ``AsyncSession`` per call via
-    :data:`async_session_maker`. This is critical for the compiled-agent
-    cache: the compiled graph (and therefore this closure) is reused
-    across HTTP requests, so capturing a per-request session here would
-    surface stale/closed sessions on cache hits.
-
-    Args:
-        db_session: Reserved for registry compatibility. Per-call sessions
-            are opened via :data:`async_session_maker` inside the tool body.
-        search_space_id: Search space ID to find the Jira connector
-        user_id: User ID for fetching user-specific context
-        connector_id: Optional specific connector ID (if known)
-
-    Returns:
-        Configured delete_jira_issue tool
-    """
-    del db_session  # per-call session — see docstring
-
-    @tool
-    async def delete_jira_issue(
-        issue_title_or_key: str,
-        delete_from_kb: bool = False,
-    ) -> dict[str, Any]:
-        """Delete a Jira issue.
-
-        Use this tool when the user asks to delete or remove a Jira issue.
-
-        Args:
-            issue_title_or_key: The issue key (e.g. "PROJ-42") or title.
-            delete_from_kb: Whether to also remove from the knowledge base.
-
-        Returns:
-            Dictionary with status, message, and deleted_from_kb.
-
-            IMPORTANT:
-            - If status is "rejected", do NOT retry.
-            - If status is "not_found", relay the message to the user.
-            - If status is "insufficient_permissions", inform user to re-authenticate.
-        """
-        logger.info(
-            f"delete_jira_issue called: issue_title_or_key='{issue_title_or_key}'"
-        )
-
-        if search_space_id is None or user_id is None:
-            return {"status": "error", "message": "Jira tool not properly configured."}
-
-        try:
-            async with async_session_maker() as db_session:
-                metadata_service = JiraToolMetadataService(db_session)
-                context = await metadata_service.get_deletion_context(
-                    search_space_id, user_id, issue_title_or_key
-                )
-
-                if "error" in context:
-                    error_msg = context["error"]
-                    if context.get("auth_expired"):
-                        return {
-                            "status": "auth_error",
-                            "message": error_msg,
-                            "connector_id": context.get("connector_id"),
-                            "connector_type": "jira",
-                        }
-                    if "not found" in error_msg.lower():
-                        return {"status": "not_found", "message": error_msg}
-                    return {"status": "error", "message": error_msg}
-
-                issue_data = context["issue"]
-                issue_key = issue_data["issue_id"]
-                document_id = issue_data["document_id"]
-                connector_id_from_context = context.get("account", {}).get("id")
-
-                result = request_approval(
-                    action_type="jira_issue_deletion",
-                    tool_name="delete_jira_issue",
-                    params={
-                        "issue_key": issue_key,
-                        "connector_id": connector_id_from_context,
-                        "delete_from_kb": delete_from_kb,
-                    },
-                    context=context,
-                )
-
-                if result.rejected:
-                    return {
-                        "status": "rejected",
-                        "message": "User declined. Do not retry or suggest alternatives.",
-                    }
-
-                final_issue_key = result.params.get("issue_key", issue_key)
-                final_connector_id = result.params.get(
-                    "connector_id", connector_id_from_context
-                )
-                final_delete_from_kb = result.params.get(
-                    "delete_from_kb", delete_from_kb
-                )
-
-                from sqlalchemy.future import select
-
-                from app.db import SearchSourceConnector, SearchSourceConnectorType
-
-                if not final_connector_id:
-                    return {
-                        "status": "error",
-                        "message": "No connector found for this issue.",
-                    }
-
-                result = await db_session.execute(
-                    select(SearchSourceConnector).filter(
-                        SearchSourceConnector.id == final_connector_id,
-                        SearchSourceConnector.search_space_id == search_space_id,
-                        SearchSourceConnector.user_id == user_id,
-                        SearchSourceConnector.connector_type
-                        == SearchSourceConnectorType.JIRA_CONNECTOR,
-                    )
-                )
-                connector = result.scalars().first()
-                if not connector:
-                    return {
-                        "status": "error",
-                        "message": "Selected Jira connector is invalid.",
-                    }
-
-                try:
-                    jira_history = JiraHistoryConnector(
-                        session=db_session, connector_id=final_connector_id
-                    )
-                    jira_client = await jira_history._get_jira_client()
-                    await asyncio.to_thread(jira_client.delete_issue, final_issue_key)
-                except Exception as api_err:
-                    if "status code 403" in str(api_err).lower():
-                        try:
-                            connector.config = {
-                                **connector.config,
-                                "auth_expired": True,
-                            }
-                            flag_modified(connector, "config")
-                            await db_session.commit()
-                        except Exception:
-                            pass
-                        return {
-                            "status": "insufficient_permissions",
-                            "connector_id": final_connector_id,
-                            "message": "This Jira account needs additional permissions. Please re-authenticate in connector settings.",
-                        }
-                    raise
-
-                deleted_from_kb = False
-                if final_delete_from_kb and document_id:
-                    try:
-                        from app.db import Document
-
-                        doc_result = await db_session.execute(
-                            select(Document).filter(Document.id == document_id)
-                        )
-                        document = doc_result.scalars().first()
-                        if document:
-                            await db_session.delete(document)
-                            await db_session.commit()
-                            deleted_from_kb = True
-                    except Exception as e:
-                        logger.error(f"Failed to delete document from KB: {e}")
-                        await db_session.rollback()
-
-                message = f"Jira issue {final_issue_key} deleted successfully."
-                if deleted_from_kb:
-                    message += " Also removed from the knowledge base."
-
-                return {
-                    "status": "success",
-                    "issue_key": final_issue_key,
-                    "deleted_from_kb": deleted_from_kb,
-                    "message": message,
-                }
-
-        except Exception as e:
-            from langgraph.errors import GraphInterrupt
-
-            if isinstance(e, GraphInterrupt):
-                raise
-            logger.error(f"Error deleting Jira issue: {e}", exc_info=True)
-            return {
-                "status": "error",
-                "message": "Something went wrong while deleting the issue.",
-            }
-
-    return delete_jira_issue
--- a/surfsense_backend/app/agents/new_chat/tools/jira/update_issue.py
+++ b/surfsense_backend/app/agents/new_chat/tools/jira/update_issue.py
@ -1,255 +0,0 @@
-import asyncio
-import logging
-from typing import Any
-
-from langchain_core.tools import tool
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm.attributes import flag_modified
-
-from app.agents.new_chat.tools.hitl import request_approval
-from app.connectors.jira_history import JiraHistoryConnector
-from app.db import async_session_maker
-from app.services.jira import JiraToolMetadataService
-
-logger = logging.getLogger(__name__)
-
-
-def create_update_jira_issue_tool(
-    db_session: AsyncSession | None = None,
-    search_space_id: int | None = None,
-    user_id: str | None = None,
-    connector_id: int | None = None,
-):
-    """Factory function to create the update_jira_issue tool.
-
-    The tool acquires its own short-lived ``AsyncSession`` per call via
-    :data:`async_session_maker`. This is critical for the compiled-agent
-    cache: the compiled graph (and therefore this closure) is reused
-    across HTTP requests, so capturing a per-request session here would
-    surface stale/closed sessions on cache hits.
-
-    Args:
-        db_session: Reserved for registry compatibility. Per-call sessions
-            are opened via :data:`async_session_maker` inside the tool body.
-        search_space_id: Search space ID to find the Jira connector
-        user_id: User ID for fetching user-specific context
-        connector_id: Optional specific connector ID (if known)
-
-    Returns:
-        Configured update_jira_issue tool
-    """
-    del db_session  # per-call session — see docstring
-
-    @tool
-    async def update_jira_issue(
-        issue_title_or_key: str,
-        new_summary: str | None = None,
-        new_description: str | None = None,
-        new_priority: str | None = None,
-    ) -> dict[str, Any]:
-        """Update an existing Jira issue.
-
-        Use this tool when the user asks to modify, edit, or update a Jira issue.
-
-        Args:
-            issue_title_or_key: The issue key (e.g. "PROJ-42") or title to identify the issue.
-            new_summary: Optional new title/summary for the issue.
-            new_description: Optional new description.
-            new_priority: Optional new priority name.
-
-        Returns:
-            Dictionary with status and message.
-
-            IMPORTANT:
-            - If status is "rejected", do NOT retry.
-            - If status is "not_found", relay the message and ask user to verify.
-            - If status is "insufficient_permissions", inform user to re-authenticate.
-        """
-        logger.info(
-            f"update_jira_issue called: issue_title_or_key='{issue_title_or_key}'"
-        )
-
-        if search_space_id is None or user_id is None:
-            return {"status": "error", "message": "Jira tool not properly configured."}
-
-        try:
-            async with async_session_maker() as db_session:
-                metadata_service = JiraToolMetadataService(db_session)
-                context = await metadata_service.get_update_context(
-                    search_space_id, user_id, issue_title_or_key
-                )
-
-                if "error" in context:
-                    error_msg = context["error"]
-                    if context.get("auth_expired"):
-                        return {
-                            "status": "auth_error",
-                            "message": error_msg,
-                            "connector_id": context.get("connector_id"),
-                            "connector_type": "jira",
-                        }
-                    if "not found" in error_msg.lower():
-                        return {"status": "not_found", "message": error_msg}
-                    return {"status": "error", "message": error_msg}
-
-                issue_data = context["issue"]
-                issue_key = issue_data["issue_id"]
-                document_id = issue_data.get("document_id")
-                connector_id_from_context = context.get("account", {}).get("id")
-
-                result = request_approval(
-                    action_type="jira_issue_update",
-                    tool_name="update_jira_issue",
-                    params={
-                        "issue_key": issue_key,
-                        "document_id": document_id,
-                        "new_summary": new_summary,
-                        "new_description": new_description,
-                        "new_priority": new_priority,
-                        "connector_id": connector_id_from_context,
-                    },
-                    context=context,
-                )
-
-                if result.rejected:
-                    return {
-                        "status": "rejected",
-                        "message": "User declined. Do not retry or suggest alternatives.",
-                    }
-
-                final_issue_key = result.params.get("issue_key", issue_key)
-                final_summary = result.params.get("new_summary", new_summary)
-                final_description = result.params.get(
-                    "new_description", new_description
-                )
-                final_priority = result.params.get("new_priority", new_priority)
-                final_connector_id = result.params.get(
-                    "connector_id", connector_id_from_context
-                )
-                final_document_id = result.params.get("document_id", document_id)
-
-                from sqlalchemy.future import select
-
-                from app.db import SearchSourceConnector, SearchSourceConnectorType
-
-                if not final_connector_id:
-                    return {
-                        "status": "error",
-                        "message": "No connector found for this issue.",
-                    }
-
-                result = await db_session.execute(
-                    select(SearchSourceConnector).filter(
-                        SearchSourceConnector.id == final_connector_id,
-                        SearchSourceConnector.search_space_id == search_space_id,
-                        SearchSourceConnector.user_id == user_id,
-                        SearchSourceConnector.connector_type
-                        == SearchSourceConnectorType.JIRA_CONNECTOR,
-                    )
-                )
-                connector = result.scalars().first()
-                if not connector:
-                    return {
-                        "status": "error",
-                        "message": "Selected Jira connector is invalid.",
-                    }
-
-                fields: dict[str, Any] = {}
-                if final_summary:
-                    fields["summary"] = final_summary
-                if final_description is not None:
-                    fields["description"] = {
-                        "type": "doc",
-                        "version": 1,
-                        "content": [
-                            {
-                                "type": "paragraph",
-                                "content": [
-                                    {"type": "text", "text": final_description}
-                                ],
-                            }
-                        ],
-                    }
-                if final_priority:
-                    fields["priority"] = {"name": final_priority}
-
-                if not fields:
-                    return {"status": "error", "message": "No changes specified."}
-
-                try:
-                    jira_history = JiraHistoryConnector(
-                        session=db_session, connector_id=final_connector_id
-                    )
-                    jira_client = await jira_history._get_jira_client()
-                    await asyncio.to_thread(
-                        jira_client.update_issue, final_issue_key, fields
-                    )
-                except Exception as api_err:
-                    if "status code 403" in str(api_err).lower():
-                        try:
-                            connector.config = {
-                                **connector.config,
-                                "auth_expired": True,
-                            }
-                            flag_modified(connector, "config")
-                            await db_session.commit()
-                        except Exception:
-                            pass
-                        return {
-                            "status": "insufficient_permissions",
-                            "connector_id": final_connector_id,
-                            "message": "This Jira account needs additional permissions. Please re-authenticate in connector settings.",
-                        }
-                    raise
-
-                issue_url = (
-                    f"{jira_history._base_url}/browse/{final_issue_key}"
-                    if jira_history._base_url and final_issue_key
-                    else ""
-                )
-
-                kb_message_suffix = ""
-                if final_document_id:
-                    try:
-                        from app.services.jira import JiraKBSyncService
-
-                        kb_service = JiraKBSyncService(db_session)
-                        kb_result = await kb_service.sync_after_update(
-                            document_id=final_document_id,
-                            issue_id=final_issue_key,
-                            user_id=user_id,
-                            search_space_id=search_space_id,
-                        )
-                        if kb_result["status"] == "success":
-                            kb_message_suffix = (
-                                " Your knowledge base has also been updated."
-                            )
-                        else:
-                            kb_message_suffix = (
-                                " The knowledge base will be updated in the next sync."
-                            )
-                    except Exception as kb_err:
-                        logger.warning(f"KB sync after update failed: {kb_err}")
-                        kb_message_suffix = (
-                            " The knowledge base will be updated in the next sync."
-                        )
-
-                return {
-                    "status": "success",
-                    "issue_key": final_issue_key,
-                    "issue_url": issue_url,
-                    "message": f"Jira issue {final_issue_key} updated successfully.{kb_message_suffix}",
-                }
-
-        except Exception as e:
-            from langgraph.errors import GraphInterrupt
-
-            if isinstance(e, GraphInterrupt):
-                raise
-            logger.error(f"Error updating Jira issue: {e}", exc_info=True)
-            return {
-                "status": "error",
-                "message": "Something went wrong while updating the issue.",
-            }
-
-    return update_jira_issue
--- a/surfsense_backend/app/connectors/jira_connector.py
+++ b/surfsense_backend/app/connectors/jira_connector.py
@ -1,648 +0,0 @@
-"""
-Jira Connector Module
-
-A module for retrieving data from Jira.
-Allows fetching issue lists and their comments, projects and more.
-Supports both OAuth 2.0 (preferred) and legacy API token authentication.
-"""
-
-import base64
-from datetime import datetime
-from typing import Any
-
-import requests
-
-
-class JiraConnector:
-    """Class for retrieving data from Jira."""
-
-    def __init__(
-        self,
-        base_url: str | None = None,
-        access_token: str | None = None,
-        cloud_id: str | None = None,
-        email: str | None = None,
-        api_token: str | None = None,
-    ):
-        """
-        Initialize the JiraConnector class.
-
-        Args:
-            base_url: Jira instance base URL (e.g., 'https://yourcompany.atlassian.net')
-            access_token: OAuth 2.0 access token (preferred method)
-            cloud_id: Atlassian cloud ID (used with OAuth for API URL construction)
-            email: Jira account email address (legacy method, used with api_token)
-            api_token: Jira API token (legacy method, used with email)
-        """
-        self.base_url = base_url.rstrip("/") if base_url else None
-        self.access_token = access_token
-        self.cloud_id = cloud_id
-        self.email = email
-        self.api_token = api_token
-        self.api_version = "3"  # Jira Cloud API version
-        self._use_oauth = access_token is not None
-
-    def set_oauth_credentials(
-        self, base_url: str, access_token: str, cloud_id: str | None = None
-    ) -> None:
-        """
-        Set OAuth 2.0 credentials (preferred method).
-
-        Args:
-            base_url: Jira instance base URL
-            access_token: OAuth 2.0 access token
-            cloud_id: Atlassian cloud ID (optional, used for API URL construction)
-        """
-        self.base_url = base_url.rstrip("/")
-        self.access_token = access_token
-        self.cloud_id = cloud_id
-        self._use_oauth = True
-
-    def set_credentials(self, base_url: str, email: str, api_token: str) -> None:
-        """
-        Set the Jira credentials (legacy method using API token).
-
-        Args:
-            base_url: Jira instance base URL
-            email: Jira account email address
-            api_token: Jira API token
-        """
-        self.base_url = base_url.rstrip("/")
-        self.email = email
-        self.api_token = api_token
-        self._use_oauth = False
-
-    def set_email(self, email: str) -> None:
-        """
-        Set the Jira account email (legacy method).
-
-        Args:
-            email: Jira account email address
-        """
-        self.email = email
-        self._use_oauth = False
-
-    def set_api_token(self, api_token: str) -> None:
-        """
-        Set the Jira API token (legacy method).
-
-        Args:
-            api_token: Jira API token
-        """
-        self.api_token = api_token
-        self._use_oauth = False
-
-    def get_headers(self) -> dict[str, str]:
-        """
-        Get headers for Jira API requests.
-
-        Uses OAuth Bearer token if available, otherwise falls back to Basic Auth.
-
-        Returns:
-            Dictionary of headers
-
-        Raises:
-            ValueError: If credentials have not been set
-        """
-        if self._use_oauth:
-            # OAuth 2.0 authentication
-            if not self.base_url or not self.access_token:
-                raise ValueError(
-                    "Jira OAuth credentials not initialized. Call set_oauth_credentials() first."
-                )
-
-            return {
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.access_token}",
-                "Accept": "application/json",
-            }
-        else:
-            # Legacy Basic Auth
-            if not all([self.base_url, self.email, self.api_token]):
-                raise ValueError(
-                    "Jira credentials not initialized. Call set_credentials() first."
-                )
-
-            # Create Basic Auth header using email:api_token
-            auth_str = f"{self.email}:{self.api_token}"
-            auth_bytes = auth_str.encode("utf-8")
-            auth_header = "Basic " + base64.b64encode(auth_bytes).decode("ascii")
-
-            return {
-                "Content-Type": "application/json",
-                "Authorization": auth_header,
-                "Accept": "application/json",
-            }
-
-    def make_api_request(
-        self,
-        endpoint: str,
-        params: dict[str, Any] | None = None,
-        method: str = "GET",
-        json_payload: dict[str, Any] | None = None,
-    ) -> dict[str, Any]:
-        """
-        Make a request to the Jira API.
-
-        Args:
-            endpoint: API endpoint (without base URL)
-            params: Query parameters for the request (optional)
-            method: HTTP method (GET or POST)
-            json_payload: JSON payload for POST requests (optional)
-
-        Returns:
-            Response data from the API
-
-        Raises:
-            ValueError: If credentials have not been set
-            Exception: If the API request fails
-        """
-        headers = self.get_headers()
-
-        # Construct API URL based on authentication method
-        if self._use_oauth and self.cloud_id:
-            # Use Atlassian API gateway with cloud_id for OAuth
-            url = f"https://api.atlassian.com/ex/jira/{self.cloud_id}/rest/api/{self.api_version}/{endpoint}"
-        else:
-            # Use direct base URL (works for both OAuth and legacy)
-            url = f"{self.base_url}/rest/api/{self.api_version}/{endpoint}"
-
-        method_upper = method.upper()
-        if method_upper == "POST":
-            response = requests.post(
-                url, headers=headers, json=json_payload, timeout=500
-            )
-        elif method_upper == "PUT":
-            response = requests.put(
-                url, headers=headers, json=json_payload, timeout=500
-            )
-        elif method_upper == "DELETE":
-            response = requests.delete(url, headers=headers, params=params, timeout=500)
-        else:
-            response = requests.get(url, headers=headers, params=params, timeout=500)
-
-        if response.status_code in (200, 201, 204):
-            if response.status_code == 204 or not response.text:
-                return {"status": "success"}
-            return response.json()
-        else:
-            raise Exception(
-                f"API request failed with status code {response.status_code}: {response.text}"
-            )
-
-    def get_all_projects(self) -> dict[str, Any]:
-        """
-        Fetch all projects from Jira.
-
-        Returns:
-            List of project objects
-
-        Raises:
-            ValueError: If credentials have not been set
-            Exception: If the API request fails
-        """
-        return self.make_api_request("project/search")
-
-    def get_all_issues(self, project_key: str | None = None) -> list[dict[str, Any]]:
-        """
-        Fetch all issues from Jira.
-
-        Args:
-            project_key: Optional project key to filter issues (e.g., 'PROJ')
-
-        Returns:
-            List of issue objects
-
-        Raises:
-            ValueError: If credentials have not been set
-            Exception: If the API request fails
-        """
-        jql = "ORDER BY created DESC"
-        if project_key:
-            jql = f'project = "{project_key}" ' + jql
-
-        fields = [
-            "summary",
-            "description",
-            "status",
-            "assignee",
-            "reporter",
-            "created",
-            "updated",
-            "priority",
-            "issuetype",
-            "project",
-        ]
-
-        all_issues = []
-        start_at = 0
-        max_results = 100
-
-        all_issues = []
-        start_at = 0
-
-        while True:
-            json_payload = {
-                "jql": jql,
-                "fields": fields,  # API accepts list
-                "maxResults": max_results,
-                "startAt": start_at,
-            }
-            result = self.make_api_request(
-                "search/jql", json_payload=json_payload, method="POST"
-            )
-
-            if not isinstance(result, dict) or "issues" not in result:
-                raise Exception("Invalid response from Jira API")
-
-            issues = result["issues"]
-            all_issues.extend(issues)
-
-            print(f"Fetched {len(issues)} issues (startAt={start_at})")
-
-            total = result.get("total", 0)
-            if start_at + len(issues) >= total:
-                break
-
-            start_at += len(issues)
-
-        return all_issues
-
-    def get_issues_by_date_range(
-        self,
-        start_date: str,
-        end_date: str,
-        include_comments: bool = True,
-        project_key: str | None = None,
-    ) -> tuple[list[dict[str, Any]], str | None]:
-        """
-        Fetch issues within a date range.
-
-        Args:
-            start_date: Start date in YYYY-MM-DD format
-            end_date: End date in YYYY-MM-DD format (inclusive)
-            include_comments: Whether to include comments in the response
-            project_key: Optional project key to filter issues
-
-        Returns:
-            Tuple containing (issues list, error message or None)
-        """
-        try:
-            # Build JQL query for date range
-            # Query issues that were either created OR updated within the date range
-            # Use end_date + 1 day with < operator to include the full end date
-            from datetime import datetime, timedelta
-
-            # Parse end_date and add 1 day for inclusive end date
-            end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
-            end_date_next = (end_date_obj + timedelta(days=1)).strftime("%Y-%m-%d")
-
-            # Check both created and updated dates to catch all relevant issues
-            # Use 'created' and 'updated' (standard JQL field names)
-            date_filter = (
-                f"(created >= '{start_date}' AND created < '{end_date_next}') "
-                f"OR (updated >= '{start_date}' AND updated < '{end_date_next}')"
-            )
-
-            jql = f"{date_filter} ORDER BY created DESC"
-            if project_key:
-                jql = f'project = "{project_key}" AND ({date_filter}) ORDER BY created DESC'
-
-            # Define fields to retrieve
-            fields = [
-                "summary",
-                "description",
-                "status",
-                "assignee",
-                "reporter",
-                "created",
-                "updated",
-                "priority",
-                "issuetype",
-                "project",
-            ]
-
-            if include_comments:
-                fields.append("comment")
-
-            params = {
-                "jql": jql,
-                "fields": ",".join(fields),
-                "maxResults": 100,
-                "startAt": 0,
-            }
-
-            all_issues = []
-            start_at = 0
-
-            while True:
-                params["startAt"] = start_at
-
-                result = self.make_api_request("search/jql", params)
-
-                if not isinstance(result, dict) or "issues" not in result:
-                    return [], "Invalid response from Jira API"
-
-                issues = result["issues"]
-                all_issues.extend(issues)
-
-                # Check if there are more issues to fetch
-                total = result.get("total", 0)
-                if start_at + len(issues) >= total:
-                    break
-
-                start_at += len(issues)
-
-            if not all_issues:
-                return [], "No issues found in the specified date range."
-
-            return all_issues, None
-
-        except Exception as e:
-            return [], f"Error fetching issues: {e!s}"
-
-    def get_myself(self) -> dict[str, Any]:
-        """Fetch the current user's profile (health check)."""
-        return self.make_api_request("myself")
-
-    def get_projects(self) -> list[dict[str, Any]]:
-        """Fetch all projects the user has access to."""
-        result = self.make_api_request("project/search")
-        return result.get("values", [])
-
-    def get_issue_types(self) -> list[dict[str, Any]]:
-        """Fetch all issue types."""
-        return self.make_api_request("issuetype")
-
-    def get_priorities(self) -> list[dict[str, Any]]:
-        """Fetch all priority levels."""
-        return self.make_api_request("priority")
-
-    def get_issue(self, issue_id_or_key: str) -> dict[str, Any]:
-        """Fetch a single issue by ID or key."""
-        return self.make_api_request(f"issue/{issue_id_or_key}")
-
-    def create_issue(
-        self,
-        project_key: str,
-        summary: str,
-        issue_type: str = "Task",
-        description: str | None = None,
-        priority: str | None = None,
-        assignee_id: str | None = None,
-    ) -> dict[str, Any]:
-        """Create a new Jira issue."""
-        fields: dict[str, Any] = {
-            "project": {"key": project_key},
-            "summary": summary,
-            "issuetype": {"name": issue_type},
-        }
-        if description:
-            fields["description"] = {
-                "type": "doc",
-                "version": 1,
-                "content": [
-                    {
-                        "type": "paragraph",
-                        "content": [{"type": "text", "text": description}],
-                    }
-                ],
-            }
-        if priority:
-            fields["priority"] = {"name": priority}
-        if assignee_id:
-            fields["assignee"] = {"accountId": assignee_id}
-
-        return self.make_api_request(
-            "issue", method="POST", json_payload={"fields": fields}
-        )
-
-    def update_issue(
-        self, issue_id_or_key: str, fields: dict[str, Any]
-    ) -> dict[str, Any]:
-        """Update an existing Jira issue fields."""
-        return self.make_api_request(
-            f"issue/{issue_id_or_key}",
-            method="PUT",
-            json_payload={"fields": fields},
-        )
-
-    def delete_issue(self, issue_id_or_key: str) -> dict[str, Any]:
-        """Delete a Jira issue."""
-        return self.make_api_request(f"issue/{issue_id_or_key}", method="DELETE")
-
-    def get_transitions(self, issue_id_or_key: str) -> list[dict[str, Any]]:
-        """Get available transitions for an issue (for status changes)."""
-        result = self.make_api_request(f"issue/{issue_id_or_key}/transitions")
-        return result.get("transitions", [])
-
-    def transition_issue(
-        self, issue_id_or_key: str, transition_id: str
-    ) -> dict[str, Any]:
-        """Transition an issue to a new status."""
-        return self.make_api_request(
-            f"issue/{issue_id_or_key}/transitions",
-            method="POST",
-            json_payload={"transition": {"id": transition_id}},
-        )
-
-    def format_issue(self, issue: dict[str, Any]) -> dict[str, Any]:
-        """
-        Format an issue for easier consumption.
-
-        Args:
-            issue: The issue object from Jira API
-
-        Returns:
-            Formatted issue dictionary
-        """
-        fields = issue.get("fields", {})
-
-        # Extract basic issue details
-        formatted = {
-            "id": issue.get("id", ""),
-            "key": issue.get("key", ""),
-            "title": fields.get("summary", ""),
-            "description": fields.get("description", ""),
-            "status": (
-                fields.get("status", {}).get("name", "Unknown")
-                if fields.get("status")
-                else "Unknown"
-            ),
-            "status_category": (
-                fields.get("status", {})
-                .get("statusCategory", {})
-                .get("name", "Unknown")
-                if fields.get("status")
-                else "Unknown"
-            ),
-            "priority": (
-                fields.get("priority", {}).get("name", "Unknown")
-                if fields.get("priority")
-                else "Unknown"
-            ),
-            "issue_type": (
-                fields.get("issuetype", {}).get("name", "Unknown")
-                if fields.get("issuetype")
-                else "Unknown"
-            ),
-            "project": (
-                fields.get("project", {}).get("key", "Unknown")
-                if fields.get("project")
-                else "Unknown"
-            ),
-            "created_at": fields.get("created", ""),
-            "updated_at": fields.get("updated", ""),
-            "reporter": (
-                {
-                    "account_id": (
-                        fields.get("reporter", {}).get("accountId", "")
-                        if fields.get("reporter")
-                        else ""
-                    ),
-                    "display_name": (
-                        fields.get("reporter", {}).get("displayName", "Unknown")
-                        if fields.get("reporter")
-                        else "Unknown"
-                    ),
-                    "email": (
-                        fields.get("reporter", {}).get("emailAddress", "")
-                        if fields.get("reporter")
-                        else ""
-                    ),
-                }
-                if fields.get("reporter")
-                else {"account_id": "", "display_name": "Unknown", "email": ""}
-            ),
-            "assignee": (
-                {
-                    "account_id": fields.get("assignee", {}).get("accountId", ""),
-                    "display_name": fields.get("assignee", {}).get(
-                        "displayName", "Unknown"
-                    ),
-                    "email": fields.get("assignee", {}).get("emailAddress", ""),
-                }
-                if fields.get("assignee")
-                else None
-            ),
-            "comments": [],
-        }
-
-        # Extract comments if available
-        if "comment" in fields and "comments" in fields["comment"]:
-            for comment in fields["comment"]["comments"]:
-                formatted_comment = {
-                    "id": comment.get("id", ""),
-                    "body": comment.get("body", ""),
-                    "created_at": comment.get("created", ""),
-                    "updated_at": comment.get("updated", ""),
-                    "author": (
-                        {
-                            "account_id": (
-                                comment.get("author", {}).get("accountId", "")
-                                if comment.get("author")
-                                else ""
-                            ),
-                            "display_name": (
-                                comment.get("author", {}).get("displayName", "Unknown")
-                                if comment.get("author")
-                                else "Unknown"
-                            ),
-                            "email": (
-                                comment.get("author", {}).get("emailAddress", "")
-                                if comment.get("author")
-                                else ""
-                            ),
-                        }
-                        if comment.get("author")
-                        else {"account_id": "", "display_name": "Unknown", "email": ""}
-                    ),
-                }
-                formatted["comments"].append(formatted_comment)
-
-        return formatted
-
-    def format_issue_to_markdown(self, issue: dict[str, Any]) -> str:
-        """
-        Convert an issue to markdown format.
-
-        Args:
-            issue: The issue object (either raw or formatted)
-
-        Returns:
-            Markdown string representation of the issue
-        """
-        # Format the issue if it's not already formatted
-        if "key" not in issue:
-            issue = self.format_issue(issue)
-
-        # Build the markdown content
-        markdown = (
-            f"# {issue.get('key', 'No Key')}: {issue.get('title', 'No Title')}\n\n"
-        )
-
-        if issue.get("status"):
-            markdown += f"**Status:** {issue['status']}\n"
-
-        if issue.get("priority"):
-            markdown += f"**Priority:** {issue['priority']}\n"
-
-        if issue.get("issue_type"):
-            markdown += f"**Type:** {issue['issue_type']}\n"
-
-        if issue.get("project"):
-            markdown += f"**Project:** {issue['project']}\n\n"
-
-        if issue.get("assignee") and issue["assignee"].get("display_name"):
-            markdown += f"**Assignee:** {issue['assignee']['display_name']}\n"
-
-        if issue.get("reporter") and issue["reporter"].get("display_name"):
-            markdown += f"**Reporter:** {issue['reporter']['display_name']}\n"
-
-        if issue.get("created_at"):
-            created_date = self.format_date(issue["created_at"])
-            markdown += f"**Created:** {created_date}\n"
-
-        if issue.get("updated_at"):
-            updated_date = self.format_date(issue["updated_at"])
-            markdown += f"**Updated:** {updated_date}\n\n"
-
-        if issue.get("description"):
-            markdown += f"## Description\n\n{issue['description']}\n\n"
-
-        if issue.get("comments"):
-            markdown += f"## Comments ({len(issue['comments'])})\n\n"
-
-            for comment in issue["comments"]:
-                author_name = "Unknown"
-                if comment.get("author") and comment["author"].get("display_name"):
-                    author_name = comment["author"]["display_name"]
-
-                comment_date = "Unknown date"
-                if comment.get("created_at"):
-                    comment_date = self.format_date(comment["created_at"])
-
-                markdown += f"### {author_name} ({comment_date})\n\n{comment.get('body', '')}\n\n---\n\n"
-
-        return markdown
-
-    @staticmethod
-    def format_date(iso_date: str) -> str:
-        """
-        Format an ISO date string to a more readable format.
-
-        Args:
-            iso_date: ISO format date string
-
-        Returns:
-            Formatted date string
-        """
-        if not iso_date or not isinstance(iso_date, str):
-            return "Unknown date"
-
-        try:
-            # Jira dates are typically in format: 2023-01-01T12:00:00.000+0000
-            dt = datetime.fromisoformat(iso_date.replace("Z", "+00:00"))
-            return dt.strftime("%Y-%m-%d %H:%M:%S")
-        except ValueError:
-            return iso_date
--- a/surfsense_backend/app/connectors/jira_history.py
+++ b/surfsense_backend/app/connectors/jira_history.py
@ -1,350 +0,0 @@
-"""
-Jira OAuth Connector.
-
-Handles OAuth-based authentication and token refresh for Jira API access.
-Supports both OAuth 2.0 (preferred) and legacy API token authentication.
-"""
-
-import logging
-from typing import Any
-
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.future import select
-
-from app.config import config
-from app.connectors.jira_connector import JiraConnector
-from app.db import SearchSourceConnector
-from app.schemas.atlassian_auth_credentials import AtlassianAuthCredentialsBase
-from app.utils.oauth_security import TokenEncryption
-
-logger = logging.getLogger(__name__)
-
-
-class JiraHistoryConnector:
-    """
-    Jira connector with OAuth support and automatic token refresh.
-
-    This connector uses OAuth 2.0 access tokens to authenticate with the
-    Jira API. It automatically refreshes expired tokens when needed.
-    Also supports legacy API token authentication for backward compatibility.
-    """
-
-    def __init__(
-        self,
-        session: AsyncSession,
-        connector_id: int,
-        credentials: AtlassianAuthCredentialsBase | None = None,
-    ):
-        """
-        Initialize the JiraHistoryConnector with auto-refresh capability.
-
-        Args:
-            session: Database session for updating connector
-            connector_id: Connector ID for direct updates
-            credentials: Jira OAuth credentials (optional, will be loaded from DB if not provided)
-        """
-        self._session = session
-        self._connector_id = connector_id
-        self._credentials = credentials
-        self._cloud_id: str | None = None
-        self._base_url: str | None = None
-        self._jira_client: JiraConnector | None = None
-        self._use_oauth = True
-        self._legacy_email: str | None = None
-        self._legacy_api_token: str | None = None
-
-    async def _get_valid_token(self) -> str:
-        """
-        Get valid Jira access token, refreshing if needed.
-
-        Returns:
-            Valid access token
-
-        Raises:
-            ValueError: If credentials are missing or invalid
-            Exception: If token refresh fails
-        """
-        # Load credentials from DB if not provided
-        if self._credentials is None:
-            result = await self._session.execute(
-                select(SearchSourceConnector).filter(
-                    SearchSourceConnector.id == self._connector_id
-                )
-            )
-            connector = result.scalars().first()
-
-            if not connector:
-                raise ValueError(f"Connector {self._connector_id} not found")
-
-            config_data = connector.config.copy()
-
-            # Check if using OAuth or legacy API token
-            is_oauth = config_data.get("_token_encrypted", False) or config_data.get(
-                "access_token"
-            )
-
-            if is_oauth:
-                # OAuth 2.0 authentication
-                # Check if access_token exists before processing
-                raw_access_token = config_data.get("access_token")
-                if not raw_access_token:
-                    raise ValueError(
-                        "Jira access token not found. "
-                        "Please reconnect your Jira account."
-                    )
-
-                if not config.SECRET_KEY:
-                    raise ValueError(
-                        "SECRET_KEY not configured but tokens are marked as encrypted"
-                    )
-
-                try:
-                    token_encryption = TokenEncryption(config.SECRET_KEY)
-
-                    # Decrypt access_token
-                    if config_data.get("access_token"):
-                        config_data["access_token"] = token_encryption.decrypt_token(
-                            config_data["access_token"]
-                        )
-                        logger.info(
-                            f"Decrypted Jira access token for connector {self._connector_id}"
-                        )
-
-                    # Decrypt refresh_token if present
-                    if config_data.get("refresh_token"):
-                        config_data["refresh_token"] = token_encryption.decrypt_token(
-                            config_data["refresh_token"]
-                        )
-                        logger.info(
-                            f"Decrypted Jira refresh token for connector {self._connector_id}"
-                        )
-                except Exception as e:
-                    logger.error(
-                        f"Failed to decrypt Jira credentials for connector {self._connector_id}: {e!s}"
-                    )
-                    raise ValueError(
-                        f"Failed to decrypt Jira credentials: {e!s}"
-                    ) from e
-
-                # Final validation after decryption
-                final_token = config_data.get("access_token")
-                if not final_token or (
-                    isinstance(final_token, str) and not final_token.strip()
-                ):
-                    raise ValueError(
-                        "Jira access token is invalid or empty. "
-                        "Please reconnect your Jira account."
-                    )
-
-                try:
-                    self._credentials = AtlassianAuthCredentialsBase.from_dict(
-                        config_data
-                    )
-                    self._cloud_id = config_data.get("cloud_id")
-                    self._base_url = config_data.get("base_url")
-                    self._use_oauth = True
-                except Exception as e:
-                    raise ValueError(f"Invalid Jira OAuth credentials: {e!s}") from e
-            else:
-                # Legacy API token authentication
-                self._legacy_email = config_data.get("JIRA_EMAIL")
-                self._legacy_api_token = config_data.get("JIRA_API_TOKEN")
-                self._base_url = config_data.get("JIRA_BASE_URL")
-                self._use_oauth = False
-
-                if (
-                    not self._legacy_email
-                    or not self._legacy_api_token
-                    or not self._base_url
-                ):
-                    raise ValueError("Jira credentials not found in connector config")
-
-        # Check if token is expired and refreshable (only for OAuth)
-        if (
-            self._use_oauth
-            and self._credentials.is_expired
-            and self._credentials.is_refreshable
-        ):
-            try:
-                logger.info(
-                    f"Jira token expired for connector {self._connector_id}, refreshing..."
-                )
-
-                # Get connector for refresh
-                result = await self._session.execute(
-                    select(SearchSourceConnector).filter(
-                        SearchSourceConnector.id == self._connector_id
-                    )
-                )
-                connector = result.scalars().first()
-
-                if not connector:
-                    raise RuntimeError(
-                        f"Connector {self._connector_id} not found; cannot refresh token."
-                    )
-
-                # Lazy import to avoid circular dependency
-                from app.routes.jira_add_connector_route import refresh_jira_token
-
-                connector = await refresh_jira_token(self._session, connector)
-
-                # Reload credentials after refresh
-                config_data = connector.config.copy()
-                token_encrypted = config_data.get("_token_encrypted", False)
-                if token_encrypted and config.SECRET_KEY:
-                    token_encryption = TokenEncryption(config.SECRET_KEY)
-                    if config_data.get("access_token"):
-                        config_data["access_token"] = token_encryption.decrypt_token(
-                            config_data["access_token"]
-                        )
-                    if config_data.get("refresh_token"):
-                        config_data["refresh_token"] = token_encryption.decrypt_token(
-                            config_data["refresh_token"]
-                        )
-
-                self._credentials = AtlassianAuthCredentialsBase.from_dict(config_data)
-                self._cloud_id = config_data.get("cloud_id")
-                self._base_url = config_data.get("base_url")
-
-                # Invalidate cached client so it's recreated with new token
-                self._jira_client = None
-
-                logger.info(
-                    f"Successfully refreshed Jira token for connector {self._connector_id}"
-                )
-            except Exception as e:
-                logger.error(
-                    f"Failed to refresh Jira token for connector {self._connector_id}: {e!s}"
-                )
-                raise Exception(
-                    f"Failed to refresh Jira OAuth credentials: {e!s}"
-                ) from e
-
-        if self._use_oauth:
-            return self._credentials.access_token
-        else:
-            # For legacy auth, return empty string (not used for token-based auth)
-            return ""
-
-    async def _get_jira_client(self) -> JiraConnector:
-        """
-        Get or create JiraConnector with valid credentials.
-
-        Returns:
-            JiraConnector instance
-        """
-        if self._jira_client is None:
-            if self._use_oauth:
-                # Ensure we have valid token (will refresh if needed)
-                await self._get_valid_token()
-
-                self._jira_client = JiraConnector(
-                    base_url=self._base_url,
-                    access_token=self._credentials.access_token,
-                    cloud_id=self._cloud_id,
-                )
-            else:
-                # Legacy API token authentication
-                self._jira_client = JiraConnector(
-                    base_url=self._base_url,
-                    email=self._legacy_email,
-                    api_token=self._legacy_api_token,
-                )
-        else:
-            # If OAuth, refresh token if expired before returning client
-            if self._use_oauth:
-                await self._get_valid_token()
-                # Update client with new token if it was refreshed
-                if self._credentials:
-                    self._jira_client.set_oauth_credentials(
-                        base_url=self._base_url or "",
-                        access_token=self._credentials.access_token,
-                        cloud_id=self._cloud_id,
-                    )
-
-        return self._jira_client
-
-    async def get_issues_by_date_range(
-        self,
-        start_date: str,
-        end_date: str,
-        include_comments: bool = True,
-        project_key: str | None = None,
-    ) -> tuple[list[dict[str, Any]], str | None]:
-        """
-        Fetch issues within a date range.
-        This method wraps JiraConnector.get_issues_by_date_range() with automatic token refresh.
-
-        Args:
-            start_date: Start date in YYYY-MM-DD format
-            end_date: End date in YYYY-MM-DD format (inclusive)
-            include_comments: Whether to include comments in the response
-            project_key: Optional project key to filter issues
-
-        Returns:
-            Tuple containing (issues list, error message or None)
-        """
-        # Ensure token is valid (will refresh if needed)
-        if self._use_oauth:
-            await self._get_valid_token()
-
-        # Get client with valid credentials
-        client = await self._get_jira_client()
-
-        # JiraConnector methods are synchronous, so we call them directly
-        # Token refresh has already been handled above
-        return client.get_issues_by_date_range(
-            start_date=start_date,
-            end_date=end_date,
-            include_comments=include_comments,
-            project_key=project_key,
-        )
-
-    def format_issue(self, issue: dict[str, Any]) -> dict[str, Any]:
-        """
-        Format an issue for easier consumption.
-        Wraps JiraConnector.format_issue().
-
-        Args:
-            issue: The issue object from Jira API
-
-        Returns:
-            Formatted issue dictionary
-        """
-        # This is a synchronous method that doesn't need token refresh
-        # since it just formats data that's already been fetched
-        if self._jira_client is None:
-            # Create a minimal client just for formatting (doesn't need credentials)
-            self._jira_client = JiraConnector()
-        return self._jira_client.format_issue(issue)
-
-    def format_issue_to_markdown(self, issue: dict[str, Any]) -> str:
-        """
-        Convert an issue to markdown format.
-        Wraps JiraConnector.format_issue_to_markdown().
-
-        Args:
-            issue: The issue object (either raw or formatted)
-
-        Returns:
-            Markdown string representation of the issue
-        """
-        # This is a synchronous method that doesn't need token refresh
-        # since it just formats data that's already been fetched
-        if self._jira_client is None:
-            # Create a minimal client just for formatting (doesn't need credentials)
-            self._jira_client = JiraConnector()
-        return self._jira_client.format_issue_to_markdown(issue)
-
-    async def close(self):
-        """Close any resources (currently no-op for JiraConnector)."""
-        # JiraConnector doesn't maintain persistent connections, so nothing to close
-        self._jira_client = None
-
-    async def __aenter__(self):
-        """Async context manager entry."""
-        return self
-
-    async def __aexit__(self, exc_type, exc_val, exc_tb):
-        """Async context manager exit."""
-        await self.close()
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -1478,6 +1478,11 @@ class Document(BaseModel, TimestampMixin):
    chunks = relationship(
        "Chunk", back_populates="document", cascade="all, delete-orphan"
    )
+    # Original upload + future derived artifacts (redacted, filled-form).
+    # Model lives in app.file_storage.persistence to keep that feature cohesive.
+    files = relationship(
+        "DocumentFile", back_populates="document", cascade="all, delete-orphan"
+    )


 class DocumentVersion(BaseModel, TimestampMixin):
@ -2931,6 +2936,7 @@ from app.automations.persistence import (  # noqa: E402, F401
    AutomationRun,
    AutomationTrigger,
 )
+from app.file_storage.persistence import DocumentFile  # noqa: E402, F401

 engine = create_async_engine(
    DATABASE_URL,
--- a/surfsense_backend/app/file_storage/init.py
+++ b/surfsense_backend/app/file_storage/init.py
@ -0,0 +1,15 @@
+"""Durable storage for original uploaded files (and future derived artifacts).
+
+Public surface: resolve the configured backend via :func:`get_storage_backend`
+and persist/retrieve a document's files via :mod:`app.file_storage.service`.
+"""
+
+from __future__ import annotations
+
+from app.file_storage.backends.base import StorageBackend
+from app.file_storage.factory import get_storage_backend
+
+__all__ = [
+    "StorageBackend",
+    "get_storage_backend",
+]
--- a/surfsense_backend/app/file_storage/api.py
+++ b/surfsense_backend/app/file_storage/api.py
@ -0,0 +1,89 @@
+"""HTTP routes for document file storage (metadata listing + original download)."""
+
+from __future__ import annotations
+
+from urllib.parse import quote
+
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import StreamingResponse
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import Document, Permission, User, get_async_session
+from app.file_storage.persistence.enums import DocumentFileKind
+from app.file_storage.schemas import DocumentFileRead
+from app.file_storage.service import (
+    get_document_file,
+    list_document_files,
+    open_document_file_stream,
+)
+from app.users import current_active_user
+from app.utils.rbac import check_permission
+
+router = APIRouter()
+
+
+async def _load_readable_document(
+    *, document_id: int, session: AsyncSession, user: User
+) -> Document:
+    """Load a document the user may read, or raise 404/403."""
+    document = (
+        await session.execute(select(Document).where(Document.id == document_id))
+    ).scalar_one_or_none()
+    if document is None:
+        raise HTTPException(status_code=404, detail="Document not found")
+
+    await check_permission(
+        session,
+        user,
+        document.search_space_id,
+        Permission.DOCUMENTS_READ.value,
+        "You don't have permission to read documents in this search space",
+    )
+    return document
+
+
+def _content_disposition(filename: str) -> str:
+    """Build an attachment header safe for arbitrary filenames (RFC 5987)."""
+    fallback = filename.encode("ascii", "ignore").decode("ascii") or "download"
+    fallback = fallback.replace('"', "")
+    return f"attachment; filename=\"{fallback}\"; filename*=UTF-8''{quote(filename)}"
+
+
+@router.get(
+    "/documents/{document_id}/files",
+    response_model=list[DocumentFileRead],
+)
+async def read_document_files(
+    document_id: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+) -> list[DocumentFileRead]:
+    """Return metadata for every stored file of a document (gates the UI)."""
+    await _load_readable_document(document_id=document_id, session=session, user=user)
+    records = await list_document_files(session, document_id=document_id)
+    return [DocumentFileRead.model_validate(r) for r in records]
+
+
+@router.get("/documents/{document_id}/download-original")
+async def download_original_document_file(
+    document_id: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+) -> StreamingResponse:
+    """Stream the document's original uploaded file."""
+    await _load_readable_document(document_id=document_id, session=session, user=user)
+
+    record = await get_document_file(
+        session, document_id=document_id, kind=DocumentFileKind.ORIGINAL
+    )
+    if record is None:
+        raise HTTPException(
+            status_code=404, detail="No original file stored for this document"
+        )
+
+    return StreamingResponse(
+        open_document_file_stream(record),
+        media_type=record.mime_type or "application/octet-stream",
+        headers={"Content-Disposition": _content_disposition(record.original_filename)},
+    )
--- a/surfsense_backend/app/file_storage/backends/init.py
+++ b/surfsense_backend/app/file_storage/backends/init.py
@ -0,0 +1,7 @@
+"""Storage backend implementations behind the shared :class:`StorageBackend`."""
+
+from __future__ import annotations
+
+from app.file_storage.backends.base import StorageBackend
+
+__all__ = ["StorageBackend"]
--- a/surfsense_backend/app/file_storage/backends/azure.py
+++ b/surfsense_backend/app/file_storage/backends/azure.py
@ -0,0 +1,54 @@
+"""Azure Blob Storage backend (the first production target)."""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+
+from app.file_storage.backends.base import StorageBackend
+
+
+class AzureBlobBackend(StorageBackend):
+    """Stores objects as blobs in an Azure Blob Storage container."""
+
+    backend_name = "azure"
+
+    def __init__(self, *, connection_string: str, container: str) -> None:
+        self._connection_string = connection_string
+        self._container = container
+
+    def _service(self):
+        from azure.storage.blob.aio import BlobServiceClient
+
+        return BlobServiceClient.from_connection_string(self._connection_string)
+
+    async def put(
+        self, key: str, data: bytes, *, content_type: str | None = None
+    ) -> None:
+        from azure.storage.blob import ContentSettings
+
+        settings = ContentSettings(content_type=content_type) if content_type else None
+        async with self._service() as service:
+            blob = service.get_blob_client(self._container, key)
+            await blob.upload_blob(data, overwrite=True, content_settings=settings)
+
+    async def open_stream(self, key: str) -> AsyncIterator[bytes]:
+        async with self._service() as service:
+            blob = service.get_blob_client(self._container, key)
+            downloader = await blob.download_blob()
+            async for chunk in downloader.chunks():
+                yield chunk
+
+    async def delete(self, key: str) -> None:
+        from azure.core.exceptions import ResourceNotFoundError
+
+        async with self._service() as service:
+            blob = service.get_blob_client(self._container, key)
+            try:
+                await blob.delete_blob()
+            except ResourceNotFoundError:
+                pass
+
+    async def exists(self, key: str) -> bool:
+        async with self._service() as service:
+            blob = service.get_blob_client(self._container, key)
+            return await blob.exists()
--- a/surfsense_backend/app/file_storage/backends/base.py
+++ b/surfsense_backend/app/file_storage/backends/base.py
@ -0,0 +1,31 @@
+"""The storage backend contract: the minimal object-store surface we depend on."""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from collections.abc import AsyncIterator
+
+
+class StorageBackend(ABC):
+    """Maps an opaque object key to durable bytes."""
+
+    #: Identifier stored on each row to record which backend holds the bytes.
+    backend_name: str
+
+    @abstractmethod
+    async def put(
+        self, key: str, data: bytes, *, content_type: str | None = None
+    ) -> None:
+        """Store ``data`` at ``key``, overwriting any existing object."""
+
+    @abstractmethod
+    def open_stream(self, key: str) -> AsyncIterator[bytes]:
+        """Yield the object's bytes in chunks. Raises if the key is absent."""
+
+    @abstractmethod
+    async def delete(self, key: str) -> None:
+        """Remove the object at ``key``; a missing key is not an error."""
+
+    @abstractmethod
+    async def exists(self, key: str) -> bool:
+        """Return whether an object is stored at ``key``."""
--- a/surfsense_backend/app/file_storage/backends/local.py
+++ b/surfsense_backend/app/file_storage/backends/local.py
@ -0,0 +1,64 @@
+"""Local filesystem backend for development (no cloud credentials required)."""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import AsyncIterator
+from pathlib import Path
+
+from app.file_storage.backends.base import StorageBackend
+
+_CHUNK_SIZE = 1024 * 1024
+
+
+class LocalFileBackend(StorageBackend):
+    """Stores objects as files under a single root directory."""
+
+    backend_name = "local"
+
+    def __init__(self, root: str) -> None:
+        self._root = Path(root).resolve()
+
+    def _path_for(self, key: str) -> Path:
+        # Resolve and confirm the key stays inside the root to block traversal.
+        target = (self._root / key).resolve()
+        if self._root not in target.parents and target != self._root:
+            raise ValueError("Resolved storage key escapes the storage root")
+        return target
+
+    async def put(
+        self, key: str, data: bytes, *, content_type: str | None = None
+    ) -> None:
+        path = self._path_for(key)
+
+        def _write() -> None:
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path.write_bytes(data)
+
+        await asyncio.to_thread(_write)
+
+    async def open_stream(self, key: str) -> AsyncIterator[bytes]:
+        path = self._path_for(key)
+        handle = await asyncio.to_thread(path.open, "rb")
+        try:
+            while True:
+                chunk = await asyncio.to_thread(handle.read, _CHUNK_SIZE)
+                if not chunk:
+                    break
+                yield chunk
+        finally:
+            await asyncio.to_thread(handle.close)
+
+    async def delete(self, key: str) -> None:
+        path = self._path_for(key)
+
+        def _unlink() -> None:
+            try:
+                path.unlink()
+            except FileNotFoundError:
+                pass
+
+        await asyncio.to_thread(_unlink)
+
+    async def exists(self, key: str) -> bool:
+        return await asyncio.to_thread(self._path_for(key).exists)
--- a/surfsense_backend/app/file_storage/factory.py
+++ b/surfsense_backend/app/file_storage/factory.py
@ -0,0 +1,38 @@
+"""Resolve the configured :class:`StorageBackend` as a process-wide singleton."""
+
+from __future__ import annotations
+
+from functools import lru_cache
+
+from app.file_storage.backends.base import StorageBackend
+from app.file_storage.settings import (
+    AZURE_BACKEND,
+    LOCAL_BACKEND,
+    load_storage_settings,
+)
+
+
+@lru_cache(maxsize=1)
+def get_storage_backend() -> StorageBackend:
+    """Build the backend selected by ``FILE_STORAGE_BACKEND`` (lazy-imported)."""
+    settings = load_storage_settings()
+
+    if settings.backend == AZURE_BACKEND:
+        if not settings.azure_connection_string or not settings.azure_container:
+            raise ValueError(
+                "Azure storage requires AZURE_STORAGE_CONNECTION_STRING and "
+                "AZURE_STORAGE_CONTAINER."
+            )
+        from app.file_storage.backends.azure import AzureBlobBackend
+
+        return AzureBlobBackend(
+            connection_string=settings.azure_connection_string,
+            container=settings.azure_container,
+        )
+
+    if settings.backend == LOCAL_BACKEND:
+        from app.file_storage.backends.local import LocalFileBackend
+
+        return LocalFileBackend(settings.local_root)
+
+    raise ValueError(f"Unknown FILE_STORAGE_BACKEND: {settings.backend!r}")
--- a/surfsense_backend/app/file_storage/keys.py
+++ b/surfsense_backend/app/file_storage/keys.py
@ -0,0 +1,27 @@
+"""Object-key construction for stored document files."""
+
+from __future__ import annotations
+
+import os
+import uuid
+
+from app.file_storage.persistence.enums import DocumentFileKind
+
+
+def build_document_file_key(
+    *,
+    search_space_id: int,
+    document_id: int,
+    kind: DocumentFileKind,
+    filename: str,
+) -> str:
+    """Build the storage key for one document file.
+
+    Shape: ``documents/{search_space_id}/{document_id}/{kind}/{uuid}{ext}``.
+    """
+    extension = os.path.splitext(filename)[1].lower()
+    unique = uuid.uuid4().hex
+    return (
+        f"documents/{search_space_id}/{document_id}/"
+        f"{kind.value.lower()}/{unique}{extension}"
+    )
--- a/surfsense_backend/app/file_storage/persistence/init.py
+++ b/surfsense_backend/app/file_storage/persistence/init.py
@ -0,0 +1,11 @@
+"""Models and enums for the document file-storage tables."""
+
+from __future__ import annotations
+
+from .enums import DocumentFileKind
+from .models import DocumentFile
+
+__all__ = [
+    "DocumentFile",
+    "DocumentFileKind",
+]
--- a/surfsense_backend/app/file_storage/persistence/enums.py
+++ b/surfsense_backend/app/file_storage/persistence/enums.py
@ -0,0 +1,11 @@
+"""DocumentFile kinds: the original upload plus future derived artifacts."""
+
+from __future__ import annotations
+
+from enum import StrEnum
+
+
+class DocumentFileKind(StrEnum):
+    ORIGINAL = "ORIGINAL"
+    REDACTED = "REDACTED"
+    FILLED_FORM = "FILLED_FORM"
--- a/surfsense_backend/app/file_storage/persistence/models.py
+++ b/surfsense_backend/app/file_storage/persistence/models.py
@ -0,0 +1,66 @@
+"""``document_files`` table: durable blobs associated with a document."""
+
+from __future__ import annotations
+
+from sqlalchemy import (
+    BigInteger,
+    Column,
+    Enum as SQLAlchemyEnum,
+    ForeignKey,
+    Integer,
+    String,
+)
+from sqlalchemy.dialects.postgresql import UUID
+from sqlalchemy.orm import relationship
+
+from app.db import BaseModel, TimestampMixin
+
+from .enums import DocumentFileKind
+
+
+class DocumentFile(BaseModel, TimestampMixin):
+    """One stored file for a document (its original upload, or a derived copy)."""
+
+    __tablename__ = "document_files"
+
+    document_id = Column(
+        Integer,
+        ForeignKey("documents.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    search_space_id = Column(
+        Integer,
+        ForeignKey("searchspaces.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    kind = Column(
+        SQLAlchemyEnum(
+            DocumentFileKind,
+            name="document_file_kind",
+            values_callable=lambda x: [e.value for e in x],
+        ),
+        nullable=False,
+        default=DocumentFileKind.ORIGINAL,
+        server_default=DocumentFileKind.ORIGINAL.value,
+        index=True,
+    )
+
+    # Where the bytes live: the backend that stored them and its object key.
+    storage_backend = Column(String(32), nullable=False)
+    storage_key = Column(String, nullable=False)
+
+    original_filename = Column(String, nullable=False)
+    mime_type = Column(String, nullable=True)
+    size_bytes = Column(BigInteger, nullable=False)
+    checksum_sha256 = Column(String(64), nullable=True)
+
+    created_by_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("user.id", ondelete="SET NULL"),
+        nullable=True,
+        index=True,
+    )
+
+    document = relationship("Document", back_populates="files")
--- a/surfsense_backend/app/file_storage/schemas.py
+++ b/surfsense_backend/app/file_storage/schemas.py
@ -0,0 +1,23 @@
+"""API shapes for document file metadata."""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from pydantic import BaseModel, ConfigDict
+
+from app.file_storage.persistence.enums import DocumentFileKind
+
+
+class DocumentFileRead(BaseModel):
+    """Lightweight metadata for one stored document file (no bytes)."""
+
+    id: int
+    document_id: int
+    kind: DocumentFileKind
+    original_filename: str
+    mime_type: str | None = None
+    size_bytes: int
+    created_at: datetime
+
+    model_config = ConfigDict(from_attributes=True)
--- a/surfsense_backend/app/file_storage/service.py
+++ b/surfsense_backend/app/file_storage/service.py
@ -0,0 +1,129 @@
+"""Application service: persist, locate, and remove a document's stored files.
+
+Coordinates the storage backend (bytes) with the ``document_files`` table
+(metadata). Callers own the surrounding DB transaction/commit.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import logging
+from collections.abc import AsyncIterator, Sequence
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.file_storage.backends.base import StorageBackend
+from app.file_storage.factory import get_storage_backend
+from app.file_storage.keys import build_document_file_key
+from app.file_storage.persistence.enums import DocumentFileKind
+from app.file_storage.persistence.models import DocumentFile
+
+logger = logging.getLogger(__name__)
+
+
+async def store_document_file(
+    session: AsyncSession,
+    *,
+    document_id: int,
+    search_space_id: int,
+    data: bytes,
+    filename: str,
+    mime_type: str | None = None,
+    kind: DocumentFileKind = DocumentFileKind.ORIGINAL,
+    created_by_id: str | UUID | None = None,
+    backend: StorageBackend | None = None,
+) -> DocumentFile:
+    """Write bytes to storage and add a ``DocumentFile`` row to the session."""
+    backend = backend or get_storage_backend()
+    key = build_document_file_key(
+        search_space_id=search_space_id,
+        document_id=document_id,
+        kind=kind,
+        filename=filename,
+    )
+    await backend.put(key, data, content_type=mime_type)
+
+    record = DocumentFile(
+        document_id=document_id,
+        search_space_id=search_space_id,
+        kind=kind,
+        storage_backend=backend.backend_name,
+        storage_key=key,
+        original_filename=filename,
+        mime_type=mime_type,
+        size_bytes=len(data),
+        checksum_sha256=hashlib.sha256(data).hexdigest(),
+        created_by_id=created_by_id,
+    )
+    session.add(record)
+    return record
+
+
+async def list_document_files(
+    session: AsyncSession, *, document_id: int
+) -> list[DocumentFile]:
+    """Return all stored files for a document, newest first."""
+    result = await session.execute(
+        select(DocumentFile)
+        .where(DocumentFile.document_id == document_id)
+        .order_by(DocumentFile.created_at.desc())
+    )
+    return list(result.scalars().all())
+
+
+async def get_document_file(
+    session: AsyncSession,
+    *,
+    document_id: int,
+    kind: DocumentFileKind = DocumentFileKind.ORIGINAL,
+) -> DocumentFile | None:
+    """Return the most recent stored file of ``kind`` for a document."""
+    result = await session.execute(
+        select(DocumentFile)
+        .where(
+            DocumentFile.document_id == document_id,
+            DocumentFile.kind == kind,
+        )
+        .order_by(DocumentFile.created_at.desc())
+    )
+    return result.scalars().first()
+
+
+def open_document_file_stream(
+    record: DocumentFile, *, backend: StorageBackend | None = None
+) -> AsyncIterator[bytes]:
+    """Open a chunked byte stream for a stored file."""
+    backend = backend or get_storage_backend()
+    return backend.open_stream(record.storage_key)
+
+
+async def purge_document_blobs(
+    session: AsyncSession,
+    *,
+    document_ids: Sequence[int],
+    backend: StorageBackend | None = None,
+) -> None:
+    """Delete stored blobs for the given documents.
+
+    Call this before the ``document_files`` rows are removed (they cascade with
+    the document). Best-effort: a failed blob delete is logged, not raised, so
+    document deletion is never blocked by an orphaned blob.
+    """
+    if not document_ids:
+        return
+
+    backend = backend or get_storage_backend()
+    result = await session.execute(
+        select(DocumentFile.storage_key).where(
+            DocumentFile.document_id.in_(document_ids)
+        )
+    )
+    for storage_key in result.scalars().all():
+        try:
+            await backend.delete(storage_key)
+        except Exception as delete_error:
+            logger.warning(
+                "Failed to delete stored blob %s: %s", storage_key, delete_error
+            )
--- a/surfsense_backend/app/file_storage/settings.py
+++ b/surfsense_backend/app/file_storage/settings.py
@ -0,0 +1,37 @@
+"""Environment-driven configuration for the file-storage module."""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from pathlib import Path
+
+LOCAL_BACKEND = "local"
+AZURE_BACKEND = "azure"
+
+# surfsense_backend/ — two levels up from app/file_storage/settings.py
+_BACKEND_ROOT = Path(__file__).resolve().parents[2]
+_DEFAULT_LOCAL_ROOT = str(_BACKEND_ROOT / ".local_object_store")
+
+
+@dataclass(frozen=True)
+class StorageSettings:
+    """Resolved storage configuration for the current process."""
+
+    backend: str
+    azure_connection_string: str | None
+    azure_container: str | None
+    local_root: str
+
+
+def load_storage_settings() -> StorageSettings:
+    """Read storage settings from the environment.
+
+    Defaults to the ``local`` backend so development needs no cloud creds.
+    """
+    return StorageSettings(
+        backend=os.getenv("FILE_STORAGE_BACKEND", LOCAL_BACKEND).strip().lower(),
+        azure_connection_string=os.getenv("AZURE_STORAGE_CONNECTION_STRING"),
+        azure_container=os.getenv("AZURE_STORAGE_CONTAINER"),
+        local_root=os.getenv("FILE_STORAGE_LOCAL_PATH", _DEFAULT_LOCAL_ROOT),
+    )
--- a/surfsense_backend/app/routes/init.py
+++ b/surfsense_backend/app/routes/init.py
@ -1,6 +1,7 @@
 from fastapi import APIRouter

 from app.automations.api import router as automations_router
+from app.file_storage.api import router as file_storage_router

 from .agent_action_log_route import router as agent_action_log_router
 from .agent_flags_route import router as agent_flags_router
@ -126,3 +127,4 @@ router.include_router(prompts_router)
 router.include_router(memory_router)  # User personal memory (memory.md style)
 router.include_router(team_memory_router)  # Search-space team memory
 router.include_router(automations_router)  # Automations CRUD + run history
+router.include_router(file_storage_router)  # Original file metadata + download
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@ -44,10 +44,12 @@ except RuntimeError as e:
    print("Error setting event loop policy", e)
    pass

+import logging
 import os

 os.environ["UNSTRUCTURED_HAS_PATCHED_LOOP"] = "1"

+logger = logging.getLogger(__name__)

 router = APIRouter()

@ -142,9 +144,11 @@ async def create_documents_file_upload(
    import os
    import tempfile
    from datetime import datetime
+    from pathlib import Path

    from app.db import DocumentStatus
    from app.etl_pipeline.etl_document import ProcessingMode
+    from app.file_storage.service import store_document_file
    from app.tasks.document_processors.base import (
        check_document_by_unique_identifier,
        get_current_timestamp,
@ -175,11 +179,12 @@ async def create_documents_file_upload(
                )

        # ===== Read all files concurrently to avoid blocking the event loop =====
-        async def _read_and_save(file: UploadFile) -> tuple[str, str, int]:
+        async def _read_and_save(file: UploadFile) -> tuple[str, str, int, str | None]:
            """Read upload content and write to temp file off the event loop."""
            content = await file.read()
            file_size = len(content)
            filename = file.filename or "unknown"
+            content_type = file.content_type

            if file_size > MAX_FILE_SIZE_BYTES:
                raise HTTPException(
@ -196,17 +201,18 @@ async def create_documents_file_upload(
                    return tmp.name

            temp_path = await asyncio.to_thread(_write_temp)
-            return temp_path, filename, file_size
+            return temp_path, filename, file_size, content_type

        saved_files = await asyncio.gather(*(_read_and_save(f) for f in files))

        # ===== PHASE 1: Create pending documents for all files =====
        created_documents: list[Document] = []
-        files_to_process: list[tuple[Document, str, str]] = []
+        # (document, temp_path, filename, content_type)
+        files_to_process: list[tuple[Document, str, str, str | None]] = []
        skipped_duplicates = 0
        duplicate_document_ids: list[int] = []

-        for temp_path, filename, file_size in saved_files:
+        for temp_path, filename, file_size, content_type in saved_files:
            try:
                unique_identifier_hash = generate_unique_identifier_hash(
                    DocumentType.FILE, filename, search_space_id
@ -231,7 +237,9 @@ async def create_documents_file_upload(
                    }
                    existing.updated_at = get_current_timestamp()
                    created_documents.append(existing)
-                    files_to_process.append((existing, temp_path, filename))
+                    files_to_process.append(
+                        (existing, temp_path, filename, content_type)
+                    )
                    continue

                document = Document(
@ -253,7 +261,7 @@ async def create_documents_file_upload(
                )
                session.add(document)
                created_documents.append(document)
-                files_to_process.append((document, temp_path, filename))
+                files_to_process.append((document, temp_path, filename, content_type))

            except HTTPException:
                raise
@ -269,8 +277,32 @@ async def create_documents_file_upload(
            for doc in created_documents:
                await session.refresh(doc)

+        # ===== PHASE 1.5: Persist the original uploads to durable storage =====
+        # Best-effort: a storage failure must not block parsing or the response.
+        for document, temp_path, filename, content_type in files_to_process:
+            try:
+                original_bytes = await asyncio.to_thread(
+                    lambda p=temp_path: Path(p).read_bytes()
+                )
+                await store_document_file(
+                    session,
+                    document_id=document.id,
+                    search_space_id=search_space_id,
+                    data=original_bytes,
+                    filename=filename,
+                    mime_type=content_type,
+                    created_by_id=str(user.id),
+                )
+            except Exception as storage_error:
+                logger.warning(
+                    "Failed to store original upload for document %s: %s",
+                    document.id,
+                    storage_error,
+                )
+        await session.commit()
+
        # ===== PHASE 2: Dispatch tasks for each file =====
-        for document, temp_path, filename in files_to_process:
+        for document, temp_path, filename, _content_type in files_to_process:
            await dispatcher.dispatch_file_processing(
                document_id=document.id,
                temp_path=temp_path,
--- a/surfsense_backend/app/services/jira/init.py
+++ b/surfsense_backend/app/services/jira/init.py
@ -1,13 +0,0 @@
-from app.services.jira.kb_sync_service import JiraKBSyncService
-from app.services.jira.tool_metadata_service import (
-    JiraIssue,
-    JiraToolMetadataService,
-    JiraWorkspace,
-)
-
-__all__ = [
-    "JiraIssue",
-    "JiraKBSyncService",
-    "JiraToolMetadataService",
-    "JiraWorkspace",
-]
--- a/surfsense_backend/app/services/jira/kb_sync_service.py
+++ b/surfsense_backend/app/services/jira/kb_sync_service.py
@ -1,257 +0,0 @@
-import asyncio
-import logging
-from datetime import datetime
-
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.connectors.jira_history import JiraHistoryConnector
-from app.db import Document, DocumentType
-from app.utils.document_converters import (
-    create_document_chunks,
-    embed_text,
-    generate_content_hash,
-    generate_document_summary,
-    generate_unique_identifier_hash,
-)
-
-logger = logging.getLogger(__name__)
-
-
-class JiraKBSyncService:
-    """Syncs Jira issue documents to the knowledge base after HITL actions."""
-
-    def __init__(self, db_session: AsyncSession):
-        self.db_session = db_session
-
-    async def sync_after_create(
-        self,
-        issue_id: str,
-        issue_identifier: str,
-        issue_title: str,
-        description: str | None,
-        state: str | None,
-        connector_id: int,
-        search_space_id: int,
-        user_id: str,
-    ) -> dict:
-        from app.tasks.connector_indexers.base import (
-            check_document_by_unique_identifier,
-            check_duplicate_document_by_hash,
-            get_current_timestamp,
-            safe_set_chunks,
-        )
-
-        try:
-            unique_hash = generate_unique_identifier_hash(
-                DocumentType.JIRA_CONNECTOR, issue_id, search_space_id
-            )
-
-            existing = await check_document_by_unique_identifier(
-                self.db_session, unique_hash
-            )
-            if existing:
-                logger.info(
-                    "Document for Jira issue %s already exists (doc_id=%s), skipping",
-                    issue_identifier,
-                    existing.id,
-                )
-                return {"status": "success"}
-
-            indexable_content = (description or "").strip()
-            if not indexable_content:
-                indexable_content = f"Jira Issue {issue_identifier}: {issue_title}"
-
-            issue_content = (
-                f"# {issue_identifier}: {issue_title}\n\n{indexable_content}"
-            )
-
-            content_hash = generate_content_hash(issue_content, search_space_id)
-
-            with self.db_session.no_autoflush:
-                dup = await check_duplicate_document_by_hash(
-                    self.db_session, content_hash
-                )
-            if dup:
-                content_hash = unique_hash
-
-            from app.services.llm_service import get_user_long_context_llm
-
-            user_llm = await get_user_long_context_llm(
-                self.db_session,
-                user_id,
-                search_space_id,
-                disable_streaming=True,
-            )
-
-            doc_metadata_for_summary = {
-                "issue_id": issue_identifier,
-                "issue_title": issue_title,
-                "document_type": "Jira Issue",
-                "connector_type": "Jira",
-            }
-
-            if user_llm:
-                summary_content, summary_embedding = await generate_document_summary(
-                    issue_content, user_llm, doc_metadata_for_summary
-                )
-            else:
-                summary_content = (
-                    f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
-                )
-                summary_embedding = await asyncio.to_thread(embed_text, summary_content)
-
-            chunks = await create_document_chunks(issue_content)
-            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-            document = Document(
-                title=f"{issue_identifier}: {issue_title}",
-                document_type=DocumentType.JIRA_CONNECTOR,
-                document_metadata={
-                    "issue_id": issue_id,
-                    "issue_identifier": issue_identifier,
-                    "issue_title": issue_title,
-                    "state": state or "Unknown",
-                    "indexed_at": now_str,
-                    "connector_id": connector_id,
-                },
-                content=summary_content,
-                content_hash=content_hash,
-                unique_identifier_hash=unique_hash,
-                embedding=summary_embedding,
-                search_space_id=search_space_id,
-                connector_id=connector_id,
-                updated_at=get_current_timestamp(),
-                created_by_id=user_id,
-            )
-
-            self.db_session.add(document)
-            await self.db_session.flush()
-            await safe_set_chunks(self.db_session, document, chunks)
-            await self.db_session.commit()
-
-            logger.info(
-                "KB sync after create succeeded: doc_id=%s, issue=%s",
-                document.id,
-                issue_identifier,
-            )
-            return {"status": "success"}
-
-        except Exception as e:
-            error_str = str(e).lower()
-            if (
-                "duplicate key value violates unique constraint" in error_str
-                or "uniqueviolationerror" in error_str
-            ):
-                await self.db_session.rollback()
-                return {"status": "error", "message": "Duplicate document detected"}
-
-            logger.error(
-                "KB sync after create failed for issue %s: %s",
-                issue_identifier,
-                e,
-                exc_info=True,
-            )
-            await self.db_session.rollback()
-            return {"status": "error", "message": str(e)}
-
-    async def sync_after_update(
-        self,
-        document_id: int,
-        issue_id: str,
-        user_id: str,
-        search_space_id: int,
-    ) -> dict:
-        from app.tasks.connector_indexers.base import (
-            get_current_timestamp,
-            safe_set_chunks,
-        )
-
-        try:
-            document = await self.db_session.get(Document, document_id)
-            if not document:
-                return {"status": "not_indexed"}
-
-            connector_id = document.connector_id
-            if not connector_id:
-                return {"status": "error", "message": "Document has no connector_id"}
-
-            jira_history = JiraHistoryConnector(
-                session=self.db_session, connector_id=connector_id
-            )
-            jira_client = await jira_history._get_jira_client()
-            issue_raw = await asyncio.to_thread(jira_client.get_issue, issue_id)
-            formatted = jira_client.format_issue(issue_raw)
-            issue_content = jira_client.format_issue_to_markdown(formatted)
-
-            if not issue_content:
-                return {"status": "error", "message": "Issue produced empty content"}
-
-            issue_identifier = formatted.get("key", "")
-            issue_title = formatted.get("title", "")
-            state = formatted.get("status", "Unknown")
-            comment_count = len(formatted.get("comments", []))
-
-            from app.services.llm_service import get_user_long_context_llm
-
-            user_llm = await get_user_long_context_llm(
-                self.db_session, user_id, search_space_id, disable_streaming=True
-            )
-
-            if user_llm:
-                doc_meta = {
-                    "issue_key": issue_identifier,
-                    "issue_title": issue_title,
-                    "status": state,
-                    "document_type": "Jira Issue",
-                    "connector_type": "Jira",
-                }
-                summary_content, summary_embedding = await generate_document_summary(
-                    issue_content, user_llm, doc_meta
-                )
-            else:
-                summary_content = (
-                    f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
-                )
-                summary_embedding = await asyncio.to_thread(embed_text, summary_content)
-
-            chunks = await create_document_chunks(issue_content)
-
-            document.title = f"{issue_identifier}: {issue_title}"
-            document.content = summary_content
-            document.content_hash = generate_content_hash(
-                issue_content, search_space_id
-            )
-            document.embedding = summary_embedding
-
-            from sqlalchemy.orm.attributes import flag_modified
-
-            document.document_metadata = {
-                **(document.document_metadata or {}),
-                "issue_id": issue_id,
-                "issue_identifier": issue_identifier,
-                "issue_title": issue_title,
-                "state": state,
-                "comment_count": comment_count,
-                "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                "connector_id": connector_id,
-            }
-            flag_modified(document, "document_metadata")
-            await safe_set_chunks(self.db_session, document, chunks)
-            document.updated_at = get_current_timestamp()
-
-            await self.db_session.commit()
-
-            logger.info(
-                "KB sync successful for document %s (%s: %s)",
-                document_id,
-                issue_identifier,
-                issue_title,
-            )
-            return {"status": "success"}
-
-        except Exception as e:
-            logger.error(
-                "KB sync failed for document %s: %s", document_id, e, exc_info=True
-            )
-            await self.db_session.rollback()
-            return {"status": "error", "message": str(e)}
--- a/surfsense_backend/app/services/jira/tool_metadata_service.py
+++ b/surfsense_backend/app/services/jira/tool_metadata_service.py
@ -1,332 +0,0 @@
-import asyncio
-import logging
-from dataclasses import dataclass
-
-from sqlalchemy import and_, func, or_
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.future import select
-from sqlalchemy.orm.attributes import flag_modified
-
-from app.connectors.jira_history import JiraHistoryConnector
-from app.db import (
-    Document,
-    DocumentType,
-    SearchSourceConnector,
-    SearchSourceConnectorType,
-)
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class JiraWorkspace:
-    """Represents a Jira connector as a workspace for tool context."""
-
-    id: int
-    name: str
-    base_url: str
-
-    @classmethod
-    def from_connector(cls, connector: SearchSourceConnector) -> "JiraWorkspace":
-        return cls(
-            id=connector.id,
-            name=connector.name,
-            base_url=connector.config.get("base_url", ""),
-        )
-
-    def to_dict(self) -> dict:
-        return {
-            "id": self.id,
-            "name": self.name,
-            "base_url": self.base_url,
-        }
-
-
-@dataclass
-class JiraIssue:
-    """Represents an indexed Jira issue resolved from the knowledge base."""
-
-    issue_id: str
-    issue_identifier: str
-    issue_title: str
-    state: str
-    connector_id: int
-    document_id: int
-    indexed_at: str | None
-
-    @classmethod
-    def from_document(cls, document: Document) -> "JiraIssue":
-        meta = document.document_metadata or {}
-        return cls(
-            issue_id=meta.get("issue_id", ""),
-            issue_identifier=meta.get("issue_identifier", ""),
-            issue_title=meta.get("issue_title", document.title),
-            state=meta.get("state", ""),
-            connector_id=document.connector_id,
-            document_id=document.id,
-            indexed_at=meta.get("indexed_at"),
-        )
-
-    def to_dict(self) -> dict:
-        return {
-            "issue_id": self.issue_id,
-            "issue_identifier": self.issue_identifier,
-            "issue_title": self.issue_title,
-            "state": self.state,
-            "connector_id": self.connector_id,
-            "document_id": self.document_id,
-            "indexed_at": self.indexed_at,
-        }
-
-
-class JiraToolMetadataService:
-    """Builds interrupt context for Jira HITL tools."""
-
-    def __init__(self, db_session: AsyncSession):
-        self._db_session = db_session
-
-    async def _check_account_health(self, connector: SearchSourceConnector) -> bool:
-        """Check if the Jira connector auth is still valid.
-
-        Returns True if auth is expired/invalid, False if healthy.
-        """
-        try:
-            jira_history = JiraHistoryConnector(
-                session=self._db_session, connector_id=connector.id
-            )
-            jira_client = await jira_history._get_jira_client()
-            await asyncio.to_thread(jira_client.get_myself)
-            return False
-        except Exception as e:
-            logger.warning("Jira connector %s health check failed: %s", connector.id, e)
-            try:
-                connector.config = {**connector.config, "auth_expired": True}
-                flag_modified(connector, "config")
-                await self._db_session.commit()
-                await self._db_session.refresh(connector)
-            except Exception:
-                logger.warning(
-                    "Failed to persist auth_expired for connector %s",
-                    connector.id,
-                    exc_info=True,
-                )
-            return True
-
-    async def get_creation_context(self, search_space_id: int, user_id: str) -> dict:
-        """Return context needed to create a new Jira issue.
-
-        Fetches all connected Jira accounts, and for the first healthy one
-        fetches projects, issue types, and priorities.
-        """
-        connectors = await self._get_all_jira_connectors(search_space_id, user_id)
-        if not connectors:
-            return {"error": "No Jira account connected"}
-
-        accounts = []
-        projects = []
-        issue_types = []
-        priorities = []
-        fetched_context = False
-
-        for connector in connectors:
-            auth_expired = await self._check_account_health(connector)
-            workspace = JiraWorkspace.from_connector(connector)
-            account_info = {
-                **workspace.to_dict(),
-                "auth_expired": auth_expired,
-            }
-            accounts.append(account_info)
-
-            if not auth_expired and not fetched_context:
-                try:
-                    jira_history = JiraHistoryConnector(
-                        session=self._db_session, connector_id=connector.id
-                    )
-                    jira_client = await jira_history._get_jira_client()
-                    raw_projects = await asyncio.to_thread(jira_client.get_projects)
-                    projects = [
-                        {"id": p.get("id"), "key": p.get("key"), "name": p.get("name")}
-                        for p in raw_projects
-                    ]
-                    raw_types = await asyncio.to_thread(jira_client.get_issue_types)
-                    seen_type_names: set[str] = set()
-                    issue_types = []
-                    for t in raw_types:
-                        if t.get("subtask", False):
-                            continue
-                        name = t.get("name")
-                        if name not in seen_type_names:
-                            seen_type_names.add(name)
-                            issue_types.append({"id": t.get("id"), "name": name})
-                    raw_priorities = await asyncio.to_thread(jira_client.get_priorities)
-                    priorities = [
-                        {"id": p.get("id"), "name": p.get("name")}
-                        for p in raw_priorities
-                    ]
-                    fetched_context = True
-                except Exception as e:
-                    logger.warning(
-                        "Failed to fetch Jira context for connector %s: %s",
-                        connector.id,
-                        e,
-                    )
-
-        return {
-            "accounts": accounts,
-            "projects": projects,
-            "issue_types": issue_types,
-            "priorities": priorities,
-        }
-
-    async def get_update_context(
-        self, search_space_id: int, user_id: str, issue_ref: str
-    ) -> dict:
-        """Return context needed to update an indexed Jira issue.
-
-        Resolves the issue from the KB, then fetches current details from the Jira API.
-        """
-        document = await self._resolve_issue(search_space_id, user_id, issue_ref)
-        if not document:
-            return {
-                "error": f"Issue '{issue_ref}' not found in your synced Jira issues. "
-                "Please make sure the issue is indexed in your knowledge base."
-            }
-
-        connector = await self._get_connector_for_document(document, user_id)
-        if not connector:
-            return {"error": "Connector not found or access denied"}
-
-        auth_expired = await self._check_account_health(connector)
-        if auth_expired:
-            return {
-                "error": "Jira authentication has expired. Please re-authenticate.",
-                "auth_expired": True,
-                "connector_id": connector.id,
-            }
-
-        workspace = JiraWorkspace.from_connector(connector)
-        issue = JiraIssue.from_document(document)
-
-        try:
-            jira_history = JiraHistoryConnector(
-                session=self._db_session, connector_id=connector.id
-            )
-            jira_client = await jira_history._get_jira_client()
-            issue_data = await asyncio.to_thread(jira_client.get_issue, issue.issue_id)
-            formatted = jira_client.format_issue(issue_data)
-        except Exception as e:
-            error_str = str(e).lower()
-            if (
-                "401" in error_str
-                or "403" in error_str
-                or "authentication" in error_str
-            ):
-                return {
-                    "error": f"Failed to fetch Jira issue: {e!s}",
-                    "auth_expired": True,
-                    "connector_id": connector.id,
-                }
-            return {"error": f"Failed to fetch Jira issue: {e!s}"}
-
-        return {
-            "account": {**workspace.to_dict(), "auth_expired": False},
-            "issue": {
-                "issue_id": formatted.get("key", issue.issue_id),
-                "issue_identifier": formatted.get("key", issue.issue_identifier),
-                "issue_title": formatted.get("title", issue.issue_title),
-                "state": formatted.get("status", "Unknown"),
-                "priority": formatted.get("priority", "Unknown"),
-                "issue_type": formatted.get("issue_type", "Unknown"),
-                "assignee": formatted.get("assignee"),
-                "description": formatted.get("description"),
-                "project": formatted.get("project", ""),
-                "document_id": issue.document_id,
-                "indexed_at": issue.indexed_at,
-            },
-        }
-
-    async def get_deletion_context(
-        self, search_space_id: int, user_id: str, issue_ref: str
-    ) -> dict:
-        """Return context needed to delete a Jira issue (KB metadata only, no API call)."""
-        document = await self._resolve_issue(search_space_id, user_id, issue_ref)
-        if not document:
-            return {
-                "error": f"Issue '{issue_ref}' not found in your synced Jira issues. "
-                "Please make sure the issue is indexed in your knowledge base."
-            }
-
-        connector = await self._get_connector_for_document(document, user_id)
-        if not connector:
-            return {"error": "Connector not found or access denied"}
-
-        auth_expired = connector.config.get("auth_expired", False)
-        workspace = JiraWorkspace.from_connector(connector)
-        issue = JiraIssue.from_document(document)
-
-        return {
-            "account": {**workspace.to_dict(), "auth_expired": auth_expired},
-            "issue": issue.to_dict(),
-        }
-
-    async def _resolve_issue(
-        self, search_space_id: int, user_id: str, issue_ref: str
-    ) -> Document | None:
-        """Resolve an issue from KB: issue_identifier -> issue_title -> document.title."""
-        ref_lower = issue_ref.lower()
-
-        result = await self._db_session.execute(
-            select(Document)
-            .join(
-                SearchSourceConnector, Document.connector_id == SearchSourceConnector.id
-            )
-            .filter(
-                and_(
-                    Document.search_space_id == search_space_id,
-                    Document.document_type == DocumentType.JIRA_CONNECTOR,
-                    SearchSourceConnector.user_id == user_id,
-                    or_(
-                        func.lower(
-                            Document.document_metadata.op("->>")("issue_identifier")
-                        )
-                        == ref_lower,
-                        func.lower(Document.document_metadata.op("->>")("issue_title"))
-                        == ref_lower,
-                        func.lower(Document.title) == ref_lower,
-                    ),
-                )
-            )
-            .order_by(Document.updated_at.desc().nullslast())
-            .limit(1)
-        )
-        return result.scalars().first()
-
-    async def _get_all_jira_connectors(
-        self, search_space_id: int, user_id: str
-    ) -> list[SearchSourceConnector]:
-        result = await self._db_session.execute(
-            select(SearchSourceConnector).filter(
-                and_(
-                    SearchSourceConnector.search_space_id == search_space_id,
-                    SearchSourceConnector.user_id == user_id,
-                    SearchSourceConnector.connector_type
-                    == SearchSourceConnectorType.JIRA_CONNECTOR,
-                )
-            )
-        )
-        return result.scalars().all()
-
-    async def _get_connector_for_document(
-        self, document: Document, user_id: str
-    ) -> SearchSourceConnector | None:
-        if not document.connector_id:
-            return None
-        result = await self._db_session.execute(
-            select(SearchSourceConnector).filter(
-                and_(
-                    SearchSourceConnector.id == document.connector_id,
-                    SearchSourceConnector.user_id == user_id,
-                )
-            )
-        )
-        return result.scalars().first()
--- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
@ -118,6 +118,7 @@ async def _delete_document_background(document_id: int) -> None:
    from sqlalchemy import delete as sa_delete, select

    from app.db import Chunk, Document
+    from app.file_storage.service import purge_document_blobs

    async with get_celery_session_maker()() as session:
        batch_size = 500
@ -133,6 +134,9 @@ async def _delete_document_background(document_id: int) -> None:
            await session.execute(sa_delete(Chunk).where(Chunk.id.in_(chunk_ids)))
            await session.commit()

+        # Remove stored blobs before the document_files rows cascade away.
+        await purge_document_blobs(session, document_ids=[document_id])
+
        doc = await session.get(Document, document_id)
        if doc:
            await session.delete(doc)
@ -166,6 +170,7 @@ async def _delete_folder_documents(
    from sqlalchemy import delete as sa_delete, select

    from app.db import Chunk, Document, Folder
+    from app.file_storage.service import purge_document_blobs

    async with get_celery_session_maker()() as session:
        batch_size = 500
@ -182,6 +187,9 @@ async def _delete_folder_documents(
                await session.execute(sa_delete(Chunk).where(Chunk.id.in_(chunk_ids)))
                await session.commit()

+            # Remove stored blobs before the document_files rows cascade away.
+            await purge_document_blobs(session, document_ids=[doc_id])
+
            doc = await session.get(Document, doc_id)
            if doc:
                await session.delete(doc)
@ -214,6 +222,7 @@ async def _delete_search_space_background(search_space_id: int) -> None:
    from sqlalchemy import delete as sa_delete, select

    from app.db import Chunk, Document, SearchSpace
+    from app.file_storage.service import purge_document_blobs

    async with get_celery_session_maker()() as session:
        batch_size = 500
@ -240,6 +249,8 @@ async def _delete_search_space_background(search_space_id: int) -> None:
            doc_ids = doc_ids_result.scalars().all()
            if not doc_ids:
                break
+            # Remove stored blobs before the document_files rows cascade away.
+            await purge_document_blobs(session, document_ids=list(doc_ids))
            await session.execute(sa_delete(Document).where(Document.id.in_(doc_ids)))
            await session.commit()

--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@ -1,364 +0,0 @@
-"""Jira connector indexer using the unified parallel indexing pipeline."""
-
-import contextlib
-from collections.abc import Awaitable, Callable
-
-from sqlalchemy.exc import SQLAlchemyError
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.connectors.jira_history import JiraHistoryConnector
-from app.db import DocumentType, SearchSourceConnectorType
-from app.indexing_pipeline.connector_document import ConnectorDocument
-from app.indexing_pipeline.document_hashing import compute_content_hash
-from app.indexing_pipeline.indexing_pipeline_service import (
-    IndexingPipelineService,
-    PlaceholderInfo,
-)
-from app.services.llm_service import get_user_long_context_llm
-from app.services.task_logging_service import TaskLoggingService
-
-from .base import (
-    calculate_date_range,
-    check_duplicate_document_by_hash,
-    get_connector_by_id,
-    logger,
-    update_connector_last_indexed,
-)
-
-HeartbeatCallbackType = Callable[[int], Awaitable[None]]
-HEARTBEAT_INTERVAL_SECONDS = 30
-
-
-def _build_connector_doc(
-    issue: dict,
-    formatted_issue: dict,
-    issue_content: str,
-    *,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    enable_summary: bool,
-) -> ConnectorDocument:
-    """Map a raw Jira issue dict to a ConnectorDocument."""
-    issue_id = issue.get("key", "")
-    issue_identifier = issue.get("key", "")
-    issue_title = issue.get("id", "")
-    state = formatted_issue.get("status", "Unknown")
-    priority = formatted_issue.get("priority", "Unknown")
-    comment_count = len(formatted_issue.get("comments", []))
-
-    metadata = {
-        "issue_id": issue_id,
-        "issue_identifier": issue_identifier,
-        "issue_title": issue_title,
-        "state": state,
-        "priority": priority,
-        "comment_count": comment_count,
-        "connector_id": connector_id,
-        "document_type": "Jira Issue",
-        "connector_type": "Jira",
-    }
-
-    fallback_summary = (
-        f"Jira Issue {issue_identifier}: {issue_title}\n\n"
-        f"Status: {state}\n\n{issue_content}"
-    )
-
-    return ConnectorDocument(
-        title=f"{issue_identifier}: {issue_title}",
-        source_markdown=issue_content,
-        unique_id=issue_id,
-        document_type=DocumentType.JIRA_CONNECTOR,
-        search_space_id=search_space_id,
-        connector_id=connector_id,
-        created_by_id=user_id,
-        should_summarize=enable_summary,
-        fallback_summary=fallback_summary,
-        metadata=metadata,
-    )
-
-
-async def index_jira_issues(
-    session: AsyncSession,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str | None = None,
-    end_date: str | None = None,
-    update_last_indexed: bool = True,
-    on_heartbeat_callback: HeartbeatCallbackType | None = None,
-) -> tuple[int, int, str | None]:
-    """Index Jira issues and comments."""
-    task_logger = TaskLoggingService(session, search_space_id)
-    log_entry = await task_logger.log_task_start(
-        task_name="jira_issues_indexing",
-        source="connector_indexing_task",
-        message=f"Starting Jira issues indexing for connector {connector_id}",
-        metadata={
-            "connector_id": connector_id,
-            "user_id": str(user_id),
-            "start_date": start_date,
-            "end_date": end_date,
-        },
-    )
-
-    try:
-        connector = await get_connector_by_id(
-            session, connector_id, SearchSourceConnectorType.JIRA_CONNECTOR
-        )
-
-        if not connector:
-            await task_logger.log_task_failure(
-                log_entry,
-                f"Connector with ID {connector_id} not found",
-                "Connector not found",
-                {"error_type": "ConnectorNotFound"},
-            )
-            return 0, 0, f"Connector with ID {connector_id} not found"
-
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Initializing Jira client for connector {connector_id}",
-            {"stage": "client_initialization"},
-        )
-        jira_client = JiraHistoryConnector(session=session, connector_id=connector_id)
-
-        if start_date == "undefined" or start_date == "":
-            start_date = None
-        if end_date == "undefined" or end_date == "":
-            end_date = None
-
-        start_date_str, end_date_str = calculate_date_range(
-            connector, start_date, end_date, default_days_back=365
-        )
-
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Fetching Jira issues from {start_date_str} to {end_date_str}",
-            {
-                "stage": "fetching_issues",
-                "start_date": start_date_str,
-                "end_date": end_date_str,
-            },
-        )
-
-        try:
-            issues, error = await jira_client.get_issues_by_date_range(
-                start_date=start_date_str, end_date=end_date_str, include_comments=True
-            )
-
-            if error:
-                if "No issues found" in error:
-                    logger.info(f"No Jira issues found: {error}")
-                    if update_last_indexed:
-                        await update_connector_last_indexed(
-                            session, connector, update_last_indexed
-                        )
-                        await session.commit()
-                        logger.info(
-                            f"Updated last_indexed_at to {connector.last_indexed_at} despite no issues found"
-                        )
-
-                    await task_logger.log_task_success(
-                        log_entry,
-                        f"No Jira issues found in date range {start_date_str} to {end_date_str}",
-                        {"issues_found": 0},
-                    )
-                    await jira_client.close()
-                    return 0, 0, None
-                else:
-                    logger.error(f"Failed to get Jira issues: {error}")
-                    await task_logger.log_task_failure(
-                        log_entry,
-                        f"Failed to get Jira issues: {error}",
-                        "API Error",
-                        {"error_type": "APIError"},
-                    )
-                    await jira_client.close()
-                    return 0, 0, f"Failed to get Jira issues: {error}"
-
-            logger.info(f"Retrieved {len(issues)} issues from Jira API")
-
-        except Exception as e:
-            logger.error(f"Error fetching Jira issues: {e!s}", exc_info=True)
-            await jira_client.close()
-            return 0, 0, f"Error fetching Jira issues: {e!s}"
-
-        if not issues:
-            logger.info("No Jira issues found for the specified date range")
-            if update_last_indexed:
-                await update_connector_last_indexed(
-                    session, connector, update_last_indexed
-                )
-                await session.commit()
-            await jira_client.close()
-            return 0, 0, None
-
-        # ── Create placeholders for instant UI feedback ───────────────
-        pipeline = IndexingPipelineService(session)
-        placeholders = [
-            PlaceholderInfo(
-                title=f"{issue.get('key', '')}: {issue.get('id', '')}",
-                document_type=DocumentType.JIRA_CONNECTOR,
-                unique_id=issue.get("key", ""),
-                search_space_id=search_space_id,
-                connector_id=connector_id,
-                created_by_id=user_id,
-                metadata={
-                    "issue_id": issue.get("key", ""),
-                    "connector_id": connector_id,
-                    "connector_type": "Jira",
-                },
-            )
-            for issue in issues
-            if issue.get("key") and issue.get("id")
-        ]
-        await pipeline.create_placeholder_documents(placeholders)
-
-        connector_docs: list[ConnectorDocument] = []
-        documents_skipped = 0
-        duplicate_content_count = 0
-
-        for issue in issues:
-            try:
-                issue_id = issue.get("key")
-                issue_identifier = issue.get("key", "")
-                issue_title = issue.get("id", "")
-
-                if not issue_id or not issue_title:
-                    logger.warning(
-                        f"Skipping issue with missing ID or title: {issue_id or 'Unknown'}"
-                    )
-                    documents_skipped += 1
-                    continue
-
-                formatted_issue = jira_client.format_issue(issue)
-                issue_content = jira_client.format_issue_to_markdown(formatted_issue)
-
-                if not issue_content:
-                    logger.warning(
-                        f"Skipping issue with no content: {issue_identifier} - {issue_title}"
-                    )
-                    documents_skipped += 1
-                    continue
-
-                doc = _build_connector_doc(
-                    issue,
-                    formatted_issue,
-                    issue_content,
-                    connector_id=connector_id,
-                    search_space_id=search_space_id,
-                    user_id=user_id,
-                    enable_summary=connector.enable_summary,
-                )
-
-                with session.no_autoflush:
-                    duplicate_by_content = await check_duplicate_document_by_hash(
-                        session, compute_content_hash(doc)
-                    )
-
-                if duplicate_by_content:
-                    logger.info(
-                        f"Jira issue {issue_identifier} already indexed by another connector "
-                        f"(existing document ID: {duplicate_by_content.id}, "
-                        f"type: {duplicate_by_content.document_type}). Skipping."
-                    )
-                    duplicate_content_count += 1
-                    documents_skipped += 1
-                    continue
-
-                connector_docs.append(doc)
-
-            except Exception as e:
-                logger.error(
-                    f"Error building ConnectorDocument for issue {issue_identifier}: {e!s}",
-                    exc_info=True,
-                )
-                documents_skipped += 1
-                continue
-
-        await pipeline.migrate_legacy_docs(connector_docs)
-
-        async def _get_llm(s: AsyncSession):
-            return await get_user_long_context_llm(s, user_id, search_space_id)
-
-        _, documents_indexed, documents_failed = await pipeline.index_batch_parallel(
-            connector_docs,
-            _get_llm,
-            max_concurrency=3,
-            on_heartbeat=on_heartbeat_callback,
-            heartbeat_interval=HEARTBEAT_INTERVAL_SECONDS,
-        )
-
-        await update_connector_last_indexed(session, connector, update_last_indexed)
-
-        logger.info(f"Final commit: Total {documents_indexed} Jira issues processed")
-        try:
-            await session.commit()
-            logger.info("Successfully committed all JIRA document changes to database")
-        except Exception as e:
-            if (
-                "duplicate key value violates unique constraint" in str(e).lower()
-                or "uniqueviolationerror" in str(e).lower()
-            ):
-                logger.warning(
-                    f"Duplicate content_hash detected during final commit. "
-                    f"This may occur if the same issue was indexed by multiple connectors. "
-                    f"Rolling back and continuing. Error: {e!s}"
-                )
-                await session.rollback()
-                # Don't fail the entire task - some documents may have been successfully indexed
-            else:
-                raise
-
-        warning_parts = []
-        if duplicate_content_count > 0:
-            warning_parts.append(f"{duplicate_content_count} duplicate")
-        if documents_failed > 0:
-            warning_parts.append(f"{documents_failed} failed")
-        warning_message = ", ".join(warning_parts) if warning_parts else None
-
-        await task_logger.log_task_success(
-            log_entry,
-            f"Successfully completed JIRA indexing for connector {connector_id}",
-            {
-                "documents_indexed": documents_indexed,
-                "documents_skipped": documents_skipped,
-                "documents_failed": documents_failed,
-                "duplicate_content_count": duplicate_content_count,
-            },
-        )
-        logger.info(
-            f"JIRA indexing completed: {documents_indexed} ready, "
-            f"{documents_skipped} skipped, {documents_failed} failed "
-            f"({duplicate_content_count} duplicate content)"
-        )
-        await jira_client.close()
-        return documents_indexed, documents_skipped, warning_message
-
-    except SQLAlchemyError as db_error:
-        await session.rollback()
-        await task_logger.log_task_failure(
-            log_entry,
-            f"Database error during JIRA indexing for connector {connector_id}",
-            str(db_error),
-            {"error_type": "SQLAlchemyError"},
-        )
-        logger.error(f"Database error: {db_error!s}", exc_info=True)
-        if "jira_client" in locals():
-            with contextlib.suppress(Exception):
-                await jira_client.close()
-        return 0, 0, f"Database error: {db_error!s}"
-    except Exception as e:
-        await session.rollback()
-        await task_logger.log_task_failure(
-            log_entry,
-            f"Failed to index JIRA issues for connector {connector_id}",
-            str(e),
-            {"error_type": type(e).__name__},
-        )
-        logger.error(f"Failed to index JIRA issues: {e!s}", exc_info=True)
-        if "jira_client" in locals():
-            with contextlib.suppress(Exception):
-                await jira_client.close()
-        return 0, 0, f"Failed to index JIRA issues: {e!s}"
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@ -46,6 +46,7 @@ dependencies = [
    "redis>=5.2.1",
    "firecrawl-py>=4.9.0",
    "boto3>=1.35.0",
+    "azure-storage-blob>=12.23.0",
    "fake-useragent>=2.2.0",
    "trafilatura>=2.0.0",
    "fastapi-users[oauth,sqlalchemy]>=15.0.3",
--- a/surfsense_backend/tests/unit/connector_indexers/test_jira_parallel.py
+++ b/surfsense_backend/tests/unit/connector_indexers/test_jira_parallel.py
@ -1,387 +0,0 @@
-"""Tests for Jira indexer migrated to the unified parallel pipeline."""
-
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
-
-import app.tasks.connector_indexers.jira_indexer as _mod
-from app.db import DocumentType
-from app.tasks.connector_indexers.jira_indexer import (
-    _build_connector_doc,
-    index_jira_issues,
-)
-
-pytestmark = pytest.mark.unit
-
-_USER_ID = "00000000-0000-0000-0000-000000000001"
-_CONNECTOR_ID = 42
-_SEARCH_SPACE_ID = 1
-
-
-def _make_issue(
-    issue_key: str = "ENG-1",
-    issue_id: str = "10001",
-    title: str = "Fix login",
-):
-    return {"key": issue_key, "id": issue_id, "title": title}
-
-
-def _make_formatted_issue(
-    issue_key: str = "ENG-1",
-    issue_id: str = "10001",
-    title: str = "Fix login",
-    status: str = "In Progress",
-    priority: str = "High",
-    comments=None,
-):
-    return {
-        "key": issue_key,
-        "id": issue_id,
-        "title": title,
-        "status": status,
-        "priority": priority,
-        "comments": comments or [],
-    }
-
-
-# ---------------------------------------------------------------------------
-# Slice 1: _build_connector_doc tracer bullet
-# ---------------------------------------------------------------------------
-
-
-async def test_build_connector_doc_produces_correct_fields():
-    issue = _make_issue(issue_key="ENG-42", issue_id="4242", title="Fix auth bug")
-    formatted = _make_formatted_issue(
-        issue_key="ENG-42",
-        issue_id="4242",
-        title="Fix auth bug",
-        status="Done",
-        priority="Urgent",
-        comments=[{"id": "c1"}],
-    )
-    markdown = "# ENG-42: Fix auth bug\n\nBody"
-
-    doc = _build_connector_doc(
-        issue,
-        formatted,
-        markdown,
-        connector_id=_CONNECTOR_ID,
-        search_space_id=_SEARCH_SPACE_ID,
-        user_id=_USER_ID,
-        enable_summary=True,
-    )
-
-    assert doc.title == "ENG-42: 4242"
-    assert doc.unique_id == "ENG-42"
-    assert doc.document_type == DocumentType.JIRA_CONNECTOR
-    assert doc.source_markdown == markdown
-    assert doc.search_space_id == _SEARCH_SPACE_ID
-    assert doc.connector_id == _CONNECTOR_ID
-    assert doc.created_by_id == _USER_ID
-    assert doc.should_summarize is True
-    assert doc.metadata["issue_id"] == "ENG-42"
-    assert doc.metadata["issue_identifier"] == "ENG-42"
-    assert doc.metadata["issue_title"] == "4242"
-    assert doc.metadata["state"] == "Done"
-    assert doc.metadata["priority"] == "Urgent"
-    assert doc.metadata["comment_count"] == 1
-    assert doc.metadata["connector_id"] == _CONNECTOR_ID
-    assert doc.metadata["document_type"] == "Jira Issue"
-    assert doc.metadata["connector_type"] == "Jira"
-    assert doc.fallback_summary is not None
-    assert "ENG-42" in doc.fallback_summary
-    assert markdown in doc.fallback_summary
-
-
-async def test_build_connector_doc_summary_disabled():
-    doc = _build_connector_doc(
-        _make_issue(),
-        _make_formatted_issue(),
-        "# content",
-        connector_id=_CONNECTOR_ID,
-        search_space_id=_SEARCH_SPACE_ID,
-        user_id=_USER_ID,
-        enable_summary=False,
-    )
-    assert doc.should_summarize is False
-
-
-# ---------------------------------------------------------------------------
-# Shared fixtures for Slices 2-7
-# ---------------------------------------------------------------------------
-
-
-def _mock_connector(enable_summary: bool = True):
-    c = MagicMock()
-    c.config = {"access_token": "tok"}
-    c.enable_summary = enable_summary
-    c.last_indexed_at = None
-    return c
-
-
-def _mock_jira_client(issues=None, error=None):
-    client = MagicMock()
-    client.get_issues_by_date_range = AsyncMock(
-        return_value=(issues if issues is not None else [], error),
-    )
-    client.format_issue = MagicMock(
-        side_effect=lambda i: _make_formatted_issue(
-            issue_key=i.get("key", ""),
-            issue_id=i.get("id", ""),
-            title=i.get("title", ""),
-        )
-    )
-    client.format_issue_to_markdown = MagicMock(
-        side_effect=lambda fi: f"# {fi.get('key', '')}: {fi.get('id', '')}\n\nContent"
-    )
-    client.close = AsyncMock()
-    return client
-
-
-@pytest.fixture
-def jira_mocks(monkeypatch):
-    mock_session = AsyncMock()
-    mock_session.no_autoflush = MagicMock()
-
-    mock_connector = _mock_connector()
-    monkeypatch.setattr(
-        _mod,
-        "get_connector_by_id",
-        AsyncMock(return_value=mock_connector),
-    )
-
-    jira_client = _mock_jira_client(issues=[_make_issue()])
-    monkeypatch.setattr(
-        _mod,
-        "JiraHistoryConnector",
-        MagicMock(return_value=jira_client),
-    )
-
-    monkeypatch.setattr(
-        _mod,
-        "check_duplicate_document_by_hash",
-        AsyncMock(return_value=None),
-    )
-    monkeypatch.setattr(
-        _mod,
-        "update_connector_last_indexed",
-        AsyncMock(),
-    )
-    monkeypatch.setattr(
-        _mod,
-        "calculate_date_range",
-        MagicMock(return_value=("2025-01-01", "2025-12-31")),
-    )
-
-    mock_task_logger = MagicMock()
-    mock_task_logger.log_task_start = AsyncMock(return_value=MagicMock())
-    mock_task_logger.log_task_progress = AsyncMock()
-    mock_task_logger.log_task_success = AsyncMock()
-    mock_task_logger.log_task_failure = AsyncMock()
-    monkeypatch.setattr(
-        _mod,
-        "TaskLoggingService",
-        MagicMock(return_value=mock_task_logger),
-    )
-
-    batch_mock = AsyncMock(return_value=([], 1, 0))
-    pipeline_mock = MagicMock()
-    pipeline_mock.index_batch_parallel = batch_mock
-    pipeline_mock.migrate_legacy_docs = AsyncMock()
-    pipeline_mock.create_placeholder_documents = AsyncMock(return_value=0)
-    monkeypatch.setattr(
-        _mod,
-        "IndexingPipelineService",
-        MagicMock(return_value=pipeline_mock),
-    )
-
-    return {
-        "session": mock_session,
-        "connector": mock_connector,
-        "jira_client": jira_client,
-        "task_logger": mock_task_logger,
-        "pipeline_mock": pipeline_mock,
-        "batch_mock": batch_mock,
-    }
-
-
-async def _run_index(mocks, **overrides):
-    return await index_jira_issues(
-        session=mocks["session"],
-        connector_id=overrides.get("connector_id", _CONNECTOR_ID),
-        search_space_id=overrides.get("search_space_id", _SEARCH_SPACE_ID),
-        user_id=overrides.get("user_id", _USER_ID),
-        start_date=overrides.get("start_date", "2025-01-01"),
-        end_date=overrides.get("end_date", "2025-12-31"),
-        update_last_indexed=overrides.get("update_last_indexed", True),
-        on_heartbeat_callback=overrides.get("on_heartbeat_callback"),
-    )
-
-
-# ---------------------------------------------------------------------------
-# Slice 2: Full pipeline wiring
-# ---------------------------------------------------------------------------
-
-
-async def test_one_issue_calls_pipeline_and_returns_indexed_count(jira_mocks):
-    indexed, skipped, warning = await _run_index(jira_mocks)
-    assert indexed == 1
-    assert skipped == 0
-    assert warning is None
-
-    jira_mocks["batch_mock"].assert_called_once()
-    connector_docs = jira_mocks["batch_mock"].call_args[0][0]
-    assert len(connector_docs) == 1
-    assert connector_docs[0].document_type == DocumentType.JIRA_CONNECTOR
-
-
-async def test_pipeline_called_with_max_concurrency_3(jira_mocks):
-    await _run_index(jira_mocks)
-    call_kwargs = jira_mocks["batch_mock"].call_args[1]
-    assert call_kwargs.get("max_concurrency") == 3
-
-
-async def test_migrate_legacy_docs_called_before_indexing(jira_mocks):
-    await _run_index(jira_mocks)
-    jira_mocks["pipeline_mock"].migrate_legacy_docs.assert_called_once()
-
-
-# ---------------------------------------------------------------------------
-# Slice 3: Issue skipping (missing key/title/content)
-# ---------------------------------------------------------------------------
-
-
-async def test_issues_with_missing_key_are_skipped(jira_mocks):
-    issues = [
-        _make_issue(issue_key="ENG-1", issue_id="10001"),
-        {"key": "", "id": "10002", "title": "No key"},
-    ]
-    jira_mocks["jira_client"].get_issues_by_date_range.return_value = (issues, None)
-
-    _, skipped, _ = await _run_index(jira_mocks)
-    connector_docs = jira_mocks["batch_mock"].call_args[0][0]
-    assert len(connector_docs) == 1
-    assert skipped == 1
-
-
-async def test_issues_with_missing_title_are_skipped(jira_mocks):
-    issues = [
-        _make_issue(issue_key="ENG-1", issue_id="10001"),
-        {"key": "ENG-2", "id": "", "title": "Missing id used as title"},
-    ]
-    jira_mocks["jira_client"].get_issues_by_date_range.return_value = (issues, None)
-
-    _, skipped, _ = await _run_index(jira_mocks)
-    connector_docs = jira_mocks["batch_mock"].call_args[0][0]
-    assert len(connector_docs) == 1
-    assert skipped == 1
-
-
-async def test_issues_with_no_content_are_skipped(jira_mocks):
-    issues = [
-        _make_issue(issue_key="ENG-1", issue_id="10001"),
-        _make_issue(issue_key="ENG-2", issue_id="10002"),
-    ]
-    jira_mocks["jira_client"].get_issues_by_date_range.return_value = (issues, None)
-
-    jira_mocks["jira_client"].format_issue_to_markdown.side_effect = [
-        "# ENG-1: 10001\n\nContent",
-        "",
-    ]
-    _, skipped, _ = await _run_index(jira_mocks)
-    connector_docs = jira_mocks["batch_mock"].call_args[0][0]
-    assert len(connector_docs) == 1
-    assert skipped == 1
-
-
-# ---------------------------------------------------------------------------
-# Slice 4: Duplicate content skipping
-# ---------------------------------------------------------------------------
-
-
-async def test_duplicate_content_issues_are_skipped(jira_mocks, monkeypatch):
-    issues = [
-        _make_issue(issue_key="ENG-1", issue_id="10001"),
-        _make_issue(issue_key="ENG-2", issue_id="10002"),
-    ]
-    jira_mocks["jira_client"].get_issues_by_date_range.return_value = (issues, None)
-
-    call_count = 0
-
-    async def _check_dup(session, content_hash):
-        nonlocal call_count
-        call_count += 1
-        if call_count == 2:
-            dup = MagicMock()
-            dup.id = 99
-            dup.document_type = "OTHER"
-            return dup
-        return None
-
-    monkeypatch.setattr(_mod, "check_duplicate_document_by_hash", _check_dup)
-
-    _, skipped, _ = await _run_index(jira_mocks)
-    connector_docs = jira_mocks["batch_mock"].call_args[0][0]
-    assert len(connector_docs) == 1
-    assert skipped == 1
-
-
-# ---------------------------------------------------------------------------
-# Slice 5: Heartbeat callback forwarding
-# ---------------------------------------------------------------------------
-
-
-async def test_heartbeat_callback_forwarded_to_pipeline(jira_mocks):
-    heartbeat_cb = AsyncMock()
-    await _run_index(jira_mocks, on_heartbeat_callback=heartbeat_cb)
-    call_kwargs = jira_mocks["batch_mock"].call_args[1]
-    assert call_kwargs.get("on_heartbeat") is heartbeat_cb
-
-
-# ---------------------------------------------------------------------------
-# Slice 6: Empty issues and no-data success tuple
-# ---------------------------------------------------------------------------
-
-
-async def test_empty_issues_returns_zero_tuple(jira_mocks):
-    jira_mocks["jira_client"].get_issues_by_date_range.return_value = ([], None)
-    indexed, skipped, warning = await _run_index(jira_mocks)
-    assert indexed == 0
-    assert skipped == 0
-    assert warning is None
-    jira_mocks["batch_mock"].assert_not_called()
-
-
-async def test_no_issues_error_message_returns_success_tuple(jira_mocks):
-    jira_mocks["jira_client"].get_issues_by_date_range.return_value = (
-        [],
-        "No issues found in date range",
-    )
-    indexed, skipped, warning = await _run_index(jira_mocks)
-    assert indexed == 0
-    assert skipped == 0
-    assert warning is None
-
-
-async def test_api_error_still_returns_3_tuple(jira_mocks):
-    jira_mocks["jira_client"].get_issues_by_date_range.return_value = (
-        [],
-        "API exploded",
-    )
-    result = await _run_index(jira_mocks)
-    assert len(result) == 3
-    assert result[0] == 0
-    assert result[1] == 0
-    assert "Failed to get Jira issues" in result[2]
-
-
-# ---------------------------------------------------------------------------
-# Slice 7: Failed docs warning
-# ---------------------------------------------------------------------------
-
-
-async def test_failed_docs_warning_in_result(jira_mocks):
-    jira_mocks["batch_mock"].return_value = ([], 0, 2)
-    _, _, warning = await _run_index(jira_mocks)
-    assert warning is not None
-    assert "2 failed" in warning
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
--- a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts
+++ b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts
@ -243,6 +243,38 @@ export function getConnectorTitle(connectorType: string): string {
 	);
 }

+/**
+ * Primary way a user interacts with a connector.
+ * Drives the two top-level groupings in the connector catalog UI.
+ */
+export type ConnectorCategory = "knowledge_base" | "tools_live";
+
+export const CONNECTOR_CATEGORY_LABELS: Record<ConnectorCategory, string> = {
+	knowledge_base: "Knowledge Base",
+	tools_live: "Tools & Live Sources",
+};
+
+const KNOWLEDGE_BASE_CONNECTOR_TYPES = new Set<string>([
+	EnumConnectorName.GOOGLE_DRIVE_CONNECTOR,
+	EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
+	EnumConnectorName.ONEDRIVE_CONNECTOR,
+	EnumConnectorName.DROPBOX_CONNECTOR,
+	EnumConnectorName.NOTION_CONNECTOR,
+	EnumConnectorName.CONFLUENCE_CONNECTOR,
+	EnumConnectorName.YOUTUBE_CONNECTOR,
+	EnumConnectorName.WEBCRAWLER_CONNECTOR,
+	EnumConnectorName.BOOKSTACK_CONNECTOR,
+	EnumConnectorName.GITHUB_CONNECTOR,
+	EnumConnectorName.ELASTICSEARCH_CONNECTOR,
+	EnumConnectorName.CIRCLEBACK_CONNECTOR,
+	EnumConnectorName.OBSIDIAN_CONNECTOR,
+]);
+
+/** Unmapped connectors surface under Tools & Live Sources. */
+export function getConnectorCategory(connectorType: string): ConnectorCategory {
+	return KNOWLEDGE_BASE_CONNECTOR_TYPES.has(connectorType) ? "knowledge_base" : "tools_live";
+}
+
 // Composio Toolkits (available integrations via Composio)
 export const COMPOSIO_TOOLKITS = [
 	{
--- a/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx
@ -9,7 +9,10 @@ import { isSelfHosted } from "@/lib/env-config";
 import { ConnectorCard } from "../components/connector-card";
 import {
 	COMPOSIO_CONNECTORS,
+	CONNECTOR_CATEGORY_LABELS,
+	type ConnectorCategory,
 	CRAWLERS,
+	getConnectorCategory,
 	OAUTH_CONNECTORS,
 	OTHER_CONNECTORS,
 } from "../constants/connector-constants";
@ -20,19 +23,6 @@ type ComposioConnector = (typeof COMPOSIO_CONNECTORS)[number];
 type OtherConnector = (typeof OTHER_CONNECTORS)[number];
 type CrawlerConnector = (typeof CRAWLERS)[number];

-const DOCUMENT_FILE_CONNECTOR_TYPES = new Set<string>([
-	EnumConnectorName.GOOGLE_DRIVE_CONNECTOR,
-	EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
-	EnumConnectorName.ONEDRIVE_CONNECTOR,
-	EnumConnectorName.DROPBOX_CONNECTOR,
-]);
-
-const OTHER_DOCUMENT_CONNECTOR_TYPES = new Set<string>([
-	EnumConnectorName.YOUTUBE_CONNECTOR,
-	EnumConnectorName.NOTION_CONNECTOR,
-	EnumConnectorName.AIRTABLE_CONNECTOR,
-]);
-
 /**
 * Extract the display name from a full connector name.
 * Full names are in format "Base Name - identifier" (e.g., "Gmail - john@example.com").
@ -106,45 +96,23 @@ export const AllConnectorsTab: FC<AllConnectorsTabProps> = ({
 			c.description.toLowerCase().includes(searchQuery.toLowerCase())
 	);

-	const nativeGoogleDriveConnectors = filteredOAuth.filter(
-		(c) => c.connectorType === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR
-	);
-	const composioGoogleDriveConnectors = filteredComposio.filter(
-		(c) => c.connectorType === EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
-	);
-	const fileStorageConnectors = filteredOAuth.filter(
-		(c) =>
-			c.connectorType === EnumConnectorName.ONEDRIVE_CONNECTOR ||
-			c.connectorType === EnumConnectorName.DROPBOX_CONNECTOR
-	);
+	const inCategory =
+		(category: ConnectorCategory) =>
+		<T extends { connectorType?: string }>(connector: T): boolean =>
+			!!connector.connectorType && getConnectorCategory(connector.connectorType) === category;

-	const otherDocumentYouTubeConnectors = filteredCrawlers.filter(
-		(c) => c.connectorType === EnumConnectorName.YOUTUBE_CONNECTOR
-	);
-	const otherDocumentNotionConnectors = filteredOAuth.filter(
-		(c) => c.connectorType === EnumConnectorName.NOTION_CONNECTOR
-	);
-	const otherDocumentAirtableConnectors = filteredOAuth.filter(
-		(c) => c.connectorType === EnumConnectorName.AIRTABLE_CONNECTOR
-	);
-
-	const moreIntegrationsComposio = filteredComposio.filter(
-		(c) =>
-			!DOCUMENT_FILE_CONNECTOR_TYPES.has(c.connectorType) &&
-			!OTHER_DOCUMENT_CONNECTOR_TYPES.has(c.connectorType)
-	);
-	const moreIntegrationsOAuth = filteredOAuth.filter(
-		(c) =>
-			!DOCUMENT_FILE_CONNECTOR_TYPES.has(c.connectorType) &&
-			!OTHER_DOCUMENT_CONNECTOR_TYPES.has(c.connectorType)
-	);
-	const moreIntegrationsOther = filteredOther;
-	const moreIntegrationsCrawlers = filteredCrawlers.filter(
-		(c) =>
-			!c.connectorType ||
-			(!DOCUMENT_FILE_CONNECTOR_TYPES.has(c.connectorType) &&
-				!OTHER_DOCUMENT_CONNECTOR_TYPES.has(c.connectorType))
-	);
+	const knowledgeBase = {
+		oauth: filteredOAuth.filter(inCategory("knowledge_base")),
+		composio: filteredComposio.filter(inCategory("knowledge_base")),
+		other: filteredOther.filter(inCategory("knowledge_base")),
+		crawlers: filteredCrawlers.filter(inCategory("knowledge_base")),
+	};
+	const toolsLive = {
+		oauth: filteredOAuth.filter(inCategory("tools_live")),
+		composio: filteredComposio.filter(inCategory("tools_live")),
+		other: filteredOther.filter(inCategory("tools_live")),
+		crawlers: filteredCrawlers.filter(inCategory("tools_live")),
+	};

 	const renderOAuthCard = (connector: OAuthConnector | ComposioConnector) => {
 		const isConnected = connectedTypes.has(connector.connectorType);
@ -275,20 +243,18 @@ export const AllConnectorsTab: FC<AllConnectorsTabProps> = ({
 		);
 	};

-	const hasDocumentFileConnectors =
-		nativeGoogleDriveConnectors.length > 0 ||
-		composioGoogleDriveConnectors.length > 0 ||
-		fileStorageConnectors.length > 0;
-	const hasMoreIntegrations =
-		otherDocumentYouTubeConnectors.length > 0 ||
-		otherDocumentNotionConnectors.length > 0 ||
-		otherDocumentAirtableConnectors.length > 0 ||
-		moreIntegrationsComposio.length > 0 ||
-		moreIntegrationsOAuth.length > 0 ||
-		moreIntegrationsOther.length > 0 ||
-		moreIntegrationsCrawlers.length > 0;
+	const hasKnowledgeBase =
+		knowledgeBase.oauth.length > 0 ||
+		knowledgeBase.composio.length > 0 ||
+		knowledgeBase.other.length > 0 ||
+		knowledgeBase.crawlers.length > 0;
+	const hasToolsLive =
+		toolsLive.oauth.length > 0 ||
+		toolsLive.composio.length > 0 ||
+		toolsLive.other.length > 0 ||
+		toolsLive.crawlers.length > 0;

-	const hasAnyResults = hasDocumentFileConnectors || hasMoreIntegrations;
+	const hasAnyResults = hasKnowledgeBase || hasToolsLive;

 	if (!hasAnyResults && searchQuery) {
 		return (
@ -302,36 +268,34 @@ export const AllConnectorsTab: FC<AllConnectorsTabProps> = ({

 	return (
 		<div className="space-y-8">
-			{/* File Storage Integrations */}
-			{hasDocumentFileConnectors && (
+			{hasKnowledgeBase && (
 				<section>
 					<div className="flex items-center gap-2 mb-4">
 						<h3 className="text-sm font-semibold text-muted-foreground">
-							File Storage Integrations
+							{CONNECTOR_CATEGORY_LABELS.knowledge_base}
 						</h3>
 					</div>
 					<div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
-						{nativeGoogleDriveConnectors.map(renderOAuthCard)}
-						{composioGoogleDriveConnectors.map(renderOAuthCard)}
-						{fileStorageConnectors.map(renderOAuthCard)}
+						{knowledgeBase.oauth.map(renderOAuthCard)}
+						{knowledgeBase.composio.map(renderOAuthCard)}
+						{knowledgeBase.crawlers.map(renderCrawlerCard)}
+						{knowledgeBase.other.map(renderOtherCard)}
 					</div>
 				</section>
 			)}

-			{/* More Integrations */}
-			{hasMoreIntegrations && (
+			{hasToolsLive && (
 				<section>
 					<div className="flex items-center gap-2 mb-4">
-						<h3 className="text-sm font-semibold text-muted-foreground">More Integrations</h3>
+						<h3 className="text-sm font-semibold text-muted-foreground">
+							{CONNECTOR_CATEGORY_LABELS.tools_live}
+						</h3>
 					</div>
 					<div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
-						{otherDocumentYouTubeConnectors.map(renderCrawlerCard)}
-						{otherDocumentNotionConnectors.map(renderOAuthCard)}
-						{otherDocumentAirtableConnectors.map(renderOAuthCard)}
-						{moreIntegrationsComposio.map(renderOAuthCard)}
-						{moreIntegrationsOAuth.map(renderOAuthCard)}
-						{moreIntegrationsOther.map(renderOtherCard)}
-						{moreIntegrationsCrawlers.map(renderCrawlerCard)}
+						{toolsLive.oauth.map(renderOAuthCard)}
+						{toolsLive.composio.map(renderOAuthCard)}
+						{toolsLive.crawlers.map(renderCrawlerCard)}
+						{toolsLive.other.map(renderOtherCard)}
 					</div>
 				</section>
 			)}
--- a/surfsense_web/components/documents/download-original-button.tsx
+++ b/surfsense_web/components/documents/download-original-button.tsx
@ -0,0 +1,79 @@
+"use client";
+
+import { Download } from "lucide-react";
+import { useEffect, useState } from "react";
+import { toast } from "sonner";
+import { Button } from "@/components/ui/button";
+import { Spinner } from "@/components/ui/spinner";
+import { documentsApiService } from "@/lib/apis/documents-api.service";
+import { authenticatedFetch } from "@/lib/auth-utils";
+import { BACKEND_URL } from "@/lib/env-config";
+
+interface DownloadOriginalButtonProps {
+	documentId: number;
+}
+
+/** Renders only when the document has a stored ORIGINAL file; downloads it on click. */
+export function DownloadOriginalButton({ documentId }: DownloadOriginalButtonProps) {
+	const [originalFilename, setOriginalFilename] = useState<string | null>(null);
+	const [downloading, setDownloading] = useState(false);
+
+	useEffect(() => {
+		let active = true;
+		documentsApiService
+			.getDocumentFiles(documentId)
+			.then((files) => {
+				if (!active) return;
+				const original = files.find((file) => file.kind === "ORIGINAL");
+				setOriginalFilename(original?.original_filename ?? null);
+			})
+			.catch(() => {
+				if (active) setOriginalFilename(null);
+			});
+		return () => {
+			active = false;
+		};
+	}, [documentId]);
+
+	if (!originalFilename) return null;
+
+	const handleDownload = async () => {
+		setDownloading(true);
+		try {
+			const response = await authenticatedFetch(
+				`${BACKEND_URL}/api/v1/documents/${documentId}/download-original`,
+				{ method: "GET" }
+			);
+			if (!response.ok) throw new Error("Download failed");
+
+			const blob = await response.blob();
+			const url = URL.createObjectURL(blob);
+			const anchor = document.createElement("a");
+			anchor.href = url;
+			anchor.download = originalFilename;
+			document.body.appendChild(anchor);
+			anchor.click();
+			anchor.remove();
+			URL.revokeObjectURL(url);
+			toast.success("Download started");
+		} catch {
+			toast.error("Failed to download original file");
+		} finally {
+			setDownloading(false);
+		}
+	};
+
+	return (
+		<Button
+			variant="ghost"
+			size="icon"
+			className="size-6"
+			onClick={handleDownload}
+			disabled={downloading}
+			title={`Download original (${originalFilename})`}
+		>
+			{downloading ? <Spinner size="xs" /> : <Download className="size-3.5" />}
+			<span className="sr-only">Download original file</span>
+		</Button>
+	);
+}
--- a/surfsense_web/components/editor-panel/editor-panel.tsx
+++ b/surfsense_web/components/editor-panel/editor-panel.tsx
@ -15,6 +15,7 @@ import dynamic from "next/dynamic";
 import { useCallback, useEffect, useRef, useState } from "react";
 import { toast } from "sonner";
 import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom";
+import { DownloadOriginalButton } from "@/components/documents/download-original-button";
 import { VersionHistoryButton } from "@/components/documents/version-history";
 import { SourceCodeEditor } from "@/components/editor/source-code-editor";
 import {
@ -584,6 +585,9 @@ export function EditorPanelContent({
 											documentType={editorDoc.document_type}
 										/>
 									)}
+									{!isLocalFileMode && !isMemoryMode && documentId && (
+										<DownloadOriginalButton documentId={documentId} />
+									)}
 									<Button
 										variant="ghost"
 										size="icon"
@ -668,6 +672,9 @@ export function EditorPanelContent({
 										documentType={editorDoc.document_type}
 									/>
 								)}
+								{!isLocalFileMode && !isMemoryMode && documentId && (
+									<DownloadOriginalButton documentId={documentId} />
+								)}
 								<Button
 									variant="ghost"
 									size="icon"
--- a/surfsense_web/contracts/types/document.types.ts
+++ b/surfsense_web/contracts/types/document.types.ts
@ -281,6 +281,23 @@ export const deleteDocumentResponse = z.object({
 	message: z.literal("Document deleted successfully"),
 });

+/**
+ * Document files (stored originals / derived artifacts)
+ */
+export const documentFileKindEnum = z.enum(["ORIGINAL", "REDACTED", "FILLED_FORM"]);
+
+export const documentFileRead = z.object({
+	id: z.number(),
+	document_id: z.number(),
+	kind: documentFileKindEnum,
+	original_filename: z.string(),
+	mime_type: z.string().nullable().optional(),
+	size_bytes: z.number(),
+	created_at: z.string(),
+});
+
+export const getDocumentFilesResponse = z.array(documentFileRead);
+
 export type Document = z.infer<typeof document>;
 export type DocumentTitleRead = z.infer<typeof documentTitleRead>;
 export type GetDocumentsRequest = z.infer<typeof getDocumentsRequest>;
@ -314,3 +331,6 @@ export type GetDocumentChunksRequest = z.infer<typeof getDocumentChunksRequest>;
 export type GetDocumentChunksResponse = z.infer<typeof getDocumentChunksResponse>;
 export type ChunkRead = z.infer<typeof chunkRead>;
 export type ProcessingMode = z.infer<typeof processingModeEnum>;
+export type DocumentFileKind = z.infer<typeof documentFileKindEnum>;
+export type DocumentFileRead = z.infer<typeof documentFileRead>;
+export type GetDocumentFilesResponse = z.infer<typeof getDocumentFilesResponse>;
--- a/surfsense_web/lib/apis/documents-api.service.ts
+++ b/surfsense_web/lib/apis/documents-api.service.ts
@ -30,6 +30,8 @@ import {
 	searchDocumentsResponse,
 	searchDocumentTitlesRequest,
 	searchDocumentTitlesResponse,
+	type DocumentFileRead,
+	getDocumentFilesResponse,
 	type UpdateDocumentRequest,
 	type UploadDocumentRequest,
 	updateDocumentRequest,
@ -381,6 +383,14 @@ class DocumentsApiService {
 		});
 	};

+	/**
+	 * List the stored files for a document (e.g. its original upload).
+	 * Used to gate the "Download original" affordance.
+	 */
+	getDocumentFiles = async (documentId: number): Promise<DocumentFileRead[]> => {
+		return baseApiService.get(`/api/v1/documents/${documentId}/files`, getDocumentFilesResponse);
+	};
+
 	listDocumentVersions = async (documentId: number) => {
 		return baseApiService.get(`/api/v1/documents/${documentId}/versions`);
 	};