feat: update document tracking to use 'updated_at' timestamp instead of 'last_edited_at'

2026-07-14 22:52:15 +02:00 · 2025-12-12 01:32:14 -08:00 · 2025-12-12 01:32:14 -08:00 · 8c9aa68faa
commit 8c9aa68faa
parent a313387e0f
28 changed files with 253 additions and 18 deletions
--- a/surfsense_backend/alembic/versions/45_add_updated_at_to_documents.py
+++ b/surfsense_backend/alembic/versions/45_add_updated_at_to_documents.py
@ -0,0 +1,42 @@
+"""45_add_updated_at_to_documents
+
+Revision ID: 45
+Revises: 44
+Create Date: 2025-12-12
+
+Adds updated_at field to documents table to track when documents
+are updated by indexers, processors, or editor. Includes an index
+for efficient time-based filtering.
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "45"
+down_revision: str | None = "44"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Upgrade schema - Create documents.updated_at and its lookup index."""
+    # Nullable with no default, so rows that already exist start out as NULL.
+    updated_at_column = sa.Column(
+        "updated_at", sa.TIMESTAMP(timezone=True), nullable=True
+    )
+    op.add_column("documents", updated_at_column)
+
+    # Index the new column; the module docstring cites time-based filtering.
+    indexed_columns = ["updated_at"]
+    op.create_index("ix_documents_updated_at", "documents", indexed_columns)
+
+
+def downgrade() -> None:
+    """Downgrade schema - Drop the updated_at index, then the column itself."""
+    table, index = "documents", "ix_documents_updated_at"
+    op.drop_index(index, table_name=table, if_exists=True)  # index may be absent
+    op.drop_column(table, "updated_at")
--- a/surfsense_backend/alembic/versions/46_remove_last_edited_at_from_documents.py
+++ b/surfsense_backend/alembic/versions/46_remove_last_edited_at_from_documents.py
@ -0,0 +1,59 @@
+"""46_remove_last_edited_at_from_documents
+
+Revision ID: 46
+Revises: 45
+Create Date: 2025-12-12
+
+Safely migrates last_edited_at values to updated_at, then removes the
+last_edited_at field from documents table since we now use updated_at
+to track all document updates (indexers, processors, and editor).
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "46"
+down_revision: str | None = "45"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Upgrade schema - Merge last_edited_at into updated_at, then drop last_edited_at.
+
+    After the merge, updated_at holds the later of the two timestamps for every
+    document that had a last_edited_at value; all other rows are left untouched.
+    """
+    # One UPDATE covers both merge cases, so no row is written more than once
+    # and the statement needs nothing beyond standard SQL comparisons.
+    #
+    # A row is rewritten only when it actually needs a new value:
+    #   - updated_at IS NULL           -> adopt the editor timestamp as-is
+    #   - updated_at < last_edited_at  -> the editor timestamp is more recent
+    # Rows whose updated_at is already the newer (or equal) value are skipped,
+    # which yields the same result as GREATEST(updated_at, last_edited_at).
+    op.execute(
+        """
+        UPDATE documents
+        SET updated_at = last_edited_at
+        WHERE last_edited_at IS NOT NULL
+          AND (updated_at IS NULL OR updated_at < last_edited_at)
+        """
+    )
+
+    # The edit timestamps now live in updated_at, so the old column can go.
+    op.drop_column("documents", "last_edited_at")
+
+
+def downgrade() -> None:
+    """Downgrade schema - Recreate last_edited_at on documents as an empty column."""
+    # Note: the original last_edited_at values cannot be restored, because the
+    # upgrade merged them into updated_at; every row therefore comes back NULL.
+    restored_column = sa.Column(
+        "last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True
+    )
+    op.add_column("documents", restored_column)
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -353,8 +353,8 @@ class Document(BaseModel, TimestampMixin):
        Boolean, nullable=False, default=False, server_default=text("false")
    )

-    # Track when blocknote document was last edited
-    last_edited_at = Column(TIMESTAMP(timezone=True), nullable=True)
+    # Track when document was last updated by indexers, processors, or editor
+    updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)

    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
--- a/surfsense_backend/app/routes/editor_routes.py
+++ b/surfsense_backend/app/routes/editor_routes.py
@ -60,8 +60,8 @@ async def get_editor_content(
            "document_id": document.id,
            "title": document.title,
            "blocknote_document": document.blocknote_document,
-            "last_edited_at": document.last_edited_at.isoformat()
-            if document.last_edited_at
+            "updated_at": document.updated_at.isoformat()
+            if document.updated_at
            else None,
        }

@ -97,14 +97,13 @@ async def get_editor_content(
    # Save the generated blocknote_document (lazy migration)
    document.blocknote_document = blocknote_json
    document.content_needs_reindexing = False
-    document.last_edited_at = None
    await session.commit()

    return {
        "document_id": document.id,
        "title": document.title,
        "blocknote_document": blocknote_json,
-        "last_edited_at": None,
+        "updated_at": document.updated_at.isoformat() if document.updated_at else None,
    }


@ -150,7 +149,7 @@ async def save_document(

    # Save BlockNote document
    document.blocknote_document = blocknote_document
-    document.last_edited_at = datetime.now(UTC)
+    document.updated_at = datetime.now(UTC)
    document.content_needs_reindexing = True

    await session.commit()
@ -162,5 +161,5 @@ async def save_document(
        "status": "saved",
        "document_id": document_id,
        "message": "Document saved and will be reindexed in the background",
-        "last_edited_at": document.last_edited_at.isoformat(),
+        "updated_at": document.updated_at.isoformat(),
    }
--- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py
@ -23,6 +23,7 @@ from .base import (
    calculate_date_range,
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -327,6 +328,9 @@ async def index_airtable_records(
                                        ),
                                    }
                                    existing_document.chunks = chunks
+                                    existing_document.updated_at = (
+                                        get_current_timestamp()
+                                    )

                                    documents_indexed += 1
                                    logger.info(
@ -382,6 +386,7 @@ async def index_airtable_records(
                                unique_identifier_hash=unique_identifier_hash,
                                embedding=summary_embedding,
                                chunks=chunks,
+                                updated_at=get_current_timestamp(),
                            )

                            session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/base.py
+++ b/surfsense_backend/app/tasks/connector_indexers/base.py
@ -3,7 +3,7 @@ Base functionality and shared imports for connector indexers.
 """

 import logging
-from datetime import datetime, timedelta
+from datetime import UTC, datetime, timedelta

 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select
@ -18,6 +18,16 @@ from app.db import (
 logger = logging.getLogger(__name__)


+def get_current_timestamp() -> datetime:
+    """
+    Produce the timezone-aware value to store in a document's updated_at field.
+
+    Returns:
+        The present moment as a datetime whose tzinfo is UTC
+    """
+    return datetime.now(tz=UTC)
+
+
 async def check_duplicate_document_by_hash(
    session: AsyncSession, content_hash: str
 ) -> Document | None:
--- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py
@ -23,6 +23,7 @@ from .base import (
    calculate_date_range,
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -301,6 +302,7 @@ async def index_bookstack_pages(
                        existing_document.embedding = summary_embedding
                        existing_document.document_metadata = doc_metadata
                        existing_document.chunks = chunks
+                        existing_document.updated_at = get_current_timestamp()

                        documents_indexed += 1
                        logger.info(f"Successfully updated BookStack page {page_name}")
@ -356,6 +358,7 @@ async def index_bookstack_pages(
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
                )

                session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py
@ -22,6 +22,7 @@ from app.utils.document_converters import (
 from .base import (
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -288,6 +289,7 @@ async def index_clickup_tasks(
                                ),
                            }
                            existing_document.chunks = chunks
+                            existing_document.updated_at = get_current_timestamp()

                            documents_indexed += 1
                            logger.info(
@ -348,6 +350,7 @@ async def index_clickup_tasks(
                        unique_identifier_hash=unique_identifier_hash,
                        embedding=summary_embedding,
                        chunks=chunks,
+                        updated_at=get_current_timestamp(),
                    )

                    session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py
@ -23,6 +23,7 @@ from .base import (
    calculate_date_range,
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -297,6 +298,7 @@ async def index_confluence_pages(
                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        }
                        existing_document.chunks = chunks
+                        existing_document.updated_at = get_current_timestamp()

                        documents_indexed += 1
                        logger.info(
@ -362,6 +364,7 @@ async def index_confluence_pages(
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
                )

                session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py
@ -23,6 +23,7 @@ from .base import (
    build_document_metadata_string,
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -392,6 +393,7 @@ async def index_discord_messages(
                                    ),
                                }
                                existing_document.chunks = chunks
+                                existing_document.updated_at = get_current_timestamp()

                                documents_indexed += 1
                                logger.info(
@ -454,6 +456,7 @@ async def index_discord_messages(
                            unique_identifier_hash=unique_identifier_hash,
                            embedding=summary_embedding,
                            chunks=chunks,
+                            updated_at=get_current_timestamp(),
                        )

                        session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py
@ -19,7 +19,11 @@ from app.utils.document_converters import (
    generate_unique_identifier_hash,
 )

-from .base import check_document_by_unique_identifier, check_duplicate_document_by_hash
+from .base import (
+    check_document_by_unique_identifier,
+    check_duplicate_document_by_hash,
+    get_current_timestamp,
+)

 logger = logging.getLogger(__name__)

@ -249,6 +253,7 @@ async def index_elasticsearch_documents(
                            existing_doc.unique_identifier_hash = unique_identifier_hash
                            chunks = await create_document_chunks(content)
                            existing_doc.chunks = chunks
+                            existing_doc.updated_at = get_current_timestamp()
                            await session.flush()
                            documents_processed += 1
                            if documents_processed % 10 == 0:
@ -264,6 +269,7 @@ async def index_elasticsearch_documents(
                        document_type=DocumentType.ELASTICSEARCH_CONNECTOR,
                        document_metadata=metadata,
                        search_space_id=search_space_id,
+                        updated_at=get_current_timestamp(),
                    )

                    # Create chunks and attach to document (persist via relationship)
--- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py
@ -22,6 +22,7 @@ from app.utils.document_converters import (
 from .base import (
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
 )

@ -288,6 +289,7 @@ async def index_github_repos(
                                ),
                            }
                            existing_document.chunks = chunks_data
+                            existing_document.updated_at = get_current_timestamp()

                            logger.info(
                                f"Successfully updated GitHub file {full_path_key}"
@ -377,6 +379,7 @@ async def index_github_repos(
                        embedding=summary_embedding,
                        search_space_id=search_space_id,
                        chunks=chunks_data,  # Associate chunks directly
+                        updated_at=get_current_timestamp(),
                    )
                    session.add(document)
                    documents_processed += 1
--- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py
@ -23,6 +23,7 @@ from app.utils.document_converters import (
 from .base import (
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -335,6 +336,7 @@ async def index_google_calendar_events(
                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        }
                        existing_document.chunks = chunks
+                        existing_document.updated_at = get_current_timestamp()

                        documents_indexed += 1
                        logger.info(
@ -401,6 +403,7 @@ async def index_google_calendar_events(
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
                )

                session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py
@ -27,6 +27,7 @@ from app.utils.document_converters import (
 from .base import (
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -261,6 +262,7 @@ async def index_google_gmail_messages(
                            "connector_id": connector_id,
                        }
                        existing_document.chunks = chunks
+                        existing_document.updated_at = get_current_timestamp()

                        documents_indexed += 1
                        logger.info(f"Successfully updated Gmail message {subject}")
@ -319,6 +321,7 @@ async def index_google_gmail_messages(
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
                )
                session.add(document)
                documents_indexed += 1
--- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py
@ -23,6 +23,7 @@ from .base import (
    calculate_date_range,
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -279,6 +280,7 @@ async def index_jira_issues(
                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        }
                        existing_document.chunks = chunks
+                        existing_document.updated_at = get_current_timestamp()

                        documents_indexed += 1
                        logger.info(
@ -344,6 +346,7 @@ async def index_jira_issues(
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
                )

                session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py
@ -23,6 +23,7 @@ from .base import (
    calculate_date_range,
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -297,6 +298,7 @@ async def index_linear_issues(
                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        }
                        existing_document.chunks = chunks
+                        existing_document.updated_at = get_current_timestamp()

                        documents_indexed += 1
                        logger.info(
@ -363,6 +365,7 @@ async def index_linear_issues(
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
                )

                session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py
@ -22,6 +22,7 @@ from app.utils.document_converters import (
 from .base import (
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -352,6 +353,7 @@ async def index_luma_events(
                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        }
                        existing_document.chunks = chunks
+                        existing_document.updated_at = get_current_timestamp()

                        documents_indexed += 1
                        logger.info(f"Successfully updated Luma event {event_name}")
@ -432,6 +434,7 @@ async def index_luma_events(
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
                )

                session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py
@ -22,6 +22,7 @@ from .base import (
    build_document_metadata_string,
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -353,6 +354,7 @@ async def index_notion_pages(
                            "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                        }
                        existing_document.chunks = chunks
+                        existing_document.updated_at = get_current_timestamp()

                        documents_indexed += 1
                        logger.info(f"Successfully updated Notion page: {page_title}")
@ -408,6 +410,7 @@ async def index_notion_pages(
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
                )

                session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py
@ -23,6 +23,7 @@ from .base import (
    calculate_date_range,
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -319,6 +320,7 @@ async def index_slack_messages(

                            # Delete old chunks and add new ones
                            existing_document.chunks = chunks
+                            existing_document.updated_at = get_current_timestamp()

                            documents_indexed += 1
                            logger.info(f"Successfully updated Slack message {msg_ts}")
@ -349,6 +351,7 @@ async def index_slack_messages(
                        chunks=chunks,
                        content_hash=content_hash,
                        unique_identifier_hash=unique_identifier_hash,
+                        updated_at=get_current_timestamp(),
                    )

                    session.add(document)
--- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
@ -22,6 +22,7 @@ from app.utils.document_converters import (
 from .base import (
    check_document_by_unique_identifier,
    get_connector_by_id,
+    get_current_timestamp,
    logger,
    update_connector_last_indexed,
 )
@ -270,6 +271,7 @@ async def index_crawled_urls(
                            ),
                        }
                        existing_document.chunks = chunks
+                        existing_document.updated_at = get_current_timestamp()

                        documents_updated += 1
                        logger.info(f"Successfully updated URL {url}")
@ -332,6 +334,7 @@ async def index_crawled_urls(
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
+                    updated_at=get_current_timestamp(),
                )

                session.add(document)
--- a/surfsense_backend/app/tasks/document_processors/base.py
+++ b/surfsense_backend/app/tasks/document_processors/base.py
@ -2,6 +2,8 @@
 Base functionality and shared imports for document processors.
 """

+from datetime import UTC, datetime
+
 from langchain_community.document_transformers import MarkdownifyTransformer
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select
@ -12,6 +14,16 @@ from app.db import Document
 md = MarkdownifyTransformer()


+def get_current_timestamp() -> datetime:
+    """
+    Build the UTC-aware "now" value used for a document's updated_at field.
+
+    Returns:
+        datetime: the current time, carrying the UTC timezone
+    """
+    return datetime.now(tz=UTC)
+
+
 async def check_duplicate_document(
    session: AsyncSession, content_hash: str
 ) -> Document | None:
--- a/surfsense_backend/app/tasks/document_processors/extension_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/extension_processor.py
@ -20,6 +20,7 @@ from app.utils.document_converters import (

 from .base import (
    check_document_by_unique_identifier,
+    get_current_timestamp,
 )


@ -165,6 +166,7 @@ async def add_extension_received_document(
            existing_document.document_metadata = content.metadata.model_dump()
            existing_document.chunks = chunks
            existing_document.blocknote_document = blocknote_json
+            existing_document.updated_at = get_current_timestamp()

            await session.commit()
            await session.refresh(existing_document)
@ -182,6 +184,7 @@ async def add_extension_received_document(
                content_hash=content_hash,
                unique_identifier_hash=unique_identifier_hash,
                blocknote_document=blocknote_json,
+                updated_at=get_current_timestamp(),
            )

            session.add(document)
--- a/surfsense_backend/app/tasks/document_processors/file_processors.py
+++ b/surfsense_backend/app/tasks/document_processors/file_processors.py
@ -27,6 +27,7 @@ from app.utils.document_converters import (

 from .base import (
    check_document_by_unique_identifier,
+    get_current_timestamp,
 )
 from .markdown_processor import add_received_markdown_file_document

@ -123,7 +124,7 @@ async def add_received_file_document_using_unstructured(
            existing_document.chunks = chunks
            existing_document.blocknote_document = blocknote_json
            existing_document.content_needs_reindexing = False
-            existing_document.last_edited_at = None
+            existing_document.updated_at = get_current_timestamp()

            await session.commit()
            await session.refresh(existing_document)
@ -145,7 +146,7 @@ async def add_received_file_document_using_unstructured(
                unique_identifier_hash=unique_identifier_hash,
                blocknote_document=blocknote_json,
                content_needs_reindexing=False,
-                last_edited_at=None,
+                updated_at=get_current_timestamp(),
            )

            session.add(document)
@ -252,7 +253,7 @@ async def add_received_file_document_using_llamacloud(
            existing_document.chunks = chunks
            existing_document.blocknote_document = blocknote_json
            existing_document.content_needs_reindexing = False
-            existing_document.last_edited_at = None
+            existing_document.updated_at = get_current_timestamp()

            await session.commit()
            await session.refresh(existing_document)
@ -274,7 +275,7 @@ async def add_received_file_document_using_llamacloud(
                unique_identifier_hash=unique_identifier_hash,
                blocknote_document=blocknote_json,
                content_needs_reindexing=False,
-                last_edited_at=None,
+                updated_at=get_current_timestamp(),
            )

            session.add(document)
@ -406,7 +407,7 @@ async def add_received_file_document_using_docling(
            existing_document.chunks = chunks
            existing_document.blocknote_document = blocknote_json
            existing_document.content_needs_reindexing = False
-            existing_document.last_edited_at = None
+            existing_document.updated_at = get_current_timestamp()

            await session.commit()
            await session.refresh(existing_document)
@ -428,7 +429,7 @@ async def add_received_file_document_using_docling(
                unique_identifier_hash=unique_identifier_hash,
                blocknote_document=blocknote_json,
                content_needs_reindexing=False,
-                last_edited_at=None,
+                updated_at=get_current_timestamp(),
            )

        session.add(document)
--- a/surfsense_backend/app/tasks/document_processors/markdown_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/markdown_processor.py
@ -19,6 +19,7 @@ from app.utils.document_converters import (

 from .base import (
    check_document_by_unique_identifier,
+    get_current_timestamp,
 )


@ -131,6 +132,7 @@ async def add_received_markdown_file_document(
            }
            existing_document.chunks = chunks
            existing_document.blocknote_document = blocknote_json
+            existing_document.updated_at = get_current_timestamp()

            await session.commit()
            await session.refresh(existing_document)
@ -150,6 +152,7 @@ async def add_received_markdown_file_document(
                content_hash=content_hash,
                unique_identifier_hash=unique_identifier_hash,
                blocknote_document=blocknote_json,
+                updated_at=get_current_timestamp(),
            )

            session.add(document)
--- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py
@ -22,6 +22,7 @@ from app.utils.document_converters import (

 from .base import (
    check_document_by_unique_identifier,
+    get_current_timestamp,
 )


@ -325,6 +326,7 @@ async def add_youtube_video_document(
            }
            existing_document.chunks = chunks
            existing_document.blocknote_document = blocknote_json
+            existing_document.updated_at = get_current_timestamp()

            await session.commit()
            await session.refresh(existing_document)
@ -354,6 +356,7 @@ async def add_youtube_video_document(
                content_hash=content_hash,
                unique_identifier_hash=unique_identifier_hash,
                blocknote_document=blocknote_json,
+                updated_at=get_current_timestamp(),
            )

            session.add(document)
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
@ -506,6 +506,34 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/16/f1/8cc8118946dbb9cbd74f406d30d31ee8d2f723f6fb4c8245e2bc67175fd4/blis-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:91de2baf03da3a173cf62771f1d6b9236a27a8cbd0e0033be198f06ef6224986", size = 6258624 },
 ]

+[[package]]
+name = "boto3"
+version = "1.42.8"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "botocore" },
+    { name = "jmespath" },
+    { name = "s3transfer" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9d/34/64e34fb40903d358a4a3d697e2ee4784a7b52c11e7effbad01967b2d3fc3/boto3-1.42.8.tar.gz", hash = "sha256:e967706af5887339407481562c389c612d5eae641eb854ddd59026d049df740e", size = 112886 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/96/37/9702c0b8e63aaeb1ad430ece22567b03e58ea41e446d68b92e2cb00e7817/boto3-1.42.8-py3-none-any.whl", hash = "sha256:747acc83488fc80b0e7d1c4ff0c533039ff3ede21bdbd4e89544e25b010b070c", size = 140559 },
+]
+
+[[package]]
+name = "botocore"
+version = "1.42.8"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "jmespath" },
+    { name = "python-dateutil" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3a/ea/4be7a4a640d599b5691c7cf27e125155d7d3643ecbe37e32941f412e3de5/botocore-1.42.8.tar.gz", hash = "sha256:4921aa454f82fed0880214eab21126c98a35fe31ede952693356f9c85ce3574b", size = 14861038 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1c/24/a4301564a979368d6f3644f47acc921450b5524b8846e827237d98b04746/botocore-1.42.8-py3-none-any.whl", hash = "sha256:4cb89c74dd9083d16e45868749b999265a91309b2499907c84adeffa0a8df89b", size = 14534173 },
+]
+
 [[package]]
 name = "build"
 version = "1.3.0"
@ -2292,6 +2320,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/b3/4a/4175a563579e884192ba6e81725fc0448b042024419be8d83aa8a80a3f44/jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", size = 354213 },
 ]

+[[package]]
+name = "jmespath"
+version = "1.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 },
+]
+
 [[package]]
 name = "joblib"
 version = "1.5.1"
@ -5397,6 +5434,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/00/db/c376b0661c24cf770cb8815268190668ec1330eba8374a126ceef8c72d55/ruff-0.12.5-py3-none-win_arm64.whl", hash = "sha256:48cdbfc633de2c5c37d9f090ba3b352d1576b0015bfc3bc98eaf230275b7e805", size = 11951564 },
 ]

+[[package]]
+name = "s3transfer"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "botocore" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830 },
+]
+
 [[package]]
 name = "safetensors"
 version = "0.5.3"
@ -5865,6 +5914,7 @@ source = { virtual = "." }
 dependencies = [
    { name = "alembic" },
    { name = "asyncpg" },
+    { name = "boto3" },
    { name = "celery", extra = ["redis"] },
    { name = "chonkie", extra = ["all"] },
    { name = "discord-py" },
@ -5918,6 +5968,7 @@ dev = [
 requires-dist = [
    { name = "alembic", specifier = ">=1.13.0" },
    { name = "asyncpg", specifier = ">=0.30.0" },
+    { name = "boto3", specifier = ">=1.35.0" },
    { name = "celery", extras = ["redis"], specifier = ">=5.5.3" },
    { name = "chonkie", extras = ["all"], specifier = ">=1.4.0" },
    { name = "discord-py", specifier = ">=2.5.2" },
--- a/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx
@ -15,7 +15,7 @@ interface EditorContent {
 	document_id: number;
 	title: string;
 	blocknote_document: any;
-	last_edited_at: string | null;
+	updated_at: string | null;
 }

 export default function EditorPage() {
--- a/surfsense_web/hooks/use-search-source-connectors.ts
+++ b/surfsense_web/hooks/use-search-source-connectors.ts
@ -104,7 +104,9 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?:
 	);

 	useEffect(() => {
-		if (!lazy) {
+		// Only auto-fetch if lazy is false AND searchSpaceId is provided
+		// This prevents 400 errors when the hook is used without a searchSpaceId
+		if (!lazy && searchSpaceId !== undefined) {
 			fetchConnectors(searchSpaceId);
 		}
 	}, [lazy, fetchConnectors, searchSpaceId]);