diff --git a/surfsense_backend/alembic/versions/45_add_updated_at_to_documents.py b/surfsense_backend/alembic/versions/45_add_updated_at_to_documents.py new file mode 100644 index 000000000..8a0d3b875 --- /dev/null +++ b/surfsense_backend/alembic/versions/45_add_updated_at_to_documents.py @@ -0,0 +1,42 @@ +"""45_add_updated_at_to_documents + +Revision ID: 45 +Revises: 44 +Create Date: 2025-12-12 + +Adds updated_at field to documents table to track when documents +are updated by indexers, processors, or editor. Includes an index +for efficient time-based filtering. +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "45" +down_revision: str | None = "44" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Upgrade schema - Add updated_at field with index to documents.""" + op.add_column( + "documents", + sa.Column("updated_at", sa.TIMESTAMP(timezone=True), nullable=True), + ) + op.create_index( + "ix_documents_updated_at", + "documents", + ["updated_at"], + ) + + +def downgrade() -> None: + """Downgrade schema - Remove updated_at field and index.""" + # Use if_exists to handle cases where index wasn't created (migration modified after apply) + op.drop_index("ix_documents_updated_at", table_name="documents", if_exists=True) + op.drop_column("documents", "updated_at") diff --git a/surfsense_backend/alembic/versions/46_remove_last_edited_at_from_documents.py b/surfsense_backend/alembic/versions/46_remove_last_edited_at_from_documents.py new file mode 100644 index 000000000..958a91807 --- /dev/null +++ b/surfsense_backend/alembic/versions/46_remove_last_edited_at_from_documents.py @@ -0,0 +1,59 @@ +"""46_remove_last_edited_at_from_documents + +Revision ID: 46 +Revises: 45 +Create Date: 2025-12-12 + +Safely migrates last_edited_at values to updated_at, then removes the +last_edited_at field from documents table since we now use updated_at +to track all document updates (indexers, processors, and editor). +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "46" +down_revision: str | None = "45" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Upgrade schema - Migrate last_edited_at to updated_at, then remove last_edited_at.""" + # Step 1: Copy last_edited_at values to updated_at where updated_at is NULL + # This preserves edit timestamps for documents that were edited via BlockNote + op.execute( + """ + UPDATE documents + SET updated_at = last_edited_at + WHERE last_edited_at IS NOT NULL + AND updated_at IS NULL + """ + ) + + # Step 2: For documents where both exist, use the most recent timestamp + op.execute( + """ + UPDATE documents + SET updated_at = GREATEST(updated_at, last_edited_at) + WHERE last_edited_at IS NOT NULL + AND updated_at IS NOT NULL + """ + ) + + # Step 3: Drop the last_edited_at column + op.drop_column("documents", "last_edited_at") + + +def downgrade() -> None: + """Downgrade schema - Re-add last_edited_at field to documents.""" + op.add_column( + "documents", + sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True), + ) + # Note: We cannot restore the original last_edited_at values after downgrade + # as that data is merged into updated_at diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 6936e847a..b6581ce6a 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -353,8 +353,8 @@ class Document(BaseModel, TimestampMixin): Boolean, nullable=False, default=False, server_default=text("false") ) - # Track when blocknote document was last edited - last_edited_at = Column(TIMESTAMP(timezone=True), nullable=True) + # Track when document was last updated by indexers, processors, or editor + updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True) search_space_id = Column( Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False diff --git a/surfsense_backend/app/routes/editor_routes.py b/surfsense_backend/app/routes/editor_routes.py index 9beebfc8e..7b7a15c13 100644 --- a/surfsense_backend/app/routes/editor_routes.py +++ b/surfsense_backend/app/routes/editor_routes.py @@ -60,8 +60,8 @@ async def get_editor_content( "document_id": document.id, "title": document.title, "blocknote_document": document.blocknote_document, - "last_edited_at": document.last_edited_at.isoformat() - if document.last_edited_at + "updated_at": document.updated_at.isoformat() + if document.updated_at else None, } @@ -97,14 +97,13 @@ async def get_editor_content( # Save the generated blocknote_document (lazy migration) document.blocknote_document = blocknote_json document.content_needs_reindexing = False - document.last_edited_at = None await session.commit() return { "document_id": document.id, "title": document.title, "blocknote_document": blocknote_json, - "last_edited_at": None, + "updated_at": document.updated_at.isoformat() if document.updated_at else None, } @@ -150,7 +149,7 @@ async def save_document( # Save BlockNote document document.blocknote_document = blocknote_document - document.last_edited_at = datetime.now(UTC) + document.updated_at = datetime.now(UTC) document.content_needs_reindexing = True await session.commit() @@ -162,5 +161,5 @@ async def save_document( "status": "saved", "document_id": document_id, "message": "Document saved and will be reindexed in the background", - "last_edited_at": document.last_edited_at.isoformat(), + "updated_at": document.updated_at.isoformat(), } diff --git 
a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py index cf6824db8..cea2a0529 100644 --- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py @@ -23,6 +23,7 @@ from .base import ( calculate_date_range, check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -327,6 +328,9 @@ async def index_airtable_records( ), } existing_document.chunks = chunks + existing_document.updated_at = ( + get_current_timestamp() + ) documents_indexed += 1 logger.info( @@ -382,6 +386,7 @@ async def index_airtable_records( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/base.py b/surfsense_backend/app/tasks/connector_indexers/base.py index 052ae3f4a..b9a99808e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/base.py +++ b/surfsense_backend/app/tasks/connector_indexers/base.py @@ -3,7 +3,7 @@ Base functionality and shared imports for connector indexers. """ import logging -from datetime import datetime, timedelta +from datetime import UTC, datetime, timedelta from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select @@ -18,6 +18,16 @@ from app.db import ( logger = logging.getLogger(__name__) +def get_current_timestamp() -> datetime: + """ + Get the current timestamp with timezone for updated_at field. + + Returns: + Current datetime with UTC timezone + """ + return datetime.now(UTC) + + async def check_duplicate_document_by_hash( session: AsyncSession, content_hash: str ) -> Document | None: diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py index 6dc9de461..2793f78db 100644 --- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py @@ -23,6 +23,7 @@ from .base import ( calculate_date_range, check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -301,6 +302,7 @@ async def index_bookstack_pages( existing_document.embedding = summary_embedding existing_document.document_metadata = doc_metadata existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info(f"Successfully updated BookStack page {page_name}") @@ -356,6 +358,7 @@ async def index_bookstack_pages( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py index 97fdbb6be..b4a349163 100644 --- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py @@ -22,6 +22,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -288,6 +289,7 @@ async def index_clickup_tasks( ), } existing_document.chunks = chunks + existing_document.updated_at = 
get_current_timestamp() documents_indexed += 1 logger.info( @@ -348,6 +350,7 @@ async def index_clickup_tasks( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index c148e0879..d5e68fb8f 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -23,6 +23,7 @@ from .base import ( calculate_date_range, check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -297,6 +298,7 @@ async def index_confluence_pages( "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), } existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info( @@ -362,6 +364,7 @@ async def index_confluence_pages( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index 5aa56aa3f..9391be788 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -23,6 +23,7 @@ from .base import ( build_document_metadata_string, check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -392,6 +393,7 @@ async def index_discord_messages( ), } existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info( @@ -454,6 +456,7 @@ async def index_discord_messages( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py index eb3dd3790..6a18af83b 100644 --- a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py @@ -19,7 +19,11 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -from .base import check_document_by_unique_identifier, check_duplicate_document_by_hash +from .base import ( + check_document_by_unique_identifier, + check_duplicate_document_by_hash, + get_current_timestamp, +) logger = logging.getLogger(__name__) @@ -249,6 +253,7 @@ async def index_elasticsearch_documents( existing_doc.unique_identifier_hash = unique_identifier_hash chunks = await create_document_chunks(content) existing_doc.chunks = chunks + existing_doc.updated_at = get_current_timestamp() await session.flush() documents_processed += 1 if documents_processed % 10 == 0: @@ -264,6 +269,7 @@ async def index_elasticsearch_documents( document_type=DocumentType.ELASTICSEARCH_CONNECTOR, document_metadata=metadata, search_space_id=search_space_id, + updated_at=get_current_timestamp(), ) # Create chunks and attach to document (persist via relationship) diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py 
b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py index 95897c29b..e1844a503 100644 --- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py @@ -22,6 +22,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, ) @@ -288,6 +289,7 @@ async def index_github_repos( ), } existing_document.chunks = chunks_data + existing_document.updated_at = get_current_timestamp() logger.info( f"Successfully updated GitHub file {full_path_key}" @@ -377,6 +379,7 @@ async def index_github_repos( embedding=summary_embedding, search_space_id=search_space_id, chunks=chunks_data, # Associate chunks directly + updated_at=get_current_timestamp(), ) session.add(document) documents_processed += 1 diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index d1effd8fb..a5d2bc73a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -23,6 +23,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -335,6 +336,7 @@ async def index_google_calendar_events( "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), } existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info( @@ -401,6 +403,7 @@ async def index_google_calendar_events( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index e92967527..d350411e1 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -27,6 +27,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -261,6 +262,7 @@ async def index_google_gmail_messages( "connector_id": connector_id, } existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info(f"Successfully updated Gmail message {subject}") @@ -319,6 +321,7 @@ async def index_google_gmail_messages( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) documents_indexed += 1 diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py index 7347e61ca..8c56b10ab 100644 --- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py @@ -23,6 +23,7 @@ from .base import ( calculate_date_range, check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -279,6 +280,7 @@ async def index_jira_issues( "indexed_at": datetime.now().strftime("%Y-%m-%d 
%H:%M:%S"), } existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info( @@ -344,6 +346,7 @@ async def index_jira_issues( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index 6100c3cd8..afc9ffd3b 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -23,6 +23,7 @@ from .base import ( calculate_date_range, check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -297,6 +298,7 @@ async def index_linear_issues( "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), } existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info( @@ -363,6 +365,7 @@ async def index_linear_issues( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index a05328bb9..4d5ddc47c 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -22,6 +22,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -352,6 +353,7 @@ async def index_luma_events( "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), } existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info(f"Successfully updated Luma event {event_name}") @@ -432,6 +434,7 @@ async def index_luma_events( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index 8bac0c3ce..332d3e39d 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -22,6 +22,7 @@ from .base import ( build_document_metadata_string, check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -353,6 +354,7 @@ async def index_notion_pages( "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), } existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info(f"Successfully updated Notion page: {page_title}") @@ -408,6 +410,7 @@ async def index_notion_pages( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py index 735125834..5119aba2e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py 
+++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py @@ -23,6 +23,7 @@ from .base import ( calculate_date_range, check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -319,6 +320,7 @@ async def index_slack_messages( # Delete old chunks and add new ones existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_indexed += 1 logger.info(f"Successfully updated Slack message {msg_ts}") @@ -349,6 +351,7 @@ async def index_slack_messages( chunks=chunks, content_hash=content_hash, unique_identifier_hash=unique_identifier_hash, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index 6a6cb0ef8..fe18f2d19 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -22,6 +22,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, get_connector_by_id, + get_current_timestamp, logger, update_connector_last_indexed, ) @@ -270,6 +271,7 @@ async def index_crawled_urls( ), } existing_document.chunks = chunks + existing_document.updated_at = get_current_timestamp() documents_updated += 1 logger.info(f"Successfully updated URL {url}") @@ -332,6 +334,7 @@ async def index_crawled_urls( unique_identifier_hash=unique_identifier_hash, embedding=summary_embedding, chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/document_processors/base.py b/surfsense_backend/app/tasks/document_processors/base.py index b3c08fec3..f29207448 100644 --- a/surfsense_backend/app/tasks/document_processors/base.py +++ b/surfsense_backend/app/tasks/document_processors/base.py @@ -2,6 +2,8 @@ Base functionality and shared imports for document processors. """ +from datetime import UTC, datetime + from langchain_community.document_transformers import MarkdownifyTransformer from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select @@ -12,6 +14,16 @@ from app.db import Document md = MarkdownifyTransformer() +def get_current_timestamp() -> datetime: + """ + Get the current timestamp with timezone for updated_at field. 
+ + Returns: + Current datetime with UTC timezone + """ + return datetime.now(UTC) + + async def check_duplicate_document( session: AsyncSession, content_hash: str ) -> Document | None: diff --git a/surfsense_backend/app/tasks/document_processors/extension_processor.py b/surfsense_backend/app/tasks/document_processors/extension_processor.py index 48e3efe27..7d8462872 100644 --- a/surfsense_backend/app/tasks/document_processors/extension_processor.py +++ b/surfsense_backend/app/tasks/document_processors/extension_processor.py @@ -20,6 +20,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, + get_current_timestamp, ) @@ -165,6 +166,7 @@ async def add_extension_received_document( existing_document.document_metadata = content.metadata.model_dump() existing_document.chunks = chunks existing_document.blocknote_document = blocknote_json + existing_document.updated_at = get_current_timestamp() await session.commit() await session.refresh(existing_document) @@ -182,6 +184,7 @@ async def add_extension_received_document( content_hash=content_hash, unique_identifier_hash=unique_identifier_hash, blocknote_document=blocknote_json, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 4ae04e050..a32e75a32 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -27,6 +27,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, + get_current_timestamp, ) from .markdown_processor import add_received_markdown_file_document @@ -123,7 +124,7 @@ async def add_received_file_document_using_unstructured( existing_document.chunks = chunks existing_document.blocknote_document = blocknote_json existing_document.content_needs_reindexing = False - existing_document.last_edited_at = None + existing_document.updated_at = get_current_timestamp() await session.commit() await session.refresh(existing_document) @@ -145,7 +146,7 @@ async def add_received_file_document_using_unstructured( unique_identifier_hash=unique_identifier_hash, blocknote_document=blocknote_json, content_needs_reindexing=False, - last_edited_at=None, + updated_at=get_current_timestamp(), ) session.add(document) @@ -252,7 +253,7 @@ async def add_received_file_document_using_llamacloud( existing_document.chunks = chunks existing_document.blocknote_document = blocknote_json existing_document.content_needs_reindexing = False - existing_document.last_edited_at = None + existing_document.updated_at = get_current_timestamp() await session.commit() await session.refresh(existing_document) @@ -274,7 +275,7 @@ async def add_received_file_document_using_llamacloud( unique_identifier_hash=unique_identifier_hash, blocknote_document=blocknote_json, content_needs_reindexing=False, - last_edited_at=None, + updated_at=get_current_timestamp(), ) session.add(document) @@ -406,7 +407,7 @@ async def add_received_file_document_using_docling( existing_document.chunks = chunks existing_document.blocknote_document = blocknote_json existing_document.content_needs_reindexing = False - existing_document.last_edited_at = None + existing_document.updated_at = get_current_timestamp() await session.commit() await session.refresh(existing_document) @@ -428,7 +429,7 @@ async def add_received_file_document_using_docling( 
unique_identifier_hash=unique_identifier_hash, blocknote_document=blocknote_json, content_needs_reindexing=False, - last_edited_at=None, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/document_processors/markdown_processor.py b/surfsense_backend/app/tasks/document_processors/markdown_processor.py index 3036071c9..e11a6efeb 100644 --- a/surfsense_backend/app/tasks/document_processors/markdown_processor.py +++ b/surfsense_backend/app/tasks/document_processors/markdown_processor.py @@ -19,6 +19,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, + get_current_timestamp, ) @@ -131,6 +132,7 @@ async def add_received_markdown_file_document( } existing_document.chunks = chunks existing_document.blocknote_document = blocknote_json + existing_document.updated_at = get_current_timestamp() await session.commit() await session.refresh(existing_document) @@ -150,6 +152,7 @@ async def add_received_markdown_file_document( content_hash=content_hash, unique_identifier_hash=unique_identifier_hash, blocknote_document=blocknote_json, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/app/tasks/document_processors/youtube_processor.py b/surfsense_backend/app/tasks/document_processors/youtube_processor.py index 332e775e1..da1a8f538 100644 --- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py +++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py @@ -22,6 +22,7 @@ from app.utils.document_converters import ( from .base import ( check_document_by_unique_identifier, + get_current_timestamp, ) @@ -325,6 +326,7 @@ async def add_youtube_video_document( } existing_document.chunks = chunks existing_document.blocknote_document = blocknote_json + existing_document.updated_at = get_current_timestamp() await session.commit() await session.refresh(existing_document) @@ -354,6 +356,7 @@ async def add_youtube_video_document( content_hash=content_hash, unique_identifier_hash=unique_identifier_hash, blocknote_document=blocknote_json, + updated_at=get_current_timestamp(), ) session.add(document) diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index 7509cfadb..65f09c7eb 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -506,6 +506,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/f1/8cc8118946dbb9cbd74f406d30d31ee8d2f723f6fb4c8245e2bc67175fd4/blis-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:91de2baf03da3a173cf62771f1d6b9236a27a8cbd0e0033be198f06ef6224986", size = 6258624 }, ] +[[package]] +name = "boto3" +version = "1.42.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/34/64e34fb40903d358a4a3d697e2ee4784a7b52c11e7effbad01967b2d3fc3/boto3-1.42.8.tar.gz", hash = "sha256:e967706af5887339407481562c389c612d5eae641eb854ddd59026d049df740e", size = 112886 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/37/9702c0b8e63aaeb1ad430ece22567b03e58ea41e446d68b92e2cb00e7817/boto3-1.42.8-py3-none-any.whl", hash = "sha256:747acc83488fc80b0e7d1c4ff0c533039ff3ede21bdbd4e89544e25b010b070c", size = 140559 }, +] + +[[package]] +name = "botocore" +version = "1.42.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/3a/ea/4be7a4a640d599b5691c7cf27e125155d7d3643ecbe37e32941f412e3de5/botocore-1.42.8.tar.gz", hash = "sha256:4921aa454f82fed0880214eab21126c98a35fe31ede952693356f9c85ce3574b", size = 14861038 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1c/24/a4301564a979368d6f3644f47acc921450b5524b8846e827237d98b04746/botocore-1.42.8-py3-none-any.whl", hash = "sha256:4cb89c74dd9083d16e45868749b999265a91309b2499907c84adeffa0a8df89b", size = 14534173 }, +] + [[package]] name = "build" version = "1.3.0" @@ -2292,6 +2320,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/4a/4175a563579e884192ba6e81725fc0448b042024419be8d83aa8a80a3f44/jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", size = 354213 }, ] +[[package]] +name = "jmespath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 }, +] + [[package]] name = "joblib" version = "1.5.1" @@ -5397,6 +5434,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/db/c376b0661c24cf770cb8815268190668ec1330eba8374a126ceef8c72d55/ruff-0.12.5-py3-none-win_arm64.whl", hash = "sha256:48cdbfc633de2c5c37d9f090ba3b352d1576b0015bfc3bc98eaf230275b7e805", size = 11951564 }, ] +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830 }, +] + [[package]] name = "safetensors" version = "0.5.3" @@ -5865,6 +5914,7 @@ source = { virtual = "." 
} dependencies = [ { name = "alembic" }, { name = "asyncpg" }, + { name = "boto3" }, { name = "celery", extra = ["redis"] }, { name = "chonkie", extra = ["all"] }, { name = "discord-py" }, @@ -5918,6 +5968,7 @@ dev = [ requires-dist = [ { name = "alembic", specifier = ">=1.13.0" }, { name = "asyncpg", specifier = ">=0.30.0" }, + { name = "boto3", specifier = ">=1.35.0" }, { name = "celery", extras = ["redis"], specifier = ">=5.5.3" }, { name = "chonkie", extras = ["all"], specifier = ">=1.4.0" }, { name = "discord-py", specifier = ">=2.5.2" }, diff --git a/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx index a7e0d6861..a6dbd4536 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx @@ -15,7 +15,7 @@ interface EditorContent { document_id: number; title: string; blocknote_document: any; - last_edited_at: string | null; + updated_at: string | null; } export default function EditorPage() { diff --git a/surfsense_web/hooks/use-search-source-connectors.ts b/surfsense_web/hooks/use-search-source-connectors.ts index 22c5b3553..2f77d7d82 100644 --- a/surfsense_web/hooks/use-search-source-connectors.ts +++ b/surfsense_web/hooks/use-search-source-connectors.ts @@ -104,7 +104,9 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: ); useEffect(() => { - if (!lazy) { + // Only auto-fetch if lazy is false AND searchSpaceId is provided + // This prevents 400 errors when the hook is used without a searchSpaceId + if (!lazy && searchSpaceId !== undefined) { fetchConnectors(searchSpaceId); } }, [lazy, fetchConnectors, searchSpaceId]);