mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-27 19:25:15 +02:00
feat: update document tracking to use 'updated_at' timestamp instead of 'last_edited_at'
This commit is contained in:
parent
a313387e0f
commit
8c9aa68faa
28 changed files with 253 additions and 18 deletions
|
|
@ -0,0 +1,42 @@
|
|||
"""45_add_updated_at_to_documents
|
||||
|
||||
Revision ID: 45
|
||||
Revises: 44
|
||||
Create Date: 2025-12-12
|
||||
|
||||
Adds updated_at field to documents table to track when documents
|
||||
are updated by indexers, processors, or editor. Includes an index
|
||||
for efficient time-based filtering.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "45"
|
||||
down_revision: str | None = "44"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add a nullable, timezone-aware ``updated_at`` column to ``documents`` and index it."""
    updated_at_column = sa.Column(
        "updated_at",
        sa.TIMESTAMP(timezone=True),
        nullable=True,
    )
    op.add_column("documents", updated_at_column)

    # Plain (non-unique) index on the new column.
    op.create_index("ix_documents_updated_at", "documents", ["updated_at"])
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``updated_at`` index and then the column from ``documents``."""
    table = "documents"

    # if_exists=True tolerates a missing index, e.g. when this migration was
    # modified after it had already been applied.
    op.drop_index("ix_documents_updated_at", table_name=table, if_exists=True)
    op.drop_column(table, "updated_at")
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
"""46_remove_last_edited_at_from_documents
|
||||
|
||||
Revision ID: 46
|
||||
Revises: 45
|
||||
Create Date: 2025-12-12
|
||||
|
||||
Safely migrates last_edited_at values to updated_at, then removes the
|
||||
last_edited_at field from documents table since we now use updated_at
|
||||
to track all document updates (indexers, processors, and editor).
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "46"
|
||||
down_revision: str | None = "45"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Fold ``last_edited_at`` into ``updated_at``, then drop ``last_edited_at``.

    After the data step, every row that had a ``last_edited_at`` value holds
    the more recent of its two timestamps in ``updated_at``. Rows without a
    ``last_edited_at`` value are left untouched.
    """
    # A single pass covers both cases that actually need a write:
    #   * updated_at IS NULL           -> take last_edited_at (preserves the
    #                                     edit timestamp of documents edited
    #                                     via BlockNote)
    #   * updated_at < last_edited_at  -> take the more recent last_edited_at
    # Rows whose updated_at is already the newer (or equal) value keep it and
    # are not rewritten.
    op.execute(
        """
        UPDATE documents
        SET updated_at = last_edited_at
        WHERE last_edited_at IS NOT NULL
          AND (updated_at IS NULL OR updated_at < last_edited_at)
        """
    )

    # The values now live in updated_at, so the old column can be removed.
    op.drop_column("documents", "last_edited_at")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Re-create the nullable ``last_edited_at`` column on ``documents``."""
    # The original last_edited_at values cannot be restored: upgrade() merged
    # them into updated_at before dropping the column, so the re-added column
    # is NULL for every row.
    last_edited_at_column = sa.Column(
        "last_edited_at",
        sa.TIMESTAMP(timezone=True),
        nullable=True,
    )
    op.add_column("documents", last_edited_at_column)
|
||||
|
|
@ -353,8 +353,8 @@ class Document(BaseModel, TimestampMixin):
|
|||
Boolean, nullable=False, default=False, server_default=text("false")
|
||||
)
|
||||
|
||||
# Track when blocknote document was last edited
|
||||
last_edited_at = Column(TIMESTAMP(timezone=True), nullable=True)
|
||||
# Track when document was last updated by indexers, processors, or editor
|
||||
updated_at = Column(TIMESTAMP(timezone=True), nullable=True, index=True)
|
||||
|
||||
search_space_id = Column(
|
||||
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
|
||||
|
|
|
|||
|
|
@ -60,8 +60,8 @@ async def get_editor_content(
|
|||
"document_id": document.id,
|
||||
"title": document.title,
|
||||
"blocknote_document": document.blocknote_document,
|
||||
"last_edited_at": document.last_edited_at.isoformat()
|
||||
if document.last_edited_at
|
||||
"updated_at": document.updated_at.isoformat()
|
||||
if document.updated_at
|
||||
else None,
|
||||
}
|
||||
|
||||
|
|
@ -97,14 +97,13 @@ async def get_editor_content(
|
|||
# Save the generated blocknote_document (lazy migration)
|
||||
document.blocknote_document = blocknote_json
|
||||
document.content_needs_reindexing = False
|
||||
document.last_edited_at = None
|
||||
await session.commit()
|
||||
|
||||
return {
|
||||
"document_id": document.id,
|
||||
"title": document.title,
|
||||
"blocknote_document": blocknote_json,
|
||||
"last_edited_at": None,
|
||||
"updated_at": document.updated_at.isoformat() if document.updated_at else None,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -150,7 +149,7 @@ async def save_document(
|
|||
|
||||
# Save BlockNote document
|
||||
document.blocknote_document = blocknote_document
|
||||
document.last_edited_at = datetime.now(UTC)
|
||||
document.updated_at = datetime.now(UTC)
|
||||
document.content_needs_reindexing = True
|
||||
|
||||
await session.commit()
|
||||
|
|
@ -162,5 +161,5 @@ async def save_document(
|
|||
"status": "saved",
|
||||
"document_id": document_id,
|
||||
"message": "Document saved and will be reindexed in the background",
|
||||
"last_edited_at": document.last_edited_at.isoformat(),
|
||||
"updated_at": document.updated_at.isoformat(),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from .base import (
|
|||
calculate_date_range,
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -327,6 +328,9 @@ async def index_airtable_records(
|
|||
),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = (
|
||||
get_current_timestamp()
|
||||
)
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(
|
||||
|
|
@ -382,6 +386,7 @@ async def index_airtable_records(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ Base functionality and shared imports for connector indexers.
|
|||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
|
@ -18,6 +18,16 @@ from app.db import (
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_current_timestamp() -> datetime:
|
||||
"""
|
||||
Get the current timestamp with timezone for updated_at field.
|
||||
|
||||
Returns:
|
||||
Current datetime with UTC timezone
|
||||
"""
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
async def check_duplicate_document_by_hash(
|
||||
session: AsyncSession, content_hash: str
|
||||
) -> Document | None:
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from .base import (
|
|||
calculate_date_range,
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -301,6 +302,7 @@ async def index_bookstack_pages(
|
|||
existing_document.embedding = summary_embedding
|
||||
existing_document.document_metadata = doc_metadata
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(f"Successfully updated BookStack page {page_name}")
|
||||
|
|
@ -356,6 +358,7 @@ async def index_bookstack_pages(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from app.utils.document_converters import (
|
|||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -288,6 +289,7 @@ async def index_clickup_tasks(
|
|||
),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(
|
||||
|
|
@ -348,6 +350,7 @@ async def index_clickup_tasks(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from .base import (
|
|||
calculate_date_range,
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -297,6 +298,7 @@ async def index_confluence_pages(
|
|||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(
|
||||
|
|
@ -362,6 +364,7 @@ async def index_confluence_pages(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from .base import (
|
|||
build_document_metadata_string,
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -392,6 +393,7 @@ async def index_discord_messages(
|
|||
),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(
|
||||
|
|
@ -454,6 +456,7 @@ async def index_discord_messages(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -19,7 +19,11 @@ from app.utils.document_converters import (
|
|||
generate_unique_identifier_hash,
|
||||
)
|
||||
|
||||
from .base import check_document_by_unique_identifier, check_duplicate_document_by_hash
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
check_duplicate_document_by_hash,
|
||||
get_current_timestamp,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -249,6 +253,7 @@ async def index_elasticsearch_documents(
|
|||
existing_doc.unique_identifier_hash = unique_identifier_hash
|
||||
chunks = await create_document_chunks(content)
|
||||
existing_doc.chunks = chunks
|
||||
existing_doc.updated_at = get_current_timestamp()
|
||||
await session.flush()
|
||||
documents_processed += 1
|
||||
if documents_processed % 10 == 0:
|
||||
|
|
@ -264,6 +269,7 @@ async def index_elasticsearch_documents(
|
|||
document_type=DocumentType.ELASTICSEARCH_CONNECTOR,
|
||||
document_metadata=metadata,
|
||||
search_space_id=search_space_id,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
# Create chunks and attach to document (persist via relationship)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from app.utils.document_converters import (
|
|||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
)
|
||||
|
||||
|
|
@ -288,6 +289,7 @@ async def index_github_repos(
|
|||
),
|
||||
}
|
||||
existing_document.chunks = chunks_data
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
logger.info(
|
||||
f"Successfully updated GitHub file {full_path_key}"
|
||||
|
|
@ -377,6 +379,7 @@ async def index_github_repos(
|
|||
embedding=summary_embedding,
|
||||
search_space_id=search_space_id,
|
||||
chunks=chunks_data, # Associate chunks directly
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
session.add(document)
|
||||
documents_processed += 1
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from app.utils.document_converters import (
|
|||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -335,6 +336,7 @@ async def index_google_calendar_events(
|
|||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(
|
||||
|
|
@ -401,6 +403,7 @@ async def index_google_calendar_events(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ from app.utils.document_converters import (
|
|||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -261,6 +262,7 @@ async def index_google_gmail_messages(
|
|||
"connector_id": connector_id,
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(f"Successfully updated Gmail message {subject}")
|
||||
|
|
@ -319,6 +321,7 @@ async def index_google_gmail_messages(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
session.add(document)
|
||||
documents_indexed += 1
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from .base import (
|
|||
calculate_date_range,
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -279,6 +280,7 @@ async def index_jira_issues(
|
|||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(
|
||||
|
|
@ -344,6 +346,7 @@ async def index_jira_issues(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from .base import (
|
|||
calculate_date_range,
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -297,6 +298,7 @@ async def index_linear_issues(
|
|||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(
|
||||
|
|
@ -363,6 +365,7 @@ async def index_linear_issues(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from app.utils.document_converters import (
|
|||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -352,6 +353,7 @@ async def index_luma_events(
|
|||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(f"Successfully updated Luma event {event_name}")
|
||||
|
|
@ -432,6 +434,7 @@ async def index_luma_events(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from .base import (
|
|||
build_document_metadata_string,
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -353,6 +354,7 @@ async def index_notion_pages(
|
|||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(f"Successfully updated Notion page: {page_title}")
|
||||
|
|
@ -408,6 +410,7 @@ async def index_notion_pages(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from .base import (
|
|||
calculate_date_range,
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -319,6 +320,7 @@ async def index_slack_messages(
|
|||
|
||||
# Delete old chunks and add new ones
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_indexed += 1
|
||||
logger.info(f"Successfully updated Slack message {msg_ts}")
|
||||
|
|
@ -349,6 +351,7 @@ async def index_slack_messages(
|
|||
chunks=chunks,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from app.utils.document_converters import (
|
|||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
|
@ -270,6 +271,7 @@ async def index_crawled_urls(
|
|||
),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
documents_updated += 1
|
||||
logger.info(f"Successfully updated URL {url}")
|
||||
|
|
@ -332,6 +334,7 @@ async def index_crawled_urls(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
Base functionality and shared imports for document processors.
|
||||
"""
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from langchain_community.document_transformers import MarkdownifyTransformer
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
|
@ -12,6 +14,16 @@ from app.db import Document
|
|||
md = MarkdownifyTransformer()
|
||||
|
||||
|
||||
def get_current_timestamp() -> datetime:
|
||||
"""
|
||||
Get the current timestamp with timezone for updated_at field.
|
||||
|
||||
Returns:
|
||||
Current datetime with UTC timezone
|
||||
"""
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
async def check_duplicate_document(
|
||||
session: AsyncSession, content_hash: str
|
||||
) -> Document | None:
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ from app.utils.document_converters import (
|
|||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_current_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -165,6 +166,7 @@ async def add_extension_received_document(
|
|||
existing_document.document_metadata = content.metadata.model_dump()
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -182,6 +184,7 @@ async def add_extension_received_document(
|
|||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ from app.utils.document_converters import (
|
|||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_current_timestamp,
|
||||
)
|
||||
from .markdown_processor import add_received_markdown_file_document
|
||||
|
||||
|
|
@ -123,7 +124,7 @@ async def add_received_file_document_using_unstructured(
|
|||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.content_needs_reindexing = False
|
||||
existing_document.last_edited_at = None
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -145,7 +146,7 @@ async def add_received_file_document_using_unstructured(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
content_needs_reindexing=False,
|
||||
last_edited_at=None,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
@ -252,7 +253,7 @@ async def add_received_file_document_using_llamacloud(
|
|||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.content_needs_reindexing = False
|
||||
existing_document.last_edited_at = None
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -274,7 +275,7 @@ async def add_received_file_document_using_llamacloud(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
content_needs_reindexing=False,
|
||||
last_edited_at=None,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
@ -406,7 +407,7 @@ async def add_received_file_document_using_docling(
|
|||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.content_needs_reindexing = False
|
||||
existing_document.last_edited_at = None
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -428,7 +429,7 @@ async def add_received_file_document_using_docling(
|
|||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
content_needs_reindexing=False,
|
||||
last_edited_at=None,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from app.utils.document_converters import (
|
|||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_current_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -131,6 +132,7 @@ async def add_received_markdown_file_document(
|
|||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -150,6 +152,7 @@ async def add_received_markdown_file_document(
|
|||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from app.utils.document_converters import (
|
|||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_current_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -325,6 +326,7 @@ async def add_youtube_video_document(
|
|||
}
|
||||
existing_document.chunks = chunks
|
||||
existing_document.blocknote_document = blocknote_json
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_document)
|
||||
|
|
@ -354,6 +356,7 @@ async def add_youtube_video_document(
|
|||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
blocknote_document=blocknote_json,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
|
|
|
|||
51
surfsense_backend/uv.lock
generated
51
surfsense_backend/uv.lock
generated
|
|
@ -506,6 +506,34 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/16/f1/8cc8118946dbb9cbd74f406d30d31ee8d2f723f6fb4c8245e2bc67175fd4/blis-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:91de2baf03da3a173cf62771f1d6b9236a27a8cbd0e0033be198f06ef6224986", size = 6258624 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "boto3"
|
||||
version = "1.42.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "botocore" },
|
||||
{ name = "jmespath" },
|
||||
{ name = "s3transfer" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/9d/34/64e34fb40903d358a4a3d697e2ee4784a7b52c11e7effbad01967b2d3fc3/boto3-1.42.8.tar.gz", hash = "sha256:e967706af5887339407481562c389c612d5eae641eb854ddd59026d049df740e", size = 112886 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/96/37/9702c0b8e63aaeb1ad430ece22567b03e58ea41e446d68b92e2cb00e7817/boto3-1.42.8-py3-none-any.whl", hash = "sha256:747acc83488fc80b0e7d1c4ff0c533039ff3ede21bdbd4e89544e25b010b070c", size = 140559 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "botocore"
|
||||
version = "1.42.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "jmespath" },
|
||||
{ name = "python-dateutil" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/3a/ea/4be7a4a640d599b5691c7cf27e125155d7d3643ecbe37e32941f412e3de5/botocore-1.42.8.tar.gz", hash = "sha256:4921aa454f82fed0880214eab21126c98a35fe31ede952693356f9c85ce3574b", size = 14861038 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/24/a4301564a979368d6f3644f47acc921450b5524b8846e827237d98b04746/botocore-1.42.8-py3-none-any.whl", hash = "sha256:4cb89c74dd9083d16e45868749b999265a91309b2499907c84adeffa0a8df89b", size = 14534173 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "build"
|
||||
version = "1.3.0"
|
||||
|
|
@ -2292,6 +2320,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/b3/4a/4175a563579e884192ba6e81725fc0448b042024419be8d83aa8a80a3f44/jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", size = 354213 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jmespath"
|
||||
version = "1.0.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "joblib"
|
||||
version = "1.5.1"
|
||||
|
|
@ -5397,6 +5434,18 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/00/db/c376b0661c24cf770cb8815268190668ec1330eba8374a126ceef8c72d55/ruff-0.12.5-py3-none-win_arm64.whl", hash = "sha256:48cdbfc633de2c5c37d9f090ba3b352d1576b0015bfc3bc98eaf230275b7e805", size = 11951564 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "s3transfer"
|
||||
version = "0.16.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "botocore" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "safetensors"
|
||||
version = "0.5.3"
|
||||
|
|
@ -5865,6 +5914,7 @@ source = { virtual = "." }
|
|||
dependencies = [
|
||||
{ name = "alembic" },
|
||||
{ name = "asyncpg" },
|
||||
{ name = "boto3" },
|
||||
{ name = "celery", extra = ["redis"] },
|
||||
{ name = "chonkie", extra = ["all"] },
|
||||
{ name = "discord-py" },
|
||||
|
|
@ -5918,6 +5968,7 @@ dev = [
|
|||
requires-dist = [
|
||||
{ name = "alembic", specifier = ">=1.13.0" },
|
||||
{ name = "asyncpg", specifier = ">=0.30.0" },
|
||||
{ name = "boto3", specifier = ">=1.35.0" },
|
||||
{ name = "celery", extras = ["redis"], specifier = ">=5.5.3" },
|
||||
{ name = "chonkie", extras = ["all"], specifier = ">=1.4.0" },
|
||||
{ name = "discord-py", specifier = ">=2.5.2" },
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ interface EditorContent {
|
|||
document_id: number;
|
||||
title: string;
|
||||
blocknote_document: any;
|
||||
last_edited_at: string | null;
|
||||
updated_at: string | null;
|
||||
}
|
||||
|
||||
export default function EditorPage() {
|
||||
|
|
|
|||
|
|
@ -104,7 +104,9 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?:
|
|||
);
|
||||
|
||||
useEffect(() => {
|
||||
if (!lazy) {
|
||||
// Only auto-fetch if lazy is false AND searchSpaceId is provided
|
||||
// This prevents 400 errors when the hook is used without a searchSpaceId
|
||||
if (!lazy && searchSpaceId !== undefined) {
|
||||
fetchConnectors(searchSpaceId);
|
||||
}
|
||||
}, [lazy, fetchConnectors, searchSpaceId]);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue