diff --git a/surfsense_backend/alembic/versions/86_add_document_created_by.py b/surfsense_backend/alembic/versions/86_add_document_created_by.py
new file mode 100644
index 000000000..e4ce2a40f
--- /dev/null
+++ b/surfsense_backend/alembic/versions/86_add_document_created_by.py
@@ -0,0 +1,110 @@
+"""Add created_by_id column to documents table for document ownership tracking
+
+Revision ID: 86
+Revises: 85
+Create Date: 2026-02-02
+
+Changes:
+1. Add created_by_id column (UUID, nullable)
+2. Create index on created_by_id for performance
+3. Add foreign key constraint to user.id with ON DELETE SET NULL
+4. Backfill existing documents with search space owner's user_id
+
+Every step is guarded so the migration can be re-run. The guards use
+PostgreSQL's native IF [NOT] EXISTS clauses (or a regclass-scoped catalog
+lookup for the constraint) so they resolve against the same ``documents``
+table the DDL targets instead of any same-named table in another schema.
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "86"
+down_revision: str | None = "85"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add created_by_id column to documents and backfill with search space owner."""
+
+    # 1. Add created_by_id column (nullable for backward compatibility)
+    op.execute(
+        """
+        ALTER TABLE documents
+        ADD COLUMN IF NOT EXISTS created_by_id UUID;
+        """
+    )
+
+    # 2. Create index on created_by_id for efficient queries
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS ix_documents_created_by_id
+        ON documents (created_by_id);
+        """
+    )
+
+    # 3. Add foreign key constraint with ON DELETE SET NULL
+    # ADD CONSTRAINT has no IF NOT EXISTS form, so check the catalog first.
+    # conrelid = 'documents'::regclass scopes the lookup to the table that the
+    # ALTER TABLE below resolves to.
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF NOT EXISTS (
+                SELECT 1 FROM pg_constraint
+                WHERE conname = 'fk_documents_created_by_id'
+                AND conrelid = 'documents'::regclass
+            ) THEN
+                ALTER TABLE documents
+                ADD CONSTRAINT fk_documents_created_by_id
+                FOREIGN KEY (created_by_id) REFERENCES "user"(id)
+                ON DELETE SET NULL;
+            END IF;
+        END$$;
+        """
+    )
+
+    # 4. Backfill existing documents with search space owner's user_id
+    # This ensures all existing documents are associated with the search space owner
+    op.execute(
+        """
+        UPDATE documents
+        SET created_by_id = searchspaces.user_id
+        FROM searchspaces
+        WHERE documents.search_space_id = searchspaces.id
+        AND documents.created_by_id IS NULL;
+        """
+    )
+
+
+def downgrade() -> None:
+    """Remove created_by_id column from documents."""
+
+    # Drop foreign key constraint
+    # (IF EXISTS on the table and the constraint makes this a no-op when absent)
+    op.execute(
+        """
+        ALTER TABLE IF EXISTS documents
+        DROP CONSTRAINT IF EXISTS fk_documents_created_by_id;
+        """
+    )
+
+    # Drop index
+    op.execute(
+        """
+        DROP INDEX IF EXISTS ix_documents_created_by_id;
+        """
+    )
+
+    # Drop column
+    # (IF EXISTS on the table and the column makes this a no-op when absent)
+    op.execute(
+        """
+        ALTER TABLE IF EXISTS documents
+        DROP COLUMN IF EXISTS created_by_id;
+        """
+    )
diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py
index d3a0d344b..2b470ae38 100644
--- a/surfsense_backend/app/connectors/composio_gmail_connector.py
+++ b/surfsense_backend/app/connectors/composio_gmail_connector.py
@@ -394,6 +394,7 @@ async def _process_gmail_message_batch(
embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) documents_indexed += 1 diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py index 4302e479b..960757901 100644 --- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py +++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py @@ -442,6 +442,7 @@ async def index_composio_google_calendar( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) documents_indexed += 1 diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py index 364712215..b4b3e7ee6 100644 --- a/surfsense_backend/app/connectors/composio_google_drive_connector.py +++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py @@ -1258,6 +1258,7 @@ async def _process_single_drive_file( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 97d15d90f..b3a6266a0 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -749,7 +749,18 @@ class Document(BaseModel, TimestampMixin): search_space_id = Column( Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False ) + + # Track who created/uploaded this document + created_by_id = Column( + UUID(as_uuid=True), + ForeignKey("user.id", ondelete="SET NULL"), + nullable=True, # Nullable for backward compatibility with existing records + index=True, + ) + + # Relationships search_space = relationship("SearchSpace", back_populates="documents") + created_by = relationship("User", back_populates="documents") chunks = relationship( "Chunk", 
back_populates="document", cascade="all, delete-orphan" ) @@ -1284,6 +1295,13 @@ if config.AUTH_TYPE == "GOOGLE": passive_deletes=True, ) + # Documents created/uploaded by this user + documents = relationship( + "Document", + back_populates="created_by", + passive_deletes=True, + ) + # User memories for personalized AI responses memories = relationship( "UserMemory", @@ -1342,6 +1360,13 @@ else: passive_deletes=True, ) + # Documents created/uploaded by this user + documents = relationship( + "Document", + back_populates="created_by", + passive_deletes=True, + ) + # User memories for personalized AI responses memories = relationship( "UserMemory", diff --git a/surfsense_backend/app/routes/notes_routes.py b/surfsense_backend/app/routes/notes_routes.py index 5bb0a88a9..928cd462a 100644 --- a/surfsense_backend/app/routes/notes_routes.py +++ b/surfsense_backend/app/routes/notes_routes.py @@ -76,6 +76,7 @@ async def create_note( document_metadata={"NOTE": True}, embedding=None, # Will be generated on first reindex updated_at=datetime.now(UTC), + created_by_id=user.id, # Track who created this note ) session.add(document) @@ -93,6 +94,7 @@ async def create_note( search_space_id=document.search_space_id, created_at=document.created_at, updated_at=document.updated_at, + created_by_id=document.created_by_id, ) diff --git a/surfsense_backend/app/schemas/documents.py b/surfsense_backend/app/schemas/documents.py index 2b4bda0ca..1f82ae9ce 100644 --- a/surfsense_backend/app/schemas/documents.py +++ b/surfsense_backend/app/schemas/documents.py @@ -1,5 +1,6 @@ from datetime import datetime from typing import TypeVar +from uuid import UUID from pydantic import BaseModel, ConfigDict @@ -51,6 +52,7 @@ class DocumentRead(BaseModel): created_at: datetime updated_at: datetime | None search_space_id: int + created_by_id: UUID | None = None # User who created/uploaded this document model_config = ConfigDict(from_attributes=True) diff --git 
a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py index 3bcf95d6a..7d0837ac1 100644 --- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py @@ -417,6 +417,7 @@ async def index_airtable_records( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py index d726e5d95..fd89792e9 100644 --- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py @@ -396,6 +396,7 @@ async def index_bookstack_pages( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py index e7e8b23e5..bcdb9c72a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py @@ -395,6 +395,7 @@ async def index_clickup_tasks( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index 2f20472d2..3f8f43669 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -402,6 +402,7 @@ async def index_confluence_pages( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + 
created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index a70bc42d4..3d226ed06 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -527,6 +527,7 @@ async def index_discord_messages( content_hash=content_hash, unique_identifier_hash=unique_identifier_hash, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py index 8fbba6463..6f2dd797f 100644 --- a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py @@ -292,6 +292,7 @@ async def index_elasticsearch_documents( document_metadata=metadata, search_space_id=search_space_id, updated_at=get_current_timestamp(), + created_by_id=user_id, ) # Create chunks and attach to document (persist via relationship) diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py index b01d235cf..947035048 100644 --- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py @@ -426,6 +426,7 @@ async def _process_repository_digest( search_space_id=search_space_id, chunks=chunks_data, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index f64a7a5c3..28037ba7e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ 
b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -499,6 +499,7 @@ async def index_google_calendar_events( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index 45ce91c6f..7c6b9ffec 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -421,6 +421,7 @@ async def index_google_gmail_messages( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) documents_indexed += 1 diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py index acee74192..6262e8535 100644 --- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py @@ -380,6 +380,7 @@ async def index_jira_issues( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index fc4ae5f18..dd0483eda 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -413,6 +413,7 @@ async def index_linear_issues( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index a18abf8ae..74e809384 100644 --- 
a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -476,6 +476,7 @@ async def index_luma_events( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index 52622471a..169dbd775 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -470,6 +470,7 @@ async def index_notion_pages( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py index a8cd78cc9..a2ccd64d9 100644 --- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py @@ -500,6 +500,7 @@ async def index_obsidian_vault( embedding=embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(new_document) diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py index 5923c8089..d922178ce 100644 --- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py @@ -389,6 +389,7 @@ async def index_slack_messages( content_hash=content_hash, unique_identifier_hash=unique_identifier_hash, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py index 
162509a1e..7b401f6cf 100644 --- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py @@ -430,6 +430,7 @@ async def index_teams_messages( content_hash=content_hash, unique_identifier_hash=unique_identifier_hash, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index ac16ecde6..63105d7a5 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -371,6 +371,7 @@ async def index_crawled_urls( embedding=summary_embedding, chunks=chunks, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/document_processors/circleback_processor.py b/surfsense_backend/app/tasks/document_processors/circleback_processor.py index 0a1d91784..ce596d579 100644 --- a/surfsense_backend/app/tasks/document_processors/circleback_processor.py +++ b/surfsense_backend/app/tasks/document_processors/circleback_processor.py @@ -8,10 +8,17 @@ and stores it as searchable documents in the database. 
import logging
 from typing import Any
 
+from sqlalchemy import select
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 
-from app.db import Document, DocumentType
+from app.db import (
+    Document,
+    DocumentType,
+    SearchSourceConnector,
+    SearchSourceConnectorType,
+    SearchSpace,
+)
 from app.services.llm_service import get_document_summary_llm
 from app.utils.document_converters import (
     create_document_chunks,
@@ -125,6 +132,35 @@ async def add_circleback_meeting_document(
         **metadata,
     }
 
+    # Fetch the user who set up the Circleback connector (preferred)
+    # or fall back to search space owner if no connector found
+    created_by_user_id = None
+
+    # Try to find the Circleback connector for this search space.
+    # Nothing in this query guarantees a single match, and
+    # scalar_one_or_none() raises MultipleResultsFound on more than one row,
+    # so cap the result at one row.
+    connector_result = await session.execute(
+        select(SearchSourceConnector.user_id)
+        .where(
+            SearchSourceConnector.search_space_id == search_space_id,
+            SearchSourceConnector.connector_type
+            == SearchSourceConnectorType.CIRCLEBACK_CONNECTOR,
+        )
+        .limit(1)
+    )
+    connector_user = connector_result.scalar_one_or_none()
+
+    if connector_user is not None:
+        # Use the user who set up the Circleback connector
+        created_by_user_id = connector_user
+    else:
+        # Fallback: use search space owner if no connector found
+        search_space_result = await session.execute(
+            select(SearchSpace.user_id).where(SearchSpace.id == search_space_id)
+        )
+        created_by_user_id = search_space_result.scalar_one_or_none()
+
     # Update or create document
     if existing_document:
         # Update existing document
@@ -160,6 +196,7 @@ async def add_circleback_meeting_document(
             blocknote_document=blocknote_json,
             content_needs_reindexing=False,
             updated_at=get_current_timestamp(),
+            created_by_id=created_by_user_id,
         )
 
         session.add(document)
diff --git a/surfsense_backend/app/tasks/document_processors/extension_processor.py b/surfsense_backend/app/tasks/document_processors/extension_processor.py
index 7d8462872..9ddab4ec6 100644
---
a/surfsense_backend/app/tasks/document_processors/extension_processor.py +++ b/surfsense_backend/app/tasks/document_processors/extension_processor.py @@ -185,6 +185,7 @@ async def add_extension_received_document( unique_identifier_hash=unique_identifier_hash, blocknote_document=blocknote_json, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 6c4be0cb8..2f2e5a2e8 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -526,6 +526,7 @@ async def add_received_file_document_using_unstructured( blocknote_document=blocknote_json, content_needs_reindexing=False, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) @@ -665,6 +666,7 @@ async def add_received_file_document_using_llamacloud( blocknote_document=blocknote_json, content_needs_reindexing=False, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) @@ -829,6 +831,7 @@ async def add_received_file_document_using_docling( blocknote_document=blocknote_json, content_needs_reindexing=False, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git a/surfsense_backend/app/tasks/document_processors/markdown_processor.py b/surfsense_backend/app/tasks/document_processors/markdown_processor.py index 3a9867fd6..a2399206a 100644 --- a/surfsense_backend/app/tasks/document_processors/markdown_processor.py +++ b/surfsense_backend/app/tasks/document_processors/markdown_processor.py @@ -295,6 +295,7 @@ async def add_received_markdown_file_document( unique_identifier_hash=primary_hash, blocknote_document=blocknote_json, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document) diff --git 
a/surfsense_backend/app/tasks/document_processors/youtube_processor.py b/surfsense_backend/app/tasks/document_processors/youtube_processor.py index da1a8f538..7251fb22f 100644 --- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py +++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py @@ -357,6 +357,7 @@ async def add_youtube_video_document( unique_identifier_hash=unique_identifier_hash, blocknote_document=blocknote_json, updated_at=get_current_timestamp(), + created_by_id=user_id, ) session.add(document)