diff --git a/docs/chinese-llm-setup.md b/docs/chinese-llm-setup.md index 1fb0ce2a1..6638dbba1 100644 --- a/docs/chinese-llm-setup.md +++ b/docs/chinese-llm-setup.md @@ -24,7 +24,7 @@ SurfSense 现已支持以下国产 LLM: 1. 登录 SurfSense Dashboard 2. 进入 **Settings** → **API Keys** (或 **LLM Configurations**) -3. 点击 **Add LLM Model** +3. 点击 **Add Model** 4. 从 **Provider** 下拉菜单中选择你的国产 LLM 提供商 5. 填写必填字段(见下方各提供商详细配置) 6. 点击 **Save** diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 000000000..9703ac09f --- /dev/null +++ b/package-lock.json @@ -0,0 +1,6 @@ +{ + "name": "SurfSense", + "lockfileVersion": 3, + "requires": true, + "packages": {} +} diff --git a/surfsense_backend/alembic/versions/118_add_local_folder_sync_and_versioning.py b/surfsense_backend/alembic/versions/118_add_local_folder_sync_and_versioning.py new file mode 100644 index 000000000..1fef9fbcb --- /dev/null +++ b/surfsense_backend/alembic/versions/118_add_local_folder_sync_and_versioning.py @@ -0,0 +1,149 @@ +"""Add LOCAL_FOLDER_FILE document type, folder metadata, and document_versions table + +Revision ID: 118 +Revises: 117 +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +revision: str = "118" +down_revision: str | None = "117" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + +PUBLICATION_NAME = "zero_publication" + + +def upgrade() -> None: + conn = op.get_bind() + + # Add LOCAL_FOLDER_FILE to documenttype enum + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'documenttype' AND e.enumlabel = 'LOCAL_FOLDER_FILE' + ) THEN + ALTER TYPE documenttype ADD VALUE 'LOCAL_FOLDER_FILE'; + END IF; + END + $$; + """ + ) + + # Add JSONB metadata column to folders table + col_exists = conn.execute( + sa.text( + "SELECT 1 FROM information_schema.columns " + "WHERE table_name = 'folders' AND column_name = 'metadata'" + ) + ).fetchone() + if not col_exists: + op.add_column( + "folders", + sa.Column("metadata", sa.dialects.postgresql.JSONB, nullable=True), + ) + + # Create document_versions table + table_exists = conn.execute( + sa.text( + "SELECT 1 FROM information_schema.tables WHERE table_name = 'document_versions'" + ) + ).fetchone() + if not table_exists: + op.create_table( + "document_versions", + sa.Column("id", sa.Integer(), nullable=False, autoincrement=True), + sa.Column("document_id", sa.Integer(), nullable=False), + sa.Column("version_number", sa.Integer(), nullable=False), + sa.Column("source_markdown", sa.Text(), nullable=True), + sa.Column("content_hash", sa.String(), nullable=False), + sa.Column("title", sa.String(), nullable=True), + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["document_id"], + ["documents.id"], + ondelete="CASCADE", + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint( + "document_id", + "version_number", + name="uq_document_version", + ), + ) + + op.execute( + "CREATE INDEX IF NOT EXISTS ix_document_versions_document_id " + "ON document_versions (document_id)" + ) + op.execute( + "CREATE INDEX IF NOT EXISTS ix_document_versions_created_at " + "ON document_versions (created_at)" + ) + + # Add document_versions to Zero publication + pub_exists = conn.execute( + sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"), + {"name": PUBLICATION_NAME}, + ).fetchone() + if pub_exists: + already_in_pub = 
conn.execute( + sa.text( + "SELECT 1 FROM pg_publication_tables " + "WHERE pubname = :name AND tablename = 'document_versions'" + ), + {"name": PUBLICATION_NAME}, + ).fetchone() + if not already_in_pub: + op.execute( + f"ALTER PUBLICATION {PUBLICATION_NAME} ADD TABLE document_versions" + ) + + +def downgrade() -> None: + conn = op.get_bind() + + # Remove from publication + pub_exists = conn.execute( + sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"), + {"name": PUBLICATION_NAME}, + ).fetchone() + if pub_exists: + already_in_pub = conn.execute( + sa.text( + "SELECT 1 FROM pg_publication_tables " + "WHERE pubname = :name AND tablename = 'document_versions'" + ), + {"name": PUBLICATION_NAME}, + ).fetchone() + if already_in_pub: + op.execute( + f"ALTER PUBLICATION {PUBLICATION_NAME} DROP TABLE document_versions" + ) + + op.execute("DROP INDEX IF EXISTS ix_document_versions_created_at") + op.execute("DROP INDEX IF EXISTS ix_document_versions_document_id") + op.execute("DROP TABLE IF EXISTS document_versions") + + # Drop metadata column from folders + col_exists = conn.execute( + sa.text( + "SELECT 1 FROM information_schema.columns " + "WHERE table_name = 'folders' AND column_name = 'metadata'" + ) + ).fetchone() + if col_exists: + op.drop_column("folders", "metadata") diff --git a/surfsense_backend/alembic/versions/51_add_new_llm_config_table.py b/surfsense_backend/alembic/versions/51_add_new_llm_config_table.py index 89a5c1246..7d90f4b13 100644 --- a/surfsense_backend/alembic/versions/51_add_new_llm_config_table.py +++ b/surfsense_backend/alembic/versions/51_add_new_llm_config_table.py @@ -17,10 +17,10 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: """ - Add the new_llm_configs table that combines LLM model settings with prompt configuration. + Add the new_llm_configs table that combines model settings with prompt configuration. This table includes: - - LLM model configuration (provider, model_name, api_key, etc.) + - Model configuration (provider, model_name, api_key, etc.) - Configurable system instructions - Citation toggle """ @@ -41,7 +41,7 @@ def upgrade() -> None: name VARCHAR(100) NOT NULL, description VARCHAR(500), - -- LLM Model Configuration (same as llm_configs, excluding language) + -- Model Configuration (same as llm_configs, excluding language) provider litellmprovider NOT NULL, custom_provider VARCHAR(100), model_name VARCHAR(100) NOT NULL, diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml index 6ca3e95e3..49a8d0295 100644 --- a/surfsense_backend/app/config/global_llm_config.example.yaml +++ b/surfsense_backend/app/config/global_llm_config.example.yaml @@ -17,7 +17,7 @@ # - Configure router_settings below to customize the load balancing behavior # # Structure matches NewLLMConfig: -# - LLM model configuration (provider, model_name, api_key, etc.) +# - Model configuration (provider, model_name, api_key, etc.) 
# - Prompt configuration (system_instructions, citations_enabled) # Router Settings for Auto Mode diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 90630cc83..077b7daa6 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -64,6 +64,7 @@ class DocumentType(StrEnum): COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR" COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" + LOCAL_FOLDER_FILE = "LOCAL_FOLDER_FILE" # Native Google document types → their legacy Composio equivalents. @@ -955,6 +956,7 @@ class Folder(BaseModel, TimestampMixin): onupdate=lambda: datetime.now(UTC), index=True, ) + folder_metadata = Column("metadata", JSONB, nullable=True) parent = relationship("Folder", remote_side="Folder.id", backref="children") search_space = relationship("SearchSpace", back_populates="folders") @@ -1039,6 +1041,26 @@ class Document(BaseModel, TimestampMixin): ) +class DocumentVersion(BaseModel, TimestampMixin): + __tablename__ = "document_versions" + __table_args__ = ( + UniqueConstraint("document_id", "version_number", name="uq_document_version"), + ) + + document_id = Column( + Integer, + ForeignKey("documents.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + version_number = Column(Integer, nullable=False) + source_markdown = Column(Text, nullable=True) + content_hash = Column(String, nullable=False) + title = Column(String, nullable=True) + + document = relationship("Document", backref="versions") + + class Chunk(BaseModel, TimestampMixin): __tablename__ = "chunks" diff --git a/surfsense_backend/app/indexing_pipeline/exceptions.py b/surfsense_backend/app/indexing_pipeline/exceptions.py index 9155e9baa..666fa4b9f 100644 --- a/surfsense_backend/app/indexing_pipeline/exceptions.py +++ b/surfsense_backend/app/indexing_pipeline/exceptions.py @@ -59,7 +59,7 @@ class PipelineMessages: LLM_AUTH = "LLM authentication failed. Check your API key." LLM_PERMISSION = "LLM request denied. Check your account permissions." - LLM_NOT_FOUND = "LLM model not found. Check your model configuration." + LLM_NOT_FOUND = "Model not found. Check your model configuration." LLM_BAD_REQUEST = "LLM rejected the request. Document content may be invalid." LLM_UNPROCESSABLE = ( "Document exceeds the LLM context window even after optimization." @@ -67,7 +67,7 @@ class PipelineMessages: LLM_RESPONSE = "LLM returned an invalid response." LLM_AUTH = "LLM authentication failed. Check your API key." LLM_PERMISSION = "LLM request denied. Check your account permissions." - LLM_NOT_FOUND = "LLM model not found. Check your model configuration." + LLM_NOT_FOUND = "Model not found. Check your model configuration." LLM_BAD_REQUEST = "LLM rejected the request. Document content may be invalid." LLM_UNPROCESSABLE = ( "Document exceeds the LLM context window even after optimization." 
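The migration and `app/db.py` changes above add the `document_versions` table and the `DocumentVersion` model, and the document routes later in this diff snapshot state through `create_version_snapshot` from `app.utils.document_versioning`, a helper that is not included in the patch. Purely as a rough sketch, assuming the helper just copies the document's current `source_markdown`, `title`, and `content_hash` into the next version number, it could look like this:

```python
# Hypothetical sketch of app.utils.document_versioning.create_version_snapshot.
# Not part of this diff; the real helper may differ. It assumes Document exposes
# source_markdown, title, and content_hash, as the routes in this diff do.
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import Document, DocumentVersion


async def create_version_snapshot(
    session: AsyncSession, document: Document
) -> DocumentVersion:
    """Copy the document's current content into a new DocumentVersion row."""
    # Highest existing version number for this document, or 0 if none exist yet.
    max_version = (
        await session.execute(
            select(func.max(DocumentVersion.version_number)).where(
                DocumentVersion.document_id == document.id
            )
        )
    ).scalar() or 0

    snapshot = DocumentVersion(
        document_id=document.id,
        version_number=max_version + 1,
        source_markdown=document.source_markdown,
        content_hash=document.content_hash,
        title=document.title,
    )
    session.add(snapshot)
    # Flush so the row gets an id; the caller decides when to commit.
    await session.flush()
    return snapshot
```

Because migration 118 adds the `uq_document_version` unique constraint on `(document_id, version_number)`, two concurrent snapshots that compute the same next version would collide, so a real helper would likely retry or serialize around this read-then-insert.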
diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index 1937f11cb..efa0ff2f6 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -84,7 +84,7 @@ router.include_router(confluence_add_connector_router) router.include_router(clickup_add_connector_router) router.include_router(dropbox_add_connector_router) router.include_router(new_llm_config_router) # LLM configs with prompt configuration -router.include_router(model_list_router) # Dynamic LLM model catalogue from OpenRouter +router.include_router(model_list_router) # Dynamic model catalogue from OpenRouter router.include_router(logs_router) router.include_router(circleback_webhook_router) # Circleback meeting webhooks router.include_router(surfsense_docs_router) # Surfsense documentation for citations diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index f53c81bb6..5008b1a10 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -2,6 +2,7 @@ import asyncio from fastapi import APIRouter, Depends, Form, HTTPException, Query, UploadFile +from pydantic import BaseModel as PydanticBaseModel from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm import selectinload @@ -10,6 +11,8 @@ from app.db import ( Chunk, Document, DocumentType, + DocumentVersion, + Folder, Permission, SearchSpace, SearchSpaceMembership, @@ -27,6 +30,7 @@ from app.schemas import ( DocumentTitleSearchResponse, DocumentUpdate, DocumentWithChunksRead, + FolderRead, PaginatedResponse, ) from app.services.task_dispatcher import TaskDispatcher, get_task_dispatcher @@ -957,6 +961,39 @@ async def get_document_by_chunk_id( ) from e +@router.get("/documents/watched-folders", response_model=list[FolderRead]) +async def get_watched_folders( + search_space_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Return root folders that are marked as watched (metadata->>'watched' = 'true').""" + await check_permission( + session, + user, + search_space_id, + Permission.DOCUMENTS_READ.value, + "You don't have permission to read documents in this search space", + ) + + folders = ( + ( + await session.execute( + select(Folder).where( + Folder.search_space_id == search_space_id, + Folder.parent_id.is_(None), + Folder.folder_metadata.isnot(None), + Folder.folder_metadata["watched"].astext == "true", + ) + ) + ) + .scalars() + .all() + ) + + return folders + + @router.get( "/documents/{document_id}/chunks", response_model=PaginatedResponse[ChunkRead], @@ -1212,3 +1249,297 @@ async def delete_document( raise HTTPException( status_code=500, detail=f"Failed to delete document: {e!s}" ) from e + + +# ==================================================================== +# Version History Endpoints +# ==================================================================== + + +@router.get("/documents/{document_id}/versions") +async def list_document_versions( + document_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """List all versions for a document, ordered by version_number descending.""" + document = ( + await session.execute(select(Document).where(Document.id == document_id)) + ).scalar_one_or_none() + if not document: + raise HTTPException(status_code=404, detail="Document not found") + + await 
check_permission( + session, user, document.search_space_id, Permission.DOCUMENTS_READ.value + ) + + versions = ( + ( + await session.execute( + select(DocumentVersion) + .where(DocumentVersion.document_id == document_id) + .order_by(DocumentVersion.version_number.desc()) + ) + ) + .scalars() + .all() + ) + + return [ + { + "version_number": v.version_number, + "title": v.title, + "content_hash": v.content_hash, + "created_at": v.created_at.isoformat() if v.created_at else None, + } + for v in versions + ] + + +@router.get("/documents/{document_id}/versions/{version_number}") +async def get_document_version( + document_id: int, + version_number: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Get full version content including source_markdown.""" + document = ( + await session.execute(select(Document).where(Document.id == document_id)) + ).scalar_one_or_none() + if not document: + raise HTTPException(status_code=404, detail="Document not found") + + await check_permission( + session, user, document.search_space_id, Permission.DOCUMENTS_READ.value + ) + + version = ( + await session.execute( + select(DocumentVersion).where( + DocumentVersion.document_id == document_id, + DocumentVersion.version_number == version_number, + ) + ) + ).scalar_one_or_none() + if not version: + raise HTTPException(status_code=404, detail="Version not found") + + return { + "version_number": version.version_number, + "title": version.title, + "content_hash": version.content_hash, + "source_markdown": version.source_markdown, + "created_at": version.created_at.isoformat() if version.created_at else None, + } + + +@router.post("/documents/{document_id}/versions/{version_number}/restore") +async def restore_document_version( + document_id: int, + version_number: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Restore a previous version: snapshot current state, then overwrite document content.""" + document = ( + await session.execute(select(Document).where(Document.id == document_id)) + ).scalar_one_or_none() + if not document: + raise HTTPException(status_code=404, detail="Document not found") + + await check_permission( + session, user, document.search_space_id, Permission.DOCUMENTS_UPDATE.value + ) + + version = ( + await session.execute( + select(DocumentVersion).where( + DocumentVersion.document_id == document_id, + DocumentVersion.version_number == version_number, + ) + ) + ).scalar_one_or_none() + if not version: + raise HTTPException(status_code=404, detail="Version not found") + + # Snapshot current state before restoring + from app.utils.document_versioning import create_version_snapshot + + await create_version_snapshot(session, document) + + # Restore the version's content onto the document + document.source_markdown = version.source_markdown + document.title = version.title or document.title + document.content_needs_reindexing = True + await session.commit() + + from app.tasks.celery_tasks.document_reindex_tasks import reindex_document_task + + reindex_document_task.delay(document_id, str(user.id)) + + return { + "message": f"Restored version {version_number}", + "document_id": document_id, + "restored_version": version_number, + } + + +# ===== Local folder indexing endpoints ===== + + +class FolderIndexRequest(PydanticBaseModel): + folder_path: str + folder_name: str + search_space_id: int + exclude_patterns: list[str] | None = None + file_extensions: list[str] | None = None 
+ root_folder_id: int | None = None + enable_summary: bool = False + + +class FolderIndexFilesRequest(PydanticBaseModel): + folder_path: str + folder_name: str + search_space_id: int + target_file_paths: list[str] + root_folder_id: int | None = None + enable_summary: bool = False + + +@router.post("/documents/folder-index") +async def folder_index( + request: FolderIndexRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Full-scan index of a local folder. Creates the root Folder row synchronously + and dispatches the heavy indexing work to a Celery task. + Returns the root_folder_id so the desktop can persist it. + """ + from app.config import config as app_config + + if not app_config.is_self_hosted(): + raise HTTPException( + status_code=400, + detail="Local folder indexing is only available in self-hosted mode", + ) + + await check_permission( + session, + user, + request.search_space_id, + Permission.DOCUMENTS_CREATE.value, + "You don't have permission to create documents in this search space", + ) + + watched_metadata = { + "watched": True, + "folder_path": request.folder_path, + "exclude_patterns": request.exclude_patterns, + "file_extensions": request.file_extensions, + } + + root_folder_id = request.root_folder_id + if root_folder_id: + existing = ( + await session.execute(select(Folder).where(Folder.id == root_folder_id)) + ).scalar_one_or_none() + if not existing: + root_folder_id = None + else: + existing.folder_metadata = watched_metadata + await session.commit() + + if not root_folder_id: + root_folder = Folder( + name=request.folder_name, + search_space_id=request.search_space_id, + created_by_id=str(user.id), + position="a0", + folder_metadata=watched_metadata, + ) + session.add(root_folder) + await session.flush() + root_folder_id = root_folder.id + await session.commit() + + from app.tasks.celery_tasks.document_tasks import index_local_folder_task + + index_local_folder_task.delay( + search_space_id=request.search_space_id, + user_id=str(user.id), + folder_path=request.folder_path, + folder_name=request.folder_name, + exclude_patterns=request.exclude_patterns, + file_extensions=request.file_extensions, + root_folder_id=root_folder_id, + enable_summary=request.enable_summary, + ) + + return { + "message": "Folder indexing started", + "status": "processing", + "root_folder_id": root_folder_id, + } + + +@router.post("/documents/folder-index-files") +async def folder_index_files( + request: FolderIndexFilesRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Index multiple files within a watched folder (batched chokidar trigger). + Validates that all target_file_paths are under folder_path. + Dispatches a single Celery task that processes them in parallel. 
+ """ + from app.config import config as app_config + + if not app_config.is_self_hosted(): + raise HTTPException( + status_code=400, + detail="Local folder indexing is only available in self-hosted mode", + ) + + if not request.target_file_paths: + raise HTTPException( + status_code=400, detail="target_file_paths must not be empty" + ) + + await check_permission( + session, + user, + request.search_space_id, + Permission.DOCUMENTS_CREATE.value, + "You don't have permission to create documents in this search space", + ) + + from pathlib import Path + + for fp in request.target_file_paths: + try: + Path(fp).relative_to(request.folder_path) + except ValueError as err: + raise HTTPException( + status_code=400, + detail=f"target_file_path {fp} must be inside folder_path", + ) from err + + from app.tasks.celery_tasks.document_tasks import index_local_folder_task + + index_local_folder_task.delay( + search_space_id=request.search_space_id, + user_id=str(user.id), + folder_path=request.folder_path, + folder_name=request.folder_name, + target_file_paths=request.target_file_paths, + root_folder_id=request.root_folder_id, + enable_summary=request.enable_summary, + ) + + return { + "message": f"Batch indexing started for {len(request.target_file_paths)} file(s)", + "status": "processing", + "file_count": len(request.target_file_paths), + } diff --git a/surfsense_backend/app/routes/editor_routes.py b/surfsense_backend/app/routes/editor_routes.py index 09a35c619..829b2cf69 100644 --- a/surfsense_backend/app/routes/editor_routes.py +++ b/surfsense_backend/app/routes/editor_routes.py @@ -128,9 +128,20 @@ async def get_editor_content( chunk_contents = chunk_contents_result.scalars().all() if not chunk_contents: + doc_status = document.status or {} + state = ( + doc_status.get("state", "ready") + if isinstance(doc_status, dict) + else "ready" + ) + if state in ("pending", "processing"): + raise HTTPException( + status_code=409, + detail="This document is still being processed. Please wait a moment and try again.", + ) raise HTTPException( status_code=400, - detail="This document has no content and cannot be edited. Please re-upload to enable editing.", + detail="This document has no viewable content yet. It may still be syncing. Try again in a few seconds, or re-upload if the issue persists.", ) markdown_content = "\n\n".join(chunk_contents) @@ -138,7 +149,7 @@ async def get_editor_content( if not markdown_content.strip(): raise HTTPException( status_code=400, - detail="This document has empty content and cannot be edited.", + detail="This document appears to be empty. 
Try re-uploading or editing it to add content.", ) document.source_markdown = markdown_content diff --git a/surfsense_backend/app/routes/folders_routes.py b/surfsense_backend/app/routes/folders_routes.py index d688e692a..2dc9bceac 100644 --- a/surfsense_backend/app/routes/folders_routes.py +++ b/surfsense_backend/app/routes/folders_routes.py @@ -192,6 +192,33 @@ async def get_folder_breadcrumb( ) from e +@router.patch("/folders/{folder_id}/watched") +async def stop_watching_folder( + folder_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Clear the watched flag from a folder's metadata.""" + folder = await session.get(Folder, folder_id) + if not folder: + raise HTTPException(status_code=404, detail="Folder not found") + + await check_permission( + session, + user, + folder.search_space_id, + Permission.DOCUMENTS_UPDATE.value, + "You don't have permission to update folders in this search space", + ) + + if folder.folder_metadata and isinstance(folder.folder_metadata, dict): + updated = {**folder.folder_metadata, "watched": False} + folder.folder_metadata = updated + await session.commit() + + return {"message": "Folder watch status updated"} + + @router.put("/folders/{folder_id}", response_model=FolderRead) async def update_folder( folder_id: int, @@ -340,7 +367,7 @@ async def delete_folder( session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), ): - """Delete a folder and cascade-delete subfolders. Documents are async-deleted via Celery.""" + """Mark documents for deletion and dispatch Celery to delete docs first, then folders.""" try: folder = await session.get(Folder, folder_id) if not folder: @@ -372,30 +399,29 @@ async def delete_folder( ) await session.commit() - await session.execute(Folder.__table__.delete().where(Folder.id == folder_id)) - await session.commit() + try: + from app.tasks.celery_tasks.document_tasks import ( + delete_folder_documents_task, + ) - if document_ids: - try: - from app.tasks.celery_tasks.document_tasks import ( - delete_folder_documents_task, - ) - - delete_folder_documents_task.delay(document_ids) - except Exception as err: + delete_folder_documents_task.delay( + document_ids, folder_subtree_ids=list(subtree_ids) + ) + except Exception as err: + if document_ids: await session.execute( Document.__table__.update() .where(Document.id.in_(document_ids)) .values(status={"state": "ready"}) ) await session.commit() - raise HTTPException( - status_code=503, - detail="Folder deleted but document cleanup could not be queued. Documents have been restored.", - ) from err + raise HTTPException( + status_code=503, + detail="Could not queue folder deletion. Documents have been restored.", + ) from err return { - "message": "Folder deleted successfully", + "message": "Folder deletion started", "documents_queued_for_deletion": len(document_ids), } diff --git a/surfsense_backend/app/routes/model_list_routes.py b/surfsense_backend/app/routes/model_list_routes.py index ef6e30514..79ae7221f 100644 --- a/surfsense_backend/app/routes/model_list_routes.py +++ b/surfsense_backend/app/routes/model_list_routes.py @@ -1,5 +1,5 @@ """ -API route for fetching the available LLM models catalogue. +API route for fetching the available models catalogue. Serves a dynamically-updated list sourced from the OpenRouter public API, with a local JSON fallback when the API is unreachable. 
@@ -30,7 +30,7 @@ async def list_available_models( user: User = Depends(current_active_user), ): """ - Return all available LLM models grouped by provider. + Return all available models grouped by provider. The list is sourced from the OpenRouter public API and cached for 1 hour. If the API is unreachable, a local fallback file is used instead. diff --git a/surfsense_backend/app/routes/new_llm_config_routes.py b/surfsense_backend/app/routes/new_llm_config_routes.py index f784bd273..78907c719 100644 --- a/surfsense_backend/app/routes/new_llm_config_routes.py +++ b/surfsense_backend/app/routes/new_llm_config_routes.py @@ -1,7 +1,7 @@ """ API routes for NewLLMConfig CRUD operations. -NewLLMConfig combines LLM model settings with prompt configuration: +NewLLMConfig combines model settings with prompt configuration: - LLM provider, model, API key, etc. - Configurable system instructions - Citation toggle diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index b73b8c789..d208ff910 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -55,23 +55,12 @@ from app.schemas import ( ) from app.services.composio_service import ComposioService, get_composio_service from app.services.notification_service import NotificationService -from app.tasks.connector_indexers import ( - index_airtable_records, - index_clickup_tasks, - index_confluence_pages, - index_crawled_urls, - index_discord_messages, - index_elasticsearch_documents, - index_github_repos, - index_google_calendar_events, - index_google_gmail_messages, - index_jira_issues, - index_linear_issues, - index_luma_events, - index_notion_pages, - index_slack_messages, -) from app.users import current_active_user + +# NOTE: connector indexer functions are imported lazily inside each +# ``run_*_indexing`` helper to break a circular import cycle: +# connector_indexers.__init__ → airtable_indexer → airtable_history +# → app.routes.__init__ → this file → connector_indexers (not ready yet) from app.utils.connector_naming import ensure_unique_connector_name from app.utils.indexing_locks import ( acquire_connector_indexing_lock, @@ -1378,6 +1367,8 @@ async def run_slack_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_slack_messages + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -1824,6 +1815,8 @@ async def run_notion_indexing_with_new_session( Create a new session and run the Notion indexing task. This prevents session leaks by creating a dedicated session for the background task. 
""" + from app.tasks.connector_indexers import index_notion_pages + async with async_session_maker() as session: await _run_indexing_with_notifications( session=session, @@ -1858,6 +1851,8 @@ async def run_notion_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_notion_pages + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -1910,6 +1905,8 @@ async def run_github_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_github_repos + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -1961,6 +1958,8 @@ async def run_linear_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_linear_issues + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -2011,6 +2010,8 @@ async def run_discord_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_discord_messages + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -2113,6 +2114,8 @@ async def run_jira_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_jira_issues + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -2166,6 +2169,8 @@ async def run_confluence_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_confluence_pages + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -2217,6 +2222,8 @@ async def run_clickup_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_clickup_tasks + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -2268,6 +2275,8 @@ async def run_airtable_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_airtable_records + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -2321,6 +2330,8 @@ async def run_google_calendar_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_google_calendar_events + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -2370,6 +2381,7 @@ async def run_google_gmail_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_google_gmail_messages # Create a wrapper function that calls index_google_gmail_messages with max_messages async def gmail_indexing_wrapper( @@ -2836,6 +2848,8 @@ async def run_luma_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_luma_events + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ -2888,6 +2902,8 @@ async def run_elasticsearch_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_elasticsearch_documents + await _run_indexing_with_notifications( session=session, connector_id=connector_id, @@ 
-2938,6 +2954,8 @@ async def run_web_page_indexing( start_date: Start date for indexing end_date: End date for indexing """ + from app.tasks.connector_indexers import index_crawled_urls + await _run_indexing_with_notifications( session=session, connector_id=connector_id, diff --git a/surfsense_backend/app/schemas/folders.py b/surfsense_backend/app/schemas/folders.py index 263817182..a7e065144 100644 --- a/surfsense_backend/app/schemas/folders.py +++ b/surfsense_backend/app/schemas/folders.py @@ -1,6 +1,7 @@ """Pydantic schemas for folder CRUD, move, and reorder operations.""" from datetime import datetime +from typing import Any from uuid import UUID from pydantic import BaseModel, ConfigDict, Field @@ -34,6 +35,9 @@ class FolderRead(BaseModel): created_by_id: UUID | None created_at: datetime updated_at: datetime + metadata: dict[str, Any] | None = Field( + default=None, validation_alias="folder_metadata" + ) model_config = ConfigDict(from_attributes=True) diff --git a/surfsense_backend/app/schemas/new_llm_config.py b/surfsense_backend/app/schemas/new_llm_config.py index 9863665b6..15ed4ce67 100644 --- a/surfsense_backend/app/schemas/new_llm_config.py +++ b/surfsense_backend/app/schemas/new_llm_config.py @@ -1,7 +1,7 @@ """ Pydantic schemas for the NewLLMConfig API. -NewLLMConfig combines LLM model settings with prompt configuration: +NewLLMConfig combines model settings with prompt configuration: - LLM provider, model, API key, etc. - Configurable system instructions - Citation toggle @@ -26,7 +26,7 @@ class NewLLMConfigBase(BaseModel): None, max_length=500, description="Optional description" ) - # LLM Model Configuration + # Model Configuration provider: LiteLLMProvider = Field(..., description="LiteLLM provider type") custom_provider: str | None = Field( None, max_length=100, description="Custom provider name when provider is CUSTOM" @@ -71,7 +71,7 @@ class NewLLMConfigUpdate(BaseModel): name: str | None = Field(None, max_length=100) description: str | None = Field(None, max_length=500) - # LLM Model Configuration + # Model Configuration provider: LiteLLMProvider | None = None custom_provider: str | None = Field(None, max_length=100) model_name: str | None = Field(None, max_length=100) @@ -106,7 +106,7 @@ class NewLLMConfigPublic(BaseModel): name: str description: str | None = None - # LLM Model Configuration (no api_key) + # Model Configuration (no api_key) provider: LiteLLMProvider custom_provider: str | None = None model_name: str @@ -149,7 +149,7 @@ class GlobalNewLLMConfigRead(BaseModel): name: str description: str | None = None - # LLM Model Configuration (no api_key) + # Model Configuration (no api_key) provider: str # String because YAML doesn't enforce enum, "AUTO" for Auto mode custom_provider: str | None = None model_name: str diff --git a/surfsense_backend/app/services/model_list_service.py b/surfsense_backend/app/services/model_list_service.py index ebc0e0d7c..2a81c2d52 100644 --- a/surfsense_backend/app/services/model_list_service.py +++ b/surfsense_backend/app/services/model_list_service.py @@ -1,5 +1,5 @@ """ -Service for fetching and caching the available LLM model list. +Service for fetching and caching the available model list. Uses the OpenRouter public API as the primary source, with a local fallback JSON file when the API is unreachable. 
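The `/documents/folder-index` and `/documents/folder-index-files` endpoints added earlier in this diff are meant to be driven by the desktop app (full scan on startup, batched chokidar events afterwards). Purely as an illustration, a desktop-side caller might look like the sketch below; the base URL, route prefix, and auth header are placeholders and not taken from this diff, while the JSON fields mirror `FolderIndexRequest` and `FolderIndexFilesRequest`:

```python
# Hypothetical desktop-side client for the local folder endpoints (illustration
# only). BASE_URL and the bearer token are assumptions, not part of this diff.
import httpx

BASE_URL = "http://localhost:8000"             # assumed self-hosted backend
HEADERS = {"Authorization": "Bearer <token>"}  # placeholder credentials


def start_full_scan(folder_path: str, folder_name: str, search_space_id: int) -> int:
    """Kick off a full scan and return the root_folder_id the desktop should persist."""
    resp = httpx.post(
        f"{BASE_URL}/documents/folder-index",
        json={
            "folder_path": folder_path,
            "folder_name": folder_name,
            "search_space_id": search_space_id,
            "exclude_patterns": [".git", "node_modules"],
            "enable_summary": False,
        },
        headers=HEADERS,
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()["root_folder_id"]


def index_changed_files(
    folder_path: str,
    folder_name: str,
    search_space_id: int,
    root_folder_id: int,
    changed_paths: list[str],
) -> None:
    """Batched chokidar trigger: re-index only the files that changed on disk."""
    resp = httpx.post(
        f"{BASE_URL}/documents/folder-index-files",
        json={
            "folder_path": folder_path,
            "folder_name": folder_name,
            "search_space_id": search_space_id,
            "target_file_paths": changed_paths,
            "root_folder_id": root_folder_id,
        },
        headers=HEADERS,
        timeout=30,
    )
    resp.raise_for_status()
```

The second call reuses the `root_folder_id` returned by the first, which matches how `folder_index` updates an existing watched root folder instead of creating a new one when `root_folder_id` is supplied.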
diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py index 662b41f2a..4e9249d34 100644 --- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py @@ -1,6 +1,7 @@ """Celery tasks for document processing.""" import asyncio +import contextlib import logging import os from uuid import UUID @@ -10,6 +11,7 @@ from app.config import config from app.services.notification_service import NotificationService from app.services.task_logging_service import TaskLoggingService from app.tasks.celery_tasks import get_celery_session_maker +from app.tasks.connector_indexers.local_folder_indexer import index_local_folder from app.tasks.document_processors import ( add_extension_received_document, add_youtube_video_document, @@ -141,21 +143,30 @@ async def _delete_document_background(document_id: int) -> None: retry_backoff_max=300, max_retries=5, ) -def delete_folder_documents_task(self, document_ids: list[int]): - """Celery task to batch-delete documents orphaned by folder deletion.""" +def delete_folder_documents_task( + self, + document_ids: list[int], + folder_subtree_ids: list[int] | None = None, +): + """Celery task to delete documents first, then the folder rows.""" loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - loop.run_until_complete(_delete_folder_documents(document_ids)) + loop.run_until_complete( + _delete_folder_documents(document_ids, folder_subtree_ids) + ) finally: loop.close() -async def _delete_folder_documents(document_ids: list[int]) -> None: - """Delete chunks in batches, then document rows for each orphaned document.""" +async def _delete_folder_documents( + document_ids: list[int], + folder_subtree_ids: list[int] | None = None, +) -> None: + """Delete chunks in batches, then document rows, then folder rows.""" from sqlalchemy import delete as sa_delete, select - from app.db import Chunk, Document + from app.db import Chunk, Document, Folder async with get_celery_session_maker()() as session: batch_size = 500 @@ -177,6 +188,12 @@ async def _delete_folder_documents(document_ids: list[int]) -> None: await session.delete(doc) await session.commit() + if folder_subtree_ids: + await session.execute( + sa_delete(Folder).where(Folder.id.in_(folder_subtree_ids)) + ) + await session.commit() + @celery_app.task( name="delete_search_space_background", @@ -1243,3 +1260,154 @@ async def _process_circleback_meeting( heartbeat_task.cancel() if notification: _stop_heartbeat(notification.id) + + +# ===== Local folder indexing task ===== + + +@celery_app.task(name="index_local_folder", bind=True) +def index_local_folder_task( + self, + search_space_id: int, + user_id: str, + folder_path: str, + folder_name: str, + exclude_patterns: list[str] | None = None, + file_extensions: list[str] | None = None, + root_folder_id: int | None = None, + enable_summary: bool = False, + target_file_paths: list[str] | None = None, +): + """Celery task to index a local folder. 
Config is passed directly — no connector row.""" + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + try: + loop.run_until_complete( + _index_local_folder_async( + search_space_id=search_space_id, + user_id=user_id, + folder_path=folder_path, + folder_name=folder_name, + exclude_patterns=exclude_patterns, + file_extensions=file_extensions, + root_folder_id=root_folder_id, + enable_summary=enable_summary, + target_file_paths=target_file_paths, + ) + ) + finally: + loop.close() + + +async def _index_local_folder_async( + search_space_id: int, + user_id: str, + folder_path: str, + folder_name: str, + exclude_patterns: list[str] | None = None, + file_extensions: list[str] | None = None, + root_folder_id: int | None = None, + enable_summary: bool = False, + target_file_paths: list[str] | None = None, +): + """Run local folder indexing with notification + heartbeat.""" + is_batch = bool(target_file_paths) + is_full_scan = not target_file_paths + file_count = len(target_file_paths) if target_file_paths else None + + if is_batch: + doc_name = f"{folder_name} ({file_count} file{'s' if file_count != 1 else ''})" + else: + doc_name = folder_name + + notification = None + notification_id: int | None = None + heartbeat_task = None + + async with get_celery_session_maker()() as session: + try: + notification = ( + await NotificationService.document_processing.notify_processing_started( + session=session, + user_id=UUID(user_id), + document_type="LOCAL_FOLDER_FILE", + document_name=doc_name, + search_space_id=search_space_id, + ) + ) + notification_id = notification.id + _start_heartbeat(notification_id) + heartbeat_task = asyncio.create_task(_run_heartbeat_loop(notification_id)) + except Exception: + logger.warning( + "Failed to create notification for local folder indexing", + exc_info=True, + ) + + async def _heartbeat_progress(completed_count: int) -> None: + """Refresh heartbeat and optionally update notification progress.""" + if notification: + with contextlib.suppress(Exception): + await NotificationService.document_processing.notify_processing_progress( + session=session, + notification=notification, + stage="indexing", + stage_message=f"Syncing files ({completed_count}/{file_count or '?'})", + ) + + try: + _indexed, _skipped_or_failed, _rfid, err = await index_local_folder( + session=session, + search_space_id=search_space_id, + user_id=user_id, + folder_path=folder_path, + folder_name=folder_name, + exclude_patterns=exclude_patterns, + file_extensions=file_extensions, + root_folder_id=root_folder_id, + enable_summary=enable_summary, + target_file_paths=target_file_paths, + on_heartbeat_callback=_heartbeat_progress + if (is_batch or is_full_scan) + else None, + ) + + if notification: + try: + await session.refresh(notification) + if err: + await NotificationService.document_processing.notify_processing_completed( + session=session, + notification=notification, + error_message=err, + ) + else: + await NotificationService.document_processing.notify_processing_completed( + session=session, + notification=notification, + ) + except Exception: + logger.warning( + "Failed to update notification after local folder indexing", + exc_info=True, + ) + + except Exception as e: + logger.exception(f"Local folder indexing failed: {e}") + if notification: + try: + await session.refresh(notification) + await NotificationService.document_processing.notify_processing_completed( + session=session, + notification=notification, + error_message=str(e)[:200], + ) + except Exception: + pass + raise + 
finally: + if heartbeat_task: + heartbeat_task.cancel() + if notification_id is not None: + _stop_heartbeat(notification_id) diff --git a/surfsense_backend/app/tasks/connector_indexers/__init__.py b/surfsense_backend/app/tasks/connector_indexers/__init__.py index 9a1d17fd5..1b032d54a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/__init__.py +++ b/surfsense_backend/app/tasks/connector_indexers/__init__.py @@ -42,9 +42,9 @@ from .jira_indexer import index_jira_issues # Issue tracking and project management from .linear_indexer import index_linear_issues -from .luma_indexer import index_luma_events # Documentation and knowledge management +from .luma_indexer import index_luma_events from .notion_indexer import index_notion_pages from .obsidian_indexer import index_obsidian_vault from .slack_indexer import index_slack_messages diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py new file mode 100644 index 000000000..acfbce0bf --- /dev/null +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -0,0 +1,1247 @@ +""" +Local folder indexer. + +Indexes files from a local folder on disk. Supports: +- Full-scan mode (startup reconciliation / manual trigger) +- Batch mode (chokidar real-time trigger, 1..N files) +- Filesystem folder structure mirroring into DB Folder rows +- Document versioning via create_version_snapshot +- ETL-based file parsing for binary formats (PDF, DOCX, images, audio, etc.) + +Desktop-only: all change detection is driven by chokidar in the desktop app. +Config (folder_path, exclude_patterns, etc.) is passed in from the caller — +no connector row is read. +""" + +import asyncio +import os +from collections.abc import Awaitable, Callable +from datetime import UTC, datetime +from pathlib import Path + +from sqlalchemy import select +from sqlalchemy.exc import IntegrityError, SQLAlchemyError +from sqlalchemy.ext.asyncio import AsyncSession + +from app.config import config +from app.db import ( + Document, + DocumentStatus, + DocumentType, + Folder, +) +from app.indexing_pipeline.connector_document import ConnectorDocument +from app.indexing_pipeline.document_hashing import compute_identifier_hash +from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService +from app.services.llm_service import get_user_long_context_llm +from app.services.page_limit_service import PageLimitExceededError, PageLimitService +from app.services.task_logging_service import TaskLoggingService +from app.tasks.celery_tasks import get_celery_session_maker +from app.utils.document_versioning import create_version_snapshot + +from .base import ( + check_document_by_unique_identifier, + logger, +) + +PLAINTEXT_EXTENSIONS = frozenset( + { + ".md", + ".markdown", + ".txt", + ".text", + ".json", + ".jsonl", + ".yaml", + ".yml", + ".toml", + ".ini", + ".cfg", + ".conf", + ".xml", + ".css", + ".scss", + ".less", + ".sass", + ".py", + ".pyw", + ".pyi", + ".pyx", + ".js", + ".jsx", + ".ts", + ".tsx", + ".mjs", + ".cjs", + ".java", + ".kt", + ".kts", + ".scala", + ".groovy", + ".c", + ".h", + ".cpp", + ".cxx", + ".cc", + ".hpp", + ".hxx", + ".cs", + ".fs", + ".fsx", + ".go", + ".rs", + ".rb", + ".php", + ".pl", + ".pm", + ".lua", + ".swift", + ".m", + ".mm", + ".r", + ".R", + ".jl", + ".sh", + ".bash", + ".zsh", + ".fish", + ".bat", + ".cmd", + ".ps1", + ".sql", + ".graphql", + ".gql", + ".env", + ".gitignore", + ".dockerignore", + ".editorconfig", + 
".makefile", + ".cmake", + ".log", + ".rst", + ".tex", + ".bib", + ".org", + ".adoc", + ".asciidoc", + ".vue", + ".svelte", + ".astro", + ".tf", + ".hcl", + ".proto", + } +) + +AUDIO_EXTENSIONS = frozenset( + { + ".mp3", + ".mp4", + ".mpeg", + ".mpga", + ".m4a", + ".wav", + ".webm", + } +) + + +DIRECT_CONVERT_EXTENSIONS = frozenset({".csv", ".tsv", ".html", ".htm"}) + + +def _is_plaintext_file(filename: str) -> bool: + return Path(filename).suffix.lower() in PLAINTEXT_EXTENSIONS + + +def _is_audio_file(filename: str) -> bool: + return Path(filename).suffix.lower() in AUDIO_EXTENSIONS + + +def _is_direct_convert_file(filename: str) -> bool: + return Path(filename).suffix.lower() in DIRECT_CONVERT_EXTENSIONS + + +def _needs_etl(filename: str) -> bool: + """File is not plaintext, not audio, and not direct-convert — requires ETL.""" + return ( + not _is_plaintext_file(filename) + and not _is_audio_file(filename) + and not _is_direct_convert_file(filename) + ) + + +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + + +def _estimate_pages_safe(page_limit_service: PageLimitService, file_path: str) -> int: + """Estimate page count with a file-size fallback.""" + try: + return page_limit_service.estimate_pages_before_processing(file_path) + except Exception: + file_size = os.path.getsize(file_path) + return max(1, file_size // (80 * 1024)) + + +async def _check_page_limit_or_skip( + page_limit_service: PageLimitService, + user_id: str, + file_path: str, +) -> int: + """Estimate pages and check the limit; raises PageLimitExceededError if over quota. + + Returns the estimated page count on success. + """ + estimated = _estimate_pages_safe(page_limit_service, file_path) + await page_limit_service.check_page_limit(user_id, estimated) + return estimated + + +def _compute_final_pages( + page_limit_service: PageLimitService, + estimated_pages: int, + content_length: int, +) -> int: + """Return the final page count as max(estimated, actual).""" + actual = page_limit_service.estimate_pages_from_content_length(content_length) + return max(estimated_pages, actual) + +DEFAULT_EXCLUDE_PATTERNS = [ + ".git", + "node_modules", + "__pycache__", + ".DS_Store", + ".obsidian", + ".trash", +] + + +def scan_folder( + folder_path: str, + file_extensions: list[str] | None = None, + exclude_patterns: list[str] | None = None, +) -> list[dict]: + """Walk a directory and return a list of file entries. + + Args: + folder_path: Absolute path to the folder to scan. + file_extensions: If provided, only include files with these extensions + (e.g. [".md", ".txt"]). ``None`` means include all files. + exclude_patterns: Directory/file names to exclude. Any path component + matching one of these strings is skipped. + + Returns: + List of dicts with keys: path, relative_path, name, modified_at, size. 
+ """ + root = Path(folder_path) + if not root.exists(): + raise ValueError(f"Folder path does not exist: {folder_path}") + + if exclude_patterns is None: + exclude_patterns = [] + + files: list[dict] = [] + for dirpath, dirnames, filenames in os.walk(root): + rel_dir = Path(dirpath).relative_to(root) + + dirnames[:] = [d for d in dirnames if d not in exclude_patterns] + + if any(part in exclude_patterns for part in rel_dir.parts): + continue + + for fname in filenames: + if fname in exclude_patterns: + continue + + full = Path(dirpath) / fname + + if ( + file_extensions is not None + and full.suffix.lower() not in file_extensions + ): + continue + + try: + stat = full.stat() + rel_path = full.relative_to(root) + files.append( + { + "path": str(full), + "relative_path": str(rel_path), + "name": full.name, + "modified_at": datetime.fromtimestamp(stat.st_mtime, tz=UTC), + "size": stat.st_size, + } + ) + except OSError as e: + logger.warning(f"Could not stat file {full}: {e}") + + return files + + +def _read_plaintext_file(file_path: str) -> str: + """Read a plaintext/text-based file as UTF-8.""" + with open(file_path, encoding="utf-8", errors="replace") as f: + content = f.read() + if "\x00" in content: + raise ValueError( + f"File contains null bytes — likely a binary file opened as text: {file_path}" + ) + return content + + +async def _read_file_content(file_path: str, filename: str) -> str: + """Read file content, using ETL for binary formats. + + Plaintext files are read directly. Audio and document files (PDF, DOCX, etc.) + are routed through the configured ETL service (same as Google Drive / OneDrive). + + Raises ValueError if the file cannot be parsed (e.g. no ETL service configured + for a binary file). + """ + if _is_plaintext_file(filename): + return _read_plaintext_file(file_path) + + if _is_direct_convert_file(filename): + from app.tasks.document_processors._direct_converters import ( + convert_file_directly, + ) + + return convert_file_directly(file_path, filename) + + if _is_audio_file(filename): + etl_service = config.ETL_SERVICE if hasattr(config, "ETL_SERVICE") else None + stt_service_val = config.STT_SERVICE if hasattr(config, "STT_SERVICE") else None + if not stt_service_val and not etl_service: + raise ValueError( + f"No STT_SERVICE configured — cannot transcribe audio file: {filename}" + ) + + if _needs_etl(filename): + etl_service = getattr(config, "ETL_SERVICE", None) + if not etl_service: + raise ValueError( + f"No ETL_SERVICE configured — cannot parse binary file: {filename}. " + f"Set ETL_SERVICE to UNSTRUCTURED, LLAMACLOUD, or DOCLING in your .env" + ) + + from app.connectors.onedrive.content_extractor import ( + _parse_file_to_markdown, + ) + + return await _parse_file_to_markdown(file_path, filename) + + +def _content_hash(content: str, search_space_id: int) -> str: + """SHA-256 hash of content scoped to a search space. + + Matches the format used by ``compute_content_hash`` in the unified + pipeline so that dedup checks are consistent. + """ + import hashlib + + return hashlib.sha256(f"{search_space_id}:{content}".encode()).hexdigest() + + +async def _compute_file_content_hash( + file_path: str, + filename: str, + search_space_id: int, +) -> tuple[str, str]: + """Read a file (via ETL if needed) and compute its content hash. + + Returns (content_text, content_hash). 
+ """ + content = await _read_file_content(file_path, filename) + return content, _content_hash(content, search_space_id) + + +async def _mirror_folder_structure( + session: AsyncSession, + folder_path: str, + folder_name: str, + search_space_id: int, + user_id: str, + root_folder_id: int | None = None, + exclude_patterns: list[str] | None = None, +) -> tuple[dict[str, int], int]: + """Mirror the local filesystem directory structure into DB Folder rows. + + Returns (mapping, root_folder_id) where mapping is + relative_dir_path -> folder_id. The empty string key maps to the root folder. + """ + root = Path(folder_path) + if exclude_patterns is None: + exclude_patterns = [] + + subdirs: list[str] = [] + for dirpath, dirnames, _ in os.walk(root): + dirnames[:] = [d for d in dirnames if d not in exclude_patterns] + rel = Path(dirpath).relative_to(root) + if any(part in exclude_patterns for part in rel.parts): + continue + rel_str = str(rel) if str(rel) != "." else "" + if rel_str: + subdirs.append(rel_str) + + subdirs.sort(key=lambda p: p.count(os.sep)) + + mapping: dict[str, int] = {} + + if root_folder_id: + existing = ( + await session.execute(select(Folder).where(Folder.id == root_folder_id)) + ).scalar_one_or_none() + if existing: + mapping[""] = existing.id + else: + root_folder_id = None + + if not root_folder_id: + root_folder = Folder( + name=folder_name, + search_space_id=search_space_id, + created_by_id=user_id, + position="a0", + ) + session.add(root_folder) + await session.flush() + mapping[""] = root_folder.id + root_folder_id = root_folder.id + + for rel_dir in subdirs: + dir_parts = Path(rel_dir).parts + dir_name = dir_parts[-1] + parent_rel = str(Path(*dir_parts[:-1])) if len(dir_parts) > 1 else "" + + parent_id = mapping.get(parent_rel, mapping[""]) + + existing_folder = ( + await session.execute( + select(Folder).where( + Folder.name == dir_name, + Folder.parent_id == parent_id, + Folder.search_space_id == search_space_id, + ) + ) + ).scalar_one_or_none() + + if existing_folder: + mapping[rel_dir] = existing_folder.id + else: + new_folder = Folder( + name=dir_name, + parent_id=parent_id, + search_space_id=search_space_id, + created_by_id=user_id, + position="a0", + ) + session.add(new_folder) + await session.flush() + mapping[rel_dir] = new_folder.id + + await session.flush() + return mapping, root_folder_id + + +async def _resolve_folder_for_file( + session: AsyncSession, + rel_path: str, + root_folder_id: int, + search_space_id: int, + user_id: str, +) -> int: + """Given a file's relative path, ensure all parent Folder rows exist and + return the folder_id for the file's immediate parent directory. + + For a file at "notes/daily/today.md", this ensures Folder rows exist for + "notes" and "notes/daily", and returns the id of "notes/daily". + For a file at "readme.md" (root level), returns root_folder_id. 
+ """ + parent_dir = str(Path(rel_path).parent) + if parent_dir == ".": + return root_folder_id + + parts = Path(parent_dir).parts + current_parent_id = root_folder_id + + for part in parts: + existing = ( + await session.execute( + select(Folder).where( + Folder.name == part, + Folder.parent_id == current_parent_id, + Folder.search_space_id == search_space_id, + ) + ) + ).scalar_one_or_none() + + if existing: + current_parent_id = existing.id + else: + new_folder = Folder( + name=part, + parent_id=current_parent_id, + search_space_id=search_space_id, + created_by_id=user_id, + position="a0", + ) + session.add(new_folder) + await session.flush() + current_parent_id = new_folder.id + + return current_parent_id + + +async def _cleanup_empty_folder_chain( + session: AsyncSession, + folder_id: int, + root_folder_id: int, +) -> None: + """Walk up from folder_id toward root, deleting empty folders (no docs, no + children). Stops at root_folder_id which is never deleted.""" + current_id = folder_id + while current_id and current_id != root_folder_id: + has_doc = ( + await session.execute( + select(Document.id).where(Document.folder_id == current_id).limit(1) + ) + ).scalar_one_or_none() + if has_doc is not None: + break + + has_child = ( + await session.execute( + select(Folder.id).where(Folder.parent_id == current_id).limit(1) + ) + ).scalar_one_or_none() + if has_child is not None: + break + + folder = ( + await session.execute(select(Folder).where(Folder.id == current_id)) + ).scalar_one_or_none() + if not folder: + break + + parent_id = folder.parent_id + await session.delete(folder) + await session.flush() + current_id = parent_id + + +async def _cleanup_empty_folders( + session: AsyncSession, + root_folder_id: int, + search_space_id: int, + existing_dirs_on_disk: set[str], + folder_mapping: dict[str, int], +) -> None: + """Delete Folder rows that are empty (no docs, no children) and no longer on disk.""" + from sqlalchemy import delete as sa_delete + + id_to_rel: dict[int, str] = {fid: rel for rel, fid in folder_mapping.items() if rel} + + all_folders = ( + ( + await session.execute( + select(Folder).where( + Folder.search_space_id == search_space_id, + Folder.id != root_folder_id, + ) + ) + ) + .scalars() + .all() + ) + + candidates: list[Folder] = [] + for folder in all_folders: + rel = id_to_rel.get(folder.id) + if rel and rel in existing_dirs_on_disk: + continue + candidates.append(folder) + + changed = True + while changed: + changed = False + remaining: list[Folder] = [] + for folder in candidates: + doc_exists = ( + await session.execute( + select(Document.id).where(Document.folder_id == folder.id).limit(1) + ) + ).scalar_one_or_none() + if doc_exists is not None: + remaining.append(folder) + continue + + child_exists = ( + await session.execute( + select(Folder.id).where(Folder.parent_id == folder.id).limit(1) + ) + ).scalar_one_or_none() + if child_exists is not None: + remaining.append(folder) + continue + + await session.execute(sa_delete(Folder).where(Folder.id == folder.id)) + changed = True + candidates = remaining + + +def _build_connector_doc( + title: str, + content: str, + relative_path: str, + folder_name: str, + *, + search_space_id: int, + user_id: str, + enable_summary: bool, +) -> ConnectorDocument: + """Build a ConnectorDocument from a local file's extracted content.""" + unique_id = f"{folder_name}:{relative_path}" + metadata = { + "folder_name": folder_name, + "file_path": relative_path, + "document_type": "Local Folder File", + "connector_type": "Local Folder", + 
} + fallback_summary = f"File: {title}\n\n{content[:4000]}" + + return ConnectorDocument( + title=title, + source_markdown=content, + unique_id=unique_id, + document_type=DocumentType.LOCAL_FOLDER_FILE, + search_space_id=search_space_id, + connector_id=None, + created_by_id=user_id, + should_summarize=enable_summary, + fallback_summary=fallback_summary, + metadata=metadata, + ) + + +async def index_local_folder( + session: AsyncSession, + search_space_id: int, + user_id: str, + folder_path: str, + folder_name: str, + exclude_patterns: list[str] | None = None, + file_extensions: list[str] | None = None, + root_folder_id: int | None = None, + enable_summary: bool = False, + target_file_paths: list[str] | None = None, + on_heartbeat_callback: HeartbeatCallbackType | None = None, +) -> tuple[int, int, int | None, str | None]: + """Index files from a local folder. + + Supports two modes: + - Batch (target_file_paths set): processes 1..N files. + Single-file uses the caller's session; multi-file fans out with per-file sessions. + - Full scan (no target paths): walks entire folder, handles new/changed/deleted files. + + Returns (indexed_count, skipped_count, root_folder_id, error_or_warning_message). + """ + task_logger = TaskLoggingService(session, search_space_id) + + log_entry = await task_logger.log_task_start( + task_name="local_folder_indexing", + source="local_folder_indexing_task", + message=f"Starting local folder indexing for {folder_name}", + metadata={ + "folder_path": folder_path, + "user_id": str(user_id), + "target_file_paths_count": len(target_file_paths) + if target_file_paths + else None, + }, + ) + + try: + if not folder_path or not os.path.exists(folder_path): + await task_logger.log_task_failure( + log_entry, + f"Folder path missing or does not exist: {folder_path}", + "Folder not found", + {}, + ) + return ( + 0, + 0, + root_folder_id, + f"Folder path missing or does not exist: {folder_path}", + ) + + if exclude_patterns is None: + exclude_patterns = DEFAULT_EXCLUDE_PATTERNS + + # ==================================================================== + # BATCH MODE (1..N files) + # ==================================================================== + if target_file_paths: + if len(target_file_paths) == 1: + indexed, skipped, err = await _index_single_file( + session=session, + search_space_id=search_space_id, + user_id=user_id, + folder_path=folder_path, + folder_name=folder_name, + target_file_path=target_file_paths[0], + enable_summary=enable_summary, + root_folder_id=root_folder_id, + task_logger=task_logger, + log_entry=log_entry, + ) + return indexed, skipped, root_folder_id, err + + indexed, failed, err = await _index_batch_files( + search_space_id=search_space_id, + user_id=user_id, + folder_path=folder_path, + folder_name=folder_name, + target_file_paths=target_file_paths, + enable_summary=enable_summary, + root_folder_id=root_folder_id, + on_progress_callback=on_heartbeat_callback, + ) + if err: + await task_logger.log_task_success( + log_entry, + f"Batch indexing: {indexed} indexed, {failed} failed", + {"indexed": indexed, "failed": failed}, + ) + else: + await task_logger.log_task_success( + log_entry, + f"Batch indexing complete: {indexed} indexed", + {"indexed": indexed, "failed": failed}, + ) + return indexed, failed, root_folder_id, err + + # ==================================================================== + # FULL-SCAN MODE + # ==================================================================== + + await task_logger.log_task_progress( + log_entry, 
"Mirroring folder structure", {"stage": "folder_mirror"} + ) + + folder_mapping, root_folder_id = await _mirror_folder_structure( + session=session, + folder_path=folder_path, + folder_name=folder_name, + search_space_id=search_space_id, + user_id=user_id, + root_folder_id=root_folder_id, + exclude_patterns=exclude_patterns, + ) + await session.flush() + + try: + files = scan_folder(folder_path, file_extensions, exclude_patterns) + except Exception as e: + await task_logger.log_task_failure( + log_entry, f"Failed to scan folder: {e}", "Scan error", {} + ) + return 0, 0, root_folder_id, f"Failed to scan folder: {e}" + + logger.info(f"Found {len(files)} files in folder") + + indexed_count = 0 + skipped_count = 0 + failed_count = 0 + + page_limit_service = PageLimitService(session) + + # ================================================================ + # PHASE 1: Pre-filter files (mtime / content-hash), version changed + # ================================================================ + connector_docs: list[ConnectorDocument] = [] + file_meta_map: dict[str, dict] = {} + seen_unique_hashes: set[str] = set() + + for file_info in files: + try: + relative_path = file_info["relative_path"] + file_path_abs = file_info["path"] + + unique_identifier = f"{folder_name}:{relative_path}" + unique_identifier_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, + unique_identifier, + search_space_id, + ) + seen_unique_hashes.add(unique_identifier_hash) + + existing_document = await check_document_by_unique_identifier( + session, unique_identifier_hash + ) + + if existing_document: + stored_mtime = (existing_document.document_metadata or {}).get( + "mtime" + ) + current_mtime = file_info["modified_at"].timestamp() + + if stored_mtime and abs(current_mtime - stored_mtime) < 1.0: + if not DocumentStatus.is_state( + existing_document.status, DocumentStatus.READY + ): + existing_document.status = DocumentStatus.ready() + skipped_count += 1 + continue + + try: + estimated_pages = await _check_page_limit_or_skip( + page_limit_service, user_id, file_path_abs + ) + except PageLimitExceededError: + logger.warning( + f"Page limit exceeded, skipping: {file_path_abs}" + ) + failed_count += 1 + continue + + try: + content, content_hash = await _compute_file_content_hash( + file_path_abs, file_info["relative_path"], search_space_id + ) + except Exception as read_err: + logger.warning(f"Could not read {file_path_abs}: {read_err}") + skipped_count += 1 + continue + + if existing_document.content_hash == content_hash: + meta = dict(existing_document.document_metadata or {}) + meta["mtime"] = current_mtime + existing_document.document_metadata = meta + if not DocumentStatus.is_state( + existing_document.status, DocumentStatus.READY + ): + existing_document.status = DocumentStatus.ready() + skipped_count += 1 + continue + + await create_version_snapshot(session, existing_document) + else: + try: + estimated_pages = await _check_page_limit_or_skip( + page_limit_service, user_id, file_path_abs + ) + except PageLimitExceededError: + logger.warning( + f"Page limit exceeded, skipping: {file_path_abs}" + ) + failed_count += 1 + continue + + try: + content, content_hash = await _compute_file_content_hash( + file_path_abs, file_info["relative_path"], search_space_id + ) + except Exception as read_err: + logger.warning(f"Could not read {file_path_abs}: {read_err}") + skipped_count += 1 + continue + + if not content.strip(): + skipped_count += 1 + continue + + doc = _build_connector_doc( + title=file_info["name"], 
+ content=content, + relative_path=relative_path, + folder_name=folder_name, + search_space_id=search_space_id, + user_id=user_id, + enable_summary=enable_summary, + ) + connector_docs.append(doc) + file_meta_map[unique_identifier] = { + "relative_path": relative_path, + "mtime": file_info["modified_at"].timestamp(), + "estimated_pages": estimated_pages, + "content_length": len(content), + } + + except Exception as e: + logger.exception(f"Phase 1 error for {file_info.get('path')}: {e}") + failed_count += 1 + + # ================================================================ + # PHASE 1.5: Delete documents no longer on disk + # ================================================================ + all_root_folder_ids = set(folder_mapping.values()) + all_db_folders = ( + ( + await session.execute( + select(Folder.id).where( + Folder.search_space_id == search_space_id, + ) + ) + ) + .scalars() + .all() + ) + all_root_folder_ids.update(all_db_folders) + + all_folder_docs = ( + ( + await session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == search_space_id, + Document.folder_id.in_(list(all_root_folder_ids)), + ) + ) + ) + .scalars() + .all() + ) + + for doc in all_folder_docs: + if doc.unique_identifier_hash not in seen_unique_hashes: + await session.delete(doc) + + await session.flush() + + # ================================================================ + # PHASE 2: Index via unified pipeline + # ================================================================ + if connector_docs: + from app.indexing_pipeline.document_hashing import ( + compute_unique_identifier_hash, + ) + + pipeline = IndexingPipelineService(session) + doc_map = {compute_unique_identifier_hash(cd): cd for cd in connector_docs} + documents = await pipeline.prepare_for_indexing(connector_docs) + + # Assign folder_id immediately so docs appear in the correct + # folder while still pending/processing (visible via Zero sync). 
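+            # Note: folder_mapping maps directory paths relative to the synced root
+            # to Folder ids; the empty-string key refers to the root folder itself.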
+ for document in documents: + cd = doc_map.get(document.unique_identifier_hash) + if cd is None: + continue + rel_path = (cd.metadata or {}).get("file_path", "") + parent_dir = str(Path(rel_path).parent) if rel_path else "" + if parent_dir == ".": + parent_dir = "" + document.folder_id = folder_mapping.get( + parent_dir, folder_mapping.get("") + ) + try: + await session.commit() + except IntegrityError: + await session.rollback() + for document in documents: + await session.refresh(document) + + llm = await get_user_long_context_llm(session, user_id, search_space_id) + + for document in documents: + connector_doc = doc_map.get(document.unique_identifier_hash) + if connector_doc is None: + failed_count += 1 + continue + + result = await pipeline.index(document, connector_doc, llm) + + if DocumentStatus.is_state(result.status, DocumentStatus.READY): + indexed_count += 1 + + unique_id = connector_doc.unique_id + mtime_info = file_meta_map.get(unique_id, {}) + + doc_meta = dict(result.document_metadata or {}) + doc_meta["mtime"] = mtime_info.get("mtime") + result.document_metadata = doc_meta + + est = mtime_info.get("estimated_pages", 1) + content_len = mtime_info.get("content_length", 0) + final_pages = _compute_final_pages( + page_limit_service, est, content_len + ) + await page_limit_service.update_page_usage( + user_id, final_pages, allow_exceed=True + ) + else: + failed_count += 1 + + if on_heartbeat_callback and indexed_count % 5 == 0: + await on_heartbeat_callback(indexed_count) + + # Cleanup empty folders + existing_dirs = set() + for dirpath, dirnames, _ in os.walk(folder_path): + dirnames[:] = [d for d in dirnames if d not in exclude_patterns] + rel = str(Path(dirpath).relative_to(folder_path)) + if rel == ".": + rel = "" + if rel and not any(part in exclude_patterns for part in Path(rel).parts): + existing_dirs.add(rel) + + root_fid = folder_mapping.get("") + if root_fid: + await _cleanup_empty_folders( + session, root_fid, search_space_id, existing_dirs, folder_mapping + ) + + try: + await session.commit() + except Exception as e: + if "duplicate key value violates unique constraint" in str(e).lower(): + logger.warning(f"Duplicate key during commit: {e}") + await session.rollback() + else: + raise + + warning_parts = [] + if failed_count > 0: + warning_parts.append(f"{failed_count} failed") + warning_message = ", ".join(warning_parts) if warning_parts else None + + await task_logger.log_task_success( + log_entry, + f"Completed local folder indexing for {folder_name}", + { + "indexed": indexed_count, + "skipped": skipped_count, + "failed": failed_count, + }, + ) + + return indexed_count, skipped_count, root_folder_id, warning_message + + except SQLAlchemyError as e: + logger.exception(f"Database error during local folder indexing: {e}") + await session.rollback() + await task_logger.log_task_failure( + log_entry, f"DB error: {e}", "Database error", {} + ) + return 0, 0, root_folder_id, f"Database error: {e}" + + except Exception as e: + logger.exception(f"Error during local folder indexing: {e}") + await task_logger.log_task_failure( + log_entry, f"Error: {e}", "Unexpected error", {} + ) + return 0, 0, root_folder_id, str(e) + + +BATCH_CONCURRENCY = 5 + + +async def _index_batch_files( + search_space_id: int, + user_id: str, + folder_path: str, + folder_name: str, + target_file_paths: list[str], + enable_summary: bool, + root_folder_id: int | None, + on_progress_callback: HeartbeatCallbackType | None = None, +) -> tuple[int, int, str | None]: + """Process multiple files in parallel 
with bounded concurrency. + + Each file gets its own DB session so they can run concurrently. + Returns (indexed_count, failed_count, error_summary_or_none). + """ + semaphore = asyncio.Semaphore(BATCH_CONCURRENCY) + indexed = 0 + failed = 0 + errors: list[str] = [] + lock = asyncio.Lock() + completed = 0 + + async def process_one(file_path: str) -> None: + nonlocal indexed, failed, completed + async with semaphore: + try: + async with get_celery_session_maker()() as file_session: + task_logger = TaskLoggingService(file_session, search_space_id) + log_entry = await task_logger.log_task_start( + task_name="local_folder_indexing", + source="local_folder_batch_indexing", + message=f"Batch: indexing {Path(file_path).name}", + metadata={"file_path": file_path}, + ) + ix, _sk, err = await _index_single_file( + session=file_session, + search_space_id=search_space_id, + user_id=user_id, + folder_path=folder_path, + folder_name=folder_name, + target_file_path=file_path, + enable_summary=enable_summary, + root_folder_id=root_folder_id, + task_logger=task_logger, + log_entry=log_entry, + ) + async with lock: + indexed += ix + if err: + failed += 1 + errors.append(f"{Path(file_path).name}: {err}") + completed += 1 + if on_progress_callback and completed % BATCH_CONCURRENCY == 0: + await on_progress_callback(completed) + except Exception as exc: + logger.exception(f"Batch: error processing {file_path}: {exc}") + async with lock: + failed += 1 + completed += 1 + errors.append(f"{Path(file_path).name}: {exc}") + + await asyncio.gather(*[process_one(fp) for fp in target_file_paths]) + + if on_progress_callback: + await on_progress_callback(completed) + + error_summary = None + if errors: + error_summary = f"{failed} file(s) failed: " + "; ".join(errors[:5]) + if len(errors) > 5: + error_summary += f" ... 
and {len(errors) - 5} more" + + return indexed, failed, error_summary + + +async def _index_single_file( + session: AsyncSession, + search_space_id: int, + user_id: str, + folder_path: str, + folder_name: str, + target_file_path: str, + enable_summary: bool, + root_folder_id: int | None, + task_logger, + log_entry, +) -> tuple[int, int, str | None]: + """Process a single file (chokidar real-time trigger).""" + try: + full_path = Path(target_file_path) + if not full_path.exists(): + rel = str(full_path.relative_to(folder_path)) + unique_id = f"{folder_name}:{rel}" + uid_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, unique_id, search_space_id + ) + existing = await check_document_by_unique_identifier(session, uid_hash) + if existing: + deleted_folder_id = existing.folder_id + await session.delete(existing) + await session.flush() + if deleted_folder_id and root_folder_id: + await _cleanup_empty_folder_chain( + session, deleted_folder_id, root_folder_id + ) + await session.commit() + return 0, 0, None + return 0, 0, None + + rel_path = str(full_path.relative_to(folder_path)) + + unique_id = f"{folder_name}:{rel_path}" + uid_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, unique_id, search_space_id + ) + + page_limit_service = PageLimitService(session) + try: + estimated_pages = await _check_page_limit_or_skip( + page_limit_service, user_id, str(full_path) + ) + except PageLimitExceededError as e: + return 0, 1, f"Page limit exceeded: {e}" + + try: + content, content_hash = await _compute_file_content_hash( + str(full_path), full_path.name, search_space_id + ) + except Exception as e: + return 0, 1, f"Could not read file: {e}" + + if not content.strip(): + return 0, 1, None + + existing = await check_document_by_unique_identifier(session, uid_hash) + + if existing: + if existing.content_hash == content_hash: + mtime = full_path.stat().st_mtime + meta = dict(existing.document_metadata or {}) + meta["mtime"] = mtime + existing.document_metadata = meta + await session.commit() + return 0, 1, None + + await create_version_snapshot(session, existing) + + mtime = full_path.stat().st_mtime + + connector_doc = _build_connector_doc( + title=full_path.name, + content=content, + relative_path=rel_path, + folder_name=folder_name, + search_space_id=search_space_id, + user_id=user_id, + enable_summary=enable_summary, + ) + + pipeline = IndexingPipelineService(session) + llm = await get_user_long_context_llm(session, user_id, search_space_id) + documents = await pipeline.prepare_for_indexing([connector_doc]) + + if not documents: + return 0, 1, None + + db_doc = documents[0] + + if root_folder_id: + try: + db_doc.folder_id = await _resolve_folder_for_file( + session, rel_path, root_folder_id, search_space_id, user_id + ) + await session.commit() + except IntegrityError: + await session.rollback() + await session.refresh(db_doc) + + await pipeline.index(db_doc, connector_doc, llm) + + await session.refresh(db_doc) + doc_meta = dict(db_doc.document_metadata or {}) + doc_meta["mtime"] = mtime + db_doc.document_metadata = doc_meta + await session.commit() + + indexed = ( + 1 if DocumentStatus.is_state(db_doc.status, DocumentStatus.READY) else 0 + ) + failed_msg = None if indexed else "Indexing failed" + + if indexed: + final_pages = _compute_final_pages( + page_limit_service, estimated_pages, len(content) + ) + await page_limit_service.update_page_usage( + user_id, final_pages, allow_exceed=True + ) + await task_logger.log_task_success( + log_entry, + f"Single 
file indexed: {rel_path}", + {"file": rel_path, "pages_processed": final_pages}, + ) + return indexed, 0 if indexed else 1, failed_msg + + except Exception as e: + logger.exception(f"Error indexing single file {target_file_path}: {e}") + await session.rollback() + return 0, 0, str(e) diff --git a/surfsense_backend/app/utils/document_versioning.py b/surfsense_backend/app/utils/document_versioning.py new file mode 100644 index 000000000..e6ad1fb06 --- /dev/null +++ b/surfsense_backend/app/utils/document_versioning.py @@ -0,0 +1,107 @@ +"""Document versioning: snapshot creation and cleanup. + +Rules: +- 30-minute debounce window: if the latest version was created < 30 min ago, + overwrite it instead of creating a new row. +- Maximum 20 versions per document. +- Versions older than 90 days are cleaned up. +""" + +from datetime import UTC, datetime, timedelta + +from sqlalchemy import delete, func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import Document, DocumentVersion + +MAX_VERSIONS_PER_DOCUMENT = 20 +DEBOUNCE_MINUTES = 30 +RETENTION_DAYS = 90 + + +def _now() -> datetime: + return datetime.now(UTC) + + +async def create_version_snapshot( + session: AsyncSession, + document: Document, +) -> DocumentVersion | None: + """Snapshot the document's current state into a DocumentVersion row. + + Returns the created/updated DocumentVersion, or None if nothing was done. + """ + now = _now() + + latest = ( + await session.execute( + select(DocumentVersion) + .where(DocumentVersion.document_id == document.id) + .order_by(DocumentVersion.version_number.desc()) + .limit(1) + ) + ).scalar_one_or_none() + + if latest is not None: + age = now - latest.created_at.replace(tzinfo=UTC) + if age < timedelta(minutes=DEBOUNCE_MINUTES): + latest.source_markdown = document.source_markdown + latest.content_hash = document.content_hash + latest.title = document.title + latest.created_at = now + await session.flush() + return latest + + max_num = ( + await session.execute( + select(func.coalesce(func.max(DocumentVersion.version_number), 0)).where( + DocumentVersion.document_id == document.id + ) + ) + ).scalar_one() + + version = DocumentVersion( + document_id=document.id, + version_number=max_num + 1, + source_markdown=document.source_markdown, + content_hash=document.content_hash, + title=document.title, + created_at=now, + ) + session.add(version) + await session.flush() + + # Cleanup: remove versions older than 90 days + cutoff = now - timedelta(days=RETENTION_DAYS) + await session.execute( + delete(DocumentVersion).where( + DocumentVersion.document_id == document.id, + DocumentVersion.created_at < cutoff, + ) + ) + + # Cleanup: cap at MAX_VERSIONS_PER_DOCUMENT + count = ( + await session.execute( + select(func.count()) + .select_from(DocumentVersion) + .where(DocumentVersion.document_id == document.id) + ) + ).scalar_one() + + if count > MAX_VERSIONS_PER_DOCUMENT: + excess = count - MAX_VERSIONS_PER_DOCUMENT + oldest_ids_result = await session.execute( + select(DocumentVersion.id) + .where(DocumentVersion.document_id == document.id) + .order_by(DocumentVersion.version_number.asc()) + .limit(excess) + ) + oldest_ids = [row[0] for row in oldest_ids_result.all()] + if oldest_ids: + await session.execute( + delete(DocumentVersion).where(DocumentVersion.id.in_(oldest_ids)) + ) + + await session.flush() + return version diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py 
b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py new file mode 100644 index 000000000..4d9bda7ee --- /dev/null +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py @@ -0,0 +1,1180 @@ +"""Integration tests for local folder indexer — Tier 3 (I1-I5), Tier 4 (F1-F7), Tier 5 (P1), Tier 6 (B1-B2).""" + +import os +from contextlib import asynccontextmanager +from pathlib import Path + +import pytest +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import ( + Document, + DocumentStatus, + DocumentType, + DocumentVersion, + Folder, + SearchSpace, + User, +) + +pytestmark = pytest.mark.integration + +UNIFIED_FIXTURES = ( + "patched_summarize", + "patched_embed_texts", + "patched_chunk_text", +) + + +class _FakeSessionMaker: + """Wraps an existing AsyncSession so ``async with factory()`` yields it + without closing it. Used to route batch-mode DB operations through the + test's savepoint-wrapped session.""" + + def __init__(self, session: AsyncSession): + self._session = session + + def __call__(self): + @asynccontextmanager + async def _ctx(): + yield self._session + + return _ctx() + + +@pytest.fixture +def patched_batch_sessions(monkeypatch, db_session): + """Make ``_index_batch_files`` use the test session and run sequentially.""" + monkeypatch.setattr( + "app.tasks.connector_indexers.local_folder_indexer.get_celery_session_maker", + lambda: _FakeSessionMaker(db_session), + ) + monkeypatch.setattr( + "app.tasks.connector_indexers.local_folder_indexer.BATCH_CONCURRENCY", + 1, + ) + + +# ==================================================================== +# Tier 3: Full Indexer Integration (I1-I5) +# ==================================================================== + + +class TestFullIndexer: + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_i1_new_file_indexed( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """I1: Single new .md file is indexed with status READY.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert err is None + assert count == 1 + + docs = ( + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ) + .scalars() + .all() + ) + assert len(docs) == 1 + assert docs[0].document_type == DocumentType.LOCAL_FOLDER_FILE + assert DocumentStatus.is_state(docs[0].status, DocumentStatus.READY) + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_i2_unchanged_skipped( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """I2: Second run on unchanged directory creates no new documents.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "note.md").write_text("# Hello\n\nSame content.") + + count1, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + assert count1 == 1 + + count2, _, 
_, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, + ) + assert count2 == 0 + + total = ( + await db_session.execute( + select(func.count()) + .select_from(Document) + .where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ).scalar_one() + assert total == 1 + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_i3_changed_reindexed( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """I3: Modified file content triggers re-index and creates a version.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + f = tmp_path / "note.md" + f.write_text("# Version 1\n\nOriginal.") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + f.write_text("# Version 2\n\nUpdated.") + os.utime(f, (f.stat().st_atime + 10, f.stat().st_mtime + 10)) + + count, _, _, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, + ) + assert count == 1 + + versions = ( + ( + await db_session.execute( + select(DocumentVersion) + .join(Document) + .where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ) + .scalars() + .all() + ) + assert len(versions) >= 1 + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_i4_deleted_removed( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """I4: Deleted file is removed from DB on re-sync.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + f = tmp_path / "to_delete.md" + f.write_text("# Delete me") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + docs_before = ( + await db_session.execute( + select(func.count()) + .select_from(Document) + .where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ).scalar_one() + assert docs_before == 1 + + f.unlink() + + await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, + ) + + docs_after = ( + await db_session.execute( + select(func.count()) + .select_from(Document) + .where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ).scalar_one() + assert docs_after == 0 + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_i5_single_file_mode( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """I5: Batch mode with a single file only processes that file.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "a.md").write_text("File A") + (tmp_path / "b.md").write_text("File B") + 
(tmp_path / "c.md").write_text("File C") + + count, _, _, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[str(tmp_path / "b.md")], + ) + assert count == 1 + + docs = ( + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ) + .scalars() + .all() + ) + assert len(docs) == 1 + assert docs[0].title == "b.md" + + +# ==================================================================== +# Tier 4: Folder Mirroring (F1-F7) +# ==================================================================== + + +class TestFolderMirroring: + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_f1_root_folder_created( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """F1: First sync creates a root Folder and returns root_folder_id.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "root.md").write_text("Root file") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert root_folder_id is not None + + root_folder = ( + await db_session.execute(select(Folder).where(Folder.id == root_folder_id)) + ).scalar_one() + assert root_folder.name == "test-folder" + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_f2_nested_folder_rows( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """F2: Nested dirs create Folder rows with correct parent_id chain.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + daily = tmp_path / "notes" / "daily" + daily.mkdir(parents=True) + weekly = tmp_path / "notes" / "weekly" + weekly.mkdir(parents=True) + (daily / "today.md").write_text("today") + (weekly / "review.md").write_text("review") + + await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + folders = ( + ( + await db_session.execute( + select(Folder).where(Folder.search_space_id == db_search_space.id) + ) + ) + .scalars() + .all() + ) + + folder_names = {f.name for f in folders} + assert "notes" in folder_names + assert "daily" in folder_names + assert "weekly" in folder_names + + notes_folder = next(f for f in folders if f.name == "notes") + daily_folder = next(f for f in folders if f.name == "daily") + weekly_folder = next(f for f in folders if f.name == "weekly") + + assert daily_folder.parent_id == notes_folder.id + assert weekly_folder.parent_id == notes_folder.id + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_f3_resync_reuses_folders( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """F3: Re-sync reuses existing Folder rows, no duplicates.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + sub = tmp_path / "docs" + sub.mkdir() + (sub / "file.md").write_text("content") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + 
folder_name="test-folder", + ) + + folders_before = ( + ( + await db_session.execute( + select(Folder).where(Folder.search_space_id == db_search_space.id) + ) + ) + .scalars() + .all() + ) + ids_before = {f.id for f in folders_before} + + await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, + ) + + folders_after = ( + ( + await db_session.execute( + select(Folder).where(Folder.search_space_id == db_search_space.id) + ) + ) + .scalars() + .all() + ) + ids_after = {f.id for f in folders_after} + + assert ids_before == ids_after + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_f4_folder_id_assigned( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """F4: Documents get correct folder_id based on their directory.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + daily = tmp_path / "notes" / "daily" + daily.mkdir(parents=True) + (daily / "today.md").write_text("today note") + (tmp_path / "root.md").write_text("root note") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + docs = ( + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ) + .scalars() + .all() + ) + + today_doc = next(d for d in docs if d.title == "today.md") + root_doc = next(d for d in docs if d.title == "root.md") + + daily_folder = ( + await db_session.execute(select(Folder).where(Folder.name == "daily")) + ).scalar_one() + + assert today_doc.folder_id == daily_folder.id + + assert root_doc.folder_id == root_folder_id + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_f5_empty_folder_cleanup( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """F5: Deleted dir's empty Folder row is cleaned up on re-sync.""" + import shutil + + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + daily = tmp_path / "notes" / "daily" + daily.mkdir(parents=True) + weekly = tmp_path / "notes" / "weekly" + weekly.mkdir(parents=True) + (daily / "today.md").write_text("today") + (weekly / "review.md").write_text("review") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + weekly_folder = ( + await db_session.execute(select(Folder).where(Folder.name == "weekly")) + ).scalar_one_or_none() + assert weekly_folder is not None + + shutil.rmtree(weekly) + + await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, + ) + + weekly_after = ( + await db_session.execute(select(Folder).where(Folder.name == "weekly")) + ).scalar_one_or_none() + assert weekly_after is None + + daily_after = ( + await db_session.execute(select(Folder).where(Folder.name == "daily")) + ).scalar_one_or_none() + assert daily_after is not None + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_f6_single_file_creates_subfolder( + 
self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """F6: Single-file mode creates missing Folder rows and assigns correct folder_id.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "root.md").write_text("root") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + sub = tmp_path / "notes" / "daily" + sub.mkdir(parents=True) + (sub / "new.md").write_text("new note in subfolder") + + count, _, _, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[str(sub / "new.md")], + root_folder_id=root_folder_id, + ) + assert count == 1 + + doc = ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.title == "new.md", + ) + ) + ).scalar_one() + + daily_folder = ( + await db_session.execute(select(Folder).where(Folder.name == "daily")) + ).scalar_one() + + assert doc.folder_id == daily_folder.id + assert daily_folder.parent_id is not None + + notes_folder = ( + await db_session.execute(select(Folder).where(Folder.name == "notes")) + ).scalar_one() + assert daily_folder.parent_id == notes_folder.id + assert notes_folder.parent_id == root_folder_id + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_f7_single_file_delete_cleans_empty_folders( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """F7: Deleting the only file in a subfolder via batch mode removes empty Folder rows.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + sub = tmp_path / "notes" / "ephemeral" + sub.mkdir(parents=True) + (sub / "temp.md").write_text("temporary") + (tmp_path / "keep.md").write_text("keep this") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + eph_folder = ( + await db_session.execute(select(Folder).where(Folder.name == "ephemeral")) + ).scalar_one_or_none() + assert eph_folder is not None + + target = sub / "temp.md" + target.unlink() + + await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[str(target)], + root_folder_id=root_folder_id, + ) + + eph_after = ( + await db_session.execute(select(Folder).where(Folder.name == "ephemeral")) + ).scalar_one_or_none() + assert eph_after is None + + notes_after = ( + await db_session.execute(select(Folder).where(Folder.name == "notes")) + ).scalar_one_or_none() + assert notes_after is None + + +# ==================================================================== +# Tier 6: Batch Mode (B1-B2) +# ==================================================================== + + +class TestBatchMode: + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_b1_batch_indexes_multiple_files( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + patched_batch_sessions, + ): + """B1: Batch with 3 files indexes all of them.""" + from 
app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "a.md").write_text("File A content") + (tmp_path / "b.md").write_text("File B content") + (tmp_path / "c.md").write_text("File C content") + + count, failed, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[ + str(tmp_path / "a.md"), + str(tmp_path / "b.md"), + str(tmp_path / "c.md"), + ], + ) + + assert count == 3 + assert failed == 0 + assert err is None + + docs = ( + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ) + .scalars() + .all() + ) + assert len(docs) == 3 + assert {d.title for d in docs} == {"a.md", "b.md", "c.md"} + assert all( + DocumentStatus.is_state(d.status, DocumentStatus.READY) for d in docs + ) + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_b2_partial_failure( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + patched_batch_sessions, + ): + """B2: One unreadable file fails gracefully; the other two still get indexed.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "good1.md").write_text("Good file one") + (tmp_path / "good2.md").write_text("Good file two") + (tmp_path / "bad.md").write_bytes(b"\x00binary garbage") + + count, failed, _, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[ + str(tmp_path / "good1.md"), + str(tmp_path / "bad.md"), + str(tmp_path / "good2.md"), + ], + ) + + assert count == 2 + assert failed == 1 + assert err is not None + + docs = ( + ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ) + .scalars() + .all() + ) + assert len(docs) == 2 + assert {d.title for d in docs} == {"good1.md", "good2.md"} + + +# ==================================================================== +# Tier 5: Pipeline Integration (P1) +# ==================================================================== + + +class TestPipelineIntegration: + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_p1_local_folder_file_through_pipeline( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + mocker, + ): + """P1: LOCAL_FOLDER_FILE ConnectorDocument through prepare+index to READY.""" + from app.indexing_pipeline.connector_document import ConnectorDocument + from app.indexing_pipeline.indexing_pipeline_service import ( + IndexingPipelineService, + ) + + doc = ConnectorDocument( + title="Test Local File", + source_markdown="## Local file\n\nContent from disk.", + unique_id="test-folder:test.md", + document_type=DocumentType.LOCAL_FOLDER_FILE, + search_space_id=db_search_space.id, + connector_id=None, + created_by_id=str(db_user.id), + ) + + service = IndexingPipelineService(session=db_session) + prepared = await service.prepare_for_indexing([doc]) + assert len(prepared) == 1 + + db_doc = prepared[0] + result = await service.index(db_doc, doc, llm=mocker.Mock()) + assert result is not None + + docs = ( + ( + await db_session.execute( + select(Document).where( + 
Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ) + .scalars() + .all() + ) + assert len(docs) == 1 + assert DocumentStatus.is_state(docs[0].status, DocumentStatus.READY) + + +# ==================================================================== +# Tier 7: Direct Converters (DC1-DC4) +# ==================================================================== + + +class TestDirectConvert: + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_dc1_csv_produces_markdown_table( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """DC1: CSV file is indexed as a markdown table, not raw comma-separated text.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "data.csv").write_text("name,age,city\nAlice,30,NYC\nBob,25,LA\n") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert err is None + assert count == 1 + + doc = ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ).scalar_one() + + assert "| name" in doc.source_markdown + assert "| Alice" in doc.source_markdown + assert "name,age,city" not in doc.source_markdown + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_dc2_tsv_produces_markdown_table( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """DC2: TSV file is indexed as a markdown table.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "data.tsv").write_text( + "name\tage\tcity\nAlice\t30\tNYC\nBob\t25\tLA\n" + ) + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert err is None + assert count == 1 + + doc = ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ).scalar_one() + + assert "| name" in doc.source_markdown + assert "| Alice" in doc.source_markdown + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_dc3_html_produces_clean_markdown( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """DC3: HTML file is indexed as clean markdown, not raw HTML.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "page.html").write_text("

<html><body><h1>Title</h1><p>Hello world</p></body></html>
") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert err is None + assert count == 1 + + doc = ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ).scalar_one() + + assert "Title" in doc.source_markdown + assert "
<html>
" not in doc.source_markdown + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_dc4_csv_single_file_mode( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """DC4: CSV via single-file batch mode also produces a markdown table.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "data.csv").write_text("name,age,city\nAlice,30,NYC\nBob,25,LA\n") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[str(tmp_path / "data.csv")], + ) + + assert err is None + assert count == 1 + + doc = ( + await db_session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == db_search_space.id, + ) + ) + ).scalar_one() + + assert "| name" in doc.source_markdown + assert "name,age,city" not in doc.source_markdown + + +# ==================================================================== +# Tier 8: Page Limits (PL1-PL6) +# ==================================================================== + + +class TestPageLimits: + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl1_full_scan_increments_pages_used( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL1: Successful full-scan sync increments user.pages_used.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 0 + db_user.pages_limit = 500 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert err is None + assert count == 1 + + await db_session.refresh(db_user) + assert db_user.pages_used > 0, "pages_used should increase after indexing" + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl2_full_scan_blocked_when_limit_exhausted( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL2: Full-scan skips file when page limit is exhausted.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 100 + db_user.pages_limit = 100 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert count == 0 + + await db_session.refresh(db_user) + assert db_user.pages_used == 100, "pages_used should not change on rejection" + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl3_single_file_increments_pages_used( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL3: Single-file mode increments user.pages_used on success.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 0 + db_user.pages_limit = 500 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello 
World\n\nContent here.") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[str(tmp_path / "note.md")], + ) + + assert err is None + assert count == 1 + + await db_session.refresh(db_user) + assert db_user.pages_used > 0, "pages_used should increase after indexing" + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl4_single_file_blocked_when_limit_exhausted( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL4: Single-file mode skips file when page limit is exhausted.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 100 + db_user.pages_limit = 100 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello World\n\nContent here.") + + count, _skipped, _root_folder_id, err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[str(tmp_path / "note.md")], + ) + + assert count == 0 + assert err is not None + assert "page limit" in err.lower() + + await db_session.refresh(db_user) + assert db_user.pages_used == 100, "pages_used should not change on rejection" + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl5_unchanged_resync_no_extra_pages( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """PL5: Re-syncing an unchanged file does not consume additional pages.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 0 + db_user.pages_limit = 500 + await db_session.flush() + + (tmp_path / "note.md").write_text("# Hello\n\nSame content.") + + count1, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + assert count1 == 1 + + await db_session.refresh(db_user) + pages_after_first = db_user.pages_used + assert pages_after_first > 0 + + count2, _, _, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + root_folder_id=root_folder_id, + ) + assert count2 == 0 + + await db_session.refresh(db_user) + assert db_user.pages_used == pages_after_first, ( + "pages_used should not increase for unchanged files" + ) + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_pl6_batch_partial_page_limit_exhaustion( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + patched_batch_sessions, + ): + """PL6: Batch mode with a very low page limit: some files succeed, rest fail.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + db_user.pages_used = 0 + db_user.pages_limit = 1 + await db_session.flush() + + (tmp_path / "a.md").write_text("File A content") + (tmp_path / "b.md").write_text("File B content") + (tmp_path / "c.md").write_text("File C content") + + count, failed, _root_folder_id, _err = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + 
folder_name="test-folder", + target_file_paths=[ + str(tmp_path / "a.md"), + str(tmp_path / "b.md"), + str(tmp_path / "c.md"), + ], + ) + + assert count >= 1, "at least one file should succeed" + assert failed >= 1, "at least one file should fail due to page limit" + assert count + failed == 3 + + await db_session.refresh(db_user) + assert db_user.pages_used > 0 + assert db_user.pages_used <= db_user.pages_limit + 1 diff --git a/surfsense_backend/tests/integration/test_document_versioning.py b/surfsense_backend/tests/integration/test_document_versioning.py new file mode 100644 index 000000000..9bd03d219 --- /dev/null +++ b/surfsense_backend/tests/integration/test_document_versioning.py @@ -0,0 +1,167 @@ +"""Integration tests for document versioning snapshot + cleanup.""" + +from datetime import UTC, datetime, timedelta + +import pytest +import pytest_asyncio +from sqlalchemy import func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import Document, DocumentType, DocumentVersion, SearchSpace, User + +pytestmark = pytest.mark.integration + + +@pytest_asyncio.fixture +async def db_document( + db_session: AsyncSession, db_user: User, db_search_space: SearchSpace +) -> Document: + doc = Document( + title="Test Doc", + document_type=DocumentType.LOCAL_FOLDER_FILE, + document_metadata={}, + content="Summary of test doc.", + content_hash="abc123", + unique_identifier_hash="local_folder:test-folder:test.md", + source_markdown="# Test\n\nOriginal content.", + search_space_id=db_search_space.id, + created_by_id=db_user.id, + ) + db_session.add(doc) + await db_session.flush() + return doc + + +async def _version_count(session: AsyncSession, document_id: int) -> int: + result = await session.execute( + select(func.count()) + .select_from(DocumentVersion) + .where(DocumentVersion.document_id == document_id) + ) + return result.scalar_one() + + +async def _get_versions( + session: AsyncSession, document_id: int +) -> list[DocumentVersion]: + result = await session.execute( + select(DocumentVersion) + .where(DocumentVersion.document_id == document_id) + .order_by(DocumentVersion.version_number) + ) + return list(result.scalars().all()) + + +class TestCreateVersionSnapshot: + """V1-V5: TDD slices for create_version_snapshot.""" + + async def test_v1_creates_first_version(self, db_session, db_document): + """V1: First snapshot creates version 1 with the document's current state.""" + from app.utils.document_versioning import create_version_snapshot + + await create_version_snapshot(db_session, db_document) + + versions = await _get_versions(db_session, db_document.id) + assert len(versions) == 1 + assert versions[0].version_number == 1 + assert versions[0].source_markdown == "# Test\n\nOriginal content." + assert versions[0].content_hash == "abc123" + assert versions[0].title == "Test Doc" + assert versions[0].document_id == db_document.id + + async def test_v2_creates_version_2_after_30_min( + self, db_session, db_document, monkeypatch + ): + """V2: After 30+ minutes, a new version is created (not overwritten).""" + from app.utils.document_versioning import create_version_snapshot + + t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: t0) + await create_version_snapshot(db_session, db_document) + + # Simulate content change and time passing + db_document.source_markdown = "# Test\n\nUpdated content." 
+ db_document.content_hash = "def456" + t1 = t0 + timedelta(minutes=31) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: t1) + await create_version_snapshot(db_session, db_document) + + versions = await _get_versions(db_session, db_document.id) + assert len(versions) == 2 + assert versions[0].version_number == 1 + assert versions[1].version_number == 2 + assert versions[1].source_markdown == "# Test\n\nUpdated content." + + async def test_v3_overwrites_within_30_min( + self, db_session, db_document, monkeypatch + ): + """V3: Within 30 minutes, the latest version is overwritten.""" + from app.utils.document_versioning import create_version_snapshot + + t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: t0) + await create_version_snapshot(db_session, db_document) + count_after_first = await _version_count(db_session, db_document.id) + assert count_after_first == 1 + + # Simulate quick edit within 30 minutes + db_document.source_markdown = "# Test\n\nQuick edit." + db_document.content_hash = "quick123" + t1 = t0 + timedelta(minutes=10) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: t1) + await create_version_snapshot(db_session, db_document) + + count_after_second = await _version_count(db_session, db_document.id) + assert count_after_second == 1 # still 1, not 2 + + versions = await _get_versions(db_session, db_document.id) + assert versions[0].source_markdown == "# Test\n\nQuick edit." + assert versions[0].content_hash == "quick123" + + async def test_v4_cleanup_90_day_old_versions( + self, db_session, db_document, monkeypatch + ): + """V4: Versions older than 90 days are cleaned up.""" + from app.utils.document_versioning import create_version_snapshot + + base = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC) + + # Create 5 versions spread across time: 3 older than 90 days, 2 recent + for i in range(5): + db_document.source_markdown = f"Content v{i + 1}" + db_document.content_hash = f"hash_{i + 1}" + t = base + timedelta(days=i) if i < 3 else base + timedelta(days=100 + i) + monkeypatch.setattr("app.utils.document_versioning._now", lambda _t=t: _t) + await create_version_snapshot(db_session, db_document) + + # Now trigger cleanup from a "current" time that makes the first 3 versions > 90 days old + now = base + timedelta(days=200) + monkeypatch.setattr("app.utils.document_versioning._now", lambda: now) + db_document.source_markdown = "Content v6" + db_document.content_hash = "hash_6" + await create_version_snapshot(db_session, db_document) + + versions = await _get_versions(db_session, db_document.id) + # The first 3 (old) should be cleaned up; versions 4, 5, 6 remain + for v in versions: + age = now - v.created_at.replace(tzinfo=UTC) + assert age <= timedelta(days=90), f"Version {v.version_number} is too old" + + async def test_v5_cap_at_20_versions(self, db_session, db_document, monkeypatch): + """V5: More than 20 versions triggers cap — oldest gets deleted.""" + from app.utils.document_versioning import create_version_snapshot + + base = datetime(2025, 6, 1, 12, 0, 0, tzinfo=UTC) + + # Create 21 versions (all within 90 days, each 31 min apart) + for i in range(21): + db_document.source_markdown = f"Content v{i + 1}" + db_document.content_hash = f"hash_{i + 1}" + t = base + timedelta(minutes=31 * i) + monkeypatch.setattr("app.utils.document_versioning._now", lambda _t=t: _t) + await create_version_snapshot(db_session, db_document) + + versions = await _get_versions(db_session, 
db_document.id) + assert len(versions) == 20 + # The lowest version_number should be 2 (version 1 was the oldest and got capped) + assert versions[0].version_number == 2 diff --git a/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py b/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py new file mode 100644 index 000000000..c6e7b160c --- /dev/null +++ b/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py @@ -0,0 +1,78 @@ +"""Unit tests for scan_folder() pure logic — Tier 2 TDD slices (S1-S4).""" + +from pathlib import Path + +import pytest + +pytestmark = pytest.mark.unit + + +class TestScanFolder: + """S1-S4: scan_folder() with real tmp_path filesystem.""" + + def test_s1_single_md_file(self, tmp_path: Path): + """S1: scan_folder on a dir with one .md file returns correct entry.""" + from app.tasks.connector_indexers.local_folder_indexer import scan_folder + + md = tmp_path / "note.md" + md.write_text("# Hello") + + results = scan_folder(str(tmp_path)) + + assert len(results) == 1 + entry = results[0] + assert entry["relative_path"] == "note.md" + assert entry["size"] > 0 + assert "modified_at" in entry + assert entry["path"] == str(md) + + def test_s2_extension_filter(self, tmp_path: Path): + """S2: file_extensions filter returns only matching files.""" + from app.tasks.connector_indexers.local_folder_indexer import scan_folder + + (tmp_path / "a.md").write_text("md") + (tmp_path / "b.txt").write_text("txt") + (tmp_path / "c.pdf").write_bytes(b"%PDF") + + results = scan_folder(str(tmp_path), file_extensions=[".md"]) + names = {r["relative_path"] for r in results} + + assert names == {"a.md"} + + def test_s3_exclude_patterns(self, tmp_path: Path): + """S3: exclude_patterns skips files inside excluded directories.""" + from app.tasks.connector_indexers.local_folder_indexer import scan_folder + + (tmp_path / "good.md").write_text("good") + nm = tmp_path / "node_modules" + nm.mkdir() + (nm / "dep.js").write_text("module") + git = tmp_path / ".git" + git.mkdir() + (git / "config").write_text("gitconfig") + + results = scan_folder(str(tmp_path), exclude_patterns=["node_modules", ".git"]) + names = {r["relative_path"] for r in results} + + assert "good.md" in names + assert not any("node_modules" in n for n in names) + assert not any(".git" in n for n in names) + + def test_s4_nested_dirs(self, tmp_path: Path): + """S4: nested subdirectories produce correct relative paths.""" + from app.tasks.connector_indexers.local_folder_indexer import scan_folder + + daily = tmp_path / "notes" / "daily" + daily.mkdir(parents=True) + weekly = tmp_path / "notes" / "weekly" + weekly.mkdir(parents=True) + (daily / "today.md").write_text("today") + (weekly / "review.md").write_text("review") + (tmp_path / "root.txt").write_text("root") + + results = scan_folder(str(tmp_path)) + paths = {r["relative_path"] for r in results} + + assert "notes/daily/today.md" in paths or "notes\\daily\\today.md" in paths + assert "notes/weekly/review.md" in paths or "notes\\weekly\\review.md" in paths + assert "root.txt" in paths diff --git a/surfsense_desktop/package.json b/surfsense_desktop/package.json index bd0cc67ab..21e7f4bea 100644 --- a/surfsense_desktop/package.json +++ b/surfsense_desktop/package.json @@ -27,6 +27,8 @@ "wait-on": "^9.0.4" }, "dependencies": { + "chokidar": "^5.0.0", + "electron-store": "^11.0.2", "electron-updater": "^6.8.3", "get-port-please": "^3.2.0" } diff --git a/surfsense_desktop/pnpm-lock.yaml b/surfsense_desktop/pnpm-lock.yaml 
index ea65be0bb..528f81539 100644 --- a/surfsense_desktop/pnpm-lock.yaml +++ b/surfsense_desktop/pnpm-lock.yaml @@ -8,6 +8,12 @@ importers: .: dependencies: + chokidar: + specifier: ^5.0.0 + version: 5.0.0 + electron-store: + specifier: ^11.0.2 + version: 11.0.2 electron-updater: specifier: ^6.8.3 version: 6.8.3 @@ -352,6 +358,14 @@ packages: resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} + ajv-formats@3.0.1: + resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} + peerDependencies: + ajv: ^8.0.0 + peerDependenciesMeta: + ajv: + optional: true + ajv-keywords@3.5.2: resolution: {integrity: sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==} peerDependencies: @@ -360,6 +374,9 @@ packages: ajv@6.14.0: resolution: {integrity: sha512-IWrosm/yrn43eiKqkfkHis7QioDleaXQHdDVPKg0FSwwd/DuvyX79TZnFOnYpB7dcsFAMmtFztZuXPDvSePkFw==} + ajv@8.18.0: + resolution: {integrity: sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==} + ansi-regex@5.0.1: resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} engines: {node: '>=8'} @@ -411,6 +428,9 @@ packages: resolution: {integrity: sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg==} engines: {node: '>= 4.0.0'} + atomically@2.1.1: + resolution: {integrity: sha512-P4w9o2dqARji6P7MHprklbfiArZAWvo07yW7qs3pdljb3BWr12FIB7W+p0zJiuiVsUpRO0iZn1kFFcpPegg0tQ==} + axios@1.13.6: resolution: {integrity: sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==} @@ -477,6 +497,10 @@ packages: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} + chokidar@5.0.0: + resolution: {integrity: sha512-TQMmc3w+5AxjpL8iIiwebF73dRDF4fBIieAqGn9RGCWaEVwQ6Fb2cGe31Yns0RRIzii5goJ1Y7xbMwo1TxMplw==} + engines: {node: '>= 20.19.0'} + chownr@3.0.0: resolution: {integrity: sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==} engines: {node: '>=18'} @@ -546,6 +570,10 @@ packages: engines: {node: '>=18'} hasBin: true + conf@15.1.0: + resolution: {integrity: sha512-Uy5YN9KEu0WWDaZAVJ5FAmZoaJt9rdK6kH+utItPyGsCqCgaTKkrmZx3zoE0/3q6S3bcp3Ihkk+ZqPxWxFK5og==} + engines: {node: '>=20'} + core-util-is@1.0.2: resolution: {integrity: sha512-3lqz5YjWTYnW6dlDa5TLaTCcShfar1e40rmcJVwCBJC6mWlFuj0eCHIElmG1g5kyuJ/GD+8Wn4FFCcz4gJPfaQ==} @@ -559,6 +587,10 @@ packages: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} + debounce-fn@6.0.0: + resolution: {integrity: sha512-rBMW+F2TXryBwB54Q0d8drNEI+TfoS9JpNTAoVpukbWEhjXQq4rySFYLaqXMFXwdv61Zb2OHtj5bviSoimqxRQ==} + engines: {node: '>=18'} + debug@4.4.3: resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} engines: {node: '>=6.0'} @@ -610,6 +642,10 @@ packages: os: [darwin] hasBin: true + dot-prop@10.1.0: + resolution: {integrity: sha512-MVUtAugQMOff5RnBy2d9N31iG0lNwg1qAoAOn7pOK5wf94WIaE3My2p3uwTQuvS2AcqchkcR3bHByjaM0mmi7Q==} + engines: {node: '>=20'} + dotenv-expand@11.0.7: resolution: {integrity: 
sha512-zIHwmZPRshsCdpMDyVsqGmgyP0yT8GAgXUnkdAoJisxvf33k7yO6OuoKmcTGuXPWSsm8Oh88nZicRLA9Y0rUeA==} engines: {node: '>=12'} @@ -645,6 +681,10 @@ packages: electron-publish@26.8.1: resolution: {integrity: sha512-q+jrSTIh/Cv4eGZa7oVR+grEJo/FoLMYBAnSL5GCtqwUpr1T+VgKB/dn1pnzxIxqD8S/jP1yilT9VrwCqINR4w==} + electron-store@11.0.2: + resolution: {integrity: sha512-4VkNRdN+BImL2KcCi41WvAYbh6zLX5AUTi4so68yPqiItjbgTjqpEnGAqasgnG+lB6GuAyUltKwVopp6Uv+gwQ==} + engines: {node: '>=20'} + electron-updater@6.8.3: resolution: {integrity: sha512-Z6sgw3jgbikWKXei1ENdqFOxBP0WlXg3TtKfz0rgw2vIZFJUyI4pD7ZN7jrkm7EoMK+tcm/qTnPUdqfZukBlBQ==} @@ -673,6 +713,10 @@ packages: resolution: {integrity: sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==} engines: {node: '>=6'} + env-paths@3.0.0: + resolution: {integrity: sha512-dtJUTepzMW3Lm/NPxRf3wP4642UWhjL2sQxc+ym2YMj1m/H2zDNQOlezafzkHwn6sMstjHTwG6iQQsctDW/b1A==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + err-code@2.0.3: resolution: {integrity: sha512-2bmlRpNKBxT/CRmPOlyISQpNj+qSeYvcym/uT0Jx2bMOlKLtSy1ZmLuVxSEKKyor/N5yhvp/ZiG1oE3DEYMSFA==} @@ -726,6 +770,9 @@ packages: fast-json-stable-stringify@2.1.0: resolution: {integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==} + fast-uri@3.1.0: + resolution: {integrity: sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==} + fd-slicer@1.1.0: resolution: {integrity: sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==} @@ -953,6 +1000,12 @@ packages: json-schema-traverse@0.4.1: resolution: {integrity: sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==} + json-schema-traverse@1.0.0: + resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==} + + json-schema-typed@8.0.2: + resolution: {integrity: sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==} + json-stringify-safe@5.0.1: resolution: {integrity: sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==} @@ -983,6 +1036,9 @@ packages: lodash@4.17.23: resolution: {integrity: sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==} + lodash@4.18.1: + resolution: {integrity: sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==} + log-symbols@4.1.0: resolution: {integrity: sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==} engines: {node: '>=10'} @@ -1027,6 +1083,10 @@ packages: resolution: {integrity: sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==} engines: {node: '>=6'} + mimic-function@5.0.1: + resolution: {integrity: sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==} + engines: {node: '>=18'} + mimic-response@1.0.1: resolution: {integrity: sha512-j5EctnkH7amfV/q5Hgmoal1g2QHFJRraOtmx0JpIqkxhBhI/lJSl1nMpQ45hVarwNETOoWEimndZ4QK0RHxuxQ==} engines: {node: '>=4'} @@ -1222,10 +1282,18 @@ packages: resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} engines: {node: '>= 6'} + readdirp@5.0.0: + resolution: {integrity: sha512-9u/XQ1pvrQtYyMpZe7DXKv2p5CNvyVwzUB6uhLAnQwHMSgKMBR62lc7AHljaeteeHXn11XTAaLLUVZYVZyuRBQ==} + engines: 
{node: '>= 20.19.0'} + require-directory@2.1.1: resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} engines: {node: '>=0.10.0'} + require-from-string@2.0.2: + resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} + engines: {node: '>=0.10.0'} + resedit@1.7.2: resolution: {integrity: sha512-vHjcY2MlAITJhC0eRD/Vv8Vlgmu9Sd3LX9zZvtGzU5ZImdTN3+d6e/4mnTyV8vEbyf1sgNIrWxhWlrys52OkEA==} engines: {node: '>=12', npm: '>=6'} @@ -1365,6 +1433,12 @@ packages: resolution: {integrity: sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==} engines: {node: '>=12'} + stubborn-fs@2.0.0: + resolution: {integrity: sha512-Y0AvSwDw8y+nlSNFXMm2g6L51rBGdAQT20J3YSOqxC53Lo3bjWRtr2BKcfYoAf352WYpsZSTURrA0tqhfgudPA==} + + stubborn-utils@1.0.2: + resolution: {integrity: sha512-zOh9jPYI+xrNOyisSelgym4tolKTJCQd5GBhK0+0xJvcYDcwlOoxF/rnFKQ2KRZknXSG9jWAp66fwP6AxN9STg==} + sumchecker@3.0.1: resolution: {integrity: sha512-MvjXzkz/BOfyVDkG0oFOtBxHX2u3gKbMHIF/dXblZsgD3BWOFLmHovIpZY7BykJdAjcqRCBi1WYBNdEC9yI7vg==} engines: {node: '>= 8.0'} @@ -1377,6 +1451,10 @@ packages: resolution: {integrity: sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==} engines: {node: '>=10'} + tagged-tag@1.0.0: + resolution: {integrity: sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng==} + engines: {node: '>=20'} + tar@7.5.11: resolution: {integrity: sha512-ChjMH33/KetonMTAtpYdgUFr0tbz69Fp2v7zWxQfYZX4g5ZN2nOBXm1R2xyA+lMIKrLKIoKAwFj93jE/avX9cQ==} engines: {node: '>=18'} @@ -1419,11 +1497,19 @@ packages: resolution: {integrity: sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==} engines: {node: '>=10'} + type-fest@5.5.0: + resolution: {integrity: sha512-PlBfpQwiUvGViBNX84Yxwjsdhd1TUlXr6zjX7eoirtCPIr08NAmxwa+fcYBTeRQxHo9YC9wwF3m9i700sHma8g==} + engines: {node: '>=20'} + typescript@5.9.3: resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==} engines: {node: '>=14.17'} hasBin: true + uint8array-extras@1.5.0: + resolution: {integrity: sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A==} + engines: {node: '>=18'} + undici-types@7.16.0: resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} @@ -1467,6 +1553,9 @@ packages: wcwidth@1.0.1: resolution: {integrity: sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg==} + when-exit@2.1.5: + resolution: {integrity: sha512-VGkKJ564kzt6Ms1dbgPP/yuIoQCrsFAnRbptpC5wOEsDaNsbCB2bnfnaA8i/vRs5tjUSEOtIuvl9/MyVsvQZCg==} + which@2.0.2: resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} engines: {node: '>= 8'} @@ -1827,6 +1916,10 @@ snapshots: agent-base@7.1.4: {} + ajv-formats@3.0.1(ajv@8.18.0): + optionalDependencies: + ajv: 8.18.0 + ajv-keywords@3.5.2(ajv@6.14.0): dependencies: ajv: 6.14.0 @@ -1838,6 +1931,13 @@ snapshots: json-schema-traverse: 0.4.1 uri-js: 4.4.1 + ajv@8.18.0: + dependencies: + fast-deep-equal: 3.1.3 + fast-uri: 3.1.0 + json-schema-traverse: 1.0.0 + require-from-string: 2.0.2 + ansi-regex@5.0.1: {} ansi-regex@6.2.2: {} @@ -1909,6 +2009,11 @@ snapshots: at-least-node@1.0.0: {} + atomically@2.1.1: + dependencies: + 
stubborn-fs: 2.0.0 + when-exit: 2.1.5 + axios@1.13.6: dependencies: follow-redirects: 1.15.11 @@ -2019,6 +2124,10 @@ snapshots: ansi-styles: 4.3.0 supports-color: 7.2.0 + chokidar@5.0.0: + dependencies: + readdirp: 5.0.0 + chownr@3.0.0: {} chromium-pickle-js@0.2.0: {} @@ -2079,6 +2188,18 @@ snapshots: tree-kill: 1.2.2 yargs: 17.7.2 + conf@15.1.0: + dependencies: + ajv: 8.18.0 + ajv-formats: 3.0.1(ajv@8.18.0) + atomically: 2.1.1 + debounce-fn: 6.0.0 + dot-prop: 10.1.0 + env-paths: 3.0.0 + json-schema-typed: 8.0.2 + semver: 7.7.4 + uint8array-extras: 1.5.0 + core-util-is@1.0.2: optional: true @@ -2096,6 +2217,10 @@ snapshots: shebang-command: 2.0.0 which: 2.0.2 + debounce-fn@6.0.0: + dependencies: + mimic-function: 5.0.1 + debug@4.4.3: dependencies: ms: 2.1.3 @@ -2161,6 +2286,10 @@ snapshots: verror: 1.10.1 optional: true + dot-prop@10.1.0: + dependencies: + type-fest: 5.5.0 + dotenv-expand@11.0.7: dependencies: dotenv: 16.6.1 @@ -2219,6 +2348,11 @@ snapshots: transitivePeerDependencies: - supports-color + electron-store@11.0.2: + dependencies: + conf: 15.1.0 + type-fest: 5.5.0 + electron-updater@6.8.3: dependencies: builder-util-runtime: 9.5.1 @@ -2237,7 +2371,7 @@ snapshots: '@electron/asar': 3.4.1 debug: 4.4.3 fs-extra: 7.0.1 - lodash: 4.17.23 + lodash: 4.18.1 temp: 0.9.4 optionalDependencies: '@electron/windows-sign': 1.2.2 @@ -2267,6 +2401,8 @@ snapshots: env-paths@2.2.1: {} + env-paths@3.0.0: {} + err-code@2.0.3: {} es-define-property@1.0.1: {} @@ -2340,6 +2476,8 @@ snapshots: fast-json-stable-stringify@2.1.0: {} + fast-uri@3.1.0: {} + fd-slicer@1.1.0: dependencies: pend: 1.2.0 @@ -2595,6 +2733,10 @@ snapshots: json-schema-traverse@0.4.1: {} + json-schema-traverse@1.0.0: {} + + json-schema-typed@8.0.2: {} + json-stringify-safe@5.0.1: optional: true @@ -2622,6 +2764,8 @@ snapshots: lodash@4.17.23: {} + lodash@4.18.1: {} + log-symbols@4.1.0: dependencies: chalk: 4.1.2 @@ -2668,6 +2812,8 @@ snapshots: mimic-fn@2.1.0: {} + mimic-function@5.0.1: {} + mimic-response@1.0.1: {} mimic-response@3.1.0: {} @@ -2863,8 +3009,12 @@ snapshots: string_decoder: 1.3.0 util-deprecate: 1.0.2 + readdirp@5.0.0: {} + require-directory@2.1.1: {} + require-from-string@2.0.2: {} + resedit@1.7.2: dependencies: pe-library: 0.4.1 @@ -3002,6 +3152,12 @@ snapshots: dependencies: ansi-regex: 6.2.2 + stubborn-fs@2.0.0: + dependencies: + stubborn-utils: 1.0.2 + + stubborn-utils@1.0.2: {} + sumchecker@3.0.1: dependencies: debug: 4.4.3 @@ -3016,6 +3172,8 @@ snapshots: dependencies: has-flag: 4.0.0 + tagged-tag@1.0.0: {} + tar@7.5.11: dependencies: '@isaacs/fs-minipass': 4.0.1 @@ -3062,8 +3220,14 @@ snapshots: type-fest@0.13.1: optional: true + type-fest@5.5.0: + dependencies: + tagged-tag: 1.0.0 + typescript@5.9.3: {} + uint8array-extras@1.5.0: {} + undici-types@7.16.0: {} undici-types@7.18.2: {} @@ -3109,6 +3273,8 @@ snapshots: dependencies: defaults: 1.0.4 + when-exit@2.1.5: {} + which@2.0.2: dependencies: isexe: 2.0.0 diff --git a/surfsense_desktop/src/ipc/channels.ts b/surfsense_desktop/src/ipc/channels.ts index 25ec1bc0e..2000964c7 100644 --- a/surfsense_desktop/src/ipc/channels.ts +++ b/surfsense_desktop/src/ipc/channels.ts @@ -6,4 +6,19 @@ export const IPC_CHANNELS = { SET_QUICK_ASK_MODE: 'set-quick-ask-mode', GET_QUICK_ASK_MODE: 'get-quick-ask-mode', REPLACE_TEXT: 'replace-text', + // Folder sync channels + FOLDER_SYNC_SELECT_FOLDER: 'folder-sync:select-folder', + FOLDER_SYNC_ADD_FOLDER: 'folder-sync:add-folder', + FOLDER_SYNC_REMOVE_FOLDER: 'folder-sync:remove-folder', + FOLDER_SYNC_GET_FOLDERS: 
'folder-sync:get-folders', + FOLDER_SYNC_GET_STATUS: 'folder-sync:get-status', + FOLDER_SYNC_FILE_CHANGED: 'folder-sync:file-changed', + FOLDER_SYNC_WATCHER_READY: 'folder-sync:watcher-ready', + FOLDER_SYNC_PAUSE: 'folder-sync:pause', + FOLDER_SYNC_RESUME: 'folder-sync:resume', + FOLDER_SYNC_RENDERER_READY: 'folder-sync:renderer-ready', + FOLDER_SYNC_GET_PENDING_EVENTS: 'folder-sync:get-pending-events', + FOLDER_SYNC_ACK_EVENTS: 'folder-sync:ack-events', + BROWSE_FILES: 'browse:files', + READ_LOCAL_FILES: 'browse:read-local-files', } as const; diff --git a/surfsense_desktop/src/ipc/handlers.ts b/surfsense_desktop/src/ipc/handlers.ts index 18e343719..c4251b30b 100644 --- a/surfsense_desktop/src/ipc/handlers.ts +++ b/surfsense_desktop/src/ipc/handlers.ts @@ -1,5 +1,19 @@ import { app, ipcMain, shell } from 'electron'; import { IPC_CHANNELS } from './channels'; +import { + selectFolder, + addWatchedFolder, + removeWatchedFolder, + getWatchedFolders, + getWatcherStatus, + getPendingFileEvents, + acknowledgeFileEvents, + pauseWatcher, + resumeWatcher, + markRendererReady, + browseFiles, + readLocalFiles, +} from '../modules/folder-watcher'; export function registerIpcHandlers(): void { ipcMain.on(IPC_CHANNELS.OPEN_EXTERNAL, (_event, url: string) => { @@ -16,4 +30,41 @@ export function registerIpcHandlers(): void { ipcMain.handle(IPC_CHANNELS.GET_APP_VERSION, () => { return app.getVersion(); }); + + // Folder sync handlers + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_SELECT_FOLDER, () => selectFolder()); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_ADD_FOLDER, (_event, config) => + addWatchedFolder(config) + ); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_REMOVE_FOLDER, (_event, folderPath: string) => + removeWatchedFolder(folderPath) + ); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_GET_FOLDERS, () => getWatchedFolders()); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_GET_STATUS, () => getWatcherStatus()); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_PAUSE, () => pauseWatcher()); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_RESUME, () => resumeWatcher()); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_RENDERER_READY, () => { + markRendererReady(); + }); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_GET_PENDING_EVENTS, () => + getPendingFileEvents() + ); + + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_ACK_EVENTS, (_event, eventIds: string[]) => + acknowledgeFileEvents(eventIds) + ); + + ipcMain.handle(IPC_CHANNELS.BROWSE_FILES, () => browseFiles()); + + ipcMain.handle(IPC_CHANNELS.READ_LOCAL_FILES, (_event, paths: string[]) => + readLocalFiles(paths) + ); } diff --git a/surfsense_desktop/src/main.ts b/surfsense_desktop/src/main.ts index 3ab41073b..f745d9b5e 100644 --- a/surfsense_desktop/src/main.ts +++ b/surfsense_desktop/src/main.ts @@ -6,6 +6,7 @@ import { setupDeepLinks, handlePendingDeepLink } from './modules/deep-links'; import { setupAutoUpdater } from './modules/auto-updater'; import { setupMenu } from './modules/menu'; import { registerQuickAsk, unregisterQuickAsk } from './modules/quick-ask'; +import { registerFolderWatcher, unregisterFolderWatcher } from './modules/folder-watcher'; import { registerIpcHandlers } from './ipc/handlers'; registerGlobalErrorHandlers(); @@ -28,6 +29,7 @@ app.whenReady().then(async () => { } createMainWindow(); registerQuickAsk(); + registerFolderWatcher(); setupAutoUpdater(); handlePendingDeepLink(); @@ -47,4 +49,5 @@ app.on('window-all-closed', () => { app.on('will-quit', () => { unregisterQuickAsk(); + unregisterFolderWatcher(); }); diff --git 
a/surfsense_desktop/src/modules/folder-watcher.ts b/surfsense_desktop/src/modules/folder-watcher.ts new file mode 100644 index 000000000..969dabe97 --- /dev/null +++ b/surfsense_desktop/src/modules/folder-watcher.ts @@ -0,0 +1,534 @@ +import { BrowserWindow, dialog } from 'electron'; +import chokidar, { type FSWatcher } from 'chokidar'; +import { randomUUID } from 'crypto'; +import * as path from 'path'; +import * as fs from 'fs'; +import { IPC_CHANNELS } from '../ipc/channels'; + +export interface WatchedFolderConfig { + path: string; + name: string; + excludePatterns: string[]; + fileExtensions: string[] | null; + rootFolderId: number | null; + searchSpaceId: number; + active: boolean; +} + +interface WatcherEntry { + config: WatchedFolderConfig; + watcher: FSWatcher | null; +} + +type MtimeMap = Record; +type FolderSyncAction = 'add' | 'change' | 'unlink'; + +export interface FolderSyncFileChangedEvent { + id: string; + rootFolderId: number | null; + searchSpaceId: number; + folderPath: string; + folderName: string; + relativePath: string; + fullPath: string; + action: FolderSyncAction; + timestamp: number; +} + +const STORE_KEY = 'watchedFolders'; +const OUTBOX_STORE_KEY = 'events'; +const MTIME_TOLERANCE_S = 1.0; + +let store: any = null; +let mtimeStore: any = null; +let outboxStore: any = null; +let watchers: Map = new Map(); + +/** + * In-memory cache of mtime maps, keyed by folder path. + * Persisted to electron-store on mutation. + */ +const mtimeMaps: Map = new Map(); + +let rendererReady = false; +const outboxEvents: Map = new Map(); +let outboxLoaded = false; + +export function markRendererReady() { + rendererReady = true; +} + +async function getStore() { + if (!store) { + const { default: Store } = await import('electron-store'); + store = new Store({ + name: 'folder-watcher', + defaults: { + [STORE_KEY]: [] as WatchedFolderConfig[], + }, + }); + } + return store; +} + +async function getMtimeStore() { + if (!mtimeStore) { + const { default: Store } = await import('electron-store'); + mtimeStore = new Store({ + name: 'folder-mtime-maps', + defaults: {} as Record, + }); + } + return mtimeStore; +} + +async function getOutboxStore() { + if (!outboxStore) { + const { default: Store } = await import('electron-store'); + outboxStore = new Store({ + name: 'folder-sync-outbox', + defaults: { + [OUTBOX_STORE_KEY]: [] as FolderSyncFileChangedEvent[], + }, + }); + } + return outboxStore; +} + +function makeEventKey(event: Pick): string { + return `${event.folderPath}:${event.relativePath}`; +} + +function persistOutbox() { + getOutboxStore().then((s) => { + s.set(OUTBOX_STORE_KEY, Array.from(outboxEvents.values())); + }); +} + +async function loadOutbox() { + if (outboxLoaded) return; + const s = await getOutboxStore(); + const stored: FolderSyncFileChangedEvent[] = s.get(OUTBOX_STORE_KEY, []); + outboxEvents.clear(); + for (const event of stored) { + if (!event?.id || !event.folderPath || !event.relativePath) continue; + outboxEvents.set(makeEventKey(event), event); + } + outboxLoaded = true; +} + +function sendFileChangedEvent( + data: Omit +) { + const event: FolderSyncFileChangedEvent = { + id: randomUUID(), + ...data, + }; + + outboxEvents.set(makeEventKey(event), event); + persistOutbox(); + + if (rendererReady) { + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, event); + } +} + +function loadMtimeMap(folderPath: string): MtimeMap { + return mtimeMaps.get(folderPath) ?? {}; +} + +function persistMtimeMap(folderPath: string) { + const map = mtimeMaps.get(folderPath) ?? 
{}; + getMtimeStore().then((s) => s.set(folderPath, map)); +} + +function walkFolderMtimes(config: WatchedFolderConfig): MtimeMap { + const root = config.path; + const result: MtimeMap = {}; + const excludes = new Set(config.excludePatterns); + + function walk(dir: string) { + let entries: fs.Dirent[]; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + + for (const entry of entries) { + const name = entry.name; + + if (name.startsWith('.') || excludes.has(name)) continue; + + const full = path.join(dir, name); + + if (entry.isDirectory()) { + walk(full); + } else if (entry.isFile()) { + if ( + config.fileExtensions && + config.fileExtensions.length > 0 + ) { + const ext = path.extname(name).toLowerCase(); + if (!config.fileExtensions.includes(ext)) continue; + } + + try { + const stat = fs.statSync(full); + const rel = path.relative(root, full); + result[rel] = stat.mtimeMs; + } catch { + // File may have been removed between readdir and stat + } + } + } + } + + walk(root); + return result; +} + +function getMainWindow(): BrowserWindow | null { + const windows = BrowserWindow.getAllWindows(); + return windows.length > 0 ? windows[0] : null; +} + +function sendToRenderer(channel: string, data: any) { + const win = getMainWindow(); + if (win && !win.isDestroyed()) { + win.webContents.send(channel, data); + } +} + +async function startWatcher(config: WatchedFolderConfig) { + if (watchers.has(config.path)) { + return; + } + + const ms = await getMtimeStore(); + const storedMap: MtimeMap = ms.get(config.path) ?? {}; + mtimeMaps.set(config.path, { ...storedMap }); + + const ignored = [ + /(^|[/\\])\../, // dotfiles by default + ...config.excludePatterns.map((p) => `**/${p}/**`), + ]; + + const watcher = chokidar.watch(config.path, { + persistent: true, + ignoreInitial: true, + awaitWriteFinish: { + stabilityThreshold: 500, + pollInterval: 100, + }, + ignored, + }); + + let ready = false; + + watcher.on('ready', () => { + ready = true; + + const currentMap = walkFolderMtimes(config); + const storedSnapshot = loadMtimeMap(config.path); + const now = Date.now(); + + // Track which files are unchanged so we can selectively update the mtime map + const unchangedMap: MtimeMap = {}; + + for (const [rel, currentMtime] of Object.entries(currentMap)) { + const storedMtime = storedSnapshot[rel]; + if (storedMtime === undefined) { + sendFileChangedEvent({ + rootFolderId: config.rootFolderId, + searchSpaceId: config.searchSpaceId, + folderPath: config.path, + folderName: config.name, + relativePath: rel, + fullPath: path.join(config.path, rel), + action: 'add', + timestamp: now, + }); + } else if (Math.abs(currentMtime - storedMtime) >= MTIME_TOLERANCE_S * 1000) { + sendFileChangedEvent({ + rootFolderId: config.rootFolderId, + searchSpaceId: config.searchSpaceId, + folderPath: config.path, + folderName: config.name, + relativePath: rel, + fullPath: path.join(config.path, rel), + action: 'change', + timestamp: now, + }); + } else { + unchangedMap[rel] = currentMtime; + } + } + + for (const rel of Object.keys(storedSnapshot)) { + if (!(rel in currentMap)) { + sendFileChangedEvent({ + rootFolderId: config.rootFolderId, + searchSpaceId: config.searchSpaceId, + folderPath: config.path, + folderName: config.name, + relativePath: rel, + fullPath: path.join(config.path, rel), + action: 'unlink', + timestamp: now, + }); + } + } + + // Only update the mtime map for unchanged files; changed files keep their + // stored mtime so they'll be re-detected if the app crashes before 
indexing. + mtimeMaps.set(config.path, unchangedMap); + persistMtimeMap(config.path); + + sendToRenderer(IPC_CHANNELS.FOLDER_SYNC_WATCHER_READY, { + rootFolderId: config.rootFolderId, + folderPath: config.path, + }); + }); + + const handleFileEvent = (filePath: string, action: FolderSyncAction) => { + if (!ready) return; + + const relativePath = path.relative(config.path, filePath); + + if ( + config.fileExtensions && + config.fileExtensions.length > 0 + ) { + const ext = path.extname(filePath).toLowerCase(); + if (!config.fileExtensions.includes(ext)) return; + } + + const map = mtimeMaps.get(config.path); + if (map) { + if (action === 'unlink') { + delete map[relativePath]; + } else { + try { + map[relativePath] = fs.statSync(filePath).mtimeMs; + } catch { + // File may have been removed between event and stat + } + } + persistMtimeMap(config.path); + } + + sendFileChangedEvent({ + rootFolderId: config.rootFolderId, + searchSpaceId: config.searchSpaceId, + folderPath: config.path, + folderName: config.name, + relativePath, + fullPath: filePath, + action, + timestamp: Date.now(), + }); + }; + + watcher.on('add', (fp) => handleFileEvent(fp, 'add')); + watcher.on('change', (fp) => handleFileEvent(fp, 'change')); + watcher.on('unlink', (fp) => handleFileEvent(fp, 'unlink')); + + watchers.set(config.path, { config, watcher }); +} + +function stopWatcher(folderPath: string) { + persistMtimeMap(folderPath); + const entry = watchers.get(folderPath); + if (entry?.watcher) { + entry.watcher.close(); + } + watchers.delete(folderPath); +} + +export async function selectFolder(): Promise { + const result = await dialog.showOpenDialog({ + properties: ['openDirectory'], + title: 'Select a folder to watch', + }); + if (result.canceled || result.filePaths.length === 0) { + return null; + } + return result.filePaths[0]; +} + +export async function addWatchedFolder( + config: WatchedFolderConfig +): Promise { + const s = await getStore(); + const folders: WatchedFolderConfig[] = s.get(STORE_KEY, []); + + const existing = folders.findIndex((f: WatchedFolderConfig) => f.path === config.path); + if (existing >= 0) { + folders[existing] = config; + } else { + folders.push(config); + } + + s.set(STORE_KEY, folders); + + if (config.active) { + await startWatcher(config); + } + + return folders; +} + +export async function removeWatchedFolder( + folderPath: string +): Promise { + const s = await getStore(); + const folders: WatchedFolderConfig[] = s.get(STORE_KEY, []); + const updated = folders.filter((f: WatchedFolderConfig) => f.path !== folderPath); + s.set(STORE_KEY, updated); + + stopWatcher(folderPath); + + mtimeMaps.delete(folderPath); + const ms = await getMtimeStore(); + ms.delete(folderPath); + + return updated; +} + +export async function getWatchedFolders(): Promise { + const s = await getStore(); + return s.get(STORE_KEY, []); +} + +export async function getWatcherStatus(): Promise< + { path: string; active: boolean; watching: boolean }[] +> { + const s = await getStore(); + const folders: WatchedFolderConfig[] = s.get(STORE_KEY, []); + return folders.map((f: WatchedFolderConfig) => ({ + path: f.path, + active: f.active, + watching: watchers.has(f.path), + })); +} + +export async function getPendingFileEvents(): Promise { + await loadOutbox(); + return Array.from(outboxEvents.values()).sort((a, b) => a.timestamp - b.timestamp); +} + +export async function acknowledgeFileEvents(eventIds: string[]): Promise<{ acknowledged: number }> { + if (!eventIds || eventIds.length === 0) return { acknowledged: 0 
}; + await loadOutbox(); + + const ackSet = new Set(eventIds); + let acknowledged = 0; + + for (const [key, event] of outboxEvents.entries()) { + if (ackSet.has(event.id)) { + outboxEvents.delete(key); + acknowledged += 1; + } + } + + if (acknowledged > 0) { + persistOutbox(); + } + + return { acknowledged }; +} + +export async function pauseWatcher(): Promise { + for (const [, entry] of watchers) { + if (entry.watcher) { + await entry.watcher.close(); + entry.watcher = null; + } + } +} + +export async function resumeWatcher(): Promise { + for (const [, entry] of watchers) { + if (!entry.watcher && entry.config.active) { + await startWatcher(entry.config); + } + } +} + +export async function registerFolderWatcher(): Promise { + await loadOutbox(); + const s = await getStore(); + const folders: WatchedFolderConfig[] = s.get(STORE_KEY, []); + + for (const config of folders) { + if (config.active && fs.existsSync(config.path)) { + await startWatcher(config); + } + } +} + +export async function unregisterFolderWatcher(): Promise { + for (const [folderPath] of watchers) { + stopWatcher(folderPath); + } + watchers.clear(); +} + +export async function browseFiles(): Promise { + const result = await dialog.showOpenDialog({ + properties: ['openFile', 'multiSelections'], + title: 'Select files', + }); + if (result.canceled || result.filePaths.length === 0) return null; + return result.filePaths; +} + +const MIME_MAP: Record = { + '.pdf': 'application/pdf', + '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + '.html': 'text/html', '.htm': 'text/html', + '.csv': 'text/csv', + '.txt': 'text/plain', + '.md': 'text/markdown', '.markdown': 'text/markdown', + '.mp3': 'audio/mpeg', '.mpeg': 'audio/mpeg', '.mpga': 'audio/mpeg', + '.mp4': 'audio/mp4', '.m4a': 'audio/mp4', + '.wav': 'audio/wav', + '.webm': 'audio/webm', + '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.bmp': 'image/bmp', + '.webp': 'image/webp', + '.tiff': 'image/tiff', + '.doc': 'application/msword', + '.rtf': 'application/rtf', + '.xml': 'application/xml', + '.epub': 'application/epub+zip', + '.xls': 'application/vnd.ms-excel', + '.ppt': 'application/vnd.ms-powerpoint', + '.eml': 'message/rfc822', + '.odt': 'application/vnd.oasis.opendocument.text', + '.msg': 'application/vnd.ms-outlook', +}; + +export interface LocalFileData { + name: string; + data: ArrayBuffer; + mimeType: string; + size: number; +} + +export function readLocalFiles(filePaths: string[]): LocalFileData[] { + return filePaths.map((p) => { + const buf = fs.readFileSync(p); + const ext = path.extname(p).toLowerCase(); + return { + name: path.basename(p), + data: buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength), + mimeType: MIME_MAP[ext] || 'application/octet-stream', + size: buf.byteLength, + }; + }); +} diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index 264ec25b3..6fbfd354a 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -21,4 +21,34 @@ contextBridge.exposeInMainWorld('electronAPI', { setQuickAskMode: (mode: string) => ipcRenderer.invoke(IPC_CHANNELS.SET_QUICK_ASK_MODE, mode), getQuickAskMode: () => ipcRenderer.invoke(IPC_CHANNELS.GET_QUICK_ASK_MODE), replaceText: (text: string) => ipcRenderer.invoke(IPC_CHANNELS.REPLACE_TEXT, text), + + // Folder sync + 
selectFolder: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_SELECT_FOLDER), + addWatchedFolder: (config: any) => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_ADD_FOLDER, config), + removeWatchedFolder: (folderPath: string) => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_REMOVE_FOLDER, folderPath), + getWatchedFolders: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_GET_FOLDERS), + getWatcherStatus: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_GET_STATUS), + onFileChanged: (callback: (data: any) => void) => { + const listener = (_event: unknown, data: any) => callback(data); + ipcRenderer.on(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, listener); + return () => { + ipcRenderer.removeListener(IPC_CHANNELS.FOLDER_SYNC_FILE_CHANGED, listener); + }; + }, + onWatcherReady: (callback: (data: any) => void) => { + const listener = (_event: unknown, data: any) => callback(data); + ipcRenderer.on(IPC_CHANNELS.FOLDER_SYNC_WATCHER_READY, listener); + return () => { + ipcRenderer.removeListener(IPC_CHANNELS.FOLDER_SYNC_WATCHER_READY, listener); + }; + }, + pauseWatcher: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_PAUSE), + resumeWatcher: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_RESUME), + signalRendererReady: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_RENDERER_READY), + getPendingFileEvents: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_GET_PENDING_EVENTS), + acknowledgeFileEvents: (eventIds: string[]) => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_ACK_EVENTS, eventIds), + + // Browse files via native dialog + browseFiles: () => ipcRenderer.invoke(IPC_CHANNELS.BROWSE_FILES), + readLocalFiles: (paths: string[]) => ipcRenderer.invoke(IPC_CHANNELS.READ_LOCAL_FILES, paths), }); diff --git a/surfsense_web/app/(home)/login/LocalLoginForm.tsx b/surfsense_web/app/(home)/login/LocalLoginForm.tsx index ee3b47683..e94857334 100644 --- a/surfsense_web/app/(home)/login/LocalLoginForm.tsx +++ b/surfsense_web/app/(home)/login/LocalLoginForm.tsx @@ -160,10 +160,10 @@ export function LocalLoginForm() { placeholder="you@example.com" value={username} onChange={(e) => setUsername(e.target.value)} - className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-2 focus:ring-offset-2 bg-background text-foreground transition-all ${ + className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ error.title - ? "border-destructive focus:border-destructive focus:ring-destructive" - : "border-border focus:border-primary focus:ring-primary" + ? "border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" }`} disabled={isLoggingIn} /> @@ -181,10 +181,10 @@ export function LocalLoginForm() { placeholder="Enter your password" value={password} onChange={(e) => setPassword(e.target.value)} - className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-2 focus:ring-offset-2 bg-background text-foreground transition-all ${ + className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${ error.title - ? "border-destructive focus:border-destructive focus:ring-destructive" - : "border-border focus:border-primary focus:ring-primary" + ? 
"border-destructive focus:border-destructive focus:ring-destructive/40" + : "border-border focus:border-primary focus:ring-primary/40" }`} disabled={isLoggingIn} /> diff --git a/surfsense_web/app/(home)/login/page.tsx b/surfsense_web/app/(home)/login/page.tsx index 8b3be3805..09bf770d8 100644 --- a/surfsense_web/app/(home)/login/page.tsx +++ b/surfsense_web/app/(home)/login/page.tsx @@ -115,7 +115,7 @@ function LoginContent() {
- +

{t("sign_in")}

diff --git a/surfsense_web/app/(home)/register/page.tsx b/surfsense_web/app/(home)/register/page.tsx index b9200c68f..1ec179b35 100644 --- a/surfsense_web/app/(home)/register/page.tsx +++ b/surfsense_web/app/(home)/register/page.tsx @@ -160,7 +160,7 @@ export default function RegisterPage() {
- +

{t("create_account")}

@@ -229,10 +229,7 @@ export default function RegisterPage() {
-
-
); @@ -126,7 +161,10 @@ type ImageZoomProps = PropsWithChildren<{ src: string; alt?: string; }>; - +function isDataOrBlobUrl(src: string | undefined): boolean { + if (!src || typeof src !== "string") return false; + return src.startsWith("data:") || src.startsWith("blob:"); +} function ImageZoom({ src, alt = "Image preview", children }: ImageZoomProps) { const [isMounted, setIsMounted] = useState(false); const [isOpen, setIsOpen] = useState(false); @@ -177,22 +215,39 @@ function ImageZoom({ src, alt = "Image preview", children }: ImageZoomProps) { aria-label="Close zoomed image" > {/** biome-ignore lint/performance/noImgElement: */} - {alt} { - e.stopPropagation(); - handleClose(); - }} - onKeyDown={(e) => { - if (e.key === "Enter") { - e.stopPropagation(); - handleClose(); - } - }} - /> + {isDataOrBlobUrl(src) ? ( + // biome-ignore lint/performance/noImgElement: data/blob URLs need plain img + {alt} { + e.stopPropagation(); + handleClose(); + }} + onKeyDown={(e) => { + if (e.key === "Enter") { + e.stopPropagation(); + handleClose(); + } + }} + /> + ) : ( + { + e.stopPropagation(); + handleClose(); + }} + unoptimized={false} + /> + )} , document.body )} diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx index 15ad11d94..0c99090e6 100644 --- a/surfsense_web/components/assistant-ui/inline-citation.tsx +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -32,7 +32,7 @@ export const InlineCitation: FC = ({ chunkId, isDocsChunk = e.stopPropagation()}> - onPreview(doc)}> + onPreview(doc)} disabled={isProcessing}> Open @@ -235,7 +244,7 @@ export const DocumentNode = React.memo(function DocumentNode({ {onExport && ( - + Export @@ -244,6 +253,12 @@ export const DocumentNode = React.memo(function DocumentNode({ )} + {onVersionHistory && isVersionableType(doc.document_type) && ( + onVersionHistory(doc)}> + + Versions + + )} e.stopPropagation()}> - onPreview(doc)}> + onPreview(doc)} disabled={isProcessing}> Open @@ -275,7 +290,7 @@ export const DocumentNode = React.memo(function DocumentNode({ {onExport && ( - + Export @@ -284,6 +299,12 @@ export const DocumentNode = React.memo(function DocumentNode({ )} + {onVersionHistory && isVersionableType(doc.document_type) && ( + onVersionHistory(doc)}> + + Versions + + )} void; onToggleExpand: (folderId: number) => void; onRename: (folder: FolderDisplay, newName: string) => void; @@ -70,6 +77,9 @@ interface FolderNodeProps { disabledDropIds?: Set; contextMenuOpen?: boolean; onContextMenuOpenChange?: (open: boolean) => void; + isWatched?: boolean; + onRescan?: (folder: FolderDisplay) => void; + onStopWatching?: (folder: FolderDisplay) => void; } function getDropZone( @@ -93,6 +103,7 @@ export const FolderNode = React.memo(function FolderNode({ isRenaming, childCount, selectionState, + processingState, onToggleSelect, onToggleExpand, onRename, @@ -107,6 +118,9 @@ export const FolderNode = React.memo(function FolderNode({ disabledDropIds, contextMenuOpen, onContextMenuOpenChange, + isWatched, + onRescan, + onStopWatching, }: FolderNodeProps) { const [renameValue, setRenameValue] = useState(folder.name); const inputRef = useRef(null); @@ -242,7 +256,9 @@ export const FolderNode = React.memo(function FolderNode({ isOver && !canDrop && "cursor-not-allowed" )} style={{ paddingLeft: `${depth * 16 + 4}px` }} - onClick={() => onToggleExpand(folder.id)} + onClick={() => { + onToggleExpand(folder.id); + }} onKeyDown={(e) => { if (e.key === "Enter" || e.key === " ") { 
e.preventDefault(); @@ -262,14 +278,45 @@ export const FolderNode = React.memo(function FolderNode({ )} - e.stopPropagation()} - className="h-3.5 w-3.5 shrink-0" - /> + {processingState !== "idle" && selectionState === "none" ? ( + <> + + + + {processingState === "processing" ? ( + + ) : ( + + )} + + + + {processingState === "processing" + ? "Syncing folder contents" + : "Some files failed to process"} + + + e.stopPropagation()} + className="h-3.5 w-3.5 shrink-0 hidden group-hover:flex" + /> + + ) : ( + e.stopPropagation()} + className="h-3.5 w-3.5 shrink-0" + /> + )} @@ -308,6 +355,28 @@ export const FolderNode = React.memo(function FolderNode({ + {isWatched && onRescan && ( + { + e.stopPropagation(); + onRescan(folder); + }} + > + + Re-scan + + )} + {isWatched && onStopWatching && ( + { + e.stopPropagation(); + onStopWatching(folder); + }} + > + + Stop watching + + )} { e.stopPropagation(); @@ -353,6 +422,18 @@ export const FolderNode = React.memo(function FolderNode({ {!isRenaming && contextMenuOpen && ( + {isWatched && onRescan && ( + onRescan(folder)}> + + Re-scan + + )} + {isWatched && onStopWatching && ( + onStopWatching(folder)}> + + Stop watching + + )} onCreateSubfolder(folder.id)}> New subfolder diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index f63d5da5c..1df007c0b 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -1,7 +1,7 @@ "use client"; import { useAtom } from "jotai"; -import { CirclePlus } from "lucide-react"; +import { Search } from "lucide-react"; import { useCallback, useMemo, useState } from "react"; import { DndProvider } from "react-dnd"; import { HTML5Backend } from "react-dnd-html5-backend"; @@ -32,6 +32,7 @@ interface FolderTreeViewProps { onDeleteDocument: (doc: DocumentNodeDoc) => void; onMoveDocument: (doc: DocumentNodeDoc) => void; onExportDocument?: (doc: DocumentNodeDoc, format: string) => void; + onVersionHistory?: (doc: DocumentNodeDoc) => void; activeTypes: DocumentTypeEnum[]; searchQuery?: string; onDropIntoFolder?: ( @@ -40,6 +41,9 @@ interface FolderTreeViewProps { targetFolderId: number | null ) => void; onReorderFolder?: (folderId: number, beforePos: string | null, afterPos: string | null) => void; + watchedFolderIds?: Set; + onRescanFolder?: (folder: FolderDisplay) => void; + onStopWatchingFolder?: (folder: FolderDisplay) => void; } function groupBy(items: T[], keyFn: (item: T) => string | number): Record { @@ -69,10 +73,14 @@ export function FolderTreeView({ onDeleteDocument, onMoveDocument, onExportDocument, + onVersionHistory, activeTypes, searchQuery, onDropIntoFolder, onReorderFolder, + watchedFolderIds, + onRescanFolder, + onStopWatchingFolder, }: FolderTreeViewProps) { const foldersByParent = useMemo(() => groupBy(folders, (f) => f.parentId ?? "root"), [folders]); @@ -158,6 +166,35 @@ export function FolderTreeView({ return states; }, [folders, docsByFolder, foldersByParent, mentionedDocIds]); + const folderProcessingStates = useMemo(() => { + const states: Record = {}; + + function compute(folderId: number): { hasProcessing: boolean; hasFailed: boolean } { + const directDocs = docsByFolder[folderId] ?? []; + let hasProcessing = directDocs.some( + (d) => d.status?.state === "pending" || d.status?.state === "processing" + ); + let hasFailed = directDocs.some((d) => d.status?.state === "failed"); + + for (const child of foldersByParent[folderId] ?? 
[]) { + const sub = compute(child.id); + hasProcessing = hasProcessing || sub.hasProcessing; + hasFailed = hasFailed || sub.hasFailed; + } + + if (hasProcessing) states[folderId] = "processing"; + else if (hasFailed) states[folderId] = "failed"; + else states[folderId] = "idle"; + + return { hasProcessing, hasFailed }; + } + + for (const f of folders) { + if (states[f.id] === undefined) compute(f.id); + } + return states; + }, [folders, docsByFolder, foldersByParent]); + function renderLevel(parentId: number | null, depth: number): React.ReactNode[] { const key = parentId ?? "root"; const childFolders = (foldersByParent[key] ?? []) @@ -191,6 +228,7 @@ export function FolderTreeView({ isRenaming={renamingFolderId === f.id} childCount={folderChildCounts[f.id] ?? 0} selectionState={folderSelectionStates[f.id] ?? "none"} + processingState={folderProcessingStates[f.id] ?? "idle"} onToggleSelect={onToggleFolderSelect} onToggleExpand={onToggleExpand} onRename={onRenameFolder} @@ -204,6 +242,9 @@ export function FolderTreeView({ siblingPositions={siblingPositions} contextMenuOpen={openContextMenuId === `folder-${f.id}`} onContextMenuOpenChange={(open) => setOpenContextMenuId(open ? `folder-${f.id}` : null)} + isWatched={watchedFolderIds?.has(f.id)} + onRescan={onRescanFolder} + onStopWatching={onStopWatchingFolder} /> ); @@ -225,6 +266,7 @@ export function FolderTreeView({ onDelete={onDeleteDocument} onMove={onMoveDocument} onExport={onExportDocument} + onVersionHistory={onVersionHistory} contextMenuOpen={openContextMenuId === `doc-${d.id}`} onContextMenuOpenChange={(open) => setOpenContextMenuId(open ? `doc-${d.id}` : null)} /> @@ -250,8 +292,9 @@ export function FolderTreeView({ if (treeNodes.length === 0 && (activeTypes.length > 0 || searchQuery)) { return (
- -

No matching documents

+ +

No matching documents

+

Try a different search term

); } diff --git a/surfsense_web/components/documents/version-history.tsx b/surfsense_web/components/documents/version-history.tsx new file mode 100644 index 000000000..27343dc6a --- /dev/null +++ b/surfsense_web/components/documents/version-history.tsx @@ -0,0 +1,258 @@ +"use client"; + +import { Check, ChevronRight, Clock, Copy, RotateCcw } from "lucide-react"; +import { useCallback, useEffect, useState } from "react"; +import { toast } from "sonner"; +import { Button } from "@/components/ui/button"; +import { Dialog, DialogContent, DialogTitle, DialogTrigger } from "@/components/ui/dialog"; +import { Separator } from "@/components/ui/separator"; +import { Spinner } from "@/components/ui/spinner"; +import { documentsApiService } from "@/lib/apis/documents-api.service"; +import { cn } from "@/lib/utils"; + +interface DocumentVersionSummary { + version_number: number; + title: string; + content_hash: string; + created_at: string | null; +} + +interface VersionHistoryProps { + documentId: number; + documentType: string; +} + +const VERSION_DOCUMENT_TYPES = new Set(["LOCAL_FOLDER_FILE", "OBSIDIAN_CONNECTOR"]); + +export function isVersionableType(documentType: string) { + return VERSION_DOCUMENT_TYPES.has(documentType); +} + +const DIALOG_CLASSES = + "select-none max-w-[900px] w-[95vw] md:w-[90vw] h-[90vh] md:h-[80vh] max-h-[640px] flex flex-col md:flex-row p-0 gap-0 overflow-hidden [--card:var(--background)] dark:[--card:oklch(0.205_0_0)] dark:[--background:oklch(0.205_0_0)]"; + +export function VersionHistoryButton({ documentId, documentType }: VersionHistoryProps) { + if (!isVersionableType(documentType)) return null; + + return ( + + + + + + Version History + + + + ); +} + +export function VersionHistoryDialog({ + open, + onOpenChange, + documentId, +}: { + open: boolean; + onOpenChange: (open: boolean) => void; + documentId: number; +}) { + return ( + + + Version History + {open && } + + + ); +} + +function formatRelativeTime(dateStr: string): string { + const now = Date.now(); + const then = new Date(dateStr).getTime(); + const diffMs = now - then; + const diffMin = Math.floor(diffMs / 60_000); + if (diffMin < 1) return "Just now"; + if (diffMin < 60) return `${diffMin} minute${diffMin !== 1 ? "s" : ""} ago`; + const diffHr = Math.floor(diffMin / 60); + if (diffHr < 24) return `${diffHr} hour${diffHr !== 1 ? 
"s" : ""} ago`; + return new Date(dateStr).toLocaleDateString(undefined, { + weekday: "short", + month: "short", + day: "numeric", + year: "numeric", + hour: "numeric", + minute: "2-digit", + }); +} + +function VersionHistoryPanel({ documentId }: { documentId: number }) { + const [versions, setVersions] = useState([]); + const [loading, setLoading] = useState(true); + const [selectedVersion, setSelectedVersion] = useState(null); + const [versionContent, setVersionContent] = useState(""); + const [contentLoading, setContentLoading] = useState(false); + const [restoring, setRestoring] = useState(false); + const [copied, setCopied] = useState(false); + + const loadVersions = useCallback(async () => { + setLoading(true); + try { + const data = await documentsApiService.listDocumentVersions(documentId); + setVersions(data as DocumentVersionSummary[]); + } catch { + toast.error("Failed to load version history"); + } finally { + setLoading(false); + } + }, [documentId]); + + useEffect(() => { + loadVersions(); + }, [loadVersions]); + + const handleSelectVersion = async (versionNumber: number) => { + if (selectedVersion === versionNumber) return; + setSelectedVersion(versionNumber); + setContentLoading(true); + try { + const data = (await documentsApiService.getDocumentVersion(documentId, versionNumber)) as { + source_markdown: string; + }; + setVersionContent(data.source_markdown || ""); + } catch { + toast.error("Failed to load version content"); + } finally { + setContentLoading(false); + } + }; + + const handleRestore = async (versionNumber: number) => { + setRestoring(true); + try { + await documentsApiService.restoreDocumentVersion(documentId, versionNumber); + toast.success(`Restored version ${versionNumber}`); + await loadVersions(); + } catch { + toast.error("Failed to restore version"); + } finally { + setRestoring(false); + } + }; + + const handleCopy = () => { + navigator.clipboard.writeText(versionContent); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + }; + + if (loading) { + return ( +
+ +
+ ); + } + + if (versions.length === 0) { + return ( +
+

No version history available yet

+

Versions are created when file content changes

+
+ ); + } + + const selectedVersionData = versions.find((v) => v.version_number === selectedVersion); + + return ( + <> + {/* Left panel — version list */} + + + {/* Right panel — content preview */} +
+ {selectedVersion !== null && selectedVersionData ? ( + <> +
+

+ {selectedVersionData.title || `Version ${selectedVersion}`} +

+
+ + +
+
+ +
+ {contentLoading ? ( +
+ +
+ ) : ( +
+									{versionContent || "(empty)"}
+								
+ )} +
+ + ) : ( +
+

Select a version to preview

+
+ )} +
+ + ); +} diff --git a/surfsense_web/components/editor-panel/editor-panel.tsx b/surfsense_web/components/editor-panel/editor-panel.tsx index 3f167dc24..248fe68eb 100644 --- a/surfsense_web/components/editor-panel/editor-panel.tsx +++ b/surfsense_web/components/editor-panel/editor-panel.tsx @@ -1,11 +1,12 @@ "use client"; import { useAtomValue, useSetAtom } from "jotai"; -import { AlertCircle, Download, FileText, Loader2, XIcon } from "lucide-react"; +import { Download, FileQuestionMark, FileText, Loader2, RefreshCw, XIcon } from "lucide-react"; import dynamic from "next/dynamic"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom"; +import { VersionHistoryButton } from "@/components/documents/version-history"; import { MarkdownViewer } from "@/components/markdown-viewer"; import { Alert, AlertDescription } from "@/components/ui/alert"; import { Button } from "@/components/ui/button"; @@ -79,7 +80,7 @@ export function EditorPanelContent({ const isLargeDocument = (editorDoc?.content_size_bytes ?? 0) > LARGE_DOCUMENT_THRESHOLD; useEffect(() => { - let cancelled = false; + const controller = new AbortController(); setIsLoading(true); setError(null); setEditorDoc(null); @@ -87,7 +88,7 @@ export function EditorPanelContent({ initialLoadDone.current = false; changeCountRef.current = 0; - const fetchContent = async () => { + const doFetch = async () => { const token = getBearerToken(); if (!token) { redirectToLogin(); @@ -95,6 +96,9 @@ export function EditorPanelContent({ } try { + const response = await authenticatedFetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content`, + { method: "GET", signal: controller.signal } const url = new URL( `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content` ); @@ -102,7 +106,7 @@ export function EditorPanelContent({ const response = await authenticatedFetch(url.toString(), { method: "GET" }); - if (cancelled) return; + if (controller.signal.aborted) return; if (!response.ok) { const errorData = await response @@ -126,18 +130,16 @@ export function EditorPanelContent({ setEditorDoc(data); initialLoadDone.current = true; } catch (err) { - if (cancelled) return; + if (controller.signal.aborted) return; console.error("Error fetching document:", err); setError(err instanceof Error ? err.message : "Failed to fetch document"); } finally { - if (!cancelled) setIsLoading(false); + if (!controller.signal.aborted) setIsLoading(false); } }; - fetchContent(); - return () => { - cancelled = true; - }; + doFetch().catch(() => {}); + return () => controller.abort(); }, [documentId, searchSpaceId, title]); const handleMarkdownChange = useCallback((md: string) => { @@ -198,12 +200,17 @@ export function EditorPanelContent({

Unsaved changes

)}
- {onClose && ( - - )} +
+ {editorDoc?.document_type && ( + + )} + {onClose && ( + + )} +
@@ -211,10 +218,24 @@ export function EditorPanelContent({ ) : error || !editorDoc ? (
- -
-

Failed to load document

-

{error || "An unknown error occurred"}

+ {error?.toLowerCase().includes("still being processed") ? ( +
+ +
+ ) : ( +
+ +
+ )} +
+

+ {error?.toLowerCase().includes("still being processed") + ? "Document is processing" + : "Document unavailable"} +

+

+ {error || "An unknown error occurred"} +

) : isLargeDocument ? ( diff --git a/surfsense_web/components/homepage/use-cases-grid.tsx b/surfsense_web/components/homepage/use-cases-grid.tsx index 2f8c2d537..f9d315b49 100644 --- a/surfsense_web/components/homepage/use-cases-grid.tsx +++ b/surfsense_web/components/homepage/use-cases-grid.tsx @@ -1,4 +1,5 @@ "use client"; +import Image from 'next/image'; import { AnimatePresence, motion } from "motion/react"; import { ExpandedGifOverlay, useExpandedGif } from "@/components/ui/expanded-gif-overlay"; @@ -81,6 +82,15 @@ function UseCaseCard({ alt={title} className="w-full rounded-xl object-cover transition-transform duration-500 group-hover:scale-[1.02]" /> +
+ {title} +

{title}

diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx index fd6b45c52..6138b67fb 100644 --- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx +++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx @@ -775,7 +775,8 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid {t("delete_chat")} - {t("delete_chat_confirm")} {chatToDelete?.name}?{" "} + {t("delete_chat_confirm")}{" "} + {chatToDelete?.name}?{" "} {t("action_cannot_undone")} @@ -835,9 +836,7 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid {tSidebar("rename") || "Rename"} - {isRenamingChat && ( - - )} + {isRenamingChat && } @@ -865,9 +864,7 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid className="relative bg-destructive text-destructive-foreground hover:bg-destructive/90" > {tCommon("delete")} - {isDeletingSearchSpace && ( - - )} + {isDeletingSearchSpace && } @@ -895,9 +892,7 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid className="relative bg-destructive text-destructive-foreground hover:bg-destructive/90" > {t("leave")} - {isLeavingSearchSpace && ( - - )} + {isLeavingSearchSpace && } diff --git a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx index ac2f65065..717f5a459 100644 --- a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx +++ b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx @@ -19,7 +19,7 @@ const EditorPanelContent = dynamic( import("@/components/editor-panel/editor-panel").then((m) => ({ default: m.EditorPanelContent, })), - { ssr: false, loading: () => } + { ssr: false, loading: () => null } ); const HitlEditPanelContent = dynamic( diff --git a/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx index 57c011f01..41bd01a99 100644 --- a/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx @@ -109,6 +109,7 @@ export function AllPrivateChatsSidebarContent({ queryKey: ["all-threads", searchSpaceId], queryFn: () => fetchThreads(Number(searchSpaceId)), enabled: !!searchSpaceId && !isSearchMode, + placeholderData: () => queryClient.getQueryData(["threads", searchSpaceId, { limit: 40 }]), }); const { diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index d880524bd..aa409e179 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -21,6 +21,7 @@ import type { DocumentNodeDoc } from "@/components/documents/DocumentNode"; import type { FolderDisplay } from "@/components/documents/FolderNode"; import { FolderPickerDialog } from "@/components/documents/FolderPickerDialog"; import { FolderTreeView } from "@/components/documents/FolderTreeView"; +import { VersionHistoryDialog } from "@/components/documents/version-history"; import { EXPORT_FILE_EXTENSIONS } from "@/components/shared/ExportMenuItems"; import { AlertDialog, @@ -40,6 +41,7 @@ import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import type { DocumentTypeEnum } from "@/contracts/types/document.types"; import { useDebouncedValue } 
from "@/hooks/use-debounced-value"; import { useMediaQuery } from "@/hooks/use-media-query"; +import { documentsApiService } from "@/lib/apis/documents-api.service"; import { foldersApiService } from "@/lib/apis/folders-api.service"; import { authenticatedFetch } from "@/lib/auth-utils"; import { queries } from "@/zero/queries/index"; @@ -92,6 +94,50 @@ export function DocumentsSidebar({ const [search, setSearch] = useState(""); const debouncedSearch = useDebouncedValue(search, 250); const [activeTypes, setActiveTypes] = useState([]); + const [watchedFolderIds, setWatchedFolderIds] = useState>(new Set()); + + useEffect(() => { + const api = typeof window !== "undefined" ? window.electronAPI : null; + if (!api?.getWatchedFolders) return; + + async function loadWatchedIds() { + const folders = await api!.getWatchedFolders(); + + if (folders.length === 0) { + try { + const backendFolders = await documentsApiService.getWatchedFolders(searchSpaceId); + for (const bf of backendFolders) { + const meta = bf.metadata as Record | null; + if (!meta?.watched || !meta.folder_path) continue; + await api!.addWatchedFolder({ + path: meta.folder_path as string, + name: bf.name, + rootFolderId: bf.id, + searchSpaceId: bf.search_space_id, + excludePatterns: (meta.exclude_patterns as string[]) ?? [], + fileExtensions: (meta.file_extensions as string[] | null) ?? null, + active: true, + }); + } + const recovered = await api!.getWatchedFolders(); + const ids = new Set( + recovered.filter((f) => f.rootFolderId != null).map((f) => f.rootFolderId as number) + ); + setWatchedFolderIds(ids); + return; + } catch (err) { + console.error("[DocumentsSidebar] Recovery from backend failed:", err); + } + } + + const ids = new Set( + folders.filter((f) => f.rootFolderId != null).map((f) => f.rootFolderId as number) + ); + setWatchedFolderIds(ids); + } + + loadWatchedIds(); + }, [searchSpaceId]); const { mutateAsync: deleteDocumentMutation } = useAtomValue(deleteDocumentMutationAtom); const [sidebarDocs, setSidebarDocs] = useAtom(sidebarSelectedDocumentsAtom); @@ -134,7 +180,12 @@ export function DocumentsSidebar({ const treeDocuments: DocumentNodeDoc[] = useMemo(() => { const zeroDocs = (zeroAllDocs ?? 
[]) - .filter((d) => d.title && d.title.trim() !== "") + .filter((d) => { + if (!d.title || d.title.trim() === "") return false; + const state = (d.status as { state?: string } | undefined)?.state; + if (state === "deleting") return false; + return true; + }) .map((d) => ({ id: d.id, title: d.title, @@ -223,6 +274,53 @@ export function DocumentsSidebar({ [createFolderParentId, searchSpaceId, setExpandedFolderMap] ); + const handleRescanFolder = useCallback( + async (folder: FolderDisplay) => { + const api = window.electronAPI; + if (!api) return; + + const watchedFolders = await api.getWatchedFolders(); + const matched = watchedFolders.find((wf) => wf.rootFolderId === folder.id); + if (!matched) { + toast.error("This folder is not being watched"); + return; + } + + try { + await documentsApiService.folderIndex(searchSpaceId, { + folder_path: matched.path, + folder_name: matched.name, + search_space_id: searchSpaceId, + root_folder_id: folder.id, + }); + toast.success(`Re-scanning folder: ${matched.name}`); + } catch (err) { + toast.error((err as Error)?.message || "Failed to re-scan folder"); + } + }, + [searchSpaceId] + ); + + const handleStopWatching = useCallback(async (folder: FolderDisplay) => { + const api = window.electronAPI; + if (!api) return; + + const watchedFolders = await api.getWatchedFolders(); + const matched = watchedFolders.find((wf) => wf.rootFolderId === folder.id); + if (!matched) { + toast.error("This folder is not being watched"); + return; + } + + await api.removeWatchedFolder(matched.path); + try { + await foldersApiService.stopWatching(folder.id); + } catch (err) { + console.error("[DocumentsSidebar] Failed to clear watched metadata:", err); + } + toast.success(`Stopped watching: ${matched.name}`); + }, []); + const handleRenameFolder = useCallback(async (folder: FolderDisplay, newName: string) => { try { await foldersApiService.updateFolder(folder.id, { name: newName }); @@ -235,6 +333,14 @@ export function DocumentsSidebar({ const handleDeleteFolder = useCallback(async (folder: FolderDisplay) => { if (!confirm(`Delete folder "${folder.name}" and all its contents?`)) return; try { + const api = window.electronAPI; + if (api) { + const watchedFolders = await api.getWatchedFolders(); + const matched = watchedFolders.find((wf) => wf.rootFolderId === folder.id); + if (matched) { + await api.removeWatchedFolder(matched.path); + } + } await foldersApiService.deleteFolder(folder.id); toast.success("Folder deleted"); } catch (e: unknown) { @@ -448,6 +554,7 @@ export function DocumentsSidebar({ const [bulkDeleteConfirmOpen, setBulkDeleteConfirmOpen] = useState(false); const [isBulkDeleting, setIsBulkDeleting] = useState(false); + const [versionDocId, setVersionDocId] = useState(null); const handleBulkDeleteSelected = useCallback(async () => { if (deletableSelectedIds.length === 0) return; @@ -651,56 +758,72 @@ export function DocumentsSidebar({ />
- {deletableSelectedIds.length > 0 && ( -
- -
- )} +
+ {deletableSelectedIds.length > 0 && ( +
+ +
+ )} - { - openEditorPanel({ - documentId: doc.id, - searchSpaceId, - title: doc.title, - }); - }} - onEditDocument={(doc) => { - openEditorPanel({ - documentId: doc.id, - searchSpaceId, - title: doc.title, - }); - }} - onDeleteDocument={(doc) => handleDeleteDocument(doc.id)} - onMoveDocument={handleMoveDocument} - onExportDocument={handleExportDocument} - activeTypes={activeTypes} - onDropIntoFolder={handleDropIntoFolder} - onReorderFolder={handleReorderFolder} - /> + { + openEditorPanel({ + documentId: doc.id, + searchSpaceId, + title: doc.title, + }); + }} + onEditDocument={(doc) => { + openEditorPanel({ + documentId: doc.id, + searchSpaceId, + title: doc.title, + }); + }} + onDeleteDocument={(doc) => handleDeleteDocument(doc.id)} + onMoveDocument={handleMoveDocument} + onExportDocument={handleExportDocument} + onVersionHistory={(doc) => setVersionDocId(doc.id)} + activeTypes={activeTypes} + onDropIntoFolder={handleDropIntoFolder} + onReorderFolder={handleReorderFolder} + watchedFolderIds={watchedFolderIds} + onRescanFolder={handleRescanFolder} + onStopWatchingFolder={handleStopWatching} + /> +
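
The rescan and stop-watching handlers wired into `FolderTreeView` above both follow the same lookup: ask the Electron preload API for its watched-folder list, match the sidebar folder by `rootFolderId`, and only then call the backend. A condensed sketch of that flow — the `electronAPI` and `folderIndex` shapes below are inferred from the identifiers visible in this diff, so treat them as assumptions rather than the real service signatures:

```ts
// Hypothetical types mirroring what the diff suggests the preload API returns.
type WatchedFolder = { path: string; name: string; rootFolderId: number | null };

async function rescanWatchedFolder(
	folderId: number,
	searchSpaceId: number,
	api: { getWatchedFolders(): Promise<WatchedFolder[]> },
	folderIndex: (spaceId: number, body: Record<string, unknown>) => Promise<unknown>
): Promise<void> {
	// The desktop process owns the list of watched paths; match the sidebar
	// folder to its watcher entry via the rootFolderId recorded when watching began.
	const watched = await api.getWatchedFolders();
	const matched = watched.find((wf) => wf.rootFolderId === folderId);
	if (!matched) throw new Error("This folder is not being watched");

	// Ask the backend to re-index everything under the watched path.
	await folderIndex(searchSpaceId, {
		folder_path: matched.path,
		folder_name: matched.name,
		search_space_id: searchSpaceId,
		root_folder_id: folderId,
	});
}
```

The stop-watching path in the diff is the mirror image: remove the watcher entry first (`removeWatchedFolder(matched.path)`), then best-effort clear the backend metadata via `foldersApiService.stopWatching(folder.id)`.
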
+ {versionDocId !== null && ( + { + if (!open) setVersionDocId(null); + }} + documentId={versionDocId} + /> + )} + (null); const [connectorScrollPos, setConnectorScrollPos] = useState<"top" | "middle" | "bottom">("top"); + const connectorRafRef = useRef(); const handleConnectorScroll = useCallback((e: React.UIEvent) => { const el = e.currentTarget; - const atTop = el.scrollTop <= 2; - const atBottom = el.scrollHeight - el.scrollTop - el.clientHeight <= 2; - setConnectorScrollPos(atTop ? "top" : atBottom ? "bottom" : "middle"); + if (connectorRafRef.current) return; + connectorRafRef.current = requestAnimationFrame(() => { + const atTop = el.scrollTop <= 2; + const atBottom = el.scrollHeight - el.scrollTop - el.clientHeight <= 2; + setConnectorScrollPos(atTop ? "top" : atBottom ? "bottom" : "middle"); + connectorRafRef.current = undefined; + }); }, []); + useEffect( + () => () => { + if (connectorRafRef.current) cancelAnimationFrame(connectorRafRef.current); + }, + [] + ); const [filterDrawerOpen, setFilterDrawerOpen] = useState(false); const [markingAsReadId, setMarkingAsReadId] = useState(null); diff --git a/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx b/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx index 16457404f..d2ce3cc64 100644 --- a/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx +++ b/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx @@ -1,6 +1,6 @@ "use client"; -import { AlertCircle, Download, FileText, Loader2, Pencil } from "lucide-react"; +import { Download, FileQuestionMark, FileText, Loader2, PenLine, RefreshCw } from "lucide-react"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { PlateEditor } from "@/components/editor/plate-editor"; @@ -64,7 +64,7 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen const isLargeDocument = (doc?.content_size_bytes ?? 0) > LARGE_DOCUMENT_THRESHOLD; useEffect(() => { - let cancelled = false; + const controller = new AbortController(); setIsLoading(true); setError(null); setDoc(null); @@ -73,7 +73,7 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen initialLoadDone.current = false; changeCountRef.current = 0; - const fetchContent = async () => { + const doFetch = async () => { const token = getBearerToken(); if (!token) { redirectToLogin(); @@ -81,6 +81,9 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen } try { + const response = await authenticatedFetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content`, + { method: "GET", signal: controller.signal } const url = new URL( `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content` ); @@ -88,7 +91,7 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen const response = await authenticatedFetch(url.toString(), { method: "GET" }); - if (cancelled) return; + if (controller.signal.aborted) return; if (!response.ok) { const errorData = await response @@ -109,18 +112,16 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen setDoc(data); initialLoadDone.current = true; } catch (err) { - if (cancelled) return; + if (controller.signal.aborted) return; console.error("Error fetching document:", err); setError(err instanceof Error ? 
err.message : "Failed to fetch document"); } finally { - if (!cancelled) setIsLoading(false); + if (!controller.signal.aborted) setIsLoading(false); } }; - fetchContent(); - return () => { - cancelled = true; - }; + doFetch().catch(() => {}); + return () => controller.abort(); }, [documentId, searchSpaceId]); const handleMarkdownChange = useCallback((md: string) => { @@ -171,15 +172,33 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen if (isLoading) return ; if (error || !doc) { + const isProcessing = error?.toLowerCase().includes("still being processed"); return ( -
- -
-

Failed to load document

-

- {error || "An unknown error occurred"} -

+
+
+ {isProcessing ? ( + + ) : ( + + )}
+
+

+ {isProcessing ? "Document is processing" : "Document unavailable"} +

+

{error || "An unknown error occurred"}

+
+ {!isProcessing && ( + + )}
); } @@ -240,7 +259,7 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen onClick={() => setIsEditing(true)} className="gap-1.5" > - + Edit )} diff --git a/surfsense_web/components/layout/ui/tabs/TabBar.tsx b/surfsense_web/components/layout/ui/tabs/TabBar.tsx index 18e1ba141..8d0d986d3 100644 --- a/surfsense_web/components/layout/ui/tabs/TabBar.tsx +++ b/surfsense_web/components/layout/ui/tabs/TabBar.tsx @@ -72,7 +72,7 @@ export function TabBar({ onTabSwitch, onNewChat, rightActions, className }: TabB if (tabs.length <= 1) return null; return ( -
+
), hr: ({ ...props }) =>
, - img: ({ src, alt, width: _w, height: _h, ...props }) => ( - // eslint-disable-next-line @next/next/no-img-element - {alt - ), + img: ({ src, alt, width: _w, height: _h, ...props }) => { + const isDataOrUnknownUrl = typeof src === "string" && (src.startsWith("data:") || !src.startsWith("http")); + + return isDataOrUnknownUrl ? ( + // eslint-disable-next-line @next/next/no-img-element + {alt + ) : ( + {alt + ); +}, table: ({ ...props }) => (
diff --git a/surfsense_web/components/new-chat/model-selector.tsx b/surfsense_web/components/new-chat/model-selector.tsx index 7a2a471ba..b207d82b4 100644 --- a/surfsense_web/components/new-chat/model-selector.tsx +++ b/surfsense_web/components/new-chat/model-selector.tsx @@ -498,7 +498,7 @@ export function ModelSelector({ }} > - Add LLM Model + Add Model diff --git a/surfsense_web/components/new-chat/source-detail-panel.tsx b/surfsense_web/components/new-chat/source-detail-panel.tsx index bff088971..ed6c7e4ac 100644 --- a/surfsense_web/components/new-chat/source-detail-panel.tsx +++ b/surfsense_web/components/new-chat/source-detail-panel.tsx @@ -6,6 +6,7 @@ import { ChevronDown, ChevronUp, ExternalLink, + FileQuestionMark, FileText, Hash, Loader2, @@ -475,13 +476,11 @@ export function SourceDetailPanel({ animate={{ opacity: 1, scale: 1 }} className="flex flex-col items-center gap-4 text-center px-6" > -
- +
+
-

- Failed to load document -

+

Document unavailable

{documentByChunkFetchingError.message || "An unexpected error occurred. Please try again."} diff --git a/surfsense_web/components/onboarding-tour.tsx b/surfsense_web/components/onboarding-tour.tsx index 1c52169cb..a7d8f108b 100644 --- a/surfsense_web/components/onboarding-tour.tsx +++ b/surfsense_web/components/onboarding-tour.tsx @@ -429,6 +429,7 @@ export function OnboardingTour() { const pathname = usePathname(); const retryCountRef = useRef(0); const retryTimerRef = useRef | null>(null); + const startCheckTimerRef = useRef | null>(null); const maxRetries = 10; // Track previous user ID to detect user changes const previousUserIdRef = useRef(null); @@ -439,8 +440,8 @@ export function OnboardingTour() { // Fetch threads data const { data: threadsData } = useQuery({ - queryKey: ["threads", searchSpaceId, { limit: 1 }], - queryFn: () => fetchThreads(Number(searchSpaceId), 1), // Only need to check if any exist + queryKey: ["threads", searchSpaceId, { limit: 40 }], // Same key as layout + queryFn: () => fetchThreads(Number(searchSpaceId), 40), enabled: !!searchSpaceId, }); @@ -460,6 +461,7 @@ export function OnboardingTour() { // Find and track target element with retry logic const updateTarget = useCallback(() => { + if (retryTimerRef.current) clearTimeout(retryTimerRef.current); if (!currentStep) return; const el = document.querySelector(currentStep.target); @@ -480,11 +482,13 @@ export function OnboardingTour() { } }, 200); } + }, [currentStep]); + useEffect(() => { return () => { if (retryTimerRef.current) clearTimeout(retryTimerRef.current); }; - }, [currentStep]); + }, []); // Check if tour should run: localStorage + data validation with user ID tracking useEffect(() => { @@ -573,15 +577,15 @@ export function OnboardingTour() { setPosition(calculatePosition(connectorEl, TOUR_STEPS[0].placement)); } else { // Retry after delay - setTimeout(checkAndStartTour, 200); + startCheckTimerRef.current = setTimeout(checkAndStartTour, 200); } }; // Start checking after initial delay - const timer = setTimeout(checkAndStartTour, 500); + startCheckTimerRef.current = setTimeout(checkAndStartTour, 500); return () => { cancelled = true; - clearTimeout(timer); + if (startCheckTimerRef.current) clearTimeout(startCheckTimerRef.current); }; }, [mounted, user?.id, searchSpaceId, pathname, threadsData, documentTypeCounts, connectors]); diff --git a/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx b/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx index ddf4746aa..4bb49c48d 100644 --- a/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx +++ b/surfsense_web/components/public-chat-snapshots/public-chat-snapshot-row.tsx @@ -1,6 +1,6 @@ "use client"; -import { Check, Copy, ExternalLink, MessageSquare, Trash2 } from "lucide-react"; +import { Check, Copy, Dot, ExternalLink, MessageSquare, Trash2 } from "lucide-react"; import { useCallback, useRef, useState } from "react"; import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; import { Badge } from "@/components/ui/badge"; @@ -153,7 +153,7 @@ export function PublicChatSnapshotRow({ {formattedDate} {member && ( <> - · + diff --git a/surfsense_web/components/public-chat-snapshots/public-chat-snapshots-empty-state.tsx b/surfsense_web/components/public-chat-snapshots/public-chat-snapshots-empty-state.tsx index 4a4a57770..4e8ec5bb6 100644 --- a/surfsense_web/components/public-chat-snapshots/public-chat-snapshots-empty-state.tsx +++ 
b/surfsense_web/components/public-chat-snapshots/public-chat-snapshots-empty-state.tsx @@ -11,11 +11,8 @@ export function PublicChatSnapshotsEmptyState({ }: PublicChatSnapshotsEmptyStateProps) { return (

-
- -
-

{title}

-

{description}

+

{title}

+

{description}

); } diff --git a/surfsense_web/components/settings/image-model-manager.tsx b/surfsense_web/components/settings/image-model-manager.tsx index 8f08b7db3..23162b629 100644 --- a/surfsense_web/components/settings/image-model-manager.tsx +++ b/surfsense_web/components/settings/image-model-manager.tsx @@ -1,7 +1,7 @@ "use client"; import { useAtomValue } from "jotai"; -import { AlertCircle, Edit3, Info, Plus, RefreshCw, Trash2, Wand2 } from "lucide-react"; +import { AlertCircle, Dot, Edit3, Info, RefreshCw, Trash2, Wand2 } from "lucide-react"; import { useMemo, useState } from "react"; import { deleteImageGenConfigMutationAtom } from "@/atoms/image-gen-config/image-gen-config-mutation.atoms"; import { @@ -240,27 +240,14 @@ export function ImageModelManager({ searchSpaceId }: ImageModelManagerProps) { {!isLoading && (
{(userConfigs?.length ?? 0) === 0 ? ( - + -
- -
-

No Image Models Yet

-

+

No Image Models Yet

+

{canCreate ? "Add your own image generation model (DALL-E 3, GPT Image 1, etc.)" : "No image models have been added to this space yet. Contact a space owner to add one."}

- {canCreate && ( - - )}
) : ( @@ -343,7 +330,7 @@ export function ImageModelManager({ searchSpaceId }: ImageModelManagerProps) { {member && ( <> - · + diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx index 07ec492a3..718503318 100644 --- a/surfsense_web/components/settings/llm-role-manager.tsx +++ b/surfsense_web/components/settings/llm-role-manager.tsx @@ -4,16 +4,14 @@ import { useAtomValue } from "jotai"; import { AlertCircle, Bot, - CheckCircle, + CircleCheck, CircleDashed, FileText, ImageIcon, RefreshCw, - RotateCcw, - Save, Shuffle, } from "lucide-react"; -import { useEffect, useState } from "react"; +import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { globalImageGenConfigsAtom, @@ -40,6 +38,7 @@ import { SelectValue, } from "@/components/ui/select"; import { Skeleton } from "@/components/ui/skeleton"; +import { Spinner } from "@/components/ui/spinner"; import { getProviderIcon } from "@/lib/provider-icons"; import { cn } from "@/lib/utils"; @@ -48,8 +47,8 @@ const ROLE_DESCRIPTIONS = { icon: Bot, title: "Agent LLM", description: "Primary LLM for chat interactions and agent operations", - color: "text-blue-600 dark:text-blue-400", - bgColor: "bg-blue-500/10", + color: "text-muted-foreground", + bgColor: "bg-muted", prefKey: "agent_llm_id" as const, configType: "llm" as const, }, @@ -57,8 +56,8 @@ const ROLE_DESCRIPTIONS = { icon: FileText, title: "Document Summary LLM", description: "Handles document summarization and research synthesis", - color: "text-purple-600 dark:text-purple-400", - bgColor: "bg-purple-500/10", + color: "text-muted-foreground", + bgColor: "bg-muted", prefKey: "document_summary_llm_id" as const, configType: "llm" as const, }, @@ -66,8 +65,8 @@ const ROLE_DESCRIPTIONS = { icon: ImageIcon, title: "Image Generation Model", description: "Model used for AI image generation (DALL-E, GPT Image, etc.)", - color: "text-teal-600 dark:text-teal-400", - bgColor: "bg-teal-500/10", + color: "text-muted-foreground", + bgColor: "bg-muted", prefKey: "image_generation_config_id" as const, configType: "image" as const, }, @@ -118,88 +117,44 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { image_generation_config_id: preferences.image_generation_config_id ?? "", })); - const [hasChanges, setHasChanges] = useState(false); - const [isSaving, setIsSaving] = useState(false); + const [savingRole, setSavingRole] = useState(null); + const savingRef = useRef(false); useEffect(() => { - const newAssignments = { - agent_llm_id: preferences.agent_llm_id ?? "", - document_summary_llm_id: preferences.document_summary_llm_id ?? "", - image_generation_config_id: preferences.image_generation_config_id ?? "", - }; - setAssignments(newAssignments); - setHasChanges(false); + if (!savingRef.current) { + setAssignments({ + agent_llm_id: preferences.agent_llm_id ?? "", + document_summary_llm_id: preferences.document_summary_llm_id ?? "", + image_generation_config_id: preferences.image_generation_config_id ?? "", + }); + } }, [ preferences?.agent_llm_id, preferences?.document_summary_llm_id, preferences?.image_generation_config_id, ]); - const handleRoleAssignment = (prefKey: string, configId: string) => { - const newAssignments = { - ...assignments, - [prefKey]: configId === "unassigned" ? "" : parseInt(configId), - }; + const handleRoleAssignment = useCallback( + async (prefKey: string, configId: string) => { + const value = configId === "unassigned" ? 
"" : parseInt(configId); - setAssignments(newAssignments); + setAssignments((prev) => ({ ...prev, [prefKey]: value })); + setSavingRole(prefKey); + savingRef.current = true; - const currentPrefs = { - agent_llm_id: preferences.agent_llm_id ?? "", - document_summary_llm_id: preferences.document_summary_llm_id ?? "", - image_generation_config_id: preferences.image_generation_config_id ?? "", - }; - - const hasChangesNow = Object.keys(newAssignments).some( - (key) => - newAssignments[key as keyof typeof newAssignments] !== - currentPrefs[key as keyof typeof currentPrefs] - ); - - setHasChanges(hasChangesNow); - }; - - const handleSave = async () => { - setIsSaving(true); - - const toNumericOrUndefined = (val: string | number) => - typeof val === "string" ? (val ? parseInt(val) : undefined) : val; - - const numericAssignments = { - agent_llm_id: toNumericOrUndefined(assignments.agent_llm_id), - document_summary_llm_id: toNumericOrUndefined(assignments.document_summary_llm_id), - image_generation_config_id: toNumericOrUndefined(assignments.image_generation_config_id), - }; - - await updatePreferences({ - search_space_id: searchSpaceId, - data: numericAssignments, - }); - - setHasChanges(false); - toast.success("Role assignments saved successfully!"); - - setIsSaving(false); - }; - - const handleReset = () => { - setAssignments({ - agent_llm_id: preferences.agent_llm_id ?? "", - document_summary_llm_id: preferences.document_summary_llm_id ?? "", - image_generation_config_id: preferences.image_generation_config_id ?? "", - }); - setHasChanges(false); - }; - - const isAssignmentComplete = - assignments.agent_llm_id !== "" && - assignments.agent_llm_id !== null && - assignments.agent_llm_id !== undefined && - assignments.document_summary_llm_id !== "" && - assignments.document_summary_llm_id !== null && - assignments.document_summary_llm_id !== undefined && - assignments.image_generation_config_id !== "" && - assignments.image_generation_config_id !== null && - assignments.image_generation_config_id !== undefined; + try { + await updatePreferences({ + search_space_id: searchSpaceId, + data: { [prefKey]: value || undefined }, + }); + toast.success("Role assignment updated"); + } finally { + setSavingRole(null); + savingRef.current = false; + } + }, + [updatePreferences, searchSpaceId] + ); // Combine global and custom LLM configs const allLLMConfigs = [ @@ -213,6 +168,11 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { ...(userImageConfigs ?? []).filter((config) => config.id && config.id.toString().trim() !== ""), ]; + const isAssignmentComplete = + allLLMConfigs.some((c) => c.id === assignments.agent_llm_id) && + allLLMConfigs.some((c) => c.id === assignments.document_summary_llm_id) && + allImageConfigs.some((c) => c.id === assignments.image_generation_config_id); + const isLoading = configsLoading || preferencesLoading || @@ -242,11 +202,8 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { Refresh {isAssignmentComplete && !isLoading && !hasError && ( - - + + All roles assigned )} @@ -332,10 +289,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { const roleAllConfigs = isImageRole ? 
allImageConfigs : allLLMConfigs; const assignedConfig = roleAllConfigs.find((config) => config.id === currentAssignment); - const isAssigned = - currentAssignment !== "" && - currentAssignment !== null && - currentAssignment !== undefined; + const isAssigned = !!assignedConfig; const isAutoMode = assignedConfig && "is_auto_mode" in assignedConfig && assignedConfig.is_auto_mode; @@ -361,8 +315,10 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {

- {isAssigned ? ( - + {savingRole === role.prefKey ? ( + + ) : isAssigned ? ( + ) : ( )} @@ -374,7 +330,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) { Configuration + + {/* Hidden folder input for web folder browsing */} )} /> - -
- -
- + {/* MOBILE DROP ZONE */} +
+ {hasContent ? ( + !selectedFolder && + (isElectron ? ( +
{renderBrowseButton({ compact: true, fullWidth: true })}
+ ) : ( + + )) + ) : (
{ + if (!isElectron) fileInputRef.current?.click(); + }} > - - {isDragActive ? ( -
- -

{t("drop_files")}

-
- ) : ( -
- -
-

{t("drag_drop")}

-

{t("or_browse")}

-
-
- )} -
- - + +
+

+ {isElectron ? "Select files or folder" : "Tap to select files or folder"} +

+

{t("file_size_limit")}

+
+
e.stopPropagation()}> + {renderBrowseButton({ fullWidth: true })}
- - + )} +
- {files.length > 0 && ( - - -
-
- - {t("selected_files", { count: files.length })} - - - {t("total_size")}: {formatFileSize(totalFileSize)} - -
- -
-
- -
- {files.map((entry) => ( -
-
- -
-

{entry.file.name}

-
- - {formatFileSize(entry.file.size)} - - - {entry.file.type || "Unknown type"} - -
-
-
- -
- ))} -
- - {isUploading && ( -
- -
-
- {t("uploading_files")} - {Math.round(uploadProgress)}% -
- -
+ {/* DESKTOP DROP ZONE */} +
+ {hasContent ? ( +
+ + + {isDragActive ? t("drop_files") : t("drag_drop_more")} + + {renderBrowseButton({ compact: true })} +
+ ) : ( +
+ {isDragActive && ( +
+ +

{t("drop_files")}

)} - -
- +
+ +

{t("drag_drop")}

+

{t("file_size_limit")}

+
{renderBrowseButton()}
+
+ )} +
-
- + {/* FOLDER SELECTED (Electron only — web flattens folder contents into file list) */} + {isElectron && selectedFolder && ( +
+
+ +
+

{selectedFolder.name}

+

{selectedFolder.path}

- - + +
+ +
+
+
+

Watch folder

+

Auto-sync when files change

+
+ +
+
+
+

Enable AI Summary

+

+ Improves search quality but adds latency +

+
+ +
+
+ + +
)} + {/* FILES SELECTED */} + {files.length > 0 && ( +
+
+

+ {t("selected_files", { count: files.length })} + + {formatFileSize(totalFileSize)} +

+ +
+ +
+ {files.map((entry) => ( +
+ + {entry.file.name.split(".").pop() || "?"} + + {entry.file.name} + + {formatFileSize(entry.file.size)} + + +
+ ))} +
+ + {isUploading && ( +
+
+ {t("uploading_files")} + {Math.round(uploadProgress)}% +
+ +
+ )} + +
+
+

Enable AI Summary

+

+ Improves search quality but adds latency +

+
+ +
+ + +
+ )} + + {/* SUPPORTED FORMATS */} - - -
-
-
- {t("supported_file_types")} -
-
- {t("file_types_desc")} -
-
-
+ + + + {t("supported_file_types")} + - -
+ +
{supportedExtensions.map((ext) => ( - + {ext} ))} diff --git a/surfsense_web/components/tool-ui/citation/citation-list.tsx b/surfsense_web/components/tool-ui/citation/citation-list.tsx index 3151917b6..75b02bf3d 100644 --- a/surfsense_web/components/tool-ui/citation/citation-list.tsx +++ b/surfsense_web/components/tool-ui/citation/citation-list.tsx @@ -7,6 +7,8 @@ import { openSafeNavigationHref, resolveSafeNavigationHref } from "../shared/med import { cn, Popover, PopoverContent, PopoverTrigger } from "./_adapter"; import { Citation } from "./citation"; import type { CitationType, CitationVariant, SerializableCitation } from "./schema"; +import NextImage from 'next/image'; + const TYPE_ICONS: Record = { webpage: Globe, @@ -253,18 +255,18 @@ function OverflowItem({ citation, onClick }: OverflowItemProps) { className="group hover:bg-muted focus-visible:bg-muted flex w-full cursor-pointer items-center gap-2.5 rounded-md px-2 py-2 text-left transition-colors focus-visible:outline-none" > {citation.favicon ? ( - // biome-ignore lint/performance/noImgElement: external favicon from arbitrary domain — next/image requires remotePatterns config - - ) : ( -
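
The `citation-list.tsx` change adds a `NextImage` import and starts removing the biome-ignored `<img>` favicon; the removed comment spells out the old constraint: `next/image` only loads remote sources whose hosts are whitelisted. If favicons really can come from arbitrary domains, the Next config needs a matching `remotePatterns` entry along these lines — a hypothetical `next.config.ts` fragment, since the project's actual config is not part of this diff:

```ts
// Hypothetical next.config.ts fragment — not taken from this patch.
import type { NextConfig } from "next";

const nextConfig: NextConfig = {
	images: {
		remotePatterns: [
			// Example: favicons served from any *.wikipedia.org subdomain.
			{ protocol: "https", hostname: "**.wikipedia.org" },
			// Truly arbitrary favicon hosts would need a much broader wildcard
			// (or keeping a plain <img> / unoptimized images), which trades away
			// the safety the allow-list is meant to provide.
		],
	},
};

export default nextConfig;
```
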