diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py
index c86cdab3f..5008b1a10 100644
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@@ -977,15 +977,19 @@ async def get_watched_folders(
     )
 
     folders = (
-        await session.execute(
-            select(Folder).where(
-                Folder.search_space_id == search_space_id,
-                Folder.parent_id.is_(None),
-                Folder.folder_metadata.isnot(None),
-                Folder.folder_metadata["watched"].astext == "true",
+        (
+            await session.execute(
+                select(Folder).where(
+                    Folder.search_space_id == search_space_id,
+                    Folder.parent_id.is_(None),
+                    Folder.folder_metadata.isnot(None),
+                    Folder.folder_metadata["watched"].astext == "true",
+                )
             )
         )
-    ).scalars().all()
+        .scalars()
+        .all()
+    )
 
     return folders
 
@@ -1265,15 +1269,21 @@ async def list_document_versions(
     if not document:
         raise HTTPException(status_code=404, detail="Document not found")
 
-    await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_READ.value)
+    await check_permission(
+        session, user, document.search_space_id, Permission.DOCUMENTS_READ.value
+    )
 
     versions = (
-        await session.execute(
-            select(DocumentVersion)
-            .where(DocumentVersion.document_id == document_id)
-            .order_by(DocumentVersion.version_number.desc())
+        (
+            await session.execute(
+                select(DocumentVersion)
+                .where(DocumentVersion.document_id == document_id)
+                .order_by(DocumentVersion.version_number.desc())
+            )
         )
-    ).scalars().all()
+        .scalars()
+        .all()
+    )
 
     return [
         {
@@ -1300,7 +1310,9 @@ async def get_document_version(
     if not document:
         raise HTTPException(status_code=404, detail="Document not found")
 
-    await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_READ.value)
+    await check_permission(
+        session, user, document.search_space_id, Permission.DOCUMENTS_READ.value
+    )
 
     version = (
         await session.execute(
@@ -1331,14 +1343,14 @@ async def restore_document_version(
 ):
     """Restore a previous version: snapshot current state, then overwrite document content."""
     document = (
-        await session.execute(
-            select(Document).where(Document.id == document_id)
-        )
+        await session.execute(select(Document).where(Document.id == document_id))
     ).scalar_one_or_none()
     if not document:
         raise HTTPException(status_code=404, detail="Document not found")
 
-    await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_UPDATE.value)
+    await check_permission(
+        session, user, document.search_space_id, Permission.DOCUMENTS_UPDATE.value
+    )
 
     version = (
         await session.execute(
@@ -1363,6 +1375,7 @@ async def restore_document_version(
     await session.commit()
 
     from app.tasks.celery_tasks.document_reindex_tasks import reindex_document_task
+
     reindex_document_task.delay(document_id, str(user.id))
 
     return {
@@ -1430,9 +1443,7 @@ async def folder_index(
     root_folder_id = request.root_folder_id
     if root_folder_id:
         existing = (
-            await session.execute(
-                select(Folder).where(Folder.id == root_folder_id)
-            )
+            await session.execute(select(Folder).where(Folder.id == root_folder_id))
         ).scalar_one_or_none()
         if not existing:
             root_folder_id = None
@@ -1492,7 +1503,9 @@ async def folder_index_files(
     )
 
     if not request.target_file_paths:
-        raise HTTPException(status_code=400, detail="target_file_paths must not be empty")
+        raise HTTPException(
+            status_code=400, detail="target_file_paths must not be empty"
+        )
 
     await check_permission(
         session,
@@ -1507,11 +1520,11 @@ async def folder_index_files(
     for fp in request.target_file_paths:
         try:
             Path(fp).relative_to(request.folder_path)
-        except ValueError:
+        except ValueError as err:
             raise HTTPException(
                 status_code=400,
                 detail=f"target_file_path {fp} must be inside folder_path",
-            )
+            ) from err
 
     from app.tasks.celery_tasks.document_tasks import index_local_folder_task
 
@@ -1530,5 +1543,3 @@ async def folder_index_files(
         "status": "processing",
         "file_count": len(request.target_file_paths),
     }
-
-
diff --git a/surfsense_backend/app/routes/editor_routes.py b/surfsense_backend/app/routes/editor_routes.py
index a0505f62f..829b2cf69 100644
--- a/surfsense_backend/app/routes/editor_routes.py
+++ b/surfsense_backend/app/routes/editor_routes.py
@@ -129,7 +129,11 @@ async def get_editor_content(
 
     if not chunk_contents:
         doc_status = document.status or {}
-        state = doc_status.get("state", "ready") if isinstance(doc_status, dict) else "ready"
+        state = (
+            doc_status.get("state", "ready")
+            if isinstance(doc_status, dict)
+            else "ready"
+        )
         if state in ("pending", "processing"):
             raise HTTPException(
                 status_code=409,
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index f49ba2d5d..d208ff910 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -20,7 +20,6 @@ Non-OAuth connectors (BookStack, GitHub, etc.) are limited to one per search spa
 
 import asyncio
 import logging
-import os
 from contextlib import suppress
 from datetime import UTC, datetime, timedelta
 from typing import Any
diff --git a/surfsense_backend/app/schemas/folders.py b/surfsense_backend/app/schemas/folders.py
index e8bdf3821..a7e065144 100644
--- a/surfsense_backend/app/schemas/folders.py
+++ b/surfsense_backend/app/schemas/folders.py
@@ -1,9 +1,8 @@
 """Pydantic schemas for folder CRUD, move, and reorder operations."""
 
 from datetime import datetime
-from uuid import UUID
-
 from typing import Any
+from uuid import UUID
 
 from pydantic import BaseModel, ConfigDict, Field
 
@@ -36,7 +35,9 @@ class FolderRead(BaseModel):
     created_by_id: UUID | None
     created_at: datetime
     updated_at: datetime
-    metadata: dict[str, Any] | None = Field(default=None, validation_alias="folder_metadata")
+    metadata: dict[str, Any] | None = Field(
+        default=None, validation_alias="folder_metadata"
+    )
 
     model_config = ConfigDict(from_attributes=True)
 
diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
index 506f8118c..4e9249d34 100644
--- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py
@@ -1,6 +1,7 @@
 """Celery tasks for document processing."""
 
 import asyncio
+import contextlib
 import logging
 import os
 from uuid import UUID
@@ -1337,9 +1338,7 @@ async def _index_local_folder_async(
             )
             notification_id = notification.id
             _start_heartbeat(notification_id)
-            heartbeat_task = asyncio.create_task(
-                _run_heartbeat_loop(notification_id)
-            )
+            heartbeat_task = asyncio.create_task(_run_heartbeat_loop(notification_id))
         except Exception:
             logger.warning(
                 "Failed to create notification for local folder indexing",
@@ -1349,18 +1348,16 @@ async def _index_local_folder_async(
     async def _heartbeat_progress(completed_count: int) -> None:
         """Refresh heartbeat and optionally update notification progress."""
         if notification:
-            try:
+            with contextlib.suppress(Exception):
                 await NotificationService.document_processing.notify_processing_progress(
                     session=session,
                     notification=notification,
                     stage="indexing",
                     stage_message=f"Syncing files ({completed_count}/{file_count or '?'})",
                 )
-            except Exception:
-                pass
 
     try:
-        indexed, skipped_or_failed, _rfid, err = await index_local_folder(
+        _indexed, _skipped_or_failed, _rfid, err = await index_local_folder(
             session=session,
             search_space_id=search_space_id,
             user_id=user_id,
@@ -1371,7 +1368,9 @@ async def _index_local_folder_async(
             root_folder_id=root_folder_id,
             enable_summary=enable_summary,
             target_file_paths=target_file_paths,
-            on_heartbeat_callback=_heartbeat_progress if (is_batch or is_full_scan) else None,
+            on_heartbeat_callback=_heartbeat_progress
+            if (is_batch or is_full_scan)
+            else None,
         )
 
         if notification:
diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py
index 4ac8cc594..539cfdd32 100644
--- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py
@@ -43,30 +43,110 @@ from .base import (
     logger,
 )
 
-PLAINTEXT_EXTENSIONS = frozenset({
-    ".md", ".markdown", ".txt", ".text", ".csv", ".tsv",
-    ".json", ".jsonl", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf",
-    ".xml", ".html", ".htm", ".css", ".scss", ".less", ".sass",
-    ".py", ".pyw", ".pyi", ".pyx",
-    ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs",
-    ".java", ".kt", ".kts", ".scala", ".groovy",
-    ".c", ".h", ".cpp", ".cxx", ".cc", ".hpp", ".hxx",
-    ".cs", ".fs", ".fsx",
-    ".go", ".rs", ".rb", ".php", ".pl", ".pm", ".lua",
-    ".swift", ".m", ".mm",
-    ".r", ".R", ".jl",
-    ".sh", ".bash", ".zsh", ".fish", ".bat", ".cmd", ".ps1",
-    ".sql", ".graphql", ".gql",
-    ".env", ".gitignore", ".dockerignore", ".editorconfig",
-    ".makefile", ".cmake",
-    ".log", ".rst", ".tex", ".bib", ".org", ".adoc", ".asciidoc",
-    ".vue", ".svelte", ".astro",
-    ".tf", ".hcl", ".proto",
-})
+PLAINTEXT_EXTENSIONS = frozenset(
+    {
+        ".md",
+        ".markdown",
+        ".txt",
+        ".text",
+        ".csv",
+        ".tsv",
+        ".json",
+        ".jsonl",
+        ".yaml",
+        ".yml",
+        ".toml",
+        ".ini",
+        ".cfg",
+        ".conf",
+        ".xml",
+        ".html",
+        ".htm",
+        ".css",
+        ".scss",
+        ".less",
+        ".sass",
+        ".py",
+        ".pyw",
+        ".pyi",
+        ".pyx",
+        ".js",
+        ".jsx",
+        ".ts",
+        ".tsx",
+        ".mjs",
+        ".cjs",
+        ".java",
+        ".kt",
+        ".kts",
+        ".scala",
+        ".groovy",
+        ".c",
+        ".h",
+        ".cpp",
+        ".cxx",
+        ".cc",
+        ".hpp",
+        ".hxx",
+        ".cs",
+        ".fs",
+        ".fsx",
+        ".go",
+        ".rs",
+        ".rb",
+        ".php",
+        ".pl",
+        ".pm",
+        ".lua",
+        ".swift",
+        ".m",
+        ".mm",
+        ".r",
+        ".R",
+        ".jl",
+        ".sh",
+        ".bash",
+        ".zsh",
+        ".fish",
+        ".bat",
+        ".cmd",
+        ".ps1",
+        ".sql",
+        ".graphql",
+        ".gql",
+        ".env",
+        ".gitignore",
+        ".dockerignore",
+        ".editorconfig",
+        ".makefile",
+        ".cmake",
+        ".log",
+        ".rst",
+        ".tex",
+        ".bib",
+        ".org",
+        ".adoc",
+        ".asciidoc",
+        ".vue",
+        ".svelte",
+        ".astro",
+        ".tf",
+        ".hcl",
+        ".proto",
+    }
+)
 
-AUDIO_EXTENSIONS = frozenset({
-    ".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm",
-})
+AUDIO_EXTENSIONS = frozenset(
+    {
+        ".mp3",
+        ".mp4",
+        ".mpeg",
+        ".mpga",
+        ".m4a",
+        ".wav",
+        ".webm",
+    }
+)
 
 
 def _is_plaintext_file(filename: str) -> bool:
@@ -81,6 +161,7 @@ def _needs_etl(filename: str) -> bool:
     """File is not plaintext and not audio — requires ETL service to parse."""
     return not _is_plaintext_file(filename) and not _is_audio_file(filename)
 
+
 HeartbeatCallbackType = Callable[[int], Awaitable[None]]
 
 DEFAULT_EXCLUDE_PATTERNS = [
@@ -121,9 +202,7 @@ def scan_folder(
     for dirpath, dirnames, filenames in os.walk(root):
         rel_dir = Path(dirpath).relative_to(root)
 
-        dirnames[:] = [
-            d for d in dirnames if d not in exclude_patterns
-        ]
+        dirnames[:] = [d for d in dirnames if d not in exclude_patterns]
 
         if any(part in exclude_patterns for part in rel_dir.parts):
             continue
@@ -134,9 +213,11 @@ def scan_folder(
             full = Path(dirpath) / fname
 
-            if file_extensions is not None:
-                if full.suffix.lower() not in file_extensions:
-                    continue
+            if (
+                file_extensions is not None
+                and full.suffix.lower() not in file_extensions
+            ):
+                continue
 
             try:
                 stat = full.stat()
@@ -209,11 +290,14 @@ def _content_hash(content: str, search_space_id: int) -> str:
     pipeline so that dedup checks are consistent.
     """
     import hashlib
-    return hashlib.sha256(f"{search_space_id}:{content}".encode("utf-8")).hexdigest()
+
+    return hashlib.sha256(f"{search_space_id}:{content}".encode()).hexdigest()
 
 
 async def _compute_file_content_hash(
-    file_path: str, filename: str, search_space_id: int,
+    file_path: str,
+    filename: str,
+    search_space_id: int,
 ) -> tuple[str, str]:
     """Read a file (via ETL if needed) and compute its content hash.
 
@@ -257,9 +341,7 @@ async def _mirror_folder_structure(
 
     if root_folder_id:
         existing = (
-            await session.execute(
-                select(Folder).where(Folder.id == root_folder_id)
-            )
+            await session.execute(select(Folder).where(Folder.id == root_folder_id))
         ).scalar_one_or_none()
         if existing:
             mapping[""] = existing.id
@@ -412,13 +494,17 @@ async def _cleanup_empty_folders(
     id_to_rel: dict[int, str] = {fid: rel for rel, fid in folder_mapping.items() if rel}
 
     all_folders = (
-        await session.execute(
-            select(Folder).where(
-                Folder.search_space_id == search_space_id,
-                Folder.id != root_folder_id,
+        (
+            await session.execute(
+                select(Folder).where(
+                    Folder.search_space_id == search_space_id,
+                    Folder.id != root_folder_id,
+                )
             )
         )
-    ).scalars().all()
+        .scalars()
+        .all()
+    )
 
     candidates: list[Folder] = []
     for folder in all_folders:
@@ -520,7 +606,9 @@ async def index_local_folder(
         metadata={
             "folder_path": folder_path,
             "user_id": str(user_id),
-            "target_file_paths_count": len(target_file_paths) if target_file_paths else None,
+            "target_file_paths_count": len(target_file_paths)
+            if target_file_paths
+            else None,
         },
     )
 
@@ -532,7 +620,12 @@ async def index_local_folder(
             "Folder not found",
             {},
         )
-        return 0, 0, root_folder_id, f"Folder path missing or does not exist: {folder_path}"
+        return (
+            0,
+            0,
+            root_folder_id,
+            f"Folder path missing or does not exist: {folder_path}",
+        )
 
     if exclude_patterns is None:
         exclude_patterns = DEFAULT_EXCLUDE_PATTERNS
@@ -639,7 +732,9 @@ async def index_local_folder(
                 )
 
                 if existing_document:
-                    stored_mtime = (existing_document.document_metadata or {}).get("mtime")
+                    stored_mtime = (existing_document.document_metadata or {}).get(
+                        "mtime"
+                    )
                     current_mtime = file_info["modified_at"].timestamp()
 
                     if stored_mtime and abs(current_mtime - stored_mtime) < 1.0:
@@ -709,23 +804,31 @@ async def index_local_folder(
         # ================================================================
         all_root_folder_ids = set(folder_mapping.values())
         all_db_folders = (
-            await session.execute(
-                select(Folder.id).where(
-                    Folder.search_space_id == search_space_id,
+            (
+                await session.execute(
+                    select(Folder.id).where(
+                        Folder.search_space_id == search_space_id,
+                    )
                 )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         all_root_folder_ids.update(all_db_folders)
 
         all_folder_docs = (
-            await session.execute(
-                select(Document).where(
-                    Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
-                    Document.search_space_id == search_space_id,
-                    Document.folder_id.in_(list(all_root_folder_ids)),
+            (
+                await session.execute(
+                    select(Document).where(
+                        Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
+                        Document.search_space_id == search_space_id,
+                        Document.folder_id.in_(list(all_root_folder_ids)),
+                    )
                 )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
 
         for doc in all_folder_docs:
             if doc.unique_identifier_hash not in seen_unique_hashes:
@@ -742,9 +845,7 @@ async def index_local_folder(
         )
 
         pipeline = IndexingPipelineService(session)
-        doc_map = {
-            compute_unique_identifier_hash(cd): cd for cd in connector_docs
-        }
+        doc_map = {compute_unique_identifier_hash(cd): cd for cd in connector_docs}
         documents = await pipeline.prepare_for_indexing(connector_docs)
 
         # Assign folder_id immediately so docs appear in the correct
@@ -1033,7 +1134,9 @@ async def _index_single_file(
         db_doc.document_metadata = doc_meta
         await session.commit()
 
-    indexed = 1 if DocumentStatus.is_state(db_doc.status, DocumentStatus.READY) else 0
+    indexed = (
+        1 if DocumentStatus.is_state(db_doc.status, DocumentStatus.READY) else 0
+    )
     failed_msg = None if indexed else "Indexing failed"
 
     if indexed:
diff --git a/surfsense_backend/app/utils/document_versioning.py b/surfsense_backend/app/utils/document_versioning.py
index 889bc4a3a..e6ad1fb06 100644
--- a/surfsense_backend/app/utils/document_versioning.py
+++ b/surfsense_backend/app/utils/document_versioning.py
@@ -83,9 +83,9 @@ async def create_version_snapshot(
     # Cleanup: cap at MAX_VERSIONS_PER_DOCUMENT
     count = (
         await session.execute(
-            select(func.count()).select_from(DocumentVersion).where(
-                DocumentVersion.document_id == document.id
-            )
+            select(func.count())
+            .select_from(DocumentVersion)
+            .where(DocumentVersion.document_id == document.id)
         )
     ).scalar_one()
 
diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py
index 9c91011ae..d9d7cacae 100644
--- a/surfsense_backend/tests/integration/conftest.py
+++ b/surfsense_backend/tests/integration/conftest.py
@@ -166,5 +166,3 @@ def make_connector_document(db_connector, db_user):
         return ConnectorDocument(**defaults)
 
     return _make
-
-
diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py
index 67254ec93..4062c3a3b 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py
@@ -21,7 +21,9 @@ from app.db import (
 pytestmark = pytest.mark.integration
 
 UNIFIED_FIXTURES = (
-    "patched_summarize", "patched_embed_texts", "patched_chunk_text",
+    "patched_summarize",
+    "patched_embed_texts",
+    "patched_chunk_text",
 )
 
@@ -37,6 +39,7 @@ class _FakeSessionMaker:
         @asynccontextmanager
         async def _ctx():
             yield self._session
+
         return _ctx()
 
@@ -59,7 +62,6 @@ def patched_batch_sessions(monkeypatch, db_session):
 
 
 class TestFullIndexer:
-
     @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
     async def test_i1_new_file_indexed(
         self,
@@ -73,7 +75,7 @@ class TestFullIndexer:
 
         (tmp_path / "note.md").write_text("# Hello World\n\nContent here.")
 
-        count, skipped, root_folder_id, err = await index_local_folder(
+        count, _skipped, _root_folder_id, err = await index_local_folder(
            session=db_session,
            search_space_id=db_search_space.id,
            user_id=str(db_user.id),
@@ -85,13 +87,17 @@ class TestFullIndexer:
         assert count == 1
 
         docs = (
-            await db_session.execute(
-                select(Document).where(
-                    Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
-                    Document.search_space_id == db_search_space.id,
+            (
+                await db_session.execute(
+                    select(Document).where(
+                        Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
+                        Document.search_space_id == db_search_space.id,
+                    )
                 )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         assert len(docs) == 1
         assert docs[0].document_type == DocumentType.LOCAL_FOLDER_FILE
         assert DocumentStatus.is_state(docs[0].status, DocumentStatus.READY)
@@ -130,7 +136,9 @@ class TestFullIndexer:
 
         total = (
             await db_session.execute(
-                select(func.count()).select_from(Document).where(
+                select(func.count())
+                .select_from(Document)
+                .where(
                     Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
                     Document.search_space_id == db_search_space.id,
                 )
@@ -174,13 +182,19 @@ class TestFullIndexer:
         assert count == 1
 
         versions = (
-            await db_session.execute(
-                select(DocumentVersion).join(Document).where(
-                    Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
-                    Document.search_space_id == db_search_space.id,
+            (
+                await db_session.execute(
+                    select(DocumentVersion)
+                    .join(Document)
+                    .where(
+                        Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
+                        Document.search_space_id == db_search_space.id,
+                    )
                 )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         assert len(versions) >= 1
 
     @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
@@ -207,7 +221,9 @@ class TestFullIndexer:
 
         docs_before = (
             await db_session.execute(
-                select(func.count()).select_from(Document).where(
+                select(func.count())
+                .select_from(Document)
+                .where(
                     Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
                     Document.search_space_id == db_search_space.id,
                 )
@@ -228,7 +244,9 @@ class TestFullIndexer:
 
         docs_after = (
             await db_session.execute(
-                select(func.count()).select_from(Document).where(
+                select(func.count())
+                .select_from(Document)
+                .where(
                     Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
                     Document.search_space_id == db_search_space.id,
                 )
@@ -262,13 +280,17 @@ class TestFullIndexer:
         assert count == 1
 
         docs = (
-            await db_session.execute(
-                select(Document).where(
-                    Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
-                    Document.search_space_id == db_search_space.id,
+            (
+                await db_session.execute(
+                    select(Document).where(
+                        Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
+                        Document.search_space_id == db_search_space.id,
+                    )
                 )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         assert len(docs) == 1
         assert docs[0].title == "b.md"
 
@@ -279,7 +301,6 @@ class TestFullIndexer:
 
 
 class TestFolderMirroring:
-
     @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
     async def test_f1_root_folder_created(
         self,
@@ -335,10 +356,14 @@ class TestFolderMirroring:
         )
 
         folders = (
-            await db_session.execute(
-                select(Folder).where(Folder.search_space_id == db_search_space.id)
+            (
+                await db_session.execute(
+                    select(Folder).where(Folder.search_space_id == db_search_space.id)
+                )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         folder_names = {f.name for f in folders}
         assert "notes" in folder_names
 
@@ -376,10 +401,14 @@ class TestFolderMirroring:
         )
 
         folders_before = (
-            await db_session.execute(
-                select(Folder).where(Folder.search_space_id == db_search_space.id)
+            (
+                await db_session.execute(
+                    select(Folder).where(Folder.search_space_id == db_search_space.id)
+                )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         ids_before = {f.id for f in folders_before}
 
         await index_local_folder(
@@ -392,10 +421,14 @@ class TestFolderMirroring:
         )
 
         folders_after = (
-            await db_session.execute(
-                select(Folder).where(Folder.search_space_id == db_search_space.id)
+            (
+                await db_session.execute(
+                    select(Folder).where(Folder.search_space_id == db_search_space.id)
+                )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         ids_after = {f.id for f in folders_after}
 
         assert ids_before == ids_after
@@ -425,21 +458,23 @@ class TestFolderMirroring:
         )
 
         docs = (
-            await db_session.execute(
-                select(Document).where(
-                    Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
-                    Document.search_space_id == db_search_space.id,
+            (
+                await db_session.execute(
+                    select(Document).where(
+                        Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
+                        Document.search_space_id == db_search_space.id,
+                    )
                 )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
 
         today_doc = next(d for d in docs if d.title == "today.md")
         root_doc = next(d for d in docs if d.title == "root.md")
 
         daily_folder = (
-            await db_session.execute(
-                select(Folder).where(Folder.name == "daily")
-            )
+            await db_session.execute(select(Folder).where(Folder.name == "daily"))
         ).scalar_one()
 
         assert today_doc.folder_id == daily_folder.id
@@ -455,9 +490,10 @@ class TestFolderMirroring:
         tmp_path: Path,
     ):
         """F5: Deleted dir's empty Folder row is cleaned up on re-sync."""
-        from app.tasks.connector_indexers.local_folder_indexer import index_local_folder
         import shutil
 
+        from app.tasks.connector_indexers.local_folder_indexer import index_local_folder
+
         daily = tmp_path / "notes" / "daily"
         daily.mkdir(parents=True)
         weekly = tmp_path / "notes" / "weekly"
@@ -474,9 +510,7 @@ class TestFolderMirroring:
         )
 
         weekly_folder = (
-            await db_session.execute(
-                select(Folder).where(Folder.name == "weekly")
-            )
+            await db_session.execute(select(Folder).where(Folder.name == "weekly"))
         ).scalar_one_or_none()
         assert weekly_folder is not None
 
@@ -492,16 +526,12 @@ class TestFolderMirroring:
         )
 
         weekly_after = (
-            await db_session.execute(
-                select(Folder).where(Folder.name == "weekly")
-            )
+            await db_session.execute(select(Folder).where(Folder.name == "weekly"))
         ).scalar_one_or_none()
         assert weekly_after is None
 
         daily_after = (
-            await db_session.execute(
-                select(Folder).where(Folder.name == "daily")
-            )
+            await db_session.execute(select(Folder).where(Folder.name == "daily"))
         ).scalar_one_or_none()
         assert daily_after is not None
 
@@ -551,18 +581,14 @@ class TestFolderMirroring:
         ).scalar_one()
 
         daily_folder = (
-            await db_session.execute(
-                select(Folder).where(Folder.name == "daily")
-            )
+            await db_session.execute(select(Folder).where(Folder.name == "daily"))
         ).scalar_one()
 
         assert doc.folder_id == daily_folder.id
         assert daily_folder.parent_id is not None
 
         notes_folder = (
-            await db_session.execute(
-                select(Folder).where(Folder.name == "notes")
-            )
+            await db_session.execute(select(Folder).where(Folder.name == "notes"))
         ).scalar_one()
         assert daily_folder.parent_id == notes_folder.id
         assert notes_folder.parent_id == root_folder_id
@@ -592,9 +618,7 @@ class TestFolderMirroring:
         )
 
         eph_folder = (
-            await db_session.execute(
-                select(Folder).where(Folder.name == "ephemeral")
-            )
+            await db_session.execute(select(Folder).where(Folder.name == "ephemeral"))
         ).scalar_one_or_none()
         assert eph_folder is not None
 
@@ -612,16 +636,12 @@ class TestFolderMirroring:
         )
 
         eph_after = (
-            await db_session.execute(
-                select(Folder).where(Folder.name == "ephemeral")
-            )
+            await db_session.execute(select(Folder).where(Folder.name == "ephemeral"))
         ).scalar_one_or_none()
         assert eph_after is None
 
         notes_after = (
-            await db_session.execute(
-                select(Folder).where(Folder.name == "notes")
-            )
+            await db_session.execute(select(Folder).where(Folder.name == "notes"))
         ).scalar_one_or_none()
         assert notes_after is None
 
@@ -632,7 +652,6 @@ class TestFolderMirroring:
 
 
 class TestBatchMode:
-
     @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
     async def test_b1_batch_indexes_multiple_files(
         self,
@@ -649,7 +668,7 @@ class TestBatchMode:
         (tmp_path / "b.md").write_text("File B content")
         (tmp_path / "c.md").write_text("File C content")
 
-        count, failed, root_folder_id, err = await index_local_folder(
+        count, failed, _root_folder_id, err = await index_local_folder(
             session=db_session,
             search_space_id=db_search_space.id,
             user_id=str(db_user.id),
@@ -667,13 +686,17 @@ class TestBatchMode:
         assert err is None
 
         docs = (
-            await db_session.execute(
-                select(Document).where(
-                    Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
-                    Document.search_space_id == db_search_space.id,
+            (
+                await db_session.execute(
+                    select(Document).where(
+                        Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
+                        Document.search_space_id == db_search_space.id,
+                    )
                 )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         assert len(docs) == 3
         assert {d.title for d in docs} == {"a.md", "b.md", "c.md"}
         assert all(
@@ -714,13 +737,17 @@ class TestBatchMode:
         assert err is not None
 
        docs = (
-            await db_session.execute(
-                select(Document).where(
-                    Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
-                    Document.search_space_id == db_search_space.id,
+            (
+                await db_session.execute(
+                    select(Document).where(
+                        Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
+                        Document.search_space_id == db_search_space.id,
+                    )
                )
            )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         assert len(docs) == 2
         assert {d.title for d in docs} == {"good1.md", "good2.md"}
 
@@ -731,7 +758,6 @@ class TestBatchMode:
 
 
 class TestPipelineIntegration:
-
     @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
     async def test_p1_local_folder_file_through_pipeline(
         self,
@@ -742,7 +768,9 @@ class TestPipelineIntegration:
     ):
         """P1: LOCAL_FOLDER_FILE ConnectorDocument through prepare+index to READY."""
         from app.indexing_pipeline.connector_document import ConnectorDocument
-        from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService
+        from app.indexing_pipeline.indexing_pipeline_service import (
+            IndexingPipelineService,
+        )
 
         doc = ConnectorDocument(
             title="Test Local File",
@@ -763,12 +791,16 @@ class TestPipelineIntegration:
         assert result is not None
 
         docs = (
-            await db_session.execute(
-                select(Document).where(
-                    Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
-                    Document.search_space_id == db_search_space.id,
+            (
+                await db_session.execute(
+                    select(Document).where(
+                        Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
+                        Document.search_space_id == db_search_space.id,
+                    )
                 )
             )
-        ).scalars().all()
+            .scalars()
+            .all()
+        )
         assert len(docs) == 1
         assert DocumentStatus.is_state(docs[0].status, DocumentStatus.READY)
diff --git a/surfsense_backend/tests/integration/test_document_versioning.py b/surfsense_backend/tests/integration/test_document_versioning.py
index 87e3c490c..9bd03d219 100644
--- a/surfsense_backend/tests/integration/test_document_versioning.py
+++ b/surfsense_backend/tests/integration/test_document_versioning.py
@@ -34,14 +34,16 @@ async def db_document(
 
 async def _version_count(session: AsyncSession, document_id: int) -> int:
     result = await session.execute(
-        select(func.count()).select_from(DocumentVersion).where(
-            DocumentVersion.document_id == document_id
-        )
+        select(func.count())
+        .select_from(DocumentVersion)
+        .where(DocumentVersion.document_id == document_id)
     )
     return result.scalar_one()
 
 
-async def _get_versions(session: AsyncSession, document_id: int) -> list[DocumentVersion]:
+async def _get_versions(
+    session: AsyncSession, document_id: int
+) -> list[DocumentVersion]:
     result = await session.execute(
         select(DocumentVersion)
         .where(DocumentVersion.document_id == document_id)
@@ -74,18 +76,14 @@ class TestCreateVersionSnapshot:
         from app.utils.document_versioning import create_version_snapshot
 
         t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)
-        monkeypatch.setattr(
-            "app.utils.document_versioning._now", lambda: t0
-        )
+        monkeypatch.setattr("app.utils.document_versioning._now", lambda: t0)
         await create_version_snapshot(db_session, db_document)
 
         # Simulate content change and time passing
         db_document.source_markdown = "# Test\n\nUpdated content."
         db_document.content_hash = "def456"
         t1 = t0 + timedelta(minutes=31)
-        monkeypatch.setattr(
-            "app.utils.document_versioning._now", lambda: t1
-        )
+        monkeypatch.setattr("app.utils.document_versioning._now", lambda: t1)
         await create_version_snapshot(db_session, db_document)
 
         versions = await _get_versions(db_session, db_document.id)
@@ -101,9 +99,7 @@ class TestCreateVersionSnapshot:
         from app.utils.document_versioning import create_version_snapshot
 
         t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)
-        monkeypatch.setattr(
-            "app.utils.document_versioning._now", lambda: t0
-        )
+        monkeypatch.setattr("app.utils.document_versioning._now", lambda: t0)
         await create_version_snapshot(db_session, db_document)
         count_after_first = await _version_count(db_session, db_document.id)
         assert count_after_first == 1
@@ -112,9 +108,7 @@ class TestCreateVersionSnapshot:
         db_document.source_markdown = "# Test\n\nQuick edit."
         db_document.content_hash = "quick123"
         t1 = t0 + timedelta(minutes=10)
-        monkeypatch.setattr(
-            "app.utils.document_versioning._now", lambda: t1
-        )
+        monkeypatch.setattr("app.utils.document_versioning._now", lambda: t1)
         await create_version_snapshot(db_session, db_document)
 
         count_after_second = await _version_count(db_session, db_document.id)
@@ -134,22 +128,15 @@ class TestCreateVersionSnapshot:
 
         # Create 5 versions spread across time: 3 older than 90 days, 2 recent
         for i in range(5):
-            db_document.source_markdown = f"Content v{i+1}"
-            db_document.content_hash = f"hash_{i+1}"
-            if i < 3:
-                t = base + timedelta(days=i)  # old
-            else:
-                t = base + timedelta(days=100 + i)  # recent
-            monkeypatch.setattr(
-                "app.utils.document_versioning._now", lambda _t=t: _t
-            )
+            db_document.source_markdown = f"Content v{i + 1}"
+            db_document.content_hash = f"hash_{i + 1}"
+            t = base + timedelta(days=i) if i < 3 else base + timedelta(days=100 + i)
+            monkeypatch.setattr("app.utils.document_versioning._now", lambda _t=t: _t)
             await create_version_snapshot(db_session, db_document)
 
         # Now trigger cleanup from a "current" time that makes the first 3 versions > 90 days old
         now = base + timedelta(days=200)
-        monkeypatch.setattr(
-            "app.utils.document_versioning._now", lambda: now
-        )
+        monkeypatch.setattr("app.utils.document_versioning._now", lambda: now)
         db_document.source_markdown = "Content v6"
         db_document.content_hash = "hash_6"
         await create_version_snapshot(db_session, db_document)
@@ -160,9 +147,7 @@ class TestCreateVersionSnapshot:
             age = now - v.created_at.replace(tzinfo=UTC)
             assert age <= timedelta(days=90), f"Version {v.version_number} is too old"
 
-    async def test_v5_cap_at_20_versions(
-        self, db_session, db_document, monkeypatch
-    ):
+    async def test_v5_cap_at_20_versions(self, db_session, db_document, monkeypatch):
         """V5: More than 20 versions triggers cap — oldest gets deleted."""
         from app.utils.document_versioning import create_version_snapshot
 
@@ -170,12 +155,10 @@ class TestCreateVersionSnapshot:
 
         # Create 21 versions (all within 90 days, each 31 min apart)
         for i in range(21):
-            db_document.source_markdown = f"Content v{i+1}"
-            db_document.content_hash = f"hash_{i+1}"
+            db_document.source_markdown = f"Content v{i + 1}"
+            db_document.content_hash = f"hash_{i + 1}"
             t = base + timedelta(minutes=31 * i)
-            monkeypatch.setattr(
-                "app.utils.document_versioning._now", lambda _t=t: _t
-            )
+            monkeypatch.setattr("app.utils.document_versioning._now", lambda _t=t: _t)
             await create_version_snapshot(db_session, db_document)
 
         versions = await _get_versions(db_session, db_document.id)
diff --git a/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py b/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py
index 9b4c73f25..c6e7b160c 100644
--- a/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py
+++ b/surfsense_backend/tests/unit/connector_indexers/test_local_folder_scan.py
@@ -51,9 +51,7 @@ class TestScanFolder:
         git.mkdir()
         (git / "config").write_text("gitconfig")
 
-        results = scan_folder(
-            str(tmp_path), exclude_patterns=["node_modules", ".git"]
-        )
+        results = scan_folder(str(tmp_path), exclude_patterns=["node_modules", ".git"])
 
         names = {r["relative_path"] for r in results}
         assert "good.md" in names
diff --git a/surfsense_web/app/(home)/login/LocalLoginForm.tsx b/surfsense_web/app/(home)/login/LocalLoginForm.tsx
index 1ebbf46b6..e94857334 100644
--- a/surfsense_web/app/(home)/login/LocalLoginForm.tsx
+++ b/surfsense_web/app/(home)/login/LocalLoginForm.tsx
@@ -160,11 +160,11 @@ export function LocalLoginForm() {
 						placeholder="you@example.com"
 						value={username}
 						onChange={(e) => setUsername(e.target.value)}
-						className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${
-							error.title
-								? "border-destructive focus:border-destructive focus:ring-destructive/40"
-								: "border-border focus:border-primary focus:ring-primary/40"
-						}`}
+						className={`mt-1 block w-full rounded-md border px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${
+							error.title
+								? "border-destructive focus:border-destructive focus:ring-destructive/40"
+								: "border-border focus:border-primary focus:ring-primary/40"
+						}`}
 						disabled={isLoggingIn}
 					/>
@@ -181,11 +181,11 @@ export function LocalLoginForm() {
 						placeholder="Enter your password"
 						value={password}
 						onChange={(e) => setPassword(e.target.value)}
-						className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${
-							error.title
-								? "border-destructive focus:border-destructive focus:ring-destructive/40"
-								: "border-border focus:border-primary focus:ring-primary/40"
-						}`}
+						className={`mt-1 block w-full rounded-md border pr-10 px-3 py-1.5 md:py-2 shadow-sm focus:outline-none focus:ring-1 bg-background text-foreground transition-all ${
+							error.title
+								? "border-destructive focus:border-destructive focus:ring-destructive/40"
+								: "border-border focus:border-primary focus:ring-primary/40"
+						}`}
 						disabled={isLoggingIn}
 					/>
-
-							{error || "An unknown error occurred"}
-
+							{error || "An unknown error occurred"}
 						{!isProcessing && (
-							Document unavailable
-
+							Document unavailable
 							{documentByChunkFetchingError.message ||
 								"An unexpected error occurred. Please try again."}
diff --git a/surfsense_web/components/settings/llm-role-manager.tsx b/surfsense_web/components/settings/llm-role-manager.tsx
index d1651b7f0..718503318 100644
--- a/surfsense_web/components/settings/llm-role-manager.tsx
+++ b/surfsense_web/components/settings/llm-role-manager.tsx
@@ -134,24 +134,27 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
preferences?.image_generation_config_id,
]);
-	const handleRoleAssignment = useCallback(async (prefKey: string, configId: string) => {
-		const value = configId === "unassigned" ? "" : parseInt(configId);
+	const handleRoleAssignment = useCallback(
+		async (prefKey: string, configId: string) => {
+			const value = configId === "unassigned" ? "" : parseInt(configId);
 
-		setAssignments((prev) => ({ ...prev, [prefKey]: value }));
-		setSavingRole(prefKey);
-		savingRef.current = true;
+			setAssignments((prev) => ({ ...prev, [prefKey]: value }));
+			setSavingRole(prefKey);
+			savingRef.current = true;
 
-		try {
-			await updatePreferences({
-				search_space_id: searchSpaceId,
-				data: { [prefKey]: value || undefined },
-			});
-			toast.success("Role assignment updated");
-		} finally {
-			setSavingRole(null);
-			savingRef.current = false;
-		}
-	}, [updatePreferences, searchSpaceId]);
+			try {
+				await updatePreferences({
+					search_space_id: searchSpaceId,
+					data: { [prefKey]: value || undefined },
+				});
+				toast.success("Role assignment updated");
+			} finally {
+				setSavingRole(null);
+				savingRef.current = false;
+			}
+		},
+		[updatePreferences, searchSpaceId]
+	);
 
 	// Combine global and custom LLM configs
 	const allLLMConfigs = [
@@ -199,10 +202,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
 					Refresh
 
 				{isAssignmentComplete && !isLoading && !hasError && (
-
 							{t("file_size_limit")}
 
 							Watch folder
-
-							Auto-sync when files change
-
+							Auto-sync when files change
 
-							{t("selected_files", { count: files.length })} · {formatFileSize(totalFileSize)}
+							{t("selected_files", { count: files.length })} ·{" "}
+							{formatFileSize(totalFileSize)}
- {t("selected_files", { count: files.length })} · {formatFileSize(totalFileSize)} + {t("selected_files", { count: files.length })} ·{" "} + {formatFileSize(totalFileSize)}