diff --git a/surfsense_backend/app/connectors/dropbox/client.py b/surfsense_backend/app/connectors/dropbox/client.py index b177c2f8d..e89800191 100644 --- a/surfsense_backend/app/connectors/dropbox/client.py +++ b/surfsense_backend/app/connectors/dropbox/client.py @@ -225,9 +225,7 @@ class DropboxClient: return all_items, None - async def get_latest_cursor( - self, path: str = "" - ) -> tuple[str | None, str | None]: + async def get_latest_cursor(self, path: str = "") -> tuple[str | None, str | None]: """Get a cursor representing the current state of a folder. Uses /2/files/list_folder/get_latest_cursor so we can later call @@ -251,9 +249,7 @@ class DropboxClient: """ all_entries: list[dict[str, Any]] = [] - resp = await self._request( - "/2/files/list_folder/continue", {"cursor": cursor} - ) + resp = await self._request("/2/files/list_folder/continue", {"cursor": cursor}) if resp.status_code == 401: return [], None, "Dropbox authentication expired (401)" if resp.status_code != 200: @@ -268,7 +264,11 @@ class DropboxClient: "/2/files/list_folder/continue", {"cursor": cursor} ) if resp.status_code != 200: - return all_entries, data.get("cursor"), f"Pagination failed: {resp.status_code}" + return ( + all_entries, + data.get("cursor"), + f"Pagination failed: {resp.status_code}", + ) data = resp.json() all_entries.extend(data.get("entries", [])) diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py index 025c3831a..83ff32e82 100644 --- a/surfsense_backend/app/connectors/google_drive/content_extractor.py +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -100,7 +100,9 @@ async def download_and_extract_content( if error: return None, drive_metadata, error - etl_filename = file_name + extension if is_google_workspace_file(mime_type) else file_name + etl_filename = ( + file_name + extension if is_google_workspace_file(mime_type) else file_name + ) markdown = await _parse_file_to_markdown(temp_file_path, etl_filename) return markdown, drive_metadata, None @@ -233,7 +235,9 @@ async def download_and_process_file( "." )[-1] - etl_filename = file_name + extension if is_google_workspace_file(mime_type) else file_name + etl_filename = ( + file_name + extension if is_google_workspace_file(mime_type) else file_name + ) logger.info(f"Processing {file_name} with Surfsense's file processor") await process_file_in_background( file_path=temp_file_path, diff --git a/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py b/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py index a0041c843..6e7ab3c4c 100644 --- a/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py +++ b/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py @@ -1,6 +1,9 @@ from app.config import config as app_config from app.etl_pipeline.etl_document import EtlRequest, EtlResult -from app.etl_pipeline.exceptions import EtlServiceUnavailableError, EtlUnsupportedFileError +from app.etl_pipeline.exceptions import ( + EtlServiceUnavailableError, + EtlUnsupportedFileError, +) from app.etl_pipeline.file_classifier import FileCategory, classify_file from app.etl_pipeline.parsers.audio import transcribe_audio from app.etl_pipeline.parsers.direct_convert import convert_file_directly @@ -78,9 +81,7 @@ class EtlPipelineService: request.file_path, request.estimated_pages ) else: - raise EtlServiceUnavailableError( - f"Unknown ETL_SERVICE: {etl_service}" - ) + raise EtlServiceUnavailableError(f"Unknown ETL_SERVICE: {etl_service}") return EtlResult( markdown_content=content, diff --git a/surfsense_backend/app/etl_pipeline/file_classifier.py b/surfsense_backend/app/etl_pipeline/file_classifier.py index bc7b4537c..4e690bcdc 100644 --- a/surfsense_backend/app/etl_pipeline/file_classifier.py +++ b/surfsense_backend/app/etl_pipeline/file_classifier.py @@ -1,27 +1,96 @@ from enum import Enum from pathlib import PurePosixPath -from app.utils.file_extensions import DOCUMENT_EXTENSIONS, get_document_extensions_for_service +from app.utils.file_extensions import ( + DOCUMENT_EXTENSIONS, + get_document_extensions_for_service, +) PLAINTEXT_EXTENSIONS = frozenset( { - ".md", ".markdown", ".txt", ".text", - ".json", ".jsonl", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf", ".xml", - ".css", ".scss", ".less", ".sass", - ".py", ".pyw", ".pyi", ".pyx", - ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", - ".java", ".kt", ".kts", ".scala", ".groovy", - ".c", ".h", ".cpp", ".cxx", ".cc", ".hpp", ".hxx", - ".cs", ".fs", ".fsx", - ".go", ".rs", ".rb", ".php", ".pl", ".pm", ".lua", ".swift", - ".m", ".mm", ".r", ".jl", - ".sh", ".bash", ".zsh", ".fish", ".bat", ".cmd", ".ps1", - ".sql", ".graphql", ".gql", - ".env", ".gitignore", ".dockerignore", ".editorconfig", - ".makefile", ".cmake", - ".log", ".rst", ".tex", ".bib", ".org", ".adoc", ".asciidoc", - ".vue", ".svelte", ".astro", - ".tf", ".hcl", ".proto", + ".md", + ".markdown", + ".txt", + ".text", + ".json", + ".jsonl", + ".yaml", + ".yml", + ".toml", + ".ini", + ".cfg", + ".conf", + ".xml", + ".css", + ".scss", + ".less", + ".sass", + ".py", + ".pyw", + ".pyi", + ".pyx", + ".js", + ".jsx", + ".ts", + ".tsx", + ".mjs", + ".cjs", + ".java", + ".kt", + ".kts", + ".scala", + ".groovy", + ".c", + ".h", + ".cpp", + ".cxx", + ".cc", + ".hpp", + ".hxx", + ".cs", + ".fs", + ".fsx", + ".go", + ".rs", + ".rb", + ".php", + ".pl", + ".pm", + ".lua", + ".swift", + ".m", + ".mm", + ".r", + ".jl", + ".sh", + ".bash", + ".zsh", + ".fish", + ".bat", + ".cmd", + ".ps1", + ".sql", + ".graphql", + ".gql", + ".env", + ".gitignore", + ".dockerignore", + ".editorconfig", + ".makefile", + ".cmake", + ".log", + ".rst", + ".tex", + ".bib", + ".org", + ".adoc", + ".asciidoc", + ".vue", + ".svelte", + ".astro", + ".tf", + ".hcl", + ".proto", } ) diff --git a/surfsense_backend/app/etl_pipeline/parsers/llamacloud.py b/surfsense_backend/app/etl_pipeline/parsers/llamacloud.py index 5115aebea..ae2a34234 100644 --- a/surfsense_backend/app/etl_pipeline/parsers/llamacloud.py +++ b/surfsense_backend/app/etl_pipeline/parsers/llamacloud.py @@ -66,16 +66,12 @@ async def parse_with_llamacloud(file_path: str, estimated_pages: int) -> str: ) if hasattr(result, "get_markdown_documents"): - markdown_docs = result.get_markdown_documents( - split_by_page=False - ) + markdown_docs = result.get_markdown_documents(split_by_page=False) if markdown_docs and hasattr(markdown_docs[0], "text"): return markdown_docs[0].text if hasattr(result, "pages") and result.pages: return "\n\n".join( - p.md - for p in result.pages - if hasattr(p, "md") and p.md + p.md for p in result.pages if hasattr(p, "md") and p.md ) return str(result) @@ -83,9 +79,7 @@ async def parse_with_llamacloud(file_path: str, estimated_pages: int) -> str: if result and hasattr(result[0], "text"): return result[0].text return "\n\n".join( - doc.page_content - if hasattr(doc, "page_content") - else str(doc) + doc.page_content if hasattr(doc, "page_content") else str(doc) for doc in result ) diff --git a/surfsense_backend/app/routes/autocomplete_routes.py b/surfsense_backend/app/routes/autocomplete_routes.py index bb56709cb..a11b7dbc1 100644 --- a/surfsense_backend/app/routes/autocomplete_routes.py +++ b/surfsense_backend/app/routes/autocomplete_routes.py @@ -1,4 +1,4 @@ -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from fastapi.responses import StreamingResponse from pydantic import BaseModel, Field from sqlalchemy.ext.asyncio import AsyncSession @@ -31,8 +31,11 @@ async def vision_autocomplete_stream( return StreamingResponse( stream_vision_autocomplete( - body.screenshot, body.search_space_id, session, - app_name=body.app_name, window_title=body.window_title, + body.screenshot, + body.search_space_id, + session, + app_name=body.app_name, + window_title=body.window_title, ), media_type="text/event-stream", headers={ diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index a30eb7297..bb20da65d 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -2647,7 +2647,12 @@ async def run_onedrive_indexing( stage="fetching", ) - total_indexed, total_skipped, error_message, total_unsupported = await index_onedrive_files( + ( + total_indexed, + total_skipped, + error_message, + total_unsupported, + ) = await index_onedrive_files( session, connector_id, search_space_id, @@ -2756,7 +2761,12 @@ async def run_dropbox_indexing( stage="fetching", ) - total_indexed, total_skipped, error_message, total_unsupported = await index_dropbox_files( + ( + total_indexed, + total_skipped, + error_message, + total_unsupported, + ) = await index_dropbox_files( session, connector_id, search_space_id, diff --git a/surfsense_backend/app/services/vision_autocomplete_service.py b/surfsense_backend/app/services/vision_autocomplete_service.py index f24a5c848..7e9408be7 100644 --- a/surfsense_backend/app/services/vision_autocomplete_service.py +++ b/surfsense_backend/app/services/vision_autocomplete_service.py @@ -1,5 +1,5 @@ import logging -from typing import AsyncGenerator +from collections.abc import AsyncGenerator from langchain_core.messages import HumanMessage, SystemMessage from sqlalchemy.ext.asyncio import AsyncSession @@ -68,8 +68,10 @@ def _is_vision_unsupported_error(e: Exception) -> bool: async def _extract_query_from_screenshot( - llm, screenshot_data_url: str, - app_name: str = "", window_title: str = "", + llm, + screenshot_data_url: str, + app_name: str = "", + window_title: str = "", ) -> str | None: """Ask the Vision LLM to describe what the user is working on. @@ -78,18 +80,26 @@ async def _extract_query_from_screenshot( """ if app_name: prompt_text = EXTRACT_QUERY_PROMPT_WITH_APP.format( - app_name=app_name, window_title=window_title, + app_name=app_name, + window_title=window_title, ) else: prompt_text = EXTRACT_QUERY_PROMPT try: - response = await llm.ainvoke([ - HumanMessage(content=[ - {"type": "text", "text": prompt_text}, - {"type": "image_url", "image_url": {"url": screenshot_data_url}}, - ]), - ]) + response = await llm.ainvoke( + [ + HumanMessage( + content=[ + {"type": "text", "text": prompt_text}, + { + "type": "image_url", + "image_url": {"url": screenshot_data_url}, + }, + ] + ), + ] + ) query = response.content.strip() if hasattr(response, "content") else "" return query if query else None except Exception as e: @@ -167,10 +177,15 @@ async def stream_vision_autocomplete( kb_context = "" try: query = await _extract_query_from_screenshot( - llm, screenshot_data_url, app_name=app_name, window_title=window_title, + llm, + screenshot_data_url, + app_name=app_name, + window_title=window_title, ) except Exception as e: - logger.warning(f"Vision autocomplete: selected model does not support vision: {e}") + logger.warning( + f"Vision autocomplete: selected model does not support vision: {e}" + ) yield streaming.format_message_start() yield streaming.format_error(vision_error_msg) yield streaming.format_done() @@ -183,16 +198,18 @@ async def stream_vision_autocomplete( messages = [ SystemMessage(content=system_prompt), - HumanMessage(content=[ - { - "type": "text", - "text": "Analyze this screenshot. Understand the full context of what the user is working on, then generate the text they most likely want to write in the active text area.", - }, - { - "type": "image_url", - "image_url": {"url": screenshot_data_url}, - }, - ]), + HumanMessage( + content=[ + { + "type": "text", + "text": "Analyze this screenshot. Understand the full context of what the user is working on, then generate the text they most likely want to write in the active text area.", + }, + { + "type": "image_url", + "image_url": {"url": screenshot_data_url}, + }, + ] + ), ] text_started = False @@ -217,7 +234,9 @@ async def stream_vision_autocomplete( yield streaming.format_text_end(text_id) if _is_vision_unsupported_error(e): - logger.warning(f"Vision autocomplete: selected model does not support vision: {e}") + logger.warning( + f"Vision autocomplete: selected model does not support vision: {e}" + ) yield streaming.format_error(vision_error_msg) else: logger.error(f"Vision autocomplete streaming error: {e}", exc_info=True) diff --git a/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py b/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py index 8d2a45e03..4a49944c2 100644 --- a/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/dropbox_indexer.py @@ -254,9 +254,7 @@ async def _download_and_index( return batch_indexed, download_failed + batch_failed -async def _remove_document( - session: AsyncSession, file_id: str, search_space_id: int -): +async def _remove_document(session: AsyncSession, file_id: str, search_space_id: int): """Remove a document that was deleted in Dropbox.""" primary_hash = compute_identifier_hash( DocumentType.DROPBOX_FILE.value, file_id, search_space_id @@ -268,8 +266,7 @@ async def _remove_document( select(Document).where( Document.search_space_id == search_space_id, Document.document_type == DocumentType.DROPBOX_FILE, - cast(Document.document_metadata["dropbox_file_id"], String) - == file_id, + cast(Document.document_metadata["dropbox_file_id"], String) == file_id, ) ) existing = result.scalar_one_or_none() @@ -671,9 +668,7 @@ async def index_dropbox_files( saved_cursor = folder_cursors.get(folder_path) can_use_delta = ( - use_delta_sync - and saved_cursor - and connector.last_indexed_at + use_delta_sync and saved_cursor and connector.last_indexed_at ) if can_use_delta: @@ -739,7 +734,11 @@ async def index_dropbox_files( await task_logger.log_task_success( log_entry, f"Successfully completed Dropbox indexing for connector {connector_id}", - {"files_processed": total_indexed, "files_skipped": total_skipped, "files_unsupported": total_unsupported}, + { + "files_processed": total_indexed, + "files_skipped": total_skipped, + "files_unsupported": total_unsupported, + }, ) logger.info( f"Dropbox indexing completed: {total_indexed} indexed, " diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index 9916e70a0..b11087fe6 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -1010,7 +1010,11 @@ async def index_google_drive_files( documents_unsupported += ru else: logger.info(f"Using full scan for connector {connector_id}") - documents_indexed, documents_skipped, documents_unsupported = await _index_full_scan( + ( + documents_indexed, + documents_skipped, + documents_unsupported, + ) = await _index_full_scan( drive_client, session, connector, @@ -1301,7 +1305,12 @@ async def index_google_drive_selected_files( log_entry, f"Batch file indexing completed with {len(errors)} error(s)", "; ".join(errors), - {"indexed": indexed, "skipped": skipped, "unsupported": unsupported, "error_count": len(errors)}, + { + "indexed": indexed, + "skipped": skipped, + "unsupported": unsupported, + "error_count": len(errors), + }, ) else: await task_logger.log_task_success( diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index f4366fb78..7f42f4638 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -23,7 +23,6 @@ from sqlalchemy import select from sqlalchemy.exc import IntegrityError, SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession -from app.config import config from app.db import ( Document, DocumentStatus, @@ -153,8 +152,6 @@ def scan_folder( return files - - async def _read_file_content(file_path: str, filename: str) -> str: """Read file content via the unified ETL pipeline. diff --git a/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py index b26442490..06517f542 100644 --- a/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py @@ -762,7 +762,11 @@ async def index_onedrive_files( await task_logger.log_task_success( log_entry, f"Successfully completed OneDrive indexing for connector {connector_id}", - {"files_processed": total_indexed, "files_skipped": total_skipped, "files_unsupported": total_unsupported}, + { + "files_processed": total_indexed, + "files_skipped": total_skipped, + "files_unsupported": total_unsupported, + }, ) logger.info( f"OneDrive indexing completed: {total_indexed} indexed, " diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index a9a6b62be..c765dbd87 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -292,8 +292,10 @@ async def process_file_in_background( ) try: - from app.etl_pipeline.file_classifier import FileCategory as EtlFileCategory - from app.etl_pipeline.file_classifier import classify_file as etl_classify + from app.etl_pipeline.file_classifier import ( + FileCategory as EtlFileCategory, + classify_file as etl_classify, + ) category = etl_classify(filename) @@ -345,8 +347,10 @@ async def _extract_file_content( """ from app.etl_pipeline.etl_document import EtlRequest from app.etl_pipeline.etl_pipeline_service import EtlPipelineService - from app.etl_pipeline.file_classifier import FileCategory - from app.etl_pipeline.file_classifier import classify_file as etl_classify + from app.etl_pipeline.file_classifier import ( + FileCategory, + classify_file as etl_classify, + ) category = etl_classify(filename) estimated_pages = 0 diff --git a/surfsense_backend/app/utils/file_extensions.py b/surfsense_backend/app/utils/file_extensions.py index 5eed36872..8d432ce56 100644 --- a/surfsense_backend/app/utils/file_extensions.py +++ b/surfsense_backend/app/utils/file_extensions.py @@ -15,30 +15,83 @@ from pathlib import PurePosixPath # Per-parser document extension sets (from official documentation) # --------------------------------------------------------------------------- -DOCLING_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset({ - ".pdf", - ".docx", ".xlsx", ".pptx", - ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".webp", -}) +DOCLING_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset( + { + ".pdf", + ".docx", + ".xlsx", + ".pptx", + ".png", + ".jpg", + ".jpeg", + ".tiff", + ".tif", + ".bmp", + ".webp", + } +) -LLAMAPARSE_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset({ - ".pdf", - ".docx", ".doc", ".xlsx", ".xls", ".pptx", ".ppt", - ".docm", ".dot", ".dotm", ".pptm", ".pot", ".potx", - ".xlsm", ".xlsb", ".xlw", - ".rtf", ".epub", - ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".tif", ".webp", ".svg", - ".odt", ".ods", ".odp", - ".hwp", ".hwpx", -}) +LLAMAPARSE_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset( + { + ".pdf", + ".docx", + ".doc", + ".xlsx", + ".xls", + ".pptx", + ".ppt", + ".docm", + ".dot", + ".dotm", + ".pptm", + ".pot", + ".potx", + ".xlsm", + ".xlsb", + ".xlw", + ".rtf", + ".epub", + ".png", + ".jpg", + ".jpeg", + ".gif", + ".bmp", + ".tiff", + ".tif", + ".webp", + ".svg", + ".odt", + ".ods", + ".odp", + ".hwp", + ".hwpx", + } +) -UNSTRUCTURED_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset({ - ".pdf", - ".docx", ".doc", ".xlsx", ".xls", ".pptx", ".ppt", - ".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".tif", ".heic", - ".rtf", ".epub", ".odt", - ".eml", ".msg", ".p7s", -}) +UNSTRUCTURED_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset( + { + ".pdf", + ".docx", + ".doc", + ".xlsx", + ".xls", + ".pptx", + ".ppt", + ".png", + ".jpg", + ".jpeg", + ".bmp", + ".tiff", + ".tif", + ".heic", + ".rtf", + ".epub", + ".odt", + ".eml", + ".msg", + ".p7s", + } +) # --------------------------------------------------------------------------- # Union (used by classify_file for routing) + service lookup diff --git a/surfsense_backend/tests/unit/connector_indexers/test_content_extraction.py b/surfsense_backend/tests/unit/connector_indexers/test_content_extraction.py index 49f9a217a..cd112e09f 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_content_extraction.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_content_extraction.py @@ -6,7 +6,6 @@ real so we know the full path from "cloud gives us bytes" to "we get markdown back" actually works. """ -import os from unittest.mock import AsyncMock, MagicMock import pytest @@ -21,6 +20,7 @@ _CSV_CONTENT = "name,age\nAlice,30\nBob,25\n" # Helpers # --------------------------------------------------------------------------- + async def _write_file(dest_path: str, content: str) -> None: """Simulate a cloud client writing downloaded bytes to disk.""" with open(dest_path, "w", encoding="utf-8") as f: @@ -43,8 +43,8 @@ def _make_download_side_effect(content: str): # Google Drive # =================================================================== -class TestGoogleDriveContentExtraction: +class TestGoogleDriveContentExtraction: async def test_txt_file_returns_markdown(self): from app.connectors.google_drive.content_extractor import ( download_and_extract_content, @@ -76,7 +76,7 @@ class TestGoogleDriveContentExtraction: file = {"id": "f2", "name": "data.csv", "mimeType": "text/csv"} - markdown, metadata, error = await download_and_extract_content(client, file) + markdown, _metadata, error = await download_and_extract_content(client, file) assert error is None assert "Alice" in markdown @@ -93,7 +93,7 @@ class TestGoogleDriveContentExtraction: file = {"id": "f3", "name": "doc.txt", "mimeType": "text/plain"} - markdown, metadata, error = await download_and_extract_content(client, file) + markdown, _metadata, error = await download_and_extract_content(client, file) assert markdown is None assert error == "Network timeout" @@ -103,8 +103,8 @@ class TestGoogleDriveContentExtraction: # OneDrive # =================================================================== -class TestOneDriveContentExtraction: +class TestOneDriveContentExtraction: async def test_txt_file_returns_markdown(self): from app.connectors.onedrive.content_extractor import ( download_and_extract_content, @@ -144,7 +144,7 @@ class TestOneDriveContentExtraction: "file": {"mimeType": "text/csv"}, } - markdown, metadata, error = await download_and_extract_content(client, file) + markdown, _metadata, error = await download_and_extract_content(client, file) assert error is None assert "Alice" in markdown @@ -164,7 +164,7 @@ class TestOneDriveContentExtraction: "file": {"mimeType": "text/plain"}, } - markdown, metadata, error = await download_and_extract_content(client, file) + markdown, _metadata, error = await download_and_extract_content(client, file) assert markdown is None assert error == "403 Forbidden" @@ -174,8 +174,8 @@ class TestOneDriveContentExtraction: # Dropbox # =================================================================== -class TestDropboxContentExtraction: +class TestDropboxContentExtraction: async def test_txt_file_returns_markdown(self): from app.connectors.dropbox.content_extractor import ( download_and_extract_content, @@ -217,7 +217,7 @@ class TestDropboxContentExtraction: "path_lower": "/data.csv", } - markdown, metadata, error = await download_and_extract_content(client, file) + markdown, _metadata, error = await download_and_extract_content(client, file) assert error is None assert "Alice" in markdown @@ -238,7 +238,7 @@ class TestDropboxContentExtraction: "path_lower": "/big.txt", } - markdown, metadata, error = await download_and_extract_content(client, file) + markdown, _metadata, error = await download_and_extract_content(client, file) assert markdown is None assert error == "Rate limited" diff --git a/surfsense_backend/tests/unit/connector_indexers/test_dropbox_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_dropbox_parallel.py index adac90085..f72135d05 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_dropbox_parallel.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_dropbox_parallel.py @@ -265,6 +265,7 @@ def full_scan_mocks(mock_dropbox_client, monkeypatch): async def _fake_skip(session, file, search_space_id): from app.connectors.dropbox.file_types import should_skip_file as _skip + item_skip, unsup_ext = _skip(file) if item_skip: if unsup_ext: @@ -468,7 +469,11 @@ async def test_selected_files_fetch_failure_isolation(selected_files_mocks): indexed, skipped, _unsupported, errors = await _run_selected( selected_files_mocks, - [("/first.txt", "first.txt"), ("/mid.txt", "mid.txt"), ("/third.txt", "third.txt")], + [ + ("/first.txt", "first.txt"), + ("/mid.txt", "mid.txt"), + ("/third.txt", "third.txt"), + ], ) assert indexed == 2 @@ -526,8 +531,18 @@ async def test_delta_sync_deletions_call_remove_document(monkeypatch): import app.tasks.connector_indexers.dropbox_indexer as _mod entries = [ - {".tag": "deleted", "name": "gone.txt", "path_lower": "/gone.txt", "id": "id:del1"}, - {".tag": "deleted", "name": "also_gone.pdf", "path_lower": "/also_gone.pdf", "id": "id:del2"}, + { + ".tag": "deleted", + "name": "gone.txt", + "path_lower": "/gone.txt", + "id": "id:del1", + }, + { + ".tag": "deleted", + "name": "also_gone.pdf", + "path_lower": "/also_gone.pdf", + "id": "id:del2", + }, ] mock_client = MagicMock() @@ -544,7 +559,7 @@ async def test_delta_sync_deletions_call_remove_document(monkeypatch): mock_task_logger = MagicMock() mock_task_logger.log_task_progress = AsyncMock() - indexed, skipped, unsupported, cursor = await _index_with_delta_sync( + _indexed, _skipped, _unsupported, cursor = await _index_with_delta_sync( mock_client, AsyncMock(), _CONNECTOR_ID, @@ -573,7 +588,9 @@ async def test_delta_sync_upserts_filtered_and_downloaded(monkeypatch): mock_client = MagicMock() mock_client.get_changes = AsyncMock(return_value=(entries, "cursor-v2", None)) - monkeypatch.setattr(_mod, "_should_skip_file", AsyncMock(return_value=(False, None))) + monkeypatch.setattr( + _mod, "_should_skip_file", AsyncMock(return_value=(False, None)) + ) download_mock = AsyncMock(return_value=(2, 0)) monkeypatch.setattr(_mod, "_download_and_index", download_mock) @@ -581,7 +598,7 @@ async def test_delta_sync_upserts_filtered_and_downloaded(monkeypatch): mock_task_logger = MagicMock() mock_task_logger.log_task_progress = AsyncMock() - indexed, skipped, unsupported, cursor = await _index_with_delta_sync( + indexed, skipped, _unsupported, cursor = await _index_with_delta_sync( mock_client, AsyncMock(), _CONNECTOR_ID, @@ -608,8 +625,18 @@ async def test_delta_sync_mix_deletions_and_upserts(monkeypatch): import app.tasks.connector_indexers.dropbox_indexer as _mod entries = [ - {".tag": "deleted", "name": "removed.txt", "path_lower": "/removed.txt", "id": "id:del1"}, - {".tag": "deleted", "name": "trashed.pdf", "path_lower": "/trashed.pdf", "id": "id:del2"}, + { + ".tag": "deleted", + "name": "removed.txt", + "path_lower": "/removed.txt", + "id": "id:del1", + }, + { + ".tag": "deleted", + "name": "trashed.pdf", + "path_lower": "/trashed.pdf", + "id": "id:del2", + }, _make_file_dict("mod1", "updated.txt"), _make_file_dict("new1", "brandnew.docx"), ] @@ -623,7 +650,9 @@ async def test_delta_sync_mix_deletions_and_upserts(monkeypatch): remove_calls.append(file_id) monkeypatch.setattr(_mod, "_remove_document", _fake_remove) - monkeypatch.setattr(_mod, "_should_skip_file", AsyncMock(return_value=(False, None))) + monkeypatch.setattr( + _mod, "_should_skip_file", AsyncMock(return_value=(False, None)) + ) download_mock = AsyncMock(return_value=(2, 0)) monkeypatch.setattr(_mod, "_download_and_index", download_mock) @@ -631,7 +660,7 @@ async def test_delta_sync_mix_deletions_and_upserts(monkeypatch): mock_task_logger = MagicMock() mock_task_logger.log_task_progress = AsyncMock() - indexed, skipped, unsupported, cursor = await _index_with_delta_sync( + indexed, skipped, _unsupported, cursor = await _index_with_delta_sync( mock_client, AsyncMock(), _CONNECTOR_ID, @@ -665,7 +694,7 @@ async def test_delta_sync_returns_new_cursor(monkeypatch): mock_task_logger = MagicMock() mock_task_logger.log_task_progress = AsyncMock() - indexed, skipped, unsupported, cursor = await _index_with_delta_sync( + indexed, skipped, _unsupported, cursor = await _index_with_delta_sync( mock_client, AsyncMock(), _CONNECTOR_ID, @@ -723,9 +752,7 @@ def orchestrator_mocks(monkeypatch): mock_client = MagicMock() mock_client.get_latest_cursor = AsyncMock(return_value=("latest-cursor-abc", None)) - monkeypatch.setattr( - _mod, "DropboxClient", MagicMock(return_value=mock_client) - ) + monkeypatch.setattr(_mod, "DropboxClient", MagicMock(return_value=mock_client)) return { "connector": mock_connector, @@ -751,7 +778,7 @@ async def test_orchestrator_uses_delta_sync_when_cursor_and_last_indexed( mock_session = AsyncMock() mock_session.commit = AsyncMock() - indexed, skipped, error, _unsupported = await index_dropbox_files( + _indexed, _skipped, error, _unsupported = await index_dropbox_files( mock_session, _CONNECTOR_ID, _SEARCH_SPACE_ID, @@ -779,7 +806,7 @@ async def test_orchestrator_falls_back_to_full_scan_without_cursor( mock_session = AsyncMock() mock_session.commit = AsyncMock() - indexed, skipped, error, _unsupported = await index_dropbox_files( + _indexed, _skipped, error, _unsupported = await index_dropbox_files( mock_session, _CONNECTOR_ID, _SEARCH_SPACE_ID, diff --git a/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py index 7fa92ce12..0ae096361 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_google_drive_parallel.py @@ -366,7 +366,7 @@ async def test_full_scan_three_phase_counts(full_scan_mocks, monkeypatch): full_scan_mocks["download_mock"].return_value = (mock_docs, 0) full_scan_mocks["batch_mock"].return_value = ([], 2, 0) - indexed, skipped, unsupported = await _run_full_scan(full_scan_mocks) + indexed, skipped, _unsupported = await _run_full_scan(full_scan_mocks) assert indexed == 3 # 1 renamed + 2 from batch assert skipped == 1 # 1 unchanged @@ -497,7 +497,7 @@ async def test_delta_sync_removals_serial_rest_parallel(monkeypatch): mock_task_logger = MagicMock() mock_task_logger.log_task_progress = AsyncMock() - indexed, skipped, unsupported = await _index_with_delta_sync( + indexed, skipped, _unsupported = await _index_with_delta_sync( MagicMock(), mock_session, MagicMock(), @@ -589,7 +589,7 @@ async def test_selected_files_single_file_indexed(selected_files_mocks): ) selected_files_mocks["download_and_index_mock"].return_value = (1, 0) - indexed, skipped, unsup, errors = await _run_selected( + indexed, skipped, _unsup, errors = await _run_selected( selected_files_mocks, [("f1", "report.pdf")], ) @@ -613,7 +613,7 @@ async def test_selected_files_fetch_failure_isolation(selected_files_mocks): ) selected_files_mocks["download_and_index_mock"].return_value = (2, 0) - indexed, skipped, unsup, errors = await _run_selected( + indexed, skipped, _unsup, errors = await _run_selected( selected_files_mocks, [("f1", "first.txt"), ("f2", "mid.txt"), ("f3", "third.txt")], ) @@ -647,7 +647,7 @@ async def test_selected_files_skip_rename_counting(selected_files_mocks): selected_files_mocks["download_and_index_mock"].return_value = (2, 0) - indexed, skipped, unsup, errors = await _run_selected( + indexed, skipped, _unsup, errors = await _run_selected( selected_files_mocks, [ ("s1", "unchanged.txt"), diff --git a/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py b/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py index 58737b20b..573ee43d8 100644 --- a/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py +++ b/surfsense_backend/tests/unit/connector_indexers/test_page_limits.py @@ -219,7 +219,9 @@ async def test_gdrive_files_exceeding_quota_rejected(gdrive_selected_mocks): None, ) - indexed, _skipped, _unsup, errors = await _run_gdrive_selected(m, [("big", "huge.pdf")]) + indexed, _skipped, _unsup, errors = await _run_gdrive_selected( + m, [("big", "huge.pdf")] + ) assert indexed == 0 assert len(errors) == 1 @@ -552,7 +554,9 @@ async def test_onedrive_over_quota_rejected(onedrive_selected_mocks): None, ) - indexed, _skipped, _unsup, errors = await _run_onedrive_selected(m, [("big", "huge.pdf")]) + indexed, _skipped, _unsup, errors = await _run_onedrive_selected( + m, [("big", "huge.pdf")] + ) assert indexed == 0 assert len(errors) == 1 diff --git a/surfsense_backend/tests/unit/connectors/test_dropbox_client.py b/surfsense_backend/tests/unit/connectors/test_dropbox_client.py index efacbcf72..31cafe550 100644 --- a/surfsense_backend/tests/unit/connectors/test_dropbox_client.py +++ b/surfsense_backend/tests/unit/connectors/test_dropbox_client.py @@ -19,6 +19,7 @@ def _make_client() -> DropboxClient: # ---------- C1: get_latest_cursor ---------- + async def test_get_latest_cursor_returns_cursor_string(monkeypatch): client = _make_client() @@ -34,12 +35,17 @@ async def test_get_latest_cursor_returns_cursor_string(monkeypatch): assert error is None client._request.assert_called_once_with( "/2/files/list_folder/get_latest_cursor", - {"path": "/my-folder", "recursive": False, "include_non_downloadable_files": True}, + { + "path": "/my-folder", + "recursive": False, + "include_non_downloadable_files": True, + }, ) # ---------- C2: get_changes returns entries and new cursor ---------- + async def test_get_changes_returns_entries_and_cursor(monkeypatch): client = _make_client() @@ -66,6 +72,7 @@ async def test_get_changes_returns_entries_and_cursor(monkeypatch): # ---------- C3: get_changes handles pagination ---------- + async def test_get_changes_handles_pagination(monkeypatch): client = _make_client() @@ -98,6 +105,7 @@ async def test_get_changes_handles_pagination(monkeypatch): # ---------- C4: get_changes raises on 401 ---------- + async def test_get_changes_returns_error_on_401(monkeypatch): client = _make_client() diff --git a/surfsense_backend/tests/unit/connectors/test_dropbox_file_types.py b/surfsense_backend/tests/unit/connectors/test_dropbox_file_types.py index 74277d47c..b4715e083 100644 --- a/surfsense_backend/tests/unit/connectors/test_dropbox_file_types.py +++ b/surfsense_backend/tests/unit/connectors/test_dropbox_file_types.py @@ -41,15 +41,40 @@ def test_non_downloadable_item_is_skipped(): @pytest.mark.parametrize( "filename", [ - "archive.zip", "backup.tar", "data.gz", "stuff.rar", "pack.7z", - "program.exe", "lib.dll", "module.so", "image.dmg", "disk.iso", - "movie.mov", "clip.avi", "video.mkv", "film.wmv", "stream.flv", + "archive.zip", + "backup.tar", + "data.gz", + "stuff.rar", + "pack.7z", + "program.exe", + "lib.dll", + "module.so", + "image.dmg", + "disk.iso", + "movie.mov", + "clip.avi", + "video.mkv", + "film.wmv", + "stream.flv", "favicon.ico", - "raw.cr2", "photo.nef", "image.arw", "pic.dng", - "design.psd", "vector.ai", "mockup.sketch", "proto.fig", - "font.ttf", "font.otf", "font.woff", "font.woff2", - "model.stl", "scene.fbx", "mesh.blend", - "local.db", "data.sqlite", "access.mdb", + "raw.cr2", + "photo.nef", + "image.arw", + "pic.dng", + "design.psd", + "vector.ai", + "mockup.sketch", + "proto.fig", + "font.ttf", + "font.otf", + "font.woff", + "font.woff2", + "model.stl", + "scene.fbx", + "mesh.blend", + "local.db", + "data.sqlite", + "access.mdb", ], ) def test_non_parseable_extensions_are_skipped(filename, mocker): @@ -63,9 +88,16 @@ def test_non_parseable_extensions_are_skipped(filename, mocker): @pytest.mark.parametrize( "filename", [ - "report.pdf", "document.docx", "sheet.xlsx", "slides.pptx", - "readme.txt", "data.csv", "page.html", "notes.md", - "config.json", "feed.xml", + "report.pdf", + "document.docx", + "sheet.xlsx", + "slides.pptx", + "readme.txt", + "data.csv", + "page.html", + "notes.md", + "config.json", + "feed.xml", ], ) def test_parseable_documents_are_not_skipped(filename, mocker): @@ -92,30 +124,33 @@ def test_universal_images_are_not_skipped(filename, mocker): assert ext is None -@pytest.mark.parametrize("filename,service,expected_skip", [ - ("old.doc", "DOCLING", True), - ("old.doc", "LLAMACLOUD", False), - ("old.doc", "UNSTRUCTURED", False), - ("legacy.xls", "DOCLING", True), - ("legacy.xls", "LLAMACLOUD", False), - ("legacy.xls", "UNSTRUCTURED", False), - ("deck.ppt", "DOCLING", True), - ("deck.ppt", "LLAMACLOUD", False), - ("deck.ppt", "UNSTRUCTURED", False), - ("icon.svg", "DOCLING", True), - ("icon.svg", "LLAMACLOUD", False), - ("anim.gif", "DOCLING", True), - ("anim.gif", "LLAMACLOUD", False), - ("photo.webp", "DOCLING", False), - ("photo.webp", "LLAMACLOUD", False), - ("photo.webp", "UNSTRUCTURED", True), - ("live.heic", "DOCLING", True), - ("live.heic", "UNSTRUCTURED", False), - ("macro.docm", "DOCLING", True), - ("macro.docm", "LLAMACLOUD", False), - ("mail.eml", "DOCLING", True), - ("mail.eml", "UNSTRUCTURED", False), -]) +@pytest.mark.parametrize( + "filename,service,expected_skip", + [ + ("old.doc", "DOCLING", True), + ("old.doc", "LLAMACLOUD", False), + ("old.doc", "UNSTRUCTURED", False), + ("legacy.xls", "DOCLING", True), + ("legacy.xls", "LLAMACLOUD", False), + ("legacy.xls", "UNSTRUCTURED", False), + ("deck.ppt", "DOCLING", True), + ("deck.ppt", "LLAMACLOUD", False), + ("deck.ppt", "UNSTRUCTURED", False), + ("icon.svg", "DOCLING", True), + ("icon.svg", "LLAMACLOUD", False), + ("anim.gif", "DOCLING", True), + ("anim.gif", "LLAMACLOUD", False), + ("photo.webp", "DOCLING", False), + ("photo.webp", "LLAMACLOUD", False), + ("photo.webp", "UNSTRUCTURED", True), + ("live.heic", "DOCLING", True), + ("live.heic", "UNSTRUCTURED", False), + ("macro.docm", "DOCLING", True), + ("macro.docm", "LLAMACLOUD", False), + ("mail.eml", "DOCLING", True), + ("mail.eml", "UNSTRUCTURED", False), + ], +) def test_parser_specific_extensions(filename, service, expected_skip, mocker): mocker.patch("app.config.config.ETL_SERVICE", service) item = {".tag": "file", "name": filename} diff --git a/surfsense_backend/tests/unit/connectors/test_google_drive_file_types.py b/surfsense_backend/tests/unit/connectors/test_google_drive_file_types.py index 5cd43736b..ab602468d 100644 --- a/surfsense_backend/tests/unit/connectors/test_google_drive_file_types.py +++ b/surfsense_backend/tests/unit/connectors/test_google_drive_file_types.py @@ -7,21 +7,37 @@ from app.connectors.google_drive.file_types import should_skip_by_extension pytestmark = pytest.mark.unit -@pytest.mark.parametrize("filename", [ - "malware.exe", "archive.zip", "video.mov", "font.woff2", "model.blend", -]) +@pytest.mark.parametrize( + "filename", + [ + "malware.exe", + "archive.zip", + "video.mov", + "font.woff2", + "model.blend", + ], +) def test_unsupported_extensions_are_skipped_regardless_of_service(filename, mocker): """Truly unsupported files are skipped no matter which ETL service is configured.""" for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"): mocker.patch("app.config.config.ETL_SERVICE", service) - skip, ext = should_skip_by_extension(filename) + skip, _ext = should_skip_by_extension(filename) assert skip is True -@pytest.mark.parametrize("filename", [ - "report.pdf", "doc.docx", "sheet.xlsx", "slides.pptx", - "readme.txt", "data.csv", "photo.png", "notes.md", -]) +@pytest.mark.parametrize( + "filename", + [ + "report.pdf", + "doc.docx", + "sheet.xlsx", + "slides.pptx", + "readme.txt", + "data.csv", + "photo.png", + "notes.md", + ], +) def test_universal_extensions_are_not_skipped(filename, mocker): """Files supported by all parsers (or handled by plaintext/direct_convert) are never skipped.""" for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"): @@ -31,16 +47,19 @@ def test_universal_extensions_are_not_skipped(filename, mocker): assert ext is None -@pytest.mark.parametrize("filename,service,expected_skip", [ - ("macro.docm", "DOCLING", True), - ("macro.docm", "LLAMACLOUD", False), - ("mail.eml", "DOCLING", True), - ("mail.eml", "UNSTRUCTURED", False), - ("photo.gif", "DOCLING", True), - ("photo.gif", "LLAMACLOUD", False), - ("photo.heic", "UNSTRUCTURED", False), - ("photo.heic", "DOCLING", True), -]) +@pytest.mark.parametrize( + "filename,service,expected_skip", + [ + ("macro.docm", "DOCLING", True), + ("macro.docm", "LLAMACLOUD", False), + ("mail.eml", "DOCLING", True), + ("mail.eml", "UNSTRUCTURED", False), + ("photo.gif", "DOCLING", True), + ("photo.gif", "LLAMACLOUD", False), + ("photo.heic", "UNSTRUCTURED", False), + ("photo.heic", "DOCLING", True), + ], +) def test_parser_specific_extensions(filename, service, expected_skip, mocker): mocker.patch("app.config.config.ETL_SERVICE", service) skip, ext = should_skip_by_extension(filename) diff --git a/surfsense_backend/tests/unit/connectors/test_onedrive_file_types.py b/surfsense_backend/tests/unit/connectors/test_onedrive_file_types.py index 61212b340..1d9124c47 100644 --- a/surfsense_backend/tests/unit/connectors/test_onedrive_file_types.py +++ b/surfsense_backend/tests/unit/connectors/test_onedrive_file_types.py @@ -45,9 +45,16 @@ def test_onenote_is_skipped(): # --------------------------------------------------------------------------- -@pytest.mark.parametrize("filename", [ - "malware.exe", "archive.zip", "video.mov", "font.woff2", "model.blend", -]) +@pytest.mark.parametrize( + "filename", + [ + "malware.exe", + "archive.zip", + "video.mov", + "font.woff2", + "model.blend", + ], +) def test_unsupported_extensions_are_skipped(filename, mocker): mocker.patch("app.config.config.ETL_SERVICE", "DOCLING") item = {"name": filename, "file": {"mimeType": "application/octet-stream"}} @@ -56,10 +63,19 @@ def test_unsupported_extensions_are_skipped(filename, mocker): assert ext is not None -@pytest.mark.parametrize("filename", [ - "report.pdf", "doc.docx", "sheet.xlsx", "slides.pptx", - "readme.txt", "data.csv", "photo.png", "notes.md", -]) +@pytest.mark.parametrize( + "filename", + [ + "report.pdf", + "doc.docx", + "sheet.xlsx", + "slides.pptx", + "readme.txt", + "data.csv", + "photo.png", + "notes.md", + ], +) def test_universal_files_are_not_skipped(filename, mocker): for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"): mocker.patch("app.config.config.ETL_SERVICE", service) @@ -69,14 +85,17 @@ def test_universal_files_are_not_skipped(filename, mocker): assert ext is None -@pytest.mark.parametrize("filename,service,expected_skip", [ - ("macro.docm", "DOCLING", True), - ("macro.docm", "LLAMACLOUD", False), - ("mail.eml", "DOCLING", True), - ("mail.eml", "UNSTRUCTURED", False), - ("photo.heic", "UNSTRUCTURED", False), - ("photo.heic", "DOCLING", True), -]) +@pytest.mark.parametrize( + "filename,service,expected_skip", + [ + ("macro.docm", "DOCLING", True), + ("macro.docm", "LLAMACLOUD", False), + ("mail.eml", "DOCLING", True), + ("mail.eml", "UNSTRUCTURED", False), + ("photo.heic", "UNSTRUCTURED", False), + ("photo.heic", "DOCLING", True), + ], +) def test_parser_specific_extensions(filename, service, expected_skip, mocker): mocker.patch("app.config.config.ETL_SERVICE", service) item = {"name": filename, "file": {"mimeType": "application/octet-stream"}} diff --git a/surfsense_backend/tests/unit/etl_pipeline/conftest.py b/surfsense_backend/tests/unit/etl_pipeline/conftest.py index 6059caa01..082ab9771 100644 --- a/surfsense_backend/tests/unit/etl_pipeline/conftest.py +++ b/surfsense_backend/tests/unit/etl_pipeline/conftest.py @@ -24,6 +24,4 @@ def _stub_package(dotted: str, fs_dir: Path) -> None: _stub_package("app", _BACKEND / "app") _stub_package("app.etl_pipeline", _BACKEND / "app" / "etl_pipeline") -_stub_package( - "app.etl_pipeline.parsers", _BACKEND / "app" / "etl_pipeline" / "parsers" -) +_stub_package("app.etl_pipeline.parsers", _BACKEND / "app" / "etl_pipeline" / "parsers") diff --git a/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py b/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py index e90847e3a..769b1dc53 100644 --- a/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py +++ b/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py @@ -144,7 +144,7 @@ async def test_extract_mp3_returns_transcription(tmp_path, mocker): # --------------------------------------------------------------------------- -# Slice 7 – DOCLING document parsing +# Slice 7 - DOCLING document parsing # --------------------------------------------------------------------------- @@ -172,7 +172,7 @@ async def test_extract_pdf_with_docling(tmp_path, mocker): # --------------------------------------------------------------------------- -# Slice 8 – UNSTRUCTURED document parsing +# Slice 8 - UNSTRUCTURED document parsing # --------------------------------------------------------------------------- @@ -208,7 +208,7 @@ async def test_extract_pdf_with_unstructured(tmp_path, mocker): # --------------------------------------------------------------------------- -# Slice 9 – LLAMACLOUD document parsing +# Slice 9 - LLAMACLOUD document parsing # --------------------------------------------------------------------------- @@ -241,9 +241,7 @@ async def test_extract_pdf_with_llamacloud(tmp_path, mocker): ) result = await EtlPipelineService().extract( - EtlRequest( - file_path=str(pdf_file), filename="report.pdf", estimated_pages=5 - ) + EtlRequest(file_path=str(pdf_file), filename="report.pdf", estimated_pages=5) ) assert result.markdown_content == "# LlamaCloud parsed" @@ -252,7 +250,7 @@ async def test_extract_pdf_with_llamacloud(tmp_path, mocker): # --------------------------------------------------------------------------- -# Slice 10 – unknown extension falls through to document ETL +# Slice 10 - unknown extension falls through to document ETL # --------------------------------------------------------------------------- @@ -279,18 +277,18 @@ async def test_unknown_extension_uses_document_etl(tmp_path, mocker): # --------------------------------------------------------------------------- -# Slice 11 – EtlRequest validation +# Slice 11 - EtlRequest validation # --------------------------------------------------------------------------- def test_etl_request_requires_filename(): """EtlRequest rejects missing filename.""" - with pytest.raises(Exception): + with pytest.raises(ValueError, match="filename must not be empty"): EtlRequest(file_path="/tmp/some.txt", filename="") # --------------------------------------------------------------------------- -# Slice 12 – unknown ETL_SERVICE raises EtlServiceUnavailableError +# Slice 12 - unknown ETL_SERVICE raises EtlServiceUnavailableError # --------------------------------------------------------------------------- @@ -310,7 +308,7 @@ async def test_unknown_etl_service_raises(tmp_path, mocker): # --------------------------------------------------------------------------- -# Slice 13 – unsupported file types are rejected before reaching any parser +# Slice 13 - unsupported file types are rejected before reaching any parser # --------------------------------------------------------------------------- @@ -321,10 +319,19 @@ def test_unknown_extension_classified_as_unsupported(): assert classify_file("random.xyz") == FileCategory.UNSUPPORTED -@pytest.mark.parametrize("filename", [ - "malware.exe", "archive.zip", "video.mov", "font.woff2", - "model.blend", "data.parquet", "package.deb", "firmware.bin", -]) +@pytest.mark.parametrize( + "filename", + [ + "malware.exe", + "archive.zip", + "video.mov", + "font.woff2", + "model.blend", + "data.parquet", + "package.deb", + "firmware.bin", + ], +) def test_unsupported_extensions_classified_correctly(filename): """Extensions not in any allowlist are classified as UNSUPPORTED.""" from app.etl_pipeline.file_classifier import FileCategory, classify_file @@ -332,18 +339,21 @@ def test_unsupported_extensions_classified_correctly(filename): assert classify_file(filename) == FileCategory.UNSUPPORTED -@pytest.mark.parametrize("filename,expected", [ - ("report.pdf", "document"), - ("doc.docx", "document"), - ("slides.pptx", "document"), - ("sheet.xlsx", "document"), - ("photo.png", "document"), - ("photo.jpg", "document"), - ("book.epub", "document"), - ("letter.odt", "document"), - ("readme.md", "plaintext"), - ("data.csv", "direct_convert"), -]) +@pytest.mark.parametrize( + "filename,expected", + [ + ("report.pdf", "document"), + ("doc.docx", "document"), + ("slides.pptx", "document"), + ("sheet.xlsx", "document"), + ("photo.png", "document"), + ("photo.jpg", "document"), + ("book.epub", "document"), + ("letter.odt", "document"), + ("readme.md", "plaintext"), + ("data.csv", "direct_convert"), + ], +) def test_parseable_extensions_classified_correctly(filename, expected): """Parseable files are classified into their correct category.""" from app.etl_pipeline.file_classifier import FileCategory, classify_file @@ -380,31 +390,34 @@ async def test_extract_zip_raises_unsupported_error(tmp_path): # --------------------------------------------------------------------------- -# Slice 14 – should_skip_for_service (per-parser document filtering) +# Slice 14 - should_skip_for_service (per-parser document filtering) # --------------------------------------------------------------------------- -@pytest.mark.parametrize("filename,etl_service,expected_skip", [ - ("file.eml", "DOCLING", True), - ("file.eml", "UNSTRUCTURED", False), - ("file.docm", "LLAMACLOUD", False), - ("file.docm", "DOCLING", True), - ("file.txt", "DOCLING", False), - ("file.csv", "LLAMACLOUD", False), - ("file.mp3", "UNSTRUCTURED", False), - ("file.exe", "LLAMACLOUD", True), - ("file.pdf", "DOCLING", False), - ("file.webp", "DOCLING", False), - ("file.webp", "UNSTRUCTURED", True), - ("file.gif", "LLAMACLOUD", False), - ("file.gif", "DOCLING", True), - ("file.heic", "UNSTRUCTURED", False), - ("file.heic", "DOCLING", True), - ("file.svg", "LLAMACLOUD", False), - ("file.svg", "DOCLING", True), - ("file.p7s", "UNSTRUCTURED", False), - ("file.p7s", "LLAMACLOUD", True), -]) +@pytest.mark.parametrize( + "filename,etl_service,expected_skip", + [ + ("file.eml", "DOCLING", True), + ("file.eml", "UNSTRUCTURED", False), + ("file.docm", "LLAMACLOUD", False), + ("file.docm", "DOCLING", True), + ("file.txt", "DOCLING", False), + ("file.csv", "LLAMACLOUD", False), + ("file.mp3", "UNSTRUCTURED", False), + ("file.exe", "LLAMACLOUD", True), + ("file.pdf", "DOCLING", False), + ("file.webp", "DOCLING", False), + ("file.webp", "UNSTRUCTURED", True), + ("file.gif", "LLAMACLOUD", False), + ("file.gif", "DOCLING", True), + ("file.heic", "UNSTRUCTURED", False), + ("file.heic", "DOCLING", True), + ("file.svg", "LLAMACLOUD", False), + ("file.svg", "DOCLING", True), + ("file.p7s", "UNSTRUCTURED", False), + ("file.p7s", "LLAMACLOUD", True), + ], +) def test_should_skip_for_service(filename, etl_service, expected_skip): from app.etl_pipeline.file_classifier import should_skip_for_service @@ -414,7 +427,7 @@ def test_should_skip_for_service(filename, etl_service, expected_skip): # --------------------------------------------------------------------------- -# Slice 14b – ETL pipeline rejects per-parser incompatible documents +# Slice 14b - ETL pipeline rejects per-parser incompatible documents # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/unit/services/test_docling_image_support.py b/surfsense_backend/tests/unit/services/test_docling_image_support.py index 430adbaf2..11ffc0ed1 100644 --- a/surfsense_backend/tests/unit/services/test_docling_image_support.py +++ b/surfsense_backend/tests/unit/services/test_docling_image_support.py @@ -30,26 +30,29 @@ def test_docling_service_does_not_restrict_allowed_formats(): fake_pdf_format_option_cls = MagicMock() - with patch.dict("sys.modules", { - "docling": MagicMock(), - "docling.backend": MagicMock(), - "docling.backend.pypdfium2_backend": MagicMock( - PyPdfiumDocumentBackend=mock_backend - ), - "docling.datamodel": MagicMock(), - "docling.datamodel.base_models": MagicMock( - InputFormat=_FakeInputFormat - ), - "docling.datamodel.pipeline_options": MagicMock( - PdfPipelineOptions=fake_pipeline_options_cls - ), - "docling.document_converter": MagicMock( - DocumentConverter=mock_converter_cls, - PdfFormatOption=fake_pdf_format_option_cls, - ), - }): - import app.services.docling_service as mod + with patch.dict( + "sys.modules", + { + "docling": MagicMock(), + "docling.backend": MagicMock(), + "docling.backend.pypdfium2_backend": MagicMock( + PyPdfiumDocumentBackend=mock_backend + ), + "docling.datamodel": MagicMock(), + "docling.datamodel.base_models": MagicMock(InputFormat=_FakeInputFormat), + "docling.datamodel.pipeline_options": MagicMock( + PdfPipelineOptions=fake_pipeline_options_cls + ), + "docling.document_converter": MagicMock( + DocumentConverter=mock_converter_cls, + PdfFormatOption=fake_pdf_format_option_cls, + ), + }, + ): from importlib import reload + + import app.services.docling_service as mod + reload(mod) mod.DoclingService() diff --git a/surfsense_backend/tests/unit/utils/test_file_extensions.py b/surfsense_backend/tests/unit/utils/test_file_extensions.py index acd8945ce..c33b39f05 100644 --- a/surfsense_backend/tests/unit/utils/test_file_extensions.py +++ b/surfsense_backend/tests/unit/utils/test_file_extensions.py @@ -17,36 +17,74 @@ def test_exe_is_not_supported_document(): assert is_supported_document_extension("malware.exe") is False -@pytest.mark.parametrize("filename", [ - "report.pdf", "doc.docx", "old.doc", - "sheet.xlsx", "legacy.xls", - "slides.pptx", "deck.ppt", - "macro.docm", "macro.xlsm", "macro.pptm", - "photo.png", "photo.jpg", "photo.jpeg", "scan.bmp", "scan.tiff", "scan.tif", - "photo.webp", "anim.gif", "iphone.heic", - "manual.rtf", "book.epub", - "letter.odt", "data.ods", "presentation.odp", - "inbox.eml", "outlook.msg", - "korean.hwpx", "korean.hwp", - "template.dot", "template.dotm", - "template.pot", "template.potx", - "binary.xlsb", "workspace.xlw", - "vector.svg", "signature.p7s", -]) +@pytest.mark.parametrize( + "filename", + [ + "report.pdf", + "doc.docx", + "old.doc", + "sheet.xlsx", + "legacy.xls", + "slides.pptx", + "deck.ppt", + "macro.docm", + "macro.xlsm", + "macro.pptm", + "photo.png", + "photo.jpg", + "photo.jpeg", + "scan.bmp", + "scan.tiff", + "scan.tif", + "photo.webp", + "anim.gif", + "iphone.heic", + "manual.rtf", + "book.epub", + "letter.odt", + "data.ods", + "presentation.odp", + "inbox.eml", + "outlook.msg", + "korean.hwpx", + "korean.hwp", + "template.dot", + "template.dotm", + "template.pot", + "template.potx", + "binary.xlsb", + "workspace.xlw", + "vector.svg", + "signature.p7s", + ], +) def test_document_extensions_are_supported(filename): from app.utils.file_extensions import is_supported_document_extension - assert is_supported_document_extension(filename) is True, f"{filename} should be supported" + assert is_supported_document_extension(filename) is True, ( + f"{filename} should be supported" + ) -@pytest.mark.parametrize("filename", [ - "malware.exe", "archive.zip", "video.mov", "font.woff2", - "model.blend", "random.xyz", "data.parquet", "package.deb", -]) +@pytest.mark.parametrize( + "filename", + [ + "malware.exe", + "archive.zip", + "video.mov", + "font.woff2", + "model.blend", + "random.xyz", + "data.parquet", + "package.deb", + ], +) def test_non_document_extensions_are_not_supported(filename): from app.utils.file_extensions import is_supported_document_extension - assert is_supported_document_extension(filename) is False, f"{filename} should NOT be supported" + assert is_supported_document_extension(filename) is False, ( + f"{filename} should NOT be supported" + ) # --------------------------------------------------------------------------- @@ -67,7 +105,7 @@ def test_union_equals_all_three_sets(): | LLAMAPARSE_DOCUMENT_EXTENSIONS | UNSTRUCTURED_DOCUMENT_EXTENSIONS ) - assert DOCUMENT_EXTENSIONS == expected + assert expected == DOCUMENT_EXTENSIONS def test_get_extensions_for_docling(): diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/DesktopContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/DesktopContent.tsx index 1522e153f..957ae9dae 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/DesktopContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/DesktopContent.tsx @@ -3,8 +3,8 @@ import { useEffect, useState } from "react"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; import { Label } from "@/components/ui/label"; -import { Switch } from "@/components/ui/switch"; import { Spinner } from "@/components/ui/spinner"; +import { Switch } from "@/components/ui/switch"; export function DesktopContent() { const [isElectron, setIsElectron] = useState(false); @@ -66,11 +66,7 @@ export function DesktopContent() { Show suggestions while typing in other applications.

- + diff --git a/surfsense_web/app/desktop/permissions/page.tsx b/surfsense_web/app/desktop/permissions/page.tsx index 6c08e35b5..37cfe826f 100644 --- a/surfsense_web/app/desktop/permissions/page.tsx +++ b/surfsense_web/app/desktop/permissions/page.tsx @@ -1,7 +1,7 @@ "use client"; -import { useEffect, useState } from "react"; import { useRouter } from "next/navigation"; +import { useEffect, useState } from "react"; import { Logo } from "@/components/Logo"; import { Button } from "@/components/ui/button"; import { Spinner } from "@/components/ui/spinner"; @@ -17,7 +17,8 @@ const STEPS = [ { id: "screen-recording", title: "Screen Recording", - description: "Lets SurfSense capture your screen to understand context and provide smart writing suggestions.", + description: + "Lets SurfSense capture your screen to understand context and provide smart writing suggestions.", action: "requestScreenRecording", field: "screenRecording" as const, }, @@ -79,7 +80,9 @@ export default function DesktopPermissionsPage() { poll(); interval = setInterval(poll, 2000); - return () => { if (interval) clearInterval(interval); }; + return () => { + if (interval) clearInterval(interval); + }; }, []); if (!isElectron) { @@ -98,7 +101,8 @@ export default function DesktopPermissionsPage() { ); } - const allGranted = permissions.accessibility === "authorized" && permissions.screenRecording === "authorized"; + const allGranted = + permissions.accessibility === "authorized" && permissions.screenRecording === "authorized"; const handleRequest = async (action: string) => { if (action === "requestScreenRecording") { @@ -175,7 +179,8 @@ export default function DesktopPermissionsPage() {

)}

- If SurfSense doesn't appear in the list, click + and select it from Applications. + If SurfSense doesn't appear in the list, click + and + select it from Applications.

)} diff --git a/surfsense_web/app/desktop/suggestion/layout.tsx b/surfsense_web/app/desktop/suggestion/layout.tsx index 36b7e037b..fd8faf099 100644 --- a/surfsense_web/app/desktop/suggestion/layout.tsx +++ b/surfsense_web/app/desktop/suggestion/layout.tsx @@ -4,10 +4,6 @@ export const metadata = { title: "SurfSense Suggestion", }; -export default function SuggestionLayout({ - children, -}: { - children: React.ReactNode; -}) { +export default function SuggestionLayout({ children }: { children: React.ReactNode }) { return
{children}
; } diff --git a/surfsense_web/app/desktop/suggestion/page.tsx b/surfsense_web/app/desktop/suggestion/page.tsx index 03944867f..6ade64883 100644 --- a/surfsense_web/app/desktop/suggestion/page.tsx +++ b/surfsense_web/app/desktop/suggestion/page.tsx @@ -72,27 +72,23 @@ export default function SuggestionPage() { return; } - const backendUrl = - process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; try { - const response = await fetch( - `${backendUrl}/api/v1/autocomplete/vision/stream`, - { - method: "POST", - headers: { - Authorization: `Bearer ${token}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - screenshot, - search_space_id: parseInt(searchSpaceId, 10), - app_name: appName || "", - window_title: windowTitle || "", - }), - signal: controller.signal, + const response = await fetch(`${backendUrl}/api/v1/autocomplete/vision/stream`, { + method: "POST", + headers: { + Authorization: `Bearer ${token}`, + "Content-Type": "application/json", }, - ); + body: JSON.stringify({ + screenshot, + search_space_id: parseInt(searchSpaceId, 10), + app_name: appName || "", + window_title: windowTitle || "", + }), + signal: controller.signal, + }); if (!response.ok) { setError(friendlyError(response.status)); @@ -132,9 +128,7 @@ export default function SuggestionPage() { } else if (parsed.type === "error") { setError(friendlyError(parsed.errorText)); } - } catch { - continue; - } + } catch {} } } } @@ -145,7 +139,7 @@ export default function SuggestionPage() { setIsLoading(false); } }, - [], + [] ); useEffect(() => { @@ -207,10 +201,18 @@ export default function SuggestionPage() {

{suggestion}

- -
diff --git a/surfsense_web/app/desktop/suggestion/suggestion.css b/surfsense_web/app/desktop/suggestion/suggestion.css index 62f4d2ea7..ef578059c 100644 --- a/surfsense_web/app/desktop/suggestion/suggestion.css +++ b/surfsense_web/app/desktop/suggestion/suggestion.css @@ -1,121 +1,125 @@ html:has(.suggestion-body), body:has(.suggestion-body) { - margin: 0 !important; - padding: 0 !important; - background: transparent !important; - overflow: hidden !important; - height: auto !important; - width: 100% !important; + margin: 0 !important; + padding: 0 !important; + background: transparent !important; + overflow: hidden !important; + height: auto !important; + width: 100% !important; } .suggestion-body { - margin: 0; - padding: 0; - background: transparent; - font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; - -webkit-font-smoothing: antialiased; - user-select: none; - -webkit-app-region: no-drag; + margin: 0; + padding: 0; + background: transparent; + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + -webkit-font-smoothing: antialiased; + user-select: none; + -webkit-app-region: no-drag; } .suggestion-tooltip { - background: #1e1e1e; - border: 1px solid #3c3c3c; - border-radius: 8px; - padding: 8px 12px; - margin: 4px; - max-width: 400px; - box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5); + background: #1e1e1e; + border: 1px solid #3c3c3c; + border-radius: 8px; + padding: 8px 12px; + margin: 4px; + max-width: 400px; + box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5); } .suggestion-text { - color: #d4d4d4; - font-size: 13px; - line-height: 1.45; - margin: 0 0 6px 0; - word-wrap: break-word; - white-space: pre-wrap; + color: #d4d4d4; + font-size: 13px; + line-height: 1.45; + margin: 0 0 6px 0; + word-wrap: break-word; + white-space: pre-wrap; } .suggestion-actions { - display: flex; - justify-content: flex-end; - gap: 4px; - border-top: 1px solid #2a2a2a; - padding-top: 6px; + display: flex; + justify-content: flex-end; + gap: 4px; + border-top: 1px solid #2a2a2a; + padding-top: 6px; } .suggestion-btn { - padding: 2px 8px; - border-radius: 3px; - border: 1px solid #3c3c3c; - font-family: inherit; - font-size: 10px; - font-weight: 500; - cursor: pointer; - line-height: 16px; - transition: background 0.15s, border-color 0.15s; + padding: 2px 8px; + border-radius: 3px; + border: 1px solid #3c3c3c; + font-family: inherit; + font-size: 10px; + font-weight: 500; + cursor: pointer; + line-height: 16px; + transition: + background 0.15s, + border-color 0.15s; } .suggestion-btn-accept { - background: #2563eb; - border-color: #3b82f6; - color: #fff; + background: #2563eb; + border-color: #3b82f6; + color: #fff; } .suggestion-btn-accept:hover { - background: #1d4ed8; + background: #1d4ed8; } .suggestion-btn-dismiss { - background: #2a2a2a; - color: #999; + background: #2a2a2a; + color: #999; } .suggestion-btn-dismiss:hover { - background: #333; - color: #ccc; + background: #333; + color: #ccc; } .suggestion-error { - border-color: #5c2626; + border-color: #5c2626; } .suggestion-error-text { - color: #f48771; - font-size: 12px; + color: #f48771; + font-size: 12px; } .suggestion-loading { - display: flex; - gap: 5px; - padding: 2px 0; - justify-content: center; + display: flex; + gap: 5px; + padding: 2px 0; + justify-content: center; } .suggestion-dot { - width: 4px; - height: 4px; - border-radius: 50%; - background: #666; - animation: suggestion-pulse 1.2s infinite ease-in-out; + width: 4px; + height: 4px; + border-radius: 50%; + background: #666; + animation: suggestion-pulse 1.2s infinite ease-in-out; } .suggestion-dot:nth-child(2) { - animation-delay: 0.15s; + animation-delay: 0.15s; } .suggestion-dot:nth-child(3) { - animation-delay: 0.3s; + animation-delay: 0.3s; } @keyframes suggestion-pulse { - 0%, 80%, 100% { - opacity: 0.3; - transform: scale(0.8); - } - 40% { - opacity: 1; - transform: scale(1.1); - } + 0%, + 80%, + 100% { + opacity: 0.3; + transform: scale(0.8); + } + 40% { + opacity: 1; + transform: scale(1.1); + } } diff --git a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx index 5e2b8452b..b4c049c5c 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx @@ -173,9 +173,7 @@ export const ConnectorAccountsListView: FC = ({ )}
- - {buttonText} - + {buttonText} diff --git a/surfsense_web/components/assistant-ui/connector-popup/views/youtube-crawler-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/views/youtube-crawler-view.tsx index c16072bca..8982b16a8 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/views/youtube-crawler-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/views/youtube-crawler-view.tsx @@ -337,9 +337,7 @@ export const YouTubeCrawlerView: FC = ({ searchSpaceId, disabled={isSubmitting || isFetchingPlaylist || videoTags.length === 0} className="relative text-xs sm:text-sm min-w-[140px] disabled:opacity-50 disabled:cursor-not-allowed disabled:pointer-events-none" > - - {t("submit")} - + {t("submit")} {isSubmitting && } diff --git a/surfsense_web/components/assistant-ui/document-upload-popup.tsx b/surfsense_web/components/assistant-ui/document-upload-popup.tsx index 7b0409345..0b38979a5 100644 --- a/surfsense_web/components/assistant-ui/document-upload-popup.tsx +++ b/surfsense_web/components/assistant-ui/document-upload-popup.tsx @@ -132,9 +132,7 @@ const DocumentUploadPopupContent: FC<{
-

- Upload Documents -

+

Upload Documents

Upload and sync your documents to your search space diff --git a/surfsense_web/components/assistant-ui/image.tsx b/surfsense_web/components/assistant-ui/image.tsx index c147eede4..59781abcf 100644 --- a/surfsense_web/components/assistant-ui/image.tsx +++ b/surfsense_web/components/assistant-ui/image.tsx @@ -3,10 +3,10 @@ import type { ImageMessagePartComponent } from "@assistant-ui/react"; import { cva, type VariantProps } from "class-variance-authority"; import { ImageIcon, ImageOffIcon } from "lucide-react"; +import NextImage from "next/image"; import { memo, type PropsWithChildren, useEffect, useRef, useState } from "react"; import { createPortal } from "react-dom"; import { cn } from "@/lib/utils"; -import NextImage from 'next/image'; const imageVariants = cva("aui-image-root relative overflow-hidden rounded-lg", { variants: { @@ -88,23 +88,23 @@ function ImagePreview({

) : isDataOrBlobUrl(src) ? ( - // biome-ignore lint/performance/noImgElement: data/blob URLs need plain img - {alt} { - if (typeof src === "string") setLoadedSrc(src); - onLoad?.(e); - }} - onError={(e) => { - if (typeof src === "string") setErrorSrc(src); - onError?.(e); - }} - {...props} - /> - ) : ( + // biome-ignore lint/performance/noImgElement: data/blob URLs need plain img + {alt} { + if (typeof src === "string") setLoadedSrc(src); + onLoad?.(e); + }} + onError={(e) => { + if (typeof src === "string") setErrorSrc(src); + onError?.(e); + }} + {...props} + /> + ) : ( // biome-ignore lint/performance/noImgElement: intentional for dynamic external URLs // { - if (typeof src === "string") setLoadedSrc(src); - onLoad?.(); - }} - onError={() => { - if (typeof src === "string") setErrorSrc(src); - onError?.(); - }} - unoptimized={false} - {...props} - /> + fill + src={src || ""} + alt={alt} + sizes="(max-width: 768px) 100vw, (max-width: 1200px) 80vw, 60vw" + className={cn("block object-contain", !loaded && "invisible", className)} + onLoad={() => { + if (typeof src === "string") setLoadedSrc(src); + onLoad?.(); + }} + onError={() => { + if (typeof src === "string") setErrorSrc(src); + onError?.(); + }} + unoptimized={false} + {...props} + /> )}
); @@ -162,8 +162,8 @@ type ImageZoomProps = PropsWithChildren<{ alt?: string; }>; function isDataOrBlobUrl(src: string | undefined): boolean { - if (!src || typeof src !== "string") return false; - return src.startsWith("data:") || src.startsWith("blob:"); + if (!src || typeof src !== "string") return false; + return src.startsWith("data:") || src.startsWith("blob:"); } function ImageZoom({ src, alt = "Image preview", children }: ImageZoomProps) { const [isMounted, setIsMounted] = useState(false); @@ -216,38 +216,38 @@ function ImageZoom({ src, alt = "Image preview", children }: ImageZoomProps) { > {/** biome-ignore lint/performance/noImgElement: */} {isDataOrBlobUrl(src) ? ( - // biome-ignore lint/performance/noImgElement: data/blob URLs need plain img - {alt} { - e.stopPropagation(); - handleClose(); - }} - onKeyDown={(e) => { - if (e.key === "Enter") { - e.stopPropagation(); - handleClose(); - } - }} - /> - ) : ( + // biome-ignore lint/performance/noImgElement: data/blob URLs need plain img + {alt} { + e.stopPropagation(); + handleClose(); + }} + onKeyDown={(e) => { + if (e.key === "Enter") { + e.stopPropagation(); + handleClose(); + } + }} + /> + ) : ( { - e.stopPropagation(); - handleClose(); - }} - unoptimized={false} - /> - )} + data-slot="image-zoom-content" + fill + src={src} + alt={alt} + sizes="90vw" + className="aui-image-zoom-content fade-in zoom-in-95 object-contain duration-200" + onClick={(e) => { + e.stopPropagation(); + handleClose(); + }} + unoptimized={false} + /> + )} , document.body )} diff --git a/surfsense_web/components/assistant-ui/thread-list.tsx b/surfsense_web/components/assistant-ui/thread-list.tsx index e8b8db6fe..bca36c037 100644 --- a/surfsense_web/components/assistant-ui/thread-list.tsx +++ b/surfsense_web/components/assistant-ui/thread-list.tsx @@ -241,9 +241,7 @@ const ThreadListItemComponent = memo(function ThreadListItemComponent({

{thread.title || "New Chat"}

-

- {relativeTime} -

+

{relativeTime}

diff --git a/surfsense_web/components/assistant-ui/tool-fallback.tsx b/surfsense_web/components/assistant-ui/tool-fallback.tsx index 40118d2e4..b658dba6d 100644 --- a/surfsense_web/components/assistant-ui/tool-fallback.tsx +++ b/surfsense_web/components/assistant-ui/tool-fallback.tsx @@ -26,7 +26,8 @@ export const ToolFallback: ToolCallMessagePartComponent = ({ ); const serializedResult = useMemo( - () => (result !== undefined && typeof result !== "string" ? JSON.stringify(result, null, 2) : null), + () => + result !== undefined && typeof result !== "string" ? JSON.stringify(result, null, 2) : null, [result] ); diff --git a/surfsense_web/components/chat-comments/comment-composer/comment-composer.tsx b/surfsense_web/components/chat-comments/comment-composer/comment-composer.tsx index e14022f5c..1c4383388 100644 --- a/surfsense_web/components/chat-comments/comment-composer/comment-composer.tsx +++ b/surfsense_web/components/chat-comments/comment-composer/comment-composer.tsx @@ -300,15 +300,15 @@ export function CommentComposer({
{onCancel && ( - + )}
diff --git a/surfsense_web/components/documents/DocumentNode.tsx b/surfsense_web/components/documents/DocumentNode.tsx index fe796b5be..d8e37df1c 100644 --- a/surfsense_web/components/documents/DocumentNode.tsx +++ b/surfsense_web/components/documents/DocumentNode.tsx @@ -207,9 +207,15 @@ export const DocumentNode = React.memo(function DocumentNode({ ); })()} - + - {doc.title} + + {doc.title} + {doc.title} @@ -276,10 +282,7 @@ export const DocumentNode = React.memo(function DocumentNode({ Versions )} - onDelete(doc)} - > + onDelete(doc)}> Delete @@ -321,10 +324,7 @@ export const DocumentNode = React.memo(function DocumentNode({ Versions )} - onDelete(doc)} - > + onDelete(doc)}> Delete diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index 371d00f42..47cd17596 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -97,7 +97,10 @@ export function FolderTreeView({ const handleCancelRename = useCallback(() => setRenamingFolderId(null), [setRenamingFolderId]); const effectiveActiveTypes = useMemo(() => { - if (activeTypes.includes("FILE" as DocumentTypeEnum) && !activeTypes.includes("LOCAL_FOLDER_FILE" as DocumentTypeEnum)) { + if ( + activeTypes.includes("FILE" as DocumentTypeEnum) && + !activeTypes.includes("LOCAL_FOLDER_FILE" as DocumentTypeEnum) + ) { return [...activeTypes, "LOCAL_FOLDER_FILE" as DocumentTypeEnum]; } return activeTypes; @@ -110,7 +113,9 @@ export function FolderTreeView({ function check(folderId: number): boolean { if (match[folderId] !== undefined) return match[folderId]; const childDocs = (docsByFolder[folderId] ?? []).some( - (d) => effectiveActiveTypes.length === 0 || effectiveActiveTypes.includes(d.document_type as DocumentTypeEnum) + (d) => + effectiveActiveTypes.length === 0 || + effectiveActiveTypes.includes(d.document_type as DocumentTypeEnum) ); if (childDocs) { match[folderId] = true; @@ -201,7 +206,9 @@ export function FolderTreeView({ ? childFolders.filter((f) => hasDescendantMatch[f.id]) : childFolders; const childDocs = (docsByFolder[key] ?? []).filter( - (d) => effectiveActiveTypes.length === 0 || effectiveActiveTypes.includes(d.document_type as DocumentTypeEnum) + (d) => + effectiveActiveTypes.length === 0 || + effectiveActiveTypes.includes(d.document_type as DocumentTypeEnum) ); const nodes: React.ReactNode[] = []; @@ -223,7 +230,7 @@ export function FolderTreeView({ depth={depth} isExpanded={isExpanded} isRenaming={renamingFolderId === f.id} - selectionState={folderSelectionStates[f.id] ?? "none"} + selectionState={folderSelectionStates[f.id] ?? "none"} processingState={folderProcessingStates[f.id] ?? "idle"} onToggleSelect={onToggleFolderSelect} onToggleExpand={onToggleExpand} diff --git a/surfsense_web/components/editor/plate-editor.tsx b/surfsense_web/components/editor/plate-editor.tsx index ed239ffa3..66e9a0e5e 100644 --- a/surfsense_web/components/editor/plate-editor.tsx +++ b/surfsense_web/components/editor/plate-editor.tsx @@ -158,17 +158,18 @@ export function PlateEditor({ // When not forced read-only, the user can toggle between editing/viewing. const canToggleMode = !readOnly; - const contextProviderValue = useMemo(()=> ({ - onSave, - hasUnsavedChanges, - isSaving, - canToggleMode, - }), [onSave, hasUnsavedChanges, isSaving, canToggleMode]); + const contextProviderValue = useMemo( + () => ({ + onSave, + hasUnsavedChanges, + isSaving, + canToggleMode, + }), + [onSave, hasUnsavedChanges, isSaving, canToggleMode] + ); return ( - +
- {title} + {title}
diff --git a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx index 74c3c64de..7d4cd9901 100644 --- a/surfsense_web/components/layout/providers/LayoutDataProvider.tsx +++ b/surfsense_web/components/layout/providers/LayoutDataProvider.tsx @@ -370,7 +370,8 @@ export function LayoutDataProvider({ searchSpaceId, children }: LayoutDataProvid url: "#announcements", icon: Megaphone, isActive: isAnnouncementsSidebarOpen, - badge: announcementUnreadCount > 0 ? formatInboxCount(announcementUnreadCount) : undefined, + badge: + announcementUnreadCount > 0 ? formatInboxCount(announcementUnreadCount) : undefined, }, ] as (NavItem | null)[] ).filter((item): item is NavItem => item !== null), diff --git a/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx index 0e2163dd4..3459fccf6 100644 --- a/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/AllPrivateChatsSidebar.tsx @@ -376,24 +376,24 @@ export function AllPrivateChatsSidebarContent({ {thread.title || "New Chat"} ) : ( - - - - - -

- {t("updated") || "Updated"}:{" "} - {format(new Date(thread.updatedAt), "MMM d, yyyy 'at' h:mm a")} -

-
-
+ + + + + +

+ {t("updated") || "Updated"}:{" "} + {format(new Date(thread.updatedAt), "MMM d, yyyy 'at' h:mm a")} +

+
+
)} {thread.title || "New Chat"} ) : ( - - - - - -

- {t("updated") || "Updated"}:{" "} - {format(new Date(thread.updatedAt), "MMM d, yyyy 'at' h:mm a")} -

-
-
+ + + + + +

+ {t("updated") || "Updated"}:{" "} + {format(new Date(thread.updatedAt), "MMM d, yyyy 'at' h:mm a")} +

+
+
)}
, img: ({ src, alt, width: _w, height: _h, ...props }) => { - const isDataOrUnknownUrl = typeof src === "string" && (src.startsWith("data:") || !src.startsWith("http")); + const isDataOrUnknownUrl = + typeof src === "string" && (src.startsWith("data:") || !src.startsWith("http")); - return isDataOrUnknownUrl ? ( - // eslint-disable-next-line @next/next/no-img-element - {alt - ) : ( - {alt - ); -}, + return isDataOrUnknownUrl ? ( + // eslint-disable-next-line @next/next/no-img-element + {alt + ) : ( + {alt + ); + }, table: ({ ...props }) => (
diff --git a/surfsense_web/components/settings/user-settings-dialog.tsx b/surfsense_web/components/settings/user-settings-dialog.tsx index b74ff973b..0afdfb2b7 100644 --- a/surfsense_web/components/settings/user-settings-dialog.tsx +++ b/surfsense_web/components/settings/user-settings-dialog.tsx @@ -5,10 +5,10 @@ import { Globe, KeyRound, Monitor, Receipt, Sparkles, User } from "lucide-react" import { useTranslations } from "next-intl"; import { ApiKeyContent } from "@/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent"; import { CommunityPromptsContent } from "@/app/dashboard/[search_space_id]/user-settings/components/CommunityPromptsContent"; +import { DesktopContent } from "@/app/dashboard/[search_space_id]/user-settings/components/DesktopContent"; import { ProfileContent } from "@/app/dashboard/[search_space_id]/user-settings/components/ProfileContent"; import { PromptsContent } from "@/app/dashboard/[search_space_id]/user-settings/components/PromptsContent"; import { PurchaseHistoryContent } from "@/app/dashboard/[search_space_id]/user-settings/components/PurchaseHistoryContent"; -import { DesktopContent } from "@/app/dashboard/[search_space_id]/user-settings/components/DesktopContent"; import { userSettingsDialogAtom } from "@/atoms/settings/settings-dialog.atoms"; import { SettingsDialog } from "@/components/settings/settings-dialog"; diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx index c8ce195aa..36a24e299 100644 --- a/surfsense_web/components/sources/DocumentUploadTab.tsx +++ b/surfsense_web/components/sources/DocumentUploadTab.tsx @@ -471,13 +471,13 @@ export function DocumentUploadTab({ )) ) : ( - )} @@ -684,17 +689,17 @@ export function DocumentUploadTab({ -
- {supportedExtensions.map((ext) => ( - - {ext} - - ))} -
+
+ {supportedExtensions.map((ext) => ( + + {ext} + + ))} +
diff --git a/surfsense_web/components/tool-ui/citation/citation-list.tsx b/surfsense_web/components/tool-ui/citation/citation-list.tsx index 75b02bf3d..bbe869a09 100644 --- a/surfsense_web/components/tool-ui/citation/citation-list.tsx +++ b/surfsense_web/components/tool-ui/citation/citation-list.tsx @@ -2,13 +2,12 @@ import type { LucideIcon } from "lucide-react"; import { Code2, Database, ExternalLink, File, FileText, Globe, Newspaper } from "lucide-react"; +import NextImage from "next/image"; import * as React from "react"; import { openSafeNavigationHref, resolveSafeNavigationHref } from "../shared/media"; import { cn, Popover, PopoverContent, PopoverTrigger } from "./_adapter"; import { Citation } from "./citation"; import type { CitationType, CitationVariant, SerializableCitation } from "./schema"; -import NextImage from 'next/image'; - const TYPE_ICONS: Record = { webpage: Globe, @@ -264,9 +263,9 @@ function OverflowItem({ citation, onClick }: OverflowItemProps) { className="size-4.5 rounded-full object-cover" unoptimized={true} /> - ) : ( + ) : (