chore: ran backend and frontend linting

2026-04-27 09:46:25 +02:00 · 2026-01-17 16:30:07 +05:30 · 2026-01-17 16:30:07 +05:30 · e0be1b9133
commit e0be1b9133
parent f538d59ca3
8 changed files with 227 additions and 126 deletions
--- a/surfsense_backend/app/tasks/document_processors/file_processors.py
+++ b/surfsense_backend/app/tasks/document_processors/file_processors.py
@ -57,15 +57,15 @@ def get_google_drive_unique_identifier(
 ) -> tuple[str, str | None]:
    """
    Get unique identifier hash for a file, with special handling for Google Drive.
-    
+
    For Google Drive files, uses file_id as the unique identifier (doesn't change on rename).
    For other files, uses filename.
-    
+
    Args:
        connector: Optional connector info dict with type and metadata
        filename: The filename (used for non-Google Drive files or as fallback)
        search_space_id: The search space ID
-    
+
    Returns:
        Tuple of (primary_hash, legacy_hash or None)
        - For Google Drive: (file_id_based_hash, filename_based_hash for migration)
@ -74,7 +74,7 @@ def get_google_drive_unique_identifier(
    if connector and connector.get("type") == DocumentType.GOOGLE_DRIVE_FILE:
        metadata = connector.get("metadata", {})
        file_id = metadata.get("google_drive_file_id")
-        
+
        if file_id:
            # New method: use file_id as unique identifier (doesn't change on rename)
            primary_hash = generate_unique_identifier_hash(
@ -86,7 +86,7 @@ def get_google_drive_unique_identifier(
                DocumentType.GOOGLE_DRIVE_FILE, filename, search_space_id
            )
            return primary_hash, legacy_hash
-    
+
    # For non-Google Drive files, use filename as before
    primary_hash = generate_unique_identifier_hash(
        DocumentType.FILE, filename, search_space_id
@ -104,7 +104,7 @@ async def handle_existing_document_update(
 ) -> tuple[bool, Document | None]:
    """
    Handle update logic for an existing document.
-    
+
    Args:
        session: Database session
        existing_document: The existing document found in database
@ -112,7 +112,7 @@ async def handle_existing_document_update(
        connector: Optional connector info
        filename: Current filename
        primary_hash: The primary hash (file_id based for Google Drive)
-    
+
    Returns:
        Tuple of (should_skip_processing, document_to_return)
        - (True, document): Content unchanged, just return existing document
@ -122,7 +122,7 @@ async def handle_existing_document_update(
    if existing_document.unique_identifier_hash != primary_hash:
        existing_document.unique_identifier_hash = primary_hash
        logging.info(f"Migrated document to file_id-based identifier: {filename}")
-    
+
    # Check if content has changed
    if existing_document.content_hash == content_hash:
        # Content unchanged - check if we need to update metadata (e.g., filename changed)
@ -131,12 +131,14 @@ async def handle_existing_document_update(
            new_name = connector_metadata.get("google_drive_file_name")
            # Check both possible keys for old name (FILE_NAME is used in stored documents)
            doc_metadata = existing_document.document_metadata or {}
-            old_name = doc_metadata.get("FILE_NAME") or doc_metadata.get("google_drive_file_name")
-            
+            old_name = doc_metadata.get("FILE_NAME") or doc_metadata.get(
+                "google_drive_file_name"
+            )
+
            if new_name and old_name and old_name != new_name:
                # File was renamed - update title and metadata, skip expensive processing
                from sqlalchemy.orm.attributes import flag_modified
-                
+
                existing_document.title = new_name
                if not existing_document.document_metadata:
                    existing_document.document_metadata = {}
@ -144,8 +146,10 @@ async def handle_existing_document_update(
                existing_document.document_metadata["google_drive_file_name"] = new_name
                flag_modified(existing_document, "document_metadata")
                await session.commit()
-                logging.info(f"File renamed in Google Drive: '{old_name}' → '{new_name}' (no re-processing needed)")
-        
+                logging.info(
+                    f"File renamed in Google Drive: '{old_name}' → '{new_name}' (no re-processing needed)"
+                )
+
        logging.info(f"Document for file {filename} unchanged. Skipping.")
        return True, existing_document
    else:
@ -163,25 +167,29 @@ async def find_existing_document_with_migration(
    """
    Find existing document, checking both new hash and legacy hash for migration,
    with fallback to content_hash for cross-source deduplication.
-    
+
    Args:
        session: Database session
        primary_hash: The primary hash (file_id based for Google Drive)
        legacy_hash: The legacy hash (filename based) for migration, or None
        content_hash: The content hash for fallback deduplication, or None
-    
+
    Returns:
        Existing document if found, None otherwise
    """
    # First check with primary hash (new method)
    existing_document = await check_document_by_unique_identifier(session, primary_hash)
-    
+
    # If not found and we have a legacy hash, check with that (migration path)
    if not existing_document and legacy_hash:
-        existing_document = await check_document_by_unique_identifier(session, legacy_hash)
+        existing_document = await check_document_by_unique_identifier(
+            session, legacy_hash
+        )
        if existing_document:
-            logging.info("Found legacy document (filename-based hash), will migrate to file_id-based hash")
-    
+            logging.info(
+                "Found legacy document (filename-based hash), will migrate to file_id-based hash"
+            )
+
    # Fallback: check by content_hash to catch duplicates from different sources
    # This prevents unique constraint violations when the same content exists
    # under a different unique_identifier (e.g., manual upload vs Google Drive)
@ -192,7 +200,7 @@ async def find_existing_document_with_migration(
                f"Found duplicate content from different source (content_hash match). "
                f"Original document ID: {existing_document.id}, type: {existing_document.document_type}"
            )
-    
+
    return existing_document


@ -342,7 +350,12 @@ async def add_received_file_document_using_unstructured(
        if existing_document:
            # Handle existing document (rename detection, content change check)
            should_skip, doc = await handle_existing_document_update(
-                session, existing_document, content_hash, connector, file_name, primary_hash
+                session,
+                existing_document,
+                content_hash,
+                connector,
+                file_name,
+                primary_hash,
            )
            if should_skip:
                return doc
@ -402,7 +415,7 @@ async def add_received_file_document_using_unstructured(
            doc_type = DocumentType.FILE
            if connector and connector.get("type") == DocumentType.GOOGLE_DRIVE_FILE:
                doc_type = DocumentType.GOOGLE_DRIVE_FILE
-            
+
            document = Document(
                search_space_id=search_space_id,
                title=file_name,
@ -476,7 +489,12 @@ async def add_received_file_document_using_llamacloud(
        if existing_document:
            # Handle existing document (rename detection, content change check)
            should_skip, doc = await handle_existing_document_update(
-                session, existing_document, content_hash, connector, file_name, primary_hash
+                session,
+                existing_document,
+                content_hash,
+                connector,
+                file_name,
+                primary_hash,
            )
            if should_skip:
                return doc
@ -536,7 +554,7 @@ async def add_received_file_document_using_llamacloud(
            doc_type = DocumentType.FILE
            if connector and connector.get("type") == DocumentType.GOOGLE_DRIVE_FILE:
                doc_type = DocumentType.GOOGLE_DRIVE_FILE
-            
+
            document = Document(
                search_space_id=search_space_id,
                title=file_name,
@ -611,7 +629,12 @@ async def add_received_file_document_using_docling(
        if existing_document:
            # Handle existing document (rename detection, content change check)
            should_skip, doc = await handle_existing_document_update(
-                session, existing_document, content_hash, connector, file_name, primary_hash
+                session,
+                existing_document,
+                content_hash,
+                connector,
+                file_name,
+                primary_hash,
            )
            if should_skip:
                return doc
@ -695,7 +718,7 @@ async def add_received_file_document_using_docling(
            doc_type = DocumentType.FILE
            if connector and connector.get("type") == DocumentType.GOOGLE_DRIVE_FILE:
                doc_type = DocumentType.GOOGLE_DRIVE_FILE
-            
+
            document = Document(
                search_space_id=search_space_id,
                title=file_name,
--- a/surfsense_backend/app/tasks/document_processors/markdown_processor.py
+++ b/surfsense_backend/app/tasks/document_processors/markdown_processor.py
@ -31,22 +31,22 @@ def _get_google_drive_unique_identifier(
 ) -> tuple[str, str | None]:
    """
    Get unique identifier hash for a file, with special handling for Google Drive.
-    
+
    For Google Drive files, uses file_id as the unique identifier (doesn't change on rename).
    For other files, uses filename.
-    
+
    Args:
        connector: Optional connector info dict with type and metadata
        filename: The filename (used for non-Google Drive files or as fallback)
        search_space_id: The search space ID
-    
+
    Returns:
        Tuple of (primary_hash, legacy_hash or None)
    """
    if connector and connector.get("type") == DocumentType.GOOGLE_DRIVE_FILE:
        metadata = connector.get("metadata", {})
        file_id = metadata.get("google_drive_file_id")
-        
+
        if file_id:
            primary_hash = generate_unique_identifier_hash(
                DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
@ -55,7 +55,7 @@ def _get_google_drive_unique_identifier(
                DocumentType.GOOGLE_DRIVE_FILE, filename, search_space_id
            )
            return primary_hash, legacy_hash
-    
+
    primary_hash = generate_unique_identifier_hash(
        DocumentType.FILE, filename, search_space_id
    )
@ -73,12 +73,16 @@ async def _find_existing_document_with_migration(
    with fallback to content_hash for cross-source deduplication.
    """
    existing_document = await check_document_by_unique_identifier(session, primary_hash)
-    
+
    if not existing_document and legacy_hash:
-        existing_document = await check_document_by_unique_identifier(session, legacy_hash)
+        existing_document = await check_document_by_unique_identifier(
+            session, legacy_hash
+        )
        if existing_document:
-            logging.info("Found legacy document (filename-based hash), will migrate to file_id-based hash")
-    
+            logging.info(
+                "Found legacy document (filename-based hash), will migrate to file_id-based hash"
+            )
+
    # Fallback: check by content_hash to catch duplicates from different sources
    if not existing_document and content_hash:
        existing_document = await check_duplicate_document(session, content_hash)
@ -87,7 +91,7 @@ async def _find_existing_document_with_migration(
                f"Found duplicate content from different source (content_hash match). "
                f"Original document ID: {existing_document.id}, type: {existing_document.document_type}"
            )
-    
+
    return existing_document


@ -103,7 +107,7 @@ async def _handle_existing_document_update(
 ) -> tuple[bool, Document | None]:
    """
    Handle update logic for an existing document.
-    
+
    Returns:
        Tuple of (should_skip_processing, document_to_return)
    """
@ -111,7 +115,7 @@ async def _handle_existing_document_update(
    if existing_document.unique_identifier_hash != primary_hash:
        existing_document.unique_identifier_hash = primary_hash
        logging.info(f"Migrated document to file_id-based identifier: {filename}")
-    
+
    # Check if content has changed
    if existing_document.content_hash == content_hash:
        # Content unchanged - check if we need to update metadata (e.g., filename changed)
@ -120,12 +124,16 @@ async def _handle_existing_document_update(
            new_name = connector_metadata.get("google_drive_file_name")
            # Check both possible keys for old name (FILE_NAME is used in stored documents)
            doc_metadata = existing_document.document_metadata or {}
-            old_name = doc_metadata.get("FILE_NAME") or doc_metadata.get("google_drive_file_name") or doc_metadata.get("file_name")
-            
+            old_name = (
+                doc_metadata.get("FILE_NAME")
+                or doc_metadata.get("google_drive_file_name")
+                or doc_metadata.get("file_name")
+            )
+
            if new_name and old_name and old_name != new_name:
                # File was renamed - update title and metadata, skip expensive processing
                from sqlalchemy.orm.attributes import flag_modified
-                
+
                existing_document.title = new_name
                if not existing_document.document_metadata:
                    existing_document.document_metadata = {}
@ -134,8 +142,10 @@ async def _handle_existing_document_update(
                existing_document.document_metadata["google_drive_file_name"] = new_name
                flag_modified(existing_document, "document_metadata")
                await session.commit()
-                logging.info(f"File renamed in Google Drive: '{old_name}' → '{new_name}' (no re-processing needed)")
-        
+                logging.info(
+                    f"File renamed in Google Drive: '{old_name}' → '{new_name}' (no re-processing needed)"
+                )
+
        await task_logger.log_task_success(
            log_entry,
            f"Markdown file document unchanged: {filename}",
@ -147,7 +157,9 @@ async def _handle_existing_document_update(
        logging.info(f"Document for markdown file {filename} unchanged. Skipping.")
        return True, existing_document
    else:
-        logging.info(f"Content changed for markdown file {filename}. Updating document.")
+        logging.info(
+            f"Content changed for markdown file {filename}. Updating document."
+        )
        return False, None


@ -204,8 +216,14 @@ async def add_received_markdown_file_document(
        if existing_document:
            # Handle existing document (rename detection, content change check)
            should_skip, doc = await _handle_existing_document_update(
-                session, existing_document, content_hash, connector, file_name, primary_hash,
-                task_logger, log_entry
+                session,
+                existing_document,
+                content_hash,
+                connector,
+                file_name,
+                primary_hash,
+                task_logger,
+                log_entry,
            )
            if should_skip:
                return doc
@ -262,7 +280,7 @@ async def add_received_markdown_file_document(
            doc_type = DocumentType.FILE
            if connector and connector.get("type") == DocumentType.GOOGLE_DRIVE_FILE:
                doc_type = DocumentType.GOOGLE_DRIVE_FILE
-            
+
            document = Document(
                search_space_id=search_space_id,
                title=file_name,