mirror of https://github.com/MODSetter/SurfSense.git
synced 2026-05-01 03:46:25 +02:00

chore: ran linting

This commit is contained in:
parent 00a617ef17
commit aa66928154

44 changed files with 2025 additions and 1658 deletions
@@ -285,24 +285,28 @@ async def _analyze_gmail_messages_phase1(
             if existing_document:
                 if existing_document.content_hash == content_hash:
                     # Ensure status is ready (might have been stuck in processing/pending)
-                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                    if not DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.READY
+                    ):
                         existing_document.status = DocumentStatus.ready()
                     documents_skipped += 1
                     continue

                 # Queue existing document for update (will be set to processing in Phase 2)
-                messages_to_process.append({
-                    'document': existing_document,
-                    'is_new': False,
-                    'markdown_content': markdown_content,
-                    'content_hash': content_hash,
-                    'message_id': message_id,
-                    'thread_id': thread_id,
-                    'subject': subject,
-                    'sender': sender,
-                    'date_str': date_str,
-                    'label_ids': label_ids,
-                })
+                messages_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "message_id": message_id,
+                        "thread_id": thread_id,
+                        "subject": subject,
+                        "sender": sender,
+                        "date_str": date_str,
+                        "label_ids": label_ids,
+                    }
+                )
                 continue

             # Document doesn't exist by unique_identifier_hash
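
The DocumentStatus.is_state(...) / DocumentStatus.ready() calls in this hunk suggest a small status helper. A hypothetical minimal version for orientation only — the repo's actual DocumentStatus may represent statuses differently:

# Hypothetical stand-in for the repo's DocumentStatus helper; illustrative only.
class DocumentStatus:
    PENDING = "pending"
    PROCESSING = "processing"
    READY = "ready"
    FAILED = "failed"

    @staticmethod
    def is_state(status, state):
        # A failed status may carry an error message; compare the kind only.
        kind = status[0] if isinstance(status, tuple) else status
        return kind == state

    @classmethod
    def ready(cls):
        return cls.READY

    @classmethod
    def processing(cls):
        return cls.PROCESSING

    @classmethod
    def failed(cls, error):
        return (cls.FAILED, error)


assert DocumentStatus.is_state(DocumentStatus.ready(), DocumentStatus.READY)
assert not DocumentStatus.is_state(DocumentStatus.failed("boom"), DocumentStatus.READY)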
@@ -350,18 +354,20 @@ async def _analyze_gmail_messages_phase1(
             )
             session.add(document)

-            messages_to_process.append({
-                'document': document,
-                'is_new': True,
-                'markdown_content': markdown_content,
-                'content_hash': content_hash,
-                'message_id': message_id,
-                'thread_id': thread_id,
-                'subject': subject,
-                'sender': sender,
-                'date_str': date_str,
-                'label_ids': label_ids,
-            })
+            messages_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "markdown_content": markdown_content,
+                    "content_hash": content_hash,
+                    "message_id": message_id,
+                    "thread_id": thread_id,
+                    "subject": subject,
+                    "sender": sender,
+                    "date_str": date_str,
+                    "label_ids": label_ids,
+                }
+            )

         except Exception as e:
             logger.error(f"Error in Phase 1 for message: {e!s}", exc_info=True)
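
Both hunks above belong to Phase 1 of a two-phase indexing flow: cheap analysis decides what to skip, and everything else is queued for the heavy Phase 2 work. A stripped-down sketch of the pattern — every name here is an illustrative stand-in, not the repo's API:

# Stripped-down two-phase indexing; all names are illustrative stand-ins.
import hashlib


def two_phase_index(raw_items, existing_hashes):
    # Phase 1: cheap analysis only -- skip unchanged items, queue the rest.
    to_process = []
    skipped = 0
    for key, content in raw_items:
        content_hash = hashlib.sha256(content.encode()).hexdigest()
        if existing_hashes.get(key) == content_hash:
            skipped += 1  # unchanged: no heavy work, no status churn
            continue
        to_process.append(
            {"key": key, "content_hash": content_hash, "is_new": key not in existing_hashes}
        )

    # Phase 2: heavy per-item work (summaries, embeddings, chunks) goes here,
    # one item at a time, so a failure affects only that document.
    return to_process, skipped


queued, skipped = two_phase_index(
    [("a", "same"), ("b", "changed")],
    {"a": hashlib.sha256(b"same").hexdigest(), "b": "old-hash"},
)
assert [q["key"] for q in queued] == ["b"] and skipped == 1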
@@ -398,7 +404,7 @@ async def _process_gmail_messages_phase2(
             await on_heartbeat_callback(documents_indexed)
             last_heartbeat_time = current_time

-        document = item['document']
+        document = item["document"]
         try:
             # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
             document.status = DocumentStatus.processing()
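
The heartbeat context lines in this hunk throttle progress callbacks to at most one per interval, so long-running jobs stay visibly alive. A sketch of that loop shape; the interval constant is an assumption, not the repo's value:

# Heartbeat throttling inside a long-running loop; interval value assumed.
import asyncio
import time

HEARTBEAT_INTERVAL_SECONDS = 30.0  # assumption, not the repo's constant


async def process_all(items, on_heartbeat_callback):
    documents_indexed = 0
    last_heartbeat_time = time.time()
    for item in items:
        current_time = time.time()
        if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
            await on_heartbeat_callback(documents_indexed)
            last_heartbeat_time = current_time
        documents_indexed += 1  # heavy per-document work would go here
    return documents_indexed


async def _demo():
    async def heartbeat(count):
        print(f"still alive, {count} indexed")

    print(await process_all(range(3), heartbeat))


asyncio.run(_demo())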
@@ -411,37 +417,35 @@ async def _process_gmail_messages_phase2(

             if user_llm:
                 document_metadata_for_summary = {
-                    "message_id": item['message_id'],
-                    "thread_id": item['thread_id'],
-                    "subject": item['subject'],
-                    "sender": item['sender'],
+                    "message_id": item["message_id"],
+                    "thread_id": item["thread_id"],
+                    "subject": item["subject"],
+                    "sender": item["sender"],
                     "document_type": "Gmail Message (Composio)",
                 }
                 summary_content, summary_embedding = await generate_document_summary(
-                    item['markdown_content'], user_llm, document_metadata_for_summary
+                    item["markdown_content"], user_llm, document_metadata_for_summary
                 )
             else:
-                summary_content = (
-                    f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
-                )
+                summary_content = f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}"
                 summary_embedding = config.embedding_model_instance.embed(
                     summary_content
                 )

-            chunks = await create_document_chunks(item['markdown_content'])
+            chunks = await create_document_chunks(item["markdown_content"])

             # Update document to READY with actual content
-            document.title = item['subject']
+            document.title = item["subject"]
             document.content = summary_content
-            document.content_hash = item['content_hash']
+            document.content_hash = item["content_hash"]
             document.embedding = summary_embedding
             document.document_metadata = {
-                "message_id": item['message_id'],
-                "thread_id": item['thread_id'],
-                "subject": item['subject'],
-                "sender": item['sender'],
-                "date": item['date_str'],
-                "labels": item['label_ids'],
+                "message_id": item["message_id"],
+                "thread_id": item["thread_id"],
+                "subject": item["subject"],
+                "sender": item["sender"],
+                "date": item["date_str"],
+                "labels": item["label_ids"],
                 "connector_id": connector_id,
                 "source": "composio",
             }
@@ -465,7 +469,9 @@ async def _process_gmail_messages_phase2(
                 document.status = DocumentStatus.failed(str(e))
                 document.updated_at = get_current_timestamp()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
             documents_failed += 1
             continue

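
The except-branch above wraps even the "mark as failed" write in its own try/except, so bookkeeping errors cannot mask the original failure. The same shape in isolation; mark_failed and the status tuple are hypothetical, not the repo's API:

# Best-effort failure bookkeeping; mark_failed is hypothetical, not the repo's API.
import logging

logger = logging.getLogger(__name__)


def mark_failed(document, error):
    try:
        document.status = ("failed", str(error))
        document.updated_at = "now"  # stand-in for get_current_timestamp()
    except Exception as status_error:
        # Never let status bookkeeping raise past the original error.
        logger.error(f"Failed to update document status to failed: {status_error}")


class _Doc:
    pass


mark_failed(_Doc(), ValueError("original failure"))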
@@ -571,7 +577,9 @@ async def index_composio_gmail(
             )

             all_messages.extend(messages)
-            logger.info(f"Fetched {len(messages)} messages (total: {len(all_messages)})")
+            logger.info(
+                f"Fetched {len(messages)} messages (total: {len(all_messages)})"
+            )

             if not next_token or len(messages) < current_batch_size:
                 break
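
This hunk sits inside the message-fetch loop: pagination stops when the API returns no next token or a short page. The surrounding loop, reconstructed as a sketch; fetch_page is a stand-in for the Composio connector call:

# Pagination sketch; fetch_page is a stand-in, not the Composio connector's API.
def fetch_all(fetch_page, batch_size):
    all_messages = []
    next_token = None
    while True:
        messages, next_token = fetch_page(next_token, batch_size)
        all_messages.extend(messages)
        print(f"Fetched {len(messages)} messages (total: {len(all_messages)})")
        if not next_token or len(messages) < batch_size:
            break
    return all_messages


pages = {None: (["m1", "m2"], "t1"), "t1": (["m3"], None)}
assert fetch_all(lambda tok, n: pages[tok], 2) == ["m1", "m2", "m3"]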
@@ -616,7 +624,7 @@ async def index_composio_gmail(
         )

         # Commit all pending documents - they all appear in UI now
-        new_documents_count = len([m for m in messages_to_process if m['is_new']])
+        new_documents_count = len([m for m in messages_to_process if m["is_new"]])
         if new_documents_count > 0:
             logger.info(f"Phase 1: Committing {new_documents_count} pending documents")
             await session.commit()
@@ -645,9 +653,7 @@ async def index_composio_gmail(
         await update_connector_last_indexed(session, connector, update_last_indexed)

         # Final commit to ensure all documents are persisted
-        logger.info(
-            f"Final commit: Total {documents_indexed} Gmail messages processed"
-        )
+        logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed")
         try:
             await session.commit()
             logger.info(

@@ -268,7 +268,9 @@ async def index_composio_google_calendar(
     documents_indexed = 0
     documents_skipped = 0
     documents_failed = 0  # Track events that failed processing
-    duplicate_content_count = 0  # Track events skipped due to duplicate content_hash
+    duplicate_content_count = (
+        0  # Track events skipped due to duplicate content_hash
+    )
     last_heartbeat_time = time.time()

     # =======================================================================
@@ -317,23 +319,27 @@ async def index_composio_google_calendar(
             if existing_document:
                 if existing_document.content_hash == content_hash:
                     # Ensure status is ready (might have been stuck in processing/pending)
-                    if not DocumentStatus.is_state(existing_document.status, DocumentStatus.READY):
+                    if not DocumentStatus.is_state(
+                        existing_document.status, DocumentStatus.READY
+                    ):
                         existing_document.status = DocumentStatus.ready()
                     documents_skipped += 1
                     continue

                 # Queue existing document for update (will be set to processing in Phase 2)
-                events_to_process.append({
-                    'document': existing_document,
-                    'is_new': False,
-                    'markdown_content': markdown_content,
-                    'content_hash': content_hash,
-                    'event_id': event_id,
-                    'summary': summary,
-                    'start_time': start_time,
-                    'end_time': end_time,
-                    'location': location,
-                })
+                events_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "markdown_content": markdown_content,
+                        "content_hash": content_hash,
+                        "event_id": event_id,
+                        "summary": summary,
+                        "start_time": start_time,
+                        "end_time": end_time,
+                        "location": location,
+                    }
+                )
                 continue

             # Document doesn't exist by unique_identifier_hash
@@ -383,17 +389,19 @@ async def index_composio_google_calendar(
             session.add(document)
             new_documents_created = True

-            events_to_process.append({
-                'document': document,
-                'is_new': True,
-                'markdown_content': markdown_content,
-                'content_hash': content_hash,
-                'event_id': event_id,
-                'summary': summary,
-                'start_time': start_time,
-                'end_time': end_time,
-                'location': location,
-            })
+            events_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "markdown_content": markdown_content,
+                    "content_hash": content_hash,
+                    "event_id": event_id,
+                    "summary": summary,
+                    "start_time": start_time,
+                    "end_time": end_time,
+                    "location": location,
+                }
+            )

         except Exception as e:
             logger.error(f"Error in Phase 1 for event: {e!s}", exc_info=True)
@@ -402,7 +410,9 @@ async def index_composio_google_calendar(

     # Commit all pending documents - they all appear in UI now
     if new_documents_created:
-        logger.info(f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents")
+        logger.info(
+            f"Phase 1: Committing {len([e for e in events_to_process if e['is_new']])} pending documents"
+        )
         await session.commit()

     # =======================================================================
@@ -419,7 +429,7 @@ async def index_composio_google_calendar(
             await on_heartbeat_callback(documents_indexed)
             last_heartbeat_time = current_time

-        document = item['document']
+        document = item["document"]
         try:
             # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
             document.status = DocumentStatus.processing()
@@ -432,35 +442,40 @@ async def index_composio_google_calendar(

             if user_llm:
                 document_metadata_for_summary = {
-                    "event_id": item['event_id'],
-                    "summary": item['summary'],
-                    "start_time": item['start_time'],
+                    "event_id": item["event_id"],
+                    "summary": item["summary"],
+                    "start_time": item["start_time"],
                     "document_type": "Google Calendar Event (Composio)",
                 }
-                summary_content, summary_embedding = await generate_document_summary(
-                    item['markdown_content'], user_llm, document_metadata_for_summary
+                (
+                    summary_content,
+                    summary_embedding,
+                ) = await generate_document_summary(
+                    item["markdown_content"],
+                    user_llm,
+                    document_metadata_for_summary,
                 )
             else:
                 summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}"
-                if item['location']:
+                if item["location"]:
                     summary_content += f"\nLocation: {item['location']}"
                 summary_embedding = config.embedding_model_instance.embed(
                     summary_content
                 )

-            chunks = await create_document_chunks(item['markdown_content'])
+            chunks = await create_document_chunks(item["markdown_content"])

             # Update document to READY with actual content
-            document.title = item['summary']
+            document.title = item["summary"]
             document.content = summary_content
-            document.content_hash = item['content_hash']
+            document.content_hash = item["content_hash"]
             document.embedding = summary_embedding
             document.document_metadata = {
-                "event_id": item['event_id'],
-                "summary": item['summary'],
-                "start_time": item['start_time'],
-                "end_time": item['end_time'],
-                "location": item['location'],
+                "event_id": item["event_id"],
+                "summary": item["summary"],
+                "start_time": item["start_time"],
+                "end_time": item["end_time"],
+                "location": item["location"],
                 "connector_id": connector_id,
                 "source": "composio",
             }
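
The formatter's rewrite in this hunk puts the long tuple-unpacking target in parentheses so each name and argument gets its own line; the two forms are semantically identical:

# Both assignments below are equivalent; the second is the formatter's layout
# for targets that no longer fit on one line.
def make_summary():
    return "summary text", [0.1, 0.2]


summary_content, summary_embedding = make_summary()

(
    summary_content,
    summary_embedding,
) = make_summary()

assert summary_content == "summary text"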
@@ -484,7 +499,9 @@ async def index_composio_google_calendar(
                 document.status = DocumentStatus.failed(str(e))
                 document.updated_at = get_current_timestamp()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
             documents_failed += 1
             continue

@@ -938,13 +938,15 @@ async def _index_composio_drive_delta_sync(

             if existing_document:
                 # Queue existing document for update
-                files_to_process.append({
-                    'document': existing_document,
-                    'is_new': False,
-                    'file_id': file_id,
-                    'file_name': file_name,
-                    'mime_type': mime_type,
-                })
+                files_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "file_id": file_id,
+                        "file_name": file_name,
+                        "mime_type": mime_type,
+                    }
+                )
                 continue

             # Create new document with PENDING status
@@ -974,13 +976,15 @@ async def _index_composio_drive_delta_sync(
             session.add(document)
             new_documents_created = True

-            files_to_process.append({
-                'document': document,
-                'is_new': True,
-                'file_id': file_id,
-                'file_name': file_name,
-                'mime_type': mime_type,
-            })
+            files_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "mime_type": mime_type,
+                }
+            )

         except Exception as e:
             logger.error(f"Error in Phase 1 for change: {e!s}", exc_info=True)
@@ -989,7 +993,9 @@ async def _index_composio_drive_delta_sync(

     # Commit all pending documents - they all appear in UI now
     if new_documents_created:
-        logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
+        logger.info(
+            f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
+        )
         await session.commit()

     # =======================================================================
@@ -1005,7 +1011,7 @@ async def _index_composio_drive_delta_sync(
             await on_heartbeat_callback(documents_indexed)
             last_heartbeat_time = current_time

-        document = item['document']
+        document = item["document"]
         try:
             # Set to PROCESSING and commit
             document.status = DocumentStatus.processing()
@@ -1013,11 +1019,13 @@ async def _index_composio_drive_delta_sync(

             # Get file content
             content, content_error = await composio_connector.get_drive_file_content(
-                item['file_id'], original_mime_type=item['mime_type']
+                item["file_id"], original_mime_type=item["mime_type"]
             )

             if content_error or not content:
-                logger.warning(f"Could not get content for file {item['file_name']}: {content_error}")
+                logger.warning(
+                    f"Could not get content for file {item['file_name']}: {content_error}"
+                )
                 markdown_content = f"# {item['file_name']}\n\n"
                 markdown_content += f"**File ID:** {item['file_id']}\n"
                 markdown_content += f"**Type:** {item['mime_type']}\n"
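
When Drive content cannot be fetched, the hunk above falls back to a stub markdown document built from file metadata; the same construction in isolation:

# Fallback markdown when file content is unavailable, mirroring the hunk above.
def stub_markdown(file_name, file_id, mime_type):
    markdown_content = f"# {file_name}\n\n"
    markdown_content += f"**File ID:** {file_id}\n"
    markdown_content += f"**Type:** {mime_type}\n"
    return markdown_content


print(stub_markdown("report.pdf", "abc123", "application/pdf"))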
@@ -1031,9 +1039,9 @@ async def _index_composio_drive_delta_sync(
             else:
                 markdown_content = await _process_file_content(
                     content=content,
-                    file_name=item['file_name'],
-                    file_id=item['file_id'],
-                    mime_type=item['mime_type'],
+                    file_name=item["file_name"],
+                    file_id=item["file_id"],
+                    mime_type=item["mime_type"],
                     search_space_id=search_space_id,
                     user_id=user_id,
                     session=session,
@@ -1045,14 +1053,14 @@ async def _index_composio_drive_delta_sync(
             content_hash = generate_content_hash(markdown_content, search_space_id)

             # For existing documents, check if content changed
-            if not item['is_new'] and document.content_hash == content_hash:
+            if not item["is_new"] and document.content_hash == content_hash:
                 if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
                     document.status = DocumentStatus.ready()
                 documents_skipped += 1
                 continue

             # Check for duplicate content hash (for new documents)
-            if item['is_new']:
+            if item["is_new"]:
                 with session.no_autoflush:
                     duplicate_by_content = await check_duplicate_document_by_hash(
                         session, content_hash
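
generate_content_hash(markdown_content, search_space_id) keys the change check on both the rendered content and the search space, so identical content in different spaces does not collide. A plausible implementation for orientation — the repo's actual hashing may differ:

# Assumed shape of generate_content_hash; the repo's implementation may differ.
import hashlib


def generate_content_hash(markdown_content, search_space_id):
    payload = f"{search_space_id}:{markdown_content}".encode()
    return hashlib.sha256(payload).hexdigest()


old = generate_content_hash("# Doc\nbody", 7)
assert generate_content_hash("# Doc\nbody", 7) == old    # unchanged -> skip
assert generate_content_hash("# Doc\nbody!", 7) != old   # changed -> reprocess
assert generate_content_hash("# Doc\nbody", 8) != old    # other space -> distinct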
@@ -1067,13 +1075,15 @@ async def _index_composio_drive_delta_sync(
                     continue

             # Heavy processing (LLM, embeddings, chunks)
-            user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+            user_llm = await get_user_long_context_llm(
+                session, user_id, search_space_id
+            )

             if user_llm:
                 document_metadata_for_summary = {
-                    "file_id": item['file_id'],
-                    "file_name": item['file_name'],
-                    "mime_type": item['mime_type'],
+                    "file_id": item["file_id"],
+                    "file_name": item["file_name"],
+                    "mime_type": item["mime_type"],
                     "document_type": "Google Drive File (Composio)",
                 }
                 summary_content, summary_embedding = await generate_document_summary(
@@ -1081,20 +1091,22 @@ async def _index_composio_drive_delta_sync(
                 )
             else:
                 summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
-                summary_embedding = config.embedding_model_instance.embed(summary_content)
+                summary_embedding = config.embedding_model_instance.embed(
+                    summary_content
+                )

             chunks = await create_document_chunks(markdown_content)

             # Update document to READY
-            document.title = item['file_name']
+            document.title = item["file_name"]
             document.content = summary_content
             document.content_hash = content_hash
             document.embedding = summary_embedding
             document.document_metadata = {
-                "file_id": item['file_id'],
-                "file_name": item['file_name'],
-                "FILE_NAME": item['file_name'],
-                "mime_type": item['mime_type'],
+                "file_id": item["file_id"],
+                "file_name": item["file_name"],
+                "FILE_NAME": item["file_name"],
+                "mime_type": item["mime_type"],
                 "connector_id": connector_id,
                 "source": "composio",
             }
@@ -1117,7 +1129,9 @@ async def _index_composio_drive_delta_sync(
                 document.status = DocumentStatus.failed(str(e))
                 document.updated_at = get_current_timestamp()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
             documents_failed += 1
             continue

@@ -1329,13 +1343,15 @@ async def _index_composio_drive_full_scan(

             if existing_document:
                 # Queue existing document for update (will be set to processing in Phase 2)
-                files_to_process.append({
-                    'document': existing_document,
-                    'is_new': False,
-                    'file_id': file_id,
-                    'file_name': file_name,
-                    'mime_type': mime_type,
-                })
+                files_to_process.append(
+                    {
+                        "document": existing_document,
+                        "is_new": False,
+                        "file_id": file_id,
+                        "file_name": file_name,
+                        "mime_type": mime_type,
+                    }
+                )
                 continue

             # Create new document with PENDING status (visible in UI immediately)
@@ -1365,13 +1381,15 @@ async def _index_composio_drive_full_scan(
             session.add(document)
             new_documents_created = True

-            files_to_process.append({
-                'document': document,
-                'is_new': True,
-                'file_id': file_id,
-                'file_name': file_name,
-                'mime_type': mime_type,
-            })
+            files_to_process.append(
+                {
+                    "document": document,
+                    "is_new": True,
+                    "file_id": file_id,
+                    "file_name": file_name,
+                    "mime_type": mime_type,
+                }
+            )

         except Exception as e:
             logger.error(f"Error in Phase 1 for file: {e!s}", exc_info=True)
@@ -1380,7 +1398,9 @@ async def _index_composio_drive_full_scan(

     # Commit all pending documents - they all appear in UI now
     if new_documents_created:
-        logger.info(f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents")
+        logger.info(
+            f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
+        )
         await session.commit()

     # =======================================================================
@@ -1397,7 +1417,7 @@ async def _index_composio_drive_full_scan(
             await on_heartbeat_callback(documents_indexed)
             last_heartbeat_time = current_time

-        document = item['document']
+        document = item["document"]
         try:
             # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
             document.status = DocumentStatus.processing()
@@ -1405,11 +1425,13 @@ async def _index_composio_drive_full_scan(

             # Get file content (pass mime_type for Google Workspace export handling)
             content, content_error = await composio_connector.get_drive_file_content(
-                item['file_id'], original_mime_type=item['mime_type']
+                item["file_id"], original_mime_type=item["mime_type"]
             )

             if content_error or not content:
-                logger.warning(f"Could not get content for file {item['file_name']}: {content_error}")
+                logger.warning(
+                    f"Could not get content for file {item['file_name']}: {content_error}"
+                )
                 markdown_content = f"# {item['file_name']}\n\n"
                 markdown_content += f"**File ID:** {item['file_id']}\n"
                 markdown_content += f"**Type:** {item['mime_type']}\n"
@@ -1424,9 +1446,9 @@ async def _index_composio_drive_full_scan(
                 # Process content based on file type
                 markdown_content = await _process_file_content(
                     content=content,
-                    file_name=item['file_name'],
-                    file_id=item['file_id'],
-                    mime_type=item['mime_type'],
+                    file_name=item["file_name"],
+                    file_id=item["file_id"],
+                    mime_type=item["mime_type"],
                     search_space_id=search_space_id,
                     user_id=user_id,
                     session=session,
@@ -1438,7 +1460,7 @@ async def _index_composio_drive_full_scan(
             content_hash = generate_content_hash(markdown_content, search_space_id)

             # For existing documents, check if content changed
-            if not item['is_new'] and document.content_hash == content_hash:
+            if not item["is_new"] and document.content_hash == content_hash:
                 # Ensure status is ready
                 if not DocumentStatus.is_state(document.status, DocumentStatus.READY):
                     document.status = DocumentStatus.ready()
@@ -1446,7 +1468,7 @@ async def _index_composio_drive_full_scan(
                 continue

             # Check for duplicate content hash (for new documents)
-            if item['is_new']:
+            if item["is_new"]:
                 with session.no_autoflush:
                     duplicate_by_content = await check_duplicate_document_by_hash(
                         session, content_hash
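
The duplicate check above runs under session.no_autoflush so that querying by content_hash does not flush the still-pending documents queued in Phase 1. A sketch against SQLAlchemy's async session — the Document model here is a minimal stand-in, and this mirrors check_duplicate_document_by_hash only in shape:

# Duplicate-content lookup under no_autoflush; Document is a minimal stand-in
# for the repo's model.
from sqlalchemy import String, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class Document(Base):
    __tablename__ = "documents"
    id: Mapped[int] = mapped_column(primary_key=True)
    content_hash: Mapped[str] = mapped_column(String(64), index=True)


async def check_duplicate_document_by_hash(session: AsyncSession, content_hash: str):
    # no_autoflush keeps the pending Phase 1 rows from being flushed (and
    # possibly violating constraints) just because we ran a SELECT.
    with session.no_autoflush:
        result = await session.execute(
            select(Document).where(Document.content_hash == content_hash)
        )
        return result.scalars().first()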
@@ -1462,13 +1484,15 @@ async def _index_composio_drive_full_scan(
                     continue

             # Heavy processing (LLM, embeddings, chunks)
-            user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
+            user_llm = await get_user_long_context_llm(
+                session, user_id, search_space_id
+            )

             if user_llm:
                 document_metadata_for_summary = {
-                    "file_id": item['file_id'],
-                    "file_name": item['file_name'],
-                    "mime_type": item['mime_type'],
+                    "file_id": item["file_id"],
+                    "file_name": item["file_name"],
+                    "mime_type": item["mime_type"],
                     "document_type": "Google Drive File (Composio)",
                 }
                 summary_content, summary_embedding = await generate_document_summary(
@@ -1476,20 +1500,22 @@ async def _index_composio_drive_full_scan(
                 )
             else:
                 summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}"
-                summary_embedding = config.embedding_model_instance.embed(summary_content)
+                summary_embedding = config.embedding_model_instance.embed(
+                    summary_content
+                )

             chunks = await create_document_chunks(markdown_content)

             # Update document to READY with actual content
-            document.title = item['file_name']
+            document.title = item["file_name"]
             document.content = summary_content
             document.content_hash = content_hash
             document.embedding = summary_embedding
             document.document_metadata = {
-                "file_id": item['file_id'],
-                "file_name": item['file_name'],
-                "FILE_NAME": item['file_name'],
-                "mime_type": item['mime_type'],
+                "file_id": item["file_id"],
+                "file_name": item["file_name"],
+                "FILE_NAME": item["file_name"],
+                "mime_type": item["mime_type"],
                 "connector_id": connector_id,
                 "source": "composio",
             }
@@ -1515,7 +1541,9 @@ async def _index_composio_drive_full_scan(
                 document.status = DocumentStatus.failed(str(e))
                 document.updated_at = get_current_timestamp()
             except Exception as status_error:
-                logger.error(f"Failed to update document status to failed: {status_error}")
+                logger.error(
+                    f"Failed to update document status to failed: {status_error}"
+                )
             documents_failed += 1
             continue
