mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-07-04 22:02:16 +02:00
chore: ran linting
This commit is contained in:
parent
97d7207bd4
commit
a5103da3d7
21 changed files with 259 additions and 181 deletions
|
|
@ -611,4 +611,3 @@ async def index_composio_gmail(
|
|||
except Exception as e:
|
||||
logger.error(f"Failed to index Gmail via Composio: {e!s}", exc_info=True)
|
||||
return 0, f"Failed to index Gmail via Composio: {e!s}"
|
||||
|
||||
|
|
|
|||
|
|
@ -259,7 +259,9 @@ async def index_composio_google_calendar(
|
|||
|
||||
documents_indexed = 0
|
||||
documents_skipped = 0
|
||||
duplicate_content_count = 0 # Track events skipped due to duplicate content_hash
|
||||
duplicate_content_count = (
|
||||
0 # Track events skipped due to duplicate content_hash
|
||||
)
|
||||
|
||||
for event in events:
|
||||
try:
|
||||
|
|
@ -353,7 +355,7 @@ async def index_composio_google_calendar(
|
|||
logger.info(
|
||||
f"Committing batch: {documents_indexed} Google Calendar events processed so far"
|
||||
)
|
||||
await session.commit( )
|
||||
await session.commit()
|
||||
continue
|
||||
|
||||
# Document doesn't exist by unique_identifier_hash
|
||||
|
|
@ -362,7 +364,7 @@ async def index_composio_google_calendar(
|
|||
duplicate_by_content = await check_duplicate_document_by_hash(
|
||||
session, content_hash
|
||||
)
|
||||
|
||||
|
||||
if duplicate_by_content:
|
||||
# A document with the same content already exists (likely from standard connector)
|
||||
logger.info(
|
||||
|
|
@ -458,7 +460,10 @@ async def index_composio_google_calendar(
|
|||
)
|
||||
except Exception as e:
|
||||
# Handle any remaining integrity errors gracefully (race conditions, etc.)
|
||||
if "duplicate key value violates unique constraint" in str(e).lower() or "uniqueviolationerror" in str(e).lower():
|
||||
if (
|
||||
"duplicate key value violates unique constraint" in str(e).lower()
|
||||
or "uniqueviolationerror" in str(e).lower()
|
||||
):
|
||||
logger.warning(
|
||||
f"Duplicate content_hash detected during final commit. "
|
||||
f"This may occur if the same event was indexed by multiple connectors. "
|
||||
|
|
@ -495,4 +500,3 @@ async def index_composio_google_calendar(
|
|||
f"Failed to index Google Calendar via Composio: {e!s}", exc_info=True
|
||||
)
|
||||
return 0, f"Failed to index Google Calendar via Composio: {e!s}"
|
||||
|
||||
|
|
|
|||
|
|
@ -453,8 +453,8 @@ async def check_document_by_unique_identifier(
|
|||
session: AsyncSession, unique_identifier_hash: str
|
||||
) -> Document | None:
|
||||
"""Check if a document with the given unique identifier hash already exists."""
|
||||
from sqlalchemy.orm import selectinload
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
existing_doc_result = await session.execute(
|
||||
select(Document)
|
||||
|
|
@ -517,14 +517,20 @@ async def index_composio_google_drive(
|
|||
|
||||
# Route to delta sync or full scan
|
||||
if use_delta_sync:
|
||||
logger.info(f"Using delta sync for Composio Google Drive connector {connector_id}")
|
||||
logger.info(
|
||||
f"Using delta sync for Composio Google Drive connector {connector_id}"
|
||||
)
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Starting delta sync for Google Drive via Composio (connector {connector_id})",
|
||||
{"stage": "delta_sync", "token": stored_page_token[:20] + "..."},
|
||||
)
|
||||
|
||||
documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_delta_sync(
|
||||
(
|
||||
documents_indexed,
|
||||
documents_skipped,
|
||||
processing_errors,
|
||||
) = await _index_composio_drive_delta_sync(
|
||||
session=session,
|
||||
composio_connector=composio_connector,
|
||||
connector_id=connector_id,
|
||||
|
|
@ -536,7 +542,9 @@ async def index_composio_google_drive(
|
|||
log_entry=log_entry,
|
||||
)
|
||||
else:
|
||||
logger.info(f"Using full scan for Composio Google Drive connector {connector_id} (first sync or no token)")
|
||||
logger.info(
|
||||
f"Using full scan for Composio Google Drive connector {connector_id} (first sync or no token)"
|
||||
)
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Fetching Google Drive files via Composio for connector {connector_id}",
|
||||
|
|
@ -547,7 +555,11 @@ async def index_composio_google_drive(
|
|||
},
|
||||
)
|
||||
|
||||
documents_indexed, documents_skipped, processing_errors = await _index_composio_drive_full_scan(
|
||||
(
|
||||
documents_indexed,
|
||||
documents_skipped,
|
||||
processing_errors,
|
||||
) = await _index_composio_drive_full_scan(
|
||||
session=session,
|
||||
composio_connector=composio_connector,
|
||||
connector_id=connector_id,
|
||||
|
|
@ -580,9 +592,13 @@ async def index_composio_google_drive(
|
|||
await update_connector_last_indexed(session, connector, update_last_indexed)
|
||||
|
||||
# Final commit
|
||||
logger.info(f"Final commit: Total {documents_indexed} Google Drive files processed")
|
||||
logger.info(
|
||||
f"Final commit: Total {documents_indexed} Google Drive files processed"
|
||||
)
|
||||
await session.commit()
|
||||
logger.info("Successfully committed all Composio Google Drive document changes to database")
|
||||
logger.info(
|
||||
"Successfully committed all Composio Google Drive document changes to database"
|
||||
)
|
||||
|
||||
# Handle processing errors
|
||||
error_message = None
|
||||
|
|
@ -731,7 +747,9 @@ async def _index_composio_drive_delta_sync(
|
|||
processing_errors.append(error_msg)
|
||||
documents_skipped += 1
|
||||
|
||||
logger.info(f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped")
|
||||
logger.info(
|
||||
f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped"
|
||||
)
|
||||
return documents_indexed, documents_skipped, processing_errors
|
||||
|
||||
|
||||
|
|
@ -858,20 +876,18 @@ async def _index_composio_drive_full_scan(
|
|||
logger.info("No Google Drive files found")
|
||||
return 0, 0, []
|
||||
|
||||
logger.info(f"Found {len(all_files)} Google Drive files to index via Composio (full scan)")
|
||||
logger.info(
|
||||
f"Found {len(all_files)} Google Drive files to index via Composio (full scan)"
|
||||
)
|
||||
|
||||
for file_info in all_files:
|
||||
try:
|
||||
# Handle both standard Google API and potential Composio variations
|
||||
file_id = file_info.get("id", "") or file_info.get("fileId", "")
|
||||
file_name = (
|
||||
file_info.get("name", "")
|
||||
or file_info.get("fileName", "")
|
||||
or "Untitled"
|
||||
)
|
||||
mime_type = file_info.get("mimeType", "") or file_info.get(
|
||||
"mime_type", ""
|
||||
file_info.get("name", "") or file_info.get("fileName", "") or "Untitled"
|
||||
)
|
||||
mime_type = file_info.get("mimeType", "") or file_info.get("mime_type", "")
|
||||
|
||||
if not file_id:
|
||||
documents_skipped += 1
|
||||
|
|
@ -901,7 +917,9 @@ async def _index_composio_drive_full_scan(
|
|||
|
||||
# Batch commit every 10 documents
|
||||
if documents_indexed > 0 and documents_indexed % 10 == 0:
|
||||
logger.info(f"Committing batch: {documents_indexed} Google Drive files processed so far")
|
||||
logger.info(
|
||||
f"Committing batch: {documents_indexed} Google Drive files processed so far"
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -910,7 +928,9 @@ async def _index_composio_drive_full_scan(
|
|||
processing_errors.append(error_msg)
|
||||
documents_skipped += 1
|
||||
|
||||
logger.info(f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped")
|
||||
logger.info(
|
||||
f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped"
|
||||
)
|
||||
return documents_indexed, documents_skipped, processing_errors
|
||||
|
||||
|
||||
|
|
@ -948,9 +968,7 @@ async def _process_single_drive_file(
|
|||
content, content_error = await composio_connector.get_drive_file_content(file_id)
|
||||
|
||||
if content_error or not content:
|
||||
logger.warning(
|
||||
f"Could not get content for file {file_name}: {content_error}"
|
||||
)
|
||||
logger.warning(f"Could not get content for file {file_name}: {content_error}")
|
||||
# Use metadata as content fallback
|
||||
markdown_content = f"# {file_name}\n\n"
|
||||
markdown_content += f"**File ID:** {file_id}\n"
|
||||
|
|
@ -985,9 +1003,7 @@ async def _process_single_drive_file(
|
|||
return 0, 1, processing_errors # Skipped
|
||||
|
||||
# Update existing document
|
||||
user_llm = await get_user_long_context_llm(
|
||||
session, user_id, search_space_id
|
||||
)
|
||||
user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
||||
|
||||
if user_llm:
|
||||
document_metadata = {
|
||||
|
|
@ -1003,12 +1019,8 @@ async def _process_single_drive_file(
|
|||
markdown_content, user_llm, document_metadata
|
||||
)
|
||||
else:
|
||||
summary_content = (
|
||||
f"Google Drive File: {file_name}\n\nType: {mime_type}"
|
||||
)
|
||||
summary_embedding = config.embedding_model_instance.embed(
|
||||
summary_content
|
||||
)
|
||||
summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
|
||||
summary_embedding = config.embedding_model_instance.embed(summary_content)
|
||||
|
||||
chunks = await create_document_chunks(markdown_content)
|
||||
|
||||
|
|
@ -1030,9 +1042,7 @@ async def _process_single_drive_file(
|
|||
return 1, 0, processing_errors # Indexed
|
||||
|
||||
# Create new document
|
||||
user_llm = await get_user_long_context_llm(
|
||||
session, user_id, search_space_id
|
||||
)
|
||||
user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
||||
|
||||
if user_llm:
|
||||
document_metadata = {
|
||||
|
|
@ -1048,12 +1058,8 @@ async def _process_single_drive_file(
|
|||
markdown_content, user_llm, document_metadata
|
||||
)
|
||||
else:
|
||||
summary_content = (
|
||||
f"Google Drive File: {file_name}\n\nType: {mime_type}"
|
||||
)
|
||||
summary_embedding = config.embedding_model_instance.embed(
|
||||
summary_content
|
||||
)
|
||||
summary_content = f"Google Drive File: {file_name}\n\nType: {mime_type}"
|
||||
summary_embedding = config.embedding_model_instance.embed(summary_content)
|
||||
|
||||
chunks = await create_document_chunks(markdown_content)
|
||||
|
||||
|
|
@ -1159,4 +1165,3 @@ async def _fetch_folder_files_recursively(
|
|||
except Exception as e:
|
||||
logger.error(f"Error in recursive folder fetch: {e!s}")
|
||||
return all_files
|
||||
|
||||
|
|
|
|||
|
|
@ -144,7 +144,10 @@ class GoogleCalendarConnector:
|
|||
except Exception as e:
|
||||
error_str = str(e)
|
||||
# Check if this is an invalid_grant error (token expired/revoked)
|
||||
if "invalid_grant" in error_str.lower() or "token has been expired or revoked" in error_str.lower():
|
||||
if (
|
||||
"invalid_grant" in error_str.lower()
|
||||
or "token has been expired or revoked" in error_str.lower()
|
||||
):
|
||||
raise Exception(
|
||||
"Google Calendar authentication failed. Please re-authenticate."
|
||||
) from e
|
||||
|
|
@ -173,7 +176,11 @@ class GoogleCalendarConnector:
|
|||
except Exception as e:
|
||||
error_str = str(e)
|
||||
# If the error already contains a user-friendly re-authentication message, preserve it
|
||||
if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
|
||||
if (
|
||||
"re-authenticate" in error_str.lower()
|
||||
or "expired or been revoked" in error_str.lower()
|
||||
or "authentication failed" in error_str.lower()
|
||||
):
|
||||
raise Exception(error_str) from e
|
||||
raise Exception(f"Failed to create Google Calendar service: {e!s}") from e
|
||||
|
||||
|
|
@ -283,7 +290,11 @@ class GoogleCalendarConnector:
|
|||
except Exception as e:
|
||||
error_str = str(e)
|
||||
# If the error already contains a user-friendly re-authentication message, preserve it
|
||||
if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
|
||||
if (
|
||||
"re-authenticate" in error_str.lower()
|
||||
or "expired or been revoked" in error_str.lower()
|
||||
or "authentication failed" in error_str.lower()
|
||||
):
|
||||
return [], error_str
|
||||
return [], f"Error fetching events: {e!s}"
|
||||
|
||||
|
|
|
|||
|
|
@ -143,7 +143,10 @@ class GoogleGmailConnector:
|
|||
except Exception as e:
|
||||
error_str = str(e)
|
||||
# Check if this is an invalid_grant error (token expired/revoked)
|
||||
if "invalid_grant" in error_str.lower() or "token has been expired or revoked" in error_str.lower():
|
||||
if (
|
||||
"invalid_grant" in error_str.lower()
|
||||
or "token has been expired or revoked" in error_str.lower()
|
||||
):
|
||||
raise Exception(
|
||||
"Gmail authentication failed. Please re-authenticate."
|
||||
) from e
|
||||
|
|
@ -172,7 +175,11 @@ class GoogleGmailConnector:
|
|||
except Exception as e:
|
||||
error_str = str(e)
|
||||
# If the error already contains a user-friendly re-authentication message, preserve it
|
||||
if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
|
||||
if (
|
||||
"re-authenticate" in error_str.lower()
|
||||
or "expired or been revoked" in error_str.lower()
|
||||
or "authentication failed" in error_str.lower()
|
||||
):
|
||||
raise Exception(error_str) from e
|
||||
raise Exception(f"Failed to create Gmail service: {e!s}") from e
|
||||
|
||||
|
|
@ -237,7 +244,11 @@ class GoogleGmailConnector:
|
|||
except Exception as e:
|
||||
error_str = str(e)
|
||||
# If the error already contains a user-friendly re-authentication message, preserve it
|
||||
if "re-authenticate" in error_str.lower() or "expired or been revoked" in error_str.lower() or "authentication failed" in error_str.lower():
|
||||
if (
|
||||
"re-authenticate" in error_str.lower()
|
||||
or "expired or been revoked" in error_str.lower()
|
||||
or "authentication failed" in error_str.lower()
|
||||
):
|
||||
return [], error_str
|
||||
return [], f"Error fetching messages list: {e!s}"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue