diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index a2a613777..898ab9735 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -76,8 +76,8 @@ celery_app.conf.update( enable_utc=True, # Task execution settings task_track_started=True, - task_time_limit=3600, # 1 hour hard limit - task_soft_time_limit=3000, # 50 minutes soft limit + task_time_limit=28800, # 8 hour hard limit + task_soft_time_limit=28200, # 7 hours 50 minutes soft limit # Result backend settings result_expires=86400, # Results expire after 24 hours result_extended=True, diff --git a/surfsense_backend/app/connectors/notion_history.py b/surfsense_backend/app/connectors/notion_history.py index 1f29a5968..81f6642f1 100644 --- a/surfsense_backend/app/connectors/notion_history.py +++ b/surfsense_backend/app/connectors/notion_history.py @@ -58,10 +58,23 @@ class NotionHistoryConnector: "timestamp": "last_edited_time", } - # First, get a list of all pages the integration has access to - search_results = await self.notion.search(**search_params) + # Paginate through all pages the integration has access to + pages = [] + has_more = True + cursor = None + + while has_more: + if cursor: + search_params["start_cursor"] = cursor + + search_results = await self.notion.search(**search_params) + + pages.extend(search_results["results"]) + has_more = search_results.get("has_more", False) + + if has_more: + cursor = search_results.get("next_cursor") - pages = search_results["results"] all_page_data = [] for page in pages: diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py index b670391eb..cf6824db8 100644 --- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py @@ -390,6 +390,13 @@ async def index_airtable_records( f"Successfully indexed new Airtable record {summary_content}" ) + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Airtable records processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing the Airtable record {record.get('id', 'Unknown')}: {e!s}", @@ -408,7 +415,10 @@ async def index_airtable_records( session, connector, update_last_indexed ) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info( + f"Final commit: Total {documents_indexed} Airtable records processed" + ) await session.commit() logger.info( "Successfully committed all Airtable document changes to database" diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py index 4c057946b..97fdbb6be 100644 --- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py @@ -354,6 +354,13 @@ async def index_clickup_tasks( documents_indexed += 1 logger.info(f"Successfully indexed new task {task_name}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} ClickUp tasks processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing task {task.get('name', 'Unknown')}: {e!s}", @@ -366,6 +373,8 @@ async def index_clickup_tasks( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} ClickUp tasks processed") await session.commit() await task_logger.log_task_success( diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index afdbdd177..c148e0879 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -368,6 +368,13 @@ async def index_confluence_pages( documents_indexed += 1 logger.info(f"Successfully indexed new page {page_title}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Confluence pages processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing page {page.get('title', 'Unknown')}: {e!s}", @@ -384,7 +391,10 @@ async def index_confluence_pages( if update_last_indexed: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info( + f"Final commit: Total {documents_indexed} Confluence pages processed" + ) await session.commit() logger.info( "Successfully committed all Confluence document changes to database" diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index b08a36132..5aa56aa3f 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -462,6 +462,13 @@ async def index_discord_messages( f"Successfully indexed new channel {guild_name}#{channel_name} with {len(formatted_messages)} messages" ) + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Discord channels processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing guild {guild_name}: {e!s}", exc_info=True @@ -476,6 +483,10 @@ async def index_discord_messages( if documents_indexed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) + # Final commit for any remaining documents not yet committed in batches + logger.info( + f"Final commit: Total {documents_indexed} Discord channels processed" + ) await session.commit() # Prepare result message diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py index 8cd8ca299..95897c29b 100644 --- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py @@ -381,13 +381,21 @@ async def index_github_repos( session.add(document) documents_processed += 1 + # Batch commit every 10 documents + if documents_processed % 10 == 0: + logger.info( + f"Committing batch: {documents_processed} GitHub files processed so far" + ) + await session.commit() + except Exception as repo_err: logger.error( f"Failed to process repository {repo_full_name}: {repo_err}" ) errors.append(f"Failed processing {repo_full_name}: {repo_err}") - # Commit all changes at the end + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_processed} GitHub files processed") await session.commit() logger.info( f"Finished GitHub indexing for connector {connector_id}. Processed {documents_processed} files." diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index b7d8e0b59..d1effd8fb 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -407,6 +407,13 @@ async def index_google_calendar_events( documents_indexed += 1 logger.info(f"Successfully indexed new event {event_summary}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Google Calendar events processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing event {event.get('summary', 'Unknown')}: {e!s}", @@ -422,6 +429,10 @@ async def index_google_calendar_events( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) + # Final commit for any remaining documents not yet committed in batches + logger.info( + f"Final commit: Total {documents_indexed} Google Calendar events processed" + ) await session.commit() await task_logger.log_task_success( diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index 7e821dc9f..e92967527 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -324,6 +324,13 @@ async def index_google_gmail_messages( documents_indexed += 1 logger.info(f"Successfully indexed new email {summary_content}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Gmail messages processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing the email {message_id}: {e!s}", @@ -338,7 +345,8 @@ async def index_google_gmail_messages( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed") await session.commit() logger.info( "Successfully committed all Google gmail document changes to database" diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py index 36e09c81e..7347e61ca 100644 --- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py @@ -352,6 +352,13 @@ async def index_jira_issues( f"Successfully indexed new issue {issue_identifier} - {issue_title}" ) + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Jira issues processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}", @@ -368,7 +375,8 @@ async def index_jira_issues( if update_last_indexed: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Jira issues processed") await session.commit() logger.info("Successfully committed all JIRA document changes to database") diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index 33d5835ee..6100c3cd8 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -371,6 +371,13 @@ async def index_linear_issues( f"Successfully indexed new issue {issue_identifier} - {issue_title}" ) + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Linear issues processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}", @@ -387,7 +394,8 @@ async def index_linear_issues( if update_last_indexed: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Linear issues processed") await session.commit() logger.info("Successfully committed all Linear document changes to database") diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index 15588afaa..a05328bb9 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -438,6 +438,13 @@ async def index_luma_events( documents_indexed += 1 logger.info(f"Successfully indexed new event {event_name}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Luma events processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing event {event.get('name', 'Unknown')}: {e!s}", @@ -453,6 +460,8 @@ async def index_luma_events( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Luma events processed") await session.commit() await task_logger.log_task_success( diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index 699d2fddf..8bac0c3ce 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -356,6 +356,14 @@ async def index_notion_pages( documents_indexed += 1 logger.info(f"Successfully updated Notion page: {page_title}") + + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} documents processed so far" + ) + await session.commit() + continue # Document doesn't exist - create new one @@ -406,6 +414,13 @@ async def index_notion_pages( documents_indexed += 1 logger.info(f"Successfully indexed new Notion page: {page_title}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} documents processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing Notion page {page.get('title', 'Unknown')}: {e!s}", @@ -423,7 +438,8 @@ async def index_notion_pages( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} documents processed") await session.commit() # Prepare result message diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py index dd9edcc8d..735125834 100644 --- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py @@ -353,6 +353,14 @@ async def index_slack_messages( session.add(document) documents_indexed += 1 + + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Slack channels processed so far" + ) + await session.commit() + logger.info( f"Successfully indexed new channel {channel_name} with {len(formatted_messages)} messages" ) @@ -376,7 +384,8 @@ async def index_slack_messages( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Slack channels processed") await session.commit() # Prepare result message diff --git a/surfsense_web/app/(home)/login/GoogleLoginButton.tsx b/surfsense_web/app/(home)/login/GoogleLoginButton.tsx index 4148141b9..3c8aaeb77 100644 --- a/surfsense_web/app/(home)/login/GoogleLoginButton.tsx +++ b/surfsense_web/app/(home)/login/GoogleLoginButton.tsx @@ -36,7 +36,7 @@ export function GoogleLoginButton() {