diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index a2a613777..898ab9735 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -76,8 +76,8 @@ celery_app.conf.update( enable_utc=True, # Task execution settings task_track_started=True, - task_time_limit=3600, # 1 hour hard limit - task_soft_time_limit=3000, # 50 minutes soft limit + task_time_limit=28800, # 8 hour hard limit + task_soft_time_limit=28200, # 7 hours 50 minutes soft limit # Result backend settings result_expires=86400, # Results expire after 24 hours result_extended=True, diff --git a/surfsense_backend/app/connectors/notion_history.py b/surfsense_backend/app/connectors/notion_history.py index 1f29a5968..81f6642f1 100644 --- a/surfsense_backend/app/connectors/notion_history.py +++ b/surfsense_backend/app/connectors/notion_history.py @@ -58,10 +58,23 @@ class NotionHistoryConnector: "timestamp": "last_edited_time", } - # First, get a list of all pages the integration has access to - search_results = await self.notion.search(**search_params) + # Paginate through all pages the integration has access to + pages = [] + has_more = True + cursor = None + + while has_more: + if cursor: + search_params["start_cursor"] = cursor + + search_results = await self.notion.search(**search_params) + + pages.extend(search_results["results"]) + has_more = search_results.get("has_more", False) + + if has_more: + cursor = search_results.get("next_cursor") - pages = search_results["results"] all_page_data = [] for page in pages: diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py index b670391eb..cf6824db8 100644 --- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py @@ -390,6 +390,13 @@ async def index_airtable_records( f"Successfully indexed new Airtable record {summary_content}" ) + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Airtable records processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing the Airtable record {record.get('id', 'Unknown')}: {e!s}", @@ -408,7 +415,10 @@ async def index_airtable_records( session, connector, update_last_indexed ) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info( + f"Final commit: Total {documents_indexed} Airtable records processed" + ) await session.commit() logger.info( "Successfully committed all Airtable document changes to database" diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py index 4c057946b..97fdbb6be 100644 --- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py @@ -354,6 +354,13 @@ async def index_clickup_tasks( documents_indexed += 1 logger.info(f"Successfully indexed new task {task_name}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} ClickUp tasks processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing task {task.get('name', 'Unknown')}: {e!s}", @@ -366,6 +373,8 @@ async def index_clickup_tasks( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} ClickUp tasks processed") await session.commit() await task_logger.log_task_success( diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index afdbdd177..c148e0879 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -368,6 +368,13 @@ async def index_confluence_pages( documents_indexed += 1 logger.info(f"Successfully indexed new page {page_title}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Confluence pages processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing page {page.get('title', 'Unknown')}: {e!s}", @@ -384,7 +391,10 @@ async def index_confluence_pages( if update_last_indexed: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info( + f"Final commit: Total {documents_indexed} Confluence pages processed" + ) await session.commit() logger.info( "Successfully committed all Confluence document changes to database" diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index b08a36132..5aa56aa3f 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -462,6 +462,13 @@ async def index_discord_messages( f"Successfully indexed new channel {guild_name}#{channel_name} with {len(formatted_messages)} messages" ) + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Discord channels processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing guild {guild_name}: {e!s}", exc_info=True @@ -476,6 +483,10 @@ async def index_discord_messages( if documents_indexed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) + # Final commit for any remaining documents not yet committed in batches + logger.info( + f"Final commit: Total {documents_indexed} Discord channels processed" + ) await session.commit() # Prepare result message diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py index 8cd8ca299..95897c29b 100644 --- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py @@ -381,13 +381,21 @@ async def index_github_repos( session.add(document) documents_processed += 1 + # Batch commit every 10 documents + if documents_processed % 10 == 0: + logger.info( + f"Committing batch: {documents_processed} GitHub files processed so far" + ) + await session.commit() + except Exception as repo_err: logger.error( f"Failed to process repository {repo_full_name}: {repo_err}" ) errors.append(f"Failed processing {repo_full_name}: {repo_err}") - # Commit all changes at the end + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_processed} GitHub files processed") await session.commit() logger.info( f"Finished GitHub indexing for connector {connector_id}. Processed {documents_processed} files." diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index b7d8e0b59..d1effd8fb 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -407,6 +407,13 @@ async def index_google_calendar_events( documents_indexed += 1 logger.info(f"Successfully indexed new event {event_summary}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Google Calendar events processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing event {event.get('summary', 'Unknown')}: {e!s}", @@ -422,6 +429,10 @@ async def index_google_calendar_events( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) + # Final commit for any remaining documents not yet committed in batches + logger.info( + f"Final commit: Total {documents_indexed} Google Calendar events processed" + ) await session.commit() await task_logger.log_task_success( diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index 7e821dc9f..e92967527 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -324,6 +324,13 @@ async def index_google_gmail_messages( documents_indexed += 1 logger.info(f"Successfully indexed new email {summary_content}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Gmail messages processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing the email {message_id}: {e!s}", @@ -338,7 +345,8 @@ async def index_google_gmail_messages( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Gmail messages processed") await session.commit() logger.info( "Successfully committed all Google gmail document changes to database" diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py index 36e09c81e..7347e61ca 100644 --- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py @@ -352,6 +352,13 @@ async def index_jira_issues( f"Successfully indexed new issue {issue_identifier} - {issue_title}" ) + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Jira issues processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}", @@ -368,7 +375,8 @@ async def index_jira_issues( if update_last_indexed: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Jira issues processed") await session.commit() logger.info("Successfully committed all JIRA document changes to database") diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index 33d5835ee..6100c3cd8 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -371,6 +371,13 @@ async def index_linear_issues( f"Successfully indexed new issue {issue_identifier} - {issue_title}" ) + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Linear issues processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}", @@ -387,7 +394,8 @@ async def index_linear_issues( if update_last_indexed: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Linear issues processed") await session.commit() logger.info("Successfully committed all Linear document changes to database") diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index 15588afaa..a05328bb9 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -438,6 +438,13 @@ async def index_luma_events( documents_indexed += 1 logger.info(f"Successfully indexed new event {event_name}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Luma events processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing event {event.get('name', 'Unknown')}: {e!s}", @@ -453,6 +460,8 @@ async def index_luma_events( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Luma events processed") await session.commit() await task_logger.log_task_success( diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index 699d2fddf..8bac0c3ce 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -356,6 +356,14 @@ async def index_notion_pages( documents_indexed += 1 logger.info(f"Successfully updated Notion page: {page_title}") + + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} documents processed so far" + ) + await session.commit() + continue # Document doesn't exist - create new one @@ -406,6 +414,13 @@ async def index_notion_pages( documents_indexed += 1 logger.info(f"Successfully indexed new Notion page: {page_title}") + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} documents processed so far" + ) + await session.commit() + except Exception as e: logger.error( f"Error processing Notion page {page.get('title', 'Unknown')}: {e!s}", @@ -423,7 +438,8 @@ async def index_notion_pages( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} documents processed") await session.commit() # Prepare result message diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py index dd9edcc8d..735125834 100644 --- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py @@ -353,6 +353,14 @@ async def index_slack_messages( session.add(document) documents_indexed += 1 + + # Batch commit every 10 documents + if documents_indexed % 10 == 0: + logger.info( + f"Committing batch: {documents_indexed} Slack channels processed so far" + ) + await session.commit() + logger.info( f"Successfully indexed new channel {channel_name} with {len(formatted_messages)} messages" ) @@ -376,7 +384,8 @@ async def index_slack_messages( if total_processed > 0: await update_connector_last_indexed(session, connector, update_last_indexed) - # Commit all changes + # Final commit for any remaining documents not yet committed in batches + logger.info(f"Final commit: Total {documents_indexed} Slack channels processed") await session.commit() # Prepare result message diff --git a/surfsense_web/app/(home)/login/GoogleLoginButton.tsx b/surfsense_web/app/(home)/login/GoogleLoginButton.tsx index 4148141b9..3c8aaeb77 100644 --- a/surfsense_web/app/(home)/login/GoogleLoginButton.tsx +++ b/surfsense_web/app/(home)/login/GoogleLoginButton.tsx @@ -36,7 +36,7 @@ export function GoogleLoginButton() {

{t("welcome_back")}

-{/* + {/* { - +

{space.name}

diff --git a/surfsense_web/components/chat/ChatInputGroup.tsx b/surfsense_web/components/chat/ChatInputGroup.tsx index 9dce08d51..010e71cd7 100644 --- a/surfsense_web/components/chat/ChatInputGroup.tsx +++ b/surfsense_web/components/chat/ChatInputGroup.tsx @@ -26,6 +26,7 @@ import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import { useDocumentTypes } from "@/hooks/use-document-types"; import type { Document } from "@/hooks/use-documents"; import { useLLMConfigs, useLLMPreferences } from "@/hooks/use-llm-configs"; +import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; const DocumentSelector = React.memo( ({ @@ -119,14 +120,31 @@ const ConnectorSelector = React.memo( true ); + // Fetch live search connectors (non-indexable) + const { + connectors: searchConnectors, + isLoading: connectorsLoading, + isLoaded: connectorsLoaded, + fetchConnectors, + } = useSearchSourceConnectors(true, Number(search_space_id)); + + // Filter for non-indexable connectors (live search) + const liveSearchConnectors = React.useMemo( + () => searchConnectors.filter((connector) => !connector.is_indexable), + [searchConnectors] + ); + const handleOpenChange = useCallback( (open: boolean) => { setIsOpen(open); if (open && !isLoaded) { fetchDocumentTypes(Number(search_space_id)); } + if (open && !connectorsLoaded) { + fetchConnectors(Number(search_space_id)); + } }, - [fetchDocumentTypes, isLoaded, search_space_id] + [fetchDocumentTypes, isLoaded, fetchConnectors, connectorsLoaded, search_space_id] ); const handleConnectorToggle = useCallback( @@ -141,14 +159,18 @@ const ConnectorSelector = React.memo( ); const handleSelectAll = useCallback(() => { - onSelectionChange?.(documentTypes.map((dt) => dt.type)); - }, [documentTypes, onSelectionChange]); + const allTypes = [ + ...documentTypes.map((dt) => dt.type), + ...liveSearchConnectors.map((c) => c.connector_type), + ]; + onSelectionChange?.(allTypes); + }, [documentTypes, liveSearchConnectors, onSelectionChange]); const handleClearAll = useCallback(() => { onSelectionChange?.([]); }, [onSelectionChange]); - // Get display name for document type + // Get display name for connector type const getDisplayName = (type: string) => { return type .split("_") @@ -158,6 +180,13 @@ const ConnectorSelector = React.memo( // Get selected document types with their counts const selectedDocTypes = documentTypes.filter((dt) => selectedConnectors.includes(dt.type)); + const selectedLiveConnectors = liveSearchConnectors.filter((c) => + selectedConnectors.includes(c.connector_type) + ); + + // Total selected count + const totalSelectedCount = selectedDocTypes.length + selectedLiveConnectors.length; + const totalAvailableCount = documentTypes.length + liveSearchConnectors.length; return ( @@ -168,10 +197,10 @@ const ConnectorSelector = React.memo( className="relative h-9 gap-2 px-3 border-dashed hover:border-solid hover:bg-accent/50 transition-all" >
- {selectedDocTypes.length > 0 ? ( + {totalSelectedCount > 0 ? ( <>
- {selectedDocTypes.slice(0, 3).map((docType) => ( + {selectedDocTypes.slice(0, 2).map((docType) => (
))} + {selectedLiveConnectors + .slice(0, 3 - selectedDocTypes.slice(0, 2).length) + .map((connector) => ( +
+ {getConnectorIcon(connector.connector_type, "h-3 w-3")} +
+ ))}
- {selectedDocTypes.length} {selectedDocTypes.length === 1 ? "source" : "sources"} + {totalSelectedCount} {totalSelectedCount === 1 ? "source" : "sources"} ) : ( @@ -194,99 +233,167 @@ const ConnectorSelector = React.memo( - -
+ +
- Select Document Types + Select Sources - Choose which document types to include in your search + Choose indexed document types and live search connectors to include in your search
- {/* Document type selection grid */} -
- {isLoading ? ( -
-
+ {isLoading || connectorsLoading ? ( +
+
+
+ ) : totalAvailableCount === 0 ? ( +
+
+
- ) : documentTypes.length === 0 ? ( -
-
- -
-

No documents found

-

- Add documents to this search space to enable filtering by type -

-
- ) : ( - documentTypes.map((docType) => { - const isSelected = selectedConnectors.includes(docType.type); +

No sources found

+

+ Add documents or configure search connectors for this search space +

+
+ ) : ( + <> + {/* Live Search Connectors Section */} + {liveSearchConnectors.length > 0 && ( +
+
+ +

Live Search Connectors

+ + Real-time + +
+
+ {liveSearchConnectors.map((connector) => { + const isSelected = selectedConnectors.includes(connector.connector_type); - return ( -
-

- {docType.count} {docType.count === 1 ? "document" : "documents"} -

-
- - ); - }) - )} -
+
+
+

{connector.name}

+ {isSelected && ( +
+ +
+ )} +
+

+ {getDisplayName(connector.connector_type)} +

+
+ + ); + })} +
+
+ )} - {documentTypes.length > 0 && ( - - - - + {/* Document Types Section */} + {documentTypes.length > 0 && ( +
+
+ +

Indexed Document Types

+ + Stored + +
+
+ {documentTypes.map((docType) => { + const isSelected = selectedConnectors.includes(docType.type); + + return ( + + ); + })} +
+
+ )} + )}
+ + {totalAvailableCount > 0 && ( + + + + + )}
); @@ -483,7 +590,6 @@ const LLMSelector = React.memo(() => {

- ))} diff --git a/surfsense_web/components/sidebar/app-sidebar.tsx b/surfsense_web/components/sidebar/app-sidebar.tsx index 243f2c062..399ab8373 100644 --- a/surfsense_web/components/sidebar/app-sidebar.tsx +++ b/surfsense_web/components/sidebar/app-sidebar.tsx @@ -1,8 +1,5 @@ "use client"; -import Link from "next/link"; -import Image from "next/image"; - import { AlertCircle, BookOpen, @@ -20,6 +17,8 @@ import { Trash2, Undo2, } from "lucide-react"; +import Image from "next/image"; +import Link from "next/link"; import { memo, useMemo } from "react"; import { Logo } from "@/components/Logo"; @@ -36,7 +35,6 @@ import { SidebarMenuItem, } from "@/components/ui/sidebar"; - // Map of icon names to their components export const iconMap: Record = { BookOpen, @@ -218,21 +216,21 @@ export const AppSidebar = memo(function AppSidebar({ - -
- SurfSense logo -
-
- SurfSense - beta v0.0.8 -
- + +
+ SurfSense logo +
+
+ SurfSense + beta v0.0.8 +
+