From 5e555a8f9a36eba414d35d3883f8c68430f8f17c Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sat, 31 Jan 2026 16:24:43 +0530 Subject: [PATCH 01/15] fix: improve notification for token expiration and revocation errors for multiple connectors --- .../connectors/google_drive/credentials.py | 9 +++++++ .../routes/airtable_add_connector_route.py | 13 +++++++++ .../app/routes/clickup_add_connector_route.py | 11 ++++++++ .../routes/confluence_add_connector_route.py | 13 +++++++++ .../app/routes/jira_add_connector_route.py | 13 +++++++++ .../app/routes/linear_add_connector_route.py | 13 +++++++++ .../app/routes/notion_add_connector_route.py | 13 +++++++++ .../app/routes/slack_add_connector_route.py | 27 +++++++++++++++++++ .../app/routes/teams_add_connector_route.py | 13 +++++++++ 9 files changed, 125 insertions(+) diff --git a/surfsense_backend/app/connectors/google_drive/credentials.py b/surfsense_backend/app/connectors/google_drive/credentials.py index 7e6335f6d..5b1900ab2 100644 --- a/surfsense_backend/app/connectors/google_drive/credentials.py +++ b/surfsense_backend/app/connectors/google_drive/credentials.py @@ -132,6 +132,15 @@ async def get_valid_credentials( await session.commit() except Exception as e: + error_str = str(e) + # Check if this is an invalid_grant error (token expired/revoked) + if ( + "invalid_grant" in error_str.lower() + or "token has been expired or revoked" in error_str.lower() + ): + raise Exception( + "Google Drive authentication failed. Please re-authenticate." 
+ ) from e raise Exception(f"Failed to refresh Google OAuth credentials: {e!s}") from e return credentials diff --git a/surfsense_backend/app/routes/airtable_add_connector_route.py b/surfsense_backend/app/routes/airtable_add_connector_route.py index 64fa104d8..423d61fb2 100644 --- a/surfsense_backend/app/routes/airtable_add_connector_route.py +++ b/surfsense_backend/app/routes/airtable_add_connector_route.py @@ -442,11 +442,24 @@ async def refresh_airtable_token( if token_response.status_code != 200: error_detail = token_response.text + error_code = "" try: error_json = token_response.json() error_detail = error_json.get("error_description", error_detail) + error_code = error_json.get("error", "") except Exception: pass + # Check if this is a token expiration/revocation error + error_lower = (error_detail + error_code).lower() + if ( + "invalid_grant" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="Airtable authentication failed. Please re-authenticate.", + ) raise HTTPException( status_code=400, detail=f"Token refresh failed: {error_detail}" ) diff --git a/surfsense_backend/app/routes/clickup_add_connector_route.py b/surfsense_backend/app/routes/clickup_add_connector_route.py index f962f65fb..1b2e6795d 100644 --- a/surfsense_backend/app/routes/clickup_add_connector_route.py +++ b/surfsense_backend/app/routes/clickup_add_connector_route.py @@ -417,6 +417,17 @@ async def refresh_clickup_token( error_detail = error_json.get("error", error_detail) except Exception: pass + # Check if this is a token expiration/revocation error + error_lower = error_detail.lower() + if ( + "invalid_grant" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="ClickUp authentication failed. 
Please re-authenticate.", + ) raise HTTPException( status_code=400, detail=f"Token refresh failed: {error_detail}" ) diff --git a/surfsense_backend/app/routes/confluence_add_connector_route.py b/surfsense_backend/app/routes/confluence_add_connector_route.py index 6c5830b17..24e0f858a 100644 --- a/surfsense_backend/app/routes/confluence_add_connector_route.py +++ b/surfsense_backend/app/routes/confluence_add_connector_route.py @@ -428,13 +428,26 @@ async def refresh_confluence_token( if token_response.status_code != 200: error_detail = token_response.text + error_code = "" try: error_json = token_response.json() error_detail = error_json.get( "error_description", error_json.get("error", error_detail) ) + error_code = error_json.get("error", "") except Exception: pass + # Check if this is a token expiration/revocation error + error_lower = (error_detail + error_code).lower() + if ( + "invalid_grant" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="Confluence authentication failed. 
Please re-authenticate.", + ) raise HTTPException( status_code=400, detail=f"Token refresh failed: {error_detail}" ) diff --git a/surfsense_backend/app/routes/jira_add_connector_route.py b/surfsense_backend/app/routes/jira_add_connector_route.py index fb66f4da7..58903606a 100644 --- a/surfsense_backend/app/routes/jira_add_connector_route.py +++ b/surfsense_backend/app/routes/jira_add_connector_route.py @@ -446,13 +446,26 @@ async def refresh_jira_token( if token_response.status_code != 200: error_detail = token_response.text + error_code = "" try: error_json = token_response.json() error_detail = error_json.get( "error_description", error_json.get("error", error_detail) ) + error_code = error_json.get("error", "") except Exception: pass + # Check if this is a token expiration/revocation error + error_lower = (error_detail + error_code).lower() + if ( + "invalid_grant" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="Jira authentication failed. 
Please re-authenticate.", + ) raise HTTPException( status_code=400, detail=f"Token refresh failed: {error_detail}" ) diff --git a/surfsense_backend/app/routes/linear_add_connector_route.py b/surfsense_backend/app/routes/linear_add_connector_route.py index fc9501bfb..dd5f7443c 100644 --- a/surfsense_backend/app/routes/linear_add_connector_route.py +++ b/surfsense_backend/app/routes/linear_add_connector_route.py @@ -403,11 +403,24 @@ async def refresh_linear_token( if token_response.status_code != 200: error_detail = token_response.text + error_code = "" try: error_json = token_response.json() error_detail = error_json.get("error_description", error_detail) + error_code = error_json.get("error", "") except Exception: pass + # Check if this is a token expiration/revocation error + error_lower = (error_detail + error_code).lower() + if ( + "invalid_grant" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="Linear authentication failed. 
Please re-authenticate.", + ) raise HTTPException( status_code=400, detail=f"Token refresh failed: {error_detail}" ) diff --git a/surfsense_backend/app/routes/notion_add_connector_route.py b/surfsense_backend/app/routes/notion_add_connector_route.py index aac821793..81017af50 100644 --- a/surfsense_backend/app/routes/notion_add_connector_route.py +++ b/surfsense_backend/app/routes/notion_add_connector_route.py @@ -407,11 +407,24 @@ async def refresh_notion_token( if token_response.status_code != 200: error_detail = token_response.text + error_code = "" try: error_json = token_response.json() error_detail = error_json.get("error_description", error_detail) + error_code = error_json.get("error", "") except Exception: pass + # Check if this is a token expiration/revocation error + error_lower = (error_detail + error_code).lower() + if ( + "invalid_grant" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="Notion authentication failed. Please re-authenticate.", + ) raise HTTPException( status_code=400, detail=f"Token refresh failed: {error_detail}" ) diff --git a/surfsense_backend/app/routes/slack_add_connector_route.py b/surfsense_backend/app/routes/slack_add_connector_route.py index 62d2ccaaa..e7f19e8b0 100644 --- a/surfsense_backend/app/routes/slack_add_connector_route.py +++ b/surfsense_backend/app/routes/slack_add_connector_route.py @@ -418,6 +418,19 @@ async def refresh_slack_token( error_detail = error_json.get("error", error_detail) except Exception: pass + # Check if this is a token expiration/revocation error + error_lower = error_detail.lower() + if ( + "invalid_grant" in error_lower + or "invalid_auth" in error_lower + or "token_revoked" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="Slack authentication failed. 
Please re-authenticate.", + ) raise HTTPException( status_code=400, detail=f"Token refresh failed: {error_detail}" ) @@ -427,6 +440,20 @@ async def refresh_slack_token( # Slack OAuth v2 returns success status in the JSON if not token_json.get("ok", False): error_msg = token_json.get("error", "Unknown error") + # Check if this is a token expiration/revocation error + error_lower = error_msg.lower() + if ( + "invalid_grant" in error_lower + or "invalid_auth" in error_lower + or "invalid_refresh_token" in error_lower + or "token_revoked" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="Slack authentication failed. Please re-authenticate.", + ) raise HTTPException( status_code=400, detail=f"Slack OAuth refresh error: {error_msg}" ) diff --git a/surfsense_backend/app/routes/teams_add_connector_route.py b/surfsense_backend/app/routes/teams_add_connector_route.py index 9ce84e171..77ce4965e 100644 --- a/surfsense_backend/app/routes/teams_add_connector_route.py +++ b/surfsense_backend/app/routes/teams_add_connector_route.py @@ -420,11 +420,24 @@ async def refresh_teams_token( if token_response.status_code != 200: error_detail = token_response.text + error_code = "" try: error_json = token_response.json() error_detail = error_json.get("error_description", error_detail) + error_code = error_json.get("error", "") except Exception: pass + # Check if this is a token expiration/revocation error + error_lower = (error_detail + error_code).lower() + if ( + "invalid_grant" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="Microsoft Teams authentication failed. 
Please re-authenticate.", + ) raise HTTPException( status_code=400, detail=f"Token refresh failed: {error_detail}" ) From eaf0a454b1bb048d430a243f3cde0d94eb2c7628 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sat, 31 Jan 2026 17:07:57 +0530 Subject: [PATCH 02/15] refactor: remove chat button from collapsed sidebar for cleaner UI --- .../components/layout/ui/sidebar/Sidebar.tsx | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx index 4a587cd58..070462341 100644 --- a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx @@ -1,6 +1,6 @@ "use client"; -import { FolderOpen, MessageSquare, PenSquare } from "lucide-react"; +import { FolderOpen, PenSquare } from "lucide-react"; import { useTranslations } from "next-intl"; import { Button } from "@/components/ui/button"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; @@ -122,26 +122,7 @@ export function Sidebar({ {/* Chat sections - fills available space */} {isCollapsed ? ( -
- {(chats.length > 0 || sharedChats.length > 0) && ( - - - - - - {t("chats")} ({chats.length + sharedChats.length}) - - - )} -
+
) : (
{/* Shared Chats Section - takes half the space */} From 59dd9554b35628290fe03c8b32ebaec37f59aabd Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sat, 31 Jan 2026 18:30:50 +0530 Subject: [PATCH 03/15] feat: add endpoint to fetch Slack channels with bot membership status and update UI to display channels --- .../app/routes/slack_add_connector_route.py | 92 ++++++++++ .../components/slack-config.tsx | 167 +++++++++++++++++- .../contracts/types/connector.types.ts | 22 +++ .../lib/apis/connectors-api.service.ts | 18 ++ 4 files changed, 296 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/routes/slack_add_connector_route.py b/surfsense_backend/app/routes/slack_add_connector_route.py index e7f19e8b0..66ba1cd41 100644 --- a/surfsense_backend/app/routes/slack_add_connector_route.py +++ b/surfsense_backend/app/routes/slack_add_connector_route.py @@ -6,6 +6,7 @@ Handles OAuth 2.0 authentication flow for Slack connector. import logging from datetime import UTC, datetime, timedelta +from typing import Any from uuid import UUID import httpx @@ -14,6 +15,7 @@ from fastapi.responses import RedirectResponse from pydantic import ValidationError from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select from app.config import config from app.db import ( @@ -517,3 +519,93 @@ async def refresh_slack_token( raise HTTPException( status_code=500, detail=f"Failed to refresh Slack token: {e!s}" ) from e + + +@router.get("/slack/connector/{connector_id}/channels") +async def get_slack_channels( + connector_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +) -> list[dict[str, Any]]: + """ + Get list of Slack channels with bot membership status. + + This endpoint fetches all channels the bot can see and indicates + whether the bot is a member of each channel (required for accessing messages). 
+ + Args: + connector_id: The Slack connector ID + session: Database session + user: Current authenticated user + + Returns: + List of channels with id, name, is_private, and is_member fields + """ + try: + # Get the connector and verify ownership + result = await session.execute( + select(SearchSourceConnector).where( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.user_id == user.id, + SearchSourceConnector.connector_type == SearchSourceConnectorType.SLACK_CONNECTOR, + ) + ) + connector = result.scalar_one_or_none() + + if not connector: + raise HTTPException( + status_code=404, + detail="Slack connector not found or access denied", + ) + + # Get credentials and decrypt bot token + credentials = SlackAuthCredentialsBase.from_dict(connector.config) + token_encryption = get_token_encryption() + is_encrypted = connector.config.get("_token_encrypted", False) + + bot_token = credentials.bot_token + if is_encrypted and bot_token: + try: + bot_token = token_encryption.decrypt_token(bot_token) + except Exception as e: + logger.error(f"Failed to decrypt bot token: {e!s}") + raise HTTPException( + status_code=500, detail="Failed to decrypt stored bot token" + ) from e + + if not bot_token: + raise HTTPException( + status_code=400, + detail="No bot token available. 
Please re-authenticate.", + ) + + # Import SlackHistory here to avoid circular imports + from app.connectors.slack_history import SlackHistory + + # Create Slack client and fetch channels + slack_client = SlackHistory( + session=session, + connector_id=connector_id, + credentials=credentials, + ) + # Set the decrypted token directly + slack_client.set_token(bot_token) + + channels = await slack_client.get_all_channels(include_private=True) + + logger.info( + f"Fetched {len(channels)} channels for Slack connector {connector_id}" + ) + + return channels + + except HTTPException: + raise + except Exception as e: + logger.error( + f"Failed to get Slack channels for connector {connector_id}: {e!s}", + exc_info=True, + ) + raise HTTPException( + status_code=500, detail=f"Failed to get Slack channels: {e!s}" + ) from e diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/slack-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/slack-config.tsx index 58293c4de..3af3e564e 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/slack-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/slack-config.tsx @@ -1,16 +1,79 @@ "use client"; -import { Info } from "lucide-react"; -import type { FC } from "react"; +import { AlertCircle, CheckCircle2, Hash, Info, Lock, RefreshCw } from "lucide-react"; +import { type FC, useCallback, useEffect, useState } from "react"; +import { Button } from "@/components/ui/button"; +import { Spinner } from "@/components/ui/spinner"; +import { connectorsApiService, type SlackChannel } from "@/lib/apis/connectors-api.service"; +import { cn } from "@/lib/utils"; import type { ConnectorConfigProps } from "../index"; export interface SlackConfigProps extends ConnectorConfigProps { onNameChange?: (name: string) => void; } -export const SlackConfig: FC = () => { +export const 
SlackConfig: FC = ({ connector }) => { + const [channels, setChannels] = useState([]); + const [isLoading, setIsLoading] = useState(false); + const [error, setError] = useState(null); + const [lastFetched, setLastFetched] = useState(null); + + const fetchChannels = useCallback(async () => { + if (!connector?.id) return; + + setIsLoading(true); + setError(null); + + try { + const data = await connectorsApiService.getSlackChannels(connector.id); + setChannels(data); + setLastFetched(new Date()); + } catch (err) { + console.error("Failed to fetch Slack channels:", err); + setError(err instanceof Error ? err.message : "Failed to fetch channels"); + } finally { + setIsLoading(false); + } + }, [connector?.id]); + + // Fetch channels on mount + useEffect(() => { + fetchChannels(); + }, [fetchChannels]); + + // Auto-refresh when user returns to tab + useEffect(() => { + const handleVisibilityChange = () => { + if (document.visibilityState === "visible" && connector?.id) { + fetchChannels(); + } + }; + + document.addEventListener("visibilitychange", handleVisibilityChange); + return () => document.removeEventListener("visibilitychange", handleVisibilityChange); + }, [connector?.id, fetchChannels]); + + // Separate channels by bot membership + const channelsWithBot = channels.filter((ch) => ch.is_member); + const channelsWithoutBot = channels.filter((ch) => !ch.is_member); + + // Format last fetched time + const formatLastFetched = () => { + if (!lastFetched) return null; + const now = new Date(); + const diffMs = now.getTime() - lastFetched.getTime(); + const diffSecs = Math.floor(diffMs / 1000); + const diffMins = Math.floor(diffSecs / 60); + + if (diffSecs < 60) return "just now"; + if (diffMins === 1) return "1 minute ago"; + if (diffMins < 60) return `${diffMins} minutes ago`; + return lastFetched.toLocaleTimeString(); + }; + return (
+ {/* Info box */}
@@ -25,6 +88,104 @@ export const SlackConfig: FC = () => {

+ + {/* Channels Section */} +
+
+
+

Channel Access

+
+
+ {lastFetched && ( + {formatLastFetched()} + )} + +
+
+ + {error && ( +
+ {error} +
+ )} + + {isLoading && channels.length === 0 ? ( +
+ + Loading channels +
+ ) : channels.length === 0 && !error ? ( +
+ No channels found. Make sure the bot has been added to your Slack workspace. +
+ ) : ( +
+ {/* Channels with bot access */} + {channelsWithBot.length > 0 && ( +
0 && "border-b border-border")}> +
+ + Ready to index + + {channelsWithBot.length}{" "} + {channelsWithBot.length === 1 ? "channel" : "channels"} + +
+
+ {channelsWithBot.map((channel) => ( + + ))} +
+
+ )} + + {/* Channels without bot access */} + {channelsWithoutBot.length > 0 && ( +
+
+ + Add bot to index + + {channelsWithoutBot.length}{" "} + {channelsWithoutBot.length === 1 ? "channel" : "channels"} + +
+
+ {channelsWithoutBot.map((channel) => ( + + ))} +
+
+ )} +
+ )} +
+
+ ); +}; + +interface ChannelPillProps { + channel: SlackChannel; +} + +const ChannelPill: FC = ({ channel }) => { + return ( +
+ {channel.is_private ? ( + + ) : ( + + )} + {channel.name}
); }; diff --git a/surfsense_web/contracts/types/connector.types.ts b/surfsense_web/contracts/types/connector.types.ts index 5082fe49c..05efa51d2 100644 --- a/surfsense_web/contracts/types/connector.types.ts +++ b/surfsense_web/contracts/types/connector.types.ts @@ -203,6 +203,25 @@ export const listGoogleDriveFoldersResponse = z.object({ items: z.array(googleDriveItem), }); +/** + * Slack channel with bot membership status + */ +export const slackChannel = z.object({ + id: z.string(), + name: z.string(), + is_private: z.boolean(), + is_member: z.boolean(), +}); + +/** + * List Slack channels + */ +export const listSlackChannelsRequest = z.object({ + connector_id: z.number(), +}); + +export const listSlackChannelsResponse = z.array(slackChannel); + // Inferred types export type SearchSourceConnectorType = z.infer; export type SearchSourceConnector = z.infer; @@ -223,3 +242,6 @@ export type ListGitHubRepositoriesResponse = z.infer; export type ListGoogleDriveFoldersResponse = z.infer; export type GoogleDriveItem = z.infer; +export type SlackChannel = z.infer; +export type ListSlackChannelsRequest = z.infer; +export type ListSlackChannelsResponse = z.infer; diff --git a/surfsense_web/lib/apis/connectors-api.service.ts b/surfsense_web/lib/apis/connectors-api.service.ts index 10e08dc71..75e5a938a 100644 --- a/surfsense_web/lib/apis/connectors-api.service.ts +++ b/surfsense_web/lib/apis/connectors-api.service.ts @@ -20,6 +20,8 @@ import { listGitHubRepositoriesResponse, listGoogleDriveFoldersRequest, listGoogleDriveFoldersResponse, + listSlackChannelsResponse, + type SlackChannel, type UpdateConnectorRequest, updateConnectorRequest, updateConnectorResponse, @@ -335,6 +337,22 @@ class ConnectorsApiService { } ); }; + + // ============================================================================= + // Slack Connector Methods + // ============================================================================= + + /** + * Get Slack channels with bot membership status + */ 
+ getSlackChannels = async (connectorId: number) => { + return baseApiService.get( + `/api/v1/slack/connector/${connectorId}/channels`, + listSlackChannelsResponse + ); + }; } +export type { SlackChannel }; + export const connectorsApiService = new ConnectorsApiService(); From ca7e45405ca4f650cbe2874e6caf411f2d4f1b87 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sat, 31 Jan 2026 23:15:00 +0530 Subject: [PATCH 04/15] feat: implement relative date formatting for last indexed timestamps in connector views --- .../components/date-range-selector.tsx | 20 ++++------- .../views/connector-accounts-list-view.tsx | 36 ++----------------- surfsense_web/lib/format-date.ts | 25 +++++++++++++ 3 files changed, 33 insertions(+), 48 deletions(-) create mode 100644 surfsense_web/lib/format-date.ts diff --git a/surfsense_web/components/assistant-ui/connector-popup/components/date-range-selector.tsx b/surfsense_web/components/assistant-ui/connector-popup/components/date-range-selector.tsx index 7490aa959..9d7044bb4 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/components/date-range-selector.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/components/date-range-selector.tsx @@ -7,6 +7,7 @@ import { Button } from "@/components/ui/button"; import { Calendar } from "@/components/ui/calendar"; import { Label } from "@/components/ui/label"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { formatRelativeDate } from "@/lib/format-date"; import { cn } from "@/lib/utils"; interface DateRangeSelectorProps { @@ -26,19 +27,10 @@ export const DateRangeSelector: FC = ({ allowFutureDates = false, lastIndexedAt, }) => { - // Get the placeholder text for start date based on whether connector was previously indexed - const getStartDatePlaceholder = () => { - if (lastIndexedAt) { - const date = new Date(lastIndexedAt); - const currentYear = new Date().getFullYear(); - 
const indexedYear = date.getFullYear(); - // Show year only if different from current year - const formatStr = indexedYear === currentYear ? "MMM d, HH:mm" : "MMM d, yyyy HH:mm"; - const formattedDate = format(date, formatStr); - return `Since (${formattedDate})`; - } - return "Default (1 year ago)"; - }; + const startDatePlaceholder = lastIndexedAt + ? `From ${formatRelativeDate(lastIndexedAt)}` + : "Default (1 year)"; + const handleLast30Days = () => { const today = new Date(); onStartDateChange(subDays(today, 30)); @@ -88,7 +80,7 @@ export const DateRangeSelector: FC = ({ )} > - {startDate ? format(startDate, "PPP") : getStartDatePlaceholder()} + {startDate ? format(startDate, "PPP") : startDatePlaceholder} diff --git a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx index bf151ab43..2f0e0eb5e 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx @@ -1,6 +1,5 @@ "use client"; -import { differenceInDays, differenceInMinutes, format, isToday, isYesterday } from "date-fns"; import { ArrowLeft, Plus, Server } from "lucide-react"; import type { FC } from "react"; import { Button } from "@/components/ui/button"; @@ -8,6 +7,7 @@ import { Spinner } from "@/components/ui/spinner"; import { EnumConnectorName } from "@/contracts/enums/connector"; import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import type { SearchSourceConnector } from "@/contracts/types/connector.types"; +import { formatRelativeDate } from "@/lib/format-date"; import { cn } from "@/lib/utils"; import { useConnectorStatus } from "../hooks/use-connector-status"; import { getConnectorDisplayName } from "../tabs/all-connectors-tab"; @@ -32,38 +32,6 @@ function isIndexableConnector(connectorType: 
string): boolean { return !nonIndexableTypes.includes(connectorType); } -/** - * Format last indexed date with contextual messages - */ -function formatLastIndexedDate(dateString: string): string { - const date = new Date(dateString); - const now = new Date(); - const minutesAgo = differenceInMinutes(now, date); - const daysAgo = differenceInDays(now, date); - - if (minutesAgo < 1) { - return "Just now"; - } - - if (minutesAgo < 60) { - return `${minutesAgo} ${minutesAgo === 1 ? "minute" : "minutes"} ago`; - } - - if (isToday(date)) { - return `Today at ${format(date, "h:mm a")}`; - } - - if (isYesterday(date)) { - return `Yesterday at ${format(date, "h:mm a")}`; - } - - if (daysAgo < 7) { - return `${daysAgo} ${daysAgo === 1 ? "day" : "days"} ago`; - } - - return format(date, "MMM d, yyyy"); -} - export const ConnectorAccountsListView: FC = ({ connectorType, connectorTitle, @@ -215,7 +183,7 @@ export const ConnectorAccountsListView: FC = ({

{isIndexableConnector(connector.connector_type) ? connector.last_indexed_at - ? `Last indexed: ${formatLastIndexedDate(connector.last_indexed_at)}` + ? `Last indexed: ${formatRelativeDate(connector.last_indexed_at)}` : "Never indexed" : "Active"}

diff --git a/surfsense_web/lib/format-date.ts b/surfsense_web/lib/format-date.ts new file mode 100644 index 000000000..c7d8ca85e --- /dev/null +++ b/surfsense_web/lib/format-date.ts @@ -0,0 +1,25 @@ +import { differenceInDays, differenceInMinutes, format, isToday, isYesterday } from "date-fns"; + +/** + * Format a date string as a human-readable relative time + * - < 1 min: "Just now" + * - < 60 min: "15m ago" + * - Today: "Today, 2:30 PM" + * - Yesterday: "Yesterday, 2:30 PM" + * - < 7 days: "3d ago" + * - Older: "Jan 15, 2026" + */ +export function formatRelativeDate(dateString: string): string { + const date = new Date(dateString); + const now = new Date(); + const minutesAgo = differenceInMinutes(now, date); + const daysAgo = differenceInDays(now, date); + + if (minutesAgo < 1) return "Just now"; + if (minutesAgo < 60) return `${minutesAgo}m ago`; + if (isToday(date)) return `Today, ${format(date, "h:mm a")}`; + if (isYesterday(date)) return `Yesterday, ${format(date, "h:mm a")}`; + if (daysAgo < 7) return `${daysAgo}d ago`; + return format(date, "MMM d, yyyy"); +} + From e5f7e87f42bb1cb1a1f7157130fcee8be4910aa1 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sat, 31 Jan 2026 23:33:20 +0530 Subject: [PATCH 05/15] refactor: update sidebar layout for shared and private chats to optimize space usage --- .../components/layout/ui/sidebar/Sidebar.tsx | 11 ++++++----- .../layout/ui/sidebar/SidebarSection.tsx | 18 +++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx index 070462341..db04bf6dc 100644 --- a/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/Sidebar.tsx @@ -125,11 +125,12 @@ export function Sidebar({
) : (
- {/* Shared Chats Section - takes half the space */} + {/* Shared Chats Section - takes only space needed, max 50% */} @@ -151,9 +152,9 @@ export function Sidebar({ } > {sharedChats.length > 0 ? ( -
+
4 ? "pb-8" : ""}`} + className={`flex flex-col gap-0.5 max-h-full overflow-y-auto scrollbar-thin scrollbar-thumb-muted-foreground/20 scrollbar-track-transparent ${sharedChats.length > 4 ? "pb-8" : ""}`} > {sharedChats.slice(0, 20).map((chat) => ( - {/* Private Chats Section - takes half the space */} + {/* Private Chats Section - fills remaining space */}
@@ -56,15 +56,15 @@ export function SidebarSection({ )}
- +
-
- {children} -
- + {children} +
+
); } From 024a683b4feff66e8d7c6bd6b1ef45885979f866 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sun, 1 Feb 2026 02:17:06 +0530 Subject: [PATCH 06/15] feat: add heartbeat callback support for long-running indexing tasks and implement stale notification cleanup task --- surfsense_backend/app/celery_app.py | 11 ++ .../connectors/composio_gmail_connector.py | 15 ++ .../composio_google_calendar_connector.py | 14 ++ .../composio_google_drive_connector.py | 27 ++++ .../routes/search_source_connectors_routes.py | 75 +++++++++- .../stale_notification_cleanup_task.py | 141 ++++++++++++++++++ .../app/tasks/composio_indexer.py | 7 + .../connector_indexers/airtable_indexer.py | 24 +++ .../connector_indexers/bookstack_indexer.py | 17 +++ .../connector_indexers/clickup_indexer.py | 18 +++ .../connector_indexers/confluence_indexer.py | 17 +++ .../connector_indexers/discord_indexer.py | 18 +++ .../elasticsearch_indexer.py | 18 +++ .../connector_indexers/github_indexer.py | 18 +++ .../google_calendar_indexer.py | 17 +++ .../google_drive_indexer.py | 28 ++++ .../google_gmail_indexer.py | 18 +++ .../tasks/connector_indexers/jira_indexer.py | 17 +++ .../connector_indexers/linear_indexer.py | 18 +++ .../tasks/connector_indexers/luma_indexer.py | 17 +++ .../connector_indexers/notion_indexer.py | 19 +++ .../connector_indexers/obsidian_indexer.py | 17 +++ .../tasks/connector_indexers/slack_indexer.py | 18 +++ .../tasks/connector_indexers/teams_indexer.py | 19 +++ .../connector_indexers/webcrawler_indexer.py | 17 +++ .../hooks/use-indexing-connectors.ts | 64 +++++++- surfsense_web/lib/electric/client.ts | 3 +- 27 files changed, 685 insertions(+), 7 deletions(-) create mode 100644 surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index 8858c2619..b77f5698e 100644 --- a/surfsense_backend/app/celery_app.py +++ 
b/surfsense_backend/app/celery_app.py @@ -79,6 +79,7 @@ celery_app = Celery( "app.tasks.celery_tasks.schedule_checker_task", "app.tasks.celery_tasks.blocknote_migration_tasks", "app.tasks.celery_tasks.document_reindex_tasks", + "app.tasks.celery_tasks.stale_notification_cleanup_task", ], ) @@ -121,4 +122,14 @@ celery_app.conf.beat_schedule = { "expires": 30, # Task expires after 30 seconds if not picked up }, }, + # Cleanup stale connector indexing notifications every 5 minutes + # This detects tasks that crashed or timed out without proper cleanup + # and marks their notifications as failed so users don't see perpetual "syncing" + "cleanup-stale-indexing-notifications": { + "task": "cleanup_stale_indexing_notifications", + "schedule": crontab(minute="*/5"), # Every 5 minutes + "options": { + "expires": 60, # Task expires after 60 seconds if not picked up + }, + }, } diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py index 953e2e8fc..9bb1197b8 100644 --- a/surfsense_backend/app/connectors/composio_gmail_connector.py +++ b/surfsense_backend/app/connectors/composio_gmail_connector.py @@ -5,9 +5,15 @@ Provides Gmail specific methods for data retrieval and indexing via Composio. 
""" import logging +import time +from collections.abc import Awaitable, Callable from datetime import UTC, datetime from typing import Any +# Heartbeat configuration +HeartbeatCallbackType = Callable[[int], Awaitable[None]] +HEARTBEAT_INTERVAL_SECONDS = 30 + from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm import selectinload @@ -427,6 +433,7 @@ async def index_composio_gmail( log_entry, update_last_indexed: bool = True, max_items: int = 1000, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str]: """Index Gmail messages via Composio with pagination and incremental processing.""" try: @@ -471,8 +478,16 @@ async def index_composio_gmail( total_documents_skipped = 0 total_messages_fetched = 0 result_size_estimate = None # Will be set from first API response + last_heartbeat_time = time.time() while total_messages_fetched < max_items: + # Send heartbeat periodically to indicate task is still alive + if on_heartbeat_callback: + current_time = time.time() + if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(total_documents_indexed) + last_heartbeat_time = current_time + # Calculate how many messages to fetch in this batch remaining = max_items - total_messages_fetched current_batch_size = min(batch_size, remaining) diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py index ec5b22b7f..669543210 100644 --- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py +++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py @@ -5,9 +5,15 @@ Provides Google Calendar specific methods for data retrieval and indexing via Co """ import logging +import time +from collections.abc import Awaitable, Callable from datetime import UTC, datetime from typing import Any +# Heartbeat configuration +HeartbeatCallbackType = 
Callable[[int], Awaitable[None]] +HEARTBEAT_INTERVAL_SECONDS = 30 + from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm import selectinload @@ -191,6 +197,7 @@ async def index_composio_google_calendar( log_entry, update_last_indexed: bool = True, max_items: int = 2500, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str]: """Index Google Calendar events via Composio.""" try: @@ -262,8 +269,15 @@ async def index_composio_google_calendar( duplicate_content_count = ( 0 # Track events skipped due to duplicate content_hash ) + last_heartbeat_time = time.time() for event in events: + # Send heartbeat periodically to indicate task is still alive + if on_heartbeat_callback: + current_time = time.time() + if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = current_time try: # Handle both standard Google API and potential Composio variations event_id = event.get("id", "") or event.get("eventId", "") diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py index 5b8c4b993..debbced20 100644 --- a/surfsense_backend/app/connectors/composio_google_drive_connector.py +++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py @@ -9,10 +9,16 @@ import json import logging import os import tempfile +import time +from collections.abc import Awaitable, Callable from datetime import UTC, datetime from pathlib import Path from typing import Any +# Heartbeat configuration +HeartbeatCallbackType = Callable[[int], Awaitable[None]] +HEARTBEAT_INTERVAL_SECONDS = 30 + from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm.attributes import flag_modified @@ -565,6 +571,7 @@ async def index_composio_google_drive( log_entry, update_last_indexed: bool = True, max_items: int = 1000, + on_heartbeat_callback: 
HeartbeatCallbackType | None = None, ) -> tuple[int, int, str | None]: """Index Google Drive files via Composio with delta sync support. @@ -652,6 +659,7 @@ async def index_composio_google_drive( max_items=max_items, task_logger=task_logger, log_entry=log_entry, + on_heartbeat_callback=on_heartbeat_callback, ) else: logger.info( @@ -684,6 +692,7 @@ async def index_composio_google_drive( max_items=max_items, task_logger=task_logger, log_entry=log_entry, + on_heartbeat_callback=on_heartbeat_callback, ) # Get new page token for next sync (always update after successful sync) @@ -765,6 +774,7 @@ async def _index_composio_drive_delta_sync( max_items: int, task_logger: TaskLoggingService, log_entry, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, int, list[str]]: """Index Google Drive files using delta sync (only changed files). @@ -774,6 +784,7 @@ async def _index_composio_drive_delta_sync( documents_indexed = 0 documents_skipped = 0 processing_errors = [] + last_heartbeat_time = time.time() # Fetch all changes with pagination all_changes = [] @@ -804,6 +815,13 @@ async def _index_composio_drive_delta_sync( logger.info(f"Processing {len(all_changes)} changes from delta sync") for change in all_changes[:max_items]: + # Send heartbeat periodically to indicate task is still alive + if on_heartbeat_callback: + current_time = time.time() + if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = current_time + try: # Handle removed files is_removed = change.get("removed", False) @@ -886,11 +904,13 @@ async def _index_composio_drive_full_scan( max_items: int, task_logger: TaskLoggingService, log_entry, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, int, list[str]]: """Index Google Drive files using full scan (first sync or when no delta token).""" documents_indexed = 0 documents_skipped = 0 processing_errors = [] + 
last_heartbeat_time = time.time() all_files = [] @@ -1001,6 +1021,13 @@ async def _index_composio_drive_full_scan( ) for file_info in all_files: + # Send heartbeat periodically to indicate task is still alive + if on_heartbeat_callback: + current_time = time.time() + if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = current_time + try: # Handle both standard Google API and potential Composio variations file_id = file_info.get("id", "") or file_info.get("fileId", "") diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index a27c2125c..678bf73c0 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -1137,6 +1137,7 @@ async def run_slack_indexing( end_date=end_date, indexing_function=index_slack_messages, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -1150,6 +1151,7 @@ async def _run_indexing_with_notifications( indexing_function, update_timestamp_func=None, supports_retry_callback: bool = False, + supports_heartbeat_callback: bool = False, ): """ Generic helper to run indexing with real-time notifications. 
@@ -1164,11 +1166,13 @@ async def _run_indexing_with_notifications( indexing_function: Async function that performs the indexing update_timestamp_func: Optional function to update connector timestamp supports_retry_callback: Whether the indexing function supports on_retry_callback + supports_heartbeat_callback: Whether the indexing function supports on_heartbeat_callback """ + from celery.exceptions import SoftTimeLimitExceeded from uuid import UUID notification = None - # Track indexed count for retry notifications + # Track indexed count for retry notifications and heartbeat current_indexed_count = 0 try: @@ -1227,6 +1231,27 @@ async def _run_indexing_with_notifications( # Don't let notification errors break the indexing logger.warning(f"Failed to update retry notification: {e}") + # Create heartbeat callback for connectors that support it + # This updates the notification periodically during long-running indexing loops + # to prevent the task from appearing stuck if the worker crashes + async def on_heartbeat_callback(indexed_count: int) -> None: + """Callback to update notification during indexing (heartbeat).""" + nonlocal notification, current_indexed_count + current_indexed_count = indexed_count + if notification: + try: + await session.refresh(notification) + await NotificationService.connector_indexing.notify_indexing_progress( + session=session, + notification=notification, + indexed_count=indexed_count, + stage="processing", + ) + await session.commit() + except Exception as e: + # Don't let notification errors break the indexing + logger.warning(f"Failed to update heartbeat notification: {e}") + # Build kwargs for indexing function indexing_kwargs = { "session": session, @@ -1242,6 +1267,10 @@ async def _run_indexing_with_notifications( if supports_retry_callback: indexing_kwargs["on_retry_callback"] = on_retry_callback + # Add heartbeat callback for connectors that support it + if supports_heartbeat_callback: + indexing_kwargs["on_heartbeat_callback"] 
= on_heartbeat_callback + # Run the indexing function # Some indexers return (indexed, error), others return (indexed, skipped, error) result = await indexing_function(**indexing_kwargs) @@ -1398,6 +1427,30 @@ async def _run_indexing_with_notifications( await ( session.commit() ) # Commit to ensure Electric SQL syncs the notification update + except SoftTimeLimitExceeded: + # Celery soft time limit was reached - task is about to be killed + # Gracefully save progress and mark as interrupted + logger.warning( + f"Soft time limit reached for connector {connector_id}. " + f"Saving partial progress: {current_indexed_count} items indexed." + ) + + if notification: + try: + await session.refresh(notification) + await NotificationService.connector_indexing.notify_indexing_completed( + session=session, + notification=notification, + indexed_count=current_indexed_count, + error_message="Time limit reached. Partial sync completed. Please run again for remaining items.", + is_warning=True, # Mark as warning since partial data was indexed + ) + await session.commit() + except Exception as notif_error: + logger.error(f"Failed to update notification on soft timeout: {notif_error!s}") + + # Re-raise so Celery knows the task was terminated + raise except Exception as e: logger.error(f"Error in indexing task: {e!s}", exc_info=True) @@ -1409,7 +1462,7 @@ async def _run_indexing_with_notifications( await NotificationService.connector_indexing.notify_indexing_completed( session=session, notification=notification, - indexed_count=0, + indexed_count=current_indexed_count, # Use tracked count, not 0 error_message=str(e), skipped_count=None, # Unknown on exception ) @@ -1439,6 +1492,7 @@ async def run_notion_indexing_with_new_session( indexing_function=index_notion_pages, update_timestamp_func=_update_connector_timestamp_by_id, supports_retry_callback=True, # Notion connector supports retry notifications + supports_heartbeat_callback=True, # Notion connector supports heartbeat 
notifications ) @@ -1471,6 +1525,7 @@ async def run_notion_indexing( indexing_function=index_notion_pages, update_timestamp_func=_update_connector_timestamp_by_id, supports_retry_callback=True, # Notion connector supports retry notifications + supports_heartbeat_callback=True, # Notion connector supports heartbeat notifications ) @@ -1521,6 +1576,7 @@ async def run_github_indexing( end_date=end_date, indexing_function=index_github_repos, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -1571,6 +1627,7 @@ async def run_linear_indexing( end_date=end_date, indexing_function=index_linear_issues, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -1620,6 +1677,7 @@ async def run_discord_indexing( end_date=end_date, indexing_function=index_discord_messages, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -1670,6 +1728,7 @@ async def run_teams_indexing( end_date=end_date, indexing_function=index_teams_messages, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -1720,6 +1779,7 @@ async def run_jira_indexing( end_date=end_date, indexing_function=index_jira_issues, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -1772,6 +1832,7 @@ async def run_confluence_indexing( end_date=end_date, indexing_function=index_confluence_pages, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -1822,6 +1883,7 @@ async def run_clickup_indexing( end_date=end_date, indexing_function=index_clickup_tasks, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -1872,6 +1934,7 @@ async def run_airtable_indexing( end_date=end_date, indexing_function=index_airtable_records, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ 
-1924,6 +1987,7 @@ async def run_google_calendar_indexing( end_date=end_date, indexing_function=index_google_calendar_events, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -1998,6 +2062,7 @@ async def run_google_gmail_indexing( end_date=end_date, indexing_function=gmail_indexing_wrapper, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -2206,6 +2271,7 @@ async def run_luma_indexing( end_date=end_date, indexing_function=index_luma_events, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -2257,6 +2323,7 @@ async def run_elasticsearch_indexing( end_date=end_date, indexing_function=index_elasticsearch_documents, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -2306,6 +2373,7 @@ async def run_web_page_indexing( end_date=end_date, indexing_function=index_crawled_urls, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -2360,6 +2428,7 @@ async def run_bookstack_indexing( end_date=end_date, indexing_function=index_bookstack_pages, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -2412,6 +2481,7 @@ async def run_obsidian_indexing( end_date=end_date, indexing_function=index_obsidian_vault, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) @@ -2465,6 +2535,7 @@ async def run_composio_indexing( end_date=end_date, indexing_function=index_composio_connector, update_timestamp_func=_update_connector_timestamp_by_id, + supports_heartbeat_callback=True, ) diff --git a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py new file mode 100644 index 000000000..ff162f70f --- /dev/null +++ 
b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py @@ -0,0 +1,141 @@ +"""Celery task to detect and mark stale connector indexing notifications as failed. + +This task runs periodically (every 5 minutes by default) to find notifications +that are stuck in "in_progress" status but haven't received a heartbeat update +in the configured timeout period. These are marked as "failed" to prevent the +frontend from showing a perpetual "syncing" state. +""" + +import logging +from datetime import UTC, datetime, timedelta + +from sqlalchemy import and_, update +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine +from sqlalchemy.future import select +from sqlalchemy.orm.attributes import flag_modified +from sqlalchemy.pool import NullPool + +from app.celery_app import celery_app +from app.config import config +from app.db import Notification + +logger = logging.getLogger(__name__) + +# Timeout in minutes - notifications without heartbeat for this long are marked as failed +# Should be longer than HEARTBEAT_INTERVAL_SECONDS (30s) * a reasonable number of missed heartbeats +# 5 minutes = 10 missed heartbeats, which is a reasonable threshold +STALE_NOTIFICATION_TIMEOUT_MINUTES = 5 + + +def get_celery_session_maker(): + """Create async session maker for Celery tasks.""" + engine = create_async_engine( + config.DATABASE_URL, + poolclass=NullPool, + echo=False, + ) + return async_sessionmaker(engine, expire_on_commit=False) + + +@celery_app.task(name="cleanup_stale_indexing_notifications") +def cleanup_stale_indexing_notifications_task(): + """ + Check for stale connector indexing notifications and mark them as failed. + + This task finds notifications that: + - Have type = 'connector_indexing' + - Have metadata.status = 'in_progress' + - Have updated_at older than STALE_NOTIFICATION_TIMEOUT_MINUTES + + And marks them as failed with an appropriate error message. 
+ """ + import asyncio + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + try: + loop.run_until_complete(_cleanup_stale_notifications()) + finally: + loop.close() + + +async def _cleanup_stale_notifications(): + """Find and mark stale connector indexing notifications as failed.""" + async with get_celery_session_maker()() as session: + try: + # Calculate the cutoff time + cutoff_time = datetime.now(UTC) - timedelta( + minutes=STALE_NOTIFICATION_TIMEOUT_MINUTES + ) + + # Find stale notifications: + # - type = 'connector_indexing' + # - metadata->>'status' = 'in_progress' + # - updated_at < cutoff_time + result = await session.execute( + select(Notification).filter( + and_( + Notification.type == "connector_indexing", + Notification.notification_metadata["status"].astext == "in_progress", + Notification.updated_at < cutoff_time, + ) + ) + ) + stale_notifications = result.scalars().all() + + if not stale_notifications: + logger.debug("No stale connector indexing notifications found") + return + + logger.warning( + f"Found {len(stale_notifications)} stale connector indexing notifications " + f"(no heartbeat for >{STALE_NOTIFICATION_TIMEOUT_MINUTES} minutes)" + ) + + # Mark each stale notification as failed + for notification in stale_notifications: + try: + # Get current indexed count from metadata if available + indexed_count = notification.notification_metadata.get("indexed_count", 0) + connector_name = notification.notification_metadata.get("connector_name", "Unknown") + + # Calculate how long it's been stale + stale_duration = datetime.now(UTC) - notification.updated_at + stale_minutes = int(stale_duration.total_seconds() / 60) + + # Update notification metadata + notification.notification_metadata["status"] = "failed" + notification.notification_metadata["completed_at"] = datetime.now(UTC).isoformat() + notification.notification_metadata["error_message"] = ( + f"Indexing task appears to have crashed or timed out. 
" + f"No activity detected for {stale_minutes} minutes. " + f"Please try syncing again." + ) + + # Flag the JSONB column as modified for SQLAlchemy to detect the change + flag_modified(notification, "notification_metadata") + + logger.info( + f"Marking notification {notification.id} for connector '{connector_name}' as failed " + f"(stale for {stale_minutes} minutes, indexed {indexed_count} items before failure)" + ) + + except Exception as e: + logger.error( + f"Error marking notification {notification.id} as failed: {e!s}", + exc_info=True, + ) + continue + + # Commit all changes + await session.commit() + logger.info( + f"Successfully marked {len(stale_notifications)} stale notifications as failed" + ) + + except Exception as e: + logger.error(f"Error cleaning up stale notifications: {e!s}", exc_info=True) + await session.rollback() + diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py index ffc4a1f27..49764fd98 100644 --- a/surfsense_backend/app/tasks/composio_indexer.py +++ b/surfsense_backend/app/tasks/composio_indexer.py @@ -9,8 +9,12 @@ to avoid circular import issues with the connector_indexers package. """ import logging +from collections.abc import Awaitable, Callable from importlib import import_module +# Type alias for heartbeat callback function +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select @@ -86,6 +90,7 @@ async def index_composio_connector( end_date: str | None = None, update_last_indexed: bool = True, max_items: int = 1000, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, int, str | None]: """ Index content from a Composio connector. 
@@ -102,6 +107,7 @@ async def index_composio_connector( end_date: End date for filtering (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp max_items: Maximum number of items to fetch + on_heartbeat_callback: Optional callback to report progress for heartbeat updates Returns: Tuple of (number_of_indexed_items, number_of_skipped_items, error_message or None) @@ -180,6 +186,7 @@ async def index_composio_connector( "log_entry": log_entry, "update_last_indexed": update_last_indexed, "max_items": max_items, + "on_heartbeat_callback": on_heartbeat_callback, } # Add date params for toolkits that support them diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py index 6bb62d716..ab9e5d678 100644 --- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py @@ -2,6 +2,9 @@ Airtable connector indexer. """ +import time +from collections.abc import Awaitable, Callable + from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession @@ -17,6 +20,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -37,6 +46,7 @@ async def index_airtable_records( end_date: str | None = None, max_records: int = 2500, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Airtable records for a given connector. 
@@ -50,6 +60,7 @@ async def index_airtable_records( end_date: End date for filtering records (YYYY-MM-DD) max_records: Maximum number of records to fetch per table update_last_indexed: Whether to update the last_indexed_at timestamp + on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: Tuple of (number_of_documents_processed, error_message) @@ -127,8 +138,16 @@ async def index_airtable_records( logger.info(f"Found {len(bases)} Airtable bases to process") + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + total_documents_indexed = 0 + # Process each base for base in bases: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(total_documents_indexed) + last_heartbeat_time = time.time() base_id = base.get("id") base_name = base.get("name", "Unknown Base") @@ -204,6 +223,11 @@ async def index_airtable_records( documents_skipped = 0 # Process each record for record in records: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(total_documents_indexed) + last_heartbeat_time = time.time() + try: # Generate markdown content markdown_content = ( diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py index e183ab333..90232809c 100644 --- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py @@ -2,6 +2,8 @@ BookStack connector indexer. 
""" +import time +from collections.abc import Awaitable, Callable from datetime import datetime from sqlalchemy.exc import SQLAlchemyError @@ -19,6 +21,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -38,6 +46,7 @@ async def index_bookstack_pages( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index BookStack pages. @@ -50,6 +59,7 @@ async def index_bookstack_pages( start_date: Start date for indexing (YYYY-MM-DD format) end_date: End date for indexing (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. 
Returns: Tuple containing (number of documents indexed, error message or None) @@ -179,7 +189,14 @@ async def index_bookstack_pages( skipped_pages = [] documents_skipped = 0 + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + for page in pages: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() try: page_id = page.get("id") page_name = page.get("name", "") diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py index 887c3e2e5..2b95b6a11 100644 --- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py @@ -3,6 +3,8 @@ ClickUp connector indexer. """ import contextlib +import time +from collections.abc import Awaitable, Callable from datetime import datetime from sqlalchemy.exc import SQLAlchemyError @@ -20,6 +22,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -38,6 +46,7 @@ async def index_clickup_tasks( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index tasks from ClickUp workspace. 
@@ -50,6 +59,7 @@ async def index_clickup_tasks( start_date: Start date for filtering tasks (YYYY-MM-DD format) end_date: End date for filtering tasks (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp + on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: Tuple of (number of indexed tasks, error message if any) @@ -132,6 +142,9 @@ async def index_clickup_tasks( documents_indexed = 0 documents_skipped = 0 + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + # Iterate workspaces and fetch tasks for workspace in workspaces: workspace_id = workspace.get("id") @@ -170,6 +183,11 @@ async def index_clickup_tasks( ) for task in tasks: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() + try: task_id = task.get("id") task_name = task.get("name", "Untitled Task") diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index 5673839bb..078aacf86 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -3,6 +3,8 @@ Confluence connector indexer. 
""" import contextlib +import time +from collections.abc import Awaitable, Callable from datetime import datetime from sqlalchemy.exc import SQLAlchemyError @@ -20,6 +22,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -39,6 +47,7 @@ async def index_confluence_pages( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Confluence pages and comments. @@ -51,6 +60,7 @@ async def index_confluence_pages( start_date: Start date for indexing (YYYY-MM-DD format) end_date: End date for indexing (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. 
Returns: Tuple containing (number of documents indexed, error message or None) @@ -175,7 +185,14 @@ async def index_confluence_pages( skipped_pages = [] documents_skipped = 0 + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + for page in pages: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() try: page_id = page.get("id") page_title = page.get("title", "") diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index 9e401b335..4bbeff125 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -3,6 +3,8 @@ Discord connector indexer. """ import asyncio +import time +from collections.abc import Awaitable, Callable from datetime import UTC, datetime, timedelta from sqlalchemy.exc import SQLAlchemyError @@ -28,6 +30,12 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_discord_messages( session: AsyncSession, @@ -37,6 +45,7 @@ async def index_discord_messages( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Discord messages from all accessible channels. 
@@ -49,6 +58,8 @@ async def index_discord_messages( start_date: Start date for indexing (YYYY-MM-DD format) end_date: End date for indexing (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. + Called periodically with (indexed_count) to prevent task appearing stuck. Returns: Tuple containing (number of documents indexed, error message or None) @@ -281,6 +292,9 @@ async def index_discord_messages( documents_skipped = 0 skipped_channels: list[str] = [] + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + # Process each guild and channel await task_logger.log_task_progress( log_entry, @@ -290,6 +304,10 @@ async def index_discord_messages( try: for guild in guilds: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() guild_id = guild["id"] guild_name = guild["name"] logger.info(f"Processing guild: {guild_name} ({guild_id})") diff --git a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py index 6a18af83b..49d82df0e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py @@ -4,6 +4,8 @@ Elasticsearch indexer for SurfSense import json import logging +import time +from collections.abc import Awaitable, Callable from datetime import UTC, datetime from typing import Any @@ -19,6 +21,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in 
seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -36,6 +44,7 @@ async def index_elasticsearch_documents( start_date: str, end_date: str, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index documents from Elasticsearch into SurfSense @@ -48,6 +57,7 @@ async def index_elasticsearch_documents( start_date: Start date for indexing (not used for Elasticsearch, kept for compatibility) end_date: End date for indexing (not used for Elasticsearch, kept for compatibility) update_last_indexed: Whether to update the last indexed timestamp + on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: Tuple of (number of documents processed, error message if any) @@ -155,6 +165,9 @@ async def index_elasticsearch_documents( documents_processed = 0 + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + try: await task_logger.log_task_progress( log_entry, @@ -172,6 +185,11 @@ async def index_elasticsearch_documents( size=min(max_documents, 100), # Scroll in batches fields=config.get("ELASTICSEARCH_FIELDS"), ): + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_processed) + last_heartbeat_time = time.time() + if documents_processed >= max_documents: break diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py index fb6989bb9..75e7f516c 100644 --- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py @@ -5,6 +5,8 @@ This indexer processes entire repository digests in one pass, dramatically reducing LLM API 
calls compared to the previous file-by-file approach. """ +import time +from collections.abc import Awaitable, Callable from datetime import UTC, datetime from sqlalchemy.exc import SQLAlchemyError @@ -22,6 +24,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -43,6 +51,7 @@ async def index_github_repos( start_date: str | None = None, # Ignored - GitHub indexes full repo snapshots end_date: str | None = None, # Ignored - GitHub indexes full repo snapshots update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index GitHub repositories using gitingest for efficient processing. @@ -62,6 +71,7 @@ async def index_github_repos( start_date: Ignored - kept for API compatibility end_date: Ignored - kept for API compatibility update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: Tuple containing (number of documents indexed, error message or None) @@ -168,7 +178,15 @@ async def index_github_repos( f"Starting gitingest indexing for {len(repo_full_names_to_index)} repositories." 
) + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + documents_indexed = 0 + for repo_full_name in repo_full_names_to_index: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() if not repo_full_name or not isinstance(repo_full_name, str): logger.warning(f"Skipping invalid repository entry: {repo_full_name}") continue diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index 81d33b5e2..cef2e15f1 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -2,6 +2,8 @@ Google Calendar connector indexer. """ +import time +from collections.abc import Awaitable, Callable from datetime import datetime, timedelta import pytz @@ -21,6 +23,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -39,6 +47,7 @@ async def index_google_calendar_events( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Google Calendar events. @@ -52,6 +61,7 @@ async def index_google_calendar_events( end_date: End date for indexing (YYYY-MM-DD format). Can be in the future to index upcoming events. Defaults to today if not provided. 
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: Tuple containing (number of documents indexed, error message or None) @@ -281,7 +291,14 @@ async def index_google_calendar_events( 0 # Track events skipped due to duplicate content_hash ) + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + for event in events: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() try: event_id = event.get("id") event_summary = event.get("summary", "No Title") diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index f50e149d3..98df68cd1 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -1,6 +1,8 @@ """Google Drive indexer using Surfsense file processors.""" import logging +import time +from collections.abc import Awaitable, Callable from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession @@ -24,6 +26,12 @@ from app.tasks.connector_indexers.base import ( ) from app.utils.document_converters import generate_unique_identifier_hash +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + logger = logging.getLogger(__name__) @@ -38,6 +46,7 @@ async def index_google_drive_files( update_last_indexed: bool = True, max_files: int = 500, include_subfolders: bool = False, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: 
""" Index Google Drive files for a specific connector. @@ -53,6 +62,7 @@ async def index_google_drive_files( update_last_indexed: Whether to update last_indexed_at timestamp max_files: Maximum number of files to index include_subfolders: Whether to recursively index files in subfolders + on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: Tuple of (number_of_indexed_files, error_message) @@ -147,6 +157,7 @@ async def index_google_drive_files( log_entry=log_entry, max_files=max_files, include_subfolders=include_subfolders, + on_heartbeat_callback=on_heartbeat_callback, ) else: logger.info(f"Using full scan for connector {connector_id}") @@ -163,6 +174,7 @@ async def index_google_drive_files( log_entry=log_entry, max_files=max_files, include_subfolders=include_subfolders, + on_heartbeat_callback=on_heartbeat_callback, ) documents_indexed, documents_skipped = result @@ -383,6 +395,7 @@ async def _index_full_scan( log_entry: any, max_files: int, include_subfolders: bool = False, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, int]: """Perform full scan indexing of a folder.""" await task_logger.log_task_progress( @@ -399,10 +412,17 @@ async def _index_full_scan( documents_skipped = 0 files_processed = 0 + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + # Queue of folders to process: (folder_id, folder_name) folders_to_process = [(folder_id, folder_name)] while folders_to_process and files_processed < max_files: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() current_folder_id, current_folder_name = folders_to_process.pop(0) logger.info(f"Processing folder: {current_folder_name} ({current_folder_id})") page_token = None @@ -485,6 +505,7 @@ 
async def _index_with_delta_sync( log_entry: any, max_files: int, include_subfolders: bool = False, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, int]: """Perform delta sync indexing using change tracking. @@ -515,7 +536,14 @@ async def _index_with_delta_sync( documents_skipped = 0 files_processed = 0 + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + for change in changes: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() if files_processed >= max_files: break diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index e599abd22..34e5a9530 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -2,6 +2,8 @@ Google Gmail connector indexer. """ +import time +from collections.abc import Awaitable, Callable from datetime import datetime from google.oauth2.credentials import Credentials @@ -23,6 +25,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -43,6 +51,7 @@ async def index_google_gmail_messages( end_date: str | None = None, update_last_indexed: bool = True, max_messages: int = 1000, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str]: """ Index Gmail messages for a specific connector. 
@@ -56,6 +65,7 @@ async def index_google_gmail_messages( end_date: End date for filtering messages (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) max_messages: Maximum number of messages to fetch (default: 100) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: Tuple of (number_of_indexed_messages, status_message) @@ -212,7 +222,15 @@ async def index_google_gmail_messages( documents_indexed = 0 skipped_messages = [] documents_skipped = 0 + + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + for message in messages: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() try: # Extract message information message_id = message.get("id", "") diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py index d6095d20e..ab36ae7d0 100644 --- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py @@ -3,6 +3,8 @@ Jira connector indexer. 
""" import contextlib +import time +from collections.abc import Awaitable, Callable from datetime import datetime from sqlalchemy.exc import SQLAlchemyError @@ -20,6 +22,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -39,6 +47,7 @@ async def index_jira_issues( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Jira issues and comments. @@ -51,6 +60,7 @@ async def index_jira_issues( start_date: Start date for indexing (YYYY-MM-DD format) end_date: End date for indexing (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. 
Returns: Tuple containing (number of documents indexed, error message or None) @@ -169,7 +179,14 @@ async def index_jira_issues( skipped_issues = [] documents_skipped = 0 + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + for issue in issues: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() try: issue_id = issue.get("key") issue_identifier = issue.get("key", "") diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index d00a39160..549aa0224 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -2,6 +2,8 @@ Linear connector indexer. """ +import time +from collections.abc import Awaitable, Callable from datetime import datetime from sqlalchemy.exc import SQLAlchemyError @@ -19,6 +21,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -38,6 +46,7 @@ async def index_linear_issues( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Linear issues and comments. 
@@ -50,6 +59,7 @@ async def index_linear_issues( start_date: Start date for indexing (YYYY-MM-DD format) end_date: End date for indexing (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: Tuple containing (number of documents indexed, error message or None) @@ -188,6 +198,9 @@ async def index_linear_issues( documents_skipped = 0 skipped_issues = [] + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + await task_logger.log_task_progress( log_entry, f"Starting to process {len(issues)} Linear issues", @@ -196,6 +209,11 @@ async def index_linear_issues( # Process each issue for issue in issues: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() + try: issue_id = issue.get("id", "") issue_identifier = issue.get("identifier", "") diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index 59890dbe4..22fd6d468 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -2,6 +2,8 @@ Luma connector indexer. 
""" +import time +from collections.abc import Awaitable, Callable from datetime import datetime, timedelta from sqlalchemy.exc import SQLAlchemyError @@ -19,6 +21,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -37,6 +45,7 @@ async def index_luma_events( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Luma events. @@ -50,6 +59,7 @@ async def index_luma_events( end_date: End date for indexing (YYYY-MM-DD format). Can be in the future to index upcoming events. Defaults to today if not provided. update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. 
Returns: Tuple containing (number of documents indexed, error message or None) @@ -221,7 +231,14 @@ async def index_luma_events( documents_skipped = 0 skipped_events = [] + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + for event in events: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() try: # Luma event structure fields - events have nested 'event' field event_data = event.get("event", {}) diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index a65bf84a7..88779db57 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -2,6 +2,7 @@ Notion connector indexer. """ +import time from collections.abc import Awaitable, Callable from datetime import datetime @@ -34,6 +35,13 @@ from .base import ( # Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds) -> None RetryCallbackType = Callable[[str, int, int, float], Awaitable[None]] +# Type alias for heartbeat callback +# Signature: async callback(indexed_count) -> None +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_notion_pages( session: AsyncSession, @@ -44,6 +52,7 @@ async def index_notion_pages( end_date: str | None = None, update_last_indexed: bool = True, on_retry_callback: RetryCallbackType | None = None, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Notion pages from all accessible pages. 
@@ -59,6 +68,8 @@ async def index_notion_pages( on_retry_callback: Optional callback for retry progress notifications. Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds) retry_reason is one of: 'rate_limit', 'server_error', 'timeout' + on_heartbeat_callback: Optional callback to update notification during long-running indexing. + Called periodically with (indexed_count) to prevent task appearing stuck. Returns: Tuple containing (number of documents indexed, error message or None) @@ -211,6 +222,9 @@ async def index_notion_pages( documents_skipped = 0 skipped_pages = [] + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + await task_logger.log_task_progress( log_entry, f"Starting to process {len(pages)} Notion pages", @@ -219,6 +233,11 @@ async def index_notion_pages( # Process each page for page in pages: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() + try: page_id = page.get("page_id") page_title = page.get("title", f"Untitled page ({page_id})") diff --git a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py index a603d3fba..48fa5f0d3 100644 --- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py @@ -7,6 +7,8 @@ This connector is only available in self-hosted mode. 
import os import re +import time +from collections.abc import Awaitable, Callable from datetime import UTC, datetime from pathlib import Path @@ -25,6 +27,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( build_document_metadata_string, check_document_by_unique_identifier, @@ -152,6 +160,7 @@ async def index_obsidian_vault( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index notes from a local Obsidian vault. @@ -167,6 +176,7 @@ async def index_obsidian_vault( start_date: Start date for filtering (YYYY-MM-DD format) - optional end_date: End date for filtering (YYYY-MM-DD format) - optional update_last_indexed: Whether to update the last_indexed_at timestamp + on_heartbeat_callback: Optional callback to update notification during long-running indexing. 
Returns: Tuple containing (number of documents indexed, error message or None) @@ -305,7 +315,14 @@ async def index_obsidian_vault( indexed_count = 0 skipped_count = 0 + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + for file_info in files: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(indexed_count) + last_heartbeat_time = time.time() try: file_path = file_info["path"] relative_path = file_info["relative_path"] diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py index f244c97f8..4ac87164c 100644 --- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py @@ -2,6 +2,8 @@ Slack connector indexer. """ +import time +from collections.abc import Awaitable, Callable from datetime import datetime from slack_sdk.errors import SlackApiError @@ -18,6 +20,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( build_document_metadata_markdown, calculate_date_range, @@ -38,6 +46,7 @@ async def index_slack_messages( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Slack messages from all accessible channels. 
@@ -50,6 +59,8 @@ async def index_slack_messages( start_date: Start date for indexing (YYYY-MM-DD format) end_date: End date for indexing (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. + Called periodically with (indexed_count) to prevent task appearing stuck. Returns: Tuple containing (number of documents indexed, error message or None) @@ -164,6 +175,9 @@ async def index_slack_messages( documents_skipped = 0 skipped_channels = [] + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + await task_logger.log_task_progress( log_entry, f"Starting to process {len(channels)} Slack channels", @@ -172,6 +186,10 @@ async def index_slack_messages( # Process each channel for channel_obj in channels: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() channel_id = channel_obj["id"] channel_name = channel_obj["name"] is_private = channel_obj["is_private"] diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py index 66b709ddc..55bb02ab9 100644 --- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py @@ -2,6 +2,8 @@ Microsoft Teams connector indexer. 
""" +import time +from collections.abc import Awaitable, Callable from datetime import UTC from sqlalchemy.exc import SQLAlchemyError @@ -17,6 +19,12 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( build_document_metadata_markdown, calculate_date_range, @@ -37,6 +45,7 @@ async def index_teams_messages( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index Microsoft Teams messages from all accessible teams and channels. @@ -49,6 +58,8 @@ async def index_teams_messages( start_date: Start date for indexing (YYYY-MM-DD format) end_date: End date for indexing (YYYY-MM-DD format) update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. + Called periodically with (indexed_count) to prevent task appearing stuck. 
Returns: Tuple containing (number of documents indexed, error message or None) @@ -161,6 +172,9 @@ async def index_teams_messages( documents_skipped = 0 skipped_channels = [] + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + await task_logger.log_task_progress( log_entry, f"Starting to process {len(teams)} Teams", @@ -185,6 +199,11 @@ async def index_teams_messages( # Process each team for team in teams: + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() + team_id = team.get("id") team_name = team.get("displayName", "Unknown Team") diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index 0c63fd2f0..ae89b7513 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -2,6 +2,8 @@ Webcrawler connector indexer. """ +import time +from collections.abc import Awaitable, Callable from datetime import datetime from sqlalchemy.exc import SQLAlchemyError @@ -20,6 +22,12 @@ from app.utils.document_converters import ( ) from app.utils.webcrawler_utils import parse_webcrawler_urls +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -38,6 +46,7 @@ async def index_crawled_urls( start_date: str | None = None, end_date: str | None = None, update_last_indexed: bool = True, + on_heartbeat_callback: HeartbeatCallbackType | None = None, ) -> tuple[int, str | None]: """ Index web page URLs. 
@@ -50,6 +59,7 @@ async def index_crawled_urls( start_date: Start date for filtering (YYYY-MM-DD format) - optional end_date: End date for filtering (YYYY-MM-DD format) - optional update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + on_heartbeat_callback: Optional callback to update notification during long-running indexing. Returns: Tuple containing (number of documents indexed, error message or None) @@ -140,7 +150,14 @@ async def index_crawled_urls( documents_skipped = 0 failed_urls = [] + # Heartbeat tracking - update notification periodically to prevent appearing stuck + last_heartbeat_time = time.time() + for idx, url in enumerate(urls, 1): + # Check if it's time for a heartbeat update + if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + await on_heartbeat_callback(documents_indexed) + last_heartbeat_time = time.time() try: logger.info(f"Processing URL {idx}/{len(urls)}: {url}") diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts index 19741e020..5783540d8 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-indexing-connectors.ts @@ -5,6 +5,44 @@ import type { SearchSourceConnector } from "@/contracts/types/connector.types"; import type { InboxItem } from "@/contracts/types/inbox.types"; import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types"; +/** + * Timeout thresholds for stuck task detection + * + * These align with the backend Celery configuration: + * - HARD_TIMEOUT: 8 hours (task_time_limit=28800 in Celery) + * Any task running longer than this is definitely dead. 
+ * + * - STALE_THRESHOLD: 15 minutes without notification updates + * If heartbeats are being sent every 30s, missing 15+ minutes of updates + * indicates the task has likely crashed or the worker is down. + */ +const HARD_TIMEOUT_MS = 8 * 60 * 60 * 1000; // 8 hours in milliseconds +const STALE_THRESHOLD_MS = 15 * 60 * 1000; // 15 minutes in milliseconds + +/** + * Check if a notification is stale (no updates for too long) + * @param updatedAt - ISO timestamp of last notification update + * @returns true if the notification hasn't been updated recently + */ +function isNotificationStale(updatedAt: string | null | undefined): boolean { + if (!updatedAt) return false; + const lastUpdate = new Date(updatedAt).getTime(); + const now = Date.now(); + return now - lastUpdate > STALE_THRESHOLD_MS; +} + +/** + * Check if a task has exceeded the hard timeout (definitely dead) + * @param startedAt - ISO timestamp when the task started + * @returns true if the task has been running longer than the hard limit + */ +function isTaskTimedOut(startedAt: string | null | undefined): boolean { + if (!startedAt) return false; + const startTime = new Date(startedAt).getTime(); + const now = Date.now(); + return now - startTime > HARD_TIMEOUT_MS; +} + /** * Hook to track which connectors are currently indexing using local state. * @@ -13,6 +51,8 @@ import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types"; * 2. Detecting in_progress notifications from Electric SQL to restore state after remounts * 3. Clearing indexing state when notifications become completed or failed * 4. Clearing indexing state when Electric SQL detects last_indexed_at changed + * 5. Detecting stale/stuck tasks that haven't updated in 15+ minutes + * 6. Detecting hard timeout (8h) - tasks that definitely cannot still be running * * The actual `last_indexed_at` value comes from Electric SQL/PGlite, not local state. 
*/ @@ -57,6 +97,7 @@ export function useIndexingConnectors( // Detect notification status changes and update indexing state accordingly // This restores spinner state after component remounts and handles all status transitions + // Also detects stale/stuck tasks that haven't been updated in a while useEffect(() => { if (!inboxItems || inboxItems.length === 0) return; @@ -71,11 +112,26 @@ export function useIndexingConnectors( const metadata = isConnectorIndexingMetadata(item.metadata) ? item.metadata : null; if (!metadata) continue; - // If status is "in_progress", add connector to indexing set + // If status is "in_progress", check if it's actually still running if (metadata.status === "in_progress") { - if (!newIndexingIds.has(metadata.connector_id)) { - newIndexingIds.add(metadata.connector_id); - hasChanges = true; + // Check for hard timeout (8h) - task is definitely dead + const timedOut = isTaskTimedOut(metadata.started_at); + + // Check for stale notification (15min without updates) - task likely crashed + const stale = isNotificationStale(item.updated_at); + + if (timedOut || stale) { + // Task is stuck - don't show as indexing + if (newIndexingIds.has(metadata.connector_id)) { + newIndexingIds.delete(metadata.connector_id); + hasChanges = true; + } + } else { + // Task appears to be genuinely running + if (!newIndexingIds.has(metadata.connector_id)) { + newIndexingIds.add(metadata.connector_id); + hasChanges = true; + } } } // If status is "completed" or "failed", remove connector from indexing set diff --git a/surfsense_web/lib/electric/client.ts b/surfsense_web/lib/electric/client.ts index d25e268be..177a66d28 100644 --- a/surfsense_web/lib/electric/client.ts +++ b/surfsense_web/lib/electric/client.ts @@ -55,7 +55,8 @@ const pendingSyncs = new Map>(); // Version for sync state - increment this to force fresh sync when Electric config changes // v2: user-specific database architecture // v3: consistent cutoff date for sync+queries, visibility refresh 
support -const SYNC_VERSION = 3; +// v4: heartbeat-based stale notification detection with updated_at tracking +const SYNC_VERSION = 4; // Database name prefix for identifying SurfSense databases const DB_PREFIX = "surfsense-"; From 3c5bf6c83c48fe024df27b8c692866f8b8db29ba Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sun, 1 Feb 2026 02:28:38 +0530 Subject: [PATCH 07/15] refactor: simplify Slack client initialization by directly passing the token --- .../app/routes/slack_add_connector_route.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/surfsense_backend/app/routes/slack_add_connector_route.py b/surfsense_backend/app/routes/slack_add_connector_route.py index 66ba1cd41..8523d14a5 100644 --- a/surfsense_backend/app/routes/slack_add_connector_route.py +++ b/surfsense_backend/app/routes/slack_add_connector_route.py @@ -582,14 +582,8 @@ async def get_slack_channels( # Import SlackHistory here to avoid circular imports from app.connectors.slack_history import SlackHistory - # Create Slack client and fetch channels - slack_client = SlackHistory( - session=session, - connector_id=connector_id, - credentials=credentials, - ) - # Set the decrypted token directly - slack_client.set_token(bot_token) + # Create Slack client with direct token (simple pattern for quick operations) + slack_client = SlackHistory(token=bot_token) channels = await slack_client.get_all_channels(include_private=True) From 2b2acfebb6b8f43ae61a6c41579f1522d3dcae97 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sun, 1 Feb 2026 03:32:45 +0530 Subject: [PATCH 08/15] feat: enhance DiscordConnector with start event signaling for improved initialization handling --- .../app/connectors/discord_connector.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/connectors/discord_connector.py 
b/surfsense_backend/app/connectors/discord_connector.py index 1e12cb9a4..2714c5766 100644 --- a/surfsense_backend/app/connectors/discord_connector.py +++ b/surfsense_backend/app/connectors/discord_connector.py @@ -61,6 +61,7 @@ class DiscordConnector(commands.Bot): self.token = None self._bot_task = None # Holds the async bot task self._is_running = False # Flag to track if the bot is running + self._start_called_event = asyncio.Event() # Event to signal when start() is called # Event to confirm bot is ready @self.event @@ -226,6 +227,9 @@ class DiscordConnector(commands.Bot): ) return + # Signal that we're about to call start() - this allows _wait_until_ready() to proceed + self._start_called_event.set() + await self.start(self.token) logger.info("Discord bot started successfully.") except discord.LoginFailure: @@ -260,6 +264,9 @@ class DiscordConnector(commands.Bot): else: logger.info("Bot is not running or already disconnected.") + # Reset the start event so the connector can be reused + self._start_called_event.clear() + def set_token(self, token: str) -> None: """ Set the discord bot token (for backward compatibility). @@ -277,10 +284,16 @@ class DiscordConnector(commands.Bot): """Helper to wait until the bot is connected and ready.""" logger.info("Waiting for the bot to be ready...") - # Give the event loop a chance to switch to the bot's startup task. - # This allows self.start() to begin initializing the client. - # Terrible solution, but necessary to avoid blocking the event loop. 
- await asyncio.sleep(1) # Yield control to the event loop + # Wait for start_bot() to actually call self.start() + # This ensures we don't call wait_until_ready() before the client is initialized + try: + await asyncio.wait_for(self._start_called_event.wait(), timeout=30.0) + logger.info("Bot start() has been called, now waiting for ready state...") + except TimeoutError: + logger.error("start_bot() did not call start() within 30 seconds") + raise RuntimeError( + "Discord client failed to initialize - start() was never called" + ) try: await asyncio.wait_for(self.wait_until_ready(), timeout=60.0) From 47eaa705bf0c9174f1912b387025730a8b806596 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sun, 1 Feb 2026 03:52:00 +0530 Subject: [PATCH 09/15] feat: sync last indexed timestamp with live data for real-time updates in connector popup --- surfsense_web/components/assistant-ui/connector-popup.tsx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/surfsense_web/components/assistant-ui/connector-popup.tsx b/surfsense_web/components/assistant-ui/connector-popup.tsx index 20d76faa2..7184d393f 100644 --- a/surfsense_web/components/assistant-ui/connector-popup.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup.tsx @@ -240,6 +240,10 @@ export const ConnectorIndicator: FC = () => { ...editingConnector, config: connectorConfig || editingConnector.config, name: editingConnector.name, + // Sync last_indexed_at with live data from Electric SQL for real-time updates + last_indexed_at: + (connectors as SearchSourceConnector[]).find((c) => c.id === editingConnector.id) + ?.last_indexed_at ?? 
editingConnector.last_indexed_at, }} startDate={startDate} endDate={endDate} From ff4a5742487da8e6785c1a505c77dfe1fe527e55 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sun, 1 Feb 2026 22:34:41 +0530 Subject: [PATCH 10/15] feat: implement Discord channel fetching with permission handling in connector UI --- .../app/routes/discord_add_connector_route.py | 287 ++++++++++++++++++ .../components/discord-config.tsx | 172 ++++++++++- .../contracts/types/connector.types.ts | 24 ++ .../lib/apis/connectors-api.service.ts | 18 +- 4 files changed, 493 insertions(+), 8 deletions(-) diff --git a/surfsense_backend/app/routes/discord_add_connector_route.py b/surfsense_backend/app/routes/discord_add_connector_route.py index 1d8b40fcf..09881bcac 100644 --- a/surfsense_backend/app/routes/discord_add_connector_route.py +++ b/surfsense_backend/app/routes/discord_add_connector_route.py @@ -531,3 +531,290 @@ async def refresh_discord_token( raise HTTPException( status_code=500, detail=f"Failed to refresh Discord tokens: {e!s}" ) from e + + +def _compute_channel_permissions( + base_permissions: int, + bot_role_ids: set[str], + bot_user_id: str | None, + channel_overwrites: list[dict], + guild_id: str, +) -> int: + """ + Compute effective permissions for a channel based on role permissions and overwrites. + + Discord permission computation follows this order (per official docs): + 1. Start with base permissions from roles + 2. Apply @everyone role overwrites (deny, then allow) + 3. Apply role-specific overwrites (deny, then allow) + 4. 
Apply member-specific overwrites (deny, then allow) + + Args: + base_permissions: Combined permissions from all bot roles + bot_role_ids: Set of role IDs the bot has + bot_user_id: The bot's user ID for member-specific overwrites + channel_overwrites: List of permission overwrites for the channel + guild_id: Guild ID (same as @everyone role ID) + + Returns: + Computed permission integer + """ + permissions = base_permissions + + # Permission overwrites are applied in order: @everyone, roles, member + everyone_allow = 0 + everyone_deny = 0 + role_allow = 0 + role_deny = 0 + member_allow = 0 + member_deny = 0 + + for overwrite in channel_overwrites: + overwrite_id = overwrite.get("id") + overwrite_type = overwrite.get("type") # 0 = role, 1 = member + allow = int(overwrite.get("allow", 0)) + deny = int(overwrite.get("deny", 0)) + + if overwrite_type == 0: # Role overwrite + if overwrite_id == guild_id: # @everyone role + everyone_allow = allow + everyone_deny = deny + elif overwrite_id in bot_role_ids: + role_allow |= allow + role_deny |= deny + elif overwrite_type == 1: # Member overwrite + if bot_user_id and overwrite_id == bot_user_id: + member_allow = allow + member_deny = deny + + # Apply in order per Discord docs: + # 1. @everyone deny, then allow + permissions &= ~everyone_deny + permissions |= everyone_allow + # 2. Role deny, then allow + permissions &= ~role_deny + permissions |= role_allow + # 3. Member deny, then allow (applied LAST, highest priority) + permissions &= ~member_deny + permissions |= member_allow + + return permissions + + +@router.get("/discord/connector/{connector_id}/channels", response_model=None) +async def get_discord_channels( + connector_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Get list of Discord text channels for a connector with permission info. + + Uses Discord's HTTP REST API directly instead of WebSocket bot connection. 
+ Computes effective permissions to determine if bot can read message history. + + Args: + connector_id: The Discord connector ID + session: Database session + user: Current authenticated user + + Returns: + List of channels with id, name, type, position, category_id, and can_index fields + """ + from sqlalchemy import select + + # Discord permission bits + VIEW_CHANNEL = 1 << 10 # 1024 + READ_MESSAGE_HISTORY = 1 << 16 # 65536 + ADMINISTRATOR = 1 << 3 # 8 + + try: + # Get connector and verify ownership + result = await session.execute( + select(SearchSourceConnector).where( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.user_id == user.id, + SearchSourceConnector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR, + ) + ) + connector = result.scalar_one_or_none() + + if not connector: + raise HTTPException( + status_code=404, + detail="Discord connector not found or access denied", + ) + + # Get credentials and decrypt bot token + credentials = DiscordAuthCredentialsBase.from_dict(connector.config) + token_encryption = get_token_encryption() + is_encrypted = connector.config.get("_token_encrypted", False) + + bot_token = credentials.bot_token + if is_encrypted and bot_token: + try: + bot_token = token_encryption.decrypt_token(bot_token) + except Exception as e: + logger.error(f"Failed to decrypt bot token: {e!s}") + raise HTTPException( + status_code=500, detail="Failed to decrypt stored bot token" + ) from e + + if not bot_token: + raise HTTPException( + status_code=400, + detail="No bot token available. Please re-authenticate.", + ) + + # Get guild_id from connector config + guild_id = connector.config.get("guild_id") + if not guild_id: + raise HTTPException( + status_code=400, + detail="No guild_id associated with this connector. 
Please reconnect the Discord server.", + ) + + headers = {"Authorization": f"Bot {bot_token}"} + + async with httpx.AsyncClient() as client: + # Fetch bot's user info to get bot user ID + bot_user_response = await client.get( + "https://discord.com/api/v10/users/@me", + headers=headers, + timeout=30.0, + ) + + if bot_user_response.status_code != 200: + logger.warning(f"Failed to fetch bot user info: {bot_user_response.text}") + bot_user_id = None + else: + bot_user_id = bot_user_response.json().get("id") + + # Fetch guild info to get roles + guild_response = await client.get( + f"https://discord.com/api/v10/guilds/{guild_id}", + headers=headers, + timeout=30.0, + ) + + if guild_response.status_code != 200: + raise HTTPException( + status_code=guild_response.status_code, + detail="Failed to fetch guild information", + ) + + guild_data = guild_response.json() + guild_roles = {role["id"]: role for role in guild_data.get("roles", [])} + + # Fetch bot's member info to get its roles + bot_member_response = await client.get( + f"https://discord.com/api/v10/guilds/{guild_id}/members/{bot_user_id}", + headers=headers, + timeout=30.0, + ) + + if bot_member_response.status_code != 200: + logger.warning(f"Failed to fetch bot member info: {bot_member_response.text}") + bot_role_ids = {guild_id} # At minimum, bot has @everyone role + base_permissions = int(guild_roles.get(guild_id, {}).get("permissions", 0)) + else: + bot_member_data = bot_member_response.json() + bot_role_ids = set(bot_member_data.get("roles", [])) + bot_role_ids.add(guild_id) # @everyone role is always included + + # Compute base permissions from all bot roles + base_permissions = 0 + for role_id in bot_role_ids: + if role_id in guild_roles: + role_perms = int(guild_roles[role_id].get("permissions", 0)) + base_permissions |= role_perms + + # Check if bot has administrator permission (bypasses all checks) + is_admin = (base_permissions & ADMINISTRATOR) == ADMINISTRATOR + + # Fetch channels + channels_response = 
await client.get( + f"https://discord.com/api/v10/guilds/{guild_id}/channels", + headers=headers, + timeout=30.0, + ) + + if channels_response.status_code == 403: + raise HTTPException( + status_code=403, + detail="Bot does not have permission to view channels in this server. Please ensure the bot has the 'View Channels' permission.", + ) + elif channels_response.status_code == 404: + raise HTTPException( + status_code=404, + detail="Discord server not found. The bot may have been removed from the server.", + ) + elif channels_response.status_code != 200: + error_detail = channels_response.text + try: + error_json = channels_response.json() + error_detail = error_json.get("message", error_detail) + except Exception: + pass + raise HTTPException( + status_code=channels_response.status_code, + detail=f"Failed to fetch Discord channels: {error_detail}", + ) + + channels_data = channels_response.json() + + # Discord channel types: + # 0 = GUILD_TEXT, 2 = GUILD_VOICE, 4 = GUILD_CATEGORY, 5 = GUILD_ANNOUNCEMENT + # We want text channels (type 0) and announcement channels (type 5) + text_channel_types = {0, 5} + + text_channels = [] + for ch in channels_data: + if ch.get("type") in text_channel_types: + # Compute effective permissions for this channel + if is_admin: + # Administrators bypass all permission checks + can_index = True + else: + channel_overwrites = ch.get("permission_overwrites", []) + effective_perms = _compute_channel_permissions( + base_permissions, + bot_role_ids, + bot_user_id, + channel_overwrites, + guild_id, + ) + + # Bot can index if it has both VIEW_CHANNEL and READ_MESSAGE_HISTORY + has_view = (effective_perms & VIEW_CHANNEL) == VIEW_CHANNEL + has_read_history = (effective_perms & READ_MESSAGE_HISTORY) == READ_MESSAGE_HISTORY + can_index = has_view and has_read_history + + text_channels.append({ + "id": ch["id"], + "name": ch["name"], + "type": "text" if ch["type"] == 0 else "announcement", + "position": ch.get("position", 0), + "category_id": 
ch.get("parent_id"), + "can_index": can_index, + }) + + # Sort by position + text_channels.sort(key=lambda x: x["position"]) + + logger.info( + f"Fetched {len(text_channels)} text channels for Discord connector {connector_id}" + ) + + return text_channels + + except HTTPException: + raise + except Exception as e: + logger.error( + f"Failed to get Discord channels for connector {connector_id}: {e!s}", + exc_info=True, + ) + raise HTTPException( + status_code=500, detail=f"Failed to get Discord channels: {e!s}" + ) from e diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/discord-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/discord-config.tsx index dd4c89c8e..a0fd6888f 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/discord-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/discord-config.tsx @@ -1,29 +1,187 @@ "use client"; -import { Info } from "lucide-react"; -import type { FC } from "react"; +import { AlertCircle, CheckCircle2, Hash, Info, Megaphone, RefreshCw } from "lucide-react"; +import { type FC, useCallback, useEffect, useState } from "react"; +import { Button } from "@/components/ui/button"; +import { Spinner } from "@/components/ui/spinner"; +import { connectorsApiService, type DiscordChannel } from "@/lib/apis/connectors-api.service"; +import { cn } from "@/lib/utils"; import type { ConnectorConfigProps } from "../index"; export interface DiscordConfigProps extends ConnectorConfigProps { onNameChange?: (name: string) => void; } -export const DiscordConfig: FC = () => { +export const DiscordConfig: FC = ({ connector }) => { + const [channels, setChannels] = useState([]); + const [isLoading, setIsLoading] = useState(false); + const [error, setError] = useState(null); + const [lastFetched, setLastFetched] = useState(null); + + const fetchChannels = useCallback(async 
() => { + if (!connector?.id) return; + + setIsLoading(true); + setError(null); + + try { + const data = await connectorsApiService.getDiscordChannels(connector.id); + setChannels(data); + setLastFetched(new Date()); + } catch (err) { + console.error("Failed to fetch Discord channels:", err); + setError(err instanceof Error ? err.message : "Failed to fetch channels"); + } finally { + setIsLoading(false); + } + }, [connector?.id]); + + // Fetch channels on mount + useEffect(() => { + fetchChannels(); + }, [fetchChannels]); + + // Auto-refresh when user returns to tab + useEffect(() => { + const handleVisibilityChange = () => { + if (document.visibilityState === "visible" && connector?.id) { + fetchChannels(); + } + }; + + document.addEventListener("visibilitychange", handleVisibilityChange); + return () => document.removeEventListener("visibilitychange", handleVisibilityChange); + }, [connector?.id, fetchChannels]); + + // Separate channels by indexing capability + const readyToIndex = channels.filter((ch) => ch.can_index); + const needsPermissions = channels.filter((ch) => !ch.can_index); + + // Format last fetched time + const formatLastFetched = () => { + if (!lastFetched) return null; + const now = new Date(); + const diffMs = now.getTime() - lastFetched.getTime(); + const diffSecs = Math.floor(diffMs / 1000); + const diffMins = Math.floor(diffSecs / 60); + + if (diffSecs < 60) return "just now"; + if (diffMins === 1) return "1 minute ago"; + if (diffMins < 60) return `${diffMins} minutes ago`; + return lastFetched.toLocaleTimeString(); + }; + return (
+ {/* Info box */}
-

Add Bot to Servers

- Before indexing, make sure the Discord bot has been added to the servers (guilds) you - want to index. The bot can only access messages from servers it's been added to. Use the - OAuth authorization flow to add the bot to your servers. + The bot needs "Read Message History" permission to index channels. + Ask a server admin to grant this permission for channels shown below.

+ + {/* Channels Section */} +
+
+
+

Channel Access

+
+
+ {lastFetched && ( + {formatLastFetched()} + )} + +
+
+ + {error && ( +
+ {error} +
+ )} + + {isLoading && channels.length === 0 ? ( +
+ + Loading channels +
+ ) : channels.length === 0 && !error ? ( +
+ No channels found. Make sure the bot has been added to your Discord server with proper permissions. +
+ ) : ( +
+ {/* Ready to index */} + {readyToIndex.length > 0 && ( +
0 && "border-b border-border")}> +
+ + Ready to index + + {readyToIndex.length} {readyToIndex.length === 1 ? "channel" : "channels"} + +
+
+ {readyToIndex.map((channel) => ( + + ))} +
+
+ )} + + {/* Needs permissions */} + {needsPermissions.length > 0 && ( +
+
+ + Grant permissions to index + + {needsPermissions.length}{" "} + {needsPermissions.length === 1 ? "channel" : "channels"} + +
+
+ {needsPermissions.map((channel) => ( + + ))} +
+
+ )} +
+ )} +
+
+ ); +}; + +interface ChannelPillProps { + channel: DiscordChannel; +} + +const ChannelPill: FC = ({ channel }) => { + return ( +
+ {channel.type === "announcement" ? ( + + ) : ( + + )} + {channel.name}
); }; diff --git a/surfsense_web/contracts/types/connector.types.ts b/surfsense_web/contracts/types/connector.types.ts index 05efa51d2..a7760745d 100644 --- a/surfsense_web/contracts/types/connector.types.ts +++ b/surfsense_web/contracts/types/connector.types.ts @@ -222,6 +222,27 @@ export const listSlackChannelsRequest = z.object({ export const listSlackChannelsResponse = z.array(slackChannel); +/** + * Discord channel with indexing permission info + */ +export const discordChannel = z.object({ + id: z.string(), + name: z.string(), + type: z.enum(["text", "announcement"]), + position: z.number(), + category_id: z.string().nullable().optional(), + can_index: z.boolean(), +}); + +/** + * List Discord channels + */ +export const listDiscordChannelsRequest = z.object({ + connector_id: z.number(), +}); + +export const listDiscordChannelsResponse = z.array(discordChannel); + // Inferred types export type SearchSourceConnectorType = z.infer; export type SearchSourceConnector = z.infer; @@ -245,3 +266,6 @@ export type GoogleDriveItem = z.infer; export type SlackChannel = z.infer; export type ListSlackChannelsRequest = z.infer; export type ListSlackChannelsResponse = z.infer; +export type DiscordChannel = z.infer; +export type ListDiscordChannelsRequest = z.infer; +export type ListDiscordChannelsResponse = z.infer; diff --git a/surfsense_web/lib/apis/connectors-api.service.ts b/surfsense_web/lib/apis/connectors-api.service.ts index 75e5a938a..45898b762 100644 --- a/surfsense_web/lib/apis/connectors-api.service.ts +++ b/surfsense_web/lib/apis/connectors-api.service.ts @@ -5,6 +5,7 @@ import { type DeleteConnectorRequest, deleteConnectorRequest, deleteConnectorResponse, + type DiscordChannel, type GetConnectorRequest, type GetConnectorsRequest, getConnectorRequest, @@ -16,6 +17,7 @@ import { indexConnectorResponse, type ListGitHubRepositoriesRequest, type ListGoogleDriveFoldersRequest, + listDiscordChannelsResponse, listGitHubRepositoriesRequest, 
listGitHubRepositoriesResponse, listGoogleDriveFoldersRequest, @@ -351,8 +353,22 @@ class ConnectorsApiService { listSlackChannelsResponse ); }; + + // ============================================================================= + // Discord Connector Methods + // ============================================================================= + + /** + * Get Discord text channels for a connector + */ + getDiscordChannels = async (connectorId: number) => { + return baseApiService.get( + `/api/v1/discord/connector/${connectorId}/channels`, + listDiscordChannelsResponse + ); + }; } -export type { SlackChannel }; +export type { SlackChannel, DiscordChannel }; export const connectorsApiService = new ConnectorsApiService(); From 085653d3e3af9ea9c5f48e677a375e992825eef0 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sun, 1 Feb 2026 22:54:25 +0530 Subject: [PATCH 11/15] chore: ran frontend and backend linting --- .../connectors/composio_gmail_connector.py | 8 +- .../composio_google_calendar_connector.py | 8 +- .../composio_google_drive_connector.py | 8 +- .../app/connectors/discord_connector.py | 6 +- .../connectors/google_calendar_connector.py | 8 +- .../app/routes/discord_add_connector_route.py | 99 ++++++++++--------- .../routes/search_source_connectors_routes.py | 19 ++-- .../app/routes/slack_add_connector_route.py | 3 +- .../stale_notification_cleanup_task.py | 27 ++--- .../app/tasks/composio_indexer.py | 6 +- .../connector_indexers/airtable_indexer.py | 23 +++-- .../connector_indexers/bookstack_indexer.py | 16 +-- .../connector_indexers/clickup_indexer.py | 17 ++-- .../connector_indexers/confluence_indexer.py | 16 +-- .../connector_indexers/discord_indexer.py | 6 +- .../elasticsearch_indexer.py | 18 ++-- .../connector_indexers/github_indexer.py | 17 ++-- .../google_calendar_indexer.py | 18 ++-- .../google_drive_indexer.py | 10 +- .../google_gmail_indexer.py | 16 +-- .../tasks/connector_indexers/jira_indexer.py | 16 
+-- .../connector_indexers/linear_indexer.py | 16 +-- .../tasks/connector_indexers/luma_indexer.py | 16 +-- .../connector_indexers/notion_indexer.py | 5 +- .../connector_indexers/obsidian_indexer.py | 16 +-- .../tasks/connector_indexers/slack_indexer.py | 16 +-- .../tasks/connector_indexers/teams_indexer.py | 16 +-- .../connector_indexers/webcrawler_indexer.py | 16 +-- .../components/discord-config.tsx | 7 +- .../components/slack-config.tsx | 3 +- .../layout/ui/sidebar/SidebarSection.tsx | 21 ++-- surfsense_web/lib/format-date.ts | 1 - 32 files changed, 288 insertions(+), 210 deletions(-) diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py b/surfsense_backend/app/connectors/composio_gmail_connector.py index 9bb1197b8..d3a0d344b 100644 --- a/surfsense_backend/app/connectors/composio_gmail_connector.py +++ b/surfsense_backend/app/connectors/composio_gmail_connector.py @@ -10,10 +10,6 @@ from collections.abc import Awaitable, Callable from datetime import UTC, datetime from typing import Any -# Heartbeat configuration -HeartbeatCallbackType = Callable[[int], Awaitable[None]] -HEARTBEAT_INTERVAL_SECONDS = 30 - from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm import selectinload @@ -32,6 +28,10 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Heartbeat configuration +HeartbeatCallbackType = Callable[[int], Awaitable[None]] +HEARTBEAT_INTERVAL_SECONDS = 30 + logger = logging.getLogger(__name__) diff --git a/surfsense_backend/app/connectors/composio_google_calendar_connector.py b/surfsense_backend/app/connectors/composio_google_calendar_connector.py index 669543210..4302e479b 100644 --- a/surfsense_backend/app/connectors/composio_google_calendar_connector.py +++ b/surfsense_backend/app/connectors/composio_google_calendar_connector.py @@ -10,10 +10,6 @@ from collections.abc import Awaitable, Callable from datetime import UTC, datetime from typing import Any -# 
Heartbeat configuration -HeartbeatCallbackType = Callable[[int], Awaitable[None]] -HEARTBEAT_INTERVAL_SECONDS = 30 - from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm import selectinload @@ -35,6 +31,10 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Heartbeat configuration +HeartbeatCallbackType = Callable[[int], Awaitable[None]] +HEARTBEAT_INTERVAL_SECONDS = 30 + logger = logging.getLogger(__name__) diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py index debbced20..5e4fc8c0f 100644 --- a/surfsense_backend/app/connectors/composio_google_drive_connector.py +++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py @@ -15,10 +15,6 @@ from datetime import UTC, datetime from pathlib import Path from typing import Any -# Heartbeat configuration -HeartbeatCallbackType = Callable[[int], Awaitable[None]] -HEARTBEAT_INTERVAL_SECONDS = 30 - from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm.attributes import flag_modified @@ -35,6 +31,10 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) +# Heartbeat configuration +HeartbeatCallbackType = Callable[[int], Awaitable[None]] +HEARTBEAT_INTERVAL_SECONDS = 30 + logger = logging.getLogger(__name__) diff --git a/surfsense_backend/app/connectors/discord_connector.py b/surfsense_backend/app/connectors/discord_connector.py index 2714c5766..7a9e8d5dd 100644 --- a/surfsense_backend/app/connectors/discord_connector.py +++ b/surfsense_backend/app/connectors/discord_connector.py @@ -61,7 +61,9 @@ class DiscordConnector(commands.Bot): self.token = None self._bot_task = None # Holds the async bot task self._is_running = False # Flag to track if the bot is running - self._start_called_event = asyncio.Event() # Event to signal when start() is called + self._start_called_event = ( + 
asyncio.Event() + ) # Event to signal when start() is called # Event to confirm bot is ready @self.event @@ -293,7 +295,7 @@ class DiscordConnector(commands.Bot): logger.error("start_bot() did not call start() within 30 seconds") raise RuntimeError( "Discord client failed to initialize - start() was never called" - ) + ) from None try: await asyncio.wait_for(self.wait_until_ready(), timeout=60.0) diff --git a/surfsense_backend/app/connectors/google_calendar_connector.py b/surfsense_backend/app/connectors/google_calendar_connector.py index 7e24f3642..4681251ad 100644 --- a/surfsense_backend/app/connectors/google_calendar_connector.py +++ b/surfsense_backend/app/connectors/google_calendar_connector.py @@ -252,12 +252,16 @@ class GoogleCalendarConnector: if dt_start.tzinfo is None: dt_start = dt_start.replace(hour=0, minute=0, second=0, tzinfo=pytz.UTC) else: - dt_start = dt_start.astimezone(pytz.UTC).replace(hour=0, minute=0, second=0) + dt_start = dt_start.astimezone(pytz.UTC).replace( + hour=0, minute=0, second=0 + ) if dt_end.tzinfo is None: dt_end = dt_end.replace(hour=23, minute=59, second=59, tzinfo=pytz.UTC) else: - dt_end = dt_end.astimezone(pytz.UTC).replace(hour=23, minute=59, second=59) + dt_end = dt_end.astimezone(pytz.UTC).replace( + hour=23, minute=59, second=59 + ) if dt_start >= dt_end: return [], ( diff --git a/surfsense_backend/app/routes/discord_add_connector_route.py b/surfsense_backend/app/routes/discord_add_connector_route.py index 09881bcac..e49acf30b 100644 --- a/surfsense_backend/app/routes/discord_add_connector_route.py +++ b/surfsense_backend/app/routes/discord_add_connector_route.py @@ -46,6 +46,11 @@ SCOPES = [ "guilds.members.read", # Read member information ] +# Discord permission bits +VIEW_CHANNEL = 1 << 10 # 1024 +READ_MESSAGE_HISTORY = 1 << 16 # 65536 +ADMINISTRATOR = 1 << 3 # 8 + # Initialize security utilities _state_manager = None _token_encryption = None @@ -542,25 +547,25 @@ def _compute_channel_permissions( ) -> int: """ 
Compute effective permissions for a channel based on role permissions and overwrites. - + Discord permission computation follows this order (per official docs): 1. Start with base permissions from roles 2. Apply @everyone role overwrites (deny, then allow) 3. Apply role-specific overwrites (deny, then allow) 4. Apply member-specific overwrites (deny, then allow) - + Args: base_permissions: Combined permissions from all bot roles bot_role_ids: Set of role IDs the bot has bot_user_id: The bot's user ID for member-specific overwrites channel_overwrites: List of permission overwrites for the channel guild_id: Guild ID (same as @everyone role ID) - + Returns: Computed permission integer """ permissions = base_permissions - + # Permission overwrites are applied in order: @everyone, roles, member everyone_allow = 0 everyone_deny = 0 @@ -568,13 +573,13 @@ def _compute_channel_permissions( role_deny = 0 member_allow = 0 member_deny = 0 - + for overwrite in channel_overwrites: overwrite_id = overwrite.get("id") overwrite_type = overwrite.get("type") # 0 = role, 1 = member allow = int(overwrite.get("allow", 0)) deny = int(overwrite.get("deny", 0)) - + if overwrite_type == 0: # Role overwrite if overwrite_id == guild_id: # @everyone role everyone_allow = allow @@ -582,11 +587,11 @@ def _compute_channel_permissions( elif overwrite_id in bot_role_ids: role_allow |= allow role_deny |= deny - elif overwrite_type == 1: # Member overwrite - if bot_user_id and overwrite_id == bot_user_id: - member_allow = allow - member_deny = deny - + elif overwrite_type == 1 and bot_user_id and overwrite_id == bot_user_id: + # Member-specific overwrite for the bot + member_allow = allow + member_deny = deny + # Apply in order per Discord docs: # 1. @everyone deny, then allow permissions &= ~everyone_deny @@ -597,7 +602,7 @@ def _compute_channel_permissions( # 3. 
Member deny, then allow (applied LAST, highest priority) permissions &= ~member_deny permissions |= member_allow - + return permissions @@ -609,7 +614,7 @@ async def get_discord_channels( ): """ Get list of Discord text channels for a connector with permission info. - + Uses Discord's HTTP REST API directly instead of WebSocket bot connection. Computes effective permissions to determine if bot can read message history. @@ -623,18 +628,14 @@ async def get_discord_channels( """ from sqlalchemy import select - # Discord permission bits - VIEW_CHANNEL = 1 << 10 # 1024 - READ_MESSAGE_HISTORY = 1 << 16 # 65536 - ADMINISTRATOR = 1 << 3 # 8 - try: # Get connector and verify ownership result = await session.execute( select(SearchSourceConnector).where( SearchSourceConnector.id == connector_id, SearchSourceConnector.user_id == user.id, - SearchSourceConnector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.DISCORD_CONNECTOR, ) ) connector = result.scalar_one_or_none() @@ -675,7 +676,7 @@ async def get_discord_channels( ) headers = {"Authorization": f"Bot {bot_token}"} - + async with httpx.AsyncClient() as client: # Fetch bot's user info to get bot user ID bot_user_response = await client.get( @@ -683,55 +684,61 @@ async def get_discord_channels( headers=headers, timeout=30.0, ) - + if bot_user_response.status_code != 200: - logger.warning(f"Failed to fetch bot user info: {bot_user_response.text}") + logger.warning( + f"Failed to fetch bot user info: {bot_user_response.text}" + ) bot_user_id = None else: bot_user_id = bot_user_response.json().get("id") - + # Fetch guild info to get roles guild_response = await client.get( f"https://discord.com/api/v10/guilds/{guild_id}", headers=headers, timeout=30.0, ) - + if guild_response.status_code != 200: raise HTTPException( status_code=guild_response.status_code, detail="Failed to fetch guild information", ) - + guild_data = guild_response.json() 
guild_roles = {role["id"]: role for role in guild_data.get("roles", [])} - + # Fetch bot's member info to get its roles bot_member_response = await client.get( f"https://discord.com/api/v10/guilds/{guild_id}/members/{bot_user_id}", headers=headers, timeout=30.0, ) - + if bot_member_response.status_code != 200: - logger.warning(f"Failed to fetch bot member info: {bot_member_response.text}") + logger.warning( + f"Failed to fetch bot member info: {bot_member_response.text}" + ) bot_role_ids = {guild_id} # At minimum, bot has @everyone role - base_permissions = int(guild_roles.get(guild_id, {}).get("permissions", 0)) + base_permissions = int( + guild_roles.get(guild_id, {}).get("permissions", 0) + ) else: bot_member_data = bot_member_response.json() bot_role_ids = set(bot_member_data.get("roles", [])) bot_role_ids.add(guild_id) # @everyone role is always included - + # Compute base permissions from all bot roles base_permissions = 0 for role_id in bot_role_ids: if role_id in guild_roles: role_perms = int(guild_roles[role_id].get("permissions", 0)) base_permissions |= role_perms - + # Check if bot has administrator permission (bypasses all checks) is_admin = (base_permissions & ADMINISTRATOR) == ADMINISTRATOR - + # Fetch channels channels_response = await client.get( f"https://discord.com/api/v10/guilds/{guild_id}/channels", @@ -767,7 +774,7 @@ async def get_discord_channels( # 0 = GUILD_TEXT, 2 = GUILD_VOICE, 4 = GUILD_CATEGORY, 5 = GUILD_ANNOUNCEMENT # We want text channels (type 0) and announcement channels (type 5) text_channel_types = {0, 5} - + text_channels = [] for ch in channels_data: if ch.get("type") in text_channel_types: @@ -784,20 +791,24 @@ async def get_discord_channels( channel_overwrites, guild_id, ) - + # Bot can index if it has both VIEW_CHANNEL and READ_MESSAGE_HISTORY has_view = (effective_perms & VIEW_CHANNEL) == VIEW_CHANNEL - has_read_history = (effective_perms & READ_MESSAGE_HISTORY) == READ_MESSAGE_HISTORY + has_read_history = ( + 
effective_perms & READ_MESSAGE_HISTORY + ) == READ_MESSAGE_HISTORY can_index = has_view and has_read_history - - text_channels.append({ - "id": ch["id"], - "name": ch["name"], - "type": "text" if ch["type"] == 0 else "announcement", - "position": ch.get("position", 0), - "category_id": ch.get("parent_id"), - "can_index": can_index, - }) + + text_channels.append( + { + "id": ch["id"], + "name": ch["name"], + "type": "text" if ch["type"] == 0 else "announcement", + "position": ch.get("position", 0), + "category_id": ch.get("parent_id"), + "can_index": can_index, + } + ) # Sort by position text_channels.sort(key=lambda x: x["position"]) diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 678bf73c0..b3e152e28 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -1168,9 +1168,10 @@ async def _run_indexing_with_notifications( supports_retry_callback: Whether the indexing function supports on_retry_callback supports_heartbeat_callback: Whether the indexing function supports on_heartbeat_callback """ - from celery.exceptions import SoftTimeLimitExceeded from uuid import UUID + from celery.exceptions import SoftTimeLimitExceeded + notification = None # Track indexed count for retry notifications and heartbeat current_indexed_count = 0 @@ -1241,11 +1242,13 @@ async def _run_indexing_with_notifications( if notification: try: await session.refresh(notification) - await NotificationService.connector_indexing.notify_indexing_progress( - session=session, - notification=notification, - indexed_count=indexed_count, - stage="processing", + await ( + NotificationService.connector_indexing.notify_indexing_progress( + session=session, + notification=notification, + indexed_count=indexed_count, + stage="processing", + ) ) await session.commit() except Exception as e: @@ -1447,7 +1450,9 @@ async 
def _run_indexing_with_notifications( ) await session.commit() except Exception as notif_error: - logger.error(f"Failed to update notification on soft timeout: {notif_error!s}") + logger.error( + f"Failed to update notification on soft timeout: {notif_error!s}" + ) # Re-raise so Celery knows the task was terminated raise diff --git a/surfsense_backend/app/routes/slack_add_connector_route.py b/surfsense_backend/app/routes/slack_add_connector_route.py index 8523d14a5..0cbfdef44 100644 --- a/surfsense_backend/app/routes/slack_add_connector_route.py +++ b/surfsense_backend/app/routes/slack_add_connector_route.py @@ -547,7 +547,8 @@ async def get_slack_channels( select(SearchSourceConnector).where( SearchSourceConnector.id == connector_id, SearchSourceConnector.user_id == user.id, - SearchSourceConnector.connector_type == SearchSourceConnectorType.SLACK_CONNECTOR, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.SLACK_CONNECTOR, ) ) connector = result.scalar_one_or_none() diff --git a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py index ff162f70f..7fe7b6936 100644 --- a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py +++ b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py @@ -9,8 +9,7 @@ frontend from showing a perpetual "syncing" state. import logging from datetime import UTC, datetime, timedelta -from sqlalchemy import and_, update -from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy import and_ from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.future import select from sqlalchemy.orm.attributes import flag_modified @@ -42,12 +41,12 @@ def get_celery_session_maker(): def cleanup_stale_indexing_notifications_task(): """ Check for stale connector indexing notifications and mark them as failed. 
- + This task finds notifications that: - Have type = 'connector_indexing' - Have metadata.status = 'in_progress' - Have updated_at older than STALE_NOTIFICATION_TIMEOUT_MINUTES - + And marks them as failed with an appropriate error message. """ import asyncio @@ -78,7 +77,8 @@ async def _cleanup_stale_notifications(): select(Notification).filter( and_( Notification.type == "connector_indexing", - Notification.notification_metadata["status"].astext == "in_progress", + Notification.notification_metadata["status"].astext + == "in_progress", Notification.updated_at < cutoff_time, ) ) @@ -98,22 +98,28 @@ async def _cleanup_stale_notifications(): for notification in stale_notifications: try: # Get current indexed count from metadata if available - indexed_count = notification.notification_metadata.get("indexed_count", 0) - connector_name = notification.notification_metadata.get("connector_name", "Unknown") - + indexed_count = notification.notification_metadata.get( + "indexed_count", 0 + ) + connector_name = notification.notification_metadata.get( + "connector_name", "Unknown" + ) + # Calculate how long it's been stale stale_duration = datetime.now(UTC) - notification.updated_at stale_minutes = int(stale_duration.total_seconds() / 60) # Update notification metadata notification.notification_metadata["status"] = "failed" - notification.notification_metadata["completed_at"] = datetime.now(UTC).isoformat() + notification.notification_metadata["completed_at"] = datetime.now( + UTC + ).isoformat() notification.notification_metadata["error_message"] = ( f"Indexing task appears to have crashed or timed out. " f"No activity detected for {stale_minutes} minutes. " f"Please try syncing again." 
) - + # Flag the JSONB column as modified for SQLAlchemy to detect the change flag_modified(notification, "notification_metadata") @@ -138,4 +144,3 @@ async def _cleanup_stale_notifications(): except Exception as e: logger.error(f"Error cleaning up stale notifications: {e!s}", exc_info=True) await session.rollback() - diff --git a/surfsense_backend/app/tasks/composio_indexer.py b/surfsense_backend/app/tasks/composio_indexer.py index 49764fd98..0518ad2a6 100644 --- a/surfsense_backend/app/tasks/composio_indexer.py +++ b/surfsense_backend/app/tasks/composio_indexer.py @@ -12,9 +12,6 @@ import logging from collections.abc import Awaitable, Callable from importlib import import_module -# Type alias for heartbeat callback function -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select @@ -26,6 +23,9 @@ from app.db import ( from app.services.composio_service import INDEXABLE_TOOLKITS, TOOLKIT_TO_INDEXER from app.services.task_logging_service import TaskLoggingService +# Type alias for heartbeat callback function +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + # Set up logging logger = logging.getLogger(__name__) diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py index ab9e5d678..c2d609587 100644 --- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py @@ -20,12 +20,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -36,6 +30,11 @@ from .base import ( 
update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_airtable_records( session: AsyncSession, @@ -145,7 +144,11 @@ async def index_airtable_records( # Process each base for base in bases: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) + >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(total_documents_indexed) last_heartbeat_time = time.time() base_id = base.get("id") @@ -224,7 +227,11 @@ async def index_airtable_records( # Process each record for record in records: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) + >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(total_documents_indexed) last_heartbeat_time = time.time() diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py index 90232809c..cc428047c 100644 --- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py @@ -21,12 +21,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -37,6 +31,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# 
Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_bookstack_pages( session: AsyncSession, @@ -194,7 +193,10 @@ async def index_bookstack_pages( for page in pages: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() try: diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py index 2b95b6a11..8ecf7e20d 100644 --- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py @@ -22,12 +22,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -37,6 +31,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_clickup_tasks( session: AsyncSession, @@ -184,7 +183,11 @@ async def index_clickup_tasks( for task in tasks: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) + >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py 
b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index 078aacf86..914d91fb7 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -22,12 +22,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -38,6 +32,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_confluence_pages( session: AsyncSession, @@ -190,7 +189,10 @@ async def index_confluence_pages( for page in pages: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() try: diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index 4bbeff125..a70bc42d4 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -305,7 +305,11 @@ async def index_discord_messages( try: for guild in guilds: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) + >= HEARTBEAT_INTERVAL_SECONDS + ): await 
on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() guild_id = guild["id"] diff --git a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py index 49d82df0e..8fbba6463 100644 --- a/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/elasticsearch_indexer.py @@ -21,18 +21,18 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, get_current_timestamp, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + logger = logging.getLogger(__name__) @@ -186,7 +186,11 @@ async def index_elasticsearch_documents( fields=config.get("ELASTICSEARCH_FIELDS"), ): # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) + >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_processed) last_heartbeat_time = time.time() diff --git a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py index 75e7f516c..b01d235cf 100644 --- a/surfsense_backend/app/tasks/connector_indexers/github_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/github_indexer.py @@ -24,12 +24,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat 
interval in seconds - update notification every 30 seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -38,6 +32,12 @@ from .base import ( logger, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 + # Maximum tokens for a single digest before splitting # Most LLMs can handle 128k+ tokens now, but we'll be conservative MAX_DIGEST_CHARS = 500_000 # ~125k tokens @@ -184,7 +184,10 @@ async def index_github_repos( for repo_full_name in repo_full_names_to_index: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() if not repo_full_name or not isinstance(repo_full_name, str): diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index cef2e15f1..0156c3db4 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -6,8 +6,6 @@ import time from collections.abc import Awaitable, Callable from datetime import datetime, timedelta -import pytz -from dateutil.parser import isoparse from google.oauth2.credentials import Credentials from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession @@ -23,12 +21,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds 
-HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -38,6 +30,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_google_calendar_events( session: AsyncSession, @@ -296,7 +293,10 @@ async def index_google_calendar_events( for event in events: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() try: diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index 98df68cd1..3cd59674e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -420,7 +420,10 @@ async def _index_full_scan( while folders_to_process and files_processed < max_files: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() current_folder_id, current_folder_name = folders_to_process.pop(0) @@ -541,7 +544,10 @@ async def _index_with_delta_sync( for change in changes: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= 
HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() if files_processed >= max_files: diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index 34e5a9530..ec50a2b96 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -25,12 +25,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -41,6 +35,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_google_gmail_messages( session: AsyncSession, @@ -228,7 +227,10 @@ async def index_google_gmail_messages( for message in messages: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() try: diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py index ab36ae7d0..08f6d2d54 100644 --- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py @@ -22,12 +22,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback 
-HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds - update notification every 30 seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -38,6 +32,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_jira_issues( session: AsyncSession, @@ -184,7 +183,10 @@ async def index_jira_issues( for issue in issues: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() try: diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index 549aa0224..41ef32af4 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -21,12 +21,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds - update notification every 30 seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( calculate_date_range, check_document_by_unique_identifier, @@ -37,6 +31,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_linear_issues( session: 
AsyncSession, @@ -210,7 +209,10 @@ async def index_linear_issues( # Process each issue for issue in issues: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index 22fd6d468..56e1f82cd 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -21,12 +21,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -36,6 +30,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_luma_events( session: AsyncSession, @@ -236,7 +235,10 @@ async def index_luma_events( for event in events: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() try: diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index 88779db57..52622471a 100644 --- 
a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -234,7 +234,10 @@ async def index_notion_pages( # Process each page for page in pages: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() diff --git a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py index 48fa5f0d3..93a671cdb 100644 --- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py @@ -27,12 +27,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( build_document_metadata_string, check_document_by_unique_identifier, @@ -43,6 +37,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 def parse_frontmatter(content: str) -> tuple[dict | None, str]: """ @@ -320,7 +319,10 @@ async def index_obsidian_vault( for file_info in files: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(indexed_count) last_heartbeat_time = time.time() try: diff --git 
a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py index 4ac87164c..1fa8ae339 100644 --- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py @@ -20,12 +20,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds - update notification every 30 seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( build_document_metadata_markdown, calculate_date_range, @@ -37,6 +31,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_slack_messages( session: AsyncSession, @@ -187,7 +186,10 @@ async def index_slack_messages( # Process each channel for channel_obj in channels: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() channel_id = channel_obj["id"] diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py index 55bb02ab9..3f1cbb338 100644 --- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py @@ -19,12 +19,6 @@ from app.utils.document_converters import ( generate_unique_identifier_hash, ) -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# 
Heartbeat interval in seconds - update notification every 30 seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( build_document_metadata_markdown, calculate_date_range, @@ -36,6 +30,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds - update notification every 30 seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_teams_messages( session: AsyncSession, @@ -200,7 +199,10 @@ async def index_teams_messages( # Process each team for team in teams: # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index ae89b7513..eb4d9c61a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -22,12 +22,6 @@ from app.utils.document_converters import ( ) from app.utils.webcrawler_utils import parse_webcrawler_urls -# Type hint for heartbeat callback -HeartbeatCallbackType = Callable[[int], Awaitable[None]] - -# Heartbeat interval in seconds -HEARTBEAT_INTERVAL_SECONDS = 30 - from .base import ( check_document_by_unique_identifier, check_duplicate_document_by_hash, @@ -37,6 +31,11 @@ from .base import ( update_connector_last_indexed, ) +# Type hint for heartbeat callback +HeartbeatCallbackType = Callable[[int], Awaitable[None]] + +# Heartbeat interval in seconds +HEARTBEAT_INTERVAL_SECONDS = 30 async def index_crawled_urls( session: AsyncSession, @@ -155,7 +154,10 @@ async def index_crawled_urls( for idx, url 
in enumerate(urls, 1): # Check if it's time for a heartbeat update - if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS: + if ( + on_heartbeat_callback + and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS + ): await on_heartbeat_callback(documents_indexed) last_heartbeat_time = time.time() try: diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/discord-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/discord-config.tsx index a0fd6888f..f782a6f4d 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/discord-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/discord-config.tsx @@ -80,8 +80,8 @@ export const DiscordConfig: FC = ({ connector }) => {

- The bot needs "Read Message History" permission to index channels. - Ask a server admin to grant this permission for channels shown below. + The bot needs "Read Message History" permission to index channels. Ask a + server admin to grant this permission for channels shown below.

@@ -122,7 +122,8 @@ export const DiscordConfig: FC = ({ connector }) => {
) : channels.length === 0 && !error ? (
- No channels found. Make sure the bot has been added to your Discord server with proper permissions. + No channels found. Make sure the bot has been added to your Discord server with proper + permissions.
) : (
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/slack-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/slack-config.tsx index 3af3e564e..ff01ac96a 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/slack-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/slack-config.tsx @@ -136,8 +136,7 @@ export const SlackConfig: FC = ({ connector }) => { Ready to index - {channelsWithBot.length}{" "} - {channelsWithBot.length === 1 ? "channel" : "channels"} + {channelsWithBot.length} {channelsWithBot.length === 1 ? "channel" : "channels"}
diff --git a/surfsense_web/components/layout/ui/sidebar/SidebarSection.tsx b/surfsense_web/components/layout/ui/sidebar/SidebarSection.tsx index 6baf3b678..776aa2244 100644 --- a/surfsense_web/components/layout/ui/sidebar/SidebarSection.tsx +++ b/surfsense_web/components/layout/ui/sidebar/SidebarSection.tsx @@ -30,7 +30,12 @@ export function SidebarSection({
@@ -56,15 +61,11 @@ export function SidebarSection({ )}
- -
- {children} -
-
+ +
+ {children} +
+
); } diff --git a/surfsense_web/lib/format-date.ts b/surfsense_web/lib/format-date.ts index c7d8ca85e..ee60d113d 100644 --- a/surfsense_web/lib/format-date.ts +++ b/surfsense_web/lib/format-date.ts @@ -22,4 +22,3 @@ export function formatRelativeDate(dateString: string): string { if (daysAgo < 7) return `${daysAgo}d ago`; return format(date, "MMM d, yyyy"); } - From 05d1d6ac04ebb76fce3f639d82cdc5452876e9b0 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Mon, 2 Feb 2026 00:18:47 +0530 Subject: [PATCH 12/15] feat: implement Redis heartbeat tracking for connector indexing tasks and update stale notification cleanup logic --- .../routes/search_source_connectors_routes.py | 52 ++++++ .../stale_notification_cleanup_task.py | 156 ++++++++++-------- 2 files changed, 139 insertions(+), 69 deletions(-) diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index b3e152e28..3a937653d 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -19,10 +19,12 @@ Non-OAuth connectors (BookStack, GitHub, etc.) 
are limited to one per search spa """ import logging +import os from datetime import UTC, datetime, timedelta from typing import Any import pytz +import redis from dateutil.parser import isoparse from fastapi import APIRouter, Body, Depends, HTTPException, Query from pydantic import BaseModel, Field, ValidationError @@ -78,6 +80,27 @@ from app.utils.rbac import check_permission # Set up logging logger = logging.getLogger(__name__) +# Redis client for heartbeat tracking +_heartbeat_redis_client: redis.Redis | None = None + +# Redis key TTL - notification is stale if no heartbeat in this time +HEARTBEAT_TTL_SECONDS = 120 # 2 minutes + + +def get_heartbeat_redis_client() -> redis.Redis: + """Get or create Redis client for heartbeat tracking.""" + global _heartbeat_redis_client + if _heartbeat_redis_client is None: + redis_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") + _heartbeat_redis_client = redis.from_url(redis_url, decode_responses=True) + return _heartbeat_redis_client + + +def _get_heartbeat_key(notification_id: int) -> str: + """Generate Redis key for notification heartbeat.""" + return f"indexing:heartbeat:{notification_id}" + + router = APIRouter() @@ -1200,6 +1223,16 @@ async def _run_indexing_with_notifications( ) ) + # Set initial Redis heartbeat for stale detection + if notification: + try: + heartbeat_key = _get_heartbeat_key(notification.id) + get_heartbeat_redis_client().setex( + heartbeat_key, HEARTBEAT_TTL_SECONDS, "0" + ) + except Exception as e: + logger.warning(f"Failed to set initial Redis heartbeat: {e}") + # Update notification to fetching stage if notification: await NotificationService.connector_indexing.notify_indexing_progress( @@ -1241,6 +1274,17 @@ async def _run_indexing_with_notifications( current_indexed_count = indexed_count if notification: try: + # Set Redis heartbeat key with TTL (fast, for stale detection) + heartbeat_key = _get_heartbeat_key(notification.id) + get_heartbeat_redis_client().setex( + 
heartbeat_key, HEARTBEAT_TTL_SECONDS, str(indexed_count) + ) + except Exception as e: + # Don't let Redis errors break the indexing + logger.warning(f"Failed to set Redis heartbeat: {e}") + + try: + # Still update DB notification for progress display await session.refresh(notification) await ( NotificationService.connector_indexing.notify_indexing_progress( @@ -1473,6 +1517,14 @@ async def _run_indexing_with_notifications( ) except Exception as notif_error: logger.error(f"Failed to update notification: {notif_error!s}") + finally: + # Clean up Redis heartbeat key when task completes (success or failure) + if notification: + try: + heartbeat_key = _get_heartbeat_key(notification.id) + get_heartbeat_redis_client().delete(heartbeat_key) + except Exception: + pass # Ignore cleanup errors - key will expire anyway async def run_notion_indexing_with_new_session( diff --git a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py index 7fe7b6936..e77b3225e 100644 --- a/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py +++ b/surfsense_backend/app/tasks/celery_tasks/stale_notification_cleanup_task.py @@ -1,18 +1,25 @@ """Celery task to detect and mark stale connector indexing notifications as failed. This task runs periodically (every 5 minutes by default) to find notifications -that are stuck in "in_progress" status but haven't received a heartbeat update -in the configured timeout period. These are marked as "failed" to prevent the -frontend from showing a perpetual "syncing" state. +that are stuck in "in_progress" status but don't have an active Redis heartbeat key. +These are marked as "failed" to prevent the frontend from showing a perpetual "syncing" state. 
+ +Detection mechanism: +- Active indexing tasks set a Redis key with TTL (2 minutes) as a heartbeat +- If the task crashes, the Redis key expires automatically +- This cleanup task checks for in-progress notifications without a Redis heartbeat key +- Such notifications are marked as failed with a single batch UPDATE """ +import json import logging -from datetime import UTC, datetime, timedelta +import os +from datetime import UTC, datetime -from sqlalchemy import and_ +import redis +from sqlalchemy import and_, text from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.future import select -from sqlalchemy.orm.attributes import flag_modified from sqlalchemy.pool import NullPool from app.celery_app import celery_app @@ -21,10 +28,22 @@ from app.db import Notification logger = logging.getLogger(__name__) -# Timeout in minutes - notifications without heartbeat for this long are marked as failed -# Should be longer than HEARTBEAT_INTERVAL_SECONDS (30s) * a reasonable number of missed heartbeats -# 5 minutes = 10 missed heartbeats, which is a reasonable threshold -STALE_NOTIFICATION_TIMEOUT_MINUTES = 5 +# Redis client for checking heartbeats +_redis_client: redis.Redis | None = None + + +def get_redis_client() -> redis.Redis: + """Get or create Redis client for heartbeat checking.""" + global _redis_client + if _redis_client is None: + redis_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") + _redis_client = redis.from_url(redis_url, decode_responses=True) + return _redis_client + + +def _get_heartbeat_key(notification_id: int) -> str: + """Generate Redis key for notification heartbeat.""" + return f"indexing:heartbeat:{notification_id}" def get_celery_session_maker(): @@ -45,9 +64,9 @@ def cleanup_stale_indexing_notifications_task(): This task finds notifications that: - Have type = 'connector_indexing' - Have metadata.status = 'in_progress' - - Have updated_at older than STALE_NOTIFICATION_TIMEOUT_MINUTES + - Do NOT
have a corresponding Redis heartbeat key (meaning task crashed) - And marks them as failed with an appropriate error message. + And marks them as failed with a single batch UPDATE. """ import asyncio @@ -61,84 +80,83 @@ def cleanup_stale_indexing_notifications_task(): async def _cleanup_stale_notifications(): - """Find and mark stale connector indexing notifications as failed.""" + """Find and mark stale connector indexing notifications as failed. + + Uses Redis TTL-based detection: + 1. Find all in-progress notifications + 2. Check which ones are missing their Redis heartbeat key + 3. Mark those as failed with a single batch UPDATE using JSONB || operator + """ async with get_celery_session_maker()() as session: try: - # Calculate the cutoff time - cutoff_time = datetime.now(UTC) - timedelta( - minutes=STALE_NOTIFICATION_TIMEOUT_MINUTES - ) - - # Find stale notifications: - # - type = 'connector_indexing' - # - metadata->>'status' = 'in_progress' - # - updated_at < cutoff_time + # Find all in-progress connector indexing notifications result = await session.execute( - select(Notification).filter( + select(Notification.id).where( and_( Notification.type == "connector_indexing", Notification.notification_metadata["status"].astext == "in_progress", - Notification.updated_at < cutoff_time, ) ) ) - stale_notifications = result.scalars().all() + in_progress_ids = [row[0] for row in result.fetchall()] - if not stale_notifications: - logger.debug("No stale connector indexing notifications found") + if not in_progress_ids: + logger.debug("No in-progress connector indexing notifications found") + return + + # Check which ones are missing heartbeat keys in Redis + redis_client = get_redis_client() + stale_notification_ids = [] + + for notification_id in in_progress_ids: + heartbeat_key = _get_heartbeat_key(notification_id) + if not redis_client.exists(heartbeat_key): + stale_notification_ids.append(notification_id) + + if not stale_notification_ids: + logger.debug( + f"All
{len(in_progress_ids)} in-progress notifications have active Redis heartbeats" + ) return logger.warning( - f"Found {len(stale_notifications)} stale connector indexing notifications " - f"(no heartbeat for >{STALE_NOTIFICATION_TIMEOUT_MINUTES} minutes)" + f"Found {len(stale_notification_ids)} stale connector indexing notifications " + f"(no Redis heartbeat key): {stale_notification_ids}" ) - # Mark each stale notification as failed - for notification in stale_notifications: - try: - # Get current indexed count from metadata if available - indexed_count = notification.notification_metadata.get( - "indexed_count", 0 - ) - connector_name = notification.notification_metadata.get( - "connector_name", "Unknown" - ) + # Single batch UPDATE (one statement for all stale rows) using JSONB || operator + # This merges the update data into existing notification_metadata + # Also updates title and message for proper UI display + error_message = ( + "Something went wrong while syncing your content. Please retry." + ) - # Calculate how long it's been stale - stale_duration = datetime.now(UTC) - notification.updated_at - stale_minutes = int(stale_duration.total_seconds() / 60) + update_data = { + "status": "failed", + "completed_at": datetime.now(UTC).isoformat(), + "error_message": error_message, + "sync_stage": "failed", + } - # Update notification metadata - notification.notification_metadata["status"] = "failed" - notification.notification_metadata["completed_at"] = datetime.now( - UTC - ).isoformat() - notification.notification_metadata["error_message"] = ( - f"Indexing task appears to have crashed or timed out. " - f"No activity detected for {stale_minutes} minutes. " - f"Please try syncing again."
- ) + await session.execute( + text(""" + UPDATE notifications + SET metadata = metadata || CAST(:update_json AS jsonb), + title = 'Failed: ' || COALESCE(metadata->>'connector_name', 'Connector'), + message = :display_message + WHERE id = ANY(:ids) + """), + { + "update_json": json.dumps(update_data), + "display_message": f"{error_message}", + "ids": stale_notification_ids, + }, + ) - # Flag the JSONB column as modified for SQLAlchemy to detect the change - flag_modified(notification, "notification_metadata") - - logger.info( - f"Marking notification {notification.id} for connector '{connector_name}' as failed " - f"(stale for {stale_minutes} minutes, indexed {indexed_count} items before failure)" - ) - - except Exception as e: - logger.error( - f"Error marking notification {notification.id} as failed: {e!s}", - exc_info=True, - ) - continue - - # Commit all changes await session.commit() logger.info( - f"Successfully marked {len(stale_notifications)} stale notifications as failed" + f"Successfully marked {len(stale_notification_ids)} stale notifications as failed (batch UPDATE)" ) except Exception as e: From cf339ff350611bb28a4b4ae28cee47bf0c77f2de Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Mon, 2 Feb 2026 00:19:19 +0530 Subject: [PATCH 13/15] chore: ran backend linting --- .../app/tasks/connector_indexers/airtable_indexer.py | 1 + .../app/tasks/connector_indexers/bookstack_indexer.py | 1 + .../app/tasks/connector_indexers/clickup_indexer.py | 1 + .../app/tasks/connector_indexers/confluence_indexer.py | 1 + .../app/tasks/connector_indexers/google_calendar_indexer.py | 1 + .../app/tasks/connector_indexers/google_gmail_indexer.py | 1 + surfsense_backend/app/tasks/connector_indexers/jira_indexer.py | 1 + surfsense_backend/app/tasks/connector_indexers/linear_indexer.py | 1 + surfsense_backend/app/tasks/connector_indexers/luma_indexer.py | 1 + .../app/tasks/connector_indexers/obsidian_indexer.py | 1 + 
surfsense_backend/app/tasks/connector_indexers/slack_indexer.py | 1 + surfsense_backend/app/tasks/connector_indexers/teams_indexer.py | 1 + .../app/tasks/connector_indexers/webcrawler_indexer.py | 1 + 13 files changed, 13 insertions(+) diff --git a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py index c2d609587..3bcf95d6a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/airtable_indexer.py @@ -36,6 +36,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_airtable_records( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py index cc428047c..d726e5d95 100644 --- a/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/bookstack_indexer.py @@ -37,6 +37,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_bookstack_pages( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py index 8ecf7e20d..e7e8b23e5 100644 --- a/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/clickup_indexer.py @@ -37,6 +37,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_clickup_tasks( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py 
b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index 914d91fb7..2f20472d2 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -38,6 +38,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_confluence_pages( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index 0156c3db4..f64a7a5c3 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -36,6 +36,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_google_calendar_events( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py index ec50a2b96..45ce91c6f 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -41,6 +41,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_google_gmail_messages( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py index 08f6d2d54..acee74192 100644 --- a/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/jira_indexer.py @@ -38,6 +38,7 @@ HeartbeatCallbackType = Callable[[int], 
Awaitable[None]] # Heartbeat interval in seconds - update notification every 30 seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_jira_issues( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index 41ef32af4..fc4ae5f18 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -37,6 +37,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds - update notification every 30 seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_linear_issues( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index 56e1f82cd..a18abf8ae 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -36,6 +36,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_luma_events( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py index 93a671cdb..a8cd78cc9 100644 --- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py @@ -43,6 +43,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 + def parse_frontmatter(content: str) -> tuple[dict | None, str]: """ Parse YAML frontmatter from markdown content. 
diff --git a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py index 1fa8ae339..5923c8089 100644 --- a/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/slack_indexer.py @@ -37,6 +37,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds - update notification every 30 seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_slack_messages( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py index 3f1cbb338..162509a1e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py @@ -36,6 +36,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds - update notification every 30 seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_teams_messages( session: AsyncSession, connector_id: int, diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index eb4d9c61a..ac16ecde6 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -37,6 +37,7 @@ HeartbeatCallbackType = Callable[[int], Awaitable[None]] # Heartbeat interval in seconds HEARTBEAT_INTERVAL_SECONDS = 30 + async def index_crawled_urls( session: AsyncSession, connector_id: int, From f7c3b36798f291c939d5f6b5a7f6dcbeedcc0073 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Mon, 2 Feb 2026 00:41:04 +0530 Subject: [PATCH 14/15] fix: update hashlib usage in generate_indexing_settings_hash to improve security compliance --- 
.../app/connectors/composio_google_drive_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py index 5e4fc8c0f..2816b4a8b 100644 --- a/surfsense_backend/app/connectors/composio_google_drive_connector.py +++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py @@ -558,7 +558,7 @@ def generate_indexing_settings_hash( "include_subfolders": indexing_options.get("include_subfolders", True), "max_files_per_folder": indexing_options.get("max_files_per_folder", 100), } - return hashlib.md5(json.dumps(settings, sort_keys=True).encode()).hexdigest() + return hashlib.md5(json.dumps(settings, sort_keys=True).encode(), usedforsecurity=False).hexdigest() async def index_composio_google_drive( From 0d0d08fabde71c776ef51171c4784084aa77ecb9 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Mon, 2 Feb 2026 00:43:25 +0530 Subject: [PATCH 15/15] chore: ran linting --- .../app/connectors/composio_google_drive_connector.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/connectors/composio_google_drive_connector.py b/surfsense_backend/app/connectors/composio_google_drive_connector.py index 2816b4a8b..364712215 100644 --- a/surfsense_backend/app/connectors/composio_google_drive_connector.py +++ b/surfsense_backend/app/connectors/composio_google_drive_connector.py @@ -558,7 +558,9 @@ def generate_indexing_settings_hash( "include_subfolders": indexing_options.get("include_subfolders", True), "max_files_per_folder": indexing_options.get("max_files_per_folder", 100), } - return hashlib.md5(json.dumps(settings, sort_keys=True).encode(), usedforsecurity=False).hexdigest() + return hashlib.md5( + json.dumps(settings, sort_keys=True).encode(), usedforsecurity=False + ).hexdigest() async def index_composio_google_drive(