feat: add heartbeat callback support for long-running indexing tasks and implement stale notification cleanup task

This commit is contained in:
Anish Sarkar 2026-02-01 02:17:06 +05:30
parent e5f7e87f42
commit 024a683b4f
27 changed files with 685 additions and 7 deletions

View file

@@ -5,6 +5,44 @@ import type { SearchSourceConnector } from "@/contracts/types/connector.types";
import type { InboxItem } from "@/contracts/types/inbox.types";
import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types";
/**
* Timeout thresholds for stuck task detection
*
* These align with the backend Celery configuration:
* - HARD_TIMEOUT: 8 hours (task_time_limit=28800 in Celery)
* Any task running longer than this is definitely dead.
*
* - STALE_THRESHOLD: 15 minutes without notification updates
* If heartbeats are being sent every 30s, missing 15+ minutes of updates
* indicates the task has likely crashed or the worker is down.
*/
const HARD_TIMEOUT_MS = 8 * 60 * 60 * 1000; // 8 hours — matches Celery task_time_limit=28800s
const STALE_THRESHOLD_MS = 15 * 60 * 1000; // 15 minutes — far beyond the ~30s heartbeat cadence
/**
 * Whether a notification has gone without updates for longer than
 * STALE_THRESHOLD_MS, suggesting the task behind it crashed or its
 * worker went down.
 *
 * @param updatedAt - ISO timestamp of the notification's last update
 * @returns true when the last update is older than the stale threshold;
 *          false for missing or unparseable timestamps (a NaN elapsed
 *          time never satisfies the `>` comparison)
 */
function isNotificationStale(updatedAt: string | null | undefined): boolean {
  if (!updatedAt) {
    return false;
  }
  const msSinceUpdate = Date.now() - new Date(updatedAt).getTime();
  return msSinceUpdate > STALE_THRESHOLD_MS;
}
/**
 * Whether a task started longer ago than the hard Celery kill limit
 * (HARD_TIMEOUT_MS) and therefore cannot still be running.
 *
 * @param startedAt - ISO timestamp of when the task started
 * @returns true when the start time is older than the hard timeout;
 *          false for missing or unparseable timestamps
 */
function isTaskTimedOut(startedAt: string | null | undefined): boolean {
  if (!startedAt) {
    return false;
  }
  const msSinceStart = Date.now() - new Date(startedAt).getTime();
  return msSinceStart > HARD_TIMEOUT_MS;
}
/**
* Hook to track which connectors are currently indexing using local state.
*
@@ -13,6 +51,8 @@ import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types";
* 2. Detecting in_progress notifications from Electric SQL to restore state after remounts
* 3. Clearing indexing state when notifications become completed or failed
* 4. Clearing indexing state when Electric SQL detects last_indexed_at changed
* 5. Detecting stale/stuck tasks that haven't updated in 15+ minutes
* 6. Detecting hard timeout (8h) - tasks that definitely cannot still be running
*
* The actual `last_indexed_at` value comes from Electric SQL/PGlite, not local state.
*/
@@ -57,6 +97,7 @@ export function useIndexingConnectors(
// Detect notification status changes and update indexing state accordingly
// This restores spinner state after component remounts and handles all status transitions
// Also detects stale/stuck tasks that haven't been updated in a while
useEffect(() => {
if (!inboxItems || inboxItems.length === 0) return;
@@ -71,11 +112,26 @@
const metadata = isConnectorIndexingMetadata(item.metadata) ? item.metadata : null;
if (!metadata) continue;
// If status is "in_progress", add connector to indexing set
// If status is "in_progress", check if it's actually still running
if (metadata.status === "in_progress") {
if (!newIndexingIds.has(metadata.connector_id)) {
newIndexingIds.add(metadata.connector_id);
hasChanges = true;
// Check for hard timeout (8h) - task is definitely dead
const timedOut = isTaskTimedOut(metadata.started_at);
// Check for stale notification (15min without updates) - task likely crashed
const stale = isNotificationStale(item.updated_at);
if (timedOut || stale) {
// Task is stuck - don't show as indexing
if (newIndexingIds.has(metadata.connector_id)) {
newIndexingIds.delete(metadata.connector_id);
hasChanges = true;
}
} else {
// Task appears to be genuinely running
if (!newIndexingIds.has(metadata.connector_id)) {
newIndexingIds.add(metadata.connector_id);
hasChanges = true;
}
}
}
// If status is "completed" or "failed", remove connector from indexing set