mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-07-04 22:02:16 +02:00
feat: add heartbeat callback support for long-running indexing tasks and implement stale notification cleanup task
This commit is contained in:
parent
e5f7e87f42
commit
024a683b4f
27 changed files with 685 additions and 7 deletions
|
|
@ -79,6 +79,7 @@ celery_app = Celery(
|
||||||
"app.tasks.celery_tasks.schedule_checker_task",
|
"app.tasks.celery_tasks.schedule_checker_task",
|
||||||
"app.tasks.celery_tasks.blocknote_migration_tasks",
|
"app.tasks.celery_tasks.blocknote_migration_tasks",
|
||||||
"app.tasks.celery_tasks.document_reindex_tasks",
|
"app.tasks.celery_tasks.document_reindex_tasks",
|
||||||
|
"app.tasks.celery_tasks.stale_notification_cleanup_task",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -121,4 +122,14 @@ celery_app.conf.beat_schedule = {
|
||||||
"expires": 30, # Task expires after 30 seconds if not picked up
|
"expires": 30, # Task expires after 30 seconds if not picked up
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Cleanup stale connector indexing notifications every 5 minutes
|
||||||
|
# This detects tasks that crashed or timed out without proper cleanup
|
||||||
|
# and marks their notifications as failed so users don't see perpetual "syncing"
|
||||||
|
"cleanup-stale-indexing-notifications": {
|
||||||
|
"task": "cleanup_stale_indexing_notifications",
|
||||||
|
"schedule": crontab(minute="*/5"), # Every 5 minutes
|
||||||
|
"options": {
|
||||||
|
"expires": 60, # Task expires after 60 seconds if not picked up
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,15 @@ Provides Gmail specific methods for data retrieval and indexing via Composio.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
# Heartbeat configuration
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
from sqlalchemy.orm import selectinload
|
from sqlalchemy.orm import selectinload
|
||||||
|
|
@ -427,6 +433,7 @@ async def index_composio_gmail(
|
||||||
log_entry,
|
log_entry,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
max_items: int = 1000,
|
max_items: int = 1000,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str]:
|
) -> tuple[int, str]:
|
||||||
"""Index Gmail messages via Composio with pagination and incremental processing."""
|
"""Index Gmail messages via Composio with pagination and incremental processing."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -471,8 +478,16 @@ async def index_composio_gmail(
|
||||||
total_documents_skipped = 0
|
total_documents_skipped = 0
|
||||||
total_messages_fetched = 0
|
total_messages_fetched = 0
|
||||||
result_size_estimate = None # Will be set from first API response
|
result_size_estimate = None # Will be set from first API response
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
while total_messages_fetched < max_items:
|
while total_messages_fetched < max_items:
|
||||||
|
# Send heartbeat periodically to indicate task is still alive
|
||||||
|
if on_heartbeat_callback:
|
||||||
|
current_time = time.time()
|
||||||
|
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(total_documents_indexed)
|
||||||
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
# Calculate how many messages to fetch in this batch
|
# Calculate how many messages to fetch in this batch
|
||||||
remaining = max_items - total_messages_fetched
|
remaining = max_items - total_messages_fetched
|
||||||
current_batch_size = min(batch_size, remaining)
|
current_batch_size = min(batch_size, remaining)
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,15 @@ Provides Google Calendar specific methods for data retrieval and indexing via Co
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
# Heartbeat configuration
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
from sqlalchemy.orm import selectinload
|
from sqlalchemy.orm import selectinload
|
||||||
|
|
@ -191,6 +197,7 @@ async def index_composio_google_calendar(
|
||||||
log_entry,
|
log_entry,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
max_items: int = 2500,
|
max_items: int = 2500,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str]:
|
) -> tuple[int, str]:
|
||||||
"""Index Google Calendar events via Composio."""
|
"""Index Google Calendar events via Composio."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -262,8 +269,15 @@ async def index_composio_google_calendar(
|
||||||
duplicate_content_count = (
|
duplicate_content_count = (
|
||||||
0 # Track events skipped due to duplicate content_hash
|
0 # Track events skipped due to duplicate content_hash
|
||||||
)
|
)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for event in events:
|
for event in events:
|
||||||
|
# Send heartbeat periodically to indicate task is still alive
|
||||||
|
if on_heartbeat_callback:
|
||||||
|
current_time = time.time()
|
||||||
|
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = current_time
|
||||||
try:
|
try:
|
||||||
# Handle both standard Google API and potential Composio variations
|
# Handle both standard Google API and potential Composio variations
|
||||||
event_id = event.get("id", "") or event.get("eventId", "")
|
event_id = event.get("id", "") or event.get("eventId", "")
|
||||||
|
|
|
||||||
|
|
@ -9,10 +9,16 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
# Heartbeat configuration
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.orm.attributes import flag_modified
|
from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
|
||||||
|
|
@ -565,6 +571,7 @@ async def index_composio_google_drive(
|
||||||
log_entry,
|
log_entry,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
max_items: int = 1000,
|
max_items: int = 1000,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, int, str | None]:
|
) -> tuple[int, int, str | None]:
|
||||||
"""Index Google Drive files via Composio with delta sync support.
|
"""Index Google Drive files via Composio with delta sync support.
|
||||||
|
|
||||||
|
|
@ -652,6 +659,7 @@ async def index_composio_google_drive(
|
||||||
max_items=max_items,
|
max_items=max_items,
|
||||||
task_logger=task_logger,
|
task_logger=task_logger,
|
||||||
log_entry=log_entry,
|
log_entry=log_entry,
|
||||||
|
on_heartbeat_callback=on_heartbeat_callback,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
@ -684,6 +692,7 @@ async def index_composio_google_drive(
|
||||||
max_items=max_items,
|
max_items=max_items,
|
||||||
task_logger=task_logger,
|
task_logger=task_logger,
|
||||||
log_entry=log_entry,
|
log_entry=log_entry,
|
||||||
|
on_heartbeat_callback=on_heartbeat_callback,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get new page token for next sync (always update after successful sync)
|
# Get new page token for next sync (always update after successful sync)
|
||||||
|
|
@ -765,6 +774,7 @@ async def _index_composio_drive_delta_sync(
|
||||||
max_items: int,
|
max_items: int,
|
||||||
task_logger: TaskLoggingService,
|
task_logger: TaskLoggingService,
|
||||||
log_entry,
|
log_entry,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, int, list[str]]:
|
) -> tuple[int, int, list[str]]:
|
||||||
"""Index Google Drive files using delta sync (only changed files).
|
"""Index Google Drive files using delta sync (only changed files).
|
||||||
|
|
||||||
|
|
@ -774,6 +784,7 @@ async def _index_composio_drive_delta_sync(
|
||||||
documents_indexed = 0
|
documents_indexed = 0
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
processing_errors = []
|
processing_errors = []
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
# Fetch all changes with pagination
|
# Fetch all changes with pagination
|
||||||
all_changes = []
|
all_changes = []
|
||||||
|
|
@ -804,6 +815,13 @@ async def _index_composio_drive_delta_sync(
|
||||||
logger.info(f"Processing {len(all_changes)} changes from delta sync")
|
logger.info(f"Processing {len(all_changes)} changes from delta sync")
|
||||||
|
|
||||||
for change in all_changes[:max_items]:
|
for change in all_changes[:max_items]:
|
||||||
|
# Send heartbeat periodically to indicate task is still alive
|
||||||
|
if on_heartbeat_callback:
|
||||||
|
current_time = time.time()
|
||||||
|
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Handle removed files
|
# Handle removed files
|
||||||
is_removed = change.get("removed", False)
|
is_removed = change.get("removed", False)
|
||||||
|
|
@ -886,11 +904,13 @@ async def _index_composio_drive_full_scan(
|
||||||
max_items: int,
|
max_items: int,
|
||||||
task_logger: TaskLoggingService,
|
task_logger: TaskLoggingService,
|
||||||
log_entry,
|
log_entry,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, int, list[str]]:
|
) -> tuple[int, int, list[str]]:
|
||||||
"""Index Google Drive files using full scan (first sync or when no delta token)."""
|
"""Index Google Drive files using full scan (first sync or when no delta token)."""
|
||||||
documents_indexed = 0
|
documents_indexed = 0
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
processing_errors = []
|
processing_errors = []
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
all_files = []
|
all_files = []
|
||||||
|
|
||||||
|
|
@ -1001,6 +1021,13 @@ async def _index_composio_drive_full_scan(
|
||||||
)
|
)
|
||||||
|
|
||||||
for file_info in all_files:
|
for file_info in all_files:
|
||||||
|
# Send heartbeat periodically to indicate task is still alive
|
||||||
|
if on_heartbeat_callback:
|
||||||
|
current_time = time.time()
|
||||||
|
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = current_time
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Handle both standard Google API and potential Composio variations
|
# Handle both standard Google API and potential Composio variations
|
||||||
file_id = file_info.get("id", "") or file_info.get("fileId", "")
|
file_id = file_info.get("id", "") or file_info.get("fileId", "")
|
||||||
|
|
|
||||||
|
|
@ -1137,6 +1137,7 @@ async def run_slack_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_slack_messages,
|
indexing_function=index_slack_messages,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1150,6 +1151,7 @@ async def _run_indexing_with_notifications(
|
||||||
indexing_function,
|
indexing_function,
|
||||||
update_timestamp_func=None,
|
update_timestamp_func=None,
|
||||||
supports_retry_callback: bool = False,
|
supports_retry_callback: bool = False,
|
||||||
|
supports_heartbeat_callback: bool = False,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Generic helper to run indexing with real-time notifications.
|
Generic helper to run indexing with real-time notifications.
|
||||||
|
|
@ -1164,11 +1166,13 @@ async def _run_indexing_with_notifications(
|
||||||
indexing_function: Async function that performs the indexing
|
indexing_function: Async function that performs the indexing
|
||||||
update_timestamp_func: Optional function to update connector timestamp
|
update_timestamp_func: Optional function to update connector timestamp
|
||||||
supports_retry_callback: Whether the indexing function supports on_retry_callback
|
supports_retry_callback: Whether the indexing function supports on_retry_callback
|
||||||
|
supports_heartbeat_callback: Whether the indexing function supports on_heartbeat_callback
|
||||||
"""
|
"""
|
||||||
|
from celery.exceptions import SoftTimeLimitExceeded
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
notification = None
|
notification = None
|
||||||
# Track indexed count for retry notifications
|
# Track indexed count for retry notifications and heartbeat
|
||||||
current_indexed_count = 0
|
current_indexed_count = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -1227,6 +1231,27 @@ async def _run_indexing_with_notifications(
|
||||||
# Don't let notification errors break the indexing
|
# Don't let notification errors break the indexing
|
||||||
logger.warning(f"Failed to update retry notification: {e}")
|
logger.warning(f"Failed to update retry notification: {e}")
|
||||||
|
|
||||||
|
# Create heartbeat callback for connectors that support it
|
||||||
|
# This updates the notification periodically during long-running indexing loops
|
||||||
|
# to prevent the task from appearing stuck if the worker crashes
|
||||||
|
async def on_heartbeat_callback(indexed_count: int) -> None:
|
||||||
|
"""Callback to update notification during indexing (heartbeat)."""
|
||||||
|
nonlocal notification, current_indexed_count
|
||||||
|
current_indexed_count = indexed_count
|
||||||
|
if notification:
|
||||||
|
try:
|
||||||
|
await session.refresh(notification)
|
||||||
|
await NotificationService.connector_indexing.notify_indexing_progress(
|
||||||
|
session=session,
|
||||||
|
notification=notification,
|
||||||
|
indexed_count=indexed_count,
|
||||||
|
stage="processing",
|
||||||
|
)
|
||||||
|
await session.commit()
|
||||||
|
except Exception as e:
|
||||||
|
# Don't let notification errors break the indexing
|
||||||
|
logger.warning(f"Failed to update heartbeat notification: {e}")
|
||||||
|
|
||||||
# Build kwargs for indexing function
|
# Build kwargs for indexing function
|
||||||
indexing_kwargs = {
|
indexing_kwargs = {
|
||||||
"session": session,
|
"session": session,
|
||||||
|
|
@ -1242,6 +1267,10 @@ async def _run_indexing_with_notifications(
|
||||||
if supports_retry_callback:
|
if supports_retry_callback:
|
||||||
indexing_kwargs["on_retry_callback"] = on_retry_callback
|
indexing_kwargs["on_retry_callback"] = on_retry_callback
|
||||||
|
|
||||||
|
# Add heartbeat callback for connectors that support it
|
||||||
|
if supports_heartbeat_callback:
|
||||||
|
indexing_kwargs["on_heartbeat_callback"] = on_heartbeat_callback
|
||||||
|
|
||||||
# Run the indexing function
|
# Run the indexing function
|
||||||
# Some indexers return (indexed, error), others return (indexed, skipped, error)
|
# Some indexers return (indexed, error), others return (indexed, skipped, error)
|
||||||
result = await indexing_function(**indexing_kwargs)
|
result = await indexing_function(**indexing_kwargs)
|
||||||
|
|
@ -1398,6 +1427,30 @@ async def _run_indexing_with_notifications(
|
||||||
await (
|
await (
|
||||||
session.commit()
|
session.commit()
|
||||||
) # Commit to ensure Electric SQL syncs the notification update
|
) # Commit to ensure Electric SQL syncs the notification update
|
||||||
|
except SoftTimeLimitExceeded:
|
||||||
|
# Celery soft time limit was reached - task is about to be killed
|
||||||
|
# Gracefully save progress and mark as interrupted
|
||||||
|
logger.warning(
|
||||||
|
f"Soft time limit reached for connector {connector_id}. "
|
||||||
|
f"Saving partial progress: {current_indexed_count} items indexed."
|
||||||
|
)
|
||||||
|
|
||||||
|
if notification:
|
||||||
|
try:
|
||||||
|
await session.refresh(notification)
|
||||||
|
await NotificationService.connector_indexing.notify_indexing_completed(
|
||||||
|
session=session,
|
||||||
|
notification=notification,
|
||||||
|
indexed_count=current_indexed_count,
|
||||||
|
error_message="Time limit reached. Partial sync completed. Please run again for remaining items.",
|
||||||
|
is_warning=True, # Mark as warning since partial data was indexed
|
||||||
|
)
|
||||||
|
await session.commit()
|
||||||
|
except Exception as notif_error:
|
||||||
|
logger.error(f"Failed to update notification on soft timeout: {notif_error!s}")
|
||||||
|
|
||||||
|
# Re-raise so Celery knows the task was terminated
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in indexing task: {e!s}", exc_info=True)
|
logger.error(f"Error in indexing task: {e!s}", exc_info=True)
|
||||||
|
|
||||||
|
|
@ -1409,7 +1462,7 @@ async def _run_indexing_with_notifications(
|
||||||
await NotificationService.connector_indexing.notify_indexing_completed(
|
await NotificationService.connector_indexing.notify_indexing_completed(
|
||||||
session=session,
|
session=session,
|
||||||
notification=notification,
|
notification=notification,
|
||||||
indexed_count=0,
|
indexed_count=current_indexed_count, # Use tracked count, not 0
|
||||||
error_message=str(e),
|
error_message=str(e),
|
||||||
skipped_count=None, # Unknown on exception
|
skipped_count=None, # Unknown on exception
|
||||||
)
|
)
|
||||||
|
|
@ -1439,6 +1492,7 @@ async def run_notion_indexing_with_new_session(
|
||||||
indexing_function=index_notion_pages,
|
indexing_function=index_notion_pages,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
supports_retry_callback=True, # Notion connector supports retry notifications
|
supports_retry_callback=True, # Notion connector supports retry notifications
|
||||||
|
supports_heartbeat_callback=True, # Notion connector supports heartbeat notifications
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1471,6 +1525,7 @@ async def run_notion_indexing(
|
||||||
indexing_function=index_notion_pages,
|
indexing_function=index_notion_pages,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
supports_retry_callback=True, # Notion connector supports retry notifications
|
supports_retry_callback=True, # Notion connector supports retry notifications
|
||||||
|
supports_heartbeat_callback=True, # Notion connector supports heartbeat notifications
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1521,6 +1576,7 @@ async def run_github_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_github_repos,
|
indexing_function=index_github_repos,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1571,6 +1627,7 @@ async def run_linear_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_linear_issues,
|
indexing_function=index_linear_issues,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1620,6 +1677,7 @@ async def run_discord_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_discord_messages,
|
indexing_function=index_discord_messages,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1670,6 +1728,7 @@ async def run_teams_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_teams_messages,
|
indexing_function=index_teams_messages,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1720,6 +1779,7 @@ async def run_jira_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_jira_issues,
|
indexing_function=index_jira_issues,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1772,6 +1832,7 @@ async def run_confluence_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_confluence_pages,
|
indexing_function=index_confluence_pages,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1822,6 +1883,7 @@ async def run_clickup_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_clickup_tasks,
|
indexing_function=index_clickup_tasks,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1872,6 +1934,7 @@ async def run_airtable_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_airtable_records,
|
indexing_function=index_airtable_records,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1924,6 +1987,7 @@ async def run_google_calendar_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_google_calendar_events,
|
indexing_function=index_google_calendar_events,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1998,6 +2062,7 @@ async def run_google_gmail_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=gmail_indexing_wrapper,
|
indexing_function=gmail_indexing_wrapper,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2206,6 +2271,7 @@ async def run_luma_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_luma_events,
|
indexing_function=index_luma_events,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2257,6 +2323,7 @@ async def run_elasticsearch_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_elasticsearch_documents,
|
indexing_function=index_elasticsearch_documents,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2306,6 +2373,7 @@ async def run_web_page_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_crawled_urls,
|
indexing_function=index_crawled_urls,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2360,6 +2428,7 @@ async def run_bookstack_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_bookstack_pages,
|
indexing_function=index_bookstack_pages,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2412,6 +2481,7 @@ async def run_obsidian_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_obsidian_vault,
|
indexing_function=index_obsidian_vault,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2465,6 +2535,7 @@ async def run_composio_indexing(
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
indexing_function=index_composio_connector,
|
indexing_function=index_composio_connector,
|
||||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||||
|
supports_heartbeat_callback=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,141 @@
|
||||||
|
"""Celery task to detect and mark stale connector indexing notifications as failed.
|
||||||
|
|
||||||
|
This task runs periodically (every 5 minutes by default) to find notifications
|
||||||
|
that are stuck in "in_progress" status but haven't received a heartbeat update
|
||||||
|
in the configured timeout period. These are marked as "failed" to prevent the
|
||||||
|
frontend from showing a perpetual "syncing" state.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import UTC, datetime, timedelta
|
||||||
|
|
||||||
|
from sqlalchemy import and_, update
|
||||||
|
from sqlalchemy.dialects.postgresql import JSONB
|
||||||
|
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||||
|
from sqlalchemy.future import select
|
||||||
|
from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
from sqlalchemy.pool import NullPool
|
||||||
|
|
||||||
|
from app.celery_app import celery_app
|
||||||
|
from app.config import config
|
||||||
|
from app.db import Notification
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Timeout in minutes - notifications without heartbeat for this long are marked as failed
|
||||||
|
# Should be longer than HEARTBEAT_INTERVAL_SECONDS (30s) * a reasonable number of missed heartbeats
|
||||||
|
# 5 minutes = 10 missed heartbeats, which is a reasonable threshold
|
||||||
|
STALE_NOTIFICATION_TIMEOUT_MINUTES = 5
|
||||||
|
|
||||||
|
|
||||||
|
def get_celery_session_maker():
|
||||||
|
"""Create async session maker for Celery tasks."""
|
||||||
|
engine = create_async_engine(
|
||||||
|
config.DATABASE_URL,
|
||||||
|
poolclass=NullPool,
|
||||||
|
echo=False,
|
||||||
|
)
|
||||||
|
return async_sessionmaker(engine, expire_on_commit=False)
|
||||||
|
|
||||||
|
|
||||||
|
@celery_app.task(name="cleanup_stale_indexing_notifications")
|
||||||
|
def cleanup_stale_indexing_notifications_task():
|
||||||
|
"""
|
||||||
|
Check for stale connector indexing notifications and mark them as failed.
|
||||||
|
|
||||||
|
This task finds notifications that:
|
||||||
|
- Have type = 'connector_indexing'
|
||||||
|
- Have metadata.status = 'in_progress'
|
||||||
|
- Have updated_at older than STALE_NOTIFICATION_TIMEOUT_MINUTES
|
||||||
|
|
||||||
|
And marks them as failed with an appropriate error message.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
|
||||||
|
try:
|
||||||
|
loop.run_until_complete(_cleanup_stale_notifications())
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def _cleanup_stale_notifications():
|
||||||
|
"""Find and mark stale connector indexing notifications as failed."""
|
||||||
|
async with get_celery_session_maker()() as session:
|
||||||
|
try:
|
||||||
|
# Calculate the cutoff time
|
||||||
|
cutoff_time = datetime.now(UTC) - timedelta(
|
||||||
|
minutes=STALE_NOTIFICATION_TIMEOUT_MINUTES
|
||||||
|
)
|
||||||
|
|
||||||
|
# Find stale notifications:
|
||||||
|
# - type = 'connector_indexing'
|
||||||
|
# - metadata->>'status' = 'in_progress'
|
||||||
|
# - updated_at < cutoff_time
|
||||||
|
result = await session.execute(
|
||||||
|
select(Notification).filter(
|
||||||
|
and_(
|
||||||
|
Notification.type == "connector_indexing",
|
||||||
|
Notification.notification_metadata["status"].astext == "in_progress",
|
||||||
|
Notification.updated_at < cutoff_time,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
stale_notifications = result.scalars().all()
|
||||||
|
|
||||||
|
if not stale_notifications:
|
||||||
|
logger.debug("No stale connector indexing notifications found")
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
f"Found {len(stale_notifications)} stale connector indexing notifications "
|
||||||
|
f"(no heartbeat for >{STALE_NOTIFICATION_TIMEOUT_MINUTES} minutes)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Mark each stale notification as failed
|
||||||
|
for notification in stale_notifications:
|
||||||
|
try:
|
||||||
|
# Get current indexed count from metadata if available
|
||||||
|
indexed_count = notification.notification_metadata.get("indexed_count", 0)
|
||||||
|
connector_name = notification.notification_metadata.get("connector_name", "Unknown")
|
||||||
|
|
||||||
|
# Calculate how long it's been stale
|
||||||
|
stale_duration = datetime.now(UTC) - notification.updated_at
|
||||||
|
stale_minutes = int(stale_duration.total_seconds() / 60)
|
||||||
|
|
||||||
|
# Update notification metadata
|
||||||
|
notification.notification_metadata["status"] = "failed"
|
||||||
|
notification.notification_metadata["completed_at"] = datetime.now(UTC).isoformat()
|
||||||
|
notification.notification_metadata["error_message"] = (
|
||||||
|
f"Indexing task appears to have crashed or timed out. "
|
||||||
|
f"No activity detected for {stale_minutes} minutes. "
|
||||||
|
f"Please try syncing again."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Flag the JSONB column as modified for SQLAlchemy to detect the change
|
||||||
|
flag_modified(notification, "notification_metadata")
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Marking notification {notification.id} for connector '{connector_name}' as failed "
|
||||||
|
f"(stale for {stale_minutes} minutes, indexed {indexed_count} items before failure)"
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
f"Error marking notification {notification.id} as failed: {e!s}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Commit all changes
|
||||||
|
await session.commit()
|
||||||
|
logger.info(
|
||||||
|
f"Successfully marked {len(stale_notifications)} stale notifications as failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error cleaning up stale notifications: {e!s}", exc_info=True)
|
||||||
|
await session.rollback()
|
||||||
|
|
||||||
|
|
@ -9,8 +9,12 @@ to avoid circular import issues with the connector_indexers package.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
|
|
||||||
|
# Type alias for heartbeat callback function
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
|
|
@ -86,6 +90,7 @@ async def index_composio_connector(
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
max_items: int = 1000,
|
max_items: int = 1000,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, int, str | None]:
|
) -> tuple[int, int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index content from a Composio connector.
|
Index content from a Composio connector.
|
||||||
|
|
@ -102,6 +107,7 @@ async def index_composio_connector(
|
||||||
end_date: End date for filtering (YYYY-MM-DD format)
|
end_date: End date for filtering (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp
|
update_last_indexed: Whether to update the last_indexed_at timestamp
|
||||||
max_items: Maximum number of items to fetch
|
max_items: Maximum number of items to fetch
|
||||||
|
on_heartbeat_callback: Optional callback to report progress for heartbeat updates
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (number_of_indexed_items, number_of_skipped_items, error_message or None)
|
Tuple of (number_of_indexed_items, number_of_skipped_items, error_message or None)
|
||||||
|
|
@ -180,6 +186,7 @@ async def index_composio_connector(
|
||||||
"log_entry": log_entry,
|
"log_entry": log_entry,
|
||||||
"update_last_indexed": update_last_indexed,
|
"update_last_indexed": update_last_indexed,
|
||||||
"max_items": max_items,
|
"max_items": max_items,
|
||||||
|
"on_heartbeat_callback": on_heartbeat_callback,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add date params for toolkits that support them
|
# Add date params for toolkits that support them
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,9 @@
|
||||||
Airtable connector indexer.
|
Airtable connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
|
@ -17,6 +20,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
calculate_date_range,
|
calculate_date_range,
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
|
|
@ -37,6 +46,7 @@ async def index_airtable_records(
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
max_records: int = 2500,
|
max_records: int = 2500,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Airtable records for a given connector.
|
Index Airtable records for a given connector.
|
||||||
|
|
@ -50,6 +60,7 @@ async def index_airtable_records(
|
||||||
end_date: End date for filtering records (YYYY-MM-DD)
|
end_date: End date for filtering records (YYYY-MM-DD)
|
||||||
max_records: Maximum number of records to fetch per table
|
max_records: Maximum number of records to fetch per table
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp
|
update_last_indexed: Whether to update the last_indexed_at timestamp
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (number_of_documents_processed, error_message)
|
Tuple of (number_of_documents_processed, error_message)
|
||||||
|
|
@ -127,8 +138,16 @@ async def index_airtable_records(
|
||||||
|
|
||||||
logger.info(f"Found {len(bases)} Airtable bases to process")
|
logger.info(f"Found {len(bases)} Airtable bases to process")
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
total_documents_indexed = 0
|
||||||
|
|
||||||
# Process each base
|
# Process each base
|
||||||
for base in bases:
|
for base in bases:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(total_documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
base_id = base.get("id")
|
base_id = base.get("id")
|
||||||
base_name = base.get("name", "Unknown Base")
|
base_name = base.get("name", "Unknown Base")
|
||||||
|
|
||||||
|
|
@ -204,6 +223,11 @@ async def index_airtable_records(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
# Process each record
|
# Process each record
|
||||||
for record in records:
|
for record in records:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(total_documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Generate markdown content
|
# Generate markdown content
|
||||||
markdown_content = (
|
markdown_content = (
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
BookStack connector indexer.
|
BookStack connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -19,6 +21,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
calculate_date_range,
|
calculate_date_range,
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
|
|
@ -38,6 +46,7 @@ async def index_bookstack_pages(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index BookStack pages.
|
Index BookStack pages.
|
||||||
|
|
@ -50,6 +59,7 @@ async def index_bookstack_pages(
|
||||||
start_date: Start date for indexing (YYYY-MM-DD format)
|
start_date: Start date for indexing (YYYY-MM-DD format)
|
||||||
end_date: End date for indexing (YYYY-MM-DD format)
|
end_date: End date for indexing (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -179,7 +189,14 @@ async def index_bookstack_pages(
|
||||||
skipped_pages = []
|
skipped_pages = []
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for page in pages:
|
for page in pages:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
try:
|
try:
|
||||||
page_id = page.get("id")
|
page_id = page.get("id")
|
||||||
page_name = page.get("name", "")
|
page_name = page.get("name", "")
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ ClickUp connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -20,6 +22,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
check_duplicate_document_by_hash,
|
check_duplicate_document_by_hash,
|
||||||
|
|
@ -38,6 +46,7 @@ async def index_clickup_tasks(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index tasks from ClickUp workspace.
|
Index tasks from ClickUp workspace.
|
||||||
|
|
@ -50,6 +59,7 @@ async def index_clickup_tasks(
|
||||||
start_date: Start date for filtering tasks (YYYY-MM-DD format)
|
start_date: Start date for filtering tasks (YYYY-MM-DD format)
|
||||||
end_date: End date for filtering tasks (YYYY-MM-DD format)
|
end_date: End date for filtering tasks (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp
|
update_last_indexed: Whether to update the last_indexed_at timestamp
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (number of indexed tasks, error message if any)
|
Tuple of (number of indexed tasks, error message if any)
|
||||||
|
|
@ -132,6 +142,9 @@ async def index_clickup_tasks(
|
||||||
documents_indexed = 0
|
documents_indexed = 0
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
# Iterate workspaces and fetch tasks
|
# Iterate workspaces and fetch tasks
|
||||||
for workspace in workspaces:
|
for workspace in workspaces:
|
||||||
workspace_id = workspace.get("id")
|
workspace_id = workspace.get("id")
|
||||||
|
|
@ -170,6 +183,11 @@ async def index_clickup_tasks(
|
||||||
)
|
)
|
||||||
|
|
||||||
for task in tasks:
|
for task in tasks:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
task_id = task.get("id")
|
task_id = task.get("id")
|
||||||
task_name = task.get("name", "Untitled Task")
|
task_name = task.get("name", "Untitled Task")
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ Confluence connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -20,6 +22,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
calculate_date_range,
|
calculate_date_range,
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
|
|
@ -39,6 +47,7 @@ async def index_confluence_pages(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Confluence pages and comments.
|
Index Confluence pages and comments.
|
||||||
|
|
@ -51,6 +60,7 @@ async def index_confluence_pages(
|
||||||
start_date: Start date for indexing (YYYY-MM-DD format)
|
start_date: Start date for indexing (YYYY-MM-DD format)
|
||||||
end_date: End date for indexing (YYYY-MM-DD format)
|
end_date: End date for indexing (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -175,7 +185,14 @@ async def index_confluence_pages(
|
||||||
skipped_pages = []
|
skipped_pages = []
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for page in pages:
|
for page in pages:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
try:
|
try:
|
||||||
page_id = page.get("id")
|
page_id = page.get("id")
|
||||||
page_title = page.get("title", "")
|
page_title = page.get("title", "")
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ Discord connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import UTC, datetime, timedelta
|
from datetime import UTC, datetime, timedelta
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -28,6 +30,12 @@ from .base import (
|
||||||
update_connector_last_indexed,
|
update_connector_last_indexed,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds - update notification every 30 seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
|
|
||||||
async def index_discord_messages(
|
async def index_discord_messages(
|
||||||
session: AsyncSession,
|
session: AsyncSession,
|
||||||
|
|
@ -37,6 +45,7 @@ async def index_discord_messages(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Discord messages from all accessible channels.
|
Index Discord messages from all accessible channels.
|
||||||
|
|
@ -49,6 +58,8 @@ async def index_discord_messages(
|
||||||
start_date: Start date for indexing (YYYY-MM-DD format)
|
start_date: Start date for indexing (YYYY-MM-DD format)
|
||||||
end_date: End date for indexing (YYYY-MM-DD format)
|
end_date: End date for indexing (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
Called periodically with (indexed_count) to prevent task appearing stuck.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -281,6 +292,9 @@ async def index_discord_messages(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
skipped_channels: list[str] = []
|
skipped_channels: list[str] = []
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
# Process each guild and channel
|
# Process each guild and channel
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
log_entry,
|
log_entry,
|
||||||
|
|
@ -290,6 +304,10 @@ async def index_discord_messages(
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for guild in guilds:
|
for guild in guilds:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
guild_id = guild["id"]
|
guild_id = guild["id"]
|
||||||
guild_name = guild["name"]
|
guild_name = guild["name"]
|
||||||
logger.info(f"Processing guild: {guild_name} ({guild_id})")
|
logger.info(f"Processing guild: {guild_name} ({guild_id})")
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ Elasticsearch indexer for SurfSense
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
@ -19,6 +21,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
check_duplicate_document_by_hash,
|
check_duplicate_document_by_hash,
|
||||||
|
|
@ -36,6 +44,7 @@ async def index_elasticsearch_documents(
|
||||||
start_date: str,
|
start_date: str,
|
||||||
end_date: str,
|
end_date: str,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index documents from Elasticsearch into SurfSense
|
Index documents from Elasticsearch into SurfSense
|
||||||
|
|
@ -48,6 +57,7 @@ async def index_elasticsearch_documents(
|
||||||
start_date: Start date for indexing (not used for Elasticsearch, kept for compatibility)
|
start_date: Start date for indexing (not used for Elasticsearch, kept for compatibility)
|
||||||
end_date: End date for indexing (not used for Elasticsearch, kept for compatibility)
|
end_date: End date for indexing (not used for Elasticsearch, kept for compatibility)
|
||||||
update_last_indexed: Whether to update the last indexed timestamp
|
update_last_indexed: Whether to update the last indexed timestamp
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (number of documents processed, error message if any)
|
Tuple of (number of documents processed, error message if any)
|
||||||
|
|
@ -155,6 +165,9 @@ async def index_elasticsearch_documents(
|
||||||
|
|
||||||
documents_processed = 0
|
documents_processed = 0
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
log_entry,
|
log_entry,
|
||||||
|
|
@ -172,6 +185,11 @@ async def index_elasticsearch_documents(
|
||||||
size=min(max_documents, 100), # Scroll in batches
|
size=min(max_documents, 100), # Scroll in batches
|
||||||
fields=config.get("ELASTICSEARCH_FIELDS"),
|
fields=config.get("ELASTICSEARCH_FIELDS"),
|
||||||
):
|
):
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_processed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
if documents_processed >= max_documents:
|
if documents_processed >= max_documents:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ This indexer processes entire repository digests in one pass, dramatically
|
||||||
reducing LLM API calls compared to the previous file-by-file approach.
|
reducing LLM API calls compared to the previous file-by-file approach.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -22,6 +24,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds - update notification every 30 seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
check_duplicate_document_by_hash,
|
check_duplicate_document_by_hash,
|
||||||
|
|
@ -43,6 +51,7 @@ async def index_github_repos(
|
||||||
start_date: str | None = None, # Ignored - GitHub indexes full repo snapshots
|
start_date: str | None = None, # Ignored - GitHub indexes full repo snapshots
|
||||||
end_date: str | None = None, # Ignored - GitHub indexes full repo snapshots
|
end_date: str | None = None, # Ignored - GitHub indexes full repo snapshots
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index GitHub repositories using gitingest for efficient processing.
|
Index GitHub repositories using gitingest for efficient processing.
|
||||||
|
|
@ -62,6 +71,7 @@ async def index_github_repos(
|
||||||
start_date: Ignored - kept for API compatibility
|
start_date: Ignored - kept for API compatibility
|
||||||
end_date: Ignored - kept for API compatibility
|
end_date: Ignored - kept for API compatibility
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -168,7 +178,15 @@ async def index_github_repos(
|
||||||
f"Starting gitingest indexing for {len(repo_full_names_to_index)} repositories."
|
f"Starting gitingest indexing for {len(repo_full_names_to_index)} repositories."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
documents_indexed = 0
|
||||||
|
|
||||||
for repo_full_name in repo_full_names_to_index:
|
for repo_full_name in repo_full_names_to_index:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
if not repo_full_name or not isinstance(repo_full_name, str):
|
if not repo_full_name or not isinstance(repo_full_name, str):
|
||||||
logger.warning(f"Skipping invalid repository entry: {repo_full_name}")
|
logger.warning(f"Skipping invalid repository entry: {repo_full_name}")
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
Google Calendar connector indexer.
|
Google Calendar connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
|
|
@ -21,6 +23,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
check_duplicate_document_by_hash,
|
check_duplicate_document_by_hash,
|
||||||
|
|
@ -39,6 +47,7 @@ async def index_google_calendar_events(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Google Calendar events.
|
Index Google Calendar events.
|
||||||
|
|
@ -52,6 +61,7 @@ async def index_google_calendar_events(
|
||||||
end_date: End date for indexing (YYYY-MM-DD format). Can be in the future to index upcoming events.
|
end_date: End date for indexing (YYYY-MM-DD format). Can be in the future to index upcoming events.
|
||||||
Defaults to today if not provided.
|
Defaults to today if not provided.
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -281,7 +291,14 @@ async def index_google_calendar_events(
|
||||||
0 # Track events skipped due to duplicate content_hash
|
0 # Track events skipped due to duplicate content_hash
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for event in events:
|
for event in events:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
try:
|
try:
|
||||||
event_id = event.get("id")
|
event_id = event.get("id")
|
||||||
event_summary = event.get("summary", "No Title")
|
event_summary = event.get("summary", "No Title")
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
"""Google Drive indexer using Surfsense file processors."""
|
"""Google Drive indexer using Surfsense file processors."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
@ -24,6 +26,12 @@ from app.tasks.connector_indexers.base import (
|
||||||
)
|
)
|
||||||
from app.utils.document_converters import generate_unique_identifier_hash
|
from app.utils.document_converters import generate_unique_identifier_hash
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -38,6 +46,7 @@ async def index_google_drive_files(
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
max_files: int = 500,
|
max_files: int = 500,
|
||||||
include_subfolders: bool = False,
|
include_subfolders: bool = False,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Google Drive files for a specific connector.
|
Index Google Drive files for a specific connector.
|
||||||
|
|
@ -53,6 +62,7 @@ async def index_google_drive_files(
|
||||||
update_last_indexed: Whether to update last_indexed_at timestamp
|
update_last_indexed: Whether to update last_indexed_at timestamp
|
||||||
max_files: Maximum number of files to index
|
max_files: Maximum number of files to index
|
||||||
include_subfolders: Whether to recursively index files in subfolders
|
include_subfolders: Whether to recursively index files in subfolders
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (number_of_indexed_files, error_message)
|
Tuple of (number_of_indexed_files, error_message)
|
||||||
|
|
@ -147,6 +157,7 @@ async def index_google_drive_files(
|
||||||
log_entry=log_entry,
|
log_entry=log_entry,
|
||||||
max_files=max_files,
|
max_files=max_files,
|
||||||
include_subfolders=include_subfolders,
|
include_subfolders=include_subfolders,
|
||||||
|
on_heartbeat_callback=on_heartbeat_callback,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.info(f"Using full scan for connector {connector_id}")
|
logger.info(f"Using full scan for connector {connector_id}")
|
||||||
|
|
@ -163,6 +174,7 @@ async def index_google_drive_files(
|
||||||
log_entry=log_entry,
|
log_entry=log_entry,
|
||||||
max_files=max_files,
|
max_files=max_files,
|
||||||
include_subfolders=include_subfolders,
|
include_subfolders=include_subfolders,
|
||||||
|
on_heartbeat_callback=on_heartbeat_callback,
|
||||||
)
|
)
|
||||||
|
|
||||||
documents_indexed, documents_skipped = result
|
documents_indexed, documents_skipped = result
|
||||||
|
|
@ -383,6 +395,7 @@ async def _index_full_scan(
|
||||||
log_entry: any,
|
log_entry: any,
|
||||||
max_files: int,
|
max_files: int,
|
||||||
include_subfolders: bool = False,
|
include_subfolders: bool = False,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, int]:
|
) -> tuple[int, int]:
|
||||||
"""Perform full scan indexing of a folder."""
|
"""Perform full scan indexing of a folder."""
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
|
|
@ -399,10 +412,17 @@ async def _index_full_scan(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
files_processed = 0
|
files_processed = 0
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
# Queue of folders to process: (folder_id, folder_name)
|
# Queue of folders to process: (folder_id, folder_name)
|
||||||
folders_to_process = [(folder_id, folder_name)]
|
folders_to_process = [(folder_id, folder_name)]
|
||||||
|
|
||||||
while folders_to_process and files_processed < max_files:
|
while folders_to_process and files_processed < max_files:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
current_folder_id, current_folder_name = folders_to_process.pop(0)
|
current_folder_id, current_folder_name = folders_to_process.pop(0)
|
||||||
logger.info(f"Processing folder: {current_folder_name} ({current_folder_id})")
|
logger.info(f"Processing folder: {current_folder_name} ({current_folder_id})")
|
||||||
page_token = None
|
page_token = None
|
||||||
|
|
@ -485,6 +505,7 @@ async def _index_with_delta_sync(
|
||||||
log_entry: any,
|
log_entry: any,
|
||||||
max_files: int,
|
max_files: int,
|
||||||
include_subfolders: bool = False,
|
include_subfolders: bool = False,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, int]:
|
) -> tuple[int, int]:
|
||||||
"""Perform delta sync indexing using change tracking.
|
"""Perform delta sync indexing using change tracking.
|
||||||
|
|
||||||
|
|
@ -515,7 +536,14 @@ async def _index_with_delta_sync(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
files_processed = 0
|
files_processed = 0
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for change in changes:
|
for change in changes:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
if files_processed >= max_files:
|
if files_processed >= max_files:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
Google Gmail connector indexer.
|
Google Gmail connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from google.oauth2.credentials import Credentials
|
from google.oauth2.credentials import Credentials
|
||||||
|
|
@ -23,6 +25,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
calculate_date_range,
|
calculate_date_range,
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
|
|
@ -43,6 +51,7 @@ async def index_google_gmail_messages(
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
max_messages: int = 1000,
|
max_messages: int = 1000,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str]:
|
) -> tuple[int, str]:
|
||||||
"""
|
"""
|
||||||
Index Gmail messages for a specific connector.
|
Index Gmail messages for a specific connector.
|
||||||
|
|
@ -56,6 +65,7 @@ async def index_google_gmail_messages(
|
||||||
end_date: End date for filtering messages (YYYY-MM-DD format)
|
end_date: End date for filtering messages (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
max_messages: Maximum number of messages to fetch (default: 100)
|
max_messages: Maximum number of messages to fetch (default: 100)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (number_of_indexed_messages, status_message)
|
Tuple of (number_of_indexed_messages, status_message)
|
||||||
|
|
@ -212,7 +222,15 @@ async def index_google_gmail_messages(
|
||||||
documents_indexed = 0
|
documents_indexed = 0
|
||||||
skipped_messages = []
|
skipped_messages = []
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for message in messages:
|
for message in messages:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
try:
|
try:
|
||||||
# Extract message information
|
# Extract message information
|
||||||
message_id = message.get("id", "")
|
message_id = message.get("id", "")
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ Jira connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -20,6 +22,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds - update notification every 30 seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
calculate_date_range,
|
calculate_date_range,
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
|
|
@ -39,6 +47,7 @@ async def index_jira_issues(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Jira issues and comments.
|
Index Jira issues and comments.
|
||||||
|
|
@ -51,6 +60,7 @@ async def index_jira_issues(
|
||||||
start_date: Start date for indexing (YYYY-MM-DD format)
|
start_date: Start date for indexing (YYYY-MM-DD format)
|
||||||
end_date: End date for indexing (YYYY-MM-DD format)
|
end_date: End date for indexing (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -169,7 +179,14 @@ async def index_jira_issues(
|
||||||
skipped_issues = []
|
skipped_issues = []
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for issue in issues:
|
for issue in issues:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
try:
|
try:
|
||||||
issue_id = issue.get("key")
|
issue_id = issue.get("key")
|
||||||
issue_identifier = issue.get("key", "")
|
issue_identifier = issue.get("key", "")
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
Linear connector indexer.
|
Linear connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -19,6 +21,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds - update notification every 30 seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
calculate_date_range,
|
calculate_date_range,
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
|
|
@ -38,6 +46,7 @@ async def index_linear_issues(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Linear issues and comments.
|
Index Linear issues and comments.
|
||||||
|
|
@ -50,6 +59,7 @@ async def index_linear_issues(
|
||||||
start_date: Start date for indexing (YYYY-MM-DD format)
|
start_date: Start date for indexing (YYYY-MM-DD format)
|
||||||
end_date: End date for indexing (YYYY-MM-DD format)
|
end_date: End date for indexing (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -188,6 +198,9 @@ async def index_linear_issues(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
skipped_issues = []
|
skipped_issues = []
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
log_entry,
|
log_entry,
|
||||||
f"Starting to process {len(issues)} Linear issues",
|
f"Starting to process {len(issues)} Linear issues",
|
||||||
|
|
@ -196,6 +209,11 @@ async def index_linear_issues(
|
||||||
|
|
||||||
# Process each issue
|
# Process each issue
|
||||||
for issue in issues:
|
for issue in issues:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
issue_id = issue.get("id", "")
|
issue_id = issue.get("id", "")
|
||||||
issue_identifier = issue.get("identifier", "")
|
issue_identifier = issue.get("identifier", "")
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
Luma connector indexer.
|
Luma connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -19,6 +21,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
check_duplicate_document_by_hash,
|
check_duplicate_document_by_hash,
|
||||||
|
|
@ -37,6 +45,7 @@ async def index_luma_events(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Luma events.
|
Index Luma events.
|
||||||
|
|
@ -50,6 +59,7 @@ async def index_luma_events(
|
||||||
end_date: End date for indexing (YYYY-MM-DD format). Can be in the future to index upcoming events.
|
end_date: End date for indexing (YYYY-MM-DD format). Can be in the future to index upcoming events.
|
||||||
Defaults to today if not provided.
|
Defaults to today if not provided.
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -221,7 +231,14 @@ async def index_luma_events(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
skipped_events = []
|
skipped_events = []
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for event in events:
|
for event in events:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
try:
|
try:
|
||||||
# Luma event structure fields - events have nested 'event' field
|
# Luma event structure fields - events have nested 'event' field
|
||||||
event_data = event.get("event", {})
|
event_data = event.get("event", {})
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
Notion connector indexer.
|
Notion connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
from collections.abc import Awaitable, Callable
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
@ -34,6 +35,13 @@ from .base import (
|
||||||
# Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds) -> None
|
# Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds) -> None
|
||||||
RetryCallbackType = Callable[[str, int, int, float], Awaitable[None]]
|
RetryCallbackType = Callable[[str, int, int, float], Awaitable[None]]
|
||||||
|
|
||||||
|
# Type alias for heartbeat callback
|
||||||
|
# Signature: async callback(indexed_count) -> None
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds - update notification every 30 seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
|
|
||||||
async def index_notion_pages(
|
async def index_notion_pages(
|
||||||
session: AsyncSession,
|
session: AsyncSession,
|
||||||
|
|
@ -44,6 +52,7 @@ async def index_notion_pages(
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
on_retry_callback: RetryCallbackType | None = None,
|
on_retry_callback: RetryCallbackType | None = None,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Notion pages from all accessible pages.
|
Index Notion pages from all accessible pages.
|
||||||
|
|
@ -59,6 +68,8 @@ async def index_notion_pages(
|
||||||
on_retry_callback: Optional callback for retry progress notifications.
|
on_retry_callback: Optional callback for retry progress notifications.
|
||||||
Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds)
|
Signature: async callback(retry_reason, attempt, max_attempts, wait_seconds)
|
||||||
retry_reason is one of: 'rate_limit', 'server_error', 'timeout'
|
retry_reason is one of: 'rate_limit', 'server_error', 'timeout'
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
Called periodically with (indexed_count) to prevent task appearing stuck.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -211,6 +222,9 @@ async def index_notion_pages(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
skipped_pages = []
|
skipped_pages = []
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
log_entry,
|
log_entry,
|
||||||
f"Starting to process {len(pages)} Notion pages",
|
f"Starting to process {len(pages)} Notion pages",
|
||||||
|
|
@ -219,6 +233,11 @@ async def index_notion_pages(
|
||||||
|
|
||||||
# Process each page
|
# Process each page
|
||||||
for page in pages:
|
for page in pages:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
page_id = page.get("page_id")
|
page_id = page.get("page_id")
|
||||||
page_title = page.get("title", f"Untitled page ({page_id})")
|
page_title = page.get("title", f"Untitled page ({page_id})")
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,8 @@ This connector is only available in self-hosted mode.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
@ -25,6 +27,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
build_document_metadata_string,
|
build_document_metadata_string,
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
|
|
@ -152,6 +160,7 @@ async def index_obsidian_vault(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index notes from a local Obsidian vault.
|
Index notes from a local Obsidian vault.
|
||||||
|
|
@ -167,6 +176,7 @@ async def index_obsidian_vault(
|
||||||
start_date: Start date for filtering (YYYY-MM-DD format) - optional
|
start_date: Start date for filtering (YYYY-MM-DD format) - optional
|
||||||
end_date: End date for filtering (YYYY-MM-DD format) - optional
|
end_date: End date for filtering (YYYY-MM-DD format) - optional
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp
|
update_last_indexed: Whether to update the last_indexed_at timestamp
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -305,7 +315,14 @@ async def index_obsidian_vault(
|
||||||
indexed_count = 0
|
indexed_count = 0
|
||||||
skipped_count = 0
|
skipped_count = 0
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for file_info in files:
|
for file_info in files:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(indexed_count)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
try:
|
try:
|
||||||
file_path = file_info["path"]
|
file_path = file_info["path"]
|
||||||
relative_path = file_info["relative_path"]
|
relative_path = file_info["relative_path"]
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
Slack connector indexer.
|
Slack connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from slack_sdk.errors import SlackApiError
|
from slack_sdk.errors import SlackApiError
|
||||||
|
|
@ -18,6 +20,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds - update notification every 30 seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
build_document_metadata_markdown,
|
build_document_metadata_markdown,
|
||||||
calculate_date_range,
|
calculate_date_range,
|
||||||
|
|
@ -38,6 +46,7 @@ async def index_slack_messages(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Slack messages from all accessible channels.
|
Index Slack messages from all accessible channels.
|
||||||
|
|
@ -50,6 +59,8 @@ async def index_slack_messages(
|
||||||
start_date: Start date for indexing (YYYY-MM-DD format)
|
start_date: Start date for indexing (YYYY-MM-DD format)
|
||||||
end_date: End date for indexing (YYYY-MM-DD format)
|
end_date: End date for indexing (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
Called periodically with (indexed_count) to prevent task appearing stuck.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -164,6 +175,9 @@ async def index_slack_messages(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
skipped_channels = []
|
skipped_channels = []
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
log_entry,
|
log_entry,
|
||||||
f"Starting to process {len(channels)} Slack channels",
|
f"Starting to process {len(channels)} Slack channels",
|
||||||
|
|
@ -172,6 +186,10 @@ async def index_slack_messages(
|
||||||
|
|
||||||
# Process each channel
|
# Process each channel
|
||||||
for channel_obj in channels:
|
for channel_obj in channels:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
channel_id = channel_obj["id"]
|
channel_id = channel_obj["id"]
|
||||||
channel_name = channel_obj["name"]
|
channel_name = channel_obj["name"]
|
||||||
is_private = channel_obj["is_private"]
|
is_private = channel_obj["is_private"]
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
Microsoft Teams connector indexer.
|
Microsoft Teams connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import UTC
|
from datetime import UTC
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -17,6 +19,12 @@ from app.utils.document_converters import (
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds - update notification every 30 seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
build_document_metadata_markdown,
|
build_document_metadata_markdown,
|
||||||
calculate_date_range,
|
calculate_date_range,
|
||||||
|
|
@ -37,6 +45,7 @@ async def index_teams_messages(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index Microsoft Teams messages from all accessible teams and channels.
|
Index Microsoft Teams messages from all accessible teams and channels.
|
||||||
|
|
@ -49,6 +58,8 @@ async def index_teams_messages(
|
||||||
start_date: Start date for indexing (YYYY-MM-DD format)
|
start_date: Start date for indexing (YYYY-MM-DD format)
|
||||||
end_date: End date for indexing (YYYY-MM-DD format)
|
end_date: End date for indexing (YYYY-MM-DD format)
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
Called periodically with (indexed_count) to prevent task appearing stuck.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -161,6 +172,9 @@ async def index_teams_messages(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
skipped_channels = []
|
skipped_channels = []
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
log_entry,
|
log_entry,
|
||||||
f"Starting to process {len(teams)} Teams",
|
f"Starting to process {len(teams)} Teams",
|
||||||
|
|
@ -185,6 +199,11 @@ async def index_teams_messages(
|
||||||
|
|
||||||
# Process each team
|
# Process each team
|
||||||
for team in teams:
|
for team in teams:
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
team_id = team.get("id")
|
team_id = team.get("id")
|
||||||
team_name = team.get("displayName", "Unknown Team")
|
team_name = team.get("displayName", "Unknown Team")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
Webcrawler connector indexer.
|
Webcrawler connector indexer.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
@ -20,6 +22,12 @@ from app.utils.document_converters import (
|
||||||
)
|
)
|
||||||
from app.utils.webcrawler_utils import parse_webcrawler_urls
|
from app.utils.webcrawler_utils import parse_webcrawler_urls
|
||||||
|
|
||||||
|
# Type hint for heartbeat callback
|
||||||
|
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||||
|
|
||||||
|
# Heartbeat interval in seconds
|
||||||
|
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
check_duplicate_document_by_hash,
|
check_duplicate_document_by_hash,
|
||||||
|
|
@ -38,6 +46,7 @@ async def index_crawled_urls(
|
||||||
start_date: str | None = None,
|
start_date: str | None = None,
|
||||||
end_date: str | None = None,
|
end_date: str | None = None,
|
||||||
update_last_indexed: bool = True,
|
update_last_indexed: bool = True,
|
||||||
|
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||||
) -> tuple[int, str | None]:
|
) -> tuple[int, str | None]:
|
||||||
"""
|
"""
|
||||||
Index web page URLs.
|
Index web page URLs.
|
||||||
|
|
@ -50,6 +59,7 @@ async def index_crawled_urls(
|
||||||
start_date: Start date for filtering (YYYY-MM-DD format) - optional
|
start_date: Start date for filtering (YYYY-MM-DD format) - optional
|
||||||
end_date: End date for filtering (YYYY-MM-DD format) - optional
|
end_date: End date for filtering (YYYY-MM-DD format) - optional
|
||||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||||
|
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple containing (number of documents indexed, error message or None)
|
Tuple containing (number of documents indexed, error message or None)
|
||||||
|
|
@ -140,7 +150,14 @@ async def index_crawled_urls(
|
||||||
documents_skipped = 0
|
documents_skipped = 0
|
||||||
failed_urls = []
|
failed_urls = []
|
||||||
|
|
||||||
|
# Heartbeat tracking - update notification periodically to prevent appearing stuck
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
|
|
||||||
for idx, url in enumerate(urls, 1):
|
for idx, url in enumerate(urls, 1):
|
||||||
|
# Check if it's time for a heartbeat update
|
||||||
|
if on_heartbeat_callback and (time.time() - last_heartbeat_time) >= HEARTBEAT_INTERVAL_SECONDS:
|
||||||
|
await on_heartbeat_callback(documents_indexed)
|
||||||
|
last_heartbeat_time = time.time()
|
||||||
try:
|
try:
|
||||||
logger.info(f"Processing URL {idx}/{len(urls)}: {url}")
|
logger.info(f"Processing URL {idx}/{len(urls)}: {url}")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,44 @@ import type { SearchSourceConnector } from "@/contracts/types/connector.types";
|
||||||
import type { InboxItem } from "@/contracts/types/inbox.types";
|
import type { InboxItem } from "@/contracts/types/inbox.types";
|
||||||
import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types";
|
import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Timeout thresholds for stuck task detection
|
||||||
|
*
|
||||||
|
* These align with the backend Celery configuration:
|
||||||
|
* - HARD_TIMEOUT: 8 hours (task_time_limit=28800 in Celery)
|
||||||
|
* Any task running longer than this is definitely dead.
|
||||||
|
*
|
||||||
|
* - STALE_THRESHOLD: 15 minutes without notification updates
|
||||||
|
* If heartbeats are being sent every 30s, missing 15+ minutes of updates
|
||||||
|
* indicates the task has likely crashed or the worker is down.
|
||||||
|
*/
|
||||||
|
const HARD_TIMEOUT_MS = 8 * 60 * 60 * 1000; // 8 hours in milliseconds
|
||||||
|
const STALE_THRESHOLD_MS = 15 * 60 * 1000; // 15 minutes in milliseconds
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if a notification is stale (no updates for too long)
|
||||||
|
* @param updatedAt - ISO timestamp of last notification update
|
||||||
|
* @returns true if the notification hasn't been updated recently
|
||||||
|
*/
|
||||||
|
function isNotificationStale(updatedAt: string | null | undefined): boolean {
|
||||||
|
if (!updatedAt) return false;
|
||||||
|
const lastUpdate = new Date(updatedAt).getTime();
|
||||||
|
const now = Date.now();
|
||||||
|
return now - lastUpdate > STALE_THRESHOLD_MS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if a task has exceeded the hard timeout (definitely dead)
|
||||||
|
* @param startedAt - ISO timestamp when the task started
|
||||||
|
* @returns true if the task has been running longer than the hard limit
|
||||||
|
*/
|
||||||
|
function isTaskTimedOut(startedAt: string | null | undefined): boolean {
|
||||||
|
if (!startedAt) return false;
|
||||||
|
const startTime = new Date(startedAt).getTime();
|
||||||
|
const now = Date.now();
|
||||||
|
return now - startTime > HARD_TIMEOUT_MS;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hook to track which connectors are currently indexing using local state.
|
* Hook to track which connectors are currently indexing using local state.
|
||||||
*
|
*
|
||||||
|
|
@ -13,6 +51,8 @@ import { isConnectorIndexingMetadata } from "@/contracts/types/inbox.types";
|
||||||
* 2. Detecting in_progress notifications from Electric SQL to restore state after remounts
|
* 2. Detecting in_progress notifications from Electric SQL to restore state after remounts
|
||||||
* 3. Clearing indexing state when notifications become completed or failed
|
* 3. Clearing indexing state when notifications become completed or failed
|
||||||
* 4. Clearing indexing state when Electric SQL detects last_indexed_at changed
|
* 4. Clearing indexing state when Electric SQL detects last_indexed_at changed
|
||||||
|
* 5. Detecting stale/stuck tasks that haven't updated in 15+ minutes
|
||||||
|
* 6. Detecting hard timeout (8h) - tasks that definitely cannot still be running
|
||||||
*
|
*
|
||||||
* The actual `last_indexed_at` value comes from Electric SQL/PGlite, not local state.
|
* The actual `last_indexed_at` value comes from Electric SQL/PGlite, not local state.
|
||||||
*/
|
*/
|
||||||
|
|
@ -57,6 +97,7 @@ export function useIndexingConnectors(
|
||||||
|
|
||||||
// Detect notification status changes and update indexing state accordingly
|
// Detect notification status changes and update indexing state accordingly
|
||||||
// This restores spinner state after component remounts and handles all status transitions
|
// This restores spinner state after component remounts and handles all status transitions
|
||||||
|
// Also detects stale/stuck tasks that haven't been updated in a while
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!inboxItems || inboxItems.length === 0) return;
|
if (!inboxItems || inboxItems.length === 0) return;
|
||||||
|
|
||||||
|
|
@ -71,11 +112,26 @@ export function useIndexingConnectors(
|
||||||
const metadata = isConnectorIndexingMetadata(item.metadata) ? item.metadata : null;
|
const metadata = isConnectorIndexingMetadata(item.metadata) ? item.metadata : null;
|
||||||
if (!metadata) continue;
|
if (!metadata) continue;
|
||||||
|
|
||||||
// If status is "in_progress", add connector to indexing set
|
// If status is "in_progress", check if it's actually still running
|
||||||
if (metadata.status === "in_progress") {
|
if (metadata.status === "in_progress") {
|
||||||
if (!newIndexingIds.has(metadata.connector_id)) {
|
// Check for hard timeout (8h) - task is definitely dead
|
||||||
newIndexingIds.add(metadata.connector_id);
|
const timedOut = isTaskTimedOut(metadata.started_at);
|
||||||
hasChanges = true;
|
|
||||||
|
// Check for stale notification (15min without updates) - task likely crashed
|
||||||
|
const stale = isNotificationStale(item.updated_at);
|
||||||
|
|
||||||
|
if (timedOut || stale) {
|
||||||
|
// Task is stuck - don't show as indexing
|
||||||
|
if (newIndexingIds.has(metadata.connector_id)) {
|
||||||
|
newIndexingIds.delete(metadata.connector_id);
|
||||||
|
hasChanges = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Task appears to be genuinely running
|
||||||
|
if (!newIndexingIds.has(metadata.connector_id)) {
|
||||||
|
newIndexingIds.add(metadata.connector_id);
|
||||||
|
hasChanges = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If status is "completed" or "failed", remove connector from indexing set
|
// If status is "completed" or "failed", remove connector from indexing set
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,8 @@ const pendingSyncs = new Map<string, Promise<SyncHandle>>();
|
||||||
// Version for sync state - increment this to force fresh sync when Electric config changes
|
// Version for sync state - increment this to force fresh sync when Electric config changes
|
||||||
// v2: user-specific database architecture
|
// v2: user-specific database architecture
|
||||||
// v3: consistent cutoff date for sync+queries, visibility refresh support
|
// v3: consistent cutoff date for sync+queries, visibility refresh support
|
||||||
const SYNC_VERSION = 3;
|
// v4: heartbeat-based stale notification detection with updated_at tracking
|
||||||
|
const SYNC_VERSION = 4;
|
||||||
|
|
||||||
// Database name prefix for identifying SurfSense databases
|
// Database name prefix for identifying SurfSense databases
|
||||||
const DB_PREFIX = "surfsense-";
|
const DB_PREFIX = "surfsense-";
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue