mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 08:46:22 +02:00
feat: implement Redis heartbeat mechanism for document processing tasks and enhance stale notification cleanup
This commit is contained in:
parent
652eb6ece8
commit
72205ce11b
3 changed files with 350 additions and 30 deletions
|
|
@ -1,6 +1,8 @@
|
|||
"""Celery tasks for document processing."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||
|
|
@ -17,6 +19,79 @@ from app.tasks.document_processors import (
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ===== Redis heartbeat for document processing tasks =====
|
||||
# Same mechanism as connector indexing heartbeats (search_source_connectors_routes.py).
|
||||
# A background coroutine refreshes a Redis key every 60s with a 2-min TTL.
|
||||
# If the Celery worker crashes, the coroutine dies, the key expires, and the
|
||||
# stale_notification_cleanup_task detects the missing key and marks the
|
||||
# notification + document as failed.
|
||||
_doc_heartbeat_redis = None
|
||||
HEARTBEAT_TTL_SECONDS = 120 # 2 minutes — same as connector indexing
|
||||
HEARTBEAT_REFRESH_INTERVAL = 60 # Refresh every 60 seconds
|
||||
|
||||
|
||||
def _get_doc_heartbeat_redis():
|
||||
"""Get Redis client for document processing heartbeat."""
|
||||
import redis
|
||||
|
||||
global _doc_heartbeat_redis
|
||||
if _doc_heartbeat_redis is None:
|
||||
redis_url = os.getenv(
|
||||
"REDIS_APP_URL",
|
||||
os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0"),
|
||||
)
|
||||
_doc_heartbeat_redis = redis.from_url(redis_url, decode_responses=True)
|
||||
return _doc_heartbeat_redis
|
||||
|
||||
|
||||
def _get_heartbeat_key(notification_id: int) -> str:
|
||||
"""Generate Redis key for document processing heartbeat.
|
||||
|
||||
Uses same key pattern as connector indexing: indexing:heartbeat:{notification_id}
|
||||
"""
|
||||
return f"indexing:heartbeat:{notification_id}"
|
||||
|
||||
|
||||
def _start_heartbeat(notification_id: int) -> None:
|
||||
"""Set initial Redis heartbeat key for a document processing task."""
|
||||
try:
|
||||
key = _get_heartbeat_key(notification_id)
|
||||
_get_doc_heartbeat_redis().setex(key, HEARTBEAT_TTL_SECONDS, "started")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to set initial heartbeat for notification {notification_id}: {e}"
|
||||
)
|
||||
|
||||
|
||||
def _stop_heartbeat(notification_id: int) -> None:
|
||||
"""Delete Redis heartbeat key when task completes (success or failure)."""
|
||||
try:
|
||||
key = _get_heartbeat_key(notification_id)
|
||||
_get_doc_heartbeat_redis().delete(key)
|
||||
except Exception:
|
||||
pass # Key will expire on its own
|
||||
|
||||
|
||||
async def _run_heartbeat_loop(notification_id: int):
|
||||
"""Background coroutine that refreshes Redis heartbeat every 60 seconds.
|
||||
|
||||
This keeps the heartbeat alive while the task is running.
|
||||
When the task finishes, this coroutine is cancelled via heartbeat_task.cancel().
|
||||
When the worker crashes, this coroutine dies with it and the key expires.
|
||||
"""
|
||||
key = _get_heartbeat_key(notification_id)
|
||||
try:
|
||||
while True:
|
||||
await asyncio.sleep(HEARTBEAT_REFRESH_INTERVAL)
|
||||
try:
|
||||
_get_doc_heartbeat_redis().setex(key, HEARTBEAT_TTL_SECONDS, "alive")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to refresh heartbeat for notification {notification_id}: {e}"
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
pass # Normal cancellation when task completes
|
||||
|
||||
|
||||
def get_celery_session_maker():
|
||||
"""
|
||||
|
|
@ -44,8 +119,6 @@ def process_extension_document_task(
|
|||
search_space_id: ID of the search space
|
||||
user_id: ID of the user
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
# Create a new event loop for this task
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
|
@ -196,8 +269,6 @@ def process_youtube_video_task(self, url: str, search_space_id: int, user_id: st
|
|||
search_space_id: ID of the search space
|
||||
user_id: ID of the user
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
|
|
@ -226,6 +297,10 @@ async def _process_youtube_video(url: str, search_space_id: int, user_id: str):
|
|||
)
|
||||
)
|
||||
|
||||
# Start Redis heartbeat for stale task detection
|
||||
_start_heartbeat(notification.id)
|
||||
heartbeat_task = asyncio.create_task(_run_heartbeat_loop(notification.id))
|
||||
|
||||
log_entry = await task_logger.log_task_start(
|
||||
task_name="process_youtube_video",
|
||||
source="document_processor",
|
||||
|
|
@ -243,7 +318,7 @@ async def _process_youtube_video(url: str, search_space_id: int, user_id: str):
|
|||
)
|
||||
|
||||
result = await add_youtube_video_document(
|
||||
session, url, search_space_id, user_id
|
||||
session, url, search_space_id, user_id, notification=notification
|
||||
)
|
||||
|
||||
if result:
|
||||
|
|
@ -307,6 +382,10 @@ async def _process_youtube_video(url: str, search_space_id: int, user_id: str):
|
|||
|
||||
logger.error(f"Error processing YouTube video: {e!s}")
|
||||
raise
|
||||
finally:
|
||||
# Stop heartbeat — key deleted on success, expires on crash
|
||||
heartbeat_task.cancel()
|
||||
_stop_heartbeat(notification.id)
|
||||
|
||||
|
||||
@celery_app.task(name="process_file_upload", bind=True)
|
||||
|
|
@ -322,8 +401,6 @@ def process_file_upload_task(
|
|||
search_space_id: ID of the search space
|
||||
user_id: ID of the user
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import traceback
|
||||
|
||||
logger.info(
|
||||
|
|
@ -370,8 +447,6 @@ async def _process_file_upload(
|
|||
file_path: str, filename: str, search_space_id: int, user_id: str
|
||||
):
|
||||
"""Process file upload with new session."""
|
||||
import os
|
||||
|
||||
from app.tasks.document_processors.file_processors import process_file_in_background
|
||||
|
||||
logger.info(f"[_process_file_upload] Starting async processing for: {filename}")
|
||||
|
|
@ -404,6 +479,10 @@ async def _process_file_upload(
|
|||
f"[_process_file_upload] Notification created with ID: {notification.id if notification else 'None'}"
|
||||
)
|
||||
|
||||
# Start Redis heartbeat for stale task detection
|
||||
_start_heartbeat(notification.id)
|
||||
heartbeat_task = asyncio.create_task(_run_heartbeat_loop(notification.id))
|
||||
|
||||
log_entry = await task_logger.log_task_start(
|
||||
task_name="process_file_upload",
|
||||
source="document_processor",
|
||||
|
|
@ -535,6 +614,10 @@ async def _process_file_upload(
|
|||
)
|
||||
logger.error(error_message)
|
||||
raise
|
||||
finally:
|
||||
# Stop heartbeat — key deleted on success, expires on crash
|
||||
heartbeat_task.cancel()
|
||||
_stop_heartbeat(notification.id)
|
||||
|
||||
|
||||
@celery_app.task(name="process_file_upload_with_document", bind=True)
|
||||
|
|
@ -560,8 +643,6 @@ def process_file_upload_with_document_task(
|
|||
search_space_id: ID of the search space
|
||||
user_id: ID of the user
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import traceback
|
||||
|
||||
logger.info(
|
||||
|
|
@ -640,8 +721,6 @@ async def _process_file_with_document(
|
|||
- Processes the file (parsing, embedding, chunking)
|
||||
- Updates document to 'ready' on success or 'failed' on error
|
||||
"""
|
||||
import os
|
||||
|
||||
from app.db import Document, DocumentStatus
|
||||
from app.tasks.document_processors.base import get_current_timestamp
|
||||
from app.tasks.document_processors.file_processors import (
|
||||
|
|
@ -689,6 +768,19 @@ async def _process_file_with_document(
|
|||
)
|
||||
)
|
||||
|
||||
# Store document_id in notification metadata so cleanup task can find the document
|
||||
if notification and notification.notification_metadata is not None:
|
||||
notification.notification_metadata["document_id"] = document_id
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
|
||||
flag_modified(notification, "notification_metadata")
|
||||
await session.commit()
|
||||
await session.refresh(notification)
|
||||
|
||||
# Start Redis heartbeat for stale task detection
|
||||
_start_heartbeat(notification.id)
|
||||
heartbeat_task = asyncio.create_task(_run_heartbeat_loop(notification.id))
|
||||
|
||||
log_entry = await task_logger.log_task_start(
|
||||
task_name="process_file_upload_with_document",
|
||||
source="document_processor",
|
||||
|
|
@ -822,6 +914,10 @@ async def _process_file_with_document(
|
|||
raise
|
||||
|
||||
finally:
|
||||
# Stop heartbeat — key deleted on success, expires on crash
|
||||
heartbeat_task.cancel()
|
||||
_stop_heartbeat(notification.id)
|
||||
|
||||
# Clean up temp file
|
||||
if os.path.exists(temp_path):
|
||||
try:
|
||||
|
|
@ -856,8 +952,6 @@ def process_circleback_meeting_task(
|
|||
search_space_id: ID of the search space
|
||||
connector_id: ID of the Circleback connector (for deletion support)
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
|
|
@ -897,6 +991,7 @@ async def _process_circleback_meeting(
|
|||
|
||||
# Create notification if user_id is available
|
||||
notification = None
|
||||
heartbeat_task = None
|
||||
if user_id:
|
||||
notification = (
|
||||
await NotificationService.document_processing.notify_processing_started(
|
||||
|
|
@ -908,6 +1003,12 @@ async def _process_circleback_meeting(
|
|||
)
|
||||
)
|
||||
|
||||
# Start Redis heartbeat for stale task detection
|
||||
_start_heartbeat(notification.id)
|
||||
heartbeat_task = asyncio.create_task(
|
||||
_run_heartbeat_loop(notification.id)
|
||||
)
|
||||
|
||||
log_entry = await task_logger.log_task_start(
|
||||
task_name="process_circleback_meeting",
|
||||
source="circleback_webhook",
|
||||
|
|
@ -1000,3 +1101,9 @@ async def _process_circleback_meeting(
|
|||
|
||||
logger.error(f"Error processing Circleback meeting: {e!s}")
|
||||
raise
|
||||
finally:
|
||||
# Stop heartbeat — key deleted on success, expires on crash
|
||||
if heartbeat_task:
|
||||
heartbeat_task.cancel()
|
||||
if notification:
|
||||
_stop_heartbeat(notification.id)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue