mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
feat: fixed and improved search and background task management.
This commit is contained in:
parent
20a13df7e7
commit
17b7348f61
13 changed files with 281 additions and 336 deletions
|
|
@ -36,11 +36,9 @@ def _get_doc_heartbeat_redis():
|
|||
|
||||
global _doc_heartbeat_redis
|
||||
if _doc_heartbeat_redis is None:
|
||||
redis_url = os.getenv(
|
||||
"REDIS_APP_URL",
|
||||
os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0"),
|
||||
_doc_heartbeat_redis = redis.from_url(
|
||||
config.REDIS_APP_URL, decode_responses=True
|
||||
)
|
||||
_doc_heartbeat_redis = redis.from_url(redis_url, decode_responses=True)
|
||||
return _doc_heartbeat_redis
|
||||
|
||||
|
||||
|
|
@ -1104,4 +1102,4 @@ async def _process_circleback_meeting(
|
|||
if heartbeat_task:
|
||||
heartbeat_task.cancel()
|
||||
if notification:
|
||||
_stop_heartbeat(notification.id)
|
||||
_stop_heartbeat(notification.id)
|
||||
|
|
@ -46,16 +46,10 @@ def get_celery_session_maker():
|
|||
|
||||
def _clear_generating_podcast(search_space_id: int) -> None:
|
||||
"""Clear the generating podcast marker from Redis when task completes."""
|
||||
import os
|
||||
|
||||
import redis
|
||||
|
||||
try:
|
||||
redis_url = os.getenv(
|
||||
"REDIS_APP_URL",
|
||||
os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0"),
|
||||
)
|
||||
client = redis.from_url(redis_url, decode_responses=True)
|
||||
client = redis.from_url(config.REDIS_APP_URL, decode_responses=True)
|
||||
key = f"podcast:generating:{search_space_id}"
|
||||
client.delete(key)
|
||||
logger.info(
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@ from sqlalchemy.pool import NullPool
|
|||
|
||||
from app.celery_app import celery_app
|
||||
from app.config import config
|
||||
from app.db import SearchSourceConnector, SearchSourceConnectorType
|
||||
from app.db import Notification, SearchSourceConnector, SearchSourceConnectorType
|
||||
from app.utils.indexing_locks import is_connector_indexing_locked
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -107,6 +108,32 @@ async def _check_and_trigger_schedules():
|
|||
|
||||
# Trigger indexing for each due connector
|
||||
for connector in due_connectors:
|
||||
# Primary guard: Redis lock indicates a task is currently running.
|
||||
if is_connector_indexing_locked(connector.id):
|
||||
logger.info(
|
||||
f"Skipping periodic indexing for connector {connector.id} "
|
||||
"(Redis lock indicates indexing is already in progress)"
|
||||
)
|
||||
continue
|
||||
|
||||
# Skip scheduling if a sync for this connector is already in progress.
|
||||
# This prevents duplicate tasks from piling up under slow/rate-limited providers.
|
||||
in_progress_result = await session.execute(
|
||||
select(Notification.id).where(
|
||||
Notification.type == "connector_indexing",
|
||||
Notification.notification_metadata["connector_id"].astext
|
||||
== str(connector.id),
|
||||
Notification.notification_metadata["status"].astext
|
||||
== "in_progress",
|
||||
)
|
||||
)
|
||||
if in_progress_result.first():
|
||||
logger.info(
|
||||
f"Skipping periodic indexing for connector {connector.id} "
|
||||
"(already has in-progress indexing notification)"
|
||||
)
|
||||
continue
|
||||
|
||||
task = task_map.get(connector.connector_type)
|
||||
if task:
|
||||
logger.info(
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ Detection mechanism:
|
|||
import contextlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import UTC, datetime
|
||||
|
||||
import redis
|
||||
|
|
@ -52,11 +51,7 @@ def get_redis_client() -> redis.Redis:
|
|||
"""Get or create Redis client for heartbeat checking."""
|
||||
global _redis_client
|
||||
if _redis_client is None:
|
||||
redis_url = os.getenv(
|
||||
"REDIS_APP_URL",
|
||||
os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0"),
|
||||
)
|
||||
_redis_client = redis.from_url(redis_url, decode_responses=True)
|
||||
_redis_client = redis.from_url(config.REDIS_APP_URL, decode_responses=True)
|
||||
return _redis_client
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -52,10 +52,22 @@ def safe_set_chunks(document: Document, chunks: list) -> None:
|
|||
# Instead of: document.chunks = chunks (DANGEROUS!)
|
||||
safe_set_chunks(document, chunks) # Always safe
|
||||
"""
|
||||
from sqlalchemy.orm import object_session
|
||||
from sqlalchemy.orm.attributes import set_committed_value
|
||||
|
||||
# Keep relationship assignment lazy-load-safe.
|
||||
set_committed_value(document, "chunks", chunks)
|
||||
|
||||
# Ensure chunk rows are actually persisted.
|
||||
# set_committed_value bypasses normal unit-of-work tracking, so we need to
|
||||
# explicitly attach chunk objects to the current session.
|
||||
session = object_session(document)
|
||||
if session is not None:
|
||||
if document.id is not None:
|
||||
for chunk in chunks:
|
||||
chunk.document_id = document.id
|
||||
session.add_all(chunks)
|
||||
|
||||
|
||||
def parse_date_flexible(date_str: str) -> datetime:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -38,10 +38,22 @@ def safe_set_chunks(document: Document, chunks: list) -> None:
|
|||
# Instead of: document.chunks = chunks (DANGEROUS!)
|
||||
safe_set_chunks(document, chunks) # Always safe
|
||||
"""
|
||||
from sqlalchemy.orm import object_session
|
||||
from sqlalchemy.orm.attributes import set_committed_value
|
||||
|
||||
# Keep relationship assignment lazy-load-safe.
|
||||
set_committed_value(document, "chunks", chunks)
|
||||
|
||||
# Ensure chunk rows are actually persisted.
|
||||
# set_committed_value bypasses normal unit-of-work tracking, so we need to
|
||||
# explicitly attach chunk objects to the current session.
|
||||
session = object_session(document)
|
||||
if session is not None:
|
||||
if document.id is not None:
|
||||
for chunk in chunks:
|
||||
chunk.document_id = document.id
|
||||
session.add_all(chunks)
|
||||
|
||||
|
||||
def get_current_timestamp() -> datetime:
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue