mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
feat: added ai file sorting
This commit is contained in:
parent
fa0b47dfca
commit
4bee367d4a
51 changed files with 1703 additions and 72 deletions
|
|
@ -4,6 +4,7 @@ import asyncio
|
|||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from uuid import UUID
|
||||
|
||||
from app.celery_app import celery_app
|
||||
|
|
@ -1551,3 +1552,121 @@ async def _index_uploaded_folder_files_async(
|
|||
heartbeat_task.cancel()
|
||||
if notification_id is not None:
|
||||
_stop_heartbeat(notification_id)
|
||||
|
||||
|
||||
# ===== AI File Sort tasks =====
|
||||
|
||||
# Expiry, in seconds, passed as ``ex=`` when the per-search-space AI sort lock
# is set in Redis.
AI_SORT_LOCK_TTL_SECONDS = 600  # 10 minutes
# Module-level cache for the Redis client; starts as None and is populated on
# first use by _get_ai_sort_redis().
_ai_sort_redis = None
|
||||
|
||||
|
||||
def _get_ai_sort_redis():
    """Return the shared Redis client used for AI sort locking.

    The client is built from ``config.REDIS_APP_URL`` with
    ``decode_responses=True`` the first time this is called and is cached in
    the module-level ``_ai_sort_redis`` for every later call.
    """
    import redis

    global _ai_sort_redis
    if _ai_sort_redis is not None:
        # Already created by an earlier call; reuse it.
        return _ai_sort_redis

    _ai_sort_redis = redis.from_url(config.REDIS_APP_URL, decode_responses=True)
    return _ai_sort_redis
|
||||
|
||||
|
||||
def _ai_sort_lock_key(search_space_id: int) -> str:
|
||||
return f"ai_sort:search_space:{search_space_id}:lock"
|
||||
|
||||
|
||||
@celery_app.task(name="ai_sort_search_space", bind=True, max_retries=1)
def ai_sort_search_space_task(self, search_space_id: int, user_id: str):
    """Celery entry point: AI-sort every document in a search space.

    Runs ``_ai_sort_search_space_async`` to completion on an event loop
    created for this invocation. The loop is closed afterwards whether or
    not the coroutine raised.

    Args:
        search_space_id: ID of the search space to sort.
        user_id: ID of the user on whose behalf the sort runs.
    """
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    try:
        sort_coro = _ai_sort_search_space_async(search_space_id, user_id)
        event_loop.run_until_complete(sort_coro)
    finally:
        event_loop.close()
|
||||
|
||||
|
||||
async def _ai_sort_search_space_async(search_space_id: int, user_id: str):
    """Run a full AI sort of one search space under a per-space Redis lock.

    Only one full sort per search space runs at a time: the lock is taken
    with ``SET NX EX`` and, if it is already held, this call logs and returns
    without doing any work. The sort is also skipped (with a warning) when no
    document-summary LLM is configured for the search space.

    The lock value is a token unique to this run and the lock is released
    with an atomic compare-and-delete. A run that outlives
    ``AI_SORT_LOCK_TTL_SECONDS`` therefore cannot delete a lock that a newer
    run acquired after the original one expired.

    Args:
        search_space_id: ID of the search space whose documents are sorted.
        user_id: ID of the requesting user; not used by this function.
    """
    from uuid import uuid4

    # Deletes the key only if it still holds this run's token; the check and
    # the delete execute atomically inside Redis.
    release_script = (
        'if redis.call("get", KEYS[1]) == ARGV[1] then '
        'return redis.call("del", KEYS[1]) '
        "else return 0 end"
    )

    r = _get_ai_sort_redis()
    lock_key = _ai_sort_lock_key(search_space_id)
    lock_token = uuid4().hex

    if not r.set(lock_key, lock_token, nx=True, ex=AI_SORT_LOCK_TTL_SECONDS):
        logger.info(
            "AI sort already running for search_space=%d, skipping",
            search_space_id,
        )
        return

    t_start = time.perf_counter()
    try:
        from app.services.ai_file_sort_service import ai_sort_all_documents
        from app.services.llm_service import get_document_summary_llm

        async with get_celery_session_maker()() as session:
            llm = await get_document_summary_llm(
                session, search_space_id, disable_streaming=True
            )
            if llm is None:
                logger.warning(
                    "No LLM configured for search_space=%d, skipping AI sort",
                    search_space_id,
                )
                return

            sorted_count, failed_count = await ai_sort_all_documents(
                session, search_space_id, llm
            )
            elapsed = time.perf_counter() - t_start
            logger.info(
                "AI sort search_space=%d done in %.1fs: sorted=%d failed=%d",
                search_space_id,
                elapsed,
                sorted_count,
                failed_count,
            )
    finally:
        # Release only our own lock: if the TTL elapsed mid-run, the key may
        # now belong to a newer run and must be left alone.
        r.eval(release_script, 1, lock_key, lock_token)
|
||||
|
||||
|
||||
@celery_app.task(
    name="ai_sort_document", bind=True, max_retries=2, default_retry_delay=10
)
def ai_sort_document_task(self, search_space_id: int, user_id: str, document_id: int):
    """Celery entry point: AI-sort a single document after it has been indexed.

    Runs ``_ai_sort_document_async`` to completion on an event loop created
    for this invocation. The loop is closed afterwards whether or not the
    coroutine raised.

    Args:
        search_space_id: ID of the search space the document is sorted within.
        user_id: ID of the user on whose behalf the sort runs.
        document_id: ID of the document to sort.
    """
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    try:
        sort_coro = _ai_sort_document_async(search_space_id, user_id, document_id)
        event_loop.run_until_complete(sort_coro)
    finally:
        event_loop.close()
|
||||
|
||||
|
||||
async def _ai_sort_document_async(search_space_id: int, user_id: str, document_id: int):
    """AI-sort one document inside its own database session.

    Loads the document by primary key, resolves the document-summary LLM for
    the search space, hands both to ``ai_sort_document`` and commits. The
    call is a logged no-op when the document does not exist or when no LLM is
    available for the search space.

    Args:
        search_space_id: ID of the search space used to resolve the LLM.
        user_id: ID of the requesting user; not used by this function.
        document_id: Primary key of the document to sort.
    """
    from app.db import Document
    from app.services.ai_file_sort_service import ai_sort_document
    from app.services.llm_service import get_document_summary_llm

    session_factory = get_celery_session_maker()
    async with session_factory() as db_session:
        target_doc = await db_session.get(Document, document_id)
        if target_doc is None:
            logger.warning("Document %d not found, skipping AI sort", document_id)
            return

        summary_llm = await get_document_summary_llm(
            db_session, search_space_id, disable_streaming=True
        )
        if summary_llm is None:
            logger.warning(
                "No LLM for search_space=%d, skipping AI sort of doc=%d",
                search_space_id,
                document_id,
            )
            return

        # Sort first, then persist whatever the service changed on the session.
        await ai_sort_document(db_session, target_doc, summary_llm)
        await db_session.commit()
        logger.info(
            "AI sorted document=%d into search_space=%d",
            document_id,
            search_space_id,
        )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue