mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
fix: Resolve merge conflict in documents_routes.py
- Integrated Docling ETL service with new task logging system
- Maintained consistent logging pattern across all ETL services
- Added progress and success/failure logging for Docling processing
This commit is contained in:
commit
f117d94ef7
34 changed files with 4160 additions and 520 deletions
|
|
@ -23,17 +23,138 @@ class StreamingService:
|
|||
"content": []
|
||||
}
|
||||
]
|
||||
# It is used to send annotations to the frontend
|
||||
|
||||
# DEPRECATED: This sends the full annotation array every time (inefficient)
|
||||
def _format_annotations(self) -> str:
    """
    Serialize the complete annotation state as one annotation stream part

    DEPRECATED: Every call re-sends the whole annotation array.
    Use the delta formatters instead for optimal streaming.

    Returns:
        str: "8:" followed by the JSON-encoded annotations and a newline
    """
    serialized_state = json.dumps(self.message_annotations)
    return "".join(("8:", serialized_state, "\n"))
|
||||
|
||||
# It is used to end Streaming
|
||||
|
||||
def format_terminal_info_delta(self, text: str, message_type: str = "info") -> str:
    """
    Emit one terminal message as a delta annotation

    The message takes the current ``terminal_idx`` as its id, the counter is
    advanced, and the message is also appended to the stored terminal
    annotation (index 0) so the internal state stays complete.

    Args:
        text: The terminal message text
        message_type: The message type (info, error, success, etc.)

    Returns:
        str: The formatted annotation delta string containing only this message
    """
    entry_id = self.terminal_idx
    entry = dict(id=entry_id, text=text, type=message_type)
    self.terminal_idx = entry_id + 1

    # Keep the full history available on the instance
    terminal_history = self.message_annotations[0]["content"]
    terminal_history.append(entry)

    # Only the new entry goes over the wire
    delta = [{"type": "TERMINAL_INFO", "content": [entry]}]
    return "8:" + json.dumps(delta) + "\n"
|
||||
|
||||
def format_sources_delta(self, sources: List[Dict[str, Any]]) -> str:
    """
    Emit the given sources as a delta annotation

    The stored sources annotation (index 1) is replaced with ``sources``
    before the delta is built.

    Args:
        sources: List of source objects

    Returns:
        str: The formatted annotation delta string
    """
    # Mirror the new sources into the instance state
    sources_slot = self.message_annotations[1]
    sources_slot["content"] = sources

    # Send just the sources annotation
    delta = [{"type": "SOURCES", "content": sources}]
    return "8:" + json.dumps(delta) + "\n"
|
||||
|
||||
def format_answer_delta(self, answer_chunk: str) -> str:
    """
    Emit a single answer chunk as a delta annotation

    The chunk is appended to the stored answer annotation (index 2). If the
    stored content is not a list, it is replaced by a one-element list
    holding the chunk.

    Args:
        answer_chunk: The new answer chunk to add

    Returns:
        str: The formatted annotation delta string carrying only the new chunk
    """
    answer_slot = self.message_annotations[2]
    accumulated = answer_slot["content"]
    if not isinstance(accumulated, list):
        # Stored content is not a list yet: start a fresh one
        answer_slot["content"] = [answer_chunk]
    else:
        accumulated.append(answer_chunk)

    delta = [{"type": "ANSWER", "content": [answer_chunk]}]
    return "8:" + json.dumps(delta) + "\n"
|
||||
|
||||
def format_answer_annotation(self, answer_lines: List[str]) -> str:
    """
    Emit the complete answer as a replacement annotation

    The stored answer annotation (index 2) is overwritten with
    ``answer_lines`` and the same list is sent in full.

    Args:
        answer_lines: Complete list of answer lines

    Returns:
        str: The formatted annotation string
    """
    # Overwrite whatever answer content was stored before
    answer_slot = self.message_annotations[2]
    answer_slot["content"] = answer_lines

    # The whole answer is sent, not a chunk
    replacement = [{"type": "ANSWER", "content": answer_lines}]
    return "8:" + json.dumps(replacement) + "\n"
|
||||
|
||||
def format_further_questions_delta(
    self, further_questions: List[Dict[str, Any]]
) -> str:
    """
    Emit the further questions as a delta annotation

    The stored further-questions annotation (index 3) is replaced with
    ``further_questions`` before the delta is built.

    Args:
        further_questions: List of further question objects

    Returns:
        str: The formatted annotation delta string
    """
    # Mirror the new questions into the instance state
    questions_slot = self.message_annotations[3]
    questions_slot["content"] = further_questions

    # Send just the further-questions annotation
    delta = [{"type": "FURTHER_QUESTIONS", "content": further_questions}]
    return "8:" + json.dumps(delta) + "\n"
|
||||
|
||||
def format_text_chunk(self, text: str) -> str:
    """
    Wrap a text chunk in the text stream part

    Args:
        text: The text chunk to stream

    Returns:
        str: "0:" followed by the JSON-encoded text and a newline
    """
    encoded_text = json.dumps(text)
    return "".join(("0:", encoded_text, "\n"))
|
||||
|
||||
def format_error(self, error_message: str) -> str:
    """
    Wrap an error message in the error stream part

    Args:
        error_message: The error message

    Returns:
        str: "3:" followed by the JSON-encoded message and a newline
    """
    encoded_message = json.dumps(error_message)
    return "".join(("3:", encoded_message, "\n"))
|
||||
|
||||
def format_completion(self, prompt_tokens: int = 156, completion_tokens: int = 204) -> str:
|
||||
"""
|
||||
Format a completion message
|
||||
|
|
@ -56,7 +177,12 @@ class StreamingService:
|
|||
}
|
||||
return f'd:{json.dumps(completion_data)}\n'
|
||||
|
||||
|
||||
# DEPRECATED METHODS: Keep for backward compatibility but mark as deprecated
|
||||
def only_update_terminal(self, text: str, message_type: str = "info") -> str:
|
||||
"""
|
||||
DEPRECATED: Use format_terminal_info_delta() instead for optimal streaming
|
||||
"""
|
||||
self.message_annotations[0]["content"].append({
|
||||
"id": self.terminal_idx,
|
||||
"text": text,
|
||||
|
|
@ -66,17 +192,23 @@ class StreamingService:
|
|||
return self.message_annotations
|
||||
|
||||
def only_update_sources(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Replace the stored sources and return the full annotation list

    DEPRECATED: Use format_sources_delta() instead for optimal streaming

    Args:
        sources: List of source objects

    Returns:
        The complete message_annotations list (not a formatted string),
        with the sources annotation (index 1) now holding ``sources``
    """
    self.message_annotations[1]["content"] = sources
    return self.message_annotations
|
||||
|
||||
def only_update_answer(self, answer: List[str]) -> List[Dict[str, Any]]:
    """
    Replace the stored answer and return the full annotation list

    DEPRECATED: Use format_answer_delta() or format_answer_annotation() instead for optimal streaming

    Args:
        answer: Complete list of answer lines

    Returns:
        The complete message_annotations list (not a formatted string),
        with the answer annotation (index 2) now holding ``answer``
    """
    self.message_annotations[2]["content"] = answer
    return self.message_annotations
|
||||
|
||||
|
||||
def only_update_further_questions(self, further_questions: List[Dict[str, Any]]) -> str:
|
||||
"""
|
||||
Update the further questions annotation
|
||||
|
||||
DEPRECATED: Use format_further_questions_delta() instead for optimal streaming
|
||||
|
||||
Args:
|
||||
further_questions: List of further question objects with id and question fields
|
||||
|
||||
|
|
|
|||
204
surfsense_backend/app/services/task_logging_service.py
Normal file
204
surfsense_backend/app/services/task_logging_service.py
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
from typing import Optional, Dict, Any
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.db import Log, LogLevel, LogStatus
|
||||
import logging
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class TaskLoggingService:
    """Service for logging background tasks using the database Log model

    Every public method writes through the session supplied at construction,
    commits, refreshes the entry from the database and mirrors the event to
    the module logger.
    """

    def __init__(self, session: AsyncSession, search_space_id: int):
        self.session = session
        self.search_space_id = search_space_id

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current naive-UTC time as an ISO-8601 string"""
        # Naive UTC (no offset suffix) is kept so new timestamps have the
        # same format as the ones this service has already stored.
        return datetime.utcnow().isoformat()

    @staticmethod
    def _task_name(log_entry: Log) -> str:
        """Return the task name stored in the entry's metadata, or "unknown\""""
        stored = log_entry.log_metadata
        return stored.get("task_name", "unknown") if stored else "unknown"

    async def _persist(self, log_entry: Log) -> None:
        """Commit the session and reload ``log_entry`` from the database"""
        await self.session.commit()
        await self.session.refresh(log_entry)

    async def log_task_start(
        self,
        task_name: str,
        source: str,
        message: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> Log:
        """
        Log the start of a task with IN_PROGRESS status

        Args:
            task_name: Name/identifier of the task
            source: Source service/component (e.g., 'document_processor', 'slack_indexer')
            message: Human-readable message about the task
            metadata: Additional context data; copied, never modified

        Returns:
            Log: The created log entry
        """
        # Build a new dict so the caller's ``metadata`` is left untouched.
        # "task_name" and "started_at" override same-named caller keys.
        log_metadata = {
            **(metadata or {}),
            "task_name": task_name,
            "started_at": self._utc_timestamp(),
        }

        log_entry = Log(
            level=LogLevel.INFO,
            status=LogStatus.IN_PROGRESS,
            message=message,
            source=source,
            log_metadata=log_metadata,
            search_space_id=self.search_space_id
        )

        self.session.add(log_entry)
        await self._persist(log_entry)

        logger.info("Started task %s: %s", task_name, message)
        return log_entry

    async def log_task_success(
        self,
        log_entry: Log,
        message: str,
        additional_metadata: Optional[Dict[str, Any]] = None
    ) -> Log:
        """
        Update a log entry to SUCCESS status

        A "completed_at" timestamp is always recorded, whether or not
        additional metadata is supplied.

        Args:
            log_entry: The original log entry to update
            message: Success message
            additional_metadata: Additional metadata to merge

        Returns:
            Log: The updated log entry
        """
        log_entry.status = LogStatus.SUCCESS
        log_entry.message = message

        # Assign a fresh dict instead of mutating in place: an in-place
        # change to a JSON value is not guaranteed to be noticed by the ORM
        # (NOTE(review): depends on how Log.log_metadata is declared), and
        # the refresh below would then discard it.
        log_entry.log_metadata = {
            **(log_entry.log_metadata or {}),
            **(additional_metadata or {}),
            "completed_at": self._utc_timestamp(),
        }

        await self._persist(log_entry)

        logger.info("Completed task %s: %s", self._task_name(log_entry), message)
        return log_entry

    async def log_task_failure(
        self,
        log_entry: Log,
        error_message: str,
        error_details: Optional[str] = None,
        additional_metadata: Optional[Dict[str, Any]] = None
    ) -> Log:
        """
        Update a log entry to FAILED status

        The entry's level is raised to ERROR. "failed_at" and "error_details"
        are always written (the latter may be None); keys in
        ``additional_metadata`` take precedence over them.

        Args:
            log_entry: The original log entry to update
            error_message: Error message
            error_details: Detailed error information
            additional_metadata: Additional metadata to merge

        Returns:
            Log: The updated log entry
        """
        log_entry.status = LogStatus.FAILED
        log_entry.level = LogLevel.ERROR
        log_entry.message = error_message

        # Fresh dict for the same reason as in log_task_success: make the
        # metadata change visible to the ORM before commit/refresh.
        log_entry.log_metadata = {
            **(log_entry.log_metadata or {}),
            "failed_at": self._utc_timestamp(),
            "error_details": error_details,
            **(additional_metadata or {}),
        }

        await self._persist(log_entry)

        logger.error("Failed task %s: %s", self._task_name(log_entry), error_message)
        if error_details:
            logger.error("Error details: %s", error_details)

        return log_entry

    async def log_task_progress(
        self,
        log_entry: Log,
        progress_message: str,
        progress_metadata: Optional[Dict[str, Any]] = None
    ) -> Log:
        """
        Update a log entry with progress information while keeping IN_PROGRESS status

        Only the message and metadata are touched; the status field is not
        written. A "last_progress_update" timestamp is always recorded.

        Args:
            log_entry: The log entry to update
            progress_message: Progress update message
            progress_metadata: Additional progress metadata

        Returns:
            Log: The updated log entry
        """
        log_entry.message = progress_message

        # Fresh dict so the ORM sees the metadata attribute change.
        log_entry.log_metadata = {
            **(log_entry.log_metadata or {}),
            **(progress_metadata or {}),
            "last_progress_update": self._utc_timestamp(),
        }

        await self._persist(log_entry)

        logger.info(
            "Progress update for task %s: %s",
            self._task_name(log_entry),
            progress_message,
        )
        return log_entry

    async def log_simple_event(
        self,
        level: LogLevel,
        source: str,
        message: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> Log:
        """
        Log a simple event (not a long-running task)

        Args:
            level: Log level stored on the entry (the module logger always
                mirrors the event at INFO)
            source: Source service/component
            message: Log message
            metadata: Additional context data; copied, never modified

        Returns:
            Log: The created log entry
        """
        log_entry = Log(
            level=level,
            status=LogStatus.SUCCESS,  # Simple events are immediately complete
            message=message,
            source=source,
            # Copy so later changes to the caller's dict cannot leak in
            log_metadata=dict(metadata) if metadata else {},
            search_space_id=self.search_space_id
        )

        self.session.add(log_entry)
        await self._persist(log_entry)

        logger.info("Logged event from %s: %s", source, message)
        return log_entry
|
||||
Loading…
Add table
Add a link
Reference in a new issue