feat: simplify document upload handling

- Introduced a new endpoint for batch document status retrieval, allowing users to check the status of multiple documents in a search space.
- Enhanced the document upload process to return duplicate document IDs and improved response structure.
- Updated schemas to include new response models for document status.
- Removed unused attachment processing code from chat routes and UI components to streamline functionality.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-09 16:46:54 -08:00
parent d11e76aaa1
commit c979609041
15 changed files with 475 additions and 1090 deletions

View file

@ -28,7 +28,6 @@ from app.agents.new_chat.llm_config import (
)
from app.db import ChatVisibility, Document, SurfsenseDocsDocument
from app.prompts import TITLE_GENERATION_PROMPT_TEMPLATE
from app.schemas.new_chat import ChatAttachment
from app.services.chat_session_state_service import (
clear_ai_responding,
set_ai_responding,
@ -37,24 +36,6 @@ from app.services.connector_service import ConnectorService
from app.services.new_streaming_service import VercelStreamingService
from app.utils.content_utils import bootstrap_history_from_db
def format_attachments_as_context(attachments: list[ChatAttachment]) -> str:
    """Render user attachments as an XML-like context block for the agent.

    Returns an empty string when no attachments are given; otherwise each
    attachment's extracted content is wrapped in a CDATA section inside a
    single ``<user_attachments>`` envelope, one tag per line.
    """
    if not attachments:
        return ""

    lines: list[str] = ["<user_attachments>"]
    for index, item in enumerate(attachments, start=1):
        # One <attachment> element per item; CDATA keeps raw content intact.
        lines.extend(
            [
                f"<attachment index='{index}' name='{item.name}' type='{item.type}'>",
                f"<![CDATA[{item.content}]]>",
                "</attachment>",
            ]
        )
    lines.append("</user_attachments>")
    return "\n".join(lines)
def format_mentioned_documents_as_context(documents: list[Document]) -> str:
"""
Format mentioned documents as context for the agent.
@ -203,7 +184,6 @@ async def stream_new_chat(
session: AsyncSession,
user_id: str | None = None,
llm_config_id: int = -1,
attachments: list[ChatAttachment] | None = None,
mentioned_document_ids: list[int] | None = None,
mentioned_surfsense_doc_ids: list[int] | None = None,
checkpoint_id: str | None = None,
@ -224,7 +204,6 @@ async def stream_new_chat(
session: The database session
user_id: The current user's UUID string (for memory tools and session state)
llm_config_id: The LLM configuration ID (default: -1 for first global config)
attachments: Optional attachments with extracted content
needs_history_bootstrap: If True, load message history from DB (for cloned chats)
mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat
mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat
@ -360,13 +339,10 @@ async def stream_new_chat(
)
mentioned_surfsense_docs = list(result.scalars().all())
# Format the user query with context (attachments + mentioned documents + surfsense docs)
# Format the user query with context (mentioned documents + SurfSense docs)
final_query = user_query
context_parts = []
if attachments:
context_parts.append(format_attachments_as_context(attachments))
if mentioned_documents:
context_parts.append(
format_mentioned_documents_as_context(mentioned_documents)
@ -459,39 +435,20 @@ async def stream_new_chat(
last_active_step_id = analyze_step_id
# Determine step title and action verb based on context
if attachments and (mentioned_documents or mentioned_surfsense_docs):
last_active_step_title = "Analyzing your content"
action_verb = "Reading"
elif attachments:
last_active_step_title = "Reading your content"
action_verb = "Reading"
elif mentioned_documents or mentioned_surfsense_docs:
if mentioned_documents or mentioned_surfsense_docs:
last_active_step_title = "Analyzing referenced content"
action_verb = "Analyzing"
else:
last_active_step_title = "Understanding your request"
action_verb = "Processing"
# Build the message with inline context about attachments/documents
# Build the message with inline context about referenced documents
processing_parts = []
# Add the user query
query_text = user_query[:80] + ("..." if len(user_query) > 80 else "")
processing_parts.append(query_text)
# Add file attachment names inline
if attachments:
attachment_names = []
for attachment in attachments:
name = attachment.name
if len(name) > 30:
name = name[:27] + "..."
attachment_names.append(name)
if len(attachment_names) == 1:
processing_parts.append(f"[{attachment_names[0]}]")
else:
processing_parts.append(f"[{len(attachment_names)} files]")
# Add mentioned document names inline
if mentioned_documents:
doc_names = []