diff --git a/.dockerignore b/.dockerignore
index ad6805174..70d7fb07e 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -75,7 +75,6 @@ surfsense_backend/lib64/
# Logs
**/*.log
-**/logs/
# Temporary files
**/tmp/
diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
index 8fd5f3b71..6c8deb409 100644
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@@ -50,6 +50,9 @@ def create_surfsense_deep_agent(
- display_image: Display images in chat
- scrape_webpage: Extract content from webpages
+ The agent also includes TodoListMiddleware by default (via create_deep_agent), which provides:
+ - write_todos: Create and update planning/todo lists for complex tasks
+
The system prompt can be configured via agent_config:
- Custom system instructions (or use defaults)
- Citation toggle (enable/disable citation requirements)
@@ -138,6 +141,7 @@ def create_surfsense_deep_agent(
system_prompt = build_surfsense_system_prompt()
# Create the deep agent with system prompt and checkpointer
+ # Note: TodoListMiddleware (write_todos) is included by default in create_deep_agent
agent = create_deep_agent(
model=llm,
tools=tools,
diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py
index 61a8fbdd6..695d62bfb 100644
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@@ -64,18 +64,23 @@ You have access to the following tools:
- The preview card will automatically be displayed in the chat.
4. display_image: Display an image in the chat with metadata.
- - Use this tool when you want to show an image from a URL to the user.
+ - Use this tool ONLY when you have a valid public HTTP/HTTPS image URL to show.
- This displays the image with an optional title, description, and source attribution.
- - Common use cases:
- * Showing an image from a URL mentioned in the conversation
- * Displaying a diagram, chart, or illustration you're referencing
- * Showing visual examples when explaining concepts
- - IMPORTANT: Do NOT use this tool for user-uploaded image attachments!
- * User attachments are already visible in the chat UI - the user can see them
- * This tool requires a valid HTTP/HTTPS URL, not a local file path
- * When a user uploads an image, just analyze it and respond - don't try to display it again
+ - Valid use cases:
+ * Showing an image from a URL the user explicitly mentioned in their message
+ * Displaying images found in scraped webpage content (from scrape_webpage tool)
+ * Showing a publicly accessible diagram or chart from a known URL
+
+ CRITICAL - NEVER USE THIS TOOL FOR USER-UPLOADED ATTACHMENTS:
+ When a user uploads/attaches an image file to their message:
+ * The image is ALREADY VISIBLE in the chat UI as a thumbnail on their message
+ * You do NOT have a URL for their uploaded image - only extracted text/description
+ * Calling display_image will FAIL and show "Image not available" error
+ * Simply analyze the image content and respond with your analysis - DO NOT try to display it
+ * The user can already see their own uploaded image - they don't need you to show it again
+
- Args:
- - src: The URL of the image to display (must be a valid HTTP/HTTPS image URL, not a local path)
+ - src: The URL of the image (MUST be a valid public HTTP/HTTPS URL that you know exists)
- alt: Alternative text describing the image (for accessibility)
- title: Optional title to display below the image
- description: Optional description providing context about the image
@@ -104,6 +109,20 @@ You have access to the following tools:
* This makes your response more visual and engaging.
* Prioritize showing: diagrams, charts, infographics, key illustrations, or images that help explain the content.
* Don't show every image - just the most relevant 1-3 images that enhance understanding.
+
+6. write_todos: Create and update a planning/todo list to break down complex tasks.
+ - IMPORTANT: Use this tool when the user asks you to create a plan, break down a task, or explain something in structured steps.
+ - This tool creates a visual plan with progress tracking that the user can see in the UI.
+ - When to use:
+ * User asks to "create a plan" or "break down" a task
+ * User asks for "steps" to do something
+ * User asks you to "explain" something in sections
+ * Any multi-step task that would benefit from structured planning
+ - Args:
+ - todos: List of todo items, each with:
+ * content: Description of the task (required)
+ * status: "pending", "in_progress", or "completed" (required)
+ - The tool automatically adds IDs and formats the output for the UI.
- User: "Fetch all my notes and what's in them?"
@@ -134,8 +153,15 @@ You have access to the following tools:
- User: "Show me this image: https://example.com/image.png"
- Call: `display_image(src="https://example.com/image.png", alt="User shared image")`
-- User: "Can you display a diagram of a neural network?"
- - Call: `display_image(src="https://example.com/neural-network.png", alt="Neural network diagram", title="Neural Network Architecture", description="A visual representation of a neural network with input, hidden, and output layers")`
+- User uploads an image file and asks: "What is this image about?"
+ - DO NOT call display_image! The user's uploaded image is already visible in the chat.
+ - Simply analyze the image content (which you receive as extracted text/description) and respond.
+ - WRONG: `display_image(src="...", ...)` - This will fail with "Image not available"
+ - CORRECT: Just provide your analysis directly: "Based on the image you shared, this appears to be..."
+
+- User uploads a screenshot and asks: "Can you explain what's in this image?"
+ - DO NOT call display_image! Just analyze and respond directly.
+ - The user can already see their screenshot - they don't need you to display it again.
- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
- Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
@@ -154,6 +180,34 @@ You have access to the following tools:
- Then, if the content contains useful diagrams/images like ``:
- Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")`
- Then provide your explanation, referencing the displayed image
+
+- User: "Create a plan for building a user authentication system"
+ - Call: `write_todos(todos=[{"content": "Design database schema for users and sessions", "status": "in_progress"}, {"content": "Implement registration and login endpoints", "status": "pending"}, {"content": "Add password reset functionality", "status": "pending"}])`
+ - Then explain each step in detail as you work through them
+
+- User: "Break down how to build a REST API into steps"
+ - Call: `write_todos(todos=[{"content": "Design API endpoints and data models", "status": "in_progress"}, {"content": "Set up server framework and routing", "status": "pending"}, {"content": "Implement CRUD operations", "status": "pending"}, {"content": "Add authentication and error handling", "status": "pending"}])`
+ - Then provide detailed explanations for each step
+
+- User: "Help me plan my trip to Japan"
+ - Call: `write_todos(todos=[{"content": "Research best time to visit and book flights", "status": "in_progress"}, {"content": "Plan itinerary for cities to visit", "status": "pending"}, {"content": "Book accommodations", "status": "pending"}, {"content": "Prepare travel documents and currency", "status": "pending"}])`
+ - Then provide travel preparation guidance
+
+- User: "Break down how to learn guitar"
+ - Call: `write_todos(todos=[{"content": "Learn basic chords and finger positioning", "status": "in_progress"}, {"content": "Practice strumming patterns", "status": "pending"}, {"content": "Learn to read tabs and sheet music", "status": "pending"}, {"content": "Master simple songs", "status": "pending"}])`
+ - Then provide learning milestones and tips
+
+- User: "Plan my workout routine for the week"
+ - Call: `write_todos(todos=[{"content": "Monday: Upper body strength training", "status": "in_progress"}, {"content": "Tuesday: Cardio and core workout", "status": "pending"}, {"content": "Wednesday: Rest or light stretching", "status": "pending"}, {"content": "Thursday: Lower body strength training", "status": "pending"}, {"content": "Friday: Full body HIIT session", "status": "pending"}])`
+ - Then provide exercise details and tips
+
+- User: "Help me organize my home renovation project"
+ - Call: `write_todos(todos=[{"content": "Define scope and create budget", "status": "in_progress"}, {"content": "Research and hire contractors", "status": "pending"}, {"content": "Obtain necessary permits", "status": "pending"}, {"content": "Order materials and fixtures", "status": "pending"}, {"content": "Execute renovation phases", "status": "pending"}])`
+ - Then provide detailed renovation guidance
+
+- User: "What steps should I take to start a podcast?"
+ - Call: `write_todos(todos=[{"content": "Define podcast concept and target audience", "status": "in_progress"}, {"content": "Set up recording equipment and software", "status": "pending"}, {"content": "Plan episode structure and content", "status": "pending"}, {"content": "Record and edit first episodes", "status": "pending"}, {"content": "Choose hosting platform and publish", "status": "pending"}])`
+ - Then provide podcast launch guidance
"""
diff --git a/surfsense_backend/app/agents/new_chat/tools/link_preview.py b/surfsense_backend/app/agents/new_chat/tools/link_preview.py
index 17e89345e..13f8a1f1a 100644
--- a/surfsense_backend/app/agents/new_chat/tools/link_preview.py
+++ b/surfsense_backend/app/agents/new_chat/tools/link_preview.py
@@ -280,15 +280,18 @@ def create_link_preview_tool():
url = f"https://{url}"
try:
+ # Generate a random User-Agent to avoid bot detection
+ ua = UserAgent()
+ user_agent = ua.random
+
# Use a browser-like User-Agent to fetch Open Graph metadata.
- # This is the same approach used by Slack, Discord, Twitter, etc. for link previews.
# We're only fetching publicly available metadata (title, description, thumbnail)
# that websites intentionally expose via OG tags for link preview purposes.
async with httpx.AsyncClient(
timeout=10.0,
follow_redirects=True,
headers={
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+ "User-Agent": user_agent,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
"Accept-Encoding": "gzip, deflate, br",
diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py
index 3b0c2ddac..bc305aecc 100644
--- a/surfsense_backend/app/agents/new_chat/tools/registry.py
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@@ -125,6 +125,7 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
),
requires=[], # firecrawl_api_key is optional
),
+ # Note: write_todos is now provided by TodoListMiddleware from deepagents
# =========================================================================
# ADD YOUR CUSTOM TOOLS BELOW
# =========================================================================
diff --git a/surfsense_backend/app/connectors/webcrawler_connector.py b/surfsense_backend/app/connectors/webcrawler_connector.py
index 3fc61f0b5..7ffc66644 100644
--- a/surfsense_backend/app/connectors/webcrawler_connector.py
+++ b/surfsense_backend/app/connectors/webcrawler_connector.py
@@ -25,7 +25,9 @@ class WebCrawlerConnector:
Initialize the WebCrawlerConnector class.
Args:
- firecrawl_api_key: Firecrawl API key (optional, will use AsyncChromiumLoader if not provided)
+ firecrawl_api_key: Firecrawl API key (optional). If provided, Firecrawl will be tried first
+ and Chromium will be used as fallback if Firecrawl fails. If not provided,
+ Chromium will be used directly.
"""
self.firecrawl_api_key = firecrawl_api_key
self.use_firecrawl = bool(firecrawl_api_key)
@@ -46,6 +48,9 @@ class WebCrawlerConnector:
"""
Crawl a single URL and extract its content.
+ If Firecrawl API key is provided, tries Firecrawl first and falls back to Chromium
+ if Firecrawl fails. If no Firecrawl API key is provided, uses Chromium directly.
+
Args:
url: URL to crawl
formats: List of formats to extract (e.g., ["markdown", "html"]) - only for Firecrawl
@@ -56,19 +61,37 @@ class WebCrawlerConnector:
- content: Extracted content (markdown or HTML)
- metadata: Page metadata (title, description, etc.)
- source: Original URL
- - crawler_type: Type of crawler used
+ - crawler_type: Type of crawler used ("firecrawl" or "chromium")
"""
try:
# Validate URL
if not validators.url(url):
return None, f"Invalid URL: {url}"
+ # Try Firecrawl first if API key is provided
if self.use_firecrawl:
- result = await self._crawl_with_firecrawl(url, formats)
+ try:
+ logger.info(f"[webcrawler] Using Firecrawl for: {url}")
+ result = await self._crawl_with_firecrawl(url, formats)
+ return result, None
+ except Exception as firecrawl_error:
+ # Firecrawl failed, fallback to Chromium
+ logger.warning(
+ f"[webcrawler] Firecrawl failed, falling back to Chromium+Trafilatura for: {url}"
+ )
+ try:
+ result = await self._crawl_with_chromium(url)
+ return result, None
+ except Exception as chromium_error:
+ return (
+ None,
+ f"Both Firecrawl and Chromium failed. Firecrawl error: {firecrawl_error!s}, Chromium error: {chromium_error!s}",
+ )
else:
+ # No Firecrawl API key, use Chromium directly
+ logger.info(f"[webcrawler] Using Chromium+Trafilatura for: {url}")
result = await self._crawl_with_chromium(url)
-
- return result, None
+ return result, None
except Exception as e:
return None, f"Error crawling URL {url}: {e!s}"
@@ -162,10 +185,6 @@ class WebCrawlerConnector:
trafilatura_metadata = None
try:
- logger.info(
- f"Attempting to extract main content from {url} using Trafilatura"
- )
-
# Extract main content as markdown
extracted_content = trafilatura.extract(
raw_html,
@@ -179,23 +198,10 @@ class WebCrawlerConnector:
# Extract metadata using Trafilatura
trafilatura_metadata = trafilatura.extract_metadata(raw_html)
- if extracted_content and len(extracted_content.strip()) > 0:
- logger.info(
- f"Successfully extracted main content from {url} using Trafilatura "
- f"({len(extracted_content)} chars vs {len(raw_html)} chars raw HTML)"
- )
- else:
- logger.warning(
- f"Trafilatura extraction returned empty content for {url}, "
- "falling back to raw HTML"
- )
+ if not extracted_content or len(extracted_content.strip()) == 0:
extracted_content = None
- except Exception as e:
- logger.warning(
- f"Trafilatura extraction failed for {url}: {e}. "
- "Falling back to raw HTML"
- )
+ except Exception:
extracted_content = None
# Build metadata, preferring Trafilatura metadata when available
diff --git a/surfsense_backend/app/routes/logs_routes.py b/surfsense_backend/app/routes/logs_routes.py
index 98fd9141e..e7e00280e 100644
--- a/surfsense_backend/app/routes/logs_routes.py
+++ b/surfsense_backend/app/routes/logs_routes.py
@@ -319,6 +319,9 @@ async def get_logs_summary(
if log.log_metadata
else "Unknown"
)
+ document_id = (
+ log.log_metadata.get("document_id") if log.log_metadata else None
+ )
summary["active_tasks"].append(
{
"id": log.id,
@@ -326,6 +329,7 @@ async def get_logs_summary(
"message": log.message,
"started_at": log.created_at,
"source": log.source,
+ "document_id": document_id,
}
)
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index aff6fa32b..11024e513 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -69,6 +69,30 @@ def format_mentioned_documents_as_context(documents: list[Document]) -> str:
return "\n".join(context_parts)
+def extract_todos_from_deepagents(command_output) -> dict:
+ """
+ Extract todos from deepagents' TodoListMiddleware Command output.
+
+ deepagents returns a Command object with:
+ - Command.update['todos'] = [{'content': '...', 'status': '...'}]
+
+ Returns the todos directly (no transformation needed - UI matches deepagents format).
+ """
+ todos_data = []
+ if hasattr(command_output, "update"):
+ # It's a Command object from deepagents
+ update = command_output.update
+ todos_data = update.get("todos", [])
+ elif isinstance(command_output, dict):
+ # Already a dict - check if it has todos directly or in update
+ if "todos" in command_output:
+ todos_data = command_output.get("todos", [])
+ elif "update" in command_output and isinstance(command_output["update"], dict):
+ todos_data = command_output["update"].get("todos", [])
+
+ return {"todos": todos_data}
+
+
async def stream_new_chat(
user_query: str,
search_space_id: int,
@@ -146,6 +170,16 @@ async def stream_new_chat(
# Create connector service
connector_service = ConnectorService(session, search_space_id=search_space_id)
+ # Get Firecrawl API key from webcrawler connector if configured
+ from app.db import SearchSourceConnectorType
+
+ firecrawl_api_key = None
+ webcrawler_connector = await connector_service.get_connector_by_type(
+ SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, search_space_id
+ )
+ if webcrawler_connector and webcrawler_connector.config:
+ firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY")
+
# Get the PostgreSQL checkpointer for persistent conversation memory
checkpointer = await get_checkpointer()
@@ -157,6 +191,7 @@ async def stream_new_chat(
connector_service=connector_service,
checkpointer=checkpointer,
agent_config=agent_config, # Pass prompt configuration
+ firecrawl_api_key=firecrawl_api_key, # Pass Firecrawl API key if configured
)
# Build input with message history from frontend
@@ -211,7 +246,8 @@ async def stream_new_chat(
config = {
"configurable": {
"thread_id": str(chat_id),
- }
+ },
+ "recursion_limit": 80, # Increase from default 25 to allow more tool iterations
}
# Start the message stream
@@ -233,6 +269,8 @@ async def stream_new_chat(
completed_step_ids: set[str] = set()
# Track if we just finished a tool (text flows silently after tools)
just_finished_tool: bool = False
+ # Track write_todos calls to show "Creating plan" vs "Updating progress"
+ write_todos_call_count: int = 0
def next_thinking_step_id() -> str:
nonlocal thinking_step_counter
@@ -441,6 +479,60 @@ async def stream_new_chat(
status="in_progress",
items=last_active_step_items,
)
+ elif tool_name == "write_todos":
+ # Track write_todos calls for better messaging
+ write_todos_call_count += 1
+ todos = (
+ tool_input.get("todos", [])
+ if isinstance(tool_input, dict)
+ else []
+ )
+ todo_count = len(todos) if isinstance(todos, list) else 0
+
+ if write_todos_call_count == 1:
+ # First call - creating the plan
+ last_active_step_title = "Creating plan"
+ last_active_step_items = [f"Defining {todo_count} tasks..."]
+ else:
+ # Subsequent calls - updating the plan
+ # Try to provide context about what's being updated
+ in_progress_count = (
+ sum(
+ 1
+ for t in todos
+ if isinstance(t, dict)
+ and t.get("status") == "in_progress"
+ )
+ if isinstance(todos, list)
+ else 0
+ )
+ completed_count = (
+ sum(
+ 1
+ for t in todos
+ if isinstance(t, dict)
+ and t.get("status") == "completed"
+ )
+ if isinstance(todos, list)
+ else 0
+ )
+
+ last_active_step_title = "Updating progress"
+ last_active_step_items = (
+ [
+ f"Progress: {completed_count}/{todo_count} completed",
+ f"In progress: {in_progress_count} tasks",
+ ]
+ if completed_count > 0
+ else [f"Working on {todo_count} tasks"]
+ )
+
+ yield streaming_service.format_thinking_step(
+ step_id=tool_step_id,
+ title=last_active_step_title,
+ status="in_progress",
+ items=last_active_step_items,
+ )
elif tool_name == "generate_podcast":
podcast_title = (
tool_input.get("podcast_title", "SurfSense Podcast")
@@ -465,6 +557,15 @@ async def stream_new_chat(
status="in_progress",
items=last_active_step_items,
)
+ # elif tool_name == "ls":
+ # last_active_step_title = "Exploring files"
+ # last_active_step_items = []
+ # yield streaming_service.format_thinking_step(
+ # step_id=tool_step_id,
+ # title="Exploring files",
+ # status="in_progress",
+ # items=None,
+ # )
else:
last_active_step_title = f"Using {tool_name.replace('_', ' ')}"
last_active_step_items = []
@@ -546,9 +647,11 @@ async def stream_new_chat(
tool_name = event.get("name", "unknown_tool")
raw_output = event.get("data", {}).get("output", "")
- # Extract content from ToolMessage if needed
- # LangGraph may return a ToolMessage object instead of raw dict
- if hasattr(raw_output, "content"):
+ # Handle deepagents' write_todos Command object specially
+ if tool_name == "write_todos" and hasattr(raw_output, "update"):
+ # deepagents returns a Command object - extract todos directly
+ tool_output = extract_todos_from_deepagents(raw_output)
+ elif hasattr(raw_output, "content"):
# It's a ToolMessage object - extract the content
content = raw_output.content
# If content is a string that looks like JSON, try to parse it
@@ -707,6 +810,104 @@ async def stream_new_chat(
status="completed",
items=completed_items,
)
+ elif tool_name == "write_todos":
+ # Build completion items for planning/updating
+ if isinstance(tool_output, dict):
+ todos = tool_output.get("todos", [])
+ todo_count = len(todos) if isinstance(todos, list) else 0
+ completed_count = (
+ sum(
+ 1
+ for t in todos
+ if isinstance(t, dict)
+ and t.get("status") == "completed"
+ )
+ if isinstance(todos, list)
+ else 0
+ )
+ in_progress_count = (
+ sum(
+ 1
+ for t in todos
+ if isinstance(t, dict)
+ and t.get("status") == "in_progress"
+ )
+ if isinstance(todos, list)
+ else 0
+ )
+
+ # Use context-aware completion message
+ if last_active_step_title == "Creating plan":
+ completed_items = [f"Created {todo_count} tasks"]
+ else:
+ # Updating progress - show stats
+ completed_items = [
+ f"Progress: {completed_count}/{todo_count} completed",
+ ]
+ if in_progress_count > 0:
+ # Find the currently in-progress task name
+ in_progress_task = next(
+ (
+ t.get("content", "")[:40]
+ for t in todos
+ if isinstance(t, dict)
+ and t.get("status") == "in_progress"
+ ),
+ None,
+ )
+ if in_progress_task:
+ completed_items.append(
+ f"Current: {in_progress_task}..."
+ )
+ else:
+ completed_items = ["Plan updated"]
+ yield streaming_service.format_thinking_step(
+ step_id=original_step_id,
+ title=last_active_step_title,
+ status="completed",
+ items=completed_items,
+ )
+ elif tool_name == "ls":
+ # Build completion items showing file names found
+ if isinstance(tool_output, dict):
+ result = tool_output.get("result", "")
+ elif isinstance(tool_output, str):
+ result = tool_output
+ else:
+ result = str(tool_output) if tool_output else ""
+
+ # Parse file paths and extract just the file names
+ file_names = []
+ if result:
+ # The ls tool returns paths, extract just the file/folder names
+ for line in result.strip().split("\n"):
+ line = line.strip()
+ if line:
+ # Get just the filename from the path
+ name = line.rstrip("/").split("/")[-1]
+ if name and len(name) <= 40:
+ file_names.append(name)
+ elif name:
+ file_names.append(name[:37] + "...")
+
+ # Build display items - wrap file names in brackets for icon rendering
+ if file_names:
+ if len(file_names) <= 5:
+ # Wrap each file name in brackets for styled tile rendering
+ completed_items = [f"[{name}]" for name in file_names]
+ else:
+ # Show first few with brackets and count
+ completed_items = [f"[{name}]" for name in file_names[:4]]
+ completed_items.append(f"(+{len(file_names) - 4} more)")
+ else:
+ completed_items = ["No files found"]
+
+ yield streaming_service.format_thinking_step(
+ step_id=original_step_id,
+ title="Exploring files",
+ status="completed",
+ items=completed_items,
+ )
else:
yield streaming_service.format_thinking_step(
step_id=original_step_id,
@@ -843,6 +1044,27 @@ async def stream_new_chat(
yield streaming_service.format_terminal_info(
"Knowledge base search completed", "success"
)
+ elif tool_name == "write_todos":
+ # Stream the full write_todos result so frontend can render the Plan component
+ yield streaming_service.format_tool_output_available(
+ tool_call_id,
+ tool_output
+ if isinstance(tool_output, dict)
+ else {"result": tool_output},
+ )
+ # Send terminal message with plan info
+ if isinstance(tool_output, dict):
+ todos = tool_output.get("todos", [])
+ todo_count = len(todos) if isinstance(todos, list) else 0
+ yield streaming_service.format_terminal_info(
+ f"Plan created ({todo_count} tasks)",
+ "success",
+ )
+ else:
+ yield streaming_service.format_terminal_info(
+ "Plan created",
+ "success",
+ )
else:
# Default handling for other tools
yield streaming_service.format_tool_output_available(
diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx
index 950ea5fcd..890497310 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx
@@ -30,7 +30,7 @@ export default function AirtableConnectorPage() {
const [isConnecting, setIsConnecting] = useState(false);
const [doesConnectorExist, setDoesConnectorExist] = useState(false);
- const { refetch : fetchConnectors } = useAtomValue(connectorsAtom);
+ const { refetch: fetchConnectors } = useAtomValue(connectorsAtom);
useEffect(() => {
fetchConnectors().then((data) => {
diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-calendar-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-calendar-connector/page.tsx
index fdee2b55b..21f7a31d6 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-calendar-connector/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-calendar-connector/page.tsx
@@ -32,13 +32,14 @@ export default function GoogleCalendarConnectorPage() {
const [isConnecting, setIsConnecting] = useState(false);
const [doesConnectorExist, setDoesConnectorExist] = useState(false);
- const { refetch : fetchConnectors } = useAtomValue(connectorsAtom);
+ const { refetch: fetchConnectors } = useAtomValue(connectorsAtom);
useEffect(() => {
fetchConnectors().then((data) => {
const connectors = data.data || [];
const connector = connectors.find(
- (c: SearchSourceConnector) => c.connector_type === EnumConnectorName.GOOGLE_CALENDAR_CONNECTOR
+ (c: SearchSourceConnector) =>
+ c.connector_type === EnumConnectorName.GOOGLE_CALENDAR_CONNECTOR
);
if (connector) {
setDoesConnectorExist(true);
diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx
index 8beef0e4b..d99c35fac 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx
@@ -32,7 +32,7 @@ export default function GoogleGmailConnectorPage() {
const [isConnecting, setIsConnecting] = useState(false);
const [doesConnectorExist, setDoesConnectorExist] = useState(false);
- const { refetch : fetchConnectors } = useAtomValue(connectorsAtom);
+ const { refetch: fetchConnectors } = useAtomValue(connectorsAtom);
useEffect(() => {
fetchConnectors().then((data) => {
diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/luma-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/luma-connector/page.tsx
index 29354263b..e253b6a01 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/luma-connector/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/luma-connector/page.tsx
@@ -67,7 +67,7 @@ export default function LumaConnectorPage() {
},
});
- const { refetch : fetchConnectors } = useAtomValue(connectorsAtom);
+ const { refetch: fetchConnectors } = useAtomValue(connectorsAtom);
useEffect(() => {
fetchConnectors().then((data) => {
diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/webcrawler-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/webcrawler-connector/page.tsx
index 4a63244b8..5423cd94f 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/webcrawler-connector/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/webcrawler-connector/page.tsx
@@ -55,7 +55,7 @@ export default function WebcrawlerConnectorPage() {
const [isSubmitting, setIsSubmitting] = useState(false);
const [doesConnectorExist, setDoesConnectorExist] = useState(false);
- const { refetch : fetchConnectors } = useAtomValue(connectorsAtom);
+ const { refetch: fetchConnectors } = useAtomValue(connectorsAtom);
const { mutateAsync: createConnector } = useAtomValue(createConnectorMutationAtom);
// Initialize the form
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/ProcessingIndicator.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/ProcessingIndicator.tsx
new file mode 100644
index 000000000..827694d22
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/ProcessingIndicator.tsx
@@ -0,0 +1,43 @@
+"use client";
+
+import { Loader2 } from "lucide-react";
+import { motion, AnimatePresence } from "motion/react";
+import { useTranslations } from "next-intl";
+import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
+
+interface ProcessingIndicatorProps {
+ activeTasksCount: number; // presumably the number of tasks still running — confirm against caller; 0 makes the component render nothing
+}
+
+export function ProcessingIndicator({ activeTasksCount }: ProcessingIndicatorProps) { // Renders only while activeTasksCount is non-zero
+ const t = useTranslations("documents"); // translation lookup scoped to the "documents" namespace
+
+ if (activeTasksCount === 0) return null; // nothing to show when no tasks are active
+
+ return ( // NOTE(review): the returned markup is not visible in this view — confirm what is rendered
+
+
+
+
+
+
+
+ );
+}
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
index 065267b2e..69458d5bc 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@@ -2,11 +2,14 @@
import { useQuery } from "@tanstack/react-query";
import { useAtomValue } from "jotai";
+import { RefreshCw } from "lucide-react";
import { motion } from "motion/react";
import { useParams } from "next/navigation";
import { useTranslations } from "next-intl";
-import { useCallback, useEffect, useId, useMemo, useState } from "react";
+import { useCallback, useEffect, useId, useMemo, useRef, useState } from "react";
import { toast } from "sonner";
+import { useLogsSummary } from "@/hooks/use-logs";
+import { Button } from "@/components/ui/button";
import { deleteDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms";
import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms";
import type { DocumentTypeEnum } from "@/contracts/types/document.types";
@@ -15,6 +18,7 @@ import { cacheKeys } from "@/lib/query-client/cache-keys";
import { DocumentsFilters } from "./components/DocumentsFilters";
import { DocumentsTableShell, type SortKey } from "./components/DocumentsTableShell";
import { PaginationControls } from "./components/PaginationControls";
+import { ProcessingIndicator } from "./components/ProcessingIndicator";
import type { ColumnVisibility } from "./components/types";
function useDebounced(value: T, delay = 250) {
@@ -127,7 +131,22 @@ export default function DocumentsTable() {
} else {
await refetchDocuments();
}
- }, [debouncedSearch, refetchSearch, refetchDocuments]);
+ toast.success(t("refresh_success") || "Documents refreshed");
+ }, [debouncedSearch, refetchSearch, refetchDocuments, t]);
+
+ // Set up polling for active tasks
+ const { summary } = useLogsSummary(searchSpaceId, 24, { refetchInterval: 5000 });
+ const activeTasksCount = summary?.active_tasks.length || 0;
+ const prevActiveTasksCount = useRef(activeTasksCount);
+
+ // Auto-refresh when a task finishes
+ useEffect(() => {
+ if (prevActiveTasksCount.current > activeTasksCount) {
+ // A task has finished!
+ refreshCurrentView();
+ }
+ prevActiveTasksCount.current = activeTasksCount;
+ }, [activeTasksCount, refreshCurrentView]);
// Create a delete function for single document deletion
const deleteDocument = useCallback(
@@ -189,8 +208,26 @@ export default function DocumentsTable() {
initial={{ opacity: 0, y: 20 }}
animate={{ opacity: 1, y: 0 }}
transition={{ duration: 0.3 }}
- className="w-full px-6 py-4 min-h-[calc(100vh-64px)]"
+ className="w-full px-6 py-4 space-y-6 min-h-[calc(100vh-64px)]"
>
+
+
+
{t("title")}
+
{t("subtitle")}
+
+
+
+
+
+
;
+
+/**
+ * Pull the persisted attachment items out of a stored message's content
+ * (validated with the Zod schema AttachmentsPartSchema).
+ *
+ * Each element of `content` is checked in order; the `items` of the first
+ * element that validates are returned. A non-array `content`, or an array
+ * without any valid attachments part, yields an empty list.
+ */
+function extractPersistedAttachments(content: unknown): PersistedAttachment[] {
+	if (Array.isArray(content)) {
+		for (const candidate of content) {
+			const parsed = AttachmentsPartSchema.safeParse(candidate);
+			if (parsed.success) {
+				// First valid attachments part wins; later parts are not inspected.
+				return parsed.data.items;
+			}
+		}
+	}
+
+	return [];
+}
+
/**
* Convert backend message to assistant-ui ThreadMessageLike format
* Filters out 'thinking-steps' part as it's handled separately via messageThinkingSteps
+ * Restores attachments for user messages from persisted data
*/
function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
let content: ThreadMessageLike["content"];
@@ -105,8 +148,12 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
const filteredContent = msg.content.filter((part: unknown) => {
if (typeof part !== "object" || part === null || !("type" in part)) return true;
const partType = (part as { type: string }).type;
- // Filter out thinking-steps and mentioned-documents
- return partType !== "thinking-steps" && partType !== "mentioned-documents";
+ // Filter out thinking-steps, mentioned-documents, and attachments
+ return (
+ partType !== "thinking-steps" &&
+ partType !== "mentioned-documents" &&
+ partType !== "attachments"
+ );
});
content =
filteredContent.length > 0
@@ -116,11 +163,31 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
content = [{ type: "text", text: String(msg.content) }];
}
+ // Restore attachments for user messages
+ let attachments: ThreadMessageLike["attachments"];
+ if (msg.role === "user") {
+ const persistedAttachments = extractPersistedAttachments(msg.content);
+ if (persistedAttachments.length > 0) {
+ attachments = persistedAttachments.map((att) => ({
+ id: att.id,
+ name: att.name,
+ type: att.type as "document" | "image" | "file",
+ contentType: att.contentType || "application/octet-stream",
+ status: { type: "complete" as const },
+ content: [],
+ // Custom fields for our ChatAttachment interface
+ imageDataUrl: att.imageDataUrl,
+ extractedContent: att.extractedContent,
+ }));
+ }
+ }
+
return {
id: `msg-${msg.id}`,
role: msg.role,
content,
createdAt: new Date(msg.created_at),
+ attachments,
};
}
@@ -132,6 +199,7 @@ const TOOLS_WITH_UI = new Set([
"link_preview",
"display_image",
"scrape_webpage",
+ "write_todos",
]);
/**
@@ -146,6 +214,7 @@ interface ThinkingStepData {
export default function NewChatPage() {
const params = useParams();
+ const queryClient = useQueryClient();
const [isInitializing, setIsInitializing] = useState(true);
const [threadId, setThreadId] = useState(null);
const [messages, setMessages] = useState([]);
@@ -163,6 +232,7 @@ export default function NewChatPage() {
const setMentionedDocumentIds = useSetAtom(mentionedDocumentIdsAtom);
const setMentionedDocuments = useSetAtom(mentionedDocumentsAtom);
const setMessageDocumentsMap = useSetAtom(messageDocumentsMapAtom);
+ const hydratePlanState = useSetAtom(hydratePlanStateAtom);
// Create the attachment adapter for file processing
const attachmentAdapter = useMemo(() => createAttachmentAdapter(), []);
@@ -198,6 +268,7 @@ export default function NewChatPage() {
setMentionedDocumentIds([]);
setMentionedDocuments([]);
setMessageDocumentsMap({});
+ clearPlanOwnerRegistry(); // Reset plan ownership for new chat
try {
if (urlChatId > 0) {
@@ -219,6 +290,11 @@ export default function NewChatPage() {
if (steps.length > 0) {
restoredThinkingSteps.set(`msg-${msg.id}`, steps);
}
+ // Hydrate write_todos plan state from persisted tool calls
+ const writeTodosCalls = extractWriteTodosFromContent(msg.content);
+ for (const todoData of writeTodosCalls) {
+ hydratePlanState(todoData);
+ }
}
if (msg.role === "user") {
const docs = extractMentionedDocuments(msg.content);
@@ -247,7 +323,13 @@ export default function NewChatPage() {
} finally {
setIsInitializing(false);
}
- }, [urlChatId, setMessageDocumentsMap, setMentionedDocumentIds, setMentionedDocuments]);
+ }, [
+ urlChatId,
+ setMessageDocumentsMap,
+ setMentionedDocumentIds,
+ setMentionedDocuments,
+ hydratePlanState,
+ ]);
// Initialize on mount
useEffect(() => {
@@ -306,6 +388,7 @@ export default function NewChatPage() {
// Lazy thread creation: create thread on first message if it doesn't exist
let currentThreadId = threadId;
+ let isNewThread = false;
if (!currentThreadId) {
try {
const newThread = await createThread(searchSpaceId, "New Chat");
@@ -315,6 +398,7 @@ export default function NewChatPage() {
// Track chat creation
trackChatCreated(searchSpaceId, currentThreadId);
+ isNewThread = true;
// Update URL silently using browser API (not router.replace) to avoid
// interrupting the ongoing fetch/streaming with React navigation
window.history.replaceState(
@@ -361,25 +445,50 @@ export default function NewChatPage() {
}));
}
- // Persist user message with mentioned documents (don't await, fire and forget)
- const persistContent =
- mentionedDocuments.length > 0
- ? [
- ...message.content,
- {
- type: "mentioned-documents",
- documents: mentionedDocuments.map((doc) => ({
- id: doc.id,
- title: doc.title,
- document_type: doc.document_type,
- })),
- },
- ]
- : message.content;
+ // Persist user message with mentioned documents and attachments (don't await, fire and forget)
+ const persistContent: unknown[] = [...message.content];
+
+ // Add mentioned documents for persistence
+ if (mentionedDocuments.length > 0) {
+ persistContent.push({
+ type: "mentioned-documents",
+ documents: mentionedDocuments.map((doc) => ({
+ id: doc.id,
+ title: doc.title,
+ document_type: doc.document_type,
+ })),
+ });
+ }
+
+ // Add attachments for persistence (so they survive page reload)
+ if (message.attachments && message.attachments.length > 0) {
+ persistContent.push({
+ type: "attachments",
+ items: message.attachments.map((att) => ({
+ id: att.id,
+ name: att.name,
+ type: att.type,
+ contentType: (att as { contentType?: string }).contentType,
+ // Include imageDataUrl for images so they can be displayed after reload
+ imageDataUrl: (att as { imageDataUrl?: string }).imageDataUrl,
+ // Include extractedContent for context (already extracted, no re-processing needed)
+ extractedContent: (att as { extractedContent?: string }).extractedContent,
+ })),
+ });
+ }
+
appendMessage(currentThreadId, {
role: "user",
content: persistContent,
- }).catch((err) => console.error("Failed to persist user message:", err));
+ })
+ .then(() => {
+ // For new threads, the backend updates the title from the first user message
+ // Invalidate threads query so sidebar shows the updated title in real-time
+ if (isNewThread) {
+ queryClient.invalidateQueries({ queryKey: ["threads", String(searchSpaceId)] });
+ }
+ })
+ .catch((err) => console.error("Failed to persist user message:", err));
// Start streaming response
setIsRunning(true);
@@ -676,7 +785,19 @@ export default function NewChatPage() {
}
} catch (error) {
if (error instanceof Error && error.name === "AbortError") {
- // Request was cancelled
+ // Request was cancelled by user - persist partial response if any content was received
+ const hasContent = contentParts.some(
+ (part) =>
+ (part.type === "text" && part.text.length > 0) ||
+ (part.type === "tool-call" && TOOLS_WITH_UI.has(part.toolName))
+ );
+ if (hasContent && currentThreadId) {
+ const partialContent = buildContentForPersistence();
+ appendMessage(currentThreadId, {
+ role: "assistant",
+ content: partialContent,
+ }).catch((err) => console.error("Failed to persist partial assistant message:", err));
+ }
return;
}
console.error("[NewChatPage] Chat error:", error);
@@ -720,6 +841,7 @@ export default function NewChatPage() {
setMentionedDocumentIds,
setMentionedDocuments,
setMessageDocumentsMap,
+ queryClient,
]
);
@@ -789,6 +911,7 @@ export default function NewChatPage() {
+
{
- router.push(`/dashboard/${search_space_id}/chats`);
- }, []);
+ router.push(`/dashboard/${search_space_id}/new-chat`);
+ }, [router, search_space_id]);
return <>>;
}
diff --git a/surfsense_web/atoms/chat/plan-state.atom.ts b/surfsense_web/atoms/chat/plan-state.atom.ts
new file mode 100644
index 000000000..2436dd300
--- /dev/null
+++ b/surfsense_web/atoms/chat/plan-state.atom.ts
@@ -0,0 +1,224 @@
+/**
+ * Plan State Atom
+ *
+ * Tracks the latest state of each plan by title.
+ * When write_todos is called multiple times with the same title,
+ * only the FIRST component renders (stays fixed in position),
+ * subsequent calls just update the shared state.
+ */
+
+import { atom } from "jotai";
+
+export interface PlanTodo {
+ id: string;
+ content: string; // presumably the todo's display text — confirm against the write_todos payload
+ status: "pending" | "in_progress" | "completed" | "cancelled";
+}
+
+export interface PlanState {
+ id: string;
+ title: string; // per the file header, plan state is tracked by title
+ todos: PlanTodo[];
+ lastUpdated: number; // presumably a timestamp (e.g. Date.now()) — TODO confirm
+ /** The toolCallId of the first component that rendered this plan */
+ ownerToolCallId: string;
+}
+
+/**
+ * SYNCHRONOUS ownership registry - prevents race conditions
+ * Only ONE plan allowed per conversation - first plan wins
+ *
+ * This is module-level state: it starts as null, is claimed by the first
+ * registerPlanOwner() call, and stays claimed until clearPlanOwnerRegistry()
+ * sets it back to null.
+ */
+let firstPlanOwner: { toolCallId: string; title: string } | null = null;
+
+/**
+ * Synchronously try to claim ownership of the conversation's plan.
+ * ONE PLAN PER CONVERSATION: only the first write_todos call renders a card;
+ * later calls update the shared state without rendering their own.
+ *
+ * @param title - Plan title; recorded only by the call that claims ownership.
+ * @param toolCallId - Identifier of the tool call asking for ownership.
+ * @returns true when this toolCallId has just claimed, or already holds,
+ * ownership; false when a different toolCallId owns the plan.
+ */
+export function registerPlanOwner(title: string, toolCallId: string): boolean {
+	if (firstPlanOwner) {
+		// Already claimed: the caller is the owner only if the ids match.
+		return firstPlanOwner.toolCallId === toolCallId;
+	}
+
+	// Nothing registered yet, so this call becomes the owner.
+	firstPlanOwner = { toolCallId, title };
+	return true;
+}
+
+/**
+ * Resolve the title a plan should be stored under.
+ *
+ * @param title - Fallback title supplied by the caller.
+ * @returns The registered owner's title when it is a non-empty string;
+ * otherwise (no owner, or an empty owner title) the provided `title`.
+ */
+export function getCanonicalPlanTitle(title: string): string {
+	const ownerTitle = firstPlanOwner ? firstPlanOwner.title : undefined;
+	if (ownerTitle) {
+		return ownerTitle;
+	}
+	return title;
+}
+
+/**
+ * Check if a plan already exists in this conversation
+ *
+ * @returns true while an owner is registered (firstPlanOwner is not null)
+ */
+export function hasPlan(): boolean {
+ return firstPlanOwner !== null;
+}
+
+/**
+ * Get the first plan's info
+ *
+ * @returns the registry entry itself (the same object, not a copy), or null
+ * when no plan owner is registered
+ */
+export function getFirstPlanInfo(): { toolCallId: string; title: string } | null {
+ return firstPlanOwner;
+}
+
+/**
+ * Synchronously check whether a toolCallId owns the plan.
+ *
+ * @param toolCallId - Identifier of the tool call to check.
+ * @returns true when no owner has been registered yet, or when the
+ * registered owner's toolCallId matches; false otherwise.
+ */
+export function isPlanOwner(toolCallId: string): boolean {
+	// With an empty registry every caller is treated as the owner.
+	if (!firstPlanOwner) {
+		return true;
+	}
+	return firstPlanOwner.toolCallId === toolCallId;
+}
+
+/**
+ * Clear ownership registry (call when starting a new chat)
+ *
+ * Resets firstPlanOwner to null, so the next registerPlanOwner() call
+ * claims ownership again.
+ */
+export function clearPlanOwnerRegistry(): void {
+ firstPlanOwner = null;
+}
+
+/**
+ * Map of plan title -> latest plan state
+ * Using title as key since it stays constant across updates
+ */
+export const planStatesAtom = atom