diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
index 1c97a05ad..aa1950bff 100644
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@@ -14,8 +14,11 @@ from langgraph.types import Checkpointer
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.context import SurfSenseContextSchema
+from app.agents.new_chat.display_image import create_display_image_tool
from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool
+from app.agents.new_chat.link_preview import create_link_preview_tool
from app.agents.new_chat.podcast import create_generate_podcast_tool
+from app.agents.new_chat.scrape_webpage import create_scrape_webpage_tool
from app.agents.new_chat.system_prompt import build_surfsense_system_prompt
from app.services.connector_service import ConnectorService
@@ -34,6 +37,10 @@ def create_surfsense_deep_agent(
user_instructions: str | None = None,
enable_citations: bool = True,
enable_podcast: bool = True,
+ enable_link_preview: bool = True,
+ enable_display_image: bool = True,
+ enable_scrape_webpage: bool = True,
+ firecrawl_api_key: str | None = None,
additional_tools: Sequence[BaseTool] | None = None,
):
"""
@@ -53,6 +60,14 @@ def create_surfsense_deep_agent(
When False, the agent will not be instructed to add citations to responses.
enable_podcast: Whether to include the podcast generation tool (default: True).
When True and user_id is provided, the agent can generate podcasts.
+ enable_link_preview: Whether to include the link preview tool (default: True).
+ When True, the agent can fetch and display rich link previews.
+ enable_display_image: Whether to include the display image tool (default: True).
+ When True, the agent can display images with metadata.
+ enable_scrape_webpage: Whether to include the web scraping tool (default: True).
+ When True, the agent can scrape and read webpage content.
+ firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
+ Falls back to Chromium/Trafilatura if not provided.
additional_tools: Optional sequence of additional tools to inject into the agent.
The search_knowledge_base tool will always be included.
@@ -78,6 +93,21 @@ def create_surfsense_deep_agent(
)
tools.append(podcast_tool)
+ # Add link preview tool if enabled
+ if enable_link_preview:
+ link_preview_tool = create_link_preview_tool()
+ tools.append(link_preview_tool)
+
+ # Add display image tool if enabled
+ if enable_display_image:
+ display_image_tool = create_display_image_tool()
+ tools.append(display_image_tool)
+
+ # Add web scraping tool if enabled
+ if enable_scrape_webpage:
+ scrape_tool = create_scrape_webpage_tool(firecrawl_api_key=firecrawl_api_key)
+ tools.append(scrape_tool)
+
if additional_tools:
tools.extend(additional_tools)
diff --git a/surfsense_backend/app/agents/new_chat/display_image.py b/surfsense_backend/app/agents/new_chat/display_image.py
new file mode 100644
index 000000000..0cd05b523
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/display_image.py
@@ -0,0 +1,104 @@
+"""
+Display image tool for the new chat agent.
+
+This module provides a tool for displaying images in the chat UI
+with metadata like title, description, and source attribution.
+"""
+
+import hashlib
+from typing import Any
+from urllib.parse import urlparse
+
+from langchain_core.tools import tool
+
+
+def extract_domain(url: str) -> str:
+    """
+    Return the network location of a URL without a leading ``www.``.
+
+    Args:
+        url: The URL to inspect.
+
+    Returns:
+        The ``netloc`` component of the URL (it may still carry a port), or
+        an empty string if the URL could not be processed.
+    """
+    try:
+        netloc = urlparse(url).netloc
+        # Only a leading "www." is dropped; other subdomains are kept.
+        return netloc[4:] if netloc.startswith("www.") else netloc
+    except Exception:
+        return ""
+
+
+def generate_image_id(src: str) -> str:
+    """
+    Build a deterministic identifier for an image source.
+
+    The same ``src`` always yields the same ID, so the value is stable
+    across calls rather than unique per invocation.
+
+    Args:
+        src: The image URL (or any string identifying the image).
+
+    Returns:
+        ``"image-"`` followed by the first 12 hex characters of the MD5
+        digest of ``src``.
+    """
+    # MD5 is only a cheap fingerprint here, not a security primitive; flagging
+    # that keeps the call working on FIPS-restricted Python builds.
+    digest = hashlib.md5(src.encode(), usedforsecurity=False).hexdigest()
+    return f"image-{digest[:12]}"
+
+
+def create_display_image_tool():
+    """
+    Factory function to create the display_image tool.
+
+    Returns:
+        A configured tool function for displaying images.
+    """
+
+    @tool
+    async def display_image(
+        src: str,
+        alt: str = "Image",
+        title: str | None = None,
+        description: str | None = None,
+    ) -> dict[str, Any]:
+        """
+        Display an image in the chat with metadata.
+
+        Use this tool when you want to show an image to the user.
+        This displays the image with an optional title, description,
+        and source attribution.
+
+        Common use cases:
+        - Showing an image from a URL the user mentioned
+        - Displaying a diagram or chart you're referencing
+        - Showing example images when explaining concepts
+
+        Args:
+            src: The URL of the image to display (must be a valid HTTP/HTTPS URL)
+            alt: Alternative text describing the image (for accessibility)
+            title: Optional title to display below the image
+            description: Optional description providing context about the image
+
+        Returns:
+            A dictionary containing image metadata for the UI to render:
+            - id: Unique identifier for this image
+            - assetId: The image URL (for deduplication)
+            - src: The image URL
+            - alt: Alt text for accessibility
+            - title: Image title (if provided)
+            - description: Image description (if provided)
+            - domain: Source domain
+        """
+        # Normalize the URL first so that id, assetId and src all describe the
+        # same string; hashing the raw input gave "example.com/a.png" and
+        # "https://example.com/a.png" different ids for the same asset.
+        src = src.strip()
+        if not src.startswith(("http://", "https://")):
+            src = f"https://{src}"
+
+        image_id = generate_image_id(src)
+        domain = extract_domain(src)
+
+        # Pick an aspect-ratio hint from well-known image hosts. Stock-photo
+        # hosts keep the 16:9 default; hosts serving arbitrary uploads use "auto".
+        widescreen_hosts = ("unsplash.com", "pexels.com")
+        free_ratio_hosts = ("imgur.com", "github.com", "githubusercontent.com")
+        if any(host in src for host in widescreen_hosts):
+            ratio = "16:9"
+        elif any(host in src for host in free_ratio_hosts):
+            ratio = "auto"
+        else:
+            ratio = "16:9"  # Default
+
+        return {
+            "id": image_id,
+            "assetId": src,
+            "src": src,
+            "alt": alt,
+            "title": title,
+            "description": description,
+            "domain": domain,
+            "ratio": ratio,
+        }
+
+    return display_image
+
diff --git a/surfsense_backend/app/agents/new_chat/link_preview.py b/surfsense_backend/app/agents/new_chat/link_preview.py
new file mode 100644
index 000000000..388a6c14e
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/link_preview.py
@@ -0,0 +1,292 @@
+"""
+Link preview tool for the new chat agent.
+
+This module provides a tool for fetching URL metadata (title, description,
+Open Graph image, etc.) to display rich link previews in the chat UI.
+"""
+
+import hashlib
+import re
+from typing import Any
+from urllib.parse import urlparse
+
+import httpx
+from langchain_core.tools import tool
+
+
+def extract_domain(url: str) -> str:
+    """
+    Pull the host portion out of a URL, dropping a leading ``www.``.
+
+    Args:
+        url: The URL to inspect.
+
+    Returns:
+        The URL's ``netloc`` with any ``www.`` prefix removed, or an empty
+        string if anything goes wrong while processing it.
+    """
+    prefix = "www."
+    try:
+        host = urlparse(url).netloc
+        if host.startswith(prefix):
+            host = host[len(prefix):]
+    except Exception:
+        return ""
+    return host
+
+
+def extract_og_content(html: str, property_name: str) -> str | None:
+    """
+    Extract the ``content`` value of an Open Graph ``<meta>`` tag.
+
+    Both attribute orders are supported (``property`` before ``content`` and
+    the reverse). Matching is case-insensitive.
+
+    Args:
+        html: Raw HTML of the page.
+        property_name: Open Graph property without the ``og:`` prefix,
+            e.g. ``"title"`` or ``"image"``.
+
+    Returns:
+        The attribute value of the first matching tag, or ``None`` when no
+        such tag exists.
+    """
+    # Escape the name so it is always matched literally inside the pattern.
+    og_property = re.escape(f"og:{property_name}")
+    patterns = (
+        # <meta property="og:..." content="...">
+        rf'<meta[^>]+property=["\']{og_property}["\'][^>]+content=["\']([^"\']+)["\']',
+        # <meta content="..." property="og:...">
+        rf'<meta[^>]+content=["\']([^"\']+)["\'][^>]+property=["\']{og_property}["\']',
+    )
+    for pattern in patterns:
+        match = re.search(pattern, html, re.IGNORECASE)
+        if match:
+            return match.group(1)
+
+    return None
+
+
+def extract_twitter_content(html: str, name: str) -> str | None:
+    """
+    Extract the ``content`` value of a Twitter Card ``<meta>`` tag.
+
+    Both attribute orders are supported (``name`` before ``content`` and the
+    reverse). Matching is case-insensitive.
+
+    Args:
+        html: Raw HTML of the page.
+        name: Twitter Card field without the ``twitter:`` prefix,
+            e.g. ``"title"`` or ``"image"``.
+
+    Returns:
+        The attribute value of the first matching tag, or ``None`` when no
+        such tag exists.
+    """
+    # Escape the name so it is always matched literally inside the pattern.
+    card_name = re.escape(f"twitter:{name}")
+    patterns = (
+        # <meta name="twitter:..." content="...">
+        rf'<meta[^>]+name=["\']{card_name}["\'][^>]+content=["\']([^"\']+)["\']',
+        # <meta content="..." name="twitter:...">
+        rf'<meta[^>]+content=["\']([^"\']+)["\'][^>]+name=["\']{card_name}["\']',
+    )
+    for pattern in patterns:
+        match = re.search(pattern, html, re.IGNORECASE)
+        if match:
+            return match.group(1)
+
+    return None
+
+
+def extract_meta_description(html: str) -> str | None:
+    """
+    Extract the standard ``<meta name="description">`` value from HTML.
+
+    Both attribute orders are supported (``name`` before ``content`` and the
+    reverse). Matching is case-insensitive.
+
+    Args:
+        html: Raw HTML of the page.
+
+    Returns:
+        The description text, or ``None`` when the page declares none.
+    """
+    patterns = (
+        # <meta name="description" content="...">
+        r'<meta[^>]+name=["\']description["\'][^>]+content=["\']([^"\']+)["\']',
+        # <meta content="..." name="description">
+        r'<meta[^>]+content=["\']([^"\']+)["\'][^>]+name=["\']description["\']',
+    )
+    for pattern in patterns:
+        match = re.search(pattern, html, re.IGNORECASE)
+        if match:
+            return match.group(1)
+
+    return None
+
+
+def extract_title(html: str) -> str | None:
+    """
+    Extract the best available page title from HTML.
+
+    Sources are tried in order: ``og:title``, ``twitter:title``, then the
+    document's ``<title>`` element.
+
+    Args:
+        html: Raw HTML of the page.
+
+    Returns:
+        The title text, or ``None`` when no source provides one.
+    """
+    # Try og:title first
+    og_title = extract_og_content(html, "title")
+    if og_title:
+        return og_title
+
+    # Try twitter:title
+    twitter_title = extract_twitter_content(html, "title")
+    if twitter_title:
+        return twitter_title
+
+    # Fall back to the <title> tag
+    pattern = r"<title[^>]*>([^<]+)</title>"
+    match = re.search(pattern, html, re.IGNORECASE)
+    if match:
+        return match.group(1).strip()
+
+    return None
+
+
+def extract_description(html: str) -> str | None:
+    """
+    Pick the best available page description from HTML.
+
+    Sources are consulted in order and the first non-empty one wins:
+    ``og:description``, ``twitter:description``, then the plain meta
+    description.
+
+    Args:
+        html: Raw HTML of the page.
+
+    Returns:
+        The description text, or whatever the meta-description lookup
+        returns (``None`` if absent) when neither card provides one.
+    """
+    return (
+        extract_og_content(html, "description")
+        or extract_twitter_content(html, "description")
+        or extract_meta_description(html)
+    )
+
+
+def extract_image(html: str) -> str | None:
+    """
+    Pick a preview image URL from HTML.
+
+    ``og:image`` is preferred; ``twitter:image`` is used when it is missing.
+
+    Args:
+        html: Raw HTML of the page.
+
+    Returns:
+        The image URL exactly as written in the page, or ``None`` when
+        neither tag is present.
+    """
+    return (
+        extract_og_content(html, "image")
+        or extract_twitter_content(html, "image")
+        or None
+    )
+
+
+def generate_preview_id(url: str) -> str:
+    """
+    Build a deterministic identifier for a link preview.
+
+    The same ``url`` always yields the same ID, so the value is stable
+    across calls rather than unique per invocation.
+
+    Args:
+        url: The URL the preview belongs to.
+
+    Returns:
+        ``"link-preview-"`` followed by the first 12 hex characters of the
+        MD5 digest of ``url``.
+    """
+    # MD5 is only a cheap fingerprint here, not a security primitive; flagging
+    # that keeps the call working on FIPS-restricted Python builds.
+    digest = hashlib.md5(url.encode(), usedforsecurity=False).hexdigest()
+    return f"link-preview-{digest[:12]}"
+
+
+def create_link_preview_tool():
+    """
+    Factory function to create the link_preview tool.
+
+    Returns:
+        A configured tool function for fetching link previews.
+    """
+    # Imported locally so the tool does not depend on extra module-level imports.
+    from html import unescape
+    from urllib.parse import urljoin
+
+    def _error_card(
+        preview_id: str, url: str, domain: str, error: str
+    ) -> dict[str, Any]:
+        """Build the fallback card returned when the page could not be fetched."""
+        return {
+            "id": preview_id,
+            "assetId": url,
+            "kind": "link",
+            "href": url,
+            "title": domain or "Link",
+            "domain": domain,
+            "error": error,
+        }
+
+    @tool
+    async def link_preview(url: str) -> dict[str, Any]:
+        """
+        Fetch metadata for a URL to display a rich link preview.
+
+        Use this tool when the user shares a URL or asks about a specific webpage.
+        This tool fetches the page's Open Graph metadata (title, description, image)
+        to display a nice preview card in the chat.
+
+        Common triggers include:
+        - User shares a URL in the chat
+        - User asks "What's this link about?" or similar
+        - User says "Show me a preview of this page"
+        - User wants to preview an article or webpage
+
+        Args:
+            url: The URL to fetch metadata for. Must be a valid HTTP/HTTPS URL.
+
+        Returns:
+            A dictionary containing:
+            - id: Unique identifier for this preview
+            - assetId: The URL itself (for deduplication)
+            - kind: "link" (type of media card)
+            - href: The URL to open when clicked
+            - title: Page title
+            - description: Page description (if available)
+            - thumb: Thumbnail/preview image URL (if available)
+            - domain: The domain name
+            - error: Error message (if fetch failed)
+        """
+        # Normalize before deriving id/domain: a scheme-less URL has no netloc,
+        # so extracting the domain from the raw input yielded an empty string.
+        url = url.strip()
+        if not url.startswith(("http://", "https://")):
+            url = f"https://{url}"
+
+        preview_id = generate_preview_id(url)
+        domain = extract_domain(url)
+
+        try:
+            # NOTE(review): the URL comes from the conversation and is fetched
+            # server-side; consider blocking private/internal addresses (SSRF).
+            async with httpx.AsyncClient(
+                timeout=10.0,
+                follow_redirects=True,
+                headers={
+                    "User-Agent": "Mozilla/5.0 (compatible; SurfSenseBot/1.0; +https://surfsense.net)",
+                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+                    "Accept-Language": "en-US,en;q=0.5",
+                },
+            ) as client:
+                response = await client.get(url)
+                response.raise_for_status()
+
+                # Get content type to ensure it's HTML
+                content_type = response.headers.get("content-type", "")
+                if "text/html" not in content_type.lower():
+                    # Not an HTML page, return basic info
+                    return {
+                        "id": preview_id,
+                        "assetId": url,
+                        "kind": "link",
+                        "href": url,
+                        "title": url.split("/")[-1] or domain,
+                        "description": f"File from {domain}",
+                        "domain": domain,
+                    }
+
+                page_html = response.text
+
+                # Extract metadata
+                title = extract_title(page_html) or domain
+                description = extract_description(page_html)
+                image = extract_image(page_html)
+
+                if image:
+                    # Attribute values are HTML-escaped (e.g. "&amp;" in query strings)
+                    image = unescape(image)
+                    # Make sure image URL is absolute
+                    if not image.startswith(("http://", "https://")):
+                        if image.startswith("//"):
+                            image = f"https:{image}"
+                        else:
+                            # Handles root-relative and document-relative paths
+                            image = urljoin(url, image)
+
+                # Clean up title and description (unescape HTML entities)
+                if title:
+                    title = unescape(title)
+                if description:
+                    description = unescape(description)
+                    # Truncate long descriptions
+                    if len(description) > 200:
+                        description = description[:197] + "..."
+
+                return {
+                    "id": preview_id,
+                    "assetId": url,
+                    "kind": "link",
+                    "href": url,
+                    "title": title,
+                    "description": description,
+                    "thumb": image,
+                    "domain": domain,
+                }
+
+        except httpx.TimeoutException:
+            return _error_card(preview_id, url, domain, "Request timed out")
+        except httpx.HTTPStatusError as e:
+            return _error_card(
+                preview_id, url, domain, f"HTTP {e.response.status_code}"
+            )
+        except Exception as e:
+            # Best-effort tool: report the failure in the card instead of raising.
+            error_message = str(e)
+            print(f"[link_preview] Error fetching {url}: {error_message}")
+            return _error_card(
+                preview_id, url, domain, f"Failed to fetch: {error_message[:50]}"
+            )
+
+    return link_preview
+
diff --git a/surfsense_backend/app/agents/new_chat/scrape_webpage.py b/surfsense_backend/app/agents/new_chat/scrape_webpage.py
new file mode 100644
index 000000000..40a9c917f
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/scrape_webpage.py
@@ -0,0 +1,197 @@
+"""
+Web scraping tool for the new chat agent.
+
+This module provides a tool for scraping and extracting content from webpages
+using the existing WebCrawlerConnector. The scraped content can be used by
+the agent to answer questions about web pages.
+"""
+
+import hashlib
+from typing import Any
+from urllib.parse import urlparse
+
+from langchain_core.tools import tool
+
+from app.connectors.webcrawler_connector import WebCrawlerConnector
+
+
+def extract_domain(url: str) -> str:
+    """
+    Get the host part of a URL with any leading ``www.`` stripped.
+
+    Args:
+        url: The URL to inspect.
+
+    Returns:
+        The URL's ``netloc`` minus a ``www.`` prefix, or an empty string if
+        processing the URL raises.
+    """
+    try:
+        return urlparse(url).netloc.removeprefix("www.")
+    except Exception:
+        return ""
+
+
+def generate_scrape_id(url: str) -> str:
+    """
+    Build a deterministic identifier for a scraped webpage.
+
+    The same ``url`` always yields the same ID, so the value is stable
+    across calls rather than unique per invocation.
+
+    Args:
+        url: The URL of the scraped page.
+
+    Returns:
+        ``"scrape-"`` followed by the first 12 hex characters of the MD5
+        digest of ``url``.
+    """
+    # MD5 is only a cheap fingerprint here, not a security primitive; flagging
+    # that keeps the call working on FIPS-restricted Python builds.
+    digest = hashlib.md5(url.encode(), usedforsecurity=False).hexdigest()
+    return f"scrape-{digest[:12]}"
+
+
+def truncate_content(content: str, max_length: int = 50000) -> tuple[str, bool]:
+    """
+    Cap ``content`` at ``max_length`` characters, preferring a clean break.
+
+    When the text is too long, the cut is moved back to the last period or
+    blank line inside the limit, provided that boundary lies in the final
+    20% of the allowed length. A truncation marker is then appended.
+
+    Args:
+        content: The text to shorten.
+        max_length: Maximum number of characters kept before the marker.
+
+    Returns:
+        Tuple of (truncated_content, was_truncated)
+    """
+    if len(content) <= max_length:
+        return content, False
+
+    head = content[:max_length]
+    # Latest sentence end or paragraph break within the allowed window
+    cut = max(head.rfind("."), head.rfind("\n\n"))
+    # A boundary too far back would throw away too much text; hard-cut instead
+    if cut > max_length * 0.8:
+        head = content[: cut + 1]
+
+    return head + "\n\n[Content truncated...]", True
+
+
+def create_scrape_webpage_tool(firecrawl_api_key: str | None = None):
+    """
+    Factory function to create the scrape_webpage tool.
+
+    Args:
+        firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
+            Falls back to Chromium/Trafilatura if not provided.
+
+    Returns:
+        A configured tool function for scraping webpages.
+    """
+
+    def _error_card(
+        scrape_id: str, url: str, domain: str, error: str
+    ) -> dict[str, Any]:
+        """Build the fallback payload returned when scraping did not succeed."""
+        return {
+            "id": scrape_id,
+            "assetId": url,
+            "kind": "article",
+            "href": url,
+            "title": domain or "Webpage",
+            "domain": domain,
+            "error": error,
+        }
+
+    @tool
+    async def scrape_webpage(
+        url: str,
+        max_length: int = 50000,
+    ) -> dict[str, Any]:
+        """
+        Scrape and extract the main content from a webpage.
+
+        Use this tool when the user wants you to read, summarize, or answer
+        questions about a specific webpage's content. This tool actually
+        fetches and reads the full page content.
+
+        Common triggers:
+        - "Read this article and summarize it"
+        - "What does this page say about X?"
+        - "Summarize this blog post for me"
+        - "Tell me the key points from this article"
+        - "What's in this webpage?"
+
+        Args:
+            url: The URL of the webpage to scrape (must be HTTP/HTTPS)
+            max_length: Maximum content length to return (default: 50000 chars)
+
+        Returns:
+            A dictionary containing:
+            - id: Unique identifier for this scrape
+            - assetId: The URL (for deduplication)
+            - kind: "article" (type of content)
+            - href: The URL to open when clicked
+            - title: Page title
+            - description: Brief description or excerpt
+            - content: The extracted main content (markdown format)
+            - domain: The domain name
+            - word_count: Approximate word count
+            - was_truncated: Whether content was truncated
+            - error: Error message (if scraping failed)
+        """
+        # Normalize before deriving id/domain: a scheme-less URL has no netloc,
+        # so extracting the domain from the raw input yielded an empty string.
+        url = url.strip()
+        if not url.startswith(("http://", "https://")):
+            url = f"https://{url}"
+
+        scrape_id = generate_scrape_id(url)
+        domain = extract_domain(url)
+
+        try:
+            # Create webcrawler connector
+            connector = WebCrawlerConnector(firecrawl_api_key=firecrawl_api_key)
+
+            # Crawl the URL
+            result, error = await connector.crawl_url(url, formats=["markdown"])
+
+            if error:
+                return _error_card(scrape_id, url, domain, error)
+
+            if not result:
+                return _error_card(
+                    scrape_id, url, domain, "No content returned from crawler"
+                )
+
+            # Extract content and metadata; tolerate keys that are present but None
+            content = result.get("content") or ""
+            metadata = result.get("metadata") or {}
+
+            # Get title from metadata, falling back to the domain or last path segment
+            title = (
+                metadata.get("title") or domain or url.split("/")[-1] or "Webpage"
+            )
+
+            # Get description from metadata
+            description = metadata.get("description", "")
+            if not description and content:
+                # Use first paragraph as description
+                first_para = content.split("\n\n")[0]
+                description = (
+                    first_para[:300] + "..." if len(first_para) > 300 else first_para
+                )
+
+            # Truncate content if needed
+            content, was_truncated = truncate_content(content, max_length)
+
+            # Calculate word count (of the possibly truncated content)
+            word_count = len(content.split())
+
+            return {
+                "id": scrape_id,
+                "assetId": url,
+                "kind": "article",
+                "href": url,
+                "title": title,
+                "description": description,
+                "content": content,
+                "domain": domain,
+                "word_count": word_count,
+                "was_truncated": was_truncated,
+                "crawler_type": result.get("crawler_type", "unknown"),
+                "author": metadata.get("author"),
+                "date": metadata.get("date"),
+            }
+
+        except Exception as e:
+            # Best-effort tool: report the failure in the payload instead of raising.
+            error_message = str(e)
+            print(f"[scrape_webpage] Error scraping {url}: {error_message}")
+            return _error_card(
+                scrape_id, url, domain, f"Failed to scrape: {error_message[:100]}"
+            )
+
+    return scrape_webpage
+
diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py
index f725be684..2677b21fd 100644
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@@ -145,6 +145,57 @@ You have access to the following tools:
- Returns: A task_id for tracking. The podcast will be generated in the background.
- IMPORTANT: Only one podcast can be generated at a time. If a podcast is already being generated, the tool will return status "already_generating".
- After calling this tool, inform the user that podcast generation has started and they will see the player when it's ready (takes 3-5 minutes).
+
+3. link_preview: Fetch metadata for a URL to display a rich preview card.
+ - IMPORTANT: Use this tool WHENEVER the user shares or mentions a URL/link in their message.
+ - This fetches the page's Open Graph metadata (title, description, thumbnail) to show a preview card.
+ - NOTE: This tool only fetches metadata, NOT the full page content. It cannot read the article text.
+ - Trigger scenarios:
+ * User shares a URL (e.g., "Check out https://example.com")
+ * User pastes a link in their message
+ * User asks about a URL or link
+ - Args:
+ - url: The URL to fetch metadata for (must be a valid HTTP/HTTPS URL)
+ - Returns: A rich preview card with title, description, thumbnail, and domain
+ - The preview card will automatically be displayed in the chat.
+
+4. display_image: Display an image in the chat with metadata.
+ - Use this tool when you want to show an image to the user.
+ - This displays the image with an optional title, description, and source attribution.
+ - Common use cases:
+ * Showing an image from a URL mentioned in the conversation
+ * Displaying a diagram, chart, or illustration you're referencing
+ * Showing visual examples when explaining concepts
+ - Args:
+ - src: The URL of the image to display (must be a valid HTTP/HTTPS image URL)
+ - alt: Alternative text describing the image (for accessibility)
+ - title: Optional title to display below the image
+ - description: Optional description providing context about the image
+ - Returns: An image card with the image, title, and description
+ - The image will automatically be displayed in the chat.
+
+5. scrape_webpage: Scrape and extract the main content from a webpage.
+ - Use this when the user wants you to READ and UNDERSTAND the actual content of a webpage.
+ - IMPORTANT: This is different from link_preview:
+ * link_preview: Only fetches metadata (title, description, thumbnail) for display
+ * scrape_webpage: Actually reads the FULL page content so you can analyze/summarize it
+ - Trigger scenarios:
+ * "Read this article and summarize it"
+ * "What does this page say about X?"
+ * "Summarize this blog post for me"
+ * "Tell me the key points from this article"
+ * "What's in this webpage?"
+ * "Can you analyze this article?"
+ - Args:
+ - url: The URL of the webpage to scrape (must be HTTP/HTTPS)
+ - max_length: Maximum content length to return (default: 50000 chars)
+ - Returns: The page title, description, full content (in markdown), word count, and metadata
+ - After scraping, you will have the full article text and can analyze, summarize, or answer questions about it.
+ - IMAGES: The scraped content may contain image URLs in markdown format like `![alt text](https://example.com/image.png)`.
+ * When you find relevant/important images in the scraped content, use the `display_image` tool to show them to the user.
+ * This makes your response more visual and engaging.
+ * Prioritize showing: diagrams, charts, infographics, key illustrations, or images that help explain the content.
+ * Don't show every image - just the most relevant 1-3 images that enhance understanding.
- User: "Fetch all my notes and what's in them?"
@@ -162,6 +213,39 @@ You have access to the following tools:
- User: "Make a podcast about quantum computing"
- First search: `search_knowledge_base(query="quantum computing")`
- Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\n\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")`
+
+- User: "Check out https://dev.to/some-article"
+ - Call: `link_preview(url="https://dev.to/some-article")`
+
+- User: "What's this blog post about? https://example.com/blog/post"
+ - Call: `link_preview(url="https://example.com/blog/post")`
+
+- User: "https://github.com/some/repo"
+ - Call: `link_preview(url="https://github.com/some/repo")`
+
+- User: "Show me this image: https://example.com/image.png"
+ - Call: `display_image(src="https://example.com/image.png", alt="User shared image")`
+
+- User: "Can you display a diagram of a neural network?"
+ - Call: `display_image(src="https://example.com/neural-network.png", alt="Neural network diagram", title="Neural Network Architecture", description="A visual representation of a neural network with input, hidden, and output layers")`
+
+- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
+ - Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
+ - After getting the content, provide a summary based on the scraped text
+
+- User: "What does this page say about machine learning? https://docs.example.com/ml-guide"
+ - Call: `scrape_webpage(url="https://docs.example.com/ml-guide")`
+ - Then answer the question using the extracted content
+
+- User: "Summarize this blog post: https://medium.com/some-article"
+ - Call: `scrape_webpage(url="https://medium.com/some-article")`
+ - Provide a comprehensive summary of the article content
+
+- User: "Read this tutorial and explain it: https://example.com/ml-tutorial"
+ - First: `scrape_webpage(url="https://example.com/ml-tutorial")`
+ - Then, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`:
+ - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")`
+ - Then provide your explanation, referencing the displayed image
{citation_section}
"""
diff --git a/surfsense_backend/app/services/new_streaming_service.py b/surfsense_backend/app/services/new_streaming_service.py
index f0f05cdb6..05dd2d4dd 100644
--- a/surfsense_backend/app/services/new_streaming_service.py
+++ b/surfsense_backend/app/services/new_streaming_service.py
@@ -450,6 +450,35 @@ class VercelStreamingService:
"""
return self.format_data("further-questions", {"questions": questions})
+    def format_thinking_step(
+        self,
+        step_id: str,
+        title: str,
+        status: str = "in_progress",
+        items: list[str] | None = None,
+    ) -> str:
+        """
+        Build the "thinking-step" data part used for chain-of-thought display
+        (SurfSense specific).
+
+        Args:
+            step_id: Unique identifier for the step
+            title: The step title (e.g., "Analyzing your request")
+            status: Step status - "pending", "in_progress", or "completed"
+            items: Optional list of sub-items/details for this step
+
+        Returns:
+            str: SSE formatted thinking step data part
+        """
+        # A missing or empty items list is always sent as [].
+        payload = {
+            "id": step_id,
+            "title": title,
+            "status": status,
+            "items": items if items else [],
+        }
+        return self.format_data("thinking-step", payload)
+
# =========================================================================
# Error Part
# =========================================================================
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index 9711445aa..a201ece22 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -154,6 +154,49 @@ async def stream_new_chat(
# Reset text tracking for this stream
accumulated_text = ""
+ # Track thinking steps for chain-of-thought display
+ thinking_step_counter = 0
+ # Map run_id -> step_id for tool calls so we can update them on completion
+ tool_step_ids: dict[str, str] = {}
+ # Track the last active step so we can mark it complete at the end
+ last_active_step_id: str | None = None
+ last_active_step_title: str = ""
+ last_active_step_items: list[str] = []
+ # Track which steps have been completed to avoid duplicate completions
+ completed_step_ids: set[str] = set()
+ # Track if we just finished a tool (text flows silently after tools)
+ just_finished_tool: bool = False
+
+        def next_thinking_step_id() -> str:
+            """Advance the shared step counter and return the new step's ID."""
+            nonlocal thinking_step_counter
+            thinking_step_counter = thinking_step_counter + 1
+            return "thinking-" + str(thinking_step_counter)
+
+        def complete_current_step() -> str | None:
+            """
+            Mark the currently tracked step as completed.
+
+            Returns the formatted completion event, or None when there is no
+            active step or it has already been recorded as completed.
+            """
+            nonlocal last_active_step_id, last_active_step_title, last_active_step_items
+            step_id = last_active_step_id
+            if not step_id or step_id in completed_step_ids:
+                return None
+
+            # Record first so the same step is never completed twice
+            completed_step_ids.add(step_id)
+            return streaming_service.format_thinking_step(
+                step_id=step_id,
+                title=last_active_step_title,
+                status="completed",
+                items=last_active_step_items or None,
+            )
+
+ # Initial thinking step - analyzing the request
+ analyze_step_id = next_thinking_step_id()
+ last_active_step_id = analyze_step_id
+ last_active_step_title = "Understanding your request"
+ last_active_step_items = [f"Processing: {user_query[:80]}{'...' if len(user_query) > 80 else ''}"]
+ yield streaming_service.format_thinking_step(
+ step_id=analyze_step_id,
+ title="Understanding your request",
+ status="in_progress",
+ items=last_active_step_items,
+ )
+
# Stream the agent response with thread config for memory
async for event in agent.astream_events(
input_state, config=config, version="v2"
@@ -168,6 +211,18 @@ async def stream_new_chat(
if content and isinstance(content, str):
# Start a new text block if needed
if current_text_id is None:
+ # Complete any previous step
+ completion_event = complete_current_step()
+ if completion_event:
+ yield completion_event
+
+ if just_finished_tool:
+ # Clear the active step tracking - text flows without a dedicated step
+ last_active_step_id = None
+ last_active_step_title = ""
+ last_active_step_items = []
+ just_finished_tool = False
+
current_text_id = streaming_service.generate_text_id()
yield streaming_service.format_text_start(current_text_id)
@@ -188,6 +243,116 @@ async def stream_new_chat(
yield streaming_service.format_text_end(current_text_id)
current_text_id = None
+ # Complete any previous step EXCEPT "Synthesizing response"
+ # (we want to reuse the Synthesizing step after tools complete)
+ if last_active_step_title != "Synthesizing response":
+ completion_event = complete_current_step()
+ if completion_event:
+ yield completion_event
+
+ # Reset the just_finished_tool flag since we're starting a new tool
+ just_finished_tool = False
+
+ # Create thinking step for the tool call and store it for later update
+ tool_step_id = next_thinking_step_id()
+ tool_step_ids[run_id] = tool_step_id
+ last_active_step_id = tool_step_id
+ if tool_name == "search_knowledge_base":
+ query = (
+ tool_input.get("query", "")
+ if isinstance(tool_input, dict)
+ else str(tool_input)
+ )
+ last_active_step_title = "Searching knowledge base"
+ last_active_step_items = [f"Query: {query[:100]}{'...' if len(query) > 100 else ''}"]
+ yield streaming_service.format_thinking_step(
+ step_id=tool_step_id,
+ title="Searching knowledge base",
+ status="in_progress",
+ items=last_active_step_items,
+ )
+ elif tool_name == "link_preview":
+ url = (
+ tool_input.get("url", "")
+ if isinstance(tool_input, dict)
+ else str(tool_input)
+ )
+ last_active_step_title = "Fetching link preview"
+ last_active_step_items = [f"URL: {url[:80]}{'...' if len(url) > 80 else ''}"]
+ yield streaming_service.format_thinking_step(
+ step_id=tool_step_id,
+ title="Fetching link preview",
+ status="in_progress",
+ items=last_active_step_items,
+ )
+ elif tool_name == "display_image":
+ src = (
+ tool_input.get("src", "")
+ if isinstance(tool_input, dict)
+ else str(tool_input)
+ )
+ title = (
+ tool_input.get("title", "")
+ if isinstance(tool_input, dict)
+ else ""
+ )
+ last_active_step_title = "Displaying image"
+ last_active_step_items = [
+ f"Image: {title[:50] if title else src[:50]}{'...' if len(title or src) > 50 else ''}"
+ ]
+ yield streaming_service.format_thinking_step(
+ step_id=tool_step_id,
+ title="Displaying image",
+ status="in_progress",
+ items=last_active_step_items,
+ )
+ elif tool_name == "scrape_webpage":
+ url = (
+ tool_input.get("url", "")
+ if isinstance(tool_input, dict)
+ else str(tool_input)
+ )
+ last_active_step_title = "Scraping webpage"
+ last_active_step_items = [f"URL: {url[:80]}{'...' if len(url) > 80 else ''}"]
+ yield streaming_service.format_thinking_step(
+ step_id=tool_step_id,
+ title="Scraping webpage",
+ status="in_progress",
+ items=last_active_step_items,
+ )
+ elif tool_name == "generate_podcast":
+ podcast_title = (
+ tool_input.get("podcast_title", "SurfSense Podcast")
+ if isinstance(tool_input, dict)
+ else "SurfSense Podcast"
+ )
+ # Get content length for context
+ content_len = len(
+ tool_input.get("source_content", "")
+ if isinstance(tool_input, dict)
+ else ""
+ )
+ last_active_step_title = "Generating podcast"
+ last_active_step_items = [
+ f"Title: {podcast_title}",
+ f"Content: {content_len:,} characters",
+ "Preparing audio generation...",
+ ]
+ yield streaming_service.format_thinking_step(
+ step_id=tool_step_id,
+ title="Generating podcast",
+ status="in_progress",
+ items=last_active_step_items,
+ )
+ else:
+ last_active_step_title = f"Using {tool_name.replace('_', ' ')}"
+ last_active_step_items = []
+ yield streaming_service.format_thinking_step(
+ step_id=tool_step_id,
+ title=last_active_step_title,
+ status="in_progress",
+ )
+
# Stream tool info
tool_call_id = (
f"call_{run_id[:32]}"
@@ -214,6 +379,36 @@ async def stream_new_chat(
f"Searching knowledge base: {query[:100]}{'...' if len(query) > 100 else ''}",
"info",
)
+ elif tool_name == "link_preview":
+ url = (
+ tool_input.get("url", "")
+ if isinstance(tool_input, dict)
+ else str(tool_input)
+ )
+ yield streaming_service.format_terminal_info(
+ f"Fetching link preview: {url[:80]}{'...' if len(url) > 80 else ''}",
+ "info",
+ )
+ elif tool_name == "display_image":
+ src = (
+ tool_input.get("src", "")
+ if isinstance(tool_input, dict)
+ else str(tool_input)
+ )
+ yield streaming_service.format_terminal_info(
+ f"Displaying image: {src[:60]}{'...' if len(src) > 60 else ''}",
+ "info",
+ )
+ elif tool_name == "scrape_webpage":
+ url = (
+ tool_input.get("url", "")
+ if isinstance(tool_input, dict)
+ else str(tool_input)
+ )
+ yield streaming_service.format_terminal_info(
+ f"Scraping webpage: {url[:70]}{'...' if len(url) > 70 else ''}",
+ "info",
+ )
elif tool_name == "generate_podcast":
title = (
tool_input.get("podcast_title", "SurfSense Podcast")
@@ -254,6 +449,155 @@ async def stream_new_chat(
tool_call_id = f"call_{run_id[:32]}" if run_id else "call_unknown"
+ # Get the original tool step ID to update it (not create a new one)
+ original_step_id = tool_step_ids.get(run_id, f"thinking-unknown-{run_id[:8]}")
+
+ # Mark the tool thinking step as completed using the SAME step ID
+ # Also add to completed set so we don't try to complete it again
+ completed_step_ids.add(original_step_id)
+ if tool_name == "search_knowledge_base":
+ # Get result count if available
+ result_info = "Search completed"
+ if isinstance(tool_output, dict):
+ result_len = tool_output.get("result_length", 0)
+ if result_len > 0:
+ result_info = f"Found relevant information ({result_len} chars)"
+ # Include original query in completed items
+ completed_items = [*last_active_step_items, result_info]
+ yield streaming_service.format_thinking_step(
+ step_id=original_step_id,
+ title="Searching knowledge base",
+ status="completed",
+ items=completed_items,
+ )
+ elif tool_name == "link_preview":
+ # Build completion items based on link preview result
+ if isinstance(tool_output, dict):
+ title = tool_output.get("title", "Link")
+ domain = tool_output.get("domain", "")
+ has_error = "error" in tool_output
+ if has_error:
+ completed_items = [
+ *last_active_step_items,
+ f"Error: {tool_output.get('error', 'Failed to fetch')}",
+ ]
+ else:
+ completed_items = [
+ *last_active_step_items,
+ f"Title: {title[:60]}{'...' if len(title) > 60 else ''}",
+ f"Domain: {domain}" if domain else "Preview loaded",
+ ]
+ else:
+ completed_items = [*last_active_step_items, "Preview loaded"]
+ yield streaming_service.format_thinking_step(
+ step_id=original_step_id,
+ title="Fetching link preview",
+ status="completed",
+ items=completed_items,
+ )
+ elif tool_name == "display_image":
+ # Build completion items for image display
+ if isinstance(tool_output, dict):
+ title = tool_output.get("title", "")
+ alt = tool_output.get("alt", "Image")
+ display_name = title or alt
+ completed_items = [
+ *last_active_step_items,
+ f"Showing: {display_name[:50]}{'...' if len(display_name) > 50 else ''}",
+ ]
+ else:
+ completed_items = [*last_active_step_items, "Image displayed"]
+ yield streaming_service.format_thinking_step(
+ step_id=original_step_id,
+ title="Displaying image",
+ status="completed",
+ items=completed_items,
+ )
+ elif tool_name == "scrape_webpage":
+ # Build completion items for webpage scraping
+ if isinstance(tool_output, dict):
+ title = tool_output.get("title", "Webpage")
+ word_count = tool_output.get("word_count", 0)
+ has_error = "error" in tool_output
+ if has_error:
+ completed_items = [
+ *last_active_step_items,
+ f"Error: {tool_output.get('error', 'Failed to scrape')[:50]}",
+ ]
+ else:
+ completed_items = [
+ *last_active_step_items,
+ f"Title: {title[:50]}{'...' if len(title) > 50 else ''}",
+ f"Extracted: {word_count:,} words",
+ ]
+ else:
+ completed_items = [*last_active_step_items, "Content extracted"]
+ yield streaming_service.format_thinking_step(
+ step_id=original_step_id,
+ title="Scraping webpage",
+ status="completed",
+ items=completed_items,
+ )
+ elif tool_name == "generate_podcast":
+ # Build detailed completion items based on podcast status
+ podcast_status = (
+ tool_output.get("status", "unknown")
+ if isinstance(tool_output, dict)
+ else "unknown"
+ )
+ podcast_title = (
+ tool_output.get("title", "Podcast")
+ if isinstance(tool_output, dict)
+ else "Podcast"
+ )
+
+ if podcast_status == "processing":
+ completed_items = [
+ f"Title: {podcast_title}",
+ "Audio generation started",
+ "Processing in background...",
+ ]
+ elif podcast_status == "already_generating":
+ completed_items = [
+ f"Title: {podcast_title}",
+ "Podcast already in progress",
+ "Please wait for it to complete",
+ ]
+ elif podcast_status == "error":
+ error_msg = (
+ tool_output.get("error", "Unknown error")
+ if isinstance(tool_output, dict)
+ else "Unknown error"
+ )
+ completed_items = [
+ f"Title: {podcast_title}",
+ f"Error: {error_msg[:50]}",
+ ]
+ else:
+ completed_items = last_active_step_items
+
+ yield streaming_service.format_thinking_step(
+ step_id=original_step_id,
+ title="Generating podcast",
+ status="completed",
+ items=completed_items,
+ )
+ else:
+ yield streaming_service.format_thinking_step(
+ step_id=original_step_id,
+ title=f"Using {tool_name.replace('_', ' ')}",
+ status="completed",
+ items=last_active_step_items,
+ )
+
+ # Mark that we just finished a tool - "Synthesizing response" will be created
+ # when text actually starts flowing (not immediately)
+ just_finished_tool = True
+ # Clear the active step since the tool is done
+ last_active_step_id = None
+ last_active_step_title = ""
+ last_active_step_items = []
+
# Handle different tool outputs
if tool_name == "generate_podcast":
# Stream the full podcast result so frontend can render the audio player
@@ -282,8 +626,89 @@ async def stream_new_chat(
f"Podcast generation failed: {error_msg}",
"error",
)
- else:
- # Don't stream the full output for other tools (can be very large), just acknowledge
+ elif tool_name == "link_preview":
+ # Stream the full link preview result so frontend can render the MediaCard
+ yield streaming_service.format_tool_output_available(
+ tool_call_id,
+ tool_output
+ if isinstance(tool_output, dict)
+ else {"result": tool_output},
+ )
+ # Send appropriate terminal message
+ if isinstance(tool_output, dict) and "error" not in tool_output:
+ title = tool_output.get("title", "Link")
+ yield streaming_service.format_terminal_info(
+ f"Link preview loaded: {title[:50]}{'...' if len(title) > 50 else ''}",
+ "success",
+ )
+ else:
+ error_msg = (
+ tool_output.get("error", "Failed to fetch")
+ if isinstance(tool_output, dict)
+ else "Failed to fetch"
+ )
+ yield streaming_service.format_terminal_info(
+ f"Link preview failed: {error_msg}",
+ "error",
+ )
+ elif tool_name == "display_image":
+ # Stream the full image result so frontend can render the Image component
+ yield streaming_service.format_tool_output_available(
+ tool_call_id,
+ tool_output
+ if isinstance(tool_output, dict)
+ else {"result": tool_output},
+ )
+ # Send terminal message
+ if isinstance(tool_output, dict):
+ title = tool_output.get("title") or tool_output.get("alt", "Image")
+ yield streaming_service.format_terminal_info(
+ f"Image displayed: {title[:40]}{'...' if len(title) > 40 else ''}",
+ "success",
+ )
+ elif tool_name == "scrape_webpage":
+ # Stream the scrape result so frontend can render the Article component
+ # Note: We send metadata for display, but content goes to LLM for processing
+ if isinstance(tool_output, dict):
+ # Create a display-friendly output (without full content for the card)
+ display_output = {
+ k: v for k, v in tool_output.items() if k != "content"
+ }
+ # But keep a truncated content preview
+ if "content" in tool_output:
+ content = tool_output.get("content", "")
+ display_output["content_preview"] = (
+ content[:500] + "..." if len(content) > 500 else content
+ )
+ yield streaming_service.format_tool_output_available(
+ tool_call_id,
+ display_output,
+ )
+ else:
+ yield streaming_service.format_tool_output_available(
+ tool_call_id,
+ {"result": tool_output},
+ )
+ # Send terminal message
+ if isinstance(tool_output, dict) and "error" not in tool_output:
+ title = tool_output.get("title", "Webpage")
+ word_count = tool_output.get("word_count", 0)
+ yield streaming_service.format_terminal_info(
+ f"Scraped: {title[:40]}{'...' if len(title) > 40 else ''} ({word_count:,} words)",
+ "success",
+ )
+ else:
+ error_msg = (
+ tool_output.get("error", "Failed to scrape")
+ if isinstance(tool_output, dict)
+ else "Failed to scrape"
+ )
+ yield streaming_service.format_terminal_info(
+ f"Scrape failed: {error_msg}",
+ "error",
+ )
+ elif tool_name == "search_knowledge_base":
+ # Don't stream the full output for search (can be very large), just acknowledge
yield streaming_service.format_tool_output_available(
tool_call_id,
{"status": "completed", "result_length": len(str(tool_output))},
@@ -291,6 +716,15 @@ async def stream_new_chat(
yield streaming_service.format_terminal_info(
"Knowledge base search completed", "success"
)
+ else:
+ # Default handling for other tools
+ yield streaming_service.format_tool_output_available(
+ tool_call_id,
+ {"status": "completed", "result_length": len(str(tool_output))},
+ )
+ yield streaming_service.format_terminal_info(
+ f"Tool {tool_name} completed", "success"
+ )
# Handle chain/agent end to close any open text blocks
elif event_type in ("on_chain_end", "on_agent_end"):
@@ -302,6 +736,11 @@ async def stream_new_chat(
if current_text_id is not None:
yield streaming_service.format_text_end(current_text_id)
+ # Mark the last active thinking step as completed using the same title
+ completion_event = complete_current_step()
+ if completion_event:
+ yield completion_event
+
# Finish the step and message
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
index 1426ecea1..9c7e3cb4a 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@@ -11,6 +11,10 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { toast } from "sonner";
import { Thread } from "@/components/assistant-ui/thread";
import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast";
+import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview";
+import { DisplayImageToolUI } from "@/components/tool-ui/display-image";
+import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage";
+import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking";
import { getBearerToken } from "@/lib/auth-utils";
import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter";
import {
@@ -25,8 +29,26 @@ import {
type MessageRecord,
} from "@/lib/chat/thread-persistence";
+/**
+ * Extract thinking steps from message content.
+ *
+ * Looks for the first content part whose `type` is "thinking-steps" and
+ * returns its `steps` array.
+ *
+ * @param content - Message content; anything that is not an array yields no steps.
+ * @returns The `steps` of the first "thinking-steps" part, or an empty array
+ * when no such part exists or its `steps` value is falsy.
+ */
+function extractThinkingSteps(content: unknown): ThinkingStep[] {
+ if (!Array.isArray(content)) return []; // non-array content (e.g. a plain string) carries no steps
+
+ const thinkingPart = content.find(
+ (part: unknown) =>
+ typeof part === "object" &&
+ part !== null &&
+ "type" in part &&
+ (part as { type: string }).type === "thinking-steps"
+ ) as { type: "thinking-steps"; steps: ThinkingStep[] } | undefined;
+
+ return thinkingPart?.steps || []; // `steps` is trusted via the cast above; its shape is not validated here
+}
+
/**
* Convert backend message to assistant-ui ThreadMessageLike format
+ * Filters out 'thinking-steps' part as it's handled separately
*/
function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
let content: ThreadMessageLike["content"];
@@ -34,7 +56,17 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
if (typeof msg.content === "string") {
content = [{ type: "text", text: msg.content }];
} else if (Array.isArray(msg.content)) {
- content = msg.content as ThreadMessageLike["content"];
+ // Filter out thinking-steps part - it's handled separately via messageThinkingSteps
+ const filteredContent = msg.content.filter(
+ (part: unknown) =>
+ !(typeof part === "object" &&
+ part !== null &&
+ "type" in part &&
+ (part as { type: string }).type === "thinking-steps")
+ );
+ content = filteredContent.length > 0
+ ? (filteredContent as ThreadMessageLike["content"])
+ : [{ type: "text", text: "" }];
} else {
content = [{ type: "text", text: String(msg.content) }];
}
@@ -50,7 +82,17 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
/**
* Tools that should render custom UI in the chat.
*/
-const TOOLS_WITH_UI = new Set(["generate_podcast"]);
+const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview", "display_image", "scrape_webpage"]);
+
+/**
+ * Type for thinking step data from the backend.
+ *
+ * This is the shape the `data` field of a "data-thinking-step" stream event
+ * is cast to when the event is handled in this file.
+ */
+interface ThinkingStepData {
+ id: string; // used as the map key when steps are collected, so a later event with the same id replaces the earlier one
+ title: string;
+ status: "pending" | "in_progress" | "completed";
+ items: string[];
+}
export default function NewChatPage() {
const params = useParams();
@@ -59,6 +101,10 @@ export default function NewChatPage() {
const [threadId, setThreadId] = useState(null);
const [messages, setMessages] = useState([]);
const [isRunning, setIsRunning] = useState(false);
+ // Store thinking steps per message ID
+ const [messageThinkingSteps, setMessageThinkingSteps] = useState<
+ Map
+ >(new Map());
const abortControllerRef = useRef(null);
// Create the attachment adapter for file processing
@@ -95,6 +141,20 @@ export default function NewChatPage() {
if (response.messages && response.messages.length > 0) {
const loadedMessages = response.messages.map(convertToThreadMessage);
setMessages(loadedMessages);
+
+ // Extract and restore thinking steps from persisted messages
+ const restoredThinkingSteps = new Map();
+ for (const msg of response.messages) {
+ if (msg.role === "assistant") {
+ const steps = extractThinkingSteps(msg.content);
+ if (steps.length > 0) {
+ restoredThinkingSteps.set(`msg-${msg.id}`, steps);
+ }
+ }
+ }
+ if (restoredThinkingSteps.size > 0) {
+ setMessageThinkingSteps(restoredThinkingSteps);
+ }
}
} else {
// Create new thread
@@ -186,46 +246,99 @@ export default function NewChatPage() {
// Prepare assistant message
const assistantMsgId = `msg-assistant-${Date.now()}`;
- let accumulatedText = "";
- const toolCalls = new Map<
- string,
- {
- toolCallId: string;
- toolName: string;
- args: Record;
- result?: unknown;
+ const currentThinkingSteps = new Map();
+
+ // Ordered content parts to preserve inline tool call positions
+ // Each part is either a text segment or a tool call; array order is the order in which parts were streamed
+ type ContentPart =
+ | { type: "text"; text: string }
+ | {
+ type: "tool-call";
+ toolCallId: string;
+ toolName: string;
+ args: Record;
+ result?: unknown; // absent until the tool call is updated with a result
+ };
+ const contentParts: ContentPart[] = [];
+
+ // Track the current text segment index (for appending text deltas)
+ let currentTextPartIndex = -1;
+
+ // Map to track tool call indices for updating results
+ const toolCallIndices = new Map();
+
+ // Helper to get or create the current text part for appending text: extends the open text part, or starts a new one when none is open
+ const appendText = (delta: string) => {
+ if (currentTextPartIndex >= 0 && contentParts[currentTextPartIndex]?.type === "text") {
+ // Append to existing text part
+ (contentParts[currentTextPartIndex] as { type: "text"; text: string }).text += delta;
+ } else {
+ // Create new text part and remember its index so later deltas extend it
+ contentParts.push({ type: "text", text: delta });
+ currentTextPartIndex = contentParts.length - 1;
 }
- >();
+ };
+
+ // Helper to add a tool call (this "breaks" the current text segment); tools not in TOOLS_WITH_UI are ignored entirely
+ const addToolCall = (toolCallId: string, toolName: string, args: Record) => {
+ if (TOOLS_WITH_UI.has(toolName)) { // only tools with custom UI become content parts
+ contentParts.push({
+ type: "tool-call",
+ toolCallId,
+ toolName,
+ args,
+ });
+ toolCallIndices.set(toolCallId, contentParts.length - 1); // remember the position so args/result can be updated in place later
+ // Reset text part index so next text creates a new segment
+ currentTextPartIndex = -1;
+ }
+ };
+
+ // Helper to update a tool call's args or result in place; a toolCallId that was never indexed is silently ignored
+ const updateToolCall = (toolCallId: string, update: { args?: Record; result?: unknown }) => {
+ const index = toolCallIndices.get(toolCallId);
+ if (index !== undefined && contentParts[index]?.type === "tool-call") {
+ const tc = contentParts[index] as ContentPart & { type: "tool-call" };
+ if (update.args) tc.args = update.args; // left untouched when no args are supplied
+ if (update.result !== undefined) tc.result = update.result; // null and other falsy results are still stored
+ }
+ };
- // Helper to build content
- const buildContent = (): ThreadMessageLike["content"] => {
- const parts: Array<
- | { type: "text"; text: string }
- | {
- type: "tool-call";
- toolCallId: string;
- toolName: string;
- args: Record;
- result?: unknown;
- }
- > = [];
- if (accumulatedText) {
- parts.push({ type: "text", text: accumulatedText });
+ // Helper to build content for UI (without thinking-steps); part order is preserved
+ const buildContentForUI = (): ThreadMessageLike["content"] => {
+ // Filter to only include text parts with content and tool-calls with UI
+ const filtered = contentParts.filter((part) => {
+ if (part.type === "text") return part.text.length > 0;
+ if (part.type === "tool-call") return TOOLS_WITH_UI.has(part.toolName);
+ return false; // any other part type is dropped
+ });
+ return filtered.length > 0
+ ? (filtered as ThreadMessageLike["content"])
+ : [{ type: "text", text: "" }]; // placeholder so the content is never an empty array
+ };
+
+ // Helper to build content for persistence (includes thinking-steps, placed before all other parts)
+ const buildContentForPersistence = (): unknown[] => {
+ const parts: unknown[] = [];
+
+ // Include thinking steps for persistence (omitted when no steps were collected)
+ if (currentThinkingSteps.size > 0) {
+ parts.push({
+ type: "thinking-steps",
+ steps: Array.from(currentThinkingSteps.values()),
+ });
 }
- for (const toolCall of toolCalls.values()) {
- if (TOOLS_WITH_UI.has(toolCall.toolName)) {
- parts.push({
- type: "tool-call",
- toolCallId: toolCall.toolCallId,
- toolName: toolCall.toolName,
- args: toolCall.args,
- result: toolCall.result,
- });
+
+ // Add content parts (filtered): non-empty text and tool calls with UI, in streamed order
+ for (const part of contentParts) {
+ if (part.type === "text" && part.text.length > 0) {
+ parts.push(part);
+ } else if (part.type === "tool-call" && TOOLS_WITH_UI.has(part.toolName)) {
+ parts.push(part);
 }
 }
- return parts.length > 0
- ? (parts as ThreadMessageLike["content"])
- : [{ type: "text", text: "" }];
+
+ return parts.length > 0 ? parts : [{ type: "text", text: "" }]; // fall back to a single empty text part
 };
// Add placeholder assistant message
@@ -309,64 +422,79 @@ export default function NewChatPage() {
switch (parsed.type) {
case "text-delta":
- accumulatedText += parsed.delta;
+ appendText(parsed.delta);
setMessages((prev) =>
prev.map((m) =>
- m.id === assistantMsgId ? { ...m, content: buildContent() } : m
+ m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m
)
);
break;
case "tool-input-start":
- toolCalls.set(parsed.toolCallId, {
- toolCallId: parsed.toolCallId,
- toolName: parsed.toolName,
- args: {},
- });
+ // Add tool call inline - this breaks the current text segment
+ addToolCall(parsed.toolCallId, parsed.toolName, {});
setMessages((prev) =>
prev.map((m) =>
- m.id === assistantMsgId ? { ...m, content: buildContent() } : m
+ m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m
)
);
break;
case "tool-input-available": {
- const tc = toolCalls.get(parsed.toolCallId);
- if (tc) tc.args = parsed.input || {};
- else
- toolCalls.set(parsed.toolCallId, {
- toolCallId: parsed.toolCallId,
- toolName: parsed.toolName,
- args: parsed.input || {},
- });
+ // Update existing tool call's args, or add if not exists
+ if (toolCallIndices.has(parsed.toolCallId)) {
+ updateToolCall(parsed.toolCallId, { args: parsed.input || {} });
+ } else {
+ addToolCall(parsed.toolCallId, parsed.toolName, parsed.input || {});
+ }
setMessages((prev) =>
prev.map((m) =>
- m.id === assistantMsgId ? { ...m, content: buildContent() } : m
+ m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m
)
);
break;
}
case "tool-output-available": {
- const tc = toolCalls.get(parsed.toolCallId);
- if (tc) {
- tc.result = parsed.output;
- if (
- tc.toolName === "generate_podcast" &&
- parsed.output?.status === "processing" &&
- parsed.output?.task_id
- ) {
- setActivePodcastTaskId(parsed.output.task_id);
+ // Update the tool call with its result
+ updateToolCall(parsed.toolCallId, { result: parsed.output });
+ // Handle podcast-specific logic
+ if (parsed.output?.status === "processing" && parsed.output?.task_id) {
+ // Check if this is a podcast tool by looking at the content part
+ const idx = toolCallIndices.get(parsed.toolCallId);
+ if (idx !== undefined) {
+ const part = contentParts[idx];
+ if (part?.type === "tool-call" && part.toolName === "generate_podcast") {
+ setActivePodcastTaskId(parsed.output.task_id);
+ }
}
}
setMessages((prev) =>
prev.map((m) =>
- m.id === assistantMsgId ? { ...m, content: buildContent() } : m
+ m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m
)
);
break;
}
+ case "data-thinking-step": {
+ // Handle thinking step events for chain-of-thought display
+ const stepData = parsed.data as ThinkingStepData;
+ if (stepData?.id) {
+ currentThinkingSteps.set(stepData.id, stepData);
+ // Update message-specific thinking steps
+ setMessageThinkingSteps((prev) => {
+ const newMap = new Map(prev);
+ newMap.set(
+ assistantMsgId,
+ Array.from(currentThinkingSteps.values())
+ );
+ return newMap;
+ });
+ }
+ break;
+ }
+
case "error":
throw new Error(parsed.errorText || "Server error");
}
@@ -381,9 +509,9 @@ export default function NewChatPage() {
reader.releaseLock();
}
- // Persist assistant message
- const finalContent = buildContent();
- if (accumulatedText || toolCalls.size > 0) {
+ // Persist assistant message (with thinking steps for restoration on refresh)
+ const finalContent = buildContentForPersistence();
+ if (contentParts.length > 0) {
appendMessage(threadId, {
role: "assistant",
content: finalContent,
@@ -415,6 +543,7 @@ export default function NewChatPage() {
} finally {
setIsRunning(false);
abortControllerRef.current = null;
+ // Note: We no longer clear thinking steps - they persist with the message
}
},
[threadId, searchSpaceId, messages]
@@ -482,8 +611,11 @@ export default function NewChatPage() {
return (
+
+
+
-
+
);
diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx
index a41d2e143..e3ee85dce 100644
--- a/surfsense_web/components/assistant-ui/thread.tsx
+++ b/surfsense_web/components/assistant-ui/thread.tsx
@@ -7,10 +7,13 @@ import {
MessagePrimitive,
ThreadPrimitive,
useAssistantState,
+ useMessage,
} from "@assistant-ui/react";
import {
ArrowDownIcon,
ArrowUpIcon,
+ Brain,
+ CheckCircle2,
CheckIcon,
ChevronLeftIcon,
ChevronRightIcon,
@@ -18,10 +21,21 @@ import {
DownloadIcon,
Loader2,
PencilIcon,
+ Plug2,
+ Plus,
RefreshCwIcon,
+ Search,
+ Sparkles,
SquareIcon,
} from "lucide-react";
-import type { FC } from "react";
+import Image from "next/image";
+import Link from "next/link";
+import { type FC, useState, useRef, useCallback, useEffect } from "react";
+import { useAtomValue } from "jotai";
+import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
+import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors";
+import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
+import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
import {
ComposerAddAttachment,
ComposerAttachments,
@@ -30,39 +44,201 @@ import {
import { MarkdownText } from "@/components/assistant-ui/markdown-text";
import { ToolFallback } from "@/components/assistant-ui/tool-fallback";
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
+import {
+ ChainOfThought,
+ ChainOfThoughtContent,
+ ChainOfThoughtItem,
+ ChainOfThoughtStep,
+ ChainOfThoughtTrigger,
+} from "@/components/prompt-kit/chain-of-thought";
import { Button } from "@/components/ui/button";
import { cn } from "@/lib/utils";
+import { currentUserAtom } from "@/atoms/user/user-query.atoms";
+import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking";
-export const Thread: FC = () => {
+/**
+ * Props for the Thread component.
+ *
+ * NOTE(review): `messageThinkingSteps` is presumably keyed by message ID with
+ * a list of thinking steps as the value; the Map's type arguments are not
+ * visible here, so confirm against the caller.
+ */
+interface ThreadProps {
+ messageThinkingSteps?: Map; // optional: the component can be used without it
+}
+
+// Context to pass thinking steps to AssistantMessage
+import { createContext, useContext } from "react";
+
+const ThinkingStepsContext = createContext