mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-10 20:35:17 +02:00
feat: add web scraping tool to chat agent for extracting and summarizing webpage content
This commit is contained in:
parent
da7cb81252
commit
24dd52ed99
9 changed files with 1018 additions and 76 deletions
|
|
@ -18,6 +18,7 @@ from app.agents.new_chat.display_image import create_display_image_tool
|
|||
from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool
|
||||
from app.agents.new_chat.link_preview import create_link_preview_tool
|
||||
from app.agents.new_chat.podcast import create_generate_podcast_tool
|
||||
from app.agents.new_chat.scrape_webpage import create_scrape_webpage_tool
|
||||
from app.agents.new_chat.system_prompt import build_surfsense_system_prompt
|
||||
from app.services.connector_service import ConnectorService
|
||||
|
||||
|
|
@ -38,6 +39,8 @@ def create_surfsense_deep_agent(
|
|||
enable_podcast: bool = True,
|
||||
enable_link_preview: bool = True,
|
||||
enable_display_image: bool = True,
|
||||
enable_scrape_webpage: bool = True,
|
||||
firecrawl_api_key: str | None = None,
|
||||
additional_tools: Sequence[BaseTool] | None = None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -61,6 +64,10 @@ def create_surfsense_deep_agent(
|
|||
When True, the agent can fetch and display rich link previews.
|
||||
enable_display_image: Whether to include the display image tool (default: True).
|
||||
When True, the agent can display images with metadata.
|
||||
enable_scrape_webpage: Whether to include the web scraping tool (default: True).
|
||||
When True, the agent can scrape and read webpage content.
|
||||
firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
|
||||
Falls back to Chromium/Trafilatura if not provided.
|
||||
additional_tools: Optional sequence of additional tools to inject into the agent.
|
||||
The search_knowledge_base tool will always be included.
|
||||
|
||||
|
|
@ -96,6 +103,11 @@ def create_surfsense_deep_agent(
|
|||
display_image_tool = create_display_image_tool()
|
||||
tools.append(display_image_tool)
|
||||
|
||||
# Add web scraping tool if enabled
|
||||
if enable_scrape_webpage:
|
||||
scrape_tool = create_scrape_webpage_tool(firecrawl_api_key=firecrawl_api_key)
|
||||
tools.append(scrape_tool)
|
||||
|
||||
if additional_tools:
|
||||
tools.extend(additional_tools)
|
||||
|
||||
|
|
|
|||
|
|
@ -86,9 +86,7 @@ def create_display_image_tool():
|
|||
ratio = "16:9" # Default
|
||||
if "unsplash.com" in src or "pexels.com" in src:
|
||||
ratio = "16:9"
|
||||
elif "imgur.com" in src:
|
||||
ratio = "auto"
|
||||
elif "github.com" in src or "githubusercontent.com" in src:
|
||||
elif "imgur.com" in src or "github.com" in src or "githubusercontent.com" in src:
|
||||
ratio = "auto"
|
||||
|
||||
return {
|
||||
|
|
|
|||
197
surfsense_backend/app/agents/new_chat/scrape_webpage.py
Normal file
197
surfsense_backend/app/agents/new_chat/scrape_webpage.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
"""
|
||||
Web scraping tool for the new chat agent.
|
||||
|
||||
This module provides a tool for scraping and extracting content from webpages
|
||||
using the existing WebCrawlerConnector. The scraped content can be used by
|
||||
the agent to answer questions about web pages.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from langchain_core.tools import tool
|
||||
|
||||
from app.connectors.webcrawler_connector import WebCrawlerConnector
|
||||
|
||||
|
||||
def extract_domain(url: str) -> str:
|
||||
"""Extract the domain from a URL."""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc
|
||||
# Remove 'www.' prefix if present
|
||||
if domain.startswith("www."):
|
||||
domain = domain[4:]
|
||||
return domain
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def generate_scrape_id(url: str) -> str:
|
||||
"""Generate a unique ID for a scraped webpage."""
|
||||
hash_val = hashlib.md5(url.encode()).hexdigest()[:12]
|
||||
return f"scrape-{hash_val}"
|
||||
|
||||
|
||||
def truncate_content(content: str, max_length: int = 50000) -> tuple[str, bool]:
|
||||
"""
|
||||
Truncate content to a maximum length.
|
||||
|
||||
Returns:
|
||||
Tuple of (truncated_content, was_truncated)
|
||||
"""
|
||||
if len(content) <= max_length:
|
||||
return content, False
|
||||
|
||||
# Try to truncate at a sentence boundary
|
||||
truncated = content[:max_length]
|
||||
last_period = truncated.rfind(".")
|
||||
last_newline = truncated.rfind("\n\n")
|
||||
|
||||
# Use the later of the two boundaries, or just truncate
|
||||
boundary = max(last_period, last_newline)
|
||||
if boundary > max_length * 0.8: # Only use boundary if it's not too far back
|
||||
truncated = content[: boundary + 1]
|
||||
|
||||
return truncated + "\n\n[Content truncated...]", True
|
||||
|
||||
|
||||
def create_scrape_webpage_tool(firecrawl_api_key: str | None = None):
|
||||
"""
|
||||
Factory function to create the scrape_webpage tool.
|
||||
|
||||
Args:
|
||||
firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
|
||||
Falls back to Chromium/Trafilatura if not provided.
|
||||
|
||||
Returns:
|
||||
A configured tool function for scraping webpages.
|
||||
"""
|
||||
|
||||
@tool
|
||||
async def scrape_webpage(
|
||||
url: str,
|
||||
max_length: int = 50000,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Scrape and extract the main content from a webpage.
|
||||
|
||||
Use this tool when the user wants you to read, summarize, or answer
|
||||
questions about a specific webpage's content. This tool actually
|
||||
fetches and reads the full page content.
|
||||
|
||||
Common triggers:
|
||||
- "Read this article and summarize it"
|
||||
- "What does this page say about X?"
|
||||
- "Summarize this blog post for me"
|
||||
- "Tell me the key points from this article"
|
||||
- "What's in this webpage?"
|
||||
|
||||
Args:
|
||||
url: The URL of the webpage to scrape (must be HTTP/HTTPS)
|
||||
max_length: Maximum content length to return (default: 50000 chars)
|
||||
|
||||
Returns:
|
||||
A dictionary containing:
|
||||
- id: Unique identifier for this scrape
|
||||
- assetId: The URL (for deduplication)
|
||||
- kind: "article" (type of content)
|
||||
- href: The URL to open when clicked
|
||||
- title: Page title
|
||||
- description: Brief description or excerpt
|
||||
- content: The extracted main content (markdown format)
|
||||
- domain: The domain name
|
||||
- word_count: Approximate word count
|
||||
- was_truncated: Whether content was truncated
|
||||
- error: Error message (if scraping failed)
|
||||
"""
|
||||
scrape_id = generate_scrape_id(url)
|
||||
domain = extract_domain(url)
|
||||
|
||||
# Validate and normalize URL
|
||||
if not url.startswith(("http://", "https://")):
|
||||
url = f"https://{url}"
|
||||
|
||||
try:
|
||||
# Create webcrawler connector
|
||||
connector = WebCrawlerConnector(firecrawl_api_key=firecrawl_api_key)
|
||||
|
||||
# Crawl the URL
|
||||
result, error = await connector.crawl_url(url, formats=["markdown"])
|
||||
|
||||
if error:
|
||||
return {
|
||||
"id": scrape_id,
|
||||
"assetId": url,
|
||||
"kind": "article",
|
||||
"href": url,
|
||||
"title": domain or "Webpage",
|
||||
"domain": domain,
|
||||
"error": error,
|
||||
}
|
||||
|
||||
if not result:
|
||||
return {
|
||||
"id": scrape_id,
|
||||
"assetId": url,
|
||||
"kind": "article",
|
||||
"href": url,
|
||||
"title": domain or "Webpage",
|
||||
"domain": domain,
|
||||
"error": "No content returned from crawler",
|
||||
}
|
||||
|
||||
# Extract content and metadata
|
||||
content = result.get("content", "")
|
||||
metadata = result.get("metadata", {})
|
||||
|
||||
# Get title from metadata
|
||||
title = metadata.get("title", "")
|
||||
if not title:
|
||||
title = domain or url.split("/")[-1] or "Webpage"
|
||||
|
||||
# Get description from metadata
|
||||
description = metadata.get("description", "")
|
||||
if not description and content:
|
||||
# Use first paragraph as description
|
||||
first_para = content.split("\n\n")[0] if content else ""
|
||||
description = first_para[:300] + "..." if len(first_para) > 300 else first_para
|
||||
|
||||
# Truncate content if needed
|
||||
content, was_truncated = truncate_content(content, max_length)
|
||||
|
||||
# Calculate word count
|
||||
word_count = len(content.split())
|
||||
|
||||
return {
|
||||
"id": scrape_id,
|
||||
"assetId": url,
|
||||
"kind": "article",
|
||||
"href": url,
|
||||
"title": title,
|
||||
"description": description,
|
||||
"content": content,
|
||||
"domain": domain,
|
||||
"word_count": word_count,
|
||||
"was_truncated": was_truncated,
|
||||
"crawler_type": result.get("crawler_type", "unknown"),
|
||||
"author": metadata.get("author"),
|
||||
"date": metadata.get("date"),
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
error_message = str(e)
|
||||
print(f"[scrape_webpage] Error scraping {url}: {error_message}")
|
||||
return {
|
||||
"id": scrape_id,
|
||||
"assetId": url,
|
||||
"kind": "article",
|
||||
"href": url,
|
||||
"title": domain or "Webpage",
|
||||
"domain": domain,
|
||||
"error": f"Failed to scrape: {error_message[:100]}",
|
||||
}
|
||||
|
||||
return scrape_webpage
|
||||
|
||||
|
|
@ -173,6 +173,29 @@ You have access to the following tools:
|
|||
- description: Optional description providing context about the image
|
||||
- Returns: An image card with the image, title, and description
|
||||
- The image will automatically be displayed in the chat.
|
||||
|
||||
5. scrape_webpage: Scrape and extract the main content from a webpage.
|
||||
- Use this when the user wants you to READ and UNDERSTAND the actual content of a webpage.
|
||||
- IMPORTANT: This is different from link_preview:
|
||||
* link_preview: Only fetches metadata (title, description, thumbnail) for display
|
||||
* scrape_webpage: Actually reads the FULL page content so you can analyze/summarize it
|
||||
- Trigger scenarios:
|
||||
* "Read this article and summarize it"
|
||||
* "What does this page say about X?"
|
||||
* "Summarize this blog post for me"
|
||||
* "Tell me the key points from this article"
|
||||
* "What's in this webpage?"
|
||||
* "Can you analyze this article?"
|
||||
- Args:
|
||||
- url: The URL of the webpage to scrape (must be HTTP/HTTPS)
|
||||
- max_length: Maximum content length to return (default: 50000 chars)
|
||||
- Returns: The page title, description, full content (in markdown), word count, and metadata
|
||||
- After scraping, you will have the full article text and can analyze, summarize, or answer questions about it.
|
||||
- IMAGES: The scraped content may contain image URLs in markdown format like ``.
|
||||
* When you find relevant/important images in the scraped content, use the `display_image` tool to show them to the user.
|
||||
* This makes your response more visual and engaging.
|
||||
* Prioritize showing: diagrams, charts, infographics, key illustrations, or images that help explain the content.
|
||||
* Don't show every image - just the most relevant 1-3 images that enhance understanding.
|
||||
</tools>
|
||||
<tool_call_examples>
|
||||
- User: "Fetch all my notes and what's in them?"
|
||||
|
|
@ -205,6 +228,24 @@ You have access to the following tools:
|
|||
|
||||
- User: "Can you display a diagram of a neural network?"
|
||||
- Call: `display_image(src="https://example.com/neural-network.png", alt="Neural network diagram", title="Neural Network Architecture", description="A visual representation of a neural network with input, hidden, and output layers")`
|
||||
|
||||
- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
|
||||
- Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
|
||||
- After getting the content, provide a summary based on the scraped text
|
||||
|
||||
- User: "What does this page say about machine learning? https://docs.example.com/ml-guide"
|
||||
- Call: `scrape_webpage(url="https://docs.example.com/ml-guide")`
|
||||
- Then answer the question using the extracted content
|
||||
|
||||
- User: "Summarize this blog post: https://medium.com/some-article"
|
||||
- Call: `scrape_webpage(url="https://medium.com/some-article")`
|
||||
- Provide a comprehensive summary of the article content
|
||||
|
||||
- User: "Read this tutorial and explain it: https://example.com/ml-tutorial"
|
||||
- First: `scrape_webpage(url="https://example.com/ml-tutorial")`
|
||||
- Then, if the content contains useful diagrams/images like ``:
|
||||
- Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")`
|
||||
- Then provide your explanation, referencing the displayed image
|
||||
</tool_call_examples>{citation_section}
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -319,6 +319,20 @@ async def stream_new_chat(
|
|||
status="in_progress",
|
||||
items=last_active_step_items,
|
||||
)
|
||||
elif tool_name == "scrape_webpage":
|
||||
url = (
|
||||
tool_input.get("url", "")
|
||||
if isinstance(tool_input, dict)
|
||||
else str(tool_input)
|
||||
)
|
||||
last_active_step_title = "Scraping webpage"
|
||||
last_active_step_items = [f"URL: {url[:80]}{'...' if len(url) > 80 else ''}"]
|
||||
yield streaming_service.format_thinking_step(
|
||||
step_id=tool_step_id,
|
||||
title="Scraping webpage",
|
||||
status="in_progress",
|
||||
items=last_active_step_items,
|
||||
)
|
||||
elif tool_name == "generate_podcast":
|
||||
podcast_title = (
|
||||
tool_input.get("podcast_title", "SurfSense Podcast")
|
||||
|
|
@ -398,6 +412,16 @@ async def stream_new_chat(
|
|||
f"Displaying image: {src[:60]}{'...' if len(src) > 60 else ''}",
|
||||
"info",
|
||||
)
|
||||
elif tool_name == "scrape_webpage":
|
||||
url = (
|
||||
tool_input.get("url", "")
|
||||
if isinstance(tool_input, dict)
|
||||
else str(tool_input)
|
||||
)
|
||||
yield streaming_service.format_terminal_info(
|
||||
f"Scraping webpage: {url[:70]}{'...' if len(url) > 70 else ''}",
|
||||
"info",
|
||||
)
|
||||
elif tool_name == "generate_podcast":
|
||||
title = (
|
||||
tool_input.get("podcast_title", "SurfSense Podcast")
|
||||
|
|
@ -502,6 +526,31 @@ async def stream_new_chat(
|
|||
status="completed",
|
||||
items=completed_items,
|
||||
)
|
||||
elif tool_name == "scrape_webpage":
|
||||
# Build completion items for webpage scraping
|
||||
if isinstance(tool_output, dict):
|
||||
title = tool_output.get("title", "Webpage")
|
||||
word_count = tool_output.get("word_count", 0)
|
||||
has_error = "error" in tool_output
|
||||
if has_error:
|
||||
completed_items = [
|
||||
*last_active_step_items,
|
||||
f"Error: {tool_output.get('error', 'Failed to scrape')[:50]}",
|
||||
]
|
||||
else:
|
||||
completed_items = [
|
||||
*last_active_step_items,
|
||||
f"Title: {title[:50]}{'...' if len(title) > 50 else ''}",
|
||||
f"Extracted: {word_count:,} words",
|
||||
]
|
||||
else:
|
||||
completed_items = [*last_active_step_items, "Content extracted"]
|
||||
yield streaming_service.format_thinking_step(
|
||||
step_id=original_step_id,
|
||||
title="Scraping webpage",
|
||||
status="completed",
|
||||
items=completed_items,
|
||||
)
|
||||
elif tool_name == "generate_podcast":
|
||||
# Build detailed completion items based on podcast status
|
||||
podcast_status = (
|
||||
|
|
@ -630,6 +679,47 @@ async def stream_new_chat(
|
|||
f"Image displayed: {title[:40]}{'...' if len(title) > 40 else ''}",
|
||||
"success",
|
||||
)
|
||||
elif tool_name == "scrape_webpage":
|
||||
# Stream the scrape result so frontend can render the Article component
|
||||
# Note: We send metadata for display, but content goes to LLM for processing
|
||||
if isinstance(tool_output, dict):
|
||||
# Create a display-friendly output (without full content for the card)
|
||||
display_output = {
|
||||
k: v for k, v in tool_output.items() if k != "content"
|
||||
}
|
||||
# But keep a truncated content preview
|
||||
if "content" in tool_output:
|
||||
content = tool_output.get("content", "")
|
||||
display_output["content_preview"] = (
|
||||
content[:500] + "..." if len(content) > 500 else content
|
||||
)
|
||||
yield streaming_service.format_tool_output_available(
|
||||
tool_call_id,
|
||||
display_output,
|
||||
)
|
||||
else:
|
||||
yield streaming_service.format_tool_output_available(
|
||||
tool_call_id,
|
||||
{"result": tool_output},
|
||||
)
|
||||
# Send terminal message
|
||||
if isinstance(tool_output, dict) and "error" not in tool_output:
|
||||
title = tool_output.get("title", "Webpage")
|
||||
word_count = tool_output.get("word_count", 0)
|
||||
yield streaming_service.format_terminal_info(
|
||||
f"Scraped: {title[:40]}{'...' if len(title) > 40 else ''} ({word_count:,} words)",
|
||||
"success",
|
||||
)
|
||||
else:
|
||||
error_msg = (
|
||||
tool_output.get("error", "Failed to scrape")
|
||||
if isinstance(tool_output, dict)
|
||||
else "Failed to scrape"
|
||||
)
|
||||
yield streaming_service.format_terminal_info(
|
||||
f"Scrape failed: {error_msg}",
|
||||
"error",
|
||||
)
|
||||
elif tool_name == "search_knowledge_base":
|
||||
# Don't stream the full output for search (can be very large), just acknowledge
|
||||
yield streaming_service.format_tool_output_available(
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import { Thread } from "@/components/assistant-ui/thread";
|
|||
import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast";
|
||||
import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview";
|
||||
import { DisplayImageToolUI } from "@/components/tool-ui/display-image";
|
||||
import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage";
|
||||
import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking";
|
||||
import { getBearerToken } from "@/lib/auth-utils";
|
||||
import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter";
|
||||
|
|
@ -81,7 +82,7 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
|
|||
/**
|
||||
* Tools that should render custom UI in the chat.
|
||||
*/
|
||||
const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview", "display_image"]);
|
||||
const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview", "display_image", "scrape_webpage"]);
|
||||
|
||||
/**
|
||||
* Type for thinking step data from the backend
|
||||
|
|
@ -245,47 +246,74 @@ export default function NewChatPage() {
|
|||
|
||||
// Prepare assistant message
|
||||
const assistantMsgId = `msg-assistant-${Date.now()}`;
|
||||
let accumulatedText = "";
|
||||
const currentThinkingSteps = new Map<string, ThinkingStepData>();
|
||||
const toolCalls = new Map<
|
||||
string,
|
||||
{
|
||||
toolCallId: string;
|
||||
toolName: string;
|
||||
args: Record<string, unknown>;
|
||||
result?: unknown;
|
||||
|
||||
// Ordered content parts to preserve inline tool call positions
|
||||
// Each part is either a text segment or a tool call
|
||||
type ContentPart =
|
||||
| { type: "text"; text: string }
|
||||
| {
|
||||
type: "tool-call";
|
||||
toolCallId: string;
|
||||
toolName: string;
|
||||
args: Record<string, unknown>;
|
||||
result?: unknown;
|
||||
};
|
||||
const contentParts: ContentPart[] = [];
|
||||
|
||||
// Track the current text segment index (for appending text deltas)
|
||||
let currentTextPartIndex = -1;
|
||||
|
||||
// Map to track tool call indices for updating results
|
||||
const toolCallIndices = new Map<string, number>();
|
||||
|
||||
// Helper to get or create the current text part for appending text
|
||||
const appendText = (delta: string) => {
|
||||
if (currentTextPartIndex >= 0 && contentParts[currentTextPartIndex]?.type === "text") {
|
||||
// Append to existing text part
|
||||
(contentParts[currentTextPartIndex] as { type: "text"; text: string }).text += delta;
|
||||
} else {
|
||||
// Create new text part
|
||||
contentParts.push({ type: "text", text: delta });
|
||||
currentTextPartIndex = contentParts.length - 1;
|
||||
}
|
||||
>();
|
||||
};
|
||||
|
||||
// Helper to add a tool call (this "breaks" the current text segment)
|
||||
const addToolCall = (toolCallId: string, toolName: string, args: Record<string, unknown>) => {
|
||||
if (TOOLS_WITH_UI.has(toolName)) {
|
||||
contentParts.push({
|
||||
type: "tool-call",
|
||||
toolCallId,
|
||||
toolName,
|
||||
args,
|
||||
});
|
||||
toolCallIndices.set(toolCallId, contentParts.length - 1);
|
||||
// Reset text part index so next text creates a new segment
|
||||
currentTextPartIndex = -1;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper to update a tool call's args or result
|
||||
const updateToolCall = (toolCallId: string, update: { args?: Record<string, unknown>; result?: unknown }) => {
|
||||
const index = toolCallIndices.get(toolCallId);
|
||||
if (index !== undefined && contentParts[index]?.type === "tool-call") {
|
||||
const tc = contentParts[index] as ContentPart & { type: "tool-call" };
|
||||
if (update.args) tc.args = update.args;
|
||||
if (update.result !== undefined) tc.result = update.result;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper to build content for UI (without thinking-steps)
|
||||
const buildContentForUI = (): ThreadMessageLike["content"] => {
|
||||
const parts: Array<
|
||||
| { type: "text"; text: string }
|
||||
| {
|
||||
type: "tool-call";
|
||||
toolCallId: string;
|
||||
toolName: string;
|
||||
args: Record<string, unknown>;
|
||||
result?: unknown;
|
||||
}
|
||||
> = [];
|
||||
|
||||
if (accumulatedText) {
|
||||
parts.push({ type: "text", text: accumulatedText });
|
||||
}
|
||||
for (const toolCall of toolCalls.values()) {
|
||||
if (TOOLS_WITH_UI.has(toolCall.toolName)) {
|
||||
parts.push({
|
||||
type: "tool-call",
|
||||
toolCallId: toolCall.toolCallId,
|
||||
toolName: toolCall.toolName,
|
||||
args: toolCall.args,
|
||||
result: toolCall.result,
|
||||
});
|
||||
}
|
||||
}
|
||||
return parts.length > 0
|
||||
? (parts as ThreadMessageLike["content"])
|
||||
// Filter to only include text parts with content and tool-calls with UI
|
||||
const filtered = contentParts.filter((part) => {
|
||||
if (part.type === "text") return part.text.length > 0;
|
||||
if (part.type === "tool-call") return TOOLS_WITH_UI.has(part.toolName);
|
||||
return false;
|
||||
});
|
||||
return filtered.length > 0
|
||||
? (filtered as ThreadMessageLike["content"])
|
||||
: [{ type: "text", text: "" }];
|
||||
};
|
||||
|
||||
|
|
@ -301,20 +329,15 @@ export default function NewChatPage() {
|
|||
});
|
||||
}
|
||||
|
||||
if (accumulatedText) {
|
||||
parts.push({ type: "text", text: accumulatedText });
|
||||
}
|
||||
for (const toolCall of toolCalls.values()) {
|
||||
if (TOOLS_WITH_UI.has(toolCall.toolName)) {
|
||||
parts.push({
|
||||
type: "tool-call",
|
||||
toolCallId: toolCall.toolCallId,
|
||||
toolName: toolCall.toolName,
|
||||
args: toolCall.args,
|
||||
result: toolCall.result,
|
||||
});
|
||||
// Add content parts (filtered)
|
||||
for (const part of contentParts) {
|
||||
if (part.type === "text" && part.text.length > 0) {
|
||||
parts.push(part);
|
||||
} else if (part.type === "tool-call" && TOOLS_WITH_UI.has(part.toolName)) {
|
||||
parts.push(part);
|
||||
}
|
||||
}
|
||||
|
||||
return parts.length > 0 ? parts : [{ type: "text", text: "" }];
|
||||
};
|
||||
|
||||
|
|
@ -399,7 +422,7 @@ export default function NewChatPage() {
|
|||
|
||||
switch (parsed.type) {
|
||||
case "text-delta":
|
||||
accumulatedText += parsed.delta;
|
||||
appendText(parsed.delta);
|
||||
setMessages((prev) =>
|
||||
prev.map((m) =>
|
||||
m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m
|
||||
|
|
@ -408,11 +431,8 @@ export default function NewChatPage() {
|
|||
break;
|
||||
|
||||
case "tool-input-start":
|
||||
toolCalls.set(parsed.toolCallId, {
|
||||
toolCallId: parsed.toolCallId,
|
||||
toolName: parsed.toolName,
|
||||
args: {},
|
||||
});
|
||||
// Add tool call inline - this breaks the current text segment
|
||||
addToolCall(parsed.toolCallId, parsed.toolName, {});
|
||||
setMessages((prev) =>
|
||||
prev.map((m) =>
|
||||
m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m
|
||||
|
|
@ -421,14 +441,12 @@ export default function NewChatPage() {
|
|||
break;
|
||||
|
||||
case "tool-input-available": {
|
||||
const tc = toolCalls.get(parsed.toolCallId);
|
||||
if (tc) tc.args = parsed.input || {};
|
||||
else
|
||||
toolCalls.set(parsed.toolCallId, {
|
||||
toolCallId: parsed.toolCallId,
|
||||
toolName: parsed.toolName,
|
||||
args: parsed.input || {},
|
||||
});
|
||||
// Update existing tool call's args, or add if not exists
|
||||
if (toolCallIndices.has(parsed.toolCallId)) {
|
||||
updateToolCall(parsed.toolCallId, { args: parsed.input || {} });
|
||||
} else {
|
||||
addToolCall(parsed.toolCallId, parsed.toolName, parsed.input || {});
|
||||
}
|
||||
setMessages((prev) =>
|
||||
prev.map((m) =>
|
||||
m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m
|
||||
|
|
@ -438,15 +456,17 @@ export default function NewChatPage() {
|
|||
}
|
||||
|
||||
case "tool-output-available": {
|
||||
const tc = toolCalls.get(parsed.toolCallId);
|
||||
if (tc) {
|
||||
tc.result = parsed.output;
|
||||
if (
|
||||
tc.toolName === "generate_podcast" &&
|
||||
parsed.output?.status === "processing" &&
|
||||
parsed.output?.task_id
|
||||
) {
|
||||
setActivePodcastTaskId(parsed.output.task_id);
|
||||
// Update the tool call with its result
|
||||
updateToolCall(parsed.toolCallId, { result: parsed.output });
|
||||
// Handle podcast-specific logic
|
||||
if (parsed.output?.status === "processing" && parsed.output?.task_id) {
|
||||
// Check if this is a podcast tool by looking at the content part
|
||||
const idx = toolCallIndices.get(parsed.toolCallId);
|
||||
if (idx !== undefined) {
|
||||
const part = contentParts[idx];
|
||||
if (part?.type === "tool-call" && part.toolName === "generate_podcast") {
|
||||
setActivePodcastTaskId(parsed.output.task_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
setMessages((prev) =>
|
||||
|
|
@ -491,7 +511,7 @@ export default function NewChatPage() {
|
|||
|
||||
// Persist assistant message (with thinking steps for restoration on refresh)
|
||||
const finalContent = buildContentForPersistence();
|
||||
if (accumulatedText || toolCalls.size > 0) {
|
||||
if (contentParts.length > 0) {
|
||||
appendMessage(threadId, {
|
||||
role: "assistant",
|
||||
content: finalContent,
|
||||
|
|
@ -593,6 +613,7 @@ export default function NewChatPage() {
|
|||
<GeneratePodcastToolUI />
|
||||
<LinkPreviewToolUI />
|
||||
<DisplayImageToolUI />
|
||||
<ScrapeWebpageToolUI />
|
||||
<div className="h-[calc(100vh-64px)] max-h-[calc(100vh-64px)] overflow-hidden">
|
||||
<Thread messageThinkingSteps={messageThinkingSteps} />
|
||||
</div>
|
||||
|
|
|
|||
406
surfsense_web/components/tool-ui/article/index.tsx
Normal file
406
surfsense_web/components/tool-ui/article/index.tsx
Normal file
|
|
@ -0,0 +1,406 @@
|
|||
"use client";
|
||||
|
||||
import { Card, CardContent } from "@/components/ui/card";
|
||||
import {
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
TooltipProvider,
|
||||
TooltipTrigger,
|
||||
} from "@/components/ui/tooltip";
|
||||
import { cn } from "@/lib/utils";
|
||||
import {
|
||||
AlertCircleIcon,
|
||||
BookOpenIcon,
|
||||
CalendarIcon,
|
||||
ExternalLinkIcon,
|
||||
FileTextIcon,
|
||||
UserIcon,
|
||||
} from "lucide-react";
|
||||
import { Component, type ReactNode, useCallback } from "react";
|
||||
|
||||
/**
|
||||
* Article component props
|
||||
*/
|
||||
export interface ArticleProps {
|
||||
/** Unique identifier for the article */
|
||||
id: string;
|
||||
/** Asset identifier (usually the URL) */
|
||||
assetId?: string;
|
||||
/** Article title */
|
||||
title: string;
|
||||
/** Brief description or excerpt */
|
||||
description?: string;
|
||||
/** Full content of the article (markdown) */
|
||||
content?: string;
|
||||
/** URL to the original article */
|
||||
href?: string;
|
||||
/** Domain of the article source */
|
||||
domain?: string;
|
||||
/** Author name */
|
||||
author?: string;
|
||||
/** Publication date */
|
||||
date?: string;
|
||||
/** Word count */
|
||||
wordCount?: number;
|
||||
/** Whether content was truncated */
|
||||
wasTruncated?: boolean;
|
||||
/** Optional max width */
|
||||
maxWidth?: string;
|
||||
/** Optional error message */
|
||||
error?: string;
|
||||
/** Optional className */
|
||||
className?: string;
|
||||
/** Response actions */
|
||||
responseActions?: Array<{
|
||||
id: string;
|
||||
label: string;
|
||||
variant?: "default" | "outline";
|
||||
}>;
|
||||
/** Response action handler */
|
||||
onResponseAction?: (actionId: string) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
* Serializable article data type (from backend)
|
||||
*/
|
||||
export interface SerializableArticle {
|
||||
id: string;
|
||||
assetId?: string;
|
||||
kind?: "article";
|
||||
title: string;
|
||||
description?: string;
|
||||
content?: string;
|
||||
href?: string;
|
||||
domain?: string;
|
||||
author?: string;
|
||||
date?: string;
|
||||
word_count?: number;
|
||||
wordCount?: number;
|
||||
was_truncated?: boolean;
|
||||
wasTruncated?: boolean;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse serializable article data to ArticleProps
|
||||
*/
|
||||
export function parseSerializableArticle(data: unknown): ArticleProps {
|
||||
const obj = data as Record<string, unknown>;
|
||||
return {
|
||||
id: String(obj.id || "article-unknown"),
|
||||
assetId: obj.assetId as string | undefined,
|
||||
title: String(obj.title || "Untitled Article"),
|
||||
description: obj.description as string | undefined,
|
||||
content: obj.content as string | undefined,
|
||||
href: obj.href as string | undefined,
|
||||
domain: obj.domain as string | undefined,
|
||||
author: obj.author as string | undefined,
|
||||
date: obj.date as string | undefined,
|
||||
wordCount: (obj.word_count || obj.wordCount) as number | undefined,
|
||||
wasTruncated: (obj.was_truncated || obj.wasTruncated) as boolean | undefined,
|
||||
error: obj.error as string | undefined,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Format word count for display
|
||||
*/
|
||||
function formatWordCount(count: number): string {
|
||||
if (count >= 1000) {
|
||||
return `${(count / 1000).toFixed(1)}k words`;
|
||||
}
|
||||
return `${count} words`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Article card component for displaying scraped webpage content
|
||||
*/
|
||||
export function Article({
|
||||
id,
|
||||
title,
|
||||
description,
|
||||
content,
|
||||
href,
|
||||
domain,
|
||||
author,
|
||||
date,
|
||||
wordCount,
|
||||
wasTruncated,
|
||||
maxWidth = "100%",
|
||||
error,
|
||||
className,
|
||||
responseActions,
|
||||
onResponseAction,
|
||||
}: ArticleProps) {
|
||||
const handleCardClick = useCallback(() => {
|
||||
if (href) {
|
||||
window.open(href, "_blank", "noopener,noreferrer");
|
||||
}
|
||||
}, [href]);
|
||||
|
||||
// Error state
|
||||
if (error) {
|
||||
return (
|
||||
<Card
|
||||
id={id}
|
||||
className={cn(
|
||||
"overflow-hidden border-destructive/20 bg-destructive/5",
|
||||
className
|
||||
)}
|
||||
style={{ maxWidth }}
|
||||
>
|
||||
<CardContent className="p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="flex size-10 shrink-0 items-center justify-center rounded-lg bg-destructive/10">
|
||||
<AlertCircleIcon className="size-5 text-destructive" />
|
||||
</div>
|
||||
<div className="flex-1 min-w-0">
|
||||
<p className="font-medium text-destructive text-sm">
|
||||
Failed to scrape webpage
|
||||
</p>
|
||||
{href && (
|
||||
<p className="text-muted-foreground text-xs mt-0.5 truncate">
|
||||
{href}
|
||||
</p>
|
||||
)}
|
||||
<p className="text-muted-foreground text-xs mt-1">{error}</p>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<TooltipProvider>
|
||||
<Card
|
||||
id={id}
|
||||
className={cn(
|
||||
"group relative overflow-hidden transition-all duration-200",
|
||||
"hover:shadow-lg hover:border-primary/20",
|
||||
href && "cursor-pointer",
|
||||
className
|
||||
)}
|
||||
style={{ maxWidth }}
|
||||
onClick={href ? handleCardClick : undefined}
|
||||
role={href ? "link" : undefined}
|
||||
tabIndex={href ? 0 : undefined}
|
||||
onKeyDown={(e) => {
|
||||
if (href && (e.key === "Enter" || e.key === " ")) {
|
||||
e.preventDefault();
|
||||
handleCardClick();
|
||||
}
|
||||
}}
|
||||
>
|
||||
{/* Header */}
|
||||
<CardContent className="p-4">
|
||||
<div className="flex items-start gap-3">
|
||||
{/* Icon */}
|
||||
<div className="flex size-10 shrink-0 items-center justify-center rounded-lg bg-primary/10">
|
||||
<BookOpenIcon className="size-5 text-primary" />
|
||||
</div>
|
||||
|
||||
{/* Content */}
|
||||
<div className="flex-1 min-w-0">
|
||||
{/* Title */}
|
||||
<h3 className="font-semibold text-sm line-clamp-2 group-hover:text-primary transition-colors">
|
||||
{title}
|
||||
</h3>
|
||||
|
||||
{/* Description */}
|
||||
{description && (
|
||||
<p className="text-muted-foreground text-xs mt-1 line-clamp-2">
|
||||
{description}
|
||||
</p>
|
||||
)}
|
||||
|
||||
{/* Metadata row */}
|
||||
<div className="flex flex-wrap items-center gap-x-3 gap-y-1 mt-2 text-xs text-muted-foreground">
|
||||
{domain && (
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<span className="flex items-center gap-1">
|
||||
<ExternalLinkIcon className="size-3" />
|
||||
<span className="truncate max-w-[120px]">{domain}</span>
|
||||
</span>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<p>Source: {domain}</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
)}
|
||||
|
||||
{author && (
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<span className="flex items-center gap-1">
|
||||
<UserIcon className="size-3" />
|
||||
<span className="truncate max-w-[100px]">{author}</span>
|
||||
</span>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<p>Author: {author}</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
)}
|
||||
|
||||
{date && (
|
||||
<span className="flex items-center gap-1">
|
||||
<CalendarIcon className="size-3" />
|
||||
<span>{date}</span>
|
||||
</span>
|
||||
)}
|
||||
|
||||
{wordCount && (
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<span className="flex items-center gap-1">
|
||||
<FileTextIcon className="size-3" />
|
||||
<span>{formatWordCount(wordCount)}</span>
|
||||
{wasTruncated && (
|
||||
<span className="text-warning">(truncated)</span>
|
||||
)}
|
||||
</span>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<p>
|
||||
{wasTruncated
|
||||
? "Content was truncated due to length"
|
||||
: "Full article content available"}
|
||||
</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* External link indicator */}
|
||||
{href && (
|
||||
<div className="flex-shrink-0 opacity-0 group-hover:opacity-100 transition-opacity">
|
||||
<ExternalLinkIcon className="size-4 text-muted-foreground" />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Response actions */}
|
||||
{responseActions && responseActions.length > 0 && (
|
||||
<div className="flex gap-2 mt-3 pt-3 border-t">
|
||||
{responseActions.map((action) => (
|
||||
<button
|
||||
key={action.id}
|
||||
type="button"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
onResponseAction?.(action.id);
|
||||
}}
|
||||
className={cn(
|
||||
"px-3 py-1.5 text-xs font-medium rounded-md transition-colors",
|
||||
action.variant === "outline"
|
||||
? "border border-input bg-background hover:bg-accent hover:text-accent-foreground"
|
||||
: "bg-primary text-primary-foreground hover:bg-primary/90"
|
||||
)}
|
||||
>
|
||||
{action.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</TooltipProvider>
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loading state for article component
|
||||
*/
|
||||
export function ArticleLoading({
|
||||
title = "Loading article...",
|
||||
}: { title?: string }) {
|
||||
return (
|
||||
<Card className="overflow-hidden animate-pulse">
|
||||
<CardContent className="p-4">
|
||||
<div className="flex items-start gap-3">
|
||||
<div className="size-10 rounded-lg bg-muted" />
|
||||
<div className="flex-1 space-y-2">
|
||||
<div className="h-4 bg-muted rounded w-3/4" />
|
||||
<div className="h-3 bg-muted rounded w-full" />
|
||||
<div className="h-3 bg-muted rounded w-1/2" />
|
||||
</div>
|
||||
</div>
|
||||
<p className="text-xs text-muted-foreground mt-3">{title}</p>
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Skeleton for article component
|
||||
*/
|
||||
export function ArticleSkeleton() {
|
||||
return (
|
||||
<Card className="overflow-hidden">
|
||||
<CardContent className="p-4">
|
||||
<div className="flex items-start gap-3 animate-pulse">
|
||||
<div className="size-10 rounded-lg bg-muted" />
|
||||
<div className="flex-1 space-y-2">
|
||||
<div className="h-4 bg-muted rounded w-3/4" />
|
||||
<div className="h-3 bg-muted rounded w-full" />
|
||||
<div className="h-3 bg-muted rounded w-2/3" />
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Error boundary props
|
||||
*/
|
||||
interface ErrorBoundaryProps {
|
||||
children: ReactNode;
|
||||
fallback?: ReactNode;
|
||||
}
|
||||
|
||||
interface ErrorBoundaryState {
|
||||
hasError: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Error boundary for article component
|
||||
*/
|
||||
export class ArticleErrorBoundary extends Component<
|
||||
ErrorBoundaryProps,
|
||||
ErrorBoundaryState
|
||||
> {
|
||||
constructor(props: ErrorBoundaryProps) {
|
||||
super(props);
|
||||
this.state = { hasError: false };
|
||||
}
|
||||
|
||||
static getDerivedStateFromError(): ErrorBoundaryState {
|
||||
return { hasError: true };
|
||||
}
|
||||
|
||||
render() {
|
||||
if (this.state.hasError) {
|
||||
return (
|
||||
this.props.fallback || (
|
||||
<Card className="overflow-hidden border-destructive/20 bg-destructive/5">
|
||||
<CardContent className="p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<AlertCircleIcon className="size-5 text-destructive" />
|
||||
<p className="text-sm text-destructive">
|
||||
Failed to render article
|
||||
</p>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
return this.props.children;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -46,3 +46,17 @@ export {
|
|||
type DisplayImageArgs,
|
||||
type DisplayImageResult,
|
||||
} from "./display-image";
|
||||
export {
|
||||
Article,
|
||||
ArticleErrorBoundary,
|
||||
ArticleLoading,
|
||||
ArticleSkeleton,
|
||||
parseSerializableArticle,
|
||||
type ArticleProps,
|
||||
type SerializableArticle,
|
||||
} from "./article";
|
||||
export {
|
||||
ScrapeWebpageToolUI,
|
||||
type ScrapeWebpageArgs,
|
||||
type ScrapeWebpageResult,
|
||||
} from "./scrape-webpage";
|
||||
|
|
|
|||
163
surfsense_web/components/tool-ui/scrape-webpage.tsx
Normal file
163
surfsense_web/components/tool-ui/scrape-webpage.tsx
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
"use client";
|
||||
|
||||
import { makeAssistantToolUI } from "@assistant-ui/react";
|
||||
import { AlertCircleIcon, FileTextIcon } from "lucide-react";
|
||||
import {
|
||||
Article,
|
||||
ArticleErrorBoundary,
|
||||
ArticleLoading,
|
||||
parseSerializableArticle,
|
||||
} from "@/components/tool-ui/article";
|
||||
|
||||
/**
|
||||
* Type definitions for the scrape_webpage tool
|
||||
*/
|
||||
interface ScrapeWebpageArgs {
|
||||
url: string;
|
||||
max_length?: number;
|
||||
}
|
||||
|
||||
interface ScrapeWebpageResult {
|
||||
id: string;
|
||||
assetId: string;
|
||||
kind: "article";
|
||||
href: string;
|
||||
title: string;
|
||||
description?: string;
|
||||
content?: string;
|
||||
domain?: string;
|
||||
author?: string;
|
||||
date?: string;
|
||||
word_count?: number;
|
||||
was_truncated?: boolean;
|
||||
crawler_type?: string;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Error state component shown when webpage scraping fails
|
||||
*/
|
||||
function ScrapeErrorState({ url, error }: { url: string; error: string }) {
|
||||
return (
|
||||
<div className="my-4 overflow-hidden rounded-xl border border-destructive/20 bg-destructive/5 p-4 max-w-md">
|
||||
<div className="flex items-center gap-4">
|
||||
<div className="flex size-12 shrink-0 items-center justify-center rounded-lg bg-destructive/10">
|
||||
<AlertCircleIcon className="size-6 text-destructive" />
|
||||
</div>
|
||||
<div className="flex-1 min-w-0">
|
||||
<p className="font-medium text-destructive text-sm">Failed to scrape webpage</p>
|
||||
<p className="text-muted-foreground text-xs mt-0.5 truncate">{url}</p>
|
||||
<p className="text-muted-foreground text-xs mt-1">{error}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancelled state component
|
||||
*/
|
||||
function ScrapeCancelledState({ url }: { url: string }) {
|
||||
return (
|
||||
<div className="my-4 rounded-xl border border-muted p-4 text-muted-foreground max-w-md">
|
||||
<p className="flex items-center gap-2">
|
||||
<FileTextIcon className="size-4" />
|
||||
<span className="line-through truncate">Scraping: {url}</span>
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parsed Article component with error handling
|
||||
*/
|
||||
function ParsedArticle({ result }: { result: unknown }) {
|
||||
const article = parseSerializableArticle(result);
|
||||
|
||||
return (
|
||||
<Article
|
||||
{...article}
|
||||
maxWidth="480px"
|
||||
responseActions={[
|
||||
{ id: "open", label: "Open Source", variant: "default" },
|
||||
]}
|
||||
onResponseAction={(id) => {
|
||||
if (id === "open" && article.href) {
|
||||
window.open(article.href, "_blank", "noopener,noreferrer");
|
||||
}
|
||||
}}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scrape Webpage Tool UI Component
|
||||
*
|
||||
* This component is registered with assistant-ui to render an article card
|
||||
* when the scrape_webpage tool is called by the agent.
|
||||
*
|
||||
* It displays scraped webpage content including:
|
||||
* - Title and description
|
||||
* - Author and date (if available)
|
||||
* - Word count
|
||||
* - Link to original source
|
||||
*/
|
||||
export const ScrapeWebpageToolUI = makeAssistantToolUI<
|
||||
ScrapeWebpageArgs,
|
||||
ScrapeWebpageResult
|
||||
>({
|
||||
toolName: "scrape_webpage",
|
||||
render: function ScrapeWebpageUI({ args, result, status }) {
|
||||
const url = args.url || "Unknown URL";
|
||||
|
||||
// Loading state - tool is still running
|
||||
if (status.type === "running" || status.type === "requires-action") {
|
||||
return (
|
||||
<div className="my-4">
|
||||
<ArticleLoading title={`Scraping ${url}...`} />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Incomplete/cancelled state
|
||||
if (status.type === "incomplete") {
|
||||
if (status.reason === "cancelled") {
|
||||
return <ScrapeCancelledState url={url} />;
|
||||
}
|
||||
if (status.reason === "error") {
|
||||
return (
|
||||
<ScrapeErrorState
|
||||
url={url}
|
||||
error={typeof status.error === "string" ? status.error : "An error occurred"}
|
||||
/>
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// No result yet
|
||||
if (!result) {
|
||||
return (
|
||||
<div className="my-4">
|
||||
<ArticleLoading title={`Extracting content from ${url}...`} />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Error result from the tool
|
||||
if (result.error) {
|
||||
return <ScrapeErrorState url={url} error={result.error} />;
|
||||
}
|
||||
|
||||
// Success - render the article card
|
||||
return (
|
||||
<div className="my-4">
|
||||
<ArticleErrorBoundary>
|
||||
<ParsedArticle result={result} />
|
||||
</ArticleErrorBoundary>
|
||||
</div>
|
||||
);
|
||||
},
|
||||
});
|
||||
|
||||
export type { ScrapeWebpageArgs, ScrapeWebpageResult };
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue