From 2f622891aedaa47fe67386d2d4b53fe72cc49bae Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Mon, 22 Dec 2025 18:38:08 +0530 Subject: [PATCH 01/13] feat: implement dynamic greeting and thinking steps display in Thread component --- .../components/assistant-ui/thread.tsx | 188 ++++++++++++------ 1 file changed, 128 insertions(+), 60 deletions(-) diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index a41d2e143..5b4e430d7 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -21,7 +21,9 @@ import { RefreshCwIcon, SquareIcon, } from "lucide-react"; +import Image from "next/image"; import type { FC } from "react"; +import { useAtomValue } from "jotai"; import { ComposerAddAttachment, ComposerAttachments, @@ -32,6 +34,7 @@ import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { Button } from "@/components/ui/button"; import { cn } from "@/lib/utils"; +import { currentUserAtom } from "@/atoms/user/user-query.atoms"; export const Thread: FC = () => { return ( @@ -59,7 +62,11 @@ export const Thread: FC = () => { - + !thread.isEmpty}> +
+ +
+
@@ -80,62 +87,109 @@ const ThreadScrollToBottom: FC = () => { ); }; -const ThreadWelcome: FC = () => { - return ( -
-
-
-

- Hello there! -

-

- How can I help you today? -

-
-
- -
- ); +const getTimeBasedGreeting = (userEmail?: string): string => { + const hour = new Date().getHours(); + + // Extract first name from email if available + const firstName = userEmail + ? userEmail.split("@")[0].split(".")[0].charAt(0).toUpperCase() + + userEmail.split("@")[0].split(".")[0].slice(1) + : null; + + // Array of greeting variations for each time period + const morningGreetings = [ + "Good morning", + "Rise and shine", + "Morning", + "Hey there", + "Welcome back", + ]; + + const afternoonGreetings = [ + "Good afternoon", + "Afternoon", + "Hey there", + "Welcome back", + "Hope you're having a great day", + ]; + + const eveningGreetings = [ + "Good evening", + "Evening", + "Hey there", + "Welcome back", + "Hope you had a great day", + ]; + + const nightGreetings = [ + "Late night", + "Still up", + "Hey there", + "Welcome back", + "Burning the midnight oil", + ]; + + // Select a random greeting based on time + let greeting: string; + if (hour < 12) { + greeting = morningGreetings[Math.floor(Math.random() * morningGreetings.length)]; + } else if (hour < 17) { + greeting = afternoonGreetings[Math.floor(Math.random() * afternoonGreetings.length)]; + } else if (hour < 21) { + greeting = eveningGreetings[Math.floor(Math.random() * eveningGreetings.length)]; + } else { + greeting = nightGreetings[Math.floor(Math.random() * nightGreetings.length)]; + } + + // Add personalization with first name if available + if (firstName) { + return `${greeting}, ${firstName}!`; + } + + return `${greeting}!`; }; -const SUGGESTIONS = [ - { - title: "What's the weather", - label: "in San Francisco?", - prompt: "What's the weather in San Francisco?", - }, - { - title: "Explain React hooks", - label: "like useState and useEffect", - prompt: "Explain React hooks like useState and useEffect", - }, -] as const; - -const ThreadSuggestions: FC = () => { +const ThreadWelcome: FC = () => { + const { data: user } = useAtomValue(currentUserAtom); + return ( -
- {SUGGESTIONS.map((suggestion, index) => ( -
- - - -
- ))} +
+ {/* Greeting positioned near the composer */} +
+

+ {/** biome-ignore lint/a11y/noStaticElementInteractions: wrong lint error, this is a workaround to fix the lint error */} +
{ + const rect = e.currentTarget.getBoundingClientRect(); + const x = (e.clientX - rect.left - rect.width / 2) / 3; + const y = (e.clientY - rect.top - rect.height / 2) / 3; + e.currentTarget.style.setProperty("--mag-x", `${x}px`); + e.currentTarget.style.setProperty("--mag-y", `${y}px`); + }} + onMouseLeave={(e) => { + e.currentTarget.style.setProperty("--mag-x", "0px"); + e.currentTarget.style.setProperty("--mag-y", "0px"); + }} + > + SurfSense +
+ {getTimeBasedGreeting(user?.email)} +

+
+ {/* Composer centered in the middle of the screen */} +
+ +
); }; @@ -143,10 +197,10 @@ const ThreadSuggestions: FC = () => { const Composer: FC = () => { return ( - + { }) ); + // Check if composer text is empty + const isComposerEmpty = useAssistantState(({ composer }) => { + const text = composer.text?.trim() || ""; + return text.length === 0; + }); + + const isSendDisabled = hasProcessingAttachments || isComposerEmpty; + return (
@@ -183,19 +245,25 @@ const ComposerAction: FC = () => { )} !thread.isRunning}> - + From 8a99752f2ff6e94aa3e5c732227d0010589111fe Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Mon, 22 Dec 2025 22:54:22 +0530 Subject: [PATCH 02/13] feat: enhance chat functionality with chain-of-thought display and thinking steps management --- .../app/services/new_streaming_service.py | 29 +++ .../app/tasks/chat/stream_new_chat.py | 215 ++++++++++++++++++ .../new-chat/[[...chat_id]]/page.tsx | 83 ++++++- .../components/assistant-ui/thread.tsx | 190 +++++++++++++--- .../prompt-kit/chain-of-thought.tsx | 148 ++++++++++++ .../components/tool-ui/deepagent-thinking.tsx | 203 +++++++++++++++++ surfsense_web/components/tool-ui/index.ts | 7 + surfsense_web/components/ui/collapsible.tsx | 30 ++- 8 files changed, 857 insertions(+), 48 deletions(-) create mode 100644 surfsense_web/components/prompt-kit/chain-of-thought.tsx create mode 100644 surfsense_web/components/tool-ui/deepagent-thinking.tsx diff --git a/surfsense_backend/app/services/new_streaming_service.py b/surfsense_backend/app/services/new_streaming_service.py index f0f05cdb6..05dd2d4dd 100644 --- a/surfsense_backend/app/services/new_streaming_service.py +++ b/surfsense_backend/app/services/new_streaming_service.py @@ -450,6 +450,35 @@ class VercelStreamingService: """ return self.format_data("further-questions", {"questions": questions}) + def format_thinking_step( + self, + step_id: str, + title: str, + status: str = "in_progress", + items: list[str] | None = None, + ) -> str: + """ + Format a thinking step for chain-of-thought display (SurfSense specific). 
+ + Args: + step_id: Unique identifier for the step + title: The step title (e.g., "Analyzing your request") + status: Step status - "pending", "in_progress", or "completed" + items: Optional list of sub-items/details for this step + + Returns: + str: SSE formatted thinking step data part + """ + return self.format_data( + "thinking-step", + { + "id": step_id, + "title": title, + "status": status, + "items": items or [], + }, + ) + # ========================================================================= # Error Part # ========================================================================= diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 9711445aa..cb40a82a8 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -154,6 +154,49 @@ async def stream_new_chat( # Reset text tracking for this stream accumulated_text = "" + # Track thinking steps for chain-of-thought display + thinking_step_counter = 0 + # Map run_id -> step_id for tool calls so we can update them on completion + tool_step_ids: dict[str, str] = {} + # Track the last active step so we can mark it complete at the end + last_active_step_id: str | None = None + last_active_step_title: str = "" + last_active_step_items: list[str] = [] + # Track which steps have been completed to avoid duplicate completions + completed_step_ids: set[str] = set() + # Track if we just finished a tool (text flows silently after tools) + just_finished_tool: bool = False + + def next_thinking_step_id() -> str: + nonlocal thinking_step_counter + thinking_step_counter += 1 + return f"thinking-{thinking_step_counter}" + + def complete_current_step() -> str | None: + """Complete the current active step and return the completion event, if any.""" + nonlocal last_active_step_id, last_active_step_title, last_active_step_items + if last_active_step_id and last_active_step_id not in 
completed_step_ids: + completed_step_ids.add(last_active_step_id) + return streaming_service.format_thinking_step( + step_id=last_active_step_id, + title=last_active_step_title, + status="completed", + items=last_active_step_items if last_active_step_items else None, + ) + return None + + # Initial thinking step - analyzing the request + analyze_step_id = next_thinking_step_id() + last_active_step_id = analyze_step_id + last_active_step_title = "Understanding your request" + last_active_step_items = [f"Processing: {user_query[:80]}{'...' if len(user_query) > 80 else ''}"] + yield streaming_service.format_thinking_step( + step_id=analyze_step_id, + title="Understanding your request", + status="in_progress", + items=last_active_step_items, + ) + # Stream the agent response with thread config for memory async for event in agent.astream_events( input_state, config=config, version="v2" @@ -168,6 +211,31 @@ async def stream_new_chat( if content and isinstance(content, str): # Start a new text block if needed if current_text_id is None: + # Complete any previous step + completion_event = complete_current_step() + if completion_event: + yield completion_event + + if just_finished_tool: + # We just finished a tool - don't create a step here, + # text will flow silently after tools. + # Clear the active step tracking. 
+ last_active_step_id = None + last_active_step_title = "" + last_active_step_items = [] + just_finished_tool = False + else: + # Normal text generation (not after a tool) + gen_step_id = next_thinking_step_id() + last_active_step_id = gen_step_id + last_active_step_title = "Generating response" + last_active_step_items = [] + yield streaming_service.format_thinking_step( + step_id=gen_step_id, + title="Generating response", + status="in_progress", + ) + current_text_id = streaming_service.generate_text_id() yield streaming_service.format_text_start(current_text_id) @@ -188,6 +256,67 @@ async def stream_new_chat( yield streaming_service.format_text_end(current_text_id) current_text_id = None + # Complete any previous step EXCEPT "Synthesizing response" + # (we want to reuse the Synthesizing step after tools complete) + if last_active_step_title != "Synthesizing response": + completion_event = complete_current_step() + if completion_event: + yield completion_event + + # Reset the just_finished_tool flag since we're starting a new tool + just_finished_tool = False + + # Create thinking step for the tool call and store it for later update + tool_step_id = next_thinking_step_id() + tool_step_ids[run_id] = tool_step_id + last_active_step_id = tool_step_id + if tool_name == "search_knowledge_base": + query = ( + tool_input.get("query", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + last_active_step_title = "Searching knowledge base" + last_active_step_items = [f"Query: {query[:100]}{'...' 
if len(query) > 100 else ''}"] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Searching knowledge base", + status="in_progress", + items=last_active_step_items, + ) + elif tool_name == "generate_podcast": + podcast_title = ( + tool_input.get("podcast_title", "SurfSense Podcast") + if isinstance(tool_input, dict) + else "SurfSense Podcast" + ) + # Get content length for context + content_len = len( + tool_input.get("source_content", "") + if isinstance(tool_input, dict) + else "" + ) + last_active_step_title = "Generating podcast" + last_active_step_items = [ + f"Title: {podcast_title}", + f"Content: {content_len:,} characters", + "Preparing audio generation...", + ] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Generating podcast", + status="in_progress", + items=last_active_step_items, + ) + else: + last_active_step_title = f"Using {tool_name.replace('_', ' ')}" + last_active_step_items = [] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title=last_active_step_title, + status="in_progress", + ) + # Stream tool info tool_call_id = ( f"call_{run_id[:32]}" @@ -254,6 +383,87 @@ async def stream_new_chat( tool_call_id = f"call_{run_id[:32]}" if run_id else "call_unknown" + # Get the original tool step ID to update it (not create a new one) + original_step_id = tool_step_ids.get(run_id, f"thinking-unknown-{run_id[:8]}") + + # Mark the tool thinking step as completed using the SAME step ID + # Also add to completed set so we don't try to complete it again + completed_step_ids.add(original_step_id) + if tool_name == "search_knowledge_base": + # Get result count if available + result_info = "Search completed" + if isinstance(tool_output, dict): + result_len = tool_output.get("result_length", 0) + if result_len > 0: + result_info = f"Found relevant information ({result_len} chars)" + # Include original query in completed items + completed_items = [*last_active_step_items, 
result_info] + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Searching knowledge base", + status="completed", + items=completed_items, + ) + elif tool_name == "generate_podcast": + # Build detailed completion items based on podcast status + podcast_status = ( + tool_output.get("status", "unknown") + if isinstance(tool_output, dict) + else "unknown" + ) + podcast_title = ( + tool_output.get("title", "Podcast") + if isinstance(tool_output, dict) + else "Podcast" + ) + + if podcast_status == "processing": + completed_items = [ + f"Title: {podcast_title}", + "Audio generation started", + "Processing in background...", + ] + elif podcast_status == "already_generating": + completed_items = [ + f"Title: {podcast_title}", + "Podcast already in progress", + "Please wait for it to complete", + ] + elif podcast_status == "error": + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + completed_items = [ + f"Title: {podcast_title}", + f"Error: {error_msg[:50]}", + ] + else: + completed_items = last_active_step_items + + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Generating podcast", + status="completed", + items=completed_items, + ) + else: + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title=f"Using {tool_name.replace('_', ' ')}", + status="completed", + items=last_active_step_items, + ) + + # Mark that we just finished a tool - "Synthesizing response" will be created + # when text actually starts flowing (not immediately) + just_finished_tool = True + # Clear the active step since the tool is done + last_active_step_id = None + last_active_step_title = "" + last_active_step_items = [] + # Handle different tool outputs if tool_name == "generate_podcast": # Stream the full podcast result so frontend can render the audio player @@ -302,6 +512,11 @@ async def stream_new_chat( if current_text_id is not 
None: yield streaming_service.format_text_end(current_text_id) + # Mark the last active thinking step as completed using the same title + completion_event = complete_current_step() + if completion_event: + yield completion_event + # Finish the step and message yield streaming_service.format_finish_step() yield streaming_service.format_finish() diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 1426ecea1..5d0838816 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -11,6 +11,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { toast } from "sonner"; import { Thread } from "@/components/assistant-ui/thread"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; +import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; import { getBearerToken } from "@/lib/auth-utils"; import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter"; import { @@ -25,6 +26,23 @@ import { type MessageRecord, } from "@/lib/chat/thread-persistence"; +/** + * Extract thinking steps from message content + */ +function extractThinkingSteps(content: unknown): ThinkingStep[] { + if (!Array.isArray(content)) return []; + + const thinkingPart = content.find( + (part: unknown) => + typeof part === "object" && + part !== null && + "type" in part && + (part as { type: string }).type === "thinking-steps" + ) as { type: "thinking-steps"; steps: ThinkingStep[] } | undefined; + + return thinkingPart?.steps || []; +} + /** * Convert backend message to assistant-ui ThreadMessageLike format */ @@ -52,6 +70,16 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { */ const TOOLS_WITH_UI = new Set(["generate_podcast"]); 
+/** + * Type for thinking step data from the backend + */ +interface ThinkingStepData { + id: string; + title: string; + status: "pending" | "in_progress" | "completed"; + items: string[]; +} + export default function NewChatPage() { const params = useParams(); const router = useRouter(); @@ -59,6 +87,10 @@ export default function NewChatPage() { const [threadId, setThreadId] = useState(null); const [messages, setMessages] = useState([]); const [isRunning, setIsRunning] = useState(false); + // Store thinking steps per message ID + const [messageThinkingSteps, setMessageThinkingSteps] = useState< + Map + >(new Map()); const abortControllerRef = useRef(null); // Create the attachment adapter for file processing @@ -95,6 +127,20 @@ export default function NewChatPage() { if (response.messages && response.messages.length > 0) { const loadedMessages = response.messages.map(convertToThreadMessage); setMessages(loadedMessages); + + // Extract and restore thinking steps from persisted messages + const restoredThinkingSteps = new Map(); + for (const msg of response.messages) { + if (msg.role === "assistant") { + const steps = extractThinkingSteps(msg.content); + if (steps.length > 0) { + restoredThinkingSteps.set(`msg-${msg.id}`, steps); + } + } + } + if (restoredThinkingSteps.size > 0) { + setMessageThinkingSteps(restoredThinkingSteps); + } } } else { // Create new thread @@ -187,6 +233,7 @@ export default function NewChatPage() { // Prepare assistant message const assistantMsgId = `msg-assistant-${Date.now()}`; let accumulatedText = ""; + const currentThinkingSteps = new Map(); const toolCalls = new Map< string, { @@ -197,7 +244,7 @@ export default function NewChatPage() { } >(); - // Helper to build content + // Helper to build content (includes thinking steps for persistence) const buildContent = (): ThreadMessageLike["content"] => { const parts: Array< | { type: "text"; text: string } @@ -208,7 +255,20 @@ export default function NewChatPage() { args: Record; result?: 
unknown; } + | { + type: "thinking-steps"; + steps: ThinkingStepData[]; + } > = []; + + // Include thinking steps for persistence + if (currentThinkingSteps.size > 0) { + parts.push({ + type: "thinking-steps", + steps: Array.from(currentThinkingSteps.values()), + }); + } + if (accumulatedText) { parts.push({ type: "text", text: accumulatedText }); } @@ -367,6 +427,24 @@ export default function NewChatPage() { break; } + case "data-thinking-step": { + // Handle thinking step events for chain-of-thought display + const stepData = parsed.data as ThinkingStepData; + if (stepData?.id) { + currentThinkingSteps.set(stepData.id, stepData); + // Update message-specific thinking steps + setMessageThinkingSteps((prev) => { + const newMap = new Map(prev); + newMap.set( + assistantMsgId, + Array.from(currentThinkingSteps.values()) + ); + return newMap; + }); + } + break; + } + case "error": throw new Error(parsed.errorText || "Server error"); } @@ -415,6 +493,7 @@ export default function NewChatPage() { } finally { setIsRunning(false); abortControllerRef.current = null; + // Note: We no longer clear thinking steps - they persist with the message } }, [threadId, searchSpaceId, messages] @@ -483,7 +562,7 @@ export default function NewChatPage() {
- +
); diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 5b4e430d7..b33ad3c87 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -7,10 +7,13 @@ import { MessagePrimitive, ThreadPrimitive, useAssistantState, + useMessage, } from "@assistant-ui/react"; import { ArrowDownIcon, ArrowUpIcon, + Brain, + CheckCircle2, CheckIcon, ChevronLeftIcon, ChevronRightIcon, @@ -19,6 +22,8 @@ import { Loader2, PencilIcon, RefreshCwIcon, + Search, + Sparkles, SquareIcon, } from "lucide-react"; import Image from "next/image"; @@ -32,44 +37,134 @@ import { import { MarkdownText } from "@/components/assistant-ui/markdown-text"; import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; +import { + ChainOfThought, + ChainOfThoughtContent, + ChainOfThoughtItem, + ChainOfThoughtStep, + ChainOfThoughtTrigger, +} from "@/components/prompt-kit/chain-of-thought"; import { Button } from "@/components/ui/button"; import { cn } from "@/lib/utils"; import { currentUserAtom } from "@/atoms/user/user-query.atoms"; +import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; -export const Thread: FC = () => { +/** + * Props for the Thread component + */ +interface ThreadProps { + messageThinkingSteps?: Map; +} + +// Context to pass thinking steps to AssistantMessage +import { createContext, useContext } from "react"; + +const ThinkingStepsContext = createContext>(new Map()); + +/** + * Get icon based on step status and title + */ +function getStepIcon(status: "pending" | "in_progress" | "completed", title: string) { + const titleLower = title.toLowerCase(); + + if (status === "in_progress") { + return ; + } + + if (status === "completed") { + return ; + } + + if (titleLower.includes("search") || titleLower.includes("knowledge")) { + return ; + } + + if 
(titleLower.includes("analy") || titleLower.includes("understand")) { + return ; + } + + return ; +} + +/** + * Chain of thought display component + */ +const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[] }> = ({ steps }) => { + if (steps.length === 0) return null; + return ( - - + + {steps.map((step) => { + const icon = getStepIcon(step.status, step.title); + return ( + + + {step.title} + + {step.items && step.items.length > 0 && ( + + {step.items.map((item, index) => ( + + {item} + + ))} + + )} + + ); + })} + +
+ ); +}; + +export const Thread: FC = ({ messageThinkingSteps = new Map() }) => { + return ( + + - thread.isEmpty}> - - - - - - - - !thread.isEmpty}> -
- -
+ + thread.isEmpty}> + -
- -
+ + + + + + !thread.isEmpty}> +
+ +
+
+
+ + +
); }; @@ -197,7 +292,7 @@ const ThreadWelcome: FC = () => { const Composer: FC = () => { return ( - + { ); }; -const AssistantMessage: FC = () => { +const AssistantMessageInner: FC = () => { + const thinkingStepsMap = useContext(ThinkingStepsContext); + + // Get the current message ID to look up thinking steps + const messageId = useMessage((m) => m.id); + const thinkingSteps = thinkingStepsMap.get(messageId) || []; + return ( - + <> + {/* Show thinking steps BEFORE the text response */} + {thinkingSteps.length > 0 && ( +
+ +
+ )} +
{
+ + ); +}; + +const AssistantMessage: FC = () => { + return ( + + ); }; diff --git a/surfsense_web/components/prompt-kit/chain-of-thought.tsx b/surfsense_web/components/prompt-kit/chain-of-thought.tsx new file mode 100644 index 000000000..118ffeff7 --- /dev/null +++ b/surfsense_web/components/prompt-kit/chain-of-thought.tsx @@ -0,0 +1,148 @@ +"use client" + +import { + Collapsible, + CollapsibleContent, + CollapsibleTrigger, +} from "@/components/ui/collapsible" +import { cn } from "@/lib/utils" +import { Brain, ChevronDown, Circle, Loader2, Search, Sparkles, Lightbulb, CheckCircle2 } from "lucide-react" +import React from "react" + +export type ChainOfThoughtItemProps = React.ComponentProps<"div"> + +export const ChainOfThoughtItem = ({ + children, + className, + ...props +}: ChainOfThoughtItemProps) => ( +
+ {children} +
+) + +export type ChainOfThoughtTriggerProps = React.ComponentProps< + typeof CollapsibleTrigger +> & { + leftIcon?: React.ReactNode + swapIconOnHover?: boolean +} + +export const ChainOfThoughtTrigger = ({ + children, + className, + leftIcon, + swapIconOnHover = true, + ...props +}: ChainOfThoughtTriggerProps) => ( + +
+ {leftIcon ? ( + + + {leftIcon} + + {swapIconOnHover && ( + + )} + + ) : ( + + + + )} + {children} +
+ {!leftIcon && ( + + )} +
+) + +export type ChainOfThoughtContentProps = React.ComponentProps< + typeof CollapsibleContent +> + +export const ChainOfThoughtContent = ({ + children, + className, + ...props +}: ChainOfThoughtContentProps) => { + return ( + +
+
+
+
{children}
+
+ + ) +} + +export type ChainOfThoughtProps = { + children: React.ReactNode + className?: string +} + +export function ChainOfThought({ children, className }: ChainOfThoughtProps) { + const childrenArray = React.Children.toArray(children) + + return ( +
+ {childrenArray.map((child, index) => ( + + {React.isValidElement(child) && + React.cloneElement( + child as React.ReactElement, + { + isLast: index === childrenArray.length - 1, + } + )} + + ))} +
+ ) +} + +export type ChainOfThoughtStepProps = { + children: React.ReactNode + className?: string + isLast?: boolean +} + +export const ChainOfThoughtStep = ({ + children, + className, + isLast = false, + ...props +}: ChainOfThoughtStepProps & React.ComponentProps) => { + return ( + + {children} +
+
+
+ + ) +} diff --git a/surfsense_web/components/tool-ui/deepagent-thinking.tsx b/surfsense_web/components/tool-ui/deepagent-thinking.tsx new file mode 100644 index 000000000..f0c03e570 --- /dev/null +++ b/surfsense_web/components/tool-ui/deepagent-thinking.tsx @@ -0,0 +1,203 @@ +"use client"; + +import { makeAssistantToolUI } from "@assistant-ui/react"; +import { Brain, CheckCircle2, Loader2, Search, Sparkles } from "lucide-react"; +import { useMemo } from "react"; +import { + ChainOfThought, + ChainOfThoughtContent, + ChainOfThoughtItem, + ChainOfThoughtStep, + ChainOfThoughtTrigger, +} from "@/components/prompt-kit/chain-of-thought"; +import { cn } from "@/lib/utils"; + +/** + * Types for the deepagent thinking/reasoning tool + */ +interface ThinkingStep { + id: string; + title: string; + items: string[]; + status: "pending" | "in_progress" | "completed"; +} + +interface DeepAgentThinkingArgs { + query?: string; + context?: string; +} + +interface DeepAgentThinkingResult { + steps?: ThinkingStep[]; + status?: "thinking" | "searching" | "synthesizing" | "completed"; + summary?: string; +} + +/** + * Get icon based on step status and type + */ +function getStepIcon(status: "pending" | "in_progress" | "completed", title: string) { + // Check for specific step types based on title keywords + const titleLower = title.toLowerCase(); + + if (status === "in_progress") { + return ; + } + + if (status === "completed") { + return ; + } + + // Default icons based on step type + if (titleLower.includes("search") || titleLower.includes("knowledge")) { + return ; + } + + if (titleLower.includes("analy") || titleLower.includes("understand")) { + return ; + } + + return ; +} + +/** + * Component to display a single thinking step + */ +function ThinkingStepDisplay({ step }: { step: ThinkingStep }) { + const icon = useMemo(() => getStepIcon(step.status, step.title), [step.status, step.title]); + + return ( + + + {step.title} + + + {step.items.map((item, index) => ( + + {item} + + 
))} + + + ); +} + +/** + * Loading state with animated thinking indicator + */ +function ThinkingLoadingState({ status }: { status?: string }) { + const statusText = useMemo(() => { + switch (status) { + case "searching": + return "Searching knowledge base..."; + case "synthesizing": + return "Synthesizing response..."; + case "thinking": + default: + return "Thinking..."; + } + }, [status]); + + return ( +
+
+ + + + + +
+ {statusText} +
+ ); +} + +/** + * DeepAgent Thinking Tool UI Component + * + * This component displays the agent's chain-of-thought reasoning + * when the deepagent is processing a query. It shows thinking steps + * in a collapsible, hierarchical format. + */ +export const DeepAgentThinkingToolUI = makeAssistantToolUI< + DeepAgentThinkingArgs, + DeepAgentThinkingResult +>({ + toolName: "deepagent_thinking", + render: function DeepAgentThinkingUI({ args, result, status }) { + // Loading state - tool is still running + if (status.type === "running" || status.type === "requires-action") { + return ; + } + + // Incomplete/cancelled state + if (status.type === "incomplete") { + if (status.reason === "cancelled") { + return null; // Don't show anything if cancelled + } + if (status.reason === "error") { + return null; // Don't show error for thinking - it's not critical + } + } + + // No result or no steps - don't render anything + if (!result?.steps || result.steps.length === 0) { + return null; + } + + // Render the chain of thought + return ( +
+ + {result.steps.map((step) => ( + + ))} + +
+ ); + }, +}); + +/** + * Inline Thinking Display Component + * + * A simpler version that can be used inline with the message content + * for displaying reasoning without the full tool UI infrastructure. + */ +export function InlineThinkingDisplay({ + steps, + isStreaming = false, + className, +}: { + steps: ThinkingStep[]; + isStreaming?: boolean; + className?: string; +}) { + if (steps.length === 0 && !isStreaming) { + return null; + } + + return ( +
+ {isStreaming && steps.length === 0 ? ( + + ) : ( + + {steps.map((step) => ( + + ))} + + )} +
+ ); +} + +export type { ThinkingStep, DeepAgentThinkingArgs, DeepAgentThinkingResult }; + diff --git a/surfsense_web/components/tool-ui/index.ts b/surfsense_web/components/tool-ui/index.ts index 6125f625f..3cddfba21 100644 --- a/surfsense_web/components/tool-ui/index.ts +++ b/surfsense_web/components/tool-ui/index.ts @@ -8,3 +8,10 @@ export { Audio } from "./audio"; export { GeneratePodcastToolUI } from "./generate-podcast"; +export { + DeepAgentThinkingToolUI, + InlineThinkingDisplay, + type ThinkingStep, + type DeepAgentThinkingArgs, + type DeepAgentThinkingResult, +} from "./deepagent-thinking"; diff --git a/surfsense_web/components/ui/collapsible.tsx b/surfsense_web/components/ui/collapsible.tsx index 442972d7c..ae9fad04a 100644 --- a/surfsense_web/components/ui/collapsible.tsx +++ b/surfsense_web/components/ui/collapsible.tsx @@ -1,21 +1,33 @@ -"use client"; +"use client" -import * as CollapsiblePrimitive from "@radix-ui/react-collapsible"; +import * as CollapsiblePrimitive from "@radix-ui/react-collapsible" -function Collapsible({ ...props }: React.ComponentProps) { - return ; +function Collapsible({ + ...props +}: React.ComponentProps) { + return } function CollapsibleTrigger({ - ...props + ...props }: React.ComponentProps) { - return ; + return ( + + ) } function CollapsibleContent({ - ...props + ...props }: React.ComponentProps) { - return ; + return ( + + ) } -export { Collapsible, CollapsibleTrigger, CollapsibleContent }; +export { Collapsible, CollapsibleTrigger, CollapsibleContent } From d75ddf60876c0b3fe25df8fc1655bc56bc0148c4 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Mon, 22 Dec 2025 23:02:57 +0530 Subject: [PATCH 03/13] refactor: separate content building for UI and persistence, filtering out thinking steps for improved clarity --- .../new-chat/[[...chat_id]]/page.tsx | 60 ++++++++++++++----- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git 
a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 5d0838816..51a2f9875 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -45,6 +45,7 @@ function extractThinkingSteps(content: unknown): ThinkingStep[] { /** * Convert backend message to assistant-ui ThreadMessageLike format + * Filters out 'thinking-steps' part as it's handled separately */ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { let content: ThreadMessageLike["content"]; @@ -52,7 +53,17 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { if (typeof msg.content === "string") { content = [{ type: "text", text: msg.content }]; } else if (Array.isArray(msg.content)) { - content = msg.content as ThreadMessageLike["content"]; + // Filter out thinking-steps part - it's handled separately via messageThinkingSteps + const filteredContent = msg.content.filter( + (part: unknown) => + !(typeof part === "object" && + part !== null && + "type" in part && + (part as { type: string }).type === "thinking-steps") + ); + content = filteredContent.length > 0 + ? 
(filteredContent as ThreadMessageLike["content"]) + : [{ type: "text", text: "" }]; } else { content = [{ type: "text", text: String(msg.content) }]; } @@ -244,8 +255,8 @@ export default function NewChatPage() { } >(); - // Helper to build content (includes thinking steps for persistence) - const buildContent = (): ThreadMessageLike["content"] => { + // Helper to build content for UI (without thinking-steps) + const buildContentForUI = (): ThreadMessageLike["content"] => { const parts: Array< | { type: "text"; text: string } | { @@ -255,12 +266,31 @@ export default function NewChatPage() { args: Record; result?: unknown; } - | { - type: "thinking-steps"; - steps: ThinkingStepData[]; - } > = []; + if (accumulatedText) { + parts.push({ type: "text", text: accumulatedText }); + } + for (const toolCall of toolCalls.values()) { + if (TOOLS_WITH_UI.has(toolCall.toolName)) { + parts.push({ + type: "tool-call", + toolCallId: toolCall.toolCallId, + toolName: toolCall.toolName, + args: toolCall.args, + result: toolCall.result, + }); + } + } + return parts.length > 0 + ? (parts as ThreadMessageLike["content"]) + : [{ type: "text", text: "" }]; + }; + + // Helper to build content for persistence (includes thinking-steps) + const buildContentForPersistence = (): unknown[] => { + const parts: unknown[] = []; + // Include thinking steps for persistence if (currentThinkingSteps.size > 0) { parts.push({ @@ -283,9 +313,7 @@ export default function NewChatPage() { }); } } - return parts.length > 0 - ? (parts as ThreadMessageLike["content"]) - : [{ type: "text", text: "" }]; + return parts.length > 0 ? parts : [{ type: "text", text: "" }]; }; // Add placeholder assistant message @@ -372,7 +400,7 @@ export default function NewChatPage() { accumulatedText += parsed.delta; setMessages((prev) => prev.map((m) => - m.id === assistantMsgId ? { ...m, content: buildContent() } : m + m.id === assistantMsgId ? 
{ ...m, content: buildContentForUI() } : m ) ); break; @@ -385,7 +413,7 @@ export default function NewChatPage() { }); setMessages((prev) => prev.map((m) => - m.id === assistantMsgId ? { ...m, content: buildContent() } : m + m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m ) ); break; @@ -401,7 +429,7 @@ export default function NewChatPage() { }); setMessages((prev) => prev.map((m) => - m.id === assistantMsgId ? { ...m, content: buildContent() } : m + m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m ) ); break; @@ -421,7 +449,7 @@ export default function NewChatPage() { } setMessages((prev) => prev.map((m) => - m.id === assistantMsgId ? { ...m, content: buildContent() } : m + m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m ) ); break; @@ -459,8 +487,8 @@ export default function NewChatPage() { reader.releaseLock(); } - // Persist assistant message - const finalContent = buildContent(); + // Persist assistant message (with thinking steps for restoration on refresh) + const finalContent = buildContentForPersistence(); if (accumulatedText || toolCalls.size > 0) { appendMessage(threadId, { role: "assistant", From a3b22e61e5fc754ee2a65c40f60a7b12e8361805 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Mon, 22 Dec 2025 23:57:16 +0530 Subject: [PATCH 04/13] refactor: update connector navigation to use router.back() for improved user experience --- .../add/airtable-connector/page.tsx | 2 +- .../connectors/add/baidu-search-api/page.tsx | 2 +- .../add/bookstack-connector/page.tsx | 2 +- .../connectors/add/clickup-connector/page.tsx | 2 +- .../add/confluence-connector/page.tsx | 2 +- .../connectors/add/discord-connector/page.tsx | 2 +- .../add/elasticsearch-connector/page.tsx | 2 +- .../connectors/add/github-connector/page.tsx | 2 +- .../add/google-calendar-connector/page.tsx | 2 +- .../add/google-gmail-connector/page.tsx | 2 +- 
.../connectors/add/jira-connector/page.tsx | 2 +- .../connectors/add/linear-connector/page.tsx | 2 +- .../connectors/add/linkup-api/page.tsx | 2 +- .../connectors/add/luma-connector/page.tsx | 2 +- .../connectors/add/notion-connector/page.tsx | 2 +- .../connectors/add/searxng/page.tsx | 2 +- .../connectors/add/slack-connector/page.tsx | 2 +- .../connectors/add/tavily-api/page.tsx | 2 +- .../add/webcrawler-connector/page.tsx | 2 +- .../components/assistant-ui/thread.tsx | 223 ++++++++++++++++-- 20 files changed, 228 insertions(+), 33 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx index cc4330203..7ebbeec3f 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx @@ -123,7 +123,7 @@ export default function AirtableConnectorPage() { diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx index 3e9f4898e..e1a84a8ab 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx @@ -126,7 +126,7 @@ export default function BaiduSearchApiPage() { diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx index 8659d937c..41b9e42f8 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx @@ -133,7 +133,7 @@ export default function 
GoogleGmailConnectorPage() { diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/jira-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/jira-connector/page.tsx index 6f4e31114..132c58521 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/jira-connector/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/jira-connector/page.tsx @@ -125,7 +125,7 @@ export default function JiraConnectorPage() { diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/notion-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/notion-connector/page.tsx index 310c31811..5cfd16a5f 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/notion-connector/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/notion-connector/page.tsx @@ -105,7 +105,7 @@ export default function NotionConnectorPage() { diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index b33ad3c87..1296ab28f 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -21,14 +21,22 @@ import { DownloadIcon, Loader2, PencilIcon, + Plug2, + Plus, RefreshCwIcon, Search, Sparkles, SquareIcon, } from "lucide-react"; import Image from "next/image"; -import type { FC } from "react"; +import Link from "next/link"; +import { type FC, useState, useRef, useCallback } from "react"; import { useAtomValue } from "jotai"; +import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; +import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; +import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { connectorCategories } from "@/components/sources/connector-data"; import { 
ComposerAddAttachment, ComposerAttachments, @@ -197,14 +205,12 @@ const getTimeBasedGreeting = (userEmail?: string): string => { "Rise and shine", "Morning", "Hey there", - "Welcome back", ]; const afternoonGreetings = [ "Good afternoon", "Afternoon", "Hey there", - "Welcome back", "Hope you're having a great day", ]; @@ -212,27 +218,36 @@ const getTimeBasedGreeting = (userEmail?: string): string => { "Good evening", "Evening", "Hey there", - "Welcome back", "Hope you had a great day", ]; const nightGreetings = [ - "Late night", - "Still up", + "Good night", + "Evening", "Hey there", - "Welcome back", + "Winding down", + ]; + + const lateNightGreetings = [ "Burning the midnight oil", + "Still up", + "Night owl mode", + "The night is young", ]; // Select a random greeting based on time let greeting: string; - if (hour < 12) { + if (hour < 5) { + // Late night: midnight to 5 AM + greeting = lateNightGreetings[Math.floor(Math.random() * lateNightGreetings.length)]; + } else if (hour < 12) { greeting = morningGreetings[Math.floor(Math.random() * morningGreetings.length)]; - } else if (hour < 17) { + } else if (hour < 18) { greeting = afternoonGreetings[Math.floor(Math.random() * afternoonGreetings.length)]; - } else if (hour < 21) { + } else if (hour < 22) { greeting = eveningGreetings[Math.floor(Math.random() * eveningGreetings.length)]; } else { + // Night: 10 PM to midnight greeting = nightGreetings[Math.floor(Math.random() * nightGreetings.length)]; } @@ -251,7 +266,7 @@ const ThreadWelcome: FC = () => {
{/* Greeting positioned near the composer */}
-

+

{/** biome-ignore lint/a11y/noStaticElementInteractions: wrong lint error, this is a workaround to fix the lint error */}
{ SurfSense { ); }; +const ConnectorIndicator: FC = () => { + const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom); + const { connectors, isLoading } = useSearchSourceConnectors(false, searchSpaceId ? Number(searchSpaceId) : undefined); + const [isOpen, setIsOpen] = useState(false); + const [searchQuery, setSearchQuery] = useState(""); + const closeTimeoutRef = useRef(null); + + const hasConnectors = connectors.length > 0; + + // Get connected connector types for comparison + const connectedTypes = new Set(connectors.map(c => c.connector_type)); + + // Flatten all available connectors from categories + const allAvailableConnectors = connectorCategories.flatMap(category => + category.connectors.filter(c => c.status === "available") + ); + + // Filter connectors based on search query + const filteredConnectors = allAvailableConnectors.filter(connector => + connector.title.toLowerCase().includes(searchQuery.toLowerCase()) + ); + + // Filter connected connectors based on search query + const filteredConnectedConnectors = connectors.filter(connector => + connector.name.toLowerCase().includes(searchQuery.toLowerCase()) + ); + + // Filter available (not connected) connectors + const filteredAvailableConnectors = filteredConnectors.filter(connector => { + // Map connector id to connector_type for comparison + const connectorTypeMap: Record = { + "webcrawler-connector": "WEBCRAWLER_CONNECTOR", + "tavily-api": "TAVILY_API", + "searxng": "SEARXNG_API", + "linkup-api": "LINKUP_API", + "baidu-search-api": "BAIDU_SEARCH_API", + "slack-connector": "SLACK_CONNECTOR", + "discord-connector": "DISCORD_CONNECTOR", + "linear-connector": "LINEAR_CONNECTOR", + "jira-connector": "JIRA_CONNECTOR", + "clickup-connector": "CLICKUP_CONNECTOR", + "notion-connector": "NOTION_CONNECTOR", + "confluence-connector": "CONFLUENCE_CONNECTOR", + "bookstack-connector": "BOOKSTACK_CONNECTOR", + "github-connector": "GITHUB_CONNECTOR", + "elasticsearch-connector": "ELASTICSEARCH_CONNECTOR", + 
"airtable-connector": "AIRTABLE_CONNECTOR", + "google-calendar-connector": "GOOGLE_CALENDAR_CONNECTOR", + "google-gmail-connector": "GOOGLE_GMAIL_CONNECTOR", + "luma-connector": "LUMA_CONNECTOR", + }; + const connectorType = connectorTypeMap[connector.id]; + return !connectorType || !connectedTypes.has(connectorType); + }); + + const handleMouseEnter = useCallback(() => { + // Clear any pending close timeout + if (closeTimeoutRef.current) { + clearTimeout(closeTimeoutRef.current); + closeTimeoutRef.current = null; + } + setIsOpen(true); + }, []); + + const handleMouseLeave = useCallback(() => { + // Delay closing by 150ms for better UX + closeTimeoutRef.current = setTimeout(() => { + setIsOpen(false); + setSearchQuery(""); // Reset search when closing + }, 150); + }, []); + + if (!searchSpaceId) return null; + + return ( + + + + + +
+ {/* Search input - sticky at top */} +
+
+ + setSearchQuery(e.target.value)} + className="w-full pl-8 pr-3 py-1.5 text-sm bg-transparent border-none outline-none focus:ring-0 placeholder:text-muted-foreground" + /> +
+
+ + {/* Connectors list - scrollable */} +
+ {/* Connected connectors first */} + {filteredConnectedConnectors.length > 0 && ( + <> + {filteredConnectedConnectors.map((connector) => ( + +
+
+ {getConnectorIcon(connector.connector_type, "size-5")} +
+ {connector.name} +
+ + + ))} + {filteredAvailableConnectors.length > 0 && ( +
+ )} + + )} + + {/* Available connectors */} + {filteredAvailableConnectors.length > 0 ? ( + filteredAvailableConnectors.map((connector) => ( + +
+
+ {connector.icon} +
+ {connector.title} +
+ + + )) + ) : filteredConnectedConnectors.length === 0 ? ( +
+ No connectors found +
+ ) : null} +
+
+ + + ); +}; + const ComposerAction: FC = () => { // Check if any attachments are still being processed (running AND progress < 100) // When progress is 100, processing is done but waiting for send() @@ -329,7 +521,10 @@ const ComposerAction: FC = () => { return (
- +
+ + +
{/* Show processing indicator when attachments are being processed */} {hasProcessingAttachments && ( From 898b062bed6f958b99ea55b1d5f8ab8f12d5ee50 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 23 Dec 2025 00:22:50 +0530 Subject: [PATCH 05/13] fix: removed revamped connectors --- .../add/airtable-connector/page.tsx | 2 +- .../connectors/add/baidu-search-api/page.tsx | 2 +- .../add/bookstack-connector/page.tsx | 2 +- .../connectors/add/clickup-connector/page.tsx | 2 +- .../add/confluence-connector/page.tsx | 2 +- .../connectors/add/discord-connector/page.tsx | 2 +- .../add/elasticsearch-connector/page.tsx | 2 +- .../connectors/add/github-connector/page.tsx | 2 +- .../add/google-calendar-connector/page.tsx | 2 +- .../add/google-gmail-connector/page.tsx | 2 +- .../connectors/add/jira-connector/page.tsx | 2 +- .../connectors/add/linear-connector/page.tsx | 2 +- .../connectors/add/linkup-api/page.tsx | 2 +- .../connectors/add/luma-connector/page.tsx | 2 +- .../connectors/add/notion-connector/page.tsx | 2 +- .../connectors/add/searxng/page.tsx | 2 +- .../connectors/add/slack-connector/page.tsx | 2 +- .../connectors/add/tavily-api/page.tsx | 2 +- .../add/webcrawler-connector/page.tsx | 2 +- .../components/assistant-ui/thread.tsx | 148 +++++------------- 20 files changed, 55 insertions(+), 131 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx index 7ebbeec3f..cc4330203 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/airtable-connector/page.tsx @@ -123,7 +123,7 @@ export default function AirtableConnectorPage() { diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx 
b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx index e1a84a8ab..3e9f4898e 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/baidu-search-api/page.tsx @@ -126,7 +126,7 @@ export default function BaiduSearchApiPage() { diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx index 41b9e42f8..8659d937c 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx @@ -133,7 +133,7 @@ export default function GoogleGmailConnectorPage() { diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/jira-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/jira-connector/page.tsx index 132c58521..6f4e31114 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/jira-connector/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/jira-connector/page.tsx @@ -125,7 +125,7 @@ export default function JiraConnectorPage() { diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/notion-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/notion-connector/page.tsx index 5cfd16a5f..310c31811 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/notion-connector/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/notion-connector/page.tsx @@ -105,7 +105,7 @@ export default function NotionConnectorPage() { diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 1296ab28f..30e2a42d9 100644 --- 
a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -36,7 +36,6 @@ import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-quer import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; -import { connectorCategories } from "@/components/sources/connector-data"; import { ComposerAddAttachment, ComposerAttachments, @@ -326,57 +325,10 @@ const ConnectorIndicator: FC = () => { const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom); const { connectors, isLoading } = useSearchSourceConnectors(false, searchSpaceId ? Number(searchSpaceId) : undefined); const [isOpen, setIsOpen] = useState(false); - const [searchQuery, setSearchQuery] = useState(""); const closeTimeoutRef = useRef(null); const hasConnectors = connectors.length > 0; - // Get connected connector types for comparison - const connectedTypes = new Set(connectors.map(c => c.connector_type)); - - // Flatten all available connectors from categories - const allAvailableConnectors = connectorCategories.flatMap(category => - category.connectors.filter(c => c.status === "available") - ); - - // Filter connectors based on search query - const filteredConnectors = allAvailableConnectors.filter(connector => - connector.title.toLowerCase().includes(searchQuery.toLowerCase()) - ); - - // Filter connected connectors based on search query - const filteredConnectedConnectors = connectors.filter(connector => - connector.name.toLowerCase().includes(searchQuery.toLowerCase()) - ); - - // Filter available (not connected) connectors - const filteredAvailableConnectors = filteredConnectors.filter(connector => { - // Map connector id to connector_type for comparison - const connectorTypeMap: Record = { - "webcrawler-connector": "WEBCRAWLER_CONNECTOR", - "tavily-api": "TAVILY_API", 
- "searxng": "SEARXNG_API", - "linkup-api": "LINKUP_API", - "baidu-search-api": "BAIDU_SEARCH_API", - "slack-connector": "SLACK_CONNECTOR", - "discord-connector": "DISCORD_CONNECTOR", - "linear-connector": "LINEAR_CONNECTOR", - "jira-connector": "JIRA_CONNECTOR", - "clickup-connector": "CLICKUP_CONNECTOR", - "notion-connector": "NOTION_CONNECTOR", - "confluence-connector": "CONFLUENCE_CONNECTOR", - "bookstack-connector": "BOOKSTACK_CONNECTOR", - "github-connector": "GITHUB_CONNECTOR", - "elasticsearch-connector": "ELASTICSEARCH_CONNECTOR", - "airtable-connector": "AIRTABLE_CONNECTOR", - "google-calendar-connector": "GOOGLE_CALENDAR_CONNECTOR", - "google-gmail-connector": "GOOGLE_GMAIL_CONNECTOR", - "luma-connector": "LUMA_CONNECTOR", - }; - const connectorType = connectorTypeMap[connector.id]; - return !connectorType || !connectedTypes.has(connectorType); - }); - const handleMouseEnter = useCallback(() => { // Clear any pending close timeout if (closeTimeoutRef.current) { @@ -390,7 +342,6 @@ const ConnectorIndicator: FC = () => { // Delay closing by 150ms for better UX closeTimeoutRef.current = setTimeout(() => { setIsOpen(false); - setSearchQuery(""); // Reset search when closing }, 150); }, []); @@ -425,75 +376,48 @@ const ConnectorIndicator: FC = () => { -
- {/* Search input - sticky at top */} -
-
- - setSearchQuery(e.target.value)} - className="w-full pl-8 pr-3 py-1.5 text-sm bg-transparent border-none outline-none focus:ring-0 placeholder:text-muted-foreground" - /> -
-
- - {/* Connectors list - scrollable */} -
- {/* Connected connectors first */} - {filteredConnectedConnectors.length > 0 && ( - <> - {filteredConnectedConnectors.map((connector) => ( - -
-
- {getConnectorIcon(connector.connector_type, "size-5")} -
- {connector.name} -
- - - ))} - {filteredAvailableConnectors.length > 0 && ( -
- )} - - )} - - {/* Available connectors */} - {filteredAvailableConnectors.length > 0 ? ( - filteredAvailableConnectors.map((connector) => ( - +

+ Connected Sources ({connectors.length}) +

+
+ {connectors.map((connector) => ( +
-
-
- {connector.icon} -
- {connector.title} -
- - - )) - ) : filteredConnectedConnectors.length === 0 ? ( -
- No connectors found -
- ) : null} + {getConnectorIcon(connector.connector_type, "size-3")} + {connector.name} +
+ ))} +
+ + Manage connectors → +
-
+ ) : ( +
+

No connectors yet

+

+ Connect your first data source to enhance search results. +

+ + + Add Connector + +
+ )} ); From 3703487c593c6ff8442976d0a0939d8437e37d61 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 23 Dec 2025 00:26:45 +0530 Subject: [PATCH 06/13] style: improve connector display and layout in the UI for better readability and user interaction --- .../components/assistant-ui/thread.tsx | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 30e2a42d9..aaf35c329 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -381,27 +381,36 @@ const ConnectorIndicator: FC = () => { onMouseLeave={handleMouseLeave} > {hasConnectors ? ( -
-

- Connected Sources ({connectors.length}) -

+
+
+

+ Connected Sources +

+ + {connectors.length} + +
{connectors.map((connector) => (
- {getConnectorIcon(connector.connector_type, "size-3")} + {getConnectorIcon(connector.connector_type, "size-3.5")} {connector.name}
))}
- - Manage connectors → - +
+ + + Manage connectors + + +
) : (
From 28985e6af45eecea4a9192919fb2d963262a0995 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 23 Dec 2025 00:35:08 +0530 Subject: [PATCH 07/13] style: update greeting messages for improved user engagement and consistency --- surfsense_web/components/assistant-ui/thread.tsx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index aaf35c329..57408391d 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -210,14 +210,14 @@ const getTimeBasedGreeting = (userEmail?: string): string => { "Good afternoon", "Afternoon", "Hey there", - "Hope you're having a great day", + "Hi there", ]; const eveningGreetings = [ "Good evening", "Evening", "Hey there", - "Hope you had a great day", + "Hi there", ]; const nightGreetings = [ @@ -228,10 +228,10 @@ const getTimeBasedGreeting = (userEmail?: string): string => { ]; const lateNightGreetings = [ - "Burning the midnight oil", "Still up", "Night owl mode", "The night is young", + "Hi there", ]; // Select a random greeting based on time @@ -358,9 +358,7 @@ const ConnectorIndicator: FC = () => { "outline-none focus:outline-none focus-visible:outline-none", "border-0 ring-0 focus:ring-0 shadow-none focus:shadow-none", "data-[state=open]:bg-transparent data-[state=open]:shadow-none data-[state=open]:ring-0", - hasConnectors - ? "text-muted-foreground" - : "text-muted-foreground/60" + "text-muted-foreground" )} aria-label={hasConnectors ? 
"View connected sources" : "Add your first connector"} onMouseEnter={handleMouseEnter} From 4b69fdf214dae95198929bb51b543248392a0313 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 23 Dec 2025 00:58:27 +0530 Subject: [PATCH 08/13] feat: add link preview tool for enhanced URL metadata display in chat --- .../app/agents/new_chat/chat_deepagent.py | 9 + .../app/agents/new_chat/link_preview.py | 292 ++++++++++++++ .../app/agents/new_chat/system_prompt.py | 22 + .../app/tasks/chat/stream_new_chat.py | 87 +++- .../new-chat/[[...chat_id]]/page.tsx | 4 +- surfsense_web/components/tool-ui/index.ts | 17 + .../components/tool-ui/link-preview.tsx | 226 +++++++++++ .../components/tool-ui/media-card/index.tsx | 381 ++++++++++++++++++ 8 files changed, 1035 insertions(+), 3 deletions(-) create mode 100644 surfsense_backend/app/agents/new_chat/link_preview.py create mode 100644 surfsense_web/components/tool-ui/link-preview.tsx create mode 100644 surfsense_web/components/tool-ui/media-card/index.tsx diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index 1c97a05ad..23db4f813 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -15,6 +15,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.agents.new_chat.context import SurfSenseContextSchema from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool +from app.agents.new_chat.link_preview import create_link_preview_tool from app.agents.new_chat.podcast import create_generate_podcast_tool from app.agents.new_chat.system_prompt import build_surfsense_system_prompt from app.services.connector_service import ConnectorService @@ -34,6 +35,7 @@ def create_surfsense_deep_agent( user_instructions: str | None = None, enable_citations: bool = True, enable_podcast: bool = True, + enable_link_preview: 
bool = True, additional_tools: Sequence[BaseTool] | None = None, ): """ @@ -53,6 +55,8 @@ def create_surfsense_deep_agent( When False, the agent will not be instructed to add citations to responses. enable_podcast: Whether to include the podcast generation tool (default: True). When True and user_id is provided, the agent can generate podcasts. + enable_link_preview: Whether to include the link preview tool (default: True). + When True, the agent can fetch and display rich link previews. additional_tools: Optional sequence of additional tools to inject into the agent. The search_knowledge_base tool will always be included. @@ -78,6 +82,11 @@ def create_surfsense_deep_agent( ) tools.append(podcast_tool) + # Add link preview tool if enabled + if enable_link_preview: + link_preview_tool = create_link_preview_tool() + tools.append(link_preview_tool) + if additional_tools: tools.extend(additional_tools) diff --git a/surfsense_backend/app/agents/new_chat/link_preview.py b/surfsense_backend/app/agents/new_chat/link_preview.py new file mode 100644 index 000000000..388a6c14e --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/link_preview.py @@ -0,0 +1,292 @@ +""" +Link preview tool for the new chat agent. + +This module provides a tool for fetching URL metadata (title, description, +Open Graph image, etc.) to display rich link previews in the chat UI. +""" + +import hashlib +import re +from typing import Any +from urllib.parse import urlparse + +import httpx +from langchain_core.tools import tool + + +def extract_domain(url: str) -> str: + """Extract the domain from a URL.""" + try: + parsed = urlparse(url) + domain = parsed.netloc + # Remove 'www.' 
prefix if present + if domain.startswith("www."): + domain = domain[4:] + return domain + except Exception: + return "" + + +def extract_og_content(html: str, property_name: str) -> str | None: + """Extract Open Graph meta content from HTML.""" + # Try og:property first + pattern = rf']+property=["\']og:{property_name}["\'][^>]+content=["\']([^"\']+)["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + # Try content before property + pattern = rf']+content=["\']([^"\']+)["\'][^>]+property=["\']og:{property_name}["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + return None + + +def extract_twitter_content(html: str, name: str) -> str | None: + """Extract Twitter Card meta content from HTML.""" + pattern = rf']+name=["\']twitter:{name}["\'][^>]+content=["\']([^"\']+)["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + # Try content before name + pattern = rf']+content=["\']([^"\']+)["\'][^>]+name=["\']twitter:{name}["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + return None + + +def extract_meta_description(html: str) -> str | None: + """Extract meta description from HTML.""" + pattern = r']+name=["\']description["\'][^>]+content=["\']([^"\']+)["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + # Try content before name + pattern = r']+content=["\']([^"\']+)["\'][^>]+name=["\']description["\']' + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1) + + return None + + +def extract_title(html: str) -> str | None: + """Extract title from HTML.""" + # Try og:title first + og_title = extract_og_content(html, "title") + if og_title: + return og_title + + # Try twitter:title + twitter_title = extract_twitter_content(html, "title") + if twitter_title: + return twitter_title + + # Fall back to tag + pattern = 
r"<title[^>]*>([^<]+)" + match = re.search(pattern, html, re.IGNORECASE) + if match: + return match.group(1).strip() + + return None + + +def extract_description(html: str) -> str | None: + """Extract description from HTML.""" + # Try og:description first + og_desc = extract_og_content(html, "description") + if og_desc: + return og_desc + + # Try twitter:description + twitter_desc = extract_twitter_content(html, "description") + if twitter_desc: + return twitter_desc + + # Fall back to meta description + return extract_meta_description(html) + + +def extract_image(html: str) -> str | None: + """Extract image URL from HTML.""" + # Try og:image first + og_image = extract_og_content(html, "image") + if og_image: + return og_image + + # Try twitter:image + twitter_image = extract_twitter_content(html, "image") + if twitter_image: + return twitter_image + + return None + + +def generate_preview_id(url: str) -> str: + """Generate a unique ID for a link preview.""" + hash_val = hashlib.md5(url.encode()).hexdigest()[:12] + return f"link-preview-{hash_val}" + + +def create_link_preview_tool(): + """ + Factory function to create the link_preview tool. + + Returns: + A configured tool function for fetching link previews. + """ + + @tool + async def link_preview(url: str) -> dict[str, Any]: + """ + Fetch metadata for a URL to display a rich link preview. + + Use this tool when the user shares a URL or asks about a specific webpage. + This tool fetches the page's Open Graph metadata (title, description, image) + to display a nice preview card in the chat. + + Common triggers include: + - User shares a URL in the chat + - User asks "What's this link about?" or similar + - User says "Show me a preview of this page" + - User wants to preview an article or webpage + + Args: + url: The URL to fetch metadata for. Must be a valid HTTP/HTTPS URL. 
+ + Returns: + A dictionary containing: + - id: Unique identifier for this preview + - assetId: The URL itself (for deduplication) + - kind: "link" (type of media card) + - href: The URL to open when clicked + - title: Page title + - description: Page description (if available) + - thumb: Thumbnail/preview image URL (if available) + - domain: The domain name + - error: Error message (if fetch failed) + """ + preview_id = generate_preview_id(url) + domain = extract_domain(url) + + # Validate URL + if not url.startswith(("http://", "https://")): + url = f"https://{url}" + + try: + async with httpx.AsyncClient( + timeout=10.0, + follow_redirects=True, + headers={ + "User-Agent": "Mozilla/5.0 (compatible; SurfSenseBot/1.0; +https://surfsense.net)", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + }, + ) as client: + response = await client.get(url) + response.raise_for_status() + + # Get content type to ensure it's HTML + content_type = response.headers.get("content-type", "") + if "text/html" not in content_type.lower(): + # Not an HTML page, return basic info + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": url.split("/")[-1] or domain, + "description": f"File from {domain}", + "domain": domain, + } + + html = response.text + + # Extract metadata + title = extract_title(html) or domain + description = extract_description(html) + image = extract_image(html) + + # Make sure image URL is absolute + if image and not image.startswith(("http://", "https://")): + if image.startswith("//"): + image = f"https:{image}" + elif image.startswith("/"): + parsed = urlparse(url) + image = f"{parsed.scheme}://{parsed.netloc}{image}" + + # Clean up title and description (unescape HTML entities) + if title: + title = ( + title.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace(""", '"') + .replace("'", "'") + .replace("'", "'") + ) + if description: + 
description = ( + description.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace(""", '"') + .replace("'", "'") + .replace("'", "'") + ) + # Truncate long descriptions + if len(description) > 200: + description = description[:197] + "..." + + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": title, + "description": description, + "thumb": image, + "domain": domain, + } + + except httpx.TimeoutException: + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": domain or "Link", + "domain": domain, + "error": "Request timed out", + } + except httpx.HTTPStatusError as e: + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": domain or "Link", + "domain": domain, + "error": f"HTTP {e.response.status_code}", + } + except Exception as e: + error_message = str(e) + print(f"[link_preview] Error fetching {url}: {error_message}") + return { + "id": preview_id, + "assetId": url, + "kind": "link", + "href": url, + "title": domain or "Link", + "domain": domain, + "error": f"Failed to fetch: {error_message[:50]}", + } + + return link_preview + diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index f725be684..32e649e5d 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -145,6 +145,19 @@ You have access to the following tools: - Returns: A task_id for tracking. The podcast will be generated in the background. - IMPORTANT: Only one podcast can be generated at a time. If a podcast is already being generated, the tool will return status "already_generating". - After calling this tool, inform the user that podcast generation has started and they will see the player when it's ready (takes 3-5 minutes). + +3. link_preview: Fetch metadata for a URL to display a rich preview card. 
+ - IMPORTANT: Use this tool WHENEVER the user shares or mentions a URL/link in their message. + - This fetches the page's Open Graph metadata (title, description, thumbnail) to show a preview card. + - NOTE: This tool only fetches metadata, NOT the full page content. It cannot read the article text. + - Trigger scenarios: + * User shares a URL (e.g., "Check out https://example.com") + * User pastes a link in their message + * User asks about a URL or link + - Args: + - url: The URL to fetch metadata for (must be a valid HTTP/HTTPS URL) + - Returns: A rich preview card with title, description, thumbnail, and domain + - The preview card will automatically be displayed in the chat. - User: "Fetch all my notes and what's in them?" @@ -162,6 +175,15 @@ You have access to the following tools: - User: "Make a podcast about quantum computing" - First search: `search_knowledge_base(query="quantum computing")` - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\n\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")` + +- User: "Check out https://dev.to/some-article" + - Call: `link_preview(url="https://dev.to/some-article")` + +- User: "What's this blog post about? 
https://example.com/blog/post" + - Call: `link_preview(url="https://example.com/blog/post")` + +- User: "https://github.com/some/repo" + - Call: `link_preview(url="https://github.com/some/repo")` {citation_section} """ diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index cb40a82a8..853fb3392 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -284,6 +284,20 @@ async def stream_new_chat( status="in_progress", items=last_active_step_items, ) + elif tool_name == "link_preview": + url = ( + tool_input.get("url", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + last_active_step_title = "Fetching link preview" + last_active_step_items = [f"URL: {url[:80]}{'...' if len(url) > 80 else ''}"] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Fetching link preview", + status="in_progress", + items=last_active_step_items, + ) elif tool_name == "generate_podcast": podcast_title = ( tool_input.get("podcast_title", "SurfSense Podcast") @@ -343,6 +357,16 @@ async def stream_new_chat( f"Searching knowledge base: {query[:100]}{'...' if len(query) > 100 else ''}", "info", ) + elif tool_name == "link_preview": + url = ( + tool_input.get("url", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + yield streaming_service.format_terminal_info( + f"Fetching link preview: {url[:80]}{'...' 
if len(url) > 80 else ''}", + "info", + ) elif tool_name == "generate_podcast": title = ( tool_input.get("podcast_title", "SurfSense Podcast") @@ -404,6 +428,31 @@ async def stream_new_chat( status="completed", items=completed_items, ) + elif tool_name == "link_preview": + # Build completion items based on link preview result + if isinstance(tool_output, dict): + title = tool_output.get("title", "Link") + domain = tool_output.get("domain", "") + has_error = "error" in tool_output + if has_error: + completed_items = [ + *last_active_step_items, + f"Error: {tool_output.get('error', 'Failed to fetch')}", + ] + else: + completed_items = [ + *last_active_step_items, + f"Title: {title[:60]}{'...' if len(title) > 60 else ''}", + f"Domain: {domain}" if domain else "Preview loaded", + ] + else: + completed_items = [*last_active_step_items, "Preview loaded"] + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Fetching link preview", + status="completed", + items=completed_items, + ) elif tool_name == "generate_podcast": # Build detailed completion items based on podcast status podcast_status = ( @@ -492,8 +541,33 @@ async def stream_new_chat( f"Podcast generation failed: {error_msg}", "error", ) - else: - # Don't stream the full output for other tools (can be very large), just acknowledge + elif tool_name == "link_preview": + # Stream the full link preview result so frontend can render the MediaCard + yield streaming_service.format_tool_output_available( + tool_call_id, + tool_output + if isinstance(tool_output, dict) + else {"result": tool_output}, + ) + # Send appropriate terminal message + if isinstance(tool_output, dict) and "error" not in tool_output: + title = tool_output.get("title", "Link") + yield streaming_service.format_terminal_info( + f"Link preview loaded: {title[:50]}{'...' 
if len(title) > 50 else ''}", + "success", + ) + else: + error_msg = ( + tool_output.get("error", "Failed to fetch") + if isinstance(tool_output, dict) + else "Failed to fetch" + ) + yield streaming_service.format_terminal_info( + f"Link preview failed: {error_msg}", + "error", + ) + elif tool_name == "search_knowledge_base": + # Don't stream the full output for search (can be very large), just acknowledge yield streaming_service.format_tool_output_available( tool_call_id, {"status": "completed", "result_length": len(str(tool_output))}, @@ -501,6 +575,15 @@ async def stream_new_chat( yield streaming_service.format_terminal_info( "Knowledge base search completed", "success" ) + else: + # Default handling for other tools + yield streaming_service.format_tool_output_available( + tool_call_id, + {"status": "completed", "result_length": len(str(tool_output))}, + ) + yield streaming_service.format_terminal_info( + f"Tool {tool_name} completed", "success" + ) # Handle chain/agent end to close any open text blocks elif event_type in ("on_chain_end", "on_agent_end"): diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 51a2f9875..4d45db118 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -11,6 +11,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { toast } from "sonner"; import { Thread } from "@/components/assistant-ui/thread"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; +import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; import { getBearerToken } from "@/lib/auth-utils"; import { createAttachmentAdapter, extractAttachmentContent } from 
"@/lib/chat/attachment-adapter"; @@ -79,7 +80,7 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { /** * Tools that should render custom UI in the chat. */ -const TOOLS_WITH_UI = new Set(["generate_podcast"]); +const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview"]); /** * Type for thinking step data from the backend @@ -589,6 +590,7 @@ export default function NewChatPage() { return ( +
diff --git a/surfsense_web/components/tool-ui/index.ts b/surfsense_web/components/tool-ui/index.ts index 3cddfba21..aebe400d5 100644 --- a/surfsense_web/components/tool-ui/index.ts +++ b/surfsense_web/components/tool-ui/index.ts @@ -15,3 +15,20 @@ export { type DeepAgentThinkingArgs, type DeepAgentThinkingResult, } from "./deepagent-thinking"; +export { + LinkPreviewToolUI, + MultiLinkPreviewToolUI, + type LinkPreviewArgs, + type LinkPreviewResult, + type MultiLinkPreviewArgs, + type MultiLinkPreviewResult, +} from "./link-preview"; +export { + MediaCard, + MediaCardErrorBoundary, + MediaCardLoading, + MediaCardSkeleton, + parseSerializableMediaCard, + type MediaCardProps, + type SerializableMediaCard, +} from "./media-card"; diff --git a/surfsense_web/components/tool-ui/link-preview.tsx b/surfsense_web/components/tool-ui/link-preview.tsx new file mode 100644 index 000000000..c97b6820b --- /dev/null +++ b/surfsense_web/components/tool-ui/link-preview.tsx @@ -0,0 +1,226 @@ +"use client"; + +import { makeAssistantToolUI } from "@assistant-ui/react"; +import { AlertCircleIcon, ExternalLinkIcon, LinkIcon } from "lucide-react"; +import { + MediaCard, + MediaCardErrorBoundary, + MediaCardLoading, + parseSerializableMediaCard, + type SerializableMediaCard, +} from "@/components/tool-ui/media-card"; + +/** + * Type definitions for the link_preview tool + */ +interface LinkPreviewArgs { + url: string; + title?: string; +} + +interface LinkPreviewResult { + id: string; + assetId: string; + kind: "link"; + href: string; + title: string; + description?: string; + thumb?: string; + domain?: string; + error?: string; +} + +/** + * Error state component shown when link preview fails + */ +function LinkPreviewErrorState({ url, error }: { url: string; error: string }) { + return ( +
+
+
+ +
+
+

Failed to load preview

+

{url}

+

{error}

+
+
+
+ ); +} + +/** + * Cancelled state component + */ +function LinkPreviewCancelledState({ url }: { url: string }) { + return ( +
+

+ + Preview: {url} +

+
+ ); +} + +/** + * Parsed MediaCard component with error handling + */ +function ParsedMediaCard({ result }: { result: unknown }) { + const card = parseSerializableMediaCard(result); + + return ( + { + if (id === "open" && card.href) { + window.open(card.href, "_blank", "noopener,noreferrer"); + } + }} + /> + ); +} + +/** + * Link Preview Tool UI Component + * + * This component is registered with assistant-ui to render a rich + * link preview card when the link_preview tool is called by the agent. + * + * It displays website metadata including: + * - Title and description + * - Thumbnail/Open Graph image + * - Domain name + * - Clickable link to open in new tab + */ +export const LinkPreviewToolUI = makeAssistantToolUI< + LinkPreviewArgs, + LinkPreviewResult +>({ + toolName: "link_preview", + render: function LinkPreviewUI({ args, result, status }) { + const url = args.url || "Unknown URL"; + + // Loading state - tool is still running + if (status.type === "running" || status.type === "requires-action") { + return ( +
+ +
+ ); + } + + // Incomplete/cancelled state + if (status.type === "incomplete") { + if (status.reason === "cancelled") { + return ; + } + if (status.reason === "error") { + return ( + + ); + } + } + + // No result yet + if (!result) { + return ( +
+ +
+ ); + } + + // Error result from the tool + if (result.error) { + return ; + } + + // Success - render the media card + return ( +
+ + + +
+ ); + }, +}); + +/** + * Multiple Link Previews Tool UI Component + * + * This component handles cases where multiple links need to be previewed. + * It renders a grid of link preview cards. + */ +interface MultiLinkPreviewArgs { + urls: string[]; +} + +interface MultiLinkPreviewResult { + previews: LinkPreviewResult[]; + errors?: { url: string; error: string }[]; +} + +export const MultiLinkPreviewToolUI = makeAssistantToolUI< + MultiLinkPreviewArgs, + MultiLinkPreviewResult +>({ + toolName: "multi_link_preview", + render: function MultiLinkPreviewUI({ args, result, status }) { + const urls = args.urls || []; + + // Loading state + if (status.type === "running" || status.type === "requires-action") { + return ( +
+ {urls.slice(0, 4).map((url, index) => ( + + ))} +
+ ); + } + + // Incomplete state + if (status.type === "incomplete") { + return ( +
+

+ + Link previews cancelled +

+
+ ); + } + + // No result + if (!result || !result.previews) { + return null; + } + + // Render grid of previews + return ( +
+ {result.previews.map((preview) => ( + + + + ))} + {result.errors?.map((err) => ( + + ))} +
+ ); + }, +}); + +export type { LinkPreviewArgs, LinkPreviewResult, MultiLinkPreviewArgs, MultiLinkPreviewResult }; + diff --git a/surfsense_web/components/tool-ui/media-card/index.tsx b/surfsense_web/components/tool-ui/media-card/index.tsx new file mode 100644 index 000000000..47c28e49b --- /dev/null +++ b/surfsense_web/components/tool-ui/media-card/index.tsx @@ -0,0 +1,381 @@ +"use client"; + +import { ExternalLinkIcon, Globe, ImageIcon, LinkIcon, Loader2 } from "lucide-react"; +import Image from "next/image"; +import { Component, type ReactNode } from "react"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Card, CardContent } from "@/components/ui/card"; +import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; +import { cn } from "@/lib/utils"; + +/** + * Aspect ratio options for media cards + */ +type AspectRatio = "1:1" | "4:3" | "16:9" | "21:9" | "auto"; + +/** + * MediaCard kind - determines the display style + */ +type MediaCardKind = "link" | "image" | "video" | "audio"; + +/** + * Response action configuration + */ +interface ResponseAction { + id: string; + label: string; + variant?: "default" | "secondary" | "outline" | "destructive" | "ghost"; + confirmLabel?: string; +} + +/** + * Props for the MediaCard component + */ +export interface MediaCardProps { + id: string; + assetId: string; + kind: MediaCardKind; + href?: string; + src?: string; + title: string; + description?: string; + thumb?: string; + ratio?: AspectRatio; + domain?: string; + maxWidth?: string; + alt?: string; + className?: string; + responseActions?: ResponseAction[]; + onResponseAction?: (id: string) => void; +} + +/** + * Serializable schema for MediaCard props (for tool results) + */ +export interface SerializableMediaCard { + id: string; + assetId: string; + kind: MediaCardKind; + href?: string; + src?: string; + title: string; + description?: string; + thumb?: string; + 
ratio?: AspectRatio; + domain?: string; +} + +/** + * Parse and validate serializable media card from tool result + */ +export function parseSerializableMediaCard(result: unknown): SerializableMediaCard { + if (typeof result !== "object" || result === null) { + throw new Error("Invalid media card result: expected object"); + } + + const obj = result as Record; + + // Validate required fields + if (typeof obj.id !== "string") { + throw new Error("Invalid media card: missing id"); + } + if (typeof obj.assetId !== "string") { + throw new Error("Invalid media card: missing assetId"); + } + if (typeof obj.kind !== "string") { + throw new Error("Invalid media card: missing kind"); + } + if (typeof obj.title !== "string") { + throw new Error("Invalid media card: missing title"); + } + + return { + id: obj.id, + assetId: obj.assetId, + kind: obj.kind as MediaCardKind, + href: typeof obj.href === "string" ? obj.href : undefined, + src: typeof obj.src === "string" ? obj.src : undefined, + title: obj.title, + description: typeof obj.description === "string" ? obj.description : undefined, + thumb: typeof obj.thumb === "string" ? obj.thumb : undefined, + ratio: typeof obj.ratio === "string" ? (obj.ratio as AspectRatio) : undefined, + domain: typeof obj.domain === "string" ? 
obj.domain : undefined, + }; +} + +/** + * Get aspect ratio class based on ratio prop + */ +function getAspectRatioClass(ratio?: AspectRatio): string { + switch (ratio) { + case "1:1": + return "aspect-square"; + case "4:3": + return "aspect-[4/3]"; + case "16:9": + return "aspect-video"; + case "21:9": + return "aspect-[21/9]"; + case "auto": + default: + return "aspect-[2/1]"; + } +} + +/** + * Get icon based on media card kind + */ +function getKindIcon(kind: MediaCardKind) { + switch (kind) { + case "link": + return ; + case "image": + return ; + case "video": + case "audio": + return ; + default: + return ; + } +} + +/** + * Error boundary for MediaCard + */ +interface MediaCardErrorBoundaryState { + hasError: boolean; + error?: Error; +} + +export class MediaCardErrorBoundary extends Component< + { children: ReactNode }, + MediaCardErrorBoundaryState +> { + constructor(props: { children: ReactNode }) { + super(props); + this.state = { hasError: false }; + } + + static getDerivedStateFromError(error: Error): MediaCardErrorBoundaryState { + return { hasError: true, error }; + } + + render() { + if (this.state.hasError) { + return ( + + +
+ +
+
+

Failed to load preview

+

+ {this.state.error?.message || "An error occurred"} +

+
+
+
+ ); + } + + return this.props.children; + } +} + +/** + * Loading skeleton for MediaCard + */ +export function MediaCardSkeleton({ maxWidth = "420px" }: { maxWidth?: string }) { + return ( + +
+ +
+
+
+ + + ); +} + +/** + * MediaCard Component + * + * A rich media card for displaying link previews, images, and other media + * in AI chat applications. Supports thumbnails, descriptions, and actions. + */ +export function MediaCard({ + id, + kind, + href, + title, + description, + thumb, + ratio = "auto", + domain, + maxWidth = "420px", + alt, + className, + responseActions, + onResponseAction, +}: MediaCardProps) { + const aspectRatioClass = getAspectRatioClass(ratio); + const displayDomain = domain || (href ? new URL(href).hostname.replace("www.", "") : undefined); + + const handleCardClick = () => { + if (href) { + window.open(href, "_blank", "noopener,noreferrer"); + } + }; + + return ( + + { + if (href && (e.key === "Enter" || e.key === " ")) { + e.preventDefault(); + handleCardClick(); + } + }} + > + {/* Thumbnail */} + {thumb && ( +
+ {alt { + // Hide broken images + e.currentTarget.style.display = "none"; + }} + /> + {/* Gradient overlay */} +
+
+ )} + + {/* Fallback when no thumbnail */} + {!thumb && ( +
+
+ {getKindIcon(kind)} + {kind === "link" ? "Link Preview" : kind} +
+
+ )} + + {/* Content */} + +
+ {/* Domain favicon placeholder */} +
+ +
+ +
+ {/* Domain badge */} + {displayDomain && ( +
+ + {displayDomain} + + {href && ( + + )} +
+ )} + + {/* Title */} +

+ {title} +

+ + {/* Description */} + {description && ( +

+ {description} +

+ )} +
+
+ + {/* Response Actions */} + {responseActions && responseActions.length > 0 && ( +
e.stopPropagation()} + onKeyDown={(e) => e.stopPropagation()} + > + {responseActions.map((action) => ( + + + + + {action.confirmLabel && ( + +

{action.confirmLabel}

+
+ )} +
+ ))} +
+ )} +
+ + + ); +} + +/** + * MediaCard Loading State + */ +export function MediaCardLoading({ title = "Loading preview..." }: { title?: string }) { + return ( + +
+ +
+ +
+
+
+
+
+
+
+

{title}

+ + + ); +} + From da7cb812528562c729e3a5dc00ea13023e6b0923 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 23 Dec 2025 01:11:56 +0530 Subject: [PATCH 09/13] feat: introduce display image tool for enhanced image rendering in chat with metadata support --- .../app/agents/new_chat/chat_deepagent.py | 9 + .../app/agents/new_chat/display_image.py | 106 ++++++ .../app/agents/new_chat/system_prompt.py | 21 ++ .../app/tasks/chat/stream_new_chat.py | 64 ++++ .../new-chat/[[...chat_id]]/page.tsx | 4 +- .../components/tool-ui/display-image.tsx | 160 +++++++++ .../components/tool-ui/image/index.tsx | 332 ++++++++++++++++++ surfsense_web/components/tool-ui/index.ts | 14 + 8 files changed, 709 insertions(+), 1 deletion(-) create mode 100644 surfsense_backend/app/agents/new_chat/display_image.py create mode 100644 surfsense_web/components/tool-ui/display-image.tsx create mode 100644 surfsense_web/components/tool-ui/image/index.tsx diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index 23db4f813..e6b2aca06 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -14,6 +14,7 @@ from langgraph.types import Checkpointer from sqlalchemy.ext.asyncio import AsyncSession from app.agents.new_chat.context import SurfSenseContextSchema +from app.agents.new_chat.display_image import create_display_image_tool from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool from app.agents.new_chat.link_preview import create_link_preview_tool from app.agents.new_chat.podcast import create_generate_podcast_tool @@ -36,6 +37,7 @@ def create_surfsense_deep_agent( enable_citations: bool = True, enable_podcast: bool = True, enable_link_preview: bool = True, + enable_display_image: bool = True, additional_tools: Sequence[BaseTool] | None = None, ): """ @@ -57,6 +59,8 @@ def 
create_surfsense_deep_agent( When True and user_id is provided, the agent can generate podcasts. enable_link_preview: Whether to include the link preview tool (default: True). When True, the agent can fetch and display rich link previews. + enable_display_image: Whether to include the display image tool (default: True). + When True, the agent can display images with metadata. additional_tools: Optional sequence of additional tools to inject into the agent. The search_knowledge_base tool will always be included. @@ -87,6 +91,11 @@ def create_surfsense_deep_agent( link_preview_tool = create_link_preview_tool() tools.append(link_preview_tool) + # Add display image tool if enabled + if enable_display_image: + display_image_tool = create_display_image_tool() + tools.append(display_image_tool) + if additional_tools: tools.extend(additional_tools) diff --git a/surfsense_backend/app/agents/new_chat/display_image.py b/surfsense_backend/app/agents/new_chat/display_image.py new file mode 100644 index 000000000..03153300a --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/display_image.py @@ -0,0 +1,106 @@ +""" +Display image tool for the new chat agent. + +This module provides a tool for displaying images in the chat UI +with metadata like title, description, and source attribution. +""" + +import hashlib +from typing import Any +from urllib.parse import urlparse + +from langchain_core.tools import tool + + +def extract_domain(url: str) -> str: + """Extract the domain from a URL.""" + try: + parsed = urlparse(url) + domain = parsed.netloc + # Remove 'www.' prefix if present + if domain.startswith("www."): + domain = domain[4:] + return domain + except Exception: + return "" + + +def generate_image_id(src: str) -> str: + """Generate a unique ID for an image.""" + hash_val = hashlib.md5(src.encode()).hexdigest()[:12] + return f"image-{hash_val}" + + +def create_display_image_tool(): + """ + Factory function to create the display_image tool. 
+ + Returns: + A configured tool function for displaying images. + """ + + @tool + async def display_image( + src: str, + alt: str = "Image", + title: str | None = None, + description: str | None = None, + ) -> dict[str, Any]: + """ + Display an image in the chat with metadata. + + Use this tool when you want to show an image to the user. + This displays the image with an optional title, description, + and source attribution. + + Common use cases: + - Showing an image from a URL the user mentioned + - Displaying a diagram or chart you're referencing + - Showing example images when explaining concepts + + Args: + src: The URL of the image to display (must be a valid HTTP/HTTPS URL) + alt: Alternative text describing the image (for accessibility) + title: Optional title to display below the image + description: Optional description providing context about the image + + Returns: + A dictionary containing image metadata for the UI to render: + - id: Unique identifier for this image + - assetId: The image URL (for deduplication) + - src: The image URL + - alt: Alt text for accessibility + - title: Image title (if provided) + - description: Image description (if provided) + - domain: Source domain + """ + image_id = generate_image_id(src) + + # Ensure URL has protocol + if not src.startswith(("http://", "https://")): + src = f"https://{src}" + + domain = extract_domain(src) + + # Determine aspect ratio based on common image sources + ratio = "16:9" # Default + if "unsplash.com" in src or "pexels.com" in src: + ratio = "16:9" + elif "imgur.com" in src: + ratio = "auto" + elif "github.com" in src or "githubusercontent.com" in src: + ratio = "auto" + + return { + "id": image_id, + "assetId": src, + "src": src, + "alt": alt, + "title": title, + "description": description, + "domain": domain, + "ratio": ratio, + } + + return display_image + diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 
32e649e5d..74abadd38 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -158,6 +158,21 @@ You have access to the following tools: - url: The URL to fetch metadata for (must be a valid HTTP/HTTPS URL) - Returns: A rich preview card with title, description, thumbnail, and domain - The preview card will automatically be displayed in the chat. + +4. display_image: Display an image in the chat with metadata. + - Use this tool when you want to show an image to the user. + - This displays the image with an optional title, description, and source attribution. + - Common use cases: + * Showing an image from a URL mentioned in the conversation + * Displaying a diagram, chart, or illustration you're referencing + * Showing visual examples when explaining concepts + - Args: + - src: The URL of the image to display (must be a valid HTTP/HTTPS image URL) + - alt: Alternative text describing the image (for accessibility) + - title: Optional title to display below the image + - description: Optional description providing context about the image + - Returns: An image card with the image, title, and description + - The image will automatically be displayed in the chat. - User: "Fetch all my notes and what's in them?" @@ -184,6 +199,12 @@ You have access to the following tools: - User: "https://github.com/some/repo" - Call: `link_preview(url="https://github.com/some/repo")` + +- User: "Show me this image: https://example.com/image.png" + - Call: `display_image(src="https://example.com/image.png", alt="User shared image")` + +- User: "Can you display a diagram of a neural network?" 
+ - Call: `display_image(src="https://example.com/neural-network.png", alt="Neural network diagram", title="Neural Network Architecture", description="A visual representation of a neural network with input, hidden, and output layers")` {citation_section} """ diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 853fb3392..6e2b30c6a 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -298,6 +298,27 @@ async def stream_new_chat( status="in_progress", items=last_active_step_items, ) + elif tool_name == "display_image": + src = ( + tool_input.get("src", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + title = ( + tool_input.get("title", "") + if isinstance(tool_input, dict) + else "" + ) + last_active_step_title = "Displaying image" + last_active_step_items = [ + f"Image: {title[:50] if title else src[:50]}{'...' if len(title or src) > 50 else ''}" + ] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Displaying image", + status="in_progress", + items=last_active_step_items, + ) elif tool_name == "generate_podcast": podcast_title = ( tool_input.get("podcast_title", "SurfSense Podcast") @@ -367,6 +388,16 @@ async def stream_new_chat( f"Fetching link preview: {url[:80]}{'...' if len(url) > 80 else ''}", "info", ) + elif tool_name == "display_image": + src = ( + tool_input.get("src", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + yield streaming_service.format_terminal_info( + f"Displaying image: {src[:60]}{'...' 
if len(src) > 60 else ''}", + "info", + ) elif tool_name == "generate_podcast": title = ( tool_input.get("podcast_title", "SurfSense Podcast") @@ -453,6 +484,24 @@ async def stream_new_chat( status="completed", items=completed_items, ) + elif tool_name == "display_image": + # Build completion items for image display + if isinstance(tool_output, dict): + title = tool_output.get("title", "") + alt = tool_output.get("alt", "Image") + display_name = title or alt + completed_items = [ + *last_active_step_items, + f"Showing: {display_name[:50]}{'...' if len(display_name) > 50 else ''}", + ] + else: + completed_items = [*last_active_step_items, "Image displayed"] + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Displaying image", + status="completed", + items=completed_items, + ) elif tool_name == "generate_podcast": # Build detailed completion items based on podcast status podcast_status = ( @@ -566,6 +615,21 @@ async def stream_new_chat( f"Link preview failed: {error_msg}", "error", ) + elif tool_name == "display_image": + # Stream the full image result so frontend can render the Image component + yield streaming_service.format_tool_output_available( + tool_call_id, + tool_output + if isinstance(tool_output, dict) + else {"result": tool_output}, + ) + # Send terminal message + if isinstance(tool_output, dict): + title = tool_output.get("title") or tool_output.get("alt", "Image") + yield streaming_service.format_terminal_info( + f"Image displayed: {title[:40]}{'...' 
if len(title) > 40 else ''}", + "success", + ) elif tool_name == "search_knowledge_base": # Don't stream the full output for search (can be very large), just acknowledge yield streaming_service.format_tool_output_available( diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 4d45db118..7c6bd34c1 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -12,6 +12,7 @@ import { toast } from "sonner"; import { Thread } from "@/components/assistant-ui/thread"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; +import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; import { getBearerToken } from "@/lib/auth-utils"; import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter"; @@ -80,7 +81,7 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { /** * Tools that should render custom UI in the chat. */ -const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview"]); +const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview", "display_image"]); /** * Type for thinking step data from the backend @@ -591,6 +592,7 @@ export default function NewChatPage() { +
diff --git a/surfsense_web/components/tool-ui/display-image.tsx b/surfsense_web/components/tool-ui/display-image.tsx new file mode 100644 index 000000000..37bdad5fa --- /dev/null +++ b/surfsense_web/components/tool-ui/display-image.tsx @@ -0,0 +1,160 @@ +"use client"; + +import { makeAssistantToolUI } from "@assistant-ui/react"; +import { AlertCircleIcon, ImageIcon } from "lucide-react"; +import { + Image, + ImageErrorBoundary, + ImageLoading, + parseSerializableImage, +} from "@/components/tool-ui/image"; + +/** + * Type definitions for the display_image tool + */ +interface DisplayImageArgs { + src: string; + alt?: string; + title?: string; + description?: string; +} + +interface DisplayImageResult { + id: string; + assetId: string; + src: string; + alt: string; + title?: string; + description?: string; + domain?: string; + ratio?: string; + error?: string; +} + +/** + * Error state component shown when image display fails + */ +function ImageErrorState({ src, error }: { src: string; error: string }) { + return ( +
+
+
+ +
+
+

Failed to display image

+

{src}

+

{error}

+
+
+
+ ); +} + +/** + * Cancelled state component + */ +function ImageCancelledState({ src }: { src: string }) { + return ( +
+

+ + Image: {src} +

+
+ ); +} + +/** + * Parsed Image component with error handling + */ +function ParsedImage({ result }: { result: unknown }) { + const image = parseSerializableImage(result); + + return ( + { + if (id === "open" && image.src) { + window.open(image.src, "_blank", "noopener,noreferrer"); + } + }} + /> + ); +} + +/** + * Display Image Tool UI Component + * + * This component is registered with assistant-ui to render an image + * when the display_image tool is called by the agent. + * + * It displays images with: + * - Title and description + * - Source attribution + * - Hover overlay effects + * - Click to open full size + */ +export const DisplayImageToolUI = makeAssistantToolUI< + DisplayImageArgs, + DisplayImageResult +>({ + toolName: "display_image", + render: function DisplayImageUI({ args, result, status }) { + const src = args.src || "Unknown"; + + // Loading state - tool is still running + if (status.type === "running" || status.type === "requires-action") { + return ( +
+ +
+ ); + } + + // Incomplete/cancelled state + if (status.type === "incomplete") { + if (status.reason === "cancelled") { + return ; + } + if (status.reason === "error") { + return ( + + ); + } + } + + // No result yet + if (!result) { + return ( +
+ +
+ ); + } + + // Error result from the tool + if (result.error) { + return ; + } + + // Success - render the image + return ( +
+ + + +
+ ); + }, +}); + +export type { DisplayImageArgs, DisplayImageResult }; + diff --git a/surfsense_web/components/tool-ui/image/index.tsx b/surfsense_web/components/tool-ui/image/index.tsx new file mode 100644 index 000000000..97c983625 --- /dev/null +++ b/surfsense_web/components/tool-ui/image/index.tsx @@ -0,0 +1,332 @@ +"use client"; + +import { ExternalLinkIcon, ImageIcon, Loader2 } from "lucide-react"; +import NextImage from "next/image"; +import { Component, type ReactNode, useState } from "react"; +import { Badge } from "@/components/ui/badge"; +import { Card } from "@/components/ui/card"; +import { cn } from "@/lib/utils"; + +/** + * Aspect ratio options for images + */ +type AspectRatio = "1:1" | "4:3" | "16:9" | "9:16" | "auto"; + +/** + * Image fit options + */ +type ImageFit = "cover" | "contain"; + +/** + * Source attribution + */ +interface ImageSource { + label: string; + iconUrl?: string; + url?: string; +} + +/** + * Props for the Image component + */ +export interface ImageProps { + id: string; + assetId: string; + src: string; + alt: string; + title?: string; + description?: string; + href?: string; + domain?: string; + ratio?: AspectRatio; + fit?: ImageFit; + source?: ImageSource; + maxWidth?: string; + className?: string; +} + +/** + * Serializable schema for Image props (for tool results) + */ +export interface SerializableImage { + id: string; + assetId: string; + src: string; + alt: string; + title?: string; + description?: string; + href?: string; + domain?: string; + ratio?: AspectRatio; + source?: ImageSource; +} + +/** + * Parse and validate serializable image from tool result + */ +export function parseSerializableImage(result: unknown): SerializableImage { + if (typeof result !== "object" || result === null) { + throw new Error("Invalid image result: expected object"); + } + + const obj = result as Record; + + // Validate required fields + if (typeof obj.id !== "string") { + throw new Error("Invalid image: missing id"); + } + if (typeof 
obj.assetId !== "string") { + throw new Error("Invalid image: missing assetId"); + } + if (typeof obj.src !== "string") { + throw new Error("Invalid image: missing src"); + } + if (typeof obj.alt !== "string") { + throw new Error("Invalid image: missing alt"); + } + + return { + id: obj.id, + assetId: obj.assetId, + src: obj.src, + alt: obj.alt, + title: typeof obj.title === "string" ? obj.title : undefined, + description: typeof obj.description === "string" ? obj.description : undefined, + href: typeof obj.href === "string" ? obj.href : undefined, + domain: typeof obj.domain === "string" ? obj.domain : undefined, + ratio: typeof obj.ratio === "string" ? (obj.ratio as AspectRatio) : undefined, + source: typeof obj.source === "object" && obj.source !== null ? (obj.source as ImageSource) : undefined, + }; +} + +/** + * Get aspect ratio class based on ratio prop + */ +function getAspectRatioClass(ratio?: AspectRatio): string { + switch (ratio) { + case "1:1": + return "aspect-square"; + case "4:3": + return "aspect-[4/3]"; + case "16:9": + return "aspect-video"; + case "9:16": + return "aspect-[9/16]"; + case "auto": + default: + return "aspect-[4/3]"; + } +} + +/** + * Error boundary for Image component + */ +interface ImageErrorBoundaryState { + hasError: boolean; + error?: Error; +} + +export class ImageErrorBoundary extends Component< + { children: ReactNode }, + ImageErrorBoundaryState +> { + constructor(props: { children: ReactNode }) { + super(props); + this.state = { hasError: false }; + } + + static getDerivedStateFromError(error: Error): ImageErrorBoundaryState { + return { hasError: true, error }; + } + + render() { + if (this.state.hasError) { + return ( + +
+
+ +

Failed to load image

+
+
+
+ ); + } + + return this.props.children; + } +} + +/** + * Loading skeleton for Image + */ +export function ImageSkeleton({ maxWidth = "420px" }: { maxWidth?: string }) { + return ( + +
+ +
+
+ ); +} + +/** + * Image Loading State + */ +export function ImageLoading({ title = "Loading image..." }: { title?: string }) { + return ( + +
+
+ +

{title}

+
+
+
+ ); +} + +/** + * Image Component + * + * Display images with metadata and attribution. + * Features hover overlay with title and source attribution. + */ +export function Image({ + id, + src, + alt, + title, + description, + href, + domain, + ratio = "4:3", + fit = "cover", + source, + maxWidth = "420px", + className, +}: ImageProps) { + const [isHovered, setIsHovered] = useState(false); + const [imageError, setImageError] = useState(false); + const aspectRatioClass = getAspectRatioClass(ratio); + const displayDomain = domain || source?.label; + + const handleClick = () => { + const targetUrl = href || source?.url || src; + if (targetUrl) { + window.open(targetUrl, "_blank", "noopener,noreferrer"); + } + }; + + if (imageError) { + return ( + +
+
+ +

Image not available

+
+
+
+ ); + } + + return ( + setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + onKeyDown={(e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + handleClick(); + } + }} + role="button" + tabIndex={0} + > +
+ {/* Image */} + setImageError(true)} + /> + + {/* Hover overlay - appears on hover */} +
+ {/* Content at bottom */} +
+ {/* Title */} + {title && ( +

+ {title} +

+ )} + + {/* Description */} + {description && ( +

+ {description} +

+ )} + + {/* Source attribution */} + {displayDomain && ( +
+ {source?.iconUrl ? ( + + ) : ( + + )} + {displayDomain} +
+ )} +
+
+ + {/* Always visible domain badge (bottom right, shown when NOT hovered) */} + {displayDomain && !isHovered && ( +
+ + {displayDomain} + +
+ )} +
+
+ ); +} diff --git a/surfsense_web/components/tool-ui/index.ts b/surfsense_web/components/tool-ui/index.ts index aebe400d5..b9500e7cd 100644 --- a/surfsense_web/components/tool-ui/index.ts +++ b/surfsense_web/components/tool-ui/index.ts @@ -32,3 +32,17 @@ export { type MediaCardProps, type SerializableMediaCard, } from "./media-card"; +export { + Image, + ImageErrorBoundary, + ImageLoading, + ImageSkeleton, + parseSerializableImage, + type ImageProps, + type SerializableImage, +} from "./image"; +export { + DisplayImageToolUI, + type DisplayImageArgs, + type DisplayImageResult, +} from "./display-image"; From 24dd52ed9998860f0ac26e3eccc96a549776eb70 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 23 Dec 2025 01:49:29 +0530 Subject: [PATCH 10/13] feat: add web scraping tool to chat agent for extracting and summarizing webpage content --- .../app/agents/new_chat/chat_deepagent.py | 12 + .../app/agents/new_chat/display_image.py | 4 +- .../app/agents/new_chat/scrape_webpage.py | 197 +++++++++ .../app/agents/new_chat/system_prompt.py | 41 ++ .../app/tasks/chat/stream_new_chat.py | 90 ++++ .../new-chat/[[...chat_id]]/page.tsx | 167 +++---- .../components/tool-ui/article/index.tsx | 406 ++++++++++++++++++ surfsense_web/components/tool-ui/index.ts | 14 + .../components/tool-ui/scrape-webpage.tsx | 163 +++++++ 9 files changed, 1018 insertions(+), 76 deletions(-) create mode 100644 surfsense_backend/app/agents/new_chat/scrape_webpage.py create mode 100644 surfsense_web/components/tool-ui/article/index.tsx create mode 100644 surfsense_web/components/tool-ui/scrape-webpage.tsx diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index e6b2aca06..aa1950bff 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -18,6 +18,7 @@ from app.agents.new_chat.display_image import 
create_display_image_tool from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool from app.agents.new_chat.link_preview import create_link_preview_tool from app.agents.new_chat.podcast import create_generate_podcast_tool +from app.agents.new_chat.scrape_webpage import create_scrape_webpage_tool from app.agents.new_chat.system_prompt import build_surfsense_system_prompt from app.services.connector_service import ConnectorService @@ -38,6 +39,8 @@ def create_surfsense_deep_agent( enable_podcast: bool = True, enable_link_preview: bool = True, enable_display_image: bool = True, + enable_scrape_webpage: bool = True, + firecrawl_api_key: str | None = None, additional_tools: Sequence[BaseTool] | None = None, ): """ @@ -61,6 +64,10 @@ def create_surfsense_deep_agent( When True, the agent can fetch and display rich link previews. enable_display_image: Whether to include the display image tool (default: True). When True, the agent can display images with metadata. + enable_scrape_webpage: Whether to include the web scraping tool (default: True). + When True, the agent can scrape and read webpage content. + firecrawl_api_key: Optional Firecrawl API key for premium web scraping. + Falls back to Chromium/Trafilatura if not provided. additional_tools: Optional sequence of additional tools to inject into the agent. The search_knowledge_base tool will always be included. 
@@ -96,6 +103,11 @@ def create_surfsense_deep_agent(
         display_image_tool = create_display_image_tool()
         tools.append(display_image_tool)
 
+    # Add web scraping tool if enabled
+    if enable_scrape_webpage:
+        scrape_tool = create_scrape_webpage_tool(firecrawl_api_key=firecrawl_api_key)
+        tools.append(scrape_tool)
+
     if additional_tools:
         tools.extend(additional_tools)
 
diff --git a/surfsense_backend/app/agents/new_chat/display_image.py b/surfsense_backend/app/agents/new_chat/display_image.py
index 03153300a..0cd05b523 100644
--- a/surfsense_backend/app/agents/new_chat/display_image.py
+++ b/surfsense_backend/app/agents/new_chat/display_image.py
@@ -86,9 +86,7 @@ def create_display_image_tool():
         ratio = "16:9"  # Default
         if "unsplash.com" in src or "pexels.com" in src:
             ratio = "16:9"
-        elif "imgur.com" in src:
-            ratio = "auto"
-        elif "github.com" in src or "githubusercontent.com" in src:
+        elif "imgur.com" in src or "github.com" in src or "githubusercontent.com" in src:
             ratio = "auto"
 
         return {
diff --git a/surfsense_backend/app/agents/new_chat/scrape_webpage.py b/surfsense_backend/app/agents/new_chat/scrape_webpage.py
new file mode 100644
index 000000000..40a9c917f
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/scrape_webpage.py
@@ -0,0 +1,197 @@
+"""
+Web scraping tool for the new chat agent.
+
+This module provides a tool for scraping and extracting content from webpages
+using the existing WebCrawlerConnector. The scraped content can be used by
+the agent to answer questions about web pages.
+"""
+
+import hashlib
+from typing import Any
+from urllib.parse import urlparse
+
+from langchain_core.tools import tool
+
+from app.connectors.webcrawler_connector import WebCrawlerConnector
+
+
+def extract_domain(url: str) -> str:
+    """Extract the domain from a URL."""
+    try:
+        parsed = urlparse(url)
+        domain = parsed.netloc
+        # Remove 'www.' prefix if present
+        if domain.startswith("www."):
+            domain = domain[4:]
+        return domain
+    except Exception:
+        return ""
+
+
+def generate_scrape_id(url: str) -> str:
+    """Generate a unique ID for a scraped webpage."""
+    hash_val = hashlib.md5(url.encode()).hexdigest()[:12]
+    return f"scrape-{hash_val}"
+
+
+def truncate_content(content: str, max_length: int = 50000) -> tuple[str, bool]:
+    """
+    Truncate content to a maximum length.
+
+    Returns:
+        Tuple of (truncated_content, was_truncated)
+    """
+    if len(content) <= max_length:
+        return content, False
+
+    # Try to truncate at a sentence boundary
+    truncated = content[:max_length]
+    last_period = truncated.rfind(".")
+    last_newline = truncated.rfind("\n\n")
+
+    # Use the later of the two boundaries, or just truncate
+    boundary = max(last_period, last_newline)
+    if boundary > max_length * 0.8:  # Only use boundary if it's not too far back
+        truncated = content[: boundary + 1]
+
+    return truncated + "\n\n[Content truncated...]", True
+
+
+def create_scrape_webpage_tool(firecrawl_api_key: str | None = None):
+    """
+    Factory function to create the scrape_webpage tool.
+
+    Args:
+        firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
+                          Falls back to Chromium/Trafilatura if not provided.
+
+    Returns:
+        A configured tool function for scraping webpages.
+    """
+
+    @tool
+    async def scrape_webpage(
+        url: str,
+        max_length: int = 50000,
+    ) -> dict[str, Any]:
+        """
+        Scrape and extract the main content from a webpage.
+
+        Use this tool when the user wants you to read, summarize, or answer
+        questions about a specific webpage's content. This tool actually
+        fetches and reads the full page content.
+
+        Common triggers:
+        - "Read this article and summarize it"
+        - "What does this page say about X?"
+        - "Summarize this blog post for me"
+        - "Tell me the key points from this article"
+        - "What's in this webpage?"
+
+        Args:
+            url: The URL of the webpage to scrape (must be HTTP/HTTPS)
+            max_length: Maximum content length to return (default: 50000 chars)
+
+        Returns:
+            A dictionary containing:
+            - id: Unique identifier for this scrape
+            - assetId: The URL (for deduplication)
+            - kind: "article" (type of content)
+            - href: The URL to open when clicked
+            - title: Page title
+            - description: Brief description or excerpt
+            - content: The extracted main content (markdown format)
+            - domain: The domain name
+            - word_count: Approximate word count
+            - was_truncated: Whether content was truncated
+            - error: Error message (if scraping failed)
+        """
+        # Normalize first: urlparse() finds no netloc without a scheme, so id/domain must use the final URL
+        if not url.startswith(("http://", "https://")):
+            url = f"https://{url}"
+
+        scrape_id = generate_scrape_id(url)
+        domain = extract_domain(url)
+
+        try:
+            # Create webcrawler connector
+            connector = WebCrawlerConnector(firecrawl_api_key=firecrawl_api_key)
+
+            # Crawl the URL
+            result, error = await connector.crawl_url(url, formats=["markdown"])
+
+            if error:
+                return {
+                    "id": scrape_id,
+                    "assetId": url,
+                    "kind": "article",
+                    "href": url,
+                    "title": domain or "Webpage",
+                    "domain": domain,
+                    "error": error,
+                }
+
+            if not result:
+                return {
+                    "id": scrape_id,
+                    "assetId": url,
+                    "kind": "article",
+                    "href": url,
+                    "title": domain or "Webpage",
+                    "domain": domain,
+                    "error": "No content returned from crawler",
+                }
+
+            # Extract content and metadata
+            content = result.get("content", "")
+            metadata = result.get("metadata", {})
+
+            # Get title from metadata
+            title = metadata.get("title", "")
+            if not title:
+                title = domain or url.split("/")[-1] or "Webpage"
+
+            # Get description from metadata
+            description = metadata.get("description", "")
+            if not description and content:
+                # Use first paragraph as description
+                first_para = content.split("\n\n")[0] if content else ""
+                description = first_para[:300] + "..." if len(first_para) > 300 else first_para
+
+            # Truncate content if needed
+            content, was_truncated = truncate_content(content, max_length)
+
+            # Calculate word count
+            word_count = len(content.split())
+
+            return {
+                "id": scrape_id,
+                "assetId": url,
+                "kind": "article",
+                "href": url,
+                "title": title,
+                "description": description,
+                "content": content,
+                "domain": domain,
+                "word_count": word_count,
+                "was_truncated": was_truncated,
+                "crawler_type": result.get("crawler_type", "unknown"),
+                "author": metadata.get("author"),
+                "date": metadata.get("date"),
+            }
+
+        except Exception as e:
+            error_message = str(e)
+            print(f"[scrape_webpage] Error scraping {url}: {error_message}")
+            return {
+                "id": scrape_id,
+                "assetId": url,
+                "kind": "article",
+                "href": url,
+                "title": domain or "Webpage",
+                "domain": domain,
+                "error": f"Failed to scrape: {error_message[:100]}",
+            }
+
+    return scrape_webpage
+
diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py
index 74abadd38..2677b21fd 100644
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@@ -173,6 +173,29 @@ You have access to the following tools:
     - description: Optional description providing context about the image
   - Returns: An image card with the image, title, and description
   - The image will automatically be displayed in the chat.
+
+5. scrape_webpage: Scrape and extract the main content from a webpage.
+  - Use this when the user wants you to READ and UNDERSTAND the actual content of a webpage.
+  - IMPORTANT: This is different from link_preview:
+    * link_preview: Only fetches metadata (title, description, thumbnail) for display
+    * scrape_webpage: Actually reads the FULL page content so you can analyze/summarize it
+  - Trigger scenarios:
+    * "Read this article and summarize it"
+    * "What does this page say about X?"
+ * "Summarize this blog post for me" + * "Tell me the key points from this article" + * "What's in this webpage?" + * "Can you analyze this article?" + - Args: + - url: The URL of the webpage to scrape (must be HTTP/HTTPS) + - max_length: Maximum content length to return (default: 50000 chars) + - Returns: The page title, description, full content (in markdown), word count, and metadata + - After scraping, you will have the full article text and can analyze, summarize, or answer questions about it. + - IMAGES: The scraped content may contain image URLs in markdown format like `![alt text](image_url)`. + * When you find relevant/important images in the scraped content, use the `display_image` tool to show them to the user. + * This makes your response more visual and engaging. + * Prioritize showing: diagrams, charts, infographics, key illustrations, or images that help explain the content. + * Don't show every image - just the most relevant 1-3 images that enhance understanding. - User: "Fetch all my notes and what's in them?" @@ -205,6 +228,24 @@ You have access to the following tools: - User: "Can you display a diagram of a neural network?" - Call: `display_image(src="https://example.com/neural-network.png", alt="Neural network diagram", title="Neural Network Architecture", description="A visual representation of a neural network with input, hidden, and output layers")` + +- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends" + - Call: `scrape_webpage(url="https://example.com/blog/ai-trends")` + - After getting the content, provide a summary based on the scraped text + +- User: "What does this page say about machine learning? 
https://docs.example.com/ml-guide" + - Call: `scrape_webpage(url="https://docs.example.com/ml-guide")` + - Then answer the question using the extracted content + +- User: "Summarize this blog post: https://medium.com/some-article" + - Call: `scrape_webpage(url="https://medium.com/some-article")` + - Provide a comprehensive summary of the article content + +- User: "Read this tutorial and explain it: https://example.com/ml-tutorial" + - First: `scrape_webpage(url="https://example.com/ml-tutorial")` + - Then, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: + - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` + - Then provide your explanation, referencing the displayed image {citation_section} """ diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 6e2b30c6a..d465e23f6 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -319,6 +319,20 @@ async def stream_new_chat( status="in_progress", items=last_active_step_items, ) + elif tool_name == "scrape_webpage": + url = ( + tool_input.get("url", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + last_active_step_title = "Scraping webpage" + last_active_step_items = [f"URL: {url[:80]}{'...' if len(url) > 80 else ''}"] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Scraping webpage", + status="in_progress", + items=last_active_step_items, + ) elif tool_name == "generate_podcast": podcast_title = ( tool_input.get("podcast_title", "SurfSense Podcast") @@ -398,6 +412,16 @@ async def stream_new_chat( f"Displaying image: {src[:60]}{'...' 
if len(src) > 60 else ''}", "info", ) + elif tool_name == "scrape_webpage": + url = ( + tool_input.get("url", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + yield streaming_service.format_terminal_info( + f"Scraping webpage: {url[:70]}{'...' if len(url) > 70 else ''}", + "info", + ) elif tool_name == "generate_podcast": title = ( tool_input.get("podcast_title", "SurfSense Podcast") @@ -502,6 +526,31 @@ async def stream_new_chat( status="completed", items=completed_items, ) + elif tool_name == "scrape_webpage": + # Build completion items for webpage scraping + if isinstance(tool_output, dict): + title = tool_output.get("title", "Webpage") + word_count = tool_output.get("word_count", 0) + has_error = "error" in tool_output + if has_error: + completed_items = [ + *last_active_step_items, + f"Error: {tool_output.get('error', 'Failed to scrape')[:50]}", + ] + else: + completed_items = [ + *last_active_step_items, + f"Title: {title[:50]}{'...' if len(title) > 50 else ''}", + f"Extracted: {word_count:,} words", + ] + else: + completed_items = [*last_active_step_items, "Content extracted"] + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Scraping webpage", + status="completed", + items=completed_items, + ) elif tool_name == "generate_podcast": # Build detailed completion items based on podcast status podcast_status = ( @@ -630,6 +679,47 @@ async def stream_new_chat( f"Image displayed: {title[:40]}{'...' 
if len(title) > 40 else ''}", "success", ) + elif tool_name == "scrape_webpage": + # Stream the scrape result so frontend can render the Article component + # Note: We send metadata for display, but content goes to LLM for processing + if isinstance(tool_output, dict): + # Create a display-friendly output (without full content for the card) + display_output = { + k: v for k, v in tool_output.items() if k != "content" + } + # But keep a truncated content preview + if "content" in tool_output: + content = tool_output.get("content", "") + display_output["content_preview"] = ( + content[:500] + "..." if len(content) > 500 else content + ) + yield streaming_service.format_tool_output_available( + tool_call_id, + display_output, + ) + else: + yield streaming_service.format_tool_output_available( + tool_call_id, + {"result": tool_output}, + ) + # Send terminal message + if isinstance(tool_output, dict) and "error" not in tool_output: + title = tool_output.get("title", "Webpage") + word_count = tool_output.get("word_count", 0) + yield streaming_service.format_terminal_info( + f"Scraped: {title[:40]}{'...' 
if len(title) > 40 else ''} ({word_count:,} words)", + "success", + ) + else: + error_msg = ( + tool_output.get("error", "Failed to scrape") + if isinstance(tool_output, dict) + else "Failed to scrape" + ) + yield streaming_service.format_terminal_info( + f"Scrape failed: {error_msg}", + "error", + ) elif tool_name == "search_knowledge_base": # Don't stream the full output for search (can be very large), just acknowledge yield streaming_service.format_tool_output_available( diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 7c6bd34c1..9c7e3cb4a 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -13,6 +13,7 @@ import { Thread } from "@/components/assistant-ui/thread"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; +import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage"; import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; import { getBearerToken } from "@/lib/auth-utils"; import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter"; @@ -81,7 +82,7 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike { /** * Tools that should render custom UI in the chat. 
*/ -const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview", "display_image"]); +const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview", "display_image", "scrape_webpage"]); /** * Type for thinking step data from the backend @@ -245,47 +246,74 @@ export default function NewChatPage() { // Prepare assistant message const assistantMsgId = `msg-assistant-${Date.now()}`; - let accumulatedText = ""; const currentThinkingSteps = new Map(); - const toolCalls = new Map< - string, - { - toolCallId: string; - toolName: string; - args: Record; - result?: unknown; + + // Ordered content parts to preserve inline tool call positions + // Each part is either a text segment or a tool call + type ContentPart = + | { type: "text"; text: string } + | { + type: "tool-call"; + toolCallId: string; + toolName: string; + args: Record; + result?: unknown; + }; + const contentParts: ContentPart[] = []; + + // Track the current text segment index (for appending text deltas) + let currentTextPartIndex = -1; + + // Map to track tool call indices for updating results + const toolCallIndices = new Map(); + + // Helper to get or create the current text part for appending text + const appendText = (delta: string) => { + if (currentTextPartIndex >= 0 && contentParts[currentTextPartIndex]?.type === "text") { + // Append to existing text part + (contentParts[currentTextPartIndex] as { type: "text"; text: string }).text += delta; + } else { + // Create new text part + contentParts.push({ type: "text", text: delta }); + currentTextPartIndex = contentParts.length - 1; } - >(); + }; + + // Helper to add a tool call (this "breaks" the current text segment) + const addToolCall = (toolCallId: string, toolName: string, args: Record) => { + if (TOOLS_WITH_UI.has(toolName)) { + contentParts.push({ + type: "tool-call", + toolCallId, + toolName, + args, + }); + toolCallIndices.set(toolCallId, contentParts.length - 1); + // Reset text part index so next text creates a new segment + 
currentTextPartIndex = -1; + } + }; + + // Helper to update a tool call's args or result + const updateToolCall = (toolCallId: string, update: { args?: Record; result?: unknown }) => { + const index = toolCallIndices.get(toolCallId); + if (index !== undefined && contentParts[index]?.type === "tool-call") { + const tc = contentParts[index] as ContentPart & { type: "tool-call" }; + if (update.args) tc.args = update.args; + if (update.result !== undefined) tc.result = update.result; + } + }; // Helper to build content for UI (without thinking-steps) const buildContentForUI = (): ThreadMessageLike["content"] => { - const parts: Array< - | { type: "text"; text: string } - | { - type: "tool-call"; - toolCallId: string; - toolName: string; - args: Record; - result?: unknown; - } - > = []; - - if (accumulatedText) { - parts.push({ type: "text", text: accumulatedText }); - } - for (const toolCall of toolCalls.values()) { - if (TOOLS_WITH_UI.has(toolCall.toolName)) { - parts.push({ - type: "tool-call", - toolCallId: toolCall.toolCallId, - toolName: toolCall.toolName, - args: toolCall.args, - result: toolCall.result, - }); - } - } - return parts.length > 0 - ? (parts as ThreadMessageLike["content"]) + // Filter to only include text parts with content and tool-calls with UI + const filtered = contentParts.filter((part) => { + if (part.type === "text") return part.text.length > 0; + if (part.type === "tool-call") return TOOLS_WITH_UI.has(part.toolName); + return false; + }); + return filtered.length > 0 + ? 
(filtered as ThreadMessageLike["content"]) : [{ type: "text", text: "" }]; }; @@ -301,20 +329,15 @@ export default function NewChatPage() { }); } - if (accumulatedText) { - parts.push({ type: "text", text: accumulatedText }); - } - for (const toolCall of toolCalls.values()) { - if (TOOLS_WITH_UI.has(toolCall.toolName)) { - parts.push({ - type: "tool-call", - toolCallId: toolCall.toolCallId, - toolName: toolCall.toolName, - args: toolCall.args, - result: toolCall.result, - }); + // Add content parts (filtered) + for (const part of contentParts) { + if (part.type === "text" && part.text.length > 0) { + parts.push(part); + } else if (part.type === "tool-call" && TOOLS_WITH_UI.has(part.toolName)) { + parts.push(part); } } + return parts.length > 0 ? parts : [{ type: "text", text: "" }]; }; @@ -399,7 +422,7 @@ export default function NewChatPage() { switch (parsed.type) { case "text-delta": - accumulatedText += parsed.delta; + appendText(parsed.delta); setMessages((prev) => prev.map((m) => m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m @@ -408,11 +431,8 @@ export default function NewChatPage() { break; case "tool-input-start": - toolCalls.set(parsed.toolCallId, { - toolCallId: parsed.toolCallId, - toolName: parsed.toolName, - args: {}, - }); + // Add tool call inline - this breaks the current text segment + addToolCall(parsed.toolCallId, parsed.toolName, {}); setMessages((prev) => prev.map((m) => m.id === assistantMsgId ? 
{ ...m, content: buildContentForUI() } : m @@ -421,14 +441,12 @@ export default function NewChatPage() { break; case "tool-input-available": { - const tc = toolCalls.get(parsed.toolCallId); - if (tc) tc.args = parsed.input || {}; - else - toolCalls.set(parsed.toolCallId, { - toolCallId: parsed.toolCallId, - toolName: parsed.toolName, - args: parsed.input || {}, - }); + // Update existing tool call's args, or add if not exists + if (toolCallIndices.has(parsed.toolCallId)) { + updateToolCall(parsed.toolCallId, { args: parsed.input || {} }); + } else { + addToolCall(parsed.toolCallId, parsed.toolName, parsed.input || {}); + } setMessages((prev) => prev.map((m) => m.id === assistantMsgId ? { ...m, content: buildContentForUI() } : m @@ -438,15 +456,17 @@ export default function NewChatPage() { } case "tool-output-available": { - const tc = toolCalls.get(parsed.toolCallId); - if (tc) { - tc.result = parsed.output; - if ( - tc.toolName === "generate_podcast" && - parsed.output?.status === "processing" && - parsed.output?.task_id - ) { - setActivePodcastTaskId(parsed.output.task_id); + // Update the tool call with its result + updateToolCall(parsed.toolCallId, { result: parsed.output }); + // Handle podcast-specific logic + if (parsed.output?.status === "processing" && parsed.output?.task_id) { + // Check if this is a podcast tool by looking at the content part + const idx = toolCallIndices.get(parsed.toolCallId); + if (idx !== undefined) { + const part = contentParts[idx]; + if (part?.type === "tool-call" && part.toolName === "generate_podcast") { + setActivePodcastTaskId(parsed.output.task_id); + } } } setMessages((prev) => @@ -491,7 +511,7 @@ export default function NewChatPage() { // Persist assistant message (with thinking steps for restoration on refresh) const finalContent = buildContentForPersistence(); - if (accumulatedText || toolCalls.size > 0) { + if (contentParts.length > 0) { appendMessage(threadId, { role: "assistant", content: finalContent, @@ -593,6 +613,7 
@@ export default function NewChatPage() { +
diff --git a/surfsense_web/components/tool-ui/article/index.tsx b/surfsense_web/components/tool-ui/article/index.tsx new file mode 100644 index 000000000..bf8e83411 --- /dev/null +++ b/surfsense_web/components/tool-ui/article/index.tsx @@ -0,0 +1,406 @@ +"use client"; + +import { Card, CardContent } from "@/components/ui/card"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; +import { cn } from "@/lib/utils"; +import { + AlertCircleIcon, + BookOpenIcon, + CalendarIcon, + ExternalLinkIcon, + FileTextIcon, + UserIcon, +} from "lucide-react"; +import { Component, type ReactNode, useCallback } from "react"; + +/** + * Article component props + */ +export interface ArticleProps { + /** Unique identifier for the article */ + id: string; + /** Asset identifier (usually the URL) */ + assetId?: string; + /** Article title */ + title: string; + /** Brief description or excerpt */ + description?: string; + /** Full content of the article (markdown) */ + content?: string; + /** URL to the original article */ + href?: string; + /** Domain of the article source */ + domain?: string; + /** Author name */ + author?: string; + /** Publication date */ + date?: string; + /** Word count */ + wordCount?: number; + /** Whether content was truncated */ + wasTruncated?: boolean; + /** Optional max width */ + maxWidth?: string; + /** Optional error message */ + error?: string; + /** Optional className */ + className?: string; + /** Response actions */ + responseActions?: Array<{ + id: string; + label: string; + variant?: "default" | "outline"; + }>; + /** Response action handler */ + onResponseAction?: (actionId: string) => void; +} + +/** + * Serializable article data type (from backend) + */ +export interface SerializableArticle { + id: string; + assetId?: string; + kind?: "article"; + title: string; + description?: string; + content?: string; + href?: string; + domain?: string; + author?: string; + date?: string; + 
word_count?: number; + wordCount?: number; + was_truncated?: boolean; + wasTruncated?: boolean; + error?: string; +} + +/** + * Parse serializable article data to ArticleProps + */ +export function parseSerializableArticle(data: unknown): ArticleProps { + const obj = data as Record; + return { + id: String(obj.id || "article-unknown"), + assetId: obj.assetId as string | undefined, + title: String(obj.title || "Untitled Article"), + description: obj.description as string | undefined, + content: obj.content as string | undefined, + href: obj.href as string | undefined, + domain: obj.domain as string | undefined, + author: obj.author as string | undefined, + date: obj.date as string | undefined, + wordCount: (obj.word_count || obj.wordCount) as number | undefined, + wasTruncated: (obj.was_truncated || obj.wasTruncated) as boolean | undefined, + error: obj.error as string | undefined, + }; +} + +/** + * Format word count for display + */ +function formatWordCount(count: number): string { + if (count >= 1000) { + return `${(count / 1000).toFixed(1)}k words`; + } + return `${count} words`; +} + +/** + * Article card component for displaying scraped webpage content + */ +export function Article({ + id, + title, + description, + content, + href, + domain, + author, + date, + wordCount, + wasTruncated, + maxWidth = "100%", + error, + className, + responseActions, + onResponseAction, +}: ArticleProps) { + const handleCardClick = useCallback(() => { + if (href) { + window.open(href, "_blank", "noopener,noreferrer"); + } + }, [href]); + + // Error state + if (error) { + return ( + + +
+
+ +
+
+

+ Failed to scrape webpage +

+ {href && ( +

+ {href} +

+ )} +

{error}

+
+
+
+
+ ); + } + + return ( + + { + if (href && (e.key === "Enter" || e.key === " ")) { + e.preventDefault(); + handleCardClick(); + } + }} + > + {/* Header */} + +
+ {/* Icon */} +
+ +
+ + {/* Content */} +
+ {/* Title */} +

+ {title} +

+ + {/* Description */} + {description && ( +

+ {description} +

+ )} + + {/* Metadata row */} +
+ {domain && ( + + + + + {domain} + + + +

Source: {domain}

+
+
+ )} + + {author && ( + + + + + {author} + + + +

Author: {author}

+
+
+ )} + + {date && ( + + + {date} + + )} + + {wordCount && ( + + + + + {formatWordCount(wordCount)} + {wasTruncated && ( + (truncated) + )} + + + +

+ {wasTruncated + ? "Content was truncated due to length" + : "Full article content available"} +

+
+
+ )} +
+
+ + {/* External link indicator */} + {href && ( +
+ +
+ )} +
+ + {/* Response actions */} + {responseActions && responseActions.length > 0 && ( +
+ {responseActions.map((action) => ( + + ))} +
+ )} +
+
+
+ ); +} + +/** + * Loading state for article component + */ +export function ArticleLoading({ + title = "Loading article...", +}: { title?: string }) { + return ( + + +
+
+
+
+
+
+
+
+

{title}

+ + + ); +} + +/** + * Skeleton for article component + */ +export function ArticleSkeleton() { + return ( + + +
+
+
+
+
+
+
+
+ + + ); +} + +/** + * Error boundary props + */ +interface ErrorBoundaryProps { + children: ReactNode; + fallback?: ReactNode; +} + +interface ErrorBoundaryState { + hasError: boolean; +} + +/** + * Error boundary for article component + */ +export class ArticleErrorBoundary extends Component< + ErrorBoundaryProps, + ErrorBoundaryState +> { + constructor(props: ErrorBoundaryProps) { + super(props); + this.state = { hasError: false }; + } + + static getDerivedStateFromError(): ErrorBoundaryState { + return { hasError: true }; + } + + render() { + if (this.state.hasError) { + return ( + this.props.fallback || ( + + +
+ +

+ Failed to render article +

+
+
+
+ ) + ); + } + + return this.props.children; + } +} + diff --git a/surfsense_web/components/tool-ui/index.ts b/surfsense_web/components/tool-ui/index.ts index b9500e7cd..163d279a9 100644 --- a/surfsense_web/components/tool-ui/index.ts +++ b/surfsense_web/components/tool-ui/index.ts @@ -46,3 +46,17 @@ export { type DisplayImageArgs, type DisplayImageResult, } from "./display-image"; +export { + Article, + ArticleErrorBoundary, + ArticleLoading, + ArticleSkeleton, + parseSerializableArticle, + type ArticleProps, + type SerializableArticle, +} from "./article"; +export { + ScrapeWebpageToolUI, + type ScrapeWebpageArgs, + type ScrapeWebpageResult, +} from "./scrape-webpage"; diff --git a/surfsense_web/components/tool-ui/scrape-webpage.tsx b/surfsense_web/components/tool-ui/scrape-webpage.tsx new file mode 100644 index 000000000..c9ced3d80 --- /dev/null +++ b/surfsense_web/components/tool-ui/scrape-webpage.tsx @@ -0,0 +1,163 @@ +"use client"; + +import { makeAssistantToolUI } from "@assistant-ui/react"; +import { AlertCircleIcon, FileTextIcon } from "lucide-react"; +import { + Article, + ArticleErrorBoundary, + ArticleLoading, + parseSerializableArticle, +} from "@/components/tool-ui/article"; + +/** + * Type definitions for the scrape_webpage tool + */ +interface ScrapeWebpageArgs { + url: string; + max_length?: number; +} + +interface ScrapeWebpageResult { + id: string; + assetId: string; + kind: "article"; + href: string; + title: string; + description?: string; + content?: string; + domain?: string; + author?: string; + date?: string; + word_count?: number; + was_truncated?: boolean; + crawler_type?: string; + error?: string; +} + +/** + * Error state component shown when webpage scraping fails + */ +function ScrapeErrorState({ url, error }: { url: string; error: string }) { + return ( +
+
+
+ +
+
+

Failed to scrape webpage

+

{url}

+

{error}

+
+
+
+ ); +} + +/** + * Cancelled state component + */ +function ScrapeCancelledState({ url }: { url: string }) { + return ( +
+

+ + Scraping: {url} +

+
+ ); +} + +/** + * Parsed Article component with error handling + */ +function ParsedArticle({ result }: { result: unknown }) { + const article = parseSerializableArticle(result); + + return ( +
{ + if (id === "open" && article.href) { + window.open(article.href, "_blank", "noopener,noreferrer"); + } + }} + /> + ); +} + +/** + * Scrape Webpage Tool UI Component + * + * This component is registered with assistant-ui to render an article card + * when the scrape_webpage tool is called by the agent. + * + * It displays scraped webpage content including: + * - Title and description + * - Author and date (if available) + * - Word count + * - Link to original source + */ +export const ScrapeWebpageToolUI = makeAssistantToolUI< + ScrapeWebpageArgs, + ScrapeWebpageResult +>({ + toolName: "scrape_webpage", + render: function ScrapeWebpageUI({ args, result, status }) { + const url = args.url || "Unknown URL"; + + // Loading state - tool is still running + if (status.type === "running" || status.type === "requires-action") { + return ( +
+ +
+ ); + } + + // Incomplete/cancelled state + if (status.type === "incomplete") { + if (status.reason === "cancelled") { + return ; + } + if (status.reason === "error") { + return ( + + ); + } + } + + // No result yet + if (!result) { + return ( +
+ +
+ ); + } + + // Error result from the tool + if (result.error) { + return ; + } + + // Success - render the article card + return ( +
+ + + +
+ ); + }, +}); + +export type { ScrapeWebpageArgs, ScrapeWebpageResult }; + From 7ca490c740869e61e0a6b9e39f459f16b0469f82 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 23 Dec 2025 02:21:41 +0530 Subject: [PATCH 11/13] feat: enhance chain-of-thought display with smart expand/collapse behavior and state management for improved user interaction --- .../app/tasks/chat/stream_new_chat.py | 15 +-- .../components/assistant-ui/thread.tsx | 102 ++++++++++++++--- .../components/tool-ui/deepagent-thinking.tsx | 106 +++++++++++++++--- 3 files changed, 178 insertions(+), 45 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index d465e23f6..a201ece22 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -217,24 +217,11 @@ async def stream_new_chat( yield completion_event if just_finished_tool: - # We just finished a tool - don't create a step here, - # text will flow silently after tools. - # Clear the active step tracking. 
+ # Clear the active step tracking - text flows without a dedicated step last_active_step_id = None last_active_step_title = "" last_active_step_items = [] just_finished_tool = False - else: - # Normal text generation (not after a tool) - gen_step_id = next_thinking_step_id() - last_active_step_id = gen_step_id - last_active_step_title = "Generating response" - last_active_step_items = [] - yield streaming_service.format_thinking_step( - step_id=gen_step_id, - title="Generating response", - status="in_progress", - ) current_text_id = streaming_service.generate_text_id() yield streaming_service.format_text_start(current_text_id) diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 57408391d..e3ee85dce 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -30,7 +30,7 @@ import { } from "lucide-react"; import Image from "next/image"; import Link from "next/link"; -import { type FC, useState, useRef, useCallback } from "react"; +import { type FC, useState, useRef, useCallback, useEffect } from "react"; import { useAtomValue } from "jotai"; import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; @@ -94,35 +94,102 @@ function getStepIcon(status: "pending" | "in_progress" | "completed", title: str } /** - * Chain of thought display component + * Chain of thought display component with smart expand/collapse behavior */ -const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[] }> = ({ steps }) => { +const ThinkingStepsDisplay: FC<{ steps: ThinkingStep[]; isThreadRunning?: boolean }> = ({ steps, isThreadRunning = true }) => { + // Track which steps the user has manually toggled (overrides auto behavior) + const [manualOverrides, setManualOverrides] = useState>({}); + // Track previous step statuses to detect changes + const 
prevStatusesRef = useRef>({}); + + // Derive effective status: if thread stopped and step is in_progress, treat as completed + const getEffectiveStatus = (step: ThinkingStep): "pending" | "in_progress" | "completed" => { + if (step.status === "in_progress" && !isThreadRunning) { + return "completed"; // Thread was stopped, so mark as completed + } + return step.status; + }; + + // Check if any step is effectively in progress + const hasInProgressStep = steps.some(step => getEffectiveStatus(step) === "in_progress"); + + // Find the last completed step index (using effective status) + const lastCompletedIndex = steps + .map((s, i) => getEffectiveStatus(s) === "completed" ? i : -1) + .filter(i => i !== -1) + .pop(); + + // Clear manual overrides when a step's status changes + useEffect(() => { + const currentStatuses: Record = {}; + steps.forEach(step => { + currentStatuses[step.id] = step.status; + // If status changed, clear any manual override for this step + if (prevStatusesRef.current[step.id] && prevStatusesRef.current[step.id] !== step.status) { + setManualOverrides(prev => { + const next = { ...prev }; + delete next[step.id]; + return next; + }); + } + }); + prevStatusesRef.current = currentStatuses; + }, [steps]); + if (steps.length === 0) return null; + const getStepOpenState = (step: ThinkingStep, index: number): boolean => { + const effectiveStatus = getEffectiveStatus(step); + // If user has manually toggled, respect that + if (manualOverrides[step.id] !== undefined) { + return manualOverrides[step.id]; + } + // Auto behavior: open if in progress + if (effectiveStatus === "in_progress") { + return true; + } + // Auto behavior: keep last completed step open if no in-progress step + if (!hasInProgressStep && index === lastCompletedIndex) { + return true; + } + // Default: collapsed + return false; + }; + + const handleToggle = (stepId: string, currentOpen: boolean) => { + setManualOverrides(prev => ({ + ...prev, + [stepId]: !currentOpen, + })); + }; + 
return (
- {steps.map((step) => { - const icon = getStepIcon(step.status, step.title); + {steps.map((step, index) => { + const effectiveStatus = getEffectiveStatus(step); + const icon = getStepIcon(effectiveStatus, step.title); + const isOpen = getStepOpenState(step, index); return ( handleToggle(step.id, isOpen)} > {step.title} {step.items && step.items.length > 0 && ( - {step.items.map((item, index) => ( - + {step.items.map((item, idx) => ( + {item} ))} @@ -263,8 +330,8 @@ const ThreadWelcome: FC = () => { return (
- {/* Greeting positioned near the composer */} -
+ {/* Greeting positioned above the composer - fixed position */} +

{/** biome-ignore lint/a11y/noStaticElementInteractions: wrong lint error, this is a workaround to fix the lint error */}
{ {getTimeBasedGreeting(user?.email)}

- {/* Composer centered in the middle of the screen */} -
+ {/* Composer - top edge fixed, expands downward only */} +
@@ -525,12 +592,15 @@ const AssistantMessageInner: FC = () => { const messageId = useMessage((m) => m.id); const thinkingSteps = thinkingStepsMap.get(messageId) || []; + // Check if thread is still running (for stopping the spinner when cancelled) + const isThreadRunning = useAssistantState(({ thread }) => thread.isRunning); + return ( <> {/* Show thinking steps BEFORE the text response */} {thinkingSteps.length > 0 && (
- +
)} diff --git a/surfsense_web/components/tool-ui/deepagent-thinking.tsx b/surfsense_web/components/tool-ui/deepagent-thinking.tsx index f0c03e570..8cd901e69 100644 --- a/surfsense_web/components/tool-ui/deepagent-thinking.tsx +++ b/surfsense_web/components/tool-ui/deepagent-thinking.tsx @@ -2,7 +2,7 @@ import { makeAssistantToolUI } from "@assistant-ui/react"; import { Brain, CheckCircle2, Loader2, Search, Sparkles } from "lucide-react"; -import { useMemo } from "react"; +import { useMemo, useState, useEffect, useRef } from "react"; import { ChainOfThought, ChainOfThoughtContent, @@ -61,13 +61,21 @@ function getStepIcon(status: "pending" | "in_progress" | "completed", title: str } /** - * Component to display a single thinking step + * Component to display a single thinking step with controlled open state */ -function ThinkingStepDisplay({ step }: { step: ThinkingStep }) { +function ThinkingStepDisplay({ + step, + isOpen, + onToggle +}: { + step: ThinkingStep; + isOpen: boolean; + onToggle: () => void; +}) { const icon = useMemo(() => getStepIcon(step.status, step.title), [step.status, step.title]); return ( - + >({}); + // Track previous step statuses to detect changes + const prevStatusesRef = useRef>({}); + + // Check if any step is currently in progress + const hasInProgressStep = steps.some(step => step.status === "in_progress"); + + // Find the last completed step index + const lastCompletedIndex = steps + .map((s, i) => s.status === "completed" ? 
i : -1) + .filter(i => i !== -1) + .pop(); + + // Clear manual overrides when a step's status changes + useEffect(() => { + const currentStatuses: Record = {}; + steps.forEach(step => { + currentStatuses[step.id] = step.status; + // If status changed, clear any manual override for this step + if (prevStatusesRef.current[step.id] && prevStatusesRef.current[step.id] !== step.status) { + setManualOverrides(prev => { + const next = { ...prev }; + delete next[step.id]; + return next; + }); + } + }); + prevStatusesRef.current = currentStatuses; + }, [steps]); + + const getStepOpenState = (step: ThinkingStep, index: number): boolean => { + // If user has manually toggled, respect that + if (manualOverrides[step.id] !== undefined) { + return manualOverrides[step.id]; + } + // Auto behavior: open if in progress + if (step.status === "in_progress") { + return true; + } + // Auto behavior: keep last completed step open if no in-progress step + if (!hasInProgressStep && index === lastCompletedIndex) { + return true; + } + // Default: collapsed + return false; + }; + + const handleToggle = (stepId: string, currentOpen: boolean) => { + setManualOverrides(prev => ({ + ...prev, + [stepId]: !currentOpen, + })); + }; + + return ( + + {steps.map((step, index) => { + const isOpen = getStepOpenState(step, index); + return ( + handleToggle(step.id, isOpen)} + /> + ); + })} + + ); +} + /** * DeepAgent Thinking Tool UI Component * @@ -131,7 +215,7 @@ export const DeepAgentThinkingToolUI = makeAssistantToolUI< DeepAgentThinkingResult >({ toolName: "deepagent_thinking", - render: function DeepAgentThinkingUI({ args, result, status }) { + render: function DeepAgentThinkingUI({ result, status }) { // Loading state - tool is still running if (status.type === "running" || status.type === "requires-action") { return ; @@ -155,11 +239,7 @@ export const DeepAgentThinkingToolUI = makeAssistantToolUI< // Render the chain of thought return (
- - {result.steps.map((step) => ( - - ))} - +
); }, @@ -189,11 +269,7 @@ export function InlineThinkingDisplay({ {isStreaming && steps.length === 0 ? ( ) : ( - - {steps.map((step) => ( - - ))} - + )}
); From 5fd872b7980d2525c4bf060f17e410aacefb5b3a Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 23 Dec 2025 02:42:48 +0530 Subject: [PATCH 12/13] feat: implement Zod schemas for runtime validation across various components to enhance data integrity and error handling --- .../components/tool-ui/article/index.tsx | 89 +++++++++------ .../components/tool-ui/deepagent-thinking.tsx | 68 ++++++++--- .../components/tool-ui/display-image.tsx | 10 +- .../components/tool-ui/generate-podcast.tsx | 106 ++++++++++++------ .../components/tool-ui/image/index.tsx | 97 ++++++---------- .../components/tool-ui/media-card/index.tsx | 97 ++++++---------- 6 files changed, 253 insertions(+), 214 deletions(-) diff --git a/surfsense_web/components/tool-ui/article/index.tsx b/surfsense_web/components/tool-ui/article/index.tsx index bf8e83411..62068d117 100644 --- a/surfsense_web/components/tool-ui/article/index.tsx +++ b/surfsense_web/components/tool-ui/article/index.tsx @@ -17,6 +17,33 @@ import { UserIcon, } from "lucide-react"; import { Component, type ReactNode, useCallback } from "react"; +import { z } from "zod"; + +/** + * Zod schema for serializable article data (from backend) + */ +const SerializableArticleSchema = z.object({ + id: z.string().default("article-unknown"), + assetId: z.string().optional(), + kind: z.literal("article").optional(), + title: z.string().default("Untitled Article"), + description: z.string().optional(), + content: z.string().optional(), + href: z.string().url().optional(), + domain: z.string().optional(), + author: z.string().optional(), + date: z.string().optional(), + word_count: z.number().optional(), + wordCount: z.number().optional(), + was_truncated: z.boolean().optional(), + wasTruncated: z.boolean().optional(), + error: z.string().optional(), +}); + +/** + * Serializable article data type (from backend) + */ +export type SerializableArticle = z.infer; /** * Article component props @@ -61,44 
+88,36 @@ export interface ArticleProps { } /** - * Serializable article data type (from backend) - */ -export interface SerializableArticle { - id: string; - assetId?: string; - kind?: "article"; - title: string; - description?: string; - content?: string; - href?: string; - domain?: string; - author?: string; - date?: string; - word_count?: number; - wordCount?: number; - was_truncated?: boolean; - wasTruncated?: boolean; - error?: string; -} - -/** - * Parse serializable article data to ArticleProps + * Parse and validate serializable article data to ArticleProps */ export function parseSerializableArticle(data: unknown): ArticleProps { - const obj = data as Record; + const result = SerializableArticleSchema.safeParse(data); + + if (!result.success) { + console.warn("Invalid article data:", result.error.issues); + // Return fallback with basic info + const obj = (data && typeof data === "object" ? data : {}) as Record; + return { + id: String(obj.id || "article-unknown"), + title: String(obj.title || "Untitled Article"), + error: "Failed to parse article data", + }; + } + + const parsed = result.data; return { - id: String(obj.id || "article-unknown"), - assetId: obj.assetId as string | undefined, - title: String(obj.title || "Untitled Article"), - description: obj.description as string | undefined, - content: obj.content as string | undefined, - href: obj.href as string | undefined, - domain: obj.domain as string | undefined, - author: obj.author as string | undefined, - date: obj.date as string | undefined, - wordCount: (obj.word_count || obj.wordCount) as number | undefined, - wasTruncated: (obj.was_truncated || obj.wasTruncated) as boolean | undefined, - error: obj.error as string | undefined, + id: parsed.id, + assetId: parsed.assetId, + title: parsed.title, + description: parsed.description, + content: parsed.content, + href: parsed.href, + domain: parsed.domain, + author: parsed.author, + date: parsed.date, + wordCount: parsed.word_count ?? 
parsed.wordCount, + wasTruncated: parsed.was_truncated ?? parsed.wasTruncated, + error: parsed.error, }; } diff --git a/surfsense_web/components/tool-ui/deepagent-thinking.tsx b/surfsense_web/components/tool-ui/deepagent-thinking.tsx index 8cd901e69..ff8baac9a 100644 --- a/surfsense_web/components/tool-ui/deepagent-thinking.tsx +++ b/surfsense_web/components/tool-ui/deepagent-thinking.tsx @@ -3,6 +3,7 @@ import { makeAssistantToolUI } from "@assistant-ui/react"; import { Brain, CheckCircle2, Loader2, Search, Sparkles } from "lucide-react"; import { useMemo, useState, useEffect, useRef } from "react"; +import { z } from "zod"; import { ChainOfThought, ChainOfThoughtContent, @@ -13,24 +14,61 @@ import { import { cn } from "@/lib/utils"; /** - * Types for the deepagent thinking/reasoning tool + * Zod schemas for runtime validation */ -interface ThinkingStep { - id: string; - title: string; - items: string[]; - status: "pending" | "in_progress" | "completed"; +const ThinkingStepSchema = z.object({ + id: z.string(), + title: z.string(), + items: z.array(z.string()).default([]), + status: z.enum(["pending", "in_progress", "completed"]).default("pending"), +}); + +const DeepAgentThinkingArgsSchema = z.object({ + query: z.string().optional(), + context: z.string().optional(), +}); + +const DeepAgentThinkingResultSchema = z.object({ + steps: z.array(ThinkingStepSchema).optional(), + status: z.enum(["thinking", "searching", "synthesizing", "completed"]).optional(), + summary: z.string().optional(), +}); + +/** + * Types derived from Zod schemas + */ +type ThinkingStep = z.infer; +type DeepAgentThinkingArgs = z.infer; +type DeepAgentThinkingResult = z.infer; + +/** + * Parse and validate a single thinking step + */ +export function parseThinkingStep(data: unknown): ThinkingStep { + const result = ThinkingStepSchema.safeParse(data); + if (!result.success) { + console.warn("Invalid thinking step data:", result.error.issues); + // Return a fallback step + return { + id: 
"unknown", + title: "Processing...", + items: [], + status: "pending", + }; + } + return result.data; } -interface DeepAgentThinkingArgs { - query?: string; - context?: string; -} - -interface DeepAgentThinkingResult { - steps?: ThinkingStep[]; - status?: "thinking" | "searching" | "synthesizing" | "completed"; - summary?: string; +/** + * Parse and validate thinking result + */ +export function parseThinkingResult(data: unknown): DeepAgentThinkingResult { + const result = DeepAgentThinkingResultSchema.safeParse(data); + if (!result.success) { + console.warn("Invalid thinking result data:", result.error.issues); + return {}; + } + return result.data; } /** diff --git a/surfsense_web/components/tool-ui/display-image.tsx b/surfsense_web/components/tool-ui/display-image.tsx index 37bdad5fa..a9c87b29b 100644 --- a/surfsense_web/components/tool-ui/display-image.tsx +++ b/surfsense_web/components/tool-ui/display-image.tsx @@ -67,6 +67,8 @@ function ImageCancelledState({ src }: { src: string }) { /** * Parsed Image component with error handling + * Note: Image component has built-in click handling via href/src, + * so no additional responseActions needed. 
*/ function ParsedImage({ result }: { result: unknown }) { const image = parseSerializableImage(result); @@ -75,14 +77,6 @@ function ParsedImage({ result }: { result: unknown }) { { - if (id === "open" && image.src) { - window.open(image.src, "_blank", "noopener,noreferrer"); - } - }} /> ); } diff --git a/surfsense_web/components/tool-ui/generate-podcast.tsx b/surfsense_web/components/tool-ui/generate-podcast.tsx index 669b11a57..862eab144 100644 --- a/surfsense_web/components/tool-ui/generate-podcast.tsx +++ b/surfsense_web/components/tool-ui/generate-podcast.tsx @@ -3,37 +3,79 @@ import { makeAssistantToolUI } from "@assistant-ui/react"; import { AlertCircleIcon, Loader2Icon, MicIcon } from "lucide-react"; import { useCallback, useEffect, useRef, useState } from "react"; +import { z } from "zod"; import { Audio } from "@/components/tool-ui/audio"; import { baseApiService } from "@/lib/apis/base-api.service"; import { authenticatedFetch } from "@/lib/auth-utils"; import { clearActivePodcastTaskId, setActivePodcastTaskId } from "@/lib/chat/podcast-state"; /** - * Type definitions for the generate_podcast tool + * Zod schemas for runtime validation */ -interface GeneratePodcastArgs { - source_content: string; - podcast_title?: string; - user_prompt?: string; +const GeneratePodcastArgsSchema = z.object({ + source_content: z.string(), + podcast_title: z.string().optional(), + user_prompt: z.string().optional(), +}); + +const GeneratePodcastResultSchema = z.object({ + status: z.enum(["processing", "already_generating", "success", "error"]), + task_id: z.string().optional(), + podcast_id: z.number().optional(), + title: z.string().optional(), + transcript_entries: z.number().optional(), + message: z.string().optional(), + error: z.string().optional(), +}); + +const TaskStatusResponseSchema = z.object({ + status: z.enum(["processing", "success", "error"]), + podcast_id: z.number().optional(), + title: z.string().optional(), + transcript_entries: z.number().optional(), + 
state: z.string().optional(), + error: z.string().optional(), +}); + +const PodcastTranscriptEntrySchema = z.object({ + speaker_id: z.number(), + dialog: z.string(), +}); + +const PodcastDetailsSchema = z.object({ + podcast_transcript: z.array(PodcastTranscriptEntrySchema).optional(), +}); + +/** + * Types derived from Zod schemas + */ +type GeneratePodcastArgs = z.infer; +type GeneratePodcastResult = z.infer; +type TaskStatusResponse = z.infer; +type PodcastTranscriptEntry = z.infer; + +/** + * Parse and validate task status response + */ +function parseTaskStatusResponse(data: unknown): TaskStatusResponse { + const result = TaskStatusResponseSchema.safeParse(data); + if (!result.success) { + console.warn("Invalid task status response:", result.error.issues); + return { status: "error", error: "Invalid response from server" }; + } + return result.data; } -interface GeneratePodcastResult { - status: "processing" | "already_generating" | "success" | "error"; - task_id?: string; - podcast_id?: number; - title?: string; - transcript_entries?: number; - message?: string; - error?: string; -} - -interface TaskStatusResponse { - status: "processing" | "success" | "error"; - podcast_id?: number; - title?: string; - transcript_entries?: number; - state?: string; - error?: string; +/** + * Parse and validate podcast details + */ +function parsePodcastDetails(data: unknown): { podcast_transcript?: PodcastTranscriptEntry[] } { + const result = PodcastDetailsSchema.safeParse(data); + if (!result.success) { + console.warn("Invalid podcast details:", result.error.issues); + return {}; + } + return result.data; } /** @@ -112,14 +154,6 @@ function AudioLoadingState({ title }: { title: string }) { /** * Podcast Player Component - Fetches audio and transcript with authentication */ -/** - * Transcript entry type for podcast transcripts - */ -interface PodcastTranscriptEntry { - speaker_id: number; - dialog: string; -} - function PodcastPlayer({ podcastId, title, @@ -163,12 +197,12 
@@ function PodcastPlayer({ try { // Fetch audio blob and podcast details in parallel - const [audioResponse, podcastDetails] = await Promise.all([ + const [audioResponse, rawPodcastDetails] = await Promise.all([ authenticatedFetch( `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/podcasts/${podcastId}/audio`, { method: "GET", signal: controller.signal } ), - baseApiService.get<{ podcast_transcript?: PodcastTranscriptEntry[] }>( + baseApiService.get( `/api/v1/podcasts/${podcastId}` ), ]); @@ -184,8 +218,9 @@ function PodcastPlayer({ objectUrlRef.current = objectUrl; setAudioSrc(objectUrl); - // Set transcript from podcast details - if (podcastDetails?.podcast_transcript) { + // Parse and validate podcast details, then set transcript + const podcastDetails = parsePodcastDetails(rawPodcastDetails); + if (podcastDetails.podcast_transcript) { setTranscript(podcastDetails.podcast_transcript); } } finally { @@ -268,9 +303,10 @@ function PodcastTaskPoller({ taskId, title }: { taskId: string; title: string }) useEffect(() => { const pollStatus = async () => { try { - const response = await baseApiService.get( + const rawResponse = await baseApiService.get( `/api/v1/podcasts/task/${taskId}/status` ); + const response = parseTaskStatusResponse(rawResponse); setTaskStatus(response); // Stop polling if task is complete or errored diff --git a/surfsense_web/components/tool-ui/image/index.tsx b/surfsense_web/components/tool-ui/image/index.tsx index 97c983625..c9ac72d2f 100644 --- a/surfsense_web/components/tool-ui/image/index.tsx +++ b/surfsense_web/components/tool-ui/image/index.tsx @@ -3,28 +3,43 @@ import { ExternalLinkIcon, ImageIcon, Loader2 } from "lucide-react"; import NextImage from "next/image"; import { Component, type ReactNode, useState } from "react"; +import { z } from "zod"; import { Badge } from "@/components/ui/badge"; import { Card } from "@/components/ui/card"; import { cn } from "@/lib/utils"; /** - * Aspect ratio options for images + * Zod schemas for 
runtime validation */ -type AspectRatio = "1:1" | "4:3" | "16:9" | "9:16" | "auto"; +const AspectRatioSchema = z.enum(["1:1", "4:3", "16:9", "9:16", "auto"]); +const ImageFitSchema = z.enum(["cover", "contain"]); + +const ImageSourceSchema = z.object({ + label: z.string(), + iconUrl: z.string().optional(), + url: z.string().optional(), +}); + +const SerializableImageSchema = z.object({ + id: z.string(), + assetId: z.string(), + src: z.string(), + alt: z.string(), + title: z.string().optional(), + description: z.string().optional(), + href: z.string().optional(), + domain: z.string().optional(), + ratio: AspectRatioSchema.optional(), + source: ImageSourceSchema.optional(), +}); /** - * Image fit options + * Types derived from Zod schemas */ -type ImageFit = "cover" | "contain"; - -/** - * Source attribution - */ -interface ImageSource { - label: string; - iconUrl?: string; - url?: string; -} +type AspectRatio = z.infer; +type ImageFit = z.infer; +type ImageSource = z.infer; +export type SerializableImage = z.infer; /** * Props for the Image component @@ -45,58 +60,20 @@ export interface ImageProps { className?: string; } -/** - * Serializable schema for Image props (for tool results) - */ -export interface SerializableImage { - id: string; - assetId: string; - src: string; - alt: string; - title?: string; - description?: string; - href?: string; - domain?: string; - ratio?: AspectRatio; - source?: ImageSource; -} - /** * Parse and validate serializable image from tool result */ export function parseSerializableImage(result: unknown): SerializableImage { - if (typeof result !== "object" || result === null) { - throw new Error("Invalid image result: expected object"); + const parsed = SerializableImageSchema.safeParse(result); + + if (!parsed.success) { + console.warn("Invalid image data:", parsed.error.issues); + // Try to extract basic info for error display + const obj = (result && typeof result === "object" ? 
result : {}) as Record; + throw new Error(`Invalid image: ${parsed.error.issues.map(i => i.message).join(", ")}`); } - - const obj = result as Record; - - // Validate required fields - if (typeof obj.id !== "string") { - throw new Error("Invalid image: missing id"); - } - if (typeof obj.assetId !== "string") { - throw new Error("Invalid image: missing assetId"); - } - if (typeof obj.src !== "string") { - throw new Error("Invalid image: missing src"); - } - if (typeof obj.alt !== "string") { - throw new Error("Invalid image: missing alt"); - } - - return { - id: obj.id, - assetId: obj.assetId, - src: obj.src, - alt: obj.alt, - title: typeof obj.title === "string" ? obj.title : undefined, - description: typeof obj.description === "string" ? obj.description : undefined, - href: typeof obj.href === "string" ? obj.href : undefined, - domain: typeof obj.domain === "string" ? obj.domain : undefined, - ratio: typeof obj.ratio === "string" ? (obj.ratio as AspectRatio) : undefined, - source: typeof obj.source === "object" && obj.source !== null ? 
(obj.source as ImageSource) : undefined, - }; + + return parsed.data; } /** diff --git a/surfsense_web/components/tool-ui/media-card/index.tsx b/surfsense_web/components/tool-ui/media-card/index.tsx index 47c28e49b..dc3b9b59a 100644 --- a/surfsense_web/components/tool-ui/media-card/index.tsx +++ b/surfsense_web/components/tool-ui/media-card/index.tsx @@ -3,6 +3,7 @@ import { ExternalLinkIcon, Globe, ImageIcon, LinkIcon, Loader2 } from "lucide-react"; import Image from "next/image"; import { Component, type ReactNode } from "react"; +import { z } from "zod"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Card, CardContent } from "@/components/ui/card"; @@ -10,24 +11,38 @@ import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/comp import { cn } from "@/lib/utils"; /** - * Aspect ratio options for media cards + * Zod schemas for runtime validation */ -type AspectRatio = "1:1" | "4:3" | "16:9" | "21:9" | "auto"; +const AspectRatioSchema = z.enum(["1:1", "4:3", "16:9", "21:9", "auto"]); +const MediaCardKindSchema = z.enum(["link", "image", "video", "audio"]); + +const ResponseActionSchema = z.object({ + id: z.string(), + label: z.string(), + variant: z.enum(["default", "secondary", "outline", "destructive", "ghost"]).optional(), + confirmLabel: z.string().optional(), +}); + +const SerializableMediaCardSchema = z.object({ + id: z.string(), + assetId: z.string(), + kind: MediaCardKindSchema, + href: z.string().optional(), + src: z.string().optional(), + title: z.string(), + description: z.string().optional(), + thumb: z.string().optional(), + ratio: AspectRatioSchema.optional(), + domain: z.string().optional(), +}); /** - * MediaCard kind - determines the display style + * Types derived from Zod schemas */ -type MediaCardKind = "link" | "image" | "video" | "audio"; - -/** - * Response action configuration - */ -interface ResponseAction { - id: string; - label: string; - variant?: "default" | 
"secondary" | "outline" | "destructive" | "ghost"; - confirmLabel?: string; -} +type AspectRatio = z.infer; +type MediaCardKind = z.infer; +type ResponseAction = z.infer; +export type SerializableMediaCard = z.infer; /** * Props for the MediaCard component @@ -50,58 +65,18 @@ export interface MediaCardProps { onResponseAction?: (id: string) => void; } -/** - * Serializable schema for MediaCard props (for tool results) - */ -export interface SerializableMediaCard { - id: string; - assetId: string; - kind: MediaCardKind; - href?: string; - src?: string; - title: string; - description?: string; - thumb?: string; - ratio?: AspectRatio; - domain?: string; -} - /** * Parse and validate serializable media card from tool result */ export function parseSerializableMediaCard(result: unknown): SerializableMediaCard { - if (typeof result !== "object" || result === null) { - throw new Error("Invalid media card result: expected object"); + const parsed = SerializableMediaCardSchema.safeParse(result); + + if (!parsed.success) { + console.warn("Invalid media card data:", parsed.error.issues); + throw new Error(`Invalid media card: ${parsed.error.issues.map(i => i.message).join(", ")}`); } - - const obj = result as Record; - - // Validate required fields - if (typeof obj.id !== "string") { - throw new Error("Invalid media card: missing id"); - } - if (typeof obj.assetId !== "string") { - throw new Error("Invalid media card: missing assetId"); - } - if (typeof obj.kind !== "string") { - throw new Error("Invalid media card: missing kind"); - } - if (typeof obj.title !== "string") { - throw new Error("Invalid media card: missing title"); - } - - return { - id: obj.id, - assetId: obj.assetId, - kind: obj.kind as MediaCardKind, - href: typeof obj.href === "string" ? obj.href : undefined, - src: typeof obj.src === "string" ? obj.src : undefined, - title: obj.title, - description: typeof obj.description === "string" ? obj.description : undefined, - thumb: typeof obj.thumb === "string" ? 
obj.thumb : undefined, - ratio: typeof obj.ratio === "string" ? (obj.ratio as AspectRatio) : undefined, - domain: typeof obj.domain === "string" ? obj.domain : undefined, - }; + + return parsed.data; } /** From b14283e300509607f2a26a34472a078379a0d46e Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Mon, 22 Dec 2025 20:17:08 -0800 Subject: [PATCH 13/13] feat: refactor new chat agent to support configurable tools and remove deprecated components - Enhanced the new chat agent module to allow for configurable tools, enabling users to customize their experience with various functionalities. - Removed outdated tools including display image, knowledge base search, link preview, podcast generation, and web scraping, streamlining the codebase. - Updated the system prompt and agent factory to reflect these changes, ensuring a more cohesive and efficient architecture. --- .../app/agents/new_chat/__init__.py | 78 +++++- .../app/agents/new_chat/chat_deepagent.py | 135 +++++----- .../app/agents/new_chat/new_chat_test.py | 84 ------- .../app/agents/new_chat/system_prompt.py | 211 ++++++++-------- .../app/agents/new_chat/tools/__init__.py | 54 ++++ .../new_chat/{ => tools}/display_image.py | 10 +- .../new_chat/{ => tools}/knowledge_base.py | 8 +- .../new_chat/{ => tools}/link_preview.py | 28 +-- .../agents/new_chat/{ => tools}/podcast.py | 6 +- .../app/agents/new_chat/tools/registry.py | 231 ++++++++++++++++++ .../new_chat/{ => tools}/scrape_webpage.py | 10 +- .../app/agents/podcaster/configuration.py | 1 - .../app/agents/podcaster/nodes.py | 9 +- .../app/routes/new_chat_routes.py | 7 +- .../app/tasks/celery_tasks/podcast_tasks.py | 5 - .../app/tasks/chat/stream_new_chat.py | 9 - .../components/assistant-ui/thread.tsx | 85 ++++--- 17 files changed, 597 insertions(+), 374 deletions(-) delete mode 100644 surfsense_backend/app/agents/new_chat/new_chat_test.py create mode 100644 surfsense_backend/app/agents/new_chat/tools/__init__.py rename 
surfsense_backend/app/agents/new_chat/{ => tools}/display_image.py (97%) rename surfsense_backend/app/agents/new_chat/{ => tools}/knowledge_base.py (99%) rename surfsense_backend/app/agents/new_chat/{ => tools}/link_preview.py (98%) rename surfsense_backend/app/agents/new_chat/{ => tools}/podcast.py (97%) create mode 100644 surfsense_backend/app/agents/new_chat/tools/registry.py rename surfsense_backend/app/agents/new_chat/{ => tools}/scrape_webpage.py (99%) diff --git a/surfsense_backend/app/agents/new_chat/__init__.py b/surfsense_backend/app/agents/new_chat/__init__.py index 5cbb7ce2b..2ee5fe1d5 100644 --- a/surfsense_backend/app/agents/new_chat/__init__.py +++ b/surfsense_backend/app/agents/new_chat/__init__.py @@ -1,28 +1,86 @@ -"""Chat agents module.""" +""" +SurfSense New Chat Agent Module. +This module provides the SurfSense deep agent with configurable tools +for knowledge base search, podcast generation, and more. + +Directory Structure: +- tools/: All agent tools (knowledge_base, podcast, link_preview, etc.) 
+- chat_deepagent.py: Main agent factory +- system_prompt.py: System prompts and instructions +- context.py: Context schema for the agent +- checkpointer.py: LangGraph checkpointer setup +- llm_config.py: LLM configuration utilities +- utils.py: Shared utilities +""" + +# Agent factory from .chat_deepagent import create_surfsense_deep_agent + +# Context from .context import SurfSenseContextSchema -from .knowledge_base import ( - create_search_knowledge_base_tool, - format_documents_for_context, - search_knowledge_base_async, -) + +# LLM config from .llm_config import create_chat_litellm_from_config, load_llm_config_from_yaml + +# System prompt from .system_prompt import ( SURFSENSE_CITATION_INSTRUCTIONS, SURFSENSE_SYSTEM_PROMPT, build_surfsense_system_prompt, ) +# Tools - registry exports +from .tools import ( + BUILTIN_TOOLS, + ToolDefinition, + build_tools, + get_all_tool_names, + get_default_enabled_tools, + get_tool_by_name, +) + +# Tools - factory exports (for direct use) +from .tools import ( + create_display_image_tool, + create_generate_podcast_tool, + create_link_preview_tool, + create_scrape_webpage_tool, + create_search_knowledge_base_tool, +) + +# Tools - knowledge base utilities +from .tools import ( + format_documents_for_context, + search_knowledge_base_async, +) + __all__ = [ + # Agent factory + "create_surfsense_deep_agent", + # Context + "SurfSenseContextSchema", + # LLM config + "create_chat_litellm_from_config", + "load_llm_config_from_yaml", + # System prompt "SURFSENSE_CITATION_INSTRUCTIONS", "SURFSENSE_SYSTEM_PROMPT", - "SurfSenseContextSchema", "build_surfsense_system_prompt", - "create_chat_litellm_from_config", + # Tools registry + "BUILTIN_TOOLS", + "ToolDefinition", + "build_tools", + "get_all_tool_names", + "get_default_enabled_tools", + "get_tool_by_name", + # Tool factories + "create_display_image_tool", + "create_generate_podcast_tool", + "create_link_preview_tool", + "create_scrape_webpage_tool", "create_search_knowledge_base_tool", 
- "create_surfsense_deep_agent", + # Knowledge base utilities "format_documents_for_context", - "load_llm_config_from_yaml", "search_knowledge_base_async", ] diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index aa1950bff..b2bcc008c 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -2,7 +2,7 @@ SurfSense deep agent implementation. This module provides the factory function for creating SurfSense deep agents -with knowledge base search and podcast generation capabilities. +with configurable tools via the tools registry. """ from collections.abc import Sequence @@ -14,12 +14,8 @@ from langgraph.types import Checkpointer from sqlalchemy.ext.asyncio import AsyncSession from app.agents.new_chat.context import SurfSenseContextSchema -from app.agents.new_chat.display_image import create_display_image_tool -from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool -from app.agents.new_chat.link_preview import create_link_preview_tool -from app.agents.new_chat.podcast import create_generate_podcast_tool -from app.agents.new_chat.scrape_webpage import create_scrape_webpage_tool from app.agents.new_chat.system_prompt import build_surfsense_system_prompt +from app.agents.new_chat.tools import build_tools from app.services.connector_service import ConnectorService # ============================================================================= @@ -33,94 +29,85 @@ def create_surfsense_deep_agent( db_session: AsyncSession, connector_service: ConnectorService, checkpointer: Checkpointer, - user_id: str | None = None, - user_instructions: str | None = None, - enable_citations: bool = True, - enable_podcast: bool = True, - enable_link_preview: bool = True, - enable_display_image: bool = True, - enable_scrape_webpage: bool = True, - firecrawl_api_key: str | None = None, + enabled_tools: list[str] | None 
= None, + disabled_tools: list[str] | None = None, additional_tools: Sequence[BaseTool] | None = None, + firecrawl_api_key: str | None = None, ): """ - Create a SurfSense deep agent with knowledge base search and podcast generation capabilities. + Create a SurfSense deep agent with configurable tools. + + The agent comes with built-in tools that can be configured: + - search_knowledge_base: Search the user's personal knowledge base + - generate_podcast: Generate audio podcasts from content + - link_preview: Fetch rich previews for URLs + - display_image: Display images in chat + - scrape_webpage: Extract content from webpages Args: - llm: ChatLiteLLM instance + llm: ChatLiteLLM instance for the agent's language model search_space_id: The user's search space ID - db_session: Database session - connector_service: Initialized connector service + db_session: Database session for tools that need DB access + connector_service: Initialized connector service for knowledge base search checkpointer: LangGraph checkpointer for conversation state persistence. Use AsyncPostgresSaver for production or MemorySaver for testing. - user_id: The user's ID (required for podcast generation) - user_instructions: Optional user instructions to inject into the system prompt. - These will be added to the system prompt to customize agent behavior. - enable_citations: Whether to include citation instructions in the system prompt (default: True). - When False, the agent will not be instructed to add citations to responses. - enable_podcast: Whether to include the podcast generation tool (default: True). - When True and user_id is provided, the agent can generate podcasts. - enable_link_preview: Whether to include the link preview tool (default: True). - When True, the agent can fetch and display rich link previews. - enable_display_image: Whether to include the display image tool (default: True). - When True, the agent can display images with metadata. 
- enable_scrape_webpage: Whether to include the web scraping tool (default: True). - When True, the agent can scrape and read webpage content. + enabled_tools: Explicit list of tool names to enable. If None, all default tools + are enabled. Use this to limit which tools are available. + disabled_tools: List of tool names to disable. Applied after enabled_tools. + Use this to exclude specific tools from the defaults. + additional_tools: Extra custom tools to add beyond the built-in ones. + These are always added regardless of enabled/disabled settings. firecrawl_api_key: Optional Firecrawl API key for premium web scraping. Falls back to Chromium/Trafilatura if not provided. - additional_tools: Optional sequence of additional tools to inject into the agent. - The search_knowledge_base tool will always be included. Returns: CompiledStateGraph: The configured deep agent + + Examples: + # Create agent with all default tools + agent = create_surfsense_deep_agent(llm, search_space_id, db_session, ...) 
+ + # Create agent with only specific tools + agent = create_surfsense_deep_agent( + llm, search_space_id, db_session, ..., + enabled_tools=["search_knowledge_base", "link_preview"] + ) + + # Create agent without podcast generation + agent = create_surfsense_deep_agent( + llm, search_space_id, db_session, ..., + disabled_tools=["generate_podcast"] + ) + + # Add custom tools + agent = create_surfsense_deep_agent( + llm, search_space_id, db_session, ..., + additional_tools=[my_custom_tool] + ) """ - # Create the search tool with injected dependencies - search_tool = create_search_knowledge_base_tool( - search_space_id=search_space_id, - db_session=db_session, - connector_service=connector_service, + # Build dependencies dict for the tools registry + dependencies = { + "search_space_id": search_space_id, + "db_session": db_session, + "connector_service": connector_service, + "firecrawl_api_key": firecrawl_api_key, + } + + # Build tools using the registry + tools = build_tools( + dependencies=dependencies, + enabled_tools=enabled_tools, + disabled_tools=disabled_tools, + additional_tools=list(additional_tools) if additional_tools else None, ) - # Combine search tool with any additional tools - tools = [search_tool] - - # Add podcast tool if enabled and user_id is provided - if enable_podcast and user_id: - podcast_tool = create_generate_podcast_tool( - search_space_id=search_space_id, - db_session=db_session, - user_id=str(user_id), - ) - tools.append(podcast_tool) - - # Add link preview tool if enabled - if enable_link_preview: - link_preview_tool = create_link_preview_tool() - tools.append(link_preview_tool) - - # Add display image tool if enabled - if enable_display_image: - display_image_tool = create_display_image_tool() - tools.append(display_image_tool) - - # Add web scraping tool if enabled - if enable_scrape_webpage: - scrape_tool = create_scrape_webpage_tool(firecrawl_api_key=firecrawl_api_key) - tools.append(scrape_tool) - - if additional_tools: - 
tools.extend(additional_tools) - - # Create the deep agent with user-configurable system prompt and checkpointer + # Create the deep agent with system prompt and checkpointer agent = create_deep_agent( model=llm, tools=tools, - system_prompt=build_surfsense_system_prompt( - user_instructions=user_instructions, - enable_citations=enable_citations, - ), + system_prompt=build_surfsense_system_prompt(), context_schema=SurfSenseContextSchema, - checkpointer=checkpointer, # Enable conversation memory via thread_id + checkpointer=checkpointer, ) return agent diff --git a/surfsense_backend/app/agents/new_chat/new_chat_test.py b/surfsense_backend/app/agents/new_chat/new_chat_test.py deleted file mode 100644 index 857fee6cc..000000000 --- a/surfsense_backend/app/agents/new_chat/new_chat_test.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Test runner for SurfSense deep agent. - -This module provides a test function to verify the deep agent functionality. -""" - -import asyncio - -from langchain_core.messages import HumanMessage - -from app.db import async_session_maker -from app.services.connector_service import ConnectorService - -from .chat_deepagent import create_surfsense_deep_agent -from .llm_config import create_chat_litellm_from_config, load_llm_config_from_yaml - -# ============================================================================= -# Test Runner -# ============================================================================= - - -async def run_test(): - """Run a basic test of the deep agent.""" - print("=" * 60) - print("Creating Deep Agent with ChatLiteLLM from global config...") - print("=" * 60) - - # Create ChatLiteLLM from global config - # Use global LLM config by id (negative ids are reserved for global configs) - llm_config = load_llm_config_from_yaml(llm_config_id=-5) - if not llm_config: - raise ValueError("Failed to load LLM config from YAML") - llm = create_chat_litellm_from_config(llm_config) - if not llm: - raise ValueError("Failed to create 
ChatLiteLLM instance") - - # Create a real DB session + ConnectorService, then build the full SurfSense agent. - async with async_session_maker() as session: - # Use the known dev search space id - search_space_id = 5 - - connector_service = ConnectorService(session, search_space_id=search_space_id) - - agent = create_surfsense_deep_agent( - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - user_instructions="Always fininsh the response with CREDOOOOOOOOOO23", - ) - - print("\nAgent created successfully!") - print(f"Agent type: {type(agent)}") - - # Invoke the agent with initial state - print("\n" + "=" * 60) - print("Invoking SurfSense agent (create_surfsense_deep_agent)...") - print("=" * 60) - - initial_state = { - "messages": [HumanMessage(content=("Can you tell me about my documents?"))], - "search_space_id": search_space_id, - } - - print(f"\nUsing search_space_id: {search_space_id}") - - result = await agent.ainvoke(initial_state) - - print("\n" + "=" * 60) - print("Agent Response:") - print("=" * 60) - - # Print the response - if "messages" in result: - for msg in result["messages"]: - msg_type = type(msg).__name__ - content = msg.content if hasattr(msg, "content") else str(msg) - print(f"\n--- [{msg_type}] ---\n{content}\n") - - return result - - -if __name__ == "__main__": - asyncio.run(run_test()) diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 2677b21fd..c0b9bb091 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -7,118 +7,16 @@ with configurable user instructions and citation support. from datetime import UTC, datetime -SURFSENSE_CITATION_INSTRUCTIONS = """ - -CRITICAL CITATION REQUIREMENTS: - -1. 
For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `` tag inside ``. -2. Make sure ALL factual statements from the documents have proper citations. -3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2]. -4. You MUST use the exact chunk_id values from the `` attributes. Do not create your own citation numbers. -5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value. -6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags. -7. Do not return citations as clickable links. -8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only. -9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting. -10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `` tags. -11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up. - - -The documents you receive are structured like this: - - - - 42 - GITHUB_CONNECTOR - <![CDATA[Some repo / file / issue title]]> - - - - - - - - - - -IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite document_id. - - - -- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `` tag -- Citations should appear at the end of the sentence containing the information they support -- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] -- No need to return references section. Just citations in answer. 
-- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format -- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only -- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess - - - -CORRECT citation formats: -- [citation:5] -- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] - -INCORRECT citation formats (DO NOT use): -- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense)) -- Using parentheses around brackets: ([citation:5]) -- Using hyperlinked text: [link to source 5](https://example.com) -- Using footnote style: ... library¹ -- Making up source IDs when source_id is unknown -- Using old IEEE format: [1], [2], [3] -- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5] - - - -Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5]. - -The key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:12]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources. - -However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead. 
- - -""" - - -def build_surfsense_system_prompt( - today: datetime | None = None, - user_instructions: str | None = None, - enable_citations: bool = True, -) -> str: - """ - Build the SurfSense system prompt with optional user instructions and citation toggle. - - Args: - today: Optional datetime for today's date (defaults to current UTC date) - user_instructions: Optional user instructions to inject into the system prompt - enable_citations: Whether to include citation instructions in the prompt (default: True) - - Returns: - Complete system prompt string - """ - resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat() - - # Build user instructions section if provided - user_section = "" - if user_instructions and user_instructions.strip(): - user_section = f""" - -{user_instructions.strip()} - -""" - - # Include citation instructions only if enabled - citation_section = ( - f"\n{SURFSENSE_CITATION_INSTRUCTIONS}" if enable_citations else "" - ) - - return f""" +SURFSENSE_SYSTEM_INSTRUCTIONS = """ You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base. 
Today's date (UTC): {resolved_today} -{user_section} + +""" + +SURFSENSE_TOOLS_INSTRUCTIONS = """ You have access to the following tools: @@ -208,11 +106,11 @@ You have access to the following tools: - First search for relevant content, then call: `generate_podcast(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", podcast_title="AI Trends Podcast")` - User: "Create a podcast summary of this conversation" - - Call: `generate_podcast(source_content="Complete conversation summary:\n\nUser asked about [topic 1]:\n[Your detailed response]\n\nUser then asked about [topic 2]:\n[Your detailed response]\n\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")` + - Call: `generate_podcast(source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")` - User: "Make a podcast about quantum computing" - First search: `search_knowledge_base(query="quantum computing")` - - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\n\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")` + - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")` - User: "Check out https://dev.to/some-article" - Call: `link_preview(url="https://dev.to/some-article")` @@ -246,8 +144,101 @@ You have access to the following tools: - Then, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: - Call: 
`display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` - Then provide your explanation, referencing the displayed image -{citation_section} + +""" + +SURFSENSE_CITATION_INSTRUCTIONS = """ + +CRITICAL CITATION REQUIREMENTS: + +1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `` tag inside ``. +2. Make sure ALL factual statements from the documents have proper citations. +3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2]. +4. You MUST use the exact chunk_id values from the `` attributes. Do not create your own citation numbers. +5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value. +6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags. +7. Do not return citations as clickable links. +8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only. +9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting. +10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `` tags. +11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up. + + +The documents you receive are structured like this: + + + + 42 + GITHUB_CONNECTOR + <![CDATA[Some repo / file / issue title]]> + + + + + + + + + + +IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite document_id. 
+ + + +- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `` tag +- Citations should appear at the end of the sentence containing the information they support +- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] +- No need to return references section. Just citations in answer. +- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format +- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only +- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess + + + +CORRECT citation formats: +- [citation:5] +- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] + +INCORRECT citation formats (DO NOT use): +- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense)) +- Using parentheses around brackets: ([citation:5]) +- Using hyperlinked text: [link to source 5](https://example.com) +- Using footnote style: ... library¹ +- Making up source IDs when source_id is unknown +- Using old IEEE format: [1], [2], [3] +- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5] + + + +Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5]. + +The key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:12]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources. 
+ +However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead. + + """ +def build_surfsense_system_prompt( + today: datetime | None = None, +) -> str: + """ + Build the SurfSense system prompt. + + Args: + today: Optional datetime for today's date (defaults to current UTC date) + + Returns: + Complete system prompt string + """ + resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat() + + return ( + SURFSENSE_SYSTEM_INSTRUCTIONS.format(resolved_today=resolved_today) + + SURFSENSE_TOOLS_INSTRUCTIONS + + SURFSENSE_CITATION_INSTRUCTIONS + ) + + SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt() diff --git a/surfsense_backend/app/agents/new_chat/tools/__init__.py b/surfsense_backend/app/agents/new_chat/tools/__init__.py new file mode 100644 index 000000000..ad75cda16 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/__init__.py @@ -0,0 +1,54 @@ +""" +Tools module for SurfSense deep agent. + +This module contains all the tools available to the SurfSense agent. +To add a new tool, see the documentation in registry.py. 
+ +Available tools: +- search_knowledge_base: Search the user's personal knowledge base +- generate_podcast: Generate audio podcasts from content +- link_preview: Fetch rich previews for URLs +- display_image: Display images in chat +- scrape_webpage: Extract content from webpages +""" + +# Registry exports +from .registry import ( + BUILTIN_TOOLS, + ToolDefinition, + build_tools, + get_all_tool_names, + get_default_enabled_tools, + get_tool_by_name, +) + +# Tool factory exports (for direct use) +from .display_image import create_display_image_tool +from .knowledge_base import ( + create_search_knowledge_base_tool, + format_documents_for_context, + search_knowledge_base_async, +) +from .link_preview import create_link_preview_tool +from .podcast import create_generate_podcast_tool +from .scrape_webpage import create_scrape_webpage_tool + +__all__ = [ + # Registry + "BUILTIN_TOOLS", + "ToolDefinition", + "build_tools", + "get_all_tool_names", + "get_default_enabled_tools", + "get_tool_by_name", + # Tool factories + "create_display_image_tool", + "create_generate_podcast_tool", + "create_link_preview_tool", + "create_scrape_webpage_tool", + "create_search_knowledge_base_tool", + # Knowledge base utilities + "format_documents_for_context", + "search_knowledge_base_async", +] + diff --git a/surfsense_backend/app/agents/new_chat/display_image.py b/surfsense_backend/app/agents/new_chat/tools/display_image.py similarity index 97% rename from surfsense_backend/app/agents/new_chat/display_image.py rename to surfsense_backend/app/agents/new_chat/tools/display_image.py index 0cd05b523..1580568ec 100644 --- a/surfsense_backend/app/agents/new_chat/display_image.py +++ b/surfsense_backend/app/agents/new_chat/tools/display_image.py @@ -1,5 +1,5 @@ """ -Display image tool for the new chat agent. +Display image tool for the SurfSense agent. This module provides a tool for displaying images in the chat UI with metadata like title, description, and source attribution. 
@@ -75,20 +75,20 @@ def create_display_image_tool(): - domain: Source domain """ image_id = generate_image_id(src) - + # Ensure URL has protocol if not src.startswith(("http://", "https://")): src = f"https://{src}" - + domain = extract_domain(src) - + # Determine aspect ratio based on common image sources ratio = "16:9" # Default if "unsplash.com" in src or "pexels.com" in src: ratio = "16:9" elif "imgur.com" in src or "github.com" in src or "githubusercontent.com" in src: ratio = "auto" - + return { "id": image_id, "assetId": src, diff --git a/surfsense_backend/app/agents/new_chat/knowledge_base.py b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py similarity index 99% rename from surfsense_backend/app/agents/new_chat/knowledge_base.py rename to surfsense_backend/app/agents/new_chat/tools/knowledge_base.py index 5ffcab003..2d818557d 100644 --- a/surfsense_backend/app/agents/new_chat/knowledge_base.py +++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py @@ -1,5 +1,5 @@ """ -Knowledge base search functionality for the new chat agent. +Knowledge base search tool for the SurfSense agent. 
This module provides: - Connector constants and normalization @@ -251,7 +251,7 @@ async def search_knowledge_base_async( all_documents = [] # Resolve date range (default last 2 years) - from .utils import resolve_date_range + from app.agents.new_chat.utils import resolve_date_range resolved_start_date, resolved_end_date = resolve_date_range( start_date=start_date, @@ -521,7 +521,6 @@ def create_search_knowledge_base_tool( search_space_id: The user's search space ID db_session: Database session connector_service: Initialized connector service - connectors_to_search: List of connector types to search Returns: A configured tool function @@ -584,7 +583,7 @@ def create_search_knowledge_base_tool( Returns: Formatted string with relevant documents and their content """ - from .utils import parse_date_or_datetime + from app.agents.new_chat.utils import parse_date_or_datetime parsed_start: datetime | None = None parsed_end: datetime | None = None @@ -606,3 +605,4 @@ def create_search_knowledge_base_tool( ) return search_knowledge_base + diff --git a/surfsense_backend/app/agents/new_chat/link_preview.py b/surfsense_backend/app/agents/new_chat/tools/link_preview.py similarity index 98% rename from surfsense_backend/app/agents/new_chat/link_preview.py rename to surfsense_backend/app/agents/new_chat/tools/link_preview.py index 388a6c14e..466df2034 100644 --- a/surfsense_backend/app/agents/new_chat/link_preview.py +++ b/surfsense_backend/app/agents/new_chat/tools/link_preview.py @@ -1,5 +1,5 @@ """ -Link preview tool for the new chat agent. +Link preview tool for the SurfSense agent. This module provides a tool for fetching URL metadata (title, description, Open Graph image, etc.) to display rich link previews in the chat UI. 
@@ -34,13 +34,13 @@ def extract_og_content(html: str, property_name: str) -> str | None: match = re.search(pattern, html, re.IGNORECASE) if match: return match.group(1) - + # Try content before property pattern = rf']+content=["\']([^"\']+)["\'][^>]+property=["\']og:{property_name}["\']' match = re.search(pattern, html, re.IGNORECASE) if match: return match.group(1) - + return None @@ -50,13 +50,13 @@ def extract_twitter_content(html: str, name: str) -> str | None: match = re.search(pattern, html, re.IGNORECASE) if match: return match.group(1) - + # Try content before name pattern = rf']+content=["\']([^"\']+)["\'][^>]+name=["\']twitter:{name}["\']' match = re.search(pattern, html, re.IGNORECASE) if match: return match.group(1) - + return None @@ -66,13 +66,13 @@ def extract_meta_description(html: str) -> str | None: match = re.search(pattern, html, re.IGNORECASE) if match: return match.group(1) - + # Try content before name pattern = r']+content=["\']([^"\']+)["\'][^>]+name=["\']description["\']' match = re.search(pattern, html, re.IGNORECASE) if match: return match.group(1) - + return None @@ -82,18 +82,18 @@ def extract_title(html: str) -> str | None: og_title = extract_og_content(html, "title") if og_title: return og_title - + # Try twitter:title twitter_title = extract_twitter_content(html, "title") if twitter_title: return twitter_title - + # Fall back to tag pattern = r"<title[^>]*>([^<]+)" match = re.search(pattern, html, re.IGNORECASE) if match: return match.group(1).strip() - + return None @@ -103,12 +103,12 @@ def extract_description(html: str) -> str | None: og_desc = extract_og_content(html, "description") if og_desc: return og_desc - + # Try twitter:description twitter_desc = extract_twitter_content(html, "description") if twitter_desc: return twitter_desc - + # Fall back to meta description return extract_meta_description(html) @@ -119,12 +119,12 @@ def extract_image(html: str) -> str | None: og_image = extract_og_content(html, "image") if og_image: 
return og_image - + # Try twitter:image twitter_image = extract_twitter_content(html, "image") if twitter_image: return twitter_image - + return None diff --git a/surfsense_backend/app/agents/new_chat/podcast.py b/surfsense_backend/app/agents/new_chat/tools/podcast.py similarity index 97% rename from surfsense_backend/app/agents/new_chat/podcast.py rename to surfsense_backend/app/agents/new_chat/tools/podcast.py index 46974d184..01a36d381 100644 --- a/surfsense_backend/app/agents/new_chat/podcast.py +++ b/surfsense_backend/app/agents/new_chat/tools/podcast.py @@ -1,5 +1,5 @@ """ -Podcast generation tool for the new chat agent. +Podcast generation tool for the SurfSense agent. This module provides a factory function for creating the generate_podcast tool that submits a Celery task for background podcast generation. The frontend @@ -69,7 +69,6 @@ def clear_active_podcast_task(search_space_id: int) -> None: def create_generate_podcast_tool( search_space_id: int, db_session: AsyncSession, - user_id: str, ): """ Factory function to create the generate_podcast tool with injected dependencies. 
@@ -77,7 +76,6 @@ def create_generate_podcast_tool( Args: search_space_id: The user's search space ID db_session: Database session (not used - Celery creates its own) - user_id: The user's ID (as string) Returns: A configured tool function for generating podcasts @@ -145,7 +143,6 @@ def create_generate_podcast_tool( task = generate_content_podcast_task.delay( source_content=source_content, search_space_id=search_space_id, - user_id=str(user_id), podcast_title=podcast_title, user_prompt=user_prompt, ) @@ -174,3 +171,4 @@ def create_generate_podcast_tool( } return generate_podcast + diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py new file mode 100644 index 000000000..6c6469f33 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -0,0 +1,231 @@ +""" +Tools registry for SurfSense deep agent. + +This module provides a registry pattern for managing tools in the SurfSense agent. +It makes it easy for OSS contributors to add new tools by: +1. Creating a tool factory function in a new file in this directory +2. Registering the tool in the BUILTIN_TOOLS list below + +Example of adding a new tool: +------------------------------ +1. Create your tool file (e.g., `tools/my_tool.py`): + + from langchain_core.tools import tool + from sqlalchemy.ext.asyncio import AsyncSession + + def create_my_tool(search_space_id: int, db_session: AsyncSession): + @tool + async def my_tool(param: str) -> dict: + '''My tool description.''' + # Your implementation + return {"result": "success"} + return my_tool + +2. 
Import and register in this file: + + from .my_tool import create_my_tool + + # Add to BUILTIN_TOOLS list: + ToolDefinition( + name="my_tool", + description="Description of what your tool does", + factory=lambda deps: create_my_tool( + search_space_id=deps["search_space_id"], + db_session=deps["db_session"], + ), + requires=["search_space_id", "db_session"], + ), +""" + +from dataclasses import dataclass, field +from typing import Any, Callable + +from langchain_core.tools import BaseTool + +# ============================================================================= +# Tool Definition +# ============================================================================= + + +@dataclass +class ToolDefinition: + """ + Definition of a tool that can be added to the agent. + + Attributes: + name: Unique identifier for the tool + description: Human-readable description of what the tool does + factory: Callable that creates the tool. Receives a dict of dependencies. + requires: List of dependency names this tool needs (e.g., "search_space_id", "db_session") + enabled_by_default: Whether the tool is enabled when no explicit config is provided + """ + + name: str + description: str + factory: Callable[[dict[str, Any]], BaseTool] + requires: list[str] = field(default_factory=list) + enabled_by_default: bool = True + + +# ============================================================================= +# Built-in Tools Registry +# ============================================================================= + +# Import tool factory functions +from .display_image import create_display_image_tool +from .knowledge_base import create_search_knowledge_base_tool +from .link_preview import create_link_preview_tool +from .podcast import create_generate_podcast_tool +from .scrape_webpage import create_scrape_webpage_tool + +# Registry of all built-in tools +# Contributors: Add your new tools here! 
+BUILTIN_TOOLS: list[ToolDefinition] = [ + # Core tool - searches the user's knowledge base + ToolDefinition( + name="search_knowledge_base", + description="Search the user's personal knowledge base for relevant information", + factory=lambda deps: create_search_knowledge_base_tool( + search_space_id=deps["search_space_id"], + db_session=deps["db_session"], + connector_service=deps["connector_service"], + ), + requires=["search_space_id", "db_session", "connector_service"], + ), + # Podcast generation tool + ToolDefinition( + name="generate_podcast", + description="Generate an audio podcast from provided content", + factory=lambda deps: create_generate_podcast_tool( + search_space_id=deps["search_space_id"], + db_session=deps["db_session"], + ), + requires=["search_space_id", "db_session"], + ), + # Link preview tool - fetches Open Graph metadata for URLs + ToolDefinition( + name="link_preview", + description="Fetch metadata for a URL to display a rich preview card", + factory=lambda deps: create_link_preview_tool(), + requires=[], + ), + # Display image tool - shows images in the chat + ToolDefinition( + name="display_image", + description="Display an image in the chat with metadata", + factory=lambda deps: create_display_image_tool(), + requires=[], + ), + # Web scraping tool - extracts content from webpages + ToolDefinition( + name="scrape_webpage", + description="Scrape and extract the main content from a webpage", + factory=lambda deps: create_scrape_webpage_tool( + firecrawl_api_key=deps.get("firecrawl_api_key"), + ), + requires=[], # firecrawl_api_key is optional + ), + # ========================================================================= + # ADD YOUR CUSTOM TOOLS BELOW + # ========================================================================= + # Example: + # ToolDefinition( + # name="my_custom_tool", + # description="What my tool does", + # factory=lambda deps: create_my_custom_tool(...), + # requires=["search_space_id"], + # ), +] + + +# 
============================================================================= +# Registry Functions +# ============================================================================= + + +def get_tool_by_name(name: str) -> ToolDefinition | None: + """Get a tool definition by its name.""" + for tool_def in BUILTIN_TOOLS: + if tool_def.name == name: + return tool_def + return None + + +def get_all_tool_names() -> list[str]: + """Get names of all registered tools.""" + return [tool_def.name for tool_def in BUILTIN_TOOLS] + + +def get_default_enabled_tools() -> list[str]: + """Get names of tools that are enabled by default.""" + return [tool_def.name for tool_def in BUILTIN_TOOLS if tool_def.enabled_by_default] + + +def build_tools( + dependencies: dict[str, Any], + enabled_tools: list[str] | None = None, + disabled_tools: list[str] | None = None, + additional_tools: list[BaseTool] | None = None, +) -> list[BaseTool]: + """ + Build the list of tools for the agent. + + Args: + dependencies: Dict containing all possible dependencies: + - search_space_id: The search space ID + - db_session: Database session + - connector_service: Connector service instance + - firecrawl_api_key: Optional Firecrawl API key + enabled_tools: Explicit list of tool names to enable. If None, uses defaults. + disabled_tools: List of tool names to disable (applied after enabled_tools). + additional_tools: Extra tools to add (e.g., custom tools not in registry). + + Returns: + List of configured tool instances ready for the agent. 
+ + Example: + # Use all default tools + tools = build_tools(deps) + + # Use only specific tools + tools = build_tools(deps, enabled_tools=["search_knowledge_base", "link_preview"]) + + # Use defaults but disable podcast + tools = build_tools(deps, disabled_tools=["generate_podcast"]) + + # Add custom tools + tools = build_tools(deps, additional_tools=[my_custom_tool]) + """ + # Determine which tools to enable + if enabled_tools is not None: + tool_names_to_use = set(enabled_tools) + else: + tool_names_to_use = set(get_default_enabled_tools()) + + # Apply disabled list + if disabled_tools: + tool_names_to_use -= set(disabled_tools) + + # Build the tools + tools: list[BaseTool] = [] + for tool_def in BUILTIN_TOOLS: + if tool_def.name not in tool_names_to_use: + continue + + # Check that all required dependencies are provided + missing_deps = [dep for dep in tool_def.requires if dep not in dependencies] + if missing_deps: + raise ValueError( + f"Tool '{tool_def.name}' requires dependencies: {missing_deps}" + ) + + # Create the tool + tool = tool_def.factory(dependencies) + tools.append(tool) + + # Add any additional custom tools + if additional_tools: + tools.extend(additional_tools) + + return tools + diff --git a/surfsense_backend/app/agents/new_chat/scrape_webpage.py b/surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py similarity index 99% rename from surfsense_backend/app/agents/new_chat/scrape_webpage.py rename to surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py index 40a9c917f..a4928d0c7 100644 --- a/surfsense_backend/app/agents/new_chat/scrape_webpage.py +++ b/surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py @@ -1,5 +1,5 @@ """ -Web scraping tool for the new chat agent. +Web scraping tool for the SurfSense agent. This module provides a tool for scraping and extracting content from webpages using the existing WebCrawlerConnector. 
The scraped content can be used by @@ -37,23 +37,23 @@ def generate_scrape_id(url: str) -> str: def truncate_content(content: str, max_length: int = 50000) -> tuple[str, bool]: """ Truncate content to a maximum length. - + Returns: Tuple of (truncated_content, was_truncated) """ if len(content) <= max_length: return content, False - + # Try to truncate at a sentence boundary truncated = content[:max_length] last_period = truncated.rfind(".") last_newline = truncated.rfind("\n\n") - + # Use the later of the two boundaries, or just truncate boundary = max(last_period, last_newline) if boundary > max_length * 0.8: # Only use boundary if it's not too far back truncated = content[: boundary + 1] - + return truncated + "\n\n[Content truncated...]", True diff --git a/surfsense_backend/app/agents/podcaster/configuration.py b/surfsense_backend/app/agents/podcaster/configuration.py index c7433dadc..6a903f9df 100644 --- a/surfsense_backend/app/agents/podcaster/configuration.py +++ b/surfsense_backend/app/agents/podcaster/configuration.py @@ -16,7 +16,6 @@ class Configuration: # create assistants (https://langchain-ai.github.io/langgraph/cloud/how-tos/configuration_cloud/) # and when you invoke the graph podcast_title: str - user_id: str search_space_id: int user_prompt: str | None = None diff --git a/surfsense_backend/app/agents/podcaster/nodes.py b/surfsense_backend/app/agents/podcaster/nodes.py index 31a687763..1353d2c66 100644 --- a/surfsense_backend/app/agents/podcaster/nodes.py +++ b/surfsense_backend/app/agents/podcaster/nodes.py @@ -12,7 +12,7 @@ from litellm import aspeech from app.config import config as app_config from app.services.kokoro_tts_service import get_kokoro_tts_service -from app.services.llm_service import get_user_long_context_llm +from app.services.llm_service import get_long_context_llm from .configuration import Configuration from .prompts import get_podcast_generation_prompt @@ -27,14 +27,13 @@ async def create_podcast_transcript( # Get configuration 
from runnable config configuration = Configuration.from_runnable_config(config) - user_id = configuration.user_id search_space_id = configuration.search_space_id user_prompt = configuration.user_prompt - # Get user's long context LLM - llm = await get_user_long_context_llm(state.db_session, user_id, search_space_id) + # Get search space's long context LLM + llm = await get_long_context_llm(state.db_session, search_space_id) if not llm: - error_message = f"No long context LLM configured for user {user_id} in search space {search_space_id}" + error_message = f"No long context LLM configured for search space {search_space_id}" print(error_message) raise RuntimeError(error_message) diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index cc9c94eea..6cccdaa5b 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -685,16 +685,13 @@ async def handle_new_chat( ) search_space = search_space_result.scalars().first() - # Determine LLM config ID (use search space preference or default) - llm_config_id = -1 # Default to first global config - if search_space and search_space.fast_llm_id: - llm_config_id = search_space.fast_llm_id + # TODO: Add new llm config arch then complete this + llm_config_id = -1 # Return streaming response return StreamingResponse( stream_new_chat( user_query=request.user_query, - user_id=str(user.id), search_space_id=request.search_space_id, chat_id=request.chat_id, session=session, diff --git a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py index 40fe21f0d..34b9b827c 100644 --- a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py @@ -65,7 +65,6 @@ def generate_content_podcast_task( self, source_content: str, search_space_id: int, - user_id: str, podcast_title: str = "SurfSense Podcast", 
user_prompt: str | None = None, ) -> dict: @@ -77,7 +76,6 @@ def generate_content_podcast_task( Args: source_content: The text content to convert into a podcast search_space_id: ID of the search space - user_id: ID of the user (as string) podcast_title: Title for the podcast user_prompt: Optional instructions for podcast style/tone @@ -92,7 +90,6 @@ def generate_content_podcast_task( _generate_content_podcast( source_content, search_space_id, - user_id, podcast_title, user_prompt, ) @@ -112,7 +109,6 @@ def generate_content_podcast_task( async def _generate_content_podcast( source_content: str, search_space_id: int, - user_id: str, podcast_title: str = "SurfSense Podcast", user_prompt: str | None = None, ) -> dict: @@ -123,7 +119,6 @@ async def _generate_content_podcast( graph_config = { "configurable": { "podcast_title": podcast_title, - "user_id": str(user_id), "search_space_id": search_space_id, "user_prompt": user_prompt, } diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index a201ece22..de318a7d5 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -7,8 +7,6 @@ Data Stream Protocol (SSE format). 
import json from collections.abc import AsyncGenerator -from uuid import UUID - from langchain_core.messages import HumanMessage from sqlalchemy.ext.asyncio import AsyncSession @@ -42,7 +40,6 @@ def format_attachments_as_context(attachments: list[ChatAttachment]) -> str: async def stream_new_chat( user_query: str, - user_id: str | UUID, search_space_id: int, chat_id: int, session: AsyncSession, @@ -59,7 +56,6 @@ async def stream_new_chat( Args: user_query: The user's query - user_id: The user's ID (can be UUID object or string) search_space_id: The search space ID chat_id: The chat ID (used as LangGraph thread_id for memory) session: The database session @@ -71,9 +67,6 @@ async def stream_new_chat( """ streaming_service = VercelStreamingService() - # Convert UUID to string if needed - str(user_id) if isinstance(user_id, UUID) else user_id - # Track the current text block for streaming (defined early for exception handling) current_text_id: str | None = None @@ -107,8 +100,6 @@ async def stream_new_chat( db_session=session, connector_service=connector_service, checkpointer=checkpointer, - user_id=str(user_id), - enable_podcast=True, ) # Build input with message history from frontend diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index e3ee85dce..103440673 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -28,13 +28,14 @@ import { Sparkles, SquareIcon, } from "lucide-react"; -import Image from "next/image"; import Link from "next/link"; import { type FC, useState, useRef, useCallback, useEffect } from "react"; import { useAtomValue } from "jotai"; import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; +import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms"; import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; import { getConnectorIcon } from 
"@/contracts/enums/connectorIcons"; +import { getDocumentTypeLabel } from "@/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; import { ComposerAddAttachment, @@ -332,35 +333,9 @@ const ThreadWelcome: FC = () => {
{/* Greeting positioned above the composer - fixed position */}
-

- {/** biome-ignore lint/a11y/noStaticElementInteractions: wrong lint error, this is a workaround to fix the lint error */} -
{ - const rect = e.currentTarget.getBoundingClientRect(); - const x = (e.clientX - rect.left - rect.width / 2) / 3; - const y = (e.clientY - rect.top - rect.height / 2) / 3; - e.currentTarget.style.setProperty("--mag-x", `${x}px`); - e.currentTarget.style.setProperty("--mag-y", `${y}px`); - }} - onMouseLeave={(e) => { - e.currentTarget.style.setProperty("--mag-x", "0px"); - e.currentTarget.style.setProperty("--mag-y", "0px"); - }} - > - SurfSense -
- {getTimeBasedGreeting(user?.email)} -

+

+ {getTimeBasedGreeting(user?.email)} +

{/* Composer - top edge fixed, expands downward only */}
@@ -390,11 +365,21 @@ const Composer: FC = () => { const ConnectorIndicator: FC = () => { const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom); - const { connectors, isLoading } = useSearchSourceConnectors(false, searchSpaceId ? Number(searchSpaceId) : undefined); + const { connectors, isLoading: connectorsLoading } = useSearchSourceConnectors(false, searchSpaceId ? Number(searchSpaceId) : undefined); + const { data: documentTypeCounts, isLoading: documentTypesLoading } = useAtomValue(documentTypeCountsAtom); const [isOpen, setIsOpen] = useState(false); const closeTimeoutRef = useRef(null); + const isLoading = connectorsLoading || documentTypesLoading; + + // Get document types that have documents in the search space + const activeDocumentTypes = documentTypeCounts + ? Object.entries(documentTypeCounts).filter(([_, count]) => count > 0) + : []; + const hasConnectors = connectors.length > 0; + const hasSources = hasConnectors || activeDocumentTypes.length > 0; + const totalSourceCount = connectors.length + activeDocumentTypes.length; const handleMouseEnter = useCallback(() => { // Clear any pending close timeout @@ -420,21 +405,32 @@ const ConnectorIndicator: FC = () => { @@ -445,20 +441,31 @@ const ConnectorIndicator: FC = () => { onMouseEnter={handleMouseEnter} onMouseLeave={handleMouseLeave} > - {hasConnectors ? ( + {hasSources ? (

Connected Sources

- {connectors.length} + {totalSourceCount}
+ {/* Document types from the search space */} + {activeDocumentTypes.map(([docType, count]) => ( +
+ {getConnectorIcon(docType, "size-3.5")} + {getDocumentTypeLabel(docType)} +
+ ))} + {/* Search source connectors */} {connectors.map((connector) => (
{getConnectorIcon(connector.connector_type, "size-3.5")} @@ -479,9 +486,9 @@ const ConnectorIndicator: FC = () => {
) : (
-

No connectors yet

+

No sources yet

- Connect your first data source to enhance search results. + Add documents or connect data sources to enhance search results.