feat: introduce display image tool for enhanced image rendering in chat with metadata support

This commit is contained in:
Anish Sarkar 2025-12-23 01:11:56 +05:30
parent 4b69fdf214
commit da7cb81252
8 changed files with 709 additions and 1 deletions

View file

@ -14,6 +14,7 @@ from langgraph.types import Checkpointer
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.context import SurfSenseContextSchema from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.display_image import create_display_image_tool
from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool
from app.agents.new_chat.link_preview import create_link_preview_tool from app.agents.new_chat.link_preview import create_link_preview_tool
from app.agents.new_chat.podcast import create_generate_podcast_tool from app.agents.new_chat.podcast import create_generate_podcast_tool
@ -36,6 +37,7 @@ def create_surfsense_deep_agent(
enable_citations: bool = True, enable_citations: bool = True,
enable_podcast: bool = True, enable_podcast: bool = True,
enable_link_preview: bool = True, enable_link_preview: bool = True,
enable_display_image: bool = True,
additional_tools: Sequence[BaseTool] | None = None, additional_tools: Sequence[BaseTool] | None = None,
): ):
""" """
@ -57,6 +59,8 @@ def create_surfsense_deep_agent(
When True and user_id is provided, the agent can generate podcasts. When True and user_id is provided, the agent can generate podcasts.
enable_link_preview: Whether to include the link preview tool (default: True). enable_link_preview: Whether to include the link preview tool (default: True).
When True, the agent can fetch and display rich link previews. When True, the agent can fetch and display rich link previews.
enable_display_image: Whether to include the display image tool (default: True).
When True, the agent can display images with metadata.
additional_tools: Optional sequence of additional tools to inject into the agent. additional_tools: Optional sequence of additional tools to inject into the agent.
The search_knowledge_base tool will always be included. The search_knowledge_base tool will always be included.
@ -87,6 +91,11 @@ def create_surfsense_deep_agent(
link_preview_tool = create_link_preview_tool() link_preview_tool = create_link_preview_tool()
tools.append(link_preview_tool) tools.append(link_preview_tool)
# Add display image tool if enabled
if enable_display_image:
display_image_tool = create_display_image_tool()
tools.append(display_image_tool)
if additional_tools: if additional_tools:
tools.extend(additional_tools) tools.extend(additional_tools)

View file

@ -0,0 +1,106 @@
"""
Display image tool for the new chat agent.
This module provides a tool for displaying images in the chat UI
with metadata like title, description, and source attribution.
"""
import hashlib
from typing import Any
from urllib.parse import urlparse
from langchain_core.tools import tool
def extract_domain(url: str) -> str:
    """Return the host portion of *url* with any leading ``www.`` removed.

    Falls back to an empty string when the URL cannot be parsed.
    """
    try:
        netloc = urlparse(url).netloc
    except Exception:
        return ""
    # Drop the conventional "www." prefix so the UI shows a clean label.
    return netloc[4:] if netloc.startswith("www.") else netloc
def generate_image_id(src: str) -> str:
    """Derive a short, deterministic identifier for an image URL.

    md5 is used purely as a fast, stable fingerprint here — not for
    security — so a truncated digest is fine.
    """
    digest = hashlib.md5(src.encode()).hexdigest()
    return f"image-{digest[:12]}"
def create_display_image_tool():
    """
    Factory function to create the display_image tool.

    Returns:
        A configured tool function for displaying images.
    """

    @tool
    async def display_image(
        src: str,
        alt: str = "Image",
        title: str | None = None,
        description: str | None = None,
    ) -> dict[str, Any]:
        """
        Display an image in the chat with metadata.

        Use this tool when you want to show an image to the user.
        This displays the image with an optional title, description,
        and source attribution.

        Common use cases:
        - Showing an image from a URL the user mentioned
        - Displaying a diagram or chart you're referencing
        - Showing example images when explaining concepts

        Args:
            src: The URL of the image to display (must be a valid HTTP/HTTPS URL)
            alt: Alternative text describing the image (for accessibility)
            title: Optional title to display below the image
            description: Optional description providing context about the image

        Returns:
            A dictionary containing image metadata for the UI to render:
            - id: Unique identifier for this image
            - assetId: The image URL (for deduplication)
            - src: The image URL
            - alt: Alt text for accessibility
            - title: Image title (if provided)
            - description: Image description (if provided)
            - domain: Source domain
        """
        # Normalize the URL *before* deriving the id. Previously the id was
        # hashed from the raw input, so "example.com/a.png" and
        # "https://example.com/a.png" produced different ids for the same
        # asset even though assetId (the normalized URL) matched — breaking
        # the UI's dedup assumption that id and assetId agree.
        if not src.startswith(("http://", "https://")):
            src = f"https://{src}"

        image_id = generate_image_id(src)
        domain = extract_domain(src)

        # Aspect-ratio hint for the UI based on well-known hosts.
        # "auto" lets the frontend use the image's natural ratio; every
        # other host (including unsplash/pexels) falls back to 16:9,
        # matching the previous behavior where those branches were
        # redundant with the default.
        if any(
            host in src
            for host in ("imgur.com", "github.com", "githubusercontent.com")
        ):
            ratio = "auto"
        else:
            ratio = "16:9"

        return {
            "id": image_id,
            "assetId": src,
            "src": src,
            "alt": alt,
            "title": title,
            "description": description,
            "domain": domain,
            "ratio": ratio,
        }

    return display_image

View file

@ -158,6 +158,21 @@ You have access to the following tools:
- url: The URL to fetch metadata for (must be a valid HTTP/HTTPS URL) - url: The URL to fetch metadata for (must be a valid HTTP/HTTPS URL)
- Returns: A rich preview card with title, description, thumbnail, and domain - Returns: A rich preview card with title, description, thumbnail, and domain
- The preview card will automatically be displayed in the chat. - The preview card will automatically be displayed in the chat.
4. display_image: Display an image in the chat with metadata.
- Use this tool when you want to show an image to the user.
- This displays the image with an optional title, description, and source attribution.
- Common use cases:
* Showing an image from a URL mentioned in the conversation
* Displaying a diagram, chart, or illustration you're referencing
* Showing visual examples when explaining concepts
- Args:
- src: The URL of the image to display (must be a valid HTTP/HTTPS image URL)
- alt: Alternative text describing the image (for accessibility)
- title: Optional title to display below the image
- description: Optional description providing context about the image
- Returns: An image card with the image, title, and description
- The image will automatically be displayed in the chat.
</tools> </tools>
<tool_call_examples> <tool_call_examples>
- User: "Fetch all my notes and what's in them?" - User: "Fetch all my notes and what's in them?"
@ -184,6 +199,12 @@ You have access to the following tools:
- User: "https://github.com/some/repo" - User: "https://github.com/some/repo"
- Call: `link_preview(url="https://github.com/some/repo")` - Call: `link_preview(url="https://github.com/some/repo")`
- User: "Show me this image: https://example.com/image.png"
- Call: `display_image(src="https://example.com/image.png", alt="User shared image")`
- User: "Can you display a diagram of a neural network?"
- Call: `display_image(src="https://example.com/neural-network.png", alt="Neural network diagram", title="Neural Network Architecture", description="A visual representation of a neural network with input, hidden, and output layers")`
</tool_call_examples>{citation_section} </tool_call_examples>{citation_section}
""" """

View file

@ -298,6 +298,27 @@ async def stream_new_chat(
status="in_progress", status="in_progress",
items=last_active_step_items, items=last_active_step_items,
) )
elif tool_name == "display_image":
src = (
tool_input.get("src", "")
if isinstance(tool_input, dict)
else str(tool_input)
)
title = (
tool_input.get("title", "")
if isinstance(tool_input, dict)
else ""
)
last_active_step_title = "Displaying image"
last_active_step_items = [
f"Image: {title[:50] if title else src[:50]}{'...' if len(title or src) > 50 else ''}"
]
yield streaming_service.format_thinking_step(
step_id=tool_step_id,
title="Displaying image",
status="in_progress",
items=last_active_step_items,
)
elif tool_name == "generate_podcast": elif tool_name == "generate_podcast":
podcast_title = ( podcast_title = (
tool_input.get("podcast_title", "SurfSense Podcast") tool_input.get("podcast_title", "SurfSense Podcast")
@ -367,6 +388,16 @@ async def stream_new_chat(
f"Fetching link preview: {url[:80]}{'...' if len(url) > 80 else ''}", f"Fetching link preview: {url[:80]}{'...' if len(url) > 80 else ''}",
"info", "info",
) )
elif tool_name == "display_image":
src = (
tool_input.get("src", "")
if isinstance(tool_input, dict)
else str(tool_input)
)
yield streaming_service.format_terminal_info(
f"Displaying image: {src[:60]}{'...' if len(src) > 60 else ''}",
"info",
)
elif tool_name == "generate_podcast": elif tool_name == "generate_podcast":
title = ( title = (
tool_input.get("podcast_title", "SurfSense Podcast") tool_input.get("podcast_title", "SurfSense Podcast")
@ -453,6 +484,24 @@ async def stream_new_chat(
status="completed", status="completed",
items=completed_items, items=completed_items,
) )
elif tool_name == "display_image":
# Build completion items for image display
if isinstance(tool_output, dict):
title = tool_output.get("title", "")
alt = tool_output.get("alt", "Image")
display_name = title or alt
completed_items = [
*last_active_step_items,
f"Showing: {display_name[:50]}{'...' if len(display_name) > 50 else ''}",
]
else:
completed_items = [*last_active_step_items, "Image displayed"]
yield streaming_service.format_thinking_step(
step_id=original_step_id,
title="Displaying image",
status="completed",
items=completed_items,
)
elif tool_name == "generate_podcast": elif tool_name == "generate_podcast":
# Build detailed completion items based on podcast status # Build detailed completion items based on podcast status
podcast_status = ( podcast_status = (
@ -566,6 +615,21 @@ async def stream_new_chat(
f"Link preview failed: {error_msg}", f"Link preview failed: {error_msg}",
"error", "error",
) )
elif tool_name == "display_image":
# Stream the full image result so frontend can render the Image component
yield streaming_service.format_tool_output_available(
tool_call_id,
tool_output
if isinstance(tool_output, dict)
else {"result": tool_output},
)
# Send terminal message
if isinstance(tool_output, dict):
title = tool_output.get("title") or tool_output.get("alt", "Image")
yield streaming_service.format_terminal_info(
f"Image displayed: {title[:40]}{'...' if len(title) > 40 else ''}",
"success",
)
elif tool_name == "search_knowledge_base": elif tool_name == "search_knowledge_base":
# Don't stream the full output for search (can be very large), just acknowledge # Don't stream the full output for search (can be very large), just acknowledge
yield streaming_service.format_tool_output_available( yield streaming_service.format_tool_output_available(

View file

@ -12,6 +12,7 @@ import { toast } from "sonner";
import { Thread } from "@/components/assistant-ui/thread"; import { Thread } from "@/components/assistant-ui/thread";
import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast";
import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview";
import { DisplayImageToolUI } from "@/components/tool-ui/display-image";
import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking";
import { getBearerToken } from "@/lib/auth-utils"; import { getBearerToken } from "@/lib/auth-utils";
import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter"; import { createAttachmentAdapter, extractAttachmentContent } from "@/lib/chat/attachment-adapter";
@ -80,7 +81,7 @@ function convertToThreadMessage(msg: MessageRecord): ThreadMessageLike {
/** /**
* Tools that should render custom UI in the chat. * Tools that should render custom UI in the chat.
*/ */
const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview"]); const TOOLS_WITH_UI = new Set(["generate_podcast", "link_preview", "display_image"]);
/** /**
* Type for thinking step data from the backend * Type for thinking step data from the backend
@ -591,6 +592,7 @@ export default function NewChatPage() {
<AssistantRuntimeProvider runtime={runtime}> <AssistantRuntimeProvider runtime={runtime}>
<GeneratePodcastToolUI /> <GeneratePodcastToolUI />
<LinkPreviewToolUI /> <LinkPreviewToolUI />
<DisplayImageToolUI />
<div className="h-[calc(100vh-64px)] max-h-[calc(100vh-64px)] overflow-hidden"> <div className="h-[calc(100vh-64px)] max-h-[calc(100vh-64px)] overflow-hidden">
<Thread messageThinkingSteps={messageThinkingSteps} /> <Thread messageThinkingSteps={messageThinkingSteps} />
</div> </div>

View file

@ -0,0 +1,160 @@
"use client";
import { makeAssistantToolUI } from "@assistant-ui/react";
import { AlertCircleIcon, ImageIcon } from "lucide-react";
import {
Image,
ImageErrorBoundary,
ImageLoading,
parseSerializableImage,
} from "@/components/tool-ui/image";
/**
 * Type definitions for the display_image tool.
 */

/** Arguments the agent supplies when it invokes `display_image`. */
interface DisplayImageArgs {
	src: string;
	alt?: string;
	title?: string;
	description?: string;
}

/**
 * Result payload the backend tool returns for the UI to render.
 * Mirrors the dict built by the Python `display_image` tool; `error` is
 * populated instead of image fields when the tool fails.
 */
interface DisplayImageResult {
	id: string;
	assetId: string;
	src: string;
	alt: string;
	title?: string;
	description?: string;
	domain?: string;
	ratio?: string;
	error?: string;
}
/**
 * Error state component shown when image display fails.
 * Renders a destructive-styled card with the failing URL and error text.
 */
function ImageErrorState({ src, error }: { src: string; error: string }) {
	return (
		<div className="my-4 overflow-hidden rounded-xl border border-destructive/20 bg-destructive/5 p-4 max-w-md">
			<div className="flex items-center gap-4">
				<div className="flex size-12 shrink-0 items-center justify-center rounded-lg bg-destructive/10">
					<AlertCircleIcon className="size-6 text-destructive" />
				</div>
				<div className="flex-1 min-w-0">
					<p className="font-medium text-destructive text-sm">Failed to display image</p>
					{/* truncate keeps very long URLs on one line */}
					<p className="text-muted-foreground text-xs mt-0.5 truncate">{src}</p>
					<p className="text-muted-foreground text-xs mt-1">{error}</p>
				</div>
			</div>
		</div>
	);
}
/**
 * Cancelled state component.
 * Shown when the tool run was cancelled before producing a result; the
 * struck-through URL signals the image was intentionally not displayed.
 */
function ImageCancelledState({ src }: { src: string }) {
	return (
		<div className="my-4 rounded-xl border border-muted p-4 text-muted-foreground max-w-md">
			<p className="flex items-center gap-2">
				<ImageIcon className="size-4" />
				<span className="line-through truncate">Image: {src}</span>
			</p>
		</div>
	);
}
/**
 * Parsed Image component with error handling.
 *
 * Validates the raw tool result via parseSerializableImage (which throws on
 * malformed payloads — the surrounding ImageErrorBoundary catches that) and
 * renders the Image card with a single "Open" action that opens the image
 * URL in a new tab.
 */
function ParsedImage({ result }: { result: unknown }) {
	const image = parseSerializableImage(result);
	return (
		<Image
			{...image}
			maxWidth="420px"
			responseActions={[
				{ id: "open", label: "Open", variant: "default" },
			]}
			onResponseAction={(id) => {
				if (id === "open" && image.src) {
					// noopener,noreferrer: don't leak window/referrer to the target
					window.open(image.src, "_blank", "noopener,noreferrer");
				}
			}}
		/>
	);
}
/**
 * Display Image Tool UI Component
 *
 * Registered with assistant-ui to render an image whenever the agent calls
 * the display_image tool. Covers the full tool lifecycle: loading,
 * cancelled, errored, awaiting result, error-in-result, and success.
 */
export const DisplayImageToolUI = makeAssistantToolUI<
	DisplayImageArgs,
	DisplayImageResult
>({
	toolName: "display_image",
	render: function DisplayImageUI({ args, result, status }) {
		const src = args.src || "Unknown";

		// Tool still executing (or waiting on approval) -> spinner card.
		if (status.type === "running" || status.type === "requires-action") {
			return (
				<div className="my-4">
					<ImageLoading title="Loading image..." />
				</div>
			);
		}

		// Run ended without completing: cancelled or errored.
		if (status.type === "incomplete") {
			if (status.reason === "cancelled") {
				return <ImageCancelledState src={src} />;
			}
			if (status.reason === "error") {
				const message =
					typeof status.error === "string" ? status.error : "An error occurred";
				return <ImageErrorState src={src} error={message} />;
			}
		}

		// Completed but the result payload hasn't arrived yet.
		if (!result) {
			return (
				<div className="my-4">
					<ImageLoading title="Preparing image..." />
				</div>
			);
		}

		// Backend reported a failure inside the result itself.
		if (result.error) {
			return <ImageErrorState src={src} error={result.error} />;
		}

		// Success: parse and render, guarded by the error boundary so a
		// malformed payload degrades to a fallback card instead of crashing.
		return (
			<div className="my-4">
				<ImageErrorBoundary>
					<ParsedImage result={result} />
				</ImageErrorBoundary>
			</div>
		);
	},
});

export type { DisplayImageArgs, DisplayImageResult };

View file

@ -0,0 +1,332 @@
"use client";
import { ExternalLinkIcon, ImageIcon, Loader2 } from "lucide-react";
import NextImage from "next/image";
import { Component, type ReactNode, useState } from "react";
import { Badge } from "@/components/ui/badge";
import { Card } from "@/components/ui/card";
import { cn } from "@/lib/utils";
/**
 * Aspect ratio options for images.
 */
type AspectRatio = "1:1" | "4:3" | "16:9" | "9:16" | "auto";

/**
 * Image fit options (maps to object-cover / object-contain).
 */
type ImageFit = "cover" | "contain";

/**
 * Source attribution shown in the hover overlay and badge.
 */
interface ImageSource {
	label: string;
	iconUrl?: string;
	url?: string;
}

/**
 * Props for the Image component.
 */
export interface ImageProps {
	id: string;
	// assetId is carried for callers (dedup key) — not read by <Image> itself
	assetId: string;
	src: string;
	alt: string;
	title?: string;
	description?: string;
	// Explicit click-through target; falls back to source.url, then src
	href?: string;
	domain?: string;
	ratio?: AspectRatio;
	fit?: ImageFit;
	source?: ImageSource;
	maxWidth?: string;
	className?: string;
}

/**
 * Serializable schema for Image props (for tool results).
 * Subset of ImageProps safe to round-trip as JSON from the backend.
 */
export interface SerializableImage {
	id: string;
	assetId: string;
	src: string;
	alt: string;
	title?: string;
	description?: string;
	href?: string;
	domain?: string;
	ratio?: AspectRatio;
	source?: ImageSource;
}
/**
 * Parse and validate a serializable image from a tool result.
 *
 * Throws an Error naming the first missing/mistyped required field
 * (id, assetId, src, alt). Optional fields that fail validation are
 * dropped (set to undefined) rather than passed through — in particular,
 * an unrecognized `ratio` string is no longer blindly cast into the
 * AspectRatio union.
 */
export function parseSerializableImage(result: unknown): SerializableImage {
	if (typeof result !== "object" || result === null) {
		throw new Error("Invalid image result: expected object");
	}
	const obj = result as Record<string, unknown>;

	// Validate required string fields, reporting the first offender.
	for (const key of ["id", "assetId", "src", "alt"] as const) {
		if (typeof obj[key] !== "string") {
			throw new Error(`Invalid image: missing ${key}`);
		}
	}

	// Only accept ratios from the declared union; anything else becomes
	// undefined, which getAspectRatioClass treats as the 4:3 default —
	// same rendered result as an unknown string, but type-honest.
	const validRatios: readonly string[] = ["1:1", "4:3", "16:9", "9:16", "auto"];
	const ratio =
		typeof obj.ratio === "string" && validRatios.includes(obj.ratio)
			? (obj.ratio as AspectRatio)
			: undefined;

	return {
		id: obj.id as string,
		assetId: obj.assetId as string,
		src: obj.src as string,
		alt: obj.alt as string,
		title: typeof obj.title === "string" ? obj.title : undefined,
		description: typeof obj.description === "string" ? obj.description : undefined,
		href: typeof obj.href === "string" ? obj.href : undefined,
		domain: typeof obj.domain === "string" ? obj.domain : undefined,
		ratio,
		source:
			typeof obj.source === "object" && obj.source !== null
				? (obj.source as ImageSource)
				: undefined,
	};
}
/**
 * Map an aspect-ratio option to its Tailwind class.
 * Unknown values, "auto", and undefined all fall back to 4:3.
 */
function getAspectRatioClass(ratio?: AspectRatio): string {
	const ratioClasses: Record<string, string> = {
		"1:1": "aspect-square",
		"4:3": "aspect-[4/3]",
		"16:9": "aspect-video",
		"9:16": "aspect-[9/16]",
	};
	return (ratio && ratioClasses[ratio]) || "aspect-[4/3]";
}
/**
 * Error boundary for the Image component.
 *
 * Catches render-time errors thrown below it (e.g. parseSerializableImage
 * rejecting a malformed tool result) and renders a static fallback card
 * instead of letting the whole chat thread crash.
 */
interface ImageErrorBoundaryState {
	hasError: boolean;
	error?: Error;
}

export class ImageErrorBoundary extends Component<
	{ children: ReactNode },
	ImageErrorBoundaryState
> {
	constructor(props: { children: ReactNode }) {
		super(props);
		this.state = { hasError: false };
	}

	// React calls this when a descendant throws during render.
	static getDerivedStateFromError(error: Error): ImageErrorBoundaryState {
		return { hasError: true, error };
	}

	render() {
		if (this.state.hasError) {
			return (
				<Card className="w-full max-w-md overflow-hidden">
					<div className="aspect-[4/3] bg-muted flex items-center justify-center">
						<div className="flex flex-col items-center gap-2 text-muted-foreground">
							<ImageIcon className="size-8" />
							<p className="text-sm">Failed to load image</p>
						</div>
					</div>
				</Card>
			);
		}
		return this.props.children;
	}
}
/**
 * Loading skeleton for Image — a pulsing placeholder card at the given
 * max width, used where a static (non-spinner) placeholder is wanted.
 */
export function ImageSkeleton({ maxWidth = "420px" }: { maxWidth?: string }) {
	return (
		<Card className="w-full overflow-hidden animate-pulse" style={{ maxWidth }}>
			<div className="aspect-[4/3] bg-muted flex items-center justify-center">
				<ImageIcon className="size-12 text-muted-foreground/30" />
			</div>
		</Card>
	);
}
/**
 * Image loading state — spinner card with a configurable status message,
 * shown while the display_image tool is still running.
 */
export function ImageLoading({ title = "Loading image..." }: { title?: string }) {
	return (
		<Card className="w-full max-w-md overflow-hidden">
			<div className="aspect-[4/3] bg-muted flex items-center justify-center">
				<div className="flex flex-col items-center gap-3">
					<Loader2 className="size-8 text-muted-foreground animate-spin" />
					<p className="text-muted-foreground text-sm">{title}</p>
				</div>
			</div>
		</Card>
	);
}
/**
 * Image Component
 *
 * Display images with metadata and attribution.
 * Features a hover overlay with title, description, and source attribution.
 *
 * Click (or Enter/Space) opens `href`, else `source.url`, else the raw
 * `src`, in a new tab. If the browser fails to load the image, a static
 * "Image not available" card is rendered instead.
 * NOTE(review): `assetId` from ImageProps is intentionally not destructured
 * here — it appears to exist for callers' deduplication; confirm.
 */
export function Image({
	id,
	src,
	alt,
	title,
	description,
	href,
	domain,
	ratio = "4:3",
	fit = "cover",
	source,
	maxWidth = "420px",
	className,
}: ImageProps) {
	const [isHovered, setIsHovered] = useState(false);
	const [imageError, setImageError] = useState(false);

	const aspectRatioClass = getAspectRatioClass(ratio);
	// Prefer the explicit domain; fall back to the source label for display.
	const displayDomain = domain || source?.label;

	const handleClick = () => {
		// Navigation precedence: explicit href > source link > the image itself.
		const targetUrl = href || source?.url || src;
		if (targetUrl) {
			window.open(targetUrl, "_blank", "noopener,noreferrer");
		}
	};

	// Fallback card when the <img> itself failed to load.
	if (imageError) {
		return (
			<Card
				id={id}
				className={cn("w-full overflow-hidden", className)}
				style={{ maxWidth }}
			>
				<div className={cn("bg-muted flex items-center justify-center", aspectRatioClass)}>
					<div className="flex flex-col items-center gap-2 text-muted-foreground">
						<ImageIcon className="size-8" />
						<p className="text-sm">Image not available</p>
					</div>
				</div>
			</Card>
		);
	}

	return (
		<Card
			id={id}
			className={cn(
				"group w-full overflow-hidden cursor-pointer transition-shadow duration-200 hover:shadow-lg",
				className
			)}
			style={{ maxWidth }}
			onClick={handleClick}
			onMouseEnter={() => setIsHovered(true)}
			onMouseLeave={() => setIsHovered(false)}
			onKeyDown={(e) => {
				// Keyboard activation for the role="button" card.
				if (e.key === "Enter" || e.key === " ") {
					e.preventDefault();
					handleClick();
				}
			}}
			role="button"
			tabIndex={0}
		>
			<div className={cn("relative w-full overflow-hidden bg-muted", aspectRatioClass)}>
				{/* Image — unoptimized since src is an arbitrary external URL */}
				<NextImage
					src={src}
					alt={alt}
					fill
					className={cn(
						"transition-transform duration-300",
						fit === "cover" ? "object-cover" : "object-contain",
						isHovered && "scale-105"
					)}
					unoptimized
					onError={() => setImageError(true)}
				/>

				{/* Hover overlay - appears on hover */}
				<div
					className={cn(
						"absolute inset-0 bg-gradient-to-t from-black/80 via-black/20 to-transparent",
						"transition-opacity duration-200",
						isHovered ? "opacity-100" : "opacity-0"
					)}
				>
					{/* Content at bottom */}
					<div className="absolute bottom-0 left-0 right-0 p-4">
						{/* Title */}
						{title && (
							<h3 className="font-semibold text-white text-base leading-tight line-clamp-2 mb-1">
								{title}
							</h3>
						)}

						{/* Description */}
						{description && (
							<p className="text-white/80 text-sm line-clamp-2 mb-2">
								{description}
							</p>
						)}

						{/* Source attribution */}
						{displayDomain && (
							<div className="flex items-center gap-1.5">
								{source?.iconUrl ? (
									<NextImage
										src={source.iconUrl}
										alt={source.label}
										width={16}
										height={16}
										className="rounded"
										unoptimized
									/>
								) : (
									<ExternalLinkIcon className="size-4 text-white/70" />
								)}
								<span className="text-white/70 text-sm">{displayDomain}</span>
							</div>
						)}
					</div>
				</div>

				{/* Always visible domain badge (bottom right, shown when NOT hovered) */}
				{displayDomain && !isHovered && (
					<div className="absolute bottom-2 right-2">
						<Badge
							variant="secondary"
							className="bg-black/60 text-white border-0 text-xs backdrop-blur-sm"
						>
							{displayDomain}
						</Badge>
					</div>
				)}
			</div>
		</Card>
	);
}

View file

@ -32,3 +32,17 @@ export {
type MediaCardProps, type MediaCardProps,
type SerializableMediaCard, type SerializableMediaCard,
} from "./media-card"; } from "./media-card";
// Image primitives shared by tool UIs: the card component, loading/error
// states, and the tool-result parser.
export {
	Image,
	ImageErrorBoundary,
	ImageLoading,
	ImageSkeleton,
	parseSerializableImage,
	type ImageProps,
	type SerializableImage,
} from "./image";
// assistant-ui registration for the display_image tool.
export {
	DisplayImageToolUI,
	type DisplayImageArgs,
	type DisplayImageResult,
} from "./display-image";