refactor: remove display_image tool and update related components to streamline image handling

This commit is contained in:
Anish Sarkar 2026-03-24 16:28:11 +05:30
parent eed792c19a
commit 6c507989d2
16 changed files with 385 additions and 93 deletions

View file

@ -37,7 +37,6 @@ from .tools import (
BUILTIN_TOOLS, BUILTIN_TOOLS,
ToolDefinition, ToolDefinition,
build_tools, build_tools,
create_display_image_tool,
create_generate_podcast_tool, create_generate_podcast_tool,
create_link_preview_tool, create_link_preview_tool,
create_scrape_webpage_tool, create_scrape_webpage_tool,
@ -63,7 +62,6 @@ __all__ = [
# LLM config # LLM config
"create_chat_litellm_from_config", "create_chat_litellm_from_config",
# Tool factories # Tool factories
"create_display_image_tool",
"create_generate_podcast_tool", "create_generate_podcast_tool",
"create_link_preview_tool", "create_link_preview_tool",
"create_scrape_webpage_tool", "create_scrape_webpage_tool",

View file

@ -151,7 +151,6 @@ async def create_surfsense_deep_agent(
- generate_podcast: Generate audio podcasts from content - generate_podcast: Generate audio podcasts from content
- generate_image: Generate images from text descriptions using AI models - generate_image: Generate images from text descriptions using AI models
- link_preview: Fetch rich previews for URLs - link_preview: Fetch rich previews for URLs
- display_image: Display images in chat
- scrape_webpage: Extract content from webpages - scrape_webpage: Extract content from webpages
- save_memory: Store facts/preferences about the user - save_memory: Store facts/preferences about the user
- recall_memory: Retrieve relevant user memories - recall_memory: Retrieve relevant user memories

View file

@ -199,33 +199,6 @@ _TOOL_INSTRUCTIONS["link_preview"] = """
- The preview card will automatically be displayed in the chat. - The preview card will automatically be displayed in the chat.
""" """
_TOOL_INSTRUCTIONS["display_image"] = """
- display_image: Display an image in the chat with metadata.
- Use this tool ONLY when you have a valid public HTTP/HTTPS image URL to show.
- This displays the image with an optional title, description, and source attribution.
- Valid use cases:
* Showing an image from a URL the user explicitly mentioned in their message
* Displaying images found in scraped webpage content (from scrape_webpage tool)
* Showing a publicly accessible diagram or chart from a known URL
* Displaying an AI-generated image after calling the generate_image tool (ALWAYS required)
CRITICAL - NEVER USE THIS TOOL FOR USER-UPLOADED ATTACHMENTS:
When a user uploads/attaches an image file to their message:
* The image is ALREADY VISIBLE in the chat UI as a thumbnail on their message
* You do NOT have a URL for their uploaded image - only extracted text/description
* Calling display_image will FAIL and show "Image not available" error
* Simply analyze the image content and respond with your analysis - DO NOT try to display it
* The user can already see their own uploaded image - they don't need you to show it again
- Args:
- src: The URL of the image (MUST be a valid public HTTP/HTTPS URL that you know exists)
- alt: Alternative text describing the image (for accessibility)
- title: Optional title to display below the image
- description: Optional description providing context about the image
- Returns: An image card with the image, title, and description
- The image will automatically be displayed in the chat.
"""
_TOOL_INSTRUCTIONS["generate_image"] = """ _TOOL_INSTRUCTIONS["generate_image"] = """
- generate_image: Generate images from text descriptions using AI image models. - generate_image: Generate images from text descriptions using AI image models.
- Use this when the user asks you to create, generate, draw, design, or make an image. - Use this when the user asks you to create, generate, draw, design, or make an image.
@ -233,10 +206,7 @@ _TOOL_INSTRUCTIONS["generate_image"] = """
- Args: - Args:
- prompt: A detailed text description of the image to generate. Be specific about subject, style, colors, composition, and mood. - prompt: A detailed text description of the image to generate. Be specific about subject, style, colors, composition, and mood.
- n: Number of images to generate (1-4, default: 1) - n: Number of images to generate (1-4, default: 1)
- Returns: A dictionary with the generated image URL in the "src" field, along with metadata. - Returns: A dictionary with the generated image metadata. The image will automatically be displayed in the chat.
- CRITICAL: After calling generate_image, you MUST call `display_image` with the returned "src" URL
to actually show the image in the chat. The generate_image tool only generates the image and returns
the URL it does NOT display anything. You must always follow up with display_image.
- IMPORTANT: Write a detailed, descriptive prompt for best results. Don't just pass the user's words verbatim - - IMPORTANT: Write a detailed, descriptive prompt for best results. Don't just pass the user's words verbatim -
expand and improve the prompt with specific details about style, lighting, composition, and mood. expand and improve the prompt with specific details about style, lighting, composition, and mood.
- If the user's request is vague (e.g., "make me an image of a cat"), enhance the prompt with artistic details. - If the user's request is vague (e.g., "make me an image of a cat"), enhance the prompt with artistic details.
@ -270,7 +240,7 @@ _TOOL_INSTRUCTIONS["scrape_webpage"] = """
- Returns: The page title, description, full content (in markdown), word count, and metadata - Returns: The page title, description, full content (in markdown), word count, and metadata
- After scraping, you will have the full article text and can analyze, summarize, or answer questions about it. - After scraping, you will have the full article text and can analyze, summarize, or answer questions about it.
- IMAGES: The scraped content may contain image URLs in markdown format like `![alt text](image_url)`. - IMAGES: The scraped content may contain image URLs in markdown format like `![alt text](image_url)`.
* When you find relevant/important images in the scraped content, use the `display_image` tool to show them to the user. * When you find relevant/important images in the scraped content, include them in your response using standard markdown image syntax: `![alt text](image_url)`.
* This makes your response more visual and engaging. * This makes your response more visual and engaging.
* Prioritize showing: diagrams, charts, infographics, key illustrations, or images that help explain the content. * Prioritize showing: diagrams, charts, infographics, key illustrations, or images that help explain the content.
* Don't show every image - just the most relevant 1-3 images that enhance understanding. * Don't show every image - just the most relevant 1-3 images that enhance understanding.
@ -487,21 +457,18 @@ _TOOL_EXAMPLES["scrape_webpage"] = """
- IMPORTANT: Always attempt scraping first. Never refuse before trying the tool. - IMPORTANT: Always attempt scraping first. Never refuse before trying the tool.
""" """
_TOOL_EXAMPLES["display_image"] = """
- User: "Show me this image: https://example.com/image.png"
- Call: `display_image(src="https://example.com/image.png", alt="User shared image")`
- User uploads an image file and asks: "What is this image about?"
- DO NOT call display_image! The user's uploaded image is already visible in the chat.
- Simply analyze the image content and respond directly.
"""
_TOOL_EXAMPLES["generate_image"] = """ _TOOL_EXAMPLES["generate_image"] = """
- User: "Generate an image of a cat" - User: "Generate an image of a cat"
- Step 1: `generate_image(prompt="A fluffy orange tabby cat sitting on a windowsill, bathed in warm golden sunlight, soft bokeh background with green houseplants, photorealistic style, cozy atmosphere")` - Call: `generate_image(prompt="A fluffy orange tabby cat sitting on a windowsill, bathed in warm golden sunlight, soft bokeh background with green houseplants, photorealistic style, cozy atmosphere")`
- Step 2: Use the returned "src" URL to display it: `display_image(src="<returned_url>", alt="A fluffy orange tabby cat on a windowsill", title="Generated Image")` - The generated image will automatically be displayed in the chat.
- User: "Draw me a logo for a coffee shop called Bean Dream" - User: "Draw me a logo for a coffee shop called Bean Dream"
- Step 1: `generate_image(prompt="Minimalist modern logo design for a coffee shop called 'Bean Dream', featuring a stylized coffee bean with dream-like swirls of steam, clean vector style, warm brown and cream color palette, white background, professional branding")` - Call: `generate_image(prompt="Minimalist modern logo design for a coffee shop called 'Bean Dream', featuring a stylized coffee bean with dream-like swirls of steam, clean vector style, warm brown and cream color palette, white background, professional branding")`
- Step 2: `display_image(src="<returned_url>", alt="Bean Dream coffee shop logo", title="Generated Image")` - The generated image will automatically be displayed in the chat.
- User: "Show me this image: https://example.com/image.png"
- Simply include it in your response using markdown: `![Image](https://example.com/image.png)`
- User uploads an image file and asks: "What is this image about?"
- The user's uploaded image is already visible in the chat.
- Simply analyze the image content and respond directly.
""" """
_TOOL_EXAMPLES["web_search"] = """ _TOOL_EXAMPLES["web_search"] = """
@ -523,7 +490,6 @@ _ALL_TOOL_NAMES_ORDERED = [
"generate_video_presentation", "generate_video_presentation",
"generate_report", "generate_report",
"link_preview", "link_preview",
"display_image",
"generate_image", "generate_image",
"scrape_webpage", "scrape_webpage",
"save_memory", "save_memory",
@ -764,7 +730,7 @@ Do not use the sandbox for:
When your code creates output files (images, CSVs, PDFs, etc.) in the sandbox: When your code creates output files (images, CSVs, PDFs, etc.) in the sandbox:
- **Print the absolute path** at the end of your script so the user can download the file. Example: `print("SANDBOX_FILE: /tmp/chart.png")` - **Print the absolute path** at the end of your script so the user can download the file. Example: `print("SANDBOX_FILE: /tmp/chart.png")`
- **DO NOT call `display_image`** for files created inside the sandbox. Sandbox files are not accessible via public URLs, so `display_image` will always show "Image not available". The frontend automatically renders a download button from the `SANDBOX_FILE:` marker. - **DO NOT use markdown image syntax** for files created inside the sandbox. Sandbox files are not accessible via public URLs and will show "Image not available". The frontend automatically renders a download button from the `SANDBOX_FILE:` marker.
- You can output multiple files, one per line: `print("SANDBOX_FILE: /tmp/report.csv")`, `print("SANDBOX_FILE: /tmp/chart.png")` - You can output multiple files, one per line: `print("SANDBOX_FILE: /tmp/report.csv")`, `print("SANDBOX_FILE: /tmp/chart.png")`
- Always describe what the file contains in your response text so the user knows what they are downloading. - Always describe what the file contains in your response text so the user knows what they are downloading.
- IMPORTANT: Every `execute` call that saves a file MUST print the `SANDBOX_FILE: <path>` marker. Without it the user cannot download the file. - IMPORTANT: Every `execute` call that saves a file MUST print the `SANDBOX_FILE: <path>` marker. Without it the user cannot download the file.

View file

@ -11,7 +11,6 @@ Available tools:
- generate_video_presentation: Generate video presentations with slides and narration - generate_video_presentation: Generate video presentations with slides and narration
- generate_image: Generate images from text descriptions using AI models - generate_image: Generate images from text descriptions using AI models
- link_preview: Fetch rich previews for URLs - link_preview: Fetch rich previews for URLs
- display_image: Display images in chat
- scrape_webpage: Extract content from webpages - scrape_webpage: Extract content from webpages
- save_memory: Store facts/preferences about the user - save_memory: Store facts/preferences about the user
- recall_memory: Retrieve relevant user memories - recall_memory: Retrieve relevant user memories
@ -19,7 +18,6 @@ Available tools:
# Registry exports # Registry exports
# Tool factory exports (for direct use) # Tool factory exports (for direct use)
from .display_image import create_display_image_tool
from .generate_image import create_generate_image_tool from .generate_image import create_generate_image_tool
from .knowledge_base import ( from .knowledge_base import (
CONNECTOR_DESCRIPTIONS, CONNECTOR_DESCRIPTIONS,
@ -50,7 +48,6 @@ __all__ = [
"ToolDefinition", "ToolDefinition",
"build_tools", "build_tools",
# Tool factories # Tool factories
"create_display_image_tool",
"create_generate_image_tool", "create_generate_image_tool",
"create_generate_podcast_tool", "create_generate_podcast_tool",
"create_generate_video_presentation_tool", "create_generate_video_presentation_tool",

View file

@ -2,8 +2,7 @@
Image generation tool for the SurfSense agent. Image generation tool for the SurfSense agent.
This module provides a tool that generates images using litellm.aimage_generation() This module provides a tool that generates images using litellm.aimage_generation()
and returns the result via the existing display_image tool format so the frontend and returns the result directly in a format the frontend Image component can render.
renders the generated image inline in the chat.
Config resolution: Config resolution:
1. Uses the search space's image_generation_config_id preference 1. Uses the search space's image_generation_config_id preference
@ -11,6 +10,7 @@ Config resolution:
3. Supports global YAML configs (negative IDs) and user DB configs (positive IDs) 3. Supports global YAML configs (negative IDs) and user DB configs (positive IDs)
""" """
import hashlib
import logging import logging
from typing import Any from typing import Any
@ -222,11 +222,17 @@ def create_generate_image_tool(
else: else:
return {"error": "No displayable image data in the response"} return {"error": "No displayable image data in the response"}
image_id = f"image-{hashlib.md5(image_url.encode()).hexdigest()[:12]}"
return { return {
"id": image_id,
"assetId": image_url,
"src": image_url, "src": image_url,
"alt": revised_prompt or prompt, "alt": revised_prompt or prompt,
"title": "Generated Image", "title": "Generated Image",
"description": revised_prompt if revised_prompt != prompt else None, "description": revised_prompt if revised_prompt != prompt else None,
"domain": "ai-generated",
"ratio": "auto",
"generated": True, "generated": True,
"prompt": prompt, "prompt": prompt,
"image_count": len(images), "image_count": len(images),

View file

@ -50,7 +50,6 @@ from .confluence import (
create_delete_confluence_page_tool, create_delete_confluence_page_tool,
create_update_confluence_page_tool, create_update_confluence_page_tool,
) )
from .display_image import create_display_image_tool
from .generate_image import create_generate_image_tool from .generate_image import create_generate_image_tool
from .gmail import ( from .gmail import (
create_create_gmail_draft_tool, create_create_gmail_draft_tool,
@ -194,13 +193,6 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
factory=lambda deps: create_link_preview_tool(), factory=lambda deps: create_link_preview_tool(),
requires=[], requires=[],
), ),
# Display image tool - shows images in the chat
ToolDefinition(
name="display_image",
description="Display an image in the chat with metadata",
factory=lambda deps: create_display_image_tool(),
requires=[],
),
# Generate image tool - creates images using AI models (DALL-E, GPT Image, etc.) # Generate image tool - creates images using AI models (DALL-E, GPT Image, etc.)
ToolDefinition( ToolDefinition(
name="generate_image", name="generate_image",

View file

@ -38,7 +38,7 @@ from app.db import (
from app.utils.rbac import check_permission from app.utils.rbac import check_permission
UI_TOOLS = { UI_TOOLS = {
"display_image", "generate_image",
"link_preview", "link_preview",
"generate_podcast", "generate_podcast",
"generate_report", "generate_report",

View file

@ -351,22 +351,19 @@ async def _stream_agent_events(
status="in_progress", status="in_progress",
items=last_active_step_items, items=last_active_step_items,
) )
elif tool_name == "display_image": elif tool_name == "generate_image":
src = ( prompt = (
tool_input.get("src", "") tool_input.get("prompt", "")
if isinstance(tool_input, dict) if isinstance(tool_input, dict)
else str(tool_input) else str(tool_input)
) )
title = ( last_active_step_title = "Generating image"
tool_input.get("title", "") if isinstance(tool_input, dict) else ""
)
last_active_step_title = "Analyzing the image"
last_active_step_items = [ last_active_step_items = [
f"Analyzing: {title[:50] if title else src[:50]}{'...' if len(title or src) > 50 else ''}" f"Prompt: {prompt[:80]}{'...' if len(prompt) > 80 else ''}"
] ]
yield streaming_service.format_thinking_step( yield streaming_service.format_thinking_step(
step_id=tool_step_id, step_id=tool_step_id,
title="Analyzing the image", title="Generating image",
status="in_progress", status="in_progress",
items=last_active_step_items, items=last_active_step_items,
) )
@ -531,20 +528,22 @@ async def _stream_agent_events(
status="completed", status="completed",
items=completed_items, items=completed_items,
) )
elif tool_name == "display_image": elif tool_name == "generate_image":
if isinstance(tool_output, dict): if isinstance(tool_output, dict) and not tool_output.get("error"):
title = tool_output.get("title", "")
alt = tool_output.get("alt", "Image")
display_name = title or alt
completed_items = [ completed_items = [
*last_active_step_items, *last_active_step_items,
f"Analyzed: {display_name[:50]}{'...' if len(display_name) > 50 else ''}", "Image generated successfully",
] ]
else: else:
completed_items = [*last_active_step_items, "Image analyzed"] error_msg = (
tool_output.get("error", "Generation failed")
if isinstance(tool_output, dict)
else "Generation failed"
)
completed_items = [*last_active_step_items, f"Error: {error_msg}"]
yield streaming_service.format_thinking_step( yield streaming_service.format_thinking_step(
step_id=original_step_id, step_id=original_step_id,
title="Analyzing the image", title="Generating image",
status="completed", status="completed",
items=completed_items, items=completed_items,
) )
@ -842,7 +841,7 @@ async def _stream_agent_events(
f"Link preview failed: {error_msg}", f"Link preview failed: {error_msg}",
"error", "error",
) )
elif tool_name == "display_image": elif tool_name == "generate_image":
yield streaming_service.format_tool_output_available( yield streaming_service.format_tool_output_available(
tool_call_id, tool_call_id,
tool_output tool_output
@ -850,11 +849,16 @@ async def _stream_agent_events(
else {"result": tool_output}, else {"result": tool_output},
) )
if isinstance(tool_output, dict): if isinstance(tool_output, dict):
title = tool_output.get("title") or tool_output.get("alt", "Image") if tool_output.get("error"):
yield streaming_service.format_terminal_info( yield streaming_service.format_terminal_info(
f"Image analyzed: {title[:40]}{'...' if len(title) > 40 else ''}", f"Image generation failed: {tool_output['error'][:60]}",
"success", "error",
) )
else:
yield streaming_service.format_terminal_info(
"Image generated successfully",
"success",
)
elif tool_name == "scrape_webpage": elif tool_name == "scrape_webpage":
if isinstance(tool_output, dict): if isinstance(tool_output, dict):
display_output = { display_output = {

View file

@ -133,6 +133,7 @@ const TOOLS_WITH_UI = new Set([
"generate_video_presentation", "generate_video_presentation",
"link_preview", "link_preview",
"display_image", "display_image",
"generate_image",
"delete_notion_page", "delete_notion_page",
"scrape_webpage", "scrape_webpage",
"create_notion_page", "create_notion_page",

View file

@ -18,6 +18,7 @@ import { CommentPanelContainer } from "@/components/chat-comments/comment-panel-
import { CommentSheet } from "@/components/chat-comments/comment-sheet/comment-sheet"; import { CommentSheet } from "@/components/chat-comments/comment-sheet/comment-sheet";
import { CreateConfluencePageToolUI, DeleteConfluencePageToolUI, UpdateConfluencePageToolUI } from "@/components/tool-ui/confluence"; import { CreateConfluencePageToolUI, DeleteConfluencePageToolUI, UpdateConfluencePageToolUI } from "@/components/tool-ui/confluence";
import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; import { DisplayImageToolUI } from "@/components/tool-ui/display-image";
import { GenerateImageToolUI } from "@/components/tool-ui/generate-image";
import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast";
import { GenerateReportToolUI } from "@/components/tool-ui/generate-report"; import { GenerateReportToolUI } from "@/components/tool-ui/generate-report";
import { GenerateVideoPresentationToolUI } from "@/components/tool-ui/video-presentation"; import { GenerateVideoPresentationToolUI } from "@/components/tool-ui/video-presentation";
@ -60,6 +61,7 @@ const AssistantMessageInner: FC = () => {
link_preview: LinkPreviewToolUI, link_preview: LinkPreviewToolUI,
multi_link_preview: MultiLinkPreviewToolUI, multi_link_preview: MultiLinkPreviewToolUI,
display_image: DisplayImageToolUI, display_image: DisplayImageToolUI,
generate_image: GenerateImageToolUI,
scrape_webpage: ScrapeWebpageToolUI, scrape_webpage: ScrapeWebpageToolUI,
save_memory: SaveMemoryToolUI, save_memory: SaveMemoryToolUI,
recall_memory: RecallMemoryToolUI, recall_memory: RecallMemoryToolUI,

View file

@ -0,0 +1,268 @@
"use client";
import {
memo,
useState,
useEffect,
useRef,
type PropsWithChildren,
} from "react";
import { createPortal } from "react-dom";
import { cva, type VariantProps } from "class-variance-authority";
import { ImageIcon, ImageOffIcon } from "lucide-react";
import type { ImageMessagePartComponent } from "@assistant-ui/react";
import { cn } from "@/lib/utils";
/**
 * Class-name generator for the image card root element.
 *
 * `variant` picks the frame treatment (border, none, or muted background) and
 * `size` picks the width constraint. Options that are omitted fall back to
 * `outline` and `default` respectively.
 */
const imageVariants = cva("aui-image-root relative overflow-hidden rounded-lg", {
	variants: {
		variant: {
			outline: "border border-border",
			ghost: "",
			muted: "bg-muted/50",
		},
		size: {
			sm: "max-w-64",
			default: "max-w-96",
			lg: "max-w-[512px]",
			full: "w-full",
		},
	},
	defaultVariants: { variant: "outline", size: "default" },
});
/** Props for `ImageRoot`: every native `<div>` prop plus the `variant` / `size` options of `imageVariants`. */
export type ImageRootProps = React.ComponentProps<"div"> &
	VariantProps<typeof imageVariants>;

/**
 * Outer wrapper of an image card.
 *
 * Resolves `variant`, `size` and any caller `className` into a single class
 * string, mirrors the chosen variant and size as `data-*` attributes, and
 * forwards all remaining props to the underlying `<div>`.
 */
function ImageRoot(props: ImageRootProps) {
	const { className, variant, size, children, ...divProps } = props;
	const rootClasses = cn(imageVariants({ variant, size, className }));

	return (
		<div
			data-slot="image-root"
			data-variant={variant}
			data-size={size}
			className={rootClasses}
			{...divProps}
		>
			{children}
		</div>
	);
}
/** Props for `ImagePreview`: native `<img>` props (minus `children`) plus a class for the wrapping element. */
type ImagePreviewProps = Omit<React.ComponentProps<"img">, "children"> & {
	containerClassName?: string;
};

/**
 * Renders an image with a pulsing placeholder while it loads and a
 * "broken image" icon when loading fails.
 *
 * Load and error outcomes are stored together with the `src` they belong to,
 * so passing a different `src` puts the component back into the loading state
 * without an explicit reset. Caller-supplied `onLoad` / `onError` handlers run
 * after the internal bookkeeping.
 */
function ImagePreview({
	className,
	containerClassName,
	onLoad,
	onError,
	alt = "Image content",
	src,
	...props
}: ImagePreviewProps) {
	const imageElement = useRef<HTMLImageElement>(null);
	const [srcThatLoaded, setSrcThatLoaded] = useState<string | undefined>(undefined);
	const [srcThatFailed, setSrcThatFailed] = useState<string | undefined>(undefined);

	// Both flags are derived by comparing against the current `src`.
	const isLoaded = srcThatLoaded === src;
	const hasFailed = srcThatFailed === src;

	useEffect(() => {
		// Covers an element that is already complete (with real pixel data) by
		// the time this effect runs, in which case `onLoad` may not fire again.
		if (typeof src !== "string") return;
		const node = imageElement.current;
		if (node?.complete && node.naturalWidth > 0) {
			setSrcThatLoaded(src);
		}
	}, [src]);

	return (
		<div
			data-slot="image-preview"
			className={cn("relative min-h-32", containerClassName)}
		>
			{isLoaded || hasFailed ? null : (
				<div
					data-slot="image-preview-loading"
					className="absolute inset-0 flex items-center justify-center bg-muted/50"
				>
					<ImageIcon className="size-8 animate-pulse text-muted-foreground" />
				</div>
			)}
			{hasFailed ? (
				<div
					data-slot="image-preview-error"
					className="flex min-h-32 items-center justify-center bg-muted/50 p-4"
				>
					<ImageOffIcon className="size-8 text-muted-foreground" />
				</div>
			) : (
				// biome-ignore lint/performance/noImgElement: intentional for dynamic external URLs
				<img
					ref={imageElement}
					src={src}
					alt={alt}
					className={cn(
						"block h-auto w-full object-contain",
						// Hidden (but still laid out) until the load is confirmed.
						!isLoaded && "invisible",
						className,
					)}
					onLoad={(event) => {
						if (typeof src === "string") {
							setSrcThatLoaded(src);
						}
						onLoad?.(event);
					}}
					onError={(event) => {
						if (typeof src === "string") {
							setSrcThatFailed(src);
						}
						onError?.(event);
					}}
					{...props}
				/>
			)}
		</div>
	);
}
/**
 * Single-line, truncated caption shown under an image.
 *
 * Renders nothing when there are no children; otherwise forwards all
 * remaining props to the underlying `<span>`.
 */
function ImageFilename(props: React.ComponentProps<"span">) {
	const { className, children, ...spanProps } = props;

	if (!children) {
		return null;
	}

	const captionClasses = cn(
		"block truncate px-2 py-1.5 text-muted-foreground text-xs",
		className,
	);

	return (
		<span data-slot="image-filename" className={captionClasses} {...spanProps}>
			{children}
		</span>
	);
}
/** Props for `ImageZoom`: the image to show enlarged, plus the trigger content passed as children. */
type ImageZoomProps = PropsWithChildren<{
	src: string;
	alt?: string;
}>;

/**
 * Wraps its children in a button that opens a full-screen overlay showing
 * `src` enlarged.
 *
 * The overlay is portalled into `document.body` and closes on a click
 * anywhere (backdrop or image) or on the Escape key. While it is open, page
 * scrolling is disabled and the previous `overflow` value is restored on close.
 */
function ImageZoom({ src, alt = "Image preview", children }: ImageZoomProps) {
	const [isMounted, setIsMounted] = useState(false);
	const [isOpen, setIsOpen] = useState(false);

	// Flip the mount flag after the first client-side effect pass; the portal
	// below is gated on it (presumably to keep it out of server rendering).
	useEffect(() => {
		setIsMounted(true);
	}, []);

	// Close on Escape, listening only while the overlay is open.
	useEffect(() => {
		if (!isOpen) return;

		const closeOnEscape = (event: KeyboardEvent) => {
			if (event.key === "Escape") {
				setIsOpen(false);
			}
		};

		document.addEventListener("keydown", closeOnEscape);
		return () => document.removeEventListener("keydown", closeOnEscape);
	}, [isOpen]);

	// Lock body scrolling while open and put the previous value back afterwards.
	useEffect(() => {
		if (!isOpen) return;

		const previousOverflow = document.body.style.overflow;
		document.body.style.overflow = "hidden";
		return () => {
			document.body.style.overflow = previousOverflow;
		};
	}, [isOpen]);

	function openZoom() {
		setIsOpen(true);
	}

	function closeZoom() {
		setIsOpen(false);
	}

	const overlay =
		isMounted && isOpen
			? createPortal(
					<button
						type="button"
						data-slot="image-zoom-overlay"
						className="aui-image-zoom-overlay fade-in fixed inset-0 z-50 flex animate-in cursor-zoom-out items-center justify-center border-0 bg-black/80 p-0 duration-200"
						onClick={closeZoom}
						aria-label="Close zoomed image"
					>
						{/* biome-ignore lint/performance/noImgElement: intentional for dynamic external URLs */}
						<img
							data-slot="image-zoom-content"
							src={src}
							alt={alt}
							className="aui-image-zoom-content fade-in zoom-in-95 max-h-[90vh] max-w-[90vw] animate-in object-contain duration-200"
							onClick={(event) => {
								// Handle the close here and keep the click from also
								// reaching the overlay button's handler.
								event.stopPropagation();
								closeZoom();
							}}
							onKeyDown={(event) => {
								if (event.key === "Enter") {
									event.stopPropagation();
									closeZoom();
								}
							}}
						/>
					</button>,
					document.body,
				)
			: null;

	return (
		<>
			<button
				type="button"
				onClick={openZoom}
				className="aui-image-zoom-trigger cursor-zoom-in border-0 bg-transparent p-0 text-left"
				aria-label="Click to zoom image"
			>
				{children}
			</button>
			{overlay}
		</>
	);
}
/**
 * Default rendering of an image message part: a zoomable preview with the
 * filename (when present) as caption. The filename doubles as alt text,
 * falling back to "Image content".
 */
const ImageImpl: ImageMessagePartComponent = ({ image, filename }) => {
	const altText = filename || "Image content";

	return (
		<ImageRoot>
			<ImageZoom src={image} alt={altText}>
				<ImagePreview src={image} alt={altText} />
			</ImageZoom>
			<ImageFilename>{filename}</ImageFilename>
		</ImageRoot>
	);
};

/** The memoized image component with its building blocks attached as static members. */
type ImageWithParts = ImageMessagePartComponent & {
	Root: typeof ImageRoot;
	Preview: typeof ImagePreview;
	Filename: typeof ImageFilename;
	Zoom: typeof ImageZoom;
};

const Image = memo(ImageImpl) as unknown as ImageWithParts;

// Attach the display name and the sub-components so callers can compose
// `Image.Root`, `Image.Preview`, `Image.Filename` and `Image.Zoom` directly.
Object.assign(Image, {
	displayName: "Image",
	Root: ImageRoot,
	Preview: ImagePreview,
	Filename: ImageFilename,
	Zoom: ImageZoom,
});
export {
Image,
ImageRoot,
ImagePreview,
ImageFilename,
ImageZoom,
imageVariants,
};

View file

@ -8,8 +8,9 @@ import {
unstable_memoizeMarkdownComponents as memoizeMarkdownComponents, unstable_memoizeMarkdownComponents as memoizeMarkdownComponents,
useIsMarkdownCodeBlock, useIsMarkdownCodeBlock,
} from "@assistant-ui/react-markdown"; } from "@assistant-ui/react-markdown";
import { CheckIcon, CopyIcon } from "lucide-react"; import { CheckIcon, CopyIcon, ExternalLinkIcon } from "lucide-react";
import { type FC, memo, type ReactNode, useState } from "react"; import { type FC, memo, type ReactNode, useState } from "react";
import { ImagePreview, ImageRoot, ImageZoom } from "@/components/assistant-ui/image";
import rehypeKatex from "rehype-katex"; import rehypeKatex from "rehype-katex";
import remarkGfm from "remark-gfm"; import remarkGfm from "remark-gfm";
import remarkMath from "remark-math"; import remarkMath from "remark-math";
@ -188,17 +189,17 @@ const useCopyToClipboard = ({ copiedDuration = 3000 }: { copiedDuration?: number
function processChildrenWithCitations(children: ReactNode): ReactNode { function processChildrenWithCitations(children: ReactNode): ReactNode {
if (typeof children === "string") { if (typeof children === "string") {
const parsed = parseTextWithCitations(children); const parsed = parseTextWithCitations(children);
return parsed.length === 1 && typeof parsed[0] === "string" ? children : <>{parsed}</>; return parsed.length === 1 && typeof parsed[0] === "string" ? children : parsed;
} }
if (Array.isArray(children)) { if (Array.isArray(children)) {
return children.map((child, index) => { return children.map((child) => {
if (typeof child === "string") { if (typeof child === "string") {
const parsed = parseTextWithCitations(child); const parsed = parseTextWithCitations(child);
return parsed.length === 1 && typeof parsed[0] === "string" ? ( return parsed.length === 1 && typeof parsed[0] === "string" ? (
child child
) : ( ) : (
<span key={index}>{parsed}</span> <span key={child}>{parsed}</span>
); );
} }
return child; return child;
@ -208,6 +209,56 @@ function processChildrenWithCitations(children: ReactNode): ReactNode {
return children; return children;
} }
/**
 * Returns the hostname of `url` without a leading `www.`.
 *
 * @param url - Absolute URL to inspect.
 * @returns The hostname, or an empty string when `url` cannot be parsed as
 *   an absolute URL.
 */
function extractDomain(url: string): string {
	let hostname: string;
	try {
		({ hostname } = new URL(url));
	} catch {
		return "";
	}

	const prefix = "www.";
	return hostname.startsWith(prefix) ? hostname.slice(prefix.length) : hostname;
}
/**
 * Reports whether `href` is safe to use as a navigable link target.
 *
 * The value is resolved against a placeholder base so relative paths are
 * accepted; only http(s) results pass. Parsing with `URL` applies the same
 * normalisation a browser would, so schemes disguised with leading
 * whitespace or embedded control characters are still recognised.
 */
function isHttpLinkTarget(href: string): boolean {
	try {
		const { protocol } = new URL(href, "https://placeholder.invalid");
		return protocol === "http:" || protocol === "https:";
	} catch {
		return false;
	}
}

/**
 * Card used for images embedded through markdown `![alt](src)` syntax.
 *
 * Shows a zoomable preview, the alt text as caption (unless it is missing or
 * the generic "Image"), the source domain when one can be derived, and an
 * "Open" link to the original URL.
 *
 * `src` comes from model-written or scraped markdown, so it is treated as
 * untrusted: the "Open" link is rendered only when the URL resolves to
 * http(s), which keeps script-bearing schemes such as `javascript:` out of
 * an `href`.
 *
 * @param src - Image URL; nothing is rendered when it is missing or empty.
 * @param alt - Alternative text, also used as the caption.
 */
function MarkdownImage({ src, alt }: { src?: string; alt?: string }) {
	if (!src) return null;

	const domain = extractDomain(src);
	const canOpenOriginal = isHttpLinkTarget(src);
	const altText = alt || "Image";

	return (
		<div className="my-4 w-fit max-w-lg overflow-hidden rounded-2xl border bg-muted/30 select-none">
			<ImageRoot variant="ghost" size="full">
				<ImageZoom src={src} alt={altText}>
					<ImagePreview
						src={src}
						alt={altText}
						className="max-h-[20rem] w-auto max-w-full object-contain"
					/>
				</ImageZoom>
			</ImageRoot>
			<div className="flex items-center justify-between px-5 py-3">
				<div className="min-w-0 flex-1">
					{alt && alt !== "Image" && (
						<p className="text-sm font-semibold text-foreground line-clamp-2">{alt}</p>
					)}
					{domain && (
						<p className="text-xs text-muted-foreground mt-0.5 truncate">{domain}</p>
					)}
				</div>
				{canOpenOriginal && (
					<a
						href={src}
						target="_blank"
						rel="noopener noreferrer"
						className="ml-3 shrink-0 inline-flex items-center gap-1.5 rounded-lg border px-3 py-1.5 text-xs font-medium text-foreground transition-colors hover:bg-muted"
						onClick={(e) => e.stopPropagation()}
					>
						Open
						<ExternalLinkIcon className="size-3" />
					</a>
				)}
			</div>
		</div>
	);
}
const defaultComponents = memoizeMarkdownComponents({ const defaultComponents = memoizeMarkdownComponents({
h1: ({ className, children, ...props }) => ( h1: ({ className, children, ...props }) => (
<h1 <h1
@ -371,5 +422,6 @@ const defaultComponents = memoizeMarkdownComponents({
{processChildrenWithCitations(children)} {processChildrenWithCitations(children)}
</em> </em>
), ),
img: ({ src, alt }) => <MarkdownImage src={typeof src === "string" ? src : undefined} alt={alt} />,
CodeHeader, CodeHeader,
}); });

View file

@ -1058,7 +1058,7 @@ const TOOL_GROUPS: ToolGroup[] = [
}, },
{ {
label: "Generate", label: "Generate",
tools: ["generate_podcast", "generate_video_presentation", "generate_report", "generate_image", "display_image"], tools: ["generate_podcast", "generate_video_presentation", "generate_report", "generate_image"],
}, },
{ {
label: "Memory", label: "Memory",

View file

@ -13,6 +13,7 @@ import { MarkdownText } from "@/components/assistant-ui/markdown-text";
import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; import { ToolFallback } from "@/components/assistant-ui/tool-fallback";
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; import { DisplayImageToolUI } from "@/components/tool-ui/display-image";
import { GenerateImageToolUI } from "@/components/tool-ui/generate-image";
import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast";
import { GenerateReportToolUI } from "@/components/tool-ui/generate-report"; import { GenerateReportToolUI } from "@/components/tool-ui/generate-report";
import { GenerateVideoPresentationToolUI } from "@/components/tool-ui/video-presentation"; import { GenerateVideoPresentationToolUI } from "@/components/tool-ui/video-presentation";
@ -152,6 +153,7 @@ const PublicAssistantMessage: FC = () => {
generate_video_presentation: GenerateVideoPresentationToolUI, generate_video_presentation: GenerateVideoPresentationToolUI,
link_preview: LinkPreviewToolUI, link_preview: LinkPreviewToolUI,
display_image: DisplayImageToolUI, display_image: DisplayImageToolUI,
generate_image: GenerateImageToolUI,
scrape_webpage: ScrapeWebpageToolUI, scrape_webpage: ScrapeWebpageToolUI,
}, },
Fallback: ToolFallback, Fallback: ToolFallback,

View file

@ -23,6 +23,13 @@ export {
DisplayImageResultSchema, DisplayImageResultSchema,
DisplayImageToolUI, DisplayImageToolUI,
} from "./display-image"; } from "./display-image";
export {
type GenerateImageArgs,
GenerateImageArgsSchema,
type GenerateImageResult,
GenerateImageResultSchema,
GenerateImageToolUI,
} from "./generate-image";
export { GeneratePodcastToolUI } from "./generate-podcast"; export { GeneratePodcastToolUI } from "./generate-podcast";
export { GenerateReportToolUI } from "./generate-report"; export { GenerateReportToolUI } from "./generate-report";
export { GenerateVideoPresentationToolUI } from "./video-presentation"; export { GenerateVideoPresentationToolUI } from "./video-presentation";

View file

@ -5,7 +5,6 @@ import {
FileText, FileText,
Film, Film,
Globe, Globe,
ImageIcon,
Link2, Link2,
type LucideIcon, type LucideIcon,
Podcast, Podcast,
@ -20,7 +19,6 @@ const TOOL_ICONS: Record<string, LucideIcon> = {
generate_video_presentation: Film, generate_video_presentation: Film,
generate_report: FileText, generate_report: FileText,
link_preview: Link2, link_preview: Link2,
display_image: ImageIcon,
generate_image: Sparkles, generate_image: Sparkles,
scrape_webpage: ScanLine, scrape_webpage: ScanLine,
web_search: Globe, web_search: Globe,