feat: introduce display image tool for enhanced image rendering in chat with metadata support

2026-04-26 01:06:23 +02:00 · 2025-12-23 01:11:56 +05:30 · 2025-12-23 01:11:56 +05:30 · da7cb81252
commit da7cb81252
parent 4b69fdf214
8 changed files with 709 additions and 1 deletions
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -298,6 +298,27 @@ async def stream_new_chat(
                        status="in_progress",
                        items=last_active_step_items,
                    )
+                elif tool_name == "display_image":
+                    src = (
+                        tool_input.get("src", "")
+                        if isinstance(tool_input, dict)
+                        else str(tool_input)
+                    )
+                    title = (
+                        tool_input.get("title", "")
+                        if isinstance(tool_input, dict)
+                        else ""
+                    )
+                    last_active_step_title = "Displaying image"
+                    last_active_step_items = [
+                        f"Image: {title[:50] if title else src[:50]}{'...' if len(title or src) > 50 else ''}"
+                    ]
+                    yield streaming_service.format_thinking_step(
+                        step_id=tool_step_id,
+                        title="Displaying image",
+                        status="in_progress",
+                        items=last_active_step_items,
+                    )
                elif tool_name == "generate_podcast":
                    podcast_title = (
                        tool_input.get("podcast_title", "SurfSense Podcast")
@@ -367,6 +388,16 @@ async def stream_new_chat(
                        f"Fetching link preview: {url[:80]}{'...' if len(url) > 80 else ''}",
                        "info",
                    )
+                elif tool_name == "display_image":
+                    src = (
+                        tool_input.get("src", "")
+                        if isinstance(tool_input, dict)
+                        else str(tool_input)
+                    )
+                    yield streaming_service.format_terminal_info(
+                        f"Displaying image: {src[:60]}{'...' if len(src) > 60 else ''}",
+                        "info",
+                    )
                elif tool_name == "generate_podcast":
                    title = (
                        tool_input.get("podcast_title", "SurfSense Podcast")
@@ -453,6 +484,24 @@ async def stream_new_chat(
                        status="completed",
                        items=completed_items,
                    )
+                elif tool_name == "display_image":
+                    # Build completion items for image display
+                    if isinstance(tool_output, dict):
+                        title = tool_output.get("title", "")
+                        alt = tool_output.get("alt", "Image")
+                        display_name = title or alt
+                        completed_items = [
+                            *last_active_step_items,
+                            f"Showing: {display_name[:50]}{'...' if len(display_name) > 50 else ''}",
+                        ]
+                    else:
+                        completed_items = [*last_active_step_items, "Image displayed"]
+                    yield streaming_service.format_thinking_step(
+                        step_id=original_step_id,
+                        title="Displaying image",
+                        status="completed",
+                        items=completed_items,
+                    )
                elif tool_name == "generate_podcast":
                    # Build detailed completion items based on podcast status
                    podcast_status = (
@@ -566,6 +615,21 @@ async def stream_new_chat(
                            f"Link preview failed: {error_msg}",
                            "error",
                        )
+                elif tool_name == "display_image":
+                    # Stream the full image result so frontend can render the Image component
+                    yield streaming_service.format_tool_output_available(
+                        tool_call_id,
+                        tool_output
+                        if isinstance(tool_output, dict)
+                        else {"result": tool_output},
+                    )
+                    # Send terminal message
+                    if isinstance(tool_output, dict):
+                        title = tool_output.get("title") or tool_output.get("alt", "Image")
+                        yield streaming_service.format_terminal_info(
+                            f"Image displayed: {title[:40]}{'...' if len(title) > 40 else ''}",
+                            "success",
+                        )
                elif tool_name == "search_knowledge_base":
                    # Don't stream the full output for search (can be very large), just acknowledge
                    yield streaming_service.format_tool_output_available(