chore: made generate_image more agnostic

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-05 17:18:27 -08:00
parent 19e2857343
commit f85adefe5e
15 changed files with 176 additions and 112 deletions

View file

@@ -106,8 +106,6 @@ You have access to the following tools:
- Trigger phrases: "generate an image of", "create a picture of", "draw me", "make an image", "design a logo", "create artwork"
- Args:
- prompt: A detailed text description of the image to generate. Be specific about subject, style, colors, composition, and mood.
- size: Image size. Options: "1024x1024" (square, default), "1536x1024" (landscape), "1024x1536" (portrait), "1792x1024" (wide)
- quality: Image quality. Options: "auto" (default), "high", "medium", "low"
- n: Number of images to generate (1-4, default: 1)
- Returns: A dictionary with the generated image URL in the "src" field, along with metadata.
- CRITICAL: After calling generate_image, you MUST call `display_image` with the returned "src" URL
@@ -300,19 +298,19 @@ You have access to the following tools:
- Then provide your explanation, referencing the displayed image
- User: "Generate an image of a cat"
- Step 1: `generate_image(prompt="A fluffy orange tabby cat sitting on a windowsill, bathed in warm golden sunlight, soft bokeh background with green houseplants, photorealistic style, cozy atmosphere", size="1024x1024", quality="auto")`
- Step 1: `generate_image(prompt="A fluffy orange tabby cat sitting on a windowsill, bathed in warm golden sunlight, soft bokeh background with green houseplants, photorealistic style, cozy atmosphere")`
- Step 2: Use the returned "src" URL to display it: `display_image(src="<returned_url>", alt="A fluffy orange tabby cat on a windowsill", title="Generated Image")`
- User: "Create a landscape painting of mountains"
- Step 1: `generate_image(prompt="Majestic snow-capped mountain range at sunset, dramatic orange and purple sky, alpine meadow with wildflowers in the foreground, oil painting style with visible brushstrokes, inspired by the Hudson River School art movement", size="1536x1024", quality="high")`
- Step 1: `generate_image(prompt="Majestic snow-capped mountain range at sunset, dramatic orange and purple sky, alpine meadow with wildflowers in the foreground, oil painting style with visible brushstrokes, inspired by the Hudson River School art movement")`
- Step 2: `display_image(src="<returned_url>", alt="Mountain landscape painting", title="Generated Image")`
- User: "Draw me a logo for a coffee shop called Bean Dream"
- Step 1: `generate_image(prompt="Minimalist modern logo design for a coffee shop called 'Bean Dream', featuring a stylized coffee bean with dream-like swirls of steam, clean vector style, warm brown and cream color palette, white background, professional branding", size="1024x1024", quality="high")`
- Step 1: `generate_image(prompt="Minimalist modern logo design for a coffee shop called 'Bean Dream', featuring a stylized coffee bean with dream-like swirls of steam, clean vector style, warm brown and cream color palette, white background, professional branding")`
- Step 2: `display_image(src="<returned_url>", alt="Bean Dream coffee shop logo", title="Generated Image")`
- User: "Make a wide banner image for my blog about AI"
- Step 1: `generate_image(prompt="Wide banner illustration for an AI technology blog, featuring abstract neural network patterns, glowing blue and purple connections, modern futuristic aesthetic, digital art style, clean and professional", size="1792x1024", quality="high")`
- Step 1: `generate_image(prompt="Wide banner illustration for an AI technology blog, featuring abstract neural network patterns, glowing blue and purple connections, modern futuristic aesthetic, digital art style, clean and professional")`
- Step 2: `display_image(src="<returned_url>", alt="AI blog banner", title="Generated Image")`
</tool_call_examples>
"""

View file

@@ -21,12 +21,12 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.db import ImageGeneration, ImageGenerationConfig, SearchSpace
from app.utils.signed_image_urls import generate_image_token
from app.services.image_gen_router_service import (
IMAGE_GEN_AUTO_MODE_ID,
ImageGenRouterService,
is_image_gen_auto_mode,
)
from app.utils.signed_image_urls import generate_image_token
logger = logging.getLogger(__name__)
@@ -76,8 +76,6 @@ def create_generate_image_tool(
@tool
async def generate_image(
prompt: str,
size: str = "1024x1024",
quality: str = "auto",
n: int = 1,
) -> dict[str, Any]:
"""
@@ -89,10 +87,6 @@ def create_generate_image_tool(
Args:
prompt: A detailed text description of the image to generate.
Be specific about subject, style, colors, composition, and mood.
size: Image size. Options: "1024x1024" (square), "1536x1024" (landscape),
"1024x1536" (portrait), "1792x1024" (wide). Default: "1024x1024"
quality: Image quality. Options: "auto" (default), "high", "medium", "low".
Default: "auto"
n: Number of images to generate (1-4). Default: 1
Returns:
@@ -112,18 +106,14 @@ def create_generate_image_tool(
)
# Build generation kwargs
# NOTE: 'style' is intentionally excluded from gen_kwargs because
# it is only supported by DALL-E 3 and causes errors with other
# models (e.g. gpt-image-1 rejects it as an unknown parameter).
# Since we can't predict which model auto-mode will route to,
# it's safest to omit it.
# NOTE: size, quality, and style are intentionally NOT passed.
# Different models support different values for these params
# (e.g. DALL-E 3 wants "hd"/"standard" for quality while
# gpt-image-1 wants "high"/"medium"/"low"; size options also
# differ). Letting the model use its own defaults avoids errors.
gen_kwargs: dict[str, Any] = {}
if n is not None and n > 1:
gen_kwargs["n"] = n
if quality:
gen_kwargs["quality"] = quality
if size:
gen_kwargs["size"] = size
# Call litellm based on config type
if is_image_gen_auto_mode(config_id):
@@ -199,8 +189,6 @@ def create_generate_image_tool(
prompt=prompt,
model=getattr(response, "_hidden_params", {}).get("model"),
n=n,
quality=quality,
size=size,
image_generation_config_id=config_id,
response_data=response_dict,
search_space_id=search_space_id,