diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml index e727b8d56..0bb00c398 100644 --- a/surfsense_backend/app/config/global_llm_config.example.yaml +++ b/surfsense_backend/app/config/global_llm_config.example.yaml @@ -208,8 +208,7 @@ global_image_generation_configs: model_name: "dall-e-3" api_key: "sk-your-openai-api-key-here" api_base: "" - rpm: 50 - tpm: 100000 + rpm: 50 # Requests per minute (image gen is rate-limited by RPM, not tokens) litellm_params: {} # Example: OpenAI GPT Image 1 @@ -221,7 +220,6 @@ global_image_generation_configs: api_key: "sk-your-openai-api-key-here" api_base: "" rpm: 50 - tpm: 100000 litellm_params: {} # Example: Azure OpenAI DALL-E 3 @@ -234,7 +232,6 @@ global_image_generation_configs: api_base: "https://your-resource.openai.azure.com" api_version: "2024-02-15-preview" rpm: 50 - tpm: 100000 litellm_params: base_model: "dall-e-3" @@ -247,7 +244,6 @@ global_image_generation_configs: # api_key: "your-openrouter-api-key-here" # api_base: "" # rpm: 30 - # tpm: 50000 # litellm_params: {} # Notes: @@ -262,17 +258,11 @@ global_image_generation_configs: # - rpm/tpm: Optional rate limits for load balancing (requests/tokens per minute) # These help the router distribute load evenly and avoid rate limit errors # -# AZURE-SPECIFIC NOTES: -# - Always add 'base_model' in litellm_params for Azure deployments -# - This fixes "Could not identify azure model 'X'" warnings -# - base_model should match the underlying OpenAI model (e.g., gpt-4o, gpt-4-turbo, gpt-3.5-turbo) -# - model_name format: "azure/" -# - api_version: Use a recent Azure API version (e.g., "2024-02-15-preview") -# - See: https://docs.litellm.ai/docs/proxy/cost_tracking#spend-tracking-for-azure-openai-models # # IMAGE GENERATION NOTES: # - Image generation configs use the same ID scheme as LLM configs (negative for global) # - Supported models: dall-e-2, dall-e-3, gpt-image-1 (OpenAI), azure/* (Azure), # bedrock/* (AWS), vertex_ai/* (Google), recraft/* (Recraft), openrouter/* (OpenRouter) # - The router uses litellm.aimage_generation() for async image generation -# - api_version is required for Azure image generation deployments +# - Only RPM (requests per minute) is relevant for image generation rate limiting. +# TPM (tokens per minute) does not apply since image APIs are billed/rate-limited per request, not per token. diff --git a/surfsense_backend/app/services/image_gen_router_service.py b/surfsense_backend/app/services/image_gen_router_service.py index eb6936efd..f45a6ab63 100644 --- a/surfsense_backend/app/services/image_gen_router_service.py +++ b/surfsense_backend/app/services/image_gen_router_service.py @@ -183,11 +183,11 @@ class ImageGenRouterService: "litellm_params": litellm_params, } - # Add rate limits from config if available + # Add RPM rate limit from config if available + # Note: TPM (tokens per minute) is not applicable for image generation + # since image APIs are rate-limited by requests, not tokens. if config.get("rpm"): deployment["rpm"] = config["rpm"] - if config.get("tpm"): - deployment["tpm"] = config["tpm"] return deployment @@ -219,10 +219,6 @@ class ImageGenRouterService: prompt: str, model: str = "auto", n: int | None = None, - quality: str | None = None, - size: str | None = None, - style: str | None = None, - response_format: str | None = None, timeout: int = 600, **kwargs, ) -> ImageResponse: @@ -232,16 +228,16 @@ class ImageGenRouterService: Uses Router.aimage_generation() which distributes requests across configured image generation deployments. + Parameters like size, quality, style, and response_format are intentionally + omitted to keep the interface model-agnostic. Providers use their own + sensible defaults. If needed, pass them via **kwargs. + Args: prompt: Text description of the desired image(s) model: Model alias (default "auto" for router routing) n: Number of images to generate - quality: Image quality setting - size: Image size - style: Style parameter - response_format: "url" or "b64_json" timeout: Request timeout in seconds - **kwargs: Additional litellm params + **kwargs: Additional provider-specific params (size, quality, etc.) Returns: ImageResponse from litellm @@ -264,14 +260,6 @@ class ImageGenRouterService: } if n is not None: gen_kwargs["n"] = n - if quality is not None: - gen_kwargs["quality"] = quality - if size is not None: - gen_kwargs["size"] = size - if style is not None: - gen_kwargs["style"] = style - if response_format is not None: - gen_kwargs["response_format"] = response_format gen_kwargs.update(kwargs) return await instance._router.aimage_generation(**gen_kwargs)