Merge remote-tracking branch 'upstream/dev' into feat/azure-ocr

This commit is contained in:
Anish Sarkar 2026-04-08 05:00:32 +05:30
commit 6038f6dfc0
84 changed files with 6041 additions and 1065 deletions

View file

@ -102,6 +102,44 @@ def load_global_image_gen_configs():
return []
def load_global_vision_llm_configs():
    """Load global vision LLM configurations from the shared YAML config file.

    Returns:
        list: The entries under the ``global_vision_llm_configs`` key, or an
        empty list when the config file is missing, empty, malformed, or
        unreadable. Failures are reported as a console warning, never raised.
    """
    global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"
    if not global_config_file.exists():
        return []
    try:
        with open(global_config_file, encoding="utf-8") as f:
            data = yaml.safe_load(f)
        # safe_load returns None for an empty file (and any scalar/list for a
        # malformed one); treat those as "no configs" instead of letting
        # `.get` raise AttributeError and print a spurious warning below.
        if not isinstance(data, dict):
            return []
        return data.get("global_vision_llm_configs", [])
    except Exception as e:
        print(f"Warning: Failed to load global vision LLM configs: {e}")
        return []
def load_vision_llm_router_settings():
    """Load router settings for Vision LLM Auto mode from the YAML config.

    Settings found in the file are merged over built-in defaults, so a
    partial ``vision_llm_router_settings`` mapping only overrides the keys
    it provides.

    Returns:
        dict: The merged router settings; the defaults alone when the file
        is missing, empty, malformed, or unreadable.
    """
    default_settings = {
        "routing_strategy": "usage-based-routing",
        "num_retries": 3,
        "allowed_fails": 3,
        "cooldown_time": 60,
    }
    global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"
    if not global_config_file.exists():
        return default_settings
    try:
        with open(global_config_file, encoding="utf-8") as f:
            data = yaml.safe_load(f)
        # safe_load yields None for an empty file; guard so `.get` doesn't
        # raise and fall into the warning branch for a benign empty config.
        if not isinstance(data, dict):
            return default_settings
        settings = data.get("vision_llm_router_settings", {})
        # A null/non-mapping value under the key would break the dict merge.
        if not isinstance(settings, dict):
            return default_settings
        return {**default_settings, **settings}
    except Exception as e:
        print(f"Warning: Failed to load vision LLM router settings: {e}")
        return default_settings
def load_image_gen_router_settings():
"""
Load router settings for image generation Auto mode from YAML file.
@ -182,6 +220,29 @@ def initialize_image_gen_router():
print(f"Warning: Failed to initialize Image Generation Router: {e}")
def initialize_vision_llm_router():
    """Initialize the shared Vision LLM Router from the global YAML config.

    When no global vision configs are present, Auto mode is simply left
    unavailable (an informational message is printed). Initialization
    errors are downgraded to console warnings rather than raised.
    """
    configs = load_global_vision_llm_configs()
    settings = load_vision_llm_router_settings()

    # Nothing to route across — skip setup entirely.
    if not configs:
        print(
            "Info: No global vision LLM configs found, "
            "Vision LLM Auto mode will not be available"
        )
        return

    try:
        # Imported lazily so the app still starts if the service is absent.
        from app.services.vision_llm_router_service import VisionLLMRouterService

        VisionLLMRouterService.initialize(configs, settings)
        strategy = settings.get("routing_strategy", "usage-based-routing")
        print(
            f"Info: Vision LLM Router initialized with {len(configs)} models "
            f"(strategy: {strategy})"
        )
    except Exception as e:
        print(f"Warning: Failed to initialize Vision LLM Router: {e}")
class Config:
# Check if ffmpeg is installed
if not is_ffmpeg_installed():
@ -335,6 +396,12 @@ class Config:
# Router settings for Image Generation Auto mode
IMAGE_GEN_ROUTER_SETTINGS = load_image_gen_router_settings()
# Global Vision LLM Configurations (optional)
GLOBAL_VISION_LLM_CONFIGS = load_global_vision_llm_configs()
# Router settings for Vision LLM Auto mode
VISION_LLM_ROUTER_SETTINGS = load_vision_llm_router_settings()
# Chonkie Configuration | Edit this to your needs
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
# Azure OpenAI credentials from environment variables

View file

@ -263,6 +263,82 @@ global_image_generation_configs:
# rpm: 30
# litellm_params: {}
# =============================================================================
# Vision LLM Configuration
# =============================================================================
# These configurations power the vision autocomplete feature (screenshot analysis).
# Only vision-capable models should be used here (e.g. GPT-4o, Gemini Pro, Claude 3).
# Supported providers: OpenAI, Anthropic, Google, Azure OpenAI, Vertex AI, Bedrock,
# xAI, OpenRouter, Ollama, Groq, Together AI, Fireworks AI, DeepSeek, Mistral, Custom
#
# Auto mode (ID 0) uses LiteLLM Router for load balancing across all vision configs.
# Router Settings for Vision LLM Auto Mode
vision_llm_router_settings:
routing_strategy: "usage-based-routing"
num_retries: 3
allowed_fails: 3
cooldown_time: 60
global_vision_llm_configs:
# Example: OpenAI GPT-4o (recommended for vision)
- id: -1
name: "Global GPT-4o Vision"
description: "OpenAI's GPT-4o with strong vision capabilities"
provider: "OPENAI"
model_name: "gpt-4o"
api_key: "sk-your-openai-api-key-here"
api_base: ""
rpm: 500
tpm: 100000
litellm_params:
temperature: 0.3
max_tokens: 1000
# Example: Google Gemini 2.0 Flash
- id: -2
name: "Global Gemini 2.0 Flash"
description: "Google's fast vision model with large context"
provider: "GOOGLE"
model_name: "gemini-2.0-flash"
api_key: "your-google-ai-api-key-here"
api_base: ""
rpm: 1000
tpm: 200000
litellm_params:
temperature: 0.3
max_tokens: 1000
# Example: Anthropic Claude 3.5 Sonnet
- id: -3
name: "Global Claude 3.5 Sonnet Vision"
description: "Anthropic's Claude 3.5 Sonnet with vision support"
provider: "ANTHROPIC"
model_name: "claude-3-5-sonnet-20241022"
api_key: "sk-ant-your-anthropic-api-key-here"
api_base: ""
rpm: 1000
tpm: 100000
litellm_params:
temperature: 0.3
max_tokens: 1000
# Example: Azure OpenAI GPT-4o
# - id: -4
# name: "Global Azure GPT-4o Vision"
# description: "Azure-hosted GPT-4o for vision analysis"
# provider: "AZURE_OPENAI"
# model_name: "azure/gpt-4o-deployment"
# api_key: "your-azure-api-key-here"
# api_base: "https://your-resource.openai.azure.com"
# api_version: "2024-02-15-preview"
# rpm: 500
# tpm: 100000
# litellm_params:
# temperature: 0.3
# max_tokens: 1000
# base_model: "gpt-4o"
# Notes:
# - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing
# - Use negative IDs to distinguish global configs from user configs (NewLLMConfig in DB)
@ -283,3 +359,9 @@ global_image_generation_configs:
# - The router uses litellm.aimage_generation() for async image generation
# - Only RPM (requests per minute) is relevant for image generation rate limiting.
# TPM (tokens per minute) does not apply since image APIs are billed/rate-limited per request, not per token.
#
# VISION LLM NOTES:
# - Vision configs use the same ID scheme (negative for global, positive for user DB)
# - Only use vision-capable models (GPT-4o, Gemini, Claude 3, etc.)
# - Lower temperature (0.3) is recommended for accurate screenshot analysis
# - Lower max_tokens (1000) is sufficient since autocomplete produces short suggestions

View file

@ -0,0 +1,23 @@
[
{"value": "gpt-4o", "label": "GPT-4o", "provider": "OPENAI", "context_window": "128K"},
{"value": "gpt-4o-mini", "label": "GPT-4o Mini", "provider": "OPENAI", "context_window": "128K"},
{"value": "gpt-4-turbo", "label": "GPT-4 Turbo", "provider": "OPENAI", "context_window": "128K"},
{"value": "claude-sonnet-4-20250514", "label": "Claude Sonnet 4", "provider": "ANTHROPIC", "context_window": "200K"},
{"value": "claude-3-7-sonnet-20250219", "label": "Claude 3.7 Sonnet", "provider": "ANTHROPIC", "context_window": "200K"},
{"value": "claude-3-5-sonnet-20241022", "label": "Claude 3.5 Sonnet", "provider": "ANTHROPIC", "context_window": "200K"},
{"value": "claude-3-opus-20240229", "label": "Claude 3 Opus", "provider": "ANTHROPIC", "context_window": "200K"},
{"value": "claude-3-haiku-20240307", "label": "Claude 3 Haiku", "provider": "ANTHROPIC", "context_window": "200K"},
{"value": "gemini-2.5-flash", "label": "Gemini 2.5 Flash", "provider": "GOOGLE", "context_window": "1M"},
{"value": "gemini-2.5-pro", "label": "Gemini 2.5 Pro", "provider": "GOOGLE", "context_window": "1M"},
{"value": "gemini-2.0-flash", "label": "Gemini 2.0 Flash", "provider": "GOOGLE", "context_window": "1M"},
{"value": "gemini-1.5-pro", "label": "Gemini 1.5 Pro", "provider": "GOOGLE", "context_window": "1M"},
{"value": "gemini-1.5-flash", "label": "Gemini 1.5 Flash", "provider": "GOOGLE", "context_window": "1M"},
{"value": "pixtral-large-latest", "label": "Pixtral Large", "provider": "MISTRAL", "context_window": "128K"},
{"value": "pixtral-12b-2409", "label": "Pixtral 12B", "provider": "MISTRAL", "context_window": "128K"},
{"value": "grok-2-vision-1212", "label": "Grok 2 Vision", "provider": "XAI", "context_window": "32K"},
{"value": "llava", "label": "LLaVA", "provider": "OLLAMA"},
{"value": "bakllava", "label": "BakLLaVA", "provider": "OLLAMA"},
{"value": "llava-llama3", "label": "LLaVA Llama 3", "provider": "OLLAMA"},
{"value": "llama-4-scout-17b-16e-instruct", "label": "Llama 4 Scout 17B", "provider": "GROQ", "context_window": "128K"},
{"value": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "label": "Llama 4 Scout 17B", "provider": "TOGETHER_AI", "context_window": "128K"}
]