mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-01 20:03:30 +02:00
Merge remote-tracking branch 'upstream/dev' into feat/azure-ocr
This commit is contained in:
commit
6038f6dfc0
84 changed files with 6041 additions and 1065 deletions
|
|
@ -102,6 +102,44 @@ def load_global_image_gen_configs():
|
|||
return []
|
||||
|
||||
|
||||
def load_global_vision_llm_configs():
    """Load global vision LLM configurations from the shared YAML config file.

    Reads ``app/config/global_llm_config.yaml`` (relative to ``BASE_DIR``) and
    returns the ``global_vision_llm_configs`` list from it.

    Returns:
        list: Vision LLM config dicts, or an empty list when the file is
        missing, empty, or unreadable. Never raises: a broken config should
        disable the feature, not block application startup.
    """
    global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"

    if not global_config_file.exists():
        return []

    try:
        with open(global_config_file, encoding="utf-8") as f:
            # safe_load returns None for an empty file; guard so an empty
            # config doesn't raise AttributeError and log a spurious warning.
            data = yaml.safe_load(f) or {}
        return data.get("global_vision_llm_configs", [])
    except Exception as e:
        print(f"Warning: Failed to load global vision LLM configs: {e}")
        return []
|
||||
|
||||
|
||||
def load_vision_llm_router_settings():
    """Load LiteLLM Router settings for Vision LLM Auto mode.

    Defaults are merged with (and overridden by) the
    ``vision_llm_router_settings`` mapping from
    ``app/config/global_llm_config.yaml`` when that file and key exist.

    Returns:
        dict: Router settings (``routing_strategy``, ``num_retries``,
        ``allowed_fails``, ``cooldown_time``). Falls back to the built-in
        defaults on any error; never raises.
    """
    default_settings = {
        "routing_strategy": "usage-based-routing",
        "num_retries": 3,
        "allowed_fails": 3,
        "cooldown_time": 60,
    }

    global_config_file = BASE_DIR / "app" / "config" / "global_llm_config.yaml"

    if not global_config_file.exists():
        return default_settings

    try:
        with open(global_config_file, encoding="utf-8") as f:
            # safe_load returns None for an empty file; guard before .get().
            data = yaml.safe_load(f) or {}
        # A present-but-null key would yield None; coerce to {} so the
        # merge below cannot raise TypeError.
        settings = data.get("vision_llm_router_settings") or {}
        # File values override defaults; missing keys keep their defaults.
        return {**default_settings, **settings}
    except Exception as e:
        print(f"Warning: Failed to load vision LLM router settings: {e}")
        return default_settings
|
||||
|
||||
|
||||
def load_image_gen_router_settings():
|
||||
"""
|
||||
Load router settings for image generation Auto mode from YAML file.
|
||||
|
|
@ -182,6 +220,29 @@ def initialize_image_gen_router():
|
|||
print(f"Warning: Failed to initialize Image Generation Router: {e}")
|
||||
|
||||
|
||||
def initialize_vision_llm_router():
    """Set up the Vision LLM Router from the global config file.

    When no global vision configs are present this is a no-op (an info
    message is printed and Auto mode stays unavailable). Initialization
    failures are logged as warnings and never raised.
    """
    configs = load_global_vision_llm_configs()
    settings = load_vision_llm_router_settings()

    if not configs:
        message = (
            "Info: No global vision LLM configs found, "
            "Vision LLM Auto mode will not be available"
        )
        print(message)
        return

    try:
        # NOTE(review): imported here rather than at module top — presumably
        # to avoid an import cycle; confirm before hoisting.
        from app.services.vision_llm_router_service import VisionLLMRouterService

        VisionLLMRouterService.initialize(configs, settings)
        strategy = settings.get("routing_strategy", "usage-based-routing")
        print(
            f"Info: Vision LLM Router initialized with {len(configs)} models "
            f"(strategy: {strategy})"
        )
    except Exception as e:
        print(f"Warning: Failed to initialize Vision LLM Router: {e}")
|
||||
|
||||
|
||||
class Config:
|
||||
# Check if ffmpeg is installed
|
||||
if not is_ffmpeg_installed():
|
||||
|
|
@ -335,6 +396,12 @@ class Config:
|
|||
# Router settings for Image Generation Auto mode
|
||||
IMAGE_GEN_ROUTER_SETTINGS = load_image_gen_router_settings()
|
||||
|
||||
# Global Vision LLM Configurations (optional)
|
||||
GLOBAL_VISION_LLM_CONFIGS = load_global_vision_llm_configs()
|
||||
|
||||
# Router settings for Vision LLM Auto mode
|
||||
VISION_LLM_ROUTER_SETTINGS = load_vision_llm_router_settings()
|
||||
|
||||
# Chonkie Configuration | Edit this to your needs
|
||||
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
|
||||
# Azure OpenAI credentials from environment variables
|
||||
|
|
|
|||
|
|
@ -263,6 +263,82 @@ global_image_generation_configs:
|
|||
# rpm: 30
|
||||
# litellm_params: {}
|
||||
|
||||
# =============================================================================
|
||||
# Vision LLM Configuration
|
||||
# =============================================================================
|
||||
# These configurations power the vision autocomplete feature (screenshot analysis).
|
||||
# Only vision-capable models should be used here (e.g. GPT-4o, Gemini Pro, Claude 3).
|
||||
# Supported providers: OpenAI, Anthropic, Google, Azure OpenAI, Vertex AI, Bedrock,
|
||||
# xAI, OpenRouter, Ollama, Groq, Together AI, Fireworks AI, DeepSeek, Mistral, Custom
|
||||
#
|
||||
# Auto mode (ID 0) uses LiteLLM Router for load balancing across all vision configs.
|
||||
|
||||
# Router Settings for Vision LLM Auto Mode
|
||||
vision_llm_router_settings:
|
||||
routing_strategy: "usage-based-routing"
|
||||
num_retries: 3
|
||||
allowed_fails: 3
|
||||
cooldown_time: 60
|
||||
|
||||
global_vision_llm_configs:
|
||||
# Example: OpenAI GPT-4o (recommended for vision)
|
||||
- id: -1
|
||||
name: "Global GPT-4o Vision"
|
||||
description: "OpenAI's GPT-4o with strong vision capabilities"
|
||||
provider: "OPENAI"
|
||||
model_name: "gpt-4o"
|
||||
api_key: "sk-your-openai-api-key-here"
|
||||
api_base: ""
|
||||
rpm: 500
|
||||
tpm: 100000
|
||||
litellm_params:
|
||||
temperature: 0.3
|
||||
max_tokens: 1000
|
||||
|
||||
# Example: Google Gemini 2.0 Flash
|
||||
- id: -2
|
||||
name: "Global Gemini 2.0 Flash"
|
||||
description: "Google's fast vision model with large context"
|
||||
provider: "GOOGLE"
|
||||
model_name: "gemini-2.0-flash"
|
||||
api_key: "your-google-ai-api-key-here"
|
||||
api_base: ""
|
||||
rpm: 1000
|
||||
tpm: 200000
|
||||
litellm_params:
|
||||
temperature: 0.3
|
||||
max_tokens: 1000
|
||||
|
||||
# Example: Anthropic Claude 3.5 Sonnet
|
||||
- id: -3
|
||||
name: "Global Claude 3.5 Sonnet Vision"
|
||||
description: "Anthropic's Claude 3.5 Sonnet with vision support"
|
||||
provider: "ANTHROPIC"
|
||||
model_name: "claude-3-5-sonnet-20241022"
|
||||
api_key: "sk-ant-your-anthropic-api-key-here"
|
||||
api_base: ""
|
||||
rpm: 1000
|
||||
tpm: 100000
|
||||
litellm_params:
|
||||
temperature: 0.3
|
||||
max_tokens: 1000
|
||||
|
||||
# Example: Azure OpenAI GPT-4o
|
||||
# - id: -4
|
||||
# name: "Global Azure GPT-4o Vision"
|
||||
# description: "Azure-hosted GPT-4o for vision analysis"
|
||||
# provider: "AZURE_OPENAI"
|
||||
# model_name: "azure/gpt-4o-deployment"
|
||||
# api_key: "your-azure-api-key-here"
|
||||
# api_base: "https://your-resource.openai.azure.com"
|
||||
# api_version: "2024-02-15-preview"
|
||||
# rpm: 500
|
||||
# tpm: 100000
|
||||
# litellm_params:
|
||||
# temperature: 0.3
|
||||
# max_tokens: 1000
|
||||
# base_model: "gpt-4o"
|
||||
|
||||
# Notes:
|
||||
# - ID 0 is reserved for "Auto" mode - uses LiteLLM Router for load balancing
|
||||
# - Use negative IDs to distinguish global configs from user configs (NewLLMConfig in DB)
|
||||
|
|
@ -283,3 +359,9 @@ global_image_generation_configs:
|
|||
# - The router uses litellm.aimage_generation() for async image generation
|
||||
# - Only RPM (requests per minute) is relevant for image generation rate limiting.
|
||||
# TPM (tokens per minute) does not apply since image APIs are billed/rate-limited per request, not per token.
|
||||
#
|
||||
# VISION LLM NOTES:
|
||||
# - Vision configs use the same ID scheme (negative for global, positive for user DB)
|
||||
# - Only use vision-capable models (GPT-4o, Gemini, Claude 3, etc.)
|
||||
# - Lower temperature (0.3) is recommended for accurate screenshot analysis
|
||||
# - Lower max_tokens (1000) is sufficient since autocomplete produces short suggestions
|
||||
|
|
|
|||
23
surfsense_backend/app/config/vision_model_list_fallback.json
Normal file
23
surfsense_backend/app/config/vision_model_list_fallback.json
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
[
|
||||
{"value": "gpt-4o", "label": "GPT-4o", "provider": "OPENAI", "context_window": "128K"},
|
||||
{"value": "gpt-4o-mini", "label": "GPT-4o Mini", "provider": "OPENAI", "context_window": "128K"},
|
||||
{"value": "gpt-4-turbo", "label": "GPT-4 Turbo", "provider": "OPENAI", "context_window": "128K"},
|
||||
{"value": "claude-sonnet-4-20250514", "label": "Claude Sonnet 4", "provider": "ANTHROPIC", "context_window": "200K"},
|
||||
{"value": "claude-3-7-sonnet-20250219", "label": "Claude 3.7 Sonnet", "provider": "ANTHROPIC", "context_window": "200K"},
|
||||
{"value": "claude-3-5-sonnet-20241022", "label": "Claude 3.5 Sonnet", "provider": "ANTHROPIC", "context_window": "200K"},
|
||||
{"value": "claude-3-opus-20240229", "label": "Claude 3 Opus", "provider": "ANTHROPIC", "context_window": "200K"},
|
||||
{"value": "claude-3-haiku-20240307", "label": "Claude 3 Haiku", "provider": "ANTHROPIC", "context_window": "200K"},
|
||||
{"value": "gemini-2.5-flash", "label": "Gemini 2.5 Flash", "provider": "GOOGLE", "context_window": "1M"},
|
||||
{"value": "gemini-2.5-pro", "label": "Gemini 2.5 Pro", "provider": "GOOGLE", "context_window": "1M"},
|
||||
{"value": "gemini-2.0-flash", "label": "Gemini 2.0 Flash", "provider": "GOOGLE", "context_window": "1M"},
|
||||
{"value": "gemini-1.5-pro", "label": "Gemini 1.5 Pro", "provider": "GOOGLE", "context_window": "1M"},
|
||||
{"value": "gemini-1.5-flash", "label": "Gemini 1.5 Flash", "provider": "GOOGLE", "context_window": "1M"},
|
||||
{"value": "pixtral-large-latest", "label": "Pixtral Large", "provider": "MISTRAL", "context_window": "128K"},
|
||||
{"value": "pixtral-12b-2409", "label": "Pixtral 12B", "provider": "MISTRAL", "context_window": "128K"},
|
||||
{"value": "grok-2-vision-1212", "label": "Grok 2 Vision", "provider": "XAI", "context_window": "32K"},
|
||||
{"value": "llava", "label": "LLaVA", "provider": "OLLAMA"},
|
||||
{"value": "bakllava", "label": "BakLLaVA", "provider": "OLLAMA"},
|
||||
{"value": "llava-llama3", "label": "LLaVA Llama 3", "provider": "OLLAMA"},
|
||||
{"value": "llama-4-scout-17b-16e-instruct", "label": "Llama 4 Scout 17B", "provider": "GROQ", "context_window": "128K"},
|
||||
{"value": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "label": "Llama 4 Scout 17B", "provider": "TOGETHER_AI", "context_window": "128K"}
|
||||
]
|
||||
Loading…
Add table
Add a link
Reference in a new issue