mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
try(hotpatch): add autoscaling command
This commit is contained in:
parent
8fb5a7fb8f
commit
6f92eac3da
3 changed files with 93 additions and 3 deletions
|
|
@ -122,8 +122,52 @@ global_llm_configs:
|
||||||
use_default_system_instructions: false
|
use_default_system_instructions: false
|
||||||
citations_enabled: true
|
citations_enabled: true
|
||||||
|
|
||||||
# Example: Groq - Fast inference
|
# Example: Azure OpenAI GPT-4o
|
||||||
|
# IMPORTANT: For Azure deployments, always include 'base_model' in litellm_params
|
||||||
|
# to enable accurate token counting, cost tracking, and max token limits
|
||||||
- id: -5
|
- id: -5
|
||||||
|
name: "Global Azure GPT-4o"
|
||||||
|
description: "Azure OpenAI GPT-4o deployment"
|
||||||
|
provider: "AZURE"
|
||||||
|
# model_name format for Azure: azure/<your-deployment-name>
|
||||||
|
model_name: "azure/gpt-4o-deployment"
|
||||||
|
api_key: "your-azure-api-key-here"
|
||||||
|
api_base: "https://your-resource.openai.azure.com"
|
||||||
|
api_version: "2024-02-15-preview" # Azure API version
|
||||||
|
rpm: 1000
|
||||||
|
tpm: 150000
|
||||||
|
litellm_params:
|
||||||
|
temperature: 0.7
|
||||||
|
max_tokens: 4000
|
||||||
|
# REQUIRED for Azure: Specify the underlying OpenAI model
|
||||||
|
# This fixes "Could not identify azure model" warnings
|
||||||
|
# Common base_model values: gpt-4, gpt-4-turbo, gpt-4o, gpt-4o-mini, gpt-3.5-turbo
|
||||||
|
base_model: "gpt-4o"
|
||||||
|
system_instructions: ""
|
||||||
|
use_default_system_instructions: true
|
||||||
|
citations_enabled: true
|
||||||
|
|
||||||
|
# Example: Azure OpenAI GPT-4 Turbo
|
||||||
|
- id: -6
|
||||||
|
name: "Global Azure GPT-4 Turbo"
|
||||||
|
description: "Azure OpenAI GPT-4 Turbo deployment"
|
||||||
|
provider: "AZURE"
|
||||||
|
model_name: "azure/gpt-4-turbo-deployment"
|
||||||
|
api_key: "your-azure-api-key-here"
|
||||||
|
api_base: "https://your-resource.openai.azure.com"
|
||||||
|
api_version: "2024-02-15-preview"
|
||||||
|
rpm: 500
|
||||||
|
tpm: 100000
|
||||||
|
litellm_params:
|
||||||
|
temperature: 0.7
|
||||||
|
max_tokens: 4000
|
||||||
|
base_model: "gpt-4-turbo" # Maps to gpt-4-turbo-preview
|
||||||
|
system_instructions: ""
|
||||||
|
use_default_system_instructions: true
|
||||||
|
citations_enabled: true
|
||||||
|
|
||||||
|
# Example: Groq - Fast inference
|
||||||
|
- id: -7
|
||||||
name: "Global Groq Llama 3"
|
name: "Global Groq Llama 3"
|
||||||
description: "Ultra-fast Llama 3 70B via Groq"
|
description: "Ultra-fast Llama 3 70B via Groq"
|
||||||
provider: "GROQ"
|
provider: "GROQ"
|
||||||
|
|
@ -150,3 +194,11 @@ global_llm_configs:
|
||||||
# - All standard LiteLLM providers are supported
|
# - All standard LiteLLM providers are supported
|
||||||
# - rpm/tpm: Optional rate limits for load balancing (requests/tokens per minute)
|
# - rpm/tpm: Optional rate limits for load balancing (requests/tokens per minute)
|
||||||
# These help the router distribute load evenly and avoid rate limit errors
|
# These help the router distribute load evenly and avoid rate limit errors
|
||||||
|
#
|
||||||
|
# AZURE-SPECIFIC NOTES:
|
||||||
|
# - Always add 'base_model' in litellm_params for Azure deployments
|
||||||
|
# - This fixes "Could not identify azure model 'X'" warnings
|
||||||
|
# - base_model should match the underlying OpenAI model (e.g., gpt-4o, gpt-4-turbo, gpt-3.5-turbo)
|
||||||
|
# - model_name format: "azure/<your-deployment-name>"
|
||||||
|
# - api_version: Use a recent Azure API version (e.g., "2024-02-15-preview")
|
||||||
|
# - See: https://docs.litellm.ai/docs/proxy/cost_tracking#spend-tracking-for-azure-openai-models
|
||||||
|
|
|
||||||
|
|
@ -323,6 +323,28 @@ def process_file_upload_task(
|
||||||
user_id: ID of the user
|
user_id: ID of the user
|
||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import os
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"[process_file_upload] Task started - file: {filename}, "
|
||||||
|
f"search_space_id: {search_space_id}, user_id: {user_id}"
|
||||||
|
)
|
||||||
|
logger.info(f"[process_file_upload] File path: {file_path}")
|
||||||
|
|
||||||
|
# Check if file exists and is accessible
|
||||||
|
if not os.path.exists(file_path):
|
||||||
|
logger.error(
|
||||||
|
f"[process_file_upload] File does not exist: {file_path}. "
|
||||||
|
"The temp file may have been cleaned up before the task ran."
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
file_size = os.path.getsize(file_path)
|
||||||
|
logger.info(f"[process_file_upload] File size: {file_size} bytes")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[process_file_upload] Could not get file size: {e}")
|
||||||
|
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
asyncio.set_event_loop(loop)
|
asyncio.set_event_loop(loop)
|
||||||
|
|
@ -331,6 +353,13 @@ def process_file_upload_task(
|
||||||
loop.run_until_complete(
|
loop.run_until_complete(
|
||||||
_process_file_upload(file_path, filename, search_space_id, user_id)
|
_process_file_upload(file_path, filename, search_space_id, user_id)
|
||||||
)
|
)
|
||||||
|
logger.info(f"[process_file_upload] Task completed successfully for: {filename}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
f"[process_file_upload] Task failed for {filename}: {e}\n"
|
||||||
|
f"Traceback:\n{traceback.format_exc()}"
|
||||||
|
)
|
||||||
|
raise
|
||||||
finally:
|
finally:
|
||||||
loop.close()
|
loop.close()
|
||||||
|
|
||||||
|
|
@ -343,16 +372,22 @@ async def _process_file_upload(
|
||||||
|
|
||||||
from app.tasks.document_processors.file_processors import process_file_in_background
|
from app.tasks.document_processors.file_processors import process_file_in_background
|
||||||
|
|
||||||
|
logger.info(f"[_process_file_upload] Starting async processing for: {filename}")
|
||||||
|
|
||||||
async with get_celery_session_maker()() as session:
|
async with get_celery_session_maker()() as session:
|
||||||
|
logger.info(f"[_process_file_upload] Database session created for: {filename}")
|
||||||
task_logger = TaskLoggingService(session, search_space_id)
|
task_logger = TaskLoggingService(session, search_space_id)
|
||||||
|
|
||||||
# Get file size for notification metadata
|
# Get file size for notification metadata
|
||||||
try:
|
try:
|
||||||
file_size = os.path.getsize(file_path)
|
file_size = os.path.getsize(file_path)
|
||||||
except Exception:
|
logger.info(f"[_process_file_upload] File size: {file_size} bytes")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[_process_file_upload] Could not get file size: {e}")
|
||||||
file_size = None
|
file_size = None
|
||||||
|
|
||||||
# Create notification for document processing
|
# Create notification for document processing
|
||||||
|
logger.info(f"[_process_file_upload] Creating notification for: {filename}")
|
||||||
notification = (
|
notification = (
|
||||||
await NotificationService.document_processing.notify_processing_started(
|
await NotificationService.document_processing.notify_processing_started(
|
||||||
session=session,
|
session=session,
|
||||||
|
|
@ -363,6 +398,9 @@ async def _process_file_upload(
|
||||||
file_size=file_size,
|
file_size=file_size,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
logger.info(
|
||||||
|
f"[_process_file_upload] Notification created with ID: {notification.id if notification else 'None'}"
|
||||||
|
)
|
||||||
|
|
||||||
log_entry = await task_logger.log_task_start(
|
log_entry = await task_logger.log_task_start(
|
||||||
task_name="process_file_upload",
|
task_name="process_file_upload",
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ backend_pid=$!
|
||||||
sleep 5
|
sleep 5
|
||||||
|
|
||||||
echo "Starting Celery Worker..."
|
echo "Starting Celery Worker..."
|
||||||
celery -A app.celery_app worker --loglevel=info &
|
celery -A app.celery_app worker --loglevel=info --autoscale=64,4 &
|
||||||
celery_worker_pid=$!
|
celery_worker_pid=$!
|
||||||
|
|
||||||
# Wait a bit for worker to initialize
|
# Wait a bit for worker to initialize
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue