diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml index 75ea238e3..9b213aafe 100644 --- a/surfsense_backend/app/config/global_llm_config.example.yaml +++ b/surfsense_backend/app/config/global_llm_config.example.yaml @@ -122,8 +122,52 @@ global_llm_configs: use_default_system_instructions: false citations_enabled: true - # Example: Groq - Fast inference + # Example: Azure OpenAI GPT-4o + # IMPORTANT: For Azure deployments, always include 'base_model' in litellm_params + # to enable accurate token counting, cost tracking, and max token limits - id: -5 + name: "Global Azure GPT-4o" + description: "Azure OpenAI GPT-4o deployment" + provider: "AZURE" + # model_name format for Azure: azure/<your-deployment-name> + model_name: "azure/gpt-4o-deployment" + api_key: "your-azure-api-key-here" + api_base: "https://your-resource.openai.azure.com" + api_version: "2024-02-15-preview" # Azure API version + rpm: 1000 + tpm: 150000 + litellm_params: + temperature: 0.7 + max_tokens: 4000 + # REQUIRED for Azure: Specify the underlying OpenAI model + # This fixes "Could not identify azure model" warnings + # Common base_model values: gpt-4, gpt-4-turbo, gpt-4o, gpt-4o-mini, gpt-3.5-turbo + base_model: "gpt-4o" + system_instructions: "" + use_default_system_instructions: true + citations_enabled: true + + # Example: Azure OpenAI GPT-4 Turbo + - id: -6 + name: "Global Azure GPT-4 Turbo" + description: "Azure OpenAI GPT-4 Turbo deployment" + provider: "AZURE" + model_name: "azure/gpt-4-turbo-deployment" + api_key: "your-azure-api-key-here" + api_base: "https://your-resource.openai.azure.com" + api_version: "2024-02-15-preview" + rpm: 500 + tpm: 100000 + litellm_params: + temperature: 0.7 + max_tokens: 4000 + base_model: "gpt-4-turbo" # Maps to gpt-4-turbo-preview + system_instructions: "" + use_default_system_instructions: true + citations_enabled: true + + # Example: Groq - Fast inference + - id: -7 name: "Global Groq Llama 
3" description: "Ultra-fast Llama 3 70B via Groq" provider: "GROQ" @@ -150,3 +194,11 @@ global_llm_configs: # - All standard LiteLLM providers are supported # - rpm/tpm: Optional rate limits for load balancing (requests/tokens per minute) # These help the router distribute load evenly and avoid rate limit errors +# +# AZURE-SPECIFIC NOTES: +# - Always add 'base_model' in litellm_params for Azure deployments +# - This fixes "Could not identify azure model 'X'" warnings +# - base_model should match the underlying OpenAI model (e.g., gpt-4o, gpt-4-turbo, gpt-3.5-turbo) +# - model_name format: "azure/<your-deployment-name>" +# - api_version: Use a recent Azure API version (e.g., "2024-02-15-preview") +# - See: https://docs.litellm.ai/docs/proxy/cost_tracking#spend-tracking-for-azure-openai-models diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py index f21ff5a30..66bde3d43 100644 --- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py @@ -323,6 +323,28 @@ def process_file_upload_task( user_id: ID of the user """ import asyncio + import os + import traceback + + logger.info( + f"[process_file_upload] Task started - file: {filename}, " + f"search_space_id: {search_space_id}, user_id: {user_id}" + ) + logger.info(f"[process_file_upload] File path: {file_path}") + + # Check if file exists and is accessible + if not os.path.exists(file_path): + logger.error( + f"[process_file_upload] File does not exist: {file_path}. " + "The temp file may have been cleaned up before the task ran." 
+ ) + return + + try: + file_size = os.path.getsize(file_path) + logger.info(f"[process_file_upload] File size: {file_size} bytes") + except Exception as e: + logger.warning(f"[process_file_upload] Could not get file size: {e}") loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) @@ -331,6 +353,13 @@ def process_file_upload_task( loop.run_until_complete( _process_file_upload(file_path, filename, search_space_id, user_id) ) + logger.info(f"[process_file_upload] Task completed successfully for: {filename}") + except Exception as e: + logger.error( + f"[process_file_upload] Task failed for {filename}: {e}\n" + f"Traceback:\n{traceback.format_exc()}" + ) + raise finally: loop.close() @@ -343,16 +372,22 @@ async def _process_file_upload( from app.tasks.document_processors.file_processors import process_file_in_background + logger.info(f"[_process_file_upload] Starting async processing for: {filename}") + async with get_celery_session_maker()() as session: + logger.info(f"[_process_file_upload] Database session created for: {filename}") task_logger = TaskLoggingService(session, search_space_id) # Get file size for notification metadata try: file_size = os.path.getsize(file_path) - except Exception: + logger.info(f"[_process_file_upload] File size: {file_size} bytes") + except Exception as e: + logger.warning(f"[_process_file_upload] Could not get file size: {e}") file_size = None # Create notification for document processing + logger.info(f"[_process_file_upload] Creating notification for: {filename}") notification = ( await NotificationService.document_processing.notify_processing_started( session=session, @@ -363,6 +398,9 @@ async def _process_file_upload( file_size=file_size, ) ) + logger.info( + f"[_process_file_upload] Notification created with ID: {notification.id if notification else 'None'}" + ) log_entry = await task_logger.log_task_start( task_name="process_file_upload", diff --git a/surfsense_backend/scripts/docker/entrypoint.sh 
b/surfsense_backend/scripts/docker/entrypoint.sh index 8619d2025..6818219e5 100644 --- a/surfsense_backend/scripts/docker/entrypoint.sh +++ b/surfsense_backend/scripts/docker/entrypoint.sh @@ -39,7 +39,7 @@ backend_pid=$! sleep 5 echo "Starting Celery Worker..." -celery -A app.celery_app worker --loglevel=info & +celery -A app.celery_app worker --loglevel=info --autoscale=64,4 & celery_worker_pid=$! # Wait a bit for worker to initialize