diff --git a/README.md b/README.md index 77c34334d..d7845fdfc 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ +readme_banner -![new_header](https://github.com/user-attachments/assets/e236b764-0ddc-42ff-a1f1-8fbb3d2e0e65)
diff --git a/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py b/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py index a4f6db0b8..dca37b90e 100644 --- a/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py +++ b/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py @@ -24,9 +24,7 @@ def enum_exists(enum_name: str) -> bool: """Check if an enum type exists in the database.""" conn = op.get_bind() result = conn.execute( - sa.text( - "SELECT EXISTS (SELECT 1 FROM pg_type WHERE typname = :enum_name)" - ), + sa.text("SELECT EXISTS (SELECT 1 FROM pg_type WHERE typname = :enum_name)"), {"enum_name": enum_name}, ) return result.scalar() diff --git a/surfsense_backend/alembic/versions/1_add_github_connector_enum.py b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py index 6f3ee2a01..a031e7693 100644 --- a/surfsense_backend/alembic/versions/1_add_github_connector_enum.py +++ b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py @@ -22,9 +22,7 @@ def enum_exists(enum_name: str) -> bool: """Check if an enum type exists in the database.""" conn = op.get_bind() result = conn.execute( - sa.text( - "SELECT EXISTS (SELECT 1 FROM pg_type WHERE typname = :enum_name)" - ), + sa.text("SELECT EXISTS (SELECT 1 FROM pg_type WHERE typname = :enum_name)"), {"enum_name": enum_name}, ) return result.scalar() diff --git a/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py b/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py index ef38add26..488f46227 100644 --- a/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py +++ b/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py @@ -197,9 +197,7 @@ def enum_exists(enum_name: str) -> bool: """Check if an enum type exists in the database.""" conn = op.get_bind() result = conn.execute( - sa.text( - "SELECT EXISTS (SELECT 1 FROM pg_type WHERE typname = :enum_name)" - ), + sa.text("SELECT EXISTS (SELECT 1 FROM pg_type WHERE typname = :enum_name)"), {"enum_name": enum_name}, ) return result.scalar() diff --git a/surfsense_backend/alembic/versions/5263aa4e7f94_allow_multiple_connectors_with_unique_.py b/surfsense_backend/alembic/versions/5263aa4e7f94_allow_multiple_connectors_with_unique_.py deleted file mode 100644 index d49009c94..000000000 --- a/surfsense_backend/alembic/versions/5263aa4e7f94_allow_multiple_connectors_with_unique_.py +++ /dev/null @@ -1,96 +0,0 @@ -"""allow_multiple_connectors_with_unique_names - -Revision ID: 5263aa4e7f94 -Revises: a1b2c3d4e5f6 -Create Date: 2026-01-13 12:23:31.481643 - -""" -from collections.abc import Sequence - -from sqlalchemy import text - -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = '5263aa4e7f94' -down_revision: str | None = 'a1b2c3d4e5f6' -branch_labels: str | Sequence[str] | None = None -depends_on: str | Sequence[str] | None = None - - -def upgrade() -> None: - """Upgrade schema.""" - connection = op.get_bind() - - # Check if old constraint exists before trying to drop it - old_constraint_exists = connection.execute( - text(""" - SELECT 1 FROM information_schema.table_constraints - WHERE table_name='search_source_connectors' - AND constraint_type='UNIQUE' - AND constraint_name='uq_searchspace_user_connector_type' - """) - ).scalar() - - if old_constraint_exists: - op.drop_constraint( - 'uq_searchspace_user_connector_type', - 'search_source_connectors', - type_='unique' - ) - - # Check if new constraint already exists before creating - new_constraint_exists = connection.execute( - text(""" - SELECT 1 FROM information_schema.table_constraints - WHERE table_name='search_source_connectors' - AND constraint_type='UNIQUE' - AND constraint_name='uq_searchspace_user_connector_type_name' - """) - ).scalar() - - if not new_constraint_exists: - op.create_unique_constraint( - 'uq_searchspace_user_connector_type_name', - 'search_source_connectors', - ['search_space_id', 'user_id', 'connector_type', 'name'] - ) - - -def downgrade() -> None: - """Downgrade schema.""" - connection = op.get_bind() - - # Check if new constraint exists before dropping - new_constraint_exists = connection.execute( - text(""" - SELECT 1 FROM information_schema.table_constraints - WHERE table_name='search_source_connectors' - AND constraint_type='UNIQUE' - AND constraint_name='uq_searchspace_user_connector_type_name' - """) - ).scalar() - - if new_constraint_exists: - op.drop_constraint( - 'uq_searchspace_user_connector_type_name', - 'search_source_connectors', - type_='unique' - ) - - # Only restore old constraint if it doesn't exist - old_constraint_exists = connection.execute( - text(""" - SELECT 1 FROM information_schema.table_constraints - WHERE table_name='search_source_connectors' - AND constraint_type='UNIQUE' - AND constraint_name='uq_searchspace_user_connector_type' - """) - ).scalar() - - if not old_constraint_exists: - op.create_unique_constraint( - 'uq_searchspace_user_connector_type', - 'search_source_connectors', - ['search_space_id', 'user_id', 'connector_type'] - ) diff --git a/surfsense_backend/alembic/versions/a1b2c3d4e5f6_add_mcp_connector_type.py b/surfsense_backend/alembic/versions/62_add_mcp_connector_type.py similarity index 99% rename from surfsense_backend/alembic/versions/a1b2c3d4e5f6_add_mcp_connector_type.py rename to surfsense_backend/alembic/versions/62_add_mcp_connector_type.py index 2fb4b65c2..d28e76019 100644 --- a/surfsense_backend/alembic/versions/a1b2c3d4e5f6_add_mcp_connector_type.py +++ b/surfsense_backend/alembic/versions/62_add_mcp_connector_type.py @@ -5,6 +5,7 @@ Revises: 63 Create Date: 2026-01-09 15:19:51.827647 """ + from collections.abc import Sequence from alembic import op diff --git a/surfsense_backend/alembic/versions/62_add_user_profile_columns.py b/surfsense_backend/alembic/versions/64_add_user_profile_columns.py similarity index 95% rename from surfsense_backend/alembic/versions/62_add_user_profile_columns.py rename to surfsense_backend/alembic/versions/64_add_user_profile_columns.py index a6fef0c5b..db45982d8 100644 --- a/surfsense_backend/alembic/versions/62_add_user_profile_columns.py +++ b/surfsense_backend/alembic/versions/64_add_user_profile_columns.py @@ -4,8 +4,8 @@ This migration adds: - display_name column for user's full name from OAuth - avatar_url column for user's profile picture URL from OAuth -Revision ID: 62 -Revises: 61 +Revision ID: 64 +Revises: 63 """ from collections.abc import Sequence @@ -13,8 +13,8 @@ from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. -revision: str = "62" -down_revision: str | None = "61" +revision: str = "64" +down_revision: str | None = "63" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None diff --git a/surfsense_backend/alembic/versions/63_add_message_author_id.py b/surfsense_backend/alembic/versions/65_add_message_author_id.py similarity index 78% rename from surfsense_backend/alembic/versions/63_add_message_author_id.py rename to surfsense_backend/alembic/versions/65_add_message_author_id.py index 42d4af85e..8d891db81 100644 --- a/surfsense_backend/alembic/versions/63_add_message_author_id.py +++ b/surfsense_backend/alembic/versions/65_add_message_author_id.py @@ -1,15 +1,15 @@ """Add author_id column to new_chat_messages table -Revision ID: 63 -Revises: 62 +Revision ID: 65 +Revises: 64 """ from collections.abc import Sequence from alembic import op -revision: str = "63" -down_revision: str | None = "62" +revision: str = "65" +down_revision: str | None = "64" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None @@ -37,6 +37,10 @@ def upgrade() -> None: def downgrade() -> None: """Remove author_id column from new_chat_messages table.""" - op.execute("DROP INDEX IF EXISTS ix_new_chat_messages_author_id") - op.execute("ALTER TABLE new_chat_messages DROP COLUMN IF EXISTS author_id") - + op.execute( + """ + DROP INDEX IF EXISTS ix_new_chat_messages_author_id; + ALTER TABLE new_chat_messages + DROP COLUMN IF EXISTS author_id; + """ + ) diff --git a/surfsense_backend/app/agents/new_chat/tools/mcp_client.py b/surfsense_backend/app/agents/new_chat/tools/mcp_client.py index d91065661..437f93043 100644 --- a/surfsense_backend/app/agents/new_chat/tools/mcp_client.py +++ b/surfsense_backend/app/agents/new_chat/tools/mcp_client.py @@ -18,7 +18,9 @@ logger = logging.getLogger(__name__) class MCPClient: """Client for communicating with an MCP server.""" - def __init__(self, command: str, args: list[str], env: dict[str, str] | None = None): + def __init__( + self, command: str, args: list[str], env: dict[str, str] | None = None + ): """Initialize MCP client. Args: @@ -44,18 +46,16 @@ class MCPClient: # Merge env vars with current environment server_env = os.environ.copy() server_env.update(self.env) - + # Create server parameters with env server_params = StdioServerParameters( - command=self.command, - args=self.args, - env=server_env + command=self.command, args=self.args, env=server_env ) - + # Spawn server process and create session # Note: Cannot combine these context managers because ClientSession # needs the read/write streams from stdio_client - async with stdio_client(server=server_params) as (read, write): + async with stdio_client(server=server_params) as (read, write): # noqa: SIM117 async with ClientSession(read, write) as session: # Initialize the connection await session.initialize() @@ -85,7 +85,9 @@ class MCPClient: """ if not self.session: - raise RuntimeError("Not connected to MCP server. Use 'async with client.connect():'") + raise RuntimeError( + "Not connected to MCP server. Use 'async with client.connect():'" + ) try: # Call tools/list RPC method @@ -93,11 +95,15 @@ class MCPClient: tools = [] for tool in response.tools: - tools.append({ - "name": tool.name, - "description": tool.description or "", - "input_schema": tool.inputSchema if hasattr(tool, "inputSchema") else {}, - }) + tools.append( + { + "name": tool.name, + "description": tool.description or "", + "input_schema": tool.inputSchema + if hasattr(tool, "inputSchema") + else {}, + } + ) logger.info("Listed %d tools from MCP server", len(tools)) return tools @@ -121,10 +127,14 @@ class MCPClient: """ if not self.session: - raise RuntimeError("Not connected to MCP server. Use 'async with client.connect():'") + raise RuntimeError( + "Not connected to MCP server. Use 'async with client.connect():'" + ) try: - logger.info("Calling MCP tool '%s' with arguments: %s", tool_name, arguments) + logger.info( + "Calling MCP tool '%s' with arguments: %s", tool_name, arguments + ) # Call tools/call RPC method response = await self.session.call_tool(tool_name, arguments=arguments) @@ -147,12 +157,17 @@ class MCPClient: # Handle validation errors from MCP server responses # Some MCP servers (like server-memory) return extra fields not in their schema if "Invalid structured content" in str(e): - logger.warning("MCP server returned data not matching its schema, but continuing: %s", e) + logger.warning( + "MCP server returned data not matching its schema, but continuing: %s", + e, + ) # Try to extract result from error message or return a success message return "Operation completed (server returned unexpected format)" raise except (ValueError, TypeError, AttributeError, KeyError) as e: - logger.error("Failed to call MCP tool '%s': %s", tool_name, e, exc_info=True) + logger.error( + "Failed to call MCP tool '%s': %s", tool_name, e, exc_info=True + ) return f"Error calling tool: {e!s}" diff --git a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py index dd374cdae..95960dad2 100644 --- a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py +++ b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py @@ -21,7 +21,8 @@ logger = logging.getLogger(__name__) def _create_dynamic_input_model_from_schema( - tool_name: str, input_schema: dict[str, Any], + tool_name: str, + input_schema: dict[str, Any], ) -> type[BaseModel]: """Create a Pydantic model from MCP tool's JSON schema. @@ -41,15 +42,18 @@ def _create_dynamic_input_model_from_schema( for param_name, param_schema in properties.items(): param_description = param_schema.get("description", "") is_required = param_name in required_fields - + # Use Any type for complex schemas to preserve structure # This allows the MCP server to do its own validation from typing import Any as AnyType from pydantic import Field - + if is_required: - field_definitions[param_name] = (AnyType, Field(..., description=param_description)) + field_definitions[param_name] = ( + AnyType, + Field(..., description=param_description), + ) else: field_definitions[param_name] = ( AnyType | None, @@ -88,7 +92,7 @@ async def _create_mcp_tool_from_definition( async def mcp_tool_call(**kwargs) -> str: """Execute the MCP tool call via the client.""" logger.info(f"MCP tool '{tool_name}' called with params: {kwargs}") - + try: # Connect to server and call tool async with mcp_client.connect(): @@ -114,7 +118,8 @@ async def _create_mcp_tool_from_definition( async def load_mcp_tools( - session: AsyncSession, search_space_id: int, + session: AsyncSession, + search_space_id: int, ) -> list[StructuredTool]: """Load all MCP tools from user's active MCP server connectors. @@ -155,9 +160,11 @@ async def load_mcp_tools( args = server_config.get("args", []) env = server_config.get("env", {}) - if not command: - logger.warning(f"MCP connector {connector.id} server config missing command, skipping") - continue + if not command: + logger.warning( + f"MCP connector {connector.id} missing command, skipping" + ) + continue # Create MCP client mcp_client = MCPClient(command, args, env) @@ -171,16 +178,18 @@ async def load_mcp_tools( f"'{command}' (connector {connector.id})" ) - # Create LangChain tools from definitions - for tool_def in tool_definitions: - try: - tool = await _create_mcp_tool_from_definition(tool_def, mcp_client) - tools.append(tool) - except Exception as e: - logger.exception( - f"Failed to create tool '{tool_def.get('name')}' " - f"from connector {connector.id}: {e!s}", - ) + # Create LangChain tools from definitions + for tool_def in tool_definitions: + try: + tool = await _create_mcp_tool_from_definition( + tool_def, mcp_client + ) + tools.append(tool) + except Exception as e: + logger.exception( + f"Failed to create tool '{tool_def.get('name')}' " + f"from connector {connector.id}: {e!s}", + ) except Exception as e: logger.exception( diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py index bb8708b2b..6873f864c 100644 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -283,7 +283,8 @@ async def build_tools_async( ): try: mcp_tools = await load_mcp_tools( - dependencies["db_session"], dependencies["search_space_id"], + dependencies["db_session"], + dependencies["search_space_id"], ) tools.extend(mcp_tools) logging.info( diff --git a/surfsense_backend/app/schemas/search_source_connector.py b/surfsense_backend/app/schemas/search_source_connector.py index 93d877261..73ab56e53 100644 --- a/surfsense_backend/app/schemas/search_source_connector.py +++ b/surfsense_backend/app/schemas/search_source_connector.py @@ -23,7 +23,9 @@ class SearchSourceConnectorBase(BaseModel): @field_validator("config") @classmethod def validate_config_for_connector_type( - cls, config: dict[str, Any], values: dict[str, Any], + cls, + config: dict[str, Any], + values: dict[str, Any], ) -> dict[str, Any]: connector_type = values.data.get("connector_type") return validate_connector_config(connector_type, config) diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 596cd9830..f3b5cba9d 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -2,11 +2,14 @@ File document processors for different ETL services (Unstructured, LlamaCloud, Docling). """ +import asyncio import contextlib import logging +import ssl import warnings from logging import ERROR, getLogger +import httpx from fastapi import HTTPException from langchain_core.documents import Document as LangChainDocument from litellm import atranscription @@ -31,6 +34,122 @@ from .base import ( ) from .markdown_processor import add_received_markdown_file_document +# Constants for LlamaCloud retry configuration +LLAMACLOUD_MAX_RETRIES = 3 +LLAMACLOUD_BASE_DELAY = 5 # Base delay in seconds for exponential backoff +LLAMACLOUD_RETRYABLE_EXCEPTIONS = ( + ssl.SSLError, + httpx.ConnectError, + httpx.ConnectTimeout, + httpx.ReadTimeout, + httpx.WriteTimeout, + ConnectionError, + TimeoutError, +) + + +async def parse_with_llamacloud_retry( + file_path: str, + estimated_pages: int, + task_logger: TaskLoggingService | None = None, + log_entry: Log | None = None, +): + """ + Parse a file with LlamaCloud with retry logic for transient SSL/connection errors. + + Args: + file_path: Path to the file to parse + estimated_pages: Estimated number of pages for timeout calculation + task_logger: Optional task logger for progress updates + log_entry: Optional log entry for progress updates + + Returns: + LlamaParse result object + + Raises: + Exception: If all retries fail + """ + from llama_cloud_services import LlamaParse + from llama_cloud_services.parse.utils import ResultType + + # Calculate timeouts based on estimated pages + # Base timeout of 300 seconds + 30 seconds per page for large documents + base_timeout = 300 + per_page_timeout = 30 + job_timeout = base_timeout + (estimated_pages * per_page_timeout) + + # Create custom httpx client with larger timeouts for file uploads + # The SSL error often occurs during large file uploads, so we need generous timeouts + custom_timeout = httpx.Timeout( + connect=60.0, # 60 seconds to establish connection + read=300.0, # 5 minutes to read response + write=300.0, # 5 minutes to write/upload (important for large files) + pool=60.0, # 60 seconds to acquire connection from pool + ) + + last_exception = None + + for attempt in range(1, LLAMACLOUD_MAX_RETRIES + 1): + try: + # Create a fresh httpx client for each attempt + async with httpx.AsyncClient(timeout=custom_timeout) as custom_client: + # Create LlamaParse parser instance with optimized settings + parser = LlamaParse( + api_key=app_config.LLAMA_CLOUD_API_KEY, + num_workers=1, # Use single worker for file processing + verbose=True, + language="en", + result_type=ResultType.MD, + # Timeout settings for large files + max_timeout=max(2000, job_timeout), # Overall max timeout + job_timeout_in_seconds=job_timeout, + job_timeout_extra_time_per_page_in_seconds=per_page_timeout, + # Use our custom client with larger timeouts + custom_client=custom_client, + ) + + # Parse the file asynchronously + result = await parser.aparse(file_path) + return result + + except LLAMACLOUD_RETRYABLE_EXCEPTIONS as e: + last_exception = e + error_type = type(e).__name__ + + if attempt < LLAMACLOUD_MAX_RETRIES: + # Calculate exponential backoff delay + delay = LLAMACLOUD_BASE_DELAY * (2 ** (attempt - 1)) + + if task_logger and log_entry: + await task_logger.log_task_progress( + log_entry, + f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}), retrying in {delay}s", + { + "error_type": error_type, + "error_message": str(e)[:200], + "attempt": attempt, + "retry_delay": delay, + }, + ) + else: + logging.warning( + f"LlamaCloud upload failed (attempt {attempt}/{LLAMACLOUD_MAX_RETRIES}): {error_type}. " + f"Retrying in {delay}s..." + ) + + await asyncio.sleep(delay) + else: + logging.error( + f"LlamaCloud upload failed after {LLAMACLOUD_MAX_RETRIES} attempts: {error_type} - {e}" + ) + + except Exception: + # Non-retryable exception, raise immediately + raise + + # All retries exhausted + raise last_exception or RuntimeError("LlamaCloud parsing failed after all retries") + async def add_received_file_document_using_unstructured( session: AsyncSession, @@ -819,24 +938,18 @@ async def process_file_in_background( "file_type": "document", "etl_service": "LLAMACLOUD", "processing_stage": "parsing", + "estimated_pages": estimated_pages_before, }, ) - from llama_cloud_services import LlamaParse - from llama_cloud_services.parse.utils import ResultType - - # Create LlamaParse parser instance - parser = LlamaParse( - api_key=app_config.LLAMA_CLOUD_API_KEY, - num_workers=1, # Use single worker for file processing - verbose=True, - language="en", - result_type=ResultType.MD, + # Parse file with retry logic for SSL/connection errors (common with large files) + result = await parse_with_llamacloud_retry( + file_path=file_path, + estimated_pages=estimated_pages_before, + task_logger=task_logger, + log_entry=log_entry, ) - # Parse the file asynchronously - result = await parser.aparse(file_path) - # Clean up the temp file import os diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index 8ec09ddd9..2187a88cb 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -175,6 +175,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/26/99/fc813cd978842c26c82534010ea849eee9ab3a13ea2b74e95cb9c99e747b/amqp-5.3.1-py3-none-any.whl", hash = "sha256:43b3319e1b4e7d1251833a93d672b4af1e40f3d632d479b98661a95f117880a2", size = 50944 }, ] +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303 }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -1568,16 +1577,17 @@ wheels = [ [[package]] name = "fastapi" -version = "0.115.9" +version = "0.128.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ab/dd/d854f85e70f7341b29e3fda754f2833aec197bd355f805238758e3bcd8ed/fastapi-0.115.9.tar.gz", hash = "sha256:9d7da3b196c5eed049bc769f9475cd55509a112fbe031c0ef2f53768ae68d13f", size = 293774 } +sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682 } wheels = [ - { url = "https://files.pythonhosted.org/packages/32/b6/7517af5234378518f27ad35a7b24af9591bc500b8c1780929c1295999eb6/fastapi-0.115.9-py3-none-any.whl", hash = "sha256:4a439d7923e4de796bcc88b64e9754340fcd1574673cbd865ba8a99fe0d28c56", size = 94919 }, + { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094 }, ] [[package]] @@ -3482,6 +3492,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1b/92/9a45c91089c3cf690b5badd4be81e392ff086ccca8a1d4e3a08463d8a966/matplotlib-3.10.3-cp313-cp313t-win_amd64.whl", hash = "sha256:4f23ffe95c5667ef8a2b56eea9b53db7f43910fa4a2d5472ae0f72b64deab4d5", size = 8139044 }, ] +[[package]] +name = "mcp" +version = "1.25.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d5/2d/649d80a0ecf6a1f82632ca44bec21c0461a9d9fc8934d38cb5b319f2db5e/mcp-1.25.0.tar.gz", hash = "sha256:56310361ebf0364e2d438e5b45f7668cbb124e158bb358333cd06e49e83a6802", size = 605387 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/fc/6dc7659c2ae5ddf280477011f4213a74f806862856b796ef08f028e664bf/mcp-1.25.0-py3-none-any.whl", hash = "sha256:b37c38144a666add0862614cc79ec276e97d72aa8ca26d622818d4e278b9721a", size = 233076 }, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -6382,15 +6417,29 @@ wheels = [ ] [[package]] -name = "starlette" -version = "0.45.3" +name = "sse-starlette" +version = "3.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, + { name = "starlette" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ff/fb/2984a686808b89a6781526129a4b51266f678b2d2b97ab2d325e56116df8/starlette-0.45.3.tar.gz", hash = "sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f", size = 2574076 } +sdist = { url = "https://files.pythonhosted.org/packages/62/08/8f554b0e5bad3e4e880521a1686d96c05198471eed860b0eb89b57ea3636/sse_starlette-3.1.1.tar.gz", hash = "sha256:bffa531420c1793ab224f63648c059bcadc412bf9fdb1301ac8de1cf9a67b7fb", size = 24306 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/61/f2b52e107b1fc8944b33ef56bf6ac4ebbe16d91b94d2b87ce013bf63fb84/starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d", size = 71507 }, + { url = "https://files.pythonhosted.org/packages/e3/31/4c281581a0f8de137b710a07f65518b34bcf333b201cfa06cfda9af05f8a/sse_starlette-3.1.1-py3-none-any.whl", hash = "sha256:bb38f71ae74cfd86b529907a9fda5632195dfa6ae120f214ea4c890c7ee9d436", size = 12442 }, +] + +[[package]] +name = "starlette" +version = "0.50.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033 }, ] [[package]] @@ -6443,6 +6492,7 @@ dependencies = [ { name = "litellm" }, { name = "llama-cloud-services" }, { name = "markdownify" }, + { name = "mcp" }, { name = "notion-client" }, { name = "numpy" }, { name = "pgvector" }, @@ -6457,6 +6507,8 @@ dependencies = [ { name = "slack-sdk" }, { name = "soundfile" }, { name = "spacy" }, + { name = "sse-starlette" }, + { name = "starlette" }, { name = "static-ffmpeg" }, { name = "tavily-python" }, { name = "trafilatura" }, @@ -6505,6 +6557,7 @@ requires-dist = [ { name = "litellm", specifier = ">=1.80.10" }, { name = "llama-cloud-services", specifier = ">=0.6.25" }, { name = "markdownify", specifier = ">=0.14.1" }, + { name = "mcp", specifier = ">=1.25.0" }, { name = "notion-client", specifier = ">=2.3.0" }, { name = "numpy", specifier = ">=1.24.0" }, { name = "pgvector", specifier = ">=0.3.6" }, @@ -6519,6 +6572,8 @@ requires-dist = [ { name = "slack-sdk", specifier = ">=3.34.0" }, { name = "soundfile", specifier = ">=0.13.1" }, { name = "spacy", specifier = ">=3.8.7" }, + { name = "sse-starlette", specifier = ">=3.1.1,<3.1.2" }, + { name = "starlette", specifier = ">=0.40.0,<0.51.0" }, { name = "static-ffmpeg", specifier = ">=2.13" }, { name = "tavily-python", specifier = ">=0.3.2" }, { name = "trafilatura", specifier = ">=2.0.0" }, diff --git a/surfsense_web/app/dashboard/user/settings/components/ApiKeyContent.tsx b/surfsense_web/app/dashboard/user/settings/components/ApiKeyContent.tsx index 40e7b1d34..6bf10a78f 100644 --- a/surfsense_web/app/dashboard/user/settings/components/ApiKeyContent.tsx +++ b/surfsense_web/app/dashboard/user/settings/components/ApiKeyContent.tsx @@ -120,4 +120,3 @@ export function ApiKeyContent({ onMenuClick }: ApiKeyContentProps) { ); } - diff --git a/surfsense_web/app/dashboard/user/settings/components/ProfileContent.tsx b/surfsense_web/app/dashboard/user/settings/components/ProfileContent.tsx index fab978b49..511a09fd1 100644 --- a/surfsense_web/app/dashboard/user/settings/components/ProfileContent.tsx +++ b/surfsense_web/app/dashboard/user/settings/components/ProfileContent.tsx @@ -6,8 +6,8 @@ import { AnimatePresence, motion } from "motion/react"; import { useTranslations } from "next-intl"; import { useEffect, useState } from "react"; import { toast } from "sonner"; -import { currentUserAtom } from "@/atoms/user/user-query.atoms"; import { updateUserMutationAtom } from "@/atoms/user/user-mutation.atoms"; +import { currentUserAtom } from "@/atoms/user/user-query.atoms"; import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; diff --git a/surfsense_web/app/dashboard/user/settings/components/UserSettingsSidebar.tsx b/surfsense_web/app/dashboard/user/settings/components/UserSettingsSidebar.tsx index e25d318f3..b7040b4e3 100644 --- a/surfsense_web/app/dashboard/user/settings/components/UserSettingsSidebar.tsx +++ b/surfsense_web/app/dashboard/user/settings/components/UserSettingsSidebar.tsx @@ -1,7 +1,7 @@ "use client"; -import { ArrowLeft, ChevronRight, X } from "lucide-react"; import type { LucideIcon } from "lucide-react"; +import { ArrowLeft, ChevronRight, X } from "lucide-react"; import { AnimatePresence, motion } from "motion/react"; import { useTranslations } from "next-intl"; import { Button } from "@/components/ui/button"; @@ -152,4 +152,3 @@ export function UserSettingsSidebar({ ); } - diff --git a/surfsense_web/app/dashboard/user/settings/page.tsx b/surfsense_web/app/dashboard/user/settings/page.tsx index 973b39076..8e04ce37a 100644 --- a/surfsense_web/app/dashboard/user/settings/page.tsx +++ b/surfsense_web/app/dashboard/user/settings/page.tsx @@ -7,7 +7,7 @@ import { useTranslations } from "next-intl"; import { useCallback, useState } from "react"; import { ApiKeyContent } from "./components/ApiKeyContent"; import { ProfileContent } from "./components/ProfileContent"; -import { UserSettingsSidebar, type SettingsNavItem } from "./components/UserSettingsSidebar"; +import { type SettingsNavItem, UserSettingsSidebar } from "./components/UserSettingsSidebar"; export default function UserSettingsPage() { const t = useTranslations("userSettings"); diff --git a/surfsense_web/atoms/user/user-mutation.atoms.ts b/surfsense_web/atoms/user/user-mutation.atoms.ts index 02a9f2146..caf4436a5 100644 --- a/surfsense_web/atoms/user/user-mutation.atoms.ts +++ b/surfsense_web/atoms/user/user-mutation.atoms.ts @@ -16,4 +16,3 @@ export const updateUserMutationAtom = atomWithMutation((get) => { }, }; }); - diff --git a/surfsense_web/components/assistant-ui/document-upload-popup.tsx b/surfsense_web/components/assistant-ui/document-upload-popup.tsx index 29f633ebf..1023c5c40 100644 --- a/surfsense_web/components/assistant-ui/document-upload-popup.tsx +++ b/surfsense_web/components/assistant-ui/document-upload-popup.tsx @@ -96,35 +96,37 @@ const DocumentUploadPopupContent: FC<{ return ( - + Upload Document - {/* Fixed Header */} -
- {/* Upload header */} -
-
- -
-
-

Upload Documents

-

- Upload and sync your documents to your search space -

+ {/* Scrollable container for mobile */} +
+ {/* Header - scrolls with content on mobile */} +
+ {/* Upload header */} +
+
+ +
+
+

+ Upload Documents +

+

+ Upload and sync your documents to your search space +

+
+ + {/* Content */} +
+ +
- {/* Scrollable Content */} -
-
-
- -
-
- {/* Bottom fade shadow */} -
-
+ {/* Bottom fade shadow - hidden on very small screens */} +
); diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 2507fb8a9..9f844ba2b 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -36,11 +36,7 @@ import { newLLMConfigsAtom, } from "@/atoms/new-llm-config/new-llm-config-query.atoms"; import { currentUserAtom } from "@/atoms/user/user-query.atoms"; -import { - ComposerAddAttachment, - ComposerAttachments, -} from "@/components/assistant-ui/attachment"; -import { UserMessage } from "@/components/assistant-ui/user-message"; +import { ComposerAddAttachment, ComposerAttachments } from "@/components/assistant-ui/attachment"; import { ConnectorIndicator } from "@/components/assistant-ui/connector-popup"; import { InlineMentionEditor, @@ -53,6 +49,7 @@ import { } from "@/components/assistant-ui/thinking-steps"; import { ToolFallback } from "@/components/assistant-ui/tool-fallback"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; +import { UserMessage } from "@/components/assistant-ui/user-message"; import { DocumentMentionPicker, type DocumentMentionPickerRef, @@ -636,7 +633,6 @@ const AssistantActionBar: FC = () => { ); }; - const EditComposer: FC = () => { return ( diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx index 0b7f7b51f..cc27d326a 100644 --- a/surfsense_web/components/sources/DocumentUploadTab.tsx +++ b/surfsense_web/components/sources/DocumentUploadTab.tsx @@ -110,6 +110,11 @@ const FILE_TYPE_CONFIG: Record> = { const cardClass = "border border-border bg-slate-400/5 dark:bg-white/5"; +// Upload limits +const MAX_FILES = 10; +const MAX_TOTAL_SIZE_MB = 200; +const MAX_TOTAL_SIZE_BYTES = MAX_TOTAL_SIZE_MB * 1024 * 1024; + export function DocumentUploadTab({ searchSpaceId, onSuccess, @@ -134,15 +139,40 @@ export function DocumentUploadTab({ [acceptedFileTypes] ); - const onDrop = useCallback((acceptedFiles: File[]) => { - setFiles((prev) => [...prev, ...acceptedFiles]); - }, []); + const onDrop = useCallback( + (acceptedFiles: File[]) => { + setFiles((prev) => { + const newFiles = [...prev, ...acceptedFiles]; + + // Check file count limit + if (newFiles.length > MAX_FILES) { + toast.error(t("max_files_exceeded"), { + description: t("max_files_exceeded_desc", { max: MAX_FILES }), + }); + return prev; + } + + // Check total size limit + const newTotalSize = newFiles.reduce((sum, file) => sum + file.size, 0); + if (newTotalSize > MAX_TOTAL_SIZE_BYTES) { + toast.error(t("max_size_exceeded"), { + description: t("max_size_exceeded_desc", { max: MAX_TOTAL_SIZE_MB }), + }); + return prev; + } + + return newFiles; + }); + }, + [t] + ); const { getRootProps, getInputProps, isDragActive } = useDropzone({ onDrop, accept: acceptedFileTypes, - maxSize: 50 * 1024 * 1024, + maxSize: 50 * 1024 * 1024, // 50MB per file noClick: false, + disabled: files.length >= MAX_FILES, }); // Handle file input click to prevent event bubbling that might reopen dialog @@ -160,6 +190,15 @@ export function DocumentUploadTab({ const totalFileSize = files.reduce((total, file) => total + file.size, 0); + // Check if limits are reached + const isFileCountLimitReached = files.length >= MAX_FILES; + const isSizeLimitReached = totalFileSize >= MAX_TOTAL_SIZE_BYTES; + const remainingFiles = MAX_FILES - files.length; + const remainingSizeMB = Math.max( + 0, + (MAX_TOTAL_SIZE_BYTES - totalFileSize) / (1024 * 1024) + ).toFixed(1); + // Track accordion state changes const handleAccordionChange = useCallback( (value: string) => { @@ -210,7 +249,8 @@ export function DocumentUploadTab({ - {t("file_size_limit")} + {t("file_size_limit")}{" "} + {t("upload_limits", { maxFiles: MAX_FILES, maxSizeMB: MAX_TOTAL_SIZE_MB })} @@ -221,7 +261,11 @@ export function DocumentUploadTab({
- {isDragActive ? ( + {isFileCountLimitReached ? ( +
+ +
+

+ {t("file_limit_reached")} +

+

+ {t("file_limit_reached_desc", { max: MAX_FILES })} +

+
+
+ ) : isDragActive ? ( {t("drag_drop")}

{t("or_browse")}

+ {files.length > 0 && ( +

+ {t("remaining_capacity", { files: remainingFiles, sizeMB: remainingSizeMB })} +

+ )} +
+ )} + {!isFileCountLimitReached && ( +
+
)} -
- -
diff --git a/surfsense_web/lib/apis/user-api.service.ts b/surfsense_web/lib/apis/user-api.service.ts index 94914ebaa..083fd8dee 100644 --- a/surfsense_web/lib/apis/user-api.service.ts +++ b/surfsense_web/lib/apis/user-api.service.ts @@ -1,7 +1,7 @@ import { getMeResponse, - updateUserResponse, type UpdateUserRequest, + updateUserResponse, } from "@/contracts/types/user.types"; import { baseApiService } from "./base-api.service"; diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json index 5159d4df0..b6eaf8824 100644 --- a/surfsense_web/messages/en.json +++ b/surfsense_web/messages/en.json @@ -378,6 +378,7 @@ "title": "Upload Documents", "subtitle": "Upload your files to make them searchable and accessible through AI-powered conversations.", "file_size_limit": "Maximum file size: 50MB per file. Supported formats vary based on your ETL service configuration.", + "upload_limits": "Upload limit: {maxFiles} files, {maxSizeMB}MB total.", "drop_files": "Drop files here", "drag_drop": "Drag & drop files here", "or_browse": "or click to browse", @@ -393,7 +394,14 @@ "upload_error": "Upload Error", "upload_error_desc": "Error uploading files", "supported_file_types": "Supported File Types", - "file_types_desc": "These file types are supported based on your current ETL service configuration." + "file_types_desc": "These file types are supported based on your current ETL service configuration.", + "max_files_exceeded": "File Limit Exceeded", + "max_files_exceeded_desc": "You can upload a maximum of {max} files at a time.", + "max_size_exceeded": "Size Limit Exceeded", + "max_size_exceeded_desc": "Total file size cannot exceed {max}MB.", + "file_limit_reached": "Maximum Files Reached", + "file_limit_reached_desc": "Remove some files to add more (max {max} files).", + "remaining_capacity": "{files} files remaining • {sizeMB}MB available" }, "add_webpage": { "title": "Add Webpages for Crawling", diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json index 1404c176f..b48e3e9c7 100644 --- a/surfsense_web/messages/zh.json +++ b/surfsense_web/messages/zh.json @@ -363,6 +363,7 @@ "title": "上传文档", "subtitle": "上传您的文件,使其可通过 AI 对话进行搜索和访问。", "file_size_limit": "最大文件大小:每个文件 50MB。支持的格式因您的 ETL 服务配置而异。", + "upload_limits": "上传限制:最多 {maxFiles} 个文件,总大小不超过 {maxSizeMB}MB。", "drop_files": "放下文件到这里", "drag_drop": "拖放文件到这里", "or_browse": "或点击浏览", @@ -378,7 +379,14 @@ "upload_error": "上传错误", "upload_error_desc": "上传文件时出错", "supported_file_types": "支持的文件类型", - "file_types_desc": "根据您当前的 ETL 服务配置支持这些文件类型。" + "file_types_desc": "根据您当前的 ETL 服务配置支持这些文件类型。", + "max_files_exceeded": "超过文件数量限制", + "max_files_exceeded_desc": "一次最多只能上传 {max} 个文件。", + "max_size_exceeded": "超过文件大小限制", + "max_size_exceeded_desc": "文件总大小不能超过 {max}MB。", + "file_limit_reached": "已达到最大文件数量", + "file_limit_reached_desc": "移除一些文件以添加更多(最多 {max} 个文件)。", + "remaining_capacity": "剩余 {files} 个文件名额 • 可用 {sizeMB}MB" }, "add_webpage": { "title": "添加网页爬取", diff --git a/surfsense_web/public/og-image.png b/surfsense_web/public/og-image.png index 695ab55eb..b28e33e0f 100644 Binary files a/surfsense_web/public/og-image.png and b/surfsense_web/public/og-image.png differ