feat: add file limit tracking for a user

DESKTOP-RTLN3BA\$punk 2025-10-30 14:58:08 -07:00
parent 5654f6c78f
commit 4be9d099bf
7 changed files with 695 additions and 8 deletions

View file

@@ -0,0 +1,76 @@
"""Add page limit fields to user table

Revision ID: 33
Revises: 32

Changes:
1. Add pages_limit column (Integer, default 500)
2. Add pages_used column (Integer, default 0)
"""

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision: str = "33"
down_revision: str | None = "32"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Add page limit fields to user table."""
    from sqlalchemy import inspect

    conn = op.get_bind()
    inspector = inspect(conn)

    # Get existing columns
    user_columns = [col["name"] for col in inspector.get_columns("user")]

    # Add pages_limit column if it doesn't exist
    if "pages_limit" not in user_columns:
        op.add_column(
            "user",
            sa.Column(
                "pages_limit",
                sa.Integer(),
                nullable=False,
                server_default="500",
            ),
        )

    # Add pages_used column if it doesn't exist
    if "pages_used" not in user_columns:
        op.add_column(
            "user",
            sa.Column(
                "pages_used",
                sa.Integer(),
                nullable=False,
                server_default="0",
            ),
        )


def downgrade() -> None:
    """Remove page limit fields from user table."""
    from sqlalchemy import inspect

    conn = op.get_bind()
    inspector = inspect(conn)

    # Get existing columns
    user_columns = [col["name"] for col in inspector.get_columns("user")]

    # Drop columns if they exist
    if "pages_used" in user_columns:
        op.drop_column("user", "pages_used")
    if "pages_limit" in user_columns:
        op.drop_column("user", "pages_limit")

View file

@@ -81,7 +81,6 @@ class ChatType(str, Enum):
class LiteLLMProvider(str, Enum):
    """
    Enum for LLM providers supported by LiteLLM.
    """

    OPENAI = "OPENAI"
@@ -401,6 +400,10 @@ if config.AUTH_TYPE == "GOOGLE":
            cascade="all, delete-orphan",
        )

        # Page usage tracking for ETL services
        pages_limit = Column(Integer, nullable=False, default=500, server_default="500")
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")

else:

    class User(SQLAlchemyBaseUserTableUUID, Base):
@@ -411,6 +414,10 @@ else:
            cascade="all, delete-orphan",
        )

        # Page usage tracking for ETL services
        pages_limit = Column(Integer, nullable=False, default=500, server_default="500")
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")


engine = create_async_engine(DATABASE_URL)
async_session_maker = async_sessionmaker(engine, expire_on_commit=False)
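
Reading the new counters back is an ordinary column select. A minimal sketch, assuming the User model above and async_session_maker are importable from app.db (assumed import path):

from sqlalchemy import select

from app.db import User, async_session_maker  # assumed import path


async def remaining_pages(user_id) -> int:
    # Look up the two tracking columns and report what is left.
    async with async_session_maker() as session:
        result = await session.execute(
            select(User.pages_used, User.pages_limit).where(User.id == user_id)
        )
        pages_used, pages_limit = result.one()
    return max(0, pages_limit - pages_used)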

View file

@@ -0,0 +1,401 @@
"""
Service for managing user page limits for ETL services.
"""

import os
from pathlib import Path

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession


class PageLimitExceededError(Exception):
    """
    Exception raised when a user exceeds their page processing limit.
    """

    def __init__(
        self,
        message: str = "Page limit exceeded. Please contact admin to increase limits for your account.",
        pages_used: int = 0,
        pages_limit: int = 0,
        pages_to_add: int = 0,
    ):
        self.pages_used = pages_used
        self.pages_limit = pages_limit
        self.pages_to_add = pages_to_add
        super().__init__(message)


class PageLimitService:
    """Service for checking and updating user page limits."""

    def __init__(self, session: AsyncSession):
        self.session = session

    async def check_page_limit(
        self, user_id: str, estimated_pages: int = 1
    ) -> tuple[bool, int, int]:
        """
        Check if user has enough pages remaining for processing.

        Args:
            user_id: The user's ID
            estimated_pages: Estimated number of pages to be processed

        Returns:
            Tuple of (has_capacity, pages_used, pages_limit)

        Raises:
            PageLimitExceededError: If user would exceed their page limit
        """
        from app.db import User

        # Get user's current page usage
        result = await self.session.execute(
            select(User.pages_used, User.pages_limit).where(User.id == user_id)
        )
        row = result.first()
        if not row:
            raise ValueError(f"User with ID {user_id} not found")
        pages_used, pages_limit = row

        # Check if adding estimated pages would exceed limit
        if pages_used + estimated_pages > pages_limit:
            raise PageLimitExceededError(
                message=f"Processing this document would exceed your page limit. "
                f"Used: {pages_used}/{pages_limit} pages. "
                f"Document has approximately {estimated_pages} page(s). "
                f"Please contact admin to increase limits for your account.",
                pages_used=pages_used,
                pages_limit=pages_limit,
                pages_to_add=estimated_pages,
            )

        return True, pages_used, pages_limit
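
    # Illustrative check (hypothetical numbers): with pages_used=480 and
    # pages_limit=500, check_page_limit(user_id, estimated_pages=30) sees
    # 480 + 30 > 500 and raises PageLimitExceededError, while
    # estimated_pages=20 passes and returns (True, 480, 500).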

    async def update_page_usage(
        self, user_id: str, pages_to_add: int, allow_exceed: bool = False
    ) -> int:
        """
        Update user's page usage after successful processing.

        Args:
            user_id: The user's ID
            pages_to_add: Number of pages to add to usage
            allow_exceed: If True, allows update even if it exceeds limit
                (used when document was already processed after passing initial check)

        Returns:
            New total pages_used value

        Raises:
            PageLimitExceededError: If adding pages would exceed limit and allow_exceed is False
        """
        from app.db import User

        # Get user
        result = await self.session.execute(select(User).where(User.id == user_id))
        user = result.scalar_one_or_none()
        if not user:
            raise ValueError(f"User with ID {user_id} not found")

        # Check if this would exceed limit (only if allow_exceed is False)
        new_usage = user.pages_used + pages_to_add
        if not allow_exceed and new_usage > user.pages_limit:
            raise PageLimitExceededError(
                message=f"Cannot update page usage. Would exceed limit. "
                f"Current: {user.pages_used}/{user.pages_limit}, "
                f"Trying to add: {pages_to_add}",
                pages_used=user.pages_used,
                pages_limit=user.pages_limit,
                pages_to_add=pages_to_add,
            )

        # Update usage
        user.pages_used = new_usage
        await self.session.commit()
        await self.session.refresh(user)
        return user.pages_used

    async def get_page_usage(self, user_id: str) -> tuple[int, int]:
        """
        Get user's current page usage and limit.

        Args:
            user_id: The user's ID

        Returns:
            Tuple of (pages_used, pages_limit)
        """
        from app.db import User

        result = await self.session.execute(
            select(User.pages_used, User.pages_limit).where(User.id == user_id)
        )
        row = result.first()
        if not row:
            raise ValueError(f"User with ID {user_id} not found")
        return row

    def estimate_pages_from_elements(self, elements: list) -> int:
        """
        Estimate page count from document elements (for Unstructured).

        Args:
            elements: List of document elements

        Returns:
            Estimated number of pages
        """
        # For Unstructured, we can count unique page numbers in metadata
        # or estimate based on content length
        page_numbers = set()
        for element in elements:
            # Try to get page number from metadata
            if hasattr(element, "metadata") and element.metadata:
                page_num = element.metadata.get("page_number")
                if page_num is not None:
                    page_numbers.add(page_num)

        # If we found page numbers in metadata, use that count
        if page_numbers:
            return len(page_numbers)

        # Otherwise, estimate: assume ~2000 chars per page
        total_content_length = sum(
            len(element.page_content) if hasattr(element, "page_content") else 0
            for element in elements
        )
        estimated_pages = max(1, total_content_length // 2000)
        return estimated_pages
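
    # Illustrative fallback (hypothetical numbers): three elements without
    # page_number metadata carrying 2500, 1500, and 4000 characters sum to
    # 8000 characters, so max(1, 8000 // 2000) == 4 estimated pages.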

    def estimate_pages_from_markdown(self, markdown_documents: list) -> int:
        """
        Estimate page count from markdown documents (for LlamaCloud).

        Args:
            markdown_documents: List of markdown document objects

        Returns:
            Estimated number of pages
        """
        # For LlamaCloud, if split_by_page=True was used, each doc is a page
        # Otherwise, estimate based on content length
        if not markdown_documents:
            return 1

        # Check if documents have page metadata
        total_pages = 0
        for doc in markdown_documents:
            if hasattr(doc, "metadata") and doc.metadata:
                # If metadata contains page info, use it
                page_num = doc.metadata.get("page", doc.metadata.get("page_number"))
                if page_num is not None:
                    total_pages += 1
                    continue
            # Otherwise estimate from content length
            content_length = len(doc.text) if hasattr(doc, "text") else 0
            estimated = max(1, content_length // 2000)
            total_pages += estimated
        return max(1, total_pages)

    def estimate_pages_from_content_length(self, content_length: int) -> int:
        """
        Estimate page count from content length (for Docling).

        Args:
            content_length: Length of the document content

        Returns:
            Estimated number of pages
        """
        # Estimate ~2000 characters per page
        return max(1, content_length // 2000)
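
    # For example (hypothetical number): content_length=10_000 yields
    # max(1, 10_000 // 2000) == 5 pages; anything under 2000 characters
    # floors to the 1-page minimum.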

    def estimate_pages_before_processing(self, file_path: str) -> int:
        """
        Estimate page count from file before processing (to avoid unnecessary API calls).

        This is called BEFORE sending to ETL services to prevent cost on rejected files.

        Args:
            file_path: Path to the file

        Returns:
            Estimated number of pages
        """
        if not os.path.exists(file_path):
            raise ValueError(f"File not found: {file_path}")

        file_ext = Path(file_path).suffix.lower()
        file_size = os.path.getsize(file_path)

        # PDF files - try to get actual page count
        if file_ext == ".pdf":
            try:
                import pypdf

                with open(file_path, "rb") as f:
                    pdf_reader = pypdf.PdfReader(f)
                    return len(pdf_reader.pages)
            except Exception:
                # If PDF reading fails, fall back to size estimation
                # Typical PDF: ~100KB per page (conservative estimate)
                return max(1, file_size // (100 * 1024))

        # Word Processing Documents
        # Microsoft Word, LibreOffice Writer, WordPerfect, Pages, etc.
        elif file_ext in [
            ".doc",
            ".docx",
            ".docm",
            ".dot",
            ".dotm",  # Microsoft Word
            ".odt",
            ".ott",
            ".sxw",
            ".stw",
            ".uot",  # OpenDocument/StarOffice Writer
            ".rtf",  # Rich Text Format
            ".pages",  # Apple Pages
            ".wpd",
            ".wps",  # WordPerfect, Microsoft Works
            ".abw",
            ".zabw",  # AbiWord
            ".cwk",
            ".hwp",
            ".lwp",
            ".mcw",
            ".mw",
            ".sdw",
            ".vor",  # Other word processors
        ]:
            # Typical word document: ~50KB per page (conservative)
            return max(1, file_size // (50 * 1024))

        # Presentation Documents
        # PowerPoint, Impress, Keynote, etc.
        elif file_ext in [
            ".ppt",
            ".pptx",
            ".pptm",
            ".pot",
            ".potx",  # Microsoft PowerPoint
            ".odp",
            ".otp",
            ".sxi",
            ".sti",
            ".uop",  # OpenDocument/StarOffice Impress
            ".key",  # Apple Keynote
            ".sda",
            ".sdd",
            ".sdp",  # StarOffice Draw/Impress
        ]:
            # Typical presentation: ~200KB per slide (conservative)
            return max(1, file_size // (200 * 1024))

        # Spreadsheet Documents
        # Excel, Calc, Numbers, Lotus, etc.
        elif file_ext in [
            ".xls",
            ".xlsx",
            ".xlsm",
            ".xlsb",
            ".xlw",
            ".xlr",  # Microsoft Excel
            ".ods",
            ".ots",
            ".fods",  # OpenDocument Spreadsheet
            ".numbers",  # Apple Numbers
            ".123",
            ".wk1",
            ".wk2",
            ".wk3",
            ".wk4",
            ".wks",  # Lotus 1-2-3
            ".wb1",
            ".wb2",
            ".wb3",
            ".wq1",
            ".wq2",  # Quattro Pro
            ".csv",
            ".tsv",
            ".slk",
            ".sylk",
            ".dif",
            ".dbf",
            ".prn",
            ".qpw",  # Data formats
            ".602",
            ".et",
            ".eth",  # Other spreadsheets
        ]:
            # Spreadsheets typically have 1 sheet = 1 page for ETL
            # Conservative: ~100KB per sheet
            return max(1, file_size // (100 * 1024))

        # E-books
        elif file_ext in [".epub"]:
            # E-books vary widely, estimate by size
            # Typical e-book: ~50KB per page
            return max(1, file_size // (50 * 1024))

        # Plain Text and Markup Files
        elif file_ext in [
            ".txt",
            ".log",  # Plain text
            ".md",
            ".markdown",  # Markdown
            ".htm",
            ".html",
            ".xml",  # Markup
        ]:
            # Plain text: ~3000 bytes per page
            return max(1, file_size // 3000)

        # Image Files
        # Each image is typically processed as 1 page
        elif file_ext in [
            ".jpg",
            ".jpeg",  # JPEG
            ".png",  # PNG
            ".gif",  # GIF
            ".bmp",  # Bitmap
            ".tiff",  # TIFF
            ".webp",  # WebP
            ".svg",  # SVG
            ".cgm",  # Computer Graphics Metafile
            ".odg",
            ".pbd",  # OpenDocument Graphics
        ]:
            # Each image = 1 page
            return 1

        # Audio Files (transcription = typically 1 page per minute)
        # Note: These should be handled by audio transcription flow, not ETL
        elif file_ext in [".mp3", ".m4a", ".wav", ".mpga"]:
            # Audio files: estimate based on duration
            # Fallback: ~1MB per minute of audio, 1 page per minute transcript
            return max(1, file_size // (1024 * 1024))

        # Video Files (typically not processed for pages, but just in case)
        elif file_ext in [".mp4", ".mpeg", ".webm"]:
            # Video files: very rough estimate
            # Typically wouldn't be page-based, but use conservative estimate
            return max(1, file_size // (5 * 1024 * 1024))

        # Other/Unknown Document Types
        else:
            # Conservative estimate: ~80KB per page
            # This catches: .sgl, .sxg, .uof, .uos1, .uos2, .web, and any future formats
            return max(1, file_size // (80 * 1024))
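
End to end, the intended call order is estimate, check, process, record. A minimal sketch of a caller, assuming an open AsyncSession and a hypothetical run_etl stand-in for the actual ETL step:

from sqlalchemy.ext.asyncio import AsyncSession

from app.services.page_limit_service import PageLimitExceededError, PageLimitService


async def ingest_with_limit(session: AsyncSession, user_id: str, file_path: str) -> int:
    service = PageLimitService(session)

    # 1. Cheap local estimate before any paid ETL call.
    estimated = service.estimate_pages_before_processing(file_path)

    # 2. Reject up front if the estimate would blow the limit.
    try:
        await service.check_page_limit(user_id, estimated)
    except PageLimitExceededError:
        # Nothing was sent to the ETL service, so nothing is billed.
        raise

    # 3. Run the actual ETL step here (hypothetical helper, not part of this commit).
    # docs = await run_etl(file_path)

    # 4. Record usage once the work is done; allow_exceed=True mirrors how the
    #    processors call it after the initial check has already passed.
    return await service.update_page_usage(user_id, estimated, allow_exceed=True)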

View file

@@ -308,11 +308,24 @@ async def _process_file_upload(
            log_entry,
        )
    except Exception as e:
        # Import here to avoid circular dependencies
        from fastapi import HTTPException

        from app.services.page_limit_service import PageLimitExceededError

        # For page limit errors, use the detailed message from the exception
        if isinstance(e, PageLimitExceededError):
            error_message = str(e)
        elif isinstance(e, HTTPException) and "page limit" in str(e.detail).lower():
            error_message = str(e.detail)
        else:
            error_message = f"Failed to process file: {filename}"

        await task_logger.log_task_failure(
            log_entry,
            error_message,
            str(e),
            {"error_type": type(e).__name__},
        )
        logger.error(error_message)
        raise

View file

@@ -2,6 +2,7 @@
File document processors for different ETL services (Unstructured, LlamaCloud, Docling).
"""

import contextlib
import logging

from fastapi import HTTPException
@@ -579,6 +580,67 @@ async def process_file_in_background(
            )
        else:
            # Import page limit service
            from app.services.page_limit_service import (
                PageLimitExceededError,
                PageLimitService,
            )

            # Initialize page limit service
            page_limit_service = PageLimitService(session)

            # CRITICAL: Estimate page count BEFORE making expensive ETL API calls
            # This prevents users from incurring costs on files that would exceed their limit
            try:
                estimated_pages_before = (
                    page_limit_service.estimate_pages_before_processing(file_path)
                )
            except Exception:
                # If estimation fails, use a conservative estimate based on file size
                import os

                file_size = os.path.getsize(file_path)
                estimated_pages_before = max(
                    1, file_size // (80 * 1024)
                )  # ~80KB per page

            await task_logger.log_task_progress(
                log_entry,
                f"Estimated {estimated_pages_before} pages for file: {filename}",
                {
                    "estimated_pages": estimated_pages_before,
                    "file_type": "document",
                },
            )

            # Check page limit BEFORE calling ETL service to avoid unnecessary costs
            try:
                await page_limit_service.check_page_limit(
                    user_id, estimated_pages_before
                )
            except PageLimitExceededError as e:
                await task_logger.log_task_failure(
                    log_entry,
                    f"Page limit exceeded before processing: {filename}",
                    str(e),
                    {
                        "error_type": "PageLimitExceeded",
                        "pages_used": e.pages_used,
                        "pages_limit": e.pages_limit,
                        "estimated_pages": estimated_pages_before,
                    },
                )
                # Clean up the temp file
                import os

                with contextlib.suppress(Exception):
                    os.unlink(file_path)
                raise HTTPException(
                    status_code=403,
                    detail=str(e),
                ) from e

            if app_config.ETL_SERVICE == "UNSTRUCTURED":
                await task_logger.log_task_progress(
                    log_entry,
@@ -611,6 +673,24 @@ async def process_file_in_background(
                    {"processing_stage": "etl_complete", "elements_count": len(docs)},
                )

                # Verify actual page count from parsed documents
                actual_pages = page_limit_service.estimate_pages_from_elements(docs)

                # Use the higher of the two estimates for safety (in case pre-estimate was too low)
                final_page_count = max(estimated_pages_before, actual_pages)

                # If actual is significantly higher than estimate, log a warning
                if actual_pages > estimated_pages_before * 1.5:
                    await task_logger.log_task_progress(
                        log_entry,
                        f"Actual page count higher than estimate: {filename}",
                        {
                            "estimated_before": estimated_pages_before,
                            "actual_pages": actual_pages,
                            "using_count": final_page_count,
                        },
                    )

                # Clean up the temp file
                import os
@@ -626,6 +706,12 @@ async def process_file_in_background(
                )

                if result:
                    # Update page usage after successful processing
                    # allow_exceed=True because document was already created after passing initial check
                    await page_limit_service.update_page_usage(
                        user_id, final_page_count, allow_exceed=True
                    )

                    await task_logger.log_task_success(
                        log_entry,
                        f"Successfully processed file with Unstructured: {filename}",
@@ -634,6 +720,7 @@ async def process_file_in_background(
                            "content_hash": result.content_hash,
                            "file_type": "document",
                            "etl_service": "UNSTRUCTURED",
                            "pages_processed": final_page_count,
                        },
                    )
                else:
@@ -696,6 +783,45 @@ async def process_file_in_background(
                    },
                )

                # Check if LlamaCloud returned any documents
                if not markdown_documents or len(markdown_documents) == 0:
                    await task_logger.log_task_failure(
                        log_entry,
                        f"LlamaCloud parsing returned no documents: {filename}",
                        "ETL service returned empty document list",
                        {
                            "error_type": "EmptyDocumentList",
                            "etl_service": "LLAMACLOUD",
                        },
                    )
                    raise ValueError(
                        f"LlamaCloud parsing returned no documents for {filename}"
                    )

                # Verify actual page count from parsed markdown documents
                actual_pages = page_limit_service.estimate_pages_from_markdown(
                    markdown_documents
                )

                # Use the higher of the two estimates for safety (in case pre-estimate was too low)
                final_page_count = max(estimated_pages_before, actual_pages)

                # If actual is significantly higher than estimate, log a warning
                if actual_pages > estimated_pages_before * 1.5:
                    await task_logger.log_task_progress(
                        log_entry,
                        f"Actual page count higher than estimate: {filename}",
                        {
                            "estimated_before": estimated_pages_before,
                            "actual_pages": actual_pages,
                            "using_count": final_page_count,
                        },
                    )

                # Track if any document was successfully created (not a duplicate)
                any_doc_created = False
                last_created_doc = None

                for doc in markdown_documents:
                    # Extract text content from the markdown documents
                    markdown_content = doc.text
@@ -709,18 +835,34 @@ async def process_file_in_background(
                        user_id=user_id,
                    )

                    # Track if this document was successfully created
                    if doc_result:
                        any_doc_created = True
                        last_created_doc = doc_result

                # Update page usage once after processing all documents
                # Only update if at least one document was created (not all duplicates)
                if any_doc_created:
                    # Update page usage after successful processing
                    # allow_exceed=True because document was already created after passing initial check
                    await page_limit_service.update_page_usage(
                        user_id, final_page_count, allow_exceed=True
                    )

                    await task_logger.log_task_success(
                        log_entry,
                        f"Successfully processed file with LlamaCloud: {filename}",
                        {
                            "document_id": last_created_doc.id,
                            "content_hash": last_created_doc.content_hash,
                            "file_type": "document",
                            "etl_service": "LLAMACLOUD",
                            "pages_processed": final_page_count,
                            "documents_count": len(markdown_documents),
                        },
                    )
                else:
                    # All documents were duplicates (markdown_documents was not empty, but all returned None)
                    await task_logger.log_task_success(
                        log_entry,
                        f"Document already exists (duplicate): {filename}",
@@ -728,6 +870,7 @@ async def process_file_in_background(
                            "duplicate_detected": True,
                            "file_type": "document",
                            "etl_service": "LLAMACLOUD",
                            "documents_count": len(markdown_documents),
                        },
                    )
@@ -769,6 +912,26 @@ async def process_file_in_background(
                    },
                )

                # Verify actual page count from content length
                actual_pages = page_limit_service.estimate_pages_from_content_length(
                    len(result["content"])
                )

                # Use the higher of the two estimates for safety (in case pre-estimate was too low)
                final_page_count = max(estimated_pages_before, actual_pages)

                # If actual is significantly higher than estimate, log a warning
                if actual_pages > estimated_pages_before * 1.5:
                    await task_logger.log_task_progress(
                        log_entry,
                        f"Actual page count higher than estimate: {filename}",
                        {
                            "estimated_before": estimated_pages_before,
                            "actual_pages": actual_pages,
                            "using_count": final_page_count,
                        },
                    )

                # Process the document using our Docling background task
                doc_result = await add_received_file_document_using_docling(
                    session,
@@ -779,6 +942,12 @@ async def process_file_in_background(
                )

                if doc_result:
                    # Update page usage after successful processing
                    # allow_exceed=True because document was already created after passing initial check
                    await page_limit_service.update_page_usage(
                        user_id, final_page_count, allow_exceed=True
                    )

                    await task_logger.log_task_success(
                        log_entry,
                        f"Successfully processed file with Docling: {filename}",
@@ -787,6 +956,7 @@ async def process_file_in_background(
                            "content_hash": doc_result.content_hash,
                            "file_type": "document",
                            "etl_service": "DOCLING",
                            "pages_processed": final_page_count,
                        },
                    )
                else:
@@ -801,13 +971,24 @@ async def process_file_in_background(
                    )

    except Exception as e:
        await session.rollback()

        # For page limit errors, use the detailed message from the exception
        from app.services.page_limit_service import PageLimitExceededError

        if isinstance(e, PageLimitExceededError):
            error_message = str(e)
        elif isinstance(e, HTTPException) and "page limit" in str(e.detail).lower():
            error_message = str(e.detail)
        else:
            error_message = f"Failed to process file: {filename}"

        await task_logger.log_task_failure(
            log_entry,
            error_message,
            str(e),
            {"error_type": type(e).__name__, "filename": filename},
        )

        import logging

        logging.error(f"Error processing file in background: {error_message}")
        raise  # Re-raise so the wrapper can also handle it
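
The same reconciliation pattern repeats in all three ETL branches above: take the larger of the pre-flight estimate and the post-parse count, and log when the estimate was badly low. A condensed sketch of that shared logic as a hypothetical helper (the commit inlines it per branch):

async def reconcile_page_count(
    task_logger, log_entry, filename: str, estimated_before: int, actual_pages: int
) -> int:
    # Bill the larger of the two counts so a low pre-estimate cannot leak pages.
    final_page_count = max(estimated_before, actual_pages)

    # Surface estimates that were off by more than 50% for later tuning.
    if actual_pages > estimated_before * 1.5:
        await task_logger.log_task_progress(
            log_entry,
            f"Actual page count higher than estimate: {filename}",
            {
                "estimated_before": estimated_before,
                "actual_pages": actual_pages,
                "using_count": final_page_count,
            },
        )
    return final_page_count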

View file

@@ -26,6 +26,7 @@ dependencies = [
    "numpy>=1.24.0",
    "pgvector>=0.3.6",
    "playwright>=1.50.0",
    "pypdf>=5.1.0",
    "python-ffmpeg>=2.0.12",
    "rerankers[flashrank]>=0.7.1",
    "sentence-transformers>=3.4.1",

View file

@@ -4423,8 +4423,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/2d/75/364847b879eb630b3ac8293798e380e441a957c53657995053c5ec39a316/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a", size = 4411159 },
    { url = "https://files.pythonhosted.org/packages/6f/a0/567f7ea38b6e1c62aafd58375665a547c00c608a471620c0edc364733e13/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e", size = 4468234 },
    { url = "https://files.pythonhosted.org/packages/30/da/4e42788fb811bbbfd7b7f045570c062f49e350e1d1f3df056c3fb5763353/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db", size = 4166236 },
    { url = "https://files.pythonhosted.org/packages/3c/94/c1777c355bc560992af848d98216148be5f1be001af06e06fc49cbded578/psycopg2_binary-2.9.11-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757", size = 3983083 },
    { url = "https://files.pythonhosted.org/packages/bd/42/c9a21edf0e3daa7825ed04a4a8588686c6c14904344344a039556d78aa58/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3", size = 3652281 },
    { url = "https://files.pythonhosted.org/packages/12/22/dedfbcfa97917982301496b6b5e5e6c5531d1f35dd2b488b08d1ebc52482/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a", size = 3298010 },
    { url = "https://files.pythonhosted.org/packages/66/ea/d3390e6696276078bd01b2ece417deac954dfdd552d2edc3d03204416c0c/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34", size = 3044641 },
    { url = "https://files.pythonhosted.org/packages/12/9a/0402ded6cbd321da0c0ba7d34dc12b29b14f5764c2fc10750daa38e825fc/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d", size = 3347940 },
    { url = "https://files.pythonhosted.org/packages/b1/d2/99b55e85832ccde77b211738ff3925a5d73ad183c0b37bcbbe5a8ff04978/psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:b33fabeb1fde21180479b2d4667e994de7bbf0eec22832ba5d9b5e4cf65b6c6d", size = 2714147 },
    { url = "https://files.pythonhosted.org/packages/ff/a8/a2709681b3ac11b0b1786def10006b8995125ba268c9a54bea6f5ae8bd3e/psycopg2_binary-2.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b8fb3db325435d34235b044b199e56cdf9ff41223a4b9752e8576465170bb38c", size = 3756572 },
@@ -4432,8 +4434,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/11/32/b2ffe8f3853c181e88f0a157c5fb4e383102238d73c52ac6d93a5c8bffe6/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c55b385daa2f92cb64b12ec4536c66954ac53654c7f15a203578da4e78105c0", size = 4411242 },
    { url = "https://files.pythonhosted.org/packages/10/04/6ca7477e6160ae258dc96f67c371157776564679aefd247b66f4661501a2/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c0377174bf1dd416993d16edc15357f6eb17ac998244cca19bc67cdc0e2e5766", size = 4468258 },
    { url = "https://files.pythonhosted.org/packages/3c/7e/6a1a38f86412df101435809f225d57c1a021307dd0689f7a5e7fe83588b1/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c6ff3335ce08c75afaed19e08699e8aacf95d4a260b495a4a8545244fe2ceb3", size = 4166295 },
    { url = "https://files.pythonhosted.org/packages/f2/7d/c07374c501b45f3579a9eb761cbf2604ddef3d96ad48679112c2c5aa9c25/psycopg2_binary-2.9.11-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:84011ba3109e06ac412f95399b704d3d6950e386b7994475b231cf61eec2fc1f", size = 3983133 },
    { url = "https://files.pythonhosted.org/packages/82/56/993b7104cb8345ad7d4516538ccf8f0d0ac640b1ebd8c754a7b024e76878/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba34475ceb08cccbdd98f6b46916917ae6eeb92b5ae111df10b544c3a4621dc4", size = 3652383 },
    { url = "https://files.pythonhosted.org/packages/2d/ac/eaeb6029362fd8d454a27374d84c6866c82c33bfc24587b4face5a8e43ef/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b31e90fdd0f968c2de3b26ab014314fe814225b6c324f770952f7d38abf17e3c", size = 3298168 },
    { url = "https://files.pythonhosted.org/packages/2b/39/50c3facc66bded9ada5cbc0de867499a703dc6bca6be03070b4e3b65da6c/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d526864e0f67f74937a8fce859bd56c979f5e2ec57ca7c627f5f1071ef7fee60", size = 3044712 },
    { url = "https://files.pythonhosted.org/packages/9c/8e/b7de019a1f562f72ada81081a12823d3c1590bedc48d7d2559410a2763fe/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04195548662fa544626c8ea0f06561eb6203f1984ba5b4562764fbeb4c3d14b1", size = 3347549 },
    { url = "https://files.pythonhosted.org/packages/80/2d/1bb683f64737bbb1f86c82b7359db1eb2be4e2c0c13b947f80efefa7d3e5/psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:efff12b432179443f54e230fdf60de1f6cc726b6c832db8701227d089310e8aa", size = 2714215 },
    { url = "https://files.pythonhosted.org/packages/64/12/93ef0098590cf51d9732b4f139533732565704f45bdc1ffa741b7c95fb54/psycopg2_binary-2.9.11-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:92e3b669236327083a2e33ccfa0d320dd01b9803b3e14dd986a4fc54aa00f4e1", size = 3756567 },
@@ -4441,8 +4445,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/13/1e/98874ce72fd29cbde93209977b196a2edae03f8490d1bd8158e7f1daf3a0/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b52a3f9bb540a3e4ec0f6ba6d31339727b2950c9772850d6545b7eae0b9d7c5", size = 4411646 },
    { url = "https://files.pythonhosted.org/packages/5a/bd/a335ce6645334fb8d758cc358810defca14a1d19ffbc8a10bd38a2328565/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:db4fd476874ccfdbb630a54426964959e58da4c61c9feba73e6094d51303d7d8", size = 4468701 },
    { url = "https://files.pythonhosted.org/packages/44/d6/c8b4f53f34e295e45709b7568bf9b9407a612ea30387d35eb9fa84f269b4/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47f212c1d3be608a12937cc131bd85502954398aaa1320cb4c14421a0ffccf4c", size = 4166293 },
    { url = "https://files.pythonhosted.org/packages/4b/e0/f8cc36eadd1b716ab36bb290618a3292e009867e5c97ce4aba908cb99644/psycopg2_binary-2.9.11-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e35b7abae2b0adab776add56111df1735ccc71406e56203515e228a8dc07089f", size = 3983184 },
    { url = "https://files.pythonhosted.org/packages/53/3e/2a8fe18a4e61cfb3417da67b6318e12691772c0696d79434184a511906dc/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fcf21be3ce5f5659daefd2b3b3b6e4727b028221ddc94e6c1523425579664747", size = 3652650 },
    { url = "https://files.pythonhosted.org/packages/76/36/03801461b31b29fe58d228c24388f999fe814dfc302856e0d17f97d7c54d/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:9bd81e64e8de111237737b29d68039b9c813bdf520156af36d26819c9a979e5f", size = 3298663 },
    { url = "https://files.pythonhosted.org/packages/97/77/21b0ea2e1a73aa5fa9222b2a6b8ba325c43c3a8d54272839c991f2345656/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:32770a4d666fbdafab017086655bcddab791d7cb260a16679cc5a7338b64343b", size = 3044737 },
    { url = "https://files.pythonhosted.org/packages/67/69/f36abe5f118c1dca6d3726ceae164b9356985805480731ac6712a63f24f0/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3cb3a676873d7506825221045bd70e0427c905b9c8ee8d6acd70cfcbd6e576d", size = 3347643 },
    { url = "https://files.pythonhosted.org/packages/e1/36/9c0c326fe3a4227953dfb29f5d0c8ae3b8eb8c1cd2967aa569f50cb3c61f/psycopg2_binary-2.9.11-cp314-cp314-win_amd64.whl", hash = "sha256:4012c9c954dfaccd28f94e84ab9f94e12df76b4afb22331b1f0d3154893a6316", size = 2803913 },
]
@@ -5885,6 +5891,7 @@ dependencies = [
    { name = "numpy" },
    { name = "pgvector" },
    { name = "playwright" },
    { name = "pypdf" },
    { name = "python-ffmpeg" },
    { name = "redis" },
    { name = "rerankers", extra = ["flashrank"] },
@@ -5937,6 +5944,7 @@ requires-dist = [
    { name = "numpy", specifier = ">=1.24.0" },
    { name = "pgvector", specifier = ">=0.3.6" },
    { name = "playwright", specifier = ">=1.50.0" },
    { name = "pypdf", specifier = ">=5.1.0" },
    { name = "python-ffmpeg", specifier = ">=2.0.12" },
    { name = "redis", specifier = ">=5.2.1" },
    { name = "rerankers", extras = ["flashrank"], specifier = ">=0.7.1" },