feat: add file limit tracking for a user

DESKTOP-RTLN3BA\$punk 2025-10-30 14:58:08 -07:00
parent 5654f6c78f
commit 4be9d099bf
7 changed files with 695 additions and 8 deletions

View file

@@ -0,0 +1,76 @@
"""Add page limit fields to user table

Revision ID: 33
Revises: 32

Changes:
1. Add pages_limit column (Integer, default 500)
2. Add pages_used column (Integer, default 0)
"""

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision: str = "33"
down_revision: str | None = "32"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Add page limit fields to user table."""
    from sqlalchemy import inspect

    conn = op.get_bind()
    inspector = inspect(conn)

    # Get existing columns
    user_columns = [col["name"] for col in inspector.get_columns("user")]

    # Add pages_limit column if it doesn't exist
    if "pages_limit" not in user_columns:
        op.add_column(
            "user",
            sa.Column(
                "pages_limit",
                sa.Integer(),
                nullable=False,
                server_default="500",
            ),
        )

    # Add pages_used column if it doesn't exist
    if "pages_used" not in user_columns:
        op.add_column(
            "user",
            sa.Column(
                "pages_used",
                sa.Integer(),
                nullable=False,
                server_default="0",
            ),
        )


def downgrade() -> None:
    """Remove page limit fields from user table."""
    from sqlalchemy import inspect

    conn = op.get_bind()
    inspector = inspect(conn)

    # Get existing columns
    user_columns = [col["name"] for col in inspector.get_columns("user")]

    # Drop columns if they exist
    if "pages_used" in user_columns:
        op.drop_column("user", "pages_used")
    if "pages_limit" in user_columns:
        op.drop_column("user", "pages_limit")

View file

@@ -81,7 +81,6 @@ class ChatType(str, Enum):
class LiteLLMProvider(str, Enum):
    """
    Enum for LLM providers supported by LiteLLM.
    """

    OPENAI = "OPENAI"
@@ -401,6 +400,10 @@ if config.AUTH_TYPE == "GOOGLE":
            cascade="all, delete-orphan",
        )

        # Page usage tracking for ETL services
        pages_limit = Column(Integer, nullable=False, default=500, server_default="500")
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")

else:

    class User(SQLAlchemyBaseUserTableUUID, Base):
@@ -411,6 +414,10 @@ else:
            cascade="all, delete-orphan",
        )

        # Page usage tracking for ETL services
        pages_limit = Column(Integer, nullable=False, default=500, server_default="500")
        pages_used = Column(Integer, nullable=False, default=0, server_default="0")


engine = create_async_engine(DATABASE_URL)
async_session_maker = async_sessionmaker(engine, expire_on_commit=False)
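
Reading the new counters back is an ordinary column select. A minimal sketch, assuming the User model above and async_session_maker are importable from app.db (assumed import path):

from sqlalchemy import select

from app.db import User, async_session_maker  # assumed import path


async def remaining_pages(user_id) -> int:
    # Look up the two tracking columns and report what is left.
    async with async_session_maker() as session:
        result = await session.execute(
            select(User.pages_used, User.pages_limit).where(User.id == user_id)
        )
        pages_used, pages_limit = result.one()
    return max(0, pages_limit - pages_used)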

View file

@@ -0,0 +1,401 @@
"""
Service for managing user page limits for ETL services.
"""

import os
from pathlib import Path

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession


class PageLimitExceededError(Exception):
    """
    Exception raised when a user exceeds their page processing limit.
    """

    def __init__(
        self,
        message: str = "Page limit exceeded. Please contact admin to increase limits for your account.",
        pages_used: int = 0,
        pages_limit: int = 0,
        pages_to_add: int = 0,
    ):
        self.pages_used = pages_used
        self.pages_limit = pages_limit
        self.pages_to_add = pages_to_add
        super().__init__(message)


class PageLimitService:
    """Service for checking and updating user page limits."""

    def __init__(self, session: AsyncSession):
        self.session = session

    async def check_page_limit(
        self, user_id: str, estimated_pages: int = 1
    ) -> tuple[bool, int, int]:
        """
        Check if user has enough pages remaining for processing.

        Args:
            user_id: The user's ID
            estimated_pages: Estimated number of pages to be processed

        Returns:
            Tuple of (has_capacity, pages_used, pages_limit)

        Raises:
            PageLimitExceededError: If user would exceed their page limit
        """
        from app.db import User

        # Get user's current page usage
        result = await self.session.execute(
            select(User.pages_used, User.pages_limit).where(User.id == user_id)
        )
        row = result.first()
        if not row:
            raise ValueError(f"User with ID {user_id} not found")
        pages_used, pages_limit = row

        # Check if adding estimated pages would exceed limit
        if pages_used + estimated_pages > pages_limit:
            raise PageLimitExceededError(
                message=f"Processing this document would exceed your page limit. "
                f"Used: {pages_used}/{pages_limit} pages. "
                f"Document has approximately {estimated_pages} page(s). "
                f"Please contact admin to increase limits for your account.",
                pages_used=pages_used,
                pages_limit=pages_limit,
                pages_to_add=estimated_pages,
            )

        return True, pages_used, pages_limit
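
    # Illustrative check (hypothetical numbers): with pages_used=480 and
    # pages_limit=500, check_page_limit(user_id, estimated_pages=30) sees
    # 480 + 30 > 500 and raises PageLimitExceededError, while
    # estimated_pages=20 passes and returns (True, 480, 500).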

    async def update_page_usage(
        self, user_id: str, pages_to_add: int, allow_exceed: bool = False
    ) -> int:
        """
        Update user's page usage after successful processing.

        Args:
            user_id: The user's ID
            pages_to_add: Number of pages to add to usage
            allow_exceed: If True, allows update even if it exceeds limit
                (used when document was already processed after passing initial check)

        Returns:
            New total pages_used value

        Raises:
            PageLimitExceededError: If adding pages would exceed limit and allow_exceed is False
        """
        from app.db import User

        # Get user
        result = await self.session.execute(select(User).where(User.id == user_id))
        user = result.scalar_one_or_none()
        if not user:
            raise ValueError(f"User with ID {user_id} not found")

        # Check if this would exceed limit (only if allow_exceed is False)
        new_usage = user.pages_used + pages_to_add
        if not allow_exceed and new_usage > user.pages_limit:
            raise PageLimitExceededError(
                message=f"Cannot update page usage. Would exceed limit. "
                f"Current: {user.pages_used}/{user.pages_limit}, "
                f"Trying to add: {pages_to_add}",
                pages_used=user.pages_used,
                pages_limit=user.pages_limit,
                pages_to_add=pages_to_add,
            )

        # Update usage
        user.pages_used = new_usage
        await self.session.commit()
        await self.session.refresh(user)
        return user.pages_used

    async def get_page_usage(self, user_id: str) -> tuple[int, int]:
        """
        Get user's current page usage and limit.

        Args:
            user_id: The user's ID

        Returns:
            Tuple of (pages_used, pages_limit)
        """
        from app.db import User

        result = await self.session.execute(
            select(User.pages_used, User.pages_limit).where(User.id == user_id)
        )
        row = result.first()
        if not row:
            raise ValueError(f"User with ID {user_id} not found")
        return row

    def estimate_pages_from_elements(self, elements: list) -> int:
        """
        Estimate page count from document elements (for Unstructured).

        Args:
            elements: List of document elements

        Returns:
            Estimated number of pages
        """
        # For Unstructured, we can count unique page numbers in metadata
        # or estimate based on content length
        page_numbers = set()
        for element in elements:
            # Try to get page number from metadata
            if hasattr(element, "metadata") and element.metadata:
                page_num = element.metadata.get("page_number")
                if page_num is not None:
                    page_numbers.add(page_num)

        # If we found page numbers in metadata, use that count
        if page_numbers:
            return len(page_numbers)

        # Otherwise, estimate: assume ~2000 chars per page
        total_content_length = sum(
            len(element.page_content) if hasattr(element, "page_content") else 0
            for element in elements
        )
        estimated_pages = max(1, total_content_length // 2000)
        return estimated_pages
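
    # Illustrative fallback (hypothetical numbers): three elements without
    # page_number metadata carrying 2500, 1500, and 4000 characters sum to
    # 8000 characters, so max(1, 8000 // 2000) == 4 estimated pages.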

    def estimate_pages_from_markdown(self, markdown_documents: list) -> int:
        """
        Estimate page count from markdown documents (for LlamaCloud).

        Args:
            markdown_documents: List of markdown document objects

        Returns:
            Estimated number of pages
        """
        # For LlamaCloud, if split_by_page=True was used, each doc is a page
        # Otherwise, estimate based on content length
        if not markdown_documents:
            return 1

        # Check if documents have page metadata
        total_pages = 0
        for doc in markdown_documents:
            if hasattr(doc, "metadata") and doc.metadata:
                # If metadata contains page info, use it
                page_num = doc.metadata.get("page", doc.metadata.get("page_number"))
                if page_num is not None:
                    total_pages += 1
                    continue
            # Otherwise estimate from content length
            content_length = len(doc.text) if hasattr(doc, "text") else 0
            estimated = max(1, content_length // 2000)
            total_pages += estimated
        return max(1, total_pages)

    def estimate_pages_from_content_length(self, content_length: int) -> int:
        """
        Estimate page count from content length (for Docling).

        Args:
            content_length: Length of the document content

        Returns:
            Estimated number of pages
        """
        # Estimate ~2000 characters per page
        return max(1, content_length // 2000)
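
    # For example (hypothetical number): content_length=10_000 yields
    # max(1, 10_000 // 2000) == 5 pages; anything under 2000 characters
    # floors to the 1-page minimum.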

    def estimate_pages_before_processing(self, file_path: str) -> int:
        """
        Estimate page count from file before processing (to avoid unnecessary API calls).

        This is called BEFORE sending to ETL services to prevent cost on rejected files.

        Args:
            file_path: Path to the file

        Returns:
            Estimated number of pages
        """
        if not os.path.exists(file_path):
            raise ValueError(f"File not found: {file_path}")

        file_ext = Path(file_path).suffix.lower()
        file_size = os.path.getsize(file_path)

        # PDF files - try to get actual page count
        if file_ext == ".pdf":
            try:
                import pypdf

                with open(file_path, "rb") as f:
                    pdf_reader = pypdf.PdfReader(f)
                    return len(pdf_reader.pages)
            except Exception:
                # If PDF reading fails, fall back to size estimation
                # Typical PDF: ~100KB per page (conservative estimate)
                return max(1, file_size // (100 * 1024))

        # Word Processing Documents
        # Microsoft Word, LibreOffice Writer, WordPerfect, Pages, etc.
        elif file_ext in [
            ".doc",
            ".docx",
            ".docm",
            ".dot",
            ".dotm",  # Microsoft Word
            ".odt",
            ".ott",
            ".sxw",
            ".stw",
            ".uot",  # OpenDocument/StarOffice Writer
            ".rtf",  # Rich Text Format
            ".pages",  # Apple Pages
            ".wpd",
            ".wps",  # WordPerfect, Microsoft Works
            ".abw",
            ".zabw",  # AbiWord
            ".cwk",
            ".hwp",
            ".lwp",
            ".mcw",
            ".mw",
            ".sdw",
            ".vor",  # Other word processors
        ]:
            # Typical word document: ~50KB per page (conservative)
            return max(1, file_size // (50 * 1024))

        # Presentation Documents
        # PowerPoint, Impress, Keynote, etc.
        elif file_ext in [
            ".ppt",
            ".pptx",
            ".pptm",
            ".pot",
            ".potx",  # Microsoft PowerPoint
            ".odp",
            ".otp",
            ".sxi",
            ".sti",
            ".uop",  # OpenDocument/StarOffice Impress
            ".key",  # Apple Keynote
            ".sda",
            ".sdd",
            ".sdp",  # StarOffice Draw/Impress
        ]:
            # Typical presentation: ~200KB per slide (conservative)
            return max(1, file_size // (200 * 1024))

        # Spreadsheet Documents
        # Excel, Calc, Numbers, Lotus, etc.
        elif file_ext in [
            ".xls",
            ".xlsx",
            ".xlsm",
            ".xlsb",
            ".xlw",
            ".xlr",  # Microsoft Excel
            ".ods",
            ".ots",
            ".fods",  # OpenDocument Spreadsheet
            ".numbers",  # Apple Numbers
            ".123",
            ".wk1",
            ".wk2",
            ".wk3",
            ".wk4",
            ".wks",  # Lotus 1-2-3
            ".wb1",
            ".wb2",
            ".wb3",
            ".wq1",
            ".wq2",  # Quattro Pro
            ".csv",
            ".tsv",
            ".slk",
            ".sylk",
            ".dif",
            ".dbf",
            ".prn",
            ".qpw",  # Data formats
            ".602",
            ".et",
            ".eth",  # Other spreadsheets
        ]:
            # Spreadsheets typically have 1 sheet = 1 page for ETL
            # Conservative: ~100KB per sheet
            return max(1, file_size // (100 * 1024))

        # E-books
        elif file_ext in [".epub"]:
            # E-books vary widely, estimate by size
            # Typical e-book: ~50KB per page
            return max(1, file_size // (50 * 1024))

        # Plain Text and Markup Files
        elif file_ext in [
            ".txt",
            ".log",  # Plain text
            ".md",
            ".markdown",  # Markdown
            ".htm",
            ".html",
            ".xml",  # Markup
        ]:
            # Plain text: ~3000 bytes per page
            return max(1, file_size // 3000)

        # Image Files
        # Each image is typically processed as 1 page
        elif file_ext in [
            ".jpg",
            ".jpeg",  # JPEG
            ".png",  # PNG
            ".gif",  # GIF
            ".bmp",  # Bitmap
            ".tiff",  # TIFF
            ".webp",  # WebP
            ".svg",  # SVG
            ".cgm",  # Computer Graphics Metafile
            ".odg",
            ".pbd",  # OpenDocument Graphics
        ]:
            # Each image = 1 page
            return 1

        # Audio Files (transcription = typically 1 page per minute)
        # Note: These should be handled by audio transcription flow, not ETL
        elif file_ext in [".mp3", ".m4a", ".wav", ".mpga"]:
            # Audio files: estimate based on duration
            # Fallback: ~1MB per minute of audio, 1 page per minute transcript
            return max(1, file_size // (1024 * 1024))

        # Video Files (typically not processed for pages, but just in case)
        elif file_ext in [".mp4", ".mpeg", ".webm"]:
            # Video files: very rough estimate
            # Typically wouldn't be page-based, but use conservative estimate
            return max(1, file_size // (5 * 1024 * 1024))

        # Other/Unknown Document Types
        else:
            # Conservative estimate: ~80KB per page
            # This catches: .sgl, .sxg, .uof, .uos1, .uos2, .web, and any future formats
            return max(1, file_size // (80 * 1024))
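
End to end, the intended call order is estimate, check, process, record. A minimal sketch of a caller, assuming an open AsyncSession and a hypothetical run_etl stand-in for the actual ETL step:

from sqlalchemy.ext.asyncio import AsyncSession

from app.services.page_limit_service import PageLimitExceededError, PageLimitService


async def ingest_with_limit(session: AsyncSession, user_id: str, file_path: str) -> int:
    service = PageLimitService(session)

    # 1. Cheap local estimate before any paid ETL call.
    estimated = service.estimate_pages_before_processing(file_path)

    # 2. Reject up front if the estimate would blow the limit.
    try:
        await service.check_page_limit(user_id, estimated)
    except PageLimitExceededError:
        # Nothing was sent to the ETL service, so nothing is billed.
        raise

    # 3. Run the actual ETL step here (hypothetical helper, not part of this commit).
    # docs = await run_etl(file_path)

    # 4. Record usage once the work is done; allow_exceed=True mirrors how the
    #    processors call it after the initial check has already passed.
    return await service.update_page_usage(user_id, estimated, allow_exceed=True)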

View file

@@ -308,11 +308,24 @@ async def _process_file_upload(
            log_entry,
        )
    except Exception as e:
        # Import here to avoid circular dependencies
        from fastapi import HTTPException

        from app.services.page_limit_service import PageLimitExceededError

        # For page limit errors, use the detailed message from the exception
        if isinstance(e, PageLimitExceededError):
            error_message = str(e)
        elif isinstance(e, HTTPException) and "page limit" in str(e.detail).lower():
            error_message = str(e.detail)
        else:
            error_message = f"Failed to process file: {filename}"

        await task_logger.log_task_failure(
            log_entry,
            error_message,
            str(e),
            {"error_type": type(e).__name__},
        )
        logger.error(error_message)
        raise

View file

@@ -2,6 +2,7 @@
File document processors for different ETL services (Unstructured, LlamaCloud, Docling).
"""

import contextlib
import logging

from fastapi import HTTPException
@@ -579,6 +580,67 @@ async def process_file_in_background(
            )
        else:
            # Import page limit service
            from app.services.page_limit_service import (
                PageLimitExceededError,
                PageLimitService,
            )

            # Initialize page limit service
            page_limit_service = PageLimitService(session)

            # CRITICAL: Estimate page count BEFORE making expensive ETL API calls
            # This prevents users from incurring costs on files that would exceed their limit
            try:
                estimated_pages_before = (
                    page_limit_service.estimate_pages_before_processing(file_path)
                )
            except Exception:
                # If estimation fails, use a conservative estimate based on file size
                import os

                file_size = os.path.getsize(file_path)
                estimated_pages_before = max(
                    1, file_size // (80 * 1024)
                )  # ~80KB per page

            await task_logger.log_task_progress(
                log_entry,
                f"Estimated {estimated_pages_before} pages for file: {filename}",
                {
                    "estimated_pages": estimated_pages_before,
                    "file_type": "document",
                },
            )

            # Check page limit BEFORE calling ETL service to avoid unnecessary costs
            try:
                await page_limit_service.check_page_limit(
                    user_id, estimated_pages_before
                )
            except PageLimitExceededError as e:
                await task_logger.log_task_failure(
                    log_entry,
                    f"Page limit exceeded before processing: {filename}",
                    str(e),
                    {
                        "error_type": "PageLimitExceeded",
                        "pages_used": e.pages_used,
                        "pages_limit": e.pages_limit,
                        "estimated_pages": estimated_pages_before,
                    },
                )
                # Clean up the temp file
                import os

                with contextlib.suppress(Exception):
                    os.unlink(file_path)
                raise HTTPException(
                    status_code=403,
                    detail=str(e),
                ) from e

            if app_config.ETL_SERVICE == "UNSTRUCTURED":
                await task_logger.log_task_progress(
                    log_entry,
@@ -611,6 +673,24 @@ async def process_file_in_background(
                    {"processing_stage": "etl_complete", "elements_count": len(docs)},
                )

                # Verify actual page count from parsed documents
                actual_pages = page_limit_service.estimate_pages_from_elements(docs)

                # Use the higher of the two estimates for safety (in case pre-estimate was too low)
                final_page_count = max(estimated_pages_before, actual_pages)

                # If actual is significantly higher than estimate, log a warning
                if actual_pages > estimated_pages_before * 1.5:
                    await task_logger.log_task_progress(
                        log_entry,
                        f"Actual page count higher than estimate: {filename}",
                        {
                            "estimated_before": estimated_pages_before,
                            "actual_pages": actual_pages,
                            "using_count": final_page_count,
                        },
                    )

                # Clean up the temp file
                import os
@@ -626,6 +706,12 @@ async def process_file_in_background(
                )

                if result:
                    # Update page usage after successful processing
                    # allow_exceed=True because document was already created after passing initial check
                    await page_limit_service.update_page_usage(
                        user_id, final_page_count, allow_exceed=True
                    )

                    await task_logger.log_task_success(
                        log_entry,
                        f"Successfully processed file with Unstructured: {filename}",
@@ -634,6 +720,7 @@ async def process_file_in_background(
                            "content_hash": result.content_hash,
                            "file_type": "document",
                            "etl_service": "UNSTRUCTURED",
                            "pages_processed": final_page_count,
                        },
                    )
                else:
@@ -696,6 +783,45 @@ async def process_file_in_background(
                    },
                )

                # Check if LlamaCloud returned any documents
                if not markdown_documents or len(markdown_documents) == 0:
                    await task_logger.log_task_failure(
                        log_entry,
                        f"LlamaCloud parsing returned no documents: {filename}",
                        "ETL service returned empty document list",
                        {
                            "error_type": "EmptyDocumentList",
                            "etl_service": "LLAMACLOUD",
                        },
                    )
                    raise ValueError(
                        f"LlamaCloud parsing returned no documents for {filename}"
                    )

                # Verify actual page count from parsed markdown documents
                actual_pages = page_limit_service.estimate_pages_from_markdown(
                    markdown_documents
                )

                # Use the higher of the two estimates for safety (in case pre-estimate was too low)
                final_page_count = max(estimated_pages_before, actual_pages)

                # If actual is significantly higher than estimate, log a warning
                if actual_pages > estimated_pages_before * 1.5:
                    await task_logger.log_task_progress(
                        log_entry,
                        f"Actual page count higher than estimate: {filename}",
                        {
                            "estimated_before": estimated_pages_before,
                            "actual_pages": actual_pages,
                            "using_count": final_page_count,
                        },
                    )

                # Track if any document was successfully created (not a duplicate)
                any_doc_created = False
                last_created_doc = None

                for doc in markdown_documents:
                    # Extract text content from the markdown documents
                    markdown_content = doc.text
@@ -709,18 +835,34 @@ async def process_file_in_background(
                        user_id=user_id,
                    )

                    # Track if this document was successfully created
                    if doc_result:
                        any_doc_created = True
                        last_created_doc = doc_result

                # Update page usage once after processing all documents
                # Only update if at least one document was created (not all duplicates)
                if any_doc_created:
                    # Update page usage after successful processing
                    # allow_exceed=True because document was already created after passing initial check
                    await page_limit_service.update_page_usage(
                        user_id, final_page_count, allow_exceed=True
                    )

                    await task_logger.log_task_success(
                        log_entry,
                        f"Successfully processed file with LlamaCloud: {filename}",
                        {
                            "document_id": last_created_doc.id,
                            "content_hash": last_created_doc.content_hash,
                            "file_type": "document",
                            "etl_service": "LLAMACLOUD",
                            "pages_processed": final_page_count,
                            "documents_count": len(markdown_documents),
                        },
                    )
                else:
                    # All documents were duplicates (markdown_documents was not empty, but all returned None)
                    await task_logger.log_task_success(
                        log_entry,
                        f"Document already exists (duplicate): {filename}",
@@ -728,6 +870,7 @@ async def process_file_in_background(
                            "duplicate_detected": True,
                            "file_type": "document",
                            "etl_service": "LLAMACLOUD",
                            "documents_count": len(markdown_documents),
                        },
                    )
@@ -769,6 +912,26 @@ async def process_file_in_background(
                    },
                )

                # Verify actual page count from content length
                actual_pages = page_limit_service.estimate_pages_from_content_length(
                    len(result["content"])
                )

                # Use the higher of the two estimates for safety (in case pre-estimate was too low)
                final_page_count = max(estimated_pages_before, actual_pages)

                # If actual is significantly higher than estimate, log a warning
                if actual_pages > estimated_pages_before * 1.5:
                    await task_logger.log_task_progress(
                        log_entry,
                        f"Actual page count higher than estimate: {filename}",
                        {
                            "estimated_before": estimated_pages_before,
                            "actual_pages": actual_pages,
                            "using_count": final_page_count,
                        },
                    )

                # Process the document using our Docling background task
                doc_result = await add_received_file_document_using_docling(
                    session,
@@ -779,6 +942,12 @@ async def process_file_in_background(
                )

                if doc_result:
                    # Update page usage after successful processing
                    # allow_exceed=True because document was already created after passing initial check
                    await page_limit_service.update_page_usage(
                        user_id, final_page_count, allow_exceed=True
                    )

                    await task_logger.log_task_success(
                        log_entry,
                        f"Successfully processed file with Docling: {filename}",
@@ -787,6 +956,7 @@ async def process_file_in_background(
                            "content_hash": doc_result.content_hash,
                            "file_type": "document",
                            "etl_service": "DOCLING",
                            "pages_processed": final_page_count,
                        },
                    )
                else:
@@ -801,13 +971,24 @@ async def process_file_in_background(
                    )

    except Exception as e:
        await session.rollback()

        # For page limit errors, use the detailed message from the exception
        from app.services.page_limit_service import PageLimitExceededError

        if isinstance(e, PageLimitExceededError):
            error_message = str(e)
        elif isinstance(e, HTTPException) and "page limit" in str(e.detail).lower():
            error_message = str(e.detail)
        else:
            error_message = f"Failed to process file: {filename}"

        await task_logger.log_task_failure(
            log_entry,
            error_message,
            str(e),
            {"error_type": type(e).__name__, "filename": filename},
        )

        import logging

        logging.error(f"Error processing file in background: {error_message}")
        raise  # Re-raise so the wrapper can also handle it
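
The same reconciliation pattern repeats in all three ETL branches above: take the larger of the pre-flight estimate and the post-parse count, and log when the estimate was badly low. A condensed sketch of that shared logic as a hypothetical helper (the commit inlines it per branch):

async def reconcile_page_count(
    task_logger, log_entry, filename: str, estimated_before: int, actual_pages: int
) -> int:
    # Bill the larger of the two counts so a low pre-estimate cannot leak pages.
    final_page_count = max(estimated_before, actual_pages)

    # Surface estimates that were off by more than 50% for later tuning.
    if actual_pages > estimated_before * 1.5:
        await task_logger.log_task_progress(
            log_entry,
            f"Actual page count higher than estimate: {filename}",
            {
                "estimated_before": estimated_before,
                "actual_pages": actual_pages,
                "using_count": final_page_count,
            },
        )
    return final_page_count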

View file

@@ -26,6 +26,7 @@ dependencies = [
    "numpy>=1.24.0",
    "pgvector>=0.3.6",
    "playwright>=1.50.0",
    "pypdf>=5.1.0",
    "python-ffmpeg>=2.0.12",
    "rerankers[flashrank]>=0.7.1",
    "sentence-transformers>=3.4.1",

View file

@@ -4423,8 +4423,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/2d/75/364847b879eb630b3ac8293798e380e441a957c53657995053c5ec39a316/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a", size = 4411159 },
    { url = "https://files.pythonhosted.org/packages/6f/a0/567f7ea38b6e1c62aafd58375665a547c00c608a471620c0edc364733e13/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e", size = 4468234 },
    { url = "https://files.pythonhosted.org/packages/30/da/4e42788fb811bbbfd7b7f045570c062f49e350e1d1f3df056c3fb5763353/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db", size = 4166236 },
    { url = "https://files.pythonhosted.org/packages/3c/94/c1777c355bc560992af848d98216148be5f1be001af06e06fc49cbded578/psycopg2_binary-2.9.11-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757", size = 3983083 },
    { url = "https://files.pythonhosted.org/packages/bd/42/c9a21edf0e3daa7825ed04a4a8588686c6c14904344344a039556d78aa58/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3", size = 3652281 },
    { url = "https://files.pythonhosted.org/packages/12/22/dedfbcfa97917982301496b6b5e5e6c5531d1f35dd2b488b08d1ebc52482/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a", size = 3298010 },
    { url = "https://files.pythonhosted.org/packages/66/ea/d3390e6696276078bd01b2ece417deac954dfdd552d2edc3d03204416c0c/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34", size = 3044641 },
    { url = "https://files.pythonhosted.org/packages/12/9a/0402ded6cbd321da0c0ba7d34dc12b29b14f5764c2fc10750daa38e825fc/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d", size = 3347940 },
    { url = "https://files.pythonhosted.org/packages/b1/d2/99b55e85832ccde77b211738ff3925a5d73ad183c0b37bcbbe5a8ff04978/psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:b33fabeb1fde21180479b2d4667e994de7bbf0eec22832ba5d9b5e4cf65b6c6d", size = 2714147 },
    { url = "https://files.pythonhosted.org/packages/ff/a8/a2709681b3ac11b0b1786def10006b8995125ba268c9a54bea6f5ae8bd3e/psycopg2_binary-2.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b8fb3db325435d34235b044b199e56cdf9ff41223a4b9752e8576465170bb38c", size = 3756572 },
@@ -4432,8 +4434,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/11/32/b2ffe8f3853c181e88f0a157c5fb4e383102238d73c52ac6d93a5c8bffe6/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c55b385daa2f92cb64b12ec4536c66954ac53654c7f15a203578da4e78105c0", size = 4411242 },
    { url = "https://files.pythonhosted.org/packages/10/04/6ca7477e6160ae258dc96f67c371157776564679aefd247b66f4661501a2/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c0377174bf1dd416993d16edc15357f6eb17ac998244cca19bc67cdc0e2e5766", size = 4468258 },
    { url = "https://files.pythonhosted.org/packages/3c/7e/6a1a38f86412df101435809f225d57c1a021307dd0689f7a5e7fe83588b1/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c6ff3335ce08c75afaed19e08699e8aacf95d4a260b495a4a8545244fe2ceb3", size = 4166295 },
    { url = "https://files.pythonhosted.org/packages/f2/7d/c07374c501b45f3579a9eb761cbf2604ddef3d96ad48679112c2c5aa9c25/psycopg2_binary-2.9.11-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:84011ba3109e06ac412f95399b704d3d6950e386b7994475b231cf61eec2fc1f", size = 3983133 },
    { url = "https://files.pythonhosted.org/packages/82/56/993b7104cb8345ad7d4516538ccf8f0d0ac640b1ebd8c754a7b024e76878/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba34475ceb08cccbdd98f6b46916917ae6eeb92b5ae111df10b544c3a4621dc4", size = 3652383 },
    { url = "https://files.pythonhosted.org/packages/2d/ac/eaeb6029362fd8d454a27374d84c6866c82c33bfc24587b4face5a8e43ef/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b31e90fdd0f968c2de3b26ab014314fe814225b6c324f770952f7d38abf17e3c", size = 3298168 },
    { url = "https://files.pythonhosted.org/packages/2b/39/50c3facc66bded9ada5cbc0de867499a703dc6bca6be03070b4e3b65da6c/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d526864e0f67f74937a8fce859bd56c979f5e2ec57ca7c627f5f1071ef7fee60", size = 3044712 },
    { url = "https://files.pythonhosted.org/packages/9c/8e/b7de019a1f562f72ada81081a12823d3c1590bedc48d7d2559410a2763fe/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04195548662fa544626c8ea0f06561eb6203f1984ba5b4562764fbeb4c3d14b1", size = 3347549 },
    { url = "https://files.pythonhosted.org/packages/80/2d/1bb683f64737bbb1f86c82b7359db1eb2be4e2c0c13b947f80efefa7d3e5/psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:efff12b432179443f54e230fdf60de1f6cc726b6c832db8701227d089310e8aa", size = 2714215 },
    { url = "https://files.pythonhosted.org/packages/64/12/93ef0098590cf51d9732b4f139533732565704f45bdc1ffa741b7c95fb54/psycopg2_binary-2.9.11-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:92e3b669236327083a2e33ccfa0d320dd01b9803b3e14dd986a4fc54aa00f4e1", size = 3756567 },
@@ -4441,8 +4445,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/13/1e/98874ce72fd29cbde93209977b196a2edae03f8490d1bd8158e7f1daf3a0/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b52a3f9bb540a3e4ec0f6ba6d31339727b2950c9772850d6545b7eae0b9d7c5", size = 4411646 },
    { url = "https://files.pythonhosted.org/packages/5a/bd/a335ce6645334fb8d758cc358810defca14a1d19ffbc8a10bd38a2328565/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:db4fd476874ccfdbb630a54426964959e58da4c61c9feba73e6094d51303d7d8", size = 4468701 },
    { url = "https://files.pythonhosted.org/packages/44/d6/c8b4f53f34e295e45709b7568bf9b9407a612ea30387d35eb9fa84f269b4/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47f212c1d3be608a12937cc131bd85502954398aaa1320cb4c14421a0ffccf4c", size = 4166293 },
    { url = "https://files.pythonhosted.org/packages/4b/e0/f8cc36eadd1b716ab36bb290618a3292e009867e5c97ce4aba908cb99644/psycopg2_binary-2.9.11-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e35b7abae2b0adab776add56111df1735ccc71406e56203515e228a8dc07089f", size = 3983184 },
    { url = "https://files.pythonhosted.org/packages/53/3e/2a8fe18a4e61cfb3417da67b6318e12691772c0696d79434184a511906dc/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fcf21be3ce5f5659daefd2b3b3b6e4727b028221ddc94e6c1523425579664747", size = 3652650 },
    { url = "https://files.pythonhosted.org/packages/76/36/03801461b31b29fe58d228c24388f999fe814dfc302856e0d17f97d7c54d/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:9bd81e64e8de111237737b29d68039b9c813bdf520156af36d26819c9a979e5f", size = 3298663 },
    { url = "https://files.pythonhosted.org/packages/97/77/21b0ea2e1a73aa5fa9222b2a6b8ba325c43c3a8d54272839c991f2345656/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:32770a4d666fbdafab017086655bcddab791d7cb260a16679cc5a7338b64343b", size = 3044737 },
    { url = "https://files.pythonhosted.org/packages/67/69/f36abe5f118c1dca6d3726ceae164b9356985805480731ac6712a63f24f0/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3cb3a676873d7506825221045bd70e0427c905b9c8ee8d6acd70cfcbd6e576d", size = 3347643 },
    { url = "https://files.pythonhosted.org/packages/e1/36/9c0c326fe3a4227953dfb29f5d0c8ae3b8eb8c1cd2967aa569f50cb3c61f/psycopg2_binary-2.9.11-cp314-cp314-win_amd64.whl", hash = "sha256:4012c9c954dfaccd28f94e84ab9f94e12df76b4afb22331b1f0d3154893a6316", size = 2803913 },
]
@@ -5885,6 +5891,7 @@ dependencies = [
    { name = "numpy" },
    { name = "pgvector" },
    { name = "playwright" },
    { name = "pypdf" },
    { name = "python-ffmpeg" },
    { name = "redis" },
    { name = "rerankers", extra = ["flashrank"] },
@@ -5937,6 +5944,7 @@ requires-dist = [
    { name = "numpy", specifier = ">=1.24.0" },
    { name = "pgvector", specifier = ">=0.3.6" },
    { name = "playwright", specifier = ">=1.50.0" },
    { name = "pypdf", specifier = ">=5.1.0" },
    { name = "python-ffmpeg", specifier = ">=2.0.12" },
    { name = "redis", specifier = ">=5.2.1" },
    { name = "rerankers", extras = ["flashrank"], specifier = ">=0.7.1" },