mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 17:26:23 +02:00
feat: Implement file upload limits and page limit enforcement in backend
- Added constants for maximum files per upload, per-file size, and total upload size.
- Enhanced the document upload route to validate file counts and sizes, returning appropriate HTTP errors.
- Introduced end-to-end tests for upload limits and page-limit enforcement, verifying correct behavior under various scenarios.
- Updated test helpers to support notification retrieval for page-limit-exceeded scenarios.
This commit is contained in:
parent
93c0af475b
commit
a57ab02900
7 changed files with 628 additions and 2 deletions
318
surfsense_backend/tests/e2e/test_page_limits.py
Normal file
318
surfsense_backend/tests/e2e/test_page_limits.py
Normal file
|
|
@ -0,0 +1,318 @@
|
|||
"""
|
||||
End-to-end tests for page-limit enforcement during document upload.
|
||||
|
||||
These tests manipulate the test user's ``pages_used`` / ``pages_limit``
|
||||
columns directly in the database and then exercise the upload pipeline to
|
||||
verify that:
|
||||
|
||||
- Uploads are rejected *before* ETL when the limit is exhausted.
|
||||
- ``pages_used`` increases after a successful upload.
|
||||
- A ``page_limit_exceeded`` notification is created on rejection.
|
||||
- ``pages_used`` is not modified when a document fails processing.
|
||||
|
||||
All tests reuse the existing small fixtures (``sample.pdf``, ``sample.txt``)
|
||||
so no additional processing time is introduced.
|
||||
|
||||
Prerequisites (must be running):
|
||||
- FastAPI backend
|
||||
- PostgreSQL + pgvector
|
||||
- Redis
|
||||
- Celery worker
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from tests.utils.helpers import (
|
||||
get_notifications,
|
||||
poll_document_status,
|
||||
upload_file,
|
||||
)
|
||||
|
||||
# Tag every test in this module with the ``page_limit`` marker so the suite
# can be selected/deselected with ``pytest -m page_limit``.
pytestmark = pytest.mark.page_limit
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test A: Successful upload increments pages_used
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPageUsageIncrementsOnSuccess:
    """Verify that ``pages_used`` grows once a PDF finishes processing."""

    async def test_pages_used_increases_after_pdf_upload(
        self,
        client: httpx.AsyncClient,
        headers: dict[str, str],
        search_space_id: int,
        cleanup_doc_ids: list[int],
        page_limits,
    ):
        # Start from a clean slate with plenty of headroom.
        await page_limits.set(pages_used=0, pages_limit=1000)

        response = await upload_file(
            client, headers, "sample.pdf", search_space_id=search_space_id
        )
        assert response.status_code == 200
        document_ids = response.json()["document_ids"]
        cleanup_doc_ids.extend(document_ids)

        status_by_id = await poll_document_status(
            client,
            headers,
            document_ids,
            search_space_id=search_space_id,
            timeout=300.0,
        )
        # Every uploaded document must have finished processing successfully.
        for document_id in document_ids:
            assert status_by_id[document_id]["status"]["state"] == "ready"

        used, _ = await page_limits.get()
        assert used > 0, "pages_used should have increased after successful processing"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test B: Upload rejected when page limit is fully exhausted
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestUploadRejectedWhenLimitExhausted:
    """
    With zero pages remaining (``pages_used == pages_limit``) a new upload
    must end up in ``failed`` status and carry a page-limit failure reason.
    """

    async def test_pdf_fails_when_no_pages_remaining(
        self,
        client: httpx.AsyncClient,
        headers: dict[str, str],
        search_space_id: int,
        cleanup_doc_ids: list[int],
        page_limits,
    ):
        # Exhaust the quota completely before uploading.
        await page_limits.set(pages_used=100, pages_limit=100)

        response = await upload_file(
            client, headers, "sample.pdf", search_space_id=search_space_id
        )
        assert response.status_code == 200
        document_ids = response.json()["document_ids"]
        cleanup_doc_ids.extend(document_ids)

        status_by_id = await poll_document_status(
            client,
            headers,
            document_ids,
            search_space_id=search_space_id,
            timeout=300.0,
        )
        for document_id in document_ids:
            doc_status = status_by_id[document_id]["status"]
            assert doc_status["state"] == "failed"
            reason = doc_status.get("reason", "").lower()
            assert "page limit" in reason, (
                f"Expected 'page limit' in failure reason, got: {reason!r}"
            )

    async def test_pages_used_unchanged_after_limit_rejection(
        self,
        client: httpx.AsyncClient,
        headers: dict[str, str],
        search_space_id: int,
        cleanup_doc_ids: list[int],
        page_limits,
    ):
        # Quota is already fully consumed; a rejected upload must not change it.
        await page_limits.set(pages_used=50, pages_limit=50)

        response = await upload_file(
            client, headers, "sample.pdf", search_space_id=search_space_id
        )
        assert response.status_code == 200
        document_ids = response.json()["document_ids"]
        cleanup_doc_ids.extend(document_ids)

        # Wait for processing to settle; only the usage counter is checked here.
        await poll_document_status(
            client,
            headers,
            document_ids,
            search_space_id=search_space_id,
            timeout=300.0,
        )

        used, _ = await page_limits.get()
        assert used == 50, (
            f"pages_used should remain 50 after rejected upload, got {used}"
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test C: Page-limit notification is created on rejection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPageLimitNotification:
    """A ``page_limit_exceeded`` notification must be created when upload
    is rejected due to the limit."""

    async def test_page_limit_exceeded_notification_created(
        self,
        client: httpx.AsyncClient,
        headers: dict[str, str],
        search_space_id: int,
        cleanup_doc_ids: list[int],
        page_limits,
    ):
        # Exhaust the quota so the upload is rejected during processing.
        await page_limits.set(pages_used=100, pages_limit=100)

        resp = await upload_file(
            client, headers, "sample.pdf", search_space_id=search_space_id
        )
        assert resp.status_code == 200
        doc_ids = resp.json()["document_ids"]
        cleanup_doc_ids.extend(doc_ids)

        await poll_document_status(
            client, headers, doc_ids, search_space_id=search_space_id, timeout=300.0
        )

        notifications = await get_notifications(
            client,
            headers,
            type_filter="page_limit_exceeded",
            search_space_id=search_space_id,
        )
        assert len(notifications) >= 1, (
            "Expected at least one page_limit_exceeded notification"
        )

        # NOTE(review): assumes the notifications endpoint returns newest-first
        # ordering — confirm against the API implementation.
        latest = notifications[0]
        # Use .get() with a null-safe fallback so a missing or null title/message
        # surfaces as a readable assertion failure instead of a
        # KeyError/AttributeError that obscures what went wrong.
        title = (latest.get("title") or "").lower()
        message = (latest.get("message") or "").lower()
        assert "page limit" in title or "page limit" in message, (
            f"Notification should mention page limit: title={latest.get('title')!r}, "
            f"message={latest.get('message')!r}"
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test D: Successful upload creates a completed document_processing notification
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDocumentProcessingNotification:
    """A ``document_processing`` notification with ``completed`` status must
    exist after a successful upload."""

    async def test_processing_completed_notification_exists(
        self,
        client: httpx.AsyncClient,
        headers: dict[str, str],
        search_space_id: int,
        cleanup_doc_ids: list[int],
        page_limits,
    ):
        # Plenty of quota so the upload succeeds end-to-end.
        await page_limits.set(pages_used=0, pages_limit=1000)

        resp = await upload_file(
            client, headers, "sample.txt", search_space_id=search_space_id
        )
        assert resp.status_code == 200
        doc_ids = resp.json()["document_ids"]
        cleanup_doc_ids.extend(doc_ids)

        await poll_document_status(
            client, headers, doc_ids, search_space_id=search_space_id
        )

        notifications = await get_notifications(
            client,
            headers,
            type_filter="document_processing",
            search_space_id=search_space_id,
        )
        # ``or {}`` guards against a notification whose "metadata" key is
        # present but null — ``n.get("metadata", {})`` would return None there
        # and raise AttributeError on the chained .get().
        completed = [
            n
            for n in notifications
            if (n.get("metadata") or {}).get("processing_stage") == "completed"
        ]
        assert len(completed) >= 1, (
            "Expected at least one document_processing notification with 'completed' stage"
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test E: pages_used unchanged when a document fails for non-limit reasons
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPagesUnchangedOnProcessingFailure:
    """``pages_used`` must stay constant when a document fails during ETL
    (e.g. an empty/corrupt file) rather than via a page-limit rejection."""

    async def test_pages_used_stable_on_etl_failure(
        self,
        client: httpx.AsyncClient,
        headers: dict[str, str],
        search_space_id: int,
        cleanup_doc_ids: list[int],
        page_limits,
    ):
        # Seed a known usage value with ample remaining quota.
        await page_limits.set(pages_used=10, pages_limit=1000)

        response = await upload_file(
            client, headers, "empty.pdf", search_space_id=search_space_id
        )
        assert response.status_code == 200
        document_ids = response.json()["document_ids"]
        cleanup_doc_ids.extend(document_ids)

        # The upload may yield no document ids at all; only poll when it did.
        if document_ids:
            status_by_id = await poll_document_status(
                client,
                headers,
                document_ids,
                search_space_id=search_space_id,
                timeout=120.0,
            )
            for document_id in document_ids:
                assert status_by_id[document_id]["status"]["state"] == "failed"

        used, _ = await page_limits.get()
        assert used == 10, (
            f"pages_used should remain 10 after ETL failure, got {used}"
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test F: Second upload rejected after first consumes remaining quota
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSecondUploadExceedsLimit:
    """The first upload consumes the whole quota; the follow-up upload must
    then be rejected with a page-limit failure."""

    async def test_second_upload_rejected_after_quota_consumed(
        self,
        client: httpx.AsyncClient,
        headers: dict[str, str],
        search_space_id: int,
        cleanup_doc_ids: list[int],
        page_limits,
    ):
        # Give just enough room for one ~1-page PDF
        await page_limits.set(pages_used=0, pages_limit=1)

        first_response = await upload_file(
            client, headers, "sample.pdf", search_space_id=search_space_id
        )
        assert first_response.status_code == 200
        first_ids = first_response.json()["document_ids"]
        cleanup_doc_ids.extend(first_ids)

        first_statuses = await poll_document_status(
            client,
            headers,
            first_ids,
            search_space_id=search_space_id,
            timeout=300.0,
        )
        assert all(
            first_statuses[document_id]["status"]["state"] == "ready"
            for document_id in first_ids
        )

        # Second upload — should fail because quota is now consumed
        second_response = await upload_file(
            client,
            headers,
            "sample.pdf",
            search_space_id=search_space_id,
            filename_override="sample_copy.pdf",
        )
        assert second_response.status_code == 200
        second_ids = second_response.json()["document_ids"]
        cleanup_doc_ids.extend(second_ids)

        second_statuses = await poll_document_status(
            client,
            headers,
            second_ids,
            search_space_id=search_space_id,
            timeout=300.0,
        )
        for document_id in second_ids:
            doc_status = second_statuses[document_id]["status"]
            assert doc_status["state"] == "failed"
            reason = doc_status.get("reason", "").lower()
            assert "page limit" in reason, (
                f"Expected 'page limit' in failure reason, got: {reason!r}"
            )
|
||||
Loading…
Add table
Add a link
Reference in a new issue