feat: implement page limit checks in local folder indexing to manage user page usage

This commit is contained in:
Anish Sarkar 2026-04-03 19:13:25 +05:30
parent 5068a6b4f3
commit 9c0af6569d
2 changed files with 303 additions and 4 deletions

View file

@@ -959,3 +959,222 @@ class TestDirectConvert:
assert "| name" in doc.source_markdown
assert "name,age,city" not in doc.source_markdown
# ====================================================================
# Tier 8: Page Limits (PL1-PL6)
# ====================================================================
class TestPageLimits:
    """Tier 8 (PL1-PL6): page-quota checks around ``index_local_folder``.

    Every test sets ``pages_used`` / ``pages_limit`` on the ``db_user``
    fixture, writes markdown files under ``tmp_path``, runs the indexer, and
    then inspects the returned counters plus the refreshed ``pages_used``.
    """

    @staticmethod
    async def _run_indexer(
        session: AsyncSession,
        search_space: SearchSpace,
        user: User,
        folder: Path,
        **extra,
    ):
        """Call ``index_local_folder`` with the arguments all PL tests share.

        ``extra`` forwards the optional keywords some tests need
        (``target_file_paths``, ``root_folder_id``). The indexer's 4-tuple is
        returned untouched; the tests unpack it as
        ``(count, skipped_or_failed, root_folder_id, err)``.
        """
        # Kept as a function-local import, matching how the tests pulled the
        # indexer in before this helper existed.
        from app.tasks.connector_indexers.local_folder_indexer import index_local_folder

        return await index_local_folder(
            session=session,
            search_space_id=search_space.id,
            user_id=str(user.id),
            folder_path=str(folder),
            folder_name="test-folder",
            **extra,
        )

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_pl1_full_scan_increments_pages_used(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
    ):
        """PL1: a successful full scan pushes ``user.pages_used`` above zero."""
        db_user.pages_used, db_user.pages_limit = 0, 500
        await db_session.flush()
        (tmp_path / "note.md").write_text("# Hello World\n\nContent here.")

        indexed, _skipped, _root_id, error = await self._run_indexer(
            db_session, db_search_space, db_user, tmp_path
        )

        assert error is None
        assert indexed == 1
        await db_session.refresh(db_user)
        assert db_user.pages_used > 0, "pages_used should increase after indexing"

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_pl2_full_scan_blocked_when_limit_exhausted(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
    ):
        """PL2: with the quota already used up, a full scan indexes nothing."""
        db_user.pages_used, db_user.pages_limit = 100, 100
        await db_session.flush()
        (tmp_path / "note.md").write_text("# Hello World\n\nContent here.")

        # The error slot is deliberately not asserted on in this scenario.
        indexed, _skipped, _root_id, _err = await self._run_indexer(
            db_session, db_search_space, db_user, tmp_path
        )

        assert indexed == 0
        await db_session.refresh(db_user)
        assert db_user.pages_used == 100, "pages_used should not change on rejection"

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_pl3_single_file_increments_pages_used(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
    ):
        """PL3: indexing one targeted file pushes ``user.pages_used`` above zero."""
        db_user.pages_used, db_user.pages_limit = 0, 500
        await db_session.flush()
        note = tmp_path / "note.md"
        note.write_text("# Hello World\n\nContent here.")

        indexed, _skipped, _root_id, error = await self._run_indexer(
            db_session,
            db_search_space,
            db_user,
            tmp_path,
            target_file_paths=[str(note)],
        )

        assert error is None
        assert indexed == 1
        await db_session.refresh(db_user)
        assert db_user.pages_used > 0, "pages_used should increase after indexing"

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_pl4_single_file_blocked_when_limit_exhausted(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
    ):
        """PL4: with the quota used up, a targeted file is rejected with an error."""
        db_user.pages_used, db_user.pages_limit = 100, 100
        await db_session.flush()
        note = tmp_path / "note.md"
        note.write_text("# Hello World\n\nContent here.")

        indexed, _skipped, _root_id, error = await self._run_indexer(
            db_session,
            db_search_space,
            db_user,
            tmp_path,
            target_file_paths=[str(note)],
        )

        assert indexed == 0
        assert error is not None
        assert "page limit" in error.lower()
        await db_session.refresh(db_user)
        assert db_user.pages_used == 100, "pages_used should not change on rejection"

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_pl5_unchanged_resync_no_extra_pages(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
    ):
        """PL5: syncing the same, unmodified file twice charges pages only once."""
        db_user.pages_used, db_user.pages_limit = 0, 500
        await db_session.flush()
        (tmp_path / "note.md").write_text("# Hello\n\nSame content.")

        # First pass: the file is new, so it is indexed and pages are charged.
        first_count, _, root_folder_id, _ = await self._run_indexer(
            db_session, db_search_space, db_user, tmp_path
        )
        assert first_count == 1
        await db_session.refresh(db_user)
        pages_after_first = db_user.pages_used
        assert pages_after_first > 0

        # Second pass against the same root folder with nothing changed on disk.
        second_count, _, _, _ = await self._run_indexer(
            db_session,
            db_search_space,
            db_user,
            tmp_path,
            root_folder_id=root_folder_id,
        )
        assert second_count == 0
        await db_session.refresh(db_user)
        assert db_user.pages_used == pages_after_first, (
            "pages_used should not increase for unchanged files"
        )

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_pl6_batch_partial_page_limit_exhaustion(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
        patched_batch_sessions,
    ):
        """PL6: a three-file batch under a limit of 1 partly succeeds, partly fails."""
        db_user.pages_used, db_user.pages_limit = 0, 1
        await db_session.flush()

        # a.md / b.md / c.md containing "File A content", "File B content", ...
        batch = [tmp_path / f"{letter}.md" for letter in "abc"]
        for path in batch:
            path.write_text(f"File {path.stem.upper()} content")

        indexed, failed, _root_id, _err = await self._run_indexer(
            db_session,
            db_search_space,
            db_user,
            tmp_path,
            target_file_paths=[str(path) for path in batch],
        )

        assert indexed >= 1, "at least one file should succeed"
        assert failed >= 1, "at least one file should fail due to page limit"
        assert indexed + failed == len(batch)
        await db_session.refresh(db_user)
        assert db_user.pages_used > 0
        # NOTE(review): this bound tolerates one page beyond the limit —
        # confirm that slack is intended rather than a strict `<= pages_limit`.
        assert db_user.pages_used <= db_user.pages_limit + 1