mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-28 21:49:40 +02:00
chore: ran linting
This commit is contained in:
parent
6ace8850bb
commit
746c730b2e
31 changed files with 801 additions and 660 deletions
|
|
@ -977,15 +977,19 @@ async def get_watched_folders(
|
|||
)
|
||||
|
||||
folders = (
|
||||
await session.execute(
|
||||
select(Folder).where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
Folder.parent_id.is_(None),
|
||||
Folder.folder_metadata.isnot(None),
|
||||
Folder.folder_metadata["watched"].astext == "true",
|
||||
(
|
||||
await session.execute(
|
||||
select(Folder).where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
Folder.parent_id.is_(None),
|
||||
Folder.folder_metadata.isnot(None),
|
||||
Folder.folder_metadata["watched"].astext == "true",
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
return folders
|
||||
|
||||
|
|
@ -1265,15 +1269,21 @@ async def list_document_versions(
|
|||
if not document:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_READ.value)
|
||||
await check_permission(
|
||||
session, user, document.search_space_id, Permission.DOCUMENTS_READ.value
|
||||
)
|
||||
|
||||
versions = (
|
||||
await session.execute(
|
||||
select(DocumentVersion)
|
||||
.where(DocumentVersion.document_id == document_id)
|
||||
.order_by(DocumentVersion.version_number.desc())
|
||||
(
|
||||
await session.execute(
|
||||
select(DocumentVersion)
|
||||
.where(DocumentVersion.document_id == document_id)
|
||||
.order_by(DocumentVersion.version_number.desc())
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
return [
|
||||
{
|
||||
|
|
@ -1300,7 +1310,9 @@ async def get_document_version(
|
|||
if not document:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_READ.value)
|
||||
await check_permission(
|
||||
session, user, document.search_space_id, Permission.DOCUMENTS_READ.value
|
||||
)
|
||||
|
||||
version = (
|
||||
await session.execute(
|
||||
|
|
@ -1331,14 +1343,14 @@ async def restore_document_version(
|
|||
):
|
||||
"""Restore a previous version: snapshot current state, then overwrite document content."""
|
||||
document = (
|
||||
await session.execute(
|
||||
select(Document).where(Document.id == document_id)
|
||||
)
|
||||
await session.execute(select(Document).where(Document.id == document_id))
|
||||
).scalar_one_or_none()
|
||||
if not document:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
await check_permission(session, user, document.search_space_id, Permission.DOCUMENTS_UPDATE.value)
|
||||
await check_permission(
|
||||
session, user, document.search_space_id, Permission.DOCUMENTS_UPDATE.value
|
||||
)
|
||||
|
||||
version = (
|
||||
await session.execute(
|
||||
|
|
@ -1363,6 +1375,7 @@ async def restore_document_version(
|
|||
await session.commit()
|
||||
|
||||
from app.tasks.celery_tasks.document_reindex_tasks import reindex_document_task
|
||||
|
||||
reindex_document_task.delay(document_id, str(user.id))
|
||||
|
||||
return {
|
||||
|
|
@ -1430,9 +1443,7 @@ async def folder_index(
|
|||
root_folder_id = request.root_folder_id
|
||||
if root_folder_id:
|
||||
existing = (
|
||||
await session.execute(
|
||||
select(Folder).where(Folder.id == root_folder_id)
|
||||
)
|
||||
await session.execute(select(Folder).where(Folder.id == root_folder_id))
|
||||
).scalar_one_or_none()
|
||||
if not existing:
|
||||
root_folder_id = None
|
||||
|
|
@ -1492,7 +1503,9 @@ async def folder_index_files(
|
|||
)
|
||||
|
||||
if not request.target_file_paths:
|
||||
raise HTTPException(status_code=400, detail="target_file_paths must not be empty")
|
||||
raise HTTPException(
|
||||
status_code=400, detail="target_file_paths must not be empty"
|
||||
)
|
||||
|
||||
await check_permission(
|
||||
session,
|
||||
|
|
@ -1507,11 +1520,11 @@ async def folder_index_files(
|
|||
for fp in request.target_file_paths:
|
||||
try:
|
||||
Path(fp).relative_to(request.folder_path)
|
||||
except ValueError:
|
||||
except ValueError as err:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"target_file_path {fp} must be inside folder_path",
|
||||
)
|
||||
) from err
|
||||
|
||||
from app.tasks.celery_tasks.document_tasks import index_local_folder_task
|
||||
|
||||
|
|
@ -1530,5 +1543,3 @@ async def folder_index_files(
|
|||
"status": "processing",
|
||||
"file_count": len(request.target_file_paths),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -129,7 +129,11 @@ async def get_editor_content(
|
|||
|
||||
if not chunk_contents:
|
||||
doc_status = document.status or {}
|
||||
state = doc_status.get("state", "ready") if isinstance(doc_status, dict) else "ready"
|
||||
state = (
|
||||
doc_status.get("state", "ready")
|
||||
if isinstance(doc_status, dict)
|
||||
else "ready"
|
||||
)
|
||||
if state in ("pending", "processing"):
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ Non-OAuth connectors (BookStack, GitHub, etc.) are limited to one per search spa
|
|||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from contextlib import suppress
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from typing import Any
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
"""Pydantic schemas for folder CRUD, move, and reorder operations."""
|
||||
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
|
@ -36,7 +35,9 @@ class FolderRead(BaseModel):
|
|||
created_by_id: UUID | None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
metadata: dict[str, Any] | None = Field(default=None, validation_alias="folder_metadata")
|
||||
metadata: dict[str, Any] | None = Field(
|
||||
default=None, validation_alias="folder_metadata"
|
||||
)
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
"""Celery tasks for document processing."""
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
from uuid import UUID
|
||||
|
|
@ -1337,9 +1338,7 @@ async def _index_local_folder_async(
|
|||
)
|
||||
notification_id = notification.id
|
||||
_start_heartbeat(notification_id)
|
||||
heartbeat_task = asyncio.create_task(
|
||||
_run_heartbeat_loop(notification_id)
|
||||
)
|
||||
heartbeat_task = asyncio.create_task(_run_heartbeat_loop(notification_id))
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to create notification for local folder indexing",
|
||||
|
|
@ -1349,18 +1348,16 @@ async def _index_local_folder_async(
|
|||
async def _heartbeat_progress(completed_count: int) -> None:
|
||||
"""Refresh heartbeat and optionally update notification progress."""
|
||||
if notification:
|
||||
try:
|
||||
with contextlib.suppress(Exception):
|
||||
await NotificationService.document_processing.notify_processing_progress(
|
||||
session=session,
|
||||
notification=notification,
|
||||
stage="indexing",
|
||||
stage_message=f"Syncing files ({completed_count}/{file_count or '?'})",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
indexed, skipped_or_failed, _rfid, err = await index_local_folder(
|
||||
_indexed, _skipped_or_failed, _rfid, err = await index_local_folder(
|
||||
session=session,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
|
|
@ -1371,7 +1368,9 @@ async def _index_local_folder_async(
|
|||
root_folder_id=root_folder_id,
|
||||
enable_summary=enable_summary,
|
||||
target_file_paths=target_file_paths,
|
||||
on_heartbeat_callback=_heartbeat_progress if (is_batch or is_full_scan) else None,
|
||||
on_heartbeat_callback=_heartbeat_progress
|
||||
if (is_batch or is_full_scan)
|
||||
else None,
|
||||
)
|
||||
|
||||
if notification:
|
||||
|
|
|
|||
|
|
@ -43,30 +43,110 @@ from .base import (
|
|||
logger,
|
||||
)
|
||||
|
||||
PLAINTEXT_EXTENSIONS = frozenset({
|
||||
".md", ".markdown", ".txt", ".text", ".csv", ".tsv",
|
||||
".json", ".jsonl", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf",
|
||||
".xml", ".html", ".htm", ".css", ".scss", ".less", ".sass",
|
||||
".py", ".pyw", ".pyi", ".pyx",
|
||||
".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs",
|
||||
".java", ".kt", ".kts", ".scala", ".groovy",
|
||||
".c", ".h", ".cpp", ".cxx", ".cc", ".hpp", ".hxx",
|
||||
".cs", ".fs", ".fsx",
|
||||
".go", ".rs", ".rb", ".php", ".pl", ".pm", ".lua",
|
||||
".swift", ".m", ".mm",
|
||||
".r", ".R", ".jl",
|
||||
".sh", ".bash", ".zsh", ".fish", ".bat", ".cmd", ".ps1",
|
||||
".sql", ".graphql", ".gql",
|
||||
".env", ".gitignore", ".dockerignore", ".editorconfig",
|
||||
".makefile", ".cmake",
|
||||
".log", ".rst", ".tex", ".bib", ".org", ".adoc", ".asciidoc",
|
||||
".vue", ".svelte", ".astro",
|
||||
".tf", ".hcl", ".proto",
|
||||
})
|
||||
PLAINTEXT_EXTENSIONS = frozenset(
|
||||
{
|
||||
".md",
|
||||
".markdown",
|
||||
".txt",
|
||||
".text",
|
||||
".csv",
|
||||
".tsv",
|
||||
".json",
|
||||
".jsonl",
|
||||
".yaml",
|
||||
".yml",
|
||||
".toml",
|
||||
".ini",
|
||||
".cfg",
|
||||
".conf",
|
||||
".xml",
|
||||
".html",
|
||||
".htm",
|
||||
".css",
|
||||
".scss",
|
||||
".less",
|
||||
".sass",
|
||||
".py",
|
||||
".pyw",
|
||||
".pyi",
|
||||
".pyx",
|
||||
".js",
|
||||
".jsx",
|
||||
".ts",
|
||||
".tsx",
|
||||
".mjs",
|
||||
".cjs",
|
||||
".java",
|
||||
".kt",
|
||||
".kts",
|
||||
".scala",
|
||||
".groovy",
|
||||
".c",
|
||||
".h",
|
||||
".cpp",
|
||||
".cxx",
|
||||
".cc",
|
||||
".hpp",
|
||||
".hxx",
|
||||
".cs",
|
||||
".fs",
|
||||
".fsx",
|
||||
".go",
|
||||
".rs",
|
||||
".rb",
|
||||
".php",
|
||||
".pl",
|
||||
".pm",
|
||||
".lua",
|
||||
".swift",
|
||||
".m",
|
||||
".mm",
|
||||
".r",
|
||||
".R",
|
||||
".jl",
|
||||
".sh",
|
||||
".bash",
|
||||
".zsh",
|
||||
".fish",
|
||||
".bat",
|
||||
".cmd",
|
||||
".ps1",
|
||||
".sql",
|
||||
".graphql",
|
||||
".gql",
|
||||
".env",
|
||||
".gitignore",
|
||||
".dockerignore",
|
||||
".editorconfig",
|
||||
".makefile",
|
||||
".cmake",
|
||||
".log",
|
||||
".rst",
|
||||
".tex",
|
||||
".bib",
|
||||
".org",
|
||||
".adoc",
|
||||
".asciidoc",
|
||||
".vue",
|
||||
".svelte",
|
||||
".astro",
|
||||
".tf",
|
||||
".hcl",
|
||||
".proto",
|
||||
}
|
||||
)
|
||||
|
||||
AUDIO_EXTENSIONS = frozenset({
|
||||
".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm",
|
||||
})
|
||||
AUDIO_EXTENSIONS = frozenset(
|
||||
{
|
||||
".mp3",
|
||||
".mp4",
|
||||
".mpeg",
|
||||
".mpga",
|
||||
".m4a",
|
||||
".wav",
|
||||
".webm",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _is_plaintext_file(filename: str) -> bool:
|
||||
|
|
@ -81,6 +161,7 @@ def _needs_etl(filename: str) -> bool:
|
|||
"""File is not plaintext and not audio — requires ETL service to parse."""
|
||||
return not _is_plaintext_file(filename) and not _is_audio_file(filename)
|
||||
|
||||
|
||||
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||
|
||||
DEFAULT_EXCLUDE_PATTERNS = [
|
||||
|
|
@ -121,9 +202,7 @@ def scan_folder(
|
|||
for dirpath, dirnames, filenames in os.walk(root):
|
||||
rel_dir = Path(dirpath).relative_to(root)
|
||||
|
||||
dirnames[:] = [
|
||||
d for d in dirnames if d not in exclude_patterns
|
||||
]
|
||||
dirnames[:] = [d for d in dirnames if d not in exclude_patterns]
|
||||
|
||||
if any(part in exclude_patterns for part in rel_dir.parts):
|
||||
continue
|
||||
|
|
@ -134,9 +213,11 @@ def scan_folder(
|
|||
|
||||
full = Path(dirpath) / fname
|
||||
|
||||
if file_extensions is not None:
|
||||
if full.suffix.lower() not in file_extensions:
|
||||
continue
|
||||
if (
|
||||
file_extensions is not None
|
||||
and full.suffix.lower() not in file_extensions
|
||||
):
|
||||
continue
|
||||
|
||||
try:
|
||||
stat = full.stat()
|
||||
|
|
@ -209,11 +290,14 @@ def _content_hash(content: str, search_space_id: int) -> str:
|
|||
pipeline so that dedup checks are consistent.
|
||||
"""
|
||||
import hashlib
|
||||
return hashlib.sha256(f"{search_space_id}:{content}".encode("utf-8")).hexdigest()
|
||||
|
||||
return hashlib.sha256(f"{search_space_id}:{content}".encode()).hexdigest()
|
||||
|
||||
|
||||
async def _compute_file_content_hash(
|
||||
file_path: str, filename: str, search_space_id: int,
|
||||
file_path: str,
|
||||
filename: str,
|
||||
search_space_id: int,
|
||||
) -> tuple[str, str]:
|
||||
"""Read a file (via ETL if needed) and compute its content hash.
|
||||
|
||||
|
|
@ -257,9 +341,7 @@ async def _mirror_folder_structure(
|
|||
|
||||
if root_folder_id:
|
||||
existing = (
|
||||
await session.execute(
|
||||
select(Folder).where(Folder.id == root_folder_id)
|
||||
)
|
||||
await session.execute(select(Folder).where(Folder.id == root_folder_id))
|
||||
).scalar_one_or_none()
|
||||
if existing:
|
||||
mapping[""] = existing.id
|
||||
|
|
@ -412,13 +494,17 @@ async def _cleanup_empty_folders(
|
|||
id_to_rel: dict[int, str] = {fid: rel for rel, fid in folder_mapping.items() if rel}
|
||||
|
||||
all_folders = (
|
||||
await session.execute(
|
||||
select(Folder).where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
Folder.id != root_folder_id,
|
||||
(
|
||||
await session.execute(
|
||||
select(Folder).where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
Folder.id != root_folder_id,
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
candidates: list[Folder] = []
|
||||
for folder in all_folders:
|
||||
|
|
@ -520,7 +606,9 @@ async def index_local_folder(
|
|||
metadata={
|
||||
"folder_path": folder_path,
|
||||
"user_id": str(user_id),
|
||||
"target_file_paths_count": len(target_file_paths) if target_file_paths else None,
|
||||
"target_file_paths_count": len(target_file_paths)
|
||||
if target_file_paths
|
||||
else None,
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -532,7 +620,12 @@ async def index_local_folder(
|
|||
"Folder not found",
|
||||
{},
|
||||
)
|
||||
return 0, 0, root_folder_id, f"Folder path missing or does not exist: {folder_path}"
|
||||
return (
|
||||
0,
|
||||
0,
|
||||
root_folder_id,
|
||||
f"Folder path missing or does not exist: {folder_path}",
|
||||
)
|
||||
|
||||
if exclude_patterns is None:
|
||||
exclude_patterns = DEFAULT_EXCLUDE_PATTERNS
|
||||
|
|
@ -639,7 +732,9 @@ async def index_local_folder(
|
|||
)
|
||||
|
||||
if existing_document:
|
||||
stored_mtime = (existing_document.document_metadata or {}).get("mtime")
|
||||
stored_mtime = (existing_document.document_metadata or {}).get(
|
||||
"mtime"
|
||||
)
|
||||
current_mtime = file_info["modified_at"].timestamp()
|
||||
|
||||
if stored_mtime and abs(current_mtime - stored_mtime) < 1.0:
|
||||
|
|
@ -709,23 +804,31 @@ async def index_local_folder(
|
|||
# ================================================================
|
||||
all_root_folder_ids = set(folder_mapping.values())
|
||||
all_db_folders = (
|
||||
await session.execute(
|
||||
select(Folder.id).where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
(
|
||||
await session.execute(
|
||||
select(Folder.id).where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
all_root_folder_ids.update(all_db_folders)
|
||||
|
||||
all_folder_docs = (
|
||||
await session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == search_space_id,
|
||||
Document.folder_id.in_(list(all_root_folder_ids)),
|
||||
(
|
||||
await session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == search_space_id,
|
||||
Document.folder_id.in_(list(all_root_folder_ids)),
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
for doc in all_folder_docs:
|
||||
if doc.unique_identifier_hash not in seen_unique_hashes:
|
||||
|
|
@ -742,9 +845,7 @@ async def index_local_folder(
|
|||
)
|
||||
|
||||
pipeline = IndexingPipelineService(session)
|
||||
doc_map = {
|
||||
compute_unique_identifier_hash(cd): cd for cd in connector_docs
|
||||
}
|
||||
doc_map = {compute_unique_identifier_hash(cd): cd for cd in connector_docs}
|
||||
documents = await pipeline.prepare_for_indexing(connector_docs)
|
||||
|
||||
# Assign folder_id immediately so docs appear in the correct
|
||||
|
|
@ -1033,7 +1134,9 @@ async def _index_single_file(
|
|||
db_doc.document_metadata = doc_meta
|
||||
await session.commit()
|
||||
|
||||
indexed = 1 if DocumentStatus.is_state(db_doc.status, DocumentStatus.READY) else 0
|
||||
indexed = (
|
||||
1 if DocumentStatus.is_state(db_doc.status, DocumentStatus.READY) else 0
|
||||
)
|
||||
failed_msg = None if indexed else "Indexing failed"
|
||||
|
||||
if indexed:
|
||||
|
|
|
|||
|
|
@ -83,9 +83,9 @@ async def create_version_snapshot(
|
|||
# Cleanup: cap at MAX_VERSIONS_PER_DOCUMENT
|
||||
count = (
|
||||
await session.execute(
|
||||
select(func.count()).select_from(DocumentVersion).where(
|
||||
DocumentVersion.document_id == document.id
|
||||
)
|
||||
select(func.count())
|
||||
.select_from(DocumentVersion)
|
||||
.where(DocumentVersion.document_id == document.id)
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
|
|
|
|||
|
|
@ -166,5 +166,3 @@ def make_connector_document(db_connector, db_user):
|
|||
return ConnectorDocument(**defaults)
|
||||
|
||||
return _make
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,9 @@ from app.db import (
|
|||
pytestmark = pytest.mark.integration
|
||||
|
||||
UNIFIED_FIXTURES = (
|
||||
"patched_summarize", "patched_embed_texts", "patched_chunk_text",
|
||||
"patched_summarize",
|
||||
"patched_embed_texts",
|
||||
"patched_chunk_text",
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -37,6 +39,7 @@ class _FakeSessionMaker:
|
|||
@asynccontextmanager
|
||||
async def _ctx():
|
||||
yield self._session
|
||||
|
||||
return _ctx()
|
||||
|
||||
|
||||
|
|
@ -59,7 +62,6 @@ def patched_batch_sessions(monkeypatch, db_session):
|
|||
|
||||
|
||||
class TestFullIndexer:
|
||||
|
||||
@pytest.mark.usefixtures(*UNIFIED_FIXTURES)
|
||||
async def test_i1_new_file_indexed(
|
||||
self,
|
||||
|
|
@ -73,7 +75,7 @@ class TestFullIndexer:
|
|||
|
||||
(tmp_path / "note.md").write_text("# Hello World\n\nContent here.")
|
||||
|
||||
count, skipped, root_folder_id, err = await index_local_folder(
|
||||
count, _skipped, _root_folder_id, err = await index_local_folder(
|
||||
session=db_session,
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
|
|
@ -85,13 +87,17 @@ class TestFullIndexer:
|
|||
assert count == 1
|
||||
|
||||
docs = (
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
(
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
assert len(docs) == 1
|
||||
assert docs[0].document_type == DocumentType.LOCAL_FOLDER_FILE
|
||||
assert DocumentStatus.is_state(docs[0].status, DocumentStatus.READY)
|
||||
|
|
@ -130,7 +136,9 @@ class TestFullIndexer:
|
|||
|
||||
total = (
|
||||
await db_session.execute(
|
||||
select(func.count()).select_from(Document).where(
|
||||
select(func.count())
|
||||
.select_from(Document)
|
||||
.where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
|
|
@ -174,13 +182,19 @@ class TestFullIndexer:
|
|||
assert count == 1
|
||||
|
||||
versions = (
|
||||
await db_session.execute(
|
||||
select(DocumentVersion).join(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
(
|
||||
await db_session.execute(
|
||||
select(DocumentVersion)
|
||||
.join(Document)
|
||||
.where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
assert len(versions) >= 1
|
||||
|
||||
@pytest.mark.usefixtures(*UNIFIED_FIXTURES)
|
||||
|
|
@ -207,7 +221,9 @@ class TestFullIndexer:
|
|||
|
||||
docs_before = (
|
||||
await db_session.execute(
|
||||
select(func.count()).select_from(Document).where(
|
||||
select(func.count())
|
||||
.select_from(Document)
|
||||
.where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
|
|
@ -228,7 +244,9 @@ class TestFullIndexer:
|
|||
|
||||
docs_after = (
|
||||
await db_session.execute(
|
||||
select(func.count()).select_from(Document).where(
|
||||
select(func.count())
|
||||
.select_from(Document)
|
||||
.where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
|
|
@ -262,13 +280,17 @@ class TestFullIndexer:
|
|||
assert count == 1
|
||||
|
||||
docs = (
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
(
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
assert len(docs) == 1
|
||||
assert docs[0].title == "b.md"
|
||||
|
||||
|
|
@ -279,7 +301,6 @@ class TestFullIndexer:
|
|||
|
||||
|
||||
class TestFolderMirroring:
|
||||
|
||||
@pytest.mark.usefixtures(*UNIFIED_FIXTURES)
|
||||
async def test_f1_root_folder_created(
|
||||
self,
|
||||
|
|
@ -335,10 +356,14 @@ class TestFolderMirroring:
|
|||
)
|
||||
|
||||
folders = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.search_space_id == db_search_space.id)
|
||||
(
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.search_space_id == db_search_space.id)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
folder_names = {f.name for f in folders}
|
||||
assert "notes" in folder_names
|
||||
|
|
@ -376,10 +401,14 @@ class TestFolderMirroring:
|
|||
)
|
||||
|
||||
folders_before = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.search_space_id == db_search_space.id)
|
||||
(
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.search_space_id == db_search_space.id)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
ids_before = {f.id for f in folders_before}
|
||||
|
||||
await index_local_folder(
|
||||
|
|
@ -392,10 +421,14 @@ class TestFolderMirroring:
|
|||
)
|
||||
|
||||
folders_after = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.search_space_id == db_search_space.id)
|
||||
(
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.search_space_id == db_search_space.id)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
ids_after = {f.id for f in folders_after}
|
||||
|
||||
assert ids_before == ids_after
|
||||
|
|
@ -425,21 +458,23 @@ class TestFolderMirroring:
|
|||
)
|
||||
|
||||
docs = (
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
(
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
today_doc = next(d for d in docs if d.title == "today.md")
|
||||
root_doc = next(d for d in docs if d.title == "root.md")
|
||||
|
||||
daily_folder = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.name == "daily")
|
||||
)
|
||||
await db_session.execute(select(Folder).where(Folder.name == "daily"))
|
||||
).scalar_one()
|
||||
|
||||
assert today_doc.folder_id == daily_folder.id
|
||||
|
|
@ -455,9 +490,10 @@ class TestFolderMirroring:
|
|||
tmp_path: Path,
|
||||
):
|
||||
"""F5: Deleted dir's empty Folder row is cleaned up on re-sync."""
|
||||
from app.tasks.connector_indexers.local_folder_indexer import index_local_folder
|
||||
import shutil
|
||||
|
||||
from app.tasks.connector_indexers.local_folder_indexer import index_local_folder
|
||||
|
||||
daily = tmp_path / "notes" / "daily"
|
||||
daily.mkdir(parents=True)
|
||||
weekly = tmp_path / "notes" / "weekly"
|
||||
|
|
@ -474,9 +510,7 @@ class TestFolderMirroring:
|
|||
)
|
||||
|
||||
weekly_folder = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.name == "weekly")
|
||||
)
|
||||
await db_session.execute(select(Folder).where(Folder.name == "weekly"))
|
||||
).scalar_one_or_none()
|
||||
assert weekly_folder is not None
|
||||
|
||||
|
|
@ -492,16 +526,12 @@ class TestFolderMirroring:
|
|||
)
|
||||
|
||||
weekly_after = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.name == "weekly")
|
||||
)
|
||||
await db_session.execute(select(Folder).where(Folder.name == "weekly"))
|
||||
).scalar_one_or_none()
|
||||
assert weekly_after is None
|
||||
|
||||
daily_after = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.name == "daily")
|
||||
)
|
||||
await db_session.execute(select(Folder).where(Folder.name == "daily"))
|
||||
).scalar_one_or_none()
|
||||
assert daily_after is not None
|
||||
|
||||
|
|
@ -551,18 +581,14 @@ class TestFolderMirroring:
|
|||
).scalar_one()
|
||||
|
||||
daily_folder = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.name == "daily")
|
||||
)
|
||||
await db_session.execute(select(Folder).where(Folder.name == "daily"))
|
||||
).scalar_one()
|
||||
|
||||
assert doc.folder_id == daily_folder.id
|
||||
assert daily_folder.parent_id is not None
|
||||
|
||||
notes_folder = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.name == "notes")
|
||||
)
|
||||
await db_session.execute(select(Folder).where(Folder.name == "notes"))
|
||||
).scalar_one()
|
||||
assert daily_folder.parent_id == notes_folder.id
|
||||
assert notes_folder.parent_id == root_folder_id
|
||||
|
|
@ -592,9 +618,7 @@ class TestFolderMirroring:
|
|||
)
|
||||
|
||||
eph_folder = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.name == "ephemeral")
|
||||
)
|
||||
await db_session.execute(select(Folder).where(Folder.name == "ephemeral"))
|
||||
).scalar_one_or_none()
|
||||
assert eph_folder is not None
|
||||
|
||||
|
|
@ -612,16 +636,12 @@ class TestFolderMirroring:
|
|||
)
|
||||
|
||||
eph_after = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.name == "ephemeral")
|
||||
)
|
||||
await db_session.execute(select(Folder).where(Folder.name == "ephemeral"))
|
||||
).scalar_one_or_none()
|
||||
assert eph_after is None
|
||||
|
||||
notes_after = (
|
||||
await db_session.execute(
|
||||
select(Folder).where(Folder.name == "notes")
|
||||
)
|
||||
await db_session.execute(select(Folder).where(Folder.name == "notes"))
|
||||
).scalar_one_or_none()
|
||||
assert notes_after is None
|
||||
|
||||
|
|
@ -632,7 +652,6 @@ class TestFolderMirroring:
|
|||
|
||||
|
||||
class TestBatchMode:
|
||||
|
||||
@pytest.mark.usefixtures(*UNIFIED_FIXTURES)
|
||||
async def test_b1_batch_indexes_multiple_files(
|
||||
self,
|
||||
|
|
@ -649,7 +668,7 @@ class TestBatchMode:
|
|||
(tmp_path / "b.md").write_text("File B content")
|
||||
(tmp_path / "c.md").write_text("File C content")
|
||||
|
||||
count, failed, root_folder_id, err = await index_local_folder(
|
||||
count, failed, _root_folder_id, err = await index_local_folder(
|
||||
session=db_session,
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
|
|
@ -667,13 +686,17 @@ class TestBatchMode:
|
|||
assert err is None
|
||||
|
||||
docs = (
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
(
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
assert len(docs) == 3
|
||||
assert {d.title for d in docs} == {"a.md", "b.md", "c.md"}
|
||||
assert all(
|
||||
|
|
@ -714,13 +737,17 @@ class TestBatchMode:
|
|||
assert err is not None
|
||||
|
||||
docs = (
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
(
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
assert len(docs) == 2
|
||||
assert {d.title for d in docs} == {"good1.md", "good2.md"}
|
||||
|
||||
|
|
@ -731,7 +758,6 @@ class TestBatchMode:
|
|||
|
||||
|
||||
class TestPipelineIntegration:
|
||||
|
||||
@pytest.mark.usefixtures(*UNIFIED_FIXTURES)
|
||||
async def test_p1_local_folder_file_through_pipeline(
|
||||
self,
|
||||
|
|
@ -742,7 +768,9 @@ class TestPipelineIntegration:
|
|||
):
|
||||
"""P1: LOCAL_FOLDER_FILE ConnectorDocument through prepare+index to READY."""
|
||||
from app.indexing_pipeline.connector_document import ConnectorDocument
|
||||
from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService
|
||||
from app.indexing_pipeline.indexing_pipeline_service import (
|
||||
IndexingPipelineService,
|
||||
)
|
||||
|
||||
doc = ConnectorDocument(
|
||||
title="Test Local File",
|
||||
|
|
@ -763,12 +791,16 @@ class TestPipelineIntegration:
|
|||
assert result is not None
|
||||
|
||||
docs = (
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
(
|
||||
await db_session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == db_search_space.id,
|
||||
)
|
||||
)
|
||||
)
|
||||
).scalars().all()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
assert len(docs) == 1
|
||||
assert DocumentStatus.is_state(docs[0].status, DocumentStatus.READY)
|
||||
|
|
|
|||
|
|
@ -34,14 +34,16 @@ async def db_document(
|
|||
|
||||
async def _version_count(session: AsyncSession, document_id: int) -> int:
|
||||
result = await session.execute(
|
||||
select(func.count()).select_from(DocumentVersion).where(
|
||||
DocumentVersion.document_id == document_id
|
||||
)
|
||||
select(func.count())
|
||||
.select_from(DocumentVersion)
|
||||
.where(DocumentVersion.document_id == document_id)
|
||||
)
|
||||
return result.scalar_one()
|
||||
|
||||
|
||||
async def _get_versions(session: AsyncSession, document_id: int) -> list[DocumentVersion]:
|
||||
async def _get_versions(
|
||||
session: AsyncSession, document_id: int
|
||||
) -> list[DocumentVersion]:
|
||||
result = await session.execute(
|
||||
select(DocumentVersion)
|
||||
.where(DocumentVersion.document_id == document_id)
|
||||
|
|
@ -74,18 +76,14 @@ class TestCreateVersionSnapshot:
|
|||
from app.utils.document_versioning import create_version_snapshot
|
||||
|
||||
t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)
|
||||
monkeypatch.setattr(
|
||||
"app.utils.document_versioning._now", lambda: t0
|
||||
)
|
||||
monkeypatch.setattr("app.utils.document_versioning._now", lambda: t0)
|
||||
await create_version_snapshot(db_session, db_document)
|
||||
|
||||
# Simulate content change and time passing
|
||||
db_document.source_markdown = "# Test\n\nUpdated content."
|
||||
db_document.content_hash = "def456"
|
||||
t1 = t0 + timedelta(minutes=31)
|
||||
monkeypatch.setattr(
|
||||
"app.utils.document_versioning._now", lambda: t1
|
||||
)
|
||||
monkeypatch.setattr("app.utils.document_versioning._now", lambda: t1)
|
||||
await create_version_snapshot(db_session, db_document)
|
||||
|
||||
versions = await _get_versions(db_session, db_document.id)
|
||||
|
|
@ -101,9 +99,7 @@ class TestCreateVersionSnapshot:
|
|||
from app.utils.document_versioning import create_version_snapshot
|
||||
|
||||
t0 = datetime(2025, 1, 1, 12, 0, 0, tzinfo=UTC)
|
||||
monkeypatch.setattr(
|
||||
"app.utils.document_versioning._now", lambda: t0
|
||||
)
|
||||
monkeypatch.setattr("app.utils.document_versioning._now", lambda: t0)
|
||||
await create_version_snapshot(db_session, db_document)
|
||||
count_after_first = await _version_count(db_session, db_document.id)
|
||||
assert count_after_first == 1
|
||||
|
|
@ -112,9 +108,7 @@ class TestCreateVersionSnapshot:
|
|||
db_document.source_markdown = "# Test\n\nQuick edit."
|
||||
db_document.content_hash = "quick123"
|
||||
t1 = t0 + timedelta(minutes=10)
|
||||
monkeypatch.setattr(
|
||||
"app.utils.document_versioning._now", lambda: t1
|
||||
)
|
||||
monkeypatch.setattr("app.utils.document_versioning._now", lambda: t1)
|
||||
await create_version_snapshot(db_session, db_document)
|
||||
|
||||
count_after_second = await _version_count(db_session, db_document.id)
|
||||
|
|
@ -134,22 +128,15 @@ class TestCreateVersionSnapshot:
|
|||
|
||||
# Create 5 versions spread across time: 3 older than 90 days, 2 recent
|
||||
for i in range(5):
|
||||
db_document.source_markdown = f"Content v{i+1}"
|
||||
db_document.content_hash = f"hash_{i+1}"
|
||||
if i < 3:
|
||||
t = base + timedelta(days=i) # old
|
||||
else:
|
||||
t = base + timedelta(days=100 + i) # recent
|
||||
monkeypatch.setattr(
|
||||
"app.utils.document_versioning._now", lambda _t=t: _t
|
||||
)
|
||||
db_document.source_markdown = f"Content v{i + 1}"
|
||||
db_document.content_hash = f"hash_{i + 1}"
|
||||
t = base + timedelta(days=i) if i < 3 else base + timedelta(days=100 + i)
|
||||
monkeypatch.setattr("app.utils.document_versioning._now", lambda _t=t: _t)
|
||||
await create_version_snapshot(db_session, db_document)
|
||||
|
||||
# Now trigger cleanup from a "current" time that makes the first 3 versions > 90 days old
|
||||
now = base + timedelta(days=200)
|
||||
monkeypatch.setattr(
|
||||
"app.utils.document_versioning._now", lambda: now
|
||||
)
|
||||
monkeypatch.setattr("app.utils.document_versioning._now", lambda: now)
|
||||
db_document.source_markdown = "Content v6"
|
||||
db_document.content_hash = "hash_6"
|
||||
await create_version_snapshot(db_session, db_document)
|
||||
|
|
@ -160,9 +147,7 @@ class TestCreateVersionSnapshot:
|
|||
age = now - v.created_at.replace(tzinfo=UTC)
|
||||
assert age <= timedelta(days=90), f"Version {v.version_number} is too old"
|
||||
|
||||
async def test_v5_cap_at_20_versions(
|
||||
self, db_session, db_document, monkeypatch
|
||||
):
|
||||
async def test_v5_cap_at_20_versions(self, db_session, db_document, monkeypatch):
|
||||
"""V5: More than 20 versions triggers cap — oldest gets deleted."""
|
||||
from app.utils.document_versioning import create_version_snapshot
|
||||
|
||||
|
|
@ -170,12 +155,10 @@ class TestCreateVersionSnapshot:
|
|||
|
||||
# Create 21 versions (all within 90 days, each 31 min apart)
|
||||
for i in range(21):
|
||||
db_document.source_markdown = f"Content v{i+1}"
|
||||
db_document.content_hash = f"hash_{i+1}"
|
||||
db_document.source_markdown = f"Content v{i + 1}"
|
||||
db_document.content_hash = f"hash_{i + 1}"
|
||||
t = base + timedelta(minutes=31 * i)
|
||||
monkeypatch.setattr(
|
||||
"app.utils.document_versioning._now", lambda _t=t: _t
|
||||
)
|
||||
monkeypatch.setattr("app.utils.document_versioning._now", lambda _t=t: _t)
|
||||
await create_version_snapshot(db_session, db_document)
|
||||
|
||||
versions = await _get_versions(db_session, db_document.id)
|
||||
|
|
|
|||
|
|
@ -51,9 +51,7 @@ class TestScanFolder:
|
|||
git.mkdir()
|
||||
(git / "config").write_text("gitconfig")
|
||||
|
||||
results = scan_folder(
|
||||
str(tmp_path), exclude_patterns=["node_modules", ".git"]
|
||||
)
|
||||
results = scan_folder(str(tmp_path), exclude_patterns=["node_modules", ".git"])
|
||||
names = {r["relative_path"] for r in results}
|
||||
|
||||
assert "good.md" in names
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue