Merge remote-tracking branch 'upstream/dev' into refactor/indexing-pipelines

This commit is contained in:
Anish Sarkar 2026-03-27 22:36:34 +05:30
commit 17091edb77
104 changed files with 4944 additions and 1319 deletions

View file

@ -37,7 +37,9 @@ def upgrade() -> None:
conn = op.get_bind()
result = conn.execute(
sa.text("SELECT 1 FROM information_schema.tables WHERE table_name = 'video_presentations'")
sa.text(
"SELECT 1 FROM information_schema.tables WHERE table_name = 'video_presentations'"
)
)
if not result.fetchone():
op.create_table(

View file

@ -0,0 +1,90 @@
"""Add folders table and folder_id to documents
Revision ID: 109
Revises: 108
Creates the folders table for nested folder organization (max 8 levels),
adds folder_id FK to documents, and creates an expression-based unique
index to correctly handle NULL parent_id at root level.
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "109"
down_revision: str | None = "108"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create the ``folders`` table, its uniqueness index, and ``documents.folder_id``."""
    # Self-referencing folder tree. Deleting a parent folder or the owning
    # search space removes the folder rows (ondelete="CASCADE"); deleting the
    # creating user only clears created_by_id (ondelete="SET NULL").
    op.create_table(
        "folders",
        sa.Column("id", sa.Integer(), primary_key=True, index=True),
        sa.Column("name", sa.String(255), nullable=False, index=True),
        # Sibling ordering key, stored as a short string.
        sa.Column("position", sa.String(50), nullable=False, index=True),
        sa.Column(
            "parent_id",
            sa.Integer(),
            sa.ForeignKey("folders.id", ondelete="CASCADE"),
            nullable=True,  # NULL marks a root-level folder
            index=True,
        ),
        sa.Column(
            "search_space_id",
            sa.Integer(),
            sa.ForeignKey("searchspaces.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        sa.Column(
            "created_by_id",
            sa.Uuid(),
            sa.ForeignKey("user.id", ondelete="SET NULL"),
            nullable=True,
            index=True,
        ),
        sa.Column(
            "created_at",
            sa.TIMESTAMP(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
        sa.Column(
            "updated_at",
            sa.TIMESTAMP(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    # Expression-based unique index: COALESCE(parent_id, 0) handles NULL correctly.
    # PostgreSQL treats NULL != NULL in regular unique constraints, so a standard
    # UniqueConstraint(search_space_id, parent_id, name) would allow duplicate
    # folder names at the root level.
    # NOTE(review): this assumes no folder row ever has id 0, otherwise its
    # children would share a uniqueness bucket with root folders -- confirm.
    op.execute(
        """
CREATE UNIQUE INDEX uq_folder_space_parent_name
ON folders (search_space_id, COALESCE(parent_id, 0), name);
"""
    )
    # Optional link from a document to its folder. Deleting the folder leaves
    # the document in place with folder_id reset to NULL (ondelete="SET NULL").
    op.add_column(
        "documents",
        sa.Column(
            "folder_id",
            sa.Integer(),
            sa.ForeignKey("folders.id", ondelete="SET NULL"),
            nullable=True,
            index=True,
        ),
    )
def downgrade() -> None:
    """Undo ``upgrade``: remove ``documents.folder_id``, the unique index, and ``folders``."""
    # Drop the referencing column first so nothing points at folders.id
    # when the table itself is removed.
    op.drop_column("documents", "folder_id")
    # IF EXISTS keeps the downgrade from failing when the index is already gone.
    op.execute("DROP INDEX IF EXISTS uq_folder_space_parent_name;")
    op.drop_table("folders")

View file

@ -914,6 +914,43 @@ class SharedMemory(BaseModel, TimestampMixin):
created_by = relationship("User")
class Folder(BaseModel, TimestampMixin):
    """A folder inside a search space; folders nest through ``parent_id``."""

    # NOTE(review): the uq_folder_space_parent_name unique index is created by
    # the Alembic migration only; nothing on this model declares it.
    __tablename__ = "folders"
    name = Column(String(255), nullable=False, index=True)
    # String sort key among siblings; SearchSpace.folders orders by this column.
    position = Column(String(50), nullable=False, index=True)
    # NULL for a root-level folder. The database removes child folders when the
    # parent row is deleted (ondelete="CASCADE").
    parent_id = Column(
        Integer,
        ForeignKey("folders.id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    search_space_id = Column(
        Integer,
        ForeignKey("searchspaces.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Cleared (not cascaded) when the creating user is deleted.
    created_by_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Set on insert and refreshed on every ORM update, both client-side in UTC.
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
        index=True,
    )
    # Many-to-one link to the parent folder; the backref exposes Folder.children.
    parent = relationship("Folder", remote_side="Folder.id", backref="children")
    search_space = relationship("SearchSpace", back_populates="folders")
    created_by = relationship("User", back_populates="folders")
    # passive_deletes=True leaves the documents.folder_id cleanup to the
    # database foreign key (ondelete="SET NULL") instead of the ORM.
    documents = relationship("Document", back_populates="folder", passive_deletes=True)
class Document(BaseModel, TimestampMixin):
__tablename__ = "documents"
@ -947,6 +984,13 @@ class Document(BaseModel, TimestampMixin):
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
)
folder_id = Column(
Integer,
ForeignKey("folders.id", ondelete="SET NULL"),
nullable=True,
index=True,
)
# Track who created/uploaded this document
created_by_id = Column(
UUID(as_uuid=True),
@ -976,6 +1020,7 @@ class Document(BaseModel, TimestampMixin):
# Relationships
search_space = relationship("SearchSpace", back_populates="documents")
folder = relationship("Folder", back_populates="documents")
created_by = relationship("User", back_populates="documents")
connector = relationship("SearchSourceConnector", back_populates="documents")
chunks = relationship(
@ -1279,6 +1324,12 @@ class SearchSpace(BaseModel, TimestampMixin):
)
user = relationship("User", back_populates="search_spaces")
folders = relationship(
"Folder",
back_populates="search_space",
order_by="Folder.position",
cascade="all, delete-orphan",
)
documents = relationship(
"Document",
back_populates="search_space",
@ -1765,6 +1816,13 @@ if config.AUTH_TYPE == "GOOGLE":
passive_deletes=True,
)
# Folders created by this user
folders = relationship(
"Folder",
back_populates="created_by",
passive_deletes=True,
)
# Image generations created by this user
image_generations = relationship(
"ImageGeneration",
@ -1867,6 +1925,13 @@ else:
passive_deletes=True,
)
# Folders created by this user
folders = relationship(
"Folder",
back_populates="created_by",
passive_deletes=True,
)
# Image generations created by this user
image_generations = relationship(
"ImageGeneration",

View file

@ -11,6 +11,7 @@ from .confluence_add_connector_route import router as confluence_add_connector_r
from .discord_add_connector_route import router as discord_add_connector_router
from .documents_routes import router as documents_router
from .editor_routes import router as editor_router
from .folders_routes import router as folders_router
from .google_calendar_add_connector_route import (
router as google_calendar_add_connector_router,
)
@ -51,6 +52,7 @@ router.include_router(search_spaces_router)
router.include_router(rbac_router) # RBAC routes for roles, members, invites
router.include_router(editor_router)
router.include_router(documents_router)
router.include_router(folders_router)
router.include_router(notes_router)
router.include_router(new_chat_router) # Chat with assistant-ui persistence
router.include_router(sandbox_router) # Sandbox file downloads (Daytona)

View file

@ -320,6 +320,7 @@ async def read_documents(
page_size: int = 50,
search_space_id: int | None = None,
document_types: str | None = None,
folder_id: int | str | None = None,
sort_by: str = "created_at",
sort_order: str = "desc",
session: AsyncSession = Depends(get_async_session),
@ -391,6 +392,17 @@ async def read_documents(
query = query.filter(Document.document_type.in_(type_list))
count_query = count_query.filter(Document.document_type.in_(type_list))
# Filter by folder_id: "root" or "null" => root level (folder_id IS NULL),
# integer => specific folder, omitted => all documents
if folder_id is not None:
if str(folder_id).lower() in ("root", "null"):
query = query.filter(Document.folder_id.is_(None))
count_query = count_query.filter(Document.folder_id.is_(None))
else:
fid = int(folder_id)
query = query.filter(Document.folder_id == fid)
count_query = count_query.filter(Document.folder_id == fid)
total_result = await session.execute(count_query)
total = total_result.scalar() or 0
@ -451,6 +463,7 @@ async def read_documents(
created_at=doc.created_at,
updated_at=doc.updated_at,
search_space_id=doc.search_space_id,
folder_id=doc.folder_id,
created_by_id=doc.created_by_id,
created_by_name=created_by_name,
created_by_email=created_by_email,
@ -608,6 +621,7 @@ async def search_documents(
created_at=doc.created_at,
updated_at=doc.updated_at,
search_space_id=doc.search_space_id,
folder_id=doc.folder_id,
created_by_id=doc.created_by_id,
created_by_name=created_by_name,
created_by_email=created_by_email,
@ -978,6 +992,7 @@ async def read_document(
created_at=document.created_at,
updated_at=document.updated_at,
search_space_id=document.search_space_id,
folder_id=document.folder_id,
)
except HTTPException:
raise
@ -1036,6 +1051,7 @@ async def update_document(
created_at=db_document.created_at,
updated_at=db_document.updated_at,
search_space_id=db_document.search_space_id,
folder_id=db_document.folder_id,
)
except HTTPException:
raise

View file

@ -0,0 +1,516 @@
"""API routes for folder CRUD, move, reorder, and document move operations."""
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.db import Document, Folder, Permission, User, get_async_session
from app.schemas import (
BulkDocumentMove,
DocumentMove,
FolderBreadcrumb,
FolderCreate,
FolderMove,
FolderRead,
FolderReorder,
FolderUpdate,
)
from app.services.folder_service import (
check_no_circular_reference,
generate_folder_position,
get_folder_subtree_ids,
get_subtree_max_depth,
validate_folder_depth,
)
from app.users import current_active_user
from app.utils.rbac import check_permission
router = APIRouter()
@router.post("/folders", response_model=FolderRead)
async def create_folder(
    request: FolderCreate,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Create a new folder. Requires DOCUMENTS_CREATE permission."""
    space_id = request.search_space_id
    parent_id = request.parent_id
    try:
        await check_permission(
            session,
            user,
            space_id,
            Permission.DOCUMENTS_CREATE.value,
            "You don't have permission to create folders in this search space",
        )

        # A nested folder needs an existing parent that lives in the same space.
        if parent_id is not None:
            parent_folder = await session.get(Folder, parent_id)
            if not parent_folder:
                raise HTTPException(status_code=404, detail="Parent folder not found")
            if parent_folder.search_space_id != space_id:
                raise HTTPException(
                    status_code=400,
                    detail="Parent folder belongs to a different search space",
                )

        # Enforce the nesting limit, then place the folder after its last sibling.
        await validate_folder_depth(session, parent_id)
        new_folder = Folder(
            name=request.name,
            position=await generate_folder_position(session, space_id, parent_id),
            parent_id=parent_id,
            search_space_id=space_id,
            created_by_id=user.id,
        )
        session.add(new_folder)
        await session.commit()
        await session.refresh(new_folder)
        return new_folder
    except HTTPException:
        raise
    except Exception as exc:
        await session.rollback()
        # The unique index name in the error text identifies a duplicate
        # sibling name; anything else is reported as a server error.
        if "uq_folder_space_parent_name" in str(exc):
            status = 409
            detail = "A folder with this name already exists at this location"
        else:
            status = 500
            detail = f"Failed to create folder: {exc!s}"
        raise HTTPException(status_code=status, detail=detail) from exc
@router.get("/folders", response_model=list[FolderRead])
async def list_folders(
    search_space_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """List all folders in a search space (flat). Requires DOCUMENTS_READ permission."""
    try:
        await check_permission(
            session,
            user,
            search_space_id,
            Permission.DOCUMENTS_READ.value,
            "You don't have permission to read folders in this search space",
        )
        # Every folder of the space in one flat list, sorted by position key.
        folders_stmt = (
            select(Folder)
            .where(Folder.search_space_id == search_space_id)
            .order_by(Folder.position)
        )
        rows = await session.execute(folders_stmt)
        return rows.scalars().all()
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=500, detail=f"Failed to list folders: {exc!s}"
        ) from exc
@router.get("/folders/{folder_id}", response_model=FolderRead)
async def get_folder(
    folder_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Get a single folder. Requires DOCUMENTS_READ permission."""
    try:
        found = await session.get(Folder, folder_id)
        if not found:
            raise HTTPException(status_code=404, detail="Folder not found")
        # The permission check needs the folder's search space, so it runs
        # only after the folder has been loaded.
        await check_permission(
            session,
            user,
            found.search_space_id,
            Permission.DOCUMENTS_READ.value,
            "You don't have permission to read folders in this search space",
        )
        return found
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=500, detail=f"Failed to get folder: {exc!s}"
        ) from exc
@router.get("/folders/{folder_id}/breadcrumb", response_model=list[FolderBreadcrumb])
async def get_folder_breadcrumb(
    folder_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Get ancestor chain for breadcrumb display. Requires DOCUMENTS_READ permission."""
    try:
        current = await session.get(Folder, folder_id)
        if not current:
            raise HTTPException(status_code=404, detail="Folder not found")
        await check_permission(
            session,
            user,
            current.search_space_id,
            Permission.DOCUMENTS_READ.value,
            "You don't have permission to read folders in this search space",
        )
        # Walk parent links upward from the folder; ORDER BY depth DESC puts
        # the top-most ancestor first and the requested folder last.
        ancestor_chain = text("""
WITH RECURSIVE ancestors AS (
SELECT id, name, parent_id, 0 AS depth
FROM folders WHERE id = :folder_id
UNION ALL
SELECT f.id, f.name, f.parent_id, a.depth + 1
FROM folders f JOIN ancestors a ON f.id = a.parent_id
)
SELECT id, name FROM ancestors ORDER BY depth DESC;
""")
        outcome = await session.execute(ancestor_chain, {"folder_id": folder_id})
        return [
            FolderBreadcrumb(id=entry.id, name=entry.name)
            for entry in outcome.fetchall()
        ]
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=500, detail=f"Failed to get breadcrumb: {exc!s}"
        ) from exc
@router.put("/folders/{folder_id}", response_model=FolderRead)
async def update_folder(
    folder_id: int,
    request: FolderUpdate,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Rename a folder. Requires DOCUMENTS_UPDATE permission."""
    try:
        target = await session.get(Folder, folder_id)
        if not target:
            raise HTTPException(status_code=404, detail="Folder not found")
        await check_permission(
            session,
            user,
            target.search_space_id,
            Permission.DOCUMENTS_UPDATE.value,
            "You don't have permission to update folders in this search space",
        )
        # Only the name changes; parent and position stay as they are.
        target.name = request.name
        await session.commit()
        await session.refresh(target)
        return target
    except HTTPException:
        raise
    except Exception as exc:
        await session.rollback()
        # A hit on the unique index means a sibling already uses the new name.
        if "uq_folder_space_parent_name" in str(exc):
            status = 409
            detail = "A folder with this name already exists at this location"
        else:
            status = 500
            detail = f"Failed to update folder: {exc!s}"
        raise HTTPException(status_code=status, detail=detail) from exc
@router.put("/folders/{folder_id}/move", response_model=FolderRead)
async def move_folder(
    folder_id: int,
    request: FolderMove,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Move a folder to a new parent. Requires DOCUMENTS_UPDATE permission."""
    destination_id = request.new_parent_id
    try:
        moving = await session.get(Folder, folder_id)
        if not moving:
            raise HTTPException(status_code=404, detail="Folder not found")
        await check_permission(
            session,
            user,
            moving.search_space_id,
            Permission.DOCUMENTS_UPDATE.value,
            "You don't have permission to move folders in this search space",
        )

        # A None destination means "move to root" and needs no target checks.
        if destination_id is not None:
            destination = await session.get(Folder, destination_id)
            if not destination:
                raise HTTPException(
                    status_code=404, detail="Target parent folder not found"
                )
            if destination.search_space_id != moving.search_space_id:
                raise HTTPException(
                    status_code=400,
                    detail="Cannot move folder to a different search space",
                )
            await check_no_circular_reference(session, folder_id, destination_id)

        # The whole subtree moves along, so its height counts against the limit.
        height_below = await get_subtree_max_depth(session, folder_id)
        await validate_folder_depth(session, destination_id, height_below)

        # Append the folder after the last sibling at the destination.
        appended_position = await generate_folder_position(
            session, moving.search_space_id, destination_id
        )
        moving.parent_id = destination_id
        moving.position = appended_position
        await session.commit()
        await session.refresh(moving)
        return moving
    except HTTPException:
        raise
    except Exception as exc:
        await session.rollback()
        if "uq_folder_space_parent_name" in str(exc):
            status = 409
            detail = "A folder with this name already exists at the target location"
        else:
            status = 500
            detail = f"Failed to move folder: {exc!s}"
        raise HTTPException(status_code=status, detail=detail) from exc
@router.put("/folders/{folder_id}/reorder", response_model=FolderRead)
async def reorder_folder(
    folder_id: int,
    request: FolderReorder,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Reorder a folder among its siblings via fractional indexing. Requires DOCUMENTS_UPDATE."""
    try:
        target = await session.get(Folder, folder_id)
        if not target:
            raise HTTPException(status_code=404, detail="Folder not found")
        await check_permission(
            session,
            user,
            target.search_space_id,
            Permission.DOCUMENTS_UPDATE.value,
            "You don't have permission to reorder folders in this search space",
        )
        # The parent is untouched; only the sort key is recomputed from the
        # neighbour positions supplied by the client.
        target.position = await generate_folder_position(
            session,
            target.search_space_id,
            target.parent_id,
            before_position=request.before_position,
            after_position=request.after_position,
        )
        await session.commit()
        await session.refresh(target)
        return target
    except HTTPException:
        raise
    except Exception as exc:
        await session.rollback()
        raise HTTPException(
            status_code=500, detail=f"Failed to reorder folder: {exc!s}"
        ) from exc
@router.delete("/folders/{folder_id}")
async def delete_folder(
    folder_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Delete a folder and cascade-delete subfolders. Documents are async-deleted via Celery."""
    try:
        folder = await session.get(Folder, folder_id)
        if not folder:
            raise HTTPException(status_code=404, detail="Folder not found")
        await check_permission(
            session,
            user,
            folder.search_space_id,
            Permission.DOCUMENTS_DELETE.value,
            "You don't have permission to delete folders in this search space",
        )

        # Collect the documents of the whole subtree before the folder rows
        # disappear, skipping any that are already being deleted.
        subtree_ids = await get_folder_subtree_ids(session, folder_id)
        doc_result = await session.execute(
            select(Document.id).where(
                Document.folder_id.in_(subtree_ids),
                Document.status["state"].as_string() != "deleting",
            )
        )
        document_ids = list(doc_result.scalars().all())

        # Mark the documents and remove the folder in ONE transaction. If the
        # status were committed on its own and the folder delete then failed,
        # the rollback below could not undo it and the documents would stay in
        # "deleting" forever with no cleanup task queued.
        if document_ids:
            await session.execute(
                Document.__table__.update()
                .where(Document.id.in_(document_ids))
                .values(status={"state": "deleting"})
            )
        await session.execute(Folder.__table__.delete().where(Folder.id == folder_id))
        await session.commit()

        if document_ids:
            try:
                # Imported lazily, as in the original handler.
                from app.tasks.celery_tasks.document_tasks import (
                    delete_folder_documents_task,
                )

                delete_folder_documents_task.delay(document_ids)
            except Exception as err:
                # The folder is already gone; make the documents usable again
                # rather than leaving them marked for a deletion that will
                # never be processed.
                await session.execute(
                    Document.__table__.update()
                    .where(Document.id.in_(document_ids))
                    .values(status={"state": "ready"})
                )
                await session.commit()
                raise HTTPException(
                    status_code=503,
                    detail="Folder deleted but document cleanup could not be queued. Documents have been restored.",
                ) from err

        return {
            "message": "Folder deleted successfully",
            "documents_queued_for_deletion": len(document_ids),
        }
    except HTTPException:
        raise
    except Exception as e:
        await session.rollback()
        raise HTTPException(
            status_code=500, detail=f"Failed to delete folder: {e!s}"
        ) from e
@router.put("/documents/{document_id}/move")
async def move_document(
    document_id: int,
    request: DocumentMove,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Move a document to a folder (or root). Requires DOCUMENTS_UPDATE permission."""
    destination_id = request.folder_id
    try:
        lookup = await session.execute(
            select(Document).filter(Document.id == document_id)
        )
        doc = lookup.scalars().first()
        if not doc:
            raise HTTPException(status_code=404, detail="Document not found")
        await check_permission(
            session,
            user,
            doc.search_space_id,
            Permission.DOCUMENTS_UPDATE.value,
            "You don't have permission to move documents in this search space",
        )

        # A None destination moves the document to the root level; otherwise
        # the folder must exist in the document's own search space.
        if destination_id is not None:
            destination = await session.get(Folder, destination_id)
            if not destination:
                raise HTTPException(status_code=404, detail="Target folder not found")
            if destination.search_space_id != doc.search_space_id:
                raise HTTPException(
                    status_code=400,
                    detail="Cannot move document to a folder in a different search space",
                )

        doc.folder_id = destination_id
        await session.commit()
        return {"message": "Document moved successfully"}
    except HTTPException:
        raise
    except Exception as exc:
        await session.rollback()
        raise HTTPException(
            status_code=500, detail=f"Failed to move document: {exc!s}"
        ) from exc
@router.put("/documents/bulk-move")
async def bulk_move_documents(
    request: BulkDocumentMove,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Move multiple documents to a folder (or root). Requires DOCUMENTS_UPDATE permission."""
    try:
        if not request.document_ids:
            raise HTTPException(status_code=400, detail="No document IDs provided")

        result = await session.execute(
            select(Document).filter(Document.id.in_(request.document_ids))
        )
        documents = result.scalars().all()
        if not documents:
            raise HTTPException(status_code=404, detail="No documents found")

        # The documents may span several search spaces; the caller needs update
        # permission in every one of them.
        for ss_id in {doc.search_space_id for doc in documents}:
            await check_permission(
                session,
                user,
                ss_id,
                Permission.DOCUMENTS_UPDATE.value,
                "You don't have permission to move documents in this search space",
            )

        # A None folder_id moves everything to the root level; otherwise every
        # document must already live in the target folder's search space.
        if request.folder_id is not None:
            target = await session.get(Folder, request.folder_id)
            if not target:
                raise HTTPException(status_code=404, detail="Target folder not found")
            if any(doc.search_space_id != target.search_space_id for doc in documents):
                raise HTTPException(
                    status_code=400,
                    detail="Cannot move documents to a folder in a different search space",
                )

        # Update and count only the rows that were actually found and
        # permission-checked. The request may contain unknown or repeated IDs,
        # which previously inflated the reported number of moved documents.
        found_ids = [doc.id for doc in documents]
        await session.execute(
            Document.__table__.update()
            .where(Document.id.in_(found_ids))
            .values(folder_id=request.folder_id)
        )
        await session.commit()
        return {"message": f"{len(found_ids)} documents moved successfully"}
    except HTTPException:
        raise
    except Exception as e:
        await session.rollback()
        raise HTTPException(
            status_code=500, detail=f"Failed to move documents: {e!s}"
        ) from e

View file

@ -22,6 +22,16 @@ from .documents import (
ExtensionDocumentMetadata,
PaginatedResponse,
)
from .folders import (
BulkDocumentMove,
DocumentMove,
FolderBreadcrumb,
FolderCreate,
FolderMove,
FolderRead,
FolderReorder,
FolderUpdate,
)
from .google_drive import DriveItem, GoogleDriveIndexingOptions, GoogleDriveIndexRequest
from .image_generation import (
GlobalImageGenConfigRead,
@ -109,6 +119,8 @@ from .video_presentations import (
)
__all__ = [
# Folder schemas
"BulkDocumentMove",
# Chat schemas (assistant-ui integration)
"ChatMessage",
# Chunk schemas
@ -119,6 +131,7 @@ __all__ = [
"DefaultSystemInstructionsResponse",
# Document schemas
"DocumentBase",
"DocumentMove",
"DocumentRead",
"DocumentStatusBatchResponse",
"DocumentStatusItemRead",
@ -132,6 +145,12 @@ __all__ = [
"DriveItem",
"ExtensionDocumentContent",
"ExtensionDocumentMetadata",
"FolderBreadcrumb",
"FolderCreate",
"FolderMove",
"FolderRead",
"FolderReorder",
"FolderUpdate",
"GlobalImageGenConfigRead",
"GlobalNewLLMConfigRead",
"GoogleDriveIndexRequest",

View file

@ -59,6 +59,7 @@ class DocumentRead(BaseModel):
created_at: datetime
updated_at: datetime | None
search_space_id: int
folder_id: int | None = None
created_by_id: UUID | None = None # User who created/uploaded this document
created_by_name: str | None = None
created_by_email: str | None = None
@ -89,6 +90,7 @@ class DocumentTitleRead(BaseModel):
id: int
title: str
document_type: DocumentType
folder_id: int | None = None
model_config = ConfigDict(from_attributes=True)

View file

@ -0,0 +1,52 @@
"""Pydantic schemas for folder CRUD, move, and reorder operations."""
from datetime import datetime
from uuid import UUID
from pydantic import BaseModel, ConfigDict, Field
class FolderCreate(BaseModel):
    # Request body for creating a folder.
    # Folder name, 1 to 255 characters.
    name: str = Field(max_length=255, min_length=1)
    # Parent folder id; None places the folder at the root of the search space.
    parent_id: int | None = None
    # Search space that will own the folder.
    search_space_id: int
class FolderUpdate(BaseModel):
    # Request body for renaming a folder; the name must be 1 to 255 characters.
    name: str = Field(max_length=255, min_length=1)
class FolderMove(BaseModel):
    # Request body for moving a folder; None moves it to the root level.
    new_parent_id: int | None = None
class FolderReorder(BaseModel):
    # Request body for reordering a folder among its siblings. The folder
    # service passes both values, in this order, to
    # generate_key_between(before_position, after_position). When both are
    # None the folder is appended after its last sibling.
    before_position: str | None = None
    after_position: str | None = None
class FolderRead(BaseModel):
    # Response shape for a folder.
    id: int
    name: str
    # String sort key among sibling folders.
    position: str
    # None for a root-level folder.
    parent_id: int | None
    search_space_id: int
    # None when the folder has no recorded creator.
    created_by_id: UUID | None
    created_at: datetime
    updated_at: datetime
    # from_attributes lets the model be built directly from an ORM object.
    model_config = ConfigDict(from_attributes=True)
class FolderBreadcrumb(BaseModel):
    # One entry of a folder's ancestor chain: just the id and display name.
    id: int
    name: str
class DocumentMove(BaseModel):
    # Request body for moving one document; None moves it to the root level.
    folder_id: int | None = None
class BulkDocumentMove(BaseModel):
    # Request body for moving several documents at once.
    # IDs of the documents to move; the route rejects an empty list with 400.
    document_ids: list[int]
    # Target folder; None moves the documents to the root level.
    folder_id: int | None = None

View file

@ -0,0 +1,158 @@
"""Folder service: depth validation, circular reference checks, and position generation."""
from fastapi import HTTPException
from fractional_indexing import generate_key_between
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.db import Folder
MAX_FOLDER_DEPTH = 8
async def get_folder_depth(session: AsyncSession, folder_id: int) -> int:
    """Return how many levels deep a folder sits (a root-level folder is 1).

    The recursive query starts at ``folder_id`` with depth 1 and adds one per
    parent link followed; the largest depth reached is the folder's level.
    Returns 0 when the query yields no depth, e.g. for an unknown id.
    """
    ancestor_walk = text("""
WITH RECURSIVE ancestors AS (
SELECT id, parent_id, 1 AS depth
FROM folders
WHERE id = :folder_id
UNION ALL
SELECT f.id, f.parent_id, a.depth + 1
FROM folders f
JOIN ancestors a ON f.id = a.parent_id
)
SELECT MAX(depth) FROM ancestors;
""")
    outcome = await session.execute(ancestor_walk, {"folder_id": folder_id})
    return outcome.scalar() or 0
async def get_subtree_max_depth(session: AsyncSession, folder_id: int) -> int:
    """Return the number of folder levels that exist below ``folder_id``.

    A folder without children yields 0, one with only direct children yields
    1, and so on.
    """
    descendant_walk = text("""
WITH RECURSIVE descendants AS (
SELECT id, 0 AS depth
FROM folders
WHERE parent_id = :folder_id
UNION ALL
SELECT f.id, d.depth + 1
FROM folders f
JOIN descendants d ON f.parent_id = d.id
)
SELECT COALESCE(MAX(depth), -1) FROM descendants;
""")
    outcome = await session.execute(descendant_walk, {"folder_id": folder_id})
    deepest = outcome.scalar()
    # The query numbers direct children 0 and reports -1 when there are none,
    # so a non-negative value is shifted by one to get a level count.
    if deepest is None or deepest < 0:
        return 0
    return deepest + 1
async def validate_folder_depth(
    session: AsyncSession,
    parent_id: int | None,
    subtree_depth: int = 0,
) -> None:
    """Reject a placement that would nest folders deeper than MAX_FOLDER_DEPTH.

    ``parent_id`` is the prospective parent (None for the root level) and
    ``subtree_depth`` the number of levels below the folder being placed.
    Raises HTTPException(400) when the deepest resulting level exceeds the limit.
    """
    # The root level has no parent and therefore contributes no depth.
    parent_depth = (
        0 if parent_id is None else await get_folder_depth(session, parent_id)
    )
    # Parent levels + the folder itself + whatever hangs below it.
    total = parent_depth + 1 + subtree_depth
    if total <= MAX_FOLDER_DEPTH:
        return
    raise HTTPException(
        status_code=400,
        detail=f"Maximum folder nesting depth is {MAX_FOLDER_DEPTH}. "
        f"This operation would result in depth {total}.",
    )
async def check_no_circular_reference(
    session: AsyncSession,
    folder_id: int,
    new_parent_id: int | None,
) -> None:
    """Ensure moving ``folder_id`` under ``new_parent_id`` cannot create a cycle.

    Raises HTTPException(400) when the target is the folder itself or lies
    somewhere inside the folder's own subtree. A None target (root) is always
    acceptable.
    """
    if new_parent_id is None:
        return
    if new_parent_id == folder_id:
        raise HTTPException(
            status_code=400,
            detail="A folder cannot be moved into itself.",
        )

    # Climb from the prospective parent towards the root. Meeting folder_id on
    # the way means the target sits below the folder being moved.
    ancestor_probe = text("""
WITH RECURSIVE ancestors AS (
SELECT id, parent_id
FROM folders
WHERE id = :new_parent_id
UNION ALL
SELECT f.id, f.parent_id
FROM folders f
JOIN ancestors a ON f.id = a.parent_id
)
SELECT 1 FROM ancestors WHERE id = :folder_id LIMIT 1;
""")
    outcome = await session.execute(
        ancestor_probe,
        {"new_parent_id": new_parent_id, "folder_id": folder_id},
    )
    found_self = outcome.scalar() is not None
    if found_self:
        raise HTTPException(
            status_code=400,
            detail="Cannot move a folder into one of its own descendants.",
        )
async def generate_folder_position(
    session: AsyncSession,
    search_space_id: int,
    parent_id: int | None,
    before_position: str | None = None,
    after_position: str | None = None,
) -> str:
    """Generate a fractional index key for ordering a folder among its siblings.

    - Default (no before/after): append after last sibling
    - Prepend: before_position=None, after_position=first sibling position
    - Insert between: both positions provided

    Raises:
        HTTPException: 400 when the supplied before/after positions are
            rejected by ``generate_key_between``.
    """
    if before_position is not None or after_position is not None:
        # Both values come straight from the client. The original let any
        # rejection from the key generator surface as an HTTP 500 in the
        # reorder route; report it as a bad request instead. The try wraps
        # only this pure call, so no database error can be masked.
        try:
            return generate_key_between(before_position, after_position)
        except Exception as err:
            raise HTTPException(
                status_code=400,
                detail="Invalid before_position/after_position for folder reordering.",
            ) from err

    # Append after last sibling. Root-level siblings have a NULL parent_id,
    # which must be matched with IS NULL rather than "=".
    if parent_id is not None:
        same_parent = Folder.parent_id == parent_id
    else:
        same_parent = Folder.parent_id.is_(None)
    query = (
        select(Folder.position)
        .where(Folder.search_space_id == search_space_id, same_parent)
        .order_by(Folder.position.desc())
        .limit(1)
    )
    result = await session.execute(query)
    # None (no siblings yet) makes the generator produce a first key.
    last_position = result.scalar()
    return generate_key_between(last_position, None)
async def get_folder_subtree_ids(session: AsyncSession, folder_id: int) -> list[int]:
    """Collect the ids of ``folder_id`` and every folder nested beneath it."""
    # The recursion is seeded with the folder itself, so its own id is part of
    # the result whenever the folder exists.
    subtree_walk = text("""
WITH RECURSIVE subtree AS (
SELECT id FROM folders WHERE id = :folder_id
UNION ALL
SELECT f.id FROM folders f JOIN subtree s ON f.parent_id = s.id
)
SELECT id FROM subtree;
""")
    outcome = await session.execute(subtree_walk, {"folder_id": folder_id})
    return list(outcome.scalars().all())

View file

@ -133,6 +133,51 @@ async def _delete_document_background(document_id: int) -> None:
await session.commit()
@celery_app.task(
    name="delete_folder_documents_background",
    bind=True,
    autoretry_for=(Exception,),
    retry_backoff=True,
    retry_backoff_max=300,
    max_retries=5,
)
def delete_folder_documents_task(self, document_ids: list[int]):
    """Celery entry point that removes the documents left behind by a folder deletion.

    Runs the async cleanup to completion on a fresh event loop. Any exception
    triggers an automatic retry with backoff, up to five retries.
    """
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    try:
        cleanup = _delete_folder_documents(document_ids)
        event_loop.run_until_complete(cleanup)
    finally:
        # Always release the loop, including when the cleanup raised.
        event_loop.close()
async def _delete_folder_documents(document_ids: list[int]) -> None:
    """Remove each listed document: its chunks first, in batches, then the document row."""
    from sqlalchemy import delete as sa_delete, select
    from app.db import Chunk, Document

    batch_size = 500
    async with get_celery_session_maker()() as session:
        for doc_id in document_ids:
            # Re-running this query after each committed delete returns the
            # next batch, until the document has no chunks left.
            next_chunk_ids = (
                select(Chunk.id)
                .where(Chunk.document_id == doc_id)
                .limit(batch_size)
            )
            while chunk_ids := (
                (await session.execute(next_chunk_ids)).scalars().all()
            ):
                await session.execute(
                    sa_delete(Chunk).where(Chunk.id.in_(chunk_ids))
                )
                await session.commit()

            # The document may already be gone; only delete it if it is still there.
            doc = await session.get(Document, doc_id)
            if doc:
                await session.delete(doc)
                await session.commit()
@celery_app.task(
name="delete_search_space_background",
bind=True,

View file

@ -73,6 +73,7 @@ dependencies = [
"langchain-daytona>=0.0.2",
"pypandoc>=1.16.2",
"notion-markdown>=0.7.0",
"fractional-indexing>=0.1.3",
]
[dependency-groups]

1219
surfsense_backend/uv.lock generated

File diff suppressed because it is too large Load diff