SurfSense/surfsense_backend/app/routes/reports_routes.py

251 lines
7.6 KiB
Python
Raw Normal View History

"""
Report routes for CRUD operations and export (PDF/DOCX).
These routes support the report generation feature in new-chat.
Reports are generated inline by the agent tool and stored as Markdown.
Export to PDF/DOCX is on-demand via pypandoc.
Authorization: lightweight search-space membership checks (no granular RBAC)
since reports are chat-generated artifacts, not standalone managed resources.
"""
import asyncio
import io
import logging
from enum import Enum
import pypandoc
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import StreamingResponse
from sqlalchemy import select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import (
Report,
SearchSpace,
SearchSpaceMembership,
User,
get_async_session,
)
from app.schemas import ReportContentRead, ReportRead
from app.users import current_active_user
from app.utils.rbac import check_search_space_access
logger = logging.getLogger(__name__)
router = APIRouter()
MAX_REPORT_LIST_LIMIT = 500
class ExportFormat(str, Enum):
PDF = "pdf"
DOCX = "docx"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _get_report_with_access(
report_id: int,
session: AsyncSession,
user: User,
) -> Report:
"""Fetch a report and verify the user belongs to its search space.
Raises HTTPException(404) if not found, HTTPException(403) if no access.
"""
result = await session.execute(select(Report).filter(Report.id == report_id))
report = result.scalars().first()
if not report:
raise HTTPException(status_code=404, detail="Report not found")
# Lightweight membership check no granular RBAC, just "is the user a
# member of the search space this report belongs to?"
await check_search_space_access(session, user, report.search_space_id)
return report
# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------
@router.get("/reports", response_model=list[ReportRead])
async def read_reports(
skip: int = Query(default=0, ge=0),
limit: int = Query(default=100, ge=1, le=MAX_REPORT_LIST_LIMIT),
search_space_id: int | None = None,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
List reports the user has access to.
Filters by search space membership.
"""
try:
if search_space_id is not None:
# Verify the caller is a member of the requested search space
await check_search_space_access(session, user, search_space_id)
result = await session.execute(
select(Report)
.filter(Report.search_space_id == search_space_id)
.order_by(Report.id.desc())
.offset(skip)
.limit(limit)
)
else:
result = await session.execute(
select(Report)
.join(SearchSpace)
.join(SearchSpaceMembership)
.filter(SearchSpaceMembership.user_id == user.id)
.order_by(Report.id.desc())
.offset(skip)
.limit(limit)
)
return result.scalars().all()
except HTTPException:
raise
except SQLAlchemyError:
raise HTTPException(
status_code=500, detail="Database error occurred while fetching reports"
) from None
@router.get("/reports/{report_id}", response_model=ReportRead)
async def read_report(
report_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Get a specific report by ID (metadata only, no content).
"""
try:
return await _get_report_with_access(report_id, session, user)
except HTTPException:
raise
except SQLAlchemyError:
raise HTTPException(
status_code=500, detail="Database error occurred while fetching report"
) from None
@router.get("/reports/{report_id}/content", response_model=ReportContentRead)
async def read_report_content(
report_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Get full Markdown content of a report.
"""
try:
return await _get_report_with_access(report_id, session, user)
except HTTPException:
raise
except SQLAlchemyError:
raise HTTPException(
status_code=500,
detail="Database error occurred while fetching report content",
) from None
@router.get("/reports/{report_id}/export")
async def export_report(
report_id: int,
format: ExportFormat = Query(ExportFormat.PDF, description="Export format: pdf or docx"),
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Export a report as PDF or DOCX.
"""
try:
report = await _get_report_with_access(report_id, session, user)
if not report.content:
raise HTTPException(
status_code=400, detail="Report has no content to export"
)
# Convert Markdown to the requested format via pypandoc.
# pypandoc spawns a pandoc subprocess (blocking), so we run it in a
# thread executor to avoid blocking the async event loop.
extra_args = ["--standalone"]
if format == ExportFormat.PDF:
extra_args.append("--pdf-engine=wkhtmltopdf")
loop = asyncio.get_running_loop()
output = await loop.run_in_executor(
None, # default thread-pool
lambda: pypandoc.convert_text(
report.content,
format.value,
format="md",
extra_args=extra_args,
),
)
# pypandoc returns bytes for binary formats (pdf, docx), str for text formats
if isinstance(output, str):
output = output.encode("utf-8")
# Sanitize filename
safe_title = (
"".join(c if c.isalnum() or c in " -_" else "_" for c in report.title)
.strip()[:80]
or "report"
)
media_types = {
ExportFormat.PDF: "application/pdf",
ExportFormat.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
}
return StreamingResponse(
io.BytesIO(output),
media_type=media_types[format],
headers={
"Content-Disposition": f'attachment; filename="{safe_title}.{format.value}"',
},
)
except HTTPException:
raise
except Exception as e:
logger.exception("Report export failed")
raise HTTPException(
status_code=500, detail=f"Export failed: {e!s}"
) from e
@router.delete("/reports/{report_id}", response_model=dict)
async def delete_report(
report_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Delete a report.
"""
try:
db_report = await _get_report_with_access(report_id, session, user)
await session.delete(db_report)
await session.commit()
return {"message": "Report deleted successfully"}
except HTTPException:
raise
except SQLAlchemyError:
await session.rollback()
raise HTTPException(
status_code=500, detail="Database error occurred while deleting report"
) from None