SurfSense/surfsense_backend/app/routes/reports_routes.py
Anish Sarkar acad8c6d2b feat: implement report generation tool and associated routes for CRUD operations
- Added a new tool for generating structured Markdown reports based on user input.
- Implemented routes for creating, reading, exporting, and deleting reports.
- Integrated report generation into the chat flow, allowing users to generate reports inline.
- Updated schemas to support report data structures and responses.
- Enhanced frontend components to handle report generation and display results.
2026-02-11 17:55:52 +05:30

250 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Report routes for CRUD operations and export (PDF/DOCX).
These routes support the report generation feature in new-chat.
Reports are generated inline by the agent tool and stored as Markdown.
Export to PDF/DOCX is on-demand via pypandoc.
Authorization: lightweight search-space membership checks (no granular RBAC)
since reports are chat-generated artifacts, not standalone managed resources.
"""
import asyncio
import io
import logging
from enum import Enum
import pypandoc
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import StreamingResponse
from sqlalchemy import select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import (
Report,
SearchSpace,
SearchSpaceMembership,
User,
get_async_session,
)
from app.schemas import ReportContentRead, ReportRead
from app.users import current_active_user
from app.utils.rbac import check_search_space_access
logger = logging.getLogger(__name__)
router = APIRouter()
MAX_REPORT_LIST_LIMIT = 500
class ExportFormat(str, Enum):
PDF = "pdf"
DOCX = "docx"
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
async def _get_report_with_access(
report_id: int,
session: AsyncSession,
user: User,
) -> Report:
"""Fetch a report and verify the user belongs to its search space.
Raises HTTPException(404) if not found, HTTPException(403) if no access.
"""
result = await session.execute(select(Report).filter(Report.id == report_id))
report = result.scalars().first()
if not report:
raise HTTPException(status_code=404, detail="Report not found")
# Lightweight membership check no granular RBAC, just "is the user a
# member of the search space this report belongs to?"
await check_search_space_access(session, user, report.search_space_id)
return report
# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------
@router.get("/reports", response_model=list[ReportRead])
async def read_reports(
skip: int = Query(default=0, ge=0),
limit: int = Query(default=100, ge=1, le=MAX_REPORT_LIST_LIMIT),
search_space_id: int | None = None,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
List reports the user has access to.
Filters by search space membership.
"""
try:
if search_space_id is not None:
# Verify the caller is a member of the requested search space
await check_search_space_access(session, user, search_space_id)
result = await session.execute(
select(Report)
.filter(Report.search_space_id == search_space_id)
.order_by(Report.id.desc())
.offset(skip)
.limit(limit)
)
else:
result = await session.execute(
select(Report)
.join(SearchSpace)
.join(SearchSpaceMembership)
.filter(SearchSpaceMembership.user_id == user.id)
.order_by(Report.id.desc())
.offset(skip)
.limit(limit)
)
return result.scalars().all()
except HTTPException:
raise
except SQLAlchemyError:
raise HTTPException(
status_code=500, detail="Database error occurred while fetching reports"
) from None
@router.get("/reports/{report_id}", response_model=ReportRead)
async def read_report(
report_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Get a specific report by ID (metadata only, no content).
"""
try:
return await _get_report_with_access(report_id, session, user)
except HTTPException:
raise
except SQLAlchemyError:
raise HTTPException(
status_code=500, detail="Database error occurred while fetching report"
) from None
@router.get("/reports/{report_id}/content", response_model=ReportContentRead)
async def read_report_content(
report_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Get full Markdown content of a report.
"""
try:
return await _get_report_with_access(report_id, session, user)
except HTTPException:
raise
except SQLAlchemyError:
raise HTTPException(
status_code=500,
detail="Database error occurred while fetching report content",
) from None
@router.get("/reports/{report_id}/export")
async def export_report(
report_id: int,
format: ExportFormat = Query(ExportFormat.PDF, description="Export format: pdf or docx"),
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Export a report as PDF or DOCX.
"""
try:
report = await _get_report_with_access(report_id, session, user)
if not report.content:
raise HTTPException(
status_code=400, detail="Report has no content to export"
)
# Convert Markdown to the requested format via pypandoc.
# pypandoc spawns a pandoc subprocess (blocking), so we run it in a
# thread executor to avoid blocking the async event loop.
extra_args = ["--standalone"]
if format == ExportFormat.PDF:
extra_args.append("--pdf-engine=wkhtmltopdf")
loop = asyncio.get_running_loop()
output = await loop.run_in_executor(
None, # default thread-pool
lambda: pypandoc.convert_text(
report.content,
format.value,
format="md",
extra_args=extra_args,
),
)
# pypandoc returns bytes for binary formats (pdf, docx), str for text formats
if isinstance(output, str):
output = output.encode("utf-8")
# Sanitize filename
safe_title = (
"".join(c if c.isalnum() or c in " -_" else "_" for c in report.title)
.strip()[:80]
or "report"
)
media_types = {
ExportFormat.PDF: "application/pdf",
ExportFormat.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
}
return StreamingResponse(
io.BytesIO(output),
media_type=media_types[format],
headers={
"Content-Disposition": f'attachment; filename="{safe_title}.{format.value}"',
},
)
except HTTPException:
raise
except Exception as e:
logger.exception("Report export failed")
raise HTTPException(
status_code=500, detail=f"Export failed: {e!s}"
) from e
@router.delete("/reports/{report_id}", response_model=dict)
async def delete_report(
report_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Delete a report.
"""
try:
db_report = await _get_report_with_access(report_id, session, user)
await session.delete(db_report)
await session.commit()
return {"message": "Report deleted successfully"}
except HTTPException:
raise
except SQLAlchemyError:
await session.rollback()
raise HTTPException(
status_code=500, detail="Database error occurred while deleting report"
) from None