# SurfSense/surfsense_backend/app/routes/reports_routes.py

"""
Report routes for CRUD operations and export (PDF/DOCX).

These routes support the report generation feature in new-chat.
Reports are generated inline by the agent tool and stored as Markdown.
Export to PDF/DOCX is on-demand via pypandoc.

Authorization: lightweight search-space membership checks (no granular RBAC)
since reports are chat-generated artifacts, not standalone managed resources.
"""

import asyncio
import io
import logging
import os
import tempfile
from enum import Enum
from urllib.parse import quote

import pypandoc
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import StreamingResponse
from sqlalchemy import select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import (
    Report,
    SearchSpace,
    SearchSpaceMembership,
    User,
    get_async_session,
)
from app.schemas import ReportContentRead, ReportRead
from app.users import current_active_user
from app.utils.rbac import check_search_space_access

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router on which every report endpoint in this file is registered.
router = APIRouter()

# Upper bound for the `limit` query parameter of the report listing endpoint.
MAX_REPORT_LIST_LIMIT = 500


# Output formats accepted by the report export endpoint.
# The `str` mixin makes each member compare equal to its plain string value;
# the export route passes `.value` to pandoc as the target format and also
# uses it as the extension of the downloaded file name.
class ExportFormat(str, Enum):
    PDF = "pdf"
    DOCX = "docx"


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

async def _get_report_with_access(
    report_id: int,
    session: AsyncSession,
    user: User,
) -> Report:
    """Load one report by primary key and run the search-space access check.

    The access decision is delegated to ``check_search_space_access``, called
    with the report's ``search_space_id``; whatever that helper raises
    propagates to the caller unchanged.

    Raises:
        HTTPException: 404 when no report with ``report_id`` exists.
    """
    lookup = select(Report).filter(Report.id == report_id)
    rows = await session.execute(lookup)
    found = rows.scalars().first()

    if found:
        # Membership-level check only (no granular RBAC): is the user allowed
        # into the search space that owns this report?
        await check_search_space_access(session, user, found.search_space_id)
        return found

    raise HTTPException(status_code=404, detail="Report not found")


# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------


@router.get("/reports", response_model=list[ReportRead])
async def read_reports(
    skip: int = Query(default=0, ge=0),
    limit: int = Query(default=100, ge=1, le=MAX_REPORT_LIST_LIMIT),
    search_space_id: int | None = None,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Return one page of reports, highest report id first.

    With ``search_space_id`` set, the caller's access to that search space is
    checked first and only its reports are listed. Without it, the listing
    covers every search space in which the caller has a membership row.

    A SQLAlchemy failure is reported as HTTP 500; HTTP errors raised by the
    access check pass through untouched.
    """
    try:
        if search_space_id is None:
            # No explicit space: restrict through the membership table.
            scoped = (
                select(Report)
                .join(SearchSpace)
                .join(SearchSpaceMembership)
                .filter(SearchSpaceMembership.user_id == user.id)
            )
        else:
            # Explicit space: the caller must be allowed into it.
            await check_search_space_access(session, user, search_space_id)
            scoped = select(Report).filter(
                Report.search_space_id == search_space_id
            )

        # Ordering and pagination are identical for both scopes.
        page_query = scoped.order_by(Report.id.desc()).offset(skip).limit(limit)
        rows = await session.execute(page_query)
        return rows.scalars().all()
    except HTTPException:
        raise
    except SQLAlchemyError:
        raise HTTPException(
            status_code=500, detail="Database error occurred while fetching reports"
        ) from None


@router.get("/reports/{report_id}", response_model=ReportRead)
async def read_report(
    report_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Return a single report, serialized through ``ReportRead``.

    Lookup and access checking are done by ``_get_report_with_access``; its
    HTTP errors pass through, and a SQLAlchemy failure becomes HTTP 500.
    """
    try:
        report = await _get_report_with_access(report_id, session, user)
    except HTTPException:
        raise
    except SQLAlchemyError:
        raise HTTPException(
            status_code=500, detail="Database error occurred while fetching report"
        ) from None
    return report


@router.get("/reports/{report_id}/content", response_model=ReportContentRead)
async def read_report_content(
    report_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Return a single report, serialized through ``ReportContentRead``.

    Lookup and access checking are done by ``_get_report_with_access``; its
    HTTP errors pass through, and a SQLAlchemy failure becomes HTTP 500.
    """
    try:
        report = await _get_report_with_access(report_id, session, user)
    except HTTPException:
        raise
    except SQLAlchemyError:
        raise HTTPException(
            status_code=500,
            detail="Database error occurred while fetching report content",
        ) from None
    return report


def _render_markdown(markdown: str, target: str, extra_args: list[str]) -> bytes:
    """Convert Markdown to ``target`` ("pdf" or "docx") and return the file bytes.

    pypandoc does not return binary formats as a value; it requires an
    ``outputfile`` (and, for PDF, one ending in ``.pdf``). The document is
    therefore rendered into a throwaway directory and read back. This function
    blocks on a pandoc subprocess, so call it from a worker thread.
    """
    with tempfile.TemporaryDirectory() as workdir:
        out_path = os.path.join(workdir, f"report.{target}")
        pypandoc.convert_text(
            markdown,
            target,
            format="md",
            extra_args=extra_args,
            outputfile=out_path,
        )
        with open(out_path, "rb") as rendered:
            return rendered.read()


def _export_content_disposition(title: str | None, extension: str) -> str:
    """Build the ``Content-Disposition`` value for a downloaded report.

    The title is reduced to alphanumerics, spaces, ``-`` and ``_`` (anything
    else becomes ``_``), trimmed, and capped at 80 characters; an empty result
    falls back to ``report``. Header values must be latin-1 encodable, so the
    plain ``filename=`` parameter is ASCII-only, and a percent-encoded
    ``filename*=`` parameter is appended when the title has non-ASCII text.
    """
    stem = (
        "".join(ch if ch.isalnum() or ch in " -_" else "_" for ch in (title or ""))
        .strip()[:80]
        or "report"
    )
    ascii_stem = "".join(ch if ch.isascii() else "_" for ch in stem)

    disposition = f'attachment; filename="{ascii_stem}.{extension}"'
    if ascii_stem != stem:
        disposition += f"; filename*=UTF-8''{quote(stem, safe='')}.{extension}"
    return disposition


@router.get("/reports/{report_id}/export")
async def export_report(
    report_id: int,
    format: ExportFormat = Query(ExportFormat.PDF, description="Export format: pdf or docx"),
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Export a report as PDF or DOCX.

    The report's Markdown is converted with pandoc (via pypandoc) and returned
    as a file attachment named after the report title.

    Raises:
        HTTPException: 400 when the report has no content; 500 when the
            conversion or any other step fails. HTTP errors raised while
            loading the report pass through unchanged.
    """
    try:
        report = await _get_report_with_access(report_id, session, user)

        # Read ORM attributes here, on the event-loop thread, so the worker
        # thread below only ever touches plain Python values.
        markdown = report.content
        title = report.title

        if not markdown:
            raise HTTPException(
                status_code=400, detail="Report has no content to export"
            )

        extra_args = ["--standalone"]
        if format == ExportFormat.PDF:
            extra_args.append("--pdf-engine=wkhtmltopdf")

        # pandoc runs as a blocking subprocess; keep it off the event loop.
        payload = await asyncio.to_thread(
            _render_markdown, markdown, format.value, extra_args
        )

        media_types = {
            ExportFormat.PDF: "application/pdf",
            ExportFormat.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        }

        return StreamingResponse(
            io.BytesIO(payload),
            media_type=media_types[format],
            headers={
                "Content-Disposition": _export_content_disposition(
                    title, format.value
                ),
            },
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.exception("Report export failed")
        raise HTTPException(
            status_code=500, detail=f"Export failed: {e!s}"
        ) from e


@router.delete("/reports/{report_id}", response_model=dict)
async def delete_report(
    report_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Delete a single report and commit the change.

    The report is loaded and access-checked by ``_get_report_with_access``,
    whose HTTP errors pass through. On a SQLAlchemy failure the session is
    rolled back and HTTP 500 is returned.
    """
    try:
        doomed = await _get_report_with_access(report_id, session, user)
        await session.delete(doomed)
        await session.commit()
    except HTTPException:
        raise
    except SQLAlchemyError:
        await session.rollback()
        raise HTTPException(
            status_code=500, detail="Database error occurred while deleting report"
        ) from None

    return {"message": "Report deleted successfully"}