feat: remove pandoc and its respective engine dependencies

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-13 01:34:43 -08:00
parent 3a7a27f3ae
commit a8c1aa28c0
11 changed files with 3354 additions and 3252 deletions

View file

@ -94,6 +94,8 @@ You have access to the following tools:
3. generate_report: Generate a structured Markdown report from provided content. 3. generate_report: Generate a structured Markdown report from provided content.
- Use this when the user asks to create, generate, write, produce, draft, or summarize into a report-style deliverable. - Use this when the user asks to create, generate, write, produce, draft, or summarize into a report-style deliverable.
- DECISION RULE (HIGH PRIORITY): If the user asks for a report in any form, call `generate_report` instead of writing the full report directly in chat.
- Only skip `generate_report` if the user explicitly asks for chat-only output (e.g., "just answer in chat", "no report card", "don't generate a report").
- Trigger classes include: - Trigger classes include:
* Direct trigger words: report, document, memo, letter, template * Direct trigger words: report, document, memo, letter, template
* Creation-intent phrases: "write a document/report/post/article" * Creation-intent phrases: "write a document/report/post/article"
@ -108,6 +110,7 @@ You have access to the following tools:
* "write a report/document", "draft a report" * "write a report/document", "draft a report"
* "create an executive summary", "make a briefing note", "write a one-pager" * "create an executive summary", "make a briefing note", "write a one-pager"
* "write a blog post", "write an article", "create a comprehensive guide" * "write a blog post", "write an article", "create a comprehensive guide"
* "create a small report", "write a short report", "make a quick report", "brief report for class"
- IMPORTANT FORMAT RULE: Reports are ALWAYS generated in Markdown. - IMPORTANT FORMAT RULE: Reports are ALWAYS generated in Markdown.
- Args: - Args:
- topic: The main topic or title of the report - topic: The main topic or title of the report
@ -121,7 +124,9 @@ You have access to the following tools:
- Returns: A dictionary with status "ready" or "failed", report_id, title, and word_count. - Returns: A dictionary with status "ready" or "failed", report_id, title, and word_count.
- The report is generated immediately in Markdown and displayed inline in the chat. - The report is generated immediately in Markdown and displayed inline in the chat.
- Export/download formats (e.g., PDF/DOCX) are produced from the generated Markdown report. - Export/download formats (e.g., PDF/DOCX) are produced from the generated Markdown report.
- IMPORTANT: Always search the knowledge base first to gather comprehensive source_content before generating a report. - SOURCE-COLLECTION RULE:
* If the user already provided enough source material (current chat content, uploaded files, pasted text, or a summarized video/article), generate the report directly from that.
* Use search_knowledge_base first when additional context is needed or the user asks for information beyond what is already available in the conversation.
- AFTER CALLING THIS TOOL: Do NOT repeat, summarize, or reproduce the report content in the chat. The report is already displayed as an interactive card that the user can open, read, copy, and export. Simply confirm that the report was generated (e.g., "I've generated your report on [topic]. You can view the Markdown report now, and export to PDF/DOCX from the card."). NEVER write out the report text in the chat. - AFTER CALLING THIS TOOL: Do NOT repeat, summarize, or reproduce the report content in the chat. The report is already displayed as an interactive card that the user can open, read, copy, and export. Simply confirm that the report was generated (e.g., "I've generated your report on [topic]. You can view the Markdown report now, and export to PDF/DOCX from the card."). NEVER write out the report text in the chat.
4. link_preview: Fetch metadata for a URL to display a rich preview card. 4. link_preview: Fetch metadata for a URL to display a rich preview card.

View file

@ -58,6 +58,23 @@ Write the report now:
""" """
def _strip_wrapping_code_fences(text: str) -> str:
"""Remove wrapping code fences that LLMs often add around Markdown output.
Handles patterns like:
```markdown\\n...content...\\n```
```md\\n...content...\\n```
```\\n...content...\\n```
"""
stripped = text.strip()
# Match opening fence with optional language tag (markdown, md, or bare)
m = re.match(r"^```(?:markdown|md)?\s*\n", stripped)
if m and stripped.endswith("```"):
stripped = stripped[m.end() :] # remove opening fence
stripped = stripped[:-3].rstrip() # remove closing fence
return stripped
def _extract_metadata(content: str) -> dict[str, Any]: def _extract_metadata(content: str) -> dict[str, Any]:
"""Extract metadata from generated Markdown content.""" """Extract metadata from generated Markdown content."""
# Count section headings # Count section headings
@ -110,6 +127,11 @@ def create_generate_report_tool(
Use this tool when the user asks to create, generate, write, produce, draft, Use this tool when the user asks to create, generate, write, produce, draft,
or summarize into a report-style deliverable. or summarize into a report-style deliverable.
HIGH-PRIORITY DECISION RULE:
- If the user asks for a report in any form,
call this tool rather than writing the full report directly in chat.
- Only skip this tool when the user explicitly requests chat-only output and
says they do not want a generated report card.
Trigger classes include: Trigger classes include:
- Direct trigger words: report, document, memo, letter, template - Direct trigger words: report, document, memo, letter, template
- Creation-intent phrases: "write a document/report/post/article" - Creation-intent phrases: "write a document/report/post/article"
@ -136,11 +158,21 @@ def create_generate_report_tool(
- "Write an article" - "Write an article"
- "Create a comprehensive guide" - "Create a comprehensive guide"
- "Prepare a report" - "Prepare a report"
- "Create a small report"
- "Write a short report"
- "Make a quick report"
- "Brief report for class"
FORMAT/EXPORT RULE: FORMAT/EXPORT RULE:
- Always generate the report content in Markdown. - Always generate the report content in Markdown.
- If the user requests DOCX/Word/PDF or another file format, export from - If the user requests DOCX/Word/PDF or another file format, export from
the generated Markdown report. the generated Markdown report.
SOURCE-COLLECTION RULE:
- If enough source material is already present in the conversation (chat
history, pasted text, uploaded files, or a provided video/article summary),
generate directly from that source_content.
- Use knowledge-base search first only when extra context is needed beyond
what the user already provided.
VERSIONING parent_report_id: VERSIONING parent_report_id:
- Set parent_report_id when the user wants to MODIFY, REVISE, IMPROVE, - Set parent_report_id when the user wants to MODIFY, REVISE, IMPROVE,
@ -298,6 +330,20 @@ def create_generate_report_tool(
"title": topic, "title": topic,
} }
# LLMs often wrap output in ```markdown ... ``` fences — strip them
# so the stored content is clean Markdown.
report_content = _strip_wrapping_code_fences(report_content)
if not report_content:
error_msg = "LLM returned empty or invalid content"
report_id = await _save_failed_report(error_msg)
return {
"status": "failed",
"error": error_msg,
"report_id": report_id,
"title": topic,
}
# Extract metadata (includes "status": "ready") # Extract metadata (includes "status": "ready")
metadata = _extract_metadata(report_content) metadata = _extract_metadata(report_content)

View file

@ -3,7 +3,8 @@ Report routes for read, export (PDF/DOCX), and delete operations.
No create or update endpoints here — reports are generated inline by the No create or update endpoints here — reports are generated inline by the
agent tool during chat and stored as Markdown in the database. agent tool during chat and stored as Markdown in the database.
Export to PDF/DOCX is on-demand via pypandoc (PDF uses Typst as the engine). Export to PDF/DOCX is on-demand — PDF uses pypandoc (Markdown → Typst) + typst-py
(Typst → PDF); DOCX uses pypandoc directly.
Authorization: lightweight search-space membership checks (no granular RBAC) Authorization: lightweight search-space membership checks (no granular RBAC)
since reports are chat-generated artifacts, not standalone managed resources. since reports are chat-generated artifacts, not standalone managed resources.
@ -13,10 +14,12 @@ import asyncio
import io import io
import logging import logging
import os import os
import re
import tempfile import tempfile
from enum import Enum from enum import Enum
import pypandoc import pypandoc
import typst
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
from sqlalchemy import select from sqlalchemy import select
@ -51,6 +54,17 @@ class ExportFormat(str, Enum):
# Helpers # Helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_CODE_FENCE_RE = re.compile(r"^```(?:markdown|md)?\s*\n", re.MULTILINE)
def _strip_wrapping_code_fences(text: str) -> str:
"""Remove wrapping code fences (```markdown...```) that LLMs often add."""
stripped = text.strip()
m = _CODE_FENCE_RE.match(stripped)
if m and stripped.endswith("```"):
stripped = stripped[m.end() : -3].rstrip()
return stripped
async def _get_report_with_access( async def _get_report_with_access(
report_id: int, report_id: int,
@ -209,37 +223,64 @@ async def export_report(
status_code=400, detail="Report has no content to export" status_code=400, detail="Report has no content to export"
) )
# Convert Markdown to the requested format via pypandoc. # Strip wrapping code fences that LLMs sometimes add around Markdown.
# pypandoc spawns a pandoc subprocess (blocking), so we run the # Without this, pandoc treats the entire content as a code block.
# entire convert → read → cleanup pipeline in a thread executor markdown_content = _strip_wrapping_code_fences(report.content)
# to avoid blocking the async event loop on any file I/O.
# Convert Markdown to the requested format.
# #
# PDF uses Typst as the rendering engine — Typst has built-in # DOCX: pypandoc (pandoc) handles the full conversion directly.
# professional styling for tables, headings, code blocks, etc.,
# so no CSS injection is needed.
# #
# Use "gfm" because LLM output uses GFM-style pipe tables that # PDF: two-step pipeline — pypandoc converts Markdown → Typst markup,
# pandoc's stricter default "markdown" format may fail to parse. # then the `typst` Python library compiles Typst → PDF. This avoids
extra_args = ["--standalone"] # requiring the Typst CLI on the system PATH; the typst pip package
if format == ExportFormat.PDF: # bundles the compiler as a native extension. Typst produces
extra_args.append("--pdf-engine=typst") # professional styling for tables, headings, code blocks, etc.
#
# Use "gfm" as the input format because LLM output uses GFM-style
# pipe tables that pandoc's stricter default "markdown" may mangle.
def _convert_and_read() -> bytes: def _convert_and_read() -> bytes:
"""Run all blocking I/O (tempfile, pandoc, file read, cleanup) in a thread.""" """Run all blocking I/O (tempfile, pandoc/typst, file read, cleanup) in a thread."""
fd, tmp_path = tempfile.mkstemp(suffix=f".{format.value}") if format == ExportFormat.PDF:
os.close(fd) # Step 1: Markdown → Typst markup via pandoc.
try: # We must set mainfont / monofont so the generated template's
pypandoc.convert_text( # `font` parameter is non-empty; without it pandoc emits
report.content, # `font: ()` which makes Typst error with
format.value, # "font fallback list must not be empty".
# We use fonts that ship embedded inside typst-py so this
# works even on systems with no fonts installed.
typst_markup: str = pypandoc.convert_text(
markdown_content,
"typst",
format="gfm", format="gfm",
extra_args=extra_args, extra_args=[
outputfile=tmp_path, "--standalone",
"-V",
"mainfont:Libertinus Serif",
"-V",
"monofont:DejaVu Sans Mono",
],
) )
with open(tmp_path, "rb") as f: # Step 2: Typst markup → PDF via typst Python library
return f.read() pdf_bytes: bytes = typst.compile(typst_markup.encode("utf-8"))
finally: return pdf_bytes
os.unlink(tmp_path) else:
# DOCX: let pandoc handle the full conversion
fd, tmp_path = tempfile.mkstemp(suffix=f".{format.value}")
os.close(fd)
try:
pypandoc.convert_text(
markdown_content,
format.value,
format="gfm",
extra_args=["--standalone"],
outputfile=tmp_path,
)
with open(tmp_path, "rb") as f:
return f.read()
finally:
os.unlink(tmp_path)
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
output = await loop.run_in_executor(None, _convert_and_read) output = await loop.run_in_executor(None, _convert_and_read)

View file

@ -63,7 +63,8 @@ dependencies = [
"unstructured-client>=0.42.3", "unstructured-client>=0.42.3",
"langchain-unstructured>=1.0.1", "langchain-unstructured>=1.0.1",
"slowapi>=0.1.9", "slowapi>=0.1.9",
"pypandoc>=1.16.2", "pypandoc_binary>=1.16.2",
"typst>=0.14.0",
] ]
[dependency-groups] [dependency-groups]

6439
surfsense_backend/uv.lock generated

File diff suppressed because it is too large Load diff

View file

@ -28,10 +28,12 @@ import {
// extractWriteTodosFromContent, // extractWriteTodosFromContent,
hydratePlanStateAtom, hydratePlanStateAtom,
} from "@/atoms/chat/plan-state.atom"; } from "@/atoms/chat/plan-state.atom";
import { closeReportPanelAtom } from "@/atoms/chat/report-panel.atom";
import { membersAtom } from "@/atoms/members/members-query.atoms"; import { membersAtom } from "@/atoms/members/members-query.atoms";
import { currentUserAtom } from "@/atoms/user/user-query.atoms"; import { currentUserAtom } from "@/atoms/user/user-query.atoms";
import { Thread } from "@/components/assistant-ui/thread"; import { Thread } from "@/components/assistant-ui/thread";
import { ChatHeader } from "@/components/new-chat/chat-header"; import { ChatHeader } from "@/components/new-chat/chat-header";
import { ReportPanel } from "@/components/report-panel/report-panel";
import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking";
import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; import { DisplayImageToolUI } from "@/components/tool-ui/display-image";
import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast";
@ -39,8 +41,6 @@ import { GenerateReportToolUI } from "@/components/tool-ui/generate-report";
import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview";
import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage"; import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage";
import { RecallMemoryToolUI, SaveMemoryToolUI } from "@/components/tool-ui/user-memory"; import { RecallMemoryToolUI, SaveMemoryToolUI } from "@/components/tool-ui/user-memory";
import { ReportPanel } from "@/components/report-panel/report-panel";
import { closeReportPanelAtom } from "@/atoms/chat/report-panel.atom";
import { Skeleton } from "@/components/ui/skeleton"; import { Skeleton } from "@/components/ui/skeleton";
import { useChatSessionStateSync } from "@/hooks/use-chat-session-state"; import { useChatSessionStateSync } from "@/hooks/use-chat-session-state";
import { useMessagesElectric } from "@/hooks/use-messages-electric"; import { useMessagesElectric } from "@/hooks/use-messages-electric";

View file

@ -1,7 +1,7 @@
import Image from "next/image";
import { Streamdown, type StreamdownProps } from "streamdown";
import { createCodePlugin } from "@streamdown/code"; import { createCodePlugin } from "@streamdown/code";
import { createMathPlugin } from "@streamdown/math"; import { createMathPlugin } from "@streamdown/math";
import Image from "next/image";
import { Streamdown, type StreamdownProps } from "streamdown";
import "katex/dist/katex.min.css"; import "katex/dist/katex.min.css";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";

View file

@ -2,12 +2,12 @@
import { AssistantRuntimeProvider } from "@assistant-ui/react"; import { AssistantRuntimeProvider } from "@assistant-ui/react";
import { Navbar } from "@/components/homepage/navbar"; import { Navbar } from "@/components/homepage/navbar";
import { ReportPanel } from "@/components/report-panel/report-panel";
import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; import { DisplayImageToolUI } from "@/components/tool-ui/display-image";
import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast";
import { GenerateReportToolUI } from "@/components/tool-ui/generate-report"; import { GenerateReportToolUI } from "@/components/tool-ui/generate-report";
import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview";
import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage"; import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage";
import { ReportPanel } from "@/components/report-panel/report-panel";
import { Spinner } from "@/components/ui/spinner"; import { Spinner } from "@/components/ui/spinner";
import { usePublicChat } from "@/hooks/use-public-chat"; import { usePublicChat } from "@/hooks/use-public-chat";
import { usePublicChatRuntime } from "@/hooks/use-public-chat-runtime"; import { usePublicChatRuntime } from "@/hooks/use-public-chat-runtime";

View file

@ -5,8 +5,8 @@ import { ChevronDownIcon, XIcon } from "lucide-react";
import { useCallback, useEffect, useRef, useState } from "react"; import { useCallback, useEffect, useRef, useState } from "react";
import { z } from "zod"; import { z } from "zod";
import { closeReportPanelAtom, reportPanelAtom } from "@/atoms/chat/report-panel.atom"; import { closeReportPanelAtom, reportPanelAtom } from "@/atoms/chat/report-panel.atom";
import { MarkdownViewer } from "@/components/markdown-viewer";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Spinner } from "@/components/ui/spinner";
import { Drawer, DrawerContent, DrawerHandle } from "@/components/ui/drawer"; import { Drawer, DrawerContent, DrawerHandle } from "@/components/ui/drawer";
import { import {
DropdownMenu, DropdownMenu,
@ -14,7 +14,7 @@ import {
DropdownMenuItem, DropdownMenuItem,
DropdownMenuTrigger, DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu"; } from "@/components/ui/dropdown-menu";
import { MarkdownViewer } from "@/components/markdown-viewer"; import { Spinner } from "@/components/ui/spinner";
import { useMediaQuery } from "@/hooks/use-media-query"; import { useMediaQuery } from "@/hooks/use-media-query";
import { baseApiService } from "@/lib/apis/base-api.service"; import { baseApiService } from "@/lib/apis/base-api.service";
import { authenticatedFetch } from "@/lib/auth-utils"; import { authenticatedFetch } from "@/lib/auth-utils";

View file

@ -8,8 +8,8 @@ import {
FileTextIcon, FileTextIcon,
UserIcon, UserIcon,
} from "lucide-react"; } from "lucide-react";
import { Component, type ReactNode, useCallback, useState } from "react";
import Image from "next/image"; import Image from "next/image";
import { Component, type ReactNode, useCallback, useState } from "react";
import { z } from "zod"; import { z } from "zod";
import { Card, CardContent } from "@/components/ui/card"; import { Card, CardContent } from "@/components/ui/card";
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip";

View file

@ -6,8 +6,8 @@ import { Dot, FileTextIcon } from "lucide-react";
import { useParams, usePathname } from "next/navigation"; import { useParams, usePathname } from "next/navigation";
import { useEffect, useState } from "react"; import { useEffect, useState } from "react";
import { z } from "zod"; import { z } from "zod";
import { TextShimmerLoader } from "@/components/prompt-kit/loader";
import { openReportPanelAtom, reportPanelAtom } from "@/atoms/chat/report-panel.atom"; import { openReportPanelAtom, reportPanelAtom } from "@/atoms/chat/report-panel.atom";
import { TextShimmerLoader } from "@/components/prompt-kit/loader";
import { baseApiService } from "@/lib/apis/base-api.service"; import { baseApiService } from "@/lib/apis/base-api.service";
/** /**