From 01d7379914d093f2b72ece0ce982093722110014 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Fri, 15 May 2026 02:05:11 +0530 Subject: [PATCH] refactor: add public URL handling for SurfSense documents across various components and schemas --- .../research/tools/search_surfsense_docs.py | 6 ++++-- .../agents/new_chat/tools/search_surfsense_docs.py | 6 ++++-- .../app/routes/surfsense_docs_routes.py | 3 +++ surfsense_backend/app/schemas/surfsense_docs.py | 2 ++ surfsense_backend/app/tasks/chat/stream_new_chat.py | 8 ++++++-- surfsense_backend/app/utils/surfsense_docs.py | 13 +++++++++++++ .../components/assistant-ui/inline-citation.tsx | 4 ++-- surfsense_web/contracts/types/document.types.ts | 1 + 8 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 surfsense_backend/app/utils/surfsense_docs.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py index 0d702be4c..ccc5c49e2 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/tools/search_surfsense_docs.py @@ -9,6 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument from app.utils.document_converters import embed_text +from app.utils.surfsense_docs import surfsense_docs_public_url def format_surfsense_docs_results(results: list[tuple]) -> str: @@ -19,13 +20,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str: # Group chunks by document grouped: dict[int, dict] = {} for chunk, doc in results: + public_url = surfsense_docs_public_url(doc.source) if doc.id not in grouped: grouped[doc.id] = { "document_id": f"doc-{doc.id}", "document_type": "SURFSENSE_DOCS", "title": doc.title, - "url": doc.source, - "metadata": {"source": doc.source}, + "url": public_url, + "metadata": {"source": doc.source, "public_url": public_url}, "chunks": [], } grouped[doc.id]["chunks"].append( diff --git a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py index 2965f2f02..d8a0efac7 100644 --- a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py +++ b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py @@ -17,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker from app.utils.document_converters import embed_text +from app.utils.surfsense_docs import surfsense_docs_public_url def format_surfsense_docs_results(results: list[tuple]) -> str: @@ -40,13 +41,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str: # Group chunks by document grouped: dict[int, dict] = {} for chunk, doc in results: + public_url = surfsense_docs_public_url(doc.source) if doc.id not in grouped: grouped[doc.id] = { "document_id": f"doc-{doc.id}", "document_type": "SURFSENSE_DOCS", "title": doc.title, - "url": doc.source, - "metadata": {"source": doc.source}, + "url": public_url, + "metadata": {"source": doc.source, "public_url": public_url}, "chunks": [], } grouped[doc.id]["chunks"].append( diff --git a/surfsense_backend/app/routes/surfsense_docs_routes.py b/surfsense_backend/app/routes/surfsense_docs_routes.py index e1713e8a3..0d5428dec 100644 --- a/surfsense_backend/app/routes/surfsense_docs_routes.py +++ b/surfsense_backend/app/routes/surfsense_docs_routes.py @@ -24,6 +24,7 @@ from app.schemas.surfsense_docs import ( SurfsenseDocsDocumentWithChunksRead, ) from app.users import current_active_user +from app.utils.surfsense_docs import surfsense_docs_public_url router = APIRouter() @@ -76,6 +77,7 @@ async def get_surfsense_doc_by_chunk_id( id=document.id, title=document.title, source=document.source, + public_url=surfsense_docs_public_url(document.source), content=document.content, chunks=[ SurfsenseDocsChunkRead(id=c.id, content=c.content) @@ -146,6 +148,7 @@ async def list_surfsense_docs( id=doc.id, title=doc.title, source=doc.source, + public_url=surfsense_docs_public_url(doc.source), content=doc.content, created_at=doc.created_at, updated_at=doc.updated_at, diff --git a/surfsense_backend/app/schemas/surfsense_docs.py b/surfsense_backend/app/schemas/surfsense_docs.py index ce32c0ef8..3adf25032 100644 --- a/surfsense_backend/app/schemas/surfsense_docs.py +++ b/surfsense_backend/app/schemas/surfsense_docs.py @@ -22,6 +22,7 @@ class SurfsenseDocsDocumentRead(BaseModel): id: int title: str source: str + public_url: str content: str created_at: datetime | None = None updated_at: datetime | None = None @@ -35,6 +36,7 @@ class SurfsenseDocsDocumentWithChunksRead(BaseModel): id: int title: str source: str + public_url: str content: str chunks: list[SurfsenseDocsChunkRead] diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 818282996..328ef59ad 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -78,6 +78,7 @@ from app.services.new_streaming_service import VercelStreamingService from app.tasks.chat.streaming.graph_stream.event_stream import stream_output from app.utils.content_utils import bootstrap_history_from_db from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap +from app.utils.surfsense_docs import surfsense_docs_public_url from app.utils.user_message_multimodal import build_human_message_content _background_tasks: set[asyncio.Task] = set() @@ -239,14 +240,17 @@ def format_mentioned_surfsense_docs_as_context( ) for doc in documents: - metadata_json = json.dumps({"source": doc.source}, ensure_ascii=False) + public_url = surfsense_docs_public_url(doc.source) + metadata_json = json.dumps( + {"source": doc.source, "public_url": public_url}, ensure_ascii=False + ) context_parts.append("") context_parts.append("") context_parts.append(f" doc-{doc.id}") context_parts.append(" SURFSENSE_DOCS") context_parts.append(f" <![CDATA[{doc.title}]]>") - context_parts.append(f" ") + context_parts.append(f" ") context_parts.append( f" " ) diff --git a/surfsense_backend/app/utils/surfsense_docs.py b/surfsense_backend/app/utils/surfsense_docs.py new file mode 100644 index 000000000..9a6ab11a9 --- /dev/null +++ b/surfsense_backend/app/utils/surfsense_docs.py @@ -0,0 +1,13 @@ +"""Utilities for SurfSense's built-in documentation index.""" + +from pathlib import PurePosixPath + +DOCS_PUBLIC_ROOT = PurePosixPath("/docs") + + +def surfsense_docs_public_url(source: str) -> str: + """Return the public docs route for an indexed documentation source path.""" + docs_path = PurePosixPath(source).with_suffix("") + if docs_path.name == "index": + docs_path = docs_path.parent + return (DOCS_PUBLIC_ROOT / docs_path).as_posix() diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx index a9f9cc076..eba617c15 100644 --- a/surfsense_web/components/assistant-ui/inline-citation.tsx +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -130,9 +130,9 @@ const SurfsenseDocPreview: FC<{ chunkId: number }> = ({ chunkId }) => {

Chunk #{chunkId}

- {data?.source && ( + {data?.public_url && (