refactor: add public URL handling for SurfSense documents across various components and schemas

This commit is contained in:
Anish Sarkar 2026-05-15 02:05:11 +05:30
parent ea087d1d23
commit 01d7379914
8 changed files with 35 additions and 8 deletions

View file

@ -9,6 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
from app.utils.document_converters import embed_text
from app.utils.surfsense_docs import surfsense_docs_public_url
def format_surfsense_docs_results(results: list[tuple]) -> str:
@ -19,13 +20,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str:
# Group chunks by document
grouped: dict[int, dict] = {}
for chunk, doc in results:
public_url = surfsense_docs_public_url(doc.source)
if doc.id not in grouped:
grouped[doc.id] = {
"document_id": f"doc-{doc.id}",
"document_type": "SURFSENSE_DOCS",
"title": doc.title,
"url": doc.source,
"metadata": {"source": doc.source},
"url": public_url,
"metadata": {"source": doc.source, "public_url": public_url},
"chunks": [],
}
grouped[doc.id]["chunks"].append(

View file

@ -17,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
from app.utils.document_converters import embed_text
from app.utils.surfsense_docs import surfsense_docs_public_url
def format_surfsense_docs_results(results: list[tuple]) -> str:
@ -40,13 +41,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str:
# Group chunks by document
grouped: dict[int, dict] = {}
for chunk, doc in results:
public_url = surfsense_docs_public_url(doc.source)
if doc.id not in grouped:
grouped[doc.id] = {
"document_id": f"doc-{doc.id}",
"document_type": "SURFSENSE_DOCS",
"title": doc.title,
"url": doc.source,
"metadata": {"source": doc.source},
"url": public_url,
"metadata": {"source": doc.source, "public_url": public_url},
"chunks": [],
}
grouped[doc.id]["chunks"].append(

View file

@ -24,6 +24,7 @@ from app.schemas.surfsense_docs import (
SurfsenseDocsDocumentWithChunksRead,
)
from app.users import current_active_user
from app.utils.surfsense_docs import surfsense_docs_public_url
router = APIRouter()
@ -76,6 +77,7 @@ async def get_surfsense_doc_by_chunk_id(
id=document.id,
title=document.title,
source=document.source,
public_url=surfsense_docs_public_url(document.source),
content=document.content,
chunks=[
SurfsenseDocsChunkRead(id=c.id, content=c.content)
@ -146,6 +148,7 @@ async def list_surfsense_docs(
id=doc.id,
title=doc.title,
source=doc.source,
public_url=surfsense_docs_public_url(doc.source),
content=doc.content,
created_at=doc.created_at,
updated_at=doc.updated_at,

View file

@ -22,6 +22,7 @@ class SurfsenseDocsDocumentRead(BaseModel):
id: int
title: str
source: str
public_url: str
content: str
created_at: datetime | None = None
updated_at: datetime | None = None
@ -35,6 +36,7 @@ class SurfsenseDocsDocumentWithChunksRead(BaseModel):
id: int
title: str
source: str
public_url: str
content: str
chunks: list[SurfsenseDocsChunkRead]

View file

@ -78,6 +78,7 @@ from app.services.new_streaming_service import VercelStreamingService
from app.tasks.chat.streaming.graph_stream.event_stream import stream_output
from app.utils.content_utils import bootstrap_history_from_db
from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap
from app.utils.surfsense_docs import surfsense_docs_public_url
from app.utils.user_message_multimodal import build_human_message_content
_background_tasks: set[asyncio.Task] = set()
@ -239,14 +240,17 @@ def format_mentioned_surfsense_docs_as_context(
)
for doc in documents:
metadata_json = json.dumps({"source": doc.source}, ensure_ascii=False)
public_url = surfsense_docs_public_url(doc.source)
metadata_json = json.dumps(
{"source": doc.source, "public_url": public_url}, ensure_ascii=False
)
context_parts.append("<document>")
context_parts.append("<document_metadata>")
context_parts.append(f" <document_id>doc-{doc.id}</document_id>")
context_parts.append(" <document_type>SURFSENSE_DOCS</document_type>")
context_parts.append(f" <title><![CDATA[{doc.title}]]></title>")
context_parts.append(f" <url><![CDATA[{doc.source}]]></url>")
context_parts.append(f" <url><![CDATA[{public_url}]]></url>")
context_parts.append(
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
)

View file

@ -0,0 +1,13 @@
"""Utilities for SurfSense's built-in documentation index."""
from pathlib import PurePosixPath
DOCS_PUBLIC_ROOT = PurePosixPath("/docs")
def surfsense_docs_public_url(source: str) -> str:
"""Return the public docs route for an indexed documentation source path."""
docs_path = PurePosixPath(source).with_suffix("")
if docs_path.name == "index":
docs_path = docs_path.parent
return (DOCS_PUBLIC_ROOT / docs_path).as_posix()

View file

@ -130,9 +130,9 @@ const SurfsenseDocPreview: FC<{ chunkId: number }> = ({ chunkId }) => {
</p>
<p className="text-[11px] text-muted-foreground">Chunk #{chunkId}</p>
</div>
{data?.source && (
{data?.public_url && (
<a
href={data.source}
href={data.public_url}
target="_blank"
rel="noopener noreferrer"
className="inline-flex shrink-0 items-center gap-1 rounded-md px-2 py-1 text-[11px] font-medium text-primary hover:bg-primary/10"

View file

@ -88,6 +88,7 @@ export const surfsenseDocsDocument = z.object({
id: z.number(),
title: z.string(),
source: z.string(),
public_url: z.string(),
content: z.string(),
});