mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-23 19:05:16 +02:00
refactor: add public URL handling for SurfSense documents across various components and schemas
This commit is contained in:
parent
ea087d1d23
commit
01d7379914
8 changed files with 35 additions and 8 deletions
|
|
@ -9,6 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
|
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
|
||||||
from app.utils.document_converters import embed_text
|
from app.utils.document_converters import embed_text
|
||||||
|
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||||
|
|
||||||
|
|
||||||
def format_surfsense_docs_results(results: list[tuple]) -> str:
|
def format_surfsense_docs_results(results: list[tuple]) -> str:
|
||||||
|
|
@ -19,13 +20,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str:
|
||||||
# Group chunks by document
|
# Group chunks by document
|
||||||
grouped: dict[int, dict] = {}
|
grouped: dict[int, dict] = {}
|
||||||
for chunk, doc in results:
|
for chunk, doc in results:
|
||||||
|
public_url = surfsense_docs_public_url(doc.source)
|
||||||
if doc.id not in grouped:
|
if doc.id not in grouped:
|
||||||
grouped[doc.id] = {
|
grouped[doc.id] = {
|
||||||
"document_id": f"doc-{doc.id}",
|
"document_id": f"doc-{doc.id}",
|
||||||
"document_type": "SURFSENSE_DOCS",
|
"document_type": "SURFSENSE_DOCS",
|
||||||
"title": doc.title,
|
"title": doc.title,
|
||||||
"url": doc.source,
|
"url": public_url,
|
||||||
"metadata": {"source": doc.source},
|
"metadata": {"source": doc.source, "public_url": public_url},
|
||||||
"chunks": [],
|
"chunks": [],
|
||||||
}
|
}
|
||||||
grouped[doc.id]["chunks"].append(
|
grouped[doc.id]["chunks"].append(
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
|
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
|
||||||
from app.utils.document_converters import embed_text
|
from app.utils.document_converters import embed_text
|
||||||
|
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||||
|
|
||||||
|
|
||||||
def format_surfsense_docs_results(results: list[tuple]) -> str:
|
def format_surfsense_docs_results(results: list[tuple]) -> str:
|
||||||
|
|
@ -40,13 +41,14 @@ def format_surfsense_docs_results(results: list[tuple]) -> str:
|
||||||
# Group chunks by document
|
# Group chunks by document
|
||||||
grouped: dict[int, dict] = {}
|
grouped: dict[int, dict] = {}
|
||||||
for chunk, doc in results:
|
for chunk, doc in results:
|
||||||
|
public_url = surfsense_docs_public_url(doc.source)
|
||||||
if doc.id not in grouped:
|
if doc.id not in grouped:
|
||||||
grouped[doc.id] = {
|
grouped[doc.id] = {
|
||||||
"document_id": f"doc-{doc.id}",
|
"document_id": f"doc-{doc.id}",
|
||||||
"document_type": "SURFSENSE_DOCS",
|
"document_type": "SURFSENSE_DOCS",
|
||||||
"title": doc.title,
|
"title": doc.title,
|
||||||
"url": doc.source,
|
"url": public_url,
|
||||||
"metadata": {"source": doc.source},
|
"metadata": {"source": doc.source, "public_url": public_url},
|
||||||
"chunks": [],
|
"chunks": [],
|
||||||
}
|
}
|
||||||
grouped[doc.id]["chunks"].append(
|
grouped[doc.id]["chunks"].append(
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ from app.schemas.surfsense_docs import (
|
||||||
SurfsenseDocsDocumentWithChunksRead,
|
SurfsenseDocsDocumentWithChunksRead,
|
||||||
)
|
)
|
||||||
from app.users import current_active_user
|
from app.users import current_active_user
|
||||||
|
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
@ -76,6 +77,7 @@ async def get_surfsense_doc_by_chunk_id(
|
||||||
id=document.id,
|
id=document.id,
|
||||||
title=document.title,
|
title=document.title,
|
||||||
source=document.source,
|
source=document.source,
|
||||||
|
public_url=surfsense_docs_public_url(document.source),
|
||||||
content=document.content,
|
content=document.content,
|
||||||
chunks=[
|
chunks=[
|
||||||
SurfsenseDocsChunkRead(id=c.id, content=c.content)
|
SurfsenseDocsChunkRead(id=c.id, content=c.content)
|
||||||
|
|
@ -146,6 +148,7 @@ async def list_surfsense_docs(
|
||||||
id=doc.id,
|
id=doc.id,
|
||||||
title=doc.title,
|
title=doc.title,
|
||||||
source=doc.source,
|
source=doc.source,
|
||||||
|
public_url=surfsense_docs_public_url(doc.source),
|
||||||
content=doc.content,
|
content=doc.content,
|
||||||
created_at=doc.created_at,
|
created_at=doc.created_at,
|
||||||
updated_at=doc.updated_at,
|
updated_at=doc.updated_at,
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ class SurfsenseDocsDocumentRead(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
title: str
|
title: str
|
||||||
source: str
|
source: str
|
||||||
|
public_url: str
|
||||||
content: str
|
content: str
|
||||||
created_at: datetime | None = None
|
created_at: datetime | None = None
|
||||||
updated_at: datetime | None = None
|
updated_at: datetime | None = None
|
||||||
|
|
@ -35,6 +36,7 @@ class SurfsenseDocsDocumentWithChunksRead(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
title: str
|
title: str
|
||||||
source: str
|
source: str
|
||||||
|
public_url: str
|
||||||
content: str
|
content: str
|
||||||
chunks: list[SurfsenseDocsChunkRead]
|
chunks: list[SurfsenseDocsChunkRead]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,7 @@ from app.services.new_streaming_service import VercelStreamingService
|
||||||
from app.tasks.chat.streaming.graph_stream.event_stream import stream_output
|
from app.tasks.chat.streaming.graph_stream.event_stream import stream_output
|
||||||
from app.utils.content_utils import bootstrap_history_from_db
|
from app.utils.content_utils import bootstrap_history_from_db
|
||||||
from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap
|
from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_heap
|
||||||
|
from app.utils.surfsense_docs import surfsense_docs_public_url
|
||||||
from app.utils.user_message_multimodal import build_human_message_content
|
from app.utils.user_message_multimodal import build_human_message_content
|
||||||
|
|
||||||
_background_tasks: set[asyncio.Task] = set()
|
_background_tasks: set[asyncio.Task] = set()
|
||||||
|
|
@ -239,14 +240,17 @@ def format_mentioned_surfsense_docs_as_context(
|
||||||
)
|
)
|
||||||
|
|
||||||
for doc in documents:
|
for doc in documents:
|
||||||
metadata_json = json.dumps({"source": doc.source}, ensure_ascii=False)
|
public_url = surfsense_docs_public_url(doc.source)
|
||||||
|
metadata_json = json.dumps(
|
||||||
|
{"source": doc.source, "public_url": public_url}, ensure_ascii=False
|
||||||
|
)
|
||||||
|
|
||||||
context_parts.append("<document>")
|
context_parts.append("<document>")
|
||||||
context_parts.append("<document_metadata>")
|
context_parts.append("<document_metadata>")
|
||||||
context_parts.append(f" <document_id>doc-{doc.id}</document_id>")
|
context_parts.append(f" <document_id>doc-{doc.id}</document_id>")
|
||||||
context_parts.append(" <document_type>SURFSENSE_DOCS</document_type>")
|
context_parts.append(" <document_type>SURFSENSE_DOCS</document_type>")
|
||||||
context_parts.append(f" <title><![CDATA[{doc.title}]]></title>")
|
context_parts.append(f" <title><![CDATA[{doc.title}]]></title>")
|
||||||
context_parts.append(f" <url><![CDATA[{doc.source}]]></url>")
|
context_parts.append(f" <url><![CDATA[{public_url}]]></url>")
|
||||||
context_parts.append(
|
context_parts.append(
|
||||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
|
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
13
surfsense_backend/app/utils/surfsense_docs.py
Normal file
13
surfsense_backend/app/utils/surfsense_docs.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
"""Utilities for SurfSense's built-in documentation index."""
|
||||||
|
|
||||||
|
from pathlib import PurePosixPath
|
||||||
|
|
||||||
|
# Route prefix under which the documentation pages are served.
DOCS_PUBLIC_ROOT = PurePosixPath("/docs")


def surfsense_docs_public_url(source: str) -> str:
    """Return the public docs route for an indexed documentation source path.

    The file extension is dropped and a trailing ``index`` page collapses to
    its parent directory, e.g. ``connectors/slack.mdx`` ->
    ``/docs/connectors/slack`` and ``connectors/index.mdx`` ->
    ``/docs/connectors``.

    Args:
        source: Path of the indexed documentation file. Presumably relative
            to the docs content root (verify against the indexer); leading
            slashes and surrounding whitespace are tolerated.

    Returns:
        The POSIX-style route rooted at ``DOCS_PUBLIC_ROOT``. An empty or
        blank ``source`` maps to the docs root itself.
    """
    # Strip leading slashes: joining an absolute path onto DOCS_PUBLIC_ROOT
    # would discard the "/docs" prefix entirely.
    relative = PurePosixPath((source or "").strip().lstrip("/"))

    # with_suffix() raises ValueError on a path with an empty name, so a blank
    # source falls back to the docs landing page instead of crashing callers.
    if not relative.name:
        return DOCS_PUBLIC_ROOT.as_posix()

    docs_path = relative.with_suffix("")
    if docs_path.name == "index":
        # "section/index" is served at "section"; a top-level index yields
        # ".", which the join below collapses to the docs root.
        docs_path = docs_path.parent
    return (DOCS_PUBLIC_ROOT / docs_path).as_posix()
|
||||||
|
|
@ -130,9 +130,9 @@ const SurfsenseDocPreview: FC<{ chunkId: number }> = ({ chunkId }) => {
|
||||||
</p>
|
</p>
|
||||||
<p className="text-[11px] text-muted-foreground">Chunk #{chunkId}</p>
|
<p className="text-[11px] text-muted-foreground">Chunk #{chunkId}</p>
|
||||||
</div>
|
</div>
|
||||||
{data?.source && (
|
{data?.public_url && (
|
||||||
<a
|
<a
|
||||||
href={data.source}
|
href={data.public_url}
|
||||||
target="_blank"
|
target="_blank"
|
||||||
rel="noopener noreferrer"
|
rel="noopener noreferrer"
|
||||||
className="inline-flex shrink-0 items-center gap-1 rounded-md px-2 py-1 text-[11px] font-medium text-primary hover:bg-primary/10"
|
className="inline-flex shrink-0 items-center gap-1 rounded-md px-2 py-1 text-[11px] font-medium text-primary hover:bg-primary/10"
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,7 @@ export const surfsenseDocsDocument = z.object({
|
||||||
id: z.number(),
|
id: z.number(),
|
||||||
title: z.string(),
|
title: z.string(),
|
||||||
source: z.string(),
|
source: z.string(),
|
||||||
|
public_url: z.string(),
|
||||||
content: z.string(),
|
content: z.string(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue