@router.get("/documents/by-virtual-path", response_model=DocumentTitleRead)
async def get_document_by_virtual_path(
    search_space_id: int,
    virtual_path: str,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Resolve a knowledge-base document id by exact virtual path.

    Args:
        search_space_id: Search space whose documents are searched.
        virtual_path: Value compared for equality against the
            ``virtual_path`` key of each document's ``document_metadata``.
        session: Database session injected by FastAPI.
        user: Authenticated user injected by FastAPI.

    Returns:
        A ``DocumentTitleRead`` carrying the id, title and document type of
        one matching document.

    Raises:
        HTTPException: 404 when no document in the search space has the given
            virtual path; 500 when any other non-HTTP error occurs. An
            ``HTTPException`` raised inside the handler (presumably including
            a permission failure from ``check_permission`` - confirm against
            that helper) propagates unchanged.
    """
    try:
        await check_permission(
            session,
            user,
            search_space_id,
            Permission.DOCUMENTS_READ.value,
            "You don't have permission to read documents in this search space",
        )

        result = await session.execute(
            select(
                Document.id,
                Document.title,
                Document.document_type,
            )
            .filter(
                Document.search_space_id == search_space_id,
                Document.document_metadata["virtual_path"].as_string() == virtual_path,
            )
            # ``Result.first()`` discards extra rows client-side but does not
            # add a LIMIT to the statement, so cap the query explicitly.
            .limit(1)
        )
        row = result.first()
        if row is None:
            raise HTTPException(status_code=404, detail="Document not found")

        return DocumentTitleRead(
            id=row.id,
            title=row.title,
            document_type=row.document_type,
        )
    except HTTPException:
        # Keep deliberate HTTP errors (such as the 404 above) intact instead
        # of letting the generic handler below rewrap them as a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to resolve document by virtual path: {e!s}",
        ) from e
tool_name: str, tool_output: Any) -> str | None: +def _extract_resolved_file_path( + *, tool_name: str, tool_output: Any, tool_input: Any | None = None +) -> str | None: if isinstance(tool_output, dict): path_value = tool_output.get("path") if isinstance(path_value, str) and path_value.strip(): return path_value.strip() - text = _tool_output_to_text(tool_output) - if tool_name == "write_file": - match = re.search(r"Updated file\s+(.+)$", text.strip()) - if match: - return match.group(1).strip() - if tool_name == "edit_file": - match = re.search(r"in '([^']+)'", text) - if match: - return match.group(1).strip() + if tool_name in ("write_file", "edit_file") and isinstance(tool_input, dict): + file_path = tool_input.get("file_path") + if isinstance(file_path, str) and file_path.strip(): + return file_path.strip() return None @@ -714,6 +711,7 @@ async def _stream_agent_events( # fallback path only and never re-pops a chunk we already streamed. pending_tool_call_chunks: list[dict[str, Any]] = [] lc_tool_call_id_by_run: dict[str, str] = {} + file_path_by_run: dict[str, str] = {} # parity_v2 only: live tool-call argument streaming. 
def test_extract_resolved_file_path_prefers_structured_path():
    """A ``path`` key in the tool output is returned without any tool input."""
    resolved = _extract_resolved_file_path(
        tool_name="write_file",
        tool_output={"status": "completed", "path": "/docs/note.md"},
        tool_input=None,
    )
    assert resolved == "/docs/note.md"


def test_extract_resolved_file_path_falls_back_to_tool_input():
    """With no ``path`` in the output, the input's ``file_path`` is used."""
    resolved = _extract_resolved_file_path(
        tool_name="edit_file",
        tool_output={"status": "completed", "result": "updated"},
        tool_input={"file_path": "/docs/edited.md"},
    )
    assert resolved == "/docs/edited.md"


def test_extract_resolved_file_path_does_not_parse_result_text():
    """A path that only appears inside free-form result text is ignored."""
    resolved = _extract_resolved_file_path(
        tool_name="write_file",
        tool_output={"result": "Updated file /docs/from-text.md"},
        tool_input=None,
    )
    assert resolved is None
/**
 * Decide whether a markdown child is nothing but a single `/documents/...`
 * file path.
 *
 * @param node - The rendered child to inspect; anything other than a plain
 *   string is rejected.
 * @returns The trimmed text when it starts with `/documents/`, contains no
 *   whitespace, and its last path segment (ignoring trailing slashes)
 *   contains a dot; otherwise `null`.
 */
function isStandaloneDocumentsPathText(node: ReactNode): string | null {
	if (typeof node !== "string") return null;
	const value = node.trim();
	if (!value.startsWith("/documents/")) return null;
	// Reject any whitespace, not just U+0020: a soft line break or tab inside
	// the text means it is not one standalone path (e.g. two paths on
	// consecutive lines of the same paragraph).
	if (/\s/.test(value)) return null;
	const normalized = value.replace(/\/+$/, "");
	const leaf = normalized.split("/").filter(Boolean).at(-1) ?? "";
	// A last segment without a dot is treated as a folder, not a file.
	if (!leaf || !leaf.includes(".")) return null;
	return value;
}

- {processChildrenWithCitations(children, urlMap)} + {standalonePath ? ( + + ) : ( + processChildrenWithCitations(children, urlMap) + )}

); }, @@ -400,8 +490,6 @@ const defaultComponents = memoizeMarkdownComponents({ code: function Code({ className, children, ...props }) { const isCodeBlock = useIsMarkdownCodeBlock(); const { resolvedTheme } = useTheme(); - const openEditorPanel = useSetAtom(openEditorPanelAtom); - const params = useParams(); const electronAPI = useElectronAPI(); const language = /language-(\w+)/.exec(className || "")?.[1] ?? "text"; const codeString = String(children).replace(/\n$/, ""); @@ -418,53 +506,17 @@ const defaultComponents = memoizeMarkdownComponents({ const isLikelyFolder = inlineValue.endsWith("/") || !leafSegment || !leafSegment.includes("."); const isLocalPath = - !!electronAPI && - isVirtualFilePathToken(inlineValue) && - !inlineValue.startsWith("//") && - !isLikelyFolder; - const displayLocalPath = inlineValue.replace(/^\/+/, ""); - const searchSpaceIdParam = params?.search_space_id; - const parsedSearchSpaceId = Array.isArray(searchSpaceIdParam) - ? Number(searchSpaceIdParam[0]) - : Number(searchSpaceIdParam); + (isVirtualFilePathToken(inlineValue) && + !inlineValue.startsWith("//") && + !isLikelyFolder && + !!electronAPI) || + (isVirtualFilePathToken(inlineValue) && + !inlineValue.startsWith("//") && + !isLikelyFolder && + !electronAPI && + inlineValue.startsWith("/documents/")); if (isLocalPath) { - return ( - - ); + return ; } return ( { + const params = new URLSearchParams({ + search_space_id: String(request.search_space_id), + virtual_path: request.virtual_path, + }); + return baseApiService.get(`/api/v1/documents/by-virtual-path?${params.toString()}`, documentTitleRead); + }; + /** * Get document type counts */