feat(documents): add endpoint to retrieve document by virtual path

This commit is contained in:
Anish Sarkar 2026-05-02 02:45:27 +05:30
parent 789d8ce62e
commit d14fed43c6
5 changed files with 206 additions and 59 deletions

View file

@ -745,6 +745,51 @@ async def search_document_titles(
) from e
@router.get("/documents/by-virtual-path", response_model=DocumentTitleRead)
async def get_document_by_virtual_path(
    search_space_id: int,
    virtual_path: str,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Resolve a knowledge-base document id by exact virtual path.

    Args:
        search_space_id: Search space the document must belong to.
        virtual_path: Value compared for equality against the
            ``virtual_path`` key of the document's metadata.
        session: Async database session (injected).
        user: Authenticated user (injected).

    Returns:
        A ``DocumentTitleRead`` carrying the id, title and document type of
        the matching document.

    Raises:
        HTTPException: 404 when no document matches; 500 when any
            non-HTTP error occurs. HTTP errors raised inside the handler
            (presumably including a failed permission check; verify against
            ``check_permission``) are re-raised unchanged.
    """
    try:
        await check_permission(
            session,
            user,
            search_space_id,
            Permission.DOCUMENTS_READ.value,
            "You don't have permission to read documents in this search space",
        )

        result = await session.execute(
            select(
                Document.id,
                Document.title,
                Document.document_type,
            )
            .filter(
                Document.search_space_id == search_space_id,
                Document.document_metadata["virtual_path"].as_string() == virtual_path,
            )
            # Only one row is ever consumed: order by id so the choice is
            # deterministic if several documents share a virtual path, and
            # cap the result set so the database does not return extra rows.
            .order_by(Document.id)
            .limit(1)
        )
        row = result.first()
        if row is None:
            raise HTTPException(status_code=404, detail="Document not found")

        return DocumentTitleRead(
            id=row.id,
            title=row.title,
            document_type=row.document_type,
        )
    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 404 above).
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to resolve document by virtual path: {e!s}",
        ) from e
@router.get("/documents/status", response_model=DocumentStatusBatchResponse)
async def get_documents_status(
search_space_id: int,

View file

@ -304,20 +304,17 @@ def _tool_output_has_error(tool_output: Any) -> bool:
return False
def _extract_resolved_file_path(*, tool_name: str, tool_output: Any) -> str | None:
def _extract_resolved_file_path(
*, tool_name: str, tool_output: Any, tool_input: Any | None = None
) -> str | None:
if isinstance(tool_output, dict):
path_value = tool_output.get("path")
if isinstance(path_value, str) and path_value.strip():
return path_value.strip()
text = _tool_output_to_text(tool_output)
if tool_name == "write_file":
match = re.search(r"Updated file\s+(.+)$", text.strip())
if match:
return match.group(1).strip()
if tool_name == "edit_file":
match = re.search(r"in '([^']+)'", text)
if match:
return match.group(1).strip()
if tool_name in ("write_file", "edit_file") and isinstance(tool_input, dict):
file_path = tool_input.get("file_path")
if isinstance(file_path, str) and file_path.strip():
return file_path.strip()
return None
@ -714,6 +711,7 @@ async def _stream_agent_events(
# fallback path only and never re-pops a chunk we already streamed.
pending_tool_call_chunks: list[dict[str, Any]] = []
lc_tool_call_id_by_run: dict[str, str] = {}
file_path_by_run: dict[str, str] = {}
# parity_v2 only: live tool-call argument streaming. ``index_to_meta``
# is keyed by the chunk's ``index`` field — LangChain
@ -892,6 +890,10 @@ async def _stream_agent_events(
tool_input = event.get("data", {}).get("input", {})
if tool_name in ("write_file", "edit_file"):
result.write_attempted = True
if isinstance(tool_input, dict):
file_path = tool_input.get("file_path")
if isinstance(file_path, str) and file_path.strip() and run_id:
file_path_by_run[run_id] = file_path.strip()
if current_text_id is not None:
yield streaming_service.format_text_end(current_text_id)
@ -1298,6 +1300,7 @@ async def _stream_agent_events(
run_id = event.get("run_id", "")
tool_name = event.get("name", "unknown_tool")
raw_output = event.get("data", {}).get("output", "")
staged_file_path = file_path_by_run.pop(run_id, None) if run_id else None
if tool_name == "update_memory":
called_update_memory = True
@ -1811,6 +1814,7 @@ async def _stream_agent_events(
resolved_path = _extract_resolved_file_path(
tool_name=tool_name,
tool_output=tool_output,
tool_input={"file_path": staged_file_path} if staged_file_path else None,
)
result_text = _tool_output_to_text(tool_output)
if _tool_output_has_error(tool_output):

View file

@ -13,6 +13,7 @@ from app.tasks.chat.stream_new_chat import (
StreamResult,
_classify_stream_exception,
_contract_enforcement_active,
_extract_resolved_file_path,
_evaluate_file_contract_outcome,
_log_chat_stream_error,
_tool_output_has_error,
@ -28,6 +29,39 @@ def test_tool_output_error_detection():
assert not _tool_output_has_error({"result": "Updated file /notes.md"})
def test_extract_resolved_file_path_prefers_structured_path():
    """A ``path`` field in the tool output is returned when no tool input is given."""
    resolved = _extract_resolved_file_path(
        tool_name="write_file",
        tool_output={"status": "completed", "path": "/docs/note.md"},
        tool_input=None,
    )

    assert resolved == "/docs/note.md"
def test_extract_resolved_file_path_falls_back_to_tool_input():
    """Without a ``path`` in the output, the ``file_path`` tool argument is used."""
    output_without_path = {"status": "completed", "result": "updated"}
    call_arguments = {"file_path": "/docs/edited.md"}

    resolved = _extract_resolved_file_path(
        tool_name="edit_file",
        tool_output=output_without_path,
        tool_input=call_arguments,
    )

    assert resolved == "/docs/edited.md"
def test_extract_resolved_file_path_does_not_parse_result_text():
    """A path that only appears inside the result text must not be extracted."""
    text_only_output = {"result": "Updated file /docs/from-text.md"}

    resolved = _extract_resolved_file_path(
        tool_name="write_file",
        tool_output=text_only_output,
        tool_input=None,
    )

    assert resolved is None
def test_file_write_contract_outcome_reasons():
result = StreamResult(intent_detected="file_write")
passed, reason = _evaluate_file_contract_outcome(result)

View file

@ -30,8 +30,10 @@ import {
TableRow,
} from "@/components/ui/table";
import { useElectronAPI } from "@/hooks/use-platform";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { type CitationUrlMap, preprocessCitationMarkdown } from "@/lib/citations/citation-parser";
import { cn } from "@/lib/utils";
import { toast } from "sonner";
function MarkdownCodeBlockSkeleton() {
return (
@ -194,6 +196,89 @@ function isVirtualFilePathToken(value: string): boolean {
return segments.length >= 2;
}
/**
 * Returns the trimmed text when `node` is a string made up solely of a
 * `/documents/...` file path; otherwise returns `null`.
 *
 * A value qualifies when, after trimming, it starts with `/documents/`,
 * contains no whitespace at all, and its last path segment (ignoring trailing
 * slashes) contains a dot.
 *
 * @param node - The rendered child to inspect; anything but a string yields `null`.
 * @returns The trimmed path text, or `null` when the node is not a standalone path.
 */
function isStandaloneDocumentsPathText(node: ReactNode): string | null {
	if (typeof node !== "string") return null;
	const value = node.trim();
	if (!value.startsWith("/documents/")) return null;
	// Reject every kind of whitespace, not just " ": a tab or newline inside
	// the text means it is more than a single path token.
	if (/\s/.test(value)) return null;
	const segments = value.replace(/\/+$/, "").split("/").filter(Boolean);
	const leaf = segments[segments.length - 1] ?? "";
	// Only file-like leaves (with an extension) are linkable; an empty leaf
	// has no dot, so it is rejected by the same check.
	if (!leaf.includes(".")) return null;
	return value;
}
/**
 * Renders a file path as a button that opens the target in the editor panel.
 *
 * - When the Electron API is available, the path is opened as a local file,
 *   first mapped through the agent filesystem mounts if that lookup is exposed.
 * - Otherwise, paths under `/documents/` are resolved to a document through the
 *   documents API; if that fails an error toast is shown.
 */
function FilePathLink({ path, className }: { path: string; className?: string }) {
	const openEditorPanel = useSetAtom(openEditorPanelAtom);
	const params = useParams();
	const electronAPI = useElectronAPI();

	const rawSearchSpaceId = params?.search_space_id;
	const numericSearchSpaceId = Number(
		Array.isArray(rawSearchSpaceId) ? rawSearchSpaceId[0] : rawSearchSpaceId
	);
	const resolvedSearchSpaceId = Number.isFinite(numericSearchSpaceId)
		? numericSearchSpaceId
		: undefined;

	/** Desktop flow: open the path as a local file in the editor panel. */
	const openAsLocalFile = async (api: NonNullable<typeof electronAPI>) => {
		let localPath = path;
		if (api.getAgentFilesystemMounts) {
			try {
				const mounts = (await api.getAgentFilesystemMounts(
					resolvedSearchSpaceId
				)) as AgentFilesystemMount[];
				localPath = normalizeLocalVirtualPathForEditor(path, mounts);
			} catch {
				// Fall back to the raw path if mount lookup fails.
			}
		}
		openEditorPanel({
			kind: "local_file",
			localFilePath: localPath,
			title: localPath.split("/").pop() || localPath,
			searchSpaceId: resolvedSearchSpaceId,
		});
	};

	/** Non-desktop flow: resolve a `/documents/` path to a document and open it. */
	const openAsKnowledgeBaseDocument = async () => {
		if (!resolvedSearchSpaceId || !path.startsWith("/documents/")) return;
		try {
			const doc = await documentsApiService.getDocumentByVirtualPath({
				search_space_id: resolvedSearchSpaceId,
				virtual_path: path,
			});
			openEditorPanel({
				kind: "document",
				documentId: doc.id,
				searchSpaceId: resolvedSearchSpaceId,
				title: doc.title,
			});
		} catch {
			toast.error("Document not found in knowledge base.");
		}
	};

	return (
		<button
			type="button"
			className={cn(
				"cursor-pointer font-mono text-[0.9em] font-medium text-primary underline underline-offset-4 transition-colors hover:text-primary/80",
				className
			)}
			onClick={(event) => {
				event.preventDefault();
				event.stopPropagation();
				void (electronAPI ? openAsLocalFile(electronAPI) : openAsKnowledgeBaseDocument());
			}}
			title="Open in editor panel"
		>
			{path}
		</button>
	);
}
function MarkdownImage({ src, alt }: { src?: string; alt?: string }) {
if (!src) return null;
@ -311,9 +396,14 @@ const defaultComponents = memoizeMarkdownComponents({
},
p: function P({ className, children, ...props }) {
	const urlMap = useCitationUrlMap();
	// A paragraph whose only content is a `/documents/...` file path is rendered
	// as a clickable link; every other paragraph goes through citation
	// processing. Exactly one of the two is rendered.
	const standalonePath = isStandaloneDocumentsPathText(children);
	return (
		<p className={cn("aui-md-p mt-5 mb-5 leading-7 first:mt-0 last:mb-0", className)} {...props}>
			{standalonePath ? (
				<FilePathLink path={standalonePath} />
			) : (
				processChildrenWithCitations(children, urlMap)
			)}
		</p>
	);
},
@ -400,8 +490,6 @@ const defaultComponents = memoizeMarkdownComponents({
code: function Code({ className, children, ...props }) {
const isCodeBlock = useIsMarkdownCodeBlock();
const { resolvedTheme } = useTheme();
const openEditorPanel = useSetAtom(openEditorPanelAtom);
const params = useParams();
const electronAPI = useElectronAPI();
const language = /language-(\w+)/.exec(className || "")?.[1] ?? "text";
const codeString = String(children).replace(/\n$/, "");
@ -418,53 +506,17 @@ const defaultComponents = memoizeMarkdownComponents({
const isLikelyFolder =
inlineValue.endsWith("/") || !leafSegment || !leafSegment.includes(".");
const isLocalPath =
!!electronAPI &&
isVirtualFilePathToken(inlineValue) &&
!inlineValue.startsWith("//") &&
!isLikelyFolder;
const displayLocalPath = inlineValue.replace(/^\/+/, "");
const searchSpaceIdParam = params?.search_space_id;
const parsedSearchSpaceId = Array.isArray(searchSpaceIdParam)
? Number(searchSpaceIdParam[0])
: Number(searchSpaceIdParam);
(isVirtualFilePathToken(inlineValue) &&
!inlineValue.startsWith("//") &&
!isLikelyFolder &&
!!electronAPI) ||
(isVirtualFilePathToken(inlineValue) &&
!inlineValue.startsWith("//") &&
!isLikelyFolder &&
!electronAPI &&
inlineValue.startsWith("/documents/"));
if (isLocalPath) {
return (
<button
type="button"
className={cn(
"cursor-pointer font-mono text-[0.9em] font-medium text-primary underline underline-offset-4 transition-colors hover:text-primary/80"
)}
onClick={(event) => {
event.preventDefault();
event.stopPropagation();
void (async () => {
let resolvedLocalPath = inlineValue;
const resolvedSearchSpaceId = Number.isFinite(parsedSearchSpaceId)
? parsedSearchSpaceId
: undefined;
if (electronAPI?.getAgentFilesystemMounts) {
try {
const mounts = (await electronAPI.getAgentFilesystemMounts(
resolvedSearchSpaceId
)) as AgentFilesystemMount[];
resolvedLocalPath = normalizeLocalVirtualPathForEditor(inlineValue, mounts);
} catch {
// Fall back to the raw inline path if mount lookup fails.
}
}
openEditorPanel({
kind: "local_file",
localFilePath: resolvedLocalPath,
title: resolvedLocalPath.split("/").pop() || resolvedLocalPath,
searchSpaceId: resolvedSearchSpaceId,
});
})();
}}
title="Open in editor panel"
>
{displayLocalPath}
</button>
);
return <FilePathLink path={inlineValue} className="text-[0.9em]" />;
}
return (
<code

View file

@ -28,6 +28,7 @@ import {
getSurfsenseDocsRequest,
getSurfsenseDocsResponse,
type SearchDocumentsRequest,
documentTitleRead,
type SearchDocumentTitlesRequest,
searchDocumentsRequest,
searchDocumentsResponse,
@ -269,6 +270,17 @@ class DocumentsApiService {
);
};
/**
 * Look up a document by its virtual path within a search space.
 *
 * Issues a GET to `/api/v1/documents/by-virtual-path` with both values sent as
 * URL-encoded query parameters, passing `documentTitleRead` to
 * `baseApiService.get` together with the URL.
 */
getDocumentByVirtualPath = async ({
	search_space_id,
	virtual_path,
}: {
	search_space_id: number;
	virtual_path: string;
}) => {
	const query = new URLSearchParams({
		search_space_id: String(search_space_id),
		virtual_path,
	}).toString();
	return baseApiService.get(`/api/v1/documents/by-virtual-path?${query}`, documentTitleRead);
};
/**
* Get document type counts
*/