mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-09 07:42:39 +02:00
feat(documents): add endpoint to retrieve document by virtual path
This commit is contained in:
parent
789d8ce62e
commit
d14fed43c6
5 changed files with 206 additions and 59 deletions
|
|
@ -745,6 +745,51 @@ async def search_document_titles(
|
|||
) from e
|
||||
|
||||
|
||||
@router.get("/documents/by-virtual-path", response_model=DocumentTitleRead)
|
||||
async def get_document_by_virtual_path(
|
||||
search_space_id: int,
|
||||
virtual_path: str,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Resolve a knowledge-base document id by exact virtual path."""
|
||||
try:
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.DOCUMENTS_READ.value,
|
||||
"You don't have permission to read documents in this search space",
|
||||
)
|
||||
|
||||
result = await session.execute(
|
||||
select(
|
||||
Document.id,
|
||||
Document.title,
|
||||
Document.document_type,
|
||||
).filter(
|
||||
Document.search_space_id == search_space_id,
|
||||
Document.document_metadata["virtual_path"].as_string() == virtual_path,
|
||||
)
|
||||
)
|
||||
row = result.first()
|
||||
if row is None:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
return DocumentTitleRead(
|
||||
id=row.id,
|
||||
title=row.title,
|
||||
document_type=row.document_type,
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to resolve document by virtual path: {e!s}",
|
||||
) from e
|
||||
|
||||
|
||||
@router.get("/documents/status", response_model=DocumentStatusBatchResponse)
|
||||
async def get_documents_status(
|
||||
search_space_id: int,
|
||||
|
|
|
|||
|
|
@ -304,20 +304,17 @@ def _tool_output_has_error(tool_output: Any) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def _extract_resolved_file_path(*, tool_name: str, tool_output: Any) -> str | None:
|
||||
def _extract_resolved_file_path(
|
||||
*, tool_name: str, tool_output: Any, tool_input: Any | None = None
|
||||
) -> str | None:
|
||||
if isinstance(tool_output, dict):
|
||||
path_value = tool_output.get("path")
|
||||
if isinstance(path_value, str) and path_value.strip():
|
||||
return path_value.strip()
|
||||
text = _tool_output_to_text(tool_output)
|
||||
if tool_name == "write_file":
|
||||
match = re.search(r"Updated file\s+(.+)$", text.strip())
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
if tool_name == "edit_file":
|
||||
match = re.search(r"in '([^']+)'", text)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
if tool_name in ("write_file", "edit_file") and isinstance(tool_input, dict):
|
||||
file_path = tool_input.get("file_path")
|
||||
if isinstance(file_path, str) and file_path.strip():
|
||||
return file_path.strip()
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -714,6 +711,7 @@ async def _stream_agent_events(
|
|||
# fallback path only and never re-pops a chunk we already streamed.
|
||||
pending_tool_call_chunks: list[dict[str, Any]] = []
|
||||
lc_tool_call_id_by_run: dict[str, str] = {}
|
||||
file_path_by_run: dict[str, str] = {}
|
||||
|
||||
# parity_v2 only: live tool-call argument streaming. ``index_to_meta``
|
||||
# is keyed by the chunk's ``index`` field — LangChain
|
||||
|
|
@ -892,6 +890,10 @@ async def _stream_agent_events(
|
|||
tool_input = event.get("data", {}).get("input", {})
|
||||
if tool_name in ("write_file", "edit_file"):
|
||||
result.write_attempted = True
|
||||
if isinstance(tool_input, dict):
|
||||
file_path = tool_input.get("file_path")
|
||||
if isinstance(file_path, str) and file_path.strip() and run_id:
|
||||
file_path_by_run[run_id] = file_path.strip()
|
||||
|
||||
if current_text_id is not None:
|
||||
yield streaming_service.format_text_end(current_text_id)
|
||||
|
|
@ -1298,6 +1300,7 @@ async def _stream_agent_events(
|
|||
run_id = event.get("run_id", "")
|
||||
tool_name = event.get("name", "unknown_tool")
|
||||
raw_output = event.get("data", {}).get("output", "")
|
||||
staged_file_path = file_path_by_run.pop(run_id, None) if run_id else None
|
||||
|
||||
if tool_name == "update_memory":
|
||||
called_update_memory = True
|
||||
|
|
@ -1811,6 +1814,7 @@ async def _stream_agent_events(
|
|||
resolved_path = _extract_resolved_file_path(
|
||||
tool_name=tool_name,
|
||||
tool_output=tool_output,
|
||||
tool_input={"file_path": staged_file_path} if staged_file_path else None,
|
||||
)
|
||||
result_text = _tool_output_to_text(tool_output)
|
||||
if _tool_output_has_error(tool_output):
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from app.tasks.chat.stream_new_chat import (
|
|||
StreamResult,
|
||||
_classify_stream_exception,
|
||||
_contract_enforcement_active,
|
||||
_extract_resolved_file_path,
|
||||
_evaluate_file_contract_outcome,
|
||||
_log_chat_stream_error,
|
||||
_tool_output_has_error,
|
||||
|
|
@ -28,6 +29,39 @@ def test_tool_output_error_detection():
|
|||
assert not _tool_output_has_error({"result": "Updated file /notes.md"})
|
||||
|
||||
|
||||
def test_extract_resolved_file_path_prefers_structured_path():
|
||||
assert (
|
||||
_extract_resolved_file_path(
|
||||
tool_name="write_file",
|
||||
tool_output={"status": "completed", "path": "/docs/note.md"},
|
||||
tool_input=None,
|
||||
)
|
||||
== "/docs/note.md"
|
||||
)
|
||||
|
||||
|
||||
def test_extract_resolved_file_path_falls_back_to_tool_input():
|
||||
assert (
|
||||
_extract_resolved_file_path(
|
||||
tool_name="edit_file",
|
||||
tool_output={"status": "completed", "result": "updated"},
|
||||
tool_input={"file_path": "/docs/edited.md"},
|
||||
)
|
||||
== "/docs/edited.md"
|
||||
)
|
||||
|
||||
|
||||
def test_extract_resolved_file_path_does_not_parse_result_text():
|
||||
assert (
|
||||
_extract_resolved_file_path(
|
||||
tool_name="write_file",
|
||||
tool_output={"result": "Updated file /docs/from-text.md"},
|
||||
tool_input=None,
|
||||
)
|
||||
is None
|
||||
)
|
||||
|
||||
|
||||
def test_file_write_contract_outcome_reasons():
|
||||
result = StreamResult(intent_detected="file_write")
|
||||
passed, reason = _evaluate_file_contract_outcome(result)
|
||||
|
|
|
|||
|
|
@ -30,8 +30,10 @@ import {
|
|||
TableRow,
|
||||
} from "@/components/ui/table";
|
||||
import { useElectronAPI } from "@/hooks/use-platform";
|
||||
import { documentsApiService } from "@/lib/apis/documents-api.service";
|
||||
import { type CitationUrlMap, preprocessCitationMarkdown } from "@/lib/citations/citation-parser";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { toast } from "sonner";
|
||||
|
||||
function MarkdownCodeBlockSkeleton() {
|
||||
return (
|
||||
|
|
@ -194,6 +196,89 @@ function isVirtualFilePathToken(value: string): boolean {
|
|||
return segments.length >= 2;
|
||||
}
|
||||
|
||||
function isStandaloneDocumentsPathText(node: ReactNode): string | null {
|
||||
if (typeof node !== "string") return null;
|
||||
const value = node.trim();
|
||||
if (!value.startsWith("/documents/")) return null;
|
||||
if (value.includes(" ")) return null;
|
||||
const normalized = value.replace(/\/+$/, "");
|
||||
const leaf = normalized.split("/").filter(Boolean).at(-1) ?? "";
|
||||
if (!leaf || !leaf.includes(".")) return null;
|
||||
return value;
|
||||
}
|
||||
|
||||
function FilePathLink({
|
||||
path,
|
||||
className,
|
||||
}: {
|
||||
path: string;
|
||||
className?: string;
|
||||
}) {
|
||||
const openEditorPanel = useSetAtom(openEditorPanelAtom);
|
||||
const params = useParams();
|
||||
const electronAPI = useElectronAPI();
|
||||
const searchSpaceIdParam = params?.search_space_id;
|
||||
const parsedSearchSpaceId = Array.isArray(searchSpaceIdParam)
|
||||
? Number(searchSpaceIdParam[0])
|
||||
: Number(searchSpaceIdParam);
|
||||
const resolvedSearchSpaceId = Number.isFinite(parsedSearchSpaceId) ? parsedSearchSpaceId : undefined;
|
||||
|
||||
return (
|
||||
<button
|
||||
type="button"
|
||||
className={cn(
|
||||
"cursor-pointer font-mono text-[0.9em] font-medium text-primary underline underline-offset-4 transition-colors hover:text-primary/80",
|
||||
className
|
||||
)}
|
||||
onClick={(event) => {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
void (async () => {
|
||||
if (electronAPI) {
|
||||
let resolvedLocalPath = path;
|
||||
if (electronAPI.getAgentFilesystemMounts) {
|
||||
try {
|
||||
const mounts = (await electronAPI.getAgentFilesystemMounts(
|
||||
resolvedSearchSpaceId
|
||||
)) as AgentFilesystemMount[];
|
||||
resolvedLocalPath = normalizeLocalVirtualPathForEditor(path, mounts);
|
||||
} catch {
|
||||
// Fall back to the raw path if mount lookup fails.
|
||||
}
|
||||
}
|
||||
openEditorPanel({
|
||||
kind: "local_file",
|
||||
localFilePath: resolvedLocalPath,
|
||||
title: resolvedLocalPath.split("/").pop() || resolvedLocalPath,
|
||||
searchSpaceId: resolvedSearchSpaceId,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (!resolvedSearchSpaceId || !path.startsWith("/documents/")) return;
|
||||
try {
|
||||
const doc = await documentsApiService.getDocumentByVirtualPath({
|
||||
search_space_id: resolvedSearchSpaceId,
|
||||
virtual_path: path,
|
||||
});
|
||||
openEditorPanel({
|
||||
kind: "document",
|
||||
documentId: doc.id,
|
||||
searchSpaceId: resolvedSearchSpaceId,
|
||||
title: doc.title,
|
||||
});
|
||||
} catch {
|
||||
toast.error("Document not found in knowledge base.");
|
||||
}
|
||||
})();
|
||||
}}
|
||||
title="Open in editor panel"
|
||||
>
|
||||
{path}
|
||||
</button>
|
||||
);
|
||||
}
|
||||
|
||||
function MarkdownImage({ src, alt }: { src?: string; alt?: string }) {
|
||||
if (!src) return null;
|
||||
|
||||
|
|
@ -311,9 +396,14 @@ const defaultComponents = memoizeMarkdownComponents({
|
|||
},
|
||||
p: function P({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
const standalonePath = isStandaloneDocumentsPathText(children);
|
||||
return (
|
||||
<p className={cn("aui-md-p mt-5 mb-5 leading-7 first:mt-0 last:mb-0", className)} {...props}>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
{standalonePath ? (
|
||||
<FilePathLink path={standalonePath} />
|
||||
) : (
|
||||
processChildrenWithCitations(children, urlMap)
|
||||
)}
|
||||
</p>
|
||||
);
|
||||
},
|
||||
|
|
@ -400,8 +490,6 @@ const defaultComponents = memoizeMarkdownComponents({
|
|||
code: function Code({ className, children, ...props }) {
|
||||
const isCodeBlock = useIsMarkdownCodeBlock();
|
||||
const { resolvedTheme } = useTheme();
|
||||
const openEditorPanel = useSetAtom(openEditorPanelAtom);
|
||||
const params = useParams();
|
||||
const electronAPI = useElectronAPI();
|
||||
const language = /language-(\w+)/.exec(className || "")?.[1] ?? "text";
|
||||
const codeString = String(children).replace(/\n$/, "");
|
||||
|
|
@ -418,53 +506,17 @@ const defaultComponents = memoizeMarkdownComponents({
|
|||
const isLikelyFolder =
|
||||
inlineValue.endsWith("/") || !leafSegment || !leafSegment.includes(".");
|
||||
const isLocalPath =
|
||||
!!electronAPI &&
|
||||
isVirtualFilePathToken(inlineValue) &&
|
||||
!inlineValue.startsWith("//") &&
|
||||
!isLikelyFolder;
|
||||
const displayLocalPath = inlineValue.replace(/^\/+/, "");
|
||||
const searchSpaceIdParam = params?.search_space_id;
|
||||
const parsedSearchSpaceId = Array.isArray(searchSpaceIdParam)
|
||||
? Number(searchSpaceIdParam[0])
|
||||
: Number(searchSpaceIdParam);
|
||||
(isVirtualFilePathToken(inlineValue) &&
|
||||
!inlineValue.startsWith("//") &&
|
||||
!isLikelyFolder &&
|
||||
!!electronAPI) ||
|
||||
(isVirtualFilePathToken(inlineValue) &&
|
||||
!inlineValue.startsWith("//") &&
|
||||
!isLikelyFolder &&
|
||||
!electronAPI &&
|
||||
inlineValue.startsWith("/documents/"));
|
||||
if (isLocalPath) {
|
||||
return (
|
||||
<button
|
||||
type="button"
|
||||
className={cn(
|
||||
"cursor-pointer font-mono text-[0.9em] font-medium text-primary underline underline-offset-4 transition-colors hover:text-primary/80"
|
||||
)}
|
||||
onClick={(event) => {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
void (async () => {
|
||||
let resolvedLocalPath = inlineValue;
|
||||
const resolvedSearchSpaceId = Number.isFinite(parsedSearchSpaceId)
|
||||
? parsedSearchSpaceId
|
||||
: undefined;
|
||||
if (electronAPI?.getAgentFilesystemMounts) {
|
||||
try {
|
||||
const mounts = (await electronAPI.getAgentFilesystemMounts(
|
||||
resolvedSearchSpaceId
|
||||
)) as AgentFilesystemMount[];
|
||||
resolvedLocalPath = normalizeLocalVirtualPathForEditor(inlineValue, mounts);
|
||||
} catch {
|
||||
// Fall back to the raw inline path if mount lookup fails.
|
||||
}
|
||||
}
|
||||
openEditorPanel({
|
||||
kind: "local_file",
|
||||
localFilePath: resolvedLocalPath,
|
||||
title: resolvedLocalPath.split("/").pop() || resolvedLocalPath,
|
||||
searchSpaceId: resolvedSearchSpaceId,
|
||||
});
|
||||
})();
|
||||
}}
|
||||
title="Open in editor panel"
|
||||
>
|
||||
{displayLocalPath}
|
||||
</button>
|
||||
);
|
||||
return <FilePathLink path={inlineValue} className="text-[0.9em]" />;
|
||||
}
|
||||
return (
|
||||
<code
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import {
|
|||
getSurfsenseDocsRequest,
|
||||
getSurfsenseDocsResponse,
|
||||
type SearchDocumentsRequest,
|
||||
documentTitleRead,
|
||||
type SearchDocumentTitlesRequest,
|
||||
searchDocumentsRequest,
|
||||
searchDocumentsResponse,
|
||||
|
|
@ -269,6 +270,17 @@ class DocumentsApiService {
|
|||
);
|
||||
};
|
||||
|
||||
getDocumentByVirtualPath = async (request: {
|
||||
search_space_id: number;
|
||||
virtual_path: string;
|
||||
}) => {
|
||||
const params = new URLSearchParams({
|
||||
search_space_id: String(request.search_space_id),
|
||||
virtual_path: request.virtual_path,
|
||||
});
|
||||
return baseApiService.get(`/api/v1/documents/by-virtual-path?${params.toString()}`, documentTitleRead);
|
||||
};
|
||||
|
||||
/**
|
||||
* Get document type counts
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue