mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-18 20:15:18 +02:00
fix(filesystem): remove session ref aliases from cli output
This commit is contained in:
parent
06d8553a0a
commit
a80b84dae4
5 changed files with 82 additions and 54 deletions
|
|
@ -5,7 +5,7 @@ This mirrors examples/agentic_vectorless_rag_demo.py, but exposes a corpus
|
||||||
through the PageIndex FileSystem shell instead of direct PageIndex document
|
through the PageIndex FileSystem shell instead of direct PageIndex document
|
||||||
tools. The agent receives one read-only bash-like PIFS tool and must retrieve
|
tools. The agent receives one read-only bash-like PIFS tool and must retrieve
|
||||||
evidence through commands such as ls, tree, find, grep, search-summary,
|
evidence through commands such as ls, tree, find, grep, search-summary,
|
||||||
cat <ref> --structure, cat <ref> --page, and cat <ref> --node.
|
cat <path> --structure, cat <path> --page, and cat <path> --node.
|
||||||
|
|
||||||
The demo uses PDFs under examples/documents. When a matching
|
The demo uses PDFs under examples/documents. When a matching
|
||||||
examples/documents/results/*_structure.json file exists, it is loaded into the
|
examples/documents/results/*_structure.json file exists, it is loaded into the
|
||||||
|
|
@ -61,8 +61,8 @@ shell. The workspace contains registered example PDFs.
|
||||||
|
|
||||||
Retrieval strategy:
|
Retrieval strategy:
|
||||||
- Start with ls or tree to understand the workspace.
|
- Start with ls or tree to understand the workspace.
|
||||||
- Use refs exactly as listed, such as ref_1, or use a concrete file path from
|
- Use concrete PIFS paths from ls/find output, such as /documents/report.pdf,
|
||||||
ls output. Do not invent paths like /documents/ref_1.
|
or stable file_ref/document ids. Do not invent temporary ref_N aliases.
|
||||||
- Folder paths such as /documents are positional command targets; do not put
|
- Folder paths such as /documents are positional command targets; do not put
|
||||||
folder paths inside --where.
|
folder paths inside --where.
|
||||||
- Use search-summary when available to find likely documents.
|
- Use search-summary when available to find likely documents.
|
||||||
|
|
@ -73,12 +73,12 @@ Retrieval strategy:
|
||||||
- Use grep -R only for lexical evidence; do not treat semantic candidates as
|
- Use grep -R only for lexical evidence; do not treat semantic candidates as
|
||||||
literal matches.
|
literal matches.
|
||||||
- Run one evidence command at a time. Do not chain large commands like
|
- Run one evidence command at a time. Do not chain large commands like
|
||||||
cat <ref> --structure, grep, and cat <ref> --page in one bash call.
|
cat <path> --structure, grep, and cat <path> --page in one bash call.
|
||||||
- For PDFs, use cat <ref> --structure to inspect the PageIndex tree, then
|
- For PDFs, use cat <path> --structure to inspect the PageIndex tree, then
|
||||||
cat <ref> --page <range> for evidence, for example:
|
cat <path> --page <range> for evidence, for example:
|
||||||
cat ref_1 --page 31-35
|
cat /documents/2023-annual-report.pdf --page 31-35
|
||||||
- For page-range questions, use cat <ref> --structure to identify the full section
|
- For page-range questions, use cat <path> --structure to identify the full section
|
||||||
range. Then run cat <ref> --page on the smallest useful evidence range, usually the
|
range. Then run cat <path> --page on the smallest useful evidence range, usually the
|
||||||
section start page or first 1-2 pages, before the final answer. Do not print
|
section start page or first 1-2 pages, before the final answer. Do not print
|
||||||
a broad multi-page section unless the user asks to read the whole section.
|
a broad multi-page section unless the user asks to read the whole section.
|
||||||
- Do not use cat --all on PDFs.
|
- Do not use cat --all on PDFs.
|
||||||
|
|
@ -630,11 +630,11 @@ def run_smoke_commands(
|
||||||
verbose=verbose,
|
verbose=verbose,
|
||||||
)
|
)
|
||||||
|
|
||||||
first_ref = registered[0]["file_ref"] if registered else None
|
first_target = f"/documents/{Path(str(registered[0]['path'])).name}" if registered else None
|
||||||
if not first_ref:
|
if not first_target:
|
||||||
return
|
return
|
||||||
|
|
||||||
command = f"stat {first_ref}"
|
command = f"stat {first_target}"
|
||||||
stat = execute_json_command(json_executor, command)
|
stat = execute_json_command(json_executor, command)
|
||||||
stat_data = stat.get("data") or {}
|
stat_data = stat.get("data") or {}
|
||||||
show_capability(
|
show_capability(
|
||||||
|
|
@ -648,7 +648,7 @@ def run_smoke_commands(
|
||||||
verbose=verbose,
|
verbose=verbose,
|
||||||
)
|
)
|
||||||
|
|
||||||
command = f"cat {first_ref} --structure"
|
command = f"cat {first_target} --structure"
|
||||||
structure_payload = execute_json_command(json_executor, command)
|
structure_payload = execute_json_command(json_executor, command)
|
||||||
structure_data = structure_payload.get("data") or {}
|
structure_data = structure_payload.get("data") or {}
|
||||||
structure = structure_data.get("structure") or []
|
structure = structure_data.get("structure") or []
|
||||||
|
|
@ -666,7 +666,7 @@ def run_smoke_commands(
|
||||||
)
|
)
|
||||||
|
|
||||||
evidence_range = opening_page_range_for_node(supervision_node) or "1-2"
|
evidence_range = opening_page_range_for_node(supervision_node) or "1-2"
|
||||||
command = f"cat {first_ref} --page {evidence_range}"
|
command = f"cat {first_target} --page {evidence_range}"
|
||||||
page = execute_json_command(json_executor, command)
|
page = execute_json_command(json_executor, command)
|
||||||
page_text = str((page.get("data") or {}).get("text") or "")
|
page_text = str((page.get("data") or {}).get("text") or "")
|
||||||
show_capability(
|
show_capability(
|
||||||
|
|
|
||||||
|
|
@ -36,8 +36,10 @@ commands described in the workspace context. grep -R is lexical evidence search;
|
||||||
semantic search commands return candidate documents and do not guarantee literal
|
semantic search commands return candidate documents and do not guarantee literal
|
||||||
text matches. Errors are returned as text prefixed with ERROR. Do not call
|
text matches. Errors are returned as text prefixed with ERROR. Do not call
|
||||||
commands that are not listed as available. When evidence is required, inspect it
|
commands that are not listed as available. When evidence is required, inspect it
|
||||||
with cat or grep before answering. Prefer shell-like target-first cat syntax:
|
with cat or grep before answering. Prefer shell-like target-first cat syntax
|
||||||
cat <ref> --structure, cat <ref> --page 31-59, and cat <ref> --node 0009.
|
with stable targets: cat <path> --structure, cat <path> --page 31-59, and
|
||||||
|
cat <path> --node 0009. You may also use file_ref or document_id when a path is
|
||||||
|
ambiguous.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
AGENT_TOOL_POLICY = """
|
AGENT_TOOL_POLICY = """
|
||||||
|
|
@ -51,8 +53,8 @@ Tool policy:
|
||||||
- Semantic search commands are candidate-discovery tools and do not guarantee literal text matches.
|
- Semantic search commands are candidate-discovery tools and do not guarantee literal text matches.
|
||||||
- Tool errors are returned as ERROR text; recover by trying an available command.
|
- Tool errors are returned as ERROR text; recover by trying an available command.
|
||||||
- Use cat or grep to gather evidence before making source-backed claims.
|
- Use cat or grep to gather evidence before making source-backed claims.
|
||||||
- Prefer target-first cat syntax: cat <ref> --structure, cat <ref> --page 31-59, cat <ref> --node <node_id>.
|
- Prefer target-first cat syntax with stable targets: cat <path> --structure, cat <path> --page 31-59, cat <path> --node <node_id>.
|
||||||
- Do not call cat --page <ref> <start> <end>; if you need a page span, use cat <ref> --page <start>-<end>.
|
- Do not call cat --page <target> <start> <end>; if you need a page span, use cat <target> --page <start>-<end>.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
STREAM_MODE_ALIASES = {
|
STREAM_MODE_ALIASES = {
|
||||||
|
|
|
||||||
|
|
@ -92,8 +92,8 @@ class PIFSCommandExecutor:
|
||||||
"- find --where: exact/canonical metadata DSL filtering using stat --schema fields only",
|
"- find --where: exact/canonical metadata DSL filtering using stat --schema fields only",
|
||||||
"- find <folder> -maxdepth N -type f|d: bounded folder traversal for find",
|
"- find <folder> -maxdepth N -type f|d: bounded folder traversal for find",
|
||||||
"- grep -R: recursive lexical/FTS search only; semantic vector prefilter is disabled",
|
"- grep -R: recursive lexical/FTS search only; semantic vector prefilter is disabled",
|
||||||
"- cat <ref> --structure/--node/--page: cached PageIndex reads for PDF/Markdown files",
|
"- cat <path|file_ref|document_id> --structure/--node/--page: cached PageIndex reads for PDF/Markdown files",
|
||||||
"- cat <ref> --all: full text artifact reads for txt/text files",
|
"- cat <path|file_ref|document_id> --all: full text artifact reads for txt/text files",
|
||||||
]
|
]
|
||||||
if "entity" in semantic_channels:
|
if "entity" in semantic_channels:
|
||||||
lines.append("- find --name: entity semantic candidate discovery alias")
|
lines.append("- find --name: entity semantic candidate discovery alias")
|
||||||
|
|
@ -115,7 +115,7 @@ class PIFSCommandExecutor:
|
||||||
)
|
)
|
||||||
if not semantic.get("commands"):
|
if not semantic.get("commands"):
|
||||||
lines.append("- semantic vector commands: none available in this workspace")
|
lines.append("- semantic vector commands: none available in this workspace")
|
||||||
lines.append("- grep <query> <ref>, cat, stat: evidence inspection")
|
lines.append("- grep <query> <path|file_ref|document_id>, cat, stat: evidence inspection")
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def execute(self, command: str) -> str:
|
def execute(self, command: str) -> str:
|
||||||
|
|
@ -432,8 +432,9 @@ class PIFSCommandExecutor:
|
||||||
target = args[0]
|
target = args[0]
|
||||||
if target.startswith("-"):
|
if target.startswith("-"):
|
||||||
raise PIFSCommandError(
|
raise PIFSCommandError(
|
||||||
"cat syntax is target-first: cat <ref> --structure, "
|
"cat syntax is target-first: cat <path|file_ref|document_id> --structure, "
|
||||||
"cat <ref> --page 31-59, or cat <ref> --node 0009"
|
"cat <path|file_ref|document_id> --page 31-59, or "
|
||||||
|
"cat <path|file_ref|document_id> --node 0009"
|
||||||
)
|
)
|
||||||
location = "all"
|
location = "all"
|
||||||
structural_mode: str | None = None
|
structural_mode: str | None = None
|
||||||
|
|
@ -467,8 +468,8 @@ class PIFSCommandExecutor:
|
||||||
raise PIFSCommandError(f"Unsupported cat option: {arg}")
|
raise PIFSCommandError(f"Unsupported cat option: {arg}")
|
||||||
else:
|
else:
|
||||||
raise PIFSCommandError(
|
raise PIFSCommandError(
|
||||||
"cat accepts one file target. Use: cat <ref> --page <page-or-range>, "
|
"cat accepts one file target. Use: cat <path|file_ref|document_id> --page <page-or-range>, "
|
||||||
"for example: cat ref_1 --page 31-59"
|
"for example: cat /documents/report.pdf --page 31-59"
|
||||||
)
|
)
|
||||||
i += 1
|
i += 1
|
||||||
if structural_mode == "structure":
|
if structural_mode == "structure":
|
||||||
|
|
@ -479,7 +480,7 @@ class PIFSCommandExecutor:
|
||||||
if not page_range or not re.fullmatch(r"\d+(?:-\d+)?", page_range):
|
if not page_range or not re.fullmatch(r"\d+(?:-\d+)?", page_range):
|
||||||
raise PIFSCommandError(
|
raise PIFSCommandError(
|
||||||
"cat --page requires one page selector like 31 or 31-59. "
|
"cat --page requires one page selector like 31 or 31-59. "
|
||||||
"Use: cat <ref> --page <page-or-range>"
|
"Use: cat <path|file_ref|document_id> --page <page-or-range>"
|
||||||
)
|
)
|
||||||
return self.filesystem.pageindex_pages(target, page_range)
|
return self.filesystem.pageindex_pages(target, page_range)
|
||||||
return self.filesystem.cat_text_artifact(target, location)
|
return self.filesystem.cat_text_artifact(target, location)
|
||||||
|
|
@ -804,7 +805,7 @@ class PIFSCommandExecutor:
|
||||||
)
|
)
|
||||||
if mode == "matches":
|
if mode == "matches":
|
||||||
return "\n".join(
|
return "\n".join(
|
||||||
f"{item['reference_id']}:{item['line']}: "
|
f"{self._file_target_path(item)}:{item['line']}: "
|
||||||
f"{self._compact_text(item['text'], max_chars=220)}"
|
f"{self._compact_text(item['text'], max_chars=220)}"
|
||||||
for item in data.get("data", [])
|
for item in data.get("data", [])
|
||||||
)
|
)
|
||||||
|
|
@ -835,7 +836,7 @@ class PIFSCommandExecutor:
|
||||||
lines.append(f"{name}: {field.get('type', 'string')}")
|
lines.append(f"{name}: {field.get('type', 'string')}")
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
lines = [
|
lines = [
|
||||||
f"ref: {data.get('target') or data.get('file_ref')}",
|
f"target: {data.get('target') or data.get('file_ref')}",
|
||||||
f"file_ref: {data.get('file_ref')}",
|
f"file_ref: {data.get('file_ref')}",
|
||||||
f"document_id: {data.get('external_id') or data.get('document_id') or '-'}",
|
f"document_id: {data.get('external_id') or data.get('document_id') or '-'}",
|
||||||
f"source_path: {data.get('source_path') or '-'}",
|
f"source_path: {data.get('source_path') or '-'}",
|
||||||
|
|
@ -857,23 +858,37 @@ class PIFSCommandExecutor:
|
||||||
|
|
||||||
def _file_row_text(self, item: dict[str, Any]) -> str:
|
def _file_row_text(self, item: dict[str, Any]) -> str:
|
||||||
file_ref = item.get("file_ref")
|
file_ref = item.get("file_ref")
|
||||||
ref = item.get("reference_id") or (self.filesystem._reference_for(file_ref) if file_ref else "-")
|
|
||||||
doc_id = item.get("external_id") or item.get("document_id") or "-"
|
doc_id = item.get("external_id") or item.get("document_id") or "-"
|
||||||
title = self._compact_text(item.get("title") or item.get("name") or "", max_chars=80)
|
title = self._compact_text(item.get("title") or item.get("name") or "", max_chars=80)
|
||||||
source_path = item.get("source_path") or "-"
|
source_path = item.get("source_path") or "-"
|
||||||
folder_paths = item.get("folder_paths") or self._folder_paths_for_file(file_ref)
|
folder_paths = item.get("folder_paths") or self._folder_paths_for_file(file_ref)
|
||||||
folders = f" folders={','.join(folder_paths)}" if folder_paths else ""
|
folders = f" folders={','.join(folder_paths)}" if folder_paths else ""
|
||||||
return f"{ref} {doc_id} {title} {source_path}{folders}".strip()
|
target = self._file_target_path(item)
|
||||||
|
return f"{target} id={doc_id} file_ref={file_ref or '-'} title={title} source={source_path}{folders}".strip()
|
||||||
|
|
||||||
def _grep_file_hit_text(self, item: dict[str, Any]) -> str:
|
def _grep_file_hit_text(self, item: dict[str, Any]) -> str:
|
||||||
doc_id = item.get("external_id") or "-"
|
doc_id = item.get("external_id") or "-"
|
||||||
source_path = item.get("source_path") or "-"
|
|
||||||
line = item.get("line") or 1
|
line = item.get("line") or 1
|
||||||
|
target = self._file_target_path(item)
|
||||||
return (
|
return (
|
||||||
f"{item['reference_id']} {doc_id} {source_path}:{line}: "
|
f"{target}:{line}: id={doc_id} "
|
||||||
f"{self._compact_text(item.get('text') or '', max_chars=180)}"
|
f"{self._compact_text(item.get('text') or '', max_chars=180)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _file_target_path(self, item: dict[str, Any]) -> str:
|
||||||
|
file_ref = item.get("file_ref")
|
||||||
|
title = str(item.get("title") or item.get("name") or "").strip()
|
||||||
|
folder_paths = item.get("folder_paths") or []
|
||||||
|
folder_path = item.get("folder_path")
|
||||||
|
if not folder_paths and folder_path:
|
||||||
|
folder_paths = [folder_path]
|
||||||
|
if not folder_paths:
|
||||||
|
folder_paths = self._folder_paths_for_file(file_ref)
|
||||||
|
if folder_paths and title:
|
||||||
|
folder = str(folder_paths[0] or "/").rstrip("/")
|
||||||
|
return f"{folder}/{title}" if folder else f"/{title}"
|
||||||
|
return str(item.get("source_path") or item.get("external_id") or file_ref or "-")
|
||||||
|
|
||||||
def _semantic_retrieval_query(self, query: str) -> str:
|
def _semantic_retrieval_query(self, query: str) -> str:
|
||||||
query = str(query or "").strip()
|
query = str(query or "").strip()
|
||||||
context = str(self.query_context or "").strip()
|
context = str(self.query_context or "").strip()
|
||||||
|
|
@ -1040,11 +1055,10 @@ class PIFSCommandExecutor:
|
||||||
continue
|
continue
|
||||||
if direct_only and self._folder_path_for_source_path(file_row["source_path"]) != folder_path:
|
if direct_only and self._folder_path_for_source_path(file_row["source_path"]) != folder_path:
|
||||||
continue
|
continue
|
||||||
reference_id = self.filesystem._reference_for(file_row["file_ref"])
|
|
||||||
line_number, text = self._first_matching_source_line(path, query)
|
line_number, text = self._first_matching_source_line(path, query)
|
||||||
hits.append(
|
hits.append(
|
||||||
{
|
{
|
||||||
"reference_id": reference_id,
|
"reference_id": file_row["external_id"] or file_row["file_ref"],
|
||||||
"file_ref": file_row["file_ref"],
|
"file_ref": file_row["file_ref"],
|
||||||
"external_id": file_row["external_id"],
|
"external_id": file_row["external_id"],
|
||||||
"title": file_row["title"],
|
"title": file_row["title"],
|
||||||
|
|
@ -1060,17 +1074,18 @@ class PIFSCommandExecutor:
|
||||||
|
|
||||||
def _grep_file_matches(self, target: str, query: str, *, limit: int) -> list[dict[str, Any]]:
|
def _grep_file_matches(self, target: str, query: str, *, limit: int) -> list[dict[str, Any]]:
|
||||||
file_ref = self.filesystem._resolve_reference(target)
|
file_ref = self.filesystem._resolve_reference(target)
|
||||||
reference_id = self.filesystem._reference_for(file_ref)
|
|
||||||
entry = self.filesystem.store.get_file(file_ref)
|
entry = self.filesystem.store.get_file(file_ref)
|
||||||
matches = []
|
matches = []
|
||||||
for line_number, line in enumerate(self.filesystem.store.read_text(file_ref).splitlines(), 1):
|
for line_number, line in enumerate(self.filesystem.store.read_text(file_ref).splitlines(), 1):
|
||||||
if self._line_matches(line, query):
|
if self._line_matches(line, query):
|
||||||
matches.append(
|
matches.append(
|
||||||
{
|
{
|
||||||
"reference_id": reference_id,
|
"reference_id": entry.external_id or file_ref,
|
||||||
"file_ref": file_ref,
|
"file_ref": file_ref,
|
||||||
"external_id": entry.external_id,
|
"external_id": entry.external_id,
|
||||||
|
"title": entry.title,
|
||||||
"source_path": entry.source_path,
|
"source_path": entry.source_path,
|
||||||
|
"folder_paths": self._folder_paths_for_file(file_ref),
|
||||||
"line": line_number,
|
"line": line_number,
|
||||||
"text": self._compact_text(line, max_chars=220),
|
"text": self._compact_text(line, max_chars=220),
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -102,7 +102,6 @@ class PageIndexFileSystem:
|
||||||
self.workspace = Path(workspace).expanduser()
|
self.workspace = Path(workspace).expanduser()
|
||||||
self.store = SQLiteFileSystemStore(self.workspace)
|
self.store = SQLiteFileSystemStore(self.workspace)
|
||||||
self.metadata = MetadataQueryEngine(self.store)
|
self.metadata = MetadataQueryEngine(self.store)
|
||||||
self._references: dict[str, str] = {}
|
|
||||||
self.semantic_retrieval_backend = semantic_retrieval_backend
|
self.semantic_retrieval_backend = semantic_retrieval_backend
|
||||||
self.metadata_generator = metadata_generator
|
self.metadata_generator = metadata_generator
|
||||||
self.summary_projection_indexer = summary_projection_indexer
|
self.summary_projection_indexer = summary_projection_indexer
|
||||||
|
|
@ -388,7 +387,6 @@ class PageIndexFileSystem:
|
||||||
results = []
|
results = []
|
||||||
scope_path = self._scope_folder_path(scope)
|
scope_path = self._scope_folder_path(scope)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
reference_id = self._reference_for(row["file_ref"])
|
|
||||||
folder_paths = [
|
folder_paths = [
|
||||||
folder["path"]
|
folder["path"]
|
||||||
for folder in self.store.folder_memberships(row["file_ref"])
|
for folder in self.store.folder_memberships(row["file_ref"])
|
||||||
|
|
@ -396,7 +394,7 @@ class PageIndexFileSystem:
|
||||||
folder_path = self._preferred_folder_path(folder_paths, scope_path, row["folder_path"])
|
folder_path = self._preferred_folder_path(folder_paths, scope_path, row["folder_path"])
|
||||||
results.append(
|
results.append(
|
||||||
SearchResult(
|
SearchResult(
|
||||||
reference_id=reference_id,
|
reference_id=row["external_id"] or row["file_ref"],
|
||||||
file_ref=row["file_ref"],
|
file_ref=row["file_ref"],
|
||||||
external_id=row["external_id"],
|
external_id=row["external_id"],
|
||||||
title=row["title"],
|
title=row["title"],
|
||||||
|
|
@ -693,8 +691,9 @@ class PageIndexFileSystem:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"{command} is only supported for txt/text files; "
|
f"{command} is only supported for txt/text files; "
|
||||||
f"got source_path={entry.source_path!r}, content_type={entry.content_type!r}. "
|
f"got source_path={entry.source_path!r}, content_type={entry.content_type!r}. "
|
||||||
"Use cat <ref> --structure, cat <ref> --page, or cat <ref> --node "
|
"Use cat <path|file_ref|document_id> --structure, "
|
||||||
"for PDF/Markdown PageIndex files."
|
"cat <path|file_ref|document_id> --page, or "
|
||||||
|
"cat <path|file_ref|document_id> --node for PDF/Markdown PageIndex files."
|
||||||
)
|
)
|
||||||
|
|
||||||
def _require_pageindex_document_file(self, entry: Any, command: str) -> None:
|
def _require_pageindex_document_file(self, entry: Any, command: str) -> None:
|
||||||
|
|
@ -703,7 +702,7 @@ class PageIndexFileSystem:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"{command} is only supported for PDF/Markdown PageIndex files; "
|
f"{command} is only supported for PDF/Markdown PageIndex files; "
|
||||||
f"got source_path={entry.source_path!r}, content_type={entry.content_type!r}. "
|
f"got source_path={entry.source_path!r}, content_type={entry.content_type!r}. "
|
||||||
"Use cat <ref> --all for txt/text files."
|
"Use cat <path|file_ref|document_id> --all for txt/text files."
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
@ -1254,8 +1253,6 @@ class PageIndexFileSystem:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _resolve_reference(self, reference_id: str) -> str:
|
def _resolve_reference(self, reference_id: str) -> str:
|
||||||
if reference_id in self._references:
|
|
||||||
return self._references[reference_id]
|
|
||||||
return self.store.resolve_file_ref(reference_id)
|
return self.store.resolve_file_ref(reference_id)
|
||||||
|
|
||||||
def _should_use_semantic_retrieval(
|
def _should_use_semantic_retrieval(
|
||||||
|
|
@ -1315,7 +1312,6 @@ class PageIndexFileSystem:
|
||||||
continue
|
continue
|
||||||
seen.add(file_ref)
|
seen.add(file_ref)
|
||||||
entry = self.store.get_file(file_ref)
|
entry = self.store.get_file(file_ref)
|
||||||
reference_id = self._reference_for(file_ref)
|
|
||||||
folder_paths = [
|
folder_paths = [
|
||||||
folder["path"]
|
folder["path"]
|
||||||
for folder in self.store.folder_memberships(file_ref)
|
for folder in self.store.folder_memberships(file_ref)
|
||||||
|
|
@ -1323,7 +1319,7 @@ class PageIndexFileSystem:
|
||||||
folder_path = self._preferred_folder_path(folder_paths, scope_path, entry.folder_path)
|
folder_path = self._preferred_folder_path(folder_paths, scope_path, entry.folder_path)
|
||||||
results.append(
|
results.append(
|
||||||
SearchResult(
|
SearchResult(
|
||||||
reference_id=reference_id,
|
reference_id=entry.external_id or file_ref,
|
||||||
file_ref=file_ref,
|
file_ref=file_ref,
|
||||||
external_id=entry.external_id,
|
external_id=entry.external_id,
|
||||||
title=entry.title,
|
title=entry.title,
|
||||||
|
|
@ -1348,14 +1344,6 @@ class PageIndexFileSystem:
|
||||||
break
|
break
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def _reference_for(self, file_ref: str) -> str:
|
|
||||||
for reference_id, existing in self._references.items():
|
|
||||||
if existing == file_ref:
|
|
||||||
return reference_id
|
|
||||||
reference_id = f"ref_{len(self._references) + 1}"
|
|
||||||
self._references[reference_id] = file_ref
|
|
||||||
return reference_id
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_descriptor(title: str, metadata: dict[str, Any]) -> str:
|
def _build_descriptor(title: str, metadata: dict[str, Any]) -> str:
|
||||||
source = metadata.get("source_type") or metadata.get("repo") or metadata.get("channel")
|
source = metadata.get("source_type") or metadata.get("repo") or metadata.get("channel")
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,29 @@ def test_find_maxdepth_one_returns_direct_files_only(tmp_path):
|
||||||
assert [row["external_id"] for row in rows] == ["doc_root"]
|
assert [row["external_id"] for row in rows] == ["doc_root"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_output_is_path_first_without_session_refs(tmp_path):
|
||||||
|
executor = _register_find_fixture(tmp_path)
|
||||||
|
executor.json_output = False
|
||||||
|
|
||||||
|
output = executor.execute("find /documents -maxdepth 1 -type f")
|
||||||
|
|
||||||
|
assert output.startswith("/documents/Root document id=doc_root file_ref=file_")
|
||||||
|
assert "ref_1" not in output
|
||||||
|
assert "title=Root document" in output
|
||||||
|
|
||||||
|
|
||||||
|
def test_stable_path_targets_work_without_session_refs(tmp_path):
|
||||||
|
executor = _register_find_fixture(tmp_path)
|
||||||
|
executor.json_output = False
|
||||||
|
|
||||||
|
stat = executor.execute("stat '/documents/Root document'")
|
||||||
|
text = executor.execute("cat '/documents/Root document' --all")
|
||||||
|
|
||||||
|
assert "target: /documents/Root document" in stat
|
||||||
|
assert "document_id: doc_root" in stat
|
||||||
|
assert "Root document fixture text" in text
|
||||||
|
|
||||||
|
|
||||||
def test_find_maxdepth_zero_type_directory_returns_start_folder(tmp_path):
|
def test_find_maxdepth_zero_type_directory_returns_start_folder(tmp_path):
|
||||||
executor = _register_find_fixture(tmp_path)
|
executor = _register_find_fixture(tmp_path)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue