fix(filesystem): remove session ref aliases from cli output

This commit is contained in:
BukeLy 2026-05-26 15:39:16 +08:00
parent 06d8553a0a
commit a80b84dae4
5 changed files with 82 additions and 54 deletions

View file

@ -36,8 +36,10 @@ commands described in the workspace context. grep -R is lexical evidence search;
semantic search commands return candidate documents and do not guarantee literal
text matches. Errors are returned as text prefixed with ERROR. Do not call
commands that are not listed as available. When evidence is required, inspect it
with cat or grep before answering. Prefer shell-like target-first cat syntax:
cat <ref> --structure, cat <ref> --page 31-59, and cat <ref> --node 0009.
with cat or grep before answering. Prefer shell-like target-first cat syntax
with stable targets: cat <path> --structure, cat <path> --page 31-59, and
cat <path> --node 0009. You may also use file_ref or document_id when a path is
ambiguous.
"""
AGENT_TOOL_POLICY = """
@ -51,8 +53,8 @@ Tool policy:
- Semantic search commands are candidate-discovery tools and do not guarantee literal text matches.
- Tool errors are returned as ERROR text; recover by trying an available command.
- Use cat or grep to gather evidence before making source-backed claims.
- Prefer target-first cat syntax: cat <ref> --structure, cat <ref> --page 31-59, cat <ref> --node <node_id>.
- Do not call cat --page <ref> <start> <end>; if you need a page span, use cat <ref> --page <start>-<end>.
- Prefer target-first cat syntax with stable targets: cat <path> --structure, cat <path> --page 31-59, cat <path> --node <node_id>.
- Do not call cat --page <target> <start> <end>; if you need a page span, use cat <target> --page <start>-<end>.
"""
STREAM_MODE_ALIASES = {

View file

@ -92,8 +92,8 @@ class PIFSCommandExecutor:
"- find --where: exact/canonical metadata DSL filtering using stat --schema fields only",
"- find <folder> -maxdepth N -type f|d: bounded folder traversal for find",
"- grep -R: recursive lexical/FTS search only; semantic vector prefilter is disabled",
"- cat <ref> --structure/--node/--page: cached PageIndex reads for PDF/Markdown files",
"- cat <ref> --all: full text artifact reads for txt/text files",
"- cat <path|file_ref|document_id> --structure/--node/--page: cached PageIndex reads for PDF/Markdown files",
"- cat <path|file_ref|document_id> --all: full text artifact reads for txt/text files",
]
if "entity" in semantic_channels:
lines.append("- find --name: entity semantic candidate discovery alias")
@ -115,7 +115,7 @@ class PIFSCommandExecutor:
)
if not semantic.get("commands"):
lines.append("- semantic vector commands: none available in this workspace")
lines.append("- grep <query> <ref>, cat, stat: evidence inspection")
lines.append("- grep <query> <path|file_ref|document_id>, cat, stat: evidence inspection")
return "\n".join(lines)
def execute(self, command: str) -> str:
@ -432,8 +432,9 @@ class PIFSCommandExecutor:
target = args[0]
if target.startswith("-"):
raise PIFSCommandError(
"cat syntax is target-first: cat <ref> --structure, "
"cat <ref> --page 31-59, or cat <ref> --node 0009"
"cat syntax is target-first: cat <path|file_ref|document_id> --structure, "
"cat <path|file_ref|document_id> --page 31-59, or "
"cat <path|file_ref|document_id> --node 0009"
)
location = "all"
structural_mode: str | None = None
@ -467,8 +468,8 @@ class PIFSCommandExecutor:
raise PIFSCommandError(f"Unsupported cat option: {arg}")
else:
raise PIFSCommandError(
"cat accepts one file target. Use: cat <ref> --page <page-or-range>, "
"for example: cat ref_1 --page 31-59"
"cat accepts one file target. Use: cat <path|file_ref|document_id> --page <page-or-range>, "
"for example: cat /documents/report.pdf --page 31-59"
)
i += 1
if structural_mode == "structure":
@ -479,7 +480,7 @@ class PIFSCommandExecutor:
if not page_range or not re.fullmatch(r"\d+(?:-\d+)?", page_range):
raise PIFSCommandError(
"cat --page requires one page selector like 31 or 31-59. "
"Use: cat <ref> --page <page-or-range>"
"Use: cat <path|file_ref|document_id> --page <page-or-range>"
)
return self.filesystem.pageindex_pages(target, page_range)
return self.filesystem.cat_text_artifact(target, location)
@ -804,7 +805,7 @@ class PIFSCommandExecutor:
)
if mode == "matches":
return "\n".join(
f"{item['reference_id']}:{item['line']}: "
f"{self._file_target_path(item)}:{item['line']}: "
f"{self._compact_text(item['text'], max_chars=220)}"
for item in data.get("data", [])
)
@ -835,7 +836,7 @@ class PIFSCommandExecutor:
lines.append(f"{name}: {field.get('type', 'string')}")
return "\n".join(lines)
lines = [
f"ref: {data.get('target') or data.get('file_ref')}",
f"target: {data.get('target') or data.get('file_ref')}",
f"file_ref: {data.get('file_ref')}",
f"document_id: {data.get('external_id') or data.get('document_id') or '-'}",
f"source_path: {data.get('source_path') or '-'}",
@ -857,23 +858,37 @@ class PIFSCommandExecutor:
def _file_row_text(self, item: dict[str, Any]) -> str:
file_ref = item.get("file_ref")
ref = item.get("reference_id") or (self.filesystem._reference_for(file_ref) if file_ref else "-")
doc_id = item.get("external_id") or item.get("document_id") or "-"
title = self._compact_text(item.get("title") or item.get("name") or "", max_chars=80)
source_path = item.get("source_path") or "-"
folder_paths = item.get("folder_paths") or self._folder_paths_for_file(file_ref)
folders = f" folders={','.join(folder_paths)}" if folder_paths else ""
return f"{ref} {doc_id} {title} {source_path}{folders}".strip()
target = self._file_target_path(item)
return f"{target} id={doc_id} file_ref={file_ref or '-'} title={title} source={source_path}{folders}".strip()
def _grep_file_hit_text(self, item: dict[str, Any]) -> str:
doc_id = item.get("external_id") or "-"
source_path = item.get("source_path") or "-"
line = item.get("line") or 1
target = self._file_target_path(item)
return (
f"{item['reference_id']} {doc_id} {source_path}:{line}: "
f"{target}:{line}: id={doc_id} "
f"{self._compact_text(item.get('text') or '', max_chars=180)}"
)
def _file_target_path(self, item: dict[str, Any]) -> str:
file_ref = item.get("file_ref")
title = str(item.get("title") or item.get("name") or "").strip()
folder_paths = item.get("folder_paths") or []
folder_path = item.get("folder_path")
if not folder_paths and folder_path:
folder_paths = [folder_path]
if not folder_paths:
folder_paths = self._folder_paths_for_file(file_ref)
if folder_paths and title:
folder = str(folder_paths[0] or "/").rstrip("/")
return f"{folder}/{title}" if folder else f"/{title}"
return str(item.get("source_path") or item.get("external_id") or file_ref or "-")
def _semantic_retrieval_query(self, query: str) -> str:
query = str(query or "").strip()
context = str(self.query_context or "").strip()
@ -1040,11 +1055,10 @@ class PIFSCommandExecutor:
continue
if direct_only and self._folder_path_for_source_path(file_row["source_path"]) != folder_path:
continue
reference_id = self.filesystem._reference_for(file_row["file_ref"])
line_number, text = self._first_matching_source_line(path, query)
hits.append(
{
"reference_id": reference_id,
"reference_id": file_row["external_id"] or file_row["file_ref"],
"file_ref": file_row["file_ref"],
"external_id": file_row["external_id"],
"title": file_row["title"],
@ -1060,17 +1074,18 @@ class PIFSCommandExecutor:
def _grep_file_matches(self, target: str, query: str, *, limit: int) -> list[dict[str, Any]]:
file_ref = self.filesystem._resolve_reference(target)
reference_id = self.filesystem._reference_for(file_ref)
entry = self.filesystem.store.get_file(file_ref)
matches = []
for line_number, line in enumerate(self.filesystem.store.read_text(file_ref).splitlines(), 1):
if self._line_matches(line, query):
matches.append(
{
"reference_id": reference_id,
"reference_id": entry.external_id or file_ref,
"file_ref": file_ref,
"external_id": entry.external_id,
"title": entry.title,
"source_path": entry.source_path,
"folder_paths": self._folder_paths_for_file(file_ref),
"line": line_number,
"text": self._compact_text(line, max_chars=220),
}

View file

@ -102,7 +102,6 @@ class PageIndexFileSystem:
self.workspace = Path(workspace).expanduser()
self.store = SQLiteFileSystemStore(self.workspace)
self.metadata = MetadataQueryEngine(self.store)
self._references: dict[str, str] = {}
self.semantic_retrieval_backend = semantic_retrieval_backend
self.metadata_generator = metadata_generator
self.summary_projection_indexer = summary_projection_indexer
@ -388,7 +387,6 @@ class PageIndexFileSystem:
results = []
scope_path = self._scope_folder_path(scope)
for row in rows:
reference_id = self._reference_for(row["file_ref"])
folder_paths = [
folder["path"]
for folder in self.store.folder_memberships(row["file_ref"])
@ -396,7 +394,7 @@ class PageIndexFileSystem:
folder_path = self._preferred_folder_path(folder_paths, scope_path, row["folder_path"])
results.append(
SearchResult(
reference_id=reference_id,
reference_id=row["external_id"] or row["file_ref"],
file_ref=row["file_ref"],
external_id=row["external_id"],
title=row["title"],
@ -693,8 +691,9 @@ class PageIndexFileSystem:
raise ValueError(
f"{command} is only supported for txt/text files; "
f"got source_path={entry.source_path!r}, content_type={entry.content_type!r}. "
"Use cat <ref> --structure, cat <ref> --page, or cat <ref> --node "
"for PDF/Markdown PageIndex files."
"Use cat <path|file_ref|document_id> --structure, "
"cat <path|file_ref|document_id> --page, or "
"cat <path|file_ref|document_id> --node for PDF/Markdown PageIndex files."
)
def _require_pageindex_document_file(self, entry: Any, command: str) -> None:
@ -703,7 +702,7 @@ class PageIndexFileSystem:
raise ValueError(
f"{command} is only supported for PDF/Markdown PageIndex files; "
f"got source_path={entry.source_path!r}, content_type={entry.content_type!r}. "
"Use cat <ref> --all for txt/text files."
"Use cat <path|file_ref|document_id> --all for txt/text files."
)
@classmethod
@ -1254,8 +1253,6 @@ class PageIndexFileSystem:
return result
def _resolve_reference(self, reference_id: str) -> str:
if reference_id in self._references:
return self._references[reference_id]
return self.store.resolve_file_ref(reference_id)
def _should_use_semantic_retrieval(
@ -1315,7 +1312,6 @@ class PageIndexFileSystem:
continue
seen.add(file_ref)
entry = self.store.get_file(file_ref)
reference_id = self._reference_for(file_ref)
folder_paths = [
folder["path"]
for folder in self.store.folder_memberships(file_ref)
@ -1323,7 +1319,7 @@ class PageIndexFileSystem:
folder_path = self._preferred_folder_path(folder_paths, scope_path, entry.folder_path)
results.append(
SearchResult(
reference_id=reference_id,
reference_id=entry.external_id or file_ref,
file_ref=file_ref,
external_id=entry.external_id,
title=entry.title,
@ -1348,14 +1344,6 @@ class PageIndexFileSystem:
break
return results
def _reference_for(self, file_ref: str) -> str:
for reference_id, existing in self._references.items():
if existing == file_ref:
return reference_id
reference_id = f"ref_{len(self._references) + 1}"
self._references[reference_id] = file_ref
return reference_id
@staticmethod
def _build_descriptor(title: str, metadata: dict[str, Any]) -> str:
source = metadata.get("source_type") or metadata.get("repo") or metadata.get("channel")