feat(pifs): render browse results as records

This commit is contained in:
BukeLy 2026-05-31 17:26:20 +08:00
parent 3562d47fdb
commit c63ebe573b
3 changed files with 226 additions and 3 deletions

View file

@ -1155,6 +1155,8 @@ class PIFSCommandExecutor:
return self._render_listing(data)
if command_name == "tree":
return self._render_tree(data)
if command_name == "browse":
return self._render_browse(data)
if command_name in {"grep", "semantic-grep"}:
return self._render_grep(data)
if command_name in {"search-summary", "search-entity", "search-relation"}:
@ -1301,6 +1303,86 @@ class PIFSCommandExecutor:
lines.append("")
return "\n".join(lines).rstrip()
def _render_browse(self, data: Any) -> str:
if not isinstance(data, dict):
return str(data)
page = self._coerce_positive_int(data.get("page"), default=1)
page_size = self._coerce_positive_int(
data.get("page_size"),
default=self.BROWSE_PAGE_SIZE,
)
has_more = bool(data.get("has_more"))
lines = [
f"# page={page} page_size={page_size} "
f"has_more={'true' if has_more else 'false'}"
]
results = data.get("data") or []
for index, item in enumerate(results):
if index:
lines.append("")
item = item if isinstance(item, dict) else {}
lines.extend(
[
f"rank: {item.get('rank') or index + 1}",
f"similarity: {self._format_similarity(item.get('similarity'))}",
f"path: {self._browse_result_path(item)}",
"summary: "
f"{self._compact_text(self._one_line_value(item.get('summary')), max_chars=240)}",
]
)
if has_more:
if results:
lines.append("")
lines.append(f"# next: {self._browse_next_command(data, page=page)}")
return "\n".join(lines).rstrip()
@staticmethod
def _coerce_positive_int(value: Any, *, default: int) -> int:
try:
parsed = int(value)
except (TypeError, ValueError):
return default
return parsed if parsed >= 1 else default
@staticmethod
def _format_similarity(value: Any) -> str:
try:
similarity = float(value)
except (TypeError, ValueError):
similarity = 0.0
similarity = max(0.0, min(1.0, similarity))
return f"{similarity:.2f}"
@staticmethod
def _browse_result_path(item: dict[str, Any]) -> str:
return str(
item.get("path")
or item.get("document_id")
or item.get("external_id")
or item.get("file_ref")
or "-"
)
def _browse_next_command(self, data: dict[str, Any], *, page: int) -> str:
parts = ["browse"]
if data.get("recursive"):
parts.append("-R")
parts.append(shlex.quote(str(data.get("scope") or "/")))
parts.append(shlex.quote(str(data.get("query") or "")))
space = str(data.get("space") or "summary")
if space != "summary":
parts.extend(["--space", shlex.quote(space)])
if data.get("where") is not None:
parts.extend(["--where", shlex.quote(self._browse_where_text(data["where"]))])
parts.extend(["--page", str(page + 1)])
return " ".join(parts)
@staticmethod
def _browse_where_text(where: Any) -> str:
if isinstance(where, str):
return where
return json.dumps(where, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
def _render_find(self, data: Any) -> str:
if not isinstance(data, list):
return str(data)

View file

@ -441,7 +441,7 @@ class PageIndexFileSystem:
if len(rows) >= needed:
break
page_rows = rows[offset : offset + page_size]
return {
payload = {
"mode": "files",
"retrieval": f"{space}_vector",
"query": query,
@ -454,6 +454,9 @@ class PageIndexFileSystem:
"has_more": len(rows) > offset + page_size,
"data": page_rows,
}
if metadata_filter is not None:
payload["where"] = self._metadata_filter_payload(metadata_filter)
return payload
def folder_info(self, path: str = "/") -> dict[str, Any]:
    """Return ``self.store.folder_info(path)``; ``path`` defaults to ``/``."""
    store = self.store
    return store.folder_info(path)
@ -1664,6 +1667,17 @@ class PageIndexFileSystem:
return 0.0
return round(max(0.0, min(1.0, score)), 4)
@staticmethod
def _metadata_filter_payload(metadata_filter: Any) -> str:
if isinstance(metadata_filter, str):
return metadata_filter
return json.dumps(
metadata_filter,
ensure_ascii=False,
sort_keys=True,
separators=(",", ":"),
)
def _stable_file_locator(self, file_ref: str, entry: Any) -> str:
source_path = str(getattr(entry, "source_path", "") or "").strip()
if source_path:

View file

@ -106,13 +106,22 @@ class BrowseBackend:
]
def _register_browse_file(filesystem, external_id, folder_path, *, department="ops"):
def _register_browse_file(
filesystem,
external_id,
folder_path,
*,
department="ops",
summary=None,
):
from pageindex.filesystem.metadata_generation import MetadataGenerationResult
class SummaryGenerator:
def generate(self, document, *, fields):
values = {
"summary": f"summary for {document.external_id}",
"summary": summary
if summary is not None
else f"summary for {document.external_id}",
"doc_type": "memo",
"domain": "finance",
"topic": "risk",
@ -320,6 +329,124 @@ def test_browse_scopes_semantic_search_before_candidate_limit(tmp_path):
]
def test_browse_shell_output_uses_fixed_blocks_with_pagination_command(tmp_path):
    """Check the record-style shell rendering of a paginated ``browse`` call.

    Registers twelve documents, runs a recursive browse with ``--space`` and
    ``--where``, and asserts the header line, the first two record blocks,
    the similarity formatting of all ten rendered records, the trailing
    ``# next:`` command, and the absence of raw payload keys.
    """
    import re

    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem

    filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
    document_ids = [f"doc_{index:02d}" for index in range(12)]
    for position, external_id in enumerate(document_ids):
        # Only the first document gets a multi-line summary with a tab in it.
        if position == 0:
            summary_text = "first line\nsecond\tline with spaces"
        else:
            summary_text = f"summary for {external_id}"
        _register_browse_file(
            filesystem,
            external_id,
            "/documents",
            department="finance",
            summary=summary_text,
        )
    filesystem.semantic_retrieval_backend = BrowseBackend(
        document_ids,
        channels=("summary", "entity"),
    )
    executor = PIFSCommandExecutor(filesystem)

    command = (
        'browse -R /documents "vector database" --space entity '
        '--where \'{"department":"finance"}\''
    )
    rendered = executor.execute(command)
    lines = rendered.splitlines()

    expected_head = [
        "# page=1 page_size=10 has_more=true",
        "rank: 1",
        "similarity: 0.91",
        "path: /documents/doc_00.txt",
        "summary: first line second line with spaces",
        "",
    ]
    assert lines[:6] == expected_head

    expected_second_record = [
        "rank: 2",
        "similarity: 0.83",
        "path: /documents/doc_01.txt",
        "summary: summary for doc_01",
    ]
    assert lines[6:10] == expected_second_record

    similarity_lines = [line for line in lines if line.startswith("similarity: ")]
    assert len(similarity_lines) == 10
    similarity_pattern = re.compile(r"similarity: [01]\.\d{2}")
    assert all(similarity_pattern.fullmatch(line) for line in similarity_lines)
    assert all(
        0.0 <= float(line.removeprefix("similarity: ")) <= 1.0
        for line in similarity_lines
    )

    expected_next = (
        "# next: browse -R /documents 'vector database' --space entity "
        '--where \'{"department":"finance"}\' --page 2'
    )
    assert lines[-1] == expected_next

    assert "mode:" not in rendered
    assert "data:" not in rendered
    assert "score:" not in rendered
def test_browse_shell_path_falls_back_to_unique_locator_when_source_collides(tmp_path):
    """Check the rendered path when two files share one ``source_path``.

    Registers two files with the same ``source_path`` and asserts that browse
    output shows ``dsid_first`` instead of ``/shared/source.json``, that
    ``dsid_first`` resolves to the first file's ref, and that resolving the
    shared path raises ``KeyError`` matching "Ambiguous file target".
    """
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.metadata_generation import MetadataGenerationResult

    class SummaryGenerator:
        def generate(self, document, *, fields):
            return MetadataGenerationResult(
                values={"summary": f"summary for {document.external_id}"}
            )

    def summary_only_policy():
        # Build a fresh dict per call so each registration gets its own object.
        return {
            "fields": {
                "summary": True,
                "doc_type": False,
                "domain": False,
                "topic": False,
            }
        }

    filesystem = PageIndexFileSystem(
        workspace=tmp_path / "workspace",
        metadata_generator=SummaryGenerator(),
    )
    first_ref = filesystem.register_file(
        storage_uri="file:///tmp/first.json",
        source_path="shared/source.json",
        folder_path="/documents",
        external_id="dsid_first",
        title="First",
        content="first content",
        metadata_policy=summary_only_policy(),
    )
    filesystem.register_file(
        storage_uri="file:///tmp/second.json",
        source_path="shared/source.json",
        folder_path="/documents",
        external_id="dsid_second",
        title="Second",
        content="second content",
        metadata_policy=summary_only_policy(),
    )
    filesystem.semantic_retrieval_backend = BrowseBackend(["dsid_first"])
    executor = PIFSCommandExecutor(filesystem)

    output = executor.execute('browse /documents "first"')

    assert "path: dsid_first" in output
    assert "path: /shared/source.json" not in output
    assert filesystem.store.resolve_file_ref("dsid_first") == first_ref
    with pytest.raises(KeyError, match="Ambiguous file target"):
        filesystem.store.resolve_file_ref("/shared/source.json")
def test_semantic_search_scope_keeps_ordinary_folders_out_of_source_type_filters(tmp_path):
from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
from pageindex.filesystem.metadata_generation import MetadataGenerationResult