From 06d8553a0a6eb69efcdcf3aef96e7253c97c6789 Mon Sep 17 00:00:00 2001 From: BukeLy Date: Tue, 26 May 2026 15:25:37 +0800 Subject: [PATCH] fix(filesystem): clarify folder path metadata errors --- examples/pifs_demo.py | 2 ++ pageindex/filesystem/agent.py | 2 ++ pageindex/filesystem/commands.py | 3 ++- pageindex/filesystem/metadata.py | 7 +++++++ tests/test_pifs_find_maxdepth.py | 14 ++++++++++++++ 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/examples/pifs_demo.py b/examples/pifs_demo.py index 839d37c..fa610ad 100644 --- a/examples/pifs_demo.py +++ b/examples/pifs_demo.py @@ -63,6 +63,8 @@ Retrieval strategy: - Start with ls or tree to understand the workspace. - Use refs exactly as listed, such as ref_1, or use a concrete file path from ls output. Do not invent paths like /documents/ref_1. +- Folder paths such as /documents are positional command targets; do not put + folder paths inside --where. - Use search-summary when available to find likely documents. Quote multi-word queries and include a path, for example: search-summary "Federal Reserve supervision regulation" /documents diff --git a/pageindex/filesystem/agent.py b/pageindex/filesystem/agent.py index 15e95a5..dce9aca 100644 --- a/pageindex/filesystem/agent.py +++ b/pageindex/filesystem/agent.py @@ -45,6 +45,8 @@ Tool policy: - The bash tool is a PageIndex virtual shell, not an operating-system shell. - The default agent tool surface is read-only. - Use only commands listed in the workspace capabilities. +- Folder paths such as /documents are positional command targets; never put folder paths in --where. +- Use --where only with metadata fields shown by stat --schema. - grep -R performs lexical evidence search. - Semantic search commands are candidate-discovery tools and do not guarantee literal text matches. - Tool errors are returned as ERROR text; recover by trying an available command. 
diff --git a/pageindex/filesystem/commands.py b/pageindex/filesystem/commands.py index 2e36179..730deee 100644 --- a/pageindex/filesystem/commands.py +++ b/pageindex/filesystem/commands.py @@ -88,7 +88,8 @@ class PIFSCommandExecutor: "Available command surfaces for this workspace:", "- mode: read-only inspection", "- ls/tree: folder browsing", - "- find --where: exact/canonical metadata DSL filtering", + "- find <path>: folder path is positional; do not put paths in --where", + "- find --where: exact/canonical metadata DSL filtering using stat --schema fields only", "- find -maxdepth N -type f|d: bounded folder traversal for find", "- grep -R: recursive lexical/FTS search only; semantic vector prefilter is disabled", "- cat --structure/--node/--page: cached PageIndex reads for PDF/Markdown files", diff --git a/pageindex/filesystem/metadata.py b/pageindex/filesystem/metadata.py index 2766282..60d7beb 100644 --- a/pageindex/filesystem/metadata.py +++ b/pageindex/filesystem/metadata.py @@ -15,6 +15,7 @@ class MetadataQueryEngine: FIELD_RE = re.compile(r"^[A-Za-z][A-Za-z0-9_]*$") OPERATORS = {"$eq", "$ne", "$in", "$gt", "$gte", "$lt", "$lte", "$contains"} LOGICAL_OPERATORS = {"$and", "$or"} + FOLDER_SCOPE_FIELD_HINTS = {"path", "folder", "folders", "folder_path", "folder_paths"} MAX_DEPTH = 5 def __init__(self, store: Any): @@ -121,6 +122,12 @@ class MetadataQueryEngine: def validate_field(self, field: str) -> None: self.validate_field_name(field) if not self.store.metadata_field_exists(field): + if field in self.FOLDER_SCOPE_FIELD_HINTS: + raise MetadataQueryError( + f"Unknown metadata field: {field}. Folder paths are positional PIFS paths, " + "not metadata fields; use `ls /documents` or `find /documents -type f`. " + "Use --where only with fields from `stat --schema`." 
+ ) raise MetadataQueryError(f"Unknown metadata field: {field}") def validate_field_name(self, field: str) -> None: diff --git a/tests/test_pifs_find_maxdepth.py b/tests/test_pifs_find_maxdepth.py index 56c32e4..2635f27 100644 --- a/tests/test_pifs_find_maxdepth.py +++ b/tests/test_pifs_find_maxdepth.py @@ -115,3 +115,17 @@ def test_find_maxdepth_is_advertised_to_agents(tmp_path): assert "-maxdepth N -type f|d" in executor.describe_available_command_surfaces() assert executor.command_capabilities()["retrieval"]["lexical"]["find_maxdepth"] is True + + +def test_where_path_error_points_to_folder_scope(tmp_path): + from pageindex.filesystem.commands import PIFSCommandError + + executor = _register_find_fixture(tmp_path) + + with pytest.raises(PIFSCommandError) as exc_info: + executor.execute("""find --where '{"path":"/documents"}'""") + + message = str(exc_info.value) + assert "Folder paths are positional PIFS paths" in message + assert "find /documents -type f" in message + assert "stat --schema" in message