fix(filesystem): clarify folder path metadata errors

This commit is contained in:
BukeLy 2026-05-26 15:25:37 +08:00
parent 144e8ba325
commit 06d8553a0a
5 changed files with 27 additions and 1 deletion

View file

@ -63,6 +63,8 @@ Retrieval strategy:
- Start with ls or tree to understand the workspace.
- Use refs exactly as listed, such as ref_1, or use a concrete file path from
ls output. Do not invent paths like /documents/ref_1.
- Folder paths such as /documents are positional command targets; do not put
folder paths inside --where.
- Use search-summary when available to find likely documents.
Quote multi-word queries and include a path, for example:
search-summary "Federal Reserve supervision regulation" /documents

View file

@ -45,6 +45,8 @@ Tool policy:
- The bash tool is a PageIndex virtual shell, not an operating-system shell.
- The default agent tool surface is read-only.
- Use only commands listed in the workspace capabilities.
- Folder paths such as /documents are positional command targets; never put folder paths in --where.
- Use --where only with metadata fields shown by stat --schema.
- grep -R performs lexical evidence search.
- Semantic search commands are candidate-discovery tools and do not guarantee literal text matches.
- Tool errors are returned as ERROR text; recover by trying an available command.

View file

@ -88,7 +88,8 @@ class PIFSCommandExecutor:
"Available command surfaces for this workspace:",
"- mode: read-only inspection",
"- ls/tree: folder browsing",
"- find --where: exact/canonical metadata DSL filtering",
"- find <folder>: folder path is positional; do not put paths in --where",
"- find --where: exact/canonical metadata DSL filtering using stat --schema fields only",
"- find <folder> -maxdepth N -type f|d: bounded folder traversal for find",
"- grep -R: recursive lexical/FTS search only; semantic vector prefilter is disabled",
"- cat <ref> --structure/--node/--page: cached PageIndex reads for PDF/Markdown files",

View file

@ -15,6 +15,7 @@ class MetadataQueryEngine:
FIELD_RE = re.compile(r"^[A-Za-z][A-Za-z0-9_]*$")
OPERATORS = {"$eq", "$ne", "$in", "$gt", "$gte", "$lt", "$lte", "$contains"}
LOGICAL_OPERATORS = {"$and", "$or"}
FOLDER_SCOPE_FIELD_HINTS = {"path", "folder", "folders", "folder_path", "folder_paths"}
MAX_DEPTH = 5
def __init__(self, store: Any):
@ -121,6 +122,12 @@ class MetadataQueryEngine:
def validate_field(self, field: str) -> None:
    """Ensure ``field`` names a metadata field known to the backing store.

    The name is first handed to ``validate_field_name``; the store is then
    asked whether the field exists.

    Raises:
        MetadataQueryError: if the store reports that the field does not
            exist. When the name is one of ``FOLDER_SCOPE_FIELD_HINTS`` the
            message additionally explains that folder paths are positional
            command arguments rather than metadata fields.
    """
    self.validate_field_name(field)

    # Known field: nothing further to check.
    if self.store.metadata_field_exists(field):
        return

    # Ordinary unknown field: keep the short, generic message.
    if field not in self.FOLDER_SCOPE_FIELD_HINTS:
        raise MetadataQueryError(f"Unknown metadata field: {field}")

    # The name looks like an attempt to filter by folder; point the caller at
    # positional paths and the schema listing instead.
    raise MetadataQueryError(
        f"Unknown metadata field: {field}. Folder paths are positional PIFS paths, "
        "not metadata fields; use `ls /documents` or `find /documents -type f`. "
        "Use --where only with fields from `stat --schema`."
    )
def validate_field_name(self, field: str) -> None:

View file

@ -115,3 +115,17 @@ def test_find_maxdepth_is_advertised_to_agents(tmp_path):
assert "-maxdepth N -type f|d" in executor.describe_available_command_surfaces()
assert executor.command_capabilities()["retrieval"]["lexical"]["find_maxdepth"] is True
def test_where_path_error_points_to_folder_scope(tmp_path):
    """Filtering on ``path`` via --where must fail with folder-scope guidance."""
    from pageindex.filesystem.commands import PIFSCommandError

    shell = _register_find_fixture(tmp_path)
    command = """find --where '{"path":"/documents"}'"""

    with pytest.raises(PIFSCommandError) as raised:
        shell.execute(command)

    rendered = str(raised.value)
    # Each fragment is a piece of guidance the error text must carry.
    expected_fragments = (
        "Folder paths are positional PIFS paths",
        "find /documents -type f",
        "stat --schema",
    )
    for fragment in expected_fragments:
        assert fragment in rendered