mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-12 19:55:17 +02:00
fix(filesystem): clarify folder path metadata errors
This commit is contained in:
parent
144e8ba325
commit
06d8553a0a
5 changed files with 27 additions and 1 deletion
|
|
@ -63,6 +63,8 @@ Retrieval strategy:
|
|||
- Start with ls or tree to understand the workspace.
|
||||
- Use refs exactly as listed, such as ref_1, or use a concrete file path from
|
||||
ls output. Do not invent paths like /documents/ref_1.
|
||||
- Folder paths such as /documents are positional command targets; do not put
|
||||
folder paths inside --where.
|
||||
- Use search-summary when available to find likely documents.
|
||||
Quote multi-word queries and include a path, for example:
|
||||
search-summary "Federal Reserve supervision regulation" /documents
|
||||
|
|
|
|||
|
|
@ -45,6 +45,8 @@ Tool policy:
|
|||
- The bash tool is a PageIndex virtual shell, not an operating-system shell.
|
||||
- The default agent tool surface is read-only.
|
||||
- Use only commands listed in the workspace capabilities.
|
||||
- Folder paths such as /documents are positional command targets; never put folder paths in --where.
|
||||
- Use --where only with metadata fields shown by stat --schema.
|
||||
- grep -R performs lexical evidence search.
|
||||
- Semantic search commands are candidate-discovery tools and do not guarantee literal text matches.
|
||||
- Tool errors are returned as ERROR text; recover by trying an available command.
|
||||
|
|
|
|||
|
|
@ -88,7 +88,8 @@ class PIFSCommandExecutor:
|
|||
"Available command surfaces for this workspace:",
|
||||
"- mode: read-only inspection",
|
||||
"- ls/tree: folder browsing",
|
||||
"- find --where: exact/canonical metadata DSL filtering",
|
||||
"- find <folder>: folder path is positional; do not put paths in --where",
|
||||
"- find --where: exact/canonical metadata DSL filtering using stat --schema fields only",
|
||||
"- find <folder> -maxdepth N -type f|d: bounded folder traversal for find",
|
||||
"- grep -R: recursive lexical/FTS search only; semantic vector prefilter is disabled",
|
||||
"- cat <ref> --structure/--node/--page: cached PageIndex reads for PDF/Markdown files",
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ class MetadataQueryEngine:
|
|||
FIELD_RE = re.compile(r"^[A-Za-z][A-Za-z0-9_]*$")
|
||||
OPERATORS = {"$eq", "$ne", "$in", "$gt", "$gte", "$lt", "$lte", "$contains"}
|
||||
LOGICAL_OPERATORS = {"$and", "$or"}
|
||||
FOLDER_SCOPE_FIELD_HINTS = {"path", "folder", "folders", "folder_path", "folder_paths"}
|
||||
MAX_DEPTH = 5
|
||||
|
||||
def __init__(self, store: Any):
|
||||
|
|
@ -121,6 +122,12 @@ class MetadataQueryEngine:
|
|||
def validate_field(self, field: str) -> None:
    """Ensure ``field`` is a metadata field the store knows about.

    The name is first passed to ``validate_field_name``; the store is then
    asked whether a metadata field with that name exists.

    Args:
        field: Metadata field name taken from a ``--where`` filter.

    Raises:
        MetadataQueryError: If the store reports that the field does not
            exist. When the name matches one of the folder-scope hints
            (compared case-insensitively, e.g. ``path`` or ``Folder``), the
            message additionally explains that folder paths are positional
            command targets rather than metadata fields.
    """
    self.validate_field_name(field)
    if self.store.metadata_field_exists(field):
        return

    # FIELD_RE permits upper-case letters, so spellings such as "Path" must
    # receive the same folder-scope guidance as "path".
    if field.lower() in self.FOLDER_SCOPE_FIELD_HINTS:
        raise MetadataQueryError(
            f"Unknown metadata field: {field}. Folder paths are positional PIFS paths, "
            "not metadata fields; use `ls /documents` or `find /documents -type f`. "
            "Use --where only with fields from `stat --schema`."
        )
    raise MetadataQueryError(f"Unknown metadata field: {field}")
|
||||
|
||||
def validate_field_name(self, field: str) -> None:
|
||||
|
|
|
|||
|
|
@ -115,3 +115,17 @@ def test_find_maxdepth_is_advertised_to_agents(tmp_path):
|
|||
|
||||
assert "-maxdepth N -type f|d" in executor.describe_available_command_surfaces()
|
||||
assert executor.command_capabilities()["retrieval"]["lexical"]["find_maxdepth"] is True
|
||||
|
||||
|
||||
def test_where_path_error_points_to_folder_scope(tmp_path):
    """Filtering on ``path`` via --where must steer the caller to positional folder paths."""
    from pageindex.filesystem.commands import PIFSCommandError

    executor = _register_find_fixture(tmp_path)
    command = """find --where '{"path":"/documents"}'"""

    with pytest.raises(PIFSCommandError) as exc_info:
        executor.execute(command)

    message = str(exc_info.value)
    # Each fragment is a piece of guidance the error text is expected to carry.
    expected_fragments = (
        "Folder paths are positional PIFS paths",
        "find /documents -type f",
        "stat --schema",
    )
    for fragment in expected_fragments:
        assert fragment in message
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue