mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-12 19:55:17 +02:00
fix(filesystem): clarify agent evidence recovery
This commit is contained in:
parent
c13cb203cd
commit
0243732e22
5 changed files with 26 additions and 7 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -5,3 +5,4 @@ __pycache__
|
|||
.venv/
|
||||
logs/
|
||||
examples/pifs_workspace/
|
||||
examples/Benchmark/enterpriseragbenchmark_workspace/
|
||||
|
|
|
|||
|
|
@ -60,9 +60,11 @@ commands described in the workspace context. grep -R is lexical evidence search;
|
|||
grep does not support regex alternation such as "a|b"; run multiple grep
|
||||
commands or use search-summary for semantic candidate discovery instead.
|
||||
semantic search commands such as search-summary return candidate documents and
|
||||
do not guarantee literal text matches. Use search-summary when the user asks for
|
||||
summary search, semantic search, or vector search and the command is listed as
|
||||
available. Quote multi-word semantic queries, for example:
|
||||
do not guarantee literal text matches or final answer evidence. After choosing
|
||||
a likely search-summary candidate, verify the relevant claim with cat before
|
||||
answering. Use search-summary when the user asks for summary search, semantic
|
||||
search, or vector search and the command is listed as available. Quote
|
||||
multi-word semantic queries, for example:
|
||||
search-summary "Federal Reserve" /documents. Do not write
|
||||
search-summary Federal Reserve /documents. Errors are returned as text prefixed
|
||||
with ERROR. Do not call
|
||||
|
|
@ -70,7 +72,9 @@ commands that are not listed as available. When evidence is required, inspect it
|
|||
with cat or grep before answering. Prefer shell-like target-first cat syntax
|
||||
with stable targets: cat <path> --structure, cat <path> --page 31-59, and
|
||||
cat <path> --node 0009. You may also use file_ref or document_id when a path is
|
||||
ambiguous. After structure identifies a relevant section node, prefer
|
||||
ambiguous. Do not reconstruct paths from document titles; use exact targets
|
||||
returned by PIFS commands and quote paths containing spaces. After structure
|
||||
identifies a relevant section node, prefer
|
||||
cat <path> --node <node_id>; use cat <path> --page <range> when the user asks
|
||||
for page-level evidence, no suitable node exists, or exact page text is needed.
|
||||
cat <path> --structure is paginated; request more with --offset if needed. Page
|
||||
|
|
@ -94,11 +98,13 @@ Tool policy:
|
|||
- Use --where only with metadata fields shown by stat --schema.
|
||||
- grep -R performs lexical evidence search.
|
||||
- grep does not support regex alternation such as "a|b"; run separate grep commands or use search-summary for semantic candidate discovery.
|
||||
- Semantic search commands are candidate-discovery tools and do not guarantee literal text matches.
|
||||
- Semantic search commands are candidate-discovery tools and do not guarantee literal text matches or final answer evidence. After selecting a likely search-summary candidate, verify the relevant facts with cat before answering.
|
||||
- Do not use find | grep as an exhaustive search or as proof that no document exists; find output can be scoped or limited. Use metadata filters, search-summary, grep on a narrowed target, or cat on likely candidates instead.
|
||||
- A single failed grep is not enough evidence to say there is no relevant document. If grep returns no matches for a workspace-topic question, verify with search-summary or another available semantic/vector candidate command, or inspect likely document structure, before answering no-evidence.
|
||||
- If search-summary is available and the user asks for summary search, semantic search, vector search, or "用 summary 搜", use search-summary "<query>" <folder>; quote multi-word queries, for example search-summary "Federal Reserve" /documents; do not translate that request into find --where.
|
||||
- Tool errors are returned as ERROR text; recover by trying an available command.
|
||||
- Use cat or grep to gather evidence before making source-backed claims.
|
||||
- Do not reconstruct a file path from a title. Use exact paths returned by PIFS commands, or use file_ref/document_id when available; quote paths that contain spaces.
|
||||
- For broad topic, method, or "what solution" questions that are likely about the workspace, search for candidate documents before asking the user to choose a document.
|
||||
- Use stat only for metadata/schema/status questions or to resolve ambiguous target identity. Do not run stat merely to understand what a document says.
|
||||
- Prefer target-first cat syntax with stable targets: cat <path> --structure, cat <path> --page 31-59, cat <path> --node <node_id>.
|
||||
|
|
|
|||
|
|
@ -534,7 +534,11 @@ class PIFSCommandExecutor:
|
|||
"cat accepts one file target. Use target-first syntax: "
|
||||
"cat <path|file_ref|document_id> --structure, "
|
||||
"cat <path|file_ref|document_id> --node 0002 0004, or "
|
||||
"cat <path|file_ref|document_id> --page 31-33"
|
||||
"cat <path|file_ref|document_id> --page 31-33. "
|
||||
f"Unexpected extra argument: {arg!r}. If the target path or title contains "
|
||||
"spaces, quote the whole target, for example: cat \"/documents/report name.pdf\" "
|
||||
"--structure. If a title-derived path is ambiguous, use the file_ref or "
|
||||
"document_id instead."
|
||||
)
|
||||
i += 1
|
||||
if structural_mode == "structure":
|
||||
|
|
|
|||
|
|
@ -517,7 +517,7 @@ def test_cat_structure_page_node_and_text_outputs_are_hard_limited():
|
|||
"0006 0007 0008 0009 0010 0011"
|
||||
)
|
||||
|
||||
with pytest.raises(PIFSCommandError, match="cat accepts one file target"):
|
||||
with pytest.raises(PIFSCommandError, match="quote the whole target"):
|
||||
executor.execute("cat dsid_limited_pdf 0001")
|
||||
|
||||
text = json.loads(executor.execute("cat dsid_long_text --all"))
|
||||
|
|
|
|||
|
|
@ -204,6 +204,8 @@ class PIFSAgentStreamTest(unittest.TestCase):
|
|||
self.assertIn("prefer\ncat <path> --node <node_id>", BASH_TOOL_DESCRIPTION)
|
||||
self.assertIn("stop if the evidence is sufficient", AGENT_TOOL_POLICY)
|
||||
self.assertIn("continue with another chunk before answering", BASH_TOOL_DESCRIPTION)
|
||||
self.assertIn("Do not reconstruct paths from document titles", BASH_TOOL_DESCRIPTION)
|
||||
self.assertIn("file_ref/document_id", AGENT_TOOL_POLICY)
|
||||
|
||||
def test_prompt_requires_stat_for_metadata_questions(self):
|
||||
self.assertIn("stat --schema and stat <target>", AGENT_TOOL_POLICY)
|
||||
|
|
@ -218,6 +220,12 @@ class PIFSAgentStreamTest(unittest.TestCase):
|
|||
self.assertIn('use search-summary "<query>" <folder>', AGENT_TOOL_POLICY)
|
||||
self.assertIn('search-summary "Federal Reserve" /documents', BASH_TOOL_DESCRIPTION)
|
||||
self.assertIn("do not translate that request into find --where", AGENT_TOOL_POLICY)
|
||||
self.assertIn("verify the relevant facts with cat", AGENT_TOOL_POLICY)
|
||||
self.assertIn("verify the relevant claim with cat", BASH_TOOL_DESCRIPTION)
|
||||
|
||||
def test_prompt_rejects_find_grep_as_exhaustive_search(self):
|
||||
self.assertIn("Do not use find | grep as an exhaustive search", AGENT_TOOL_POLICY)
|
||||
self.assertIn("find output can be scoped or limited", AGENT_TOOL_POLICY)
|
||||
|
||||
def test_system_prompt_sets_workspace_identity_and_scope(self):
|
||||
self.assertIn("PageIndex FileSystem Demo Agent", AGENT_SYSTEM_PROMPT)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue