fix(filesystem): clarify agent evidence recovery

This commit is contained in:
BukeLy 2026-05-27 03:35:12 +08:00
parent c13cb203cd
commit 0243732e22
5 changed files with 26 additions and 7 deletions

1
.gitignore vendored
View file

@@ -5,3 +5,4 @@ __pycache__
.venv/
logs/
examples/pifs_workspace/
examples/Benchmark/enterpriseragbenchmark_workspace/

View file

@@ -60,9 +60,11 @@ commands described in the workspace context. grep -R is lexical evidence search;
grep does not support regex alternation such as "a|b"; run multiple grep
commands or use search-summary for semantic candidate discovery instead.
semantic search commands such as search-summary return candidate documents and
do not guarantee literal text matches. Use search-summary when the user asks for
summary search, semantic search, or vector search and the command is listed as
available. Quote multi-word semantic queries, for example:
do not guarantee literal text matches or final answer evidence. After choosing
a likely search-summary candidate, verify the relevant claim with cat before
answering. Use search-summary when the user asks for summary search, semantic
search, or vector search and the command is listed as available. Quote
multi-word semantic queries, for example:
search-summary "Federal Reserve" /documents. Do not write
search-summary Federal Reserve /documents. Errors are returned as text prefixed
with ERROR. Do not call
@@ -70,7 +72,9 @@ commands that are not listed as available. When evidence is required, inspect it
with cat or grep before answering. Prefer shell-like target-first cat syntax
with stable targets: cat <path> --structure, cat <path> --page 31-59, and
cat <path> --node 0009. You may also use file_ref or document_id when a path is
ambiguous. After structure identifies a relevant section node, prefer
ambiguous. Do not reconstruct paths from document titles; use exact targets
returned by PIFS commands and quote paths containing spaces. After structure
identifies a relevant section node, prefer
cat <path> --node <node_id>; use cat <path> --page <range> when the user asks
for page-level evidence, no suitable node exists, or exact page text is needed.
cat <path> --structure is paginated; request more with --offset if needed. Page
@@ -94,11 +98,13 @@ Tool policy:
- Use --where only with metadata fields shown by stat --schema.
- grep -R performs lexical evidence search.
- grep does not support regex alternation such as "a|b"; run separate grep commands or use search-summary for semantic candidate discovery.
- Semantic search commands are candidate-discovery tools and do not guarantee literal text matches.
- Semantic search commands are candidate-discovery tools and do not guarantee literal text matches or final answer evidence. After selecting a likely search-summary candidate, verify the relevant facts with cat before answering.
- Do not use find | grep as an exhaustive search or as proof that no document exists; find output can be scoped or limited. Use metadata filters, search-summary, grep on a narrowed target, or cat on likely candidates instead.
- A single failed grep is not enough evidence to say there is no relevant document. If grep returns no matches for a workspace-topic question, verify with search-summary or another available semantic/vector candidate command, or inspect likely document structure, before answering no-evidence.
- If search-summary is available and the user asks for summary search, semantic search, vector search, or "用 summary 搜", use search-summary "<query>" <folder>; quote multi-word queries, for example search-summary "Federal Reserve" /documents; do not translate that request into find --where.
- Tool errors are returned as ERROR text; recover by trying an available command.
- Use cat or grep to gather evidence before making source-backed claims.
- Do not reconstruct a file path from a title. Use exact paths returned by PIFS commands, or use file_ref/document_id when available; quote paths that contain spaces.
- For broad topic, method, or "what solution" questions that are likely about the workspace, search for candidate documents before asking the user to choose a document.
- Use stat only for metadata/schema/status questions or to resolve ambiguous target identity. Do not run stat merely to understand what a document says.
- Prefer target-first cat syntax with stable targets: cat <path> --structure, cat <path> --page 31-59, cat <path> --node <node_id>.

View file

@@ -534,7 +534,11 @@ class PIFSCommandExecutor:
"cat accepts one file target. Use target-first syntax: "
"cat <path|file_ref|document_id> --structure, "
"cat <path|file_ref|document_id> --node 0002 0004, or "
"cat <path|file_ref|document_id> --page 31-33"
"cat <path|file_ref|document_id> --page 31-33. "
f"Unexpected extra argument: {arg!r}. If the target path or title contains "
"spaces, quote the whole target, for example: cat \"/documents/report name.pdf\" "
"--structure. If a title-derived path is ambiguous, use the file_ref or "
"document_id instead."
)
i += 1
if structural_mode == "structure":

View file

@@ -517,7 +517,7 @@ def test_cat_structure_page_node_and_text_outputs_are_hard_limited():
"0006 0007 0008 0009 0010 0011"
)
with pytest.raises(PIFSCommandError, match="cat accepts one file target"):
with pytest.raises(PIFSCommandError, match="quote the whole target"):
executor.execute("cat dsid_limited_pdf 0001")
text = json.loads(executor.execute("cat dsid_long_text --all"))

View file

@@ -204,6 +204,8 @@ class PIFSAgentStreamTest(unittest.TestCase):
self.assertIn("prefer\ncat <path> --node <node_id>", BASH_TOOL_DESCRIPTION)
self.assertIn("stop if the evidence is sufficient", AGENT_TOOL_POLICY)
self.assertIn("continue with another chunk before answering", BASH_TOOL_DESCRIPTION)
self.assertIn("Do not reconstruct paths from document titles", BASH_TOOL_DESCRIPTION)
self.assertIn("file_ref/document_id", AGENT_TOOL_POLICY)
def test_prompt_requires_stat_for_metadata_questions(self):
self.assertIn("stat --schema and stat <target>", AGENT_TOOL_POLICY)
@@ -218,6 +220,12 @@ class PIFSAgentStreamTest(unittest.TestCase):
self.assertIn('use search-summary "<query>" <folder>', AGENT_TOOL_POLICY)
self.assertIn('search-summary "Federal Reserve" /documents', BASH_TOOL_DESCRIPTION)
self.assertIn("do not translate that request into find --where", AGENT_TOOL_POLICY)
self.assertIn("verify the relevant facts with cat", AGENT_TOOL_POLICY)
self.assertIn("verify the relevant claim with cat", BASH_TOOL_DESCRIPTION)
def test_prompt_rejects_find_grep_as_exhaustive_search(self):
self.assertIn("Do not use find | grep as an exhaustive search", AGENT_TOOL_POLICY)
self.assertIn("find output can be scoped or limited", AGENT_TOOL_POLICY)
def test_system_prompt_sets_workspace_identity_and_scope(self):
self.assertIn("PageIndex FileSystem Demo Agent", AGENT_SYSTEM_PROMPT)