fix(filesystem): relax structural read limits

This commit is contained in:
BukeLy 2026-05-26 20:57:08 +08:00
parent 2b69719f60
commit cd571ddbaf
4 changed files with 52 additions and 19 deletions

View file

@ -74,8 +74,10 @@ ambiguous. After structure identifies a relevant section node, prefer
cat <path> --node <node_id>; use cat <path> --page <range> when the user asks
for page-level evidence, no suitable node exists, or exact page text is needed.
cat <path> --structure is paginated; request more with --offset if needed. Page
reads are limited to three pages at once, node reads to at most five node ids,
and text cat --all returns only the first page of text lines.
reads are limited to five pages at once, node reads to at most ten node ids,
and text cat --all returns only the first page of text lines. If a cat limit
error requires a smaller call, stop when the evidence is sufficient; otherwise
continue with another chunk before answering.
For questions about metadata fields, available summaries, or whether metadata
was provided, inspect stat --schema and stat <target> before making claims.
Do not use stat as a general content/topic discovery step. For document Q&A,
@ -101,8 +103,9 @@ Tool policy:
- Use stat only for metadata/schema/status questions or to resolve ambiguous target identity. Do not run stat merely to understand what a document says.
- Prefer target-first cat syntax with stable targets: cat <path> --structure, cat <path> --page 31-59, cat <path> --node <node_id>.
- cat <target> --structure returns at most 25 nodes; use --offset and --limit for more structure pages.
- cat <target> --page accepts at most 3 pages at once. If a larger range is needed, first inspect cat <target> --structure and then read a smaller page range or node.
- cat <target> --node accepts at most 5 node ids at once. Prefer one relevant node when possible.
- cat <target> --page accepts at most 5 pages at once. If a larger range is needed, first inspect cat <target> --structure and then read a smaller page range or node.
- cat <target> --node accepts at most 10 node ids at once. Prefer relevant nodes from structure when possible.
- When recovering from cat page/node/text limit errors, stop if the evidence is sufficient; if it is not sufficient, make another smaller call before answering.
- cat <target> --all returns at most 100 text lines; use cat <target> --range <start>-<end> for the next page.
- After cat <target> --structure finds a relevant section/subsection with a node_id, prefer cat <target> --node <node_id> for content from that semantic unit.
- Use cat <target> --page <start>-<end> when the user explicitly asks for pages/page ranges, when no suitable node_id exists, or when you need exact page text to verify page-level evidence.

View file

@ -54,9 +54,9 @@ class PIFSCommandExecutor:
MAX_GREP_LIMIT = 20
MAX_SEMANTIC_LIMIT = 20
MAX_TEXT_LINES = 100
MAX_PAGE_SPAN = 3
MAX_PAGE_SPAN = 5
MAX_STRUCTURE_NODES = 25
MAX_NODE_IDS = 5
MAX_NODE_IDS = 10
MAX_NODE_TEXT_LINES = 100
MAX_NODE_TEXT_CHARS = 12_000
MAX_STAT_FIELD_TARGETS = 20
@ -107,8 +107,8 @@ class PIFSCommandExecutor:
"- find <folder> -maxdepth N -type f|d: bounded folder traversal for find",
"- grep -R: recursive lexical/FTS search only; semantic vector prefilter is disabled",
"- cat <path|file_ref|document_id> --structure: cached PageIndex node list, paginated at 25 nodes",
"- cat <path|file_ref|document_id> --page: cached PageIndex page reads, limited to 3 pages",
"- cat <path|file_ref|document_id> --node: cached PageIndex node reads, limited to 5 node ids",
"- cat <path|file_ref|document_id> --page: cached PageIndex page reads, limited to 5 pages",
"- cat <path|file_ref|document_id> --node: cached PageIndex node reads, limited to 10 node ids",
"- cat <path|file_ref|document_id> --all: text artifact reads for txt/text files, paginated at 100 lines",
"- stat --field <metadata_field> <target...>: one metadata field across up to 20 documents",
]
@ -2003,8 +2003,10 @@ class PIFSCommandExecutor:
if value > max_value:
raise PIFSCommandError(
f"{label} supports at most {max_value}; requested {value}. "
"Use a smaller value. If you are unsure where to inspect, "
"use cat <target> --structure first."
"Split it into a smaller call. If the evidence is sufficient, "
"stop; if not, continue with additional chunks before "
"answering. If you are unsure where to inspect, use cat <target> "
"--structure first."
)
return value

View file

@ -474,22 +474,48 @@ def test_cat_structure_page_node_and_text_outputs_are_hard_limited():
assert len(second_structure["data"]["structure"]) == 5
assert second_structure["data"]["structure"][0]["node_id"] == "0026"
pages = json.loads(executor.execute("cat dsid_limited_pdf --page 1-3"))
assert pages["data"]["text"] == "Page 1 text\n\nPage 2 text\n\nPage 3 text"
assert pages["data"]["page_pagination"]["limit"] == 3
with pytest.raises(PIFSCommandError, match="at most 3"):
executor.execute("cat dsid_limited_pdf --page 1-4")
pages = json.loads(executor.execute("cat dsid_limited_pdf --page 1-5"))
assert pages["data"]["text"] == (
"Page 1 text\n\nPage 2 text\n\nPage 3 text\n\nPage 4 text\n\nPage 5 text"
)
assert pages["data"]["page_pagination"]["limit"] == 5
with pytest.raises(PIFSCommandError, match="at most 5"):
executor.execute("cat dsid_limited_pdf --page 1-6")
with pytest.raises(PIFSCommandError, match="evidence is sufficient"):
executor.execute("cat dsid_limited_pdf --page 1-6")
nodes = json.loads(
executor.execute("cat dsid_limited_pdf --node 0001 0002 0003 0004 0005")
executor.execute(
"cat dsid_limited_pdf --node 0001 0002 0003 0004 0005 "
"0006 0007 0008 0009 0010"
)
)
assert nodes["data"]["node_ids"] == ["0001", "0002", "0003", "0004", "0005"]
assert nodes["data"]["node_ids"] == [
"0001",
"0002",
"0003",
"0004",
"0005",
"0006",
"0007",
"0008",
"0009",
"0010",
]
comma_nodes = json.loads(
executor.execute("cat dsid_limited_pdf --node 0001,0002")
)
assert comma_nodes["data"]["node_ids"] == ["0001", "0002"]
with pytest.raises(PIFSCommandError, match="at most 5"):
executor.execute("cat dsid_limited_pdf --node 0001 0002 0003 0004 0005 0006")
with pytest.raises(PIFSCommandError, match="at most 10"):
executor.execute(
"cat dsid_limited_pdf --node 0001 0002 0003 0004 0005 "
"0006 0007 0008 0009 0010 0011"
)
with pytest.raises(PIFSCommandError, match="continue with additional chunks"):
executor.execute(
"cat dsid_limited_pdf --node 0001 0002 0003 0004 0005 "
"0006 0007 0008 0009 0010 0011"
)
with pytest.raises(PIFSCommandError, match="cat accepts one file target"):
executor.execute("cat dsid_limited_pdf 0001")

View file

@ -202,6 +202,8 @@ class PIFSAgentStreamTest(unittest.TestCase):
self.assertIn("prefer cat <target> --node <node_id>", AGENT_TOOL_POLICY)
self.assertIn("page-level evidence", AGENT_TOOL_POLICY)
self.assertIn("prefer\ncat <path> --node <node_id>", BASH_TOOL_DESCRIPTION)
self.assertIn("stop if the evidence is sufficient", AGENT_TOOL_POLICY)
self.assertIn("continue with another chunk before answering", BASH_TOOL_DESCRIPTION)
def test_prompt_requires_stat_for_metadata_questions(self):
self.assertIn("stat --schema and stat <target>", AGENT_TOOL_POLICY)