From 2b69719f603dcc4c3f8693eafe06e9733dadcb65 Mon Sep 17 00:00:00 2001 From: BukeLy Date: Tue, 26 May 2026 20:49:28 +0800 Subject: [PATCH] feat(filesystem): support space-separated cat node ids --- pageindex/filesystem/commands.py | 11 ++++++++--- tests/test_pageindex_structural_read.py | 11 +++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pageindex/filesystem/commands.py b/pageindex/filesystem/commands.py index ece7aa0..29ea46a 100644 --- a/pageindex/filesystem/commands.py +++ b/pageindex/filesystem/commands.py @@ -517,7 +517,10 @@ class PIFSCommandExecutor: if i >= len(args): raise PIFSCommandError("cat --node requires a node id") structural_mode = "node" - node_ids.extend(self._parse_node_ids(args[i])) + while i < len(args) and not args[i].startswith("-"): + node_ids.extend(self._parse_node_ids(args[i])) + i += 1 + i -= 1 elif arg == "--page": i += 1 if i >= len(args): @@ -528,8 +531,10 @@ class PIFSCommandExecutor: raise PIFSCommandError(f"Unsupported cat option: {arg}") else: raise PIFSCommandError( - "cat accepts one file target. Use: cat --page , " - "for example: cat /documents/report.pdf --page 31-59" + "cat accepts one file target. Use target-first syntax: " + "cat --structure, " + "cat --node 0002 0004, or " + "cat --page 31-33" ) i += 1 if structural_mode == "structure": diff --git a/tests/test_pageindex_structural_read.py b/tests/test_pageindex_structural_read.py index 500e4c5..cd104c5 100644 --- a/tests/test_pageindex_structural_read.py +++ b/tests/test_pageindex_structural_read.py @@ -481,11 +481,18 @@ def test_cat_structure_page_node_and_text_outputs_are_hard_limited(): executor.execute("cat dsid_limited_pdf --page 1-4") nodes = json.loads( - executor.execute("cat dsid_limited_pdf --node 0001,0002,0003,0004,0005") + executor.execute("cat dsid_limited_pdf --node 0001 0002 0003 0004 0005") ) assert nodes["data"]["node_ids"] == ["0001", "0002", "0003", "0004", "0005"] + comma_nodes = json.loads( + executor.execute("cat dsid_limited_pdf --node 0001,0002") + ) + assert comma_nodes["data"]["node_ids"] == ["0001", "0002"] with pytest.raises(PIFSCommandError, match="at most 5"): - executor.execute("cat dsid_limited_pdf --node 0001,0002,0003,0004,0005,0006") + executor.execute("cat dsid_limited_pdf --node 0001 0002 0003 0004 0005 0006") + + with pytest.raises(PIFSCommandError, match="cat accepts one file target"): + executor.execute("cat dsid_limited_pdf 0001") text = json.loads(executor.execute("cat dsid_long_text --all")) assert "line 100" in text["data"]["text"]