import json
from pathlib import Path

import pytest


# NOTE(review): this module appears to have passed through a whitespace-
# collapsing step at some point. Expected-output literals below are kept
# exactly as found; confirm the spacing inside them against the real renderer.


def _register_find_fixture(tmp_path: Path):
    """Register four small text documents and return a JSON-mode executor.

    The documents live under ``/documents``, ``/documents/team`` (two files)
    and ``/documents/team/deep``; three carry ``department="ops"`` and one
    carries ``department="finance"``.
    """
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem

    source_dir = tmp_path / "source"
    source_dir.mkdir()
    filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")
    filesystem.metadata.register_schema({"fields": {"department": "string"}})

    # (filename, folder_path, external_id, title, department), registered in
    # exactly this order.
    fixture_files = (
        ("root.txt", "/documents", "doc_root", "Root document", "ops"),
        ("child.txt", "/documents/team", "doc_child", "Child document", "ops"),
        ("deep.txt", "/documents/team/deep", "doc_deep", "Deep document", "ops"),
        ("other.txt", "/documents/team", "doc_other", "Other document", "finance"),
    )
    for filename, folder_path, external_id, title, department in fixture_files:
        source_file = source_dir / filename
        source_file.write_text(f"{title} fixture text", encoding="utf-8")
        filesystem.register_file(
            storage_uri=source_file.as_uri(),
            source_path=f"docs/{filename}",
            folder_path=folder_path,
            external_id=external_id,
            title=title,
            content=source_file.read_text(encoding="utf-8"),
            metadata={"department": department},
        )

    return PIFSCommandExecutor(filesystem, json_output=True)


def _data(output: str):
    """Decode a JSON command response and return its ``"data"`` member."""
    payload = json.loads(output)
    return payload["data"]


def test_find_maxdepth_one_returns_direct_files_only(tmp_path):
    """``find -maxdepth 1 -type f`` lists only the file directly in the folder."""
    executor = _register_find_fixture(tmp_path)

    raw = executor.execute("find /documents -maxdepth 1 -type f")
    found_ids = [entry["external_id"] for entry in _data(raw)]

    assert found_ids == ["doc_root"]


def test_find_output_is_path_first_without_session_refs(tmp_path):
    """Text-mode ``find`` output leads with the path and has no ``ref_1``."""
    executor = _register_find_fixture(tmp_path)
    executor.json_output = False

    rendered = executor.execute("find /documents -maxdepth 1 -type f")

    assert rendered.startswith("/documents/Root document id=doc_root file_ref=file_")
    assert "ref_1" not in rendered
    assert "title=Root document" in rendered


def test_stable_path_targets_work_without_session_refs(tmp_path):
    """``stat`` and ``cat`` accept a quoted document path as their target."""
    executor = _register_find_fixture(tmp_path)
    executor.json_output = False

    stat_text = executor.execute("stat '/documents/Root document'")
    cat_text = executor.execute("cat '/documents/Root document' --all")

    assert "target: /documents/Root document" in stat_text
    assert "document_id: doc_root" in stat_text
    assert "Root document fixture text" in cat_text


def test_shell_limits_reject_context_expanding_counts(tmp_path):
    """Each command rejects a count one above its expected maximum."""
    from pageindex.filesystem.commands import PIFSCommandError

    executor = _register_find_fixture(tmp_path)

    # (command exceeding the cap by one, cap named in the error message)
    over_limit_cases = (
        ("find /documents --limit 51", 50),
        ("grep --limit 21 Root /documents", 20),
        ("ls /documents --limit 101", 100),
        ("tree /documents --limit 201", 200),
        ("head -n 101 /documents/Root\\ document", 100),
        ("tail -n 101 /documents/Root\\ document", 100),
        ("sed -n 1,101p /documents/Root\\ document", 100),
    )
    for command, limit in over_limit_cases:
        with pytest.raises(PIFSCommandError, match=f"at most {limit}"):
            executor.execute(command)


def test_grep_rejects_regex_alternation_patterns(tmp_path):
    """``grep`` refuses ``a|b`` patterns, both standalone and in a pipeline."""
    from pageindex.filesystem.commands import PIFSCommandError

    executor = _register_find_fixture(tmp_path)
    executor.json_output = False

    with pytest.raises(PIFSCommandError, match="does not support regex alternation"):
        executor.execute('grep -R "Root|Child" /documents')

    with pytest.raises(PIFSCommandError, match="multiple grep commands"):
        executor.execute('find /documents -type f | grep "Root|Child"')


def test_stat_shell_output_includes_unified_metadata_status(tmp_path):
    """Text-mode ``stat`` shows caller metadata, generated metadata and status."""
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.metadata_generation import MetadataGenerationResult

    source = tmp_path / "source.txt"
    source.write_text("fixture text", encoding="utf-8")

    class SummaryGenerator:
        """Stub generator that returns one fixed string for every field."""

        def generate(self, document, *, fields):
            return MetadataGenerationResult(
                values=dict.fromkeys(fields, "Generated summary for retrieval.")
            )

    filesystem = PageIndexFileSystem(
        workspace=tmp_path / "workspace",
        metadata_generator=SummaryGenerator(),
    )
    summary_only_policy = {
        "fields": {
            "summary": True,
            "doc_type": False,
            "domain": False,
            "topic": False,
        }
    }
    filesystem.register_file(
        storage_uri=source.as_uri(),
        source_path="docs/source.txt",
        folder_path="/documents",
        external_id="doc_generated",
        title="Generated metadata document",
        content=source.read_text(encoding="utf-8"),
        metadata={"department": "ops"},
        metadata_policy=summary_only_policy,
    )

    executor = PIFSCommandExecutor(filesystem, json_output=False)
    stat_text = executor.execute("stat /documents/'Generated metadata document'")

    assert "metadata:" in stat_text
    assert " department: ops" in stat_text
    assert " summary: Generated summary for retrieval." in stat_text
    assert "metadata_status: generated" in stat_text


def test_stat_field_reads_one_metadata_field_across_multiple_targets(tmp_path):
    """``stat --field`` returns the full field value for each listed target."""
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.commands import PIFSCommandError
    from pageindex.filesystem.metadata_generation import MetadataGenerationResult

    class SummaryGenerator:
        """Stub generator producing a long, multi-line summary per field."""

        def generate(self, document, *, fields):
            return MetadataGenerationResult(
                values={
                    name: (
                        f"Summary for {document.title}\n"
                        + "full summary token " * 80
                    )
                    for name in fields
                }
            )

    filesystem = PageIndexFileSystem(
        workspace=tmp_path / "workspace",
        metadata_generator=SummaryGenerator(),
    )
    for number in (1, 2):
        source = tmp_path / f"source{number}.txt"
        source.write_text(f"fixture text {number}", encoding="utf-8")
        filesystem.register_file(
            storage_uri=source.as_uri(),
            source_path=f"docs/source{number}.txt",
            folder_path="/documents",
            external_id=f"doc_summary_{number}",
            title=f"Summary document {number}",
            content=source.read_text(encoding="utf-8"),
            metadata_policy={
                "fields": {
                    "summary": True,
                    "doc_type": False,
                    "domain": False,
                    "topic": False,
                }
            },
        )

    # The same command is issued once in text mode and once in JSON mode.
    command = (
        "stat --field summary "
        "/documents/'Summary document 1' /documents/'Summary document 2'"
    )

    executor = PIFSCommandExecutor(filesystem, json_output=False)
    output = executor.execute(command)

    assert "/documents/Summary document 1:" in output
    assert "summary: Summary for Summary document 1" in output
    assert "full summary token" in output
    assert "[truncated]" not in output
    assert "/documents/Summary document 2:" in output
    assert "summary: Summary for Summary document 2" in output

    json_executor = PIFSCommandExecutor(filesystem, json_output=True)
    data = _data(json_executor.execute(command))

    assert data["mode"] == "field_values"
    assert data["target_count"] == 2
    first_entry = data["data"][0]
    assert first_entry["field"] == "summary"
    assert first_entry["value"].startswith("Summary for Summary document 1\n")
    assert first_entry["value"].count("full summary token") == 80

    with pytest.raises(PIFSCommandError, match="Unknown metadata field"):
        executor.execute("stat --field missing_field /documents/'Summary document 1'")


def test_stat_field_rejects_more_than_twenty_targets(tmp_path):
    """``stat --field`` with 21 targets raises an "at most 20" error."""
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.commands import PIFSCommandError

    filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")

    document_count = 21
    for index in range(document_count):
        source = tmp_path / f"source{index}.txt"
        source.write_text(f"fixture text {index}", encoding="utf-8")
        filesystem.register_file(
            storage_uri=source.as_uri(),
            source_path=f"docs/source{index}.txt",
            folder_path="/documents",
            external_id=f"doc_{index}",
            title=f"Document {index}",
            content=source.read_text(encoding="utf-8"),
            metadata={"department": "ops"},
        )
    targets = [f"/documents/'Document {index}'" for index in range(document_count)]

    executor = PIFSCommandExecutor(filesystem, json_output=False)
    with pytest.raises(PIFSCommandError, match="at most 20"):
        executor.execute("stat --field department " + " ".join(targets))


def test_register_rejects_pifs_owned_metadata_fields(tmp_path):
    """Supplying ``summary`` as caller metadata raises ``ValueError``."""
    from pageindex.filesystem import PageIndexFileSystem

    source = tmp_path / "source.txt"
    source.write_text("fixture text", encoding="utf-8")
    filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace")

    with pytest.raises(ValueError, match="PIFS-owned generated field"):
        filesystem.register_file(
            storage_uri=source.as_uri(),
            source_path="docs/source.txt",
            folder_path="/documents",
            external_id="doc_conflict",
            title="Conflict document",
            content=source.read_text(encoding="utf-8"),
            metadata={"summary": "caller summary"},
        )


def test_batch_metadata_status_generates_into_unified_metadata(tmp_path):
    """A batch-policy file is pending until ``batch_generate`` fills it in."""
    from pageindex.filesystem import PageIndexFileSystem
    from pageindex.filesystem.metadata_generation import MetadataGenerationResult

    source = tmp_path / "source.txt"
    source.write_text("fixture text", encoding="utf-8")

    class SummaryGenerator:
        """Stub generator that always returns a fixed ``summary`` value."""

        def generate(self, document, *, fields):
            return MetadataGenerationResult(values={"summary": "Batch generated summary."})

    filesystem = PageIndexFileSystem(
        workspace=tmp_path / "workspace",
        metadata_generator=SummaryGenerator(),
    )
    batch_policy = {
        "batch": True,
        "fields": {
            "summary": True,
            "doc_type": False,
            "domain": False,
            "topic": False,
        },
    }
    file_ref = filesystem.register_file(
        storage_uri=source.as_uri(),
        source_path="docs/source.txt",
        folder_path="/documents",
        external_id="doc_batch",
        title="Batch document",
        content=source.read_text(encoding="utf-8"),
        metadata={"department": "ops"},
        metadata_policy=batch_policy,
    )

    # State right after registration, before any batch run.
    before = filesystem.store.get_file(file_ref)
    assert "summary" not in before.metadata
    assert before.metadata_status["fields"]["summary"]["status"] == "pending_submit"

    result = filesystem.batch_generate()
    after = filesystem.store.get_file(file_ref)

    assert result["generated"] == 1
    assert after.metadata["summary"] == "Batch generated summary."
    assert after.metadata["department"] == "ops"
    assert after.metadata_status["fields"]["summary"]["status"] == "generated"


def test_find_maxdepth_zero_type_directory_returns_start_folder(tmp_path):
    """``find -maxdepth 0 -type d`` yields only the starting folder."""
    executor = _register_find_fixture(tmp_path)

    raw = executor.execute("find /documents -maxdepth 0 -type d")
    found_paths = [entry["path"] for entry in _data(raw)]

    assert found_paths == ["/documents"]


def test_find_directory_output_renders_root_without_double_slash(tmp_path):
    """Text-mode directory listing from ``/`` never contains ``//``."""
    executor = _register_find_fixture(tmp_path)
    executor.json_output = False

    rendered = executor.execute("find / -maxdepth 1 -type d")
    first_line = rendered.splitlines()[0]

    assert first_line == "/ folders=1 files=0"
    assert "//" not in rendered
    assert "/documents/ folders=1 files=1" in rendered


def test_find_maxdepth_combines_with_where_and_limit(tmp_path):
    """``-maxdepth``, ``--where`` and ``--limit`` can be used together."""
    executor = _register_find_fixture(tmp_path)

    command = """find /documents -maxdepth 2 -type f --where '{"department":"ops"}' --limit 1"""
    rows = _data(executor.execute(command))

    assert len(rows) == 1
    only_row = rows[0]
    assert only_row["metadata"]["department"] == "ops"
    assert only_row["folder_path"] in {"/documents", "/documents/team"}


def test_find_maxdepth_rejects_invalid_values_and_unsupported_options(tmp_path):
    """Bad ``-maxdepth`` values and ``-exec`` raise ``PIFSCommandError``."""
    from pageindex.filesystem.commands import PIFSCommandError

    executor = _register_find_fixture(tmp_path)

    # A non-integer first, then a negative integer.
    for bad_depth in ("nope", "-1"):
        with pytest.raises(PIFSCommandError, match="find -maxdepth requires an integer >= 0"):
            executor.execute(f"find /documents -maxdepth {bad_depth} -type f")

    with pytest.raises(PIFSCommandError, match="Unsupported find option: -exec"):
        executor.execute("find /documents -maxdepth 1 -type f -exec")


def test_find_maxdepth_is_advertised_to_agents(tmp_path):
    """The command-surface description and capabilities mention ``-maxdepth``."""
    executor = _register_find_fixture(tmp_path)

    surfaces = executor.describe_available_command_surfaces()
    assert "-maxdepth N -type f|d" in surfaces

    capabilities = executor.command_capabilities()
    assert capabilities["retrieval"]["lexical"]["find_maxdepth"] is True


def test_where_path_error_points_to_folder_scope(tmp_path):
    """Filtering on ``path`` via ``--where`` raises an error with guidance."""
    from pageindex.filesystem.commands import PIFSCommandError

    executor = _register_find_fixture(tmp_path)

    with pytest.raises(PIFSCommandError) as caught:
        executor.execute("""find --where '{"path":"/documents"}'""")

    message = str(caught.value)
    assert "Folder paths are positional PIFS paths" in message
    assert "find /documents -type f" in message
    assert "stat --schema" in message