PageIndex/tests/test_pifs_cli.py

430 lines
13 KiB
Python
Raw Permalink Normal View History

import builtins
import os
import sys
from pathlib import Path
import pytest
class FakeFileSystem:
    """Lightweight test double patched in for ``cli.PageIndexFileSystem``.

    It remembers the workspace it was built with and whether
    ``configure_existing_projection_retrieval`` has been called.
    """

    def __init__(self, workspace):
        """Store *workspace* as a ``Path`` and start with retrieval unconfigured."""
        self.projection_retrieval_configured = False
        self.workspace = Path(workspace)

    def configure_existing_projection_retrieval(self):
        """Flag that configuration was requested and report success."""
        self.projection_retrieval_configured = True
        return True
def test_cli_workspace_configures_existing_projection_retrieval(monkeypatch, tmp_path):
    """``_filesystem_from_workspace`` builds the filesystem and configures projection retrieval."""
    from pageindex.filesystem import cli

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    workspace_dir = tmp_path / "workspace"

    built = cli._filesystem_from_workspace(str(workspace_dir))

    assert built.workspace == workspace_dir
    assert built.projection_retrieval_configured is True
def test_cli_workspace_without_projection_index_does_not_require_sqlite_vec(
    monkeypatch, tmp_path
):
    """A workspace without a projection index loads even when ``sqlite_vec`` is unimportable."""
    from pageindex.filesystem import cli

    workspace_dir = tmp_path / "workspace"
    original_import = builtins.__import__

    # Evict any cached copies so later imports re-execute these modules while
    # the blocking hook below is active.
    for cached_module in (
        "pageindex.filesystem.semantic_projection",
        "pageindex.filesystem.semantic_index",
        "sqlite_vec",
    ):
        monkeypatch.delitem(sys.modules, cached_module, raising=False)

    def import_without_sqlite_vec(name, globals=None, locals=None, fromlist=(), level=0):
        # Parameter names mirror builtins.__import__ so keyword calls keep working.
        top_level_package = name.split(".", 1)[0]
        if top_level_package != "sqlite_vec":
            return original_import(name, globals, locals, fromlist, level)
        raise ModuleNotFoundError("No module named 'sqlite_vec'", name="sqlite_vec")

    monkeypatch.setattr(builtins, "__import__", import_without_sqlite_vec)

    built = cli._filesystem_from_workspace(str(workspace_dir))

    assert built.workspace == workspace_dir
    assert built.semantic_retrieval_channels() == ()
def test_cli_workspace_surfaces_projection_dimension_mismatch(tmp_path):
    """Loading a workspace whose summary index has dimension 3 raises a descriptive error.

    The expected message names the stored dimension, the
    ``summary_projection_embedding_dimensions`` value of 1024, and a rebuild hint.
    """
    from pageindex.filesystem import cli
    from pageindex.filesystem.semantic_index import SemanticIndexRecord, SQLiteVecSemanticIndex

    workspace_dir = tmp_path / "workspace"
    projection_dir = workspace_dir / "artifacts" / "projection_indexes"

    # Seed a 3-dimensional summary index holding a single record.
    index_metadata = {
        "channel": "summary",
        "embedding_provider": "test",
        "embedding_model": "fake",
        "embedding_dimensions": 3,
    }
    stored_index = SQLiteVecSemanticIndex(projection_dir / "summary_only_vector.sqlite")
    stored_index.reset(dimension=3, metadata=index_metadata)

    only_record = SemanticIndexRecord(
        file_ref="file_a",
        external_id="doc_a",
        source_type="documents",
        title="A",
        text="summary",
        vector=[1.0, 0.0, 0.0],
    )
    stored_index.upsert_many([only_record])

    expected_message = (
        "summary projection index dimension mismatch: .*"
        "dimension 3.*summary_projection_embedding_dimensions is 1024.*Rebuild"
    )
    with pytest.raises(RuntimeError, match=expected_message):
        cli._filesystem_from_workspace(str(workspace_dir))
def test_cli_passthrough_invokes_pifs_command_executor(monkeypatch, capsys, tmp_path):
    """``main`` hands ``ls /documents`` to the executor as one command and prints its result.

    The ``--json`` flag is expected to arrive as the executor's ``json_output``
    option rather than as part of the command string.
    """
    from pageindex.filesystem import cli

    workspace_dir = tmp_path / "workspace"
    created_executors = []

    class RecordingExecutor:
        """Executor double that logs every command it is asked to run."""

        def __init__(self, filesystem, *, json_output=False):
            self.commands = []
            self.filesystem = filesystem
            self.json_output = json_output
            created_executors.append(self)

        def execute(self, command):
            self.commands.append(command)
            return f"executed:{command}"

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "PIFSCommandExecutor", RecordingExecutor)

    argv = ["--workspace", str(workspace_dir), "ls", "/documents", "--json"]
    exit_code = cli.main(argv)

    assert exit_code == 0
    assert capsys.readouterr().out == "executed:ls /documents\n"
    assert len(created_executors) == 1
    executor = created_executors[0]
    assert executor.filesystem.workspace == workspace_dir
    assert executor.json_output is True
    assert executor.commands == ["ls /documents"]
def test_cli_set_workspace_persists_default(monkeypatch, capsys, tmp_path):
    """``set workspace`` reports the new default and writes it to the config file.

    The test points ``PIFS_CONFIG_FILE`` at a temporary path and reads the
    persisted configuration back from there.
    """
    import json

    from pageindex.filesystem import cli

    config_path = tmp_path / "pifs.json"
    workspace = tmp_path / "workspace"
    monkeypatch.setenv("PIFS_CONFIG_FILE", str(config_path))

    status = cli.main(["set", "workspace", str(workspace)])

    assert status == 0
    output = capsys.readouterr().out
    assert f"workspace: {workspace}" in output
    assert f"config: {config_path}" in output

    # Compare the parsed document instead of raw bytes: JSON escapes backslashes
    # and quotes, so a hand-built literal breaks for Windows-style paths and is
    # needlessly coupled to the serializer's exact whitespace.
    persisted = config_path.read_text(encoding="utf-8")
    assert persisted.endswith("\n")
    assert json.loads(persisted) == {"workspace": str(workspace)}
def test_cli_passthrough_uses_configured_workspace(monkeypatch, capsys, tmp_path):
    """With no ``--workspace`` and ``PIFS_WORKSPACE`` unset, the saved workspace is used."""
    from pageindex.filesystem import cli

    config_file = tmp_path / "pifs.json"
    workspace_dir = tmp_path / "workspace"
    created_executors = []

    monkeypatch.setenv("PIFS_CONFIG_FILE", str(config_file))
    monkeypatch.delenv("PIFS_WORKSPACE", raising=False)

    class RecordingExecutor:
        """Executor double that logs every command it is asked to run."""

        def __init__(self, filesystem, *, json_output=False):
            self.commands = []
            self.filesystem = filesystem
            self.json_output = json_output
            created_executors.append(self)

        def execute(self, command):
            self.commands.append(command)
            return f"executed:{command}"

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "PIFSCommandExecutor", RecordingExecutor)

    # First persist the default workspace, discarding whatever that step printed.
    persist_status = cli.main(["set", "workspace", str(workspace_dir)])
    assert persist_status == 0
    capsys.readouterr()

    exit_code = cli.main(["ls", "/documents"])

    assert exit_code == 0
    assert capsys.readouterr().out == "executed:ls /documents\n"
    assert created_executors[0].filesystem.workspace == workspace_dir
def test_cli_ask_invokes_agent_with_question(monkeypatch, capsys, tmp_path):
    """``ask`` joins the positional words into one question and forwards the parsed options."""
    from pageindex.filesystem import cli

    workspace_dir = tmp_path / "workspace"
    recorded_calls = []

    def recording_agent(filesystem, question, **kwargs):
        recorded_calls.append((filesystem, question, kwargs))
        return "agent answer"

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "run_pifs_agent", recording_agent)

    option_args = [
        "--workspace", str(workspace_dir),
        "--model", "test-model",
        "--stream-mode", "off",
        "--max-turns", "7",
        "--max-seconds", "3.5",
        "--reasoning-effort", "low",
        "--reasoning-summary", "concise",
    ]
    argv = ["ask", *option_args, "What", "is", "inside?"]

    exit_code = cli.main(argv)

    assert exit_code == 0
    assert capsys.readouterr().out == "agent answer\n"

    seen_filesystem, seen_question, seen_options = recorded_calls[0]
    assert seen_filesystem.workspace == workspace_dir
    assert seen_question == "What is inside?"
    # Numeric options must arrive already converted (int / float), not as strings.
    expected_options = {
        "model": "test-model",
        "stream_mode": "off",
        "max_turns": 7,
        "max_seconds": 3.5,
        "reasoning_effort": "low",
        "reasoning_summary": "concise",
    }
    assert seen_options == expected_options
def test_cli_ask_defaults_to_global_agent_model(monkeypatch, capsys, tmp_path):
    """Without ``--model`` and with the model env vars unset, the agent gets ``gpt-5.4``."""
    from pageindex.filesystem import cli

    workspace_dir = tmp_path / "workspace"
    forwarded_options = []

    # Clear both overrides so only the built-in default can supply the model.
    for override in ("PIFS_AGENT_MODEL", "PIFS_MODEL"):
        monkeypatch.delenv(override, raising=False)

    def recording_agent(filesystem, question, **kwargs):
        forwarded_options.append(kwargs)
        return "agent answer"

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "run_pifs_agent", recording_agent)

    exit_code = cli.main(["ask", "--workspace", str(workspace_dir), "What?"])

    assert exit_code == 0
    assert capsys.readouterr().out == "agent answer\n"
    assert forwarded_options[0]["model"] == "gpt-5.4"
def test_cli_ask_loads_env_file_before_running_agent(monkeypatch, capsys, tmp_path):
    """``ask --env-file`` exposes the file's variables in ``os.environ`` during the agent call."""
    from pageindex.filesystem import cli

    workspace = tmp_path / "workspace"
    env_file = tmp_path / ".env"
    env_file.write_text("OPENAI_API_KEY=from-dotenv\n", encoding="utf-8")
    agent_keys = []

    def fake_run_pifs_agent(filesystem, question, **kwargs):
        # Capture the key exactly as the agent would see it at call time.
        agent_keys.append(os.environ.get("OPENAI_API_KEY"))
        return "agent answer"

    # Set-then-delete instead of delenv(raising=False): monkeypatch records
    # nothing when the variable is absent to begin with, so a value the CLI
    # presumably leaves in os.environ would leak into later tests. Registering
    # the key first guarantees teardown restores the original state.
    monkeypatch.setenv("OPENAI_API_KEY", "placeholder-before-dotenv")
    monkeypatch.delenv("OPENAI_API_KEY")

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "run_pifs_agent", fake_run_pifs_agent)

    status = cli.main(
        [
            "ask",
            "--workspace",
            str(workspace),
            "--env-file",
            str(env_file),
            "What",
            "is",
            "inside?",
        ]
    )

    assert status == 0
    assert capsys.readouterr().out == "agent answer\n"
    assert agent_keys == ["from-dotenv"]
def test_cli_chat_runs_one_question_and_exits(monkeypatch, capsys, tmp_path):
    """``chat`` ignores a blank line, runs the one question, and stops at ``exit``.

    No ``--stream-mode`` is given; the session is expected to receive ``"all"``
    and the CLI itself to print nothing.
    """
    from pageindex.filesystem import cli

    workspace_dir = tmp_path / "workspace"
    scripted_lines = iter(["", "Summarize the workspace", "exit"])
    created_sessions = []
    asked = []

    class RecordingSession:
        """Session double that logs its constructor arguments and every question."""

        def __init__(self, filesystem, **kwargs):
            self.kwargs = kwargs
            self.filesystem = filesystem
            created_sessions.append(self)

        def run(self, question):
            asked.append((self, question))
            return f"answer:{question}"

    def scripted_input(prompt=""):
        return next(scripted_lines)

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "PIFSAgentSession", RecordingSession)
    monkeypatch.setattr("builtins.input", scripted_input)

    exit_code = cli.main(["chat", "--workspace", str(workspace_dir), "--model", "test-model"])

    assert exit_code == 0
    assert capsys.readouterr().out == ""
    assert len(created_sessions) == 1
    session = created_sessions[0]
    assert session.filesystem.workspace == workspace_dir
    assert asked == [(session, "Summarize the workspace")]
    assert session.kwargs["model"] == "test-model"
    assert session.kwargs["stream_mode"] == "all"
def test_cli_chat_sanitizes_control_input(monkeypatch, capsys, tmp_path):
    """Control characters typed into ``chat`` are cleaned before reaching the session.

    A lone ``\\x12`` yields no question at all, and ``he\\x7fllo\\x1b[A`` is
    expected to reach the session as ``hllo``.
    """
    from pageindex.filesystem import cli

    workspace_dir = tmp_path / "workspace"
    scripted_lines = iter(["\x12", "he\x7fllo\x1b[A", "exit"])
    received_questions = []

    class RecordingSession:
        """Session double that only logs the questions it is given."""

        def __init__(self, filesystem, **kwargs):
            pass

        def run(self, question):
            received_questions.append(question)
            return f"answer:{question}"

    def scripted_input(prompt=""):
        return next(scripted_lines)

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "PIFSAgentSession", RecordingSession)
    monkeypatch.setattr("builtins.input", scripted_input)

    argv = ["chat", "--workspace", str(workspace_dir), "--stream-mode", "off"]
    exit_code = cli.main(argv)

    assert exit_code == 0
    assert received_questions == ["hllo"]
    assert capsys.readouterr().out == "answer:hllo\n"
def test_cli_ask_does_not_reprint_streamed_agent_output(monkeypatch, capsys, tmp_path):
    """With ``--stream-mode all`` only the agent's own output appears on stdout.

    The fake agent prints one line and returns a different string; the returned
    string must not be echoed by the CLI.
    """
    from pageindex.filesystem import cli

    workspace_dir = tmp_path / "workspace"

    def streaming_agent(filesystem, question, **kwargs):
        print("streamed answer")
        return "returned answer"

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "run_pifs_agent", streaming_agent)

    argv = ["ask", "--workspace", str(workspace_dir), "--stream-mode", "all"]
    argv += ["What", "is", "inside?"]
    exit_code = cli.main(argv)

    assert exit_code == 0
    assert capsys.readouterr().out == "streamed answer\n"
def test_cli_chat_stream_mode_can_be_overridden(monkeypatch, tmp_path):
    """``chat --stream-mode tools`` passes ``stream_mode="tools"`` to the session."""
    from pageindex.filesystem import cli

    workspace_dir = tmp_path / "workspace"
    scripted_lines = iter(["Summarize the workspace", "exit"])
    constructor_options = []

    class RecordingSession:
        """Session double that logs the keyword arguments it was built with."""

        def __init__(self, filesystem, **kwargs):
            constructor_options.append(kwargs)

        def run(self, question):
            return f"answer:{question}"

    def scripted_input(prompt=""):
        return next(scripted_lines)

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "PIFSAgentSession", RecordingSession)
    monkeypatch.setattr("builtins.input", scripted_input)

    argv = ["chat", "--workspace", str(workspace_dir), "--stream-mode", "tools"]
    exit_code = cli.main(argv)

    assert exit_code == 0
    assert constructor_options[0]["stream_mode"] == "tools"
def test_cli_chat_reuses_one_agent_session_for_multiple_questions(monkeypatch, capsys, tmp_path):
    """Two questions in one ``chat`` run go to a single session, answered in order."""
    from pageindex.filesystem import cli

    workspace_dir = tmp_path / "workspace"
    scripted_lines = iter(["first", "second", "exit"])
    created_sessions = []

    class RecordingSession:
        """Session double that logs the questions asked of each instance."""

        def __init__(self, filesystem, **kwargs):
            self.questions = []
            created_sessions.append(self)

        def run(self, question):
            self.questions.append(question)
            return f"answer:{question}"

    def scripted_input(prompt=""):
        return next(scripted_lines)

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    monkeypatch.setattr(cli, "PIFSAgentSession", RecordingSession)
    monkeypatch.setattr("builtins.input", scripted_input)

    argv = ["chat", "--workspace", str(workspace_dir), "--stream-mode", "off"]
    exit_code = cli.main(argv)

    assert exit_code == 0
    assert len(created_sessions) == 1
    assert created_sessions[0].questions == ["first", "second"]
    assert capsys.readouterr().out == "answer:first\nanswer:second\n"