fix(filesystem): restore summary vector search in cli

This commit is contained in:
BukeLy 2026-05-26 16:40:14 +08:00
parent 7e70b580f0
commit fc0be1aeee
6 changed files with 147 additions and 3 deletions

View file

@@ -58,3 +58,60 @@ def test_semantic_search_scope_filters_explicit_source_type_facets():
{"folder_path": "/documents"}
) == {}
def test_existing_summary_projection_index_configures_retrieval_backend(tmp_path, monkeypatch):
    """Check that an existing summary index triggers retrieval configuration.

    A summary-only vector index holding one record is written under the
    workspace's ``artifacts/projection_indexes`` directory. The test then
    expects ``configure_existing_projection_retrieval`` to return ``True`` and
    to call ``configure_hybrid_projection_retrieval`` exactly once, with the
    summary index directory and embedding settings that match the values
    stored in the index metadata (plus ``embedding_timeout=60``).
    """
    from pageindex.filesystem import PageIndexFileSystem
    from pageindex.filesystem.semantic_index import SemanticIndexRecord, SQLiteVecSemanticIndex

    workspace_root = tmp_path / "workspace"
    projection_dir = workspace_root / "artifacts" / "projection_indexes"

    # Seed a three-dimensional summary index before the filesystem is built.
    index_metadata = {
        "channel": "summary",
        "embedding_provider": "openai",
        "embedding_model": "test-embedding",
        "embedding_dimensions": 3,
    }
    vector_index = SQLiteVecSemanticIndex(projection_dir / "summary_only_vector.sqlite")
    vector_index.reset(dimension=3, metadata=index_metadata)

    seed_record = SemanticIndexRecord(
        file_ref="file_a",
        external_id="doc_a",
        source_type="documents",
        source_path="documents/a.pdf",
        title="A",
        text="summary",
        vector=[1.0, 0.0, 0.0],
    )
    vector_index.upsert_many([seed_record])

    fs = PageIndexFileSystem(workspace_root)
    recorded_calls = []

    def record_configure(index_dir_arg, **kwargs):
        # Replacement for the real configuration method: remember the
        # arguments and install a canned backend on the filesystem.
        recorded_calls.append((index_dir_arg, kwargs))
        fs.semantic_retrieval_backend = SummaryBackend("doc_a")
        return fs.semantic_retrieval_backend

    monkeypatch.setattr(fs, "configure_hybrid_projection_retrieval", record_configure)

    assert fs.configure_existing_projection_retrieval() is True

    expected_kwargs = {
        "embedding_provider": "openai",
        "embedding_model": "test-embedding",
        "embedding_dimensions": 3,
        "embedding_timeout": 60,
    }
    assert recorded_calls == [(fs.summary_projection_index_dir, expected_kwargs)]
    assert fs.semantic_retrieval_channels() == ("summary",)

View file

@@ -204,6 +204,11 @@ class PIFSAgentStreamTest(unittest.TestCase):
self.assertIn("do not infer metadata presence or absence", AGENT_TOOL_POLICY)
self.assertIn("questions about metadata fields", BASH_TOOL_DESCRIPTION)
def test_prompt_routes_summary_search_to_search_summary(self):
    """Check that the prompts direct summary searches to ``search-summary``.

    Each expected phrase must appear in the named prompt text; the first
    missing phrase fails the test.
    """
    expected_phrases = (
        ("search-summary when the user asks for", BASH_TOOL_DESCRIPTION),
        ("use search-summary <query> <folder>", AGENT_TOOL_POLICY),
        ("do not translate that request into find --where", AGENT_TOOL_POLICY),
    )
    for phrase, prompt_text in expected_phrases:
        self.assertIn(phrase, prompt_text)
def test_system_prompt_sets_workspace_identity_and_scope(self):
self.assertIn("PageIndex FileSystem Demo Agent", AGENT_SYSTEM_PROMPT)
self.assertIn("VectifyAI Team", AGENT_SYSTEM_PROMPT)

View file

@@ -5,6 +5,24 @@ from pathlib import Path
class FakeFileSystem:
    """Minimal stand-in for the filesystem object built by the CLI.

    It stores the workspace as a ``Path`` and records whether
    ``configure_existing_projection_retrieval`` has been invoked, so tests can
    assert on that call.
    """

    def __init__(self, workspace):
        # The flag starts cleared; only the configure call below sets it.
        self.projection_retrieval_configured = False
        self.workspace = Path(workspace)

    def configure_existing_projection_retrieval(self):
        """Mark projection retrieval as configured and return ``True``."""
        self.projection_retrieval_configured = True
        return True
def test_cli_workspace_configures_existing_projection_retrieval(monkeypatch, tmp_path):
    """Check that the CLI workspace helper configures projection retrieval.

    ``PageIndexFileSystem`` in the ``cli`` module is swapped for
    ``FakeFileSystem``, whose flag is only set when
    ``configure_existing_projection_retrieval`` is called on it.
    """
    from pageindex.filesystem import cli

    monkeypatch.setattr(cli, "PageIndexFileSystem", FakeFileSystem)
    workspace_dir = tmp_path / "workspace"

    # The helper is given the workspace as a string, not a Path.
    built = cli._filesystem_from_workspace(str(workspace_dir))

    assert built.workspace == workspace_dir
    assert built.projection_retrieval_configured is True
def test_cli_passthrough_invokes_pifs_command_executor(monkeypatch, capsys, tmp_path):