refactor(filesystem): consolidate semantic projection modules

This commit is contained in:
BukeLy 2026-05-31 23:57:12 +08:00
parent 34fa8f7b42
commit e368562e03
10 changed files with 233 additions and 385 deletions

View file

@@ -7,7 +7,7 @@ import pytest
 def test_filesystem_lazy_exports_remain_public():
 import pageindex.filesystem as filesystem
 from pageindex.filesystem import (
-HybridProjectionSearchBackend,
+SemanticProjectionSearchBackend,
 RebuildableSemanticIndex,
 SemanticIndexRecord,
 SemanticSearchResult,
@@ -16,7 +16,7 @@ def test_filesystem_lazy_exports_remain_public():
 )
 for name in (
-"HybridProjectionSearchBackend",
+"SemanticProjectionSearchBackend",
 "RebuildableSemanticIndex",
 "SemanticIndexRecord",
 "SemanticSearchResult",
@@ -26,7 +26,7 @@ def test_filesystem_lazy_exports_remain_public():
 assert name in filesystem.__all__
 assert name in dir(filesystem)
-assert HybridProjectionSearchBackend.__name__ == "HybridProjectionSearchBackend"
+assert SemanticProjectionSearchBackend.__name__ == "SemanticProjectionSearchBackend"
 assert RebuildableSemanticIndex.__name__ == "RebuildableSemanticIndex"
 assert SemanticIndexRecord.__name__ == "SemanticIndexRecord"
 assert SemanticSearchResult.__name__ == "SemanticSearchResult"
@@ -819,7 +819,7 @@ def test_existing_summary_projection_index_uses_current_config_when_dimensions_m
 monkeypatch.setattr(
 filesystem,
-"configure_hybrid_projection_retrieval",
+"configure_semantic_projection_retrieval",
 fake_configure,
 )
@@ -876,7 +876,7 @@ def test_existing_summary_projection_index_dimension_mismatch_rejects_retrieval(
 monkeypatch.setattr(
 filesystem,
-"configure_hybrid_projection_retrieval",
+"configure_semantic_projection_retrieval",
 fail_configure,
 )
@@ -892,9 +892,9 @@ def test_existing_summary_projection_index_dimension_mismatch_rejects_retrieval(
 def test_browse_semantic_files_uses_summary_projection_when_only_summary_available(tmp_path):
 from pageindex.filesystem import PageIndexFileSystem
-from pageindex.filesystem.hybrid_projection import HybridProjectionSearchBackend
+from pageindex.filesystem.semantic_projection import SemanticProjectionSearchBackend
 from pageindex.filesystem.metadata_generation import MetadataGenerationResult
-from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
+from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
 class FixedEmbedder:
 def embed(self, texts):
@@ -916,7 +916,7 @@ def test_browse_semantic_files_uses_summary_projection_when_only_summary_availab
 embedding_model="fake",
 embedding_dimensions=3,
 )
-backend = HybridProjectionSearchBackend(
+backend = SemanticProjectionSearchBackend(
 index_dir,
 embedder=FixedEmbedder(),
 embedding_provider="test",

View file

@@ -25,7 +25,7 @@ class StaticEmbedder:
 def make_summary_indexer(workspace: Path):
-from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
+from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
 return SummaryProjectionIndexer(
 workspace / "artifacts" / "projection_indexes",

View file

@@ -37,7 +37,7 @@ def test_cli_workspace_without_projection_index_does_not_require_sqlite_vec(
 workspace = tmp_path / "workspace"
 real_import = builtins.__import__
-monkeypatch.delitem(sys.modules, "pageindex.filesystem.hybrid_projection", raising=False)
+monkeypatch.delitem(sys.modules, "pageindex.filesystem.semantic_projection", raising=False)
 monkeypatch.delitem(sys.modules, "pageindex.filesystem.semantic_index", raising=False)
 monkeypatch.delitem(sys.modules, "sqlite_vec", raising=False)

View file

@@ -102,7 +102,7 @@ def test_sqlite_vec_semantic_index_file_ref_filter_not_limited_by_global_rank(tm
 def test_summary_projection_indexes_unified_metadata_summary(tmp_path):
-from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
+from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
 indexer = SummaryProjectionIndexer(
 tmp_path / "projection",
@@ -134,7 +134,7 @@ def test_summary_projection_indexes_unified_metadata_summary(tmp_path):
 def test_summary_projection_indexer_defaults_to_1024_dimensions(tmp_path):
-from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
+from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
 indexer = SummaryProjectionIndexer(
 tmp_path / "projection",
@@ -164,7 +164,7 @@ def test_summary_projection_indexer_defaults_to_1024_dimensions(tmp_path):
 def test_summary_projection_indexer_allows_explicit_256_dimensions(tmp_path):
-from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
+from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
 indexer = SummaryProjectionIndexer(
 tmp_path / "projection",
@@ -188,7 +188,7 @@ def test_summary_projection_indexer_allows_explicit_256_dimensions(tmp_path):
 def test_summary_projection_default_rejects_existing_256_index_for_writes(tmp_path):
-from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
+from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
 index_dir = tmp_path / "projection"
 index = SQLiteVecSemanticIndex(index_dir / "summary_only_vector.sqlite")
@@ -216,8 +216,8 @@ def test_summary_projection_default_rejects_existing_256_index_for_writes(tmp_pa
 def test_summary_projection_from_provider_rejects_dimension_mismatch_before_embedder(
 tmp_path, monkeypatch
 ):
-from pageindex.filesystem import projection_indexing
-from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
+from pageindex.filesystem import semantic_projection
+from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
 index_dir = tmp_path / "projection"
 index = SQLiteVecSemanticIndex(index_dir / "summary_only_vector.sqlite")
@@ -234,14 +234,14 @@ def test_summary_projection_from_provider_rejects_dimension_mismatch_before_embe
 def fail_make_embedder(*args, **kwargs):
 raise AssertionError("embedder should not be constructed before dimension validation")
-monkeypatch.setattr(projection_indexing, "make_embedder", fail_make_embedder)
+monkeypatch.setattr(semantic_projection, "make_embedder", fail_make_embedder)
 with pytest.raises(RuntimeError, match="configured embedding_dimensions is 1024"):
 SummaryProjectionIndexer.from_provider(index_dir)
 def test_embedding_cache_key_separates_model_dimensions(tmp_path):
-from pageindex.filesystem.hybrid_projection import (
+from pageindex.filesystem.semantic_projection import (
 EmbeddingCache,
 embedding_cache_model_key,
 )
@@ -285,7 +285,7 @@ def test_embedding_cache_key_separates_model_dimensions(tmp_path):
 def test_summary_projection_dimension_mismatch_preserves_existing_index(tmp_path):
-from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
+from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
 index_dir = tmp_path / "projection"
 index = SQLiteVecSemanticIndex(index_dir / "summary_only_vector.sqlite")
@@ -328,7 +328,7 @@ def test_summary_projection_dimension_mismatch_preserves_existing_index(tmp_path
 def test_hash_embedding_provider_is_not_available():
-from pageindex.filesystem.hybrid_projection import make_embedder
+from pageindex.filesystem.semantic_projection import make_embedder
 with pytest.raises(ValueError, match="unknown embedding provider: hash"):
 make_embedder("hash", "unused", dimensions=256, timeout=1)