mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-18 20:15:18 +02:00
refactor(filesystem): consolidate semantic projection modules
This commit is contained in:
parent
34fa8f7b42
commit
e368562e03
10 changed files with 233 additions and 385 deletions
|
|
@ -7,7 +7,7 @@ import pytest
|
|||
def test_filesystem_lazy_exports_remain_public():
|
||||
import pageindex.filesystem as filesystem
|
||||
from pageindex.filesystem import (
|
||||
HybridProjectionSearchBackend,
|
||||
SemanticProjectionSearchBackend,
|
||||
RebuildableSemanticIndex,
|
||||
SemanticIndexRecord,
|
||||
SemanticSearchResult,
|
||||
|
|
@ -16,7 +16,7 @@ def test_filesystem_lazy_exports_remain_public():
|
|||
)
|
||||
|
||||
for name in (
|
||||
"HybridProjectionSearchBackend",
|
||||
"SemanticProjectionSearchBackend",
|
||||
"RebuildableSemanticIndex",
|
||||
"SemanticIndexRecord",
|
||||
"SemanticSearchResult",
|
||||
|
|
@ -26,7 +26,7 @@ def test_filesystem_lazy_exports_remain_public():
|
|||
assert name in filesystem.__all__
|
||||
assert name in dir(filesystem)
|
||||
|
||||
assert HybridProjectionSearchBackend.__name__ == "HybridProjectionSearchBackend"
|
||||
assert SemanticProjectionSearchBackend.__name__ == "SemanticProjectionSearchBackend"
|
||||
assert RebuildableSemanticIndex.__name__ == "RebuildableSemanticIndex"
|
||||
assert SemanticIndexRecord.__name__ == "SemanticIndexRecord"
|
||||
assert SemanticSearchResult.__name__ == "SemanticSearchResult"
|
||||
|
|
@ -819,7 +819,7 @@ def test_existing_summary_projection_index_uses_current_config_when_dimensions_m
|
|||
|
||||
monkeypatch.setattr(
|
||||
filesystem,
|
||||
"configure_hybrid_projection_retrieval",
|
||||
"configure_semantic_projection_retrieval",
|
||||
fake_configure,
|
||||
)
|
||||
|
||||
|
|
@ -876,7 +876,7 @@ def test_existing_summary_projection_index_dimension_mismatch_rejects_retrieval(
|
|||
|
||||
monkeypatch.setattr(
|
||||
filesystem,
|
||||
"configure_hybrid_projection_retrieval",
|
||||
"configure_semantic_projection_retrieval",
|
||||
fail_configure,
|
||||
)
|
||||
|
||||
|
|
@ -892,9 +892,9 @@ def test_existing_summary_projection_index_dimension_mismatch_rejects_retrieval(
|
|||
|
||||
def test_browse_semantic_files_uses_summary_projection_when_only_summary_available(tmp_path):
|
||||
from pageindex.filesystem import PageIndexFileSystem
|
||||
from pageindex.filesystem.hybrid_projection import HybridProjectionSearchBackend
|
||||
from pageindex.filesystem.semantic_projection import SemanticProjectionSearchBackend
|
||||
from pageindex.filesystem.metadata_generation import MetadataGenerationResult
|
||||
from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
|
||||
from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
|
||||
|
||||
class FixedEmbedder:
|
||||
def embed(self, texts):
|
||||
|
|
@ -916,7 +916,7 @@ def test_browse_semantic_files_uses_summary_projection_when_only_summary_availab
|
|||
embedding_model="fake",
|
||||
embedding_dimensions=3,
|
||||
)
|
||||
backend = HybridProjectionSearchBackend(
|
||||
backend = SemanticProjectionSearchBackend(
|
||||
index_dir,
|
||||
embedder=FixedEmbedder(),
|
||||
embedding_provider="test",
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ class StaticEmbedder:
|
|||
|
||||
|
||||
def make_summary_indexer(workspace: Path):
|
||||
from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
|
||||
from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
|
||||
|
||||
return SummaryProjectionIndexer(
|
||||
workspace / "artifacts" / "projection_indexes",
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ def test_cli_workspace_without_projection_index_does_not_require_sqlite_vec(
|
|||
workspace = tmp_path / "workspace"
|
||||
real_import = builtins.__import__
|
||||
|
||||
monkeypatch.delitem(sys.modules, "pageindex.filesystem.hybrid_projection", raising=False)
|
||||
monkeypatch.delitem(sys.modules, "pageindex.filesystem.semantic_projection", raising=False)
|
||||
monkeypatch.delitem(sys.modules, "pageindex.filesystem.semantic_index", raising=False)
|
||||
monkeypatch.delitem(sys.modules, "sqlite_vec", raising=False)
|
||||
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ def test_sqlite_vec_semantic_index_file_ref_filter_not_limited_by_global_rank(tm
|
|||
|
||||
|
||||
def test_summary_projection_indexes_unified_metadata_summary(tmp_path):
|
||||
from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
|
||||
from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
|
||||
|
||||
indexer = SummaryProjectionIndexer(
|
||||
tmp_path / "projection",
|
||||
|
|
@ -134,7 +134,7 @@ def test_summary_projection_indexes_unified_metadata_summary(tmp_path):
|
|||
|
||||
|
||||
def test_summary_projection_indexer_defaults_to_1024_dimensions(tmp_path):
|
||||
from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
|
||||
from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
|
||||
|
||||
indexer = SummaryProjectionIndexer(
|
||||
tmp_path / "projection",
|
||||
|
|
@ -164,7 +164,7 @@ def test_summary_projection_indexer_defaults_to_1024_dimensions(tmp_path):
|
|||
|
||||
|
||||
def test_summary_projection_indexer_allows_explicit_256_dimensions(tmp_path):
|
||||
from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
|
||||
from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
|
||||
|
||||
indexer = SummaryProjectionIndexer(
|
||||
tmp_path / "projection",
|
||||
|
|
@ -188,7 +188,7 @@ def test_summary_projection_indexer_allows_explicit_256_dimensions(tmp_path):
|
|||
|
||||
|
||||
def test_summary_projection_default_rejects_existing_256_index_for_writes(tmp_path):
|
||||
from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
|
||||
from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
|
||||
|
||||
index_dir = tmp_path / "projection"
|
||||
index = SQLiteVecSemanticIndex(index_dir / "summary_only_vector.sqlite")
|
||||
|
|
@ -216,8 +216,8 @@ def test_summary_projection_default_rejects_existing_256_index_for_writes(tmp_pa
|
|||
def test_summary_projection_from_provider_rejects_dimension_mismatch_before_embedder(
|
||||
tmp_path, monkeypatch
|
||||
):
|
||||
from pageindex.filesystem import projection_indexing
|
||||
from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
|
||||
from pageindex.filesystem import semantic_projection
|
||||
from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
|
||||
|
||||
index_dir = tmp_path / "projection"
|
||||
index = SQLiteVecSemanticIndex(index_dir / "summary_only_vector.sqlite")
|
||||
|
|
@ -234,14 +234,14 @@ def test_summary_projection_from_provider_rejects_dimension_mismatch_before_embe
|
|||
def fail_make_embedder(*args, **kwargs):
|
||||
raise AssertionError("embedder should not be constructed before dimension validation")
|
||||
|
||||
monkeypatch.setattr(projection_indexing, "make_embedder", fail_make_embedder)
|
||||
monkeypatch.setattr(semantic_projection, "make_embedder", fail_make_embedder)
|
||||
|
||||
with pytest.raises(RuntimeError, match="configured embedding_dimensions is 1024"):
|
||||
SummaryProjectionIndexer.from_provider(index_dir)
|
||||
|
||||
|
||||
def test_embedding_cache_key_separates_model_dimensions(tmp_path):
|
||||
from pageindex.filesystem.hybrid_projection import (
|
||||
from pageindex.filesystem.semantic_projection import (
|
||||
EmbeddingCache,
|
||||
embedding_cache_model_key,
|
||||
)
|
||||
|
|
@ -285,7 +285,7 @@ def test_embedding_cache_key_separates_model_dimensions(tmp_path):
|
|||
|
||||
|
||||
def test_summary_projection_dimension_mismatch_preserves_existing_index(tmp_path):
|
||||
from pageindex.filesystem.projection_indexing import SummaryProjectionIndexer
|
||||
from pageindex.filesystem.semantic_projection import SummaryProjectionIndexer
|
||||
|
||||
index_dir = tmp_path / "projection"
|
||||
index = SQLiteVecSemanticIndex(index_dir / "summary_only_vector.sqlite")
|
||||
|
|
@ -328,7 +328,7 @@ def test_summary_projection_dimension_mismatch_preserves_existing_index(tmp_path
|
|||
|
||||
|
||||
def test_hash_embedding_provider_is_not_available():
|
||||
from pageindex.filesystem.hybrid_projection import make_embedder
|
||||
from pageindex.filesystem.semantic_projection import make_embedder
|
||||
|
||||
with pytest.raises(ValueError, match="unknown embedding provider: hash"):
|
||||
make_embedder("hash", "unused", dimensions=256, timeout=1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue