refactor(filesystem): make pifs providers configurable

This commit is contained in:
BukeLy 2026-05-26 17:21:44 +08:00
parent 7c021a7dd0
commit de1992def1
7 changed files with 154 additions and 61 deletions

View file

@@ -0,0 +1,30 @@
import sys
from pathlib import Path
import pytest
# REPO_ROOT is the directory one level above the folder containing this file.
# It is prepended to sys.path (once) before the tests run -- presumably so the
# in-repo `pageindex` package is importable without installation; confirm
# against the project's test setup.
REPO_ROOT = Path(__file__).resolve().parents[1]
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.insert(0, str(REPO_ROOT))
def test_metadata_generator_uses_provider_parameter():
    """Check that ``generate`` rejects an unrecognised ``provider`` value.

    A ``MetadataGenerator`` is built with ``provider="unsupported"`` and asked
    to generate the ``summary`` field for a small plain-text input. The call
    must raise ``MetadataGenerationError`` with a message matching
    ``unsupported metadata provider: unsupported``.
    """
    from pageindex.filesystem.metadata_generation import (
        MetadataGenerationError,
        MetadataGenerationInput,
        MetadataGenerator,
    )

    expected_message = "unsupported metadata provider: unsupported"

    generator = MetadataGenerator(provider="unsupported", model="unused")

    # Minimal input document; only the provider choice matters to this test.
    input_fields = {
        "file_ref": "file_a",
        "external_id": "doc_a",
        "title": "A",
        "source_path": "docs/a.txt",
        "content_type": "text/plain",
        "source_type": None,
        "text": "hello",
    }
    request = MetadataGenerationInput(**input_fields)

    with pytest.raises(MetadataGenerationError, match=expected_message):
        generator.generate(request, fields=["summary"])

View file

@@ -1,6 +1,8 @@
import sys
from pathlib import Path
import pytest
# REPO_ROOT is the directory one level above the folder containing this file.
# It is prepended to sys.path (once) before the tests run -- presumably so the
# in-repo `pageindex` package is importable without installation; confirm
# against the project's test setup.
REPO_ROOT = Path(__file__).resolve().parents[1]
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.insert(0, str(REPO_ROOT))
@@ -87,3 +89,10 @@ def test_summary_projection_indexes_unified_metadata_summary(tmp_path):
assert hits[0].external_id == "doc_a"
assert hits[0].metadata["summary"] == "Unified metadata summary."
assert hits[0].metadata["department"] == "ops"
def test_hash_embedding_provider_is_not_available():
    """Check that ``make_embedder`` rejects the ``"hash"`` provider name.

    Calling ``make_embedder("hash", ...)`` must raise ``ValueError`` with a
    message matching ``unknown embedding provider: hash``.
    """
    from pageindex.filesystem.hybrid_projection import make_embedder

    expected_message = "unknown embedding provider: hash"

    with pytest.raises(ValueError, match=expected_message):
        make_embedder("hash", "unused", dimensions=256, timeout=1)