Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
59
tests/test_embed.py
Normal file
59
tests/test_embed.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
"""Tests for iai_mcp.embed -- bge-small-en-v1.5 path (legacy model).
|
||||
|
||||
Plan 02-01 made bge-m3 the default. The 3-model registry still exposes
|
||||
bge-small-en-v1.5 (384d, English-only) for English-only deployments. These
|
||||
tests exercise the Phase-1 model explicitly via `Embedder(model_key=...)` so
|
||||
they remain valid regression gates.
|
||||
|
||||
Multilingual behaviour is covered by tests/test_embed_multilingual.py.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.embed import Embedder
|
||||
|
||||
|
||||
def test_embed_returns_384_dim_vector() -> None:
    """A single embed call yields 384 components, each a Python float."""
    vector = Embedder(model_key="bge-small-en-v1.5").embed("hello world")
    assert len(vector) == 384
    # Checked one by one so a failure points at the offending component.
    for component in vector:
        assert isinstance(component, float)
|
||||
|
||||
|
||||
def test_embed_is_deterministic() -> None:
    """Embedding the same text twice on one Embedder gives equal results."""
    embedder = Embedder(model_key="bge-small-en-v1.5")
    text = "exact same text"
    first = embedder.embed(text)
    second = embedder.embed(text)
    assert first == second
|
||||
|
||||
|
||||
def test_embed_batch_preserves_order_and_dim() -> None:
    """embed_batch returns one 384-d vector per input, in input order.

    Every position is compared against a sequential ``embed`` call for the
    same text, so a reordered or duplicated batch result fails the test.
    Checking only the first element would let a swap of later items pass.
    """
    emb = Embedder(model_key="bge-small-en-v1.5")
    texts = ["one", "two", "three"]
    vecs = emb.embed_batch(texts)
    assert len(vecs) == len(texts)
    assert all(len(v) == 384 for v in vecs)
    # Batch must equal sequential calls (determinism across batching path too),
    # position by position -- this is what actually pins the ordering.
    for index, text in enumerate(texts):
        expected = emb.embed(text)
        assert vecs[index] == expected, f"batch item {index} ({text!r}) differs"
|
||||
|
||||
|
||||
def test_embed_empty_string_still_returns_384d() -> None:
    """Embedding the empty string still yields a 384-component vector."""
    embedder = Embedder(model_key="bge-small-en-v1.5")
    empty_vector = embedder.embed("")
    assert len(empty_vector) == 384
|
||||
|
||||
|
||||
def test_embedder_dim_matches_output() -> None:
    """The DIM attribute is 384 and equals the length of an embedded vector."""
    embedder = Embedder(model_key="bge-small-en-v1.5")
    # The advertised dimension is checked before any text is embedded.
    assert embedder.DIM == 384
    produced = embedder.embed("anything")
    assert len(produced) == embedder.DIM
|
||||
|
||||
|
||||
def test_bge_small_en_still_registered_for_legacy() -> None:
    """Per D-02a the English-only model stays in the registry with dim 384."""
    from iai_mcp.embed import MODEL_REGISTRY

    legacy_key = "bge-small-en-v1.5"
    assert legacy_key in MODEL_REGISTRY
    assert MODEL_REGISTRY[legacy_key]["dim"] == 384
|
||||
Loading…
Add table
Add a link
Reference in a new issue