// gomcp/internal/domain/vectorstore/polarquant_test.go
// 405 lines · 10 KiB · Go

package vectorstore
import (
"fmt"
"math"
"math/rand"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// --- PolarQuant Core Tests ---
// TestPolarQuant_EncodeDecode_Deterministic checks that encoding the same
// vector twice with one codec yields identical compressed data and radius.
func TestPolarQuant_EncodeDecode_Deterministic(t *testing.T) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 4, 42)
	input := pqRandomVector(dim, 1)

	first := pq.Encode(input)
	second := pq.Encode(input)

	assert.Equal(t, first.Data, second.Data, "same input → same compressed data")
	assert.Equal(t, first.Radius, second.Radius, "same input → same radius")
}
// TestPolarQuant_RoundTrip_4bit encodes and decodes a single 128-dim vector
// at 4 bits per dimension and bounds the reconstruction quality.
func TestPolarQuant_RoundTrip_4bit(t *testing.T) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 4, 42)
	original := pqRandomVector(dim, 1)
	decoded := pq.Decode(pq.Encode(original))

	// Thresholds follow the original author's empirical note: roughly 91%
	// average cosine at d=128, with more quantization noise than at d=3.
	distance := l2Error(original, decoded)
	assert.Less(t, distance, 0.50, "4-bit roundtrip L2 error should be < 50%%, got %.4f", distance)

	similarity := CosineSimilarity(original, decoded)
	assert.Greater(t, similarity, 0.90, "4-bit roundtrip cosine similarity should be > 0.90, got %.4f", similarity)
}
// TestPolarQuant_RoundTrip_8bit encodes and decodes a single 128-dim vector
// at 8 bits per dimension and bounds the reconstruction quality.
func TestPolarQuant_RoundTrip_8bit(t *testing.T) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 8, 42)
	original := pqRandomVector(dim, 1)
	decoded := pq.Decode(pq.Encode(original))

	// The assertions allow an L2 error below 0.05 and require a cosine
	// similarity above 0.999.
	distance := l2Error(original, decoded)
	assert.Less(t, distance, 0.05, "8-bit roundtrip L2 error should be < 5%%, got %.4f", distance)

	similarity := CosineSimilarity(original, decoded)
	assert.Greater(t, similarity, 0.999, "8-bit roundtrip cosine similarity should be > 0.999, got %.4f", similarity)
}
// TestPolarQuant_RoundTrip_2bit encodes and decodes a single 128-dim vector
// at 2 bits per dimension; only the general direction is expected to survive.
func TestPolarQuant_RoundTrip_2bit(t *testing.T) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 2, 42)
	original := pqRandomVector(dim, 1)
	decoded := pq.Decode(pq.Encode(original))

	// Coarse quantization: require cosine similarity above 0.70 only.
	similarity := CosineSimilarity(original, decoded)
	assert.Greater(t, similarity, 0.70, "2-bit roundtrip cosine should be > 0.70, got %.4f", similarity)
}
// TestPolarQuant_PreservesRadius scales a unit vector by 3.7 and checks that
// both the encoded Radius and the norm of the decoded vector stay near 3.7.
func TestPolarQuant_PreservesRadius(t *testing.T) {
	const (
		dim   = 128
		scale = 3.7
	)
	pq := NewPolarQuantCodec(dim, 4, 42)
	unit := pqRandomVector(dim, 1)

	// Build a non-unit-length copy of the input.
	scaled := make([]float64, len(unit))
	for i := range unit {
		scaled[i] = unit[i] * scale
	}

	encoded := pq.Encode(scaled)
	assert.InDelta(t, scale, float64(encoded.Radius), 0.01, "radius should be ≈ 3.7")

	// The decoded vector should carry the same overall scale.
	decoded := pq.Decode(encoded)
	var sumSq float64
	for i := range decoded {
		sumSq += decoded[i] * decoded[i]
	}
	decodedNorm := math.Sqrt(sumSq)
	assert.InDelta(t, scale, decodedNorm, 0.5, "reconstructed norm should be ≈ 3.7")
}
// TestPolarQuant_PreservesOrdering checks that compressed similarity ranks a
// lightly perturbed neighbour above a heavily perturbed one, matching the
// ranking given by exact cosine similarity.
func TestPolarQuant_PreservesOrdering(t *testing.T) {
	codec := NewPolarQuantCodec(128, 4, 42)
	query := pqRandomVector(128, 1)
	// Named near/distant rather than close/far so the predeclared builtin
	// close is not shadowed inside this function.
	near := pqPerturbVector(query, 0.1, 2)
	distant := pqPerturbVector(query, 0.9, 3)

	// Precondition: the ordering must already hold in the original space,
	// otherwise the compressed comparison below is meaningless.
	cosNear := CosineSimilarity(query, near)
	cosDistant := CosineSimilarity(query, distant)
	require.Greater(t, cosNear, cosDistant, "sanity: close > far in original space")

	// Encode all three vectors with the same codec.
	cvQuery := codec.Encode(query)
	cvNear := codec.Encode(near)
	cvDistant := codec.Encode(distant)

	// Compressed similarity should preserve the ordering.
	compNear := codec.CompressedSimilarity(cvQuery, cvNear)
	compDistant := codec.CompressedSimilarity(cvQuery, cvDistant)
	assert.Greater(t, compNear, compDistant,
		"PolarQuant must preserve ordering: comp(query,close)=%.4f > comp(query,far)=%.4f",
		compNear, compDistant)
}
// TestPolarQuant_CompressedSimilarity_AccuracyVsExact compares the similarity
// computed on two 4-bit encoded vectors against their exact cosine similarity.
func TestPolarQuant_CompressedSimilarity_AccuracyVsExact(t *testing.T) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 4, 42)
	left := pqRandomVector(dim, 10)
	right := pqRandomVector(dim, 20)

	exact := CosineSimilarity(left, right)
	approx := pq.CompressedSimilarity(pq.Encode(left), pq.Encode(right))

	// The compressed estimate must land within ±0.1 of the exact value.
	assert.InDelta(t, exact, approx, 0.1,
		"compressed similarity (%.4f) should be close to exact (%.4f)", approx, exact)
}
// TestPolarQuant_MemoryReduction_4bit checks the compressed size and the
// reported compression ratio for a 128-dim codec at 4 bits per dimension.
func TestPolarQuant_MemoryReduction_4bit(t *testing.T) {
	const rawBytes = 128 * 8 // 128 float64 values

	pq := NewPolarQuantCodec(128, 4, 42)
	withRadius := pq.CompressedBytes() + 4 // +4 for the float32 radius
	reported := pq.CompressionRatio()

	assert.Equal(t, 64, pq.CompressedBytes(), "128×4bit = 512 bits = 64 bytes")
	assert.Equal(t, 68, withRadius, "total = 64 data + 4 radius = 68 bytes")
	// The reported ratio should match raw size over payload-plus-radius size.
	assert.InDelta(t, float64(rawBytes)/float64(withRadius), reported, 0.1)
	assert.Greater(t, reported, 14.0, "should be >14x compression, got %.1fx", reported)
}
// TestPolarQuant_MemoryReduction_8bit checks the compressed size and the
// reported compression ratio for a 128-dim codec at 8 bits per dimension.
func TestPolarQuant_MemoryReduction_8bit(t *testing.T) {
	pq := NewPolarQuantCodec(128, 8, 42)
	withRadius := pq.CompressedBytes() + 4 // payload plus 4 extra bytes
	reported := pq.CompressionRatio()

	assert.Equal(t, 128, pq.CompressedBytes(), "128×8bit = 1024 bits = 128 bytes")
	assert.Equal(t, 132, withRadius)
	assert.Greater(t, reported, 7.0, "should be >7x compression, got %.1fx", reported)
}
// TestPolarQuant_ZeroVector checks that an all-zero input encodes to a
// near-zero radius and decodes back to near-zero in every dimension.
func TestPolarQuant_ZeroVector(t *testing.T) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 4, 42)

	encoded := pq.Encode(make([]float64, dim))
	assert.InDelta(t, 0.0, float64(encoded.Radius), 0.001)

	decoded := pq.Decode(encoded)
	for d := 0; d < len(decoded); d++ {
		assert.InDelta(t, 0.0, decoded[d], 0.001, "zero vector dimension %d should stay zero", d)
	}
}
// TestPolarQuant_SmallDim exercises the codec on a 3-dimensional vector to
// make sure small dimensions round-trip with high cosine similarity.
func TestPolarQuant_SmallDim(t *testing.T) {
	pq := NewPolarQuantCodec(3, 4, 42)
	input := []float64{0.6, 0.8, 0.0}

	decoded := pq.Decode(pq.Encode(input))

	similarity := CosineSimilarity(input, decoded)
	assert.Greater(t, similarity, 0.90, "3-dim 4-bit cosine should be > 0.90, got %.4f", similarity)
}
// TestPolarQuant_DifferentSeeds checks that two codecs built with different
// seeds produce different compressed data for the same input vector.
func TestPolarQuant_DifferentSeeds(t *testing.T) {
	const dim = 128
	input := pqRandomVector(dim, 1)

	seed42 := NewPolarQuantCodec(dim, 4, 42)
	seed99 := NewPolarQuantCodec(dim, 4, 99)

	fromSeed42 := seed42.Encode(input)
	fromSeed99 := seed99.Encode(input)
	assert.NotEqual(t, fromSeed42.Data, fromSeed99.Data, "different seeds → different compressed data")
}
// TestPolarQuant_BitWidthClamping checks that out-of-range bit widths passed
// to the constructor are reported by BitsPerDim as clamped to 1 and 8.
func TestPolarQuant_BitWidthClamping(t *testing.T) {
	// A requested width below 1 must come back as 1.
	tooNarrow := NewPolarQuantCodec(128, 0, 42)
	assert.Equal(t, 1, tooNarrow.BitsPerDim())

	// A requested width above 8 must come back as 8.
	tooWide := NewPolarQuantCodec(128, 16, 42)
	assert.Equal(t, 8, tooWide.BitsPerDim())
}
// TestPolarQuant_OrthogonalRotation_PreservesNorm multiplies a vector by the
// codec's rotation matrix by hand and checks that vecNorm reports the same
// value before and after, as expected of an orthogonal matrix.
func TestPolarQuant_OrthogonalRotation_PreservesNorm(t *testing.T) {
	const dim = 64
	pq := NewPolarQuantCodec(dim, 8, 42)
	input := pqRandomVector(dim, 5)

	// Plain matrix-vector product: out[r] = Σ_c rotation[r][c] * input[c].
	out := make([]float64, dim)
	for r := range out {
		var acc float64
		for c := 0; c < dim; c++ {
			acc += pq.rotation[r][c] * input[c]
		}
		out[r] = acc
	}

	before := vecNorm(input, dim)
	after := vecNorm(out, dim)
	assert.InDelta(t, before, after, 0.001, "rotation should preserve L2 norm")
}
// TestPolarQuant_OrthogonalRotation_Deterministic checks that two codecs
// built with the same dimension, bit width, and seed hold element-wise equal
// rotation matrices.
func TestPolarQuant_OrthogonalRotation_Deterministic(t *testing.T) {
	const dim = 32
	first := NewPolarQuantCodec(dim, 4, 42)
	second := NewPolarQuantCodec(dim, 4, 42)

	for row := 0; row < dim; row++ {
		for col := 0; col < dim; col++ {
			assert.Equal(t, first.rotation[row][col], second.rotation[row][col],
				"same seed → same rotation at [%d][%d]", row, col)
		}
	}
}
// --- Batch Quality Tests ---
// TestPolarQuant_BatchQuality_4bit round-trips 100 seeded random vectors at
// 4 bits per dimension and requires the mean cosine similarity to exceed 0.90.
func TestPolarQuant_BatchQuality_4bit(t *testing.T) {
	const (
		dim     = 128
		samples = 100
	)
	pq := NewPolarQuantCodec(dim, 4, 42)

	var sum float64
	for seed := int64(0); seed < samples; seed++ {
		original := pqRandomVector(dim, seed)
		decoded := pq.Decode(pq.Encode(original))
		sum += CosineSimilarity(original, decoded)
	}

	mean := sum / float64(samples)
	assert.Greater(t, mean, 0.90,
		"avg cosine similarity over %d vectors should be > 0.90, got %.4f", samples, mean)
}
// TestPolarQuant_BatchOrderingPreservation runs 50 queries, each against 10
// candidate vectors, and measures how often the top-1 candidate by compressed
// similarity matches the top-1 by exact cosine similarity. More than 55% of
// the queries must agree.
func TestPolarQuant_BatchOrderingPreservation(t *testing.T) {
	const (
		dim        = 128
		queries    = 50
		candidates = 10
	)
	pq := NewPolarQuantCodec(dim, 4, 42)

	// argmax returns the index in [0, count) with the highest score. Ties keep
	// the earliest index; -1 is returned if no score exceeds -2.0.
	argmax := func(count int, score func(idx int) float64) int {
		winner, top := -1, -2.0
		for idx := 0; idx < count; idx++ {
			if s := score(idx); s > top {
				winner, top = idx, s
			}
		}
		return winner
	}

	matches := 0
	for q := 0; q < queries; q++ {
		// Seeds are spaced 100 apart per query so that the query and its
		// candidates use distinct seeds.
		query := pqRandomVector(dim, int64(q*100))
		pool := make([][]float64, candidates)
		for j := range pool {
			pool[j] = pqRandomVector(dim, int64(q*100+j+1))
		}

		// Top-1 by exact cosine similarity.
		exactTop := argmax(len(pool), func(idx int) float64 {
			return CosineSimilarity(query, pool[idx])
		})

		// Top-1 by similarity computed on the encoded vectors.
		encodedQuery := pq.Encode(query)
		compressedTop := argmax(len(pool), func(idx int) float64 {
			encoded := pq.Encode(pool[idx])
			return pq.CompressedSimilarity(encodedQuery, encoded)
		})

		if exactTop == compressedTop {
			matches++
		}
	}

	rate := float64(matches) / float64(queries)
	assert.Greater(t, rate, 0.55,
		"top-1 preservation rate should be > 55%%, got %.0f%% (%d/%d)", rate*100, matches, queries)
}
// --- Benchmarks ---
// BenchmarkPolarQuant_Encode_4bit times Encode on one 128-dim vector at
// 4 bits per dimension. Codec and input construction are excluded from timing.
func BenchmarkPolarQuant_Encode_4bit(b *testing.B) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 4, 42)
	input := pqRandomVector(dim, 1)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_ = pq.Encode(input)
	}
}
// BenchmarkPolarQuant_Decode_4bit times Decode on one pre-encoded 128-dim
// vector at 4 bits per dimension. Setup and encoding are excluded from timing.
func BenchmarkPolarQuant_Decode_4bit(b *testing.B) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 4, 42)
	encoded := pq.Encode(pqRandomVector(dim, 1))

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_ = pq.Decode(encoded)
	}
}
// BenchmarkPolarQuant_CompressedSimilarity_4bit times CompressedSimilarity on
// two pre-encoded 128-dim vectors at 4 bits per dimension.
func BenchmarkPolarQuant_CompressedSimilarity_4bit(b *testing.B) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 4, 42)
	first := pqRandomVector(dim, 1)
	second := pqRandomVector(dim, 2)
	encFirst := pq.Encode(first)
	encSecond := pq.Encode(second)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_ = pq.CompressedSimilarity(encFirst, encSecond)
	}
}
// BenchmarkPolarQuant_Encode_8bit times Encode on one 128-dim vector at
// 8 bits per dimension. Codec and input construction are excluded from timing.
func BenchmarkPolarQuant_Encode_8bit(b *testing.B) {
	const dim = 128
	pq := NewPolarQuantCodec(dim, 8, 42)
	input := pqRandomVector(dim, 1)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_ = pq.Encode(input)
	}
}
// --- Helpers (scoped to polarquant tests to avoid collision with qjl_test) ---
// pqRandomVector returns a dim-length vector of standard-normal samples drawn
// from a generator seeded with seed, scaled to unit L2 norm. The same
// (dim, seed) pair always yields the same vector. If the norm is zero the
// samples are returned unscaled.
func pqRandomVector(dim int, seed int64) []float64 {
	source := rand.New(rand.NewSource(seed))
	out := make([]float64, dim)

	// Draw each sample and accumulate the squared norm in index order.
	var sumSq float64
	for i := 0; i < len(out); i++ {
		out[i] = source.NormFloat64()
		sumSq += out[i] * out[i]
	}

	if length := math.Sqrt(sumSq); length > 0 {
		for i := 0; i < len(out); i++ {
			out[i] /= length
		}
	}
	return out
}
// pqPerturbVector returns a new vector formed by adding noise-scaled
// standard-normal samples (from a generator seeded with seed) to each
// component of v, then scaling the result to unit L2 norm. v is not modified.
// If the perturbed vector has zero norm it is returned unscaled.
func pqPerturbVector(v []float64, noise float64, seed int64) []float64 {
	source := rand.New(rand.NewSource(seed))
	out := make([]float64, len(v))

	// Perturb each component and accumulate the squared norm in index order.
	var sumSq float64
	for i, component := range v {
		out[i] = component + noise*source.NormFloat64()
		sumSq += out[i] * out[i]
	}

	if length := math.Sqrt(sumSq); length > 0 {
		for i := 0; i < len(out); i++ {
			out[i] /= length
		}
	}
	return out
}
// l2Error returns the Euclidean (L2) distance between a and b. Slices of
// different lengths have no defined distance here and yield +Inf.
func l2Error(a, b []float64) float64 {
	if len(a) != len(b) {
		return math.Inf(1)
	}

	total := 0.0
	for i, x := range a {
		diff := x - b[i]
		total += diff * diff
	}
	return math.Sqrt(total)
}
// init keeps the fmt import referenced so the compiler does not reject it as
// unused; it has no other effect.
func init() {
	var _ = fmt.Sprint
}