// gomcp/internal/domain/vectorstore/polarquant_test.go

package vectorstore

import (
	"fmt"
	"math"
	"math/rand"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// --- PolarQuant Core Tests ---

// TestPolarQuant_EncodeDecode_Deterministic encodes one vector twice with the
// same codec and expects identical packed data and identical radius.
func TestPolarQuant_EncodeDecode_Deterministic(t *testing.T) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 4, 42)
	input := pqRandomVector(dim, 1)

	first, second := codec.Encode(input), codec.Encode(input)

	assert.Equal(t, first.Data, second.Data, "same input → same compressed data")
	assert.Equal(t, first.Radius, second.Radius, "same input → same radius")
}

// TestPolarQuant_RoundTrip_4bit checks reconstruction quality after a 4-bit
// encode/decode cycle on a 128-dimensional unit vector.
func TestPolarQuant_RoundTrip_4bit(t *testing.T) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 4, 42)
	original := pqRandomVector(dim, 1)

	decoded := codec.Decode(codec.Encode(original))

	// Thresholds are based on an empirically measured ~91% average cosine
	// for 4-bit quantization at d=128; quantization noise is higher here
	// than at d=3 because there are more dimensions.
	l2err := l2Error(original, decoded)
	assert.Less(t, l2err, 0.50, "4-bit roundtrip L2 error should be < 50%%, got %.4f", l2err)

	cosSim := CosineSimilarity(original, decoded)
	assert.Greater(t, cosSim, 0.90, "4-bit roundtrip cosine similarity should be > 0.90, got %.4f", cosSim)
}

// TestPolarQuant_RoundTrip_8bit checks that an 8-bit encode/decode cycle
// reconstructs a 128-dimensional unit vector almost exactly.
func TestPolarQuant_RoundTrip_8bit(t *testing.T) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 8, 42)
	original := pqRandomVector(dim, 1)

	decoded := codec.Decode(codec.Encode(original))

	// At 8 bits the reconstruction error is expected to be far below 1%,
	// so both bounds are much tighter than in the 4-bit test.
	l2err := l2Error(original, decoded)
	assert.Less(t, l2err, 0.05, "8-bit roundtrip L2 error should be < 5%%, got %.4f", l2err)

	cosSim := CosineSimilarity(original, decoded)
	assert.Greater(t, cosSim, 0.999, "8-bit roundtrip cosine similarity should be > 0.999, got %.4f", cosSim)
}

// TestPolarQuant_RoundTrip_2bit checks that even a coarse 2-bit encode/decode
// cycle keeps the general direction of a 128-dimensional unit vector.
func TestPolarQuant_RoundTrip_2bit(t *testing.T) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 2, 42)
	original := pqRandomVector(dim, 1)

	decoded := codec.Decode(codec.Encode(original))

	// Only direction is checked at this bit width; no L2 bound is asserted.
	cosSim := CosineSimilarity(original, decoded)
	assert.Greater(t, cosSim, 0.70, "2-bit roundtrip cosine should be > 0.70, got %.4f", cosSim)
}

// TestPolarQuant_PreservesRadius scales a unit vector to length 3.7 and checks
// that the encoded radius and the norm of the decoded vector both reflect
// that scale.
func TestPolarQuant_PreservesRadius(t *testing.T) {
	const (
		dim   = 128
		scale = 3.7
	)
	codec := NewPolarQuantCodec(dim, 4, 42)
	unit := pqRandomVector(dim, 1)

	// Stretch the unit vector so its norm is no longer 1.
	scaled := make([]float64, len(unit))
	for idx := range unit {
		scaled[idx] = unit[idx] * scale
	}

	cv := codec.Encode(scaled)
	assert.InDelta(t, scale, float64(cv.Radius), 0.01, "radius should be ≈ 3.7")

	// The decoded vector's L2 norm should land near the original scale.
	reconstructed := codec.Decode(cv)
	var sumSq float64
	for _, x := range reconstructed {
		sumSq += x * x
	}
	assert.InDelta(t, scale, math.Sqrt(sumSq), 0.5, "reconstructed norm should be ≈ 3.7")
}

// TestPolarQuant_PreservesOrdering checks that similarity ranking survives
// compression: a lightly perturbed copy of the query must score higher than a
// heavily perturbed copy, both in the original space and when compared via
// CompressedSimilarity on the encoded vectors.
func TestPolarQuant_PreservesOrdering(t *testing.T) {
	codec := NewPolarQuantCodec(128, 4, 42)

	query := pqRandomVector(128, 1)
	// Named "near" rather than "close" so the predeclared builtin close is
	// not shadowed inside this function.
	near := pqPerturbVector(query, 0.1, 2)
	far := pqPerturbVector(query, 0.9, 3)

	// Abort early if the fixture itself does not have the intended ordering;
	// the compressed comparison below would be meaningless otherwise.
	cosNear := CosineSimilarity(query, near)
	cosFar := CosineSimilarity(query, far)
	require.Greater(t, cosNear, cosFar, "sanity: close > far in original space")

	// Encode all.
	cvQ := codec.Encode(query)
	cvNear := codec.Encode(near)
	cvFar := codec.Encode(far)

	// Compressed similarity should preserve ordering.
	compNear := codec.CompressedSimilarity(cvQ, cvNear)
	compFar := codec.CompressedSimilarity(cvQ, cvFar)

	assert.Greater(t, compNear, compFar,
		"PolarQuant must preserve ordering: comp(query,close)=%.4f > comp(query,far)=%.4f",
		compNear, compFar)
}

// TestPolarQuant_CompressedSimilarity_AccuracyVsExact compares the similarity
// computed on 4-bit encoded vectors against the exact cosine similarity of
// the original vectors.
func TestPolarQuant_CompressedSimilarity_AccuracyVsExact(t *testing.T) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 4, 42)

	left := pqRandomVector(dim, 10)
	right := pqRandomVector(dim, 20)

	exactSim := CosineSimilarity(left, right)

	encLeft := codec.Encode(left)
	encRight := codec.Encode(right)
	compSim := codec.CompressedSimilarity(encLeft, encRight)

	// The compressed estimate is allowed to deviate by at most ±0.1.
	assert.InDelta(t, exactSim, compSim, 0.1,
		"compressed similarity (%.4f) should be close to exact (%.4f)", compSim, exactSim)
}

// TestPolarQuant_MemoryReduction_4bit pins the storage footprint of a 4-bit,
// 128-dimensional codec and checks the reported compression ratio against
// the ratio derived from those byte counts.
func TestPolarQuant_MemoryReduction_4bit(t *testing.T) {
	const (
		dim         = 128
		radiusBytes = 4       // float32 radius stored next to the packed data
		rawBytes    = dim * 8 // uncompressed float64 vector
	)
	codec := NewPolarQuantCodec(dim, 4, 42)

	totalBytes := codec.CompressedBytes() + radiusBytes
	ratio := codec.CompressionRatio()

	assert.Equal(t, 64, codec.CompressedBytes(), "128×4bit = 512 bits = 64 bytes")
	assert.Equal(t, 68, totalBytes, "total = 64 data + 4 radius = 68 bytes")
	assert.InDelta(t, float64(rawBytes)/float64(totalBytes), ratio, 0.1)
	assert.Greater(t, ratio, 14.0, "should be >14x compression, got %.1fx", ratio)
}

// TestPolarQuant_MemoryReduction_8bit pins the storage footprint of an 8-bit,
// 128-dimensional codec and a lower bound on its reported compression ratio.
func TestPolarQuant_MemoryReduction_8bit(t *testing.T) {
	const (
		dim         = 128
		radiusBytes = 4 // extra bytes counted for the radius on top of the packed data
	)
	codec := NewPolarQuantCodec(dim, 8, 42)

	totalBytes := codec.CompressedBytes() + radiusBytes
	ratio := codec.CompressionRatio()

	assert.Equal(t, 128, codec.CompressedBytes(), "128×8bit = 1024 bits = 128 bytes")
	assert.Equal(t, 132, totalBytes)
	assert.Greater(t, ratio, 7.0, "should be >7x compression, got %.1fx", ratio)
}

// TestPolarQuant_ZeroVector verifies that an all-zero input encodes to a
// (near-)zero radius and decodes back to an all-zero vector with the same
// number of dimensions.
func TestPolarQuant_ZeroVector(t *testing.T) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 4, 42)
	zero := make([]float64, dim)

	cv := codec.Encode(zero)
	assert.InDelta(t, 0.0, float64(cv.Radius), 0.001)

	reconstructed := codec.Decode(cv)
	// Guard the length first: with an empty or truncated result the
	// per-dimension loop below would pass without checking anything.
	require.Len(t, reconstructed, dim, "decoded zero vector should keep all %d dimensions", dim)
	for i, v := range reconstructed {
		assert.InDelta(t, 0.0, v, 0.001, "zero vector dimension %d should stay zero", i)
	}
}

// TestPolarQuant_SmallDim exercises the codec on a hand-written 3-dimensional
// unit vector to make sure low dimensions round-trip with good fidelity.
func TestPolarQuant_SmallDim(t *testing.T) {
	input := []float64{0.6, 0.8, 0.0}
	codec := NewPolarQuantCodec(3, 4, 42)

	decoded := codec.Decode(codec.Encode(input))

	cosSim := CosineSimilarity(input, decoded)
	assert.Greater(t, cosSim, 0.90, "3-dim 4-bit cosine should be > 0.90, got %.4f", cosSim)
}

// TestPolarQuant_DifferentSeeds encodes the same vector with two codecs that
// differ only in their seed and expects the packed data to differ.
func TestPolarQuant_DifferentSeeds(t *testing.T) {
	const dim = 128
	input := pqRandomVector(dim, 1)

	seed42 := NewPolarQuantCodec(dim, 4, 42)
	seed99 := NewPolarQuantCodec(dim, 4, 99)

	enc42 := seed42.Encode(input)
	enc99 := seed99.Encode(input)

	assert.NotEqual(t, enc42.Data, enc99.Data, "different seeds → different compressed data")
}

// TestPolarQuant_BitWidthClamping checks that out-of-range bit widths passed
// to the constructor are reported back as 1 and 8 respectively.
func TestPolarQuant_BitWidthClamping(t *testing.T) {
	const (
		dim  = 128
		seed = 42
	)

	// A requested width below 1 is expected to come back as 1.
	tooNarrow := NewPolarQuantCodec(dim, 0, seed)
	assert.Equal(t, 1, tooNarrow.BitsPerDim())

	// A requested width above 8 is expected to come back as 8.
	tooWide := NewPolarQuantCodec(dim, 16, seed)
	assert.Equal(t, 8, tooWide.BitsPerDim())
}

// TestPolarQuant_OrthogonalRotation_PreservesNorm multiplies a vector by the
// codec's rotation matrix by hand and checks that the L2 norm is unchanged,
// as it must be for an orthogonal matrix.
func TestPolarQuant_OrthogonalRotation_PreservesNorm(t *testing.T) {
	const dim = 64
	codec := NewPolarQuantCodec(dim, 8, 42)
	vec := pqRandomVector(dim, 5)

	// rotated = rotation · vec, one row-by-vector dot product per output entry.
	rotated := make([]float64, dim)
	for r := range rotated {
		row := codec.rotation[r]
		var acc float64
		for c, x := range vec {
			acc += row[c] * x
		}
		rotated[r] = acc
	}

	origNorm := vecNorm(vec, dim)
	rotNorm := vecNorm(rotated, dim)
	assert.InDelta(t, origNorm, rotNorm, 0.001, "rotation should preserve L2 norm")
}

// TestPolarQuant_OrthogonalRotation_Deterministic builds two codecs from the
// same parameters and compares their rotation matrices entry by entry.
func TestPolarQuant_OrthogonalRotation_Deterministic(t *testing.T) {
	const dim = 32
	first := NewPolarQuantCodec(dim, 4, 42)
	second := NewPolarQuantCodec(dim, 4, 42)

	for r := 0; r < dim; r++ {
		rowA, rowB := first.rotation[r], second.rotation[r]
		for c := 0; c < dim; c++ {
			assert.Equal(t, rowA[c], rowB[c],
				"same seed → same rotation at [%d][%d]", r, c)
		}
	}
}

// --- Batch Quality Tests ---

// TestPolarQuant_BatchQuality_4bit round-trips 100 differently seeded unit
// vectors through a 4-bit codec and bounds the average cosine similarity
// between each input and its reconstruction.
func TestPolarQuant_BatchQuality_4bit(t *testing.T) {
	const (
		dim = 128
		n   = 100
	)
	codec := NewPolarQuantCodec(dim, 4, 42)

	var cosSum float64
	for seed := 0; seed < n; seed++ {
		input := pqRandomVector(dim, int64(seed))
		decoded := codec.Decode(codec.Encode(input))
		cosSum += CosineSimilarity(input, decoded)
	}

	avgCos := cosSum / float64(n)
	assert.Greater(t, avgCos, 0.90,
		"avg cosine similarity over %d vectors should be > 0.90, got %.4f", n, avgCos)
}

// TestPolarQuant_BatchOrderingPreservation measures how often the top-1
// nearest neighbour among 10 random candidates is the same whether ranked by
// exact cosine similarity or by similarity on 4-bit encoded vectors, over 50
// independent queries.
func TestPolarQuant_BatchOrderingPreservation(t *testing.T) {
	const (
		dim        = 128
		queries    = 50
		candidates = 10
	)
	codec := NewPolarQuantCodec(dim, 4, 42)

	// argmax returns the index of the first candidate with the strictly
	// highest score, or -1 when no score exceeds the -2.0 starting value.
	argmax := func(pool [][]float64, score func([]float64) float64) int {
		winner, top := -1, -2.0
		for idx, cand := range pool {
			if s := score(cand); s > top {
				winner, top = idx, s
			}
		}
		return winner
	}

	hits := 0
	for q := 0; q < queries; q++ {
		// Seeds are spaced by 100 per query so the query and its
		// candidates never share a seed.
		base := int64(q * 100)
		query := pqRandomVector(dim, base)
		pool := make([][]float64, candidates)
		for c := range pool {
			pool[c] = pqRandomVector(dim, base+int64(c)+1)
		}

		// Ranking in the original space.
		exactBest := argmax(pool, func(v []float64) float64 {
			return CosineSimilarity(query, v)
		})

		// Ranking in the compressed space.
		encQuery := codec.Encode(query)
		compBest := argmax(pool, func(v []float64) float64 {
			return codec.CompressedSimilarity(encQuery, codec.Encode(v))
		})

		if exactBest == compBest {
			hits++
		}
	}

	rate := float64(hits) / float64(queries)
	assert.Greater(t, rate, 0.55,
		"top-1 preservation rate should be > 55%%, got %.0f%% (%d/%d)", rate*100, hits, queries)
}

// --- Benchmarks ---

// BenchmarkPolarQuant_Encode_4bit times Encode on one fixed 128-dimensional
// unit vector using a 4-bit codec.
func BenchmarkPolarQuant_Encode_4bit(b *testing.B) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 4, 42)
	input := pqRandomVector(dim, 1)

	// Keep codec construction and input generation out of the measurement.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = codec.Encode(input)
	}
}

// BenchmarkPolarQuant_Decode_4bit times Decode on one pre-encoded
// 128-dimensional vector using a 4-bit codec.
func BenchmarkPolarQuant_Decode_4bit(b *testing.B) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 4, 42)
	encoded := codec.Encode(pqRandomVector(dim, 1))

	// Setup, including the single Encode call, is excluded from the timing.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = codec.Decode(encoded)
	}
}

// BenchmarkPolarQuant_CompressedSimilarity_4bit times CompressedSimilarity on
// a fixed pair of pre-encoded 128-dimensional vectors using a 4-bit codec.
func BenchmarkPolarQuant_CompressedSimilarity_4bit(b *testing.B) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 4, 42)
	left := pqRandomVector(dim, 1)
	right := pqRandomVector(dim, 2)
	encLeft := codec.Encode(left)
	encRight := codec.Encode(right)

	// Only the similarity computation itself is measured.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = codec.CompressedSimilarity(encLeft, encRight)
	}
}

// BenchmarkPolarQuant_Encode_8bit times Encode on one fixed 128-dimensional
// unit vector using an 8-bit codec.
func BenchmarkPolarQuant_Encode_8bit(b *testing.B) {
	const dim = 128
	codec := NewPolarQuantCodec(dim, 8, 42)
	input := pqRandomVector(dim, 1)

	// Keep codec construction and input generation out of the measurement.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = codec.Encode(input)
	}
}

// --- Helpers (scoped to polarquant tests to avoid collision with qjl_test) ---

// pqRandomVector returns a dim-length vector of standard-normal samples drawn
// from a generator seeded with seed, scaled to unit L2 norm. The same
// (dim, seed) pair always yields the same vector. If the sampled norm is not
// positive the vector is returned unscaled.
func pqRandomVector(dim int, seed int64) []float64 {
	src := rand.New(rand.NewSource(seed))
	out := make([]float64, dim)

	// Draw the samples and accumulate the squared norm in one pass.
	var sumSq float64
	for i := range out {
		x := src.NormFloat64()
		out[i] = x
		sumSq += x * x
	}

	if length := math.Sqrt(sumSq); length > 0 {
		for i := range out {
			out[i] /= length
		}
	}
	return out
}

// pqPerturbVector returns a new vector equal to v plus noise times a
// standard-normal sample per component (generator seeded with seed), scaled
// to unit L2 norm. v itself is not modified. If the resulting norm is not
// positive the perturbed vector is returned unscaled.
func pqPerturbVector(v []float64, noise float64, seed int64) []float64 {
	src := rand.New(rand.NewSource(seed))
	out := make([]float64, len(v))

	// Perturb each component and accumulate the squared norm in one pass.
	var sumSq float64
	for i, base := range v {
		p := base + noise*src.NormFloat64()
		out[i] = p
		sumSq += p * p
	}

	if length := math.Sqrt(sumSq); length > 0 {
		for i := range out {
			out[i] /= length
		}
	}
	return out
}

// l2Error returns the Euclidean distance between a and b, or +Inf when the
// two slices have different lengths.
func l2Error(a, b []float64) float64 {
	if len(a) != len(b) {
		return math.Inf(1)
	}
	total := 0.0
	for idx, x := range a {
		diff := x - b[idx]
		total += diff * diff
	}
	return math.Sqrt(total)
}

// init has no runtime effect; it only references the fmt package so that the
// import is not reported as unused.
func init() {
	var _ = fmt.Sprint
}