gomcp/internal/domain/vectorstore/polarquant.go

356 lines
9.6 KiB
Go
Raw Normal View History

// Package vectorstore — PolarQuant multi-bit vector compression.
//
// Based on Google's TurboQuant research (ICLR 2026, §3.2).
// Exploits the key insight: after random orthogonal rotation, vector
// coordinates become approximately uniformly distributed regardless of
// the original data distribution. This makes uniform scalar quantization
// near-optimal without any calibration data.
//
// Pipeline:
// 1. Random orthogonal rotation R (data-oblivious, seeded)
// 2. y = R · x (rotate)
// 3. Uniform quantization: each y_i ∈ [-1, 1] → [0, 2^b - 1]
// 4. Compact byte packing (2 values per byte at 4-bit)
//
// Combined with QJL (1-bit approximate search):
// - QJL signatures: fast approximate filtering (Phase 1)
// - PolarQuant codes: compressed exact reranking (Phase 2)
// - Together: TurboQuant = PolarQuant(main bits) + QJL(1-bit residual)
//
// Memory at 4-bit, 128-dim: 64 bytes + 4 bytes radius = 68 bytes
// vs float64 original: 1024 bytes → 15x compression
package vectorstore
import (
"math"
"math/rand"
)
// CompressedVector holds a PolarQuant-compressed representation of a vector.
type CompressedVector struct {
Data []byte // Packed quantized values (2 per byte at 4-bit)
Radius float32 // Original L2 norm for denormalization
}
// PolarQuantCodec encodes/decodes vectors using rotation + uniform quantization.
// Thread-safe after construction (read-only rotation matrix).
type PolarQuantCodec struct {
dim int // Vector dimensionality
bitsPerDim int // Quantization bits per dimension (1-8)
levels int // 2^bitsPerDim - 1 (quantization levels)
rotation [][]float64 // dim × dim orthogonal rotation matrix
}
// NewPolarQuantCodec creates a PolarQuant codec with random orthogonal rotation.
//
// Parameters:
// - dim: expected vector dimensionality (must match embedder output)
// - bitsPerDim: quantization bits per dimension (1-8). Default: 4.
// 4-bit → 16x compression, 8-bit → 8x compression.
// - seed: PRNG seed for reproducible rotation. Same seed → same codec.
func NewPolarQuantCodec(dim, bitsPerDim int, seed int64) *PolarQuantCodec {
if bitsPerDim < 1 {
bitsPerDim = 1
}
if bitsPerDim > 8 {
bitsPerDim = 8
}
rng := rand.New(rand.NewSource(seed))
rotation := randomOrthogonalMatrix(dim, rng)
return &PolarQuantCodec{
dim: dim,
bitsPerDim: bitsPerDim,
levels: (1 << bitsPerDim) - 1,
rotation: rotation,
}
}
// Encode compresses a float64 vector to a compact PolarQuant representation.
//
// Steps:
// 1. Compute and store L2 norm (radius)
// 2. L2-normalize the vector
// 3. Rotate through random orthogonal matrix
// 4. Uniform quantization of each coordinate
// 5. Pack into bytes
func (c *PolarQuantCodec) Encode(vector []float64) CompressedVector {
dim := c.dim
if len(vector) < dim {
dim = len(vector)
}
// Step 1: Compute radius (L2 norm).
var radius float64
for i := 0; i < dim; i++ {
radius += vector[i] * vector[i]
}
radius = math.Sqrt(radius)
// Step 2: Normalize.
normalized := make([]float64, c.dim)
if radius > 0 {
for i := 0; i < dim; i++ {
normalized[i] = vector[i] / radius
}
}
// Step 3: Rotate — y = R · x.
rotated := make([]float64, c.dim)
for i := 0; i < c.dim; i++ {
var dot float64
row := c.rotation[i]
for j := 0; j < c.dim; j++ {
dot += row[j] * normalized[j]
}
rotated[i] = dot
}
// Step 4-5: Quantize and pack.
data := c.packQuantized(rotated)
return CompressedVector{
Data: data,
Radius: float32(radius),
}
}
// Decode reconstructs a float64 vector from its compressed representation.
//
// Steps:
// 1. Unpack quantized values
// 2. Dequantize to [-1, 1] midpoints
// 3. Inverse rotation: x = R^T · y
// 4. Denormalize by radius
func (c *PolarQuantCodec) Decode(cv CompressedVector) []float64 {
// Step 1-2: Unpack and dequantize.
rotated := c.unpackDequantized(cv.Data)
// Step 3: Inverse rotation — x = R^T · y (transpose of orthogonal = inverse).
normalized := make([]float64, c.dim)
for i := 0; i < c.dim; i++ {
var dot float64
for j := 0; j < c.dim; j++ {
dot += c.rotation[j][i] * rotated[j] // R^T[i][j] = R[j][i]
}
normalized[i] = dot
}
// Step 4: Denormalize.
radius := float64(cv.Radius)
result := make([]float64, c.dim)
for i := range normalized {
result[i] = normalized[i] * radius
}
return result
}
// CompressedSimilarity computes approximate cosine similarity between two
// compressed vectors WITHOUT full decompression. Decompresses to the rotated
// domain and computes dot product there (rotation preserves inner products).
func (c *PolarQuantCodec) CompressedSimilarity(a, b CompressedVector) float64 {
ra := c.unpackDequantized(a.Data)
rb := c.unpackDequantized(b.Data)
// Cosine similarity in rotated space = cosine similarity in original space
// (orthogonal rotation preserves inner products).
var dot, normA, normB float64
for i := 0; i < c.dim; i++ {
dot += ra[i] * rb[i]
normA += ra[i] * ra[i]
normB += rb[i] * rb[i]
}
denom := math.Sqrt(normA) * math.Sqrt(normB)
if denom == 0 {
return 0
}
return dot / denom
}
// Dim returns the expected vector dimensionality.
func (c *PolarQuantCodec) Dim() int {
return c.dim
}
// BitsPerDim returns the quantization precision.
func (c *PolarQuantCodec) BitsPerDim() int {
return c.bitsPerDim
}
// CompressedBytes returns bytes per compressed vector (excluding radius).
func (c *PolarQuantCodec) CompressedBytes() int {
return (c.dim*c.bitsPerDim + 7) / 8
}
// CompressionRatio returns the ratio of original to compressed size.
func (c *PolarQuantCodec) CompressionRatio() float64 {
origBytes := c.dim * 8 // float64
compBytes := c.CompressedBytes() + 4 // + float32 radius
return float64(origBytes) / float64(compBytes)
}
// --- Internal: Quantization and packing ---
// packQuantized quantizes and packs rotated coordinates into bytes.
// For 4-bit: 2 values packed per byte (high nibble, low nibble).
// For 8-bit: 1 value per byte.
// For other bit widths: generic bit packing.
func (c *PolarQuantCodec) packQuantized(rotated []float64) []byte {
numBytes := (c.dim*c.bitsPerDim + 7) / 8
data := make([]byte, numBytes)
if c.bitsPerDim == 4 {
// Fast path: 4-bit packing (2 per byte).
for i := 0; i < c.dim; i++ {
q := quantizeUniform(rotated[i], c.levels)
byteIdx := i / 2
if i%2 == 0 {
data[byteIdx] |= q << 4 // High nibble
} else {
data[byteIdx] |= q // Low nibble
}
}
} else if c.bitsPerDim == 8 {
// Fast path: 8-bit packing (1 per byte).
for i := 0; i < c.dim; i++ {
data[i] = quantizeUniform(rotated[i], c.levels)
}
} else {
// Generic bit packing.
bitPos := 0
for i := 0; i < c.dim; i++ {
q := quantizeUniform(rotated[i], c.levels)
for b := c.bitsPerDim - 1; b >= 0; b-- {
if q&(1<<uint(b)) != 0 {
data[bitPos/8] |= 1 << uint(7-bitPos%8)
}
bitPos++
}
}
}
return data
}
// unpackDequantized unpacks and dequantizes bytes back to float64 coordinates.
func (c *PolarQuantCodec) unpackDequantized(data []byte) []float64 {
rotated := make([]float64, c.dim)
if c.bitsPerDim == 4 {
// Fast path: 4-bit.
for i := 0; i < c.dim; i++ {
byteIdx := i / 2
var q uint8
if i%2 == 0 {
q = data[byteIdx] >> 4
} else {
q = data[byteIdx] & 0x0F
}
rotated[i] = dequantizeUniform(q, c.levels)
}
} else if c.bitsPerDim == 8 {
// Fast path: 8-bit.
for i := 0; i < c.dim; i++ {
rotated[i] = dequantizeUniform(data[i], c.levels)
}
} else {
// Generic bit unpacking.
bitPos := 0
for i := 0; i < c.dim; i++ {
var q uint8
for b := c.bitsPerDim - 1; b >= 0; b-- {
if data[bitPos/8]&(1<<uint(7-bitPos%8)) != 0 {
q |= 1 << uint(b)
}
bitPos++
}
rotated[i] = dequantizeUniform(q, c.levels)
}
}
return rotated
}
// quantizeUniform maps a value in [-1, 1] to [0, levels] as uint8.
func quantizeUniform(val float64, levels int) uint8 {
// Clamp to [-1, 1].
if val < -1 {
val = -1
}
if val > 1 {
val = 1
}
// Map [-1, 1] → [0, 1] → [0, levels].
normalized := (val + 1.0) / 2.0
return uint8(math.Round(normalized * float64(levels)))
}
// dequantizeUniform maps a quantized uint8 in [0, levels] back to [-1, 1].
func dequantizeUniform(q uint8, levels int) float64 {
// Map [0, levels] → [0, 1] → [-1, 1].
normalized := float64(q) / float64(levels)
return normalized*2.0 - 1.0
}
// --- Internal: Random orthogonal matrix generation ---
// randomOrthogonalMatrix generates a dim×dim random orthogonal matrix
// using Gram-Schmidt orthogonalization of a random Gaussian matrix.
//
// Properties:
// - Uniformly distributed over the orthogonal group O(d)
// - Deterministic given the PRNG
// - O(d³) construction, done once at initialization
func randomOrthogonalMatrix(dim int, rng *rand.Rand) [][]float64 {
// Generate random Gaussian matrix.
Q := make([][]float64, dim)
for i := range Q {
Q[i] = make([]float64, dim)
for j := range Q[i] {
Q[i][j] = rng.NormFloat64()
}
}
// Modified Gram-Schmidt orthogonalization (numerically stable).
for i := 0; i < dim; i++ {
// Subtract projections onto all previous basis vectors.
for j := 0; j < i; j++ {
dot := vecDot(Q[i], Q[j], dim)
for k := 0; k < dim; k++ {
Q[i][k] -= dot * Q[j][k]
}
}
// Normalize to unit length.
norm := vecNorm(Q[i], dim)
if norm > 0 {
for k := 0; k < dim; k++ {
Q[i][k] /= norm
}
}
}
return Q
}
// vecDot computes dot product of two vectors up to length n.
func vecDot(a, b []float64, n int) float64 {
var sum float64
for i := 0; i < n; i++ {
sum += a[i] * b[i]
}
return sum
}
// vecNorm computes L2 norm of a vector up to length n.
func vecNorm(v []float64, n int) float64 {
var sum float64
for i := 0; i < n; i++ {
sum += v[i] * v[i]
}
return math.Sqrt(sum)
}