mirror of
https://github.com/syntrex-lab/gomcp.git
synced 2026-04-29 22:36:21 +02:00
356 lines
9.6 KiB
Go
356 lines
9.6 KiB
Go
|
|
// Package vectorstore — PolarQuant multi-bit vector compression.
//
// Based on Google's TurboQuant research (ICLR 2026, §3.2).
// Exploits the key insight: after random orthogonal rotation, vector
// coordinates become approximately uniformly distributed regardless of
// the original data distribution. This makes uniform scalar quantization
// near-optimal without any calibration data.
//
// Pipeline:
//  1. Random orthogonal rotation R (data-oblivious, seeded)
//  2. y = R · x (rotate)
//  3. Uniform quantization: each y_i ∈ [-1, 1] → [0, 2^b - 1]
//  4. Compact byte packing (2 values per byte at 4-bit)
//
// Combined with QJL (1-bit approximate search):
//   - QJL signatures: fast approximate filtering (Phase 1)
//   - PolarQuant codes: compressed exact reranking (Phase 2)
//   - Together: TurboQuant = PolarQuant(main bits) + QJL(1-bit residual)
//
// Memory at 4-bit, 128-dim: 64 bytes + 4 bytes radius = 68 bytes
// vs float64 original: 1024 bytes → 15x compression
|
|||
|
|
package vectorstore
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"math"
|
|||
|
|
"math/rand"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// CompressedVector is the value produced by PolarQuantCodec.Encode: the
// bit-packed quantization codes of the rotated unit vector together with the
// length of the vector that was encoded.
type CompressedVector struct {
	// Data holds the packed quantization codes, bitsPerDim bits per
	// coordinate (two codes per byte at 4-bit).
	Data []byte
	// Radius is the L2 norm of the encoded vector, narrowed to float32.
	// Decode multiplies by it to restore the original scale.
	Radius float32
}
|
|||
|
|
|
|||
|
|
// PolarQuantCodec compresses vectors by rotating them with a fixed orthogonal
// matrix and then uniformly quantizing every rotated coordinate.
//
// The methods visible in this file only read the codec after construction,
// so a constructed codec can be shared between goroutines.
type PolarQuantCodec struct {
	// dim is the vector dimensionality the codec operates on.
	dim int
	// bitsPerDim is the number of bits stored per coordinate (1-8).
	bitsPerDim int
	// levels is the largest quantization code, 2^bitsPerDim - 1.
	levels int
	// rotation is the dim × dim orthogonal rotation matrix, one row per slice.
	rotation [][]float64
}
|
|||
|
|
|
|||
|
|
// NewPolarQuantCodec creates a PolarQuant codec with random orthogonal rotation.
|
|||
|
|
//
|
|||
|
|
// Parameters:
|
|||
|
|
// - dim: expected vector dimensionality (must match embedder output)
|
|||
|
|
// - bitsPerDim: quantization bits per dimension (1-8). Default: 4.
|
|||
|
|
// 4-bit → 16x compression, 8-bit → 8x compression.
|
|||
|
|
// - seed: PRNG seed for reproducible rotation. Same seed → same codec.
|
|||
|
|
func NewPolarQuantCodec(dim, bitsPerDim int, seed int64) *PolarQuantCodec {
|
|||
|
|
if bitsPerDim < 1 {
|
|||
|
|
bitsPerDim = 1
|
|||
|
|
}
|
|||
|
|
if bitsPerDim > 8 {
|
|||
|
|
bitsPerDim = 8
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
rng := rand.New(rand.NewSource(seed))
|
|||
|
|
rotation := randomOrthogonalMatrix(dim, rng)
|
|||
|
|
|
|||
|
|
return &PolarQuantCodec{
|
|||
|
|
dim: dim,
|
|||
|
|
bitsPerDim: bitsPerDim,
|
|||
|
|
levels: (1 << bitsPerDim) - 1,
|
|||
|
|
rotation: rotation,
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Encode compresses a float64 vector to a compact PolarQuant representation.
|
|||
|
|
//
|
|||
|
|
// Steps:
|
|||
|
|
// 1. Compute and store L2 norm (radius)
|
|||
|
|
// 2. L2-normalize the vector
|
|||
|
|
// 3. Rotate through random orthogonal matrix
|
|||
|
|
// 4. Uniform quantization of each coordinate
|
|||
|
|
// 5. Pack into bytes
|
|||
|
|
func (c *PolarQuantCodec) Encode(vector []float64) CompressedVector {
|
|||
|
|
dim := c.dim
|
|||
|
|
if len(vector) < dim {
|
|||
|
|
dim = len(vector)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Step 1: Compute radius (L2 norm).
|
|||
|
|
var radius float64
|
|||
|
|
for i := 0; i < dim; i++ {
|
|||
|
|
radius += vector[i] * vector[i]
|
|||
|
|
}
|
|||
|
|
radius = math.Sqrt(radius)
|
|||
|
|
|
|||
|
|
// Step 2: Normalize.
|
|||
|
|
normalized := make([]float64, c.dim)
|
|||
|
|
if radius > 0 {
|
|||
|
|
for i := 0; i < dim; i++ {
|
|||
|
|
normalized[i] = vector[i] / radius
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Step 3: Rotate — y = R · x.
|
|||
|
|
rotated := make([]float64, c.dim)
|
|||
|
|
for i := 0; i < c.dim; i++ {
|
|||
|
|
var dot float64
|
|||
|
|
row := c.rotation[i]
|
|||
|
|
for j := 0; j < c.dim; j++ {
|
|||
|
|
dot += row[j] * normalized[j]
|
|||
|
|
}
|
|||
|
|
rotated[i] = dot
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Step 4-5: Quantize and pack.
|
|||
|
|
data := c.packQuantized(rotated)
|
|||
|
|
|
|||
|
|
return CompressedVector{
|
|||
|
|
Data: data,
|
|||
|
|
Radius: float32(radius),
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Decode reconstructs a float64 vector from its compressed representation.
|
|||
|
|
//
|
|||
|
|
// Steps:
|
|||
|
|
// 1. Unpack quantized values
|
|||
|
|
// 2. Dequantize to [-1, 1] midpoints
|
|||
|
|
// 3. Inverse rotation: x = R^T · y
|
|||
|
|
// 4. Denormalize by radius
|
|||
|
|
func (c *PolarQuantCodec) Decode(cv CompressedVector) []float64 {
|
|||
|
|
// Step 1-2: Unpack and dequantize.
|
|||
|
|
rotated := c.unpackDequantized(cv.Data)
|
|||
|
|
|
|||
|
|
// Step 3: Inverse rotation — x = R^T · y (transpose of orthogonal = inverse).
|
|||
|
|
normalized := make([]float64, c.dim)
|
|||
|
|
for i := 0; i < c.dim; i++ {
|
|||
|
|
var dot float64
|
|||
|
|
for j := 0; j < c.dim; j++ {
|
|||
|
|
dot += c.rotation[j][i] * rotated[j] // R^T[i][j] = R[j][i]
|
|||
|
|
}
|
|||
|
|
normalized[i] = dot
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Step 4: Denormalize.
|
|||
|
|
radius := float64(cv.Radius)
|
|||
|
|
result := make([]float64, c.dim)
|
|||
|
|
for i := range normalized {
|
|||
|
|
result[i] = normalized[i] * radius
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return result
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// CompressedSimilarity computes approximate cosine similarity between two
|
|||
|
|
// compressed vectors WITHOUT full decompression. Decompresses to the rotated
|
|||
|
|
// domain and computes dot product there (rotation preserves inner products).
|
|||
|
|
func (c *PolarQuantCodec) CompressedSimilarity(a, b CompressedVector) float64 {
|
|||
|
|
ra := c.unpackDequantized(a.Data)
|
|||
|
|
rb := c.unpackDequantized(b.Data)
|
|||
|
|
|
|||
|
|
// Cosine similarity in rotated space = cosine similarity in original space
|
|||
|
|
// (orthogonal rotation preserves inner products).
|
|||
|
|
var dot, normA, normB float64
|
|||
|
|
for i := 0; i < c.dim; i++ {
|
|||
|
|
dot += ra[i] * rb[i]
|
|||
|
|
normA += ra[i] * ra[i]
|
|||
|
|
normB += rb[i] * rb[i]
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
denom := math.Sqrt(normA) * math.Sqrt(normB)
|
|||
|
|
if denom == 0 {
|
|||
|
|
return 0
|
|||
|
|
}
|
|||
|
|
return dot / denom
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Dim returns the expected vector dimensionality.
|
|||
|
|
func (c *PolarQuantCodec) Dim() int {
|
|||
|
|
return c.dim
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// BitsPerDim returns the quantization precision.
|
|||
|
|
func (c *PolarQuantCodec) BitsPerDim() int {
|
|||
|
|
return c.bitsPerDim
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// CompressedBytes returns bytes per compressed vector (excluding radius).
|
|||
|
|
func (c *PolarQuantCodec) CompressedBytes() int {
|
|||
|
|
return (c.dim*c.bitsPerDim + 7) / 8
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// CompressionRatio returns the ratio of original to compressed size.
|
|||
|
|
func (c *PolarQuantCodec) CompressionRatio() float64 {
|
|||
|
|
origBytes := c.dim * 8 // float64
|
|||
|
|
compBytes := c.CompressedBytes() + 4 // + float32 radius
|
|||
|
|
return float64(origBytes) / float64(compBytes)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// --- Internal: Quantization and packing ---
|
|||
|
|
|
|||
|
|
// packQuantized quantizes and packs rotated coordinates into bytes.
|
|||
|
|
// For 4-bit: 2 values packed per byte (high nibble, low nibble).
|
|||
|
|
// For 8-bit: 1 value per byte.
|
|||
|
|
// For other bit widths: generic bit packing.
|
|||
|
|
func (c *PolarQuantCodec) packQuantized(rotated []float64) []byte {
|
|||
|
|
numBytes := (c.dim*c.bitsPerDim + 7) / 8
|
|||
|
|
data := make([]byte, numBytes)
|
|||
|
|
|
|||
|
|
if c.bitsPerDim == 4 {
|
|||
|
|
// Fast path: 4-bit packing (2 per byte).
|
|||
|
|
for i := 0; i < c.dim; i++ {
|
|||
|
|
q := quantizeUniform(rotated[i], c.levels)
|
|||
|
|
byteIdx := i / 2
|
|||
|
|
if i%2 == 0 {
|
|||
|
|
data[byteIdx] |= q << 4 // High nibble
|
|||
|
|
} else {
|
|||
|
|
data[byteIdx] |= q // Low nibble
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
} else if c.bitsPerDim == 8 {
|
|||
|
|
// Fast path: 8-bit packing (1 per byte).
|
|||
|
|
for i := 0; i < c.dim; i++ {
|
|||
|
|
data[i] = quantizeUniform(rotated[i], c.levels)
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
// Generic bit packing.
|
|||
|
|
bitPos := 0
|
|||
|
|
for i := 0; i < c.dim; i++ {
|
|||
|
|
q := quantizeUniform(rotated[i], c.levels)
|
|||
|
|
for b := c.bitsPerDim - 1; b >= 0; b-- {
|
|||
|
|
if q&(1<<uint(b)) != 0 {
|
|||
|
|
data[bitPos/8] |= 1 << uint(7-bitPos%8)
|
|||
|
|
}
|
|||
|
|
bitPos++
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return data
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// unpackDequantized unpacks and dequantizes bytes back to float64 coordinates.
|
|||
|
|
func (c *PolarQuantCodec) unpackDequantized(data []byte) []float64 {
|
|||
|
|
rotated := make([]float64, c.dim)
|
|||
|
|
|
|||
|
|
if c.bitsPerDim == 4 {
|
|||
|
|
// Fast path: 4-bit.
|
|||
|
|
for i := 0; i < c.dim; i++ {
|
|||
|
|
byteIdx := i / 2
|
|||
|
|
var q uint8
|
|||
|
|
if i%2 == 0 {
|
|||
|
|
q = data[byteIdx] >> 4
|
|||
|
|
} else {
|
|||
|
|
q = data[byteIdx] & 0x0F
|
|||
|
|
}
|
|||
|
|
rotated[i] = dequantizeUniform(q, c.levels)
|
|||
|
|
}
|
|||
|
|
} else if c.bitsPerDim == 8 {
|
|||
|
|
// Fast path: 8-bit.
|
|||
|
|
for i := 0; i < c.dim; i++ {
|
|||
|
|
rotated[i] = dequantizeUniform(data[i], c.levels)
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
// Generic bit unpacking.
|
|||
|
|
bitPos := 0
|
|||
|
|
for i := 0; i < c.dim; i++ {
|
|||
|
|
var q uint8
|
|||
|
|
for b := c.bitsPerDim - 1; b >= 0; b-- {
|
|||
|
|
if data[bitPos/8]&(1<<uint(7-bitPos%8)) != 0 {
|
|||
|
|
q |= 1 << uint(b)
|
|||
|
|
}
|
|||
|
|
bitPos++
|
|||
|
|
}
|
|||
|
|
rotated[i] = dequantizeUniform(q, c.levels)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return rotated
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// quantizeUniform converts val to an integer code in [0, levels].
//
// val is first clamped to [-1, 1], then mapped linearly so that -1 becomes 0
// and 1 becomes levels, and finally rounded to the nearest integer (halves
// round away from zero). NaN is not handled: it passes both clamps and the
// resulting float-to-integer conversion is implementation-dependent.
func quantizeUniform(val float64, levels int) uint8 {
	switch {
	case val < -1:
		val = -1
	case val > 1:
		val = 1
	}

	// [-1, 1] → [0, 1] → [0, levels].
	unit := (val + 1.0) / 2.0
	return uint8(math.Round(unit * float64(levels)))
}
|
|||
|
|
|
|||
|
|
// dequantizeUniform converts a code in [0, levels] back to a value in
// [-1, 1]: code 0 yields -1 and code levels yields 1, with the codes in
// between spaced evenly.
func dequantizeUniform(q uint8, levels int) float64 {
	// [0, levels] → [0, 1] → [-1, 1].
	fraction := float64(q) / float64(levels)
	return 2.0*fraction - 1.0
}
|
|||
|
|
|
|||
|
|
// --- Internal: Random orthogonal matrix generation ---
|
|||
|
|
|
|||
|
|
// randomOrthogonalMatrix generates a dim×dim random orthogonal matrix
|
|||
|
|
// using Gram-Schmidt orthogonalization of a random Gaussian matrix.
|
|||
|
|
//
|
|||
|
|
// Properties:
|
|||
|
|
// - Uniformly distributed over the orthogonal group O(d)
|
|||
|
|
// - Deterministic given the PRNG
|
|||
|
|
// - O(d³) construction, done once at initialization
|
|||
|
|
func randomOrthogonalMatrix(dim int, rng *rand.Rand) [][]float64 {
|
|||
|
|
// Generate random Gaussian matrix.
|
|||
|
|
Q := make([][]float64, dim)
|
|||
|
|
for i := range Q {
|
|||
|
|
Q[i] = make([]float64, dim)
|
|||
|
|
for j := range Q[i] {
|
|||
|
|
Q[i][j] = rng.NormFloat64()
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Modified Gram-Schmidt orthogonalization (numerically stable).
|
|||
|
|
for i := 0; i < dim; i++ {
|
|||
|
|
// Subtract projections onto all previous basis vectors.
|
|||
|
|
for j := 0; j < i; j++ {
|
|||
|
|
dot := vecDot(Q[i], Q[j], dim)
|
|||
|
|
for k := 0; k < dim; k++ {
|
|||
|
|
Q[i][k] -= dot * Q[j][k]
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Normalize to unit length.
|
|||
|
|
norm := vecNorm(Q[i], dim)
|
|||
|
|
if norm > 0 {
|
|||
|
|
for k := 0; k < dim; k++ {
|
|||
|
|
Q[i][k] /= norm
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return Q
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// vecDot returns the dot product of the first n entries of a and b.
// Both slices must hold at least n elements.
func vecDot(a, b []float64, n int) float64 {
	acc := 0.0
	for k := 0; k < n; k++ {
		acc += a[k] * b[k]
	}
	return acc
}
|
|||
|
|
|
|||
|
|
// vecNorm returns the L2 norm of the first n entries of v.
// v must hold at least n elements.
func vecNorm(v []float64, n int) float64 {
	sumSq := 0.0
	for k := 0; k < n; k++ {
		x := v[k]
		sumSq += x * x
	}
	return math.Sqrt(sumSq)
}
|