gomcp/internal/domain/vectorstore/polarquant.go
DmitrL-dev cc7956d835 feat: TurboQuant VectorStore integration & Edge PQ KV cache prototype
- QJL (1-bit) approximate filter for 2.3x fast search

- PolarQuant (4-bit/8-bit) compressed storage with PQDropFloat64 memory reclamation (15x heap reduction)

- Two-Phase SearchQJL with fallback to CompressedSimilarity

- Edge Deployment prototype (pq_attention.cu) for LLaMA 1.5M token context
2026-03-26 22:00:49 +10:00

355 lines
9.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package vectorstore — PolarQuant multi-bit vector compression.
//
// Based on Google's TurboQuant research (ICLR 2026, §3.2).
// Exploits the key insight: after random orthogonal rotation, the coordinates
// of a unit vector follow the same known distribution (concentrated around 0
// with variance 1/dim) regardless of the original data distribution. A fixed
// per-coordinate scalar quantizer can therefore be applied without any
// calibration data; this implementation uses a uniform one over [-1, 1].
//
// Pipeline:
// 1. Random orthogonal rotation R (data-oblivious, seeded)
// 2. y = R · x (rotate)
// 3. Uniform quantization: each y_i ∈ [-1, 1] → [0, 2^b - 1]
// 4. Compact byte packing (2 values per byte at 4-bit)
//
// Combined with QJL (1-bit approximate search):
// - QJL signatures: fast approximate filtering (Phase 1)
// - PolarQuant codes: compressed exact reranking (Phase 2)
// - Together: TurboQuant = PolarQuant(main bits) + QJL(1-bit residual)
//
// Memory at 4-bit, 128-dim: 64 bytes + 4 bytes radius = 68 bytes
// vs float64 original: 1024 bytes → 15x compression
package vectorstore
import (
"math"
"math/rand"
)
// CompressedVector is the PolarQuant-compressed form of a single vector:
// the bit-packed quantization codes plus the norm needed to rescale them.
type CompressedVector struct {
	// Data holds the packed quantized coordinates (two per byte at 4-bit).
	Data []byte
	// Radius is the L2 norm of the original vector, used for denormalization.
	Radius float32
}
// PolarQuantCodec encodes and decodes vectors via a fixed rotation followed
// by uniform scalar quantization. Its fields are set once by
// NewPolarQuantCodec; the methods in this file only read them, so a codec
// can be shared once constructed.
type PolarQuantCodec struct {
	// dim is the vector dimensionality the codec operates on.
	dim int
	// bitsPerDim is the number of quantization bits per coordinate (1-8).
	bitsPerDim int
	// levels is 2^bitsPerDim - 1, i.e. the largest quantization code.
	levels int
	// rotation is the dim × dim orthogonal rotation matrix, stored row-major.
	rotation [][]float64
}
// NewPolarQuantCodec builds a codec whose rotation matrix is derived from a
// seeded PRNG, so the same (dim, bitsPerDim, seed) always yields the same codec.
//
// Parameters:
//   - dim: vector dimensionality the codec will operate on.
//   - bitsPerDim: quantization bits per coordinate. Values below 1 are raised
//     to 1 and values above 8 are lowered to 8.
//     Relative to float64 storage, 4-bit codes are 16x smaller and 8-bit
//     codes 8x smaller (not counting the stored radius).
//   - seed: PRNG seed used to generate the rotation.
func NewPolarQuantCodec(dim, bitsPerDim int, seed int64) *PolarQuantCodec {
	// Clamp the requested precision into the supported 1..8 range.
	switch {
	case bitsPerDim < 1:
		bitsPerDim = 1
	case bitsPerDim > 8:
		bitsPerDim = 8
	}

	source := rand.NewSource(seed)
	codec := &PolarQuantCodec{
		dim:        dim,
		bitsPerDim: bitsPerDim,
		levels:     (1 << bitsPerDim) - 1,
		rotation:   randomOrthogonalMatrix(dim, rand.New(source)),
	}
	return codec
}
// Encode turns a float64 vector into its compact PolarQuant form.
//
// Only the first c.dim entries of vector are used; a shorter vector is
// treated as if it were zero-padded up to c.dim.
//
// Steps:
//  1. Measure the L2 norm (radius) of the used entries
//  2. Scale them to unit length (skipped when the norm is not positive)
//  3. Apply the codec's rotation: y = R · x
//  4. Quantize every rotated coordinate and pack the codes into bytes
func (c *PolarQuantCodec) Encode(vector []float64) CompressedVector {
	// Number of input entries that actually participate.
	used := len(vector)
	if used > c.dim {
		used = c.dim
	}
	head := vector[:used]

	// Radius = L2 norm of the participating entries.
	var sumSq float64
	for _, x := range head {
		sumSq += x * x
	}
	radius := math.Sqrt(sumSq)

	// Unit-length copy; stays all-zero when the radius is not positive.
	unit := make([]float64, c.dim)
	if radius > 0 {
		for i, x := range head {
			unit[i] = x / radius
		}
	}

	// Rotate: each output coordinate is one matrix row dotted with the input.
	rotated := make([]float64, c.dim)
	for i := range rotated {
		var acc float64
		row := c.rotation[i]
		for j, u := range unit {
			acc += row[j] * u
		}
		rotated[i] = acc
	}

	return CompressedVector{
		Data:   c.packQuantized(rotated),
		Radius: float32(radius),
	}
}
// Decode rebuilds an approximate float64 vector of length c.dim from its
// compressed form.
//
// Steps:
//  1. Unpack and dequantize the codes into rotated-space coordinates
//  2. Undo the rotation: x = R^T · y (for an orthogonal R the transpose is
//     the inverse)
//  3. Multiply by the stored radius to restore the original scale
func (c *PolarQuantCodec) Decode(cv CompressedVector) []float64 {
	coords := c.unpackDequantized(cv.Data)
	scale := float64(cv.Radius)

	out := make([]float64, c.dim)
	for col := range out {
		// Column col of R is row col of R^T.
		var acc float64
		for row := 0; row < c.dim; row++ {
			acc += c.rotation[row][col] * coords[row]
		}
		out[col] = acc * scale
	}
	return out
}
// CompressedSimilarity computes approximate cosine similarity between two
// compressed vectors without a full Decode: both are dequantized only into
// the rotated domain and compared there, which is sufficient because an
// orthogonal rotation preserves inner products.
//
// Returns 0 when either vector has a zero Radius (an encoded zero vector)
// or when either dequantized vector has zero norm.
func (c *PolarQuantCodec) CompressedSimilarity(a, b CompressedVector) float64 {
	// A zero radius marks an encoded zero vector. Its codes carry no
	// direction (an all-zero input quantizes to a mid level that dequantizes
	// to a non-zero constant), so comparing them would yield a meaningless
	// score. Report 0, the same convention used for a zero denominator.
	if a.Radius == 0 || b.Radius == 0 {
		return 0
	}

	ra := c.unpackDequantized(a.Data)
	rb := c.unpackDequantized(b.Data)

	// Cosine similarity in rotated space equals cosine similarity in the
	// original space.
	var dot, normA, normB float64
	for i := 0; i < c.dim; i++ {
		dot += ra[i] * rb[i]
		normA += ra[i] * ra[i]
		normB += rb[i] * rb[i]
	}
	denom := math.Sqrt(normA) * math.Sqrt(normB)
	if denom == 0 {
		return 0
	}
	return dot / denom
}
// Dim reports the vector dimensionality this codec was built for.
func (c *PolarQuantCodec) Dim() int { return c.dim }
// BitsPerDim reports how many quantization bits are used per coordinate.
func (c *PolarQuantCodec) BitsPerDim() int { return c.bitsPerDim }
// CompressedBytes reports the size in bytes of one vector's packed codes
// (the radius is not included).
func (c *PolarQuantCodec) CompressedBytes() int {
	totalBits := c.dim * c.bitsPerDim
	// Round up to a whole number of bytes.
	return (totalBits + 7) / 8
}
// CompressionRatio reports how many times smaller a compressed vector
// (packed codes plus float32 radius) is than its float64 original.
func (c *PolarQuantCodec) CompressionRatio() float64 {
	const (
		float64Size = 8 // bytes per original float64 coordinate
		radiusSize  = 4 // bytes for the float32 radius
	)
	raw := float64(c.dim * float64Size)
	packed := float64(c.CompressedBytes() + radiusSize)
	return raw / packed
}
// --- Internal: Quantization and packing ---
// packQuantized quantizes the first c.dim rotated coordinates and packs
// their codes into a byte slice of (c.dim*c.bitsPerDim + 7) / 8 bytes.
//
// Layout by bit width:
//   - 4-bit: two codes per byte, even index in the high nibble, odd index
//     in the low nibble.
//   - 8-bit: one code per byte.
//   - otherwise: codes are written back to back, most significant bit first.
func (c *PolarQuantCodec) packQuantized(rotated []float64) []byte {
	out := make([]byte, (c.dim*c.bitsPerDim+7)/8)

	switch c.bitsPerDim {
	case 4:
		// Nibble packing.
		for i := 0; i < c.dim; i++ {
			code := quantizeUniform(rotated[i], c.levels)
			if i&1 == 0 {
				code <<= 4 // even index → high nibble
			}
			out[i>>1] |= code
		}
	case 8:
		// One code per byte.
		for i := 0; i < c.dim; i++ {
			out[i] = quantizeUniform(rotated[i], c.levels)
		}
	default:
		// Generic MSB-first bit stream.
		cursor := 0
		for i := 0; i < c.dim; i++ {
			code := quantizeUniform(rotated[i], c.levels)
			for shift := c.bitsPerDim - 1; shift >= 0; shift-- {
				if (code>>uint(shift))&1 == 1 {
					out[cursor>>3] |= byte(0x80) >> uint(cursor&7)
				}
				cursor++
			}
		}
	}
	return out
}
// unpackDequantized reverses packQuantized: it reads c.dim codes out of data
// and dequantizes each one into a rotated-space coordinate.
//
// If data holds fewer than (c.dim*c.bitsPerDim + 7) / 8 bytes (nil, empty or
// truncated), an all-zero slice of length c.dim is returned instead of
// indexing past the end of the buffer. Extra trailing bytes are ignored.
func (c *PolarQuantCodec) unpackDequantized(data []byte) []float64 {
	rotated := make([]float64, c.dim)

	// Guard against short input: without this check the loops below would
	// panic with an index-out-of-range on an incomplete buffer.
	if len(data) < (c.dim*c.bitsPerDim+7)/8 {
		return rotated
	}

	switch c.bitsPerDim {
	case 4:
		// Fast path: two codes per byte (even index high nibble, odd low).
		for i := 0; i < c.dim; i++ {
			packed := data[i/2]
			var q uint8
			if i%2 == 0 {
				q = packed >> 4
			} else {
				q = packed & 0x0F
			}
			rotated[i] = dequantizeUniform(q, c.levels)
		}
	case 8:
		// Fast path: one code per byte.
		for i := 0; i < c.dim; i++ {
			rotated[i] = dequantizeUniform(data[i], c.levels)
		}
	default:
		// Generic path: codes stored back to back, most significant bit first.
		bitPos := 0
		for i := 0; i < c.dim; i++ {
			var q uint8
			for b := c.bitsPerDim - 1; b >= 0; b-- {
				if data[bitPos/8]&(1<<uint(7-bitPos%8)) != 0 {
					q |= 1 << uint(b)
				}
				bitPos++
			}
			rotated[i] = dequantizeUniform(q, c.levels)
		}
	}
	return rotated
}
// quantizeUniform maps val to an integer code in [0, levels], returned as
// uint8.
//
// val is first clamped to [-1, 1]; NaN is treated as 0 (the centre of the
// range) because converting a NaN float to an integer has an
// implementation-dependent result in Go. The clamped value is then mapped
// linearly onto [0, levels] and rounded to the nearest code (halves round
// away from zero).
//
// The result only fits in a uint8 when levels <= 255, which holds for the
// 1-8 bit widths used by PolarQuantCodec.
func quantizeUniform(val float64, levels int) uint8 {
	switch {
	case math.IsNaN(val):
		// NaN fails both range comparisons below, so handle it explicitly.
		val = 0
	case val < -1:
		val = -1
	case val > 1:
		val = 1
	}
	// Map [-1, 1] → [0, 1] → [0, levels].
	unit := (val + 1.0) / 2.0
	return uint8(math.Round(unit * float64(levels)))
}
// dequantizeUniform converts a code q in [0, levels] back to a coordinate in
// [-1, 1]: code 0 yields -1 and code levels yields +1, linearly in between.
func dequantizeUniform(q uint8, levels int) float64 {
	// Position of the code within its range, as a fraction in [0, 1].
	fraction := float64(q) / float64(levels)
	return 2.0*fraction - 1.0
}
// --- Internal: Random orthogonal matrix generation ---
// randomOrthogonalMatrix returns a dim×dim matrix with orthonormal rows,
// obtained by filling a matrix with Gaussian samples from rng and running
// Gram-Schmidt over its rows.
//
// Properties:
//   - The output depends only on dim and the state of rng
//   - Construction cost is O(dim³)
func randomOrthogonalMatrix(dim int, rng *rand.Rand) [][]float64 {
	// Draw the Gaussian entries row by row.
	basis := make([][]float64, dim)
	for r := range basis {
		row := make([]float64, dim)
		for col := range row {
			row[col] = rng.NormFloat64()
		}
		basis[r] = row
	}

	// Modified Gram-Schmidt: each projection is computed against the row as
	// already reduced by the earlier projections.
	for r, row := range basis {
		for _, prev := range basis[:r] {
			proj := vecDot(row, prev, dim)
			for k := 0; k < dim; k++ {
				row[k] -= proj * prev[k]
			}
		}
		// Scale to unit length; a zero-length row is left untouched.
		if length := vecNorm(row, dim); length > 0 {
			for k := range row {
				row[k] /= length
			}
		}
	}
	return basis
}
// vecDot returns the dot product of the first n entries of a and b.
func vecDot(a, b []float64, n int) float64 {
	total := 0.0
	for i := 0; i < n; i++ {
		x, y := a[i], b[i]
		total += x * y
	}
	return total
}
// vecNorm returns the L2 norm of the first n entries of v.
func vecNorm(v []float64, n int) float64 {
	sumSq := 0.0
	for i := 0; i < n; i++ {
		x := v[i]
		sumSq += x * x
	}
	return math.Sqrt(sumSq)
}