mirror of
https://github.com/syntrex-lab/gomcp.git
synced 2026-04-27 05:16:22 +02:00
- QJL (1-bit) approximate filter for 2.3x fast search - PolarQuant (4-bit/8-bit) compressed storage with PQDropFloat64 memory reclamation (15x heap reduction) - Two-Phase SearchQJL with fallback to CompressedSimilarity - Edge Deployment prototype (pq_attention.cu) for LLaMA 1.5M token context
355 lines
9.6 KiB
Go
355 lines
9.6 KiB
Go
// Package vectorstore — PolarQuant multi-bit vector compression.
//
// Based on Google's TurboQuant research (ICLR 2026, §3.2).
// Exploits the key insight: after random orthogonal rotation, vector
// coordinates become approximately uniformly distributed regardless of
// the original data distribution. This makes uniform scalar quantization
// near-optimal without any calibration data.
//
// Pipeline:
//  1. Random orthogonal rotation R (data-oblivious, seeded)
//  2. y = R · x (rotate)
//  3. Uniform quantization: each y_i ∈ [-1, 1] → [0, 2^b - 1]
//  4. Compact byte packing (2 values per byte at 4-bit)
//
// Combined with QJL (1-bit approximate search):
//   - QJL signatures: fast approximate filtering (Phase 1)
//   - PolarQuant codes: compressed exact reranking (Phase 2)
//   - Together: TurboQuant = PolarQuant(main bits) + QJL(1-bit residual)
//
// Memory at 4-bit, 128-dim: 64 bytes + 4 bytes radius = 68 bytes
// vs float64 original: 1024 bytes → 15x compression
package vectorstore
|
||
|
||
import (
|
||
"math"
|
||
"math/rand"
|
||
)
|
||
|
||
// CompressedVector is the compact form of a vector produced by
// PolarQuantCodec.Encode: bit-packed quantization codes plus the vector's norm.
type CompressedVector struct {
	// Data is the packed sequence of quantized coordinates
	// (two codes per byte when the codec uses 4 bits per dimension).
	Data []byte

	// Radius is the L2 norm of the original vector, narrowed to float32.
	// Decode multiplies the reconstructed unit vector by it.
	Radius float32
}
|
||
|
||
// PolarQuantCodec compresses and reconstructs vectors by applying a fixed
// orthogonal rotation followed by uniform scalar quantization.
//
// The methods visible alongside this type only read its fields after
// construction, which is what allows a built codec to be shared between
// goroutines.
type PolarQuantCodec struct {
	dim        int         // number of coordinates per vector
	bitsPerDim int         // bits stored per coordinate (NewPolarQuantCodec keeps this in 1..8)
	levels     int         // highest quantization code: (1 << bitsPerDim) - 1
	rotation   [][]float64 // dim × dim orthogonal matrix, one row per rotated coordinate
}
|
||
|
||
// NewPolarQuantCodec creates a PolarQuant codec with random orthogonal rotation.
|
||
//
|
||
// Parameters:
|
||
// - dim: expected vector dimensionality (must match embedder output)
|
||
// - bitsPerDim: quantization bits per dimension (1-8). Default: 4.
|
||
// 4-bit → 16x compression, 8-bit → 8x compression.
|
||
// - seed: PRNG seed for reproducible rotation. Same seed → same codec.
|
||
func NewPolarQuantCodec(dim, bitsPerDim int, seed int64) *PolarQuantCodec {
|
||
if bitsPerDim < 1 {
|
||
bitsPerDim = 1
|
||
}
|
||
if bitsPerDim > 8 {
|
||
bitsPerDim = 8
|
||
}
|
||
|
||
rng := rand.New(rand.NewSource(seed))
|
||
rotation := randomOrthogonalMatrix(dim, rng)
|
||
|
||
return &PolarQuantCodec{
|
||
dim: dim,
|
||
bitsPerDim: bitsPerDim,
|
||
levels: (1 << bitsPerDim) - 1,
|
||
rotation: rotation,
|
||
}
|
||
}
|
||
|
||
// Encode compresses a float64 vector to a compact PolarQuant representation.
|
||
//
|
||
// Steps:
|
||
// 1. Compute and store L2 norm (radius)
|
||
// 2. L2-normalize the vector
|
||
// 3. Rotate through random orthogonal matrix
|
||
// 4. Uniform quantization of each coordinate
|
||
// 5. Pack into bytes
|
||
func (c *PolarQuantCodec) Encode(vector []float64) CompressedVector {
|
||
dim := c.dim
|
||
if len(vector) < dim {
|
||
dim = len(vector)
|
||
}
|
||
|
||
// Step 1: Compute radius (L2 norm).
|
||
var radius float64
|
||
for i := 0; i < dim; i++ {
|
||
radius += vector[i] * vector[i]
|
||
}
|
||
radius = math.Sqrt(radius)
|
||
|
||
// Step 2: Normalize.
|
||
normalized := make([]float64, c.dim)
|
||
if radius > 0 {
|
||
for i := 0; i < dim; i++ {
|
||
normalized[i] = vector[i] / radius
|
||
}
|
||
}
|
||
|
||
// Step 3: Rotate — y = R · x.
|
||
rotated := make([]float64, c.dim)
|
||
for i := 0; i < c.dim; i++ {
|
||
var dot float64
|
||
row := c.rotation[i]
|
||
for j := 0; j < c.dim; j++ {
|
||
dot += row[j] * normalized[j]
|
||
}
|
||
rotated[i] = dot
|
||
}
|
||
|
||
// Step 4-5: Quantize and pack.
|
||
data := c.packQuantized(rotated)
|
||
|
||
return CompressedVector{
|
||
Data: data,
|
||
Radius: float32(radius),
|
||
}
|
||
}
|
||
|
||
// Decode reconstructs a float64 vector from its compressed representation.
|
||
//
|
||
// Steps:
|
||
// 1. Unpack quantized values
|
||
// 2. Dequantize to [-1, 1] midpoints
|
||
// 3. Inverse rotation: x = R^T · y
|
||
// 4. Denormalize by radius
|
||
func (c *PolarQuantCodec) Decode(cv CompressedVector) []float64 {
|
||
// Step 1-2: Unpack and dequantize.
|
||
rotated := c.unpackDequantized(cv.Data)
|
||
|
||
// Step 3: Inverse rotation — x = R^T · y (transpose of orthogonal = inverse).
|
||
normalized := make([]float64, c.dim)
|
||
for i := 0; i < c.dim; i++ {
|
||
var dot float64
|
||
for j := 0; j < c.dim; j++ {
|
||
dot += c.rotation[j][i] * rotated[j] // R^T[i][j] = R[j][i]
|
||
}
|
||
normalized[i] = dot
|
||
}
|
||
|
||
// Step 4: Denormalize.
|
||
radius := float64(cv.Radius)
|
||
result := make([]float64, c.dim)
|
||
for i := range normalized {
|
||
result[i] = normalized[i] * radius
|
||
}
|
||
|
||
return result
|
||
}
|
||
|
||
// CompressedSimilarity computes approximate cosine similarity between two
|
||
// compressed vectors WITHOUT full decompression. Decompresses to the rotated
|
||
// domain and computes dot product there (rotation preserves inner products).
|
||
func (c *PolarQuantCodec) CompressedSimilarity(a, b CompressedVector) float64 {
|
||
ra := c.unpackDequantized(a.Data)
|
||
rb := c.unpackDequantized(b.Data)
|
||
|
||
// Cosine similarity in rotated space = cosine similarity in original space
|
||
// (orthogonal rotation preserves inner products).
|
||
var dot, normA, normB float64
|
||
for i := 0; i < c.dim; i++ {
|
||
dot += ra[i] * rb[i]
|
||
normA += ra[i] * ra[i]
|
||
normB += rb[i] * rb[i]
|
||
}
|
||
|
||
denom := math.Sqrt(normA) * math.Sqrt(normB)
|
||
if denom == 0 {
|
||
return 0
|
||
}
|
||
return dot / denom
|
||
}
|
||
|
||
// Dim returns the expected vector dimensionality.
|
||
func (c *PolarQuantCodec) Dim() int {
|
||
return c.dim
|
||
}
|
||
|
||
// BitsPerDim returns the quantization precision.
|
||
func (c *PolarQuantCodec) BitsPerDim() int {
|
||
return c.bitsPerDim
|
||
}
|
||
|
||
// CompressedBytes returns bytes per compressed vector (excluding radius).
|
||
func (c *PolarQuantCodec) CompressedBytes() int {
|
||
return (c.dim*c.bitsPerDim + 7) / 8
|
||
}
|
||
|
||
// CompressionRatio returns the ratio of original to compressed size.
|
||
func (c *PolarQuantCodec) CompressionRatio() float64 {
|
||
origBytes := c.dim * 8 // float64
|
||
compBytes := c.CompressedBytes() + 4 // + float32 radius
|
||
return float64(origBytes) / float64(compBytes)
|
||
}
|
||
|
||
// --- Internal: Quantization and packing ---
|
||
|
||
// packQuantized quantizes and packs rotated coordinates into bytes.
|
||
// For 4-bit: 2 values packed per byte (high nibble, low nibble).
|
||
// For 8-bit: 1 value per byte.
|
||
// For other bit widths: generic bit packing.
|
||
func (c *PolarQuantCodec) packQuantized(rotated []float64) []byte {
|
||
numBytes := (c.dim*c.bitsPerDim + 7) / 8
|
||
data := make([]byte, numBytes)
|
||
|
||
if c.bitsPerDim == 4 {
|
||
// Fast path: 4-bit packing (2 per byte).
|
||
for i := 0; i < c.dim; i++ {
|
||
q := quantizeUniform(rotated[i], c.levels)
|
||
byteIdx := i / 2
|
||
if i%2 == 0 {
|
||
data[byteIdx] |= q << 4 // High nibble
|
||
} else {
|
||
data[byteIdx] |= q // Low nibble
|
||
}
|
||
}
|
||
} else if c.bitsPerDim == 8 {
|
||
// Fast path: 8-bit packing (1 per byte).
|
||
for i := 0; i < c.dim; i++ {
|
||
data[i] = quantizeUniform(rotated[i], c.levels)
|
||
}
|
||
} else {
|
||
// Generic bit packing.
|
||
bitPos := 0
|
||
for i := 0; i < c.dim; i++ {
|
||
q := quantizeUniform(rotated[i], c.levels)
|
||
for b := c.bitsPerDim - 1; b >= 0; b-- {
|
||
if q&(1<<uint(b)) != 0 {
|
||
data[bitPos/8] |= 1 << uint(7-bitPos%8)
|
||
}
|
||
bitPos++
|
||
}
|
||
}
|
||
}
|
||
|
||
return data
|
||
}
|
||
|
||
// unpackDequantized unpacks and dequantizes bytes back to float64 coordinates.
|
||
func (c *PolarQuantCodec) unpackDequantized(data []byte) []float64 {
|
||
rotated := make([]float64, c.dim)
|
||
|
||
if c.bitsPerDim == 4 {
|
||
// Fast path: 4-bit.
|
||
for i := 0; i < c.dim; i++ {
|
||
byteIdx := i / 2
|
||
var q uint8
|
||
if i%2 == 0 {
|
||
q = data[byteIdx] >> 4
|
||
} else {
|
||
q = data[byteIdx] & 0x0F
|
||
}
|
||
rotated[i] = dequantizeUniform(q, c.levels)
|
||
}
|
||
} else if c.bitsPerDim == 8 {
|
||
// Fast path: 8-bit.
|
||
for i := 0; i < c.dim; i++ {
|
||
rotated[i] = dequantizeUniform(data[i], c.levels)
|
||
}
|
||
} else {
|
||
// Generic bit unpacking.
|
||
bitPos := 0
|
||
for i := 0; i < c.dim; i++ {
|
||
var q uint8
|
||
for b := c.bitsPerDim - 1; b >= 0; b-- {
|
||
if data[bitPos/8]&(1<<uint(7-bitPos%8)) != 0 {
|
||
q |= 1 << uint(b)
|
||
}
|
||
bitPos++
|
||
}
|
||
rotated[i] = dequantizeUniform(q, c.levels)
|
||
}
|
||
}
|
||
|
||
return rotated
|
||
}
|
||
|
||
// quantizeUniform converts a coordinate into an integer code in [0, levels].
//
// val is first clamped to [-1, 1], then mapped linearly so that -1 becomes 0
// and 1 becomes levels, rounding to the nearest code (halves round away from
// zero, per math.Round).
//
// NaN is encoded as if it were 0. It would otherwise slip past both clamps,
// and Go leaves the result of converting NaN to an integer type
// implementation-dependent, so the stored code could differ by platform.
//
// The callers in this file pass levels = (1 << bitsPerDim) - 1 with
// bitsPerDim at most 8, so the code always fits in a uint8.
func quantizeUniform(val float64, levels int) uint8 {
	switch {
	case math.IsNaN(val):
		val = 0
	case val < -1:
		val = -1
	case val > 1:
		val = 1
	}

	// Map [-1, 1] → [0, 1] → [0, levels].
	unit := (val + 1.0) / 2.0
	return uint8(math.Round(unit * float64(levels)))
}
|
||
|
||
// dequantizeUniform converts a code in [0, levels] back to a coordinate in
// [-1, 1]: code 0 yields -1, code levels yields 1, and the codes in between
// are spaced evenly.
func dequantizeUniform(q uint8, levels int) float64 {
	// [0, levels] → [0, 1] → [-1, 1].
	return float64(q)/float64(levels)*2.0 - 1.0
}
|
||
|
||
// --- Internal: Random orthogonal matrix generation ---
|
||
|
||
// randomOrthogonalMatrix generates a dim×dim random orthogonal matrix
|
||
// using Gram-Schmidt orthogonalization of a random Gaussian matrix.
|
||
//
|
||
// Properties:
|
||
// - Uniformly distributed over the orthogonal group O(d)
|
||
// - Deterministic given the PRNG
|
||
// - O(d³) construction, done once at initialization
|
||
func randomOrthogonalMatrix(dim int, rng *rand.Rand) [][]float64 {
|
||
// Generate random Gaussian matrix.
|
||
Q := make([][]float64, dim)
|
||
for i := range Q {
|
||
Q[i] = make([]float64, dim)
|
||
for j := range Q[i] {
|
||
Q[i][j] = rng.NormFloat64()
|
||
}
|
||
}
|
||
|
||
// Modified Gram-Schmidt orthogonalization (numerically stable).
|
||
for i := 0; i < dim; i++ {
|
||
// Subtract projections onto all previous basis vectors.
|
||
for j := 0; j < i; j++ {
|
||
dot := vecDot(Q[i], Q[j], dim)
|
||
for k := 0; k < dim; k++ {
|
||
Q[i][k] -= dot * Q[j][k]
|
||
}
|
||
}
|
||
|
||
// Normalize to unit length.
|
||
norm := vecNorm(Q[i], dim)
|
||
if norm > 0 {
|
||
for k := 0; k < dim; k++ {
|
||
Q[i][k] /= norm
|
||
}
|
||
}
|
||
}
|
||
|
||
return Q
|
||
}
|
||
|
||
// vecDot returns the inner product of the first n entries of a and b.
// It yields 0 when n is not positive; both slices must hold at least n values.
func vecDot(a, b []float64, n int) float64 {
	total := 0.0
	for k := 0; k < n; k++ {
		total += a[k] * b[k]
	}
	return total
}
|
||
|
||
// vecNorm returns the Euclidean length of the first n entries of v.
// It yields 0 when n is not positive; v must hold at least n values.
func vecNorm(v []float64, n int) float64 {
	sumSq := 0.0
	for k := 0; k < n; k++ {
		sumSq += v[k] * v[k]
	}
	return math.Sqrt(sumSq)
}
|