initial: Syntrex extraction from sentinel-community (615 files)

This commit is contained in:
DmitrL-dev 2026-03-11 15:12:02 +10:00
commit 2c50c993b1
175 changed files with 32396 additions and 0 deletions

View file

@ -0,0 +1,125 @@
package cache
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"github.com/sentinel-community/gomcp/internal/domain/memory"
bolt "go.etcd.io/bbolt"
)
// l0Bucket is the name of the bbolt bucket in which cached L0 facts are stored.
var l0Bucket = []byte("l0_facts")
// BoltCache is a memory.HotCache backed by a bbolt database. It keeps L0
// facts in a single bucket, keyed by fact ID.
type BoltCache struct {
	// mu is held for reading by GetL0Facts and for writing by the mutating
	// methods (InvalidateFact, WarmUp).
	mu sync.RWMutex
	// db is the bbolt handle all cache operations go through.
	db *bolt.DB
}
// NewBoltCache opens the bbolt database file at path, creating it (and any
// missing parent directories) when necessary, and makes sure the L0 facts
// bucket exists. If the bucket cannot be created the database is closed again
// before the error is returned.
//
// NOTE(review): no Options.Timeout is passed to bolt.Open, so per the bbolt
// documentation the call waits for the file lock if another process already
// has the database open — confirm that is the intended behaviour.
func NewBoltCache(path string) (*BoltCache, error) {
	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		return nil, fmt.Errorf("create cache directory: %w", mkErr)
	}

	handle, openErr := bolt.Open(path, 0o600, &bolt.Options{NoSync: false})
	if openErr != nil {
		return nil, fmt.Errorf("open bolt cache: %w", openErr)
	}

	// Create the bucket up front so later reads and writes can rely on it.
	ensureBucket := func(tx *bolt.Tx) error {
		_, bucketErr := tx.CreateBucketIfNotExists(l0Bucket)
		return bucketErr
	}
	if initErr := handle.Update(ensureBucket); initErr != nil {
		handle.Close()
		return nil, fmt.Errorf("create bucket: %w", initErr)
	}

	return &BoltCache{db: handle}, nil
}
// NewBoltCacheFromDB wraps an already-open bbolt database in a BoltCache,
// creating the L0 facts bucket if it does not exist yet.
//
// The handle is used as-is: it is not closed when bucket creation fails, and
// calling Close on the returned cache closes db. A nil db is rejected with an
// error instead of panicking.
func NewBoltCacheFromDB(db *bolt.DB) (*BoltCache, error) {
	if db == nil {
		return nil, fmt.Errorf("create bucket: nil bolt database")
	}
	err := db.Update(func(tx *bolt.Tx) error {
		_, err := tx.CreateBucketIfNotExists(l0Bucket)
		return err
	})
	if err != nil {
		return nil, fmt.Errorf("create bucket: %w", err)
	}
	return &BoltCache{db: db}, nil
}
// GetL0Facts returns all cached L0 (project-level) facts.
//
// Every value in the L0 bucket is JSON-decoded into a memory.Fact inside one
// read-only transaction. A missing bucket yields an empty result. If any
// entry fails to decode, the error (annotated with the entry's key) is
// returned together with a nil slice, so callers never see a partial result.
// The context argument is unused.
func (c *BoltCache) GetL0Facts(_ context.Context) ([]*memory.Fact, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	var facts []*memory.Fact
	err := c.db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket(l0Bucket)
		if b == nil {
			return nil
		}
		return b.ForEach(func(k, v []byte) error {
			// A fresh Fact per entry: each pointer appended must be distinct.
			f := new(memory.Fact)
			if err := json.Unmarshal(v, f); err != nil {
				return fmt.Errorf("unmarshal fact %s: %w", string(k), err)
			}
			facts = append(facts, f)
			return nil
		})
	})
	if err != nil {
		// Drop whatever was decoded before the failure.
		return nil, err
	}
	return facts, nil
}
// InvalidateFact deletes the entry stored under id from the L0 bucket.
// A missing bucket is treated as nothing to do. The context argument is
// unused.
func (c *BoltCache) InvalidateFact(_ context.Context, id string) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	key := []byte(id)
	remove := func(tx *bolt.Tx) error {
		if bucket := tx.Bucket(l0Bucket); bucket != nil {
			return bucket.Delete(key)
		}
		return nil
	}
	return c.db.Update(remove)
}
// WarmUp populates the cache with a batch of L0 facts.
//
// Each fact is JSON-encoded and stored under its ID in the L0 bucket, all
// within one read-write transaction. The first failure (missing bucket, nil
// entry, encoding error, or rejected write) is returned from the transaction
// callback, which makes bbolt roll the transaction back. The context argument
// is unused.
func (c *BoltCache) WarmUp(_ context.Context, facts []*memory.Fact) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	return c.db.Update(func(tx *bolt.Tx) error {
		b := tx.Bucket(l0Bucket)
		if b == nil {
			return fmt.Errorf("l0_facts bucket not found")
		}
		for i, f := range facts {
			// A nil entry would encode as "null" and then panic on f.ID;
			// report it as an error instead.
			if f == nil {
				return fmt.Errorf("nil fact at index %d", i)
			}
			data, err := json.Marshal(f)
			if err != nil {
				return fmt.Errorf("marshal fact %s: %w", f.ID, err)
			}
			if err := b.Put([]byte(f.ID), data); err != nil {
				return fmt.Errorf("put fact %s: %w", f.ID, err)
			}
		}
		return nil
	})
}
// Close closes the underlying bbolt database and reports the result.
func (c *BoltCache) Close() error {
	closeErr := c.db.Close()
	return closeErr
}
// Compile-time assertion that *BoltCache implements memory.HotCache.
var _ memory.HotCache = (*BoltCache)(nil)

View file

@ -0,0 +1,137 @@
package cache
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/sentinel-community/gomcp/internal/domain/memory"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// newTestCache returns a BoltCache whose database file lives in a per-test
// temporary directory. The cache is closed during test cleanup.
func newTestCache(t *testing.T) *BoltCache {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "test_cache.db")
	c, err := NewBoltCache(dbPath)
	require.NoError(t, err)

	t.Cleanup(func() { c.Close() })
	return c
}
// TestBoltCache_WarmUp_GetL0Facts checks that every fact handed to WarmUp is
// returned by GetL0Facts.
func TestBoltCache_WarmUp_GetL0Facts(t *testing.T) {
	ctx := context.Background()
	c := newTestCache(t)

	seed := []*memory.Fact{
		memory.NewFact("Iron Law 1: TDD always", memory.LevelProject, "core", ""),
		memory.NewFact("Iron Law 2: No any types", memory.LevelProject, "core", ""),
		memory.NewFact("Architecture: Clean Architecture", memory.LevelProject, "arch", ""),
	}
	require.NoError(t, c.WarmUp(ctx, seed))

	stored, err := c.GetL0Facts(ctx)
	require.NoError(t, err)
	assert.Len(t, stored, 3)
}
// TestBoltCache_InvalidateFact checks that a cached fact is no longer
// returned once it has been invalidated by ID.
func TestBoltCache_InvalidateFact(t *testing.T) {
	ctx := context.Background()
	c := newTestCache(t)

	fact := memory.NewFact("to invalidate", memory.LevelProject, "", "")
	require.NoError(t, c.WarmUp(ctx, []*memory.Fact{fact}))

	before, err := c.GetL0Facts(ctx)
	require.NoError(t, err)
	assert.Len(t, before, 1)

	require.NoError(t, c.InvalidateFact(ctx, fact.ID))

	after, err := c.GetL0Facts(ctx)
	require.NoError(t, err)
	assert.Len(t, after, 0)
}
// TestBoltCache_EmptyCache checks that a freshly created cache returns no
// facts and no error.
func TestBoltCache_EmptyCache(t *testing.T) {
	c := newTestCache(t)

	stored, err := c.GetL0Facts(context.Background())
	require.NoError(t, err)
	assert.Len(t, stored, 0)
}
// TestBoltCache_RoundTrip_Preserves_Fields stores one fact with several
// fields set and checks that they come back unchanged from the cache.
func TestBoltCache_RoundTrip_Preserves_Fields(t *testing.T) {
	ctx := context.Background()
	c := newTestCache(t)

	want := memory.NewFact("test content", memory.LevelProject, "domain1", "module1")
	want.Confidence = 0.95
	want.Source = "consolidation"
	want.Embedding = []float64{0.1, 0.2, 0.3}
	require.NoError(t, c.WarmUp(ctx, []*memory.Fact{want}))

	stored, err := c.GetL0Facts(ctx)
	require.NoError(t, err)
	require.Len(t, stored, 1)

	have := stored[0]
	assert.Equal(t, want.ID, have.ID)
	assert.Equal(t, want.Content, have.Content)
	assert.Equal(t, want.Domain, have.Domain)
	assert.Equal(t, want.Module, have.Module)
	assert.InDelta(t, want.Confidence, have.Confidence, 0.001)
	assert.Equal(t, want.Source, have.Source)
	assert.Len(t, have.Embedding, 3)
}
// TestBoltCache_Close_Reopen checks that facts written before Close are
// still present when the same database file is opened again.
func TestBoltCache_Close_Reopen(t *testing.T) {
	ctx := context.Background()
	dbPath := filepath.Join(t.TempDir(), "persist.db")

	// First session: write one fact, then close the database.
	first, err := NewBoltCache(dbPath)
	require.NoError(t, err)
	fact := memory.NewFact("persistent fact", memory.LevelProject, "", "")
	require.NoError(t, first.WarmUp(ctx, []*memory.Fact{fact}))
	require.NoError(t, first.Close())

	// Second session: the fact must still be there.
	second, err := NewBoltCache(dbPath)
	require.NoError(t, err)
	defer second.Close()

	stored, err := second.GetL0Facts(ctx)
	require.NoError(t, err)
	require.Len(t, stored, 1)
	assert.Equal(t, "persistent fact", stored[0].Content)
}
// TestBoltCache_InvalidateFact_NonExistent checks that invalidating an ID
// that was never cached does not return an error.
func TestBoltCache_InvalidateFact_NonExistent(t *testing.T) {
	c := newTestCache(t)

	// Should not error on non-existent ID.
	invalidateErr := c.InvalidateFact(context.Background(), "nonexistent-id")
	assert.NoError(t, invalidateErr)
}
// TestBoltCache_FileCreated checks that NewBoltCache creates the database
// file even when its parent directory does not exist yet.
func TestBoltCache_FileCreated(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "subdir", "cache.db")

	c, err := NewBoltCache(dbPath)
	require.NoError(t, err)
	defer c.Close()

	_, statErr := os.Stat(dbPath)
	assert.NoError(t, statErr)
}

View file

@ -0,0 +1,101 @@
package cache
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"github.com/sentinel-community/gomcp/internal/domain/vectorstore"
bolt "go.etcd.io/bbolt"
)
// embeddingBucket is the name of the bbolt bucket in which cached embeddings
// are stored.
var embeddingBucket = []byte("embedding_cache")
// CachedEmbedder decorates an Embedder with a BoltDB-backed result cache
// (v3.4). Repeated requests for the same text are served from the cache
// instead of being recomputed, which matters most in ONNX mode where
// inference is expensive.
//
// NOTE(review): the hit/miss counters are plain ints updated without any
// synchronization — confirm Embed is never called concurrently on one value.
type CachedEmbedder struct {
	// inner computes embeddings on a cache miss.
	inner vectorstore.Embedder
	// db holds the embedding_cache bucket.
	db *bolt.DB
	// hits and miss count cache hits and successfully computed misses.
	hits, miss int
}
// NewCachedEmbedder returns a CachedEmbedder that delegates to inner and
// stores results in db. The embedding_cache bucket is created if it does not
// exist; a failure to do so is returned as an error.
func NewCachedEmbedder(inner vectorstore.Embedder, db *bolt.DB) (*CachedEmbedder, error) {
	ensureBucket := func(tx *bolt.Tx) error {
		_, bucketErr := tx.CreateBucketIfNotExists(embeddingBucket)
		return bucketErr
	}
	if err := db.Update(ensureBucket); err != nil {
		return nil, fmt.Errorf("create embedding_cache bucket: %w", err)
	}
	return &CachedEmbedder{inner: inner, db: db}, nil
}
// Embed returns the embedding for text, serving it from the cache when
// possible and otherwise computing it with the inner embedder.
//
// The cache key is derived from text via hashText. A cache entry that cannot
// be read or decoded is treated as a miss. Errors from the inner embedder are
// returned unchanged and are not counted as misses. Writing the freshly
// computed embedding back to the cache is best-effort: a failure to encode or
// store it does not fail the call.
func (c *CachedEmbedder) Embed(ctx context.Context, text string) ([]float64, error) {
	key := []byte(hashText(text))

	// Try cache first.
	var cached []float64
	err := c.db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket(embeddingBucket)
		if b == nil {
			return nil
		}
		if data := b.Get(key); data != nil {
			return json.Unmarshal(data, &cached)
		}
		return nil
	})
	if err == nil && cached != nil {
		c.hits++
		return cached, nil
	}

	// Cache miss — compute.
	embedding, err := c.inner.Embed(ctx, text)
	if err != nil {
		return nil, err
	}
	c.miss++

	// Encode before opening the write transaction: bbolt allows only one
	// read-write transaction at a time, so the encoding work should not run
	// while it is held, and an unencodable vector (e.g. containing NaN)
	// should not open a transaction at all.
	data, err := json.Marshal(embedding)
	if err != nil {
		return embedding, nil
	}

	// Store in cache (fire-and-forget, don't fail on cache write error).
	_ = c.db.Update(func(tx *bolt.Tx) error {
		b := tx.Bucket(embeddingBucket)
		if b == nil {
			return nil
		}
		return b.Put(key, data)
	})
	return embedding, nil
}
// Dimension reports the dimension of the wrapped embedder.
func (c *CachedEmbedder) Dimension() int {
	return c.inner.Dimension()
}
// Name reports the wrapped embedder's name prefixed with "cached:".
func (c *CachedEmbedder) Name() string {
	const prefix = "cached:"
	return prefix + c.inner.Name()
}
// Mode reports the oracle mode of the wrapped embedder.
func (c *CachedEmbedder) Mode() vectorstore.OracleMode {
	return c.inner.Mode()
}
// Stats reports how many Embed calls were answered from the cache (hits) and
// how many were successfully computed by the inner embedder (misses).
func (c *CachedEmbedder) Stats() (hits, misses int) {
	hits, misses = c.hits, c.miss
	return hits, misses
}
// Compile-time assertion that *CachedEmbedder implements vectorstore.Embedder.
var _ vectorstore.Embedder = (*CachedEmbedder)(nil)
// hashText derives the cache key for text: the first 16 bytes (128 bits) of
// its SHA-256 digest, rendered as 32 lowercase hexadecimal characters.
func hashText(text string) string {
	const keyBytes = 16 // 128-bit key

	digest := sha256.Sum256([]byte(text))
	encoded := make([]byte, hex.EncodedLen(keyBytes))
	hex.Encode(encoded, digest[:keyBytes])
	return string(encoded)
}

View file

@ -0,0 +1,81 @@
package cache
import (
"context"
"os"
"testing"
"github.com/sentinel-community/gomcp/internal/domain/vectorstore"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
bolt "go.etcd.io/bbolt"
)
// mockEmbedder is a minimal test embedder that records how many times Embed
// has been called.
type mockEmbedder struct {
	calls int
}

// Embed increments the call counter and returns a deterministic
// 4-dimensional vector: byte i of text is added to component i%4.
func (m *mockEmbedder) Embed(_ context.Context, text string) ([]float64, error) {
	const dims = 4

	m.calls++
	out := make([]float64, dims)
	for i := 0; i < len(text); i++ {
		out[i%dims] += float64(text[i])
	}
	return out, nil
}
// Dimension reports the fixed vector size produced by the mock.
func (m *mockEmbedder) Dimension() int {
	return 4
}
// Name reports the mock's fixed identifier.
func (m *mockEmbedder) Name() string {
	return "mock"
}
// Mode always reports vectorstore.OracleModeFull.
func (m *mockEmbedder) Mode() vectorstore.OracleMode {
	return vectorstore.OracleModeFull
}
// TestCachedEmbedder_HitMiss checks that a repeated text is served from the
// cache without calling the inner embedder again, and that the hit/miss
// counters reflect what happened.
func TestCachedEmbedder_HitMiss(t *testing.T) {
	tmp, err := os.CreateTemp("", "embed_cache_*.db")
	require.NoError(t, err)
	tmp.Close()
	dbPath := tmp.Name()
	defer os.Remove(dbPath)

	db, err := bolt.Open(dbPath, 0o600, nil)
	require.NoError(t, err)
	defer db.Close()

	mock := &mockEmbedder{}
	embedder, err := NewCachedEmbedder(mock, db)
	require.NoError(t, err)

	ctx := context.Background()

	// First call — cache miss.
	first, err := embedder.Embed(ctx, "hello world")
	require.NoError(t, err)
	assert.Equal(t, 1, mock.calls)

	// Second call — cache hit.
	second, err := embedder.Embed(ctx, "hello world")
	require.NoError(t, err)
	assert.Equal(t, 1, mock.calls) // No extra call.
	assert.Equal(t, first, second)

	// Different text — cache miss.
	_, err = embedder.Embed(ctx, "different text")
	require.NoError(t, err)
	assert.Equal(t, 2, mock.calls)

	gotHits, gotMisses := embedder.Stats()
	assert.Equal(t, 1, gotHits)
	assert.Equal(t, 2, gotMisses)
}
func TestCachedEmbedder_Name(t *testing.T) {
inner := &mockEmbedder{}
db, err := bolt.Open(t.TempDir()+"/test.db", 0o600, nil)
require.NoError(t, err)
defer db.Close()
cached, err := NewCachedEmbedder(inner, db)
require.NoError(t, err)
assert.Equal(t, "cached:mock", cached.Name())
assert.Equal(t, 4, cached.Dimension())
}