// Source: gomcp/internal/domain/context/context.go (364 lines, 11 KiB, Go)

// Copyright 2026 Syntrex Lab. All rights reserved.
// Use of this source code is governed by an Apache-2.0 license
// that can be found in the LICENSE file.

// Package context defines domain entities for the Proactive Context Engine.
// The engine automatically injects relevant memory facts into every tool response,
// ensuring the LLM always has context without explicitly requesting it.
package context
import (
"errors"
"fmt"
"strings"
"time"
"unicode"
"github.com/syntrex-lab/gomcp/internal/domain/memory"
)
// Default configuration values.
// DefaultEngineConfig copies these into a fresh EngineConfig, and NewTokenBudget
// falls back to DefaultTokenBudget when given a non-positive maximum.
// The four scoring weights below add up to 1.0 (0.25 + 0.15 + 0.30 + 0.30).
const (
DefaultTokenBudget = 300 // tokens reserved for context injection
DefaultMaxFacts = 10 // max facts per context frame
DefaultRecencyWeight = 0.25 // weight for time-based recency scoring
DefaultFrequencyWeight = 0.15 // weight for access frequency scoring
DefaultLevelWeight = 0.30 // weight for hierarchy level scoring (L0 > L3)
DefaultKeywordWeight = 0.30 // weight for keyword match scoring
DefaultDecayHalfLife = 72.0 // hours until unused fact score halves
)
// FactProvider abstracts fact retrieval for the context engine.
// This decouples the engine from specific storage implementations.
// The two query methods return an error alongside the facts; RecordAccess
// has no return value, so an implementation cannot report a failure through it.
type FactProvider interface {
// GetRelevantFacts returns facts potentially relevant to the given tool arguments.
// args is the argument map of the tool call, keyed by argument name.
GetRelevantFacts(args map[string]interface{}) ([]*memory.Fact, error)
// GetL0Facts returns all L0 (project-level) facts — always included in context.
GetL0Facts() ([]*memory.Fact, error)
// RecordAccess increments the access counter for a fact.
// factID identifies the fact whose counter is incremented.
RecordAccess(factID string)
}
// ScoredFact pairs a Fact with its computed relevance score and access metadata.
// NewScoredFact fills in Fact and Score; AccessCount and LastAccessed are
// maintained by RecordAccess.
type ScoredFact struct {
	// Fact is the scored memory fact. EstimateTokens and ContextFrame.Format
	// dereference it, so it must be non-nil by the time they are called.
	Fact *memory.Fact `json:"fact"`

	// Score is the relevance score supplied by the caller.
	Score float64 `json:"score"`

	// AccessCount is the number of RecordAccess calls made on this value.
	AccessCount int `json:"access_count"`

	// LastAccessed is the time of the most recent RecordAccess call and stays
	// at the zero time until then.
	//
	// The tag uses omitzero rather than omitempty: encoding/json's omitempty
	// never omits a struct value such as time.Time, so the zero time was
	// always serialized. omitzero takes effect on Go 1.24 and later; older
	// toolchains ignore the unknown option and keep emitting the field.
	LastAccessed time.Time `json:"last_accessed,omitzero"`
}
// NewScoredFact wraps fact together with its relevance score.
// The access metadata is left at its zero value (AccessCount 0, zero
// LastAccessed) until RecordAccess is called. fact is stored as given,
// without a nil check.
func NewScoredFact(fact *memory.Fact, score float64) *ScoredFact {
	scored := new(ScoredFact)
	scored.Fact = fact
	scored.Score = score
	return scored
}
// EstimateTokens returns the approximate token cost of this fact's content.
// The estimate is delegated to EstimateTokenCount, applied to Fact.Content.
// It panics if Fact is nil, because the content is read without a nil check.
func (sf *ScoredFact) EstimateTokens() int {
	content := sf.Fact.Content
	return EstimateTokenCount(content)
}
// RecordAccess notes one more access to the fact: AccessCount grows by one
// and LastAccessed is set to the current time.
func (sf *ScoredFact) RecordAccess() {
	sf.AccessCount, sf.LastAccessed = sf.AccessCount+1, time.Now()
}
// ContextFrame holds the selected facts for injection into a single tool response.
// Frames are created by NewContextFrame, filled through AddFact and rendered
// to text by Format.
type ContextFrame struct {
// ToolName is the tool name passed to NewContextFrame.
ToolName string `json:"tool_name"`
// TokenBudget is the upper bound AddFact enforces on TokensUsed.
TokenBudget int `json:"token_budget"`
// Facts are the facts accepted by AddFact, in insertion order.
Facts []*ScoredFact `json:"facts"`
// TokensUsed is the running sum of EstimateTokens over the accepted facts.
TokensUsed int `json:"tokens_used"`
// CreatedAt is the time at which NewContextFrame built the frame.
CreatedAt time.Time `json:"created_at"`
}
// NewContextFrame returns a frame for toolName that holds no facts yet.
// tokenBudget is stored as given (it is not validated), Facts is an empty
// non-nil slice, TokensUsed starts at zero and CreatedAt is the current time.
func NewContextFrame(toolName string, tokenBudget int) *ContextFrame {
	frame := new(ContextFrame)
	frame.ToolName = toolName
	frame.TokenBudget = tokenBudget
	frame.Facts = []*ScoredFact{}
	frame.CreatedAt = time.Now()
	return frame
}
// AddFact appends sf to the frame when its estimated token cost still fits
// into the budget. On success TokensUsed grows by that cost and true is
// returned; otherwise the frame is left untouched and false is returned.
// A fact that fills the budget exactly is accepted.
func (cf *ContextFrame) AddFact(sf *ScoredFact) bool {
	cost := sf.EstimateTokens()
	if total := cf.TokensUsed + cost; total <= cf.TokenBudget {
		cf.Facts = append(cf.Facts, sf)
		cf.TokensUsed = total
		return true
	}
	return false
}
// RemainingTokens reports the unused part of the frame's token budget.
// The result is never negative: an overdrawn frame reports zero.
func (cf *ContextFrame) RemainingTokens() int {
	if left := cf.TokenBudget - cf.TokensUsed; left > 0 {
		return left
	}
	return 0
}
// Format renders the context frame as a text block for injection into tool results.
// Returns empty string if no facts are present.
//
// The block opens with "\n\n---\n[MEMORY CONTEXT]\n" and closes with
// "\n[/MEMORY CONTEXT]". Each fact is one bullet line of the form
// "• [L<n>/<level name>] <content>", followed by " (domain: <domain>)" when the
// fact's domain is neither empty nor "general". Every entry's Fact must be
// non-nil; it is dereferenced without a check.
func (cf *ContextFrame) Format() string {
	if len(cf.Facts) == 0 {
		return ""
	}
	var b strings.Builder
	b.WriteString("\n\n---\n[MEMORY CONTEXT]\n")
	for i, sf := range cf.Facts {
		if i > 0 {
			// Separator between bullet lines; the closing tag supplies the last newline.
			b.WriteByte('\n')
		}
		f := sf.Fact
		// Format straight into the builder instead of building a temporary
		// string with Sprintf and copying it over.
		fmt.Fprintf(&b, "• [L%d/%s] %s", int(f.Level), f.Level.String(), f.Content)
		// An empty domain is treated as "general", which is never printed.
		if f.Domain != "" && f.Domain != "general" {
			fmt.Fprintf(&b, " (domain: %s)", f.Domain)
		}
	}
	b.WriteString("\n[/MEMORY CONTEXT]")
	return b.String()
}
// TokenBudget tracks token consumption for context injection.
// Create one with NewTokenBudget; TryConsume spends tokens, Remaining reports
// what is left and Reset restores full capacity.
type TokenBudget struct {
// MaxTokens is the capacity of the budget.
MaxTokens int `json:"max_tokens"`
// used counts the tokens consumed so far. It is unexported and therefore
// not part of the JSON representation.
used int
}
// NewTokenBudget returns an unused budget with capacity max.
// A max of zero or less selects DefaultTokenBudget instead.
func NewTokenBudget(max int) *TokenBudget {
	limit := DefaultTokenBudget
	if max > 0 {
		limit = max
	}
	return &TokenBudget{MaxTokens: limit}
}
// TryConsume attempts to consume n tokens. Returns true if successful.
//
// The request succeeds only when n fits into the remaining capacity; on
// failure the budget is left unchanged. A negative n is rejected: it would
// otherwise act as a refund and could push the consumed count below zero,
// making Remaining exceed MaxTokens. Consuming zero tokens succeeds as long
// as the budget is not overdrawn.
func (tb *TokenBudget) TryConsume(n int) bool {
	if n < 0 {
		return false
	}
	// Compare against the remaining capacity rather than computing used+n,
	// which can overflow for very large n and wrongly accept the request.
	if tb.used > tb.MaxTokens || n > tb.MaxTokens-tb.used {
		return false
	}
	tb.used += n
	return true
}
// Remaining reports how many tokens can still be consumed.
// The result is clamped at zero, so an overdrawn budget reports 0.
func (tb *TokenBudget) Remaining() int {
	if left := tb.MaxTokens - tb.used; left > 0 {
		return left
	}
	return 0
}
// Reset discards all recorded consumption, restoring the full MaxTokens
// capacity. MaxTokens itself is left unchanged.
func (tb *TokenBudget) Reset() {
	tb.used = 0
}
// EngineConfig holds configuration for the Proactive Context Engine.
// DefaultEngineConfig returns a ready-to-use value and Validate checks the
// numeric fields. After decoding from JSON or editing SkipTools by hand, call
// BuildSkipSet so that ShouldSkip reflects the current list.
type EngineConfig struct {
// TokenBudget is the number of tokens reserved for context injection;
// Validate requires it to be positive.
TokenBudget int `json:"token_budget"`
// MaxFacts caps the number of facts per context frame; Validate requires
// it to be positive.
MaxFacts int `json:"max_facts"`
// The four weights scale the recency, access-frequency, hierarchy-level and
// keyword-match scoring terms. Validate rejects negative values and requires
// their sum to be non-zero.
RecencyWeight float64 `json:"recency_weight"`
FrequencyWeight float64 `json:"frequency_weight"`
LevelWeight float64 `json:"level_weight"`
KeywordWeight float64 `json:"keyword_weight"`
// DecayHalfLifeHours is the number of hours until an unused fact's score halves.
// NOTE(review): Validate does not check this field — confirm that the engine
// tolerates zero or negative values.
DecayHalfLifeHours float64 `json:"decay_half_life_hours"`
// Enabled is not read anywhere in this file; presumably it switches the
// engine on or off — TODO confirm against the engine implementation.
Enabled bool `json:"enabled"`
// SkipTools lists the tool names excluded from context injection.
SkipTools []string `json:"skip_tools,omitempty"`
// Computed at init for O(1) lookup.
// Built from SkipTools by BuildSkipSet; ShouldSkip builds it on first use
// when it is still nil. Being unexported, it is not part of the JSON form.
skipSet map[string]bool
}
// DefaultEngineConfig returns an enabled configuration populated from the
// package's Default* constants and DefaultSkipTools, with the skip lookup set
// already built.
func DefaultEngineConfig() EngineConfig {
	var cfg EngineConfig

	cfg.Enabled = true
	cfg.TokenBudget = DefaultTokenBudget
	cfg.MaxFacts = DefaultMaxFacts
	cfg.DecayHalfLifeHours = DefaultDecayHalfLife

	// Scoring weights.
	cfg.RecencyWeight = DefaultRecencyWeight
	cfg.FrequencyWeight = DefaultFrequencyWeight
	cfg.LevelWeight = DefaultLevelWeight
	cfg.KeywordWeight = DefaultKeywordWeight

	// The lookup set has to be derived after SkipTools is in place.
	cfg.SkipTools = DefaultSkipTools()
	cfg.BuildSkipSet()

	return cfg
}
// DefaultSkipTools lists the tools that receive no context injection by
// default: tools that already return facts directly, and system tools where
// context would only be noise. Every call returns a new slice, so callers may
// modify the result freely.
func DefaultSkipTools() []string {
	tools := [...]string{
		"search_facts",
		"get_fact",
		"list_facts",
		"get_l0_facts",
		"get_stale_facts",
		"fact_stats",
		"list_domains",
		"process_expired",
		"semantic_search",
		"health",
		"version",
		"dashboard",
	}
	return tools[:]
}
// BuildSkipSet rebuilds the lookup set used by ShouldSkip from the current
// SkipTools slice, replacing any previously built set. Call it after
// deserializing a config or after changing SkipTools by hand. The set is
// non-nil afterwards even when SkipTools is empty.
func (c *EngineConfig) BuildSkipSet() {
	set := make(map[string]bool, len(c.SkipTools))
	for i := range c.SkipTools {
		set[c.SkipTools[i]] = true
	}
	c.skipSet = set
}
// ShouldSkip reports whether toolName is excluded from context injection.
// The lookup set is built on first use when it has not been built yet; later
// edits to SkipTools are not picked up until BuildSkipSet is called again.
func (c *EngineConfig) ShouldSkip(toolName string) bool {
	if c.skipSet == nil {
		c.BuildSkipSet()
	}
	skip, listed := c.skipSet[toolName]
	return listed && skip
}
// Validate reports the first problem found in the configuration, or nil when
// it is usable. Checks run in this order: TokenBudget must be positive,
// MaxFacts must be positive, no scoring weight may be negative, and the
// weights must not sum to zero. Other fields are not checked.
func (c *EngineConfig) Validate() error {
	switch {
	case c.TokenBudget <= 0:
		return errors.New("token_budget must be positive")
	case c.MaxFacts <= 0:
		return errors.New("max_facts must be positive")
	}

	weights := [...]float64{c.RecencyWeight, c.FrequencyWeight, c.LevelWeight, c.KeywordWeight}
	total := 0.0
	for _, w := range weights {
		if w < 0 {
			return errors.New("weights must be non-negative")
		}
		total += w
	}
	if total == 0 {
		return errors.New("at least one weight must be positive")
	}
	return nil
}
// --- Stop words for keyword extraction ---
// stopWords is the set of lowercase English function words that
// ExtractKeywords drops. It is used as a lookup set: every listed word maps to
// true and a missing word yields false. ExtractKeywords discards very short
// tokens before consulting this set, so the one- and two-letter entries are
// never the reason a token is dropped.
var stopWords = map[string]bool{
"a": true, "an": true, "the": true, "is": true, "are": true, "was": true,
"were": true, "be": true, "been": true, "being": true, "have": true,
"has": true, "had": true, "do": true, "does": true, "did": true,
"will": true, "would": true, "could": true, "should": true, "may": true,
"might": true, "shall": true, "can": true, "to": true, "of": true,
"in": true, "for": true, "on": true, "with": true, "at": true,
"by": true, "from": true, "as": true, "into": true, "through": true,
"during": true, "before": true, "after": true, "above": true,
"below": true, "between": true, "and": true, "but": true, "or": true,
"nor": true, "not": true, "so": true, "yet": true, "both": true,
"either": true, "neither": true, "each": true, "every": true,
"all": true, "any": true, "few": true, "more": true, "most": true,
"other": true, "some": true, "such": true, "no": true, "only": true,
"same": true, "than": true, "too": true, "very": true, "just": true,
"about": true, "up": true, "out": true, "if": true, "then": true,
"that": true, "this": true, "these": true, "those": true, "it": true,
"its": true, "i": true, "me": true, "my": true, "we": true, "our": true,
"you": true, "your": true, "he": true, "him": true, "his": true,
"she": true, "her": true, "they": true, "them": true, "their": true,
"what": true, "which": true, "who": true, "whom": true, "when": true,
"where": true, "why": true, "how": true, "there": true, "here": true,
}
// ExtractKeywords extracts meaningful keywords from text, filtering stop words
// and short tokens. Splits camelCase and snake_case identifiers.
// Returns deduplicated lowercase keywords.
//
// Tokens are maximal runs of Unicode letters and digits; everything else
// separates tokens. A token is dropped when it has fewer than three
// characters, is a stop word, or has already been emitted. Keywords keep the
// order of their first occurrence. The result is nil for empty input and when
// no token survives the filters.
func ExtractKeywords(text string) []string {
	if text == "" {
		return nil
	}
	// camelCase boundaries must be split before lowercasing erases them.
	expanded := splitCamelCase(text)
	words := strings.FieldsFunc(strings.ToLower(expanded), func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	})
	seen := make(map[string]bool, len(words))
	var keywords []string
	for _, w := range words {
		// Count characters, not bytes: tokenization is Unicode-aware, and a
		// byte count would let one- and two-letter non-ASCII tokens pass the
		// "very short" filter.
		if len([]rune(w)) < 3 || stopWords[w] || seen[w] {
			continue
		}
		seen[w] = true
		keywords = append(keywords, w)
	}
	return keywords
}
// splitCamelCase separates the words of an identifier with spaces.
// Underscores become spaces, and a space is inserted before an uppercase
// letter that either follows a lowercase letter or ends a run of uppercase
// letters and is itself followed by a lowercase letter:
// "handleRequest" → "handle Request", "getHTTPClient" → "get HTTP Client".
// Letter case is left unchanged.
func splitCamelCase(s string) string {
	chars := []rune(s)
	var out strings.Builder
	for idx, ch := range chars {
		switch {
		case ch == '_':
			out.WriteByte(' ')
			continue
		case idx > 0 && unicode.IsUpper(ch):
			before := chars[idx-1]
			// True when ch starts a new word after an acronym, as the "C" in "HTTPClient".
			endsAcronym := unicode.IsUpper(before) &&
				idx+1 < len(chars) && unicode.IsLower(chars[idx+1])
			if unicode.IsLower(before) || endsAcronym {
				out.WriteByte(' ')
			}
		}
		out.WriteRune(ch)
	}
	return out.String()
}
// timeSinceHours reports how many hours have passed since t, as a fractional
// value. The result is negative when t lies in the future.
func timeSinceHours(t time.Time) float64 {
	elapsed := time.Since(t)
	return elapsed.Hours()
}
// EstimateTokenCount approximates the number of tokens in text as one token
// per four bytes, plus one token for rounding and formatting overhead.
// The length is measured in bytes, so multi-byte UTF-8 characters count more
// than once. The result is at least 1; empty text yields exactly 1.
func EstimateTokenCount(text string) int {
	return len(text)/4 + 1
}