// Package intent provides the Intent Distiller — recursive compression // of user input into a pure intent vector (DIP H0.2). // // The distillation process: // 1. Embed raw text → surface vector // 2. Extract key phrases (top-N by TF weight) // 3. Re-embed compressed text → deep vector // 4. Compute cosine similarity(surface, deep) // 5. If similarity > threshold → converged (intent = deep vector) // 6. If similarity < threshold → iterate with further compression // 7. Final sincerity check: high divergence between surface and deep = manipulation package intent import ( "context" "fmt" "math" "strings" "time" ) // EmbeddingFunc abstracts the embedding computation (bridges to Python NLP). type EmbeddingFunc func(ctx context.Context, text string) ([]float64, error) // DistillConfig configures the distillation pipeline. type DistillConfig struct { MaxIterations int // Maximum distillation iterations (default: 5) ConvergenceThreshold float64 // Cosine similarity threshold for convergence (default: 0.92) SincerityThreshold float64 // Max surface-deep divergence before flagging manipulation (default: 0.35) MinTextLength int // Minimum text length to attempt distillation (default: 10) } // DefaultConfig returns sensible defaults. func DefaultConfig() DistillConfig { return DistillConfig{ MaxIterations: 5, ConvergenceThreshold: 0.92, SincerityThreshold: 0.35, MinTextLength: 10, } } // DistillResult holds the output of intent distillation. type DistillResult struct { // Core outputs IntentVector []float64 `json:"intent_vector"` // Pure intent embedding SurfaceVector []float64 `json:"surface_vector"` // Raw text embedding CompressedText string `json:"compressed_text"` // Final compressed form // Metrics Iterations int `json:"iterations"` // Distillation iterations used Convergence float64 `json:"convergence"` // Final cosine similarity SincerityScore float64 `json:"sincerity_score"` // 1.0 = sincere, 0.0 = manipulative IsSincere bool `json:"is_sincere"` // Passed sincerity check IsManipulation bool `json:"is_manipulation"` // Failed sincerity check // Timing DurationMs int64 `json:"duration_ms"` } // Distiller performs recursive intent extraction. type Distiller struct { cfg DistillConfig embed EmbeddingFunc } // NewDistiller creates a new Intent Distiller. func NewDistiller(embedFn EmbeddingFunc, cfg *DistillConfig) *Distiller { c := DefaultConfig() if cfg != nil { if cfg.MaxIterations > 0 { c.MaxIterations = cfg.MaxIterations } if cfg.ConvergenceThreshold > 0 { c.ConvergenceThreshold = cfg.ConvergenceThreshold } if cfg.SincerityThreshold > 0 { c.SincerityThreshold = cfg.SincerityThreshold } if cfg.MinTextLength > 0 { c.MinTextLength = cfg.MinTextLength } } return &Distiller{cfg: c, embed: embedFn} } // Distill performs recursive intent distillation on the input text. // // The process iteratively compresses the text and compares embeddings // until convergence (the meaning stabilizes) or max iterations. // A sincerity check compares the original surface embedding against // the final deep embedding — high divergence signals manipulation. func (d *Distiller) Distill(ctx context.Context, text string) (*DistillResult, error) { start := time.Now() if len(strings.TrimSpace(text)) < d.cfg.MinTextLength { return nil, fmt.Errorf("text too short for distillation (min %d chars)", d.cfg.MinTextLength) } // Step 1: Surface embedding (raw text as-is). surfaceVec, err := d.embed(ctx, text) if err != nil { return nil, fmt.Errorf("surface embedding: %w", err) } // Step 2: Iterative compression loop. currentText := text var prevVec []float64 currentVec := surfaceVec iterations := 0 convergence := 0.0 for i := 0; i < d.cfg.MaxIterations; i++ { iterations = i + 1 // Compress text: extract core phrases. compressed := compressText(currentText) if compressed == currentText || len(compressed) < d.cfg.MinTextLength { break // Cannot compress further } // Re-embed compressed text. prevVec = currentVec currentVec, err = d.embed(ctx, compressed) if err != nil { return nil, fmt.Errorf("iteration %d embedding: %w", i, err) } // Check convergence. convergence = cosineSimilarity(prevVec, currentVec) if convergence >= d.cfg.ConvergenceThreshold { currentText = compressed break // Intent has stabilized } currentText = compressed } // Step 3: Sincerity check. surfaceDeepSim := cosineSimilarity(surfaceVec, currentVec) divergence := 1.0 - surfaceDeepSim isSincere := divergence <= d.cfg.SincerityThreshold result := &DistillResult{ IntentVector: currentVec, SurfaceVector: surfaceVec, CompressedText: currentText, Iterations: iterations, Convergence: convergence, SincerityScore: surfaceDeepSim, IsSincere: isSincere, IsManipulation: !isSincere, DurationMs: time.Since(start).Milliseconds(), } return result, nil } // compressText extracts the semantic core of text by removing // filler words, decorations, and social engineering wrappers. func compressText(text string) string { words := strings.Fields(text) if len(words) <= 3 { return text } // Remove common filler/manipulation patterns fillers := map[string]bool{ "please": true, "пожалуйста": true, "kindly": true, "just": true, "simply": true, "только": true, "imagine": true, "представь": true, "pretend": true, "suppose": true, "допустим": true, "assuming": true, "hypothetically": true, "гипотетически": true, "for": true, "для": true, "as": true, "как": true, "the": true, "a": true, "an": true, "и": true, "is": true, "are": true, "was": true, "were": true, "that": true, "this": true, "these": true, "those": true, "будь": true, "будьте": true, "можешь": true, "could": true, "would": true, "should": true, "actually": true, "really": true, "very": true, "you": true, "your": true, "ты": true, "твой": true, "my": true, "мой": true, "i": true, "я": true, "в": true, "на": true, "с": true, "к": true, "не": true, "но": true, "из": true, "от": true, } var core []string for _, w := range words { lower := strings.ToLower(w) // Strip punctuation for check, keep original cleaned := strings.Trim(lower, ".,!?;:'\"()-[]{}«»") if !fillers[cleaned] && len(cleaned) > 1 { core = append(core, w) } } if len(core) == 0 { return text // Don't compress to nothing } // Keep max 70% of original words (progressive compression) maxWords := int(float64(len(words)) * 0.7) if maxWords < 3 { maxWords = 3 } if len(core) > maxWords { core = core[:maxWords] } return strings.Join(core, " ") } // cosineSimilarity computes cosine similarity between two vectors. func cosineSimilarity(a, b []float64) float64 { if len(a) != len(b) || len(a) == 0 { return 0 } var dot, normA, normB float64 for i := range a { dot += a[i] * b[i] normA += a[i] * a[i] normB += b[i] * b[i] } denom := math.Sqrt(normA) * math.Sqrt(normB) if denom == 0 { return 0 } return dot / denom }