mirror of
https://github.com/flakestorm/flakestorm.git
synced 2026-04-25 08:46:47 +02:00
Add initial project structure and configuration files
- Created .gitignore to exclude unnecessary files and directories. - Added Cargo.toml for Rust workspace configuration. - Introduced example configuration file entropix.yaml.example for user customization. - Included LICENSE file with Apache 2.0 license details. - Created pyproject.toml for Python project metadata and dependencies. - Added README.md with project overview and usage instructions. - Implemented a broken agent example to demonstrate testing capabilities. - Established Rust module structure with Cargo.toml and source files. - Set up initial tests for assertions and configuration validation.
This commit is contained in:
commit
a36cecf255
37 changed files with 5397 additions and 0 deletions
186
rust/src/lib.rs
Normal file
186
rust/src/lib.rs
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
//! Entropix Rust Performance Module
|
||||
//!
|
||||
//! This module provides high-performance implementations for:
|
||||
//! - Robustness score calculation
|
||||
//! - Parallel mutation processing
|
||||
//! - Fast string similarity scoring
|
||||
|
||||
use pyo3::prelude::*;
|
||||
use rayon::prelude::*;
|
||||
|
||||
mod parallel;
|
||||
mod scoring;
|
||||
|
||||
pub use parallel::*;
|
||||
pub use scoring::*;
|
||||
|
||||
/// Calculate the robustness score for a test run.
|
||||
///
|
||||
/// The robustness score R is calculated as:
|
||||
/// R = (W_s * S_passed + W_d * D_passed) / N_total
|
||||
///
|
||||
/// Where:
|
||||
/// - S_passed = Semantic variations passed
|
||||
/// - D_passed = Deterministic tests passed
|
||||
/// - W_s, W_d = Weights for semantic and deterministic tests
|
||||
#[pyfunction]
|
||||
fn calculate_robustness_score(
|
||||
semantic_passed: u32,
|
||||
deterministic_passed: u32,
|
||||
total: u32,
|
||||
semantic_weight: f64,
|
||||
deterministic_weight: f64,
|
||||
) -> f64 {
|
||||
if total == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let weighted_sum = semantic_weight * semantic_passed as f64
|
||||
+ deterministic_weight * deterministic_passed as f64;
|
||||
|
||||
weighted_sum / total as f64
|
||||
}
|
||||
|
||||
/// Calculate weighted robustness score with per-mutation weights.
|
||||
///
|
||||
/// Each mutation has its own weight based on difficulty.
|
||||
/// Passing a prompt injection attack is worth more than passing a typo test.
|
||||
#[pyfunction]
|
||||
fn calculate_weighted_score(
|
||||
results: Vec<(bool, f64)>, // (passed, weight)
|
||||
) -> f64 {
|
||||
if results.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let total_weight: f64 = results.iter().map(|(_, w)| w).sum();
|
||||
let passed_weight: f64 = results
|
||||
.iter()
|
||||
.filter(|(passed, _)| *passed)
|
||||
.map(|(_, w)| w)
|
||||
.sum();
|
||||
|
||||
if total_weight == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
passed_weight / total_weight
|
||||
}
|
||||
|
||||
/// Process mutations in parallel and return results.
|
||||
///
|
||||
/// Uses Rayon for efficient parallel processing.
|
||||
#[pyfunction]
|
||||
fn parallel_process_mutations(
|
||||
mutations: Vec<String>,
|
||||
mutation_types: Vec<String>,
|
||||
weights: Vec<f64>,
|
||||
) -> Vec<(String, String, f64)> {
|
||||
mutations
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(i, mutation)| {
|
||||
let mutation_type = mutation_types.get(i % mutation_types.len())
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let weight = weights.get(i % weights.len())
|
||||
.copied()
|
||||
.unwrap_or(1.0);
|
||||
(mutation, mutation_type, weight)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Fast Levenshtein distance calculation for noise mutation validation.
|
||||
#[pyfunction]
|
||||
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
|
||||
let len1 = s1.chars().count();
|
||||
let len2 = s2.chars().count();
|
||||
|
||||
if len1 == 0 {
|
||||
return len2;
|
||||
}
|
||||
if len2 == 0 {
|
||||
return len1;
|
||||
}
|
||||
|
||||
let s1_chars: Vec<char> = s1.chars().collect();
|
||||
let s2_chars: Vec<char> = s2.chars().collect();
|
||||
|
||||
let mut prev_row: Vec<usize> = (0..=len2).collect();
|
||||
let mut curr_row: Vec<usize> = vec![0; len2 + 1];
|
||||
|
||||
for i in 1..=len1 {
|
||||
curr_row[0] = i;
|
||||
for j in 1..=len2 {
|
||||
let cost = if s1_chars[i - 1] == s2_chars[j - 1] { 0 } else { 1 };
|
||||
curr_row[j] = std::cmp::min(
|
||||
std::cmp::min(prev_row[j] + 1, curr_row[j - 1] + 1),
|
||||
prev_row[j - 1] + cost,
|
||||
);
|
||||
}
|
||||
std::mem::swap(&mut prev_row, &mut curr_row);
|
||||
}
|
||||
|
||||
prev_row[len2]
|
||||
}
|
||||
|
||||
/// Calculate similarity ratio between two strings (0.0 to 1.0).
|
||||
#[pyfunction]
|
||||
fn string_similarity(s1: &str, s2: &str) -> f64 {
|
||||
let distance = levenshtein_distance(s1, s2);
|
||||
let max_len = std::cmp::max(s1.chars().count(), s2.chars().count());
|
||||
|
||||
if max_len == 0 {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
1.0 - (distance as f64 / max_len as f64)
|
||||
}
|
||||
|
||||
/// Python module definition
///
/// Registers the Rust-accelerated scoring and processing helpers so they
/// are callable from Python as `entropix_rust.<function_name>`.
#[pymodule]
fn entropix_rust(_py: Python, m: &PyModule) -> PyResult<()> {
    // Scoring entry points.
    m.add_function(wrap_pyfunction!(calculate_robustness_score, m)?)?;
    m.add_function(wrap_pyfunction!(calculate_weighted_score, m)?)?;
    // Parallel mutation processing.
    m.add_function(wrap_pyfunction!(parallel_process_mutations, m)?)?;
    // String-similarity helpers.
    m.add_function(wrap_pyfunction!(levenshtein_distance, m)?)?;
    m.add_function(wrap_pyfunction!(string_similarity, m)?)?;
    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_robustness_score() {
        // 1.0 * 8 + 1.0 * 10 = 18 passed out of 20 total => 0.9.
        let score = calculate_robustness_score(8, 10, 20, 1.0, 1.0);
        assert!((score - 0.9).abs() < 0.001);
    }

    #[test]
    fn test_weighted_score() {
        // Passed weight 2.5 of total weight 3.5 => ~0.714.
        let outcomes = vec![(true, 1.0), (true, 1.5), (false, 1.0)];
        let score = calculate_weighted_score(outcomes);
        assert!((score - 0.714).abs() < 0.01);
    }

    #[test]
    fn test_levenshtein() {
        let cases = [("kitten", "sitting", 3), ("", "abc", 3), ("abc", "abc", 0)];
        for (a, b, expected) in cases {
            assert_eq!(levenshtein_distance(a, b), expected);
        }
    }

    #[test]
    fn test_string_similarity() {
        // One substitution in five characters => similarity of 0.8.
        let sim = string_similarity("hello", "hallo");
        assert!(sim > 0.7 && sim < 0.9);
    }
}
|
||||
|
||||
60
rust/src/parallel.rs
Normal file
60
rust/src/parallel.rs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
//! Parallel processing utilities for Entropix
|
||||
//!
|
||||
//! This module provides efficient parallel processing for mutation generation
|
||||
//! and agent testing using Rayon.
|
||||
|
||||
use rayon::prelude::*;
|
||||
|
||||
/// Process items in parallel with a maximum concurrency limit.
|
||||
pub fn parallel_map<T, U, F>(items: Vec<T>, max_concurrency: usize, f: F) -> Vec<U>
|
||||
where
|
||||
T: Send + Sync,
|
||||
U: Send,
|
||||
F: Fn(T) -> U + Send + Sync,
|
||||
{
|
||||
let pool = rayon::ThreadPoolBuilder::new()
|
||||
.num_threads(max_concurrency)
|
||||
.build()
|
||||
.unwrap_or_else(|_| rayon::ThreadPoolBuilder::new().build().unwrap());
|
||||
|
||||
pool.install(|| {
|
||||
items.into_par_iter().map(f).collect()
|
||||
})
|
||||
}
|
||||
|
||||
/// Batch processing with progress callback.
|
||||
pub fn parallel_batch_process<T, U, F, P>(
|
||||
items: Vec<T>,
|
||||
batch_size: usize,
|
||||
f: F,
|
||||
_progress_callback: P,
|
||||
) -> Vec<U>
|
||||
where
|
||||
T: Send + Sync + Clone,
|
||||
U: Send,
|
||||
F: Fn(&[T]) -> Vec<U> + Send + Sync,
|
||||
P: Fn(usize, usize) + Send + Sync,
|
||||
{
|
||||
let batches: Vec<Vec<T>> = items
|
||||
.chunks(batch_size)
|
||||
.map(|chunk| chunk.to_vec())
|
||||
.collect();
|
||||
|
||||
batches
|
||||
.into_par_iter()
|
||||
.flat_map(|batch| f(&batch))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parallel_map() {
        // Doubling each element must preserve input order in the output.
        let inputs = vec![1, 2, 3, 4, 5];
        let doubled = parallel_map(inputs, 2, |n| n * 2);
        assert_eq!(doubled, vec![2, 4, 6, 8, 10]);
    }
}
|
||||
|
||||
172
rust/src/scoring.rs
Normal file
172
rust/src/scoring.rs
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
//! Scoring algorithms for Entropix
|
||||
//!
|
||||
//! This module contains optimized scoring algorithms for calculating
|
||||
//! robustness metrics and aggregating test results.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Result of a single mutation test
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MutationResult {
    /// Kind of mutation applied (e.g. "paraphrase", "noise", "prompt_injection").
    pub mutation_type: String,
    /// Whether this mutation's test passed.
    pub passed: bool,
    /// Difficulty weight used when aggregating the robustness score.
    pub weight: f64,
    /// Latency of this mutation's test, in milliseconds.
    pub latency_ms: f64,
    /// Per-invariant check outcomes recorded for this mutation.
    pub checks: Vec<CheckResult>,
}
|
||||
|
||||
/// Result of a single invariant check
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckResult {
    /// Identifier of the invariant check that was run.
    pub check_type: String,
    /// Whether the check passed.
    pub passed: bool,
    /// Free-form details about the check outcome.
    pub details: String,
}
|
||||
|
||||
/// Aggregate statistics for a test run
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestStatistics {
    /// Number of mutation results in the run.
    pub total_mutations: usize,
    /// Mutations marked as passed.
    pub passed_mutations: usize,
    /// Mutations that failed (total - passed).
    pub failed_mutations: usize,
    /// Weighted pass fraction in [0.0, 1.0]; 0.0 when total weight is zero.
    pub robustness_score: f64,
    /// Mean latency across all mutations, in milliseconds.
    pub avg_latency_ms: f64,
    /// Median (50th percentile) latency, in milliseconds.
    pub p50_latency_ms: f64,
    /// 95th-percentile latency, in milliseconds.
    pub p95_latency_ms: f64,
    /// 99th-percentile latency, in milliseconds.
    pub p99_latency_ms: f64,
    /// Per-mutation-type breakdown of the run.
    pub by_type: Vec<TypeStatistics>,
}
|
||||
|
||||
/// Statistics broken down by mutation type
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypeStatistics {
    /// The mutation type these statistics cover.
    pub mutation_type: String,
    /// Total mutations of this type in the run.
    pub total: usize,
    /// Mutations of this type that passed.
    pub passed: usize,
    /// passed / total, in [0.0, 1.0].
    pub pass_rate: f64,
}
|
||||
|
||||
/// Calculate comprehensive statistics from mutation results
|
||||
pub fn calculate_statistics(results: &[MutationResult]) -> TestStatistics {
|
||||
let total = results.len();
|
||||
let passed = results.iter().filter(|r| r.passed).count();
|
||||
let failed = total - passed;
|
||||
|
||||
// Calculate robustness score
|
||||
let total_weight: f64 = results.iter().map(|r| r.weight).sum();
|
||||
let passed_weight: f64 = results
|
||||
.iter()
|
||||
.filter(|r| r.passed)
|
||||
.map(|r| r.weight)
|
||||
.sum();
|
||||
|
||||
let robustness_score = if total_weight > 0.0 {
|
||||
passed_weight / total_weight
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
// Calculate latency statistics
|
||||
let mut latencies: Vec<f64> = results.iter().map(|r| r.latency_ms).collect();
|
||||
latencies.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
|
||||
let avg_latency = if !latencies.is_empty() {
|
||||
latencies.iter().sum::<f64>() / latencies.len() as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let p50 = percentile(&latencies, 50);
|
||||
let p95 = percentile(&latencies, 95);
|
||||
let p99 = percentile(&latencies, 99);
|
||||
|
||||
// Statistics by mutation type
|
||||
let mut type_stats = std::collections::HashMap::new();
|
||||
for result in results {
|
||||
let entry = type_stats
|
||||
.entry(result.mutation_type.clone())
|
||||
.or_insert((0usize, 0usize));
|
||||
entry.0 += 1;
|
||||
if result.passed {
|
||||
entry.1 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let by_type: Vec<TypeStatistics> = type_stats
|
||||
.into_iter()
|
||||
.map(|(mutation_type, (total, passed))| TypeStatistics {
|
||||
mutation_type,
|
||||
total,
|
||||
passed,
|
||||
pass_rate: passed as f64 / total as f64,
|
||||
})
|
||||
.collect();
|
||||
|
||||
TestStatistics {
|
||||
total_mutations: total,
|
||||
passed_mutations: passed,
|
||||
failed_mutations: failed,
|
||||
robustness_score,
|
||||
avg_latency_ms: avg_latency,
|
||||
p50_latency_ms: p50,
|
||||
p95_latency_ms: p95,
|
||||
p99_latency_ms: p99,
|
||||
by_type,
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate percentile from sorted values
fn percentile(sorted_values: &[f64], p: usize) -> f64 {
    match sorted_values {
        // No data: report 0.0 rather than panicking on an empty slice.
        [] => 0.0,
        values => {
            // Nearest-rank style lookup: scale p into [0, len - 1], round,
            // and clamp to the last index for p > 100.
            let last = values.len() - 1;
            let idx = (p as f64 / 100.0 * last as f64).round() as usize;
            values[idx.min(last)]
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a check-free mutation result for test fixtures.
    fn result(mutation_type: &str, passed: bool, weight: f64, latency_ms: f64) -> MutationResult {
        MutationResult {
            mutation_type: mutation_type.to_string(),
            passed,
            weight,
            latency_ms,
            checks: vec![],
        }
    }

    #[test]
    fn test_percentile() {
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        // Nearest-rank lookup lands within one of the interpolated value.
        assert!((percentile(&values, 50) - 5.5).abs() < 1.0);
        assert!((percentile(&values, 95) - 9.5).abs() < 1.0);
    }

    #[test]
    fn test_calculate_statistics() {
        let results = vec![
            result("paraphrase", true, 1.0, 100.0),
            result("noise", true, 0.8, 150.0),
            result("prompt_injection", false, 1.5, 200.0),
        ];

        let stats = calculate_statistics(&results);
        assert_eq!(stats.total_mutations, 3);
        assert_eq!(stats.passed_mutations, 2);
        assert_eq!(stats.failed_mutations, 1);
        // Passed weight 1.8 of 3.3 total => ~0.545.
        assert!(stats.robustness_score > 0.5);
    }
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue