mirror of
https://github.com/flakestorm/flakestorm.git
synced 2026-04-25 08:46:47 +02:00
Add initial project structure and configuration files
- Created .gitignore to exclude unnecessary files and directories. - Added Cargo.toml for Rust workspace configuration. - Introduced example configuration file entropix.yaml.example for user customization. - Included LICENSE file with Apache 2.0 license details. - Created pyproject.toml for Python project metadata and dependencies. - Added README.md with project overview and usage instructions. - Implemented a broken agent example to demonstrate testing capabilities. - Established Rust module structure with Cargo.toml and source files. - Set up initial tests for assertions and configuration validation.
This commit is contained in:
commit
a36cecf255
37 changed files with 5397 additions and 0 deletions
186
rust/src/lib.rs
Normal file
186
rust/src/lib.rs
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
//! Entropix Rust Performance Module
|
||||
//!
|
||||
//! This module provides high-performance implementations for:
|
||||
//! - Robustness score calculation
|
||||
//! - Parallel mutation processing
|
||||
//! - Fast string similarity scoring
|
||||
|
||||
use pyo3::prelude::*;
|
||||
use rayon::prelude::*;
|
||||
|
||||
mod parallel;
|
||||
mod scoring;
|
||||
|
||||
pub use parallel::*;
|
||||
pub use scoring::*;
|
||||
|
||||
/// Calculate the robustness score for a test run.
|
||||
///
|
||||
/// The robustness score R is calculated as:
|
||||
/// R = (W_s * S_passed + W_d * D_passed) / N_total
|
||||
///
|
||||
/// Where:
|
||||
/// - S_passed = Semantic variations passed
|
||||
/// - D_passed = Deterministic tests passed
|
||||
/// - W_s, W_d = Weights for semantic and deterministic tests
|
||||
#[pyfunction]
|
||||
fn calculate_robustness_score(
|
||||
semantic_passed: u32,
|
||||
deterministic_passed: u32,
|
||||
total: u32,
|
||||
semantic_weight: f64,
|
||||
deterministic_weight: f64,
|
||||
) -> f64 {
|
||||
if total == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let weighted_sum = semantic_weight * semantic_passed as f64
|
||||
+ deterministic_weight * deterministic_passed as f64;
|
||||
|
||||
weighted_sum / total as f64
|
||||
}
|
||||
|
||||
/// Calculate weighted robustness score with per-mutation weights.
|
||||
///
|
||||
/// Each mutation has its own weight based on difficulty.
|
||||
/// Passing a prompt injection attack is worth more than passing a typo test.
|
||||
#[pyfunction]
|
||||
fn calculate_weighted_score(
|
||||
results: Vec<(bool, f64)>, // (passed, weight)
|
||||
) -> f64 {
|
||||
if results.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let total_weight: f64 = results.iter().map(|(_, w)| w).sum();
|
||||
let passed_weight: f64 = results
|
||||
.iter()
|
||||
.filter(|(passed, _)| *passed)
|
||||
.map(|(_, w)| w)
|
||||
.sum();
|
||||
|
||||
if total_weight == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
passed_weight / total_weight
|
||||
}
|
||||
|
||||
/// Process mutations in parallel and return results.
|
||||
///
|
||||
/// Uses Rayon for efficient parallel processing.
|
||||
#[pyfunction]
|
||||
fn parallel_process_mutations(
|
||||
mutations: Vec<String>,
|
||||
mutation_types: Vec<String>,
|
||||
weights: Vec<f64>,
|
||||
) -> Vec<(String, String, f64)> {
|
||||
mutations
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(i, mutation)| {
|
||||
let mutation_type = mutation_types.get(i % mutation_types.len())
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let weight = weights.get(i % weights.len())
|
||||
.copied()
|
||||
.unwrap_or(1.0);
|
||||
(mutation, mutation_type, weight)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Fast Levenshtein distance calculation for noise mutation validation.
|
||||
#[pyfunction]
|
||||
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
|
||||
let len1 = s1.chars().count();
|
||||
let len2 = s2.chars().count();
|
||||
|
||||
if len1 == 0 {
|
||||
return len2;
|
||||
}
|
||||
if len2 == 0 {
|
||||
return len1;
|
||||
}
|
||||
|
||||
let s1_chars: Vec<char> = s1.chars().collect();
|
||||
let s2_chars: Vec<char> = s2.chars().collect();
|
||||
|
||||
let mut prev_row: Vec<usize> = (0..=len2).collect();
|
||||
let mut curr_row: Vec<usize> = vec![0; len2 + 1];
|
||||
|
||||
for i in 1..=len1 {
|
||||
curr_row[0] = i;
|
||||
for j in 1..=len2 {
|
||||
let cost = if s1_chars[i - 1] == s2_chars[j - 1] { 0 } else { 1 };
|
||||
curr_row[j] = std::cmp::min(
|
||||
std::cmp::min(prev_row[j] + 1, curr_row[j - 1] + 1),
|
||||
prev_row[j - 1] + cost,
|
||||
);
|
||||
}
|
||||
std::mem::swap(&mut prev_row, &mut curr_row);
|
||||
}
|
||||
|
||||
prev_row[len2]
|
||||
}
|
||||
|
||||
/// Calculate similarity ratio between two strings (0.0 to 1.0).
|
||||
#[pyfunction]
|
||||
fn string_similarity(s1: &str, s2: &str) -> f64 {
|
||||
let distance = levenshtein_distance(s1, s2);
|
||||
let max_len = std::cmp::max(s1.chars().count(), s2.chars().count());
|
||||
|
||||
if max_len == 0 {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
1.0 - (distance as f64 / max_len as f64)
|
||||
}
|
||||
|
||||
/// Python module definition
///
/// Registers the Rust-accelerated scoring and processing helpers so they
/// are callable from Python as `entropix_rust.<function_name>`.
#[pymodule]
fn entropix_rust(_py: Python, m: &PyModule) -> PyResult<()> {
    // Scoring entry points.
    m.add_function(wrap_pyfunction!(calculate_robustness_score, m)?)?;
    m.add_function(wrap_pyfunction!(calculate_weighted_score, m)?)?;
    // Parallel mutation processing.
    m.add_function(wrap_pyfunction!(parallel_process_mutations, m)?)?;
    // String-similarity helpers.
    m.add_function(wrap_pyfunction!(levenshtein_distance, m)?)?;
    m.add_function(wrap_pyfunction!(string_similarity, m)?)?;
    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_robustness_score() {
        // 1.0 * 8 + 1.0 * 10 = 18 passed out of 20 total => 0.9.
        let score = calculate_robustness_score(8, 10, 20, 1.0, 1.0);
        assert!((score - 0.9).abs() < 0.001);
    }

    #[test]
    fn test_weighted_score() {
        // Passed weight 2.5 of total weight 3.5 => ~0.714.
        let outcomes = vec![(true, 1.0), (true, 1.5), (false, 1.0)];
        let score = calculate_weighted_score(outcomes);
        assert!((score - 0.714).abs() < 0.01);
    }

    #[test]
    fn test_levenshtein() {
        let cases = [("kitten", "sitting", 3), ("", "abc", 3), ("abc", "abc", 0)];
        for (a, b, expected) in cases {
            assert_eq!(levenshtein_distance(a, b), expected);
        }
    }

    #[test]
    fn test_string_similarity() {
        // One substitution in five characters => similarity of 0.8.
        let sim = string_similarity("hello", "hallo");
        assert!(sim > 0.7 && sim < 0.9);
    }
}
|
||||
|
||||
60
rust/src/parallel.rs
Normal file
60
rust/src/parallel.rs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
//! Parallel processing utilities for Entropix
|
||||
//!
|
||||
//! This module provides efficient parallel processing for mutation generation
|
||||
//! and agent testing using Rayon.
|
||||
|
||||
use rayon::prelude::*;
|
||||
|
||||
/// Process items in parallel with a maximum concurrency limit.
|
||||
pub fn parallel_map<T, U, F>(items: Vec<T>, max_concurrency: usize, f: F) -> Vec<U>
|
||||
where
|
||||
T: Send + Sync,
|
||||
U: Send,
|
||||
F: Fn(T) -> U + Send + Sync,
|
||||
{
|
||||
let pool = rayon::ThreadPoolBuilder::new()
|
||||
.num_threads(max_concurrency)
|
||||
.build()
|
||||
.unwrap_or_else(|_| rayon::ThreadPoolBuilder::new().build().unwrap());
|
||||
|
||||
pool.install(|| {
|
||||
items.into_par_iter().map(f).collect()
|
||||
})
|
||||
}
|
||||
|
||||
/// Batch processing with progress callback.
|
||||
pub fn parallel_batch_process<T, U, F, P>(
|
||||
items: Vec<T>,
|
||||
batch_size: usize,
|
||||
f: F,
|
||||
_progress_callback: P,
|
||||
) -> Vec<U>
|
||||
where
|
||||
T: Send + Sync + Clone,
|
||||
U: Send,
|
||||
F: Fn(&[T]) -> Vec<U> + Send + Sync,
|
||||
P: Fn(usize, usize) + Send + Sync,
|
||||
{
|
||||
let batches: Vec<Vec<T>> = items
|
||||
.chunks(batch_size)
|
||||
.map(|chunk| chunk.to_vec())
|
||||
.collect();
|
||||
|
||||
batches
|
||||
.into_par_iter()
|
||||
.flat_map(|batch| f(&batch))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parallel_map() {
        // Doubling each element must preserve input order in the output.
        let inputs = vec![1, 2, 3, 4, 5];
        let doubled = parallel_map(inputs, 2, |n| n * 2);
        assert_eq!(doubled, vec![2, 4, 6, 8, 10]);
    }
}
|
||||
|
||||
172
rust/src/scoring.rs
Normal file
172
rust/src/scoring.rs
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
//! Scoring algorithms for Entropix
|
||||
//!
|
||||
//! This module contains optimized scoring algorithms for calculating
|
||||
//! robustness metrics and aggregating test results.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Result of a single mutation test
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MutationResult {
    /// Kind of mutation applied (e.g. "paraphrase", "noise", "prompt_injection").
    pub mutation_type: String,
    /// Whether this mutation's test passed.
    pub passed: bool,
    /// Difficulty weight used when aggregating the robustness score.
    pub weight: f64,
    /// Latency of this mutation's test, in milliseconds.
    pub latency_ms: f64,
    /// Per-invariant check outcomes recorded for this mutation.
    pub checks: Vec<CheckResult>,
}
|
||||
|
||||
/// Result of a single invariant check
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckResult {
    /// Identifier of the invariant check that was run.
    pub check_type: String,
    /// Whether the check passed.
    pub passed: bool,
    /// Free-form details about the check outcome.
    pub details: String,
}
|
||||
|
||||
/// Aggregate statistics for a test run
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestStatistics {
    /// Number of mutation results in the run.
    pub total_mutations: usize,
    /// Mutations marked as passed.
    pub passed_mutations: usize,
    /// Mutations that failed (total - passed).
    pub failed_mutations: usize,
    /// Weighted pass fraction in [0.0, 1.0]; 0.0 when total weight is zero.
    pub robustness_score: f64,
    /// Mean latency across all mutations, in milliseconds.
    pub avg_latency_ms: f64,
    /// Median (50th percentile) latency, in milliseconds.
    pub p50_latency_ms: f64,
    /// 95th-percentile latency, in milliseconds.
    pub p95_latency_ms: f64,
    /// 99th-percentile latency, in milliseconds.
    pub p99_latency_ms: f64,
    /// Per-mutation-type breakdown of the run.
    pub by_type: Vec<TypeStatistics>,
}
|
||||
|
||||
/// Statistics broken down by mutation type
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypeStatistics {
    /// The mutation type these statistics cover.
    pub mutation_type: String,
    /// Total mutations of this type in the run.
    pub total: usize,
    /// Mutations of this type that passed.
    pub passed: usize,
    /// passed / total, in [0.0, 1.0].
    pub pass_rate: f64,
}
|
||||
|
||||
/// Calculate comprehensive statistics from mutation results
|
||||
pub fn calculate_statistics(results: &[MutationResult]) -> TestStatistics {
|
||||
let total = results.len();
|
||||
let passed = results.iter().filter(|r| r.passed).count();
|
||||
let failed = total - passed;
|
||||
|
||||
// Calculate robustness score
|
||||
let total_weight: f64 = results.iter().map(|r| r.weight).sum();
|
||||
let passed_weight: f64 = results
|
||||
.iter()
|
||||
.filter(|r| r.passed)
|
||||
.map(|r| r.weight)
|
||||
.sum();
|
||||
|
||||
let robustness_score = if total_weight > 0.0 {
|
||||
passed_weight / total_weight
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
// Calculate latency statistics
|
||||
let mut latencies: Vec<f64> = results.iter().map(|r| r.latency_ms).collect();
|
||||
latencies.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
|
||||
let avg_latency = if !latencies.is_empty() {
|
||||
latencies.iter().sum::<f64>() / latencies.len() as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let p50 = percentile(&latencies, 50);
|
||||
let p95 = percentile(&latencies, 95);
|
||||
let p99 = percentile(&latencies, 99);
|
||||
|
||||
// Statistics by mutation type
|
||||
let mut type_stats = std::collections::HashMap::new();
|
||||
for result in results {
|
||||
let entry = type_stats
|
||||
.entry(result.mutation_type.clone())
|
||||
.or_insert((0usize, 0usize));
|
||||
entry.0 += 1;
|
||||
if result.passed {
|
||||
entry.1 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let by_type: Vec<TypeStatistics> = type_stats
|
||||
.into_iter()
|
||||
.map(|(mutation_type, (total, passed))| TypeStatistics {
|
||||
mutation_type,
|
||||
total,
|
||||
passed,
|
||||
pass_rate: passed as f64 / total as f64,
|
||||
})
|
||||
.collect();
|
||||
|
||||
TestStatistics {
|
||||
total_mutations: total,
|
||||
passed_mutations: passed,
|
||||
failed_mutations: failed,
|
||||
robustness_score,
|
||||
avg_latency_ms: avg_latency,
|
||||
p50_latency_ms: p50,
|
||||
p95_latency_ms: p95,
|
||||
p99_latency_ms: p99,
|
||||
by_type,
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate percentile from sorted values
fn percentile(sorted_values: &[f64], p: usize) -> f64 {
    match sorted_values {
        // No data: report 0.0 rather than panicking on an empty slice.
        [] => 0.0,
        values => {
            // Nearest-rank style lookup: scale p into [0, len - 1], round,
            // and clamp to the last index for p > 100.
            let last = values.len() - 1;
            let idx = (p as f64 / 100.0 * last as f64).round() as usize;
            values[idx.min(last)]
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a check-free mutation result for test fixtures.
    fn result(mutation_type: &str, passed: bool, weight: f64, latency_ms: f64) -> MutationResult {
        MutationResult {
            mutation_type: mutation_type.to_string(),
            passed,
            weight,
            latency_ms,
            checks: vec![],
        }
    }

    #[test]
    fn test_percentile() {
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        // Nearest-rank lookup lands within one of the interpolated value.
        assert!((percentile(&values, 50) - 5.5).abs() < 1.0);
        assert!((percentile(&values, 95) - 9.5).abs() < 1.0);
    }

    #[test]
    fn test_calculate_statistics() {
        let results = vec![
            result("paraphrase", true, 1.0, 100.0),
            result("noise", true, 0.8, 150.0),
            result("prompt_injection", false, 1.5, 200.0),
        ];

        let stats = calculate_statistics(&results);
        assert_eq!(stats.total_mutations, 3);
        assert_eq!(stats.passed_mutations, 2);
        assert_eq!(stats.failed_mutations, 1);
        // Passed weight 1.8 of 3.3 total => ~0.545.
        assert!(stats.robustness_score > 0.5);
    }
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue