nyx/tests/real_world_tests.rs
Eli Peter 1bbe4b1cfb
Phase 1 (#33)
* chore: Exclude CLAUDE.md from Cargo.toml

* feat: add callgraph module and integrate into main analysis flow

* feat: enhance CLI with new severity filtering and analysis modes

* feat: update CHANGELOG with recent enhancements and fixes to severity filtering and output handling

* feat: implement state-model dataflow analysis for resource lifecycle and auth state

* feat: enhance diagnostic output formatting and add evidence structure

* feat: implement attack surface ranking for diagnostics with scoring and sorting

* feat: add comprehensive documentation for installation, usage, and rules reference

* feat: add multiple language support for command execution and evaluation endpoints

* feat: implement inline suppression for findings using `nyx:ignore` comments

* feat: add confidence levels to AST patterns and update output structure

* feat: implement low-noise prioritization system with category filtering, rollup grouping, and configurable budgets

* feat: bump version to 0.4.0 and update changelog with new features and improvements

* feat: add dead code allowances to various functions in mod.rs and real_world_tests.rs
2026-02-25 21:16:36 -05:00

537 lines
19 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Real-world vulnerability fixture test suite.
//!
//! Scans realistic code snippets (20120 lines) across all 10 supported languages
//! and compares findings against `.expect.json` expectation files.
//!
//! # Environment Variables
//!
//! - `NYX_TEST_LANG=python` — run only fixtures for one language
//! - `NYX_TEST_FIXTURE=cmdi_subprocess` — run only fixtures whose name contains this string
//! - `NYX_TEST_VERBOSE=1` — print full diff details for every fixture
//! - `NYX_TEST_CATEGORY=taint` — run only one category (taint/cfg/state/mixed)
//!
//! # Known-failure handling
//!
//! Expectations with `"must_match": false` are tracked but do not cause test failure.
//! A summary of soft misses is always printed at the end.
mod common;
use common::test_config;
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::utils::config::AnalysisMode;
use serde::Deserialize;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
// ── Expectation schema ───────────────────────────────────────────────────────
#[derive(Debug, Clone, Deserialize)]
#[allow(dead_code)]
struct RealWorldExpectations {
/// Human description of what this fixture tests.
#[serde(default)]
description: String,
/// Tags for coverage matrix (e.g. ["taint", "cmdi", "express"]).
#[serde(default)]
tags: Vec<String>,
/// Which analysis modes this fixture targets.
#[serde(default = "default_modes")]
modes: Vec<String>,
/// Expected findings.
expected: Vec<ExpectedFinding>,
}
fn default_modes() -> Vec<String> {
vec!["full".to_string()]
}
#[derive(Debug, Clone, Deserialize)]
struct ExpectedFinding {
/// Rule ID substring to match (e.g. "taint-" or "js.xss.innerhtml").
rule_id: String,
/// Severity (optional, not checked if absent).
#[serde(default)]
severity: Option<String>,
/// If true, missing this finding is a hard failure. If false, it's a soft miss.
#[serde(default = "default_must_match")]
must_match: bool,
/// Line number or range [start, end] where finding should appear.
#[serde(default)]
line_range: Option<(usize, usize)>,
/// Substrings that must appear in message or evidence fields.
#[serde(default)]
evidence_contains: Vec<String>,
/// Human explanation of this expectation.
#[serde(default)]
notes: String,
}
fn default_must_match() -> bool {
true
}
// ── Fixture discovery ────────────────────────────────────────────────────────
#[derive(Debug, Clone)]
struct Fixture {
/// Language slug (rust, c, cpp, java, go, php, python, ruby, typescript, javascript).
lang: String,
/// Category (taint, cfg, state, mixed).
category: String,
/// Fixture name (stem of source file).
name: String,
/// Path to the source fixture file.
source_path: PathBuf,
/// Parsed expectations.
expectations: RealWorldExpectations,
}
fn discover_fixtures() -> Vec<Fixture> {
let base = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/real_world");
let mut fixtures = Vec::new();
let langs = [
"rust",
"c",
"cpp",
"java",
"go",
"php",
"python",
"ruby",
"typescript",
"javascript",
];
let categories = ["taint", "cfg", "state", "mixed"];
for lang in &langs {
for category in &categories {
let dir = base.join(lang).join(category);
if !dir.is_dir() {
continue;
}
// Find all .expect.json files, derive source file from them.
let Ok(entries) = std::fs::read_dir(&dir) else {
continue;
};
for entry in entries.flatten() {
let path = entry.path();
let fname = path.file_name().unwrap().to_string_lossy().to_string();
if !fname.ends_with(".expect.json") {
continue;
}
let stem = fname.trim_end_matches(".expect.json");
// Find the corresponding source file (any extension).
let source_path = find_source_file(&dir, stem);
let Some(source_path) = source_path else {
eprintln!(
"WARN: no source file for {}/{}/{}/{}",
lang, category, stem, fname
);
continue;
};
let expect_content = std::fs::read_to_string(&path).unwrap_or_else(|e| {
panic!("Failed to read {}: {e}", path.display());
});
let expectations: RealWorldExpectations = serde_json::from_str(&expect_content)
.unwrap_or_else(|e| {
panic!("Failed to parse {}: {e}", path.display());
});
fixtures.push(Fixture {
lang: lang.to_string(),
category: category.to_string(),
name: stem.to_string(),
source_path,
expectations,
});
}
}
}
// Sort for deterministic ordering.
fixtures.sort_by(|a, b| {
a.lang
.cmp(&b.lang)
.then(a.category.cmp(&b.category))
.then(a.name.cmp(&b.name))
});
fixtures
}
fn find_source_file(dir: &Path, stem: &str) -> Option<PathBuf> {
let extensions = [
"rs", "c", "cpp", "cc", "cxx", "java", "go", "php", "py", "rb", "ts", "tsx", "js", "jsx",
];
for ext in &extensions {
let candidate = dir.join(format!("{stem}.{ext}"));
if candidate.exists() {
return Some(candidate);
}
}
None
}
// ── Scanning ─────────────────────────────────────────────────────────────────
fn scan_fixture(fixture: &Fixture, mode: AnalysisMode) -> Vec<Diag> {
// We scan the parent directory containing just this fixture file.
// To isolate, we copy the fixture to a temp dir.
let tmp = tempfile::TempDir::with_prefix("nyx_rw_test_").expect("tempdir");
let dest = tmp.path().join(fixture.source_path.file_name().unwrap());
std::fs::copy(&fixture.source_path, &dest).expect("copy fixture");
let cfg = test_config(mode);
let mut diags =
nyx_scanner::scan_no_index(tmp.path(), &cfg).expect("scan_no_index should succeed");
// Normalize paths to just the filename for comparison.
for d in &mut diags {
if let Some(fname) = Path::new(&d.path).file_name() {
d.path = fname.to_string_lossy().to_string();
}
}
// Sort deterministically.
diags.sort_by(|a, b| {
a.path
.cmp(&b.path)
.then(a.line.cmp(&b.line))
.then(a.id.cmp(&b.id))
.then(a.col.cmp(&b.col))
});
diags
}
// ── Matching ─────────────────────────────────────────────────────────────────
#[derive(Debug)]
struct MatchResult {
hard_misses: Vec<(ExpectedFinding, String)>,
soft_misses: Vec<(ExpectedFinding, String)>,
unexpected: Vec<Diag>,
matched: usize,
}
fn match_expectations(
diags: &[Diag],
expectations: &[ExpectedFinding],
fixture_file: &str,
) -> MatchResult {
let mut hard_misses = Vec::new();
let mut soft_misses = Vec::new();
let mut matched_indices: Vec<bool> = vec![false; diags.len()];
let mut matched = 0;
for exp in expectations {
let found = diags.iter().enumerate().any(|(i, d)| {
if matched_indices[i] {
return false;
}
if !d.id.contains(&exp.rule_id) {
return false;
}
// Check file
if !d.path.contains(fixture_file) && fixture_file != d.path {
return false;
}
// Check severity if specified
if let Some(ref sev) = exp.severity
&& d.severity.as_db_str() != sev.to_uppercase()
{
return false;
}
// Check line range if specified
if let Some((start, end)) = exp.line_range
&& (d.line < start || d.line > end)
{
return false;
}
// Check evidence substrings
for substr in &exp.evidence_contains {
let msg = d.message.as_deref().unwrap_or("");
let ev_text = if let Some(ev) = &d.evidence {
let mut parts = Vec::new();
if let Some(src) = &ev.source {
parts.push(format!(
"source: {}",
src.snippet.as_deref().unwrap_or(&src.kind)
));
}
if let Some(snk) = &ev.sink {
parts.push(format!(
"sink: {}",
snk.snippet.as_deref().unwrap_or(&snk.kind)
));
}
for note in &ev.notes {
parts.push(note.clone());
}
parts.join(" ")
} else {
String::new()
};
let combined = format!("{msg} {ev_text}");
if !combined.to_lowercase().contains(&substr.to_lowercase()) {
return false;
}
}
matched_indices[i] = true;
true
});
if found {
matched += 1;
} else {
let reason = format!(
"rule_id='{}' severity={:?} line_range={:?}",
exp.rule_id, exp.severity, exp.line_range
);
if exp.must_match {
hard_misses.push((exp.clone(), reason));
} else {
soft_misses.push((exp.clone(), reason));
}
}
}
// Unexpected = diags not matched by any expectation (informational only).
let unexpected: Vec<Diag> = diags
.iter()
.enumerate()
.filter(|(i, _)| !matched_indices[*i])
.map(|(_, d)| d.clone())
.collect();
MatchResult {
hard_misses,
soft_misses,
unexpected,
matched,
}
}
// ── Mode resolution ──────────────────────────────────────────────────────────
fn resolve_mode(mode_str: &str) -> AnalysisMode {
match mode_str.to_lowercase().as_str() {
"ast" => AnalysisMode::Ast,
"taint" => AnalysisMode::Taint,
"full" => AnalysisMode::Full,
_ => AnalysisMode::Full,
}
}
// ── Coverage matrix ──────────────────────────────────────────────────────────
fn print_coverage_matrix(fixtures: &[Fixture]) {
let mut matrix: BTreeMap<String, BTreeMap<String, usize>> = BTreeMap::new();
let mut tag_counts: BTreeMap<String, usize> = BTreeMap::new();
for f in fixtures {
*matrix
.entry(f.lang.clone())
.or_default()
.entry(f.category.clone())
.or_default() += 1;
for tag in &f.expectations.tags {
*tag_counts.entry(tag.clone()).or_default() += 1;
}
}
eprintln!("\n╔══════════════════════════════════════════════════════════╗");
eprintln!("║ REAL-WORLD TEST COVERAGE MATRIX ║");
eprintln!("╠══════════════╦════════╦══════╦════════╦════════╦════════╣");
eprintln!("║ Language ║ Taint ║ CFG ║ State ║ Mixed ║ Total ║");
eprintln!("╠══════════════╬════════╬══════╬════════╬════════╬════════╣");
let mut grand_total = 0;
for (lang, cats) in &matrix {
let t = cats.get("taint").unwrap_or(&0);
let c = cats.get("cfg").unwrap_or(&0);
let s = cats.get("state").unwrap_or(&0);
let m = cats.get("mixed").unwrap_or(&0);
let total = t + c + s + m;
grand_total += total;
eprintln!(
"{:<12}{:>4}{:>3}{:>4}{:>4}{:>4}",
lang, t, c, s, m, total
);
}
eprintln!("╠══════════════╬════════╬══════╬════════╬════════╬════════╣");
eprintln!(
"║ TOTAL ║ ║ ║ ║ ║ {:>4}",
grand_total
);
eprintln!("╚══════════════╩════════╩══════╩════════╩════════╩════════╝");
if !tag_counts.is_empty() {
eprintln!("\nTag distribution:");
for (tag, count) in &tag_counts {
eprintln!(" {tag}: {count}");
}
}
}
// ── Main test ────────────────────────────────────────────────────────────────
static ALL_FIXTURES: OnceLock<Vec<Fixture>> = OnceLock::new();
fn get_fixtures() -> &'static [Fixture] {
ALL_FIXTURES.get_or_init(discover_fixtures)
}
fn should_run(fixture: &Fixture) -> bool {
if let Ok(lang) = std::env::var("NYX_TEST_LANG")
&& !fixture.lang.eq_ignore_ascii_case(&lang)
{
return false;
}
if let Ok(name) = std::env::var("NYX_TEST_FIXTURE")
&& !fixture.name.contains(&name)
{
return false;
}
if let Ok(cat) = std::env::var("NYX_TEST_CATEGORY")
&& !fixture.category.eq_ignore_ascii_case(&cat)
{
return false;
}
true
}
fn is_verbose() -> bool {
std::env::var("NYX_TEST_VERBOSE").is_ok()
}
#[test]
fn real_world_fixture_suite() {
let fixtures = get_fixtures();
let verbose = is_verbose();
let active: Vec<&Fixture> = fixtures.iter().filter(|f| should_run(f)).collect();
if active.is_empty() {
eprintln!(
"No fixtures matched filters. Total available: {}",
fixtures.len()
);
print_coverage_matrix(fixtures);
return;
}
eprintln!(
"\nRunning {} real-world fixtures (of {} total)\n",
active.len(),
fixtures.len()
);
let mut total_hard_fails = 0;
let mut total_soft_misses = 0;
let mut total_matched = 0;
let mut total_unexpected = 0;
let mut failure_details: Vec<String> = Vec::new();
let mut soft_miss_details: Vec<String> = Vec::new();
for fixture in &active {
let fixture_label = format!("{}/{}/{}", fixture.lang, fixture.category, fixture.name);
for mode_str in &fixture.expectations.modes {
let mode = resolve_mode(mode_str);
let diags = scan_fixture(fixture, mode);
let fixture_file = fixture
.source_path
.file_name()
.unwrap()
.to_string_lossy()
.to_string();
let result = match_expectations(&diags, &fixture.expectations.expected, &fixture_file);
total_matched += result.matched;
total_unexpected += result.unexpected.len();
if !result.hard_misses.is_empty() {
let mut msg = format!("FAIL {fixture_label} [{mode_str}]:");
for (exp, reason) in &result.hard_misses {
msg.push_str(&format!(
"\n MISSING (must_match): {}{}",
reason, exp.notes
));
}
failure_details.push(msg);
total_hard_fails += result.hard_misses.len();
}
if !result.soft_misses.is_empty() {
let mut msg = format!("SOFT {fixture_label} [{mode_str}]:");
for (exp, reason) in &result.soft_misses {
msg.push_str(&format!("\n soft miss: {}{}", reason, exp.notes));
}
soft_miss_details.push(msg);
total_soft_misses += result.soft_misses.len();
}
if verbose {
eprintln!(
" {fixture_label} [{mode_str}]: {} matched, {} hard misses, {} soft misses, {} unexpected",
result.matched,
result.hard_misses.len(),
result.soft_misses.len(),
result.unexpected.len()
);
if !result.unexpected.is_empty() {
for d in &result.unexpected {
eprintln!(
" EXTRA: {}:{} [{}] {}",
d.path,
d.line,
d.severity.as_db_str(),
d.id
);
}
}
}
}
}
// Print coverage matrix.
print_coverage_matrix(fixtures);
// Print summary.
eprintln!("\n────────────────────────────────────────────────────");
eprintln!(
"RESULTS: {} matched, {} hard failures, {} soft misses, {} unexpected",
total_matched, total_hard_fails, total_soft_misses, total_unexpected
);
eprintln!("────────────────────────────────────────────────────");
if !failure_details.is_empty() {
eprintln!("\n=== HARD FAILURES (must_match=true) ===");
for msg in &failure_details {
eprintln!("{msg}");
}
}
if !soft_miss_details.is_empty() {
eprintln!("\n=== SOFT MISSES (must_match=false, informational) ===");
for msg in &soft_miss_details {
eprintln!("{msg}");
}
}
// Hard failures cause test failure.
assert_eq!(
total_hard_fails, 0,
"{total_hard_fails} expected findings not found (must_match=true). \
Run with NYX_TEST_VERBOSE=1 for details."
);
}