nyx/tests/pattern_tests.rs
Eli Peter 1bbe4b1cfb
Phase 1 (#33)
* chore: Exclude CLAUDE.md from Cargo.toml

* feat: add callgraph module and integrate into main analysis flow

* feat: enhance CLI with new severity filtering and analysis modes

* feat: update CHANGELOG with recent enhancements and fixes to severity filtering and output handling

* feat: implement state-model dataflow analysis for resource lifecycle and auth state

* feat: enhance diagnostic output formatting and add evidence structure

* feat: implement attack surface ranking for diagnostics with scoring and sorting

* feat: add comprehensive documentation for installation, usage, and rules reference

* feat: add multiple language support for command execution and evaluation endpoints

* feat: implement inline suppression for findings using `nyx:ignore` comments

* feat: add confidence levels to AST patterns and update output structure

* feat: implement low-noise prioritization system with category filtering, rollup grouping, and configurable budgets

* feat: bump version to 0.4.0 and update changelog with new features and improvements

* feat: add dead code allowances to various functions in mod.rs and real_world_tests.rs
2026-02-25 21:16:36 -05:00

500 lines
14 KiB
Rust

//! Pattern sanity tests and positive/negative fixture validation.
//!
//! These tests verify that:
//! 1. All pattern IDs are globally unique.
//! 2. All tree-sitter queries compile without error.
//! 3. All patterns have non-empty descriptions and valid severity/tier/category.
//! 4. Positive fixtures trigger expected patterns.
//! 5. Negative fixtures do NOT trigger security patterns.
use nyx_scanner::patterns::{self, PatternTier, Severity};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use tree_sitter::{Language, Query, QueryCursor, StreamingIterator};
// ── Helpers ──────────────────────────────────────────────────────────────────
/// Builds the location of a pattern fixture underneath the crate's manifest
/// directory: `tests/fixtures/patterns/<lang>/<kind>`.
///
/// `lang` is the per-language directory name and `kind` is the final path
/// component appended after it.
fn fixture_path(lang: &str, kind: &str) -> PathBuf {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    path.push("tests/fixtures/patterns");
    path.push(lang);
    path.push(kind);
    path
}
/// Resolves a language slug to the tree-sitter [`Language`] used to parse it.
///
/// # Panics
///
/// Panics with `unknown language: <slug>` when the slug is not one of the
/// ten slugs handled below.
fn ts_lang_for(slug: &str) -> Language {
    // Pick the grammar entry point first, then convert it once at the end.
    let grammar = match slug {
        "rust" => tree_sitter_rust::LANGUAGE,
        "java" => tree_sitter_java::LANGUAGE,
        "python" => tree_sitter_python::LANGUAGE,
        "javascript" => tree_sitter_javascript::LANGUAGE,
        "typescript" => tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
        "c" => tree_sitter_c::LANGUAGE,
        "cpp" => tree_sitter_cpp::LANGUAGE,
        "go" => tree_sitter_go::LANGUAGE,
        "php" => tree_sitter_php::LANGUAGE_PHP,
        "ruby" => tree_sitter_ruby::LANGUAGE,
        _ => panic!("unknown language: {slug}"),
    };
    Language::from(grammar)
}
/// Parses `source` with the grammar for `slug` and runs every pattern loaded
/// for that language against the resulting syntax tree.
///
/// Returns the IDs of the patterns whose query produced at least one match.
/// A pattern whose query fails to compile is skipped silently here.
///
/// # Panics
///
/// Panics if the language cannot be set on the parser or if parsing yields
/// no tree.
fn run_patterns(slug: &str, source: &[u8]) -> HashSet<String> {
    let language = ts_lang_for(slug);
    let loaded = patterns::load(slug);

    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&language).expect("set language");
    let tree = parser.parse(source, None).expect("parse");
    let root = tree.root_node();

    let mut hits = HashSet::new();
    for pattern in &loaded {
        if let Ok(query) = Query::new(&language, pattern.query) {
            let mut cursor = QueryCursor::new();
            // Only the existence of a first match matters, not how many there are.
            if cursor.matches(&query, root, source).next().is_some() {
                hits.insert(pattern.id.to_string());
            }
        }
    }
    hits
}
// ── All languages for iteration ──────────────────────────────────────────────
/// Language slugs that the sanity tests iterate over, in iteration order.
const ALL_LANGS: &[&str] = &[
    "rust", "java", "python", "javascript", "typescript", "c", "cpp", "go", "php", "ruby",
];
// ── Sanity tests ─────────────────────────────────────────────────────────────
/// Fails when the same pattern ID is loaded for two different languages.
///
/// Each ID is recorded together with the language that most recently supplied
/// it. A collision is reported only when the previously recorded language
/// differs from the current one, so an ID repeated under the same language
/// slug (e.g. via an alias that shares patterns) is not reported.
#[test]
fn all_pattern_ids_are_globally_unique() {
    let mut owner_by_id: HashMap<String, String> = HashMap::new();
    let mut collisions: Vec<String> = Vec::new();

    for &lang in ALL_LANGS {
        for pat in patterns::load(lang) {
            let previous = owner_by_id.insert(pat.id.to_string(), lang.to_string());
            match previous {
                Some(earlier) if earlier != lang => {
                    collisions.push(format!("{} (in {} and {})", pat.id, earlier, lang));
                }
                // First sighting, or the same language again: nothing to report.
                _ => {}
            }
        }
    }

    assert!(
        collisions.is_empty(),
        "Duplicate pattern IDs across languages:\n {}",
        collisions.join("\n ")
    );
}
/// Compiles every pattern query against its language's grammar and fails with
/// the full list of compilation errors, one `[lang] id: error` entry per
/// failing pattern.
#[test]
fn all_queries_compile() {
    let mut failures: Vec<String> = Vec::new();

    for &lang in ALL_LANGS {
        let grammar = ts_lang_for(lang);
        // Keep only the patterns whose query is rejected, rendered as messages.
        failures.extend(patterns::load(lang).into_iter().filter_map(|pat| {
            Query::new(&grammar, pat.query)
                .err()
                .map(|err| format!("[{}] {}: {}", lang, pat.id, err))
        }));
    }

    assert!(
        failures.is_empty(),
        "Pattern query compilation errors:\n {}",
        failures.join("\n ")
    );
}
/// Every pattern of every language must carry a description that is not
/// empty once surrounding whitespace is trimmed.
#[test]
fn all_descriptions_non_empty() {
    let every_pattern = ALL_LANGS.iter().flat_map(|&lang| patterns::load(lang));
    for pat in every_pattern {
        let blank = pat.description.trim().is_empty();
        assert!(!blank, "Pattern {} has empty description", pat.id);
    }
}
/// Checks the ID shape `<lang>.<category>.<specific>`: exactly three
/// dot-separated parts, with a language prefix of at most four bytes.
#[test]
fn all_ids_follow_naming_convention() {
    for &lang in ALL_LANGS {
        for pat in patterns::load(lang) {
            let segment_count = pat.id.split('.').count();
            assert!(
                segment_count == 3,
                "Pattern ID '{}' should have 3 dot-separated parts (lang.category.specific), got {}",
                pat.id,
                segment_count
            );

            // `split` always yields at least one segment, so the fallback is never used.
            let prefix = pat.id.split('.').next().unwrap_or("");
            assert!(
                prefix.len() <= 4,
                "Pattern ID '{}' language prefix '{}' too long (max 4 chars)",
                pat.id,
                prefix
            );
        }
    }
}
/// Sanity check on severity spread: a language with five or more patterns
/// must use at least two distinct severity values. Languages with fewer
/// patterns are not checked.
#[test]
fn severity_distribution_reasonable() {
    for &lang in ALL_LANGS {
        let pats = patterns::load(lang);
        let total = pats.len();
        if total < 5 {
            continue;
        }

        let distinct_levels = pats
            .iter()
            .map(|p| p.severity)
            .collect::<HashSet<_>>()
            .len();
        assert!(
            distinct_levels >= 2,
            "{} has {} patterns but only 1 severity level",
            lang,
            total
        );
    }
}
/// Soft check that Tier A pattern descriptions do not contain heuristic
/// wording (`concatenat`, `non-literal`, `heuristic`, matched
/// case-insensitively).
///
/// Violations are only printed to stderr as a warning; this test never fails
/// because of them.
#[test]
fn tier_a_patterns_have_no_heuristic_in_description() {
    // Substrings looked up in the lower-cased description.
    let heuristic_words = ["concatenat", "non-literal", "heuristic"];
    let mut violations: Vec<String> = Vec::new();

    for &lang in ALL_LANGS {
        for pat in patterns::load(lang) {
            if pat.tier != PatternTier::A {
                continue;
            }
            let lowered = pat.description.to_lowercase();
            let flagged = heuristic_words
                .iter()
                .filter(|word| lowered.contains(**word))
                .map(|word| format!("{}: Tier A but description mentions '{}'", pat.id, word));
            violations.extend(flagged);
        }
    }

    // Descriptions are informational, so warn instead of failing.
    if violations.is_empty() {
        return;
    }
    eprintln!(
        "WARNING: Tier A patterns with heuristic-like descriptions:\n {}",
        violations.join("\n ")
    );
}
// ── Positive fixture tests ───────────────────────────────────────────────────
// Each test verifies that the positive fixture triggers at least the listed IDs.
/// Runs all patterns for `lang` over the fixture `fixture_file` and asserts
/// that every ID in `expected_ids` is among the matched pattern IDs.
///
/// Extra matches are allowed. If the fixture file does not exist, a `SKIP`
/// notice is printed to stderr and the function returns without asserting.
///
/// # Panics
///
/// Panics if the fixture exists but cannot be read, or if any expected ID did
/// not match.
fn assert_positive_match(lang: &str, fixture_file: &str, expected_ids: &[&str]) {
    let path = fixture_path(lang, fixture_file);
    if !path.exists() {
        eprintln!("SKIP: fixture not found: {}", path.display());
        return;
    }

    let source = std::fs::read(&path).expect("read fixture");
    let matched = run_patterns(lang, &source);

    // Expected IDs that never fired, in the order the caller listed them.
    let missing: Vec<&str> = expected_ids
        .iter()
        .copied()
        .filter(|id| !matched.contains(*id))
        .collect();

    assert!(
        missing.is_empty(),
        "[{}] Positive fixture '{}' did not trigger expected patterns:\n missing: {:?}\n matched: {:?}",
        lang,
        fixture_file,
        missing,
        matched
    );
}
/// Passes the Rust fixture `positive.rs` and the pattern IDs it is expected
/// to trigger to `assert_positive_match`.
#[test]
fn positive_rust() {
    let expected = [
        "rs.memory.transmute",
        "rs.memory.copy_nonoverlapping",
        "rs.memory.get_unchecked",
        "rs.memory.mem_zeroed",
        "rs.memory.ptr_read",
        "rs.quality.unsafe_block",
        "rs.quality.unsafe_fn",
        "rs.quality.unwrap",
        "rs.quality.expect",
        "rs.quality.panic_macro",
        "rs.quality.todo",
        "rs.memory.narrow_cast",
        "rs.memory.mem_forget",
    ];
    assert_positive_match("rust", "positive.rs", &expected);
}
/// Passes the Java fixture `positive.java` and the pattern IDs it is expected
/// to trigger to `assert_positive_match`.
#[test]
fn positive_java() {
    let expected = [
        "java.deser.readobject",
        "java.cmdi.runtime_exec",
        "java.reflection.class_forname",
        "java.reflection.method_invoke",
        "java.sqli.execute_concat",
        "java.crypto.insecure_random",
    ];
    assert_positive_match("java", "positive.java", &expected);
}
/// Passes the Python fixture `positive.py` and the pattern IDs it is expected
/// to trigger to `assert_positive_match`.
#[test]
fn positive_python() {
    let expected = [
        "py.code_exec.eval",
        "py.code_exec.exec",
        "py.cmdi.os_system",
        "py.cmdi.os_popen",
        "py.deser.pickle_loads",
        "py.deser.yaml_load",
    ];
    assert_positive_match("python", "positive.py", &expected);
}
/// Passes the JavaScript fixture `positive.js` and the pattern IDs it is
/// expected to trigger to `assert_positive_match`.
#[test]
fn positive_javascript() {
    let expected = [
        "js.code_exec.eval",
        "js.code_exec.new_function",
        "js.code_exec.settimeout_string",
        "js.xss.document_write",
        "js.xss.outer_html",
        "js.xss.insert_adjacent_html",
        "js.prototype.proto_assignment",
        "js.xss.cookie_write",
    ];
    assert_positive_match("javascript", "positive.js", &expected);
}
/// Passes the TypeScript fixture `positive.ts` and the pattern IDs it is
/// expected to trigger to `assert_positive_match`.
#[test]
fn positive_typescript() {
    let expected = [
        "ts.code_exec.eval",
        "ts.code_exec.new_function",
        "ts.code_exec.settimeout_string",
        "ts.xss.document_write",
        "ts.xss.outer_html",
        "ts.xss.insert_adjacent_html",
        "ts.quality.any_annotation",
        "ts.quality.as_any",
        "ts.prototype.proto_assignment",
        "ts.xss.cookie_write",
    ];
    assert_positive_match("typescript", "positive.ts", &expected);
}
/// Passes the C fixture `positive.c` and the pattern IDs it is expected to
/// trigger to `assert_positive_match`.
#[test]
fn positive_c() {
    let expected = [
        "c.memory.gets",
        "c.memory.strcpy",
        "c.memory.strcat",
        "c.memory.sprintf",
        "c.memory.scanf_percent_s",
        "c.cmdi.system",
        "c.cmdi.popen",
        "c.memory.printf_no_fmt",
    ];
    assert_positive_match("c", "positive.c", &expected);
}
/// Passes the C++ fixture `positive.cpp` and the pattern IDs it is expected
/// to trigger to `assert_positive_match`.
#[test]
fn positive_cpp() {
    let expected = [
        "cpp.memory.gets",
        "cpp.memory.strcpy",
        "cpp.memory.strcat",
        "cpp.memory.sprintf",
        "cpp.cmdi.system",
        "cpp.memory.reinterpret_cast",
        "cpp.memory.const_cast",
        "cpp.memory.printf_no_fmt",
    ];
    assert_positive_match("cpp", "positive.cpp", &expected);
}
/// Passes the Go fixture `positive.go` and the pattern IDs it is expected to
/// trigger to `assert_positive_match`.
#[test]
fn positive_go() {
    let expected = ["go.cmdi.exec_command", "go.crypto.md5", "go.crypto.sha1"];
    assert_positive_match("go", "positive.go", &expected);
}
/// Passes the PHP fixture `positive.php` and the pattern IDs it is expected
/// to trigger to `assert_positive_match`.
#[test]
fn positive_php() {
    let expected = [
        "php.code_exec.eval",
        "php.code_exec.create_function",
        "php.cmdi.system",
        "php.deser.unserialize",
    ];
    assert_positive_match("php", "positive.php", &expected);
}
/// Passes the Ruby fixture `positive.rb` and the pattern IDs it is expected
/// to trigger to `assert_positive_match`.
#[test]
fn positive_ruby() {
    let expected = [
        "rb.code_exec.eval",
        "rb.code_exec.instance_eval",
        "rb.code_exec.class_eval",
        "rb.cmdi.backtick",
        "rb.deser.yaml_load",
        "rb.deser.marshal_load",
        "rb.reflection.constantize",
    ];
    assert_positive_match("ruby", "positive.rb", &expected);
}
// ── Negative fixture tests ───────────────────────────────────────────────────
// Negative fixtures should produce zero matches for security patterns, i.e. patterns whose severity is not Low and whose category is not CodeQuality.
/// Collects the IDs of the patterns loaded for `lang` that count as security
/// patterns here: severity other than `Severity::Low` and category other than
/// `PatternCategory::CodeQuality`.
fn get_security_pattern_ids(lang: &str) -> HashSet<String> {
    let mut ids = HashSet::new();
    for pat in patterns::load(lang) {
        let is_low = pat.severity == Severity::Low;
        let is_quality = matches!(
            pat.category,
            nyx_scanner::patterns::PatternCategory::CodeQuality
        );
        if is_low || is_quality {
            continue;
        }
        ids.insert(pat.id.to_string());
    }
    ids
}
/// Runs all patterns for `lang` over the fixture `fixture_file` and asserts
/// that none of the matched IDs belongs to the set returned by
/// `get_security_pattern_ids`.
///
/// If the fixture file does not exist, a `SKIP` notice is printed to stderr
/// and nothing is asserted.
///
/// # Panics
///
/// Panics if the fixture exists but cannot be read, or if any security
/// pattern matched.
fn assert_negative_no_security_match(lang: &str, fixture_file: &str) {
    let path = fixture_path(lang, fixture_file);
    if path.exists() {
        let source = std::fs::read(&path).expect("read fixture");
        let matched = run_patterns(lang, &source);
        let security_ids = get_security_pattern_ids(lang);

        // Any overlap between what fired and the security set is a false positive.
        let false_positives: Vec<&String> = matched.intersection(&security_ids).collect();
        assert!(
            false_positives.is_empty(),
            "[{}] Negative fixture '{}' triggered security patterns (false positives):\n {:?}",
            lang,
            fixture_file,
            false_positives
        );
    } else {
        eprintln!("SKIP: fixture not found: {}", path.display());
    }
}
/// Hands the Rust fixture `negative.rs` to `assert_negative_no_security_match`.
#[test]
fn negative_rust() {
    let lang = "rust";
    let fixture = "negative.rs";
    assert_negative_no_security_match(lang, fixture);
}
/// Hands the Java fixture `negative.java` to `assert_negative_no_security_match`.
#[test]
fn negative_java() {
    let lang = "java";
    let fixture = "negative.java";
    assert_negative_no_security_match(lang, fixture);
}
/// Hands the Python fixture `negative.py` to `assert_negative_no_security_match`.
#[test]
fn negative_python() {
    let lang = "python";
    let fixture = "negative.py";
    assert_negative_no_security_match(lang, fixture);
}
/// Hands the JavaScript fixture `negative.js` to `assert_negative_no_security_match`.
#[test]
fn negative_javascript() {
    let lang = "javascript";
    let fixture = "negative.js";
    assert_negative_no_security_match(lang, fixture);
}
/// Hands the TypeScript fixture `negative.ts` to `assert_negative_no_security_match`.
#[test]
fn negative_typescript() {
    let lang = "typescript";
    let fixture = "negative.ts";
    assert_negative_no_security_match(lang, fixture);
}
/// Hands the C fixture `negative.c` to `assert_negative_no_security_match`.
#[test]
fn negative_c() {
    let lang = "c";
    let fixture = "negative.c";
    assert_negative_no_security_match(lang, fixture);
}
/// Hands the C++ fixture `negative.cpp` to `assert_negative_no_security_match`.
#[test]
fn negative_cpp() {
    let lang = "cpp";
    let fixture = "negative.cpp";
    assert_negative_no_security_match(lang, fixture);
}
/// Hands the Go fixture `negative.go` to `assert_negative_no_security_match`.
#[test]
fn negative_go() {
    let lang = "go";
    let fixture = "negative.go";
    assert_negative_no_security_match(lang, fixture);
}
/// Hands the PHP fixture `negative.php` to `assert_negative_no_security_match`.
#[test]
fn negative_php() {
    let lang = "php";
    let fixture = "negative.php";
    assert_negative_no_security_match(lang, fixture);
}
/// Hands the Ruby fixture `negative.rb` to `assert_negative_no_security_match`.
#[test]
fn negative_ruby() {
    let lang = "ruby";
    let fixture = "negative.rb";
    assert_negative_no_security_match(lang, fixture);
}