nyx/src/patterns/mod.rs
Eli Peter 1bbe4b1cfb
Phase 1 (#33)
* chore: Exclude CLAUDE.md from Cargo.toml

* feat: add callgraph module and integrate into main analysis flow

* feat: enhance CLI with new severity filtering and analysis modes

* feat: update CHANGELOG with recent enhancements and fixes to severity filtering and output handling

* feat: implement state-model dataflow analysis for resource lifecycle and auth state

* feat: enhance diagnostic output formatting and add evidence structure

* feat: implement attack surface ranking for diagnostics with scoring and sorting

* feat: add comprehensive documentation for installation, usage, and rules reference

* feat: add multiple language support for command execution and evaluation endpoints

* feat: implement inline suppression for findings using `nyx:ignore` comments

* feat: add confidence levels to AST patterns and update output structure

* feat: implement low-noise prioritization system with category filtering, rollup grouping, and configurable budgets

* feat: bump version to 0.4.0 and update changelog with new features and improvements

* feat: add dead code allowances to various functions in mod.rs and real_world_tests.rs
2026-02-25 21:16:36 -05:00

395 lines
12 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! # AST Pattern Conventions
//!
//! Each language file exports a `PATTERNS` slice of [`Pattern`] structs.
//!
//! ## ID format
//!
//! `<lang>.<category>.<specific>` — e.g. `java.deser.readobject`, `py.cmdi.os_system`.
//!
//! Language prefixes: `rs`, `java`, `py`, `js`, `ts`, `c`, `cpp`, `go`, `php`, `rb`.
//!
//! ## Tiers
//!
//! * **Tier A** — structural presence is high-signal (e.g. `gets()`, `eval()`).
//! * **Tier B** — requires a heuristic guard in the query (e.g. SQL with concatenated
//! arg, format-string with variable first arg).
//!
//! ## Severity
//!
//! * **High** — command exec, deserialization, banned C functions.
//! * **Medium** — SQL concat, reflection, XSS sinks, casts.
//! * **Low** — weak crypto, insecure randomness, code-quality (`unwrap`/`expect`/`panic`).
//!
//! Note: the default `min_severity` filter skips Low patterns; they only appear when
//! the user explicitly lowers the threshold.
//!
//! ## No-duplicate rule
//!
//! If a vulnerability class is already detected by taint analysis (e.g. `eval` as a
//! sink, `system` as a sink), the AST pattern is still kept for `--ast-only` mode but
//! uses a distinct ID namespace (`js.code_exec.eval` vs `taint-unsanitised-flow`).
//! The dedup pass in `ast.rs` prevents exact-duplicate findings at the same location.
//!
//! ## Adding a new pattern
//!
//! 1. Pick the language file under `src/patterns/<lang>.rs`.
//! 2. Choose tier, category, severity per the rules above.
//! 3. Write the tree-sitter query — test with `cargo test --test pattern_tests`.
//! 4. Add a snippet to `tests/fixtures/patterns/<lang>/positive.<ext>`.
//! 5. Add the ID to the positive test assertion in `tests/pattern_tests.rs`.
pub mod c;
pub mod cpp;
mod go;
mod java;
pub mod javascript;
mod php;
mod python;
mod ruby;
pub mod rust;
pub mod typescript;
use crate::evidence::Confidence;
use console::style;
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt;
use std::str::FromStr;
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
pub enum Severity {
High,
Medium,
Low,
}
impl Severity {
/// Bracketed, colored, fixed-width tag for aligned console output.
///
/// Returns e.g. `"[HIGH] "` or `"[MEDIUM]"` — always 8 visible characters
/// so the column after the tag lines up regardless of severity.
#[allow(dead_code)] // public API for lib consumers
pub fn colored_tag(self) -> String {
// Visible widths: "[HIGH]" = 6, "[MEDIUM]" = 8, "[LOW]" = 5.
// Pad the *whole* tag to 8 visible chars (the longest, "[MEDIUM]").
let (label, styled_fn): (&str, fn(&str) -> String) = match self {
Severity::High => ("HIGH", |s| style(s).red().bold().to_string()),
Severity::Medium => ("MEDIUM", |s| style(s).color256(208).bold().to_string()),
Severity::Low => ("LOW", |s| style(s).color256(67).to_string()),
};
let bracket_len = label.len() + 2; // "[" + label + "]"
let pad = 8usize.saturating_sub(bracket_len);
format!("[{}]{:pad$}", styled_fn(label), "", pad = pad)
}
}
impl fmt::Display for Severity {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let styled = match *self {
Severity::High => style("HIGH").red().bold().to_string(),
Severity::Medium => style("MEDIUM").color256(208).bold().to_string(),
Severity::Low => style("LOW").color256(67).to_string(),
};
f.write_str(&styled)
}
}
impl Severity {
/// Textual value stored in SQLite.
pub fn as_db_str(self) -> &'static str {
match self {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
}
}
}
impl FromStr for Severity {
type Err = String;
fn from_str(input: &str) -> Result<Self, Self::Err> {
match input.trim().to_ascii_uppercase().as_str() {
"HIGH" => Ok(Severity::High),
"MEDIUM" | "MED" => Ok(Severity::Medium),
"LOW" => Ok(Severity::Low),
other => Err(format!("unknown severity: '{other}'")),
}
}
}
/// A parsed severity filter expression.
///
/// Supports three forms:
/// - Single level: `"HIGH"` — matches only that level
/// - Comma list: `"HIGH,MEDIUM"` — matches any listed level
/// - Threshold: `">=MEDIUM"` — matches that level and above
///
/// Parsing is case-insensitive and tolerates whitespace around tokens.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SeverityFilter {
/// Match findings at or above this level (High >= Medium >= Low).
AtLeast(Severity),
/// Match findings whose severity is in this exact set.
AnyOf(Vec<Severity>),
}
impl SeverityFilter {
/// Parse a severity filter expression.
///
/// Examples: `"HIGH"`, `"high,medium"`, `">=MEDIUM"`, `">= low"`.
pub fn parse(expr: &str) -> Result<Self, String> {
let trimmed = expr.trim();
if trimmed.is_empty() {
return Err("empty severity expression".into());
}
// Threshold form: >=LEVEL
if let Some(rest) = trimmed.strip_prefix(">=") {
let level: Severity = rest.parse()?;
return Ok(SeverityFilter::AtLeast(level));
}
// Comma-separated list (also handles single value)
let levels: Result<Vec<Severity>, String> = trimmed
.split(',')
.map(|tok| tok.trim().parse::<Severity>())
.collect();
let levels = levels?;
if levels.is_empty() {
return Err("empty severity expression".into());
}
// Optimise single-value list
if levels.len() == 1 {
return Ok(SeverityFilter::AnyOf(levels));
}
Ok(SeverityFilter::AnyOf(levels))
}
/// Returns `true` if the given severity passes this filter.
pub fn matches(&self, sev: Severity) -> bool {
match self {
SeverityFilter::AtLeast(threshold) => {
// Severity ordering: High < Medium < Low (derived Ord).
// "at least Medium" means sev <= Medium in Ord terms.
sev <= *threshold
}
SeverityFilter::AnyOf(set) => set.contains(&sev),
}
}
}
/// Pattern confidence tier.
///
/// * **A** Structural presence alone is high-signal (e.g. `gets()`, `eval()`).
/// * **B** Requires a simple heuristic guard in the query (e.g. SQL with
/// concatenated arg, file-open with non-literal path).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub enum PatternTier {
A,
B,
}
/// High-level finding category for noise reduction and prioritization.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub enum FindingCategory {
Security,
Reliability,
Quality,
}
impl std::fmt::Display for FindingCategory {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
FindingCategory::Security => write!(f, "Security"),
FindingCategory::Reliability => write!(f, "Reliability"),
FindingCategory::Quality => write!(f, "Quality"),
}
}
}
/// Vulnerability class that a pattern detects.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub enum PatternCategory {
CommandExec,
CodeExec,
Deserialization,
SqlInjection,
PathTraversal,
Xss,
Crypto,
Secrets,
InsecureTransport,
Reflection,
MemorySafety,
Prototype,
CodeQuality,
}
impl PatternCategory {
/// Map this vulnerability class to a high-level finding category.
pub fn finding_category(self) -> FindingCategory {
match self {
PatternCategory::CodeQuality => FindingCategory::Quality,
_ => FindingCategory::Security,
}
}
}
/// One AST pattern with a tree-sitter query and meta-data.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct Pattern {
/// Unique identifier — `<lang>.<category>.<specific>` preferred.
pub id: &'static str,
/// Human-readable explanation.
pub description: &'static str,
/// tree-sitter query string.
pub query: &'static str,
/// Rough severity bucket.
pub severity: Severity,
/// Confidence tier (A = structural, B = heuristic-guarded).
pub tier: PatternTier,
/// Vulnerability class.
pub category: PatternCategory,
/// Confidence level for findings produced by this pattern.
pub confidence: Confidence,
}
/// Global, lazily-initialised registry: lang-name → pattern slice
static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(|| {
let mut m = HashMap::new();
// ---- Rust ----
m.insert("rust", rust::PATTERNS);
// ---- TypeScript ----
m.insert("typescript", typescript::PATTERNS);
m.insert("ts", typescript::PATTERNS);
m.insert("tsx", typescript::PATTERNS);
// ---- JavaScript ----
m.insert("javascript", javascript::PATTERNS);
m.insert("js", javascript::PATTERNS);
// ---- C & C++ ----
m.insert("c", c::PATTERNS);
m.insert("cpp", cpp::PATTERNS);
m.insert("c++", cpp::PATTERNS);
// ---- Other patterns in the folder ----
m.insert("java", java::PATTERNS);
m.insert("go", go::PATTERNS);
m.insert("php", php::PATTERNS);
m.insert("python", python::PATTERNS);
m.insert("py", python::PATTERNS);
m.insert("ruby", ruby::PATTERNS);
m.insert("rb", ruby::PATTERNS);
tracing::debug!("AST-pattern registry initialised ({} patterns)", m.len());
m
});
/// Return all patterns for the requested language (case-insensitive).
///
/// Unknown patterns yield an **empty** `Vec`.
pub fn load(lang: &str) -> Vec<Pattern> {
let key = lang.to_ascii_lowercase();
REGISTRY.get(key.as_str()).copied().unwrap_or(&[]).to_vec()
}
#[test]
fn severity_as_db_str_roundtrip() {
for &s in &[Severity::High, Severity::Medium, Severity::Low] {
let db = s.as_db_str();
assert!(matches!(db, "HIGH" | "MEDIUM" | "LOW"));
assert_eq!(db.parse::<Severity>().unwrap(), s);
assert_eq!(db.to_lowercase().parse::<Severity>().unwrap(), s);
}
}
#[test]
fn severity_display_contains_uppercase_name() {
assert!(Severity::High.to_string().contains("HIGH"));
assert!(Severity::Medium.to_string().contains("MEDIUM"));
assert!(Severity::Low.to_string().contains("LOW"));
}
#[test]
fn load_returns_correct_pattern_slices() {
let rust = load("rust");
assert!(!rust.is_empty(), "Rust patterns should be loaded");
let ts = load("typescript");
let tsx = load("tsx");
assert_eq!(ts, tsx, "alias tsx must map to TypeScript patterns");
assert_eq!(load("RUST"), rust);
assert!(load("brainfuck").is_empty());
}
#[test]
fn severity_from_str_rejects_unknown() {
assert!("garbage".parse::<Severity>().is_err());
}
#[test]
fn severity_filter_single() {
let f = SeverityFilter::parse("HIGH").unwrap();
assert!(f.matches(Severity::High));
assert!(!f.matches(Severity::Medium));
assert!(!f.matches(Severity::Low));
}
#[test]
fn severity_filter_comma_list() {
let f = SeverityFilter::parse("HIGH,MEDIUM").unwrap();
assert!(f.matches(Severity::High));
assert!(f.matches(Severity::Medium));
assert!(!f.matches(Severity::Low));
}
#[test]
fn severity_filter_threshold() {
let f = SeverityFilter::parse(">=MEDIUM").unwrap();
assert!(f.matches(Severity::High));
assert!(f.matches(Severity::Medium));
assert!(!f.matches(Severity::Low));
let f2 = SeverityFilter::parse(">=LOW").unwrap();
assert!(f2.matches(Severity::High));
assert!(f2.matches(Severity::Medium));
assert!(f2.matches(Severity::Low));
let f3 = SeverityFilter::parse(">=HIGH").unwrap();
assert!(f3.matches(Severity::High));
assert!(!f3.matches(Severity::Medium));
}
#[test]
fn severity_filter_case_insensitive_and_whitespace() {
let f = SeverityFilter::parse(" high , medium ").unwrap();
assert!(f.matches(Severity::High));
assert!(f.matches(Severity::Medium));
assert!(!f.matches(Severity::Low));
let f2 = SeverityFilter::parse(">= medium").unwrap();
assert!(f2.matches(Severity::High));
assert!(f2.matches(Severity::Medium));
}
#[test]
fn severity_filter_rejects_empty() {
assert!(SeverityFilter::parse("").is_err());
assert!(SeverityFilter::parse(" ").is_err());
}
#[test]
fn severity_filter_rejects_invalid_level() {
assert!(SeverityFilter::parse("CRITICAL").is_err());
assert!(SeverityFilter::parse("HIGH,CRITICAL").is_err());
assert!(SeverityFilter::parse(">=BOGUS").is_err());
}