nyx/src/rank.rs

647 lines
21 KiB
Rust
Raw Normal View History

//! Attack surface ranking for scan diagnostics.
//!
//! Computes a deterministic score for each [`Diag`] using only in-memory
//! information (severity, evidence, source kind, rule ID, validation state).
//! The score is used to sort findings so that truncation keeps the most
//! exploitable / important results.
use crate::commands::scan::Diag;
use crate::evidence::Evidence;
use crate::patterns::Severity;
use std::hash::{DefaultHasher, Hash, Hasher};
/// Outcome of scoring one diagnostic for attack-surface ranking.
#[derive(Debug, Clone)]
pub struct AttackRank {
    /// Final numeric score for the diagnostic.
    pub score: f64,
    /// `(component name, contribution)` pairs describing how `score` was
    /// assembled; kept for debug/display output only.
    #[allow(dead_code)]
    pub components: Vec<(String, String)>,
}
/// Compute an attack-surface score for `diag`.
///
/// The score is the sum of a severity base, an analysis-kind bonus, an
/// evidence-strength bonus and a state-rule bonus, minus a small penalty
/// when the path is guarded by a validation predicate. Higher means more
/// exploitable / important.
///
/// The returned [`AttackRank`] also carries a `(name, value)` breakdown of
/// every non-zero component for optional debug/display. The severity
/// component is always recorded.
pub fn compute_attack_rank(diag: &Diag) -> AttackRank {
    let mut score = 0.0_f64;
    let mut components: Vec<(String, String)> = Vec::new();

    // ── 1. Severity base ────────────────────────────────────────────────
    let sev_score = match diag.severity {
        Severity::High => 60.0,
        Severity::Medium => 30.0,
        Severity::Low => 10.0,
    };
    score += sev_score;
    components.push(("severity".into(), sev_score.to_string()));

    // Adds a bonus to the running score and records it in the breakdown
    // only when it actually contributed something.
    let mut add_bonus = |label: &str, bonus: f64| {
        score += bonus;
        if bonus != 0.0 {
            components.push((label.into(), bonus.to_string()));
        }
    };

    // ── 2. Analysis kind bonus ──────────────────────────────────────────
    //
    // Taint-confirmed findings are the strongest signal. State findings
    // (resource lifecycle / auth) are next. CFG-structural findings
    // without taint evidence rank lower. AST-only pattern matches are
    // the weakest.
    add_bonus(
        "analysis_kind",
        analysis_kind_bonus(&diag.id, diag.evidence.as_ref()),
    );

    // ── 3. Evidence strength / source-kind priority ─────────────────────
    add_bonus("evidence", evidence_strength(diag));

    // ── 4. State finding sub-ranking ────────────────────────────────────
    add_bonus("state_rule", state_finding_bonus(&diag.id));

    // ── 5. Path validation penalty ──────────────────────────────────────
    //
    // If a taint path is guarded by a validation predicate, the guard may
    // prevent the vulnerability from being triggered. Apply a small
    // penalty (5) to push validated paths below otherwise-equal
    // unvalidated ones without changing the overall ranking tier.
    //
    // The flag on the diag and the evidence note are OR-ed: attached
    // evidence that lacks the note must not silently discard a
    // `path_validated` flag already set on the diag itself.
    let noted_in_evidence = diag
        .evidence
        .as_ref()
        .is_some_and(|ev| ev.notes.iter().any(|n| n == "path_validated"));
    if diag.path_validated || noted_in_evidence {
        score -= 5.0;
        components.push(("path_validated_penalty".into(), "-5".into()));
    }

    AttackRank { score, components }
}
/// Deterministic tie-breaking key for a diagnostic.
///
/// When two diags share the same score they are ordered by, in turn:
/// severity (High first, then Medium, then Low) → rule ID → file path →
/// line → col → hash of the message.
///
/// The returned value owns its data (the ID and path are cloned), so it can
/// be compared with `cmp` inside `sort_by` without borrowing from `diag`.
pub fn sort_key(diag: &Diag) -> impl Ord {
    // Hash the message rather than carrying it in the key.
    let mut hasher = DefaultHasher::new();
    diag.message.hash(&mut hasher);
    let message_digest = hasher.finish();

    // Explicit ordinal so that the most severe finding sorts first.
    let severity_rank: u8 = match diag.severity {
        Severity::High => 0,
        Severity::Medium => 1,
        Severity::Low => 2,
    };

    (
        severity_rank,
        diag.id.clone(),
        diag.path.clone(),
        diag.line,
        diag.col,
        message_digest,
    )
}
/// Sort diagnostics in-place by descending attack-surface score, then by
/// the deterministic tie-breaker from [`sort_key`]. Populates `rank_score`
/// on each `Diag`.
///
/// Scores are compared with [`f64::total_cmp`], so the comparator is a
/// total order for every possible `f64` (including NaN) instead of
/// collapsing incomparable values to `Equal`.
pub fn rank_diags(diags: &mut [Diag]) {
    // Score each diag and store the result directly on it; no intermediate
    // buffer is needed because scoring does not read `rank_score`.
    for diag in diags.iter_mut() {
        let score = compute_attack_rank(diag).score;
        diag.rank_score = Some(score);
    }

    // Descending by score (note the swapped operands), then ascending by
    // the tie-breaker so equal scores still have a stable, input-order
    // independent position.
    diags.sort_by(|a, b| {
        let score_a = a.rank_score.unwrap_or(0.0);
        let score_b = b.rank_score.unwrap_or(0.0);
        score_b
            .total_cmp(&score_a)
            .then_with(|| sort_key(a).cmp(&sort_key(b)))
    });
}
// ─────────────────────────────────────────────────────────────────────────────
// Scoring helpers
// ─────────────────────────────────────────────────────────────────────────────
/// Bonus derived from the analysis kind, which is inferred from the rule ID
/// prefix and, for CFG rules, from whether evidence is attached.
///
/// * `taint-…` → 10 (taint-confirmed flow, strongest signal)
/// * `state-…` → 8 (state-model findings: resource / auth)
/// * `cfg-…`   → 5 with non-empty evidence, otherwise 3
/// * anything else → 0 (AST-only pattern match)
fn analysis_kind_bonus(rule_id: &str, evidence: Option<&Evidence>) -> f64 {
    if rule_id.starts_with("taint-") {
        return 10.0;
    }
    if rule_id.starts_with("state-") {
        return 8.0;
    }
    if !rule_id.starts_with("cfg-") {
        // AST-only pattern match: no bonus.
        return 0.0;
    }

    // CFG-structural finding: boosted only when backed by real evidence.
    match evidence {
        Some(ev) if !ev.is_empty() => 5.0,
        _ => 3.0,
    }
}
/// Bonus from evidence strength: number of evidence items and source-kind
/// priority.
fn evidence_strength(diag: &Diag) -> f64 {
let mut bonus = 0.0;
if let Some(ev) = &diag.evidence {
// Count structured evidence items (capped at 4)
let item_count = ev.source.is_some() as usize
+ ev.sink.is_some() as usize
+ (ev.guards.len() + ev.sanitizers.len()).min(2);
bonus += item_count.min(4) as f64;
// Source-kind priority from evidence notes
for note in &ev.notes {
if let Some(kind) = note.strip_prefix("source_kind:") {
bonus += source_kind_priority(kind);
break;
}
}
} else {
// Fallback for DB-cached diags without structured evidence
bonus += (diag.labels.len() as f64).min(4.0);
for (label, value) in &diag.labels {
if label == "Source" {
bonus += source_kind_priority(value);
}
}
}
bonus
}
/// Priority bonus based on the source kind string found in evidence.
///
/// Exact structured `SourceKind` names (from `source_kind:X` notes) are
/// matched first:
/// `UserInput` → 6, `EnvironmentConfig` → 5, `Unknown` → 4,
/// `FileSystem` → 3, `Database` → 2.
///
/// Any other value is treated as a legacy free-form label and classified by
/// case-insensitive substring matching, checked in this order:
/// user input (6) → environment (5) → file system (3) → database (2),
/// falling back to 4 for unrecognised text.
///
/// Because the groups are tested in order, an earlier group wins when a
/// label matches several. In particular `"query"` belongs to the user-input
/// group, so a label containing it scores 6 and never reaches the database
/// group.
fn source_kind_priority(source_value: &str) -> f64 {
    // Structured SourceKind enum values (from evidence.notes "source_kind:X")
    match source_value {
        "UserInput" => return 6.0,
        "EnvironmentConfig" => return 5.0,
        "FileSystem" => return 3.0,
        "Database" => return 2.0,
        "Unknown" => return 4.0,
        _ => {}
    }

    // Strong user-input signals.
    const USER_INPUT: [&str; 9] = [
        "stdin",
        "argv",
        "request",
        "form",
        "query",
        "param",
        "header",
        "body",
        "read_line",
    ];
    // Environment / config — still attacker-controllable in many deployments.
    const ENVIRONMENT: [&str; 3] = ["env", "var(", "getenv"];
    // File system — needs an indirect vector.
    const FILE_SYSTEM: [&str; 3] = ["read", "file", "open"];
    // Database — needs prior injection. "query" is deliberately absent: it
    // is already claimed by USER_INPUT, so listing it here would be dead.
    const DATABASE: [&str; 2] = ["fetch", "select"];

    // Fallback: substring matching for legacy labels.
    let lower = source_value.to_ascii_lowercase();
    let mentions_any = |needles: &[&str]| needles.iter().any(|needle| lower.contains(*needle));

    if mentions_any(&USER_INPUT) {
        6.0
    } else if mentions_any(&ENVIRONMENT) {
        5.0
    } else if mentions_any(&FILE_SYSTEM) {
        3.0
    } else if mentions_any(&DATABASE) {
        2.0
    } else {
        // Unknown / unrecognised — treat as moderately exploitable.
        4.0
    }
}
/// Extra score for specific state-analysis rule IDs.
///
/// The rule ID must match exactly; any ID not in the table gets no bonus.
fn state_finding_bonus(rule_id: &str) -> f64 {
    const BONUS_BY_RULE: [(&str, f64); 5] = [
        ("state-use-after-close", 6.0),
        ("state-unauthed-access", 6.0),
        ("state-double-close", 3.0),
        ("state-resource-leak", 2.0),          // must-leak
        ("state-resource-leak-possible", 1.0), // may-leak
    ];

    BONUS_BY_RULE
        .iter()
        .find(|(known_id, _)| *known_id == rule_id)
        .map_or(0.0, |&(_, bonus)| bonus)
}
// ─────────────────────────────────────────────────────────────────────────────
// Tests
// ─────────────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
/// Build a `Diag` for tests from the fields that influence ranking.
///
/// Column is fixed at 1, the category is `Security`, and every optional
/// field (message, evidence, rank data, suppression, rollup, ...) starts
/// out as `None` / `false`; tests that need evidence set it afterwards.
fn make_diag(
severity: Severity,
id: &str,
path: &str,
line: usize,
labels: Vec<(String, String)>,
path_validated: bool,
) -> Diag {
Diag {
path: path.into(),
line,
col: 1,
severity,
id: id.into(),
category: crate::patterns::FindingCategory::Security,
path_validated,
guard_kind: None,
message: None,
labels,
confidence: None,
evidence: None,
rank_score: None,
rank_reason: None,
suppressed: false,
suppression: None,
rollup: None,
}
}
// ── Ordering tests ──────────────────────────────────────────────────
// A High taint finding fed by `read_line()` must outscore a Medium taint
// finding fed by `File::open()`.
#[test]
fn high_taint_user_input_ranks_above_medium_file_io() {
let high_taint = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"src/main.rs",
10,
vec![
("Source".into(), "read_line() at 1:1".into()),
("Sink".into(), "exec()".into()),
],
false,
);
let med_file = make_diag(
Severity::Medium,
"taint-unsanitised-flow (source 5:1)",
"src/lib.rs",
20,
vec![
("Source".into(), "File::open() at 5:1".into()),
("Sink".into(), "write()".into()),
],
false,
);
let score_high = compute_attack_rank(&high_taint).score;
let score_med = compute_attack_rank(&med_file).score;
assert!(
score_high > score_med,
"high taint user-input ({score_high}) should rank above medium file-io ({score_med})"
);
}
// A Medium must-leak must outscore a Low may-leak.
#[test]
fn must_leak_ranks_above_may_leak() {
let must = make_diag(
Severity::Medium,
"state-resource-leak",
"src/db.rs",
30,
vec![],
false,
);
let may = make_diag(
Severity::Low,
"state-resource-leak-possible",
"src/db.rs",
35,
vec![],
false,
);
let score_must = compute_attack_rank(&must).score;
let score_may = compute_attack_rank(&may).score;
assert!(
score_must > score_may,
"must-leak ({score_must}) should rank above may-leak ({score_may})"
);
}
// At equal severity, a taint finding with labels must outscore a CFG
// finding that has neither labels nor evidence.
#[test]
fn cfg_without_evidence_ranks_below_taint_confirmed() {
let taint = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"src/main.rs",
10,
vec![
("Source".into(), "env::var(\"CMD\") at 1:1".into()),
("Sink".into(), "exec()".into()),
],
false,
);
let cfg_only = make_diag(
Severity::High,
"cfg-unguarded-sink",
"src/main.rs",
10,
vec![],
false,
);
let score_taint = compute_attack_rank(&taint).score;
let score_cfg = compute_attack_rank(&cfg_only).score;
assert!(
score_taint > score_cfg,
"taint-confirmed ({score_taint}) should rank above cfg-only ({score_cfg})"
);
}
// Ranking the same three diags in two different input orders must yield
// the same output order.
#[test]
fn determinism_input_order_independent() {
let d1 = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"a.rs",
1,
vec![("Source".into(), "stdin at 1:1".into())],
false,
);
let d2 = make_diag(
Severity::Medium,
"cfg-unguarded-sink",
"b.rs",
2,
vec![],
false,
);
let d3 = make_diag(Severity::Low, "rs.code_exec.eval", "c.rs", 3, vec![], false);
let mut order_a = vec![d1.clone(), d2.clone(), d3.clone()];
let mut order_b = vec![d3, d1, d2];
rank_diags(&mut order_a);
rank_diags(&mut order_b);
let ids_a: Vec<_> = order_a.iter().map(|d| (&d.id, d.line)).collect();
let ids_b: Vec<_> = order_b.iter().map(|d| (&d.id, d.line)).collect();
assert_eq!(
ids_a, ids_b,
"ranking must be deterministic regardless of input order"
);
}
// Two otherwise identical diags: the one flagged `path_validated` must
// score lower.
#[test]
fn path_validated_penalty_applied() {
let unvalidated = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"src/main.rs",
10,
vec![("Source".into(), "env::var(\"X\") at 1:1".into())],
false,
);
let validated = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"src/main.rs",
10,
vec![("Source".into(), "env::var(\"X\") at 1:1".into())],
true,
);
let score_unval = compute_attack_rank(&unvalidated).score;
let score_val = compute_attack_rank(&validated).score;
assert!(
score_unval > score_val,
"unvalidated ({score_unval}) should rank above validated ({score_val})"
);
}
// A High use-after-close must outscore a Low may-leak.
#[test]
fn state_use_after_close_ranks_above_may_leak() {
let uac = make_diag(
Severity::High,
"state-use-after-close",
"x.rs",
1,
vec![],
false,
);
let may = make_diag(
Severity::Low,
"state-resource-leak-possible",
"x.rs",
2,
vec![],
false,
);
let score_uac = compute_attack_rank(&uac).score;
let score_may = compute_attack_rank(&may).score;
assert!(score_uac > score_may);
}
// A High unauthenticated-access finding must outscore a Medium must-leak.
#[test]
fn unauthed_access_ranks_above_resource_leak() {
let unauth = make_diag(
Severity::High,
"state-unauthed-access",
"x.rs",
1,
vec![],
false,
);
let leak = make_diag(
Severity::Medium,
"state-resource-leak",
"x.rs",
2,
vec![],
false,
);
let score_ua = compute_attack_rank(&unauth).score;
let score_lk = compute_attack_rank(&leak).score;
assert!(score_ua > score_lk);
}
// At High severity, a rule ID without a taint-/state-/cfg- prefix must
// score below one of each prefixed kind.
#[test]
fn ast_only_ranks_below_all_others_at_same_severity() {
let ast = make_diag(
Severity::High,
"rs.code_exec.eval",
"x.rs",
1,
vec![],
false,
);
let cfg = make_diag(
Severity::High,
"cfg-unguarded-sink",
"x.rs",
2,
vec![],
false,
);
let taint = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"x.rs",
3,
vec![("Source".into(), "env::var(\"X\") at 1:1".into())],
false,
);
let state = make_diag(
Severity::High,
"state-use-after-close",
"x.rs",
4,
vec![],
false,
);
let s_ast = compute_attack_rank(&ast).score;
let s_cfg = compute_attack_rank(&cfg).score;
let s_taint = compute_attack_rank(&taint).score;
let s_state = compute_attack_rank(&state).score;
assert!(s_ast < s_cfg, "AST ({s_ast}) < CFG ({s_cfg})");
assert!(s_ast < s_taint, "AST ({s_ast}) < taint ({s_taint})");
assert!(s_ast < s_state, "AST ({s_ast}) < state ({s_state})");
}
#[test]
fn structured_evidence_source_kind_matches_legacy() {
// A diag with structured evidence (source + sink + a
// `source_kind:UserInput` note) must get the same total score as the
// equivalent legacy diag carrying "Source" / "Sink" labels.
let mut structured = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"src/main.rs",
10,
vec![],
false,
);
structured.evidence = Some(crate::evidence::Evidence {
source: Some(crate::evidence::SpanEvidence {
path: "src/main.rs".into(),
line: 1,
col: 1,
kind: "source".into(),
snippet: Some("read_line()".into()),
}),
sink: Some(crate::evidence::SpanEvidence {
path: "src/main.rs".into(),
line: 10,
col: 5,
kind: "sink".into(),
snippet: Some("exec()".into()),
}),
guards: vec![],
sanitizers: vec![],
state: None,
notes: vec!["source_kind:UserInput".into()],
});
let legacy = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"src/main.rs",
10,
vec![
("Source".into(), "read_line() at 1:1".into()),
("Sink".into(), "exec()".into()),
],
false,
);
let score_structured = compute_attack_rank(&structured).score;
let score_legacy = compute_attack_rank(&legacy).score;
assert_eq!(
score_structured, score_legacy,
"structured ({score_structured}) should equal legacy ({score_legacy})"
);
}
// Calls `evidence_strength` directly with more guards/sanitizers than the
// item count admits.
#[test]
fn evidence_item_count_capped_at_4() {
let mut d = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"src/main.rs",
10,
vec![],
false,
);
let span = || crate::evidence::SpanEvidence {
path: "x.rs".into(),
line: 1,
col: 1,
kind: "guard".into(),
snippet: None,
};
d.evidence = Some(crate::evidence::Evidence {
source: Some(span()),
sink: Some(span()),
guards: vec![span(), span(), span()], // 3 guards
sanitizers: vec![span()], // 1 sanitizer
state: None,
notes: vec![],
});
// item_count = 1 (source) + 1 (sink) + min(2, 3 guards + 1 sanitizer) = 4.
// There is no `source_kind:` note, so no source-priority bonus is added
// and the evidence bonus is exactly 4.0.
let score = evidence_strength(&d);
assert!(
(score - 4.0).abs() < f64::EPSILON,
"evidence item count should be capped at 4, got {score}"
);
}
// The penalty must also fire when only the evidence note, not the flag on
// the diag, marks the path as validated.
#[test]
fn path_validated_from_evidence_notes() {
let mut d = make_diag(
Severity::High,
"taint-unsanitised-flow (source 1:1)",
"src/main.rs",
10,
vec![],
false, // path_validated is false on Diag
);
d.evidence = Some(crate::evidence::Evidence {
source: None,
sink: None,
guards: vec![],
sanitizers: vec![],
state: None,
notes: vec!["path_validated".into()],
});
let rank = compute_attack_rank(&d);
assert!(
rank.components
.iter()
.any(|(k, _)| k == "path_validated_penalty"),
"path_validated note in evidence should trigger penalty"
);
}
}