//! Attack surface ranking for scan diagnostics. //! //! Computes a deterministic score for each [`Diag`] using only in-memory //! information (severity, evidence, source kind, rule ID, validation state). //! The score is used to sort findings so that truncation keeps the most //! exploitable / important results. use crate::commands::scan::Diag; use crate::evidence::Evidence; use crate::patterns::Severity; use std::hash::{DefaultHasher, Hash, Hasher}; /// Computed attack-surface ranking for a single diagnostic. #[derive(Debug, Clone)] pub struct AttackRank { pub score: f64, /// Breakdown of score components (for debug/display purposes). #[allow(dead_code)] pub components: Vec<(String, String)>, } /// Compute an attack-surface score for `diag`. /// /// The score is a positive `f64`; higher means more exploitable / important. /// Components are returned for optional debug/display. pub fn compute_attack_rank(diag: &Diag) -> AttackRank { let mut score = 0.0_f64; let mut components: Vec<(String, String)> = Vec::new(); // ── 1. Severity base ──────────────────────────────────────────────── let sev_score = match diag.severity { Severity::High => 60.0, Severity::Medium => 30.0, Severity::Low => 10.0, }; score += sev_score; components.push(("severity".into(), format!("{sev_score}"))); // ── 2. Analysis kind bonus ────────────────────────────────────────── // // Taint-confirmed findings are the strongest signal. State findings // (resource lifecycle / auth) are next. CFG-structural findings // without taint evidence rank lower. AST-only pattern matches are // the weakest. let kind_bonus = analysis_kind_bonus(&diag.id, diag.evidence.as_ref()); score += kind_bonus; if kind_bonus != 0.0 { components.push(("analysis_kind".into(), format!("{kind_bonus}"))); } // ── 3. Evidence strength / source-kind priority ───────────────────── let evidence_bonus = evidence_strength(diag); score += evidence_bonus; if evidence_bonus != 0.0 { components.push(("evidence".into(), format!("{evidence_bonus}"))); } // ── 4. State finding sub-ranking ──────────────────────────────────── let state_bonus = state_finding_bonus(&diag.id); score += state_bonus; if state_bonus != 0.0 { components.push(("state_rule".into(), format!("{state_bonus}"))); } // ── 5. Path validation penalty ────────────────────────────────────── // // If a taint path is guarded by a validation predicate, the finding // has higher informational value but lower exploitability because the // guard may prevent the vulnerability from being triggered. Apply a // small penalty (–5) to push validated paths below otherwise-equal // unvalidated ones without changing the overall ranking tier. let path_validated = diag.evidence.as_ref().map_or(diag.path_validated, |ev| { ev.notes.iter().any(|n| n == "path_validated") }); if path_validated { score -= 5.0; components.push(("path_validated_penalty".into(), "-5".into())); } AttackRank { score, components } } /// Deterministic sort key for a diagnostic. /// /// Two diags with identical scores are tie-broken by: /// severity (High < Medium < Low in the `Ord` impl, so we negate) /// → rule ID → file path → line → col → message hash /// /// Returns a tuple suitable for `sort_by`. pub fn sort_key(diag: &Diag) -> impl Ord { let sev_ord: u8 = match diag.severity { Severity::High => 0, Severity::Medium => 1, Severity::Low => 2, }; let msg_hash = { let mut h = DefaultHasher::new(); diag.message.hash(&mut h); h.finish() }; ( sev_ord, diag.id.clone(), diag.path.clone(), diag.line, diag.col, msg_hash, ) } /// Sort diagnostics in-place by descending attack-surface score, then by /// deterministic tie-breaker. Populates `rank_score` on each `Diag`. pub fn rank_diags(diags: &mut [Diag]) { // Compute scores let scores: Vec = diags.iter().map(|d| compute_attack_rank(d).score).collect(); // Attach scores to diags for (d, s) in diags.iter_mut().zip(scores.iter()) { d.rank_score = Some(*s); } // Sort descending by score, then ascending by tie-breaker diags.sort_by(|a, b| { let sa = a.rank_score.unwrap_or(0.0); let sb = b.rank_score.unwrap_or(0.0); // Descending score (higher first) sb.partial_cmp(&sa) .unwrap_or(std::cmp::Ordering::Equal) .then_with(|| sort_key(a).cmp(&sort_key(b))) }); } // ───────────────────────────────────────────────────────────────────────────── // Scoring helpers // ───────────────────────────────────────────────────────────────────────────── /// Bonus based on analysis kind inferred from rule ID + evidence. fn analysis_kind_bonus(rule_id: &str, evidence: Option<&Evidence>) -> f64 { if rule_id.starts_with("taint-") { // Taint-confirmed flow is the strongest signal 10.0 } else if rule_id.starts_with("state-") { // State-model findings (resource / auth) are strong 8.0 } else if rule_id.starts_with("cfg-") { // CFG-structural findings: boost if evidence exists if evidence.is_some_and(|e| !e.is_empty()) { 5.0 } else { 3.0 } } else { // AST-only pattern match 0.0 } } /// Bonus from evidence strength: number of evidence items and source-kind /// priority. fn evidence_strength(diag: &Diag) -> f64 { let mut bonus = 0.0; if let Some(ev) = &diag.evidence { // Count structured evidence items (capped at 4) let item_count = ev.source.is_some() as usize + ev.sink.is_some() as usize + (ev.guards.len() + ev.sanitizers.len()).min(2); bonus += item_count.min(4) as f64; // Source-kind priority from evidence notes for note in &ev.notes { if let Some(kind) = note.strip_prefix("source_kind:") { bonus += source_kind_priority(kind); break; } } } else { // Fallback for DB-cached diags without structured evidence bonus += (diag.labels.len() as f64).min(4.0); for (label, value) in &diag.labels { if label == "Source" { bonus += source_kind_priority(value); } } } bonus } /// Priority bonus based on the source kind string found in evidence. /// /// UserInput / EnvironmentConfig / Unknown are most exploitable. /// FileSystem / Database are lower because the attacker needs a more /// indirect vector. fn source_kind_priority(source_value: &str) -> f64 { // Structured SourceKind enum values (from evidence.notes "source_kind:X") match source_value { "UserInput" => return 6.0, "EnvironmentConfig" => return 5.0, "FileSystem" => return 3.0, "Database" => return 2.0, "Unknown" => return 4.0, _ => {} } // Fallback: substring matching for legacy labels let lower = source_value.to_ascii_lowercase(); if lower.contains("stdin") || lower.contains("argv") || lower.contains("request") || lower.contains("form") || lower.contains("query") || lower.contains("param") || lower.contains("header") || lower.contains("body") || lower.contains("read_line") { // Strong user-input signals 6.0 } else if lower.contains("env") || lower.contains("var(") || lower.contains("getenv") { // Environment / config — still attacker-controllable in many deployments 5.0 } else if lower.contains("read") || lower.contains("file") || lower.contains("open") { // File system — needs indirect vector 3.0 } else if lower.contains("query") || lower.contains("fetch") || lower.contains("select") { // Database — needs prior injection 2.0 } else { // Unknown / unrecognised — treat as moderately exploitable 4.0 } } /// Bonus for specific state-analysis rule IDs. fn state_finding_bonus(rule_id: &str) -> f64 { match rule_id { "state-use-after-close" => 6.0, "state-unauthed-access" => 6.0, "state-double-close" => 3.0, "state-resource-leak" => 2.0, // must-leak "state-resource-leak-possible" => 1.0, // may-leak _ => 0.0, } } // ───────────────────────────────────────────────────────────────────────────── // Tests // ───────────────────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; fn make_diag( severity: Severity, id: &str, path: &str, line: usize, labels: Vec<(String, String)>, path_validated: bool, ) -> Diag { Diag { path: path.into(), line, col: 1, severity, id: id.into(), category: crate::patterns::FindingCategory::Security, path_validated, guard_kind: None, message: None, labels, confidence: None, evidence: None, rank_score: None, rank_reason: None, suppressed: false, suppression: None, rollup: None, } } // ── Ordering tests ────────────────────────────────────────────────── #[test] fn high_taint_user_input_ranks_above_medium_file_io() { let high_taint = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "src/main.rs", 10, vec![ ("Source".into(), "read_line() at 1:1".into()), ("Sink".into(), "exec()".into()), ], false, ); let med_file = make_diag( Severity::Medium, "taint-unsanitised-flow (source 5:1)", "src/lib.rs", 20, vec![ ("Source".into(), "File::open() at 5:1".into()), ("Sink".into(), "write()".into()), ], false, ); let score_high = compute_attack_rank(&high_taint).score; let score_med = compute_attack_rank(&med_file).score; assert!( score_high > score_med, "high taint user-input ({score_high}) should rank above medium file-io ({score_med})" ); } #[test] fn must_leak_ranks_above_may_leak() { let must = make_diag( Severity::Medium, "state-resource-leak", "src/db.rs", 30, vec![], false, ); let may = make_diag( Severity::Low, "state-resource-leak-possible", "src/db.rs", 35, vec![], false, ); let score_must = compute_attack_rank(&must).score; let score_may = compute_attack_rank(&may).score; assert!( score_must > score_may, "must-leak ({score_must}) should rank above may-leak ({score_may})" ); } #[test] fn cfg_without_evidence_ranks_below_taint_confirmed() { let taint = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "src/main.rs", 10, vec![ ("Source".into(), "env::var(\"CMD\") at 1:1".into()), ("Sink".into(), "exec()".into()), ], false, ); let cfg_only = make_diag( Severity::High, "cfg-unguarded-sink", "src/main.rs", 10, vec![], false, ); let score_taint = compute_attack_rank(&taint).score; let score_cfg = compute_attack_rank(&cfg_only).score; assert!( score_taint > score_cfg, "taint-confirmed ({score_taint}) should rank above cfg-only ({score_cfg})" ); } #[test] fn determinism_input_order_independent() { let d1 = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "a.rs", 1, vec![("Source".into(), "stdin at 1:1".into())], false, ); let d2 = make_diag( Severity::Medium, "cfg-unguarded-sink", "b.rs", 2, vec![], false, ); let d3 = make_diag(Severity::Low, "rs.code_exec.eval", "c.rs", 3, vec![], false); let mut order_a = vec![d1.clone(), d2.clone(), d3.clone()]; let mut order_b = vec![d3, d1, d2]; rank_diags(&mut order_a); rank_diags(&mut order_b); let ids_a: Vec<_> = order_a.iter().map(|d| (&d.id, d.line)).collect(); let ids_b: Vec<_> = order_b.iter().map(|d| (&d.id, d.line)).collect(); assert_eq!( ids_a, ids_b, "ranking must be deterministic regardless of input order" ); } #[test] fn path_validated_penalty_applied() { let unvalidated = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "src/main.rs", 10, vec![("Source".into(), "env::var(\"X\") at 1:1".into())], false, ); let validated = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "src/main.rs", 10, vec![("Source".into(), "env::var(\"X\") at 1:1".into())], true, ); let score_unval = compute_attack_rank(&unvalidated).score; let score_val = compute_attack_rank(&validated).score; assert!( score_unval > score_val, "unvalidated ({score_unval}) should rank above validated ({score_val})" ); } #[test] fn state_use_after_close_ranks_above_may_leak() { let uac = make_diag( Severity::High, "state-use-after-close", "x.rs", 1, vec![], false, ); let may = make_diag( Severity::Low, "state-resource-leak-possible", "x.rs", 2, vec![], false, ); let score_uac = compute_attack_rank(&uac).score; let score_may = compute_attack_rank(&may).score; assert!(score_uac > score_may); } #[test] fn unauthed_access_ranks_above_resource_leak() { let unauth = make_diag( Severity::High, "state-unauthed-access", "x.rs", 1, vec![], false, ); let leak = make_diag( Severity::Medium, "state-resource-leak", "x.rs", 2, vec![], false, ); let score_ua = compute_attack_rank(&unauth).score; let score_lk = compute_attack_rank(&leak).score; assert!(score_ua > score_lk); } #[test] fn ast_only_ranks_below_all_others_at_same_severity() { let ast = make_diag( Severity::High, "rs.code_exec.eval", "x.rs", 1, vec![], false, ); let cfg = make_diag( Severity::High, "cfg-unguarded-sink", "x.rs", 2, vec![], false, ); let taint = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "x.rs", 3, vec![("Source".into(), "env::var(\"X\") at 1:1".into())], false, ); let state = make_diag( Severity::High, "state-use-after-close", "x.rs", 4, vec![], false, ); let s_ast = compute_attack_rank(&ast).score; let s_cfg = compute_attack_rank(&cfg).score; let s_taint = compute_attack_rank(&taint).score; let s_state = compute_attack_rank(&state).score; assert!(s_ast < s_cfg, "AST ({s_ast}) < CFG ({s_cfg})"); assert!(s_ast < s_taint, "AST ({s_ast}) < taint ({s_taint})"); assert!(s_ast < s_state, "AST ({s_ast}) < state ({s_state})"); } #[test] fn structured_evidence_source_kind_matches_legacy() { // Structured evidence with source_kind:UserInput note should give // the same source-kind bonus as a legacy "Source" label with user input. let mut structured = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "src/main.rs", 10, vec![], false, ); structured.evidence = Some(crate::evidence::Evidence { source: Some(crate::evidence::SpanEvidence { path: "src/main.rs".into(), line: 1, col: 1, kind: "source".into(), snippet: Some("read_line()".into()), }), sink: Some(crate::evidence::SpanEvidence { path: "src/main.rs".into(), line: 10, col: 5, kind: "sink".into(), snippet: Some("exec()".into()), }), guards: vec![], sanitizers: vec![], state: None, notes: vec!["source_kind:UserInput".into()], }); let legacy = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "src/main.rs", 10, vec![ ("Source".into(), "read_line() at 1:1".into()), ("Sink".into(), "exec()".into()), ], false, ); let score_structured = compute_attack_rank(&structured).score; let score_legacy = compute_attack_rank(&legacy).score; assert_eq!( score_structured, score_legacy, "structured ({score_structured}) should equal legacy ({score_legacy})" ); } #[test] fn evidence_item_count_capped_at_4() { let mut d = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "src/main.rs", 10, vec![], false, ); let span = || crate::evidence::SpanEvidence { path: "x.rs".into(), line: 1, col: 1, kind: "guard".into(), snippet: None, }; d.evidence = Some(crate::evidence::Evidence { source: Some(span()), sink: Some(span()), guards: vec![span(), span(), span()], // 3 guards sanitizers: vec![span()], // 1 sanitizer state: None, notes: vec![], }); // item_count = 1 (source) + 1 (sink) + min(2, 3+1) = 4 // evidence bonus should be exactly 4.0 (from items) + 4.0 (unknown source kind) = 8.0 // ... but no source_kind note, so no source priority bonus let score = evidence_strength(&d); assert!( (score - 4.0).abs() < f64::EPSILON, "evidence item count should be capped at 4, got {score}" ); } #[test] fn path_validated_from_evidence_notes() { let mut d = make_diag( Severity::High, "taint-unsanitised-flow (source 1:1)", "src/main.rs", 10, vec![], false, // path_validated is false on Diag ); d.evidence = Some(crate::evidence::Evidence { source: None, sink: None, guards: vec![], sanitizers: vec![], state: None, notes: vec!["path_validated".into()], }); let rank = compute_attack_rank(&d); assert!( rank.components .iter() .any(|(k, _)| k == "path_validated_penalty"), "path_validated note in evidence should trigger penalty" ); } }