//! Integration tests for the direction-aware `EngineNote` pipeline. //! //! Verifies the three downstream behaviours that the //! [`nyx_scanner::engine_notes::LossDirection`] classification drives: //! //! 1. [`nyx_scanner::evidence::compute_confidence`] caps at `Medium` //! when an `OverReport` or `Bail` note is attached. //! 2. [`nyx_scanner::rank::rank_diags`] applies a `completeness` //! component to the attack-surface score, direction-aware in //! magnitude but not additive across notes. //! 3. The ranked sort order places capped findings below converged //! findings of the same severity. //! //! Unit tests in `src/rank.rs` and `src/evidence.rs` cover the //! individual functions. These tests pin down the *composition*: a //! single `Diag` run through `compute_confidence` then `rank_diags` //! must see both effects, and the pipeline must remain deterministic. use nyx_scanner::commands::scan::Diag; use nyx_scanner::engine_notes::{EngineNote, LossDirection, worst_direction}; use nyx_scanner::evidence::{Confidence, Evidence, SpanEvidence, compute_confidence}; use nyx_scanner::labels::SourceKind; use nyx_scanner::patterns::{FindingCategory, Severity}; use nyx_scanner::rank::{compute_attack_rank, rank_diags}; // ── Diag factories ───────────────────────────────────────────────────── /// A converged taint finding that the points-based scorer will score /// as `Confidence::High`. Used as the "clean" baseline, any delta /// against this must come from attached engine notes. fn high_confidence_taint_diag(path: &str, line: u32) -> Diag { Diag { path: path.into(), line: line as usize, col: 1, severity: Severity::High, id: format!("taint-unsanitised-flow (source {line}:1)"), category: FindingCategory::Security, path_validated: false, guard_kind: None, message: None, labels: vec![], confidence: None, evidence: Some(Evidence { source: Some(SpanEvidence { path: path.into(), line, col: 1, kind: "source".into(), snippet: Some("req.query.id".into()), }), sink: Some(SpanEvidence { path: path.into(), line: line + 4, col: 1, kind: "sink".into(), snippet: Some("exec(id)".into()), }), source_kind: Some(SourceKind::UserInput), hop_count: Some(1), cap_specificity: Some(1), notes: vec!["source_kind:UserInput".into()], ..Default::default() }), rank_score: None, rank_reason: None, suppressed: false, suppression: None, rollup: None, finding_id: String::new(), alternative_finding_ids: Vec::new(), } } fn attach_notes(d: &mut Diag, notes: Vec) { let mut ev = d.evidence.clone().unwrap_or_default(); ev.engine_notes = smallvec::SmallVec::from_vec(notes); d.evidence = Some(ev); } // ── Pipeline integration tests ───────────────────────────────────────── /// End-to-end: construct a finding that would score High confidence, /// attach a Bail note, run compute_confidence → rank_diags, and verify /// both the confidence cap and the rank penalty apply together (not /// double-counted on the confidence arm). #[test] fn bail_note_caps_confidence_and_applies_completeness_penalty() { let mut clean = high_confidence_taint_diag("clean.rs", 10); let mut bailed = high_confidence_taint_diag("bailed.rs", 10); attach_notes( &mut bailed, vec![EngineNote::ParseTimeout { timeout_ms: 100 }], ); // 1. compute_confidence clean.confidence = Some(compute_confidence(&clean)); bailed.confidence = Some(compute_confidence(&bailed)); assert_eq!( clean.confidence, Some(Confidence::High), "clean diag must baseline at High" ); assert_eq!( bailed.confidence, Some(Confidence::Medium), "Bail note must cap confidence at Medium" ); // 2. rank_diags let clean_rank = compute_attack_rank(&clean); let bailed_rank = compute_attack_rank(&bailed); // Confidence delta: High(+3) − Medium(0) = 3 // Completeness delta: Bail = -8 // Total delta: 11 let total_delta = clean_rank.score - bailed_rank.score; assert!( (total_delta - 11.0).abs() < f64::EPSILON, "expected combined delta of 11.0 (confidence 3 + completeness 8), got {total_delta}" ); // Both components must appear in rank_reason. let bailed_keys: Vec<&str> = bailed_rank .components .iter() .map(|(k, _)| k.as_str()) .collect(); assert!( bailed_keys.contains(&"completeness"), "completeness component missing from rank_reason: {bailed_keys:?}" ); // Confidence component only appears when non-zero; Medium = 0.0 so // it's omitted. Verify by contradiction: re-check with Low. let _ = bailed_keys; } #[test] fn under_report_note_does_not_cap_confidence_but_does_penalize_rank() { let mut d = high_confidence_taint_diag("x.rs", 1); attach_notes(&mut d, vec![EngineNote::WorklistCapped { iterations: 100 }]); d.confidence = Some(compute_confidence(&d)); assert_eq!( d.confidence, Some(Confidence::High), "UnderReport must not cap confidence — the emitted flow is still real" ); // Seed confidence on the clean diag too so the score delta reflects // only the completeness component, not a spurious confidence-None // vs confidence-High difference. let mut clean = high_confidence_taint_diag("x.rs", 1); clean.confidence = Some(compute_confidence(&clean)); assert_eq!(clean.confidence, Some(Confidence::High)); let clean_score = compute_attack_rank(&clean).score; let penalized_score = compute_attack_rank(&d).score; assert!( (clean_score - penalized_score - 3.0).abs() < f64::EPSILON, "UnderReport must apply -3.0 rank penalty (clean={clean_score} under={penalized_score})" ); } #[test] fn rank_diags_sorts_converged_above_capped_at_same_severity() { // Three High findings: one converged, one UnderReport, one Bail. // After sorting they must come out in score-desc order: // converged > under > bail. let converged = high_confidence_taint_diag("a.rs", 1); let mut under = high_confidence_taint_diag("b.rs", 1); attach_notes( &mut under, vec![EngineNote::WorklistCapped { iterations: 10 }], ); let mut bail = high_confidence_taint_diag("c.rs", 1); attach_notes( &mut bail, vec![EngineNote::ParseTimeout { timeout_ms: 100 }], ); // Seed confidence before ranking (mirrors post_process_diags order). let mut diags = vec![converged, under, bail]; for d in diags.iter_mut() { d.confidence = Some(compute_confidence(d)); } rank_diags(&mut diags); assert_eq!( diags[0].path, "a.rs", "converged finding must rank first, got {:?}", diags.iter().map(|d| &d.path).collect::>() ); assert_eq!( diags[1].path, "b.rs", "UnderReport finding must rank second" ); assert_eq!(diags[2].path, "c.rs", "Bail finding must rank last"); } #[test] fn rank_diags_preserves_severity_tier_under_bail() { // High + Bail must still outrank Medium + clean at the same // evidence-strength baseline, this is the tier-boundary invariant // that the -8 completeness magnitude is calibrated for. let mut high_bailed = high_confidence_taint_diag("a.rs", 1); attach_notes( &mut high_bailed, vec![EngineNote::ParseTimeout { timeout_ms: 100 }], ); let mut medium_clean = high_confidence_taint_diag("b.rs", 1); medium_clean.severity = Severity::Medium; medium_clean.id = "taint-unsanitised-flow (source 2:1)".into(); let mut diags = vec![medium_clean, high_bailed]; for d in diags.iter_mut() { d.confidence = Some(compute_confidence(d)); } rank_diags(&mut diags); assert_eq!( diags[0].path, "a.rs", "High+Bail must outrank Medium+clean to preserve severity tiers" ); } #[test] fn pipeline_is_deterministic_under_input_permutation() { // Ranking must be input-order-independent even when completeness // penalties come into play. let mut a = high_confidence_taint_diag("a.rs", 1); let mut b = high_confidence_taint_diag("b.rs", 1); let mut c = high_confidence_taint_diag("c.rs", 1); attach_notes(&mut a, vec![EngineNote::WorklistCapped { iterations: 1 }]); attach_notes(&mut b, vec![EngineNote::PredicateStateWidened]); attach_notes(&mut c, vec![EngineNote::ParseTimeout { timeout_ms: 100 }]); let seed = vec![a, b, c]; let mut order1: Vec = seed.clone(); let mut order2: Vec = seed.iter().rev().cloned().collect(); let mut order3: Vec = vec![seed[2].clone(), seed[0].clone(), seed[1].clone()]; for list in [&mut order1, &mut order2, &mut order3] { for d in list.iter_mut() { d.confidence = Some(compute_confidence(d)); } rank_diags(list); } let paths1: Vec<_> = order1.iter().map(|d| &d.path).collect(); let paths2: Vec<_> = order2.iter().map(|d| &d.path).collect(); let paths3: Vec<_> = order3.iter().map(|d| &d.path).collect(); assert_eq!( paths1, paths2, "rank order must be input-permutation-stable" ); assert_eq!( paths1, paths3, "rank order must be input-permutation-stable" ); } // ── Direction API regressions ────────────────────────────────────────── #[test] fn worst_direction_matches_sarif_property() { // The SARIF `loss_direction` property is serialized as the snake- // case tag of the worst direction. Ensure the tag values match // the documented stable strings. let notes = vec![ EngineNote::WorklistCapped { iterations: 1 }, EngineNote::PredicateStateWidened, ]; let dir = worst_direction(¬es).expect("mixed non-informational notes must yield a direction"); assert_eq!(dir, LossDirection::OverReport); assert_eq!(dir.tag(), "over-report"); } // ── --require-converged filter ───────────────────────────────────────── #[test] fn require_converged_drops_over_report_and_bail() { let converged = high_confidence_taint_diag("converged.rs", 1); let mut under = high_confidence_taint_diag("under.rs", 1); attach_notes( &mut under, vec![EngineNote::WorklistCapped { iterations: 1 }], ); let mut over = high_confidence_taint_diag("over.rs", 1); attach_notes(&mut over, vec![EngineNote::PredicateStateWidened]); let mut bail = high_confidence_taint_diag("bail.rs", 1); attach_notes( &mut bail, vec![EngineNote::ParseTimeout { timeout_ms: 100 }], ); let mut info = high_confidence_taint_diag("info.rs", 1); attach_notes(&mut info, vec![EngineNote::InlineCacheReused]); let mut diags = vec![converged, under, over, bail, info]; nyx_scanner::commands::scan::retain_converged_findings(&mut diags); let kept: Vec<&str> = diags.iter().map(|d| d.path.as_str()).collect(); assert!( kept.contains(&"converged.rs"), "converged finding must be kept" ); assert!( kept.contains(&"under.rs"), "UnderReport finding must be kept — emitted flow is still real" ); assert!( kept.contains(&"info.rs"), "informational notes must not drop findings" ); assert!( !kept.contains(&"over.rs"), "OverReport finding must be dropped (widening → likely FP)" ); assert!( !kept.contains(&"bail.rs"), "Bail finding must be dropped (analysis aborted)" ); assert_eq!(kept.len(), 3, "exactly 3 findings should remain"); } #[test] fn require_converged_keeps_findings_with_no_evidence_struct() { // A finding with `evidence: None` has no engine notes by // definition, so it must not be affected by the filter. let mut d = high_confidence_taint_diag("x.rs", 1); d.evidence = None; let mut diags = vec![d]; nyx_scanner::commands::scan::retain_converged_findings(&mut diags); assert_eq!(diags.len(), 1, "no-evidence diag must be kept"); } #[test] fn require_converged_keeps_findings_with_empty_notes_list() { let d = high_confidence_taint_diag("x.rs", 1); let mut diags = vec![d]; nyx_scanner::commands::scan::retain_converged_findings(&mut diags); assert_eq!(diags.len(), 1, "empty-notes diag must be kept"); } #[test] fn require_converged_drops_mixed_over_report_with_under_report() { // Mixed: UnderReport + OverReport ⇒ worst is OverReport ⇒ drop. let mut d = high_confidence_taint_diag("x.rs", 1); attach_notes( &mut d, vec![ EngineNote::WorklistCapped { iterations: 1 }, EngineNote::PredicateStateWidened, ], ); let mut diags = vec![d]; nyx_scanner::commands::scan::retain_converged_findings(&mut diags); assert!( diags.is_empty(), "OverReport in mixed note list must dominate and drop the finding" ); } // ── SARIF serialization ──────────────────────────────────────────────── #[test] fn sarif_exports_loss_direction_property() { // When a finding carries non-informational engine notes, the SARIF // output must include a `loss_direction` property whose value is // the snake-case tag of the worst direction. Consumers rely on // this string being stable across releases. let mut d = high_confidence_taint_diag("sample.rs", 1); attach_notes(&mut d, vec![EngineNote::WorklistCapped { iterations: 10 }]); let sarif = nyx_scanner::output::build_sarif(&[d], std::path::Path::new(".")); let results = sarif["runs"][0]["results"] .as_array() .expect("runs[0].results"); let result = &results[0]; let props = &result["properties"]; let direction = props["loss_direction"] .as_str() .expect("loss_direction property must be present for non-informational notes"); assert_eq!( direction, "under-report", "SARIF loss_direction must be snake-case tag" ); assert_eq!( props["confidence_capped"].as_bool(), Some(true), "confidence_capped must track non-informational note presence" ); } #[test] fn sarif_omits_loss_direction_for_informational_only() { let mut d = high_confidence_taint_diag("sample.rs", 1); attach_notes(&mut d, vec![EngineNote::InlineCacheReused]); let sarif = nyx_scanner::output::build_sarif(&[d], std::path::Path::new(".")); let props = &sarif["runs"][0]["results"][0]["properties"]; assert!( props.get("loss_direction").is_none(), "informational-only notes must not set loss_direction (got {:?})", props.get("loss_direction") ); assert_eq!( props["confidence_capped"].as_bool(), Some(false), "confidence_capped must be false for informational-only notes" ); } #[test] fn every_engine_note_direction_is_documented() { // Enumerate every EngineNote variant and assert its direction. // The intent is that a contributor adding a new variant will cause // this test to fail to compile (no match arm), a structural guard // against silent misclassification. fn check(note: EngineNote, expected: LossDirection) { assert_eq!( note.direction(), expected, "direction classification mismatch for {note:?}" ); } check( EngineNote::WorklistCapped { iterations: 1 }, LossDirection::UnderReport, ); check( EngineNote::OriginsTruncated { dropped: 1 }, LossDirection::UnderReport, ); check( EngineNote::InFileFixpointCapped { iterations: 1, reason: nyx_scanner::engine_notes::CapHitReason::Unknown, }, LossDirection::UnderReport, ); check( EngineNote::CrossFileFixpointCapped { iterations: 1, reason: nyx_scanner::engine_notes::CapHitReason::Unknown, }, LossDirection::UnderReport, ); check( EngineNote::SsaLoweringBailed { reason: "unsupported".into(), }, LossDirection::Bail, ); check( EngineNote::ParseTimeout { timeout_ms: 100 }, LossDirection::Bail, ); check(EngineNote::PredicateStateWidened, LossDirection::OverReport); check(EngineNote::PathEnvCapped, LossDirection::OverReport); check(EngineNote::InlineCacheReused, LossDirection::Informational); }