mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
469 lines
17 KiB
Rust
469 lines
17 KiB
Rust
//! Integration tests for the direction-aware `EngineNote` pipeline.
//!
//! Verifies the three downstream behaviours that the
//! [`nyx_scanner::engine_notes::LossDirection`] classification drives:
//!
//! 1. [`nyx_scanner::evidence::compute_confidence`] caps at `Medium`
//!    when an `OverReport` or `Bail` note is attached.
//! 2. [`nyx_scanner::rank::rank_diags`] applies a `completeness`
//!    component to the attack-surface score, direction-aware in
//!    magnitude but not additive across notes.
//! 3. The ranked sort order places capped findings below converged
//!    findings of the same severity.
//!
//! Unit tests in `src/rank.rs` and `src/evidence.rs` cover the
//! individual functions. These tests pin down the *composition*: a
//! single `Diag` run through `compute_confidence` then `rank_diags`
//! must see both effects, and the pipeline must remain deterministic.
|
||
|
||
use nyx_scanner::commands::scan::Diag;
|
||
use nyx_scanner::engine_notes::{EngineNote, LossDirection, worst_direction};
|
||
use nyx_scanner::evidence::{Confidence, Evidence, SpanEvidence, compute_confidence};
|
||
use nyx_scanner::labels::SourceKind;
|
||
use nyx_scanner::patterns::{FindingCategory, Severity};
|
||
use nyx_scanner::rank::{compute_attack_rank, rank_diags};
|
||
|
||
// ── Diag factories ─────────────────────────────────────────────────────
|
||
|
||
/// A converged taint finding that the points-based scorer will score
|
||
/// as `Confidence::High`. Used as the "clean" baseline, any delta
|
||
/// against this must come from attached engine notes.
|
||
fn high_confidence_taint_diag(path: &str, line: u32) -> Diag {
|
||
Diag {
|
||
path: path.into(),
|
||
line: line as usize,
|
||
col: 1,
|
||
severity: Severity::High,
|
||
id: format!("taint-unsanitised-flow (source {line}:1)"),
|
||
category: FindingCategory::Security,
|
||
path_validated: false,
|
||
guard_kind: None,
|
||
message: None,
|
||
labels: vec![],
|
||
confidence: None,
|
||
evidence: Some(Evidence {
|
||
source: Some(SpanEvidence {
|
||
path: path.into(),
|
||
line,
|
||
col: 1,
|
||
kind: "source".into(),
|
||
snippet: Some("req.query.id".into()),
|
||
}),
|
||
sink: Some(SpanEvidence {
|
||
path: path.into(),
|
||
line: line + 4,
|
||
col: 1,
|
||
kind: "sink".into(),
|
||
snippet: Some("exec(id)".into()),
|
||
}),
|
||
source_kind: Some(SourceKind::UserInput),
|
||
hop_count: Some(1),
|
||
cap_specificity: Some(1),
|
||
notes: vec!["source_kind:UserInput".into()],
|
||
..Default::default()
|
||
}),
|
||
rank_score: None,
|
||
rank_reason: None,
|
||
suppressed: false,
|
||
suppression: None,
|
||
rollup: None,
|
||
finding_id: String::new(),
|
||
alternative_finding_ids: Vec::new(),
|
||
}
|
||
}
|
||
|
||
fn attach_notes(d: &mut Diag, notes: Vec<EngineNote>) {
|
||
let mut ev = d.evidence.clone().unwrap_or_default();
|
||
ev.engine_notes = smallvec::SmallVec::from_vec(notes);
|
||
d.evidence = Some(ev);
|
||
}
|
||
|
||
// ── Pipeline integration tests ─────────────────────────────────────────
|
||
|
||
/// Composition check for a `Bail`-class note.
///
/// Two otherwise identical findings are built and one receives a
/// `ParseTimeout` note. After `compute_confidence` the noted finding must be
/// `Medium` while the baseline stays `High`, and `compute_attack_rank` must
/// separate the two scores by the sum of the confidence and completeness
/// effects, each counted exactly once.
#[test]
fn bail_note_caps_confidence_and_applies_completeness_penalty() {
    let mut clean = high_confidence_taint_diag("clean.rs", 10);
    let mut bailed = high_confidence_taint_diag("bailed.rs", 10);
    attach_notes(
        &mut bailed,
        vec![EngineNote::ParseTimeout { timeout_ms: 100 }],
    );

    // Stage 1: seed confidence on both findings before comparing them.
    clean.confidence = Some(compute_confidence(&clean));
    bailed.confidence = Some(compute_confidence(&bailed));

    assert_eq!(
        clean.confidence,
        Some(Confidence::High),
        "clean diag must baseline at High"
    );
    assert_eq!(
        bailed.confidence,
        Some(Confidence::Medium),
        "Bail note must cap confidence at Medium"
    );

    // Stage 2: score each finding individually.
    let clean_rank = compute_attack_rank(&clean);
    let bailed_rank = compute_attack_rank(&bailed);

    // Expected gap between the two scores:
    //   confidence:   High(+3) − Medium(0) = 3
    //   completeness: Bail                 = 8
    //   total                              = 11
    let total_delta = clean_rank.score - bailed_rank.score;
    let gap_error = (total_delta - 11.0).abs();
    assert!(
        gap_error < f64::EPSILON,
        "expected combined delta of 11.0 (confidence 3 + completeness 8), got {total_delta}"
    );

    // The completeness component must be listed among the rank components.
    // Only that key is checked: the original author's note says the
    // confidence component is omitted when it is zero (Medium = 0.0), and no
    // separate Low-confidence re-check is performed in this test.
    let bailed_keys: Vec<&str> = bailed_rank
        .components
        .iter()
        .map(|(key, _)| key.as_str())
        .collect();
    assert!(
        bailed_keys.iter().any(|key| *key == "completeness"),
        "completeness component missing from rank_reason: {bailed_keys:?}"
    );
}
|
||
|
||
/// An `UnderReport`-class note (`WorklistCapped`) must leave confidence at
/// `High` yet still lower the attack-rank score by 3.0 against a clean twin.
#[test]
fn under_report_note_does_not_cap_confidence_but_does_penalize_rank() {
    let mut capped = high_confidence_taint_diag("x.rs", 1);
    attach_notes(
        &mut capped,
        vec![EngineNote::WorklistCapped { iterations: 100 }],
    );
    capped.confidence = Some(compute_confidence(&capped));

    assert_eq!(
        capped.confidence,
        Some(Confidence::High),
        "UnderReport must not cap confidence — the emitted flow is still real"
    );

    // The clean twin gets its confidence seeded as well; otherwise the score
    // gap would mix in a confidence-None vs confidence-High difference on top
    // of the completeness component under test.
    let mut clean = high_confidence_taint_diag("x.rs", 1);
    clean.confidence = Some(compute_confidence(&clean));
    assert_eq!(clean.confidence, Some(Confidence::High));

    let clean_score = compute_attack_rank(&clean).score;
    let penalized_score = compute_attack_rank(&capped).score;
    let penalty = clean_score - penalized_score;
    assert!(
        (penalty - 3.0).abs() < f64::EPSILON,
        "UnderReport must apply -3.0 rank penalty (clean={clean_score} under={penalized_score})"
    );
}
|
||
|
||
/// Three High-severity findings — converged, `UnderReport`, and `Bail` —
/// must leave `rank_diags` in exactly that order.
#[test]
fn rank_diags_sorts_converged_above_capped_at_same_severity() {
    let converged = high_confidence_taint_diag("a.rs", 1);

    let mut under_reported = high_confidence_taint_diag("b.rs", 1);
    attach_notes(
        &mut under_reported,
        vec![EngineNote::WorklistCapped { iterations: 10 }],
    );

    let mut bailed = high_confidence_taint_diag("c.rs", 1);
    attach_notes(
        &mut bailed,
        vec![EngineNote::ParseTimeout { timeout_ms: 100 }],
    );

    // Confidence is seeded before ranking; the original comment notes this
    // mirrors the order used by `post_process_diags`.
    let mut diags = vec![converged, under_reported, bailed];
    for diag in &mut diags {
        diag.confidence = Some(compute_confidence(diag));
    }

    rank_diags(&mut diags);

    assert_eq!(
        diags[0].path,
        "a.rs",
        "converged finding must rank first, got {:?}",
        diags.iter().map(|d| &d.path).collect::<Vec<_>>()
    );
    assert_eq!(
        diags[1].path, "b.rs",
        "UnderReport finding must rank second"
    );
    assert_eq!(diags[2].path, "c.rs", "Bail finding must rank last");
}
|
||
|
||
/// Tier-boundary invariant: a High-severity finding carrying a `Bail` note
/// must still sort ahead of an otherwise identical, note-free Medium finding.
/// Per the original comment, the -8 completeness magnitude is calibrated so
/// that this holds.
#[test]
fn rank_diags_preserves_severity_tier_under_bail() {
    let mut high_bailed = high_confidence_taint_diag("a.rs", 1);
    attach_notes(
        &mut high_bailed,
        vec![EngineNote::ParseTimeout { timeout_ms: 100 }],
    );

    // Same evidence as the baseline, demoted to Medium and given its own id.
    let mut medium_clean = high_confidence_taint_diag("b.rs", 1);
    medium_clean.severity = Severity::Medium;
    medium_clean.id = "taint-unsanitised-flow (source 2:1)".into();

    // The Medium finding is listed first so a passing result needs a reorder.
    let mut diags = vec![medium_clean, high_bailed];
    for diag in &mut diags {
        diag.confidence = Some(compute_confidence(diag));
    }
    rank_diags(&mut diags);

    assert_eq!(
        diags[0].path, "a.rs",
        "High+Bail must outrank Medium+clean to preserve severity tiers"
    );
}
|
||
|
||
/// Ranking must not depend on input order, even when completeness penalties
/// are in play: three permutations of the same findings must come out of the
/// confidence + rank pipeline with identical path sequences.
#[test]
fn pipeline_is_deterministic_under_input_permutation() {
    let mut a = high_confidence_taint_diag("a.rs", 1);
    let mut b = high_confidence_taint_diag("b.rs", 1);
    let mut c = high_confidence_taint_diag("c.rs", 1);
    attach_notes(&mut a, vec![EngineNote::WorklistCapped { iterations: 1 }]);
    attach_notes(&mut b, vec![EngineNote::PredicateStateWidened]);
    attach_notes(&mut c, vec![EngineNote::ParseTimeout { timeout_ms: 100 }]);
    let seed = vec![a, b, c];

    // Runs one permutation through the pipeline and reports the final order.
    let rank_and_list_paths = |mut diags: Vec<Diag>| {
        for diag in diags.iter_mut() {
            diag.confidence = Some(compute_confidence(diag));
        }
        rank_diags(&mut diags);
        diags
            .iter()
            .map(|diag| diag.path.clone())
            .collect::<Vec<_>>()
    };

    // Original order, reversed order, and a rotation.
    let paths1 = rank_and_list_paths(seed.clone());
    let paths2 = rank_and_list_paths(seed.iter().rev().cloned().collect());
    let paths3 = rank_and_list_paths(vec![
        seed[2].clone(),
        seed[0].clone(),
        seed[1].clone(),
    ]);

    assert_eq!(
        paths1, paths2,
        "rank order must be input-permutation-stable"
    );
    assert_eq!(
        paths1, paths3,
        "rank order must be input-permutation-stable"
    );
}
|
||
|
||
// ── Direction API regressions ──────────────────────────────────────────
|
||
|
||
/// Pins the worst-direction result and its tag string for a mixed
/// `UnderReport` + `OverReport` note list.
///
/// The original comment states that the SARIF `loss_direction` property is
/// serialized from this tag, so the string must stay stable. The tag checked
/// here is the hyphenated form `"over-report"`.
#[test]
fn worst_direction_matches_sarif_property() {
    let notes = vec![
        EngineNote::WorklistCapped { iterations: 1 },
        EngineNote::PredicateStateWidened,
    ];
    // `worst_direction` borrows the note list and returns an `Option`; a list
    // containing only non-informational notes is expected to yield `Some`.
    let dir =
        worst_direction(&notes).expect("mixed non-informational notes must yield a direction");
    assert_eq!(dir, LossDirection::OverReport);
    assert_eq!(dir.tag(), "over-report");
}
|
||
|
||
// ── --require-converged filter ─────────────────────────────────────────
|
||
|
||
/// `retain_converged_findings` must drop findings whose notes are
/// `OverReport` or `Bail` and keep note-free, `UnderReport`, and
/// informational-only findings.
#[test]
fn require_converged_drops_over_report_and_bail() {
    // Baseline finding in `path` with the given notes attached.
    let with_notes = |path: &str, notes: Vec<EngineNote>| {
        let mut diag = high_confidence_taint_diag(path, 1);
        attach_notes(&mut diag, notes);
        diag
    };

    // One finding per note class, plus a note-free control.
    let mut diags = vec![
        high_confidence_taint_diag("converged.rs", 1),
        with_notes(
            "under.rs",
            vec![EngineNote::WorklistCapped { iterations: 1 }],
        ),
        with_notes("over.rs", vec![EngineNote::PredicateStateWidened]),
        with_notes(
            "bail.rs",
            vec![EngineNote::ParseTimeout { timeout_ms: 100 }],
        ),
        with_notes("info.rs", vec![EngineNote::InlineCacheReused]),
    ];
    nyx_scanner::commands::scan::retain_converged_findings(&mut diags);

    let kept: Vec<&str> = diags.iter().map(|d| d.path.as_str()).collect();

    // Survivors.
    assert!(
        kept.contains(&"converged.rs"),
        "converged finding must be kept"
    );
    assert!(
        kept.contains(&"under.rs"),
        "UnderReport finding must be kept — emitted flow is still real"
    );
    assert!(
        kept.contains(&"info.rs"),
        "informational notes must not drop findings"
    );

    // Dropped.
    assert!(
        !kept.contains(&"over.rs"),
        "OverReport finding must be dropped (widening → likely FP)"
    );
    assert!(
        !kept.contains(&"bail.rs"),
        "Bail finding must be dropped (analysis aborted)"
    );

    assert_eq!(kept.len(), 3, "exactly 3 findings should remain");
}
|
||
|
||
/// A diag whose `evidence` is `None` has nowhere to carry engine notes, so
/// `retain_converged_findings` must leave it in place.
#[test]
fn require_converged_keeps_findings_with_no_evidence_struct() {
    let mut bare = high_confidence_taint_diag("x.rs", 1);
    bare.evidence = None;

    let mut diags = vec![bare];
    nyx_scanner::commands::scan::retain_converged_findings(&mut diags);

    assert_eq!(diags.len(), 1, "no-evidence diag must be kept");
}
|
||
|
||
/// The unmodified baseline diag (evidence present, no engine notes attached)
/// must survive `retain_converged_findings`.
#[test]
fn require_converged_keeps_findings_with_empty_notes_list() {
    let mut diags = vec![high_confidence_taint_diag("x.rs", 1)];
    nyx_scanner::commands::scan::retain_converged_findings(&mut diags);
    assert_eq!(diags.len(), 1, "empty-notes diag must be kept");
}
|
||
|
||
/// With both an `UnderReport` and an `OverReport` note attached, the worst
/// direction is `OverReport`, so the finding must be filtered out.
#[test]
fn require_converged_drops_mixed_over_report_with_under_report() {
    let mixed_notes = vec![
        EngineNote::WorklistCapped { iterations: 1 },
        EngineNote::PredicateStateWidened,
    ];

    let mut finding = high_confidence_taint_diag("x.rs", 1);
    attach_notes(&mut finding, mixed_notes);

    let mut diags = vec![finding];
    nyx_scanner::commands::scan::retain_converged_findings(&mut diags);

    assert!(
        diags.is_empty(),
        "OverReport in mixed note list must dominate and drop the finding"
    );
}
|
||
|
||
// ── SARIF serialization ────────────────────────────────────────────────
|
||
|
||
/// A finding with a non-informational engine note must surface two SARIF
/// result properties: `loss_direction`, holding the worst direction's tag
/// string (`"under-report"` for a lone `WorklistCapped`), and
/// `confidence_capped` set to `true`.
///
/// The original comment notes that consumers rely on the tag string staying
/// stable across releases.
#[test]
fn sarif_exports_loss_direction_property() {
    let mut finding = high_confidence_taint_diag("sample.rs", 1);
    attach_notes(
        &mut finding,
        vec![EngineNote::WorklistCapped { iterations: 10 }],
    );

    let root = std::path::Path::new(".");
    let sarif = nyx_scanner::output::build_sarif(&[finding], root);

    // Navigate to the property bag of the first (and only) result.
    let results = sarif["runs"][0]["results"]
        .as_array()
        .expect("runs[0].results");
    let props = &results[0]["properties"];

    let direction = props["loss_direction"]
        .as_str()
        .expect("loss_direction property must be present for non-informational notes");
    assert_eq!(
        direction, "under-report",
        "SARIF loss_direction must be snake-case tag"
    );

    let capped = props["confidence_capped"].as_bool();
    assert_eq!(
        capped,
        Some(true),
        "confidence_capped must track non-informational note presence"
    );
}
|
||
|
||
/// A finding whose only note is informational (`InlineCacheReused`) must not
/// get a `loss_direction` property, and `confidence_capped` must be `false`.
#[test]
fn sarif_omits_loss_direction_for_informational_only() {
    let mut finding = high_confidence_taint_diag("sample.rs", 1);
    attach_notes(&mut finding, vec![EngineNote::InlineCacheReused]);

    let root = std::path::Path::new(".");
    let sarif = nyx_scanner::output::build_sarif(&[finding], root);
    let props = &sarif["runs"][0]["results"][0]["properties"];

    let loss_direction = props.get("loss_direction");
    assert!(
        loss_direction.is_none(),
        "informational-only notes must not set loss_direction (got {:?})",
        loss_direction
    );

    let capped = props["confidence_capped"].as_bool();
    assert_eq!(
        capped,
        Some(false),
        "confidence_capped must be false for informational-only notes"
    );
}
|
||
|
||
/// Pins the `LossDirection` returned by `direction()` for each `EngineNote`
/// variant listed below.
///
/// NOTE(review): this is a hand-maintained table, not an exhaustive `match`,
/// so a newly added variant is not caught at compile time. Whoever adds one
/// has to extend the table by hand.
#[test]
fn every_engine_note_direction_is_documented() {
    use nyx_scanner::engine_notes::CapHitReason;

    let cases = [
        // Caps that stop analysis early.
        (
            EngineNote::WorklistCapped { iterations: 1 },
            LossDirection::UnderReport,
        ),
        (
            EngineNote::OriginsTruncated { dropped: 1 },
            LossDirection::UnderReport,
        ),
        (
            EngineNote::InFileFixpointCapped {
                iterations: 1,
                reason: CapHitReason::Unknown,
            },
            LossDirection::UnderReport,
        ),
        (
            EngineNote::CrossFileFixpointCapped {
                iterations: 1,
                reason: CapHitReason::Unknown,
            },
            LossDirection::UnderReport,
        ),
        // Analysis gave up entirely.
        (
            EngineNote::SsaLoweringBailed {
                reason: "unsupported".into(),
            },
            LossDirection::Bail,
        ),
        (
            EngineNote::ParseTimeout { timeout_ms: 100 },
            LossDirection::Bail,
        ),
        // Widening.
        (EngineNote::PredicateStateWidened, LossDirection::OverReport),
        (EngineNote::PathEnvCapped, LossDirection::OverReport),
        // No precision impact.
        (EngineNote::InlineCacheReused, LossDirection::Informational),
    ];

    for (note, expected) in cases {
        assert_eq!(
            note.direction(),
            expected,
            "direction classification mismatch for {note:?}"
        );
    }
}
|