nyx/tests/calibration_data_exfil.rs

286 lines
9.7 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Calibration tests for `taint-data-exfiltration` severity, confidence,
//! and rank scoring.
//!
//! These tests pin the calibration described in `docs/detectors.md` so any
//! future change to the scoring path either preserves the documented tier
//! relationships or breaks a test deliberately.
//!
//! What is checked here:
//!
//! * Cookie source + Confirmed symbolic verdict produces High severity
//! (cookies carry session / credential material and are treated as
//! Secret-tier for the leak class).
//! * Env source + Confirmed verdict produces High severity (same
//! reasoning, env vars carry credential material).
//! * Header / FileSystem / Database / CaughtException sources downgrade
//! to Medium severity even with a Confirmed verdict — they are
//! Sensitive but not credential-grade secrets.
//! * No symbolic verdict (or `Inconclusive` / `NotAttempted`) → Low
//! confidence (the instruction's "Inconclusive" tier; the
//! `Confidence` enum has no separate Inconclusive variant so it
//! floors to Low).
//! * Opaque body (Confirmed but with empty witness) → Medium
//! confidence; the abstract domain still produced a corroboration
//! signal even if the witness string is bare.
//! * `path_validated=true` drops a confidence tier (Medium → Low).
//! * On the same source, DATA_EXFIL ranks strictly below SSRF (the
//! taint-class bonus is +7 for data-exfil vs +10 for the generic
//! `taint-unsanitised-flow`).
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::evidence::{
Confidence, Evidence, SpanEvidence, SymbolicVerdict, Verdict, compute_confidence,
};
use nyx_scanner::labels::SourceKind;
use nyx_scanner::patterns::{FindingCategory, Severity};
use nyx_scanner::rank::compute_attack_rank;
/// Builds the taint [`Evidence`] shared by every fixture in this file.
///
/// The source span (`src/leak.js` 1:1) and sink span (`src/leak.js` 5:5)
/// are fixed; only `source_kind` and the optional symbolic `verdict` vary.
/// When `verdict` is `None` the `symbolic` field is `None` as well. All
/// fields not listed here come from `Evidence::default()`.
fn make_evidence(source_kind: SourceKind, verdict: Option<Verdict>) -> Evidence {
    let source_span = SpanEvidence {
        path: "src/leak.js".into(),
        line: 1,
        col: 1,
        kind: "source".into(),
        snippet: Some("req.cookies.session".into()),
    };
    let sink_span = SpanEvidence {
        path: "src/leak.js".into(),
        line: 5,
        col: 5,
        kind: "sink".into(),
        snippet: Some("fetch('/endpoint', { body: payload })".into()),
    };

    let symbolic = verdict.map(|v| {
        // Only a Confirmed verdict carries a witness string; every other
        // verdict gets `None`. The strong-witness phrasing is used so the
        // fixture follows the same code path as real symex output (see
        // `compute_taint_confidence` for the analogous witness-strength
        // branch).
        let witness = if matches!(v, Verdict::Confirmed) {
            Some("tainted cookie flows to fetch body".into())
        } else {
            None
        };
        SymbolicVerdict {
            verdict: v,
            constraints_checked: 0,
            paths_explored: 1,
            witness,
            interproc_call_chains: vec![],
            cutoff_notes: vec![],
        }
    });

    Evidence {
        source: Some(source_span),
        sink: Some(sink_span),
        source_kind: Some(source_kind),
        hop_count: Some(1),
        cap_specificity: Some(1),
        symbolic,
        ..Default::default()
    }
}
/// Assembles a [`Diag`] located at `src/leak.js:5:5` for the calibration
/// fixtures.
///
/// * `rule_id` is stored in `id`.
/// * `severity` is stored verbatim; nothing in this helper derives it.
/// * `source_kind` and `verdict` are forwarded to [`make_evidence`].
/// * `path_validated` sets the flag of the same name and, when `true`,
///   also sets `guard_kind` to `"Validation"`.
///
/// `confidence`, `rank_score` and `rank_reason` are left as `None`, the
/// finding is unsuppressed, and its triage state is `"open"`.
fn make_diag(
    rule_id: &str,
    severity: Severity,
    source_kind: SourceKind,
    verdict: Option<Verdict>,
    path_validated: bool,
) -> Diag {
    let evidence = make_evidence(source_kind, verdict);

    Diag {
        // Identity and location.
        id: rule_id.into(),
        path: "src/leak.js".into(),
        line: 5,
        col: 5,
        category: FindingCategory::Security,
        severity,
        message: None,
        labels: Vec::new(),

        // Guard information: a guard kind is present only on validated paths.
        path_validated,
        guard_kind: path_validated.then(|| "Validation".into()),

        // Scoring inputs and the (not yet computed) scoring outputs.
        evidence: Some(evidence),
        confidence: None,
        rank_score: None,
        rank_reason: None,

        // Suppression / triage bookkeeping.
        suppressed: false,
        suppression: None,
        triage_state: String::from("open"),
        triage_note: String::new(),

        // Grouping and identity hashes.
        rollup: None,
        finding_id: String::new(),
        alternative_finding_ids: Vec::new(),
        stable_hash: 0,
    }
}
// ── Calibration fixture 1: Cookie source, Confirmed verdict ─────────────
/// A cookie source with a `Confirmed` verdict must score `Medium`
/// confidence.
///
/// `Severity::High` is supplied as fixture input (cookies are Secret-tier
/// for DATA_EXFIL); only the confidence is asserted here. The expected
/// value is `Medium` rather than `High` because the DATA_EXFIL routing caps
/// at `Medium` even with a strong witness; see
/// `compute_data_exfil_confidence`.
#[test]
fn cookie_source_with_confirmed_verdict_is_high_medium() {
    let rule_id = "taint-data-exfiltration (source 1:1)";
    let finding = make_diag(
        rule_id,
        Severity::High,
        SourceKind::Cookie,
        Some(Verdict::Confirmed),
        false,
    );

    let confidence = compute_confidence(&finding);

    assert_eq!(
        confidence,
        Confidence::Medium,
        "Cookie + Confirmed → Medium (DATA_EXFIL cap), got {confidence:?}"
    );
}
// ── Calibration fixture 2: Env source, Confirmed verdict ────────────────
/// An environment/config source with a `Confirmed` verdict must score
/// `Medium` confidence.
///
/// Env vars carry credential / config material and are treated as
/// Secret-tier alongside cookies, hence the `Severity::High` fixture input.
/// Only the confidence is asserted here.
#[test]
fn env_source_with_confirmed_verdict_is_high_medium() {
    let rule_id = "taint-data-exfiltration (source 1:1)";
    let finding = make_diag(
        rule_id,
        Severity::High,
        SourceKind::EnvironmentConfig,
        Some(Verdict::Confirmed),
        false,
    );

    let confidence = compute_confidence(&finding);

    assert_eq!(
        confidence,
        Confidence::Medium,
        "Env + Confirmed → Medium, got {confidence:?}"
    );
}
// ── Calibration fixture 3: Header source, opaque body (no verdict) ──────
/// A header source with no symbolic verdict must score `Low` confidence.
///
/// Headers are Sensitive but not credential-grade, so the fixture is built
/// with `Severity::Medium`. With no symbolic verdict attached the
/// confidence floors to `Low` (the "Inconclusive when no symex verdict"
/// tier from the instruction). Only the confidence is asserted here.
#[test]
fn header_source_without_symex_is_medium_low() {
    let rule_id = "taint-data-exfiltration (source 1:1)";
    let no_verdict: Option<Verdict> = None;
    let finding = make_diag(
        rule_id,
        Severity::Medium,
        SourceKind::Header,
        no_verdict,
        false,
    );

    let confidence = compute_confidence(&finding);

    assert_eq!(
        confidence,
        Confidence::Low,
        "Header + no verdict → Low, got {confidence:?}"
    );
}
// ── Calibration fixture 4: guarded path drops a tier ────────────────────
/// Setting `path_validated` must lower the confidence by one tier.
///
/// Two otherwise identical Cookie + Confirmed findings are scored: the
/// unguarded one must be `Medium` and the guarded one `Low`. Asserting both
/// locks in the delta, not just the floor.
#[test]
fn guarded_path_drops_confidence_tier() {
    // Same finding every time; only the path-validated flag differs.
    let cookie_confirmed = |path_validated: bool| {
        make_diag(
            "taint-data-exfiltration (source 1:1)",
            Severity::High,
            SourceKind::Cookie,
            Some(Verdict::Confirmed),
            path_validated,
        )
    };
    let unguarded = cookie_confirmed(false);
    let guarded = cookie_confirmed(true);

    // Baseline: without a guard the finding sits at Medium.
    assert_eq!(compute_confidence(&unguarded), Confidence::Medium);
    // With the guard it drops one step to Low.
    assert_eq!(
        compute_confidence(&guarded),
        Confidence::Low,
        "guarded DATA_EXFIL path must drop one confidence tier"
    );
}
// ── Calibration fixture 5: ranking — DATA_EXFIL below SSRF on same source
/// On identical evidence, `taint-unsanitised-flow` must outrank
/// `taint-data-exfiltration` by exactly 3 points.
///
/// A cookie source flowing to `fetch` could fire either DATA_EXFIL (body
/// arg) or SSRF / generic taint (URL arg). The two findings built here
/// differ only in rule id, so any score difference comes from the
/// analysis-kind bonus: +10 for `taint-unsanitised-flow` versus +7 for
/// `taint-data-exfiltration`.
#[test]
fn data_exfil_ranks_below_ssrf_on_same_source() {
    let exfil = make_diag(
        "taint-data-exfiltration (source 1:1)",
        Severity::High,
        SourceKind::Cookie,
        Some(Verdict::Confirmed),
        false,
    );
    let ssrf = make_diag(
        "taint-unsanitised-flow (source 1:1)",
        Severity::High,
        SourceKind::Cookie,
        Some(Verdict::Confirmed),
        false,
    );

    let exfil_score = compute_attack_rank(&exfil).score;
    let ssrf_score = compute_attack_rank(&ssrf).score;

    assert!(
        ssrf_score > exfil_score,
        "SSRF score ({ssrf_score}) must outrank DATA_EXFIL score \
         ({exfil_score}) on the same source"
    );

    // Pin the exact gap (+3) so that a change to either bonus on its own
    // trips this test. Shifting both bonuses by the same amount leaves the
    // gap unchanged and is therefore not detected here.
    let delta = ssrf_score - exfil_score;
    assert!(
        (delta - 3.0).abs() < 0.001,
        "SSRF - DATA_EXFIL should equal the analysis-kind bonus delta \
         (+3); got {delta} ({ssrf_score} - {exfil_score})"
    );
}
// ── Calibration fixture 6: DATA_EXFIL above AST patterns ────────────────
/// A DATA_EXFIL finding must outrank an AST-only pattern match.
///
/// The comparison uses the weaker end of the DATA_EXFIL spectrum (a
/// Medium-severity header source) against a Low-severity AST pattern (the
/// typical AST-only banned-API match), so the ordering is locked in even in
/// the least favourable case.
#[test]
fn data_exfil_ranks_above_ast_pattern() {
    let medium_exfil = make_diag(
        "taint-data-exfiltration (source 1:1)",
        Severity::Medium,
        SourceKind::Header,
        Some(Verdict::Confirmed),
        false,
    );

    let ast_pattern = {
        let mut diag = make_diag(
            "js.code_exec.eval",
            Severity::Low,
            SourceKind::Unknown,
            None,
            false,
        );
        // AST patterns carry no taint evidence; drop what the helper
        // attached so the ranker takes the AST-only branch.
        diag.evidence = None;
        diag
    };

    let exfil_score = compute_attack_rank(&medium_exfil).score;
    let ast_score = compute_attack_rank(&ast_pattern).score;

    assert!(
        exfil_score > ast_score,
        "DATA_EXFIL ({exfil_score}) must outrank AST pattern ({ast_score})"
    );
}