mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-24 20:28:06 +02:00
Added Cap::DATA_EXFIL and taint false-positive (FP) and false-negative (FN) fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers * feat: Implement cross-file data exfiltration detection with parameter-specific gate filters * feat: Add calibration tests and refine DATA_EXFIL severity scoring logic * feat: Introduce per-detector configuration for data exfiltration suppression * feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output * feat: Add tainted body and URL handling for data exfiltration detection * feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go * feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients * feat: Add synthetic externals handling for closure-captured variables in SSA * feat: Implement closure-based suppression for resource leak findings * feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns * feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders * feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt * feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests * feat: Add data exfiltration sinks for various languages and enhance documentation * refactor: Simplify formatting and improve readability in various files * refactor: Improve readability by simplifying conditional statements and adding clippy linting * docs: Update CHANGELOG and comments for data exfiltration features and configuration * docs: Clarify configuration instructions for data exfiltration trusted destinations * docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
parent
a438886217
commit
58f1794a4e
189 changed files with 8421 additions and 383 deletions
|
|
@ -218,6 +218,14 @@ pub struct Evidence {
|
|||
/// under-budget findings and skipped during serialization in that case.
|
||||
#[serde(default, skip_serializing_if = "smallvec::SmallVec::is_empty")]
|
||||
pub engine_notes: smallvec::SmallVec<[crate::engine_notes::EngineNote; 2]>,
|
||||
|
||||
/// For `Cap::DATA_EXFIL` findings, the destination object-literal field
|
||||
/// the tainted value reached (e.g. `"body"`, `"headers"`, `"json"`).
|
||||
/// `None` for non-exfil findings, for exfil findings whose payload arg
|
||||
/// was not an object literal, or when the sink was resolved through a
|
||||
/// summary path that did not preserve destination metadata.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub data_exfil_field: Option<String>,
|
||||
}
|
||||
|
||||
fn is_zero_u16(v: &u16) -> bool {
|
||||
|
|
@ -301,7 +309,15 @@ pub fn compute_confidence(diag: &Diag) -> Confidence {
|
|||
|
||||
let id = &diag.id;
|
||||
|
||||
let base = if id.starts_with("taint-") {
|
||||
let base = if id.starts_with("taint-data-exfiltration") {
|
||||
// DATA_EXFIL is calibrated independently from the generic taint path:
|
||||
// the value at risk is the leak of an *already-sensitive* source, not
|
||||
// the construction of an attacker payload, so the points-based scoring
|
||||
// tuned for code-exec / SSRF / SQLi over-credits these findings. Route
|
||||
// to a narrower decision tree that asks "did we corroborate a real
|
||||
// string body leaving the process?" instead.
|
||||
compute_data_exfil_confidence(diag)
|
||||
} else if id.starts_with("taint-") {
|
||||
compute_taint_confidence(diag)
|
||||
} else if id.starts_with("state-") {
|
||||
match id.as_str() {
|
||||
|
|
@ -458,13 +474,71 @@ fn compute_taint_confidence(diag: &Diag) -> Confidence {
|
|||
}
|
||||
}
|
||||
|
||||
/// Confidence routing for `taint-data-exfiltration` findings.
|
||||
///
|
||||
/// The generic taint scorer ranks DATA_EXFIL too aggressively: a Sensitive
|
||||
/// source plus a sink call is enough to push it into the Medium/High band,
|
||||
/// but the leak class needs corroboration that a real string body actually
|
||||
/// leaves the process (otherwise we surface every `fetch(..., {body: x})`
|
||||
/// where `x` happens to be Sensitive-tagged). This routing is deliberately
|
||||
/// capped at Medium and only fires Medium when the symbolic execution
|
||||
/// verdict confirms the path (abstract interpretation participates only as
|
||||
/// a sink-suppression filter inside SSA taint and does not surface a
|
||||
/// separate verdict here).
|
||||
///
|
||||
/// Routing:
|
||||
/// * Source < Sensitive → Low (caller already strips DATA_EXFIL for
|
||||
/// Plain sources, but defensively floor here).
|
||||
/// * Symbolic verdict `Confirmed` → Medium (symex produced a witness
|
||||
/// that a tainted string reaches the body argument).
|
||||
/// * Symbolic verdict `Inconclusive` / `NotAttempted` / no symbolic
|
||||
/// analysis → Low (instruction's "Inconclusive" tier; the `Confidence`
|
||||
/// enum has no separate Inconclusive variant so it floors to Low).
|
||||
/// * Symbolic verdict `Infeasible` → Low (path proven dead).
|
||||
///
|
||||
/// After routing, a `path_validated` guard on the diag drops the result
|
||||
/// one tier (Medium → Low; Low stays Low) and `apply_engine_notes_cap`
|
||||
/// applies the standard engine-notes cap.
|
||||
fn compute_data_exfil_confidence(diag: &Diag) -> Confidence {
|
||||
let ev = match &diag.evidence {
|
||||
Some(e) => e,
|
||||
None => return Confidence::Low,
|
||||
};
|
||||
|
||||
let is_sensitive = ev
|
||||
.source_kind
|
||||
.map(|k| k.sensitivity() >= crate::labels::Sensitivity::Sensitive)
|
||||
.unwrap_or(false);
|
||||
if !is_sensitive {
|
||||
return Confidence::Low;
|
||||
}
|
||||
|
||||
let mut base = match ev.symbolic.as_ref().map(|s| s.verdict) {
|
||||
Some(Verdict::Confirmed) => Confidence::Medium,
|
||||
Some(Verdict::Infeasible) => Confidence::Low,
|
||||
Some(Verdict::Inconclusive) | Some(Verdict::NotAttempted) | None => Confidence::Low,
|
||||
};
|
||||
|
||||
// Guarded flow: drop a tier. A validation predicate on the path means
|
||||
// the leak may be unreachable in practice, so the corroborated witness
|
||||
// is downgraded one step (Medium → Low; Low stays Low).
|
||||
if diag.path_validated && base > Confidence::Low {
|
||||
base = Confidence::Low;
|
||||
}
|
||||
|
||||
apply_engine_notes_cap(diag, base)
|
||||
}
|
||||
|
||||
/// Score a structured `SourceKind` value.
|
||||
///
|
||||
/// UserInput/Cookie/Header=+3, EnvironmentConfig=+2, Unknown/FileSystem=+1, Database/CaughtException=0.
|
||||
fn structured_source_kind_score(kind: crate::labels::SourceKind) -> i32 {
|
||||
use crate::labels::SourceKind;
|
||||
match kind {
|
||||
SourceKind::UserInput => 3,
|
||||
// Cookie / Header carry auth material, score them at the same
|
||||
// ranking weight as direct user input rather than the lower
|
||||
// FileSystem/Database tiers.
|
||||
SourceKind::UserInput | SourceKind::Cookie | SourceKind::Header => 3,
|
||||
SourceKind::EnvironmentConfig => 2,
|
||||
SourceKind::Unknown | SourceKind::FileSystem => 1,
|
||||
SourceKind::Database | SourceKind::CaughtException => 0,
|
||||
|
|
@ -538,6 +612,8 @@ pub fn generate_explanation(diag: &Diag) -> Option<String> {
|
|||
use crate::labels::SourceKind;
|
||||
match kind {
|
||||
SourceKind::UserInput => "user input",
|
||||
SourceKind::Cookie => "cookie",
|
||||
SourceKind::Header => "request header",
|
||||
SourceKind::EnvironmentConfig => "environment/config",
|
||||
SourceKind::Database => "database",
|
||||
SourceKind::FileSystem => "file system",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue