mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-24 20:28:06 +02:00
Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers * feat: Implement cross-file data exfiltration detection with parameter-specific gate filters * feat: Add calibration tests and refine DATA_EXFIL severity scoring logic * feat: Introduce per-detector configuration for data exfiltration suppression * feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output * feat: Add tainted body and URL handling for data exfiltration detection * feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go * feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients * feat: Add synthetic externals handling for closure-captured variables in SSA * feat: Implement closure-based suppression for resource leak findings * feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns * feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders * feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt * feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests * feat: Add data exfiltration sinks for various languages and enhance documentation * refactor: Simplify formatting and improve readability in various files * refactor: Improve readability by simplifying conditional statements and adding clippy linting * docs: Update CHANGELOG and comments for data exfiltration features and configuration * docs: Clarify configuration instructions for data exfiltration trusted destinations * docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
parent
a438886217
commit
58f1794a4e
189 changed files with 8421 additions and 383 deletions
|
|
@ -320,6 +320,11 @@ static GATED_REGISTRY: Lazy<HashMap<&'static str, &'static [SinkGate]>> = Lazy::
|
|||
m.insert("ts", typescript::GATED_SINKS);
|
||||
m.insert("python", python::GATED_SINKS);
|
||||
m.insert("py", python::GATED_SINKS);
|
||||
m.insert("go", go::GATED_SINKS);
|
||||
m.insert("php", php::GATED_SINKS);
|
||||
m.insert("c", c::GATED_SINKS);
|
||||
m.insert("cpp", cpp::GATED_SINKS);
|
||||
m.insert("c++", cpp::GATED_SINKS);
|
||||
m
|
||||
});
|
||||
|
||||
|
|
@ -473,6 +478,10 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
|
|||
pub enum SourceKind {
|
||||
/// Direct user input (request params, argv, stdin, form data)
|
||||
UserInput,
|
||||
/// HTTP cookie value (carries session / auth material)
|
||||
Cookie,
|
||||
/// HTTP request header (may carry auth tokens, user-agent fingerprints)
|
||||
Header,
|
||||
/// Environment variables and configuration
|
||||
EnvironmentConfig,
|
||||
/// File system reads
|
||||
|
|
@ -485,10 +494,81 @@ pub enum SourceKind {
|
|||
Unknown,
|
||||
}
|
||||
|
||||
/// Sensitivity classification of a taint source. Drives detector classes
|
||||
/// like `DATA_EXFIL` that only fire when the source carries information
|
||||
/// the operator did not intend to leak. Plain user input echoed back into
|
||||
/// an outbound request is not data exfiltration, the user already controls
|
||||
/// it, surfacing it as a leak is noise.
|
||||
///
|
||||
/// The threshold for `DATA_EXFIL` is `>= Sensitive`, plain user input is
|
||||
/// suppressed. Projects that legitimately classify a request body as
|
||||
/// sensitive (e.g. an API gateway forwarding pre-authenticated user tokens
|
||||
/// out of a request body) can override via custom rules in `nyx.conf`,
|
||||
/// either by re-classifying the source or by adding a Sanitizer rule for
|
||||
/// `Cap::DATA_EXFIL` on the legitimate forwarding path.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub enum Sensitivity {
|
||||
/// Attacker-controlled but not secret in itself, request bodies, query
|
||||
/// strings, form fields, argv. Echoing this to an outbound request is
|
||||
/// not data exfiltration.
|
||||
Plain,
|
||||
/// Carries operator state the user should not see leak out, cookies,
|
||||
/// auth headers, env, file system reads, database rows.
|
||||
Sensitive,
|
||||
/// Reserved for future explicit secret classifications (API keys,
|
||||
/// credential stores, key material). No source currently produces
|
||||
/// this, but the threshold check in `effective_sink_caps` already
|
||||
/// handles it monotonically.
|
||||
Secret,
|
||||
}
|
||||
|
||||
impl SourceKind {
|
||||
/// Return the sensitivity tier this source kind belongs to. Drives the
|
||||
/// `Cap::DATA_EXFIL` cap-suppression decision in `ast.rs`.
|
||||
pub fn sensitivity(self) -> Sensitivity {
|
||||
match self {
|
||||
// Plain user-controlled input, the user already has the data,
|
||||
// surfacing it back to them via an outbound request is not a
|
||||
// disclosure.
|
||||
SourceKind::UserInput => Sensitivity::Plain,
|
||||
// Operator-bound state, leaking these via an outbound request
|
||||
// is a real cross-boundary disclosure.
|
||||
SourceKind::Cookie
|
||||
| SourceKind::Header
|
||||
| SourceKind::EnvironmentConfig
|
||||
| SourceKind::FileSystem
|
||||
| SourceKind::Database => Sensitivity::Sensitive,
|
||||
// Caught exceptions can carry stack traces, db errors, internal
|
||||
// paths, treat them as sensitive by default.
|
||||
SourceKind::CaughtException => Sensitivity::Sensitive,
|
||||
// Conservative default for unclassified sources, surface
|
||||
// findings rather than silently drop them.
|
||||
SourceKind::Unknown => Sensitivity::Sensitive,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Infer the source kind from capabilities and callee name.
|
||||
pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
||||
let cl = callee.to_ascii_lowercase();
|
||||
|
||||
// Cookie / Header are checked *before* the generic user-input bucket
|
||||
// because they imply higher sensitivity (auth material, session ids).
|
||||
// The generic UserInput substrings (`request`, `header`, `cookie`)
|
||||
// would otherwise swallow these.
|
||||
//
|
||||
// Session stores carry auth material (CSRF tokens, signed user ids) of
|
||||
// the same sensitivity tier as raw cookies, so route them through the
|
||||
// `Cookie` arm. The substring is checked AFTER excluding the
|
||||
// capitalised `Session` constructor (covered by the `request` /
|
||||
// `requests` checks below not firing for `Session` builders).
|
||||
if cl.contains("cookie") || cl.contains("session") {
|
||||
return SourceKind::Cookie;
|
||||
}
|
||||
if cl.contains("header") {
|
||||
return SourceKind::Header;
|
||||
}
|
||||
|
||||
// User input patterns
|
||||
if cl.contains("argv")
|
||||
|| cl.contains("stdin")
|
||||
|
|
@ -498,11 +578,23 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
|||
|| cl.contains("params")
|
||||
|| cl.contains("input")
|
||||
|| cl.contains("body")
|
||||
|| cl.contains("header")
|
||||
|| cl.contains("cookie")
|
||||
|| cl.contains("location")
|
||||
|| cl.contains("document.url")
|
||||
|| cl.contains("document.referrer")
|
||||
// PHP superglobals: the AST text preserves the `$` (member-text
|
||||
// extraction reads the `variable_name` node verbatim) so we match
|
||||
// both `$_POST` and the `_POST` form some collectors emit.
|
||||
// `$_REQUEST` already matches via the `request` substring above;
|
||||
// `$_COOKIE` / `$_SESSION` route through the Cookie tier earlier in
|
||||
// the function. `$_SERVER` is operator-state-bearing (auth headers
|
||||
// etc.) so it stays Sensitive by falling through to the Unknown
|
||||
// bucket.
|
||||
|| cl == "$_get"
|
||||
|| cl == "$_post"
|
||||
|| cl == "$_files"
|
||||
|| cl == "_get"
|
||||
|| cl == "_post"
|
||||
|| cl == "_files"
|
||||
{
|
||||
return SourceKind::UserInput;
|
||||
}
|
||||
|
|
@ -542,6 +634,8 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
|||
pub fn severity_for_source_kind(kind: SourceKind) -> crate::patterns::Severity {
|
||||
match kind {
|
||||
SourceKind::UserInput => crate::patterns::Severity::High,
|
||||
SourceKind::Cookie => crate::patterns::Severity::High,
|
||||
SourceKind::Header => crate::patterns::Severity::High,
|
||||
SourceKind::EnvironmentConfig => crate::patterns::Severity::High,
|
||||
SourceKind::FileSystem => crate::patterns::Severity::Medium,
|
||||
SourceKind::Database => crate::patterns::Severity::Medium,
|
||||
|
|
@ -986,11 +1080,20 @@ pub fn classify_gated_sink(
|
|||
None => return out,
|
||||
};
|
||||
|
||||
// Match against the original callee text AND a chain-normalised form
|
||||
// that strips `()` between dots so a chained construction like
|
||||
// `httpx.AsyncClient().post` matches a gate matcher of
|
||||
// `httpx.AsyncClient.post`. Mirrors the normalisation applied by
|
||||
// `classify` for flat label rules.
|
||||
let callee_bytes = callee_text.as_bytes();
|
||||
let normalized = normalize_chained_call(callee_text);
|
||||
let normalized_bytes = normalized.as_bytes();
|
||||
|
||||
for gate in *gates {
|
||||
let matcher = gate.callee_matcher.as_bytes();
|
||||
if !match_suffix_cs(callee_bytes, matcher, gate.case_sensitive) {
|
||||
if !match_suffix_cs(callee_bytes, matcher, gate.case_sensitive)
|
||||
&& !match_suffix_cs(normalized_bytes, matcher, gate.case_sensitive)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1473,26 +1576,69 @@ mod tests {
|
|||
// CVE Hunt Session 2 (Go CVE-2023-3188 Owncast SSRF):
|
||||
// `http.DefaultClient.Get/Post/Head/Do/PostForm` is the idiomatic Go
|
||||
// SSRF sink shape (`http.DefaultClient` is the package-level shared
|
||||
// `*http.Client`). Bare `Get`/`Post` matchers would over-match
|
||||
// unrelated method names; the explicit `http.DefaultClient.*` matcher
|
||||
// restricts the suffix-match to the stdlib helper while leaving
|
||||
// user-defined `myClient.Get` alone (no false positives).
|
||||
// `*http.Client`). These callees migrated from a flat `Sink(SSRF)`
|
||||
// rule to destination-aware gated sinks so that DATA_EXFIL gates can
|
||||
// coexist on the same callee (e.g. `http.DefaultClient.Post(url, _,
|
||||
// body)` carries SSRF on arg 0 and DATA_EXFIL on arg 2). The
|
||||
// assertions below check the gate registration rather than the flat
|
||||
// classifier output.
|
||||
#[test]
|
||||
fn classify_go_http_default_client_get_is_ssrf_sink() {
|
||||
let result = classify("go", "http.DefaultClient.Get", None);
|
||||
assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
|
||||
fn classify_go_http_default_client_get_is_ssrf_gate() {
|
||||
let no_kw = |_: &str| None;
|
||||
let no_kw_present = |_: &str| false;
|
||||
let result = classify_gated_sink(
|
||||
"go",
|
||||
"http.DefaultClient.Get",
|
||||
|_| None,
|
||||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert!(
|
||||
result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
|
||||
"expected SSRF gate match, got {result:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_go_http_default_client_post_is_ssrf_sink() {
|
||||
let result = classify("go", "http.DefaultClient.Post", None);
|
||||
assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
|
||||
fn classify_go_http_default_client_post_is_ssrf_and_data_exfil_gate() {
|
||||
let no_kw = |_: &str| None;
|
||||
let no_kw_present = |_: &str| false;
|
||||
let result = classify_gated_sink(
|
||||
"go",
|
||||
"http.DefaultClient.Post",
|
||||
|_| None,
|
||||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert!(
|
||||
result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
|
||||
"expected SSRF gate match, got {result:?}"
|
||||
);
|
||||
assert!(
|
||||
result
|
||||
.iter()
|
||||
.any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
|
||||
"expected DATA_EXFIL gate match, got {result:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_go_http_default_client_do_is_ssrf_sink() {
|
||||
let result = classify("go", "http.DefaultClient.Do", None);
|
||||
assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
|
||||
fn classify_go_http_default_client_do_is_data_exfil_gate() {
|
||||
let no_kw = |_: &str| None;
|
||||
let no_kw_present = |_: &str| false;
|
||||
let result = classify_gated_sink(
|
||||
"go",
|
||||
"http.DefaultClient.Do",
|
||||
|_| None,
|
||||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert!(
|
||||
result
|
||||
.iter()
|
||||
.any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
|
||||
"expected DATA_EXFIL gate match, got {result:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue