Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)

* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers * feat: Implement cross-file data exfiltration detection with parameter-specific gate filters * feat: Add calibration tests and refine DATA_EXFIL severity scoring logic * feat: Introduce per-detector configuration for data exfiltration suppression * feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output * feat: Add tainted body and URL handling for data exfiltration detection * feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go * feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients * feat: Add synthetic externals handling for closure-captured variables in SSA * feat: Implement closure-based suppression for resource leak findings * feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns * feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders * feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt * feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests * feat: Add data exfiltration sinks for various languages and enhance documentation * refactor: Simplify formatting and improve readability in various files * refactor: Improve readability by simplifying conditional statements and adding clippy linting * docs: Update CHANGELOG and comments for data exfiltration features and configuration * docs: Clarify configuration instructions for data exfiltration trusted destinations * docs: Enhance comments for evidence routing logic in data exfiltration
2026-06-18 20:15:14 +02:00 · 2026-05-01 10:59:52 -04:00 · 2026-05-01 10:59:52 -04:00 · 58f1794a4e
commit 58f1794a4e
parent a438886217
189 changed files with 8421 additions and 383 deletions
--- a/src/utils/detector_options.rs
+++ b/src/utils/detector_options.rs
@ -0,0 +1,129 @@
+//! Per-detector runtime options.
+//!
+//! Mirrors the install/current pattern in [`crate::utils::analysis_options`]
+//! but for detector-class knobs that live under `[detectors.*]` in
+//! `nyx.conf`.  Engine code that wants to consult a detector option calls
+//! [`current`]; the CLI installs a resolved value before the scan starts.
+//!
+//! The first knobs covered here are the [`Cap::DATA_EXFIL`][crate::labels::Cap::DATA_EXFIL]
+//! suppression layers:
+//!
+//! * `enabled` — turn the cap off entirely per-project so legitimate
+//!   forwarding pipelines don't surface findings.
+//! * `trusted_destinations` — destination URL prefixes that suppress the
+//!   cap when a sink's URL argument has a static prefix matching one of
+//!   them.  Uses the same prefix-lock plumbing the SSRF suppression has.
+//!
+//! Defaults are conservative: detector enabled, no trusted destinations.
+
+use serde::{Deserialize, Serialize};
+use std::sync::RwLock;
+
+/// Knobs for the suppression layers that apply to `Cap::DATA_EXFIL`.
+///
+/// The container-level `#[serde(default)]` lets a partially written table
+/// deserialize: any field left out takes its value from this type's
+/// [`Default`] impl.
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+#[serde(default)]
+pub struct DataExfilDetectorOptions {
+    /// Master switch for the data-exfiltration detector class.  With
+    /// `false`, sink-time filters strip
+    /// [`crate::labels::Cap::DATA_EXFIL`] from sink caps before event
+    /// emission, so no `taint-data-exfiltration` findings reach output
+    /// for the project.
+    pub enabled: bool,
+    /// Trusted destination URL prefixes for outbound requests.  If a
+    /// sink's destination argument has a proven static prefix (from the
+    /// abstract string domain or an inline literal) that begins with one
+    /// of these entries, the [`crate::labels::Cap::DATA_EXFIL`] bit is
+    /// dropped before event emission.  Same semantics as the SSRF
+    /// prefix-lock.
+    pub trusted_destinations: Vec<String>,
+}
+
+impl Default for DataExfilDetectorOptions {
+    /// Conservative baseline: the detector is on and no destination is
+    /// trusted.
+    fn default() -> Self {
+        let enabled = true;
+        let trusted_destinations = Vec::new();
+        Self {
+            enabled,
+            trusted_destinations,
+        }
+    }
+}
+
+/// Root of the `[detectors]` configuration block.
+///
+/// Each detector class gets one field; a missing sub-table falls back to
+/// that field's [`Default`] value through `#[serde(default)]`.
+#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
+#[serde(default)]
+pub struct DetectorOptions {
+    /// Settings for the data-exfiltration detector.
+    pub data_exfil: DataExfilDetectorOptions,
+}
+
+// Process-wide slot holding the installed options.  It starts as `None`;
+// `install` and `reinstall` store `Some(..)`, `current` reads it, and
+// `_reset_for_tests` puts it back to `None`.
+static RUNTIME: RwLock<Option<DetectorOptions>> = RwLock::new(None);
+
+/// Install the process-wide detector options, first caller wins.
+///
+/// Returns `true` when `opts` was stored.  If options are already
+/// installed the call changes nothing and returns `false`.  The CLI calls
+/// this once per process at scan start; library consumers that never
+/// install pick up [`DetectorOptions::default`] via [`current`].
+///
+/// # Panics
+///
+/// Panics if the options lock is poisoned.
+pub fn install(opts: DetectorOptions) -> bool {
+    let mut slot = RUNTIME.write().expect("detector options RwLock poisoned");
+    if slot.is_none() {
+        *slot = Some(opts);
+        true
+    } else {
+        false
+    }
+}
+
+/// Overwrite the installed options, whether or not a value was already
+/// present.  Mirrors [`crate::utils::analysis_options::reinstall`] for the
+/// server's per-request resolution path.
+///
+/// # Panics
+///
+/// Panics if the options lock is poisoned.
+pub fn reinstall(opts: DetectorOptions) {
+    let mut slot = RUNTIME.write().expect("detector options RwLock poisoned");
+    *slot = Some(opts);
+}
+
+/// Snapshot of the active options.
+///
+/// Hands back a clone of the installed value, or
+/// [`DetectorOptions::default`] when nothing has been installed.
+///
+/// # Panics
+///
+/// Panics if the options lock is poisoned.
+pub fn current() -> DetectorOptions {
+    let slot = RUNTIME.read().expect("detector options RwLock poisoned");
+    match &*slot {
+        Some(installed) => installed.clone(),
+        None => DetectorOptions::default(),
+    }
+}
+
+/// Test helper: drop whatever is installed so the next [`install`] call
+/// succeeds again.  Meant only for tests that run several detector
+/// configurations inside one process.
+///
+/// # Panics
+///
+/// Panics if the options lock is poisoned.
+#[doc(hidden)]
+pub fn _reset_for_tests() {
+    let mut slot = RUNTIME.write().expect("detector options RwLock poisoned");
+    *slot = None;
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A default-constructed block keeps the detector on with an empty
+    /// trust list.
+    #[test]
+    fn defaults_match_documented() {
+        let DetectorOptions { data_exfil } = DetectorOptions::default();
+        assert!(data_exfil.enabled);
+        assert!(data_exfil.trusted_destinations.is_empty());
+    }
+
+    /// Non-default values survive a serialize/deserialize cycle through
+    /// TOML unchanged.
+    #[test]
+    fn toml_roundtrip() {
+        let trusted_destinations = vec![
+            String::from("https://api.internal/"),
+            String::from("https://telemetry."),
+        ];
+        let original = DetectorOptions {
+            data_exfil: DataExfilDetectorOptions {
+                enabled: false,
+                trusted_destinations,
+            },
+        };
+        let encoded = toml::to_string(&original).unwrap();
+        let decoded: DetectorOptions = toml::from_str(&encoded).unwrap();
+        assert_eq!(original, decoded);
+    }
+
+    /// A document holding only a comment (no `data_exfil` table) still
+    /// parses, with the detector enabled.
+    #[test]
+    fn missing_section_uses_defaults() {
+        let parsed: DetectorOptions = toml::from_str(r#"# empty"#).unwrap();
+        assert!(parsed.data_exfil.enabled);
+    }
+}