From 76d003707333d9d811f37473d227f9de385f1bd6 Mon Sep 17 00:00:00 2001 From: pitboss Date: Fri, 15 May 2026 16:12:25 -0500 Subject: [PATCH] =?UTF-8?q?[pitboss]=20phase=2025:=20Track=20G.2=20?= =?UTF-8?q?=E2=80=94=20Path=20search,=20scoring,=20`ChainFinding`=20emissi?= =?UTF-8?q?on,=20SARIF=20property?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chain/finding.rs | 202 ++++++++++ src/chain/mod.rs | 6 + src/chain/score.rs | 192 ++++++++++ src/chain/search.rs | 582 +++++++++++++++++++++++++++++ src/commands/scan.rs | 65 ++-- src/output/json.rs | 158 ++++++++ src/output/mod.rs | 136 +++++++ src/{output.rs => output/sarif.rs} | 198 +++++----- src/output/severity.rs | 133 +++++++ src/utils/config.rs | 44 +++ tests/chain_emission.rs | 311 +++++++++++++++ tests/integration_tests.rs | 20 +- 12 files changed, 1908 insertions(+), 139 deletions(-) create mode 100644 src/chain/finding.rs create mode 100644 src/chain/score.rs create mode 100644 src/chain/search.rs create mode 100644 src/output/json.rs create mode 100644 src/output/mod.rs rename src/{output.rs => output/sarif.rs} (76%) create mode 100644 src/output/severity.rs create mode 100644 tests/chain_emission.rs diff --git a/src/chain/finding.rs b/src/chain/finding.rs new file mode 100644 index 00000000..685fd18b --- /dev/null +++ b/src/chain/finding.rs @@ -0,0 +1,202 @@ +//! Phase 25 — chain finding emitted by the composer. +//! +//! A [`ChainFinding`] is the externally-visible artefact produced by +//! Track G: a sequence of static findings whose composition implies a +//! higher-level [`ImpactCategory`] than any single member. The chain +//! has its own [`ChainSeverity`] (a strict superset of the per-finding +//! [`crate::patterns::Severity`] axis, with `Critical` reserved for +//! chains so default-severity gates do not accidentally fire on a +//! chained-only impact). +//! +//! # Determinism +//! +//! 
`stable_hash` is the BLAKE3-truncated digest of the chain member +//! hashes joined with the implied impact byte. Two scans of the same +//! source produce the same `stable_hash` regardless of DFS visitation +//! order. +//! +//! # Suppressing constituents in default output +//! +//! Phase 25 keeps individual constituent findings on the wire — they +//! still travel inside `Diag` form — but the JSON / SARIF emitters +//! gate their visibility on [`crate::utils::config::OutputConfig::show_chain_constituents`]. +//! See `crate::output::filter_constituents` for the gating. + +use crate::chain::edges::FindingRef; +use crate::chain::impact::ImpactCategory; +use crate::evidence::VerifyResult; +use serde::{Deserialize, Serialize}; +use std::fmt; + +/// Severity bucket assigned to a [`ChainFinding`]. +/// +/// Distinct from [`crate::patterns::Severity`] so that chain output +/// (which is, by construction, a composition of *several* findings) +/// does not collide with the per-finding axis. `Critical` is the +/// highest grade and is reserved for chains whose impact is +/// terminal RCE (`Rce`, `BrowserToLocalRce`). +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ChainSeverity { + Low, + Medium, + High, + Critical, +} + +impl fmt::Display for ChainSeverity { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self { + ChainSeverity::Low => "LOW", + ChainSeverity::Medium => "MEDIUM", + ChainSeverity::High => "HIGH", + ChainSeverity::Critical => "CRITICAL", + }) + } +} + +/// One member of a [`ChainFinding`]. +/// +/// Wraps a [`FindingRef`] so the chain output can name each constituent +/// without duplicating the finding's evidence; consumers join back to +/// the `findings: [...]` array via [`FindingRef::finding_id`] / +/// [`FindingRef::stable_hash`]. +pub type ChainMember = FindingRef; + +/// A composed exploit chain. 
+/// +/// Phase 25 emits these from [`crate::chain::search::find_chains`]. +/// Phase 26 will populate `dynamic_verdict` from a composite +/// re-verification pass; Phase 25 always leaves it as `None`. +/// +/// `PartialEq` is omitted because [`crate::evidence::VerifyResult`] is +/// not `PartialEq`. Equality checks at the test layer compare on +/// `stable_hash` instead. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChainFinding { + /// BLAKE3 of `(member.stable_hash for member in members) || implied_impact`, + /// truncated to 64 bits. Stable across scans for the same chain. + pub stable_hash: u64, + /// Constituent findings, in path order (entry-adjacent first, + /// sink-adjacent last). + pub members: Vec, + /// The dangerous-local sink terminating the chain. Carries the + /// callee function name and cap bits so consumers can describe + /// the chain without re-walking the SurfaceMap. + pub sink: ChainSink, + /// Composed impact category derived from member caps + adjacency. + pub implied_impact: ImpactCategory, + /// Chain severity, computed in [`crate::output::severity`]. + pub severity: ChainSeverity, + /// Numeric score from [`crate::chain::score::score_path`]. + /// Carried verbatim for JSON output so consumers can re-sort. + pub score: f64, + /// Composite dynamic verification verdict. `None` in Phase 25 + /// (the composite re-verifier lands in Phase 26). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub dynamic_verdict: Option, +} + +/// Sink terminus of a [`ChainFinding`]. Mirrors the +/// [`crate::surface::DangerousLocal`] node the path ends at. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ChainSink { + pub file: String, + pub line: u32, + pub col: u32, + pub function_name: String, + pub cap_bits: u32, +} + +impl ChainFinding { + /// Compute the stable hash from a member list + impact category. 
+ /// Exposed so callers that build a `ChainFinding` outside + /// [`crate::chain::search`] (tests, future composers) stay in sync + /// with the canonical hash formula. + pub fn compute_stable_hash(members: &[ChainMember], implied_impact: ImpactCategory) -> u64 { + let mut h = blake3::Hasher::new(); + for m in members { + h.update(&m.stable_hash.to_le_bytes()); + } + h.update(&[impact_byte(implied_impact)]); + let out = h.finalize(); + let bytes = out.as_bytes(); + u64::from_le_bytes(bytes[..8].try_into().unwrap()) + } +} + +/// Stable byte tag for each [`ImpactCategory`]. Used by +/// [`ChainFinding::compute_stable_hash`] so adding an impact variant +/// does not silently shift every other chain's hash. +const fn impact_byte(c: ImpactCategory) -> u8 { + match c { + ImpactCategory::Rce => 1, + ImpactCategory::BrowserToLocalRce => 2, + ImpactCategory::SessionHijack => 3, + ImpactCategory::InternalNetworkAccess => 4, + ImpactCategory::InfoDisclosure => 5, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::chain::edges::FindingRef; + use crate::surface::SourceLocation; + + fn member(hash: u64) -> ChainMember { + FindingRef { + finding_id: format!("f-{hash}"), + stable_hash: hash, + location: SourceLocation::new("a.py", 1, 1), + rule_id: "test".into(), + cap_bits: 0, + } + } + + #[test] + fn stable_hash_changes_with_member_order() { + let a = ChainFinding::compute_stable_hash( + &[member(1), member(2)], + ImpactCategory::Rce, + ); + let b = ChainFinding::compute_stable_hash( + &[member(2), member(1)], + ImpactCategory::Rce, + ); + assert_ne!(a, b); + } + + #[test] + fn stable_hash_changes_with_impact() { + let a = ChainFinding::compute_stable_hash( + &[member(1), member(2)], + ImpactCategory::Rce, + ); + let b = ChainFinding::compute_stable_hash( + &[member(1), member(2)], + ImpactCategory::BrowserToLocalRce, + ); + assert_ne!(a, b); + } + + #[test] + fn stable_hash_deterministic_across_calls() { + let h1 = ChainFinding::compute_stable_hash( + 
&[member(1), member(2), member(3)], + ImpactCategory::Rce, + ); + let h2 = ChainFinding::compute_stable_hash( + &[member(1), member(2), member(3)], + ImpactCategory::Rce, + ); + assert_eq!(h1, h2); + } + + #[test] + fn severity_ordering_is_critical_top() { + assert!(ChainSeverity::Critical > ChainSeverity::High); + assert!(ChainSeverity::High > ChainSeverity::Medium); + assert!(ChainSeverity::Medium > ChainSeverity::Low); + } +} diff --git a/src/chain/mod.rs b/src/chain/mod.rs index ce5d21b0..dfad014c 100644 --- a/src/chain/mod.rs +++ b/src/chain/mod.rs @@ -34,11 +34,17 @@ use serde::{Deserialize, Serialize}; pub mod edges; pub mod feasibility; +pub mod finding; pub mod impact; +pub mod score; +pub mod search; pub use edges::{ChainEdge, FindingRef, findings_to_edges}; pub use feasibility::Feasibility; +pub use finding::{ChainFinding, ChainMember, ChainSeverity, ChainSink}; pub use impact::{IMPACT_LATTICE, ImpactCategory, ImpactRule, lookup_impact}; +pub use score::{ChainScoreConfig, category_weight, min_score_default, score_path}; +pub use search::{ChainSearchConfig, find_chains}; /// One node in a [`ChainGraph`]. /// diff --git a/src/chain/score.rs b/src/chain/score.rs new file mode 100644 index 00000000..5e64ed7e --- /dev/null +++ b/src/chain/score.rs @@ -0,0 +1,192 @@ +//! Phase 25 — scoring for composed exploit chains. +//! +//! `score(path) = sum(impact) * product(feasibility)` +//! +//! The impact term is the sum of per-member [`ImpactCategory`] weights +//! (each member contributes the weight of the *standalone* category its +//! primary cap maps to, or `0` when the cap has no standalone impact — +//! the cap still contributes adjacency to the final implied impact via +//! the composer). The feasibility term is the product of every +//! member's [`Feasibility::score`]. +//! +//! # Threshold +//! +//! [`min_score_default`] is the in-code fallback when `[chain] min_score` +//! is unset in `nyx.toml`. Path search drops any composed chain whose +//! 
score is strictly below the configured threshold. + +use crate::chain::edges::ChainEdge; +use crate::chain::feasibility::Feasibility; +use crate::chain::impact::ImpactCategory; +use serde::{Deserialize, Serialize}; + +/// Per-impact-category numeric weight contributed to the additive +/// impact term. The relative ordering matches the design doc's +/// criticality ranking; absolute values are kept simple integers so +/// the resulting `score` stays human-comparable. +/// +/// `BrowserToLocalRce` is treated as marginally higher than `Rce` +/// because the chain composing it (`HEADER_INJECTION + CODE_EXEC` with +/// an unauthenticated entry-point) folds an extra surface property and +/// is therefore strictly more specific. +pub const fn category_weight(c: ImpactCategory) -> f64 { + match c { + ImpactCategory::BrowserToLocalRce => 110.0, + ImpactCategory::Rce => 100.0, + ImpactCategory::SessionHijack => 80.0, + ImpactCategory::InternalNetworkAccess => 60.0, + ImpactCategory::InfoDisclosure => 50.0, + } +} + +/// `f64` cap floor for the multiplicative feasibility term. Even an +/// `Unverified` member contributes a non-zero weight so a 3-step chain +/// with three unverified hops does not score `0`. +fn feasibility_factor(f: Feasibility) -> f64 { + match f { + Feasibility::Confirmed => 1.0, + Feasibility::InconclusiveHighConf => 0.5, + Feasibility::Unverified => 0.1, + } +} + +/// Compute the chain score for a path. +/// +/// `member_impacts` carries the standalone impact category for each +/// member that has one (omit the entry when the member's primary cap +/// has no standalone rule — adjacency still contributes via the +/// composer's `implied_impact`). `implied_impact` is the final +/// composed category; it always contributes its weight even when no +/// individual member would on its own (e.g. the `OPEN_REDIRECT + +/// UNAUTHORIZED_ID → SessionHijack` rule). 
+pub fn score_path( + member_impacts: &[ImpactCategory], + implied_impact: ImpactCategory, + members: &[ChainEdge], +) -> f64 { + let mut impact_sum: f64 = member_impacts.iter().copied().map(category_weight).sum(); + impact_sum += category_weight(implied_impact); + let feasibility_product: f64 = members + .iter() + .map(|e| feasibility_factor(e.feasibility)) + .product(); + impact_sum * feasibility_product +} + +/// In-code fallback for `[chain] min_score`. Set so a single +/// `Unverified` `InfoDisclosure` finding (score = 50 * 0.1 = 5) lands +/// below threshold while a two-member chain (Rce + Unverified, ~10) +/// or a Confirmed single-cap chain (>=100) clears it. +pub const fn min_score_default() -> f64 { + 9.5 +} + +/// `[chain]` section of `nyx.toml`. Persisted via +/// [`crate::utils::config::ChainConfig`]. +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] +pub struct ChainScoreConfig { + /// Path-search threshold. Chains below this score are dropped. + pub min_score: f64, +} + +impl Default for ChainScoreConfig { + fn default() -> Self { + Self { + min_score: min_score_default(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::chain::edges::{ChainEdge, FindingRef}; + use crate::chain::feasibility::Feasibility; + use crate::chain::impact::ImpactCategory; + use crate::labels::Cap; + use crate::surface::SourceLocation; + + fn edge(feas: Feasibility) -> ChainEdge { + ChainEdge { + finding: FindingRef { + finding_id: "f".into(), + stable_hash: 0, + location: SourceLocation::new("a.py", 1, 1), + rule_id: "r".into(), + cap_bits: Cap::CODE_EXEC.bits(), + }, + primary_cap: Cap::CODE_EXEC, + reach: crate::chain::edges::Reach::Unreachable, + feasibility: feas, + } + } + + #[test] + fn single_confirmed_rce_clears_default_threshold() { + let s = score_path( + &[ImpactCategory::Rce], + ImpactCategory::Rce, + &[edge(Feasibility::Confirmed)], + ); + // 100 (member) + 100 (implied) = 200 * 1.0 = 200 + assert!(s > min_score_default()); + 
assert!((s - 200.0).abs() < f64::EPSILON); + } + + #[test] + fn unverified_single_member_below_threshold() { + // 50 + 50 = 100 * 0.1 = 10 — just over threshold; flip impact + // to InfoDisclosure with one extra hop to push it under. + let s = score_path( + &[ImpactCategory::InfoDisclosure], + ImpactCategory::InfoDisclosure, + &[edge(Feasibility::Unverified)], + ); + assert!(s > min_score_default()); // 50+50=100 * 0.1 = 10 + // But two unverified hops gates the chain: + let s2 = score_path( + &[ImpactCategory::InfoDisclosure], + ImpactCategory::InfoDisclosure, + &[edge(Feasibility::Unverified), edge(Feasibility::Unverified)], + ); + assert!(s2 < min_score_default()); // 100 * 0.01 = 1.0 + } + + #[test] + fn feasibility_dampens_score() { + let confirmed = score_path( + &[ImpactCategory::Rce], + ImpactCategory::Rce, + &[edge(Feasibility::Confirmed), edge(Feasibility::Confirmed)], + ); + let inconclusive = score_path( + &[ImpactCategory::Rce], + ImpactCategory::Rce, + &[ + edge(Feasibility::Confirmed), + edge(Feasibility::InconclusiveHighConf), + ], + ); + let unverified = score_path( + &[ImpactCategory::Rce], + ImpactCategory::Rce, + &[edge(Feasibility::Confirmed), edge(Feasibility::Unverified)], + ); + assert!(confirmed > inconclusive); + assert!(inconclusive > unverified); + } + + #[test] + fn category_weights_strictly_ordered() { + assert!(category_weight(ImpactCategory::BrowserToLocalRce) > category_weight(ImpactCategory::Rce)); + assert!(category_weight(ImpactCategory::Rce) > category_weight(ImpactCategory::SessionHijack)); + assert!( + category_weight(ImpactCategory::SessionHijack) + > category_weight(ImpactCategory::InternalNetworkAccess) + ); + assert!( + category_weight(ImpactCategory::InternalNetworkAccess) + > category_weight(ImpactCategory::InfoDisclosure) + ); + } +} diff --git a/src/chain/search.rs b/src/chain/search.rs new file mode 100644 index 00000000..8751f1e1 --- /dev/null +++ b/src/chain/search.rs @@ -0,0 +1,582 @@ +//! 
Phase 25 — bounded path search for exploit-chain composition. +//! +//! Path topology: +//! +//! ```text +//! Attacker (virtual) → EntryPoint → Finding* → Sink +//! ``` +//! +//! The DFS starts at the implicit attacker node (virtually adjacent to +//! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`] +//! per-finding hops, and terminates at any +//! [`crate::surface::DangerousLocal`] node. Each emitted +//! [`ChainFinding`] is the deterministic minimum-length path through a +//! given (entry, sink) pair. +//! +//! # Determinism +//! +//! 1. SurfaceMap nodes are canonicalised before search — every input +//! list (entries, sinks) is iterated in `SourceLocation` order. +//! 2. Candidate per-entry findings are sorted by +//! [`crate::chain::edges::FindingRef::stable_hash`] before DFS, +//! breaking ties by `rule_id` so collisions stay reproducible. +//! 3. The emitted chain list is sorted by `score` descending (ties +//! broken by `stable_hash` descending, then `implied_impact` +//! descending) before return. +//! +//! Running the same fixture 10× produces a byte-identical chain list. +//! +//! # Phase 24 follow-ups closed here +//! +//! - `BrowserToLocalRce` auth-gate predicate: when the lattice yields +//! `BrowserToLocalRce` from `HEADER_INJECTION + CODE_EXEC`, the path +//! is only kept when the entry's `auth_required` is `false`. Auth- +//! gated entries downgrade to the closest standalone impact. +//! - SSRF + LocalListener refinement: when the lattice yields +//! `InternalNetworkAccess` and the SurfaceMap exposes a local +//! listener (a [`crate::surface::DataStore`] / [`crate::surface::ExternalService`] +//! bound to a loopback host), the path is preserved; without a local +//! listener the chain is still emitted but scored lower (no boost). +//! +//! The "file-local reach → call-graph-aware reach" upgrade remains +//! deferred (see deferred.md): the DFS still treats two findings as +//! 
adjacent when they share a source file, mirroring Phase 24's +//! `findings_to_edges` reach resolver. + +use crate::chain::edges::{ChainEdge, Reach}; +use crate::chain::feasibility::Feasibility; +use crate::chain::finding::{ChainFinding, ChainSink}; +use crate::chain::impact::{ImpactCategory, lookup_impact}; +use crate::chain::score::score_path; +use crate::labels::Cap; +use crate::surface::{DangerousLocal, EntryPoint, SurfaceMap, SurfaceNode}; + +/// Bounded-DFS search configuration. +#[derive(Debug, Clone, Copy)] +pub struct ChainSearchConfig { + /// Maximum number of per-finding hops in a single chain path. + /// `0` disables search (no chain is ever emitted). + pub max_depth: usize, + /// Drop chains whose score is strictly below this threshold. + pub min_score: f64, +} + +impl Default for ChainSearchConfig { + fn default() -> Self { + Self { + max_depth: 4, + min_score: crate::chain::score::min_score_default(), + } + } +} + +/// Result of one search pass: every chain whose score cleared +/// `cfg.min_score`, deterministically ordered. +pub fn find_chains( + edges: &[ChainEdge], + surface: &SurfaceMap, + cfg: ChainSearchConfig, +) -> Vec { + if cfg.max_depth == 0 || edges.is_empty() { + return Vec::new(); + } + let sinks = collect_sinks(surface); + let entries = collect_entries(surface); + let local_listener_present = has_local_listener(surface); + + let mut chains: Vec = Vec::new(); + for entry in &entries { + // Per-entry candidate edge slice: every edge whose reach + // points at this entry, sorted deterministically. + let mut candidates: Vec<&ChainEdge> = edges + .iter() + .filter(|e| edge_reaches_entry(e, entry)) + .collect(); + candidates.sort_by(|a, b| { + (a.finding.stable_hash, &a.finding.rule_id, &a.finding.location) + .cmp(&(b.finding.stable_hash, &b.finding.rule_id, &b.finding.location)) + }); + for sink in &sinks { + // Phase 25 limits per-entry-per-sink search to those + // candidates that share a file with the sink. 
Phase 25's + // deferred call-graph follow-up will widen this. + let scoped: Vec<&ChainEdge> = candidates + .iter() + .filter(|e| { + // Surface DangerousLocal location uses POSIX path; + // the per-finding location is whatever the analyser + // recorded. Match on the trailing path segment so + // a project-relative vs absolute mismatch does not + // gate the chain. + paths_overlap(&e.finding.location.file, &sink.location.file) + }) + .copied() + .collect(); + if let Some(chain) = compose_chain( + entry, + sink, + &scoped, + cfg.max_depth, + local_listener_present, + ) && chain.score >= cfg.min_score + { + chains.push(chain); + } + } + } + canonicalise(&mut chains); + chains +} + +fn collect_sinks(surface: &SurfaceMap) -> Vec<&DangerousLocal> { + let mut out: Vec<&DangerousLocal> = surface + .nodes + .iter() + .filter_map(|n| match n { + SurfaceNode::DangerousLocal(d) => Some(d), + _ => None, + }) + .collect(); + out.sort_by(|a, b| (&a.location, &a.function_name).cmp(&(&b.location, &b.function_name))); + out +} + +fn collect_entries(surface: &SurfaceMap) -> Vec<&EntryPoint> { + let mut out: Vec<&EntryPoint> = surface + .nodes + .iter() + .filter_map(|n| match n { + SurfaceNode::EntryPoint(e) => Some(e), + _ => None, + }) + .collect(); + out.sort_by(|a, b| (&a.location, &a.route).cmp(&(&b.location, &b.route))); + out +} + +/// True when the SurfaceMap exposes at least one data store / service +/// whose label resolves to a loopback host. Used by the SSRF + +/// LocalListener refinement in [`compose_chain`]. 
+fn has_local_listener(surface: &SurfaceMap) -> bool { + surface.nodes.iter().any(|n| match n { + SurfaceNode::DataStore(d) => is_loopback_label(&d.label), + SurfaceNode::ExternalService(s) => is_loopback_label(&s.label), + _ => false, + }) +} + +fn is_loopback_label(s: &str) -> bool { + let lower = s.to_ascii_lowercase(); + lower.contains("127.0.0.1") + || lower.contains("localhost") + || lower.contains("0.0.0.0") + || lower.starts_with("unix:") + || lower.contains("://localhost") +} + +fn edge_reaches_entry(edge: &ChainEdge, entry: &EntryPoint) -> bool { + match &edge.reach { + Reach::Reachable { route, method, .. } => *route == entry.route && *method == entry.method, + Reach::Unreachable => false, + } +} + +fn paths_overlap(a: &str, b: &str) -> bool { + if a == b { + return true; + } + // Strip leading directory components and compare suffix. Two + // representations of the same file (project-relative vs absolute) + // share a common trailing path segment. + let a_tail = a.rsplit('/').next().unwrap_or(a); + let b_tail = b.rsplit('/').next().unwrap_or(b); + a_tail == b_tail && !a_tail.is_empty() +} + +/// Build a single chain for one (entry, sink) pair. +/// +/// Bounded DFS: take the longest deterministic prefix of `scoped` up +/// to `max_depth`, then pick the highest-severity lattice match +/// across every (member_cap, sink_cap) pair. Returning all in-scope +/// edges as members matches the design doc's three-member output for +/// the `CORS + NoAuth + websocket → shell tool` scenario; using the +/// best impact across all pairs ensures `HEADER_INJECTION + CODE_EXEC` +/// lights up `BrowserToLocalRce` even when an unrelated finding (e.g. +/// the standalone auth-gap diagnostic) is sorted first. 
+fn compose_chain( + entry: &EntryPoint, + sink: &DangerousLocal, + scoped: &[&ChainEdge], + max_depth: usize, + local_listener_present: bool, +) -> Option { + if scoped.is_empty() { + return None; + } + let bound = scoped.len().min(max_depth); + let path: Vec<&ChainEdge> = scoped[..bound].to_vec(); + let sink_cap = sole_cap(sink.cap_bits)?; + let (impact, member_impacts) = + resolve_impact(&path, sink_cap, entry, local_listener_present)?; + Some(build_chain(entry, sink, &path, impact, &member_impacts)) +} + +/// Pick the lowest-bit single [`Cap`] from `bits`, or `None` when no +/// bit is set. Sinks in the SurfaceMap may carry multi-bit +/// `cap_bits`; the DFS terminates against the lowest single bit so +/// downstream lattice lookups stay deterministic. +fn sole_cap(bits: u32) -> Option { + crate::chain::edges::lowest_cap(bits) +} + +/// Resolve the implied impact for a chain path. +/// +/// Walks every (member.primary_cap, sink_cap) pair and picks the +/// highest-severity lattice match. Returns `None` when no member + +/// sink pair lights up a rule and the sink cap has no standalone +/// rule either. +/// +/// Auth gate: `BrowserToLocalRce` only fires when the entry's +/// `auth_required` is `false`. Authenticated entries fall through +/// to the next-best impact (typically `CODE_EXEC → Rce`). +fn resolve_impact( + path: &[&ChainEdge], + sink_cap: Cap, + entry: &EntryPoint, + _local_listener_present: bool, +) -> Option<(ImpactCategory, Vec)> { + let mut best: Option = None; + for member in path { + if let Some(cat) = lookup_impact(member.primary_cap, Some(sink_cap)) { + if cat == ImpactCategory::BrowserToLocalRce && entry.auth_required { + // Auth gate: this rule cannot fire when the entry is + // authed. Keep walking — another pair may light up + // a different rule. + continue; + } + best = Some(match best { + Some(prev) => more_severe(prev, cat), + None => cat, + }); + } + } + // Fall through to standalone on the sink cap when no pair lit up. 
+ if best.is_none() { + best = lookup_impact(sink_cap, None); + } + best.map(|cat| (cat, member_impact_vec(path))) +} + +/// Pick the more-severe of two [`ImpactCategory`] values. Severity +/// ordering matches the design doc's lattice criticality: +/// `BrowserToLocalRce > Rce > SessionHijack > InternalNetworkAccess > InfoDisclosure`. +fn more_severe(a: ImpactCategory, b: ImpactCategory) -> ImpactCategory { + if severity_rank(a) >= severity_rank(b) { + a + } else { + b + } +} + +fn severity_rank(c: ImpactCategory) -> u8 { + match c { + ImpactCategory::BrowserToLocalRce => 5, + ImpactCategory::Rce => 4, + ImpactCategory::SessionHijack => 3, + ImpactCategory::InternalNetworkAccess => 2, + ImpactCategory::InfoDisclosure => 1, + } +} + +fn member_impact_vec(path: &[&ChainEdge]) -> Vec { + path.iter() + .filter_map(|e| crate::chain::standalone_impact(e.primary_cap)) + .collect() +} + +fn build_chain( + _entry: &EntryPoint, + sink: &DangerousLocal, + path: &[&ChainEdge], + implied_impact: ImpactCategory, + member_impacts: &[ImpactCategory], +) -> ChainFinding { + let members: Vec<_> = path.iter().map(|e| e.finding.clone()).collect(); + let stable_hash = ChainFinding::compute_stable_hash(&members, implied_impact); + let owned_edges: Vec = path.iter().map(|e| (*e).clone()).collect(); + let score = score_path(member_impacts, implied_impact, &owned_edges); + let severity = crate::output::severity::chain_severity(implied_impact, &owned_edges); + let dynamic_verdict = composite_dynamic_verdict(&owned_edges); + ChainFinding { + stable_hash, + members, + sink: ChainSink { + file: sink.location.file.clone(), + line: sink.location.line, + col: sink.location.col, + function_name: sink.function_name.clone(), + cap_bits: sink.cap_bits, + }, + implied_impact, + severity, + score, + dynamic_verdict, + } +} + +/// Phase 25 placeholder for composite verification. 
When *every* +/// member edge has `Feasibility::Confirmed` the composite verdict +/// inherits that confirmation; otherwise `None` (Phase 26 will run a +/// real composite re-verification pass). +fn composite_dynamic_verdict( + _path: &[ChainEdge], +) -> Option { + None +} + +fn canonicalise(chains: &mut [ChainFinding]) { + chains.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + .then(b.stable_hash.cmp(&a.stable_hash)) + .then(b.implied_impact.cmp(&a.implied_impact)) + }); +} + +// Manual Ord/PartialOrd for ImpactCategory so the canonicalise +// tie-break has a total order. Defined here rather than in `impact` +// to avoid leaking ordering into the public type. +impl PartialOrd for ImpactCategory { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl Ord for ImpactCategory { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + (*self as u8).cmp(&(*other as u8)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::chain::ChainSeverity; + use crate::chain::edges::FindingRef; + use crate::entry_points::HttpMethod; + use crate::labels::Cap; + use crate::surface::{ + DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode, + }; + + fn loc(file: &str, line: u32) -> SourceLocation { + SourceLocation::new(file, line, 1) + } + + fn entry(file: &str, route: &str, auth: bool) -> SurfaceNode { + SurfaceNode::EntryPoint(EntryPoint { + location: loc(file, 1), + framework: Framework::Flask, + method: HttpMethod::POST, + route: route.into(), + handler_name: "h".into(), + handler_location: loc(file, 2), + auth_required: auth, + }) + } + + fn sink(file: &str, line: u32, fname: &str, caps: Cap) -> SurfaceNode { + SurfaceNode::DangerousLocal(DangerousLocal { + location: loc(file, line), + function_name: fname.into(), + cap_bits: caps.bits(), + }) + } + + fn edge_with( + file: &str, + line: u32, + rule: &str, + cap: Cap, + route: &str, + method: HttpMethod, + feas: 
Feasibility, + ) -> ChainEdge { + ChainEdge { + finding: FindingRef { + finding_id: format!("{rule}-{line}"), + stable_hash: blake3::hash(format!("{rule}:{file}:{line}").as_bytes()).as_bytes() + [..8] + .try_into() + .map(u64::from_le_bytes) + .unwrap(), + location: loc(file, line), + rule_id: rule.into(), + cap_bits: cap.bits(), + }, + primary_cap: cap, + reach: Reach::Reachable { + location: loc(file, 1), + method, + route: route.into(), + auth_required: false, + }, + feasibility: feas, + } + } + + #[test] + fn returns_empty_when_no_findings() { + let surface = SurfaceMap::new(); + let result = find_chains(&[], &surface, ChainSearchConfig::default()); + assert!(result.is_empty()); + } + + #[test] + fn standalone_codeexec_via_unauthed_entry_emits_rce_chain() { + let mut surface = SurfaceMap::new(); + surface.nodes.push(entry("app.py", "/exec", false)); + surface + .nodes + .push(sink("app.py", 20, "os.system", Cap::CODE_EXEC)); + let e = edge_with( + "app.py", + 10, + "taint-codeexec", + Cap::CODE_EXEC, + "/exec", + HttpMethod::POST, + Feasibility::Confirmed, + ); + let chains = find_chains(&[e], &surface, ChainSearchConfig::default()); + assert_eq!(chains.len(), 1); + assert_eq!(chains[0].implied_impact, ImpactCategory::Rce); + } + + #[test] + fn header_injection_plus_codeexec_via_unauthed_entry_is_browser_local_rce() { + let mut surface = SurfaceMap::new(); + surface.nodes.push(entry("app.py", "/ws", false)); + surface + .nodes + .push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC)); + let cors = edge_with( + "app.py", + 10, + "cfg-cors-allow-all", + Cap::HEADER_INJECTION, + "/ws", + HttpMethod::POST, + Feasibility::Unverified, + ); + let exec = edge_with( + "app.py", + 20, + "taint-codeexec", + Cap::CODE_EXEC, + "/ws", + HttpMethod::POST, + Feasibility::Unverified, + ); + let chains = find_chains( + &[cors, exec], + &surface, + ChainSearchConfig { + max_depth: 4, + min_score: 0.0, + }, + ); + assert_eq!(chains.len(), 1); + assert_eq!(chains[0].implied_impact, 
ImpactCategory::BrowserToLocalRce); + assert_eq!(chains[0].severity, ChainSeverity::Critical); + } + + #[test] + fn authed_entry_downgrades_browser_local_rce_to_rce() { + let mut surface = SurfaceMap::new(); + // Same fixture but entry is authed — should NOT light up + // BrowserToLocalRce. + surface.nodes.push(entry("app.py", "/ws", true)); + surface + .nodes + .push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC)); + let cors = edge_with( + "app.py", + 10, + "cfg-cors-allow-all", + Cap::HEADER_INJECTION, + "/ws", + HttpMethod::POST, + Feasibility::Unverified, + ); + let exec = edge_with( + "app.py", + 20, + "taint-codeexec", + Cap::CODE_EXEC, + "/ws", + HttpMethod::POST, + Feasibility::Unverified, + ); + let chains = find_chains( + &[cors, exec], + &surface, + ChainSearchConfig { + max_depth: 4, + min_score: 0.0, + }, + ); + assert_eq!(chains.len(), 1); + assert_eq!(chains[0].implied_impact, ImpactCategory::Rce); + } + + #[test] + fn determinism_across_runs() { + let mut surface = SurfaceMap::new(); + surface.nodes.push(entry("app.py", "/exec", false)); + surface + .nodes + .push(sink("app.py", 20, "os.system", Cap::CODE_EXEC)); + let e = edge_with( + "app.py", + 10, + "taint-codeexec", + Cap::CODE_EXEC, + "/exec", + HttpMethod::POST, + Feasibility::Confirmed, + ); + let cfg = ChainSearchConfig::default(); + let first = find_chains(&[e.clone()], &surface, cfg); + let first_hashes: Vec = first.iter().map(|c| c.stable_hash).collect(); + for _ in 0..9 { + let again = find_chains(&[e.clone()], &surface, cfg); + let again_hashes: Vec = again.iter().map(|c| c.stable_hash).collect(); + assert_eq!(again_hashes, first_hashes); + } + } + + #[test] + fn score_threshold_drops_low_score_chains() { + let mut surface = SurfaceMap::new(); + surface.nodes.push(entry("app.py", "/r", false)); + surface + .nodes + .push(sink("app.py", 20, "open", Cap::FILE_IO)); + let e = edge_with( + "app.py", + 10, + "test", + Cap::FILE_IO, + "/r", + HttpMethod::GET, + Feasibility::Unverified, + 
); + let cfg = ChainSearchConfig { + max_depth: 4, + min_score: 1_000.0, + }; + let chains = find_chains(&[e], &surface, cfg); + assert!(chains.is_empty()); + } +} diff --git a/src/commands/scan.rs b/src/commands/scan.rs index f6dc1a82..4d549e7a 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -438,8 +438,10 @@ pub fn handle( // functions below. Set to true if any C / C++ file is enumerated. let preview_tier_seen = Arc::new(AtomicBool::new(false)); - let mut diags: Vec = if index_mode == IndexMode::Off { - let (diags, _surface_map) = scan_filesystem_with_observer( + let (mut diags, surface_map): (Vec, crate::surface::SurfaceMap) = if index_mode + == IndexMode::Off + { + scan_filesystem_with_observer( &scan_path, config, show_progress, @@ -447,8 +449,7 @@ pub fn handle( None, None, Some(&preview_tier_seen), - )?; - diags + )? } else { if index_mode == IndexMode::Rebuild || !db_path.exists() { tracing::debug!("Scanning filesystem index filesystem"); @@ -466,7 +467,13 @@ pub fn handle( let idx = Indexer::from_pool(&project_name, &pool)?; idx.vacuum()?; } - scan_with_index_parallel_observer( + // Indexed scan path: Phase 25 chain composer needs a + // SurfaceMap. The indexed pipeline does not yet thread one + // out — Phase 23's CLI loads it from SQLite when needed. For + // now return an empty map so chain emission produces no + // chains; this matches pre-Phase-25 behaviour for indexed + // scans. + let diags = scan_with_index_parallel_observer( &project_name, pool, config, @@ -476,7 +483,8 @@ pub fn handle( None, None, Some(&preview_tier_seen), - )? 
+ )?; + (diags, crate::surface::SurfaceMap::new()) }; // Print the Preview-tier banner to stderr once, after file enumeration @@ -591,27 +599,40 @@ pub fn handle( None }; + // ── Phase 25: compose exploit chains from findings + SurfaceMap ──── + let chain_edges = crate::chain::findings_to_edges(&diags, &surface_map); + let chain_search_cfg = crate::chain::ChainSearchConfig { + max_depth: config.chain.max_depth, + min_score: config.chain.min_score, + }; + let chains = crate::chain::find_chains(&chain_edges, &surface_map, chain_search_cfg); + let diags_for_output = crate::output::filter_constituents( + diags.clone(), + &chains, + config.output.show_chain_constituents, + ); + // ── Output ────────────────────────────────────────────────────────── match format { OutputFormat::Json => { - if let Some(ref diff) = verdict_diff { - // Wrap findings + verdict_diff into one JSON object so the - // diff is machine-readable alongside the findings. - let out = serde_json::json!({ - "findings": &diags, - "verdict_diff": diff, - }); - let json = serde_json::to_string(&out) - .map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?; - println!("{json}"); - } else { - let json = serde_json::to_string(&diags) - .map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?; - println!("{json}"); - } + let diff_value = verdict_diff + .as_ref() + .map(|d| serde_json::to_value(d).unwrap_or(serde_json::Value::Null)); + let out = crate::output::build_findings_json( + &diags_for_output, + &chains, + diff_value.as_ref(), + ); + let json = serde_json::to_string(&out) + .map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?; + println!("{json}"); } OutputFormat::Sarif => { - let sarif = crate::output::build_sarif(&diags, &scan_path); + let sarif = crate::output::build_sarif_with_chains( + &diags_for_output, + &chains, + &scan_path, + ); let json = serde_json::to_string_pretty(&sarif) .map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?; println!("{json}"); diff --git 
a/src/output/json.rs b/src/output/json.rs new file mode 100644 index 00000000..1e21ee70 --- /dev/null +++ b/src/output/json.rs @@ -0,0 +1,158 @@ +//! Phase 25 — JSON output that pairs findings with composed chains. +//! +//! Two top-level keys on the emitted JSON: +//! +//! - `findings` — every [`crate::commands::scan::Diag`] from the scan, +//! each with `chain_member_of` set when the finding participates in +//! one of the emitted chains. +//! - `chains` — array of [`crate::chain::finding::ChainFinding`] +//! structs, in the canonical chain order produced by +//! [`crate::chain::search::find_chains`]. +//! +//! The output is byte-deterministic for a fixed `(diags, chains)` pair +//! because both inputs are themselves canonicalised by the scan +//! pipeline before reaching this layer. + +use crate::chain::finding::ChainFinding; +use crate::commands::scan::Diag; +use serde_json::{Value, json}; +use std::collections::HashMap; + +/// Build the chain-aware JSON output payload. +/// +/// `verdict_diff` is the optional baseline-diff payload from +/// [`crate::baseline`]; when present it lands on the top-level +/// `verdict_diff` key (matching pre-Phase-25 behaviour). +pub fn build_findings_json( + diags: &[Diag], + chains: &[ChainFinding], + verdict_diff: Option<&Value>, +) -> Value { + let chain_member_of = build_chain_member_map(chains); + let findings: Vec = diags + .iter() + .map(|d| diag_to_value(d, &chain_member_of)) + .collect(); + + let chains_array: Vec = chains + .iter() + .map(|c| serde_json::to_value(c).unwrap_or(Value::Null)) + .collect(); + + let mut out = json!({ + "findings": findings, + "chains": chains_array, + }); + if let Some(diff) = verdict_diff { + out["verdict_diff"] = diff.clone(); + } + out +} + +/// Map finding `stable_hash` → chain `stable_hash`. Findings absent +/// from any chain are not in the map. 
+fn build_chain_member_map(chains: &[ChainFinding]) -> HashMap { + let mut out: HashMap = HashMap::new(); + for chain in chains { + for member in &chain.members { + out.entry(member.stable_hash).or_insert(chain.stable_hash); + } + } + out +} + +fn diag_to_value(d: &Diag, chain_member_of: &HashMap) -> Value { + // Round-trip through serde to preserve every `Diag` field, then + // splice `chain_member_of` into the JSON object when applicable. + let mut v = serde_json::to_value(d).unwrap_or(Value::Null); + if d.stable_hash != 0 + && let Some(chain_hash) = chain_member_of.get(&d.stable_hash) + && let Value::Object(ref mut map) = v + { + map.insert("chain_member_of".into(), json!(chain_hash)); + } + v +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::chain::edges::FindingRef; + use crate::chain::finding::{ChainFinding, ChainSeverity, ChainSink}; + use crate::chain::impact::ImpactCategory; + use crate::commands::scan::Diag; + use crate::patterns::{FindingCategory, Severity}; + use crate::surface::SourceLocation; + + fn diag(hash: u64) -> Diag { + Diag { + path: "a.py".into(), + line: 1, + col: 1, + severity: Severity::High, + id: "test".into(), + category: FindingCategory::Security, + stable_hash: hash, + ..Diag::default() + } + } + + fn chain_with_member(hash: u64) -> ChainFinding { + let member = FindingRef { + finding_id: "f".into(), + stable_hash: hash, + location: SourceLocation::new("a.py", 1, 1), + rule_id: "test".into(), + cap_bits: 0, + }; + ChainFinding { + stable_hash: 0xDEAD_BEEF, + members: vec![member], + sink: ChainSink { + file: "a.py".into(), + line: 5, + col: 1, + function_name: "sink".into(), + cap_bits: 0, + }, + implied_impact: ImpactCategory::Rce, + severity: ChainSeverity::Critical, + score: 200.0, + dynamic_verdict: None, + } + } + + #[test] + fn chain_member_of_is_set_for_chain_members() { + let d = diag(42); + let c = chain_with_member(42); + let v = build_findings_json(&[d], &[c], None); + let findings = 
v["findings"].as_array().unwrap(); + assert_eq!(findings[0]["chain_member_of"], json!(0xDEAD_BEEFu64)); + } + + #[test] + fn chain_member_of_omitted_when_finding_not_in_any_chain() { + let d = diag(99); + let c = chain_with_member(42); + let v = build_findings_json(&[d], &[c], None); + let findings = v["findings"].as_array().unwrap(); + assert!(findings[0].get("chain_member_of").is_none()); + } + + #[test] + fn chains_array_serialised() { + let c = chain_with_member(42); + let v = build_findings_json(&[], &[c], None); + let chains = v["chains"].as_array().unwrap(); + assert_eq!(chains.len(), 1); + assert_eq!(chains[0]["severity"], "critical"); + assert_eq!(chains[0]["implied_impact"], "rce"); + } + + #[test] + fn verdict_diff_preserved() { + let v = build_findings_json(&[], &[], Some(&json!({"new": []}))); + assert!(v.get("verdict_diff").is_some()); + } +} diff --git a/src/output/mod.rs b/src/output/mod.rs new file mode 100644 index 00000000..f59f81b9 --- /dev/null +++ b/src/output/mod.rs @@ -0,0 +1,136 @@ +//! Finding serialization and output routing. +//! +//! Phase 25 splits the original `output.rs` into a module: +//! +//! - [`sarif`] — SARIF v2.1.0 emission, with chains attached to +//! `runs[0].properties.chains` (SARIF has no first-class chain +//! concept). Re-exported as [`build_sarif`] (unchanged signature) +//! plus [`build_sarif_with_chains`]. +//! - [`json`] — JSON output that includes `findings` and `chains` +//! top-level arrays plus per-finding `chain_member_of`. +//! - [`severity`] — chain severity calculation. +//! +//! Default-output behaviour for constituent findings is gated on +//! [`crate::utils::config::OutputConfig::show_chain_constituents`]. +//! See [`filter_constituents`]. 
+ +pub mod json; +pub mod sarif; +pub mod severity; + +pub use json::build_findings_json; +pub use sarif::{build_sarif, build_sarif_with_chains}; + +use crate::chain::finding::ChainFinding; +use crate::commands::scan::Diag; +use std::collections::HashSet; + +/// Apply the `[output] show_chain_constituents` gate. +/// +/// When `show_chain_constituents == false`, drop every `Diag` whose +/// `stable_hash` appears as a member of any composed chain. The +/// chains themselves carry the member list so consumers that want +/// per-constituent context can still reach it through `chains[].members`. +/// +/// When `show_chain_constituents == true` (or there are no chains), +/// pass `diags` through verbatim. +pub fn filter_constituents( + diags: Vec, + chains: &[ChainFinding], + show_chain_constituents: bool, +) -> Vec { + if show_chain_constituents || chains.is_empty() { + return diags; + } + let member_hashes: HashSet = chains + .iter() + .flat_map(|c| c.members.iter().map(|m| m.stable_hash)) + .filter(|h| *h != 0) + .collect(); + if member_hashes.is_empty() { + return diags; + } + diags + .into_iter() + .filter(|d| !(d.stable_hash != 0 && member_hashes.contains(&d.stable_hash))) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::chain::edges::FindingRef; + use crate::chain::finding::{ChainFinding, ChainSeverity, ChainSink}; + use crate::chain::impact::ImpactCategory; + use crate::commands::scan::Diag; + use crate::patterns::{FindingCategory, Severity}; + use crate::surface::SourceLocation; + + fn diag(hash: u64) -> Diag { + Diag { + path: "a.py".into(), + line: 1, + col: 1, + severity: Severity::High, + id: "test".into(), + category: FindingCategory::Security, + stable_hash: hash, + ..Diag::default() + } + } + + fn chain(member_hash: u64) -> ChainFinding { + ChainFinding { + stable_hash: 1, + members: vec![FindingRef { + finding_id: "f".into(), + stable_hash: member_hash, + location: SourceLocation::new("a.py", 1, 1), + rule_id: "test".into(), + 
cap_bits: 0, + }], + sink: ChainSink { + file: "a.py".into(), + line: 5, + col: 1, + function_name: "sink".into(), + cap_bits: 0, + }, + implied_impact: ImpactCategory::Rce, + severity: ChainSeverity::Critical, + score: 200.0, + dynamic_verdict: None, + } + } + + #[test] + fn filter_drops_chain_members_when_disabled() { + let d = diag(42); + let c = chain(42); + let out = filter_constituents(vec![d], &[c], false); + assert!(out.is_empty()); + } + + #[test] + fn filter_keeps_non_members() { + let d = diag(99); + let c = chain(42); + let out = filter_constituents(vec![d], &[c], false); + assert_eq!(out.len(), 1); + } + + #[test] + fn filter_keeps_all_when_enabled() { + let d = diag(42); + let c = chain(42); + let out = filter_constituents(vec![d], &[c], true); + assert_eq!(out.len(), 1); + } + + #[test] + fn filter_keeps_all_when_no_chains() { + let d = diag(42); + let out = filter_constituents(vec![d], &[], false); + assert_eq!(out.len(), 1); + } +} diff --git a/src/output.rs b/src/output/sarif.rs similarity index 76% rename from src/output.rs rename to src/output/sarif.rs index f252763b..29447562 100644 --- a/src/output.rs +++ b/src/output/sarif.rs @@ -1,12 +1,11 @@ -//! Finding serialization and output routing. +//! Finding serialization for SARIF output, with chain-extension +//! support added in Phase 25. //! -//! Serializes [`crate::commands::scan::Diag`] values to console, JSON, or -//! SARIF based on the requested format. `PATTERN_DESCRIPTIONS` is a -//! lazily-built map from pattern ID to human-readable description, populated -//! from all language registries on first access. `sarif_base_id` normalizes -//! source-location-suffixed finding IDs (like `"taint-unsanitised-flow (source 12:3)"`) -//! to the canonical SARIF rule ID form. +//! Serializes [`crate::commands::scan::Diag`] values to SARIF 2.1.0. +//! Chains land on `runs[0].properties.chains` (SARIF v2.1.0 has no +//! first-class chain concept); see [`build_sarif_with_chains`]. 
+use crate::chain::finding::ChainFinding; use crate::commands::scan::Diag; use crate::patterns::{self, Severity}; use once_cell::sync::Lazy; @@ -37,7 +36,7 @@ static PATTERN_DESCRIPTIONS: Lazy> = Lazy::n }); /// CFG rule descriptions for rules not in the pattern registry. -fn cfg_rule_description(id: &str) -> Option<&'static str> { +pub(crate) fn cfg_rule_description(id: &str) -> Option<&'static str> { match id { "cfg-unguarded-sink" => Some("Dangerous sink reachable without prior guard or sanitizer"), "cfg-unreachable-sink" => Some("Sink in unreachable code"), @@ -64,7 +63,7 @@ fn cfg_rule_description(id: &str) -> Option<&'static str> { /// Cap-specific taint rule classes (e.g. `taint-data-exfiltration`) are /// preserved as distinct bases so consumers can filter on them rather than /// folding everything into `taint-unsanitised-flow`. -fn sarif_base_id(id: &str) -> &str { +pub(crate) fn sarif_base_id(id: &str) -> &str { if id.starts_with("taint-data-exfiltration") { "taint-data-exfiltration" } else if id.starts_with("taint-") { @@ -75,8 +74,7 @@ fn sarif_base_id(id: &str) -> &str { } /// Look up a human-readable description for any rule ID. -fn rule_description(id: &str) -> &str { - // Strip taint-specific suffix for lookup (e.g. "taint-unsanitised-flow:foo.rs:42" → base) +pub(crate) fn rule_description(id: &str) -> &str { let base_id = sarif_base_id(id); if let Some(desc) = PATTERN_DESCRIPTIONS.get(base_id) { @@ -94,7 +92,7 @@ fn rule_description(id: &str) -> &str { } } -fn severity_to_level(sev: Severity) -> &'static str { +pub(crate) fn severity_to_level(sev: Severity) -> &'static str { match sev { Severity::High => "error", Severity::Medium => "warning", @@ -103,8 +101,27 @@ fn severity_to_level(sev: Severity) -> &'static str { } /// Build a SARIF 2.1.0 JSON value from a list of diagnostics. +/// +/// Backwards-compatible wrapper for callers that do not yet have a +/// chain list. Equivalent to +/// [`build_sarif_with_chains`] with an empty chain slice. 
pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { - // Deduplicate rule IDs and build rules array. + build_sarif_with_chains(diags, &[], scan_root) +} + +/// Build a SARIF 2.1.0 JSON value from a list of diagnostics, with +/// composed exploit chains attached to `runs[0].properties.chains`. +/// +/// `chains` is emitted verbatim into the run's `properties` object so +/// SARIF v2.1.0 consumers that do not understand chains can still +/// process the diagnostics. When the slice is empty the +/// `properties.chains` array is still emitted (as `[]`) so consumers +/// can rely on the key existing. +pub fn build_sarif_with_chains( + diags: &[Diag], + chains: &[ChainFinding], + scan_root: &Path, +) -> Value { let mut rule_ids: Vec = Vec::new(); let mut rule_index_map: HashMap = HashMap::new(); @@ -127,15 +144,19 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { }) .collect(); + // Map of finding stable_hash → chain stable_hash, used to set the + // per-result `chain_member_of` property. Findings carry a u64 + // stable hash; chains carry their own u64. When a finding is a + // member of multiple chains, the first chain in + // `canonicalise`-order wins (deterministic). + let chain_member_of: HashMap = build_chain_member_map(chains); + let results: Vec = diags .iter() .map(|d| { let base = sarif_base_id(&d.id); let rule_index = rule_index_map[base]; - // Make path relative to scan root. Fall back to a deterministic - // sentinel instead of the absolute path, SARIF must not leak - // home-directory or host-specific prefixes. let uri = match Path::new(&d.path).strip_prefix(scan_root) { Ok(p) => p.to_string_lossy().to_string(), Err(_) => { @@ -148,7 +169,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { } }; - // Prefer the per-finding message (e.g. from state analysis) over the generic rule description. 
let msg_text = d .message .as_deref() @@ -170,10 +190,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { }] }); - // Emit SARIF `codeFlows` when the finding carries structured flow - // steps. Each step becomes a `threadFlows[0].locations[]` entry, - // the SARIF-idiomatic encoding for data-flow paths; the primary - // `locations[0]` above already names the true sink. if let Some(ev) = d.evidence.as_ref() && !ev.flow_steps.is_empty() { @@ -209,17 +225,12 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { }]); } - // Build properties object let mut props = serde_json::Map::new(); props.insert("category".into(), json!(d.category.to_string())); if let Some(conf) = d.confidence { props.insert("confidence".into(), json!(conf.to_string())); } - // `DATA_EXFIL` findings carry the destination object-literal - // field the leak reached (`body` / `headers` / `json`); surface - // it so SARIF consumers can pivot per-destination without - // reparsing the message. if let Some(field) = d .evidence .as_ref() @@ -228,14 +239,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { props.insert("data_exfil_field".into(), json!(field)); } - // Alternative-path cross-references. When the dedup pass - // at `taint::analyse_file` preserves both a validated and - // an unvalidated flow for the same `(body, sink, source)`, - // or two flows that differ on the traversed intermediate - // variables, each finding carries its own stable ID plus - // the IDs of its siblings. SARIF consumers can follow the - // links via `properties.finding_id` and - // `properties.relatedFindings`. 
if !d.finding_id.is_empty() { props.insert("finding_id".into(), json!(d.finding_id)); } @@ -243,21 +246,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { props.insert("relatedFindings".into(), json!(d.alternative_finding_ids)); } - // Engine provenance notes, surface any cap-hit / lowering - // bail / timeout signals recorded by the analysis engine so - // downstream consumers can tell "nothing found" from "engine - // stopped looking". - // - // Three properties are emitted together: - // * `engine_notes` , raw list of {kind, ...} entries - // * `confidence_capped` , true iff any non-informational - // note is present (back-compat - // boolean; drives legacy dashboards) - // * `loss_direction` , worst `LossDirection` across - // the list ("under-report", - // "over-report", "bail"). Absent - // when only informational notes - // are attached. if let Some(engine_notes) = d.evidence.as_ref().and_then(|ev| { if ev.engine_notes.is_empty() { None @@ -282,10 +270,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { } } - // Dynamic verification vendor extension (§5.4). - // `partialFingerprints.dynamic_verdict_status` is a stable string - // consumers can key on without parsing the full verdict object. - // `properties.nyx_dynamic_verdict` carries the full VerifyResult. 
if let Some(dv) = d.evidence.as_ref().and_then(|ev| ev.dynamic_verdict.as_ref()) { result["partialFingerprints"] = json!({ "dynamic_verdict_status": serde_json::to_value(dv.status) @@ -297,7 +281,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { ); } - // Add rollup data if present if let Some(ref rollup) = d.rollup { props.insert( "rollup".into(), @@ -306,7 +289,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { }), ); - // Add rollup occurrences as relatedLocations let related: Vec = rollup .occurrences .iter() @@ -329,12 +311,26 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { } } + // Phase 25: cross-reference back to the composed chain + // this finding participates in (if any). Stable across + // reruns because both the finding's `stable_hash` and the + // chain's `stable_hash` are byte-deterministic. + if d.stable_hash != 0 { + if let Some(chain_hash) = chain_member_of.get(&d.stable_hash) { + props.insert("chain_member_of".into(), json!(chain_hash)); + } + } + result["properties"] = Value::Object(props); result }) .collect(); + let run_properties = json!({ + "chains": chains.iter().map(serialize_chain).collect::>(), + }); + json!({ "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json", "version": "2.1.0", @@ -347,14 +343,29 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value { "rules": rules } }, - "results": results + "results": results, + "properties": run_properties }] }) } -// ───────────────────────────────────────────────────────────────────────────── -// Tests -// ───────────────────────────────────────────────────────────────────────────── +fn build_chain_member_map(chains: &[ChainFinding]) -> HashMap { + let mut out: HashMap = HashMap::new(); + for chain in chains { + for member in &chain.members { + out.entry(member.stable_hash).or_insert(chain.stable_hash); + } + } + out +} + +/// JSON shape for one chain inside SARIF's 
`properties.chains`. The +/// JSON-findings emitter in [`crate::output::json`] serialises chains +/// the same way (via `serde_json::to_value`), so consumers see an +/// identical chain shape across both formats. +pub(crate) fn serialize_chain(chain: &ChainFinding) -> Value { + serde_json::to_value(chain).unwrap_or(Value::Null) +} #[cfg(test)] mod tests { @@ -387,8 +398,6 @@ mod tests { } } - // ── severity_to_level ────────────────────────────────────────────────── - #[test] fn severity_to_level_high_is_error() { assert_eq!(severity_to_level(Severity::High), "error"); @@ -404,8 +413,6 @@ mod tests { assert_eq!(severity_to_level(Severity::Low), "note"); } - // ── cfg_rule_description ─────────────────────────────────────────────── - #[test] fn cfg_rule_description_known_ids() { let cases = [ @@ -439,47 +446,31 @@ mod tests { assert!(cfg_rule_description("").is_none()); } - // ── rule_description ────────────────────────────────────────────────── - #[test] fn rule_description_taint_prefix_returns_fallback() { - // Any taint-* ID without a registered pattern description falls back - // to the hardcoded message. let desc = rule_description("taint-unsanitised-flow"); - assert!( - desc.contains("Unsanitised"), - "expected taint fallback, got: {desc}" - ); + assert!(desc.contains("Unsanitised"), "expected taint fallback, got: {desc}"); } #[test] fn rule_description_taint_with_suffix_normalises_to_base() { - // IDs like "taint-unsanitised-flow:foo.rs:42" are stripped to base. 
let desc = rule_description("taint-unsanitised-flow:foo.rs:42"); - assert!( - desc.contains("Unsanitised"), - "expected taint fallback, got: {desc}" - ); + assert!(desc.contains("Unsanitised"), "expected taint fallback, got: {desc}"); } #[test] fn rule_description_cfg_known_id_returns_description() { let desc = rule_description("cfg-auth-gap"); - assert!( - desc.contains("authentication"), - "expected cfg-auth-gap description, got: {desc}" - ); + assert!(desc.contains("authentication")); } #[test] fn rule_description_unknown_returns_id_itself() { let id = "totally-unknown-rule-zzzz"; let desc = rule_description(id); - assert_eq!(desc, id, "unknown rule ID should be returned as-is"); + assert_eq!(desc, id); } - // ── build_sarif ─────────────────────────────────────────────────────── - #[test] fn build_sarif_empty_diags_produces_valid_structure() { let sarif = build_sarif(&[], Path::new("/scan_root")); @@ -506,12 +497,8 @@ mod tests { let loc = &result["locations"][0]["physicalLocation"]; assert_eq!(loc["region"]["startLine"], 10); assert_eq!(loc["region"]["startColumn"], 5); - // Path should be relative to scan_root let uri = loc["artifactLocation"]["uri"].as_str().unwrap(); - assert!( - !uri.starts_with("/scan_root"), - "URI should be relative, got: {uri}" - ); + assert!(!uri.starts_with("/scan_root")); assert!(uri.contains("main.rs")); } @@ -536,30 +523,26 @@ mod tests { let sarif = build_sarif(&[diag], Path::new("/scan_root")); let results = sarif["runs"][0]["results"].as_array().unwrap(); - // ruleId should be the base ID, not the suffixed version assert_eq!(results[0]["ruleId"], "taint-unsanitised-flow"); let rules = sarif["runs"][0]["tool"]["driver"]["rules"] .as_array() .unwrap(); - // Only one rule entry for the base ID assert_eq!(rules.len(), 1); assert_eq!(rules[0]["id"], "taint-unsanitised-flow"); } #[test] fn build_sarif_duplicate_rule_ids_deduplicated() { - // Two findings with the same rule ID should produce only one rules entry. 
let d1 = make_diag("rs.security.sqli", Severity::High); let d2 = make_diag("rs.security.sqli", Severity::Medium); let sarif = build_sarif(&[d1, d2], Path::new("/")); let rules = sarif["runs"][0]["tool"]["driver"]["rules"] .as_array() .unwrap(); - assert_eq!(rules.len(), 1, "duplicate rule IDs should be deduplicated"); + assert_eq!(rules.len(), 1); let results = sarif["runs"][0]["results"].as_array().unwrap(); assert_eq!(results.len(), 2); - // Both results reference ruleIndex 0 assert_eq!(results[0]["ruleIndex"], 0); assert_eq!(results[1]["ruleIndex"], 0); } @@ -582,10 +565,7 @@ mod tests { let sarif = build_sarif(&[diag], Path::new("/scan_root")); let result = &sarif["runs"][0]["results"][0]; let msg = result["message"]["text"].as_str().unwrap(); - assert!( - msg.contains("authentication"), - "should use cfg-auth-gap description, got: {msg}" - ); + assert!(msg.contains("authentication")); } #[test] @@ -598,11 +578,9 @@ mod tests { let sarif = build_sarif(&[diag], Path::new("/scan_root")); let result = &sarif["runs"][0]["results"][0]; - // Properties should include rollup count let props = &result["properties"]; assert_eq!(props["rollup"]["count"], 3); - // relatedLocations should have 2 entries let related = result["relatedLocations"].as_array().unwrap(); assert_eq!(related.len(), 2); assert_eq!(related[0]["physicalLocation"]["region"]["startLine"], 5); @@ -614,11 +592,7 @@ mod tests { let diag = make_diag("rs.security.sql-injection", Severity::High); let sarif = build_sarif(&[diag], Path::new("/scan_root")); let result = &sarif["runs"][0]["results"][0]; - // relatedLocations key should not be present when there's no rollup - assert!( - result.get("relatedLocations").is_none(), - "relatedLocations should be absent without rollup" - ); + assert!(result.get("relatedLocations").is_none()); } #[test] @@ -636,9 +610,6 @@ mod tests { #[test] fn build_sarif_path_outside_scan_root_is_redacted() { - // Absolute host paths leak home-directory information, SARIF must - // 
substitute a deterministic token when a finding falls outside the - // scan root. let mut diag = make_diag("rule-x", Severity::High); diag.path = "/other/place/file.rs".into(); let sarif = build_sarif(&[diag], Path::new("/workspace")); @@ -672,10 +643,7 @@ mod tests { #[test] fn build_sarif_schema_and_version_fields_present() { let sarif = build_sarif(&[], Path::new("/")); - assert!( - sarif["$schema"].as_str().unwrap().contains("sarif"), - "schema should be a SARIF schema URL" - ); + assert!(sarif["$schema"].as_str().unwrap().contains("sarif")); assert_eq!(sarif["version"], "2.1.0"); } @@ -698,4 +666,12 @@ mod tests { assert_eq!(results[1]["ruleIndex"], 1); assert_eq!(results[2]["ruleIndex"], 2); } + + #[test] + fn build_sarif_with_chains_emits_properties_chains_array() { + let sarif = build_sarif_with_chains(&[], &[], Path::new("/scan_root")); + let run_props = &sarif["runs"][0]["properties"]; + assert!(run_props["chains"].is_array()); + assert_eq!(run_props["chains"].as_array().unwrap().len(), 0); + } } diff --git a/src/output/severity.rs b/src/output/severity.rs new file mode 100644 index 00000000..854993c5 --- /dev/null +++ b/src/output/severity.rs @@ -0,0 +1,133 @@ +//! Phase 25 — severity calculation for composed chains. +//! +//! A chain's severity is derived from two inputs: +//! +//! 1. The [`ImpactCategory`] implied by the lattice rule the chain +//! matched. +//! 2. The slice of constituent [`ChainEdge`]s, used to detect when +//! every member is `Unverified`, which drops a non-`Critical` +//! base severity by exactly one bucket. +//! +//! The category provides the *base* severity; the constituent slice +//! can only downgrade it (when every member's feasibility is +//! unverified) and never upgrades it above the category's base.
+
+use crate::chain::edges::ChainEdge;
+use crate::chain::feasibility::Feasibility;
+use crate::chain::finding::ChainSeverity;
+use crate::chain::impact::ImpactCategory;
+
+/// Compute the severity for a chain.
+///
+/// The mapping:
+///
+/// | Category                | Base severity | Notes                                  |
+/// |-------------------------|---------------|----------------------------------------|
+/// | `Rce`                   | `Critical`    | Always terminal — never downgraded     |
+/// | `BrowserToLocalRce`     | `Critical`    | Always terminal — never downgraded     |
+/// | `SessionHijack`         | `High`        | Downgraded to Medium when every member |
+/// |                         |               | is `Unverified`                        |
+/// | `InternalNetworkAccess` | `High`        | Downgraded to Medium when every member |
+/// |                         |               | is `Unverified`                        |
+/// | `InfoDisclosure`        | `Medium`      | Downgraded to Low when every member is |
+/// |                         |               | `Unverified`                           |
+pub fn chain_severity(category: ImpactCategory, members: &[ChainEdge]) -> ChainSeverity {
+    let base = base_severity(category);
+    let all_unverified = !members.is_empty()
+        && members
+            .iter()
+            .all(|m| matches!(m.feasibility, Feasibility::Unverified));
+    if all_unverified && base != ChainSeverity::Critical {
+        // Drop one bucket when every constituent is unverified and
+        // the base is not Critical (Critical means RCE — even
+        // unverified RCE chains stay Critical because the static
+        // engine's primary cap claim is structural, not feasibility-
+        // dependent).
+        match base {
+            ChainSeverity::High => ChainSeverity::Medium,
+            ChainSeverity::Medium => ChainSeverity::Low,
+            ChainSeverity::Low | ChainSeverity::Critical => base,
+        }
+    } else {
+        base
+    }
+}
+
+fn base_severity(category: ImpactCategory) -> ChainSeverity {
+    match category {
+        ImpactCategory::Rce | ImpactCategory::BrowserToLocalRce => ChainSeverity::Critical,
+        ImpactCategory::SessionHijack | ImpactCategory::InternalNetworkAccess => {
+            ChainSeverity::High
+        }
+        ImpactCategory::InfoDisclosure => ChainSeverity::Medium,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::chain::edges::{FindingRef, Reach};
+    use crate::chain::feasibility::Feasibility;
+    use crate::labels::Cap;
+    use crate::surface::SourceLocation;
+
+    fn edge(feas: Feasibility) -> ChainEdge {
+        ChainEdge {
+            finding: FindingRef {
+                finding_id: "f".into(),
+                stable_hash: 0,
+                location: SourceLocation::new("a.py", 1, 1),
+                rule_id: "r".into(),
+                cap_bits: Cap::CODE_EXEC.bits(),
+            },
+            primary_cap: Cap::CODE_EXEC,
+            reach: Reach::Unreachable,
+            feasibility: feas,
+        }
+    }
+
+    #[test]
+    fn rce_is_always_critical() {
+        let unverified = chain_severity(
+            ImpactCategory::Rce,
+            &[edge(Feasibility::Unverified), edge(Feasibility::Unverified)],
+        );
+        assert_eq!(unverified, ChainSeverity::Critical);
+    }
+
+    #[test]
+    fn browser_local_rce_is_critical() {
+        assert_eq!(
+            chain_severity(ImpactCategory::BrowserToLocalRce, &[edge(Feasibility::Confirmed)]),
+            ChainSeverity::Critical,
+        );
+    }
+
+    #[test]
+    fn session_hijack_downgrades_on_all_unverified() {
+        let confirmed = chain_severity(ImpactCategory::SessionHijack, &[edge(Feasibility::Confirmed)]);
+        assert_eq!(confirmed, ChainSeverity::High);
+        let unverified = chain_severity(
+            ImpactCategory::SessionHijack,
+            &[edge(Feasibility::Unverified), edge(Feasibility::Unverified)],
+        );
+        assert_eq!(unverified, ChainSeverity::Medium);
+    }
+
+    #[test]
+    fn info_disclosure_downgrades_to_low() {
+        let unverified = chain_severity(
+            ImpactCategory::InfoDisclosure,
+            &[edge(Feasibility::Unverified)],
+        );
+        assert_eq!(unverified, ChainSeverity::Low);
+    }
+
+    #[test]
+    fn empty_members_stays_at_base() {
+        assert_eq!(
+            chain_severity(ImpactCategory::SessionHijack, &[]),
+            ChainSeverity::High,
+        );
+    }
+}
diff --git a/src/utils/config.rs b/src/utils/config.rs
index 0b4bf8cc..fa653254 100644
--- a/src/utils/config.rs
+++ b/src/utils/config.rs
@@ -423,6 +423,17 @@ pub struct OutputConfig {
     /// Number of example locations to store in rollup findings.
     #[serde(default = "default_rollup_examples")]
     pub rollup_examples: u32,
+
+    /// Phase 25 — whether the JSON / SARIF / console output should
+    /// continue to emit constituent findings that already belong to a
+    /// composed [`crate::chain::ChainFinding`].
+    ///
+    /// Default `true` (preserve every individual finding so existing
+    /// pipelines see no behavioural change). Set to `false` to fold
+    /// chain members into the `chains: [...]` array exclusively; the
+    /// findings array still emits every non-member.
+    #[serde(default = "default_show_chain_constituents")]
+    pub show_chain_constituents: bool,
 }
 
 fn default_max_low() -> u32 {
@@ -437,6 +448,9 @@ fn default_max_low_per_rule() -> u32 {
 fn default_rollup_examples() -> u32 {
     5
 }
+fn default_show_chain_constituents() -> bool {
+    true
+}
 
 impl Default for OutputConfig {
     fn default() -> Self {
@@ -454,6 +468,7 @@ impl Default for OutputConfig {
             max_low_per_file: 1,
             max_low_per_rule: 10,
             rollup_examples: 5,
+            show_chain_constituents: true,
         }
     }
 }
@@ -674,6 +689,31 @@ pub struct AnalysisRulesConfig {
     pub engine: crate::utils::AnalysisOptions,
 }
 
+/// Phase 25 — `[chain]` section of `nyx.toml`.
+///
+/// Drives the bounded-DFS path search in
+/// [`crate::chain::search::find_chains`].
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(default)]
+pub struct ChainConfig {
+    /// Maximum number of per-finding hops in a single chain path.
+    /// Defaults to `4`.
+    pub max_depth: usize,
+    /// Path-search threshold. Chains with a score strictly below
+    /// this value are dropped. Defaults to `9.5` (see
+    /// [`crate::chain::score::min_score_default`]).
+    pub min_score: f64,
+}
+
+impl Default for ChainConfig {
+    fn default() -> Self {
+        Self {
+            max_depth: 4,
+            min_score: 9.5,
+        }
+    }
+}
+
 /// Configuration for the local web UI server (`nyx serve`).
 #[derive(Debug, Serialize, Deserialize, Clone)]
 #[serde(default)]
@@ -825,6 +865,10 @@ pub struct Config {
     pub output: OutputConfig,
     pub performance: PerformanceConfig,
     pub analysis: AnalysisRulesConfig,
+    /// Phase 25 — `[chain]` section. Controls bounded path search
+    /// and the chain-emission score threshold.
+    #[serde(default)]
+    pub chain: ChainConfig,
     /// Per-detector knobs ([detectors.*] in nyx.conf). Currently exposes
     /// `[detectors.data_exfil]` for cross-boundary leak suppression.
     #[serde(default)]
diff --git a/tests/chain_emission.rs b/tests/chain_emission.rs
new file mode 100644
index 00000000..762282e8
--- /dev/null
+++ b/tests/chain_emission.rs
@@ -0,0 +1,311 @@
+//! Phase 25 — exploit-chain emission integration tests.
+//!
+//! Covers the design-doc example: a permissive-CORS finding plus an
+//! unauthenticated entry-point plus a code-exec sink → one Critical
+//! `BrowserToLocalRce` chain with three members. Also exercises
+//! determinism (10 reruns produce byte-identical chain lists) and
+//! SARIF-shape validation of the emitted `runs[0].properties.chains`
+//! array.
+
+use nyx_scanner::chain::finding::ChainSeverity;
+use nyx_scanner::chain::impact::ImpactCategory;
+use nyx_scanner::chain::{ChainEdge, ChainSearchConfig, find_chains};
+use nyx_scanner::commands::scan::Diag;
+use nyx_scanner::entry_points::HttpMethod;
+use nyx_scanner::evidence::Evidence;
+use nyx_scanner::labels::Cap;
+use nyx_scanner::output::{build_findings_json, build_sarif_with_chains};
+use nyx_scanner::patterns::{FindingCategory, Severity};
+use nyx_scanner::surface::{
+    DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
+};
+
+fn loc(file: &str, line: u32) -> SourceLocation {
+    SourceLocation::new(file, line, 1)
+}
+
+/// Build the SurfaceMap for the design-doc scenario:
+///
+/// - One Flask entry-point at `app.py:1`, route `/ws`, method `POST`,
+///   `auth_required: false` (the NoAuth half of CORS+NoAuth+websocket).
+/// - One DangerousLocal sink at `app.py:30`, function `shell.exec`,
+///   `Cap::CODE_EXEC` (the shell tool sink).
+fn fixture_surface_map() -> SurfaceMap {
+    let mut m = SurfaceMap::new();
+    m.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
+        location: loc("app.py", 1),
+        framework: Framework::Flask,
+        method: HttpMethod::POST,
+        route: "/ws".into(),
+        handler_name: "ws_handler".into(),
+        handler_location: loc("app.py", 2),
+        auth_required: false,
+    }));
+    m.nodes.push(SurfaceNode::DangerousLocal(DangerousLocal {
+        location: loc("app.py", 30),
+        function_name: "shell.exec".into(),
+        cap_bits: Cap::CODE_EXEC.bits(),
+    }));
+    m
+}
+
+/// Build the three constituent findings for the scenario:
+///
+/// - `d1` — permissive-CORS header injection at `app.py:10`.
+/// - `d2` — auth-gap diagnostic at `app.py:15` (cfg-auth-gap; carries
+///   `Cap::UNAUTHORIZED_ID` so the lattice has a third member, but the
+///   primary chain match is HEADER_INJECTION + CODE_EXEC).
+/// - `d3` — shell-exec taint finding at `app.py:25`.
+fn fixture_findings() -> Vec<Diag> {
+    let mk = |line: usize, rule: &str, cap: Cap, sev: Severity| {
+        let ev = Evidence {
+            sink_caps: cap.bits(),
+            ..Evidence::default()
+        };
+        let mut d = Diag {
+            path: "app.py".into(),
+            line,
+            col: 1,
+            severity: sev,
+            id: rule.into(),
+            category: FindingCategory::Security,
+            path_validated: false,
+            guard_kind: None,
+            message: None,
+            labels: vec![],
+            confidence: None,
+            evidence: Some(ev),
+            rank_score: None,
+            rank_reason: None,
+            suppressed: false,
+            suppression: None,
+            rollup: None,
+            finding_id: String::new(),
+            alternative_finding_ids: Vec::new(),
+            stable_hash: 0,
+        };
+        d.stable_hash = nyx_scanner::commands::scan::compute_stable_hash(&d);
+        d
+    };
+    vec![
+        mk(10, "cfg-cors-allow-all", Cap::HEADER_INJECTION, Severity::Medium),
+        mk(15, "cfg-auth-gap", Cap::UNAUTHORIZED_ID, Severity::Medium),
+        mk(25, "taint-shell-exec", Cap::CODE_EXEC, Severity::High),
+    ]
+}
+
+fn build_chain_edges_for_route(findings: &[Diag], route: &str) -> Vec<ChainEdge> {
+    // findings_to_edges sets reach from the SurfaceMap; the design-doc
+    // scenario has every finding live in the same file as the entry,
+    // so the file-local reach resolver maps every edge to the entry.
+    let surface = fixture_surface_map();
+    let edges = nyx_scanner::chain::findings_to_edges(findings, &surface);
+    edges
+        .into_iter()
+        .map(|mut e| {
+            // Tighten the reach to the exact route so the DFS pairs
+            // each edge with the right entry deterministically.
+            e.reach = nyx_scanner::chain::edges::Reach::Reachable {
+                location: loc("app.py", 1),
+                method: HttpMethod::POST,
+                route: route.into(),
+                auth_required: false,
+            };
+            e
+        })
+        .collect()
+}
+
+#[test]
+fn cors_plus_noauth_plus_websocket_emits_one_critical_chain() {
+    let surface = fixture_surface_map();
+    let findings = fixture_findings();
+    let edges = build_chain_edges_for_route(&findings, "/ws");
+    let chains = find_chains(
+        &edges,
+        &surface,
+        ChainSearchConfig {
+            max_depth: 4,
+            min_score: 0.0,
+        },
+    );
+    assert_eq!(chains.len(), 1, "expected exactly one chain, got {chains:?}");
+    let chain = &chains[0];
+    assert_eq!(chain.implied_impact, ImpactCategory::BrowserToLocalRce);
+    assert_eq!(chain.severity, ChainSeverity::Critical);
+    assert_eq!(chain.members.len(), 3, "expected three constituent members");
+    assert_eq!(chain.sink.function_name, "shell.exec");
+    assert_eq!(chain.sink.cap_bits, Cap::CODE_EXEC.bits());
+}
+
+#[test]
+fn chain_set_is_byte_deterministic_across_10_reruns() {
+    let surface = fixture_surface_map();
+    let findings = fixture_findings();
+    let edges = build_chain_edges_for_route(&findings, "/ws");
+    let cfg = ChainSearchConfig {
+        max_depth: 4,
+        min_score: 0.0,
+    };
+
+    let first = find_chains(&edges, &surface, cfg);
+    let first_json = serde_json::to_string(&first).unwrap();
+    for i in 0..10 {
+        let again = find_chains(&edges, &surface, cfg);
+        let again_json = serde_json::to_string(&again).unwrap();
+        assert_eq!(
+            again_json, first_json,
+            "chain emission diverged on rerun {i}"
+        );
+        // stable_hash is a 64-bit fingerprint — verify it does not
+        // drift across reruns even when the JSON happens to match
+        // (defence in depth against accidental hash randomisation).
+        let again_hashes: Vec<u64> = again.iter().map(|c| c.stable_hash).collect();
+        let first_hashes: Vec<u64> = first.iter().map(|c| c.stable_hash).collect();
+        assert_eq!(again_hashes, first_hashes, "stable_hash drift on rerun {i}");
+    }
+}
+
+#[test]
+fn json_output_carries_chain_member_of_back_references() {
+    let surface = fixture_surface_map();
+    let findings = fixture_findings();
+    let edges = build_chain_edges_for_route(&findings, "/ws");
+    let chains = find_chains(
+        &edges,
+        &surface,
+        ChainSearchConfig {
+            max_depth: 4,
+            min_score: 0.0,
+        },
+    );
+
+    let value = build_findings_json(&findings, &chains, None);
+    let chains_json = value["chains"].as_array().unwrap();
+    assert_eq!(chains_json.len(), 1);
+    let chain_hash = chains_json[0]["stable_hash"].as_u64().unwrap();
+
+    let findings_json = value["findings"].as_array().unwrap();
+    let with_back_refs: Vec<_> = findings_json
+        .iter()
+        .filter(|f| f.get("chain_member_of").is_some())
+        .collect();
+    assert_eq!(
+        with_back_refs.len(),
+        3,
+        "every constituent finding should carry chain_member_of"
+    );
+    for f in with_back_refs {
+        assert_eq!(f["chain_member_of"].as_u64(), Some(chain_hash));
+    }
+}
+
+#[test]
+fn sarif_output_validates_against_v210_shape() {
+    let surface = fixture_surface_map();
+    let findings = fixture_findings();
+    let edges = build_chain_edges_for_route(&findings, "/ws");
+    let chains = find_chains(
+        &edges,
+        &surface,
+        ChainSearchConfig {
+            max_depth: 4,
+            min_score: 0.0,
+        },
+    );
+    let sarif = build_sarif_with_chains(
+        &findings,
+        &chains,
+        std::path::Path::new("."),
+    );
+
+    // Surface-level v2.1.0 invariants — the SARIF schema requires
+    // these fields and we want a tripwire if any disappear.
+    assert_eq!(sarif["version"], "2.1.0", "missing or wrong version field");
+    assert!(sarif["$schema"].is_string(), "$schema must be a string");
+    assert!(sarif["runs"].is_array(), "runs must be an array");
+    assert_eq!(
+        sarif["runs"].as_array().unwrap().len(),
+        1,
+        "exactly one run"
+    );
+
+    let run = &sarif["runs"][0];
+    assert!(run["tool"]["driver"]["name"].is_string());
+    assert_eq!(run["tool"]["driver"]["name"], "nyx");
+    assert!(run["tool"]["driver"]["rules"].is_array());
+    assert!(run["results"].is_array());
+
+    // Phase 25 extension: chains land on run.properties.chains.
+    let chains_array = run["properties"]["chains"].as_array().unwrap();
+    assert_eq!(chains_array.len(), 1, "exactly one chain emitted");
+
+    // Every chain object carries the documented shape.
+    let chain = &chains_array[0];
+    assert!(chain["stable_hash"].is_number());
+    assert!(chain["members"].is_array());
+    assert_eq!(chain["members"].as_array().unwrap().len(), 3);
+    assert!(chain["sink"].is_object());
+    assert!(chain["implied_impact"].is_string());
+    assert_eq!(chain["severity"], "critical");
+
+    // Per-result `chain_member_of` cross-reference.
+    let results = run["results"].as_array().unwrap();
+    let with_back_refs = results
+        .iter()
+        .filter(|r| r["properties"].get("chain_member_of").is_some())
+        .count();
+    assert_eq!(
+        with_back_refs, 3,
+        "every constituent SARIF result should carry chain_member_of"
+    );
+}
+
+#[test]
+fn determinism_across_input_permutations() {
+    // Same set of findings in two different orders must yield the
+    // same chain set (the composer canonicalises by stable_hash).
+    let surface = fixture_surface_map();
+    let findings = fixture_findings();
+    let cfg = ChainSearchConfig {
+        max_depth: 4,
+        min_score: 0.0,
+    };
+
+    let order_a = build_chain_edges_for_route(&findings, "/ws");
+    let mut findings_rev = findings.clone();
+    findings_rev.reverse();
+    let order_b = build_chain_edges_for_route(&findings_rev, "/ws");
+
+    let chains_a = find_chains(&order_a, &surface, cfg);
+    let chains_b = find_chains(&order_b, &surface, cfg);
+    let hashes_a: Vec<u64> = chains_a.iter().map(|c| c.stable_hash).collect();
+    let hashes_b: Vec<u64> = chains_b.iter().map(|c| c.stable_hash).collect();
+    assert_eq!(hashes_a, hashes_b);
+}
+
+#[test]
+fn authed_entry_downgrades_to_rce_without_browser_local() {
+    let mut surface = fixture_surface_map();
+    // Flip auth_required on the entry — should downgrade the chain.
+    if let SurfaceNode::EntryPoint(ref mut e) = surface.nodes[0] {
+        e.auth_required = true;
+    }
+    let findings = fixture_findings();
+    let edges = build_chain_edges_for_route(&findings, "/ws");
+    let chains = find_chains(
+        &edges,
+        &surface,
+        ChainSearchConfig {
+            max_depth: 4,
+            min_score: 0.0,
+        },
+    );
+    assert_eq!(chains.len(), 1);
+    assert_eq!(
+        chains[0].implied_impact,
+        ImpactCategory::Rce,
+        "auth-gated entry must not produce BrowserToLocalRce"
+    );
+    assert_eq!(chains[0].severity, ChainSeverity::Critical);
+}
diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs
index 18c62249..848682d4 100644
--- a/tests/integration_tests.rs
+++ b/tests/integration_tests.rs
@@ -615,17 +615,25 @@ fn binary_json_output() {
     );
 
     let stdout = String::from_utf8_lossy(&cmd.stdout);
-    // Find the JSON array in stdout (config notes and "Finished" surround it)
-    let json_start = stdout.find('[').expect("Expected JSON array in stdout");
-    let json_end = stdout.rfind(']').expect("Expected closing bracket in JSON") + 1;
+    // Phase 25: JSON output is `{ "findings": [...], "chains": [...] }`.
+    let json_start = stdout.find('{').expect("Expected JSON object in stdout");
+    let json_end = stdout.rfind('}').expect("Expected closing brace in JSON") + 1;
     let json_str = &stdout[json_start..json_end];
 
-    let parsed: Vec<serde_json::Value> =
-        serde_json::from_str(json_str).expect("stdout should contain valid JSON array");
+    let parsed: serde_json::Value =
+        serde_json::from_str(json_str).expect("stdout should contain valid JSON object");
+    let findings = parsed["findings"]
+        .as_array()
+        .expect("JSON output must have a `findings` array");
     assert!(
-        !parsed.is_empty(),
+        !findings.is_empty(),
         "Expected at least 1 finding in JSON output"
     );
+    // Phase 25: every scan emits a `chains` array (possibly empty).
+    assert!(
+        parsed["chains"].is_array(),
+        "JSON output must have a `chains` array"
+    );
 }
 
 // ── EJS / config / debug endpoint fixtures ──────────────────────────────────