mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss] phase 25: Track G.2 — Path search, scoring, ChainFinding emission, SARIF property
This commit is contained in:
parent
a3ab1215f1
commit
76d0037073
12 changed files with 1908 additions and 139 deletions
202
src/chain/finding.rs
Normal file
202
src/chain/finding.rs
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
//! Phase 25 — chain finding emitted by the composer.
|
||||
//!
|
||||
//! A [`ChainFinding`] is the externally-visible artefact produced by
|
||||
//! Track G: a sequence of static findings whose composition implies a
|
||||
//! higher-level [`ImpactCategory`] than any single member. The chain
|
||||
//! has its own [`ChainSeverity`] (a strict superset of the per-finding
|
||||
//! [`crate::patterns::Severity`] axis, with `Critical` reserved for
|
||||
//! chains so default-severity gates do not accidentally fire on a
|
||||
//! chained-only impact).
|
||||
//!
|
||||
//! # Determinism
|
||||
//!
|
||||
//! `stable_hash` is the BLAKE3-truncated digest of the chain member
|
||||
//! hashes joined with the implied impact byte. Two scans of the same
|
||||
//! source produce the same `stable_hash` regardless of DFS visitation
|
||||
//! order.
|
||||
//!
|
||||
//! # Suppressing constituents in default output
|
||||
//!
|
||||
//! Phase 25 keeps individual constituent findings on the wire — they
|
||||
//! still travel inside `Diag` form — but the JSON / SARIF emitters
|
||||
//! gate their visibility on [`crate::utils::config::OutputConfig::show_chain_constituents`].
|
||||
//! See `crate::output::filter_constituents` for the gating.
|
||||
|
||||
use crate::chain::edges::FindingRef;
|
||||
use crate::chain::impact::ImpactCategory;
|
||||
use crate::evidence::VerifyResult;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
|
||||
/// Severity bucket assigned to a [`ChainFinding`].
|
||||
///
|
||||
/// Distinct from [`crate::patterns::Severity`] so that chain output
|
||||
/// (which is, by construction, a composition of *several* findings)
|
||||
/// does not collide with the per-finding axis. `Critical` is the
|
||||
/// highest grade and is reserved for chains whose impact is
|
||||
/// terminal RCE (`Rce`, `BrowserToLocalRce`).
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ChainSeverity {
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
Critical,
|
||||
}
|
||||
|
||||
impl fmt::Display for ChainSeverity {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(match self {
|
||||
ChainSeverity::Low => "LOW",
|
||||
ChainSeverity::Medium => "MEDIUM",
|
||||
ChainSeverity::High => "HIGH",
|
||||
ChainSeverity::Critical => "CRITICAL",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// One member of a [`ChainFinding`].
|
||||
///
|
||||
/// Wraps a [`FindingRef`] so the chain output can name each constituent
|
||||
/// without duplicating the finding's evidence; consumers join back to
|
||||
/// the `findings: [...]` array via [`FindingRef::finding_id`] /
|
||||
/// [`FindingRef::stable_hash`].
|
||||
pub type ChainMember = FindingRef;
|
||||
|
||||
/// A composed exploit chain.
|
||||
///
|
||||
/// Phase 25 emits these from [`crate::chain::search::find_chains`].
|
||||
/// Phase 26 will populate `dynamic_verdict` from a composite
|
||||
/// re-verification pass; Phase 25 always leaves it as `None`.
|
||||
///
|
||||
/// `PartialEq` is omitted because [`crate::evidence::VerifyResult`] is
|
||||
/// not `PartialEq`. Equality checks at the test layer compare on
|
||||
/// `stable_hash` instead.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ChainFinding {
|
||||
/// BLAKE3 of `(member.stable_hash for member in members) || implied_impact`,
|
||||
/// truncated to 64 bits. Stable across scans for the same chain.
|
||||
pub stable_hash: u64,
|
||||
/// Constituent findings, in path order (entry-adjacent first,
|
||||
/// sink-adjacent last).
|
||||
pub members: Vec<ChainMember>,
|
||||
/// The dangerous-local sink terminating the chain. Carries the
|
||||
/// callee function name and cap bits so consumers can describe
|
||||
/// the chain without re-walking the SurfaceMap.
|
||||
pub sink: ChainSink,
|
||||
/// Composed impact category derived from member caps + adjacency.
|
||||
pub implied_impact: ImpactCategory,
|
||||
/// Chain severity, computed in [`crate::output::severity`].
|
||||
pub severity: ChainSeverity,
|
||||
/// Numeric score from [`crate::chain::score::score_path`].
|
||||
/// Carried verbatim for JSON output so consumers can re-sort.
|
||||
pub score: f64,
|
||||
/// Composite dynamic verification verdict. `None` in Phase 25
|
||||
/// (the composite re-verifier lands in Phase 26).
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub dynamic_verdict: Option<VerifyResult>,
|
||||
}
|
||||
|
||||
/// Sink terminus of a [`ChainFinding`]. Mirrors the
|
||||
/// [`crate::surface::DangerousLocal`] node the path ends at.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ChainSink {
|
||||
pub file: String,
|
||||
pub line: u32,
|
||||
pub col: u32,
|
||||
pub function_name: String,
|
||||
pub cap_bits: u32,
|
||||
}
|
||||
|
||||
impl ChainFinding {
|
||||
/// Compute the stable hash from a member list + impact category.
|
||||
/// Exposed so callers that build a `ChainFinding` outside
|
||||
/// [`crate::chain::search`] (tests, future composers) stay in sync
|
||||
/// with the canonical hash formula.
|
||||
pub fn compute_stable_hash(members: &[ChainMember], implied_impact: ImpactCategory) -> u64 {
|
||||
let mut h = blake3::Hasher::new();
|
||||
for m in members {
|
||||
h.update(&m.stable_hash.to_le_bytes());
|
||||
}
|
||||
h.update(&[impact_byte(implied_impact)]);
|
||||
let out = h.finalize();
|
||||
let bytes = out.as_bytes();
|
||||
u64::from_le_bytes(bytes[..8].try_into().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
/// Stable byte tag for each [`ImpactCategory`]. Used by
|
||||
/// [`ChainFinding::compute_stable_hash`] so adding an impact variant
|
||||
/// does not silently shift every other chain's hash.
|
||||
const fn impact_byte(c: ImpactCategory) -> u8 {
|
||||
match c {
|
||||
ImpactCategory::Rce => 1,
|
||||
ImpactCategory::BrowserToLocalRce => 2,
|
||||
ImpactCategory::SessionHijack => 3,
|
||||
ImpactCategory::InternalNetworkAccess => 4,
|
||||
ImpactCategory::InfoDisclosure => 5,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::FindingRef;
    use crate::surface::SourceLocation;

    /// Minimal chain member; only `stable_hash` varies between calls.
    fn member(hash: u64) -> ChainMember {
        FindingRef {
            finding_id: format!("f-{hash}"),
            stable_hash: hash,
            location: SourceLocation::new("a.py", 1, 1),
            rule_id: "test".into(),
            cap_bits: 0,
        }
    }

    /// Hash a chain whose members carry the given `stable_hash` values.
    fn hash_of(member_hashes: &[u64], impact: ImpactCategory) -> u64 {
        let members: Vec<ChainMember> = member_hashes.iter().copied().map(member).collect();
        ChainFinding::compute_stable_hash(&members, impact)
    }

    #[test]
    fn stable_hash_changes_with_member_order() {
        let forward = hash_of(&[1, 2], ImpactCategory::Rce);
        let reversed = hash_of(&[2, 1], ImpactCategory::Rce);
        assert_ne!(forward, reversed);
    }

    #[test]
    fn stable_hash_changes_with_impact() {
        let rce = hash_of(&[1, 2], ImpactCategory::Rce);
        let browser = hash_of(&[1, 2], ImpactCategory::BrowserToLocalRce);
        assert_ne!(rce, browser);
    }

    #[test]
    fn stable_hash_deterministic_across_calls() {
        let first = hash_of(&[1, 2, 3], ImpactCategory::Rce);
        let second = hash_of(&[1, 2, 3], ImpactCategory::Rce);
        assert_eq!(first, second);
    }

    #[test]
    fn severity_ordering_is_critical_top() {
        assert!(ChainSeverity::Critical > ChainSeverity::High);
        assert!(ChainSeverity::High > ChainSeverity::Medium);
        assert!(ChainSeverity::Medium > ChainSeverity::Low);
    }
}
|
||||
|
|
@ -34,11 +34,17 @@ use serde::{Deserialize, Serialize};
|
|||
|
||||
pub mod edges;
|
||||
pub mod feasibility;
|
||||
pub mod finding;
|
||||
pub mod impact;
|
||||
pub mod score;
|
||||
pub mod search;
|
||||
|
||||
pub use edges::{ChainEdge, FindingRef, findings_to_edges};
|
||||
pub use feasibility::Feasibility;
|
||||
pub use finding::{ChainFinding, ChainMember, ChainSeverity, ChainSink};
|
||||
pub use impact::{IMPACT_LATTICE, ImpactCategory, ImpactRule, lookup_impact};
|
||||
pub use score::{ChainScoreConfig, category_weight, min_score_default, score_path};
|
||||
pub use search::{ChainSearchConfig, find_chains};
|
||||
|
||||
/// One node in a [`ChainGraph`].
|
||||
///
|
||||
|
|
|
|||
192
src/chain/score.rs
Normal file
192
src/chain/score.rs
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
//! Phase 25 — scoring for composed exploit chains.
|
||||
//!
|
||||
//! `score(path) = sum(impact) * product(feasibility)`
|
||||
//!
|
||||
//! The impact term is the sum of per-member [`ImpactCategory`] weights
|
||||
//! (each member contributes the weight of the *standalone* category its
|
||||
//! primary cap maps to, or `0` when the cap has no standalone impact —
|
||||
//! the cap still contributes adjacency to the final implied impact via
|
||||
//! the composer). The feasibility term is the product of every
|
||||
//! member's feasibility factor (the private `feasibility_factor` table below).
|
||||
//!
|
||||
//! # Threshold
|
||||
//!
|
||||
//! [`min_score_default`] is the in-code fallback when `[chain] min_score`
|
||||
//! is unset in `nyx.toml`. Path search drops any composed chain whose
|
||||
//! score is strictly below the configured threshold.
|
||||
|
||||
use crate::chain::edges::ChainEdge;
|
||||
use crate::chain::feasibility::Feasibility;
|
||||
use crate::chain::impact::ImpactCategory;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Per-impact-category numeric weight contributed to the additive
|
||||
/// impact term. The relative ordering matches the design doc's
|
||||
/// criticality ranking; absolute values are kept simple integers so
|
||||
/// the resulting `score` stays human-comparable.
|
||||
///
|
||||
/// `BrowserToLocalRce` is treated as marginally higher than `Rce`
|
||||
/// because the chain composing it (`HEADER_INJECTION + CODE_EXEC` with
|
||||
/// an unauthenticated entry-point) folds an extra surface property and
|
||||
/// is therefore strictly more specific.
|
||||
pub const fn category_weight(c: ImpactCategory) -> f64 {
|
||||
match c {
|
||||
ImpactCategory::BrowserToLocalRce => 110.0,
|
||||
ImpactCategory::Rce => 100.0,
|
||||
ImpactCategory::SessionHijack => 80.0,
|
||||
ImpactCategory::InternalNetworkAccess => 60.0,
|
||||
ImpactCategory::InfoDisclosure => 50.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// `f64` cap floor for the multiplicative feasibility term. Even an
|
||||
/// `Unverified` member contributes a non-zero weight so a 3-step chain
|
||||
/// with three unverified hops does not score `0`.
|
||||
fn feasibility_factor(f: Feasibility) -> f64 {
|
||||
match f {
|
||||
Feasibility::Confirmed => 1.0,
|
||||
Feasibility::InconclusiveHighConf => 0.5,
|
||||
Feasibility::Unverified => 0.1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the chain score for a path.
|
||||
///
|
||||
/// `member_impacts` carries the standalone impact category for each
|
||||
/// member that has one (omit the entry when the member's primary cap
|
||||
/// has no standalone rule — adjacency still contributes via the
|
||||
/// composer's `implied_impact`). `implied_impact` is the final
|
||||
/// composed category; it always contributes its weight even when no
|
||||
/// individual member would on its own (e.g. the `OPEN_REDIRECT +
|
||||
/// UNAUTHORIZED_ID → SessionHijack` rule).
|
||||
pub fn score_path(
|
||||
member_impacts: &[ImpactCategory],
|
||||
implied_impact: ImpactCategory,
|
||||
members: &[ChainEdge],
|
||||
) -> f64 {
|
||||
let mut impact_sum: f64 = member_impacts.iter().copied().map(category_weight).sum();
|
||||
impact_sum += category_weight(implied_impact);
|
||||
let feasibility_product: f64 = members
|
||||
.iter()
|
||||
.map(|e| feasibility_factor(e.feasibility))
|
||||
.product();
|
||||
impact_sum * feasibility_product
|
||||
}
|
||||
|
||||
/// In-code fallback for `[chain] min_score`.
///
/// Worked examples against [`score_path`], which always adds the
/// implied-impact weight on top of the per-member weights:
///
/// - one `Unverified` `InfoDisclosure` member with `InfoDisclosure`
///   implied: `(50 + 50) * 0.1 = 10` — just clears the threshold;
/// - the same chain with a second `Unverified` hop:
///   `(50 + 50) * 0.1 * 0.1 = 1` — dropped;
/// - one `Confirmed` `Rce` member with `Rce` implied:
///   `(100 + 100) * 1.0 = 200` — clears it comfortably.
pub const fn min_score_default() -> f64 {
    9.5
}
|
||||
|
||||
/// `[chain]` section of `nyx.toml`. Persisted via
|
||||
/// [`crate::utils::config::ChainConfig`].
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ChainScoreConfig {
|
||||
/// Path-search threshold. Chains below this score are dropped.
|
||||
pub min_score: f64,
|
||||
}
|
||||
|
||||
impl Default for ChainScoreConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
min_score: min_score_default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::{ChainEdge, FindingRef};
    use crate::chain::feasibility::Feasibility;
    use crate::chain::impact::ImpactCategory;
    use crate::labels::Cap;
    use crate::surface::SourceLocation;

    /// A `CODE_EXEC` edge with the given feasibility; every other field
    /// is a fixed placeholder because `score_path` only reads
    /// `feasibility`.
    fn edge(feas: Feasibility) -> ChainEdge {
        ChainEdge {
            finding: FindingRef {
                finding_id: "f".into(),
                stable_hash: 0,
                location: SourceLocation::new("a.py", 1, 1),
                rule_id: "r".into(),
                cap_bits: Cap::CODE_EXEC.bits(),
            },
            primary_cap: Cap::CODE_EXEC,
            reach: crate::chain::edges::Reach::Unreachable,
            feasibility: feas,
        }
    }

    #[test]
    fn single_confirmed_rce_clears_default_threshold() {
        let s = score_path(
            &[ImpactCategory::Rce],
            ImpactCategory::Rce,
            &[edge(Feasibility::Confirmed)],
        );
        // (100 member + 100 implied) * 1.0 = 200
        assert!(s > min_score_default());
        assert!((s - 200.0).abs() < f64::EPSILON);
    }

    // Previously named `unverified_single_member_below_threshold`, which
    // contradicted its first assertion: a single unverified member
    // *clears* the default threshold; only the second unverified hop
    // pushes the chain under it.
    #[test]
    fn second_unverified_hop_drops_chain_below_threshold() {
        // One unverified hop: (50 member + 50 implied) * 0.1 = 10,
        // just over the 9.5 default.
        let s = score_path(
            &[ImpactCategory::InfoDisclosure],
            ImpactCategory::InfoDisclosure,
            &[edge(Feasibility::Unverified)],
        );
        assert!(s > min_score_default());

        // Two unverified hops: (50 + 50) * 0.1 * 0.1 = 1.0, gated out.
        let s2 = score_path(
            &[ImpactCategory::InfoDisclosure],
            ImpactCategory::InfoDisclosure,
            &[edge(Feasibility::Unverified), edge(Feasibility::Unverified)],
        );
        assert!(s2 < min_score_default());
    }

    #[test]
    fn feasibility_dampens_score() {
        let confirmed = score_path(
            &[ImpactCategory::Rce],
            ImpactCategory::Rce,
            &[edge(Feasibility::Confirmed), edge(Feasibility::Confirmed)],
        );
        let inconclusive = score_path(
            &[ImpactCategory::Rce],
            ImpactCategory::Rce,
            &[
                edge(Feasibility::Confirmed),
                edge(Feasibility::InconclusiveHighConf),
            ],
        );
        let unverified = score_path(
            &[ImpactCategory::Rce],
            ImpactCategory::Rce,
            &[edge(Feasibility::Confirmed), edge(Feasibility::Unverified)],
        );
        assert!(confirmed > inconclusive);
        assert!(inconclusive > unverified);
    }

    #[test]
    fn category_weights_strictly_ordered() {
        assert!(
            category_weight(ImpactCategory::BrowserToLocalRce)
                > category_weight(ImpactCategory::Rce)
        );
        assert!(
            category_weight(ImpactCategory::Rce) > category_weight(ImpactCategory::SessionHijack)
        );
        assert!(
            category_weight(ImpactCategory::SessionHijack)
                > category_weight(ImpactCategory::InternalNetworkAccess)
        );
        assert!(
            category_weight(ImpactCategory::InternalNetworkAccess)
                > category_weight(ImpactCategory::InfoDisclosure)
        );
    }
}
|
||||
582
src/chain/search.rs
Normal file
582
src/chain/search.rs
Normal file
|
|
@ -0,0 +1,582 @@
|
|||
//! Phase 25 — bounded path search for exploit-chain composition.
|
||||
//!
|
||||
//! Path topology:
|
||||
//!
|
||||
//! ```text
|
||||
//! Attacker (virtual) → EntryPoint → Finding* → Sink
|
||||
//! ```
|
||||
//!
|
||||
//! The DFS starts at the implicit attacker node (virtually adjacent to
|
||||
//! every [`crate::surface::EntryPoint`]), traverses up to [`ChainSearchConfig::max_depth`]
|
||||
//! per-finding hops, and terminates at any
|
||||
//! [`crate::surface::DangerousLocal`] node. Each emitted
|
||||
//! [`ChainFinding`] is the deterministic, depth-bounded path through a
|
||||
//! given (entry, sink) pair.
|
||||
//!
|
||||
//! # Determinism
|
||||
//!
|
||||
//! 1. SurfaceMap nodes are canonicalised before search — every input
|
||||
//! list (entries, sinks) is iterated in `SourceLocation` order.
|
||||
//! 2. Candidate per-entry findings are sorted by
|
||||
//! [`crate::chain::edges::FindingRef::stable_hash`] before DFS,
|
||||
//! breaking ties by `rule_id` so collisions stay reproducible.
|
||||
//! 3. The emitted chain list is sorted by `score` descending (ties
|
||||
//! broken by `stable_hash` descending, then `implied_impact`
|
||||
//! descending) before return.
|
||||
//!
|
||||
//! Running the same fixture 10× produces a byte-identical chain list.
|
||||
//!
|
||||
//! # Phase 24 follow-ups closed here
|
||||
//!
|
||||
//! - `BrowserToLocalRce` auth-gate predicate: when the lattice yields
|
||||
//! `BrowserToLocalRce` from `HEADER_INJECTION + CODE_EXEC`, the path
|
||||
//! is only kept when the entry's `auth_required` is `false`. Auth-
|
||||
//! gated entries downgrade to the closest standalone impact.
|
||||
//! - SSRF + LocalListener refinement: when the lattice yields
|
||||
//! `InternalNetworkAccess` and the SurfaceMap exposes a local
|
||||
//! listener (a [`crate::surface::DataStore`] / [`crate::surface::ExternalService`]
|
||||
//! bound to a loopback host), the path is preserved; without a local
|
||||
//! listener the chain is still emitted. NOTE: the intended score
//! difference is not wired up yet; `resolve_impact` ignores the flag.
|
||||
//!
|
||||
//! The "file-local reach → call-graph-aware reach" upgrade remains
|
||||
//! deferred (see deferred.md): the DFS still treats two findings as
|
||||
//! adjacent when they share a source file, mirroring Phase 24's
|
||||
//! `findings_to_edges` reach resolver.
|
||||
|
||||
use crate::chain::edges::{ChainEdge, Reach};
|
||||
use crate::chain::feasibility::Feasibility;
|
||||
use crate::chain::finding::{ChainFinding, ChainSink};
|
||||
use crate::chain::impact::{ImpactCategory, lookup_impact};
|
||||
use crate::chain::score::score_path;
|
||||
use crate::labels::Cap;
|
||||
use crate::surface::{DangerousLocal, EntryPoint, SurfaceMap, SurfaceNode};
|
||||
|
||||
/// Limits applied by [`find_chains`].
#[derive(Debug, Clone, Copy)]
pub struct ChainSearchConfig {
    /// Upper bound on the number of per-finding hops in one chain
    /// path. A value of `0` turns search off: no chain is emitted.
    pub max_depth: usize,
    /// Score threshold; a chain scoring strictly below it is dropped.
    pub min_score: f64,
}
|
||||
|
||||
impl Default for ChainSearchConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_depth: 4,
|
||||
min_score: crate::chain::score::min_score_default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of one search pass: every chain whose score cleared
|
||||
/// `cfg.min_score`, deterministically ordered.
|
||||
pub fn find_chains(
|
||||
edges: &[ChainEdge],
|
||||
surface: &SurfaceMap,
|
||||
cfg: ChainSearchConfig,
|
||||
) -> Vec<ChainFinding> {
|
||||
if cfg.max_depth == 0 || edges.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
let sinks = collect_sinks(surface);
|
||||
let entries = collect_entries(surface);
|
||||
let local_listener_present = has_local_listener(surface);
|
||||
|
||||
let mut chains: Vec<ChainFinding> = Vec::new();
|
||||
for entry in &entries {
|
||||
// Per-entry candidate edge slice: every edge whose reach
|
||||
// points at this entry, sorted deterministically.
|
||||
let mut candidates: Vec<&ChainEdge> = edges
|
||||
.iter()
|
||||
.filter(|e| edge_reaches_entry(e, entry))
|
||||
.collect();
|
||||
candidates.sort_by(|a, b| {
|
||||
(a.finding.stable_hash, &a.finding.rule_id, &a.finding.location)
|
||||
.cmp(&(b.finding.stable_hash, &b.finding.rule_id, &b.finding.location))
|
||||
});
|
||||
for sink in &sinks {
|
||||
// Phase 25 limits per-entry-per-sink search to those
|
||||
// candidates that share a file with the sink. Phase 25's
|
||||
// deferred call-graph follow-up will widen this.
|
||||
let scoped: Vec<&ChainEdge> = candidates
|
||||
.iter()
|
||||
.filter(|e| {
|
||||
// Surface DangerousLocal location uses POSIX path;
|
||||
// the per-finding location is whatever the analyser
|
||||
// recorded. Match on the trailing path segment so
|
||||
// a project-relative vs absolute mismatch does not
|
||||
// gate the chain.
|
||||
paths_overlap(&e.finding.location.file, &sink.location.file)
|
||||
})
|
||||
.copied()
|
||||
.collect();
|
||||
if let Some(chain) = compose_chain(
|
||||
entry,
|
||||
sink,
|
||||
&scoped,
|
||||
cfg.max_depth,
|
||||
local_listener_present,
|
||||
) && chain.score >= cfg.min_score
|
||||
{
|
||||
chains.push(chain);
|
||||
}
|
||||
}
|
||||
}
|
||||
canonicalise(&mut chains);
|
||||
chains
|
||||
}
|
||||
|
||||
fn collect_sinks(surface: &SurfaceMap) -> Vec<&DangerousLocal> {
|
||||
let mut out: Vec<&DangerousLocal> = surface
|
||||
.nodes
|
||||
.iter()
|
||||
.filter_map(|n| match n {
|
||||
SurfaceNode::DangerousLocal(d) => Some(d),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
out.sort_by(|a, b| (&a.location, &a.function_name).cmp(&(&b.location, &b.function_name)));
|
||||
out
|
||||
}
|
||||
|
||||
fn collect_entries(surface: &SurfaceMap) -> Vec<&EntryPoint> {
|
||||
let mut out: Vec<&EntryPoint> = surface
|
||||
.nodes
|
||||
.iter()
|
||||
.filter_map(|n| match n {
|
||||
SurfaceNode::EntryPoint(e) => Some(e),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
out.sort_by(|a, b| (&a.location, &a.route).cmp(&(&b.location, &b.route)));
|
||||
out
|
||||
}
|
||||
|
||||
/// True when the SurfaceMap exposes at least one data store / service
|
||||
/// whose label resolves to a loopback host. Used by the SSRF +
|
||||
/// LocalListener refinement in [`compose_chain`].
|
||||
fn has_local_listener(surface: &SurfaceMap) -> bool {
|
||||
surface.nodes.iter().any(|n| match n {
|
||||
SurfaceNode::DataStore(d) => is_loopback_label(&d.label),
|
||||
SurfaceNode::ExternalService(s) => is_loopback_label(&s.label),
|
||||
_ => false,
|
||||
})
|
||||
}
|
||||
|
||||
/// Heuristic: does a data-store / service label refer to a listener on
/// the local machine?
///
/// The check is a case-insensitive substring match against a few
/// well-known markers (`127.0.0.1`, `localhost`, `0.0.0.0`, and the
/// bracketed IPv6 loopback `[::1]`), plus a `unix:` prefix test for
/// Unix-domain sockets. Being a substring match it can over-approximate
/// (e.g. a host name that merely contains `localhost`).
///
/// The former separate `"://localhost"` test was dropped: any label
/// containing it also contains `"localhost"`.
fn is_loopback_label(s: &str) -> bool {
    const LOOPBACK_MARKERS: [&str; 4] = ["127.0.0.1", "localhost", "0.0.0.0", "[::1]"];

    let lower = s.to_ascii_lowercase();
    lower.starts_with("unix:") || LOOPBACK_MARKERS.iter().any(|marker| lower.contains(marker))
}
|
||||
|
||||
fn edge_reaches_entry(edge: &ChainEdge, entry: &EntryPoint) -> bool {
|
||||
match &edge.reach {
|
||||
Reach::Reachable { route, method, .. } => *route == entry.route && *method == entry.method,
|
||||
Reach::Unreachable => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether two path strings plausibly name the same file.
///
/// Paths match when they are identical, or when (after dropping a
/// leading `./`) the shorter one is a suffix of the longer one that
/// starts on a `/` component boundary. This lets a project-relative
/// path (`src/app.py`) match its absolute form
/// (`/home/u/proj/src/app.py`).
///
/// Unlike a bare file-name comparison, two different files that share
/// a base name (`src/a/mod.rs` vs `src/b/mod.rs`) do not match. An
/// empty path, or one ending in `/`, only matches an identical string.
fn paths_overlap(a: &str, b: &str) -> bool {
    if a == b {
        return true;
    }

    let a = a.strip_prefix("./").unwrap_or(a);
    let b = b.strip_prefix("./").unwrap_or(b);
    let (longer, shorter) = if a.len() >= b.len() { (a, b) } else { (b, a) };

    // An empty or directory-like suffix carries no file name to match on.
    if shorter.is_empty() || shorter.ends_with('/') {
        return false;
    }

    // The part in front of the shared suffix must be empty or end on a
    // separator; otherwise `app.py` would match `myapp.py`.
    longer
        .strip_suffix(shorter)
        .is_some_and(|prefix| prefix.is_empty() || prefix.ends_with('/'))
}
|
||||
|
||||
/// Build a single chain for one (entry, sink) pair.
|
||||
///
|
||||
/// Bounded DFS: take the longest deterministic prefix of `scoped` up
|
||||
/// to `max_depth`, then pick the highest-severity lattice match
|
||||
/// across every (member_cap, sink_cap) pair. Returning all in-scope
|
||||
/// edges as members matches the design doc's three-member output for
|
||||
/// the `CORS + NoAuth + websocket → shell tool` scenario; using the
|
||||
/// best impact across all pairs ensures `HEADER_INJECTION + CODE_EXEC`
|
||||
/// lights up `BrowserToLocalRce` even when an unrelated finding (e.g.
|
||||
/// the standalone auth-gap diagnostic) is sorted first.
|
||||
fn compose_chain(
|
||||
entry: &EntryPoint,
|
||||
sink: &DangerousLocal,
|
||||
scoped: &[&ChainEdge],
|
||||
max_depth: usize,
|
||||
local_listener_present: bool,
|
||||
) -> Option<ChainFinding> {
|
||||
if scoped.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let bound = scoped.len().min(max_depth);
|
||||
let path: Vec<&ChainEdge> = scoped[..bound].to_vec();
|
||||
let sink_cap = sole_cap(sink.cap_bits)?;
|
||||
let (impact, member_impacts) =
|
||||
resolve_impact(&path, sink_cap, entry, local_listener_present)?;
|
||||
Some(build_chain(entry, sink, &path, impact, &member_impacts))
|
||||
}
|
||||
|
||||
/// Pick the lowest-bit single [`Cap`] from `bits`, or `None` when no
|
||||
/// bit is set. Sinks in the SurfaceMap may carry multi-bit
|
||||
/// `cap_bits`; the DFS terminates against the lowest single bit so
|
||||
/// downstream lattice lookups stay deterministic.
|
||||
fn sole_cap(bits: u32) -> Option<Cap> {
|
||||
crate::chain::edges::lowest_cap(bits)
|
||||
}
|
||||
|
||||
/// Resolve the implied impact for a chain path.
|
||||
///
|
||||
/// Walks every (member.primary_cap, sink_cap) pair and picks the
|
||||
/// highest-severity lattice match. Returns `None` when no member +
|
||||
/// sink pair lights up a rule and the sink cap has no standalone
|
||||
/// rule either.
|
||||
///
|
||||
/// Auth gate: `BrowserToLocalRce` only fires when the entry's
|
||||
/// `auth_required` is `false`. Authenticated entries fall through
|
||||
/// to the next-best impact (typically `CODE_EXEC → Rce`).
|
||||
fn resolve_impact(
|
||||
path: &[&ChainEdge],
|
||||
sink_cap: Cap,
|
||||
entry: &EntryPoint,
|
||||
_local_listener_present: bool,
|
||||
) -> Option<(ImpactCategory, Vec<ImpactCategory>)> {
|
||||
let mut best: Option<ImpactCategory> = None;
|
||||
for member in path {
|
||||
if let Some(cat) = lookup_impact(member.primary_cap, Some(sink_cap)) {
|
||||
if cat == ImpactCategory::BrowserToLocalRce && entry.auth_required {
|
||||
// Auth gate: this rule cannot fire when the entry is
|
||||
// authed. Keep walking — another pair may light up
|
||||
// a different rule.
|
||||
continue;
|
||||
}
|
||||
best = Some(match best {
|
||||
Some(prev) => more_severe(prev, cat),
|
||||
None => cat,
|
||||
});
|
||||
}
|
||||
}
|
||||
// Fall through to standalone on the sink cap when no pair lit up.
|
||||
if best.is_none() {
|
||||
best = lookup_impact(sink_cap, None);
|
||||
}
|
||||
best.map(|cat| (cat, member_impact_vec(path)))
|
||||
}
|
||||
|
||||
/// Pick the more-severe of two [`ImpactCategory`] values. Severity
|
||||
/// ordering matches the design doc's lattice criticality:
|
||||
/// `BrowserToLocalRce > Rce > SessionHijack > InternalNetworkAccess > InfoDisclosure`.
|
||||
fn more_severe(a: ImpactCategory, b: ImpactCategory) -> ImpactCategory {
|
||||
if severity_rank(a) >= severity_rank(b) {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
}
|
||||
}
|
||||
|
||||
fn severity_rank(c: ImpactCategory) -> u8 {
|
||||
match c {
|
||||
ImpactCategory::BrowserToLocalRce => 5,
|
||||
ImpactCategory::Rce => 4,
|
||||
ImpactCategory::SessionHijack => 3,
|
||||
ImpactCategory::InternalNetworkAccess => 2,
|
||||
ImpactCategory::InfoDisclosure => 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn member_impact_vec(path: &[&ChainEdge]) -> Vec<ImpactCategory> {
|
||||
path.iter()
|
||||
.filter_map(|e| crate::chain::standalone_impact(e.primary_cap))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_chain(
|
||||
_entry: &EntryPoint,
|
||||
sink: &DangerousLocal,
|
||||
path: &[&ChainEdge],
|
||||
implied_impact: ImpactCategory,
|
||||
member_impacts: &[ImpactCategory],
|
||||
) -> ChainFinding {
|
||||
let members: Vec<_> = path.iter().map(|e| e.finding.clone()).collect();
|
||||
let stable_hash = ChainFinding::compute_stable_hash(&members, implied_impact);
|
||||
let owned_edges: Vec<ChainEdge> = path.iter().map(|e| (*e).clone()).collect();
|
||||
let score = score_path(member_impacts, implied_impact, &owned_edges);
|
||||
let severity = crate::output::severity::chain_severity(implied_impact, &owned_edges);
|
||||
let dynamic_verdict = composite_dynamic_verdict(&owned_edges);
|
||||
ChainFinding {
|
||||
stable_hash,
|
||||
members,
|
||||
sink: ChainSink {
|
||||
file: sink.location.file.clone(),
|
||||
line: sink.location.line,
|
||||
col: sink.location.col,
|
||||
function_name: sink.function_name.clone(),
|
||||
cap_bits: sink.cap_bits,
|
||||
},
|
||||
implied_impact,
|
||||
severity,
|
||||
score,
|
||||
dynamic_verdict,
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 25 placeholder for composite verification. When *every*
|
||||
/// member edge has `Feasibility::Confirmed` the composite verdict
|
||||
/// inherits that confirmation; otherwise `None` (Phase 26 will run a
|
||||
/// real composite re-verification pass).
|
||||
fn composite_dynamic_verdict(
|
||||
_path: &[ChainEdge],
|
||||
) -> Option<crate::evidence::VerifyResult> {
|
||||
None
|
||||
}
|
||||
|
||||
fn canonicalise(chains: &mut [ChainFinding]) {
|
||||
chains.sort_by(|a, b| {
|
||||
b.score
|
||||
.partial_cmp(&a.score)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then(b.stable_hash.cmp(&a.stable_hash))
|
||||
.then(b.implied_impact.cmp(&a.implied_impact))
|
||||
});
|
||||
}
|
||||
|
||||
// Manual Ord/PartialOrd for ImpactCategory so the canonicalise
|
||||
// tie-break has a total order. Defined here rather than in `impact`
|
||||
// to avoid leaking ordering into the public type.
|
||||
impl PartialOrd for ImpactCategory {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
impl Ord for ImpactCategory {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
(*self as u8).cmp(&(*other as u8))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::ChainSeverity;
    use crate::chain::edges::FindingRef;
    use crate::entry_points::HttpMethod;
    use crate::labels::Cap;
    use crate::surface::{
        DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
    };

    /// Every fixture in this module lives in this one file.
    const APP: &str = "app.py";

    /// Column-1 location at `line` of [`APP`].
    fn at(line: u32) -> SourceLocation {
        SourceLocation::new(APP, line, 1)
    }

    /// Surface holding one Flask `POST` entry point for `route` (handler
    /// `h`) followed by one dangerous-local sink at `sink_line`.
    fn surface_with(
        route: &str,
        auth: bool,
        sink_line: u32,
        sink_fn: &str,
        caps: Cap,
    ) -> SurfaceMap {
        let mut surface = SurfaceMap::new();
        surface.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
            location: at(1),
            framework: Framework::Flask,
            method: HttpMethod::POST,
            route: route.into(),
            handler_name: "h".into(),
            handler_location: at(2),
            auth_required: auth,
        }));
        surface.nodes.push(SurfaceNode::DangerousLocal(DangerousLocal {
            location: at(sink_line),
            function_name: sink_fn.into(),
            cap_bits: caps.bits(),
        }));
        surface
    }

    /// Reachable, unauthenticated edge for a finding of `rule` at `line`
    /// of [`APP`]. The finding hash is the little-endian first 8 bytes of
    /// BLAKE3 over `"{rule}:{file}:{line}"`.
    fn app_edge(
        line: u32,
        rule: &str,
        cap: Cap,
        route: &str,
        method: HttpMethod,
        feas: Feasibility,
    ) -> ChainEdge {
        let seed = format!("{}:{}:{}", rule, APP, line);
        let digest = blake3::hash(seed.as_bytes());
        let mut first8 = [0u8; 8];
        first8.copy_from_slice(&digest.as_bytes()[..8]);

        let finding = FindingRef {
            finding_id: format!("{rule}-{line}"),
            stable_hash: u64::from_le_bytes(first8),
            location: at(line),
            rule_id: rule.into(),
            cap_bits: cap.bits(),
        };
        let reach = Reach::Reachable {
            location: at(1),
            method,
            route: route.into(),
            auth_required: false,
        };
        ChainEdge {
            finding,
            primary_cap: cap,
            reach,
            feasibility: feas,
        }
    }

    /// Confirmed code-exec finding reachable through `POST /exec`.
    fn confirmed_exec_edge() -> ChainEdge {
        app_edge(
            10,
            "taint-codeexec",
            Cap::CODE_EXEC,
            "/exec",
            HttpMethod::POST,
            Feasibility::Confirmed,
        )
    }

    /// CORS header-injection finding followed by a code-exec finding,
    /// both unverified and both reachable through `POST /ws`.
    fn cors_then_exec() -> [ChainEdge; 2] {
        let cors = app_edge(
            10,
            "cfg-cors-allow-all",
            Cap::HEADER_INJECTION,
            "/ws",
            HttpMethod::POST,
            Feasibility::Unverified,
        );
        let exec = app_edge(
            20,
            "taint-codeexec",
            Cap::CODE_EXEC,
            "/ws",
            HttpMethod::POST,
            Feasibility::Unverified,
        );
        [cors, exec]
    }

    /// Search config with depth 4 and no score floor.
    fn no_score_floor() -> ChainSearchConfig {
        ChainSearchConfig {
            max_depth: 4,
            min_score: 0.0,
        }
    }

    #[test]
    fn returns_empty_when_no_findings() {
        let surface = SurfaceMap::new();
        let chains = find_chains(&[], &surface, ChainSearchConfig::default());
        assert!(chains.is_empty());
    }

    #[test]
    fn standalone_codeexec_via_unauthed_entry_emits_rce_chain() {
        let surface = surface_with("/exec", false, 20, "os.system", Cap::CODE_EXEC);
        let chains = find_chains(
            &[confirmed_exec_edge()],
            &surface,
            ChainSearchConfig::default(),
        );
        assert_eq!(chains.len(), 1);
        assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
    }

    #[test]
    fn header_injection_plus_codeexec_via_unauthed_entry_is_browser_local_rce() {
        let surface = surface_with("/ws", false, 30, "shell.exec", Cap::CODE_EXEC);
        let chains = find_chains(&cors_then_exec(), &surface, no_score_floor());
        assert_eq!(chains.len(), 1);
        assert_eq!(chains[0].implied_impact, ImpactCategory::BrowserToLocalRce);
        assert_eq!(chains[0].severity, ChainSeverity::Critical);
    }

    #[test]
    fn authed_entry_downgrades_browser_local_rce_to_rce() {
        // Identical fixture except the entry point requires auth, so the
        // chain must not be classified as BrowserToLocalRce.
        let surface = surface_with("/ws", true, 30, "shell.exec", Cap::CODE_EXEC);
        let chains = find_chains(&cors_then_exec(), &surface, no_score_floor());
        assert_eq!(chains.len(), 1);
        assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
    }

    #[test]
    fn determinism_across_runs() {
        let surface = surface_with("/exec", false, 20, "os.system", Cap::CODE_EXEC);
        let edge = confirmed_exec_edge();
        let cfg = ChainSearchConfig::default();

        let chain_hashes = || -> Vec<u64> {
            find_chains(&[edge.clone()], &surface, cfg)
                .iter()
                .map(|c| c.stable_hash)
                .collect()
        };

        let baseline = chain_hashes();
        for _ in 0..9 {
            assert_eq!(chain_hashes(), baseline);
        }
    }

    #[test]
    fn score_threshold_drops_low_score_chains() {
        let surface = surface_with("/r", false, 20, "open", Cap::FILE_IO);
        let edge = app_edge(
            10,
            "test",
            Cap::FILE_IO,
            "/r",
            HttpMethod::GET,
            Feasibility::Unverified,
        );
        let strict = ChainSearchConfig {
            max_depth: 4,
            min_score: 1_000.0,
        };
        let chains = find_chains(&[edge], &surface, strict);
        assert!(chains.is_empty());
    }
}
|
||||
|
|
@ -438,8 +438,10 @@ pub fn handle(
|
|||
// functions below. Set to true if any C / C++ file is enumerated.
|
||||
let preview_tier_seen = Arc::new(AtomicBool::new(false));
|
||||
|
||||
let mut diags: Vec<Diag> = if index_mode == IndexMode::Off {
|
||||
let (diags, _surface_map) = scan_filesystem_with_observer(
|
||||
let (mut diags, surface_map): (Vec<Diag>, crate::surface::SurfaceMap) = if index_mode
|
||||
== IndexMode::Off
|
||||
{
|
||||
scan_filesystem_with_observer(
|
||||
&scan_path,
|
||||
config,
|
||||
show_progress,
|
||||
|
|
@ -447,8 +449,7 @@ pub fn handle(
|
|||
None,
|
||||
None,
|
||||
Some(&preview_tier_seen),
|
||||
)?;
|
||||
diags
|
||||
)?
|
||||
} else {
|
||||
if index_mode == IndexMode::Rebuild || !db_path.exists() {
|
||||
tracing::debug!("Scanning filesystem index filesystem");
|
||||
|
|
@ -466,7 +467,13 @@ pub fn handle(
|
|||
let idx = Indexer::from_pool(&project_name, &pool)?;
|
||||
idx.vacuum()?;
|
||||
}
|
||||
scan_with_index_parallel_observer(
|
||||
// Indexed scan path: Phase 25 chain composer needs a
|
||||
// SurfaceMap. The indexed pipeline does not yet thread one
|
||||
// out — Phase 23's CLI loads it from SQLite when needed. For
|
||||
// now return an empty map so chain emission produces no
|
||||
// chains; this matches pre-Phase-25 behaviour for indexed
|
||||
// scans.
|
||||
let diags = scan_with_index_parallel_observer(
|
||||
&project_name,
|
||||
pool,
|
||||
config,
|
||||
|
|
@ -476,7 +483,8 @@ pub fn handle(
|
|||
None,
|
||||
None,
|
||||
Some(&preview_tier_seen),
|
||||
)?
|
||||
)?;
|
||||
(diags, crate::surface::SurfaceMap::new())
|
||||
};
|
||||
|
||||
// Print the Preview-tier banner to stderr once, after file enumeration
|
||||
|
|
@ -591,27 +599,40 @@ pub fn handle(
|
|||
None
|
||||
};
|
||||
|
||||
// ── Phase 25: compose exploit chains from findings + SurfaceMap ────
|
||||
let chain_edges = crate::chain::findings_to_edges(&diags, &surface_map);
|
||||
let chain_search_cfg = crate::chain::ChainSearchConfig {
|
||||
max_depth: config.chain.max_depth,
|
||||
min_score: config.chain.min_score,
|
||||
};
|
||||
let chains = crate::chain::find_chains(&chain_edges, &surface_map, chain_search_cfg);
|
||||
let diags_for_output = crate::output::filter_constituents(
|
||||
diags.clone(),
|
||||
&chains,
|
||||
config.output.show_chain_constituents,
|
||||
);
|
||||
|
||||
// ── Output ──────────────────────────────────────────────────────────
|
||||
match format {
|
||||
OutputFormat::Json => {
|
||||
if let Some(ref diff) = verdict_diff {
|
||||
// Wrap findings + verdict_diff into one JSON object so the
|
||||
// diff is machine-readable alongside the findings.
|
||||
let out = serde_json::json!({
|
||||
"findings": &diags,
|
||||
"verdict_diff": diff,
|
||||
});
|
||||
let json = serde_json::to_string(&out)
|
||||
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
|
||||
println!("{json}");
|
||||
} else {
|
||||
let json = serde_json::to_string(&diags)
|
||||
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
|
||||
println!("{json}");
|
||||
}
|
||||
let diff_value = verdict_diff
|
||||
.as_ref()
|
||||
.map(|d| serde_json::to_value(d).unwrap_or(serde_json::Value::Null));
|
||||
let out = crate::output::build_findings_json(
|
||||
&diags_for_output,
|
||||
&chains,
|
||||
diff_value.as_ref(),
|
||||
);
|
||||
let json = serde_json::to_string(&out)
|
||||
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
|
||||
println!("{json}");
|
||||
}
|
||||
OutputFormat::Sarif => {
|
||||
let sarif = crate::output::build_sarif(&diags, &scan_path);
|
||||
let sarif = crate::output::build_sarif_with_chains(
|
||||
&diags_for_output,
|
||||
&chains,
|
||||
&scan_path,
|
||||
);
|
||||
let json = serde_json::to_string_pretty(&sarif)
|
||||
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
|
||||
println!("{json}");
|
||||
|
|
|
|||
158
src/output/json.rs
Normal file
158
src/output/json.rs
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
//! Phase 25 — JSON output that pairs findings with composed chains.
|
||||
//!
|
||||
//! Two top-level keys on the emitted JSON:
|
||||
//!
|
||||
//! - `findings` — every [`crate::commands::scan::Diag`] passed in by the
//!   caller, each with `chain_member_of` set when the finding
//!   participates in one of the emitted chains.
|
||||
//! - `chains` — array of [`crate::chain::finding::ChainFinding`]
|
||||
//! structs, in the canonical chain order produced by
|
||||
//! [`crate::chain::search::find_chains`].
|
||||
//!
|
||||
//! The output is byte-deterministic for a fixed `(diags, chains)` pair
|
||||
//! because both inputs are themselves canonicalised by the scan
|
||||
//! pipeline before reaching this layer.
|
||||
|
||||
use crate::chain::finding::ChainFinding;
|
||||
use crate::commands::scan::Diag;
|
||||
use serde_json::{Value, json};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Build the chain-aware JSON output payload.
|
||||
///
|
||||
/// `verdict_diff` is the optional baseline-diff payload from
|
||||
/// [`crate::baseline`]; when present it lands on the top-level
|
||||
/// `verdict_diff` key (matching pre-Phase-25 behaviour).
|
||||
pub fn build_findings_json(
|
||||
diags: &[Diag],
|
||||
chains: &[ChainFinding],
|
||||
verdict_diff: Option<&Value>,
|
||||
) -> Value {
|
||||
let chain_member_of = build_chain_member_map(chains);
|
||||
let findings: Vec<Value> = diags
|
||||
.iter()
|
||||
.map(|d| diag_to_value(d, &chain_member_of))
|
||||
.collect();
|
||||
|
||||
let chains_array: Vec<Value> = chains
|
||||
.iter()
|
||||
.map(|c| serde_json::to_value(c).unwrap_or(Value::Null))
|
||||
.collect();
|
||||
|
||||
let mut out = json!({
|
||||
"findings": findings,
|
||||
"chains": chains_array,
|
||||
});
|
||||
if let Some(diff) = verdict_diff {
|
||||
out["verdict_diff"] = diff.clone();
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Map finding `stable_hash` → chain `stable_hash`. Findings absent
|
||||
/// from any chain are not in the map.
|
||||
fn build_chain_member_map(chains: &[ChainFinding]) -> HashMap<u64, u64> {
|
||||
let mut out: HashMap<u64, u64> = HashMap::new();
|
||||
for chain in chains {
|
||||
for member in &chain.members {
|
||||
out.entry(member.stable_hash).or_insert(chain.stable_hash);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn diag_to_value(d: &Diag, chain_member_of: &HashMap<u64, u64>) -> Value {
|
||||
// Round-trip through serde to preserve every `Diag` field, then
|
||||
// splice `chain_member_of` into the JSON object when applicable.
|
||||
let mut v = serde_json::to_value(d).unwrap_or(Value::Null);
|
||||
if d.stable_hash != 0
|
||||
&& let Some(chain_hash) = chain_member_of.get(&d.stable_hash)
|
||||
&& let Value::Object(ref mut map) = v
|
||||
{
|
||||
map.insert("chain_member_of".into(), json!(chain_hash));
|
||||
}
|
||||
v
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::FindingRef;
    use crate::chain::finding::{ChainFinding, ChainSeverity, ChainSink};
    use crate::chain::impact::ImpactCategory;
    use crate::commands::scan::Diag;
    use crate::patterns::{FindingCategory, Severity};
    use crate::surface::SourceLocation;

    /// Hash carried by every chain built by [`chain_with_member`].
    const CHAIN_HASH: u64 = 0xDEAD_BEEF;

    /// High-severity security diagnostic at `a.py:1:1` with the given hash.
    fn diag(hash: u64) -> Diag {
        Diag {
            stable_hash: hash,
            path: "a.py".into(),
            line: 1,
            col: 1,
            id: "test".into(),
            severity: Severity::High,
            category: FindingCategory::Security,
            ..Diag::default()
        }
    }

    /// Critical RCE chain whose single member carries `hash`.
    fn chain_with_member(hash: u64) -> ChainFinding {
        let sink = ChainSink {
            file: "a.py".into(),
            line: 5,
            col: 1,
            function_name: "sink".into(),
            cap_bits: 0,
        };
        let members = vec![FindingRef {
            finding_id: "f".into(),
            stable_hash: hash,
            location: SourceLocation::new("a.py", 1, 1),
            rule_id: "test".into(),
            cap_bits: 0,
        }];
        ChainFinding {
            stable_hash: CHAIN_HASH,
            members,
            sink,
            implied_impact: ImpactCategory::Rce,
            severity: ChainSeverity::Critical,
            score: 200.0,
            dynamic_verdict: None,
        }
    }

    /// `findings` array emitted for one diagnostic paired with one chain.
    fn findings_for(diag_hash: u64, member_hash: u64) -> Vec<Value> {
        let payload =
            build_findings_json(&[diag(diag_hash)], &[chain_with_member(member_hash)], None);
        payload["findings"].as_array().unwrap().clone()
    }

    #[test]
    fn chain_member_of_is_set_for_chain_members() {
        let findings = findings_for(42, 42);
        assert_eq!(findings[0]["chain_member_of"], json!(CHAIN_HASH));
    }

    #[test]
    fn chain_member_of_omitted_when_finding_not_in_any_chain() {
        let findings = findings_for(99, 42);
        assert!(findings[0].get("chain_member_of").is_none());
    }

    #[test]
    fn chains_array_serialised() {
        let payload = build_findings_json(&[], &[chain_with_member(42)], None);
        let chains = payload["chains"].as_array().unwrap();
        assert_eq!(chains.len(), 1);
        assert_eq!(chains[0]["severity"], "critical");
        assert_eq!(chains[0]["implied_impact"], "rce");
    }

    #[test]
    fn verdict_diff_preserved() {
        let diff = json!({"new": []});
        let payload = build_findings_json(&[], &[], Some(&diff));
        assert!(payload.get("verdict_diff").is_some());
    }
}
|
||||
136
src/output/mod.rs
Normal file
136
src/output/mod.rs
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
//! Finding serialization and output routing.
|
||||
//!
|
||||
//! Phase 25 splits the original `output.rs` into a module:
|
||||
//!
|
||||
//! - [`sarif`] — SARIF v2.1.0 emission, with chains attached to
|
||||
//! `runs[0].properties.chains` (SARIF has no first-class chain
|
||||
//! concept). Re-exported as [`build_sarif`] (unchanged signature)
|
||||
//! plus [`build_sarif_with_chains`].
|
||||
//! - [`json`] — JSON output that includes `findings` and `chains`
|
||||
//! top-level arrays plus per-finding `chain_member_of`.
|
||||
//! - [`severity`] — chain severity calculation.
|
||||
//!
|
||||
//! Default-output behaviour for constituent findings is gated on
|
||||
//! [`crate::utils::config::OutputConfig::show_chain_constituents`].
|
||||
//! See [`filter_constituents`].
|
||||
|
||||
pub mod json;
|
||||
pub mod sarif;
|
||||
pub mod severity;
|
||||
|
||||
pub use json::build_findings_json;
|
||||
pub use sarif::{build_sarif, build_sarif_with_chains};
|
||||
|
||||
use crate::chain::finding::ChainFinding;
|
||||
use crate::commands::scan::Diag;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Apply the `[output] show_chain_constituents` gate.
|
||||
///
|
||||
/// When `show_chain_constituents == false`, drop every `Diag` whose
|
||||
/// `stable_hash` appears as a member of any composed chain. The
|
||||
/// chains themselves carry the member list so consumers that want
|
||||
/// per-constituent context can still reach it through `chains[].members`.
|
||||
///
|
||||
/// When `show_chain_constituents == true` (or there are no chains),
|
||||
/// pass `diags` through verbatim.
|
||||
pub fn filter_constituents(
|
||||
diags: Vec<Diag>,
|
||||
chains: &[ChainFinding],
|
||||
show_chain_constituents: bool,
|
||||
) -> Vec<Diag> {
|
||||
if show_chain_constituents || chains.is_empty() {
|
||||
return diags;
|
||||
}
|
||||
let member_hashes: HashSet<u64> = chains
|
||||
.iter()
|
||||
.flat_map(|c| c.members.iter().map(|m| m.stable_hash))
|
||||
.filter(|h| *h != 0)
|
||||
.collect();
|
||||
if member_hashes.is_empty() {
|
||||
return diags;
|
||||
}
|
||||
diags
|
||||
.into_iter()
|
||||
.filter(|d| !(d.stable_hash != 0 && member_hashes.contains(&d.stable_hash)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::FindingRef;
    use crate::chain::finding::{ChainFinding, ChainSeverity, ChainSink};
    use crate::chain::impact::ImpactCategory;
    use crate::commands::scan::Diag;
    use crate::patterns::{FindingCategory, Severity};
    use crate::surface::SourceLocation;

    /// High-severity security diagnostic at `a.py:1:1` with the given hash.
    fn diag(hash: u64) -> Diag {
        Diag {
            stable_hash: hash,
            path: "a.py".into(),
            line: 1,
            col: 1,
            id: "test".into(),
            severity: Severity::High,
            category: FindingCategory::Security,
            ..Diag::default()
        }
    }

    /// Critical RCE chain (hash `1`) whose only member carries `member_hash`.
    fn chain(member_hash: u64) -> ChainFinding {
        let member = FindingRef {
            finding_id: "f".into(),
            stable_hash: member_hash,
            location: SourceLocation::new("a.py", 1, 1),
            rule_id: "test".into(),
            cap_bits: 0,
        };
        let sink = ChainSink {
            file: "a.py".into(),
            line: 5,
            col: 1,
            function_name: "sink".into(),
            cap_bits: 0,
        };
        ChainFinding {
            stable_hash: 1,
            members: vec![member],
            sink,
            implied_impact: ImpactCategory::Rce,
            severity: ChainSeverity::Critical,
            score: 200.0,
            dynamic_verdict: None,
        }
    }

    /// Number of diagnostics left after filtering a single `diag(diag_hash)`.
    fn survivors(diag_hash: u64, chains: &[ChainFinding], show: bool) -> usize {
        filter_constituents(vec![diag(diag_hash)], chains, show).len()
    }

    #[test]
    fn filter_drops_chain_members_when_disabled() {
        let remaining = filter_constituents(vec![diag(42)], &[chain(42)], false);
        assert!(remaining.is_empty());
    }

    #[test]
    fn filter_keeps_non_members() {
        assert_eq!(survivors(99, &[chain(42)], false), 1);
    }

    #[test]
    fn filter_keeps_all_when_enabled() {
        assert_eq!(survivors(42, &[chain(42)], true), 1);
    }

    #[test]
    fn filter_keeps_all_when_no_chains() {
        assert_eq!(survivors(42, &[], false), 1);
    }
}
|
||||
|
|
@ -1,12 +1,11 @@
|
|||
//! Finding serialization and output routing.
|
||||
//! Finding serialization for SARIF output, with chain-extension
|
||||
//! support added in Phase 25.
|
||||
//!
|
||||
//! Serializes [`crate::commands::scan::Diag`] values to console, JSON, or
|
||||
//! SARIF based on the requested format. `PATTERN_DESCRIPTIONS` is a
|
||||
//! lazily-built map from pattern ID to human-readable description, populated
|
||||
//! from all language registries on first access. `sarif_base_id` normalizes
|
||||
//! source-location-suffixed finding IDs (like `"taint-unsanitised-flow (source 12:3)"`)
|
||||
//! to the canonical SARIF rule ID form.
|
||||
//! Serializes [`crate::commands::scan::Diag`] values to SARIF 2.1.0.
|
||||
//! Chains land on `runs[0].properties.chains` (SARIF v2.1.0 has no
|
||||
//! first-class chain concept); see [`build_sarif_with_chains`].
|
||||
|
||||
use crate::chain::finding::ChainFinding;
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::patterns::{self, Severity};
|
||||
use once_cell::sync::Lazy;
|
||||
|
|
@ -37,7 +36,7 @@ static PATTERN_DESCRIPTIONS: Lazy<HashMap<&'static str, &'static str>> = Lazy::n
|
|||
});
|
||||
|
||||
/// CFG rule descriptions for rules not in the pattern registry.
|
||||
fn cfg_rule_description(id: &str) -> Option<&'static str> {
|
||||
pub(crate) fn cfg_rule_description(id: &str) -> Option<&'static str> {
|
||||
match id {
|
||||
"cfg-unguarded-sink" => Some("Dangerous sink reachable without prior guard or sanitizer"),
|
||||
"cfg-unreachable-sink" => Some("Sink in unreachable code"),
|
||||
|
|
@ -64,7 +63,7 @@ fn cfg_rule_description(id: &str) -> Option<&'static str> {
|
|||
/// Cap-specific taint rule classes (e.g. `taint-data-exfiltration`) are
|
||||
/// preserved as distinct bases so consumers can filter on them rather than
|
||||
/// folding everything into `taint-unsanitised-flow`.
|
||||
fn sarif_base_id(id: &str) -> &str {
|
||||
pub(crate) fn sarif_base_id(id: &str) -> &str {
|
||||
if id.starts_with("taint-data-exfiltration") {
|
||||
"taint-data-exfiltration"
|
||||
} else if id.starts_with("taint-") {
|
||||
|
|
@ -75,8 +74,7 @@ fn sarif_base_id(id: &str) -> &str {
|
|||
}
|
||||
|
||||
/// Look up a human-readable description for any rule ID.
|
||||
fn rule_description(id: &str) -> &str {
|
||||
// Strip taint-specific suffix for lookup (e.g. "taint-unsanitised-flow:foo.rs:42" → base)
|
||||
pub(crate) fn rule_description(id: &str) -> &str {
|
||||
let base_id = sarif_base_id(id);
|
||||
|
||||
if let Some(desc) = PATTERN_DESCRIPTIONS.get(base_id) {
|
||||
|
|
@ -94,7 +92,7 @@ fn rule_description(id: &str) -> &str {
|
|||
}
|
||||
}
|
||||
|
||||
fn severity_to_level(sev: Severity) -> &'static str {
|
||||
pub(crate) fn severity_to_level(sev: Severity) -> &'static str {
|
||||
match sev {
|
||||
Severity::High => "error",
|
||||
Severity::Medium => "warning",
|
||||
|
|
@ -103,8 +101,27 @@ fn severity_to_level(sev: Severity) -> &'static str {
|
|||
}
|
||||
|
||||
/// Build a SARIF 2.1.0 JSON value from a list of diagnostics.
|
||||
///
|
||||
/// Backwards-compatible wrapper for callers that do not yet have a
|
||||
/// chain list. Equivalent to
|
||||
/// [`build_sarif_with_chains`] with an empty chain slice.
|
||||
pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
||||
// Deduplicate rule IDs and build rules array.
|
||||
build_sarif_with_chains(diags, &[], scan_root)
|
||||
}
|
||||
|
||||
/// Build a SARIF 2.1.0 JSON value from a list of diagnostics, with
|
||||
/// composed exploit chains attached to `runs[0].properties.chains`.
|
||||
///
|
||||
/// `chains` is emitted verbatim into the run's `properties` object so
|
||||
/// SARIF v2.1.0 consumers that do not understand chains can still
|
||||
/// process the diagnostics. When the slice is empty the
|
||||
/// `properties.chains` array is still emitted (as `[]`) so consumers
|
||||
/// can rely on the key existing.
|
||||
pub fn build_sarif_with_chains(
|
||||
diags: &[Diag],
|
||||
chains: &[ChainFinding],
|
||||
scan_root: &Path,
|
||||
) -> Value {
|
||||
let mut rule_ids: Vec<String> = Vec::new();
|
||||
let mut rule_index_map: HashMap<String, usize> = HashMap::new();
|
||||
|
||||
|
|
@ -127,15 +144,19 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
})
|
||||
.collect();
|
||||
|
||||
// Map of finding stable_hash → chain stable_hash, used to set the
|
||||
// per-result `chain_member_of` property. Findings carry a u64
|
||||
// stable hash; chains carry their own u64. When a finding is a
|
||||
// member of multiple chains, the first chain in
|
||||
// `canonicalise`-order wins (deterministic).
|
||||
let chain_member_of: HashMap<u64, u64> = build_chain_member_map(chains);
|
||||
|
||||
let results: Vec<Value> = diags
|
||||
.iter()
|
||||
.map(|d| {
|
||||
let base = sarif_base_id(&d.id);
|
||||
let rule_index = rule_index_map[base];
|
||||
|
||||
// Make path relative to scan root. Fall back to a deterministic
|
||||
// sentinel instead of the absolute path, SARIF must not leak
|
||||
// home-directory or host-specific prefixes.
|
||||
let uri = match Path::new(&d.path).strip_prefix(scan_root) {
|
||||
Ok(p) => p.to_string_lossy().to_string(),
|
||||
Err(_) => {
|
||||
|
|
@ -148,7 +169,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
}
|
||||
};
|
||||
|
||||
// Prefer the per-finding message (e.g. from state analysis) over the generic rule description.
|
||||
let msg_text = d
|
||||
.message
|
||||
.as_deref()
|
||||
|
|
@ -170,10 +190,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
}]
|
||||
});
|
||||
|
||||
// Emit SARIF `codeFlows` when the finding carries structured flow
|
||||
// steps. Each step becomes a `threadFlows[0].locations[]` entry,
|
||||
// the SARIF-idiomatic encoding for data-flow paths; the primary
|
||||
// `locations[0]` above already names the true sink.
|
||||
if let Some(ev) = d.evidence.as_ref()
|
||||
&& !ev.flow_steps.is_empty()
|
||||
{
|
||||
|
|
@ -209,17 +225,12 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
}]);
|
||||
}
|
||||
|
||||
// Build properties object
|
||||
let mut props = serde_json::Map::new();
|
||||
props.insert("category".into(), json!(d.category.to_string()));
|
||||
if let Some(conf) = d.confidence {
|
||||
props.insert("confidence".into(), json!(conf.to_string()));
|
||||
}
|
||||
|
||||
// `DATA_EXFIL` findings carry the destination object-literal
|
||||
// field the leak reached (`body` / `headers` / `json`); surface
|
||||
// it so SARIF consumers can pivot per-destination without
|
||||
// reparsing the message.
|
||||
if let Some(field) = d
|
||||
.evidence
|
||||
.as_ref()
|
||||
|
|
@ -228,14 +239,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
props.insert("data_exfil_field".into(), json!(field));
|
||||
}
|
||||
|
||||
// Alternative-path cross-references. When the dedup pass
|
||||
// at `taint::analyse_file` preserves both a validated and
|
||||
// an unvalidated flow for the same `(body, sink, source)`,
|
||||
// or two flows that differ on the traversed intermediate
|
||||
// variables, each finding carries its own stable ID plus
|
||||
// the IDs of its siblings. SARIF consumers can follow the
|
||||
// links via `properties.finding_id` and
|
||||
// `properties.relatedFindings`.
|
||||
if !d.finding_id.is_empty() {
|
||||
props.insert("finding_id".into(), json!(d.finding_id));
|
||||
}
|
||||
|
|
@ -243,21 +246,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
props.insert("relatedFindings".into(), json!(d.alternative_finding_ids));
|
||||
}
|
||||
|
||||
// Engine provenance notes, surface any cap-hit / lowering
|
||||
// bail / timeout signals recorded by the analysis engine so
|
||||
// downstream consumers can tell "nothing found" from "engine
|
||||
// stopped looking".
|
||||
//
|
||||
// Three properties are emitted together:
|
||||
// * `engine_notes` , raw list of {kind, ...} entries
|
||||
// * `confidence_capped` , true iff any non-informational
|
||||
// note is present (back-compat
|
||||
// boolean; drives legacy dashboards)
|
||||
// * `loss_direction` , worst `LossDirection` across
|
||||
// the list ("under-report",
|
||||
// "over-report", "bail"). Absent
|
||||
// when only informational notes
|
||||
// are attached.
|
||||
if let Some(engine_notes) = d.evidence.as_ref().and_then(|ev| {
|
||||
if ev.engine_notes.is_empty() {
|
||||
None
|
||||
|
|
@ -282,10 +270,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
}
|
||||
}
|
||||
|
||||
// Dynamic verification vendor extension (§5.4).
|
||||
// `partialFingerprints.dynamic_verdict_status` is a stable string
|
||||
// consumers can key on without parsing the full verdict object.
|
||||
// `properties.nyx_dynamic_verdict` carries the full VerifyResult.
|
||||
if let Some(dv) = d.evidence.as_ref().and_then(|ev| ev.dynamic_verdict.as_ref()) {
|
||||
result["partialFingerprints"] = json!({
|
||||
"dynamic_verdict_status": serde_json::to_value(dv.status)
|
||||
|
|
@ -297,7 +281,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
);
|
||||
}
|
||||
|
||||
// Add rollup data if present
|
||||
if let Some(ref rollup) = d.rollup {
|
||||
props.insert(
|
||||
"rollup".into(),
|
||||
|
|
@ -306,7 +289,6 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
}),
|
||||
);
|
||||
|
||||
// Add rollup occurrences as relatedLocations
|
||||
let related: Vec<Value> = rollup
|
||||
.occurrences
|
||||
.iter()
|
||||
|
|
@ -329,12 +311,26 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
}
|
||||
}
|
||||
|
||||
// Phase 25: cross-reference back to the composed chain
|
||||
// this finding participates in (if any). Stable across
|
||||
// reruns because both the finding's `stable_hash` and the
|
||||
// chain's `stable_hash` are byte-deterministic.
|
||||
if d.stable_hash != 0 {
|
||||
if let Some(chain_hash) = chain_member_of.get(&d.stable_hash) {
|
||||
props.insert("chain_member_of".into(), json!(chain_hash));
|
||||
}
|
||||
}
|
||||
|
||||
result["properties"] = Value::Object(props);
|
||||
|
||||
result
|
||||
})
|
||||
.collect();
|
||||
|
||||
let run_properties = json!({
|
||||
"chains": chains.iter().map(serialize_chain).collect::<Vec<_>>(),
|
||||
});
|
||||
|
||||
json!({
|
||||
"$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json",
|
||||
"version": "2.1.0",
|
||||
|
|
@ -347,14 +343,29 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
"rules": rules
|
||||
}
|
||||
},
|
||||
"results": results
|
||||
"results": results,
|
||||
"properties": run_properties
|
||||
}]
|
||||
})
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tests
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
fn build_chain_member_map(chains: &[ChainFinding]) -> HashMap<u64, u64> {
|
||||
let mut out: HashMap<u64, u64> = HashMap::new();
|
||||
for chain in chains {
|
||||
for member in &chain.members {
|
||||
out.entry(member.stable_hash).or_insert(chain.stable_hash);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// JSON shape for one chain inside SARIF's `properties.chains`. The
|
||||
/// JSON-findings emitter in [`crate::output::json`] serialises chains
|
||||
/// the same way (via `serde_json::to_value`), so consumers see an
|
||||
/// identical chain shape across both formats.
|
||||
pub(crate) fn serialize_chain(chain: &ChainFinding) -> Value {
|
||||
serde_json::to_value(chain).unwrap_or(Value::Null)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
@ -387,8 +398,6 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── severity_to_level ──────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn severity_to_level_high_is_error() {
|
||||
assert_eq!(severity_to_level(Severity::High), "error");
|
||||
|
|
@ -404,8 +413,6 @@ mod tests {
|
|||
assert_eq!(severity_to_level(Severity::Low), "note");
|
||||
}
|
||||
|
||||
// ── cfg_rule_description ───────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn cfg_rule_description_known_ids() {
|
||||
let cases = [
|
||||
|
|
@ -439,47 +446,31 @@ mod tests {
|
|||
assert!(cfg_rule_description("").is_none());
|
||||
}
|
||||
|
||||
// ── rule_description ──────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn rule_description_taint_prefix_returns_fallback() {
|
||||
// Any taint-* ID without a registered pattern description falls back
|
||||
// to the hardcoded message.
|
||||
let desc = rule_description("taint-unsanitised-flow");
|
||||
assert!(
|
||||
desc.contains("Unsanitised"),
|
||||
"expected taint fallback, got: {desc}"
|
||||
);
|
||||
assert!(desc.contains("Unsanitised"), "expected taint fallback, got: {desc}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule_description_taint_with_suffix_normalises_to_base() {
|
||||
// IDs like "taint-unsanitised-flow:foo.rs:42" are stripped to base.
|
||||
let desc = rule_description("taint-unsanitised-flow:foo.rs:42");
|
||||
assert!(
|
||||
desc.contains("Unsanitised"),
|
||||
"expected taint fallback, got: {desc}"
|
||||
);
|
||||
assert!(desc.contains("Unsanitised"), "expected taint fallback, got: {desc}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule_description_cfg_known_id_returns_description() {
|
||||
let desc = rule_description("cfg-auth-gap");
|
||||
assert!(
|
||||
desc.contains("authentication"),
|
||||
"expected cfg-auth-gap description, got: {desc}"
|
||||
);
|
||||
assert!(desc.contains("authentication"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule_description_unknown_returns_id_itself() {
|
||||
let id = "totally-unknown-rule-zzzz";
|
||||
let desc = rule_description(id);
|
||||
assert_eq!(desc, id, "unknown rule ID should be returned as-is");
|
||||
assert_eq!(desc, id);
|
||||
}
|
||||
|
||||
// ── build_sarif ───────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn build_sarif_empty_diags_produces_valid_structure() {
|
||||
let sarif = build_sarif(&[], Path::new("/scan_root"));
|
||||
|
|
@ -506,12 +497,8 @@ mod tests {
|
|||
let loc = &result["locations"][0]["physicalLocation"];
|
||||
assert_eq!(loc["region"]["startLine"], 10);
|
||||
assert_eq!(loc["region"]["startColumn"], 5);
|
||||
// Path should be relative to scan_root
|
||||
let uri = loc["artifactLocation"]["uri"].as_str().unwrap();
|
||||
assert!(
|
||||
!uri.starts_with("/scan_root"),
|
||||
"URI should be relative, got: {uri}"
|
||||
);
|
||||
assert!(!uri.starts_with("/scan_root"));
|
||||
assert!(uri.contains("main.rs"));
|
||||
}
|
||||
|
||||
|
|
@ -536,30 +523,26 @@ mod tests {
|
|||
let sarif = build_sarif(&[diag], Path::new("/scan_root"));
|
||||
|
||||
let results = sarif["runs"][0]["results"].as_array().unwrap();
|
||||
// ruleId should be the base ID, not the suffixed version
|
||||
assert_eq!(results[0]["ruleId"], "taint-unsanitised-flow");
|
||||
|
||||
let rules = sarif["runs"][0]["tool"]["driver"]["rules"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
// Only one rule entry for the base ID
|
||||
assert_eq!(rules.len(), 1);
|
||||
assert_eq!(rules[0]["id"], "taint-unsanitised-flow");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn build_sarif_duplicate_rule_ids_deduplicated() {
|
||||
// Two findings with the same rule ID should produce only one rules entry.
|
||||
let d1 = make_diag("rs.security.sqli", Severity::High);
|
||||
let d2 = make_diag("rs.security.sqli", Severity::Medium);
|
||||
let sarif = build_sarif(&[d1, d2], Path::new("/"));
|
||||
let rules = sarif["runs"][0]["tool"]["driver"]["rules"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
assert_eq!(rules.len(), 1, "duplicate rule IDs should be deduplicated");
|
||||
assert_eq!(rules.len(), 1);
|
||||
let results = sarif["runs"][0]["results"].as_array().unwrap();
|
||||
assert_eq!(results.len(), 2);
|
||||
// Both results reference ruleIndex 0
|
||||
assert_eq!(results[0]["ruleIndex"], 0);
|
||||
assert_eq!(results[1]["ruleIndex"], 0);
|
||||
}
|
||||
|
|
@ -582,10 +565,7 @@ mod tests {
|
|||
let sarif = build_sarif(&[diag], Path::new("/scan_root"));
|
||||
let result = &sarif["runs"][0]["results"][0];
|
||||
let msg = result["message"]["text"].as_str().unwrap();
|
||||
assert!(
|
||||
msg.contains("authentication"),
|
||||
"should use cfg-auth-gap description, got: {msg}"
|
||||
);
|
||||
assert!(msg.contains("authentication"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -598,11 +578,9 @@ mod tests {
|
|||
let sarif = build_sarif(&[diag], Path::new("/scan_root"));
|
||||
let result = &sarif["runs"][0]["results"][0];
|
||||
|
||||
// Properties should include rollup count
|
||||
let props = &result["properties"];
|
||||
assert_eq!(props["rollup"]["count"], 3);
|
||||
|
||||
// relatedLocations should have 2 entries
|
||||
let related = result["relatedLocations"].as_array().unwrap();
|
||||
assert_eq!(related.len(), 2);
|
||||
assert_eq!(related[0]["physicalLocation"]["region"]["startLine"], 5);
|
||||
|
|
@ -614,11 +592,7 @@ mod tests {
|
|||
let diag = make_diag("rs.security.sql-injection", Severity::High);
|
||||
let sarif = build_sarif(&[diag], Path::new("/scan_root"));
|
||||
let result = &sarif["runs"][0]["results"][0];
|
||||
// relatedLocations key should not be present when there's no rollup
|
||||
assert!(
|
||||
result.get("relatedLocations").is_none(),
|
||||
"relatedLocations should be absent without rollup"
|
||||
);
|
||||
assert!(result.get("relatedLocations").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -636,9 +610,6 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn build_sarif_path_outside_scan_root_is_redacted() {
|
||||
// Absolute host paths leak home-directory information, SARIF must
|
||||
// substitute a deterministic token when a finding falls outside the
|
||||
// scan root.
|
||||
let mut diag = make_diag("rule-x", Severity::High);
|
||||
diag.path = "/other/place/file.rs".into();
|
||||
let sarif = build_sarif(&[diag], Path::new("/workspace"));
|
||||
|
|
@ -672,10 +643,7 @@ mod tests {
|
|||
#[test]
|
||||
fn build_sarif_schema_and_version_fields_present() {
|
||||
let sarif = build_sarif(&[], Path::new("/"));
|
||||
assert!(
|
||||
sarif["$schema"].as_str().unwrap().contains("sarif"),
|
||||
"schema should be a SARIF schema URL"
|
||||
);
|
||||
assert!(sarif["$schema"].as_str().unwrap().contains("sarif"));
|
||||
assert_eq!(sarif["version"], "2.1.0");
|
||||
}
|
||||
|
||||
|
|
@ -698,4 +666,12 @@ mod tests {
|
|||
assert_eq!(results[1]["ruleIndex"], 1);
|
||||
assert_eq!(results[2]["ruleIndex"], 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn build_sarif_with_chains_emits_properties_chains_array() {
|
||||
let sarif = build_sarif_with_chains(&[], &[], Path::new("/scan_root"));
|
||||
let run_props = &sarif["runs"][0]["properties"];
|
||||
assert!(run_props["chains"].is_array());
|
||||
assert_eq!(run_props["chains"].as_array().unwrap().len(), 0);
|
||||
}
|
||||
}
|
||||
133
src/output/severity.rs
Normal file
133
src/output/severity.rs
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
//! Phase 25 — severity calculation for composed chains.
|
||||
//!
|
||||
//! A chain's severity is derived from two inputs:
|
||||
//!
|
||||
//! 1. The [`ImpactCategory`] implied by the lattice rule the chain
|
||||
//! matched.
|
||||
//! 2. The slice of constituent [`ChainEdge`]s, used to detect when
|
||||
//! every member is `Unverified`, which drops the severity by one
|
||||
//! bucket (`Critical` chains are exempt and are never downgraded).
|
||||
//!
|
||||
//! The category provides the *base* severity; the constituent slice
|
||||
//! is a one-step adjustment that can downgrade (when feasibility is
|
||||
//! weak) but never upgrade above the category's natural ceiling.
|
||||
|
||||
use crate::chain::edges::ChainEdge;
|
||||
use crate::chain::feasibility::Feasibility;
|
||||
use crate::chain::finding::ChainSeverity;
|
||||
use crate::chain::impact::ImpactCategory;
|
||||
|
||||
/// Compute the severity for a chain.
|
||||
///
|
||||
/// The mapping:
|
||||
///
|
||||
/// | Category | Base severity | Notes |
|
||||
/// |-------------------------|---------------|----------------------------------------|
|
||||
/// | `Rce` | `Critical` | Always terminal — never downgraded |
|
||||
/// | `BrowserToLocalRce` | `Critical` | Always terminal — never downgraded |
|
||||
/// | `SessionHijack` | `High` | Downgraded to Medium when every member |
|
||||
/// | | | is `Unverified` |
|
||||
/// | `InternalNetworkAccess` | `High` | Downgraded to Medium when every member |
|
||||
/// | | | is `Unverified` |
|
||||
/// | `InfoDisclosure` | `Medium` | Downgraded to Low when every member is |
|
||||
/// | | | `Unverified` |
|
||||
pub fn chain_severity(category: ImpactCategory, members: &[ChainEdge]) -> ChainSeverity {
|
||||
let base = base_severity(category);
|
||||
let all_unverified = !members.is_empty()
|
||||
&& members
|
||||
.iter()
|
||||
.all(|m| matches!(m.feasibility, Feasibility::Unverified));
|
||||
if all_unverified && base != ChainSeverity::Critical {
|
||||
// Drop one bucket when every constituent is unverified and
|
||||
// the base is not Critical (Critical means RCE — even
|
||||
// unverified RCE chains stay Critical because the static
|
||||
// engine's primary cap claim is structural, not feasibility-
|
||||
// dependent).
|
||||
match base {
|
||||
ChainSeverity::High => ChainSeverity::Medium,
|
||||
ChainSeverity::Medium => ChainSeverity::Low,
|
||||
other => other,
|
||||
}
|
||||
} else {
|
||||
base
|
||||
}
|
||||
}
|
||||
|
||||
fn base_severity(category: ImpactCategory) -> ChainSeverity {
|
||||
match category {
|
||||
ImpactCategory::Rce | ImpactCategory::BrowserToLocalRce => ChainSeverity::Critical,
|
||||
ImpactCategory::SessionHijack | ImpactCategory::InternalNetworkAccess => {
|
||||
ChainSeverity::High
|
||||
}
|
||||
ImpactCategory::InfoDisclosure => ChainSeverity::Medium,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::{FindingRef, Reach};
    use crate::chain::feasibility::Feasibility;
    use crate::labels::Cap;
    use crate::surface::SourceLocation;

    /// Build a minimal code-exec edge whose only varying field is its
    /// feasibility; every other field is a fixed placeholder.
    fn edge(feas: Feasibility) -> ChainEdge {
        ChainEdge {
            finding: FindingRef {
                finding_id: "f".into(),
                stable_hash: 0,
                location: SourceLocation::new("a.py", 1, 1),
                rule_id: "r".into(),
                cap_bits: Cap::CODE_EXEC.bits(),
            },
            primary_cap: Cap::CODE_EXEC,
            reach: Reach::Unreachable,
            feasibility: feas,
        }
    }

    #[test]
    fn rce_is_always_critical() {
        let unverified = chain_severity(
            ImpactCategory::Rce,
            &[edge(Feasibility::Unverified), edge(Feasibility::Unverified)],
        );
        assert_eq!(unverified, ChainSeverity::Critical);
    }

    #[test]
    fn browser_local_rce_is_critical() {
        assert_eq!(
            chain_severity(
                ImpactCategory::BrowserToLocalRce,
                &[edge(Feasibility::Confirmed)]
            ),
            ChainSeverity::Critical,
        );
    }

    #[test]
    fn session_hijack_downgrades_on_all_unverified() {
        let confirmed = chain_severity(
            ImpactCategory::SessionHijack,
            &[edge(Feasibility::Confirmed)],
        );
        assert_eq!(confirmed, ChainSeverity::High);
        let unverified = chain_severity(
            ImpactCategory::SessionHijack,
            &[edge(Feasibility::Unverified), edge(Feasibility::Unverified)],
        );
        assert_eq!(unverified, ChainSeverity::Medium);
    }

    #[test]
    fn info_disclosure_downgrades_to_low() {
        let unverified = chain_severity(
            ImpactCategory::InfoDisclosure,
            &[edge(Feasibility::Unverified)],
        );
        assert_eq!(unverified, ChainSeverity::Low);
    }

    #[test]
    fn empty_members_stays_at_base() {
        assert_eq!(
            chain_severity(ImpactCategory::SessionHijack, &[]),
            ChainSeverity::High,
        );
    }

    /// The downgrade requires *every* member to be unverified; a single
    /// confirmed member keeps the chain at its base severity.
    #[test]
    fn mixed_feasibility_keeps_base_severity() {
        let mixed = [edge(Feasibility::Confirmed), edge(Feasibility::Unverified)];
        assert_eq!(
            chain_severity(ImpactCategory::SessionHijack, &mixed),
            ChainSeverity::High,
        );
        assert_eq!(
            chain_severity(ImpactCategory::InfoDisclosure, &mixed),
            ChainSeverity::Medium,
        );
    }
}
|
||||
|
|
@ -423,6 +423,17 @@ pub struct OutputConfig {
|
|||
/// Number of example locations to store in rollup findings.
|
||||
#[serde(default = "default_rollup_examples")]
|
||||
pub rollup_examples: u32,
|
||||
|
||||
/// Phase 25 — whether the JSON / SARIF / console output should
|
||||
/// continue to emit constituent findings that already belong to a
|
||||
/// composed [`crate::chain::ChainFinding`].
|
||||
///
|
||||
/// Default `true` (preserve every individual finding so existing
|
||||
/// pipelines see no behavioural change). Set to `false` to fold
|
||||
/// chain members into the `chains: [...]` array exclusively; the
|
||||
/// findings array still emits every non-member.
|
||||
#[serde(default = "default_show_chain_constituents")]
|
||||
pub show_chain_constituents: bool,
|
||||
}
|
||||
|
||||
fn default_max_low() -> u32 {
|
||||
|
|
@ -437,6 +448,9 @@ fn default_max_low_per_rule() -> u32 {
|
|||
/// Serde default for [`OutputConfig::rollup_examples`].
fn default_rollup_examples() -> u32 {
    5
}
|
||||
/// Serde default for [`OutputConfig::show_chain_constituents`]: chain
/// members stay visible unless the user opts out.
fn default_show_chain_constituents() -> bool {
    true
}
|
||||
|
||||
impl Default for OutputConfig {
|
||||
fn default() -> Self {
|
||||
|
|
@ -454,6 +468,7 @@ impl Default for OutputConfig {
|
|||
max_low_per_file: 1,
|
||||
max_low_per_rule: 10,
|
||||
rollup_examples: 5,
|
||||
show_chain_constituents: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -674,6 +689,31 @@ pub struct AnalysisRulesConfig {
|
|||
pub engine: crate::utils::AnalysisOptions,
|
||||
}
|
||||
|
||||
/// Phase 25 — `[chain]` section of `nyx.toml`.
|
||||
///
|
||||
/// Drives the bounded-DFS path search in
|
||||
/// [`crate::chain::search::find_chains`].
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
|
||||
#[serde(default)]
|
||||
pub struct ChainConfig {
|
||||
/// Maximum number of per-finding hops in a single chain path.
|
||||
/// Defaults to `4`.
|
||||
pub max_depth: usize,
|
||||
/// Path-search threshold. Chains with a score strictly below
|
||||
/// this value are dropped. Defaults to
|
||||
/// [`crate::chain::score::min_score_default`].
|
||||
pub min_score: f64,
|
||||
}
|
||||
|
||||
impl Default for ChainConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_depth: 4,
|
||||
min_score: 9.5,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for the local web UI server (`nyx serve`).
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(default)]
|
||||
|
|
@ -825,6 +865,10 @@ pub struct Config {
|
|||
pub output: OutputConfig,
|
||||
pub performance: PerformanceConfig,
|
||||
pub analysis: AnalysisRulesConfig,
|
||||
/// Phase 25 — `[chain]` section. Controls bounded path search
|
||||
/// and the chain-emission score threshold.
|
||||
#[serde(default)]
|
||||
pub chain: ChainConfig,
|
||||
/// Per-detector knobs ([detectors.*] in nyx.conf). Currently exposes
|
||||
/// `[detectors.data_exfil]` for cross-boundary leak suppression.
|
||||
#[serde(default)]
|
||||
|
|
|
|||
311
tests/chain_emission.rs
Normal file
311
tests/chain_emission.rs
Normal file
|
|
@ -0,0 +1,311 @@
|
|||
//! Phase 25 — exploit-chain emission integration tests.
|
||||
//!
|
||||
//! Covers the design-doc example: a permissive-CORS finding plus an
|
||||
//! unauthenticated entry-point plus a code-exec sink → one Critical
|
||||
//! `BrowserToLocalRce` chain with three members. Also exercises
|
||||
//! determinism (10 reruns produce byte-identical chain lists) and
|
||||
//! SARIF-shape validation of the emitted `runs[0].properties.chains`
|
||||
//! array.
|
||||
|
||||
use nyx_scanner::chain::finding::ChainSeverity;
|
||||
use nyx_scanner::chain::impact::ImpactCategory;
|
||||
use nyx_scanner::chain::{ChainEdge, ChainSearchConfig, find_chains};
|
||||
use nyx_scanner::commands::scan::Diag;
|
||||
use nyx_scanner::entry_points::HttpMethod;
|
||||
use nyx_scanner::evidence::Evidence;
|
||||
use nyx_scanner::labels::Cap;
|
||||
use nyx_scanner::output::{build_findings_json, build_sarif_with_chains};
|
||||
use nyx_scanner::patterns::{FindingCategory, Severity};
|
||||
use nyx_scanner::surface::{
|
||||
DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
|
||||
};
|
||||
|
||||
fn loc(file: &str, line: u32) -> SourceLocation {
|
||||
SourceLocation::new(file, line, 1)
|
||||
}
|
||||
|
||||
/// Build the SurfaceMap for the design-doc scenario:
|
||||
///
|
||||
/// - One Flask entry-point at `app.py:1`, route `/ws`, method `POST`,
|
||||
/// `auth_required: false` (the NoAuth half of CORS+NoAuth+websocket).
|
||||
/// - One DangerousLocal sink at `app.py:30`, function `shell.exec`,
|
||||
/// Cap::CODE_EXEC (the shell tool sink).
|
||||
fn fixture_surface_map() -> SurfaceMap {
|
||||
let mut m = SurfaceMap::new();
|
||||
m.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc("app.py", 1),
|
||||
framework: Framework::Flask,
|
||||
method: HttpMethod::POST,
|
||||
route: "/ws".into(),
|
||||
handler_name: "ws_handler".into(),
|
||||
handler_location: loc("app.py", 2),
|
||||
auth_required: false,
|
||||
}));
|
||||
m.nodes.push(SurfaceNode::DangerousLocal(DangerousLocal {
|
||||
location: loc("app.py", 30),
|
||||
function_name: "shell.exec".into(),
|
||||
cap_bits: Cap::CODE_EXEC.bits(),
|
||||
}));
|
||||
m
|
||||
}
|
||||
|
||||
/// Build the three constituent findings for the scenario:
|
||||
///
|
||||
/// - `d1` — permissive-CORS header injection at `app.py:10`.
|
||||
/// - `d2` — auth-gap diagnostic at `app.py:15` (cfg-auth-gap; carries
|
||||
/// `Cap::UNAUTHORIZED_ID` so the lattice has a third member, but the
|
||||
/// primary chain match is HEADER_INJECTION + CODE_EXEC).
|
||||
/// - `d3` — shell-exec taint finding at `app.py:25`.
|
||||
fn fixture_findings() -> Vec<Diag> {
|
||||
let mk = |line: usize, rule: &str, cap: Cap, sev: Severity| {
|
||||
let ev = Evidence {
|
||||
sink_caps: cap.bits(),
|
||||
..Evidence::default()
|
||||
};
|
||||
let mut d = Diag {
|
||||
path: "app.py".into(),
|
||||
line,
|
||||
col: 1,
|
||||
severity: sev,
|
||||
id: rule.into(),
|
||||
category: FindingCategory::Security,
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: None,
|
||||
labels: vec![],
|
||||
confidence: None,
|
||||
evidence: Some(ev),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: Vec::new(),
|
||||
stable_hash: 0,
|
||||
};
|
||||
d.stable_hash = nyx_scanner::commands::scan::compute_stable_hash(&d);
|
||||
d
|
||||
};
|
||||
vec![
|
||||
mk(10, "cfg-cors-allow-all", Cap::HEADER_INJECTION, Severity::Medium),
|
||||
mk(15, "cfg-auth-gap", Cap::UNAUTHORIZED_ID, Severity::Medium),
|
||||
mk(25, "taint-shell-exec", Cap::CODE_EXEC, Severity::High),
|
||||
]
|
||||
}
|
||||
|
||||
fn build_chain_edges_for_route(findings: &[Diag], route: &str) -> Vec<ChainEdge> {
|
||||
// findings_to_edges sets reach from the SurfaceMap; the design-doc
|
||||
// scenario has every finding live in the same file as the entry,
|
||||
// so the file-local reach resolver maps every edge to the entry.
|
||||
let surface = fixture_surface_map();
|
||||
let edges = nyx_scanner::chain::findings_to_edges(findings, &surface);
|
||||
edges
|
||||
.into_iter()
|
||||
.map(|mut e| {
|
||||
// Tighten the reach to the exact route so the DFS pairs
|
||||
// each edge with the right entry deterministically.
|
||||
e.reach = nyx_scanner::chain::edges::Reach::Reachable {
|
||||
location: loc("app.py", 1),
|
||||
method: HttpMethod::POST,
|
||||
route: route.into(),
|
||||
auth_required: false,
|
||||
};
|
||||
e
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Design-doc scenario: the three fixture findings compose into exactly
/// one Critical `BrowserToLocalRce` chain ending at the shell sink.
#[test]
fn cors_plus_noauth_plus_websocket_emits_one_critical_chain() {
    let surface = fixture_surface_map();
    let findings = fixture_findings();
    let edges = build_chain_edges_for_route(&findings, "/ws");
    let search_cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };

    let chains = find_chains(&edges, &surface, search_cfg);
    assert_eq!(chains.len(), 1, "expected exactly one chain, got {chains:?}");

    let only_chain = &chains[0];
    assert_eq!(only_chain.implied_impact, ImpactCategory::BrowserToLocalRce);
    assert_eq!(only_chain.severity, ChainSeverity::Critical);
    assert_eq!(
        only_chain.members.len(),
        3,
        "expected three constituent members"
    );

    let sink = &only_chain.sink;
    assert_eq!(sink.function_name, "shell.exec");
    assert_eq!(sink.cap_bits, Cap::CODE_EXEC.bits());
}
|
||||
|
||||
/// Run the composer ten times in total (one baseline plus nine reruns)
/// on identical input and require byte-identical serialised output and
/// identical `stable_hash` sequences every time.
#[test]
fn chain_set_is_byte_deterministic_across_10_reruns() {
    let surface = fixture_surface_map();
    let findings = fixture_findings();
    let edges = build_chain_edges_for_route(&findings, "/ws");
    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };

    let first = find_chains(&edges, &surface, cfg);
    // Guard against a vacuous pass: an empty chain list is trivially
    // "deterministic" and would hide a composer that emits nothing.
    assert!(
        !first.is_empty(),
        "fixture must emit at least one chain for the determinism check to be meaningful"
    );
    let first_json = serde_json::to_string(&first).unwrap();
    // Baseline hashes never change between reruns, so compute them once.
    let first_hashes: Vec<u64> = first.iter().map(|c| c.stable_hash).collect();

    for i in 0..9 {
        let again = find_chains(&edges, &surface, cfg);
        let again_json = serde_json::to_string(&again).unwrap();
        assert_eq!(
            again_json, first_json,
            "chain emission diverged on rerun {i}"
        );
        // stable_hash is a 64-bit fingerprint — verify it does not
        // drift across reruns even when the JSON happens to match
        // (defence in depth against accidental hash randomisation).
        let again_hashes: Vec<u64> = again.iter().map(|c| c.stable_hash).collect();
        assert_eq!(again_hashes, first_hashes, "stable_hash drift on rerun {i}");
    }
}
|
||||
|
||||
/// The JSON document must list the single chain under `chains`, and each
/// of the three constituent findings must point back at it through
/// `chain_member_of`.
#[test]
fn json_output_carries_chain_member_of_back_references() {
    let surface = fixture_surface_map();
    let findings = fixture_findings();
    let edges = build_chain_edges_for_route(&findings, "/ws");
    let search_cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };
    let chains = find_chains(&edges, &surface, search_cfg);

    let document = build_findings_json(&findings, &chains, None);

    let emitted_chains = document["chains"].as_array().unwrap();
    assert_eq!(emitted_chains.len(), 1);
    let chain_hash = emitted_chains[0]["stable_hash"].as_u64().unwrap();

    let emitted_findings = document["findings"].as_array().unwrap();
    let mut with_back_refs = Vec::new();
    for finding in emitted_findings {
        if finding.get("chain_member_of").is_some() {
            with_back_refs.push(finding);
        }
    }
    assert_eq!(
        with_back_refs.len(),
        3,
        "every constituent finding should carry chain_member_of"
    );
    with_back_refs.into_iter().for_each(|finding| {
        assert_eq!(finding["chain_member_of"].as_u64(), Some(chain_hash));
    });
}
|
||||
|
||||
/// Shape check for the SARIF document: the top-level v2.1.0 fields, the
/// `runs[0].properties.chains` extension, and the per-result
/// `chain_member_of` cross-reference.
#[test]
fn sarif_output_validates_against_v210_shape() {
    let surface = fixture_surface_map();
    let findings = fixture_findings();
    let edges = build_chain_edges_for_route(&findings, "/ws");
    let search_cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };
    let chains = find_chains(&edges, &surface, search_cfg);
    let scan_root = std::path::Path::new(".");
    let sarif = build_sarif_with_chains(&findings, &chains, scan_root);

    // Surface-level v2.1.0 invariants — the SARIF schema requires
    // these fields and we want a tripwire if any disappear.
    assert_eq!(sarif["version"], "2.1.0", "missing or wrong version field");
    assert!(sarif["$schema"].is_string(), "$schema must be a string");
    let runs = &sarif["runs"];
    assert!(runs.is_array(), "runs must be an array");
    assert_eq!(runs.as_array().unwrap().len(), 1, "exactly one run");

    let run = &runs[0];
    let driver = &run["tool"]["driver"];
    assert!(driver["name"].is_string());
    assert_eq!(driver["name"], "nyx");
    assert!(driver["rules"].is_array());
    assert!(run["results"].is_array());

    // Phase 25 extension: chains land on run.properties.chains.
    let chains_array = run["properties"]["chains"].as_array().unwrap();
    assert_eq!(chains_array.len(), 1, "exactly one chain emitted");

    // Every chain object carries the documented shape.
    let chain = &chains_array[0];
    assert!(chain["stable_hash"].is_number());
    let members = &chain["members"];
    assert!(members.is_array());
    assert_eq!(members.as_array().unwrap().len(), 3);
    assert!(chain["sink"].is_object());
    assert!(chain["implied_impact"].is_string());
    assert_eq!(chain["severity"], "critical");

    // Per-result `chain_member_of` cross-reference.
    let mut with_back_refs = 0;
    for result in run["results"].as_array().unwrap() {
        if result["properties"].get("chain_member_of").is_some() {
            with_back_refs += 1;
        }
    }
    assert_eq!(
        with_back_refs, 3,
        "every constituent SARIF result should carry chain_member_of"
    );
}
|
||||
|
||||
/// Feeding the same findings in forward and reversed order must yield
/// the same sequence of chain `stable_hash` values.
#[test]
fn determinism_across_input_permutations() {
    // Same set of findings in two different orders must yield the
    // same chain set (the composer canonicalises by stable_hash).
    let surface = fixture_surface_map();
    let findings = fixture_findings();
    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };

    let order_a = build_chain_edges_for_route(&findings, "/ws");
    let mut findings_rev = findings.clone();
    findings_rev.reverse();
    let order_b = build_chain_edges_for_route(&findings_rev, "/ws");

    let chains_a = find_chains(&order_a, &surface, cfg);
    let chains_b = find_chains(&order_b, &surface, cfg);
    let hashes_a: Vec<u64> = chains_a.iter().map(|c| c.stable_hash).collect();
    let hashes_b: Vec<u64> = chains_b.iter().map(|c| c.stable_hash).collect();

    // Two empty lists compare equal, so insist that the baseline order
    // actually produced a chain before comparing.
    assert!(
        !hashes_a.is_empty(),
        "fixture must emit at least one chain for the permutation check to be meaningful"
    );
    assert_eq!(hashes_a, hashes_b);
}
|
||||
|
||||
/// With the entry point marked `auth_required`, the same findings must
/// compose into a plain `Rce` chain (still Critical) rather than
/// `BrowserToLocalRce`.
#[test]
fn authed_entry_downgrades_to_rce_without_browser_local() {
    let mut surface = fixture_surface_map();
    // Flip auth_required on the entry — should downgrade the chain.
    // Fail loudly if the fixture layout changes, instead of silently
    // skipping the mutation and testing the unauthenticated scenario.
    match surface.nodes.first_mut() {
        Some(SurfaceNode::EntryPoint(entry)) => entry.auth_required = true,
        _ => panic!("fixture surface map must start with an EntryPoint node"),
    }

    let findings = fixture_findings();
    // NOTE(review): build_chain_edges_for_route hard-codes
    // `auth_required: false` in every edge's reach, so the expected
    // downgrade presumably comes from find_chains consulting the
    // surface map's entry point — confirm against the search code.
    let edges = build_chain_edges_for_route(&findings, "/ws");
    let chains = find_chains(
        &edges,
        &surface,
        ChainSearchConfig {
            max_depth: 4,
            min_score: 0.0,
        },
    );
    assert_eq!(chains.len(), 1);
    assert_eq!(
        chains[0].implied_impact,
        ImpactCategory::Rce,
        "auth-gated entry must not produce BrowserToLocalRce"
    );
    assert_eq!(chains[0].severity, ChainSeverity::Critical);
}
|
||||
|
|
@ -615,17 +615,25 @@ fn binary_json_output() {
|
|||
);
|
||||
|
||||
let stdout = String::from_utf8_lossy(&cmd.stdout);
|
||||
// Find the JSON array in stdout (config notes and "Finished" surround it)
|
||||
let json_start = stdout.find('[').expect("Expected JSON array in stdout");
|
||||
let json_end = stdout.rfind(']').expect("Expected closing bracket in JSON") + 1;
|
||||
// Phase 25: JSON output is `{ "findings": [...], "chains": [...] }`.
|
||||
let json_start = stdout.find('{').expect("Expected JSON object in stdout");
|
||||
let json_end = stdout.rfind('}').expect("Expected closing brace in JSON") + 1;
|
||||
let json_str = &stdout[json_start..json_end];
|
||||
let parsed: Vec<serde_json::Value> =
|
||||
serde_json::from_str(json_str).expect("stdout should contain valid JSON array");
|
||||
let parsed: serde_json::Value =
|
||||
serde_json::from_str(json_str).expect("stdout should contain valid JSON object");
|
||||
|
||||
let findings = parsed["findings"]
|
||||
.as_array()
|
||||
.expect("JSON output must have a `findings` array");
|
||||
assert!(
|
||||
!parsed.is_empty(),
|
||||
!findings.is_empty(),
|
||||
"Expected at least 1 finding in JSON output"
|
||||
);
|
||||
// Phase 25: every scan emits a `chains` array (possibly empty).
|
||||
assert!(
|
||||
parsed["chains"].is_array(),
|
||||
"JSON output must have a `chains` array"
|
||||
);
|
||||
}
|
||||
|
||||
// ── EJS / config / debug endpoint fixtures ──────────────────────────────────
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue