From c9e7342ad33a37faf1536d84885365b365958734 Mon Sep 17 00:00:00 2001
From: pitboss
Date: Fri, 15 May 2026 15:20:09 -0500
Subject: [PATCH] =?UTF-8?q?[pitboss]=20phase=2024:=20Track=20G.1=20?=
 =?UTF-8?q?=E2=80=94=20Chain=20module=20+=20impact=20lattice=20+=20candida?=
 =?UTF-8?q?te=20edges?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/chain/edges.rs       | 193 +++++++++++++++++++++++++++
 src/chain/feasibility.rs | 150 +++++++++++++++++++++
 src/chain/impact.rs      | 276 +++++++++++++++++++++++++++++++++++++++
 src/chain/mod.rs         | 129 ++++++++++++++++++
 src/lib.rs               |   1 +
 tests/chain_edges.rs     | 194 +++++++++++++++++++++++++++
 6 files changed, 943 insertions(+)
 create mode 100644 src/chain/edges.rs
 create mode 100644 src/chain/feasibility.rs
 create mode 100644 src/chain/impact.rs
 create mode 100644 src/chain/mod.rs
 create mode 100644 tests/chain_edges.rs

diff --git a/src/chain/edges.rs b/src/chain/edges.rs
new file mode 100644
index 00000000..6b007845
--- /dev/null
+++ b/src/chain/edges.rs
@@ -0,0 +1,193 @@
+//! Phase 24 — convert per-finding [`Diag`]s into chain-graph edges.
+//!
+//! Each call to [`findings_to_edges`] emits at most one [`ChainEdge`]
+//! per input finding (cap-less findings are dropped), *typed* by:
+//!
+//! - the primary [`Cap`] bit picked from `Evidence::sink_caps`
+//!   (the lowest-bit set, chosen deterministically), and
+//! - the *reach* — the surface [`EntryPoint`](crate::surface::EntryPoint)
+//!   in the finding's file, when one exists, otherwise [`Reach::Unreachable`].
+//!
+//! Phase 25's path search composes these edges with the SurfaceMap's
+//! `Reaches` edges into full chains. Phase 24 does not run any path
+//! search or do call-graph traversal: edges are emitted at finding
+//! granularity and carry only the file-local reach hint.
+
+use crate::commands::scan::Diag;
+use crate::entry_points::HttpMethod;
+use crate::labels::Cap;
+use crate::surface::{SourceLocation, SurfaceMap, SurfaceNode};
+use serde::{Deserialize, Serialize};
+
+use super::feasibility::Feasibility;
+
+/// Compact reference to a static finding embedded in a [`ChainEdge`].
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct FindingRef {
+    /// Stable finding ID (matches [`Diag::finding_id`] when present).
+    pub finding_id: String,
+    /// Stable 64-bit hash from [`Diag::stable_hash`]. Zero when the
+    /// finding has not been hashed yet.
+    pub stable_hash: u64,
+    /// Source location of the sink.
+    pub location: SourceLocation,
+    /// Rule identifier (`Diag::id`).
+    pub rule_id: String,
+    /// Resolved sink cap bits (`Evidence::sink_caps`).
+    pub cap_bits: u32,
+}
+
+/// Whether the finding lands inside an externally-reachable surface
+/// entry-point. Phase 24 only resolves *file-local* reach: a finding
+/// in `app/views.py` is treated as reachable if any
+/// [`EntryPoint`](crate::surface::EntryPoint) declares a handler in
+/// that same file. Phase 25 will fold the call graph in.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "reach", rename_all = "snake_case")]
+pub enum Reach {
+    /// Finding is in a file that hosts at least one entry-point.
+    /// `route` and `method` describe the first matching entry-point
+    /// (surface-canonical order).
+    Reachable {
+        location: SourceLocation,
+        method: HttpMethod,
+        route: String,
+        auth_required: bool,
+    },
+    /// Finding is in a file with no surface entry-points.
+    Unreachable,
+}
+
+/// One edge in the chain graph.
+///
+/// Phase 24's edges live at the granularity of a single finding.
+/// Phase 25 will introduce additional edge kinds (entry → finding,
+/// finding → sink-cluster, etc.) once path search is wired up.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct ChainEdge {
+    pub finding: FindingRef,
+    /// Primary cap classification. Picked deterministically as the
+    /// lowest defined bit of [`FindingRef::cap_bits`] so two scans of the
+    /// same source produce identical edges.
+    pub primary_cap: Cap,
+    /// Where the finding sits relative to the surface.
+    pub reach: Reach,
+    /// Phase 25 path-score factor.
+    pub feasibility: Feasibility,
+}
+
+/// Convert each classifiable [`Diag`] into one [`ChainEdge`].
+///
+/// Findings without evidence or cap bits (`sink_caps == 0`) are
+/// dropped — the chain composer cannot classify them on a typed
+/// lattice and Phase 25's scoring expects every edge to expose a
+/// primary cap. This is a deliberate quiet-drop: such findings are
+/// usually structural CFG diagnostics (e.g. `cfg-auth-gap`) whose
+/// chain participation is modelled by the SurfaceMap's
+/// `AuthRequiredOn` edges instead.
+///
+/// The output order mirrors `findings`; the caller is responsible for
+/// any further canonicalisation.
+pub fn findings_to_edges(findings: &[Diag], surface: &SurfaceMap) -> Vec<ChainEdge> {
+    findings
+        .iter()
+        .filter_map(|d| build_edge(d, surface))
+        .collect()
+}
+
+fn build_edge(diag: &Diag, surface: &SurfaceMap) -> Option<ChainEdge> {
+    let cap_bits = diag.evidence.as_ref()?.sink_caps;
+    // An empty (or wholly undefined) mask yields `None`, dropping the finding.
+    let primary_cap = lowest_cap(cap_bits)?;
+    // Saturate instead of silently wrapping if a position exceeds `u32::MAX`.
+    let line = u32::try_from(diag.line).unwrap_or(u32::MAX);
+    let col = u32::try_from(diag.col).unwrap_or(u32::MAX);
+    let location = SourceLocation::new(diag.path.clone(), line, col);
+    let reach = locate_reach(&location, surface);
+    let feasibility = Feasibility::for_finding(diag);
+    let finding = FindingRef {
+        finding_id: diag.finding_id.clone(),
+        stable_hash: diag.stable_hash,
+        location,
+        rule_id: diag.id.clone(),
+        cap_bits,
+    };
+    Some(ChainEdge {
+        finding,
+        primary_cap,
+        reach,
+        feasibility,
+    })
+}
+
+/// Return the lowest single-bit [`Cap`] set in `bits`, ignoring bits `Cap` does
+/// not define, or `None` when none remain. Deterministic: always the lowest bit.
+pub fn lowest_cap(bits: u32) -> Option<Cap> {
+    // Mask off bits `Cap` does not define first: a stray unknown low bit
+    // would otherwise make `from_bits` fail and hide a valid higher cap.
+    let known = Cap::from_bits_truncate(bits).bits();
+    let shift = (known != 0).then(|| known.trailing_zeros())?;
+    Cap::from_bits(1u32 << shift)
+}
+
+fn locate_reach(loc: &SourceLocation, surface: &SurfaceMap) -> Reach {
+    for node in &surface.nodes {
+        if let SurfaceNode::EntryPoint(ep) = node {
+            if ep.handler_location.file == loc.file {
+                return Reach::Reachable {
+                    location: ep.location.clone(),
+                    method: ep.method,
+                    route: ep.route.clone(),
+                    auth_required: ep.auth_required,
+                };
+            }
+        }
+    }
+    Reach::Unreachable
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::commands::scan::Diag;
+    use crate::evidence::Evidence;
+    use crate::patterns::FindingCategory;
+
+    fn diag_with_cap(path: &str, line: usize, caps: Cap) -> Diag {
+        let ev = Evidence {
+            sink_caps: caps.bits(),
+            ..Evidence::default()
+        };
+        Diag {
+            path: path.into(),
+            line,
+            col: 1,
+            id: "test-rule".into(),
+            category: FindingCategory::Security,
+            evidence: Some(ev),
+            ..Diag::default()
+        }
+    }
+
+    #[test]
+    fn lowest_cap_picks_least_significant_bit() {
+        let combined = Cap::SQL_QUERY | Cap::FILE_IO;
+        assert_eq!(lowest_cap(combined.bits()), Some(Cap::FILE_IO));
+    }
+
+    #[test]
+    fn drops_findings_without_cap_bits() {
+        let mut d = diag_with_cap("a.py", 1, Cap::CODE_EXEC);
+        d.evidence.as_mut().unwrap().sink_caps = 0;
+        let edges = findings_to_edges(&[d], &SurfaceMap::new());
+        assert!(edges.is_empty());
+    }
+
+    #[test]
+    fn reach_unreachable_without_matching_entry_point() {
+        let d = diag_with_cap("orphan.py", 2, Cap::CODE_EXEC);
+        let edges = findings_to_edges(&[d], &SurfaceMap::new());
+        assert_eq!(edges.len(), 1);
+        assert!(matches!(edges[0].reach, Reach::Unreachable));
+    }
+}
diff --git a/src/chain/feasibility.rs b/src/chain/feasibility.rs
new file mode 100644
index 00000000..4f096915
--- /dev/null
+++ b/src/chain/feasibility.rs
@@ -0,0 +1,150 @@
+//! Phase 24 — feasibility scoring for chain edges.
+//!
+//! Each edge produced by [`crate::chain::edges::findings_to_edges`]
+//! carries a feasibility weight in `[0.0, 1.0]`. The weight enters
+//! Phase 25's path score as the multiplicative factor in
+//! `score(path) = sum(impact) * product(feasibility)`, so a single
+//! low-feasibility hop dampens the entire chain.
+//!
+//! # Buckets
+//!
+//! | Bucket                  | Weight | Trigger                                                     |
+//! |-------------------------|--------|-------------------------------------------------------------|
+//! | [`Confirmed`]           | `1.0`  | dynamic [`VerifyStatus::Confirmed`]                         |
+//! | [`InconclusiveHighConf`]| `0.5`  | dynamic [`VerifyStatus::Inconclusive`] + static `High`      |
+//! | [`Unverified`]          | `0.1`  | everything else (no verdict, `NotConfirmed`, `Unsupported`, |
+//! |                         |        | or `Inconclusive` without a high static confidence)         |
+//!
+//! [`Confirmed`]: Feasibility::Confirmed
+//! [`InconclusiveHighConf`]: Feasibility::InconclusiveHighConf
+//! [`Unverified`]: Feasibility::Unverified
+//! [`VerifyStatus::Confirmed`]: crate::evidence::VerifyStatus::Confirmed
+//! [`VerifyStatus::Inconclusive`]: crate::evidence::VerifyStatus::Inconclusive
+
+use crate::commands::scan::Diag;
+use crate::evidence::{Confidence, VerifyResult, VerifyStatus};
+use serde::{Deserialize, Serialize};
+
+/// Discrete feasibility bucket for a chain edge.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum Feasibility {
+    /// Dynamic verification fired the sink probe.
+    Confirmed,
+    /// Dynamic verification was Inconclusive but the static engine's
+    /// confidence in the finding is `High`. Used for findings that
+    /// the verifier could not exercise (build failure, sandbox refuse)
+    /// but where the static evidence is strong.
+    InconclusiveHighConf,
+    /// Everything else — no dynamic verification, dynamic verdict was
+    /// `NotConfirmed`/`Unsupported`, or dynamic was `Inconclusive` but
+    /// static confidence is not `High`.
+    Unverified,
+}
+
+impl Feasibility {
+    /// Multiplicative weight contributed to Phase 25's path score.
+    pub const fn score(self) -> f32 {
+        match self {
+            Feasibility::Confirmed => 1.0,
+            Feasibility::InconclusiveHighConf => 0.5,
+            Feasibility::Unverified => 0.1,
+        }
+    }
+
+    /// Translate a dynamic [`VerifyResult`] into a feasibility weight.
+    ///
+    /// This is the literal signature the design doc specifies. It
+    /// cannot distinguish `Inconclusive` with high static confidence
+    /// from `Inconclusive` with low static confidence (the static
+    /// confidence is carried on the [`Diag`], not on the
+    /// [`VerifyResult`]); use [`Feasibility::for_finding`] when both
+    /// halves of the input are available.
+    pub fn from_verdict(verdict: Option<&VerifyResult>) -> f32 {
+        Self::bucket_from_verdict(verdict, None).score()
+    }
+
+    /// Bucket for a whole finding: like [`from_verdict`](Self::from_verdict)
+    /// but also consults the static `Diag.confidence`, so the
+    /// [`Feasibility::InconclusiveHighConf`] bucket can fire. Returns the
+    /// bucket rather than the `f32` weight; call [`score`](Self::score) for it.
+    pub fn for_finding(diag: &Diag) -> Feasibility {
+        let verdict = diag.evidence.as_ref().and_then(|e| e.dynamic_verdict.as_ref());
+        Self::bucket_from_verdict(verdict, diag.confidence)
+    }
+
+    /// Discrete-bucket flavour of [`from_verdict`](Self::from_verdict).
+    /// Exposed for callers that want the bucket (e.g. for telemetry or
+    /// UI badges) before reducing to an `f32`.
+    pub fn bucket_from_verdict(
+        verdict: Option<&VerifyResult>,
+        static_confidence: Option<Confidence>,
+    ) -> Feasibility {
+        match verdict.map(|v| v.status) {
+            Some(VerifyStatus::Confirmed) => Feasibility::Confirmed,
+            Some(VerifyStatus::Inconclusive)
+                if static_confidence == Some(Confidence::High) =>
+            {
+                Feasibility::InconclusiveHighConf
+            }
+            _ => Feasibility::Unverified,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::evidence::VerifyResult;
+
+    fn verdict(status: VerifyStatus) -> VerifyResult {
+        VerifyResult {
+            finding_id: "f".into(),
+            status,
+            triggered_payload: None,
+            reason: None,
+            inconclusive_reason: None,
+            detail: None,
+            attempts: vec![],
+            toolchain_match: None,
+            differential: None,
+        }
+    }
+
+    #[test]
+    fn confirmed_returns_one() {
+        let v = verdict(VerifyStatus::Confirmed);
+        assert_eq!(Feasibility::from_verdict(Some(&v)), 1.0);
+    }
+
+    #[test]
+    fn inconclusive_without_confidence_returns_unverified() {
+        let v = verdict(VerifyStatus::Inconclusive);
+        assert_eq!(Feasibility::from_verdict(Some(&v)), 0.1);
+    }
+
+    #[test]
+    fn inconclusive_with_high_confidence_returns_half() {
+        let v = verdict(VerifyStatus::Inconclusive);
+        let b = Feasibility::bucket_from_verdict(Some(&v), Some(Confidence::High));
+        assert_eq!(b, Feasibility::InconclusiveHighConf);
+        assert_eq!(b.score(), 0.5);
+    }
+
+    #[test]
+    fn not_confirmed_returns_unverified() {
+        let v = verdict(VerifyStatus::NotConfirmed);
+        assert_eq!(Feasibility::from_verdict(Some(&v)), 0.1);
+    }
+
+    #[test]
+    fn unsupported_returns_unverified() {
+        let v = verdict(VerifyStatus::Unsupported);
+        assert_eq!(Feasibility::from_verdict(Some(&v)), 0.1);
+    }
+
+    #[test]
+    fn no_verdict_returns_unverified() {
+        assert_eq!(Feasibility::from_verdict(None), 0.1);
+    }
+}
diff --git a/src/chain/impact.rs b/src/chain/impact.rs
new file mode 100644
index 00000000..edcc9b44
--- /dev/null
+++ b/src/chain/impact.rs
@@ -0,0 +1,276 @@
+//! Phase 24 — impact lattice for the exploit-chain composer.
+//!
+//! Each [`ImpactRule`] is a `(source_cap, adjacent_cap, result)` triple
+//! drawn from the design doc's lattice:
+//!
+//! | Rule                              | Result                  |
+//! |-----------------------------------|-------------------------|
+//! | `CODE_EXEC`                       | `Rce`                   |
+//! | `DESERIALIZE`                     | `Rce`                   |
+//! | `SSRF`                            | `InternalNetworkAccess` |
+//! | `OPEN_REDIRECT + UNAUTHORIZED_ID` | `SessionHijack`         |
+//! | `HEADER_INJECTION + CODE_EXEC`    | `BrowserToLocalRce`     |
+//! | `FILE_IO + DATA_EXFIL`            | `InfoDisclosure`        |
+//!
+//! The doc spells some lattice nodes with surface-level handles
+//! (`UserSession`, `Cors`, `NoAuth`, `LocalListener`,
+//! `SensitiveFileIo`, `PathTraversal`). Those nodes do not map 1:1
+//! onto [`Cap`] bits, so the table above uses the closest [`Cap`]
+//! approximations:
+//!
+//! - `UserSession` → [`Cap::UNAUTHORIZED_ID`] (request-bound caller
+//!   identifier carrier)
+//! - `Cors + NoAuth` → [`Cap::HEADER_INJECTION`] (the CORS-relaxing
+//!   header is the structural marker; the no-auth side is folded into
+//!   Phase 25's surface-property check on [`crate::surface::EntryPoint::auth_required`])
+//! - `LocalListener` → no cap; folded into Phase 25's surface check
+//!   ([`crate::surface::DataStoreKind::Sql`] /
+//!   [`crate::surface::ExternalServiceKind::HttpApi`] etc.)
+//! - `SensitiveFileIo` → [`Cap::DATA_EXFIL`] (egress-of-sensitive-data
+//!   carrier)
+//! - `PathTraversal` → [`Cap::FILE_IO`]
+//!
+//! # Exhaustiveness
+//!
+//! Pattern-matching exhaustively on [`Cap`] is impossible — it is a
+//! `bitflags!` struct over `u32`, not a closed enum. This module
+//! adopts the [`crate::dynamic::corpus`] pattern instead: every Cap
+//! bit belongs to exactly one of [`IMPACT_LATTICE_COVERED`] or
+//! [`IMPACT_LATTICE_UNCOVERED`], with a const assertion that the
+//! union equals [`Cap::all`]. Adding a new `Cap` bit without
+//! updating one of those constants fails to compile.
+
+use crate::labels::Cap;
+use serde::{Deserialize, Serialize};
+
+/// Impact category produced by a successful chain composition.
+///
+/// Phase 24 enumerates the categories the doc's lattice produces.
+/// Phase 25's scoring pass attaches a severity to each category and
+/// folds them into the final [`crate::chain::ChainGraph`] output.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ImpactCategory {
+    /// Remote code execution.
+    Rce,
+    /// Browser-mediated path to local code execution (e.g. permissive
+    /// CORS plus an unauthenticated endpoint that hands off to a
+    /// `CODE_EXEC` sink).
+    BrowserToLocalRce,
+    /// Session-token hijack via an attacker-controlled redirect that
+    /// keeps the user's auth identity in the request flow.
+    SessionHijack,
+    /// SSRF that lands on an internal/local listener.
+    InternalNetworkAccess,
+    /// Sensitive data egress through a path-traversal-like primitive.
+    InfoDisclosure,
+}
+
+/// One rule in the impact lattice.
+///
+/// `adjacent_cap` is `None` for self-sufficient rules
+/// (`CODE_EXEC → Rce`, `DESERIALIZE → Rce`, `SSRF → InternalNetworkAccess`)
+/// and `Some(cap)` for rules that need a second co-located finding
+/// (`OPEN_REDIRECT + UNAUTHORIZED_ID → SessionHijack`, etc.).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct ImpactRule {
+    pub source_cap: Cap,
+    pub adjacent_cap: Option<Cap>,
+    pub result: ImpactCategory,
+}
+
+/// The default impact lattice from the design doc.
+///
+/// Order matters for [`lookup_impact`]: more specific rules
+/// (`adjacent_cap.is_some()`) appear before the broader fallbacks so a
+/// `HEADER_INJECTION + CODE_EXEC` finding pair is classified as
+/// `BrowserToLocalRce` before the standalone `CODE_EXEC → Rce`
+/// fallback fires.
+pub static IMPACT_LATTICE: &[ImpactRule] = &[
+    // ── 2-cap rules (most specific first) ─────────────────────────
+    ImpactRule {
+        source_cap: Cap::OPEN_REDIRECT,
+        adjacent_cap: Some(Cap::UNAUTHORIZED_ID),
+        result: ImpactCategory::SessionHijack,
+    },
+    ImpactRule {
+        source_cap: Cap::HEADER_INJECTION,
+        adjacent_cap: Some(Cap::CODE_EXEC),
+        result: ImpactCategory::BrowserToLocalRce,
+    },
+    ImpactRule {
+        source_cap: Cap::FILE_IO,
+        adjacent_cap: Some(Cap::DATA_EXFIL),
+        result: ImpactCategory::InfoDisclosure,
+    },
+    // ── 1-cap rules ───────────────────────────────────────────────
+    ImpactRule {
+        source_cap: Cap::CODE_EXEC,
+        adjacent_cap: None,
+        result: ImpactCategory::Rce,
+    },
+    ImpactRule {
+        source_cap: Cap::DESERIALIZE,
+        adjacent_cap: None,
+        result: ImpactCategory::Rce,
+    },
+    ImpactRule {
+        source_cap: Cap::SSRF,
+        adjacent_cap: None,
+        result: ImpactCategory::InternalNetworkAccess,
+    },
+];
+
+/// Caps that participate in at least one impact rule (either as
+/// `source_cap` or as `adjacent_cap`). Update when adding a rule.
+pub const IMPACT_LATTICE_COVERED: u32 = Cap::CODE_EXEC.bits()
+    | Cap::DESERIALIZE.bits()
+    | Cap::SSRF.bits()
+    | Cap::OPEN_REDIRECT.bits()
+    | Cap::UNAUTHORIZED_ID.bits()
+    | Cap::HEADER_INJECTION.bits()
+    | Cap::FILE_IO.bits()
+    | Cap::DATA_EXFIL.bits();
+
+/// Caps that do not participate in any impact rule today. Adding a
+/// rule that consumes one of these caps requires moving it into
+/// [`IMPACT_LATTICE_COVERED`] above.
+pub const IMPACT_LATTICE_UNCOVERED: u32 = Cap::ENV_VAR.bits()
+    | Cap::HTML_ESCAPE.bits()
+    | Cap::SHELL_ESCAPE.bits()
+    | Cap::URL_ENCODE.bits()
+    | Cap::JSON_PARSE.bits()
+    | Cap::FMT_STRING.bits()
+    | Cap::SQL_QUERY.bits()
+    | Cap::CRYPTO.bits()
+    | Cap::LDAP_INJECTION.bits()
+    | Cap::XPATH_INJECTION.bits()
+    | Cap::SSTI.bits()
+    | Cap::XXE.bits()
+    | Cap::PROTOTYPE_POLLUTION.bits();
+
+const _: () = assert!(
+    IMPACT_LATTICE_COVERED | IMPACT_LATTICE_UNCOVERED == Cap::all().bits(),
+    "Cap bit missing from impact lattice coverage; \
+     add to IMPACT_LATTICE_COVERED or IMPACT_LATTICE_UNCOVERED and decide \
+     whether it should participate in a chain rule",
+);
+
+const _: () = assert!(
+    IMPACT_LATTICE_COVERED & IMPACT_LATTICE_UNCOVERED == 0,
+    "Cap bit appears in both IMPACT_LATTICE_COVERED and IMPACT_LATTICE_UNCOVERED",
+);
+
+/// Look up an [`ImpactCategory`] for a (source, adjacent) cap pair.
+///
+/// `adjacent` is `None` when the caller has not yet found a partner
+/// finding. Returns the most-specific matching rule.
+///
+/// Phase 25's path search calls this once per candidate path with the
+/// path's primary and secondary caps; multiple cap matches choose the
+/// first rule in [`IMPACT_LATTICE`] order (specific before fallback).
+pub fn lookup_impact(source: Cap, adjacent: Option<Cap>) -> Option<ImpactCategory> {
+    // First pass: two-cap rules, matched in either argument order.
+    if let Some(adj) = adjacent {
+        for rule in IMPACT_LATTICE {
+            if let Some(rule_adj) = rule.adjacent_cap {
+                let direct = rule.source_cap == source && rule_adj == adj;
+                let swapped = rule.source_cap == adj && rule_adj == source;
+                if direct || swapped {
+                    return Some(rule.result);
+                }
+            }
+        }
+    }
+    // Second pass: standalone rule on source_cap.
+    for rule in IMPACT_LATTICE {
+        if rule.adjacent_cap.is_none() && rule.source_cap == source {
+            return Some(rule.result);
+        }
+    }
+    // Third pass: if `adjacent` is given but the pair didn't hit,
+    // try the standalone rule on adjacent_cap so an UNRELATED + CODE_EXEC
+    // pair still reaches `Rce`.
+    if let Some(adj) = adjacent {
+        for rule in IMPACT_LATTICE {
+            if rule.adjacent_cap.is_none() && rule.source_cap == adj {
+                return Some(rule.result);
+            }
+        }
+    }
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn cmdi_alone_maps_to_rce() {
+        assert_eq!(
+            lookup_impact(Cap::CODE_EXEC, None),
+            Some(ImpactCategory::Rce)
+        );
+    }
+
+    #[test]
+    fn deserialize_alone_maps_to_rce() {
+        assert_eq!(
+            lookup_impact(Cap::DESERIALIZE, None),
+            Some(ImpactCategory::Rce)
+        );
+    }
+
+    #[test]
+    fn ssrf_alone_maps_to_internal_network_access() {
+        assert_eq!(
+            lookup_impact(Cap::SSRF, None),
+            Some(ImpactCategory::InternalNetworkAccess)
+        );
+    }
+
+    #[test]
+    fn open_redirect_plus_user_session_maps_to_session_hijack() {
+        assert_eq!(
+            lookup_impact(Cap::OPEN_REDIRECT, Some(Cap::UNAUTHORIZED_ID)),
+            Some(ImpactCategory::SessionHijack)
+        );
+        // Argument order should not matter.
+        assert_eq!(
+            lookup_impact(Cap::UNAUTHORIZED_ID, Some(Cap::OPEN_REDIRECT)),
+            Some(ImpactCategory::SessionHijack)
+        );
+    }
+
+    #[test]
+    fn cors_plus_codeexec_maps_to_browser_local_rce() {
+        assert_eq!(
+            lookup_impact(Cap::HEADER_INJECTION, Some(Cap::CODE_EXEC)),
+            Some(ImpactCategory::BrowserToLocalRce)
+        );
+    }
+
+    #[test]
+    fn path_traversal_plus_sensitive_io_maps_to_info_disclosure() {
+        assert_eq!(
+            lookup_impact(Cap::FILE_IO, Some(Cap::DATA_EXFIL)),
+            Some(ImpactCategory::InfoDisclosure)
+        );
+    }
+
+    #[test]
+    fn unknown_cap_returns_none() {
+        assert_eq!(lookup_impact(Cap::HTML_ESCAPE, None), None);
+        assert_eq!(lookup_impact(Cap::CRYPTO, None), None);
+    }
+
+    #[test]
+    fn pair_with_uncovered_adjacency_falls_through_to_standalone() {
+        // CODE_EXEC + CRYPTO: CRYPTO has no rule, so we fall back to
+        // the standalone CODE_EXEC → Rce rule.
+        assert_eq!(
+            lookup_impact(Cap::CODE_EXEC, Some(Cap::CRYPTO)),
+            Some(ImpactCategory::Rce)
+        );
+    }
+}
diff --git a/src/chain/mod.rs b/src/chain/mod.rs
new file mode 100644
index 00000000..ce5d21b0
--- /dev/null
+++ b/src/chain/mod.rs
@@ -0,0 +1,129 @@
+//! Phase 24 — exploit-chain composer scaffolding (Track G.1).
+//!
+//! A `ChainGraph` is the small intermediate representation the chain
+//! composer walks between two pre-existing artefacts: the flat list of
+//! per-finding [`Diag`](crate::commands::scan::Diag)s produced by the
+//! static analyser and the [`SurfaceMap`](crate::surface::SurfaceMap)
+//! produced by Track F.
+//!
+//! Phase 24 ships the types only. The implicit-attacker node and the
+//! bounded DFS that walks edges into `ChainFinding`s land in Phase 25
+//! (`src/chain/search.rs`); composite re-verification lands in Phase 26
+//! (`src/chain/reverify.rs`).
+//!
+//! # Storage shape
+//!
+//! Two parallel `Vec`s — `nodes` and `edges` — mirroring `SurfaceMap`'s
+//! shape. Determinism is the caller's responsibility: edges are
+//! produced in the order the source [`Diag`] slice presents, and
+//! `findings_to_edges` does not sort the input. Phase 25 will fold
+//! these into a `petgraph::DiGraph` for path search.
+//!
+//! # Lattice exhaustiveness
+//!
+//! [`impact`] keeps a `IMPACT_LATTICE_COVERED | IMPACT_LATTICE_UNCOVERED
+//! == Cap::all().bits()` const assertion, mirroring the
+//! `CORPUS_SUPPORTED | CORPUS_UNSUPPORTED == Cap::all().bits()` pattern
+//! in [`crate::dynamic::corpus`]. Adding a new `Cap` bit without
+//! updating the lattice fails to compile.
+
+use crate::entry_points::HttpMethod;
+use crate::labels::Cap;
+use crate::surface::SourceLocation;
+use serde::{Deserialize, Serialize};
+
+pub mod edges;
+pub mod feasibility;
+pub mod impact;
+
+pub use edges::{ChainEdge, FindingRef, findings_to_edges};
+pub use feasibility::Feasibility;
+pub use impact::{IMPACT_LATTICE, ImpactCategory, ImpactRule, lookup_impact};
+
+/// One node in a [`ChainGraph`].
+///
+/// `Entry` and `Sink` nodes are translated 1:1 from the SurfaceMap's
+/// [`crate::surface::SurfaceNode::EntryPoint`] and
+/// [`crate::surface::SurfaceNode::DangerousLocal`] variants. `Finding`
+/// nodes carry the [`FindingRef`] of a static [`Diag`](crate::commands::scan::Diag)
+/// so a path from an entry to a sink can pin which finding witnesses each hop.
+/// Phase 25's path search treats the implicit attacker as a virtual
+/// predecessor of every `Entry`; there is no explicit `Attacker`
+/// variant on this enum.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "node", rename_all = "snake_case")]
+pub enum ChainNode {
+    /// A web entry-point lifted from the SurfaceMap.
+    Entry {
+        location: SourceLocation,
+        method: HttpMethod,
+        route: String,
+        auth_required: bool,
+    },
+    /// A static finding produced by the analyser.
+    Finding(FindingRef),
+    /// A dangerous-local sink lifted from the SurfaceMap.
+    Sink {
+        location: SourceLocation,
+        function_name: String,
+        cap_bits: u32,
+    },
+}
+
+impl ChainNode {
+    /// Source location of this node. Used for byte-deterministic
+    /// ordering and for the `nyx surface`-style human display.
+    pub fn location(&self) -> &SourceLocation {
+        match self {
+            ChainNode::Entry { location, .. } => location,
+            ChainNode::Finding(f) => &f.location,
+            ChainNode::Sink { location, .. } => location,
+        }
+    }
+
+    /// Cap bitmask carried by this node, or `0` for entry nodes. Used
+    /// by Phase 25 to discriminate which [`ImpactRule`] a path matches.
+    pub fn cap_bits(&self) -> u32 {
+        match self {
+            ChainNode::Entry { .. } => 0,
+            ChainNode::Finding(f) => f.cap_bits,
+            ChainNode::Sink { cap_bits, .. } => *cap_bits,
+        }
+    }
+}
+
+/// The full chain graph. Phase 24 only exposes the types; the
+/// composer that fills the vectors lands in Phase 25.
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
+pub struct ChainGraph {
+    pub nodes: Vec<ChainNode>,
+    pub edges: Vec<ChainEdge>,
+}
+
+impl ChainGraph {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn node_count(&self) -> usize {
+        self.nodes.len()
+    }
+
+    pub fn edge_count(&self) -> usize {
+        self.edges.len()
+    }
+}
+
+/// Convert a primary [`Cap`] bit into the closest matching impact
+/// category in isolation (no adjacency). Returns `None` when the cap
+/// has no terminal interpretation on its own — chain composition needs
+/// an additional cap or surface property to lift it.
+///
+/// Phase 25's path-search code calls this as a fast-path before
+/// consulting the full [`IMPACT_LATTICE`].
+pub fn standalone_impact(cap: Cap) -> Option<ImpactCategory> {
+    IMPACT_LATTICE
+        .iter()
+        .find(|rule| rule.source_cap == cap && rule.adjacent_cap.is_none())
+        .map(|rule| rule.result)
+}
diff --git a/src/lib.rs b/src/lib.rs
index adbd3ec3..bd9e5c68 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -95,6 +95,7 @@ pub mod baseline;
 pub mod callgraph;
 pub mod cfg;
 pub mod cfg_analysis;
+pub mod chain;
 pub mod cli;
 pub mod commands;
 pub mod constraint;
diff --git a/tests/chain_edges.rs b/tests/chain_edges.rs
new file mode 100644
index 00000000..05e80301
--- /dev/null
+++ b/tests/chain_edges.rs
@@ -0,0 +1,194 @@
+//! Phase 24 acceptance: each impact-lattice rule fires on a synthetic
+//! finding + SurfaceMap pair.
+//!
+//! Mirrors the test plan in `.pitboss/play/plan.md` (Phase 24):
+//! "Tests: `tests/chain_edges.rs` covers each impact rule on a
+//! synthetic SurfaceMap." Each `#[test]` builds the minimal Diag(s)
+//! that should trigger one rule, runs `findings_to_edges`, then
+//! confirms that the resulting edge's primary cap (plus, where the
+//! rule needs adjacency, a second edge's cap) classifies through
+//! `lookup_impact` to the expected `ImpactCategory`.
+//!
+//! Lattice (from the design doc, paraphrased — Cap approximations
+//! documented in `src/chain/impact.rs`):
+//!
+//! | Static caps                          | Impact                  |
+//! |--------------------------------------|-------------------------|
+//! | `CODE_EXEC`                          | `Rce`                   |
+//! | `DESERIALIZE`                        | `Rce`                   |
+//! | `SSRF`                               | `InternalNetworkAccess` |
+//! | `OPEN_REDIRECT + UNAUTHORIZED_ID`    | `SessionHijack`         |
+//! | `HEADER_INJECTION + CODE_EXEC`       | `BrowserToLocalRce`     |
+//! | `FILE_IO + DATA_EXFIL`               | `InfoDisclosure`        |
+
+use nyx_scanner::chain::edges::{ChainEdge, Reach, findings_to_edges};
+use nyx_scanner::chain::feasibility::Feasibility;
+use nyx_scanner::chain::impact::{ImpactCategory, lookup_impact};
+use nyx_scanner::commands::scan::Diag;
+use nyx_scanner::entry_points::HttpMethod;
+use nyx_scanner::evidence::{Confidence, Evidence};
+use nyx_scanner::labels::Cap;
+use nyx_scanner::patterns::{FindingCategory, Severity};
+use nyx_scanner::surface::{EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode};
+
+fn diag_with_caps(path: &str, line: usize, caps: Cap) -> Diag {
+    Diag {
+        path: path.into(),
+        line,
+        col: 1,
+        severity: Severity::High,
+        id: "taint-test".into(),
+        category: FindingCategory::Security,
+        path_validated: false,
+        guard_kind: None,
+        message: None,
+        labels: vec![],
+        confidence: Some(Confidence::Medium),
+        evidence: Some(Evidence {
+            sink_caps: caps.bits(),
+            ..Evidence::default()
+        }),
+        rank_score: None,
+        rank_reason: None,
+        suppressed: false,
+        suppression: None,
+        rollup: None,
+        finding_id: String::new(),
+        alternative_finding_ids: vec![],
+        stable_hash: 0,
+    }
+}
+
+fn synthetic_surface(handler_file: &str, route: &str) -> SurfaceMap {
+    let mut m = SurfaceMap::new();
+    m.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
+        location: SourceLocation::new(handler_file, 1, 1),
+        framework: Framework::Flask,
+        method: HttpMethod::GET,
+        route: route.into(),
+        handler_name: "handler".into(),
+        handler_location: SourceLocation::new(handler_file, 2, 1),
+        auth_required: false,
+    }));
+    m
+}
+
+fn single_edge(diag: Diag, surface: &SurfaceMap) -> ChainEdge {
+    let mut edges = findings_to_edges(&[diag], surface);
+    assert_eq!(edges.len(), 1, "expected exactly one edge");
+    edges.pop().unwrap()
+}
+
+#[test]
+fn rule_cmdi_alone_maps_to_rce() {
+    let surface = synthetic_surface("app.py", "/run");
+    let edge = single_edge(
+        diag_with_caps("app.py", 12, Cap::CODE_EXEC),
+        &surface,
+    );
+    assert_eq!(edge.primary_cap, Cap::CODE_EXEC);
+    assert!(matches!(edge.reach, Reach::Reachable { .. }));
+    assert_eq!(
+        lookup_impact(edge.primary_cap, None),
+        Some(ImpactCategory::Rce)
+    );
+}
+
+#[test]
+fn rule_deserialize_alone_maps_to_rce() {
+    let surface = synthetic_surface("app.py", "/load");
+    let edge = single_edge(
+        diag_with_caps("app.py", 7, Cap::DESERIALIZE),
+        &surface,
+    );
+    assert_eq!(edge.primary_cap, Cap::DESERIALIZE);
+    assert_eq!(
+        lookup_impact(edge.primary_cap, None),
+        Some(ImpactCategory::Rce)
+    );
+}
+
+#[test]
+fn rule_ssrf_alone_maps_to_internal_network_access() {
+    let surface = synthetic_surface("fetch.py", "/proxy");
+    let edge = single_edge(
+        diag_with_caps("fetch.py", 4, Cap::SSRF),
+        &surface,
+    );
+    assert_eq!(edge.primary_cap, Cap::SSRF);
+    assert_eq!(
+        lookup_impact(edge.primary_cap, None),
+        Some(ImpactCategory::InternalNetworkAccess)
+    );
+}
+
+#[test]
+fn rule_open_redirect_plus_user_session_maps_to_session_hijack() {
+    let surface = synthetic_surface("auth.py", "/login");
+    let redirect = diag_with_caps("auth.py", 11, Cap::OPEN_REDIRECT);
+    let user_id = diag_with_caps("auth.py", 18, Cap::UNAUTHORIZED_ID);
+    let edges = findings_to_edges(&[redirect, user_id], &surface);
+    assert_eq!(edges.len(), 2);
+    let caps: Vec<Cap> = edges.iter().map(|e| e.primary_cap).collect();
+    assert!(caps.contains(&Cap::OPEN_REDIRECT));
+    assert!(caps.contains(&Cap::UNAUTHORIZED_ID));
+    assert_eq!(
+        lookup_impact(edges[0].primary_cap, Some(edges[1].primary_cap)),
+        Some(ImpactCategory::SessionHijack)
+    );
+}
+
+#[test]
+fn rule_cors_plus_codeexec_maps_to_browser_local_rce() {
+    let surface = synthetic_surface("api.py", "/exec");
+    let cors = diag_with_caps("api.py", 3, Cap::HEADER_INJECTION);
+    let code = diag_with_caps("api.py", 14, Cap::CODE_EXEC);
+    let edges = findings_to_edges(&[cors, code], &surface);
+    assert_eq!(edges.len(), 2);
+    assert_eq!(
+        lookup_impact(edges[0].primary_cap, Some(edges[1].primary_cap)),
+        Some(ImpactCategory::BrowserToLocalRce)
+    );
+}
+
+#[test]
+fn rule_path_traversal_plus_sensitive_io_maps_to_info_disclosure() {
+    let surface = synthetic_surface("files.py", "/download");
+    let trav = diag_with_caps("files.py", 5, Cap::FILE_IO);
+    let exfil = diag_with_caps("files.py", 9, Cap::DATA_EXFIL);
+    let edges = findings_to_edges(&[trav, exfil], &surface);
+    assert_eq!(edges.len(), 2);
+    assert_eq!(
+        lookup_impact(edges[0].primary_cap, Some(edges[1].primary_cap)),
+        Some(ImpactCategory::InfoDisclosure)
+    );
+}
+
+#[test]
+fn findings_without_sink_caps_are_dropped() {
+    let surface = synthetic_surface("a.py", "/");
+    let mut d = diag_with_caps("a.py", 1, Cap::CODE_EXEC);
+    d.evidence.as_mut().unwrap().sink_caps = 0;
+    let edges = findings_to_edges(&[d], &surface);
+    assert!(edges.is_empty());
+}
+
+#[test]
+fn finding_in_file_with_no_entry_point_is_unreachable() {
+    let surface = synthetic_surface("app.py", "/");
+    let edge = single_edge(
+        diag_with_caps("internal_helper.py", 1, Cap::CODE_EXEC),
+        &surface,
+    );
+    assert!(matches!(edge.reach, Reach::Unreachable));
+}
+
+#[test]
+fn feasibility_defaults_to_unverified() {
+    let surface = synthetic_surface("app.py", "/");
+    let edge = single_edge(
+        diag_with_caps("app.py", 1, Cap::CODE_EXEC),
+        &surface,
+    );
+    assert_eq!(edge.feasibility, Feasibility::Unverified);
+}