mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
358 lines
14 KiB
Rust
358 lines
14 KiB
Rust
//! Phase 24 — convert per-finding [`Diag`]s into chain-graph edges.
|
|
//!
|
|
//! Each call to [`findings_to_edges`] emits at most one [`ChainEdge`]
|
|
//! per input finding. The edge is *typed* by:
|
|
//!
|
|
//! - the primary [`Cap`] bit picked from [`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps)
|
|
//!   (chosen deterministically by [`pick_chain_cap`]: the lowest set bit with a
//!   standalone impact-lattice rule, otherwise the lowest set bit), and
|
|
//! - the *reach* — the surface [`EntryPoint`](crate::surface::EntryPoint) in the same file as the
|
|
//! finding, when one exists, otherwise [`Reach::Unreachable`].
|
|
//!
|
|
//! Phase 25's path search composes these edges with the SurfaceMap's
|
|
//! `Reaches` edges into full chains. Phase 24 does not run any path
|
|
//! search or do call-graph traversal: edges are emitted at finding
|
|
//! granularity and carry only a file-level reach hint (optionally widened by a
//! [`FileReachMap`] passed to [`findings_to_edges_with_reach`]).
|
|
|
|
use crate::callgraph::FileReachMap;
|
|
use crate::commands::scan::Diag;
|
|
use crate::entry_points::HttpMethod;
|
|
use crate::labels::Cap;
|
|
use crate::surface::{SourceLocation, SurfaceMap, SurfaceNode};
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use super::feasibility::Feasibility;
|
|
use super::impact::lookup_impact;
|
|
|
|
/// Compact reference to a static finding embedded in a [`ChainEdge`].
|
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct FindingRef {
|
|
/// Stable finding ID (matches [`Diag::finding_id`] when present).
|
|
pub finding_id: String,
|
|
/// Stable 64-bit hash from [`Diag::stable_hash`]. Zero when the
|
|
/// finding has not been hashed yet.
|
|
pub stable_hash: u64,
|
|
/// Source location of the sink.
|
|
pub location: SourceLocation,
|
|
/// Rule identifier (`Diag::id`).
|
|
pub rule_id: String,
|
|
/// Resolved sink cap bits ([`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps)).
|
|
pub cap_bits: u32,
|
|
}
|
|
|
|
/// Whether the finding lands inside an externally-reachable surface
|
|
/// entry-point. Phase 24 only resolves *file-local* reach: a finding
|
|
/// in `app/views.py` is treated as reachable if any
|
|
/// [`EntryPoint`](crate::surface::EntryPoint) declares a handler in
|
|
/// that same file. Phase 25 will fold the call graph in.
|
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
#[serde(tag = "reach", rename_all = "snake_case")]
|
|
pub enum Reach {
|
|
/// Finding is in a file that hosts at least one entry-point.
|
|
/// `route` and `method` describe the first matching entry-point
|
|
/// (surface-canonical order).
|
|
Reachable {
|
|
location: SourceLocation,
|
|
method: HttpMethod,
|
|
route: String,
|
|
auth_required: bool,
|
|
},
|
|
/// Finding is in a file with no surface entry-points.
|
|
Unreachable,
|
|
}
|
|
|
|
/// One edge in the chain graph.
|
|
///
|
|
/// Phase 24's edges live at the granularity of a single finding.
|
|
/// Phase 25 will introduce additional edge kinds (entry → finding,
|
|
/// finding → sink-cluster, etc.) once path search is wired up.
|
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
pub struct ChainEdge {
|
|
pub finding: FindingRef,
|
|
/// Primary cap classification. Picked via [`pick_chain_cap`]: when
|
|
/// several cap bits are set, prefers a bit that has a standalone
|
|
/// rule in [`crate::chain::impact::IMPACT_LATTICE`] over the
|
|
/// lowest bit so a `SQL_QUERY | CODE_EXEC` finding lands on the
|
|
/// chain-relevant cap (`CODE_EXEC`). Falls back to the lowest set
|
|
/// bit when no bit has a standalone rule, keeping single-cap
|
|
/// findings deterministic.
|
|
pub primary_cap: Cap,
|
|
/// Where the finding sits relative to the surface.
|
|
pub reach: Reach,
|
|
/// Phase 25 path-score factor.
|
|
pub feasibility: Feasibility,
|
|
}
|
|
|
|
/// Convert each [`Diag`] to one [`ChainEdge`].
|
|
///
|
|
/// Findings without cap bits (`Diag::evidence.sink_caps == 0`) are
|
|
/// dropped — the chain composer cannot classify them on a typed
|
|
/// lattice and Phase 25's scoring expects every edge to expose a
|
|
/// primary cap. This is a deliberate quiet-drop: such findings are
|
|
/// usually structural CFG diagnostics (e.g. `cfg-auth-gap`) whose
|
|
/// chain participation is modelled by the SurfaceMap's
|
|
/// `AuthRequiredOn` edges instead.
|
|
///
|
|
/// The output order mirrors `findings`; the caller is responsible for
|
|
/// any further canonicalisation.
|
|
pub fn findings_to_edges(findings: &[Diag], surface: &SurfaceMap) -> Vec<ChainEdge> {
|
|
findings_to_edges_with_reach(findings, surface, None)
|
|
}
|
|
|
|
/// Like [`findings_to_edges`] but optionally consults a [`FileReachMap`]
|
|
/// to widen `Reach::Reachable` beyond the file-local match.
|
|
///
|
|
/// When `reach` is `Some`, a finding's enclosing file is also considered
|
|
/// `Reachable` whenever any [`SurfaceNode::EntryPoint`]'s
|
|
/// `handler_location.file` transitively reaches the finding's file via
|
|
/// the call graph. The first matching entry-point (surface-canonical
|
|
/// order) is used to populate the `route` / `method` / `auth_required`
|
|
/// fields.
|
|
///
|
|
/// `reach = None` is byte-identical to the legacy [`findings_to_edges`]
|
|
/// behaviour. Path strings on both sides must use the same convention
|
|
/// (project-relative POSIX) for the widening to fire; mismatched paths
|
|
/// silently fall through to the file-local heuristic.
|
|
pub fn findings_to_edges_with_reach(
|
|
findings: &[Diag],
|
|
surface: &SurfaceMap,
|
|
reach: Option<&FileReachMap>,
|
|
) -> Vec<ChainEdge> {
|
|
findings
|
|
.iter()
|
|
.filter_map(|d| build_edge(d, surface, reach))
|
|
.collect()
|
|
}
|
|
|
|
fn build_edge(
|
|
diag: &Diag,
|
|
surface: &SurfaceMap,
|
|
reach: Option<&FileReachMap>,
|
|
) -> Option<ChainEdge> {
|
|
let evidence = diag.evidence.as_ref()?;
|
|
if evidence.sink_caps == 0 {
|
|
return None;
|
|
}
|
|
let cap_bits = evidence.sink_caps;
|
|
let primary_cap = pick_chain_cap(cap_bits)?;
|
|
let location = SourceLocation::new(diag.path.clone(), diag.line as u32, diag.col as u32);
|
|
let reach_kind = locate_reach(&location, surface, reach);
|
|
let feasibility = Feasibility::for_finding(diag);
|
|
let finding = FindingRef {
|
|
finding_id: diag.finding_id.clone(),
|
|
stable_hash: diag.stable_hash,
|
|
location,
|
|
rule_id: diag.id.clone(),
|
|
cap_bits,
|
|
};
|
|
Some(ChainEdge {
|
|
finding,
|
|
primary_cap,
|
|
reach: reach_kind,
|
|
feasibility,
|
|
})
|
|
}
|
|
|
|
/// Return the lowest single-bit [`Cap`] present in `bits`, or `None`
|
|
/// when `bits == 0`. Deterministic: always picks the lowest bit.
|
|
pub fn lowest_cap(bits: u32) -> Option<Cap> {
|
|
if bits == 0 {
|
|
return None;
|
|
}
|
|
let lowest = 1u32 << bits.trailing_zeros();
|
|
Cap::from_bits(lowest)
|
|
}
|
|
|
|
/// Pick the chain-relevant [`Cap`] from a sink-cap bitmask.
|
|
///
|
|
/// When multiple caps are set, prefer one that has a standalone rule in
|
|
/// [`crate::chain::impact::IMPACT_LATTICE`] (e.g. `CODE_EXEC`,
|
|
/// `DESERIALIZE`, `SSRF`) over the lowest set bit. A finding with
|
|
/// `sink_caps = SQL_QUERY | CODE_EXEC` previously resolved to
|
|
/// `SQL_QUERY` (the lowest bit) and missed the `CODE_EXEC → Rce`
|
|
/// lattice rule; this helper resolves it to `CODE_EXEC` instead.
|
|
///
|
|
/// Iterates bits low to high so ties between caps with standalone
|
|
/// rules stay deterministic. Falls back to [`lowest_cap`] when no
|
|
/// bit has a standalone rule, preserving single-cap behaviour.
|
|
pub fn pick_chain_cap(bits: u32) -> Option<Cap> {
|
|
if bits == 0 {
|
|
return None;
|
|
}
|
|
let mut remaining = bits;
|
|
while remaining != 0 {
|
|
let bit = 1u32 << remaining.trailing_zeros();
|
|
if let Some(cap) = Cap::from_bits(bit)
|
|
&& lookup_impact(cap, None).is_some()
|
|
{
|
|
return Some(cap);
|
|
}
|
|
remaining &= !bit;
|
|
}
|
|
lowest_cap(bits)
|
|
}
|
|
|
|
fn locate_reach(loc: &SourceLocation, surface: &SurfaceMap, reach: Option<&FileReachMap>) -> Reach {
|
|
// Within each pass, prefer an *unauthenticated* entry-point over an
|
|
// auth-gated one: the chain composer scores worst-case exposure, and
|
|
// taking the first match used to under-report whenever an auth-gated
|
|
// route happened to sort first in the same file.
|
|
let pick = |matches_entry: &dyn Fn(&crate::surface::EntryPoint) -> bool| -> Option<Reach> {
|
|
let mut best: Option<&crate::surface::EntryPoint> = None;
|
|
for node in &surface.nodes {
|
|
if let SurfaceNode::EntryPoint(ep) = node
|
|
&& matches_entry(ep)
|
|
{
|
|
if !ep.auth_required {
|
|
best = Some(ep);
|
|
break;
|
|
}
|
|
best.get_or_insert(ep);
|
|
}
|
|
}
|
|
best.map(|ep| Reach::Reachable {
|
|
location: ep.location.clone(),
|
|
method: ep.method,
|
|
route: ep.route.clone(),
|
|
auth_required: ep.auth_required,
|
|
})
|
|
};
|
|
// Pass 1: file-local match (legacy behaviour, always applies).
|
|
if let Some(found) = pick(&|ep| ep.handler_location.file == loc.file) {
|
|
return found;
|
|
}
|
|
// Pass 2: transitive caller match via the call graph. Only fires
|
|
// when `reach` is supplied — keeps the legacy file-local behaviour
|
|
// for callers that have not yet wired the call-graph reach map.
|
|
if let Some(reach) = reach
|
|
&& let Some(found) = pick(&|ep| reach.reaches(&ep.handler_location.file, &loc.file))
|
|
{
|
|
return found;
|
|
}
|
|
Reach::Unreachable
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::commands::scan::Diag;
    use crate::entry_points::HttpMethod;
    use crate::evidence::Evidence;
    use crate::patterns::FindingCategory;
    use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};

    /// Build a minimal security [`Diag`] at `path:line` whose evidence carries
    /// exactly `caps` as its sink capability mask.
    fn diag_with_cap(path: &str, line: usize, caps: Cap) -> Diag {
        let ev = Evidence {
            sink_caps: caps.bits(),
            ..Evidence::default()
        };
        Diag {
            path: path.into(),
            line,
            col: 1,
            id: "test-rule".into(),
            category: FindingCategory::Security,
            evidence: Some(ev),
            ..Diag::default()
        }
    }

    /// Build a Flask `GET` entry-point node whose handler lives in `file`.
    fn entry_point_node(file: &str, route: &str, auth_required: bool) -> SurfaceNode {
        SurfaceNode::EntryPoint(EntryPoint {
            location: SourceLocation::new(file, 1, 1),
            framework: Framework::Flask,
            method: HttpMethod::GET,
            route: route.into(),
            handler_name: "handle".into(),
            handler_location: SourceLocation::new(file, 2, 1),
            auth_required,
        })
    }

    #[test]
    fn lowest_cap_picks_least_significant_bit() {
        let combined = Cap::SQL_QUERY | Cap::FILE_IO;
        assert_eq!(lowest_cap(combined.bits()), Some(Cap::FILE_IO));
    }

    #[test]
    fn pick_chain_cap_prefers_standalone_rule_cap() {
        // SQL_QUERY (bit 7) has no standalone lattice rule; CODE_EXEC
        // (bit 10) does. Lowest-bit alone would pick SQL_QUERY.
        let combined = Cap::SQL_QUERY | Cap::CODE_EXEC;
        assert_eq!(pick_chain_cap(combined.bits()), Some(Cap::CODE_EXEC));
    }

    #[test]
    fn pick_chain_cap_falls_back_to_lowest_when_no_standalone_rule() {
        // SQL_QUERY + FILE_IO: neither has a standalone rule, fall
        // back to lowest_cap behaviour.
        let combined = Cap::SQL_QUERY | Cap::FILE_IO;
        assert_eq!(pick_chain_cap(combined.bits()), Some(Cap::FILE_IO));
    }

    #[test]
    fn pick_chain_cap_single_bit_unchanged() {
        assert_eq!(pick_chain_cap(Cap::CODE_EXEC.bits()), Some(Cap::CODE_EXEC));
        assert_eq!(pick_chain_cap(Cap::SQL_QUERY.bits()), Some(Cap::SQL_QUERY));
        assert_eq!(pick_chain_cap(0), None);
    }

    #[test]
    fn drops_findings_without_cap_bits() {
        let mut d = diag_with_cap("a.py", 1, Cap::CODE_EXEC);
        d.evidence.as_mut().unwrap().sink_caps = 0;
        let edges = findings_to_edges(&[d], &SurfaceMap::new());
        assert!(edges.is_empty());
    }

    /// A finding with no evidence at all is dropped just like one with an
    /// empty cap mask.
    #[test]
    fn drops_findings_without_evidence() {
        let mut d = diag_with_cap("a.py", 1, Cap::CODE_EXEC);
        d.evidence = None;
        let edges = findings_to_edges(&[d], &SurfaceMap::new());
        assert!(edges.is_empty());
    }

    #[test]
    fn reach_unreachable_without_matching_entry_point() {
        let d = diag_with_cap("orphan.py", 2, Cap::CODE_EXEC);
        let edges = findings_to_edges(&[d], &SurfaceMap::new());
        assert_eq!(edges.len(), 1);
        assert!(matches!(edges[0].reach, Reach::Unreachable));
    }

    /// Regression guard: an unauthenticated entry-point must win over an
    /// auth-gated one that sorts earlier in the same file.
    #[test]
    fn reach_prefers_unauthenticated_entry_point() {
        let mut surface = SurfaceMap::new();
        surface.nodes.push(entry_point_node("views.py", "/admin", true));
        surface.nodes.push(entry_point_node("views.py", "/public", false));

        let d = diag_with_cap("views.py", 5, Cap::CODE_EXEC);
        let edges = findings_to_edges(&[d], &surface);
        assert_eq!(edges.len(), 1);
        match &edges[0].reach {
            Reach::Reachable {
                route,
                auth_required,
                ..
            } => {
                assert_eq!(route, "/public");
                assert!(!*auth_required);
            }
            other => panic!("expected Reachable, got {other:?}"),
        }
    }

    /// When every matching entry-point is auth-gated, the first one in
    /// surface order is reported.
    #[test]
    fn reach_falls_back_to_first_auth_gated_entry_point() {
        let mut surface = SurfaceMap::new();
        surface.nodes.push(entry_point_node("views.py", "/admin", true));
        surface.nodes.push(entry_point_node("views.py", "/settings", true));

        let d = diag_with_cap("views.py", 5, Cap::CODE_EXEC);
        let edges = findings_to_edges(&[d], &surface);
        assert_eq!(edges.len(), 1);
        match &edges[0].reach {
            Reach::Reachable {
                route,
                auth_required,
                ..
            } => {
                assert_eq!(route, "/admin");
                assert!(*auth_required);
            }
            other => panic!("expected Reachable, got {other:?}"),
        }
    }

    /// Cross-file finding becomes Reachable when the call-graph reach
    /// map records a transitive caller in the entry-point's file.
    #[test]
    fn reach_widens_with_file_reach_map() {
        use crate::callgraph::{FileReachMap, build_call_graph};
        use crate::summary::{FuncSummary, merge_summaries};

        // routes.py::handle -> helper.py::sink
        let handle = FuncSummary {
            name: "handle".into(),
            file_path: "routes.py".into(),
            lang: "python".into(),
            param_count: 0,
            callees: vec![crate::summary::CalleeSite::bare("sink")],
            ..Default::default()
        };
        let sink = FuncSummary {
            name: "sink".into(),
            file_path: "helper.py".into(),
            lang: "python".into(),
            param_count: 0,
            ..Default::default()
        };
        let gs = merge_summaries(vec![handle, sink], None);
        let cg = build_call_graph(&gs, &[]);
        let reach = FileReachMap::build(&cg);

        let mut surface = SurfaceMap::new();
        surface.nodes.push(entry_point_node("routes.py", "/", false));

        let d = diag_with_cap("helper.py", 10, Cap::CODE_EXEC);

        // Without reach: file-local lookup leaves the finding Unreachable.
        let edges = findings_to_edges(std::slice::from_ref(&d), &surface);
        assert_eq!(edges.len(), 1);
        assert!(matches!(edges[0].reach, Reach::Unreachable));

        // With reach: transitive caller in `routes.py` lifts to Reachable.
        let edges = findings_to_edges_with_reach(&[d], &surface, Some(&reach));
        assert_eq!(edges.len(), 1);
        match &edges[0].reach {
            Reach::Reachable { route, method, .. } => {
                assert_eq!(route, "/");
                assert_eq!(*method, HttpMethod::GET);
            }
            other => panic!("expected Reachable, got {other:?}"),
        }
    }
}
|