This commit is contained in:
Eli Peter 2026-06-05 10:16:30 -05:00 committed by GitHub
parent 55247b7fcd
commit 991c84a1eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
1464 changed files with 225448 additions and 1985 deletions

View file

@ -3,7 +3,6 @@
//! Tracks inclusive `[lo, hi]` integer bounds. `None` = unbounded (−∞ or +∞).
//! Both `None` = Top (any integer). Provides arithmetic transfer functions
//! (add, sub, mul, div, mod) with overflow-safe semantics.
#![allow(clippy::collapsible_if)]
use crate::state::lattice::{AbstractDomain, Lattice};
use serde::{Deserialize, Serialize};

View file

@ -102,6 +102,7 @@ fn parse_timeout_diag(path: &Path, timeout_ms: u64) -> Diag {
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
stable_hash: 0,
}
}
@ -234,10 +235,17 @@ fn build_taint_diag(
.map(sanitize_desc)
})
.unwrap_or_else(|| "(unknown)".into());
// Sink-callee attribution: when the sink node is an *argument* of a call
// (e.g. PHP `header("location: " . $_GET['x'])` — the `$_GET[...]` subscript
// carries `callee = "$_GET"` but `outer_callee = "header"`), the enclosing
// call is the real sink and should be displayed, not the source token.
// `outer_callee` is only populated for nested/argument positions, so for a
// plain call node it is None and we fall back to the node's own callee.
let call_site_callee = cfg_graph[finding.sink]
.call
.callee
.outer_callee
.as_deref()
.or(cfg_graph[finding.sink].call.callee.as_deref())
.map(sanitize_desc)
.unwrap_or_else(|| "(unknown)".into());
let kind_label = source_kind_label(finding.source_kind);
@ -706,6 +714,7 @@ fn build_taint_diag(
rollup: None,
finding_id: finding.finding_id.clone(),
alternative_finding_ids: finding.alternative_finding_ids.to_vec(),
stable_hash: 0,
};
// Post-fill explanation and confidence limiters
@ -779,6 +788,35 @@ fn lang_for_path(path: &Path) -> Option<(Language, &'static str)> {
}
}
/// All language slugs the scanner can parse, paired with the file extensions
/// that map to them. Single source of truth shared with [`lang_for_path`]; the
/// `supported_extensions_resolve_to_their_slug` test asserts they stay in sync.
///
/// Each entry is `(slug, extensions)`. Slugs and extensions are stored in
/// lowercase, and extensions are written without a leading dot.
pub(crate) const SUPPORTED_LANGUAGE_EXTENSIONS: &[(&str, &[&str])] = &[
("rust", &["rs"]),
("c", &["c"]),
(
"cpp",
// Both C++ source and C++ header spellings; a bare `h` is not listed.
&["cpp", "cc", "cxx", "c++", "hpp", "hxx", "hh", "h++"],
),
("java", &["java"]),
("go", &["go"]),
("php", &["php"]),
("python", &["py"]),
("typescript", &["ts", "tsx"]),
("javascript", &["js", "jsx"]),
("ruby", &["rb"]),
];
/// Looks up the file extensions registered for a language slug.
///
/// The slug is compared ignoring ASCII case, so `"Rust"` and `"rust"` resolve
/// to the same table entry. A slug that is not in
/// [`SUPPORTED_LANGUAGE_EXTENSIONS`] yields an empty slice.
pub fn extensions_for_lang(slug: &str) -> &'static [&'static str] {
    for (candidate, extensions) in SUPPORTED_LANGUAGE_EXTENSIONS {
        if candidate.eq_ignore_ascii_case(slug) {
            return *extensions;
        }
    }
    // No table entry matched.
    &[]
}
/// Fast binary-file guard: skip if >1% NUL bytes.
fn is_binary(bytes: &[u8]) -> bool {
bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1
@ -965,9 +1003,11 @@ fn is_test_suppressible_pattern(id: &str) -> bool {
// deterministic test data, insecure RNG used for fixture seeding.
id.ends_with(".secrets.hardcoded_secret")
|| id.ends_with(".secrets.hardcoded_key")
|| id.ends_with(".crypto.hardcoded_key")
|| id.ends_with(".crypto.math_random")
|| id.ends_with(".crypto.insecure_random")
|| id.ends_with(".crypto.weak_digest")
|| id.ends_with(".crypto.weak_algorithm")
|| id.ends_with(".crypto.md5")
|| id.ends_with(".crypto.sha1")
|| id.ends_with(".crypto.rand")
@ -1041,9 +1081,7 @@ fn downgrade_severity(s: Severity) -> Severity {
}
}
// ─────────────────────────────────────────────────────────────────────────────
// ParsedSource + ParsedFile: shared parse/CFG pipeline
// ─────────────────────────────────────────────────────────────────────────────
/// Level 1: parsed tree + lang info. No CFG construction.
struct ParsedSource<'a> {
@ -1363,6 +1401,7 @@ impl<'a> ParsedSource<'a> {
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
stable_hash: 0,
});
}
}
@ -1890,7 +1929,6 @@ impl<'a> ParsedFile<'a> {
cfg: &body.graph,
entry: body.entry,
lang: caller_lang,
file_path: &self.source.file_path_str,
source_bytes: self.source.bytes,
func_summaries: self.local_summaries(),
global_summaries,
@ -1950,13 +1988,35 @@ impl<'a> ParsedFile<'a> {
cfg_analysis::Confidence::Medium => crate::evidence::Confidence::Medium,
cfg_analysis::Confidence::Low => crate::evidence::Confidence::Low,
});
// Carry the sink node's resolved Sink caps onto the structural
// finding's evidence so downstream cap-classification (and the
// eval `cap_of`) buckets `cfg-unguarded-sink` under its real cap
// (sqli/cmdi/ssrf/…) instead of the catch-all `other`. Without
// this every taint-less structural sink finding fell through to
// `other`, hiding real recall (e.g. dvpwa `cur.execute` SQLi)
// and inflating the `other` bucket. Non-sink structural findings
// (resource-leak, auth-gap) carry no Sink label, so this is 0.
let cf_sink_caps: u32 = cf
.evidence
.first()
.map(|&n| {
cfg_ctx.cfg[n].taint.labels.iter().fold(0u32, |acc, l| {
if let crate::labels::DataLabel::Sink(c) = l {
acc | c.bits()
} else {
acc
}
})
})
.unwrap_or(0);
let cf_category = FindingCategory::for_structural_rule(&cf.rule_id);
out.push(Diag {
path: self.source.path.to_string_lossy().into_owned(),
line: point.row + 1,
col: point.column + 1,
severity: cf.severity,
id: cf.rule_id,
category: FindingCategory::Security,
category: cf_category,
path_validated: false,
guard_kind: None,
message: Some(cf.message),
@ -1971,6 +2031,7 @@ impl<'a> ParsedFile<'a> {
kind: "sink".into(),
snippet: None,
}),
sink_caps: cf_sink_caps,
guards: vec![],
sanitizers: vec![],
state: None,
@ -1984,6 +2045,7 @@ impl<'a> ParsedFile<'a> {
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
stable_hash: 0,
});
}
} // end for body in bodies (CFG structural analyses)
@ -2031,7 +2093,7 @@ impl<'a> ParsedFile<'a> {
col: point.column + 1,
severity: sf.severity,
id: sf.rule_id.clone(),
category: FindingCategory::Security,
category: FindingCategory::for_structural_rule(&sf.rule_id),
path_validated: false,
guard_kind: None,
message: Some(sf.message.clone()),
@ -2064,6 +2126,7 @@ impl<'a> ParsedFile<'a> {
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
stable_hash: 0,
});
}
@ -2157,9 +2220,7 @@ impl<'a> ParsedFile<'a> {
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Pass 1: Extract function summaries (no taint analysis)
// ─────────────────────────────────────────────────────────────────────────────
/// Extract function summaries from pre-read bytes.
///
@ -2305,7 +2366,10 @@ pub fn perf_stage_breakdown_fused(
TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &taint_diags);
let _filtered: Vec<_> = ast_findings
.into_iter()
.filter(|d| !suppression.should_suppress(&d.id, d.line))
.filter(|d| {
!suppression.should_suppress(&d.id, d.line)
&& !suppression.is_redundant_ast_pattern(&d.id, d.line)
})
.collect();
let t_suppr = s_suppr.elapsed().as_micros();
@ -2449,9 +2513,7 @@ pub fn extract_all_summaries_from_bytes(
))
}
// ─────────────────────────────────────────────────────────────────────────────
// Constant-argument suppression helper
// ─────────────────────────────────────────────────────────────────────────────
/// Returns `true` when the captured call node has only literal arguments
/// (string, number, boolean, null/nil/none), or identifier arguments that
@ -5351,9 +5413,7 @@ fn has_interpolation(node: tree_sitter::Node) -> bool {
false
}
// ─────────────────────────────────────────────────────────────────────────────
// Layer B: AST pattern suppression when taint confirms safety
// ─────────────────────────────────────────────────────────────────────────────
/// Map the second segment of a pattern ID (e.g. "cmdi" from "py.cmdi.os_system")
/// to the `Cap` that taint analysis models. Returns `None` for categories taint
@ -5425,6 +5485,14 @@ struct TaintSuppressionCtx {
/// 11 inline analysis but the sink's enclosing scope has no
/// labelled Sanitizer of its own.
interproc_sanitizer_callers: HashSet<Option<String>>,
/// Union of resolved sink-cap bits for cap-specific taint findings at
/// each line. Used by [`Self::is_redundant_ast_pattern`] to drop an
/// AST-pattern finding only when the flow engine already emitted a
/// specific rule id for the same vulnerability class. Legacy generic
/// findings (`taint-unsanitised-flow`, `cfg-unguarded-sink`) are not
/// canonical enough to subsume language-specific AST rule IDs such as
/// `py.cmdi.subprocess_shell` or `c.cmdi.system`.
specific_taint_finding_caps_by_line: HashMap<usize, u32>,
}
impl TaintSuppressionCtx {
@ -5623,6 +5691,26 @@ impl TaintSuppressionCtx {
.map(|d| d.line)
.collect();
// Cap bits per line for cap-specific flow-backed findings only, so a
// redundant AST pattern at the same line+cap can be dropped in favour
// of the richer flow. Do not count legacy generic findings here:
// `taint-unsanitised-flow` and `cfg-unguarded-sink` carry evidence,
// but their rule ids are deliberately catch-alls, while AST `cmdi`,
// `sqli`, etc. IDs are the canonical namespace many tests, SARIF
// consumers, and dynamic-verification spec derivation rely on.
let mut specific_taint_finding_caps_by_line: HashMap<usize, u32> = HashMap::new();
for d in taint_diags {
if d.id.starts_with("taint-") && !d.id.starts_with("taint-unsanitised-flow") {
if let Some(caps) = d.evidence.as_ref().map(|e| e.sink_caps) {
if caps != 0 {
*specific_taint_finding_caps_by_line
.entry(d.line)
.or_default() |= caps;
}
}
}
}
// Per-function partition of taint findings. Maps each finding's
// line to the enclosing function scope by reusing
// `sink_func_at_line` (the same span/function mapping the Sink-side
@ -5646,9 +5734,30 @@ impl TaintSuppressionCtx {
engine_validated_funcs,
source_killed_funcs,
interproc_sanitizer_callers,
specific_taint_finding_caps_by_line,
}
}
/// Decides whether an AST pattern finding merely repeats a cap-specific taint
/// finding that was already recorded for the same line.
///
/// The pattern id is first mapped to a cap via `pattern_category_cap`; ids
/// with no cap mapping are never considered redundant. Otherwise the pattern
/// is redundant exactly when `specific_taint_finding_caps_by_line` holds an
/// entry for `line` whose bitmask shares at least one bit with that cap.
///
/// Matching on the cap (not only the line) keeps a pattern whose cap differs
/// from the co-located finding's cap — a distinct sink on the same line. This
/// complements [`Self::should_suppress`]: that method handles the case where
/// taint analysis found the site safe, this one the case where it already
/// reported the site under a specific rule id.
fn is_redundant_ast_pattern(&self, pattern_id: &str, line: usize) -> bool {
    pattern_category_cap(pattern_id).is_some_and(|cap| {
        match self.specific_taint_finding_caps_by_line.get(&line) {
            Some(recorded) => recorded & cap.bits() != 0,
            None => false,
        }
    })
}
/// Returns `true` if this AST pattern finding should be suppressed.
fn should_suppress(&self, pattern_id: &str, line: usize) -> bool {
// Condition 1: pattern category maps to a Cap taint models
@ -5734,9 +5843,7 @@ impl TaintSuppressionCtx {
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Pass 2 / singlefile: Full rule execution (AST queries + taint)
// ─────────────────────────────────────────────────────────────────────────────
/// Run all enabled analyses on pre-read bytes and return diagnostics.
///
@ -5779,11 +5886,10 @@ pub fn run_rules_on_bytes(
let suppression =
TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &out);
let ast_findings = parsed.source.run_ast_queries(cfg);
out.extend(
ast_findings
.into_iter()
.filter(|d| !suppression.should_suppress(&d.id, d.line)),
);
out.extend(ast_findings.into_iter().filter(|d| {
!suppression.should_suppress(&d.id, d.line)
&& !suppression.is_redundant_ast_pattern(&d.id, d.line)
}));
}
if cfg.scanner.mode == AnalysisMode::Full {
out.extend(parsed.run_auth_analyses(cfg, global_summaries, scan_root));
@ -5812,9 +5918,7 @@ pub fn run_rules_on_file(
run_rules_on_bytes(&bytes, path, cfg, global_summaries, scan_root)
}
// ─────────────────────────────────────────────────────────────────────────────
// Fused single-pass: extract summaries + run full analysis in one parse/CFG
// ─────────────────────────────────────────────────────────────────────────────
/// Result of a fused analysis pass: both function summaries and diagnostics.
pub struct FusedResult {
@ -5979,11 +6083,10 @@ pub fn analyse_file_fused(
if needs_cfg && cfg.scanner.mode == AnalysisMode::Full {
let suppression =
TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &out);
out.extend(
ast_findings
.into_iter()
.filter(|d| !suppression.should_suppress(&d.id, d.line)),
);
out.extend(ast_findings.into_iter().filter(|d| {
!suppression.should_suppress(&d.id, d.line)
&& !suppression.is_redundant_ast_pattern(&d.id, d.line)
}));
} else {
out.extend(ast_findings);
}
@ -6086,9 +6189,7 @@ pub fn analyse_file_fused(
})
}
// ─────────────────────────────────────────────────────────────────────────────
// Text-based pattern scanning (non-tree-sitter files)
// ─────────────────────────────────────────────────────────────────────────────
/// Run text-based pattern scanners on files whose extension is not supported
/// by tree-sitter. Currently handles `.ejs` templates.

View file

@ -0,0 +1,287 @@
//! Canonical per-framework authentication-marker registry.
//!
//! Both the Phase 22 surface probes (`src/surface/lang/*.rs`) and the
//! auth-analysis recogniser consult this module so a marker that is
//! known to one side cannot drift away from the other. Each constant
//! is a flat `&[&str]` of identifier shapes that signal a route is
//! gated behind authentication; surface probes match the leaf segment
//! of a decorator / middleware / extractor identifier
//! (case-insensitive), and the auth analyser folds these into its
//! per-language `login_guard_names` / `authorization_check_names`
//! tables via [`router_auth_markers_for_lang`].
//!
//! The lists were lifted verbatim from the per-probe constants that
//! shipped with Phase 22; further additions land here and propagate to
//! every consumer at once.
//!
//! Lookups: prefer [`is_router_auth_marker`] for the framework-aware
//! dispatch, fall back to [`is_known_router_auth_marker`] when the
//! framework is not yet identified at the call site.
use crate::symbol::Lang;
/// Frameworks the surface probes recognise. Distinct from
/// [`crate::surface::Framework`] (which carries pretty-print metadata)
/// so this module stays free of surface-layer types and can be
/// imported by `auth_analysis::extract` without a circular dep.
///
/// Each variant selects exactly one marker list in [`markers_for`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AuthFramework {
/// Selects [`FLASK_DECORATORS`].
Flask,
/// Selects [`FASTAPI_DECORATORS`].
FastApi,
/// Selects [`DJANGO_DECORATORS`].
Django,
/// Selects [`SPRING_ANNOTATIONS`].
Spring,
/// Selects [`SERVLET_ANNOTATIONS`].
JavaServlet,
/// Selects [`QUARKUS_ANNOTATIONS`].
Quarkus,
/// Selects [`EXPRESS_MIDDLEWARES`].
Express,
/// Selects [`KOA_MIDDLEWARES`].
Koa,
/// Selects [`GIN_MIDDLEWARES`].
Gin,
/// Selects [`ACTIX_EXTRACTORS`].
ActixWeb,
/// Selects [`AXUM_EXTRACTORS`].
Axum,
}
/// Flask (`@login_required`, `@requires_auth`, …).
///
/// Decorator names that signal an authentication-gated route. Looked up by
/// [`is_router_auth_marker`] (trimmed, ASCII-case-insensitive, whole-string)
/// and merged into the Python list built by [`router_auth_markers_for_lang`].
pub const FLASK_DECORATORS: &[&str] = &[
"login_required",
"auth_required",
"jwt_required",
"token_required",
"requires_auth",
"authenticated",
"require_login",
];
/// FastAPI (`Depends(get_current_user)`, `@login_required`, …).
///
/// Same entries as [`FLASK_DECORATORS`] plus `require_auth` and
/// `current_user`. Merged into the Python list built by
/// [`router_auth_markers_for_lang`].
// NOTE(review): the doc example is `get_current_user`, but the list holds
// `current_user`; [`is_router_auth_marker`] compares whole strings, so confirm
// callers strip the `get_` prefix before lookup.
pub const FASTAPI_DECORATORS: &[&str] = &[
"login_required",
"auth_required",
"jwt_required",
"token_required",
"requires_auth",
"authenticated",
"require_auth",
"require_login",
"current_user",
];
/// Django (`@login_required`, `@permission_required`, …).
///
/// Decorator names treated as authentication markers for Django views.
/// Merged into the Python list built by [`router_auth_markers_for_lang`].
pub const DJANGO_DECORATORS: &[&str] = &[
"login_required",
"permission_required",
"user_passes_test",
"staff_member_required",
// NOTE(review): `csrf_protect` looks like a CSRF guard rather than an
// authentication check — confirm it is meant to count as an auth marker.
"csrf_protect",
"require_authenticated",
"auth_required",
];
/// Spring (`@PreAuthorize`, `@Secured`, …).
///
/// Annotation names stored without the leading `@`. Merged into the Java
/// list built by [`router_auth_markers_for_lang`]; `RolesAllowed` also
/// appears in [`SERVLET_ANNOTATIONS`] and [`QUARKUS_ANNOTATIONS`] and is
/// deduplicated there.
pub const SPRING_ANNOTATIONS: &[&str] = &[
"PreAuthorize",
"PostAuthorize",
"Secured",
"RolesAllowed",
"AuthenticationPrincipal",
];
/// Java Servlet / JAX-RS (`@RolesAllowed`, `@RequiresAuthentication`, …).
///
/// Annotation names stored without the leading `@`. Merged into the Java
/// list built by [`router_auth_markers_for_lang`].
pub const SERVLET_ANNOTATIONS: &[&str] = &[
"RolesAllowed",
"DenyAll",
"RequiresAuthentication",
"RequiresUser",
];
/// Quarkus (`@Authenticated`, `@RolesAllowed`, …).
///
/// Annotation names stored without the leading `@`. Overlaps with
/// [`SERVLET_ANNOTATIONS`] on three of its four entries; the Java list built
/// by [`router_auth_markers_for_lang`] removes the duplicates.
pub const QUARKUS_ANNOTATIONS: &[&str] = &[
"Authenticated",
"RolesAllowed",
"DenyAll",
"RequiresAuthentication",
];
/// Express middleware (`app.use(requireAuth)`, `passport.authenticate`, …).
///
/// Middleware identifier names. Merged into the JavaScript / TypeScript list
/// built by [`router_auth_markers_for_lang`]. Lookups through
/// [`is_router_auth_marker`] compare the whole (trimmed) string, so the short
/// entries `passport` and `jwt` match only those exact identifiers.
pub const EXPRESS_MIDDLEWARES: &[&str] = &[
"requireAuth",
"requireUser",
"isAuthenticated",
"ensureAuthenticated",
"ensureLoggedIn",
"authenticate",
"authMiddleware",
"verifyToken",
"verifyJwt",
"checkJwt",
"passport",
"jwt",
];
/// Koa middleware.
///
/// Shares most entries with [`EXPRESS_MIDDLEWARES`]; it omits
/// `ensureLoggedIn` and adds `koaJwt`. Merged into the JavaScript /
/// TypeScript list built by [`router_auth_markers_for_lang`].
pub const KOA_MIDDLEWARES: &[&str] = &[
"requireAuth",
"requireUser",
"isAuthenticated",
"ensureAuthenticated",
"authenticate",
"authMiddleware",
"verifyToken",
"verifyJwt",
"checkJwt",
"passport",
"jwt",
"koaJwt",
];
/// Gin middleware (`router.Use(AuthRequired())`, `jwt.JWT()`, …).
///
/// Middleware constructor names, stored without any package qualifier. This
/// is the only list returned for Go by [`router_auth_markers_for_lang`].
pub const GIN_MIDDLEWARES: &[&str] = &[
"AuthRequired",
"JWT",
"JWTAuth",
"Auth",
"RequireAuth",
"RequireUser",
"VerifyToken",
"BasicAuth",
];
/// actix-web extractors (`Identity`, `BearerAuth`, …).
///
/// Extractor type names. Merged with [`AXUM_EXTRACTORS`] into the Rust list
/// built by [`router_auth_markers_for_lang`].
pub const ACTIX_EXTRACTORS: &[&str] = &[
"Identity",
"BearerAuth",
"BasicAuth",
"JwtClaims",
"Authenticated",
// NOTE(review): `User` is a very generic type name — confirm it does not
// over-match handlers that take a non-auth `User` argument.
"User",
];
/// axum extractors (`Extension<User>`, `BearerAuth`, …).
///
/// Extractor type names. Merged with [`ACTIX_EXTRACTORS`] into the Rust list
/// built by [`router_auth_markers_for_lang`].
pub const AXUM_EXTRACTORS: &[&str] = &[
// NOTE(review): this entry has no closing `>`. [`is_router_auth_marker`]
// compares whole strings, so it matches only the literal text
// `Extension<User`; presumably the surface probe does its own prefix
// matching — verify against the caller.
"Extension<User",
"BearerAuth",
"RequireAuth",
"AuthenticatedUser",
"JwtClaims",
];
/// Resolves a framework to its canonical marker list.
///
/// Every [`AuthFramework`] variant has a dedicated constant in this module,
/// and the match below is exhaustive, so the result is always one of those
/// lists; there is no fallback arm.
pub fn markers_for(framework: AuthFramework) -> &'static [&'static str] {
    let list: &'static [&'static str] = match framework {
        // Rust
        AuthFramework::Axum => AXUM_EXTRACTORS,
        AuthFramework::ActixWeb => ACTIX_EXTRACTORS,
        // Go
        AuthFramework::Gin => GIN_MIDDLEWARES,
        // JavaScript / TypeScript
        AuthFramework::Koa => KOA_MIDDLEWARES,
        AuthFramework::Express => EXPRESS_MIDDLEWARES,
        // Java
        AuthFramework::Quarkus => QUARKUS_ANNOTATIONS,
        AuthFramework::JavaServlet => SERVLET_ANNOTATIONS,
        AuthFramework::Spring => SPRING_ANNOTATIONS,
        // Python
        AuthFramework::Django => DJANGO_DECORATORS,
        AuthFramework::FastApi => FASTAPI_DECORATORS,
        AuthFramework::Flask => FLASK_DECORATORS,
    };
    list
}
/// Tests whether `marker` is a registered auth marker for `framework`.
///
/// Surrounding whitespace on `marker` is trimmed, then the remainder must
/// equal one entry of the framework's list in full, ignoring ASCII case.
pub fn is_router_auth_marker(framework: AuthFramework, marker: &str) -> bool {
    let needle = marker.trim();
    for known in markers_for(framework) {
        if known.eq_ignore_ascii_case(needle) {
            return true;
        }
    }
    false
}
/// Framework-agnostic variant of [`is_router_auth_marker`].
///
/// Reports whether `marker` (trimmed, ASCII-case-insensitive, whole-string)
/// appears in any of the per-framework lists. Meant for call sites that know
/// the language but not which framework is in play.
pub fn is_known_router_auth_marker(marker: &str) -> bool {
    let needle = marker.trim();
    let every_list: [&[&str]; 11] = [
        FLASK_DECORATORS,
        FASTAPI_DECORATORS,
        DJANGO_DECORATORS,
        SPRING_ANNOTATIONS,
        SERVLET_ANNOTATIONS,
        QUARKUS_ANNOTATIONS,
        EXPRESS_MIDDLEWARES,
        KOA_MIDDLEWARES,
        GIN_MIDDLEWARES,
        ACTIX_EXTRACTORS,
        AXUM_EXTRACTORS,
    ];
    every_list
        .iter()
        .flat_map(|list| list.iter())
        .any(|known| known.eq_ignore_ascii_case(needle))
}
/// Collects every marker the registry holds for the frameworks of `lang`.
///
/// The per-framework lists for the language are concatenated, sorted, and
/// deduplicated, so a name shared by several frameworks appears once.
/// Languages with no registered framework produce an empty vector.
///
/// Per the module's design notes this feeds
/// `auth_analysis::config::default_for` when it seeds `login_guard_names`, so
/// a marker added to a list here reaches the per-language guard list without
/// a second edit.
pub fn router_auth_markers_for_lang(lang: Lang) -> Vec<&'static str> {
    let sources: &[&[&str]] = match lang {
        Lang::Python => &[FLASK_DECORATORS, FASTAPI_DECORATORS, DJANGO_DECORATORS],
        Lang::Java => &[SPRING_ANNOTATIONS, SERVLET_ANNOTATIONS, QUARKUS_ANNOTATIONS],
        Lang::JavaScript | Lang::TypeScript => &[EXPRESS_MIDDLEWARES, KOA_MIDDLEWARES],
        Lang::Go => &[GIN_MIDDLEWARES],
        Lang::Rust => &[ACTIX_EXTRACTORS, AXUM_EXTRACTORS],
        _ => &[],
    };

    let mut merged: Vec<&'static str> = Vec::new();
    for list in sources {
        merged.extend(list.iter().copied());
    }
    merged.sort_unstable();
    merged.dedup();
    merged
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Flask lookups ignore ASCII case and reject unrelated names.
    #[test]
    fn flask_login_required_resolves_case_insensitively() {
        for spelling in ["login_required", "Login_Required"] {
            assert!(is_router_auth_marker(AuthFramework::Flask, spelling));
        }
        assert!(!is_router_auth_marker(
            AuthFramework::Flask,
            "something_else"
        ));
    }

    /// A Spring security annotation matches; a routing annotation does not.
    #[test]
    fn spring_preauthorize_resolves() {
        let hit = is_router_auth_marker(AuthFramework::Spring, "PreAuthorize");
        let miss = is_router_auth_marker(AuthFramework::Spring, "GetMapping");
        assert!(hit);
        assert!(!miss);
    }

    /// The framework-agnostic helper finds markers from any list.
    #[test]
    fn known_marker_matches_across_frameworks() {
        // `RolesAllowed` shows up in Spring, Servlet, and Quarkus;
        // `login_required` comes from the Python lists.
        for marker in ["RolesAllowed", "login_required"] {
            assert!(is_known_router_auth_marker(marker));
        }
        assert!(!is_known_router_auth_marker("not_an_auth_marker_xyz"));
    }

    /// The Python list is a superset of every Python framework list.
    #[test]
    fn python_router_markers_cover_every_framework() {
        let markers = router_auth_markers_for_lang(Lang::Python);
        let per_framework = [
            ("flask", FLASK_DECORATORS),
            ("fastapi", FASTAPI_DECORATORS),
            ("django", DJANGO_DECORATORS),
        ];
        for (label, list) in per_framework {
            for &decorator in list {
                assert!(markers.contains(&decorator), "missing {label}: {decorator}");
            }
        }
    }

    /// Languages without a registered framework yield no markers.
    #[test]
    fn router_markers_for_unknown_lang_is_empty() {
        for lang in [Lang::Ruby, Lang::Php] {
            assert!(router_auth_markers_for_lang(lang).is_empty());
        }
    }
}

View file

@ -902,6 +902,24 @@ fn is_self_scoped_session_base(base: &str) -> bool {
| "ctx.session.currentUser"
| "ctx.state.user"
| "ctx.state.currentUser"
// The caller's own id from the session is self-scoped: fetching
// your own record with it is not IDOR (only a foreign,
// request-supplied id is). The `.user` forms above missed the
// `req.session.userId` / `session.uid` idiom.
| "req.session.userId"
| "request.session.userId"
| "session.userId"
| "req.session.userid"
| "request.session.userid"
| "session.userid"
| "req.session.uid"
| "request.session.uid"
| "session.uid"
| "ctx.session.userId"
| "ctx.session.userid"
| "ctx.session.uid"
| "ctx.state.userId"
| "ctx.state.uid"
)
}

View file

@ -1,3 +1,8 @@
//! Configuration for the Rust auth-analysis pass.
//!
//! Holds [`AuthAnalysisRules`] (admin path/guard patterns, sink classes, and
//! name canonicalization) that drive `rs.auth.missing_ownership_check`.
use crate::auth_analysis::model::SinkClass;
use crate::labels::bare_method_name;
use crate::utils::config::Config;

View file

@ -1,3 +1,9 @@
//! Shared AST-extraction helpers for the auth-analysis framework adapters.
//!
//! Cross-framework primitives — analysis-unit collection, call-site and
//! `ValueRef` extraction, and tree-sitter node/string/span helpers — used by the
//! per-framework extractors in this directory (`express`, `axum`, `django`, …).
use crate::auth_analysis::config::{AuthAnalysisRules, canonical_name, matches_name, strip_quotes};
use crate::auth_analysis::model::{
AnalysisUnit, AnalysisUnitKind, AuthCheck, AuthCheckKind, AuthorizationModel, CallSite,
@ -3942,6 +3948,27 @@ fn collect_param_names(
}
}
}
// TypeScript `required_parameter` / `optional_parameter`. Descend only
// into the binding `pattern`, never the `type` annotation: the default
// arm harvests id-like names from object-type fields (`user: { id }`)
// and lifts typed-bounded scalar ids (`UserId: number`) into
// `unit.params`, over-firing the user-input gate on non-route helpers.
// Mirrors the Rust `parameter` arm plus the Go/Python id-like filter.
"required_parameter" | "optional_parameter" => {
if let Some(pattern) = node.child_by_field_name("pattern") {
if pattern.kind() == "identifier" && node.child_by_field_name("type").is_some() {
let name = text(pattern, bytes);
if !name.is_empty()
&& !out.contains(&name)
&& (include_id_like_typed || !is_python_id_like_typed_param(&name))
{
out.push(name);
}
} else {
collect_param_names(pattern, bytes, include_id_like_typed, out);
}
}
}
_ => {
for idx in 0..node.named_child_count() {
let Some(child) = node.named_child(idx as u32) else {

View file

@ -56,6 +56,7 @@
//! - [`sql_semantics`]: ACL-join and `user_id`-predicate detection without a
//! SQL parser
pub mod auth_markers;
pub mod checks;
pub mod config;
pub mod extract;
@ -1014,7 +1015,18 @@ fn auth_finding_to_diag(finding: &checks::AuthFinding, tree: &Tree, file_path: &
guard_kind: None,
message: Some(finding.message.clone()),
labels: vec![],
confidence: Some(Confidence::Medium),
// Auth-analysis findings are *structural* (parameter-name + control-flow
// shape heuristics) and carry no taint witness — `source = None`,
// `sink_caps = 0`, no flow steps — so the per-payload dynamic oracle
// cannot confirm or refute them (missing-authz needs a 2-user
// differential the corpus does not run). Emitting them at Medium put a
// large zero-witness, dynamically-Unsupported tranche on the default /
// verified surface (the bulk of the nodegoat/railsgoat/juiceshop `auth`
// FP flood). Demote to Low so they sit below the default min-confidence
// and verify gates while remaining available for access-control audits.
// assert_has tests pin rule-id presence, not confidence, so they stay
// green.
confidence: Some(Confidence::Low),
evidence: Some(Evidence {
source: None,
sink: Some(SpanEvidence {
@ -1037,6 +1049,7 @@ fn auth_finding_to_diag(finding: &checks::AuthFinding, tree: &Tree, file_path: &
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
stable_hash: 0,
}
}

619
src/baseline.rs Normal file
View file

@ -0,0 +1,619 @@
//! Baseline diffing for patch-validation CI mode (§M6.5 / Pillar A §15.1).
//!
//! `nyx scan --baseline <file>` reads a previous scan's JSON output (or a
//! stripped `.nyx/baseline.json`) and joins on `Diag::stable_hash`. The
//! result is a per-finding `VerdictDiffEntry` with a typed `Transition` that
//! CI gates can act on.
//!
//! `nyx scan --baseline-write <file>` writes a stripped baseline JSON:
//! only `stable_hash`, `dynamic_verdict`, `severity`, `path`, and `rule_id`.
//! No source code is included.
use crate::commands::scan::Diag;
use crate::evidence::VerifyStatus;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;
// Baseline entry (stripped — no source code)
/// A stripped baseline entry: only what is needed for cross-commit diffing.
/// Contains no source code snippets.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineEntry {
/// Cross-scan identity of the finding, copied from `Diag::stable_hash`.
/// This is the join key used by [`compute_verdict_diff`].
pub stable_hash: u64,
/// Dynamic verdict status from the scan that wrote this baseline.
/// `None` when `--verify` was not run.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dynamic_verdict: Option<VerifyStatus>,
/// Severity as produced by `Severity::as_db_str` on the originating `Diag`.
pub severity: String,
/// File path of the finding, copied from `Diag::path`.
pub path: String,
/// Rule identifier, copied from `Diag::id`. Required when deserializing;
/// [`load_baseline`] relies on it to recognise the stripped format.
pub rule_id: String,
}
// Transition enum
/// How a finding's verdict changed between the baseline scan and the current
/// scan.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Transition {
/// Finding exists in the current scan but was absent from the baseline.
New,
/// Finding appears in both scans; verdict is unchanged (or neither scan
/// ran `--verify`).
///
/// Also the catch-all for verdict changes that no other variant covers,
/// e.g. `Confirmed` → `Inconclusive` or no verdict → `NotConfirmed`
/// (see `classify_transition`).
Unchanged,
/// Finding was present in the baseline but disappeared from the current
/// scan — the vulnerability is gone.
///
/// Concretely: no current finding carries the same `stable_hash`.
Resolved,
/// Finding in both; was `NotConfirmed` in baseline, now `Confirmed`.
Regressed,
/// Finding in both; baseline had no verdict (or `Inconclusive` /
/// `Unsupported`) and it is now `Confirmed`.
FlippedConfirmed,
/// Finding in both; was `Confirmed` in baseline, now `NotConfirmed` —
/// the fix is proven.
FlippedNotConfirmed,
}
// VerdictDiffEntry
/// Per-finding verdict diff produced by comparing a baseline to a current scan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerdictDiffEntry {
/// Stable cross-commit identity hash.
pub stable_hash: u64,
/// File path: taken from the current finding when one exists, otherwise
/// from the baseline entry.
pub path: String,
/// Line of the current finding. `0` for `Resolved` entries, because the
/// stripped baseline stores no line number.
pub line: usize,
/// Rule identifier: the current finding's `id`, or the baseline entry's
/// `rule_id` for `Resolved` entries.
pub rule_id: String,
/// Verdict in the baseline scan (`None` when verify was not run).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub baseline_status: Option<VerifyStatus>,
/// Verdict in the current scan (`None` when verify was not run or finding
/// is absent from the current scan).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub current_status: Option<VerifyStatus>,
/// Classification of the change between the two scans.
pub transition: Transition,
}
/// Full verdict diff between a baseline and a current scan.
///
/// Built by [`compute_verdict_diff`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerdictDiff {
/// One entry per distinct `stable_hash` seen in either scan, ordered by
/// transition kind and then by path and line.
pub entries: Vec<VerdictDiffEntry>,
}
// Load / write helpers
/// Reads a baseline file and normalises it to stripped entries.
///
/// Two JSON layouts are understood:
/// - a stripped baseline (`Vec<BaselineEntry>`), as written by
///   `--baseline-write`;
/// - a full scan report (`Vec<Diag>`), as written by `nyx scan --format json`.
///
/// The stripped layout is attempted first (it needs a `rule_id` on every
/// element). Only if that parse fails is the content re-parsed as a `Diag`
/// list and converted with [`diags_to_baseline_entries`].
///
/// # Errors
///
/// Returns `NyxError::Msg` when the file cannot be read, or when the content
/// matches neither layout; in the latter case the message carries the error
/// from the `Diag` parse.
pub fn load_baseline(path: &Path) -> crate::errors::NyxResult<Vec<BaselineEntry>> {
    let raw = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(e) => {
            return Err(crate::errors::NyxError::Msg(format!(
                "cannot read baseline {}: {e}",
                path.display()
            )));
        }
    };

    match serde_json::from_str::<Vec<BaselineEntry>>(&raw) {
        // Stripped layout: already in the shape we return.
        Ok(stripped) => Ok(stripped),
        // Otherwise assume a full scan report and strip it down.
        Err(_) => match serde_json::from_str::<Vec<Diag>>(&raw) {
            Ok(full) => Ok(diags_to_baseline_entries(&full)),
            Err(e) => Err(crate::errors::NyxError::Msg(format!(
                "baseline {}: not a valid BaselineEntry list or Diag list: {e}",
                path.display()
            ))),
        },
    }
}
/// Strips full findings down to baseline entries.
///
/// Findings whose `stable_hash` is zero are dropped: without a hash they
/// cannot be matched across scans. Input order is preserved for the rest.
/// The verdict is taken from the finding's evidence when both the evidence
/// and its dynamic verdict are present, and is `None` otherwise.
pub fn diags_to_baseline_entries(diags: &[Diag]) -> Vec<BaselineEntry> {
    let mut stripped = Vec::new();
    for diag in diags {
        if diag.stable_hash == 0 {
            continue;
        }
        let mut verdict = None;
        if let Some(evidence) = diag.evidence.as_ref() {
            if let Some(result) = evidence.dynamic_verdict.as_ref() {
                verdict = Some(result.status);
            }
        }
        stripped.push(BaselineEntry {
            stable_hash: diag.stable_hash,
            dynamic_verdict: verdict,
            severity: diag.severity.as_db_str().to_string(),
            path: diag.path.clone(),
            rule_id: diag.id.clone(),
        });
    }
    stripped
}
/// Serialises the findings as a stripped baseline and writes it to `path`.
///
/// Only `stable_hash`, `dynamic_verdict`, `severity`, `path`, and `rule_id`
/// are written — no source snippets or flow steps. The JSON is
/// pretty-printed. If `path` has a non-empty parent directory, it is created
/// (including missing ancestors) before the file is written.
///
/// # Errors
///
/// Returns `NyxError::Msg` if serialisation, directory creation, or the file
/// write fails.
pub fn write_baseline(path: &Path, diags: &[Diag]) -> crate::errors::NyxResult<()> {
    let stripped = diags_to_baseline_entries(diags);
    let payload = match serde_json::to_string_pretty(&stripped) {
        Ok(text) => text,
        Err(e) => {
            return Err(crate::errors::NyxError::Msg(format!(
                "baseline serialize error: {e}"
            )));
        }
    };

    // A bare file name has an empty parent; there is nothing to create then.
    let target_dir = path.parent().filter(|dir| !dir.as_os_str().is_empty());
    if let Some(dir) = target_dir {
        if let Err(e) = std::fs::create_dir_all(dir) {
            return Err(crate::errors::NyxError::Msg(format!(
                "cannot create baseline dir {}: {e}",
                dir.display()
            )));
        }
    }

    match std::fs::write(path, payload) {
        Ok(()) => Ok(()),
        Err(e) => Err(crate::errors::NyxError::Msg(format!(
            "cannot write baseline {}: {e}",
            path.display()
        ))),
    }
}
// Diff computation
/// Classifies the verdict change for a finding present in both scans.
///
/// - Equal verdicts (including both absent) → `Unchanged`.
/// - `Confirmed` → `NotConfirmed` → `FlippedNotConfirmed` (fix proven).
/// - `NotConfirmed` → `Confirmed` → `Regressed`.
/// - Anything else → `Confirmed` → `FlippedConfirmed`.
/// - Every remaining combination → `Unchanged`; e.g. `Confirmed` →
///   `Inconclusive` is not counted as a resolution because there is no clean
///   `NotConfirmed` proof.
fn classify_transition(
    baseline: Option<VerifyStatus>,
    current: Option<VerifyStatus>,
) -> Transition {
    if baseline == current {
        return Transition::Unchanged;
    }

    // From here on the two verdicts are known to differ.
    match current {
        Some(VerifyStatus::Confirmed) => match baseline {
            Some(VerifyStatus::NotConfirmed) => Transition::Regressed,
            _ => Transition::FlippedConfirmed,
        },
        Some(VerifyStatus::NotConfirmed)
            if matches!(baseline, Some(VerifyStatus::Confirmed)) =>
        {
            Transition::FlippedNotConfirmed
        }
        _ => Transition::Unchanged,
    }
}
/// Compute a verdict diff between a loaded baseline and the current findings.
///
/// Findings are joined on `stable_hash`:
/// - a hash present only in `current` yields a `New` entry;
/// - a hash present in both yields the transition chosen by
///   `classify_transition`;
/// - a hash present only in `baseline` yields a `Resolved` entry, whose
///   `line` is `0` because the stripped baseline stores no line number.
///
/// Current findings with `stable_hash == 0` are ignored (they cannot be
/// joined). If several inputs on one side share a hash, they collapse to a
/// single entry.
///
/// The result is sorted by transition kind (`Resolved`,
/// `FlippedNotConfirmed`, `New`, `Regressed`, `FlippedConfirmed`,
/// `Unchanged`), then by path, line, rule id, and finally `stable_hash`.
/// The last two keys make the order total: entries are collected from hash
/// maps, whose iteration order varies between runs, so without them ties on
/// `(transition, path, line)` — every `Resolved` entry in one file, for
/// example — would come out in a different order each time.
pub fn compute_verdict_diff(baseline: &[BaselineEntry], current: &[Diag]) -> VerdictDiff {
    // Report position of each transition kind; lower sorts first.
    fn transition_rank(t: Transition) -> u8 {
        match t {
            Transition::Resolved => 0,
            Transition::FlippedNotConfirmed => 1,
            Transition::New => 2,
            Transition::Regressed => 3,
            Transition::FlippedConfirmed => 4,
            Transition::Unchanged => 5,
        }
    }

    // Build lookup maps keyed by stable_hash.
    let baseline_map: HashMap<u64, &BaselineEntry> =
        baseline.iter().map(|e| (e.stable_hash, e)).collect();
    let current_map: HashMap<u64, &Diag> = current
        .iter()
        .filter(|d| d.stable_hash != 0)
        .map(|d| (d.stable_hash, d))
        .collect();

    let mut entries = Vec::new();

    // Walk current findings: either matched against the baseline or New.
    for (&hash, diag) in &current_map {
        let current_status = diag
            .evidence
            .as_ref()
            .and_then(|ev| ev.dynamic_verdict.as_ref())
            .map(|vr| vr.status);
        let (baseline_status, transition) = match baseline_map.get(&hash) {
            Some(base) => (
                base.dynamic_verdict,
                classify_transition(base.dynamic_verdict, current_status),
            ),
            None => (None, Transition::New),
        };
        entries.push(VerdictDiffEntry {
            stable_hash: hash,
            path: diag.path.clone(),
            line: diag.line,
            rule_id: diag.id.clone(),
            baseline_status,
            current_status,
            transition,
        });
    }

    // Walk baseline findings absent from current → Resolved.
    for (&hash, base) in &baseline_map {
        if !current_map.contains_key(&hash) {
            entries.push(VerdictDiffEntry {
                stable_hash: hash,
                path: base.path.clone(),
                line: 0,
                rule_id: base.rule_id.clone(),
                baseline_status: base.dynamic_verdict,
                current_status: None,
                transition: Transition::Resolved,
            });
        }
    }

    // Sort into a total order so the output does not depend on hash-map
    // iteration order. `stable_hash` is unique per entry, so it settles any
    // remaining tie.
    entries.sort_by(|a, b| {
        transition_rank(a.transition)
            .cmp(&transition_rank(b.transition))
            .then_with(|| a.path.cmp(&b.path))
            .then_with(|| a.line.cmp(&b.line))
            .then_with(|| a.rule_id.cmp(&b.rule_id))
            .then_with(|| a.stable_hash.cmp(&b.stable_hash))
    });

    VerdictDiff { entries }
}
// CI gates
/// Gate: exit code 2 if any new `Confirmed` finding appears.
///
/// Triggers on `transition == New && current_status == Confirmed` or
/// `transition == FlippedConfirmed`.
///
/// A `Regressed` transition (baseline `NotConfirmed`, now `Confirmed`) is not
/// matched by this gate's condition in [`check_gate`].
pub const GATE_NO_NEW_CONFIRMED: &str = "no-new-confirmed";
/// Gate: exit code 2 if any baseline-`Confirmed` finding is not fully resolved.
///
/// A baseline-Confirmed finding is resolved only when it is absent from the
/// current scan (`Resolved`) or its current verdict is `NotConfirmed`
/// (`FlippedNotConfirmed`). All other current statuses (`Confirmed`,
/// `PartiallyConfirmed`, `Inconclusive`, `Unsupported`) violate this gate.
pub const GATE_RESOLVE_ALL_CONFIRMED: &str = "resolve-all-confirmed";
/// Check a named CI gate against a verdict diff.
///
/// Returns `true` when the gate passes (condition not violated) and `false`
/// when it fails (caller should exit with code 2).
///
/// * [`GATE_NO_NEW_CONFIRMED`] fails when any entry is `New` or
///   `FlippedConfirmed` and its current verdict is `Confirmed`.
/// * [`GATE_RESOLVE_ALL_CONFIRMED`] fails when any baseline-`Confirmed` entry
///   is neither `Resolved` (absent from the current scan) nor currently
///   `NotConfirmed`. This includes a finding that is still present but has
///   no current verdict at all: without a `NotConfirmed` proof it has not
///   been resolved.
///
/// Unknown gate names always pass so future gate additions are forward-
/// compatible without requiring a binary upgrade.
pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool {
    match gate {
        GATE_NO_NEW_CONFIRMED => !diff.entries.iter().any(|e| {
            matches!(e.transition, Transition::New | Transition::FlippedConfirmed)
                && e.current_status == Some(VerifyStatus::Confirmed)
        }),
        GATE_RESOLVE_ALL_CONFIRMED => diff.entries.iter().all(|e| {
            // Entries that were not Confirmed in the baseline are out of scope.
            e.baseline_status != Some(VerifyStatus::Confirmed)
                // Finding disappeared from the current scan.
                || e.transition == Transition::Resolved
                // Finding is still reported but dynamically disproven.
                || e.current_status == Some(VerifyStatus::NotConfirmed)
        }),
        _ => true,
    }
}
// Console / JSON rendering
/// Human-readable label for an optional dynamic verdict.
///
/// `None` (no verdict recorded) renders as `"(no verdict)"`; every
/// [`VerifyStatus`] variant renders as its own name.
fn status_str(s: Option<VerifyStatus>) -> &'static str {
    let Some(status) = s else {
        return "(no verdict)";
    };
    match status {
        VerifyStatus::Confirmed => "Confirmed",
        VerifyStatus::PartiallyConfirmed => "PartiallyConfirmed",
        VerifyStatus::NotConfirmed => "NotConfirmed",
        VerifyStatus::Inconclusive => "Inconclusive",
        VerifyStatus::Unsupported => "Unsupported",
    }
}
/// Render a verdict diff as a human-readable console summary.
pub fn format_diff_console(diff: &VerdictDiff) -> String {
if diff.entries.is_empty() {
return String::from(" (no findings in baseline or current scan)\n");
}
let mut lines = Vec::new();
let mut non_unchanged = 0usize;
for e in &diff.entries {
let hash_str = format!("{:016x}", e.stable_hash);
let loc = if e.line > 0 {
format!("{}:{}", e.path, e.line)
} else {
e.path.clone()
};
match e.transition {
Transition::New => {
non_unchanged += 1;
lines.push(format!(
" + {hash_str}: new {} at {loc}",
status_str(e.current_status)
));
}
Transition::Resolved => {
non_unchanged += 1;
lines.push(format!(
" - {hash_str}: {} \u{2192} removed (resolved) at {loc}",
status_str(e.baseline_status)
));
}
Transition::FlippedNotConfirmed => {
non_unchanged += 1;
lines.push(format!(
" - {hash_str}: Confirmed \u{2192} NotConfirmed at {loc} (resolved)"
));
}
Transition::Regressed => {
non_unchanged += 1;
lines.push(format!(
" ! {hash_str}: NotConfirmed \u{2192} Confirmed at {loc} (regressed)"
));
}
Transition::FlippedConfirmed => {
non_unchanged += 1;
lines.push(format!(" + {hash_str}: new Confirmed at {loc}"));
}
Transition::Unchanged => {}
}
}
if non_unchanged == 0 {
return String::from(" (no changes from baseline)\n");
}
lines.join("\n") + "\n"
}
// Tests
#[cfg(test)]
mod tests {
    use super::*;
    use crate::commands::scan::{Diag, compute_stable_hash};
    use crate::evidence::{Evidence, VerifyResult, VerifyStatus};
    use crate::patterns::{FindingCategory, Severity};

    /// Build a minimal high-severity security diagnostic with its stable hash
    /// filled in.
    fn make_diag(path: &str, line: usize, rule: &str) -> Diag {
        let unhashed = Diag {
            path: path.to_string(),
            line,
            col: 0,
            severity: Severity::High,
            id: rule.to_string(),
            category: FindingCategory::Security,
            path_validated: false,
            guard_kind: None,
            message: None,
            labels: vec![],
            confidence: None,
            evidence: None,
            rank_score: None,
            rank_reason: None,
            suppressed: false,
            suppression: None,
            rollup: None,
            finding_id: String::new(),
            alternative_finding_ids: vec![],
            stable_hash: 0,
        };
        let stable_hash = compute_stable_hash(&unhashed);
        Diag {
            stable_hash,
            ..unhashed
        }
    }

    /// Attach a dynamic verdict with the given status to `d`.
    fn with_verdict(mut d: Diag, status: VerifyStatus) -> Diag {
        let verdict = VerifyResult {
            finding_id: format!("{:016x}", d.stable_hash),
            status,
            triggered_payload: None,
            reason: None,
            inconclusive_reason: None,
            detail: None,
            attempts: vec![],
            toolchain_match: None,
            differential: None,
            replay_stable: None,
            wrong: None,
            hardening_outcome: None,
        };
        d.evidence = Some(Evidence {
            dynamic_verdict: Some(verdict),
            ..Default::default()
        });
        d
    }

    /// The one finding every test in this module works with.
    fn sqli_diag() -> Diag {
        make_diag("src/a.py", 1, "py.sqli")
    }

    /// Single-entry baseline mirroring `d`, recorded with `verdict`.
    fn baseline_with(d: &Diag, verdict: VerifyStatus) -> Vec<BaselineEntry> {
        vec![BaselineEntry {
            stable_hash: d.stable_hash,
            dynamic_verdict: Some(verdict),
            severity: "high".to_string(),
            path: d.path.clone(),
            rule_id: d.id.clone(),
        }]
    }

    #[test]
    fn new_finding_no_verdict() {
        let current = vec![sqli_diag()];
        let diff = compute_verdict_diff(&[], &current);
        assert_eq!(diff.entries.len(), 1);
        let only = &diff.entries[0];
        assert_eq!(only.transition, Transition::New);
        assert_eq!(only.current_status, None);
    }

    #[test]
    fn new_confirmed_finding() {
        let current = vec![with_verdict(sqli_diag(), VerifyStatus::Confirmed)];
        let diff = compute_verdict_diff(&[], &current);
        assert_eq!(diff.entries[0].transition, Transition::New);
        assert_eq!(
            diff.entries[0].current_status,
            Some(VerifyStatus::Confirmed)
        );
    }

    #[test]
    fn resolved_finding() {
        let baseline = diags_to_baseline_entries(&[sqli_diag()]);
        let diff = compute_verdict_diff(&baseline, &[]);
        assert_eq!(diff.entries.len(), 1);
        assert_eq!(diff.entries[0].transition, Transition::Resolved);
    }

    #[test]
    fn flipped_not_confirmed() {
        let d = sqli_diag();
        let baseline = baseline_with(&d, VerifyStatus::Confirmed);
        let current = vec![with_verdict(d, VerifyStatus::NotConfirmed)];
        let diff = compute_verdict_diff(&baseline, &current);
        assert_eq!(diff.entries[0].transition, Transition::FlippedNotConfirmed);
    }

    #[test]
    fn regressed() {
        let d = sqli_diag();
        let baseline = baseline_with(&d, VerifyStatus::NotConfirmed);
        let current = vec![with_verdict(d, VerifyStatus::Confirmed)];
        let diff = compute_verdict_diff(&baseline, &current);
        assert_eq!(diff.entries[0].transition, Transition::Regressed);
    }

    #[test]
    fn gate_no_new_confirmed_passes_when_no_confirmed() {
        let diff = compute_verdict_diff(&[], &[sqli_diag()]);
        assert!(check_gate(&diff, GATE_NO_NEW_CONFIRMED));
    }

    #[test]
    fn gate_no_new_confirmed_fails_on_new_confirmed() {
        let current = vec![with_verdict(sqli_diag(), VerifyStatus::Confirmed)];
        let diff = compute_verdict_diff(&[], &current);
        assert!(!check_gate(&diff, GATE_NO_NEW_CONFIRMED));
    }

    #[test]
    fn gate_resolve_all_confirmed_passes_when_flipped() {
        let d = sqli_diag();
        let baseline = baseline_with(&d, VerifyStatus::Confirmed);
        let current = vec![with_verdict(d, VerifyStatus::NotConfirmed)];
        let diff = compute_verdict_diff(&baseline, &current);
        assert!(check_gate(&diff, GATE_RESOLVE_ALL_CONFIRMED));
    }

    #[test]
    fn gate_resolve_all_confirmed_fails_when_still_confirmed() {
        let d = sqli_diag();
        let baseline = baseline_with(&d, VerifyStatus::Confirmed);
        let current = vec![with_verdict(d, VerifyStatus::Confirmed)];
        let diff = compute_verdict_diff(&baseline, &current);
        assert!(!check_gate(&diff, GATE_RESOLVE_ALL_CONFIRMED));
    }

    #[test]
    fn gate_resolve_all_confirmed_passes_when_resolved() {
        let baseline = baseline_with(&sqli_diag(), VerifyStatus::Confirmed);
        // The finding disappeared entirely from the current scan.
        let diff = compute_verdict_diff(&baseline, &[]);
        assert!(check_gate(&diff, GATE_RESOLVE_ALL_CONFIRMED));
    }

    #[test]
    fn write_and_load_roundtrip() {
        let d = with_verdict(sqli_diag(), VerifyStatus::Confirmed);
        let tmp = tempfile::NamedTempFile::new().unwrap();
        write_baseline(tmp.path(), std::slice::from_ref(&d)).unwrap();

        let loaded = load_baseline(tmp.path()).unwrap();
        assert_eq!(loaded.len(), 1);
        let entry = &loaded[0];
        assert_eq!(entry.stable_hash, d.stable_hash);
        assert_eq!(entry.dynamic_verdict, Some(VerifyStatus::Confirmed));
        assert_eq!(entry.path, "src/a.py");
        assert_eq!(entry.rule_id, "py.sqli");
    }

    #[test]
    fn load_full_diag_json() {
        // A baseline file may also be a full serialized diagnostics array.
        let d = with_verdict(sqli_diag(), VerifyStatus::Confirmed);
        let json = serde_json::to_string(&[&d]).unwrap();
        let tmp = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(tmp.path(), &json).unwrap();

        let loaded = load_baseline(tmp.path()).unwrap();
        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].stable_hash, d.stable_hash);
    }

    #[test]
    fn baseline_write_no_source() {
        let mut d = with_verdict(sqli_diag(), VerifyStatus::Confirmed);
        // Plant a source snippet in the evidence; it must not reach the file.
        if let Some(ev) = d.evidence.as_mut() {
            ev.flow_steps = vec![crate::evidence::FlowStep {
                step: 1,
                kind: crate::evidence::FlowStepKind::Source,
                file: "src/a.py".into(),
                line: 1,
                col: 0,
                snippet: Some("SECRET CODE".into()),
                variable: None,
                callee: None,
                function: None,
                is_cross_file: false,
            }];
        }
        let tmp = tempfile::NamedTempFile::new().unwrap();
        write_baseline(tmp.path(), &[d]).unwrap();

        let content = std::fs::read_to_string(tmp.path()).unwrap();
        assert!(
            !content.contains("SECRET CODE"),
            "baseline must not contain source code"
        );
    }

    #[test]
    fn unknown_gate_passes() {
        let diff = VerdictDiff { entries: vec![] };
        assert!(check_gate(&diff, "some-future-gate-name"));
    }
}

View file

@ -20,16 +20,13 @@ use smallvec::SmallVec;
use std::collections::{BTreeMap, HashMap};
use std::path::{Path, PathBuf};
// ─────────────────────────────────────────────────────────────────────────────
// Types
// ─────────────────────────────────────────────────────────────────────────────
/// Metadata attached to each call-graph edge.
#[derive(Debug, Clone)]
pub struct CallEdge {
/// The raw callee string as it appeared in source (e.g. `"env::var"`).
/// Preserved for diagnostics, **not** the normalized form used for resolution.
#[allow(dead_code)] // used for future diagnostics and path display
pub call_site: String,
}
@ -52,10 +49,10 @@ pub struct AmbiguousCallee {
///
/// Nodes are [`FuncKey`]s (one per function definition across all files).
/// Edges represent call-site relationships resolved after pass 1.
#[derive(Debug)]
pub struct CallGraph {
pub graph: DiGraph<FuncKey, CallEdge>,
/// `FuncKey → NodeIndex` for quick lookup.
#[allow(dead_code)] // used for future topo-ordered analysis and call-graph queries
pub index: HashMap<FuncKey, NodeIndex>,
/// Callee strings that could not be resolved to any [`FuncKey`].
pub unresolved_not_found: Vec<UnresolvedCallee>,
@ -77,9 +74,7 @@ pub struct CallGraphAnalysis {
pub topo_scc_callee_first: Vec<usize>,
}
// ─────────────────────────────────────────────────────────────────────────────
// Callee-name normalization
// ─────────────────────────────────────────────────────────────────────────────
/// Extract the last segment of a qualified callee name for resolution.
///
@ -165,9 +160,7 @@ pub(crate) fn callee_container_hint(raw: &str) -> &str {
""
}
// ─────────────────────────────────────────────────────────────────────────────
// Class / container → method index
// ─────────────────────────────────────────────────────────────────────────────
/// Per-language `(container, method_name)` → candidate [`FuncKey`] index.
///
@ -260,20 +253,6 @@ impl ClassMethodIndex {
.unwrap_or_default(),
}
}
/// Number of distinct `(lang, container, method)` keys. Exposed
/// for diagnostics / tests; production code uses [`Self::resolve`].
#[allow(dead_code)]
pub fn container_keys_len(&self) -> usize {
self.by_container.len()
}
/// Number of distinct `(lang, method)` keys. Exposed for
/// diagnostics / tests.
#[allow(dead_code)]
pub fn name_keys_len(&self) -> usize {
self.by_name.len()
}
}
// ── Type hierarchy index ────────────────────────────────────────────────
@ -293,11 +272,6 @@ impl ClassMethodIndex {
pub struct TypeHierarchyIndex {
/// `(lang, super_type)` → distinct sub-type / impl container names.
by_super: HashMap<(Lang, String), SmallVec<[String; 4]>>,
/// `(lang, sub_type)` → super-types this type extends / implements.
/// Future use for `super.method()` resolution; populated for
/// completeness today.
#[allow(dead_code)]
by_sub: HashMap<(Lang, String), SmallVec<[String; 2]>>,
}
impl TypeHierarchyIndex {
@ -308,7 +282,6 @@ impl TypeHierarchyIndex {
/// summary) collapse via the membership check.
pub fn build(summaries: &GlobalSummaries) -> Self {
let mut by_super: HashMap<(Lang, String), SmallVec<[String; 4]>> = HashMap::new();
let mut by_sub: HashMap<(Lang, String), SmallVec<[String; 2]>> = HashMap::new();
for (key, summary) in summaries.iter() {
let lang = key.lang;
@ -320,14 +293,10 @@ impl TypeHierarchyIndex {
if !subs.iter().any(|s| s == sub) {
subs.push(sub.clone());
}
let sups = by_sub.entry((lang, sub.clone())).or_default();
if !sups.iter().any(|s| s == sup) {
sups.push(sup.clone());
}
}
}
TypeHierarchyIndex { by_super, by_sub }
TypeHierarchyIndex { by_super }
}
/// Return the distinct sub-type / impl container names for
@ -341,16 +310,6 @@ impl TypeHierarchyIndex {
.unwrap_or_default()
}
/// Return the recorded super-types of `sub_type`. Empty when
/// `sub_type` has no recorded super-types in this language.
#[allow(dead_code)]
pub fn supers_of(&self, lang: Lang, sub_type: &str) -> &[String] {
self.by_sub
.get(&(lang, sub_type.to_string()))
.map(|v| v.as_slice())
.unwrap_or_default()
}
/// Number of distinct `(lang, super_type)` keys. Exposed for
/// diagnostics / tests.
#[allow(dead_code)]
@ -409,9 +368,7 @@ impl TypeHierarchyIndex {
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Call-graph construction
// ─────────────────────────────────────────────────────────────────────────────
/// Build the whole-program call graph from merged summaries.
///
@ -777,9 +734,7 @@ fn resolve_via_interop(
None
}
// ─────────────────────────────────────────────────────────────────────────────
// SCC / topological analysis
// ─────────────────────────────────────────────────────────────────────────────
/// Compute SCC decomposition and topological ordering of the call graph.
///
@ -807,9 +762,7 @@ pub fn analyse(cg: &CallGraph) -> CallGraphAnalysis {
}
}
// ─────────────────────────────────────────────────────────────────────────────
// File-level batch ordering
// ─────────────────────────────────────────────────────────────────────────────
/// A batch of files at a single topological position, annotated with whether
/// any contributing SCC contains mutual recursion (len > 1) and whether any
@ -862,6 +815,141 @@ pub fn callers_of(cg: &CallGraph, callee: &FuncKey) -> Vec<FuncKey> {
.collect()
}
/// Reverse-edge traversal: return every [`FuncKey`] that *transitively* calls
/// `callee`, i.e. the union of [`callers_of`] applied recursively until
/// the reverse frontier is exhausted.
///
/// Used by the chain composer to widen file-scoped reach: a sink inside
/// `internal_helper.py` whose enclosing function is reached only through
/// `routes.py` is *reachable* in the chain sense, but the file-local
/// match in `chain::edges::locate_reach` / `chain::search::compose_chain`
/// misses it. This helper produces the closure once so callers can
/// resolve reach in O(1) afterwards.
///
/// Excludes `callee` itself from the returned set, matching the
/// "strictly upstream" semantics callers want. This holds even when
/// `callee` is self-recursive or part of a call cycle: the start node is
/// marked visited up front and removed before the result is built. Empty
/// when `callee` is unknown to the graph.
///
/// Cost: O(V + E) over `callee`'s reverse-reachable subgraph; each node is
/// expanded at most once.
pub fn callers_transitive(cg: &CallGraph, callee: &FuncKey) -> std::collections::HashSet<FuncKey> {
    use petgraph::Direction::Incoming;
    use std::collections::HashSet;

    let Some(&start) = cg.index.get(callee) else {
        return HashSet::new();
    };

    // Track visited nodes by index (cheap to hash, no key clones while
    // walking). Seeding with `start` keeps cycles back into the callee from
    // re-adding it as its own caller.
    let mut visited: HashSet<NodeIndex> = HashSet::from([start]);
    let mut worklist = vec![start];
    while let Some(node) = worklist.pop() {
        for caller in cg.graph.neighbors_directed(node, Incoming) {
            if visited.insert(caller) {
                worklist.push(caller);
            }
        }
    }

    visited.remove(&start);
    visited
        .into_iter()
        .map(|node| cg.graph[node].clone())
        .collect()
}
/// File-level transitive reach map derived from a [`CallGraph`].
///
/// For every `namespace` (file path) that defines a function in the graph,
/// the map stores the namespaces containing at least one transitive caller
/// of a function in that file. It is built once so the chain composer can
/// widen a finding's `Reach::Reachable` decision beyond the file-local
/// heuristic in `chain::edges::locate_reach` without re-running the
/// traversal per finding.
///
/// Map shape: `callee_namespace → { caller_namespace, … }`. A file is always
/// a member of its own caller set so intra-file recursion stays reachable.
///
/// `scan_root` is optional path-normalisation context. Without a root,
/// callers must hand project-relative POSIX paths to
/// [`FileReachMap::reaches`]. With a root, [`FileReachMap::reaches`] runs
/// [`crate::symbol::normalize_namespace`] on both arguments first, so
/// absolute host paths (the convention on [`crate::commands::scan::Diag`]'s
/// `path`) and project-relative paths (the convention on call-graph
/// [`FuncKey::namespace`] and [`crate::surface::SourceLocation::file`]) both
/// resolve to the stored keys.
#[derive(Debug, Default, Clone)]
pub struct FileReachMap {
    /// `callee namespace → namespaces that transitively call into it`.
    by_callee_ns: HashMap<String, std::collections::HashSet<String>>,
    /// Root used to normalise lookup paths; `None` disables normalisation.
    scan_root: Option<String>,
}

impl FileReachMap {
    /// Build the map from the reverse transitive closure of every function.
    ///
    /// Runs one [`callers_transitive`] traversal per function, so the worst
    /// case is O(V × (V + E)).
    ///
    /// The result has no scan root; chain [`FileReachMap::with_scan_root`]
    /// when lookups may use absolute paths.
    pub fn build(cg: &CallGraph) -> Self {
        let mut by_callee_ns: HashMap<String, std::collections::HashSet<String>> = HashMap::new();
        for callee in cg.index.keys() {
            let callers = by_callee_ns.entry(callee.namespace.clone()).or_default();
            // Self-reach: a file always reaches its own functions.
            callers.insert(callee.namespace.clone());
            callers.extend(
                callers_transitive(cg, callee)
                    .into_iter()
                    .map(|caller| caller.namespace),
            );
        }
        Self {
            by_callee_ns,
            scan_root: None,
        }
    }

    /// Set (or, with `None`, clear) the scan root that
    /// [`FileReachMap::reaches`] uses to turn absolute host paths back into
    /// the project-relative POSIX form of the map keys.
    pub fn with_scan_root<P: AsRef<std::path::Path>>(self, root: Option<P>) -> Self {
        let scan_root = root.map(|p| p.as_ref().to_string_lossy().into_owned());
        Self { scan_root, ..self }
    }

    /// True when the file `caller` transitively reaches at least one
    /// function defined in the file `callee`.
    ///
    /// Arguments may be project-relative POSIX paths, or absolute host paths
    /// if a scan root was supplied via [`FileReachMap::with_scan_root`].
    /// Returns false when either path is unknown to the graph (conservative:
    /// the chain composer falls back to the file-local heuristic).
    pub fn reaches(&self, caller: &str, callee: &str) -> bool {
        let callee_key = self.normalize(callee);
        let caller_key = self.normalize(caller);
        match self.by_callee_ns.get(&*callee_key) {
            Some(callers) => callers.contains(&*caller_key),
            None => false,
        }
    }

    /// Count of distinct callee namespaces in the map. Exposed for
    /// diagnostics / tests.
    pub fn callee_ns_len(&self) -> usize {
        self.by_callee_ns.len()
    }

    /// Normalise `path` against the scan root when one is set; otherwise
    /// return it borrowed and unchanged.
    fn normalize<'a>(&self, path: &'a str) -> std::borrow::Cow<'a, str> {
        use std::borrow::Cow;

        if let Some(root) = self.scan_root.as_deref() {
            Cow::Owned(crate::symbol::normalize_namespace(path, Some(root)))
        } else {
            Cow::Borrowed(path)
        }
    }
}
/// Compute the set of file namespaces that must be re-analysed when a
/// given set of callee [`FuncKey`]s have had their summaries refined.
///
@ -905,10 +993,16 @@ pub fn scc_spans_files(cg: &CallGraph, scc: &[NodeIndex]) -> bool {
iter.any(|n| cg.graph[*n].namespace.as_str() != first_ns)
}
/// Like [`scc_file_batches`] but annotates each batch with whether any
/// contributing SCC has mutual recursion (`len > 1`).
/// Map SCC topological order to an ordered sequence of file-path batches
/// annotated with whether any contributing SCC is mutually recursive
/// (`len > 1`) or cross-file.
///
/// Returns `(ordered_batches, orphan_files)`.
/// A file is placed in the earliest batch where any of its functions appear
/// (min topo index), so leaf callees become available before the callers
/// that depend on them.
///
/// Returns `(ordered_batches, orphan_files)`. Orphans are paths from
/// `all_files` that have no functions in the call graph.
pub fn scc_file_batches_with_metadata<'a>(
cg: &CallGraph,
analysis: &CallGraphAnalysis,
@ -989,8 +1083,8 @@ pub fn scc_file_batches_with_metadata<'a>(
///
/// Returns `(ordered_batches, orphan_files)` where orphan_files are paths
/// from `all_files` that have no functions in the call graph.
#[allow(dead_code)] // kept for tests; production callers use scc_file_batches_with_metadata
pub fn scc_file_batches<'a>(
#[cfg(test)]
pub(super) fn scc_file_batches<'a>(
cg: &CallGraph,
analysis: &CallGraphAnalysis,
all_files: &'a [PathBuf],
@ -1033,9 +1127,7 @@ pub fn scc_file_batches<'a>(
(batches, orphans)
}
// ─────────────────────────────────────────────────────────────────────────────
// Tests
// ─────────────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
@ -2798,4 +2890,127 @@ mod tests {
assert!(cg.unresolved_not_found.is_empty());
assert!(cg.unresolved_ambiguous.is_empty());
}
// ── callers_transitive + FileReachMap ───────────────────────────────
/// Call chain spanning three files:
/// `routes.py::handle -> service.py::process -> helper.py::sink`.
/// `callers_transitive(sink)` has to yield exactly `process` and `handle`,
/// and `FileReachMap` has to list `routes.py` and `service.py` (plus
/// `helper.py` itself) as reaching `helper.py`.
#[test]
fn callers_transitive_walks_multi_hop_chain() {
    let summaries = vec![
        make_summary("handle", "routes.py", "python", 0, vec!["process"]),
        make_summary("process", "service.py", "python", 0, vec!["sink"]),
        make_summary("sink", "helper.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let sink_key = FuncKey {
        lang: Lang::Python,
        namespace: "helper.py".into(),
        name: "sink".into(),
        arity: Some(0),
        ..Default::default()
    };
    let upstream = callers_transitive(&cg, &sink_key);
    let names: std::collections::HashSet<String> =
        upstream.iter().map(|key| key.name.clone()).collect();
    assert!(names.contains("process"), "process should reach sink");
    assert!(names.contains("handle"), "handle should reach sink");
    assert_eq!(upstream.len(), 2, "sink itself must be excluded");

    let reach = FileReachMap::build(&cg);
    assert!(reach.reaches("routes.py", "helper.py"));
    assert!(reach.reaches("service.py", "helper.py"));
    assert!(reach.reaches("helper.py", "helper.py"), "self-reach");
    assert!(!reach.reaches("helper.py", "routes.py"));
}
/// A key that is not a node of the call graph has no callers at all.
#[test]
fn callers_transitive_empty_for_unknown_key() {
    let gs = merge_summaries(
        vec![make_summary("leaf", "a.py", "python", 0, vec![])],
        None,
    );
    let cg = build_call_graph(&gs, &[]);

    let unknown = FuncKey {
        lang: Lang::Python,
        namespace: "nowhere.py".into(),
        name: "ghost".into(),
        arity: Some(0),
        ..Default::default()
    };
    assert!(callers_transitive(&cg, &unknown).is_empty());
}
/// Two files with no calls between them each reach only themselves.
#[test]
fn file_reach_map_handles_disconnected_components() {
    let summaries = vec![
        make_summary("a_caller", "a.py", "python", 0, vec!["a_sink"]),
        make_summary("a_sink", "a.py", "python", 0, vec![]),
        make_summary("b_caller", "b.py", "python", 0, vec!["b_sink"]),
        make_summary("b_sink", "b.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);
    let reach = FileReachMap::build(&cg);

    // Each file reaches itself.
    assert!(reach.reaches("a.py", "a.py"));
    assert!(reach.reaches("b.py", "b.py"));
    // The two components are disconnected in both directions.
    assert!(!reach.reaches("a.py", "b.py"));
    assert!(!reach.reaches("b.py", "a.py"));
    assert_eq!(reach.callee_ns_len(), 2);
}
/// With a scan root attached, absolute host paths are normalised to the
/// project-relative POSIX form of the map keys, so
/// `reaches("/abs/scan/routes.py", "/abs/scan/helper.py")` hits the same
/// entry as `reaches("routes.py", "helper.py")`. This mirrors the
/// production wire-up in `src/commands/scan.rs`: call-graph namespaces
/// are project-relative while `Diag.path` (from `src/ast.rs`) is the
/// absolute walker path.
#[test]
fn file_reach_map_with_scan_root_normalises_absolute_paths() {
    let summaries = vec![
        make_summary("handle", "routes.py", "python", 0, vec!["sink"]),
        make_summary("sink", "helper.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let root = std::path::Path::new("/abs/scan");
    let reach = FileReachMap::build(&cg).with_scan_root(Some(root));

    // Mixed conventions: project-relative caller (surface), absolute
    // callee (Diag). Pre-fix this returned false.
    assert!(reach.reaches("routes.py", "/abs/scan/helper.py"));
    // Absolute on both sides resolves as well.
    assert!(reach.reaches("/abs/scan/routes.py", "/abs/scan/helper.py"));

    // A root written with a trailing slash behaves the same.
    let reach_trail =
        FileReachMap::build(&cg).with_scan_root(Some(std::path::Path::new("/abs/scan/")));
    assert!(reach_trail.reaches("/abs/scan/routes.py", "/abs/scan/helper.py"));

    // Project-relative on both sides keeps working (legacy behaviour).
    assert!(reach.reaches("routes.py", "helper.py"));

    // A path outside the root passes through normalize_namespace
    // unchanged and must not collide with a project-relative key.
    assert!(!reach.reaches("/other/root/routes.py", "/other/root/helper.py"));
}
/// Passing `None` to `with_scan_root` drops a previously configured root,
/// returning lookups to strict project-relative semantics.
#[test]
fn file_reach_map_with_scan_root_none_clears_root() {
    let summaries = vec![
        make_summary("handle", "routes.py", "python", 0, vec!["sink"]),
        make_summary("sink", "helper.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let reach: FileReachMap = FileReachMap::build(&cg)
        .with_scan_root(Some(std::path::Path::new("/abs/scan")))
        .with_scan_root::<&std::path::Path>(None);

    // With the root cleared, absolute paths no longer match any key.
    assert!(!reach.reaches("/abs/scan/routes.py", "/abs/scan/helper.py"));
    // Project-relative lookups are unaffected.
    assert!(reach.reaches("routes.py", "helper.py"));
}
}

View file

@ -121,9 +121,7 @@ fn extract_case_literal_text<'a>(case: Node<'a>, lang: &str, code: &'a [u8]) ->
}
}
// -------------------------------------------------------------------------
// Exception-source detection for try/catch wiring
// -------------------------------------------------------------------------
/// Returns true if this CFG node can implicitly raise an exception (calls).
/// Explicit throws are collected separately via `throw_targets`.
@ -190,9 +188,7 @@ pub(super) fn extract_catch_param_name<'a>(
}
}
// -------------------------------------------------------------------------
// Ruby begin/rescue/ensure handler
// -------------------------------------------------------------------------
/// Builds CFG for Ruby's `begin`/`rescue`/`ensure` blocks (and `body_statement`
/// with inline rescue). Ruby's `begin` has no `body` field, the try-body
@ -442,9 +438,7 @@ pub(super) fn build_begin_rescue<'a>(
}
}
// -------------------------------------------------------------------------
// switch handler, multi-way dispatch with fallthrough
// -------------------------------------------------------------------------
/// True for AST kinds that wrap a single switch case body.
pub(super) fn is_switch_case_kind(kind: &str) -> bool {
@ -780,9 +774,7 @@ pub(super) fn build_switch<'a>(
exits
}
// -------------------------------------------------------------------------
// try/catch/finally handler
// -------------------------------------------------------------------------
#[allow(clippy::too_many_arguments)]
pub(super) fn build_try<'a>(

View file

@ -388,9 +388,7 @@ fn js_catch_no_param_no_synthetic() {
);
}
// ─────────────────────────────────────────────────────────────────
// Ruby begin/rescue/ensure tests
// ─────────────────────────────────────────────────────────────────
#[test]
fn ruby_begin_rescue_has_exception_edges() {
@ -540,9 +538,7 @@ fn ruby_multiple_rescue_clauses() {
}
}
// ─────────────────────────────────────────────────────────────────
// Short-circuit evaluation tests
// ─────────────────────────────────────────────────────────────────
/// Helper: collect all If nodes from the CFG.
fn if_nodes(cfg: &Cfg) -> Vec<NodeIndex> {
@ -2008,10 +2004,8 @@ fn local_summary_callees_have_distinct_ordinals() {
assert_ne!(ord0, ord1, "ordinals must differ across sites");
}
// ─────────────────────────────────────────────────────────────────────
// Anonymous function body naming via syntactic context
// (derive_anon_fn_name_from_context coverage)
// ─────────────────────────────────────────────────────────────────────
fn js_body_names(src: &[u8]) -> Vec<String> {
let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
@ -2531,9 +2525,7 @@ fn pointer_disabled_skips_subscript_synthesis() {
});
}
// ─────────────────────────────────────────────────────────────────
// Gap-filling: switch / for / do-while / nested loops / re-throw
// ─────────────────────────────────────────────────────────────────
/// JS `switch` should produce one synthetic dispatch `If` node per
/// case (default excluded when at the tail), plus True edges into
@ -2908,12 +2900,10 @@ fn js_empty_function_body_well_formed() {
}
}
// ─────────────────────────────────────────────────────────────────────
// Loop CFG structure: every loop variant must produce a Loop header
// with at least one Back edge that targets that header. Without these
// invariants the SSA loop-induction-variable phi placement is wrong
// and the abstract-interp widening points are missed.
// ─────────────────────────────────────────────────────────────────────
fn loop_headers(cfg: &Cfg) -> Vec<NodeIndex> {
cfg.node_indices()
@ -3958,3 +3948,134 @@ fn rhs_array_literal_elements_recognise_per_language_shapes() {
// Non-array-shape node returns empty (defensive guard).
assert!(run("javascript", b"const x = tainted;\n", &["identifier"]).is_empty());
}
/// `CalleeSite.span` should carry the 1-based (line, col) of each call's
/// node span so downstream consumers (surface map, datastore/external
/// detectors) can render real coordinates instead of `line: 0`.
#[test]
fn callee_site_span_carries_line_and_column() {
// Each call sits on its own source line. The leading newline puts
// line 1 at the blank line; `helper(x, y);` is on line 3, etc.
let src = b"
function outer(obj, x, y) {
helper(x, y);
obj.method(x);
}
";
let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
let file_cfg = parse_to_file_cfg(src, "javascript", ts_lang);
// Look up the per-function summary for `outer`; its `callees` list is
// what carries the spans under test.
let (_key, outer) = file_cfg
.summaries
.iter()
.find(|(k, _)| k.name == "outer")
.expect("outer summary should exist");
// `helper(x, y)` is matched by its exact callee name.
let helper_site = outer
.callees
.iter()
.find(|c| c.name == "helper")
.expect("helper call should be recorded");
let (line, col) = helper_site.span.expect("span populated at CFG-build time");
assert_eq!(line, 3, "helper(...) sits on the 3rd source line");
assert!(col >= 5, "indented 4 spaces — column is 1-based and > 4");
// The method call is matched by suffix, so the lookup succeeds whether
// or not the recorded name carries a receiver prefix. Only the line is
// checked here; the column is deliberately ignored.
let method_site = outer
.callees
.iter()
.find(|c| c.name.ends_with("method"))
.expect("method call should be recorded");
let (mline, _) = method_site.span.expect("method span populated");
assert_eq!(mline, 4, "obj.method(x) on line 4");
}
// Constant-branch fold: CondArith capture + evaluation
/// Soundness check for `CondArith::eval` / `eval_bool`: the two
/// OWASP-Benchmark arithmetic guard shapes must fold to a definite boolean
/// using integer (truncating) division, while every undefined operation or
/// unresolved variable must produce `None` — never a wrong fold.
#[test]
fn cond_arith_eval_is_sound() {
    use crate::cfg::{BinOp, CondArith, CondVal};

    // Boxed-node constructors keep the expression trees below readable.
    fn lit(n: i64) -> Box<CondArith> {
        Box::new(CondArith::Lit(n))
    }
    fn var(name: &str) -> Box<CondArith> {
        Box::new(CondArith::Var(name.to_string()))
    }
    fn bin(op: BinOp, lhs: Box<CondArith>, rhs: Box<CondArith>) -> Box<CondArith> {
        Box::new(CondArith::Bin(op, lhs, rhs))
    }
    // Resolver that binds only `num`, to the given constant.
    fn num_is(value: i64) -> impl Fn(&str) -> Option<i64> {
        move |name: &str| (name == "num").then_some(value)
    }

    let num_86 = num_is(86);
    let num_196 = num_is(196);
    let unresolved = |_: &str| -> Option<i64> { None };

    // (7*42) - num > 200 → 208 > 200 → true.
    let sub_guard = CondArith::Bin(
        BinOp::Gt,
        bin(BinOp::Sub, bin(BinOp::Mul, lit(7), lit(42)), var("num")),
        lit(200),
    );
    assert_eq!(sub_guard.eval_bool(&num_86), Some(true));

    // (500/42) + num > 200 → 11 + 196 = 207 > 200 → true (integer div).
    let div_guard = CondArith::Bin(
        BinOp::Gt,
        bin(BinOp::Add, bin(BinOp::Div, lit(500), lit(42)), var("num")),
        lit(200),
    );
    assert_eq!(div_guard.eval_bool(&num_196), Some(true));

    // Integer division truncates toward zero (500/42 == 11, not ~11.9).
    let quotient = CondArith::Bin(BinOp::Div, lit(500), lit(42));
    assert_eq!(quotient.eval(&num_86), Some(CondVal::Int(11)));

    // Unresolved variable → None (no prune).
    assert_eq!(sub_guard.eval_bool(&unresolved), None);

    // Division / modulo by zero → None (never a wrong fold).
    let div_by_zero = CondArith::Bin(BinOp::Div, lit(1), lit(0));
    let mod_by_zero = CondArith::Bin(BinOp::Mod, lit(1), lit(0));
    assert_eq!(div_by_zero.eval(&num_86), None);
    assert_eq!(mod_by_zero.eval(&num_86), None);

    // Arithmetic overflow → None.
    let overflowing = CondArith::Bin(BinOp::Mul, lit(i64::MAX), lit(2));
    assert_eq!(overflowing.eval(&num_86), None);

    // Bare integer at the top level is not a branch condition → eval_bool None.
    assert_eq!(CondArith::Lit(1).eval_bool(&num_86), None);

    // A boolean sub-result used as an integer operand → None.
    let comparison = bin(BinOp::Gt, lit(2), lit(1)); // yields Bool
    let bool_plus_int = CondArith::Bin(BinOp::Add, comparison, lit(1));
    assert_eq!(bool_plus_int.eval(&num_86), None);
}
/// The CFG builder must capture a pure integer-arithmetic comparison as a
/// `CondArith` on the `If` node, and must refuse (None) any condition that
/// touches a call / field access / string.
#[test]
fn build_cond_arith_captures_pure_int_comparison() {
let ts_lang = Language::from(tree_sitter_java::LANGUAGE);
// Fixture: one foldable guard (`(7 * 42) - num > 200`) and one guard that
// contains a method call (`s.length() > 200`).
let src = br#"
class C {
void m(int num, String s) {
if ((7 * 42) - num > 200) { foo(); }
if (s.length() > 200) { bar(); }
}
}
"#;
let (cfg, _entry) = parse_and_build(src, "java", ts_lang);
let ifs = if_nodes(&cfg);
// Keep only the `If` nodes whose `cond_arith` was populated.
let arith: Vec<_> = ifs
.iter()
.filter_map(|&n| cfg[n].cond_arith.clone())
.collect();
// Exactly one If condition is a pure int-arith comparison; the
// `s.length() > 200` one must NOT be captured (it contains a call).
assert_eq!(
arith.len(),
1,
"only the pure int comparison should yield a CondArith, got {arith:?}"
);
// It folds to a definite bool once `num` is known constant.
// With num = 86: (7 * 42) - 86 = 208, and 208 > 200 holds.
let r = |name: &str| if name == "num" { Some(86) } else { None };
assert_eq!(arith[0].eval_bool(&r), Some(true));
}

View file

@ -1,8 +1,8 @@
use super::helpers::first_member_label;
use super::{
AstMeta, Cfg, EdgeKind, MAX_COND_VARS, MAX_CONDITION_TEXT_LEN, NodeInfo, StmtKind,
collect_idents, connect_all, detect_eq_with_const, detect_negation, has_call_descendant,
member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
build_cond_arith, collect_idents, connect_all, detect_eq_with_const, detect_negation,
has_call_descendant, member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
};
use crate::labels::{DataLabel, LangAnalysisRules, classify};
use crate::utils::snippet::truncate_at_char_boundary;
@ -10,9 +10,7 @@ use petgraph::graph::NodeIndex;
use smallvec::SmallVec;
use tree_sitter::Node;
// -------------------------------------------------------------------------
// Short-circuit boolean operator helpers
// -------------------------------------------------------------------------
#[derive(Debug, Clone, Copy, PartialEq)]
pub(super) enum BoolOp {
@ -225,6 +223,13 @@ pub(super) fn build_ternary_diamond<'a>(
// taint engine's equality-narrowing fires for `x === 'literal' ? …`.
let cond_if = push_condition_node(g, cond_ast, lang, code, enclosing_func);
g[cond_if].is_eq_with_const = detect_eq_with_const(cond_ast, lang);
// Capture the pure int-arith + comparison tree so `fold_constant_branches`
// can prune a dead constant-condition arm of the ternary (e.g. Java
// `(7*18)+num > 200 ? "const" : param` with `num` a known int constant),
// exactly as it does for the if-form. `build_cond_arith` is conservative
// (returns None for any call/field/string/`&&`/`||`/`!` shape) so this is
// sound for every language the diamond fires on.
g[cond_if].cond_arith = build_cond_arith(cond_ast, lang, code, 0);
connect_all(g, preds, cond_if, pred_edge);
// 2. Branches. Each branch produces its own exit frontier (≥ 1 node) ,

View file

@ -90,9 +90,7 @@ fn collect_ts_type_alias_local_collections(root: Node<'_>, code: &[u8], out: &mu
});
}
// ─────────────────────────────────────────────────────────────────────
// Java
// ─────────────────────────────────────────────────────────────────────
/// Walk the AST for `class_declaration` nodes whose body contains
/// `field_declaration`s with classifiable types. Only class-level
@ -144,9 +142,7 @@ fn collect_java(root: Node<'_>, code: &[u8], out: &mut HashMap<String, DtoFields
});
}
// ─────────────────────────────────────────────────────────────────────
// TypeScript / JavaScript
// ─────────────────────────────────────────────────────────────────────
/// Walk for `interface_declaration` and `class_declaration` nodes.
/// Interfaces with `property_signature` children and classes with
@ -224,9 +220,7 @@ fn extract_ts_property<'a>(node: Node<'a>, code: &'a [u8]) -> Option<(String, Ty
Some((field_name, kind))
}
// ─────────────────────────────────────────────────────────────────────
// Rust
// ─────────────────────────────────────────────────────────────────────
/// Walk for `struct_item` nodes whose body lists named fields.
fn collect_rust(root: Node<'_>, code: &[u8], out: &mut HashMap<String, DtoFields>) {
@ -276,9 +270,7 @@ fn collect_rust(root: Node<'_>, code: &[u8], out: &mut HashMap<String, DtoFields
});
}
// ─────────────────────────────────────────────────────────────────────
// Python (Pydantic)
// ─────────────────────────────────────────────────────────────────────
/// Walk for `class_definition` nodes whose superclass list contains
/// `BaseModel` / `pydantic.BaseModel`. Each `expression_statement` in
@ -360,9 +352,7 @@ fn python_inherits_basemodel<'a>(class_node: Node<'a>, code: &'a [u8]) -> bool {
false
}
// ─────────────────────────────────────────────────────────────────────
// Walk helper
// ─────────────────────────────────────────────────────────────────────
fn walk<'a, F: FnMut(Node<'a>)>(node: Node<'a>, f: &mut F) {
f(node);

View file

@ -4,9 +4,7 @@ use crate::labels::{DataLabel, Kind, classify, lookup};
use smallvec::SmallVec;
use tree_sitter::Node;
// -------------------------------------------------------------------------
// Utility helpers
// -------------------------------------------------------------------------
/// Return the text of a node.
#[inline]
@ -1018,10 +1016,10 @@ pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
/// AST kind names for subscript / index expressions
/// across the languages whose container-element flow we model.
///
/// JS/TS use `subscript_expression`; Python uses `subscript`; Go uses
/// `index_expression`. Other languages either lower indexing through
/// method calls (Rust slice indexing) or are out of scope for the
/// initial W5 rollout (Java/Ruby/PHP/C/C++).
/// JS/TS and C/C++ use `subscript_expression`; Python uses `subscript`;
/// Go uses `index_expression`. Other languages either lower indexing
/// through method calls (Rust slice indexing) or are out of scope for
/// the initial W5 rollout (Java/Ruby/PHP).
#[inline]
pub(crate) fn is_subscript_kind(kind: &str) -> bool {
matches!(
@ -1086,7 +1084,8 @@ pub(crate) fn subscript_components<'a>(n: Node<'a>, code: &'a [u8]) -> Option<(S
return None;
}
let arr_text = text_of(arr, code)?;
// PHP-style `$x` strip not needed here, Go/JS/Python don't use it.
// PHP-style `$x` strip not needed here; the supported languages
// don't use it for local array identifiers.
let idx_text = text_of(idx, code)?;
Some((arr_text, idx_text))
}

View file

@ -54,9 +54,7 @@ pub(crate) fn collect_hierarchy_edges(
acc
}
// ─────────────────────────────────────────────────────────────────────
// Java
// ─────────────────────────────────────────────────────────────────────
fn collect_java<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
walk(root, &mut |node| {
@ -146,9 +144,7 @@ fn type_identifier_text(n: Node<'_>, code: &[u8]) -> Option<String> {
}
}
// ─────────────────────────────────────────────────────────────────────
// Rust
// ─────────────────────────────────────────────────────────────────────
/// Walk for `impl_item` nodes and emit edges from the concrete type to
/// the trait being implemented. Inherent impls (`impl Foo {}`) emit
@ -199,9 +195,7 @@ fn rust_path_leaf(n: Node<'_>, code: &[u8]) -> Option<String> {
}
}
// ─────────────────────────────────────────────────────────────────────
// TypeScript / JavaScript
// ─────────────────────────────────────────────────────────────────────
fn collect_ts<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
walk(root, &mut |node| {
@ -268,9 +262,7 @@ fn collect_ts_heritage<F: FnMut(String, String)>(
}
}
// ─────────────────────────────────────────────────────────────────────
// Python
// ─────────────────────────────────────────────────────────────────────
fn collect_python<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
walk(root, &mut |node| {
@ -314,9 +306,7 @@ fn python_base_text(n: Node<'_>, code: &[u8]) -> Option<String> {
}
}
// ─────────────────────────────────────────────────────────────────────
// Ruby
// ─────────────────────────────────────────────────────────────────────
fn collect_ruby<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
walk(root, &mut |node| {
@ -345,9 +335,7 @@ fn collect_ruby<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mu
});
}
// ─────────────────────────────────────────────────────────────────────
// PHP
// ─────────────────────────────────────────────────────────────────────
fn collect_php<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
walk(root, &mut |node| {
@ -382,9 +370,7 @@ fn collect_php<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut
});
}
// ─────────────────────────────────────────────────────────────────────
// C++
// ─────────────────────────────────────────────────────────────────────
fn collect_cpp<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
walk(root, &mut |node| {
@ -419,9 +405,7 @@ fn collect_cpp<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut
});
}
// ─────────────────────────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────────────────
fn walk<'a, F: FnMut(Node<'a>)>(node: Node<'a>, f: &mut F) {
f(node);

View file

@ -135,9 +135,7 @@ fn map_fs_module_to_promises(module: &str) -> Option<String> {
}
}
// -------------------------------------------------------------------------
// Import binding extraction
// -------------------------------------------------------------------------
/// Walk the top-level AST nodes and collect import alias bindings:
///
@ -615,6 +613,4 @@ fn scoped_identifier_matches(node: Node, code: &[u8], crate_prefix: &str, leaf:
(Some(p), Some(l)) if p == crate_prefix && l == leaf)
}
// -------------------------------------------------------------------------
// === PUBLIC ENTRY POINT =================================================
// -------------------------------------------------------------------------

View file

@ -1,3 +1,9 @@
//! Literal and constant-expression extraction from tree-sitter AST nodes.
//!
//! Parses integer and string literals, folds constant binary ops, and derives
//! template/string prefixes and quote stripping for CFG construction and
//! const propagation.
use super::conditions::unwrap_parens;
use super::helpers::{collect_array_pattern_bindings_indexed, collect_rhs_array_literal_elements};
use super::{
@ -1198,10 +1204,22 @@ pub(super) fn is_syntactic_literal(node: Node, code: &[u8]) -> bool {
| "string_content"
| "string_fragment" => !has_string_interpolation(node),
// Numbers
"integer" | "integer_literal" | "int_literal" | "float" | "float_literal" | "number" => {
true
}
// Numbers. Java's grammar uses radix-tagged kinds
// (`decimal_integer_literal`, `hex_integer_literal`, …) rather than a
// bare `integer`, so `int num = 86;` would otherwise miss this arm and
// lower to `Const(None)` (Varying) instead of `Const("86")`.
"integer"
| "integer_literal"
| "int_literal"
| "float"
| "float_literal"
| "number"
| "decimal_integer_literal"
| "hex_integer_literal"
| "octal_integer_literal"
| "binary_integer_literal"
| "decimal_floating_point_literal"
| "hex_floating_point_literal" => true,
// Booleans / null / nil / none
"true" | "false" | "null" | "nil" | "none" | "null_literal" | "boolean"
@ -2544,6 +2562,37 @@ pub(super) fn def_use(
}
}
}
// Java `enhanced_for_statement` binds the loop variable on the
// `name` field and the iterable on the `value` field; Ruby's
// `for x in coll` uses `pattern`/`value`. Neither uses the
// JS/Python `left`/`right` convention, so without this mapping
// the loop binding was never recorded as a define and taint on
// the iterable could not reach the loop variable (OWASP's
// dominant `for (Cookie c : req.getCookies())` shape).
if left.is_none() && right.is_none() {
if let Some(v) = ast.child_by_field_name("value") {
left = ast
.child_by_field_name("name")
.or_else(|| ast.child_by_field_name("pattern"));
right = Some(v);
}
}
// PHP `foreach ($coll as $v)` / `foreach ($coll as $k => $v)`:
// the iterable and binding are unnamed children separated by the
// `as` keyword (only `body` is a named field). Map the binding
// onto `left` and the iterable onto `right` so the shared
// define/use logic below records the loop variable.
if left.is_none() && right.is_none() && ast.kind() == "foreach_statement" {
let mut cursor = ast.walk();
let kids: Vec<Node> = ast.children(&mut cursor).collect();
if let Some(as_pos) = kids.iter().position(|c| c.kind() == "as") {
right = kids[..as_pos].iter().rev().find(|c| c.is_named()).copied();
left = kids[as_pos + 1..]
.iter()
.find(|c| c.is_named() && lookup(lang, c.kind()) != Kind::Block)
.copied();
}
}
if left.is_none() && right.is_none() {
// C-style for, defer to default ident collection.
let mut idents = Vec::new();

View file

@ -12,11 +12,7 @@
//! `export_summaries` converts in-graph [`LocalFuncSummary`] values to
//! the serializable [`crate::summary::FuncSummary`] form.
#![allow(
clippy::collapsible_if,
clippy::let_and_return,
clippy::unnecessary_map_or
)]
#![allow(clippy::let_and_return, clippy::unnecessary_map_or)]
use petgraph::algo::dominators::{Dominators, simple_fast};
use petgraph::prelude::*;
@ -431,6 +427,131 @@ pub enum BinOp {
GtEq,
}
impl BinOp {
/// True for the six comparison operators (result is a boolean 0/1).
pub fn is_comparison(self) -> bool {
matches!(
self,
BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq
)
}
}
/// A branch condition captured as a pure integer-arithmetic + comparison
/// expression tree at CFG-build time (where the real tree-sitter AST is
/// available, so operator precedence and parentheses are correct by
/// construction — no text re-parsing downstream).
///
/// Built only when *every* leaf is an integer literal or a plain identifier
/// and *every* interior node is an arithmetic / comparison / bitwise operator,
/// a unary `-`, or a parenthesis. Any call, field access, string, container,
/// or compound-boolean (`&&` / `||`) subtree makes the builder return `None`
/// for the whole condition. Identifiers are stored by name and resolved to
/// their constant SSA value at fold time
/// ([`crate::ssa::const_prop::fold_constant_branches`]); the actual numeric
/// evaluation is shared in [`CondArith::eval`].
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum CondArith {
/// Integer literal.
Lit(i64),
/// Identifier — resolved to a constant integer at fold time, else unknown.
Var(String),
/// Unary integer negation: `-x`. Evaluation yields `None` when the operand
/// is a boolean or when the negation overflows (`checked_neg`).
Neg(Box<CondArith>),
/// Binary arithmetic / bitwise / comparison. Both operands must evaluate
/// to integers; a boolean operand makes [`CondArith::eval`] return `None`.
Bin(BinOp, Box<CondArith>, Box<CondArith>),
}
/// Result of folding a [`CondArith`] against a constant environment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CondVal {
/// Integer result: produced by literals, resolved variables, negation and
/// the arithmetic / bitwise / shift operators.
Int(i64),
/// Boolean result: produced only by the comparison operators.
Bool(bool),
}
impl CondArith {
    /// Evaluate against a variable→constant-integer resolver.
    ///
    /// Returns `None` the moment anything is non-constant or an operation is
    /// undefined (division/modulo by zero, arithmetic overflow, a shift that
    /// is out of range or loses bits, a boolean used as an integer operand),
    /// so a caller can only ever prune on a *definite* result. All integer
    /// arithmetic is checked against the `i64` range; overflow yields `None`
    /// rather than a wrapped value.
    ///
    /// NOTE(review): values are modelled as `i64`. Wraparound at narrower
    /// widths in the analysed language (e.g. a 32-bit `int`) is not modelled
    /// here — confirm this is acceptable for every language that reaches
    /// the fold.
    pub fn eval(&self, resolve: &impl Fn(&str) -> Option<i64>) -> Option<CondVal> {
        match self {
            CondArith::Lit(n) => Some(CondVal::Int(*n)),
            CondArith::Var(name) => resolve(name).map(CondVal::Int),
            CondArith::Neg(inner) => match inner.eval(resolve)? {
                CondVal::Int(n) => n.checked_neg().map(CondVal::Int),
                CondVal::Bool(_) => None,
            },
            CondArith::Bin(op, l, r) => {
                // Both operands must be integers; a comparison result used
                // as an operand is a type mismatch and aborts the fold.
                let lhs = match l.eval(resolve)? {
                    CondVal::Int(n) => n,
                    CondVal::Bool(_) => return None,
                };
                let rhs = match r.eval(resolve)? {
                    CondVal::Int(n) => n,
                    CondVal::Bool(_) => return None,
                };
                let arith = |v: Option<i64>| v.map(CondVal::Int);
                match op {
                    BinOp::Add => arith(lhs.checked_add(rhs)),
                    BinOp::Sub => arith(lhs.checked_sub(rhs)),
                    BinOp::Mul => arith(lhs.checked_mul(rhs)),
                    // Java/Rust integer division and modulo both truncate
                    // toward zero; `checked_*` rejects div-by-zero and
                    // i64::MIN / -1 overflow.
                    BinOp::Div => arith(lhs.checked_div(rhs)),
                    BinOp::Mod => arith(lhs.checked_rem(rhs)),
                    BinOp::BitAnd => arith(Some(lhs & rhs)),
                    BinOp::BitOr => arith(Some(lhs | rhs)),
                    BinOp::BitXor => arith(Some(lhs ^ rhs)),
                    // `checked_shl` only rejects shift counts >= 64; it still
                    // silently discards bits shifted out of the top. Require
                    // the shift to be lossless (shifting back restores the
                    // operand) so an overflowing shift folds to `None`
                    // instead of a wrapped value. Negative counts fail the
                    // `u32` conversion and also yield `None`.
                    BinOp::LeftShift => u32::try_from(rhs)
                        .ok()
                        .and_then(|s| lhs.checked_shl(s).filter(|&v| v >> s == lhs))
                        .map(CondVal::Int),
                    // Right shift cannot overflow; only the count is checked.
                    BinOp::RightShift => u32::try_from(rhs)
                        .ok()
                        .and_then(|s| lhs.checked_shr(s))
                        .map(CondVal::Int),
                    BinOp::Eq => Some(CondVal::Bool(lhs == rhs)),
                    BinOp::NotEq => Some(CondVal::Bool(lhs != rhs)),
                    BinOp::Lt => Some(CondVal::Bool(lhs < rhs)),
                    BinOp::LtEq => Some(CondVal::Bool(lhs <= rhs)),
                    BinOp::Gt => Some(CondVal::Bool(lhs > rhs)),
                    BinOp::GtEq => Some(CondVal::Bool(lhs >= rhs)),
                }
            }
        }
    }

    /// Evaluate to a definite boolean, or `None`. The top-level node must be a
    /// comparison (a bare integer is not a branch condition we fold).
    pub fn eval_bool(&self, resolve: &impl Fn(&str) -> Option<i64>) -> Option<bool> {
        match self.eval(resolve)? {
            CondVal::Bool(b) => Some(b),
            CondVal::Int(_) => None,
        }
    }

    /// Collect every identifier name referenced by the tree into `out`,
    /// in first-occurrence order. Names already present in `out` are not
    /// pushed again.
    pub fn collect_vars(&self, out: &mut Vec<String>) {
        match self {
            CondArith::Lit(_) => {}
            CondArith::Var(name) => {
                if !out.iter().any(|v| v == name) {
                    out.push(name.clone());
                }
            }
            CondArith::Neg(inner) => inner.collect_vars(out),
            CondArith::Bin(_, l, r) => {
                l.collect_vars(out);
                r.collect_vars(out);
            }
        }
    }
}
/// Call-related metadata for CFG nodes.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct CallMeta {
@ -662,6 +783,17 @@ pub struct NodeInfo {
pub condition_vars: Vec<String>,
/// For If nodes: whether the condition has a leading negation (`!` / `not`).
pub condition_negated: bool,
/// For If / conditional (ternary) nodes: the condition as a pure
/// integer-arithmetic + comparison expression tree, when the whole
/// condition is built only from integer literals, identifiers, arithmetic
/// / comparison operators, and parentheses. `None` for any condition that
/// touches a call, field access, string, compound boolean (`&&`/`||`), or
/// any shape this evaluator cannot prove constant. Consumed by
/// [`crate::ssa::const_prop::fold_constant_branches`] to prune branches
/// whose condition folds to a definite boolean once its variables are
/// resolved to constants — closing the synthetic "dead branch keeps the
/// tainted phi operand alive" false positive without any text re-parsing.
pub cond_arith: Option<CondArith>,
/// True when this is a Call node whose argument list contains only
/// syntactic literal values (strings, numbers, booleans, null/nil,
/// arrays/lists/tuples of literals). Also true for zero-argument calls
@ -791,10 +923,7 @@ impl NodeInfo {
/// lose information.
#[derive(Debug, Clone)]
pub struct LocalFuncSummary {
#[allow(dead_code)] // used for future intra-file graph traversal
pub entry: NodeIndex,
#[allow(dead_code)] // used for future intra-file graph traversal
pub exit: NodeIndex,
pub source_caps: Cap,
pub sanitizer_caps: Cap,
pub sink_caps: Cap,
@ -822,9 +951,7 @@ pub struct LocalFuncSummary {
pub type Cfg = Graph<NodeInfo, EdgeKind>;
pub type FuncSummaries = HashMap<FuncKey, LocalFuncSummary>;
// -------------------------------------------------------------------------
// Per-body CFG types
// -------------------------------------------------------------------------
/// Opaque identifier for an executable body within a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
@ -901,7 +1028,6 @@ pub struct BodyCfg {
pub meta: BodyMeta,
pub graph: Cfg,
pub entry: NodeIndex,
pub exit: NodeIndex,
}
/// A single import alias binding: local alias → original exported name + module.
@ -1069,7 +1195,7 @@ fn extract_condition_raw<'a>(
ast: Node<'a>,
lang: &str,
code: &'a [u8],
) -> (Option<String>, Vec<String>, bool) {
) -> (Option<String>, Vec<String>, bool, Option<CondArith>) {
// 1. Find the condition subtree.
let cond_node = ast.child_by_field_name("condition").or_else(|| {
// Rust `if_expression` uses positional children: the condition is
@ -1089,7 +1215,7 @@ fn extract_condition_raw<'a>(
});
let Some(cond) = cond_node else {
return (None, Vec::new(), false);
return (None, Vec::new(), false, None);
};
// 2. Detect leading negation (`!expr`, `not expr`, Ruby `unless`).
@ -1107,7 +1233,20 @@ fn extract_condition_raw<'a>(
let text = text_of(cond, code)
.map(|t| truncate_at_char_boundary(&t, MAX_CONDITION_TEXT_LEN).to_string());
(text, vars, negated)
// 5. Capture the pure integer-arithmetic + comparison tree (for constant
// branch folding). Built from the FULL condition node `cond` (not the
// negation-stripped `inner`) so the folded boolean matches the
// Branch terminator's `true_blk = cond-true` semantics directly. Ruby
// `unless` swaps the True/False edges in the CFG builder (lines
// ~5029), so the branch polarity would be inverted — skip it to stay
// sound (`unless` with a constant arithmetic guard is negligible).
let cond_arith = if ast.kind() == "unless" {
None
} else {
build_cond_arith(cond, lang, code, 0)
};
(text, vars, negated, cond_arith)
}
/// Detect leading negation and return the inner expression.
@ -1245,6 +1384,174 @@ fn extract_bin_op(ast: Node, lang: &str) -> Option<BinOp> {
None
}
/// Parse an integer literal node to its `i64` value, honouring hex / octal /
/// binary radix prefixes (`0x`, `0o`, `0b`) and digit separators (`1_000`).
///
/// Leading-zero literals are handled conservatively, because their meaning
/// differs between languages and this function does not know the language:
/// a node of kind `octal_integer_literal` (`017`) is parsed as radix 8,
/// while a leading-zero literal of any other kind yields `None` rather than
/// risking a decimal misreading of an octal value.
///
/// Returns `None` for floats, non-literals, ambiguous leading-zero forms,
/// a byte range outside `code`, or values that overflow `i64`.
fn parse_int_literal(node: Node, code: &[u8]) -> Option<i64> {
    let kind = node.kind();
    let is_int = matches!(
        kind,
        "integer"
            | "integer_literal"
            | "int_literal"
            | "number"
            | "number_literal"
            | "decimal_integer_literal"
            | "hex_integer_literal"
            | "octal_integer_literal"
            | "binary_integer_literal"
    );
    if !is_int {
        return None;
    }

    // Checked slicing: a node whose range does not fit `code` is treated as
    // "not a literal" instead of panicking.
    let raw = std::str::from_utf8(code.get(node.byte_range())?).ok()?.trim();

    // Strip Java long suffix and digit separators.
    let cleaned: String = raw
        .trim_end_matches(['l', 'L'])
        .chars()
        .filter(|c| *c != '_')
        .collect();

    // Explicitly prefixed radix forms.
    let radix_prefixes: [(&str, &str, u32); 3] =
        [("0x", "0X", 16), ("0o", "0O", 8), ("0b", "0B", 2)];
    for (lower, upper, radix) in radix_prefixes {
        if let Some(digits) = cleaned
            .strip_prefix(lower)
            .or_else(|| cleaned.strip_prefix(upper))
        {
            return i64::from_str_radix(digits, radix).ok();
        }
    }

    // Bare leading zero followed by more characters. Must be decided before
    // the decimal parse below, which would happily read `017` as 17.
    if cleaned.len() > 1 && cleaned.starts_with('0') {
        return if kind == "octal_integer_literal" {
            i64::from_str_radix(&cleaned[1..], 8).ok()
        } else {
            None
        };
    }

    cleaned.parse::<i64>().ok()
}
/// Translate a binary expression's operator token into a [`BinOp`].
///
/// The operator is taken to be the first anonymous (unnamed) child of
/// `node`; operands are named children and are skipped. Yields `None` when
/// there is no anonymous child or when the token is outside the arithmetic /
/// bitwise / comparison set (boolean `&&` / `||`, assignment, …), which makes
/// the enclosing [`CondArith`] build bail.
fn binary_op_token(node: Node) -> Option<BinOp> {
    let mut cursor = node.walk();
    let operator = node
        .children(&mut cursor)
        .find(|child| !child.is_named())?;

    let op = match operator.kind() {
        "+" => BinOp::Add,
        "-" => BinOp::Sub,
        "*" => BinOp::Mul,
        "/" => BinOp::Div,
        "%" => BinOp::Mod,
        "&" => BinOp::BitAnd,
        "|" => BinOp::BitOr,
        "^" => BinOp::BitXor,
        "<<" => BinOp::LeftShift,
        ">>" => BinOp::RightShift,
        "==" | "===" => BinOp::Eq,
        "!=" | "!==" => BinOp::NotEq,
        "<" => BinOp::Lt,
        "<=" => BinOp::LtEq,
        ">" => BinOp::Gt,
        ">=" => BinOp::GtEq,
        _ => return None,
    };
    Some(op)
}
/// Lower a condition AST subtree into a [`CondArith`] tree.
///
/// Works directly on the tree-sitter node, so precedence and parentheses are
/// already encoded in the tree shape and no text parsing is needed. Accepted
/// shapes are: parenthesised expressions (unwrapped), integer literals, bare
/// identifiers, unary `-`, and two-operand binary expressions whose operator
/// is recognised by `binary_op_token`. Every other node kind (call, field
/// access, string, boolean `&&`/`||`, unary `!`) produces `None`, which
/// disables folding for that branch instead of risking a wrong fold.
/// Recursion is cut off beyond depth 64 to guard against pathological
/// nesting.
pub(super) fn build_cond_arith(
    node: Node,
    lang: &str,
    code: &[u8],
    depth: u32,
) -> Option<CondArith> {
    if depth > 64 {
        return None;
    }
    let kind = node.kind();
    let descend = |child: Node| build_cond_arith(child, lang, code, depth + 1);

    // Parentheses are transparent to the value: lower the wrapped expression.
    let is_paren = matches!(
        kind,
        "parenthesized_expression" | "parenthesized" | "parenthesized_statement"
    );
    if is_paren {
        return descend(node.named_child(0)?);
    }

    if let Some(value) = parse_int_literal(node, code) {
        return Some(CondArith::Lit(value));
    }

    // Plain local identifier only; dotted paths and field accesses have other
    // node kinds and are deliberately not captured.
    if kind == "identifier" || kind == "simple_identifier" {
        let name = text_of(node, code)?;
        let is_plain = !name.is_empty()
            && name
                .chars()
                .all(|c| c.is_alphanumeric() || matches!(c, '_' | '$'));
        return is_plain.then(|| CondArith::Var(name));
    }

    // Unary minus is the only supported prefix operator. Boolean `!` / `not`
    // would need a boolean operand, which `CondArith::eval` rejects, so a
    // negated condition takes the conservative `None` path.
    let is_unary = matches!(
        kind,
        "unary_expression" | "unary_operator" | "prefix_unary_expression" | "unary"
    );
    if is_unary {
        let operand = node.named_child(0)?;
        let mut cursor = node.walk();
        let has_minus = node
            .children(&mut cursor)
            .any(|tok| tok.kind() == "-" && !tok.is_named());
        if !has_minus {
            return None;
        }
        return descend(operand).map(|inner| CondArith::Neg(Box::new(inner)));
    }

    // Binary arithmetic / comparison needs exactly two operands and one
    // operator; anything else (e.g. a chained Python `a < b < c`) is refused.
    if !is_binary_expr_kind(kind, lang) || node.named_child_count() != 2 {
        return None;
    }
    let op = binary_op_token(node)?;
    let left = descend(node.named_child(0)?)?;
    let right = descend(node.named_child(1)?)?;
    Some(CondArith::Bin(op, Box::new(left), Box::new(right)))
}
/// Find the RHS value node of an assignment-like AST node (variable declarator,
/// lexical declaration, assignment expression). Used by helpers that need to
/// inspect what an identifier is being initialized to.
@ -2071,6 +2378,32 @@ fn is_binary_expr_kind(kind: &str, lang: &str) -> bool {
}
}
/// Text used to classify the iterable expression of a for-each loop.
///
/// For subscript / index iterables (`$_GET['x']`, `params[:list]`, `arr[i]`)
/// the **base object's** text is returned: taint sources are keyed on the
/// base name (`$_GET`, `params`), and a trailing index would otherwise break
/// the word-boundary suffix match in `classify`. Every other iterable
/// (method calls, member chains, bare identifiers) — and any subscript whose
/// base text cannot be read — yields the full text of the expression.
fn iterable_label_text(iter: Node, code: &[u8]) -> Option<String> {
    let is_indexing = matches!(
        iter.kind(),
        "subscript_expression" | "subscript" | "index_expression" | "element_reference"
    );
    if is_indexing {
        // The base lives under a different field name per grammar; probe the
        // known ones in order, then fall back to the first child.
        let base_text = ["object", "operand", "value"]
            .into_iter()
            .find_map(|field| iter.child_by_field_name(field))
            .or_else(|| iter.child(0))
            .and_then(|base| text_of(base, code));
        if base_text.is_some() {
            return base_text;
        }
    }
    text_of(iter, code)
}
/// Create a node in one short borrow and optionally attach a taint label.
#[allow(clippy::too_many_arguments)]
pub(super) fn push_node<'a>(
@ -2212,6 +2545,51 @@ pub(super) fn push_node<'a>(
text = iter_text;
}
// Java `for (T x : iter)`: tree-sitter-java emits `enhanced_for_statement`
// with the iterable on the `value` field. Classify against the iterable
// text so a source-returning call (`req.getCookies()`,
// `req.getParameterValues(..)`) lights up a Source on the loop node and
// the loop binding inherits its taint — the same loop-binding-inherits-
// iterator-taint contract the JS/Python rewrites above provide. The
// loop variable itself is recorded as a define by `def_use`'s Kind::For
// arm (via the `name`/`value` mapping), so the Source-labeled loop node
// taints the binding directly.
if lang == "java"
&& ast.kind() == "enhanced_for_statement"
&& let Some(value) = ast.child_by_field_name("value")
&& let Some(iter_text) = iterable_label_text(value, code)
{
text = iter_text;
}
// PHP `foreach ($iter as $v)` / `foreach ($iter as $k => $v)`: the
// iterable is the named child immediately preceding the `as` keyword
// (only `body` is a named field). Classify against the iterable text so
// a superglobal/source iterable (`$_GET[..]`, `$_POST[..]`) taints the
// loop binding, matching the JS/Python/Java rewrites.
if lang == "php" && ast.kind() == "foreach_statement" {
let mut cursor = ast.walk();
let kids: Vec<Node> = ast.children(&mut cursor).collect();
if let Some(as_pos) = kids.iter().position(|c| c.kind() == "as")
&& let Some(iter_node) = kids[..as_pos].iter().rev().find(|c| c.is_named()).copied()
&& let Some(iter_text) = iterable_label_text(iter_node, code)
{
text = iter_text;
}
}
// Ruby `for x in coll`: tree-sitter-ruby's `for` node carries the
// iterable on the `value` field. (The idiomatic `coll.each { |x| }`
// form is a method call with a block and is handled by the call/block
// machinery, not here.)
if lang == "ruby"
&& ast.kind() == "for"
&& let Some(value) = ast.child_by_field_name("value")
&& let Some(iter_text) = iterable_label_text(value, code)
{
text = iter_text;
}
// If this is a declaration/expression wrapper or an assignment that
// *contains* a call, prefer the first inner call identifier instead of
// the whole line. Track the inner call's byte span so we can populate
@ -2511,6 +2889,23 @@ pub(super) fn push_node<'a>(
}
}
// Conditions can contain source/sink calls whose argument side effects are
// load-bearing for taint, e.g. C `if (!fgets(buf, n, stdin)) return;`.
// Classify the condition call so output-parameter sources still lower as
// SSA calls while the CFG node keeps its branch shape.
if labels.is_empty()
&& matches!(lookup(lang, ast.kind()), Kind::If | Kind::While)
&& let Some(cond) = ast.child_by_field_name("condition")
&& let Some((ident, ident_span)) = first_call_ident_with_span(cond, lang, code)
&& let Some(l) = classify(lang, &ident, extra)
{
labels.push(l);
text = ident;
if inner_text_span.is_none() {
inner_text_span = Some(ident_span);
}
}
// For `if let` / `while let` patterns: try to classify the value expression
// in the let-condition as a source/sink. E.g. `if let Ok(cmd) = env::var("CMD")`
// should recognise `env::var` as a taint source and label this node accordingly.
@ -3147,11 +3542,12 @@ pub(super) fn push_node<'a>(
};
// Extract condition metadata for If nodes.
let (condition_text, condition_vars, condition_negated) = if kind == StmtKind::If {
extract_condition_raw(ast, lang, code)
} else {
(None, Vec::new(), false)
};
let (condition_text, condition_vars, condition_negated, cond_arith) =
if matches!(lookup(lang, ast.kind()), Kind::If) {
extract_condition_raw(ast, lang, code)
} else {
(None, Vec::new(), false, None)
};
// Extract per-argument identifiers for Call nodes.
// Also extract for gated-sink nodes so payload-arg filtering works.
@ -3427,6 +3823,7 @@ pub(super) fn push_node<'a>(
condition_text,
condition_vars,
condition_negated,
cond_arith,
all_args_literal,
catch_param: false,
arg_callees,
@ -4677,10 +5074,8 @@ fn apply_arg_source_bindings(
}
}
// -------------------------------------------------------------------------
// The recursive *workhorse* that converts an AST node into a CFG slice.
// Returns the set of *exit* nodes that need to be wired further.
// -------------------------------------------------------------------------
#[allow(clippy::too_many_arguments)]
pub(super) fn build_sub<'a>(
ast: Node<'a>,
@ -4701,9 +5096,7 @@ pub(super) fn build_sub<'a>(
current_body_id: BodyId,
) -> Vec<NodeIndex> {
match lookup(lang, ast.kind()) {
// ─────────────────────────────────────────────────────────────────
// IF/ELSE: two branches that remerge afterwards
// ─────────────────────────────────────────────────────────────────
Kind::If => {
// Some grammars (Go `if init; cond {}`, sibling C-style forms)
// attach an init / "initializer" subtree that runs before the
@ -4985,9 +5378,7 @@ pub(super) fn build_sub<'a>(
}
}
// ─────────────────────────────────────────────────────────────────
// WHILE / FOR: classic loop with a back edge.
// ─────────────────────────────────────────────────────────────────
Kind::While | Kind::For => {
let header = push_node(
g,
@ -5129,9 +5520,7 @@ pub(super) fn build_sub<'a>(
}
}
// ─────────────────────────────────────────────────────────────────
// Control-flow sinks (return / break / continue).
// ─────────────────────────────────────────────────────────────────
Kind::Return => {
if has_call_descendant(ast, lang) {
// Return-call bug fix: emit a Call node BEFORE the Return so
@ -5427,9 +5816,7 @@ pub(super) fn build_sub<'a>(
current_body_id,
),
// ─────────────────────────────────────────────────────────────────
// BLOCK: statements execute sequentially
// ─────────────────────────────────────────────────────────────────
Kind::SourceFile | Kind::Block => {
// Ruby body_statement with rescue/ensure = implicit begin/rescue
if lang == "ruby" && ast.kind() == "body_statement" {
@ -5664,7 +6051,7 @@ pub(super) fn build_sub<'a>(
for idx in fn_graph.node_indices() {
let info = &fn_graph[idx];
if let Some(callee) = &info.call.callee {
let site = build_callee_site(callee, info, lang);
let site = build_callee_site(callee, info, lang, code);
// Dedup by (name, arity, receiver, qualifier, ordinal). A
// single function may legitimately contain multiple distinct
// calls to the same callee (e.g. different ordinals or
@ -5789,7 +6176,6 @@ pub(super) fn build_sub<'a>(
key,
LocalFuncSummary {
entry: fn_entry,
exit: fn_exit,
source_caps: fn_src_bits,
sanitizer_caps: fn_sani_bits,
sink_caps: fn_sink_bits,
@ -5839,7 +6225,6 @@ pub(super) fn build_sub<'a>(
},
graph: fn_graph,
entry: fn_entry,
exit: fn_exit,
});
// ── 7) Insert placeholder in parent graph ─────────────────────────
@ -5899,10 +6284,14 @@ pub(super) fn build_sub<'a>(
);
}
// JS/TS ternary-RHS split: `var x = c ? a : b;` and
// JS/TS/Java ternary-RHS split: `var x = c ? a : b;` and
// `obj.prop = c ? a : b;` lower to a real diamond CFG so the
// condition is control-flow (not a data-flow `uses` entry).
if matches!(lang, "javascript" | "typescript" | "tsx")
// Java uses the same `ternary_expression` AST kind; routing it
// through the diamond lets `fold_constant_branches` prune dead
// constant-condition arms (`cond ? "const" : param`) the same way
// it does for the if-form.
if matches!(lang, "javascript" | "typescript" | "tsx" | "java")
&& let Some((lhs_ast, ternary_ast)) = find_ternary_rhs_wrapper(ast)
{
let (lhs_text, lhs_labels) =
@ -6157,8 +6546,8 @@ pub(super) fn build_sub<'a>(
// Assignment that may contain a call (Python `x = os.getenv(...)`, Ruby `x = gets()`)
Kind::Assignment => {
// JS/TS ternary-RHS split, same rationale as the CallWrapper branch.
if matches!(lang, "javascript" | "typescript" | "tsx")
// JS/TS/Java ternary-RHS split, same rationale as the CallWrapper branch.
if matches!(lang, "javascript" | "typescript" | "tsx" | "java")
&& let (Some(left), Some(right)) = (
ast.child_by_field_name("left"),
ast.child_by_field_name("right"),
@ -6259,9 +6648,7 @@ pub(super) fn build_sub<'a>(
analysis_rules,
),
// ─────────────────────────────────────────────────────────────────
// Every other node = simple sequential statement
// ─────────────────────────────────────────────────────────────────
_ => {
// React JSX `dangerouslySetInnerHTML={{__html: x}}` synthesis
// (Phase 06): handles arrow-bodied components like
@ -6428,7 +6815,6 @@ pub(crate) fn build_cfg<'a>(
},
graph: g,
entry,
exit,
};
bodies.insert(0, toplevel);
// Sort by BodyId so that bodies[i].meta.id == BodyId(i).
@ -6632,7 +7018,12 @@ fn apply_gated_label_rules(
/// remains the single segment immediately before the leaf (back-compat
/// with the legacy heuristic). For method calls the qualifier is
/// redundant with `receiver` and is left `None`.
fn build_callee_site(callee: &str, info: &NodeInfo, lang: &str) -> crate::summary::CalleeSite {
fn build_callee_site(
callee: &str,
info: &NodeInfo,
lang: &str,
code: &[u8],
) -> crate::summary::CalleeSite {
use crate::summary::CalleeSite;
let receiver = info.call.receiver.clone();
@ -6661,15 +7052,39 @@ fn build_callee_site(callee: &str, info: &NodeInfo, lang: &str) -> crate::summar
None
};
let span = callee_span_line_col(code, info.ast.span.0);
CalleeSite {
name: callee.to_string(),
arity,
receiver,
qualifier,
ordinal: info.call.call_ordinal,
span,
}
}
/// Translate a byte `offset` into a 1-based `(line, column)` position in `code`.
///
/// The column is counted in bytes from the start of the line. An `offset`
/// beyond the end of `code` is clamped to `code.len()`, so a span that
/// overshoots the buffer still resolves to a coordinate on the last line.
///
/// Returns `None` only when `code` is empty, because there is no source text
/// to resolve against.
fn callee_span_line_col(code: &[u8], offset: usize) -> Option<(u32, u32)> {
    if code.is_empty() {
        return None;
    }
    let end = offset.min(code.len());
    let before = &code[..end];

    // Number of newlines preceding the offset gives the zero-based line.
    let newline_count = before.iter().filter(|&&byte| byte == b'\n').count();
    // Byte index at which the current line begins (0 when on the first line).
    let line_start = before
        .iter()
        .rposition(|&byte| byte == b'\n')
        .map_or(0, |newline_idx| newline_idx + 1);

    let line = newline_count as u32 + 1;
    let col = (end - line_start) as u32 + 1;
    Some((line, col))
}
/// Convert the graphlocal `FuncSummaries` into serialisable [`FuncSummary`]
/// values suitable for crossfile persistence.
pub(crate) fn export_summaries(
@ -6721,21 +7136,5 @@ pub(crate) fn export_summaries(
.collect()
}
// pub(crate) fn dump_cfg(g: &Cfg) {
// debug!(target: "taint", "CFG DUMP: nodes = {}, edges = {}", g.node_count(), g.edge_count());
// for idx in g.node_indices() {
// debug!(target: "taint", " node {:>3}: {:?}", idx.index(), g[idx]);
// }
// for e in g.edge_references() {
// debug!(
// target: "taint",
// " edge {:>3} → {:<3} ({:?})",
// e.source().index(),
// e.target().index(),
// e.weight()
// );
// }
// }
#[cfg(test)]
mod cfg_tests;

View file

@ -157,10 +157,6 @@ fn find_auth_nodes(ctx: &AnalysisContext) -> Vec<NodeIndex> {
}
impl CfgAnalysis for AuthGap {
fn name(&self) -> &'static str {
"auth-gap"
}
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
// Decorator/annotation/attribute auth on the body declaration
// already gates every sink in the body, skip the
@ -218,7 +214,6 @@ impl CfgAnalysis for AuthGap {
findings.push(CfgFinding {
rule_id: "cfg-auth-gap".to_string(),
title: "Missing auth check".to_string(),
severity: Severity::High,
confidence: Confidence::Medium,
span: info.ast.span,

View file

@ -100,38 +100,6 @@ fn build_reversed_graph(cfg: &Cfg) -> Graph<NodeInfo, EdgeKind> {
rev
}
/// Collect every `StmtKind::Call` node whose callee matches one of `matchers`.
///
/// Matching is ASCII case-insensitive. A matcher that ends with `_` is
/// treated as a prefix pattern (the callee must start with it); any other
/// matcher is treated as a suffix pattern (the callee must end with it).
/// Nodes without a recorded callee never match.
#[allow(dead_code)]
pub fn find_call_nodes_matching(cfg: &Cfg, matchers: &[&str]) -> Vec<NodeIndex> {
    // Lower-case the patterns once up front instead of once per node.
    let lowered: Vec<String> = matchers.iter().map(|m| m.to_ascii_lowercase()).collect();

    cfg.node_indices()
        .filter(|&idx| {
            let node = &cfg[idx];
            if node.kind != StmtKind::Call {
                return false;
            }
            let Some(callee) = &node.call.callee else {
                return false;
            };
            let callee_lower = callee.to_ascii_lowercase();
            lowered.iter().any(|pattern| {
                if pattern.ends_with('_') {
                    callee_lower.starts_with(pattern.as_str())
                } else {
                    callee_lower.ends_with(pattern.as_str())
                }
            })
        })
        .collect()
}
/// Report whether `to` is a member of the set that `reachable_set` computes
/// for `from` in `cfg`, i.e. whether some path leads from `from` to `to`.
#[allow(dead_code)]
pub fn has_path(cfg: &Cfg, from: NodeIndex, to: NodeIndex) -> bool {
    reachable_set(cfg, from).contains(&to)
}
/// Compute shortest distance (in hops) from `from` to `to`.
pub fn shortest_distance(cfg: &Cfg, from: NodeIndex, to: NodeIndex) -> Option<usize> {
use std::collections::VecDeque;

View file

@ -306,10 +306,6 @@ fn find_post_if_sinks(cfg: &crate::cfg::Cfg, if_node: NodeIndex) -> Vec<NodeInde
}
impl CfgAnalysis for IncompleteErrorHandling {
fn name(&self) -> &'static str {
"incomplete-error-handling"
}
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
let mut findings = Vec::new();
@ -369,7 +365,6 @@ impl CfgAnalysis for IncompleteErrorHandling {
if has_dangerous_successor {
findings.push(CfgFinding {
rule_id: "cfg-error-fallthrough".to_string(),
title: "Error check without return".to_string(),
severity: Severity::Medium,
confidence: Confidence::Medium,
span: info.ast.span,

View file

@ -1,4 +1,7 @@
#![allow(clippy::collapsible_if)]
//! Unguarded-sink detection via CFG dominator analysis.
//!
//! Flags dangerous sinks that are not dominated by an appropriate guard
//! (validation or auth check) on every path from an entry point.
use super::dominators::{self, dominates};
use super::rules;
@ -177,6 +180,109 @@ fn ssa_all_sink_operands_const_or_param(ctx: &AnalysisContext, sink: NodeIndex)
args_ok && receiver_ok
}
/// Decide whether a `cfg-unguarded-sink` finding can be suppressed because
/// the sink only accepts an injection payload at specific argument
/// positions (`sink_payload_args`) and every operand at those positions
/// resolves to a concrete constant.
///
/// The flat [`is_all_args_constant`] check looks at *every* operand, so a
/// safe parameterised call such as Go's
/// `db.QueryContext(context.Background(), "SELECT … $1", bind)` is wrongly
/// rejected: only arg 1 (the SQL string, `payload_args = [1]`) can carry an
/// injection, yet the non-payload `context.Background()` call and the bind
/// value are non-constant operands that fail the all-operands test. The
/// taint engine already honours the payload-arg gate, so under `!has_taint`
/// a sink whose payload positions are all literals is safe by construction.
///
/// Returns `false` (no suppression) whenever the proof cannot be made: no
/// payload positions recorded, no const facts attached to `ctx`, the sink
/// has no SSA value or instruction, the instruction is not a call, or a
/// payload position lies outside the recorded argument list.
fn sink_payload_args_const(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
    let Some(payload_positions) = ctx.cfg[sink]
        .call
        .sink_payload_args
        .as_ref()
        .filter(|positions| !positions.is_empty())
    else {
        return false;
    };
    let Some(facts) = ctx.body_const_facts else {
        return false;
    };
    let Some(inst) = facts
        .ssa
        .cfg_node_map
        .get(&sink)
        .and_then(|&sink_val| find_inst(&facts.ssa, sink_val))
    else {
        return false;
    };
    let SsaOp::Call { args, .. } = &inst.op else {
        return false;
    };

    for &pos in payload_positions.iter() {
        // A payload position that was never recorded cannot be proven safe.
        let Some(group) = args.get(pos) else {
            return false;
        };
        // Every SSA value feeding this position must be a concrete literal.
        for value in group.iter() {
            let is_literal = matches!(
                facts.const_values.get(value),
                Some(
                    ConstLattice::Str(_)
                        | ConstLattice::Int(_)
                        | ConstLattice::Bool(_)
                        | ConstLattice::Null
                )
            );
            if !is_literal {
                return false;
            }
        }
    }
    true
}
/// Decide whether a `cfg-unguarded-sink` SSRF finding can be suppressed
/// because the sink's URL operand is origin-locked: it was produced by a
/// `new URL(path, base)` / `urljoin(base, path)` / `url.JoinPath(base, …)`
/// style builder whose base argument pins the scheme+host, so the
/// (attacker-controlled) path component cannot redirect the request off the
/// locked origin.
///
/// This mirrors the taint engine's `StringFact::from_url_with_base`
/// prefix-lock (`url_builder_arg_indices` + `is_string_safe_for_ssrf`); the
/// taint engine stays silent on this shape, so the parallel structural
/// finding would be a false positive.
///
/// The base is recognised only when it is a string literal recorded on the
/// builder node (`arg_string_literals[base_idx]`). The literal's content is
/// not inspected here; any recorded literal at the base position counts.
///
/// Returns `false` for sinks without the `Cap::SSRF` capability.
fn sink_url_origin_locked(ctx: &AnalysisContext, sink: NodeIndex, sink_caps: Cap) -> bool {
    if !sink_caps.contains(Cap::SSRF) {
        return false;
    }
    let sink_node = &ctx.cfg[sink];
    let owning_func = sink_node.ast.enclosing_func.as_deref();

    // CFG one-hop trace (same approach as `is_all_args_constant`): the SSA
    // `cfg_node_map` only covers the body whose facts are attached to `ctx`,
    // so a sink inside a nested function (e.g. an Express arrow handler)
    // would be missed on the SSA path. Instead, for each variable the sink
    // uses, scan the CFG for a definition of it in the same function and
    // test whether that definition is an origin-locking URL builder.
    for used in sink_node.taint.uses.iter() {
        for idx in ctx.cfg.node_indices() {
            let def = &ctx.cfg[idx];
            let defines_used_var = def.ast.enclosing_func.as_deref() == owning_func
                && def.taint.defines.as_deref() == Some(used.as_str());
            if !defines_used_var {
                continue;
            }
            // `def` defines `used`; it only qualifies if it is a call that
            // `url_builder_arg_indices` recognises as a URL builder.
            let Some(callee) = def.call.callee.as_deref() else {
                continue;
            };
            let Some((_path_idx, base_idx)) = crate::ssa::type_facts::url_builder_arg_indices(
                ctx.lang,
                callee,
                def.call.outer_callee.as_deref(),
                def.call.is_constructor,
            ) else {
                continue;
            };
            let base_is_literal = def
                .call
                .arg_string_literals
                .get(base_idx)
                .and_then(|literal| literal.as_deref())
                .is_some();
            if base_is_literal {
                return true;
            }
        }
    }
    false
}
/// Return true if the SSA body contains a *named* variable whose definition
/// is a constant, the SSA signature of an explicit `name = "literal"`
/// reassignment. Used as the gate for the broader operand-Param suppression:
@ -2493,6 +2599,18 @@ fn local_is_param_derived<'a>(
continue;
}
found_def = true;
// A `foreach` / `for-each` loop binding iterates collection
// *elements*, not a direct parameter pass-through. Even when the
// iterable is a bare parameter (`foreach ($param as $v)`), the
// per-element values are not simple wrapper plumbing, so do not
// clear them as parameter-derived — keep the structural finding
// for `foreach ($param as $v) { sink($v) }` shapes (literal-keyed
// arrays are already suppressed earlier by
// `sink_arg_uses_safe_foreach_key`).
if info.kind == StmtKind::Loop {
all_def_clear = false;
break;
}
if info
.taint
.labels
@ -2715,10 +2833,6 @@ fn sink_in_entrypoint(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
}
impl CfgAnalysis for UnguardedSink {
fn name(&self) -> &'static str {
"unguarded-sink"
}
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
let doms = dominators::compute_dominators(ctx.cfg, ctx.entry);
let sink_nodes = dominators::find_sink_nodes(ctx.cfg);
@ -2799,6 +2913,29 @@ impl CfgAnalysis for UnguardedSink {
continue;
}
// Payload-arg-gated sinks (e.g. Go `db.QueryContext(ctx, sql,
// ...binds)`, `payload_args = [1]`): only the payload positions can
// carry an injection. When the taint engine is already silent
// (`!has_taint`) and every payload-position operand is a constant
// literal, the non-payload operands (a `context.Context`, bind
// values) cannot make the call dangerous, so the structural finding
// is a false positive even though `is_all_args_constant` rejects it.
if !has_taint && sink_payload_args_const(ctx, *sink) {
continue;
}
// Origin-locked URL SSRF sinks (`fetch(new URL(path, "https://…"))`):
// the builder's literal base pins scheme+host, so the
// attacker-controlled path cannot redirect off-origin. The taint
// engine already suppresses this via the abstract prefix-lock, so
// the parallel structural finding is a false positive. NOT gated
// on `!has_taint`: the origin lock holds precisely *because* the
// tainted path reaches the builder — the host stays fixed — so the
// syntactic taint-reaches signal must not re-open the finding.
if sink_url_origin_locked(ctx, *sink, sink_caps) {
continue;
}
// SSA latest-def suppression: when the taint engine has already
// proved no source-tainted data reaches this sink (`!has_taint`)
// and every SSA operand resolves to a constant, callee-fragment
@ -2976,7 +3113,6 @@ impl CfgAnalysis for UnguardedSink {
findings.push(CfgFinding {
rule_id: "cfg-unguarded-sink".to_string(),
title: "Unguarded sink".to_string(),
severity,
confidence,
span: sink_info.ast.span,

View file

@ -140,8 +140,6 @@ pub enum Confidence {
#[derive(Debug, Clone)]
pub struct CfgFinding {
pub rule_id: String,
#[allow(dead_code)]
pub title: String,
pub severity: Severity,
pub confidence: Confidence,
pub span: (usize, usize),
@ -154,12 +152,8 @@ pub struct AnalysisContext<'a> {
pub cfg: &'a crate::cfg::Cfg,
pub entry: NodeIndex,
pub lang: Lang,
#[allow(dead_code)]
pub file_path: &'a str,
#[allow(dead_code)]
pub source_bytes: &'a [u8],
pub func_summaries: &'a FuncSummaries,
#[allow(dead_code)]
pub global_summaries: Option<&'a GlobalSummaries>,
/// Per-file SSA summaries map produced by
/// `lower_all_functions_from_bodies` (after both the augment pass
@ -170,7 +164,6 @@ pub struct AnalysisContext<'a> {
/// suppress structural findings whose taint flow has been proven
/// validated through helper summaries (CVE-2026-25544 patched
/// counterpart).
#[allow(dead_code)]
pub ssa_summaries: Option<
&'a std::collections::HashMap<
crate::symbol::FuncKey,
@ -218,8 +211,6 @@ pub struct AnalysisContext<'a> {
}
/// A CFG-level analysis pass: it inspects one [`AnalysisContext`] and
/// reports zero or more [`CfgFinding`]s.
pub trait CfgAnalysis {
/// Short static identifier of the analysis; the implementors visible in
/// this crate return kebab-case names such as `"auth-gap"` or
/// `"unguarded-sink"`.
#[allow(dead_code)]
fn name(&self) -> &'static str;
/// Run the analysis against `ctx` and return the findings it produced.
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding>;
}

View file

@ -531,10 +531,6 @@ fn has_explicit_lock_acquire(ctx: &AnalysisContext, acquire: NodeIndex) -> bool
}
impl CfgAnalysis for ResourceMisuse {
fn name(&self) -> &'static str {
"resource-misuse"
}
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
let pairs = rules::resource_pairs(ctx.lang);
let exit = match dominators::find_exit_node(ctx.cfg) {
@ -631,7 +627,6 @@ impl CfgAnalysis for ResourceMisuse {
} else {
"cfg-resource-leak".to_string()
},
title: format!("{} may leak", pair.resource_name),
severity: Severity::Medium,
confidence: Confidence::Medium,
span: info.ast.span,

View file

@ -23,7 +23,6 @@ fn parse_and_analyse<A: CfgAnalysis>(
cfg,
entry,
lang,
file_path: "test.rs",
source_bytes: src,
func_summaries: summaries,
global_summaries: None,
@ -54,7 +53,6 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
cfg,
entry,
lang,
file_path: "test.rs",
source_bytes: src,
func_summaries: summaries,
global_summaries: None,
@ -90,7 +88,6 @@ fn parse_and_run_all_with_taint(
cfg,
entry,
lang,
file_path: "test.rs",
source_bytes: src,
func_summaries: summaries,
global_summaries: None,
@ -210,7 +207,6 @@ fn parse_and_analyse_with_ssa<A: CfgAnalysis>(
cfg: &body.graph,
entry: body.entry,
lang,
file_path: "test.rs",
source_bytes: src,
func_summaries: &file_cfg.summaries,
global_summaries: None,
@ -1227,7 +1223,6 @@ fn config_sanitizer_suppresses_unguarded_sink() {
cfg,
entry,
lang,
file_path: "test.rs",
source_bytes: src,
func_summaries: summaries,
global_summaries: None,
@ -1708,7 +1703,6 @@ fn cfg_only_no_taint_produces_low_severity() {
cfg,
entry,
lang,
file_path: "test.rs",
source_bytes: src,
func_summaries: summaries,
global_summaries: None,

View file

@ -38,10 +38,6 @@ fn event_handler_callbacks(ctx: &AnalysisContext) -> HashSet<String> {
}
impl CfgAnalysis for UnreachableCode {
fn name(&self) -> &'static str {
"unreachable-code"
}
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
let reachable = dominators::reachable_set(ctx.cfg, ctx.entry);
let handler_callbacks = event_handler_callbacks(ctx);
@ -122,7 +118,6 @@ impl CfgAnalysis for UnreachableCode {
findings.push(CfgFinding {
rule_id: rule_id.to_string(),
title: title.to_string(),
severity,
confidence: Confidence::High,
span: info.ast.span,

352
src/chain/edges.rs Normal file
View file

@ -0,0 +1,352 @@
//! Phase 24 — convert per-finding [`Diag`]s into chain-graph edges.
//!
//! Each call to [`findings_to_edges`] emits exactly one [`ChainEdge`]
//! per input finding. The edge is *typed* by:
//!
//! - the primary [`Cap`] bit picked from [`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps)
//! via [`pick_chain_cap`] (the lowest set bit that has a standalone impact rule, else the lowest set bit), and
//! - the *reach* — the surface [`EntryPoint`](crate::surface::EntryPoint) in the same file as the
//! finding, when one exists, otherwise [`Reach::Unreachable`].
//!
//! Phase 25's path search composes these edges with the SurfaceMap's
//! `Reaches` edges into full chains. Phase 24 does not run any path
//! search or do call-graph traversal: edges are emitted at finding
//! granularity and carry only the file-local reach hint.
use crate::callgraph::FileReachMap;
use crate::commands::scan::Diag;
use crate::entry_points::HttpMethod;
use crate::labels::Cap;
use crate::surface::{SourceLocation, SurfaceMap, SurfaceNode};
use serde::{Deserialize, Serialize};
use super::feasibility::Feasibility;
use super::impact::lookup_impact;
/// Compact reference to a static finding embedded in a [`ChainEdge`].
///
/// Built by `build_edge` from a single [`Diag`]; every field is copied or
/// derived from that diagnostic, so the reference can be serialised
/// without carrying the full `Diag`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FindingRef {
/// Stable finding ID (matches [`Diag::finding_id`] when present).
pub finding_id: String,
/// Stable 64-bit hash from [`Diag::stable_hash`]. Zero when the
/// finding has not been hashed yet.
pub stable_hash: u64,
/// Source location of the sink, assembled from the diagnostic's
/// `path`, `line` and `col`.
pub location: SourceLocation,
/// Rule identifier (`Diag::id`).
pub rule_id: String,
/// Resolved sink cap bits ([`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps)).
/// This is the full, unmodified mask and may have several bits set; the
/// single cap chosen for the edge lives in [`ChainEdge::primary_cap`].
pub cap_bits: u32,
}
/// Whether the finding lands inside an externally-reachable surface
/// entry-point. By default only *file-local* reach is resolved: a finding
/// in `app/views.py` is treated as reachable if any
/// [`EntryPoint`](crate::surface::EntryPoint) declares a handler in
/// that same file. When a [`FileReachMap`] is supplied to
/// [`findings_to_edges_with_reach`], a finding is also reachable if an
/// entry-point's handler file reaches the finding's file according to
/// that map.
///
/// Serialised as an internally tagged enum: the variant name is stored in
/// a `reach` field in snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "reach", rename_all = "snake_case")]
pub enum Reach {
/// Finding is in a file that hosts at least one entry-point, or (when a
/// reach map is supplied) in a file reached from an entry-point's
/// handler file.
/// `route` and `method` describe the first matching entry-point
/// (surface-canonical order); `location` is that entry-point's own
/// location, not the finding's.
Reachable {
location: SourceLocation,
method: HttpMethod,
route: String,
auth_required: bool,
},
/// No entry-point matched the finding's file, either directly or via
/// the optional reach map.
Unreachable,
}
/// One edge in the chain graph.
///
/// Phase 24's edges live at the granularity of a single finding.
/// Phase 25 will introduce additional edge kinds (entry → finding,
/// finding → sink-cluster, etc.) once path search is wired up.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ChainEdge {
/// The finding this edge was derived from.
pub finding: FindingRef,
/// Primary cap classification. Picked via [`pick_chain_cap`]: when
/// several cap bits are set, prefers a bit that has a standalone
/// rule in [`crate::chain::impact::IMPACT_LATTICE`] over the
/// lowest bit so a `SQL_QUERY | CODE_EXEC` finding lands on the
/// chain-relevant cap (`CODE_EXEC`). Falls back to the lowest set
/// bit when no bit has a standalone rule, keeping single-cap
/// findings deterministic.
pub primary_cap: Cap,
/// Where the finding sits relative to the surface.
pub reach: Reach,
/// Phase 25 path-score factor, computed from the originating
/// diagnostic by `Feasibility::for_finding`.
pub feasibility: Feasibility,
}
/// Convert each [`Diag`] to at most one [`ChainEdge`].
///
/// Findings without cap bits (`Diag::evidence.sink_caps == 0`) and
/// findings that carry no `evidence` at all are
/// dropped — the chain composer cannot classify them on a typed
/// lattice and Phase 25's scoring expects every edge to expose a
/// primary cap. This is a deliberate quiet-drop: such findings are
/// usually structural CFG diagnostics (e.g. `cfg-auth-gap`) whose
/// chain participation is modelled by the SurfaceMap's
/// `AuthRequiredOn` edges instead.
///
/// The output order mirrors `findings` (minus the dropped entries); the
/// caller is responsible for any further canonicalisation.
///
/// Equivalent to [`findings_to_edges_with_reach`] with no reach map, so
/// only file-local reach is resolved.
pub fn findings_to_edges(findings: &[Diag], surface: &SurfaceMap) -> Vec<ChainEdge> {
findings_to_edges_with_reach(findings, surface, None)
}
/// Variant of [`findings_to_edges`] that can consult a [`FileReachMap`] to
/// widen `Reach::Reachable` beyond the file-local match.
///
/// With `reach = Some(..)`, a finding's file also counts as `Reachable`
/// whenever some [`SurfaceNode::EntryPoint`]'s `handler_location.file`
/// reaches the finding's file according to the map. The first matching
/// entry-point (surface-canonical order) supplies the `route` / `method` /
/// `auth_required` fields.
///
/// With `reach = None` the result is identical to the legacy
/// [`findings_to_edges`] behaviour. Path strings on both sides must follow
/// the same convention (project-relative POSIX) for the widening to fire;
/// mismatched paths silently fall through to the file-local heuristic.
///
/// Findings that `build_edge` rejects are omitted; the remaining edges keep
/// the order of `findings`.
pub fn findings_to_edges_with_reach(
    findings: &[Diag],
    surface: &SurfaceMap,
    reach: Option<&FileReachMap>,
) -> Vec<ChainEdge> {
    let mut edges = Vec::new();
    for diag in findings {
        if let Some(edge) = build_edge(diag, surface, reach) {
            edges.push(edge);
        }
    }
    edges
}
/// Build the [`ChainEdge`] for a single diagnostic.
///
/// Returns `None` when the diagnostic has no evidence, when its sink-cap
/// mask is zero, or when [`pick_chain_cap`] cannot resolve a primary cap
/// from the mask.
fn build_edge(
    diag: &Diag,
    surface: &SurfaceMap,
    reach: Option<&FileReachMap>,
) -> Option<ChainEdge> {
    // No evidence or an empty cap mask means the finding cannot be typed.
    let cap_bits = diag
        .evidence
        .as_ref()
        .map(|evidence| evidence.sink_caps)
        .filter(|&bits| bits != 0)?;
    let primary_cap = pick_chain_cap(cap_bits)?;

    let location = SourceLocation::new(diag.path.clone(), diag.line as u32, diag.col as u32);
    // Resolve reach before `location` is moved into the finding reference.
    let reach_kind = locate_reach(&location, surface, reach);
    let feasibility = Feasibility::for_finding(diag);

    Some(ChainEdge {
        finding: FindingRef {
            finding_id: diag.finding_id.clone(),
            stable_hash: diag.stable_hash,
            location,
            rule_id: diag.id.clone(),
            cap_bits,
        },
        primary_cap,
        reach: reach_kind,
        feasibility,
    })
}
/// Resolve the least-significant set bit of `bits` to a [`Cap`].
///
/// Yields `None` when `bits == 0`, or when `Cap::from_bits` does not accept
/// the isolated bit. Deterministic: the lowest set bit is always the one
/// chosen.
pub fn lowest_cap(bits: u32) -> Option<Cap> {
    match bits {
        0 => None,
        // `x & -x` isolates the lowest set bit of a non-zero value.
        _ => Cap::from_bits(bits & bits.wrapping_neg()),
    }
}
/// Select the chain-relevant [`Cap`] from a sink-cap bitmask.
///
/// When several caps are set, a cap that has a standalone rule in
/// [`crate::chain::impact::IMPACT_LATTICE`] (e.g. `CODE_EXEC`,
/// `DESERIALIZE`, `SSRF`) wins over the plain lowest set bit. A finding
/// with `sink_caps = SQL_QUERY | CODE_EXEC` would resolve to `SQL_QUERY`
/// under a lowest-bit rule and miss the `CODE_EXEC → Rce` lattice rule;
/// this helper resolves it to `CODE_EXEC` instead.
///
/// Set bits are visited from low to high, so ties between caps that both
/// have standalone rules are broken deterministically. If no set bit has a
/// standalone rule the result is [`lowest_cap`], which preserves single-cap
/// behaviour. Returns `None` when `bits == 0`.
pub fn pick_chain_cap(bits: u32) -> Option<Cap> {
    (0..u32::BITS)
        .map(|shift| 1u32 << shift)
        .filter(|&bit| (bits & bit) != 0)
        .filter_map(Cap::from_bits)
        .find(|cap| lookup_impact(*cap, None).is_some())
        .or_else(|| lowest_cap(bits))
}
/// Work out how the finding at `loc` relates to the surface's entry-points.
///
/// Two passes are made over `surface.nodes`, each in surface order:
///
/// 1. File-local: the first entry-point whose handler lives in the same
///    file as `loc` (legacy behaviour, always applied).
/// 2. Transitive: only when `reach` is supplied, the first entry-point
///    whose handler file reaches `loc.file` according to the reach map.
///
/// A file-local match always takes precedence over a transitive one. When
/// neither pass finds an entry-point the result is [`Reach::Unreachable`].
fn locate_reach(loc: &SourceLocation, surface: &SurfaceMap, reach: Option<&FileReachMap>) -> Reach {
    let file_local = surface.nodes.iter().find_map(|node| match node {
        SurfaceNode::EntryPoint(ep) if ep.handler_location.file == loc.file => Some(ep),
        _ => None,
    });

    // Only consult the call-graph reach map when the file-local pass missed
    // and the caller actually supplied one.
    let matched = file_local.or_else(|| {
        let reach_map = reach?;
        surface.nodes.iter().find_map(|node| match node {
            SurfaceNode::EntryPoint(ep)
                if reach_map.reaches(&ep.handler_location.file, &loc.file) =>
            {
                Some(ep)
            }
            _ => None,
        })
    });

    match matched {
        Some(ep) => Reach::Reachable {
            location: ep.location.clone(),
            method: ep.method,
            route: ep.route.clone(),
            auth_required: ep.auth_required,
        },
        None => Reach::Unreachable,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::commands::scan::Diag;
    use crate::evidence::Evidence;
    use crate::patterns::FindingCategory;

    /// Minimal security [`Diag`] at `path:line` whose evidence carries
    /// `caps` as its sink capability bitmask. Every other field keeps
    /// its `Default` value.
    fn diag_with_cap(path: &str, line: usize, caps: Cap) -> Diag {
        Diag {
            id: "test-rule".into(),
            category: FindingCategory::Security,
            path: path.into(),
            line,
            col: 1,
            evidence: Some(Evidence {
                sink_caps: caps.bits(),
                ..Evidence::default()
            }),
            ..Diag::default()
        }
    }

    #[test]
    fn lowest_cap_picks_least_significant_bit() {
        let bits = (Cap::SQL_QUERY | Cap::FILE_IO).bits();
        assert_eq!(lowest_cap(bits), Some(Cap::FILE_IO));
    }

    #[test]
    fn pick_chain_cap_prefers_standalone_rule_cap() {
        // CODE_EXEC (bit 10) owns a standalone lattice rule while
        // SQL_QUERY (bit 7) does not, so the higher bit must win even
        // though a plain lowest-bit pick would return SQL_QUERY.
        let bits = (Cap::SQL_QUERY | Cap::CODE_EXEC).bits();
        assert_eq!(pick_chain_cap(bits), Some(Cap::CODE_EXEC));
    }

    #[test]
    fn pick_chain_cap_falls_back_to_lowest_when_no_standalone_rule() {
        // Neither SQL_QUERY nor FILE_IO has a standalone rule, so the
        // pick degrades to the lowest_cap behaviour.
        let bits = (Cap::SQL_QUERY | Cap::FILE_IO).bits();
        assert_eq!(pick_chain_cap(bits), Some(Cap::FILE_IO));
    }

    #[test]
    fn pick_chain_cap_single_bit_unchanged() {
        for cap in [Cap::CODE_EXEC, Cap::SQL_QUERY] {
            assert_eq!(pick_chain_cap(cap.bits()), Some(cap));
        }
        assert_eq!(pick_chain_cap(0), None);
    }

    #[test]
    fn drops_findings_without_cap_bits() {
        let mut capless = diag_with_cap("a.py", 1, Cap::CODE_EXEC);
        capless
            .evidence
            .as_mut()
            .expect("helper always attaches evidence")
            .sink_caps = 0;
        let surface = SurfaceMap::new();
        let edges = findings_to_edges(&[capless], &surface);
        assert!(edges.is_empty());
    }

    #[test]
    fn reach_unreachable_without_matching_entry_point() {
        let orphan = diag_with_cap("orphan.py", 2, Cap::CODE_EXEC);
        let surface = SurfaceMap::new();
        let edges = findings_to_edges(&[orphan], &surface);
        assert_eq!(edges.len(), 1);
        assert!(matches!(edges[0].reach, Reach::Unreachable));
    }

    /// A finding in a different file than the entry point becomes
    /// Reachable once the call-graph reach map records a transitive
    /// caller living in the entry point's file.
    #[test]
    fn reach_widens_with_file_reach_map() {
        use crate::callgraph::{FileReachMap, build_call_graph};
        use crate::entry_points::HttpMethod;
        use crate::summary::{CalleeSite, FuncSummary, merge_summaries};
        use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};

        // Call graph under test: routes.py::handle -> helper.py::sink
        let callee = FuncSummary {
            name: "sink".into(),
            file_path: "helper.py".into(),
            lang: "python".into(),
            param_count: 0,
            ..Default::default()
        };
        let caller = FuncSummary {
            name: "handle".into(),
            file_path: "routes.py".into(),
            lang: "python".into(),
            param_count: 0,
            callees: vec![CalleeSite::bare("sink")],
            ..Default::default()
        };
        let summaries = merge_summaries(vec![caller, callee], None);
        let call_graph = build_call_graph(&summaries, &[]);
        let reach = FileReachMap::build(&call_graph);

        let entry = EntryPoint {
            location: SourceLocation::new("routes.py", 1, 1),
            framework: Framework::Flask,
            method: HttpMethod::GET,
            route: "/".into(),
            handler_name: "handle".into(),
            handler_location: SourceLocation::new("routes.py", 2, 1),
            auth_required: false,
        };
        let mut surface = SurfaceMap::new();
        surface.nodes.push(SurfaceNode::EntryPoint(entry));

        let finding = diag_with_cap("helper.py", 10, Cap::CODE_EXEC);

        // File-local lookup only: the finding stays Unreachable.
        let local_only = findings_to_edges(std::slice::from_ref(&finding), &surface);
        assert!(matches!(local_only[0].reach, Reach::Unreachable));

        // With the reach map: the caller in `routes.py` lifts it to Reachable.
        let widened = findings_to_edges_with_reach(&[finding], &surface, Some(&reach));
        let Reach::Reachable { route, method, .. } = &widened[0].reach else {
            let other = &widened[0].reach;
            panic!("expected Reachable, got {other:?}");
        };
        assert_eq!(route, "/");
        assert_eq!(*method, HttpMethod::GET);
    }
}

157
src/chain/feasibility.rs Normal file
View file

@ -0,0 +1,157 @@
//! Phase 24 — feasibility scoring for chain edges.
//!
//! Each edge produced by [`crate::chain::edges::findings_to_edges`]
//! carries a feasibility weight in `[0.0, 1.0]`. The weight enters
//! Phase 25's path score as the multiplicative factor in
//! `score(path) = sum(impact) * product(feasibility)`, so a single
//! low-feasibility hop dampens the entire chain.
//!
//! # Buckets
//!
//! | Bucket | Weight | Trigger |
//! |-------------------------|--------|-------------------------------------------------------------|
//! | [`Confirmed`] | `1.0` | dynamic [`VerifyStatus::Confirmed`] |
//! | [`InconclusiveHighConf`]| `0.5` | dynamic [`VerifyStatus::Inconclusive`] + static `High` |
//! | [`Unverified`] | `0.1` | everything else (no verdict, `NotConfirmed`, `PartiallyConfirmed`, |
//! | | | `Unsupported`, or `Inconclusive` without a high static confidence) |
//!
//! [`Confirmed`]: Feasibility::Confirmed
//! [`InconclusiveHighConf`]: Feasibility::InconclusiveHighConf
//! [`Unverified`]: Feasibility::Unverified
//! [`VerifyStatus::Confirmed`]: crate::evidence::VerifyStatus::Confirmed
//! [`VerifyStatus::Inconclusive`]: crate::evidence::VerifyStatus::Inconclusive
use crate::commands::scan::Diag;
use crate::evidence::{Confidence, VerifyResult, VerifyStatus};
use serde::{Deserialize, Serialize};
/// Discrete feasibility bucket for a chain edge.
///
/// Each bucket reduces to a fixed multiplicative weight through
/// [`Feasibility::score`]. Variants serialize under snake_case names
/// (`confirmed`, `inconclusive_high_conf`, `unverified`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Feasibility {
/// Dynamic verification fired the sink probe. Weight `1.0`.
Confirmed,
/// Dynamic verification was Inconclusive but the static engine's
/// confidence in the finding is `High`. Used for findings that
/// the verifier could not exercise (build failure, sandbox refuse)
/// but where the static evidence is strong. Weight `0.5`.
InconclusiveHighConf,
/// Everything else — no dynamic verification, dynamic verdict was
/// `NotConfirmed`/`PartiallyConfirmed`/`Unsupported`, or dynamic was
/// `Inconclusive` but static confidence is not `High`. A
/// `PartiallyConfirmed` verdict proves only that the sink is reachable,
/// not that the exploit chain completes, so it stays conservative here:
/// it must not inflate a multi-hop path score. Weight `0.1`.
Unverified,
}
impl Feasibility {
    /// Weight this bucket contributes, as a multiplicative factor, to
    /// Phase 25's path score: `0.1`, `0.5` or `1.0`.
    pub const fn score(self) -> f32 {
        match self {
            Self::Unverified => 0.1,
            Self::InconclusiveHighConf => 0.5,
            Self::Confirmed => 1.0,
        }
    }

    /// Reduce a dynamic [`VerifyResult`] straight to its feasibility
    /// weight.
    ///
    /// This mirrors the signature given in the design doc. No static
    /// confidence is available here (it lives on the [`Diag`], not on
    /// the [`VerifyResult`]), so an `Inconclusive` verdict always lands
    /// in the `Unverified` bucket. Prefer [`Feasibility::for_finding`]
    /// when the whole finding is at hand.
    pub fn from_verdict(verdict: Option<&VerifyResult>) -> f32 {
        let bucket = Self::bucket_from_verdict(verdict, None);
        bucket.score()
    }

    /// Bucket a finding using both its dynamic verdict (if any) and its
    /// static `Diag.confidence`, which lets the `Inconclusive_HighConf`
    /// row of the doc's table fire. Phase 25's scoring pass uses this
    /// flavour.
    pub fn for_finding(diag: &Diag) -> Feasibility {
        let verdict = match &diag.evidence {
            Some(evidence) => evidence.dynamic_verdict.as_ref(),
            None => None,
        };
        Self::bucket_from_verdict(verdict, diag.confidence)
    }

    /// Bucket-returning counterpart of [`from_verdict`](Self::from_verdict),
    /// for callers that want the discrete bucket (e.g. telemetry or UI
    /// badges) rather than the `f32` weight.
    ///
    /// - no verdict → `Unverified`
    /// - `Confirmed` → `Confirmed`
    /// - `Inconclusive` with `static_confidence == Some(High)` →
    ///   `InconclusiveHighConf`
    /// - any other status → `Unverified`
    pub fn bucket_from_verdict(
        verdict: Option<&VerifyResult>,
        static_confidence: Option<Confidence>,
    ) -> Feasibility {
        let Some(result) = verdict else {
            return Feasibility::Unverified;
        };
        let statically_strong = static_confidence == Some(Confidence::High);
        match result.status {
            VerifyStatus::Confirmed => Feasibility::Confirmed,
            VerifyStatus::Inconclusive if statically_strong => Feasibility::InconclusiveHighConf,
            _ => Feasibility::Unverified,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::evidence::VerifyResult;

    /// Bare [`VerifyResult`] carrying only `status`; every optional
    /// field is empty.
    fn verdict(status: VerifyStatus) -> VerifyResult {
        VerifyResult {
            status,
            finding_id: "f".into(),
            attempts: Vec::new(),
            triggered_payload: None,
            reason: None,
            inconclusive_reason: None,
            detail: None,
            toolchain_match: None,
            differential: None,
            replay_stable: None,
            wrong: None,
            hardening_outcome: None,
        }
    }

    /// Weight produced by `from_verdict` for a verdict with `status`.
    fn weight_for(status: VerifyStatus) -> f32 {
        let result = verdict(status);
        Feasibility::from_verdict(Some(&result))
    }

    #[test]
    fn confirmed_returns_one() {
        assert_eq!(weight_for(VerifyStatus::Confirmed), 1.0);
    }

    #[test]
    fn inconclusive_without_confidence_returns_unverified() {
        assert_eq!(weight_for(VerifyStatus::Inconclusive), 0.1);
    }

    #[test]
    fn inconclusive_with_high_confidence_returns_half() {
        let result = verdict(VerifyStatus::Inconclusive);
        let bucket = Feasibility::bucket_from_verdict(Some(&result), Some(Confidence::High));
        assert_eq!(bucket, Feasibility::InconclusiveHighConf);
        assert_eq!(bucket.score(), 0.5);
    }

    #[test]
    fn not_confirmed_returns_unverified() {
        assert_eq!(weight_for(VerifyStatus::NotConfirmed), 0.1);
    }

    #[test]
    fn unsupported_returns_unverified() {
        assert_eq!(weight_for(VerifyStatus::Unsupported), 0.1);
    }

    #[test]
    fn no_verdict_returns_unverified() {
        let weight = Feasibility::from_verdict(None);
        assert_eq!(weight, 0.1);
    }
}

247
src/chain/finding.rs Normal file
View file

@ -0,0 +1,247 @@
//! Phase 25 — chain finding emitted by the composer.
//!
//! A [`ChainFinding`] is the externally-visible artefact produced by
//! Track G: a sequence of static findings whose composition implies a
//! higher-level [`ImpactCategory`] than any single member. The chain
//! has its own [`ChainSeverity`] (a strict superset of the per-finding
//! [`crate::patterns::Severity`] axis, with `Critical` reserved for
//! chains so default-severity gates do not accidentally fire on a
//! chained-only impact).
//!
//! # Determinism
//!
//! `stable_hash` is the BLAKE3-truncated digest of the chain member
//! hashes joined with the implied impact byte. Two scans of the same
//! source produce the same `stable_hash` regardless of DFS visitation
//! order.
//!
//! # Suppressing constituents in default output
//!
//! Phase 25 keeps individual constituent findings on the wire — they
//! still travel inside `Diag` form — but the JSON / SARIF emitters
//! gate their visibility on [`crate::utils::config::OutputConfig::show_chain_constituents`].
//! See `crate::output::filter_constituents` for the gating.
use crate::chain::edges::FindingRef;
use crate::chain::impact::ImpactCategory;
use crate::evidence::{VerifyResult, VerifyStatus};
use serde::{Deserialize, Serialize};
use std::fmt;
/// Severity bucket assigned to a [`ChainFinding`].
///
/// Distinct from [`crate::patterns::Severity`] so that chain output
/// (which is, by construction, a composition of *several* findings)
/// does not collide with the per-finding axis. `Critical` is the
/// highest grade and is reserved for chains whose impact is
/// terminal RCE (`Rce`, `BrowserToLocalRce`).
///
/// The derived `Ord`/`PartialOrd` follow declaration order, so
/// `Low < Medium < High < Critical`; do not reorder the variants.
/// Serialized with snake_case names (`low`, `medium`, `high`,
/// `critical`).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ChainSeverity {
/// Lowest grade; also the floor of [`ChainSeverity::downgraded`].
Low,
/// One step above `Low`.
Medium,
/// One step below `Critical`.
High,
/// Highest grade, reserved for chains (see the type-level docs).
Critical,
}
impl fmt::Display for ChainSeverity {
    /// Writes the upper-case label of the severity (`LOW`, `MEDIUM`,
    /// `HIGH` or `CRITICAL`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            ChainSeverity::Critical => "CRITICAL",
            ChainSeverity::High => "HIGH",
            ChainSeverity::Medium => "MEDIUM",
            ChainSeverity::Low => "LOW",
        };
        f.write_str(label)
    }
}
impl ChainSeverity {
/// Phase 26 — drop one severity bucket. Used by composite
/// re-verification when the chain's dynamic verdict is
/// `Inconclusive`: the chain stays on the wire but its severity
/// loses one notch so triagers see the verification gap.
///
/// `Low` is the floor — calling `downgraded()` on `Low` returns
/// `Low` so the helper is idempotent.
pub fn downgraded(self) -> Self {
match self {
ChainSeverity::Critical => ChainSeverity::High,
ChainSeverity::High => ChainSeverity::Medium,
ChainSeverity::Medium => ChainSeverity::Low,
ChainSeverity::Low => ChainSeverity::Low,
}
}
}
/// One member of a [`ChainFinding`].
///
/// A plain type alias for [`FindingRef`] (the two names are fully
/// interchangeable), so the chain output can name each constituent
/// without duplicating the finding's evidence; consumers join back to
/// the `findings: [...]` array via [`FindingRef::finding_id`] /
/// [`FindingRef::stable_hash`].
pub type ChainMember = FindingRef;
/// A composed exploit chain.
///
/// Phase 25 emits these from [`crate::chain::search::find_chains`].
/// Phase 26 will populate `dynamic_verdict` from a composite
/// re-verification pass; Phase 25 always leaves it as `None`.
///
/// `PartialEq` is omitted because [`crate::evidence::VerifyResult`] is
/// not `PartialEq`. Equality checks at the test layer compare on
/// `stable_hash` instead.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChainFinding {
/// BLAKE3 of `(member.stable_hash for member in members) || implied_impact`,
/// truncated to 64 bits. Stable across scans for the same chain.
/// Member hashes are fed in `members` order, so the digest is
/// order-sensitive; see [`ChainFinding::compute_stable_hash`].
pub stable_hash: u64,
/// Constituent findings, in path order (entry-adjacent first,
/// sink-adjacent last).
pub members: Vec<ChainMember>,
/// The dangerous-local sink terminating the chain. Carries the
/// callee function name and cap bits so consumers can describe
/// the chain without re-walking the SurfaceMap.
pub sink: ChainSink,
/// Composed impact category derived from member caps + adjacency.
pub implied_impact: ImpactCategory,
/// Chain severity, computed in [`crate::output::severity`].
/// [`ChainFinding::apply_dynamic_verdict`] lowers it by one bucket
/// when the composite verdict is `Inconclusive`.
pub severity: ChainSeverity,
/// Numeric score from [`crate::chain::score::score_path`].
/// Carried verbatim for JSON output so consumers can re-sort.
pub score: f64,
/// Composite dynamic verification verdict. `None` until Phase 26's
/// `reverify_chain` runs over the chain. Omitted from serialized
/// output while `None`; defaults to `None` when absent on input.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dynamic_verdict: Option<VerifyResult>,
/// Phase 26 — Track G.3: human-readable reason when composite
/// re-verification altered the chain's outcome. Set by
/// [`ChainFinding::apply_dynamic_verdict`] whenever the attached
/// verdict's status is `Inconclusive` (the case in which severity is
/// stepped down one bucket, bottoming out at `Low`); reset to `None`
/// for every other status. Omitted from serialized output while
/// `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reverify_reason: Option<String>,
}
/// Sink terminus of a [`ChainFinding`]. Mirrors the
/// [`crate::surface::DangerousLocal`] node the path ends at.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ChainSink {
/// File containing the sink.
pub file: String,
/// Line of the sink within `file`.
// NOTE(review): presumably 1-based like the other source locations in
// this crate — confirm against the composer that fills it.
pub line: u32,
/// Column of the sink within `file`.
pub col: u32,
/// Name of the callee function at the sink.
pub function_name: String,
/// Capability bitmask carried by the sink.
// NOTE(review): presumably raw `Cap` bits — confirm against the composer.
pub cap_bits: u32,
}
impl ChainFinding {
    /// Canonical stable-hash formula for a chain: BLAKE3 over every
    /// member's `stable_hash` (little-endian, in slice order) followed
    /// by the impact's tag byte, truncated to the first 8 digest bytes
    /// read as a little-endian `u64`.
    ///
    /// Public so that code building a `ChainFinding` outside
    /// [`crate::chain::search`] (tests, future composers) stays in sync
    /// with this formula.
    pub fn compute_stable_hash(members: &[ChainMember], implied_impact: ImpactCategory) -> u64 {
        let mut hasher = blake3::Hasher::new();
        members.iter().for_each(|member| {
            hasher.update(&member.stable_hash.to_le_bytes());
        });
        hasher.update(&[impact_byte(implied_impact)]);

        let digest = hasher.finalize();
        let mut prefix = [0u8; 8];
        prefix.copy_from_slice(&digest.as_bytes()[..8]);
        u64::from_le_bytes(prefix)
    }

    /// Phase 26 — Track G.3: record a composite verdict and apply the
    /// `Inconclusive → severity downgrade` rule.
    ///
    /// - `Inconclusive`: severity drops one bucket
    ///   ([`ChainSeverity::downgraded`]) and `reverify_reason` is filled
    ///   from the verdict's typed inconclusive reason, else from its
    ///   non-empty `detail`, else with the generic
    ///   "composite reverification inconclusive" text.
    /// - any other status: severity is left alone and
    ///   `reverify_reason` is cleared.
    ///
    /// In both cases the verdict itself is stored in `dynamic_verdict`.
    pub fn apply_dynamic_verdict(&mut self, verdict: VerifyResult) {
        self.reverify_reason = if verdict.status == VerifyStatus::Inconclusive {
            self.severity = self.severity.downgraded();
            // An empty detail string carries no information; treat it as absent.
            let detail = verdict.detail.as_deref().filter(|d| !d.is_empty());
            Some(match (&verdict.inconclusive_reason, detail) {
                (Some(r), _) => format!("composite reverification inconclusive: {r}"),
                (None, Some(d)) => format!("composite reverification inconclusive: {d}"),
                (None, None) => "composite reverification inconclusive".to_owned(),
            })
        } else {
            None
        };
        self.dynamic_verdict = Some(verdict);
    }
}
/// Stable byte tag for each [`ImpactCategory`]. Used by
/// [`ChainFinding::compute_stable_hash`] so adding an impact variant
/// does not silently shift every other chain's hash.
///
/// The match has no wildcard arm, so a newly added variant must be
/// given an explicit tag before this compiles. Because the tag is fed
/// into the hash, changing an existing tag changes the `stable_hash`
/// of every chain with that impact.
const fn impact_byte(c: ImpactCategory) -> u8 {
match c {
ImpactCategory::Rce => 1,
ImpactCategory::BrowserToLocalRce => 2,
ImpactCategory::SessionHijack => 3,
ImpactCategory::InternalNetworkAccess => 4,
ImpactCategory::InfoDisclosure => 5,
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::FindingRef;
    use crate::surface::SourceLocation;

    /// Chain member whose only distinguishing property is `hash`.
    fn member(hash: u64) -> ChainMember {
        FindingRef {
            stable_hash: hash,
            finding_id: format!("f-{hash}"),
            rule_id: "test".into(),
            cap_bits: 0,
            location: SourceLocation::new("a.py", 1, 1),
        }
    }

    /// Stable hash of a chain whose members carry `hashes`, in order.
    fn chain_hash(hashes: &[u64], impact: ImpactCategory) -> u64 {
        let members: Vec<ChainMember> = hashes.iter().copied().map(member).collect();
        ChainFinding::compute_stable_hash(&members, impact)
    }

    #[test]
    fn stable_hash_changes_with_member_order() {
        let forward = chain_hash(&[1, 2], ImpactCategory::Rce);
        let reversed = chain_hash(&[2, 1], ImpactCategory::Rce);
        assert_ne!(forward, reversed);
    }

    #[test]
    fn stable_hash_changes_with_impact() {
        let as_rce = chain_hash(&[1, 2], ImpactCategory::Rce);
        let as_browser_rce = chain_hash(&[1, 2], ImpactCategory::BrowserToLocalRce);
        assert_ne!(as_rce, as_browser_rce);
    }

    #[test]
    fn stable_hash_deterministic_across_calls() {
        let first = chain_hash(&[1, 2, 3], ImpactCategory::Rce);
        let second = chain_hash(&[1, 2, 3], ImpactCategory::Rce);
        assert_eq!(first, second);
    }

    #[test]
    fn severity_ordering_is_critical_top() {
        let ascending = [
            ChainSeverity::Low,
            ChainSeverity::Medium,
            ChainSeverity::High,
            ChainSeverity::Critical,
        ];
        // Every neighbouring pair must be strictly increasing.
        assert!(ascending.windows(2).all(|pair| pair[1] > pair[0]));
    }
}

333
src/chain/impact.rs Normal file
View file

@ -0,0 +1,333 @@
//! Phase 24 — impact lattice for the exploit-chain composer.
//!
//! Each [`ImpactRule`] is a `(source_cap, adjacent_cap, result)` triple
//! drawn from the design doc's lattice:
//!
//! | Rule | Result |
//! |-------------------------------|-------------------------|
//! | `CODE_EXEC` | `Rce` |
//! | `DESERIALIZE` | `Rce` |
//! | `SSRF` | `InternalNetworkAccess` |
//! | `OPEN_REDIRECT + UNAUTHORIZED_ID` | `SessionHijack` |
//! | `HEADER_INJECTION + CODE_EXEC` | `BrowserToLocalRce` |
//! | `FILE_IO + DATA_EXFIL` | `InfoDisclosure` |
//!
//! The doc spells some lattice nodes with surface-level handles
//! (`UserSession`, `Cors`, `NoAuth`, `LocalListener`,
//! `SensitiveFileIo`, `PathTraversal`). Those nodes do not map 1:1
//! onto [`Cap`] bits, so the table above uses the closest [`Cap`]
//! approximations:
//!
//! - `UserSession` → [`Cap::UNAUTHORIZED_ID`] (request-bound caller
//! identifier carrier)
//! - `Cors + NoAuth` → [`Cap::HEADER_INJECTION`] (the CORS-relaxing
//! header is the structural marker; the no-auth side is folded into
//! Phase 25's surface-property check on [`crate::surface::EntryPoint::auth_required`])
//! - `LocalListener` → no cap; folded into Phase 25's surface check
//! ([`crate::surface::DataStoreKind::Sql`] /
//! [`crate::surface::ExternalServiceKind::HttpApi`] etc.)
//! - `SensitiveFileIo` → [`Cap::DATA_EXFIL`] (egress-of-sensitive-data
//! carrier)
//! - `PathTraversal` → [`Cap::FILE_IO`]
//!
//! # Exhaustiveness
//!
//! Pattern-matching exhaustively on [`Cap`] is impossible — it is a
//! `bitflags!` struct over `u32`, not a closed enum. This module
//! adopts the [`crate::dynamic::corpus`] pattern instead: every Cap
//! bit belongs to exactly one of [`IMPACT_LATTICE_COVERED`] or
//! [`IMPACT_LATTICE_UNCOVERED`], with a const assertion that the
//! union equals [`Cap::all`]. Adding a new `Cap` bit without
//! updating one of those constants fails to compile.
use crate::labels::Cap;
use serde::{Deserialize, Serialize};
/// Impact category produced by a successful chain composition.
///
/// Phase 24 enumerates the categories the doc's lattice produces.
/// Phase 25's scoring pass attaches a severity to each category and
/// folds them into the final [`crate::chain::ChainGraph`] output.
///
/// Serialized with snake_case variant names (e.g. `rce`,
/// `browser_to_local_rce`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ImpactCategory {
/// Remote code execution. Result of the standalone `CODE_EXEC` and
/// `DESERIALIZE` rules in [`IMPACT_LATTICE`].
Rce,
/// Browser-mediated path to local code execution (e.g. permissive
/// CORS plus an unauthenticated endpoint that hands off to a
/// `CODE_EXEC` sink). Result of the `HEADER_INJECTION + CODE_EXEC`
/// pair rule.
BrowserToLocalRce,
/// Session-token hijack via an attacker-controlled redirect that
/// keeps the user's auth identity in the request flow. Result of the
/// `OPEN_REDIRECT + UNAUTHORIZED_ID` pair rule.
SessionHijack,
/// SSRF that lands on an internal/local listener. Result of the
/// standalone `SSRF` rule.
InternalNetworkAccess,
/// Sensitive data egress through a path-traversal-like primitive.
/// Result of the `FILE_IO + DATA_EXFIL` pair rule.
InfoDisclosure,
}
/// One rule in the impact lattice.
///
/// `adjacent_cap` is `None` for self-sufficient rules
/// (`CODE_EXEC → Rce`, `DESERIALIZE → Rce`, `SSRF → InternalNetworkAccess`)
/// and `Some(cap)` for rules that need a second co-located finding
/// (`OPEN_REDIRECT + UNAUTHORIZED_ID → SessionHijack`, etc.).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ImpactRule {
/// Primary cap of the rule. For a pair rule, [`lookup_impact`] accepts
/// this cap on either side of the pair.
pub source_cap: Cap,
/// Partner cap required by a pair rule; `None` marks a standalone rule.
pub adjacent_cap: Option<Cap>,
/// Impact category the rule yields when it matches.
pub result: ImpactCategory,
}
/// The default impact lattice from the design doc.
///
/// Order matters for [`lookup_impact`]: more specific rules
/// (`adjacent_cap.is_some()`) appear before the broader fallbacks so a
/// `CODE_EXEC + ...` finding pair is classified as
/// `BrowserToLocalRce` before the standalone `CODE_EXEC → Rce`
/// fallback fires.
///
/// Pair rules are matched symmetrically by [`lookup_impact`]: the
/// `(source_cap, adjacent_cap)` pair hits regardless of which cap the
/// caller passes as `source`. Standalone rules (`adjacent_cap: None`)
/// are additionally indexed by bit position in `STANDALONE_BY_BIT`.
pub static IMPACT_LATTICE: &[ImpactRule] = &[
// ── 2-cap rules (most specific first) ─────────────────────────
ImpactRule {
source_cap: Cap::OPEN_REDIRECT,
adjacent_cap: Some(Cap::UNAUTHORIZED_ID),
result: ImpactCategory::SessionHijack,
},
ImpactRule {
source_cap: Cap::HEADER_INJECTION,
adjacent_cap: Some(Cap::CODE_EXEC),
result: ImpactCategory::BrowserToLocalRce,
},
ImpactRule {
source_cap: Cap::FILE_IO,
adjacent_cap: Some(Cap::DATA_EXFIL),
result: ImpactCategory::InfoDisclosure,
},
// ── 1-cap rules (standalone fallbacks) ────────────────────────
ImpactRule {
source_cap: Cap::CODE_EXEC,
adjacent_cap: None,
result: ImpactCategory::Rce,
},
ImpactRule {
source_cap: Cap::DESERIALIZE,
adjacent_cap: None,
result: ImpactCategory::Rce,
},
ImpactRule {
source_cap: Cap::SSRF,
adjacent_cap: None,
result: ImpactCategory::InternalNetworkAccess,
},
];
/// Caps that participate in at least one impact rule (either as
/// `source_cap` or as `adjacent_cap`). Update when adding a rule.
///
/// Stored as a raw `u32` bitmask so it can be checked in the const
/// assertions in this module: it must be disjoint from
/// [`IMPACT_LATTICE_UNCOVERED`], together they must equal
/// `Cap::all().bits()`, and it must equal the bits actually referenced
/// by [`IMPACT_LATTICE`].
pub const IMPACT_LATTICE_COVERED: u32 = Cap::CODE_EXEC.bits()
| Cap::DESERIALIZE.bits()
| Cap::SSRF.bits()
| Cap::OPEN_REDIRECT.bits()
| Cap::UNAUTHORIZED_ID.bits()
| Cap::HEADER_INJECTION.bits()
| Cap::FILE_IO.bits()
| Cap::DATA_EXFIL.bits();
/// Caps that do not participate in any impact rule today. Adding a
/// rule that consumes one of these caps requires moving it into
/// [`IMPACT_LATTICE_COVERED`] above.
///
/// Stored as a raw `u32` bitmask; the const assertions in this module
/// require it to be disjoint from [`IMPACT_LATTICE_COVERED`] and,
/// together with it, to equal `Cap::all().bits()`.
pub const IMPACT_LATTICE_UNCOVERED: u32 = Cap::ENV_VAR.bits()
| Cap::HTML_ESCAPE.bits()
| Cap::SHELL_ESCAPE.bits()
| Cap::URL_ENCODE.bits()
| Cap::JSON_PARSE.bits()
| Cap::FMT_STRING.bits()
| Cap::SQL_QUERY.bits()
| Cap::CRYPTO.bits()
| Cap::LDAP_INJECTION.bits()
| Cap::XPATH_INJECTION.bits()
| Cap::SSTI.bits()
| Cap::XXE.bits()
| Cap::PROTOTYPE_POLLUTION.bits();
// Compile-time check: every `Cap` bit is classified. The union of the
// two masks must equal `Cap::all()`, so a newly added cap that appears
// in neither list breaks the build.
const _: () = assert!(
IMPACT_LATTICE_COVERED | IMPACT_LATTICE_UNCOVERED == Cap::all().bits(),
"Cap bit missing from impact lattice coverage; \
add to IMPACT_LATTICE_COVERED or IMPACT_LATTICE_UNCOVERED and decide \
whether it should participate in a chain rule",
);
// Compile-time check: the classification is a partition. No cap bit may
// be listed as both covered and uncovered.
const _: () = assert!(
IMPACT_LATTICE_COVERED & IMPACT_LATTICE_UNCOVERED == 0,
"Cap bit appears in both IMPACT_LATTICE_COVERED and IMPACT_LATTICE_UNCOVERED",
);
/// Bitwise OR of every cap bit that appears in an [`IMPACT_LATTICE`]
/// rule, whether as `source_cap` or as `adjacent_cap`. Evaluated at
/// compile time.
#[allow(dead_code)] // Called from a const assertion; MSRV lints may miss const-eval uses.
const fn rule_coverage_bits() -> u32 {
    let mut mask: u32 = 0;
    let mut idx = 0;
    // `while` with a manual index: iterators are not usable in const fn.
    while idx < IMPACT_LATTICE.len() {
        let rule = IMPACT_LATTICE[idx];
        mask |= rule.source_cap.bits();
        if let Some(partner) = rule.adjacent_cap {
            mask |= partner.bits();
        }
        idx += 1;
    }
    mask
}
// Compile-time check that `IMPACT_LATTICE_COVERED` and the caps actually
// referenced by `IMPACT_LATTICE` agree. The equality is split into its two
// directions so that each failure reports the remedy that applies to it.
const _: () = assert!(
    (IMPACT_LATTICE_COVERED & !rule_coverage_bits()) == 0,
    "IMPACT_LATTICE_COVERED claims a cap bit that no IMPACT_LATTICE rule references; \
     drop it from IMPACT_LATTICE_COVERED or add a rule that consumes it",
);
const _: () = assert!(
    (rule_coverage_bits() & !IMPACT_LATTICE_COVERED) == 0,
    "an IMPACT_LATTICE rule references a cap bit that is missing from \
     IMPACT_LATTICE_COVERED; move it there from IMPACT_LATTICE_UNCOVERED",
);
/// Precomputed standalone-rule table indexed by `Cap` bit position.
///
/// Built once at compile time from [`IMPACT_LATTICE`]. `Cap` is a
/// `bitflags!` u32, so each cap occupies one bit position 0..32; the
/// table stores the standalone [`ImpactCategory`] (if any) for that
/// position, and `None` for positions without a standalone rule.
/// [`lookup_impact`] uses this to short-circuit its second-pass and
/// third-pass walks in O(1).
static STANDALONE_BY_BIT: [Option<ImpactCategory>; 32] = build_standalone_table();

/// Build [`STANDALONE_BY_BIT`] from the standalone (`adjacent_cap: None`)
/// rules of [`IMPACT_LATTICE`].
///
/// Two invariants are enforced here so the table cannot drift from the
/// semantics of a linear walk over the lattice:
///
/// - a standalone rule's `source_cap` must be exactly one bit, otherwise
///   it would be filed under its lowest bit and match the wrong cap
///   (const-evaluation fails, i.e. the crate does not compile);
/// - when several standalone rules share a cap, the first one in
///   [`IMPACT_LATTICE`] order wins, matching the documented
///   "first rule in lattice order" behaviour of [`lookup_impact`].
const fn build_standalone_table() -> [Option<ImpactCategory>; 32] {
    let mut table: [Option<ImpactCategory>; 32] = [None; 32];
    let mut i = 0;
    // `while` with a manual index: iterators are not usable in const fn.
    while i < IMPACT_LATTICE.len() {
        let rule = IMPACT_LATTICE[i];
        if rule.adjacent_cap.is_none() {
            let bits = rule.source_cap.bits();
            assert!(
                bits.is_power_of_two(),
                "standalone IMPACT_LATTICE rules must use a single-bit source_cap",
            );
            let bit = bits.trailing_zeros() as usize;
            // Earlier rules take precedence over later duplicates.
            if table[bit].is_none() {
                table[bit] = Some(rule.result);
            }
        }
        i += 1;
    }
    table
}
/// Standalone impact for `cap`, read from `STANDALONE_BY_BIT`.
///
/// Only a cap with exactly one bit set can have a standalone rule; an
/// empty or multi-bit mask yields `None`.
fn standalone_lookup(cap: Cap) -> Option<ImpactCategory> {
    let mask = cap.bits();
    // `is_power_of_two` is true precisely when one bit is set (and
    // false for zero).
    if mask.is_power_of_two() {
        STANDALONE_BY_BIT[mask.trailing_zeros() as usize]
    } else {
        None
    }
}
/// Look up an [`ImpactCategory`] for a (source, adjacent) cap pair.
///
/// `adjacent` is `None` when the caller has not yet found a partner
/// finding. Returns the most-specific matching rule.
///
/// Phase 25's path search calls this once per candidate path with the
/// path's primary and secondary caps; multiple cap matches choose the
/// first rule in [`IMPACT_LATTICE`] order (specific before fallback).
///
/// The standalone-rule walks (second + third pass) are O(1) via
/// `STANDALONE_BY_BIT`. The two-cap walk (first pass) stays linear
/// because the 2-cap subset is small (today: three rules); promote
/// to a sorted-pair binary search if the lattice grows past ~16
/// pair-rules.
pub fn lookup_impact(source: Cap, adjacent: Option<Cap>) -> Option<ImpactCategory> {
// First pass: exact source + matching adjacency (or both ways).
if let Some(adj) = adjacent {
for rule in IMPACT_LATTICE {
if let Some(rule_adj) = rule.adjacent_cap {
let direct = rule.source_cap == source && rule_adj == adj;
let swapped = rule.source_cap == adj && rule_adj == source;
if direct || swapped {
return Some(rule.result);
}
}
}
}
// Second pass: standalone rule on source_cap (O(1) table lookup).
if let Some(cat) = standalone_lookup(source) {
return Some(cat);
}
// Third pass: if `adjacent` is given but the pair didn't hit,
// try the standalone rule on adjacent_cap so a CODE_EXEC + UNRELATED
// pair still reaches `Rce`.
if let Some(adj) = adjacent
&& let Some(cat) = standalone_lookup(adj)
{
return Some(cat);
}
None
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn cmdi_alone_maps_to_rce() {
        let impact = lookup_impact(Cap::CODE_EXEC, None);
        assert_eq!(impact, Some(ImpactCategory::Rce));
    }

    #[test]
    fn deserialize_alone_maps_to_rce() {
        let impact = lookup_impact(Cap::DESERIALIZE, None);
        assert_eq!(impact, Some(ImpactCategory::Rce));
    }

    #[test]
    fn ssrf_alone_maps_to_internal_network_access() {
        let impact = lookup_impact(Cap::SSRF, None);
        assert_eq!(impact, Some(ImpactCategory::InternalNetworkAccess));
    }

    #[test]
    fn open_redirect_plus_user_session_maps_to_session_hijack() {
        // The pair must resolve identically in both argument orders.
        let orderings = [
            (Cap::OPEN_REDIRECT, Cap::UNAUTHORIZED_ID),
            (Cap::UNAUTHORIZED_ID, Cap::OPEN_REDIRECT),
        ];
        for (source, adjacent) in orderings {
            assert_eq!(
                lookup_impact(source, Some(adjacent)),
                Some(ImpactCategory::SessionHijack)
            );
        }
    }

    #[test]
    fn cors_plus_codeexec_maps_to_browser_local_rce() {
        let impact = lookup_impact(Cap::HEADER_INJECTION, Some(Cap::CODE_EXEC));
        assert_eq!(impact, Some(ImpactCategory::BrowserToLocalRce));
    }

    #[test]
    fn path_traversal_plus_sensitive_io_maps_to_info_disclosure() {
        let impact = lookup_impact(Cap::FILE_IO, Some(Cap::DATA_EXFIL));
        assert_eq!(impact, Some(ImpactCategory::InfoDisclosure));
    }

    #[test]
    fn unknown_cap_returns_none() {
        for cap in [Cap::HTML_ESCAPE, Cap::CRYPTO] {
            assert_eq!(lookup_impact(cap, None), None);
        }
    }

    #[test]
    fn pair_with_uncovered_adjacency_falls_through_to_standalone() {
        // CRYPTO takes part in no rule, so CODE_EXEC + CRYPTO resolves
        // through the standalone CODE_EXEC → Rce rule.
        let impact = lookup_impact(Cap::CODE_EXEC, Some(Cap::CRYPTO));
        assert_eq!(impact, Some(ImpactCategory::Rce));
    }
}

140
src/chain/mod.rs Normal file
View file

@ -0,0 +1,140 @@
//! Phase 24 — exploit-chain composer scaffolding (Track G.1).
//!
//! A `ChainGraph` is the small intermediate representation the chain
//! composer walks between two pre-existing artefacts: the flat list of
//! per-finding [`Diag`](crate::commands::scan::Diag)s produced by the
//! static analyser and the [`SurfaceMap`](crate::surface::SurfaceMap)
//! produced by Track F.
//!
//! Phase 24 ships the types only. The implicit-attacker node and the
//! bounded DFS that walks edges into [`ChainFinding`]s land in Phase 25
//! (`src/chain/search.rs`); composite re-verification lands in Phase 26
//! (`src/chain/reverify.rs`).
//!
//! # Storage shape
//!
//! Two parallel `Vec`s — `nodes` and `edges` — mirroring `SurfaceMap`'s
//! shape. Determinism is the caller's responsibility: edges are
//! produced in the order the source [`Diag`](crate::commands::scan::Diag) slice presents, and
//! `findings_to_edges` does not sort the input. Phase 25 will fold
//! these into a `petgraph::DiGraph` for path search.
//!
//! # Lattice exhaustiveness
//!
//! [`impact`] keeps an `IMPACT_LATTICE_COVERED | IMPACT_LATTICE_UNCOVERED
//! == Cap::all().bits()` const assertion, mirroring the
//! `CORPUS_SUPPORTED | CORPUS_UNSUPPORTED == Cap::all().bits()` pattern
//! in [`crate::dynamic::corpus`]. Adding a new `Cap` bit without
//! updating the lattice fails to compile.
use crate::entry_points::HttpMethod;
use crate::labels::Cap;
use crate::surface::SourceLocation;
use serde::{Deserialize, Serialize};
pub mod edges;
pub mod feasibility;
pub mod finding;
pub mod impact;
#[cfg(feature = "dynamic")]
pub mod reverify;
pub mod score;
pub mod search;
pub use edges::{ChainEdge, FindingRef, findings_to_edges, findings_to_edges_with_reach};
pub use feasibility::Feasibility;
pub use finding::{ChainFinding, ChainMember, ChainSeverity, ChainSink};
pub use impact::{IMPACT_LATTICE, ImpactCategory, ImpactRule, lookup_impact};
#[cfg(feature = "dynamic")]
pub use reverify::{
ChainReverifyResult, ChainStepSpec, CompositeReverifier, DefaultCompositeReverifier,
chain_step_specs, reverify_chain, reverify_chain_with, reverify_top_chains,
reverify_top_chains_with,
};
pub use score::{ChainScoreConfig, category_weight, min_score_default, score_path};
pub use search::{ChainSearchConfig, find_chains, find_chains_with_reach};
/// One node in a [`ChainGraph`].
///
/// `Entry` and `Sink` nodes are translated 1:1 from the SurfaceMap's
/// [`crate::surface::SurfaceNode::EntryPoint`] and
/// [`crate::surface::SurfaceNode::DangerousLocal`] variants. `Finding`
/// nodes wrap a static [`Diag`](crate::commands::scan::Diag) so a path
/// from an entry to a sink can pin which finding witnesses each hop.
/// Phase 25's path search treats the implicit attacker as a virtual
/// predecessor of every `Entry`; there is no explicit `Attacker`
/// variant on this enum.
///
/// Serialized as an internally tagged enum: the variant name is stored
/// in snake_case under the `node` key.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "node", rename_all = "snake_case")]
pub enum ChainNode {
/// A web entry-point lifted from the SurfaceMap.
Entry {
/// Source location of the entry point.
location: SourceLocation,
/// HTTP method of the entry point.
method: HttpMethod,
/// Route of the entry point.
route: String,
/// Whether the entry point requires authentication.
auth_required: bool,
},
/// A static finding produced by the analyser.
Finding(FindingRef),
/// A dangerous-local sink lifted from the SurfaceMap.
Sink {
/// Source location of the sink.
location: SourceLocation,
/// Name of the function at the sink.
function_name: String,
/// Cap bitmask carried by the sink; returned by [`ChainNode::cap_bits`].
cap_bits: u32,
},
}
impl ChainNode {
    /// Where this node lives in the source tree. Used for
    /// byte-deterministic ordering and for the `nyx surface`-style
    /// human display.
    pub fn location(&self) -> &SourceLocation {
        match self {
            ChainNode::Finding(finding) => &finding.location,
            ChainNode::Entry { location, .. } | ChainNode::Sink { location, .. } => location,
        }
    }

    /// Cap bitmask attached to this node; entry nodes carry none and
    /// report `0`. Phase 25 uses it to decide which [`ImpactRule`] a
    /// path matches.
    pub fn cap_bits(&self) -> u32 {
        match self {
            ChainNode::Finding(finding) => finding.cap_bits,
            ChainNode::Sink { cap_bits, .. } => *cap_bits,
            ChainNode::Entry { .. } => 0,
        }
    }
}
/// The full chain graph. Phase 24 only exposes the types; the
/// composer that fills the vectors lands in Phase 25.
///
/// `Default` (and [`ChainGraph::new`]) yields a graph with no nodes and
/// no edges.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct ChainGraph {
/// Nodes of the graph (entries, findings and sinks).
pub nodes: Vec<ChainNode>,
/// Edges of the graph.
pub edges: Vec<ChainEdge>,
}
impl ChainGraph {
    /// Create an empty graph (no nodes, no edges).
    pub fn new() -> Self {
        Self {
            nodes: Vec::new(),
            edges: Vec::new(),
        }
    }

    /// Number of nodes currently stored in the graph.
    pub fn node_count(&self) -> usize {
        let Self { nodes, .. } = self;
        nodes.len()
    }

    /// Number of edges currently stored in the graph.
    pub fn edge_count(&self) -> usize {
        let Self { edges, .. } = self;
        edges.len()
    }
}
/// Map a primary [`Cap`] bit, taken in isolation (no adjacency), to its
/// closest impact category. `None` means the cap has no terminal
/// interpretation on its own and chain composition needs a further cap
/// or surface property to lift it.
///
/// Phase 25's path-search code uses this as a fast path before
/// consulting the full [`IMPACT_LATTICE`].
pub fn standalone_impact(cap: Cap) -> Option<ImpactCategory> {
    // With no partner finding, only single-cap lattice rules can match.
    let no_partner: Option<Cap> = None;
    lookup_impact(cap, no_partner)
}

862
src/chain/reverify.rs Normal file
View file

@ -0,0 +1,862 @@
//! Phase 26 — Track G.3: end-to-end chain re-verification.
//!
//! Phase 25 emitted [`ChainFinding`]s scored by static + per-finding
//! feasibility but left `dynamic_verdict` permanently `None`. Phase 26
//! drives the top-scoring Confirmed chains through a *single* composite
//! dynamic run: each member's step harness is composed via
//! [`crate::dynamic::lang::compose_chain_step`] and the output of one
//! step is threaded into the next via
//! [`crate::dynamic::lang::ChainStepHarness::PREV_OUTPUT_ENV`], with
//! the final step terminating at the chain's sink probe.
//!
//! # Outcome shape
//!
//! [`reverify_chain`] returns a [`ChainReverifyResult`] carrying the
//! composite [`VerifyResult`] alongside the severity before and after
//! the verdict was applied. The severity-downgrade rule is documented
//! on [`crate::chain::finding::ChainFinding::apply_dynamic_verdict`]:
//! `Inconclusive` drops the chain one bucket and records a reason;
//! every other status leaves the severity intact.
//!
//! # Per-member harness specs
//!
//! Both the default reverifier and out-of-tree callers consume
//! [`chain_step_specs`] to materialise one [`HarnessSpec`] per
//! `chain.members` slot. The helper looks each member up in the
//! caller-supplied `member_diags` slice by
//! [`crate::chain::edges::FindingRef::stable_hash`] and reuses
//! [`HarnessSpec::from_finding_full`] so the chain's per-step specs
//! match what the per-finding verifier would have derived. This is
//! the API-shape sub-task of the Phase 26 live-execution split: it
//! lets callers (today: the default reverifier; tomorrow: a live
//! sandbox composer) inspect whether every step is drivable before
//! committing to a build / run pass.
//!
//! # Cost control
//!
//! Re-verification is opt-in via
//! [`crate::utils::config::ChainConfig::reverify_top_n`] — only the top
//! N chains by score reach the composite run. Set to `0` to skip the
//! pass entirely. The helper [`reverify_top_chains`] applies the
//! caller's reverifier to the top-N slice in place, leaving the rest
//! untouched.
//!
//! # Testability
//!
//! Production callers use [`reverify_chain`] (which dispatches to
//! [`DefaultCompositeReverifier`]). Tests inject a stub
//! [`CompositeReverifier`] via [`reverify_chain_with`] /
//! [`reverify_top_chains_with`] so the severity-downgrade pipeline can
//! be exercised without a live sandbox backend.
use crate::chain::finding::{ChainFinding, ChainSeverity};
use crate::commands::scan::Diag;
use crate::dynamic::build_sandbox::dispatch_prepare;
use crate::dynamic::harness::{self, BuiltHarness};
use crate::dynamic::lang::{self, ChainStepTerminal};
use crate::dynamic::sandbox;
use crate::dynamic::spec::HarnessSpec;
use crate::dynamic::verify::VerifyOptions;
use crate::evidence::{InconclusiveReason, UnsupportedReason, VerifyResult, VerifyStatus};
use crate::surface::SurfaceMap;
use std::collections::HashMap;
use std::path::PathBuf;
/// Outcome of composite re-verification for a single chain.
///
/// Carries the [`VerifyResult`] the composite run produced plus the
/// severity transition so callers (e.g. the scan command's output
/// pipeline) can decide whether to emit a Slack-style "downgraded by
/// dynamic verification" badge.
#[derive(Debug, Clone)]
pub struct ChainReverifyResult {
/// Stable hash of the chain re-verified.
pub chain_hash: u64,
/// Composite dynamic verdict assembled by the reverifier.
pub verdict: VerifyResult,
/// Severity carried on the chain *before* the verdict was applied.
pub severity_before: ChainSeverity,
/// Severity carried on the chain *after* the verdict was applied.
/// Equals `severity_before` unless the verdict was `Inconclusive`.
/// An `Inconclusive` verdict on a chain that is already `Low` also
/// leaves the severity unchanged (the downgrade floors at `Low`),
/// although a reason is still recorded in that case.
pub severity_after: ChainSeverity,
/// Human-readable downgrade reason, when one was recorded.
/// Mirrors [`ChainFinding::reverify_reason`] for the post-apply
/// state.
pub downgrade_reason: Option<String>,
}
impl ChainReverifyResult {
    /// Whether applying the verdict moved the chain into a different
    /// severity bucket, i.e. the recorded before/after severities
    /// differ.
    pub fn was_downgraded(&self) -> bool {
        let unchanged = self.severity_before == self.severity_after;
        !unchanged
    }
}
/// Per-member harness-spec derivation result.
///
/// One entry per `chain.members` slot, in chain order. `member_hash`
/// is copied from the [`crate::chain::edges::FindingRef::stable_hash`];
/// `result` is the outcome of running [`HarnessSpec::from_finding_full`]
/// against the matching [`Diag`] from the caller's slice.
///
/// A member whose hash has no diag match records
/// [`UnsupportedReason::NoFlowSteps`] so the caller can distinguish
/// "spec derivation failed" from "diag missing from the scan input".
#[derive(Debug, Clone)]
pub struct ChainStepSpec {
/// Stable hash of the chain member this entry was derived for.
pub member_hash: u64,
/// The derived harness spec, or the reason no spec is available
/// (`NoFlowSteps` when no diag matched `member_hash`; otherwise
/// whatever [`HarnessSpec::from_finding_full`] returned).
pub result: Result<HarnessSpec, UnsupportedReason>,
}
/// Build the per-member [`HarnessSpec`] list for `chain`, preserving
/// member order.
///
/// `member_diags` is indexed by stable hash first. Diags whose hash is
/// `0` are left out of the index (that value is the
/// pre-`compute_stable_hash` placeholder produced by tests and
/// synthetic harnesses). Each chain member is then resolved against
/// the index:
///
/// * hit — the spec is derived with [`HarnessSpec::from_finding_full`],
///   using the confidence flag, summaries and callgraph from `opts`;
/// * miss — the slot records [`UnsupportedReason::NoFlowSteps`], so
///   the caller can tell "diag missing from the scan input" apart
///   from "spec derivation failed".
///
/// Nothing is executed here. The caller (today:
/// [`DefaultCompositeReverifier`]; tomorrow: a live sandbox composer)
/// inspects the returned specs and decides whether to commit to a
/// build / run pass. This is the API-shape half of the Phase 26
/// live-execution split described in the module docs.
pub fn chain_step_specs(
    chain: &ChainFinding,
    member_diags: &[Diag],
    opts: &VerifyOptions,
) -> Vec<ChainStepSpec> {
    // Hash -> diag index; a later diag with the same hash replaces an
    // earlier one.
    let mut diag_index: HashMap<u64, &Diag> = HashMap::with_capacity(member_diags.len());
    diag_index.extend(
        member_diags
            .iter()
            .filter(|diag| diag.stable_hash != 0)
            .map(|diag| (diag.stable_hash, diag)),
    );

    let mut step_specs = Vec::with_capacity(chain.members.len());
    for member in &chain.members {
        let member_hash = member.stable_hash;
        let result = diag_index
            .get(&member_hash)
            .copied()
            .ok_or(UnsupportedReason::NoFlowSteps)
            .and_then(|diag| {
                HarnessSpec::from_finding_full(
                    diag,
                    opts.verify_all_confidence,
                    opts.summaries.as_deref(),
                    opts.callgraph.as_deref(),
                )
            });
        step_specs.push(ChainStepSpec {
            member_hash,
            result,
        });
    }
    step_specs
}
/// Pluggable composite-reverifier surface.
///
/// Production callers use [`DefaultCompositeReverifier`] (which drives
/// the per-step harness compose path). Tests substitute a stub that
/// returns canned [`VerifyResult`]s so the downgrade-and-record
/// machinery can be exercised without a live sandbox backend.
///
/// `member_diags` carries the [`Diag`]s that produced `chain.members`,
/// in any order — implementations look them up by
/// [`crate::chain::edges::FindingRef::stable_hash`] via
/// [`chain_step_specs`]. Threading the slice (instead of a pre-built
/// `HashMap`) mirrors how
/// [`crate::dynamic::verify::VerifyOptions::summaries`] flows:
/// callers hold the full project diag list and the trait surface
/// stays free of cross-coupling.
pub trait CompositeReverifier {
/// Run the composite dynamic re-verification for `chain` and return
/// the resulting verdict.
///
/// The chain is borrowed immutably: implementations only produce
/// the verdict, and [`reverify_chain_with`] is what attaches it to
/// the chain. `member_diags`, `surface` and `opts` are forwarded
/// unchanged from that caller.
fn reverify(
&self,
chain: &ChainFinding,
member_diags: &[Diag],
surface: &SurfaceMap,
opts: &VerifyOptions,
) -> VerifyResult;
}
/// Phase 26 default composite reverifier.
///
/// The composite-harness composer walks `chain.members`, derives one
/// [`HarnessSpec`] per member via [`chain_step_specs`], drives each
/// derived spec through [`harness::build`] + [`dispatch_prepare`] so
/// the per-language build cost is amortised against the on-disk caches,
/// then runs each step sequentially through [`sandbox::run`] with the
/// previous step's stdout threaded into the next step via
/// [`crate::dynamic::lang::ChainStepHarness::PREV_OUTPUT_ENV`]. The
/// final step is composed with a [`ChainStepTerminal`] built from the
/// chain's sink (callee name and cap bits).
///
/// # Verdict
///
/// The reverifier returns `Confirmed` only when the build and run
/// passes complete and the final step reports a sink hit; the exact
/// conditions live in the `reverify` implementation. Every other
/// outcome is `Inconclusive(BackendInsufficient)`, which feeds the
/// chain's existing severity-downgrade rule. When
/// [`VerifyOptions::replay_stable_check`] is set, a confirmed chain is
/// run a second time on the same workdirs and the result is stamped
/// into `replay_stable`.
///
/// In both cases the `detail` field reports spec-derivation, per-step
/// build coverage, AND per-step run coverage so operators (and the
/// [`reverify_top_chains`] caller) can see how far down the live
/// execution path the chain got: `derived N/M`, `built B/N
/// (cache_hit=H, build_ms=T, build_errors=E)`, `ran S/B
/// (sandbox_errors=SE, timeouts=TO, nonzero_exits=NE,
/// final_sink_hit=F)`. Callers that need a deterministic outcome
/// (tests, CI) use [`reverify_chain_with`] with a stubbed reverifier.
///
/// Languages whose [`dispatch_prepare`] returns `Unsupported`
/// (Ruby today) are counted under `build_errors` and skipped from the
/// run loop; their `compose_chain_step` source is never staged.
///
/// Workdir lifetime: every per-step build is content-addressed by
/// [`HarnessSpec::spec_hash`] under `/tmp/nyx-harness/{spec_hash}`,
/// and the per-language `prepare_*` caches under the host's
/// `ProjectDirs` cache root are keyed on `(lockfile_hash,
/// toolchain_id, language)`. Repeated calls with the same specs are
/// idempotent — no per-call growth on disk. The chain-step source
/// (`step.py`, `step.sh`, etc.) is written into the same workdir
/// alongside the harness source; filenames are distinct so they do
/// not collide with [`harness::build`] output for the same spec_hash.
pub struct DefaultCompositeReverifier;
impl CompositeReverifier for DefaultCompositeReverifier {
    /// Derive, build and run every chain step, then fold the coverage
    /// counters into a composite [`VerifyResult`].
    ///
    /// The verdict is `Confirmed` only when **all** of the following
    /// hold:
    ///
    /// 1. every chain member derived a harness spec
    ///    (`derived == total`, with at least one member);
    /// 2. every derived spec built (`built == derived`);
    /// 3. every built step ran without a sandbox error;
    /// 4. the final step reported a sink hit.
    ///
    /// Condition 1 matters because the run loop only sees the steps
    /// that built. If a member is missing, the remaining steps are
    /// threaded together as though it did not exist, and the terminal
    /// sink probe may land on a step that is not the chain's last
    /// member. Such a run cannot confirm the chain end to end, so it
    /// is reported as `Inconclusive(BackendInsufficient)` like every
    /// other incomplete outcome.
    ///
    /// `detail` is always populated with the derive / build / run
    /// coverage counters. `_surface` is currently unused.
    fn reverify(
        &self,
        chain: &ChainFinding,
        member_diags: &[Diag],
        _surface: &SurfaceMap,
        opts: &VerifyOptions,
    ) -> VerifyResult {
        let finding_id = format!("chain-{:016x}", chain.stable_hash);
        let specs = chain_step_specs(chain, member_diags, opts);
        let total = specs.len();
        let derived_specs: Vec<&HarnessSpec> = specs
            .iter()
            .filter_map(|s| s.result.as_ref().ok())
            .collect();
        let derived = derived_specs.len();

        // Build pass: drive each derived spec through the per-language
        // build pipeline so each step's interpreter / compile artefact
        // is staged in its content-addressed workdir before the run
        // pass. Failures are counted, not propagated; a failed step is
        // simply absent from `built_steps`.
        let profile = opts.sandbox.process_hardening;
        let mut built = 0usize;
        let mut cache_hits = 0usize;
        let mut total_build_ms: u128 = 0;
        let mut build_errors = 0usize;
        let mut built_steps: Vec<(PathBuf, &HarnessSpec)> = Vec::with_capacity(derived);
        for &spec in &derived_specs {
            match harness::build(spec) {
                Ok(built_harness) => {
                    match dispatch_prepare(spec, &built_harness.workdir, profile) {
                        Ok(result) => {
                            built += 1;
                            if result.cache_hit {
                                cache_hits += 1;
                            }
                            total_build_ms =
                                total_build_ms.saturating_add(result.duration.as_millis());
                            built_steps.push((built_harness.workdir, spec));
                        }
                        Err(_) => build_errors += 1,
                    }
                }
                Err(_) => build_errors += 1,
            }
        }

        // Run pass: sequentially run each built chain-step harness
        // through `sandbox::run`, threading the previous step's stdout
        // into the next step via `NYX_PREV_OUTPUT`. The final step is
        // composed with a `ChainStepTerminal` carrying the chain's
        // sink callee, so the per-language emitter splices in a
        // `__nyx_probe(callee, prev)` call plus the
        // `SINK_HIT_SENTINEL` banner that `sandbox::run` detects via
        // `SandboxOutcome::sink_hit`.
        let terminal = ChainStepTerminal {
            sink_callee: chain.sink.function_name.clone(),
            sink_cap_bits: chain.sink.cap_bits,
        };
        let (steps_run, sandbox_errors, steps_timeout, nonzero_exits, final_sink_hit) =
            run_chain_steps(&built_steps, &opts.sandbox, &terminal);
        let detail = format!(
            "composite chain re-verification: live runs collect step coverage; \
             derived {derived}/{total} harness specs; \
             built {built}/{derived} (cache_hit={cache_hits}, build_ms={total_build_ms}, build_errors={build_errors}); \
             ran {steps_run}/{built} (sandbox_errors={sandbox_errors}, timeouts={steps_timeout}, nonzero_exits={nonzero_exits}, final_sink_hit={final_sink_hit})"
        );

        // Verdict resolution — see the method docs for the rationale
        // behind each condition. Each flag implies the previous one.
        let all_derived = total > 0 && derived == total;
        let all_built = all_derived && built == derived;
        let all_ran = all_built && steps_run == built && sandbox_errors == 0;
        let confirmed = all_ran && final_sink_hit;

        let (status, inconclusive_reason, replay_stable) = if confirmed {
            // Phase 31 telemetry stability stamping. When the caller
            // opts in via `NYX_VERIFY_REPLAY_STABLE=1` (mirrored by
            // [`VerifyOptions::replay_stable_check`]) the chain step
            // sequence is re-run once on the same built workdirs.
            // `Some(true)` means the second pass also fired the sink
            // sentinel; `Some(false)` means the chain is flaky; `None`
            // means the opt-in is off or the replay hit a sandbox
            // error and therefore says nothing about stability.
            let replay_stable = if opts.replay_stable_check {
                let (_, replay_sandbox_errors, _, _, replay_final_sink_hit) =
                    run_chain_steps(&built_steps, &opts.sandbox, &terminal);
                (replay_sandbox_errors == 0).then_some(replay_final_sink_hit)
            } else {
                None
            };
            (VerifyStatus::Confirmed, None, replay_stable)
        } else {
            (
                VerifyStatus::Inconclusive,
                Some(InconclusiveReason::BackendInsufficient {
                    backend: "composite-chain".to_owned(),
                    oracle_kind: "chain-step-harness".to_owned(),
                }),
                None,
            )
        };

        VerifyResult {
            finding_id,
            status,
            triggered_payload: None,
            reason: None,
            inconclusive_reason,
            detail: Some(detail),
            attempts: vec![],
            toolchain_match: None,
            differential: None,
            replay_stable,
            wrong: None,
            hardening_outcome: None,
        }
    }
}
/// Phase 26 sub-task (c): run the built chain steps one after another
/// through [`sandbox::run`], handing each step the previous step's
/// stdout.
///
/// Returns `(steps_run, sandbox_errors, timeouts, nonzero_exits,
/// final_sink_hit)`.
///
/// Per step, in order:
///
/// 1. compose the step source with [`lang::compose_chain_step`]; only
///    the last entry of `built_steps` is composed with `terminal`;
/// 2. stage the step source and its extra files into the step's
///    workdir (parent directories are created best-effort);
/// 3. run it with a clone of `base_opts` that has the step's
///    `extra_env` appended and `stub_harness` cleared.
///
/// A staging failure or a `sandbox::run` error counts as one
/// `sandbox_errors` and aborts the rest of the chain — such a step has
/// no stdout to thread forward. Timeouts and non-zero exits are only
/// counted: the step's stdout is still passed on, so partial-success
/// chains keep collecting coverage. A step with no exit code is
/// counted as a non-zero exit.
///
/// `final_sink_hit` is the `sink_hit` flag of the last step's outcome;
/// it stays `false` when the chain is empty or aborts before the last
/// step runs.
fn run_chain_steps(
    built_steps: &[(PathBuf, &HarnessSpec)],
    base_opts: &sandbox::SandboxOptions,
    terminal: &ChainStepTerminal,
) -> (usize, usize, usize, usize, bool) {
    // Write `bytes` to `dest`, creating the parent directory first on
    // a best-effort basis. Only the write result decides success.
    fn stage(dest: &std::path::Path, bytes: &[u8]) -> bool {
        if let Some(parent) = dest.parent() {
            let _ = std::fs::create_dir_all(parent);
        }
        std::fs::write(dest, bytes).is_ok()
    }

    let (mut ran, mut errors, mut timeouts, mut nonzero) = (0usize, 0usize, 0usize, 0usize);
    let mut sink_hit = false;
    let mut carried: Option<Vec<u8>> = None;
    let final_idx = built_steps.len().saturating_sub(1);

    for (idx, (workdir, spec)) in built_steps.iter().enumerate() {
        let is_final = idx == final_idx;
        let step = lang::compose_chain_step(
            spec.lang,
            carried.as_deref(),
            is_final.then_some(terminal),
        );

        // The step source goes first; extra files are only attempted
        // once it is on disk, and staging stops at the first failure.
        let staged = stage(&workdir.join(&step.filename), step.source.as_bytes())
            && step
                .extra_files
                .iter()
                .all(|(rel, content)| stage(&workdir.join(rel), content.as_bytes()));
        if !staged {
            errors += 1;
            break;
        }

        let mut step_opts = base_opts.clone();
        step_opts.extra_env.extend(step.extra_env.iter().cloned());
        // Chain-step harnesses do not drive boundary stubs.
        step_opts.stub_harness = None;

        let step_harness = BuiltHarness {
            workdir: workdir.clone(),
            command: step.command.clone(),
            env: vec![],
            source: step.source.clone(),
            entry_source: String::new(),
        };

        let outcome = match sandbox::run(&step_harness, b"", &step_opts) {
            Ok(outcome) => outcome,
            Err(_) => {
                errors += 1;
                break;
            }
        };

        ran += 1;
        timeouts += usize::from(outcome.timed_out);
        nonzero += usize::from(outcome.exit_code.unwrap_or(-1) != 0);
        if is_final {
            sink_hit = outcome.sink_hit;
        }
        carried = Some(outcome.stdout);
    }

    (ran, errors, timeouts, nonzero, sink_hit)
}
/// Phase 26 — Track G.3: composite dynamic re-verification of a single
/// chain using the production reverifier.
///
/// Equivalent to calling [`reverify_chain_with`] with a
/// [`DefaultCompositeReverifier`]; `chain` is mutated in place and the
/// severity transition is returned.
pub fn reverify_chain(
    chain: &mut ChainFinding,
    member_diags: &[Diag],
    surface: &SurfaceMap,
    opts: &VerifyOptions,
) -> ChainReverifyResult {
    let default_reverifier = DefaultCompositeReverifier;
    reverify_chain_with(chain, member_diags, surface, opts, &default_reverifier)
}
/// [`reverify_chain`] with a caller-supplied reverifier.
///
/// Records the chain's hash and current severity, asks `reverifier`
/// for a verdict, and attaches that verdict through
/// [`ChainFinding::apply_dynamic_verdict`] (which applies the
/// severity-downgrade rule to `chain` in place). The returned
/// [`ChainReverifyResult`] pairs the verdict with the severity before
/// and after the call and with the chain's post-apply
/// `reverify_reason`.
pub fn reverify_chain_with(
    chain: &mut ChainFinding,
    member_diags: &[Diag],
    surface: &SurfaceMap,
    opts: &VerifyOptions,
    reverifier: &dyn CompositeReverifier,
) -> ChainReverifyResult {
    // Snapshot the pre-verdict state before anything mutates the chain.
    let severity_before = chain.severity;
    let chain_hash = chain.stable_hash;

    let verdict = reverifier.reverify(chain, member_diags, surface, opts);
    chain.apply_dynamic_verdict(verdict.clone());

    let severity_after = chain.severity;
    let downgrade_reason = chain.reverify_reason.clone();
    ChainReverifyResult {
        chain_hash,
        verdict,
        severity_before,
        severity_after,
        downgrade_reason,
    }
}
/// Phase 26 — Track G.3 cost-control entry point, using the production
/// reverifier.
///
/// Re-verifies the first `top_n` entries of `chains`. Chains are
/// canonicalised score-descending by
/// [`crate::chain::search::find_chains`], so that prefix is the
/// top-scoring set. `top_n == 0` skips the pass entirely.
///
/// `member_diags` is the full project diag list — each chain's
/// reverifier looks up its own constituent diags by stable hash via
/// [`chain_step_specs`].
///
/// `chains` is mutated in place and one [`ChainReverifyResult`] is
/// returned per re-verified chain. Chains past the `top_n` cut keep
/// their pre-existing `dynamic_verdict` / `reverify_reason` /
/// `severity`.
pub fn reverify_top_chains(
    chains: &mut [ChainFinding],
    member_diags: &[Diag],
    surface: &SurfaceMap,
    opts: &VerifyOptions,
    top_n: usize,
) -> Vec<ChainReverifyResult> {
    let default_reverifier = DefaultCompositeReverifier;
    reverify_top_chains_with(
        chains,
        member_diags,
        surface,
        opts,
        top_n,
        &default_reverifier,
    )
}
/// [`reverify_top_chains`] with a caller-supplied reverifier.
///
/// Applies [`reverify_chain_with`] to the first
/// `min(top_n, chains.len())` chains, in slice order, and returns
/// their results in the same order. Returns an empty vector — and
/// touches no chain — when `top_n` is `0` or `chains` is empty.
pub fn reverify_top_chains_with(
    chains: &mut [ChainFinding],
    member_diags: &[Diag],
    surface: &SurfaceMap,
    opts: &VerifyOptions,
    top_n: usize,
    reverifier: &dyn CompositeReverifier,
) -> Vec<ChainReverifyResult> {
    // A limit of zero covers both `top_n == 0` and an empty slice: the
    // loop below does not run and the empty vector is returned.
    let limit = chains.len().min(top_n);
    let mut results = Vec::with_capacity(limit);
    for chain in &mut chains[..limit] {
        results.push(reverify_chain_with(
            chain,
            member_diags,
            surface,
            opts,
            reverifier,
        ));
    }
    results
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::FindingRef;
    use crate::chain::finding::{ChainFinding, ChainSink};
    use crate::chain::impact::ImpactCategory;
    use crate::surface::SourceLocation;

    /// Build a single-member chain whose member and chain hash are both
    /// `hash`, with the given severity and implied impact.
    fn mk_chain(hash: u64, severity: ChainSeverity, impact: ImpactCategory) -> ChainFinding {
        ChainFinding {
            stable_hash: hash,
            members: vec![FindingRef {
                finding_id: format!("f-{hash}"),
                stable_hash: hash,
                location: SourceLocation::new("a.py", 1, 1),
                rule_id: "r".into(),
                cap_bits: 0,
            }],
            sink: ChainSink {
                file: "a.py".into(),
                line: 5,
                col: 1,
                function_name: "sink".into(),
                cap_bits: 0,
            },
            implied_impact: impact,
            severity,
            score: 100.0,
            dynamic_verdict: None,
            reverify_reason: None,
        }
    }

    /// Canned verdict with the given status and every optional field
    /// left empty.
    fn verdict(status: VerifyStatus) -> VerifyResult {
        VerifyResult {
            finding_id: "f".into(),
            status,
            triggered_payload: None,
            reason: None,
            inconclusive_reason: None,
            detail: None,
            attempts: vec![],
            toolchain_match: None,
            differential: None,
            replay_stable: None,
            wrong: None,
            hardening_outcome: None,
        }
    }

    /// Reverifier stub that ignores its inputs and returns a canned
    /// verdict with the wrapped status.
    struct StubReverifier(VerifyStatus);

    impl CompositeReverifier for StubReverifier {
        fn reverify(
            &self,
            _chain: &ChainFinding,
            _member_diags: &[Diag],
            _surface: &SurfaceMap,
            _opts: &VerifyOptions,
        ) -> VerifyResult {
            verdict(self.0)
        }
    }

    #[test]
    fn confirmed_verdict_leaves_severity_unchanged() {
        let mut chain = mk_chain(1, ChainSeverity::Critical, ImpactCategory::Rce);
        let surface = SurfaceMap::new();
        let opts = VerifyOptions::default();
        let result = reverify_chain_with(
            &mut chain,
            &[],
            &surface,
            &opts,
            &StubReverifier(VerifyStatus::Confirmed),
        );
        assert!(!result.was_downgraded());
        assert_eq!(result.severity_after, ChainSeverity::Critical);
        assert_eq!(chain.severity, ChainSeverity::Critical);
        assert_eq!(
            chain.dynamic_verdict.as_ref().unwrap().status,
            VerifyStatus::Confirmed
        );
        assert!(chain.reverify_reason.is_none());
    }

    #[test]
    fn inconclusive_verdict_downgrades_severity_and_records_reason() {
        let mut chain = mk_chain(2, ChainSeverity::Critical, ImpactCategory::Rce);
        let surface = SurfaceMap::new();
        let opts = VerifyOptions::default();
        let result = reverify_chain_with(
            &mut chain,
            &[],
            &surface,
            &opts,
            &StubReverifier(VerifyStatus::Inconclusive),
        );
        assert!(result.was_downgraded());
        assert_eq!(result.severity_before, ChainSeverity::Critical);
        assert_eq!(result.severity_after, ChainSeverity::High);
        assert_eq!(chain.severity, ChainSeverity::High);
        assert!(chain.reverify_reason.is_some());
    }

    #[test]
    fn inconclusive_at_low_floors_at_low() {
        let mut chain = mk_chain(3, ChainSeverity::Low, ImpactCategory::InfoDisclosure);
        let surface = SurfaceMap::new();
        let opts = VerifyOptions::default();
        let result = reverify_chain_with(
            &mut chain,
            &[],
            &surface,
            &opts,
            &StubReverifier(VerifyStatus::Inconclusive),
        );
        // Severity floors at Low; was_downgraded returns false because
        // the bucket did not change even though the verdict was
        // inconclusive.
        assert_eq!(result.severity_before, ChainSeverity::Low);
        assert_eq!(result.severity_after, ChainSeverity::Low);
        assert!(
            !result.was_downgraded(),
            "no bucket change when already at the floor"
        );
        assert!(chain.reverify_reason.is_some(), "reason still recorded");
    }

    #[test]
    fn top_n_zero_skips_pass_entirely() {
        let mut chains = vec![
            mk_chain(1, ChainSeverity::Critical, ImpactCategory::Rce),
            mk_chain(2, ChainSeverity::High, ImpactCategory::SessionHijack),
        ];
        let surface = SurfaceMap::new();
        let opts = VerifyOptions::default();
        let results = reverify_top_chains_with(
            &mut chains,
            &[],
            &surface,
            &opts,
            0,
            &StubReverifier(VerifyStatus::Confirmed),
        );
        assert!(results.is_empty());
        for c in &chains {
            assert!(
                c.dynamic_verdict.is_none(),
                "no verdict attached when top_n=0"
            );
        }
    }

    #[test]
    fn top_n_limits_reverified_chain_count() {
        let mut chains = vec![
            mk_chain(1, ChainSeverity::Critical, ImpactCategory::Rce),
            mk_chain(2, ChainSeverity::High, ImpactCategory::SessionHijack),
            mk_chain(3, ChainSeverity::Medium, ImpactCategory::InfoDisclosure),
        ];
        let surface = SurfaceMap::new();
        let opts = VerifyOptions::default();
        let results = reverify_top_chains_with(
            &mut chains,
            &[],
            &surface,
            &opts,
            2,
            &StubReverifier(VerifyStatus::Confirmed),
        );
        assert_eq!(results.len(), 2);
        assert!(chains[0].dynamic_verdict.is_some());
        assert!(chains[1].dynamic_verdict.is_some());
        assert!(
            chains[2].dynamic_verdict.is_none(),
            "tail beyond top_n is untouched"
        );
    }

    #[test]
    fn default_reverifier_returns_inconclusive_backend_insufficient() {
        let mut chain = mk_chain(99, ChainSeverity::Critical, ImpactCategory::Rce);
        let surface = SurfaceMap::new();
        let opts = VerifyOptions::default();
        let result = reverify_chain(&mut chain, &[], &surface, &opts);
        assert_eq!(result.verdict.status, VerifyStatus::Inconclusive);
        assert!(matches!(
            result.verdict.inconclusive_reason,
            Some(InconclusiveReason::BackendInsufficient { .. })
        ));
        // Severity dropped one bucket because the default is inconclusive.
        assert_eq!(chain.severity, ChainSeverity::High);
    }

    #[test]
    fn default_reverifier_detail_reports_spec_derivation_coverage() {
        let mut chain = mk_chain(0xDE, ChainSeverity::High, ImpactCategory::SessionHijack);
        // No diags threaded in — every member should fall through to
        // `NoFlowSteps` and the detail string should report 0/N.
        let surface = SurfaceMap::new();
        let opts = VerifyOptions::default();
        let result = reverify_chain(&mut chain, &[], &surface, &opts);
        let detail = result.verdict.detail.as_deref().expect("detail populated");
        assert!(
            detail.contains("0/1"),
            "detail must report 0/1 specs derived for a single-member chain with no diags; got {detail:?}"
        );
    }

    #[test]
    fn default_reverifier_detail_reports_build_coverage_with_no_derived_specs() {
        // No diags → 0/N derived → 0/0 built. Verifies the build
        // segment of the detail string is well-formed even when the
        // build pipeline is never invoked.
        let mut chain = mk_chain(0xBD, ChainSeverity::Medium, ImpactCategory::InfoDisclosure);
        let surface = SurfaceMap::new();
        let opts = VerifyOptions::default();
        let result = reverify_chain(&mut chain, &[], &surface, &opts);
        let detail = result.verdict.detail.as_deref().expect("detail populated");
        assert!(
            detail.contains("built 0/0"),
            "detail must report 0/0 built when no specs derived; got {detail:?}"
        );
        assert!(
            detail.contains("cache_hit=0"),
            "detail must zero cache_hit when no builds attempted; got {detail:?}"
        );
        assert!(
            detail.contains("build_ms=0"),
            "detail must zero build_ms when no builds attempted; got {detail:?}"
        );
        assert!(
            detail.contains("build_errors=0"),
            "detail must zero build_errors when no builds attempted; got {detail:?}"
        );
    }

    #[test]
    fn default_reverifier_detail_reports_run_coverage_with_no_built_steps() {
        // No diags → 0/N derived → 0/0 built → 0/0 ran. Verifies the
        // run-coverage segment of the detail string is well-formed
        // even when the chain-step run loop is never entered.
        let mut chain = mk_chain(0xCD, ChainSeverity::Medium, ImpactCategory::InfoDisclosure);
        let surface = SurfaceMap::new();
        let opts = VerifyOptions::default();
        let result = reverify_chain(&mut chain, &[], &surface, &opts);
        let detail = result.verdict.detail.as_deref().expect("detail populated");
        assert!(
            detail.contains("ran 0/0"),
            "detail must report 0/0 ran when no specs built; got {detail:?}"
        );
        assert!(
            detail.contains("sandbox_errors=0"),
            "detail must zero sandbox_errors when no runs attempted; got {detail:?}"
        );
        assert!(
            detail.contains("timeouts=0"),
            "detail must zero timeouts when no runs attempted; got {detail:?}"
        );
        assert!(
            detail.contains("nonzero_exits=0"),
            "detail must zero nonzero_exits when no runs attempted; got {detail:?}"
        );
        assert!(
            detail.contains("final_sink_hit=false"),
            "detail must stamp final_sink_hit=false when no runs attempted; got {detail:?}"
        );
    }

    #[test]
    fn run_chain_steps_with_empty_input_is_a_no_op() {
        // Locks the contract that the run loop is a no-op when no
        // steps built — the run-coverage detail segment is wholly a
        // function of the (steps_run, sandbox_errors, timeouts,
        // nonzero_exits, final_sink_hit) tuple this helper returns.
        let opts = sandbox::SandboxOptions::default();
        let terminal = ChainStepTerminal {
            sink_callee: "noop".into(),
            sink_cap_bits: 0,
        };
        let result = run_chain_steps(&[], &opts, &terminal);
        assert_eq!(result, (0, 0, 0, 0, false));
    }

    #[test]
    fn chain_step_specs_reports_no_flow_steps_for_missing_diag() {
        let chain = mk_chain(7, ChainSeverity::Medium, ImpactCategory::InfoDisclosure);
        let opts = VerifyOptions::default();
        let specs = chain_step_specs(&chain, &[], &opts);
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].member_hash, 7);
        assert!(matches!(
            specs[0].result,
            Err(UnsupportedReason::NoFlowSteps)
        ));
    }
}

197
src/chain/score.rs Normal file
View file

@ -0,0 +1,197 @@
//! Phase 25 — scoring for composed exploit chains.
//!
//! `score(path) = sum(impact) * product(feasibility)`
//!
//! The impact term is the sum of per-member [`ImpactCategory`] weights
//! (each member contributes the weight of the *standalone* category its
//! primary cap maps to, or `0` when the cap has no standalone impact —
//! the cap still contributes adjacency to the final implied impact via
//! the composer). The feasibility term is the product of every
//! member's [`Feasibility::score`].
//!
//! # Threshold
//!
//! [`min_score_default`] is the in-code fallback when `[chain] min_score`
//! is unset in `nyx.toml`. Path search drops any composed chain whose
//! score is strictly below the configured threshold.
use crate::chain::edges::ChainEdge;
use crate::chain::feasibility::Feasibility;
use crate::chain::impact::ImpactCategory;
use serde::{Deserialize, Serialize};
/// Weight an [`ImpactCategory`] contributes to the additive impact
/// term of a chain score.
///
/// The ordering follows the design doc's criticality ranking. The
/// values are plain round numbers so the resulting `score` stays easy
/// for a human to compare.
///
/// `BrowserToLocalRce` sits slightly above `Rce`: the chain that
/// composes it (`HEADER_INJECTION + CODE_EXEC` with an unauthenticated
/// entry-point) folds in an extra surface property and is therefore
/// strictly more specific.
pub const fn category_weight(c: ImpactCategory) -> f64 {
    // Listed from lowest to highest weight.
    match c {
        ImpactCategory::InfoDisclosure => 50.0,
        ImpactCategory::InternalNetworkAccess => 60.0,
        ImpactCategory::SessionHijack => 80.0,
        ImpactCategory::Rce => 100.0,
        ImpactCategory::BrowserToLocalRce => 110.0,
    }
}
/// Multiplier a member's [`Feasibility`] contributes to the
/// multiplicative term of a chain score.
///
/// Every level maps to a strictly positive factor. Even an
/// `Unverified` member keeps a non-zero weight, so a 3-step chain with
/// three unverified hops does not score `0`.
fn feasibility_factor(f: Feasibility) -> f64 {
    // Listed from weakest to strongest evidence.
    match f {
        Feasibility::Unverified => 0.1,
        Feasibility::InconclusiveHighConf => 0.5,
        Feasibility::Confirmed => 1.0,
    }
}
/// Score a composed path: `(member weights + implied weight) *
/// product of per-edge feasibility factors`.
///
/// * `member_impacts` — the standalone impact category of each member
///   that has one. Members whose primary cap has no standalone rule
///   are simply left out; they still influence the result through the
///   composer's `implied_impact`.
/// * `implied_impact` — the final composed category. Its weight is
///   always added, even when no individual member would contribute on
///   its own (e.g. the `OPEN_REDIRECT + UNAUTHORIZED_ID →
///   SessionHijack` rule).
/// * `members` — the edges of the path; each contributes the factor
///   for its `feasibility`. With no edges the product is `1.0`.
pub fn score_path(
    member_impacts: &[ImpactCategory],
    implied_impact: ImpactCategory,
    members: &[ChainEdge],
) -> f64 {
    let member_weight: f64 = member_impacts
        .iter()
        .map(|&category| category_weight(category))
        .sum();
    let total_impact = member_weight + category_weight(implied_impact);

    let feasibility = members
        .iter()
        .fold(1.0_f64, |acc, edge| acc * feasibility_factor(edge.feasibility));

    total_impact * feasibility
}
/// In-code fallback for `[chain] min_score`.
///
/// Worked examples under [`score_path`] (weights from
/// [`category_weight`], factors from [`feasibility_factor`]):
///
/// - implied `InfoDisclosure`, no standalone member impact, one
///   `Unverified` edge: `50 * 0.1 = 5` — below the threshold;
/// - the same chain when the member also contributes its standalone
///   `InfoDisclosure` weight: `(50 + 50) * 0.1 = 10` — just above;
/// - implied `Rce`, no standalone member impact, one `Unverified`
///   edge: `100 * 0.1 = 10` — just above;
/// - any chain whose edges are all `Confirmed`: at least
///   `50 * 1.0 = 50` — above.
///
/// Each extra `Unverified` edge multiplies the score by `0.1`, so the
/// second example with two unverified hops drops to `1.0`.
pub const fn min_score_default() -> f64 {
9.5
}
/// `[chain]` section of `nyx.toml`. Persisted via
/// [`crate::utils::config::ChainConfig`].
///
/// The `Default` implementation fills `min_score` from
/// [`min_score_default`].
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct ChainScoreConfig {
/// Path-search threshold. Chains below this score are dropped.
/// Defaults to [`min_score_default`].
pub min_score: f64,
}
impl Default for ChainScoreConfig {
    /// Uses the in-code fallback threshold from [`min_score_default`].
    fn default() -> Self {
        let min_score = min_score_default();
        Self { min_score }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::{ChainEdge, FindingRef};
    use crate::chain::feasibility::Feasibility;
    use crate::chain::impact::ImpactCategory;
    use crate::labels::Cap;
    use crate::surface::SourceLocation;

    /// Minimal edge fixture; only `feasibility` matters to the scorer.
    fn edge(feas: Feasibility) -> ChainEdge {
        let finding = FindingRef {
            finding_id: "f".into(),
            stable_hash: 0,
            location: SourceLocation::new("a.py", 1, 1),
            rule_id: "r".into(),
            cap_bits: Cap::CODE_EXEC.bits(),
        };
        ChainEdge {
            finding,
            primary_cap: Cap::CODE_EXEC,
            reach: crate::chain::edges::Reach::Unreachable,
            feasibility: feas,
        }
    }

    /// Score a chain with one member impact equal to the implied impact
    /// and one edge per entry of `hops`.
    fn uniform_score(category: ImpactCategory, hops: &[Feasibility]) -> f64 {
        let members: Vec<ChainEdge> = hops.iter().map(|&feas| edge(feas)).collect();
        score_path(&[category], category, &members)
    }

    #[test]
    fn single_confirmed_rce_clears_default_threshold() {
        // (100 member + 100 implied) * 1.0 = 200
        let s = uniform_score(ImpactCategory::Rce, &[Feasibility::Confirmed]);
        assert!(s > min_score_default());
        assert!((s - 200.0).abs() < f64::EPSILON);
    }

    #[test]
    fn unverified_single_member_below_threshold() {
        // One unverified hop: (50 + 50) * 0.1 = 10, which still sits
        // just above the default threshold.
        let s = uniform_score(ImpactCategory::InfoDisclosure, &[Feasibility::Unverified]);
        assert!(s > min_score_default());

        // A second unverified hop gates the chain: 100 * 0.01 = 1.0.
        let s2 = uniform_score(
            ImpactCategory::InfoDisclosure,
            &[Feasibility::Unverified, Feasibility::Unverified],
        );
        assert!(s2 < min_score_default());
    }

    #[test]
    fn feasibility_dampens_score() {
        let with_second_hop =
            |second: Feasibility| uniform_score(ImpactCategory::Rce, &[Feasibility::Confirmed, second]);

        let confirmed = with_second_hop(Feasibility::Confirmed);
        let inconclusive = with_second_hop(Feasibility::InconclusiveHighConf);
        let unverified = with_second_hop(Feasibility::Unverified);

        assert!(confirmed > inconclusive);
        assert!(inconclusive > unverified);
    }

    #[test]
    fn category_weights_strictly_ordered() {
        // Heaviest first; each neighbour pair must be strictly decreasing.
        let descending = [
            ImpactCategory::BrowserToLocalRce,
            ImpactCategory::Rce,
            ImpactCategory::SessionHijack,
            ImpactCategory::InternalNetworkAccess,
            ImpactCategory::InfoDisclosure,
        ];
        for pair in descending.windows(2) {
            assert!(category_weight(pair[0]) > category_weight(pair[1]));
        }
    }
}

943
src/chain/search.rs Normal file
View file

@ -0,0 +1,943 @@
//! Phase 25 — bounded path search for exploit-chain composition.
//!
//! Path topology:
//!
//! ```text
//! Attacker (virtual) → EntryPoint → Finding* → Sink
//! ```
//!
//! The DFS starts at the implicit attacker node (virtually adjacent to
//! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`](ChainSearchConfig::max_depth)
//! per-finding hops, and terminates at any
//! [`crate::surface::DangerousLocal`] node. Each emitted
//! [`ChainFinding`] is the single deterministic path composed for a
//! given (entry, sink) pair: the first `max_depth` in-scope findings,
//! taken in canonical candidate order.
//!
//! # Determinism
//!
//! 1. SurfaceMap nodes are canonicalised before search — every input
//! list (entries, sinks) is iterated in `SourceLocation` order.
//! 2. Candidate per-entry findings are sorted by
//! [`crate::chain::edges::FindingRef::stable_hash`] before DFS,
//! breaking ties by `rule_id` and then `location` so collisions stay
//! reproducible.
//! 3. The emitted chain list is sorted by `score` descending (ties
//! broken by `stable_hash` descending, then `implied_impact`
//! descending) before return.
//!
//! Running the same fixture 10× produces a byte-identical chain list.
//!
//! # Phase 24 follow-ups closed here
//!
//! - `BrowserToLocalRce` auth-gate predicate: when the lattice yields
//! `BrowserToLocalRce` from `HEADER_INJECTION + CODE_EXEC`, the path
//! is only kept when the entry's `auth_required` is `false`. Auth-
//! gated entries downgrade to the closest standalone impact.
//! - SSRF + LocalListener refinement: when the lattice yields
//! `InternalNetworkAccess` and the SurfaceMap exposes a local
//! listener (a [`crate::surface::DataStore`] / [`crate::surface::ExternalService`]
//! bound to a loopback host), the path is preserved; without a local
//! listener the chain is still emitted but scored lower (no boost).
//!
//! The "file-local reach → call-graph-aware reach" upgrade remains
//! deferred (see deferred.md): the DFS still treats two findings as
//! adjacent when they share a source file, mirroring Phase 24's
//! `findings_to_edges` reach resolver.
//!
//! Entry-to-finding affinity is enforced symmetrically: the
//! per-entry candidate filter requires the finding's source file to
//! overlap with the entry's `handler_location.file` (or a
//! call-graph reach hit) on top of the route+method match. Without
//! this gate, two entries that happen to share a (route, method) in
//! a monorepo would each claim every finding under that key,
//! producing `O(entries × findings)` phantom chains that the dedup
//! pass would then collapse.
use crate::callgraph::FileReachMap;
use crate::chain::edges::{ChainEdge, Reach};
use crate::chain::finding::{ChainFinding, ChainSink};
use crate::chain::impact::{ImpactCategory, lookup_impact};
use crate::chain::score::score_path;
use crate::labels::Cap;
use crate::surface::{DangerousLocal, EntryPoint, SurfaceMap, SurfaceNode};
/// Bounded-DFS search configuration.
///
/// The `Default` impl uses `max_depth: 4` and takes `min_score` from
/// [`crate::chain::score::min_score_default`].
#[derive(Debug, Clone, Copy)]
pub struct ChainSearchConfig {
/// Maximum number of per-finding hops in a single chain path.
/// `0` disables search (no chain is ever emitted).
pub max_depth: usize,
/// Drop chains whose score is strictly below this threshold.
/// A chain scoring exactly `min_score` is kept.
pub min_score: f64,
}
impl Default for ChainSearchConfig {
    /// Four hops at most, with the score module's fallback threshold.
    fn default() -> Self {
        let min_score = crate::chain::score::min_score_default();
        Self {
            max_depth: 4,
            min_score,
        }
    }
}
/// Result of one search pass: every chain whose score cleared
/// `cfg.min_score`, deterministically ordered.
pub fn find_chains(
edges: &[ChainEdge],
surface: &SurfaceMap,
cfg: ChainSearchConfig,
) -> Vec<ChainFinding> {
find_chains_with_reach(edges, surface, cfg, None)
}
/// Search for chains, optionally widening file scoping with a
/// [`FileReachMap`].
///
/// With `reach = Some(map)`, a candidate edge is in scope for a sink
/// when its file literally overlaps the sink's file *or* `map.reaches`
/// reports the sink's file as reachable from it. With `reach = None`
/// only the literal file overlap applies, which preserves the legacy
/// file-local behaviour for callers that have not wired a reach map.
///
/// Returns an empty list when `cfg.max_depth` is `0` or `edges` is
/// empty. Otherwise one chain is attempted per (entry, sink) pair, kept
/// when its score is at least `cfg.min_score`, and the result is
/// canonicalised (sorted and de-duplicated) before return.
pub fn find_chains_with_reach(
    edges: &[ChainEdge],
    surface: &SurfaceMap,
    cfg: ChainSearchConfig,
    reach: Option<&FileReachMap>,
) -> Vec<ChainFinding> {
    if edges.is_empty() || cfg.max_depth == 0 {
        return Vec::new();
    }

    let entries = collect_entries(surface);
    let sinks = collect_sinks(surface);
    let local_listener_present = has_local_listener(surface);

    let mut found: Vec<ChainFinding> = Vec::new();
    for entry in &entries {
        // Edges this entry may claim, put into a deterministic order:
        // stable_hash, then rule_id, then location.
        let mut candidates: Vec<&ChainEdge> = Vec::new();
        for edge in edges {
            if edge_reaches_entry(edge, entry, reach) {
                candidates.push(edge);
            }
        }
        candidates.sort_by(|lhs, rhs| {
            let (l, r) = (&lhs.finding, &rhs.finding);
            l.stable_hash
                .cmp(&r.stable_hash)
                .then_with(|| l.rule_id.cmp(&r.rule_id))
                .then_with(|| l.location.cmp(&r.location))
        });

        for sink in &sinks {
            // Narrow the candidates to this sink: same-file match
            // first, then the optional call-graph reach map so a
            // finding in a helper file can still compose against a
            // sink in the file that reaches it.
            let mut scoped: Vec<&ChainEdge> = Vec::new();
            for &edge in &candidates {
                let finding_file = &edge.finding.location.file;
                let sink_file = &sink.location.file;
                let in_scope = paths_overlap(finding_file, sink_file)
                    || matches!(reach, Some(map) if map.reaches(finding_file, sink_file));
                if in_scope {
                    scoped.push(edge);
                }
            }

            let composed =
                compose_chain(entry, sink, &scoped, cfg.max_depth, local_listener_present);
            // Keep the chain only when it reaches the score threshold.
            found.extend(composed.filter(|chain| chain.score >= cfg.min_score));
        }
    }

    canonicalise(&mut found);
    found
}
/// Gather every `DangerousLocal` node of the surface map, ordered by
/// location and then function name so iteration is deterministic.
fn collect_sinks(surface: &SurfaceMap) -> Vec<&DangerousLocal> {
    let mut sinks: Vec<&DangerousLocal> = Vec::new();
    for node in surface.nodes.iter() {
        if let SurfaceNode::DangerousLocal(local) = node {
            sinks.push(local);
        }
    }
    sinks.sort_by(|lhs, rhs| {
        lhs.location
            .cmp(&rhs.location)
            .then_with(|| lhs.function_name.cmp(&rhs.function_name))
    });
    sinks
}
/// Gather every `EntryPoint` node of the surface map, ordered by
/// location and then route so iteration is deterministic.
fn collect_entries(surface: &SurfaceMap) -> Vec<&EntryPoint> {
    let mut entries: Vec<&EntryPoint> = Vec::new();
    for node in surface.nodes.iter() {
        if let SurfaceNode::EntryPoint(entry) = node {
            entries.push(entry);
        }
    }
    entries.sort_by(|lhs, rhs| {
        lhs.location
            .cmp(&rhs.location)
            .then_with(|| lhs.route.cmp(&rhs.route))
    });
    entries
}
/// Report whether any data store or external service in the surface
/// map carries a label that [`is_loopback_label`] accepts. Feeds the
/// SSRF + LocalListener refinement in [`compose_chain`].
fn has_local_listener(surface: &SurfaceMap) -> bool {
    for node in surface.nodes.iter() {
        let loopback = match node {
            SurfaceNode::DataStore(store) => is_loopback_label(&store.label),
            SurfaceNode::ExternalService(service) => is_loopback_label(&service.label),
            _ => false,
        };
        if loopback {
            return true;
        }
    }
    false
}
/// Heuristically decide whether a data-store / service label points at
/// a local listener.
///
/// The match is case-insensitive (ASCII) and substring-based: the label
/// qualifies when it starts with `unix:` or contains one of the
/// loopback markers below anywhere in the string.
fn is_loopback_label(s: &str) -> bool {
    // `0.0.0.0` is the wildcard bind address rather than loopback
    // proper, but a service bound to it is reachable locally.
    // `[::1]` is the bracketed IPv6 loopback literal as it appears in
    // URLs; the bare `::1` is deliberately not matched because it is
    // also a suffix of many non-loopback addresses (e.g. `fe80::1`).
    // A separate `://localhost` check is unnecessary: the plain
    // `localhost` marker already covers it.
    const LOOPBACK_MARKERS: [&str; 4] = ["127.0.0.1", "localhost", "0.0.0.0", "[::1]"];

    let lower = s.to_ascii_lowercase();
    lower.starts_with("unix:") || LOOPBACK_MARKERS.iter().any(|marker| lower.contains(marker))
}
/// Decide whether `entry` may claim `edge` as a chain candidate.
///
/// Two gates apply, in order:
/// 1. the edge must be `Reachable` with the same route and method as
///    the entry;
/// 2. the entry's handler file must overlap the finding's file, or —
///    when a reach map is supplied — reach it through the call graph.
fn edge_reaches_entry(edge: &ChainEdge, entry: &EntryPoint, reach: Option<&FileReachMap>) -> bool {
    // Unreachable findings never attach to an entry.
    let Reach::Reachable { route, method, .. } = &edge.reach else {
        return false;
    };
    if *route != entry.route || *method != entry.method {
        return false;
    }

    // File-affinity gate. Several entries can declare the same
    // (route, method) — e.g. a monorepo shipping small services
    // side-by-side — and without this check each of them would claim
    // every finding under that key, producing phantom chains. Same
    // shape as the sink-scope filter: literal file overlap first, then
    // the call-graph reach map as a fallback.
    let entry_file = &entry.handler_location.file;
    let finding_file = &edge.finding.location.file;
    if paths_overlap(entry_file, finding_file) {
        return true;
    }
    match reach {
        Some(map) => map.reaches(entry_file, finding_file),
        None => false,
    }
}
/// Decide whether two `/`-separated path strings plausibly name the
/// same file.
///
/// Identical strings always overlap. Otherwise the file names (last
/// segment) must be equal and non-empty, and the parent directories
/// must agree component-by-component from the right for as long as
/// both paths have components. This lets a project-relative path match
/// its absolute form (`src/app.py` vs `/home/u/proj/src/app.py`) and a
/// bare file name match any path ending in it, while two different
/// files that merely share a basename (`a/index.js` vs `b/index.js`)
/// do not overlap.
///
/// Empty and `.` directory components are ignored, so a leading `/` or
/// `./` does not prevent a match.
fn paths_overlap(a: &str, b: &str) -> bool {
    if a == b {
        return true;
    }

    let (a_dir, a_name) = a.rsplit_once('/').unwrap_or(("", a));
    let (b_dir, b_name) = b.rsplit_once('/').unwrap_or(("", b));
    if a_name.is_empty() || a_name != b_name {
        return false;
    }

    // Walk both parent chains from the innermost directory outwards.
    // The shorter path must be a component-wise suffix of the longer.
    let mut a_parents = a_dir
        .rsplit('/')
        .filter(|seg| !seg.is_empty() && *seg != ".");
    let mut b_parents = b_dir
        .rsplit('/')
        .filter(|seg| !seg.is_empty() && *seg != ".");
    loop {
        match (a_parents.next(), b_parents.next()) {
            (Some(x), Some(y)) if x == y => continue,
            (Some(_), Some(_)) => return false,
            // One side ran out of components: it is a suffix of the other.
            _ => return true,
        }
    }
}
/// Compose at most one chain for an (entry, sink) pair.
///
/// The path is the first `max_depth` edges of `scoped`, in the order
/// the caller sorted them. The implied impact is the highest-severity
/// lattice match over every (member cap, sink cap) pair, so a rule
/// such as `HEADER_INJECTION + CODE_EXEC → BrowserToLocalRce` still
/// fires when an unrelated finding happens to sort first. All path
/// edges become chain members.
///
/// Returns `None` when `scoped` is empty, when the sink carries no cap
/// bit, or when no impact can be resolved for the path.
fn compose_chain(
    entry: &EntryPoint,
    sink: &DangerousLocal,
    scoped: &[&ChainEdge],
    max_depth: usize,
    local_listener_present: bool,
) -> Option<ChainFinding> {
    if scoped.is_empty() {
        return None;
    }

    let path: Vec<&ChainEdge> = scoped.iter().take(max_depth).copied().collect();
    let sink_cap = sole_cap(sink.cap_bits)?;
    let (impact, member_impacts) = resolve_impact(&path, sink_cap, entry, local_listener_present)?;
    let mut chain = build_chain(entry, sink, &path, impact, &member_impacts);

    // SSRF + LocalListener refinement: an `InternalNetworkAccess`
    // chain backed by a loopback listener in the SurfaceMap is more
    // concrete than the bare lattice match, so its score is lifted
    // above SSRF chains that lack a corroborating in-process target.
    let boosted = local_listener_present && impact == ImpactCategory::InternalNetworkAccess;
    if boosted {
        chain.score *= LOCAL_LISTENER_BOOST;
    }

    Some(chain)
}
/// Score multiplier applied when an `InternalNetworkAccess` chain has
/// a corroborating loopback listener in the SurfaceMap. Calibrated to
/// lift the chain above an otherwise-identical SSRF chain that lacks
/// the listener context, without overtaking strictly more severe
/// categories.
///
/// NOTE(review): the "without overtaking" claim looks optimistic. The
/// score module weights `InternalNetworkAccess` at 60 and
/// `SessionHijack` at 80, and 60 × 1.5 = 90 > 80, so a boosted chain
/// can outrank an otherwise-comparable `SessionHijack` chain (it stays
/// below `Rce` at 100). Confirm whether that is intended.
const LOCAL_LISTENER_BOOST: f64 = 1.5;
/// Reduce a sink's `cap_bits` to a single [`Cap`].
///
/// SurfaceMap sinks may carry several cap bits; the search terminates
/// against the lowest set bit so lattice lookups stay deterministic.
/// Delegates to `crate::chain::edges::lowest_cap`, which — per the
/// contract described here — yields `None` when no bit is set.
fn sole_cap(bits: u32) -> Option<Cap> {
    let lowest = crate::chain::edges::lowest_cap(bits);
    lowest
}
/// Resolve the implied impact for a chain path.
///
/// Walks every (member.primary_cap, sink_cap) pair and picks the
/// highest-severity lattice match. Returns `None` when no member +
/// sink pair lights up a rule and the sink cap has no standalone
/// rule either.
///
/// Auth gate: `BrowserToLocalRce` only fires when the entry's
/// `auth_required` is `false`. Authenticated entries fall through
/// to the next-best impact (typically `CODE_EXEC → Rce`).
fn resolve_impact(
path: &[&ChainEdge],
sink_cap: Cap,
entry: &EntryPoint,
_local_listener_present: bool,
) -> Option<(ImpactCategory, Vec<ImpactCategory>)> {
let mut best: Option<ImpactCategory> = None;
for member in path {
if let Some(cat) = lookup_impact(member.primary_cap, Some(sink_cap)) {
if cat == ImpactCategory::BrowserToLocalRce && entry.auth_required {
// Auth gate: this rule cannot fire when the entry is
// authed. Keep walking — another pair may light up
// a different rule.
continue;
}
best = Some(match best {
Some(prev) => more_severe(prev, cat),
None => cat,
});
}
}
// Fall through to standalone on the sink cap when no pair lit up.
if best.is_none() {
best = lookup_impact(sink_cap, None);
}
best.map(|cat| (cat, member_impact_vec(path)))
}
/// Return whichever of the two categories ranks higher in
/// [`severity_rank`]:
/// `BrowserToLocalRce > Rce > SessionHijack > InternalNetworkAccess > InfoDisclosure`.
/// On a tie the first argument is returned.
fn more_severe(a: ImpactCategory, b: ImpactCategory) -> ImpactCategory {
    if severity_rank(b) > severity_rank(a) {
        b
    } else {
        a
    }
}
/// Numeric severity of an impact category; larger means more severe.
fn severity_rank(c: ImpactCategory) -> u8 {
    // Listed least-severe first.
    match c {
        ImpactCategory::InfoDisclosure => 1,
        ImpactCategory::InternalNetworkAccess => 2,
        ImpactCategory::SessionHijack => 3,
        ImpactCategory::Rce => 4,
        ImpactCategory::BrowserToLocalRce => 5,
    }
}
/// Standalone impact of each path member, in path order. Members whose
/// primary cap has no standalone impact are left out.
fn member_impact_vec(path: &[&ChainEdge]) -> Vec<ImpactCategory> {
    let mut impacts = Vec::new();
    for edge in path {
        if let Some(category) = crate::chain::standalone_impact(edge.primary_cap) {
            impacts.push(category);
        }
    }
    impacts
}
/// Assemble the [`ChainFinding`] for a resolved path.
///
/// Members are the path's finding references in path order. The stable
/// hash is computed from those members plus the implied impact; score
/// and severity are derived from owned copies of the path edges. The
/// entry is currently not recorded on the finding.
fn build_chain(
    _entry: &EntryPoint,
    sink: &DangerousLocal,
    path: &[&ChainEdge],
    implied_impact: ImpactCategory,
    member_impacts: &[ImpactCategory],
) -> ChainFinding {
    // One pass over the path yields both the member refs and the
    // owned edges the scoring helpers expect.
    let mut members = Vec::with_capacity(path.len());
    let mut owned_edges: Vec<ChainEdge> = Vec::with_capacity(path.len());
    for edge in path {
        members.push(edge.finding.clone());
        owned_edges.push((*edge).clone());
    }

    let stable_hash = ChainFinding::compute_stable_hash(&members, implied_impact);
    let score = score_path(member_impacts, implied_impact, &owned_edges);
    let severity = crate::output::severity::chain_severity(implied_impact, &owned_edges);
    let dynamic_verdict = composite_dynamic_verdict(&owned_edges);

    let terminal = ChainSink {
        file: sink.location.file.clone(),
        line: sink.location.line,
        col: sink.location.col,
        function_name: sink.function_name.clone(),
        cap_bits: sink.cap_bits,
    };

    ChainFinding {
        stable_hash,
        members,
        sink: terminal,
        implied_impact,
        severity,
        score,
        dynamic_verdict,
        reverify_reason: None,
    }
}
/// Phase 25 placeholder for composite verification.
///
/// Currently ignores `_path` and always returns `None`: no composite
/// verdict is derived from the member edges yet, not even when every
/// member has `Feasibility::Confirmed`. Phase 26 is expected to replace
/// this with a real composite re-verification pass.
fn composite_dynamic_verdict(_path: &[ChainEdge]) -> Option<crate::evidence::VerifyResult> {
None
}
/// Put the chain list into its canonical order and drop duplicates.
///
/// Order: `score` descending, then `stable_hash` descending, then
/// `implied_impact` descending, then the terminal sink ascending
/// (file, line, col, function name, cap bits).
///
/// Two chains with the same `stable_hash` and the same sink serialise
/// identically (the hash is a function of members + implied impact,
/// and the wire format exposes only members, sink, impact, severity
/// and score). They can arise when several entry points claim the same
/// findings for the same sink. Only the first of each such group is
/// kept.
fn canonicalise(chains: &mut Vec<ChainFinding>) {
    chains.sort_by(|a, b| {
        // `total_cmp` gives a genuine total order even for NaN scores,
        // which `partial_cmp(..).unwrap_or(Equal)` does not.
        b.score
            .total_cmp(&a.score)
            .then_with(|| b.stable_hash.cmp(&a.stable_hash))
            .then_with(|| b.implied_impact.cmp(&a.implied_impact))
            // The sink must be part of the key: `dedup_by` only removes
            // *adjacent* duplicates, and without this tie-break chains
            // sharing a hash but targeting different sinks stay in
            // insertion order (entry1/sinkA, entry1/sinkB,
            // entry2/sinkA, ...), leaving equal (hash, sink) pairs
            // non-adjacent and therefore not de-duplicated.
            .then_with(|| {
                (
                    &a.sink.file,
                    a.sink.line,
                    a.sink.col,
                    &a.sink.function_name,
                    a.sink.cap_bits,
                )
                    .cmp(&(
                        &b.sink.file,
                        b.sink.line,
                        b.sink.col,
                        &b.sink.function_name,
                        b.sink.cap_bits,
                    ))
            })
    });
    chains.dedup_by(|a, b| a.stable_hash == b.stable_hash && a.sink == b.sink);
}
// Hand-written PartialOrd for ImpactCategory so the tie-break in
// `canonicalise` has a total order to lean on. It simply defers to the
// `Ord` impl. The impl is written in this module rather than next to
// the type; note that trait impls are not module-scoped in Rust, so
// the ordering applies wherever `ImpactCategory` is compared.
impl PartialOrd for ImpactCategory {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for ImpactCategory {
    /// Orders categories by their enum discriminant (declaration
    /// order), not by `severity_rank`.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        let lhs = *self as u8;
        let rhs = *other as u8;
        lhs.cmp(&rhs)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::callgraph::FileReachMap;
    use crate::chain::ChainSeverity;
    use crate::chain::edges::FindingRef;
    use crate::chain::feasibility::Feasibility;
    use crate::entry_points::HttpMethod;
    use crate::labels::Cap;
    use crate::surface::{
        DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
    };

    // ── Fixture helpers ────────────────────────────────────────────

    fn loc(file: &str, line: u32) -> SourceLocation {
        SourceLocation::new(file, line, 1)
    }

    /// POST Flask entry point declared on line 1 of `file`, handler on line 2.
    fn entry(file: &str, route: &str, auth: bool) -> SurfaceNode {
        let point = EntryPoint {
            location: loc(file, 1),
            framework: Framework::Flask,
            method: HttpMethod::POST,
            route: route.into(),
            handler_name: "h".into(),
            handler_location: loc(file, 2),
            auth_required: auth,
        };
        SurfaceNode::EntryPoint(point)
    }

    fn sink(file: &str, line: u32, fname: &str, caps: Cap) -> SurfaceNode {
        let local = DangerousLocal {
            location: loc(file, line),
            function_name: fname.into(),
            cap_bits: caps.bits(),
        };
        SurfaceNode::DangerousLocal(local)
    }

    /// Reachable edge whose stable hash is the first 8 bytes
    /// (little-endian) of `blake3("{rule}:{file}:{line}")`.
    fn edge_with(
        file: &str,
        line: u32,
        rule: &str,
        cap: Cap,
        route: &str,
        method: HttpMethod,
        feas: Feasibility,
    ) -> ChainEdge {
        let digest = blake3::hash(format!("{rule}:{file}:{line}").as_bytes());
        let mut prefix = [0u8; 8];
        prefix.copy_from_slice(&digest.as_bytes()[..8]);

        ChainEdge {
            finding: FindingRef {
                finding_id: format!("{rule}-{line}"),
                stable_hash: u64::from_le_bytes(prefix),
                location: loc(file, line),
                rule_id: rule.into(),
                cap_bits: cap.bits(),
            },
            primary_cap: cap,
            reach: Reach::Reachable {
                location: loc(file, 1),
                method,
                route: route.into(),
                auth_required: false,
            },
            feasibility: feas,
        }
    }

    /// `taint-codeexec` / `CODE_EXEC` edge reached over POST.
    fn code_exec_edge(file: &str, line: u32, route: &str, feas: Feasibility) -> ChainEdge {
        edge_with(
            file,
            line,
            "taint-codeexec",
            Cap::CODE_EXEC,
            route,
            HttpMethod::POST,
            feas,
        )
    }

    fn surface_of(nodes: Vec<SurfaceNode>) -> SurfaceMap {
        let mut surface = SurfaceMap::new();
        for node in nodes {
            surface.nodes.push(node);
        }
        surface
    }

    /// Depth 4, no score gate.
    fn no_threshold() -> ChainSearchConfig {
        ChainSearchConfig {
            max_depth: 4,
            min_score: 0.0,
        }
    }

    /// `/exec` entry + `os.system` sink in app.py with one confirmed
    /// code-exec finding.
    fn exec_fixture() -> (SurfaceMap, ChainEdge) {
        let surface = surface_of(vec![
            entry("app.py", "/exec", false),
            sink("app.py", 20, "os.system", Cap::CODE_EXEC),
        ]);
        let finding = code_exec_edge("app.py", 10, "/exec", Feasibility::Confirmed);
        (surface, finding)
    }

    /// `/ws` entry + `shell.exec` sink in app.py with a CORS
    /// (header-injection) finding followed by a code-exec finding.
    fn ws_fixture(auth: bool) -> (SurfaceMap, Vec<ChainEdge>) {
        let surface = surface_of(vec![
            entry("app.py", "/ws", auth),
            sink("app.py", 30, "shell.exec", Cap::CODE_EXEC),
        ]);
        let cors = edge_with(
            "app.py",
            10,
            "cfg-cors-allow-all",
            Cap::HEADER_INJECTION,
            "/ws",
            HttpMethod::POST,
            Feasibility::Unverified,
        );
        let exec = code_exec_edge("app.py", 20, "/ws", Feasibility::Unverified);
        (surface, vec![cors, exec])
    }

    /// Reach map in which routes.py::handle calls helper.py::sink, so
    /// helper.py is reachable from routes.py.
    fn routes_to_helper_reach() -> FileReachMap {
        use crate::callgraph::build_call_graph;
        use crate::summary::{FuncSummary, merge_summaries};

        let handle = FuncSummary {
            name: "handle".into(),
            file_path: "routes.py".into(),
            lang: "python".into(),
            param_count: 0,
            callees: vec![crate::summary::CalleeSite::bare("sink")],
            ..Default::default()
        };
        let sink_fn = FuncSummary {
            name: "sink".into(),
            file_path: "helper.py".into(),
            lang: "python".into(),
            param_count: 0,
            ..Default::default()
        };
        let gs = merge_summaries(vec![handle, sink_fn], None);
        let cg = build_call_graph(&gs, &[]);
        FileReachMap::build(&cg)
    }

    // ── Tests ──────────────────────────────────────────────────────

    #[test]
    fn returns_empty_when_no_findings() {
        let surface = SurfaceMap::new();
        let result = find_chains(&[], &surface, ChainSearchConfig::default());
        assert!(result.is_empty());
    }

    #[test]
    fn standalone_codeexec_via_unauthed_entry_emits_rce_chain() {
        let (surface, finding) = exec_fixture();
        let chains = find_chains(&[finding], &surface, ChainSearchConfig::default());
        assert_eq!(chains.len(), 1);
        assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
    }

    #[test]
    fn header_injection_plus_codeexec_via_unauthed_entry_is_browser_local_rce() {
        let (surface, edges) = ws_fixture(false);
        let chains = find_chains(&edges, &surface, no_threshold());
        assert_eq!(chains.len(), 1);
        assert_eq!(chains[0].implied_impact, ImpactCategory::BrowserToLocalRce);
        assert_eq!(chains[0].severity, ChainSeverity::Critical);
    }

    #[test]
    fn authed_entry_downgrades_browser_local_rce_to_rce() {
        // Same fixture as the unauthed case, but the entry requires
        // auth, so BrowserToLocalRce must NOT light up.
        let (surface, edges) = ws_fixture(true);
        let chains = find_chains(&edges, &surface, no_threshold());
        assert_eq!(chains.len(), 1);
        assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
    }

    #[test]
    fn determinism_across_runs() {
        let (surface, finding) = exec_fixture();
        let cfg = ChainSearchConfig::default();
        let hashes_of_run = || -> Vec<u64> {
            find_chains(std::slice::from_ref(&finding), &surface, cfg)
                .iter()
                .map(|c| c.stable_hash)
                .collect()
        };

        let first_hashes = hashes_of_run();
        for _ in 0..9 {
            let again_hashes = hashes_of_run();
            assert_eq!(again_hashes, first_hashes);
        }
    }

    #[test]
    fn ssrf_with_local_listener_scores_higher_than_without() {
        use crate::surface::{DataStore, DataStoreKind};

        let ssrf_edge = || -> ChainEdge {
            edge_with(
                "app.py",
                10,
                "taint-ssrf",
                Cap::SSRF,
                "/fetch",
                HttpMethod::POST,
                Feasibility::Confirmed,
            )
        };

        let surface_no_listener = surface_of(vec![
            entry("app.py", "/fetch", false),
            sink("app.py", 20, "requests.get", Cap::SSRF),
        ]);
        let baseline = find_chains(&[ssrf_edge()], &surface_no_listener, no_threshold());
        assert_eq!(baseline.len(), 1);
        assert_eq!(
            baseline[0].implied_impact,
            ImpactCategory::InternalNetworkAccess
        );

        // Same surface plus a data store bound to a loopback address.
        let mut surface_with_listener = surface_no_listener.clone();
        surface_with_listener
            .nodes
            .push(SurfaceNode::DataStore(DataStore {
                location: loc("app.py", 5),
                kind: DataStoreKind::KeyValue,
                label: "redis://127.0.0.1:6379".into(),
            }));
        let boosted = find_chains(&[ssrf_edge()], &surface_with_listener, no_threshold());
        assert_eq!(boosted.len(), 1);
        assert_eq!(
            boosted[0].implied_impact,
            ImpactCategory::InternalNetworkAccess
        );

        let ratio = boosted[0].score / baseline[0].score;
        assert!(
            (ratio - LOCAL_LISTENER_BOOST).abs() < 1e-9,
            "expected ×{LOCAL_LISTENER_BOOST} boost, got ratio={ratio}"
        );
    }

    #[test]
    fn score_threshold_drops_low_score_chains() {
        let surface = surface_of(vec![
            entry("app.py", "/r", false),
            sink("app.py", 20, "open", Cap::FILE_IO),
        ]);
        // NOTE(review): this edge is GET while the `entry` helper
        // registers POST, so the route/method gate may reject it before
        // any score is computed — confirm the threshold is what this
        // test actually exercises.
        let finding = edge_with(
            "app.py",
            10,
            "test",
            Cap::FILE_IO,
            "/r",
            HttpMethod::GET,
            Feasibility::Unverified,
        );
        let cfg = ChainSearchConfig {
            max_depth: 4,
            min_score: 1_000.0,
        };
        let chains = find_chains(&[finding], &surface, cfg);
        assert!(chains.is_empty());
    }

    /// Sink in a different file than the finding composes only when the
    /// call-graph reach map records a transitive caller relationship.
    #[test]
    fn cross_file_chain_requires_reach_map() {
        // The sink lives in a helper file the entry handler reaches,
        // not in the entry file itself.
        let surface = surface_of(vec![
            entry("routes.py", "/exec", false),
            sink("helper.py", 20, "os.system", Cap::CODE_EXEC),
        ]);
        let finding = code_exec_edge("routes.py", 10, "/exec", Feasibility::Unverified);
        let cfg = no_threshold();

        // No reach map: `paths_overlap` rejects routes.py vs helper.py.
        let baseline = find_chains(std::slice::from_ref(&finding), &surface, cfg);
        assert!(
            baseline.is_empty(),
            "without reach map, cross-file chain must not compose"
        );

        let reach = routes_to_helper_reach();
        let chains = find_chains_with_reach(&[finding], &surface, cfg, Some(&reach));
        assert_eq!(
            chains.len(),
            1,
            "reach map should widen scope to include helper.py sink"
        );
        assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
    }

    #[test]
    fn duplicate_chains_from_shared_route_method_are_deduped() {
        // Three unrelated handler files each declare POST /run, and
        // each holds one finding plus one dangerous-local sink. The
        // wire format does not surface the entry, so if every entry
        // claimed every finding the extra chains would serialise
        // identically. The search must emit exactly one chain per
        // finding, each with a distinct hash.
        let files = ["a.js", "b.js", "c.py"];
        let mut surface = SurfaceMap::new();
        for file in files {
            surface.nodes.push(entry(file, "/run", false));
        }
        for file in files {
            surface.nodes.push(sink(file, 7, "eval", Cap::CODE_EXEC));
        }
        let edges: Vec<ChainEdge> = files
            .iter()
            .map(|file| code_exec_edge(file, 7, "/run", Feasibility::Unverified))
            .collect();

        let chains = find_chains(&edges, &surface, ChainSearchConfig::default());
        assert_eq!(
            chains.len(),
            3,
            "expected one chain per finding, not entries × findings",
        );

        let mut hashes: Vec<u64> = chains.iter().map(|c| c.stable_hash).collect();
        hashes.sort();
        hashes.dedup();
        assert_eq!(
            hashes.len(),
            3,
            "surviving chains must have distinct hashes"
        );
    }

    /// File-affinity gate on `edge_reaches_entry`: an entry only
    /// claims candidate findings that live in its own handler file
    /// (or are reached from it via the call graph). Two unrelated
    /// entries declaring the same (route, method) on different
    /// files do not cross-claim each other's findings.
    #[test]
    fn entry_file_affinity_rejects_cross_file_findings_without_reach() {
        let surface = surface_of(vec![
            entry("a.js", "/run", false),
            entry("b.js", "/run", false),
            sink("a.js", 7, "eval", Cap::CODE_EXEC),
            sink("b.js", 7, "eval", Cap::CODE_EXEC),
        ]);
        // The only finding lives in a.js. Both entries match on
        // route + method, but only entry@a.js shares the file.
        let edges = vec![code_exec_edge("a.js", 7, "/run", Feasibility::Unverified)];

        let chains = find_chains(&edges, &surface, ChainSearchConfig::default());
        assert_eq!(
            chains.len(),
            1,
            "entry@b.js must not claim a finding in a.js without reach map",
        );
        assert_eq!(chains[0].sink.file, "a.js");
    }

    /// File-affinity gate widens through the call-graph reach map:
    /// an entry whose handler reaches the finding's file (via the
    /// `FileReachMap`) still claims the finding even when the
    /// literal file suffixes differ.
    #[test]
    fn entry_file_affinity_widens_with_reach_map() {
        // Entry handler lives in routes.py; the finding and the sink
        // live in a helper file that routes.py calls into.
        let surface = surface_of(vec![
            entry("routes.py", "/run", false),
            sink("helper.py", 20, "os.system", Cap::CODE_EXEC),
        ]);
        let finding = code_exec_edge("helper.py", 10, "/run", Feasibility::Unverified);
        let cfg = no_threshold();

        // Without a reach map the gate rejects the entry/finding pair.
        let baseline = find_chains(std::slice::from_ref(&finding), &surface, cfg);
        assert!(
            baseline.is_empty(),
            "without reach map, cross-file entry/finding pair must reject",
        );

        let reach = routes_to_helper_reach();
        let chains = find_chains_with_reach(&[finding], &surface, cfg, Some(&reach));
        assert_eq!(
            chains.len(),
            1,
            "reach map should widen entry-affinity to helper.py",
        );
        assert_eq!(chains[0].sink.file, "helper.py");
    }
}

View file

@ -36,6 +36,12 @@ impl Commands {
&& (fmt == OutputFormat::Json || fmt == OutputFormat::Sarif)
}
/// True only for a `Scan` command whose `quiet` field is set, i.e. the
/// user explicitly asked this invocation to suppress human-readable
/// output. Every other command reports `false`.
pub fn quiet_requested(&self) -> bool {
    match self {
        Commands::Scan { quiet, .. } => *quiet,
        _ => false,
    }
}
/// Whether this is a long-running server command (skip timing output).
pub fn is_serve(&self) -> bool {
matches!(self, Commands::Serve { .. })
@ -50,6 +56,7 @@ impl Commands {
Commands::Scan { explain_engine, .. } => *explain_engine,
Commands::List { .. } => true,
Commands::Rules { .. } => true,
Commands::Surface { .. } => true,
Commands::Config { action } => {
matches!(action, ConfigAction::Show { .. } | ConfigAction::Path)
}
@ -105,6 +112,32 @@ pub enum ScanMode {
Taint,
}
/// Output format for `nyx surface`.
///
/// `Text` is the default variant. The `Display` impl renders each
/// variant as its lowercase name (`text`, `json`, `dot`, `svg`).
#[derive(Debug, Copy, Clone, PartialEq, Eq, ValueEnum, Default)]
pub enum SurfaceFormat {
/// Indented tree, one entry-point per line, with reach summary.
#[default]
Text,
/// Canonical SurfaceMap JSON, byte-identical to the SQLite payload.
Json,
/// Graphviz DOT source; pipe through `dot -Tsvg` to render.
Dot,
/// SVG produced by spawning the local `dot` binary on the DOT
/// rendering. Fails when graphviz is not installed.
Svg,
}
impl std::fmt::Display for SurfaceFormat {
    /// Writes the variant's lowercase name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            SurfaceFormat::Text => "text",
            SurfaceFormat::Json => "json",
            SurfaceFormat::Dot => "dot",
            SurfaceFormat::Svg => "svg",
        };
        f.write_str(name)
    }
}
/// Engine-depth profile that sets the full stack of analysis toggles
/// in one shot. Individual engine flags override the profile.
#[derive(Debug, Copy, Clone, PartialEq, Eq, ValueEnum)]
@ -184,6 +217,7 @@ impl std::fmt::Display for EngineProfile {
}
#[derive(Subcommand)]
#[allow(clippy::large_enum_variant)]
pub enum Commands {
/// Scan project for vulnerabilities
Scan {
@ -245,6 +279,17 @@ pub enum Commands {
#[arg(long, help_heading = "Output")]
quiet: bool,
/// Print the dynamic-verifier trace to stderr at end-of-verify.
///
/// When dynamic verification is enabled, the verifier records a
/// per-finding [`crate::dynamic::trace::VerifyTrace`]. Setting this
/// flag flushes every recorded `TraceEvent` to stderr after each
/// verdict, matching the stream that already lands in the repro
/// bundle at `expected/trace.jsonl`. Off by default so non-interactive
/// scans stay quiet.
#[arg(long, help_heading = "Output")]
verbose: bool,
/// Exit with code 1 if any finding meets or exceeds this severity
///
/// Useful for CI gating. Example: --fail-on HIGH
@ -320,7 +365,6 @@ pub enum Commands {
#[arg(long, help_heading = "Output")]
require_converged: bool,
// ── Analysis engine toggles (override [analysis.engine] config) ───
/// Enable path-constraint solving (default: on)
#[arg(
long,
@ -409,7 +453,6 @@ pub enum Commands {
#[arg(long, help_heading = "Limits")]
max_pointsto: Option<u32>,
// ── Deprecated aliases (hidden) ─────────────────────────────────
/// Deprecated: use --index off
#[arg(long, hide = true)]
no_index: bool,
@ -429,6 +472,121 @@ pub enum Commands {
/// Deprecated: use --mode cfg
#[arg(long, hide = true)]
cfg_only: bool,
/// Build a harness and dynamically verify each finding in a sandbox.
///
/// Dynamic verification is on by default. This flag is a no-op when
/// verification is already enabled via config. Use `--no-verify` to
/// disable it for a single run. Default builds include dynamic support;
/// custom `--no-default-features` builds need `--features dynamic`.
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
#[arg(long, help_heading = "Dynamic", conflicts_with = "no_verify")]
verify: bool,
/// Skip dynamic verification for this run.
///
/// Overrides `verify = true` from config. Useful when you want a
/// fast static-only scan without permanently changing `nyx.toml`.
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
#[arg(long, help_heading = "Dynamic", conflicts_with = "verify")]
no_verify: bool,
/// Also verify `Confidence < Medium` findings dynamically.
///
/// By default only `Confidence >= Medium` findings are verified. Pass
/// this flag to run verification on all findings regardless of
/// confidence. Intended for payload tuning and backfill runs.
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
#[arg(long, help_heading = "Dynamic")]
verify_all_confidence: bool,
/// Force the process sandbox backend (less isolation, dev use only).
///
/// By default the docker backend is used when available. This flag
/// restricts the backend to the in-process runner. Cannot be combined
/// with `--backend docker`.
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
#[arg(long, help_heading = "Dynamic")]
unsafe_sandbox: bool,
/// Sandbox backend to use for dynamic verification.
///
/// `auto` (default): docker when available, else process.
/// `docker`: require docker; fail if unavailable.
/// `process`: in-process runner (same as `--unsafe-sandbox`).
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
#[arg(long, help_heading = "Dynamic", value_name = "BACKEND")]
backend: Option<String>,
/// Process-backend hardening profile applied to every verified finding.
///
/// `standard` (default): baseline only. Linux runs no-new-privs +
/// memory rlimit; macOS skips the sandbox-exec wrap.
/// `strict`: full lockdown. Linux layers namespaces, chroot to
/// workdir, and a default-deny seccomp filter; macOS wraps the
/// harness with `sandbox-exec -f <cap>.sb`. Opt-in because
/// interpreted Linux harnesses may SIGSYS until the per-language
/// seccomp allowlists are expanded.
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
#[arg(
long,
help_heading = "Dynamic",
value_name = "PROFILE",
value_parser = ["standard", "strict"],
)]
harden: Option<String>,
/// Read a previous scan's JSON output (or a stripped .nyx/baseline.json)
/// and diff it against the current scan on stable_hash.
///
/// Emits a verdict diff showing New / Resolved / FlippedConfirmed /
/// FlippedNotConfirmed transitions. Combine with --gate to enforce CI
/// policies.
#[arg(long, value_name = "FILE", help_heading = "Baseline")]
baseline: Option<String>,
/// Write a stripped baseline JSON to FILE after scanning.
///
/// The file contains only stable_hash, dynamic_verdict, severity, path,
/// and rule_id (no source code). A CI job can persist this file to
/// compare future scans against without leaking source.
#[arg(long, value_name = "FILE", help_heading = "Baseline")]
baseline_write: Option<String>,
/// CI gate to enforce when --baseline is active.
///
/// `no-new-confirmed`: exit 2 if any new Confirmed finding appears.
/// `resolve-all-confirmed`: exit 2 if any baseline-Confirmed finding
/// is not fully resolved (absent or NotConfirmed in the current scan).
#[arg(
long,
value_name = "GATE",
value_parser = ["no-new-confirmed", "resolve-all-confirmed"],
help_heading = "Baseline"
)]
gate: Option<String>,
},
/// Submit feedback on a dynamic verification verdict.
///
/// Records a correction or confirmation for a finding's verdict in the
/// local telemetry log. Requires `--features dynamic`.
#[cfg_attr(not(feature = "dynamic"), command(hide = true))]
VerifyFeedback {
/// Stable finding ID (16-char hex, shown in `nyx scan --verify` output).
finding_id: String,
/// Mark this verdict as wrong and record a reason.
#[arg(long, conflicts_with = "right")]
wrong: Option<String>,
/// Confirm this verdict is correct.
#[arg(long, conflicts_with = "wrong")]
right: bool,
/// Upload feedback to Nyx telemetry (not yet implemented; reserved).
#[arg(long)]
upload: bool,
},
/// Manage project indexes
@ -466,6 +624,37 @@ pub enum Commands {
action: RulesAction,
},
/// Print the project's attack-surface map.
///
/// Loads the SurfaceMap persisted by the most recent indexed scan
/// when available, otherwise builds an entry-point-only map by
/// running the per-language framework probes against the on-disk
/// source. Pass `--build` to force a full inline build (pass-1
/// summary extraction + call-graph construction) when no indexed
/// scan exists; that populates DataStore / ExternalService /
/// DangerousLocal nodes the entry-points-only fallback omits.
/// Use `--format dot` and pipe through `dot -Tsvg` to produce a
/// renderable graph; `--format svg` does the same in one step when
/// graphviz is installed locally.
Surface {
/// Path to inspect (defaults to current directory)
#[arg(default_value = ".")]
path: String,
/// Output format: text (default), json, dot, svg
#[arg(long, value_enum, default_value_t = SurfaceFormat::Text)]
format: SurfaceFormat,
/// Build the full SurfaceMap from source even when no indexed
/// scan exists. Runs pass-1 summary extraction + call-graph
/// build inline (same cost as `nyx index build`), then renders
/// data-store / external-service / dangerous-local nodes plus
/// reach edges. Without this flag, an unscanned project
/// produces an entry-points-only map.
#[arg(long)]
build: bool,
},
/// Start the local web UI for browsing scan results
Serve {
/// Path to scan root (defaults to current directory)
@ -515,7 +704,11 @@ pub enum ConfigAction {
#[arg(long)]
kind: String,
/// Capability: env_var, html_escape, shell_escape, url_encode, json_parse, file_io, or all
/// Capability slug. One of: env_var, html_escape, shell_escape,
/// url_encode, json_parse, file_io, fmt_string, sql_query, deserialize,
/// ssrf, code_exec, crypto, unauthorized_id, data_exfil, ldap_injection,
/// xpath_injection, header_injection, open_redirect, ssti, xxe,
/// prototype_pollution, or all. See docs/cli.md.
#[arg(long)]
cap: String,
},

View file

@ -1,7 +1,6 @@
use crate::cli::IndexAction;
use crate::database::index::{Indexer, IssueRow};
use crate::database::index::{IndexWriteQueue, Indexer, IssueRow};
use crate::errors::NyxResult;
use crate::patterns::Severity;
use crate::server::progress::{ScanMetrics, ScanProgress, ScanStage};
use crate::server::scan_log::ScanLogCollector;
use crate::utils::Config;
@ -27,6 +26,11 @@ pub fn handle(
IndexAction::Build { path, force } => {
let build_path = std::path::Path::new(&path).canonicalize()?;
let (project_name, db_path) = get_project_info(&build_path, database_dir)?;
let _ = crate::utils::targets::remember_target(
database_dir,
&build_path,
crate::utils::targets::TargetTouch::Seen,
);
if force || !db_path.exists() {
build_index(
@ -200,108 +204,123 @@ pub fn build_index_with_observer(
let metrics = metrics.cloned();
let logs = logs.cloned();
let pass1_start = std::time::Instant::now();
paths
.into_par_iter()
.try_for_each(|path| -> NyxResult<()> {
let mut idx = Indexer::from_pool(project_name, &pool)?;
let writer = IndexWriteQueue::start(project_name.to_owned(), Arc::clone(&pool));
let write_tx = writer.sender();
let index_result = paths.into_par_iter().try_for_each(|path| -> NyxResult<()> {
// Read once, hash once, pass bytes to both rule execution and
// summary extraction. Use pre-computed hash for upsert to avoid
// a redundant file read inside upsert_file.
let bytes = std::fs::read(&path)?;
let hash = Indexer::digest_bytes(&bytes);
// Read once, hash once, pass bytes to both rule execution and
// summary extraction. Use pre-computed hash for upsert to avoid
// a redundant file read inside upsert_file.
let bytes = std::fs::read(&path)?;
let hash = Indexer::digest_bytes(&bytes);
// Parse once and persist every artifact we can reuse later:
// findings, coarse summaries, and precise SSA summaries.
let fused = crate::commands::scan::analyse_file_fused(
&bytes,
&path,
config,
None,
Some(project_path),
)?;
if let Some(ref p) = progress {
p.inc_parsed(1);
p.set_current_file(&path.to_string_lossy());
if let Some(lang) = fused.summaries.first().map(|s| s.lang.as_str()) {
p.record_language(lang);
}
}
if let Some(ref m) = metrics {
m.cfg_nodes.fetch_add(fused.cfg_nodes as u64, Relaxed);
}
// Parse once and persist every artifact we can reuse later:
// findings, coarse summaries, and precise SSA summaries.
let fused = crate::commands::scan::analyse_file_fused(
&bytes,
&path,
config,
None,
Some(project_path),
let issue_rows: Vec<(String, String, i64, i64)> = fused
.diags
.iter()
.map(|d| {
(
d.id.clone(),
d.severity.as_db_str().to_string(),
d.line as i64,
d.col as i64,
)
})
.collect();
let summaries = fused.summaries;
let ssa_rows: Vec<_> = fused
.ssa_summaries
.into_iter()
.map(|(key, sum)| {
(
key.name,
key.arity.unwrap_or(0),
key.lang.as_str().to_string(),
key.namespace,
key.container,
key.disambig,
key.kind,
sum,
)
})
.collect();
// Persist SSA callee bodies at index-build time so CLI-initiated
// rebuilds (`--index rebuild`) populate the same
// `ssa_function_bodies` rows that `scan_with_index_parallel`
// would have written via its pass-1 branch. Without this,
// indexed scans load zero cross-file bodies and cross-file
// inline silently falls back to summary resolution.
let body_rows: Vec<_> = fused
.ssa_bodies
.into_iter()
.map(|(key, body)| {
(
key.name,
key.arity.unwrap_or(0),
key.lang.as_str().to_string(),
key.namespace,
key.container,
key.disambig,
key.kind,
body,
)
})
.collect();
let path_for_write = path.clone();
write_tx.enqueue(move |idx| {
let file_id = idx.upsert_file_with_hash(&path_for_write, &hash)?;
idx.replace_issues(
file_id,
issue_rows
.iter()
.map(|(rule_id, severity, line, col)| IssueRow {
rule_id: rule_id.as_str(),
severity: severity.as_str(),
line: *line,
col: *col,
}),
)?;
if let Some(ref p) = progress {
p.inc_parsed(1);
p.set_current_file(&path.to_string_lossy());
if let Some(lang) = fused.summaries.first().map(|s| s.lang.as_str()) {
p.record_language(lang);
}
if !summaries.is_empty() {
idx.replace_summaries_for_file(&path_for_write, &hash, &summaries)?;
}
if let Some(ref m) = metrics {
m.cfg_nodes.fetch_add(fused.cfg_nodes as u64, Relaxed);
if !ssa_rows.is_empty() {
idx.replace_ssa_summaries_for_file(&path_for_write, &hash, &ssa_rows)?;
}
let file_id = idx.upsert_file_with_hash(&path, &hash)?;
let rows: Vec<IssueRow> = fused
.diags
.iter()
.map(|d| IssueRow {
rule_id: d.id.as_ref(),
severity: match d.severity {
Severity::High => "HIGH",
Severity::Medium => "MEDIUM",
Severity::Low => "LOW",
},
line: d.line as i64,
col: d.col as i64,
})
.collect();
idx.replace_issues(file_id, rows)?;
if !fused.summaries.is_empty() {
idx.replace_summaries_for_file(&path, &hash, &fused.summaries)?;
if !body_rows.is_empty() {
idx.replace_ssa_bodies_for_file(&path_for_write, &hash, &body_rows)?;
}
if !fused.ssa_summaries.is_empty() {
let ssa_rows: Vec<_> = fused
.ssa_summaries
.into_iter()
.map(|(key, sum)| {
(
key.name,
key.arity.unwrap_or(0),
key.lang.as_str().to_string(),
key.namespace,
key.container,
key.disambig,
key.kind,
sum,
)
})
.collect();
idx.replace_ssa_summaries_for_file(&path, &hash, &ssa_rows)?;
}
// Persist SSA callee bodies at index-build time so CLI-initiated
// rebuilds (`--index rebuild`) populate the same
// `ssa_function_bodies` rows that `scan_with_index_parallel`
// would have written via its pass-1 branch. Without this,
// indexed scans load zero cross-file bodies and cross-file
// inline silently falls back to summary resolution.
if !fused.ssa_bodies.is_empty() {
let body_rows: Vec<_> = fused
.ssa_bodies
.into_iter()
.map(|(key, body)| {
(
key.name,
key.arity.unwrap_or(0),
key.lang.as_str().to_string(),
key.namespace,
key.container,
key.disambig,
key.kind,
body,
)
})
.collect();
idx.replace_ssa_bodies_for_file(&path, &hash, &body_rows)?;
}
pb.inc(1);
Ok(())
})?;
pb.inc(1);
Ok(())
});
drop(write_tx);
let writer_result = writer.finish("Index rebuild");
index_result?;
writer_result?;
pb.finish_and_clear();
if let Some(p) = &progress {
p.record_pass1_ms(pass1_start.elapsed().as_millis() as u64);

View file

@ -14,6 +14,7 @@ pub mod rules;
pub mod scan;
#[cfg(feature = "serve")]
pub mod serve;
pub mod surface;
use crate::cli::{Commands, EngineProfile, IndexMode, ScanMode};
use crate::errors::NyxResult;
@ -57,6 +58,7 @@ pub fn handle_command(
all_targets,
keep_nonprod_severity,
quiet,
verbose,
fail_on,
no_state,
no_rank,
@ -97,6 +99,15 @@ pub fn handle_command(
high_only,
ast_only,
cfg_only,
verify,
no_verify,
verify_all_confidence,
unsafe_sandbox,
backend,
harden,
baseline,
baseline_write,
gate,
} => {
// ── Apply profile first (CLI flags override after) ──────────
if let Some(ref name) = profile {
@ -307,6 +318,58 @@ pub fn handle_command(
// resolved straight from config; no CLI overrides yet.
let _ = crate::utils::detector_options::install(config.detectors.clone());
// ── Dynamic verification ────────────────────────────────────
#[cfg(feature = "dynamic")]
{
// Validate and apply --unsafe-sandbox / --backend combo.
let explicit_backend = backend.as_deref().unwrap_or("auto");
if unsafe_sandbox && explicit_backend == "docker" {
return Err(crate::errors::NyxError::Msg(
"--unsafe-sandbox and --backend docker are mutually exclusive: \
--unsafe-sandbox forces the process backend; \
docker cannot be reached through this flag."
.into(),
));
}
let resolved_backend = if unsafe_sandbox {
"process"
} else {
explicit_backend
};
// --verify / --no-verify override the config default.
if no_verify {
config.scanner.verify = false;
} else if verify {
config.scanner.verify = true;
}
// --verify-all-confidence overrides the confidence gate.
if verify_all_confidence {
config.scanner.verify_all_confidence = true;
}
config.scanner.verify_backend = resolved_backend.to_owned();
// --harden=<standard|strict> overrides the config default.
if let Some(ref profile) = harden {
config.scanner.harden_profile = profile.to_owned();
}
}
// Without the dynamic feature, keep the user's verify toggle in
// the resolved config so the scan command can either suppress the
// warning (`--no-verify`) or explain why verification is static-only.
#[cfg(not(feature = "dynamic"))]
{
if no_verify {
config.scanner.verify = false;
} else if verify {
config.scanner.verify = true;
}
if verify_all_confidence {
config.scanner.verify_all_confidence = true;
}
let _ = unsafe_sandbox;
let _ = backend;
let _ = harden;
}
// ── --explain-engine: print resolved config and exit ────────
if explain_engine {
print_engine_explanation(config, engine_profile);
@ -325,8 +388,27 @@ pub fn handle_command(
show_instances.as_deref(),
database_dir,
config,
baseline.as_deref().map(std::path::Path::new),
baseline_write.as_deref().map(std::path::Path::new),
gate.as_deref(),
verbose,
)?;
}
#[cfg(feature = "dynamic")]
Commands::VerifyFeedback {
finding_id,
wrong,
right,
upload,
} => {
handle_verify_feedback(&finding_id, wrong.as_deref(), right, upload)?;
}
#[cfg(not(feature = "dynamic"))]
Commands::VerifyFeedback { .. } => {
return Err(crate::errors::NyxError::Msg(
"The `dynamic` feature is not enabled. Rebuild with `cargo build --features dynamic`.".into(),
));
}
Commands::Index { action } => {
install_from_config(config);
index::handle(action, database_dir, config)?;
@ -356,6 +438,14 @@ pub fn handle_command(
Commands::Rules { action } => {
self::rules::handle(action, config)?;
}
Commands::Surface {
path,
format,
build,
} => {
install_from_config(config);
surface::handle(&path, format, build, database_dir, config)?;
}
Commands::Serve {
path,
port,
@ -387,6 +477,59 @@ pub fn handle_command(
Ok(())
}
/// Handle `nyx verify-feedback` (§21.2).
///
/// Records the user's correction or confirmation for a finding verdict.
/// Local-first: appends one JSON line to the telemetry log; no auto-upload.
///
/// * `finding_id`: ID of the finding the feedback refers to.
/// * `wrong`: when `Some(reason)`, the verdict is recorded as
///   `wrong:<reason>`; takes precedence over `right`.
/// * `right`: when `true` (and `wrong` is `None`), the verdict is recorded
///   as `right`.
/// * `upload`: reserved; currently ignored.
///
/// # Errors
///
/// Returns `NyxError::Msg` when neither `wrong` nor `right` is supplied.
/// Failing to write the log is deliberately *not* an error (recording is
/// best-effort), but the failure is reported on stderr instead of claiming
/// that the feedback was recorded.
#[cfg(feature = "dynamic")]
fn handle_verify_feedback(
    finding_id: &str,
    wrong: Option<&str>,
    right: bool,
    upload: bool,
) -> crate::errors::NyxResult<()> {
    use std::fs::OpenOptions;
    use std::io::Write;

    let _ = upload; // Upload not yet implemented (reserved).

    let feedback_kind = match (wrong, right) {
        (Some(reason), _) => format!("wrong:{reason}"),
        (None, true) => "right".to_owned(),
        (None, false) => {
            return Err(crate::errors::NyxError::Msg(
                "specify --wrong \"reason\" or --right".into(),
            ));
        }
    };

    let record = serde_json::json!({
        "ts": chrono::Utc::now().to_rfc3339(),
        "event": "verify_feedback",
        "finding_id": finding_id,
        "feedback": feedback_kind,
    });

    let Some(log_path) = crate::dynamic::telemetry::log_path() else {
        eprintln!("Feedback recorded (in-memory only; cannot determine cache path).");
        return Ok(());
    };

    // Append one compact JSON line to the telemetry log.
    let append = || -> std::io::Result<()> {
        if let Some(parent) = log_path.parent() {
            // A failure here is ignored on purpose: the `open` below
            // surfaces the real error if the directory is unusable.
            let _ = std::fs::create_dir_all(parent);
        }
        let mut log = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&log_path)?;
        // `serde_json::Value`'s `Display` impl emits compact JSON.
        writeln!(log, "{record}")
    };

    match append() {
        Ok(()) => eprintln!(
            "Feedback recorded for finding {}. Log: {}",
            finding_id,
            log_path.display()
        ),
        // Stay best-effort (exit status is unchanged) but do not report
        // success for feedback that never reached the log.
        Err(err) => eprintln!(
            "warning: could not record feedback for finding {} in {}: {}",
            finding_id,
            log_path.display(),
            err
        ),
    }
    Ok(())
}
/// Pretty-print the effective analysis-engine configuration for
/// `nyx scan --explain-engine`. Writes to stdout so it composes with
/// standard shell redirection and process substitution.

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,9 @@
use crate::database::index::Indexer;
use crate::errors::NyxResult;
use crate::server::app::{AppState, ServerEvent, build_router};
use crate::server::jobs::JobManager;
use crate::server::security::LocalServerSecurity;
use crate::utils::config::Config;
use crate::utils::project::get_project_info;
use crate::utils::targets::{TargetTouch, remember_target};
use console::style;
use parking_lot::RwLock;
use std::path::Path;
@ -31,18 +30,7 @@ pub fn handle(
let rayon_stack_size = config.performance.rayon_thread_stack_size;
let (event_tx, _) = tokio::sync::broadcast::channel(64);
// Initialize DB pool for scan persistence
let db_pool = {
let (_, db_path) = get_project_info(&scan_root, database_dir)?;
match Indexer::init(&db_path) {
Ok(pool) => Some(pool),
Err(e) => {
tracing::warn!("Failed to initialize scan DB: {e}");
None
}
}
};
let _ = remember_target(database_dir, &scan_root, TargetTouch::Seen);
let addr = socket_addr(&host, port);
@ -75,16 +63,17 @@ pub fn handle(
let security = LocalServerSecurity::new(local_addr.port());
let state = AppState {
scan_root: scan_root.clone(),
scan_root: Arc::new(RwLock::new(scan_root.clone())),
config_dir: config_dir.to_path_buf(),
database_dir: database_dir.to_path_buf(),
security,
config: Arc::new(RwLock::new(config.clone())),
job_manager: Arc::new(JobManager::new(max_jobs, rayon_stack_size)),
event_tx: event_tx.clone(),
db_pool,
db_pools: Arc::new(RwLock::new(std::collections::HashMap::new())),
findings_cache: Arc::new(RwLock::new(None)),
};
let _ = state.db_pool_for(&scan_root);
// Invalidate the findings cache whenever a scan finishes so the next
// request rebuilds against fresh diags. The next-request rebuild keeps

750
src/commands/surface.rs Normal file
View file

@ -0,0 +1,750 @@
//! `nyx surface` subcommand.
//!
//! Walks the project tree, builds a [`SurfaceMap`] from the framework
//! probes (plus any persisted data-store / external-service /
//! dangerous-local nodes from a prior indexed scan) and renders the
//! map in the format requested by the user.
//!
//! Output formats:
//! * `text`: indented tree per entry-point, grouped by file
//! * `json`: canonical JSON (byte-identical to the SQLite payload)
//! * `dot`: graphviz source, ready to pipe through `dot -Tsvg`
//! * `svg`: graphviz source rendered via the local `dot` binary
//!
//! The command is read-only: it never persists to SQLite and never
//! modifies the project tree. It tries to load a previously persisted
//! map first; if none exists (no `nyx scan` ever ran, or the index was
//! cleaned) it falls back to building a fresh entry-point-only map by
//! running the framework probes against the on-disk source.
//!
//! Pass `--build` to force a full inline build that runs pass-1
//! summary extraction + call-graph construction. That populates the
//! same DataStore / ExternalService / DangerousLocal nodes and Reaches
//! edges that an indexed scan would have persisted, at the cost of
//! parsing the project tree once (same wall-clock as `nyx index
//! build`).
use crate::ast::extract_all_summaries_from_bytes;
use crate::callgraph;
use crate::cli::SurfaceFormat;
use crate::database::index::Indexer;
use crate::errors::{NyxError, NyxResult};
use crate::summary::GlobalSummaries;
use crate::surface::{
DataStoreKind, EdgeKind, EntryPoint, ExternalServiceKind, SurfaceMap, SurfaceNode,
build::{SurfaceBuildInputs, build_surface_map},
};
use crate::utils::Config;
use crate::utils::project::get_project_info;
use crate::walk::spawn_file_walker;
use crossbeam_channel::TryRecvError;
use rayon::prelude::*;
use std::collections::BTreeMap;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
/// Entry point for `nyx surface`.
///
/// Canonicalizes `path` into the scan root, obtains a [`SurfaceMap`], and
/// writes its rendering in `format` to stdout.
///
/// With `build_inline` set, any persisted map is bypassed and the map is
/// produced by [`build_full_from_filesystem`]; otherwise [`load_or_build`]
/// decides between the persisted map and the entry-point-only fallback.
///
/// # Errors
///
/// Fails when `path` cannot be canonicalized, when building or rendering
/// the map fails, or when writing to stdout fails.
pub fn handle(
    path: &str,
    format: SurfaceFormat,
    build_inline: bool,
    database_dir: &Path,
    config: &Config,
) -> NyxResult<()> {
    let scan_root = Path::new(path).canonicalize()?;

    let built = if build_inline {
        build_full_from_filesystem(&scan_root, config)
    } else {
        load_or_build(&scan_root, database_dir, config)
    };
    let map = built?;

    let mut sink = std::io::stdout().lock();
    match format {
        SurfaceFormat::Text => {
            sink.write_all(render_text(&map, Some(&scan_root)).as_bytes())?;
        }
        SurfaceFormat::Dot => {
            sink.write_all(render_dot(&map).as_bytes())?;
        }
        SurfaceFormat::Svg => {
            let rendered = render_svg(&map)?;
            sink.write_all(&rendered)?;
        }
        SurfaceFormat::Json => {
            // `to_json` is called on a mutable binding, so take ownership
            // of the map for this arm.
            let mut canonical = map;
            let payload = canonical
                .to_json()
                .map_err(|e| NyxError::Msg(format!("surface map JSON: {e}")))?;
            sink.write_all(&payload)?;
            sink.write_all(b"\n")?;
        }
    }
    Ok(())
}
/// Return the SurfaceMap persisted for `scan_root`'s project, falling back
/// to [`build_from_filesystem`] when none is usable.
///
/// The persisted map is used only if every step succeeds: the project
/// info resolves, the database file exists, the pool and indexer open,
/// a map is stored, and that map has at least one node. Any failure along
/// the way is ignored and triggers the filesystem fallback.
pub fn load_or_build(
    scan_root: &Path,
    database_dir: &Path,
    config: &Config,
) -> NyxResult<SurfaceMap> {
    // Each `?` inside the closure means "no usable persisted map".
    let persisted = || -> Option<SurfaceMap> {
        let (project, db_path) = get_project_info(scan_root, database_dir).ok()?;
        if !db_path.exists() {
            return None;
        }
        let pool = Indexer::init(&db_path).ok()?;
        let idx = Indexer::from_pool(&project, &pool).ok()?;
        let map = idx.load_surface_map().ok()??;
        if map.nodes.is_empty() {
            None
        } else {
            Some(map)
        }
    };

    match persisted() {
        Some(map) => Ok(map),
        None => build_from_filesystem(scan_root, config),
    }
}
/// Build a SurfaceMap from the files under `scan_root` using empty
/// summaries and a call graph built from those empty summaries.
///
/// This is the fallback used by [`load_or_build`] when no persisted map
/// is available.
fn build_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<SurfaceMap> {
    let files = collect_files(scan_root, config)?;
    let empty_summaries = GlobalSummaries::new();
    let empty_graph = callgraph::build_call_graph(&empty_summaries, &[]);
    Ok(build_surface_map(&SurfaceBuildInputs {
        files: &files,
        scan_root: Some(scan_root),
        global_summaries: &empty_summaries,
        call_graph: &empty_graph,
        config,
    }))
}
/// Build a full SurfaceMap directly from source.
///
/// Collects the project files, extracts summaries for them inline via
/// [`build_summaries_inline`], installs the hierarchy on the merged
/// summaries, builds the call graph from them, and hands everything to
/// [`build_surface_map`]. Nothing is read from or written to SQLite here.
fn build_full_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<SurfaceMap> {
    let files = collect_files(scan_root, config)?;

    let mut merged = build_summaries_inline(&files, scan_root, config);
    merged.install_hierarchy();
    let graph = callgraph::build_call_graph(&merged, &[]);

    Ok(build_surface_map(&SurfaceBuildInputs {
        files: &files,
        scan_root: Some(scan_root),
        global_summaries: &merged,
        call_graph: &graph,
        config,
    }))
}
/// Extract summaries from every file in `files` in parallel and combine
/// them into one [`GlobalSummaries`].
///
/// Each rayon fold accumulator starts from `GlobalSummaries::new` and
/// receives the function, SSA, SSA-body, and auth summaries (plus any
/// cross-package import map) of the files it processes; the accumulators
/// are then merged pairwise. Per the original note, this mirrors the
/// pass-1 fold/reduce of `scan_filesystem_with_observer` without the
/// progress / metrics / logs plumbing.
///
/// A file that cannot be read, or whose extraction fails, is skipped so
/// that one bad file does not abort the whole map.
fn build_summaries_inline(files: &[PathBuf], scan_root: &Path, config: &Config) -> GlobalSummaries {
    let root_str = scan_root.to_string_lossy().into_owned();
    let mg = config.module_graph.as_deref();

    files
        .par_iter()
        .fold(GlobalSummaries::new, |mut acc, file| {
            // Read + extract; either failure means "skip this file".
            let extracted = std::fs::read(file).ok().and_then(|bytes| {
                extract_all_summaries_from_bytes(&bytes, file, config, Some(scan_root)).ok()
            });
            let Some((funcs, ssa, bodies, auth, cross_pkg)) = extracted else {
                return acc;
            };

            for summary in funcs {
                let key = summary.func_key_with_resolver(Some(&root_str), mg);
                acc.insert(key, summary);
            }
            for (key, ssa_summary) in ssa {
                acc.insert_ssa(key, ssa_summary);
            }
            for (key, body) in bodies {
                acc.insert_body(key, body);
            }
            for (key, auth_summary) in auth {
                acc.insert_auth(key, auth_summary);
            }
            if let Some((namespace, imports)) = cross_pkg {
                acc.insert_cross_package_imports(namespace, imports);
            }
            acc
        })
        .reduce(GlobalSummaries::new, |mut left, right| {
            left.merge(right);
            left
        })
}
/// Drain the file walker started for `root` into a single list of paths.
///
/// Batches are taken without blocking while any are queued; when the
/// channel is momentarily empty the loop blocks for the next batch. It
/// stops once the channel is disconnected, then joins the walker handle
/// (the join result is ignored).
fn collect_files(root: &Path, config: &Config) -> NyxResult<Vec<PathBuf>> {
    let (rx, walker) = spawn_file_walker(root, config);
    let mut collected = Vec::new();
    loop {
        let batch = match rx.try_recv() {
            Ok(batch) => batch,
            Err(TryRecvError::Disconnected) => break,
            Err(TryRecvError::Empty) => {
                // Nothing queued right now: wait for the next batch, or
                // stop if the sender has gone away.
                let Ok(batch) = rx.recv() else {
                    break;
                };
                batch
            }
        };
        collected.extend(batch);
    }
    let _ = walker.join();
    Ok(collected)
}
// Text rendering
/// Produce a human-readable tree. Files appear as top-level headers;
/// each entry-point sits under its host file with its reach summary
/// (`Reaches: …`). Data stores / external services / dangerous locals
/// that no entry-point reaches are grouped under a trailing "Unreached"
/// section so a reviewer notices orphaned attack surface.
pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String {
let mut out = String::new();
if let Some(root) = scan_root {
out.push_str(&format!("Surface map for {}\n", root.display()));
} else {
out.push_str("Surface map\n");
}
let entry_count = count_kind(map, |n| matches!(n, SurfaceNode::EntryPoint(_)));
let ds_count = count_kind(map, |n| matches!(n, SurfaceNode::DataStore(_)));
let es_count = count_kind(map, |n| matches!(n, SurfaceNode::ExternalService(_)));
let dl_count = count_kind(map, |n| matches!(n, SurfaceNode::DangerousLocal(_)));
out.push_str(&format!(
" {} {}, {} {}, {} {}, {} {}\n\n",
entry_count,
plural(entry_count, "entry-point", "entry-points"),
ds_count,
plural(ds_count, "data store", "data stores"),
es_count,
plural(es_count, "external service", "external services"),
dl_count,
plural(dl_count, "dangerous local", "dangerous locals"),
));
if map.nodes.is_empty() {
out.push_str(" (no entry-points or sinks detected)\n");
return out;
}
let mut by_file: BTreeMap<&str, Vec<usize>> = BTreeMap::new();
for (idx, node) in map.nodes.iter().enumerate() {
by_file
.entry(node.location().file.as_str())
.or_default()
.push(idx);
}
let mut reached: std::collections::HashSet<u32> = std::collections::HashSet::new();
for edge in &map.edges {
if matches!(edge.kind, EdgeKind::Reaches) {
reached.insert(edge.to);
}
}
for (file, indices) in &by_file {
out.push_str(&format!("{file}\n"));
let entry_indices: Vec<usize> = indices
.iter()
.copied()
.filter(|i| matches!(map.nodes[*i], SurfaceNode::EntryPoint(_)))
.collect();
if !entry_indices.is_empty() {
for &ei in &entry_indices {
let SurfaceNode::EntryPoint(ep) = &map.nodes[ei] else {
continue;
};
render_entry_point(&mut out, ep, ei as u32, map);
}
}
for &i in indices {
match &map.nodes[i] {
SurfaceNode::DataStore(_)
| SurfaceNode::ExternalService(_)
| SurfaceNode::DangerousLocal(_) => {
if !entry_indices.is_empty() {
continue;
}
if reached.contains(&(i as u32)) {
continue;
}
render_node_line(&mut out, &map.nodes[i], " ");
}
_ => {}
}
}
out.push('\n');
}
// Orphans: destinations that no entry-point reaches.
let mut orphans: Vec<usize> = Vec::new();
for (idx, node) in map.nodes.iter().enumerate() {
if matches!(node, SurfaceNode::EntryPoint(_)) {
continue;
}
if reached.contains(&(idx as u32)) {
continue;
}
// Already printed under host file when there were no entry-points;
// suppress to avoid duplication.
let host_has_entries = by_file
.get(node.location().file.as_str())
.map(|v| {
v.iter()
.any(|&j| matches!(map.nodes[j], SurfaceNode::EntryPoint(_)))
})
.unwrap_or(false);
if !host_has_entries {
continue;
}
orphans.push(idx);
}
if !orphans.is_empty() {
out.push_str("Unreached surface\n");
for idx in orphans {
render_node_line(&mut out, &map.nodes[idx], " ");
}
}
out
}
/// Append the text block for one entry-point to `out`.
///
/// Emits a headline (method, route, framework, and an ` [auth]` tag when
/// `auth_required` is set), the handler name and location, and then the
/// nodes this entry-point reaches: the targets of `Reaches` edges whose
/// `from` equals `ep_idx`, sorted by location. Edges pointing at an
/// out-of-range node index are ignored.
fn render_entry_point(out: &mut String, ep: &EntryPoint, ep_idx: u32, map: &SurfaceMap) {
    let auth_tag = if ep.auth_required { " [auth]" } else { "" };
    let headline = format!(
        " {} {} ({:?}){}\n",
        method_str(ep.method),
        ep.route,
        ep.framework,
        auth_tag
    );
    let handler_line = format!(
        " handler: {} at {}:{}\n",
        ep.handler_name, ep.handler_location.file, ep.handler_location.line
    );
    out.push_str(&headline);
    out.push_str(&handler_line);

    let mut targets: Vec<&SurfaceNode> = Vec::new();
    for edge in map.edges.iter() {
        if edge.from != ep_idx || !matches!(edge.kind, EdgeKind::Reaches) {
            continue;
        }
        if let Some(node) = map.nodes.get(edge.to as usize) {
            targets.push(node);
        }
    }
    targets.sort_by(|lhs, rhs| lhs.location().cmp(rhs.location()));

    if targets.is_empty() {
        out.push_str(" reaches: (none)\n");
    } else {
        out.push_str(" reaches:\n");
        for target in targets {
            render_node_line(out, target, " - ");
        }
    }
}
/// Append a one-line description of `node` to `out`, preceded by `prefix`.
///
/// The line format depends on the node variant; every line ends in `\n`.
fn render_node_line(out: &mut String, node: &SurfaceNode, prefix: &str) {
    // Build the whole line first, then append it in one go.
    let line = match node {
        SurfaceNode::EntryPoint(ep) => format!(
            "{prefix}entry {} {} ({:?})\n",
            method_str(ep.method),
            ep.route,
            ep.framework
        ),
        SurfaceNode::DataStore(ds) => format!(
            "{prefix}data-store ({}): {} [{}:{}]\n",
            ds_kind_str(ds.kind),
            ds.label,
            ds.location.file,
            ds.location.line
        ),
        SurfaceNode::ExternalService(es) => format!(
            "{prefix}external ({}): {} [{}:{}]\n",
            es_kind_str(es.kind),
            es.label,
            es.location.file,
            es.location.line
        ),
        SurfaceNode::DangerousLocal(dl) => format!(
            "{prefix}dangerous: {} (cap=0x{:x}) [{}:{}]\n",
            dl.function_name, dl.cap_bits, dl.location.file, dl.location.line
        ),
    };
    out.push_str(&line);
}
/// Count the nodes of `map` for which the predicate `f` returns `true`.
fn count_kind<F: Fn(&SurfaceNode) -> bool>(map: &SurfaceMap, f: F) -> usize {
    let mut total = 0usize;
    for node in &map.nodes {
        if f(node) {
            total += 1;
        }
    }
    total
}
/// Pick the noun form matching `count`: `singular` for exactly one, `plural`
/// for everything else (including zero).
fn plural(count: usize, singular: &'static str, plural: &'static str) -> &'static str {
    match count {
        1 => singular,
        _ => plural,
    }
}
fn method_str(m: crate::entry_points::HttpMethod) -> &'static str {
use crate::entry_points::HttpMethod::*;
match m {
GET => "GET",
HEAD => "HEAD",
POST => "POST",
PUT => "PUT",
PATCH => "PATCH",
DELETE => "DELETE",
OPTIONS => "OPTIONS",
}
}
fn ds_kind_str(k: DataStoreKind) -> &'static str {
match k {
DataStoreKind::Sql => "sql",
DataStoreKind::KeyValue => "key_value",
DataStoreKind::Document => "document",
DataStoreKind::BlobStore => "blob_store",
DataStoreKind::Filesystem => "filesystem",
DataStoreKind::Unknown => "unknown",
}
}
fn es_kind_str(k: ExternalServiceKind) -> &'static str {
match k {
ExternalServiceKind::HttpApi => "http_api",
ExternalServiceKind::MessageBroker => "message_broker",
ExternalServiceKind::SearchIndex => "search_index",
ExternalServiceKind::AuthProvider => "auth_provider",
ExternalServiceKind::Unknown => "unknown",
}
}
// DOT / SVG rendering
/// Render `map` as a Graphviz DOT digraph named `nyx_surface`.
///
/// Each node becomes `n<index>` with a label, shape and colour chosen by its
/// variant; each edge becomes `n<from> -> n<to>` labelled with the edge kind's
/// `Debug` form and drawn in a kind-specific line style. Free-text fields are
/// passed through `escape_dot` before being placed inside a quoted label.
pub fn render_dot(map: &SurfaceMap) -> String {
    let mut dot = String::from("digraph nyx_surface {\n");
    dot.push_str(" rankdir=LR;\n");
    dot.push_str(" node [fontname=\"Helvetica\", shape=box, style=rounded];\n");

    for (idx, node) in map.nodes.iter().enumerate() {
        let label: String;
        let shape: &str;
        let color: &str;
        match node {
            SurfaceNode::EntryPoint(ep) => {
                label = format!(
                    "{} {}\\n{:?}\\n{}",
                    method_str(ep.method),
                    escape_dot(&ep.route),
                    ep.framework,
                    escape_dot(&ep.handler_name),
                );
                shape = "box";
                // Authenticated entry points get a different colour.
                color = if ep.auth_required {
                    "#3aa57c"
                } else {
                    "#3072c4"
                };
            }
            SurfaceNode::DataStore(ds) => {
                label = format!(
                    "DataStore ({})\\n{}",
                    ds_kind_str(ds.kind),
                    escape_dot(&ds.label)
                );
                shape = "cylinder";
                color = "#b07a18";
            }
            SurfaceNode::ExternalService(es) => {
                label = format!(
                    "External ({})\\n{}",
                    es_kind_str(es.kind),
                    escape_dot(&es.label)
                );
                shape = "component";
                color = "#8b3aa5";
            }
            SurfaceNode::DangerousLocal(dl) => {
                label = format!(
                    "Dangerous\\n{}\\ncap=0x{:x}",
                    escape_dot(&dl.function_name),
                    dl.cap_bits
                );
                shape = "octagon";
                color = "#c44141";
            }
        }
        let node_line = format!(
            " n{idx} [label=\"{label}\", shape={shape}, color=\"{color}\", fontcolor=\"{color}\"];\n",
        );
        dot.push_str(&node_line);
    }

    for edge in &map.edges {
        let style = match edge.kind {
            EdgeKind::Reaches | EdgeKind::ReadsFrom | EdgeKind::TalksTo => "solid",
            EdgeKind::Calls => "dashed",
            EdgeKind::WritesTo => "bold",
            EdgeKind::Triggers | EdgeKind::AuthRequiredOn => "dotted",
        };
        let edge_line = format!(
            " n{} -> n{} [label=\"{:?}\", style={style}];\n",
            edge.from, edge.to, edge.kind
        );
        dot.push_str(&edge_line);
    }

    dot.push_str("}\n");
    dot
}
/// Escape `s` for use inside a double-quoted DOT label.
///
/// Backslashes are doubled, double quotes are backslash-escaped, and literal
/// newlines become the two-character sequence `\n`. All other characters are
/// copied through unchanged.
fn escape_dot(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Render `map` to SVG by piping its DOT form through the Graphviz `dot`
/// binary (`dot -Tsvg`) and returning whatever `dot` writes to stdout.
///
/// # Errors
///
/// Returns `NyxError::Msg` when `dot` cannot be spawned, when waiting on it
/// fails, when it exits with a non-zero status (the message carries its
/// stderr), or when the DOT text could not be written to its stdin.
fn render_svg(map: &SurfaceMap) -> NyxResult<Vec<u8>> {
    let dot = render_dot(map);
    let mut child = Command::new("dot")
        .arg("-Tsvg")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| {
            NyxError::Msg(format!(
                "failed to spawn `dot` for SVG rendering: {e}. Install graphviz, or use `--format dot` and pipe through `dot -Tsvg` yourself."
            ))
        })?;

    // Feed stdin from a helper thread so `wait_with_output` can drain stdout
    // and stderr at the same time. Writing everything up front on this thread
    // can deadlock once the child blocks on a full output pipe. The pipe is
    // closed (EOF for `dot`) when the closure returns and drops `stdin`.
    let feeder = child
        .stdin
        .take()
        .map(|mut stdin| std::thread::spawn(move || stdin.write_all(dot.as_bytes())));

    // Always reap the child, even if feeding stdin failed, so a failed write
    // never leaves an un-waited process behind.
    let output = child
        .wait_with_output()
        .map_err(|e| NyxError::Msg(format!("waiting on `dot`: {e}")))?;
    let fed = match feeder {
        Some(handle) => handle
            .join()
            .map_err(|_| NyxError::Msg("writer thread for dot stdin panicked".into()))?,
        None => Ok(()),
    };

    // A non-zero exit takes precedence: dot's own stderr explains the failure
    // better than the broken-pipe error the writer usually sees in that case.
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
        return Err(NyxError::Msg(format!("dot exited non-zero: {stderr}")));
    }
    fed.map_err(|e| NyxError::Msg(format!("write DOT to dot stdin: {e}")))?;
    Ok(output.stdout)
}
#[cfg(test)]
mod tests {
// Unit tests for the text / DOT renderers and for the filesystem-backed
// surface builders defined in the parent module.
use super::*;
use crate::entry_points::HttpMethod;
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceEdge, SurfaceNode};
// Fixture: a canonicalised map holding a single unauthenticated Flask
// `GET /users` entry point whose handler is `list_users` in `app.py`.
fn flask_fixture_map() -> SurfaceMap {
let mut map = SurfaceMap::new();
map.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
location: SourceLocation::new("app.py", 5, 1),
framework: Framework::Flask,
method: HttpMethod::GET,
route: "/users".into(),
handler_name: "list_users".into(),
handler_location: SourceLocation::new("app.py", 6, 1),
auth_required: false,
}));
map.canonicalize();
map
}
// The text renderer must print the method/route header, the handler line
// and the host file name for an entry point.
#[test]
fn text_render_shows_entry_point() {
let m = flask_fixture_map();
let text = render_text(&m, None);
assert!(text.contains("GET /users"));
assert!(text.contains("handler: list_users"));
assert!(text.contains("app.py"));
}
// The DOT output starts with the `digraph nyx_surface` header and carries
// the entry point's method and route in a node label.
#[test]
fn dot_render_emits_digraph_header() {
let m = flask_fixture_map();
let dot = render_dot(&m);
assert!(dot.starts_with("digraph nyx_surface"));
assert!(dot.contains("GET /users"));
}
// A double quote inside a route must come out backslash-escaped so the
// quoted DOT label stays well-formed.
#[test]
fn dot_escapes_quotes_in_labels() {
let mut m = SurfaceMap::new();
m.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
location: SourceLocation::new("a.py", 1, 1),
framework: Framework::Flask,
method: HttpMethod::GET,
route: r#"/with"quote"#.into(),
handler_name: "h".into(),
handler_location: SourceLocation::new("a.py", 2, 1),
auth_required: false,
}));
let dot = render_dot(&m);
assert!(dot.contains(r#"/with\"quote"#));
}
// A node connected to an entry point by a `Reaches` edge must be listed
// under that entry point's `reaches:` section in the text output.
#[test]
fn text_render_groups_reaches_under_entry() {
let mut m = flask_fixture_map();
m.nodes.push(SurfaceNode::DangerousLocal(
crate::surface::DangerousLocal {
location: SourceLocation::new("app.py", 12, 1),
function_name: "eval".into(),
cap_bits: crate::labels::Cap::CODE_EXEC.bits(),
},
));
// Build edge after canonicalize so indices are stable.
m.canonicalize();
// Look the indices up by variant rather than assuming an order.
let ep_idx = m
.nodes
.iter()
.position(|n| matches!(n, SurfaceNode::EntryPoint(_)))
.unwrap() as u32;
let dl_idx = m
.nodes
.iter()
.position(|n| matches!(n, SurfaceNode::DangerousLocal(_)))
.unwrap() as u32;
m.edges.push(SurfaceEdge {
from: ep_idx,
to: dl_idx,
kind: EdgeKind::Reaches,
});
m.canonicalize();
let text = render_text(&m, None);
assert!(text.contains("reaches:"));
assert!(text.contains("dangerous: eval"));
}
#[test]
fn build_summaries_inline_extracts_function_summaries() {
// Establishes that the inline pass-1 path produces the same
// `GlobalSummaries` shape that an indexed scan would have
// persisted — at minimum, one FuncSummary per top-level
// function in the fixture. Without this guarantee the surface
// build downstream falls back to entry-points-only because
// `detect_data_stores` / `detect_external_services` /
// `detect_dangerous_locals` walk the summaries map.
let td = tempfile::tempdir().unwrap();
let project_dir = td.path();
std::fs::write(
project_dir.join("app.py"),
"from flask import Flask, request\n\
app = Flask(__name__)\n\
\n\
@app.route('/run')\n\
def run():\n\
cmd = request.args.get('cmd')\n\
return str(eval(cmd))\n\
\n\
def helper(x):\n\
return eval(x)\n",
)
.unwrap();
let cfg = Config::default();
let canon = project_dir.canonicalize().unwrap();
let files = collect_files(&canon, &cfg).unwrap();
let summaries = build_summaries_inline(&files, &canon, &cfg);
// Match on the name suffix so any qualifying prefix does not matter.
let names: Vec<String> = summaries.iter().map(|(k, _)| k.qualified_name()).collect();
assert!(
names.iter().any(|n| n.ends_with("run")),
"summaries should contain `run`, got {names:?}"
);
assert!(
names.iter().any(|n| n.ends_with("helper")),
"summaries should contain `helper`, got {names:?}"
);
}
#[test]
fn build_full_from_filesystem_walks_pass1_pipeline() {
// End-to-end smoke for `surface::handle(..., build=true)`: the
// inline-build path must produce a non-empty SurfaceMap on a
// project with a recognisable framework route. Equivalent to
// running `nyx surface --build .` on a single-file Flask app.
let td = tempfile::tempdir().unwrap();
let project_dir = td.path();
std::fs::write(
project_dir.join("app.py"),
"from flask import Flask, request\n\
app = Flask(__name__)\n\
\n\
@app.route('/run')\n\
def run():\n\
cmd = request.args.get('cmd')\n\
return str(eval(cmd))\n",
)
.unwrap();
let cfg = Config::default();
let canon = project_dir.canonicalize().unwrap();
let map = build_full_from_filesystem(&canon, &cfg).expect("inline build succeeds");
let has_entry = map
.nodes
.iter()
.any(|n| matches!(n, SurfaceNode::EntryPoint(_)));
assert!(has_entry, "Flask /run route should be detected");
}
#[test]
fn build_from_filesystem_entry_points_only_runs_with_empty_summaries() {
// Locks in the fallback contract: `build_from_filesystem` runs
// framework probes against an empty `GlobalSummaries` and
// produces only entry-point nodes. Any future change that
// accidentally widens the fallback to populate sinks should
// either ship through `--build` or update this test.
let td = tempfile::tempdir().unwrap();
let project_dir = td.path();
std::fs::write(
project_dir.join("app.py"),
"from flask import Flask\n\
app = Flask(__name__)\n\
\n\
@app.route('/run')\n\
def run():\n\
return 'ok'\n",
)
.unwrap();
let cfg = Config::default();
let canon = project_dir.canonicalize().unwrap();
let map = build_from_filesystem(&canon, &cfg).expect("fallback build succeeds");
// Entry point should still appear (framework probes run in the
// fallback path too).
assert!(
map.nodes
.iter()
.any(|n| matches!(n, SurfaceNode::EntryPoint(_))),
"Flask route should land via framework probe"
);
// No DataStore / ExternalService / DangerousLocal because the
// fallback path feeds an empty GlobalSummaries to the detectors.
let non_entry = map.nodes.iter().any(|n| {
matches!(
n,
SurfaceNode::DataStore(_)
| SurfaceNode::ExternalService(_)
| SurfaceNode::DangerousLocal(_)
)
});
assert!(
!non_entry,
"entry-points-only fallback should not produce non-entry nodes"
);
}
}

View file

@ -231,6 +231,13 @@ fn type_kind_index(kind: &TypeKind) -> u32 {
| TypeKind::GormDb
| TypeKind::SqlxDb
| TypeKind::HibernateSession => 3,
// ProcessBuilder participates only in the type-qualified callee
// resolver via `label_prefix()`; no dedicated bitset slot, share
// the Object index like the other receiver-only TypeKinds.
TypeKind::ProcessBuilder => 3,
// Runtime is likewise a type-qualified-resolver-only receiver kind
// (`Runtime.exec`); no dedicated bitset slot, share the Object index.
TypeKind::Runtime => 3,
}
}

View file

@ -15,8 +15,6 @@
//! literal operand. Necessary because individual comparisons are NOT
//! decomposed into separate SSA operations (condition nodes → `Nop`).
#![allow(clippy::collapsible_if)]
use crate::cfg::NodeInfo;
use crate::ssa::const_prop::ConstLattice;
use crate::ssa::ir::{BlockId, SsaBody, SsaValue};

View file

@ -275,6 +275,14 @@ pub fn class_name_to_type_kind(name: &str) -> Option<TypeKind> {
// type-qualified resolution to `Template.process`, the SSTI
// sink defined in `labels/java.rs`.
"Template" => Some(TypeKind::Template),
// `java.lang.Runtime` declared receiver type. Routes the
// split-receiver shape `Runtime r = Runtime.getRuntime(); ...
// r.exec(...)` through type-qualified resolution to
// `Runtime.exec` (the only `Runtime.*` rule, always SHELL_ESCAPE),
// complementing the `constructor_type` factory route for
// `Runtime.getRuntime()`. No benign `Runtime.exec` exists, so
// typing any `Runtime`-declared receiver carries no FP risk.
"Runtime" => Some(TypeKind::Runtime),
// Python qualified type names.
// Only covers raw lowered names from isinstance(). The lowering in lower.rs
// extracts the literal type text: isinstance(x, requests.Session) produces

View file

@ -19,11 +19,19 @@ pub mod index {
use r2d2_sqlite::SqliteConnectionManager;
use rusqlite::{Connection, OpenFlags, OptionalExtension, params};
use std::fs;
use std::io::Read;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
/// How long each SQLite connection waits for the single writer slot.
///
/// Indexed scans can have dozens of Rayon workers finishing analysis at
/// once. SQLite still permits only one writer, so a timeout here turns that
/// burst into short backpressure instead of surfacing SQLITE_BUSY.
const SQLITE_BUSY_TIMEOUT: Duration = Duration::from_secs(60);
/// DB schema (foreignkeys enabled).
const SCHEMA: &str = r#"
@ -206,6 +214,36 @@ pub mod index {
first_seen_at TEXT NOT NULL
);
-- Dynamic verdict cache (§12 Q5).
-- Keyed on (spec_hash, entry_content_hash, transitive_import_digest).
-- Invalidation: any of entry content, import digest, toolchain_id,
-- corpus_version, or spec_format_version change DELETE row re-run.
CREATE TABLE IF NOT EXISTS dynamic_verdict_cache (
id INTEGER PRIMARY KEY AUTOINCREMENT,
spec_hash TEXT NOT NULL,
entry_content_hash TEXT NOT NULL,
transitive_import_digest TEXT NOT NULL,
toolchain_id TEXT NOT NULL,
corpus_version INTEGER NOT NULL,
spec_format_version INTEGER NOT NULL,
verdict_json TEXT NOT NULL,
created_at TEXT NOT NULL,
UNIQUE(spec_hash, entry_content_hash, transitive_import_digest,
toolchain_id, corpus_version, spec_format_version)
);
CREATE INDEX IF NOT EXISTS idx_dynamic_verdict_cache_spec_hash
ON dynamic_verdict_cache(spec_hash);
-- Phase 21: persisted attack-surface map. One row per project.
-- Stored as canonical JSON so the round-trip is byte-identical
-- across rescans (see `SurfaceMap::to_json`).
CREATE TABLE IF NOT EXISTS surface_map (
project TEXT PRIMARY KEY,
map_json BLOB NOT NULL,
updated_at INTEGER NOT NULL
);
-- Indexes on (project, file_path) for the per-file replace_* paths.
-- Without these, every DELETE WHERE project=? AND file_path=? does a
-- full table scan, which dominates indexing time as the cache grows.
@ -252,9 +290,6 @@ pub mod index {
/// footprint.
pub const SCHEMA_VERSION: &str = "4";
// TODO: ADD CLEANS FOR EACH TABLE BASED ON PROJECT WHICH RUNS ON CLEAN
// TODO: ADD DROP AND GIVE A CLI PARAMETER FOR DROP
/// A single issue row, ready for insertion.
#[derive(Debug, Clone)]
pub struct IssueRow<'a> {
@ -264,6 +299,127 @@ pub mod index {
pub col: i64,
}
type IndexWriteJob = Box<dyn FnOnce(&mut Indexer) -> NyxResult<()> + Send + 'static>;
/// Failure tally kept by the index writer thread.
///
/// Every error bumps `error_count`, but only the first few messages are kept
/// in `samples`, so a long run of failures cannot grow the report without
/// bound.
#[derive(Default)]
struct IndexWriteReport {
    /// Total number of errors recorded.
    error_count: usize,
    /// Messages of the earliest errors, capped at `MAX_SAMPLES`.
    samples: Vec<String>,
}

impl IndexWriteReport {
    /// Maximum number of error messages retained verbatim.
    const MAX_SAMPLES: usize = 8;

    /// Count `err`, keeping its message only while the sample list has room.
    fn record(&mut self, err: impl ToString) {
        self.error_count += 1;
        if self.samples.len() >= Self::MAX_SAMPLES {
            return;
        }
        self.samples.push(err.to_string());
    }
}
/// Cloneable handle used to submit write jobs to the index writer thread.
///
/// The underlying channel is bounded, so `enqueue` blocks the caller while
/// the queue is full; that is how parallel scanners are throttled when
/// SQLite's single writer falls behind.
#[derive(Clone)]
pub(crate) struct IndexWriteSender {
    tx: crossbeam_channel::Sender<IndexWriteJob>,
}

impl IndexWriteSender {
    /// Queue `job` to run on the writer thread against its `Indexer`.
    ///
    /// # Errors
    ///
    /// Returns `NyxError::Msg` when the channel rejects the job, i.e. the
    /// receiving side has already gone away.
    pub(crate) fn enqueue<F>(&self, job: F) -> NyxResult<()>
    where
        F: FnOnce(&mut Indexer) -> NyxResult<()> + Send + 'static,
    {
        let boxed: IndexWriteJob = Box::new(job);
        match self.tx.send(boxed) {
            Ok(()) => Ok(()),
            Err(_) => Err(NyxError::Msg(
                "database writer stopped before accepting write".into(),
            )),
        }
    }
}
/// Single-writer queue for project index mutations.
///
/// SQLite permits many readers but only one writer. Parallel scans should
/// therefore submit analyzed file results here instead of letting every
/// Rayon worker compete for the writer lock.
pub(crate) struct IndexWriteQueue {
tx: IndexWriteSender,
handle: std::thread::JoinHandle<IndexWriteReport>,
}
impl IndexWriteQueue {
pub(crate) fn start(
project: impl Into<String>,
pool: Arc<Pool<SqliteConnectionManager>>,
) -> Self {
let capacity = std::env::var("NYX_INDEX_WRITE_QUEUE_MAX")
.ok()
.and_then(|v| v.parse::<usize>().ok())
.filter(|n| *n >= 1)
.unwrap_or_else(|| (num_cpus::get() * 2).max(64));
Self::start_with_capacity(project, pool, capacity)
}
pub(crate) fn start_with_capacity(
project: impl Into<String>,
pool: Arc<Pool<SqliteConnectionManager>>,
capacity: usize,
) -> Self {
let project = project.into();
let (tx, rx) = crossbeam_channel::bounded::<IndexWriteJob>(capacity.max(1));
let handle = std::thread::spawn(move || {
let mut report = IndexWriteReport::default();
let mut idx = match Indexer::from_pool(&project, &pool) {
Ok(idx) => idx,
Err(err) => {
report.record(format!("writer init: {err}"));
return report;
}
};
for job in rx {
if let Err(err) = job(&mut idx) {
report.record(err);
}
}
report
});
Self {
tx: IndexWriteSender { tx },
handle,
}
}
pub(crate) fn sender(&self) -> IndexWriteSender {
self.tx.clone()
}
pub(crate) fn finish(self, stage: &str) -> NyxResult<()> {
let Self { tx, handle } = self;
drop(tx);
let report = handle
.join()
.map_err(|_| NyxError::Msg(format!("{stage} database writer panicked")))?;
if report.error_count == 0 {
return Ok(());
}
let mut details = report.samples;
if report.error_count > details.len() {
details.push(format!(
"... and {} more",
report.error_count - details.len()
));
}
Err(NyxError::Msg(format!(
"{stage} failed to persist scan state: {}",
details.join("; ")
)))
}
}
/// A scan record for DB persistence.
#[derive(Debug, Clone)]
pub struct ScanRecord {
@ -311,9 +467,62 @@ pub mod index {
project: String,
}
/// SQLite database files start with this 16-byte ASCII magic.
const SQLITE_MAGIC: &[u8; 16] = b"SQLite format 3\0";
/// Reject obviously non-SQLite files before handing them to the
/// connection pool, where the same rejection costs minutes instead of
/// microseconds on some corruption shapes.
///
/// Returns `Ok(())` when:
/// * the file does not exist (the pool will `CREATE` it),
/// * the file is zero-length (SQLite treats this as a fresh DB),
/// * the first 16 bytes match the SQLite magic header,
/// * the file is shorter than the magic but non-empty (extremely
/// unusual; we defer to SQLite rather than gating arbitrarily).
///
/// Returns `Err(NyxError::Sql(...))` carrying `SQLITE_NOTADB` when the
/// header is present but does not match.
fn preflight_header(database_path: &Path) -> NyxResult<()> {
let Ok(meta) = fs::metadata(database_path) else {
return Ok(());
};
if !meta.is_file() {
return Ok(());
}
if meta.len() < SQLITE_MAGIC.len() as u64 {
return Ok(());
}
let mut head = [0u8; 16];
let mut f = fs::File::open(database_path)?;
f.read_exact(&mut head)?;
if &head != SQLITE_MAGIC {
return Err(NyxError::Sql(rusqlite::Error::SqliteFailure(
rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_NOTADB),
Some(format!(
"file at {} is not a SQLite database (header magic mismatch)",
database_path.display(),
)),
)));
}
Ok(())
}
impl Indexer {
pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
// Fast-fail when the existing file is clearly not a SQLite
// database. Without this guard, certain corruption shapes
// (truncated header, header overwritten with arbitrary bytes,
// mid-page damage that preserves magic) can keep SQLite busy
// for 150-200 seconds inside the PRAGMA / schema execution
// below before it surfaces SQLITE_NOTADB or SQLITE_CORRUPT.
// A zero-length file is treated as a fresh DB by SQLite, so we
// only validate when the file is large enough to hold the
// 16-byte magic header.
preflight_header(database_path)?;
// NO_MUTEX is safe because r2d2 ensures each pooled connection
// is only ever used by one thread at a time. Combined with WAL
// mode this allows concurrent readers + a single writer without
@ -321,31 +530,9 @@ pub mod index {
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
| OpenFlags::SQLITE_OPEN_CREATE
| OpenFlags::SQLITE_OPEN_NO_MUTEX;
let manager = SqliteConnectionManager::file(database_path).with_flags(flags);
// r2d2's default `max_size` is 10, which can stall rayon
// workers on machines with more cores than that during the
// parallel indexing pass. Size the pool to comfortably hold
// a connection per rayon thread plus a small slack.
//
// `NYX_INDEX_POOL_MAX` overrides the auto-sized default. Use it in
// fd-constrained environments (test sandboxes, containers with low
// ulimit) where many parallel indexed scans would otherwise exhaust
// EMFILE: each pooled SQLite WAL connection costs ~3 fds (db + -wal
// + -shm), so 30 parallel scans × 16 conns × 3 fds = 1440 fds.
let max_conns = std::env::var("NYX_INDEX_POOL_MAX")
.ok()
.and_then(|v| v.parse::<u32>().ok())
.filter(|n| *n >= 1)
.unwrap_or_else(|| (num_cpus::get() as u32 + 4).max(16));
let pool = Arc::new(Pool::builder().max_size(max_conns).build(manager)?);
{
let conn = pool.get()?;
let conn = Self::open_configured_connection(database_path, flags)?;
conn.pragma_update(None, "journal_mode", "WAL")?;
conn.pragma_update(None, "synchronous", "NORMAL")?;
conn.pragma_update(None, "cache_size", "-8000")?; // 8 MB
conn.pragma_update(None, "temp_store", "MEMORY")?;
conn.pragma_update(None, "mmap_size", "268435456")?; // 256 MB
conn.execute_batch(SCHEMA)?;
// Migrate: if the function_summaries table is missing any required
@ -472,6 +659,22 @@ pub mod index {
conn.execute_batch(SCHEMA)?;
}
// Phase 21: ensure the `surface_map` table exists on
// DBs created before this table was introduced.
let surface_exists: bool = conn
.query_row(
"SELECT 1 FROM sqlite_master
WHERE type = 'table' AND name = 'surface_map'",
[],
|_| Ok(true),
)
.optional()?
.unwrap_or(false);
if !surface_exists {
tracing::info!("creating surface_map table");
conn.execute_batch(SCHEMA)?;
}
// Schema version check: invalidate cached summary tables
// when the on-disk artefact layout has changed in an
// incompatible way, independently of the engine version.
@ -483,9 +686,48 @@ pub mod index {
// version changes so stale serialized data cannot be loaded.
Self::check_engine_version(&conn)?;
}
let manager = SqliteConnectionManager::file(database_path)
.with_flags(flags)
.with_init(Self::configure_connection);
// r2d2's default `max_size` is 10, which can stall rayon
// workers on machines with more cores than that during the
// parallel indexing pass. Size the pool to comfortably hold
// a connection per rayon thread plus a small slack.
//
// `NYX_INDEX_POOL_MAX` overrides the auto-sized default. Use it in
// fd-constrained environments (test sandboxes, containers with low
// ulimit) where many parallel indexed scans would otherwise exhaust
// EMFILE: each pooled SQLite WAL connection costs ~3 fds (db + -wal
// + -shm), so 30 parallel scans × 16 conns × 3 fds = 1440 fds.
let max_conns = std::env::var("NYX_INDEX_POOL_MAX")
.ok()
.and_then(|v| v.parse::<u32>().ok())
.filter(|n| *n >= 1)
.unwrap_or_else(|| (num_cpus::get() as u32 + 4).max(16));
let pool = Arc::new(Pool::builder().max_size(max_conns).build(manager)?);
Ok(pool)
}
/// Open a standalone connection to `database_path` with `flags` and apply
/// the per-connection settings from `configure_connection` to it.
///
/// Any error from opening or configuring the connection is returned as-is.
fn open_configured_connection(
    database_path: &Path,
    flags: OpenFlags,
) -> rusqlite::Result<Connection> {
    Connection::open_with_flags(database_path, flags).and_then(|mut conn| {
        Self::configure_connection(&mut conn).map(|()| conn)
    })
}
/// Apply the connection-local settings every index connection needs.
///
/// Sets the busy timeout to `SQLITE_BUSY_TIMEOUT`, then issues the pragmas
/// below in order. The first failing call aborts the sequence and its error
/// is returned.
fn configure_connection(conn: &mut Connection) -> rusqlite::Result<()> {
// Wait for the writer slot instead of failing straight away when busy.
conn.busy_timeout(SQLITE_BUSY_TIMEOUT)?;
conn.pragma_update(None, "foreign_keys", "ON")?;
conn.pragma_update(None, "synchronous", "NORMAL")?;
conn.pragma_update(None, "cache_size", -8000i64)?; // 8 MB
conn.pragma_update(None, "temp_store", "MEMORY")?;
conn.pragma_update(None, "mmap_size", 268_435_456i64)?; // 256 MB
Ok(())
}
/// Add a column to an existing table when it is missing.
///
/// Non-destructive: leaves all existing rows untouched, populating
@ -686,7 +928,9 @@ pub mod index {
///
/// Short-circuits on mtime: if the stored mtime matches the
/// filesystem mtime, the file is assumed unchanged (skip hash).
#[allow(dead_code)] // used in tests and by should_scan_with_hash callers may fall back
/// Production scans use `should_scan_with_hash`, which avoids the
/// redundant `digest_file` read; this variant exists for tests.
#[cfg(test)]
pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
let meta = fs::metadata(path)?;
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
@ -852,6 +1096,7 @@ pub mod index {
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
stable_hash: 0,
})
})?;
@ -1806,6 +2051,60 @@ pub mod index {
Ok(out)
}
/// Store `map` as this project's persisted [`crate::surface::SurfaceMap`].
///
/// The table keeps a single row per project, so any previously stored map
/// is overwritten. Serialisation runs on a mutable clone, which leaves the
/// caller's map untouched.
/// NOTE(review): `to_json` is presumably what canonicalises the clone so
/// that structurally identical maps serialise to identical bytes — confirm
/// against `SurfaceMap::to_json`.
///
/// # Errors
///
/// Fails when the system clock is before the Unix epoch, when
/// serialisation fails (`NyxError::Msg`), or when the SQL statement fails.
pub fn replace_surface_map(&mut self, map: &crate::surface::SurfaceMap) -> NyxResult<()> {
    let updated_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
    let mut snapshot = map.clone();
    let payload = match snapshot.to_json() {
        Ok(bytes) => bytes,
        Err(e) => return Err(NyxError::Msg(format!("surface map serialise: {e}"))),
    };
    self.c().execute(
        "INSERT OR REPLACE INTO surface_map (project, map_json, updated_at)
VALUES (?1, ?2, ?3)",
        params![self.project, payload, updated_at],
    )?;
    Ok(())
}
/// Fetch and deserialise this project's persisted
/// [`crate::surface::SurfaceMap`].
///
/// Returns `Ok(None)` when the project has no stored row.
///
/// # Errors
///
/// Fails when the query fails or when the stored bytes cannot be
/// deserialised (`NyxError::Msg`).
pub fn load_surface_map(&self) -> NyxResult<Option<crate::surface::SurfaceMap>> {
    let stored = self
        .c()
        .query_row(
            "SELECT map_json FROM surface_map WHERE project = ?1",
            params![self.project],
            |row| row.get::<_, Vec<u8>>(0),
        )
        .optional()?;
    match stored {
        None => Ok(None),
        Some(bytes) => crate::surface::SurfaceMap::from_json(&bytes)
            .map(Some)
            .map_err(|e| NyxError::Msg(format!("surface map deserialise: {e}"))),
    }
}
/// Fetch the stored surface-map JSON for this project exactly as it sits in
/// the database, skipping deserialisation.
///
/// Returns `Ok(None)` when the project has no stored row. Intended for the
/// round-trip parity tests, which compare on-disk bytes across rescans.
pub fn load_surface_map_bytes(&self) -> NyxResult<Option<Vec<u8>>> {
    Ok(self
        .c()
        .query_row(
            "SELECT map_json FROM surface_map WHERE project = ?1",
            params![self.project],
            |row| row.get::<_, Vec<u8>>(0),
        )
        .optional()?)
}
/// Remove a file and all derived persisted state for this project.
///
/// This deletes the file row, issues, and all persisted summary rows so
@ -1867,9 +2166,7 @@ pub mod index {
.collect::<Result<_, _>>()?)
}
// -------------------------------------------------------------------------
// Scan persistence
// -------------------------------------------------------------------------
/// Insert a new scan record.
pub fn insert_scan(&self, record: &ScanRecord) -> NyxResult<()> {
@ -2135,9 +2432,7 @@ pub mod index {
Ok(rows)
}
// -------------------------------------------------------------------------
// Triage state management
// -------------------------------------------------------------------------
/// Get the triage state for a single finding fingerprint.
/// Returns (state, note, updated_at) or None if no triage state exists.
@ -2159,7 +2454,6 @@ pub mod index {
/// Set the triage state for a single finding. Upserts the state and
/// appends an audit log entry. Returns the previous state (or "open").
#[allow(dead_code)]
pub fn set_triage_state(
&self,
fingerprint: &str,
@ -2518,9 +2812,7 @@ pub mod index {
Ok(count > 0)
}
// -------------------------------------------------------------------------
// Maintenance utilities
// -------------------------------------------------------------------------
pub fn clear(&self) -> NyxResult<()> {
self.c().execute_batch(
r#"
@ -2545,10 +2837,8 @@ pub mod index {
Ok(())
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
#[allow(dead_code)] // used by should_scan() and tests
#[cfg(test)]
fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
let mut hasher = blake3::Hasher::new();
let mut file = fs::File::open(path)?;
@ -3052,7 +3342,7 @@ fn clear_drops_ssa_summaries_table() {
// ── CalleeSsaBody persistence tests ──────────────────────────────────────
/// Helper: build a minimal CalleeSsaBody for DB tests.
#[allow(dead_code)] // used by tests below
#[cfg(test)]
fn make_test_callee_body(
num_blocks: usize,
param_count: usize,
@ -3621,6 +3911,77 @@ fn fresh_db_no_migration_needed() {
assert!(idx.get_files("proj").unwrap().is_empty());
}
// Every connection handed out by the pool must report the 60 s busy timeout,
// not just the connection used for schema setup.
#[test]
fn init_applies_busy_timeout_to_every_pooled_connection() {
let td = tempfile::tempdir().unwrap();
let db = td.path().join("nyx.sqlite");
let pool = index::Indexer::init(&db).unwrap();
// Hold several connections at once so r2d2 must hand out distinct pooled
// handles. The timeout is connection-local, so configuring only the schema
// setup connection would leave later worker connections at rusqlite's
// default.
let conns: Vec<_> = (0..4).map(|_| pool.get().unwrap()).collect();
for conn in &conns {
// `PRAGMA busy_timeout` reports the value in milliseconds.
let timeout_ms: i64 = conn
.query_row("PRAGMA busy_timeout", [], |row| row.get(0))
.unwrap();
assert_eq!(timeout_ms, 60_000);
}
}
// Sixteen threads enqueue one file-plus-issue write each through a queue of
// capacity 2; after `finish`, all sixteen files and their single issue must
// be readable from the database.
#[test]
fn index_write_queue_serializes_parallel_writes() {
let td = tempfile::tempdir().unwrap();
let db = td.path().join("nyx.sqlite");
let pool = index::Indexer::init(&db).unwrap();
let project = "proj";
// Capacity 2 is far below the 16 producers, so senders have to block.
let writer =
index::IndexWriteQueue::start_with_capacity(project, std::sync::Arc::clone(&pool), 2);
let tx = writer.sender();
let mut handles = Vec::new();
for i in 0..16 {
let path = td.path().join(format!("file_{i}.rs"));
let source = format!("fn f_{i}() {{}}\n");
std::fs::write(&path, &source).unwrap();
let hash = index::Indexer::digest_bytes(source.as_bytes());
let tx = tx.clone();
handles.push(std::thread::spawn(move || {
tx.enqueue(move |idx| {
let file_id = idx.upsert_file_with_hash(&path, &hash)?;
// Owned strings live in the job; `IssueRow` only borrows from them.
let issue_rows = [(String::from("test-rule"), String::from("LOW"), 1_i64, 0_i64)];
idx.replace_issues(
file_id,
issue_rows
.iter()
.map(|(rule_id, severity, line, col)| index::IssueRow {
rule_id: rule_id.as_str(),
severity: severity.as_str(),
line: *line,
col: *col,
}),
)?;
Ok(())
})
.unwrap();
}));
}
for handle in handles {
handle.join().unwrap();
}
// Drop the last outstanding sender clone before `finish` joins the writer.
drop(tx);
writer.finish("test").unwrap();
let idx = index::Indexer::from_pool(project, &pool).unwrap();
let files = idx.get_files(project).unwrap();
assert_eq!(files.len(), 16);
for path in files {
assert_eq!(idx.get_issues_from_file(&path).unwrap().len(), 1);
}
}
#[test]
fn missing_ssa_namespace_column_triggers_recreate() {
let td = tempfile::tempdir().unwrap();

113
src/dynamic/build_pool/c.rs Normal file
View file

@ -0,0 +1,113 @@
//! C build pool (Phase 23 / Track O.1).
//!
//! Wraps the C compiler in `ccache` (when present) backed by a shared object
//! cache under the pool cache root, so a finding that recompiles a harness
//! whose `main.c` matches a previously-built one gets a cache hit instead of a
//! cold `cc` invocation.
//!
//! `ccache` degrades gracefully: when it is not on `PATH` the pool runs the
//! bare compiler, byte-for-byte the same `cc` invocation the legacy
//! [`crate::dynamic::build_sandbox::prepare_c`] path uses, so success / failure
//! parity holds. The static-link fallback (drop `-static` and retry) mirrors
//! the legacy `run_cc` behaviour for chroot-bound Strict-profile harnesses.
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
use std::path::Path;
use std::time::Instant;
/// Build pool that compiles C harnesses, optionally through `ccache`.
pub struct CPool {
// Compiler executable: the value of `NYX_CC_BIN`, or `cc` when unset.
cc_bin: String,
// Result of `super::detect_ccache()`; presumably the `ccache` executable to
// front the compiler with, `None` when it was not found — verify against
// `detect_ccache`.
ccache_bin: Option<String>,
}
impl CPool {
    /// Build a pool around the configured C compiler.
    ///
    /// The compiler is taken from `NYX_CC_BIN`, defaulting to `cc`. It must
    /// pass the `binary_runnable(.., "--version")` probe; `ccache` detection
    /// only happens after that probe succeeds.
    ///
    /// # Errors
    ///
    /// Returns a `c-pool: <bin> not runnable` message when the probe fails.
    pub fn try_new() -> Result<Self, String> {
        let cc_bin = match std::env::var("NYX_CC_BIN") {
            Ok(configured) => configured,
            Err(_) => "cc".to_owned(),
        };
        if binary_runnable(&cc_bin, "--version") {
            Ok(CPool {
                cc_bin,
                ccache_bin: super::detect_ccache(),
            })
        } else {
            Err(format!("c-pool: {cc_bin} not runnable"))
        }
    }
}
impl BuildPool for CPool {
    /// Pool identifier.
    fn name(&self) -> &'static str {
        "c"
    }

    /// Compile `main.c` inside `workdir`.
    ///
    /// `args[0]` = binary destination, `args[1]` = `"static"` or `"dynamic"`.
    /// A missing `args[1]`, or any value other than `"static"`, means a
    /// dynamic link. A static build that fails is retried once without
    /// `-static`; only the outcome of that retry is reported.
    fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
        let start = Instant::now();
        // Stamp every outcome with the time elapsed since entry.
        let outcome = |success: bool, stderr: String| PoolCompileResult {
            success,
            stderr,
            duration: start.elapsed(),
        };

        let Some(dest) = args.first().cloned() else {
            return outcome(false, "c-pool: missing binary destination arg".to_owned());
        };
        let static_link = matches!(args.get(1), Some(mode) if mode == "static");

        if static_link {
            match self.run(workdir, &dest, &["-static", "-O0", "-g"]) {
                Err(stderr) => {
                    // NOTE(review): `set_var` is only sound while no other
                    // thread touches the process environment; nothing visible
                    // here establishes that — confirm with the callers and
                    // record the invariant as a SAFETY comment.
                    unsafe { std::env::set_var("NYX_BUILD_STATIC_FALLBACK", "1") };
                    eprintln!("nyx: c-pool cc -static failed, retrying without -static: {stderr}");
                    // Best-effort cleanup of a partial binary before retrying.
                    let _ = std::fs::remove_file(&dest);
                }
                Ok(()) => return outcome(true, String::new()),
            }
        }

        match self.run(workdir, &dest, &["-O0", "-g"]) {
            Ok(()) => outcome(true, String::new()),
            Err(stderr) => outcome(false, stderr),
        }
    }

    /// The pool is healthy while the compiler still passes the `--version`
    /// probe.
    fn is_healthy(&self) -> bool {
        binary_runnable(&self.cc_bin, "--version")
    }
}
impl CPool {
    /// Compile `main.c` inside `workdir` into `dest`, placing `leading_flags`
    /// ahead of the `-o <dest> main.c` tail.
    ///
    /// The compiler is wrapped in `ccache` (with `CCACHE_DIR` pointed at the
    /// pool cache) only when both a `ccache` binary and a cache dir are
    /// available; otherwise the bare compiler runs.
    ///
    /// # Errors
    ///
    /// Returns the spawn error text, or the compiler's stderr on a non-zero
    /// exit.
    fn run(&self, workdir: &Path, dest: &str, leading_flags: &[&str]) -> Result<(), String> {
        let cache_dir = pool_cache_dir("c", "ccache");
        let mut cmd = if let (Some(ccache), Some(cache_dir)) = (&self.ccache_bin, cache_dir) {
            let mut wrapped = base_command(ccache);
            wrapped.arg(&self.cc_bin).env("CCACHE_DIR", cache_dir);
            wrapped
        } else {
            base_command(&self.cc_bin)
        };
        cmd.current_dir(workdir)
            .args(leading_flags)
            .arg("-o")
            .arg(dest)
            .arg("main.c");

        match cmd.output() {
            Err(e) => Err(format!("c-pool: cc: {e}")),
            Ok(out) if out.status.success() => Ok(()),
            Ok(out) => Err(String::from_utf8_lossy(&out.stderr).into_owned()),
        }
    }
}

View file

@ -0,0 +1,83 @@
//! C++ build pool (Phase 23 / Track O.1).
//!
//! Same shape as the C pool: front the C++ driver with `ccache` backed by a
//! shared object cache under the pool cache root. Falls back to a bare
//! `c++ -std=c++17` compile — byte-for-byte the legacy
//! [`crate::dynamic::build_sandbox::prepare_cpp`] invocation — when `ccache` is
//! absent.
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
use std::path::Path;
use std::time::Instant;
/// Build pool that compiles a harness's `main.cpp` with the configured C++
/// driver, fronted by `ccache` when one was detected.
pub struct CppPool {
/// C++ driver executable: `NYX_CXX_BIN` when set, otherwise `c++`.
cxx_bin: String,
/// `ccache` executable as reported by `detect_ccache`; `None` means the
/// bare driver is invoked directly.
ccache_bin: Option<String>,
}
impl CppPool {
    /// Build a C++ pool around the driver named by `NYX_CXX_BIN` (default
    /// `c++`), pairing it with whatever `detect_ccache` reports.
    ///
    /// # Errors
    ///
    /// Returns a message when the driver fails the `--version` runnability
    /// probe.
    pub fn try_new() -> Result<Self, String> {
        let cxx_bin = match std::env::var("NYX_CXX_BIN") {
            Ok(bin) => bin,
            Err(_) => "c++".to_owned(),
        };
        if binary_runnable(&cxx_bin, "--version") {
            let ccache_bin = super::detect_ccache();
            Ok(Self {
                cxx_bin,
                ccache_bin,
            })
        } else {
            Err(format!("cpp-pool: {cxx_bin} not runnable"))
        }
    }
}
impl BuildPool for CppPool {
    fn name(&self) -> &'static str {
        "cpp"
    }

    /// Compile `main.cpp` in `workdir` with `-O0 -g -std=c++17`.
    ///
    /// `args[0]` is the path the compiled binary is written to. The driver is
    /// wrapped in `ccache` only when both a `ccache` binary and a pool cache
    /// dir are available.
    fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
        let start = Instant::now();
        let finish = |success: bool, stderr: String| PoolCompileResult {
            success,
            stderr,
            duration: start.elapsed(),
        };

        let Some(dest) = args.first().cloned() else {
            return finish(false, "cpp-pool: missing binary destination arg".to_owned());
        };

        let cache_dir = pool_cache_dir("cpp", "ccache");
        let mut cmd = if let (Some(ccache), Some(cache_dir)) = (&self.ccache_bin, cache_dir) {
            let mut wrapped = base_command(ccache);
            wrapped.arg(&self.cxx_bin).env("CCACHE_DIR", cache_dir);
            wrapped
        } else {
            base_command(&self.cxx_bin)
        };
        cmd.current_dir(workdir)
            .args(["-O0", "-g", "-std=c++17", "-o"])
            .arg(&dest)
            .arg("main.cpp");

        match cmd.output() {
            Err(e) => finish(false, format!("cpp-pool: c++: {e}")),
            Ok(out) if out.status.success() => finish(true, String::new()),
            Ok(out) => finish(false, String::from_utf8_lossy(&out.stderr).into_owned()),
        }
    }

    fn is_healthy(&self) -> bool {
        binary_runnable(&self.cxx_bin, "--version")
    }
}

View file

@ -0,0 +1,140 @@
//! Go build pool (Phase 23 / Track O.1).
//!
//! The legacy [`crate::dynamic::build_sandbox::prepare_go`] gives each finding
//! its own `GOCACHE`/`GOMODCACHE` (default: a per-workdir `.gocache`), so the
//! Go toolchain recompiles the standard library and every module from cold on
//! every harness.
//!
//! [`GoPool`] mounts one shared `GOCACHE` + `GOMODCACHE` under the pool cache
//! root so compiled std-lib + module artefacts are reused across findings, and
//! builds with `-trimpath -buildvcs=false` so the output is reproducible (no
//! absolute workdir paths or VCS stamping baked in, which otherwise defeats the
//! build cache's keying).
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
use std::path::Path;
use std::time::Instant;
/// Build pool that runs `go build` against caches shared across findings.
pub struct GoPool {
/// Go toolchain executable: `NYX_GO_BIN` when set, otherwise `go`.
go_bin: String,
}
impl GoPool {
    /// Build a Go pool around the toolchain named by `NYX_GO_BIN` (default
    /// `go`).
    ///
    /// # Errors
    ///
    /// Returns a message when the toolchain fails the `version` runnability
    /// probe.
    pub fn try_new() -> Result<Self, String> {
        let go_bin = match std::env::var("NYX_GO_BIN") {
            Ok(bin) => bin,
            Err(_) => "go".to_owned(),
        };
        if binary_runnable(&go_bin, "version") {
            Ok(Self { go_bin })
        } else {
            Err(format!("go-pool: {go_bin} not runnable"))
        }
    }
}
impl BuildPool for GoPool {
    fn name(&self) -> &'static str {
        "go"
    }

    /// Build the Go package in `workdir` into `args[0]` (the path the compiled
    /// binary must land at).
    ///
    /// Both `go mod tidy` (run only when `go.mod` exists) and `go build` see
    /// the shared `GOCACHE` / `GOMODCACHE` from the pool cache, plus a
    /// `GOPATH` taken from the environment, else `$HOME/go`, else `/tmp/go`.
    fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
        let start = Instant::now();
        let finish = |success: bool, stderr: String| PoolCompileResult {
            success,
            stderr,
            duration: start.elapsed(),
        };

        let Some(dest) = args.first().cloned() else {
            return finish(false, "go-pool: missing binary destination arg".to_owned());
        };
        let Some(go_cache) = pool_cache_dir("go", "cache") else {
            return finish(false, "go-pool: no shared GOCACHE".to_owned());
        };
        let Some(go_mod_cache) = pool_cache_dir("go", "modcache") else {
            return finish(false, "go-pool: no shared GOMODCACHE".to_owned());
        };
        let go_path = match std::env::var("GOPATH") {
            Ok(path) => path,
            Err(_) => match std::env::var("HOME") {
                Ok(home) => format!("{home}/go"),
                Err(_) => "/tmp/go".to_owned(),
            },
        };

        // Every toolchain invocation shares the same workdir and cache env.
        let run_go = |sub_args: &[&str]| {
            let mut cmd = base_command(&self.go_bin);
            cmd.args(sub_args)
                .current_dir(workdir)
                .env("GOCACHE", &go_cache)
                .env("GOPATH", &go_path)
                .env("GOMODCACHE", &go_mod_cache);
            cmd.output()
        };

        // `go mod tidy` resolves imports into the shared module cache.
        if workdir.join("go.mod").exists() {
            match run_go(&["mod", "tidy"]) {
                Ok(out) if out.status.success() => {}
                Ok(out) => {
                    // Prefer stderr; fall back to stdout when stderr is empty.
                    let stream = if out.stderr.is_empty() {
                        &out.stdout
                    } else {
                        &out.stderr
                    };
                    let msg = String::from_utf8_lossy(stream);
                    return finish(false, format!("go mod tidy failed: {msg}"));
                }
                Err(e) => return finish(false, format!("go-pool: go mod tidy: {e}")),
            }
        }

        match run_go(&[
            "build",
            "-trimpath",
            "-buildvcs=false",
            "-o",
            dest.as_str(),
            ".",
        ]) {
            Err(e) => finish(false, format!("go-pool: go build: {e}")),
            Ok(out) if out.status.success() => finish(true, String::new()),
            Ok(out) => finish(false, String::from_utf8_lossy(&out.stderr).into_owned()),
        }
    }

    fn is_healthy(&self) -> bool {
        binary_runnable(&self.go_bin, "version")
    }
}

View file

@ -0,0 +1,952 @@
//! Long-lived `javac` daemon (Phase 22 / Track O.0).
//!
//! The legacy `try_compile_java_with_toolchain` in `build_sandbox` shell-execs a
//! fresh `javac` per harness — every invocation pays the JVM cold-start tax
//! (~700ms on the macOS reference machine, ~300ms on Linux CI). At 50
//! findings per OWASP-scale run that single line burns > 30s before any
//! real work happens.
//!
//! [`JavacPool`] replaces the shell-exec with a long-running worker JVM:
//!
//! ```text
//! nyx ─┐
//! │ framed JSON ┌─────────────┐
//! ├──stdin──────► │ NyxJavac │
//! │ │ Worker │
//! │ ◄──stdout──── │ (live JVM) │
//! │ framed JSON └─────────────┘
//! ```
//!
//! Bootstrap (paid once per toolchain id):
//! 1. Drop `NyxJavacWorker.java` into a cache dir.
//! 2. Compile it with `javac` (~1s).
//! 3. Spawn `java -cp <dir> NyxJavacWorker` (~700ms cold start).
//! 4. Read the worker's `{"ready":true}` banner.
//!
//! After bootstrap, each [`JavacPool::compile_batch`] is a single JSON
//! round-trip — typical wall-clock < 50ms even on small harnesses.
//!
//! # Robustness
//!
//! A crashed / hung worker is non-fatal:
//! - On any IO error, the pool marks itself unhealthy and the caller
//! falls back to the direct-spawn legacy path.
//! - The next pool lookup spawns a fresh worker.
//!
//! # Test hook
//!
//! `NYX_JAVAC_BIN` + `NYX_JAVA_BIN` override the binaries the pool
//! invokes so integration tests can swap in a wrapper.
use super::{BuildPool, PoolCompileResult};
use serde::Deserialize;
use std::fs::{File, OpenOptions};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
use std::sync::{Mutex, mpsc};
use std::thread;
use std::time::{Duration, Instant};
/// Java source compiled at first use to drive the worker.
const WORKER_SOURCE: &str = include_str!("java_worker/NyxJavacWorker.java");
/// Main class name passed to `java` when the worker JVM is spawned.
const WORKER_CLASS: &str = "NyxJavacWorker";
/// File name the embedded worker source is written under in the bootstrap
/// dir.
const WORKER_FILENAME: &str = "NyxJavacWorker.java";
/// Manifest written last (atomically) by `publish_class_set` after every
/// class lands, so its presence is the "publish finished" signal a
/// lock-free reader keys on. Its *contents* are NOT trusted as the
/// completeness oracle -- see `WORKER_CLASS_FILES`.
const WORKER_MANIFEST: &str = ".worker-classes";
/// The exact set of `.class` files the worker JVM must load at runtime:
/// the top-level class plus its nested `$Request` / `$Parser` types.
///
/// Readiness keys on *this fixed set*, not on whatever the on-disk
/// manifest happens to name. A bootstrap cache left by an older binary
/// can carry a manifest that lists only `NyxJavacWorker.class`; trusting
/// that list let the gate pass with the nested classes absent, so the
/// worker spawned, announced readiness, then died on the first request
/// with `NoClassDefFoundError` surfaced as
/// `nyx-javac-worker: parse error: NyxJavacWorker$Parser`. Pinning the
/// required set here makes any such partial cache fail the gate and
/// trigger a clean recompile. Kept in lock-step with the worker's real
/// nested-class layout by `worker_class_files_match_javac_output`.
const WORKER_CLASS_FILES: &[&str] = &[
"NyxJavacWorker.class",
"NyxJavacWorker$Request.class",
"NyxJavacWorker$Parser.class",
];
/// Upper bound on waiting for the worker's readiness banner after spawn.
const WORKER_READY_TIMEOUT: Duration = Duration::from_secs(10);
/// Upper bound on waiting for the worker's reply to a single compile request.
const COMPILE_RESPONSE_TIMEOUT: Duration = Duration::from_secs(60);
/// Live worker handle. Held inside a `Mutex` so concurrent
/// `compile_batch` callers serialise on the single JVM.
struct Worker {
/// The worker JVM process.
child: Child,
/// Write end of the worker's stdin; requests are written here, one per line.
stdin: ChildStdin,
/// Buffered read end of the worker's stdout; one response line is read per
/// request.
stdout: BufReader<ChildStdout>,
/// Id stamped on the next request; bumped (wrapping) after each one.
next_id: u64,
}
/// Build pool backed by a single long-lived `NyxJavacWorker` JVM that
/// answers compile requests over its stdin/stdout pipes.
pub struct JavacPool {
/// `None` when the worker has crashed and a future call should
/// surface the unhealthy state to the dispatcher.
inner: Mutex<Option<Worker>>,
/// Cache dir holding `NyxJavacWorker.class`. Persisted between
/// runs so subsequent process invocations skip the compile step.
bootstrap_dir: PathBuf,
}
impl JavacPool {
/// Create a fresh pool for `toolchain_id`.
///
/// Returns `Err` when the worker cannot be bootstrapped (missing
/// `javac`, missing `java`, compile failure, spawn failure). The
/// caller is expected to fall back to the legacy direct-spawn path
/// on any error.
pub fn try_new(toolchain_id: &str) -> Result<Self, String> {
let bootstrap_dir = bootstrap_dir_for(toolchain_id)?;
std::fs::create_dir_all(&bootstrap_dir)
.map_err(|e| format!("javac-pool: mkdir {}: {e}", bootstrap_dir.display()))?;
ensure_worker_compiled(&bootstrap_dir)?;
let worker = spawn_worker(&bootstrap_dir)?;
Ok(JavacPool {
inner: Mutex::new(Some(worker)),
bootstrap_dir,
})
}
fn compile_with_worker(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
let start = Instant::now();
let mut guard = match self.inner.lock() {
Ok(g) => g,
Err(p) => p.into_inner(),
};
// If a prior call torched the worker, try one re-spawn here so
// the caller doesn't see consecutive failures from a transient
// JVM crash.
if guard.is_none()
&& let Ok(w) = spawn_worker(&self.bootstrap_dir)
{
*guard = Some(w);
}
let worker = match guard.as_mut() {
Some(w) => w,
None => {
return PoolCompileResult {
success: false,
stderr: "javac-pool: worker unavailable".to_owned(),
duration: start.elapsed(),
};
}
};
let id = worker.next_id;
worker.next_id = worker.next_id.wrapping_add(1);
let req = build_request(id, workdir, args);
if let Err(e) = worker.stdin.write_all(req.as_bytes()) {
*guard = None;
return PoolCompileResult {
success: false,
stderr: format!("javac-pool: write failed: {e}"),
duration: start.elapsed(),
};
}
if let Err(e) = worker.stdin.flush() {
*guard = None;
return PoolCompileResult {
success: false,
stderr: format!("javac-pool: flush failed: {e}"),
duration: start.elapsed(),
};
}
match read_line_with_timeout(
&mut worker.child,
&mut worker.stdout,
COMPILE_RESPONSE_TIMEOUT,
"read response",
) {
Ok(None) => {
*guard = None;
PoolCompileResult {
success: false,
stderr: "javac-pool: worker closed stdout".to_owned(),
duration: start.elapsed(),
}
}
Err(e) => {
*guard = None;
PoolCompileResult {
success: false,
stderr: e,
duration: start.elapsed(),
}
}
Ok(Some(line)) => match parse_response(&line) {
Some((success, stderr)) => PoolCompileResult {
success,
stderr,
duration: start.elapsed(),
},
None => {
*guard = None;
PoolCompileResult {
success: false,
stderr: format!("javac-pool: malformed response: {line}"),
duration: start.elapsed(),
}
}
},
}
}
}
impl Drop for JavacPool {
fn drop(&mut self) {
// Best-effort: close stdin so the worker exits cleanly, then
// wait briefly. We don't propagate errors -- pool teardown
// happens at process exit, by which point everyone is already
// leaving anyway.
if let Ok(mut guard) = self.inner.lock()
&& let Some(mut worker) = guard.take()
{
// Dropping stdin sends EOF to the worker's `readLine` loop.
drop(worker.stdin);
let _ = worker.child.wait();
}
}
}
impl BuildPool for JavacPool {
    fn name(&self) -> &'static str {
        "javac"
    }

    /// Forward the batch to the live worker JVM.
    fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
        self.compile_with_worker(workdir, args)
    }

    /// Healthy only while a worker occupies the slot; a poisoned lock counts
    /// as unhealthy.
    fn is_healthy(&self) -> bool {
        self.inner
            .lock()
            .map(|slot| slot.is_some())
            .unwrap_or(false)
    }
}
/// Resolve the per-toolchain bootstrap dir.
///
/// With `NYX_BUILD_POOL_DIR` set the result is
/// `<NYX_BUILD_POOL_DIR>/javac/<toolchain_id>`; otherwise it is
/// `<cache dir>/dynamic/build-pool/javac/<toolchain_id>` under the
/// `dev`/`nyx`/`nyx` project dirs.
///
/// # Errors
///
/// Fails when no project dirs can be determined and no override is set.
fn bootstrap_dir_for(toolchain_id: &str) -> Result<PathBuf, String> {
    let pool_root = match std::env::var("NYX_BUILD_POOL_DIR") {
        Ok(custom) => PathBuf::from(custom),
        Err(_) => {
            let dirs = directories::ProjectDirs::from("dev", "nyx", "nyx")
                .ok_or_else(|| "javac-pool: no cache dir on this platform".to_owned())?;
            dirs.cache_dir().join("dynamic").join("build-pool")
        }
    };
    Ok(pool_root.join("javac").join(toolchain_id))
}
/// Drop `NyxJavacWorker.java` + compile `NyxJavacWorker.class` into
/// `dir` if they are not already present. Always re-writes the source
/// when the on-disk copy differs from the embedded one so a binary
/// upgrade picks up worker fixes without manual cache eviction.
///
/// The bootstrap dir is shared across every concurrent `nyx` process on
/// the host, so the compile-and-publish step is hardened against the
/// cross-process race that otherwise hands a half-written
/// `NyxJavacWorker.class` to a peer process spawning its worker (which
/// then fails to start, manifesting downstream as a flaky build):
///
/// - The publish is **atomic**: `javac` writes into a private,
/// pid-scoped staging dir and the finished class is `rename`d into
/// place. A concurrent reader sees either the previous complete
/// class or the new one, never a partial file. The old class is
/// never `remove`d first.
/// - Compiles are **serialised** on a `flock(2)` over `.bootstrap.lock`
/// so two processes never run `javac` into the same staging at once
/// and a waiter re-checks the now-published class instead of
/// recompiling.
fn ensure_worker_compiled(dir: &Path) -> Result<(), String> {
let src_path = dir.join(WORKER_FILENAME);
// Fast path: a complete class set already matches the current worker
// source. Checked before taking the cross-process lock so steady
// state stays lock-free.
if worker_class_ready(dir) {
return Ok(());
}
// Serialise the compile-and-publish across processes sharing `dir`.
let _lock = BootstrapLock::acquire(dir)?;
// Re-check under the lock: another process may have published a good
// class set while we were waiting on the lock.
if worker_class_ready(dir) {
return Ok(());
}
// Publish the source (idempotent) so cache inspectors can see what
// the class was built from.
std::fs::write(&src_path, WORKER_SOURCE)
.map_err(|e| format!("javac-pool: write worker source: {e}"))?;
// Compile into a private staging dir, then atomically publish the
// class files into place.
let staging = dir.join(format!(".compile-{}", std::process::id()));
let _ = std::fs::remove_dir_all(&staging);
std::fs::create_dir_all(&staging).map_err(|e| format!("javac-pool: mkdir staging: {e}"))?;
let javac = std::env::var("NYX_JAVAC_BIN").unwrap_or_else(|_| "javac".to_owned());
let compiled = Command::new(&javac)
// Pin the source charset so the bootstrap compile is independent of
// the host locale (a `C`/`POSIX` CI runner defaults `javac` to
// `US-ASCII` and would reject any non-ASCII byte in the worker
// source). Mirrors the harness-compile pin in `build_sandbox`.
.arg("-encoding")
.arg("UTF-8")
.arg("-d")
.arg(&staging)
.arg(&src_path)
.env_clear()
.env("PATH", std::env::var("PATH").unwrap_or_default())
.env("HOME", std::env::var("HOME").unwrap_or_default())
.output();
let output = match compiled {
Ok(o) => o,
Err(e) => {
let _ = std::fs::remove_dir_all(&staging);
return Err(format!("javac-pool: spawn javac: {e}"));
}
};
if !output.status.success() {
let _ = std::fs::remove_dir_all(&staging);
return Err(format!(
"javac-pool: bootstrap compile failed: {}",
String::from_utf8_lossy(&output.stderr),
));
}
let publish = publish_class_set(&staging, dir);
let _ = std::fs::remove_dir_all(&staging);
publish
}
/// Move every `.class` file `javac` emitted from the private `staging`
/// dir into the shared `dir`, then write the manifest last.
///
/// The worker source compiles to the top-level `NyxJavacWorker.class`
/// plus the nested `NyxJavacWorker$Request` / `NyxJavacWorker$Parser`
/// classes. Every one of them must land in `dir` (the worker JVM's
/// classpath), or the worker hits `NoClassDefFoundError` the first time
/// it touches a nested class -- which surfaced downstream as a bogus
/// `nyx-javac-worker: parse error: NyxJavacWorker$Parser`.
///
/// Renames are same-filesystem (staging is a child of `dir`) so each is
/// atomic. The manifest is written last via a temp-then-rename, so a
/// concurrent peer on the lock-free fast path sees either no manifest
/// (and serialises on the lock) or a complete one whose every named
/// class is already in place.
fn publish_class_set(staging: &Path, dir: &Path) -> Result<(), String> {
let entries =
std::fs::read_dir(staging).map_err(|e| format!("javac-pool: read staging dir: {e}"))?;
let mut names: Vec<String> = Vec::new();
for entry in entries {
let path = entry
.map_err(|e| format!("javac-pool: read staging entry: {e}"))?
.path();
if path.extension().is_none_or(|x| x != "class") {
continue;
}
let name = match path.file_name().and_then(|n| n.to_str()) {
Some(n) => n.to_owned(),
None => continue,
};
std::fs::rename(&path, dir.join(&name))
.map_err(|e| format!("javac-pool: publish {name}: {e}"))?;
names.push(name);
}
if names.is_empty() {
return Err("javac-pool: bootstrap compile produced no .class files".to_owned());
}
// Refuse to publish (and to write the readiness-signalling manifest) a
// set missing any class the worker loads at runtime. Fail loud here
// rather than leave a half-set the worker would die on later.
for required in WORKER_CLASS_FILES {
if !names.iter().any(|n| n == required) {
return Err(format!(
"javac-pool: bootstrap compile missing required class {required}; got {names:?}",
));
}
}
// Write the manifest atomically (temp + rename) so it appears in one
// step after every class is already published.
let manifest = dir.join(WORKER_MANIFEST);
let tmp = dir.join(format!("{WORKER_MANIFEST}.{}", std::process::id()));
std::fs::write(&tmp, names.join("\n"))
.map_err(|e| format!("javac-pool: write manifest: {e}"))?;
std::fs::rename(&tmp, &manifest).map_err(|e| {
let _ = std::fs::remove_file(&tmp);
format!("javac-pool: publish manifest: {e}")
})?;
Ok(())
}
/// True when `dir` holds a usable class set for the current embedded
/// `WORKER_SOURCE`. All three conditions must hold:
///
/// 1. the on-disk worker source is identical to `WORKER_SOURCE`;
/// 2. the manifest file exists (its contents are not consulted);
/// 3. every entry of `WORKER_CLASS_FILES` is a regular, non-empty file.
fn worker_class_ready(dir: &Path) -> bool {
    let source_is_current = matches!(
        std::fs::read_to_string(dir.join(WORKER_FILENAME)),
        Ok(on_disk) if on_disk == WORKER_SOURCE
    );
    if !source_is_current {
        return false;
    }

    // `publish_class_set` writes the manifest last, so a missing manifest
    // means no finished publish is visible here and the caller must take
    // the cross-process lock path.
    let manifest_present = std::fs::metadata(dir.join(WORKER_MANIFEST)).is_ok();
    if !manifest_present {
        return false;
    }

    // Completeness is measured against the pinned required set rather than
    // the manifest's lines, so a stale or partial manifest cannot vouch for
    // classes it does not name.
    WORKER_CLASS_FILES.iter().all(|name| {
        matches!(
            std::fs::metadata(dir.join(name)),
            Ok(meta) if meta.is_file() && meta.len() > 0
        )
    })
}
/// Cross-process advisory lock guarding the shared bootstrap dir's
/// compile-and-publish step. The held lock file lives at
/// `<dir>/.bootstrap.lock`; the `flock(2)` releases when the guard (and
/// thus the file) drops.
struct BootstrapLock {
/// Open handle to the lock file. Never read; kept only so the handle stays
/// open for the guard's lifetime.
_file: File,
}
impl BootstrapLock {
    /// Open (creating if needed, never truncating) `<dir>/.bootstrap.lock`
    /// and take the exclusive lock on it via `lock_file_exclusive`.
    ///
    /// # Errors
    ///
    /// Returns a message when the lock file cannot be opened or locked.
    fn acquire(dir: &Path) -> Result<Self, String> {
        let mut options = OpenOptions::new();
        options.read(true).write(true).create(true).truncate(false);
        let file = match options.open(dir.join(".bootstrap.lock")) {
            Ok(handle) => handle,
            Err(e) => return Err(format!("javac-pool: open bootstrap lock: {e}")),
        };
        if let Err(e) = lock_file_exclusive(&file) {
            return Err(format!("javac-pool: bootstrap lock: {e}"));
        }
        Ok(Self { _file: file })
    }
}
/// Take an exclusive `flock(2)` on `file`, retrying when the call is
/// interrupted.
///
/// # Errors
///
/// Returns the OS error for any failure other than an interrupted call.
#[cfg(unix)]
fn lock_file_exclusive(file: &File) -> std::io::Result<()> {
    use std::os::fd::AsRawFd;
    unsafe extern "C" {
        fn flock(fd: i32, operation: i32) -> i32;
    }
    const LOCK_EX: i32 = 2;

    let fd = file.as_raw_fd();
    loop {
        // SAFETY: `fd` belongs to `file`, which is borrowed for the whole
        // call and therefore still open; `flock` takes two scalar arguments
        // and its return value is checked.
        let status = unsafe { flock(fd, LOCK_EX) };
        if status == 0 {
            return Ok(());
        }
        let failure = std::io::Error::last_os_error();
        if failure.kind() != std::io::ErrorKind::Interrupted {
            return Err(failure);
        }
        // Interrupted: go round again.
    }
}
/// Non-Unix stand-in: performs no locking and always reports success, so
/// the bootstrap compile is not serialised across processes on these
/// targets.
#[cfg(not(unix))]
fn lock_file_exclusive(_file: &File) -> std::io::Result<()> {
Ok(())
}
/// Spawn the worker JVM with `dir` as its classpath and wait for its
/// readiness banner.
///
/// The binary is `NYX_JAVA_BIN` when set, otherwise `java`. The child runs
/// with a cleared environment apart from `PATH` and `HOME`.
///
/// Every failure path after the spawn kills **and reaps** the child, so a
/// worker that never became ready does not linger as a zombie or a stray
/// JVM. Once the worker is ready its stderr is drained by a background
/// thread: the stream is piped, and a pipe nobody reads would block the
/// worker as soon as the pipe buffer filled.
///
/// # Errors
///
/// Returns a message when the spawn fails, a pipe is missing, the banner
/// read fails or times out, stdout closes early, or the banner does not
/// contain `"ready":true`.
fn spawn_worker(dir: &Path) -> Result<Worker, String> {
    /// Kill the child, collect whatever it wrote to stderr, then reap it.
    fn abandon(child: &mut Child) -> String {
        let _ = child.kill();
        let stderr_tail = drain_stderr(child);
        let _ = child.wait();
        stderr_tail
    }

    let java = std::env::var("NYX_JAVA_BIN").unwrap_or_else(|_| "java".to_owned());
    let mut child = Command::new(&java)
        // The worker is tiny -- keep the JVM frugal so the pool
        // overhead stays well below the per-finding cost it
        // replaces.
        .arg("-Xss256k")
        .arg("-XX:+UseSerialGC")
        .arg("-cp")
        .arg(dir)
        .arg(WORKER_CLASS)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .env_clear()
        .env("PATH", std::env::var("PATH").unwrap_or_default())
        .env("HOME", std::env::var("HOME").unwrap_or_default())
        .spawn()
        .map_err(|e| format!("javac-pool: spawn java: {e}"))?;
    let Some(stdin) = child.stdin.take() else {
        abandon(&mut child);
        return Err("javac-pool: missing stdin".to_owned());
    };
    let Some(stdout) = child.stdout.take() else {
        drop(stdin);
        abandon(&mut child);
        return Err("javac-pool: missing stdout".to_owned());
    };
    let mut stdout = BufReader::new(stdout);
    let banner = match read_line_with_timeout(
        &mut child,
        &mut stdout,
        WORKER_READY_TIMEOUT,
        "read banner",
    ) {
        Ok(Some(line)) => line,
        Ok(None) => {
            let stderr_tail = abandon(&mut child);
            return Err(format!(
                "javac-pool: worker closed stdout before readiness; stderr: {stderr_tail}",
            ));
        }
        Err(e) => {
            abandon(&mut child);
            return Err(e);
        }
    };
    if !banner.contains("\"ready\":true") {
        // Drain stderr for diagnostic context, then bail.
        let stderr_tail = abandon(&mut child);
        return Err(format!(
            "javac-pool: worker did not announce readiness; got {banner:?}; stderr: {stderr_tail}",
        ));
    }
    // Keep the stderr pipe empty for the worker's lifetime. The thread ends
    // at EOF, i.e. when the worker exits. If the thread cannot be started
    // the handle is dropped with the closure, which closes the pipe.
    if let Some(mut stderr) = child.stderr.take() {
        let _ = thread::Builder::new()
            .name("nyx-javac-stderr".to_owned())
            .spawn(move || {
                let _ = std::io::copy(&mut stderr, &mut std::io::sink());
            });
    }
    Ok(Worker {
        child,
        stdin,
        stdout,
        next_id: 0,
    })
}
/// Take the child's stderr pipe (if still attached) and read it to the end.
///
/// Best-effort: a read error is ignored and whatever was captured is
/// returned. The read blocks until the pipe reaches EOF, so callers kill
/// the child first. Because the pipe is taken, a second call yields an
/// empty string.
fn drain_stderr(child: &mut Child) -> String {
    use std::io::Read;
    let mut captured = String::new();
    match child.stderr.take() {
        Some(mut pipe) => {
            let _ = pipe.read_to_string(&mut captured);
        }
        None => {}
    }
    captured
}
/// Read one line from `stdout`, giving up after `timeout`.
///
/// The blocking read runs on a scoped helper thread while this thread waits
/// on a channel. On timeout the child is killed so the pending read can end;
/// the scope joins the helper before the function returns.
///
/// Returns `Ok(Some(line))` for a line (terminator included), `Ok(None)` at
/// EOF, and `Err` naming `context` on read failure, timeout, or a helper
/// that went away without reporting.
fn read_line_with_timeout(
    child: &mut Child,
    stdout: &mut BufReader<ChildStdout>,
    timeout: Duration,
    context: &str,
) -> Result<Option<String>, String> {
    let (line_tx, line_rx) = mpsc::channel();
    thread::scope(|scope| {
        scope.spawn(move || {
            let mut buf = String::new();
            let outcome = match stdout.read_line(&mut buf) {
                Ok(read) => Ok((read, buf)),
                Err(e) => Err(e),
            };
            // The receiver may already have given up; that is fine.
            let _ = line_tx.send(outcome);
        });

        let received = line_rx.recv_timeout(timeout);
        match received {
            Ok(Ok((read, line))) => {
                if read == 0 {
                    Ok(None)
                } else {
                    Ok(Some(line))
                }
            }
            Ok(Err(e)) => Err(format!("javac-pool: {context} failed: {e}")),
            Err(mpsc::RecvTimeoutError::Disconnected) => {
                Err(format!("javac-pool: {context} reader disconnected"))
            }
            Err(mpsc::RecvTimeoutError::Timeout) => {
                let _ = child.kill();
                Err(format!("javac-pool: {context} timed out after {timeout:?}"))
            }
        }
    })
}
/// Serialise one compile request as a single newline-terminated JSON line:
/// `{"id":"<id>","cwd":<workdir>,"args":[...]}`.
///
/// `id` is emitted as a JSON string; `workdir` goes through a lossy UTF-8
/// conversion; every string value is escaped by `append_json_string`.
fn build_request(id: u64, workdir: &Path, args: &[String]) -> String {
    let args_budget: usize = args.iter().map(|arg| arg.len() + 4).sum();
    let mut req = String::with_capacity(128 + args_budget);

    req.push_str("{\"id\":\"");
    req.push_str(&id.to_string());
    req.push_str("\",\"cwd\":");
    append_json_string(&mut req, &workdir.to_string_lossy());
    req.push_str(",\"args\":[");
    if let Some((first, rest)) = args.split_first() {
        append_json_string(&mut req, first);
        for arg in rest {
            req.push(',');
            append_json_string(&mut req, arg);
        }
    }
    req.push_str("]}\n");
    req
}
/// Append `s` to `out` as a double-quoted JSON string.
///
/// Backslash, double quote, newline, carriage return and tab use their
/// two-character escapes; any other character below U+0020 becomes a
/// `\u00XX` escape; everything else is copied through unchanged.
fn append_json_string(out: &mut String, s: &str) {
    out.push('"');
    for ch in s.chars() {
        let short_escape = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(escape) = short_escape {
            out.push_str(escape);
        } else if u32::from(ch) < 0x20 {
            out.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            out.push(ch);
        }
    }
    out.push('"');
}
/// Extract `(success, stderr)` from a worker JSON response line.
fn parse_response(line: &str) -> Option<(bool, String)> {
let response: JavacWorkerResponse = serde_json::from_str(line).ok()?;
let stderr =
decode_b64(&response.stderr_b64).unwrap_or_else(|| "<unable to decode stderr>".to_owned());
Some((response.success, stderr))
}
/// Shape of one worker response line as consumed by `parse_response`.
#[derive(Debug, Deserialize)]
struct JavacWorkerResponse {
/// Outcome flag reported by the worker; required.
success: bool,
/// Base64-encoded stderr text; defaults to the empty string when the field
/// is absent.
#[serde(default)]
stderr_b64: String,
}
/// Tiny RFC 4648 base64 decoder. Used only for the worker's
/// `stderr_b64` field so we can carry raw bytes through the JSON
/// envelope without dragging in a base64 crate.
///
/// ASCII whitespace is ignored. Padding is accepted only as a suffix of one
/// or two `=` that brings the input to a multiple of four symbols; an
/// unpadded tail of two or three symbols is also accepted. A `=` anywhere
/// else, a symbol outside the standard alphabet, or a dangling single
/// symbol makes the input malformed.
///
/// Returns `None` for malformed input or when the decoded bytes are not
/// valid UTF-8.
fn decode_b64(s: &str) -> Option<String> {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const INVALID: u8 = 0xff;

    let mut lookup = [INVALID; 256];
    for (i, &b) in ALPHABET.iter().enumerate() {
        lookup[b as usize] = i as u8;
    }

    let bytes: Vec<u8> = s.bytes().filter(|b| !b.is_ascii_whitespace()).collect();

    // Peel off at most two trailing pad characters. Any `=` left in the data
    // is rejected below because it is not in the lookup table.
    let mut data_len = bytes.len();
    let mut pads = 0usize;
    while pads < 2 && data_len > 0 && bytes[data_len - 1] == b'=' {
        data_len -= 1;
        pads += 1;
    }
    // Padding, when present, must complete the final four-symbol group.
    if pads > 0 && bytes.len() % 4 != 0 {
        return None;
    }
    let data = &bytes[..data_len];
    // A lone trailing symbol carries only six bits: not even one byte.
    if data.len() % 4 == 1 {
        return None;
    }

    let mut out = Vec::with_capacity(data.len() / 4 * 3 + 2);
    for chunk in data.chunks(4) {
        let mut acc: u32 = 0;
        for &b in chunk {
            let v = lookup[b as usize];
            if v == INVALID {
                return None;
            }
            acc = (acc << 6) | u32::from(v);
        }
        // Left-align a short final group inside the 24-bit window so the
        // byte extraction below is the same for every group size.
        acc <<= 6 * (4 - chunk.len()) as u32;

        // n symbols carry n - 1 whole bytes.
        out.push(((acc >> 16) & 0xff) as u8);
        if chunk.len() > 2 {
            out.push(((acc >> 8) & 0xff) as u8);
        }
        if chunk.len() > 3 {
            out.push((acc & 0xff) as u8);
        }
    }
    String::from_utf8(out).ok()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn request_envelope_escapes_specials() {
let s = build_request(
7,
Path::new("/tmp/x"),
&["a\"b".to_owned(), "c\\d".to_owned()],
);
assert!(s.contains("\"id\":\"7\""));
assert!(s.contains("\"cwd\":\"/tmp/x\""));
assert!(s.contains("\"a\\\"b\""));
assert!(s.contains("\"c\\\\d\""));
assert!(s.ends_with("]}\n"));
}
#[test]
fn parse_response_success() {
let (ok, err) =
parse_response("{\"id\":\"0\",\"success\":true,\"stderr_b64\":\"\"}\n").unwrap();
assert!(ok);
assert!(err.is_empty());
}
#[test]
fn parse_response_failure_decodes_stderr() {
// "boom" -> base64 "Ym9vbQ=="
let (ok, err) =
parse_response("{\"id\":\"1\",\"success\":false,\"stderr_b64\":\"Ym9vbQ==\"}\n")
.unwrap();
assert!(!ok);
assert_eq!(err, "boom");
}
#[test]
fn parse_response_rejects_off_shape() {
assert!(parse_response("not json").is_none());
// Missing success field.
assert!(parse_response("{\"id\":\"0\",\"stderr_b64\":\"\"}").is_none());
}
#[test]
fn parse_response_accepts_reordered_fields() {
let (ok, err) =
parse_response("{\"stderr_b64\":\"YQ==\",\"success\":true,\"id\":\"7\"}\n").unwrap();
assert!(ok);
assert_eq!(err, "a");
}
#[test]
fn b64_decode_roundtrip() {
for (raw, encoded) in &[
("", ""),
("a", "YQ=="),
("ab", "YWI="),
("abc", "YWJj"),
("hello world", "aGVsbG8gd29ybGQ="),
] {
assert_eq!(decode_b64(encoded).as_deref(), Some(*raw));
}
}
#[test]
fn worker_class_ready_rejects_truncated_or_mismatched() {
let tmp = tempfile::TempDir::new().unwrap();
let dir = tmp.path();
let src = dir.join(WORKER_FILENAME);
let main_class = dir.join(format!("{WORKER_CLASS}.class"));
let parser = dir.join(format!("{WORKER_CLASS}$Parser.class"));
let request = dir.join(format!("{WORKER_CLASS}$Request.class"));
let manifest = dir.join(WORKER_MANIFEST);
let manifest_body = format!(
"{WORKER_CLASS}.class\n{WORKER_CLASS}$Parser.class\n{WORKER_CLASS}$Request.class"
);
// Nothing on disk yet.
assert!(!worker_class_ready(dir));
// Matching source but no class / manifest.
std::fs::write(&src, WORKER_SOURCE).unwrap();
assert!(!worker_class_ready(dir));
// Top-level class + manifest present but the nested classes are
// missing -- the stale-cache shape an older binary left behind.
std::fs::write(&main_class, b"\xca\xfe\xba\xbe").unwrap();
std::fs::write(&manifest, &manifest_body).unwrap();
assert!(!worker_class_ready(dir));
// A zero-byte nested class (the corruption shape a racing peer can
// leave behind) must not count as ready.
std::fs::write(&parser, b"").unwrap();
std::fs::write(&request, b"\xca\xfe\xba\xbe").unwrap();
assert!(!worker_class_ready(dir));
// Every required class non-empty with matching source is ready.
std::fs::write(&parser, b"\xca\xfe\xba\xbe").unwrap();
assert!(worker_class_ready(dir));
// A missing manifest invalidates an otherwise-complete class set.
std::fs::remove_file(&manifest).unwrap();
assert!(!worker_class_ready(dir));
std::fs::write(&manifest, &manifest_body).unwrap();
assert!(worker_class_ready(dir));
// Stale source invalidates an otherwise-present class set.
std::fs::write(&src, "// not the worker source").unwrap();
assert!(!worker_class_ready(dir));
}
#[test]
fn worker_class_ready_rejects_manifest_that_omits_nested_classes() {
// The exact stale-cache shape that produced
// `nyx-javac-worker: parse error: NyxJavacWorker$Parser` on Linux:
// a self-consistent manifest that simply does not name the nested
// classes, with only the top-level class on disk. The old guard
// iterated the manifest's lines and so trusted this; readiness must
// now reject it because the fixed required set is incomplete.
let tmp = tempfile::TempDir::new().unwrap();
let dir = tmp.path();
std::fs::write(dir.join(WORKER_FILENAME), WORKER_SOURCE).unwrap();
std::fs::write(
dir.join(format!("{WORKER_CLASS}.class")),
b"\xca\xfe\xba\xbe",
)
.unwrap();
// Manifest names only the top-level class -- exactly what poisoned
// the persisted bootstrap cache.
std::fs::write(dir.join(WORKER_MANIFEST), format!("{WORKER_CLASS}.class")).unwrap();
assert!(
!worker_class_ready(dir),
"a manifest omitting the nested classes must not satisfy readiness",
);
// Drop in the nested classes the worker actually loads -> ready.
std::fs::write(
dir.join(format!("{WORKER_CLASS}$Parser.class")),
b"\xca\xfe\xba\xbe",
)
.unwrap();
std::fs::write(
dir.join(format!("{WORKER_CLASS}$Request.class")),
b"\xca\xfe\xba\xbe",
)
.unwrap();
assert!(worker_class_ready(dir));
}
#[test]
fn ensure_worker_compiled_heals_partial_cache() {
    // End-to-end heal check. Seeds the poisoned-cache shape (top-level
    // class plus a one-line manifest, no nested classes) and expects
    // `ensure_worker_compiled` to rebuild a complete class set rather than
    // believe the stale manifest.
    const CLASS_MAGIC: &[u8] = b"\xca\xfe\xba\xbe";

    let javac = match std::env::var("NYX_JAVAC_BIN") {
        Ok(bin) => bin,
        Err(_) => "javac".to_owned(),
    };
    let probe = std::process::Command::new(&javac).arg("-version").output();
    let have_javac = match probe {
        Ok(out) => out.status.success(),
        Err(_) => false,
    };
    if !have_javac {
        // No JDK on this host: there is nothing to recompile with.
        return;
    }

    let tmp = tempfile::TempDir::new().unwrap();
    let dir = tmp.path();
    let top_level = format!("{WORKER_CLASS}.class");
    std::fs::write(dir.join(WORKER_FILENAME), WORKER_SOURCE).unwrap();
    std::fs::write(dir.join(&top_level), CLASS_MAGIC).unwrap();
    std::fs::write(dir.join(WORKER_MANIFEST), &top_level).unwrap();
    assert!(
        !worker_class_ready(dir),
        "poisoned cache must read not-ready"
    );

    ensure_worker_compiled(dir).expect("recompile heals the cache");
    assert!(worker_class_ready(dir), "healed cache must read ready");

    // Every required class must now exist with real (non-empty) content.
    for cls in WORKER_CLASS_FILES {
        let published = std::fs::metadata(dir.join(cls)).expect("class published");
        assert!(published.len() > 0, "{cls} must be a real (non-empty) class");
    }
}
#[test]
fn worker_class_files_match_javac_output() {
    // Drift guard for `WORKER_CLASS_FILES`: compile the embedded worker
    // source and require the emitted `.class` files to equal the pinned
    // set, so a nested type added to the worker later cannot slip past the
    // readiness gate unnoticed.
    let javac = match std::env::var("NYX_JAVAC_BIN") {
        Ok(bin) => bin,
        Err(_) => "javac".to_owned(),
    };
    let probe = std::process::Command::new(&javac).arg("-version").output();
    let have_javac = match probe {
        Ok(out) => out.status.success(),
        Err(_) => false,
    };
    if !have_javac {
        // JRE-only / no JDK: nothing to compile against.
        return;
    }

    let tmp = tempfile::TempDir::new().unwrap();
    let src = tmp.path().join(WORKER_FILENAME);
    let out = tmp.path().join("out");
    std::fs::write(&src, WORKER_SOURCE).unwrap();
    std::fs::create_dir_all(&out).unwrap();

    let mut compile = std::process::Command::new(&javac);
    compile.args(["-encoding", "UTF-8", "-d"]).arg(&out).arg(&src);
    let status = compile.status().expect("spawn javac");
    assert!(status.success(), "worker source must compile");

    // Collect the names of every `.class` file javac wrote.
    let mut emitted: Vec<String> = Vec::new();
    for entry in std::fs::read_dir(&out).unwrap().flatten() {
        let name = entry.file_name().to_string_lossy().into_owned();
        if name.ends_with(".class") {
            emitted.push(name);
        }
    }
    emitted.sort();

    let mut expected: Vec<String> = WORKER_CLASS_FILES
        .iter()
        .map(|name| name.to_string())
        .collect();
    expected.sort();

    assert_eq!(
        emitted, expected,
        "WORKER_CLASS_FILES must mirror the worker's javac output",
    );
}
#[test]
fn publish_class_set_moves_every_class_and_writes_manifest() {
    const CLASS_MAGIC: &[u8] = b"\xca\xfe\xba\xbe";
    const CLASS_NAMES: [&str; 3] = [
        "NyxJavacWorker.class",
        "NyxJavacWorker$Parser.class",
        "NyxJavacWorker$Request.class",
    ];

    let tmp = tempfile::TempDir::new().unwrap();
    let dir = tmp.path();
    let staging = dir.join(".compile-test");
    std::fs::create_dir_all(&staging).unwrap();

    // Fake a javac output directory: the top-level and nested classes,
    // plus one non-class artifact that publishing must leave alone.
    for name in CLASS_NAMES {
        std::fs::write(staging.join(name), CLASS_MAGIC).unwrap();
    }
    std::fs::write(staging.join("notes.txt"), b"ignore me").unwrap();

    publish_class_set(&staging, dir).expect("publish");

    for cls in CLASS_NAMES {
        assert!(dir.join(cls).is_file(), "{cls} must be published");
    }
    // The non-class file is not published into the target directory.
    assert!(!dir.join("notes.txt").exists());

    let manifest = std::fs::read_to_string(dir.join(WORKER_MANIFEST)).unwrap();
    let listed: Vec<&str> = manifest.lines().collect();
    assert_eq!(listed.len(), 3, "manifest lists all 3 classes: {listed:?}");
    assert!(listed.contains(&"NyxJavacWorker$Parser.class"));
}
#[test]
fn bootstrap_lock_is_reentrant_across_sequential_acquires() {
    // Dropping the guard releases the flock, so acquiring twice in a row
    // from one process must succeed instead of deadlocking.
    let dir = tempfile::TempDir::new().unwrap();
    let lock_root = dir.path();

    let first = BootstrapLock::acquire(lock_root).expect("first acquire");
    drop(first);

    let _second = BootstrapLock::acquire(lock_root).expect("second acquire");
    assert!(lock_root.join(".bootstrap.lock").exists());
}
}

View file

@ -0,0 +1,256 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//
// Long-lived javac worker bundled with nyx-scanner. The Rust pool side
// compiles + spawns this once per toolchain id; subsequent harness
// compiles run in-process via ToolProvider#getSystemJavaCompiler so the
// JVM cold-start cost is amortised across every harness in a verify run.
//
// Wire format: newline-terminated UTF-8 JSON, one request per line:
// {"id":"0","cwd":"/path/to/workdir","args":["-d","/tmp/x","Foo.java"]}\n
//
// Response: newline-terminated UTF-8 JSON, one per request:
// {"id":"0","success":true,"stderr_b64":"<base64 of javac stderr>"}\n
//
// stderr is base64-encoded so it never embeds raw newlines or quotes
// inside the JSON envelope -- keeps the parser on both sides tiny.
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import javax.tools.JavaCompiler;
import javax.tools.ToolProvider;
/**
 * Long-lived javac worker: reads one JSON compile request per stdin line,
 * runs the in-process system Java compiler, and writes one JSON response per
 * request to stdout.
 */
public class NyxJavacWorker {
    /**
     * Worker main loop.
     *
     * <p>Exits with status 2 when no system Java compiler is available.
     * Otherwise prints a {@code {"ready":true}} banner and then serves
     * requests until stdin reaches end-of-file.
     *
     * @param argv unused
     * @throws Exception on I/O failure reading stdin
     */
    public static void main(String[] argv) throws Exception {
        JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
        if (compiler == null) {
            // JRE without javac (rare on dev boxes, possible on slim CI
            // images). Signal the Rust side so it falls back to the
            // direct-spawn legacy path.
            System.err.println("nyx-javac-worker: no system Java compiler (JRE-only install?)");
            System.exit(2);
        }
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
        PrintStream out = new PrintStream(System.out, true, StandardCharsets.UTF_8);
        // Banner line. The Rust side reads this first so it knows the
        // worker is live before it queues any compile requests.
        out.println("{\"ready\":true}");
        out.flush();

        String line;
        while ((line = in.readLine()) != null) {
            line = line.trim();
            if (line.isEmpty()) {
                continue;
            }
            Request req;
            try {
                req = parse(line);
            } catch (Throwable t) {
                // Malformed request (or a JVM-level failure while parsing)
                // -- emit an error response keyed on an empty id so the
                // Rust side can at least surface it.
                String detail = "nyx-javac-worker: parse error: " + describe(t);
                writeResponse(out, "", false, detail.getBytes(StandardCharsets.UTF_8));
                continue;
            }

            ByteArrayOutputStream errBuf = new ByteArrayOutputStream();
            PrintStream errStream = new PrintStream(errBuf, true, StandardCharsets.UTF_8);
            int rc;
            try {
                String[] args = req.args.toArray(new String[0]);
                if (req.cwd != null && !req.cwd.isEmpty()) {
                    // The JDK compiler API has no per-task cwd switch and
                    // the args are passed through untouched. The property
                    // is set defensively so relative -d / -cp / source-path
                    // entries have a chance to resolve against the
                    // requested workdir.
                    // NOTE(review): the JDK may cache user.dir at startup,
                    // in which case this has no effect on path resolution;
                    // the value also persists for later requests that omit
                    // cwd -- confirm callers always send absolute paths.
                    System.setProperty("user.dir", req.cwd);
                }
                rc = compiler.run(null, null, errStream, args);
            } catch (Throwable t) {
                // A compiler crash is reported as a failed compile with the
                // stack trace as its stderr.
                t.printStackTrace(errStream);
                rc = 1;
            }
            boolean success = (rc == 0);
            writeResponse(out, req.id, success, errBuf.toByteArray());
        }
    }

    /**
     * Render a throwable for the parse-error response.
     *
     * <p>The parser's own failures are plain {@link RuntimeException}s whose
     * message is already self-explanatory, so that message is returned as-is.
     * Anything else (linkage errors, index errors, ...) is rendered with
     * {@code toString()} so the exception type is not lost -- a bare
     * {@code getMessage()} of a {@code NoClassDefFoundError} is only a class
     * name.
     */
    private static String describe(Throwable t) {
        String message = t.getMessage();
        if (t.getClass() == RuntimeException.class && message != null) {
            return message;
        }
        return t.toString();
    }

    /**
     * Write one response line: {@code {"id":..,"success":..,"stderr_b64":".."}}.
     *
     * @param out     destination stream (flushed after the line)
     * @param id      request id echoed back, JSON-escaped
     * @param success whether the compile returned 0
     * @param stderr  raw compiler diagnostics, sent base64-encoded
     */
    private static void writeResponse(PrintStream out, String id, boolean success, byte[] stderr) {
        String b64 = Base64.getEncoder().encodeToString(stderr);
        StringBuilder sb = new StringBuilder(64 + b64.length());
        sb.append("{\"id\":");
        appendJsonString(sb, id);
        sb.append(",\"success\":").append(success);
        sb.append(",\"stderr_b64\":\"").append(b64).append("\"}");
        out.println(sb);
        out.flush();
    }

    /** Append {@code s} to {@code sb} as a quoted, escaped JSON string. */
    private static void appendJsonString(StringBuilder sb, String s) {
        sb.append('"');
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '\\': sb.append("\\\\"); break;
                case '"': sb.append("\\\""); break;
                case '\n': sb.append("\\n"); break;
                case '\r': sb.append("\\r"); break;
                case '\t': sb.append("\\t"); break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters need a \\uXXXX escape.
                        sb.append(String.format("\\u%04x", (int) c));
                    } else {
                        sb.append(c);
                    }
            }
        }
        sb.append('"');
    }

    /** One decoded compile request; absent fields keep these defaults. */
    private static final class Request {
        String id = "";
        String cwd = "";
        List<String> args = new ArrayList<>();
    }

    /**
     * Decode one request object. Recognises the string fields {@code id} and
     * {@code cwd} and the string array {@code args}; any other key's value is
     * skipped.
     *
     * @throws RuntimeException when the text is not a well-formed request
     */
    private static Request parse(String s) {
        Parser p = new Parser(s);
        Request r = new Request();
        p.skipWs();
        p.expect('{');
        p.skipWs();
        if (p.peek() == '}') {
            p.next();
            return r;
        }
        while (true) {
            p.skipWs();
            String key = p.parseString();
            p.skipWs();
            p.expect(':');
            p.skipWs();
            if (key.equals("id")) {
                r.id = p.parseString();
            } else if (key.equals("cwd")) {
                r.cwd = p.parseString();
            } else if (key.equals("args")) {
                p.expect('[');
                p.skipWs();
                if (p.peek() != ']') {
                    while (true) {
                        p.skipWs();
                        r.args.add(p.parseString());
                        p.skipWs();
                        if (p.peek() == ',') { p.next(); continue; }
                        break;
                    }
                }
                p.skipWs();
                p.expect(']');
            } else {
                skipValue(p);
            }
            p.skipWs();
            if (p.peek() == ',') { p.next(); continue; }
            break;
        }
        p.skipWs();
        p.expect('}');
        return r;
    }

    /**
     * Advance past one JSON value of any kind (string, array, object, or a
     * bare number / true / false / null token) without keeping it.
     */
    private static void skipValue(Parser p) {
        p.skipWs();
        char c = p.peek();
        if (c == '"') {
            p.parseString();
        } else if (c == '[') {
            p.next();
            p.skipWs();
            if (p.peek() != ']') {
                while (true) {
                    skipValue(p);
                    p.skipWs();
                    if (p.peek() == ',') { p.next(); continue; }
                    break;
                }
            }
            p.skipWs();
            p.expect(']');
        } else if (c == '{') {
            p.next();
            p.skipWs();
            if (p.peek() != '}') {
                while (true) {
                    p.skipWs();
                    p.parseString();
                    p.skipWs();
                    p.expect(':');
                    skipValue(p);
                    p.skipWs();
                    if (p.peek() == ',') { p.next(); continue; }
                    break;
                }
            }
            p.skipWs();
            p.expect('}');
        } else {
            // Bare scalar: consume the run of characters that can appear in
            // a number or in the literals true / false / null.
            int start = p.pos;
            while (p.pos < p.s.length() && "0123456789.-+eEtrufalsn".indexOf(p.s.charAt(p.pos)) >= 0) {
                p.pos++;
            }
            if (p.pos == start) {
                throw new RuntimeException("bad value at " + p.pos);
            }
        }
    }

    /** Cursor over the request text; every failure is a RuntimeException. */
    private static final class Parser {
        final String s;
        int pos = 0;

        Parser(String s) { this.s = s; }

        /** Current character; fails descriptively at end of input. */
        char peek() {
            if (pos >= s.length()) {
                throw new RuntimeException("unexpected end of input at " + pos);
            }
            return s.charAt(pos);
        }

        /** Current character, then advance; fails descriptively at end of input. */
        char next() {
            char c = peek();
            pos++;
            return c;
        }

        void skipWs() {
            while (pos < s.length() && Character.isWhitespace(s.charAt(pos))) {
                pos++;
            }
        }

        void expect(char c) {
            if (pos >= s.length() || s.charAt(pos) != c) {
                throw new RuntimeException("expected '" + c + "' at " + pos + " of " + s);
            }
            pos++;
        }

        /** Parse a double-quoted JSON string and return its decoded value. */
        String parseString() {
            expect('"');
            StringBuilder sb = new StringBuilder();
            while (pos < s.length()) {
                char c = s.charAt(pos++);
                if (c == '"') {
                    return sb.toString();
                }
                if (c != '\\') {
                    sb.append(c);
                    continue;
                }
                // next() reports a trailing backslash as end-of-input
                // instead of an index error.
                char e = next();
                switch (e) {
                    case '"': sb.append('"'); break;
                    case '\\': sb.append('\\'); break;
                    case '/': sb.append('/'); break;
                    case 'b': sb.append('\b'); break;
                    case 'f': sb.append('\f'); break;
                    case 'n': sb.append('\n'); break;
                    case 'r': sb.append('\r'); break;
                    case 't': sb.append('\t'); break;
                    case 'u': sb.append(parseHex4()); break;
                    default: throw new RuntimeException("bad escape \\" + e);
                }
            }
            throw new RuntimeException("unterminated string");
        }

        /** Decode the four hex digits of a {@code \\uXXXX} escape. */
        private char parseHex4() {
            if (pos + 4 > s.length()) {
                throw new RuntimeException("truncated \\u escape at " + pos);
            }
            int code = 0;
            for (int i = 0; i < 4; i++) {
                int digit = Character.digit(s.charAt(pos + i), 16);
                if (digit < 0) {
                    throw new RuntimeException("bad \\u escape at " + pos);
                }
                code = (code << 4) | digit;
            }
            pos += 4;
            return (char) code;
        }
    }
}

View file

@ -0,0 +1,340 @@
//! Build pools: long-lived compiler / toolchain daemons shared across many
//! per-finding harness builds.
//!
//! The naive `prepare_*` path in [`crate::dynamic::build_sandbox`] spawns a
//! fresh `javac` / `tsc` / `cargo build` subprocess for every finding the
//! verifier touches. Cold-start dominates the cost: `javac` alone burns
//! ~700ms before it has read a single source. A 50-harness OWASP run pays
//! that 50× — > 30s of pure JVM startup.
//!
//! A `BuildPool` is a long-running worker process (or in-process service)
//! that compiles batches of harness sources in a single toolchain instance.
//! The per-harness wall-clock collapses to milliseconds once the pool is
//! warm.
//!
//! # Lifecycle
//!
//! `OnceLock<Arc<P>>` per toolchain id, lazily spawned on first request.
//! Pools live for the rest of the process; the OS reaps them on exit.
//! Crashes are non-fatal: callers fall back to the legacy direct-spawn path
//! via [`BuildPool::is_healthy`] and a re-spawn on the next call.
//!
//! # Future-language plug-in
//!
//! Per-language sub-modules (`java.rs`, eventually `node.rs`, `python.rs`,
//! …) implement the [`BuildPool`] trait. The harness build dispatcher in
//! [`crate::dynamic::build_sandbox`] reads `NYX_DYNAMIC_BUILD_POOL` and
//! routes each request to the matching pool when enabled.
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Duration;
pub mod c;
pub mod cpp;
pub mod go;
pub mod java;
pub mod node;
pub mod php;
pub mod python;
pub mod ruby;
pub mod rust;
/// Outcome of a single batched compile request.
///
/// Plain data carrier returned by [`BuildPool::compile_batch`]; it derives
/// only `Debug`.
#[derive(Debug)]
pub struct PoolCompileResult {
/// `true` when the toolchain reported a clean compile.
pub success: bool,
/// Toolchain stderr — surfaced as `BuildError::BuildFailed` upstream
/// when `success == false`.
pub stderr: String,
/// Wall-clock for the in-pool compile step (excludes any IPC / queue
/// wait time). Useful for telemetry; callers may ignore.
pub duration: Duration,
}
/// Common contract for every per-language build pool.
///
/// Implementations are expected to be `Send + Sync` so an `Arc<dyn BuildPool>`
/// can be cached in a static `OnceLock` and shared across rayon worker
/// threads.
pub trait BuildPool: Send + Sync {
/// Stable identifier — used in log lines + telemetry so an operator
/// can correlate a pool warmup with the harness that triggered it.
fn name(&self) -> &'static str;
/// Compile every source file under `workdir` matching the pool's
/// language convention. On success the toolchain has written
/// artefacts back into `workdir` (or wherever the pool's contract
/// dictates).
///
/// The meaning of `args` is defined by each implementation; some pools
/// ignore it entirely. Failures are reported through the returned
/// [`PoolCompileResult`] rather than a `Result`.
fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult;
/// Cheap health check — when this returns `false`, the harness build
/// dispatcher falls back to the direct-spawn legacy path and tears
/// down the cached handle so the next request triggers a re-spawn.
fn is_healthy(&self) -> bool;
}
/// Languages that ship a [`BuildPool`] implementation and are therefore
/// enabled by default. Phase 22 shipped `java`; Phase 23 (Track O.1) adds
/// the remaining eight, so every supported language now has a warm fast path
/// unless an operator opts out via `NYX_DYNAMIC_BUILD_POOL=<lang>=0`.
const POOL_ENABLED_LANGS: &[&str] = &[
    "java", "node", "python", "php", "ruby", "go", "rust", "c", "cpp",
];

/// Spellings that switch a pool on. Values are compared ASCII
/// case-insensitively, mirroring how the language keys are matched.
const POOL_TRUTHY_VALUES: &[&str] = &["1", "true", "yes", "on"];

/// Look `lang` up in a raw `NYX_DYNAMIC_BUILD_POOL` value.
///
/// Returns `None` when no `lang=...` entry exists. Entries without an `=`
/// and empty entries are ignored; the first entry whose key matches wins.
fn pool_override(raw: &str, lang: &str) -> Option<bool> {
    raw.split(',')
        .filter_map(|entry| entry.trim().split_once('='))
        .find(|(key, _)| key.trim().eq_ignore_ascii_case(lang))
        .map(|(_, value)| {
            let value = value.trim();
            POOL_TRUTHY_VALUES
                .iter()
                .any(|truthy| value.eq_ignore_ascii_case(truthy))
        })
}

/// Parse the `NYX_DYNAMIC_BUILD_POOL` env var.
///
/// Format is a comma-separated list of `lang=bit` entries: `java=1,node=0`.
/// Keys and values are both matched ASCII case-insensitively. A value of
/// `1`, `true`, `yes` or `on` enables the pool; any other value disables it.
///
/// A language with no entry (or an unset / non-Unicode variable) gets the
/// default: `true` for every language that ships a pool (see
/// `POOL_ENABLED_LANGS`), `false` otherwise.
pub fn is_pool_enabled(lang: &str) -> bool {
    let default = POOL_ENABLED_LANGS.contains(&lang);
    match std::env::var("NYX_DYNAMIC_BUILD_POOL") {
        Ok(raw) => pool_override(&raw, lang).unwrap_or(default),
        Err(_) => default,
    }
}
/// Shared root for a pool's persistent caches (sccache dir, shared
/// `GOCACHE`, opcache file-cache, Bootsnap cache, shared venvs, …).
///
/// Honours `NYX_BUILD_POOL_DIR` so tests can redirect the cache into a
/// `TempDir`; otherwise falls back to the platform cache dir, mirroring
/// the javac pool's layout under `dynamic/build-pool/`.
///
/// Returns `None` only when neither the env override nor a platform cache
/// dir is available — callers treat that as "pool unavailable" and fall
/// back to the legacy direct-spawn build path.
pub(crate) fn pool_cache_dir(lang: &str, sub: &str) -> Option<PathBuf> {
    let override_root = std::env::var("NYX_BUILD_POOL_DIR").ok().map(PathBuf::from);
    let has_override = override_root.is_some();

    // Explicit override first, otherwise the platform cache dir.
    let root = match override_root {
        Some(root) => root,
        None => directories::ProjectDirs::from("dev", "nyx", "nyx")?
            .cache_dir()
            .join("dynamic")
            .join("build-pool"),
    };

    let primary = root.join(lang).join(sub);
    if ensure_writable_dir(&primary).is_some() {
        return Some(primary);
    }
    // An explicit override that is not writable is final: no temp fallback.
    if has_override {
        return None;
    }

    // Platform cache dir unusable: retry under the system temp dir.
    let mut fallback = std::env::temp_dir();
    fallback.extend(["nyx", "dynamic", "build-pool", lang, sub]);
    ensure_writable_dir(&fallback)
}
/// Create `dir` (and any missing parents) and prove it is writable by
/// writing, then removing, a pid-named probe file inside it.
///
/// Returns `Some(dir)` on success and `None` when either the directory
/// cannot be created or the probe cannot be written. A failure to remove
/// the probe afterwards is ignored.
fn ensure_writable_dir(dir: &Path) -> Option<PathBuf> {
    if std::fs::create_dir_all(dir).is_err() {
        return None;
    }
    let probe_name = format!(".nyx-write-probe-{}", std::process::id());
    let probe_path = dir.join(probe_name);
    match std::fs::write(&probe_path, b"ok") {
        Ok(()) => {
            let _ = std::fs::remove_file(&probe_path);
            Some(dir.to_path_buf())
        }
        Err(_) => None,
    }
}
/// Construct a `Command` for `bin` with a scrubbed environment, matching
/// the isolation envelope every legacy `prepare_*` build uses: `env_clear`
/// plus an inherited `PATH` + `HOME` only. Pools layer their cache env
/// (`CARGO_TARGET_DIR`, `CCACHE_DIR`, `GOCACHE`, …) on top of this.
pub(crate) fn base_command(bin: &str) -> Command {
    let tmp = build_temp_dir();
    // `var_os` rather than `var`: a non-Unicode PATH / HOME is forwarded
    // unchanged instead of being silently replaced by an empty string,
    // which would make every bare binary name unresolvable. An unset
    // variable is still passed through as empty.
    let path = std::env::var_os("PATH").unwrap_or_default();
    let home = std::env::var_os("HOME").unwrap_or_default();

    let mut cmd = Command::new(bin);
    cmd.env_clear()
        .env("PATH", path)
        .env("HOME", home)
        // All three spellings of the temp-dir variable point at the same
        // directory.
        .env("TMPDIR", &tmp)
        .env("TMP", &tmp)
        .env("TEMP", &tmp);
    cmd
}

/// Temp directory handed to build subprocesses: `<system temp>/nyx-build-tmp`
/// when it can be created, otherwise the system temp dir itself.
fn build_temp_dir() -> PathBuf {
    let system_tmp = std::env::temp_dir();
    let scoped = system_tmp.join("nyx-build-tmp");
    match std::fs::create_dir_all(&scoped) {
        Ok(()) => scoped,
        Err(_) => system_tmp,
    }
}
/// Hermetic Bundler / RubyGems environment pinned to a writable per-workdir
/// vendor directory.
///
/// Points `GEM_HOME` and `BUNDLE_PATH` at `<workdir>/vendor/bundle` so every
/// gem *install* lands in a directory the current user owns. This is the
/// load-bearing fix for the harness build invoking `sudo`: legacy Bundler
/// (1.x) shells out to `sudo` when the install target — the root-owned system
/// gem dir (`/Library/Ruby/Gems/...`) — is not writable, which then blocks on
/// a terminal password prompt (`sudo: a terminal is required to read the
/// password`). With a writable target there is no privilege escalation and
/// no prompt, ever.
///
/// `GEM_PATH` is deliberately left unset so RubyGems still includes the system
/// gem path when *resolving* (paired with `BUNDLE_DISABLE_SHARED_GEMS=false`),
/// letting an already-installed gem satisfy the Gemfile without a network
/// fetch — while installs of missing gems still land in the writable vendor
/// dir. `BUNDLE_APP_CONFIG` keeps Bundler's per-project config writable and
/// inside the workdir.
///
/// Returned as env pairs (not applied to a `Command` here) so both the pooled
/// path ([`ruby::RubyPool`]) and the legacy direct-spawn path
/// ([`crate::dynamic::build_sandbox`]) layer them on identically. Setting
/// these env vars is Bundler-version-agnostic: 1.x and 2.x both honour
/// `BUNDLE_*` / `GEM_*`, unlike the 2.x-only `bundle config set` CLI the old
/// path relied on (which is a silent no-op on 1.x, leaving the install target
/// pointed at the system dir — the original root cause).
pub(crate) fn ruby_hermetic_env(workdir: &Path) -> Vec<(&'static str, std::ffi::OsString)> {
    use std::ffi::OsString;

    // Install target: created eagerly, best-effort (errors are ignored).
    let vendor_bundle = workdir.join("vendor").join("bundle");
    let _ = std::fs::create_dir_all(&vendor_bundle);
    let app_config = workdir.join(".bundle");

    let mut env: Vec<(&'static str, OsString)> = Vec::with_capacity(5);
    env.push(("GEM_HOME", vendor_bundle.clone().into_os_string()));
    env.push(("BUNDLE_PATH", vendor_bundle.into_os_string()));
    env.push(("BUNDLE_DISABLE_SHARED_GEMS", OsString::from("false")));
    env.push(("BUNDLE_FROZEN", OsString::from("false")));
    env.push(("BUNDLE_APP_CONFIG", app_config.into_os_string()));
    env
}
/// Merge a process's stdout and stderr into one diagnostic blob.
///
/// Some build tools split their failure diagnostics across streams — Bundler
/// in particular prints "Could not find gem …" to stdout while only an
/// unrelated RubyGems extension warning lands on stderr. Capturing both keeps
/// the downstream host-limitation classifier from missing the real reason.
pub(crate) fn combine_output(stdout: &[u8], stderr: &[u8]) -> String {
    let stdout_text = String::from_utf8_lossy(stdout);
    let stderr_text = String::from_utf8_lossy(stderr);

    // Blank stdout: stderr is returned untouched, even if it is blank too.
    if stdout_text.trim().is_empty() {
        return stderr_text.into_owned();
    }
    // Only stdout carries content.
    if stderr_text.trim().is_empty() {
        return stdout_text.into_owned();
    }
    // Both carry content: stdout first, then stderr, newline-separated.
    format!("{stdout_text}\n{stderr_text}")
}
/// Detect a runnable `ccache` binary (honouring `NYX_CCACHE_BIN`). Shared
/// by the C and C++ pools to front their compiler with the shared object
/// cache; `None` means "compile bare", preserving legacy parity.
pub(crate) fn detect_ccache() -> Option<String> {
    // `NYX_CCACHE_BIN` overrides the default binary name.
    let candidate = match std::env::var("NYX_CCACHE_BIN") {
        Ok(custom) => custom,
        Err(_) => String::from("ccache"),
    };
    if binary_runnable(&candidate, "--version") {
        Some(candidate)
    } else {
        None
    }
}
/// Cheap "is this binary runnable" probe used by every pool's
/// [`BuildPool::is_healthy`] / `try_new`. Runs `bin <probe_arg>` with a
/// scrubbed env and reports whether it exited 0.
pub(crate) fn binary_runnable(bin: &str, probe_arg: &str) -> bool {
    use std::process::Stdio;

    // The probe runs detached from stdio so it cannot block or print.
    let mut probe = base_command(bin);
    probe
        .arg(probe_arg)
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    // A spawn failure counts the same as a non-zero exit.
    match probe.status() {
        Ok(status) => status.success(),
        Err(_) => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard};

    const POOL_VAR: &str = "NYX_DYNAMIC_BUILD_POOL";

    /// Serialises the tests in this module, all of which mutate `POOL_VAR`.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Take `ENV_LOCK`, recovering from poisoning.
    ///
    /// The mutex guards no data, so a poisoned lock carries no broken
    /// invariant. Recovering keeps one failing test from cascading a
    /// `PoisonError` panic into every test that runs after it.
    fn env_lock() -> MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Set (`Some`) or remove (`None`) `POOL_VAR` in the process environment.
    fn apply(value: Option<&str>) {
        // SAFETY: callers hold `ENV_LOCK`, so the tests in this module never
        // mutate the variable concurrently with each other.
        // NOTE(review): threads outside this module that touch the
        // environment at the same time are not covered by that lock.
        match value {
            Some(v) => unsafe { std::env::set_var(POOL_VAR, v) },
            None => unsafe { std::env::remove_var(POOL_VAR) },
        }
    }

    /// Overrides `POOL_VAR` for a test and restores the prior value on drop.
    struct EnvGuard {
        prior: Option<String>,
    }

    impl EnvGuard {
        fn set(value: Option<&str>) -> Self {
            let prior = std::env::var(POOL_VAR).ok();
            apply(value);
            Self { prior }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            apply(self.prior.take().as_deref());
        }
    }

    // In every test the lock guard is declared before the env guard, so the
    // env guard drops first and the variable is restored while still locked.

    #[test]
    fn default_enables_every_shipped_pool() {
        let _l = env_lock();
        let _g = EnvGuard::set(None);
        for lang in POOL_ENABLED_LANGS {
            assert!(is_pool_enabled(lang), "{lang} pool must default on");
        }
        // A language with no pool stays off.
        assert!(!is_pool_enabled("cobol"));
    }

    #[test]
    fn explicit_override_disables_node() {
        let _l = env_lock();
        let _g = EnvGuard::set(Some("node=0"));
        assert!(!is_pool_enabled("node"));
        // Other languages keep their default-on state.
        assert!(is_pool_enabled("python"));
    }

    #[test]
    fn explicit_override_disables_java() {
        let _l = env_lock();
        let _g = EnvGuard::set(Some("java=0"));
        assert!(!is_pool_enabled("java"));
    }

    #[test]
    fn multi_entry_parses_per_lang() {
        let _l = env_lock();
        let _g = EnvGuard::set(Some("java=1,node=1,python=0"));
        assert!(is_pool_enabled("java"));
        assert!(is_pool_enabled("node"));
        assert!(!is_pool_enabled("python"));
    }

    #[test]
    fn case_insensitive_keys() {
        let _l = env_lock();
        let _g = EnvGuard::set(Some("JAVA=0"));
        assert!(!is_pool_enabled("java"));
    }

    #[test]
    fn unknown_value_treated_as_disabled() {
        let _l = env_lock();
        let _g = EnvGuard::set(Some("java=maybe"));
        assert!(!is_pool_enabled("java"));
    }
}

View file

@ -0,0 +1,87 @@
//! Node.js build pool (Phase 23 / Track O.1).
//!
//! `prepare_node` already snapshots `node_modules` per `package.json` hash.
//! What it lacks is a shared npm download cache: a fresh lock hash re-downloads
//! every tarball from cold.
//!
//! [`NodePool`] points `npm_config_cache` at the shared pool root so package
//! tarballs are reused across lock hashes, collapsing a cold `npm install` to
//! an unpack of already-fetched tarballs. TypeScript harnesses that do not
//! need full type checking are run with `--experimental-strip-types` at
//! execution time (the runner reads [`strip_types_flag`]); the pool itself only
//! owns the install step.
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
use std::path::Path;
use std::time::Instant;
/// Build pool whose compile step is an `npm install`.
pub struct NodePool {
    /// npm executable resolved once in [`NodePool::try_new`].
    npm_bin: String,
}

impl NodePool {
    /// Resolve the npm binary (`NYX_NPM_BIN`, defaulting to `npm`) and
    /// verify that `<npm> --version` runs.
    ///
    /// # Errors
    ///
    /// Returns a `node-pool: ... not runnable` message when the probe fails.
    pub fn try_new() -> Result<Self, String> {
        let npm_bin = match std::env::var("NYX_NPM_BIN") {
            Ok(custom) => custom,
            Err(_) => String::from("npm"),
        };
        if binary_runnable(&npm_bin, "--version") {
            Ok(NodePool { npm_bin })
        } else {
            Err(format!("node-pool: {npm_bin} not runnable"))
        }
    }
}
/// The Node flag that lets a TS harness skip a full `tsc` compile when the
/// spec does not need type checking. Surfaced as a free function so the
/// runner can splice it into the harness exec without holding a pool handle.
pub fn strip_types_flag() -> &'static str {
    const STRIP_TYPES: &str = "--experimental-strip-types";
    STRIP_TYPES
}
impl BuildPool for NodePool {
    fn name(&self) -> &'static str {
        "node"
    }

    /// Run `npm install` inside `workdir`, so the dependencies declared by
    /// `workdir/package.json` land in `workdir/node_modules`. `_args` is
    /// ignored.
    fn compile_batch(&self, workdir: &Path, _args: &[String]) -> PoolCompileResult {
        let started = Instant::now();

        let mut npm = base_command(&self.npm_bin);
        npm.current_dir(workdir)
            .args(["install", "--no-save", "--no-audit", "--no-fund"]);
        // Share the download cache across installs when a cache dir exists.
        if let Some(cache) = pool_cache_dir("node", "npm-cache") {
            npm.env("npm_config_cache", cache);
        }

        let (success, stderr) = match npm.output() {
            Ok(out) if out.status.success() => (true, String::new()),
            Ok(out) => (false, String::from_utf8_lossy(&out.stderr).into_owned()),
            Err(err) => (false, format!("node-pool: npm install: {err}")),
        };
        PoolCompileResult {
            success,
            stderr,
            duration: started.elapsed(),
        }
    }

    /// Healthy while `<npm> --version` still runs.
    fn is_healthy(&self) -> bool {
        binary_runnable(&self.npm_bin, "--version")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the exact flag string returned by `strip_types_flag`.
    #[test]
    fn strip_types_flag_is_the_node_native_ts_flag() {
        let flag = strip_types_flag();
        assert_eq!(flag, "--experimental-strip-types");
    }
}

View file

@ -0,0 +1,110 @@
//! PHP build pool (Phase 23 / Track O.1).
//!
//! Two warm caches keyed off the Composer lockfile:
//! - `COMPOSER_CACHE_DIR` points at the shared pool root so package downloads
//! are reused across lock hashes, and
//! - an opcache file-cache directory is pre-warmed so the harness `php`
//! process skips re-parsing the vendored sources on first run.
//!
//! Both degrade gracefully: a missing `composer` makes `try_new` fail and the
//! caller falls back to the legacy
//! [`crate::dynamic::build_sandbox::prepare_php`] path; a missing `php` simply
//! skips the opcache warm (the install still succeeds).
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
use std::path::Path;
use std::time::Instant;
/// Build pool whose compile step is a `composer install`.
pub struct PhpPool {
    /// Composer executable resolved once in [`PhpPool::try_new`].
    composer_bin: String,
}

impl PhpPool {
    /// Resolve the Composer binary (`NYX_COMPOSER_BIN`, defaulting to
    /// `composer`) and verify that `<composer> --version` runs.
    ///
    /// # Errors
    ///
    /// Returns a `php-pool: ... not runnable` message when the probe fails.
    pub fn try_new() -> Result<Self, String> {
        let composer_bin = match std::env::var("NYX_COMPOSER_BIN") {
            Ok(custom) => custom,
            Err(_) => String::from("composer"),
        };
        if binary_runnable(&composer_bin, "--version") {
            Ok(PhpPool { composer_bin })
        } else {
            Err(format!("php-pool: {composer_bin} not runnable"))
        }
    }
}
impl BuildPool for PhpPool {
    fn name(&self) -> &'static str {
        "php"
    }

    /// Run `composer install` inside `workdir`, then make a best-effort
    /// attempt to warm the shared opcache file-cache. `_args` is ignored.
    fn compile_batch(&self, workdir: &Path, _args: &[String]) -> PoolCompileResult {
        let started = Instant::now();

        let mut composer = base_command(&self.composer_bin);
        composer
            .current_dir(workdir)
            .env("COMPOSER_ALLOW_SUPERUSER", "1")
            .args(["install", "--no-interaction", "--no-dev", "--prefer-dist"]);
        // Share the download cache across installs when a cache dir exists.
        if let Some(cache) = pool_cache_dir("php", "composer-cache") {
            composer.env("COMPOSER_CACHE_DIR", cache);
        }

        // `Some(message)` means the install failed and the warm-up is skipped.
        let install_error = match composer.output() {
            Ok(out) if out.status.success() => None,
            Ok(out) => Some(String::from_utf8_lossy(&out.stderr).into_owned()),
            Err(err) => Some(format!("php-pool: composer install: {err}")),
        };
        if let Some(stderr) = install_error {
            return PoolCompileResult {
                success: false,
                stderr,
                duration: started.elapsed(),
            };
        }

        warm_opcache(workdir);
        PoolCompileResult {
            success: true,
            stderr: String::new(),
            duration: started.elapsed(),
        }
    }

    /// Healthy while `<composer> --version` still runs.
    fn is_healthy(&self) -> bool {
        binary_runnable(&self.composer_bin, "--version")
    }
}
/// Best-effort opcache file-cache pre-warm: compile every vendored `.php`
/// into the shared opcache file-cache so the harness `php` process boots with
/// the bytecode already on disk. A missing `php` or partial failure is
/// swallowed — the install already succeeded and opcache is a pure speed win.
fn warm_opcache(workdir: &Path) {
    // PHP snippet run via `php -r`: walks `vendor` recursively and compiles
    // every `.php` file, suppressing per-file errors.
    const WARM_SCRIPT: &str =
        "foreach(new RecursiveIteratorIterator(new RecursiveDirectoryIterator('vendor')) \
         as $f){ if(substr($f,-4)==='.php'){ @opcache_compile_file($f); } }";

    // Nothing to warm without a vendor tree.
    if !workdir.join("vendor").exists() {
        return;
    }
    let php_bin = std::env::var("NYX_PHP_BIN").unwrap_or_else(|_| "php".to_owned());
    // No cache directory available: skip the warm-up entirely.
    let Some(file_cache) = pool_cache_dir("php", "opcache") else {
        return;
    };

    let ini_overrides = [
        "opcache.enable_cli=1".to_owned(),
        format!("opcache.file_cache={}", file_cache.display()),
        "opcache.file_cache_only=1".to_owned(),
    ];

    let mut php = base_command(&php_bin);
    for ini in &ini_overrides {
        php.arg("-d").arg(ini);
    }
    php.arg("-r").arg(WARM_SCRIPT).current_dir(workdir);

    // Best-effort: both spawn errors and a non-zero exit are ignored.
    let _ = php.output();
}

View file

@ -0,0 +1,122 @@
//! Python build pool (Phase 23 / Track O.1).
//!
//! `prepare_python` already keys its venv on the requirements hash, so the
//! venv itself is the "shared venv per `requirements_hash`". What the legacy
//! path lacks is a warm bytecode cache: the first harness to import a package
//! pays the `.py` -> `.pyc` compile.
//!
//! [`PythonPool`] runs `python -m compileall` over the venv's `site-packages`
//! once at venv-creation time so every later harness import is a `__pycache__`
//! hit. The pip download cache is pointed at the shared pool root so repeated
//! installs across requirements hashes reuse wheels.
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
use std::path::Path;
use std::time::Instant;
/// Stateless build pool for Python; the interpreter is supplied per request.
pub struct PythonPool;

impl PythonPool {
    /// Verify that `<python_bin> --version` runs before handing out a pool.
    ///
    /// # Errors
    ///
    /// Returns a `python-pool: ... not runnable` message when the probe
    /// fails.
    pub fn try_new(python_bin: &str) -> Result<Self, String> {
        if binary_runnable(python_bin, "--version") {
            Ok(PythonPool)
        } else {
            Err(format!("python-pool: {python_bin} not runnable"))
        }
    }
}
impl BuildPool for PythonPool {
    fn name(&self) -> &'static str {
        "python"
    }

    /// Create a venv, install `workdir/requirements.txt` into it (when that
    /// file exists), then byte-compile the venv.
    ///
    /// `args[0]` = venv path to create, `args[1]` = python interpreter binary
    /// (defaults to `python3` when absent).
    ///
    /// A missing `args[0]`, a failed venv creation or a failed `pip install`
    /// yields `success == false` with the reason in `stderr`. The final
    /// `compileall` pass is best-effort and never fails the build.
    fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
        let start = Instant::now();
        let failure = |stderr: String| PoolCompileResult {
            success: false,
            stderr,
            duration: start.elapsed(),
        };

        let Some(venv_path) = args.first().map(Path::new) else {
            return failure("python-pool: missing venv path arg".to_owned());
        };
        let python = args.get(1).map(String::as_str).unwrap_or("python3");

        // 1. Create the venv. The output is captured (not inherited) so the
        //    interpreter's diagnostics end up in the failure message instead
        //    of leaking onto the parent process's stdout/stderr.
        let create = base_command(python)
            .args(["-m", "venv", "--clear", "--system-site-packages"])
            .arg(venv_path)
            .output();
        match create {
            Ok(o) if o.status.success() => {}
            Ok(o) => {
                // `ExitStatus` already renders as "exit status: N", so it is
                // not prefixed with another "exit".
                let mut message = format!("venv create failed: {}", o.status);
                for stream in [&o.stdout, &o.stderr] {
                    let text = String::from_utf8_lossy(stream);
                    let text = text.trim();
                    if !text.is_empty() {
                        message.push('\n');
                        message.push_str(text);
                    }
                }
                return failure(message);
            }
            Err(e) => return failure(format!("python-pool: venv create: {e}")),
        }

        // 2. Install requirements with the shared wheel cache.
        let req_path = workdir.join("requirements.txt");
        if req_path.exists() {
            let pip = venv_path.join("bin").join("pip");
            let mut cmd = base_command(&pip.to_string_lossy());
            cmd.args(["install", "-r"]).arg(&req_path);
            if let Some(cache) = pool_cache_dir("python", "pip-cache") {
                cmd.env("PIP_CACHE_DIR", cache);
            } else {
                // No writable cache dir: tell pip not to cache at all.
                cmd.arg("--no-cache-dir");
            }
            match cmd.output() {
                Ok(o) if o.status.success() => {}
                Ok(o) => return failure(String::from_utf8_lossy(&o.stderr).into_owned()),
                Err(e) => return failure(format!("python-pool: pip install: {e}")),
            }
        }

        // 3. Warm __pycache__ for the whole venv (best-effort: a partial
        //    failure to byte-compile one module must not fail the build).
        let venv_python = venv_path.join("bin").join("python");
        let _ = base_command(&venv_python.to_string_lossy())
            .args(["-m", "compileall", "-q"])
            .arg(venv_path)
            .output();

        PoolCompileResult {
            success: true,
            stderr: String::new(),
            duration: start.elapsed(),
        }
    }

    fn is_healthy(&self) -> bool {
        // The interpreter is resolved per-request via args; treat the pool as
        // always healthy and let an unrunnable interpreter surface as a build
        // error, which the dispatcher already falls back from.
        true
    }
}

View file

@ -0,0 +1,120 @@
//! Ruby build pool (Phase 23 / Track O.1).
//!
//! `prepare_ruby` already vendors gems per `Gemfile.lock` hash. What it lacks
//! is a warm Bootsnap cache: the first harness to `require` a gem pays the
//! load-path scan + compile.
//!
//! [`RubyPool`] points `BOOTSNAP_CACHE_DIR` at the shared pool root and runs
//! `bundle install` with the shared gem cache. Bootsnap then persists its
//! compiled require-cache across findings. Falls back to the legacy path when
//! `bundle` is not runnable.
use super::{
BuildPool, PoolCompileResult, base_command, binary_runnable, combine_output, pool_cache_dir,
ruby_hermetic_env,
};
use std::path::Path;
use std::time::Instant;
/// Build pool that resolves a harness's gems through Bundler.
pub struct RubyPool {
    /// Bundler executable to invoke: `NYX_BUNDLE_BIN` when set, otherwise
    /// plain `bundle`.
    bundle_bin: String,
}

impl RubyPool {
    /// Build a pool, failing when the configured Bundler binary does not
    /// answer `--version`.
    ///
    /// # Errors
    ///
    /// Returns a `ruby-pool: … not runnable` message naming the binary that
    /// could not be run.
    pub fn try_new() -> Result<Self, String> {
        let bundle_bin = match std::env::var("NYX_BUNDLE_BIN") {
            Ok(configured) => configured,
            Err(_) => String::from("bundle"),
        };
        if binary_runnable(&bundle_bin, "--version") {
            Ok(Self { bundle_bin })
        } else {
            Err(format!("ruby-pool: {bundle_bin} not runnable"))
        }
    }

    /// Bundler command rooted at `workdir` with the hermetic gem environment
    /// and, when a pool cache dir is available, the shared Bootsnap cache.
    fn bundle(&self, workdir: &Path) -> std::process::Command {
        let mut command = base_command(&self.bundle_bin);
        command.current_dir(workdir);
        // Writable gem target → no privilege escalation → never `sudo`.
        for (key, value) in ruby_hermetic_env(workdir) {
            command.env(key, value);
        }
        match pool_cache_dir("ruby", "bootsnap") {
            Some(cache) => {
                command.env("BOOTSNAP_CACHE_DIR", cache);
            }
            None => {}
        }
        command
    }
}
impl BuildPool for RubyPool {
    /// Pool identifier.
    fn name(&self) -> &'static str {
        "ruby"
    }

    /// Make sure the `Gemfile` in `workdir` is satisfiable: first ask
    /// `bundle check`, and only if that fails run an offline
    /// `bundle install`. The `_args` slice is ignored.
    fn compile_batch(&self, workdir: &Path, _args: &[String]) -> PoolCompileResult {
        let start = Instant::now();
        // Every exit path reports the elapsed time measured from `start`.
        let outcome = |success: bool, stderr: String| PoolCompileResult {
            success,
            stderr,
            duration: start.elapsed(),
        };

        // `bundle check` short-circuits when the host already has every gem.
        //
        // The check deliberately runs with the *runtime* environment (plain
        // system gems, no `GEM_HOME`/`BUNDLE_PATH` override). The harness is
        // later run as `ruby harness.rb`, and its `require 'bundler/setup'`
        // resolves against the system gem path, so the build-time check has
        // to look at that same path to predict the run. The hermetic
        // override used for the install below only exists to give
        // `bundle install` a writable, sudo-free target for *missing* gems;
        // applying it here stops Bundler 1.x from seeing an already-installed
        // system gem (e.g. `rack`) and turns a satisfiable Gemfile into a
        // spurious BuildFailed.
        let mut check = base_command(&self.bundle_bin);
        check.current_dir(workdir);
        if let Some(cache) = pool_cache_dir("ruby", "bootsnap") {
            check.env("BOOTSNAP_CACHE_DIR", cache);
        }
        let already_satisfied = match check.arg("check").output() {
            Ok(probe) => probe.status.success(),
            Err(_) => false,
        };
        if already_satisfied {
            return outcome(true, String::new());
        }

        // The install target is pinned to a writable vendor dir through
        // `ruby_hermetic_env` (GEM_HOME / BUNDLE_PATH), which is why there is
        // no `bundle config set --local path …` step: that is 2.x-only syntax
        // that no-ops on Bundler 1.x and would leave the target pointed at
        // the root-owned system dir (the `sudo` root cause). `--local` keeps
        // the build offline: missing gems fail fast with a host-limitation
        // error instead of reaching for the network.
        match self
            .bundle(workdir)
            .args(["install", "--local", "--jobs", "4", "--retry", "0"])
            .output()
        {
            Ok(run) if run.status.success() => outcome(true, String::new()),
            // Bundler prints its dependency-resolution diagnostics ("Could
            // not find gem '…' in any of the gem sources …") to STDOUT and
            // leaves only the RubyGems extension warning on stderr, so both
            // streams are combined for the host-limitation classifier at the
            // verify boundary.
            Ok(run) => outcome(false, combine_output(&run.stdout, &run.stderr)),
            Err(e) => outcome(false, format!("ruby-pool: bundle install: {e}")),
        }
    }

    /// Re-probe the Bundler binary with `--version`.
    fn is_healthy(&self) -> bool {
        binary_runnable(&self.bundle_bin, "--version")
    }
}

View file

@ -0,0 +1,369 @@
//! Rust build pool (Phase 23 / Track O.1).
//!
//! The legacy [`crate::dynamic::build_sandbox::prepare_rust`] runs a fresh
//! `cargo build --release` per finding with a per-workdir `target/`. Every
//! harness therefore recompiles the (identical) harness scaffold and all of
//! its dependencies from cold.
//!
//! [`RustPool`] keeps two warm caches keyed on the `Cargo.lock` hash:
//! - a shared `CARGO_TARGET_DIR` so incremental artefacts survive across
//! per-finding workdirs, and
//! - `sccache` as `RUSTC_WRAPPER` when it is on `PATH`, which caches the
//! per-crate `rustc` invocations across *different* lock hashes too.
//!
//! Both degrade gracefully: a missing `sccache` simply drops the wrapper and
//! a fresh lock hash gets a fresh (empty) shared target dir. The compile
//! itself is byte-for-byte the same `cargo build --release` the legacy path
//! runs, so success / failure parity holds.
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
use blake3::Hasher;
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
/// Build pool that compiles harnesses with `cargo build --release` into a
/// shared target directory.
pub struct RustPool {
    /// Cargo executable to invoke: `NYX_CARGO_BIN` when set, otherwise
    /// plain `cargo`.
    cargo_bin: String,
    /// `Some(path)` when an `sccache` binary is runnable. Wired in as
    /// `RUSTC_WRAPPER`; `None` falls back to plain `rustc`.
    sccache_bin: Option<String>,
}

impl RustPool {
    /// Build a pool, failing when the configured cargo binary does not
    /// answer `--version`. `sccache` is probed only after cargo is known to
    /// be runnable, and its absence is not an error.
    ///
    /// # Errors
    ///
    /// Returns a `rust-pool: … not runnable` message naming the binary that
    /// could not be run.
    pub fn try_new() -> Result<Self, String> {
        let cargo_bin = match std::env::var("NYX_CARGO_BIN") {
            Ok(configured) => configured,
            Err(_) => String::from("cargo"),
        };
        if binary_runnable(&cargo_bin, "--version") {
            Ok(Self {
                cargo_bin,
                sccache_bin: detect_sccache(),
            })
        } else {
            Err(format!("rust-pool: {cargo_bin} not runnable"))
        }
    }
}
/// Name of a runnable `sccache` binary (`NYX_SCCACHE_BIN` when set, otherwise
/// plain `sccache`), or `None` when it does not answer `--version`.
fn detect_sccache() -> Option<String> {
    let candidate = match std::env::var("NYX_SCCACHE_BIN") {
        Ok(configured) => configured,
        Err(_) => String::from("sccache"),
    };
    if binary_runnable(&candidate, "--version") {
        Some(candidate)
    } else {
        None
    }
}
impl BuildPool for RustPool {
/// Pool identifier.
fn name(&self) -> &'static str {
"rust"
}
/// Build the harness crate in `workdir` with `cargo build --release` into a
/// shared target dir, then copy the resulting `nyx_harness` binary out.
///
/// `args[0]` = absolute path the compiled `nyx_harness` binary must land
/// at (the caller's cache slot).
///
/// Every failure (missing arg, no target dir, spawn error, non-zero cargo
/// exit, failed copy) is reported through `success: false` plus a `stderr`
/// message; nothing here panics.
fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
let start = Instant::now();
let dest = match args.first() {
Some(d) => Path::new(d),
None => {
return PoolCompileResult {
success: false,
stderr: "rust-pool: missing binary destination arg".to_owned(),
duration: start.elapsed(),
};
}
};
// Key the shared target dir on the manifest *and* every `src/` file,
// not the manifest alone. Two fixtures built for the same cap share a
// `Cargo.toml` (identical lock hash) but differ only in their source;
// a manifest-only key routed both into the same `release/nyx_harness`
// slot, letting cargo skip the second fixture's relink so the copy
// below shipped the *first* fixture's binary — cross-fixture verdict
// corruption (a vuln / benign pair confirming identically). Folding
// the source hash in gives each distinct harness its own target dir.
let build_hash = hash_build_inputs(workdir);
let target_dir = match pool_cache_dir("rust", &build_hash) {
Some(d) => d,
None => {
return PoolCompileResult {
success: false,
stderr: "rust-pool: no shared target dir".to_owned(),
duration: start.elapsed(),
};
}
};
// Serialise build + copy across processes for this shared target dir.
//
// The target dir is keyed on `hash_build_inputs` (manifest plus `src/`
// sources, see above), so every caller whose inputs hash identically
// compiles the same bin name (`nyx_harness`) into the same
// `release/nyx_harness` path here.
// `cargo` already serialises the *build* across processes via its own
// target lock, but releases that lock the moment it exits — before the
// copy below moves `release/nyx_harness` to the caller's per-fixture
// cache slot. A second process's `cargo build` landing in that window
// can rewrite `release/nyx_harness` underneath our copy (under the
// earlier manifest-only key this was observed as cross-fixture verdict
// corruption under a parallel `cargo test`).
// Holding this lock across build+copy folds the copy into the existing
// serialised section, so it adds the copy's few milliseconds, not a
// new build barrier.
//
// The guard must stay bound to a named variable: it is released when
// `_build_lock` drops at the end of this function, i.e. after the copy.
let _build_lock = TargetDirLock::acquire(&target_dir);
let mut cmd = base_command(&self.cargo_bin);
cmd.args(["build", "--release"])
.current_dir(workdir)
.env(
"CARGO_HOME",
std::env::var("CARGO_HOME").unwrap_or_else(|_| default_cargo_home()),
)
// `unwrap_or_default` forwards an empty string when `RUSTUP_HOME` is unset.
.env(
"RUSTUP_HOME",
std::env::var("RUSTUP_HOME").unwrap_or_default(),
)
.env("CARGO_TARGET_DIR", &target_dir);
// Only wrap `rustc` when an `sccache` binary was detected at construction.
if let Some(sccache) = &self.sccache_bin {
cmd.env("RUSTC_WRAPPER", sccache);
}
let output = match cmd.output() {
Ok(o) => o,
Err(e) => {
return PoolCompileResult {
success: false,
stderr: format!("rust-pool: cargo build: {e}"),
duration: start.elapsed(),
};
}
};
if !output.status.success() {
return PoolCompileResult {
success: false,
stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
duration: start.elapsed(),
};
}
// Copy the freshly built binary from the shared target dir to the caller's slot.
let compiled = target_dir.join("release").join("nyx_harness");
if let Err(e) = std::fs::copy(&compiled, dest) {
return PoolCompileResult {
success: false,
stderr: format!(
"rust-pool: cargo build ok but copy {} -> {} failed: {e}",
compiled.display(),
dest.display(),
),
duration: start.elapsed(),
};
}
PoolCompileResult {
success: true,
stderr: String::new(),
duration: start.elapsed(),
}
}
/// Re-probe the cargo binary with `--version`.
fn is_healthy(&self) -> bool {
binary_runnable(&self.cargo_bin, "--version")
}
}
/// Fallback `CARGO_HOME`: `$HOME/.cargo` when `HOME` is readable, otherwise
/// the relative path `.cargo`.
fn default_cargo_home() -> String {
    match std::env::var("HOME") {
        Ok(home) => format!("{home}/.cargo"),
        Err(_) => String::from(".cargo"),
    }
}
/// Cross-process advisory lock guarding build+copy for a shared
/// `CARGO_TARGET_DIR` (see the call site in [`RustPool::compile_batch`]).
///
/// Implemented as an atomic `create_new` (O_EXCL) lockfile so it works across
/// the separate processes a parallel `cargo test` spawns — an in-process
/// `Mutex` would not. A lock older than `STALE_AFTER` is stolen so a crashed
/// holder cannot wedge the pool, and acquisition gives up after `MAX_WAIT`
/// (proceeding unlocked) so a pathological case degrades to the pre-fix
/// behaviour rather than deadlocking.
///
/// Because a slow but still-live holder can also be judged stale, every
/// acquisition writes a unique token into the lockfile and the guard only
/// deletes the file on drop while that token is still present. A holder whose
/// lock was stolen therefore leaves its successor's lockfile alone. The
/// read-then-remove on drop is not atomic, so this narrows the window rather
/// than closing it; the lock stays advisory and best-effort.
struct TargetDirLock {
    path: PathBuf,
    /// Only the process that created the lockfile removes it on drop, so a
    /// give-up / steal path never deletes another holder's lock.
    owned: bool,
    /// Exact contents this holder wrote into the lockfile (`"<pid> <seq>\n"`).
    /// `None` when the lock is not owned, or when the token could not be
    /// written; in the latter case drop removes the file unconditionally.
    token: Option<String>,
}

impl TargetDirLock {
    /// Block until the lockfile under `target_dir` can be created, a stale
    /// one has been stolen, or `MAX_WAIT` has elapsed.
    ///
    /// Always returns a guard. It is `owned` only when this call created the
    /// lockfile; on timeout or on any other I/O error the caller proceeds
    /// unlocked with an unowned guard.
    fn acquire(target_dir: &Path) -> Self {
        const MAX_WAIT: Duration = Duration::from_secs(300);
        const STALE_AFTER: Duration = Duration::from_secs(180);
        // Distinguishes acquisitions made by different threads of one process,
        // which all share the same PID.
        static NEXT_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

        let path = target_dir.join(".nyx-pool-build.lock");
        let start = Instant::now();
        let mut spins: u64 = 0;
        loop {
            match std::fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path)
            {
                Ok(mut f) => {
                    use std::io::Write;
                    let seq = NEXT_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                    // The PID stays first so the file remains useful for diagnostics.
                    let line = format!("{} {}\n", std::process::id(), seq);
                    let token = match f.write_all(line.as_bytes()) {
                        Ok(()) => Some(line),
                        Err(_) => None,
                    };
                    return Self {
                        path,
                        owned: true,
                        token,
                    };
                }
                Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
                    // Steal a stale lock left behind by a crashed holder.
                    if Self::is_stale(&path, STALE_AFTER) {
                        let _ = std::fs::remove_file(&path);
                        continue;
                    }
                    if start.elapsed() > MAX_WAIT {
                        // Best-effort: a slow build beats a deadlock.
                        return Self {
                            path,
                            owned: false,
                            token: None,
                        };
                    }
                    // Linear backoff from 10 ms, capped at 90 ms per nap.
                    let nap = 10u64.saturating_add(spins.min(40).saturating_mul(2));
                    std::thread::sleep(Duration::from_millis(nap));
                    spins = spins.saturating_add(1);
                }
                Err(_) => {
                    // Cannot create the lockfile (perms / race on dir) — proceed
                    // unlocked rather than fail the build outright.
                    return Self {
                        path,
                        owned: false,
                        token: None,
                    };
                }
            }
        }
    }

    /// True when the lockfile's mtime is readable and older than `stale_after`.
    fn is_stale(path: &Path, stale_after: Duration) -> bool {
        std::fs::metadata(path)
            .and_then(|meta| meta.modified())
            .ok()
            .and_then(|mtime| mtime.elapsed().ok())
            .map_or(false, |age| age > stale_after)
    }
}

impl Drop for TargetDirLock {
    fn drop(&mut self) {
        if !self.owned {
            return;
        }
        // If the file no longer carries our token, our lock was stolen as
        // stale and the file now belongs to a successor: leave it in place.
        let still_ours = match &self.token {
            Some(token) => std::fs::read_to_string(&self.path)
                .map_or(false, |on_disk| on_disk == *token),
            None => true,
        };
        if still_ours {
            let _ = std::fs::remove_file(&self.path);
        }
    }
}
/// Short hex digest over the named files under `workdir`.
///
/// Each readable file contributes its name followed by its bytes, in the
/// order given; unreadable or missing files contribute nothing. The result is
/// the first eight digest bytes, read little-endian, as 16 hex digits.
fn hash_files(workdir: &Path, files: &[&str]) -> String {
    let mut hasher = Hasher::new();
    for name in files.iter().copied() {
        let Ok(bytes) = std::fs::read(workdir.join(name)) else {
            continue;
        };
        hasher.update(name.as_bytes());
        hasher.update(&bytes);
    }
    let digest = hasher.finalize();
    let mut prefix = [0u8; 8];
    prefix.copy_from_slice(&digest.as_bytes()[..8]);
    format!("{:016x}", u64::from_le_bytes(prefix))
}
/// Key for the shared `CARGO_TARGET_DIR`: the manifest hash (`Cargo.lock` +
/// `Cargo.toml`) joined by `-` to a hash over every `.rs` file under `src/`.
///
/// Source files are visited in sorted relative-path order, each contributing
/// its relative path, a NUL separator and its bytes, so harnesses that differ
/// only in source never share a `release/nyx_harness` slot (the call site in
/// [`RustPool::compile_batch`] explains why manifest-only keying corrupted
/// cross-fixture verdicts). Mirrors
/// [`crate::dynamic::build_sandbox::compute_rust_lockfile_hash`].
fn hash_build_inputs(workdir: &Path) -> String {
    let manifest = hash_files(workdir, &["Cargo.lock", "Cargo.toml"]);

    let src_root = workdir.join("src");
    let mut sources: Vec<PathBuf> = Vec::new();
    collect_rs_files(&src_root, &src_root, &mut sources);
    sources.sort();

    let mut hasher = Hasher::new();
    for relative in sources.iter() {
        let Ok(bytes) = std::fs::read(src_root.join(relative)) else {
            continue;
        };
        hasher.update(relative.to_string_lossy().as_bytes());
        hasher.update(b"\0");
        hasher.update(&bytes);
    }
    let digest = hasher.finalize();
    let mut prefix = [0u8; 8];
    prefix.copy_from_slice(&digest.as_bytes()[..8]);
    format!("{manifest}-{:016x}", u64::from_le_bytes(prefix))
}
/// Walk `dir` depth-first and append every `.rs` file found to `out`, as a
/// path relative to `root`.
///
/// Directories that cannot be listed and entries that cannot be read are
/// skipped. Results are pushed in directory-listing order; callers that need
/// a stable order must sort `out` themselves.
fn collect_rs_files(root: &Path, dir: &Path, out: &mut Vec<PathBuf>) {
    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };
    for item in listing.flatten() {
        let candidate = item.path();
        if candidate.is_dir() {
            collect_rs_files(root, &candidate, out);
            continue;
        }
        let is_rust_source = candidate.extension().and_then(|ext| ext.to_str()) == Some("rs");
        if !is_rust_source {
            continue;
        }
        if let Ok(relative) = candidate.strip_prefix(root) {
            out.push(relative.to_path_buf());
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// `hash_files` is repeatable for the same inputs and changes once a
/// previously missing file appears with content.
#[test]
fn hash_is_deterministic_and_content_sensitive() {
let dir = tempfile::TempDir::new().unwrap();
let h1 = hash_files(dir.path(), &["Cargo.lock"]);
let h2 = hash_files(dir.path(), &["Cargo.lock"]);
assert_eq!(h1, h2);
std::fs::write(dir.path().join("Cargo.lock"), b"[[package]]\n").unwrap();
let h3 = hash_files(dir.path(), &["Cargo.lock"]);
assert_ne!(h1, h3);
}
/// Two workdirs with identical manifests but different `src/entry.rs`
/// must produce different `hash_build_inputs` keys.
#[test]
fn build_hash_differs_for_same_manifest_distinct_source() {
// A vuln / benign pair built for the same cap ships an identical
// Cargo.toml but a different `src/entry.rs`. The shared target-dir key
// must differ between them, else cargo skips the second relink and the
// pool copies out the first fixture's binary (cross-fixture verdict
// corruption — the cmdi / data-exfil Rust regression).
let manifest = b"[package]\nname=\"nyx_harness\"\nversion=\"0.0.0\"\n";
let vuln = tempfile::TempDir::new().unwrap();
std::fs::create_dir_all(vuln.path().join("src")).unwrap();
std::fs::write(vuln.path().join("Cargo.toml"), manifest).unwrap();
std::fs::write(vuln.path().join("src/main.rs"), b"fn main(){}\n").unwrap();
std::fs::write(
vuln.path().join("src/entry.rs"),
b"pub fn run(){ /*vuln*/ }\n",
)
.unwrap();
let benign = tempfile::TempDir::new().unwrap();
std::fs::create_dir_all(benign.path().join("src")).unwrap();
std::fs::write(benign.path().join("Cargo.toml"), manifest).unwrap();
std::fs::write(benign.path().join("src/main.rs"), b"fn main(){}\n").unwrap();
std::fs::write(
benign.path().join("src/entry.rs"),
b"pub fn run(){ /*benign*/ }\n",
)
.unwrap();
// Identical manifests collide under the old manifest-only key …
assert_eq!(
hash_files(vuln.path(), &["Cargo.lock", "Cargo.toml"]),
hash_files(benign.path(), &["Cargo.lock", "Cargo.toml"]),
);
// … but the source-aware key separates them.
assert_ne!(
hash_build_inputs(vuln.path()),
hash_build_inputs(benign.path())
);
}
/// An empty `args` slice yields a failed result with a descriptive
/// message rather than a panic.
#[test]
fn missing_dest_arg_is_an_error_not_a_panic() {
let dir = tempfile::TempDir::new().unwrap();
// Construct the pool directly instead of via `try_new`, so no toolchain
// probe runs. `compile_batch` returns on the missing arg before it
// spawns cargo, so the test needs no installed toolchain.
let pool = RustPool {
cargo_bin: "cargo".to_owned(),
sccache_bin: None,
};
let r = pool.compile_batch(dir.path(), &[]);
assert!(!r.success);
assert!(r.stderr.contains("missing binary destination"));
}
}

2879
src/dynamic/build_sandbox.rs Normal file

File diff suppressed because it is too large Load diff

214
src/dynamic/corpus.rs Normal file
View file

@ -0,0 +1,214 @@
// Legacy [`Oracle::OutputContains`] is intentionally retained for
// pre-Phase-06 corpus entries until they migrate to
// [`Oracle::SinkProbe`]. The deprecation warning is informational, not a
// signal to migrate inside this module.
#![allow(deprecated)]
//! Per-capability payload corpus, keyed by `(Cap, Lang)`.
//!
//! Each `(Cap, Lang)` pair maps to a small set of canonical payloads plus a
//! matching detection oracle. Payloads are static data — adding a new one
//! is a code review, not a runtime config knob, so they cannot drift
//! between versions.
//!
//! Differential confirmation (§4.1): every non-benign payload either
//! references a paired benign control (resolved inside the same
//! `(cap, lang)` slice) or carries a written
//! [`CuratedPayload::no_benign_control_rationale`] explaining why no
//! control is meaningful. The [`audit`] module enforces this both at
//! compile time and via the runtime `corpus_registry::audit` test.
//!
//! # Module layout
//!
//! ```text
//! corpus.rs — types, public re-exports, module root
//! corpus/registry.rs — CapCorpus, CORPUS, payloads_for{,_lang}
//! corpus/audit.rs — compile-time + runtime audits
//! corpus/<cap>/<lang>.rs — per-(cap, lang) `pub const PAYLOADS`
//! ```
//!
//! Adding a new language for a cap means: drop a new file under
//! `corpus/<cap>/<lang>.rs`, register `pub mod <lang>;` in the cap's
//! `mod.rs`, and wire `(Cap::<CAP>, Lang::<Lang>, <cap>::<lang>::PAYLOADS)`
//! into `registry::ENTRIES`. No other file needs to change.
//!
//! # Corpus governance (§16.1)
//!
//! Every payload carries [`PayloadProvenance`], a [`CuratedPayload::since_corpus_version`],
//! and at least one [`CuratedPayload::fixture_paths`] entry. The [`CORPUS_VERSION`] const
//! tracks the history of incompatible corpus changes; bumping it
//! invalidates all `dynamic_verdict_cache` entries whose spec touched the
//! changed cap.
use crate::dynamic::oracle::ProbePredicate;
use crate::labels::Cap;
use crate::symbol::Lang;
pub mod audit;
pub mod registry;
mod cmdi;
mod crypto;
mod data_exfil;
mod deserialize;
mod fmt_string;
mod header_injection;
mod json_parse;
mod ldap;
mod open_redirect;
// `pub(crate)` so the Java emitter can read the FILE_IO canary filename /
// marker consts it must stage into the servlet harness workdir.
pub(crate) mod path_trav;
mod prototype_pollution;
mod sqli;
mod ssrf;
mod ssti;
mod unauthorized_id;
mod xpath;
mod xss;
mod xxe;
pub use registry::{
CORPUS, CORPUS_UNSUPPORTED_LANG_NEUTRAL, audit_marker_collisions, benign_payload_for,
benign_payload_for_lang, materialise_bytes, payloads_for, payloads_for_lang,
resolve_benign_control, resolve_benign_control_lang,
};
/// Re-exported canonical [`Oracle`] type.
///
/// The actual enum lives in [`crate::dynamic::oracle`] alongside
/// [`crate::dynamic::oracle::ProbePredicate`] and
/// [`crate::dynamic::oracle::oracle_fired`]. Re-exported here so the
/// `CuratedPayload.oracle: Oracle` field reads naturally and existing
/// `crate::dynamic::corpus::Oracle` callers keep working.
pub use crate::dynamic::oracle::Oracle;
/// Bump when the corpus content changes in a way that invalidates previously-
/// computed [`crate::dynamic::spec::HarnessSpec::spec_hash`] values.
///
/// # Bump history
///
/// | Version | Date | Change |
/// |---------|------------|-----------------------------------------------|
/// | 1 | 2025-11-01 | Initial corpus (SQLi, CMDI, PATH_TRAV, SSRF, XSS) |
/// | 2 | 2025-12-15 | SSRF OOB-variant added; oracle semantics tightened |
/// | 3 | 2026-05-12 | Migrated to `CuratedPayload`; provenance + fixture_paths enforced; SSRF OOB-nonce slot added |
/// | 4 | 2026-05-14 | Phase 07: `benign_control` paired refs + benign payloads added to SQLI / CMDI / SSRF (file-scheme) |
/// | 5 | 2026-05-16 | FMT_STRING SinkCrash payload + benign control (Phase 08 unrelated-crash acceptance fixture) |
/// | 6 | 2026-05-17 | Phase 02 / Track J.0: `(Cap, Lang)` registry refactor; `no_benign_control_rationale` field; compile-time provenance audit |
/// | 7 | 2026-05-17 | Phase 03 / Track J.1: `DESERIALIZE` cap lit for Java / Python / PHP / Ruby; `ProbeKind::Deserialize` + `ProbePredicate::DeserializeGadgetInvoked` |
/// | 8 | 2026-05-17 | Phase 04 / Track J.2: `SSTI` cap lit for Jinja2 / ERB / Twig / Thymeleaf / Handlebars; `ProbePredicate::TemplateEvalEqual` |
/// | 9 | 2026-05-17 | Phase 05 / Track J.3: `XXE` cap lit for Java / Python / PHP / Ruby / Go; `ProbeKind::Xxe` + `ProbePredicate::XxeEntityExpanded` |
/// | 10 | 2026-05-17 | Phase 06 / Track J.4: `LDAP_INJECTION` cap lit for Java / Python / PHP; `ProbeKind::Ldap` + `ProbePredicate::LdapResultCountGreaterThan`; `StubKind::Ldap` + in-sandbox LDAP server stub |
/// | 11 | 2026-05-17 | Phase 07 / Track J.5: `XPATH_INJECTION` cap lit for Java / Python / PHP / JS; `ProbeKind::Xpath`; `LdapResultCountGreaterThan` renamed to `QueryResultCountGreaterThan` (shared by LDAP + XPath); `xpath_corpus.xml` staged in workdir |
/// | 12 | 2026-05-18 | Phase 08 / Track J.6: `HEADER_INJECTION` cap lit for Java / Python / PHP / Ruby / JS / Go / Rust; `ProbeKind::HeaderEmit` + `ProbePredicate::HeaderInjected`; per-lang `setHeader` shims |
/// | 13 | 2026-05-18 | Phase 09 / Track J.7: `OPEN_REDIRECT` cap lit for Java / Python / PHP / Ruby / JS / Go / Rust; `ProbeKind::Redirect` + `ProbePredicate::RedirectHostNotIn`; per-lang `sendRedirect` / `redirect()` shims |
/// | 14 | 2026-05-18 | Phase 10 / Track J.8: `PROTOTYPE_POLLUTION` cap lit for JS / TS; `ProbeKind::PrototypePollution` + `ProbePredicate::PrototypeCanaryTouched`; Node harness installs `Proxy`-style canary trap on `Object.prototype.__nyx_canary` |
/// | 15 | 2026-05-18 | Phase 11 / Track J.9: `CRYPTO` (Java/Python/PHP/Go/Rust) + `JSON_PARSE` (JS/Python/Ruby) + `UNAUTHORIZED_ID` (7 langs) + `DATA_EXFIL` (7 langs); `ProbeKind::{WeakKey,IdorAccess,OutboundNetwork}` + `ProbePredicate::{WeakKeyEntropy,IdorBoundaryCrossed,OutboundHostNotIn}`; `UnsupportedReason::SoundOracleUnavailable` for caps with no sound oracle |
/// | 16 | 2026-06-01 | Collision-resistant `cmdi` (`CODE_EXEC`) marker: payload `; echo NYX_PWN_$((113*7))_CMDI`, oracle `OutputContains("NYX_PWN_791_CMDI")`. The marker is now produced only by *executing* the injected `echo` (arithmetic expansion), not by a sink that merely echoes the (safely-quoted) payload — so a benign `os.system("echo " + shlex.quote(x))` control no longer false-confirms. Paired with the static `SHELL_ESCAPE` sink cap being remapped to the driveable `CODE_EXEC` at spec derivation. |
/// | 17 | 2026-06-01 | Collision-resistant `path_traversal` (`FILE_IO`) Java payload for the entry-driven servlet harness: vuln `../nyx_pt_canary` reads a workdir-root canary the emitter plants; oracle `OutputContains(CANARY_MARKER)` where the marker is the canary's CONTENT (not a substring of the path payload), so a fixture that echoes the requested filename back cannot reproduce it — only an unsanitised read of the canary does. |
pub const CORPUS_VERSION: u32 = 17;
/// Where a payload originated.
///
/// Recorded on every entry through [`CuratedPayload::provenance`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PayloadProvenance {
/// Manually written and reviewed by the Nyx team.
Curated,
/// Produced by the internal mutation fuzzer (`fuzz/dynamic_corpus/`).
/// Still requires human promotion review (§16.4) before landing here.
InternalFuzzer,
/// Derived from a public CVE or external security report.
ExternalReport,
}
/// Reference from a vulnerable payload to its paired benign control.
///
/// Resolved at call time by scanning the same cap's payload slice for an
/// `is_benign == true` entry whose `label` matches. Stored as `&'static
/// str` (rather than a back-pointer to [`CuratedPayload`]) so the corpus
/// tables stay `const`-declarable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PayloadRef {
/// Label of the benign-control entry inside the same cap's payload set.
/// Must equal that entry's [`CuratedPayload::label`] for the reference to
/// resolve.
pub label: &'static str,
}
/// A single payload entry in the curated corpus.
///
/// Governs both static payload bytes (or an OOB-nonce template) and the
/// oracle used to confirm the vulnerability fired. All fields are
/// `'static` so the corpus can live in read-only memory.
#[derive(Debug, Clone)]
pub struct CuratedPayload {
/// Bytes injected into the [`crate::dynamic::spec::PayloadSlot`].
///
/// When [`Self::oob_nonce_slot`] is `true` this field is ignored; the
/// runner materialises the actual bytes from the OOB listener URL at
/// call time.
pub bytes: &'static [u8],
/// Human label for logs and reports. Also the key a vulnerable entry's
/// [`PayloadRef::label`] is matched against when this entry is benign.
pub label: &'static str,
/// How we decide the sink fired. See [`Oracle`].
pub oracle: Oracle,
/// If `true`, this is a benign control payload.
/// `Confirmed` requires the vuln payload to trigger AND the benign payload
/// NOT to trigger (differential confirmation, §4.1).
pub is_benign: bool,
/// Where this payload came from.
pub provenance: PayloadProvenance,
/// `CORPUS_VERSION` when this payload was added.
pub since_corpus_version: u32,
/// `CORPUS_VERSION` at which this payload was deprecated, if any.
pub deprecated_at_corpus_version: Option<u32>,
/// Source files that exercise this payload in the dynamic harness.
/// At least one entry required per §16.1.
pub fixture_paths: &'static [&'static str],
/// When `true`, the runner generates the actual bytes from the OOB
/// listener URL + per-finding nonce at execution time (SSRF OOB variant).
/// The `bytes` field is unused for such payloads.
pub oob_nonce_slot: bool,
/// Structured-oracle predicates evaluated against
/// [`crate::dynamic::probe::SinkProbe`] records drained from the run's
/// probe channel (Phase 06 — Track C.1). Always populated; empty when
/// the payload still relies on the legacy
/// [`Oracle::OutputContains`]
/// path and has not been migrated to
/// [`Oracle::SinkProbe`] yet.
pub probe_predicates: &'static [ProbePredicate],
/// Paired benign-control payload inside the same cap's slice.
///
/// `Some(PayloadRef)` on a vulnerable entry means the differential rule
/// (Phase 07, §4.1) compares this entry's oracle firing against the
/// referenced benign. `None` marks the entry as having no paired
/// control — the runner downgrades any would-be `Confirmed` to
/// [`crate::evidence::InconclusiveReason::NoBenignControl`].
/// Always `None` on benign entries themselves.
pub benign_control: Option<PayloadRef>,
/// Written rationale required when a non-benign payload has
/// `benign_control = None`. The [`audit`] module's compile-time assertion
/// and its runtime mirror ([`audit::audit_benign_controls_runtime`]) both
/// reject any entry that elides the paired control without a non-empty
/// explanation here.
/// Always `None` on entries that DO carry a `benign_control` and on
/// benign entries themselves.
pub no_benign_control_rationale: Option<&'static str>,
}
/// Backward-compatible type alias for [`CuratedPayload`].
pub type Payload = CuratedPayload;
/// Read-only registry of `(Cap, Lang)` payload slices.
///
/// Constructed once as the [`registry::CORPUS`] const. Layered as
/// `&'static` slices so the entire registry can live in read-only memory
/// and so [`audit`] can walk it in const eval.
#[derive(Debug, Clone, Copy)]
pub struct CapCorpus {
/// `(Cap, Lang, payloads)` triples. A single cap may appear once per
/// supported language. See [`registry::payloads_for_lang`] for the
/// per-language lookup and [`registry::payloads_for`] for the
/// back-compatible union shim.
pub entries: &'static [(Cap, Lang, &'static [CuratedPayload])],
/// Per-cap probe predicates lifted off individual payloads. Reserved
/// for later Track J phases; empty in Phase 02.
/// Keyed by cap only, with no language component.
pub oracles: &'static [(Cap, &'static [ProbePredicate])],
}

212
src/dynamic/corpus/audit.rs Normal file
View file

@ -0,0 +1,212 @@
//! Compile-time + runtime audits over the corpus registry.
//!
//! Two invariants enforced here fail the build (via `const _: () = assert!(...)`)
//! if they regress:
//!
//! 1. **`benign_control` resolves locally.** Every non-benign payload either
//! references a benign control whose `label` appears inside the same
//! `(cap, lang)` slice, *or* carries an explicit
//! [`CuratedPayload::no_benign_control_rationale`] with a non-empty
//! written rationale. Without this guard the differential rule
//! (§4.1) silently downgrades to `Inconclusive(NoBenignControl)`
//! whenever a maintainer forgets to wire a paired benign entry.
//!
//! 2. **Cap coverage is exhaustive.** The set of caps appearing in
//! [`CORPUS`]'s [`entries`](super::CapCorpus::entries) OR [`CORPUS_UNSUPPORTED_LANG_NEUTRAL`] must
//! equal [`Cap::all`]. Adding a new `Cap` bit without classifying it
//! fails the build.
//!
//! The runtime `corpus_registry::audit` test mirrors both checks so
//! failure surfaces in `cargo test` output, not just `cargo build`.
use super::CuratedPayload;
use super::registry::{CORPUS, CORPUS_UNSUPPORTED_LANG_NEUTRAL};
use crate::labels::Cap;
/// Const-evaluable string comparison: `true` when `a` and `b` hold the same
/// bytes.
///
/// Written with a manual `while` loop so the body stays valid inside a
/// `const fn`.
#[allow(dead_code)] // Called from const-eval audit helpers on MSRV/CI compilers.
const fn str_eq(a: &str, b: &str) -> bool {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    if lhs.len() != rhs.len() {
        return false;
    }
    // Lengths match, so scan from the back and stop at the first mismatch.
    let mut remaining = lhs.len();
    while remaining > 0 {
        remaining -= 1;
        if lhs[remaining] != rhs[remaining] {
            return false;
        }
    }
    true
}
/// Const-evaluable lookup: does `slice` contain a benign entry labelled
/// `label`?
#[allow(dead_code)] // Called from a const assertion; MSRV lints may miss const-eval uses.
const fn slice_has_benign_label(slice: &[CuratedPayload], label: &str) -> bool {
    let mut k = 0;
    while k < slice.len() {
        if slice[k].is_benign && str_eq(slice[k].label, label) {
            return true;
        }
        k += 1;
    }
    false
}

/// Const-evaluable audit over every `(cap, lang)` slice in `CORPUS`.
///
/// Returns `false` as soon as a non-benign payload is found that either
/// names a `benign_control` label with no benign match in its own slice, or
/// has no `benign_control` and no non-empty `no_benign_control_rationale`.
/// Returns `true` when every non-benign payload passes.
#[allow(dead_code)] // Called from a const assertion; MSRV lints may miss const-eval uses.
const fn audit_benign_controls() -> bool {
    let entries = CORPUS.entries;
    let mut entry_idx = 0;
    while entry_idx < entries.len() {
        let payloads: &[CuratedPayload] = entries[entry_idx].2;
        let mut payload_idx = 0;
        while payload_idx < payloads.len() {
            let payload = &payloads[payload_idx];
            payload_idx += 1;
            if payload.is_benign {
                continue;
            }
            let satisfied = match payload.benign_control {
                Some(control) => slice_has_benign_label(payloads, control.label),
                None => match payload.no_benign_control_rationale {
                    Some(rationale) => !rationale.is_empty(),
                    None => false,
                },
            };
            if !satisfied {
                return false;
            }
        }
        entry_idx += 1;
    }
    true
}
/// Bitwise union of the `Cap` bits of every entry in `CORPUS.entries`.
///
/// A manual `while` loop keeps the function usable in const eval.
const fn registered_cap_bits() -> u32 {
    let entries = CORPUS.entries;
    let mut union = 0u32;
    let mut remaining = entries.len();
    while remaining > 0 {
        remaining -= 1;
        union |= entries[remaining].0.bits();
    }
    union
}
/// Compile-time guards. Breaking either invariant fails `cargo build`.
///
/// First guard: the benign-control audit. It fails when a non-benign payload
/// either names a `benign_control` that does not resolve inside its own
/// slice, or names none and also lacks a non-empty rationale.
const _: () = assert!(
    audit_benign_controls(),
    "corpus audit: a non-benign payload either references a `benign_control` \
     whose label does not resolve inside its own (cap, lang) slice, or has no \
     `benign_control` and no non-empty `no_benign_control_rationale`; \
     see src/dynamic/corpus/audit.rs.",
);
// Second guard: every `Cap` bit is either registered in a `(cap, lang)` slice
// or explicitly listed as unsupported.
const _: () = assert!(
    registered_cap_bits() | CORPUS_UNSUPPORTED_LANG_NEUTRAL == Cap::all().bits(),
    "corpus audit: union of (cap, lang) entries and \
     `CORPUS_UNSUPPORTED_LANG_NEUTRAL` does not cover every `Cap` bit. \
     Add the missing cap to either a `(cap, lang)` slice or the \
     lang-neutral unsupported list.",
);
/// Runtime counterpart of the compile-time benign-control audit.
///
/// # Errors
///
/// Returns a message naming the first `(cap, lang)` slice and payload label
/// that either references a benign control missing from its own slice, or
/// has neither a benign control nor a non-empty rationale.
pub fn audit_benign_controls_runtime() -> Result<(), String> {
    for &(cap, lang, slice) in CORPUS.entries {
        for payload in slice.iter().filter(|candidate| !candidate.is_benign) {
            if let Some(control) = payload.benign_control {
                let resolves = slice
                    .iter()
                    .any(|other| other.is_benign && other.label == control.label);
                if !resolves {
                    return Err(format!(
                        "({:?}, {:?}) vuln payload {:?} references missing \
                         benign_control label {:?}",
                        cap, lang, payload.label, control.label,
                    ));
                }
                continue;
            }
            let has_rationale = matches!(
                payload.no_benign_control_rationale,
                Some(rationale) if !rationale.is_empty()
            );
            if !has_rationale {
                return Err(format!(
                    "({:?}, {:?}) vuln payload {:?} has neither a \
                     benign_control nor a written \
                     no_benign_control_rationale",
                    cap, lang, payload.label,
                ));
            }
        }
    }
    Ok(())
}
/// Runtime counterpart of the compile-time cap-coverage audit.
///
/// # Errors
///
/// Returns a message carrying the hex mask of `Cap` bits that are neither
/// registered in `CORPUS.entries` nor listed in
/// `CORPUS_UNSUPPORTED_LANG_NEUTRAL`, whenever the covered set differs from
/// `Cap::all()`.
pub fn audit_cap_coverage_runtime() -> Result<(), String> {
    let expected = Cap::all().bits();
    let covered = registered_cap_bits() | CORPUS_UNSUPPORTED_LANG_NEUTRAL;
    if covered == expected {
        return Ok(());
    }
    let missing = expected & !covered;
    Err(format!(
        "Cap bits {missing:#x} are neither registered in CORPUS.entries \
         nor listed in CORPUS_UNSUPPORTED_LANG_NEUTRAL",
    ))
}
/// Track J.0 deferred audit: within a single cap, a benign payload label may
/// be registered under only one language.
///
/// For every `(cap, lang, slice)` entry in `CORPUS.entries`, each payload
/// flagged `is_benign` is recorded as `label -> lang` in a per-cap table
/// (keyed by `cap.bits()`). Meeting a label already recorded for a
/// *different* language is an error; a repeat under the same language is
/// accepted. The stated motivation is that a lang-agnostic
/// `resolve_benign_control` lookup could otherwise pair a vuln payload in one
/// language with a benign payload declared in another.
///
/// # Errors
///
/// Returns a message naming the label, the cap bits and both languages of the
/// first collision found.
pub fn audit_benign_label_uniqueness_runtime() -> Result<(), String> {
    use std::collections::HashMap;
    let mut owners: HashMap<u32, HashMap<&'static str, crate::symbol::Lang>> = HashMap::new();
    for &(cap, lang, slice) in CORPUS.entries {
        let seen = owners.entry(cap.bits()).or_default();
        for benign in slice.iter().filter(|payload| payload.is_benign) {
            match seen.insert(benign.label, lang) {
                Some(earlier) if earlier != lang => {
                    return Err(format!(
                        "benign label {:?} for cap {:#x} is registered in both \
                         {:?} and {:?} lang-agnostic resolve_benign_control \
                         could match the wrong language",
                        benign.label,
                        cap.bits(),
                        earlier,
                        lang,
                    ));
                }
                _ => {}
            }
        }
    }
    Ok(())
}
#[cfg(test)]
mod corpus_registry {
    use super::*;

    /// Plan §02 acceptance: `cargo test corpus_registry::audit` must pass.
    /// The module name and the test name together form that required path,
    /// so neither may be renamed.
    #[test]
    fn audit() {
        // Runs the runtime audits in order; the first failure panics with its label.
        let checks: [(fn() -> Result<(), String>, &str); 3] = [
            (audit_benign_controls_runtime, "benign_control audit failed"),
            (audit_cap_coverage_runtime, "cap coverage audit failed"),
            (
                audit_benign_label_uniqueness_runtime,
                "benign label uniqueness audit failed",
            ),
        ];
        for (check, failure) in checks {
            check().expect(failure);
        }
    }
}

View file

@ -0,0 +1,46 @@
//! C `Cap::CODE_EXEC` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/c/cmdi/cmdi_exec.c",
    "tests/benchmark/corpus/c/cmdi/cmdi_fgets.c",
    "tests/benchmark/corpus/c/cmdi/cmdi_popen.c",
    "tests/benchmark/corpus/c/cmdi/cmdi_system.c",
];

/// C command-injection slice: one vulnerable payload plus the benign control
/// it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker-c",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign-c",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text checked against the same marker oracle.
    CuratedPayload {
        label: "cmdi-benign-c",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,52 @@
//! C++ `Cap::CODE_EXEC` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/cpp/cmdi/cmdi_class_inline_method.cpp",
    "tests/benchmark/corpus/cpp/cmdi/cmdi_exec.cpp",
    "tests/benchmark/corpus/cpp/cmdi/cmdi_getline.cpp",
    "tests/benchmark/corpus/cpp/cmdi/cmdi_lambda_passthrough.cpp",
    "tests/benchmark/corpus/cpp/cmdi/cmdi_popen.cpp",
    "tests/benchmark/corpus/cpp/cmdi/cmdi_stl_vector_string.cpp",
    "tests/benchmark/corpus/cpp/cmdi/cmdi_system.cpp",
];

/// C++ command-injection slice: one vulnerable payload plus the benign
/// control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker-cpp",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign-cpp",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text checked against the same marker oracle.
    CuratedPayload {
        label: "cmdi-benign-cpp",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,46 @@
//! Go `Cap::CODE_EXEC` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/go/cmdi/cmdi_direct.go",
    "tests/benchmark/corpus/go/cmdi/cmdi_indirect.go",
    "tests/benchmark/corpus/go/cmdi/cmdi_unvalidated_queue_element.go",
    "tests/benchmark/corpus/go/cmdi/vuln_error_log_then_sink.go",
];

/// Go command-injection slice: one vulnerable payload plus the benign
/// control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker-go",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign-go",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text checked against the same marker oracle.
    CuratedPayload {
        label: "cmdi-benign-go",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,42 @@
//! Java `Cap::CODE_EXEC` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/java/cmdi/CmdiDirect.java",
    "tests/benchmark/corpus/java/cmdi/CmdiIndirect.java",
];

/// Java command-injection slice: one vulnerable payload plus the benign
/// control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker-java",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign-java",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text checked against the same marker oracle.
    CuratedPayload {
        label: "cmdi-benign-java",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,42 @@
//! JavaScript `Cap::CODE_EXEC` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/javascript/cmdi/cmdi_direct.js",
    "tests/benchmark/corpus/javascript/cmdi/cmdi_indirect.js",
];

/// JavaScript command-injection slice: one vulnerable payload plus the
/// benign control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker-javascript",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign-javascript",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text checked against the same marker oracle.
    CuratedPayload {
        label: "cmdi-benign-javascript",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,12 @@
//! Command-injection (`Cap::CODE_EXEC`) per-language payload slices.
pub mod c;
pub mod cpp;
pub mod go;
pub mod java;
pub mod javascript;
pub mod php;
pub mod python;
pub mod ruby;
pub mod rust;
pub mod typescript;

View file

@ -0,0 +1,42 @@
//! PHP `Cap::CODE_EXEC` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/php/cmdi/cmdi_direct.php",
    "tests/benchmark/corpus/php/cmdi/cmdi_indirect.php",
];

/// PHP command-injection slice: one vulnerable payload plus the benign
/// control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker-php",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign-php",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text checked against the same marker oracle.
    CuratedPayload {
        label: "cmdi-benign-php",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,48 @@
//! Python `Cap::CODE_EXEC` payloads.
//!
//! Same shell-syntax bytes as [`super::rust::PAYLOADS`]; the per-language
//! slice exists so the lookup is a per-language assertion rather than a
//! cross-language fallback through [`super::super::registry::payloads_for`].
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/python/cmdi/cmdi_direct.py",
    "tests/benchmark/corpus/python/cmdi/cmdi_indirect.py",
    "tests/benchmark/corpus/python/cmdi/cmdi_popen_shell.py",
];

/// Python command-injection slice: one vulnerable payload plus the benign
/// control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker-python",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign-python",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text checked against the same marker oracle.
    CuratedPayload {
        label: "cmdi-benign-python",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,44 @@
//! Ruby `Cap::CODE_EXEC` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/ruby/cmdi/cmdi_backtick.rb",
    "tests/benchmark/corpus/ruby/cmdi/cmdi_kernel_open.rb",
    "tests/benchmark/corpus/ruby/cmdi/cmdi_system.rb",
];

/// Ruby command-injection slice: one vulnerable payload plus the benign
/// control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker-ruby",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign-ruby",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text checked against the same marker oracle.
    CuratedPayload {
        label: "cmdi-benign-ruby",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,48 @@
//! Command-injection payloads exercised by Rust fixtures
//! (`tests/benchmark/corpus/rust/cmdi/`).
//!
//! Bytes are shell-syntax, not Rust-specific; Track J phases 03–11 add
//! per-language slices (Python `os.system`, PHP `exec`, …) as new fixtures
//! land.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/rust/cmdi/cmdi_command.rs",
    "tests/benchmark/corpus/rust/cmdi/cmdi_args.rs",
];

/// Rust-fixture command-injection slice: one vulnerable payload plus the
/// benign control it references by label. The two entries carry different
/// `since_corpus_version` values (1 and 4).
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 1,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text that should never produce the cmdi marker.
    CuratedPayload {
        label: "cmdi-benign",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 4,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,42 @@
//! TypeScript `Cap::CODE_EXEC` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
/// Output marker both oracles look for; `$((113*7))` in the vulnerable
/// payload is shell arithmetic for `791`.
const MARKER: &str = "NYX_PWN_791_CMDI";

/// Benchmark fixtures shared by the vulnerable payload and its benign control.
const FIXTURES: &[&str] = &[
    "tests/benchmark/corpus/typescript/cmdi/cmdi_async_wrapper.ts",
    "tests/benchmark/corpus/typescript/cmdi/cmdi_exec_template.ts",
];

/// TypeScript command-injection slice: one vulnerable payload plus the
/// benign control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
    // Vulnerable payload: echoes the marker when the bytes reach a shell.
    CuratedPayload {
        label: "cmdi-echo-marker-typescript",
        bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
        is_benign: false,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: Some(PayloadRef {
            label: "cmdi-benign-typescript",
        }),
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
    // Benign control: plain text checked against the same marker oracle.
    CuratedPayload {
        label: "cmdi-benign-typescript",
        bytes: b"benign_safe_cmdi_NYX_BENIGN",
        is_benign: true,
        oracle: Oracle::OutputContains(MARKER),
        probe_predicates: &[],
        benign_control: None,
        no_benign_control_rationale: None,
        fixture_paths: FIXTURES,
        oob_nonce_slot: false,
        provenance: PayloadProvenance::Curated,
        since_corpus_version: 15,
        deprecated_at_corpus_version: None,
    },
];

View file

@ -0,0 +1,50 @@
//! Go `Cap::CRYPTO` payloads — `math/rand.Intn` weak-key
//! generation.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const WEAK_BITS: u32 = 16;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_CRYPTO_WEAK",
label: "crypto-go-weak-random",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/go/vuln.go"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
benign_control: Some(PayloadRef {
label: "crypto-go-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_CRYPTO_STRONG",
label: "crypto-go-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/go/benign.go"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,61 @@
//! Java `Cap::CRYPTO` payloads — `java.util.Random.nextBytes`
//! weak-key generation.
//!
//! Vuln payload: marker bytes that signal the harness to drive its
//! `java.util.Random` key-generation path. The harness emits a key
//! bounded inside a 16-bit search space and writes a
//! [`crate::dynamic::probe::ProbeKind::WeakKey`] probe — the
//! [`crate::dynamic::oracle::ProbePredicate::WeakKeyEntropy`]
//! predicate fires for `key_int < 2^16`.
//!
//! Benign control: marker bytes that route the harness through
//! `java.security.SecureRandom`, producing a 256-bit key whose
//! integer view trivially exceeds the budget.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const WEAK_BITS: u32 = 16;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_CRYPTO_WEAK",
label: "crypto-java-weak-random",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/java/vuln.java"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
benign_control: Some(PayloadRef {
label: "crypto-java-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_CRYPTO_STRONG",
label: "crypto-java-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/java/benign.java"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,26 @@
//! Weak-crypto (`Cap::CRYPTO`) per-language payload slices.
//!
//! Phase 11 (Track J.9) carves a weak-key entropy oracle across the
//! five backend languages where homegrown key generation is common
//! enough to matter: Java (`java.util.Random.nextBytes` → key bytes),
//! Python (`random.randint(0, 0xFFFF)`), PHP (`mt_rand(0, 0xFFFF)`),
//! Go (`math/rand.Intn(0x10000)`), Rust (`rand::thread_rng` truncated
//! to 16 bits). Every vuln payload triggers the harness's
//! instrumented key-generation path with a seed that produces an
//! attacker-derivable key bounded inside the 16-bit search space.
//! The harness shim writes a
//! [`crate::dynamic::probe::ProbeKind::WeakKey`] probe whose `key_int`
//! field holds the produced integer view of the key bytes; the
//! [`crate::dynamic::oracle::ProbePredicate::WeakKeyEntropy`]
//! predicate fires when `key_int < 2^max_bits` (`max_bits = 16` by
//! default). The paired benign control routes the same harness
//! through a CSPRNG (`SecureRandom`, `secrets.token_bytes`,
//! `random_bytes(32)`, `crypto/rand.Read`, `rand::rngs::OsRng`) so
//! the produced `key_int` trivially exceeds the budget and the
//! predicate stays clear.
pub mod go;
pub mod java;
pub mod php;
pub mod python;
pub mod rust;

View file

@ -0,0 +1,49 @@
//! PHP `Cap::CRYPTO` payloads — `mt_rand` weak-key generation.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const WEAK_BITS: u32 = 16;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_CRYPTO_WEAK",
label: "crypto-php-weak-random",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/php/vuln.php"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
benign_control: Some(PayloadRef {
label: "crypto-php-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_CRYPTO_STRONG",
label: "crypto-php-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/php/benign.php"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,59 @@
//! Python `Cap::CRYPTO` payloads — `random.randint` weak-key
//! generation.
//!
//! Vuln payload: marker bytes that route the harness through
//! `random.randint(0, 0xFFFF)`; the harness emits a
//! [`crate::dynamic::probe::ProbeKind::WeakKey`] probe and the
//! [`crate::dynamic::oracle::ProbePredicate::WeakKeyEntropy`]
//! predicate fires.
//!
//! Benign control: marker bytes that route the harness through
//! `secrets.token_bytes(32)`.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const WEAK_BITS: u32 = 16;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_CRYPTO_WEAK",
label: "crypto-python-weak-random",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/python/vuln.py"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
benign_control: Some(PayloadRef {
label: "crypto-python-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_CRYPTO_STRONG",
label: "crypto-python-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/python/benign.py"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,50 @@
//! Rust `Cap::CRYPTO` payloads — `rand::thread_rng` weak-key
//! generation truncated to 16 bits.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const WEAK_BITS: u32 = 16;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_CRYPTO_WEAK",
label: "crypto-rust-weak-random",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/rust/vuln.rs"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
benign_control: Some(PayloadRef {
label: "crypto-rust-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_CRYPTO_STRONG",
label: "crypto-rust-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::WeakKeyEntropy {
max_bits: WEAK_BITS,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/crypto/rust/benign.rs"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,49 @@
//! go `Cap::DATA_EXFIL` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"attacker.test",
label: "data-exfil-go-external",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/go/vuln.go"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
benign_control: Some(PayloadRef {
label: "data-exfil-go-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"127.0.0.1",
label: "data-exfil-go-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/go/benign.go"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,49 @@
//! java `Cap::DATA_EXFIL` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"attacker.test",
label: "data-exfil-java-external",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/java/Vuln.java"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
benign_control: Some(PayloadRef {
label: "data-exfil-java-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"127.0.0.1",
label: "data-exfil-java-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/java/Benign.java"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,49 @@
//! js `Cap::DATA_EXFIL` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"attacker.test",
label: "data-exfil-js-external",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/js/vuln.js"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
benign_control: Some(PayloadRef {
label: "data-exfil-js-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"127.0.0.1",
label: "data-exfil-js-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/js/benign.js"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,22 @@
//! Data-exfiltration (`Cap::DATA_EXFIL`) per-language payload
//! slices.
//!
//! Phase 11 (Track J.9) carves an outbound-network oracle across
//! all seven backend-capable languages. Each harness stands up a
//! mock HTTP client that records the destination host of every
//! outbound request via a
//! [`crate::dynamic::probe::ProbeKind::OutboundNetwork`] probe
//! (field `host`). The
//! [`crate::dynamic::oracle::ProbePredicate::OutboundHostNotIn`]
//! predicate fires when the captured `host` falls outside the
//! configured loopback allowlist (`&["127.0.0.1", "localhost"]`).
//! The vuln payload supplies `attacker.test`; the paired benign
//! control supplies `127.0.0.1` so the predicate stays clear.
pub mod go;
pub mod java;
pub mod js;
pub mod php;
pub mod python;
pub mod ruby;
pub mod rust;

View file

@ -0,0 +1,49 @@
//! php `Cap::DATA_EXFIL` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"attacker.test",
label: "data-exfil-php-external",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/php/vuln.php"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
benign_control: Some(PayloadRef {
label: "data-exfil-php-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"127.0.0.1",
label: "data-exfil-php-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/php/benign.php"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,49 @@
//! python `Cap::DATA_EXFIL` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"attacker.test",
label: "data-exfil-python-external",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/python/vuln.py"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
benign_control: Some(PayloadRef {
label: "data-exfil-python-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"127.0.0.1",
label: "data-exfil-python-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/python/benign.py"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,49 @@
//! ruby `Cap::DATA_EXFIL` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"attacker.test",
label: "data-exfil-ruby-external",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/ruby/vuln.rb"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
benign_control: Some(PayloadRef {
label: "data-exfil-ruby-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"127.0.0.1",
label: "data-exfil-ruby-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/ruby/benign.rb"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,49 @@
//! rust `Cap::DATA_EXFIL` payloads.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"attacker.test",
label: "data-exfil-rust-external",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/rust/vuln.rs"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
benign_control: Some(PayloadRef {
label: "data-exfil-rust-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"127.0.0.1",
label: "data-exfil-rust-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::OutboundHostNotIn {
allowlist: ALLOWLIST,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/data_exfil/rust/benign.rs"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,62 @@
//! Java `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: a base64-encoded `java.io.ObjectInputStream` byte stream
//! that materialises a gadget class outside the harness's allowlist.
//! The harness's `RestrictedObjectInputStream.resolveClass` intercepts
//! the lookup and emits a `ProbeKind::Deserialize {
//! gadget_chain_invoked: true }` probe before aborting the chain.
//!
//! Benign control: a base64-encoded `ObjectInputStream` byte stream of a
//! single allow-listed `java.lang.Integer`. The class lives inside the
//! resolveClass allowlist so no Deserialize probe is emitted.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
// Marker class name embedded in the serialized stream — the
// harness allowlist contains `java.lang.Integer` and `java.lang.String`
// only. The byte form is a small literal so const-eval can keep it.
bytes: b"NYX_GADGET_CLASS:org.nyx.deserialize.Gadget",
label: "java-deserialize-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/deserialize/java/Vuln.java"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
benign_control: Some(PayloadRef {
label: "java-deserialize-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
// Allow-listed payload — the marker carries `java.lang.Integer`,
// which the harness resolveClass accepts without writing a probe.
bytes: b"NYX_GADGET_CLASS:java.lang.Integer",
label: "java-deserialize-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/deserialize/java/Benign.java"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,17 @@
//! Deserialization (`Cap::DESERIALIZE`) per-language payload slices.
//!
//! Phase 03 (Track J.1) lands the first cap end-to-end: Java
//! (`ObjectInputStream.readObject` / `XMLDecoder`), Python (`pickle.loads`
//! / `yaml.unsafe_load`), PHP (`unserialize`), and Ruby (`Marshal.load`
//! / `YAML.load`). Every vuln payload is paired with a benign control
//! whose oracle should *not* fire — the per-language harness shims
//! emit a [`crate::dynamic::probe::ProbeKind::Deserialize`] record with
//! `gadget_chain_invoked: true` when a non-allowlisted gadget class is
//! materialised by the instrumented deserialiser; benign well-formed
//! serialized data does not reach the allowlist boundary and so leaves
//! no Deserialize probe.
pub mod java;
pub mod php;
pub mod python;
pub mod ruby;

View file

@ -0,0 +1,60 @@
//! PHP `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: marker string handed to `unserialize($input)` where the
//! harness wraps the call with `['allowed_classes' => false]` and an
//! observer on `__wakeup`. When `unserialize` materialises a
//! `__PHP_Incomplete_Class` from a non-allowlisted class name, the
//! observer emits a `ProbeKind::Deserialize { gadget_chain_invoked:
//! true }` probe.
//!
//! Benign control: serialised primitive (an `int`) that
//! `unserialize` materialises without engaging the allowlist boundary.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:PHP_Object_Injection_RCE",
label: "php-unserialize-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/deserialize/php/vuln.php"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
benign_control: Some(PayloadRef {
label: "php-unserialize-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
// Allow-listed marker — the harness allowlist accepts
// `__primitive_int` as a no-op type representing a serialised
// integer literal.
bytes: b"NYX_GADGET_CLASS:__primitive_int",
label: "php-unserialize-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/deserialize/php/benign.php"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,56 @@
//! Python `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: marker string consumed by the harness shim which calls
//! `pickle.Unpickler(...).load()` with `find_class` overridden to record
//! a `ProbeKind::Deserialize { gadget_chain_invoked: true }` whenever a
//! non-allowlisted class is requested. The harness allowlists
//! `builtins.list` / `builtins.dict` / `builtins.int`; the marker class
//! `nyx.gadget.RCE` is outside that set.
//!
//! Benign control: payload requests only allow-listed builtins.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:nyx.gadget.RCE",
label: "python-pickle-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/deserialize/python/vuln.py"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
benign_control: Some(PayloadRef {
label: "python-pickle-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:builtins.list",
label: "python-pickle-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/deserialize/python/benign.py"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,57 @@
//! Ruby `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: marker string consumed by the harness shim which calls
//! `Marshal.load(input)` with `Marshal.const_defined?`-style
//! instrumentation that records a `ProbeKind::Deserialize {
//! gadget_chain_invoked: true }` probe whenever a non-allowlisted
//! constant is materialised. The harness allowlist contains `Integer`
//! / `String` / `Array`.
//!
//! Benign control: marker requests only the allow-listed `Integer`
//! constant.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:Nyx::Gadget::RCE",
label: "ruby-marshal-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/deserialize/ruby/vuln.rb"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
benign_control: Some(PayloadRef {
label: "ruby-marshal-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:Integer",
label: "ruby-marshal-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/deserialize/ruby/benign.rb"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,56 @@
//! Format-string (`Cap::FMT_STRING`) payloads exercised by C fixtures
//! (`tests/dynamic_fixtures/c/free_fn/`).
//!
//! The vuln payload confirms via a sink-site Crash probe rather than an
//! output marker. The bytes themselves are not load-bearing — the
//! detection contract is "process aborts inside the entry call AFTER the
//! crash-guard installs". Fixtures choose how to crash on the payload;
//! the canonical example is a `printf`-family sink that interprets `%n`
//! against a controlled destination pointer on a guard-compiled binary.
//!
//! The benign control must reach the same entry without crashing; its
//! bytes carry `NYX_BENIGN` so fixture code can short-circuit before
//! the fault path.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::SignalSet;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"%n%n%n%n%n%n%n%n%n%n",
label: "fmt-string-percent-n-crash",
oracle: Oracle::SinkCrash {
signals: SignalSet::all(),
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 5,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/c/free_fn/sink_fault.c",
"tests/dynamic_fixtures/c/free_fn/setup_fault.c",
],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: Some(PayloadRef {
label: "fmt-string-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"benign_safe_fmt_NYX_BENIGN",
label: "fmt-string-benign",
oracle: Oracle::SinkCrash {
signals: SignalSet::all(),
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 5,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/c/free_fn/sink_fault.c"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,3 @@
//! Format-string (`Cap::FMT_STRING`) per-language payload slices.
pub mod c;

View file

@ -0,0 +1,56 @@
//! Go `Cap::HEADER_INJECTION` payloads —
//! `http.ResponseWriter.Header().Set` CRLF injection.
//!
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
//! nyx-injected=pwn`. Spliced into the host's
//! `w.Header().Set("Set-Cookie", value)` call without CRLF stripping.
//!
//! Benign control: same logical cookie value pre-encoded with
//! `net/url.QueryEscape`. Captured value carries `%0D%0A` so the
//! predicate stays clear.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-go-crlf",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/go/vuln.go"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-go-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-go-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/go/benign.go"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,122 @@
//! Java `Cap::HEADER_INJECTION` payloads —
//! `HttpServletResponse.setHeader` CRLF injection.
//!
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
//! nyx-injected=pwn`. Concatenated into the host's
//! `response.setHeader("Set-Cookie", value)` call without CRLF
//! stripping, the wire response carries the attacker's second
//! header. The harness's instrumented `setHeader` records a
//! `ProbeKind::HeaderEmit { name: "Set-Cookie", value: <raw bytes> }`
//! probe with the unescaped CRLF intact.
//!
//! Benign control: same logical session-id, but the harness's
//! benign code path runs the value through `URLEncoder.encode(...,
//! "UTF-8")` so the carried bytes become
//! `nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn`. The
//! captured value has no literal `\r\n`; the
//! [`ProbePredicate::HeaderInjected`] predicate stays clear.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-java-crlf",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/java/Vuln.java"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-java-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-java-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/java/Benign.java"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
// Same CRLF-bearing bytes as the servlet payload above, but pinned
// to the `java_raw` fixture (a `java.net.ServerSocket` driven by
// `createServer` + `runOnce` that writes raw bytes via
// `OutputStream.write(byte[])`). The wire frame captured off the
// response socket carries two distinct `Set-Cookie:` lines, so
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
// "Set-Cookie" }` fires — proving the smuggled header survived
// to the actual wire instead of being CRLF-stripped en route by
// Tomcat / Jetty / Undertow.
//
// Distinct payload (not just an extra predicate on the servlet
// row) because every modern Java servlet container response
// serializer strips CRLF at the wire-write boundary, so the
// wire-frame predicate would never fire against the canonical
// servlet fixture.
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-java-raw-wire-smuggle",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/java_raw/Vuln.java"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-java-raw-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-java-raw-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/java_raw/Vuln.java"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,114 @@
//! JavaScript `Cap::HEADER_INJECTION` payloads —
//! `http.ServerResponse#setHeader` CRLF injection.
//!
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
//! nyx-injected=pwn`. Spliced into the host's
//! `res.setHeader('Set-Cookie', value)` call without CRLF stripping.
//!
//! Benign control: same logical cookie value pre-encoded with
//! `encodeURIComponent`. Captured value carries `%0D%0A` so the
//! predicate stays clear.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-js-crlf",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/js/vuln.js"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-js-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-js-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/js/benign.js"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
// Same CRLF-bearing bytes as the Node payload above, but pinned to
// the `js_raw` fixture (a `net.createServer` callback writing raw
// bytes via `socket.write`). The wire frame captured off the
// response socket carries two distinct `Set-Cookie:` lines, so
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
// "Set-Cookie" }` fires — proving the smuggled header survived to
// the actual wire instead of being CRLF-stripped en route.
//
// Distinct payload (not just an extra predicate on the Node row)
// because Node's `http.ServerResponse#setHeader` validator strips
// CRLF at the wire-write boundary, so the wire-frame predicate
// would never fire against the canonical Node fixture. See
// `.pitboss/play/deferred.md` (Phase 08 wire-frame option A) for
// the framework-level CRLF-strip empirical from session-0018.
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-js-raw-wire-smuggle",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/js_raw/vuln.js"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-js-raw-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-js-raw-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/js_raw/vuln.js"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,31 @@
//! HTTP response-header CRLF injection (`Cap::HEADER_INJECTION`)
//! per-language payload slices.
//!
//! Phase 08 (Track J.6) carves header injection across the seven HTTP
//! framework ecosystems Nyx supports: Java (`HttpServletResponse.
//! setHeader`), Python (`flask.Response.headers.__setitem__`), PHP
//! (`header()`), Ruby (`Rack::Response#set_header`), JavaScript
//! (`http.ServerResponse#setHeader`), Go (`http.ResponseWriter.
//! Header().Set`), Rust (`axum`-style `HeaderMap::insert`). Every
//! vuln payload appends a `\r\n` followed by an injected header line
//! (`Set-Cookie: nyx-injected=pwn`) — once the host code splices the
//! attacker bytes into the response writer's value argument the wire
//! actually carries two headers instead of one. The paired benign
//! control passes the same logical value through the per-language URL
//! encoder so the captured value carries `%0D%0A` (not the raw
//! bytes), the encoded text is preserved verbatim inside a single
//! header value, and the differential rule stays clear.
//!
//! The oracle's
//! [`crate::dynamic::oracle::ProbePredicate::HeaderInjected`] reads
//! the per-payload `ProbeKind::HeaderEmit { name, value }` records
//! and fires when the value contains a literal CRLF byte pair —
//! vuln passes, benign clears, fulfilling the §4.1 differential rule.
pub mod go;
pub mod java;
pub mod js;
pub mod php;
pub mod python;
pub mod ruby;
pub mod rust;

View file

@ -0,0 +1,117 @@
//! PHP `Cap::HEADER_INJECTION` payloads — `header()` CRLF injection.
//!
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
//! nyx-injected=pwn`. Concatenated into the host's
//! `header("Set-Cookie: " . $value)` call without CRLF stripping, the
//! wire response carries the attacker's second header. The harness's
//! instrumented `header()` records a `ProbeKind::HeaderEmit` probe with
//! the unescaped CRLF intact.
//!
//! Benign control: same logical cookie value pre-encoded with PHP's
//! `urlencode`. Captured value carries `%0D%0A` so the predicate
//! stays clear.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-php-crlf",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/php/vuln.php"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-php-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-php-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/php/benign.php"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
// Same CRLF-bearing bytes as the `header()` payload above, but
// pinned to the `php_raw` fixture (a `stream_socket_server` driven
// by `create_server` + `run_once` that writes raw bytes via
// `fwrite($conn, $raw)`). The wire frame captured off the
// response socket carries two distinct `Set-Cookie:` lines, so
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
// "Set-Cookie" }` fires — proving the smuggled header survived to
// the actual wire instead of being CRLF-stripped en route.
//
// Distinct payload (not just an extra predicate on the `header()`
// row) because PHP's built-in `header()` rejects raw CRLF since
// 5.1.2 and modern Slim / Laravel / Symfony response serializers
// strip CRLF at the wire-write boundary, so the wire-frame
// predicate would never fire against the canonical `header()`
// fixture.
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-php-raw-wire-smuggle",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/php_raw/vuln.php"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-php-raw-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-php-raw-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/php_raw/vuln.php"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,120 @@
//! Python `Cap::HEADER_INJECTION` payloads —
//! `flask.Response.headers.__setitem__` CRLF injection.
//!
//! Vuln payload: a session cookie value followed by `\r\nSet-Cookie:
//! nyx-injected=pwn`. Spliced into the host's
//! `response.headers["Set-Cookie"] = value` assignment without CRLF
//! stripping, the WSGI layer carries the attacker's second header on
//! the wire. The harness's instrumented response writer records a
//! `ProbeKind::HeaderEmit { name: "Set-Cookie", value: <raw bytes> }`
//! probe with the unescaped CRLF intact.
//!
//! Benign control: same logical cookie value pre-encoded with
//! `urllib.parse.quote`. The carried bytes become
//! `nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn` — no literal
//! CRLF — and the [`ProbePredicate::HeaderInjected`] predicate stays
//! clear.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-python-crlf",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/python/vuln.py"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-python-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-python-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/python/benign.py"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
// Same CRLF-bearing bytes as the Flask payload above, but pinned
// to the `python_raw` fixture (a `BaseHTTPRequestHandler` writing
// raw bytes via `self.wfile.write`). The wire frame captured off
// the response socket carries two distinct `Set-Cookie:` lines, so
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
// "Set-Cookie" }` fires — proving the smuggled header survived to
// the actual wire instead of being CRLF-stripped en route.
//
// Distinct payload (not just an extra predicate on the Flask row)
// because Flask's werkzeug response serializer strips CRLF at the
// wire-write boundary, so the wire-frame predicate would never
// fire against the canonical Flask fixture. See
// `.pitboss/play/deferred.md` (Phase 08 wire-frame option A) for
// the framework-level CRLF-strip empirical finding from session-0018.
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-python-raw-wire-smuggle",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/python_raw/vuln.py"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-python-raw-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-python-raw-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/python_raw/vuln.py"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,114 @@
//! Ruby `Cap::HEADER_INJECTION` payloads —
//! `Rack::Response#set_header` CRLF injection.
//!
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
//! nyx-injected=pwn`. Spliced into the host's
//! `response.set_header("Set-Cookie", value)` call without CRLF
//! stripping, the wire response carries the attacker's second header.
//!
//! Benign control: same logical cookie value pre-encoded with
//! `URI.encode_www_form_component`. Captured value carries `%0D%0A`
//! so the predicate stays clear.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-ruby-crlf",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/ruby/vuln.rb"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-ruby-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-ruby-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/ruby/benign.rb"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
// Same CRLF-bearing bytes as the Rack payload above, but pinned to
// the `ruby_raw` fixture (a `TCPServer` driven by `create_server`
// + `run_once` that writes raw bytes via `TCPSocket#write`). The
// wire frame captured off the response socket carries two
// distinct `Set-Cookie:` lines, so `HeaderSmuggledInWire { primary:
// "Set-Cookie", smuggled: "Set-Cookie" }` fires — proving the
// smuggled header survived to the actual wire instead of being
// CRLF-stripped en route.
//
// Distinct payload (not just an extra predicate on the Rack row)
// because Rack / Sinatra / Rails response serializers strip CRLF
// at the wire-write boundary, so the wire-frame predicate would
// never fire against the canonical Rack fixture.
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-ruby-raw-wire-smuggle",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/ruby_raw/vuln.rb"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-ruby-raw-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-ruby-raw-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/ruby_raw/vuln.rb"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,116 @@
//! Rust `Cap::HEADER_INJECTION` payloads — `axum`-style
//! `HeaderMap::insert` CRLF injection.
//!
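//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
//! nyx-injected=pwn`. Spliced into a hand-rolled `HeaderMap` insert
//! that bypasses the `HeaderValue::from_str` validity check (e.g.
//! `HeaderValue::from_bytes(...).unwrap()` over a tainted slice).
//! NOTE(review): the `http` crate documents `HeaderValue::from_bytes`
//! as rejecting bytes below 32 (other than tab), so that example would
//! fail on CR/LF rather than bypass the check — confirm which
//! constructor the `rust/vuln.rs` fixture actually models.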
//!
//! Benign control: same logical cookie value pre-encoded with the
//! `percent-encoding` crate. Captured value carries `%0D%0A` so the
//! predicate stays clear.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-rust-crlf",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/rust/vuln.rs"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-rust-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-rust-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderInjected {
header_name: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/rust/benign.rs"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
// Same CRLF-bearing bytes as the axum payload above, but pinned to
// the `rust_raw` fixture (a `std::net::TcpListener` driven by
// `create_server` + `run_once` that writes raw bytes via
// `TcpStream::write_all`). The wire frame captured off the
// response socket carries two distinct `Set-Cookie:` lines, so
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
// "Set-Cookie" }` fires — proving the smuggled header survived to
// the actual wire instead of being CRLF-stripped en route.
//
// Distinct payload (not just an extra predicate on the axum row)
// because every framework's response serializer strips CRLF at
// the wire-write boundary, so the wire-frame predicate would
// never fire against the canonical axum fixture. See
// `.pitboss/play/deferred.md` (Phase 08 wire-frame option A) for
// the framework-level CRLF-strip empirical from session-0018.
CuratedPayload {
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
label: "header-injection-rust-raw-wire-smuggle",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/rust_raw/vuln.rs"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
benign_control: Some(PayloadRef {
label: "header-injection-rust-raw-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
label: "header-injection-rust-raw-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::HeaderSmuggledInWire {
primary: "Set-Cookie",
smuggled: "Set-Cookie",
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 12,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/header_injection/rust_raw/vuln.rs"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,54 @@
//! Go `Cap::JSON_PARSE` payloads.
//!
//! The depth pair shares a single fixture; the payload tag
//! (`NYX_JSON_DEEP` vs `NYX_JSON_SHALLOW`) picks the branch. Go has
//! no prototype-pollution surface so the canary half of the slice is
//! intentionally omitted.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Depth bound passed as `max_depth` to every
/// `ProbePredicate::JsonParseExcessiveDepth` predicate in this file.
const MAX_DEPTH: u32 = 64;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_JSON_DEEP",
label: "json-parse-go-depth-bomb",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::JsonParseExcessiveDepth {
max_depth: MAX_DEPTH,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/json_parse_depth/go/vuln.go"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::JsonParseExcessiveDepth {
max_depth: MAX_DEPTH,
}],
benign_control: Some(PayloadRef {
label: "json-parse-go-depth-shallow",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_JSON_SHALLOW",
label: "json-parse-go-depth-shallow",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::JsonParseExcessiveDepth {
max_depth: MAX_DEPTH,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/json_parse_depth/go/vuln.go"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,59 @@
//! Java `Cap::JSON_PARSE` payloads.
//!
//! The depth pair shares a single fixture; the payload tag
//! (`NYX_JSON_DEEP` vs `NYX_JSON_SHALLOW`) picks the branch. Java has
//! no prototype-pollution surface so the canary half of the slice is
//! intentionally omitted, matching the PHP / Go / Rust shape.
//!
//! Java has no stdlib JSON parser, so the harness ships a hand-rolled
//! iterative JSON walker as a sibling class (`NyxJsonProbe.java`); the
//! fixture calls `NyxJsonProbe.parse(text)` in place of any Jackson /
//! Gson dependency so the build path never reaches for an external jar.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Depth bound passed as `max_depth` to every
/// `ProbePredicate::JsonParseExcessiveDepth` predicate in this file.
const MAX_DEPTH: u32 = 64;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_JSON_DEEP",
label: "json-parse-java-depth-bomb",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::JsonParseExcessiveDepth {
max_depth: MAX_DEPTH,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/json_parse_depth/java/Vuln.java"],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::JsonParseExcessiveDepth {
max_depth: MAX_DEPTH,
}],
benign_control: Some(PayloadRef {
label: "json-parse-java-depth-shallow",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_JSON_SHALLOW",
label: "json-parse-java-depth-shallow",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::JsonParseExcessiveDepth {
max_depth: MAX_DEPTH,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &["tests/dynamic_fixtures/json_parse_depth/java/Vuln.java"],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

Some files were not shown because too many files have changed in this diff Show more