mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
Dynamic (#77)
This commit is contained in:
parent
55247b7fcd
commit
991c84a1eb
1464 changed files with 225448 additions and 1985 deletions
|
|
@ -3,7 +3,6 @@
|
|||
//! Tracks inclusive `[lo, hi]` integer bounds. `None` = unbounded (−∞ or +∞).
|
||||
//! Both `None` = Top (any integer). Provides arithmetic transfer functions
|
||||
//! (add, sub, mul, div, mod) with overflow-safe semantics.
|
||||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::state::lattice::{AbstractDomain, Lattice};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
|
|||
159
src/ast.rs
159
src/ast.rs
|
|
@ -102,6 +102,7 @@ fn parse_timeout_diag(path: &Path, timeout_ms: u64) -> Diag {
|
|||
rollup: None,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: Vec::new(),
|
||||
stable_hash: 0,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -234,10 +235,17 @@ fn build_taint_diag(
|
|||
.map(sanitize_desc)
|
||||
})
|
||||
.unwrap_or_else(|| "(unknown)".into());
|
||||
// Sink-callee attribution: when the sink node is an *argument* of a call
|
||||
// (e.g. PHP `header("location: " . $_GET['x'])` — the `$_GET[...]` subscript
|
||||
// carries `callee = "$_GET"` but `outer_callee = "header"`), the enclosing
|
||||
// call is the real sink and should be displayed, not the source token.
|
||||
// `outer_callee` is only populated for nested/argument positions, so for a
|
||||
// plain call node it is None and we fall back to the node's own callee.
|
||||
let call_site_callee = cfg_graph[finding.sink]
|
||||
.call
|
||||
.callee
|
||||
.outer_callee
|
||||
.as_deref()
|
||||
.or(cfg_graph[finding.sink].call.callee.as_deref())
|
||||
.map(sanitize_desc)
|
||||
.unwrap_or_else(|| "(unknown)".into());
|
||||
let kind_label = source_kind_label(finding.source_kind);
|
||||
|
|
@ -706,6 +714,7 @@ fn build_taint_diag(
|
|||
rollup: None,
|
||||
finding_id: finding.finding_id.clone(),
|
||||
alternative_finding_ids: finding.alternative_finding_ids.to_vec(),
|
||||
stable_hash: 0,
|
||||
};
|
||||
|
||||
// Post-fill explanation and confidence limiters
|
||||
|
|
@ -779,6 +788,35 @@ fn lang_for_path(path: &Path) -> Option<(Language, &'static str)> {
|
|||
}
|
||||
}
|
||||
|
||||
/// All language slugs the scanner can parse, paired with the file extensions
/// that map to them. Single source of truth shared with [`lang_for_path`]; the
/// `supported_extensions_resolve_to_their_slug` test asserts they stay in sync.
///
/// Each entry is `(slug, extensions)`. Slugs and extensions are written in
/// lowercase and extensions carry no leading dot. [`extensions_for_lang`]
/// returns the extension slice of the first entry whose slug matches, so a
/// slug should appear at most once in this table.
pub(crate) const SUPPORTED_LANGUAGE_EXTENSIONS: &[(&str, &[&str])] = &[
    ("rust", &["rs"]),
    // NOTE(review): `.h` is not listed for either C or C++ — confirm headers
    // are intentionally left to `lang_for_path` (not visible here) or skipped.
    ("c", &["c"]),
    (
        "cpp",
        &["cpp", "cc", "cxx", "c++", "hpp", "hxx", "hh", "h++"],
    ),
    ("java", &["java"]),
    ("go", &["go"]),
    ("php", &["php"]),
    ("python", &["py"]),
    ("typescript", &["ts", "tsx"]),
    ("javascript", &["js", "jsx"]),
    ("ruby", &["rb"]),
];
|
||||
|
||||
/// File extensions associated with a language slug (case-insensitive). Returns
|
||||
/// an empty slice if `slug` is not a supported language.
|
||||
pub fn extensions_for_lang(slug: &str) -> &'static [&'static str] {
|
||||
SUPPORTED_LANGUAGE_EXTENSIONS
|
||||
.iter()
|
||||
.find(|(s, _)| s.eq_ignore_ascii_case(slug))
|
||||
.map(|(_, exts)| *exts)
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
|
||||
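/// Fast binary-file guard: skip when the integer NUL-byte percentage exceeds 1
/// (the division truncates, so the effective threshold is about 2% NUL bytes).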
|
||||
fn is_binary(bytes: &[u8]) -> bool {
|
||||
bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1
|
||||
|
|
@ -965,9 +1003,11 @@ fn is_test_suppressible_pattern(id: &str) -> bool {
|
|||
// deterministic test data, insecure RNG used for fixture seeding.
|
||||
id.ends_with(".secrets.hardcoded_secret")
|
||||
|| id.ends_with(".secrets.hardcoded_key")
|
||||
|| id.ends_with(".crypto.hardcoded_key")
|
||||
|| id.ends_with(".crypto.math_random")
|
||||
|| id.ends_with(".crypto.insecure_random")
|
||||
|| id.ends_with(".crypto.weak_digest")
|
||||
|| id.ends_with(".crypto.weak_algorithm")
|
||||
|| id.ends_with(".crypto.md5")
|
||||
|| id.ends_with(".crypto.sha1")
|
||||
|| id.ends_with(".crypto.rand")
|
||||
|
|
@ -1041,9 +1081,7 @@ fn downgrade_severity(s: Severity) -> Severity {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// ParsedSource + ParsedFile: shared parse/CFG pipeline
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Level 1: parsed tree + lang info. No CFG construction.
|
||||
struct ParsedSource<'a> {
|
||||
|
|
@ -1363,6 +1401,7 @@ impl<'a> ParsedSource<'a> {
|
|||
rollup: None,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: Vec::new(),
|
||||
stable_hash: 0,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -1890,7 +1929,6 @@ impl<'a> ParsedFile<'a> {
|
|||
cfg: &body.graph,
|
||||
entry: body.entry,
|
||||
lang: caller_lang,
|
||||
file_path: &self.source.file_path_str,
|
||||
source_bytes: self.source.bytes,
|
||||
func_summaries: self.local_summaries(),
|
||||
global_summaries,
|
||||
|
|
@ -1950,13 +1988,35 @@ impl<'a> ParsedFile<'a> {
|
|||
cfg_analysis::Confidence::Medium => crate::evidence::Confidence::Medium,
|
||||
cfg_analysis::Confidence::Low => crate::evidence::Confidence::Low,
|
||||
});
|
||||
// Carry the sink node's resolved Sink caps onto the structural
|
||||
// finding's evidence so downstream cap-classification (and the
|
||||
// eval `cap_of`) buckets `cfg-unguarded-sink` under its real cap
|
||||
// (sqli/cmdi/ssrf/…) instead of the catch-all `other`. Without
|
||||
// this every taint-less structural sink finding fell through to
|
||||
// `other`, hiding real recall (e.g. dvpwa `cur.execute` SQLi)
|
||||
// and inflating the `other` bucket. Non-sink structural findings
|
||||
// (resource-leak, auth-gap) carry no Sink label, so this is 0.
|
||||
let cf_sink_caps: u32 = cf
|
||||
.evidence
|
||||
.first()
|
||||
.map(|&n| {
|
||||
cfg_ctx.cfg[n].taint.labels.iter().fold(0u32, |acc, l| {
|
||||
if let crate::labels::DataLabel::Sink(c) = l {
|
||||
acc | c.bits()
|
||||
} else {
|
||||
acc
|
||||
}
|
||||
})
|
||||
})
|
||||
.unwrap_or(0);
|
||||
let cf_category = FindingCategory::for_structural_rule(&cf.rule_id);
|
||||
out.push(Diag {
|
||||
path: self.source.path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cf.severity,
|
||||
id: cf.rule_id,
|
||||
category: FindingCategory::Security,
|
||||
category: cf_category,
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: Some(cf.message),
|
||||
|
|
@ -1971,6 +2031,7 @@ impl<'a> ParsedFile<'a> {
|
|||
kind: "sink".into(),
|
||||
snippet: None,
|
||||
}),
|
||||
sink_caps: cf_sink_caps,
|
||||
guards: vec![],
|
||||
sanitizers: vec![],
|
||||
state: None,
|
||||
|
|
@ -1984,6 +2045,7 @@ impl<'a> ParsedFile<'a> {
|
|||
rollup: None,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: Vec::new(),
|
||||
stable_hash: 0,
|
||||
});
|
||||
}
|
||||
} // end for body in bodies (CFG structural analyses)
|
||||
|
|
@ -2031,7 +2093,7 @@ impl<'a> ParsedFile<'a> {
|
|||
col: point.column + 1,
|
||||
severity: sf.severity,
|
||||
id: sf.rule_id.clone(),
|
||||
category: FindingCategory::Security,
|
||||
category: FindingCategory::for_structural_rule(&sf.rule_id),
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: Some(sf.message.clone()),
|
||||
|
|
@ -2064,6 +2126,7 @@ impl<'a> ParsedFile<'a> {
|
|||
rollup: None,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: Vec::new(),
|
||||
stable_hash: 0,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -2157,9 +2220,7 @@ impl<'a> ParsedFile<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Pass 1: Extract function summaries (no taint analysis)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Extract function summaries from pre-read bytes.
|
||||
///
|
||||
|
|
@ -2305,7 +2366,10 @@ pub fn perf_stage_breakdown_fused(
|
|||
TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &taint_diags);
|
||||
let _filtered: Vec<_> = ast_findings
|
||||
.into_iter()
|
||||
.filter(|d| !suppression.should_suppress(&d.id, d.line))
|
||||
.filter(|d| {
|
||||
!suppression.should_suppress(&d.id, d.line)
|
||||
&& !suppression.is_redundant_ast_pattern(&d.id, d.line)
|
||||
})
|
||||
.collect();
|
||||
let t_suppr = s_suppr.elapsed().as_micros();
|
||||
|
||||
|
|
@ -2449,9 +2513,7 @@ pub fn extract_all_summaries_from_bytes(
|
|||
))
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Constant-argument suppression helper
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Returns `true` when the captured call node has only literal arguments
|
||||
/// (string, number, boolean, null/nil/none), or identifier arguments that
|
||||
|
|
@ -5351,9 +5413,7 @@ fn has_interpolation(node: tree_sitter::Node) -> bool {
|
|||
false
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Layer B: AST pattern suppression when taint confirms safety
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Map the second segment of a pattern ID (e.g. "cmdi" from "py.cmdi.os_system")
|
||||
/// to the `Cap` that taint analysis models. Returns `None` for categories taint
|
||||
|
|
@ -5425,6 +5485,14 @@ struct TaintSuppressionCtx {
|
|||
/// 11 inline analysis but the sink's enclosing scope has no
|
||||
/// labelled Sanitizer of its own.
|
||||
interproc_sanitizer_callers: HashSet<Option<String>>,
|
||||
/// Union of resolved sink-cap bits for cap-specific taint findings at
|
||||
/// each line. Used by [`Self::is_redundant_ast_pattern`] to drop an
|
||||
/// AST-pattern finding only when the flow engine already emitted a
|
||||
/// specific rule id for the same vulnerability class. Legacy generic
|
||||
/// findings (`taint-unsanitised-flow`, `cfg-unguarded-sink`) are not
|
||||
/// canonical enough to subsume language-specific AST rule IDs such as
|
||||
/// `py.cmdi.subprocess_shell` or `c.cmdi.system`.
|
||||
specific_taint_finding_caps_by_line: HashMap<usize, u32>,
|
||||
}
|
||||
|
||||
impl TaintSuppressionCtx {
|
||||
|
|
@ -5623,6 +5691,26 @@ impl TaintSuppressionCtx {
|
|||
.map(|d| d.line)
|
||||
.collect();
|
||||
|
||||
// Cap bits per line for cap-specific flow-backed findings only, so a
|
||||
// redundant AST pattern at the same line+cap can be dropped in favour
|
||||
// of the richer flow. Do not count legacy generic findings here:
|
||||
// `taint-unsanitised-flow` and `cfg-unguarded-sink` carry evidence,
|
||||
// but their rule ids are deliberately catch-alls, while AST `cmdi`,
|
||||
// `sqli`, etc. IDs are the canonical namespace many tests, SARIF
|
||||
// consumers, and dynamic-verification spec derivation rely on.
|
||||
let mut specific_taint_finding_caps_by_line: HashMap<usize, u32> = HashMap::new();
|
||||
for d in taint_diags {
|
||||
if d.id.starts_with("taint-") && !d.id.starts_with("taint-unsanitised-flow") {
|
||||
if let Some(caps) = d.evidence.as_ref().map(|e| e.sink_caps) {
|
||||
if caps != 0 {
|
||||
*specific_taint_finding_caps_by_line
|
||||
.entry(d.line)
|
||||
.or_default() |= caps;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Per-function partition of taint findings. Maps each finding's
|
||||
// line to the enclosing function scope by reusing
|
||||
// `sink_func_at_line` (the same span/function mapping the Sink-side
|
||||
|
|
@ -5646,9 +5734,30 @@ impl TaintSuppressionCtx {
|
|||
engine_validated_funcs,
|
||||
source_killed_funcs,
|
||||
interproc_sanitizer_callers,
|
||||
specific_taint_finding_caps_by_line,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when an AST pattern finding is a redundant restatement
|
||||
/// of a flow the taint engine already reported at the same line.
|
||||
///
|
||||
/// The taint / structural flow finding carries source + path evidence the
|
||||
/// bare pattern lacks, so when both fire at the same line for the same
|
||||
/// cap the pattern is pure duplicate noise. This is the
|
||||
/// taint-found-it-UNSAFE counterpart to [`Self::should_suppress`]'s
|
||||
/// taint-found-it-SAFE logic: there, no flow finding means the pattern
|
||||
/// may carry unique signal; here, a same-cap flow finding means it does
|
||||
/// not. Cap-matched (not line-only) so a pattern whose cap differs from
|
||||
/// the co-located flow's cap — a genuinely distinct sink — is preserved.
|
||||
fn is_redundant_ast_pattern(&self, pattern_id: &str, line: usize) -> bool {
|
||||
let Some(cap) = pattern_category_cap(pattern_id) else {
|
||||
return false;
|
||||
};
|
||||
self.specific_taint_finding_caps_by_line
|
||||
.get(&line)
|
||||
.is_some_and(|caps| caps & cap.bits() != 0)
|
||||
}
|
||||
|
||||
/// Returns `true` if this AST pattern finding should be suppressed.
|
||||
fn should_suppress(&self, pattern_id: &str, line: usize) -> bool {
|
||||
// Condition 1: pattern category maps to a Cap taint models
|
||||
|
|
@ -5734,9 +5843,7 @@ impl TaintSuppressionCtx {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Pass 2 / single‑file: Full rule execution (AST queries + taint)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Run all enabled analyses on pre-read bytes and return diagnostics.
|
||||
///
|
||||
|
|
@ -5779,11 +5886,10 @@ pub fn run_rules_on_bytes(
|
|||
let suppression =
|
||||
TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &out);
|
||||
let ast_findings = parsed.source.run_ast_queries(cfg);
|
||||
out.extend(
|
||||
ast_findings
|
||||
.into_iter()
|
||||
.filter(|d| !suppression.should_suppress(&d.id, d.line)),
|
||||
);
|
||||
out.extend(ast_findings.into_iter().filter(|d| {
|
||||
!suppression.should_suppress(&d.id, d.line)
|
||||
&& !suppression.is_redundant_ast_pattern(&d.id, d.line)
|
||||
}));
|
||||
}
|
||||
if cfg.scanner.mode == AnalysisMode::Full {
|
||||
out.extend(parsed.run_auth_analyses(cfg, global_summaries, scan_root));
|
||||
|
|
@ -5812,9 +5918,7 @@ pub fn run_rules_on_file(
|
|||
run_rules_on_bytes(&bytes, path, cfg, global_summaries, scan_root)
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Fused single-pass: extract summaries + run full analysis in one parse/CFG
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Result of a fused analysis pass: both function summaries and diagnostics.
|
||||
pub struct FusedResult {
|
||||
|
|
@ -5979,11 +6083,10 @@ pub fn analyse_file_fused(
|
|||
if needs_cfg && cfg.scanner.mode == AnalysisMode::Full {
|
||||
let suppression =
|
||||
TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &out);
|
||||
out.extend(
|
||||
ast_findings
|
||||
.into_iter()
|
||||
.filter(|d| !suppression.should_suppress(&d.id, d.line)),
|
||||
);
|
||||
out.extend(ast_findings.into_iter().filter(|d| {
|
||||
!suppression.should_suppress(&d.id, d.line)
|
||||
&& !suppression.is_redundant_ast_pattern(&d.id, d.line)
|
||||
}));
|
||||
} else {
|
||||
out.extend(ast_findings);
|
||||
}
|
||||
|
|
@ -6086,9 +6189,7 @@ pub fn analyse_file_fused(
|
|||
})
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Text-based pattern scanning (non-tree-sitter files)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Run text-based pattern scanners on files whose extension is not supported
|
||||
/// by tree-sitter. Currently handles `.ejs` templates.
|
||||
|
|
|
|||
287
src/auth_analysis/auth_markers.rs
Normal file
287
src/auth_analysis/auth_markers.rs
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
//! Canonical per-framework authentication-marker registry.
|
||||
//!
|
||||
//! Both the Phase 22 surface probes (`src/surface/lang/*.rs`) and the
|
||||
//! auth-analysis recogniser consult this module so a marker that is
|
||||
//! known to one side cannot drift away from the other. Each constant
|
||||
//! is a flat `&[&str]` of identifier shapes that signal a route is
|
||||
//! gated behind authentication; surface probes match the leaf segment
|
||||
//! of a decorator / middleware / extractor identifier
|
||||
//! (case-insensitive), and the auth analyser folds these into its
|
||||
//! per-language `login_guard_names` / `authorization_check_names`
|
||||
//! tables via [`router_auth_markers_for_lang`].
|
||||
//!
|
||||
//! The lists were lifted verbatim from the per-probe constants that
|
||||
//! shipped with Phase 22; further additions land here and propagate to
|
||||
//! every consumer at once.
|
||||
//!
|
||||
//! Lookups: prefer [`is_router_auth_marker`] for the framework-aware
|
||||
//! dispatch, fall back to [`is_known_router_auth_marker`] when the
|
||||
//! framework is not yet identified at the call site.
|
||||
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Frameworks the surface probes recognise. Distinct from
/// [`crate::surface::Framework`] (which carries pretty-print metadata)
/// so this module stays free of surface-layer types and can be
/// imported by `auth_analysis::extract` without a circular dep.
///
/// Each variant selects exactly one marker list through [`markers_for`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AuthFramework {
    /// Selects [`FLASK_DECORATORS`].
    Flask,
    /// Selects [`FASTAPI_DECORATORS`].
    FastApi,
    /// Selects [`DJANGO_DECORATORS`].
    Django,
    /// Selects [`SPRING_ANNOTATIONS`].
    Spring,
    /// Selects [`SERVLET_ANNOTATIONS`].
    JavaServlet,
    /// Selects [`QUARKUS_ANNOTATIONS`].
    Quarkus,
    /// Selects [`EXPRESS_MIDDLEWARES`].
    Express,
    /// Selects [`KOA_MIDDLEWARES`].
    Koa,
    /// Selects [`GIN_MIDDLEWARES`].
    Gin,
    /// Selects [`ACTIX_EXTRACTORS`].
    ActixWeb,
    /// Selects [`AXUM_EXTRACTORS`].
    Axum,
}
|
||||
|
||||
/// Flask (`@login_required`, `@requires_auth`, …).
///
/// Returned by [`markers_for`] for [`AuthFramework::Flask`] and merged into
/// the Python list built by [`router_auth_markers_for_lang`].
// NOTE(review): unlike `FASTAPI_DECORATORS`, this list has no `require_auth`
// entry — confirm the omission is intentional.
pub const FLASK_DECORATORS: &[&str] = &[
    "login_required",
    "auth_required",
    "jwt_required",
    "token_required",
    "requires_auth",
    "authenticated",
    "require_login",
];
|
||||
|
||||
/// FastAPI (`Depends(get_current_user)`, `@login_required`, …).
///
/// Contains every entry of [`FLASK_DECORATORS`] plus `require_auth` and
/// `current_user`. Returned by [`markers_for`] for
/// [`AuthFramework::FastApi`].
// NOTE(review): the doc example mentions `get_current_user`, but the list only
// carries `current_user`; the matchers in this module compare whole strings,
// so `get_current_user` itself would not match here — confirm the probes
// normalise the name before lookup.
pub const FASTAPI_DECORATORS: &[&str] = &[
    "login_required",
    "auth_required",
    "jwt_required",
    "token_required",
    "requires_auth",
    "authenticated",
    "require_auth",
    "require_login",
    "current_user",
];
|
||||
|
||||
/// Django (`@login_required`, `@permission_required`, …).
///
/// Returned by [`markers_for`] for [`AuthFramework::Django`] and merged into
/// the Python list built by [`router_auth_markers_for_lang`].
// NOTE(review): `csrf_protect` looks like a CSRF-protection decorator rather
// than an authentication gate — confirm it should count as an auth marker.
pub const DJANGO_DECORATORS: &[&str] = &[
    "login_required",
    "permission_required",
    "user_passes_test",
    "staff_member_required",
    "csrf_protect",
    "require_authenticated",
    "auth_required",
];
|
||||
|
||||
/// Spring (`@PreAuthorize`, `@Secured`, …).
///
/// Annotation names are listed without the leading `@`. Returned by
/// [`markers_for`] for [`AuthFramework::Spring`]. `RolesAllowed` also appears
/// in the Servlet and Quarkus lists; [`router_auth_markers_for_lang`]
/// de-duplicates it when merging the Java lists.
pub const SPRING_ANNOTATIONS: &[&str] = &[
    "PreAuthorize",
    "PostAuthorize",
    "Secured",
    "RolesAllowed",
    "AuthenticationPrincipal",
];
|
||||
|
||||
/// Java Servlet / JAX-RS (`@RolesAllowed`, `@RequiresAuthentication`, …).
///
/// Annotation names are listed without the leading `@`. Returned by
/// [`markers_for`] for [`AuthFramework::JavaServlet`].
pub const SERVLET_ANNOTATIONS: &[&str] = &[
    "RolesAllowed",
    "DenyAll",
    "RequiresAuthentication",
    "RequiresUser",
];
|
||||
|
||||
/// Quarkus (`@Authenticated`, `@RolesAllowed`, …).
///
/// Annotation names are listed without the leading `@`. Returned by
/// [`markers_for`] for [`AuthFramework::Quarkus`]. Shares `RolesAllowed`,
/// `DenyAll` and `RequiresAuthentication` with [`SERVLET_ANNOTATIONS`].
pub const QUARKUS_ANNOTATIONS: &[&str] = &[
    "Authenticated",
    "RolesAllowed",
    "DenyAll",
    "RequiresAuthentication",
];
|
||||
|
||||
/// Express middleware (`app.use(requireAuth)`, `passport.authenticate`, …).
///
/// Returned by [`markers_for`] for [`AuthFramework::Express`] and merged with
/// [`KOA_MIDDLEWARES`] for JavaScript / TypeScript by
/// [`router_auth_markers_for_lang`].
// NOTE(review): `passport` and `jwt` are module-like names rather than
// middleware identifiers; since the matchers here compare whole strings they
// only hit when the probe passes exactly that segment — confirm intent.
pub const EXPRESS_MIDDLEWARES: &[&str] = &[
    "requireAuth",
    "requireUser",
    "isAuthenticated",
    "ensureAuthenticated",
    "ensureLoggedIn",
    "authenticate",
    "authMiddleware",
    "verifyToken",
    "verifyJwt",
    "checkJwt",
    "passport",
    "jwt",
];
|
||||
|
||||
/// Koa middleware.
///
/// Same entries as [`EXPRESS_MIDDLEWARES`] except that `ensureLoggedIn` is
/// absent and `koaJwt` is added. Returned by [`markers_for`] for
/// [`AuthFramework::Koa`].
pub const KOA_MIDDLEWARES: &[&str] = &[
    "requireAuth",
    "requireUser",
    "isAuthenticated",
    "ensureAuthenticated",
    "authenticate",
    "authMiddleware",
    "verifyToken",
    "verifyJwt",
    "checkJwt",
    "passport",
    "jwt",
    "koaJwt",
];
|
||||
|
||||
/// Gin middleware (`router.Use(AuthRequired())`, `jwt.JWT()`, …).
///
/// Returned by [`markers_for`] for [`AuthFramework::Gin`]; the only list
/// [`router_auth_markers_for_lang`] consults for Go.
// NOTE(review): `Auth` is a very short marker; because lookups are
// case-insensitive whole-string matches it also accepts `auth` — confirm this
// does not over-match unrelated identifiers at the probe call sites.
pub const GIN_MIDDLEWARES: &[&str] = &[
    "AuthRequired",
    "JWT",
    "JWTAuth",
    "Auth",
    "RequireAuth",
    "RequireUser",
    "VerifyToken",
    "BasicAuth",
];
|
||||
|
||||
/// actix-web extractors (`Identity`, `BearerAuth`, …).
///
/// Returned by [`markers_for`] for [`AuthFramework::ActixWeb`] and merged
/// with [`AXUM_EXTRACTORS`] for Rust by [`router_auth_markers_for_lang`].
// NOTE(review): `User` is a very common type name; treating any extractor
// whose leaf is `User` as an auth gate may over-match — confirm.
pub const ACTIX_EXTRACTORS: &[&str] = &[
    "Identity",
    "BearerAuth",
    "BasicAuth",
    "JwtClaims",
    "Authenticated",
    "User",
];
|
||||
|
||||
/// axum extractors (`Extension<User>`, `BearerAuth`, …).
///
/// Returned by [`markers_for`] for [`AuthFramework::Axum`].
// NOTE(review): `Extension<User` has no closing `>`. The helpers in this
// module ([`is_router_auth_marker`], [`is_known_router_auth_marker`]) compare
// whole strings, so they only match that exact text and not `Extension<User>`;
// presumably the surface probe treats this entry as a prefix — confirm.
pub const AXUM_EXTRACTORS: &[&str] = &[
    "Extension<User",
    "BearerAuth",
    "RequireAuth",
    "AuthenticatedUser",
    "JwtClaims",
];
|
||||
|
||||
/// Per-framework marker list. Returns the empty slice when the
|
||||
/// framework is not registered yet.
|
||||
pub fn markers_for(framework: AuthFramework) -> &'static [&'static str] {
|
||||
match framework {
|
||||
AuthFramework::Flask => FLASK_DECORATORS,
|
||||
AuthFramework::FastApi => FASTAPI_DECORATORS,
|
||||
AuthFramework::Django => DJANGO_DECORATORS,
|
||||
AuthFramework::Spring => SPRING_ANNOTATIONS,
|
||||
AuthFramework::JavaServlet => SERVLET_ANNOTATIONS,
|
||||
AuthFramework::Quarkus => QUARKUS_ANNOTATIONS,
|
||||
AuthFramework::Express => EXPRESS_MIDDLEWARES,
|
||||
AuthFramework::Koa => KOA_MIDDLEWARES,
|
||||
AuthFramework::Gin => GIN_MIDDLEWARES,
|
||||
AuthFramework::ActixWeb => ACTIX_EXTRACTORS,
|
||||
AuthFramework::Axum => AXUM_EXTRACTORS,
|
||||
}
|
||||
}
|
||||
|
||||
/// Case-insensitive whole-string match against the per-framework list.
|
||||
pub fn is_router_auth_marker(framework: AuthFramework, marker: &str) -> bool {
|
||||
let m = marker.trim();
|
||||
markers_for(framework)
|
||||
.iter()
|
||||
.any(|cand| cand.eq_ignore_ascii_case(m))
|
||||
}
|
||||
|
||||
/// Loose match against every framework's list. Used when the call
|
||||
/// site has the language but not the specific framework — e.g. an
|
||||
/// auth-analyser folding "is this a known router-level guard?" into a
|
||||
/// per-language ruleset where the framework split is opaque.
|
||||
pub fn is_known_router_auth_marker(marker: &str) -> bool {
|
||||
let m = marker.trim();
|
||||
[
|
||||
FLASK_DECORATORS,
|
||||
FASTAPI_DECORATORS,
|
||||
DJANGO_DECORATORS,
|
||||
SPRING_ANNOTATIONS,
|
||||
SERVLET_ANNOTATIONS,
|
||||
QUARKUS_ANNOTATIONS,
|
||||
EXPRESS_MIDDLEWARES,
|
||||
KOA_MIDDLEWARES,
|
||||
GIN_MIDDLEWARES,
|
||||
ACTIX_EXTRACTORS,
|
||||
AXUM_EXTRACTORS,
|
||||
]
|
||||
.iter()
|
||||
.any(|list| list.iter().any(|cand| cand.eq_ignore_ascii_case(m)))
|
||||
}
|
||||
|
||||
/// Every router-auth marker the canonical registry knows for `lang`.
|
||||
/// Used by `auth_analysis::config::default_for` to seed
|
||||
/// `login_guard_names` so a marker added here propagates into the
|
||||
/// per-language guard list without a second edit.
|
||||
pub fn router_auth_markers_for_lang(lang: Lang) -> Vec<&'static str> {
|
||||
let lists: &[&[&str]] = match lang {
|
||||
Lang::Python => &[FLASK_DECORATORS, FASTAPI_DECORATORS, DJANGO_DECORATORS],
|
||||
Lang::Java => &[SPRING_ANNOTATIONS, SERVLET_ANNOTATIONS, QUARKUS_ANNOTATIONS],
|
||||
Lang::JavaScript | Lang::TypeScript => &[EXPRESS_MIDDLEWARES, KOA_MIDDLEWARES],
|
||||
Lang::Go => &[GIN_MIDDLEWARES],
|
||||
Lang::Rust => &[ACTIX_EXTRACTORS, AXUM_EXTRACTORS],
|
||||
_ => &[],
|
||||
};
|
||||
let mut out: Vec<&'static str> = lists.iter().flat_map(|l| l.iter().copied()).collect();
|
||||
out.sort_unstable();
|
||||
out.dedup();
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Framework-scoped lookup ignores ASCII case and rejects unknown names.
    #[test]
    fn flask_login_required_resolves_case_insensitively() {
        for spelling in ["login_required", "Login_Required"] {
            assert!(is_router_auth_marker(AuthFramework::Flask, spelling));
        }
        assert!(!is_router_auth_marker(
            AuthFramework::Flask,
            "something_else"
        ));
    }

    /// A Spring security annotation matches; a routing annotation does not.
    #[test]
    fn spring_preauthorize_resolves() {
        let spring = AuthFramework::Spring;
        assert!(is_router_auth_marker(spring, "PreAuthorize"));
        assert!(!is_router_auth_marker(spring, "GetMapping"));
    }

    /// The framework-agnostic helper finds markers from any list:
    /// `RolesAllowed` lives in the Spring, Servlet, and Quarkus lists.
    #[test]
    fn known_marker_matches_across_frameworks() {
        for known in ["RolesAllowed", "login_required"] {
            assert!(is_known_router_auth_marker(known));
        }
        assert!(!is_known_router_auth_marker("not_an_auth_marker_xyz"));
    }

    /// The merged Python list must contain every entry of each Python
    /// framework list.
    #[test]
    fn python_router_markers_cover_every_framework() {
        let markers = router_auth_markers_for_lang(Lang::Python);
        let first_missing = |list: &'static [&'static str]| {
            list.iter().copied().find(|name| !markers.contains(name))
        };

        if let Some(decorator) = first_missing(FLASK_DECORATORS) {
            panic!("missing flask: {decorator}");
        }
        if let Some(decorator) = first_missing(FASTAPI_DECORATORS) {
            panic!("missing fastapi: {decorator}");
        }
        if let Some(decorator) = first_missing(DJANGO_DECORATORS) {
            panic!("missing django: {decorator}");
        }
    }

    /// Languages without a registered framework yield no markers.
    #[test]
    fn router_markers_for_unknown_lang_is_empty() {
        for lang in [Lang::Ruby, Lang::Php] {
            assert!(router_auth_markers_for_lang(lang).is_empty());
        }
    }
}
|
||||
|
|
@ -902,6 +902,24 @@ fn is_self_scoped_session_base(base: &str) -> bool {
|
|||
| "ctx.session.currentUser"
|
||||
| "ctx.state.user"
|
||||
| "ctx.state.currentUser"
|
||||
// The caller's own id from the session is self-scoped: fetching
|
||||
// your own record with it is not IDOR (only a foreign,
|
||||
// request-supplied id is). The `.user` forms above missed the
|
||||
// `req.session.userId` / `session.uid` idiom.
|
||||
| "req.session.userId"
|
||||
| "request.session.userId"
|
||||
| "session.userId"
|
||||
| "req.session.userid"
|
||||
| "request.session.userid"
|
||||
| "session.userid"
|
||||
| "req.session.uid"
|
||||
| "request.session.uid"
|
||||
| "session.uid"
|
||||
| "ctx.session.userId"
|
||||
| "ctx.session.userid"
|
||||
| "ctx.session.uid"
|
||||
| "ctx.state.userId"
|
||||
| "ctx.state.uid"
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,8 @@
|
|||
//! Configuration for the Rust auth-analysis pass.
|
||||
//!
|
||||
//! Holds [`AuthAnalysisRules`] (admin path/guard patterns, sink classes, and
|
||||
//! name canonicalization) that drive `rs.auth.missing_ownership_check`.
|
||||
|
||||
use crate::auth_analysis::model::SinkClass;
|
||||
use crate::labels::bare_method_name;
|
||||
use crate::utils::config::Config;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,9 @@
|
|||
//! Shared AST-extraction helpers for the auth-analysis framework adapters.
|
||||
//!
|
||||
//! Cross-framework primitives — analysis-unit collection, call-site and
|
||||
//! `ValueRef` extraction, and tree-sitter node/string/span helpers — used by the
|
||||
//! per-framework extractors in this directory (`express`, `axum`, `django`, …).
|
||||
|
||||
use crate::auth_analysis::config::{AuthAnalysisRules, canonical_name, matches_name, strip_quotes};
|
||||
use crate::auth_analysis::model::{
|
||||
AnalysisUnit, AnalysisUnitKind, AuthCheck, AuthCheckKind, AuthorizationModel, CallSite,
|
||||
|
|
@ -3942,6 +3948,27 @@ fn collect_param_names(
|
|||
}
|
||||
}
|
||||
}
|
||||
// TypeScript `required_parameter` / `optional_parameter`. Descend only
|
||||
// into the binding `pattern`, never the `type` annotation: the default
|
||||
// arm harvests id-like names from object-type fields (`user: { id }`)
|
||||
// and lifts typed-bounded scalar ids (`UserId: number`) into
|
||||
// `unit.params`, over-firing the user-input gate on non-route helpers.
|
||||
// Mirrors the Rust `parameter` arm plus the Go/Python id-like filter.
|
||||
"required_parameter" | "optional_parameter" => {
|
||||
if let Some(pattern) = node.child_by_field_name("pattern") {
|
||||
if pattern.kind() == "identifier" && node.child_by_field_name("type").is_some() {
|
||||
let name = text(pattern, bytes);
|
||||
if !name.is_empty()
|
||||
&& !out.contains(&name)
|
||||
&& (include_id_like_typed || !is_python_id_like_typed_param(&name))
|
||||
{
|
||||
out.push(name);
|
||||
}
|
||||
} else {
|
||||
collect_param_names(pattern, bytes, include_id_like_typed, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
for idx in 0..node.named_child_count() {
|
||||
let Some(child) = node.named_child(idx as u32) else {
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@
|
|||
//! - [`sql_semantics`]: ACL-join and `user_id`-predicate detection without a
|
||||
//! SQL parser
|
||||
|
||||
pub mod auth_markers;
|
||||
pub mod checks;
|
||||
pub mod config;
|
||||
pub mod extract;
|
||||
|
|
@ -1014,7 +1015,18 @@ fn auth_finding_to_diag(finding: &checks::AuthFinding, tree: &Tree, file_path: &
|
|||
guard_kind: None,
|
||||
message: Some(finding.message.clone()),
|
||||
labels: vec![],
|
||||
confidence: Some(Confidence::Medium),
|
||||
// Auth-analysis findings are *structural* (parameter-name + control-flow
|
||||
// shape heuristics) and carry no taint witness — `source = None`,
|
||||
// `sink_caps = 0`, no flow steps — so the per-payload dynamic oracle
|
||||
// cannot confirm or refute them (missing-authz needs a 2-user
|
||||
// differential the corpus does not run). Emitting them at Medium put a
|
||||
// large zero-witness, dynamically-Unsupported tranche on the default /
|
||||
// verified surface (the bulk of the nodegoat/railsgoat/juiceshop `auth`
|
||||
// FP flood). Demote to Low so they sit below the default min-confidence
|
||||
// and verify gates while remaining available for access-control audits.
|
||||
// assert_has tests pin rule-id presence, not confidence, so they stay
|
||||
// green.
|
||||
confidence: Some(Confidence::Low),
|
||||
evidence: Some(Evidence {
|
||||
source: None,
|
||||
sink: Some(SpanEvidence {
|
||||
|
|
@ -1037,6 +1049,7 @@ fn auth_finding_to_diag(finding: &checks::AuthFinding, tree: &Tree, file_path: &
|
|||
rollup: None,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: Vec::new(),
|
||||
stable_hash: 0,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
619
src/baseline.rs
Normal file
619
src/baseline.rs
Normal file
|
|
@ -0,0 +1,619 @@
|
|||
//! Baseline diffing for patch-validation CI mode (§M6.5 / Pillar A §15.1).
|
||||
//!
|
||||
//! `nyx scan --baseline <file>` reads a previous scan's JSON output (or a
|
||||
//! stripped `.nyx/baseline.json`) and joins on `Diag::stable_hash`. The
|
||||
//! result is a per-finding `VerdictDiffEntry` with a typed `Transition` that
|
||||
//! CI gates can act on.
|
||||
//!
|
||||
//! `nyx scan --baseline-write <file>` writes a stripped baseline JSON:
|
||||
//! only `stable_hash`, `dynamic_verdict`, `severity`, `path`, and `rule_id`.
|
||||
//! No source code is included.
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::evidence::VerifyStatus;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
// Baseline entry (stripped — no source code)
|
||||
|
||||
/// A stripped baseline entry: only what is needed for cross-commit diffing.
/// Contains no source code snippets.
///
/// Field names and order are part of the serialized baseline format (serde
/// derive), so renaming or reordering fields changes the file on disk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineEntry {
    /// Stable cross-commit identity of the finding; the key the baseline is
    /// joined on (mirrors `Diag::stable_hash`).
    pub stable_hash: u64,
    /// Dynamic verdict status from the scan that wrote this baseline.
    /// `None` when `--verify` was not run.
    ///
    /// Omitted from the serialized entry when `None`, and defaulted to `None`
    /// when the key is missing on read.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dynamic_verdict: Option<VerifyStatus>,
    /// Severity of the finding, stored as text.
    // NOTE(review): the exact string format is chosen by the writer, which is
    // not visible here — confirm before parsing it back.
    pub severity: String,
    /// Path of the file the finding was reported in.
    pub path: String,
    /// Identifier of the rule that produced the finding (presumably
    /// `Diag::id` — confirm against the writer).
    pub rule_id: String,
}
|
||||
|
||||
// Transition enum
|
||||
|
||||
/// How a finding's verdict changed between the baseline scan and the current
|
||||
/// scan.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum Transition {
|
||||
/// Finding exists in the current scan but was absent from the baseline.
|
||||
New,
|
||||
/// Finding appears in both scans; verdict is unchanged (or neither scan
|
||||
/// ran `--verify`).
|
||||
Unchanged,
|
||||
/// Finding was present in the baseline but disappeared from the current
|
||||
/// scan — the vulnerability is gone.
|
||||
Resolved,
|
||||
/// Finding in both; was `NotConfirmed` in baseline, now `Confirmed`.
|
||||
Regressed,
|
||||
/// Finding in both; baseline had no verdict (or `Inconclusive` /
|
||||
/// `Unsupported`) and it is now `Confirmed`.
|
||||
FlippedConfirmed,
|
||||
/// Finding in both; was `Confirmed` in baseline, now `NotConfirmed` —
|
||||
/// the fix is proven.
|
||||
FlippedNotConfirmed,
|
||||
}
|
||||
|
||||
// VerdictDiffEntry
|
||||
|
||||
/// Per-finding verdict diff produced by comparing a baseline to a current scan.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VerdictDiffEntry {
|
||||
/// Stable cross-commit identity hash.
|
||||
pub stable_hash: u64,
|
||||
pub path: String,
|
||||
pub line: usize,
|
||||
pub rule_id: String,
|
||||
/// Verdict in the baseline scan (`None` when verify was not run).
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub baseline_status: Option<VerifyStatus>,
|
||||
/// Verdict in the current scan (`None` when verify was not run or finding
|
||||
/// is absent from the current scan).
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub current_status: Option<VerifyStatus>,
|
||||
pub transition: Transition,
|
||||
}
|
||||
|
||||
/// Full verdict diff between a baseline and a current scan.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VerdictDiff {
|
||||
pub entries: Vec<VerdictDiffEntry>,
|
||||
}
|
||||
|
||||
// Load / write helpers
|
||||
|
||||
/// Load baseline entries from a file.
|
||||
///
|
||||
/// Accepts two JSON formats:
|
||||
/// - Stripped baseline (`Vec<BaselineEntry>`) — written by `--baseline-write`.
|
||||
/// - Full scan output (`Vec<Diag>`) — written by `nyx scan --format json`.
|
||||
///
|
||||
/// Detection heuristic: try `Vec<BaselineEntry>` first (requires `rule_id`);
|
||||
/// fall back to `Vec<Diag>`.
|
||||
pub fn load_baseline(path: &Path) -> crate::errors::NyxResult<Vec<BaselineEntry>> {
|
||||
let content = std::fs::read_to_string(path).map_err(|e| {
|
||||
crate::errors::NyxError::Msg(format!("cannot read baseline {}: {e}", path.display()))
|
||||
})?;
|
||||
|
||||
// Try stripped format first.
|
||||
if let Ok(entries) = serde_json::from_str::<Vec<BaselineEntry>>(&content) {
|
||||
return Ok(entries);
|
||||
}
|
||||
|
||||
// Fall back to full Diag list.
|
||||
let diags: Vec<Diag> = serde_json::from_str(&content).map_err(|e| {
|
||||
crate::errors::NyxError::Msg(format!(
|
||||
"baseline {}: not a valid BaselineEntry list or Diag list: {e}",
|
||||
path.display()
|
||||
))
|
||||
})?;
|
||||
Ok(diags_to_baseline_entries(&diags))
|
||||
}
|
||||
|
||||
/// Convert `Diag` values to `BaselineEntry` values.
|
||||
///
|
||||
/// Only findings with a non-zero `stable_hash` are included; findings without
|
||||
/// a hash cannot be joined across scans.
|
||||
pub fn diags_to_baseline_entries(diags: &[Diag]) -> Vec<BaselineEntry> {
|
||||
diags
|
||||
.iter()
|
||||
.filter(|d| d.stable_hash != 0)
|
||||
.map(|d| BaselineEntry {
|
||||
stable_hash: d.stable_hash,
|
||||
dynamic_verdict: d
|
||||
.evidence
|
||||
.as_ref()
|
||||
.and_then(|ev| ev.dynamic_verdict.as_ref())
|
||||
.map(|vr| vr.status),
|
||||
severity: d.severity.as_db_str().to_string(),
|
||||
path: d.path.clone(),
|
||||
rule_id: d.id.clone(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Write a stripped baseline JSON to `path`.
|
||||
///
|
||||
/// The file contains only `stable_hash`, `dynamic_verdict`, `severity`,
|
||||
/// `path`, and `rule_id` — no source code snippets or flow steps.
|
||||
pub fn write_baseline(path: &Path, diags: &[Diag]) -> crate::errors::NyxResult<()> {
|
||||
let entries = diags_to_baseline_entries(diags);
|
||||
let json = serde_json::to_string_pretty(&entries)
|
||||
.map_err(|e| crate::errors::NyxError::Msg(format!("baseline serialize error: {e}")))?;
|
||||
if let Some(parent) = path.parent()
|
||||
&& !parent.as_os_str().is_empty()
|
||||
{
|
||||
std::fs::create_dir_all(parent).map_err(|e| {
|
||||
crate::errors::NyxError::Msg(format!(
|
||||
"cannot create baseline dir {}: {e}",
|
||||
parent.display()
|
||||
))
|
||||
})?;
|
||||
}
|
||||
std::fs::write(path, json).map_err(|e| {
|
||||
crate::errors::NyxError::Msg(format!("cannot write baseline {}: {e}", path.display()))
|
||||
})
|
||||
}
|
||||
|
||||
// Diff computation
|
||||
|
||||
fn classify_transition(
|
||||
baseline: Option<VerifyStatus>,
|
||||
current: Option<VerifyStatus>,
|
||||
) -> Transition {
|
||||
match (baseline, current) {
|
||||
// No verdict change (including both None)
|
||||
(a, b) if a == b => Transition::Unchanged,
|
||||
// Confirmed → NotConfirmed: fix proven
|
||||
(Some(VerifyStatus::Confirmed), Some(VerifyStatus::NotConfirmed)) => {
|
||||
Transition::FlippedNotConfirmed
|
||||
}
|
||||
// NotConfirmed → Confirmed: regression
|
||||
(Some(VerifyStatus::NotConfirmed), Some(VerifyStatus::Confirmed)) => Transition::Regressed,
|
||||
// None / Inconclusive / Unsupported → Confirmed
|
||||
(_, Some(VerifyStatus::Confirmed)) => Transition::FlippedConfirmed,
|
||||
// Everything else: treat as unchanged (e.g. Confirmed → Inconclusive
|
||||
// without a clean NotConfirmed proof is not a resolution)
|
||||
_ => Transition::Unchanged,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute a verdict diff between a loaded baseline and the current findings.
|
||||
pub fn compute_verdict_diff(baseline: &[BaselineEntry], current: &[Diag]) -> VerdictDiff {
|
||||
// Build lookup maps keyed by stable_hash.
|
||||
let baseline_map: HashMap<u64, &BaselineEntry> =
|
||||
baseline.iter().map(|e| (e.stable_hash, e)).collect();
|
||||
let current_map: HashMap<u64, &Diag> = current
|
||||
.iter()
|
||||
.filter(|d| d.stable_hash != 0)
|
||||
.map(|d| (d.stable_hash, d))
|
||||
.collect();
|
||||
|
||||
let mut entries = Vec::new();
|
||||
|
||||
// Walk current findings.
|
||||
for (&hash, diag) in ¤t_map {
|
||||
let current_status = diag
|
||||
.evidence
|
||||
.as_ref()
|
||||
.and_then(|ev| ev.dynamic_verdict.as_ref())
|
||||
.map(|vr| vr.status);
|
||||
|
||||
if let Some(base) = baseline_map.get(&hash) {
|
||||
let transition = classify_transition(base.dynamic_verdict, current_status);
|
||||
entries.push(VerdictDiffEntry {
|
||||
stable_hash: hash,
|
||||
path: diag.path.clone(),
|
||||
line: diag.line,
|
||||
rule_id: diag.id.clone(),
|
||||
baseline_status: base.dynamic_verdict,
|
||||
current_status,
|
||||
transition,
|
||||
});
|
||||
} else {
|
||||
// Not in baseline → New.
|
||||
entries.push(VerdictDiffEntry {
|
||||
stable_hash: hash,
|
||||
path: diag.path.clone(),
|
||||
line: diag.line,
|
||||
rule_id: diag.id.clone(),
|
||||
baseline_status: None,
|
||||
current_status,
|
||||
transition: Transition::New,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Walk baseline findings absent from current → Resolved.
|
||||
for (&hash, base) in &baseline_map {
|
||||
if !current_map.contains_key(&hash) {
|
||||
entries.push(VerdictDiffEntry {
|
||||
stable_hash: hash,
|
||||
path: base.path.clone(),
|
||||
line: 0,
|
||||
rule_id: base.rule_id.clone(),
|
||||
baseline_status: base.dynamic_verdict,
|
||||
current_status: None,
|
||||
transition: Transition::Resolved,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Sort for deterministic output: Resolved first, then New, then the rest,
|
||||
// all sub-sorted by (path, line).
|
||||
entries.sort_by(|a, b| {
|
||||
fn order(t: Transition) -> u8 {
|
||||
match t {
|
||||
Transition::Resolved => 0,
|
||||
Transition::FlippedNotConfirmed => 1,
|
||||
Transition::New => 2,
|
||||
Transition::Regressed => 3,
|
||||
Transition::FlippedConfirmed => 4,
|
||||
Transition::Unchanged => 5,
|
||||
}
|
||||
}
|
||||
order(a.transition)
|
||||
.cmp(&order(b.transition))
|
||||
.then_with(|| a.path.cmp(&b.path))
|
||||
.then_with(|| a.line.cmp(&b.line))
|
||||
});
|
||||
|
||||
VerdictDiff { entries }
|
||||
}
|
||||
|
||||
// CI gates
|
||||
|
||||
/// Gate: exit code 2 if any new `Confirmed` finding appears.
|
||||
///
|
||||
/// Triggers on `transition == New && current_status == Confirmed` or
|
||||
/// `transition == FlippedConfirmed`.
|
||||
pub const GATE_NO_NEW_CONFIRMED: &str = "no-new-confirmed";
|
||||
|
||||
/// Gate: exit code 2 if any baseline-`Confirmed` finding is not fully resolved.
|
||||
///
|
||||
/// A baseline-Confirmed finding is resolved only when it is absent from the
|
||||
/// current scan (`Resolved`) or its current verdict is `NotConfirmed`
|
||||
/// (`FlippedNotConfirmed`). All other current statuses (`Confirmed`,
|
||||
/// `PartiallyConfirmed`, `Inconclusive`, `Unsupported`) violate this gate.
|
||||
pub const GATE_RESOLVE_ALL_CONFIRMED: &str = "resolve-all-confirmed";
|
||||
|
||||
/// Check a named CI gate against a verdict diff.
|
||||
///
|
||||
/// Returns `true` when the gate passes (condition not violated) and `false`
|
||||
/// when it fails (caller should exit with code 2).
|
||||
///
|
||||
/// Unknown gate names always pass so future gate additions are forward-
|
||||
/// compatible without requiring a binary upgrade.
|
||||
pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool {
|
||||
match gate {
|
||||
GATE_NO_NEW_CONFIRMED => !diff.entries.iter().any(|e| {
|
||||
matches!(e.transition, Transition::New | Transition::FlippedConfirmed)
|
||||
&& e.current_status == Some(VerifyStatus::Confirmed)
|
||||
}),
|
||||
GATE_RESOLVE_ALL_CONFIRMED => !diff.entries.iter().any(|e| {
|
||||
e.baseline_status == Some(VerifyStatus::Confirmed)
|
||||
&& matches!(
|
||||
e.current_status,
|
||||
Some(VerifyStatus::Confirmed)
|
||||
// PartiallyConfirmed = sink still reachable at
|
||||
// runtime, so a baseline-Confirmed finding that is
|
||||
// now partial has NOT been resolved.
|
||||
| Some(VerifyStatus::PartiallyConfirmed)
|
||||
| Some(VerifyStatus::Inconclusive)
|
||||
| Some(VerifyStatus::Unsupported)
|
||||
)
|
||||
}),
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
// Console / JSON rendering
|
||||
|
||||
fn status_str(s: Option<VerifyStatus>) -> &'static str {
|
||||
match s {
|
||||
Some(VerifyStatus::Confirmed) => "Confirmed",
|
||||
Some(VerifyStatus::PartiallyConfirmed) => "PartiallyConfirmed",
|
||||
Some(VerifyStatus::NotConfirmed) => "NotConfirmed",
|
||||
Some(VerifyStatus::Inconclusive) => "Inconclusive",
|
||||
Some(VerifyStatus::Unsupported) => "Unsupported",
|
||||
None => "(no verdict)",
|
||||
}
|
||||
}
|
||||
|
||||
/// Render a verdict diff as a human-readable console summary.
|
||||
pub fn format_diff_console(diff: &VerdictDiff) -> String {
|
||||
if diff.entries.is_empty() {
|
||||
return String::from(" (no findings in baseline or current scan)\n");
|
||||
}
|
||||
|
||||
let mut lines = Vec::new();
|
||||
let mut non_unchanged = 0usize;
|
||||
|
||||
for e in &diff.entries {
|
||||
let hash_str = format!("{:016x}", e.stable_hash);
|
||||
let loc = if e.line > 0 {
|
||||
format!("{}:{}", e.path, e.line)
|
||||
} else {
|
||||
e.path.clone()
|
||||
};
|
||||
match e.transition {
|
||||
Transition::New => {
|
||||
non_unchanged += 1;
|
||||
lines.push(format!(
|
||||
" + {hash_str}: new {} at {loc}",
|
||||
status_str(e.current_status)
|
||||
));
|
||||
}
|
||||
Transition::Resolved => {
|
||||
non_unchanged += 1;
|
||||
lines.push(format!(
|
||||
" - {hash_str}: {} \u{2192} removed (resolved) at {loc}",
|
||||
status_str(e.baseline_status)
|
||||
));
|
||||
}
|
||||
Transition::FlippedNotConfirmed => {
|
||||
non_unchanged += 1;
|
||||
lines.push(format!(
|
||||
" - {hash_str}: Confirmed \u{2192} NotConfirmed at {loc} (resolved)"
|
||||
));
|
||||
}
|
||||
Transition::Regressed => {
|
||||
non_unchanged += 1;
|
||||
lines.push(format!(
|
||||
" ! {hash_str}: NotConfirmed \u{2192} Confirmed at {loc} (regressed)"
|
||||
));
|
||||
}
|
||||
Transition::FlippedConfirmed => {
|
||||
non_unchanged += 1;
|
||||
lines.push(format!(" + {hash_str}: new Confirmed at {loc}"));
|
||||
}
|
||||
Transition::Unchanged => {}
|
||||
}
|
||||
}
|
||||
|
||||
if non_unchanged == 0 {
|
||||
return String::from(" (no changes from baseline)\n");
|
||||
}
|
||||
|
||||
lines.join("\n") + "\n"
|
||||
}
|
||||
|
||||
// Tests
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::commands::scan::{Diag, compute_stable_hash};
    use crate::evidence::{Evidence, VerifyResult, VerifyStatus};
    use crate::patterns::{FindingCategory, Severity};

    /// Build a minimal high-severity finding and stamp it with its real
    /// stable hash so it can be joined against a baseline.
    fn make_diag(path: &str, line: usize, rule: &str) -> Diag {
        let mut d = Diag {
            path: path.to_string(),
            line,
            col: 0,
            severity: Severity::High,
            id: rule.to_string(),
            category: FindingCategory::Security,
            path_validated: false,
            guard_kind: None,
            message: None,
            labels: vec![],
            confidence: None,
            evidence: None,
            rank_score: None,
            rank_reason: None,
            suppressed: false,
            suppression: None,
            rollup: None,
            finding_id: String::new(),
            alternative_finding_ids: vec![],
            stable_hash: 0,
        };
        d.stable_hash = compute_stable_hash(&d);
        d
    }

    /// Attach a dynamic verdict with the given status to `d`.
    fn with_verdict(mut d: Diag, status: VerifyStatus) -> Diag {
        d.evidence = Some(Evidence {
            dynamic_verdict: Some(VerifyResult {
                finding_id: format!("{:016x}", d.stable_hash),
                status,
                triggered_payload: None,
                reason: None,
                inconclusive_reason: None,
                detail: None,
                attempts: vec![],
                toolchain_match: None,
                differential: None,
                replay_stable: None,
                wrong: None,
                hardening_outcome: None,
            }),
            ..Default::default()
        });
        d
    }

    /// Baseline entry for `d` carrying the given baseline verdict.
    fn baseline_entry_for(d: &Diag, verdict: VerifyStatus) -> BaselineEntry {
        BaselineEntry {
            stable_hash: d.stable_hash,
            dynamic_verdict: Some(verdict),
            severity: "high".to_string(),
            path: d.path.clone(),
            rule_id: d.id.clone(),
        }
    }

    #[test]
    fn new_finding_no_verdict() {
        let current = vec![make_diag("src/a.py", 1, "py.sqli")];
        let diff = compute_verdict_diff(&[], &current);
        assert_eq!(diff.entries.len(), 1);
        assert_eq!(diff.entries[0].transition, Transition::New);
        assert_eq!(diff.entries[0].current_status, None);
    }

    #[test]
    fn new_confirmed_finding() {
        let current = vec![with_verdict(
            make_diag("src/a.py", 1, "py.sqli"),
            VerifyStatus::Confirmed,
        )];
        let diff = compute_verdict_diff(&[], &current);
        assert_eq!(diff.entries[0].transition, Transition::New);
        assert_eq!(
            diff.entries[0].current_status,
            Some(VerifyStatus::Confirmed)
        );
    }

    #[test]
    fn resolved_finding() {
        let baseline_diag = make_diag("src/a.py", 1, "py.sqli");
        let baseline = diags_to_baseline_entries(&[baseline_diag]);
        let diff = compute_verdict_diff(&baseline, &[]);
        assert_eq!(diff.entries.len(), 1);
        assert_eq!(diff.entries[0].transition, Transition::Resolved);
    }

    #[test]
    fn flipped_not_confirmed() {
        let d = make_diag("src/a.py", 1, "py.sqli");
        let baseline = vec![baseline_entry_for(&d, VerifyStatus::Confirmed)];
        let current = vec![with_verdict(d, VerifyStatus::NotConfirmed)];
        let diff = compute_verdict_diff(&baseline, &current);
        assert_eq!(diff.entries[0].transition, Transition::FlippedNotConfirmed);
    }

    #[test]
    fn regressed() {
        let d = make_diag("src/a.py", 1, "py.sqli");
        let baseline = vec![baseline_entry_for(&d, VerifyStatus::NotConfirmed)];
        let current = vec![with_verdict(d, VerifyStatus::Confirmed)];
        let diff = compute_verdict_diff(&baseline, &current);
        assert_eq!(diff.entries[0].transition, Transition::Regressed);
    }

    #[test]
    fn gate_no_new_confirmed_passes_when_no_confirmed() {
        let d = make_diag("src/a.py", 1, "py.sqli");
        let diff = compute_verdict_diff(&[], &[d]);
        assert!(check_gate(&diff, GATE_NO_NEW_CONFIRMED));
    }

    #[test]
    fn gate_no_new_confirmed_fails_on_new_confirmed() {
        let current = vec![with_verdict(
            make_diag("src/a.py", 1, "py.sqli"),
            VerifyStatus::Confirmed,
        )];
        let diff = compute_verdict_diff(&[], &current);
        assert!(!check_gate(&diff, GATE_NO_NEW_CONFIRMED));
    }

    #[test]
    fn gate_resolve_all_confirmed_passes_when_flipped() {
        let d = make_diag("src/a.py", 1, "py.sqli");
        let baseline = vec![baseline_entry_for(&d, VerifyStatus::Confirmed)];
        let current = vec![with_verdict(d, VerifyStatus::NotConfirmed)];
        let diff = compute_verdict_diff(&baseline, &current);
        assert!(check_gate(&diff, GATE_RESOLVE_ALL_CONFIRMED));
    }

    #[test]
    fn gate_resolve_all_confirmed_fails_when_still_confirmed() {
        let d = make_diag("src/a.py", 1, "py.sqli");
        let baseline = vec![baseline_entry_for(&d, VerifyStatus::Confirmed)];
        let current = vec![with_verdict(d, VerifyStatus::Confirmed)];
        let diff = compute_verdict_diff(&baseline, &current);
        assert!(!check_gate(&diff, GATE_RESOLVE_ALL_CONFIRMED));
    }

    #[test]
    fn gate_resolve_all_confirmed_passes_when_resolved() {
        let d = make_diag("src/a.py", 1, "py.sqli");
        let baseline = vec![baseline_entry_for(&d, VerifyStatus::Confirmed)];
        // No current findings (finding disappeared entirely).
        let diff = compute_verdict_diff(&baseline, &[]);
        assert!(check_gate(&diff, GATE_RESOLVE_ALL_CONFIRMED));
    }

    #[test]
    fn write_and_load_roundtrip() {
        let d = with_verdict(make_diag("src/a.py", 1, "py.sqli"), VerifyStatus::Confirmed);
        let tmp = tempfile::NamedTempFile::new().unwrap();
        write_baseline(tmp.path(), std::slice::from_ref(&d)).unwrap();
        let loaded = load_baseline(tmp.path()).unwrap();
        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].stable_hash, d.stable_hash);
        assert_eq!(loaded[0].dynamic_verdict, Some(VerifyStatus::Confirmed));
        assert_eq!(loaded[0].path, "src/a.py");
        assert_eq!(loaded[0].rule_id, "py.sqli");
    }

    #[test]
    fn load_full_diag_json() {
        let d = with_verdict(make_diag("src/a.py", 1, "py.sqli"), VerifyStatus::Confirmed);
        let json = serde_json::to_string(&[&d]).unwrap();
        let tmp = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(tmp.path(), &json).unwrap();
        let loaded = load_baseline(tmp.path()).unwrap();
        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].stable_hash, d.stable_hash);
    }

    #[test]
    fn baseline_write_no_source() {
        let mut d = with_verdict(make_diag("src/a.py", 1, "py.sqli"), VerifyStatus::Confirmed);
        // Add a flow_step with a snippet (source code) to the evidence.
        if let Some(ref mut ev) = d.evidence {
            ev.flow_steps = vec![crate::evidence::FlowStep {
                step: 1,
                kind: crate::evidence::FlowStepKind::Source,
                file: "src/a.py".into(),
                line: 1,
                col: 0,
                snippet: Some("SECRET CODE".into()),
                variable: None,
                callee: None,
                function: None,
                is_cross_file: false,
            }];
        }
        let tmp = tempfile::NamedTempFile::new().unwrap();
        write_baseline(tmp.path(), &[d]).unwrap();
        let content = std::fs::read_to_string(tmp.path()).unwrap();
        assert!(
            !content.contains("SECRET CODE"),
            "baseline must not contain source code"
        );
    }

    #[test]
    fn unknown_gate_passes() {
        let diff = VerdictDiff { entries: vec![] };
        assert!(check_gate(&diff, "some-future-gate-name"));
    }
}
|
||||
327
src/callgraph.rs
327
src/callgraph.rs
|
|
@ -20,16 +20,13 @@ use smallvec::SmallVec;
|
|||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Types
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Metadata attached to each call-graph edge.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CallEdge {
|
||||
/// The raw callee string as it appeared in source (e.g. `"env::var"`).
|
||||
/// Preserved for diagnostics, **not** the normalized form used for resolution.
|
||||
#[allow(dead_code)] // used for future diagnostics and path display
|
||||
pub call_site: String,
|
||||
}
|
||||
|
||||
|
|
@ -52,10 +49,10 @@ pub struct AmbiguousCallee {
|
|||
///
|
||||
/// Nodes are [`FuncKey`]s (one per function definition across all files).
|
||||
/// Edges represent call-site relationships resolved after pass 1.
|
||||
#[derive(Debug)]
|
||||
pub struct CallGraph {
|
||||
pub graph: DiGraph<FuncKey, CallEdge>,
|
||||
/// `FuncKey → NodeIndex` for quick lookup.
|
||||
#[allow(dead_code)] // used for future topo-ordered analysis and call-graph queries
|
||||
pub index: HashMap<FuncKey, NodeIndex>,
|
||||
/// Callee strings that could not be resolved to any [`FuncKey`].
|
||||
pub unresolved_not_found: Vec<UnresolvedCallee>,
|
||||
|
|
@ -77,9 +74,7 @@ pub struct CallGraphAnalysis {
|
|||
pub topo_scc_callee_first: Vec<usize>,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Callee-name normalization
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Extract the last segment of a qualified callee name for resolution.
|
||||
///
|
||||
|
|
@ -165,9 +160,7 @@ pub(crate) fn callee_container_hint(raw: &str) -> &str {
|
|||
""
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Class / container → method index
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Per-language `(container, method_name)` → candidate [`FuncKey`] index.
|
||||
///
|
||||
|
|
@ -260,20 +253,6 @@ impl ClassMethodIndex {
|
|||
.unwrap_or_default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Number of distinct `(lang, container, method)` keys. Exposed
|
||||
/// for diagnostics / tests; production code uses [`Self::resolve`].
|
||||
#[allow(dead_code)]
|
||||
pub fn container_keys_len(&self) -> usize {
|
||||
self.by_container.len()
|
||||
}
|
||||
|
||||
/// Number of distinct `(lang, method)` keys. Exposed for
|
||||
/// diagnostics / tests.
|
||||
#[allow(dead_code)]
|
||||
pub fn name_keys_len(&self) -> usize {
|
||||
self.by_name.len()
|
||||
}
|
||||
}
|
||||
|
||||
// ── Type hierarchy index ────────────────────────────────────────────────
|
||||
|
|
@ -293,11 +272,6 @@ impl ClassMethodIndex {
|
|||
pub struct TypeHierarchyIndex {
|
||||
/// `(lang, super_type)` → distinct sub-type / impl container names.
|
||||
by_super: HashMap<(Lang, String), SmallVec<[String; 4]>>,
|
||||
/// `(lang, sub_type)` → super-types this type extends / implements.
|
||||
/// Future use for `super.method()` resolution; populated for
|
||||
/// completeness today.
|
||||
#[allow(dead_code)]
|
||||
by_sub: HashMap<(Lang, String), SmallVec<[String; 2]>>,
|
||||
}
|
||||
|
||||
impl TypeHierarchyIndex {
|
||||
|
|
@ -308,7 +282,6 @@ impl TypeHierarchyIndex {
|
|||
/// summary) collapse via the membership check.
|
||||
pub fn build(summaries: &GlobalSummaries) -> Self {
|
||||
let mut by_super: HashMap<(Lang, String), SmallVec<[String; 4]>> = HashMap::new();
|
||||
let mut by_sub: HashMap<(Lang, String), SmallVec<[String; 2]>> = HashMap::new();
|
||||
|
||||
for (key, summary) in summaries.iter() {
|
||||
let lang = key.lang;
|
||||
|
|
@ -320,14 +293,10 @@ impl TypeHierarchyIndex {
|
|||
if !subs.iter().any(|s| s == sub) {
|
||||
subs.push(sub.clone());
|
||||
}
|
||||
let sups = by_sub.entry((lang, sub.clone())).or_default();
|
||||
if !sups.iter().any(|s| s == sup) {
|
||||
sups.push(sup.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TypeHierarchyIndex { by_super, by_sub }
|
||||
TypeHierarchyIndex { by_super }
|
||||
}
|
||||
|
||||
/// Return the distinct sub-type / impl container names for
|
||||
|
|
@ -341,16 +310,6 @@ impl TypeHierarchyIndex {
|
|||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Return the recorded super-types of `sub_type`. Empty when
|
||||
/// `sub_type` has no recorded super-types in this language.
|
||||
#[allow(dead_code)]
|
||||
pub fn supers_of(&self, lang: Lang, sub_type: &str) -> &[String] {
|
||||
self.by_sub
|
||||
.get(&(lang, sub_type.to_string()))
|
||||
.map(|v| v.as_slice())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Number of distinct `(lang, super_type)` keys. Exposed for
|
||||
/// diagnostics / tests.
|
||||
#[allow(dead_code)]
|
||||
|
|
@ -409,9 +368,7 @@ impl TypeHierarchyIndex {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Call-graph construction
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Build the whole-program call graph from merged summaries.
|
||||
///
|
||||
|
|
@ -777,9 +734,7 @@ fn resolve_via_interop(
|
|||
None
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// SCC / topological analysis
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Compute SCC decomposition and topological ordering of the call graph.
|
||||
///
|
||||
|
|
@ -807,9 +762,7 @@ pub fn analyse(cg: &CallGraph) -> CallGraphAnalysis {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// File-level batch ordering
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// A batch of files at a single topological position, annotated with whether
|
||||
/// any contributing SCC contains mutual recursion (len > 1) and whether any
|
||||
|
|
@ -862,6 +815,141 @@ pub fn callers_of(cg: &CallGraph, callee: &FuncKey) -> Vec<FuncKey> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Reverse-edge BFS: return every [`FuncKey`] that *transitively* calls
|
||||
/// `callee`, i.e. the union of [`callers_of`] applied recursively until
|
||||
/// the reverse frontier is exhausted.
|
||||
///
|
||||
/// Used by the chain composer to widen file-scoped reach: a sink inside
|
||||
/// `internal_helper.py` whose enclosing function is reached only through
|
||||
/// `routes.py` is *reachable* in the chain sense, but the file-local
|
||||
/// match in `chain::edges::locate_reach` / `chain::search::compose_chain`
|
||||
/// misses it. This helper produces the closure once so callers can
|
||||
/// resolve reach in O(1) afterwards.
|
||||
///
|
||||
/// Excludes `callee` itself from the returned set, matching the
|
||||
/// "strictly upstream" semantics callers want. Empty when `callee` is
|
||||
/// unknown to the graph.
|
||||
///
|
||||
/// Cost: O(V + E) BFS from `callee`'s reverse frontier; bounded by the
|
||||
/// connected component size.
|
||||
pub fn callers_transitive(cg: &CallGraph, callee: &FuncKey) -> std::collections::HashSet<FuncKey> {
|
||||
let mut seen: std::collections::HashSet<FuncKey> = std::collections::HashSet::new();
|
||||
let Some(&start) = cg.index.get(callee) else {
|
||||
return seen;
|
||||
};
|
||||
let mut frontier: Vec<NodeIndex> = cg
|
||||
.graph
|
||||
.neighbors_directed(start, petgraph::Direction::Incoming)
|
||||
.collect();
|
||||
while let Some(node) = frontier.pop() {
|
||||
let key = cg.graph[node].clone();
|
||||
if !seen.insert(key) {
|
||||
continue;
|
||||
}
|
||||
for next in cg
|
||||
.graph
|
||||
.neighbors_directed(node, petgraph::Direction::Incoming)
|
||||
{
|
||||
if !seen.contains(&cg.graph[next]) {
|
||||
frontier.push(next);
|
||||
}
|
||||
}
|
||||
}
|
||||
seen
|
||||
}
|
||||
|
||||
/// File-level transitive reach map derived from a [`CallGraph`].
///
/// For every `namespace` (file path) in the graph, stores the namespaces that
/// contain at least one transitive caller of a function defined there. It is
/// built once per scan so the chain composer can widen a finding's
/// `Reach::Reachable` decision beyond the file-local heuristic in
/// `chain::edges::locate_reach` without running a traversal per finding.
///
/// Map shape: `callee_namespace → { caller_namespace, … }`. Each file is
/// always a member of its own caller set, so intra-file recursion stays
/// reachable.
///
/// `scan_root` is optional path-normalisation context. Without it, callers
/// must hand project-relative POSIX paths to [`FileReachMap::reaches`]. With
/// it (typical in production scans), [`FileReachMap::reaches`] runs its
/// arguments through [`crate::symbol::normalize_namespace`] before lookup, so
/// absolute host paths (the convention on [`crate::commands::scan::Diag`]'s
/// `path`) and project-relative paths (the convention on call-graph
/// [`FuncKey::namespace`] and [`crate::surface::SourceLocation::file`]) both
/// resolve to the stored keys.
#[derive(Debug, Default, Clone)]
pub struct FileReachMap {
    /// Callee namespace → namespaces holding a transitive caller (plus itself).
    by_callee_ns: HashMap<String, std::collections::HashSet<String>>,
    /// Root used to normalise lookup paths; `None` disables normalisation.
    scan_root: Option<String>,
}
|
||||
|
||||
impl FileReachMap {
|
||||
/// Build the map from every function's reverse transitive closure.
|
||||
///
|
||||
/// O(V × (V + E)) worst case, but the per-function BFS is sparse on
|
||||
/// real call graphs (median in-degree < 4 on the eval corpus).
|
||||
///
|
||||
/// The returned map has no scan root configured; pair with
|
||||
/// [`FileReachMap::with_scan_root`] when callers may pass absolute
|
||||
/// paths.
|
||||
pub fn build(cg: &CallGraph) -> Self {
|
||||
let mut by_callee_ns: HashMap<String, std::collections::HashSet<String>> = HashMap::new();
|
||||
for callee in cg.index.keys() {
|
||||
let entry = by_callee_ns.entry(callee.namespace.clone()).or_default();
|
||||
entry.insert(callee.namespace.clone());
|
||||
for caller in callers_transitive(cg, callee) {
|
||||
entry.insert(caller.namespace);
|
||||
}
|
||||
}
|
||||
FileReachMap {
|
||||
by_callee_ns,
|
||||
scan_root: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Attach a scan root so [`FileReachMap::reaches`] can normalise
|
||||
/// absolute host paths back to the project-relative POSIX form the
|
||||
/// map keys use. Pass `None` to clear an existing root.
|
||||
pub fn with_scan_root<P: AsRef<std::path::Path>>(mut self, root: Option<P>) -> Self {
|
||||
self.scan_root = root.map(|p| p.as_ref().to_string_lossy().into_owned());
|
||||
self
|
||||
}
|
||||
|
||||
/// True when `caller` transitively reaches at least one function
|
||||
/// defined in `callee`. Inputs may be either project-relative
|
||||
/// POSIX paths (matching the call-graph namespace convention) or
|
||||
/// absolute host paths when a scan root was set via
|
||||
/// [`FileReachMap::with_scan_root`]. False when either path is
|
||||
/// unknown to the graph (conservative: chain composer falls back
|
||||
/// to the file-local heuristic).
|
||||
pub fn reaches(&self, caller: &str, callee: &str) -> bool {
|
||||
let lookup_callee = self.normalize(callee);
|
||||
let lookup_caller = self.normalize(caller);
|
||||
self.by_callee_ns
|
||||
.get(lookup_callee.as_ref())
|
||||
.is_some_and(|set| set.contains(lookup_caller.as_ref()))
|
||||
}
|
||||
|
||||
/// Number of distinct callee namespaces tracked. Exposed for
|
||||
/// diagnostics / tests.
|
||||
pub fn callee_ns_len(&self) -> usize {
|
||||
self.by_callee_ns.len()
|
||||
}
|
||||
|
||||
fn normalize<'a>(&self, path: &'a str) -> std::borrow::Cow<'a, str> {
|
||||
match self.scan_root.as_deref() {
|
||||
Some(root) => {
|
||||
std::borrow::Cow::Owned(crate::symbol::normalize_namespace(path, Some(root)))
|
||||
}
|
||||
None => std::borrow::Cow::Borrowed(path),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the set of file namespaces that must be re-analysed when a
|
||||
/// given set of callee [`FuncKey`]s have had their summaries refined.
|
||||
///
|
||||
|
|
@ -905,10 +993,16 @@ pub fn scc_spans_files(cg: &CallGraph, scc: &[NodeIndex]) -> bool {
|
|||
iter.any(|n| cg.graph[*n].namespace.as_str() != first_ns)
|
||||
}
|
||||
|
||||
/// Like [`scc_file_batches`] but annotates each batch with whether any
|
||||
/// contributing SCC has mutual recursion (`len > 1`).
|
||||
/// Map SCC topological order to an ordered sequence of file-path batches
|
||||
/// annotated with whether any contributing SCC is mutually recursive
|
||||
/// (`len > 1`) or cross-file.
|
||||
///
|
||||
/// Returns `(ordered_batches, orphan_files)`.
|
||||
/// A file is placed in the earliest batch where any of its functions appear
|
||||
/// (min topo index), so leaf callees become available before the callers
|
||||
/// that depend on them.
|
||||
///
|
||||
/// Returns `(ordered_batches, orphan_files)`. Orphans are paths from
|
||||
/// `all_files` that have no functions in the call graph.
|
||||
pub fn scc_file_batches_with_metadata<'a>(
|
||||
cg: &CallGraph,
|
||||
analysis: &CallGraphAnalysis,
|
||||
|
|
@ -989,8 +1083,8 @@ pub fn scc_file_batches_with_metadata<'a>(
|
|||
///
|
||||
/// Returns `(ordered_batches, orphan_files)` where orphan_files are paths
|
||||
/// from `all_files` that have no functions in the call graph.
|
||||
#[allow(dead_code)] // kept for tests; production callers use scc_file_batches_with_metadata
|
||||
pub fn scc_file_batches<'a>(
|
||||
#[cfg(test)]
|
||||
pub(super) fn scc_file_batches<'a>(
|
||||
cg: &CallGraph,
|
||||
analysis: &CallGraphAnalysis,
|
||||
all_files: &'a [PathBuf],
|
||||
|
|
@ -1033,9 +1127,7 @@ pub fn scc_file_batches<'a>(
|
|||
(batches, orphans)
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tests
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
@ -2798,4 +2890,127 @@ mod tests {
|
|||
assert!(cg.unresolved_not_found.is_empty());
|
||||
assert!(cg.unresolved_ambiguous.is_empty());
|
||||
}
|
||||
|
||||
// ── callers_transitive + FileReachMap ───────────────────────────────
|
||||
|
||||
/// Call chain spread over three files:
/// `routes.py::handle -> service.py::process -> helper.py::sink`.
///
/// Walking callers from `sink` must yield exactly `process` and
/// `handle` (never `sink` itself), and the derived `FileReachMap` must
/// list `routes.py` and `service.py` as reaching `helper.py` while the
/// reverse direction stays unreachable.
#[test]
fn callers_transitive_walks_multi_hop_chain() {
    let summaries = vec![
        make_summary("handle", "routes.py", "python", 0, vec!["process"]),
        make_summary("process", "service.py", "python", 0, vec!["sink"]),
        make_summary("sink", "helper.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let target = FuncKey {
        lang: Lang::Python,
        namespace: "helper.py".into(),
        name: "sink".into(),
        arity: Some(0),
        ..Default::default()
    };
    let upstream = callers_transitive(&cg, &target);
    let has_caller = |wanted: &str| upstream.iter().any(|key| key.name == wanted);

    assert!(has_caller("process"), "process should reach sink");
    assert!(has_caller("handle"), "handle should reach sink");
    assert_eq!(upstream.len(), 2, "sink itself must be excluded");

    let file_reach = FileReachMap::build(&cg);
    // Both upstream files reach the sink's file.
    assert!(file_reach.reaches("routes.py", "helper.py"));
    assert!(file_reach.reaches("service.py", "helper.py"));
    assert!(file_reach.reaches("helper.py", "helper.py"), "self-reach");
    // Reachability is directional.
    assert!(!file_reach.reaches("helper.py", "routes.py"));
}
|
||||
|
||||
/// Asking for the callers of a key that is absent from the graph must
/// produce an empty result.
#[test]
fn callers_transitive_empty_for_unknown_key() {
    let only_leaf = vec![make_summary("leaf", "a.py", "python", 0, vec![])];
    let gs = merge_summaries(only_leaf, None);
    let cg = build_call_graph(&gs, &[]);

    // Neither this namespace nor this name was ever summarised.
    let missing = FuncKey {
        lang: Lang::Python,
        namespace: "nowhere.py".into(),
        name: "ghost".into(),
        arity: Some(0),
        ..Default::default()
    };
    let found = callers_transitive(&cg, &missing);
    assert!(found.is_empty());
}
|
||||
|
||||
/// Two files whose functions only call within their own file: each
/// file reaches itself, neither reaches the other, and the map tracks
/// exactly two callee namespaces.
#[test]
fn file_reach_map_handles_disconnected_components() {
    let summaries = vec![
        make_summary("a_caller", "a.py", "python", 0, vec!["a_sink"]),
        make_summary("a_sink", "a.py", "python", 0, vec![]),
        make_summary("b_caller", "b.py", "python", 0, vec!["b_sink"]),
        make_summary("b_sink", "b.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);
    let file_reach = FileReachMap::build(&cg);

    // Self-reach holds for both components.
    assert!(file_reach.reaches("a.py", "a.py"));
    assert!(file_reach.reaches("b.py", "b.py"));
    // No edge crosses between the two components, in either direction.
    assert!(!file_reach.reaches("a.py", "b.py"));
    assert!(!file_reach.reaches("b.py", "a.py"));
    assert_eq!(file_reach.callee_ns_len(), 2);
}
|
||||
|
||||
/// With a scan root attached, absolute host paths are normalised to
/// the project-relative POSIX keys stored in the map, so
/// `reaches("/abs/scan/routes.py", "/abs/scan/helper.py")` and
/// `reaches("routes.py", "helper.py")` hit the same entry. This mirrors
/// the production wiring in `src/commands/scan.rs`, where call-graph
/// namespaces are project-relative but `Diag.path` (from `src/ast.rs`)
/// is the absolute walker path.
#[test]
fn file_reach_map_with_scan_root_normalises_absolute_paths() {
    let summaries = vec![
        make_summary("handle", "routes.py", "python", 0, vec!["sink"]),
        make_summary("sink", "helper.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);
    // Fresh map rooted at the given directory.
    let rooted_at =
        |root: &str| FileReachMap::build(&cg).with_scan_root(Some(std::path::Path::new(root)));

    let rooted = rooted_at("/abs/scan");
    // Mixed conventions: project-relative caller (surface style) with
    // an absolute callee (Diag style). Pre-fix this returned false.
    assert!(rooted.reaches("routes.py", "/abs/scan/helper.py"));
    // Absolute on both sides resolves as well.
    assert!(rooted.reaches("/abs/scan/routes.py", "/abs/scan/helper.py"));

    // A root written with a trailing slash behaves the same.
    let rooted_trailing = rooted_at("/abs/scan/");
    assert!(rooted_trailing.reaches("/abs/scan/routes.py", "/abs/scan/helper.py"));

    // Project-relative on both sides keeps working (legacy behaviour).
    assert!(rooted.reaches("routes.py", "helper.py"));
    // Paths outside the root pass through normalize_namespace
    // unchanged and must not collide with a project-relative key.
    assert!(!rooted.reaches("/other/root/routes.py", "/other/root/helper.py"));
}
|
||||
|
||||
/// Passing `None` to `with_scan_root` drops a root set earlier, after
/// which only project-relative lookups resolve.
#[test]
fn file_reach_map_with_scan_root_none_clears_root() {
    let summaries = vec![
        make_summary("handle", "routes.py", "python", 0, vec!["sink"]),
        make_summary("sink", "helper.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    // Set a root first, then clear it again.
    let rooted = FileReachMap::build(&cg).with_scan_root(Some(std::path::Path::new("/abs/scan")));
    let cleared: FileReachMap = rooted.with_scan_root(None::<&std::path::Path>);

    // Without a root, absolute paths are looked up verbatim and miss.
    assert!(!cleared.reaches("/abs/scan/routes.py", "/abs/scan/helper.py"));
    // Project-relative lookups are unaffected.
    assert!(cleared.reaches("routes.py", "helper.py"));
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -121,9 +121,7 @@ fn extract_case_literal_text<'a>(case: Node<'a>, lang: &str, code: &'a [u8]) ->
|
|||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Exception-source detection for try/catch wiring
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Returns true if this CFG node can implicitly raise an exception (calls).
|
||||
/// Explicit throws are collected separately via `throw_targets`.
|
||||
|
|
@ -190,9 +188,7 @@ pub(super) fn extract_catch_param_name<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Ruby begin/rescue/ensure handler
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Builds CFG for Ruby's `begin`/`rescue`/`ensure` blocks (and `body_statement`
|
||||
/// with inline rescue). Ruby's `begin` has no `body` field, the try-body
|
||||
|
|
@ -442,9 +438,7 @@ pub(super) fn build_begin_rescue<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// switch handler, multi-way dispatch with fallthrough
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// True for AST kinds that wrap a single switch case body.
|
||||
pub(super) fn is_switch_case_kind(kind: &str) -> bool {
|
||||
|
|
@ -780,9 +774,7 @@ pub(super) fn build_switch<'a>(
|
|||
exits
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// try/catch/finally handler
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(super) fn build_try<'a>(
|
||||
|
|
|
|||
|
|
@ -388,9 +388,7 @@ fn js_catch_no_param_no_synthetic() {
|
|||
);
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Ruby begin/rescue/ensure tests
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn ruby_begin_rescue_has_exception_edges() {
|
||||
|
|
@ -540,9 +538,7 @@ fn ruby_multiple_rescue_clauses() {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Short-circuit evaluation tests
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Helper: collect all If nodes from the CFG.
|
||||
fn if_nodes(cfg: &Cfg) -> Vec<NodeIndex> {
|
||||
|
|
@ -2008,10 +2004,8 @@ fn local_summary_callees_have_distinct_ordinals() {
|
|||
assert_ne!(ord0, ord1, "ordinals must differ across sites");
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Anonymous function body naming via syntactic context
|
||||
// (derive_anon_fn_name_from_context coverage)
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn js_body_names(src: &[u8]) -> Vec<String> {
|
||||
let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
|
||||
|
|
@ -2531,9 +2525,7 @@ fn pointer_disabled_skips_subscript_synthesis() {
|
|||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Gap-filling: switch / for / do-while / nested loops / re-throw
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/// JS `switch` should produce one synthetic dispatch `If` node per
|
||||
/// case (default excluded when at the tail), plus True edges into
|
||||
|
|
@ -2908,12 +2900,10 @@ fn js_empty_function_body_well_formed() {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Loop CFG structure: every loop variant must produce a Loop header
|
||||
// with at least one Back edge that targets that header. Without these
|
||||
// invariants the SSA loop-induction-variable phi placement is wrong
|
||||
// and the abstract-interp widening points are missed.
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn loop_headers(cfg: &Cfg) -> Vec<NodeIndex> {
|
||||
cfg.node_indices()
|
||||
|
|
@ -3958,3 +3948,134 @@ fn rhs_array_literal_elements_recognise_per_language_shapes() {
|
|||
// Non-array-shape node returns empty (defensive guard).
|
||||
assert!(run("javascript", b"const x = tainted;\n", &["identifier"]).is_empty());
|
||||
}
|
||||
|
||||
/// `CalleeSite.span` should carry the 1-based (line, col) of each call's
|
||||
/// node span so downstream consumers (surface map, datastore/external
|
||||
/// detectors) can render real coordinates instead of `line: 0`.
|
||||
#[test]
|
||||
fn callee_site_span_carries_line_and_column() {
|
||||
// Two calls on two different lines. The leading newline puts
|
||||
// line 1 at the blank line; `helper(x, y);` is on line 3, etc.
|
||||
let src = b"
|
||||
function outer(obj, x, y) {
|
||||
helper(x, y);
|
||||
obj.method(x);
|
||||
}
|
||||
";
|
||||
let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
|
||||
let file_cfg = parse_to_file_cfg(src, "javascript", ts_lang);
|
||||
let (_key, outer) = file_cfg
|
||||
.summaries
|
||||
.iter()
|
||||
.find(|(k, _)| k.name == "outer")
|
||||
.expect("outer summary should exist");
|
||||
|
||||
let helper_site = outer
|
||||
.callees
|
||||
.iter()
|
||||
.find(|c| c.name == "helper")
|
||||
.expect("helper call should be recorded");
|
||||
let (line, col) = helper_site.span.expect("span populated at CFG-build time");
|
||||
assert_eq!(line, 3, "helper(...) sits on the 3rd source line");
|
||||
assert!(col >= 5, "indented 4 spaces — column is 1-based and > 4");
|
||||
|
||||
let method_site = outer
|
||||
.callees
|
||||
.iter()
|
||||
.find(|c| c.name.ends_with("method"))
|
||||
.expect("method call should be recorded");
|
||||
let (mline, _) = method_site.span.expect("method span populated");
|
||||
assert_eq!(mline, 4, "obj.method(x) on line 4");
|
||||
}
|
||||
|
||||
// Constant-branch fold: CondArith capture + evaluation
|
||||
|
||||
/// Soundness check for `CondArith::eval` / `eval_bool`.
///
/// The two OWASP-Benchmark arithmetic guard shapes must fold to a
/// definite boolean using integer (truncating) division. Anything
/// undefined or unresolved must evaluate to `None` rather than a wrong
/// fold: an unbound variable, division or modulo by zero, arithmetic
/// overflow, a bare integer used as a condition, or a boolean fed into
/// an arithmetic operator.
#[test]
fn cond_arith_eval_is_sound() {
    use crate::cfg::{BinOp, CondArith, CondVal};

    // Boxed-node constructors so the trees below stay readable.
    let int = |value| Box::new(CondArith::Lit(value));
    let ident = |name: &str| Box::new(CondArith::Var(name.to_string()));
    let node = |op, lhs, rhs| Box::new(CondArith::Bin(op, lhs, rhs));

    // Resolver binding only `num`, to 86.
    let num_is_86 = |name: &str| if name == "num" { Some(86) } else { None };

    // (7*42) - num > 200  →  294 - 86 = 208 > 200  →  true.
    let product = node(BinOp::Mul, int(7), int(42));
    let minus_num = node(BinOp::Sub, product, ident("num"));
    let sub_guard = CondArith::Bin(BinOp::Gt, minus_num, int(200));
    assert_eq!(sub_guard.eval_bool(&num_is_86), Some(true));

    // (500/42) + num > 200  →  11 + 196 = 207 > 200  →  true (integer div).
    let num_is_196 = |name: &str| if name == "num" { Some(196) } else { None };
    let quotient = node(BinOp::Div, int(500), int(42));
    let plus_num = node(BinOp::Add, quotient, ident("num"));
    let add_guard = CondArith::Bin(BinOp::Gt, plus_num, int(200));
    assert_eq!(add_guard.eval_bool(&num_is_196), Some(true));

    // Division truncates toward zero: 500/42 is 11, not ~11.9.
    let truncating = CondArith::Bin(BinOp::Div, int(500), int(42));
    assert_eq!(truncating.eval(&num_is_86), Some(CondVal::Int(11)));

    // An unbound variable yields None, so nothing gets pruned.
    let unresolved = |_: &str| None;
    assert_eq!(sub_guard.eval_bool(&unresolved), None);

    // Division and modulo by zero yield None (never a wrong fold).
    let div_by_zero = CondArith::Bin(BinOp::Div, int(1), int(0));
    assert_eq!(div_by_zero.eval(&num_is_86), None);
    let mod_by_zero = CondArith::Bin(BinOp::Mod, int(1), int(0));
    assert_eq!(mod_by_zero.eval(&num_is_86), None);

    // Arithmetic overflow yields None.
    let overflowing = CondArith::Bin(BinOp::Mul, int(i64::MAX), int(2));
    assert_eq!(overflowing.eval(&num_is_86), None);

    // A bare integer at the top level is not a branch condition.
    assert_eq!(CondArith::Lit(1).eval_bool(&num_is_86), None);

    // A comparison result (Bool) is not a valid integer operand.
    let bool_operand = node(BinOp::Gt, int(2), int(1));
    let mixed = CondArith::Bin(BinOp::Add, bool_operand, int(1));
    assert_eq!(mixed.eval(&num_is_86), None);
}
|
||||
|
||||
/// CFG construction must attach a `CondArith` to an `If` node whose
/// condition is a pure integer-arithmetic comparison, and must leave it
/// unset (None) for conditions involving a call, field access, or
/// string.
#[test]
fn build_cond_arith_captures_pure_int_comparison() {
    // Two guards: the first is pure int arithmetic over `num`, the
    // second goes through a method call.
    let src = br#"
class C {
void m(int num, String s) {
if ((7 * 42) - num > 200) { foo(); }
if (s.length() > 200) { bar(); }
}
}
"#;
    let ts_lang = Language::from(tree_sitter_java::LANGUAGE);
    let (cfg, _entry) = parse_and_build(src, "java", ts_lang);

    // Gather the captured arithmetic tree of every If node that has one.
    let mut arith = Vec::new();
    for if_node in if_nodes(&cfg) {
        if let Some(tree) = cfg[if_node].cond_arith.clone() {
            arith.push(tree);
        }
    }

    // Only the first guard qualifies; `s.length() > 200` contains a
    // call and must NOT be captured.
    assert_eq!(
        arith.len(),
        1,
        "only the pure int comparison should yield a CondArith, got {arith:?}"
    );
    // With `num` known to be constant the captured tree folds to a
    // definite boolean.
    let num_is_86 = |name: &str| if name == "num" { Some(86) } else { None };
    assert_eq!(arith[0].eval_bool(&num_is_86), Some(true));
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use super::helpers::first_member_label;
|
||||
use super::{
|
||||
AstMeta, Cfg, EdgeKind, MAX_COND_VARS, MAX_CONDITION_TEXT_LEN, NodeInfo, StmtKind,
|
||||
collect_idents, connect_all, detect_eq_with_const, detect_negation, has_call_descendant,
|
||||
member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
|
||||
build_cond_arith, collect_idents, connect_all, detect_eq_with_const, detect_negation,
|
||||
has_call_descendant, member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
|
||||
};
|
||||
use crate::labels::{DataLabel, LangAnalysisRules, classify};
|
||||
use crate::utils::snippet::truncate_at_char_boundary;
|
||||
|
|
@ -10,9 +10,7 @@ use petgraph::graph::NodeIndex;
|
|||
use smallvec::SmallVec;
|
||||
use tree_sitter::Node;
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Short-circuit boolean operator helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub(super) enum BoolOp {
|
||||
|
|
@ -225,6 +223,13 @@ pub(super) fn build_ternary_diamond<'a>(
|
|||
// taint engine's equality-narrowing fires for `x === 'literal' ? …`.
|
||||
let cond_if = push_condition_node(g, cond_ast, lang, code, enclosing_func);
|
||||
g[cond_if].is_eq_with_const = detect_eq_with_const(cond_ast, lang);
|
||||
// Capture the pure int-arith + comparison tree so `fold_constant_branches`
|
||||
// can prune a dead constant-condition arm of the ternary (e.g. Java
|
||||
// `(7*18)+num > 200 ? "const" : param` with `num` a known int constant),
|
||||
// exactly as it does for the if-form. `build_cond_arith` is conservative
|
||||
// (returns None for any call/field/string/`&&`/`||`/`!` shape) so this is
|
||||
// sound for every language the diamond fires on.
|
||||
g[cond_if].cond_arith = build_cond_arith(cond_ast, lang, code, 0);
|
||||
connect_all(g, preds, cond_if, pred_edge);
|
||||
|
||||
// 2. Branches. Each branch produces its own exit frontier (≥ 1 node) ,
|
||||
|
|
|
|||
|
|
@ -90,9 +90,7 @@ fn collect_ts_type_alias_local_collections(root: Node<'_>, code: &[u8], out: &mu
|
|||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Java
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Walk the AST for `class_declaration` nodes whose body contains
|
||||
/// `field_declaration`s with classifiable types. Only class-level
|
||||
|
|
@ -144,9 +142,7 @@ fn collect_java(root: Node<'_>, code: &[u8], out: &mut HashMap<String, DtoFields
|
|||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// TypeScript / JavaScript
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Walk for `interface_declaration` and `class_declaration` nodes.
|
||||
/// Interfaces with `property_signature` children and classes with
|
||||
|
|
@ -224,9 +220,7 @@ fn extract_ts_property<'a>(node: Node<'a>, code: &'a [u8]) -> Option<(String, Ty
|
|||
Some((field_name, kind))
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Rust
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Walk for `struct_item` nodes whose body lists named fields.
|
||||
fn collect_rust(root: Node<'_>, code: &[u8], out: &mut HashMap<String, DtoFields>) {
|
||||
|
|
@ -276,9 +270,7 @@ fn collect_rust(root: Node<'_>, code: &[u8], out: &mut HashMap<String, DtoFields
|
|||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Python (Pydantic)
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Walk for `class_definition` nodes whose superclass list contains
|
||||
/// `BaseModel` / `pydantic.BaseModel`. Each `expression_statement` in
|
||||
|
|
@ -360,9 +352,7 @@ fn python_inherits_basemodel<'a>(class_node: Node<'a>, code: &'a [u8]) -> bool {
|
|||
false
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Walk helper
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn walk<'a, F: FnMut(Node<'a>)>(node: Node<'a>, f: &mut F) {
|
||||
f(node);
|
||||
|
|
|
|||
|
|
@ -4,9 +4,7 @@ use crate::labels::{DataLabel, Kind, classify, lookup};
|
|||
use smallvec::SmallVec;
|
||||
use tree_sitter::Node;
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Utility helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Return the text of a node.
|
||||
#[inline]
|
||||
|
|
@ -1018,10 +1016,10 @@ pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
|
|||
/// AST kind names for subscript / index expressions
|
||||
/// across the languages whose container-element flow we model.
|
||||
///
|
||||
/// JS/TS use `subscript_expression`; Python uses `subscript`; Go uses
|
||||
/// `index_expression`. Other languages either lower indexing through
|
||||
/// method calls (Rust slice indexing) or are out of scope for the
|
||||
/// initial W5 rollout (Java/Ruby/PHP/C/C++).
|
||||
/// JS/TS and C/C++ use `subscript_expression`; Python uses `subscript`;
|
||||
/// Go uses `index_expression`. Other languages either lower indexing
|
||||
/// through method calls (Rust slice indexing) or are out of scope for
|
||||
/// the initial W5 rollout (Java/Ruby/PHP).
|
||||
#[inline]
|
||||
pub(crate) fn is_subscript_kind(kind: &str) -> bool {
|
||||
matches!(
|
||||
|
|
@ -1086,7 +1084,8 @@ pub(crate) fn subscript_components<'a>(n: Node<'a>, code: &'a [u8]) -> Option<(S
|
|||
return None;
|
||||
}
|
||||
let arr_text = text_of(arr, code)?;
|
||||
// PHP-style `$x` strip not needed here, Go/JS/Python don't use it.
|
||||
// PHP-style `$x` strip not needed here; the supported languages
|
||||
// don't use it for local array identifiers.
|
||||
let idx_text = text_of(idx, code)?;
|
||||
Some((arr_text, idx_text))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,9 +54,7 @@ pub(crate) fn collect_hierarchy_edges(
|
|||
acc
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Java
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn collect_java<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
|
||||
walk(root, &mut |node| {
|
||||
|
|
@ -146,9 +144,7 @@ fn type_identifier_text(n: Node<'_>, code: &[u8]) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Rust
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Walk for `impl_item` nodes and emit edges from the concrete type to
|
||||
/// the trait being implemented. Inherent impls (`impl Foo {}`) emit
|
||||
|
|
@ -199,9 +195,7 @@ fn rust_path_leaf(n: Node<'_>, code: &[u8]) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// TypeScript / JavaScript
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn collect_ts<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
|
||||
walk(root, &mut |node| {
|
||||
|
|
@ -268,9 +262,7 @@ fn collect_ts_heritage<F: FnMut(String, String)>(
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Python
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn collect_python<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
|
||||
walk(root, &mut |node| {
|
||||
|
|
@ -314,9 +306,7 @@ fn python_base_text(n: Node<'_>, code: &[u8]) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Ruby
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn collect_ruby<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
|
||||
walk(root, &mut |node| {
|
||||
|
|
@ -345,9 +335,7 @@ fn collect_ruby<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mu
|
|||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// PHP
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn collect_php<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
|
||||
walk(root, &mut |node| {
|
||||
|
|
@ -382,9 +370,7 @@ fn collect_php<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut
|
|||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// C++
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn collect_cpp<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut F) {
|
||||
walk(root, &mut |node| {
|
||||
|
|
@ -419,9 +405,7 @@ fn collect_cpp<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mut
|
|||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Helpers
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
fn walk<'a, F: FnMut(Node<'a>)>(node: Node<'a>, f: &mut F) {
|
||||
f(node);
|
||||
|
|
|
|||
|
|
@ -135,9 +135,7 @@ fn map_fs_module_to_promises(module: &str) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Import binding extraction
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Walk the top-level AST nodes and collect import alias bindings:
|
||||
///
|
||||
|
|
@ -615,6 +613,4 @@ fn scoped_identifier_matches(node: Node, code: &[u8], crate_prefix: &str, leaf:
|
|||
(Some(p), Some(l)) if p == crate_prefix && l == leaf)
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// === PUBLIC ENTRY POINT =================================================
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -1,3 +1,9 @@
|
|||
//! Literal and constant-expression extraction from tree-sitter AST nodes.
|
||||
//!
|
||||
//! Parses integer and string literals, folds constant binary ops, and derives
|
||||
//! template/string prefixes and quote stripping for CFG construction and
|
||||
//! const propagation.
|
||||
|
||||
use super::conditions::unwrap_parens;
|
||||
use super::helpers::{collect_array_pattern_bindings_indexed, collect_rhs_array_literal_elements};
|
||||
use super::{
|
||||
|
|
@ -1198,10 +1204,22 @@ pub(super) fn is_syntactic_literal(node: Node, code: &[u8]) -> bool {
|
|||
| "string_content"
|
||||
| "string_fragment" => !has_string_interpolation(node),
|
||||
|
||||
// Numbers
|
||||
"integer" | "integer_literal" | "int_literal" | "float" | "float_literal" | "number" => {
|
||||
true
|
||||
}
|
||||
// Numbers. Java's grammar uses radix-tagged kinds
|
||||
// (`decimal_integer_literal`, `hex_integer_literal`, …) rather than a
|
||||
// bare `integer`, so `int num = 86;` would otherwise miss this arm and
|
||||
// lower to `Const(None)` (Varying) instead of `Const("86")`.
|
||||
"integer"
|
||||
| "integer_literal"
|
||||
| "int_literal"
|
||||
| "float"
|
||||
| "float_literal"
|
||||
| "number"
|
||||
| "decimal_integer_literal"
|
||||
| "hex_integer_literal"
|
||||
| "octal_integer_literal"
|
||||
| "binary_integer_literal"
|
||||
| "decimal_floating_point_literal"
|
||||
| "hex_floating_point_literal" => true,
|
||||
|
||||
// Booleans / null / nil / none
|
||||
"true" | "false" | "null" | "nil" | "none" | "null_literal" | "boolean"
|
||||
|
|
@ -2544,6 +2562,37 @@ pub(super) fn def_use(
|
|||
}
|
||||
}
|
||||
}
|
||||
// Java `enhanced_for_statement` binds the loop variable on the
|
||||
// `name` field and the iterable on the `value` field; Ruby's
|
||||
// `for x in coll` uses `pattern`/`value`. Neither uses the
|
||||
// JS/Python `left`/`right` convention, so without this mapping
|
||||
// the loop binding was never recorded as a define and taint on
|
||||
// the iterable could not reach the loop variable (OWASP's
|
||||
// dominant `for (Cookie c : req.getCookies())` shape).
|
||||
if left.is_none() && right.is_none() {
|
||||
if let Some(v) = ast.child_by_field_name("value") {
|
||||
left = ast
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| ast.child_by_field_name("pattern"));
|
||||
right = Some(v);
|
||||
}
|
||||
}
|
||||
// PHP `foreach ($coll as $v)` / `foreach ($coll as $k => $v)`:
|
||||
// the iterable and binding are unnamed children separated by the
|
||||
// `as` keyword (only `body` is a named field). Map the binding
|
||||
// onto `left` and the iterable onto `right` so the shared
|
||||
// define/use logic below records the loop variable.
|
||||
if left.is_none() && right.is_none() && ast.kind() == "foreach_statement" {
|
||||
let mut cursor = ast.walk();
|
||||
let kids: Vec<Node> = ast.children(&mut cursor).collect();
|
||||
if let Some(as_pos) = kids.iter().position(|c| c.kind() == "as") {
|
||||
right = kids[..as_pos].iter().rev().find(|c| c.is_named()).copied();
|
||||
left = kids[as_pos + 1..]
|
||||
.iter()
|
||||
.find(|c| c.is_named() && lookup(lang, c.kind()) != Kind::Block)
|
||||
.copied();
|
||||
}
|
||||
}
|
||||
if left.is_none() && right.is_none() {
|
||||
// C-style for, defer to default ident collection.
|
||||
let mut idents = Vec::new();
|
||||
|
|
|
|||
511
src/cfg/mod.rs
511
src/cfg/mod.rs
|
|
@ -12,11 +12,7 @@
|
|||
//! `export_summaries` converts in-graph [`LocalFuncSummary`] values to
|
||||
//! the serializable [`crate::summary::FuncSummary`] form.
|
||||
|
||||
#![allow(
|
||||
clippy::collapsible_if,
|
||||
clippy::let_and_return,
|
||||
clippy::unnecessary_map_or
|
||||
)]
|
||||
#![allow(clippy::let_and_return, clippy::unnecessary_map_or)]
|
||||
|
||||
use petgraph::algo::dominators::{Dominators, simple_fast};
|
||||
use petgraph::prelude::*;
|
||||
|
|
@ -431,6 +427,131 @@ pub enum BinOp {
|
|||
GtEq,
|
||||
}
|
||||
|
||||
impl BinOp {
|
||||
/// True for the six comparison operators (result is a boolean 0/1).
|
||||
pub fn is_comparison(self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// A branch condition captured as a pure integer-arithmetic + comparison
/// expression tree at CFG-build time (where the real tree-sitter AST is
/// available, so operator precedence and parentheses are correct by
/// construction — no text re-parsing downstream).
///
/// Built only when *every* leaf is an integer literal or a plain identifier
/// and *every* interior node is an arithmetic / comparison / bitwise operator,
/// a unary `-`, or a parenthesis. Any call, field access, string, container,
/// or compound-boolean (`&&` / `||`) subtree makes the builder return `None`
/// for the whole condition. Identifiers are stored by name and resolved to
/// their constant SSA value at fold time
/// ([`crate::ssa::const_prop::fold_constant_branches`]); the actual numeric
/// evaluation is shared in [`CondArith::eval`].
///
/// Parentheses have no variant of their own: grouping is expressed purely by
/// how `Neg` and `Bin` nodes nest.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum CondArith {
    /// Integer literal.
    Lit(i64),
    /// Identifier — resolved to a constant integer at fold time, else unknown.
    Var(String),
    /// Unary integer negation: `-x`.
    Neg(Box<CondArith>),
    /// Binary arithmetic / bitwise / comparison: operator, left operand,
    /// right operand.
    Bin(BinOp, Box<CondArith>, Box<CondArith>),
}
|
||||
|
||||
/// Result of folding a [`CondArith`] against a constant environment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CondVal {
    /// Integer value: produced by literals, resolved identifiers, negation,
    /// and the arithmetic / bitwise / shift operators.
    Int(i64),
    /// Boolean value: produced only by the comparison operators.
    Bool(bool),
}
|
||||
|
||||
impl CondArith {
|
||||
/// Evaluate against a variable→constant-integer resolver. Returns `None`
|
||||
/// the moment anything is non-constant or an operation is undefined
|
||||
/// (division/modulo by zero, arithmetic overflow, type mismatch), so a
|
||||
/// caller can only ever prune on a *definite* result. All integer
|
||||
/// arithmetic is checked; overflow yields `None` rather than a wrapped
|
||||
/// value, which keeps the fold sound across the i32/i64 gap.
|
||||
pub fn eval(&self, resolve: &impl Fn(&str) -> Option<i64>) -> Option<CondVal> {
|
||||
match self {
|
||||
CondArith::Lit(n) => Some(CondVal::Int(*n)),
|
||||
CondArith::Var(name) => resolve(name).map(CondVal::Int),
|
||||
CondArith::Neg(inner) => match inner.eval(resolve)? {
|
||||
CondVal::Int(n) => n.checked_neg().map(CondVal::Int),
|
||||
CondVal::Bool(_) => None,
|
||||
},
|
||||
CondArith::Bin(op, l, r) => {
|
||||
let lhs = match l.eval(resolve)? {
|
||||
CondVal::Int(n) => n,
|
||||
CondVal::Bool(_) => return None,
|
||||
};
|
||||
let rhs = match r.eval(resolve)? {
|
||||
CondVal::Int(n) => n,
|
||||
CondVal::Bool(_) => return None,
|
||||
};
|
||||
let arith = |v: Option<i64>| v.map(CondVal::Int);
|
||||
match op {
|
||||
BinOp::Add => arith(lhs.checked_add(rhs)),
|
||||
BinOp::Sub => arith(lhs.checked_sub(rhs)),
|
||||
BinOp::Mul => arith(lhs.checked_mul(rhs)),
|
||||
// Java/Rust integer division and modulo both truncate
|
||||
// toward zero; `checked_*` rejects div-by-zero and
|
||||
// i64::MIN / -1 overflow.
|
||||
BinOp::Div => arith(lhs.checked_div(rhs)),
|
||||
BinOp::Mod => arith(lhs.checked_rem(rhs)),
|
||||
BinOp::BitAnd => arith(Some(lhs & rhs)),
|
||||
BinOp::BitOr => arith(Some(lhs | rhs)),
|
||||
BinOp::BitXor => arith(Some(lhs ^ rhs)),
|
||||
BinOp::LeftShift => u32::try_from(rhs)
|
||||
.ok()
|
||||
.and_then(|s| lhs.checked_shl(s))
|
||||
.map(CondVal::Int),
|
||||
BinOp::RightShift => u32::try_from(rhs)
|
||||
.ok()
|
||||
.and_then(|s| lhs.checked_shr(s))
|
||||
.map(CondVal::Int),
|
||||
BinOp::Eq => Some(CondVal::Bool(lhs == rhs)),
|
||||
BinOp::NotEq => Some(CondVal::Bool(lhs != rhs)),
|
||||
BinOp::Lt => Some(CondVal::Bool(lhs < rhs)),
|
||||
BinOp::LtEq => Some(CondVal::Bool(lhs <= rhs)),
|
||||
BinOp::Gt => Some(CondVal::Bool(lhs > rhs)),
|
||||
BinOp::GtEq => Some(CondVal::Bool(lhs >= rhs)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate to a definite boolean, or `None`. The top-level node must be a
|
||||
/// comparison (a bare integer is not a branch condition we fold).
|
||||
pub fn eval_bool(&self, resolve: &impl Fn(&str) -> Option<i64>) -> Option<bool> {
|
||||
match self.eval(resolve)? {
|
||||
CondVal::Bool(b) => Some(b),
|
||||
CondVal::Int(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect every identifier name referenced by the tree.
|
||||
pub fn collect_vars(&self, out: &mut Vec<String>) {
|
||||
match self {
|
||||
CondArith::Lit(_) => {}
|
||||
CondArith::Var(name) => {
|
||||
if !out.iter().any(|v| v == name) {
|
||||
out.push(name.clone());
|
||||
}
|
||||
}
|
||||
CondArith::Neg(inner) => inner.collect_vars(out),
|
||||
CondArith::Bin(_, l, r) => {
|
||||
l.collect_vars(out);
|
||||
r.collect_vars(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Call-related metadata for CFG nodes.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct CallMeta {
|
||||
|
|
@ -662,6 +783,17 @@ pub struct NodeInfo {
|
|||
pub condition_vars: Vec<String>,
|
||||
/// For If nodes: whether the condition has a leading negation (`!` / `not`).
|
||||
pub condition_negated: bool,
|
||||
/// For If / conditional (ternary) nodes: the condition as a pure
|
||||
/// integer-arithmetic + comparison expression tree, when the whole
|
||||
/// condition is built only from integer literals, identifiers, arithmetic
|
||||
/// / comparison operators, and parentheses. `None` for any condition that
|
||||
/// touches a call, field access, string, compound boolean (`&&`/`||`), or
|
||||
/// any shape this evaluator cannot prove constant. Consumed by
|
||||
/// [`crate::ssa::const_prop::fold_constant_branches`] to prune branches
|
||||
/// whose condition folds to a definite boolean once its variables are
|
||||
/// resolved to constants — closing the synthetic "dead branch keeps the
|
||||
/// tainted phi operand alive" false positive without any text re-parsing.
|
||||
pub cond_arith: Option<CondArith>,
|
||||
/// True when this is a Call node whose argument list contains only
|
||||
/// syntactic literal values (strings, numbers, booleans, null/nil,
|
||||
/// arrays/lists/tuples of literals). Also true for zero-argument calls
|
||||
|
|
@ -791,10 +923,7 @@ impl NodeInfo {
|
|||
/// lose information.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LocalFuncSummary {
|
||||
#[allow(dead_code)] // used for future intra-file graph traversal
|
||||
pub entry: NodeIndex,
|
||||
#[allow(dead_code)] // used for future intra-file graph traversal
|
||||
pub exit: NodeIndex,
|
||||
pub source_caps: Cap,
|
||||
pub sanitizer_caps: Cap,
|
||||
pub sink_caps: Cap,
|
||||
|
|
@ -822,9 +951,7 @@ pub struct LocalFuncSummary {
|
|||
pub type Cfg = Graph<NodeInfo, EdgeKind>;
|
||||
pub type FuncSummaries = HashMap<FuncKey, LocalFuncSummary>;
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Per-body CFG types
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Opaque identifier for an executable body within a file.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
|
|
@ -901,7 +1028,6 @@ pub struct BodyCfg {
|
|||
pub meta: BodyMeta,
|
||||
pub graph: Cfg,
|
||||
pub entry: NodeIndex,
|
||||
pub exit: NodeIndex,
|
||||
}
|
||||
|
||||
/// A single import alias binding: local alias → original exported name + module.
|
||||
|
|
@ -1069,7 +1195,7 @@ fn extract_condition_raw<'a>(
|
|||
ast: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> (Option<String>, Vec<String>, bool) {
|
||||
) -> (Option<String>, Vec<String>, bool, Option<CondArith>) {
|
||||
// 1. Find the condition subtree.
|
||||
let cond_node = ast.child_by_field_name("condition").or_else(|| {
|
||||
// Rust `if_expression` uses positional children: the condition is
|
||||
|
|
@ -1089,7 +1215,7 @@ fn extract_condition_raw<'a>(
|
|||
});
|
||||
|
||||
let Some(cond) = cond_node else {
|
||||
return (None, Vec::new(), false);
|
||||
return (None, Vec::new(), false, None);
|
||||
};
|
||||
|
||||
// 2. Detect leading negation (`!expr`, `not expr`, Ruby `unless`).
|
||||
|
|
@ -1107,7 +1233,20 @@ fn extract_condition_raw<'a>(
|
|||
let text = text_of(cond, code)
|
||||
.map(|t| truncate_at_char_boundary(&t, MAX_CONDITION_TEXT_LEN).to_string());
|
||||
|
||||
(text, vars, negated)
|
||||
// 5. Capture the pure integer-arithmetic + comparison tree (for constant
|
||||
// branch folding). Built from the FULL condition node `cond` (not the
|
||||
// negation-stripped `inner`) so the folded boolean matches the
|
||||
// Branch terminator's `true_blk = cond-true` semantics directly. Ruby
|
||||
// `unless` swaps the True/False edges in the CFG builder (lines
|
||||
// ~5029), so the branch polarity would be inverted — skip it to stay
|
||||
// sound (`unless` with a constant arithmetic guard is negligible).
|
||||
let cond_arith = if ast.kind() == "unless" {
|
||||
None
|
||||
} else {
|
||||
build_cond_arith(cond, lang, code, 0)
|
||||
};
|
||||
|
||||
(text, vars, negated, cond_arith)
|
||||
}
|
||||
|
||||
/// Detect leading negation and return the inner expression.
|
||||
|
|
@ -1245,6 +1384,174 @@ fn extract_bin_op(ast: Node, lang: &str) -> Option<BinOp> {
|
|||
None
|
||||
}
|
||||
|
||||
/// Parse an integer literal node to its `i64` value, honouring hex / octal /
|
||||
/// binary radix prefixes and Java/Rust digit separators (`1_000`). Returns
|
||||
/// `None` for floats, non-literals, or values that overflow `i64`.
|
||||
fn parse_int_literal(node: Node, code: &[u8]) -> Option<i64> {
|
||||
let kind = node.kind();
|
||||
let is_int = matches!(
|
||||
kind,
|
||||
"integer"
|
||||
| "integer_literal"
|
||||
| "int_literal"
|
||||
| "number"
|
||||
| "number_literal"
|
||||
| "decimal_integer_literal"
|
||||
| "hex_integer_literal"
|
||||
| "octal_integer_literal"
|
||||
| "binary_integer_literal"
|
||||
);
|
||||
if !is_int {
|
||||
return None;
|
||||
}
|
||||
let raw = std::str::from_utf8(&code[node.byte_range()]).ok()?.trim();
|
||||
// Strip Java long suffix and digit separators.
|
||||
let cleaned: String = raw
|
||||
.trim_end_matches(['l', 'L'])
|
||||
.chars()
|
||||
.filter(|c| *c != '_')
|
||||
.collect();
|
||||
if let Ok(v) = cleaned.parse::<i64>() {
|
||||
return Some(v);
|
||||
}
|
||||
if let Some(h) = cleaned
|
||||
.strip_prefix("0x")
|
||||
.or_else(|| cleaned.strip_prefix("0X"))
|
||||
{
|
||||
return i64::from_str_radix(h, 16).ok();
|
||||
}
|
||||
if let Some(o) = cleaned
|
||||
.strip_prefix("0o")
|
||||
.or_else(|| cleaned.strip_prefix("0O"))
|
||||
{
|
||||
return i64::from_str_radix(o, 8).ok();
|
||||
}
|
||||
if let Some(b) = cleaned
|
||||
.strip_prefix("0b")
|
||||
.or_else(|| cleaned.strip_prefix("0B"))
|
||||
{
|
||||
return i64::from_str_radix(b, 2).ok();
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Map the operator token of a binary expression node to a [`BinOp`].
|
||||
/// Scans for the single anonymous operator child (operands are named).
|
||||
/// Returns `None` for boolean operators (`&&` / `||`), assignment, or any
|
||||
/// token not in the arithmetic / bitwise / comparison set — those make the
|
||||
/// enclosing [`CondArith`] build bail.
|
||||
fn binary_op_token(node: Node) -> Option<BinOp> {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if child.is_named() {
|
||||
continue;
|
||||
}
|
||||
return match child.kind() {
|
||||
"+" => Some(BinOp::Add),
|
||||
"-" => Some(BinOp::Sub),
|
||||
"*" => Some(BinOp::Mul),
|
||||
"/" => Some(BinOp::Div),
|
||||
"%" => Some(BinOp::Mod),
|
||||
"&" => Some(BinOp::BitAnd),
|
||||
"|" => Some(BinOp::BitOr),
|
||||
"^" => Some(BinOp::BitXor),
|
||||
"<<" => Some(BinOp::LeftShift),
|
||||
">>" => Some(BinOp::RightShift),
|
||||
"==" | "===" => Some(BinOp::Eq),
|
||||
"!=" | "!==" => Some(BinOp::NotEq),
|
||||
"<" => Some(BinOp::Lt),
|
||||
"<=" => Some(BinOp::LtEq),
|
||||
">" => Some(BinOp::Gt),
|
||||
">=" => Some(BinOp::GtEq),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Build a [`CondArith`] tree from a condition AST subtree, or `None` if the
|
||||
/// condition is not a pure integer-arithmetic + comparison expression. Uses
|
||||
/// the real tree-sitter node so operator precedence and parentheses are
|
||||
/// already encoded in the tree shape — no text parsing. Conservative by
|
||||
/// construction: any unrecognised node kind (call, field access, string,
|
||||
/// boolean `&&`/`||`, unary `!`) returns `None`, which disables folding for
|
||||
/// that branch (never a wrong fold). Depth-bounded to guard against
|
||||
/// pathological nesting.
|
||||
pub(super) fn build_cond_arith(
|
||||
node: Node,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
depth: u32,
|
||||
) -> Option<CondArith> {
|
||||
if depth > 64 {
|
||||
return None;
|
||||
}
|
||||
let kind = node.kind();
|
||||
|
||||
// Unwrap parentheses (transparent to value).
|
||||
if matches!(
|
||||
kind,
|
||||
"parenthesized_expression" | "parenthesized" | "parenthesized_statement"
|
||||
) {
|
||||
let inner = node.named_child(0)?;
|
||||
return build_cond_arith(inner, lang, code, depth + 1);
|
||||
}
|
||||
|
||||
if let Some(n) = parse_int_literal(node, code) {
|
||||
return Some(CondArith::Lit(n));
|
||||
}
|
||||
|
||||
// Bare identifier (reject dotted paths / field access — those are not
|
||||
// captured here; only a plain local whose const value we can resolve).
|
||||
if matches!(kind, "identifier" | "simple_identifier") {
|
||||
let name = text_of(node, code)?;
|
||||
if !name.is_empty()
|
||||
&& name
|
||||
.chars()
|
||||
.all(|c| c.is_alphanumeric() || c == '_' || c == '$')
|
||||
{
|
||||
return Some(CondArith::Var(name));
|
||||
}
|
||||
return None;
|
||||
}
|
||||
|
||||
// Unary `-` only (boolean `!` / `not` is intentionally unsupported: its
|
||||
// operand would be a boolean, which `CondArith::eval` rejects, so folding
|
||||
// a negated condition is left to the conservative `None` path).
|
||||
if matches!(
|
||||
kind,
|
||||
"unary_expression" | "unary_operator" | "prefix_unary_expression" | "unary"
|
||||
) {
|
||||
let operand = node.named_child(0)?;
|
||||
let mut cursor = node.walk();
|
||||
let is_neg = node
|
||||
.children(&mut cursor)
|
||||
.any(|c| !c.is_named() && c.kind() == "-");
|
||||
if is_neg {
|
||||
return Some(CondArith::Neg(Box::new(build_cond_arith(
|
||||
operand,
|
||||
lang,
|
||||
code,
|
||||
depth + 1,
|
||||
)?)));
|
||||
}
|
||||
return None;
|
||||
}
|
||||
|
||||
// Binary arithmetic / comparison: exactly two operands + one operator.
|
||||
if is_binary_expr_kind(kind, lang) {
|
||||
if node.named_child_count() != 2 {
|
||||
return None; // chained comparison (Python `a < b < c`) etc.
|
||||
}
|
||||
let op = binary_op_token(node)?;
|
||||
let lhs = build_cond_arith(node.named_child(0)?, lang, code, depth + 1)?;
|
||||
let rhs = build_cond_arith(node.named_child(1)?, lang, code, depth + 1)?;
|
||||
return Some(CondArith::Bin(op, Box::new(lhs), Box::new(rhs)));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Find the RHS value node of an assignment-like AST node (variable declarator,
|
||||
/// lexical declaration, assignment expression). Used by helpers that need to
|
||||
/// inspect what an identifier is being initialized to.
|
||||
|
|
@ -2071,6 +2378,32 @@ fn is_binary_expr_kind(kind: &str, lang: &str) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Classification text for a for-each loop's iterable expression.
|
||||
///
|
||||
/// Subscript / index iterables (`$_GET['x']`, `params[:list]`, `arr[i]`)
|
||||
/// classify on their **base object**: taint sources are keyed on the base
|
||||
/// name (`$_GET`, `params`), and the trailing index would otherwise break
|
||||
/// the word-boundary suffix match in `classify`. Non-subscript iterables
|
||||
/// (method calls, member chains, bare identifiers) use their full text.
|
||||
fn iterable_label_text(iter: Node, code: &[u8]) -> Option<String> {
|
||||
if matches!(
|
||||
iter.kind(),
|
||||
"subscript_expression" | "subscript" | "index_expression" | "element_reference"
|
||||
) {
|
||||
let base = iter
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| iter.child_by_field_name("operand"))
|
||||
.or_else(|| iter.child_by_field_name("value"))
|
||||
.or_else(|| iter.child(0));
|
||||
if let Some(b) = base
|
||||
&& let Some(t) = text_of(b, code)
|
||||
{
|
||||
return Some(t);
|
||||
}
|
||||
}
|
||||
text_of(iter, code)
|
||||
}
|
||||
|
||||
/// Create a node in one short borrow and optionally attach a taint label.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(super) fn push_node<'a>(
|
||||
|
|
@ -2212,6 +2545,51 @@ pub(super) fn push_node<'a>(
|
|||
text = iter_text;
|
||||
}
|
||||
|
||||
// Java `for (T x : iter)`: tree-sitter-java emits `enhanced_for_statement`
|
||||
// with the iterable on the `value` field. Classify against the iterable
|
||||
// text so a source-returning call (`req.getCookies()`,
|
||||
// `req.getParameterValues(..)`) lights up a Source on the loop node and
|
||||
// the loop binding inherits its taint — the same loop-binding-inherits-
|
||||
// iterator-taint contract the JS/Python rewrites above provide. The
|
||||
// loop variable itself is recorded as a define by `def_use`'s Kind::For
|
||||
// arm (via the `name`/`value` mapping), so the Source-labeled loop node
|
||||
// taints the binding directly.
|
||||
if lang == "java"
|
||||
&& ast.kind() == "enhanced_for_statement"
|
||||
&& let Some(value) = ast.child_by_field_name("value")
|
||||
&& let Some(iter_text) = iterable_label_text(value, code)
|
||||
{
|
||||
text = iter_text;
|
||||
}
|
||||
|
||||
// PHP `foreach ($iter as $v)` / `foreach ($iter as $k => $v)`: the
|
||||
// iterable is the named child immediately preceding the `as` keyword
|
||||
// (only `body` is a named field). Classify against the iterable text so
|
||||
// a superglobal/source iterable (`$_GET[..]`, `$_POST[..]`) taints the
|
||||
// loop binding, matching the JS/Python/Java rewrites.
|
||||
if lang == "php" && ast.kind() == "foreach_statement" {
|
||||
let mut cursor = ast.walk();
|
||||
let kids: Vec<Node> = ast.children(&mut cursor).collect();
|
||||
if let Some(as_pos) = kids.iter().position(|c| c.kind() == "as")
|
||||
&& let Some(iter_node) = kids[..as_pos].iter().rev().find(|c| c.is_named()).copied()
|
||||
&& let Some(iter_text) = iterable_label_text(iter_node, code)
|
||||
{
|
||||
text = iter_text;
|
||||
}
|
||||
}
|
||||
|
||||
// Ruby `for x in coll`: tree-sitter-ruby's `for` node carries the
|
||||
// iterable on the `value` field. (The idiomatic `coll.each { |x| }`
|
||||
// form is a method call with a block and is handled by the call/block
|
||||
// machinery, not here.)
|
||||
if lang == "ruby"
|
||||
&& ast.kind() == "for"
|
||||
&& let Some(value) = ast.child_by_field_name("value")
|
||||
&& let Some(iter_text) = iterable_label_text(value, code)
|
||||
{
|
||||
text = iter_text;
|
||||
}
|
||||
|
||||
// If this is a declaration/expression wrapper or an assignment that
|
||||
// *contains* a call, prefer the first inner call identifier instead of
|
||||
// the whole line. Track the inner call's byte span so we can populate
|
||||
|
|
@ -2511,6 +2889,23 @@ pub(super) fn push_node<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// Conditions can contain source/sink calls whose argument side effects are
|
||||
// load-bearing for taint, e.g. C `if (!fgets(buf, n, stdin)) return;`.
|
||||
// Classify the condition call so output-parameter sources still lower as
|
||||
// SSA calls while the CFG node keeps its branch shape.
|
||||
if labels.is_empty()
|
||||
&& matches!(lookup(lang, ast.kind()), Kind::If | Kind::While)
|
||||
&& let Some(cond) = ast.child_by_field_name("condition")
|
||||
&& let Some((ident, ident_span)) = first_call_ident_with_span(cond, lang, code)
|
||||
&& let Some(l) = classify(lang, &ident, extra)
|
||||
{
|
||||
labels.push(l);
|
||||
text = ident;
|
||||
if inner_text_span.is_none() {
|
||||
inner_text_span = Some(ident_span);
|
||||
}
|
||||
}
|
||||
|
||||
// For `if let` / `while let` patterns: try to classify the value expression
|
||||
// in the let-condition as a source/sink. E.g. `if let Ok(cmd) = env::var("CMD")`
|
||||
// should recognise `env::var` as a taint source and label this node accordingly.
|
||||
|
|
@ -3147,11 +3542,12 @@ pub(super) fn push_node<'a>(
|
|||
};
|
||||
|
||||
// Extract condition metadata for If nodes.
|
||||
let (condition_text, condition_vars, condition_negated) = if kind == StmtKind::If {
|
||||
extract_condition_raw(ast, lang, code)
|
||||
} else {
|
||||
(None, Vec::new(), false)
|
||||
};
|
||||
let (condition_text, condition_vars, condition_negated, cond_arith) =
|
||||
if matches!(lookup(lang, ast.kind()), Kind::If) {
|
||||
extract_condition_raw(ast, lang, code)
|
||||
} else {
|
||||
(None, Vec::new(), false, None)
|
||||
};
|
||||
|
||||
// Extract per-argument identifiers for Call nodes.
|
||||
// Also extract for gated-sink nodes so payload-arg filtering works.
|
||||
|
|
@ -3427,6 +3823,7 @@ pub(super) fn push_node<'a>(
|
|||
condition_text,
|
||||
condition_vars,
|
||||
condition_negated,
|
||||
cond_arith,
|
||||
all_args_literal,
|
||||
catch_param: false,
|
||||
arg_callees,
|
||||
|
|
@ -4677,10 +5074,8 @@ fn apply_arg_source_bindings(
|
|||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// The recursive *work‑horse* that converts an AST node into a CFG slice.
|
||||
// Returns the set of *exit* nodes that need to be wired further.
|
||||
// -------------------------------------------------------------------------
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(super) fn build_sub<'a>(
|
||||
ast: Node<'a>,
|
||||
|
|
@ -4701,9 +5096,7 @@ pub(super) fn build_sub<'a>(
|
|||
current_body_id: BodyId,
|
||||
) -> Vec<NodeIndex> {
|
||||
match lookup(lang, ast.kind()) {
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// IF‑/ELSE: two branches that re‑merge afterwards
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
Kind::If => {
|
||||
// Some grammars (Go `if init; cond {}`, sibling C-style forms)
|
||||
// attach an init / "initializer" subtree that runs before the
|
||||
|
|
@ -4985,9 +5378,7 @@ pub(super) fn build_sub<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// WHILE / FOR: classic loop with a back edge.
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
Kind::While | Kind::For => {
|
||||
let header = push_node(
|
||||
g,
|
||||
|
|
@ -5129,9 +5520,7 @@ pub(super) fn build_sub<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Control-flow sinks (return / break / continue).
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
Kind::Return => {
|
||||
if has_call_descendant(ast, lang) {
|
||||
// Return-call bug fix: emit a Call node BEFORE the Return so
|
||||
|
|
@ -5427,9 +5816,7 @@ pub(super) fn build_sub<'a>(
|
|||
current_body_id,
|
||||
),
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// BLOCK: statements execute sequentially
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
Kind::SourceFile | Kind::Block => {
|
||||
// Ruby body_statement with rescue/ensure = implicit begin/rescue
|
||||
if lang == "ruby" && ast.kind() == "body_statement" {
|
||||
|
|
@ -5664,7 +6051,7 @@ pub(super) fn build_sub<'a>(
|
|||
for idx in fn_graph.node_indices() {
|
||||
let info = &fn_graph[idx];
|
||||
if let Some(callee) = &info.call.callee {
|
||||
let site = build_callee_site(callee, info, lang);
|
||||
let site = build_callee_site(callee, info, lang, code);
|
||||
// Dedup by (name, arity, receiver, qualifier, ordinal). A
|
||||
// single function may legitimately contain multiple distinct
|
||||
// calls to the same callee (e.g. different ordinals or
|
||||
|
|
@ -5789,7 +6176,6 @@ pub(super) fn build_sub<'a>(
|
|||
key,
|
||||
LocalFuncSummary {
|
||||
entry: fn_entry,
|
||||
exit: fn_exit,
|
||||
source_caps: fn_src_bits,
|
||||
sanitizer_caps: fn_sani_bits,
|
||||
sink_caps: fn_sink_bits,
|
||||
|
|
@ -5839,7 +6225,6 @@ pub(super) fn build_sub<'a>(
|
|||
},
|
||||
graph: fn_graph,
|
||||
entry: fn_entry,
|
||||
exit: fn_exit,
|
||||
});
|
||||
|
||||
// ── 7) Insert placeholder in parent graph ─────────────────────────
|
||||
|
|
@ -5899,10 +6284,14 @@ pub(super) fn build_sub<'a>(
|
|||
);
|
||||
}
|
||||
|
||||
// JS/TS ternary-RHS split: `var x = c ? a : b;` and
|
||||
// JS/TS/Java ternary-RHS split: `var x = c ? a : b;` and
|
||||
// `obj.prop = c ? a : b;` lower to a real diamond CFG so the
|
||||
// condition is control-flow (not a data-flow `uses` entry).
|
||||
if matches!(lang, "javascript" | "typescript" | "tsx")
|
||||
// Java uses the same `ternary_expression` AST kind; routing it
|
||||
// through the diamond lets `fold_constant_branches` prune dead
|
||||
// constant-condition arms (`cond ? "const" : param`) the same way
|
||||
// it does for the if-form.
|
||||
if matches!(lang, "javascript" | "typescript" | "tsx" | "java")
|
||||
&& let Some((lhs_ast, ternary_ast)) = find_ternary_rhs_wrapper(ast)
|
||||
{
|
||||
let (lhs_text, lhs_labels) =
|
||||
|
|
@ -6157,8 +6546,8 @@ pub(super) fn build_sub<'a>(
|
|||
|
||||
// Assignment that may contain a call (Python `x = os.getenv(...)`, Ruby `x = gets()`)
|
||||
Kind::Assignment => {
|
||||
// JS/TS ternary-RHS split, same rationale as the CallWrapper branch.
|
||||
if matches!(lang, "javascript" | "typescript" | "tsx")
|
||||
// JS/TS/Java ternary-RHS split, same rationale as the CallWrapper branch.
|
||||
if matches!(lang, "javascript" | "typescript" | "tsx" | "java")
|
||||
&& let (Some(left), Some(right)) = (
|
||||
ast.child_by_field_name("left"),
|
||||
ast.child_by_field_name("right"),
|
||||
|
|
@ -6259,9 +6648,7 @@ pub(super) fn build_sub<'a>(
|
|||
analysis_rules,
|
||||
),
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Every other node = simple sequential statement
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
_ => {
|
||||
// React JSX `dangerouslySetInnerHTML={{__html: x}}` synthesis
|
||||
// (Phase 06): handles arrow-bodied components like
|
||||
|
|
@ -6428,7 +6815,6 @@ pub(crate) fn build_cfg<'a>(
|
|||
},
|
||||
graph: g,
|
||||
entry,
|
||||
exit,
|
||||
};
|
||||
bodies.insert(0, toplevel);
|
||||
// Sort by BodyId so that bodies[i].meta.id == BodyId(i).
|
||||
|
|
@ -6632,7 +7018,12 @@ fn apply_gated_label_rules(
|
|||
/// remains the single segment immediately before the leaf (back-compat
|
||||
/// with the legacy heuristic). For method calls the qualifier is
|
||||
/// redundant with `receiver` and is left `None`.
|
||||
fn build_callee_site(callee: &str, info: &NodeInfo, lang: &str) -> crate::summary::CalleeSite {
|
||||
fn build_callee_site(
|
||||
callee: &str,
|
||||
info: &NodeInfo,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
) -> crate::summary::CalleeSite {
|
||||
use crate::summary::CalleeSite;
|
||||
|
||||
let receiver = info.call.receiver.clone();
|
||||
|
|
@ -6661,15 +7052,39 @@ fn build_callee_site(callee: &str, info: &NodeInfo, lang: &str) -> crate::summar
|
|||
None
|
||||
};
|
||||
|
||||
let span = callee_span_line_col(code, info.ast.span.0);
|
||||
|
||||
CalleeSite {
|
||||
name: callee.to_string(),
|
||||
arity,
|
||||
receiver,
|
||||
qualifier,
|
||||
ordinal: info.call.call_ordinal,
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve a byte offset in `code` to a 1-based `(line, column)` pair.
///
/// The column is counted in bytes from the start of the line. `None` is
/// returned only for empty `code`, where there is nothing to resolve against.
/// An offset beyond the end is clamped to `code.len()`, so a span that
/// overshoots the file still resolves to the position just past the final
/// byte instead of failing.
fn callee_span_line_col(code: &[u8], offset: usize) -> Option<(u32, u32)> {
    if code.is_empty() {
        return None;
    }

    let end = offset.min(code.len());
    let head = &code[..end];

    // Index just past the last newline before `end`, or 0 on the first line.
    let line_start = head
        .iter()
        .rposition(|&byte| byte == b'\n')
        .map_or(0, |newline_at| newline_at + 1);

    // Every newline in `head` sits before `line_start`.
    let newlines = head[..line_start]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();

    let line = newlines as u32 + 1;
    let col = (end - line_start) as u32 + 1;
    Some((line, col))
}
|
||||
|
||||
/// Convert the graph‑local `FuncSummaries` into serialisable [`FuncSummary`]
|
||||
/// values suitable for cross‑file persistence.
|
||||
pub(crate) fn export_summaries(
|
||||
|
|
@ -6721,21 +7136,5 @@ pub(crate) fn export_summaries(
|
|||
.collect()
|
||||
}
|
||||
|
||||
// pub(crate) fn dump_cfg(g: &Cfg) {
|
||||
// debug!(target: "taint", "CFG DUMP: nodes = {}, edges = {}", g.node_count(), g.edge_count());
|
||||
// for idx in g.node_indices() {
|
||||
// debug!(target: "taint", " node {:>3}: {:?}", idx.index(), g[idx]);
|
||||
// }
|
||||
// for e in g.edge_references() {
|
||||
// debug!(
|
||||
// target: "taint",
|
||||
// " edge {:>3} → {:<3} ({:?})",
|
||||
// e.source().index(),
|
||||
// e.target().index(),
|
||||
// e.weight()
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
|
||||
#[cfg(test)]
|
||||
mod cfg_tests;
|
||||
|
|
|
|||
|
|
@ -157,10 +157,6 @@ fn find_auth_nodes(ctx: &AnalysisContext) -> Vec<NodeIndex> {
|
|||
}
|
||||
|
||||
impl CfgAnalysis for AuthGap {
|
||||
fn name(&self) -> &'static str {
|
||||
"auth-gap"
|
||||
}
|
||||
|
||||
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
||||
// Decorator/annotation/attribute auth on the body declaration
|
||||
// already gates every sink in the body, skip the
|
||||
|
|
@ -218,7 +214,6 @@ impl CfgAnalysis for AuthGap {
|
|||
|
||||
findings.push(CfgFinding {
|
||||
rule_id: "cfg-auth-gap".to_string(),
|
||||
title: "Missing auth check".to_string(),
|
||||
severity: Severity::High,
|
||||
confidence: Confidence::Medium,
|
||||
span: info.ast.span,
|
||||
|
|
|
|||
|
|
@ -100,38 +100,6 @@ fn build_reversed_graph(cfg: &Cfg) -> Graph<NodeInfo, EdgeKind> {
|
|||
rev
|
||||
}
|
||||
|
||||
/// Find all nodes matching a specific callee name pattern.
|
||||
#[allow(dead_code)]
|
||||
pub fn find_call_nodes_matching(cfg: &Cfg, matchers: &[&str]) -> Vec<NodeIndex> {
|
||||
cfg.node_indices()
|
||||
.filter(|&idx| {
|
||||
if cfg[idx].kind != StmtKind::Call {
|
||||
return false;
|
||||
}
|
||||
if let Some(callee) = &cfg[idx].call.callee {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
matchers.iter().any(|m| {
|
||||
let ml = m.to_ascii_lowercase();
|
||||
if ml.ends_with('_') {
|
||||
callee_lower.starts_with(&ml)
|
||||
} else {
|
||||
callee_lower.ends_with(&ml)
|
||||
}
|
||||
})
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Check if there exists any path from `from` to `to` in the CFG.
|
||||
#[allow(dead_code)]
|
||||
pub fn has_path(cfg: &Cfg, from: NodeIndex, to: NodeIndex) -> bool {
|
||||
let reachable = reachable_set(cfg, from);
|
||||
reachable.contains(&to)
|
||||
}
|
||||
|
||||
/// Compute shortest distance (in hops) from `from` to `to`.
|
||||
pub fn shortest_distance(cfg: &Cfg, from: NodeIndex, to: NodeIndex) -> Option<usize> {
|
||||
use std::collections::VecDeque;
|
||||
|
|
|
|||
|
|
@ -306,10 +306,6 @@ fn find_post_if_sinks(cfg: &crate::cfg::Cfg, if_node: NodeIndex) -> Vec<NodeInde
|
|||
}
|
||||
|
||||
impl CfgAnalysis for IncompleteErrorHandling {
|
||||
fn name(&self) -> &'static str {
|
||||
"incomplete-error-handling"
|
||||
}
|
||||
|
||||
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
|
|
@ -369,7 +365,6 @@ impl CfgAnalysis for IncompleteErrorHandling {
|
|||
if has_dangerous_successor {
|
||||
findings.push(CfgFinding {
|
||||
rule_id: "cfg-error-fallthrough".to_string(),
|
||||
title: "Error check without return".to_string(),
|
||||
severity: Severity::Medium,
|
||||
confidence: Confidence::Medium,
|
||||
span: info.ast.span,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
#![allow(clippy::collapsible_if)]
|
||||
//! Unguarded-sink detection via CFG dominator analysis.
|
||||
//!
|
||||
//! Flags dangerous sinks that are not dominated by an appropriate guard
|
||||
//! (validation or auth check) on every path from an entry point.
|
||||
|
||||
use super::dominators::{self, dominates};
|
||||
use super::rules;
|
||||
|
|
@ -177,6 +180,109 @@ fn ssa_all_sink_operands_const_or_param(ctx: &AnalysisContext, sink: NodeIndex)
|
|||
args_ok && receiver_ok
|
||||
}
|
||||
|
||||
/// Suppress a `cfg-unguarded-sink` finding when the sink restricts its
|
||||
/// injection payload to specific argument positions (`sink_payload_args`)
|
||||
/// and every operand at those positions resolves to a concrete constant.
|
||||
///
|
||||
/// The flat [`is_all_args_constant`] check inspects *every* operand, so a
|
||||
/// safe parameterised call like Go's
|
||||
/// `db.QueryContext(context.Background(), "SELECT … $1", bind)` is wrongly
|
||||
/// rejected: only arg 1 (the SQL string, `payload_args = [1]`) can carry an
|
||||
/// injection, yet the non-payload `context.Background()` call and the
|
||||
/// positional bind value are non-constant operands that defeat the
|
||||
/// all-operands test. The taint engine already honours the payload-arg
|
||||
/// gate (no `taint-unsanitised-flow` fires), so under `!has_taint` a sink
|
||||
/// whose payload positions are all literals is safe by construction.
|
||||
fn sink_payload_args_const(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
|
||||
let payload_positions = match &ctx.cfg[sink].call.sink_payload_args {
|
||||
Some(p) if !p.is_empty() => p,
|
||||
_ => return false,
|
||||
};
|
||||
let Some(facts) = ctx.body_const_facts else {
|
||||
return false;
|
||||
};
|
||||
let Some(&sink_val) = facts.ssa.cfg_node_map.get(&sink) else {
|
||||
return false;
|
||||
};
|
||||
let Some(inst) = find_inst(&facts.ssa, sink_val) else {
|
||||
return false;
|
||||
};
|
||||
let SsaOp::Call { args, .. } = &inst.op else {
|
||||
return false;
|
||||
};
|
||||
// Every payload-position operand must resolve to a concrete literal. A
|
||||
// payload position outside the recorded arg list cannot be proven safe.
|
||||
payload_positions.iter().all(|&pos| match args.get(pos) {
|
||||
Some(group) => group.iter().all(|v| {
|
||||
matches!(
|
||||
facts.const_values.get(v),
|
||||
Some(
|
||||
ConstLattice::Str(_)
|
||||
| ConstLattice::Int(_)
|
||||
| ConstLattice::Bool(_)
|
||||
| ConstLattice::Null
|
||||
)
|
||||
)
|
||||
}),
|
||||
None => false,
|
||||
})
|
||||
}
|
||||
|
||||
/// Suppress a `cfg-unguarded-sink` SSRF finding when the sink's URL operand
|
||||
/// is origin-locked: it is the result of a `new URL(path, base)` /
|
||||
/// `urljoin(base, path)` / `url.JoinPath(base, …)` builder whose base
|
||||
/// argument pins the scheme+host, so the (attacker-controlled) path
|
||||
/// component cannot redirect the request off the locked origin.
|
||||
///
|
||||
/// Mirrors the taint engine's `StringFact::from_url_with_base` prefix-lock
|
||||
/// (`url_builder_arg_indices` + `is_string_safe_for_ssrf`): the taint engine
|
||||
/// stays silent on this shape, so the parallel structural finding is a false
|
||||
/// positive. The base is recognised as either a string literal recorded on
|
||||
/// the builder node (`arg_string_literals[base_idx]`) or a const-bound
|
||||
/// identifier whose SSA operand resolves to a concrete string.
|
||||
fn sink_url_origin_locked(ctx: &AnalysisContext, sink: NodeIndex, sink_caps: Cap) -> bool {
|
||||
if !sink_caps.contains(Cap::SSRF) {
|
||||
return false;
|
||||
}
|
||||
let sink_info = &ctx.cfg[sink];
|
||||
let sink_func = sink_info.ast.enclosing_func.as_deref();
|
||||
// CFG one-hop trace (mirrors `is_all_args_constant`): the SSA
|
||||
// `cfg_node_map` only covers the body whose facts are attached to `ctx`,
|
||||
// so for a sink inside a nested function (e.g. an Express arrow handler)
|
||||
// the SSA path misses it. Walk the CFG instead: for every variable the
|
||||
// sink uses, find its defining node in the same function and test whether
|
||||
// that definition is an origin-locking URL builder.
|
||||
sink_info.taint.uses.iter().any(|u| {
|
||||
ctx.cfg.node_indices().any(|idx| {
|
||||
let info = &ctx.cfg[idx];
|
||||
if info.ast.enclosing_func.as_deref() != sink_func {
|
||||
return false;
|
||||
}
|
||||
if info.taint.defines.as_deref() != Some(u.as_str()) {
|
||||
return false;
|
||||
}
|
||||
// `info` defines `u`. Is it `new URL(path, base)` / `urljoin` /
|
||||
// `JoinPath` with a string-literal base pinning scheme+host?
|
||||
let Some(callee) = info.call.callee.as_deref() else {
|
||||
return false;
|
||||
};
|
||||
let Some((_path_idx, base_idx)) = crate::ssa::type_facts::url_builder_arg_indices(
|
||||
ctx.lang,
|
||||
callee,
|
||||
info.call.outer_callee.as_deref(),
|
||||
info.call.is_constructor,
|
||||
) else {
|
||||
return false;
|
||||
};
|
||||
info.call
|
||||
.arg_string_literals
|
||||
.get(base_idx)
|
||||
.and_then(|s| s.as_deref())
|
||||
.is_some()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Return true if the SSA body contains a *named* variable whose definition
|
||||
/// is a constant, the SSA signature of an explicit `name = "literal"`
|
||||
/// reassignment. Used as the gate for the broader operand-Param suppression:
|
||||
|
|
@ -2493,6 +2599,18 @@ fn local_is_param_derived<'a>(
|
|||
continue;
|
||||
}
|
||||
found_def = true;
|
||||
// A `foreach` / `for-each` loop binding iterates collection
|
||||
// *elements*, not a direct parameter pass-through. Even when the
|
||||
// iterable is a bare parameter (`foreach ($param as $v)`), the
|
||||
// per-element values are not simple wrapper plumbing, so do not
|
||||
// clear them as parameter-derived — keep the structural finding
|
||||
// for `foreach ($param as $v) { sink($v) }` shapes (literal-keyed
|
||||
// arrays are already suppressed earlier by
|
||||
// `sink_arg_uses_safe_foreach_key`).
|
||||
if info.kind == StmtKind::Loop {
|
||||
all_def_clear = false;
|
||||
break;
|
||||
}
|
||||
if info
|
||||
.taint
|
||||
.labels
|
||||
|
|
@ -2715,10 +2833,6 @@ fn sink_in_entrypoint(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
|
|||
}
|
||||
|
||||
impl CfgAnalysis for UnguardedSink {
|
||||
fn name(&self) -> &'static str {
|
||||
"unguarded-sink"
|
||||
}
|
||||
|
||||
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
||||
let doms = dominators::compute_dominators(ctx.cfg, ctx.entry);
|
||||
let sink_nodes = dominators::find_sink_nodes(ctx.cfg);
|
||||
|
|
@ -2799,6 +2913,29 @@ impl CfgAnalysis for UnguardedSink {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Payload-arg-gated sinks (e.g. Go `db.QueryContext(ctx, sql,
|
||||
// ...binds)`, `payload_args = [1]`): only the payload positions can
|
||||
// carry an injection. When the taint engine is already silent
|
||||
// (`!has_taint`) and every payload-position operand is a constant
|
||||
// literal, the non-payload operands (a `context.Context`, bind
|
||||
// values) cannot make the call dangerous, so the structural finding
|
||||
// is a false positive even though `is_all_args_constant` rejects it.
|
||||
if !has_taint && sink_payload_args_const(ctx, *sink) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Origin-locked URL SSRF sinks (`fetch(new URL(path, "https://…"))`):
|
||||
// the builder's literal base pins scheme+host, so the
|
||||
// attacker-controlled path cannot redirect off-origin. The taint
|
||||
// engine already suppresses this via the abstract prefix-lock, so
|
||||
// the parallel structural finding is a false positive. NOT gated
|
||||
// on `!has_taint`: the origin lock holds precisely *because* the
|
||||
// tainted path reaches the builder — the host stays fixed — so the
|
||||
// syntactic taint-reaches signal must not re-open the finding.
|
||||
if sink_url_origin_locked(ctx, *sink, sink_caps) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// SSA latest-def suppression: when the taint engine has already
|
||||
// proved no source-tainted data reaches this sink (`!has_taint`)
|
||||
// and every SSA operand resolves to a constant, callee-fragment
|
||||
|
|
@ -2976,7 +3113,6 @@ impl CfgAnalysis for UnguardedSink {
|
|||
|
||||
findings.push(CfgFinding {
|
||||
rule_id: "cfg-unguarded-sink".to_string(),
|
||||
title: "Unguarded sink".to_string(),
|
||||
severity,
|
||||
confidence,
|
||||
span: sink_info.ast.span,
|
||||
|
|
|
|||
|
|
@ -140,8 +140,6 @@ pub enum Confidence {
|
|||
#[derive(Debug, Clone)]
|
||||
pub struct CfgFinding {
|
||||
pub rule_id: String,
|
||||
#[allow(dead_code)]
|
||||
pub title: String,
|
||||
pub severity: Severity,
|
||||
pub confidence: Confidence,
|
||||
pub span: (usize, usize),
|
||||
|
|
@ -154,12 +152,8 @@ pub struct AnalysisContext<'a> {
|
|||
pub cfg: &'a crate::cfg::Cfg,
|
||||
pub entry: NodeIndex,
|
||||
pub lang: Lang,
|
||||
#[allow(dead_code)]
|
||||
pub file_path: &'a str,
|
||||
#[allow(dead_code)]
|
||||
pub source_bytes: &'a [u8],
|
||||
pub func_summaries: &'a FuncSummaries,
|
||||
#[allow(dead_code)]
|
||||
pub global_summaries: Option<&'a GlobalSummaries>,
|
||||
/// Per-file SSA summaries map produced by
|
||||
/// `lower_all_functions_from_bodies` (after both the augment pass
|
||||
|
|
@ -170,7 +164,6 @@ pub struct AnalysisContext<'a> {
|
|||
/// suppress structural findings whose taint flow has been proven
|
||||
/// validated through helper summaries (CVE-2026-25544 patched
|
||||
/// counterpart).
|
||||
#[allow(dead_code)]
|
||||
pub ssa_summaries: Option<
|
||||
&'a std::collections::HashMap<
|
||||
crate::symbol::FuncKey,
|
||||
|
|
@ -218,8 +211,6 @@ pub struct AnalysisContext<'a> {
|
|||
}
|
||||
|
||||
pub trait CfgAnalysis {
|
||||
#[allow(dead_code)]
|
||||
fn name(&self) -> &'static str;
|
||||
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding>;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -531,10 +531,6 @@ fn has_explicit_lock_acquire(ctx: &AnalysisContext, acquire: NodeIndex) -> bool
|
|||
}
|
||||
|
||||
impl CfgAnalysis for ResourceMisuse {
|
||||
fn name(&self) -> &'static str {
|
||||
"resource-misuse"
|
||||
}
|
||||
|
||||
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
||||
let pairs = rules::resource_pairs(ctx.lang);
|
||||
let exit = match dominators::find_exit_node(ctx.cfg) {
|
||||
|
|
@ -631,7 +627,6 @@ impl CfgAnalysis for ResourceMisuse {
|
|||
} else {
|
||||
"cfg-resource-leak".to_string()
|
||||
},
|
||||
title: format!("{} may leak", pair.resource_name),
|
||||
severity: Severity::Medium,
|
||||
confidence: Confidence::Medium,
|
||||
span: info.ast.span,
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ fn parse_and_analyse<A: CfgAnalysis>(
|
|||
cfg,
|
||||
entry,
|
||||
lang,
|
||||
file_path: "test.rs",
|
||||
source_bytes: src,
|
||||
func_summaries: summaries,
|
||||
global_summaries: None,
|
||||
|
|
@ -54,7 +53,6 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
|
|||
cfg,
|
||||
entry,
|
||||
lang,
|
||||
file_path: "test.rs",
|
||||
source_bytes: src,
|
||||
func_summaries: summaries,
|
||||
global_summaries: None,
|
||||
|
|
@ -90,7 +88,6 @@ fn parse_and_run_all_with_taint(
|
|||
cfg,
|
||||
entry,
|
||||
lang,
|
||||
file_path: "test.rs",
|
||||
source_bytes: src,
|
||||
func_summaries: summaries,
|
||||
global_summaries: None,
|
||||
|
|
@ -210,7 +207,6 @@ fn parse_and_analyse_with_ssa<A: CfgAnalysis>(
|
|||
cfg: &body.graph,
|
||||
entry: body.entry,
|
||||
lang,
|
||||
file_path: "test.rs",
|
||||
source_bytes: src,
|
||||
func_summaries: &file_cfg.summaries,
|
||||
global_summaries: None,
|
||||
|
|
@ -1227,7 +1223,6 @@ fn config_sanitizer_suppresses_unguarded_sink() {
|
|||
cfg,
|
||||
entry,
|
||||
lang,
|
||||
file_path: "test.rs",
|
||||
source_bytes: src,
|
||||
func_summaries: summaries,
|
||||
global_summaries: None,
|
||||
|
|
@ -1708,7 +1703,6 @@ fn cfg_only_no_taint_produces_low_severity() {
|
|||
cfg,
|
||||
entry,
|
||||
lang,
|
||||
file_path: "test.rs",
|
||||
source_bytes: src,
|
||||
func_summaries: summaries,
|
||||
global_summaries: None,
|
||||
|
|
|
|||
|
|
@ -38,10 +38,6 @@ fn event_handler_callbacks(ctx: &AnalysisContext) -> HashSet<String> {
|
|||
}
|
||||
|
||||
impl CfgAnalysis for UnreachableCode {
|
||||
fn name(&self) -> &'static str {
|
||||
"unreachable-code"
|
||||
}
|
||||
|
||||
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
||||
let reachable = dominators::reachable_set(ctx.cfg, ctx.entry);
|
||||
let handler_callbacks = event_handler_callbacks(ctx);
|
||||
|
|
@ -122,7 +118,6 @@ impl CfgAnalysis for UnreachableCode {
|
|||
|
||||
findings.push(CfgFinding {
|
||||
rule_id: rule_id.to_string(),
|
||||
title: title.to_string(),
|
||||
severity,
|
||||
confidence: Confidence::High,
|
||||
span: info.ast.span,
|
||||
|
|
|
|||
352
src/chain/edges.rs
Normal file
352
src/chain/edges.rs
Normal file
|
|
@ -0,0 +1,352 @@
|
|||
//! Phase 24 — convert per-finding [`Diag`]s into chain-graph edges.
|
||||
//!
|
||||
//! Each call to [`findings_to_edges`] emits at most one [`ChainEdge`]
|
||||
//! per input finding. The edge is *typed* by:
|
||||
//!
|
||||
//! - the primary [`Cap`] bit picked from [`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps)
|
||||
//! (chosen deterministically by [`pick_chain_cap`]: the lowest set bit that has a standalone impact rule, else the lowest set bit), and
|
||||
//! - the *reach* — the surface [`EntryPoint`](crate::surface::EntryPoint) in the same file as the
|
||||
//! finding, when one exists, otherwise [`Reach::Unreachable`].
|
||||
//!
|
||||
//! Phase 25's path search composes these edges with the SurfaceMap's
|
||||
//! `Reaches` edges into full chains. Phase 24 does not run any path
|
||||
//! search or do call-graph traversal: edges are emitted at finding
|
||||
//! granularity and carry only the file-local reach hint.
|
||||
|
||||
use crate::callgraph::FileReachMap;
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::labels::Cap;
|
||||
use crate::surface::{SourceLocation, SurfaceMap, SurfaceNode};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::feasibility::Feasibility;
|
||||
use super::impact::lookup_impact;
|
||||
|
||||
/// Compact reference to a static finding embedded in a [`ChainEdge`].
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct FindingRef {
|
||||
/// Stable finding ID (matches [`Diag::finding_id`] when present).
|
||||
pub finding_id: String,
|
||||
/// Stable 64-bit hash from [`Diag::stable_hash`]. Zero when the
|
||||
/// finding has not been hashed yet.
|
||||
pub stable_hash: u64,
|
||||
/// Source location of the sink.
|
||||
pub location: SourceLocation,
|
||||
/// Rule identifier (`Diag::id`).
|
||||
pub rule_id: String,
|
||||
/// Resolved sink cap bits ([`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps)).
|
||||
pub cap_bits: u32,
|
||||
}
|
||||
|
||||
/// Whether the finding lands inside an externally-reachable surface
|
||||
/// entry-point. Phase 24 only resolves *file-local* reach: a finding
|
||||
/// in `app/views.py` is treated as reachable if any
|
||||
/// [`EntryPoint`](crate::surface::EntryPoint) declares a handler in
|
||||
/// that same file. Phase 25 will fold the call graph in.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(tag = "reach", rename_all = "snake_case")]
|
||||
pub enum Reach {
|
||||
/// Finding is in a file that hosts at least one entry-point.
|
||||
/// `route` and `method` describe the first matching entry-point
|
||||
/// (surface-canonical order).
|
||||
Reachable {
|
||||
location: SourceLocation,
|
||||
method: HttpMethod,
|
||||
route: String,
|
||||
auth_required: bool,
|
||||
},
|
||||
/// Finding is in a file with no surface entry-points.
|
||||
Unreachable,
|
||||
}
|
||||
|
||||
/// One edge in the chain graph.
|
||||
///
|
||||
/// Phase 24's edges live at the granularity of a single finding.
|
||||
/// Phase 25 will introduce additional edge kinds (entry → finding,
|
||||
/// finding → sink-cluster, etc.) once path search is wired up.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ChainEdge {
|
||||
pub finding: FindingRef,
|
||||
/// Primary cap classification. Picked via [`pick_chain_cap`]: when
|
||||
/// several cap bits are set, prefers a bit that has a standalone
|
||||
/// rule in [`crate::chain::impact::IMPACT_LATTICE`] over the
|
||||
/// lowest bit so a `SQL_QUERY | CODE_EXEC` finding lands on the
|
||||
/// chain-relevant cap (`CODE_EXEC`). Falls back to the lowest set
|
||||
/// bit when no bit has a standalone rule, keeping single-cap
|
||||
/// findings deterministic.
|
||||
pub primary_cap: Cap,
|
||||
/// Where the finding sits relative to the surface.
|
||||
pub reach: Reach,
|
||||
/// Phase 25 path-score factor.
|
||||
pub feasibility: Feasibility,
|
||||
}
|
||||
|
||||
/// Convert each [`Diag`] to one [`ChainEdge`].
|
||||
///
|
||||
/// Findings without cap bits (`Diag::evidence.sink_caps == 0`) are
|
||||
/// dropped — the chain composer cannot classify them on a typed
|
||||
/// lattice and Phase 25's scoring expects every edge to expose a
|
||||
/// primary cap. This is a deliberate quiet-drop: such findings are
|
||||
/// usually structural CFG diagnostics (e.g. `cfg-auth-gap`) whose
|
||||
/// chain participation is modelled by the SurfaceMap's
|
||||
/// `AuthRequiredOn` edges instead.
|
||||
///
|
||||
/// The output order mirrors `findings`; the caller is responsible for
|
||||
/// any further canonicalisation.
|
||||
pub fn findings_to_edges(findings: &[Diag], surface: &SurfaceMap) -> Vec<ChainEdge> {
|
||||
findings_to_edges_with_reach(findings, surface, None)
|
||||
}
|
||||
|
||||
/// Like [`findings_to_edges`] but optionally consults a [`FileReachMap`]
|
||||
/// to widen `Reach::Reachable` beyond the file-local match.
|
||||
///
|
||||
/// When `reach` is `Some`, a finding's enclosing file is also considered
|
||||
/// `Reachable` whenever any [`SurfaceNode::EntryPoint`]'s
|
||||
/// `handler_location.file` transitively reaches the finding's file via
|
||||
/// the call graph. The first matching entry-point (surface-canonical
|
||||
/// order) is used to populate the `route` / `method` / `auth_required`
|
||||
/// fields.
|
||||
///
|
||||
/// `reach = None` is byte-identical to the legacy [`findings_to_edges`]
|
||||
/// behaviour. Path strings on both sides must use the same convention
|
||||
/// (project-relative POSIX) for the widening to fire; mismatched paths
|
||||
/// silently fall through to the file-local heuristic.
|
||||
pub fn findings_to_edges_with_reach(
|
||||
findings: &[Diag],
|
||||
surface: &SurfaceMap,
|
||||
reach: Option<&FileReachMap>,
|
||||
) -> Vec<ChainEdge> {
|
||||
findings
|
||||
.iter()
|
||||
.filter_map(|d| build_edge(d, surface, reach))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_edge(
|
||||
diag: &Diag,
|
||||
surface: &SurfaceMap,
|
||||
reach: Option<&FileReachMap>,
|
||||
) -> Option<ChainEdge> {
|
||||
let evidence = diag.evidence.as_ref()?;
|
||||
if evidence.sink_caps == 0 {
|
||||
return None;
|
||||
}
|
||||
let cap_bits = evidence.sink_caps;
|
||||
let primary_cap = pick_chain_cap(cap_bits)?;
|
||||
let location = SourceLocation::new(diag.path.clone(), diag.line as u32, diag.col as u32);
|
||||
let reach_kind = locate_reach(&location, surface, reach);
|
||||
let feasibility = Feasibility::for_finding(diag);
|
||||
let finding = FindingRef {
|
||||
finding_id: diag.finding_id.clone(),
|
||||
stable_hash: diag.stable_hash,
|
||||
location,
|
||||
rule_id: diag.id.clone(),
|
||||
cap_bits,
|
||||
};
|
||||
Some(ChainEdge {
|
||||
finding,
|
||||
primary_cap,
|
||||
reach: reach_kind,
|
||||
feasibility,
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the lowest single-bit [`Cap`] present in `bits`, or `None`
|
||||
/// when `bits == 0`. Deterministic: always picks the lowest bit.
|
||||
pub fn lowest_cap(bits: u32) -> Option<Cap> {
|
||||
if bits == 0 {
|
||||
return None;
|
||||
}
|
||||
let lowest = 1u32 << bits.trailing_zeros();
|
||||
Cap::from_bits(lowest)
|
||||
}
|
||||
|
||||
/// Pick the chain-relevant [`Cap`] from a sink-cap bitmask.
|
||||
///
|
||||
/// When multiple caps are set, prefer one that has a standalone rule in
|
||||
/// [`crate::chain::impact::IMPACT_LATTICE`] (e.g. `CODE_EXEC`,
|
||||
/// `DESERIALIZE`, `SSRF`) over the lowest set bit. A finding with
|
||||
/// `sink_caps = SQL_QUERY | CODE_EXEC` previously resolved to
|
||||
/// `SQL_QUERY` (the lowest bit) and missed the `CODE_EXEC → Rce`
|
||||
/// lattice rule; this helper resolves it to `CODE_EXEC` instead.
|
||||
///
|
||||
/// Iterates bits low to high so ties between caps with standalone
|
||||
/// rules stay deterministic. Falls back to [`lowest_cap`] when no
|
||||
/// bit has a standalone rule, preserving single-cap behaviour.
|
||||
pub fn pick_chain_cap(bits: u32) -> Option<Cap> {
|
||||
if bits == 0 {
|
||||
return None;
|
||||
}
|
||||
let mut remaining = bits;
|
||||
while remaining != 0 {
|
||||
let bit = 1u32 << remaining.trailing_zeros();
|
||||
if let Some(cap) = Cap::from_bits(bit)
|
||||
&& lookup_impact(cap, None).is_some()
|
||||
{
|
||||
return Some(cap);
|
||||
}
|
||||
remaining &= !bit;
|
||||
}
|
||||
lowest_cap(bits)
|
||||
}
|
||||
|
||||
fn locate_reach(loc: &SourceLocation, surface: &SurfaceMap, reach: Option<&FileReachMap>) -> Reach {
|
||||
// Pass 1: file-local match (legacy behaviour, always applies).
|
||||
for node in &surface.nodes {
|
||||
if let SurfaceNode::EntryPoint(ep) = node
|
||||
&& ep.handler_location.file == loc.file
|
||||
{
|
||||
return Reach::Reachable {
|
||||
location: ep.location.clone(),
|
||||
method: ep.method,
|
||||
route: ep.route.clone(),
|
||||
auth_required: ep.auth_required,
|
||||
};
|
||||
}
|
||||
}
|
||||
// Pass 2: transitive caller match via the call graph. Only fires
|
||||
// when `reach` is supplied — keeps the legacy file-local behaviour
|
||||
// for callers that have not yet wired the call-graph reach map.
|
||||
if let Some(reach) = reach {
|
||||
for node in &surface.nodes {
|
||||
if let SurfaceNode::EntryPoint(ep) = node
|
||||
&& reach.reaches(&ep.handler_location.file, &loc.file)
|
||||
{
|
||||
return Reach::Reachable {
|
||||
location: ep.location.clone(),
|
||||
method: ep.method,
|
||||
route: ep.route.clone(),
|
||||
auth_required: ep.auth_required,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
Reach::Unreachable
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::evidence::Evidence;
|
||||
use crate::patterns::FindingCategory;
|
||||
|
||||
fn diag_with_cap(path: &str, line: usize, caps: Cap) -> Diag {
|
||||
let ev = Evidence {
|
||||
sink_caps: caps.bits(),
|
||||
..Evidence::default()
|
||||
};
|
||||
Diag {
|
||||
path: path.into(),
|
||||
line,
|
||||
col: 1,
|
||||
id: "test-rule".into(),
|
||||
category: FindingCategory::Security,
|
||||
evidence: Some(ev),
|
||||
..Diag::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lowest_cap_picks_least_significant_bit() {
|
||||
let combined = Cap::SQL_QUERY | Cap::FILE_IO;
|
||||
assert_eq!(lowest_cap(combined.bits()), Some(Cap::FILE_IO));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pick_chain_cap_prefers_standalone_rule_cap() {
|
||||
// SQL_QUERY (bit 7) has no standalone lattice rule; CODE_EXEC
|
||||
// (bit 10) does. Lowest-bit alone would pick SQL_QUERY.
|
||||
let combined = Cap::SQL_QUERY | Cap::CODE_EXEC;
|
||||
assert_eq!(pick_chain_cap(combined.bits()), Some(Cap::CODE_EXEC));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pick_chain_cap_falls_back_to_lowest_when_no_standalone_rule() {
|
||||
// SQL_QUERY + FILE_IO: neither has a standalone rule, fall
|
||||
// back to lowest_cap behaviour.
|
||||
let combined = Cap::SQL_QUERY | Cap::FILE_IO;
|
||||
assert_eq!(pick_chain_cap(combined.bits()), Some(Cap::FILE_IO));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pick_chain_cap_single_bit_unchanged() {
|
||||
assert_eq!(pick_chain_cap(Cap::CODE_EXEC.bits()), Some(Cap::CODE_EXEC));
|
||||
assert_eq!(pick_chain_cap(Cap::SQL_QUERY.bits()), Some(Cap::SQL_QUERY));
|
||||
assert_eq!(pick_chain_cap(0), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn drops_findings_without_cap_bits() {
|
||||
let mut d = diag_with_cap("a.py", 1, Cap::CODE_EXEC);
|
||||
d.evidence.as_mut().unwrap().sink_caps = 0;
|
||||
let edges = findings_to_edges(&[d], &SurfaceMap::new());
|
||||
assert!(edges.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reach_unreachable_without_matching_entry_point() {
|
||||
let d = diag_with_cap("orphan.py", 2, Cap::CODE_EXEC);
|
||||
let edges = findings_to_edges(&[d], &SurfaceMap::new());
|
||||
assert_eq!(edges.len(), 1);
|
||||
assert!(matches!(edges[0].reach, Reach::Unreachable));
|
||||
}
|
||||
|
||||
/// Cross-file finding becomes Reachable when the call-graph reach
|
||||
/// map records a transitive caller in the entry-point's file.
|
||||
#[test]
|
||||
fn reach_widens_with_file_reach_map() {
|
||||
use crate::callgraph::{FileReachMap, build_call_graph};
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::summary::{FuncSummary, merge_summaries};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
|
||||
// routes.py::handle -> helper.py::sink
|
||||
let handle = FuncSummary {
|
||||
name: "handle".into(),
|
||||
file_path: "routes.py".into(),
|
||||
lang: "python".into(),
|
||||
param_count: 0,
|
||||
callees: vec![crate::summary::CalleeSite::bare("sink")],
|
||||
..Default::default()
|
||||
};
|
||||
let sink = FuncSummary {
|
||||
name: "sink".into(),
|
||||
file_path: "helper.py".into(),
|
||||
lang: "python".into(),
|
||||
param_count: 0,
|
||||
..Default::default()
|
||||
};
|
||||
let gs = merge_summaries(vec![handle, sink], None);
|
||||
let cg = build_call_graph(&gs, &[]);
|
||||
let reach = FileReachMap::build(&cg);
|
||||
|
||||
let mut surface = SurfaceMap::new();
|
||||
surface.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: SourceLocation::new("routes.py", 1, 1),
|
||||
framework: Framework::Flask,
|
||||
method: HttpMethod::GET,
|
||||
route: "/".into(),
|
||||
handler_name: "handle".into(),
|
||||
handler_location: SourceLocation::new("routes.py", 2, 1),
|
||||
auth_required: false,
|
||||
}));
|
||||
|
||||
let d = diag_with_cap("helper.py", 10, Cap::CODE_EXEC);
|
||||
|
||||
// Without reach: file-local lookup leaves the finding Unreachable.
|
||||
let edges = findings_to_edges(std::slice::from_ref(&d), &surface);
|
||||
assert!(matches!(edges[0].reach, Reach::Unreachable));
|
||||
|
||||
// With reach: transitive caller in `routes.py` lifts to Reachable.
|
||||
let edges = findings_to_edges_with_reach(&[d], &surface, Some(&reach));
|
||||
match &edges[0].reach {
|
||||
Reach::Reachable { route, method, .. } => {
|
||||
assert_eq!(route, "/");
|
||||
assert_eq!(*method, HttpMethod::GET);
|
||||
}
|
||||
other => panic!("expected Reachable, got {other:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
157
src/chain/feasibility.rs
Normal file
157
src/chain/feasibility.rs
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
//! Phase 24 — feasibility scoring for chain edges.
|
||||
//!
|
||||
//! Each edge produced by [`crate::chain::edges::findings_to_edges`]
|
||||
//! carries a feasibility weight in `[0.0, 1.0]`. The weight enters
|
||||
//! Phase 25's path score as the multiplicative factor in
|
||||
//! `score(path) = sum(impact) * product(feasibility)`, so a single
|
||||
//! low-feasibility hop dampens the entire chain.
|
||||
//!
|
||||
//! # Buckets
|
||||
//!
|
||||
//! | Bucket | Weight | Trigger |
|
||||
//! |-------------------------|--------|-------------------------------------------------------------|
|
||||
//! | [`Confirmed`] | `1.0` | dynamic [`VerifyStatus::Confirmed`] |
|
||||
//! | [`InconclusiveHighConf`]| `0.5` | dynamic [`VerifyStatus::Inconclusive`] + static `High` |
|
||||
//! | [`Unverified`] | `0.1` | everything else (no verdict, `NotConfirmed`, `PartiallyConfirmed`, `Unsupported`, |
|
||||
//! | | | or `Inconclusive` without a high static confidence) |
|
||||
//!
|
||||
//! [`Confirmed`]: Feasibility::Confirmed
|
||||
//! [`InconclusiveHighConf`]: Feasibility::InconclusiveHighConf
|
||||
//! [`Unverified`]: Feasibility::Unverified
|
||||
//! [`VerifyStatus::Confirmed`]: crate::evidence::VerifyStatus::Confirmed
|
||||
//! [`VerifyStatus::Inconclusive`]: crate::evidence::VerifyStatus::Inconclusive
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::evidence::{Confidence, VerifyResult, VerifyStatus};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Discrete feasibility bucket for a chain edge.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum Feasibility {
|
||||
/// Dynamic verification fired the sink probe.
|
||||
Confirmed,
|
||||
/// Dynamic verification was Inconclusive but the static engine's
|
||||
/// confidence in the finding is `High`. Used for findings that
|
||||
/// the verifier could not exercise (build failure, sandbox refuse)
|
||||
/// but where the static evidence is strong.
|
||||
InconclusiveHighConf,
|
||||
/// Everything else — no dynamic verification, dynamic verdict was
|
||||
/// `NotConfirmed`/`PartiallyConfirmed`/`Unsupported`, or dynamic was
|
||||
/// `Inconclusive` but static confidence is not `High`. A
|
||||
/// `PartiallyConfirmed` verdict proves only that the sink is reachable,
|
||||
/// not that the exploit chain completes, so it stays conservative here:
|
||||
/// it must not inflate a multi-hop path score.
|
||||
Unverified,
|
||||
}
|
||||
|
||||
impl Feasibility {
|
||||
/// Multiplicative weight contributed to Phase 25's path score.
|
||||
pub const fn score(self) -> f32 {
|
||||
match self {
|
||||
Feasibility::Confirmed => 1.0,
|
||||
Feasibility::InconclusiveHighConf => 0.5,
|
||||
Feasibility::Unverified => 0.1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate a dynamic [`VerifyResult`] into a feasibility weight.
|
||||
///
|
||||
/// This is the literal signature the design doc specifies. It
|
||||
/// cannot distinguish `Inconclusive` with high static confidence
|
||||
/// from `Inconclusive` with low static confidence (the static
|
||||
/// confidence is carried on the [`Diag`], not on the
|
||||
/// [`VerifyResult`]); use [`Feasibility::for_finding`] when both
|
||||
/// halves of the input are available.
|
||||
pub fn from_verdict(verdict: Option<&VerifyResult>) -> f32 {
|
||||
Self::bucket_from_verdict(verdict, None).score()
|
||||
}
|
||||
|
||||
/// Same as [`from_verdict`](Self::from_verdict) but consults the
|
||||
/// static `Diag.confidence` so the `Inconclusive_HighConf` bucket
|
||||
/// in the doc's table can fire. Phase 25's scoring pass uses this
|
||||
/// flavour.
|
||||
pub fn for_finding(diag: &Diag) -> Feasibility {
|
||||
let verdict = diag
|
||||
.evidence
|
||||
.as_ref()
|
||||
.and_then(|e| e.dynamic_verdict.as_ref());
|
||||
Self::bucket_from_verdict(verdict, diag.confidence)
|
||||
}
|
||||
|
||||
/// Discrete-bucket flavour of [`from_verdict`](Self::from_verdict).
|
||||
/// Exposed for callers that want the bucket (e.g. for telemetry or
|
||||
/// UI badges) before reducing to an `f32`.
|
||||
pub fn bucket_from_verdict(
|
||||
verdict: Option<&VerifyResult>,
|
||||
static_confidence: Option<Confidence>,
|
||||
) -> Feasibility {
|
||||
match verdict.map(|v| v.status) {
|
||||
Some(VerifyStatus::Confirmed) => Feasibility::Confirmed,
|
||||
Some(VerifyStatus::Inconclusive) if static_confidence == Some(Confidence::High) => {
|
||||
Feasibility::InconclusiveHighConf
|
||||
}
|
||||
_ => Feasibility::Unverified,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::evidence::VerifyResult;

    /// Bare-bones verdict fixture: only `status` varies between tests.
    fn verdict(status: VerifyStatus) -> VerifyResult {
        VerifyResult {
            finding_id: "f".into(),
            status,
            triggered_payload: None,
            reason: None,
            inconclusive_reason: None,
            detail: None,
            attempts: Vec::new(),
            toolchain_match: None,
            differential: None,
            replay_stable: None,
            wrong: None,
            hardening_outcome: None,
        }
    }

    /// Weight `from_verdict` yields for a verdict carrying `status`.
    fn weight_for(status: VerifyStatus) -> f32 {
        let fixture = verdict(status);
        Feasibility::from_verdict(Some(&fixture))
    }

    #[test]
    fn confirmed_returns_one() {
        assert_eq!(weight_for(VerifyStatus::Confirmed), 1.0);
    }

    #[test]
    fn inconclusive_without_confidence_returns_unverified() {
        assert_eq!(weight_for(VerifyStatus::Inconclusive), 0.1);
    }

    #[test]
    fn inconclusive_with_high_confidence_returns_half() {
        let fixture = verdict(VerifyStatus::Inconclusive);
        let bucket = Feasibility::bucket_from_verdict(Some(&fixture), Some(Confidence::High));
        assert_eq!(bucket, Feasibility::InconclusiveHighConf);
        assert_eq!(bucket.score(), 0.5);
    }

    #[test]
    fn not_confirmed_returns_unverified() {
        assert_eq!(weight_for(VerifyStatus::NotConfirmed), 0.1);
    }

    #[test]
    fn unsupported_returns_unverified() {
        assert_eq!(weight_for(VerifyStatus::Unsupported), 0.1);
    }

    #[test]
    fn no_verdict_returns_unverified() {
        assert_eq!(Feasibility::from_verdict(None), 0.1);
    }
}
|
||||
247
src/chain/finding.rs
Normal file
247
src/chain/finding.rs
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
//! Phase 25 — chain finding emitted by the composer.
|
||||
//!
|
||||
//! A [`ChainFinding`] is the externally-visible artefact produced by
|
||||
//! Track G: a sequence of static findings whose composition implies a
|
||||
//! higher-level [`ImpactCategory`] than any single member. The chain
|
||||
//! has its own [`ChainSeverity`] (a strict superset of the per-finding
|
||||
//! [`crate::patterns::Severity`] axis, with `Critical` reserved for
|
||||
//! chains so default-severity gates do not accidentally fire on a
|
||||
//! chained-only impact).
|
||||
//!
|
||||
//! # Determinism
|
||||
//!
|
||||
//! `stable_hash` is the BLAKE3-truncated digest of the chain member
|
||||
//! hashes joined with the implied impact byte. Two scans of the same
|
||||
//! source produce the same `stable_hash` regardless of DFS visitation
|
||||
//! order.
|
||||
//!
|
||||
//! # Suppressing constituents in default output
|
||||
//!
|
||||
//! Phase 25 keeps individual constituent findings on the wire — they
|
||||
//! still travel inside `Diag` form — but the JSON / SARIF emitters
|
||||
//! gate their visibility on [`crate::utils::config::OutputConfig::show_chain_constituents`].
|
||||
//! See `crate::output::filter_constituents` for the gating.
|
||||
|
||||
use crate::chain::edges::FindingRef;
|
||||
use crate::chain::impact::ImpactCategory;
|
||||
use crate::evidence::{VerifyResult, VerifyStatus};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
|
||||
/// Severity bucket assigned to a [`ChainFinding`].
|
||||
///
|
||||
/// Distinct from [`crate::patterns::Severity`] so that chain output
|
||||
/// (which is, by construction, a composition of *several* findings)
|
||||
/// does not collide with the per-finding axis. `Critical` is the
|
||||
/// highest grade and is reserved for chains whose impact is
|
||||
/// terminal RCE (`Rce`, `BrowserToLocalRce`).
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ChainSeverity {
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
Critical,
|
||||
}
|
||||
|
||||
impl fmt::Display for ChainSeverity {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(match self {
|
||||
ChainSeverity::Low => "LOW",
|
||||
ChainSeverity::Medium => "MEDIUM",
|
||||
ChainSeverity::High => "HIGH",
|
||||
ChainSeverity::Critical => "CRITICAL",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ChainSeverity {
|
||||
/// Phase 26 — drop one severity bucket. Used by composite
|
||||
/// re-verification when the chain's dynamic verdict is
|
||||
/// `Inconclusive`: the chain stays on the wire but its severity
|
||||
/// loses one notch so triagers see the verification gap.
|
||||
///
|
||||
/// `Low` is the floor — calling `downgraded()` on `Low` returns
|
||||
/// `Low` so the helper is idempotent.
|
||||
pub fn downgraded(self) -> Self {
|
||||
match self {
|
||||
ChainSeverity::Critical => ChainSeverity::High,
|
||||
ChainSeverity::High => ChainSeverity::Medium,
|
||||
ChainSeverity::Medium => ChainSeverity::Low,
|
||||
ChainSeverity::Low => ChainSeverity::Low,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One member of a [`ChainFinding`].
|
||||
///
|
||||
/// Wraps a [`FindingRef`] so the chain output can name each constituent
|
||||
/// without duplicating the finding's evidence; consumers join back to
|
||||
/// the `findings: [...]` array via [`FindingRef::finding_id`] /
|
||||
/// [`FindingRef::stable_hash`].
|
||||
pub type ChainMember = FindingRef;
|
||||
|
||||
/// A composed exploit chain.
|
||||
///
|
||||
/// Phase 25 emits these from [`crate::chain::search::find_chains`].
|
||||
/// Phase 26 will populate `dynamic_verdict` from a composite
|
||||
/// re-verification pass; Phase 25 always leaves it as `None`.
|
||||
///
|
||||
/// `PartialEq` is omitted because [`crate::evidence::VerifyResult`] is
|
||||
/// not `PartialEq`. Equality checks at the test layer compare on
|
||||
/// `stable_hash` instead.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ChainFinding {
|
||||
/// BLAKE3 of `(member.stable_hash for member in members) || implied_impact`,
|
||||
/// truncated to 64 bits. Stable across scans for the same chain.
|
||||
pub stable_hash: u64,
|
||||
/// Constituent findings, in path order (entry-adjacent first,
|
||||
/// sink-adjacent last).
|
||||
pub members: Vec<ChainMember>,
|
||||
/// The dangerous-local sink terminating the chain. Carries the
|
||||
/// callee function name and cap bits so consumers can describe
|
||||
/// the chain without re-walking the SurfaceMap.
|
||||
pub sink: ChainSink,
|
||||
/// Composed impact category derived from member caps + adjacency.
|
||||
pub implied_impact: ImpactCategory,
|
||||
/// Chain severity, computed in [`crate::output::severity`].
|
||||
pub severity: ChainSeverity,
|
||||
/// Numeric score from [`crate::chain::score::score_path`].
|
||||
/// Carried verbatim for JSON output so consumers can re-sort.
|
||||
pub score: f64,
|
||||
/// Composite dynamic verification verdict. `None` until Phase 26's
|
||||
/// `reverify_chain` runs over the chain.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub dynamic_verdict: Option<VerifyResult>,
|
||||
/// Phase 26 — Track G.3: human-readable reason when composite
|
||||
/// re-verification altered the chain's outcome. Populated when
|
||||
/// `dynamic_verdict.status` is `Inconclusive` and the severity was
|
||||
/// downgraded; `None` when the verdict either confirmed the chain
|
||||
/// or left the severity untouched.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub reverify_reason: Option<String>,
|
||||
}
|
||||
|
||||
/// Sink terminus of a [`ChainFinding`]. Mirrors the
|
||||
/// [`crate::surface::DangerousLocal`] node the path ends at.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ChainSink {
|
||||
pub file: String,
|
||||
pub line: u32,
|
||||
pub col: u32,
|
||||
pub function_name: String,
|
||||
pub cap_bits: u32,
|
||||
}
|
||||
|
||||
impl ChainFinding {
|
||||
/// Compute the stable hash from a member list + impact category.
|
||||
/// Exposed so callers that build a `ChainFinding` outside
|
||||
/// [`crate::chain::search`] (tests, future composers) stay in sync
|
||||
/// with the canonical hash formula.
|
||||
pub fn compute_stable_hash(members: &[ChainMember], implied_impact: ImpactCategory) -> u64 {
|
||||
let mut h = blake3::Hasher::new();
|
||||
for m in members {
|
||||
h.update(&m.stable_hash.to_le_bytes());
|
||||
}
|
||||
h.update(&[impact_byte(implied_impact)]);
|
||||
let out = h.finalize();
|
||||
let bytes = out.as_bytes();
|
||||
u64::from_le_bytes(bytes[..8].try_into().unwrap())
|
||||
}
|
||||
|
||||
/// Phase 26 — Track G.3: attach a composite verdict + apply the
|
||||
/// `Inconclusive → severity downgrade` rule.
|
||||
///
|
||||
/// - `Confirmed` / `NotConfirmed` / `Unsupported`: severity stays
|
||||
/// put; `reverify_reason` cleared.
|
||||
/// - `Inconclusive`: severity drops one bucket
|
||||
/// ([`ChainSeverity::downgraded`]) and `reverify_reason` is set
|
||||
/// from the verdict's typed inconclusive reason (with a fallback
|
||||
/// to a generic "inconclusive composite verification" string when
|
||||
/// the verdict has no typed reason).
|
||||
pub fn apply_dynamic_verdict(&mut self, verdict: VerifyResult) {
|
||||
if verdict.status == VerifyStatus::Inconclusive {
|
||||
self.severity = self.severity.downgraded();
|
||||
let reason = match &verdict.inconclusive_reason {
|
||||
Some(r) => format!("composite reverification inconclusive: {r}"),
|
||||
None => match verdict.detail.as_deref() {
|
||||
Some(d) if !d.is_empty() => {
|
||||
format!("composite reverification inconclusive: {d}")
|
||||
}
|
||||
_ => "composite reverification inconclusive".to_owned(),
|
||||
},
|
||||
};
|
||||
self.reverify_reason = Some(reason);
|
||||
} else {
|
||||
self.reverify_reason = None;
|
||||
}
|
||||
self.dynamic_verdict = Some(verdict);
|
||||
}
|
||||
}
|
||||
|
||||
/// Stable byte tag for each [`ImpactCategory`]. Used by
|
||||
/// [`ChainFinding::compute_stable_hash`] so adding an impact variant
|
||||
/// does not silently shift every other chain's hash.
|
||||
const fn impact_byte(c: ImpactCategory) -> u8 {
|
||||
match c {
|
||||
ImpactCategory::Rce => 1,
|
||||
ImpactCategory::BrowserToLocalRce => 2,
|
||||
ImpactCategory::SessionHijack => 3,
|
||||
ImpactCategory::InternalNetworkAccess => 4,
|
||||
ImpactCategory::InfoDisclosure => 5,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::FindingRef;
    use crate::surface::SourceLocation;

    /// Member fixture whose only meaningful field is `stable_hash`.
    fn member(hash: u64) -> ChainMember {
        FindingRef {
            finding_id: format!("f-{hash}"),
            stable_hash: hash,
            location: SourceLocation::new("a.py", 1, 1),
            rule_id: "test".into(),
            cap_bits: 0,
        }
    }

    /// Chain hash for members built from `hashes`, in the given order.
    fn chain_hash(hashes: &[u64], impact: ImpactCategory) -> u64 {
        let members: Vec<ChainMember> = hashes.iter().copied().map(member).collect();
        ChainFinding::compute_stable_hash(&members, impact)
    }

    #[test]
    fn stable_hash_changes_with_member_order() {
        let forward = chain_hash(&[1, 2], ImpactCategory::Rce);
        let reversed = chain_hash(&[2, 1], ImpactCategory::Rce);
        assert_ne!(forward, reversed);
    }

    #[test]
    fn stable_hash_changes_with_impact() {
        let as_rce = chain_hash(&[1, 2], ImpactCategory::Rce);
        let as_browser = chain_hash(&[1, 2], ImpactCategory::BrowserToLocalRce);
        assert_ne!(as_rce, as_browser);
    }

    #[test]
    fn stable_hash_deterministic_across_calls() {
        let first = chain_hash(&[1, 2, 3], ImpactCategory::Rce);
        let second = chain_hash(&[1, 2, 3], ImpactCategory::Rce);
        assert_eq!(first, second);
    }

    #[test]
    fn severity_ordering_is_critical_top() {
        assert!(ChainSeverity::Medium > ChainSeverity::Low);
        assert!(ChainSeverity::High > ChainSeverity::Medium);
        assert!(ChainSeverity::Critical > ChainSeverity::High);
    }
}
|
||||
333
src/chain/impact.rs
Normal file
333
src/chain/impact.rs
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
//! Phase 24 — impact lattice for the exploit-chain composer.
|
||||
//!
|
||||
//! Each [`ImpactRule`] is a `(source_cap, adjacent_cap, result)` triple
|
||||
//! drawn from the design doc's lattice:
|
||||
//!
|
||||
//! | Rule | Result |
|
||||
//! |-------------------------------|-------------------------|
|
||||
//! | `CODE_EXEC` | `Rce` |
|
||||
//! | `DESERIALIZE` | `Rce` |
|
||||
//! | `SSRF` | `InternalNetworkAccess` |
|
||||
//! | `OPEN_REDIRECT + UNAUTHORIZED_ID` | `SessionHijack` |
|
||||
//! | `HEADER_INJECTION + CODE_EXEC` | `BrowserToLocalRce` |
|
||||
//! | `FILE_IO + DATA_EXFIL` | `InfoDisclosure` |
|
||||
//!
|
||||
//! The doc spells some lattice nodes with surface-level handles
|
||||
//! (`UserSession`, `Cors`, `NoAuth`, `LocalListener`,
|
||||
//! `SensitiveFileIo`, `PathTraversal`). Those nodes do not map 1:1
|
||||
//! onto [`Cap`] bits, so the table above uses the closest [`Cap`]
|
||||
//! approximations:
|
||||
//!
|
||||
//! - `UserSession` → [`Cap::UNAUTHORIZED_ID`] (request-bound caller
|
||||
//! identifier carrier)
|
||||
//! - `Cors + NoAuth` → [`Cap::HEADER_INJECTION`] (the CORS-relaxing
|
||||
//! header is the structural marker; the no-auth side is folded into
|
||||
//! Phase 25's surface-property check on [`crate::surface::EntryPoint::auth_required`])
|
||||
//! - `LocalListener` → no cap; folded into Phase 25's surface check
|
||||
//! ([`crate::surface::DataStoreKind::Sql`] /
|
||||
//! [`crate::surface::ExternalServiceKind::HttpApi`] etc.)
|
||||
//! - `SensitiveFileIo` → [`Cap::DATA_EXFIL`] (egress-of-sensitive-data
|
||||
//! carrier)
|
||||
//! - `PathTraversal` → [`Cap::FILE_IO`]
|
||||
//!
|
||||
//! # Exhaustiveness
|
||||
//!
|
||||
//! Pattern-matching exhaustively on [`Cap`] is impossible — it is a
|
||||
//! `bitflags!` struct over `u32`, not a closed enum. This module
|
||||
//! adopts the [`crate::dynamic::corpus`] pattern instead: every Cap
|
||||
//! bit belongs to exactly one of [`IMPACT_LATTICE_COVERED`] or
|
||||
//! [`IMPACT_LATTICE_UNCOVERED`], with a const assertion that the
|
||||
//! union equals [`Cap::all`]. Adding a new `Cap` bit without
|
||||
//! updating one of those constants fails to compile.
|
||||
|
||||
use crate::labels::Cap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Impact category produced by a successful chain composition.
|
||||
///
|
||||
/// Phase 24 enumerates the categories the doc's lattice produces.
|
||||
/// Phase 25's scoring pass attaches a severity to each category and
|
||||
/// folds them into the final [`crate::chain::ChainGraph`] output.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ImpactCategory {
|
||||
/// Remote code execution.
|
||||
Rce,
|
||||
/// Browser-mediated path to local code execution (e.g. permissive
|
||||
/// CORS plus an unauthenticated endpoint that hands off to a
|
||||
/// `CODE_EXEC` sink).
|
||||
BrowserToLocalRce,
|
||||
/// Session-token hijack via an attacker-controlled redirect that
|
||||
/// keeps the user's auth identity in the request flow.
|
||||
SessionHijack,
|
||||
/// SSRF that lands on an internal/local listener.
|
||||
InternalNetworkAccess,
|
||||
/// Sensitive data egress through a path-traversal-like primitive.
|
||||
InfoDisclosure,
|
||||
}
|
||||
|
||||
/// One rule in the impact lattice.
|
||||
///
|
||||
/// `adjacent_cap` is `None` for self-sufficient rules
|
||||
/// (`CODE_EXEC → Rce`, `DESERIALIZE → Rce`, `SSRF → InternalNetworkAccess`)
|
||||
/// and `Some(cap)` for rules that need a second co-located finding
|
||||
/// (`OPEN_REDIRECT + UNAUTHORIZED_ID → SessionHijack`, etc.).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct ImpactRule {
|
||||
pub source_cap: Cap,
|
||||
pub adjacent_cap: Option<Cap>,
|
||||
pub result: ImpactCategory,
|
||||
}
|
||||
|
||||
/// The default impact lattice from the design doc.
|
||||
///
|
||||
/// Order matters for [`lookup_impact`]: more specific rules
|
||||
/// (`adjacent_cap.is_some()`) appear before the broader fallbacks so a
|
||||
/// `CODE_EXEC + ...` finding pair is classified as
|
||||
/// `BrowserToLocalRce` before the standalone `CODE_EXEC → Rce`
|
||||
/// fallback fires.
|
||||
pub static IMPACT_LATTICE: &[ImpactRule] = &[
|
||||
// ── 2-cap rules (most specific first) ─────────────────────────
|
||||
ImpactRule {
|
||||
source_cap: Cap::OPEN_REDIRECT,
|
||||
adjacent_cap: Some(Cap::UNAUTHORIZED_ID),
|
||||
result: ImpactCategory::SessionHijack,
|
||||
},
|
||||
ImpactRule {
|
||||
source_cap: Cap::HEADER_INJECTION,
|
||||
adjacent_cap: Some(Cap::CODE_EXEC),
|
||||
result: ImpactCategory::BrowserToLocalRce,
|
||||
},
|
||||
ImpactRule {
|
||||
source_cap: Cap::FILE_IO,
|
||||
adjacent_cap: Some(Cap::DATA_EXFIL),
|
||||
result: ImpactCategory::InfoDisclosure,
|
||||
},
|
||||
// ── 1-cap rules ───────────────────────────────────────────────
|
||||
ImpactRule {
|
||||
source_cap: Cap::CODE_EXEC,
|
||||
adjacent_cap: None,
|
||||
result: ImpactCategory::Rce,
|
||||
},
|
||||
ImpactRule {
|
||||
source_cap: Cap::DESERIALIZE,
|
||||
adjacent_cap: None,
|
||||
result: ImpactCategory::Rce,
|
||||
},
|
||||
ImpactRule {
|
||||
source_cap: Cap::SSRF,
|
||||
adjacent_cap: None,
|
||||
result: ImpactCategory::InternalNetworkAccess,
|
||||
},
|
||||
];
|
||||
|
||||
/// Caps that participate in at least one impact rule (either as
|
||||
/// `source_cap` or as `adjacent_cap`). Update when adding a rule.
|
||||
pub const IMPACT_LATTICE_COVERED: u32 = Cap::CODE_EXEC.bits()
|
||||
| Cap::DESERIALIZE.bits()
|
||||
| Cap::SSRF.bits()
|
||||
| Cap::OPEN_REDIRECT.bits()
|
||||
| Cap::UNAUTHORIZED_ID.bits()
|
||||
| Cap::HEADER_INJECTION.bits()
|
||||
| Cap::FILE_IO.bits()
|
||||
| Cap::DATA_EXFIL.bits();
|
||||
|
||||
/// Caps that do not participate in any impact rule today. Adding a
|
||||
/// rule that consumes one of these caps requires moving it into
|
||||
/// [`IMPACT_LATTICE_COVERED`] above.
|
||||
pub const IMPACT_LATTICE_UNCOVERED: u32 = Cap::ENV_VAR.bits()
|
||||
| Cap::HTML_ESCAPE.bits()
|
||||
| Cap::SHELL_ESCAPE.bits()
|
||||
| Cap::URL_ENCODE.bits()
|
||||
| Cap::JSON_PARSE.bits()
|
||||
| Cap::FMT_STRING.bits()
|
||||
| Cap::SQL_QUERY.bits()
|
||||
| Cap::CRYPTO.bits()
|
||||
| Cap::LDAP_INJECTION.bits()
|
||||
| Cap::XPATH_INJECTION.bits()
|
||||
| Cap::SSTI.bits()
|
||||
| Cap::XXE.bits()
|
||||
| Cap::PROTOTYPE_POLLUTION.bits();
|
||||
|
||||
const _: () = assert!(
|
||||
IMPACT_LATTICE_COVERED | IMPACT_LATTICE_UNCOVERED == Cap::all().bits(),
|
||||
"Cap bit missing from impact lattice coverage; \
|
||||
add to IMPACT_LATTICE_COVERED or IMPACT_LATTICE_UNCOVERED and decide \
|
||||
whether it should participate in a chain rule",
|
||||
);
|
||||
|
||||
const _: () = assert!(
|
||||
IMPACT_LATTICE_COVERED & IMPACT_LATTICE_UNCOVERED == 0,
|
||||
"Cap bit appears in both IMPACT_LATTICE_COVERED and IMPACT_LATTICE_UNCOVERED",
|
||||
);
|
||||
|
||||
/// Union of every cap bit referenced by an [`IMPACT_LATTICE`] rule, as
|
||||
/// `source_cap` or `adjacent_cap`. Computed at compile time.
|
||||
#[allow(dead_code)] // Called from a const assertion; MSRV lints may miss const-eval uses.
|
||||
const fn rule_coverage_bits() -> u32 {
|
||||
let mut acc: u32 = 0;
|
||||
let mut i = 0;
|
||||
while i < IMPACT_LATTICE.len() {
|
||||
let rule = IMPACT_LATTICE[i];
|
||||
acc |= rule.source_cap.bits();
|
||||
acc |= match rule.adjacent_cap {
|
||||
Some(a) => a.bits(),
|
||||
None => 0,
|
||||
};
|
||||
i += 1;
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
||||
const _: () = assert!(
|
||||
rule_coverage_bits() == IMPACT_LATTICE_COVERED,
|
||||
"IMPACT_LATTICE_COVERED claims a cap bit that no IMPACT_LATTICE rule references; \
|
||||
drop it from IMPACT_LATTICE_COVERED or add a rule that consumes it",
|
||||
);
|
||||
|
||||
/// Precomputed standalone-rule table indexed by `Cap` bit position.
|
||||
///
|
||||
/// Built once at compile time from [`IMPACT_LATTICE`]. `Cap` is a
|
||||
/// `bitflags!` u32, so each cap occupies one bit position 0..32; the
|
||||
/// table stores the standalone [`ImpactCategory`] (if any) for that
|
||||
/// position. [`lookup_impact`] uses this to short-circuit its
|
||||
/// second-pass and third-pass walks in O(1).
|
||||
static STANDALONE_BY_BIT: [Option<ImpactCategory>; 32] = build_standalone_table();
|
||||
|
||||
const fn build_standalone_table() -> [Option<ImpactCategory>; 32] {
|
||||
let mut table = [None; 32];
|
||||
let mut i = 0;
|
||||
while i < IMPACT_LATTICE.len() {
|
||||
let rule = IMPACT_LATTICE[i];
|
||||
if rule.adjacent_cap.is_none() {
|
||||
let bit = rule.source_cap.bits().trailing_zeros() as usize;
|
||||
table[bit] = Some(rule.result);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
table
|
||||
}
|
||||
|
||||
fn standalone_lookup(cap: Cap) -> Option<ImpactCategory> {
|
||||
let bits = cap.bits();
|
||||
if bits == 0 || bits.count_ones() != 1 {
|
||||
return None;
|
||||
}
|
||||
STANDALONE_BY_BIT[bits.trailing_zeros() as usize]
|
||||
}
|
||||
|
||||
/// Look up an [`ImpactCategory`] for a (source, adjacent) cap pair.
|
||||
///
|
||||
/// `adjacent` is `None` when the caller has not yet found a partner
|
||||
/// finding. Returns the most-specific matching rule.
|
||||
///
|
||||
/// Phase 25's path search calls this once per candidate path with the
|
||||
/// path's primary and secondary caps; multiple cap matches choose the
|
||||
/// first rule in [`IMPACT_LATTICE`] order (specific before fallback).
|
||||
///
|
||||
/// The standalone-rule walks (second + third pass) are O(1) via
|
||||
/// `STANDALONE_BY_BIT`. The two-cap walk (first pass) stays linear
|
||||
/// because the 2-cap subset is small (today: three rules); promote
|
||||
/// to a sorted-pair binary search if the lattice grows past ~16
|
||||
/// pair-rules.
|
||||
pub fn lookup_impact(source: Cap, adjacent: Option<Cap>) -> Option<ImpactCategory> {
|
||||
// First pass: exact source + matching adjacency (or both ways).
|
||||
if let Some(adj) = adjacent {
|
||||
for rule in IMPACT_LATTICE {
|
||||
if let Some(rule_adj) = rule.adjacent_cap {
|
||||
let direct = rule.source_cap == source && rule_adj == adj;
|
||||
let swapped = rule.source_cap == adj && rule_adj == source;
|
||||
if direct || swapped {
|
||||
return Some(rule.result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second pass: standalone rule on source_cap (O(1) table lookup).
|
||||
if let Some(cat) = standalone_lookup(source) {
|
||||
return Some(cat);
|
||||
}
|
||||
// Third pass: if `adjacent` is given but the pair didn't hit,
|
||||
// try the standalone rule on adjacent_cap so a CODE_EXEC + UNRELATED
|
||||
// pair still reaches `Rce`.
|
||||
if let Some(adj) = adjacent
|
||||
&& let Some(cat) = standalone_lookup(adj)
|
||||
{
|
||||
return Some(cat);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn cmdi_alone_maps_to_rce() {
        let got = lookup_impact(Cap::CODE_EXEC, None);
        assert_eq!(got, Some(ImpactCategory::Rce));
    }

    #[test]
    fn deserialize_alone_maps_to_rce() {
        let got = lookup_impact(Cap::DESERIALIZE, None);
        assert_eq!(got, Some(ImpactCategory::Rce));
    }

    #[test]
    fn ssrf_alone_maps_to_internal_network_access() {
        let got = lookup_impact(Cap::SSRF, None);
        assert_eq!(got, Some(ImpactCategory::InternalNetworkAccess));
    }

    #[test]
    fn open_redirect_plus_user_session_maps_to_session_hijack() {
        // The pair must resolve identically whichever cap is passed first.
        let orderings = [
            (Cap::OPEN_REDIRECT, Cap::UNAUTHORIZED_ID),
            (Cap::UNAUTHORIZED_ID, Cap::OPEN_REDIRECT),
        ];
        for (first, second) in orderings {
            assert_eq!(
                lookup_impact(first, Some(second)),
                Some(ImpactCategory::SessionHijack)
            );
        }
    }

    #[test]
    fn cors_plus_codeexec_maps_to_browser_local_rce() {
        let got = lookup_impact(Cap::HEADER_INJECTION, Some(Cap::CODE_EXEC));
        assert_eq!(got, Some(ImpactCategory::BrowserToLocalRce));
    }

    #[test]
    fn path_traversal_plus_sensitive_io_maps_to_info_disclosure() {
        let got = lookup_impact(Cap::FILE_IO, Some(Cap::DATA_EXFIL));
        assert_eq!(got, Some(ImpactCategory::InfoDisclosure));
    }

    #[test]
    fn unknown_cap_returns_none() {
        for cap in [Cap::HTML_ESCAPE, Cap::CRYPTO] {
            assert_eq!(lookup_impact(cap, None), None);
        }
    }

    #[test]
    fn pair_with_uncovered_adjacency_falls_through_to_standalone() {
        // CRYPTO takes part in no rule, so the CODE_EXEC + CRYPTO pair
        // resolves through the standalone CODE_EXEC → Rce rule.
        let got = lookup_impact(Cap::CODE_EXEC, Some(Cap::CRYPTO));
        assert_eq!(got, Some(ImpactCategory::Rce));
    }
}
|
||||
140
src/chain/mod.rs
Normal file
140
src/chain/mod.rs
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
//! Phase 24 — exploit-chain composer scaffolding (Track G.1).
|
||||
//!
|
||||
//! A `ChainGraph` is the small intermediate representation the chain
|
||||
//! composer walks between two pre-existing artefacts: the flat list of
|
||||
//! per-finding [`Diag`](crate::commands::scan::Diag)s produced by the
|
||||
//! static analyser and the [`SurfaceMap`](crate::surface::SurfaceMap)
|
||||
//! produced by Track F.
|
||||
//!
|
||||
//! Phase 24 ships the types only. The implicit-attacker node and the
|
||||
//! bounded DFS that walks edges into [`ChainFinding`]s land in Phase 25
|
||||
//! (`src/chain/search.rs`); composite re-verification lands in Phase 26
|
||||
//! (`src/chain/reverify.rs`).
|
||||
//!
|
||||
//! # Storage shape
|
||||
//!
|
||||
//! Two parallel `Vec`s — `nodes` and `edges` — mirroring `SurfaceMap`'s
|
||||
//! shape. Determinism is the caller's responsibility: edges are
|
||||
//! produced in the order the source [`Diag`](crate::commands::scan::Diag) slice presents, and
|
||||
//! `findings_to_edges` does not sort the input. Phase 25 will fold
|
||||
//! these into a `petgraph::DiGraph` for path search.
|
||||
//!
|
||||
//! # Lattice exhaustiveness
|
||||
//!
|
||||
//! [`impact`] keeps a `IMPACT_LATTICE_COVERED | IMPACT_LATTICE_UNCOVERED
|
||||
//! == Cap::all().bits()` const assertion, mirroring the
|
||||
//! `CORPUS_SUPPORTED | CORPUS_UNSUPPORTED == Cap::all().bits()` pattern
|
||||
//! in [`crate::dynamic::corpus`]. Adding a new `Cap` bit without
|
||||
//! updating the lattice fails to compile.
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::labels::Cap;
|
||||
use crate::surface::SourceLocation;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub mod edges;
|
||||
pub mod feasibility;
|
||||
pub mod finding;
|
||||
pub mod impact;
|
||||
#[cfg(feature = "dynamic")]
|
||||
pub mod reverify;
|
||||
pub mod score;
|
||||
pub mod search;
|
||||
|
||||
pub use edges::{ChainEdge, FindingRef, findings_to_edges, findings_to_edges_with_reach};
|
||||
pub use feasibility::Feasibility;
|
||||
pub use finding::{ChainFinding, ChainMember, ChainSeverity, ChainSink};
|
||||
pub use impact::{IMPACT_LATTICE, ImpactCategory, ImpactRule, lookup_impact};
|
||||
#[cfg(feature = "dynamic")]
|
||||
pub use reverify::{
|
||||
ChainReverifyResult, ChainStepSpec, CompositeReverifier, DefaultCompositeReverifier,
|
||||
chain_step_specs, reverify_chain, reverify_chain_with, reverify_top_chains,
|
||||
reverify_top_chains_with,
|
||||
};
|
||||
pub use score::{ChainScoreConfig, category_weight, min_score_default, score_path};
|
||||
pub use search::{ChainSearchConfig, find_chains, find_chains_with_reach};
|
||||
|
||||
/// One node in a [`ChainGraph`].
|
||||
///
|
||||
/// `Entry` and `Sink` nodes are translated 1:1 from the SurfaceMap's
|
||||
/// [`crate::surface::SurfaceNode::EntryPoint`] and
|
||||
/// [`crate::surface::SurfaceNode::DangerousLocal`] variants. `Finding`
|
||||
/// nodes wrap a static [`Diag`](crate::commands::scan::Diag) so a path
|
||||
/// from an entry to a sink can pin which finding witnesses each hop.
|
||||
/// Phase 25's path search treats the implicit attacker as a virtual
|
||||
/// predecessor of every `Entry`; there is no explicit `Attacker`
|
||||
/// variant on this enum.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(tag = "node", rename_all = "snake_case")]
|
||||
pub enum ChainNode {
|
||||
/// A web entry-point lifted from the SurfaceMap.
|
||||
Entry {
|
||||
location: SourceLocation,
|
||||
method: HttpMethod,
|
||||
route: String,
|
||||
auth_required: bool,
|
||||
},
|
||||
/// A static finding produced by the analyser.
|
||||
Finding(FindingRef),
|
||||
/// A dangerous-local sink lifted from the SurfaceMap.
|
||||
Sink {
|
||||
location: SourceLocation,
|
||||
function_name: String,
|
||||
cap_bits: u32,
|
||||
},
|
||||
}
|
||||
|
||||
impl ChainNode {
|
||||
/// Source location of this node. Used for byte-deterministic
|
||||
/// ordering and for the `nyx surface`-style human display.
|
||||
pub fn location(&self) -> &SourceLocation {
|
||||
match self {
|
||||
ChainNode::Entry { location, .. } => location,
|
||||
ChainNode::Finding(f) => &f.location,
|
||||
ChainNode::Sink { location, .. } => location,
|
||||
}
|
||||
}
|
||||
|
||||
/// Cap bitmask carried by this node, or `0` for entry nodes. Used
|
||||
/// by Phase 25 to discriminate which [`ImpactRule`] a path matches.
|
||||
pub fn cap_bits(&self) -> u32 {
|
||||
match self {
|
||||
ChainNode::Entry { .. } => 0,
|
||||
ChainNode::Finding(f) => f.cap_bits,
|
||||
ChainNode::Sink { cap_bits, .. } => *cap_bits,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The full chain graph. Phase 24 only exposes the types; the
|
||||
/// composer that fills the vectors lands in Phase 25.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ChainGraph {
|
||||
pub nodes: Vec<ChainNode>,
|
||||
pub edges: Vec<ChainEdge>,
|
||||
}
|
||||
|
||||
impl ChainGraph {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn node_count(&self) -> usize {
|
||||
self.nodes.len()
|
||||
}
|
||||
|
||||
pub fn edge_count(&self) -> usize {
|
||||
self.edges.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a primary [`Cap`] bit into the closest matching impact
|
||||
/// category in isolation (no adjacency). Returns `None` when the cap
|
||||
/// has no terminal interpretation on its own — chain composition needs
|
||||
/// an additional cap or surface property to lift it.
|
||||
///
|
||||
/// Phase 25's path-search code calls this as a fast-path before
|
||||
/// consulting the full [`IMPACT_LATTICE`].
|
||||
pub fn standalone_impact(cap: Cap) -> Option<ImpactCategory> {
|
||||
lookup_impact(cap, None)
|
||||
}
|
||||
862
src/chain/reverify.rs
Normal file
862
src/chain/reverify.rs
Normal file
|
|
@ -0,0 +1,862 @@
|
|||
//! Phase 26 — Track G.3: end-to-end chain re-verification.
|
||||
//!
|
||||
//! Phase 25 emitted [`ChainFinding`]s scored by static + per-finding
|
||||
//! feasibility but left `dynamic_verdict` permanently `None`. Phase 26
|
||||
//! drives the top-scoring Confirmed chains through a *single* composite
|
||||
//! dynamic run: each member's step harness is composed via
|
||||
//! [`crate::dynamic::lang::compose_chain_step`] and the output of one
|
||||
//! step is threaded into the next via
|
||||
//! [`crate::dynamic::lang::ChainStepHarness::PREV_OUTPUT_ENV`], with
|
||||
//! the final step terminating at the chain's sink probe.
|
||||
//!
|
||||
//! # Outcome shape
|
||||
//!
|
||||
//! [`reverify_chain`] returns a [`ChainReverifyResult`] carrying the
|
||||
//! composite [`VerifyResult`] alongside the severity before and after
|
||||
//! the verdict was applied. The severity-downgrade rule is documented
|
||||
//! on [`crate::chain::finding::ChainFinding::apply_dynamic_verdict`]:
|
||||
//! `Inconclusive` drops the chain one bucket and records a reason;
|
||||
//! every other status leaves the severity intact.
|
||||
//!
|
||||
//! # Per-member harness specs
|
||||
//!
|
||||
//! Both the default reverifier and out-of-tree callers consume
|
||||
//! [`chain_step_specs`] to materialise one [`HarnessSpec`] per
|
||||
//! `chain.members` slot. The helper looks each member up in the
|
||||
//! caller-supplied `member_diags` slice by
|
||||
//! [`crate::chain::edges::FindingRef::stable_hash`] and reuses
|
||||
//! [`HarnessSpec::from_finding_full`] so the chain's per-step specs
|
||||
//! match what the per-finding verifier would have derived. This is
|
||||
//! the API-shape sub-task of the Phase 26 live-execution split: it
|
||||
//! lets callers (today: the default reverifier; tomorrow: a live
|
||||
//! sandbox composer) inspect whether every step is drivable before
|
||||
//! committing to a build / run pass.
|
||||
//!
|
||||
//! # Cost control
|
||||
//!
|
||||
//! Re-verification is opt-in via
|
||||
//! [`crate::utils::config::ChainConfig::reverify_top_n`] — only the top
|
||||
//! N chains by score reach the composite run. Set to `0` to skip the
|
||||
//! pass entirely. The helper [`reverify_top_chains`] applies the
|
||||
//! caller's reverifier to the top-N slice in place, leaving the rest
|
||||
//! untouched.
|
||||
//!
|
||||
//! # Testability
|
||||
//!
|
||||
//! Production callers use [`reverify_chain`] (which dispatches to
|
||||
//! [`DefaultCompositeReverifier`]). Tests inject a stub
|
||||
//! [`CompositeReverifier`] via [`reverify_chain_with`] /
|
||||
//! [`reverify_top_chains_with`] so the severity-downgrade pipeline can
|
||||
//! be exercised without a live sandbox backend.
|
||||
|
||||
use crate::chain::finding::{ChainFinding, ChainSeverity};
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::dynamic::build_sandbox::dispatch_prepare;
|
||||
use crate::dynamic::harness::{self, BuiltHarness};
|
||||
use crate::dynamic::lang::{self, ChainStepTerminal};
|
||||
use crate::dynamic::sandbox;
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::dynamic::verify::VerifyOptions;
|
||||
use crate::evidence::{InconclusiveReason, UnsupportedReason, VerifyResult, VerifyStatus};
|
||||
use crate::surface::SurfaceMap;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Outcome of composite re-verification for a single chain.
|
||||
///
|
||||
/// Carries the [`VerifyResult`] the composite run produced plus the
|
||||
/// severity transition so callers (e.g. the scan command's output
|
||||
/// pipeline) can decide whether to emit a Slack-style "downgraded by
|
||||
/// dynamic verification" badge.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ChainReverifyResult {
|
||||
/// Stable hash of the chain re-verified.
|
||||
pub chain_hash: u64,
|
||||
/// Composite dynamic verdict assembled by the reverifier.
|
||||
pub verdict: VerifyResult,
|
||||
/// Severity carried on the chain *before* the verdict was applied.
|
||||
pub severity_before: ChainSeverity,
|
||||
/// Severity carried on the chain *after* the verdict was applied.
|
||||
/// Equals `severity_before` unless the verdict was `Inconclusive`.
|
||||
pub severity_after: ChainSeverity,
|
||||
/// Human-readable downgrade reason, when one was recorded.
|
||||
/// Mirrors [`ChainFinding::reverify_reason`] for the post-apply
|
||||
/// state.
|
||||
pub downgrade_reason: Option<String>,
|
||||
}
|
||||
|
||||
impl ChainReverifyResult {
|
||||
/// True when the verdict caused the chain's severity to drop a
|
||||
/// bucket.
|
||||
pub fn was_downgraded(&self) -> bool {
|
||||
self.severity_before != self.severity_after
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-member harness-spec derivation result.
|
||||
///
|
||||
/// One entry per `chain.members` slot, in chain order. `member_hash`
|
||||
/// is copied from the [`crate::chain::edges::FindingRef::stable_hash`];
|
||||
/// `result` is the outcome of running [`HarnessSpec::from_finding_full`]
|
||||
/// against the matching [`Diag`] from the caller's slice.
|
||||
///
|
||||
/// A member whose hash has no diag match records
|
||||
/// [`UnsupportedReason::NoFlowSteps`] so the caller can distinguish
|
||||
/// "spec derivation failed" from "diag missing from the scan input".
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ChainStepSpec {
|
||||
pub member_hash: u64,
|
||||
pub result: Result<HarnessSpec, UnsupportedReason>,
|
||||
}
|
||||
|
||||
/// Derive one [`HarnessSpec`] per chain member, in chain order.
|
||||
///
|
||||
/// Looks each member up in `member_diags` by stable hash (zero-hash
|
||||
/// diags are skipped — the pre-`compute_stable_hash` placeholder
|
||||
/// produced by tests and synthetic harnesses). Members whose hash has
|
||||
/// no diag match record [`UnsupportedReason::NoFlowSteps`] so the
|
||||
/// caller can tell the difference between "spec derivation failed" and
|
||||
/// "diag missing from the scan input".
|
||||
///
|
||||
/// The function does **not** run anything: it returns derived specs so
|
||||
/// the caller (today: [`DefaultCompositeReverifier`]; tomorrow: a live
|
||||
/// sandbox composer) can decide whether to commit to a build / run
|
||||
/// pass. Used as the API-shape half of the Phase 26 live-execution
|
||||
/// split — see the crate-level docs for the wider design.
|
||||
pub fn chain_step_specs(
|
||||
chain: &ChainFinding,
|
||||
member_diags: &[Diag],
|
||||
opts: &VerifyOptions,
|
||||
) -> Vec<ChainStepSpec> {
|
||||
let mut by_hash: HashMap<u64, &Diag> = HashMap::with_capacity(member_diags.len());
|
||||
for d in member_diags {
|
||||
if d.stable_hash != 0 {
|
||||
by_hash.insert(d.stable_hash, d);
|
||||
}
|
||||
}
|
||||
chain
|
||||
.members
|
||||
.iter()
|
||||
.map(|m| {
|
||||
let result = match by_hash.get(&m.stable_hash).copied() {
|
||||
Some(d) => HarnessSpec::from_finding_full(
|
||||
d,
|
||||
opts.verify_all_confidence,
|
||||
opts.summaries.as_deref(),
|
||||
opts.callgraph.as_deref(),
|
||||
),
|
||||
None => Err(UnsupportedReason::NoFlowSteps),
|
||||
};
|
||||
ChainStepSpec {
|
||||
member_hash: m.stable_hash,
|
||||
result,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Pluggable composite-reverifier surface.
|
||||
///
|
||||
/// Production callers use [`DefaultCompositeReverifier`] (which drives
|
||||
/// the per-step harness compose path). Tests substitute a stub that
|
||||
/// returns canned [`VerifyResult`]s so the downgrade-and-record
|
||||
/// machinery can be exercised without a live sandbox backend.
|
||||
///
|
||||
/// `member_diags` carries the [`Diag`]s that produced `chain.members`,
|
||||
/// in any order — implementations look them up by
|
||||
/// [`crate::chain::edges::FindingRef::stable_hash`] via
|
||||
/// [`chain_step_specs`]. Threading the slice (instead of a pre-built
|
||||
/// `HashMap`) mirrors how
|
||||
/// [`crate::dynamic::verify::VerifyOptions::summaries`] flows:
|
||||
/// callers hold the full project diag list and the trait surface
|
||||
/// stays free of cross-coupling.
|
||||
pub trait CompositeReverifier {
|
||||
/// Run the composite dynamic re-verification for `chain` and return
|
||||
/// the resulting verdict.
|
||||
fn reverify(
|
||||
&self,
|
||||
chain: &ChainFinding,
|
||||
member_diags: &[Diag],
|
||||
surface: &SurfaceMap,
|
||||
opts: &VerifyOptions,
|
||||
) -> VerifyResult;
|
||||
}
|
||||
|
||||
/// Phase 26 default composite reverifier.
///
/// The composite-harness composer walks `chain.members`, derives one
/// [`HarnessSpec`] per member via [`chain_step_specs`], drives each
/// derived spec through [`harness::build`] + [`dispatch_prepare`] so
/// the per-language build cost is amortised against the on-disk caches,
/// then runs each step sequentially through [`sandbox::run`] with the
/// previous step's stdout threaded into the next step via
/// [`crate::dynamic::lang::ChainStepHarness::PREV_OUTPUT_ENV`]. The
/// last built step is composed with a [`ChainStepTerminal`] describing
/// the chain's sink.
///
/// # Verdict
///
/// The reverifier returns `Confirmed` only when the build pass and the
/// run pass both complete cleanly *and* the final step reports a sink
/// hit; see the `reverify` implementation for the exact rule. Every
/// other outcome is `Inconclusive(BackendInsufficient)`.
///
/// In both cases the `detail` field reports spec-derivation, per-step
/// build coverage, AND per-step run coverage so operators (and the
/// [`reverify_top_chains`] caller) can see how far down the live
/// execution path the chain got: `derived N/M`, `built B/N
/// (cache_hit=H, build_ms=T, build_errors=E)`, `ran S/B
/// (sandbox_errors=SE, timeouts=TO, nonzero_exits=NE,
/// final_sink_hit=F)`. Callers that need a deterministic outcome
/// (tests, CI) use [`reverify_chain_with`] with a stubbed reverifier.
///
/// # Build failures
///
/// A spec whose [`harness::build`] or [`dispatch_prepare`] call fails
/// is counted under `build_errors` and skipped from the run loop, so
/// its chain-step source is never staged. Per the original design
/// notes this covers languages for which [`dispatch_prepare`] reports
/// `Unsupported` (Ruby at the time of writing).
///
/// # Workdir lifetime
///
/// Per the original design notes, every per-step build is
/// content-addressed by [`HarnessSpec::spec_hash`] under
/// `/tmp/nyx-harness/{spec_hash}`, and the per-language `prepare_*`
/// caches under the host's `ProjectDirs` cache root are keyed on
/// `(lockfile_hash, toolchain_id, language)`. Repeated calls with the
/// same specs are idempotent — no per-call growth on disk. The
/// chain-step source (`step.py`, `step.sh`, etc.) is written into the
/// same workdir alongside the harness source; filenames are distinct
/// so they do not collide with [`harness::build`] output for the same
/// spec_hash.
#[derive(Debug, Clone, Copy, Default)]
pub struct DefaultCompositeReverifier;
|
||||
|
||||
impl CompositeReverifier for DefaultCompositeReverifier {
|
||||
fn reverify(
|
||||
&self,
|
||||
chain: &ChainFinding,
|
||||
member_diags: &[Diag],
|
||||
_surface: &SurfaceMap,
|
||||
opts: &VerifyOptions,
|
||||
) -> VerifyResult {
|
||||
let finding_id = format!("chain-{:016x}", chain.stable_hash);
|
||||
let specs = chain_step_specs(chain, member_diags, opts);
|
||||
let total = specs.len();
|
||||
let derived_specs: Vec<&HarnessSpec> = specs
|
||||
.iter()
|
||||
.filter_map(|s| s.result.as_ref().ok())
|
||||
.collect();
|
||||
let derived = derived_specs.len();
|
||||
|
||||
// Sub-task (b) main of the Phase 26 live-execution split:
|
||||
// drive each derived spec through the per-language build
|
||||
// pipeline so each step's interpreter / compile artefact is
|
||||
// staged in its content-addressed workdir before the run
|
||||
// pass. Failures are counted, not propagated — the outer
|
||||
// verdict stays `Inconclusive(BackendInsufficient)` until
|
||||
// the sink-rewrite pass lands.
|
||||
let profile = opts.sandbox.process_hardening;
|
||||
let mut built = 0usize;
|
||||
let mut cache_hits = 0usize;
|
||||
let mut total_build_ms: u128 = 0;
|
||||
let mut build_errors = 0usize;
|
||||
let mut built_steps: Vec<(PathBuf, &HarnessSpec)> = Vec::with_capacity(derived);
|
||||
for spec in &derived_specs {
|
||||
match harness::build(spec) {
|
||||
Ok(built_harness) => {
|
||||
match dispatch_prepare(spec, &built_harness.workdir, profile) {
|
||||
Ok(result) => {
|
||||
built += 1;
|
||||
if result.cache_hit {
|
||||
cache_hits += 1;
|
||||
}
|
||||
total_build_ms =
|
||||
total_build_ms.saturating_add(result.duration.as_millis());
|
||||
built_steps.push((built_harness.workdir, spec));
|
||||
}
|
||||
Err(_) => build_errors += 1,
|
||||
}
|
||||
}
|
||||
Err(_) => build_errors += 1,
|
||||
}
|
||||
}
|
||||
|
||||
// Sub-task (c) of the Phase 26 live-execution split:
|
||||
// sequentially run each built chain-step harness through
|
||||
// `sandbox::run`, threading the previous step's stdout into
|
||||
// the next step via `NYX_PREV_OUTPUT`. The final step is
|
||||
// composed with a `ChainStepTerminal` carrying the chain's
|
||||
// sink callee, so the per-language emitter splices in a
|
||||
// `__nyx_probe(callee, prev)` call plus the
|
||||
// `SINK_HIT_SENTINEL` banner that `sandbox::run` detects via
|
||||
// `SandboxOutcome::sink_hit`.
|
||||
let terminal = ChainStepTerminal {
|
||||
sink_callee: chain.sink.function_name.clone(),
|
||||
sink_cap_bits: chain.sink.cap_bits,
|
||||
};
|
||||
let (steps_run, sandbox_errors, steps_timeout, nonzero_exits, final_sink_hit) =
|
||||
run_chain_steps(&built_steps, &opts.sandbox, &terminal);
|
||||
|
||||
let detail = format!(
|
||||
"composite chain re-verification: live runs collect step coverage; \
|
||||
derived {derived}/{total} harness specs; \
|
||||
built {built}/{derived} (cache_hit={cache_hits}, build_ms={total_build_ms}, build_errors={build_errors}); \
|
||||
ran {steps_run}/{built} (sandbox_errors={sandbox_errors}, timeouts={steps_timeout}, nonzero_exits={nonzero_exits}, final_sink_hit={final_sink_hit})"
|
||||
);
|
||||
|
||||
// Verdict resolution: a composite chain is `Confirmed` when
|
||||
// (a) every derived step built, (b) every built step ran
|
||||
// without a sandbox error, (c) the final step's terminal
|
||||
// compose fired the sink sentinel (`final_sink_hit=true`).
|
||||
// Anything short of all three keeps the verdict
|
||||
// `Inconclusive(BackendInsufficient)` so the chain's severity
|
||||
// takes the existing downgrade rule.
|
||||
let all_built = derived > 0 && built == derived;
|
||||
let all_ran = built > 0 && steps_run == built && sandbox_errors == 0;
|
||||
if all_built && all_ran && final_sink_hit {
|
||||
// Phase 31 telemetry stability stamping. When the caller
|
||||
// opts in via `NYX_VERIFY_REPLAY_STABLE=1` (mirrored by
|
||||
// [`VerifyOptions::replay_stable_check`]) we re-run the
|
||||
// chain step sequence one more time on the same built
|
||||
// workdirs and stamp `replay_stable` based on whether the
|
||||
// second pass also fires the sink sentinel. `Some(true)`
|
||||
// means the chain reproduces; `Some(false)` means the chain
|
||||
// is flaky (rare but a real eval-corpus signal); the field
|
||||
// stays `None` when the opt-in is off.
|
||||
let replay_stable = if opts.replay_stable_check {
|
||||
let (_, replay_sandbox_errors, _, _, replay_final_sink_hit) =
|
||||
run_chain_steps(&built_steps, &opts.sandbox, &terminal);
|
||||
if replay_sandbox_errors == 0 {
|
||||
Some(replay_final_sink_hit)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
VerifyResult {
|
||||
finding_id,
|
||||
status: VerifyStatus::Confirmed,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: None,
|
||||
detail: Some(detail),
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
differential: None,
|
||||
replay_stable,
|
||||
wrong: None,
|
||||
hardening_outcome: None,
|
||||
}
|
||||
} else {
|
||||
VerifyResult {
|
||||
finding_id,
|
||||
status: VerifyStatus::Inconclusive,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: Some(InconclusiveReason::BackendInsufficient {
|
||||
backend: "composite-chain".to_owned(),
|
||||
oracle_kind: "chain-step-harness".to_owned(),
|
||||
}),
|
||||
detail: Some(detail),
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
differential: None,
|
||||
replay_stable: None,
|
||||
wrong: None,
|
||||
hardening_outcome: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 26 sub-task (c): sequentially run each built chain step
|
||||
/// through [`sandbox::run`] with `NYX_PREV_OUTPUT` threading.
|
||||
///
|
||||
/// Returns `(steps_run, sandbox_errors, timeouts, nonzero_exits,
|
||||
/// final_sink_hit)`. The final step's [`sandbox::SandboxOutcome::sink_hit`]
|
||||
/// is captured for the verdict's `detail` field (sub-task (d)); today
|
||||
/// the per-language [`lang::compose_chain_step`] sources echo
|
||||
/// `NYX_PREV_OUTPUT` to stdout without invoking the chain's terminal
|
||||
/// sink, so `final_sink_hit` stays `false` until the sink-rewrite
|
||||
/// pass lands.
|
||||
///
|
||||
/// `sandbox_errors` aborts the rest of the chain — a step that can
|
||||
/// neither spawn nor stage its source file has no useful `stdout` to
|
||||
/// thread into the next step. Non-zero exits and timeouts are
|
||||
/// recorded but do not stop the chain: the previous step's stdout is
|
||||
/// still threaded forward so partial-success chains keep collecting
|
||||
/// coverage.
|
||||
///
|
||||
/// `base_opts` is cloned per step; the per-step clone overlays the
|
||||
/// chain-step's `extra_env` (typically the single `NYX_PREV_OUTPUT`
|
||||
/// binding) on top of any caller-provided extras and drops the
|
||||
/// per-finding `stub_harness` because chain-step harnesses do not
|
||||
/// drive boundary stubs.
|
||||
fn run_chain_steps(
|
||||
built_steps: &[(PathBuf, &HarnessSpec)],
|
||||
base_opts: &sandbox::SandboxOptions,
|
||||
terminal: &ChainStepTerminal,
|
||||
) -> (usize, usize, usize, usize, bool) {
|
||||
let mut steps_run = 0usize;
|
||||
let mut sandbox_errors = 0usize;
|
||||
let mut steps_timeout = 0usize;
|
||||
let mut nonzero_exits = 0usize;
|
||||
let mut final_sink_hit = false;
|
||||
let mut prev_output: Option<Vec<u8>> = None;
|
||||
let last_idx = built_steps.len().saturating_sub(1);
|
||||
for (idx, (workdir, spec)) in built_steps.iter().enumerate() {
|
||||
let step_terminal = if idx == last_idx {
|
||||
Some(terminal)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let step = lang::compose_chain_step(spec.lang, prev_output.as_deref(), step_terminal);
|
||||
|
||||
let step_path = workdir.join(&step.filename);
|
||||
if let Some(parent) = step_path.parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
if std::fs::write(&step_path, step.source.as_bytes()).is_err() {
|
||||
sandbox_errors += 1;
|
||||
break;
|
||||
}
|
||||
let mut extra_files_failed = false;
|
||||
for (rel, content) in &step.extra_files {
|
||||
let dest = workdir.join(rel);
|
||||
if let Some(parent) = dest.parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
if std::fs::write(&dest, content.as_bytes()).is_err() {
|
||||
extra_files_failed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if extra_files_failed {
|
||||
sandbox_errors += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
let mut step_opts = base_opts.clone();
|
||||
step_opts.extra_env.extend(step.extra_env.iter().cloned());
|
||||
step_opts.stub_harness = None;
|
||||
|
||||
let step_built = BuiltHarness {
|
||||
workdir: workdir.clone(),
|
||||
command: step.command.clone(),
|
||||
env: vec![],
|
||||
source: step.source.clone(),
|
||||
entry_source: String::new(),
|
||||
};
|
||||
|
||||
match sandbox::run(&step_built, b"", &step_opts) {
|
||||
Ok(outcome) => {
|
||||
steps_run += 1;
|
||||
if outcome.timed_out {
|
||||
steps_timeout += 1;
|
||||
}
|
||||
if outcome.exit_code.unwrap_or(-1) != 0 {
|
||||
nonzero_exits += 1;
|
||||
}
|
||||
if idx == last_idx {
|
||||
final_sink_hit = outcome.sink_hit;
|
||||
}
|
||||
prev_output = Some(outcome.stdout);
|
||||
}
|
||||
Err(_) => {
|
||||
sandbox_errors += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
(
|
||||
steps_run,
|
||||
sandbox_errors,
|
||||
steps_timeout,
|
||||
nonzero_exits,
|
||||
final_sink_hit,
|
||||
)
|
||||
}
|
||||
|
||||
/// Phase 26 — Track G.3: drive composite dynamic re-verification for
|
||||
/// one chain.
|
||||
///
|
||||
/// Wraps [`reverify_chain_with`] with the [`DefaultCompositeReverifier`].
|
||||
pub fn reverify_chain(
|
||||
chain: &mut ChainFinding,
|
||||
member_diags: &[Diag],
|
||||
surface: &SurfaceMap,
|
||||
opts: &VerifyOptions,
|
||||
) -> ChainReverifyResult {
|
||||
reverify_chain_with(
|
||||
chain,
|
||||
member_diags,
|
||||
surface,
|
||||
opts,
|
||||
&DefaultCompositeReverifier,
|
||||
)
|
||||
}
|
||||
|
||||
/// Inject-the-reverifier flavour of [`reverify_chain`].
|
||||
///
|
||||
/// Mutates `chain` in place: attaches the verdict via
|
||||
/// [`ChainFinding::apply_dynamic_verdict`] (which applies the severity-
|
||||
/// downgrade rule) and returns a [`ChainReverifyResult`] summarising
|
||||
/// the transition.
|
||||
pub fn reverify_chain_with(
|
||||
chain: &mut ChainFinding,
|
||||
member_diags: &[Diag],
|
||||
surface: &SurfaceMap,
|
||||
opts: &VerifyOptions,
|
||||
reverifier: &dyn CompositeReverifier,
|
||||
) -> ChainReverifyResult {
|
||||
let chain_hash = chain.stable_hash;
|
||||
let severity_before = chain.severity;
|
||||
let verdict = reverifier.reverify(chain, member_diags, surface, opts);
|
||||
chain.apply_dynamic_verdict(verdict.clone());
|
||||
ChainReverifyResult {
|
||||
chain_hash,
|
||||
verdict,
|
||||
severity_before,
|
||||
severity_after: chain.severity,
|
||||
downgrade_reason: chain.reverify_reason.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 26 — Track G.3 cost-control entry point.
|
||||
///
|
||||
/// Re-verifies the top `top_n` chains by score order (chains are
|
||||
/// canonicalised score-descending by [`crate::chain::search::find_chains`],
|
||||
/// so the slice prefix is already the right set). `top_n == 0`
|
||||
/// short-circuits the entire pass.
|
||||
///
|
||||
/// `member_diags` is the full project diag list — each chain's
|
||||
/// reverifier looks up its own constituent diags by stable hash via
|
||||
/// [`chain_step_specs`].
|
||||
///
|
||||
/// Mutates `chains` in place; returns one [`ChainReverifyResult`] per
|
||||
/// re-verified chain. Chains past the `top_n` cut keep their
|
||||
/// pre-existing `dynamic_verdict` / `reverify_reason` / `severity`.
|
||||
pub fn reverify_top_chains(
|
||||
chains: &mut [ChainFinding],
|
||||
member_diags: &[Diag],
|
||||
surface: &SurfaceMap,
|
||||
opts: &VerifyOptions,
|
||||
top_n: usize,
|
||||
) -> Vec<ChainReverifyResult> {
|
||||
reverify_top_chains_with(
|
||||
chains,
|
||||
member_diags,
|
||||
surface,
|
||||
opts,
|
||||
top_n,
|
||||
&DefaultCompositeReverifier,
|
||||
)
|
||||
}
|
||||
|
||||
/// Inject-the-reverifier flavour of [`reverify_top_chains`].
|
||||
pub fn reverify_top_chains_with(
|
||||
chains: &mut [ChainFinding],
|
||||
member_diags: &[Diag],
|
||||
surface: &SurfaceMap,
|
||||
opts: &VerifyOptions,
|
||||
top_n: usize,
|
||||
reverifier: &dyn CompositeReverifier,
|
||||
) -> Vec<ChainReverifyResult> {
|
||||
if top_n == 0 || chains.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
let bound = top_n.min(chains.len());
|
||||
chains
|
||||
.iter_mut()
|
||||
.take(bound)
|
||||
.map(|c| reverify_chain_with(c, member_diags, surface, opts, reverifier))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::chain::edges::FindingRef;
|
||||
use crate::chain::finding::{ChainFinding, ChainSink};
|
||||
use crate::chain::impact::ImpactCategory;
|
||||
use crate::surface::SourceLocation;
|
||||
|
||||
fn mk_chain(hash: u64, severity: ChainSeverity, impact: ImpactCategory) -> ChainFinding {
|
||||
ChainFinding {
|
||||
stable_hash: hash,
|
||||
members: vec![FindingRef {
|
||||
finding_id: format!("f-{hash}"),
|
||||
stable_hash: hash,
|
||||
location: SourceLocation::new("a.py", 1, 1),
|
||||
rule_id: "r".into(),
|
||||
cap_bits: 0,
|
||||
}],
|
||||
sink: ChainSink {
|
||||
file: "a.py".into(),
|
||||
line: 5,
|
||||
col: 1,
|
||||
function_name: "sink".into(),
|
||||
cap_bits: 0,
|
||||
},
|
||||
implied_impact: impact,
|
||||
severity,
|
||||
score: 100.0,
|
||||
dynamic_verdict: None,
|
||||
reverify_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn verdict(status: VerifyStatus) -> VerifyResult {
|
||||
VerifyResult {
|
||||
finding_id: "f".into(),
|
||||
status,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: None,
|
||||
detail: None,
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
differential: None,
|
||||
replay_stable: None,
|
||||
wrong: None,
|
||||
hardening_outcome: None,
|
||||
}
|
||||
}
|
||||
|
||||
struct StubReverifier(VerifyStatus);
|
||||
impl CompositeReverifier for StubReverifier {
|
||||
fn reverify(
|
||||
&self,
|
||||
_chain: &ChainFinding,
|
||||
_member_diags: &[Diag],
|
||||
_surface: &SurfaceMap,
|
||||
_opts: &VerifyOptions,
|
||||
) -> VerifyResult {
|
||||
verdict(self.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn confirmed_verdict_leaves_severity_unchanged() {
|
||||
let mut chain = mk_chain(1, ChainSeverity::Critical, ImpactCategory::Rce);
|
||||
let surface = SurfaceMap::new();
|
||||
let opts = VerifyOptions::default();
|
||||
let result = reverify_chain_with(
|
||||
&mut chain,
|
||||
&[],
|
||||
&surface,
|
||||
&opts,
|
||||
&StubReverifier(VerifyStatus::Confirmed),
|
||||
);
|
||||
assert!(!result.was_downgraded());
|
||||
assert_eq!(result.severity_after, ChainSeverity::Critical);
|
||||
assert_eq!(chain.severity, ChainSeverity::Critical);
|
||||
assert_eq!(
|
||||
chain.dynamic_verdict.as_ref().unwrap().status,
|
||||
VerifyStatus::Confirmed
|
||||
);
|
||||
assert!(chain.reverify_reason.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn inconclusive_verdict_downgrades_severity_and_records_reason() {
|
||||
let mut chain = mk_chain(2, ChainSeverity::Critical, ImpactCategory::Rce);
|
||||
let surface = SurfaceMap::new();
|
||||
let opts = VerifyOptions::default();
|
||||
let result = reverify_chain_with(
|
||||
&mut chain,
|
||||
&[],
|
||||
&surface,
|
||||
&opts,
|
||||
&StubReverifier(VerifyStatus::Inconclusive),
|
||||
);
|
||||
assert!(result.was_downgraded());
|
||||
assert_eq!(result.severity_before, ChainSeverity::Critical);
|
||||
assert_eq!(result.severity_after, ChainSeverity::High);
|
||||
assert_eq!(chain.severity, ChainSeverity::High);
|
||||
assert!(chain.reverify_reason.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn inconclusive_at_low_floors_at_low() {
|
||||
let mut chain = mk_chain(3, ChainSeverity::Low, ImpactCategory::InfoDisclosure);
|
||||
let surface = SurfaceMap::new();
|
||||
let opts = VerifyOptions::default();
|
||||
let result = reverify_chain_with(
|
||||
&mut chain,
|
||||
&[],
|
||||
&surface,
|
||||
&opts,
|
||||
&StubReverifier(VerifyStatus::Inconclusive),
|
||||
);
|
||||
// Severity floors at Low; was_downgraded returns false because
|
||||
// the bucket did not change even though the verdict was
|
||||
// inconclusive.
|
||||
assert_eq!(result.severity_after, ChainSeverity::Low);
|
||||
assert!(chain.reverify_reason.is_some(), "reason still recorded");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn top_n_zero_skips_pass_entirely() {
|
||||
let mut chains = vec![
|
||||
mk_chain(1, ChainSeverity::Critical, ImpactCategory::Rce),
|
||||
mk_chain(2, ChainSeverity::High, ImpactCategory::SessionHijack),
|
||||
];
|
||||
let surface = SurfaceMap::new();
|
||||
let opts = VerifyOptions::default();
|
||||
let results = reverify_top_chains_with(
|
||||
&mut chains,
|
||||
&[],
|
||||
&surface,
|
||||
&opts,
|
||||
0,
|
||||
&StubReverifier(VerifyStatus::Confirmed),
|
||||
);
|
||||
assert!(results.is_empty());
|
||||
for c in &chains {
|
||||
assert!(
|
||||
c.dynamic_verdict.is_none(),
|
||||
"no verdict attached when top_n=0"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn top_n_limits_reverified_chain_count() {
|
||||
let mut chains = vec![
|
||||
mk_chain(1, ChainSeverity::Critical, ImpactCategory::Rce),
|
||||
mk_chain(2, ChainSeverity::High, ImpactCategory::SessionHijack),
|
||||
mk_chain(3, ChainSeverity::Medium, ImpactCategory::InfoDisclosure),
|
||||
];
|
||||
let surface = SurfaceMap::new();
|
||||
let opts = VerifyOptions::default();
|
||||
let results = reverify_top_chains_with(
|
||||
&mut chains,
|
||||
&[],
|
||||
&surface,
|
||||
&opts,
|
||||
2,
|
||||
&StubReverifier(VerifyStatus::Confirmed),
|
||||
);
|
||||
assert_eq!(results.len(), 2);
|
||||
assert!(chains[0].dynamic_verdict.is_some());
|
||||
assert!(chains[1].dynamic_verdict.is_some());
|
||||
assert!(
|
||||
chains[2].dynamic_verdict.is_none(),
|
||||
"tail beyond top_n is untouched"
|
||||
);
|
||||
}
|
||||
|
||||
/// The default reverifier reports `Inconclusive` with a
/// `BackendInsufficient` reason and demotes a Critical chain to High.
#[test]
fn default_reverifier_returns_inconclusive_backend_insufficient() {
    let surface_map = SurfaceMap::new();
    let verify_opts = VerifyOptions::default();
    let mut subject = mk_chain(99, ChainSeverity::Critical, ImpactCategory::Rce);

    let outcome = reverify_chain(&mut subject, &[], &surface_map, &verify_opts);

    assert_eq!(outcome.verdict.status, VerifyStatus::Inconclusive);
    let blamed_on_backend = matches!(
        outcome.verdict.inconclusive_reason,
        Some(InconclusiveReason::BackendInsufficient { .. })
    );
    assert!(blamed_on_backend);
    // An inconclusive default verdict costs the chain one severity bucket.
    assert_eq!(subject.severity, ChainSeverity::High);
}
|
||||
|
||||
/// The verdict detail string must state how many member specs were derived.
#[test]
fn default_reverifier_detail_reports_spec_derivation_coverage() {
    // No diags are supplied, so the single member is expected to fall
    // through to `NoFlowSteps` and the detail should read 0/1.
    let surface_map = SurfaceMap::new();
    let verify_opts = VerifyOptions::default();
    let mut subject = mk_chain(0xDE, ChainSeverity::High, ImpactCategory::SessionHijack);

    let outcome = reverify_chain(&mut subject, &[], &surface_map, &verify_opts);

    let detail = outcome.verdict.detail.as_deref().expect("detail populated");
    let reports_zero_of_one = detail.contains("0/1");
    assert!(
        reports_zero_of_one,
        "detail must report 0/1 specs derived for a single-member chain with no diags; got {detail:?}"
    );
}
|
||||
|
||||
/// The build segment of the detail string must be well-formed (all zeros)
/// even when no spec was derived and the build pipeline never ran.
#[test]
fn default_reverifier_detail_reports_build_coverage_with_no_derived_specs() {
    // No diags -> 0/N derived -> 0/0 built.
    let surface_map = SurfaceMap::new();
    let verify_opts = VerifyOptions::default();
    let mut subject = mk_chain(0xBD, ChainSeverity::Medium, ImpactCategory::InfoDisclosure);

    let outcome = reverify_chain(&mut subject, &[], &surface_map, &verify_opts);
    let detail = outcome.verdict.detail.as_deref().expect("detail populated");

    // (expected fragment, failure explanation)
    let expectations = [
        (
            "built 0/0",
            "detail must report 0/0 built when no specs derived",
        ),
        (
            "cache_hit=0",
            "detail must zero cache_hit when no builds attempted",
        ),
        (
            "build_ms=0",
            "detail must zero build_ms when no builds attempted",
        ),
        (
            "build_errors=0",
            "detail must zero build_errors when no builds attempted",
        ),
    ];
    for (fragment, complaint) in expectations {
        assert!(detail.contains(fragment), "{complaint}; got {detail:?}");
    }
}
|
||||
|
||||
/// The run-coverage segment of the detail string must be well-formed even
/// when the chain-step run loop is never entered.
#[test]
fn default_reverifier_detail_reports_run_coverage_with_no_built_steps() {
    // No diags -> 0/N derived -> 0/0 built -> 0/0 ran.
    let surface_map = SurfaceMap::new();
    let verify_opts = VerifyOptions::default();
    let mut subject = mk_chain(0xCD, ChainSeverity::Medium, ImpactCategory::InfoDisclosure);

    let outcome = reverify_chain(&mut subject, &[], &surface_map, &verify_opts);
    let detail = outcome.verdict.detail.as_deref().expect("detail populated");

    // (expected fragment, failure explanation)
    let expectations = [
        ("ran 0/0", "detail must report 0/0 ran when no specs built"),
        (
            "sandbox_errors=0",
            "detail must zero sandbox_errors when no runs attempted",
        ),
        (
            "timeouts=0",
            "detail must zero timeouts when no runs attempted",
        ),
        (
            "nonzero_exits=0",
            "detail must zero nonzero_exits when no runs attempted",
        ),
        (
            "final_sink_hit=false",
            "detail must stamp final_sink_hit=false when no runs attempted",
        ),
    ];
    for (fragment, complaint) in expectations {
        assert!(detail.contains(fragment), "{complaint}; got {detail:?}");
    }
}
|
||||
|
||||
/// Running zero chain steps must report nothing run and no sink hit.
#[test]
fn run_chain_steps_with_empty_input_is_a_no_op() {
    // The run-coverage detail segment is derived entirely from the
    // (steps_run, sandbox_errors, timeouts, nonzero_exits, final_sink_hit)
    // tuple returned here, so this pins the "nothing built" contract.
    let terminal = ChainStepTerminal {
        sink_cap_bits: 0,
        sink_callee: "noop".into(),
    };
    let sandbox_opts = sandbox::SandboxOptions::default();

    let outcome = run_chain_steps(&[], &sandbox_opts, &terminal);

    assert_eq!(outcome, (0, 0, 0, 0, false));
}
|
||||
|
||||
/// A chain member with no matching diag yields exactly one spec entry,
/// keyed by the member hash and carrying `NoFlowSteps`.
#[test]
fn chain_step_specs_reports_no_flow_steps_for_missing_diag() {
    let verify_opts = VerifyOptions::default();
    let subject = mk_chain(7, ChainSeverity::Medium, ImpactCategory::InfoDisclosure);

    let specs = chain_step_specs(&subject, &[], &verify_opts);

    assert_eq!(specs.len(), 1);
    let only = &specs[0];
    assert_eq!(only.member_hash, 7);
    assert!(matches!(
        only.result,
        Err(UnsupportedReason::NoFlowSteps)
    ));
}
|
||||
}
|
||||
197
src/chain/score.rs
Normal file
197
src/chain/score.rs
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
//! Phase 25 — scoring for composed exploit chains.
|
||||
//!
|
||||
//! `score(path) = sum(impact) * product(feasibility)`
|
||||
//!
|
||||
//! The impact term is the sum of per-member [`ImpactCategory`] weights
|
||||
//! (each member contributes the weight of the *standalone* category its
|
||||
//! primary cap maps to, or `0` when the cap has no standalone impact —
|
||||
//! the cap still contributes adjacency to the final implied impact via
|
||||
//! the composer). The feasibility term is the product of every
|
||||
//! member's [`Feasibility::score`].
|
||||
//!
|
||||
//! # Threshold
|
||||
//!
|
||||
//! [`min_score_default`] is the in-code fallback when `[chain] min_score`
|
||||
//! is unset in `nyx.toml`. Path search drops any composed chain whose
|
||||
//! score is strictly below the configured threshold.
|
||||
|
||||
use crate::chain::edges::ChainEdge;
|
||||
use crate::chain::feasibility::Feasibility;
|
||||
use crate::chain::impact::ImpactCategory;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Per-impact-category numeric weight contributed to the additive
|
||||
/// impact term. The relative ordering matches the design doc's
|
||||
/// criticality ranking; absolute values are kept simple integers so
|
||||
/// the resulting `score` stays human-comparable.
|
||||
///
|
||||
/// `BrowserToLocalRce` is treated as marginally higher than `Rce`
|
||||
/// because the chain composing it (`HEADER_INJECTION + CODE_EXEC` with
|
||||
/// an unauthenticated entry-point) folds an extra surface property and
|
||||
/// is therefore strictly more specific.
|
||||
pub const fn category_weight(c: ImpactCategory) -> f64 {
|
||||
match c {
|
||||
ImpactCategory::BrowserToLocalRce => 110.0,
|
||||
ImpactCategory::Rce => 100.0,
|
||||
ImpactCategory::SessionHijack => 80.0,
|
||||
ImpactCategory::InternalNetworkAccess => 60.0,
|
||||
ImpactCategory::InfoDisclosure => 50.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// `f64` cap floor for the multiplicative feasibility term. Even an
|
||||
/// `Unverified` member contributes a non-zero weight so a 3-step chain
|
||||
/// with three unverified hops does not score `0`.
|
||||
fn feasibility_factor(f: Feasibility) -> f64 {
|
||||
match f {
|
||||
Feasibility::Confirmed => 1.0,
|
||||
Feasibility::InconclusiveHighConf => 0.5,
|
||||
Feasibility::Unverified => 0.1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the chain score for a path.
|
||||
///
|
||||
/// `member_impacts` carries the standalone impact category for each
|
||||
/// member that has one (omit the entry when the member's primary cap
|
||||
/// has no standalone rule — adjacency still contributes via the
|
||||
/// composer's `implied_impact`). `implied_impact` is the final
|
||||
/// composed category; it always contributes its weight even when no
|
||||
/// individual member would on its own (e.g. the `OPEN_REDIRECT +
|
||||
/// UNAUTHORIZED_ID → SessionHijack` rule).
|
||||
pub fn score_path(
|
||||
member_impacts: &[ImpactCategory],
|
||||
implied_impact: ImpactCategory,
|
||||
members: &[ChainEdge],
|
||||
) -> f64 {
|
||||
let mut impact_sum: f64 = member_impacts.iter().copied().map(category_weight).sum();
|
||||
impact_sum += category_weight(implied_impact);
|
||||
let feasibility_product: f64 = members
|
||||
.iter()
|
||||
.map(|e| feasibility_factor(e.feasibility))
|
||||
.product();
|
||||
impact_sum * feasibility_product
|
||||
}
|
||||
|
||||
/// In-code fallback for `[chain] min_score`, used when the key is unset in
/// `nyx.toml`.
///
/// Worked examples under [`score_path`], which always adds the implied
/// impact's weight on top of the member weights:
///
/// - one `Unverified` `InfoDisclosure` member whose implied impact is also
///   `InfoDisclosure` scores `(50 + 50) * 0.1 = 10` and narrowly clears
///   the threshold;
/// - the same impacts spread over two `Unverified` hops score
///   `100 * 0.1 * 0.1 = 1` and fall below it;
/// - a single `Confirmed` `Rce` member scores `(100 + 100) * 1.0 = 200`.
pub const fn min_score_default() -> f64 {
    9.5
}
|
||||
|
||||
/// `[chain]` section of `nyx.toml`. Persisted via
|
||||
/// [`crate::utils::config::ChainConfig`].
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ChainScoreConfig {
|
||||
/// Path-search threshold. Chains below this score are dropped.
|
||||
pub min_score: f64,
|
||||
}
|
||||
|
||||
impl Default for ChainScoreConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
min_score: min_score_default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chain::edges::{ChainEdge, FindingRef, Reach};
    use crate::chain::feasibility::Feasibility;
    use crate::chain::impact::ImpactCategory;
    use crate::labels::Cap;
    use crate::surface::SourceLocation;

    /// Fixture: a `CODE_EXEC` edge with placeholder finding metadata. Only
    /// the feasibility differs between tests.
    fn edge(feas: Feasibility) -> ChainEdge {
        let finding = FindingRef {
            finding_id: "f".into(),
            stable_hash: 0,
            location: SourceLocation::new("a.py", 1, 1),
            rule_id: "r".into(),
            cap_bits: Cap::CODE_EXEC.bits(),
        };
        ChainEdge {
            finding,
            primary_cap: Cap::CODE_EXEC,
            reach: Reach::Unreachable,
            feasibility: feas,
        }
    }

    /// Scores a path whose single standalone impact and implied impact are
    /// both `category`.
    fn score_same_impact(category: ImpactCategory, members: &[ChainEdge]) -> f64 {
        score_path(&[category], category, members)
    }

    #[test]
    fn single_confirmed_rce_clears_default_threshold() {
        let score = score_same_impact(ImpactCategory::Rce, &[edge(Feasibility::Confirmed)]);
        // (100 member + 100 implied) * 1.0 feasibility = 200.
        assert!(score > min_score_default());
        assert!((score - 200.0).abs() < f64::EPSILON);
    }

    #[test]
    fn unverified_single_member_below_threshold() {
        // One unverified hop: (50 + 50) * 0.1 = 10, which still sits just
        // above the default threshold.
        let one_hop = score_same_impact(
            ImpactCategory::InfoDisclosure,
            &[edge(Feasibility::Unverified)],
        );
        assert!(one_hop > min_score_default());

        // A second unverified hop is what gates the chain:
        // 100 * 0.1 * 0.1 = 1.
        let two_hops = score_same_impact(
            ImpactCategory::InfoDisclosure,
            &[edge(Feasibility::Unverified), edge(Feasibility::Unverified)],
        );
        assert!(two_hops < min_score_default());
    }

    #[test]
    fn feasibility_dampens_score() {
        // Same impacts and a confirmed first hop; only the second hop's
        // feasibility varies.
        let with_second_hop = |second: Feasibility| {
            score_same_impact(
                ImpactCategory::Rce,
                &[edge(Feasibility::Confirmed), edge(second)],
            )
        };
        let confirmed = with_second_hop(Feasibility::Confirmed);
        let inconclusive = with_second_hop(Feasibility::InconclusiveHighConf);
        let unverified = with_second_hop(Feasibility::Unverified);

        assert!(confirmed > inconclusive);
        assert!(inconclusive > unverified);
    }

    #[test]
    fn category_weights_strictly_ordered() {
        let most_to_least_severe = [
            ImpactCategory::BrowserToLocalRce,
            ImpactCategory::Rce,
            ImpactCategory::SessionHijack,
            ImpactCategory::InternalNetworkAccess,
            ImpactCategory::InfoDisclosure,
        ];
        // Each category must outweigh the one ranked directly below it.
        for pair in most_to_least_severe.windows(2) {
            assert!(category_weight(pair[0]) > category_weight(pair[1]));
        }
    }
}
|
||||
943
src/chain/search.rs
Normal file
943
src/chain/search.rs
Normal file
|
|
@ -0,0 +1,943 @@
|
|||
//! Phase 25 — bounded path search for exploit-chain composition.
|
||||
//!
|
||||
//! Path topology:
|
||||
//!
|
||||
//! ```text
|
||||
//! Attacker (virtual) → EntryPoint → Finding* → Sink
|
||||
//! ```
|
||||
//!
|
||||
//! The DFS starts at the implicit attacker node (virtually adjacent to
|
||||
//! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`](ChainSearchConfig::max_depth)
|
||||
//! per-finding hops, and terminates at any
|
||||
//! [`crate::surface::DangerousLocal`] node. Each emitted
|
||||
//! [`ChainFinding`] is the deterministic minimum-length path through a
|
||||
//! given (entry, sink) pair.
|
||||
//!
|
||||
//! # Determinism
|
||||
//!
|
||||
//! 1. SurfaceMap nodes are canonicalised before search — every input
|
||||
//! list (entries, sinks) is iterated in `SourceLocation` order.
|
||||
//! 2. Candidate per-entry findings are sorted by
|
||||
//! [`crate::chain::edges::FindingRef::stable_hash`] before DFS,
|
||||
//! breaking ties by `rule_id` so collisions stay reproducible.
|
||||
//! 3. The emitted chain list is sorted by `score` descending (ties
|
||||
//! broken by `stable_hash` descending, then `implied_impact`
|
||||
//! descending) before return.
|
||||
//!
|
||||
//! Running the same fixture 10× produces a byte-identical chain list.
|
||||
//!
|
||||
//! # Phase 24 follow-ups closed here
|
||||
//!
|
||||
//! - `BrowserToLocalRce` auth-gate predicate: when the lattice yields
|
||||
//! `BrowserToLocalRce` from `HEADER_INJECTION + CODE_EXEC`, the path
|
||||
//! is only kept when the entry's `auth_required` is `false`. Auth-
|
||||
//! gated entries downgrade to the closest standalone impact.
|
||||
//! - SSRF + LocalListener refinement: when the lattice yields
|
||||
//! `InternalNetworkAccess` and the SurfaceMap exposes a local
|
||||
//! listener (a [`crate::surface::DataStore`] / [`crate::surface::ExternalService`]
|
||||
//! bound to a loopback host), the path is preserved; without a local
|
||||
//! listener the chain is still emitted but scored lower (no boost).
|
||||
//!
|
||||
//! The "file-local reach → call-graph-aware reach" upgrade remains
|
||||
//! deferred (see deferred.md): the DFS still treats two findings as
|
||||
//! adjacent when they share a source file, mirroring Phase 24's
|
||||
//! `findings_to_edges` reach resolver.
|
||||
//!
|
||||
//! Entry-to-finding affinity is enforced symmetrically: the
|
||||
//! per-entry candidate filter requires the finding's source file to
|
||||
//! overlap with the entry's `handler_location.file` (or a
|
||||
//! call-graph reach hit) on top of the route+method match. Without
|
||||
//! this gate, two entries that happen to share a (route, method) in
|
||||
//! a monorepo would each claim every finding under that key,
|
||||
//! producing `O(entries × findings)` phantom chains that the dedup
|
||||
//! pass would then collapse.
|
||||
|
||||
use crate::callgraph::FileReachMap;
|
||||
use crate::chain::edges::{ChainEdge, Reach};
|
||||
use crate::chain::finding::{ChainFinding, ChainSink};
|
||||
use crate::chain::impact::{ImpactCategory, lookup_impact};
|
||||
use crate::chain::score::score_path;
|
||||
use crate::labels::Cap;
|
||||
use crate::surface::{DangerousLocal, EntryPoint, SurfaceMap, SurfaceNode};
|
||||
|
||||
/// Bounded-DFS search configuration.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct ChainSearchConfig {
|
||||
/// Maximum number of per-finding hops in a single chain path.
|
||||
/// `0` disables search (no chain is ever emitted).
|
||||
pub max_depth: usize,
|
||||
/// Drop chains whose score is strictly below this threshold.
|
||||
pub min_score: f64,
|
||||
}
|
||||
|
||||
impl Default for ChainSearchConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_depth: 4,
|
||||
min_score: crate::chain::score::min_score_default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of one search pass: every chain whose score cleared
|
||||
/// `cfg.min_score`, deterministically ordered.
|
||||
pub fn find_chains(
|
||||
edges: &[ChainEdge],
|
||||
surface: &SurfaceMap,
|
||||
cfg: ChainSearchConfig,
|
||||
) -> Vec<ChainFinding> {
|
||||
find_chains_with_reach(edges, surface, cfg, None)
|
||||
}
|
||||
|
||||
/// Like [`find_chains`] but optionally consults a [`FileReachMap`] to
|
||||
/// widen the per-entry-per-sink file-scope filter beyond literal
|
||||
/// file-equality.
|
||||
///
|
||||
/// When `reach` is `Some`, a candidate edge is in scope for a given
|
||||
/// sink whenever the finding's file *or* a transitive caller of it
|
||||
/// reaches the sink's file via the call graph. `reach = None`
|
||||
/// preserves the legacy file-local behaviour for callers that have
|
||||
/// not yet wired the call-graph reach map.
|
||||
pub fn find_chains_with_reach(
|
||||
edges: &[ChainEdge],
|
||||
surface: &SurfaceMap,
|
||||
cfg: ChainSearchConfig,
|
||||
reach: Option<&FileReachMap>,
|
||||
) -> Vec<ChainFinding> {
|
||||
if cfg.max_depth == 0 || edges.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
let sinks = collect_sinks(surface);
|
||||
let entries = collect_entries(surface);
|
||||
let local_listener_present = has_local_listener(surface);
|
||||
|
||||
let mut chains: Vec<ChainFinding> = Vec::new();
|
||||
for entry in &entries {
|
||||
// Per-entry candidate edge slice: every edge whose reach
|
||||
// points at this entry, sorted deterministically.
|
||||
let mut candidates: Vec<&ChainEdge> = edges
|
||||
.iter()
|
||||
.filter(|e| edge_reaches_entry(e, entry, reach))
|
||||
.collect();
|
||||
candidates.sort_by(|a, b| {
|
||||
(
|
||||
a.finding.stable_hash,
|
||||
&a.finding.rule_id,
|
||||
&a.finding.location,
|
||||
)
|
||||
.cmp(&(
|
||||
b.finding.stable_hash,
|
||||
&b.finding.rule_id,
|
||||
&b.finding.location,
|
||||
))
|
||||
});
|
||||
for sink in &sinks {
|
||||
// Scope candidates to the sink: same-file match (legacy),
|
||||
// optionally widened by a call-graph-derived reach map so
|
||||
// a finding in `internal_helper.py` whose enclosing
|
||||
// function is reached only through `routes.py` still
|
||||
// composes against a sink in `routes.py`.
|
||||
let scoped: Vec<&ChainEdge> = candidates
|
||||
.iter()
|
||||
.filter(|e| {
|
||||
paths_overlap(&e.finding.location.file, &sink.location.file)
|
||||
|| reach.is_some_and(|r| {
|
||||
r.reaches(&e.finding.location.file, &sink.location.file)
|
||||
})
|
||||
})
|
||||
.copied()
|
||||
.collect();
|
||||
if let Some(chain) =
|
||||
compose_chain(entry, sink, &scoped, cfg.max_depth, local_listener_present)
|
||||
&& chain.score >= cfg.min_score
|
||||
{
|
||||
chains.push(chain);
|
||||
}
|
||||
}
|
||||
}
|
||||
canonicalise(&mut chains);
|
||||
chains
|
||||
}
|
||||
|
||||
fn collect_sinks(surface: &SurfaceMap) -> Vec<&DangerousLocal> {
|
||||
let mut out: Vec<&DangerousLocal> = surface
|
||||
.nodes
|
||||
.iter()
|
||||
.filter_map(|n| match n {
|
||||
SurfaceNode::DangerousLocal(d) => Some(d),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
out.sort_by(|a, b| (&a.location, &a.function_name).cmp(&(&b.location, &b.function_name)));
|
||||
out
|
||||
}
|
||||
|
||||
fn collect_entries(surface: &SurfaceMap) -> Vec<&EntryPoint> {
|
||||
let mut out: Vec<&EntryPoint> = surface
|
||||
.nodes
|
||||
.iter()
|
||||
.filter_map(|n| match n {
|
||||
SurfaceNode::EntryPoint(e) => Some(e),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
out.sort_by(|a, b| (&a.location, &a.route).cmp(&(&b.location, &b.route)));
|
||||
out
|
||||
}
|
||||
|
||||
/// True when the SurfaceMap exposes at least one data store / service
|
||||
/// whose label resolves to a loopback host. Used by the SSRF +
|
||||
/// LocalListener refinement in [`compose_chain`].
|
||||
fn has_local_listener(surface: &SurfaceMap) -> bool {
|
||||
surface.nodes.iter().any(|n| match n {
|
||||
SurfaceNode::DataStore(d) => is_loopback_label(&d.label),
|
||||
SurfaceNode::ExternalService(s) => is_loopback_label(&s.label),
|
||||
_ => false,
|
||||
})
|
||||
}
|
||||
|
||||
/// Heuristic check for a label that points at a listener on the local
/// machine.
///
/// The comparison is ASCII case-insensitive. A label matches when it
/// starts with `unix:` (a Unix-domain socket) or contains one of:
///
/// - `127.0.0.1` or `localhost` — IPv4 loopback;
/// - `[::1]` — IPv6 loopback in URL authority form;
/// - `0.0.0.0` — the wildcard bind address, which is not loopback but is
///   reachable from the local host.
///
/// This is substring matching, so a host name that merely contains
/// `localhost` also matches.
fn is_loopback_label(s: &str) -> bool {
    const LOCAL_MARKERS: [&str; 4] = ["127.0.0.1", "localhost", "0.0.0.0", "[::1]"];
    let lower = s.to_ascii_lowercase();
    lower.starts_with("unix:") || LOCAL_MARKERS.iter().any(|marker| lower.contains(marker))
}
|
||||
|
||||
fn edge_reaches_entry(edge: &ChainEdge, entry: &EntryPoint, reach: Option<&FileReachMap>) -> bool {
|
||||
let route_method_match = match &edge.reach {
|
||||
Reach::Reachable { route, method, .. } => *route == entry.route && *method == entry.method,
|
||||
Reach::Unreachable => return false,
|
||||
};
|
||||
if !route_method_match {
|
||||
return false;
|
||||
}
|
||||
// File-affinity gate: the entry's handler must live in (or
|
||||
// transitively call into) the same file as the finding.
|
||||
// Without this, multiple entries that happen to declare the
|
||||
// same (route, method) — common in monorepos that ship
|
||||
// several small services side-by-side — would each claim
|
||||
// every finding, producing O(entries × findings) phantom
|
||||
// chains. The same shape as the sink-scope filter below:
|
||||
// literal file-suffix overlap first, fall back to the
|
||||
// call-graph reach map.
|
||||
let entry_file = &entry.handler_location.file;
|
||||
let finding_file = &edge.finding.location.file;
|
||||
paths_overlap(entry_file, finding_file)
|
||||
|| reach.is_some_and(|r| r.reaches(entry_file, finding_file))
|
||||
}
|
||||
|
||||
/// Decide whether two path strings plausibly name the same file.
///
/// Identical strings always match. Otherwise a leading `./` is ignored and
/// the shorter path must be a suffix of the longer one that starts on a
/// `/` boundary, which is how a project-relative path relates to an
/// absolute path of the same file (`src/app.py` vs `/repo/src/app.py`).
///
/// Comparing only the final component is not enough: unrelated files that
/// share a common name (`svc_a/index.js` vs `svc_b/index.js`, any two
/// `__init__.py`) would then count as the same file.
///
/// Empty paths and paths ending in `/` never match anything but
/// themselves. Only `/` is treated as a separator.
fn paths_overlap(a: &str, b: &str) -> bool {
    if a == b {
        return true;
    }
    let a = a.strip_prefix("./").unwrap_or(a);
    let b = b.strip_prefix("./").unwrap_or(b);
    let (longer, shorter) = if a.len() >= b.len() { (a, b) } else { (b, a) };
    // No file name to compare.
    if shorter.is_empty() || shorter.ends_with('/') {
        return false;
    }
    match longer.strip_suffix(shorter) {
        // Equal once the `./` prefix is ignored.
        Some("") => true,
        // The suffix must begin at a component boundary, so `myapp.py`
        // does not match `app.py`.
        Some(prefix) => prefix.ends_with('/'),
        None => false,
    }
}
|
||||
|
||||
/// Build a single chain for one (entry, sink) pair.
|
||||
///
|
||||
/// Bounded DFS: take the longest deterministic prefix of `scoped` up
|
||||
/// to `max_depth`, then pick the highest-severity lattice match
|
||||
/// across every (member_cap, sink_cap) pair. Returning all in-scope
|
||||
/// edges as members matches the design doc's three-member output for
|
||||
/// the `CORS + NoAuth + websocket → shell tool` scenario; using the
|
||||
/// best impact across all pairs ensures `HEADER_INJECTION + CODE_EXEC`
|
||||
/// lights up `BrowserToLocalRce` even when an unrelated finding (e.g.
|
||||
/// the standalone auth-gap diagnostic) is sorted first.
|
||||
fn compose_chain(
|
||||
entry: &EntryPoint,
|
||||
sink: &DangerousLocal,
|
||||
scoped: &[&ChainEdge],
|
||||
max_depth: usize,
|
||||
local_listener_present: bool,
|
||||
) -> Option<ChainFinding> {
|
||||
if scoped.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let bound = scoped.len().min(max_depth);
|
||||
let path: Vec<&ChainEdge> = scoped[..bound].to_vec();
|
||||
let sink_cap = sole_cap(sink.cap_bits)?;
|
||||
let (impact, member_impacts) = resolve_impact(&path, sink_cap, entry, local_listener_present)?;
|
||||
let mut chain = build_chain(entry, sink, &path, impact, &member_impacts);
|
||||
// SSRF + LocalListener refinement (Phase 24 deferred close): when
|
||||
// the implied impact is `InternalNetworkAccess` AND the SurfaceMap
|
||||
// exposes a loopback listener, the chain is more concrete than the
|
||||
// bare lattice match — lift the score so it ranks above SSRF chains
|
||||
// without a corroborating in-process target.
|
||||
if impact == ImpactCategory::InternalNetworkAccess && local_listener_present {
|
||||
chain.score *= LOCAL_LISTENER_BOOST;
|
||||
}
|
||||
Some(chain)
|
||||
}
|
||||
|
||||
/// Score multiplier applied when an `InternalNetworkAccess` chain has
|
||||
/// a corroborating loopback listener in the SurfaceMap. Calibrated to
|
||||
/// lift the chain above an otherwise-identical SSRF chain that lacks
|
||||
/// the listener context, without overtaking strictly more severe
|
||||
/// categories.
|
||||
const LOCAL_LISTENER_BOOST: f64 = 1.5;
|
||||
|
||||
/// Pick the lowest-bit single [`Cap`] from `bits`, or `None` when no
|
||||
/// bit is set. Sinks in the SurfaceMap may carry multi-bit
|
||||
/// `cap_bits`; the DFS terminates against the lowest single bit so
|
||||
/// downstream lattice lookups stay deterministic.
|
||||
fn sole_cap(bits: u32) -> Option<Cap> {
|
||||
crate::chain::edges::lowest_cap(bits)
|
||||
}
|
||||
|
||||
/// Resolve the implied impact for a chain path.
|
||||
///
|
||||
/// Walks every (member.primary_cap, sink_cap) pair and picks the
|
||||
/// highest-severity lattice match. Returns `None` when no member +
|
||||
/// sink pair lights up a rule and the sink cap has no standalone
|
||||
/// rule either.
|
||||
///
|
||||
/// Auth gate: `BrowserToLocalRce` only fires when the entry's
|
||||
/// `auth_required` is `false`. Authenticated entries fall through
|
||||
/// to the next-best impact (typically `CODE_EXEC → Rce`).
|
||||
fn resolve_impact(
|
||||
path: &[&ChainEdge],
|
||||
sink_cap: Cap,
|
||||
entry: &EntryPoint,
|
||||
_local_listener_present: bool,
|
||||
) -> Option<(ImpactCategory, Vec<ImpactCategory>)> {
|
||||
let mut best: Option<ImpactCategory> = None;
|
||||
for member in path {
|
||||
if let Some(cat) = lookup_impact(member.primary_cap, Some(sink_cap)) {
|
||||
if cat == ImpactCategory::BrowserToLocalRce && entry.auth_required {
|
||||
// Auth gate: this rule cannot fire when the entry is
|
||||
// authed. Keep walking — another pair may light up
|
||||
// a different rule.
|
||||
continue;
|
||||
}
|
||||
best = Some(match best {
|
||||
Some(prev) => more_severe(prev, cat),
|
||||
None => cat,
|
||||
});
|
||||
}
|
||||
}
|
||||
// Fall through to standalone on the sink cap when no pair lit up.
|
||||
if best.is_none() {
|
||||
best = lookup_impact(sink_cap, None);
|
||||
}
|
||||
best.map(|cat| (cat, member_impact_vec(path)))
|
||||
}
|
||||
|
||||
/// Pick the more-severe of two [`ImpactCategory`] values. Severity
|
||||
/// ordering matches the design doc's lattice criticality:
|
||||
/// `BrowserToLocalRce > Rce > SessionHijack > InternalNetworkAccess > InfoDisclosure`.
|
||||
fn more_severe(a: ImpactCategory, b: ImpactCategory) -> ImpactCategory {
|
||||
if severity_rank(a) >= severity_rank(b) {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
}
|
||||
}
|
||||
|
||||
fn severity_rank(c: ImpactCategory) -> u8 {
|
||||
match c {
|
||||
ImpactCategory::BrowserToLocalRce => 5,
|
||||
ImpactCategory::Rce => 4,
|
||||
ImpactCategory::SessionHijack => 3,
|
||||
ImpactCategory::InternalNetworkAccess => 2,
|
||||
ImpactCategory::InfoDisclosure => 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn member_impact_vec(path: &[&ChainEdge]) -> Vec<ImpactCategory> {
|
||||
path.iter()
|
||||
.filter_map(|e| crate::chain::standalone_impact(e.primary_cap))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_chain(
|
||||
_entry: &EntryPoint,
|
||||
sink: &DangerousLocal,
|
||||
path: &[&ChainEdge],
|
||||
implied_impact: ImpactCategory,
|
||||
member_impacts: &[ImpactCategory],
|
||||
) -> ChainFinding {
|
||||
let members: Vec<_> = path.iter().map(|e| e.finding.clone()).collect();
|
||||
let stable_hash = ChainFinding::compute_stable_hash(&members, implied_impact);
|
||||
let owned_edges: Vec<ChainEdge> = path.iter().map(|e| (*e).clone()).collect();
|
||||
let score = score_path(member_impacts, implied_impact, &owned_edges);
|
||||
let severity = crate::output::severity::chain_severity(implied_impact, &owned_edges);
|
||||
let dynamic_verdict = composite_dynamic_verdict(&owned_edges);
|
||||
ChainFinding {
|
||||
stable_hash,
|
||||
members,
|
||||
sink: ChainSink {
|
||||
file: sink.location.file.clone(),
|
||||
line: sink.location.line,
|
||||
col: sink.location.col,
|
||||
function_name: sink.function_name.clone(),
|
||||
cap_bits: sink.cap_bits,
|
||||
},
|
||||
implied_impact,
|
||||
severity,
|
||||
score,
|
||||
dynamic_verdict,
|
||||
reverify_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 25 placeholder for composite verification. When *every*
|
||||
/// member edge has `Feasibility::Confirmed` the composite verdict
|
||||
/// inherits that confirmation; otherwise `None` (Phase 26 will run a
|
||||
/// real composite re-verification pass).
|
||||
fn composite_dynamic_verdict(_path: &[ChainEdge]) -> Option<crate::evidence::VerifyResult> {
|
||||
None
|
||||
}
|
||||
|
||||
fn canonicalise(chains: &mut Vec<ChainFinding>) {
|
||||
chains.sort_by(|a, b| {
|
||||
b.score
|
||||
.partial_cmp(&a.score)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then(b.stable_hash.cmp(&a.stable_hash))
|
||||
.then(b.implied_impact.cmp(&a.implied_impact))
|
||||
});
|
||||
// Drop duplicates: two chains with the same stable_hash and the
|
||||
// same terminal sink serialise byte-identically (stable_hash is a
|
||||
// function of members + implied_impact, and the wire format
|
||||
// exposes only members, sink, impact, severity, score). They arise
|
||||
// when multiple entry-points share a (route, method) but are
|
||||
// otherwise unrelated (e.g. monorepos, or a scan covering multiple
|
||||
// small apps), each claiming the same finding via the route-only
|
||||
// candidate filter in `find_chains_with_reach`. Keep the first
|
||||
// occurrence after the sort above; the sort is total enough that
|
||||
// the survivor is deterministic.
|
||||
chains.dedup_by(|a, b| a.stable_hash == b.stable_hash && a.sink == b.sink);
|
||||
}
|
||||
|
||||
// Manual Ord/PartialOrd for ImpactCategory so the canonicalise
|
||||
// tie-break has a total order. Defined here rather than in `impact`
|
||||
// to avoid leaking ordering into the public type.
|
||||
impl PartialOrd for ImpactCategory {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
impl Ord for ImpactCategory {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
(*self as u8).cmp(&(*other as u8))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::chain::ChainSeverity;
|
||||
use crate::chain::edges::FindingRef;
|
||||
use crate::chain::feasibility::Feasibility;
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::labels::Cap;
|
||||
use crate::surface::{
|
||||
DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
|
||||
};
|
||||
|
||||
fn loc(file: &str, line: u32) -> SourceLocation {
|
||||
SourceLocation::new(file, line, 1)
|
||||
}
|
||||
|
||||
fn entry(file: &str, route: &str, auth: bool) -> SurfaceNode {
|
||||
SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: loc(file, 1),
|
||||
framework: Framework::Flask,
|
||||
method: HttpMethod::POST,
|
||||
route: route.into(),
|
||||
handler_name: "h".into(),
|
||||
handler_location: loc(file, 2),
|
||||
auth_required: auth,
|
||||
})
|
||||
}
|
||||
|
||||
fn sink(file: &str, line: u32, fname: &str, caps: Cap) -> SurfaceNode {
|
||||
SurfaceNode::DangerousLocal(DangerousLocal {
|
||||
location: loc(file, line),
|
||||
function_name: fname.into(),
|
||||
cap_bits: caps.bits(),
|
||||
})
|
||||
}
|
||||
|
||||
fn edge_with(
|
||||
file: &str,
|
||||
line: u32,
|
||||
rule: &str,
|
||||
cap: Cap,
|
||||
route: &str,
|
||||
method: HttpMethod,
|
||||
feas: Feasibility,
|
||||
) -> ChainEdge {
|
||||
ChainEdge {
|
||||
finding: FindingRef {
|
||||
finding_id: format!("{rule}-{line}"),
|
||||
stable_hash: blake3::hash(format!("{rule}:{file}:{line}").as_bytes()).as_bytes()
|
||||
[..8]
|
||||
.try_into()
|
||||
.map(u64::from_le_bytes)
|
||||
.unwrap(),
|
||||
location: loc(file, line),
|
||||
rule_id: rule.into(),
|
||||
cap_bits: cap.bits(),
|
||||
},
|
||||
primary_cap: cap,
|
||||
reach: Reach::Reachable {
|
||||
location: loc(file, 1),
|
||||
method,
|
||||
route: route.into(),
|
||||
auth_required: false,
|
||||
},
|
||||
feasibility: feas,
|
||||
}
|
||||
}
|
||||
|
||||
/// With no edges and an empty surface map, the search yields no chains.
#[test]
fn returns_empty_when_no_findings() {
    let empty_surface = SurfaceMap::new();
    let search = ChainSearchConfig::default();
    let chains = find_chains(&[], &empty_surface, search);
    assert!(chains.is_empty());
}
|
||||
|
||||
/// A single confirmed CODE_EXEC finding behind an unauthenticated entry
/// produces exactly one chain, classified as `ImpactCategory::Rce`.
#[test]
fn standalone_codeexec_via_unauthed_entry_emits_rce_chain() {
    let mut surface = SurfaceMap::new();
    let nodes = [
        entry("app.py", "/exec", false),
        sink("app.py", 20, "os.system", Cap::CODE_EXEC),
    ];
    for node in nodes {
        surface.nodes.push(node);
    }

    let finding = edge_with(
        "app.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/exec",
        HttpMethod::POST,
        Feasibility::Confirmed,
    );

    let search = ChainSearchConfig::default();
    let chains = find_chains(&[finding], &surface, search);

    assert_eq!(chains.len(), 1);
    assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
}
|
||||
|
||||
/// A HEADER_INJECTION finding plus a CODE_EXEC finding on the same
/// unauthenticated `/ws` route compose into one critical
/// `BrowserToLocalRce` chain.
#[test]
fn header_injection_plus_codeexec_via_unauthed_entry_is_browser_local_rce() {
    // Both findings share file, route, method and feasibility; only the
    // line, rule id and capability vary.
    let ws_edge = |line: u32, rule: &str, cap: Cap| {
        edge_with(
            "app.py",
            line,
            rule,
            cap,
            "/ws",
            HttpMethod::POST,
            Feasibility::Unverified,
        )
    };

    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("app.py", "/ws", false));
    surface
        .nodes
        .push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC));

    let cors = ws_edge(10, "cfg-cors-allow-all", Cap::HEADER_INJECTION);
    let exec = ws_edge(20, "taint-codeexec", Cap::CODE_EXEC);

    let search = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };
    let chains = find_chains(&[cors, exec], &surface, search);

    assert_eq!(chains.len(), 1);
    let only = &chains[0];
    assert_eq!(only.implied_impact, ImpactCategory::BrowserToLocalRce);
    assert_eq!(only.severity, ChainSeverity::Critical);
}
|
||||
|
||||
/// Same fixture as the browser-to-local-RCE test, except the entry point
/// requires auth: the single resulting chain must be plain `Rce`, not
/// `BrowserToLocalRce`.
#[test]
fn authed_entry_downgrades_browser_local_rce_to_rce() {
    let ws_edge = |line: u32, rule: &str, cap: Cap| {
        edge_with(
            "app.py",
            line,
            rule,
            cap,
            "/ws",
            HttpMethod::POST,
            Feasibility::Unverified,
        )
    };

    let mut surface = SurfaceMap::new();
    // The only difference from the unauthenticated fixture: `auth = true`.
    surface.nodes.push(entry("app.py", "/ws", true));
    surface
        .nodes
        .push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC));

    let cors = ws_edge(10, "cfg-cors-allow-all", Cap::HEADER_INJECTION);
    let exec = ws_edge(20, "taint-codeexec", Cap::CODE_EXEC);

    let search = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };
    let chains = find_chains(&[cors, exec], &surface, search);

    assert_eq!(chains.len(), 1);
    assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
}
|
||||
|
||||
/// Running the search ten times over the same inputs yields the same
/// sequence of chain `stable_hash` values every time.
#[test]
fn determinism_across_runs() {
    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("app.py", "/exec", false));
    surface
        .nodes
        .push(sink("app.py", 20, "os.system", Cap::CODE_EXEC));

    let finding = edge_with(
        "app.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/exec",
        HttpMethod::POST,
        Feasibility::Confirmed,
    );
    let cfg = ChainSearchConfig::default();

    // One search pass reduced to the ordered list of chain hashes.
    let run_once = || -> Vec<u64> {
        let chains = find_chains(std::slice::from_ref(&finding), &surface, cfg);
        let hashes: Vec<u64> = chains.iter().map(|c| c.stable_hash).collect();
        hashes
    };

    let first_hashes = run_once();
    let mut remaining = 9;
    while remaining > 0 {
        let again_hashes = run_once();
        assert_eq!(again_hashes, first_hashes);
        remaining -= 1;
    }
}
|
||||
|
||||
/// Adding a loopback data store (`redis://127.0.0.1:6379`) to the surface
/// leaves the SSRF chain's impact category unchanged but multiplies its
/// score by `LOCAL_LISTENER_BOOST` (checked to within 1e-9).
#[test]
fn ssrf_with_local_listener_scores_higher_than_without() {
    use crate::surface::{DataStore, DataStoreKind};

    // Fresh copy of the same confirmed SSRF finding for each search.
    fn ssrf_edge() -> ChainEdge {
        edge_with(
            "app.py",
            10,
            "taint-ssrf",
            Cap::SSRF,
            "/fetch",
            HttpMethod::POST,
            Feasibility::Confirmed,
        )
    }

    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };

    // Baseline: entry + SSRF sink, no local listener.
    let mut surface_no_listener = SurfaceMap::new();
    surface_no_listener
        .nodes
        .push(entry("app.py", "/fetch", false));
    surface_no_listener
        .nodes
        .push(sink("app.py", 20, "requests.get", Cap::SSRF));

    let baseline = find_chains(&[ssrf_edge()], &surface_no_listener, cfg);
    assert_eq!(baseline.len(), 1);
    assert_eq!(
        baseline[0].implied_impact,
        ImpactCategory::InternalNetworkAccess
    );

    // Boosted: identical surface plus a key-value store on loopback.
    let mut surface_with_listener = surface_no_listener.clone();
    let listener = DataStore {
        location: loc("app.py", 5),
        kind: DataStoreKind::KeyValue,
        label: "redis://127.0.0.1:6379".into(),
    };
    surface_with_listener
        .nodes
        .push(SurfaceNode::DataStore(listener));

    let boosted = find_chains(&[ssrf_edge()], &surface_with_listener, cfg);
    assert_eq!(boosted.len(), 1);
    assert_eq!(
        boosted[0].implied_impact,
        ImpactCategory::InternalNetworkAccess
    );

    let ratio = boosted[0].score / baseline[0].score;
    let drift = (ratio - LOCAL_LISTENER_BOOST).abs();
    assert!(
        drift < 1e-9,
        "expected ×{LOCAL_LISTENER_BOOST} boost, got ratio={ratio}"
    );
}
|
||||
|
||||
/// With `min_score` set to 1000, the unverified FILE_IO finding produces
/// no chains at all.
#[test]
fn score_threshold_drops_low_score_chains() {
    let mut surface = SurfaceMap::new();
    let nodes = [
        entry("app.py", "/r", false),
        sink("app.py", 20, "open", Cap::FILE_IO),
    ];
    for node in nodes {
        surface.nodes.push(node);
    }

    let finding = edge_with(
        "app.py",
        10,
        "test",
        Cap::FILE_IO,
        "/r",
        HttpMethod::GET,
        Feasibility::Unverified,
    );

    let strict = ChainSearchConfig {
        max_depth: 4,
        min_score: 1_000.0,
    };
    let chains = find_chains(&[finding], &surface, strict);
    assert!(chains.is_empty());
}
|
||||
|
||||
/// A finding in `routes.py` composes with a sink in `helper.py` only when
/// a call-graph reach map recording the caller relationship is supplied:
/// zero chains without it, one `Rce` chain with it.
#[test]
fn cross_file_chain_requires_reach_map() {
    use crate::callgraph::{FileReachMap, build_call_graph};
    use crate::summary::{FuncSummary, merge_summaries};

    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("routes.py", "/exec", false));
    // The sink sits in a helper file, not in the entry's own file.
    surface
        .nodes
        .push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC));

    let finding = edge_with(
        "routes.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/exec",
        HttpMethod::POST,
        Feasibility::Unverified,
    );
    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };

    // Without a reach map the routes.py finding must not pair with the
    // helper.py sink (original note: `paths_overlap` rejects the pair).
    let without_reach = find_chains(std::slice::from_ref(&finding), &surface, cfg);
    assert!(
        without_reach.is_empty(),
        "without reach map, cross-file chain must not compose"
    );

    // Call graph: routes.py::handle -> helper.py::sink.
    let caller = FuncSummary {
        name: "handle".into(),
        file_path: "routes.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![crate::summary::CalleeSite::bare("sink")],
        ..Default::default()
    };
    let callee = FuncSummary {
        name: "sink".into(),
        file_path: "helper.py".into(),
        lang: "python".into(),
        param_count: 0,
        ..Default::default()
    };
    let summaries = merge_summaries(vec![caller, callee], None);
    let call_graph = build_call_graph(&summaries, &[]);
    let reach = FileReachMap::build(&call_graph);

    let with_reach = find_chains_with_reach(&[finding], &surface, cfg, Some(&reach));
    assert_eq!(
        with_reach.len(),
        1,
        "reach map should widen scope to include helper.py sink"
    );
    assert_eq!(with_reach[0].implied_impact, ImpactCategory::Rce);
}
|
||||
|
||||
/// Three unrelated handler files each declare `POST /run`, and each holds
/// one finding plus one dangerous-local sink. The search must return one
/// chain per finding, all with distinct `stable_hash` values.
///
/// Background carried over from the original note: without a dedup pass,
/// the per-entry candidate filter (route + method only) would let every
/// entry claim every finding, and the sink-file scope filter would then
/// emit one chain per (entry, sink) pair, so each finding would appear 3×.
/// The wire format does not surface the entry, so those duplicates would
/// serialise byte-identically; `canonicalise` must drop them.
#[test]
fn duplicate_chains_from_shared_route_method_are_deduped() {
    let files = ["a.js", "b.js", "c.py"];

    let mut surface = SurfaceMap::new();
    // All entries first, then all sinks, in file order.
    for file in files {
        surface.nodes.push(entry(file, "/run", false));
    }
    for file in files {
        surface.nodes.push(sink(file, 7, "eval", Cap::CODE_EXEC));
    }

    let edges: Vec<ChainEdge> = files
        .iter()
        .map(|&file| {
            edge_with(
                file,
                7,
                "taint-codeexec",
                Cap::CODE_EXEC,
                "/run",
                HttpMethod::POST,
                Feasibility::Unverified,
            )
        })
        .collect();

    let chains = find_chains(&edges, &surface, ChainSearchConfig::default());
    assert_eq!(
        chains.len(),
        3,
        "expected one chain per finding, not entries × findings",
    );

    let mut hashes: Vec<u64> = chains.iter().map(|c| c.stable_hash).collect();
    hashes.sort();
    hashes.dedup();
    assert_eq!(
        hashes.len(),
        3,
        "surviving chains must have distinct hashes"
    );
}
|
||||
|
||||
/// File-affinity check (the original note attributes it to
/// `edge_reaches_entry`): two entries declare the same `POST /run` in
/// different files, but the single finding lives in `a.js`. Without a
/// reach map only the `a.js` entry may claim it, so exactly one chain is
/// produced and its sink is in `a.js`.
#[test]
fn entry_file_affinity_rejects_cross_file_findings_without_reach() {
    let mut surface = SurfaceMap::new();
    for file in ["a.js", "b.js"] {
        surface.nodes.push(entry(file, "/run", false));
    }
    for file in ["a.js", "b.js"] {
        surface.nodes.push(sink(file, 7, "eval", Cap::CODE_EXEC));
    }

    // Both entries match on route + method; only entry@a.js shares the
    // finding's file.
    let only_finding = edge_with(
        "a.js",
        7,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/run",
        HttpMethod::POST,
        Feasibility::Unverified,
    );
    let edges = vec![only_finding];

    let chains = find_chains(&edges, &surface, ChainSearchConfig::default());
    assert_eq!(
        chains.len(),
        1,
        "entry@b.js must not claim a finding in a.js without reach map",
    );
    assert_eq!(chains[0].sink.file, "a.js");
}
|
||||
|
||||
/// The file-affinity check widens through the call-graph reach map: the
/// entry lives in `routes.py` while both the finding and the sink live in
/// `helper.py`. No chain is produced without a reach map; with a
/// `FileReachMap` in which `routes.py` calls into `helper.py`, one chain
/// is produced and its sink is in `helper.py`.
#[test]
fn entry_file_affinity_widens_with_reach_map() {
    use crate::callgraph::{FileReachMap, build_call_graph};
    use crate::summary::{FuncSummary, merge_summaries};

    let mut surface = SurfaceMap::new();
    // Handler in routes.py; finding and sink in a helper file.
    surface.nodes.push(entry("routes.py", "/run", false));
    surface
        .nodes
        .push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC));

    let finding = edge_with(
        "helper.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/run",
        HttpMethod::POST,
        Feasibility::Unverified,
    );
    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };

    // No reach map: the entry/finding pairing must be rejected.
    let without_reach = find_chains(std::slice::from_ref(&finding), &surface, cfg);
    assert!(
        without_reach.is_empty(),
        "without reach map, cross-file entry/finding pair must reject",
    );

    // Call graph: routes.py::handle -> helper.py::sink.
    let caller = FuncSummary {
        name: "handle".into(),
        file_path: "routes.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![crate::summary::CalleeSite::bare("sink")],
        ..Default::default()
    };
    let callee = FuncSummary {
        name: "sink".into(),
        file_path: "helper.py".into(),
        lang: "python".into(),
        param_count: 0,
        ..Default::default()
    };
    let summaries = merge_summaries(vec![caller, callee], None);
    let call_graph = build_call_graph(&summaries, &[]);
    let reach = FileReachMap::build(&call_graph);

    let with_reach = find_chains_with_reach(&[finding], &surface, cfg, Some(&reach));
    assert_eq!(
        with_reach.len(),
        1,
        "reach map should widen entry-affinity to helper.py",
    );
    assert_eq!(with_reach[0].sink.file, "helper.py");
}
|
||||
}
|
||||
199
src/cli.rs
199
src/cli.rs
|
|
@ -36,6 +36,12 @@ impl Commands {
|
|||
&& (fmt == OutputFormat::Json || fmt == OutputFormat::Sarif)
|
||||
}
|
||||
|
||||
/// Whether the user explicitly asked this invocation to suppress
|
||||
/// human-readable output.
|
||||
pub fn quiet_requested(&self) -> bool {
|
||||
matches!(self, Commands::Scan { quiet: true, .. })
|
||||
}
|
||||
|
||||
/// Whether this is a long-running server command (skip timing output).
|
||||
pub fn is_serve(&self) -> bool {
|
||||
matches!(self, Commands::Serve { .. })
|
||||
|
|
@ -50,6 +56,7 @@ impl Commands {
|
|||
Commands::Scan { explain_engine, .. } => *explain_engine,
|
||||
Commands::List { .. } => true,
|
||||
Commands::Rules { .. } => true,
|
||||
Commands::Surface { .. } => true,
|
||||
Commands::Config { action } => {
|
||||
matches!(action, ConfigAction::Show { .. } | ConfigAction::Path)
|
||||
}
|
||||
|
|
@ -105,6 +112,32 @@ pub enum ScanMode {
|
|||
Taint,
|
||||
}
|
||||
|
||||
/// Output format for `nyx surface`.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, ValueEnum, Default)]
|
||||
pub enum SurfaceFormat {
|
||||
/// Indented tree, one entry-point per line, with reach summary.
|
||||
#[default]
|
||||
Text,
|
||||
/// Canonical SurfaceMap JSON, byte-identical to the SQLite payload.
|
||||
Json,
|
||||
/// Graphviz DOT source; pipe through `dot -Tsvg` to render.
|
||||
Dot,
|
||||
/// SVG produced by spawning the local `dot` binary on the DOT
|
||||
/// rendering. Fails when graphviz is not installed.
|
||||
Svg,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for SurfaceFormat {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
SurfaceFormat::Text => write!(f, "text"),
|
||||
SurfaceFormat::Json => write!(f, "json"),
|
||||
SurfaceFormat::Dot => write!(f, "dot"),
|
||||
SurfaceFormat::Svg => write!(f, "svg"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Engine-depth profile that sets the full stack of analysis toggles
|
||||
/// in one shot. Individual engine flags override the profile.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, ValueEnum)]
|
||||
|
|
@ -184,6 +217,7 @@ impl std::fmt::Display for EngineProfile {
|
|||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum Commands {
|
||||
/// Scan project for vulnerabilities
|
||||
Scan {
|
||||
|
|
@ -245,6 +279,17 @@ pub enum Commands {
|
|||
#[arg(long, help_heading = "Output")]
|
||||
quiet: bool,
|
||||
|
||||
/// Print the dynamic-verifier trace to stderr at end-of-verify.
|
||||
///
|
||||
/// When dynamic verification is enabled, the verifier records a
|
||||
/// per-finding [`crate::dynamic::trace::VerifyTrace`]. Setting this
|
||||
/// flag flushes every recorded `TraceEvent` to stderr after each
|
||||
/// verdict, matching the stream that already lands in the repro
|
||||
/// bundle at `expected/trace.jsonl`. Off by default so non-interactive
|
||||
/// scans stay quiet.
|
||||
#[arg(long, help_heading = "Output")]
|
||||
verbose: bool,
|
||||
|
||||
/// Exit with code 1 if any finding meets or exceeds this severity
|
||||
///
|
||||
/// Useful for CI gating. Example: --fail-on HIGH
|
||||
|
|
@ -320,7 +365,6 @@ pub enum Commands {
|
|||
#[arg(long, help_heading = "Output")]
|
||||
require_converged: bool,
|
||||
|
||||
// ── Analysis engine toggles (override [analysis.engine] config) ───
|
||||
/// Enable path-constraint solving (default: on)
|
||||
#[arg(
|
||||
long,
|
||||
|
|
@ -409,7 +453,6 @@ pub enum Commands {
|
|||
#[arg(long, help_heading = "Limits")]
|
||||
max_pointsto: Option<u32>,
|
||||
|
||||
// ── Deprecated aliases (hidden) ─────────────────────────────────
|
||||
/// Deprecated: use --index off
|
||||
#[arg(long, hide = true)]
|
||||
no_index: bool,
|
||||
|
|
@ -429,6 +472,121 @@ pub enum Commands {
|
|||
/// Deprecated: use --mode cfg
|
||||
#[arg(long, hide = true)]
|
||||
cfg_only: bool,
|
||||
|
||||
/// Build a harness and dynamically verify each finding in a sandbox.
|
||||
///
|
||||
/// Dynamic verification is on by default. This flag is a no-op when
|
||||
/// verification is already enabled via config. Use `--no-verify` to
|
||||
/// disable it for a single run. Default builds include dynamic support;
|
||||
/// custom `--no-default-features` builds need `--features dynamic`.
|
||||
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
|
||||
#[arg(long, help_heading = "Dynamic", conflicts_with = "no_verify")]
|
||||
verify: bool,
|
||||
|
||||
/// Skip dynamic verification for this run.
|
||||
///
|
||||
/// Overrides `verify = true` from config. Useful when you want a
|
||||
/// fast static-only scan without permanently changing `nyx.toml`.
|
||||
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
|
||||
#[arg(long, help_heading = "Dynamic", conflicts_with = "verify")]
|
||||
no_verify: bool,
|
||||
|
||||
/// Also verify `Confidence < Medium` findings dynamically.
|
||||
///
|
||||
/// By default only `Confidence >= Medium` findings are verified. Pass
|
||||
/// this flag to run verification on all findings regardless of
|
||||
/// confidence. Intended for payload tuning and backfill runs.
|
||||
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
|
||||
#[arg(long, help_heading = "Dynamic")]
|
||||
verify_all_confidence: bool,
|
||||
|
||||
/// Force the process sandbox backend (less isolation, dev use only).
|
||||
///
|
||||
/// By default the docker backend is used when available. This flag
|
||||
/// restricts the backend to the in-process runner. Cannot be combined
|
||||
/// with `--backend docker`.
|
||||
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
|
||||
#[arg(long, help_heading = "Dynamic")]
|
||||
unsafe_sandbox: bool,
|
||||
|
||||
/// Sandbox backend to use for dynamic verification.
|
||||
///
|
||||
/// `auto` (default): docker when available, else process.
|
||||
/// `docker`: require docker; fail if unavailable.
|
||||
/// `process`: in-process runner (same as `--unsafe-sandbox`).
|
||||
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
|
||||
#[arg(long, help_heading = "Dynamic", value_name = "BACKEND")]
|
||||
backend: Option<String>,
|
||||
|
||||
/// Process-backend hardening profile applied to every verified finding.
|
||||
///
|
||||
/// `standard` (default): baseline only. Linux runs no-new-privs +
|
||||
/// memory rlimit; macOS skips the sandbox-exec wrap.
|
||||
/// `strict`: full lockdown. Linux layers namespaces, chroot to
|
||||
/// workdir, and a default-deny seccomp filter; macOS wraps the
|
||||
/// harness with `sandbox-exec -f <cap>.sb`. Opt-in because
|
||||
/// interpreted Linux harnesses may SIGSYS until the per-language
|
||||
/// seccomp allowlists are expanded.
|
||||
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
|
||||
#[arg(
|
||||
long,
|
||||
help_heading = "Dynamic",
|
||||
value_name = "PROFILE",
|
||||
value_parser = ["standard", "strict"],
|
||||
)]
|
||||
harden: Option<String>,
|
||||
|
||||
/// Read a previous scan's JSON output (or a stripped .nyx/baseline.json)
|
||||
/// and diff it against the current scan on stable_hash.
|
||||
///
|
||||
/// Emits a verdict diff showing New / Resolved / FlippedConfirmed /
|
||||
/// FlippedNotConfirmed transitions. Combine with --gate to enforce CI
|
||||
/// policies.
|
||||
#[arg(long, value_name = "FILE", help_heading = "Baseline")]
|
||||
baseline: Option<String>,
|
||||
|
||||
/// Write a stripped baseline JSON to FILE after scanning.
|
||||
///
|
||||
/// The file contains only stable_hash, dynamic_verdict, severity, path,
|
||||
/// and rule_id (no source code). A CI job can persist this file to
|
||||
/// compare future scans against without leaking source.
|
||||
#[arg(long, value_name = "FILE", help_heading = "Baseline")]
|
||||
baseline_write: Option<String>,
|
||||
|
||||
/// CI gate to enforce when --baseline is active.
|
||||
///
|
||||
/// `no-new-confirmed`: exit 2 if any new Confirmed finding appears.
|
||||
/// `resolve-all-confirmed`: exit 2 if any baseline-Confirmed finding
|
||||
/// is not fully resolved (absent or NotConfirmed in the current scan).
|
||||
#[arg(
|
||||
long,
|
||||
value_name = "GATE",
|
||||
value_parser = ["no-new-confirmed", "resolve-all-confirmed"],
|
||||
help_heading = "Baseline"
|
||||
)]
|
||||
gate: Option<String>,
|
||||
},
|
||||
|
||||
/// Submit feedback on a dynamic verification verdict.
|
||||
///
|
||||
/// Records a correction or confirmation for a finding's verdict in the
|
||||
/// local telemetry log. Requires `--features dynamic`.
|
||||
#[cfg_attr(not(feature = "dynamic"), command(hide = true))]
|
||||
VerifyFeedback {
|
||||
/// Stable finding ID (16-char hex, shown in `nyx scan --verify` output).
|
||||
finding_id: String,
|
||||
|
||||
/// Mark this verdict as wrong and record a reason.
|
||||
#[arg(long, conflicts_with = "right")]
|
||||
wrong: Option<String>,
|
||||
|
||||
/// Confirm this verdict is correct.
|
||||
#[arg(long, conflicts_with = "wrong")]
|
||||
right: bool,
|
||||
|
||||
/// Upload feedback to Nyx telemetry (not yet implemented; reserved).
|
||||
#[arg(long)]
|
||||
upload: bool,
|
||||
},
|
||||
|
||||
/// Manage project indexes
|
||||
|
|
@ -466,6 +624,37 @@ pub enum Commands {
|
|||
action: RulesAction,
|
||||
},
|
||||
|
||||
/// Print the project's attack-surface map.
|
||||
///
|
||||
/// Loads the SurfaceMap persisted by the most recent indexed scan
|
||||
/// when available, otherwise builds an entry-point-only map by
|
||||
/// running the per-language framework probes against the on-disk
|
||||
/// source. Pass `--build` to force a full inline build (pass-1
|
||||
/// summary extraction + call-graph construction) when no indexed
|
||||
/// scan exists; that populates DataStore / ExternalService /
|
||||
/// DangerousLocal nodes the entry-points-only fallback omits.
|
||||
/// Use `--format dot` and pipe through `dot -Tsvg` to produce a
|
||||
/// renderable graph; `--format svg` does the same in one step when
|
||||
/// graphviz is installed locally.
|
||||
Surface {
|
||||
/// Path to inspect (defaults to current directory)
|
||||
#[arg(default_value = ".")]
|
||||
path: String,
|
||||
|
||||
/// Output format: text (default), json, dot, svg
|
||||
#[arg(long, value_enum, default_value_t = SurfaceFormat::Text)]
|
||||
format: SurfaceFormat,
|
||||
|
||||
/// Build the full SurfaceMap from source even when no indexed
|
||||
/// scan exists. Runs pass-1 summary extraction + call-graph
|
||||
/// build inline (same cost as `nyx index build`), then renders
|
||||
/// data-store / external-service / dangerous-local nodes plus
|
||||
/// reach edges. Without this flag, an unscanned project
|
||||
/// produces an entry-points-only map.
|
||||
#[arg(long)]
|
||||
build: bool,
|
||||
},
|
||||
|
||||
/// Start the local web UI for browsing scan results
|
||||
Serve {
|
||||
/// Path to scan root (defaults to current directory)
|
||||
|
|
@ -515,7 +704,11 @@ pub enum ConfigAction {
|
|||
#[arg(long)]
|
||||
kind: String,
|
||||
|
||||
/// Capability: env_var, html_escape, shell_escape, url_encode, json_parse, file_io, or all
|
||||
/// Capability slug. One of: env_var, html_escape, shell_escape,
|
||||
/// url_encode, json_parse, file_io, fmt_string, sql_query, deserialize,
|
||||
/// ssrf, code_exec, crypto, unauthorized_id, data_exfil, ldap_injection,
|
||||
/// xpath_injection, header_injection, open_redirect, ssti, xxe,
|
||||
/// prototype_pollution, or all. See docs/cli.md.
|
||||
#[arg(long)]
|
||||
cap: String,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
use crate::cli::IndexAction;
|
||||
use crate::database::index::{Indexer, IssueRow};
|
||||
use crate::database::index::{IndexWriteQueue, Indexer, IssueRow};
|
||||
use crate::errors::NyxResult;
|
||||
use crate::patterns::Severity;
|
||||
use crate::server::progress::{ScanMetrics, ScanProgress, ScanStage};
|
||||
use crate::server::scan_log::ScanLogCollector;
|
||||
use crate::utils::Config;
|
||||
|
|
@ -27,6 +26,11 @@ pub fn handle(
|
|||
IndexAction::Build { path, force } => {
|
||||
let build_path = std::path::Path::new(&path).canonicalize()?;
|
||||
let (project_name, db_path) = get_project_info(&build_path, database_dir)?;
|
||||
let _ = crate::utils::targets::remember_target(
|
||||
database_dir,
|
||||
&build_path,
|
||||
crate::utils::targets::TargetTouch::Seen,
|
||||
);
|
||||
|
||||
if force || !db_path.exists() {
|
||||
build_index(
|
||||
|
|
@ -200,108 +204,123 @@ pub fn build_index_with_observer(
|
|||
let metrics = metrics.cloned();
|
||||
let logs = logs.cloned();
|
||||
let pass1_start = std::time::Instant::now();
|
||||
paths
|
||||
.into_par_iter()
|
||||
.try_for_each(|path| -> NyxResult<()> {
|
||||
let mut idx = Indexer::from_pool(project_name, &pool)?;
|
||||
let writer = IndexWriteQueue::start(project_name.to_owned(), Arc::clone(&pool));
|
||||
let write_tx = writer.sender();
|
||||
let index_result = paths.into_par_iter().try_for_each(|path| -> NyxResult<()> {
|
||||
// Read once, hash once, pass bytes to both rule execution and
|
||||
// summary extraction. Use pre-computed hash for upsert to avoid
|
||||
// a redundant file read inside upsert_file.
|
||||
let bytes = std::fs::read(&path)?;
|
||||
let hash = Indexer::digest_bytes(&bytes);
|
||||
|
||||
// Read once, hash once, pass bytes to both rule execution and
|
||||
// summary extraction. Use pre-computed hash for upsert to avoid
|
||||
// a redundant file read inside upsert_file.
|
||||
let bytes = std::fs::read(&path)?;
|
||||
let hash = Indexer::digest_bytes(&bytes);
|
||||
// Parse once and persist every artifact we can reuse later:
|
||||
// findings, coarse summaries, and precise SSA summaries.
|
||||
let fused = crate::commands::scan::analyse_file_fused(
|
||||
&bytes,
|
||||
&path,
|
||||
config,
|
||||
None,
|
||||
Some(project_path),
|
||||
)?;
|
||||
if let Some(ref p) = progress {
|
||||
p.inc_parsed(1);
|
||||
p.set_current_file(&path.to_string_lossy());
|
||||
if let Some(lang) = fused.summaries.first().map(|s| s.lang.as_str()) {
|
||||
p.record_language(lang);
|
||||
}
|
||||
}
|
||||
if let Some(ref m) = metrics {
|
||||
m.cfg_nodes.fetch_add(fused.cfg_nodes as u64, Relaxed);
|
||||
}
|
||||
|
||||
// Parse once and persist every artifact we can reuse later:
|
||||
// findings, coarse summaries, and precise SSA summaries.
|
||||
let fused = crate::commands::scan::analyse_file_fused(
|
||||
&bytes,
|
||||
&path,
|
||||
config,
|
||||
None,
|
||||
Some(project_path),
|
||||
let issue_rows: Vec<(String, String, i64, i64)> = fused
|
||||
.diags
|
||||
.iter()
|
||||
.map(|d| {
|
||||
(
|
||||
d.id.clone(),
|
||||
d.severity.as_db_str().to_string(),
|
||||
d.line as i64,
|
||||
d.col as i64,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let summaries = fused.summaries;
|
||||
let ssa_rows: Vec<_> = fused
|
||||
.ssa_summaries
|
||||
.into_iter()
|
||||
.map(|(key, sum)| {
|
||||
(
|
||||
key.name,
|
||||
key.arity.unwrap_or(0),
|
||||
key.lang.as_str().to_string(),
|
||||
key.namespace,
|
||||
key.container,
|
||||
key.disambig,
|
||||
key.kind,
|
||||
sum,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Persist SSA callee bodies at index-build time so CLI-initiated
|
||||
// rebuilds (`--index rebuild`) populate the same
|
||||
// `ssa_function_bodies` rows that `scan_with_index_parallel`
|
||||
// would have written via its pass-1 branch. Without this,
|
||||
// indexed scans load zero cross-file bodies and cross-file
|
||||
// inline silently falls back to summary resolution.
|
||||
let body_rows: Vec<_> = fused
|
||||
.ssa_bodies
|
||||
.into_iter()
|
||||
.map(|(key, body)| {
|
||||
(
|
||||
key.name,
|
||||
key.arity.unwrap_or(0),
|
||||
key.lang.as_str().to_string(),
|
||||
key.namespace,
|
||||
key.container,
|
||||
key.disambig,
|
||||
key.kind,
|
||||
body,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let path_for_write = path.clone();
|
||||
write_tx.enqueue(move |idx| {
|
||||
let file_id = idx.upsert_file_with_hash(&path_for_write, &hash)?;
|
||||
idx.replace_issues(
|
||||
file_id,
|
||||
issue_rows
|
||||
.iter()
|
||||
.map(|(rule_id, severity, line, col)| IssueRow {
|
||||
rule_id: rule_id.as_str(),
|
||||
severity: severity.as_str(),
|
||||
line: *line,
|
||||
col: *col,
|
||||
}),
|
||||
)?;
|
||||
if let Some(ref p) = progress {
|
||||
p.inc_parsed(1);
|
||||
p.set_current_file(&path.to_string_lossy());
|
||||
if let Some(lang) = fused.summaries.first().map(|s| s.lang.as_str()) {
|
||||
p.record_language(lang);
|
||||
}
|
||||
|
||||
if !summaries.is_empty() {
|
||||
idx.replace_summaries_for_file(&path_for_write, &hash, &summaries)?;
|
||||
}
|
||||
if let Some(ref m) = metrics {
|
||||
m.cfg_nodes.fetch_add(fused.cfg_nodes as u64, Relaxed);
|
||||
if !ssa_rows.is_empty() {
|
||||
idx.replace_ssa_summaries_for_file(&path_for_write, &hash, &ssa_rows)?;
|
||||
}
|
||||
let file_id = idx.upsert_file_with_hash(&path, &hash)?;
|
||||
|
||||
let rows: Vec<IssueRow> = fused
|
||||
.diags
|
||||
.iter()
|
||||
.map(|d| IssueRow {
|
||||
rule_id: d.id.as_ref(),
|
||||
severity: match d.severity {
|
||||
Severity::High => "HIGH",
|
||||
Severity::Medium => "MEDIUM",
|
||||
Severity::Low => "LOW",
|
||||
},
|
||||
line: d.line as i64,
|
||||
col: d.col as i64,
|
||||
})
|
||||
.collect();
|
||||
|
||||
idx.replace_issues(file_id, rows)?;
|
||||
|
||||
if !fused.summaries.is_empty() {
|
||||
idx.replace_summaries_for_file(&path, &hash, &fused.summaries)?;
|
||||
if !body_rows.is_empty() {
|
||||
idx.replace_ssa_bodies_for_file(&path_for_write, &hash, &body_rows)?;
|
||||
}
|
||||
|
||||
if !fused.ssa_summaries.is_empty() {
|
||||
let ssa_rows: Vec<_> = fused
|
||||
.ssa_summaries
|
||||
.into_iter()
|
||||
.map(|(key, sum)| {
|
||||
(
|
||||
key.name,
|
||||
key.arity.unwrap_or(0),
|
||||
key.lang.as_str().to_string(),
|
||||
key.namespace,
|
||||
key.container,
|
||||
key.disambig,
|
||||
key.kind,
|
||||
sum,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
idx.replace_ssa_summaries_for_file(&path, &hash, &ssa_rows)?;
|
||||
}
|
||||
|
||||
// Persist SSA callee bodies at index-build time so CLI-initiated
|
||||
// rebuilds (`--index rebuild`) populate the same
|
||||
// `ssa_function_bodies` rows that `scan_with_index_parallel`
|
||||
// would have written via its pass-1 branch. Without this,
|
||||
// indexed scans load zero cross-file bodies and cross-file
|
||||
// inline silently falls back to summary resolution.
|
||||
if !fused.ssa_bodies.is_empty() {
|
||||
let body_rows: Vec<_> = fused
|
||||
.ssa_bodies
|
||||
.into_iter()
|
||||
.map(|(key, body)| {
|
||||
(
|
||||
key.name,
|
||||
key.arity.unwrap_or(0),
|
||||
key.lang.as_str().to_string(),
|
||||
key.namespace,
|
||||
key.container,
|
||||
key.disambig,
|
||||
key.kind,
|
||||
body,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
idx.replace_ssa_bodies_for_file(&path, &hash, &body_rows)?;
|
||||
}
|
||||
|
||||
pb.inc(1);
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
pb.inc(1);
|
||||
Ok(())
|
||||
});
|
||||
drop(write_tx);
|
||||
let writer_result = writer.finish("Index rebuild");
|
||||
index_result?;
|
||||
writer_result?;
|
||||
pb.finish_and_clear();
|
||||
if let Some(p) = &progress {
|
||||
p.record_pass1_ms(pass1_start.elapsed().as_millis() as u64);
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ pub mod rules;
|
|||
pub mod scan;
|
||||
#[cfg(feature = "serve")]
|
||||
pub mod serve;
|
||||
pub mod surface;
|
||||
|
||||
use crate::cli::{Commands, EngineProfile, IndexMode, ScanMode};
|
||||
use crate::errors::NyxResult;
|
||||
|
|
@ -57,6 +58,7 @@ pub fn handle_command(
|
|||
all_targets,
|
||||
keep_nonprod_severity,
|
||||
quiet,
|
||||
verbose,
|
||||
fail_on,
|
||||
no_state,
|
||||
no_rank,
|
||||
|
|
@ -97,6 +99,15 @@ pub fn handle_command(
|
|||
high_only,
|
||||
ast_only,
|
||||
cfg_only,
|
||||
verify,
|
||||
no_verify,
|
||||
verify_all_confidence,
|
||||
unsafe_sandbox,
|
||||
backend,
|
||||
harden,
|
||||
baseline,
|
||||
baseline_write,
|
||||
gate,
|
||||
} => {
|
||||
// ── Apply profile first (CLI flags override after) ──────────
|
||||
if let Some(ref name) = profile {
|
||||
|
|
@ -307,6 +318,58 @@ pub fn handle_command(
|
|||
// resolved straight from config; no CLI overrides yet.
|
||||
let _ = crate::utils::detector_options::install(config.detectors.clone());
|
||||
|
||||
// ── Dynamic verification ────────────────────────────────────
|
||||
#[cfg(feature = "dynamic")]
|
||||
{
|
||||
// Validate and apply --unsafe-sandbox / --backend combo.
|
||||
let explicit_backend = backend.as_deref().unwrap_or("auto");
|
||||
if unsafe_sandbox && explicit_backend == "docker" {
|
||||
return Err(crate::errors::NyxError::Msg(
|
||||
"--unsafe-sandbox and --backend docker are mutually exclusive: \
|
||||
--unsafe-sandbox forces the process backend; \
|
||||
docker cannot be reached through this flag."
|
||||
.into(),
|
||||
));
|
||||
}
|
||||
let resolved_backend = if unsafe_sandbox {
|
||||
"process"
|
||||
} else {
|
||||
explicit_backend
|
||||
};
|
||||
// --verify / --no-verify override the config default.
|
||||
if no_verify {
|
||||
config.scanner.verify = false;
|
||||
} else if verify {
|
||||
config.scanner.verify = true;
|
||||
}
|
||||
// --verify-all-confidence overrides the confidence gate.
|
||||
if verify_all_confidence {
|
||||
config.scanner.verify_all_confidence = true;
|
||||
}
|
||||
config.scanner.verify_backend = resolved_backend.to_owned();
|
||||
// --harden=<standard|strict> overrides the config default.
|
||||
if let Some(ref profile) = harden {
|
||||
config.scanner.harden_profile = profile.to_owned();
|
||||
}
|
||||
}
|
||||
// Without the dynamic feature, keep the user's verify toggle in
|
||||
// the resolved config so the scan command can either suppress the
|
||||
// warning (`--no-verify`) or explain why verification is static-only.
|
||||
#[cfg(not(feature = "dynamic"))]
|
||||
{
|
||||
if no_verify {
|
||||
config.scanner.verify = false;
|
||||
} else if verify {
|
||||
config.scanner.verify = true;
|
||||
}
|
||||
if verify_all_confidence {
|
||||
config.scanner.verify_all_confidence = true;
|
||||
}
|
||||
let _ = unsafe_sandbox;
|
||||
let _ = backend;
|
||||
let _ = harden;
|
||||
}
|
||||
|
||||
// ── --explain-engine: print resolved config and exit ────────
|
||||
if explain_engine {
|
||||
print_engine_explanation(config, engine_profile);
|
||||
|
|
@ -325,8 +388,27 @@ pub fn handle_command(
|
|||
show_instances.as_deref(),
|
||||
database_dir,
|
||||
config,
|
||||
baseline.as_deref().map(std::path::Path::new),
|
||||
baseline_write.as_deref().map(std::path::Path::new),
|
||||
gate.as_deref(),
|
||||
verbose,
|
||||
)?;
|
||||
}
|
||||
#[cfg(feature = "dynamic")]
|
||||
Commands::VerifyFeedback {
|
||||
finding_id,
|
||||
wrong,
|
||||
right,
|
||||
upload,
|
||||
} => {
|
||||
handle_verify_feedback(&finding_id, wrong.as_deref(), right, upload)?;
|
||||
}
|
||||
#[cfg(not(feature = "dynamic"))]
|
||||
Commands::VerifyFeedback { .. } => {
|
||||
return Err(crate::errors::NyxError::Msg(
|
||||
"The `dynamic` feature is not enabled. Rebuild with `cargo build --features dynamic`.".into(),
|
||||
));
|
||||
}
|
||||
Commands::Index { action } => {
|
||||
install_from_config(config);
|
||||
index::handle(action, database_dir, config)?;
|
||||
|
|
@ -356,6 +438,14 @@ pub fn handle_command(
|
|||
Commands::Rules { action } => {
|
||||
self::rules::handle(action, config)?;
|
||||
}
|
||||
Commands::Surface {
|
||||
path,
|
||||
format,
|
||||
build,
|
||||
} => {
|
||||
install_from_config(config);
|
||||
surface::handle(&path, format, build, database_dir, config)?;
|
||||
}
|
||||
Commands::Serve {
|
||||
path,
|
||||
port,
|
||||
|
|
@ -387,6 +477,59 @@ pub fn handle_command(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle `nyx verify-feedback` (§21.2).
///
/// Records the user's correction (`--wrong "reason"`) or confirmation
/// (`--right`) for a finding verdict as one JSON line appended to the
/// telemetry log. Local-first: nothing is uploaded; `upload` is accepted
/// but currently ignored.
///
/// # Errors
///
/// Returns an error only when neither `wrong` nor `right` was supplied.
/// Failing to write the log is best-effort: it is reported on stderr as a
/// warning and the function still returns `Ok(())`.
#[cfg(feature = "dynamic")]
fn handle_verify_feedback(
    finding_id: &str,
    wrong: Option<&str>,
    right: bool,
    upload: bool,
) -> crate::errors::NyxResult<()> {
    use std::fs::OpenOptions;
    use std::io::Write;

    let _ = upload; // Upload not yet implemented (reserved).

    // `--wrong` takes precedence over `--right` when both are given.
    let feedback_kind = if let Some(reason) = wrong {
        format!("wrong:{reason}")
    } else if right {
        "right".to_owned()
    } else {
        return Err(crate::errors::NyxError::Msg(
            "specify --wrong \"reason\" or --right".into(),
        ));
    };

    let record = serde_json::json!({
        "ts": chrono::Utc::now().to_rfc3339(),
        "event": "verify_feedback",
        "finding_id": finding_id,
        "feedback": feedback_kind,
    });

    let Some(log_path) = crate::dynamic::telemetry::log_path() else {
        eprintln!("Feedback recorded (in-memory only; cannot determine cache path).");
        return Ok(());
    };

    // Append to the telemetry log, keeping the first I/O failure so the
    // user is not told the feedback was stored when it was not.
    let append = || -> std::io::Result<()> {
        if let Some(parent) = log_path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        let line = serde_json::to_string(&record)?;
        let mut f = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&log_path)?;
        writeln!(f, "{line}")
    };

    match append() {
        Ok(()) => eprintln!(
            "Feedback recorded for finding {}. Log: {}",
            finding_id,
            log_path.display()
        ),
        Err(e) => eprintln!(
            "warning: could not record feedback for finding {} in {}: {e}",
            finding_id,
            log_path.display()
        ),
    }

    Ok(())
}
|
||||
|
||||
/// Pretty-print the effective analysis-engine configuration for
|
||||
/// `nyx scan --explain-engine`. Writes to stdout so it composes with
|
||||
/// standard shell redirection and process substitution.
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,10 +1,9 @@
|
|||
use crate::database::index::Indexer;
|
||||
use crate::errors::NyxResult;
|
||||
use crate::server::app::{AppState, ServerEvent, build_router};
|
||||
use crate::server::jobs::JobManager;
|
||||
use crate::server::security::LocalServerSecurity;
|
||||
use crate::utils::config::Config;
|
||||
use crate::utils::project::get_project_info;
|
||||
use crate::utils::targets::{TargetTouch, remember_target};
|
||||
use console::style;
|
||||
use parking_lot::RwLock;
|
||||
use std::path::Path;
|
||||
|
|
@ -31,18 +30,7 @@ pub fn handle(
|
|||
let rayon_stack_size = config.performance.rayon_thread_stack_size;
|
||||
|
||||
let (event_tx, _) = tokio::sync::broadcast::channel(64);
|
||||
|
||||
// Initialize DB pool for scan persistence
|
||||
let db_pool = {
|
||||
let (_, db_path) = get_project_info(&scan_root, database_dir)?;
|
||||
match Indexer::init(&db_path) {
|
||||
Ok(pool) => Some(pool),
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to initialize scan DB: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
let _ = remember_target(database_dir, &scan_root, TargetTouch::Seen);
|
||||
|
||||
let addr = socket_addr(&host, port);
|
||||
|
||||
|
|
@ -75,16 +63,17 @@ pub fn handle(
|
|||
let security = LocalServerSecurity::new(local_addr.port());
|
||||
|
||||
let state = AppState {
|
||||
scan_root: scan_root.clone(),
|
||||
scan_root: Arc::new(RwLock::new(scan_root.clone())),
|
||||
config_dir: config_dir.to_path_buf(),
|
||||
database_dir: database_dir.to_path_buf(),
|
||||
security,
|
||||
config: Arc::new(RwLock::new(config.clone())),
|
||||
job_manager: Arc::new(JobManager::new(max_jobs, rayon_stack_size)),
|
||||
event_tx: event_tx.clone(),
|
||||
db_pool,
|
||||
db_pools: Arc::new(RwLock::new(std::collections::HashMap::new())),
|
||||
findings_cache: Arc::new(RwLock::new(None)),
|
||||
};
|
||||
let _ = state.db_pool_for(&scan_root);
|
||||
|
||||
// Invalidate the findings cache whenever a scan finishes so the next
|
||||
// request rebuilds against fresh diags. The next-request rebuild keeps
|
||||
|
|
|
|||
750
src/commands/surface.rs
Normal file
750
src/commands/surface.rs
Normal file
|
|
@ -0,0 +1,750 @@
|
|||
//! `nyx surface` subcommand.
|
||||
//!
|
||||
//! Walks the project tree, builds a [`SurfaceMap`] from the framework
|
||||
//! probes (plus any persisted data-store / external-service /
|
||||
//! dangerous-local nodes from a prior indexed scan) and renders the
|
||||
//! map in the format requested by the user.
|
||||
//!
|
||||
//! Output formats:
|
||||
//! * `text`: indented tree per entry-point, grouped by file
|
||||
//! * `json`: canonical JSON (byte-identical to the SQLite payload)
|
||||
//! * `dot`: graphviz source, ready to pipe through `dot -Tsvg`
|
||||
//! * `svg`: graphviz source rendered via the local `dot` binary
|
||||
//!
|
||||
//! The command is read-only: it never persists to SQLite and never
|
||||
//! modifies the project tree. It tries to load a previously persisted
|
||||
//! map first; if none exists (no `nyx scan` ever ran, or the index was
|
||||
//! cleaned) it falls back to building a fresh entry-point-only map by
|
||||
//! running the framework probes against the on-disk source.
|
||||
//!
|
||||
//! Pass `--build` to force a full inline build that runs pass-1
|
||||
//! summary extraction + call-graph construction. That populates the
|
||||
//! same DataStore / ExternalService / DangerousLocal nodes and Reaches
|
||||
//! edges that an indexed scan would have persisted, at the cost of
|
||||
//! parsing the project tree once (same wall-clock as `nyx index
|
||||
//! build`).
|
||||
|
||||
use crate::ast::extract_all_summaries_from_bytes;
|
||||
use crate::callgraph;
|
||||
use crate::cli::SurfaceFormat;
|
||||
use crate::database::index::Indexer;
|
||||
use crate::errors::{NyxError, NyxResult};
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::surface::{
|
||||
DataStoreKind, EdgeKind, EntryPoint, ExternalServiceKind, SurfaceMap, SurfaceNode,
|
||||
build::{SurfaceBuildInputs, build_surface_map},
|
||||
};
|
||||
use crate::utils::Config;
|
||||
use crate::utils::project::get_project_info;
|
||||
use crate::walk::spawn_file_walker;
|
||||
use crossbeam_channel::TryRecvError;
|
||||
use rayon::prelude::*;
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
/// Top-level CLI handler. Resolves the scan root, loads or builds a
|
||||
/// [`SurfaceMap`], renders it in `format`, and writes to stdout.
|
||||
///
|
||||
/// When `build_inline` is `true`, the persisted SurfaceMap (if any) is
|
||||
/// ignored and the full map is built by running pass-1 summary
|
||||
/// extraction + call-graph construction against the on-disk source.
|
||||
/// This populates DataStore / ExternalService / DangerousLocal nodes
|
||||
/// and Reaches edges that the entry-points-only fallback omits.
|
||||
pub fn handle(
|
||||
path: &str,
|
||||
format: SurfaceFormat,
|
||||
build_inline: bool,
|
||||
database_dir: &Path,
|
||||
config: &Config,
|
||||
) -> NyxResult<()> {
|
||||
let scan_root = Path::new(path).canonicalize()?;
|
||||
let map = if build_inline {
|
||||
build_full_from_filesystem(&scan_root, config)?
|
||||
} else {
|
||||
load_or_build(&scan_root, database_dir, config)?
|
||||
};
|
||||
let stdout = std::io::stdout();
|
||||
let mut out = stdout.lock();
|
||||
match format {
|
||||
SurfaceFormat::Text => {
|
||||
out.write_all(render_text(&map, Some(&scan_root)).as_bytes())?;
|
||||
}
|
||||
SurfaceFormat::Json => {
|
||||
let mut canon = map;
|
||||
let bytes = canon
|
||||
.to_json()
|
||||
.map_err(|e| NyxError::Msg(format!("surface map JSON: {e}")))?;
|
||||
out.write_all(&bytes)?;
|
||||
out.write_all(b"\n")?;
|
||||
}
|
||||
SurfaceFormat::Dot => {
|
||||
out.write_all(render_dot(&map).as_bytes())?;
|
||||
}
|
||||
SurfaceFormat::Svg => {
|
||||
let svg = render_svg(&map)?;
|
||||
out.write_all(&svg)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load the SurfaceMap persisted under `scan_root`'s project entry, or
|
||||
/// build a fresh entry-point-only map from the filesystem when no
|
||||
/// indexed scan has ever populated one.
|
||||
pub fn load_or_build(
|
||||
scan_root: &Path,
|
||||
database_dir: &Path,
|
||||
config: &Config,
|
||||
) -> NyxResult<SurfaceMap> {
|
||||
if let Ok((project, db_path)) = get_project_info(scan_root, database_dir)
|
||||
&& db_path.exists()
|
||||
&& let Ok(pool) = Indexer::init(&db_path)
|
||||
&& let Ok(idx) = Indexer::from_pool(&project, &pool)
|
||||
&& let Ok(Some(map)) = idx.load_surface_map()
|
||||
&& !map.nodes.is_empty()
|
||||
{
|
||||
return Ok(map);
|
||||
}
|
||||
build_from_filesystem(scan_root, config)
|
||||
}
|
||||
|
||||
fn build_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<SurfaceMap> {
|
||||
let files = collect_files(scan_root, config)?;
|
||||
let summaries = GlobalSummaries::new();
|
||||
let call_graph = callgraph::build_call_graph(&summaries, &[]);
|
||||
let inputs = SurfaceBuildInputs {
|
||||
files: &files,
|
||||
scan_root: Some(scan_root),
|
||||
global_summaries: &summaries,
|
||||
call_graph: &call_graph,
|
||||
config,
|
||||
};
|
||||
Ok(build_surface_map(&inputs))
|
||||
}
|
||||
|
||||
/// Build a full SurfaceMap from source by running pass-1 summary
|
||||
/// extraction inline + call-graph construction, then handing the
|
||||
/// resulting [`GlobalSummaries`] + [`CallGraph`] to
|
||||
/// [`build_surface_map`]. Same cost as `nyx index build` pass 1 but
|
||||
/// holds nothing in SQLite.
|
||||
fn build_full_from_filesystem(scan_root: &Path, config: &Config) -> NyxResult<SurfaceMap> {
|
||||
let files = collect_files(scan_root, config)?;
|
||||
let mut summaries = build_summaries_inline(&files, scan_root, config);
|
||||
summaries.install_hierarchy();
|
||||
let call_graph = callgraph::build_call_graph(&summaries, &[]);
|
||||
let inputs = SurfaceBuildInputs {
|
||||
files: &files,
|
||||
scan_root: Some(scan_root),
|
||||
global_summaries: &summaries,
|
||||
call_graph: &call_graph,
|
||||
config,
|
||||
};
|
||||
Ok(build_surface_map(&inputs))
|
||||
}
|
||||
|
||||
/// Run pass-1 summary extraction across `files` in parallel and merge
|
||||
/// the per-thread results into a single [`GlobalSummaries`]. Mirrors
|
||||
/// the `scan_filesystem_with_observer` pass-1 fold/reduce shape but
|
||||
/// strips out the progress / metrics / logs threading the surface
|
||||
/// command does not need.
|
||||
///
|
||||
/// Per-file errors are swallowed so a single bad file does not kill
|
||||
/// the whole map.
|
||||
fn build_summaries_inline(files: &[PathBuf], scan_root: &Path, config: &Config) -> GlobalSummaries {
|
||||
let root_str = scan_root.to_string_lossy().into_owned();
|
||||
let mg = config.module_graph.as_deref();
|
||||
files
|
||||
.par_iter()
|
||||
.fold(GlobalSummaries::new, |mut local_gs, path| {
|
||||
let Ok(bytes) = std::fs::read(path) else {
|
||||
return local_gs;
|
||||
};
|
||||
let Ok((func_summaries, ssa_summaries, ssa_bodies, auth_summaries, cross_pkg)) =
|
||||
extract_all_summaries_from_bytes(&bytes, path, config, Some(scan_root))
|
||||
else {
|
||||
return local_gs;
|
||||
};
|
||||
for s in func_summaries {
|
||||
let key = s.func_key_with_resolver(Some(&root_str), mg);
|
||||
local_gs.insert(key, s);
|
||||
}
|
||||
for (key, ssa_sum) in ssa_summaries {
|
||||
local_gs.insert_ssa(key, ssa_sum);
|
||||
}
|
||||
for (key, body) in ssa_bodies {
|
||||
local_gs.insert_body(key, body);
|
||||
}
|
||||
for (key, auth_sum) in auth_summaries {
|
||||
local_gs.insert_auth(key, auth_sum);
|
||||
}
|
||||
if let Some((ns, map)) = cross_pkg {
|
||||
local_gs.insert_cross_package_imports(ns, map);
|
||||
}
|
||||
local_gs
|
||||
})
|
||||
.reduce(GlobalSummaries::new, |mut a, b| {
|
||||
a.merge(b);
|
||||
a
|
||||
})
|
||||
}
|
||||
|
||||
fn collect_files(root: &Path, config: &Config) -> NyxResult<Vec<PathBuf>> {
|
||||
let (rx, handle) = spawn_file_walker(root, config);
|
||||
let mut out = Vec::new();
|
||||
loop {
|
||||
match rx.try_recv() {
|
||||
Ok(batch) => out.extend(batch),
|
||||
Err(TryRecvError::Empty) => match rx.recv() {
|
||||
Ok(batch) => out.extend(batch),
|
||||
Err(_) => break,
|
||||
},
|
||||
Err(TryRecvError::Disconnected) => break,
|
||||
}
|
||||
}
|
||||
let _ = handle.join();
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
// Text rendering
|
||||
|
||||
/// Produce a human-readable tree. Files appear as top-level headers;
|
||||
/// each entry-point sits under its host file with its reach summary
|
||||
/// (`Reaches: …`). Data stores / external services / dangerous locals
|
||||
/// that no entry-point reaches are grouped under a trailing "Unreached"
|
||||
/// section so a reviewer notices orphaned attack surface.
|
||||
pub fn render_text(map: &SurfaceMap, scan_root: Option<&Path>) -> String {
|
||||
let mut out = String::new();
|
||||
if let Some(root) = scan_root {
|
||||
out.push_str(&format!("Surface map for {}\n", root.display()));
|
||||
} else {
|
||||
out.push_str("Surface map\n");
|
||||
}
|
||||
let entry_count = count_kind(map, |n| matches!(n, SurfaceNode::EntryPoint(_)));
|
||||
let ds_count = count_kind(map, |n| matches!(n, SurfaceNode::DataStore(_)));
|
||||
let es_count = count_kind(map, |n| matches!(n, SurfaceNode::ExternalService(_)));
|
||||
let dl_count = count_kind(map, |n| matches!(n, SurfaceNode::DangerousLocal(_)));
|
||||
out.push_str(&format!(
|
||||
" {} {}, {} {}, {} {}, {} {}\n\n",
|
||||
entry_count,
|
||||
plural(entry_count, "entry-point", "entry-points"),
|
||||
ds_count,
|
||||
plural(ds_count, "data store", "data stores"),
|
||||
es_count,
|
||||
plural(es_count, "external service", "external services"),
|
||||
dl_count,
|
||||
plural(dl_count, "dangerous local", "dangerous locals"),
|
||||
));
|
||||
|
||||
if map.nodes.is_empty() {
|
||||
out.push_str(" (no entry-points or sinks detected)\n");
|
||||
return out;
|
||||
}
|
||||
|
||||
let mut by_file: BTreeMap<&str, Vec<usize>> = BTreeMap::new();
|
||||
for (idx, node) in map.nodes.iter().enumerate() {
|
||||
by_file
|
||||
.entry(node.location().file.as_str())
|
||||
.or_default()
|
||||
.push(idx);
|
||||
}
|
||||
|
||||
let mut reached: std::collections::HashSet<u32> = std::collections::HashSet::new();
|
||||
for edge in &map.edges {
|
||||
if matches!(edge.kind, EdgeKind::Reaches) {
|
||||
reached.insert(edge.to);
|
||||
}
|
||||
}
|
||||
|
||||
for (file, indices) in &by_file {
|
||||
out.push_str(&format!("{file}\n"));
|
||||
let entry_indices: Vec<usize> = indices
|
||||
.iter()
|
||||
.copied()
|
||||
.filter(|i| matches!(map.nodes[*i], SurfaceNode::EntryPoint(_)))
|
||||
.collect();
|
||||
if !entry_indices.is_empty() {
|
||||
for &ei in &entry_indices {
|
||||
let SurfaceNode::EntryPoint(ep) = &map.nodes[ei] else {
|
||||
continue;
|
||||
};
|
||||
render_entry_point(&mut out, ep, ei as u32, map);
|
||||
}
|
||||
}
|
||||
for &i in indices {
|
||||
match &map.nodes[i] {
|
||||
SurfaceNode::DataStore(_)
|
||||
| SurfaceNode::ExternalService(_)
|
||||
| SurfaceNode::DangerousLocal(_) => {
|
||||
if !entry_indices.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if reached.contains(&(i as u32)) {
|
||||
continue;
|
||||
}
|
||||
render_node_line(&mut out, &map.nodes[i], " ");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
out.push('\n');
|
||||
}
|
||||
|
||||
// Orphans: destinations that no entry-point reaches.
|
||||
let mut orphans: Vec<usize> = Vec::new();
|
||||
for (idx, node) in map.nodes.iter().enumerate() {
|
||||
if matches!(node, SurfaceNode::EntryPoint(_)) {
|
||||
continue;
|
||||
}
|
||||
if reached.contains(&(idx as u32)) {
|
||||
continue;
|
||||
}
|
||||
// Already printed under host file when there were no entry-points;
|
||||
// suppress to avoid duplication.
|
||||
let host_has_entries = by_file
|
||||
.get(node.location().file.as_str())
|
||||
.map(|v| {
|
||||
v.iter()
|
||||
.any(|&j| matches!(map.nodes[j], SurfaceNode::EntryPoint(_)))
|
||||
})
|
||||
.unwrap_or(false);
|
||||
if !host_has_entries {
|
||||
continue;
|
||||
}
|
||||
orphans.push(idx);
|
||||
}
|
||||
if !orphans.is_empty() {
|
||||
out.push_str("Unreached surface\n");
|
||||
for idx in orphans {
|
||||
render_node_line(&mut out, &map.nodes[idx], " ");
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn render_entry_point(out: &mut String, ep: &EntryPoint, ep_idx: u32, map: &SurfaceMap) {
|
||||
let auth = if ep.auth_required { " [auth]" } else { "" };
|
||||
out.push_str(&format!(
|
||||
" {} {} ({:?}){}\n",
|
||||
method_str(ep.method),
|
||||
ep.route,
|
||||
ep.framework,
|
||||
auth
|
||||
));
|
||||
out.push_str(&format!(
|
||||
" handler: {} at {}:{}\n",
|
||||
ep.handler_name, ep.handler_location.file, ep.handler_location.line
|
||||
));
|
||||
let mut reached: Vec<&SurfaceNode> = map
|
||||
.edges
|
||||
.iter()
|
||||
.filter(|e| e.from == ep_idx && matches!(e.kind, EdgeKind::Reaches))
|
||||
.filter_map(|e| map.nodes.get(e.to as usize))
|
||||
.collect();
|
||||
reached.sort_by(|a, b| a.location().cmp(b.location()));
|
||||
if reached.is_empty() {
|
||||
out.push_str(" reaches: (none)\n");
|
||||
return;
|
||||
}
|
||||
out.push_str(" reaches:\n");
|
||||
for node in reached {
|
||||
render_node_line(out, node, " - ");
|
||||
}
|
||||
}
|
||||
|
||||
fn render_node_line(out: &mut String, node: &SurfaceNode, prefix: &str) {
|
||||
match node {
|
||||
SurfaceNode::EntryPoint(ep) => {
|
||||
out.push_str(&format!(
|
||||
"{prefix}entry {} {} ({:?})\n",
|
||||
method_str(ep.method),
|
||||
ep.route,
|
||||
ep.framework
|
||||
));
|
||||
}
|
||||
SurfaceNode::DataStore(ds) => {
|
||||
out.push_str(&format!(
|
||||
"{prefix}data-store ({}): {} [{}:{}]\n",
|
||||
ds_kind_str(ds.kind),
|
||||
ds.label,
|
||||
ds.location.file,
|
||||
ds.location.line
|
||||
));
|
||||
}
|
||||
SurfaceNode::ExternalService(es) => {
|
||||
out.push_str(&format!(
|
||||
"{prefix}external ({}): {} [{}:{}]\n",
|
||||
es_kind_str(es.kind),
|
||||
es.label,
|
||||
es.location.file,
|
||||
es.location.line
|
||||
));
|
||||
}
|
||||
SurfaceNode::DangerousLocal(dl) => {
|
||||
out.push_str(&format!(
|
||||
"{prefix}dangerous: {} (cap=0x{:x}) [{}:{}]\n",
|
||||
dl.function_name, dl.cap_bits, dl.location.file, dl.location.line
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Count the nodes of `map` for which the predicate `f` returns `true`.
fn count_kind<F: Fn(&SurfaceNode) -> bool>(map: &SurfaceMap, f: F) -> usize {
    let mut total = 0;
    for node in &map.nodes {
        if f(node) {
            total += 1;
        }
    }
    total
}
|
||||
|
||||
/// Pick the noun form matching `count`: `singular` for exactly one,
/// `plural` for everything else (including zero).
fn plural(count: usize, singular: &'static str, plural: &'static str) -> &'static str {
    match count {
        1 => singular,
        _ => plural,
    }
}
|
||||
|
||||
fn method_str(m: crate::entry_points::HttpMethod) -> &'static str {
|
||||
use crate::entry_points::HttpMethod::*;
|
||||
match m {
|
||||
GET => "GET",
|
||||
HEAD => "HEAD",
|
||||
POST => "POST",
|
||||
PUT => "PUT",
|
||||
PATCH => "PATCH",
|
||||
DELETE => "DELETE",
|
||||
OPTIONS => "OPTIONS",
|
||||
}
|
||||
}
|
||||
|
||||
fn ds_kind_str(k: DataStoreKind) -> &'static str {
|
||||
match k {
|
||||
DataStoreKind::Sql => "sql",
|
||||
DataStoreKind::KeyValue => "key_value",
|
||||
DataStoreKind::Document => "document",
|
||||
DataStoreKind::BlobStore => "blob_store",
|
||||
DataStoreKind::Filesystem => "filesystem",
|
||||
DataStoreKind::Unknown => "unknown",
|
||||
}
|
||||
}
|
||||
|
||||
fn es_kind_str(k: ExternalServiceKind) -> &'static str {
|
||||
match k {
|
||||
ExternalServiceKind::HttpApi => "http_api",
|
||||
ExternalServiceKind::MessageBroker => "message_broker",
|
||||
ExternalServiceKind::SearchIndex => "search_index",
|
||||
ExternalServiceKind::AuthProvider => "auth_provider",
|
||||
ExternalServiceKind::Unknown => "unknown",
|
||||
}
|
||||
}
|
||||
|
||||
// DOT / SVG rendering
|
||||
|
||||
pub fn render_dot(map: &SurfaceMap) -> String {
|
||||
let mut out = String::new();
|
||||
out.push_str("digraph nyx_surface {\n");
|
||||
out.push_str(" rankdir=LR;\n");
|
||||
out.push_str(" node [fontname=\"Helvetica\", shape=box, style=rounded];\n");
|
||||
for (i, node) in map.nodes.iter().enumerate() {
|
||||
let (label, shape, color) = match node {
|
||||
SurfaceNode::EntryPoint(ep) => (
|
||||
format!(
|
||||
"{} {}\\n{:?}\\n{}",
|
||||
method_str(ep.method),
|
||||
escape_dot(&ep.route),
|
||||
ep.framework,
|
||||
escape_dot(&ep.handler_name),
|
||||
),
|
||||
"box",
|
||||
if ep.auth_required {
|
||||
"#3aa57c"
|
||||
} else {
|
||||
"#3072c4"
|
||||
},
|
||||
),
|
||||
SurfaceNode::DataStore(ds) => (
|
||||
format!(
|
||||
"DataStore ({})\\n{}",
|
||||
ds_kind_str(ds.kind),
|
||||
escape_dot(&ds.label)
|
||||
),
|
||||
"cylinder",
|
||||
"#b07a18",
|
||||
),
|
||||
SurfaceNode::ExternalService(es) => (
|
||||
format!(
|
||||
"External ({})\\n{}",
|
||||
es_kind_str(es.kind),
|
||||
escape_dot(&es.label)
|
||||
),
|
||||
"component",
|
||||
"#8b3aa5",
|
||||
),
|
||||
SurfaceNode::DangerousLocal(dl) => (
|
||||
format!(
|
||||
"Dangerous\\n{}\\ncap=0x{:x}",
|
||||
escape_dot(&dl.function_name),
|
||||
dl.cap_bits
|
||||
),
|
||||
"octagon",
|
||||
"#c44141",
|
||||
),
|
||||
};
|
||||
out.push_str(&format!(
|
||||
" n{i} [label=\"{label}\", shape={shape}, color=\"{color}\", fontcolor=\"{color}\"];\n",
|
||||
));
|
||||
}
|
||||
for edge in &map.edges {
|
||||
let style = match edge.kind {
|
||||
EdgeKind::Reaches => "solid",
|
||||
EdgeKind::Calls => "dashed",
|
||||
EdgeKind::ReadsFrom => "solid",
|
||||
EdgeKind::WritesTo => "bold",
|
||||
EdgeKind::TalksTo => "solid",
|
||||
EdgeKind::Triggers => "dotted",
|
||||
EdgeKind::AuthRequiredOn => "dotted",
|
||||
};
|
||||
out.push_str(&format!(
|
||||
" n{} -> n{} [label=\"{:?}\", style={style}];\n",
|
||||
edge.from, edge.to, edge.kind
|
||||
));
|
||||
}
|
||||
out.push_str("}\n");
|
||||
out
|
||||
}
|
||||
|
||||
/// Escape `s` for use inside a double-quoted DOT label: backslashes are
/// doubled, double quotes are backslash-escaped, and newlines become the
/// two-character sequence `\n`.
fn escape_dot(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
||||
|
||||
fn render_svg(map: &SurfaceMap) -> NyxResult<Vec<u8>> {
|
||||
let dot = render_dot(map);
|
||||
let mut child = Command::new("dot")
|
||||
.arg("-Tsvg")
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| {
|
||||
NyxError::Msg(format!(
|
||||
"failed to spawn `dot` for SVG rendering: {e}. Install graphviz, or use `--format dot` and pipe through `dot -Tsvg` yourself."
|
||||
))
|
||||
})?;
|
||||
if let Some(mut stdin) = child.stdin.take() {
|
||||
stdin
|
||||
.write_all(dot.as_bytes())
|
||||
.map_err(|e| NyxError::Msg(format!("write DOT to dot stdin: {e}")))?;
|
||||
}
|
||||
let output = child
|
||||
.wait_with_output()
|
||||
.map_err(|e| NyxError::Msg(format!("waiting on `dot`: {e}")))?;
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
|
||||
return Err(NyxError::Msg(format!("dot exited non-zero: {stderr}")));
|
||||
}
|
||||
Ok(output.stdout)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceEdge, SurfaceNode};
|
||||
|
||||
fn flask_fixture_map() -> SurfaceMap {
|
||||
let mut map = SurfaceMap::new();
|
||||
map.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: SourceLocation::new("app.py", 5, 1),
|
||||
framework: Framework::Flask,
|
||||
method: HttpMethod::GET,
|
||||
route: "/users".into(),
|
||||
handler_name: "list_users".into(),
|
||||
handler_location: SourceLocation::new("app.py", 6, 1),
|
||||
auth_required: false,
|
||||
}));
|
||||
map.canonicalize();
|
||||
map
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn text_render_shows_entry_point() {
|
||||
let m = flask_fixture_map();
|
||||
let text = render_text(&m, None);
|
||||
assert!(text.contains("GET /users"));
|
||||
assert!(text.contains("handler: list_users"));
|
||||
assert!(text.contains("app.py"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dot_render_emits_digraph_header() {
|
||||
let m = flask_fixture_map();
|
||||
let dot = render_dot(&m);
|
||||
assert!(dot.starts_with("digraph nyx_surface"));
|
||||
assert!(dot.contains("GET /users"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dot_escapes_quotes_in_labels() {
|
||||
let mut m = SurfaceMap::new();
|
||||
m.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
|
||||
location: SourceLocation::new("a.py", 1, 1),
|
||||
framework: Framework::Flask,
|
||||
method: HttpMethod::GET,
|
||||
route: r#"/with"quote"#.into(),
|
||||
handler_name: "h".into(),
|
||||
handler_location: SourceLocation::new("a.py", 2, 1),
|
||||
auth_required: false,
|
||||
}));
|
||||
let dot = render_dot(&m);
|
||||
assert!(dot.contains(r#"/with\"quote"#));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn text_render_groups_reaches_under_entry() {
|
||||
let mut m = flask_fixture_map();
|
||||
m.nodes.push(SurfaceNode::DangerousLocal(
|
||||
crate::surface::DangerousLocal {
|
||||
location: SourceLocation::new("app.py", 12, 1),
|
||||
function_name: "eval".into(),
|
||||
cap_bits: crate::labels::Cap::CODE_EXEC.bits(),
|
||||
},
|
||||
));
|
||||
// Build edge after canonicalize so indices are stable.
|
||||
m.canonicalize();
|
||||
let ep_idx = m
|
||||
.nodes
|
||||
.iter()
|
||||
.position(|n| matches!(n, SurfaceNode::EntryPoint(_)))
|
||||
.unwrap() as u32;
|
||||
let dl_idx = m
|
||||
.nodes
|
||||
.iter()
|
||||
.position(|n| matches!(n, SurfaceNode::DangerousLocal(_)))
|
||||
.unwrap() as u32;
|
||||
m.edges.push(SurfaceEdge {
|
||||
from: ep_idx,
|
||||
to: dl_idx,
|
||||
kind: EdgeKind::Reaches,
|
||||
});
|
||||
m.canonicalize();
|
||||
let text = render_text(&m, None);
|
||||
assert!(text.contains("reaches:"));
|
||||
assert!(text.contains("dangerous: eval"));
|
||||
}
|
||||
|
||||
/// The inline pass-1 path must yield at least one summary per top-level
/// function of the fixture. The downstream surface build walks the summaries
/// map (`detect_data_stores` / `detect_external_services` /
/// `detect_dangerous_locals`), so without them it would fall back to
/// entry-points-only output.
#[test]
fn build_summaries_inline_extracts_function_summaries() {
    // Fixture written to disk verbatim; the `\` continuations strip the
    // leading whitespace of each following line.
    const APP_SOURCE: &str = "from flask import Flask, request\n\
        app = Flask(__name__)\n\
        \n\
        @app.route('/run')\n\
        def run():\n\
        cmd = request.args.get('cmd')\n\
        return str(eval(cmd))\n\
        \n\
        def helper(x):\n\
        return eval(x)\n";

    let td = tempfile::tempdir().unwrap();
    let project_dir = td.path();
    std::fs::write(project_dir.join("app.py"), APP_SOURCE).unwrap();

    let cfg = Config::default();
    let canon = project_dir.canonicalize().unwrap();
    let files = collect_files(&canon, &cfg).unwrap();
    let summaries = build_summaries_inline(&files, &canon, &cfg);

    let names: Vec<String> = summaries.iter().map(|(k, _)| k.qualified_name()).collect();
    for expected in ["run", "helper"] {
        assert!(
            names.iter().any(|n| n.ends_with(expected)),
            "summaries should contain `{expected}`, got {names:?}"
        );
    }
}
|
||||
|
||||
/// End-to-end smoke test for the inline-build path (`surface::handle(...,
/// build=true)`, i.e. `nyx surface --build .`): a single-file Flask app must
/// produce a surface map containing at least one entry point.
#[test]
fn build_full_from_filesystem_walks_pass1_pipeline() {
    const APP_SOURCE: &str = "from flask import Flask, request\n\
        app = Flask(__name__)\n\
        \n\
        @app.route('/run')\n\
        def run():\n\
        cmd = request.args.get('cmd')\n\
        return str(eval(cmd))\n";

    let td = tempfile::tempdir().unwrap();
    let project_dir = td.path();
    std::fs::write(project_dir.join("app.py"), APP_SOURCE).unwrap();

    let cfg = Config::default();
    let canon = project_dir.canonicalize().unwrap();
    let map = build_full_from_filesystem(&canon, &cfg).expect("inline build succeeds");

    let entry_count = map
        .nodes
        .iter()
        .filter(|n| matches!(n, SurfaceNode::EntryPoint(_)))
        .count();
    assert!(entry_count > 0, "Flask /run route should be detected");
}
|
||||
|
||||
/// Pins the fallback contract of `build_from_filesystem`: framework probes
/// still run, but the detectors see an empty `GlobalSummaries`, so the map
/// holds entry-point nodes only. A change that makes the fallback populate
/// sinks should go through `--build` or update this test.
#[test]
fn build_from_filesystem_entry_points_only_runs_with_empty_summaries() {
    const APP_SOURCE: &str = "from flask import Flask\n\
        app = Flask(__name__)\n\
        \n\
        @app.route('/run')\n\
        def run():\n\
        return 'ok'\n";

    let td = tempfile::tempdir().unwrap();
    let project_dir = td.path();
    std::fs::write(project_dir.join("app.py"), APP_SOURCE).unwrap();

    let cfg = Config::default();
    let canon = project_dir.canonicalize().unwrap();
    let map = build_from_filesystem(&canon, &cfg).expect("fallback build succeeds");

    // The framework probe runs in the fallback path too, so the route shows up.
    let has_entry = map
        .nodes
        .iter()
        .any(|n| matches!(n, SurfaceNode::EntryPoint(_)));
    assert!(has_entry, "Flask route should land via framework probe");

    // With empty summaries, none of the summary-driven node kinds may appear.
    let only_entries = map.nodes.iter().all(|n| {
        !matches!(
            n,
            SurfaceNode::DataStore(_)
                | SurfaceNode::ExternalService(_)
                | SurfaceNode::DangerousLocal(_)
        )
    });
    assert!(
        only_entries,
        "entry-points-only fallback should not produce non-entry nodes"
    );
}
|
||||
}
|
||||
|
|
@ -231,6 +231,13 @@ fn type_kind_index(kind: &TypeKind) -> u32 {
|
|||
| TypeKind::GormDb
|
||||
| TypeKind::SqlxDb
|
||||
| TypeKind::HibernateSession => 3,
|
||||
// ProcessBuilder participates only in the type-qualified callee
|
||||
// resolver via `label_prefix()`; no dedicated bitset slot, share
|
||||
// the Object index like the other receiver-only TypeKinds.
|
||||
TypeKind::ProcessBuilder => 3,
|
||||
// Runtime is likewise a type-qualified-resolver-only receiver kind
|
||||
// (`Runtime.exec`); no dedicated bitset slot, share the Object index.
|
||||
TypeKind::Runtime => 3,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,8 +15,6 @@
|
|||
//! literal operand. Necessary because individual comparisons are NOT
|
||||
//! decomposed into separate SSA operations (condition nodes → `Nop`).
|
||||
|
||||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::cfg::NodeInfo;
|
||||
use crate::ssa::const_prop::ConstLattice;
|
||||
use crate::ssa::ir::{BlockId, SsaBody, SsaValue};
|
||||
|
|
|
|||
|
|
@ -275,6 +275,14 @@ pub fn class_name_to_type_kind(name: &str) -> Option<TypeKind> {
|
|||
// type-qualified resolution to `Template.process`, the SSTI
|
||||
// sink defined in `labels/java.rs`.
|
||||
"Template" => Some(TypeKind::Template),
|
||||
// `java.lang.Runtime` declared receiver type. Routes the
|
||||
// split-receiver shape `Runtime r = Runtime.getRuntime(); ...
|
||||
// r.exec(...)` through type-qualified resolution to
|
||||
// `Runtime.exec` (the only `Runtime.*` rule, always SHELL_ESCAPE),
|
||||
// complementing the `constructor_type` factory route for
|
||||
// `Runtime.getRuntime()`. No benign `Runtime.exec` exists, so
|
||||
// typing any `Runtime`-declared receiver carries no FP risk.
|
||||
"Runtime" => Some(TypeKind::Runtime),
|
||||
// Python qualified type names.
|
||||
// Only covers raw lowered names from isinstance(). The lowering in lower.rs
|
||||
// extracts the literal type text: isinstance(x, requests.Session) produces
|
||||
|
|
|
|||
439
src/database.rs
439
src/database.rs
|
|
@ -19,11 +19,19 @@ pub mod index {
|
|||
use r2d2_sqlite::SqliteConnectionManager;
|
||||
use rusqlite::{Connection, OpenFlags, OptionalExtension, params};
|
||||
use std::fs;
|
||||
use std::io::Read;
|
||||
use std::ops::Deref;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// How long each SQLite connection waits for the single writer slot.
|
||||
///
|
||||
/// Indexed scans can have dozens of Rayon workers finishing analysis at
|
||||
/// once. SQLite still permits only one writer, so a timeout here turns that
|
||||
/// burst into short backpressure instead of surfacing SQLITE_BUSY.
|
||||
const SQLITE_BUSY_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
/// DB schema (foreign‑keys enabled).
|
||||
const SCHEMA: &str = r#"
|
||||
|
|
@ -206,6 +214,36 @@ pub mod index {
|
|||
first_seen_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
-- Dynamic verdict cache (§12 Q5).
|
||||
-- Keyed on (spec_hash, entry_content_hash, transitive_import_digest).
|
||||
-- Invalidation: any of entry content, import digest, toolchain_id,
|
||||
-- corpus_version, or spec_format_version change → DELETE row → re-run.
|
||||
CREATE TABLE IF NOT EXISTS dynamic_verdict_cache (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
spec_hash TEXT NOT NULL,
|
||||
entry_content_hash TEXT NOT NULL,
|
||||
transitive_import_digest TEXT NOT NULL,
|
||||
toolchain_id TEXT NOT NULL,
|
||||
corpus_version INTEGER NOT NULL,
|
||||
spec_format_version INTEGER NOT NULL,
|
||||
verdict_json TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL,
|
||||
UNIQUE(spec_hash, entry_content_hash, transitive_import_digest,
|
||||
toolchain_id, corpus_version, spec_format_version)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dynamic_verdict_cache_spec_hash
|
||||
ON dynamic_verdict_cache(spec_hash);
|
||||
|
||||
-- Phase 21: persisted attack-surface map. One row per project.
|
||||
-- Stored as canonical JSON so the round-trip is byte-identical
|
||||
-- across rescans (see `SurfaceMap::to_json`).
|
||||
CREATE TABLE IF NOT EXISTS surface_map (
|
||||
project TEXT PRIMARY KEY,
|
||||
map_json BLOB NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
-- Indexes on (project, file_path) for the per-file replace_* paths.
|
||||
-- Without these, every DELETE WHERE project=? AND file_path=? does a
|
||||
-- full table scan, which dominates indexing time as the cache grows.
|
||||
|
|
@ -252,9 +290,6 @@ pub mod index {
|
|||
/// footprint.
|
||||
pub const SCHEMA_VERSION: &str = "4";
|
||||
|
||||
// TODO: ADD CLEANS FOR EACH TABLE BASED ON PROJECT WHICH RUNS ON CLEAN
|
||||
// TODO: ADD DROP AND GIVE A CLI PARAMETER FOR DROP
|
||||
|
||||
/// A single issue row, ready for insertion.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IssueRow<'a> {
|
||||
|
|
@ -264,6 +299,127 @@ pub mod index {
|
|||
pub col: i64,
|
||||
}
|
||||
|
||||
type IndexWriteJob = Box<dyn FnOnce(&mut Indexer) -> NyxResult<()> + Send + 'static>;
|
||||
|
||||
#[derive(Default)]
|
||||
struct IndexWriteReport {
|
||||
error_count: usize,
|
||||
samples: Vec<String>,
|
||||
}
|
||||
|
||||
impl IndexWriteReport {
|
||||
fn record(&mut self, err: impl ToString) {
|
||||
self.error_count += 1;
|
||||
if self.samples.len() < 8 {
|
||||
self.samples.push(err.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Bounded handle for submitting persisted-index writes.
|
||||
///
|
||||
/// The scanner can keep parsing in parallel while this sender applies
|
||||
/// backpressure when SQLite's single writer falls behind.
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct IndexWriteSender {
|
||||
tx: crossbeam_channel::Sender<IndexWriteJob>,
|
||||
}
|
||||
|
||||
impl IndexWriteSender {
|
||||
pub(crate) fn enqueue<F>(&self, job: F) -> NyxResult<()>
|
||||
where
|
||||
F: FnOnce(&mut Indexer) -> NyxResult<()> + Send + 'static,
|
||||
{
|
||||
self.tx
|
||||
.send(Box::new(job))
|
||||
.map_err(|_| NyxError::Msg("database writer stopped before accepting write".into()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Single-writer queue for project index mutations.
|
||||
///
|
||||
/// SQLite permits many readers but only one writer. Parallel scans should
|
||||
/// therefore submit analyzed file results here instead of letting every
|
||||
/// Rayon worker compete for the writer lock.
|
||||
pub(crate) struct IndexWriteQueue {
|
||||
tx: IndexWriteSender,
|
||||
handle: std::thread::JoinHandle<IndexWriteReport>,
|
||||
}
|
||||
|
||||
impl IndexWriteQueue {
|
||||
pub(crate) fn start(
|
||||
project: impl Into<String>,
|
||||
pool: Arc<Pool<SqliteConnectionManager>>,
|
||||
) -> Self {
|
||||
let capacity = std::env::var("NYX_INDEX_WRITE_QUEUE_MAX")
|
||||
.ok()
|
||||
.and_then(|v| v.parse::<usize>().ok())
|
||||
.filter(|n| *n >= 1)
|
||||
.unwrap_or_else(|| (num_cpus::get() * 2).max(64));
|
||||
Self::start_with_capacity(project, pool, capacity)
|
||||
}
|
||||
|
||||
pub(crate) fn start_with_capacity(
|
||||
project: impl Into<String>,
|
||||
pool: Arc<Pool<SqliteConnectionManager>>,
|
||||
capacity: usize,
|
||||
) -> Self {
|
||||
let project = project.into();
|
||||
let (tx, rx) = crossbeam_channel::bounded::<IndexWriteJob>(capacity.max(1));
|
||||
let handle = std::thread::spawn(move || {
|
||||
let mut report = IndexWriteReport::default();
|
||||
let mut idx = match Indexer::from_pool(&project, &pool) {
|
||||
Ok(idx) => idx,
|
||||
Err(err) => {
|
||||
report.record(format!("writer init: {err}"));
|
||||
return report;
|
||||
}
|
||||
};
|
||||
|
||||
for job in rx {
|
||||
if let Err(err) = job(&mut idx) {
|
||||
report.record(err);
|
||||
}
|
||||
}
|
||||
|
||||
report
|
||||
});
|
||||
|
||||
Self {
|
||||
tx: IndexWriteSender { tx },
|
||||
handle,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn sender(&self) -> IndexWriteSender {
|
||||
self.tx.clone()
|
||||
}
|
||||
|
||||
pub(crate) fn finish(self, stage: &str) -> NyxResult<()> {
|
||||
let Self { tx, handle } = self;
|
||||
drop(tx);
|
||||
let report = handle
|
||||
.join()
|
||||
.map_err(|_| NyxError::Msg(format!("{stage} database writer panicked")))?;
|
||||
if report.error_count == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut details = report.samples;
|
||||
if report.error_count > details.len() {
|
||||
details.push(format!(
|
||||
"... and {} more",
|
||||
report.error_count - details.len()
|
||||
));
|
||||
}
|
||||
|
||||
Err(NyxError::Msg(format!(
|
||||
"{stage} failed to persist scan state: {}",
|
||||
details.join("; ")
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
/// A scan record for DB persistence.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ScanRecord {
|
||||
|
|
@ -311,9 +467,62 @@ pub mod index {
|
|||
project: String,
|
||||
}
|
||||
|
||||
/// SQLite database files start with this 16-byte ASCII magic.
|
||||
const SQLITE_MAGIC: &[u8; 16] = b"SQLite format 3\0";
|
||||
|
||||
/// Reject obviously non-SQLite files before handing them to the
|
||||
/// connection pool, where the same rejection costs minutes instead of
|
||||
/// microseconds on some corruption shapes.
|
||||
///
|
||||
/// Returns `Ok(())` when:
|
||||
/// * the file does not exist (the pool will `CREATE` it),
|
||||
/// * the file is zero-length (SQLite treats this as a fresh DB),
|
||||
/// * the first 16 bytes match the SQLite magic header,
|
||||
/// * the file is shorter than the magic but non-empty (extremely
|
||||
/// unusual; we defer to SQLite rather than gating arbitrarily).
|
||||
///
|
||||
/// Returns `Err(NyxError::Sql(...))` carrying `SQLITE_NOTADB` when the
|
||||
/// header is present but does not match.
|
||||
fn preflight_header(database_path: &Path) -> NyxResult<()> {
|
||||
let Ok(meta) = fs::metadata(database_path) else {
|
||||
return Ok(());
|
||||
};
|
||||
if !meta.is_file() {
|
||||
return Ok(());
|
||||
}
|
||||
if meta.len() < SQLITE_MAGIC.len() as u64 {
|
||||
return Ok(());
|
||||
}
|
||||
let mut head = [0u8; 16];
|
||||
let mut f = fs::File::open(database_path)?;
|
||||
f.read_exact(&mut head)?;
|
||||
if &head != SQLITE_MAGIC {
|
||||
return Err(NyxError::Sql(rusqlite::Error::SqliteFailure(
|
||||
rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_NOTADB),
|
||||
Some(format!(
|
||||
"file at {} is not a SQLite database (header magic mismatch)",
|
||||
database_path.display(),
|
||||
)),
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl Indexer {
|
||||
pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
|
||||
let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
|
||||
|
||||
// Fast-fail when the existing file is clearly not a SQLite
|
||||
// database. Without this guard, certain corruption shapes
|
||||
// (truncated header, header overwritten with arbitrary bytes,
|
||||
// mid-page damage that preserves magic) can keep SQLite busy
|
||||
// for 150-200 seconds inside the PRAGMA / schema execution
|
||||
// below before it surfaces SQLITE_NOTADB or SQLITE_CORRUPT.
|
||||
// A zero-length file is treated as a fresh DB by SQLite, so we
|
||||
// only validate when the file is large enough to hold the
|
||||
// 16-byte magic header.
|
||||
preflight_header(database_path)?;
|
||||
|
||||
// NO_MUTEX is safe because r2d2 ensures each pooled connection
|
||||
// is only ever used by one thread at a time. Combined with WAL
|
||||
// mode this allows concurrent readers + a single writer without
|
||||
|
|
@ -321,31 +530,9 @@ pub mod index {
|
|||
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
|
||||
| OpenFlags::SQLITE_OPEN_CREATE
|
||||
| OpenFlags::SQLITE_OPEN_NO_MUTEX;
|
||||
let manager = SqliteConnectionManager::file(database_path).with_flags(flags);
|
||||
// r2d2's default `max_size` is 10, which can stall rayon
|
||||
// workers on machines with more cores than that during the
|
||||
// parallel indexing pass. Size the pool to comfortably hold
|
||||
// a connection per rayon thread plus a small slack.
|
||||
//
|
||||
// `NYX_INDEX_POOL_MAX` overrides the auto-sized default. Use it in
|
||||
// fd-constrained environments (test sandboxes, containers with low
|
||||
// ulimit) where many parallel indexed scans would otherwise exhaust
|
||||
// EMFILE: each pooled SQLite WAL connection costs ~3 fds (db + -wal
|
||||
// + -shm), so 30 parallel scans × 16 conns × 3 fds = 1440 fds.
|
||||
let max_conns = std::env::var("NYX_INDEX_POOL_MAX")
|
||||
.ok()
|
||||
.and_then(|v| v.parse::<u32>().ok())
|
||||
.filter(|n| *n >= 1)
|
||||
.unwrap_or_else(|| (num_cpus::get() as u32 + 4).max(16));
|
||||
let pool = Arc::new(Pool::builder().max_size(max_conns).build(manager)?);
|
||||
|
||||
{
|
||||
let conn = pool.get()?;
|
||||
let conn = Self::open_configured_connection(database_path, flags)?;
|
||||
conn.pragma_update(None, "journal_mode", "WAL")?;
|
||||
conn.pragma_update(None, "synchronous", "NORMAL")?;
|
||||
conn.pragma_update(None, "cache_size", "-8000")?; // 8 MB
|
||||
conn.pragma_update(None, "temp_store", "MEMORY")?;
|
||||
conn.pragma_update(None, "mmap_size", "268435456")?; // 256 MB
|
||||
conn.execute_batch(SCHEMA)?;
|
||||
|
||||
// Migrate: if the function_summaries table is missing any required
|
||||
|
|
@ -472,6 +659,22 @@ pub mod index {
|
|||
conn.execute_batch(SCHEMA)?;
|
||||
}
|
||||
|
||||
// Phase 21: ensure the `surface_map` table exists on
|
||||
// DBs created before this table was introduced.
|
||||
let surface_exists: bool = conn
|
||||
.query_row(
|
||||
"SELECT 1 FROM sqlite_master
|
||||
WHERE type = 'table' AND name = 'surface_map'",
|
||||
[],
|
||||
|_| Ok(true),
|
||||
)
|
||||
.optional()?
|
||||
.unwrap_or(false);
|
||||
if !surface_exists {
|
||||
tracing::info!("creating surface_map table");
|
||||
conn.execute_batch(SCHEMA)?;
|
||||
}
|
||||
|
||||
// Schema version check: invalidate cached summary tables
|
||||
// when the on-disk artefact layout has changed in an
|
||||
// incompatible way, independently of the engine version.
|
||||
|
|
@ -483,9 +686,48 @@ pub mod index {
|
|||
// version changes so stale serialized data cannot be loaded.
|
||||
Self::check_engine_version(&conn)?;
|
||||
}
|
||||
|
||||
let manager = SqliteConnectionManager::file(database_path)
|
||||
.with_flags(flags)
|
||||
.with_init(Self::configure_connection);
|
||||
// r2d2's default `max_size` is 10, which can stall rayon
|
||||
// workers on machines with more cores than that during the
|
||||
// parallel indexing pass. Size the pool to comfortably hold
|
||||
// a connection per rayon thread plus a small slack.
|
||||
//
|
||||
// `NYX_INDEX_POOL_MAX` overrides the auto-sized default. Use it in
|
||||
// fd-constrained environments (test sandboxes, containers with low
|
||||
// ulimit) where many parallel indexed scans would otherwise exhaust
|
||||
// EMFILE: each pooled SQLite WAL connection costs ~3 fds (db + -wal
|
||||
// + -shm), so 30 parallel scans × 16 conns × 3 fds = 1440 fds.
|
||||
let max_conns = std::env::var("NYX_INDEX_POOL_MAX")
|
||||
.ok()
|
||||
.and_then(|v| v.parse::<u32>().ok())
|
||||
.filter(|n| *n >= 1)
|
||||
.unwrap_or_else(|| (num_cpus::get() as u32 + 4).max(16));
|
||||
let pool = Arc::new(Pool::builder().max_size(max_conns).build(manager)?);
|
||||
Ok(pool)
|
||||
}
|
||||
|
||||
fn open_configured_connection(
|
||||
database_path: &Path,
|
||||
flags: OpenFlags,
|
||||
) -> rusqlite::Result<Connection> {
|
||||
let mut conn = Connection::open_with_flags(database_path, flags)?;
|
||||
Self::configure_connection(&mut conn)?;
|
||||
Ok(conn)
|
||||
}
|
||||
|
||||
fn configure_connection(conn: &mut Connection) -> rusqlite::Result<()> {
|
||||
conn.busy_timeout(SQLITE_BUSY_TIMEOUT)?;
|
||||
conn.pragma_update(None, "foreign_keys", "ON")?;
|
||||
conn.pragma_update(None, "synchronous", "NORMAL")?;
|
||||
conn.pragma_update(None, "cache_size", -8000i64)?; // 8 MB
|
||||
conn.pragma_update(None, "temp_store", "MEMORY")?;
|
||||
conn.pragma_update(None, "mmap_size", 268_435_456i64)?; // 256 MB
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a column to an existing table when it is missing.
|
||||
///
|
||||
/// Non-destructive: leaves all existing rows untouched, populating
|
||||
|
|
@ -686,7 +928,9 @@ pub mod index {
|
|||
///
|
||||
/// Short-circuits on mtime: if the stored mtime matches the
|
||||
/// filesystem mtime, the file is assumed unchanged (skip hash).
|
||||
#[allow(dead_code)] // used in tests and by should_scan_with_hash callers may fall back
|
||||
/// Production scans use `should_scan_with_hash`, which avoids the
|
||||
/// redundant `digest_file` read; this variant exists for tests.
|
||||
#[cfg(test)]
|
||||
pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
|
||||
let meta = fs::metadata(path)?;
|
||||
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
|
|
@ -852,6 +1096,7 @@ pub mod index {
|
|||
rollup: None,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: Vec::new(),
|
||||
stable_hash: 0,
|
||||
})
|
||||
})?;
|
||||
|
||||
|
|
@ -1806,6 +2051,60 @@ pub mod index {
|
|||
Ok(out)
|
||||
}
|
||||
|
||||
/// Persist a [`crate::surface::SurfaceMap`] for this project.
|
||||
///
|
||||
/// Replaces any previously-persisted map; the table holds one row
|
||||
/// per project. The map is canonicalised before serialisation so
|
||||
/// `replace_surface_map` + `load_surface_map` round-trip is
|
||||
/// byte-identical for structurally identical maps.
|
||||
pub fn replace_surface_map(&mut self, map: &crate::surface::SurfaceMap) -> NyxResult<()> {
|
||||
let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let mut canon = map.clone();
|
||||
let bytes = canon
|
||||
.to_json()
|
||||
.map_err(|e| NyxError::Msg(format!("surface map serialise: {e}")))?;
|
||||
self.c().execute(
|
||||
"INSERT OR REPLACE INTO surface_map (project, map_json, updated_at)
|
||||
VALUES (?1, ?2, ?3)",
|
||||
params![self.project, bytes, now],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load the persisted [`crate::surface::SurfaceMap`] for this
|
||||
/// project, or `None` when no map has been written.
|
||||
pub fn load_surface_map(&self) -> NyxResult<Option<crate::surface::SurfaceMap>> {
|
||||
let row: Option<Vec<u8>> = self
|
||||
.c()
|
||||
.query_row(
|
||||
"SELECT map_json FROM surface_map WHERE project = ?1",
|
||||
params![self.project],
|
||||
|r| r.get::<_, Vec<u8>>(0),
|
||||
)
|
||||
.optional()?;
|
||||
let Some(bytes) = row else {
|
||||
return Ok(None);
|
||||
};
|
||||
let map = crate::surface::SurfaceMap::from_json(&bytes)
|
||||
.map_err(|e| NyxError::Msg(format!("surface map deserialise: {e}")))?;
|
||||
Ok(Some(map))
|
||||
}
|
||||
|
||||
/// Return the raw JSON bytes stored for the surface map without
|
||||
/// deserialising. Used by the round-trip parity tests so they
|
||||
/// can compare on-disk bytes across rescans.
|
||||
pub fn load_surface_map_bytes(&self) -> NyxResult<Option<Vec<u8>>> {
|
||||
let row: Option<Vec<u8>> = self
|
||||
.c()
|
||||
.query_row(
|
||||
"SELECT map_json FROM surface_map WHERE project = ?1",
|
||||
params![self.project],
|
||||
|r| r.get::<_, Vec<u8>>(0),
|
||||
)
|
||||
.optional()?;
|
||||
Ok(row)
|
||||
}
|
||||
|
||||
/// Remove a file and all derived persisted state for this project.
|
||||
///
|
||||
/// This deletes the file row, issues, and all persisted summary rows so
|
||||
|
|
@ -1867,9 +2166,7 @@ pub mod index {
|
|||
.collect::<Result<_, _>>()?)
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Scan persistence
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Insert a new scan record.
|
||||
pub fn insert_scan(&self, record: &ScanRecord) -> NyxResult<()> {
|
||||
|
|
@ -2135,9 +2432,7 @@ pub mod index {
|
|||
Ok(rows)
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Triage state management
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// Get the triage state for a single finding fingerprint.
|
||||
/// Returns (state, note, updated_at) or None if no triage state exists.
|
||||
|
|
@ -2159,7 +2454,6 @@ pub mod index {
|
|||
|
||||
/// Set the triage state for a single finding. Upserts the state and
|
||||
/// appends an audit log entry. Returns the previous state (or "open").
|
||||
#[allow(dead_code)]
|
||||
pub fn set_triage_state(
|
||||
&self,
|
||||
fingerprint: &str,
|
||||
|
|
@ -2518,9 +2812,7 @@ pub mod index {
|
|||
Ok(count > 0)
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Maintenance utilities
|
||||
// -------------------------------------------------------------------------
|
||||
pub fn clear(&self) -> NyxResult<()> {
|
||||
self.c().execute_batch(
|
||||
r#"
|
||||
|
|
@ -2545,10 +2837,8 @@ pub mod index {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------------------------------------------
|
||||
#[allow(dead_code)] // used by should_scan() and tests
|
||||
#[cfg(test)]
|
||||
fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
let mut file = fs::File::open(path)?;
|
||||
|
|
@ -3052,7 +3342,7 @@ fn clear_drops_ssa_summaries_table() {
|
|||
// ── CalleeSsaBody persistence tests ──────────────────────────────────────
|
||||
|
||||
/// Helper: build a minimal CalleeSsaBody for DB tests.
|
||||
#[allow(dead_code)] // used by tests below
|
||||
#[cfg(test)]
|
||||
fn make_test_callee_body(
|
||||
num_blocks: usize,
|
||||
param_count: usize,
|
||||
|
|
@ -3621,6 +3911,77 @@ fn fresh_db_no_migration_needed() {
|
|||
assert!(idx.get_files("proj").unwrap().is_empty());
|
||||
}
|
||||
|
||||
/// Every connection handed out by the pool must carry the 60 s busy timeout.
#[test]
fn init_applies_busy_timeout_to_every_pooled_connection() {
    let td = tempfile::tempdir().unwrap();
    let db_path = td.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();

    // Check out several connections and keep them alive together so r2d2
    // has to hand out distinct handles. The timeout is per connection, so
    // configuring only the schema-setup connection would leave the later
    // ones at rusqlite's default.
    let mut held = Vec::with_capacity(4);
    for _ in 0..4 {
        held.push(pool.get().unwrap());
    }

    for conn in held.iter() {
        let timeout_ms: i64 = conn
            .query_row("PRAGMA busy_timeout", [], |row| row.get(0))
            .unwrap();
        assert_eq!(timeout_ms, 60_000);
    }
}
|
||||
|
||||
/// Sixteen threads enqueue writes through a capacity-2 queue; afterwards every
/// file must be persisted with exactly one issue each.
#[test]
fn index_write_queue_serializes_parallel_writes() {
    let td = tempfile::tempdir().unwrap();
    let db_path = td.path().join("nyx.sqlite");
    let pool = index::Indexer::init(&db_path).unwrap();
    let project = "proj";
    let queue =
        index::IndexWriteQueue::start_with_capacity(project, std::sync::Arc::clone(&pool), 2);
    let sender = queue.sender();

    let workers: Vec<_> = (0..16)
        .map(|i| {
            let path = td.path().join(format!("file_{i}.rs"));
            let source = format!("fn f_{i}() {{}}\n");
            std::fs::write(&path, &source).unwrap();
            let hash = index::Indexer::digest_bytes(source.as_bytes());
            let sender = sender.clone();

            std::thread::spawn(move || {
                sender
                    .enqueue(move |idx| {
                        let file_id = idx.upsert_file_with_hash(&path, &hash)?;
                        // Owned strings stay alive here while the rows borrow them.
                        let owned_rows =
                            [(String::from("test-rule"), String::from("LOW"), 1_i64, 0_i64)];
                        let rows = owned_rows.iter().map(|(rule_id, severity, line, col)| {
                            index::IssueRow {
                                rule_id: rule_id.as_str(),
                                severity: severity.as_str(),
                                line: *line,
                                col: *col,
                            }
                        });
                        idx.replace_issues(file_id, rows)?;
                        Ok(())
                    })
                    .unwrap();
            })
        })
        .collect();

    for worker in workers {
        worker.join().unwrap();
    }
    // The writer thread exits only after every sender clone is gone.
    drop(sender);
    queue.finish("test").unwrap();

    let idx = index::Indexer::from_pool(project, &pool).unwrap();
    let files = idx.get_files(project).unwrap();
    assert_eq!(files.len(), 16);
    for path in files {
        assert_eq!(idx.get_issues_from_file(&path).unwrap().len(), 1);
    }
}
|
||||
|
||||
#[test]
|
||||
fn missing_ssa_namespace_column_triggers_recreate() {
|
||||
let td = tempfile::tempdir().unwrap();
|
||||
|
|
|
|||
113
src/dynamic/build_pool/c.rs
Normal file
113
src/dynamic/build_pool/c.rs
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
//! C build pool (Phase 23 / Track O.1).
|
||||
//!
|
||||
//! Wraps the C compiler in `ccache` (when present) backed by a shared object
|
||||
//! cache under the pool cache root, so a finding that recompiles a harness
|
||||
//! whose `main.c` matches a previously-built one gets a cache hit instead of a
|
||||
//! cold `cc` invocation.
|
||||
//!
|
||||
//! `ccache` degrades gracefully: when it is not on `PATH` the pool runs the
|
||||
//! bare compiler, byte-for-byte the same `cc` invocation the legacy
|
||||
//! [`crate::dynamic::build_sandbox::prepare_c`] path uses, so success / failure
|
||||
//! parity holds. The static-link fallback (drop `-static` and retry) mirrors
|
||||
//! the legacy `run_cc` behaviour for chroot-bound Strict-profile harnesses.
|
||||
|
||||
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct CPool {
|
||||
cc_bin: String,
|
||||
ccache_bin: Option<String>,
|
||||
}
|
||||
|
||||
impl CPool {
|
||||
pub fn try_new() -> Result<Self, String> {
|
||||
let cc_bin = std::env::var("NYX_CC_BIN").unwrap_or_else(|_| "cc".to_owned());
|
||||
if !binary_runnable(&cc_bin, "--version") {
|
||||
return Err(format!("c-pool: {cc_bin} not runnable"));
|
||||
}
|
||||
Ok(CPool {
|
||||
cc_bin,
|
||||
ccache_bin: super::detect_ccache(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl BuildPool for CPool {
|
||||
fn name(&self) -> &'static str {
|
||||
"c"
|
||||
}
|
||||
|
||||
/// `args[0]` = binary destination, `args[1]` = `"static"` or `"dynamic"`.
|
||||
fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
|
||||
let start = Instant::now();
|
||||
let dest = match args.first() {
|
||||
Some(d) => d.clone(),
|
||||
None => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: "c-pool: missing binary destination arg".to_owned(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
};
|
||||
let static_link = args.get(1).map(|s| s == "static").unwrap_or(false);
|
||||
|
||||
if static_link {
|
||||
match self.run(workdir, &dest, &["-static", "-O0", "-g"]) {
|
||||
Ok(()) => {
|
||||
return PoolCompileResult {
|
||||
success: true,
|
||||
stderr: String::new(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
Err(stderr) => {
|
||||
unsafe { std::env::set_var("NYX_BUILD_STATIC_FALLBACK", "1") };
|
||||
eprintln!("nyx: c-pool cc -static failed, retrying without -static: {stderr}");
|
||||
let _ = std::fs::remove_file(&dest);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match self.run(workdir, &dest, &["-O0", "-g"]) {
|
||||
Ok(()) => PoolCompileResult {
|
||||
success: true,
|
||||
stderr: String::new(),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
Err(stderr) => PoolCompileResult {
|
||||
success: false,
|
||||
stderr,
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn is_healthy(&self) -> bool {
|
||||
binary_runnable(&self.cc_bin, "--version")
|
||||
}
|
||||
}
|
||||
|
||||
impl CPool {
|
||||
/// Run one compile of `main.c`, optionally fronted by `ccache`.
|
||||
fn run(&self, workdir: &Path, dest: &str, leading_flags: &[&str]) -> Result<(), String> {
|
||||
let mut cmd = match (&self.ccache_bin, pool_cache_dir("c", "ccache")) {
|
||||
(Some(ccache), Some(cache_dir)) => {
|
||||
let mut c = base_command(ccache);
|
||||
c.arg(&self.cc_bin).env("CCACHE_DIR", cache_dir);
|
||||
c
|
||||
}
|
||||
_ => base_command(&self.cc_bin),
|
||||
};
|
||||
cmd.args(leading_flags)
|
||||
.args(["-o", dest, "main.c"])
|
||||
.current_dir(workdir);
|
||||
|
||||
let output = cmd.output().map_err(|e| format!("c-pool: cc: {e}"))?;
|
||||
if !output.status.success() {
|
||||
return Err(String::from_utf8_lossy(&output.stderr).into_owned());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
83
src/dynamic/build_pool/cpp.rs
Normal file
83
src/dynamic/build_pool/cpp.rs
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
//! C++ build pool (Phase 23 / Track O.1).
|
||||
//!
|
||||
//! Same shape as the C pool: front the C++ driver with `ccache` backed by a
|
||||
//! shared object cache under the pool cache root. Falls back to a bare
|
||||
//! `c++ -std=c++17` compile — byte-for-byte the legacy
|
||||
//! [`crate::dynamic::build_sandbox::prepare_cpp`] invocation — when `ccache` is
|
||||
//! absent.
|
||||
|
||||
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct CppPool {
|
||||
cxx_bin: String,
|
||||
ccache_bin: Option<String>,
|
||||
}
|
||||
|
||||
impl CppPool {
|
||||
pub fn try_new() -> Result<Self, String> {
|
||||
let cxx_bin = std::env::var("NYX_CXX_BIN").unwrap_or_else(|_| "c++".to_owned());
|
||||
if !binary_runnable(&cxx_bin, "--version") {
|
||||
return Err(format!("cpp-pool: {cxx_bin} not runnable"));
|
||||
}
|
||||
Ok(CppPool {
|
||||
cxx_bin,
|
||||
ccache_bin: super::detect_ccache(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl BuildPool for CppPool {
|
||||
fn name(&self) -> &'static str {
|
||||
"cpp"
|
||||
}
|
||||
|
||||
/// `args[0]` = absolute path the compiled `nyx_harness` binary lands at.
|
||||
fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
|
||||
let start = Instant::now();
|
||||
let dest = match args.first() {
|
||||
Some(d) => d.clone(),
|
||||
None => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: "cpp-pool: missing binary destination arg".to_owned(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let mut cmd = match (&self.ccache_bin, pool_cache_dir("cpp", "ccache")) {
|
||||
(Some(ccache), Some(cache_dir)) => {
|
||||
let mut c = base_command(ccache);
|
||||
c.arg(&self.cxx_bin).env("CCACHE_DIR", cache_dir);
|
||||
c
|
||||
}
|
||||
_ => base_command(&self.cxx_bin),
|
||||
};
|
||||
cmd.args(["-O0", "-g", "-std=c++17", "-o", &dest, "main.cpp"])
|
||||
.current_dir(workdir);
|
||||
|
||||
match cmd.output() {
|
||||
Ok(o) if o.status.success() => PoolCompileResult {
|
||||
success: true,
|
||||
stderr: String::new(),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
Ok(o) => PoolCompileResult {
|
||||
success: false,
|
||||
stderr: String::from_utf8_lossy(&o.stderr).into_owned(),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
Err(e) => PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("cpp-pool: c++: {e}"),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn is_healthy(&self) -> bool {
|
||||
binary_runnable(&self.cxx_bin, "--version")
|
||||
}
|
||||
}
|
||||
140
src/dynamic/build_pool/go.rs
Normal file
140
src/dynamic/build_pool/go.rs
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
//! Go build pool (Phase 23 / Track O.1).
|
||||
//!
|
||||
//! The legacy [`crate::dynamic::build_sandbox::prepare_go`] gives each finding
|
||||
//! its own `GOCACHE`/`GOMODCACHE` (default: a per-workdir `.gocache`), so the
|
||||
//! Go toolchain recompiles the standard library and every module from cold on
|
||||
//! every harness.
|
||||
//!
|
||||
//! [`GoPool`] mounts one shared `GOCACHE` + `GOMODCACHE` under the pool cache
|
||||
//! root so compiled std-lib + module artefacts are reused across findings, and
|
||||
//! builds with `-trimpath -buildvcs=false` so the output is reproducible (no
|
||||
//! absolute workdir paths or VCS stamping baked in, which otherwise defeats the
|
||||
//! build cache's keying).
|
||||
|
||||
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct GoPool {
|
||||
go_bin: String,
|
||||
}
|
||||
|
||||
impl GoPool {
|
||||
pub fn try_new() -> Result<Self, String> {
|
||||
let go_bin = std::env::var("NYX_GO_BIN").unwrap_or_else(|_| "go".to_owned());
|
||||
if !binary_runnable(&go_bin, "version") {
|
||||
return Err(format!("go-pool: {go_bin} not runnable"));
|
||||
}
|
||||
Ok(GoPool { go_bin })
|
||||
}
|
||||
}
|
||||
|
||||
impl BuildPool for GoPool {
|
||||
fn name(&self) -> &'static str {
|
||||
"go"
|
||||
}
|
||||
|
||||
/// `args[0]` = absolute path the compiled `nyx_harness` binary must land
|
||||
/// at.
|
||||
fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
|
||||
let start = Instant::now();
|
||||
let dest = match args.first() {
|
||||
Some(d) => d.clone(),
|
||||
None => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: "go-pool: missing binary destination arg".to_owned(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let go_cache = match pool_cache_dir("go", "cache") {
|
||||
Some(d) => d,
|
||||
None => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: "go-pool: no shared GOCACHE".to_owned(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
};
|
||||
let go_mod_cache = match pool_cache_dir("go", "modcache") {
|
||||
Some(d) => d,
|
||||
None => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: "go-pool: no shared GOMODCACHE".to_owned(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
};
|
||||
let go_path = std::env::var("GOPATH").unwrap_or_else(|_| {
|
||||
std::env::var("HOME")
|
||||
.map(|h| format!("{h}/go"))
|
||||
.unwrap_or_else(|_| "/tmp/go".to_owned())
|
||||
});
|
||||
|
||||
// `go mod tidy` resolves imports into the shared module cache.
|
||||
if workdir.join("go.mod").exists() {
|
||||
let tidy = base_command(&self.go_bin)
|
||||
.args(["mod", "tidy"])
|
||||
.current_dir(workdir)
|
||||
.env("GOCACHE", &go_cache)
|
||||
.env("GOPATH", &go_path)
|
||||
.env("GOMODCACHE", &go_mod_cache)
|
||||
.output();
|
||||
match tidy {
|
||||
Ok(o) if o.status.success() => {}
|
||||
Ok(o) => {
|
||||
let mut msg = String::from_utf8_lossy(&o.stderr).into_owned();
|
||||
if msg.is_empty() {
|
||||
msg = String::from_utf8_lossy(&o.stdout).into_owned();
|
||||
}
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("go mod tidy failed: {msg}"),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
Err(e) => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("go-pool: go mod tidy: {e}"),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let output = base_command(&self.go_bin)
|
||||
.args(["build", "-trimpath", "-buildvcs=false", "-o", &dest, "."])
|
||||
.current_dir(workdir)
|
||||
.env("GOCACHE", &go_cache)
|
||||
.env("GOPATH", &go_path)
|
||||
.env("GOMODCACHE", &go_mod_cache)
|
||||
.output();
|
||||
|
||||
match output {
|
||||
Ok(o) if o.status.success() => PoolCompileResult {
|
||||
success: true,
|
||||
stderr: String::new(),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
Ok(o) => PoolCompileResult {
|
||||
success: false,
|
||||
stderr: String::from_utf8_lossy(&o.stderr).into_owned(),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
Err(e) => PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("go-pool: go build: {e}"),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn is_healthy(&self) -> bool {
|
||||
binary_runnable(&self.go_bin, "version")
|
||||
}
|
||||
}
|
||||
952
src/dynamic/build_pool/java.rs
Normal file
952
src/dynamic/build_pool/java.rs
Normal file
|
|
@ -0,0 +1,952 @@
|
|||
//! Long-lived `javac` daemon (Phase 22 / Track O.0).
|
||||
//!
|
||||
//! The legacy `try_compile_java_with_toolchain` in `build_sandbox` shell-execs a
|
||||
//! fresh `javac` per harness — every invocation pays the JVM cold-start tax
|
||||
//! (~700ms on the macOS reference machine, ~300ms on Linux CI). At 50
|
||||
//! findings per OWASP-scale run that single line burns > 30s before any
|
||||
//! real work happens.
|
||||
//!
|
||||
//! [`JavacPool`] replaces the shell-exec with a long-running worker JVM:
|
||||
//!
|
||||
//! ```text
|
||||
//! nyx ─┐
|
||||
//! │ framed JSON ┌─────────────┐
|
||||
//! ├──stdin──────► │ NyxJavac │
|
||||
//! │ │ Worker │
|
||||
//! │ ◄──stdout──── │ (live JVM) │
|
||||
//! │ framed JSON └─────────────┘
|
||||
//! ```
|
||||
//!
|
||||
//! Bootstrap (paid once per toolchain id):
|
||||
//! 1. Drop `NyxJavacWorker.java` into a cache dir.
|
||||
//! 2. Compile it with `javac` (~1s).
|
||||
//! 3. Spawn `java -cp <dir> NyxJavacWorker` (~700ms cold start).
|
||||
//! 4. Read the worker's `{"ready":true}` banner.
|
||||
//!
|
||||
//! After bootstrap, each [`JavacPool::compile_batch`] is a single JSON
|
||||
//! round-trip — typical wall-clock < 50ms even on small harnesses.
|
||||
//!
|
||||
//! # Robustness
|
||||
//!
|
||||
//! A crashed / hung worker is non-fatal:
|
||||
//! - On any IO error, the pool marks itself unhealthy and the caller
|
||||
//! falls back to the direct-spawn legacy path.
|
||||
//! - The next pool lookup spawns a fresh worker.
|
||||
//!
|
||||
//! # Test hook
|
||||
//!
|
||||
//! `NYX_JAVAC_BIN` + `NYX_JAVA_BIN` override the binaries the pool
|
||||
//! invokes so integration tests can swap in a wrapper.
|
||||
|
||||
use super::{BuildPool, PoolCompileResult};
|
||||
use serde::Deserialize;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
|
||||
use std::sync::{Mutex, mpsc};
|
||||
use std::thread;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Java source compiled at first use to drive the worker.
|
||||
const WORKER_SOURCE: &str = include_str!("java_worker/NyxJavacWorker.java");
|
||||
const WORKER_CLASS: &str = "NyxJavacWorker";
|
||||
const WORKER_FILENAME: &str = "NyxJavacWorker.java";
|
||||
/// Name of the manifest `publish_class_set` writes last (atomically), once
/// every class has landed. Its presence is the "publish finished" signal a
/// lock-free reader keys on. Its *contents* are NOT trusted as the
/// completeness oracle -- see `WORKER_CLASS_FILES`.
const WORKER_MANIFEST: &str = ".worker-classes";

/// The exact set of `.class` files the worker JVM must load at runtime:
/// the top-level class plus its nested `$Request` / `$Parser` types.
///
/// Readiness is judged against *this fixed set*, not against whatever the
/// on-disk manifest happens to name. A bootstrap cache left by an older
/// binary can carry a manifest listing only `NyxJavacWorker.class`; trusting
/// that list let the gate pass with the nested classes absent, so the worker
/// spawned, announced readiness, then died on the first request with a
/// `NoClassDefFoundError` surfaced as
/// `nyx-javac-worker: parse error: NyxJavacWorker$Parser`. Pinning the
/// required set here makes any such partial cache fail the gate and trigger
/// a clean recompile. Kept in lock-step with the worker's real nested-class
/// layout by `worker_class_files_match_javac_output`.
const WORKER_CLASS_FILES: &[&str] = &[
    "NyxJavacWorker.class",
    "NyxJavacWorker$Request.class",
    "NyxJavacWorker$Parser.class",
];

/// Upper bound on waiting for the worker's readiness banner after spawn.
const WORKER_READY_TIMEOUT: Duration = Duration::from_secs(10);
/// Upper bound on waiting for the worker's reply to one compile request.
const COMPILE_RESPONSE_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
/// Live worker handle. Held inside a `Mutex` so concurrent
/// `compile_batch` callers serialise on the single JVM.
struct Worker {
    /// The worker JVM process.
    child: Child,
    /// Write end of the request channel; one request line per compile.
    stdin: ChildStdin,
    /// Buffered read end of the response channel; one line per reply.
    stdout: BufReader<ChildStdout>,
    /// Id stamped on the next request; advanced with wrapping arithmetic.
    next_id: u64,
}
|
||||
|
||||
pub struct JavacPool {
|
||||
/// `None` when the worker has crashed and a future call should
|
||||
/// surface the unhealthy state to the dispatcher.
|
||||
inner: Mutex<Option<Worker>>,
|
||||
/// Cache dir holding `NyxJavacWorker.class`. Persisted between
|
||||
/// runs so subsequent process invocations skip the compile step.
|
||||
bootstrap_dir: PathBuf,
|
||||
}
|
||||
|
||||
impl JavacPool {
|
||||
/// Create a fresh pool for `toolchain_id`.
|
||||
///
|
||||
/// Returns `Err` when the worker cannot be bootstrapped (missing
|
||||
/// `javac`, missing `java`, compile failure, spawn failure). The
|
||||
/// caller is expected to fall back to the legacy direct-spawn path
|
||||
/// on any error.
|
||||
pub fn try_new(toolchain_id: &str) -> Result<Self, String> {
|
||||
let bootstrap_dir = bootstrap_dir_for(toolchain_id)?;
|
||||
std::fs::create_dir_all(&bootstrap_dir)
|
||||
.map_err(|e| format!("javac-pool: mkdir {}: {e}", bootstrap_dir.display()))?;
|
||||
|
||||
ensure_worker_compiled(&bootstrap_dir)?;
|
||||
let worker = spawn_worker(&bootstrap_dir)?;
|
||||
Ok(JavacPool {
|
||||
inner: Mutex::new(Some(worker)),
|
||||
bootstrap_dir,
|
||||
})
|
||||
}
|
||||
|
||||
fn compile_with_worker(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
|
||||
let start = Instant::now();
|
||||
let mut guard = match self.inner.lock() {
|
||||
Ok(g) => g,
|
||||
Err(p) => p.into_inner(),
|
||||
};
|
||||
|
||||
// If a prior call torched the worker, try one re-spawn here so
|
||||
// the caller doesn't see consecutive failures from a transient
|
||||
// JVM crash.
|
||||
if guard.is_none()
|
||||
&& let Ok(w) = spawn_worker(&self.bootstrap_dir)
|
||||
{
|
||||
*guard = Some(w);
|
||||
}
|
||||
let worker = match guard.as_mut() {
|
||||
Some(w) => w,
|
||||
None => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: "javac-pool: worker unavailable".to_owned(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let id = worker.next_id;
|
||||
worker.next_id = worker.next_id.wrapping_add(1);
|
||||
let req = build_request(id, workdir, args);
|
||||
if let Err(e) = worker.stdin.write_all(req.as_bytes()) {
|
||||
*guard = None;
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("javac-pool: write failed: {e}"),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
if let Err(e) = worker.stdin.flush() {
|
||||
*guard = None;
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("javac-pool: flush failed: {e}"),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
|
||||
match read_line_with_timeout(
|
||||
&mut worker.child,
|
||||
&mut worker.stdout,
|
||||
COMPILE_RESPONSE_TIMEOUT,
|
||||
"read response",
|
||||
) {
|
||||
Ok(None) => {
|
||||
*guard = None;
|
||||
PoolCompileResult {
|
||||
success: false,
|
||||
stderr: "javac-pool: worker closed stdout".to_owned(),
|
||||
duration: start.elapsed(),
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
*guard = None;
|
||||
PoolCompileResult {
|
||||
success: false,
|
||||
stderr: e,
|
||||
duration: start.elapsed(),
|
||||
}
|
||||
}
|
||||
Ok(Some(line)) => match parse_response(&line) {
|
||||
Some((success, stderr)) => PoolCompileResult {
|
||||
success,
|
||||
stderr,
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
None => {
|
||||
*guard = None;
|
||||
PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("javac-pool: malformed response: {line}"),
|
||||
duration: start.elapsed(),
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for JavacPool {
|
||||
fn drop(&mut self) {
|
||||
// Best-effort: close stdin so the worker exits cleanly, then
|
||||
// wait briefly. We don't propagate errors -- pool teardown
|
||||
// happens at process exit, by which point everyone is already
|
||||
// leaving anyway.
|
||||
if let Ok(mut guard) = self.inner.lock()
|
||||
&& let Some(mut worker) = guard.take()
|
||||
{
|
||||
// Dropping stdin sends EOF to the worker's `readLine` loop.
|
||||
drop(worker.stdin);
|
||||
let _ = worker.child.wait();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BuildPool for JavacPool {
|
||||
fn name(&self) -> &'static str {
|
||||
"javac"
|
||||
}
|
||||
|
||||
fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
|
||||
self.compile_with_worker(workdir, args)
|
||||
}
|
||||
|
||||
fn is_healthy(&self) -> bool {
|
||||
match self.inner.lock() {
|
||||
Ok(g) => g.is_some(),
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn bootstrap_dir_for(toolchain_id: &str) -> Result<PathBuf, String> {
|
||||
if let Ok(custom) = std::env::var("NYX_BUILD_POOL_DIR") {
|
||||
return Ok(PathBuf::from(custom).join("javac").join(toolchain_id));
|
||||
}
|
||||
let base = directories::ProjectDirs::from("dev", "nyx", "nyx")
|
||||
.ok_or_else(|| "javac-pool: no cache dir on this platform".to_owned())?;
|
||||
Ok(base
|
||||
.cache_dir()
|
||||
.join("dynamic")
|
||||
.join("build-pool")
|
||||
.join("javac")
|
||||
.join(toolchain_id))
|
||||
}
|
||||
|
||||
/// Drop `NyxJavacWorker.java` + compile `NyxJavacWorker.class` into
|
||||
/// `dir` if they are not already present. Always re-writes the source
|
||||
/// when the on-disk copy differs from the embedded one so a binary
|
||||
/// upgrade picks up worker fixes without manual cache eviction.
|
||||
///
|
||||
/// The bootstrap dir is shared across every concurrent `nyx` process on
|
||||
/// the host, so the compile-and-publish step is hardened against the
|
||||
/// cross-process race that otherwise hands a half-written
|
||||
/// `NyxJavacWorker.class` to a peer process spawning its worker (which
|
||||
/// then fails to start, manifesting downstream as a flaky build):
|
||||
///
|
||||
/// - The publish is **atomic**: `javac` writes into a private,
|
||||
/// pid-scoped staging dir and the finished class is `rename`d into
|
||||
/// place. A concurrent reader sees either the previous complete
|
||||
/// class or the new one, never a partial file. The old class is
|
||||
/// never `remove`d first.
|
||||
/// - Compiles are **serialised** on a `flock(2)` over `.bootstrap.lock`
|
||||
/// so two processes never run `javac` into the same staging at once
|
||||
/// and a waiter re-checks the now-published class instead of
|
||||
/// recompiling.
|
||||
fn ensure_worker_compiled(dir: &Path) -> Result<(), String> {
|
||||
let src_path = dir.join(WORKER_FILENAME);
|
||||
|
||||
// Fast path: a complete class set already matches the current worker
|
||||
// source. Checked before taking the cross-process lock so steady
|
||||
// state stays lock-free.
|
||||
if worker_class_ready(dir) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Serialise the compile-and-publish across processes sharing `dir`.
|
||||
let _lock = BootstrapLock::acquire(dir)?;
|
||||
|
||||
// Re-check under the lock: another process may have published a good
|
||||
// class set while we were waiting on the lock.
|
||||
if worker_class_ready(dir) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Publish the source (idempotent) so cache inspectors can see what
|
||||
// the class was built from.
|
||||
std::fs::write(&src_path, WORKER_SOURCE)
|
||||
.map_err(|e| format!("javac-pool: write worker source: {e}"))?;
|
||||
|
||||
// Compile into a private staging dir, then atomically publish the
|
||||
// class files into place.
|
||||
let staging = dir.join(format!(".compile-{}", std::process::id()));
|
||||
let _ = std::fs::remove_dir_all(&staging);
|
||||
std::fs::create_dir_all(&staging).map_err(|e| format!("javac-pool: mkdir staging: {e}"))?;
|
||||
let javac = std::env::var("NYX_JAVAC_BIN").unwrap_or_else(|_| "javac".to_owned());
|
||||
let compiled = Command::new(&javac)
|
||||
// Pin the source charset so the bootstrap compile is independent of
|
||||
// the host locale (a `C`/`POSIX` CI runner defaults `javac` to
|
||||
// `US-ASCII` and would reject any non-ASCII byte in the worker
|
||||
// source). Mirrors the harness-compile pin in `build_sandbox`.
|
||||
.arg("-encoding")
|
||||
.arg("UTF-8")
|
||||
.arg("-d")
|
||||
.arg(&staging)
|
||||
.arg(&src_path)
|
||||
.env_clear()
|
||||
.env("PATH", std::env::var("PATH").unwrap_or_default())
|
||||
.env("HOME", std::env::var("HOME").unwrap_or_default())
|
||||
.output();
|
||||
let output = match compiled {
|
||||
Ok(o) => o,
|
||||
Err(e) => {
|
||||
let _ = std::fs::remove_dir_all(&staging);
|
||||
return Err(format!("javac-pool: spawn javac: {e}"));
|
||||
}
|
||||
};
|
||||
if !output.status.success() {
|
||||
let _ = std::fs::remove_dir_all(&staging);
|
||||
return Err(format!(
|
||||
"javac-pool: bootstrap compile failed: {}",
|
||||
String::from_utf8_lossy(&output.stderr),
|
||||
));
|
||||
}
|
||||
let publish = publish_class_set(&staging, dir);
|
||||
let _ = std::fs::remove_dir_all(&staging);
|
||||
publish
|
||||
}
|
||||
|
||||
/// Move every `.class` file `javac` emitted from the private `staging`
|
||||
/// dir into the shared `dir`, then write the manifest last.
|
||||
///
|
||||
/// The worker source compiles to the top-level `NyxJavacWorker.class`
|
||||
/// plus the nested `NyxJavacWorker$Request` / `NyxJavacWorker$Parser`
|
||||
/// classes. Every one of them must land in `dir` (the worker JVM's
|
||||
/// classpath), or the worker hits `NoClassDefFoundError` the first time
|
||||
/// it touches a nested class -- which surfaced downstream as a bogus
|
||||
/// `nyx-javac-worker: parse error: NyxJavacWorker$Parser`.
|
||||
///
|
||||
/// Renames are same-filesystem (staging is a child of `dir`) so each is
|
||||
/// atomic. The manifest is written last via a temp-then-rename, so a
|
||||
/// concurrent peer on the lock-free fast path sees either no manifest
|
||||
/// (and serialises on the lock) or a complete one whose every named
|
||||
/// class is already in place.
|
||||
fn publish_class_set(staging: &Path, dir: &Path) -> Result<(), String> {
|
||||
let entries =
|
||||
std::fs::read_dir(staging).map_err(|e| format!("javac-pool: read staging dir: {e}"))?;
|
||||
let mut names: Vec<String> = Vec::new();
|
||||
for entry in entries {
|
||||
let path = entry
|
||||
.map_err(|e| format!("javac-pool: read staging entry: {e}"))?
|
||||
.path();
|
||||
if path.extension().is_none_or(|x| x != "class") {
|
||||
continue;
|
||||
}
|
||||
let name = match path.file_name().and_then(|n| n.to_str()) {
|
||||
Some(n) => n.to_owned(),
|
||||
None => continue,
|
||||
};
|
||||
std::fs::rename(&path, dir.join(&name))
|
||||
.map_err(|e| format!("javac-pool: publish {name}: {e}"))?;
|
||||
names.push(name);
|
||||
}
|
||||
if names.is_empty() {
|
||||
return Err("javac-pool: bootstrap compile produced no .class files".to_owned());
|
||||
}
|
||||
// Refuse to publish (and to write the readiness-signalling manifest) a
|
||||
// set missing any class the worker loads at runtime. Fail loud here
|
||||
// rather than leave a half-set the worker would die on later.
|
||||
for required in WORKER_CLASS_FILES {
|
||||
if !names.iter().any(|n| n == required) {
|
||||
return Err(format!(
|
||||
"javac-pool: bootstrap compile missing required class {required}; got {names:?}",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Write the manifest atomically (temp + rename) so it appears in one
|
||||
// step after every class is already published.
|
||||
let manifest = dir.join(WORKER_MANIFEST);
|
||||
let tmp = dir.join(format!("{WORKER_MANIFEST}.{}", std::process::id()));
|
||||
std::fs::write(&tmp, names.join("\n"))
|
||||
.map_err(|e| format!("javac-pool: write manifest: {e}"))?;
|
||||
std::fs::rename(&tmp, &manifest).map_err(|e| {
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
format!("javac-pool: publish manifest: {e}")
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// True when `dir` holds a complete, non-empty class set built from the
|
||||
/// current embedded `WORKER_SOURCE`: the source matches, the manifest is
|
||||
/// present, and every class the manifest names exists and is non-empty.
|
||||
fn worker_class_ready(dir: &Path) -> bool {
|
||||
if std::fs::read_to_string(dir.join(WORKER_FILENAME))
|
||||
.ok()
|
||||
.as_deref()
|
||||
!= Some(WORKER_SOURCE)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// The manifest is written last by `publish_class_set`, so its presence
|
||||
// is the "publish finished" barrier: a reader that sees it knows no
|
||||
// peer is mid-rename. Absence forces the cross-process lock path.
|
||||
if std::fs::metadata(dir.join(WORKER_MANIFEST)).is_err() {
|
||||
return false;
|
||||
}
|
||||
// Completeness is judged against the fixed required set, never against
|
||||
// the manifest's lines -- a stale or partial manifest must not be able
|
||||
// to vouch for classes it simply fails to name.
|
||||
for name in WORKER_CLASS_FILES {
|
||||
let present = std::fs::metadata(dir.join(name))
|
||||
.map(|m| m.is_file() && m.len() > 0)
|
||||
.unwrap_or(false);
|
||||
if !present {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Cross-process advisory lock guarding the shared bootstrap dir's
|
||||
/// compile-and-publish step. The held lock file lives at
|
||||
/// `<dir>/.bootstrap.lock`; the `flock(2)` releases when the guard (and
|
||||
/// thus the file) drops.
|
||||
struct BootstrapLock {
|
||||
_file: File,
|
||||
}
|
||||
|
||||
impl BootstrapLock {
|
||||
fn acquire(dir: &Path) -> Result<Self, String> {
|
||||
let lock_path = dir.join(".bootstrap.lock");
|
||||
let file = OpenOptions::new()
|
||||
.read(true)
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(false)
|
||||
.open(&lock_path)
|
||||
.map_err(|e| format!("javac-pool: open bootstrap lock: {e}"))?;
|
||||
lock_file_exclusive(&file).map_err(|e| format!("javac-pool: bootstrap lock: {e}"))?;
|
||||
Ok(BootstrapLock { _file: file })
|
||||
}
|
||||
}
|
||||
|
||||
/// Take an exclusive `flock` on `file`, retrying when the call is
/// interrupted. Returns the OS error for any other failure.
#[cfg(unix)]
fn lock_file_exclusive(file: &File) -> std::io::Result<()> {
    use std::os::fd::AsRawFd;

    unsafe extern "C" {
        fn flock(fd: i32, operation: i32) -> i32;
    }
    // NOTE(review): hard-coded numeric value of `LOCK_EX`; confirm it holds
    // for every Unix target this is built for.
    const LOCK_EX: i32 = 2;

    let fd = file.as_raw_fd();
    // SAFETY: `fd` is a live descriptor owned by `file`, which outlives this
    // call; `flock` only reads the scalar args and we check the return value.
    while unsafe { flock(fd, LOCK_EX) } != 0 {
        let err = std::io::Error::last_os_error();
        if err.kind() != std::io::ErrorKind::Interrupted {
            return Err(err);
        }
    }
    Ok(())
}

/// Non-Unix stand-in: performs no locking and always reports success.
#[cfg(not(unix))]
fn lock_file_exclusive(_file: &File) -> std::io::Result<()> {
    Ok(())
}
|
||||
|
||||
fn spawn_worker(dir: &Path) -> Result<Worker, String> {
|
||||
let java = std::env::var("NYX_JAVA_BIN").unwrap_or_else(|_| "java".to_owned());
|
||||
let mut child = Command::new(&java)
|
||||
// The worker is tiny -- keep the JVM frugal so the pool
|
||||
// overhead stays well below the per-finding cost it
|
||||
// replaces.
|
||||
.arg("-Xss256k")
|
||||
.arg("-XX:+UseSerialGC")
|
||||
.arg("-cp")
|
||||
.arg(dir)
|
||||
.arg(WORKER_CLASS)
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.env_clear()
|
||||
.env("PATH", std::env::var("PATH").unwrap_or_default())
|
||||
.env("HOME", std::env::var("HOME").unwrap_or_default())
|
||||
.spawn()
|
||||
.map_err(|e| format!("javac-pool: spawn java: {e}"))?;
|
||||
|
||||
let stdin = child
|
||||
.stdin
|
||||
.take()
|
||||
.ok_or_else(|| "javac-pool: missing stdin".to_owned())?;
|
||||
let stdout = child
|
||||
.stdout
|
||||
.take()
|
||||
.ok_or_else(|| "javac-pool: missing stdout".to_owned())?;
|
||||
let mut stdout = BufReader::new(stdout);
|
||||
|
||||
let banner =
|
||||
match read_line_with_timeout(&mut child, &mut stdout, WORKER_READY_TIMEOUT, "read banner")?
|
||||
{
|
||||
Some(line) => line,
|
||||
None => {
|
||||
let _ = child.kill();
|
||||
let stderr_tail = drain_stderr(&mut child);
|
||||
return Err(format!(
|
||||
"javac-pool: worker closed stdout before readiness; stderr: {stderr_tail}",
|
||||
));
|
||||
}
|
||||
};
|
||||
if !banner.contains("\"ready\":true") {
|
||||
// Drain stderr for diagnostic context, then bail.
|
||||
let _ = child.kill();
|
||||
let stderr_tail = drain_stderr(&mut child);
|
||||
return Err(format!(
|
||||
"javac-pool: worker did not announce readiness; got {banner:?}; stderr: {stderr_tail}",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(Worker {
|
||||
child,
|
||||
stdin,
|
||||
stdout,
|
||||
next_id: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Take the child's stderr pipe and read it to the end, best-effort.
///
/// Returns an empty string when the pipe was already taken or the read
/// fails. The read runs until the pipe reaches EOF, so callers are expected
/// to have stopped the child first.
fn drain_stderr(child: &mut Child) -> String {
    use std::io::Read;

    let Some(mut pipe) = child.stderr.take() else {
        return String::new();
    };
    let mut captured = String::new();
    // Errors are deliberately ignored: this is diagnostic context only.
    let _ = pipe.read_to_string(&mut captured);
    captured
}
|
||||
|
||||
/// Read one line from the worker's stdout, giving up after `timeout`.
///
/// The blocking read runs on a scoped helper thread while this thread waits
/// on a channel. On timeout the child is killed and a timeout error is
/// returned. `context` is only used to label error messages.
///
/// Returns `Ok(None)` when the stream is at EOF, `Ok(Some(line))` (line
/// terminator included, as produced by `read_line`) otherwise.
fn read_line_with_timeout(
    child: &mut Child,
    stdout: &mut BufReader<ChildStdout>,
    timeout: Duration,
    context: &str,
) -> Result<Option<String>, String> {
    let (tx, rx) = mpsc::channel();
    thread::scope(|scope| {
        scope.spawn(move || {
            let mut buf = String::new();
            let outcome = match stdout.read_line(&mut buf) {
                Ok(read) => Ok((read, buf)),
                Err(e) => Err(e),
            };
            // The receiver may be gone after a timeout; nothing to do then.
            let _ = tx.send(outcome);
        });

        let received = rx.recv_timeout(timeout);
        if matches!(received, Err(mpsc::RecvTimeoutError::Timeout)) {
            let _ = child.kill();
        }
        match received {
            Ok(Ok((0, _))) => Ok(None),
            Ok(Ok((_, line))) => Ok(Some(line)),
            Ok(Err(e)) => Err(format!("javac-pool: {context} failed: {e}")),
            Err(mpsc::RecvTimeoutError::Timeout) => {
                Err(format!("javac-pool: {context} timed out after {timeout:?}"))
            }
            Err(mpsc::RecvTimeoutError::Disconnected) => {
                Err(format!("javac-pool: {context} reader disconnected"))
            }
        }
    })
}
|
||||
|
||||
fn build_request(id: u64, workdir: &Path, args: &[String]) -> String {
|
||||
let mut s = String::with_capacity(128 + args.iter().map(|a| a.len() + 4).sum::<usize>());
|
||||
s.push_str("{\"id\":\"");
|
||||
s.push_str(&id.to_string());
|
||||
s.push_str("\",\"cwd\":");
|
||||
append_json_string(&mut s, &workdir.to_string_lossy());
|
||||
s.push_str(",\"args\":[");
|
||||
for (i, a) in args.iter().enumerate() {
|
||||
if i > 0 {
|
||||
s.push(',');
|
||||
}
|
||||
append_json_string(&mut s, a);
|
||||
}
|
||||
s.push_str("]}\n");
|
||||
s
|
||||
}
|
||||
|
||||
/// Append `s` to `out` as a double-quoted JSON string literal.
///
/// Backslash, double quote, newline, carriage return and tab use their short
/// escapes; every other character below U+0020 becomes `\u00XX`; everything
/// else is copied through unchanged.
fn append_json_string(out: &mut String, s: &str) {
    out.push('"');
    for ch in s.chars() {
        let short_escape = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(escape) = short_escape {
            out.push_str(escape);
        } else if u32::from(ch) < 0x20 {
            out.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            out.push(ch);
        }
    }
    out.push('"');
}
|
||||
|
||||
/// Extract `(success, stderr)` from a worker JSON response line.
|
||||
fn parse_response(line: &str) -> Option<(bool, String)> {
|
||||
let response: JavacWorkerResponse = serde_json::from_str(line).ok()?;
|
||||
let stderr =
|
||||
decode_b64(&response.stderr_b64).unwrap_or_else(|| "<unable to decode stderr>".to_owned());
|
||||
Some((response.success, stderr))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct JavacWorkerResponse {
|
||||
success: bool,
|
||||
#[serde(default)]
|
||||
stderr_b64: String,
|
||||
}
|
||||
|
||||
/// Tiny RFC 4648 base64 decoder. Used only for the worker's
/// `stderr_b64` field so we can carry raw bytes through the JSON
/// envelope without dragging in a base64 crate.
///
/// ASCII whitespace anywhere in the input is ignored. Padding is optional,
/// but when present it must be one or two `=` at the very end and bring the
/// total length to a multiple of four. Returns `None` for any character
/// outside the standard alphabet, misplaced or excessive padding, a dangling
/// single sextet, or a payload that is not valid UTF-8.
fn decode_b64(s: &str) -> Option<String> {
    // Map one alphabet byte to its 6-bit value; `None` for anything else
    // (including `=`, which is only legal as trailing padding).
    fn sextet(b: u8) -> Option<u32> {
        match b {
            b'A'..=b'Z' => Some(u32::from(b - b'A')),
            b'a'..=b'z' => Some(u32::from(b - b'a') + 26),
            b'0'..=b'9' => Some(u32::from(b - b'0') + 52),
            b'+' => Some(62),
            b'/' => Some(63),
            _ => None,
        }
    }

    let bytes: Vec<u8> = s.bytes().filter(|b| !b.is_ascii_whitespace()).collect();

    // Split the trailing run of `=` off the data so padding can be validated
    // once instead of being tolerated at arbitrary positions.
    let data_len = bytes.iter().rposition(|&b| b != b'=').map_or(0, |i| i + 1);
    let (data, padding) = bytes.split_at(data_len);
    if padding.len() > 2 || (!padding.is_empty() && bytes.len() % 4 != 0) {
        return None;
    }
    // A single leftover sextet carries only 6 bits: not enough for a byte.
    if data.len() % 4 == 1 {
        return None;
    }

    let mut out = Vec::with_capacity(data.len() / 4 * 3 + 2);
    for chunk in data.chunks(4) {
        let mut acc = 0u32;
        for &b in chunk {
            acc = (acc << 6) | sextet(b)?;
        }
        // Left-align a short final chunk so the decoded bytes always sit in
        // the top of the 24-bit group.
        acc <<= 6 * (4 - chunk.len()) as u32;
        let [_, b0, b1, b2] = acc.to_be_bytes();
        // n sextets encode n - 1 whole bytes (4 -> 3, 3 -> 2, 2 -> 1).
        out.extend_from_slice(&[b0, b1, b2][..chunk.len() - 1]);
    }
    String::from_utf8(out).ok()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest non-empty stand-in for a compiled class: the four bytes
    /// `CA FE BA BE`. Readiness only needs the file to be non-empty.
    const STUB_CLASS: &[u8] = b"\xca\xfe\xba\xbe";

    /// Resolve the `javac` binary (honouring `NYX_JAVAC_BIN`) and return it
    /// only when `javac -version` exits successfully. Tests that need a real
    /// compiler return early on `None` (JRE-only host / no JDK).
    fn runnable_javac() -> Option<String> {
        let javac = std::env::var("NYX_JAVAC_BIN").unwrap_or_else(|_| "javac".to_owned());
        let runs = std::process::Command::new(&javac)
            .arg("-version")
            .output()
            .map(|o| o.status.success())
            .unwrap_or(false);
        runs.then_some(javac)
    }

    /// Seed `dir` with the poisoned-cache shape: matching worker source, only
    /// the top-level class on disk, and a self-consistent manifest that names
    /// just that one class.
    fn seed_top_level_only_cache(dir: &Path) {
        std::fs::write(dir.join(WORKER_FILENAME), WORKER_SOURCE).unwrap();
        std::fs::write(dir.join(format!("{WORKER_CLASS}.class")), STUB_CLASS).unwrap();
        std::fs::write(dir.join(WORKER_MANIFEST), format!("{WORKER_CLASS}.class")).unwrap();
    }

    #[test]
    fn request_envelope_escapes_specials() {
        let s = build_request(
            7,
            Path::new("/tmp/x"),
            &["a\"b".to_owned(), "c\\d".to_owned()],
        );
        assert!(s.contains("\"id\":\"7\""));
        assert!(s.contains("\"cwd\":\"/tmp/x\""));
        assert!(s.contains("\"a\\\"b\""));
        assert!(s.contains("\"c\\\\d\""));
        assert!(s.ends_with("]}\n"));
    }

    #[test]
    fn parse_response_success() {
        let (ok, err) =
            parse_response("{\"id\":\"0\",\"success\":true,\"stderr_b64\":\"\"}\n").unwrap();
        assert!(ok);
        assert!(err.is_empty());
    }

    #[test]
    fn parse_response_failure_decodes_stderr() {
        // "boom" -> base64 "Ym9vbQ=="
        let (ok, err) =
            parse_response("{\"id\":\"1\",\"success\":false,\"stderr_b64\":\"Ym9vbQ==\"}\n")
                .unwrap();
        assert!(!ok);
        assert_eq!(err, "boom");
    }

    #[test]
    fn parse_response_rejects_off_shape() {
        assert!(parse_response("not json").is_none());
        // Missing success field.
        assert!(parse_response("{\"id\":\"0\",\"stderr_b64\":\"\"}").is_none());
    }

    #[test]
    fn parse_response_accepts_reordered_fields() {
        let (ok, err) =
            parse_response("{\"stderr_b64\":\"YQ==\",\"success\":true,\"id\":\"7\"}\n").unwrap();
        assert!(ok);
        assert_eq!(err, "a");
    }

    #[test]
    fn b64_decode_roundtrip() {
        for (raw, encoded) in &[
            ("", ""),
            ("a", "YQ=="),
            ("ab", "YWI="),
            ("abc", "YWJj"),
            ("hello world", "aGVsbG8gd29ybGQ="),
        ] {
            assert_eq!(decode_b64(encoded).as_deref(), Some(*raw));
        }
    }

    #[test]
    fn worker_class_ready_rejects_truncated_or_mismatched() {
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        let src = dir.join(WORKER_FILENAME);
        let main_class = dir.join(format!("{WORKER_CLASS}.class"));
        let parser = dir.join(format!("{WORKER_CLASS}$Parser.class"));
        let request = dir.join(format!("{WORKER_CLASS}$Request.class"));
        let manifest = dir.join(WORKER_MANIFEST);
        let manifest_body = format!(
            "{WORKER_CLASS}.class\n{WORKER_CLASS}$Parser.class\n{WORKER_CLASS}$Request.class"
        );

        // Nothing on disk yet.
        assert!(!worker_class_ready(dir));

        // Matching source but no class / manifest.
        std::fs::write(&src, WORKER_SOURCE).unwrap();
        assert!(!worker_class_ready(dir));

        // Top-level class + manifest present but the nested classes are
        // missing -- the stale-cache shape an older binary left behind.
        std::fs::write(&main_class, STUB_CLASS).unwrap();
        std::fs::write(&manifest, &manifest_body).unwrap();
        assert!(!worker_class_ready(dir));

        // A zero-byte nested class (the corruption shape a racing peer can
        // leave behind) must not count as ready.
        std::fs::write(&parser, b"").unwrap();
        std::fs::write(&request, STUB_CLASS).unwrap();
        assert!(!worker_class_ready(dir));

        // Every required class non-empty with matching source is ready.
        std::fs::write(&parser, STUB_CLASS).unwrap();
        assert!(worker_class_ready(dir));

        // A missing manifest invalidates an otherwise-complete class set.
        std::fs::remove_file(&manifest).unwrap();
        assert!(!worker_class_ready(dir));
        std::fs::write(&manifest, &manifest_body).unwrap();
        assert!(worker_class_ready(dir));

        // Stale source invalidates an otherwise-present class set.
        std::fs::write(&src, "// not the worker source").unwrap();
        assert!(!worker_class_ready(dir));
    }

    #[test]
    fn worker_class_ready_rejects_manifest_that_omits_nested_classes() {
        // The exact stale-cache shape that produced
        // `nyx-javac-worker: parse error: NyxJavacWorker$Parser` on Linux:
        // a self-consistent manifest that simply does not name the nested
        // classes, with only the top-level class on disk. The old guard
        // iterated the manifest's lines and so trusted this; readiness must
        // now reject it because the fixed required set is incomplete.
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        // Manifest names only the top-level class -- exactly what poisoned
        // the persisted bootstrap cache.
        seed_top_level_only_cache(dir);
        assert!(
            !worker_class_ready(dir),
            "a manifest omitting the nested classes must not satisfy readiness",
        );

        // Drop in the nested classes the worker actually loads -> ready.
        for nested in ["Parser", "Request"] {
            std::fs::write(
                dir.join(format!("{WORKER_CLASS}${nested}.class")),
                STUB_CLASS,
            )
            .unwrap();
        }
        assert!(worker_class_ready(dir));
    }

    #[test]
    fn ensure_worker_compiled_heals_partial_cache() {
        // End-to-end heal: seed the exact poisoned-cache shape that broke
        // Linux (top-level class + a one-line manifest, nested classes
        // absent) and confirm `ensure_worker_compiled` recompiles a full,
        // loadable class set instead of trusting the stale manifest.
        if runnable_javac().is_none() {
            return; // No JDK on this host: nothing to recompile with.
        }
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        seed_top_level_only_cache(dir);
        assert!(
            !worker_class_ready(dir),
            "poisoned cache must read not-ready"
        );

        ensure_worker_compiled(dir).expect("recompile heals the cache");

        assert!(worker_class_ready(dir), "healed cache must read ready");
        for cls in WORKER_CLASS_FILES {
            let meta = std::fs::metadata(dir.join(cls)).expect("class published");
            assert!(meta.len() > 0, "{cls} must be a real (non-empty) class");
        }
    }

    #[test]
    fn worker_class_files_match_javac_output() {
        // Guards `WORKER_CLASS_FILES` against drift: compile the embedded
        // worker source and assert the emitted `.class` set is exactly the
        // pinned required set, so a future nested type added to the worker
        // can't silently fall outside the readiness gate.
        let Some(javac) = runnable_javac() else {
            return; // JRE-only / no JDK: nothing to compile against.
        };
        let tmp = tempfile::TempDir::new().unwrap();
        let src = tmp.path().join(WORKER_FILENAME);
        std::fs::write(&src, WORKER_SOURCE).unwrap();
        let out = tmp.path().join("out");
        std::fs::create_dir_all(&out).unwrap();
        let status = std::process::Command::new(&javac)
            .arg("-encoding")
            .arg("UTF-8")
            .arg("-d")
            .arg(&out)
            .arg(&src)
            .status()
            .expect("spawn javac");
        assert!(status.success(), "worker source must compile");

        let mut emitted: Vec<String> = std::fs::read_dir(&out)
            .unwrap()
            .filter_map(|e| e.ok())
            .map(|e| e.file_name().to_string_lossy().into_owned())
            .filter(|n| n.ends_with(".class"))
            .collect();
        emitted.sort();
        let mut expected: Vec<String> =
            WORKER_CLASS_FILES.iter().map(|s| (*s).to_owned()).collect();
        expected.sort();
        assert_eq!(
            emitted, expected,
            "WORKER_CLASS_FILES must mirror the worker's javac output",
        );
    }

    #[test]
    fn publish_class_set_moves_every_class_and_writes_manifest() {
        const CLASSES: [&str; 3] = [
            "NyxJavacWorker.class",
            "NyxJavacWorker$Parser.class",
            "NyxJavacWorker$Request.class",
        ];

        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path();
        let staging = dir.join(".compile-test");
        std::fs::create_dir_all(&staging).unwrap();
        // Simulate javac output: top-level + nested classes plus a
        // non-class artifact that must be ignored.
        for cls in CLASSES {
            std::fs::write(staging.join(cls), STUB_CLASS).unwrap();
        }
        std::fs::write(staging.join("notes.txt"), b"ignore me").unwrap();

        publish_class_set(&staging, dir).expect("publish");

        for cls in CLASSES {
            assert!(dir.join(cls).is_file(), "{cls} must be published");
        }
        // The non-class file stays in staging (not published).
        assert!(!dir.join("notes.txt").exists());

        let manifest = std::fs::read_to_string(dir.join(WORKER_MANIFEST)).unwrap();
        let listed: Vec<&str> = manifest.lines().collect();
        assert_eq!(listed.len(), 3, "manifest lists all 3 classes: {listed:?}");
        assert!(listed.contains(&"NyxJavacWorker$Parser.class"));
    }

    #[test]
    fn bootstrap_lock_is_reentrant_across_sequential_acquires() {
        // The flock is released when the guard drops, so back-to-back
        // acquires from the same process succeed without deadlock.
        let dir = tempfile::TempDir::new().unwrap();
        {
            let _g = BootstrapLock::acquire(dir.path()).expect("first acquire");
        }
        let _g = BootstrapLock::acquire(dir.path()).expect("second acquire");
        assert!(dir.path().join(".bootstrap.lock").exists());
    }
}
|
||||
256
src/dynamic/build_pool/java_worker/NyxJavacWorker.java
Normal file
256
src/dynamic/build_pool/java_worker/NyxJavacWorker.java
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
//
|
||||
// Long-lived javac worker bundled with nyx-scanner. The Rust pool side
|
||||
// compiles + spawns this once per toolchain id; subsequent harness
|
||||
// compiles run in-process via ToolProvider#getSystemJavaCompiler so the
|
||||
// JVM cold-start cost is amortised across every harness in a verify run.
|
||||
//
|
||||
// Wire format: newline-terminated UTF-8 JSON, one request per line:
|
||||
// {"id":"0","cwd":"/path/to/workdir","args":["-d","/tmp/x","Foo.java"]}\n
|
||||
//
|
||||
// Response: newline-terminated UTF-8 JSON, one per request:
|
||||
// {"id":"0","success":true,"stderr_b64":"<base64 of javac stderr>"}\n
|
||||
//
|
||||
// stderr is base64-encoded so it never embeds raw newlines or quotes
|
||||
// inside the JSON envelope -- keeps the parser on both sides tiny.
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.PrintStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Base64;
|
||||
import java.util.List;
|
||||
import javax.tools.JavaCompiler;
|
||||
import javax.tools.ToolProvider;
|
||||
|
||||
public class NyxJavacWorker {
    /**
     * Worker entry point: print the readiness banner, then serve one compile
     * request per stdin line until stdin closes. Exits with status 2 when the
     * runtime has no system Java compiler.
     *
     * NOTE: the Rust readiness gate pins the exact set of emitted class files
     * (this class plus {@code $Parser} and {@code $Request}); avoid adding
     * nested, anonymous or enum-switch constructs here without updating it.
     */
    public static void main(String[] argv) throws Exception {
        JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
        if (compiler == null) {
            // JRE without javac (rare on dev boxes, possible on slim CI
            // images). Signal the Rust side so it falls back to the
            // direct-spawn legacy path.
            System.err.println("nyx-javac-worker: no system Java compiler (JRE-only install?)");
            System.exit(2);
        }
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
        PrintStream out = new PrintStream(System.out, true, StandardCharsets.UTF_8);
        // Banner line. The Rust side reads this first so it knows the
        // worker is live before it queues any compile requests.
        out.println("{\"ready\":true}");
        out.flush();

        String line;
        while ((line = in.readLine()) != null) {
            line = line.trim();
            if (line.isEmpty()) continue;
            Request req;
            try {
                req = parse(line);
            } catch (Throwable t) {
                // Malformed request -- emit an error response keyed on
                // an empty id so the Rust side can at least surface it.
                // Some throwables carry no message; fall back to toString()
                // so the diagnostic never reads "parse error: null".
                String detail = t.getMessage() != null ? t.getMessage() : t.toString();
                writeResponse(out, "", false, ("nyx-javac-worker: parse error: " + detail).getBytes(StandardCharsets.UTF_8));
                continue;
            }
            ByteArrayOutputStream errBuf = new ByteArrayOutputStream();
            PrintStream errStream = new PrintStream(errBuf, true, StandardCharsets.UTF_8);
            int rc;
            try {
                String[] args = req.args.toArray(new String[0]);
                if (req.cwd != null && !req.cwd.isEmpty()) {
                    // The JDK compiler API has no per-task cwd switch. The
                    // harness build already supplies absolute paths via the
                    // Rust side; setting user.dir is only a defensive,
                    // best-effort hint for relative -d / -cp / source-path
                    // entries.
                    // NOTE(review): recent JDKs are documented to ignore
                    // runtime changes to user.dir when resolving relative
                    // paths -- confirm before relying on relative args.
                    System.setProperty("user.dir", req.cwd);
                }
                // Per the Tool#run contract a null `out` means System.out,
                // which is this worker's JSON protocol channel. Route both
                // tool streams into the captured buffer so nothing the tool
                // prints can interleave with a response line.
                rc = compiler.run(null, errStream, errStream, args);
            } catch (Throwable t) {
                t.printStackTrace(errStream);
                rc = 1;
            }
            boolean success = (rc == 0);
            writeResponse(out, req.id, success, errBuf.toByteArray());
        }
    }

    /** Emit one response line: id, success flag and base64-encoded stderr. */
    private static void writeResponse(PrintStream out, String id, boolean success, byte[] stderr) {
        String b64 = Base64.getEncoder().encodeToString(stderr);
        StringBuilder sb = new StringBuilder(64 + b64.length());
        sb.append("{\"id\":");
        appendJsonString(sb, id);
        sb.append(",\"success\":").append(success);
        sb.append(",\"stderr_b64\":\"").append(b64).append("\"}");
        out.println(sb);
        out.flush();
    }

    /** Append {@code s} as a quoted JSON string, escaping specials and controls. */
    private static void appendJsonString(StringBuilder sb, String s) {
        sb.append('"');
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '\\': sb.append("\\\\"); break;
                case '"': sb.append("\\\""); break;
                case '\n': sb.append("\\n"); break;
                case '\r': sb.append("\\r"); break;
                case '\t': sb.append("\\t"); break;
                default:
                    if (c < 0x20) {
                        sb.append(String.format("\\u%04x", (int) c));
                    } else {
                        sb.append(c);
                    }
            }
        }
        sb.append('"');
    }

    /** Parsed request envelope; fields keep their defaults when absent. */
    private static final class Request {
        String id = "";
        String cwd = "";
        List<String> args = new ArrayList<>();
    }

    /**
     * Parse one request line. Recognises the {@code id}, {@code cwd} and
     * {@code args} keys and skips the value of any other key. Throws a
     * RuntimeException on malformed input.
     */
    private static Request parse(String s) {
        Parser p = new Parser(s);
        Request r = new Request();
        p.skipWs();
        p.expect('{');
        p.skipWs();
        if (p.peek() == '}') {
            p.next();
            return r;
        }
        while (true) {
            p.skipWs();
            String key = p.parseString();
            p.skipWs();
            p.expect(':');
            p.skipWs();
            if (key.equals("id")) {
                r.id = p.parseString();
            } else if (key.equals("cwd")) {
                r.cwd = p.parseString();
            } else if (key.equals("args")) {
                p.expect('[');
                p.skipWs();
                if (p.peek() != ']') {
                    while (true) {
                        p.skipWs();
                        r.args.add(p.parseString());
                        p.skipWs();
                        if (p.peek() == ',') { p.next(); continue; }
                        break;
                    }
                }
                p.skipWs();
                p.expect(']');
            } else {
                skipValue(p);
            }
            p.skipWs();
            if (p.peek() == ',') { p.next(); continue; }
            break;
        }
        p.skipWs();
        p.expect('}');
        return r;
    }

    /** Advance past one JSON value (string, array, object or bare literal). */
    private static void skipValue(Parser p) {
        p.skipWs();
        char c = p.peek();
        if (c == '"') {
            p.parseString();
        } else if (c == '[') {
            p.next();
            p.skipWs();
            if (p.peek() != ']') {
                while (true) {
                    skipValue(p);
                    p.skipWs();
                    if (p.peek() == ',') { p.next(); continue; }
                    break;
                }
            }
            p.skipWs();
            p.expect(']');
        } else if (c == '{') {
            p.next();
            p.skipWs();
            if (p.peek() != '}') {
                while (true) {
                    p.skipWs();
                    p.parseString();
                    p.skipWs();
                    p.expect(':');
                    skipValue(p);
                    p.skipWs();
                    if (p.peek() == ',') { p.next(); continue; }
                    break;
                }
            }
            p.skipWs();
            p.expect('}');
        } else {
            // Bare literal (number / true / false / null): consume the run of
            // characters those tokens can contain without validating it.
            int start = p.pos;
            while (p.pos < p.s.length() && "0123456789.-+eEtrufalsn".indexOf(p.s.charAt(p.pos)) >= 0) {
                p.pos++;
            }
            if (p.pos == start) {
                throw new RuntimeException("bad value at " + p.pos);
            }
        }
    }

    /** Cursor over the request text with the few primitives the parser needs. */
    private static final class Parser {
        final String s;
        int pos = 0;

        Parser(String s) { this.s = s; }

        /** Current character; fails with a descriptive error at end of input. */
        char peek() {
            if (pos >= s.length()) {
                throw new RuntimeException("unexpected end of input at " + pos);
            }
            return s.charAt(pos);
        }

        /** Current character, then advance. */
        char next() {
            char c = peek();
            pos++;
            return c;
        }

        void skipWs() {
            while (pos < s.length() && Character.isWhitespace(s.charAt(pos))) pos++;
        }

        void expect(char c) {
            if (pos >= s.length() || s.charAt(pos) != c) {
                throw new RuntimeException("expected '" + c + "' at " + pos + " of " + s);
            }
            pos++;
        }

        /** Parse a double-quoted JSON string starting at the cursor. */
        String parseString() {
            expect('"');
            StringBuilder sb = new StringBuilder();
            while (pos < s.length()) {
                char c = s.charAt(pos++);
                if (c == '"') return sb.toString();
                if (c != '\\') {
                    sb.append(c);
                    continue;
                }
                // A backslash as the very last character: fall through to the
                // "unterminated string" error below.
                if (pos >= s.length()) break;
                char e = s.charAt(pos++);
                switch (e) {
                    case '"': sb.append('"'); break;
                    case '\\': sb.append('\\'); break;
                    case '/': sb.append('/'); break;
                    case 'b': sb.append('\b'); break;
                    case 'f': sb.append('\f'); break;
                    case 'n': sb.append('\n'); break;
                    case 'r': sb.append('\r'); break;
                    case 't': sb.append('\t'); break;
                    case 'u': {
                        if (pos + 4 > s.length()) {
                            throw new RuntimeException("truncated \\u escape at " + pos);
                        }
                        String hex = s.substring(pos, pos + 4);
                        pos += 4;
                        sb.append((char) Integer.parseInt(hex, 16));
                        break;
                    }
                    default: throw new RuntimeException("bad escape \\" + e);
                }
            }
            throw new RuntimeException("unterminated string");
        }
    }
}
|
||||
340
src/dynamic/build_pool/mod.rs
Normal file
340
src/dynamic/build_pool/mod.rs
Normal file
|
|
@ -0,0 +1,340 @@
|
|||
//! Build pools: long-lived compiler / toolchain daemons shared across many
|
||||
//! per-finding harness builds.
|
||||
//!
|
||||
//! The naive `prepare_*` path in [`crate::dynamic::build_sandbox`] spawns a
|
||||
//! fresh `javac` / `tsc` / `cargo build` subprocess for every finding the
|
||||
//! verifier touches. Cold-start dominates the cost: `javac` alone burns
|
||||
//! ~700ms before it has read a single source. A 50-harness OWASP run pays
|
||||
//! that 50× — > 30s of pure JVM startup.
|
||||
//!
|
||||
//! A `BuildPool` is a long-running worker process (or in-process service)
|
||||
//! that compiles batches of harness sources in a single toolchain instance.
|
||||
//! The per-harness wall-clock collapses to milliseconds once the pool is
|
||||
//! warm.
|
||||
//!
|
||||
//! # Lifecycle
|
||||
//!
|
||||
//! `OnceLock<Arc<P>>` per toolchain id, lazily spawned on first request.
|
||||
//! Pools live for the rest of the process; the OS reaps them on exit.
|
||||
//! Crashes are non-fatal: callers fall back to the legacy direct-spawn path
|
||||
//! via [`BuildPool::is_healthy`] and a re-spawn on the next call.
|
||||
//!
|
||||
//! # Future-language plug-in
|
||||
//!
|
||||
//! Per-language sub-modules (`java.rs`, `node.rs`, `python.rs`,
|
||||
//! …) implement the [`BuildPool`] trait. The harness build dispatcher in
|
||||
//! [`crate::dynamic::build_sandbox`] reads `NYX_DYNAMIC_BUILD_POOL` and
|
||||
//! routes each request to the matching pool when enabled.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
|
||||
pub mod c;
|
||||
pub mod cpp;
|
||||
pub mod go;
|
||||
pub mod java;
|
||||
pub mod node;
|
||||
pub mod php;
|
||||
pub mod python;
|
||||
pub mod ruby;
|
||||
pub mod rust;
|
||||
|
||||
/// What a pool reports back for one batched compile request.
#[derive(Debug)]
pub struct PoolCompileResult {
    /// Whether the toolchain reported a clean compile.
    pub success: bool,
    /// Captured toolchain stderr. Upstream turns this into
    /// `BuildError::BuildFailed` when `success` is `false`.
    pub stderr: String,
    /// Wall-clock time of the in-pool compile step, not counting IPC or
    /// queue wait. Intended for telemetry; callers are free to ignore it.
    pub duration: Duration,
}
|
||||
|
||||
/// Common contract for every per-language build pool.
|
||||
///
|
||||
/// Implementations are expected to be `Send + Sync` so an `Arc<dyn BuildPool>`
|
||||
/// can be cached in a static `OnceLock` and shared across rayon worker
|
||||
/// threads.
|
||||
pub trait BuildPool: Send + Sync {
|
||||
/// Stable identifier — used in log lines + telemetry so an operator
|
||||
/// can correlate a pool warmup with the harness that triggered it.
|
||||
fn name(&self) -> &'static str;
|
||||
|
||||
/// Compile every source file under `workdir` matching the pool's
|
||||
/// language convention. On success the toolchain has written
|
||||
/// artefacts back into `workdir` (or wherever the pool's contract
|
||||
/// dictates).
|
||||
fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult;
|
||||
|
||||
/// Cheap health check — when this returns `false`, the harness build
|
||||
/// dispatcher falls back to the direct-spawn legacy path and tears
|
||||
/// down the cached handle so the next request triggers a re-spawn.
|
||||
fn is_healthy(&self) -> bool;
|
||||
}
|
||||
|
||||
/// Languages that ship a [`BuildPool`] implementation and are therefore
/// enabled by default. Phase 22 shipped `java`; Phase 23 (Track O.1) adds
/// the remaining eight, so every supported language now has a warm fast path
/// unless an operator opts out via `NYX_DYNAMIC_BUILD_POOL=<lang>=0`.
const POOL_ENABLED_LANGS: &[&str] = &[
    "java", "node", "python", "php", "ruby", "go", "rust", "c", "cpp",
];

/// Parse the `NYX_DYNAMIC_BUILD_POOL` env var.
///
/// Format is a comma-separated list of `lang=bit` entries: `java=1,node=0`.
/// Language names and values are both matched ASCII-case-insensitively; the
/// values `1`, `true`, `yes` and `on` enable the pool and anything else
/// disables it. Empty entries and entries without `=` are ignored, and the
/// first entry naming `lang` wins.
///
/// A missing language returns the default: `true` for every language that
/// ships a pool (see `POOL_ENABLED_LANGS`), `false` otherwise. The default
/// is also used when the variable is unset or not valid Unicode.
pub fn is_pool_enabled(lang: &str) -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];

    let default = POOL_ENABLED_LANGS.contains(&lang);
    let Ok(raw) = std::env::var("NYX_DYNAMIC_BUILD_POOL") else {
        return default;
    };
    raw.split(',')
        // `split_once` yields `None` for blank entries and entries lacking
        // `=`, so both are skipped here.
        .filter_map(|entry| entry.trim().split_once('='))
        .find(|(key, _)| key.trim().eq_ignore_ascii_case(lang))
        .map_or(default, |(_, value)| {
            let value = value.trim();
            TRUTHY.iter().any(|t| value.eq_ignore_ascii_case(t))
        })
}
|
||||
|
||||
/// Shared root for a pool's persistent caches (sccache dir, shared
|
||||
/// `GOCACHE`, opcache file-cache, Bootsnap cache, shared venvs, …).
|
||||
///
|
||||
/// Honours `NYX_BUILD_POOL_DIR` so tests can redirect the cache into a
|
||||
/// `TempDir`; otherwise falls back to the platform cache dir, mirroring
|
||||
/// the javac pool's layout under `dynamic/build-pool/`.
|
||||
///
|
||||
/// Returns `None` only when neither the env override nor a platform cache
|
||||
/// dir is available — callers treat that as "pool unavailable" and fall
|
||||
/// back to the legacy direct-spawn build path.
|
||||
pub(crate) fn pool_cache_dir(lang: &str, sub: &str) -> Option<PathBuf> {
|
||||
let custom = std::env::var("NYX_BUILD_POOL_DIR").ok().map(PathBuf::from);
|
||||
let base = if let Some(custom) = custom.clone() {
|
||||
custom
|
||||
} else {
|
||||
directories::ProjectDirs::from("dev", "nyx", "nyx")?
|
||||
.cache_dir()
|
||||
.join("dynamic")
|
||||
.join("build-pool")
|
||||
};
|
||||
let dir = base.join(lang).join(sub);
|
||||
if ensure_writable_dir(&dir).is_some() {
|
||||
return Some(dir);
|
||||
}
|
||||
if custom.is_some() {
|
||||
return None;
|
||||
}
|
||||
let fallback = std::env::temp_dir()
|
||||
.join("nyx")
|
||||
.join("dynamic")
|
||||
.join("build-pool")
|
||||
.join(lang)
|
||||
.join(sub);
|
||||
ensure_writable_dir(&fallback)
|
||||
}
|
||||
|
||||
/// Create `dir` (and any missing parents) and confirm it is writable by
/// writing, then removing, a small probe file named after the current
/// process id.
///
/// Returns the directory path on success, or `None` when the directory
/// cannot be created or the probe cannot be written. Failure to remove the
/// probe afterwards is ignored.
fn ensure_writable_dir(dir: &Path) -> Option<PathBuf> {
    if std::fs::create_dir_all(dir).is_err() {
        return None;
    }
    let probe = dir.join(format!(".nyx-write-probe-{}", std::process::id()));
    match std::fs::write(&probe, b"ok") {
        Ok(()) => {
            let _ = std::fs::remove_file(probe);
            Some(dir.to_path_buf())
        }
        Err(_) => None,
    }
}
|
||||
|
||||
/// Construct a `Command` for `bin` with a scrubbed environment, matching
/// the isolation envelope every legacy `prepare_*` build uses: `env_clear`
/// plus an inherited `PATH` + `HOME`, and `TMPDIR` / `TMP` / `TEMP` pointed
/// at the build temp directory. Pools layer their cache env
/// (`CARGO_TARGET_DIR`, `CCACHE_DIR`, `GOCACHE`, …) on top of this.
///
/// `PATH` and `HOME` are copied as raw OS strings so values that are not
/// valid Unicode are passed through intact; a variable that is unset in the
/// parent is set to the empty string in the child.
pub(crate) fn base_command(bin: &str) -> Command {
    let tmp = build_temp_dir();
    let mut cmd = Command::new(bin);
    cmd.env_clear()
        .env("PATH", std::env::var_os("PATH").unwrap_or_default())
        .env("HOME", std::env::var_os("HOME").unwrap_or_default())
        .env("TMPDIR", &tmp)
        .env("TMP", &tmp)
        .env("TEMP", &tmp);
    cmd
}

/// Directory handed to child builds as their temp dir: `nyx-build-tmp`
/// under the system temp dir, or the system temp dir itself when that
/// sub-directory cannot be created.
fn build_temp_dir() -> PathBuf {
    let dir = std::env::temp_dir().join("nyx-build-tmp");
    if std::fs::create_dir_all(&dir).is_ok() {
        return dir;
    }
    std::env::temp_dir()
}
|
||||
|
||||
/// Environment pairs that pin Bundler / RubyGems to a writable vendor
/// directory inside `workdir`.
///
/// `GEM_HOME` and `BUNDLE_PATH` both point at `<workdir>/vendor/bundle`, so
/// every gem *install* lands in a directory the current user owns. That is
/// the load-bearing fix for the harness build invoking `sudo`: legacy
/// Bundler (1.x) shells out to `sudo` when the install target — the
/// root-owned system gem dir (`/Library/Ruby/Gems/...`) — is not writable,
/// and then blocks on a terminal password prompt (`sudo: a terminal is
/// required to read the password`). A writable target means no privilege
/// escalation and no prompt.
///
/// `GEM_PATH` is intentionally not set, so RubyGems still consults the
/// system gem path when *resolving* (together with
/// `BUNDLE_DISABLE_SHARED_GEMS=false`): an already-installed gem can satisfy
/// the Gemfile without a network fetch, while missing gems are installed
/// into the vendor dir. `BUNDLE_APP_CONFIG` keeps Bundler's per-project
/// config inside the workdir at `<workdir>/.bundle`.
///
/// The pairs are returned rather than applied to a `Command` so the pooled
/// path ([`ruby::RubyPool`]) and the legacy direct-spawn path
/// ([`crate::dynamic::build_sandbox`]) can layer them on identically. Env
/// vars are Bundler-version-agnostic: 1.x and 2.x both honour `BUNDLE_*` /
/// `GEM_*`, unlike the 2.x-only `bundle config set` CLI the old path relied
/// on (a silent no-op on 1.x that left the install target pointed at the
/// system dir — the original root cause).
///
/// Side effect: the vendor directory is created on a best-effort basis; a
/// failure to create it is ignored.
pub(crate) fn ruby_hermetic_env(workdir: &Path) -> Vec<(&'static str, std::ffi::OsString)> {
    let vendor_bundle = workdir.join("vendor").join("bundle");
    let app_config = workdir.join(".bundle");

    // Best effort only: the env pairs are returned either way.
    let _ = std::fs::create_dir_all(&vendor_bundle);

    let mut env: Vec<(&'static str, std::ffi::OsString)> = Vec::with_capacity(5);
    env.push(("GEM_HOME", vendor_bundle.clone().into_os_string()));
    env.push(("BUNDLE_PATH", vendor_bundle.into_os_string()));
    env.push(("BUNDLE_DISABLE_SHARED_GEMS", "false".into()));
    env.push(("BUNDLE_FROZEN", "false".into()));
    env.push(("BUNDLE_APP_CONFIG", app_config.into_os_string()));
    env
}
|
||||
|
||||
/// Fold a process's stdout and stderr into a single diagnostic string.
///
/// Some build tools split their failure diagnostics across streams — Bundler
/// in particular prints "Could not find gem …" to stdout while only an
/// unrelated RubyGems extension warning lands on stderr. Keeping both stops
/// the downstream host-limitation classifier from missing the real reason.
///
/// Both streams are decoded lossily as UTF-8. If stdout is blank
/// (whitespace-only counts as blank) the result is stderr alone; if only
/// stderr is blank the result is stdout alone; otherwise it is stdout, a
/// newline, then stderr. The chosen text is returned untrimmed.
pub(crate) fn combine_output(stdout: &[u8], stderr: &[u8]) -> String {
    let out = String::from_utf8_lossy(stdout);
    let err = String::from_utf8_lossy(stderr);

    if out.trim().is_empty() {
        return err.into_owned();
    }
    if err.trim().is_empty() {
        return out.into_owned();
    }
    format!("{out}\n{err}")
}
|
||||
|
||||
/// Detect a runnable `ccache` binary (honouring `NYX_CCACHE_BIN`). Shared
|
||||
/// by the C and C++ pools to front their compiler with the shared object
|
||||
/// cache; `None` means "compile bare", preserving legacy parity.
|
||||
pub(crate) fn detect_ccache() -> Option<String> {
|
||||
let bin = std::env::var("NYX_CCACHE_BIN").unwrap_or_else(|_| "ccache".to_owned());
|
||||
binary_runnable(&bin, "--version").then_some(bin)
|
||||
}
|
||||
|
||||
/// Cheap "is this binary runnable" probe used by every pool's
|
||||
/// [`BuildPool::is_healthy`] / `try_new`. Runs `bin <probe_arg>` with a
|
||||
/// scrubbed env and reports whether it exited 0.
|
||||
pub(crate) fn binary_runnable(bin: &str, probe_arg: &str) -> bool {
|
||||
base_command(bin)
|
||||
.arg(probe_arg)
|
||||
.stdin(std::process::Stdio::null())
|
||||
.stdout(std::process::Stdio::null())
|
||||
.stderr(std::process::Stdio::null())
|
||||
.status()
|
||||
.map(|s| s.success())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    // Every test below takes this lock before touching
    // `NYX_DYNAMIC_BUILD_POOL`, so tests in this module never mutate the
    // variable concurrently with one another.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// RAII guard that overrides (or clears) `NYX_DYNAMIC_BUILD_POOL` and
    /// restores the previous value when dropped.
    struct EnvGuard {
        /// Value of the variable before the override; `None` if it was unset
        /// (or not valid Unicode).
        prior: Option<String>,
    }

    impl EnvGuard {
        /// Set the variable to `value`, or remove it when `value` is `None`,
        /// remembering what was there before.
        fn set(value: Option<&str>) -> Self {
            let prior = std::env::var("NYX_DYNAMIC_BUILD_POOL").ok();
            match value {
                // SAFETY: callers in this module hold `ENV_LOCK` for the
                // guard's whole lifetime, so no other test here mutates the
                // variable at the same time.
                // NOTE(review): threads outside this module are not covered
                // by the lock — confirm nothing else reads the environment
                // concurrently.
                Some(v) => unsafe { std::env::set_var("NYX_DYNAMIC_BUILD_POOL", v) },
                None => unsafe { std::env::remove_var("NYX_DYNAMIC_BUILD_POOL") },
            }
            Self { prior }
        }
    }

    impl Drop for EnvGuard {
        /// Put the variable back to its pre-override state.
        fn drop(&mut self) {
            match self.prior.take() {
                // SAFETY: in every test the guard is declared after the
                // `ENV_LOCK` guard and therefore dropped before it, so the
                // lock is still held here.
                Some(v) => unsafe { std::env::set_var("NYX_DYNAMIC_BUILD_POOL", v) },
                None => unsafe { std::env::remove_var("NYX_DYNAMIC_BUILD_POOL") },
            }
        }
    }

    // With the variable unset, every language in `POOL_ENABLED_LANGS` reports
    // its pool as enabled.
    #[test]
    fn default_enables_every_shipped_pool() {
        let _l = ENV_LOCK.lock().unwrap();
        let _g = EnvGuard::set(None);
        for lang in POOL_ENABLED_LANGS {
            assert!(is_pool_enabled(lang), "{lang} pool must default on");
        }
        // A language with no pool stays off.
        assert!(!is_pool_enabled("cobol"));
    }

    // `node=0` switches off only the node pool.
    #[test]
    fn explicit_override_disables_node() {
        let _l = ENV_LOCK.lock().unwrap();
        let _g = EnvGuard::set(Some("node=0"));
        assert!(!is_pool_enabled("node"));
        // Other languages keep their default-on state.
        assert!(is_pool_enabled("python"));
    }

    // `java=0` switches off the java pool.
    #[test]
    fn explicit_override_disables_java() {
        let _l = ENV_LOCK.lock().unwrap();
        let _g = EnvGuard::set(Some("java=0"));
        assert!(!is_pool_enabled("java"));
    }

    // Comma-separated entries are applied independently per language.
    #[test]
    fn multi_entry_parses_per_lang() {
        let _l = ENV_LOCK.lock().unwrap();
        let _g = EnvGuard::set(Some("java=1,node=1,python=0"));
        assert!(is_pool_enabled("java"));
        assert!(is_pool_enabled("node"));
        assert!(!is_pool_enabled("python"));
    }

    // The language key is matched case-insensitively.
    #[test]
    fn case_insensitive_keys() {
        let _l = ENV_LOCK.lock().unwrap();
        let _g = EnvGuard::set(Some("JAVA=0"));
        assert!(!is_pool_enabled("java"));
    }

    // A value that is not a recognised on/off token disables the pool.
    #[test]
    fn unknown_value_treated_as_disabled() {
        let _l = ENV_LOCK.lock().unwrap();
        let _g = EnvGuard::set(Some("java=maybe"));
        assert!(!is_pool_enabled("java"));
    }
}
|
||||
87
src/dynamic/build_pool/node.rs
Normal file
87
src/dynamic/build_pool/node.rs
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
//! Node.js build pool (Phase 23 / Track O.1).
|
||||
//!
|
||||
//! `prepare_node` already snapshots `node_modules` per `package.json` hash.
|
||||
//! What it lacks is a shared npm download cache: a fresh lock hash re-downloads
|
||||
//! every tarball from cold.
|
||||
//!
|
||||
//! [`NodePool`] points `npm_config_cache` at the shared pool root so package
|
||||
//! tarballs are reused across lock hashes, collapsing a cold `npm install` to
|
||||
//! an unpack of already-fetched tarballs. TypeScript harnesses that do not
|
||||
//! need full type checking are run with `--experimental-strip-types` at
|
||||
//! execution time (the runner reads [`strip_types_flag`]); the pool itself only
|
||||
//! owns the install step.
|
||||
|
||||
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct NodePool {
|
||||
npm_bin: String,
|
||||
}
|
||||
|
||||
impl NodePool {
|
||||
pub fn try_new() -> Result<Self, String> {
|
||||
let npm_bin = std::env::var("NYX_NPM_BIN").unwrap_or_else(|_| "npm".to_owned());
|
||||
if !binary_runnable(&npm_bin, "--version") {
|
||||
return Err(format!("node-pool: {npm_bin} not runnable"));
|
||||
}
|
||||
Ok(NodePool { npm_bin })
|
||||
}
|
||||
}
|
||||
|
||||
/// Node command-line flag that lets a TypeScript harness run without a full
/// `tsc` compile when the spec does not need type checking.
///
/// Exposed as a free function so the runner can splice the flag into the
/// harness exec without holding a pool handle.
pub fn strip_types_flag() -> &'static str {
    const STRIP_TYPES: &str = "--experimental-strip-types";
    STRIP_TYPES
}
|
||||
|
||||
impl BuildPool for NodePool {
|
||||
fn name(&self) -> &'static str {
|
||||
"node"
|
||||
}
|
||||
|
||||
/// Install dependencies declared by `workdir/package.json` into
|
||||
/// `workdir/node_modules`. Args are unused.
|
||||
fn compile_batch(&self, workdir: &Path, _args: &[String]) -> PoolCompileResult {
|
||||
let start = Instant::now();
|
||||
let mut cmd = base_command(&self.npm_bin);
|
||||
cmd.args(["install", "--no-save", "--no-audit", "--no-fund"])
|
||||
.current_dir(workdir);
|
||||
if let Some(cache) = pool_cache_dir("node", "npm-cache") {
|
||||
cmd.env("npm_config_cache", cache);
|
||||
}
|
||||
|
||||
match cmd.output() {
|
||||
Ok(o) if o.status.success() => PoolCompileResult {
|
||||
success: true,
|
||||
stderr: String::new(),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
Ok(o) => PoolCompileResult {
|
||||
success: false,
|
||||
stderr: String::from_utf8_lossy(&o.stderr).into_owned(),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
Err(e) => PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("node-pool: npm install: {e}"),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn is_healthy(&self) -> bool {
|
||||
binary_runnable(&self.npm_bin, "--version")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Pins the exact flag string returned by `strip_types_flag`, so an
    // accidental edit to it is caught.
    #[test]
    fn strip_types_flag_is_the_node_native_ts_flag() {
        assert_eq!(strip_types_flag(), "--experimental-strip-types");
    }
}
|
||||
110
src/dynamic/build_pool/php.rs
Normal file
110
src/dynamic/build_pool/php.rs
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
//! PHP build pool (Phase 23 / Track O.1).
|
||||
//!
|
||||
//! Two warm caches keyed off the Composer lockfile:
|
||||
//! - `COMPOSER_CACHE_DIR` points at the shared pool root so package downloads
|
||||
//! are reused across lock hashes, and
|
||||
//! - an opcache file-cache directory is pre-warmed so the harness `php`
|
||||
//! process skips re-parsing the vendored sources on first run.
|
||||
//!
|
||||
//! Both degrade gracefully: a missing `composer` makes `try_new` fail and the
|
||||
//! caller falls back to the legacy
|
||||
//! [`crate::dynamic::build_sandbox::prepare_php`] path; a missing `php` simply
|
||||
//! skips the opcache warm (the install still succeeds).
|
||||
|
||||
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct PhpPool {
|
||||
composer_bin: String,
|
||||
}
|
||||
|
||||
impl PhpPool {
|
||||
pub fn try_new() -> Result<Self, String> {
|
||||
let composer_bin =
|
||||
std::env::var("NYX_COMPOSER_BIN").unwrap_or_else(|_| "composer".to_owned());
|
||||
if !binary_runnable(&composer_bin, "--version") {
|
||||
return Err(format!("php-pool: {composer_bin} not runnable"));
|
||||
}
|
||||
Ok(PhpPool { composer_bin })
|
||||
}
|
||||
}
|
||||
|
||||
impl BuildPool for PhpPool {
|
||||
fn name(&self) -> &'static str {
|
||||
"php"
|
||||
}
|
||||
|
||||
/// Install `composer.json` deps into `workdir/vendor` then warm the
|
||||
/// shared opcache file-cache. Args are unused.
|
||||
fn compile_batch(&self, workdir: &Path, _args: &[String]) -> PoolCompileResult {
|
||||
let start = Instant::now();
|
||||
let mut cmd = base_command(&self.composer_bin);
|
||||
cmd.args(["install", "--no-interaction", "--no-dev", "--prefer-dist"])
|
||||
.current_dir(workdir)
|
||||
.env("COMPOSER_ALLOW_SUPERUSER", "1");
|
||||
if let Some(cache) = pool_cache_dir("php", "composer-cache") {
|
||||
cmd.env("COMPOSER_CACHE_DIR", cache);
|
||||
}
|
||||
|
||||
match cmd.output() {
|
||||
Ok(o) if o.status.success() => {}
|
||||
Ok(o) => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: String::from_utf8_lossy(&o.stderr).into_owned(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
Err(e) => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("php-pool: composer install: {e}"),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
warm_opcache(workdir);
|
||||
|
||||
PoolCompileResult {
|
||||
success: true,
|
||||
stderr: String::new(),
|
||||
duration: start.elapsed(),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_healthy(&self) -> bool {
|
||||
binary_runnable(&self.composer_bin, "--version")
|
||||
}
|
||||
}
|
||||
|
||||
/// Best-effort opcache file-cache pre-warm: compile every vendored `.php`
|
||||
/// into the shared opcache file-cache so the harness `php` process boots with
|
||||
/// the bytecode already on disk. A missing `php` or partial failure is
|
||||
/// swallowed — the install already succeeded and opcache is a pure speed win.
|
||||
fn warm_opcache(workdir: &Path) {
|
||||
let vendor = workdir.join("vendor");
|
||||
if !vendor.exists() {
|
||||
return;
|
||||
}
|
||||
let php = std::env::var("NYX_PHP_BIN").unwrap_or_else(|_| "php".to_owned());
|
||||
let file_cache = match pool_cache_dir("php", "opcache") {
|
||||
Some(d) => d,
|
||||
None => return,
|
||||
};
|
||||
let _ = base_command(&php)
|
||||
.arg("-d")
|
||||
.arg("opcache.enable_cli=1")
|
||||
.arg("-d")
|
||||
.arg(format!("opcache.file_cache={}", file_cache.display()))
|
||||
.arg("-d")
|
||||
.arg("opcache.file_cache_only=1")
|
||||
.arg("-r")
|
||||
.arg(
|
||||
"foreach(new RecursiveIteratorIterator(new RecursiveDirectoryIterator('vendor')) \
|
||||
as $f){ if(substr($f,-4)==='.php'){ @opcache_compile_file($f); } }",
|
||||
)
|
||||
.current_dir(workdir)
|
||||
.output();
|
||||
}
|
||||
122
src/dynamic/build_pool/python.rs
Normal file
122
src/dynamic/build_pool/python.rs
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
//! Python build pool (Phase 23 / Track O.1).
|
||||
//!
|
||||
//! `prepare_python` already keys its venv on the requirements hash, so the
|
||||
//! venv itself is the "shared venv per `requirements_hash`". What the legacy
|
||||
//! path lacks is a warm bytecode cache: the first harness to import a package
|
||||
//! pays the `.py` -> `.pyc` compile.
|
||||
//!
|
||||
//! [`PythonPool`] runs `python -m compileall` over the venv's `site-packages`
|
||||
//! once at venv-creation time so every later harness import is a `__pycache__`
|
||||
//! hit. The pip download cache is pointed at the shared pool root so repeated
|
||||
//! installs across requirements hashes reuse wheels.
|
||||
|
||||
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct PythonPool;
|
||||
|
||||
impl PythonPool {
|
||||
pub fn try_new(python_bin: &str) -> Result<Self, String> {
|
||||
if !binary_runnable(python_bin, "--version") {
|
||||
return Err(format!("python-pool: {python_bin} not runnable"));
|
||||
}
|
||||
Ok(PythonPool)
|
||||
}
|
||||
}
|
||||
|
||||
impl BuildPool for PythonPool {
|
||||
fn name(&self) -> &'static str {
|
||||
"python"
|
||||
}
|
||||
|
||||
/// `args[0]` = venv path to create, `args[1]` = python interpreter binary.
|
||||
fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
|
||||
let start = Instant::now();
|
||||
let venv_path = match args.first() {
|
||||
Some(v) => Path::new(v),
|
||||
None => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: "python-pool: missing venv path arg".to_owned(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
};
|
||||
let python = args.get(1).map(String::as_str).unwrap_or("python3");
|
||||
|
||||
// 1. Create the venv.
|
||||
let create = base_command(python)
|
||||
.args(["-m", "venv", "--clear", "--system-site-packages"])
|
||||
.arg(venv_path)
|
||||
.status();
|
||||
match create {
|
||||
Ok(s) if s.success() => {}
|
||||
Ok(s) => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("venv create failed: exit {s}"),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
Err(e) => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("python-pool: venv create: {e}"),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Install requirements with the shared wheel cache.
|
||||
let req_path = workdir.join("requirements.txt");
|
||||
if req_path.exists() {
|
||||
let pip = venv_path.join("bin").join("pip");
|
||||
let mut cmd = base_command(&pip.to_string_lossy());
|
||||
cmd.args(["install", "-r"]).arg(&req_path);
|
||||
if let Some(cache) = pool_cache_dir("python", "pip-cache") {
|
||||
cmd.env("PIP_CACHE_DIR", cache);
|
||||
} else {
|
||||
cmd.arg("--no-cache-dir");
|
||||
}
|
||||
match cmd.output() {
|
||||
Ok(o) if o.status.success() => {}
|
||||
Ok(o) => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: String::from_utf8_lossy(&o.stderr).into_owned(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
Err(e) => {
|
||||
return PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("python-pool: pip install: {e}"),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Warm __pycache__ for the whole venv (best-effort: a partial
|
||||
// failure to byte-compile one module must not fail the build).
|
||||
let venv_python = venv_path.join("bin").join("python");
|
||||
let _ = base_command(&venv_python.to_string_lossy())
|
||||
.args(["-m", "compileall", "-q"])
|
||||
.arg(venv_path)
|
||||
.output();
|
||||
|
||||
PoolCompileResult {
|
||||
success: true,
|
||||
stderr: String::new(),
|
||||
duration: start.elapsed(),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_healthy(&self) -> bool {
|
||||
// The interpreter is resolved per-request via args; treat the pool as
|
||||
// always healthy and let an unrunnable interpreter surface as a build
|
||||
// error, which the dispatcher already falls back from.
|
||||
true
|
||||
}
|
||||
}
|
||||
120
src/dynamic/build_pool/ruby.rs
Normal file
120
src/dynamic/build_pool/ruby.rs
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
//! Ruby build pool (Phase 23 / Track O.1).
|
||||
//!
|
||||
//! `prepare_ruby` already vendors gems per `Gemfile.lock` hash. What it lacks
|
||||
//! is a warm Bootsnap cache: the first harness to `require` a gem pays the
|
||||
//! load-path scan + compile.
|
||||
//!
|
||||
//! [`RubyPool`] points `BOOTSNAP_CACHE_DIR` at the shared pool root and runs
|
||||
//! `bundle install` with the shared gem cache. Bootsnap then persists its
|
||||
//! compiled require-cache across findings. Falls back to the legacy path when
|
||||
//! `bundle` is not runnable.
|
||||
|
||||
use super::{
|
||||
BuildPool, PoolCompileResult, base_command, binary_runnable, combine_output, pool_cache_dir,
|
||||
ruby_hermetic_env,
|
||||
};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct RubyPool {
|
||||
bundle_bin: String,
|
||||
}
|
||||
|
||||
impl RubyPool {
|
||||
pub fn try_new() -> Result<Self, String> {
|
||||
let bundle_bin = std::env::var("NYX_BUNDLE_BIN").unwrap_or_else(|_| "bundle".to_owned());
|
||||
if !binary_runnable(&bundle_bin, "--version") {
|
||||
return Err(format!("ruby-pool: {bundle_bin} not runnable"));
|
||||
}
|
||||
Ok(RubyPool { bundle_bin })
|
||||
}
|
||||
|
||||
fn bundle(&self, workdir: &Path) -> std::process::Command {
|
||||
let mut cmd = base_command(&self.bundle_bin);
|
||||
cmd.current_dir(workdir);
|
||||
// Writable gem target → no privilege escalation → never `sudo`.
|
||||
for (k, v) in ruby_hermetic_env(workdir) {
|
||||
cmd.env(k, v);
|
||||
}
|
||||
if let Some(cache) = pool_cache_dir("ruby", "bootsnap") {
|
||||
cmd.env("BOOTSNAP_CACHE_DIR", cache);
|
||||
}
|
||||
cmd
|
||||
}
|
||||
}
|
||||
|
||||
impl BuildPool for RubyPool {
|
||||
fn name(&self) -> &'static str {
|
||||
"ruby"
|
||||
}
|
||||
|
||||
/// Resolve `Gemfile` deps into `workdir/vendor/bundle`. Args are unused.
|
||||
fn compile_batch(&self, workdir: &Path, _args: &[String]) -> PoolCompileResult {
|
||||
let start = Instant::now();
|
||||
|
||||
// `bundle check` short-circuits when the host already has every gem.
|
||||
//
|
||||
// Run the check with the *runtime* environment — plain system gems, no
|
||||
// `GEM_HOME`/`BUNDLE_PATH` override. The harness is executed as
|
||||
// `ruby harness.rb`, whose `require 'bundler/setup'` resolves against
|
||||
// the system gem path, so the build-time check must consult that same
|
||||
// path to predict whether the run will succeed. The hermetic
|
||||
// `GEM_HOME` override (below) exists only to give `bundle install` a
|
||||
// writable, sudo-free target for *missing* gems; applying it to the
|
||||
// check breaks Bundler 1.x's ability to see an already-installed system
|
||||
// gem (e.g. `rack`), turning a satisfiable Gemfile into a spurious
|
||||
// BuildFailed.
|
||||
let mut check = base_command(&self.bundle_bin);
|
||||
check.current_dir(workdir);
|
||||
if let Some(cache) = pool_cache_dir("ruby", "bootsnap") {
|
||||
check.env("BOOTSNAP_CACHE_DIR", cache);
|
||||
}
|
||||
if let Ok(o) = check.arg("check").output()
|
||||
&& o.status.success()
|
||||
{
|
||||
return PoolCompileResult {
|
||||
success: true,
|
||||
stderr: String::new(),
|
||||
duration: start.elapsed(),
|
||||
};
|
||||
}
|
||||
|
||||
// The install target is pinned to a writable vendor dir via
|
||||
// `ruby_hermetic_env` (GEM_HOME / BUNDLE_PATH), so the legacy
|
||||
// `bundle config set --local path …` step is gone: it is 2.x-only
|
||||
// syntax that no-ops on Bundler 1.x (leaving the target pointed at
|
||||
// the root-owned system dir — the `sudo` root cause). `--local`
|
||||
// keeps the build offline: missing gems fail fast with a
|
||||
// host-limitation error instead of reaching for the network.
|
||||
let install = self
|
||||
.bundle(workdir)
|
||||
.args(["install", "--local", "--jobs", "4", "--retry", "0"])
|
||||
.output();
|
||||
match install {
|
||||
Ok(o) if o.status.success() => PoolCompileResult {
|
||||
success: true,
|
||||
stderr: String::new(),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
Ok(o) => PoolCompileResult {
|
||||
success: false,
|
||||
// Bundler prints its dependency-resolution diagnostics
|
||||
// ("Could not find gem '…' in any of the gem sources …") to
|
||||
// STDOUT, leaving only the RubyGems extension warning on
|
||||
// stderr. Combine both so the host-limitation classifier at
|
||||
// the verify boundary can see the real reason.
|
||||
stderr: combine_output(&o.stdout, &o.stderr),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
Err(e) => PoolCompileResult {
|
||||
success: false,
|
||||
stderr: format!("ruby-pool: bundle install: {e}"),
|
||||
duration: start.elapsed(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn is_healthy(&self) -> bool {
|
||||
binary_runnable(&self.bundle_bin, "--version")
|
||||
}
|
||||
}
|
||||
369
src/dynamic/build_pool/rust.rs
Normal file
369
src/dynamic/build_pool/rust.rs
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
//! Rust build pool (Phase 23 / Track O.1).
|
||||
//!
|
||||
//! The legacy [`crate::dynamic::build_sandbox::prepare_rust`] runs a fresh
|
||||
//! `cargo build --release` per finding with a per-workdir `target/`. Every
|
||||
//! harness therefore recompiles the (identical) harness scaffold and all of
|
||||
//! its dependencies from cold.
|
||||
//!
|
||||
//! [`RustPool`] keeps two warm caches keyed on the `Cargo.lock` hash:
|
||||
//! - a shared `CARGO_TARGET_DIR` so incremental artefacts survive across
|
||||
//! per-finding workdirs, and
|
||||
//! - `sccache` as `RUSTC_WRAPPER` when it is on `PATH`, which caches the
|
||||
//! per-crate `rustc` invocations across *different* lock hashes too.
|
||||
//!
|
||||
//! Both degrade gracefully: a missing `sccache` simply drops the wrapper and
|
||||
//! a fresh lock hash gets a fresh (empty) shared target dir. The compile
|
||||
//! itself is byte-for-byte the same `cargo build --release` the legacy path
|
||||
//! runs, so success / failure parity holds.
|
||||
|
||||
use super::{BuildPool, PoolCompileResult, base_command, binary_runnable, pool_cache_dir};
|
||||
use blake3::Hasher;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
pub struct RustPool {
|
||||
cargo_bin: String,
|
||||
/// `Some(path)` when an `sccache` binary is runnable. Wired in as
|
||||
/// `RUSTC_WRAPPER`; `None` falls back to plain `rustc`.
|
||||
sccache_bin: Option<String>,
|
||||
}
|
||||
|
||||
impl RustPool {
|
||||
pub fn try_new() -> Result<Self, String> {
|
||||
let cargo_bin = std::env::var("NYX_CARGO_BIN").unwrap_or_else(|_| "cargo".to_owned());
|
||||
if !binary_runnable(&cargo_bin, "--version") {
|
||||
return Err(format!("rust-pool: {cargo_bin} not runnable"));
|
||||
}
|
||||
let sccache_bin = detect_sccache();
|
||||
Ok(RustPool {
|
||||
cargo_bin,
|
||||
sccache_bin,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn detect_sccache() -> Option<String> {
|
||||
let bin = std::env::var("NYX_SCCACHE_BIN").unwrap_or_else(|_| "sccache".to_owned());
|
||||
binary_runnable(&bin, "--version").then_some(bin)
|
||||
}
|
||||
|
||||
impl BuildPool for RustPool {
    /// Pool identifier.
    fn name(&self) -> &'static str {
        "rust"
    }

    /// Build the harness crate in `workdir` with `cargo build --release` into
    /// a shared target dir, then copy the resulting binary to the caller.
    ///
    /// `args[0]` = absolute path the compiled `nyx_harness` binary must land
    /// at (the caller's cache slot).
    ///
    /// Failure cases, each reported through `PoolCompileResult.stderr`:
    /// missing `args[0]`, no shared target dir available, cargo could not be
    /// spawned, cargo exited unsuccessfully (cargo's stderr is returned), or
    /// the final copy failed.
    fn compile_batch(&self, workdir: &Path, args: &[String]) -> PoolCompileResult {
        let start = Instant::now();
        let dest = match args.first() {
            Some(d) => Path::new(d),
            None => {
                return PoolCompileResult {
                    success: false,
                    stderr: "rust-pool: missing binary destination arg".to_owned(),
                    duration: start.elapsed(),
                };
            }
        };

        // Key the shared target dir on the manifest *and* every `src/` file,
        // not the manifest alone. Two fixtures built for the same cap share a
        // `Cargo.toml` (identical lock hash) but differ only in their source;
        // a manifest-only key routed both into the same `release/nyx_harness`
        // slot, letting cargo skip the second fixture's relink so the copy
        // below shipped the *first* fixture's binary — cross-fixture verdict
        // corruption (a vuln / benign pair confirming identically). Folding
        // the source hash in gives each distinct harness its own target dir.
        let build_hash = hash_build_inputs(workdir);
        let target_dir = match pool_cache_dir("rust", &build_hash) {
            Some(d) => d,
            None => {
                return PoolCompileResult {
                    success: false,
                    stderr: "rust-pool: no shared target dir".to_owned(),
                    duration: start.elapsed(),
                };
            }
        };

        // Serialise build + copy across processes for this shared target dir.
        //
        // The target dir is keyed on the full build-input hash computed above
        // (manifest + sources), so it is shared only by builds whose hashed
        // inputs are identical. Those builds all compile the same bin name
        // (`nyx_harness`) into the same `release/nyx_harness` path here.
        // `cargo` already serialises the *build* across processes via its own
        // target lock, but releases that lock the moment it exits — before the
        // copy below moves `release/nyx_harness` to the caller's cache slot.
        // A second process's `cargo build` landing in that window can rewrite
        // `release/nyx_harness` while it is being copied. (Under the earlier
        // manifest-only key this window let a *different* fixture's binary be
        // copied, observed as cross-fixture verdict corruption under a
        // parallel `cargo test`.) Holding this lock across build+copy folds
        // the copy into the existing serialised section, so it adds the
        // copy's few milliseconds, not a new build barrier.
        let _build_lock = TargetDirLock::acquire(&target_dir);

        let mut cmd = base_command(&self.cargo_bin);
        cmd.args(["build", "--release"])
            .current_dir(workdir)
            // Forward the caller's CARGO_HOME, defaulting to `$HOME/.cargo`.
            .env(
                "CARGO_HOME",
                std::env::var("CARGO_HOME").unwrap_or_else(|_| default_cargo_home()),
            )
            // Forward RUSTUP_HOME; when unset it is passed as an empty string.
            .env(
                "RUSTUP_HOME",
                std::env::var("RUSTUP_HOME").unwrap_or_default(),
            )
            .env("CARGO_TARGET_DIR", &target_dir);
        if let Some(sccache) = &self.sccache_bin {
            cmd.env("RUSTC_WRAPPER", sccache);
        }

        let output = match cmd.output() {
            Ok(o) => o,
            Err(e) => {
                return PoolCompileResult {
                    success: false,
                    stderr: format!("rust-pool: cargo build: {e}"),
                    duration: start.elapsed(),
                };
            }
        };
        if !output.status.success() {
            return PoolCompileResult {
                success: false,
                stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
                duration: start.elapsed(),
            };
        }

        // Copy the freshly built binary out of the shared target dir into the
        // caller's slot while the build lock is still held.
        let compiled = target_dir.join("release").join("nyx_harness");
        if let Err(e) = std::fs::copy(&compiled, dest) {
            return PoolCompileResult {
                success: false,
                stderr: format!(
                    "rust-pool: cargo build ok but copy {} -> {} failed: {e}",
                    compiled.display(),
                    dest.display(),
                ),
                duration: start.elapsed(),
            };
        }
        PoolCompileResult {
            success: true,
            stderr: String::new(),
            duration: start.elapsed(),
        }
    }

    /// The pool is healthy while `cargo --version` still runs successfully.
    fn is_healthy(&self) -> bool {
        binary_runnable(&self.cargo_bin, "--version")
    }
}
|
||||
|
||||
/// Fallback `CARGO_HOME` used when the variable itself is not set:
/// `$HOME/.cargo`, or the relative path `.cargo` when `HOME` is unavailable.
fn default_cargo_home() -> String {
    match std::env::var("HOME") {
        Ok(home) => format!("{home}/.cargo"),
        Err(_) => ".cargo".to_owned(),
    }
}
|
||||
|
||||
/// Cross-process advisory lock guarding build+copy for a shared
/// `CARGO_TARGET_DIR` (see the call site in [`RustPool::compile_batch`]).
///
/// Implemented as an atomic `create_new` (O_EXCL) lockfile so it works across
/// the separate processes a parallel `cargo test` spawns — an in-process
/// `Mutex` would not. A lock older than `STALE_AFTER` is stolen so a crashed
/// holder cannot wedge the pool, and acquisition gives up after `MAX_WAIT`
/// (proceeding unlocked) so a pathological case degrades to the pre-fix
/// behaviour rather than deadlocking.
///
/// Staleness is judged purely by the lockfile's age, so a *live* holder whose
/// build outlasts `STALE_AFTER` can also have its lock stolen. Each guard
/// therefore stamps the lockfile with a unique token and, on drop, removes
/// the file only while that token is still in it. A holder that lost its lock
/// never deletes the lock of whoever took over.
struct TargetDirLock {
    path: PathBuf,
    /// Unique `<pid>-<sequence>` value this guard writes into the lockfile.
    token: String,
    /// `true` only when this guard created and stamped the lockfile. The
    /// give-up and error paths leave it `false` and never remove anything.
    owned: bool,
}

impl TargetDirLock {
    /// Block until the lock for `target_dir` is held, or until giving up.
    ///
    /// Always returns a guard. The guard owns the lockfile only when this
    /// call created it; otherwise the caller proceeds unlocked.
    fn acquire(target_dir: &Path) -> Self {
        const MAX_WAIT: Duration = Duration::from_secs(300);
        const STALE_AFTER: Duration = Duration::from_secs(180);
        // Distinguishes guards created by different threads of one process,
        // which all share the same pid.
        static NEXT_GUARD: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

        let path = target_dir.join(".nyx-pool-build.lock");
        let token = format!(
            "{}-{}",
            std::process::id(),
            NEXT_GUARD.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
        );
        let start = Instant::now();
        let mut spins: u64 = 0;
        loop {
            match std::fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path)
            {
                Ok(mut f) => {
                    use std::io::Write;
                    if writeln!(f, "{token}").is_err() {
                        // Without the stamp, ownership cannot be proven at
                        // drop time. Release right away and proceed unlocked
                        // rather than leave an unremovable lockfile behind.
                        drop(f);
                        let _ = std::fs::remove_file(&path);
                        return Self {
                            path,
                            token,
                            owned: false,
                        };
                    }
                    return Self {
                        path,
                        token,
                        owned: true,
                    };
                }
                Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
                    // Steal a stale lock left behind by a crashed holder.
                    let stale = std::fs::metadata(&path)
                        .and_then(|meta| meta.modified())
                        .ok()
                        .and_then(|mtime| mtime.elapsed().ok())
                        .is_some_and(|age| age > STALE_AFTER);
                    if stale {
                        let _ = std::fs::remove_file(&path);
                        continue;
                    }
                    if start.elapsed() > MAX_WAIT {
                        // Best-effort: a slow build beats a deadlock.
                        return Self {
                            path,
                            token,
                            owned: false,
                        };
                    }
                    // Back off: 10 ms, growing by 2 ms per spin, capped at 90 ms.
                    let nap = 10u64.saturating_add(spins.min(40).saturating_mul(2));
                    std::thread::sleep(Duration::from_millis(nap));
                    spins = spins.saturating_add(1);
                }
                Err(_) => {
                    // Cannot create the lockfile (perms / race on dir) — proceed
                    // unlocked rather than fail the build outright.
                    return Self {
                        path,
                        token,
                        owned: false,
                    };
                }
            }
        }
    }
}

impl Drop for TargetDirLock {
    /// Release the lock if, and only if, this guard still holds it.
    fn drop(&mut self) {
        if !self.owned {
            return;
        }
        // The lockfile may have been stolen and re-created by another holder
        // since we acquired it; only remove it while it still carries our
        // token. An unreadable or missing file is left alone.
        let still_ours = std::fs::read_to_string(&self.path)
            .map(|held| held.trim() == self.token)
            .unwrap_or(false);
        if still_ours {
            let _ = std::fs::remove_file(&self.path);
        }
    }
}
|
||||
|
||||
/// Stable short hash of the named manifest files under `workdir`.
|
||||
fn hash_files(workdir: &Path, files: &[&str]) -> String {
|
||||
let mut h = Hasher::new();
|
||||
for fname in files {
|
||||
if let Ok(content) = std::fs::read(workdir.join(fname)) {
|
||||
h.update(fname.as_bytes());
|
||||
h.update(&content);
|
||||
}
|
||||
}
|
||||
let out = h.finalize();
|
||||
format!(
|
||||
"{:016x}",
|
||||
u64::from_le_bytes(out.as_bytes()[..8].try_into().unwrap())
|
||||
)
|
||||
}
|
||||
|
||||
/// Hash of every input that determines the compiled `nyx_harness` binary: the
|
||||
/// Cargo manifest/lock *plus* every `.rs` file under `src/`. Used to key the
|
||||
/// shared `CARGO_TARGET_DIR` so source-distinct harnesses never share a
|
||||
/// `release/nyx_harness` slot (see the call site in [`RustPool::compile_batch`]
|
||||
/// for why manifest-only keying corrupted cross-fixture verdicts). Mirrors
|
||||
/// [`crate::dynamic::build_sandbox::compute_rust_lockfile_hash`].
|
||||
fn hash_build_inputs(workdir: &Path) -> String {
|
||||
let manifest = hash_files(workdir, &["Cargo.lock", "Cargo.toml"]);
|
||||
let src_dir = workdir.join("src");
|
||||
let mut rs_files: Vec<PathBuf> = Vec::new();
|
||||
collect_rs_files(&src_dir, &src_dir, &mut rs_files);
|
||||
rs_files.sort();
|
||||
let mut h = Hasher::new();
|
||||
for rel in &rs_files {
|
||||
if let Ok(content) = std::fs::read(src_dir.join(rel)) {
|
||||
h.update(rel.to_string_lossy().as_bytes());
|
||||
h.update(b"\0");
|
||||
h.update(&content);
|
||||
}
|
||||
}
|
||||
let out = h.finalize();
|
||||
format!(
|
||||
"{manifest}-{:016x}",
|
||||
u64::from_le_bytes(out.as_bytes()[..8].try_into().unwrap())
|
||||
)
|
||||
}
|
||||
|
||||
/// Walk `dir` recursively and append every `.rs` file found to `out`, as a
/// path relative to `root`.
///
/// Directories that cannot be listed and entries that cannot be read are
/// skipped silently. Results are appended in directory-listing order, so
/// callers that need a stable order must sort.
fn collect_rs_files(root: &Path, dir: &Path, out: &mut Vec<PathBuf>) {
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for path in entries.flatten().map(|entry| entry.path()) {
        if path.is_dir() {
            collect_rs_files(root, &path, out);
            continue;
        }
        let is_rust_source = path.extension().and_then(|ext| ext.to_str()) == Some("rs");
        if !is_rust_source {
            continue;
        }
        if let Ok(rel) = path.strip_prefix(root) {
            out.push(rel.to_path_buf());
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Stage a throwaway harness crate: the given `Cargo.toml`, a fixed
    /// `src/main.rs`, and the given `src/entry.rs`.
    fn stage_crate(manifest: &[u8], entry_source: &[u8]) -> tempfile::TempDir {
        let crate_dir = tempfile::TempDir::new().unwrap();
        let root = crate_dir.path();
        std::fs::create_dir_all(root.join("src")).unwrap();
        std::fs::write(root.join("Cargo.toml"), manifest).unwrap();
        std::fs::write(root.join("src/main.rs"), b"fn main(){}\n").unwrap();
        std::fs::write(root.join("src/entry.rs"), entry_source).unwrap();
        crate_dir
    }

    #[test]
    fn hash_is_deterministic_and_content_sensitive() {
        let scratch = tempfile::TempDir::new().unwrap();

        // Same (absent) input twice gives the same digest.
        let before = hash_files(scratch.path(), &["Cargo.lock"]);
        let repeat = hash_files(scratch.path(), &["Cargo.lock"]);
        assert_eq!(before, repeat);

        // Creating the file must move the digest.
        std::fs::write(scratch.path().join("Cargo.lock"), b"[[package]]\n").unwrap();
        let after = hash_files(scratch.path(), &["Cargo.lock"]);
        assert_ne!(before, after);
    }

    #[test]
    fn build_hash_differs_for_same_manifest_distinct_source() {
        // A vuln / benign pair built for the same cap ships an identical
        // Cargo.toml but a different `src/entry.rs`. If both mapped to the
        // same shared target-dir key, cargo would skip the second relink and
        // the pool would copy out the first fixture's binary (cross-fixture
        // verdict corruption — the cmdi / data-exfil Rust regression).
        let manifest = b"[package]\nname=\"nyx_harness\"\nversion=\"0.0.0\"\n";
        let vuln = stage_crate(manifest, b"pub fn run(){ /*vuln*/ }\n");
        let benign = stage_crate(manifest, b"pub fn run(){ /*benign*/ }\n");

        // The manifest-only key cannot tell the two crates apart …
        assert_eq!(
            hash_files(vuln.path(), &["Cargo.lock", "Cargo.toml"]),
            hash_files(benign.path(), &["Cargo.lock", "Cargo.toml"]),
        );
        // … whereas the source-aware key can.
        assert_ne!(
            hash_build_inputs(vuln.path()),
            hash_build_inputs(benign.path())
        );
    }

    #[test]
    fn missing_dest_arg_is_an_error_not_a_panic() {
        let workdir = tempfile::TempDir::new().unwrap();
        // Built by hand, skipping any toolchain probe, so the test runs
        // JDK/cargo-free.
        let pool = RustPool {
            cargo_bin: String::from("cargo"),
            sccache_bin: None,
        };
        let outcome = pool.compile_batch(workdir.path(), &[]);
        assert!(!outcome.success);
        assert!(outcome.stderr.contains("missing binary destination"));
    }
}
|
||||
2879
src/dynamic/build_sandbox.rs
Normal file
2879
src/dynamic/build_sandbox.rs
Normal file
File diff suppressed because it is too large
Load diff
214
src/dynamic/corpus.rs
Normal file
214
src/dynamic/corpus.rs
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
// Legacy [`Oracle::OutputContains`] is intentionally retained for
|
||||
// pre-Phase-06 corpus entries until they migrate to
|
||||
// [`Oracle::SinkProbe`]. The deprecation warning is informational, not a
|
||||
// signal to migrate inside this module.
|
||||
#![allow(deprecated)]
|
||||
|
||||
//! Per-capability payload corpus, keyed by `(Cap, Lang)`.
|
||||
//!
|
||||
//! Each `(Cap, Lang)` pair maps to a small set of canonical payloads plus a
|
||||
//! matching detection oracle. Payloads are static data — adding a new one
|
||||
//! is a code review, not a runtime config knob, so they cannot drift
|
||||
//! between versions.
|
||||
//!
|
||||
//! Differential confirmation (§4.1): every non-benign payload either
|
||||
//! references a paired benign control (resolved inside the same
|
||||
//! `(cap, lang)` slice) or carries a written
|
||||
//! [`CuratedPayload::no_benign_control_rationale`] explaining why no
|
||||
//! control is meaningful. The [`audit`] module enforces this both at
|
||||
//! compile time and via the runtime `corpus_registry::audit` test.
|
||||
//!
|
||||
//! # Module layout
|
||||
//!
|
||||
//! ```text
|
||||
//! corpus.rs — types, public re-exports, module root
|
||||
//! corpus/registry.rs — CapCorpus, CORPUS, payloads_for{,_lang}
|
||||
//! corpus/audit.rs — compile-time + runtime audits
|
||||
//! corpus/<cap>/<lang>.rs — per-(cap, lang) `pub const PAYLOADS`
|
||||
//! ```
|
||||
//!
|
||||
//! Adding a new language for a cap means: drop a new file under
|
||||
//! `corpus/<cap>/<lang>.rs`, register `pub mod <lang>;` in the cap's
|
||||
//! `mod.rs`, and wire `(Cap::<CAP>, Lang::<Lang>, <cap>::<lang>::PAYLOADS)`
|
||||
//! into `registry::ENTRIES`. No other file needs to change.
|
||||
//!
|
||||
//! # Corpus governance (§16.1)
|
||||
//!
|
||||
//! Every payload carries [`PayloadProvenance`], a [`CuratedPayload::since_corpus_version`],
|
||||
//! and at least one [`CuratedPayload::fixture_paths`] entry. The [`CORPUS_VERSION`] const
|
||||
//! tracks the history of incompatible corpus changes; bumping it
|
||||
//! invalidates all `dynamic_verdict_cache` entries whose spec touched the
|
||||
//! changed cap.
|
||||
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
use crate::labels::Cap;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
pub mod audit;
|
||||
pub mod registry;
|
||||
|
||||
mod cmdi;
|
||||
mod crypto;
|
||||
mod data_exfil;
|
||||
mod deserialize;
|
||||
mod fmt_string;
|
||||
mod header_injection;
|
||||
mod json_parse;
|
||||
mod ldap;
|
||||
mod open_redirect;
|
||||
// `pub(crate)` so the Java emitter can read the FILE_IO canary filename /
|
||||
// marker consts it must stage into the servlet harness workdir.
|
||||
pub(crate) mod path_trav;
|
||||
mod prototype_pollution;
|
||||
mod sqli;
|
||||
mod ssrf;
|
||||
mod ssti;
|
||||
mod unauthorized_id;
|
||||
mod xpath;
|
||||
mod xss;
|
||||
mod xxe;
|
||||
|
||||
pub use registry::{
|
||||
CORPUS, CORPUS_UNSUPPORTED_LANG_NEUTRAL, audit_marker_collisions, benign_payload_for,
|
||||
benign_payload_for_lang, materialise_bytes, payloads_for, payloads_for_lang,
|
||||
resolve_benign_control, resolve_benign_control_lang,
|
||||
};
|
||||
|
||||
/// Re-exported canonical [`Oracle`] type.
|
||||
///
|
||||
/// The actual enum lives in [`crate::dynamic::oracle`] alongside
|
||||
/// [`crate::dynamic::oracle::ProbePredicate`] and
|
||||
/// [`crate::dynamic::oracle::oracle_fired`]. Re-exported here so the
|
||||
/// `CuratedPayload.oracle: Oracle` field reads naturally and existing
|
||||
/// `crate::dynamic::corpus::Oracle` callers keep working.
|
||||
pub use crate::dynamic::oracle::Oracle;
|
||||
|
||||
/// Corpus schema/content version.
///
/// Bump when the corpus content changes in a way that invalidates previously-
/// computed [`crate::dynamic::spec::HarnessSpec::spec_hash`] values. The
/// constant equals the newest row of the table below; keep the two in sync
/// when adding a row.
///
/// # Bump history
///
/// | Version | Date       | Change                                        |
/// |---------|------------|-----------------------------------------------|
/// | 1       | 2025-11-01 | Initial corpus (SQLi, CMDI, PATH_TRAV, SSRF, XSS) |
/// | 2       | 2025-12-15 | SSRF OOB-variant added; oracle semantics tightened |
/// | 3       | 2026-05-12 | Migrated to `CuratedPayload`; provenance + fixture_paths enforced; SSRF OOB-nonce slot added |
/// | 4       | 2026-05-14 | Phase 07: `benign_control` paired refs + benign payloads added to SQLI / CMDI / SSRF (file-scheme) |
/// | 5       | 2026-05-16 | FMT_STRING SinkCrash payload + benign control (Phase 08 unrelated-crash acceptance fixture) |
/// | 6       | 2026-05-17 | Phase 02 / Track J.0: `(Cap, Lang)` registry refactor; `no_benign_control_rationale` field; compile-time provenance audit |
/// | 7       | 2026-05-17 | Phase 03 / Track J.1: `DESERIALIZE` cap lit for Java / Python / PHP / Ruby; `ProbeKind::Deserialize` + `ProbePredicate::DeserializeGadgetInvoked` |
/// | 8       | 2026-05-17 | Phase 04 / Track J.2: `SSTI` cap lit for Jinja2 / ERB / Twig / Thymeleaf / Handlebars; `ProbePredicate::TemplateEvalEqual` |
/// | 9       | 2026-05-17 | Phase 05 / Track J.3: `XXE` cap lit for Java / Python / PHP / Ruby / Go; `ProbeKind::Xxe` + `ProbePredicate::XxeEntityExpanded` |
/// | 10      | 2026-05-17 | Phase 06 / Track J.4: `LDAP_INJECTION` cap lit for Java / Python / PHP; `ProbeKind::Ldap` + `ProbePredicate::LdapResultCountGreaterThan`; `StubKind::Ldap` + in-sandbox LDAP server stub |
/// | 11      | 2026-05-17 | Phase 07 / Track J.5: `XPATH_INJECTION` cap lit for Java / Python / PHP / JS; `ProbeKind::Xpath`; `LdapResultCountGreaterThan` renamed to `QueryResultCountGreaterThan` (shared by LDAP + XPath); `xpath_corpus.xml` staged in workdir |
/// | 12      | 2026-05-18 | Phase 08 / Track J.6: `HEADER_INJECTION` cap lit for Java / Python / PHP / Ruby / JS / Go / Rust; `ProbeKind::HeaderEmit` + `ProbePredicate::HeaderInjected`; per-lang `setHeader` shims |
/// | 13      | 2026-05-18 | Phase 09 / Track J.7: `OPEN_REDIRECT` cap lit for Java / Python / PHP / Ruby / JS / Go / Rust; `ProbeKind::Redirect` + `ProbePredicate::RedirectHostNotIn`; per-lang `sendRedirect` / `redirect()` shims |
/// | 14      | 2026-05-18 | Phase 10 / Track J.8: `PROTOTYPE_POLLUTION` cap lit for JS / TS; `ProbeKind::PrototypePollution` + `ProbePredicate::PrototypeCanaryTouched`; Node harness installs `Proxy`-style canary trap on `Object.prototype.__nyx_canary` |
/// | 15      | 2026-05-18 | Phase 11 / Track J.9: `CRYPTO` (Java/Python/PHP/Go/Rust) + `JSON_PARSE` (JS/Python/Ruby) + `UNAUTHORIZED_ID` (7 langs) + `DATA_EXFIL` (7 langs); `ProbeKind::{WeakKey,IdorAccess,OutboundNetwork}` + `ProbePredicate::{WeakKeyEntropy,IdorBoundaryCrossed,OutboundHostNotIn}`; `UnsupportedReason::SoundOracleUnavailable` for caps with no sound oracle |
/// | 16      | 2026-06-01 | Collision-resistant `cmdi` (`CODE_EXEC`) marker: payload `; echo NYX_PWN_$((113*7))_CMDI`, oracle `OutputContains("NYX_PWN_791_CMDI")`. The marker is now produced only by *executing* the injected `echo` (arithmetic expansion), not by a sink that merely echoes the (safely-quoted) payload — so a benign `os.system("echo " + shlex.quote(x))` control no longer false-confirms. Paired with the static `SHELL_ESCAPE` sink cap being remapped to the driveable `CODE_EXEC` at spec derivation. |
/// | 17      | 2026-06-01 | Collision-resistant `path_traversal` (`FILE_IO`) Java payload for the entry-driven servlet harness: vuln `../nyx_pt_canary` reads a workdir-root canary the emitter plants; oracle `OutputContains(CANARY_MARKER)` where the marker is the canary's CONTENT (not a substring of the path payload), so a fixture that echoes the requested filename back cannot reproduce it — only an unsanitised read of the canary does. |
pub const CORPUS_VERSION: u32 = 17;
|
||||
|
||||
/// Where a payload originated.
///
/// Recorded on every [`CuratedPayload`] through its `provenance` field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PayloadProvenance {
    /// Manually written and reviewed by the Nyx team.
    Curated,
    /// Produced by the internal mutation fuzzer (`fuzz/dynamic_corpus/`).
    /// Still requires human promotion review (§16.4) before landing here.
    InternalFuzzer,
    /// Derived from a public CVE or external security report.
    ExternalReport,
}
|
||||
|
||||
/// Reference from a vulnerable payload to its paired benign control.
///
/// Resolved at call time by scanning the same cap's payload slice for an
/// `is_benign == true` entry whose `label` matches. Stored as `&'static
/// str` (rather than a back-pointer to [`CuratedPayload`]) so the corpus
/// tables stay `const`-declarable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PayloadRef {
    /// Label of the benign-control entry inside the same cap's payload set.
    /// Compared against [`CuratedPayload::label`] of the benign entries.
    pub label: &'static str,
}
|
||||
|
||||
/// A single payload entry in the curated corpus.
///
/// Governs both static payload bytes (or an OOB-nonce template) and the
/// oracle used to confirm the vulnerability fired. All fields are
/// `'static` so the corpus can live in read-only memory.
#[derive(Debug, Clone)]
pub struct CuratedPayload {
    /// Bytes injected into the [`crate::dynamic::spec::PayloadSlot`].
    ///
    /// When [`Self::oob_nonce_slot`] is `true` this field is ignored; the
    /// runner materialises the actual bytes from the OOB listener URL at
    /// call time.
    pub bytes: &'static [u8],
    /// Human label for logs and reports. Also the key a [`PayloadRef`]
    /// uses to name a benign control.
    pub label: &'static str,
    /// How we decide the sink fired. See [`Oracle`].
    pub oracle: Oracle,
    /// If `true`, this is a benign control payload.
    /// `Confirmed` requires the vuln payload to trigger AND the benign payload
    /// NOT to trigger (differential confirmation, §4.1).
    pub is_benign: bool,
    /// Where this payload came from.
    pub provenance: PayloadProvenance,
    /// `CORPUS_VERSION` when this payload was added.
    pub since_corpus_version: u32,
    /// `CORPUS_VERSION` at which this payload was deprecated, if any.
    pub deprecated_at_corpus_version: Option<u32>,
    /// Source files that exercise this payload in the dynamic harness.
    /// At least one entry required per §16.1.
    pub fixture_paths: &'static [&'static str],
    /// When `true`, the runner generates the actual bytes from the OOB
    /// listener URL + per-finding nonce at execution time (SSRF OOB variant).
    /// The `bytes` field is unused for such payloads.
    pub oob_nonce_slot: bool,
    /// Structured-oracle predicates evaluated against
    /// [`crate::dynamic::probe::SinkProbe`] records drained from the run's
    /// probe channel (Phase 06 — Track C.1). Always populated; empty when
    /// the payload still relies on the legacy
    /// [`Oracle::OutputContains`]
    /// path and has not been migrated to
    /// [`Oracle::SinkProbe`] yet.
    pub probe_predicates: &'static [ProbePredicate],
    /// Paired benign-control payload inside the same cap's slice.
    ///
    /// `Some(PayloadRef)` on a vulnerable entry means the differential rule
    /// (Phase 07, §4.1) compares this entry's oracle firing against the
    /// referenced benign. `None` marks the entry as having no paired
    /// control — the runner downgrades any would-be `Confirmed` to
    /// [`crate::evidence::InconclusiveReason::NoBenignControl`].
    /// Always `None` on benign entries themselves.
    pub benign_control: Option<PayloadRef>,
    /// Written rationale required when a non-benign payload has
    /// `benign_control = None`. The compile-time audit in [`audit`]
    /// (mirrored at runtime by [`audit::audit_benign_controls_runtime`])
    /// rejects any entry that elides the paired control without a
    /// non-empty explanation here.
    /// Always `None` on entries that DO carry a `benign_control` and on
    /// benign entries themselves.
    pub no_benign_control_rationale: Option<&'static str>,
}
|
||||
|
||||
/// Backward-compatible type alias for [`CuratedPayload`].
pub type Payload = CuratedPayload;
|
||||
|
||||
/// Read-only registry of `(Cap, Lang)` payload slices.
///
/// Constructed once as the [`registry::CORPUS`] const. Layered as
/// `&'static` slices so the entire registry can live in read-only memory
/// and so [`audit`] can walk it in const eval.
#[derive(Debug, Clone, Copy)]
pub struct CapCorpus {
    /// `(Cap, Lang, payloads)` triples. A single cap may appear once per
    /// supported language. See [`registry::payloads_for_lang`] for the
    /// per-language lookup and [`registry::payloads_for`] for the
    /// back-compatible union shim.
    pub entries: &'static [(Cap, Lang, &'static [CuratedPayload])],
    /// Per-cap probe predicates lifted off individual payloads. Reserved
    /// for later Track J phases; empty in Phase 02.
    pub oracles: &'static [(Cap, &'static [ProbePredicate])],
}
|
||||
212
src/dynamic/corpus/audit.rs
Normal file
212
src/dynamic/corpus/audit.rs
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
//! Compile-time + runtime audits over the corpus registry.
|
||||
//!
|
||||
//! Two invariants enforced here fail the build (via `const _: () = assert!(...)`)
|
||||
//! if they regress:
|
||||
//!
|
||||
//! 1. **`benign_control` resolves locally.** Every non-benign payload either
|
||||
//! references a benign control whose `label` appears inside the same
|
||||
//! `(cap, lang)` slice, *or* carries an explicit
|
||||
//! [`CuratedPayload::no_benign_control_rationale`] with a non-empty
|
||||
//! written rationale. Without this guard the differential rule
|
||||
//! (§4.1) silently downgrades to `Inconclusive(NoBenignControl)`
|
||||
//! whenever a maintainer forgets to wire a paired benign entry.
|
||||
//!
|
||||
//! 2. **Cap coverage is exhaustive.** The set of caps appearing in
|
||||
//! [`CORPUS`]'s [`entries`](super::CapCorpus::entries) OR [`CORPUS_UNSUPPORTED_LANG_NEUTRAL`] must
|
||||
//! equal [`Cap::all`]. Adding a new `Cap` bit without classifying it
|
||||
//! fails the build.
|
||||
//!
|
||||
//! The runtime `corpus_registry::audit` test mirrors both checks so
|
||||
//! failure surfaces in `cargo test` output, not just `cargo build`.
|
||||
|
||||
use super::CuratedPayload;
|
||||
use super::registry::{CORPUS, CORPUS_UNSUPPORTED_LANG_NEUTRAL};
|
||||
use crate::labels::Cap;
|
||||
|
||||
/// Const-evaluable string equality: `true` when `a` and `b` hold exactly the
/// same bytes.
///
/// Written with a manual loop because `str == str` is not callable from a
/// `const fn`.
#[allow(dead_code)] // Used by the const-eval audit helpers; some MSRV/CI lints miss that.
const fn str_eq(a: &str, b: &str) -> bool {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    let mut remaining = lhs.len();
    if remaining != rhs.len() {
        return false;
    }
    // Lengths match: compare from the tail towards the head.
    while remaining > 0 {
        remaining -= 1;
        if lhs[remaining] != rhs[remaining] {
            return false;
        }
    }
    true
}
|
||||
|
||||
/// Walk every `(cap, lang)` slice; for each non-benign payload check that
|
||||
/// either its `benign_control.label` resolves inside the same slice or it
|
||||
/// carries a non-empty `no_benign_control_rationale`.
|
||||
#[allow(dead_code)] // Called from a const assertion; MSRV lints may miss const-eval uses.
|
||||
const fn audit_benign_controls() -> bool {
|
||||
let entries = CORPUS.entries;
|
||||
let mut e = 0;
|
||||
while e < entries.len() {
|
||||
let slice: &[CuratedPayload] = entries[e].2;
|
||||
let mut i = 0;
|
||||
while i < slice.len() {
|
||||
let p = &slice[i];
|
||||
if !p.is_benign {
|
||||
match p.benign_control {
|
||||
Some(r) => {
|
||||
let mut j = 0;
|
||||
let mut found = false;
|
||||
while j < slice.len() {
|
||||
if slice[j].is_benign && str_eq(slice[j].label, r.label) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
j += 1;
|
||||
}
|
||||
if !found {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
None => match p.no_benign_control_rationale {
|
||||
Some(rationale) => {
|
||||
if rationale.is_empty() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
None => return false,
|
||||
},
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
e += 1;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// OR of cap bits appearing in `CORPUS.entries`.
|
||||
const fn registered_cap_bits() -> u32 {
|
||||
let entries = CORPUS.entries;
|
||||
let mut bits = 0u32;
|
||||
let mut i = 0;
|
||||
while i < entries.len() {
|
||||
bits |= entries[i].0.bits();
|
||||
i += 1;
|
||||
}
|
||||
bits
|
||||
}
|
||||
|
||||
/// Compile-time guards. Bumping or breaking these fails `cargo build`.
|
||||
const _: () = assert!(
|
||||
audit_benign_controls(),
|
||||
"corpus audit: a non-benign payload references a `benign_control` whose \
|
||||
label does not resolve inside its own (cap, lang) slice AND carries no \
|
||||
`no_benign_control_rationale` — see src/dynamic/corpus/audit.rs.",
|
||||
);
|
||||
|
||||
const _: () = assert!(
|
||||
registered_cap_bits() | CORPUS_UNSUPPORTED_LANG_NEUTRAL == Cap::all().bits(),
|
||||
"corpus audit: union of (cap, lang) entries and \
|
||||
`CORPUS_UNSUPPORTED_LANG_NEUTRAL` does not cover every `Cap` bit. \
|
||||
Add the missing cap to either a `(cap, lang)` slice or the \
|
||||
lang-neutral unsupported list.",
|
||||
);
|
||||
|
||||
/// Runtime mirror of the compile-time benign-control audit.
|
||||
pub fn audit_benign_controls_runtime() -> Result<(), String> {
|
||||
for &(cap, lang, slice) in CORPUS.entries {
|
||||
for p in slice {
|
||||
if p.is_benign {
|
||||
continue;
|
||||
}
|
||||
match p.benign_control {
|
||||
Some(r) => {
|
||||
let found = slice.iter().any(|q| q.is_benign && q.label == r.label);
|
||||
if !found {
|
||||
return Err(format!(
|
||||
"({:?}, {:?}) vuln payload {:?} references missing \
|
||||
benign_control label {:?}",
|
||||
cap, lang, p.label, r.label,
|
||||
));
|
||||
}
|
||||
}
|
||||
None => match p.no_benign_control_rationale {
|
||||
Some(rationale) if !rationale.is_empty() => {}
|
||||
_ => {
|
||||
return Err(format!(
|
||||
"({:?}, {:?}) vuln payload {:?} has neither a \
|
||||
benign_control nor a written \
|
||||
no_benign_control_rationale",
|
||||
cap, lang, p.label,
|
||||
));
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Runtime mirror of the compile-time cap-coverage audit.
|
||||
pub fn audit_cap_coverage_runtime() -> Result<(), String> {
|
||||
let covered = registered_cap_bits() | CORPUS_UNSUPPORTED_LANG_NEUTRAL;
|
||||
if covered != Cap::all().bits() {
|
||||
let missing = Cap::all().bits() & !covered;
|
||||
return Err(format!(
|
||||
"Cap bits {missing:#x} are neither registered in CORPUS.entries \
|
||||
nor listed in CORPUS_UNSUPPORTED_LANG_NEUTRAL",
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Track J.0 deferred audit: a non-benign payload's `benign_control.label`
|
||||
/// must be unique *within its own `(cap, lang)` slice* — and a benign
|
||||
/// payload's label may not collide with any other benign label inside the
|
||||
/// same cap across lang slices, otherwise the lang-agnostic union shim
|
||||
/// could resolve a vuln payload in language A against a benign payload
|
||||
/// declared in language B (the latent §4.1 bug captured in the deferred
|
||||
/// queue).
|
||||
pub fn audit_benign_label_uniqueness_runtime() -> Result<(), String> {
|
||||
use std::collections::HashMap;
|
||||
|
||||
let mut by_cap: HashMap<u32, HashMap<&'static str, crate::symbol::Lang>> = HashMap::new();
|
||||
for &(cap, lang, slice) in CORPUS.entries {
|
||||
let bucket = by_cap.entry(cap.bits()).or_default();
|
||||
for p in slice {
|
||||
if !p.is_benign {
|
||||
continue;
|
||||
}
|
||||
if let Some(prev_lang) = bucket.insert(p.label, lang)
|
||||
&& prev_lang != lang
|
||||
{
|
||||
return Err(format!(
|
||||
"benign label {:?} for cap {:#x} is registered in both \
|
||||
{:?} and {:?} — lang-agnostic resolve_benign_control \
|
||||
could match the wrong language",
|
||||
p.label,
|
||||
cap.bits(),
|
||||
prev_lang,
|
||||
lang,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod corpus_registry {
    use super::*;

    /// Plan §02 acceptance: `cargo test corpus_registry::audit` must pass.
    /// Module name plus test name spell out that required path, so neither
    /// may be renamed.
    #[test]
    fn audit() {
        // Run in this order; the first failing audit panics with its message.
        let checks: [(&str, fn() -> Result<(), String>); 3] = [
            ("benign_control audit failed", audit_benign_controls_runtime),
            ("cap coverage audit failed", audit_cap_coverage_runtime),
            (
                "benign label uniqueness audit failed",
                audit_benign_label_uniqueness_runtime,
            ),
        ];
        for (failure_message, check) in checks {
            check().expect(failure_message);
        }
    }
}
|
||||
46
src/dynamic/corpus/cmdi/c.rs
Normal file
46
src/dynamic/corpus/cmdi/c.rs
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
//! C `Cap::CODE_EXEC` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker-c",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/c/cmdi/cmdi_exec.c",
|
||||
"tests/benchmark/corpus/c/cmdi/cmdi_fgets.c",
|
||||
"tests/benchmark/corpus/c/cmdi/cmdi_popen.c",
|
||||
"tests/benchmark/corpus/c/cmdi/cmdi_system.c",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign-c",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign-c",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/c/cmdi/cmdi_exec.c",
|
||||
"tests/benchmark/corpus/c/cmdi/cmdi_fgets.c",
|
||||
"tests/benchmark/corpus/c/cmdi/cmdi_popen.c",
|
||||
"tests/benchmark/corpus/c/cmdi/cmdi_system.c",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
52
src/dynamic/corpus/cmdi/cpp.rs
Normal file
52
src/dynamic/corpus/cmdi/cpp.rs
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
//! C++ `Cap::CODE_EXEC` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker-cpp",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_class_inline_method.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_exec.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_getline.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_lambda_passthrough.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_popen.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_stl_vector_string.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_system.cpp",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign-cpp",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign-cpp",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_class_inline_method.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_exec.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_getline.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_lambda_passthrough.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_popen.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_stl_vector_string.cpp",
|
||||
"tests/benchmark/corpus/cpp/cmdi/cmdi_system.cpp",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
46
src/dynamic/corpus/cmdi/go.rs
Normal file
46
src/dynamic/corpus/cmdi/go.rs
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
//! Go `Cap::CODE_EXEC` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker-go",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/go/cmdi/cmdi_direct.go",
|
||||
"tests/benchmark/corpus/go/cmdi/cmdi_indirect.go",
|
||||
"tests/benchmark/corpus/go/cmdi/cmdi_unvalidated_queue_element.go",
|
||||
"tests/benchmark/corpus/go/cmdi/vuln_error_log_then_sink.go",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign-go",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign-go",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/go/cmdi/cmdi_direct.go",
|
||||
"tests/benchmark/corpus/go/cmdi/cmdi_indirect.go",
|
||||
"tests/benchmark/corpus/go/cmdi/cmdi_unvalidated_queue_element.go",
|
||||
"tests/benchmark/corpus/go/cmdi/vuln_error_log_then_sink.go",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
42
src/dynamic/corpus/cmdi/java.rs
Normal file
42
src/dynamic/corpus/cmdi/java.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! Java `Cap::CODE_EXEC` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker-java",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/java/cmdi/CmdiDirect.java",
|
||||
"tests/benchmark/corpus/java/cmdi/CmdiIndirect.java",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign-java",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign-java",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/java/cmdi/CmdiDirect.java",
|
||||
"tests/benchmark/corpus/java/cmdi/CmdiIndirect.java",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
42
src/dynamic/corpus/cmdi/javascript.rs
Normal file
42
src/dynamic/corpus/cmdi/javascript.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! JavaScript `Cap::CODE_EXEC` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker-javascript",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/javascript/cmdi/cmdi_direct.js",
|
||||
"tests/benchmark/corpus/javascript/cmdi/cmdi_indirect.js",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign-javascript",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign-javascript",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/javascript/cmdi/cmdi_direct.js",
|
||||
"tests/benchmark/corpus/javascript/cmdi/cmdi_indirect.js",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
12
src/dynamic/corpus/cmdi/mod.rs
Normal file
12
src/dynamic/corpus/cmdi/mod.rs
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
//! Command-injection (`Cap::CODE_EXEC`) per-language payload slices.
|
||||
|
||||
pub mod c;
|
||||
pub mod cpp;
|
||||
pub mod go;
|
||||
pub mod java;
|
||||
pub mod javascript;
|
||||
pub mod php;
|
||||
pub mod python;
|
||||
pub mod ruby;
|
||||
pub mod rust;
|
||||
pub mod typescript;
|
||||
42
src/dynamic/corpus/cmdi/php.rs
Normal file
42
src/dynamic/corpus/cmdi/php.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! PHP `Cap::CODE_EXEC` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker-php",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/php/cmdi/cmdi_direct.php",
|
||||
"tests/benchmark/corpus/php/cmdi/cmdi_indirect.php",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign-php",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign-php",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/php/cmdi/cmdi_direct.php",
|
||||
"tests/benchmark/corpus/php/cmdi/cmdi_indirect.php",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
48
src/dynamic/corpus/cmdi/python.rs
Normal file
48
src/dynamic/corpus/cmdi/python.rs
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
//! Python `Cap::CODE_EXEC` payloads.
|
||||
//!
|
||||
//! Same shell-syntax bytes as [`super::rust::PAYLOADS`]; the per-language
|
||||
//! slice exists so the lookup is a per-language assertion rather than a
|
||||
//! cross-language fallback through [`super::super::registry::payloads_for`].
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker-python",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/python/cmdi/cmdi_direct.py",
|
||||
"tests/benchmark/corpus/python/cmdi/cmdi_indirect.py",
|
||||
"tests/benchmark/corpus/python/cmdi/cmdi_popen_shell.py",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign-python",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign-python",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/python/cmdi/cmdi_direct.py",
|
||||
"tests/benchmark/corpus/python/cmdi/cmdi_indirect.py",
|
||||
"tests/benchmark/corpus/python/cmdi/cmdi_popen_shell.py",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
44
src/dynamic/corpus/cmdi/ruby.rs
Normal file
44
src/dynamic/corpus/cmdi/ruby.rs
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
//! Ruby `Cap::CODE_EXEC` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker-ruby",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/ruby/cmdi/cmdi_backtick.rb",
|
||||
"tests/benchmark/corpus/ruby/cmdi/cmdi_kernel_open.rb",
|
||||
"tests/benchmark/corpus/ruby/cmdi/cmdi_system.rb",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign-ruby",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign-ruby",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/ruby/cmdi/cmdi_backtick.rb",
|
||||
"tests/benchmark/corpus/ruby/cmdi/cmdi_kernel_open.rb",
|
||||
"tests/benchmark/corpus/ruby/cmdi/cmdi_system.rb",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
48
src/dynamic/corpus/cmdi/rust.rs
Normal file
48
src/dynamic/corpus/cmdi/rust.rs
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
//! Command-injection payloads exercised by Rust fixtures
|
||||
//! (`tests/benchmark/corpus/rust/cmdi/`).
|
||||
//!
|
||||
//! Bytes are shell-syntax, not Rust-specific; Track J phases 03–11 add
|
||||
//! per-language slices (Python `os.system`, PHP `exec`, …) as new fixtures
|
||||
//! land.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 1,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/rust/cmdi/cmdi_command.rs",
|
||||
"tests/benchmark/corpus/rust/cmdi/cmdi_args.rs",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
// Benign control: plain text that should never produce the cmdi marker.
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 4,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/rust/cmdi/cmdi_command.rs",
|
||||
"tests/benchmark/corpus/rust/cmdi/cmdi_args.rs",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
42
src/dynamic/corpus/cmdi/typescript.rs
Normal file
42
src/dynamic/corpus/cmdi/typescript.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! TypeScript `Cap::CODE_EXEC` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b":; echo NYX_PWN_$((113*7))_CMDI",
|
||||
label: "cmdi-echo-marker-typescript",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/typescript/cmdi/cmdi_async_wrapper.ts",
|
||||
"tests/benchmark/corpus/typescript/cmdi/cmdi_exec_template.ts",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "cmdi-benign-typescript",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_cmdi_NYX_BENIGN",
|
||||
label: "cmdi-benign-typescript",
|
||||
oracle: Oracle::OutputContains("NYX_PWN_791_CMDI"),
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/benchmark/corpus/typescript/cmdi/cmdi_async_wrapper.ts",
|
||||
"tests/benchmark/corpus/typescript/cmdi/cmdi_exec_template.ts",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
50
src/dynamic/corpus/crypto/go.rs
Normal file
50
src/dynamic/corpus/crypto/go.rs
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
//! Go `Cap::CRYPTO` payloads — `math/rand.Intn` weak-key
|
||||
//! generation.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const WEAK_BITS: u32 = 16;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_WEAK",
|
||||
label: "crypto-go-weak-random",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/go/vuln.go"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "crypto-go-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_STRONG",
|
||||
label: "crypto-go-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/go/benign.go"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
61
src/dynamic/corpus/crypto/java.rs
Normal file
61
src/dynamic/corpus/crypto/java.rs
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
//! Java `Cap::CRYPTO` payloads — `java.util.Random.nextBytes`
|
||||
//! weak-key generation.
|
||||
//!
|
||||
//! Vuln payload: marker bytes that signal the harness to drive its
|
||||
//! `java.util.Random` key-generation path. The harness emits a key
|
||||
//! bounded inside a 16-bit search space and writes a
|
||||
//! [`crate::dynamic::probe::ProbeKind::WeakKey`] probe — the
|
||||
//! [`crate::dynamic::oracle::ProbePredicate::WeakKeyEntropy`]
|
||||
//! predicate fires for `key_int < 2^16`.
|
||||
//!
|
||||
//! Benign control: marker bytes that route the harness through
|
||||
//! `java.security.SecureRandom`, producing a 256-bit key whose
|
||||
//! integer view trivially exceeds the budget.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const WEAK_BITS: u32 = 16;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_WEAK",
|
||||
label: "crypto-java-weak-random",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/java/vuln.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "crypto-java-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_STRONG",
|
||||
label: "crypto-java-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/java/benign.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
26
src/dynamic/corpus/crypto/mod.rs
Normal file
26
src/dynamic/corpus/crypto/mod.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
//! Weak-crypto (`Cap::CRYPTO`) per-language payload slices.
|
||||
//!
|
||||
//! Phase 11 (Track J.9) carves a weak-key entropy oracle across the
|
||||
//! five backend languages where homegrown key generation is common
|
||||
//! enough to matter: Java (`java.util.Random.nextBytes` → key bytes),
|
||||
//! Python (`random.randint(0, 0xFFFF)`), PHP (`mt_rand(0, 0xFFFF)`),
|
||||
//! Go (`math/rand.Intn(0x10000)`), Rust (`rand::thread_rng` truncated
|
||||
//! to 16 bits). Every vuln payload triggers the harness's
|
||||
//! instrumented key-generation path with a seed that produces an
|
||||
//! attacker-derivable key bounded inside the 16-bit search space.
|
||||
//! The harness shim writes a
|
||||
//! [`crate::dynamic::probe::ProbeKind::WeakKey`] probe whose `key_int` is populated
|
||||
//! with the produced integer view of the key bytes; the
|
||||
//! [`crate::dynamic::oracle::ProbePredicate::WeakKeyEntropy`]
|
||||
//! predicate fires when `key_int < 2^max_bits` (`max_bits = 16` by
|
||||
//! default). The paired benign control routes the same harness
|
||||
//! through a CSPRNG (`SecureRandom`, `secrets.token_bytes`,
|
||||
//! `random_bytes(32)`, `crypto/rand.Read`, `rand::rngs::OsRng`) so
|
||||
//! the produced `key_int` trivially exceeds the budget and the
|
||||
//! predicate stays clear.
|
||||
|
||||
pub mod go;
|
||||
pub mod java;
|
||||
pub mod php;
|
||||
pub mod python;
|
||||
pub mod rust;
|
||||
49
src/dynamic/corpus/crypto/php.rs
Normal file
49
src/dynamic/corpus/crypto/php.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
//! PHP `Cap::CRYPTO` payloads — `mt_rand` weak-key generation.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const WEAK_BITS: u32 = 16;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_WEAK",
|
||||
label: "crypto-php-weak-random",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/php/vuln.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "crypto-php-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_STRONG",
|
||||
label: "crypto-php-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/php/benign.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
59
src/dynamic/corpus/crypto/python.rs
Normal file
59
src/dynamic/corpus/crypto/python.rs
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
//! Python `Cap::CRYPTO` payloads — `random.randint` weak-key
|
||||
//! generation.
|
||||
//!
|
||||
//! Vuln payload: marker bytes that route the harness through
|
||||
//! `random.randint(0, 0xFFFF)`; the harness emits a
|
||||
//! [`crate::dynamic::probe::ProbeKind::WeakKey`] probe and the
|
||||
//! [`crate::dynamic::oracle::ProbePredicate::WeakKeyEntropy`]
|
||||
//! predicate fires.
|
||||
//!
|
||||
//! Benign control: marker bytes that route the harness through
|
||||
//! `secrets.token_bytes(32)`.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const WEAK_BITS: u32 = 16;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_WEAK",
|
||||
label: "crypto-python-weak-random",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/python/vuln.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "crypto-python-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_STRONG",
|
||||
label: "crypto-python-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/python/benign.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
50
src/dynamic/corpus/crypto/rust.rs
Normal file
50
src/dynamic/corpus/crypto/rust.rs
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
//! Rust `Cap::CRYPTO` payloads — `rand::thread_rng` weak-key
|
||||
//! generation truncated to 16 bits.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const WEAK_BITS: u32 = 16;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_WEAK",
|
||||
label: "crypto-rust-weak-random",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/rust/vuln.rs"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "crypto-rust-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_CRYPTO_STRONG",
|
||||
label: "crypto-rust-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::WeakKeyEntropy {
|
||||
max_bits: WEAK_BITS,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/crypto/rust/benign.rs"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
49
src/dynamic/corpus/data_exfil/go.rs
Normal file
49
src/dynamic/corpus/data_exfil/go.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
//! Go `Cap::DATA_EXFIL` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"attacker.test",
|
||||
label: "data-exfil-go-external",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/go/vuln.go"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "data-exfil-go-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"127.0.0.1",
|
||||
label: "data-exfil-go-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/go/benign.go"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
49
src/dynamic/corpus/data_exfil/java.rs
Normal file
49
src/dynamic/corpus/data_exfil/java.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
//! Java `Cap::DATA_EXFIL` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"attacker.test",
|
||||
label: "data-exfil-java-external",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/java/Vuln.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "data-exfil-java-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"127.0.0.1",
|
||||
label: "data-exfil-java-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/java/Benign.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
49
src/dynamic/corpus/data_exfil/js.rs
Normal file
49
src/dynamic/corpus/data_exfil/js.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
//! JavaScript `Cap::DATA_EXFIL` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"attacker.test",
|
||||
label: "data-exfil-js-external",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/js/vuln.js"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "data-exfil-js-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"127.0.0.1",
|
||||
label: "data-exfil-js-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/js/benign.js"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
22
src/dynamic/corpus/data_exfil/mod.rs
Normal file
22
src/dynamic/corpus/data_exfil/mod.rs
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
//! Data-exfiltration (`Cap::DATA_EXFIL`) per-language payload
|
||||
//! slices.
|
||||
//!
|
||||
//! Phase 11 (Track J.9) carves an outbound-network oracle across
|
||||
//! all seven backend-capable languages. Each harness stands up a
|
||||
//! mock HTTP client that records the destination host of every
|
||||
//! outbound request via a
|
||||
//! [`crate::dynamic::probe::ProbeKind::OutboundNetwork`] (`host`)
|
||||
//! probe. The
|
||||
//! [`crate::dynamic::oracle::ProbePredicate::OutboundHostNotIn`]
|
||||
//! predicate fires when the captured `host` falls outside the
|
||||
//! configured loopback allowlist (`&["127.0.0.1", "localhost"]`).
|
||||
//! The vuln payload supplies `attacker.test`; the paired benign
|
||||
//! control supplies `127.0.0.1` so the predicate stays clear.
|
||||
|
||||
pub mod go;
|
||||
pub mod java;
|
||||
pub mod js;
|
||||
pub mod php;
|
||||
pub mod python;
|
||||
pub mod ruby;
|
||||
pub mod rust;
|
||||
49
src/dynamic/corpus/data_exfil/php.rs
Normal file
49
src/dynamic/corpus/data_exfil/php.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
//! PHP `Cap::DATA_EXFIL` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"attacker.test",
|
||||
label: "data-exfil-php-external",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/php/vuln.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "data-exfil-php-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"127.0.0.1",
|
||||
label: "data-exfil-php-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/php/benign.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
49
src/dynamic/corpus/data_exfil/python.rs
Normal file
49
src/dynamic/corpus/data_exfil/python.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
//! Python `Cap::DATA_EXFIL` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"attacker.test",
|
||||
label: "data-exfil-python-external",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/python/vuln.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "data-exfil-python-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"127.0.0.1",
|
||||
label: "data-exfil-python-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/python/benign.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
49
src/dynamic/corpus/data_exfil/ruby.rs
Normal file
49
src/dynamic/corpus/data_exfil/ruby.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
//! Ruby `Cap::DATA_EXFIL` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"attacker.test",
|
||||
label: "data-exfil-ruby-external",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/ruby/vuln.rb"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "data-exfil-ruby-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"127.0.0.1",
|
||||
label: "data-exfil-ruby-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/ruby/benign.rb"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
49
src/dynamic/corpus/data_exfil/rust.rs
Normal file
49
src/dynamic/corpus/data_exfil/rust.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
//! Rust `Cap::DATA_EXFIL` payloads.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const ALLOWLIST: &[&str] = &["127.0.0.1", "localhost"];
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"attacker.test",
|
||||
label: "data-exfil-rust-external",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/rust/vuln.rs"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "data-exfil-rust-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"127.0.0.1",
|
||||
label: "data-exfil-rust-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::OutboundHostNotIn {
|
||||
allowlist: ALLOWLIST,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/data_exfil/rust/benign.rs"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
62
src/dynamic/corpus/deserialize/java.rs
Normal file
62
src/dynamic/corpus/deserialize/java.rs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
//! Java `Cap::DESERIALIZE` payloads.
|
||||
//!
|
||||
//! Vuln payload: a `NYX_GADGET_CLASS:<class>` marker literal standing in
|
||||
//! for a base64-encoded `java.io.ObjectInputStream` byte stream that
|
||||
//! materialises a gadget class outside the harness's allowlist.
|
||||
//! The harness's `RestrictedObjectInputStream.resolveClass` intercepts
|
||||
//! the lookup and emits a `ProbeKind::Deserialize {
|
||||
//! gadget_chain_invoked: true }` probe before aborting the chain.
|
||||
//!
|
||||
//! Benign control: a `NYX_GADGET_CLASS:java.lang.Integer` marker literal
|
||||
//! standing in for a base64-encoded `ObjectInputStream` byte stream of a
|
||||
//! single allow-listed `java.lang.Integer`. The class lives inside the
|
||||
//! resolveClass allowlist so no Deserialize probe is emitted.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
// Marker class name embedded in the serialized stream — the
|
||||
// harness allowlist contains `java.lang.Integer` and `java.lang.String`
|
||||
// only. The byte form is a small literal so const-eval can keep it.
|
||||
bytes: b"NYX_GADGET_CLASS:org.nyx.deserialize.Gadget",
|
||||
label: "java-deserialize-gadget",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 7,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/deserialize/java/Vuln.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "java-deserialize-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
// Allow-listed payload — the marker carries `java.lang.Integer`,
|
||||
// which the harness resolveClass accepts without writing a probe.
|
||||
bytes: b"NYX_GADGET_CLASS:java.lang.Integer",
|
||||
label: "java-deserialize-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 7,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/deserialize/java/Benign.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
17
src/dynamic/corpus/deserialize/mod.rs
Normal file
17
src/dynamic/corpus/deserialize/mod.rs
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
//! Deserialization (`Cap::DESERIALIZE`) per-language payload slices.
|
||||
//!
|
||||
//! Phase 03 (Track J.1) lands the first cap end-to-end: Java
|
||||
//! (`ObjectInputStream.readObject` / `XMLDecoder`), Python (`pickle.loads`
|
||||
//! / `yaml.unsafe_load`), PHP (`unserialize`), and Ruby (`Marshal.load`
|
||||
//! / `YAML.load`). Every vuln payload is paired with a benign control
|
||||
//! whose oracle should *not* fire — the per-language harness shims
|
||||
//! emit a [`crate::dynamic::probe::ProbeKind::Deserialize`] record with
|
||||
//! `gadget_chain_invoked: true` when a non-allowlisted gadget class is
|
||||
//! materialised by the instrumented deserialiser; benign well-formed
|
||||
//! serialized data does not reach the allowlist boundary and so leaves
|
||||
//! no Deserialize probe.
|
||||
|
||||
pub mod java;
|
||||
pub mod php;
|
||||
pub mod python;
|
||||
pub mod ruby;
|
||||
60
src/dynamic/corpus/deserialize/php.rs
Normal file
60
src/dynamic/corpus/deserialize/php.rs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
//! PHP `Cap::DESERIALIZE` payloads.
|
||||
//!
|
||||
//! Vuln payload: marker string handed to `unserialize($input)` where the
|
||||
//! harness wraps the call with `['allowed_classes' => false]` and an
|
||||
//! observer on `__wakeup`. When `unserialize` materialises a
|
||||
//! `__PHP_Incomplete_Class` from a non-allowlisted class name, the
|
||||
//! observer emits a `ProbeKind::Deserialize { gadget_chain_invoked:
|
||||
//! true }` probe.
|
||||
//!
|
||||
//! Benign control: serialised primitive (an `int`) that
|
||||
//! `unserialize` materialises without engaging the allowlist boundary.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_GADGET_CLASS:PHP_Object_Injection_RCE",
|
||||
label: "php-unserialize-gadget",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 7,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/deserialize/php/vuln.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "php-unserialize-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
// Allow-listed marker — the harness allowlist accepts
|
||||
// `__primitive_int` as a no-op type representing a serialised
|
||||
// integer literal.
|
||||
bytes: b"NYX_GADGET_CLASS:__primitive_int",
|
||||
label: "php-unserialize-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 7,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/deserialize/php/benign.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
56
src/dynamic/corpus/deserialize/python.rs
Normal file
56
src/dynamic/corpus/deserialize/python.rs
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
//! Python `Cap::DESERIALIZE` payloads.
|
||||
//!
|
||||
//! Vuln payload: marker string consumed by the harness shim which calls
|
||||
//! `pickle.Unpickler(...).load()` with `find_class` overridden to record
|
||||
//! a `ProbeKind::Deserialize { gadget_chain_invoked: true }` whenever a
|
||||
//! non-allowlisted class is requested. The harness allowlists
|
||||
//! `builtins.list` / `builtins.dict` / `builtins.int`; the marker class
|
||||
//! `nyx.gadget.RCE` is outside that set.
|
||||
//!
|
||||
//! Benign control: payload requests only allow-listed builtins.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_GADGET_CLASS:nyx.gadget.RCE",
|
||||
label: "python-pickle-gadget",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 7,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/deserialize/python/vuln.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "python-pickle-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_GADGET_CLASS:builtins.list",
|
||||
label: "python-pickle-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 7,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/deserialize/python/benign.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
57
src/dynamic/corpus/deserialize/ruby.rs
Normal file
57
src/dynamic/corpus/deserialize/ruby.rs
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
//! Ruby `Cap::DESERIALIZE` payloads.
|
||||
//!
|
||||
//! Vuln payload: marker string consumed by the harness shim which calls
|
||||
//! `Marshal.load(input)` with `Marshal.const_defined?`-style
|
||||
//! instrumentation that records a `ProbeKind::Deserialize {
|
||||
//! gadget_chain_invoked: true }` probe whenever a non-allowlisted
|
||||
//! constant is materialised. The harness allowlist contains `Integer`
|
||||
//! / `String` / `Array`.
|
||||
//!
|
||||
//! Benign control: marker requests only the allow-listed `Integer`
|
||||
//! constant.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_GADGET_CLASS:Nyx::Gadget::RCE",
|
||||
label: "ruby-marshal-gadget",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 7,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/deserialize/ruby/vuln.rb"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "ruby-marshal-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_GADGET_CLASS:Integer",
|
||||
label: "ruby-marshal-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
|
||||
require_invoked: true,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 7,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/deserialize/ruby/benign.rb"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
56
src/dynamic/corpus/fmt_string/c.rs
Normal file
56
src/dynamic/corpus/fmt_string/c.rs
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
//! Format-string (`Cap::FMT_STRING`) payloads exercised by C fixtures
|
||||
//! (`tests/dynamic_fixtures/c/free_fn/`).
|
||||
//!
|
||||
//! The vuln payload confirms via a sink-site Crash probe rather than an
|
||||
//! output marker. The bytes themselves are not load-bearing — the
|
||||
//! detection contract is "process aborts inside the entry call AFTER the
|
||||
//! crash-guard installs". Fixtures choose how to crash on the payload;
|
||||
//! the canonical example is a `printf`-family sink that interprets `%n`
|
||||
//! against a controlled destination pointer on a guard-compiled binary.
|
||||
//!
|
||||
//! The benign control must reach the same entry without crashing; its
|
||||
//! bytes carry `NYX_BENIGN` so fixture code can short-circuit before
|
||||
//! the fault path.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::SignalSet;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"%n%n%n%n%n%n%n%n%n%n",
|
||||
label: "fmt-string-percent-n-crash",
|
||||
oracle: Oracle::SinkCrash {
|
||||
signals: SignalSet::all(),
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 5,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/c/free_fn/sink_fault.c",
|
||||
"tests/dynamic_fixtures/c/free_fn/setup_fault.c",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "fmt-string-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"benign_safe_fmt_NYX_BENIGN",
|
||||
label: "fmt-string-benign",
|
||||
oracle: Oracle::SinkCrash {
|
||||
signals: SignalSet::all(),
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 5,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/c/free_fn/sink_fault.c"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
3
src/dynamic/corpus/fmt_string/mod.rs
Normal file
3
src/dynamic/corpus/fmt_string/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
//! Format-string (`Cap::FMT_STRING`) per-language payload slices.
|
||||
|
||||
pub mod c;
|
||||
56
src/dynamic/corpus/header_injection/go.rs
Normal file
56
src/dynamic/corpus/header_injection/go.rs
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
//! Go `Cap::HEADER_INJECTION` payloads —
|
||||
//! `http.ResponseWriter.Header().Set` CRLF injection.
|
||||
//!
|
||||
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
|
||||
//! nyx-injected=pwn`. Spliced into the host's
|
||||
//! `w.Header().Set("Set-Cookie", value)` call without CRLF stripping.
|
||||
//!
|
||||
//! Benign control: same logical cookie value pre-encoded with
|
||||
//! `net/url.QueryEscape`. Captured value carries `%0D%0A` so the
|
||||
//! predicate stays clear.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-go-crlf",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/go/vuln.go"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-go-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-go-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/go/benign.go"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
122
src/dynamic/corpus/header_injection/java.rs
Normal file
122
src/dynamic/corpus/header_injection/java.rs
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
//! Java `Cap::HEADER_INJECTION` payloads —
|
||||
//! `HttpServletResponse.setHeader` CRLF injection.
|
||||
//!
|
||||
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
|
||||
//! nyx-injected=pwn`. Concatenated into the host's
|
||||
//! `response.setHeader("Set-Cookie", value)` call without CRLF
|
||||
//! stripping, the wire response carries the attacker's second
|
||||
//! header. The harness's instrumented `setHeader` records a
|
||||
//! `ProbeKind::HeaderEmit { name: "Set-Cookie", value: <raw bytes> }`
|
||||
//! probe with the unescaped CRLF intact.
|
||||
//!
|
||||
//! Benign control: same logical session-id, but the harness's
|
||||
//! benign code path runs the value through `URLEncoder.encode(...,
|
||||
//! "UTF-8")` so the carried bytes become
|
||||
//! `nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn`. The
|
||||
//! captured value has no literal `\r\n`; the
|
||||
//! [`ProbePredicate::HeaderInjected`] predicate stays clear.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-java-crlf",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/java/Vuln.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-java-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-java-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/java/Benign.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
|
||||
// Same CRLF-bearing bytes as the servlet payload above, but pinned
|
||||
// to the `java_raw` fixture (a `java.net.ServerSocket` driven by
|
||||
// `createServer` + `runOnce` that writes raw bytes via
|
||||
// `OutputStream.write(byte[])`). The wire frame captured off the
|
||||
// response socket carries two distinct `Set-Cookie:` lines, so
|
||||
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
|
||||
// "Set-Cookie" }` fires — proving the smuggled header survived
|
||||
// to the actual wire instead of being CRLF-stripped en route by
|
||||
// Tomcat / Jetty / Undertow.
|
||||
//
|
||||
// Distinct payload (not just an extra predicate on the servlet
|
||||
// row) because every modern Java servlet container response
|
||||
// serializer strips CRLF at the wire-write boundary, so the
|
||||
// wire-frame predicate would never fire against the canonical
|
||||
// servlet fixture.
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-java-raw-wire-smuggle",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/java_raw/Vuln.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-java-raw-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-java-raw-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/java_raw/Vuln.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
114
src/dynamic/corpus/header_injection/js.rs
Normal file
114
src/dynamic/corpus/header_injection/js.rs
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
//! JavaScript `Cap::HEADER_INJECTION` payloads —
|
||||
//! `http.ServerResponse#setHeader` CRLF injection.
|
||||
//!
|
||||
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
|
||||
//! nyx-injected=pwn`. Spliced into the host's
|
||||
//! `res.setHeader('Set-Cookie', value)` call without CRLF stripping.
|
||||
//!
|
||||
//! Benign control: same logical cookie value pre-encoded with
|
||||
//! `encodeURIComponent`. Captured value carries `%0D%0A` so the
|
||||
//! predicate stays clear.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-js-crlf",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/js/vuln.js"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-js-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-js-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/js/benign.js"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
|
||||
// Same CRLF-bearing bytes as the Node payload above, but pinned to
|
||||
// the `js_raw` fixture (a `net.createServer` callback writing raw
|
||||
// bytes via `socket.write`). The wire frame captured off the
|
||||
// response socket carries two distinct `Set-Cookie:` lines, so
|
||||
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
|
||||
// "Set-Cookie" }` fires — proving the smuggled header survived to
|
||||
// the actual wire instead of being CRLF-stripped en route.
|
||||
//
|
||||
// Distinct payload (not just an extra predicate on the Node row)
|
||||
// because Node's `http.ServerResponse#setHeader` validator strips
|
||||
// CRLF at the wire-write boundary, so the wire-frame predicate
|
||||
// would never fire against the canonical Node fixture. See
|
||||
// `.pitboss/play/deferred.md` (Phase 08 wire-frame option A) for
|
||||
// the framework-level CRLF-strip empirical from session-0018.
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-js-raw-wire-smuggle",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/js_raw/vuln.js"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-js-raw-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-js-raw-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/js_raw/vuln.js"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
31
src/dynamic/corpus/header_injection/mod.rs
Normal file
31
src/dynamic/corpus/header_injection/mod.rs
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
//! HTTP response-header CRLF injection (`Cap::HEADER_INJECTION`)
|
||||
//! per-language payload slices.
|
||||
//!
|
||||
//! Phase 08 (Track J.6) carves header injection across the seven HTTP
|
||||
//! framework ecosystems Nyx supports: Java
|
||||
//! (`HttpServletResponse.setHeader`), Python
|
||||
//! (`flask.Response.headers.__setitem__`), PHP (`header()`), Ruby
|
||||
//! (`Rack::Response#set_header`), JavaScript
|
||||
//! (`http.ServerResponse#setHeader`), Go
|
||||
//! (`http.ResponseWriter.Header().Set`), Rust (`axum`-style
|
||||
//! `HeaderMap::insert`). Every
|
||||
//! vuln payload appends a `\r\n` followed by an injected header line
|
||||
//! (`Set-Cookie: nyx-injected=pwn`) — once the host code splices the
|
||||
//! attacker bytes into the response writer's value argument the wire
|
||||
//! actually carries two headers instead of one. The paired benign
|
||||
//! control passes the same logical value through the per-language URL
|
||||
//! encoder so the captured value carries `%0D%0A` (not the raw
|
||||
//! bytes), the encoded text is preserved verbatim inside a single
|
||||
//! header value, and the differential rule stays clear.
|
||||
//!
|
||||
//! The oracle's
|
||||
//! [`crate::dynamic::oracle::ProbePredicate::HeaderInjected`] reads
|
||||
//! the per-payload `ProbeKind::HeaderEmit { name, value }` records
|
||||
//! and fires when the value contains a literal CRLF byte pair —
|
||||
//! vuln passes, benign clears, fulfilling the §4.1 differential rule.
|
||||
|
||||
pub mod go;
|
||||
pub mod java;
|
||||
pub mod js;
|
||||
pub mod php;
|
||||
pub mod python;
|
||||
pub mod ruby;
|
||||
pub mod rust;
|
||||
117
src/dynamic/corpus/header_injection/php.rs
Normal file
117
src/dynamic/corpus/header_injection/php.rs
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
//! PHP `Cap::HEADER_INJECTION` payloads — `header()` CRLF injection.
|
||||
//!
|
||||
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
|
||||
//! nyx-injected=pwn`. Concatenated into the host's
|
||||
//! `header("Set-Cookie: " . $value)` call without CRLF stripping, the wire response
|
||||
//! carries the attacker's second header. The harness's instrumented
|
||||
//! `header()` records a `ProbeKind::HeaderEmit` probe with the
|
||||
//! unescaped CRLF intact.
|
||||
//!
|
||||
//! Benign control: same logical cookie value pre-encoded with PHP's
|
||||
//! `urlencode`. Captured value carries `%0D%0A` so the predicate
|
||||
//! stays clear.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-php-crlf",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/php/vuln.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-php-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-php-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/php/benign.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
|
||||
// Same CRLF-bearing bytes as the `header()` payload above, but
|
||||
// pinned to the `php_raw` fixture (a `stream_socket_server` driven
|
||||
// by `create_server` + `run_once` that writes raw bytes via
|
||||
// `fwrite($conn, $raw)`). The wire frame captured off the
|
||||
// response socket carries two distinct `Set-Cookie:` lines, so
|
||||
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
|
||||
// "Set-Cookie" }` fires — proving the smuggled header survived to
|
||||
// the actual wire instead of being CRLF-stripped en route.
|
||||
//
|
||||
// Distinct payload (not just an extra predicate on the `header()`
|
||||
// row) because PHP's built-in `header()` rejects raw CRLF since
|
||||
// 5.1.2 and modern Slim / Laravel / Symfony response serializers
|
||||
// strip CRLF at the wire-write boundary, so the wire-frame
|
||||
// predicate would never fire against the canonical `header()`
|
||||
// fixture.
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-php-raw-wire-smuggle",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/php_raw/vuln.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-php-raw-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-php-raw-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/php_raw/vuln.php"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
120
src/dynamic/corpus/header_injection/python.rs
Normal file
120
src/dynamic/corpus/header_injection/python.rs
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
//! Python `Cap::HEADER_INJECTION` payloads —
|
||||
//! `flask.Response.headers.__setitem__` CRLF injection.
|
||||
//!
|
||||
//! Vuln payload: a session cookie value followed by `\r\nSet-Cookie:
|
||||
//! nyx-injected=pwn`. Spliced into the host's
|
||||
//! `response.headers["Set-Cookie"] = value` assignment without CRLF
|
||||
//! stripping, the WSGI layer carries the attacker's second header on
|
||||
//! the wire. The harness's instrumented response writer records a
|
||||
//! `ProbeKind::HeaderEmit { name: "Set-Cookie", value: <raw bytes> }`
|
||||
//! probe with the unescaped CRLF intact.
|
||||
//!
|
||||
//! Benign control: same logical cookie value pre-encoded with
|
||||
//! `urllib.parse.quote`. The carried bytes become
|
||||
//! `nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn` — no literal
|
||||
//! CRLF — and the [`ProbePredicate::HeaderInjected`] predicate stays
|
||||
//! clear.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-python-crlf",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/python/vuln.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-python-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-python-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/python/benign.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
|
||||
// Same CRLF-bearing bytes as the Flask payload above, but pinned
|
||||
// to the `python_raw` fixture (a `BaseHTTPRequestHandler` writing
|
||||
// raw bytes via `self.wfile.write`). The wire frame captured off
|
||||
// the response socket carries two distinct `Set-Cookie:` lines, so
|
||||
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
|
||||
// "Set-Cookie" }` fires — proving the smuggled header survived to
|
||||
// the actual wire instead of being CRLF-stripped en route.
|
||||
//
|
||||
// Distinct payload (not just an extra predicate on the Flask row)
|
||||
// because Flask's werkzeug response serializer strips CRLF at the
|
||||
// wire-write boundary, so the wire-frame predicate would never
|
||||
// fire against the canonical Flask fixture. See
|
||||
// `.pitboss/play/deferred.md` (Phase 08 wire-frame option A) for
|
||||
// the framework-level CRLF-strip empirical from session-0018.
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-python-raw-wire-smuggle",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/python_raw/vuln.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-python-raw-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-python-raw-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/python_raw/vuln.py"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
114
src/dynamic/corpus/header_injection/ruby.rs
Normal file
114
src/dynamic/corpus/header_injection/ruby.rs
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
//! Ruby `Cap::HEADER_INJECTION` payloads —
|
||||
//! `Rack::Response#set_header` CRLF injection.
|
||||
//!
|
||||
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
|
||||
//! nyx-injected=pwn`. Spliced into the host's
|
||||
//! `response.set_header("Set-Cookie", value)` call without CRLF
|
||||
//! stripping, the wire response carries the attacker's second header.
|
||||
//!
|
||||
//! Benign control: same logical cookie value pre-encoded with
|
||||
//! `URI.encode_www_form_component`. Captured value carries `%0D%0A`
|
||||
//! so the predicate stays clear.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-ruby-crlf",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/ruby/vuln.rb"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-ruby-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-ruby-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/ruby/benign.rb"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
|
||||
// Same CRLF-bearing bytes as the Rack payload above, but pinned to
|
||||
// the `ruby_raw` fixture (a `TCPServer` driven by `create_server`
|
||||
// + `run_once` that writes raw bytes via `TCPSocket#write`). The
|
||||
// wire frame captured off the response socket carries two
|
||||
// distinct `Set-Cookie:` lines, so `HeaderSmuggledInWire { primary:
|
||||
// "Set-Cookie", smuggled: "Set-Cookie" }` fires — proving the
|
||||
// smuggled header survived to the actual wire instead of being
|
||||
// CRLF-stripped en route.
|
||||
//
|
||||
// Distinct payload (not just an extra predicate on the Rack row)
|
||||
// because Rack / Sinatra / Rails response serializers strip CRLF
|
||||
// at the wire-write boundary, so the wire-frame predicate would
|
||||
// never fire against the canonical Rack fixture.
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-ruby-raw-wire-smuggle",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/ruby_raw/vuln.rb"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-ruby-raw-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-ruby-raw-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/ruby_raw/vuln.rb"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
116
src/dynamic/corpus/header_injection/rust.rs
Normal file
116
src/dynamic/corpus/header_injection/rust.rs
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
//! Rust `Cap::HEADER_INJECTION` payloads — `axum`-style
|
||||
//! `HeaderMap::insert` CRLF injection.
|
||||
//!
|
||||
//! Vuln payload: a cookie value followed by `\r\nSet-Cookie:
|
||||
//! nyx-injected=pwn`. The value is spliced into a hand-rolled `HeaderMap`
|
||||
//! insert that bypasses the `HeaderValue::from_str` / `from_bytes` validity
|
||||
//! check (e.g. `HeaderValue::from_maybe_shared_unchecked(...)` over a
|
||||
//! tainted slice).
|
||||
//!
|
||||
//! Benign control: same logical cookie value pre-encoded with the
|
||||
//! `percent-encoding` crate. Captured value carries `%0D%0A` so the
|
||||
//! predicate stays clear.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-rust-crlf",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/rust/vuln.rs"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-rust-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-rust-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderInjected {
|
||||
header_name: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/rust/benign.rs"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
// Phase 08 tier-(b): raw-socket wire-frame smuggling payload.
|
||||
// Same CRLF-bearing bytes as the axum payload above, but pinned to
|
||||
// the `rust_raw` fixture (a `std::net::TcpListener` driven by
|
||||
// `create_server` + `run_once` that writes raw bytes via
|
||||
// `TcpStream::write_all`). The wire frame captured off the
|
||||
// response socket carries two distinct `Set-Cookie:` lines, so
|
||||
// `HeaderSmuggledInWire { primary: "Set-Cookie", smuggled:
|
||||
// "Set-Cookie" }` fires — proving the smuggled header survived to
|
||||
// the actual wire instead of being CRLF-stripped en route.
|
||||
//
|
||||
// Distinct payload (not just an extra predicate on the axum row)
|
||||
// because every framework's response serializer strips CRLF at
|
||||
// the wire-write boundary, so the wire-frame predicate would
|
||||
// never fire against the canonical axum fixture. See
|
||||
// `.pitboss/play/deferred.md` (Phase 08 wire-frame option A) for
|
||||
// the framework-level CRLF-strip empirical from session-0018.
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session\r\nSet-Cookie: nyx-injected=pwn",
|
||||
label: "header-injection-rust-raw-wire-smuggle",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/rust_raw/vuln.rs"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "header-injection-rust-raw-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn",
|
||||
label: "header-injection-rust-raw-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::HeaderSmuggledInWire {
|
||||
primary: "Set-Cookie",
|
||||
smuggled: "Set-Cookie",
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 12,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/header_injection/rust_raw/vuln.rs"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
54
src/dynamic/corpus/json_parse/go.rs
Normal file
54
src/dynamic/corpus/json_parse/go.rs
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
//! Go `Cap::JSON_PARSE` payloads.
|
||||
//!
|
||||
//! The depth pair shares a single fixture; the payload tag
|
||||
//! (`NYX_JSON_DEEP` vs `NYX_JSON_SHALLOW`) picks the branch. Go has
|
||||
//! no prototype-pollution surface so the canary half of the slice is
|
||||
//! intentionally omitted.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const MAX_DEPTH: u32 = 64;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_JSON_DEEP",
|
||||
label: "json-parse-go-depth-bomb",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::JsonParseExcessiveDepth {
|
||||
max_depth: MAX_DEPTH,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/json_parse_depth/go/vuln.go"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::JsonParseExcessiveDepth {
|
||||
max_depth: MAX_DEPTH,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "json-parse-go-depth-shallow",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_JSON_SHALLOW",
|
||||
label: "json-parse-go-depth-shallow",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::JsonParseExcessiveDepth {
|
||||
max_depth: MAX_DEPTH,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/json_parse_depth/go/vuln.go"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
59
src/dynamic/corpus/json_parse/java.rs
Normal file
59
src/dynamic/corpus/json_parse/java.rs
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
//! Java `Cap::JSON_PARSE` payloads.
|
||||
//!
|
||||
//! The depth pair shares a single fixture; the payload tag
|
||||
//! (`NYX_JSON_DEEP` vs `NYX_JSON_SHALLOW`) picks the branch. Java has
|
||||
//! no prototype-pollution surface so the canary half of the slice is
|
||||
//! intentionally omitted, matching the PHP / Go / Rust shape.
|
||||
//!
|
||||
//! Java has no stdlib JSON parser, so the harness ships a hand-rolled
|
||||
//! iterative JSON walker as a sibling class (`NyxJsonProbe.java`); the
|
||||
//! fixture calls `NyxJsonProbe.parse(text)` in place of any Jackson /
|
||||
//! Gson dependency so the build path never reaches for an external jar.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
const MAX_DEPTH: u32 = 64;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_JSON_DEEP",
|
||||
label: "json-parse-java-depth-bomb",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::JsonParseExcessiveDepth {
|
||||
max_depth: MAX_DEPTH,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/json_parse_depth/java/Vuln.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::JsonParseExcessiveDepth {
|
||||
max_depth: MAX_DEPTH,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "json-parse-java-depth-shallow",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: b"NYX_JSON_SHALLOW",
|
||||
label: "json-parse-java-depth-shallow",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::JsonParseExcessiveDepth {
|
||||
max_depth: MAX_DEPTH,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &["tests/dynamic_fixtures/json_parse_depth/java/Vuln.java"],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue