docs: update inline references and improve XSS detection in Java servlet writers, refactor matchers for clarity and extend sanitizer support

This commit is contained in:
elipeter 2026-06-03 11:32:30 -05:00
parent c2cd6f009e
commit eb4332edb5
56 changed files with 339 additions and 144 deletions

View file

@ -304,7 +304,16 @@ PY
|| { echo " FAIL: wall-clock exceeds budget"; return 1; }
echo "[]" > "${results_report}"
# --static buckets a command-injection finding that carries only the
# SHELL_ESCAPE sink cap (the static, unconfirmed cmdi class for every
# language) as `cmdi` instead of `other`. Without a dynamic Confirm the
# SHELL_ESCAPE→CODE_EXEC remap never runs (Java servlet harnesses build-
# fail in CI), so the default lens leaves every cmdi finding in `other`
# and reads the cmdi cell as 0/0/N; the static lens is the correct
# bucketing for an unconfirmed scan and is appended at lowest priority so
# no higher-priority cap cell changes.
python3 "${REPO_ROOT}/tests/eval_corpus/tabulate.py" \
--static \
--label owasp \
--scan "${scan_report}" \
--ground-truth "${REPO_ROOT}/tests/eval_corpus/ground_truth/owasp_benchmark_v1.2.json" \
@ -416,7 +425,13 @@ PY
|| { echo " FAIL: ${label} wall-clock exceeds budget"; return 1; }
echo "[]" > "${results_report}"
# --static: bucket SHELL_ESCAPE-only command-injection findings as `cmdi`
# (see the Gate 6 note) so the per-cap table reflects the engine's real
# static classification in CI where no dynamic Confirm runs the
# SHELL_ESCAPE→CODE_EXEC remap. Appended at lowest priority; no other cap
# cell changes.
local -a tabulate_args=(
--static
--label "${label}"
--scan "${scan_report}"
--ground-truth "${gt}"

View file

@ -1006,6 +1006,7 @@ fn is_test_suppressible_pattern(id: &str) -> bool {
|| id.ends_with(".crypto.math_random")
|| id.ends_with(".crypto.insecure_random")
|| id.ends_with(".crypto.weak_digest")
|| id.ends_with(".crypto.weak_algorithm")
|| id.ends_with(".crypto.md5")
|| id.ends_with(".crypto.sha1")
|| id.ends_with(".crypto.rand")

View file

@ -180,6 +180,109 @@ fn ssa_all_sink_operands_const_or_param(ctx: &AnalysisContext, sink: NodeIndex)
args_ok && receiver_ok
}
/// Decide whether a `cfg-unguarded-sink` finding can be dropped because the
/// sink only accepts an injection payload at specific argument positions
/// (`sink_payload_args`) and each operand at those positions is a concrete
/// constant.
///
/// The flat [`is_all_args_constant`] check looks at *every* operand, which
/// wrongly rejects a safe parameterised call such as Go's
/// `db.QueryContext(context.Background(), "SELECT … $1", bind)`: only arg 1
/// (the SQL string, `payload_args = [1]`) can carry an injection, but the
/// non-payload `context.Background()` call and the positional bind value are
/// non-constant and defeat the all-operands test. The taint engine already
/// honours the payload-arg gate (no `taint-unsanitised-flow` fires), so under
/// `!has_taint` a sink whose payload positions are all literals is safe by
/// construction.
///
/// Returns `false` whenever the proof cannot be completed: the sink has no
/// (or empty) payload positions, `ctx` carries no const facts, the sink has
/// no entry in the SSA node map, its instruction cannot be found or is not a
/// call, or a payload position lies outside the recorded argument list.
fn sink_payload_args_const(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
    // Only sinks that declare a non-empty payload-position list qualify.
    let Some(positions) = &ctx.cfg[sink].call.sink_payload_args else {
        return false;
    };
    if positions.is_empty() {
        return false;
    }

    // Resolve the sink's SSA call instruction; bail out at the first gap.
    let facts = match ctx.body_const_facts {
        Some(facts) => facts,
        None => return false,
    };
    let sink_val = match facts.ssa.cfg_node_map.get(&sink) {
        Some(&val) => val,
        None => return false,
    };
    let inst = match find_inst(&facts.ssa, sink_val) {
        Some(inst) => inst,
        None => return false,
    };
    let call_args = match &inst.op {
        SsaOp::Call { args, .. } => args,
        _ => return false,
    };

    for &pos in positions.iter() {
        // A payload position beyond the recorded arguments cannot be proven
        // safe, so it vetoes the suppression.
        let Some(group) = call_args.get(pos) else {
            return false;
        };
        for v in group.iter() {
            // Each value feeding a payload position must be a known literal.
            let is_literal = matches!(
                facts.const_values.get(v),
                Some(
                    ConstLattice::Str(_)
                        | ConstLattice::Int(_)
                        | ConstLattice::Bool(_)
                        | ConstLattice::Null
                )
            );
            if !is_literal {
                return false;
            }
        }
    }
    true
}
/// Decide whether a `cfg-unguarded-sink` SSRF finding can be dropped because
/// the sink's URL operand is origin-locked: it comes from a
/// `new URL(path, base)` / `urljoin(base, path)` / `url.JoinPath(base, …)`
/// builder whose base argument pins the scheme+host, so the
/// (attacker-controlled) path component cannot redirect the request off the
/// locked origin.
///
/// Mirrors the taint engine's `StringFact::from_url_with_base` prefix-lock
/// (`url_builder_arg_indices` + `is_string_safe_for_ssrf`): the taint engine
/// stays silent on this shape, so the parallel structural finding is a false
/// positive.
///
/// The base is recognised only when the builder node records a string
/// literal at the base position (`arg_string_literals[base_idx]`). The
/// function returns `true` as soon as one variable used by the sink has a
/// defining node in the same enclosing function that is such a builder call;
/// it returns `false` when `sink_caps` lacks [`Cap::SSRF`] or no such
/// definition exists.
fn sink_url_origin_locked(ctx: &AnalysisContext, sink: NodeIndex, sink_caps: Cap) -> bool {
    if !sink_caps.contains(Cap::SSRF) {
        return false;
    }
    let sink_info = &ctx.cfg[sink];
    let sink_func = sink_info.ast.enclosing_func.as_deref();

    // CFG one-hop trace (mirrors `is_all_args_constant`): the SSA
    // `cfg_node_map` only covers the body whose facts are attached to `ctx`,
    // so a sink inside a nested function (e.g. an Express arrow handler) is
    // missed by the SSA path. Scan the CFG instead, looking for a node in
    // the sink's own function that defines one of the sink's used variables
    // via an origin-locking URL builder.
    for used in sink_info.taint.uses.iter() {
        for idx in ctx.cfg.node_indices() {
            let def = &ctx.cfg[idx];

            // Consider only same-function nodes that define `used`.
            if def.ast.enclosing_func.as_deref() != sink_func
                || def.taint.defines.as_deref() != Some(used.as_str())
            {
                continue;
            }

            // The definition must be a call to a recognised URL builder
            // (`new URL(path, base)` / `urljoin` / `JoinPath`).
            let Some(callee) = def.call.callee.as_deref() else {
                continue;
            };
            let Some((_, base_idx)) = crate::ssa::type_facts::url_builder_arg_indices(
                ctx.lang,
                callee,
                def.call.outer_callee.as_deref(),
                def.call.is_constructor,
            ) else {
                continue;
            };

            // The lock holds when the base argument is a string literal.
            let base_literal = def
                .call
                .arg_string_literals
                .get(base_idx)
                .and_then(|slot| slot.as_deref());
            if base_literal.is_some() {
                return true;
            }
        }
    }
    false
}
/// Return true if the SSA body contains a *named* variable whose definition
/// is a constant, the SSA signature of an explicit `name = "literal"`
/// reassignment. Used as the gate for the broader operand-Param suppression:
@ -2810,6 +2913,29 @@ impl CfgAnalysis for UnguardedSink {
continue;
}
// Payload-arg-gated sinks (e.g. Go `db.QueryContext(ctx, sql,
// ...binds)`, `payload_args = [1]`): only the payload positions can
// carry an injection. When the taint engine is already silent
// (`!has_taint`) and every payload-position operand is a constant
// literal, the non-payload operands (a `context.Context`, bind
// values) cannot make the call dangerous, so the structural finding
// is a false positive even though `is_all_args_constant` rejects it.
if !has_taint && sink_payload_args_const(ctx, *sink) {
continue;
}
// Origin-locked URL SSRF sinks (`fetch(new URL(path, "https://…"))`):
// the builder's literal base pins scheme+host, so the
// attacker-controlled path cannot redirect off-origin. The taint
// engine already suppresses this via the abstract prefix-lock, so
// the parallel structural finding is a false positive. NOT gated
// on `!has_taint`: the origin lock holds precisely *because* the
// tainted path reaches the builder — the host stays fixed — so the
// syntactic taint-reaches signal must not re-open the finding.
if sink_url_origin_locked(ctx, *sink, sink_caps) {
continue;
}
// SSA latest-def suppression: when the taint engine has already
// proved no source-tainted data reaches this sink (`!has_taint`)
// and every SSA operand resolves to a constant, callee-fragment

View file

@ -3,9 +3,9 @@
//! Each call to [`findings_to_edges`] emits exactly one [`ChainEdge`]
//! per input finding. The edge is *typed* by:
//!
//! - the primary [`Cap`] bit picked from [`Evidence::sink_caps`]
//! - the primary [`Cap`] bit picked from [`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps)
//! (the lowest-bit set, chosen deterministically), and
//! - the *reach* — the surface [`EntryPoint`] in the same file as the
//! - the *reach* — the surface [`EntryPoint`](crate::surface::EntryPoint) in the same file as the
//! finding, when one exists, otherwise [`Reach::Unreachable`].
//!
//! Phase 25's path search composes these edges with the SurfaceMap's
@ -35,7 +35,7 @@ pub struct FindingRef {
pub location: SourceLocation,
/// Rule identifier (`Diag::id`).
pub rule_id: String,
/// Resolved sink cap bits ([`Evidence::sink_caps`]).
/// Resolved sink cap bits ([`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps)).
pub cap_bits: u32,
}

View file

@ -226,7 +226,7 @@ fn standalone_lookup(cap: Cap) -> Option<ImpactCategory> {
/// first rule in [`IMPACT_LATTICE`] order (specific before fallback).
///
/// The standalone-rule walks (second + third pass) are O(1) via
/// [`STANDALONE_BY_BIT`]. The two-cap walk (first pass) stays linear
/// `STANDALONE_BY_BIT`. The two-cap walk (first pass) stays linear
/// because the 2-cap subset is small (today: three rules); promote
/// to a sorted-pair binary search if the lattice grows past ~16
/// pair-rules.

View file

@ -15,7 +15,7 @@
//!
//! Two parallel `Vec`s — `nodes` and `edges` — mirroring `SurfaceMap`'s
//! shape. Determinism is the caller's responsibility: edges are
//! produced in the order the source [`Diag`] slice presents, and
//! produced in the order the source [`Diag`](crate::commands::scan::Diag) slice presents, and
//! `findings_to_edges` does not sort the input. Phase 25 will fold
//! these into a `petgraph::DiGraph` for path search.
//!

View file

@ -7,7 +7,7 @@
//! ```
//!
//! The DFS starts at the implicit attacker node (virtually adjacent to
//! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`]
//! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`](ChainSearchConfig::max_depth)
//! per-finding hops, and terminates at any
//! [`crate::surface::DangerousLocal`] node. Each emitted
//! [`ChainFinding`] is the deterministic minimum-length path through a

View file

@ -1,6 +1,6 @@
//! Long-lived `javac` daemon (Phase 22 / Track O.0).
//!
//! The legacy [`crate::dynamic::build_sandbox::try_compile_java`] shell-execs a
//! The legacy `try_compile_java_with_toolchain` in `build_sandbox` shell-execs a
//! fresh `javac` per harness — every invocation pays the JVM cold-start tax
//! (~700ms on the macOS reference machine, ~300ms on Linux CI). At 50
//! findings per OWASP-scale run that single line burns > 30s before any

View file

@ -87,7 +87,7 @@ const POOL_ENABLED_LANGS: &[&str] = &[
///
/// Format is a comma-separated list of `lang=bit` entries: `java=1,node=0`.
/// A missing language returns the default: `true` for every language that
/// ships a pool (see [`POOL_ENABLED_LANGS`]), `false` otherwise.
/// ships a pool (see `POOL_ENABLED_LANGS`), `false` otherwise.
pub fn is_pool_enabled(lang: &str) -> bool {
let default = POOL_ENABLED_LANGS.contains(&lang);
let raw = match std::env::var("NYX_DYNAMIC_BUILD_POOL") {

View file

@ -1783,7 +1783,7 @@ pub struct ChainStepBuildResult {
/// so a `Vec<HarnessSpec>` can be driven through the build pipeline
/// without per-language match arms scattered across each caller. The
/// production single-finding runner stays on the per-language match in
/// [`crate::dynamic::runner::execute`] because it folds the build result
/// [`crate::dynamic::runner::run_spec`] because it folds the build result
/// into command-vector rewrites that vary per language and have no
/// uniform shape — the chain reverifier does not need those rewrites
/// because the sandbox-run sub-task ((c) of Phase 26 follow-up) will

View file

@ -30,12 +30,12 @@
//! Adding a new language for a cap means: drop a new file under
//! `corpus/<cap>/<lang>.rs`, register `pub mod <lang>;` in the cap's
//! `mod.rs`, and wire `(Cap::<CAP>, Lang::<Lang>, <cap>::<lang>::PAYLOADS)`
//! into [`registry::ENTRIES`]. No other file needs to change.
//! into `registry::ENTRIES`. No other file needs to change.
//!
//! # Corpus governance (§16.1)
//!
//! Every payload carries [`PayloadProvenance`], a [`since_corpus_version`],
//! and at least one [`fixture_paths`] entry. The [`CORPUS_VERSION`] const
//! Every payload carries [`PayloadProvenance`], a [`CuratedPayload::since_corpus_version`],
//! and at least one [`CuratedPayload::fixture_paths`] entry. The [`CORPUS_VERSION`] const
//! tracks the history of incompatible corpus changes; bumping it
//! invalidates all `dynamic_verdict_cache` entries whose spec touched the
//! changed cap.
@ -171,9 +171,9 @@ pub struct CuratedPayload {
/// [`crate::dynamic::probe::SinkProbe`] records drained from the run's
/// probe channel (Phase 06 — Track C.1). Always populated; empty when
/// the payload still relies on the legacy
/// [`Oracle::OutputContains`](crate::dynamic::oracle::Oracle::OutputContains)
/// [`Oracle::OutputContains`]
/// path and has not been migrated to
/// [`Oracle::SinkProbe`](crate::dynamic::oracle::Oracle::SinkProbe) yet.
/// [`Oracle::SinkProbe`] yet.
pub probe_predicates: &'static [ProbePredicate],
/// Paired benign-control payload inside the same cap's slice.
///

View file

@ -12,7 +12,7 @@
//! whenever a maintainer forgets to wire a paired benign entry.
//!
//! 2. **Cap coverage is exhaustive.** The set of caps appearing in
//! [`CORPUS::entries`] OR [`CORPUS_UNSUPPORTED_LANG_NEUTRAL`] must
//! [`CORPUS`]'s [`entries`](super::CapCorpus::entries) OR [`CORPUS_UNSUPPORTED_LANG_NEUTRAL`] must
//! equal [`Cap::all`]. Adding a new `Cap` bit without classifying it
//! fails the build.
//!

View file

@ -36,7 +36,7 @@ use crate::symbol::Lang;
/// Caps with no payloads of their own — source-only sources, sanitizers,
/// and sinks we cannot yet model with a reliable oracle. The
/// [`super::audit`] module asserts that the union of caps covered by
/// [`CORPUS::entries`] and this constant equals [`Cap::all`].
/// [`CORPUS`]'s [`entries`](CapCorpus::entries) and this constant equals [`Cap::all`].
///
/// Phase 11 (Track J.9) carved `CRYPTO`, `JSON_PARSE`,
/// `UNAUTHORIZED_ID`, and `DATA_EXFIL` corpora; the remaining caps

View file

@ -20,7 +20,7 @@
//! specialisation of [`evaluate_sets`] and delegates to it.
//!
//! "Fires" means [`crate::dynamic::oracle::oracle_fired`] returned `true`
//! against the run's [`SandboxOutcome`] + drained [`SinkProbe`] set —
//! against the run's [`SandboxOutcome`](crate::dynamic::sandbox::SandboxOutcome) + drained [`SinkProbe`] set —
//! invariant across `Oracle::OutputContains` and `Oracle::SinkProbe`.
use crate::dynamic::probe::SinkProbe;

View file

@ -115,7 +115,7 @@ pub fn derive_secret(spec_hash: &str, env_var_name: &str) -> SecretValue {
/// | Ruby | `ENV["X"]`, `ENV.fetch("X")` |
/// | C/C++ | `getenv("X")` |
///
/// Static substring scan — bounded by [`IMPORT_SCAN_LIMIT`] like the import
/// Static substring scan — bounded by `IMPORT_SCAN_LIMIT` like the import
/// extractor. No AST: an entry-file with `os.environ.get(some_var)` (a
/// non-literal arg) is intentionally skipped; the secret bag is populated
/// from literal references only so a typo cannot produce noisy injection.
@ -367,7 +367,7 @@ pub struct CapturedDeps {
/// add the package-manager deps required when the real import is present.
pub framework_adapter: Option<String>,
/// Three-valued lang-has-framework signal (see
/// [`FrameworkContext::lang_has_web_framework`]).
/// [`FrameworkContext::lang_has_web_framework`](crate::utils::project::FrameworkContext::lang_has_web_framework)).
pub framework_signal: Option<bool>,
/// Absolute paths of local config files reachable from the entry
/// point's directory. Each is copied verbatim into the workdir
@ -380,7 +380,7 @@ pub struct CapturedDeps {
/// Manifest files (lockfile + project manifest pair) recognised for
/// [`Self::toolchain`]'s language. Each entry is an absolute path
/// inside `project_root`; the first existing entry from
/// [`MANIFEST_FILES_BY_LANG`] wins for [`Self::lockfile`].
/// `MANIFEST_FILES_BY_LANG` wins for [`Self::lockfile`].
pub manifests: Vec<PathBuf>,
/// First recognised manifest file (== `manifests[0]` when present).
/// Used by the per-language emitter as the canonical lockfile when

View file

@ -12,7 +12,7 @@
//! order of [`super::FrameworkAdapter::name`]. The lexical ordering
//! gives a deterministic first-match result that survives merges /
//! rebases without subtle re-ordering bugs. A `framework` unit test
//! ([`super::tests::registry_is_empty_for_every_lang_phase_01`])
//! (`registry_is_empty_for_every_lang_phase_01`)
//! captures the Phase-01 starting baseline so a phase that registers
//! its first adapter is forced to update both the slice *and* the
//! regression guard in the same change.

View file

@ -9,7 +9,7 @@
//! (`__NYX_SINK_HIT__` sentinel on stdout).
//! 5. Lets the sink either fire or not — the oracle observes from outside.
//!
//! One generator per [`Lang`]. Each emits source plus a build command.
//! One generator per [`Lang`](crate::symbol::Lang). Each emits source plus a build command.
//! Build artefacts are staged inside the sandbox working dir, never the
//! user's tree.

View file

@ -1,7 +1,7 @@
//! C harness emitter.
//!
//! Phase 16 (Track B Rust + C/C++ vertical) replaces the stub body with
//! dispatch over [`CShape`] — the cross product of [`EntryKind`] and a
//! dispatch over [`CShape`] — the cross product of [`EntryKind`](crate::dynamic::spec::EntryKind) and a
//! lightweight per-file shape detector that inspects the entry file for
//! `main(int argc, char *argv[])`, libFuzzer's `LLVMFuzzerTestOneInput`,
//! and free functions with `(const char*, size_t)` signatures.

View file

@ -1,7 +1,7 @@
//! Go harness emitter.
//!
//! Phase 15 (Track B Go vertical) replaces the single legacy `emit` body
//! with dispatch over [`GoShape`] — the cross product of [`EntryKind`]
//! with dispatch over [`GoShape`] — the cross product of [`EntryKind`](crate::dynamic::spec::EntryKind)
//! and a lightweight per-file shape detector that inspects the entry
//! file for `net/http` handler signatures, gin context handlers,
//! `flag.Parse` CLIs, and `func(args ...) error` fuzz harnesses.
@ -312,7 +312,7 @@ fn read_entry_source(entry_file: &str) -> String {
/// Phase 09 — Track D.2: synthesise a `go.mod` listing every captured
/// third-party import path. Standard-library imports are skipped via
/// [`is_go_stdlib`].
/// `is_go_stdlib`.
pub fn materialize_go(env: &Environment) -> RuntimeArtifacts {
let mut artifacts = RuntimeArtifacts::new();
let go_version = env

View file

@ -2,7 +2,7 @@
//!
//! Phase 14 (Track B Java vertical) replaces the single legacy `emit`
//! body with dispatch over [`JavaShape`] — the cross product of
//! [`EntryKind`] and a lightweight per-file shape detector that inspects
//! [`EntryKind`](crate::dynamic::spec::EntryKind) and a lightweight per-file shape detector that inspects
//! the entry file for servlet / Spring / Quarkus annotations, JUnit
//! markers, and `static main(String[])` signatures.
//!
@ -200,10 +200,10 @@ impl JavaShape {
/// pass an empty string and the function returns
/// [`Self::StaticMethod`]).
///
/// Framework / annotation detection wins over the [`EntryKind`]
/// Framework / annotation detection wins over the [`EntryKind`](crate::dynamic::spec::EntryKind)
/// axis: when the source clearly imports a servlet or Spring
/// controller the shape is selected even if the spec derivation
/// pipeline tagged the entry kind as [`EntryKind::Function`].
/// pipeline tagged the entry kind as [`EntryKind::Function`](crate::dynamic::spec::EntryKind::Function).
pub fn detect(spec: &HarnessSpec, source: &str) -> Self {
let entry = spec.entry_name.as_str();
let kind = spec.entry_kind.tag();
@ -1273,7 +1273,7 @@ public class NyxHarness {{
/// template, and dispatches the resulting filter against the
/// in-sandbox LDAP stub via `javax.naming.directory.InitialDirContext`
/// over the real LDAPv3 BER wire (the stub's accept loop at
/// [`crate::dynamic::stubs::ldap_server::accept_loop`] auto-detects
/// `crate::dynamic::stubs::ldap_server::accept_loop` auto-detects
/// the `0x30 SEQUENCE` lead byte and routes through the BER
/// reader/writer at [`crate::dynamic::stubs::ldap_ber`]). Falls back
/// to an in-process RFC 4515 subset matcher against three canonical
@ -2417,7 +2417,7 @@ public class NyxHarness {{
/// tree without pulling Jackson / Gson onto the classpath. The
/// fixture calls `NyxJsonProbe.parse(text)` in place of any library
/// JSON parser. When the parser's own
/// [`NyxJsonProbe.NyxJsonDepthException`] fires (nesting above
/// `NyxJsonProbe.NyxJsonDepthException` fires (nesting above
/// `MAX_PARSE_DEPTH = 4096`) the harness emits a `JsonParse { depth:
/// 0, excessive_depth: true }` probe before continuing — matches the
/// PHP `JSON_ERROR_DEPTH` and Python `RecursionError` excess paths.

View file

@ -20,7 +20,7 @@
//! The bundle ships both `javax.servlet` and `jakarta.servlet` so source
//! files predating the EE 9 rename and source files using the new
//! namespace both link. Each stub is generated from the same template via
//! [`make_servlet_stubs`] so the two trees stay in sync.
//! `make_servlet_stubs` so the two trees stay in sync.
/// Stub bundle for the servlet-shape Java harnesses.
///

View file

@ -3,16 +3,16 @@
//! After Phase 13 (Track B JS + TS vertical) the per-shape dispatch lives in
//! [`crate::dynamic::lang::js_shared`]. This module is the typed surface for
//! `Lang::JavaScript`: registers the [`JavaScriptEmitter`] in the dispatch
//! table, advertises the supported [`EntryKind`] set, and forwards
//! table, advertises the supported [`EntryKind`](crate::dynamic::spec::EntryKind) set, and forwards
//! `emit` / `materialize_runtime` calls to the shared module.
//!
//! Payload slot support (handled by `js_shared::emit`):
//! - [`PayloadSlot::Param`] — n-th positional argument.
//! - [`PayloadSlot::EnvVar`] — set env var before calling.
//! - [`PayloadSlot::Stdin`] — pipe payload to `process.stdin`.
//! - [`PayloadSlot::QueryParam`] — HTTP-shaped query param (Express / Koa / Next).
//! - [`PayloadSlot::HttpBody`] — HTTP body (Express / Koa / Next).
//! - [`PayloadSlot::Argv`] — coerced to positional `Param(0)` by build_call.
//! - [`PayloadSlot::Param`](crate::dynamic::spec::PayloadSlot::Param) — n-th positional argument.
//! - [`PayloadSlot::EnvVar`](crate::dynamic::spec::PayloadSlot::EnvVar) — set env var before calling.
//! - [`PayloadSlot::Stdin`](crate::dynamic::spec::PayloadSlot::Stdin) — pipe payload to `process.stdin`.
//! - [`PayloadSlot::QueryParam`](crate::dynamic::spec::PayloadSlot::QueryParam) — HTTP-shaped query param (Express / Koa / Next).
//! - [`PayloadSlot::HttpBody`](crate::dynamic::spec::PayloadSlot::HttpBody) — HTTP body (Express / Koa / Next).
//! - [`PayloadSlot::Argv`](crate::dynamic::spec::PayloadSlot::Argv) — coerced to positional `Param(0)` by build_call.
use crate::dynamic::environment::{Environment, RuntimeArtifacts};
use crate::dynamic::lang::{

View file

@ -132,7 +132,7 @@ pub trait LangEmitter {
/// Build a harness source bundle for `spec`.
fn emit(&self, spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason>;
/// The set of [`EntryKind`] variants this emitter understands,
/// The set of [`EntryKind`](crate::dynamic::spec::EntryKind) variants this emitter understands,
/// projected to the [`EntryKindTag`] discriminant so the slice can
/// live in `'static` storage even after Phase 18 extended
/// `EntryKind` with data-bearing variants.

View file

@ -2,7 +2,7 @@
//!
//! Phase 15 (Track B PHP vertical) replaces the single legacy `emit`
//! body with dispatch over [`PhpShape`] — the cross product of
//! [`EntryKind`] and a lightweight per-file shape detector that
//! [`EntryKind`](crate::dynamic::spec::EntryKind) and a lightweight per-file shape detector that
//! inspects the entry file for Slim/Laravel/Symfony route closures,
//! `$argv`-driven CLI scripts, and top-level script bodies.
//!
@ -856,7 +856,7 @@ echo json_encode(["entity_expanded" => $expanded]) . "\n";
/// Reads `NYX_PAYLOAD`, splices it into a `(uid=<payload>)` filter,
/// and — when `NYX_LDAP_ENDPOINT` is set — routes the search through
/// the in-sandbox LDAP stub over the real LDAPv3 BER wire (the stub's
/// accept loop at [`crate::dynamic::stubs::ldap_server::accept_loop`]
/// accept loop at `crate::dynamic::stubs::ldap_server::accept_loop`
/// auto-detects the `0x30 SEQUENCE` lead byte and routes through the
/// reader/writer at [`crate::dynamic::stubs::ldap_ber`]). Falls back
/// to an in-process RFC 4515 subset matcher against three canonical

View file

@ -2,7 +2,7 @@
//!
//! Phase 12 (Track B Python vertical) replaces the single legacy
//! `emit` body with dispatch over [`PythonShape`] — the cross product of
//! [`EntryKind`] and a lightweight per-file shape detector that inspects
//! [`EntryKind`](crate::dynamic::spec::EntryKind) and a lightweight per-file shape detector that inspects
//! the entry file for framework decorators / CLI gates / async / pytest
//! conventions. Each shape returns its own [`HarnessSource`] but shares
//! the Phase 06 probe shim ([`probe_shim`]) and payload prelude so the
@ -14,7 +14,7 @@
//! positionally with the payload). The dispatch never returns an
//! emitter-side error for an unknown shape — that responsibility belongs
//! to `lang::emit`, which has already gated on
//! [`EntryKind`] via [`PythonEmitter::entry_kinds_supported`].
//! [`EntryKind`](crate::dynamic::spec::EntryKind) via [`PythonEmitter::entry_kinds_supported`].
//!
//! Payload slot support:
//! - [`PayloadSlot::Param`] — n-th positional argument.
@ -176,10 +176,10 @@ impl PythonShape {
/// pass an empty string and the function returns [`Self::Generic`]).
///
/// Framework detection (Flask / FastAPI / Django) wins over the
/// [`EntryKind`] axis: when the source clearly imports one of those
/// [`EntryKind`](crate::dynamic::spec::EntryKind) axis: when the source clearly imports one of those
/// frameworks the route shape is selected even if the spec
/// derivation pipeline tagged the entry kind as
/// [`EntryKind::Function`]. This makes the dispatcher robust
/// [`EntryKind::Function`](crate::dynamic::spec::EntryKind::Function). This makes the dispatcher robust
/// against the synthetic flow-step path used by tests and against
/// the legacy substring-only entry-kind heuristic.
pub fn detect(spec: &HarnessSpec, source: &str) -> Self {
@ -2616,7 +2616,7 @@ if __name__ == "__main__":
/// Reads `NYX_PAYLOAD`, splices it into a `(uid=<payload>)` filter,
/// and — when `NYX_LDAP_ENDPOINT` is set — routes the search through
/// the in-sandbox LDAP stub over the real LDAPv3 BER wire (the stub's
/// accept loop at [`crate::dynamic::stubs::ldap_server::accept_loop`]
/// accept loop at `crate::dynamic::stubs::ldap_server::accept_loop`
/// auto-detects the `0x30 SEQUENCE` lead byte and routes through the
/// reader/writer at [`crate::dynamic::stubs::ldap_ber`]). Falls back
/// to an in-process RFC 4515 subset matcher against three canonical

View file

@ -2,7 +2,7 @@
//!
//! Phase 15 (Track B Ruby vertical) replaces the previous `LangUnsupported`
//! stub with dispatch over [`RubyShape`] — the cross product of
//! [`EntryKind`] and a lightweight per-file shape detector that inspects
//! [`EntryKind`](crate::dynamic::spec::EntryKind) and a lightweight per-file shape detector that inspects
//! the entry file for Sinatra routes, Rails controller actions, Hanami
//! actions, Rack middleware, and generic controller methods.
//!

View file

@ -2671,7 +2671,7 @@ fn is_ident_char(ch: char) -> bool {
/// - Other caps use only std (no extra deps).
///
/// `libc` is always pinned because the Phase 16 probe shim (spliced into
/// `src/main.rs` by [`generate_main_rs`]) calls `libc::sigaction` from
/// `src/main.rs` by `generate_main_rs`) calls `libc::sigaction` from
/// `__nyx_install_crash_guard`. The shim is unconditionally compiled so
/// the dep must be unconditional too.
pub fn generate_cargo_toml(cap: Cap) -> String {

View file

@ -78,7 +78,7 @@ impl OobListener {
/// URL to embed in a payload for `nonce`.
///
/// Format: `http://127.0.0.1:{port}/{nonce}`. Use this URL for the
/// process sandbox. For Docker sandboxes use [`nonce_url_for_host`].
/// process sandbox. For Docker sandboxes use [`Self::nonce_url_for_host`].
pub fn nonce_url(&self, nonce: &str) -> String {
format!("http://127.0.0.1:{}/{}", self.port, nonce)
}

View file

@ -1301,7 +1301,7 @@ impl Canary {
/// Derive a 32-byte canary for the finding identified by `spec_hash`.
///
/// `BLAKE3("nyx.dynamic.canary.v1" ‖ run_nonce ‖ spec_hash)`. The
/// [`run_nonce`] is a process-global value seeded once from the OS
/// `run_nonce` is a process-global value seeded once from the OS
/// CSPRNG (mixed with time + pid as a fallback), so two runs of the same
/// spec draw different canaries and a stale probe record cannot satisfy a
/// later run. Keying on `spec_hash` gives every finding in a single run

View file

@ -330,7 +330,7 @@ impl DenyRule {
/// Finding's path or evidence references a production endpoint
/// (e.g. `api.prod.example.com`, `*.production.*`,
/// `*-prod.amazonaws.com`). Conservative: matched against the
/// short list in [`PROD_ENDPOINT_REGEXES`].
/// short list in `PROD_ENDPOINT_REGEXES`.
pub const PRODUCTION_ENDPOINT: &'static str = "production-endpoint";
}
@ -382,8 +382,8 @@ const PROD_ENDPOINT_REGEXES: &[&str] = &[
/// snippets, and the `SpanEvidence` snippets for source/sink/guard/
/// sanitizer entries. Each text is fed to three predicates in turn
/// — [`DenyRule::CREDENTIALS`] (via [`crate::utils::redact::contains_secret`]),
/// [`DenyRule::PRIVATE_KEY`] (via [`PRIVATE_KEY_LITERALS`]),
/// [`DenyRule::PRODUCTION_ENDPOINT`] (via [`PROD_ENDPOINT_REGEXES`]).
/// [`DenyRule::PRIVATE_KEY`] (via `PRIVATE_KEY_LITERALS`),
/// [`DenyRule::PRODUCTION_ENDPOINT`] (via `PROD_ENDPOINT_REGEXES`).
/// The first match wins and the verifier short-circuits to
/// [`crate::evidence::InconclusiveReason::PolicyDeniedDynamic`].
///

View file

@ -50,7 +50,7 @@ use directories::ProjectDirs;
use std::fs;
use std::path::{Path, PathBuf};
/// Emitted by [`write`] on success.
/// Emitted by [`write()`] on success.
#[derive(Debug, Clone)]
pub struct ReproArtifact {
/// Absolute path to the repro bundle root.
@ -288,7 +288,7 @@ fn repro_root(spec_hash: &str) -> Result<PathBuf, ReproError> {
/// Resolve the bundle path for `spec_hash` without creating any directories.
///
/// Returns the same path [`write`] uses (`~/.cache/nyx/dynamic/repro/{spec_hash}/`)
/// Returns the same path [`write()`] uses (`~/.cache/nyx/dynamic/repro/{spec_hash}/`)
/// so callers can locate an existing bundle for replay. Respects the
/// `NYX_REPRO_BASE` test override.
///

View file

@ -3,18 +3,18 @@
//! A harness needs the language toolchain's heavyweight dependency tree
//! (`node_modules`, `vendor`, `target/`, …) but that tree is identical across
//! every finding in a run — installing it per-finding is the bulk of the
//! per-workdir setup cost. A [`Baseline`] holds one shared, warmed copy under
//! per-workdir setup cost. A [`Baseline`](crate::dynamic::sandbox::baseline::Baseline) holds one shared, warmed copy under
//! the build-pool cache dir; each per-finding workdir gets a cheap snapshot of
//! it:
//!
//! - **macOS** — a `clonefile` CoW snapshot (via
//! [`crate::dynamic::harness::copy_workdir`]).
//! `crate::dynamic::harness::copy_workdir`).
//! - **Linux** — a read-only `mount --bind`, falling back to a reflink copy
//! when bind mounts are unavailable (no `CAP_SYS_ADMIN` / not in a mount
//! namespace).
//!
//! The baseline root honours `NYX_BUILD_POOL_DIR` through
//! [`crate::dynamic::build_pool::pool_cache_dir`], so tests can redirect it
//! `crate::dynamic::build_pool::pool_cache_dir`, so tests can redirect it
//! into a `TempDir` and it shares the same on-disk layout as the Phase 22/23
//! build pools (`<cache>/dynamic/build-pool/<lang>/baseline`).

View file

@ -2,8 +2,8 @@
//!
//! This module is the thin layer between the pinned-digest catalogue
//! (`tools/image-builder/images.toml` → `src/dynamic/toolchain.rs::IMAGE_DIGESTS`)
//! and the existing docker invocations in [`super::run_docker`] /
//! [`super::run_native_binary_docker`].
//! and the existing docker invocations in `super::run_docker` /
//! `super::run_native_binary_docker`.
//!
//! Responsibilities:
//!
@ -16,7 +16,7 @@
//! - mounts each `StubHarness` filesystem root at a fixed `/nyx/stubs/<n>`
//! path so harness-side shims can find them without hard-coding host
//! tempdir layouts,
//! - honours the [`super::NetworkPolicy`] (none / OOB / stubs-only / open)
//! - honours the [`NetworkPolicy`](crate::dynamic::sandbox::NetworkPolicy) (none / OOB / stubs-only / open)
//! using the same flag set as the legacy `start_container`.
//!
//! All helpers are infallible w.r.t. docker availability — they return arg
@ -157,7 +157,7 @@ pub fn stub_mount_args(stub_roots: &[std::path::PathBuf]) -> Vec<String> {
/// Render the `--network` + `--add-host` flag slice for a [`NetworkPolicy`].
///
/// Mirrors the legacy block in [`super::start_container`] so callers using
/// Mirrors the legacy block in `super::start_container` so callers using
/// the new docker.rs entry point produce byte-identical container layouts
/// to the existing path — important for `tests/dynamic_parity.rs` to keep
/// reading the same verdicts across backends.

View file

@ -23,7 +23,7 @@
//! 3. The probe is cached behind a `OnceLock` so repeated calls into [`run`]
//! do not re-`stat` the binary every time. Tests that swap
//! `NYX_FIRECRACKER_BIN` between scenarios bypass the cache via the
//! uncached [`is_firecracker_reachable`] helper.
//! uncached [`is_firecracker_reachable`](crate::dynamic::sandbox::firecracker::is_firecracker_reachable) helper.
use std::sync::OnceLock;

View file

@ -54,7 +54,7 @@ pub mod firecracker;
/// Phase 17 (Track E.1) + Phase 18 (Track E.2) per-run hardening outcome.
///
/// Returned by [`run_process`] on the [`SandboxOutcome`] so callers (tests +
/// Returned by `run_process` on the [`SandboxOutcome`] so callers (tests +
/// telemetry) can inspect the per-primitive status without consulting a
/// process-global singleton. The previous Phase 17/18 implementation kept
/// the outcome in `process_linux::LAST_OUTCOME` / `process_macos::LAST_OUTCOME`
@ -81,7 +81,7 @@ pub enum HardeningRecord {
/// IMAGE_DIGESTS`] entries to docker image refs, render `docker run`
/// flag slices that honour [`NetworkPolicy`], and mount the harness
/// workdir at the fixed `/work` path. The legacy entry points in this
/// file ([`run_docker`] / [`run_native_binary_docker`]) call into
/// file (`run_docker` / `run_native_binary_docker`) call into
/// `docker::ensure_image_pulled` so every harness run uses the catalogue
/// pin when one is available.
pub mod docker;
@ -233,7 +233,7 @@ pub struct SandboxOptions {
/// Phase 17 (Track E.1): cap bits used to minimise the seccomp-bpf
/// allowlist applied to the Linux process backend. When `0`, the
/// process backend installs only the cap-independent `base` allowlist
/// from [`seccomp::seccomp_policy.toml`]; when non-zero, every cap bit
/// from `seccomp::seccomp_policy.toml`; when non-zero, every cap bit
/// set adds its allowlisted syscalls on top. Other backends ignore
/// this field.
pub seccomp_caps: u32,
@ -264,7 +264,7 @@ pub struct SandboxOptions {
/// primitive toggles.
#[doc(hidden)]
pub ablation: Option<AblationMask>,
/// Phase 30 (Track C observability): optional [`VerifyTrace`] handle
/// Phase 30 (Track C observability): optional [`VerifyTrace`](crate::dynamic::trace::VerifyTrace) handle
/// the runner appends pipeline stages to (`build_started`,
/// `build_done`, `sandbox_started`, `oracle_wait`, `oracle_observed`).
/// `None` keeps the runner silent — sandbox-level callers that do
@ -284,7 +284,7 @@ pub struct SandboxOptions {
/// no-new-privs, all rlimits, namespace unshare, chroot to workdir,
/// default-deny seccomp filter scoped to [`SandboxOptions::seccomp_caps`].
/// Each primitive is best-effort; failures degrade to
/// [`HardeningLevel::Partial`] without aborting the run.
/// `HardeningLevel::Partial` without aborting the run.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ProcessHardeningProfile {
#[default]
@ -420,7 +420,7 @@ impl HostPort {
/// selector.
/// - [`NetworkPolicy::OobOutbound`] — the legacy "OOB only" path: the
/// harness can reach the per-scan OOB listener (and only it via the
/// Linux iptables filter in [`apply_oob_egress_filter`]). Docker:
/// Linux iptables filter in `apply_oob_egress_filter`). Docker:
/// `bridge` + host-gateway + iptables OOB-port filter.
/// - [`NetworkPolicy::Open`] — unrestricted outbound. Docker: `bridge`
/// with no egress filter. Reserved for diagnostic / dev-only runs;

View file

@ -1,13 +1,13 @@
//! Phase 18 (Track E.2) — macOS process backend hardening.
//!
//! macOS analogue of [`super::process_linux`]. Where the Linux backend
//! macOS analogue of `super::process_linux`. Where the Linux backend
//! installs a `pre_exec` sequence (prctl + rlimits + unshare + chroot +
//! seccomp-bpf), the macOS backend wraps the harness command with
//! `sandbox-exec(1)` driven by a per-capability `.sb` policy file.
//!
//! Profile selection
//! -----------------
//! [`profile_for_caps`] maps the [`SandboxOptions::seccomp_caps`] bitset
//! [`profile_for_caps`] maps the [`SandboxOptions::seccomp_caps`](super::SandboxOptions::seccomp_caps) bitset
//! (set by the verifier from `spec.expected_cap`) to a profile name in
//! `src/dynamic/sandbox_profiles/`:
//!
@ -254,13 +254,13 @@ pub fn profile_path(name: &str) -> Option<PathBuf> {
// without needing macOS-host sandbox-exec access.
/// Env var consulted by [`profile_path`] to enable the deny-default
/// splice. When set to `1` / `true`, [`deny_default_seed_for`] is
/// splice. When set to `1` / `true`, `deny_default_seed_for` is
/// invoked for every materialised profile; missing seeds fall back to
/// the baked `(allow default)` body so misconfiguration cannot brick
/// the sandbox-exec backend.
pub const SB_DENY_DEFAULT_ENV: &str = "NYX_SB_DENY_DEFAULT";
/// Env var consulted by [`deny_default_seed_for`] to locate the seed
/// Env var consulted by `deny_default_seed_for` to locate the seed
/// directory. Defaults to `tools/sb-trace/` relative to the workspace
/// root when unset; tests override this to point at a tempdir-backed
/// fixture set.

View file

@ -77,7 +77,7 @@ pub enum PayloadSlot {
HttpBody,
/// Environment variable.
EnvVar(String),
/// CLI argv slot (0-based, excluding argv[0]).
/// CLI argv slot (0-based, excluding `argv[0]`).
Argv(usize),
/// stdin.
Stdin,
@ -144,7 +144,7 @@ pub struct HarnessSpec {
/// this field is `None` for every spec; subsequent Track-L phases
/// register adapters and back-fill the binding.
///
/// Excluded from [`compute_spec_hash`]: the binding is descriptive
/// Excluded from `compute_spec_hash`: the binding is descriptive
/// metadata derived from the entry function and does not change
/// the harness boundary topology that the spec hash protects.
/// `#[serde(default, skip_serializing_if = "Option::is_none")]` so
@ -157,10 +157,10 @@ pub struct HarnessSpec {
/// decide whether to bootstrap a full Spring test context
/// (`SpringApplication.run` + `MockMvc`) or the lighter
/// reflective invocation path the legacy shapes use. Populated
/// by [`attach_framework_binding`] when the `java-spring`
/// by `attach_framework_binding` when the `java-spring`
/// adapter binds.
///
/// Excluded from [`compute_spec_hash`] for the same reason as
/// Excluded from `compute_spec_hash` for the same reason as
/// `framework`: the toggle is descriptive metadata driven by the
/// adapter binding, not a per-spec boundary topology axis.
/// Pre-Phase-14 serialised specs deserialise to the default
@ -663,7 +663,7 @@ fn first_annotated_entry(steps: &[crate::evidence::FlowStep]) -> Option<EntryRef
/// `java.deser.readobject`, `rs.auth.missing_ownership_check.taint`) plus the
/// finding's sink evidence. The diag's path and line locate the sink call
/// site; the rule namespace's first segment selects the language, and the
/// second segment maps to a [`Cap`] via [`cap_for_rule_category`].
/// second segment maps to a [`Cap`] via `cap_for_rule_category`.
///
/// A synthetic single-step `Source` flow is constructed at the diag location
/// so downstream consumers that walk `evidence.flow_steps` keep working. The
@ -901,7 +901,7 @@ pub fn derive_from_callgraph_entry_with(
/// Strict reverse-edge-BFS-only variant of
/// [`derive_from_callgraph_entry_full`].
///
/// Returns `Some(spec)` only when [`find_entry_via_callgraph`] resolves
/// Returns `Some(spec)` only when `find_entry_via_callgraph` resolves
/// the sink's enclosing function to a framework-bound ancestor via the
/// whole-program callgraph. Unlike
/// [`derive_from_callgraph_entry_full`], the summary-entry-kind fallback

View file

@ -26,9 +26,9 @@
//! `SearchResultEntry` (0x64), `SearchResultDone` (0x65).
//!
//! Context-specific tags inside `Filter` (RFC 4511 §4.5.1):
//! and [0], or [1], not [2], equalityMatch [3], substrings [4],
//! greaterOrEqual [5], lessOrEqual [6], present [7], approxMatch [8].
//! Plus simple-auth [0] inside `AuthenticationChoice`.
//! and \[0\], or \[1\], not \[2\], equalityMatch \[3\], substrings \[4\],
//! greaterOrEqual \[5\], lessOrEqual \[6\], present \[7\], approxMatch \[8\].
//! Plus simple-auth \[0\] inside `AuthenticationChoice`.
//!
//! Length encoding: short-form (single byte 0x00-0x7F) and long-form
//! (0x81-0x84 length-of-length, value up to 32 bits). Indefinite

View file

@ -14,7 +14,7 @@
//!
//! The accept loop peeks the first byte on each connection. When it
//! sees the universal `SEQUENCE` tag (`0x30`) — the leading byte of
//! every well-formed LDAPv3 [`LDAPMessage`] — it routes the
//! every well-formed LDAPv3 `LDAPMessage` — it routes the
//! conversation through [`super::ldap_ber`] so a harness using a stock
//! LDAP client (`javax.naming.directory.InitialDirContext`,
//! `python-ldap`, `ldap3`, …) can talk to the stub on the LDAPv3 wire

View file

@ -6,7 +6,7 @@
//! boundary can fire under test without depending on a live external
//! service. Each stub exposes:
//!
//! 1. [`StubProvider::start`] — spin the service up. The constructor of
//! 1. `StubProvider::start` — spin the service up. The constructor of
//! each concrete stub plays this role (e.g. [`SqlStub::start`]); the
//! trait method just hands back the kind for type-erased
//! introspection.

View file

@ -57,7 +57,7 @@ pub const NYX_VERSION: &str = env!("CARGO_PKG_VERSION");
/// so it can sit on a `Serialize`-derived struct alongside the other envelope
/// fields without an allocation. Mirrors
/// [`crate::dynamic::corpus::CORPUS_VERSION`]; the compile-time assertion
/// below + the [`corpus_version_const_matches_corpus_module`] runtime test
/// below + the `corpus_version_const_matches_corpus_module` runtime test
/// jointly guard drift.
pub const CORPUS_VERSION: &str = "17";

View file

@ -57,9 +57,30 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
case_sensitive: false,
},
// OWASP ESAPI encoders
// OWASP ESAPI encoders. The idiomatic call site is the fluent
// `ESAPI.encoder().encodeForHTML(x)` chain, which Java's chain collapse
// rewrites to the callee text `ESAPI.encodeForHTML` (the intermediate
// `encoder()` call is dropped), so the class-qualified
// `Encoder.encodeForHTML` matcher never fires on it. Match the
// `ESAPI.`- and `encoder.`-qualified forms so a value run through the
// canonical XSS encoder has its HTML_ESCAPE cap cleared before it reaches
// a `response.getWriter()` sink. Deliberately NOT matched bare: the OWASP
// Benchmark ships a decoy `Utils.encodeForHTML(...)` that returns the
// string UNCHANGED to test whether a scanner is fooled by the method name,
// so a bare `encodeForHTML` matcher would suppress real reflected-XSS.
LabelRule {
matchers: &["Encoder.encodeForHTML", "Encoder.encodeForJavaScript"],
matchers: &[
"Encoder.encodeForHTML",
"Encoder.encodeForJavaScript",
"ESAPI.encodeForHTML",
"ESAPI.encodeForHTMLAttribute",
"ESAPI.encodeForJavaScript",
"ESAPI.encodeForCSS",
"encoder.encodeForHTML",
"encoder.encodeForHTMLAttribute",
"encoder.encodeForJavaScript",
"encoder.encodeForCSS",
],
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
case_sensitive: false,
},
@ -232,10 +253,20 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: true,
},
// HTTP response sinks, println/print are broad (also match System.out)
// but necessary to catch response.getWriter().println() via suffix matching.
// HTTP response reflected-XSS sinks. `println` / `print` / `write` are
// the servlet response-writer output verbs; `write` is the dominant form
// in real servlets (`response.getWriter().write(html)`). All three are
// matched bare because Java collapses the writer chain
// `response.getWriter().write(x)` to the callee text `response.write`
// (the intermediate `getWriter()` call is dropped), so a receiver-typed
// `HttpResponse.write` rule never sees it. The breadth is bounded two
// ways: `System.out.println` / `System.err.println` are excluded by
// `suppress_known_safe_callees`, and `receiver_incompatible_sink_caps`
// strips `HTML_ESCAPE` whenever the receiver resolves to a non-response
// type (a `FileWriter` / `FileOutputStream` typed `FileHandle`, a DB
// connection, etc.), so genuine file/stream writes do not register as XSS.
LabelRule {
matchers: &["println", "print"],
matchers: &["println", "print", "write"],
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},

View file

@ -114,43 +114,72 @@ pub const PATTERNS: &[Pattern] = &[
confidence: Confidence::Medium,
},
// ── Tier A: Weak crypto ────────────────────────────────────────────
//
// The `type:` node is matched with an alternation rather than a bare
// `(type_identifier) (#eq? …)` so the fully-qualified call shapes that
// dominate real code (and the entire OWASP Benchmark) are caught:
// `new java.util.Random()` parses the type as a `scoped_type_identifier`,
// not a `type_identifier`, which the old `#eq? @t "Random"` query silently
// never matched (0 crypto findings on the whole corpus). The query keeps
// the exact `#eq?` comparison but captures the LAST type-name segment from
// either a bare `(type_identifier)` or the direct `(type_identifier)` child
// of a `(scoped_type_identifier)`, so both `new Random()` and
// `new java.util.Random()` match while `SecureRandom` (a different whole
// segment) does not.
Pattern {
id: "java.crypto.insecure_random",
description: "new Random() (java.util.Random) is not cryptographically secure",
query: r#"(object_creation_expression
type: (type_identifier) @t (#eq? @t "Random"))
type: [
(type_identifier) @t
(scoped_type_identifier (type_identifier) @t)
]
(#eq? @t "Random"))
@vuln"#,
severity: Severity::Low,
tier: PatternTier::A,
category: PatternCategory::Crypto,
confidence: Confidence::Medium,
},
// Weak crypto algorithm passed to a `getInstance("…")` factory, keyed on
// the algorithm string so the qualifier (`javax.crypto.Cipher` /
// `java.security.MessageDigest` FQN or a bare class) does not matter — the
// old per-class queries pinned `object: (identifier) "MessageDigest"` /
// `"Random"` and silently never matched the fully-qualified call shapes
// that dominate real code (0 crypto findings on the whole OWASP corpus).
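// Three alternations, all proven to fire from this `(string_literal)`
// position (the node text includes its quotes, so the leading `^.` of each
// alternation consumes the opening quote):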
// * `^.des/` — single-DES *cipher transforms* (`"DES/CBC/PKCS5Padding"`).
// The trailing `/` (mode separator) is required so the genuinely-weak
// single-DES Cipher fires while a bare `KeyGenerator.getInstance("DES")`
// key-spec and the stronger triple-DES `"DESede/…"` (which the OWASP
// Benchmark labels benign) do NOT — `"DESe"` has no `/` after `des`.
// * `^.(rc2|rc4|blowfish)` — broken stream/block ciphers (rare, real).
// * `^.(md2|md4|md5|sha1|sha-1).$` — broken hash digests as the WHOLE
// algorithm string (the trailing `.$` matches the closing quote so
// `"SHA1PRNG"` / `"HmacSHA1"` / `"SHA-256"` do NOT match).
// `getInstance` with any of these is `Cipher`/`MessageDigest` by
// construction; strong transforms (`AES/CBC`, `AES/GCM`, `SHA-256`) miss.
Pattern {
id: "java.crypto.weak_digest",
description: "MessageDigest.getInstance(\"MD5\"/\"SHA1\") uses a weak hash algorithm",
id: "java.crypto.weak_algorithm",
description: "Cipher/MessageDigest.getInstance with a broken algorithm (DES/RC4/MD5/SHA-1)",
query: r#"(method_invocation
object: (identifier) @c (#eq? @c "MessageDigest")
name: (identifier) @id (#eq? @id "getInstance")
arguments: (argument_list
(string_literal) @alg (#match? @alg "(?i)(md5|sha-?1)")))
@vuln"#,
severity: Severity::Low,
tier: PatternTier::A,
category: PatternCategory::Crypto,
confidence: Confidence::Medium,
},
// ── Tier A: XSS (servlet) ──────────────────────────────────────────
Pattern {
id: "java.xss.getwriter_print",
description: "response.getWriter().print/println writes output without encoding",
query: r#"(method_invocation
object: (method_invocation
name: (identifier) @gw (#eq? @gw "getWriter"))
name: (identifier) @id (#match? @id "^(print|println|write)$"))
(string_literal) @alg (#match? @alg "(?i)(^.des/|^.(rc2|rc4|blowfish)|^.(md2|md4|md5|sha1|sha-1).$)")))
@vuln"#,
severity: Severity::Medium,
tier: PatternTier::A,
category: PatternCategory::Xss,
confidence: Confidence::High,
category: PatternCategory::Crypto,
confidence: Confidence::Medium,
},
// Tier A reflected-XSS was previously a bare syntactic match on every
// `response.getWriter().print/println/write(...)` regardless of whether the
// written value was attacker-controlled or already HTML-encoded. On the
// OWASP Benchmark that fired ~4400 times at precision 0.05 (it flagged
// constant strings and `ESAPI.encoder().encodeForHTML(...)`-wrapped output
// identically to a raw tainted write). Reflected XSS is now a taint sink
// (`Sink(Cap::HTML_ESCAPE)` on the servlet writer verbs in
// `labels/java.rs`), which fires only when an un-encoded tainted value
// reaches the writer, so the syntactic pattern is retired.
];

View file

@ -1,4 +1,4 @@
//! `GET /api/surface` — serve the project's [`SurfaceMap`].
//! `GET /api/surface` — serve the project's [`SurfaceMap`](crate::surface::SurfaceMap).
//!
//! Loads the map persisted by the most recent indexed scan from
//! SQLite, falling back to building a fresh entry-point-only map from

View file

@ -2,7 +2,7 @@
//!
//! Builds basic blocks, computes dominators and dominance frontiers via
//! petgraph, inserts phi nodes, and renames variables over the dominator-tree
//! preorder to produce an [`SsaBody`](super::ir::SsaBody).
//! preorder to produce an [`SsaBody`].
#![allow(
clippy::if_same_then_else,

View file

@ -3,15 +3,15 @@
//! Phase 22 dispatch:
//!
//! 1. Per-file framework probes (one parser per language) emit
//! [`SurfaceNode::EntryPoint`] nodes for every recognised route /
//! [`SurfaceNode::EntryPoint`](crate::surface::SurfaceNode::EntryPoint) nodes for every recognised route /
//! handler.
//! 2. [`super::datastore::detect_data_stores`] walks
//! [`GlobalSummaries`] and emits [`SurfaceNode::DataStore`] nodes
//! [`GlobalSummaries`] and emits [`SurfaceNode::DataStore`](crate::surface::SurfaceNode::DataStore) nodes
//! for every recognised driver call.
//! 3. [`super::external::detect_external_services`] walks summaries +
//! SSRF caps and emits [`SurfaceNode::ExternalService`] nodes.
//! SSRF caps and emits [`SurfaceNode::ExternalService`](crate::surface::SurfaceNode::ExternalService) nodes.
//! 4. [`super::dangerous::detect_dangerous_locals`] walks summaries
//! and emits [`SurfaceNode::DangerousLocal`] nodes for every
//! and emits [`SurfaceNode::DangerousLocal`](crate::surface::SurfaceNode::DangerousLocal) nodes for every
//! function whose `sink_caps` include CODE_EXEC / DESERIALIZE /
//! SSTI / FMT_STRING.
//! 5. [`super::reachability::populate_reaches_edges`] runs a BFS over

View file

@ -344,7 +344,7 @@ const DRIVER_RULES: &[DriverRule] = &[
///
/// When the bare callee name does not hit a rule, the type-fact engine's
/// per-call `typed_call_receivers` map (read off the matching
/// [`crate::summary::SsaFuncSummary`]) is consulted: a callee whose
/// [`crate::summary::ssa_summary::SsaFuncSummary`]) is consulted: a callee whose
/// receiver was resolved to `TypeKind::DatabaseConnection` or
/// `TypeKind::FileHandle` is retried under the type-qualified name
/// `"DatabaseConnection.<method>"` / `"FileHandle.<method>"`, picking up

View file

@ -327,7 +327,7 @@ const CLIENT_RULES: &[ClientRule] = &[
///
/// When the bare callee name does not hit a rule, the type-fact engine's
/// per-call `typed_call_receivers` map (read off the matching
/// [`crate::summary::SsaFuncSummary`]) is consulted: a callee whose
/// [`crate::summary::ssa_summary::SsaFuncSummary`]) is consulted: a callee whose
/// receiver was resolved to `TypeKind::HttpClient` /
/// `TypeKind::RequestBuilder` / `TypeKind::Url` is retried under the
/// type-qualified name `"{container}.<method>"`, picking up the

View file

@ -1,6 +1,6 @@
//! Shared helpers used by the per-(language, framework) probes.
//!
//! Each probe extracts an [`EntryPoint`] node from a parsed source file
//! Each probe extracts an [`EntryPoint`](crate::surface::EntryPoint) node from a parsed source file
//! by walking the framework's route declaration shape. These helpers
//! cover the bookkeeping common to every probe: building a stable
//! [`SourceLocation`] from a tree-sitter node, decoding common string

View file

@ -4,7 +4,7 @@
//! whole-program [`CallGraph`].
//!
//! For each entry-point we first locate the matching call-graph
//! [`FuncKey`] by `(namespace, function_name)` (the entry-point's
//! [`FuncKey`](crate::symbol::FuncKey) by `(namespace, function_name)` (the entry-point's
//! `handler_location.file` is the project-relative POSIX path used as
//! `FuncKey::namespace`, and `handler_name` is the leaf function
//! name). From that node we run a BFS over forward call-graph edges

View file

@ -75,6 +75,15 @@ _CAP_BIT_TABLE = [
(1 << 18, "xss"), # SSTI (template_injection); also covers XSS sinks
(1 << 19, "xxe"),
(1 << 20, "prototype_pollution"),
# HTML_ESCAPE (1<<1) is the universal reflected-XSS *sink* cap across every
# language (`grep 'Sink(Cap::HTML_ESCAPE)' src/labels/` — PHP echo, JS
# innerHTML, Java servlet writers, etc.); the same bit is the html-escape
# *sanitizer* cap, so a finding only carries it as a sink when an un-encoded
# tainted value reached an HTML output. Placed LAST so any higher-priority
# sink bit (SQL_QUERY, FILE_IO, ...) on the same finding wins; a finding
# carrying only HTML_ESCAPE is reflected XSS. Without this, every
# taint-based reflected-XSS finding mis-buckets to "other".
(1 << 1, "xss"),
]
# Static lens (see --static): SHELL_ESCAPE (1<<2) is the command-injection sink

View file

@ -1,7 +1,7 @@
{
"required_findings": [
{ "id_prefix": "java.reflection.class_forname", "min_count": 1 },
{ "id_prefix": "java.crypto.weak_digest", "min_count": 1 }
{ "id_prefix": "java.crypto.weak_algorithm", "min_count": 1 }
],
"forbidden_findings": [],
"noise_budget": {

View file

@ -45,14 +45,14 @@
"notes": "Runtime.getRuntime().exec(command) with deserialized input; AST pattern correctly matches"
},
{
"rule_id": "java.xss.getwriter_print",
"rule_id": "taint-unsanitised-flow",
"severity": "MEDIUM",
"must_not_match": true,
"line_range": [
11,
11
],
"notes": "response.getWriter().println(\"Done\") — constant string, Layer B suppresses (regression guard)"
"notes": "response.getWriter().println(\"Done\") — constant string, must NOT raise reflected-XSS (Cap::HTML_ESCAPE). Regression guard retargeted from the retired java.xss.getwriter_print AST pattern to the taint sink that now owns reflected XSS."
},
{
"rule_id": "taint-unsanitised-flow",

View file

@ -80,14 +80,14 @@
"notes": "source at 11:9 (request.getParameter(\"input\")) flows through SQL query (line 17) into result set output at out.println(rs.getString(1)); second-order taint via tainted query results"
},
{
"rule_id": "java.xss.getwriter_print",
"rule_id": "taint-unsanitised-flow",
"severity": "MEDIUM",
"must_not_match": true,
"line_range": [
26,
26
],
"notes": "response.getWriter().println(new String(data)) — file-read data, Layer B suppresses (regression guard)"
"notes": "response.getWriter().println(new String(data)) — file-read bytes, not reflected request input, must NOT raise reflected-XSS (Cap::HTML_ESCAPE). Regression guard retargeted from the retired java.xss.getwriter_print AST pattern to the taint sink that now owns reflected XSS."
}
]
}

View file

@ -9,15 +9,7 @@
"must_match": true,
"line_range": [5, 12],
"evidence_contains": [],
"notes": "catch(Exception e) binds e as tainted; e flows to println sink via catch parameter"
},
{
"rule_id": "java.xss.getwriter_print",
"severity": "MEDIUM",
"must_match": true,
"line_range": [10, 10],
"evidence_contains": [],
"notes": "response.getWriter().println() in catch block — AST pattern detects potential XSS via error response"
"notes": "catch(Exception e) binds e as tainted; e flows to response.getWriter().println at line 10 — reflected XSS via error response. Replaces the retired java.xss.getwriter_print AST pattern: reflected XSS is now a taint sink (Sink(Cap::HTML_ESCAPE)), so this is taint-confirmed rather than flagged on every writer call."
}
]
}

View file

@ -19,21 +19,13 @@
"evidence_contains": [],
"notes": "AST pattern detects executeQuery with string concatenation — SQL injection"
},
{
"rule_id": "java.xss.getwriter_print",
"severity": "MEDIUM",
"must_match": true,
"line_range": [12, 12],
"evidence_contains": [],
"notes": "response.getWriter().println() with user input — reflected XSS via error response"
},
{
"rule_id": "taint-unsanitised-flow",
"severity": "HIGH",
"must_match": true,
"line_range": [7, 12],
"evidence_contains": [],
"notes": "request.getParameter flows to response.getWriter().println — user input reflected in error response"
"notes": "request.getParameter flows to response.getWriter().println at line 12 — user input reflected in error response. Replaces the retired java.xss.getwriter_print AST pattern: reflected XSS is now a taint sink (Sink(Cap::HTML_ESCAPE)), taint-confirmed rather than flagged on every writer call."
}
]
}