diff --git a/scripts/m7_ship_gate.sh b/scripts/m7_ship_gate.sh index d96519b4..1fcc891a 100755 --- a/scripts/m7_ship_gate.sh +++ b/scripts/m7_ship_gate.sh @@ -304,7 +304,16 @@ PY || { echo " FAIL: wall-clock exceeds budget"; return 1; } echo "[]" > "${results_report}" + # --static buckets a command-injection finding that carries only the + # SHELL_ESCAPE sink cap (the static, unconfirmed cmdi class for every + # language) as `cmdi` instead of `other`. Without a dynamic Confirm the + # SHELL_ESCAPE→CODE_EXEC remap never runs (Java servlet harnesses build- + # fail in CI), so the default lens leaves every cmdi finding in `other` + # and reads the cmdi cell as 0/0/N; the static lens is the correct + # bucketing for an unconfirmed scan and is appended at lowest priority so + # no higher-priority cap cell changes. python3 "${REPO_ROOT}/tests/eval_corpus/tabulate.py" \ + --static \ --label owasp \ --scan "${scan_report}" \ --ground-truth "${REPO_ROOT}/tests/eval_corpus/ground_truth/owasp_benchmark_v1.2.json" \ @@ -416,7 +425,13 @@ PY || { echo " FAIL: ${label} wall-clock exceeds budget"; return 1; } echo "[]" > "${results_report}" + # --static: bucket SHELL_ESCAPE-only command-injection findings as `cmdi` + # (see the Gate 6 note) so the per-cap table reflects the engine's real + # static classification in CI where no dynamic Confirm runs the + # SHELL_ESCAPE→CODE_EXEC remap. Appended at lowest priority; no other cap + # cell changes. 
local -a tabulate_args=( + --static --label "${label}" --scan "${scan_report}" --ground-truth "${gt}" diff --git a/src/ast.rs b/src/ast.rs index f461df9b..1f1cdf40 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1006,6 +1006,7 @@ fn is_test_suppressible_pattern(id: &str) -> bool { || id.ends_with(".crypto.math_random") || id.ends_with(".crypto.insecure_random") || id.ends_with(".crypto.weak_digest") + || id.ends_with(".crypto.weak_algorithm") || id.ends_with(".crypto.md5") || id.ends_with(".crypto.sha1") || id.ends_with(".crypto.rand") diff --git a/src/cfg_analysis/guards.rs b/src/cfg_analysis/guards.rs index a23bfdf7..4e4bcb55 100644 --- a/src/cfg_analysis/guards.rs +++ b/src/cfg_analysis/guards.rs @@ -180,6 +180,109 @@ fn ssa_all_sink_operands_const_or_param(ctx: &AnalysisContext, sink: NodeIndex) args_ok && receiver_ok } +/// Suppress a `cfg-unguarded-sink` finding when the sink restricts its +/// injection payload to specific argument positions (`sink_payload_args`) +/// and every operand at those positions resolves to a concrete constant. +/// +/// The flat [`is_all_args_constant`] check inspects *every* operand, so a +/// safe parameterised call like Go's +/// `db.QueryContext(context.Background(), "SELECT … $1", bind)` is wrongly +/// rejected: only arg 1 (the SQL string, `payload_args = [1]`) can carry an +/// injection, yet the non-payload `context.Background()` call and the +/// positional bind value are non-constant operands that defeat the +/// all-operands test. The taint engine already honours the payload-arg +/// gate (no `taint-unsanitised-flow` fires), so under `!has_taint` a sink +/// whose payload positions are all literals is safe by construction. 
+fn sink_payload_args_const(ctx: &AnalysisContext, sink: NodeIndex) -> bool { + let payload_positions = match &ctx.cfg[sink].call.sink_payload_args { + Some(p) if !p.is_empty() => p, + _ => return false, + }; + let Some(facts) = ctx.body_const_facts else { + return false; + }; + let Some(&sink_val) = facts.ssa.cfg_node_map.get(&sink) else { + return false; + }; + let Some(inst) = find_inst(&facts.ssa, sink_val) else { + return false; + }; + let SsaOp::Call { args, .. } = &inst.op else { + return false; + }; + // Every payload-position operand must be a concrete literal; a position past + // the recorded args is unprovable. NOTE(review): an empty group passes vacuously — confirm. + payload_positions.iter().all(|&pos| match args.get(pos) { + Some(group) => group.iter().all(|v| { + matches!( + facts.const_values.get(v), + Some( + ConstLattice::Str(_) + | ConstLattice::Int(_) + | ConstLattice::Bool(_) + | ConstLattice::Null + ) + ) + }), + None => false, + }) +} + +/// Suppress a `cfg-unguarded-sink` SSRF finding when the sink's URL operand +/// is origin-locked: it is the result of a `new URL(path, base)` / +/// `urljoin(base, path)` / `url.JoinPath(base, …)` builder whose base +/// argument pins the scheme+host, so the (attacker-controlled) path +/// component cannot redirect the request off the locked origin. +/// +/// Mirrors the taint engine's `StringFact::from_url_with_base` prefix-lock +/// (`url_builder_arg_indices` + `is_string_safe_for_ssrf`): the taint engine +/// stays silent on this shape, so the parallel structural finding is a false +/// positive. The base is recognised only via the string literal recorded on +/// the builder node (`arg_string_literals[base_idx]`); this check performs no +/// SSA constant resolution of a const-bound identifier base. 
+fn sink_url_origin_locked(ctx: &AnalysisContext, sink: NodeIndex, sink_caps: Cap) -> bool { + if !sink_caps.contains(Cap::SSRF) { + return false; + } + let sink_info = &ctx.cfg[sink]; + let sink_func = sink_info.ast.enclosing_func.as_deref(); + // CFG one-hop trace (mirrors `is_all_args_constant`): the SSA `cfg_node_map` + // only covers the body whose facts are attached to `ctx`, so a sink inside a + // nested function (e.g. an Express arrow handler) is missed by the SSA path. + // Walk the CFG instead: for each variable the sink uses, scan same-function + // nodes that define it and accept if any is an origin-locking URL builder. + // NOTE(review): no reaching-def ordering is checked here — confirm intended. + sink_info.taint.uses.iter().any(|u| { + ctx.cfg.node_indices().any(|idx| { + let info = &ctx.cfg[idx]; + if info.ast.enclosing_func.as_deref() != sink_func { + return false; + } + if info.taint.defines.as_deref() != Some(u.as_str()) { + return false; + } + // `info` defines `u`. Is it `new URL(path, base)` / `urljoin` / + // `JoinPath` with a string-literal base pinning scheme+host? + let Some(callee) = info.call.callee.as_deref() else { + return false; + }; + let Some((_path_idx, base_idx)) = crate::ssa::type_facts::url_builder_arg_indices( + ctx.lang, + callee, + info.call.outer_callee.as_deref(), + info.call.is_constructor, + ) else { + return false; + }; + info.call + .arg_string_literals + .get(base_idx) + .and_then(|s| s.as_deref()) + .is_some() + }) + }) +} + /// Return true if the SSA body contains a *named* variable whose definition /// is a constant, the SSA signature of an explicit `name = "literal"` /// reassignment. Used as the gate for the broader operand-Param suppression: @@ -2810,6 +2913,29 @@ impl CfgAnalysis for UnguardedSink { continue; } + // Payload-arg-gated sinks (e.g. Go `db.QueryContext(ctx, sql, + // ...binds)`, `payload_args = [1]`): only the payload positions can + // carry an injection. 
When the taint engine is already silent + // (`!has_taint`) and every payload-position operand is a constant + // literal, the non-payload operands (a `context.Context`, bind + // values) cannot make the call dangerous, so the structural finding + // is a false positive even though `is_all_args_constant` rejects it. + if !has_taint && sink_payload_args_const(ctx, *sink) { + continue; + } + + // Origin-locked URL SSRF sinks (`fetch(new URL(path, "https://…"))`): + // the builder's literal base pins scheme+host, so the + // attacker-controlled path cannot redirect off-origin. The taint + // engine already suppresses this via the abstract prefix-lock, so + // the parallel structural finding is a false positive. NOT gated + // on `!has_taint`: the origin lock holds precisely *because* the + // tainted path reaches the builder — the host stays fixed — so the + // syntactic taint-reaches signal must not re-open the finding. + if sink_url_origin_locked(ctx, *sink, sink_caps) { + continue; + } + // SSA latest-def suppression: when the taint engine has already // proved no source-tainted data reaches this sink (`!has_taint`) // and every SSA operand resolves to a constant, callee-fragment diff --git a/src/chain/edges.rs b/src/chain/edges.rs index cf2da89b..353f7d3b 100644 --- a/src/chain/edges.rs +++ b/src/chain/edges.rs @@ -3,9 +3,9 @@ //! Each call to [`findings_to_edges`] emits exactly one [`ChainEdge`] //! per input finding. The edge is *typed* by: //! -//! - the primary [`Cap`] bit picked from [`Evidence::sink_caps`] +//! - the primary [`Cap`] bit picked from [`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps) //! (the lowest-bit set, chosen deterministically), and -//! - the *reach* — the surface [`EntryPoint`] in the same file as the +//! - the *reach* — the surface [`EntryPoint`](crate::surface::EntryPoint) in the same file as the //! finding, when one exists, otherwise [`Reach::Unreachable`]. //! //! 
Phase 25's path search composes these edges with the SurfaceMap's @@ -35,7 +35,7 @@ pub struct FindingRef { pub location: SourceLocation, /// Rule identifier (`Diag::id`). pub rule_id: String, - /// Resolved sink cap bits ([`Evidence::sink_caps`]). + /// Resolved sink cap bits ([`Evidence::sink_caps`](crate::evidence::Evidence::sink_caps)). pub cap_bits: u32, } diff --git a/src/chain/impact.rs b/src/chain/impact.rs index 0a7cf5b2..f7b0cd1c 100644 --- a/src/chain/impact.rs +++ b/src/chain/impact.rs @@ -226,7 +226,7 @@ fn standalone_lookup(cap: Cap) -> Option { /// first rule in [`IMPACT_LATTICE`] order (specific before fallback). /// /// The standalone-rule walks (second + third pass) are O(1) via -/// [`STANDALONE_BY_BIT`]. The two-cap walk (first pass) stays linear +/// `STANDALONE_BY_BIT`. The two-cap walk (first pass) stays linear /// because the 2-cap subset is small (today: three rules); promote /// to a sorted-pair binary search if the lattice grows past ~16 /// pair-rules. diff --git a/src/chain/mod.rs b/src/chain/mod.rs index 39861634..6e7a78a2 100644 --- a/src/chain/mod.rs +++ b/src/chain/mod.rs @@ -15,7 +15,7 @@ //! //! Two parallel `Vec`s — `nodes` and `edges` — mirroring `SurfaceMap`'s //! shape. Determinism is the caller's responsibility: edges are -//! produced in the order the source [`Diag`] slice presents, and +//! produced in the order the source [`Diag`](crate::commands::scan::Diag) slice presents, and //! `findings_to_edges` does not sort the input. Phase 25 will fold //! these into a `petgraph::DiGraph` for path search. //! diff --git a/src/chain/search.rs b/src/chain/search.rs index 9ab7fb22..30bb1d2e 100644 --- a/src/chain/search.rs +++ b/src/chain/search.rs @@ -7,7 +7,7 @@ //! ``` //! //! The DFS starts at the implicit attacker node (virtually adjacent to -//! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`] +//! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`](ChainSearchConfig::max_depth) //! 
per-finding hops, and terminates at any //! [`crate::surface::DangerousLocal`] node. Each emitted //! [`ChainFinding`] is the deterministic minimum-length path through a diff --git a/src/dynamic/build_pool/java.rs b/src/dynamic/build_pool/java.rs index f1b2c6e0..2d41e46a 100644 --- a/src/dynamic/build_pool/java.rs +++ b/src/dynamic/build_pool/java.rs @@ -1,6 +1,6 @@ //! Long-lived `javac` daemon (Phase 22 / Track O.0). //! -//! The legacy [`crate::dynamic::build_sandbox::try_compile_java`] shell-execs a +//! The legacy `try_compile_java_with_toolchain` in `build_sandbox` shell-execs a //! fresh `javac` per harness — every invocation pays the JVM cold-start tax //! (~700ms on the macOS reference machine, ~300ms on Linux CI). At 50 //! findings per OWASP-scale run that single line burns > 30s before any diff --git a/src/dynamic/build_pool/mod.rs b/src/dynamic/build_pool/mod.rs index d533be6a..403b8775 100644 --- a/src/dynamic/build_pool/mod.rs +++ b/src/dynamic/build_pool/mod.rs @@ -87,7 +87,7 @@ const POOL_ENABLED_LANGS: &[&str] = &[ /// /// Format is a comma-separated list of `lang=bit` entries: `java=1,node=0`. /// A missing language returns the default: `true` for every language that -/// ships a pool (see [`POOL_ENABLED_LANGS`]), `false` otherwise. +/// ships a pool (see `POOL_ENABLED_LANGS`), `false` otherwise. pub fn is_pool_enabled(lang: &str) -> bool { let default = POOL_ENABLED_LANGS.contains(&lang); let raw = match std::env::var("NYX_DYNAMIC_BUILD_POOL") { diff --git a/src/dynamic/build_sandbox.rs b/src/dynamic/build_sandbox.rs index ac62c82f..6d45ff75 100644 --- a/src/dynamic/build_sandbox.rs +++ b/src/dynamic/build_sandbox.rs @@ -1783,7 +1783,7 @@ pub struct ChainStepBuildResult { /// so a `Vec` can be driven through the build pipeline /// without per-language match arms scattered across each caller. 
The /// production single-finding runner stays on the per-language match in -/// [`crate::dynamic::runner::execute`] because it folds the build result +/// [`crate::dynamic::runner::run_spec`] because it folds the build result /// into command-vector rewrites that vary per language and have no /// uniform shape — the chain reverifier does not need those rewrites /// because the sandbox-run sub-task ((c) of Phase 26 follow-up) will diff --git a/src/dynamic/corpus.rs b/src/dynamic/corpus.rs index fa779ac6..4fd506ef 100644 --- a/src/dynamic/corpus.rs +++ b/src/dynamic/corpus.rs @@ -30,12 +30,12 @@ //! Adding a new language for a cap means: drop a new file under //! `corpus//.rs`, register `pub mod ;` in the cap's //! `mod.rs`, and wire `(Cap::, Lang::, ::::PAYLOADS)` -//! into [`registry::ENTRIES`]. No other file needs to change. +//! into `registry::ENTRIES`. No other file needs to change. //! //! # Corpus governance (§16.1) //! -//! Every payload carries [`PayloadProvenance`], a [`since_corpus_version`], -//! and at least one [`fixture_paths`] entry. The [`CORPUS_VERSION`] const +//! Every payload carries [`PayloadProvenance`], a [`CuratedPayload::since_corpus_version`], +//! and at least one [`CuratedPayload::fixture_paths`] entry. The [`CORPUS_VERSION`] const //! tracks the history of incompatible corpus changes; bumping it //! invalidates all `dynamic_verdict_cache` entries whose spec touched the //! changed cap. @@ -171,9 +171,9 @@ pub struct CuratedPayload { /// [`crate::dynamic::probe::SinkProbe`] records drained from the run's /// probe channel (Phase 06 — Track C.1). Always populated; empty when /// the payload still relies on the legacy - /// [`Oracle::OutputContains`](crate::dynamic::oracle::Oracle::OutputContains) + /// [`Oracle::OutputContains`] /// path and has not been migrated to - /// [`Oracle::SinkProbe`](crate::dynamic::oracle::Oracle::SinkProbe) yet. + /// [`Oracle::SinkProbe`] yet. 
pub probe_predicates: &'static [ProbePredicate], /// Paired benign-control payload inside the same cap's slice. /// diff --git a/src/dynamic/corpus/audit.rs b/src/dynamic/corpus/audit.rs index 161ca17b..5efcbc12 100644 --- a/src/dynamic/corpus/audit.rs +++ b/src/dynamic/corpus/audit.rs @@ -12,7 +12,7 @@ //! whenever a maintainer forgets to wire a paired benign entry. //! //! 2. **Cap coverage is exhaustive.** The set of caps appearing in -//! [`CORPUS::entries`] OR [`CORPUS_UNSUPPORTED_LANG_NEUTRAL`] must +//! [`CORPUS`]'s [`entries`](super::CapCorpus::entries) OR [`CORPUS_UNSUPPORTED_LANG_NEUTRAL`] must //! equal [`Cap::all`]. Adding a new `Cap` bit without classifying it //! fails the build. //! diff --git a/src/dynamic/corpus/registry.rs b/src/dynamic/corpus/registry.rs index be404e90..938d549d 100644 --- a/src/dynamic/corpus/registry.rs +++ b/src/dynamic/corpus/registry.rs @@ -36,7 +36,7 @@ use crate::symbol::Lang; /// Caps with no payloads of their own — source-only sources, sanitizers, /// and sinks we cannot yet model with a reliable oracle. The /// [`super::audit`] module asserts that the union of caps covered by -/// [`CORPUS::entries`] and this constant equals [`Cap::all`]. +/// [`CORPUS`]'s [`entries`](CapCorpus::entries) and this constant equals [`Cap::all`]. /// /// Phase 11 (Track J.9) carved `CRYPTO`, `JSON_PARSE`, /// `UNAUTHORIZED_ID`, and `DATA_EXFIL` corpora; the remaining caps diff --git a/src/dynamic/differential.rs b/src/dynamic/differential.rs index 5a6d5ecb..5a2365b7 100644 --- a/src/dynamic/differential.rs +++ b/src/dynamic/differential.rs @@ -20,7 +20,7 @@ //! specialisation of [`evaluate_sets`] and delegates to it. //! //! "Fires" means [`crate::dynamic::oracle::oracle_fired`] returned `true` -//! against the run's [`SandboxOutcome`] + drained [`SinkProbe`] set — +//! against the run's [`SandboxOutcome`](crate::dynamic::sandbox::SandboxOutcome) + drained [`SinkProbe`] set — //! 
invariant across `Oracle::OutputContains` and `Oracle::SinkProbe`. use crate::dynamic::probe::SinkProbe; diff --git a/src/dynamic/environment.rs b/src/dynamic/environment.rs index a0e1df9c..784c7227 100644 --- a/src/dynamic/environment.rs +++ b/src/dynamic/environment.rs @@ -115,7 +115,7 @@ pub fn derive_secret(spec_hash: &str, env_var_name: &str) -> SecretValue { /// | Ruby | `ENV["X"]`, `ENV.fetch("X")` | /// | C/C++ | `getenv("X")` | /// -/// Static substring scan — bounded by [`IMPORT_SCAN_LIMIT`] like the import +/// Static substring scan — bounded by `IMPORT_SCAN_LIMIT` like the import /// extractor. No AST: an entry-file with `os.environ.get(some_var)` (a /// non-literal arg) is intentionally skipped; the secret bag is populated /// from literal references only so a typo cannot produce noisy injection. @@ -367,7 +367,7 @@ pub struct CapturedDeps { /// add the package-manager deps required when the real import is present. pub framework_adapter: Option, /// Three-valued lang-has-framework signal (see - /// [`FrameworkContext::lang_has_web_framework`]). + /// [`FrameworkContext::lang_has_web_framework`](crate::utils::project::FrameworkContext::lang_has_web_framework)). pub framework_signal: Option, /// Absolute paths of local config files reachable from the entry /// point's directory. Each is copied verbatim into the workdir @@ -380,7 +380,7 @@ pub struct CapturedDeps { /// Manifest files (lockfile + project manifest pair) recognised for /// [`Self::toolchain`]'s language. Each entry is an absolute path /// inside `project_root`; the first existing entry from - /// [`MANIFEST_FILES_BY_LANG`] wins for [`Self::lockfile`]. + /// `MANIFEST_FILES_BY_LANG` wins for [`Self::lockfile`]. pub manifests: Vec, /// First recognised manifest file (== `manifests[0]` when present). 
/// Used by the per-language emitter as the canonical lockfile when diff --git a/src/dynamic/framework/registry.rs b/src/dynamic/framework/registry.rs index b4f9bb72..4f6fdb38 100644 --- a/src/dynamic/framework/registry.rs +++ b/src/dynamic/framework/registry.rs @@ -12,7 +12,7 @@ //! order of [`super::FrameworkAdapter::name`]. The lexical ordering //! gives a deterministic first-match result that survives merges / //! rebases without subtle re-ordering bugs. A `framework` unit test -//! ([`super::tests::registry_is_empty_for_every_lang_phase_01`]) +//! (`registry_is_empty_for_every_lang_phase_01`) //! captures the Phase-01 starting baseline so a phase that registers //! its first adapter is forced to update both the slice *and* the //! regression guard in the same change. diff --git a/src/dynamic/harness.rs b/src/dynamic/harness.rs index 179e4fc5..4e6ee3d7 100644 --- a/src/dynamic/harness.rs +++ b/src/dynamic/harness.rs @@ -9,7 +9,7 @@ //! (`__NYX_SINK_HIT__` sentinel on stdout). //! 5. Lets the sink either fire or not — the oracle observes from outside. //! -//! One generator per [`Lang`]. Each emits source plus a build command. +//! One generator per [`Lang`](crate::symbol::Lang). Each emits source plus a build command. //! Build artefacts are staged inside the sandbox working dir, never the //! user's tree. diff --git a/src/dynamic/lang/c.rs b/src/dynamic/lang/c.rs index db9aba0b..ba755f1b 100644 --- a/src/dynamic/lang/c.rs +++ b/src/dynamic/lang/c.rs @@ -1,7 +1,7 @@ //! C harness emitter. //! //! Phase 16 (Track B Rust + C/C++ vertical) replaces the stub body with -//! dispatch over [`CShape`] — the cross product of [`EntryKind`] and a +//! dispatch over [`CShape`] — the cross product of [`EntryKind`](crate::dynamic::spec::EntryKind) and a //! lightweight per-file shape detector that inspects the entry file for //! `main(int argc, char *argv[])`, libFuzzer's `LLVMFuzzerTestOneInput`, //! and free functions with `(const char*, size_t)` signatures. 
diff --git a/src/dynamic/lang/go.rs b/src/dynamic/lang/go.rs index 250afb4f..438f1151 100644 --- a/src/dynamic/lang/go.rs +++ b/src/dynamic/lang/go.rs @@ -1,7 +1,7 @@ //! Go harness emitter. //! //! Phase 15 (Track B Go vertical) replaces the single legacy `emit` body -//! with dispatch over [`GoShape`] — the cross product of [`EntryKind`] +//! with dispatch over [`GoShape`] — the cross product of [`EntryKind`](crate::dynamic::spec::EntryKind) //! and a lightweight per-file shape detector that inspects the entry //! file for `net/http` handler signatures, gin context handlers, //! `flag.Parse` CLIs, and `func(args ...) error` fuzz harnesses. @@ -312,7 +312,7 @@ fn read_entry_source(entry_file: &str) -> String { /// Phase 09 — Track D.2: synthesise a `go.mod` listing every captured /// third-party import path. Standard-library imports are skipped via -/// [`is_go_stdlib`]. +/// `is_go_stdlib`. pub fn materialize_go(env: &Environment) -> RuntimeArtifacts { let mut artifacts = RuntimeArtifacts::new(); let go_version = env diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index 69a7c560..5016da9d 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -2,7 +2,7 @@ //! //! Phase 14 (Track B Java vertical) replaces the single legacy `emit` //! body with dispatch over [`JavaShape`] — the cross product of -//! [`EntryKind`] and a lightweight per-file shape detector that inspects +//! [`EntryKind`](crate::dynamic::spec::EntryKind) and a lightweight per-file shape detector that inspects //! the entry file for servlet / Spring / Quarkus annotations, JUnit //! markers, and `static main(String[])` signatures. //! @@ -200,10 +200,10 @@ impl JavaShape { /// pass an empty string and the function returns /// [`Self::StaticMethod`]). 
/// - /// Framework / annotation detection wins over the [`EntryKind`] + /// Framework / annotation detection wins over the [`EntryKind`](crate::dynamic::spec::EntryKind) /// axis: when the source clearly imports a servlet or Spring /// controller the shape is selected even if the spec derivation - /// pipeline tagged the entry kind as [`EntryKind::Function`]. + /// pipeline tagged the entry kind as [`EntryKind::Function`](crate::dynamic::spec::EntryKind::Function). pub fn detect(spec: &HarnessSpec, source: &str) -> Self { let entry = spec.entry_name.as_str(); let kind = spec.entry_kind.tag(); @@ -1273,7 +1273,7 @@ public class NyxHarness {{ /// template, and dispatches the resulting filter against the /// in-sandbox LDAP stub via `javax.naming.directory.InitialDirContext` /// over the real LDAPv3 BER wire (the stub's accept loop at -/// [`crate::dynamic::stubs::ldap_server::accept_loop`] auto-detects +/// `crate::dynamic::stubs::ldap_server::accept_loop` auto-detects /// the `0x30 SEQUENCE` lead byte and routes through the BER /// reader/writer at [`crate::dynamic::stubs::ldap_ber`]). Falls back /// to an in-process RFC 4515 subset matcher against three canonical @@ -2417,7 +2417,7 @@ public class NyxHarness {{ /// tree without pulling Jackson / Gson onto the classpath. The /// fixture calls `NyxJsonProbe.parse(text)` in place of any library /// JSON parser. When the parser's own -/// [`NyxJsonProbe.NyxJsonDepthException`] fires (nesting above +/// `NyxJsonProbe.NyxJsonDepthException` fires (nesting above /// `MAX_PARSE_DEPTH = 4096`) the harness emits a `JsonParse { depth: /// 0, excessive_depth: true }` probe before continuing — matches the /// PHP `JSON_ERROR_DEPTH` and Python `RecursionError` excess paths. diff --git a/src/dynamic/lang/java_servlet_stubs.rs b/src/dynamic/lang/java_servlet_stubs.rs index 1af17fa1..b352834c 100644 --- a/src/dynamic/lang/java_servlet_stubs.rs +++ b/src/dynamic/lang/java_servlet_stubs.rs @@ -20,7 +20,7 @@ //! 
The bundle ships both `javax.servlet` and `jakarta.servlet` so source //! files predating the EE 9 rename and source files using the new //! namespace both link. Each stub is generated from the same template via -//! [`make_servlet_stubs`] so the two trees stay in sync. +//! `make_servlet_stubs` so the two trees stay in sync. /// Stub bundle for the servlet-shape Java harnesses. /// diff --git a/src/dynamic/lang/javascript.rs b/src/dynamic/lang/javascript.rs index 9e9e1f07..a3ddc916 100644 --- a/src/dynamic/lang/javascript.rs +++ b/src/dynamic/lang/javascript.rs @@ -3,16 +3,16 @@ //! After Phase 13 (Track B JS + TS vertical) the per-shape dispatch lives in //! [`crate::dynamic::lang::js_shared`]. This module is the typed surface for //! `Lang::JavaScript`: registers the [`JavaScriptEmitter`] in the dispatch -//! table, advertises the supported [`EntryKind`] set, and forwards +//! table, advertises the supported [`EntryKind`](crate::dynamic::spec::EntryKind) set, and forwards //! `emit` / `materialize_runtime` calls to the shared module. //! //! Payload slot support (handled by `js_shared::emit`): -//! - [`PayloadSlot::Param`] — n-th positional argument. -//! - [`PayloadSlot::EnvVar`] — set env var before calling. -//! - [`PayloadSlot::Stdin`] — pipe payload to `process.stdin`. -//! - [`PayloadSlot::QueryParam`] — HTTP-shaped query param (Express / Koa / Next). -//! - [`PayloadSlot::HttpBody`] — HTTP body (Express / Koa / Next). -//! - [`PayloadSlot::Argv`] — coerced to positional `Param(0)` by build_call. +//! - [`PayloadSlot::Param`](crate::dynamic::spec::PayloadSlot::Param) — n-th positional argument. +//! - [`PayloadSlot::EnvVar`](crate::dynamic::spec::PayloadSlot::EnvVar) — set env var before calling. +//! - [`PayloadSlot::Stdin`](crate::dynamic::spec::PayloadSlot::Stdin) — pipe payload to `process.stdin`. +//! - [`PayloadSlot::QueryParam`](crate::dynamic::spec::PayloadSlot::QueryParam) — HTTP-shaped query param (Express / Koa / Next). +//! 
- [`PayloadSlot::HttpBody`](crate::dynamic::spec::PayloadSlot::HttpBody) — HTTP body (Express / Koa / Next). +//! - [`PayloadSlot::Argv`](crate::dynamic::spec::PayloadSlot::Argv) — coerced to positional `Param(0)` by build_call. use crate::dynamic::environment::{Environment, RuntimeArtifacts}; use crate::dynamic::lang::{ diff --git a/src/dynamic/lang/mod.rs b/src/dynamic/lang/mod.rs index cb24498a..489391d6 100644 --- a/src/dynamic/lang/mod.rs +++ b/src/dynamic/lang/mod.rs @@ -132,7 +132,7 @@ pub trait LangEmitter { /// Build a harness source bundle for `spec`. fn emit(&self, spec: &HarnessSpec) -> Result; - /// The set of [`EntryKind`] variants this emitter understands, + /// The set of [`EntryKind`](crate::dynamic::spec::EntryKind) variants this emitter understands, /// projected to the [`EntryKindTag`] discriminant so the slice can /// live in `'static` storage even after Phase 18 extended /// `EntryKind` with data-bearing variants. diff --git a/src/dynamic/lang/php.rs b/src/dynamic/lang/php.rs index 68708781..f3f36e0c 100644 --- a/src/dynamic/lang/php.rs +++ b/src/dynamic/lang/php.rs @@ -2,7 +2,7 @@ //! //! Phase 15 (Track B PHP vertical) replaces the single legacy `emit` //! body with dispatch over [`PhpShape`] — the cross product of -//! [`EntryKind`] and a lightweight per-file shape detector that +//! [`EntryKind`](crate::dynamic::spec::EntryKind) and a lightweight per-file shape detector that //! inspects the entry file for Slim/Laravel/Symfony route closures, //! `$argv`-driven CLI scripts, and top-level script bodies. //! @@ -856,7 +856,7 @@ echo json_encode(["entity_expanded" => $expanded]) . 
"\n"; /// Reads `NYX_PAYLOAD`, splices it into a `(uid=)` filter, /// and — when `NYX_LDAP_ENDPOINT` is set — routes the search through /// the in-sandbox LDAP stub over the real LDAPv3 BER wire (the stub's -/// accept loop at [`crate::dynamic::stubs::ldap_server::accept_loop`] +/// accept loop at `crate::dynamic::stubs::ldap_server::accept_loop` /// auto-detects the `0x30 SEQUENCE` lead byte and routes through the /// reader/writer at [`crate::dynamic::stubs::ldap_ber`]). Falls back /// to an in-process RFC 4515 subset matcher against three canonical diff --git a/src/dynamic/lang/python.rs b/src/dynamic/lang/python.rs index 91ecdc60..3e9d8ff2 100644 --- a/src/dynamic/lang/python.rs +++ b/src/dynamic/lang/python.rs @@ -2,7 +2,7 @@ //! //! Phase 12 (Track B Python vertical) replaces the single legacy //! `emit` body with dispatch over [`PythonShape`] — the cross product of -//! [`EntryKind`] and a lightweight per-file shape detector that inspects +//! [`EntryKind`](crate::dynamic::spec::EntryKind) and a lightweight per-file shape detector that inspects //! the entry file for framework decorators / CLI gates / async / pytest //! conventions. Each shape returns its own [`HarnessSource`] but shares //! the Phase 06 probe shim ([`probe_shim`]) and payload prelude so the @@ -14,7 +14,7 @@ //! positionally with the payload). The dispatch never returns an //! emitter-side error for an unknown shape — that responsibility belongs //! to `lang::emit`, which has already gated on -//! [`EntryKind`] via [`PythonEmitter::entry_kinds_supported`]. +//! [`EntryKind`](crate::dynamic::spec::EntryKind) via [`PythonEmitter::entry_kinds_supported`]. //! //! Payload slot support: //! - [`PayloadSlot::Param`] — n-th positional argument. @@ -176,10 +176,10 @@ impl PythonShape { /// pass an empty string and the function returns [`Self::Generic`]). 
/// /// Framework detection (Flask / FastAPI / Django) wins over the - /// [`EntryKind`] axis: when the source clearly imports one of those + /// [`EntryKind`](crate::dynamic::spec::EntryKind) axis: when the source clearly imports one of those /// frameworks the route shape is selected even if the spec /// derivation pipeline tagged the entry kind as - /// [`EntryKind::Function`]. This makes the dispatcher robust + /// [`EntryKind::Function`](crate::dynamic::spec::EntryKind::Function). This makes the dispatcher robust /// against the synthetic flow-step path used by tests and against /// the legacy substring-only entry-kind heuristic. pub fn detect(spec: &HarnessSpec, source: &str) -> Self { @@ -2616,7 +2616,7 @@ if __name__ == "__main__": /// Reads `NYX_PAYLOAD`, splices it into a `(uid=)` filter, /// and — when `NYX_LDAP_ENDPOINT` is set — routes the search through /// the in-sandbox LDAP stub over the real LDAPv3 BER wire (the stub's -/// accept loop at [`crate::dynamic::stubs::ldap_server::accept_loop`] +/// accept loop at `crate::dynamic::stubs::ldap_server::accept_loop` /// auto-detects the `0x30 SEQUENCE` lead byte and routes through the /// reader/writer at [`crate::dynamic::stubs::ldap_ber`]). Falls back /// to an in-process RFC 4515 subset matcher against three canonical diff --git a/src/dynamic/lang/ruby.rs b/src/dynamic/lang/ruby.rs index 8fe5b974..d5d83551 100644 --- a/src/dynamic/lang/ruby.rs +++ b/src/dynamic/lang/ruby.rs @@ -2,7 +2,7 @@ //! //! Phase 15 (Track B Ruby vertical) replaces the previous `LangUnsupported` //! stub with dispatch over [`RubyShape`] — the cross product of -//! [`EntryKind`] and a lightweight per-file shape detector that inspects +//! [`EntryKind`](crate::dynamic::spec::EntryKind) and a lightweight per-file shape detector that inspects //! the entry file for Sinatra routes, Rails controller actions, Hanami //! actions, Rack middleware, and generic controller methods. //! 
diff --git a/src/dynamic/lang/rust.rs b/src/dynamic/lang/rust.rs index e817e7f9..d6dc2864 100644 --- a/src/dynamic/lang/rust.rs +++ b/src/dynamic/lang/rust.rs @@ -2671,7 +2671,7 @@ fn is_ident_char(ch: char) -> bool { /// - Other caps use only std (no extra deps). /// /// `libc` is always pinned because the Phase 16 probe shim (spliced into -/// `src/main.rs` by [`generate_main_rs`]) calls `libc::sigaction` from +/// `src/main.rs` by `generate_main_rs`) calls `libc::sigaction` from /// `__nyx_install_crash_guard`. The shim is unconditionally compiled so /// the dep must be unconditional too. pub fn generate_cargo_toml(cap: Cap) -> String { diff --git a/src/dynamic/oob.rs b/src/dynamic/oob.rs index 91c6d7f2..3cc6da3c 100644 --- a/src/dynamic/oob.rs +++ b/src/dynamic/oob.rs @@ -78,7 +78,7 @@ impl OobListener { /// URL to embed in a payload for `nonce`. /// /// Format: `http://127.0.0.1:{port}/{nonce}`. Use this URL for the - /// process sandbox. For Docker sandboxes use [`nonce_url_for_host`]. + /// process sandbox. For Docker sandboxes use [`Self::nonce_url_for_host`]. pub fn nonce_url(&self, nonce: &str) -> String { format!("http://127.0.0.1:{}/{}", self.port, nonce) } diff --git a/src/dynamic/oracle.rs b/src/dynamic/oracle.rs index b47073d2..7e3b8037 100644 --- a/src/dynamic/oracle.rs +++ b/src/dynamic/oracle.rs @@ -1301,7 +1301,7 @@ impl Canary { /// Derive a 32-byte canary for the finding identified by `spec_hash`. /// /// `BLAKE3("nyx.dynamic.canary.v1" ‖ run_nonce ‖ spec_hash)`. The - /// [`run_nonce`] is a process-global value seeded once from the OS + /// `run_nonce` is a process-global value seeded once from the OS /// CSPRNG (mixed with time + pid as a fallback), so two runs of the same /// spec draw different canaries and a stale probe record cannot satisfy a /// later run. 
Keying on `spec_hash` gives every finding in a single run diff --git a/src/dynamic/policy.rs b/src/dynamic/policy.rs index cfa09c94..3997506c 100644 --- a/src/dynamic/policy.rs +++ b/src/dynamic/policy.rs @@ -330,7 +330,7 @@ impl DenyRule { /// Finding's path or evidence references a production endpoint /// (e.g. `api.prod.example.com`, `*.production.*`, /// `*-prod.amazonaws.com`). Conservative: matched against the - /// short list in [`PROD_ENDPOINT_REGEXES`]. + /// short list in `PROD_ENDPOINT_REGEXES`. pub const PRODUCTION_ENDPOINT: &'static str = "production-endpoint"; } @@ -382,8 +382,8 @@ const PROD_ENDPOINT_REGEXES: &[&str] = &[ /// snippets, and the `SpanEvidence` snippets for source/sink/guard/ /// sanitizer entries. Each text is fed to three predicates in turn /// — [`DenyRule::CREDENTIALS`] (via [`crate::utils::redact::contains_secret`]), -/// [`DenyRule::PRIVATE_KEY`] (via [`PRIVATE_KEY_LITERALS`]), -/// [`DenyRule::PRODUCTION_ENDPOINT`] (via [`PROD_ENDPOINT_REGEXES`]). +/// [`DenyRule::PRIVATE_KEY`] (via `PRIVATE_KEY_LITERALS`), +/// [`DenyRule::PRODUCTION_ENDPOINT`] (via `PROD_ENDPOINT_REGEXES`). /// The first match wins and the verifier short-circuits to /// [`crate::evidence::InconclusiveReason::PolicyDeniedDynamic`]. /// diff --git a/src/dynamic/repro.rs b/src/dynamic/repro.rs index 94b12ce8..b4c1a96e 100644 --- a/src/dynamic/repro.rs +++ b/src/dynamic/repro.rs @@ -50,7 +50,7 @@ use directories::ProjectDirs; use std::fs; use std::path::{Path, PathBuf}; -/// Emitted by [`write`] on success. +/// Emitted by [`write()`] on success. #[derive(Debug, Clone)] pub struct ReproArtifact { /// Absolute path to the repro bundle root. @@ -288,7 +288,7 @@ fn repro_root(spec_hash: &str) -> Result { /// Resolve the bundle path for `spec_hash` without creating any directories. 
/// -/// Returns the same path [`write`] uses (`~/.cache/nyx/dynamic/repro/{spec_hash}/`) +/// Returns the same path [`write()`] uses (`~/.cache/nyx/dynamic/repro/{spec_hash}/`) /// so callers can locate an existing bundle for replay. Respects the /// `NYX_REPRO_BASE` test override. /// diff --git a/src/dynamic/sandbox/baseline.rs b/src/dynamic/sandbox/baseline.rs index 64a028b2..8be7c4b5 100644 --- a/src/dynamic/sandbox/baseline.rs +++ b/src/dynamic/sandbox/baseline.rs @@ -3,18 +3,18 @@ //! A harness needs the language toolchain's heavyweight dependency tree //! (`node_modules`, `vendor`, `target/`, …) but that tree is identical across //! every finding in a run — installing it per-finding is the bulk of the -//! per-workdir setup cost. A [`Baseline`] holds one shared, warmed copy under +//! per-workdir setup cost. A [`Baseline`](crate::dynamic::sandbox::baseline::Baseline) holds one shared, warmed copy under //! the build-pool cache dir; each per-finding workdir gets a cheap snapshot of //! it: //! //! - **macOS** — a `clonefile` CoW snapshot (via -//! [`crate::dynamic::harness::copy_workdir`]). +//! `crate::dynamic::harness::copy_workdir`). //! - **Linux** — a read-only `mount --bind`, falling back to a reflink copy //! when bind mounts are unavailable (no `CAP_SYS_ADMIN` / not in a mount //! namespace). //! //! The baseline root honours `NYX_BUILD_POOL_DIR` through -//! [`crate::dynamic::build_pool::pool_cache_dir`], so tests can redirect it +//! `crate::dynamic::build_pool::pool_cache_dir`, so tests can redirect it //! into a `TempDir` and it shares the same on-disk layout as the Phase 22/23 //! build pools (`/dynamic/build-pool//baseline`). diff --git a/src/dynamic/sandbox/docker.rs b/src/dynamic/sandbox/docker.rs index 07446ae4..cf723993 100644 --- a/src/dynamic/sandbox/docker.rs +++ b/src/dynamic/sandbox/docker.rs @@ -2,8 +2,8 @@ //! //! This module is the thin layer between the pinned-digest catalogue //! 
(`tools/image-builder/images.toml` → `src/dynamic/toolchain.rs::IMAGE_DIGESTS`) -//! and the existing docker invocations in [`super::run_docker`] / -//! [`super::run_native_binary_docker`]. +//! and the existing docker invocations in `super::run_docker` / +//! `super::run_native_binary_docker`. //! //! Responsibilities: //! @@ -16,7 +16,7 @@ //! - mounts each `StubHarness` filesystem root at a fixed `/nyx/stubs/` //! path so harness-side shims can find them without hard-coding host //! tempdir layouts, -//! - honours the [`super::NetworkPolicy`] (none / OOB / stubs-only / open) +//! - honours the [`NetworkPolicy`](crate::dynamic::sandbox::NetworkPolicy) (none / OOB / stubs-only / open) //! using the same flag set as the legacy `start_container`. //! //! All helpers are infallible w.r.t. docker availability — they return arg @@ -157,7 +157,7 @@ pub fn stub_mount_args(stub_roots: &[std::path::PathBuf]) -> Vec { /// Render the `--network` + `--add-host` flag slice for a [`NetworkPolicy`]. /// -/// Mirrors the legacy block in [`super::start_container`] so callers using +/// Mirrors the legacy block in `super::start_container` so callers using /// the new docker.rs entry point produce byte-identical container layouts /// to the existing path — important for `tests/dynamic_parity.rs` to keep /// reading the same verdicts across backends. diff --git a/src/dynamic/sandbox/firecracker.rs b/src/dynamic/sandbox/firecracker.rs index 07999dad..6cc1f366 100644 --- a/src/dynamic/sandbox/firecracker.rs +++ b/src/dynamic/sandbox/firecracker.rs @@ -23,7 +23,7 @@ //! 3. The probe is cached behind a `OnceLock` so repeated calls into [`run`] //! do not re-`stat` the binary every time. Tests that swap //! `NYX_FIRECRACKER_BIN` between scenarios bypass the cache via the -//! uncached [`is_firecracker_reachable`] helper. +//! uncached [`is_firecracker_reachable`](crate::dynamic::sandbox::firecracker::is_firecracker_reachable) helper. 
use std::sync::OnceLock; diff --git a/src/dynamic/sandbox/mod.rs b/src/dynamic/sandbox/mod.rs index 64d01a48..7e359896 100644 --- a/src/dynamic/sandbox/mod.rs +++ b/src/dynamic/sandbox/mod.rs @@ -54,7 +54,7 @@ pub mod firecracker; /// Phase 17 (Track E.1) + Phase 18 (Track E.2) per-run hardening outcome. /// -/// Returned by [`run_process`] on the [`SandboxOutcome`] so callers (tests + +/// Returned by `run_process` on the [`SandboxOutcome`] so callers (tests + /// telemetry) can inspect the per-primitive status without consulting a /// process-global singleton. The previous Phase 17/18 implementation kept /// the outcome in `process_linux::LAST_OUTCOME` / `process_macos::LAST_OUTCOME` @@ -81,7 +81,7 @@ pub enum HardeningRecord { /// IMAGE_DIGESTS`] entries to docker image refs, render `docker run` /// flag slices that honour [`NetworkPolicy`], and mount the harness /// workdir at the fixed `/work` path. The legacy entry points in this -/// file ([`run_docker`] / [`run_native_binary_docker`]) call into +/// file (`run_docker` / `run_native_binary_docker`) call into /// `docker::ensure_image_pulled` so every harness run uses the catalogue /// pin when one is available. pub mod docker; @@ -233,7 +233,7 @@ pub struct SandboxOptions { /// Phase 17 (Track E.1): cap bits used to minimise the seccomp-bpf /// allowlist applied to the Linux process backend. When `0`, the /// process backend installs only the cap-independent `base` allowlist - /// from [`seccomp::seccomp_policy.toml`]; when non-zero, every cap bit + /// from `seccomp::seccomp_policy.toml`; when non-zero, every cap bit /// set adds its allowlisted syscalls on top. Other backends ignore /// this field. pub seccomp_caps: u32, @@ -264,7 +264,7 @@ pub struct SandboxOptions { /// primitive toggles. 
#[doc(hidden)] pub ablation: Option, - /// Phase 30 (Track C observability): optional [`VerifyTrace`] handle + /// Phase 30 (Track C observability): optional [`VerifyTrace`](crate::dynamic::trace::VerifyTrace) handle /// the runner appends pipeline stages to (`build_started`, /// `build_done`, `sandbox_started`, `oracle_wait`, `oracle_observed`). /// `None` keeps the runner silent — sandbox-level callers that do @@ -284,7 +284,7 @@ pub struct SandboxOptions { /// no-new-privs, all rlimits, namespace unshare, chroot to workdir, /// default-deny seccomp filter scoped to [`SandboxOptions::seccomp_caps`]. /// Each primitive is best-effort; failures degrade to -/// [`HardeningLevel::Partial`] without aborting the run. +/// `HardeningLevel::Partial` without aborting the run. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum ProcessHardeningProfile { #[default] @@ -420,7 +420,7 @@ impl HostPort { /// selector. /// - [`NetworkPolicy::OobOutbound`] — the legacy "OOB only" path: the /// harness can reach the per-scan OOB listener (and only it via the -/// Linux iptables filter in [`apply_oob_egress_filter`]). Docker: +/// Linux iptables filter in `apply_oob_egress_filter`). Docker: /// `bridge` + host-gateway + iptables OOB-port filter. /// - [`NetworkPolicy::Open`] — unrestricted outbound. Docker: `bridge` /// with no egress filter. Reserved for diagnostic / dev-only runs; diff --git a/src/dynamic/sandbox/process_macos.rs b/src/dynamic/sandbox/process_macos.rs index 807cb2d7..51e2c131 100644 --- a/src/dynamic/sandbox/process_macos.rs +++ b/src/dynamic/sandbox/process_macos.rs @@ -1,13 +1,13 @@ //! Phase 18 (Track E.2) — macOS process backend hardening. //! -//! macOS analogue of [`super::process_linux`]. Where the Linux backend +//! macOS analogue of `super::process_linux`. Where the Linux backend //! installs a `pre_exec` sequence (prctl + rlimits + unshare + chroot + //! seccomp-bpf), the macOS backend wraps the harness command with //! 
`sandbox-exec(1)` driven by a per-capability `.sb` policy file. //! //! Profile selection //! ----------------- -//! [`profile_for_caps`] maps the [`SandboxOptions::seccomp_caps`] bitset +//! [`profile_for_caps`] maps the [`SandboxOptions::seccomp_caps`](super::SandboxOptions::seccomp_caps) bitset //! (set by the verifier from `spec.expected_cap`) to a profile name in //! `src/dynamic/sandbox_profiles/`: //! @@ -254,13 +254,13 @@ pub fn profile_path(name: &str) -> Option { // without needing macOS-host sandbox-exec access. /// Env var consulted by [`profile_path`] to enable the deny-default -/// splice. When set to `1` / `true`, [`deny_default_seed_for`] is +/// splice. When set to `1` / `true`, `deny_default_seed_for` is /// invoked for every materialised profile; missing seeds fall back to /// the baked `(allow default)` body so misconfiguration cannot brick /// the sandbox-exec backend. pub const SB_DENY_DEFAULT_ENV: &str = "NYX_SB_DENY_DEFAULT"; -/// Env var consulted by [`deny_default_seed_for`] to locate the seed +/// Env var consulted by `deny_default_seed_for` to locate the seed /// directory. Defaults to `tools/sb-trace/` relative to the workspace /// root when unset; tests override this to point at a tempdir-backed /// fixture set. diff --git a/src/dynamic/spec.rs b/src/dynamic/spec.rs index ea86b702..c3f21420 100644 --- a/src/dynamic/spec.rs +++ b/src/dynamic/spec.rs @@ -77,7 +77,7 @@ pub enum PayloadSlot { HttpBody, /// Environment variable. EnvVar(String), - /// CLI argv slot (0-based, excluding argv[0]). + /// CLI argv slot (0-based, excluding `argv[0]`). Argv(usize), /// stdin. Stdin, @@ -144,7 +144,7 @@ pub struct HarnessSpec { /// this field is `None` for every spec; subsequent Track-L phases /// register adapters and back-fill the binding. 
/// - /// Excluded from [`compute_spec_hash`]: the binding is descriptive + /// Excluded from `compute_spec_hash`: the binding is descriptive /// metadata derived from the entry function and does not change /// the harness boundary topology that the spec hash protects. /// `#[serde(default, skip_serializing_if = "Option::is_none")]` so @@ -157,10 +157,10 @@ pub struct HarnessSpec { /// decide whether to bootstrap a full Spring test context /// (`SpringApplication.run` + `MockMvc`) or the lighter /// reflective invocation path the legacy shapes use. Populated - /// by [`attach_framework_binding`] when the `java-spring` + /// by `attach_framework_binding` when the `java-spring` /// adapter binds. /// - /// Excluded from [`compute_spec_hash`] for the same reason as + /// Excluded from `compute_spec_hash` for the same reason as /// `framework`: the toggle is descriptive metadata driven by the /// adapter binding, not a per-spec boundary topology axis. /// Pre-Phase-14 serialised specs deserialise to the default @@ -663,7 +663,7 @@ fn first_annotated_entry(steps: &[crate::evidence::FlowStep]) -> Option"` / `"FileHandle."`, picking up diff --git a/src/surface/external.rs b/src/surface/external.rs index b3e75b67..bd42db4f 100644 --- a/src/surface/external.rs +++ b/src/surface/external.rs @@ -327,7 +327,7 @@ const CLIENT_RULES: &[ClientRule] = &[ /// /// When the bare callee name does not hit a rule, the type-fact engine's /// per-call `typed_call_receivers` map (read off the matching -/// [`crate::summary::SsaFuncSummary`]) is consulted: a callee whose +/// [`crate::summary::ssa_summary::SsaFuncSummary`]) is consulted: a callee whose /// receiver was resolved to `TypeKind::HttpClient` / /// `TypeKind::RequestBuilder` / `TypeKind::Url` is retried under the /// type-qualified name `"{container}."`, picking up the diff --git a/src/surface/lang/common.rs b/src/surface/lang/common.rs index a97d72b4..f139b948 100644 --- a/src/surface/lang/common.rs +++ 
b/src/surface/lang/common.rs @@ -1,6 +1,6 @@ //! Shared helpers used by the per-(language, framework) probes. //! -//! Each probe extracts an [`EntryPoint`] node from a parsed source file +//! Each probe extracts an [`EntryPoint`](crate::surface::EntryPoint) node from a parsed source file //! by walking the framework's route declaration shape. These helpers //! cover the bookkeeping common to every probe: building a stable //! [`SourceLocation`] from a tree-sitter node, decoding common string diff --git a/src/surface/reachability.rs b/src/surface/reachability.rs index d57b0d15..603a006c 100644 --- a/src/surface/reachability.rs +++ b/src/surface/reachability.rs @@ -4,7 +4,7 @@ //! whole-program [`CallGraph`]. //! //! For each entry-point we first locate the matching call-graph -//! [`FuncKey`] by `(namespace, function_name)` (the entry-point's +//! [`FuncKey`](crate::symbol::FuncKey) by `(namespace, function_name)` (the entry-point's //! `handler_location.file` is the project-relative POSIX path used as //! `FuncKey::namespace`, and `handler_name` is the leaf function //! name). From that node we run a BFS over forward call-graph edges diff --git a/tests/eval_corpus/tabulate.py b/tests/eval_corpus/tabulate.py index 2eb86e25..f9fb3d77 100644 --- a/tests/eval_corpus/tabulate.py +++ b/tests/eval_corpus/tabulate.py @@ -75,6 +75,15 @@ _CAP_BIT_TABLE = [ (1 << 18, "xss"), # SSTI (template_injection); also covers XSS sinks (1 << 19, "xxe"), (1 << 20, "prototype_pollution"), + # HTML_ESCAPE (1<<1) is the universal reflected-XSS *sink* cap across every + # language (`grep 'Sink(Cap::HTML_ESCAPE)' src/labels/` — PHP echo, JS + # innerHTML, Java servlet writers, etc.); the same bit is the html-escape + # *sanitizer* cap, so a finding only carries it as a sink when an un-encoded + # tainted value reached an HTML output. Placed LAST so any higher-priority + # sink bit (SQL_QUERY, FILE_IO, ...) on the same finding wins; a finding + # carrying only HTML_ESCAPE is reflected XSS. 
Without this, every + # taint-based reflected-XSS finding mis-buckets to "other". + (1 << 1, "xss"), ] # Static lens (see --static): SHELL_ESCAPE (1<<2) is the command-injection sink diff --git a/tests/fixtures/fp_guards/ast_layer_a_java_call_args/expectations.json b/tests/fixtures/fp_guards/ast_layer_a_java_call_args/expectations.json index bf3a3ba8..d45d1dba 100644 --- a/tests/fixtures/fp_guards/ast_layer_a_java_call_args/expectations.json +++ b/tests/fixtures/fp_guards/ast_layer_a_java_call_args/expectations.json @@ -1,7 +1,7 @@ { "required_findings": [ { "id_prefix": "java.reflection.class_forname", "min_count": 1 }, - { "id_prefix": "java.crypto.weak_digest", "min_count": 1 } + { "id_prefix": "java.crypto.weak_algorithm", "min_count": 1 } ], "forbidden_findings": [], "noise_budget": { diff --git a/tests/fixtures/real_world/java/mixed/deser_cmdi.expect.json b/tests/fixtures/real_world/java/mixed/deser_cmdi.expect.json index 2a77e6c9..574e4ee1 100644 --- a/tests/fixtures/real_world/java/mixed/deser_cmdi.expect.json +++ b/tests/fixtures/real_world/java/mixed/deser_cmdi.expect.json @@ -45,14 +45,14 @@ "notes": "Runtime.getRuntime().exec(command) with deserialized input; AST pattern correctly matches" }, { - "rule_id": "java.xss.getwriter_print", + "rule_id": "taint-unsanitised-flow", "severity": "MEDIUM", "must_not_match": true, "line_range": [ 11, 11 ], - "notes": "response.getWriter().println(\"Done\") — constant string, Layer B suppresses (regression guard)" + "notes": "response.getWriter().println(\"Done\") — constant string, must NOT raise reflected-XSS (Cap::HTML_ESCAPE). Regression guard retargeted from the retired java.xss.getwriter_print AST pattern to the taint sink that now owns reflected XSS." 
}, { "rule_id": "taint-unsanitised-flow", diff --git a/tests/fixtures/real_world/java/mixed/servlet_full.expect.json b/tests/fixtures/real_world/java/mixed/servlet_full.expect.json index 3efc4311..c53ad579 100644 --- a/tests/fixtures/real_world/java/mixed/servlet_full.expect.json +++ b/tests/fixtures/real_world/java/mixed/servlet_full.expect.json @@ -80,14 +80,14 @@ "notes": "source at 11:9 (request.getParameter(\"input\")) flows through SQL query (line 17) into result set output at out.println(rs.getString(1)); second-order taint via tainted query results" }, { - "rule_id": "java.xss.getwriter_print", + "rule_id": "taint-unsanitised-flow", "severity": "MEDIUM", "must_not_match": true, "line_range": [ 26, 26 ], - "notes": "response.getWriter().println(new String(data)) — file-read data, Layer B suppresses (regression guard)" + "notes": "response.getWriter().println(new String(data)) — file-read bytes, not reflected request input, must NOT raise reflected-XSS (Cap::HTML_ESCAPE). Regression guard retargeted from the retired java.xss.getwriter_print AST pattern to the taint sink that now owns reflected XSS." 
} ] } diff --git a/tests/fixtures/real_world/java/taint/catch_param_sink.expect.json b/tests/fixtures/real_world/java/taint/catch_param_sink.expect.json index 4622acaf..24e111fd 100644 --- a/tests/fixtures/real_world/java/taint/catch_param_sink.expect.json +++ b/tests/fixtures/real_world/java/taint/catch_param_sink.expect.json @@ -9,15 +9,7 @@ "must_match": true, "line_range": [5, 12], "evidence_contains": [], - "notes": "catch(Exception e) binds e as tainted; e flows to println sink via catch parameter" - }, - { - "rule_id": "java.xss.getwriter_print", - "severity": "MEDIUM", - "must_match": true, - "line_range": [10, 10], - "evidence_contains": [], - "notes": "response.getWriter().println() in catch block — AST pattern detects potential XSS via error response" + "notes": "catch(Exception e) binds e as tainted; e flows to response.getWriter().println at line 10 — reflected XSS via error response. Replaces the retired java.xss.getwriter_print AST pattern: reflected XSS is now a taint sink (Sink(Cap::HTML_ESCAPE)), so this is taint-confirmed rather than flagged on every writer call." 
} ] } diff --git a/tests/fixtures/real_world/java/taint/try_catch_sqli.expect.json b/tests/fixtures/real_world/java/taint/try_catch_sqli.expect.json index 871f53a2..31fd756e 100644 --- a/tests/fixtures/real_world/java/taint/try_catch_sqli.expect.json +++ b/tests/fixtures/real_world/java/taint/try_catch_sqli.expect.json @@ -19,21 +19,13 @@ "evidence_contains": [], "notes": "AST pattern detects executeQuery with string concatenation — SQL injection" }, - { - "rule_id": "java.xss.getwriter_print", - "severity": "MEDIUM", - "must_match": true, - "line_range": [12, 12], - "evidence_contains": [], - "notes": "response.getWriter().println() with user input — reflected XSS via error response" - }, { "rule_id": "taint-unsanitised-flow", "severity": "HIGH", "must_match": true, "line_range": [7, 12], "evidence_contains": [], - "notes": "request.getParameter flows to response.getWriter().println — user input reflected in error response" + "notes": "request.getParameter flows to response.getWriter().println at line 12 — user input reflected in error response. Replaces the retired java.xss.getwriter_print AST pattern: reflected XSS is now a taint sink (Sink(Cap::HTML_ESCAPE)), taint-confirmed rather than flagged on every writer call." } ] }