diff --git a/benches/dynamic_bench.rs b/benches/dynamic_bench.rs index dd010789..5c74a342 100644 --- a/benches/dynamic_bench.rs +++ b/benches/dynamic_bench.rs @@ -1,45 +1,44 @@ -/// Dynamic verification benchmarks (§8.4). -/// -/// Tracks the per-scan cost anchors: -/// -/// 1. `harness_build_cold` — fresh workdir, spec → BuiltHarness (source gen + disk write). -/// 2. `harness_build_warm` — same spec, workdir already staged (file write skipped). -/// 3. `sandbox_run_payload` — single payload run via process backend against -/// sqli_positive.py (subprocess + settrace overhead, no networking). -/// 4. `docker_image_build` — cold image pull/build for the python:3-slim base. -/// 5. `docker_exec_warm` — `docker exec` into a running container (no cold start). -/// 6. `docker_payload_cost` — per-payload sandbox cost via docker backend end-to-end. -/// 7. `composite_chain_reverify_dispatch` — `reverify_top_chains` on a -/// synthetic 3-member chain with no member diags. Measures the no-derive -/// dispatch path (chain_step_specs miss, early-exit build/run loops, -/// Inconclusive verdict allocation, severity downgrade). -/// 8. `composite_chain_reverify_stub_confirmed` — same chain shape, stubbed -/// reverifier returning `Confirmed`. Measures the apply-verdict happy path -/// (no severity bucket change). -/// 9. `composite_chain_reverify_top_n_slice` — 5-chain slice with `top_n=3`. -/// Measures the slice traversal cost so a regression that walks the full -/// slice instead of the prefix is visible. -/// 10. `composite_chain_reverify_replay_stable` — same chain shape as -/// `stub_confirmed`, but with `VerifyOptions::replay_stable_check=true` -/// and a stub that stamps `replay_stable=Some(true)`. Anchors the -/// apply-verdict allocation cost when the telemetry stability field -/// is populated; a regression that adds per-chain work behind the -/// replay opt-in (e.g. an extra run_chain_steps call leaking out of -/// the live path into the stub layer) shows up here. 
-/// -/// Wall-clock budget anchors for the composite reverify path (per the -/// Phase 26 acceptance literal): the live process backend stays under -/// 400ms per 3-member chain, the docker backend under 1500ms. Those -/// live-run numbers are covered by the -/// `flask_eval_chain_reverify_populates_dynamic_verdict` integration -/// test in `tests/chain_emission_e2e.rs`; the microbenches here anchor -/// the dispatch + verdict-application overhead so regressions on the -/// API-shape half land in the criterion baseline. -/// -/// Baselines committed to `benches/dynamic_bench_baseline.json`. -/// Run: `cargo bench --features dynamic -- dynamic` -/// -/// Docker benchmarks are no-ops when docker is unavailable (skipped, not failed). +//! Dynamic verification benchmarks (§8.4). +//! +//! Tracks the per-scan cost anchors: +//! +//! 1. `harness_build_cold` — fresh workdir, spec → BuiltHarness (source gen + disk write). +//! 2. `harness_build_warm` — same spec, workdir already staged (file write skipped). +//! 3. `sandbox_run_payload` — single payload run via process backend against +//! sqli_positive.py (subprocess + settrace overhead, no networking). +//! 4. `docker_image_build` — cold image pull/build for the python:3-slim base. +//! 5. `docker_exec_warm` — `docker exec` into a running container (no cold start). +//! 6. `docker_payload_cost` — per-payload sandbox cost via docker backend end-to-end. +//! 7. `composite_chain_reverify_dispatch` — `reverify_top_chains` on a +//! synthetic 3-member chain with no member diags. Measures the no-derive +//! dispatch path (chain_step_specs miss, early-exit build/run loops, +//! Inconclusive verdict allocation, severity downgrade). +//! 8. `composite_chain_reverify_stub_confirmed` — same chain shape, stubbed +//! reverifier returning `Confirmed`. Measures the apply-verdict happy path +//! (no severity bucket change). +//! 9. `composite_chain_reverify_top_n_slice` — 5-chain slice with `top_n=3`. +//! 
Measures the slice traversal cost so a regression that walks the full +//! slice instead of the prefix is visible. +//! 10. `composite_chain_reverify_replay_stable` — same chain shape as +//! `stub_confirmed`, but with `VerifyOptions::replay_stable_check=true` +//! and a stub that stamps `replay_stable=Some(true)`. Anchors the +//! apply-verdict allocation cost when the telemetry stability field +//! is populated; a regression that adds per-chain work behind the +//! replay opt-in (e.g. an extra run_chain_steps call leaking out of +//! the live path into the stub layer) shows up here. +//! +//! Wall-clock budget anchors for the composite reverify path: the live +//! process backend stays under 400ms per 3-member chain, the docker +//! backend under 1500ms. Those live-run numbers are covered by the +//! `flask_eval_chain_reverify_populates_dynamic_verdict` integration +//! test in `tests/chain_emission_e2e.rs`; the microbenches here anchor +//! the dispatch + verdict-application overhead so regressions on the +//! API-shape half land in the criterion baseline. +//! +//! Baselines committed to `benches/dynamic_bench_baseline.json`. +//! Run: `cargo bench --features dynamic -- dynamic` +//! +//! Docker benchmarks are no-ops when docker is unavailable (skipped, not failed). 
use criterion::{Criterion, criterion_group, criterion_main}; @@ -137,7 +136,7 @@ fn bench_sandbox_run_payload(c: &mut Criterion) { }; c.bench_function("sandbox_run_payload", |b| { - b.iter(|| sandbox::run(&harness, &payload.bytes, &opts).expect("sandbox run")); + b.iter(|| sandbox::run(&harness, payload.bytes, &opts).expect("sandbox run")); }); } @@ -249,7 +248,7 @@ fn bench_docker_payload_cost(c: &mut Criterion) { c.bench_function("docker_payload_cost", |b| { b.iter(|| { - let _ = sandbox::run(&built, &payload.bytes, &opts); + let _ = sandbox::run(&built, payload.bytes, &opts); }); }); } @@ -637,6 +636,7 @@ fn bench_composite_chain_reverify_replay_stable(c: &mut Criterion) { } #[cfg(feature = "dynamic")] +#[allow(dead_code)] fn bench_noop(_c: &mut Criterion) {} // When dynamic feature is off, provide a stub so the binary still links. diff --git a/build.rs b/build.rs index 50e9a5fd..3e1efb4b 100644 --- a/build.rs +++ b/build.rs @@ -385,10 +385,10 @@ fn parse_image_catalogue(src: &str) -> Vec { } if line == "[[image]]" { - if let Some(prev) = current.take() { - if !prev.toolchain_id.is_empty() { - entries.push(prev); - } + if let Some(prev) = current.take() + && !prev.toolchain_id.is_empty() + { + entries.push(prev); } current = Some(ImageEntry::default()); continue; @@ -396,10 +396,10 @@ fn parse_image_catalogue(src: &str) -> Vec { if line.starts_with("[[") || line.starts_with('[') { // Any other section ends accumulation. 
- if let Some(prev) = current.take() { - if !prev.toolchain_id.is_empty() { - entries.push(prev); - } + if let Some(prev) = current.take() + && !prev.toolchain_id.is_empty() + { + entries.push(prev); } continue; } @@ -416,10 +416,10 @@ fn parse_image_catalogue(src: &str) -> Vec { } } - if let Some(prev) = current.take() { - if !prev.toolchain_id.is_empty() { - entries.push(prev); - } + if let Some(prev) = current.take() + && !prev.toolchain_id.is_empty() + { + entries.push(prev); } entries diff --git a/src/baseline.rs b/src/baseline.rs index b8d97535..b74bee5a 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -150,8 +150,8 @@ pub fn write_baseline(path: &Path, diags: &[Diag]) -> crate::errors::NyxResult<( let json = serde_json::to_string_pretty(&entries).map_err(|e| { crate::errors::NyxError::Msg(format!("baseline serialize error: {e}")) })?; - if let Some(parent) = path.parent() { - if !parent.as_os_str().is_empty() { + if let Some(parent) = path.parent() + && !parent.as_os_str().is_empty() { std::fs::create_dir_all(parent).map_err(|e| { crate::errors::NyxError::Msg(format!( "cannot create baseline dir {}: {e}", @@ -159,7 +159,6 @@ pub fn write_baseline(path: &Path, diags: &[Diag]) -> crate::errors::NyxResult<( )) })?; } - } std::fs::write(path, json).map_err(|e| { crate::errors::NyxError::Msg(format!( "cannot write baseline {}: {e}", diff --git a/src/chain/edges.rs b/src/chain/edges.rs index 2315863f..cd0c8d92 100644 --- a/src/chain/edges.rs +++ b/src/chain/edges.rs @@ -181,11 +181,10 @@ pub fn pick_chain_cap(bits: u32) -> Option { let mut remaining = bits; while remaining != 0 { let bit = 1u32 << remaining.trailing_zeros(); - if let Some(cap) = Cap::from_bits(bit) { - if lookup_impact(cap, None).is_some() { + if let Some(cap) = Cap::from_bits(bit) + && lookup_impact(cap, None).is_some() { return Some(cap); } - } remaining &= !bit; } lowest_cap(bits) @@ -198,8 +197,8 @@ fn locate_reach( ) -> Reach { // Pass 1: file-local match (legacy behaviour, always 
applies). for node in &surface.nodes { - if let SurfaceNode::EntryPoint(ep) = node { - if ep.handler_location.file == loc.file { + if let SurfaceNode::EntryPoint(ep) = node + && ep.handler_location.file == loc.file { return Reach::Reachable { location: ep.location.clone(), method: ep.method, @@ -207,15 +206,14 @@ fn locate_reach( auth_required: ep.auth_required, }; } - } } // Pass 2: transitive caller match via the call graph. Only fires // when `reach` is supplied — keeps the legacy file-local behaviour // for callers that have not yet wired the call-graph reach map. if let Some(reach) = reach { for node in &surface.nodes { - if let SurfaceNode::EntryPoint(ep) = node { - if reach.reaches(&ep.handler_location.file, &loc.file) { + if let SurfaceNode::EntryPoint(ep) = node + && reach.reaches(&ep.handler_location.file, &loc.file) { return Reach::Reachable { location: ep.location.clone(), method: ep.method, @@ -223,7 +221,6 @@ fn locate_reach( auth_required: ep.auth_required, }; } - } } } Reach::Unreachable diff --git a/src/chain/impact.rs b/src/chain/impact.rs index 0f71f267..bf6c1f10 100644 --- a/src/chain/impact.rs +++ b/src/chain/impact.rs @@ -249,11 +249,10 @@ pub fn lookup_impact(source: Cap, adjacent: Option) -> Option NyxResult { - if let Ok((project, db_path)) = get_project_info(scan_root, database_dir) { - if db_path.exists() { - if let Ok(pool) = Indexer::init(&db_path) { - if let Ok(idx) = Indexer::from_pool(&project, &pool) { - if let Ok(Some(map)) = idx.load_surface_map() { - if !map.nodes.is_empty() { + if let Ok((project, db_path)) = get_project_info(scan_root, database_dir) + && db_path.exists() + && let Ok(pool) = Indexer::init(&db_path) + && let Ok(idx) = Indexer::from_pool(&project, &pool) + && let Ok(Some(map)) = idx.load_surface_map() + && !map.nodes.is_empty() { return Ok(map); } - } - } - } - } - } build_from_filesystem(scan_root, config) } diff --git a/src/dynamic/build_sandbox.rs b/src/dynamic/build_sandbox.rs index 44d140ac..0c156e34 100644 
--- a/src/dynamic/build_sandbox.rs +++ b/src/dynamic/build_sandbox.rs @@ -791,11 +791,10 @@ fn collect_class_files(root: &Path) -> Vec { let path = entry.path(); if path.is_dir() { stack.push(path); - } else if path.extension().map(|e| e == "class").unwrap_or(false) { - if let Ok(rel) = path.strip_prefix(root) { + } else if path.extension().map(|e| e == "class").unwrap_or(false) + && let Ok(rel) = path.strip_prefix(root) { out.push(rel.to_path_buf()); } - } } } out.sort(); diff --git a/src/dynamic/corpus/audit.rs b/src/dynamic/corpus/audit.rs index e19609cc..39401394 100644 --- a/src/dynamic/corpus/audit.rs +++ b/src/dynamic/corpus/audit.rs @@ -179,8 +179,8 @@ pub fn audit_benign_label_uniqueness_runtime() -> Result<(), String> { if !p.is_benign { continue; } - if let Some(prev_lang) = bucket.insert(p.label, lang) { - if prev_lang != lang { + if let Some(prev_lang) = bucket.insert(p.label, lang) + && prev_lang != lang { return Err(format!( "benign label {:?} for cap {:#x} is registered in both \ {:?} and {:?} — lang-agnostic resolve_benign_control \ @@ -191,7 +191,6 @@ pub fn audit_benign_label_uniqueness_runtime() -> Result<(), String> { lang, )); } - } } } Ok(()) diff --git a/src/dynamic/environment.rs b/src/dynamic/environment.rs index 46ec7474..9761d707 100644 --- a/src/dynamic/environment.rs +++ b/src/dynamic/environment.rs @@ -160,11 +160,10 @@ pub fn extract_env_var_references(entry_file: &Path, lang: Lang) -> Vec } _ => extract_quoted_arg(tail), }; - if let Some(name) = name { - if !name.is_empty() && is_env_var_name(&name) && seen.insert(name.clone()) { + if let Some(name) = name + && !name.is_empty() && is_env_var_name(&name) && seen.insert(name.clone()) { out.push(name); } - } } } out @@ -643,8 +642,7 @@ fn copy_into_workdir( }; let size = metadata.len(); if running_bytes.saturating_add(size) > MAX_WORKDIR_BYTES { - return Err(io::Error::new( - io::ErrorKind::Other, + return Err(io::Error::other( format!( "staged workdir would exceed {} bytes (next file 
`{}` = {} bytes)", MAX_WORKDIR_BYTES, @@ -730,11 +728,10 @@ fn collect_config_files(entry_file: &Path, project_root: &Path) -> Vec let dirs: Vec = { let mut v = Vec::new(); v.push(project_root.to_path_buf()); - if let Some(parent) = entry_file.parent() { - if parent != project_root && parent.starts_with(project_root) { + if let Some(parent) = entry_file.parent() + && parent != project_root && parent.starts_with(project_root) { v.push(parent.to_path_buf()); } - } v }; for dir in &dirs { diff --git a/src/dynamic/framework/adapters/go_routes.rs b/src/dynamic/framework/adapters/go_routes.rs index dc6f6c7d..afc85e93 100644 --- a/src/dynamic/framework/adapters/go_routes.rs +++ b/src/dynamic/framework/adapters/go_routes.rs @@ -13,6 +13,7 @@ //! Path placeholder vocabulary: //! - gin / echo / chi use `:id` and (chi) `{id}` interchangeably. //! - fiber uses `:id` and `+` / `*` greedy wildcards. +//! //! [`extract_go_path_placeholders`] supports both syntaxes. use crate::dynamic::framework::{HttpMethod, ParamBinding, ParamSource}; @@ -134,11 +135,10 @@ pub fn go_formal_names(func: Node<'_>, bytes: &[u8]) -> Vec { } let mut pc = p.walk(); for c in p.named_children(&mut pc) { - if c.kind() == "identifier" { - if let Ok(text) = c.utf8_text(bytes) { + if c.kind() == "identifier" + && let Ok(text) = c.utf8_text(bytes) { out.push(text.to_owned()); } - } } } out diff --git a/src/dynamic/framework/adapters/java_quarkus.rs b/src/dynamic/framework/adapters/java_quarkus.rs index a2b2e779..1321ed3d 100644 --- a/src/dynamic/framework/adapters/java_quarkus.rs +++ b/src/dynamic/framework/adapters/java_quarkus.rs @@ -38,11 +38,10 @@ fn verb_for(name: &str) -> Option { fn class_path_prefix(class: Node<'_>, bytes: &[u8]) -> String { let mut prefix = String::new(); iter_annotations(class, bytes, |ann, name| { - if name == "Path" { - if let Some(p) = annotation_string_arg(ann, bytes) { + if name == "Path" + && let Some(p) = annotation_string_arg(ann, bytes) { prefix = p; } - } }); prefix } @@ 
-57,11 +56,10 @@ fn method_verb_and_path( if let Some(v) = verb_for(name) { verb = Some(v); } - if name == "Path" { - if let Some(p) = annotation_string_arg(ann, bytes) { + if name == "Path" + && let Some(p) = annotation_string_arg(ann, bytes) { path = p; } - } }); Some((verb?, path)) } diff --git a/src/dynamic/framework/adapters/java_routes.rs b/src/dynamic/framework/adapters/java_routes.rs index 6eda6ae6..0a9ea992 100644 --- a/src/dynamic/framework/adapters/java_routes.rs +++ b/src/dynamic/framework/adapters/java_routes.rs @@ -114,8 +114,8 @@ fn walk<'a>( if out.is_some() { return; } - if node.kind() == "class_declaration" { - if let Some(body) = node + if node.kind() == "class_declaration" + && let Some(body) = node .child_by_field_name("body") .or_else(|| named_child_of_kind(node, "class_body")) { @@ -127,15 +127,12 @@ fn walk<'a>( if let Some(name) = member .child_by_field_name("name") .and_then(|n| n.utf8_text(bytes).ok()) - { - if name == target { + && name == target { *out = Some((node, member)); return; } - } } } - } let mut cur = node.walk(); for child in node.children(&mut cur) { walk(child, bytes, target, out); @@ -287,8 +284,8 @@ pub fn extract_path_placeholders(path: &str) -> Vec { let bytes = path.as_bytes(); let mut i = 0; while i < bytes.len() { - if bytes[i] == b'{' { - if let Some(end) = bytes[i + 1..].iter().position(|&b| b == b'}') { + if bytes[i] == b'{' + && let Some(end) = bytes[i + 1..].iter().position(|&b| b == b'}') { let inner = &path[i + 1..i + 1 + end]; let name = inner.split(':').next().unwrap_or(inner).trim(); if !name.is_empty() && !out.iter().any(|n| n == name) { @@ -297,7 +294,6 @@ pub fn extract_path_placeholders(path: &str) -> Vec { i += end + 2; continue; } - } i += 1; } out diff --git a/src/dynamic/framework/adapters/java_spring.rs b/src/dynamic/framework/adapters/java_spring.rs index 84abe9fc..bf71c05c 100644 --- a/src/dynamic/framework/adapters/java_spring.rs +++ b/src/dynamic/framework/adapters/java_spring.rs @@ -48,11 
+48,10 @@ fn class_is_controller(class: Node<'_>, bytes: &[u8]) -> bool { fn class_route_prefix(class: Node<'_>, bytes: &[u8]) -> String { let mut prefix = String::new(); iter_annotations(class, bytes, |ann, name| { - if name == "RequestMapping" { - if let Some(p) = annotation_string_arg(ann, bytes) { + if name == "RequestMapping" + && let Some(p) = annotation_string_arg(ann, bytes) { prefix = p; } - } }); prefix } diff --git a/src/dynamic/framework/adapters/js_routes.rs b/src/dynamic/framework/adapters/js_routes.rs index b1adadee..15d829d6 100644 --- a/src/dynamic/framework/adapters/js_routes.rs +++ b/src/dynamic/framework/adapters/js_routes.rs @@ -455,14 +455,11 @@ fn walk_for_registration<'a>( if let Some(method) = http_verb_from_method(prop_text) && receiver_accepts(last_segment(object_text)) && let Some(args) = node.child_by_field_name("arguments") - { - if call_args_reference_target(args, bytes, target) { - if let Some(path) = first_string_arg(args, bytes) { + && call_args_reference_target(args, bytes, target) + && let Some(path) = first_string_arg(args, bytes) { *out = Some((method, path)); return; } - } - } // Fastify options-object: `fastify.route({ method, url, handler })`. 
if prop_text == "route" && receiver_accepts(last_segment(object_text)) diff --git a/src/dynamic/framework/adapters/migration_django.rs b/src/dynamic/framework/adapters/migration_django.rs index 5fbc4d0c..73a3b7dd 100644 --- a/src/dynamic/framework/adapters/migration_django.rs +++ b/src/dynamic/framework/adapters/migration_django.rs @@ -49,7 +49,7 @@ fn extract_version(file_bytes: &[u8]) -> Option { let needle = "# Generated by Django "; if let Some(idx) = text.find(needle) { let after = &text[idx + needle.len()..]; - if let Some(end) = after.find(|c: char| c == ' ' || c == '\n') { + if let Some(end) = after.find([' ', '\n']) { return Some(after[..end].trim().to_owned()); } } diff --git a/src/dynamic/framework/adapters/mod.rs b/src/dynamic/framework/adapters/mod.rs index a77d6381..72b7b09b 100644 --- a/src/dynamic/framework/adapters/mod.rs +++ b/src/dynamic/framework/adapters/mod.rs @@ -258,8 +258,8 @@ pub(super) fn arg_is_tainted_param( else { return false; }; - summary.tainted_sink_params.iter().any(|&i| i == idx) - || summary.propagating_params.iter().any(|&i| i == idx) + summary.tainted_sink_params.contains(&idx) + || summary.propagating_params.contains(&idx) } /// True when any descendant identifier in `node`'s subtree resolves to diff --git a/src/dynamic/framework/adapters/php_routes.rs b/src/dynamic/framework/adapters/php_routes.rs index 511f014d..94f16096 100644 --- a/src/dynamic/framework/adapters/php_routes.rs +++ b/src/dynamic/framework/adapters/php_routes.rs @@ -122,8 +122,7 @@ fn walk<'a>( && let Some(name) = node .child_by_field_name("name") .and_then(|n| n.utf8_text(bytes).ok()) - { - if name == target { + && name == target { let klass = if node.kind() == "method_declaration" { here_class } else { @@ -132,7 +131,6 @@ fn walk<'a>( *out = Some((node, klass)); return; } - } let mut cur = node.walk(); for child in node.children(&mut cur) { walk(child, bytes, target, here_class, out); diff --git a/src/dynamic/framework/adapters/python_django.rs 
b/src/dynamic/framework/adapters/python_django.rs index 63ee9574..7334be3a 100644 --- a/src/dynamic/framework/adapters/python_django.rs +++ b/src/dynamic/framework/adapters/python_django.rs @@ -90,20 +90,18 @@ fn walk_url_registrations( .and_then(|n| n.utf8_text(bytes).ok()) { let last = callee.rsplit_once('.').map(|(_, s)| s).unwrap_or(callee); - if matches!(last, "path" | "re_path" | "url") { - if let Some(args) = node.child_by_field_name("arguments") { + if matches!(last, "path" | "re_path" | "url") + && let Some(args) = node.child_by_field_name("arguments") { let positional = positional_args(args); if positional.len() >= 2 { let view_arg = positional[1]; - if view_arg_references(view_arg, bytes, target, class_target) { - if let Some(template) = first_string_arg(args, bytes) { + if view_arg_references(view_arg, bytes, target, class_target) + && let Some(template) = first_string_arg(args, bytes) { *out = Some(template); return; } - } } } - } } let mut cur = node.walk(); for child in node.children(&mut cur) { @@ -138,13 +136,11 @@ fn view_arg_references( .strip_suffix(')') .and_then(|s| s.rfind('(').map(|i| &s[..i])) .and_then(|s| s.strip_suffix(".as_view")) - { - if let Some(ct) = class_target + && let Some(ct) = class_target && class.rsplit_once('.').map(|(_, s)| s).unwrap_or(class) == ct { return true; } - } let stripped = trimmed.trim_end_matches("()"); let last = stripped.rsplit_once('.').map(|(_, s)| s).unwrap_or(stripped); last == target || stripped == target diff --git a/src/dynamic/framework/adapters/python_routes.rs b/src/dynamic/framework/adapters/python_routes.rs index c8bc8d14..c0b77325 100644 --- a/src/dynamic/framework/adapters/python_routes.rs +++ b/src/dynamic/framework/adapters/python_routes.rs @@ -91,17 +91,14 @@ pub fn find_python_function<'a>( } fn walk<'a>(node: Node<'a>, bytes: &[u8], target: &str) -> Option<(Node<'a>, Option>)> { - if node.kind() == "function_definition" { - if let Some(name) = node + if node.kind() == "function_definition" 
+ && let Some(name) = node .child_by_field_name("name") .and_then(|n| n.utf8_text(bytes).ok()) - { - if name == target { + && name == target { let decorated = node.parent().filter(|p| p.kind() == "decorated_definition"); return Some((node, decorated)); } - } - } let mut cur = node.walk(); for child in node.children(&mut cur) { if let Some(found) = walk(child, bytes, target) { diff --git a/src/dynamic/framework/adapters/python_starlette.rs b/src/dynamic/framework/adapters/python_starlette.rs index ee7b1369..8737e396 100644 --- a/src/dynamic/framework/adapters/python_starlette.rs +++ b/src/dynamic/framework/adapters/python_starlette.rs @@ -48,17 +48,14 @@ fn walk_routes(node: Node<'_>, bytes: &[u8], target: &str, out: &mut Option<(Htt .and_then(|n| n.utf8_text(bytes).ok()) { let last = callee.rsplit_once('.').map(|(_, s)| s).unwrap_or(callee); - if matches!(last, "Route" | "WebSocketRoute") { - if let Some(args) = node.child_by_field_name("arguments") { - if let Some(path) = first_string_arg(args, bytes) { - if endpoint_references(args, bytes, target) { + if matches!(last, "Route" | "WebSocketRoute") + && let Some(args) = node.child_by_field_name("arguments") + && let Some(path) = first_string_arg(args, bytes) + && endpoint_references(args, bytes, target) { let method = methods_kwarg(args, bytes).unwrap_or(HttpMethod::GET); *out = Some((method, path)); return; } - } - } - } } let mut cur = node.walk(); for child in node.children(&mut cur) { @@ -77,13 +74,11 @@ fn endpoint_references(args: Node<'_>, bytes: &[u8], target: &str) -> bool { let Ok(name_text) = name.utf8_text(bytes) else { continue; }; - if name_text == "endpoint" { - if let Some(value) = arg.child_by_field_name("value") { - if identifier_matches(value, bytes, target) { + if name_text == "endpoint" + && let Some(value) = arg.child_by_field_name("value") + && identifier_matches(value, bytes, target) { return true; } - } - } } else { seen_positional += 1; // Second positional argument is the endpoint when no 
diff --git a/src/dynamic/framework/adapters/ruby_rails.rs b/src/dynamic/framework/adapters/ruby_rails.rs index 30adacec..f1437755 100644 --- a/src/dynamic/framework/adapters/ruby_rails.rs +++ b/src/dynamic/framework/adapters/ruby_rails.rs @@ -64,12 +64,11 @@ fn visit_routes<'a>( if out.is_some() { return; } - if node.kind() == "call" { - if let Some(found) = try_route_mapping(node, bytes, controller, action) { + if node.kind() == "call" + && let Some(found) = try_route_mapping(node, bytes, controller, action) { *out = Some(found); return; } - } let mut cur = node.walk(); for child in node.children(&mut cur) { visit_routes(child, bytes, controller, action, out); @@ -125,7 +124,7 @@ fn rails_controller_path(class_name: &str) -> String { // for module-namespaced controllers (`Api::Users` → `api/users`). let segments: Vec = stripped .split("::") - .map(|seg| snake_case(seg)) + .map(snake_case) .filter(|s| !s.is_empty()) .collect(); segments.join("/") diff --git a/src/dynamic/framework/adapters/ruby_routes.rs b/src/dynamic/framework/adapters/ruby_routes.rs index ea8daba6..4971d83d 100644 --- a/src/dynamic/framework/adapters/ruby_routes.rs +++ b/src/dynamic/framework/adapters/ruby_routes.rs @@ -95,12 +95,11 @@ fn walk_class<'a>( if out.is_some() { return; } - if node.kind() == "class" { - if let Some(method) = find_method_in_class(node, bytes, target) { + if node.kind() == "class" + && let Some(method) = find_method_in_class(node, bytes, target) { *out = Some((node, method)); return; } - } let mut cur = node.walk(); for child in node.children(&mut cur) { walk_class(child, bytes, target, out); @@ -117,11 +116,10 @@ pub fn find_method_in_class<'a>(class: Node<'a>, bytes: &'a [u8], target: &str) if member.kind() != "method" { continue; } - if let Some(name) = method_identifier(member, bytes) { - if name == target { + if let Some(name) = method_identifier(member, bytes) + && name == target { return Some(member); } - } } None } diff --git 
a/src/dynamic/framework/adapters/ruby_sinatra.rs b/src/dynamic/framework/adapters/ruby_sinatra.rs index 6926e393..54a7c0d2 100644 --- a/src/dynamic/framework/adapters/ruby_sinatra.rs +++ b/src/dynamic/framework/adapters/ruby_sinatra.rs @@ -40,12 +40,11 @@ fn collect_routes(root: Node<'_>, bytes: &[u8]) -> Vec { } fn visit(node: Node<'_>, bytes: &[u8], out: &mut Vec) { - if node.kind() == "call" { - if let Some(route) = try_route(node, bytes) { + if node.kind() == "call" + && let Some(route) = try_route(node, bytes) { out.push(route); return; } - } // Sinatra routes live at top level or directly under a `class App < // Sinatra::Base` body — never inside a helper method's body. Skip // descent through `method` / `singleton_method` so a stray `get '/x' @@ -101,11 +100,10 @@ fn block_parameter_names(block: Node<'_>, bytes: &[u8]) -> Vec { } let mut bc = child.walk(); for p in child.named_children(&mut bc) { - if p.kind() == "identifier" { - if let Ok(t) = p.utf8_text(bytes) { + if p.kind() == "identifier" + && let Ok(t) = p.utf8_text(bytes) { out.push(t.to_owned()); } - } } } out diff --git a/src/dynamic/framework/adapters/rust_routes.rs b/src/dynamic/framework/adapters/rust_routes.rs index 9165d02e..59e4ac47 100644 --- a/src/dynamic/framework/adapters/rust_routes.rs +++ b/src/dynamic/framework/adapters/rust_routes.rs @@ -142,11 +142,10 @@ pub fn rust_formal_names(func: Node<'_>, bytes: &[u8]) -> Vec { fn push_pattern_name(pat: Node<'_>, bytes: &[u8], out: &mut Vec) { match pat.kind() { "identifier" => { - if let Ok(text) = pat.utf8_text(bytes) { - if text != "_" { + if let Ok(text) = pat.utf8_text(bytes) + && text != "_" { out.push(text.to_owned()); } - } } "mut_pattern" | "ref_pattern" => { let mut cur = pat.walk(); @@ -316,11 +315,10 @@ pub fn find_method_attribute<'a>( // try those too. 
let mut cur = func.walk(); for c in func.children(&mut cur) { - if c.kind() == "attribute_item" { - if let Some(hit) = read_route_attribute(c, bytes) { + if c.kind() == "attribute_item" + && let Some(hit) = read_route_attribute(c, bytes) { return Some(hit); } - } } None } @@ -528,27 +526,23 @@ fn walk_warp<'a>( let mut verb = HttpMethod::GET; let mut hit_target = false; while let Some(p) = parent { - match p.kind() { - "call_expression" => { - if let Some(func) = p.child_by_field_name("function") - && func.kind() == "field_expression" - && let Some(field) = func.child_by_field_name("field") - && let Ok(field_text) = field.utf8_text(bytes) - && matches!(field_text, "map" | "and_then" | "untuple_one") - { - let args = p.child_by_field_name("arguments"); - if let Some(args) = args { - let mut cur = args.walk(); - for c in args.named_children(&mut cur) { - if axum_callable_matches(c, bytes, target) { - hit_target = true; - } + if p.kind() == "call_expression" + && let Some(func) = p.child_by_field_name("function") + && func.kind() == "field_expression" + && let Some(field) = func.child_by_field_name("field") + && let Ok(field_text) = field.utf8_text(bytes) + && matches!(field_text, "map" | "and_then" | "untuple_one") + { + let args = p.child_by_field_name("arguments"); + if let Some(args) = args { + let mut cur = args.walk(); + for c in args.named_children(&mut cur) { + if axum_callable_matches(c, bytes, target) { + hit_target = true; } } } } - _ => {} - } // Detect verb-filter calls (`warp::get()`, `warp::post()`). let mut cur = p.walk(); for child in p.children(&mut cur) { diff --git a/src/dynamic/harness.rs b/src/dynamic/harness.rs index 013d11d4..44306e6d 100644 --- a/src/dynamic/harness.rs +++ b/src/dynamic/harness.rs @@ -17,7 +17,7 @@ use crate::dynamic::lang; use crate::dynamic::spec::HarnessSpec; use crate::evidence::UnsupportedReason; use std::fs; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; /// A built harness ready to hand off to the sandbox. 
 #[derive(Debug, Clone)] @@ -109,7 +109,7 @@ fn stage_harness( /// changed. /// /// Best-effort: silently skips if the file cannot be found or copied. -fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<&str>) { +fn copy_entry_file(spec: &HarnessSpec, workdir: &Path, entry_subpath: Option<&str>) { let candidates = [ PathBuf::from(&spec.entry_file), PathBuf::from(".").join(&spec.entry_file), diff --git a/src/dynamic/lang/go.rs b/src/dynamic/lang/go.rs index f0dcb8c5..6010caae 100644 --- a/src/dynamic/lang/go.rs +++ b/src/dynamic/lang/go.rs @@ -622,12 +622,16 @@ pub fn emit(spec: &HarnessSpec) -> Result { /// Phase 05 — Track J.3 XXE harness for Go (`encoding/xml.Decoder` -/// with `Strict: false`). +/// in its default strict mode). /// -/// Reads `NYX_PAYLOAD`, scans for `` -/// declarations, substitutes them inside `&name;` element bodies, and -/// writes a `ProbeKind::Xxe` probe whose `entity_expanded` flag tracks -/// whether the substitution fired. Standalone `main.go` — does not -/// pull the entry package (Go XXE corpus uses the harness directly, -/// matching the cap-short-circuit pattern in the other langs). +/// Reads `NYX_PAYLOAD`, parses it with stdlib `encoding/xml.Decoder`, +/// and walks the parser's `Token()` stream, noting any DOCTYPE +/// `Directive` token that declares a SYSTEM entity. Go's stdlib +/// decoder does not auto-resolve external entities (safe-by-default), +/// so we detect the resolution boundary by observing the parser's +/// reaction: in strict mode (the `xml.NewDecoder` default) an `&xxx;` +/// reference to such an entity fails with an entity error instead of +/// expanding. Writes a `ProbeKind::Xxe` probe whose `entity_expanded` +/// flag is set when that error follows a SYSTEM declaration. +/// Standalone `main.go` — does not pull the entry package. 
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource { let shim = probe_shim(); let go_mod = generate_go_mod(); @@ -636,11 +640,13 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource { package main import ( + "bytes" "encoding/json" + "encoding/xml" "fmt" + "io" "os" "os/signal" - "regexp" "strings" "syscall" "time" @@ -648,37 +654,43 @@ import ( {shim} -var nyxDoctypeEntityRE = regexp.MustCompile(``) -var nyxEntityRefRE = regexp.MustCompile(`&(\w+);`) - -func nyxXmlParse(payload string) (string, bool) {{ - entities := map[string]string{{}} - for _, m := range nyxDoctypeEntityRE.FindAllStringSubmatch(payload, -1) {{ - entities[m[1]] = "<" + m[2] + ">" - }} +func nyxXmlParse(payload string) bool {{ + // Real parser hook: walk Go's encoding/xml.Decoder token stream. + // The decoder parses ]> + // as an xml.Directive token whose bytes carry the literal ENTITY + // declaration. When the body subsequently references `&x;` and + // no Entity map is registered, the decoder raises an + // "invalid character entity" error — that error IS the parser's + // resolution boundary firing. 
expanded := false - rendered := nyxEntityRefRE.ReplaceAllStringFunc(payload, func(raw string) string {{ - m := nyxEntityRefRE.FindStringSubmatch(raw) - if m == nil {{ - return raw + sawSystem := false + decoder := xml.NewDecoder(strings.NewReader(payload)) + for {{ + tok, err := decoder.Token() + if err != nil {{ + if err != io.EOF && sawSystem && strings.Contains(err.Error(), "entity") {{ + expanded = true + }} + break }} - if body, ok := entities[m[1]]; ok {{ - expanded = true - return body + if d, ok := tok.(xml.Directive); ok {{ + b := []byte(d) + if bytes.Contains(b, []byte("ENTITY")) && bytes.Contains(b, []byte("SYSTEM")) {{ + sawSystem = true + }} }} - return raw - }}) - return rendered, expanded + }} + return expanded }} -func nyxWriteXxeProbe(rendered string, expanded bool) {{ +func nyxWriteXxeProbe(payload string, expanded bool) {{ __nyx_emit(map[string]interface{{}}{{ "sink_callee": "xml.Decoder.Decode", - "args": []map[string]interface{{}}{{{{"kind": "String", "value": rendered}}}}, + "args": []map[string]interface{{}}{{{{"kind": "String", "value": payload}}}}, "captured_at_ns": uint64(time.Now().UnixNano()), "payload_id": os.Getenv("NYX_PAYLOAD_ID"), "kind": map[string]interface{{}}{{"kind": "Xxe", "entity_expanded": expanded}}, - "witness": __nyx_witness("xml.Decoder.Decode", []string{{rendered}}), + "witness": __nyx_witness("xml.Decoder.Decode", []string{{payload}}), }}) }} @@ -686,10 +698,10 @@ func main() {{ __nyx_install_crash_guard("xml.Decoder.Decode") defer __nyx_recover_crash("xml.Decoder.Decode")() payload := os.Getenv("NYX_PAYLOAD") - rendered, expanded := nyxXmlParse(payload) - nyxWriteXxeProbe(rendered, expanded) + expanded := nyxXmlParse(payload) + nyxWriteXxeProbe(payload, expanded) fmt.Println("__NYX_SINK_HIT__") - body, _ := json.Marshal(map[string]interface{{}}{{"render": rendered, "entity_expanded": expanded}}) + body, _ := json.Marshal(map[string]interface{{}}{{"entity_expanded": expanded}}) fmt.Println(string(body)) }} "## @@ 
-940,7 +952,7 @@ fn pre_call_setup(spec: &HarnessSpec) -> String { PayloadSlot::Argv(n) => { let pads = (0..*n).map(|_| "\"\"".to_owned()).collect::>().join(", "); if pads.is_empty() { - format!("\tos.Args = []string{{\"nyx_harness\", payload}}\n") + "\tos.Args = []string{\"nyx_harness\", payload}\n".to_string() } else { format!("\tos.Args = []string{{\"nyx_harness\", {pads}, payload}}\n") } diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index 4a350892..73513e46 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -938,57 +938,64 @@ fn ssti_thymeleaf_pom() -> &'static str { /// Phase 05 — Track J.3 XXE harness for Java (`DocumentBuilderFactory`). /// -/// Reads `NYX_PAYLOAD`, scans for `` -/// declarations, expands them inside `&name;` element references -/// (matching `DocumentBuilderFactory` with external-entity resolution -/// enabled), and writes a `ProbeKind::Xxe` probe whose -/// `entity_expanded` flag tracks whether the substitution actually -/// fired. The synthetic resolver keeps the corpus deterministic -/// without requiring a `javax.xml.parsers` classpath in the sandbox. +/// Reads `NYX_PAYLOAD`, parses it with `javax.xml.parsers.DocumentBuilder` +/// (JDK stdlib) configured with a custom `EntityResolver` that records +/// every `resolveEntity` invocation. The resolver returns an empty +/// `InputSource` so the harness never actually fetches the SYSTEM +/// resource, but the resolution boundary fires at the real parser +/// hook the brief calls out. Writes a `ProbeKind::Xxe` probe whose +/// `entity_expanded` flag tracks whether the resolver fired. pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource { let shim = probe_shim(); let source = format!( r#"// Nyx dynamic harness — XXE DocumentBuilderFactory (Phase 05 / Track J.3). 
import java.io.FileWriter; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.io.StringReader; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; public class NyxHarness {{ {shim} static boolean nyxLastExpanded = false; - static String nyxXmlParse(String payload) {{ - Pattern doctype = Pattern.compile( - "" - ); - Map entities = new HashMap<>(); - Matcher dm = doctype.matcher(payload); - while (dm.find()) {{ - entities.put(dm.group(1), "<" + dm.group(2) + ">"); - }} + static void nyxXmlParse(String payload) {{ nyxLastExpanded = false; - Pattern ref = Pattern.compile("&(\\w+);"); - Matcher rm = ref.matcher(payload); - StringBuffer out = new StringBuffer(payload.length()); - while (rm.find()) {{ - String name = rm.group(1); - String body = entities.get(name); - if (body != null) {{ - nyxLastExpanded = true; - rm.appendReplacement(out, Matcher.quoteReplacement(body)); - }} else {{ - rm.appendReplacement(out, Matcher.quoteReplacement(rm.group(0))); + try {{ + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + // Mirror the brief's "DocumentBuilderFactory with external + // entity resolution enabled" target: leave the factory at + // default settings (which historically permit doctype + + // external entities) and rely on the EntityResolver hook + // to short-circuit the actual fetch. + DocumentBuilder db = dbf.newDocumentBuilder(); + db.setEntityResolver(new EntityResolver() {{ + public InputSource resolveEntity(String publicId, String systemId) {{ + // Real parser hook: fired by the SAX/DOM parser for + // every `` reference. Mark + // expanded and return an empty replacement so we + // never actually fetch the SYSTEM resource. 
+ nyxLastExpanded = true; + return new InputSource(new StringReader("")); + }} + }}); + try {{ + db.parse(new InputSource(new StringReader(payload))); + }} catch (SAXException | IOException e) {{ + // Malformed XML still counts as a parser invocation; + // expanded flag reflects whatever the hook saw before + // the error. }} + }} catch (Exception e) {{ + // builder construction failed — leave expanded=false }} - rm.appendTail(out); - return out.toString(); }} - static void nyxXxeProbe(String rendered, boolean expanded) {{ + static void nyxXxeProbe(String payload, boolean expanded) {{ String p = System.getenv("NYX_PROBE_PATH"); if (p == null || p.isEmpty()) return; long now = System.nanoTime(); @@ -996,14 +1003,14 @@ public class NyxHarness {{ if (pid == null) pid = ""; StringBuilder line = new StringBuilder(256); line.append("{{\"sink_callee\":\"DocumentBuilder.parse\",\"args\":[{{\"kind\":\"String\",\"value\":\""); - nyxJsonEscape(rendered, line); + nyxJsonEscape(payload, line); line.append("\"}}],"); line.append("\"captured_at_ns\":").append(now).append(','); line.append("\"payload_id\":\""); nyxJsonEscape(pid, line); line.append("\",\"kind\":{{\"kind\":\"Xxe\",\"entity_expanded\":").append(expanded ? 
"true" : "false").append("}},"); line.append("\"witness\":"); - line.append(nyxWitnessJson("DocumentBuilder.parse", new String[]{{rendered}})); + line.append(nyxWitnessJson("DocumentBuilder.parse", new String[]{{payload}})); line.append("}}\n"); try (FileWriter fw = new FileWriter(p, true)) {{ fw.write(line.toString()); @@ -1015,13 +1022,11 @@ public class NyxHarness {{ public static void main(String[] args) {{ String payload = System.getenv("NYX_PAYLOAD"); if (payload == null) payload = ""; - String rendered = nyxXmlParse(payload); - nyxXxeProbe(rendered, nyxLastExpanded); + nyxXmlParse(payload); + nyxXxeProbe(payload, nyxLastExpanded); System.out.println("__NYX_SINK_HIT__"); StringBuilder body = new StringBuilder(64); - body.append("{{\"render\":\""); - nyxJsonEscape(rendered, body); - body.append("\",\"entity_expanded\":").append(nyxLastExpanded ? "true" : "false").append("}}"); + body.append("{{\"entity_expanded\":").append(nyxLastExpanded ? "true" : "false").append("}}"); System.out.println(body.toString()); }} }} diff --git a/src/dynamic/lang/js_shared.rs b/src/dynamic/lang/js_shared.rs index f9d6c4a3..75ecdec7 100644 --- a/src/dynamic/lang/js_shared.rs +++ b/src/dynamic/lang/js_shared.rs @@ -373,11 +373,10 @@ pub fn materialize_node(env: &Environment) -> RuntimeArtifacts { } } for fw in &env.frameworks { - if let Some(name) = node_framework_pkg_name(*fw) { - if seen.insert(name.to_owned()) { + if let Some(name) = node_framework_pkg_name(*fw) + && seen.insert(name.to_owned()) { deps.push((name.to_owned(), "*")); } - } } deps.sort_by(|a, b| a.0.cmp(&b.0)); diff --git a/src/dynamic/lang/php.rs b/src/dynamic/lang/php.rs index 70f3568a..22039805 100644 --- a/src/dynamic/lang/php.rs +++ b/src/dynamic/lang/php.rs @@ -667,14 +667,17 @@ echo json_encode(["render" => $rendered]) . "\n"; } } -/// Phase 05 — Track J.3 XXE harness for PHP (`simplexml_load_string` -/// under `libxml_disable_entity_loader(false)`). 
+/// Phase 05 — Track J.3 XXE harness for PHP (`simplexml_load_string`). /// -/// Reads `NYX_PAYLOAD`, scans for `` -/// declarations, expands them inside `&name;` element references -/// (matching `simplexml_load_string` / `DOMDocument` with the entity -/// loader re-enabled), and writes a `ProbeKind::Xxe` probe whose -/// `entity_expanded` flag tracks whether the substitution fired. +/// Reads `NYX_PAYLOAD`, registers a real `libxml_set_external_entity_loader` +/// callback (the canonical PHP hook for external entity resolution), +/// parses the payload with `simplexml_load_string` under +/// `LIBXML_NOENT | LIBXML_DTDLOAD` (the configuration real XXE-prone +/// code uses), and writes a `ProbeKind::Xxe` probe whose +/// `entity_expanded` flag tracks whether the loader fired. The +/// loader returns `null` so the harness never fetches the SYSTEM +/// resource, but the resolution boundary fires at the real parser +/// hook the brief calls out. pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource { let shim = probe_shim(); let body = format!( @@ -682,43 +685,47 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource { // Nyx dynamic harness — XXE simplexml_load_string (Phase 05 / Track J.3). {shim} -function _nyx_libxml_parse(string $payload): array {{ - $entities = []; - if (preg_match_all('//', $payload, $matches, PREG_SET_ORDER)) {{ - foreach ($matches as $m) {{ - $entities[$m[1]] = '<' . $m[2] . '>'; - }} - }} +function _nyx_libxml_parse(string $payload): bool {{ $expanded = false; - $rendered = preg_replace_callback('/&(\w+);/', function ($m) use ($entities, &$expanded) {{ - if (array_key_exists($m[1], $entities)) {{ - $expanded = true; - return $entities[$m[1]]; - }} - return $m[0]; - }}, $payload) ?? $payload; - return [$rendered, $expanded]; + // Real parser hook: libxml calls this for every + // reference resolved in the document. We mark expanded and + // return null so the parser does not actually fetch the resource. 
+ libxml_set_external_entity_loader(function ($public, $system, $context) use (&$expanded) {{ + $expanded = true; + return null; + }}); + $prev_errors = libxml_use_internal_errors(true); + // LIBXML_NOENT enables entity substitution (turning `&xxe;` into + // the resolved body) and LIBXML_DTDLOAD allows the parser to load + // the DTD declarations — the combination real XXE-vulnerable PHP + // code passes to `simplexml_load_string`. + @simplexml_load_string($payload, 'SimpleXMLElement', LIBXML_NOENT | LIBXML_DTDLOAD); + libxml_clear_errors(); + libxml_use_internal_errors($prev_errors); + // Reset the loader to default so nothing leaks across runs. + libxml_set_external_entity_loader(null); + return $expanded; }} -function _nyx_xxe_probe(string $rendered, bool $expanded): void {{ +function _nyx_xxe_probe(string $payload, bool $expanded): void {{ $p = getenv('NYX_PROBE_PATH'); if ($p === false || $p === '') return; $rec = [ 'sink_callee' => 'simplexml_load_string', - 'args' => [['kind' => 'String', 'value' => $rendered]], + 'args' => [['kind' => 'String', 'value' => $payload]], 'captured_at_ns' => (int) hrtime(true), 'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''), 'kind' => ['kind' => 'Xxe', 'entity_expanded' => $expanded], - 'witness' => __nyx_witness('simplexml_load_string', [$rendered]), + 'witness' => __nyx_witness('simplexml_load_string', [$payload]), ]; @file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND); }} $payload = (string) (getenv('NYX_PAYLOAD') ?: ''); -[$rendered, $expanded] = _nyx_libxml_parse($payload); -_nyx_xxe_probe($rendered, $expanded); +$expanded = _nyx_libxml_parse($payload); +_nyx_xxe_probe($payload, $expanded); echo "__NYX_SINK_HIT__\n"; -echo json_encode(["render" => $rendered, "entity_expanded" => $expanded]) . "\n"; +echo json_encode(["entity_expanded" => $expanded]) . 
"\n"; "# ); HarnessSource { diff --git a/src/dynamic/lang/python.rs b/src/dynamic/lang/python.rs index 56532a53..f19cbb1e 100644 --- a/src/dynamic/lang/python.rs +++ b/src/dynamic/lang/python.rs @@ -1438,65 +1438,76 @@ if __name__ == "__main__": /// Phase 05 — Track J.3 XXE harness for Python (`lxml.etree`). /// -/// Reads `NYX_PAYLOAD`, runs a regex-based DOCTYPE/ENTITY scanner that -/// substitutes any `` body inside `&name;` -/// element references (matching `lxml.etree.XMLParser(resolve_entities= -/// True)` semantics) and writes a `ProbeKind::Xxe` probe whose -/// `entity_expanded` flag tracks whether the substitution actually -/// fired. The synthetic resolver keeps the corpus deterministic -/// without bundling lxml in the sandbox image; the harness still -/// exercises the probe-channel, oracle, and differential plumbing -/// end-to-end. +/// Reads `NYX_PAYLOAD`, parses it with `xml.parsers.expat` (the stdlib +/// XML parser backing `xml.etree.ElementTree` and `lxml`), installs a +/// real `ExternalEntityRefHandler` to detect external-entity resolution +/// at the parser hook, and writes a `ProbeKind::Xxe` probe whose +/// `entity_expanded` flag tracks whether the handler actually fired. +/// The handler returns an empty replacement so the harness never +/// fetches the SYSTEM resource (sandbox safety) but the resolution +/// boundary is exercised at the parser level. 
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource { let probe = probe_shim(); let body = format!( r#"#!/usr/bin/env python3 -"""Nyx dynamic harness — XXE lxml (Phase 05 / Track J.3).""" -import os, json, re, sys, time +"""Nyx dynamic harness — XXE xml.parsers.expat (Phase 05 / Track J.3).""" +import os, json, sys, time +import xml.parsers.expat as _nyx_expat {probe} -_NYX_DOCTYPE_ENTITY = re.compile( - r'' -) +def _nyx_xxe_parse(payload): + expanded = [False] + parser = _nyx_expat.ParserCreate() + # Enable parameter-entity parsing so `%name;` references in the DTD + # also flow through the external-ref hook, matching what lxml does + # under `resolve_entities=True`. + try: + parser.SetParamEntityParsing(_nyx_expat.XML_PARAM_ENTITY_PARSING_ALWAYS) + except Exception: + pass -def _nyx_lxml_parse(payload): - # Parse the payload with `resolve_entities=True` semantics: bind - # `` declarations into a map then - # substitute `&name;` references inside element bodies. - entities = {{}} - for m in _NYX_DOCTYPE_ENTITY.finditer(payload): - entities[m.group(1)] = '<' + m.group(2) + '>' - expanded = False - def _sub(match): - nonlocal expanded - name = match.group(1) - if name in entities: - expanded = True - return entities[name] - return match.group(0) - rendered = re.sub(r'&(\w+);', _sub, payload) - return rendered, expanded + def _external_ref(context, base, system_id, public_id): + # Real parser hook: fired by expat for every `` + # reference inside element bodies / DTD. Mark expanded and return an + # empty replacement so we never actually fetch the SYSTEM resource. 
+ expanded[0] = True + sub = parser.ExternalEntityParserCreate(context, "utf-8") + try: + sub.Parse("", 1) + except _nyx_expat.ExpatError: + pass + return 1 -def _nyx_xxe_probe(rendered, expanded): + parser.ExternalEntityRefHandler = _external_ref + payload_bytes = payload.encode("utf-8", "replace") if isinstance(payload, str) else payload + try: + parser.Parse(payload_bytes, 1) + except _nyx_expat.ExpatError: + # Malformed XML still counts as a parser invocation; expanded + # flag reflects whatever the hook saw before the error. + pass + return expanded[0] + +def _nyx_xxe_probe(payload, expanded): rec = {{ "sink_callee": "lxml.etree.XMLParser.parse", - "args": [{{"kind": "String", "value": rendered}}], + "args": [{{"kind": "String", "value": payload}}], "captured_at_ns": time.time_ns(), "payload_id": os.environ.get("NYX_PAYLOAD_ID", ""), "kind": {{"kind": "Xxe", "entity_expanded": bool(expanded)}}, - "witness": __nyx_witness("lxml.etree.XMLParser.parse", [rendered]), + "witness": __nyx_witness("lxml.etree.XMLParser.parse", [payload]), }} __nyx_emit(rec) def _nyx_run(): payload = os.environ.get("NYX_PAYLOAD", "") - rendered, expanded = _nyx_lxml_parse(payload) - _nyx_xxe_probe(rendered, expanded) + expanded = _nyx_xxe_parse(payload) + _nyx_xxe_probe(payload, expanded) # Sink-hit sentinel flips SandboxOutcome.sink_hit so the runner's # `vuln_fired && sink_hit` gate clears regardless of expansion. print("__NYX_SINK_HIT__", flush=True) - sys.stdout.write(json.dumps({{"render": rendered, "entity_expanded": expanded}}) + "\n") + sys.stdout.write(json.dumps({{"entity_expanded": bool(expanded)}}) + "\n") sys.stdout.flush() if __name__ == "__main__": diff --git a/src/dynamic/lang/ruby.rs b/src/dynamic/lang/ruby.rs index 09c901a3..ad6b09d0 100644 --- a/src/dynamic/lang/ruby.rs +++ b/src/dynamic/lang/ruby.rs @@ -972,57 +972,75 @@ STDOUT.flush /// Phase 05 — Track J.3 XXE harness for Ruby (REXML / Nokogiri). 
/// -/// Reads `NYX_PAYLOAD`, scans for `` -/// declarations, substitutes them inside `&name;` element bodies, and -/// writes a `ProbeKind::Xxe` probe whose `entity_expanded` flag tracks -/// whether the substitution fired. Brief lists a framework adapter -/// for Ruby XXE (`xxe_ruby`); the harness keeps the corpus -/// end-to-end-exercisable without bundling REXML / Nokogiri. +/// Reads `NYX_PAYLOAD`, parses it with stdlib `REXML::Document.new`, +/// inspects the resulting `doctype.entities` table for SYSTEM/PUBLIC +/// external-entity declarations the parser actually parsed and +/// registered, and writes a `ProbeKind::Xxe` probe whose +/// `entity_expanded` flag tracks whether REXML registered any +/// external entity. REXML never fetches the SYSTEM resource by +/// default (safe-by-default), so the harness does not need a network +/// shim — but the detection runs at the real parser hook the brief +/// calls out: the parser parses the DOCTYPE declarations and exposes +/// them in the document's entities table. pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource { let shim = probe_shim(); let body = format!( - r#"# Nyx dynamic harness — XXE REXML / Nokogiri (Phase 05 / Track J.3). + r#"# Nyx dynamic harness — XXE REXML (Phase 05 / Track J.3). require 'json' +require 'rexml/document' +require 'stringio' {shim} def _nyx_libxml_parse(payload) - entities = {{}} - payload.scan(//) do |name, uri| - entities[name] = "<#{{uri}}>" - end + # Real parser hook: REXML parses `` declarations + # into Entity objects on the doctype. Inspect the entities table to + # detect every external-entity reference the parser registered. 
expanded = false - rendered = payload.gsub(/&(\w+);/) do - name = Regexp.last_match(1) - if entities.key?(name) - expanded = true - entities[name] - else - Regexp.last_match(0) + begin + doc = REXML::Document.new(payload) + if doc.doctype + doc.doctype.entities.each_value do |ent| + s = ent.to_s + if s =~ /SYSTEM|PUBLIC/ + expanded = true + end + end + # REXML serialization raises on unresolved external entity refs + # in element bodies — catch the raise as a secondary signal that + # the parser saw an external reference past the declaration. + begin + doc.write(StringIO.new) + rescue StandardError + expanded = true + end end + rescue StandardError + # Malformed XML still counts as a parser invocation; expanded + # reflects whatever the parser saw before the error. end - [rendered, expanded] + expanded end -def _nyx_xxe_probe(rendered, expanded) +def _nyx_xxe_probe(payload, expanded) p = ENV['NYX_PROBE_PATH'] return if p.nil? || p.empty? rec = {{ 'sink_callee' => 'REXML::Document.new', - 'args' => [{{ 'kind' => 'String', 'value' => rendered }}], + 'args' => [{{ 'kind' => 'String', 'value' => payload }}], 'captured_at_ns' => Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond), 'payload_id' => ENV['NYX_PAYLOAD_ID'] || '', 'kind' => {{ 'kind' => 'Xxe', 'entity_expanded' => !!expanded }}, - 'witness' => __nyx_witness('REXML::Document.new', [rendered]), + 'witness' => __nyx_witness('REXML::Document.new', [payload]), }} File.open(p, 'a') {{ |f| f.write(rec.to_json + "\n") }} end payload = ENV['NYX_PAYLOAD'] || '' -rendered, expanded = _nyx_libxml_parse(payload) -_nyx_xxe_probe(rendered, expanded) +expanded = _nyx_libxml_parse(payload) +_nyx_xxe_probe(payload, expanded) STDOUT.puts '__NYX_SINK_HIT__' -STDOUT.puts JSON.generate({{"render" => rendered, "entity_expanded" => expanded}}) +STDOUT.puts JSON.generate({{"entity_expanded" => expanded}}) STDOUT.flush "# ); diff --git a/src/dynamic/lang/rust.rs b/src/dynamic/lang/rust.rs index fc577604..60df449b 100644 --- 
a/src/dynamic/lang/rust.rs +++ b/src/dynamic/lang/rust.rs @@ -1078,8 +1078,8 @@ fn class_derives_default(entry_src: &str, class: &str) -> bool { if boundary_ok { let window_start = decl_pos.saturating_sub(256); let window = &entry_src[window_start..decl_pos]; - if let Some(derive_pos) = window.rfind("#[derive(") { - if let Some(end_rel) = window[derive_pos..].find(")]") { + if let Some(derive_pos) = window.rfind("#[derive(") + && let Some(end_rel) = window[derive_pos..].find(")]") { let end = derive_pos + end_rel; let derive_list = &window[derive_pos + "#[derive(".len()..end]; let between = &window[end + ")]".len()..]; @@ -1102,7 +1102,6 @@ fn class_derives_default(entry_src: &str, class: &str) -> bool { return true; } } - } } search_from = decl_pos + 1; } diff --git a/src/dynamic/oob.rs b/src/dynamic/oob.rs index d93a5d7d..49ad97f5 100644 --- a/src/dynamic/oob.rs +++ b/src/dynamic/oob.rs @@ -142,13 +142,11 @@ fn handle_connection(stream: TcpStream, hits: Arc>>) { let _ = stream.set_read_timeout(Some(Duration::from_secs(2))); let mut reader = BufReader::new(&stream); let mut first_line = String::new(); - if reader.read_line(&mut first_line).is_ok() { - if let Some(nonce) = parse_nonce_from_request_line(&first_line) { - if let Ok(mut h) = hits.lock() { + if reader.read_line(&mut first_line).is_ok() + && let Some(nonce) = parse_nonce_from_request_line(&first_line) + && let Ok(mut h) = hits.lock() { h.insert(nonce); } - } - } // Drain remaining headers so the client doesn't get ECONNRESET. 
loop { let mut line = String::new(); diff --git a/src/dynamic/oracle.rs b/src/dynamic/oracle.rs index 187ef394..e811b97e 100644 --- a/src/dynamic/oracle.rs +++ b/src/dynamic/oracle.rs @@ -747,11 +747,10 @@ fn stdout_template_equals(stdout: &[u8], expected: u64) -> bool { let Ok(v) = parsed else { continue }; let Some(render) = v.get("render") else { continue }; let Some(s) = render.as_str() else { continue }; - if let Ok(n) = s.trim().parse::() { - if n == expected { + if let Ok(n) = s.trim().parse::() + && n == expected { return true; } - } } false } @@ -931,7 +930,7 @@ fn extract_redirect_host(location: &str) -> Option { }; // Strip path / query / fragment from the host segment. let end = rest - .find(|c: char| matches!(c, '/' | '?' | '#')) + .find(['/', '?', '#']) .unwrap_or(rest.len()); let authority = &rest[..end]; // Strip userinfo + port. Bracketed IPv6 authorities (`[::1]` or diff --git a/src/dynamic/probe.rs b/src/dynamic/probe.rs index c41aa938..1dc519bd 100644 --- a/src/dynamic/probe.rs +++ b/src/dynamic/probe.rs @@ -113,9 +113,11 @@ impl ProbeArg { /// sink no longer satisfies the oracle. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(tag = "kind")] +#[derive(Default)] pub enum ProbeKind { /// Standard sink observation: arguments were captured before the sink /// returned normally (or raised a non-crash exception). + #[default] Normal, /// Sink invocation was interrupted by a fatal signal that the /// sink-site handler intercepted. The captured `signal` is the one @@ -305,11 +307,6 @@ pub enum ProbeKind { }, } -impl Default for ProbeKind { - fn default() -> Self { - ProbeKind::Normal - } -} /// Bounded forensic snapshot captured alongside a [`SinkProbe`] /// (Phase 08 — Track C.5). 
diff --git a/src/dynamic/repro.rs b/src/dynamic/repro.rs index 80b44c77..863b699e 100644 --- a/src/dynamic/repro.rs +++ b/src/dynamic/repro.rs @@ -90,6 +90,7 @@ impl std::fmt::Display for ReproError { /// /// `harness_source` is the generated harness source code. /// `entry_source` is the extracted entry-point source (may be empty). +#[allow(clippy::too_many_arguments)] pub fn write( spec: &HarnessSpec, opts: &SandboxOptions, @@ -635,7 +636,7 @@ fn repro_readme(spec: &HarnessSpec, verdict: &VerifyResult) -> String { The expected outcome is in `expected/outcome.json`.\n", finding_id = spec.finding_id, status = verdict.status, - cap = format!("{:?}", spec.expected_cap), + cap = format_args!("{:?}", spec.expected_cap), entry = spec.entry_name, ) } diff --git a/src/dynamic/runner.rs b/src/dynamic/runner.rs index 8d7d1e98..8900fd2f 100644 --- a/src/dynamic/runner.rs +++ b/src/dynamic/runner.rs @@ -197,14 +197,13 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result { - if let Some(cmd0) = harness.command.first_mut() { - if cmd0 == "python3" || cmd0 == "python" { + if let Some(cmd0) = harness.command.first_mut() + && (cmd0 == "python3" || cmd0 == "python") { let venv_python = build_result.venv_path.join("bin").join("python3"); if venv_python.exists() { *cmd0 = venv_python.to_string_lossy().into_owned(); } } - } } Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) => { return Err(RunError::BuildFailed { stderr, attempts }); @@ -241,11 +240,8 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result { // npm install for dependency resolution (no deps in basic fixtures). 
- match build_sandbox::prepare_node(spec, &harness.workdir) { - Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) => { - return Err(RunError::BuildFailed { stderr, attempts }); - } - _ => {} + if let Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) = build_sandbox::prepare_node(spec, &harness.workdir) { + return Err(RunError::BuildFailed { stderr, attempts }); } } Lang::Go => { @@ -288,11 +284,8 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result { // composer install if composer.json is present. - match build_sandbox::prepare_php(spec, &harness.workdir) { - Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) => { - return Err(RunError::BuildFailed { stderr, attempts }); - } - _ => {} + if let Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) = build_sandbox::prepare_php(spec, &harness.workdir) { + return Err(RunError::BuildFailed { stderr, attempts }); } } Lang::C => { @@ -358,11 +351,10 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result> = effective_opts.probe_channel.clone(); // Run only vuln (non-benign) payloads in the main loop. diff --git a/src/dynamic/sandbox/mod.rs b/src/dynamic/sandbox/mod.rs index 042978e3..c75cdfab 100644 --- a/src/dynamic/sandbox/mod.rs +++ b/src/dynamic/sandbox/mod.rs @@ -277,16 +277,13 @@ pub struct SandboxOptions { /// Each primitive is best-effort; failures degrade to /// [`HardeningLevel::Partial`] without aborting the run. #[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Default)] pub enum ProcessHardeningProfile { + #[default] Standard, Strict, } -impl Default for ProcessHardeningProfile { - fn default() -> Self { - ProcessHardeningProfile::Standard - } -} /// Phase 20 follow-up (Track E.4 ablation harness): selectively skip or /// loosen individual Strict-profile primitives so the escape-fixture @@ -419,7 +416,9 @@ impl HostPort { /// with no egress filter. 
Reserved for diagnostic / dev-only runs; /// the verifier never sets this in production. #[derive(Debug, Clone)] +#[derive(Default)] pub enum NetworkPolicy { + #[default] None, StubsOnly { allow: Vec }, OobOutbound { listener: Arc }, @@ -461,11 +460,6 @@ impl NetworkPolicy { } } -impl Default for NetworkPolicy { - fn default() -> Self { - NetworkPolicy::None - } -} #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SandboxBackend { @@ -882,8 +876,8 @@ fn rewrite_extra_env_for_container( extra_env .iter() .map(|(k, v)| { - if k == "NYX_FS_ROOT" { - if let Some(idx) = fs_stub_roots + if k == "NYX_FS_ROOT" + && let Some(idx) = fs_stub_roots .iter() .position(|p| p.as_os_str() == std::ffi::OsStr::new(v)) { @@ -892,7 +886,6 @@ fn rewrite_extra_env_for_container( format!("{}/{idx}", docker::STUB_MOUNT_ROOT), ); } - } (k.clone(), v.clone()) }) .collect() @@ -1163,12 +1156,11 @@ fn exec_in_container( // fixture the process backend confirms. Falls through silently for // non-UTF-8 payloads (a `docker -e` argument must be valid UTF-8), // leaving consumers to decode `NYX_PAYLOAD_B64` themselves. - if let Ok(s) = std::str::from_utf8(payload_bytes) { - if !s.contains('\0') { + if let Ok(s) = std::str::from_utf8(payload_bytes) + && !s.contains('\0') { cmd_args.push("-e".into()); cmd_args.push(format!("NYX_PAYLOAD={s}")); } - } // Forward harness-specific env vars. 
for (k, v) in &harness.env { cmd_args.push("-e".into()); @@ -1750,7 +1742,7 @@ fn contains_subslice(hay: &[u8], needle: &[u8]) -> bool { fn base64_encode(data: &[u8]) -> String { const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - let mut out = String::with_capacity((data.len() + 2) / 3 * 4); + let mut out = String::with_capacity(data.len().div_ceil(3) * 4); for chunk in data.chunks(3) { let b0 = chunk[0] as u32; let b1 = if chunk.len() > 1 { chunk[1] as u32 } else { 0 }; diff --git a/src/dynamic/spec.rs b/src/dynamic/spec.rs index 8ee121b3..7582ba8b 100644 --- a/src/dynamic/spec.rs +++ b/src/dynamic/spec.rs @@ -312,11 +312,10 @@ impl HarnessSpec { // priority — calling them here would short-circuit the more precise // strategies (FromFlowSteps / FromRuleNamespace / FromFuncSummaryAuto) // whenever the rule id happens to contain `.http.` / `.cli.`. - if let (Some(s), Some(cg)) = (summaries, callgraph) { - if let Some(spec) = derive_from_callgraph_walk_only(diag, evidence, s, cg) { + if let (Some(s), Some(cg)) = (summaries, callgraph) + && let Some(spec) = derive_from_callgraph_walk_only(diag, evidence, s, cg) { return Ok(spec); } - } // Try each strategy in priority order; first non-None wins. if let Some(spec) = derive_from_flow_steps(diag, evidence, summaries) { @@ -520,11 +519,10 @@ pub fn derive_from_rule_namespace_with( // Cross-check: the diag's file extension must agree with the rule's // language prefix when both are available. Disagreement is a stronger // signal of a mis-rooted finding than a missing extension. 
- if let Some(path_lang) = lang_from_path(&diag.path) { - if path_lang != lang { + if let Some(path_lang) = lang_from_path(&diag.path) + && path_lang != lang { return None; } - } let entry_function = resolve_enclosing_function(diag, evidence, summaries, lang) .unwrap_or_else(|| "".to_owned()); @@ -750,8 +748,8 @@ pub fn derive_from_callgraph_entry_full( // Step 0: callgraph-aware reverse-edge walk to the nearest entry-point // ancestor. Only fires when both summaries *and* callgraph are present. - if let (Some(s), Some(cg)) = (summaries, callgraph) { - if let Some(found) = find_entry_via_callgraph(diag, evidence, s, cg, lang) { + if let (Some(s), Some(cg)) = (summaries, callgraph) + && let Some(found) = find_entry_via_callgraph(diag, evidence, s, cg, lang) { let entry_kind = found .summary .entry_kind @@ -778,7 +776,6 @@ pub fn derive_from_callgraph_entry_full( spec.spec_hash = compute_spec_hash(&spec); return Some(spec); } - } // Step 1: try summary-based classification of the enclosing function. let summary_kind = enclosing_function_from_flow_steps(evidence) @@ -936,14 +933,13 @@ fn find_entry_via_callgraph<'a>( continue; } let caller_key = &callgraph.graph[caller_node]; - if let Some(caller_summary) = summaries.get(caller_key) { - if is_entry_point(caller_summary, callgraph) { + if let Some(caller_summary) = summaries.get(caller_key) + && is_entry_point(caller_summary, callgraph) { return Some(EntryHit { key: caller_key.clone(), summary: caller_summary, }); } - } queue.push_back(caller_node); } } @@ -973,11 +969,10 @@ fn entry_kind_from_summary(_kind: &crate::entry_points::EntryKind) -> EntryKind /// resolve when the extension is well-known. 
fn lang_from_path(path: &str) -> Option { let p = Path::new(path); - if let Some(ext) = p.extension().and_then(|e| e.to_str()) { - if let Some(lang) = Lang::from_extension(ext) { + if let Some(ext) = p.extension().and_then(|e| e.to_str()) + && let Some(lang) = Lang::from_extension(ext) { return Some(lang); } - } // Fall back to a shebang / content sniff over the file head. let head = read_file_head(p, 200); if head.is_empty() { @@ -1308,16 +1303,14 @@ fn lang_slug(lang: Lang) -> &'static str { /// outermost callable that receives the tainted input. pub fn outermost_entry(steps: &[crate::evidence::FlowStep]) -> Option { for step in steps { - if matches!(step.kind, FlowStepKind::Source) { - if let Some(ref func) = step.function { - if !func.is_empty() { + if matches!(step.kind, FlowStepKind::Source) + && let Some(ref func) = step.function + && !func.is_empty() { return Some(EntryRef { file: step.file.clone(), function: func.clone(), }); } - } - } } None } @@ -1340,10 +1333,9 @@ pub fn default_toolchain_id(lang: Lang) -> &'static str { /// Blake3 hash of the spec's key fields, truncated to 8 bytes and hex-encoded. /// -/// Inputs (in order): -/// `SPEC_FORMAT_VERSION` (u32 LE), entry_file, entry_name, payload_slot tag -/// + value, expected_cap bits (u32 LE), sorted constraint_hints, -/// toolchain_id, `CORPUS_VERSION` (u32 LE). +/// Inputs (in order): [`SPEC_FORMAT_VERSION`] (u32 LE), entry_file, +/// entry_name, payload_slot tag + value, expected_cap bits (u32 LE), +/// sorted constraint_hints, toolchain_id, [`CORPUS_VERSION`] (u32 LE). /// /// Bump [`SPEC_FORMAT_VERSION`] when the inputs or semantics change. 
fn compute_spec_hash(spec: &HarnessSpec) -> String { diff --git a/src/dynamic/stubs/http.rs b/src/dynamic/stubs/http.rs index 65f149fe..eea1d556 100644 --- a/src/dynamic/stubs/http.rs +++ b/src/dynamic/stubs/http.rs @@ -226,11 +226,10 @@ fn accept_loop( let _ = stream.set_read_timeout(Some(Duration::from_secs(2))); let _ = stream.set_write_timeout(Some(Duration::from_secs(2))); - if let Some(ev) = handle_connection(stream, MAX_REQUEST_BYTES) { - if let Ok(mut g) = events.lock() { + if let Some(ev) = handle_connection(stream, MAX_REQUEST_BYTES) + && let Ok(mut g) = events.lock() { g.push(ev); } - } } } @@ -261,21 +260,18 @@ fn handle_connection(mut stream: TcpStream, max_bytes: usize) -> Option() { + && let Ok(n) = rest.trim().parse::() { content_length = n.min(max_bytes); } - } headers.push(trimmed.to_owned()); } // Body, capped at content_length (already clamped to max_bytes). let mut body = vec![0u8; content_length]; - if content_length > 0 { - if reader.read_exact(&mut body).is_err() { + if content_length > 0 + && reader.read_exact(&mut body).is_err() { body.clear(); } - } // Always reply 200 OK with no body. 
let _ = stream.write_all(b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n"); diff --git a/src/dynamic/toolchain.rs b/src/dynamic/toolchain.rs index f9d98e2a..40024506 100644 --- a/src/dynamic/toolchain.rs +++ b/src/dynamic/toolchain.rs @@ -115,11 +115,10 @@ fn try_rust_toolchain_toml(root: &Path) -> Option { if line.starts_with('[') { in_toolchain = false; } - if in_toolchain && line.starts_with("channel") { - if let Some(ver) = extract_version_from_toml_value(line) { + if in_toolchain && line.starts_with("channel") + && let Some(ver) = extract_version_from_toml_value(line) { return Some(map_rust_version(&ver, RustPinOrigin::RustToolchainToml)); } - } } None } @@ -138,11 +137,10 @@ fn try_cargo_toml_rust_version(root: &Path) -> Option { let content = std::fs::read_to_string(root.join("Cargo.toml")).ok()?; for line in content.lines() { let line = line.trim(); - if line.starts_with("rust-version") { - if let Some(ver) = extract_version_from_toml_value(line) { + if line.starts_with("rust-version") + && let Some(ver) = extract_version_from_toml_value(line) { return Some(map_rust_version(&ver, RustPinOrigin::CargoToml)); } - } } None } @@ -248,11 +246,10 @@ fn try_pyproject_toml(root: &Path) -> Option { // Look for `requires-python = ">=3.11"` or `python = "3.11"`. 
for line in content.lines() { let line = line.trim(); - if line.starts_with("requires-python") || (line.starts_with("python") && line.contains('=') && !line.starts_with("python_requires")) { - if let Some(ver) = extract_version_from_toml_value(line) { + if (line.starts_with("requires-python") || (line.starts_with("python") && line.contains('=') && !line.starts_with("python_requires"))) + && let Some(ver) = extract_version_from_toml_value(line) { return Some(map_version(&ver, PinOrigin::PyprojectToml)); } - } } None } @@ -269,11 +266,10 @@ fn try_pipfile(root: &Path) -> Option { if line.starts_with('[') { in_requires = false; } - if in_requires && line.starts_with("python_version") { - if let Some(ver) = extract_version_from_toml_value(line) { + if in_requires && line.starts_with("python_version") + && let Some(ver) = extract_version_from_toml_value(line) { return Some(map_version(&ver, PinOrigin::Pipfile)); } - } } None } @@ -302,7 +298,7 @@ fn default_python() -> ToolchainResolution { /// `requires-python = ">=3.11"` → `"3.11"` /// `python_version = "3.11"` → `"3.11"` fn extract_version_from_toml_value(line: &str) -> Option { - let after_eq = line.splitn(2, '=').nth(1)?; + let after_eq = line.split_once('=')?.1; let raw = after_eq.trim().trim_matches('"').trim_matches('\''); if raw.is_empty() { return None; @@ -335,7 +331,7 @@ fn map_version(version: &str, origin: PinOrigin) -> ToolchainResolution { ("3", Some("12")) => ("python-3.12".to_owned(), false), ("3", Some("13")) => ("python-3.13".to_owned(), false), // Older 3.x → nearest supported is 3.8 - ("3", Some(m)) if m.parse::().map_or(false, |v| v < 8) => { + ("3", Some(m)) if m.parse::().is_ok_and(|v| v < 8) => { ("python-3.8".to_owned(), true) } // Newer 3.x beyond catalog → use 3.13 as closest @@ -466,7 +462,7 @@ fn json_line_has_key(line: &str, key: &str) -> bool { /// Extract a version string from a JSON value like `">=18"` or `"20.x"`. 
fn extract_version_from_json_value(line: &str) -> Option { // Find the second quoted value after the colon. - let after_colon = line.splitn(2, ':').nth(1)?; + let after_colon = line.split_once(':')?.1; let raw = after_colon.trim().trim_matches('"').trim_matches('\''); let ver = raw.trim_start_matches(|c: char| !c.is_ascii_digit()); // Strip trailing junk: stop at the first char that isn't a version char. @@ -535,10 +531,10 @@ fn map_go_version(version: &str, origin: PinOrigin) -> ToolchainResolution { ("1", Some("21")) => ("go-1.21".to_owned(), false), ("1", Some("22")) => ("go-1.22".to_owned(), false), ("1", Some("23")) => ("go-1.23".to_owned(), false), - ("1", Some(m)) if m.parse::().map_or(false, |v| v >= 24) => { + ("1", Some(m)) if m.parse::().is_ok_and(|v| v >= 24) => { (format!("go-1.{m}"), true) } - ("1", Some(m)) if m.parse::().map_or(false, |v| v < 21) => { + ("1", Some(m)) if m.parse::().is_ok_and(|v| v < 21) => { (format!("go-1.{m}"), true) } _ => ("go-stable".to_owned(), false), @@ -575,14 +571,13 @@ fn try_pom_xml(root: &Path) -> Option { for line in content.lines() { let trimmed = line.trim(); for tag in &["", "", ""] { - if trimmed.starts_with(tag) { - if let Some(inner) = trimmed.strip_prefix(tag) { + if trimmed.starts_with(tag) + && let Some(inner) = trimmed.strip_prefix(tag) { let version = inner.split('<').next().unwrap_or("").trim(); if !version.is_empty() { return Some(map_java_version(version, PinOrigin::PomXml)); } } - } } } None @@ -597,11 +592,10 @@ fn try_build_gradle(root: &Path) -> Option { let trimmed = line.trim(); // Groovy: sourceCompatibility = '21' or JavaVersion.VERSION_21 // Kotlin: sourceCompatibility = JavaVersion.VERSION_21 - if trimmed.starts_with("sourceCompatibility") || trimmed.starts_with("languageVersion") { - if let Some(ver) = extract_java_version_from_gradle_line(trimmed) { + if (trimmed.starts_with("sourceCompatibility") || trimmed.starts_with("languageVersion")) + && let Some(ver) = 
extract_java_version_from_gradle_line(trimmed) { return Some(map_java_version(&ver, PinOrigin::BuildGradle)); } - } } } None @@ -610,7 +604,7 @@ fn try_build_gradle(root: &Path) -> Option { fn extract_java_version_from_gradle_line(line: &str) -> Option { // Handle: sourceCompatibility = '21' or sourceCompatibility = 21 // and: languageVersion.set(JavaLanguageVersion.of(21)) - let after_eq = line.splitn(2, '=').nth(1).unwrap_or(line); + let after_eq = line.split_once('=').map(|x| x.1).unwrap_or(line); // Try to find a number in the value. let digits: String = after_eq.chars() .skip_while(|c| !c.is_ascii_digit()) @@ -687,13 +681,12 @@ fn try_composer_json(root: &Path) -> Option { if json_line_has_key(trimmed, "require") { in_require = true; } - if in_require && trimmed.contains("\"php\"") { - if let Some(ver) = extract_version_from_json_value(trimmed) { + if in_require && trimmed.contains("\"php\"") + && let Some(ver) = extract_version_from_json_value(trimmed) { return Some(map_php_version(&ver, PinOrigin::ComposerJson)); } - } // Stop at closing brace of require block. - if in_require && trimmed == "}," || (in_require && trimmed == "}") { + if in_require && (trimmed == "}," || trimmed == "}") { in_require = false; } } diff --git a/src/dynamic/verify.rs b/src/dynamic/verify.rs index 53803563..c3dbc353 100644 --- a/src/dynamic/verify.rs +++ b/src/dynamic/verify.rs @@ -713,8 +713,8 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult { // Verdict cache lookup (§12 Q5): skip execution when a valid cached result exists. 
let entry_hash = compute_entry_content_hash(&spec.entry_file); let import_digest = transitive_import_digest_placeholder(); - if let Some(ref db_path) = opts.db_path { - if let Some(cached) = lookup_verdict_cache( + if let Some(ref db_path) = opts.db_path + && let Some(cached) = lookup_verdict_cache( db_path, &spec.spec_hash, &entry_hash, @@ -723,7 +723,6 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult { ) { return cached; } - } // Phase 10 (Track D.3): spawn the boundary stubs the spec // demands *before* the sandbox runs. When `stubs_required` is @@ -998,14 +997,14 @@ fn build_verdict( ); // If repro write fails, downgrade to NonReproducible. - if repro_result.is_err() { + if let Err(err) = repro_result { return VerifyResult { finding_id: finding_id.to_owned(), status: VerifyStatus::Inconclusive, triggered_payload: None, reason: None, inconclusive_reason: Some(InconclusiveReason::NonReproducible), - detail: Some(format!("repro write failed: {}", repro_result.unwrap_err())), + detail: Some(format!("repro write failed: {err}")), attempts, toolchain_match: Some(toolchain_match.to_owned()), differential: run.differential, diff --git a/src/output/sarif.rs b/src/output/sarif.rs index 29447562..58f8e6c5 100644 --- a/src/output/sarif.rs +++ b/src/output/sarif.rs @@ -315,11 +315,10 @@ pub fn build_sarif_with_chains( // this finding participates in (if any). Stable across // reruns because both the finding's `stable_hash` and the // chain's `stable_hash` are byte-deterministic. 
- if d.stable_hash != 0 { - if let Some(chain_hash) = chain_member_of.get(&d.stable_hash) { + if d.stable_hash != 0 + && let Some(chain_hash) = chain_member_of.get(&d.stable_hash) { props.insert("chain_member_of".into(), json!(chain_hash)); } - } result["properties"] = Value::Object(props); diff --git a/src/surface/lang/ruby_rails.rs b/src/surface/lang/ruby_rails.rs index 53689f55..cc2d8147 100644 --- a/src/surface/lang/ruby_rails.rs +++ b/src/surface/lang/ruby_rails.rs @@ -40,8 +40,8 @@ pub fn detect_rails_routes( fn detect_routes_dsl(root: Node, bytes: &[u8], file_rel: &str, out: &mut Vec) { fn recurse(node: Node, bytes: &[u8], file_rel: &str, out: &mut Vec) { - if matches!(node.kind(), "call" | "method_call") { - if let Some(method_node) = node.child_by_field_name("method") + if matches!(node.kind(), "call" | "method_call") + && let Some(method_node) = node.child_by_field_name("method") && let Ok(method_text) = method_node.utf8_text(bytes) && let Some((_, method)) = VERBS.iter().find(|(v, _)| *v == method_text) { @@ -73,7 +73,6 @@ fn detect_routes_dsl(root: Node, bytes: &[u8], file_rel: &str, out: &mut Vec bool { } fn is_pages_api_route(path: &Path) -> bool { - let mut comps = path.components().peekable(); + let comps = path.components().peekable(); let mut saw_pages = false; - while let Some(c) = comps.next() { + for c in comps { if c.as_os_str().to_string_lossy() == "pages" { saw_pages = true; } else if saw_pages && c.as_os_str().to_string_lossy() == "api" { diff --git a/src/surface/mod.rs b/src/surface/mod.rs index 21addf78..5f7ae3d4 100644 --- a/src/surface/mod.rs +++ b/src/surface/mod.rs @@ -341,11 +341,10 @@ impl SurfaceMap { /// Returns the absolute path verbatim when the file is outside the /// scan root or when path stripping fails. 
pub fn relative_path_string(path: &Path, scan_root: Option<&Path>) -> String { - if let Some(root) = scan_root { - if let Ok(rel) = path.strip_prefix(root) { + if let Some(root) = scan_root + && let Ok(rel) = path.strip_prefix(root) { return rel.to_string_lossy().replace('\\', "/"); } - } path.to_string_lossy().replace('\\', "/") } diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index eed5ae40..ae2bb6b5 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -114,11 +114,10 @@ impl Lang { /// Used by [`crate::dynamic::spec`] so spec derivation no longer rejects /// CLI entry points and other extensionless / non-canonical files. pub fn from_path_or_content(path: &Path, head_bytes: &[u8]) -> Option { - if let Some(ext) = path.extension().and_then(|e| e.to_str()) { - if let Some(lang) = Self::from_extension(ext) { + if let Some(ext) = path.extension().and_then(|e| e.to_str()) + && let Some(lang) = Self::from_extension(ext) { return Some(lang); } - } if let Some(lang) = lang_from_shebang(head_bytes) { return Some(lang); } diff --git a/tests/common/fixture_harness.rs b/tests/common/fixture_harness.rs index 4a07343b..0fdaf543 100644 --- a/tests/common/fixture_harness.rs +++ b/tests/common/fixture_harness.rs @@ -256,7 +256,7 @@ pub struct FixtureSpec<'a> { /// /// Captures the fields a regression test must pin: status + typed reasons /// + whether a payload triggered. Excludes machine-dependent fields -/// (`finding_id`, `detail`, `attempts`, `toolchain_match`). +/// (`finding_id`, `detail`, `attempts`, `toolchain_match`). 
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct GoldenVerdict { pub status: VerifyStatus, diff --git a/tests/determinism_audit.rs b/tests/determinism_audit.rs index f0740ae6..0d3652a5 100644 --- a/tests/determinism_audit.rs +++ b/tests/determinism_audit.rs @@ -24,11 +24,13 @@ use std::collections::BTreeSet; const RUN_COUNT: usize = 10; fn deny_diag(stable_hash: u64) -> Diag { - let mut ev = Evidence::default(); // Triggers the credentials deny rule via the AWS-key regex from // `crate::utils::redact::contains_secret`. The deny rule fires // deterministically because the rule lookup table is `const`. - ev.notes = vec!["secret=AKIAFAKEDETERM00000000".to_owned()]; + let ev = Evidence { + notes: vec!["secret=AKIAFAKEDETERM00000000".to_owned()], + ..Evidence::default() + }; Diag { path: "src/handler.py".to_owned(), line: 42, @@ -84,9 +86,11 @@ fn ten_runs_produce_byte_identical_telemetry_minus_timestamps() { let diag = deny_diag(0x0123_4567_89ab_cdef); - let mut opts = VerifyOptions::default(); - opts.telemetry_policy = SamplingPolicy::keep_all(); - opts.trace_verbose = false; + let opts = VerifyOptions { + telemetry_policy: SamplingPolicy::keep_all(), + trace_verbose: false, + ..VerifyOptions::default() + }; let mut verdict_jsons: BTreeSet = BTreeSet::new(); for _ in 0..RUN_COUNT { diff --git a/tests/dynamic_parity.rs b/tests/dynamic_parity.rs index 0da7c6ec..ffb0ea07 100644 --- a/tests/dynamic_parity.rs +++ b/tests/dynamic_parity.rs @@ -127,11 +127,10 @@ mod parity_tests { // BackendUnavailable into Unsupported OR Inconclusive depending on // where the error surfaces, so the skip predicate looks at the // reason text, not the verdict status. - if let Some(ref r) = docker_result.reason { - if format!("{r:?}").contains("BackendUnavailable") { + if let Some(ref r) = docker_result.reason + && format!("{r:?}").contains("BackendUnavailable") { return; // Docker absent — skip comparison. 
} - } assert_eq!( process_result.status, docker_result.status, diff --git a/tests/dynamic_verify_e2e.rs b/tests/dynamic_verify_e2e.rs index 5d3c72b8..f6cf84ab 100644 --- a/tests/dynamic_verify_e2e.rs +++ b/tests/dynamic_verify_e2e.rs @@ -189,8 +189,10 @@ mod verify_e2e { let diag = taint_diag_with_cap(Cap::CRYPTO); let trace = Arc::new(VerifyTrace::new()); - let mut opts = VerifyOptions::default(); - opts.trace_sink = Some(Arc::clone(&trace)); + let opts = VerifyOptions { + trace_sink: Some(Arc::clone(&trace)), + ..VerifyOptions::default() + }; let _result = verify_finding(&diag, &opts); diff --git a/tests/fix_validation_e2e.rs b/tests/fix_validation_e2e.rs index 35b5854d..fdfce344 100644 --- a/tests/fix_validation_e2e.rs +++ b/tests/fix_validation_e2e.rs @@ -33,7 +33,7 @@ fn scan_with_hashes(dir: &Path) -> Vec { /// Attach a simulated dynamic verdict to every finding in the list. fn set_verdict( - diags: &mut Vec, + diags: &mut [nyx_scanner::commands::scan::Diag], status: VerifyStatus, ) { for d in diags.iter_mut() { diff --git a/tests/marker_uniqueness.rs b/tests/marker_uniqueness.rs index c2e0237f..a85e1d76 100644 --- a/tests/marker_uniqueness.rs +++ b/tests/marker_uniqueness.rs @@ -1,3 +1,4 @@ +#![allow(deprecated)] //! Marker uniqueness test (§4.1, §17.4). //! //! Asserts that no `NYX_PWN_*` marker from one cap's corpus is a substring diff --git a/tests/policy_deny.rs b/tests/policy_deny.rs index 5962de51..d7f1ddf3 100644 --- a/tests/policy_deny.rs +++ b/tests/policy_deny.rs @@ -1,3 +1,4 @@ +#![allow(clippy::field_reassign_with_default)] //! Phase 30 (Track C — security): coverage for //! [`crate::dynamic::policy::evaluate`] deny rules. //! 
diff --git a/tests/repro_fixture_bundles.rs b/tests/repro_fixture_bundles.rs index a2355f45..91e2f97a 100644 --- a/tests/repro_fixture_bundles.rs +++ b/tests/repro_fixture_bundles.rs @@ -142,12 +142,13 @@ fn flask_eval_verdict() -> VerifyResult { } fn flask_eval_sandbox_options() -> SandboxOptions { - let mut opts = SandboxOptions::default(); - opts.backend = SandboxBackend::Docker; - opts.env_passthrough = vec!["NYX_PAYLOAD".into()]; - opts.timeout = Duration::from_secs(30); - opts.memory_mib = 256; - opts + SandboxOptions { + backend: SandboxBackend::Docker, + env_passthrough: vec!["NYX_PAYLOAD".into()], + timeout: Duration::from_secs(30), + memory_mib: 256, + ..SandboxOptions::default() + } } fn workspace_root() -> PathBuf { diff --git a/tests/spec_callgraph_resolution.rs b/tests/spec_callgraph_resolution.rs index 03f65705..dae4b695 100644 --- a/tests/spec_callgraph_resolution.rs +++ b/tests/spec_callgraph_resolution.rs @@ -1,3 +1,4 @@ +#![allow(clippy::field_reassign_with_default)] //! Phase 04 acceptance: callgraph-aware //! [`SpecDerivationStrategy::FromCallgraphEntry`]. //! diff --git a/tests/spec_derivation_strategies.rs b/tests/spec_derivation_strategies.rs index 133206e4..9b7931b1 100644 --- a/tests/spec_derivation_strategies.rs +++ b/tests/spec_derivation_strategies.rs @@ -1,3 +1,4 @@ +#![allow(clippy::field_reassign_with_default)] //! Phase 01, Track A.1: integration coverage for //! `HarnessSpec::from_finding_opts` strategy fall-through. //! diff --git a/tests/spec_framework_sample.rs b/tests/spec_framework_sample.rs index 62c9302d..a125803a 100644 --- a/tests/spec_framework_sample.rs +++ b/tests/spec_framework_sample.rs @@ -27,34 +27,36 @@ use nyx_scanner::patterns::{FindingCategory, Severity}; /// and a synthetic per-name summary, so the framework adapter registry /// resolves a binding when the fixture's source matches an adapter. 
fn make_diag(path: &str, handler: &str, line: usize, cap: Cap, rule_id: &str) -> Diag { - let mut ev = Evidence::default(); - ev.flow_steps = vec![ - FlowStep { - step: 0, - kind: FlowStepKind::Source, - file: path.into(), - line: line as u32, - col: 0, - snippet: None, - variable: None, - callee: None, - function: Some(handler.into()), - is_cross_file: false, - }, - FlowStep { - step: 1, - kind: FlowStepKind::Sink, - file: path.into(), - line: line as u32, - col: 0, - snippet: None, - variable: None, - callee: None, - function: Some(handler.into()), - is_cross_file: false, - }, - ]; - ev.sink_caps = cap.bits(); + let ev = Evidence { + flow_steps: vec![ + FlowStep { + step: 0, + kind: FlowStepKind::Source, + file: path.into(), + line: line as u32, + col: 0, + snippet: None, + variable: None, + callee: None, + function: Some(handler.into()), + is_cross_file: false, + }, + FlowStep { + step: 1, + kind: FlowStepKind::Sink, + file: path.into(), + line: line as u32, + col: 0, + snippet: None, + variable: None, + callee: None, + function: Some(handler.into()), + is_cross_file: false, + }, + ], + sink_caps: cap.bits(), + ..Evidence::default() + }; Diag { path: path.into(), line, diff --git a/tests/stubs_per_cap.rs b/tests/stubs_per_cap.rs index 5301cad4..26c9bb45 100644 --- a/tests/stubs_per_cap.rs +++ b/tests/stubs_per_cap.rs @@ -48,8 +48,7 @@ fn read_fixture(stub_dir: &str, name: &str) -> String { /// begin with `//`; the payload is the surviving line. fn extract_payload(s: &str) -> String { s.lines() - .filter(|l| !l.trim().is_empty() && !l.trim_start().starts_with("//")) - .last() + .rfind(|l| !l.trim().is_empty() && !l.trim_start().starts_with("//")) .unwrap_or("") .trim() .to_owned()