feat(lint): centralize clippy::collapsible_if allowance in Cargo.toml and remove redundant file-level declarations

This commit is contained in:
elipeter 2026-06-02 18:30:14 -05:00
parent 1f5777ff11
commit 1ebeb233c4
53 changed files with 851 additions and 212 deletions

View file

@ -3,7 +3,6 @@
//! Tracks inclusive `[lo, hi]` integer bounds. `None` = unbounded (−∞ or +∞).
//! Both `None` = Top (any integer). Provides arithmetic transfer functions
//! (add, sub, mul, div, mod) with overflow-safe semantics.
#![allow(clippy::collapsible_if)]
use crate::state::lattice::{AbstractDomain, Lattice};
use serde::{Deserialize, Serialize};

View file

@ -781,6 +781,35 @@ fn lang_for_path(path: &Path) -> Option<(Language, &'static str)> {
}
}
/// All language slugs the scanner can parse, paired with the file extensions
/// that map to them. Single source of truth shared with [`lang_for_path`]; the
/// `supported_extensions_resolve_to_their_slug` test asserts they stay in sync.
pub(crate) const SUPPORTED_LANGUAGE_EXTENSIONS: &[(&str, &[&str])] = &[
("rust", &["rs"]),
("c", &["c"]),
(
"cpp",
&["cpp", "cc", "cxx", "c++", "hpp", "hxx", "hh", "h++"],
),
("java", &["java"]),
("go", &["go"]),
("php", &["php"]),
("python", &["py"]),
("typescript", &["ts", "tsx"]),
("javascript", &["js", "jsx"]),
("ruby", &["rb"]),
];
/// File extensions associated with a language slug (case-insensitive). Returns
/// an empty slice if `slug` is not a supported language.
pub fn extensions_for_lang(slug: &str) -> &'static [&'static str] {
SUPPORTED_LANGUAGE_EXTENSIONS
.iter()
.find(|(s, _)| s.eq_ignore_ascii_case(slug))
.map(|(_, exts)| *exts)
.unwrap_or(&[])
}
/// Fast binary-file guard: skip if >1% NUL bytes.
fn is_binary(bytes: &[u8]) -> bool {
bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1

View file

@ -1,3 +1,8 @@
//! Configuration for the Rust auth-analysis pass.
//!
//! Holds [`AuthAnalysisRules`] (admin path/guard patterns, sink classes, and
//! name canonicalization) that drive `rs.auth.missing_ownership_check`.
use crate::auth_analysis::model::SinkClass;
use crate::labels::bare_method_name;
use crate::utils::config::Config;

View file

@ -1,3 +1,9 @@
//! Shared AST-extraction helpers for the auth-analysis framework adapters.
//!
//! Cross-framework primitives — analysis-unit collection, call-site and
//! `ValueRef` extraction, and tree-sitter node/string/span helpers — used by the
//! per-framework extractors in this directory (`express`, `axum`, `django`, …).
use crate::auth_analysis::config::{AuthAnalysisRules, canonical_name, matches_name, strip_quotes};
use crate::auth_analysis::model::{
AnalysisUnit, AnalysisUnitKind, AuthCheck, AuthCheckKind, AuthorizationModel, CallSite,

View file

@ -1,8 +1,8 @@
use super::helpers::first_member_label;
use super::{
AstMeta, Cfg, EdgeKind, MAX_COND_VARS, MAX_CONDITION_TEXT_LEN, NodeInfo, StmtKind,
collect_idents, connect_all, detect_eq_with_const, detect_negation, has_call_descendant,
member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
build_cond_arith, collect_idents, connect_all, detect_eq_with_const, detect_negation,
has_call_descendant, member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
};
use crate::labels::{DataLabel, LangAnalysisRules, classify};
use crate::utils::snippet::truncate_at_char_boundary;
@ -223,6 +223,13 @@ pub(super) fn build_ternary_diamond<'a>(
// taint engine's equality-narrowing fires for `x === 'literal' ? …`.
let cond_if = push_condition_node(g, cond_ast, lang, code, enclosing_func);
g[cond_if].is_eq_with_const = detect_eq_with_const(cond_ast, lang);
// Capture the pure int-arith + comparison tree so `fold_constant_branches`
// can prune a dead constant-condition arm of the ternary (e.g. Java
// `(7*18)+num > 200 ? "const" : param` with `num` a known int constant),
// exactly as it does for the if-form. `build_cond_arith` is conservative
// (returns None for any call/field/string/`&&`/`||`/`!` shape) so this is
// sound for every language the diamond fires on.
g[cond_if].cond_arith = build_cond_arith(cond_ast, lang, code, 0);
connect_all(g, preds, cond_if, pred_edge);
// 2. Branches. Each branch produces its own exit frontier (≥ 1 node) ,

View file

@ -1,3 +1,9 @@
//! Literal and constant-expression extraction from tree-sitter AST nodes.
//!
//! Parses integer and string literals, folds constant binary ops, and derives
//! template/string prefixes and quote stripping for CFG construction and
//! const propagation.
use super::conditions::unwrap_parens;
use super::helpers::{collect_array_pattern_bindings_indexed, collect_rhs_array_literal_elements};
use super::{

View file

@ -12,11 +12,7 @@
//! `export_summaries` converts in-graph [`LocalFuncSummary`] values to
//! the serializable [`crate::summary::FuncSummary`] form.
#![allow(
clippy::collapsible_if,
clippy::let_and_return,
clippy::unnecessary_map_or
)]
#![allow(clippy::let_and_return, clippy::unnecessary_map_or)]
use petgraph::algo::dominators::{Dominators, simple_fast};
use petgraph::prelude::*;
@ -1481,7 +1477,12 @@ fn binary_op_token(node: Node) -> Option<BinOp> {
/// boolean `&&`/`||`, unary `!`) returns `None`, which disables folding for
/// that branch (never a wrong fold). Depth-bounded to guard against
/// pathological nesting.
fn build_cond_arith(node: Node, lang: &str, code: &[u8], depth: u32) -> Option<CondArith> {
pub(super) fn build_cond_arith(
node: Node,
lang: &str,
code: &[u8],
depth: u32,
) -> Option<CondArith> {
if depth > 64 {
return None;
}
@ -6283,10 +6284,14 @@ pub(super) fn build_sub<'a>(
);
}
// JS/TS ternary-RHS split: `var x = c ? a : b;` and
// JS/TS/Java ternary-RHS split: `var x = c ? a : b;` and
// `obj.prop = c ? a : b;` lower to a real diamond CFG so the
// condition is control-flow (not a data-flow `uses` entry).
if matches!(lang, "javascript" | "typescript" | "tsx")
// Java uses the same `ternary_expression` AST kind; routing it
// through the diamond lets `fold_constant_branches` prune dead
// constant-condition arms (`cond ? "const" : param`) the same way
// it does for the if-form.
if matches!(lang, "javascript" | "typescript" | "tsx" | "java")
&& let Some((lhs_ast, ternary_ast)) = find_ternary_rhs_wrapper(ast)
{
let (lhs_text, lhs_labels) =
@ -6541,8 +6546,8 @@ pub(super) fn build_sub<'a>(
// Assignment that may contain a call (Python `x = os.getenv(...)`, Ruby `x = gets()`)
Kind::Assignment => {
// JS/TS ternary-RHS split, same rationale as the CallWrapper branch.
if matches!(lang, "javascript" | "typescript" | "tsx")
// JS/TS/Java ternary-RHS split, same rationale as the CallWrapper branch.
if matches!(lang, "javascript" | "typescript" | "tsx" | "java")
&& let (Some(left), Some(right)) = (
ast.child_by_field_name("left"),
ast.child_by_field_name("right"),

View file

@ -1,4 +1,7 @@
#![allow(clippy::collapsible_if)]
//! Unguarded-sink detection via CFG dominator analysis.
//!
//! Flags dangerous sinks that are not dominated by an appropriate guard
//! (validation or auth check) on every path from an entry point.
use super::dominators::{self, dominates};
use super::rules;

View file

@ -1,4 +1,12 @@
#![allow(clippy::collapsible_if, clippy::type_complexity)]
//! Scan-pipeline orchestration: the two-pass + topo-batch driver behind
//! `nyx scan`.
//!
//! Coordinates summary extraction (pass 1), SCC-ordered taint analysis with a
//! bounded fixpoint (pass 2, `run_topo_batches`), the indexed parallel scan path
//! (`scan_with_index_parallel_observer`), suppression application, and per-file
//! panic isolation (`recover_or_propagate`).
#![allow(clippy::type_complexity)]
pub(crate) use crate::ast::{
analyse_file_fused, extract_all_summaries_from_bytes, run_rules_on_bytes, run_rules_on_file,

View file

@ -15,8 +15,6 @@
//! literal operand. Necessary because individual comparisons are NOT
//! decomposed into separate SSA operations (condition nodes → `Nop`).
#![allow(clippy::collapsible_if)]
use crate::cfg::NodeInfo;
use crate::ssa::const_prop::ConstLattice;
use crate::ssa::ir::{BlockId, SsaBody, SsaValue};

View file

@ -152,6 +152,8 @@ fn bind_mount_ro(src: &Path, dst: &Path) -> io::Result<()> {
let cdst = CString::new(dst.as_os_str().as_bytes())
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
// SAFETY: `csrc`/`cdst` are `CString`s that outlive the call, so the pointers
// reference valid NUL-terminated C strings. Return value checked below.
let bind = unsafe {
mount(
csrc.as_ptr(),
@ -165,6 +167,8 @@ fn bind_mount_ro(src: &Path, dst: &Path) -> io::Result<()> {
return Err(io::Error::last_os_error());
}
// Best-effort read-only remount; leave the rw bind if it fails.
// SAFETY: `cdst` outlives the call; the other pointers are null, accepted by
// `mount(2)` for a remount.
unsafe {
mount(
std::ptr::null(),

View file

@ -724,7 +724,7 @@ fn register_exit_cleanup() {
unsafe extern "C" {
fn atexit(f: extern "C" fn()) -> i32;
}
// Safety: atexit(3) is async-signal-safe for registration; the handler
// SAFETY: atexit(3) is async-signal-safe for registration; the handler
// itself runs on the main thread during normal shutdown, after all Rust
// destructors, so std::process::Command is safe to call from it.
unsafe { atexit(stop_all_containers) };
@ -1870,6 +1870,7 @@ fn libc_kill(pid: i32, sig: i32) -> i32 {
unsafe extern "C" {
fn kill(pid: i32, sig: i32) -> i32;
}
// SAFETY: `kill(2)` takes only scalar args and touches no caller memory.
unsafe { kill(pid, sig) }
}

View file

@ -29,6 +29,8 @@
//! record into it, execve(2) drops the write end, and the parent's
//! drain thread sees EOF and records the outcome.
#![warn(clippy::undocumented_unsafe_blocks)]
use crate::dynamic::sandbox::seccomp;
use crate::dynamic::sandbox::seccomp::bpf::SockFilter;
use crate::dynamic::sandbox::{AblationMask, ProcessHardeningProfile, SandboxOptions};
@ -144,11 +146,14 @@ struct StatusPipe {
impl StatusPipe {
fn new() -> std::io::Result<Self> {
// SAFETY: declares the libc `pipe2(2)` ABI; the signature matches <unistd.h>.
unsafe extern "C" {
fn pipe2(pipefd: *mut i32, flags: i32) -> i32;
}
const O_CLOEXEC: i32 = 0o2_000_000;
let mut fds = [-1_i32; 2];
// SAFETY: `fds` is a valid 2-element array the kernel writes into; `pipe2`
// reads no caller memory beyond that pointer. Return value checked below.
let ret = unsafe { pipe2(fds.as_mut_ptr(), O_CLOEXEC) };
if ret != 0 {
return Err(std::io::Error::last_os_error());
@ -161,15 +166,20 @@ impl StatusPipe {
}
fn close_fd(fd: RawFd) {
// SAFETY: declares the libc `close(2)` ABI; signature matches <unistd.h>.
unsafe extern "C" {
fn close(fd: i32) -> i32;
}
// SAFETY: `fd` is an owned raw fd closed exactly once; the return value is
// intentionally ignored (best-effort close).
unsafe { close(fd) };
}
/// Drain `read_fd` into a `HardeningOutcome`. Wire format is the
/// 15-byte fixed-width record produced by [`encode_outcome`].
fn drain_outcome(read_fd: RawFd) -> Option<HardeningOutcome> {
// SAFETY: `read_fd` is an owned raw fd (the pipe read end) used nowhere else;
// `File` takes sole ownership and closes it on drop.
let mut file = unsafe { std::fs::File::from_raw_fd(read_fd) };
let mut buf = Vec::with_capacity(64);
if file.read_to_end(&mut buf).is_err() {
@ -276,6 +286,8 @@ struct Rlimit {
max: u64,
}
// SAFETY: declares the libc syscall-wrapper ABI (setrlimit/prctl/unshare/chroot/
// chdir/mount/write/__errno_location); signatures match the glibc/musl headers.
unsafe extern "C" {
fn setrlimit(resource: i32, rlim: *const Rlimit) -> i32;
fn prctl(option: i32, arg2: u64, arg3: u64, arg4: u64, arg5: u64) -> i32;
@ -294,6 +306,8 @@ unsafe extern "C" {
}
fn last_errno() -> i32 {
// SAFETY: `__errno_location` returns a valid pointer to the calling thread's
// errno; dereferencing it right after a failed syscall is the standard idiom.
unsafe { *__errno_location() }
}
@ -302,6 +316,8 @@ fn apply_rlimit(resource: i32, bytes: u64) -> PrimitiveStatus {
cur: bytes,
max: bytes,
};
// SAFETY: `&rl` points to a valid `Rlimit` for the duration of the call;
// `setrlimit` only reads it and returns a status checked below.
let ret = unsafe { setrlimit(resource, &rl) };
if ret == 0 {
PrimitiveStatus::Applied
@ -311,6 +327,8 @@ fn apply_rlimit(resource: i32, bytes: u64) -> PrimitiveStatus {
}
fn apply_no_new_privs() -> PrimitiveStatus {
// SAFETY: `prctl(PR_SET_NO_NEW_PRIVS, ..)` takes only scalar args and touches
// no caller memory; the return value is checked below.
let ret = unsafe { prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
if ret == 0 {
PrimitiveStatus::Applied
@ -326,6 +344,8 @@ fn apply_unshare_with_flags(flags: i32) -> PrimitiveStatus {
// ablation drops individual flags via `AblationMask::no_userns` /
// `no_pidns` so the escape-fixture matrix can prove the namespace
// primitive carries its weight.
// SAFETY: `unshare` takes a scalar flag set and touches no caller memory;
// the return value is checked below.
let ret = unsafe { unshare(flags) };
if ret == 0 {
PrimitiveStatus::Applied
@ -354,11 +374,14 @@ fn apply_chroot(workdir: &[u8]) -> PrimitiveStatus {
// `workdir` is NUL-terminated by `canonicalize_workdir` so we can
// hand the bytes straight to `chroot(2)` without allocating in
// pre_exec.
// SAFETY: `workdir` is NUL-terminated by `canonicalize_workdir`, so the
// pointer references a valid C string for the duration of the call.
let ret = unsafe { chroot(workdir.as_ptr() as *const i8) };
if ret != 0 {
return PrimitiveStatus::Failed(last_errno());
}
let root = b"/\0";
// SAFETY: `root` is a NUL-terminated byte literal, a valid C string.
let ret = unsafe { chdir(root.as_ptr() as *const i8) };
if ret != 0 {
return PrimitiveStatus::Failed(last_errno());
@ -391,6 +414,9 @@ struct BindMount {
fn apply_bind_mounts(mounts: &[BindMount]) {
let none = b"none\0";
for m in mounts {
// SAFETY: `source_nul`/`dest_nul` are NUL-terminated by
// `canonicalize_bind_mount` and `none` is a NUL-terminated literal, so
// every pointer references a valid C string for the duration of the call.
let r = unsafe {
mount(
m.source_nul.as_ptr() as *const i8,
@ -403,6 +429,8 @@ fn apply_bind_mounts(mounts: &[BindMount]) {
if r != 0 {
continue;
}
// SAFETY: `dest_nul` is NUL-terminated; the remaining pointers are null,
// which `mount(2)` accepts for a remount. Best-effort: result ignored.
unsafe {
mount(
std::ptr::null(),
@ -541,7 +569,7 @@ pub fn install_pre_exec(
let read_fd = pipe.as_ref().map(|p| p.read_fd);
let plan_for_child = plan.clone();
// Safety: pre_exec runs after fork(2) and before execve(2). We must
// SAFETY: pre_exec runs after fork(2) and before execve(2). We must
// not allocate, take any locks, or call into the Rust runtime. The
// captured `plan_for_child` is moved in; reading its already-allocated
// fields is safe because no allocator call is needed.

View file

@ -28,6 +28,8 @@
//! can't be filtered without a number, and any kernel that recognises
//! the name has the number too. Tests assert the policy round-trips.
#![warn(clippy::undocumented_unsafe_blocks)]
pub mod bpf;
pub mod syscalls;
@ -42,6 +44,8 @@ const PR_SET_NO_NEW_PRIVS: i32 = 38;
const PR_SET_SECCOMP: i32 = 22;
const SECCOMP_MODE_FILTER: u64 = 2;
// SAFETY: declares the libc `prctl(2)` / `__errno_location` ABI; signatures
// match the glibc/musl headers.
unsafe extern "C" {
fn prctl(option: i32, arg2: u64, arg3: u64, arg4: u64, arg5: u64) -> i32;
fn __errno_location() -> *mut i32;
@ -142,12 +146,16 @@ pub fn install_compiled_filter(program: &[SockFilter]) -> std::io::Result<()> {
// seccomp filter install. The Phase 17 hardening sequence already
// calls it earlier, but installing here too is idempotent and
// protects direct callers.
// SAFETY: `prctl(PR_SET_NO_NEW_PRIVS, ..)` takes only scalar args and touches
// no caller memory; idempotent, result intentionally ignored.
let _ = unsafe { prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
let prog = SockFprog {
len: program.len() as u16,
filter: program.as_ptr(),
};
// SAFETY: `prog` and the `program` slice it points to outlive the call; the
// pointer passed as u64 references a valid `SockFprog`. Return value checked below.
let ret = unsafe {
prctl(
PR_SET_SECCOMP,
@ -160,6 +168,8 @@ pub fn install_compiled_filter(program: &[SockFilter]) -> std::io::Result<()> {
if ret == 0 {
Ok(())
} else {
// SAFETY: `__errno_location` returns a valid per-thread errno pointer,
// dereferenced immediately after the failed prctl call.
Err(std::io::Error::from_raw_os_error(unsafe {
*__errno_location()
}))

View file

@ -141,14 +141,23 @@ impl BrokerStub {
.append(true)
.create(true)
.open(&self.log_path)?;
writeln!(
f,
"{}\t{}\t{}",
// Build the whole record (including the trailing newline) up front and
// emit it in a single `write_all`. A `writeln!` issues one syscall per
// format fragment, so a concurrent `drain_events` reader could observe a
// torn line (e.g. just `deliver` with no tab) and misclassify it. For a
// record small enough to land in one `write()` (the common case) the
// append-mode `write_all` is delivered atomically; very large records
// can still span multiple `write()`s, so the drain's newline-framing
// guard remains the backstop. Both the tab-and-newline-stripped
// destination and the newline-stripped payload guarantee the record
// occupies exactly one physical line regardless.
let line = format!(
"{}\t{}\t{}\n",
action.replace('\t', " "),
destination.replace('\t', " "),
payload
)?;
Ok(())
destination.replace(['\t', '\n'], " "),
payload.replace('\n', " ")
);
f.write_all(line.as_bytes())
}
}
@ -202,17 +211,38 @@ impl StubProvider for BrokerStub {
}
let mut events = Vec::new();
let mut bytes_read = 0_u64;
let mut buf = String::new();
let mut consumed = 0_u64;
let mut buf: Vec<u8> = Vec::new();
loop {
buf.clear();
let n = match reader.read_line(&mut buf) {
// Read raw bytes up to and including the next '\n'. Byte-oriented
// (rather than `read_line` into a `String`) so a non-UTF-8 payload
// written by an in-sandbox harness — e.g. Go's `string(msg.Data)`
// over the shared `NYX_*_LOG` — degrades to a lossy decode instead
// of erroring out. With `read_line` such a byte would return `Err`,
// and the `Err => break` arm would park the cursor on that line
// forever, permanently stalling the stream and dropping every
// record after it.
let n = match reader.read_until(b'\n', &mut buf) {
Ok(0) => break,
Ok(n) => n,
Err(_) => break,
};
bytes_read += n as u64;
let line = buf.trim_end_matches(['\r', '\n']);
// A chunk that does not end in '\n' is the tail of an in-flight
// append: a writer thread is mid-record. Leave it unconsumed (do
// not advance the cursor past it) so the next drain re-reads it
// once it is complete. Without this guard the partial line would be
// skipped forever and, worse, `parse_broker_log_line` would
// misclassify a tab-less fragment like `deliver` as a `publish`.
if buf.last() != Some(&b'\n') {
break;
}
consumed += n as u64;
// Strip exactly the single '\n' line terminator. The log is
// newline-framed (never CRLF), so a trailing '\r' is payload data
// and must be preserved rather than greedily trimmed.
let decoded = String::from_utf8_lossy(&buf[..buf.len() - 1]);
let line = decoded.as_ref();
if line.is_empty() {
continue;
}
@ -229,7 +259,7 @@ impl StubProvider for BrokerStub {
};
events.push(event);
}
*cursor += bytes_read;
*cursor += consumed;
events
}
}
@ -3378,13 +3408,19 @@ fn append_broker_event(
.append(true)
.create(true)
.open(log_path)?;
writeln!(
f,
"{}\t{}\t{}",
// Single `write_all` append: see `record_event` for why a `writeln!` is
// unsafe against concurrent drains, and for the atomicity caveat on very
// large records. The broker server threads append from multiple handlers,
// so a torn record is otherwise observable mid-flight. The destination
// (path/topic-derived, e.g. a percent-decoded `%0A`) is stripped of tabs
// and newlines and the payload of newlines so the record stays one line.
let line = format!(
"{}\t{}\t{}\n",
action.replace('\t', " "),
destination.replace('\t', " "),
payload
)
destination.replace(['\t', '\n'], " "),
payload.replace('\n', " ")
);
f.write_all(line.as_bytes())
}
#[cfg(test)]
@ -4154,7 +4190,10 @@ mod tests {
"{delivery:?}"
);
let events = stub.drain_events();
// The MSG frame reaches the wire before the server appends the matching
// `deliver` record (see `nats_deliver`), so draining the moment the
// payload arrives can race the log write. Poll until both records land.
let events = drain_events_until(&stub, 2, Duration::from_secs(5));
let actions: Vec<&str> = events
.iter()
.map(|ev| ev.detail.get("action").unwrap().as_str())

View file

@ -4,7 +4,6 @@
//! sanitizer/guard info, state-machine transitions) in a structured form
//! that can be serialized to JSON and consumed by ranking, filtering,
//! and downstream tooling.
#![allow(clippy::collapsible_if)]
use crate::commands::scan::Diag;
use crate::labels::Cap;

View file

@ -2,7 +2,6 @@
//!
//! Produces professional, security-tool-grade aligned output with a clear
//! severity hierarchy, normalised taint flow rendering, and stable wrapping.
#![allow(clippy::collapsible_if)]
use crate::chain::finding::ChainFinding;
use crate::commands::scan::{Diag, SuppressionStats};

View file

@ -658,28 +658,6 @@ mod tests {
}
}
#[allow(dead_code)]
fn with_history<'a>(
summary: &'a FindingSummary,
findings: &'a [Diag],
triage: f64,
files: u64,
) -> HealthInputs<'a> {
HealthInputs {
has_history: true,
..first_scan(summary, findings, triage, files)
}
}
#[allow(dead_code)]
fn sev_score(h: &HealthScore) -> u8 {
h.components
.iter()
.find(|c| c.label == "Severity pressure")
.unwrap()
.score
}
// ── Foundational behaviour ───────────────────────────────────────
#[test]

View file

@ -98,7 +98,7 @@ impl JobManager {
db_pool: Option<Arc<Pool<SqliteConnectionManager>>>,
database_dir: PathBuf,
) -> Result<String, &'static str> {
let mut active = self.active_job_id.lock().unwrap();
let mut active = self.active_job_id.lock().unwrap_or_else(|p| p.into_inner());
if active.is_some() {
return Err("A scan is already running");
}
@ -129,8 +129,8 @@ impl JobManager {
};
{
let mut jobs = self.jobs.lock().unwrap();
let mut order = self.job_order.lock().unwrap();
let mut jobs = self.jobs.lock().unwrap_or_else(|p| p.into_inner());
let mut order = self.job_order.lock().unwrap_or_else(|p| p.into_inner());
// Evict oldest if at capacity.
while order.len() >= self.max_jobs {
@ -323,7 +323,7 @@ impl JobManager {
// Brief lock: just update in-memory job state.
{
let mut jobs = manager.jobs.lock().unwrap();
let mut jobs = manager.jobs.lock().unwrap_or_else(|p| p.into_inner());
if let Some(job) = jobs.get_mut(&jid) {
job.finished_at = Some(finished_at);
job.duration_secs = Some(elapsed);
@ -338,7 +338,10 @@ impl JobManager {
// Clear active flag.
{
let mut active = manager.active_job_id.lock().unwrap();
let mut active = manager
.active_job_id
.lock()
.unwrap_or_else(|p| p.into_inner());
if active.as_deref() == Some(&jid) {
*active = None;
}
@ -396,13 +399,17 @@ impl JobManager {
/// Get a specific job.
pub fn get_job(&self, id: &str) -> Option<ScanJob> {
self.jobs.lock().unwrap().get(id).cloned()
self.jobs
.lock()
.unwrap_or_else(|p| p.into_inner())
.get(id)
.cloned()
}
/// List all jobs, most recent first.
pub fn list_jobs(&self) -> Vec<ScanJob> {
let jobs = self.jobs.lock().unwrap();
let order = self.job_order.lock().unwrap();
let jobs = self.jobs.lock().unwrap_or_else(|p| p.into_inner());
let order = self.job_order.lock().unwrap_or_else(|p| p.into_inner());
order
.iter()
.rev()
@ -412,16 +419,20 @@ impl JobManager {
/// Get the currently active (running) job.
pub fn active_job(&self) -> Option<ScanJob> {
let active = self.active_job_id.lock().unwrap();
active
.as_ref()
.and_then(|id| self.jobs.lock().unwrap().get(id).cloned())
let active = self.active_job_id.lock().unwrap_or_else(|p| p.into_inner());
active.as_ref().and_then(|id| {
self.jobs
.lock()
.unwrap_or_else(|p| p.into_inner())
.get(id)
.cloned()
})
}
/// Get the latest completed job.
pub fn get_latest_completed(&self) -> Option<ScanJob> {
let jobs = self.jobs.lock().unwrap();
let order = self.job_order.lock().unwrap();
let jobs = self.jobs.lock().unwrap_or_else(|p| p.into_inner());
let order = self.job_order.lock().unwrap_or_else(|p| p.into_inner());
order
.iter()
.rev()
@ -432,17 +443,17 @@ impl JobManager {
/// Remove a job from in-memory state. Rejects if the scan is currently running.
pub fn remove_job(&self, id: &str) -> Result<(), &'static str> {
let active = self.active_job_id.lock().unwrap();
let active = self.active_job_id.lock().unwrap_or_else(|p| p.into_inner());
if active.as_deref() == Some(id) {
return Err("Cannot delete a running scan");
}
drop(active);
let mut jobs = self.jobs.lock().unwrap();
let mut jobs = self.jobs.lock().unwrap_or_else(|p| p.into_inner());
if jobs.remove(id).is_none() {
return Err("Scan not found");
}
let mut order = self.job_order.lock().unwrap();
let mut order = self.job_order.lock().unwrap_or_else(|p| p.into_inner());
order.retain(|x| x != id);
Ok(())
}

View file

@ -1,5 +1,3 @@
#![allow(clippy::collapsible_if)]
use crate::database::index::Indexer;
use crate::server::app::AppState;
use crate::server::models::lang_for_finding_path;

View file

@ -1,5 +1,3 @@
#![allow(clippy::collapsible_if)]
use crate::commands::scan::Diag;
use crate::database::index::Indexer;
use crate::server::app::{AppState, CachedFindings};

View file

@ -1,5 +1,3 @@
#![allow(clippy::collapsible_if)]
use crate::commands::scan::Diag;
use crate::database::index::{Indexer, ScanRecord};
use crate::evidence::{Confidence, Verdict};

View file

@ -1,4 +1,4 @@
#![allow(clippy::collapsible_if, clippy::redundant_closure)]
#![allow(clippy::redundant_closure)]
use crate::commands::scan::Diag;
use crate::database::index::{Indexer, ScanRecord};
@ -50,11 +50,13 @@ struct StartScanRequest {
verify_backend: Option<String>,
/// Process-backend hardening profile: "standard" | "strict".
harden_profile: Option<String>,
#[allow(dead_code)]
/// Restrict the scan to these language slugs (e.g. `["java", "python"]`).
/// An unknown slug returns 400.
languages: Option<Vec<String>>,
#[allow(dead_code)]
/// Whitelist: scan only files under these paths (relative to the scan root
/// or absolute).
include_paths: Option<Vec<String>>,
#[allow(dead_code)]
/// Exclude these directories/files from the scan.
exclude_paths: Option<Vec<String>>,
}
@ -126,6 +128,34 @@ fn apply_harden_profile(
}
}
/// Restrict the scan to the requested language slugs by excluding the file
/// extensions of every *other* supported language. Returns 400 on an unknown
/// slug. No-op when `languages` is empty.
fn apply_language_filter(
config: &mut crate::utils::config::Config,
languages: &[String],
) -> Result<(), (StatusCode, Json<serde_json::Value>)> {
if languages.is_empty() {
return Ok(());
}
let mut selected: HashSet<&'static str> = HashSet::new();
for lang in languages {
let exts = crate::ast::extensions_for_lang(lang);
if exts.is_empty() {
return Err(bad_request(&format!("unknown language: {lang}")));
}
selected.extend(exts.iter().copied());
}
for (_slug, exts) in crate::ast::SUPPORTED_LANGUAGE_EXTENSIONS {
for ext in *exts {
if !selected.contains(ext) {
config.scanner.excluded_extensions.push((*ext).to_string());
}
}
}
Ok(())
}
async fn start_scan(
State(state): State<AppState>,
body: Option<Json<StartScanRequest>>,
@ -172,6 +202,23 @@ async fn start_scan(
apply_harden_profile(&mut config, profile)?;
}
if let Some(ref include) = req.include_paths {
config
.scanner
.included_paths
.extend(include.iter().cloned());
}
if let Some(ref exclude) = req.exclude_paths {
for p in exclude {
// A path may name a directory subtree or a single file; cover both.
config.scanner.excluded_directories.push(p.clone());
config.scanner.excluded_files.push(p.clone());
}
}
if let Some(ref langs) = req.languages {
apply_language_filter(&mut config, langs)?;
}
#[cfg(not(feature = "dynamic"))]
if config.scanner.verify || config.scanner.verify_all_confidence {
return Err(bad_request(

View file

@ -1,5 +1,3 @@
#![allow(clippy::collapsible_if)]
use std::collections::HashMap;
use super::ir::*;

View file

@ -20,7 +20,7 @@
//! - Unknown/unproven indices fall back to Elements (conservative)
//! - Analysis runs as a pre-pass in optimize_ssa(), like type_facts
#![allow(clippy::collapsible_if, clippy::unnecessary_map_or)]
#![allow(clippy::unnecessary_map_or)]
use crate::cfg::Cfg;
use crate::labels::{Cap, bare_method_name};
@ -119,14 +119,45 @@ pub const MAX_TRACKED_INDICES: usize = 8;
/// provably a non-negative integer constant (via the function's own const
/// propagation pass).
///
/// Ordering: `Elements < Index(0) < Index(1) < …` so that sorted merge-join
/// in `HeapState` groups all slots for the same `HeapObjectId` together.
/// Ordering: `Elements < Index(0) < Index(1) < … < Key(h0) < Key(h1) < …` so
/// that sorted merge-join in `HeapState` groups all slots for the same
/// `HeapObjectId` together.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum HeapSlot {
/// Coarse union of all elements (push/pop, dynamic index, overflow).
Elements,
/// Constant-index slot, proven by the current function's const propagation.
Index(u64),
/// Constant **string-key** slot, proven by const propagation (`map.put("k",
/// v)` / `map.get("k")` with a literal `"k"`). The `u64` is a stable hash
/// of the key string ([`hash_const_key`]). Distinct from `Index(n)` so an
/// integer index and a string key that happen to share a numeric value
/// never alias. A hash collision between two distinct string keys merely
/// reverts to the pre-existing coarse merge for those two keys (sound, no
/// new false negative).
Key(u64),
}
/// Stable FNV-1a hash of a constant string key. Deterministic across runs
/// (no `RandomState`), so a `put("k", …)` and a later `get("k")` resolve to
/// the same [`HeapSlot::Key`] within and across analysis passes.
pub fn hash_const_key(s: &str) -> u64 {
let mut h: u64 = 0xcbf29ce484222325;
for b in s.as_bytes() {
h ^= *b as u64;
h = h.wrapping_mul(0x100000001b3);
}
h
}
impl HeapSlot {
/// Whether this is a precise per-key/per-index slot (as opposed to the
/// coarse `Elements` slot). Keyed slots share the `MAX_TRACKED_INDICES`
/// budget and the overflow-collapse-to-`Elements` policy.
#[inline]
fn is_keyed(self) -> bool {
matches!(self, HeapSlot::Index(_) | HeapSlot::Key(_))
}
}
// ── HeapObjectId ─────────────────────────────────────────────────────────
@ -332,19 +363,26 @@ impl HeapState {
return;
}
// Check index overflow before inserting a new Index slot.
if let HeapSlot::Index(_) = slot {
// Keyed-slot overflow: when a container already tracks the maximum
// number of distinct keyed (`Index`/`Key`) slots, a *new* key is
// folded into the coarse `Elements` slot instead of creating another
// keyed cell. Existing keyed cells are **kept** — they are never
// removed. This keeps the lattice monotone: the old collapse-to-
// Elements behaviour *removed* keyed cells, so a `join` that
// re-introduced distinct keys followed by a `store` that re-collapsed
// them made the per-block state oscillate forever and the taint
// worklist never converged (it bailed at the 100k-iteration safety
// cap, silently dropping that function's findings). Keyed slots only
// ever arise from bounded sources (integer indices `0..MAX_TRACKED_
// INDICES` and the finite set of constant string keys in the source;
// dynamic keys already resolve to `Elements`), so refusing to grow
// past the cap bounds the state without any removal.
if slot.is_keyed() {
let key = (id, slot);
let already_present = self.entries.binary_search_by_key(&key, |(k, _)| *k).is_ok();
if !already_present {
let index_count = self.count_indices_for(id);
if index_count >= MAX_TRACKED_INDICES {
// Collapse: merge all Index(*) entries into Elements,
// then store the new taint into Elements too.
self.collapse_indices_to_elements(id);
self.store_raw(id, HeapSlot::Elements, caps, origins);
return;
}
if !already_present && self.count_indices_for(id) >= MAX_TRACKED_INDICES {
self.store_raw(id, HeapSlot::Elements, caps, origins);
return;
}
}
@ -385,14 +423,20 @@ impl HeapState {
/// Load taint from a specific (object, slot) pair.
///
/// - `Index(n)`: returns union of `(id, Index(n))` `(id, Elements)`.
/// - `Elements`: returns union of `(id, Elements)` all `(id, Index(*))`.
/// - `Key(h)`: returns union of `(id, Key(h))` `(id, Elements)` — a
/// constant-key read sees only its own key's taint plus any taint
/// written under a dynamic/unknown key (which lands in `Elements`); it
/// does NOT see other constant keys' cells.
/// - `Elements`: returns union of `(id, Elements)` all keyed slots
/// (`Index(*)` and `Key(*)`) — a dynamic/unknown-key read conservatively
/// sees every recorded keyed write.
pub fn load(&self, id: HeapObjectId, slot: HeapSlot) -> Option<HeapTaint> {
match slot {
HeapSlot::Index(n) => {
// Union specific index with Elements.
let idx_taint = self.load_raw(id, HeapSlot::Index(n));
HeapSlot::Index(_) | HeapSlot::Key(_) => {
// Union the specific keyed slot with Elements.
let slot_taint = self.load_raw(id, slot);
let elem_taint = self.load_raw(id, HeapSlot::Elements);
match (idx_taint, elem_taint) {
match (slot_taint, elem_taint) {
(Some(a), Some(b)) => Some(a.union(b)),
(Some(a), None) => Some(a.clone()),
(None, Some(b)) => Some(b.clone()),
@ -496,35 +540,13 @@ impl HeapState {
true
}
/// Count distinct `Index(*)` slots for a given object.
/// Count distinct keyed (`Index(*)` / `Key(*)`) slots for a given object.
fn count_indices_for(&self, id: HeapObjectId) -> usize {
self.entries
.iter()
.filter(|((eid, slot), _)| *eid == id && matches!(slot, HeapSlot::Index(_)))
.filter(|((eid, slot), _)| *eid == id && slot.is_keyed())
.count()
}
/// Collapse all `Index(*)` entries for `id` into `Elements`.
fn collapse_indices_to_elements(&mut self, id: HeapObjectId) {
// Collect taint from all Index entries for this object.
let mut merged_caps = Cap::empty();
let mut merged_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
self.entries.retain(|((eid, slot), taint)| {
if *eid == id && matches!(slot, HeapSlot::Index(_)) {
merged_caps |= taint.caps;
for orig in &taint.origins {
crate::taint::ssa_transfer::push_origin_bounded(&mut merged_origins, *orig);
}
false // remove this entry
} else {
true // keep
}
});
// Merge into Elements.
if !merged_caps.is_empty() {
self.store_raw(id, HeapSlot::Elements, merged_caps, &merged_origins);
}
}
}
// ── PointsToResult ───────────────────────────────────────────────────────
@ -1242,7 +1264,7 @@ mod tests {
}
#[test]
fn heap_max_tracked_indices_collapse() {
fn heap_max_tracked_indices_overflow_to_elements() {
let mut h = HeapState::empty();
let id = HeapObjectId(SsaValue(0));
@ -1255,20 +1277,123 @@ mod tests {
&[origin(i as u32)],
);
}
assert_eq!(h.count_indices_for(id), MAX_TRACKED_INDICES);
// One more should trigger collapse into Elements
// One more (a NEW key past the cap) folds into Elements, but the
// existing keyed cells are KEPT — the lattice must be monotone (no
// removal), or the taint worklist oscillates and never converges.
h.store(
id,
HeapSlot::Index(MAX_TRACKED_INDICES as u64),
Cap::SQL_QUERY,
&[origin(99)],
);
// Existing keyed cells preserved (not collapsed away).
assert_eq!(h.count_indices_for(id), MAX_TRACKED_INDICES);
// All Index entries should be collapsed into Elements.
// There should be no Index entries left.
assert_eq!(h.count_indices_for(id), 0);
// The overflowed key's taint is now reachable via Elements.
let t = h.load(id, HeapSlot::Elements).unwrap();
assert!(t.caps.contains(Cap::HTML_ESCAPE)); // over kept Index slots
assert!(t.caps.contains(Cap::SQL_QUERY)); // the overflowed key
// An existing key still reads its own cell ( Elements).
let t0 = h.load(id, HeapSlot::Index(0)).unwrap();
assert!(t0.caps.contains(Cap::HTML_ESCAPE));
}
// Elements load should see all taint
// ── HeapSlot::Key (string-key) tests ────────────────────────────
#[test]
fn hash_const_key_is_deterministic_and_distinct() {
// Same key → same hash (so put("k") and get("k") resolve identically).
assert_eq!(hash_const_key("keyB-85059"), hash_const_key("keyB-85059"));
// Distinct keys → distinct hashes (the common case).
assert_ne!(hash_const_key("keyA-85059"), hash_const_key("keyB-85059"));
}
#[test]
fn heap_key_store_load_isolation() {
// Store under "keyB", load under "keyA" → no taint (the BenchmarkTest00171
// shape: map.put("keyB", param); map.get("keyA")).
let mut h = HeapState::empty();
let id = HeapObjectId(SsaValue(0));
let kb = HeapSlot::Key(hash_const_key("keyB-85059"));
let ka = HeapSlot::Key(hash_const_key("keyA-85059"));
h.store(id, kb, Cap::SHELL_ESCAPE, &[origin(0)]);
// Same key sees the taint.
let t = h.load(id, kb).unwrap();
assert_eq!(t.caps, Cap::SHELL_ESCAPE);
// A different constant key does NOT (no Elements, no other Key cell).
assert!(h.load(id, ka).is_none());
}
#[test]
fn heap_key_load_unions_with_elements() {
// A dynamic/unknown-key write lands in Elements; a constant-key read
// still conservatively sees it.
let mut h = HeapState::empty();
let id = HeapObjectId(SsaValue(0));
h.store(id, HeapSlot::Elements, Cap::SQL_QUERY, &[origin(0)]);
let t = h.load(id, HeapSlot::Key(hash_const_key("k"))).unwrap();
assert_eq!(t.caps, Cap::SQL_QUERY);
}
#[test]
fn heap_elements_load_unions_all_keys() {
// A dynamic/unknown-key read (Elements slot) sees every constant-key write.
let mut h = HeapState::empty();
let id = HeapObjectId(SsaValue(0));
h.store(
id,
HeapSlot::Key(hash_const_key("a")),
Cap::HTML_ESCAPE,
&[origin(0)],
);
h.store(
id,
HeapSlot::Key(hash_const_key("b")),
Cap::SQL_QUERY,
&[origin(1)],
);
let t = h.load(id, HeapSlot::Elements).unwrap();
assert_eq!(t.caps, Cap::HTML_ESCAPE | Cap::SQL_QUERY);
}
#[test]
fn heap_key_and_index_are_disjoint() {
// A string-key slot and an integer-index slot never alias, even if the
// index value coincides with a key hash bucket.
let mut h = HeapState::empty();
let id = HeapObjectId(SsaValue(0));
h.store(id, HeapSlot::Index(0), Cap::FILE_IO, &[origin(0)]);
// A keyed read sees only its own cell (+ Elements, which is empty here),
// never the Index(0) cell.
assert!(h.load(id, HeapSlot::Key(hash_const_key("0"))).is_none());
}
#[test]
fn heap_max_tracked_keys_overflow_to_elements() {
// A NEW string key past the cap folds into Elements (over-approx,
// sound) while existing keyed cells are kept (monotone — no removal).
let mut h = HeapState::empty();
let id = HeapObjectId(SsaValue(0));
for i in 0..MAX_TRACKED_INDICES {
h.store(
id,
HeapSlot::Key(hash_const_key(&format!("key{i}"))),
Cap::HTML_ESCAPE,
&[origin(i as u32)],
);
}
assert_eq!(h.count_indices_for(id), MAX_TRACKED_INDICES);
h.store(
id,
HeapSlot::Key(hash_const_key("overflow")),
Cap::SQL_QUERY,
&[origin(99)],
);
// Existing keyed cells preserved.
assert_eq!(h.count_indices_for(id), MAX_TRACKED_INDICES);
let t = h.load(id, HeapSlot::Elements).unwrap();
assert!(t.caps.contains(Cap::HTML_ESCAPE));
assert!(t.caps.contains(Cap::SQL_QUERY));

View file

@ -1,5 +1,10 @@
//! AST → CFG → SSA lowering (Cytron et al.).
//!
//! Builds basic blocks, computes dominators and dominance frontiers via
//! petgraph, inserts phi nodes, and renames variables over the dominator-tree
//! preorder to produce an [`SsaBody`](super::ir::SsaBody).
#![allow(
clippy::collapsible_if,
clippy::if_same_then_else,
clippy::needless_range_loop,
clippy::only_used_in_recursion,

View file

@ -1,4 +1,4 @@
#![allow(clippy::collapsible_if, clippy::redundant_closure)]
#![allow(clippy::redundant_closure)]
//! Static hash-map lookup abstract analysis.
//!

View file

@ -1,5 +1,11 @@
#![allow(clippy::if_same_then_else)]
//! Lightweight type inference for SSA values.
//!
//! Derives [`TypeKind`] facts (ints, URLs, HTTP clients/responses, DB
//! connections, file handles) from constructors, factories, and literals, used
//! to suppress type-safe sinks and to resolve receiver-qualified callees.
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap};

View file

@ -1,4 +1,4 @@
#![allow(clippy::collapsible_if, clippy::unnecessary_map_or)]
#![allow(clippy::unnecessary_map_or)]
use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
use super::engine::DataflowResult;

View file

@ -1,4 +1,8 @@
#![allow(clippy::collapsible_if)]
//! `DefaultTransfer`: resource-lifecycle and auth-state transfer function for
//! the generic monotone dataflow engine (separate from taint).
//!
//! Tracks `ResourceLifecycle`, `AuthLevel`, and chain-proxy/product state to
//! detect leaks, use-after-close, double-close, and auth-state issues.
use super::domain::{AuthLevel, ChainProxyState, ProductState, ResourceLifecycle};
use super::engine::Transfer;

View file

@ -11,7 +11,7 @@
//! ([`AUTH_ANNOTATIONS`]).
use crate::entry_points::HttpMethod;
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, unquote};
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file};
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
@ -218,12 +218,6 @@ fn method_name(method: Node, bytes: &[u8]) -> Option<String> {
.map(str::to_string)
}
#[allow(dead_code)]
fn read_string_literal(node: Node, bytes: &[u8]) -> Option<String> {
let raw = node.utf8_text(bytes).ok()?;
Some(unquote(raw))
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -14,7 +14,7 @@
//! termination. Verdict aggregation is sound: `Infeasible` is only returned
//! when the entire relevant search space was explored without budget exhaustion.
#![allow(clippy::collapsible_if, clippy::unnecessary_map_or)]
#![allow(clippy::unnecessary_map_or)]
use std::collections::{HashMap, HashSet, VecDeque};

View file

@ -6,7 +6,7 @@
//! distinguish different objects.
//!
//! Design:
#![allow(clippy::collapsible_if, clippy::new_without_default)]
#![allow(clippy::new_without_default)]
//! - `FieldSlot::Named` for object properties (per-field precision).
//! - `FieldSlot::Elements` for container contents (flow-insensitive union ,
//! deliberately lower precision than named fields).

View file

@ -17,7 +17,6 @@
//! - Intra-callee forking with merge policies
#![allow(
clippy::collapsible_if,
clippy::let_and_return,
clippy::new_without_default,
clippy::question_mark,

View file

@ -3,7 +3,6 @@
//! Detects back edges, computes natural loop bodies, identifies induction
//! variables, and determines loop exit successors. All analysis is computed
//! once per `explore_finding()` invocation and shared across all paths.
#![allow(clippy::collapsible_if)]
use std::collections::{HashMap, HashSet};

View file

@ -9,11 +9,7 @@
//! Symbolic expression trees (`SymbolicValue`) preserve computation structure
//! through the path walk, enabling richer witness strings.
#![allow(
clippy::collapsible_if,
clippy::manual_ignore_case_cmp,
clippy::needless_borrow
)]
#![allow(clippy::manual_ignore_case_cmp, clippy::needless_borrow)]
pub mod executor;
pub mod heap;

View file

@ -28,7 +28,6 @@
//! `ConcreteStr` by the symbolic engine, it flows through as a
//! `ConstValue::Str` operand and is handled.
#![allow(
clippy::collapsible_if,
clippy::needless_borrows_for_generic_args,
clippy::new_without_default,
dead_code

View file

@ -6,11 +6,7 @@
//! Cross-file symbolic summary modeling: when a callee has an
//! `SsaFuncSummary` available via `GlobalSummaries`, the Call instruction's
//! return value is modeled symbolically instead of being treated as opaque.
#![allow(
clippy::collapsible_if,
clippy::if_same_then_else,
clippy::too_many_arguments
)]
#![allow(clippy::if_same_then_else, clippy::too_many_arguments)]
use crate::cfg::Cfg;
use crate::ssa::const_prop::ConstLattice;

View file

@ -1,5 +1,4 @@
//! Symbolic value expression trees.
#![allow(clippy::collapsible_if)]
use std::fmt;

View file

@ -73,7 +73,7 @@
//! - [`path_state`]: predicate classification for branch-sensitive propagation
//! - [`backwards`]: demand-driven backwards walk from sinks (off by default)
#![allow(clippy::collapsible_if, clippy::too_many_arguments)]
#![allow(clippy::too_many_arguments)]
pub mod backwards;
pub mod domain;

View file

@ -1,4 +1,8 @@
#![allow(clippy::collapsible_if)]
//! Predicate tracking for path-sensitive taint.
//!
//! Classifies if-conditions (`PredicateKind` / `classify_condition`) and narrows
//! validation to specific targets, so branch outcomes can validate or contradict
//! tainted values during the SSA taint solve.
// ─── PredicateKind ───────────────────────────────────────────────────────────

View file

@ -1,5 +1,13 @@
//! Block-level SSA taint worklist — the sole taint engine for all 10 languages.
//!
//! Drives a forward dataflow fixpoint over [`crate::ssa::SsaBody`] blocks
//! (`run_ssa_taint` / `run_ssa_taint_full`), propagating `SsaTaintState` through
//! `transfer_inst` with branch-aware narrowing, k=1 context-sensitive inlining
//! (`inline`), gated-sink detection (`events`), and interprocedural summary
//! extraction (`summary_extract`). Submodules: `events`, `inline`, `state`,
//! `summary_extract`.
#![allow(
clippy::collapsible_if,
clippy::if_same_then_else,
clippy::manual_flatten,
clippy::needless_range_loop,
@ -8317,34 +8325,118 @@ fn try_curl_url_propagation(
/// sets `const_values: Some(&callee_body.opt.const_values)` on the child
/// transfer, so callee-local constants are resolved.
/// - Unknown / non-integer / out-of-bounds: falls back to `HeapSlot::Elements`.
fn resolve_container_index(index_val: SsaValue, transfer: &SsaTaintTransfer) -> HeapSlot {
use crate::ssa::heap::MAX_TRACKED_INDICES;
if let Some(cv) = transfer.const_values {
if let Some(crate::ssa::const_prop::ConstLattice::Int(n)) = cv.get(&index_val) {
if *n >= 0 && (*n as u64) < MAX_TRACKED_INDICES as u64 {
return HeapSlot::Index(*n as u64);
}
/// Map a proven constant index/key to its precise `HeapSlot`, or `None`
/// (caller falls back to `HeapSlot::Elements`).
///
/// * Non-negative integer within `MAX_TRACKED_INDICES` → `Index(n)`.
/// * Any other string constant → `Key(hash)` — a keyed read sees only its own
/// key's cell (plus dynamic-key taint in `Elements`); a read of a *different*
/// constant key cannot inherit it. Unknown/dynamic keys keep the coarse
/// `Elements` merge, so no precision is lost and no false negative arises.
///
/// Both the SSA-value path (`resolve_container_index`) and the
/// literal-argument path (`resolve_op_slot`) funnel through here so a
/// `put("k", …)` written with a literal and a `get(kVar)` whose `kVar`
/// const-props to `"k"` resolve to the *same* slot.
fn slot_from_const(c: &crate::ssa::const_prop::ConstLattice) -> Option<HeapSlot> {
use crate::ssa::const_prop::ConstLattice;
use crate::ssa::heap::{MAX_TRACKED_INDICES, hash_const_key};
match c {
ConstLattice::Int(n) if *n >= 0 && (*n as u64) < MAX_TRACKED_INDICES as u64 => {
Some(HeapSlot::Index(*n as u64))
}
ConstLattice::Str(s) => Some(HeapSlot::Key(hash_const_key(s))),
_ => None,
}
HeapSlot::Elements
}
/// Look up the SSA op that defines value `v`, searching `v`'s defining block.
pub(super) fn op_for_value(ssa: &SsaBody, v: SsaValue) -> Option<&SsaOp> {
let vd = ssa.value_defs.get(v.0 as usize)?;
let blk = ssa.blocks.iter().find(|b| b.id == vd.block)?;
blk.phis
.iter()
.chain(blk.body.iter())
.find(|i| i.value == v)
.map(|i| &i.op)
}
/// Resolve a container index/key SSA value to a `HeapSlot` by tracing its
/// definition to an underlying constant. Handles the case where a literal
/// key (`map.get("k")`) surfaces as a *copy* of a `Const` (e.g.
/// `v = Assign([const])` from a cast/temporary) that the optimised
/// `const_values` map records as `Varying` rather than the literal. Bounded
/// depth; follows single-use `Assign` copies only (no phi merge, to stay
/// precise — a key joined across paths is genuinely dynamic).
fn slot_from_ssa_value(v: SsaValue, ssa: &SsaBody, depth: u32) -> Option<HeapSlot> {
if depth > 8 {
return None;
}
match op_for_value(ssa, v)? {
SsaOp::Const(Some(text)) => {
slot_from_const(&crate::ssa::const_prop::ConstLattice::parse(text))
}
SsaOp::Assign(uses) if uses.len() == 1 => slot_from_ssa_value(uses[0], ssa, depth + 1),
_ => None,
}
}
fn resolve_container_index(index_val: SsaValue, transfer: &SsaTaintTransfer) -> HeapSlot {
transfer
.const_values
.and_then(|cv| cv.get(&index_val))
.and_then(slot_from_const)
.unwrap_or(HeapSlot::Elements)
}
/// Resolve the `HeapSlot` for a container operation given its `index_arg`.
///
/// When `index_arg` is `Some(idx_pos)`, applies `arg_offset` and resolves
/// the SSA value from `args`. Otherwise returns `HeapSlot::Elements`.
/// the index/key. Two channels, checked in order:
/// 1. the SSA value at that argument position (a *variable* index/key that
/// const-props to an int/string);
/// 2. the parallel `arg_string_literals` slot (a *literal* index/key, e.g.
/// `map.get("keyB")`, which carries no SSA value because it is not a
/// variable — the dominant OWASP shape).
/// Otherwise returns `HeapSlot::Elements`.
fn resolve_op_slot(
index_arg: Option<usize>,
arg_offset: usize,
args: &[SmallVec<[SsaValue; 2]>],
arg_string_literals: &[Option<String>],
ssa: &SsaBody,
transfer: &SsaTaintTransfer,
) -> HeapSlot {
if let Some(idx_pos) = index_arg {
let effective = idx_pos + arg_offset;
if let Some(arg_vals) = args.get(effective) {
if let Some(&v) = arg_vals.first() {
return resolve_container_index(v, transfer);
// 1. Variable index/key channel: an SSA value that const-props to an
// int/string. Only claim resolution when it yields a *precise*
// slot — a literal key/index often surfaces here as an SSA value
// that const-prop could not pin down (so `resolve_container_index`
// returns `Elements`); in that case fall through to the next
// channel rather than collapsing to the coarse merge.
if let Some(&v) = args.get(effective).and_then(|g| g.first()) {
let slot = resolve_container_index(v, transfer);
if slot != HeapSlot::Elements {
return slot;
}
// 1b. SSA-trace channel: the value is a literal that surfaced as a
// copy of a `Const` (e.g. `(String) map.get("k")` lowers the
// key to `v = Assign([const])`, which optimised `const_values`
// records as `Varying`). Follow the def to the underlying
// constant so the keyed slot is recovered.
if let Some(slot) = slot_from_ssa_value(v, ssa, 0) {
return slot;
}
}
// 2. Literal index/key channel: the constant (string/int) literal
// captured at CFG build, parsed through the same `slot_from_const`
// mapping the variable path uses. This is the dominant OWASP
// shape (`map.get("keyB")`), where the key is a bare literal.
if let Some(Some(lit)) = arg_string_literals.get(effective) {
let parsed = crate::ssa::const_prop::ConstLattice::parse(lit);
if let Some(slot) = slot_from_const(&parsed) {
return slot;
}
}
}
@ -8363,7 +8455,7 @@ fn resolve_op_slot(
/// default propagation.
fn try_container_propagation(
inst: &SsaInst,
_info: &NodeInfo,
info: &NodeInfo,
args: &[SmallVec<[SsaValue; 2]>],
receiver: &Option<SsaValue>,
state: &mut SsaTaintState,
@ -8430,7 +8522,14 @@ fn try_container_propagation(
};
// Resolve index argument to HeapSlot (Index(n) or Elements).
let slot = resolve_op_slot(index_arg, arg_offset, args, transfer);
let slot = resolve_op_slot(
index_arg,
arg_offset,
args,
&info.call.arg_string_literals,
ssa,
transfer,
);
// Collect taint from value argument(s)
let mut val_caps = Cap::empty();
@ -8511,7 +8610,14 @@ fn try_container_propagation(
} else {
0
};
let slot = resolve_op_slot(index_arg, arg_offset, args, transfer);
let slot = resolve_op_slot(
index_arg,
arg_offset,
args,
&info.call.arg_string_literals,
ssa,
transfer,
);
// When points-to info available, load from heap objects
if let Some(pts) = lookup_pts(transfer, container_val) {

View file

@ -13,8 +13,8 @@
use super::events::extract_sink_arg_positions;
use super::state::{BindingKey, SsaTaintState};
use super::{
SsaTaintEvent, SsaTaintTransfer, detect_variant_inner_fact, run_ssa_taint_full, transfer_block,
transfer_inst,
SsaTaintEvent, SsaTaintTransfer, detect_variant_inner_fact, op_for_value, run_ssa_taint_full,
transfer_block, transfer_inst,
};
use crate::cfg::{BodyId, Cfg, FuncSummaries};
@ -32,6 +32,47 @@ use std::collections::{HashMap, HashSet};
/// Functions with more params fall back to legacy `FuncSummary`.
const MAX_PROBE_PARAMS: usize = 8;
/// Whether return value `v` provably evaluates to a compile-time constant —
/// its def is a `Const`, or an `Assign`/`Phi` whose every operand traces
/// (transitively) to a constant. A value that hits a parameter, a call, or any
/// other op is *not* provably constant (return `false`, conservative).
///
/// Used by `run_probe` to recognise a clean, param-free return so the
/// param-taint fallback does not attribute the seeded parameter's `Cap::all`
/// to a return that cannot reach it (the dead-branch-folded
/// `return v; v = Assign([phi([const])])` shape). Bounded depth.
fn rv_traces_to_constant(
ssa: &SsaBody,
v: SsaValue,
all_param_values: &HashSet<SsaValue>,
depth: u32,
budget: &mut u32,
) -> bool {
// Node budget caps total work so a wide phi/assign DAG (shared
// sub-expressions are re-visited without memoisation) cannot blow up;
// exhausting it returns the conservative `false`.
if depth > 16 || *budget == 0 || all_param_values.contains(&v) {
return false;
}
*budget -= 1;
match op_for_value(ssa, v) {
Some(SsaOp::Const(_)) => true,
Some(SsaOp::Assign(uses)) => {
!uses.is_empty()
&& uses
.iter()
.all(|&u| rv_traces_to_constant(ssa, u, all_param_values, depth + 1, budget))
}
Some(SsaOp::Phi(operands)) => {
!operands.is_empty()
&& operands
.iter()
.all(|(_, u)| rv_traces_to_constant(ssa, *u, all_param_values, depth + 1, budget))
}
_ => false,
}
}
/// Extract a precise per-parameter `SsaFuncSummary` from an already-lowered SSA body.
///
/// For each parameter (up to `MAX_PROBE_PARAMS`), runs a taint probe by seeding
@ -325,13 +366,17 @@ pub fn extract_ssa_func_summary_full(
// both when rv is tainted (derived) and when rv is untainted
// (the push result may have no taint but the param does).
// Skip when rv IS a param (already handled above) or when rv is
// a Const (provably untainted constant return).
let rv_is_const = ssa.blocks[bid]
.body
.iter()
.chain(ssa.blocks[bid].phis.iter())
.any(|inst| inst.value == rv && matches!(inst.op, SsaOp::Const(_)));
if !all_param_values.contains(&rv) && !rv_is_const {
// provably a constant (a return that traces — through Assign
// copies / phis — to only `Const` values cannot carry the
// seeded param's taint). The plain-`Const` check missed the
// dead-branch-folded shape `return v` where `v = Assign([phi([
// const])])`: the param is fully disconnected from the return,
// but the fallback would still attribute the seeded param's
// `Cap::all` to it, manufacturing a spurious `param→return`
// (Identity) edge that poisons every cross-file caller.
if !all_param_values.contains(&rv)
&& !rv_traces_to_constant(ssa, rv, &all_param_values, 0, &mut 256)
{
for (val, taint) in &exit.values {
if all_param_values.contains(val) {
block_param_caps |= taint.caps;

View file

@ -202,6 +202,13 @@ pub struct ScannerConfig {
/// Excluded files
pub excluded_files: Vec<String>,
/// Restrict the scan to these paths (relative to the scan root or absolute)
/// as a whitelist. When non-empty, only files matching one of these paths
/// are scanned; empty (default) scans everything not otherwise excluded.
/// Populated programmatically (e.g. the server `include_paths` request
/// field), not typically set in config files.
pub included_paths: Vec<String>,
/// RESERVED: not yet wired to walker. Whether to respect the global ignore file.
pub read_global_ignore: bool,
@ -335,6 +342,7 @@ impl Default for ScannerConfig {
.map(str::to_owned)
.collect(),
excluded_files: vec![].into_iter().map(str::to_owned).collect(),
included_paths: Vec::new(),
read_global_ignore: false,
read_vcsignore: true,
require_git_to_read_vcsignore: true,

View file

@ -1,5 +1,3 @@
#![allow(clippy::collapsible_if)]
use crate::errors::{NyxError, NyxResult};
use std::fs;
use std::io::Read;

View file

@ -75,6 +75,17 @@ fn build_overrides(root: &Path, cfg: &Config) -> ignore::overrides::Override {
tracing::warn!("invalid excludefile pattern {file}: {e}");
}
}
// Whitelist: when any include path is present, the override engine scans
// only files matching an include glob (intersected with the excludes above).
for inc in &cfg.scanner.included_paths {
let inc = inc.trim_end_matches('/');
if let Err(e) = ob.add(inc) {
tracing::warn!("invalid includepath pattern {inc}: {e}");
}
if let Err(e) = ob.add(&format!("{inc}/**")) {
tracing::warn!("invalid includepath pattern {inc}/**: {e}");
}
}
ob.build().unwrap_or_else(|e| {
tracing::error!("failed to build ignore overrides: {e}");