mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
feat(lint): centralize clippy::collapsible_if allowance in Cargo.toml and remove redundant file-level declarations
This commit is contained in:
parent
1f5777ff11
commit
1ebeb233c4
53 changed files with 851 additions and 212 deletions
|
|
@ -3,7 +3,6 @@
|
|||
//! Tracks inclusive `[lo, hi]` integer bounds. `None` = unbounded (−∞ or +∞).
|
||||
//! Both `None` = Top (any integer). Provides arithmetic transfer functions
|
||||
//! (add, sub, mul, div, mod) with overflow-safe semantics.
|
||||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::state::lattice::{AbstractDomain, Lattice};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
|
|||
29
src/ast.rs
29
src/ast.rs
|
|
@ -781,6 +781,35 @@ fn lang_for_path(path: &Path) -> Option<(Language, &'static str)> {
|
|||
}
|
||||
}
|
||||
|
||||
/// All language slugs the scanner can parse, paired with the file extensions
|
||||
/// that map to them. Single source of truth shared with [`lang_for_path`]; the
|
||||
/// `supported_extensions_resolve_to_their_slug` test asserts they stay in sync.
///
/// Each entry is `(slug, extensions)`. Extensions are written without the
/// leading dot, and no extension appears under more than one slug.
///
/// NOTE(review): `h` is not listed under either `c` or `cpp` — confirm this
/// matches what `lang_for_path` accepts.
|
||||
pub(crate) const SUPPORTED_LANGUAGE_EXTENSIONS: &[(&str, &[&str])] = &[
|
||||
("rust", &["rs"]),
|
||||
("c", &["c"]),
|
||||
(
|
||||
"cpp",
|
||||
&["cpp", "cc", "cxx", "c++", "hpp", "hxx", "hh", "h++"],
|
||||
),
|
||||
("java", &["java"]),
|
||||
("go", &["go"]),
|
||||
("php", &["php"]),
|
||||
("python", &["py"]),
|
||||
("typescript", &["ts", "tsx"]),
|
||||
("javascript", &["js", "jsx"]),
|
||||
("ruby", &["rb"]),
|
||||
];
|
||||
|
||||
/// Look up the file extensions associated with a language slug.
///
/// `slug` is compared ASCII case-insensitively against the slugs in
/// [`SUPPORTED_LANGUAGE_EXTENSIONS`]; the first matching entry wins and its
/// extension slice is returned as-is (a `'static` slice, no allocation).
|
||||
/// Returns an empty slice if `slug` is not a supported language.
|
||||
pub fn extensions_for_lang(slug: &str) -> &'static [&'static str] {
|
||||
SUPPORTED_LANGUAGE_EXTENSIONS
|
||||
.iter()
|
||||
.find(|(s, _)| s.eq_ignore_ascii_case(slug))
|
||||
.map(|(_, exts)| *exts)
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
|
||||
/// Fast binary-file guard: treat the input as binary when NUL bytes make up at least 2% of it (the integer percentage is floored and must exceed 1).
|
||||
fn is_binary(bytes: &[u8]) -> bool {
|
||||
bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1
|
||||
|
|
|
|||
|
|
@ -1,3 +1,8 @@
|
|||
//! Configuration for the Rust auth-analysis pass.
|
||||
//!
|
||||
//! Holds [`AuthAnalysisRules`] (admin path/guard patterns, sink classes, and
|
||||
//! name canonicalization) that drive `rs.auth.missing_ownership_check`.
|
||||
|
||||
use crate::auth_analysis::model::SinkClass;
|
||||
use crate::labels::bare_method_name;
|
||||
use crate::utils::config::Config;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,9 @@
|
|||
//! Shared AST-extraction helpers for the auth-analysis framework adapters.
|
||||
//!
|
||||
//! Cross-framework primitives — analysis-unit collection, call-site and
|
||||
//! `ValueRef` extraction, and tree-sitter node/string/span helpers — used by the
|
||||
//! per-framework extractors in this directory (`express`, `axum`, `django`, …).
|
||||
|
||||
use crate::auth_analysis::config::{AuthAnalysisRules, canonical_name, matches_name, strip_quotes};
|
||||
use crate::auth_analysis::model::{
|
||||
AnalysisUnit, AnalysisUnitKind, AuthCheck, AuthCheckKind, AuthorizationModel, CallSite,
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use super::helpers::first_member_label;
|
||||
use super::{
|
||||
AstMeta, Cfg, EdgeKind, MAX_COND_VARS, MAX_CONDITION_TEXT_LEN, NodeInfo, StmtKind,
|
||||
collect_idents, connect_all, detect_eq_with_const, detect_negation, has_call_descendant,
|
||||
member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
|
||||
build_cond_arith, collect_idents, connect_all, detect_eq_with_const, detect_negation,
|
||||
has_call_descendant, member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
|
||||
};
|
||||
use crate::labels::{DataLabel, LangAnalysisRules, classify};
|
||||
use crate::utils::snippet::truncate_at_char_boundary;
|
||||
|
|
@ -223,6 +223,13 @@ pub(super) fn build_ternary_diamond<'a>(
|
|||
// taint engine's equality-narrowing fires for `x === 'literal' ? …`.
|
||||
let cond_if = push_condition_node(g, cond_ast, lang, code, enclosing_func);
|
||||
g[cond_if].is_eq_with_const = detect_eq_with_const(cond_ast, lang);
|
||||
// Capture the pure int-arith + comparison tree so `fold_constant_branches`
|
||||
// can prune a dead constant-condition arm of the ternary (e.g. Java
|
||||
// `(7*18)+num > 200 ? "const" : param` with `num` a known int constant),
|
||||
// exactly as it does for the if-form. `build_cond_arith` is conservative
|
||||
// (returns None for any call/field/string/`&&`/`||`/`!` shape) so this is
|
||||
// sound for every language the diamond fires on.
|
||||
g[cond_if].cond_arith = build_cond_arith(cond_ast, lang, code, 0);
|
||||
connect_all(g, preds, cond_if, pred_edge);
|
||||
|
||||
// 2. Branches. Each branch produces its own exit frontier (≥ 1 node) ,
|
||||
|
|
|
|||
|
|
@ -1,3 +1,9 @@
|
|||
//! Literal and constant-expression extraction from tree-sitter AST nodes.
|
||||
//!
|
||||
//! Parses integer and string literals, folds constant binary ops, and derives
|
||||
//! template/string prefixes and quote stripping for CFG construction and
|
||||
//! const propagation.
|
||||
|
||||
use super::conditions::unwrap_parens;
|
||||
use super::helpers::{collect_array_pattern_bindings_indexed, collect_rhs_array_literal_elements};
|
||||
use super::{
|
||||
|
|
|
|||
|
|
@ -12,11 +12,7 @@
|
|||
//! `export_summaries` converts in-graph [`LocalFuncSummary`] values to
|
||||
//! the serializable [`crate::summary::FuncSummary`] form.
|
||||
|
||||
#![allow(
|
||||
clippy::collapsible_if,
|
||||
clippy::let_and_return,
|
||||
clippy::unnecessary_map_or
|
||||
)]
|
||||
#![allow(clippy::let_and_return, clippy::unnecessary_map_or)]
|
||||
|
||||
use petgraph::algo::dominators::{Dominators, simple_fast};
|
||||
use petgraph::prelude::*;
|
||||
|
|
@ -1481,7 +1477,12 @@ fn binary_op_token(node: Node) -> Option<BinOp> {
|
|||
/// boolean `&&`/`||`, unary `!`) returns `None`, which disables folding for
|
||||
/// that branch (never a wrong fold). Depth-bounded to guard against
|
||||
/// pathological nesting.
|
||||
fn build_cond_arith(node: Node, lang: &str, code: &[u8], depth: u32) -> Option<CondArith> {
|
||||
pub(super) fn build_cond_arith(
|
||||
node: Node,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
depth: u32,
|
||||
) -> Option<CondArith> {
|
||||
if depth > 64 {
|
||||
return None;
|
||||
}
|
||||
|
|
@ -6283,10 +6284,14 @@ pub(super) fn build_sub<'a>(
|
|||
);
|
||||
}
|
||||
|
||||
// JS/TS ternary-RHS split: `var x = c ? a : b;` and
|
||||
// JS/TS/Java ternary-RHS split: `var x = c ? a : b;` and
|
||||
// `obj.prop = c ? a : b;` lower to a real diamond CFG so the
|
||||
// condition is control-flow (not a data-flow `uses` entry).
|
||||
if matches!(lang, "javascript" | "typescript" | "tsx")
|
||||
// Java uses the same `ternary_expression` AST kind; routing it
|
||||
// through the diamond lets `fold_constant_branches` prune dead
|
||||
// constant-condition arms (`cond ? "const" : param`) the same way
|
||||
// it does for the if-form.
|
||||
if matches!(lang, "javascript" | "typescript" | "tsx" | "java")
|
||||
&& let Some((lhs_ast, ternary_ast)) = find_ternary_rhs_wrapper(ast)
|
||||
{
|
||||
let (lhs_text, lhs_labels) =
|
||||
|
|
@ -6541,8 +6546,8 @@ pub(super) fn build_sub<'a>(
|
|||
|
||||
// Assignment that may contain a call (Python `x = os.getenv(...)`, Ruby `x = gets()`)
|
||||
Kind::Assignment => {
|
||||
// JS/TS ternary-RHS split, same rationale as the CallWrapper branch.
|
||||
if matches!(lang, "javascript" | "typescript" | "tsx")
|
||||
// JS/TS/Java ternary-RHS split, same rationale as the CallWrapper branch.
|
||||
if matches!(lang, "javascript" | "typescript" | "tsx" | "java")
|
||||
&& let (Some(left), Some(right)) = (
|
||||
ast.child_by_field_name("left"),
|
||||
ast.child_by_field_name("right"),
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
#![allow(clippy::collapsible_if)]
|
||||
//! Unguarded-sink detection via CFG dominator analysis.
|
||||
//!
|
||||
//! Flags dangerous sinks that are not dominated by an appropriate guard
|
||||
//! (validation or auth check) on every path from an entry point.
|
||||
|
||||
use super::dominators::{self, dominates};
|
||||
use super::rules;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,12 @@
|
|||
#![allow(clippy::collapsible_if, clippy::type_complexity)]
|
||||
//! Scan-pipeline orchestration: the two-pass + topo-batch driver behind
|
||||
//! `nyx scan`.
|
||||
//!
|
||||
//! Coordinates summary extraction (pass 1), SCC-ordered taint analysis with a
|
||||
//! bounded fixpoint (pass 2, `run_topo_batches`), the indexed parallel scan path
|
||||
//! (`scan_with_index_parallel_observer`), suppression application, and per-file
|
||||
//! panic isolation (`recover_or_propagate`).
|
||||
|
||||
#![allow(clippy::type_complexity)]
|
||||
|
||||
pub(crate) use crate::ast::{
|
||||
analyse_file_fused, extract_all_summaries_from_bytes, run_rules_on_bytes, run_rules_on_file,
|
||||
|
|
|
|||
|
|
@ -15,8 +15,6 @@
|
|||
//! literal operand. Necessary because individual comparisons are NOT
|
||||
//! decomposed into separate SSA operations (condition nodes → `Nop`).
|
||||
|
||||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::cfg::NodeInfo;
|
||||
use crate::ssa::const_prop::ConstLattice;
|
||||
use crate::ssa::ir::{BlockId, SsaBody, SsaValue};
|
||||
|
|
|
|||
|
|
@ -152,6 +152,8 @@ fn bind_mount_ro(src: &Path, dst: &Path) -> io::Result<()> {
|
|||
let cdst = CString::new(dst.as_os_str().as_bytes())
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
|
||||
|
||||
// SAFETY: `csrc`/`cdst` are `CString`s that outlive the call, so the pointers
|
||||
// reference valid NUL-terminated C strings. Return value checked below.
|
||||
let bind = unsafe {
|
||||
mount(
|
||||
csrc.as_ptr(),
|
||||
|
|
@ -165,6 +167,8 @@ fn bind_mount_ro(src: &Path, dst: &Path) -> io::Result<()> {
|
|||
return Err(io::Error::last_os_error());
|
||||
}
|
||||
// Best-effort read-only remount; leave the rw bind if it fails.
|
||||
// SAFETY: `cdst` outlives the call; the other pointers are null, accepted by
|
||||
// `mount(2)` for a remount.
|
||||
unsafe {
|
||||
mount(
|
||||
std::ptr::null(),
|
||||
|
|
|
|||
|
|
@ -724,7 +724,7 @@ fn register_exit_cleanup() {
|
|||
unsafe extern "C" {
|
||||
fn atexit(f: extern "C" fn()) -> i32;
|
||||
}
|
||||
// Safety: atexit(3) is async-signal-safe for registration; the handler
|
||||
// SAFETY: atexit(3) is async-signal-safe for registration; the handler
|
||||
// itself runs on the main thread during normal shutdown, after all Rust
|
||||
// destructors, so std::process::Command is safe to call from it.
|
||||
unsafe { atexit(stop_all_containers) };
|
||||
|
|
@ -1870,6 +1870,7 @@ fn libc_kill(pid: i32, sig: i32) -> i32 {
|
|||
unsafe extern "C" {
|
||||
fn kill(pid: i32, sig: i32) -> i32;
|
||||
}
|
||||
// SAFETY: `kill(2)` takes only scalar args and touches no caller memory.
|
||||
unsafe { kill(pid, sig) }
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@
|
|||
//! record into it, execve(2) drops the write end, and the parent's
|
||||
//! drain thread sees EOF and records the outcome.
|
||||
|
||||
#![warn(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
use crate::dynamic::sandbox::seccomp;
|
||||
use crate::dynamic::sandbox::seccomp::bpf::SockFilter;
|
||||
use crate::dynamic::sandbox::{AblationMask, ProcessHardeningProfile, SandboxOptions};
|
||||
|
|
@ -144,11 +146,14 @@ struct StatusPipe {
|
|||
|
||||
impl StatusPipe {
|
||||
fn new() -> std::io::Result<Self> {
|
||||
// SAFETY: declares the libc `pipe2(2)` ABI; the signature matches <unistd.h>.
|
||||
unsafe extern "C" {
|
||||
fn pipe2(pipefd: *mut i32, flags: i32) -> i32;
|
||||
}
|
||||
const O_CLOEXEC: i32 = 0o2_000_000;
|
||||
let mut fds = [-1_i32; 2];
|
||||
// SAFETY: `fds` is a valid 2-element array the kernel writes into; `pipe2`
|
||||
// reads no caller memory beyond that pointer. Return value checked below.
|
||||
let ret = unsafe { pipe2(fds.as_mut_ptr(), O_CLOEXEC) };
|
||||
if ret != 0 {
|
||||
return Err(std::io::Error::last_os_error());
|
||||
|
|
@ -161,15 +166,20 @@ impl StatusPipe {
|
|||
}
|
||||
|
||||
/// Close the raw file descriptor `fd` via libc `close(2)`.
///
/// Best-effort: the return value of `close` is discarded, so a failure is not
/// reported to the caller.
fn close_fd(fd: RawFd) {
|
||||
// SAFETY: declares the libc `close(2)` ABI; signature matches <unistd.h>.
|
||||
unsafe extern "C" {
|
||||
fn close(fd: i32) -> i32;
|
||||
}
|
||||
// SAFETY: `fd` is an owned raw fd closed exactly once; the return value is
|
||||
// intentionally ignored (best-effort close).
|
||||
unsafe { close(fd) };
|
||||
}
|
||||
|
||||
/// Drain `read_fd` into a `HardeningOutcome`. Wire format is the
|
||||
/// 15-byte fixed-width record produced by [`encode_outcome`].
|
||||
fn drain_outcome(read_fd: RawFd) -> Option<HardeningOutcome> {
|
||||
// SAFETY: `read_fd` is an owned raw fd (the pipe read end) used nowhere else;
|
||||
// `File` takes sole ownership and closes it on drop.
|
||||
let mut file = unsafe { std::fs::File::from_raw_fd(read_fd) };
|
||||
let mut buf = Vec::with_capacity(64);
|
||||
if file.read_to_end(&mut buf).is_err() {
|
||||
|
|
@ -276,6 +286,8 @@ struct Rlimit {
|
|||
max: u64,
|
||||
}
|
||||
|
||||
// SAFETY: declares the libc syscall-wrapper ABI (setrlimit/prctl/unshare/chroot/
|
||||
// chdir/mount/write/__errno_location); signatures match the glibc/musl headers.
|
||||
unsafe extern "C" {
|
||||
fn setrlimit(resource: i32, rlim: *const Rlimit) -> i32;
|
||||
fn prctl(option: i32, arg2: u64, arg3: u64, arg4: u64, arg5: u64) -> i32;
|
||||
|
|
@ -294,6 +306,8 @@ unsafe extern "C" {
|
|||
}
|
||||
|
||||
/// Return the calling thread's current `errno` value, read through the libc
/// `__errno_location` accessor.
///
/// The value is only meaningful when read immediately after a libc call that
/// reported failure.
fn last_errno() -> i32 {
|
||||
// SAFETY: `__errno_location` returns a valid pointer to the calling thread's
|
||||
// errno; dereferencing it right after a failed syscall is the standard idiom.
|
||||
unsafe { *__errno_location() }
|
||||
}
|
||||
|
||||
|
|
@ -302,6 +316,8 @@ fn apply_rlimit(resource: i32, bytes: u64) -> PrimitiveStatus {
|
|||
cur: bytes,
|
||||
max: bytes,
|
||||
};
|
||||
// SAFETY: `&rl` points to a valid `Rlimit` for the duration of the call;
|
||||
// `setrlimit` only reads it and returns a status checked below.
|
||||
let ret = unsafe { setrlimit(resource, &rl) };
|
||||
if ret == 0 {
|
||||
PrimitiveStatus::Applied
|
||||
|
|
@ -311,6 +327,8 @@ fn apply_rlimit(resource: i32, bytes: u64) -> PrimitiveStatus {
|
|||
}
|
||||
|
||||
fn apply_no_new_privs() -> PrimitiveStatus {
|
||||
// SAFETY: `prctl(PR_SET_NO_NEW_PRIVS, ..)` takes only scalar args and touches
|
||||
// no caller memory; the return value is checked below.
|
||||
let ret = unsafe { prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
|
||||
if ret == 0 {
|
||||
PrimitiveStatus::Applied
|
||||
|
|
@ -326,6 +344,8 @@ fn apply_unshare_with_flags(flags: i32) -> PrimitiveStatus {
|
|||
// ablation drops individual flags via `AblationMask::no_userns` /
|
||||
// `no_pidns` so the escape-fixture matrix can prove the namespace
|
||||
// primitive carries its weight.
|
||||
// SAFETY: `unshare` takes a scalar flag set and touches no caller memory;
|
||||
// the return value is checked below.
|
||||
let ret = unsafe { unshare(flags) };
|
||||
if ret == 0 {
|
||||
PrimitiveStatus::Applied
|
||||
|
|
@ -354,11 +374,14 @@ fn apply_chroot(workdir: &[u8]) -> PrimitiveStatus {
|
|||
// `workdir` is NUL-terminated by `canonicalize_workdir` so we can
|
||||
// hand the bytes straight to `chroot(2)` without allocating in
|
||||
// pre_exec.
|
||||
// SAFETY: `workdir` is NUL-terminated by `canonicalize_workdir`, so the
|
||||
// pointer references a valid C string for the duration of the call.
|
||||
let ret = unsafe { chroot(workdir.as_ptr() as *const i8) };
|
||||
if ret != 0 {
|
||||
return PrimitiveStatus::Failed(last_errno());
|
||||
}
|
||||
let root = b"/\0";
|
||||
// SAFETY: `root` is a NUL-terminated byte literal, a valid C string.
|
||||
let ret = unsafe { chdir(root.as_ptr() as *const i8) };
|
||||
if ret != 0 {
|
||||
return PrimitiveStatus::Failed(last_errno());
|
||||
|
|
@ -391,6 +414,9 @@ struct BindMount {
|
|||
fn apply_bind_mounts(mounts: &[BindMount]) {
|
||||
let none = b"none\0";
|
||||
for m in mounts {
|
||||
// SAFETY: `source_nul`/`dest_nul` are NUL-terminated by
|
||||
// `canonicalize_bind_mount` and `none` is a NUL-terminated literal, so
|
||||
// every pointer references a valid C string for the duration of the call.
|
||||
let r = unsafe {
|
||||
mount(
|
||||
m.source_nul.as_ptr() as *const i8,
|
||||
|
|
@ -403,6 +429,8 @@ fn apply_bind_mounts(mounts: &[BindMount]) {
|
|||
if r != 0 {
|
||||
continue;
|
||||
}
|
||||
// SAFETY: `dest_nul` is NUL-terminated; the remaining pointers are null,
|
||||
// which `mount(2)` accepts for a remount. Best-effort: result ignored.
|
||||
unsafe {
|
||||
mount(
|
||||
std::ptr::null(),
|
||||
|
|
@ -541,7 +569,7 @@ pub fn install_pre_exec(
|
|||
let read_fd = pipe.as_ref().map(|p| p.read_fd);
|
||||
let plan_for_child = plan.clone();
|
||||
|
||||
// Safety: pre_exec runs after fork(2) and before execve(2). We must
|
||||
// SAFETY: pre_exec runs after fork(2) and before execve(2). We must
|
||||
// not allocate, take any locks, or call into the Rust runtime. The
|
||||
// captured `plan_for_child` is moved in; reading its already-allocated
|
||||
// fields is safe because no allocator call is needed.
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@
|
|||
//! can't be filtered without a number, and any kernel that recognises
|
||||
//! the name has the number too. Tests assert the policy round-trips.
|
||||
|
||||
#![warn(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
pub mod bpf;
|
||||
pub mod syscalls;
|
||||
|
||||
|
|
@ -42,6 +44,8 @@ const PR_SET_NO_NEW_PRIVS: i32 = 38;
|
|||
const PR_SET_SECCOMP: i32 = 22;
|
||||
const SECCOMP_MODE_FILTER: u64 = 2;
|
||||
|
||||
// SAFETY: declares the libc `prctl(2)` / `__errno_location` ABI; signatures
|
||||
// match the glibc/musl headers.
|
||||
unsafe extern "C" {
|
||||
fn prctl(option: i32, arg2: u64, arg3: u64, arg4: u64, arg5: u64) -> i32;
|
||||
fn __errno_location() -> *mut i32;
|
||||
|
|
@ -142,12 +146,16 @@ pub fn install_compiled_filter(program: &[SockFilter]) -> std::io::Result<()> {
|
|||
// seccomp filter install. The Phase 17 hardening sequence already
|
||||
// calls it earlier, but installing here too is idempotent and
|
||||
// protects direct callers.
|
||||
// SAFETY: `prctl(PR_SET_NO_NEW_PRIVS, ..)` takes only scalar args and touches
|
||||
// no caller memory; idempotent, result intentionally ignored.
|
||||
let _ = unsafe { prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
|
||||
|
||||
let prog = SockFprog {
|
||||
len: program.len() as u16,
|
||||
filter: program.as_ptr(),
|
||||
};
|
||||
// SAFETY: `prog` and the `program` slice it points to outlive the call; the
|
||||
// pointer passed as u64 references a valid `SockFprog`. Return value checked below.
|
||||
let ret = unsafe {
|
||||
prctl(
|
||||
PR_SET_SECCOMP,
|
||||
|
|
@ -160,6 +168,8 @@ pub fn install_compiled_filter(program: &[SockFilter]) -> std::io::Result<()> {
|
|||
if ret == 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
// SAFETY: `__errno_location` returns a valid per-thread errno pointer,
|
||||
// dereferenced immediately after the failed prctl call.
|
||||
Err(std::io::Error::from_raw_os_error(unsafe {
|
||||
*__errno_location()
|
||||
}))
|
||||
|
|
|
|||
|
|
@ -141,14 +141,23 @@ impl BrokerStub {
|
|||
.append(true)
|
||||
.create(true)
|
||||
.open(&self.log_path)?;
|
||||
writeln!(
|
||||
f,
|
||||
"{}\t{}\t{}",
|
||||
// Build the whole record (including the trailing newline) up front and
|
||||
// emit it in a single `write_all`. A `writeln!` issues one syscall per
|
||||
// format fragment, so a concurrent `drain_events` reader could observe a
|
||||
// torn line (e.g. just `deliver` with no tab) and misclassify it. For a
|
||||
// record small enough to land in one `write()` (the common case) the
|
||||
// append-mode `write_all` is delivered atomically; very large records
|
||||
// can still span multiple `write()`s, so the drain's newline-framing
|
||||
// guard remains the backstop. Both the tab-and-newline-stripped
|
||||
// destination and the newline-stripped payload guarantee the record
|
||||
// occupies exactly one physical line regardless.
|
||||
let line = format!(
|
||||
"{}\t{}\t{}\n",
|
||||
action.replace('\t', " "),
|
||||
destination.replace('\t', " "),
|
||||
payload
|
||||
)?;
|
||||
Ok(())
|
||||
destination.replace(['\t', '\n'], " "),
|
||||
payload.replace('\n', " ")
|
||||
);
|
||||
f.write_all(line.as_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -202,17 +211,38 @@ impl StubProvider for BrokerStub {
|
|||
}
|
||||
|
||||
let mut events = Vec::new();
|
||||
let mut bytes_read = 0_u64;
|
||||
let mut buf = String::new();
|
||||
let mut consumed = 0_u64;
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
loop {
|
||||
buf.clear();
|
||||
let n = match reader.read_line(&mut buf) {
|
||||
// Read raw bytes up to and including the next '\n'. Byte-oriented
|
||||
// (rather than `read_line` into a `String`) so a non-UTF-8 payload
|
||||
// written by an in-sandbox harness — e.g. Go's `string(msg.Data)`
|
||||
// over the shared `NYX_*_LOG` — degrades to a lossy decode instead
|
||||
// of erroring out. With `read_line` such a byte would return `Err`,
|
||||
// and the `Err => break` arm would park the cursor on that line
|
||||
// forever, permanently stalling the stream and dropping every
|
||||
// record after it.
|
||||
let n = match reader.read_until(b'\n', &mut buf) {
|
||||
Ok(0) => break,
|
||||
Ok(n) => n,
|
||||
Err(_) => break,
|
||||
};
|
||||
bytes_read += n as u64;
|
||||
let line = buf.trim_end_matches(['\r', '\n']);
|
||||
// A chunk that does not end in '\n' is the tail of an in-flight
|
||||
// append: a writer thread is mid-record. Leave it unconsumed (do
|
||||
// not advance the cursor past it) so the next drain re-reads it
|
||||
// once it is complete. Without this guard the partial line would be
|
||||
// skipped forever and, worse, `parse_broker_log_line` would
|
||||
// misclassify a tab-less fragment like `deliver` as a `publish`.
|
||||
if buf.last() != Some(&b'\n') {
|
||||
break;
|
||||
}
|
||||
consumed += n as u64;
|
||||
// Strip exactly the single '\n' line terminator. The log is
|
||||
// newline-framed (never CRLF), so a trailing '\r' is payload data
|
||||
// and must be preserved rather than greedily trimmed.
|
||||
let decoded = String::from_utf8_lossy(&buf[..buf.len() - 1]);
|
||||
let line = decoded.as_ref();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -229,7 +259,7 @@ impl StubProvider for BrokerStub {
|
|||
};
|
||||
events.push(event);
|
||||
}
|
||||
*cursor += bytes_read;
|
||||
*cursor += consumed;
|
||||
events
|
||||
}
|
||||
}
|
||||
|
|
@ -3378,13 +3408,19 @@ fn append_broker_event(
|
|||
.append(true)
|
||||
.create(true)
|
||||
.open(log_path)?;
|
||||
writeln!(
|
||||
f,
|
||||
"{}\t{}\t{}",
|
||||
// Single `write_all` append: see `record_event` for why a `writeln!` is
|
||||
// unsafe against concurrent drains, and for the atomicity caveat on very
|
||||
// large records. The broker server threads append from multiple handlers,
|
||||
// so a torn record is otherwise observable mid-flight. The destination
|
||||
// (path/topic-derived, e.g. a percent-decoded `%0A`) is stripped of tabs
|
||||
// and newlines and the payload of newlines so the record stays one line.
|
||||
let line = format!(
|
||||
"{}\t{}\t{}\n",
|
||||
action.replace('\t', " "),
|
||||
destination.replace('\t', " "),
|
||||
payload
|
||||
)
|
||||
destination.replace(['\t', '\n'], " "),
|
||||
payload.replace('\n', " ")
|
||||
);
|
||||
f.write_all(line.as_bytes())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -4154,7 +4190,10 @@ mod tests {
|
|||
"{delivery:?}"
|
||||
);
|
||||
|
||||
let events = stub.drain_events();
|
||||
// The MSG frame reaches the wire before the server appends the matching
|
||||
// `deliver` record (see `nats_deliver`), so draining the moment the
|
||||
// payload arrives can race the log write. Poll until both records land.
|
||||
let events = drain_events_until(&stub, 2, Duration::from_secs(5));
|
||||
let actions: Vec<&str> = events
|
||||
.iter()
|
||||
.map(|ev| ev.detail.get("action").unwrap().as_str())
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
//! sanitizer/guard info, state-machine transitions) in a structured form
|
||||
//! that can be serialized to JSON and consumed by ranking, filtering,
|
||||
//! and downstream tooling.
|
||||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::labels::Cap;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
//!
|
||||
//! Produces professional, security-tool-grade aligned output with a clear
|
||||
//! severity hierarchy, normalised taint flow rendering, and stable wrapping.
|
||||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::chain::finding::ChainFinding;
|
||||
use crate::commands::scan::{Diag, SuppressionStats};
|
||||
|
|
|
|||
|
|
@ -658,28 +658,6 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn with_history<'a>(
|
||||
summary: &'a FindingSummary,
|
||||
findings: &'a [Diag],
|
||||
triage: f64,
|
||||
files: u64,
|
||||
) -> HealthInputs<'a> {
|
||||
HealthInputs {
|
||||
has_history: true,
|
||||
..first_scan(summary, findings, triage, files)
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn sev_score(h: &HealthScore) -> u8 {
|
||||
h.components
|
||||
.iter()
|
||||
.find(|c| c.label == "Severity pressure")
|
||||
.unwrap()
|
||||
.score
|
||||
}
|
||||
|
||||
// ── Foundational behaviour ───────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ impl JobManager {
|
|||
db_pool: Option<Arc<Pool<SqliteConnectionManager>>>,
|
||||
database_dir: PathBuf,
|
||||
) -> Result<String, &'static str> {
|
||||
let mut active = self.active_job_id.lock().unwrap();
|
||||
let mut active = self.active_job_id.lock().unwrap_or_else(|p| p.into_inner());
|
||||
if active.is_some() {
|
||||
return Err("A scan is already running");
|
||||
}
|
||||
|
|
@ -129,8 +129,8 @@ impl JobManager {
|
|||
};
|
||||
|
||||
{
|
||||
let mut jobs = self.jobs.lock().unwrap();
|
||||
let mut order = self.job_order.lock().unwrap();
|
||||
let mut jobs = self.jobs.lock().unwrap_or_else(|p| p.into_inner());
|
||||
let mut order = self.job_order.lock().unwrap_or_else(|p| p.into_inner());
|
||||
|
||||
// Evict oldest if at capacity.
|
||||
while order.len() >= self.max_jobs {
|
||||
|
|
@ -323,7 +323,7 @@ impl JobManager {
|
|||
|
||||
// Brief lock: just update in-memory job state.
|
||||
{
|
||||
let mut jobs = manager.jobs.lock().unwrap();
|
||||
let mut jobs = manager.jobs.lock().unwrap_or_else(|p| p.into_inner());
|
||||
if let Some(job) = jobs.get_mut(&jid) {
|
||||
job.finished_at = Some(finished_at);
|
||||
job.duration_secs = Some(elapsed);
|
||||
|
|
@ -338,7 +338,10 @@ impl JobManager {
|
|||
|
||||
// Clear active flag.
|
||||
{
|
||||
let mut active = manager.active_job_id.lock().unwrap();
|
||||
let mut active = manager
|
||||
.active_job_id
|
||||
.lock()
|
||||
.unwrap_or_else(|p| p.into_inner());
|
||||
if active.as_deref() == Some(&jid) {
|
||||
*active = None;
|
||||
}
|
||||
|
|
@ -396,13 +399,17 @@ impl JobManager {
|
|||
|
||||
/// Get a specific job.
|
||||
pub fn get_job(&self, id: &str) -> Option<ScanJob> {
|
||||
self.jobs.lock().unwrap().get(id).cloned()
|
||||
self.jobs
|
||||
.lock()
|
||||
.unwrap_or_else(|p| p.into_inner())
|
||||
.get(id)
|
||||
.cloned()
|
||||
}
|
||||
|
||||
/// List all jobs, most recent first.
|
||||
pub fn list_jobs(&self) -> Vec<ScanJob> {
|
||||
let jobs = self.jobs.lock().unwrap();
|
||||
let order = self.job_order.lock().unwrap();
|
||||
let jobs = self.jobs.lock().unwrap_or_else(|p| p.into_inner());
|
||||
let order = self.job_order.lock().unwrap_or_else(|p| p.into_inner());
|
||||
order
|
||||
.iter()
|
||||
.rev()
|
||||
|
|
@ -412,16 +419,20 @@ impl JobManager {
|
|||
|
||||
/// Get the currently active (running) job.
|
||||
pub fn active_job(&self) -> Option<ScanJob> {
|
||||
let active = self.active_job_id.lock().unwrap();
|
||||
active
|
||||
.as_ref()
|
||||
.and_then(|id| self.jobs.lock().unwrap().get(id).cloned())
|
||||
let active = self.active_job_id.lock().unwrap_or_else(|p| p.into_inner());
|
||||
active.as_ref().and_then(|id| {
|
||||
self.jobs
|
||||
.lock()
|
||||
.unwrap_or_else(|p| p.into_inner())
|
||||
.get(id)
|
||||
.cloned()
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the latest completed job.
|
||||
pub fn get_latest_completed(&self) -> Option<ScanJob> {
|
||||
let jobs = self.jobs.lock().unwrap();
|
||||
let order = self.job_order.lock().unwrap();
|
||||
let jobs = self.jobs.lock().unwrap_or_else(|p| p.into_inner());
|
||||
let order = self.job_order.lock().unwrap_or_else(|p| p.into_inner());
|
||||
order
|
||||
.iter()
|
||||
.rev()
|
||||
|
|
@ -432,17 +443,17 @@ impl JobManager {
|
|||
|
||||
/// Remove a job from in-memory state. Rejects if the scan is currently running.
|
||||
pub fn remove_job(&self, id: &str) -> Result<(), &'static str> {
|
||||
let active = self.active_job_id.lock().unwrap();
|
||||
let active = self.active_job_id.lock().unwrap_or_else(|p| p.into_inner());
|
||||
if active.as_deref() == Some(id) {
|
||||
return Err("Cannot delete a running scan");
|
||||
}
|
||||
drop(active);
|
||||
|
||||
let mut jobs = self.jobs.lock().unwrap();
|
||||
let mut jobs = self.jobs.lock().unwrap_or_else(|p| p.into_inner());
|
||||
if jobs.remove(id).is_none() {
|
||||
return Err("Scan not found");
|
||||
}
|
||||
let mut order = self.job_order.lock().unwrap();
|
||||
let mut order = self.job_order.lock().unwrap_or_else(|p| p.into_inner());
|
||||
order.retain(|x| x != id);
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::database::index::Indexer;
|
||||
use crate::server::app::AppState;
|
||||
use crate::server::models::lang_for_finding_path;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::database::index::Indexer;
|
||||
use crate::server::app::{AppState, CachedFindings};
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::database::index::{Indexer, ScanRecord};
|
||||
use crate::evidence::{Confidence, Verdict};
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#![allow(clippy::collapsible_if, clippy::redundant_closure)]
|
||||
#![allow(clippy::redundant_closure)]
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::database::index::{Indexer, ScanRecord};
|
||||
|
|
@ -50,11 +50,13 @@ struct StartScanRequest {
|
|||
verify_backend: Option<String>,
|
||||
/// Process-backend hardening profile: "standard" | "strict".
|
||||
harden_profile: Option<String>,
|
||||
#[allow(dead_code)]
|
||||
/// Restrict the scan to these language slugs (e.g. `["java", "python"]`).
|
||||
/// An unknown slug returns 400.
|
||||
languages: Option<Vec<String>>,
|
||||
#[allow(dead_code)]
|
||||
/// Whitelist: scan only files under these paths (relative to the scan root
|
||||
/// or absolute).
|
||||
include_paths: Option<Vec<String>>,
|
||||
#[allow(dead_code)]
|
||||
/// Exclude these directories/files from the scan.
|
||||
exclude_paths: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
|
|
@ -126,6 +128,34 @@ fn apply_harden_profile(
|
|||
}
|
||||
}
|
||||
|
||||
/// Restrict the scan to the requested language slugs by excluding the file
|
||||
/// extensions of every *other* supported language. Returns 400 on an unknown
|
||||
/// slug. No-op when `languages` is empty.
///
/// Slugs are resolved through `crate::ast::extensions_for_lang`. All slugs are
/// validated before `config` is touched, so an error leaves `config`
/// unchanged. On success the non-selected extensions are appended to
/// `config.scanner.excluded_extensions`; entries already present are kept.
|
||||
fn apply_language_filter(
|
||||
config: &mut crate::utils::config::Config,
|
||||
languages: &[String],
|
||||
) -> Result<(), (StatusCode, Json<serde_json::Value>)> {
|
||||
if languages.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
// Phase 1: collect the extensions of every requested language, rejecting
// the whole request on the first slug that resolves to no extensions.
let mut selected: HashSet<&'static str> = HashSet::new();
|
||||
for lang in languages {
|
||||
let exts = crate::ast::extensions_for_lang(lang);
|
||||
if exts.is_empty() {
|
||||
return Err(bad_request(&format!("unknown language: {lang}")));
|
||||
}
|
||||
selected.extend(exts.iter().copied());
|
||||
}
|
||||
// Phase 2: exclude every supported extension that was not selected above.
for (_slug, exts) in crate::ast::SUPPORTED_LANGUAGE_EXTENSIONS {
|
||||
for ext in *exts {
|
||||
if !selected.contains(ext) {
|
||||
config.scanner.excluded_extensions.push((*ext).to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn start_scan(
|
||||
State(state): State<AppState>,
|
||||
body: Option<Json<StartScanRequest>>,
|
||||
|
|
@ -172,6 +202,23 @@ async fn start_scan(
|
|||
apply_harden_profile(&mut config, profile)?;
|
||||
}
|
||||
|
||||
if let Some(ref include) = req.include_paths {
|
||||
config
|
||||
.scanner
|
||||
.included_paths
|
||||
.extend(include.iter().cloned());
|
||||
}
|
||||
if let Some(ref exclude) = req.exclude_paths {
|
||||
for p in exclude {
|
||||
// A path may name a directory subtree or a single file; cover both.
|
||||
config.scanner.excluded_directories.push(p.clone());
|
||||
config.scanner.excluded_files.push(p.clone());
|
||||
}
|
||||
}
|
||||
if let Some(ref langs) = req.languages {
|
||||
apply_language_filter(&mut config, langs)?;
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "dynamic"))]
|
||||
if config.scanner.verify || config.scanner.verify_all_confidence {
|
||||
return Err(bad_request(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::ir::*;
|
||||
|
|
|
|||
223
src/ssa/heap.rs
223
src/ssa/heap.rs
|
|
@ -20,7 +20,7 @@
|
|||
//! - Unknown/unproven indices fall back to Elements (conservative)
|
||||
//! - Analysis runs as a pre-pass in optimize_ssa(), like type_facts
|
||||
|
||||
#![allow(clippy::collapsible_if, clippy::unnecessary_map_or)]
|
||||
#![allow(clippy::unnecessary_map_or)]
|
||||
|
||||
use crate::cfg::Cfg;
|
||||
use crate::labels::{Cap, bare_method_name};
|
||||
|
|
@ -119,14 +119,45 @@ pub const MAX_TRACKED_INDICES: usize = 8;
|
|||
/// provably a non-negative integer constant (via the function's own const
|
||||
/// propagation pass).
|
||||
///
|
||||
/// Ordering: `Elements < Index(0) < Index(1) < …` so that sorted merge-join
|
||||
/// in `HeapState` groups all slots for the same `HeapObjectId` together.
|
||||
/// Ordering: `Elements < Index(0) < Index(1) < … < Key(h0) < Key(h1) < …` so
|
||||
/// that sorted merge-join in `HeapState` groups all slots for the same
|
||||
/// `HeapObjectId` together.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub enum HeapSlot {
|
||||
/// Coarse union of all elements (push/pop, dynamic index, overflow).
|
||||
Elements,
|
||||
/// Constant-index slot, proven by the current function's const propagation.
|
||||
Index(u64),
|
||||
/// Constant **string-key** slot, proven by const propagation (`map.put("k",
|
||||
/// v)` / `map.get("k")` with a literal `"k"`). The `u64` is a stable hash
|
||||
/// of the key string ([`hash_const_key`]). Distinct from `Index(n)` so an
|
||||
/// integer index and a string key that happen to share a numeric value
|
||||
/// never alias. A hash collision between two distinct string keys merely
|
||||
/// reverts to the pre-existing coarse merge for those two keys (sound, no
|
||||
/// new false negative).
|
||||
Key(u64),
|
||||
}
|
||||
|
||||
/// Stable 64-bit FNV-1a hash of a constant string key.
///
/// The result is a pure function of the key's UTF-8 bytes (no `RandomState`
/// or other per-process seed), so a `put("k", …)` and a later `get("k")`
/// resolve to the same [`HeapSlot::Key`] within and across analysis passes.
///
/// The empty string hashes to the FNV offset basis.
#[must_use]
pub fn hash_const_key(s: &str) -> u64 {
    /// 64-bit FNV offset basis (initial hash state).
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    /// 64-bit FNV prime.
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    s.bytes().fold(FNV_OFFSET_BASIS, |hash, byte| {
        // FNV-1a order: XOR the byte in first, then multiply by the prime.
        // The multiplication is defined modulo 2^64, hence `wrapping_mul`.
        (hash ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
|
||||
|
||||
impl HeapSlot {
|
||||
/// Whether this is a precise per-key/per-index slot (as opposed to the
|
||||
/// coarse `Elements` slot). Keyed slots share the `MAX_TRACKED_INDICES`
|
||||
/// budget and the overflow-collapse-to-`Elements` policy.
|
||||
#[inline]
|
||||
fn is_keyed(self) -> bool {
|
||||
matches!(self, HeapSlot::Index(_) | HeapSlot::Key(_))
|
||||
}
|
||||
}
|
||||
|
||||
// ── HeapObjectId ─────────────────────────────────────────────────────────
|
||||
|
|
@ -332,19 +363,26 @@ impl HeapState {
|
|||
return;
|
||||
}
|
||||
|
||||
// Check index overflow before inserting a new Index slot.
|
||||
if let HeapSlot::Index(_) = slot {
|
||||
// Keyed-slot overflow: when a container already tracks the maximum
|
||||
// number of distinct keyed (`Index`/`Key`) slots, a *new* key is
|
||||
// folded into the coarse `Elements` slot instead of creating another
|
||||
// keyed cell. Existing keyed cells are **kept** — they are never
|
||||
// removed. This keeps the lattice monotone: the old collapse-to-
|
||||
// Elements behaviour *removed* keyed cells, so a `join` that
|
||||
// re-introduced distinct keys followed by a `store` that re-collapsed
|
||||
// them made the per-block state oscillate forever and the taint
|
||||
// worklist never converged (it bailed at the 100k-iteration safety
|
||||
// cap, silently dropping that function's findings). Keyed slots only
|
||||
// ever arise from bounded sources (integer indices `0..MAX_TRACKED_
|
||||
// INDICES` and the finite set of constant string keys in the source;
|
||||
// dynamic keys already resolve to `Elements`), so refusing to grow
|
||||
// past the cap bounds the state without any removal.
|
||||
if slot.is_keyed() {
|
||||
let key = (id, slot);
|
||||
let already_present = self.entries.binary_search_by_key(&key, |(k, _)| *k).is_ok();
|
||||
if !already_present {
|
||||
let index_count = self.count_indices_for(id);
|
||||
if index_count >= MAX_TRACKED_INDICES {
|
||||
// Collapse: merge all Index(*) entries into Elements,
|
||||
// then store the new taint into Elements too.
|
||||
self.collapse_indices_to_elements(id);
|
||||
self.store_raw(id, HeapSlot::Elements, caps, origins);
|
||||
return;
|
||||
}
|
||||
if !already_present && self.count_indices_for(id) >= MAX_TRACKED_INDICES {
|
||||
self.store_raw(id, HeapSlot::Elements, caps, origins);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -385,14 +423,20 @@ impl HeapState {
|
|||
/// Load taint from a specific (object, slot) pair.
|
||||
///
|
||||
/// - `Index(n)`: returns union of `(id, Index(n))` ∪ `(id, Elements)`.
|
||||
/// - `Elements`: returns union of `(id, Elements)` ∪ all `(id, Index(*))`.
|
||||
/// - `Key(h)`: returns union of `(id, Key(h))` ∪ `(id, Elements)` — a
|
||||
/// constant-key read sees only its own key's taint plus any taint
|
||||
/// written under a dynamic/unknown key (which lands in `Elements`); it
|
||||
/// does NOT see other constant keys' cells.
|
||||
/// - `Elements`: returns union of `(id, Elements)` ∪ all keyed slots
|
||||
/// (`Index(*)` and `Key(*)`) — a dynamic/unknown-key read conservatively
|
||||
/// sees every recorded keyed write.
|
||||
pub fn load(&self, id: HeapObjectId, slot: HeapSlot) -> Option<HeapTaint> {
|
||||
match slot {
|
||||
HeapSlot::Index(n) => {
|
||||
// Union specific index with Elements.
|
||||
let idx_taint = self.load_raw(id, HeapSlot::Index(n));
|
||||
HeapSlot::Index(_) | HeapSlot::Key(_) => {
|
||||
// Union the specific keyed slot with Elements.
|
||||
let slot_taint = self.load_raw(id, slot);
|
||||
let elem_taint = self.load_raw(id, HeapSlot::Elements);
|
||||
match (idx_taint, elem_taint) {
|
||||
match (slot_taint, elem_taint) {
|
||||
(Some(a), Some(b)) => Some(a.union(b)),
|
||||
(Some(a), None) => Some(a.clone()),
|
||||
(None, Some(b)) => Some(b.clone()),
|
||||
|
|
@ -496,35 +540,13 @@ impl HeapState {
|
|||
true
|
||||
}
|
||||
|
||||
/// Count distinct `Index(*)` slots for a given object.
|
||||
/// Count distinct keyed (`Index(*)` / `Key(*)`) slots for a given object.
|
||||
fn count_indices_for(&self, id: HeapObjectId) -> usize {
|
||||
self.entries
|
||||
.iter()
|
||||
.filter(|((eid, slot), _)| *eid == id && matches!(slot, HeapSlot::Index(_)))
|
||||
.filter(|((eid, slot), _)| *eid == id && slot.is_keyed())
|
||||
.count()
|
||||
}
|
||||
|
||||
/// Collapse all `Index(*)` entries for `id` into `Elements`.
|
||||
fn collapse_indices_to_elements(&mut self, id: HeapObjectId) {
|
||||
// Collect taint from all Index entries for this object.
|
||||
let mut merged_caps = Cap::empty();
|
||||
let mut merged_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
|
||||
self.entries.retain(|((eid, slot), taint)| {
|
||||
if *eid == id && matches!(slot, HeapSlot::Index(_)) {
|
||||
merged_caps |= taint.caps;
|
||||
for orig in &taint.origins {
|
||||
crate::taint::ssa_transfer::push_origin_bounded(&mut merged_origins, *orig);
|
||||
}
|
||||
false // remove this entry
|
||||
} else {
|
||||
true // keep
|
||||
}
|
||||
});
|
||||
// Merge into Elements.
|
||||
if !merged_caps.is_empty() {
|
||||
self.store_raw(id, HeapSlot::Elements, merged_caps, &merged_origins);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── PointsToResult ───────────────────────────────────────────────────────
|
||||
|
|
@ -1242,7 +1264,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn heap_max_tracked_indices_collapse() {
|
||||
fn heap_max_tracked_indices_overflow_to_elements() {
|
||||
let mut h = HeapState::empty();
|
||||
let id = HeapObjectId(SsaValue(0));
|
||||
|
||||
|
|
@ -1255,20 +1277,123 @@ mod tests {
|
|||
&[origin(i as u32)],
|
||||
);
|
||||
}
|
||||
assert_eq!(h.count_indices_for(id), MAX_TRACKED_INDICES);
|
||||
|
||||
// One more should trigger collapse into Elements
|
||||
// One more (a NEW key past the cap) folds into Elements, but the
|
||||
// existing keyed cells are KEPT — the lattice must be monotone (no
|
||||
// removal), or the taint worklist oscillates and never converges.
|
||||
h.store(
|
||||
id,
|
||||
HeapSlot::Index(MAX_TRACKED_INDICES as u64),
|
||||
Cap::SQL_QUERY,
|
||||
&[origin(99)],
|
||||
);
|
||||
// Existing keyed cells preserved (not collapsed away).
|
||||
assert_eq!(h.count_indices_for(id), MAX_TRACKED_INDICES);
|
||||
|
||||
// All Index entries should be collapsed into Elements.
|
||||
// There should be no Index entries left.
|
||||
assert_eq!(h.count_indices_for(id), 0);
|
||||
// The overflowed key's taint is now reachable via Elements.
|
||||
let t = h.load(id, HeapSlot::Elements).unwrap();
|
||||
assert!(t.caps.contains(Cap::HTML_ESCAPE)); // ∪ over kept Index slots
|
||||
assert!(t.caps.contains(Cap::SQL_QUERY)); // the overflowed key
|
||||
// An existing key still reads its own cell (∪ Elements).
|
||||
let t0 = h.load(id, HeapSlot::Index(0)).unwrap();
|
||||
assert!(t0.caps.contains(Cap::HTML_ESCAPE));
|
||||
}
|
||||
|
||||
// Elements load should see all taint
|
||||
// ── HeapSlot::Key (string-key) tests ────────────────────────────
|
||||
|
||||
#[test]
fn hash_const_key_is_deterministic_and_distinct() {
    let key_a = "keyA-85059";
    let key_b = "keyB-85059";

    // Hashing the same key twice agrees, so put("k") and get("k") land on
    // the same slot.
    assert_eq!(hash_const_key(key_b), hash_const_key(key_b));
    // Two different keys hash differently (the common, non-colliding case).
    assert_ne!(hash_const_key(key_a), hash_const_key(key_b));
}
|
||||
|
||||
#[test]
fn heap_key_store_load_isolation() {
    // The BenchmarkTest00171 shape: `map.put("keyB", param); map.get("keyA")`.
    // Taint written under one constant key must not leak to another.
    let object = HeapObjectId(SsaValue(0));
    let written_key = HeapSlot::Key(hash_const_key("keyB-85059"));
    let other_key = HeapSlot::Key(hash_const_key("keyA-85059"));

    let mut heap = HeapState::empty();
    heap.store(object, written_key, Cap::SHELL_ESCAPE, &[origin(0)]);

    // Reading back the written key yields exactly the stored caps.
    let stored = heap.load(object, written_key).unwrap();
    assert_eq!(stored.caps, Cap::SHELL_ESCAPE);

    // A different constant key sees nothing: there is no Elements cell and
    // other Key cells are not consulted.
    assert!(heap.load(object, other_key).is_none());
}
|
||||
|
||||
#[test]
fn heap_key_load_unions_with_elements() {
    // Taint written under a dynamic/unknown key lives in the Elements slot;
    // a read through a constant key must still observe it.
    let object = HeapObjectId(SsaValue(0));
    let mut heap = HeapState::empty();
    heap.store(object, HeapSlot::Elements, Cap::SQL_QUERY, &[origin(0)]);

    let constant_key = HeapSlot::Key(hash_const_key("k"));
    let seen = heap.load(object, constant_key).unwrap();
    assert_eq!(seen.caps, Cap::SQL_QUERY);
}
|
||||
|
||||
#[test]
fn heap_elements_load_unions_all_keys() {
    // Reading through the Elements slot (a dynamic/unknown key) must see the
    // union of every constant-key write on the same object.
    let object = HeapObjectId(SsaValue(0));
    let mut heap = HeapState::empty();

    let writes = [("a", Cap::HTML_ESCAPE, 0), ("b", Cap::SQL_QUERY, 1)];
    for (key, caps, origin_id) in writes {
        heap.store(
            object,
            HeapSlot::Key(hash_const_key(key)),
            caps,
            &[origin(origin_id)],
        );
    }

    let merged = heap.load(object, HeapSlot::Elements).unwrap();
    assert_eq!(merged.caps, Cap::HTML_ESCAPE | Cap::SQL_QUERY);
}
|
||||
|
||||
#[test]
fn heap_key_and_index_are_disjoint() {
    // An integer-index cell and a string-key cell are separate slots: a
    // write to `Index(0)` must be invisible to a read of the key "0".
    let object = HeapObjectId(SsaValue(0));
    let mut heap = HeapState::empty();
    heap.store(object, HeapSlot::Index(0), Cap::FILE_IO, &[origin(0)]);

    // The keyed read consults only its own cell plus Elements (empty here).
    let string_key = HeapSlot::Key(hash_const_key("0"));
    assert!(heap.load(object, string_key).is_none());
}
|
||||
|
||||
#[test]
|
||||
fn heap_max_tracked_keys_overflow_to_elements() {
|
||||
// A NEW string key past the cap folds into Elements (over-approx,
|
||||
// sound) while existing keyed cells are kept (monotone — no removal).
|
||||
let mut h = HeapState::empty();
|
||||
let id = HeapObjectId(SsaValue(0));
|
||||
for i in 0..MAX_TRACKED_INDICES {
|
||||
h.store(
|
||||
id,
|
||||
HeapSlot::Key(hash_const_key(&format!("key{i}"))),
|
||||
Cap::HTML_ESCAPE,
|
||||
&[origin(i as u32)],
|
||||
);
|
||||
}
|
||||
assert_eq!(h.count_indices_for(id), MAX_TRACKED_INDICES);
|
||||
h.store(
|
||||
id,
|
||||
HeapSlot::Key(hash_const_key("overflow")),
|
||||
Cap::SQL_QUERY,
|
||||
&[origin(99)],
|
||||
);
|
||||
// Existing keyed cells preserved.
|
||||
assert_eq!(h.count_indices_for(id), MAX_TRACKED_INDICES);
|
||||
let t = h.load(id, HeapSlot::Elements).unwrap();
|
||||
assert!(t.caps.contains(Cap::HTML_ESCAPE));
|
||||
assert!(t.caps.contains(Cap::SQL_QUERY));
|
||||
|
|
|
|||
|
|
@ -1,5 +1,10 @@
|
|||
//! AST → CFG → SSA lowering (Cytron et al.).
|
||||
//!
|
||||
//! Builds basic blocks, computes dominators and dominance frontiers via
|
||||
//! petgraph, inserts phi nodes, and renames variables over the dominator-tree
|
||||
//! preorder to produce an [`SsaBody`](super::ir::SsaBody).
|
||||
|
||||
#![allow(
|
||||
clippy::collapsible_if,
|
||||
clippy::if_same_then_else,
|
||||
clippy::needless_range_loop,
|
||||
clippy::only_used_in_recursion,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#![allow(clippy::collapsible_if, clippy::redundant_closure)]
|
||||
#![allow(clippy::redundant_closure)]
|
||||
|
||||
//! Static hash-map lookup abstract analysis.
|
||||
//!
|
||||
|
|
|
|||
|
|
@ -1,5 +1,11 @@
|
|||
#![allow(clippy::if_same_then_else)]
|
||||
|
||||
//! Lightweight type inference for SSA values.
|
||||
//!
|
||||
//! Derives [`TypeKind`] facts (ints, URLs, HTTP clients/responses, DB
|
||||
//! connections, file handles) from constructors, factories, and literals, used
|
||||
//! to suppress type-safe sinks and to resolve receiver-qualified callees.
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#![allow(clippy::collapsible_if, clippy::unnecessary_map_or)]
|
||||
#![allow(clippy::unnecessary_map_or)]
|
||||
|
||||
use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
|
||||
use super::engine::DataflowResult;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,8 @@
|
|||
#![allow(clippy::collapsible_if)]
|
||||
//! `DefaultTransfer`: resource-lifecycle and auth-state transfer function for
|
||||
//! the generic monotone dataflow engine (separate from taint).
|
||||
//!
|
||||
//! Tracks `ResourceLifecycle`, `AuthLevel`, and chain-proxy/product state to
|
||||
//! detect leaks, use-after-close, double-close, and auth-state issues.
|
||||
|
||||
use super::domain::{AuthLevel, ChainProxyState, ProductState, ResourceLifecycle};
|
||||
use super::engine::Transfer;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
//! ([`AUTH_ANNOTATIONS`]).
|
||||
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file, unquote};
|
||||
use crate::surface::lang::common::{leaf_matches, loc_for, rel_file};
|
||||
use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
|
@ -218,12 +218,6 @@ fn method_name(method: Node, bytes: &[u8]) -> Option<String> {
|
|||
.map(str::to_string)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn read_string_literal(node: Node, bytes: &[u8]) -> Option<String> {
|
||||
let raw = node.utf8_text(bytes).ok()?;
|
||||
Some(unquote(raw))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
//! termination. Verdict aggregation is sound: `Infeasible` is only returned
|
||||
//! when the entire relevant search space was explored without budget exhaustion.
|
||||
|
||||
#![allow(clippy::collapsible_if, clippy::unnecessary_map_or)]
|
||||
#![allow(clippy::unnecessary_map_or)]
|
||||
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
//! distinguish different objects.
|
||||
//!
|
||||
//! Design:
|
||||
#![allow(clippy::collapsible_if, clippy::new_without_default)]
|
||||
#![allow(clippy::new_without_default)]
|
||||
//! - `FieldSlot::Named` for object properties (per-field precision).
|
||||
//! - `FieldSlot::Elements` for container contents (flow-insensitive union,
|
||||
//! deliberately lower precision than named fields).
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@
|
|||
//! - Intra-callee forking with merge policies
|
||||
|
||||
#![allow(
|
||||
clippy::collapsible_if,
|
||||
clippy::let_and_return,
|
||||
clippy::new_without_default,
|
||||
clippy::question_mark,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
//! Detects back edges, computes natural loop bodies, identifies induction
|
||||
//! variables, and determines loop exit successors. All analysis is computed
|
||||
//! once per `explore_finding()` invocation and shared across all paths.
|
||||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
|
|
|
|||
|
|
@ -9,11 +9,7 @@
|
|||
//! Symbolic expression trees (`SymbolicValue`) preserve computation structure
|
||||
//! through the path walk, enabling richer witness strings.
|
||||
|
||||
#![allow(
|
||||
clippy::collapsible_if,
|
||||
clippy::manual_ignore_case_cmp,
|
||||
clippy::needless_borrow
|
||||
)]
|
||||
#![allow(clippy::manual_ignore_case_cmp, clippy::needless_borrow)]
|
||||
|
||||
pub mod executor;
|
||||
pub mod heap;
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@
|
|||
//! `ConcreteStr` by the symbolic engine, it flows through as a
|
||||
//! `ConstValue::Str` operand and is handled.
|
||||
#![allow(
|
||||
clippy::collapsible_if,
|
||||
clippy::needless_borrows_for_generic_args,
|
||||
clippy::new_without_default,
|
||||
dead_code
|
||||
|
|
|
|||
|
|
@ -6,11 +6,7 @@
|
|||
//! Cross-file symbolic summary modeling: when a callee has an
|
||||
//! `SsaFuncSummary` available via `GlobalSummaries`, the Call instruction's
|
||||
//! return value is modeled symbolically instead of being treated as opaque.
|
||||
#![allow(
|
||||
clippy::collapsible_if,
|
||||
clippy::if_same_then_else,
|
||||
clippy::too_many_arguments
|
||||
)]
|
||||
#![allow(clippy::if_same_then_else, clippy::too_many_arguments)]
|
||||
|
||||
use crate::cfg::Cfg;
|
||||
use crate::ssa::const_prop::ConstLattice;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
//! Symbolic value expression trees.
|
||||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use std::fmt;
|
||||
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@
|
|||
//! - [`path_state`]: predicate classification for branch-sensitive propagation
|
||||
//! - [`backwards`]: demand-driven backwards walk from sinks (off by default)
|
||||
|
||||
#![allow(clippy::collapsible_if, clippy::too_many_arguments)]
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
pub mod backwards;
|
||||
pub mod domain;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,8 @@
|
|||
#![allow(clippy::collapsible_if)]
|
||||
//! Predicate tracking for path-sensitive taint.
|
||||
//!
|
||||
//! Classifies if-conditions (`PredicateKind` / `classify_condition`) and narrows
|
||||
//! validation to specific targets, so branch outcomes can validate or contradict
|
||||
//! tainted values during the SSA taint solve.
|
||||
|
||||
// ─── PredicateKind ───────────────────────────────────────────────────────────
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,13 @@
|
|||
//! Block-level SSA taint worklist — the sole taint engine for all 10 languages.
|
||||
//!
|
||||
//! Drives a forward dataflow fixpoint over [`crate::ssa::SsaBody`] blocks
|
||||
//! (`run_ssa_taint` / `run_ssa_taint_full`), propagating `SsaTaintState` through
|
||||
//! `transfer_inst` with branch-aware narrowing, k=1 context-sensitive inlining
|
||||
//! (`inline`), gated-sink detection (`events`), and interprocedural summary
|
||||
//! extraction (`summary_extract`). Submodules: `events`, `inline`, `state`,
|
||||
//! `summary_extract`.
|
||||
|
||||
#![allow(
|
||||
clippy::collapsible_if,
|
||||
clippy::if_same_then_else,
|
||||
clippy::manual_flatten,
|
||||
clippy::needless_range_loop,
|
||||
|
|
@ -8317,34 +8325,118 @@ fn try_curl_url_propagation(
|
|||
/// sets `const_values: Some(&callee_body.opt.const_values)` on the child
|
||||
/// transfer, so callee-local constants are resolved.
|
||||
/// - Unknown / non-integer / out-of-bounds: falls back to `HeapSlot::Elements`.
|
||||
fn resolve_container_index(index_val: SsaValue, transfer: &SsaTaintTransfer) -> HeapSlot {
|
||||
use crate::ssa::heap::MAX_TRACKED_INDICES;
|
||||
|
||||
if let Some(cv) = transfer.const_values {
|
||||
if let Some(crate::ssa::const_prop::ConstLattice::Int(n)) = cv.get(&index_val) {
|
||||
if *n >= 0 && (*n as u64) < MAX_TRACKED_INDICES as u64 {
|
||||
return HeapSlot::Index(*n as u64);
|
||||
}
|
||||
/// Map a proven constant index/key to its precise `HeapSlot`, or `None`
|
||||
/// (caller falls back to `HeapSlot::Elements`).
|
||||
///
|
||||
/// * Non-negative integer within `MAX_TRACKED_INDICES` → `Index(n)`.
|
||||
/// * Any other string constant → `Key(hash)` — a keyed read sees only its own
|
||||
/// key's cell (plus dynamic-key taint in `Elements`); a read of a *different*
|
||||
/// constant key cannot inherit it. Unknown/dynamic keys keep the coarse
|
||||
/// `Elements` merge, so no precision is lost and no false negative arises.
|
||||
///
|
||||
/// Both the SSA-value path (`resolve_container_index`) and the
|
||||
/// literal-argument path (`resolve_op_slot`) funnel through here so a
|
||||
/// `put("k", …)` written with a literal and a `get(kVar)` whose `kVar`
|
||||
/// const-props to `"k"` resolve to the *same* slot.
|
||||
fn slot_from_const(c: &crate::ssa::const_prop::ConstLattice) -> Option<HeapSlot> {
|
||||
use crate::ssa::const_prop::ConstLattice;
|
||||
use crate::ssa::heap::{MAX_TRACKED_INDICES, hash_const_key};
|
||||
match c {
|
||||
ConstLattice::Int(n) if *n >= 0 && (*n as u64) < MAX_TRACKED_INDICES as u64 => {
|
||||
Some(HeapSlot::Index(*n as u64))
|
||||
}
|
||||
ConstLattice::Str(s) => Some(HeapSlot::Key(hash_const_key(s))),
|
||||
_ => None,
|
||||
}
|
||||
HeapSlot::Elements
|
||||
}
|
||||
|
||||
/// Look up the SSA op that defines value `v`, searching `v`'s defining block.
|
||||
pub(super) fn op_for_value(ssa: &SsaBody, v: SsaValue) -> Option<&SsaOp> {
|
||||
let vd = ssa.value_defs.get(v.0 as usize)?;
|
||||
let blk = ssa.blocks.iter().find(|b| b.id == vd.block)?;
|
||||
blk.phis
|
||||
.iter()
|
||||
.chain(blk.body.iter())
|
||||
.find(|i| i.value == v)
|
||||
.map(|i| &i.op)
|
||||
}
|
||||
|
||||
/// Resolve a container index/key SSA value to a `HeapSlot` by tracing its
|
||||
/// definition to an underlying constant. Handles the case where a literal
|
||||
/// key (`map.get("k")`) surfaces as a *copy* of a `Const` (e.g.
|
||||
/// `v = Assign([const])` from a cast/temporary) that the optimised
|
||||
/// `const_values` map records as `Varying` rather than the literal. Bounded
|
||||
/// depth; follows single-use `Assign` copies only (no phi merge, to stay
|
||||
/// precise — a key joined across paths is genuinely dynamic).
|
||||
fn slot_from_ssa_value(v: SsaValue, ssa: &SsaBody, depth: u32) -> Option<HeapSlot> {
|
||||
if depth > 8 {
|
||||
return None;
|
||||
}
|
||||
match op_for_value(ssa, v)? {
|
||||
SsaOp::Const(Some(text)) => {
|
||||
slot_from_const(&crate::ssa::const_prop::ConstLattice::parse(text))
|
||||
}
|
||||
SsaOp::Assign(uses) if uses.len() == 1 => slot_from_ssa_value(uses[0], ssa, depth + 1),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_container_index(index_val: SsaValue, transfer: &SsaTaintTransfer) -> HeapSlot {
|
||||
transfer
|
||||
.const_values
|
||||
.and_then(|cv| cv.get(&index_val))
|
||||
.and_then(slot_from_const)
|
||||
.unwrap_or(HeapSlot::Elements)
|
||||
}
|
||||
|
||||
/// Resolve the `HeapSlot` for a container operation given its `index_arg`.
|
||||
///
|
||||
/// When `index_arg` is `Some(idx_pos)`, applies `arg_offset` and resolves
|
||||
/// the SSA value from `args`. Otherwise returns `HeapSlot::Elements`.
|
||||
/// the index/key. Two channels, checked in order:
|
||||
/// 1. the SSA value at that argument position (a *variable* index/key that
|
||||
/// const-props to an int/string);
|
||||
/// 2. the parallel `arg_string_literals` slot (a *literal* index/key, e.g.
|
||||
/// `map.get("keyB")`, which carries no SSA value because it is not a
|
||||
/// variable — the dominant OWASP shape).
|
||||
/// Otherwise returns `HeapSlot::Elements`.
|
||||
fn resolve_op_slot(
|
||||
index_arg: Option<usize>,
|
||||
arg_offset: usize,
|
||||
args: &[SmallVec<[SsaValue; 2]>],
|
||||
arg_string_literals: &[Option<String>],
|
||||
ssa: &SsaBody,
|
||||
transfer: &SsaTaintTransfer,
|
||||
) -> HeapSlot {
|
||||
if let Some(idx_pos) = index_arg {
|
||||
let effective = idx_pos + arg_offset;
|
||||
if let Some(arg_vals) = args.get(effective) {
|
||||
if let Some(&v) = arg_vals.first() {
|
||||
return resolve_container_index(v, transfer);
|
||||
// 1. Variable index/key channel: an SSA value that const-props to an
|
||||
// int/string. Only claim resolution when it yields a *precise*
|
||||
// slot — a literal key/index often surfaces here as an SSA value
|
||||
// that const-prop could not pin down (so `resolve_container_index`
|
||||
// returns `Elements`); in that case fall through to the next
|
||||
// channel rather than collapsing to the coarse merge.
|
||||
if let Some(&v) = args.get(effective).and_then(|g| g.first()) {
|
||||
let slot = resolve_container_index(v, transfer);
|
||||
if slot != HeapSlot::Elements {
|
||||
return slot;
|
||||
}
|
||||
// 1b. SSA-trace channel: the value is a literal that surfaced as a
|
||||
// copy of a `Const` (e.g. `(String) map.get("k")` lowers the
|
||||
// key to `v = Assign([const])`, which optimised `const_values`
|
||||
// records as `Varying`). Follow the def to the underlying
|
||||
// constant so the keyed slot is recovered.
|
||||
if let Some(slot) = slot_from_ssa_value(v, ssa, 0) {
|
||||
return slot;
|
||||
}
|
||||
}
|
||||
// 2. Literal index/key channel: the constant (string/int) literal
|
||||
// captured at CFG build, parsed through the same `slot_from_const`
|
||||
// mapping the variable path uses. This is the dominant OWASP
|
||||
// shape (`map.get("keyB")`), where the key is a bare literal.
|
||||
if let Some(Some(lit)) = arg_string_literals.get(effective) {
|
||||
let parsed = crate::ssa::const_prop::ConstLattice::parse(lit);
|
||||
if let Some(slot) = slot_from_const(&parsed) {
|
||||
return slot;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -8363,7 +8455,7 @@ fn resolve_op_slot(
|
|||
/// default propagation.
|
||||
fn try_container_propagation(
|
||||
inst: &SsaInst,
|
||||
_info: &NodeInfo,
|
||||
info: &NodeInfo,
|
||||
args: &[SmallVec<[SsaValue; 2]>],
|
||||
receiver: &Option<SsaValue>,
|
||||
state: &mut SsaTaintState,
|
||||
|
|
@ -8430,7 +8522,14 @@ fn try_container_propagation(
|
|||
};
|
||||
|
||||
// Resolve index argument to HeapSlot (Index(n) or Elements).
|
||||
let slot = resolve_op_slot(index_arg, arg_offset, args, transfer);
|
||||
let slot = resolve_op_slot(
|
||||
index_arg,
|
||||
arg_offset,
|
||||
args,
|
||||
&info.call.arg_string_literals,
|
||||
ssa,
|
||||
transfer,
|
||||
);
|
||||
|
||||
// Collect taint from value argument(s)
|
||||
let mut val_caps = Cap::empty();
|
||||
|
|
@ -8511,7 +8610,14 @@ fn try_container_propagation(
|
|||
} else {
|
||||
0
|
||||
};
|
||||
let slot = resolve_op_slot(index_arg, arg_offset, args, transfer);
|
||||
let slot = resolve_op_slot(
|
||||
index_arg,
|
||||
arg_offset,
|
||||
args,
|
||||
&info.call.arg_string_literals,
|
||||
ssa,
|
||||
transfer,
|
||||
);
|
||||
|
||||
// When points-to info available, load from heap objects
|
||||
if let Some(pts) = lookup_pts(transfer, container_val) {
|
||||
|
|
|
|||
|
|
@ -13,8 +13,8 @@
|
|||
use super::events::extract_sink_arg_positions;
|
||||
use super::state::{BindingKey, SsaTaintState};
|
||||
use super::{
|
||||
SsaTaintEvent, SsaTaintTransfer, detect_variant_inner_fact, run_ssa_taint_full, transfer_block,
|
||||
transfer_inst,
|
||||
SsaTaintEvent, SsaTaintTransfer, detect_variant_inner_fact, op_for_value, run_ssa_taint_full,
|
||||
transfer_block, transfer_inst,
|
||||
};
|
||||
|
||||
use crate::cfg::{BodyId, Cfg, FuncSummaries};
|
||||
|
|
@ -32,6 +32,47 @@ use std::collections::{HashMap, HashSet};
|
|||
/// Functions with more params fall back to legacy `FuncSummary`.
|
||||
const MAX_PROBE_PARAMS: usize = 8;
|
||||
|
||||
/// Whether return value `v` provably evaluates to a compile-time constant —
|
||||
/// its def is a `Const`, or an `Assign`/`Phi` whose every operand traces
|
||||
/// (transitively) to a constant. A value that hits a parameter, a call, or any
|
||||
/// other op is *not* provably constant (return `false`, conservative).
|
||||
///
|
||||
/// Used by `run_probe` to recognise a clean, param-free return so the
|
||||
/// param-taint fallback does not attribute the seeded parameter's `Cap::all`
|
||||
/// to a return that cannot reach it (the dead-branch-folded
|
||||
/// `return v; v = Assign([phi([const])])` shape). Bounded depth.
|
||||
fn rv_traces_to_constant(
|
||||
ssa: &SsaBody,
|
||||
v: SsaValue,
|
||||
all_param_values: &HashSet<SsaValue>,
|
||||
depth: u32,
|
||||
budget: &mut u32,
|
||||
) -> bool {
|
||||
// Node budget caps total work so a wide phi/assign DAG (shared
|
||||
// sub-expressions are re-visited without memoisation) cannot blow up;
|
||||
// exhausting it returns the conservative `false`.
|
||||
if depth > 16 || *budget == 0 || all_param_values.contains(&v) {
|
||||
return false;
|
||||
}
|
||||
*budget -= 1;
|
||||
match op_for_value(ssa, v) {
|
||||
Some(SsaOp::Const(_)) => true,
|
||||
Some(SsaOp::Assign(uses)) => {
|
||||
!uses.is_empty()
|
||||
&& uses
|
||||
.iter()
|
||||
.all(|&u| rv_traces_to_constant(ssa, u, all_param_values, depth + 1, budget))
|
||||
}
|
||||
Some(SsaOp::Phi(operands)) => {
|
||||
!operands.is_empty()
|
||||
&& operands
|
||||
.iter()
|
||||
.all(|(_, u)| rv_traces_to_constant(ssa, *u, all_param_values, depth + 1, budget))
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract a precise per-parameter `SsaFuncSummary` from an already-lowered SSA body.
|
||||
///
|
||||
/// For each parameter (up to `MAX_PROBE_PARAMS`), runs a taint probe by seeding
|
||||
|
|
@ -325,13 +366,17 @@ pub fn extract_ssa_func_summary_full(
|
|||
// both when rv is tainted (derived) and when rv is untainted
|
||||
// (the push result may have no taint but the param does).
|
||||
// Skip when rv IS a param (already handled above) or when rv is
|
||||
// a Const (provably untainted constant return).
|
||||
let rv_is_const = ssa.blocks[bid]
|
||||
.body
|
||||
.iter()
|
||||
.chain(ssa.blocks[bid].phis.iter())
|
||||
.any(|inst| inst.value == rv && matches!(inst.op, SsaOp::Const(_)));
|
||||
if !all_param_values.contains(&rv) && !rv_is_const {
|
||||
// provably a constant (a return that traces — through Assign
|
||||
// copies / phis — to only `Const` values cannot carry the
|
||||
// seeded param's taint). The plain-`Const` check missed the
|
||||
// dead-branch-folded shape `return v` where `v = Assign([phi([
|
||||
// const])])`: the param is fully disconnected from the return,
|
||||
// but the fallback would still attribute the seeded param's
|
||||
// `Cap::all` to it, manufacturing a spurious `param→return`
|
||||
// (Identity) edge that poisons every cross-file caller.
|
||||
if !all_param_values.contains(&rv)
|
||||
&& !rv_traces_to_constant(ssa, rv, &all_param_values, 0, &mut 256)
|
||||
{
|
||||
for (val, taint) in &exit.values {
|
||||
if all_param_values.contains(val) {
|
||||
block_param_caps |= taint.caps;
|
||||
|
|
|
|||
|
|
@ -202,6 +202,13 @@ pub struct ScannerConfig {
|
|||
/// Excluded files
|
||||
pub excluded_files: Vec<String>,
|
||||
|
||||
/// Restrict the scan to these paths (relative to the scan root or absolute)
|
||||
/// as a whitelist. When non-empty, only files matching one of these paths
|
||||
/// are scanned; empty (default) scans everything not otherwise excluded.
|
||||
/// Populated programmatically (e.g. the server `include_paths` request
|
||||
/// field), not typically set in config files.
|
||||
pub included_paths: Vec<String>,
|
||||
|
||||
/// RESERVED: not yet wired to walker. Whether to respect the global ignore file.
|
||||
pub read_global_ignore: bool,
|
||||
|
||||
|
|
@ -335,6 +342,7 @@ impl Default for ScannerConfig {
|
|||
.map(str::to_owned)
|
||||
.collect(),
|
||||
excluded_files: vec![].into_iter().map(str::to_owned).collect(),
|
||||
included_paths: Vec::new(),
|
||||
read_global_ignore: false,
|
||||
read_vcsignore: true,
|
||||
require_git_to_read_vcsignore: true,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
#![allow(clippy::collapsible_if)]
|
||||
|
||||
use crate::errors::{NyxError, NyxResult};
|
||||
use std::fs;
|
||||
use std::io::Read;
|
||||
|
|
|
|||
11
src/walk.rs
11
src/walk.rs
|
|
@ -75,6 +75,17 @@ fn build_overrides(root: &Path, cfg: &Config) -> ignore::overrides::Override {
|
|||
tracing::warn!("invalid exclude‐file pattern ‘{file}’: {e}");
|
||||
}
|
||||
}
|
||||
// Whitelist: when any include path is present, the override engine scans
|
||||
// only files matching an include glob (intersected with the excludes above).
|
||||
for inc in &cfg.scanner.included_paths {
|
||||
let inc = inc.trim_end_matches('/');
|
||||
if let Err(e) = ob.add(inc) {
|
||||
tracing::warn!("invalid include‐path pattern ‘{inc}’: {e}");
|
||||
}
|
||||
if let Err(e) = ob.add(&format!("{inc}/**")) {
|
||||
tracing::warn!("invalid include‐path pattern ‘{inc}/**’: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
ob.build().unwrap_or_else(|e| {
|
||||
tracing::error!("failed to build ignore overrides: {e}");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue