Python fp and docs updtes (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
Eli Peter 2026-04-29 19:53:34 -04:00 committed by GitHub
parent 4db0805de6
commit a438886217
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
291 changed files with 9485 additions and 3851 deletions

View file

@ -54,7 +54,7 @@ const MAX_TOTAL_STEPS: usize = 500;
/// A single exploration path in flight.
///
/// The executor advances this one block at a time via successor transitions.
/// No pre-computed block sequence successor choice happens at each terminator.
/// No pre-computed block sequence, successor choice happens at each terminator.
struct ExplorationState {
/// Current symbolic state (cloned at fork points).
sym_state: SymbolicState,
@ -71,7 +71,7 @@ struct ExplorationState {
/// Constraints checked on this path.
constraints_checked: u32,
/// Per-block visit count for bounded loop unrolling.
/// Inherited at fork points both branches share the visit history.
/// Inherited at fork points, both branches share the visit history.
visit_counts: HashMap<BlockId, u8>,
/// When `Some`, this path entered via an exception edge.
/// Moved into `sym_state.exception_context` immediately before block
@ -110,7 +110,7 @@ pub(super) struct ExplorationResult {
/// Compute the set of blocks on some CFG path from source to sink.
///
/// This is **CFG source-to-sink reachability pruning**, NOT a taint slice.
/// It does not prove that tainted data flows through these blocks only that
/// It does not prove that tainted data flows through these blocks, only that
/// control flow can reach the sink from the source through them. Used to
/// prevent exploring branches structurally disconnected from the
/// source-to-sink span.
@ -521,7 +521,7 @@ fn run_path(
steps_taken: state.steps_taken,
constraints_checked: state.constraints_checked,
visit_counts: state.visit_counts.clone(),
// Taint carrier not a faithful thrown-value model.
// Taint carrier, not a faithful thrown-value model.
// CatchParam transfer will mark the catch parameter tainted.
exception_context: Some(SymbolicValue::Unknown),
};
@ -545,7 +545,7 @@ fn run_path(
match (true_reachable, false_reachable) {
(false, false) => {
// Dead end neither successor reaches sink.
// Dead end, neither successor reaches sink.
// Still try to extract a witness: the path may have
// already walked past the sink node.
let witness = try_extract_witness(state, finding, ssa, cfg);
@ -594,7 +594,7 @@ fn run_path(
state.current_block = *false_blk;
}
(true, true) => {
// Both successors reachable fork candidate
// Both successors reachable, fork candidate
let can_fork = state.forks_used < MAX_FORKS_PER_FINDING
&& outcomes.len() + work_queue.len() + 1 < MAX_PATHS_PER_FINDING
&& *total_steps < MAX_TOTAL_STEPS;
@ -617,7 +617,7 @@ fn run_path(
smt_ctx,
);
} else {
// Budget exhausted follow original path
// Budget exhausted, follow original path
*search_exhausted = false;
let preferred_polarity = if on_path.contains(true_blk) {
true
@ -882,7 +882,7 @@ fn step_switch(
let _ = planned_remaining;
if !any_enqueued {
// All paths were pruned by infeasibility record the current
// All paths were pruned by infeasibility, record the current
// state's witness so the caller can decide.
let witness = try_extract_witness(state, finding, ssa, cfg);
return Some(PathOutcome {
@ -917,7 +917,7 @@ fn apply_branch_constraint(
};
if matches!(cond_expr, constraint::ConditionExpr::Unknown) {
// No useful constraint continue without recording
// No useful constraint, continue without recording
return None;
}
@ -938,7 +938,7 @@ fn apply_branch_constraint(
});
}
// SMT escalation check with Z3 when PathEnv says SAT but
// SMT escalation, check with Z3 when PathEnv says SAT but
// accumulated constraints have cross-variable shape.
#[cfg(feature = "smt")]
if let Some(smt) = smt_ctx {
@ -1073,7 +1073,7 @@ fn fork_at_branch(
work_queue.push_back(false_state);
}
// Original state consumed by fork no outcome from this path
// Original state consumed by fork, no outcome from this path
None
}
@ -1328,6 +1328,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
}
}
@ -1717,6 +1718,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let ctx = super::SymexContext {
@ -1861,6 +1863,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let ctx = super::SymexContext {
@ -2140,6 +2143,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let cfg_graph = crate::cfg::Cfg::new();

View file

@ -8,7 +8,7 @@
//! Design:
#![allow(clippy::collapsible_if, clippy::new_without_default)]
//! - `FieldSlot::Named` for object properties (per-field precision).
//! - `FieldSlot::Elements` for container contents (flow-insensitive union
//! - `FieldSlot::Elements` for container contents (flow-insensitive union ,
//! deliberately lower precision than named fields).
//! - Bounded: `MAX_HEAP_ENTRIES` total, `MAX_FIELDS_PER_OBJECT` per object.
//! Overflow silently drops the store (conservative: subsequent load → `Unknown`).
@ -313,7 +313,7 @@ impl SymbolicHeap {
/// Count non-index fields stored for a specific object.
///
/// Excludes `Index(*)` entries those are bounded separately by
/// Excludes `Index(*)` entries, those are bounded separately by
/// [`MAX_TRACKED_INDICES`] via [`count_indices_for`].
fn fields_for_object(&self, object: HeapObjectId) -> usize {
self.fields
@ -425,7 +425,7 @@ pub fn resolve_receiver_ssa(
/// Resolve an SSA value to a singleton `HeapObjectId` via points-to analysis.
///
/// Returns `Some` only when the points-to set contains exactly one object.
/// May-alias (set size > 1) or unknown (not in result) returns `None`
/// May-alias (set size > 1) or unknown (not in result) returns `None` ,
/// the caller should fall through to existing behavior (sound: never pick
/// among ambiguous options).
pub fn resolve_singleton_object(
@ -675,7 +675,7 @@ mod tests {
heap.load(&index_key(0, 1)),
SymbolicValue::ConcreteStr("safe".to_string())
);
// Taint: conservative Elements taint poisons Index(1).
// Taint: conservative, Elements taint poisons Index(1).
assert!(heap.is_tainted(&index_key(0, 1)));
}

View file

@ -2,8 +2,8 @@
//!
//! When a callee's `CalleeSsaBody` is available, the symbolic executor walks
//! the callee's SSA blocks as a nested frame instead of treating it as an
//! opaque `mk_call`. Full symbolic state return values, heap mutations,
//! taint, and path constraints is propagated back to the caller.
//! opaque `mk_call`. Full symbolic state, return values, heap mutations,
//! taint, and path constraints, is propagated back to the caller.
//!
//! Resolution order in `transfer_inst` Call arm:
//! container ops → string methods → **interprocedural execution** → summary → opaque mk_call.
@ -247,7 +247,7 @@ pub struct InterprocCtx<'a> {
/// Pre-lowered intra-file function bodies, keyed by canonical `FuncKey`.
pub callee_bodies: &'a HashMap<crate::symbol::FuncKey, CalleeSsaBody>,
/// The top-level caller's body CFG. Callees have their own per-body graphs
/// (see `CalleeSsaBody::body_graph`) `execute_callee` must swap this for
/// (see `CalleeSsaBody::body_graph`), `execute_callee` must swap this for
/// the callee's own graph before indexing by `SsaInst::cfg_node`.
pub cfg: &'a Cfg,
/// Source language.
@ -373,7 +373,7 @@ impl CallOutcome {
/// Create a cutoff outcome with conservative return.
///
/// Returns `Unknown` with taint preserved if any argument was tainted.
/// This ensures cutoffs never silently drop taint conservative soundness.
/// This ensures cutoffs never silently drop taint, conservative soundness.
fn cutoff(reason: CutoffReason, any_arg_tainted: bool) -> Self {
CallOutcome {
exit_states: if any_arg_tainted {
@ -478,7 +478,7 @@ pub fn select_merge_policy(exit_count: usize, has_cutoffs: bool) -> MergePolicy
/// - bits 1-4: SymbolicValue discriminant
/// - bits 5-15: hash of concrete value (if Concrete/ConcreteStr)
///
/// Richer than taint-only captures concrete string/int identity.
/// Richer than taint-only, captures concrete string/int identity.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ArgAbstraction(SmallVec<[(usize, u16); 4]>);
@ -561,14 +561,14 @@ impl<'a> Drop for ReentryGuard<'a> {
/// reasons and conservative return values (taint preserved).
///
/// # Arguments
/// * `ctx` shared interprocedural context
/// * `callee_name` raw callee name from `SsaOp::Call`
/// * `arg_values` per-argument (caller SsaValue, SymbolicValue, tainted)
/// * `caller_heap` caller's current symbolic heap (for callee reads)
/// * `depth` current call depth (0 = top-level caller)
/// * `call_chain` function names from outermost caller to current
/// * `summary_ctx` summary context for nested calls that can't be inlined
/// * `heap_ctx` heap context for nested calls
/// * `ctx` , shared interprocedural context
/// * `callee_name` , raw callee name from `SsaOp::Call`
/// * `arg_values` , per-argument (caller SsaValue, SymbolicValue, tainted)
/// * `caller_heap` , caller's current symbolic heap (for callee reads)
/// * `depth` , current call depth (0 = top-level caller)
/// * `call_chain` , function names from outermost caller to current
/// * `summary_ctx` , summary context for nested calls that can't be inlined
/// * `heap_ctx` , heap context for nested calls
pub fn execute_callee(
ctx: &InterprocCtx,
callee_name: &str,
@ -616,7 +616,7 @@ pub fn execute_callee(
}
}
// Resolve callee by leaf name finds first FuncKey with matching name
// Resolve callee by leaf name, finds first FuncKey with matching name
// (optionally agreeing on arity). Symex preserves its existing leaf-name
// semantics; disambiguation happens upstream in the taint engine.
let normalized = callee_leaf_name(callee_name);
@ -642,7 +642,7 @@ pub fn execute_callee(
gs.resolve_callee_body(ctx.lang, normalized, arity_hint, ctx.caller_namespace)
}) {
Some(b) => (b, true),
None => return None, // No body fall through to summary
None => return None, // No body, fall through to summary
}
}
};
@ -825,7 +825,7 @@ pub fn execute_callee(
} else {
None
};
// `inst.cfg_node` indices are body-local refer to `body.body_graph`,
// `inst.cfg_node` indices are body-local, refer to `body.body_graph`,
// not `ctx.cfg` (the caller's graph). Fall back to `ctx.cfg` only for
// cross-file bodies, where `node_meta` is populated and the graph is
// never indexed directly.
@ -838,7 +838,7 @@ pub fn execute_callee(
path.predecessor,
summary_ctx,
heap_ctx,
// Pass None for interproc_ctx we handle nested calls directly below.
// Pass None for interproc_ctx, we handle nested calls directly below.
None,
Some(ctx.lang),
xfile_meta,
@ -1033,7 +1033,7 @@ fn detect_internal_sinks(
) {
for inst in block.body.iter() {
let labels: &[DataLabel] = if let Some(meta) = node_meta {
// cross-file body use embedded metadata
// cross-file body, use embedded metadata
meta.get(&(inst.cfg_node.index() as u32))
.map(|m| m.info.taint.labels.as_slice())
.unwrap_or(&[])
@ -1282,7 +1282,7 @@ fn compute_heap_delta(initial: &SymbolicHeap, final_heap: &SymbolicHeap) -> Vec<
///
/// Full structural equality is expensive for deep trees. This checks the
/// common cases (Concrete, ConcreteStr, Symbol, Unknown) and returns false
/// for complex expressions (conservative will over-report heap mutations).
/// for complex expressions (conservative, will over-report heap mutations).
fn sym_value_structurally_eq(a: &SymbolicValue, b: &SymbolicValue) -> bool {
match (a, b) {
(SymbolicValue::Concrete(x), SymbolicValue::Concrete(y)) => x == y,

View file

@ -94,11 +94,11 @@ impl LoopInfo {
match (true_in, false_in) {
(true, false) => Some(*false_blk),
(false, true) => Some(*true_blk),
(false, false) => Some(*true_blk), // both exit deterministic pick
(false, false) => Some(*true_blk), // both exit, deterministic pick
(true, true) => None, // nested: no clear exit
}
}
_ => None, // Goto or Return no branching exit
_ => None, // Goto or Return, no branching exit
}
}

View file

@ -174,7 +174,7 @@ fn analyse_finding_path(finding: &Finding, ctx: &SymexContext) -> SymbolicVerdic
}
if path_blocks.len() < 2 {
// Short path (single block, no branches) skip the multi-path
// Short path (single block, no branches), skip the multi-path
// explorer but still run a linear transfer to extract a witness.
let witness = linear_witness(finding, ctx, &path_blocks);
return SymbolicVerdict {
@ -411,6 +411,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let blocks = extract_path_blocks(&finding, &ssa);
@ -483,6 +484,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let ctx = SymexContext {
@ -541,6 +543,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let ssa = SsaBody {
@ -567,7 +570,7 @@ mod tests {
cross_file_bodies: None,
};
annotate_findings(std::slice::from_mut(&mut finding), &ctx);
// Should remain None skipped due to path_validated
// Should remain None, skipped due to path_validated
assert!(finding.symbolic.is_none());
}
@ -600,6 +603,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let ssa = SsaBody {
@ -626,7 +630,7 @@ mod tests {
cross_file_bodies: None,
};
annotate_findings(std::slice::from_mut(&mut finding), &ctx);
// Should remain None only 1 flow step
// Should remain None, only 1 flow step
assert!(finding.symbolic.is_none());
}
}

View file

@ -99,7 +99,7 @@ enum VarSort {
Str,
}
/// Polymorphic Z3 variable either integer or string sort.
/// Polymorphic Z3 variable, either integer or string sort.
enum Z3Var {
Int(Z3Int),
Str(Z3Str),
@ -140,7 +140,7 @@ impl SmtContext {
/// proves the constraints are contradictory.
///
/// Constraints that cannot be fully translated (unknown sorts, sort
/// conflicts, etc.) are silently skipped this is sound because omitting
/// conflicts, etc.) are silently skipped, this is sound because omitting
/// a constraint can only make Z3 return `Sat` when the actual result
/// might be `Unsat`, never the reverse.
pub fn check_path_feasibility(
@ -366,7 +366,7 @@ fn seed_from_path_env(solver: &Solver, var_map: &mut VarMap, env: &PathEnv) {
(Some(Z3Var::Str(r)), Some(Z3Var::Str(vi))) => {
solver.assert(&vi.eq(r));
}
_ => {} // Sort mismatch or missing skip.
_ => {} // Sort mismatch or missing, skip.
}
}
}
@ -380,7 +380,7 @@ fn seed_from_path_env(solver: &Solver, var_map: &mut VarMap, env: &PathEnv) {
(Some(Z3Var::Str(za)), Some(Z3Var::Str(zb))) => {
solver.assert(&za.ne(zb));
}
_ => {} // Sort mismatch or missing skip.
_ => {} // Sort mismatch or missing, skip.
}
}
@ -402,7 +402,7 @@ fn seed_from_path_env(solver: &Solver, var_map: &mut VarMap, env: &PathEnv) {
/// Translate a single path constraint into a Z3 assertion.
///
/// Skips constraints that cannot be fully translated (unknown sort, sort
/// conflict, etc.). This is sound see module-level docs.
/// conflict, etc.). This is sound, see module-level docs.
fn assert_path_constraint(
solver: &Solver,
var_map: &mut VarMap,
@ -440,7 +440,7 @@ fn assert_path_constraint(
}
}
}
// NullCheck, TypeCheck, Unknown skip (not modeled).
// NullCheck, TypeCheck, Unknown, skip (not modeled).
ConditionExpr::NullCheck { .. }
| ConditionExpr::TypeCheck { .. }
| ConditionExpr::Unknown => {}
@ -450,7 +450,7 @@ fn assert_path_constraint(
/// Infer a sort hint from a constant operand.
///
/// When one side of a comparison is a known constant, it hints the sort of
/// the other side (a `Value`). This is the lowest-priority evidence used
/// the other side (a `Value`). This is the lowest-priority evidence, used
/// only when var_map and PathEnv provide no information.
fn operand_sort_hint(op: &Operand) -> Option<VarSort> {
match op {
@ -487,7 +487,7 @@ fn translate_operand(var_map: &mut VarMap, op: &Operand, env: &PathEnv) -> Optio
if is_known_int(*v, env) {
return force_int_var(var_map, *v).map(Z3Expr::Int);
}
// 3. Unknown sort return None; caller may apply hint.
// 3. Unknown sort, return None; caller may apply hint.
None
}
Operand::Const(ConstValue::Null) | Operand::Unknown => None,
@ -723,7 +723,7 @@ mod tests {
#[test]
fn cross_variable_contradiction() {
// x > y AND y > x → Unsat
// PathEnv cannot detect this it tracks per-variable intervals.
// PathEnv cannot detect this, it tracks per-variable intervals.
let constraints = vec![
comparison_constraint(val(0), CompOp::Gt, val(1), true),
comparison_constraint(val(1), CompOp::Gt, val(0), true),

View file

@ -34,14 +34,14 @@ pub struct SymbolicState {
values: HashMap<SsaValue, SymbolicValue>,
/// Branch constraints collected along the path.
path_constraints: Vec<PathConstraint>,
/// SSA values known to carry taint. Eagerly propagated during transfer
/// SSA values known to carry taint. Eagerly propagated during transfer ,
/// no recursive expression-tree walking needed.
tainted_roots: HashSet<SsaValue>,
/// Field-sensitive symbolic heap.
heap: SymbolicHeap,
/// Exception context for catch-path symbolic execution.
/// When `Some`, the next `CatchParam` instruction consumes this value and
/// marks itself tainted. This is NOT a faithful model of the thrown value
/// marks itself tainted. This is NOT a faithful model of the thrown value ,
/// it is a taint carrier that signals "this CatchParam was reached via an
/// exception edge and should be treated as tainted." The symbolic value is
/// `Unknown` because we do not model the exception object's structure.
@ -143,7 +143,7 @@ impl SymbolicState {
let block_data = &ssa.blocks[block.0 as usize];
for phi in &block_data.phis {
self.values.insert(phi.value, SymbolicValue::Unknown);
// PRESERVE taint do NOT remove from tainted_roots.
// PRESERVE taint, do NOT remove from tainted_roots.
}
// Widen heap: degrade field symbolic precision, preserve taint.
self.heap.widen();
@ -163,7 +163,7 @@ impl SymbolicState {
ConstLattice::Str(s) => {
self.values.insert(v, SymbolicValue::ConcreteStr(s.clone()));
}
_ => {} // Bool, Null, Top, Varying not modeled
_ => {} // Bool, Null, Top, Varying, not modeled
}
}
}
@ -343,6 +343,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let ssa = SsaBody {
blocks: vec![],
@ -382,6 +383,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let ssa = SsaBody {
blocks: vec![],
@ -418,6 +420,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let ssa = SsaBody {
blocks: vec![],

View file

@ -34,9 +34,9 @@ pub enum StringMethod {
/// Where the string operand comes from in the call.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum StringOperandSource {
/// `receiver.method()` JS, Java, Ruby, Rust
/// `receiver.method()`, JS, Java, Ruby, Rust
Receiver,
/// `func(string, ...)` Python `len()`, Go `strings.*`, PHP `strlen()`
/// `func(string, ...)`, Python `len()`, Go `strings.*`, PHP `strlen()`
FirstArg,
}
@ -68,7 +68,7 @@ pub struct SanitizerInfo {
/// - **Representation transforms** (non-protective): witness-only, never
/// used for mismatch reasoning.
///
/// Symex `Encode`/`Decode` nodes preserve taint unconditionally this enum
/// Symex `Encode`/`Decode` nodes preserve taint unconditionally, this enum
/// carries no sanitization authority.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum TransformKind {
@ -79,7 +79,7 @@ pub enum TransformKind {
UrlEncode,
/// Shell quoting: single-quote wrapping with internal quote escaping.
ShellEscape,
/// SQL string escaping: `'` → `''`. Witness-only no label rule yet,
/// SQL string escaping: `'` → `''`. Witness-only, no label rule yet,
/// so `verified_cap()` returns `Cap::empty()`.
SqlEscape,
// ── Representation transforms (non-protective) ───────────────────────
@ -87,7 +87,7 @@ pub enum TransformKind {
Base64Encode,
/// Base64 decoding.
Base64Decode,
/// URL percent-decoding (reverses URL encoding anti-protective).
/// URL percent-decoding (reverses URL encoding, anti-protective).
UrlDecode,
}
@ -119,7 +119,7 @@ impl TransformKind {
TransformKind::HtmlEscape => Cap::HTML_ESCAPE,
TransformKind::UrlEncode => Cap::URL_ENCODE,
TransformKind::ShellEscape => Cap::SHELL_ESCAPE,
// SqlEscape: no verified label rule witness-only
// SqlEscape: no verified label rule, witness-only
TransformKind::SqlEscape => Cap::empty(),
// Representation transforms: not protective
TransformKind::Base64Encode
@ -149,7 +149,7 @@ pub struct TransformMethodInfo {
///
/// Returns `None` for unrecognized methods (fall through to opaque `Call`).
/// For `Replace`, only classifies when pattern and replacement args are concrete
/// strings dynamic patterns produce `None`.
/// strings, dynamic patterns produce `None`.
pub fn classify_string_method(
callee: &str,
args: &[SymbolicValue],
@ -217,7 +217,7 @@ fn classify_python(method: &str, callee: &str, args: &[SymbolicValue]) -> Option
use StringMethod::*;
use StringOperandSource::*;
// Python builtins: len(s) no receiver
// Python builtins: len(s), no receiver
if callee == "len" {
return Some(StringMethodInfo {
method: StrLen,
@ -396,7 +396,7 @@ fn classify_php(method: &str, callee: &str, args: &[SymbolicValue]) -> Option<St
operand_source: FirstArg,
}),
"str_replace" => {
// PHP: str_replace($search, $replace, $subject) string is arg[2]
// PHP: str_replace($search, $replace, $subject), string is arg[2]
// But in our callee model, receiver is not present for free functions.
// args[0] = pattern, args[1] = replacement, args[2] = subject
let (pat, rep) = extract_replace_args(args, 0)?;
@ -488,7 +488,7 @@ fn classify_c(method: &str) -> Option<StringMethodInfo> {
/// Classify a callee as a recognized encoding/decoding transform.
///
/// Returns `None` for unrecognized methods. Rich sanitizers (DOMPurify,
/// bleach, markupsafe, etc.) are intentionally NOT classified here they
/// bleach, markupsafe, etc.) are intentionally NOT classified here, they
/// are complex library-level sanitizers, not simple character-level escapes.
pub fn classify_transform_method(callee: &str, lang: Lang) -> Option<TransformMethodInfo> {
match lang {
@ -605,7 +605,7 @@ fn classify_transform_php(callee: &str) -> Option<TransformMethodInfo> {
kind: ShellEscape,
operand_source: FirstArg,
}),
// SQL escaping (witness-only no verified label rule)
// SQL escaping (witness-only, no verified label rule)
"addslashes" => Some(TransformMethodInfo {
kind: SqlEscape,
operand_source: FirstArg,
@ -624,7 +624,7 @@ fn classify_transform_java(callee: &str) -> Option<TransformMethodInfo> {
// examine the dotted callee for receiver-qualified disambiguation.
let method = bare_method_name(callee);
// URL encoding/decoding `java.net.URLEncoder.encode` / `URLDecoder.decode`.
// URL encoding/decoding, `java.net.URLEncoder.encode` / `URLDecoder.decode`.
if callee.ends_with("URLEncoder.encode") {
return Some(TransformMethodInfo {
kind: UrlEncode,
@ -639,7 +639,7 @@ fn classify_transform_java(callee: &str) -> Option<TransformMethodInfo> {
}
// Apache commons-text / commons-lang `StringEscapeUtils.escapeHtml4`,
// `escapeXml11`, `escapeXml10`. These are character-level entity escapes
// `escapeXml11`, `escapeXml10`. These are character-level entity escapes ,
// NOT rich sanitizers like OWASP ESAPI's `Encoder`.
if callee.ends_with("StringEscapeUtils.escapeHtml4")
|| callee.ends_with("StringEscapeUtils.escapeHtml")
@ -653,7 +653,7 @@ fn classify_transform_java(callee: &str) -> Option<TransformMethodInfo> {
});
}
// Base64 `Base64.getEncoder().encodeToString(bytes)` (and the URL-safe
// Base64, `Base64.getEncoder().encodeToString(bytes)` (and the URL-safe
// / MIME variants). Match by leaf method name; the encoder/decoder chain
// before it is opaque to symex, but the operand is still the first arg.
match method {
@ -661,7 +661,7 @@ fn classify_transform_java(callee: &str) -> Option<TransformMethodInfo> {
kind: Base64Encode,
operand_source: FirstArg,
}),
// `Base64.getDecoder().decode(s)` the leaf `decode` collides with
// `Base64.getDecoder().decode(s)`, the leaf `decode` collides with
// `URLDecoder.decode` (handled above) so this only matches when the
// URLDecoder branch did not.
"decode" if callee.contains("Base64") => Some(TransformMethodInfo {
@ -677,7 +677,7 @@ fn classify_transform_go(callee: &str) -> Option<TransformMethodInfo> {
use TransformKind::*;
match callee {
// URL encoding/decoding `net/url` package.
// URL encoding/decoding, `net/url` package.
"url.QueryEscape" | "url.PathEscape" => Some(TransformMethodInfo {
kind: UrlEncode,
operand_source: FirstArg,
@ -686,7 +686,7 @@ fn classify_transform_go(callee: &str) -> Option<TransformMethodInfo> {
kind: UrlDecode,
operand_source: FirstArg,
}),
// HTML entity escaping `html` package (NOT `template.HTMLEscapeString`,
// HTML entity escaping, `html` package (NOT `template.HTMLEscapeString`,
// which is a context-aware sanitizer). `html.UnescapeString` is
// intentionally NOT classified: TransformKind has no `HtmlUnescape`
// variant, and reusing UrlDecode would label the witness wrongly.
@ -694,7 +694,7 @@ fn classify_transform_go(callee: &str) -> Option<TransformMethodInfo> {
kind: HtmlEscape,
operand_source: FirstArg,
}),
// Base64 `encoding/base64` package, `StdEncoding`/`URLEncoding`/
// Base64, `encoding/base64` package, `StdEncoding`/`URLEncoding`/
// `RawStdEncoding`/`RawURLEncoding` all expose `EncodeToString`.
"base64.StdEncoding.EncodeToString"
| "base64.URLEncoding.EncodeToString"
@ -723,7 +723,7 @@ fn classify_transform_ruby(callee: &str) -> Option<TransformMethodInfo> {
let normalised = callee.replace("::", ".");
match normalised.as_str() {
// URL percent-encoding. Note: `CGI.escape` in Ruby is percent-encoding
// (NOT HTML escape that's `CGI.escapeHTML`).
// (NOT HTML escape, that's `CGI.escapeHTML`).
"CGI.escape" | "URI.encode_www_form_component" => Some(TransformMethodInfo {
kind: UrlEncode,
operand_source: FirstArg,
@ -732,7 +732,7 @@ fn classify_transform_ruby(callee: &str) -> Option<TransformMethodInfo> {
kind: UrlDecode,
operand_source: FirstArg,
}),
// HTML entity escaping (character-level NOT Rails `sanitize` or
// HTML entity escaping (character-level, NOT Rails `sanitize` or
// `strip_tags` which are rich sanitizers).
"ERB::Util.html_escape" | "ERB.Util.html_escape" | "CGI.escapeHTML" => {
Some(TransformMethodInfo {
@ -740,7 +740,7 @@ fn classify_transform_ruby(callee: &str) -> Option<TransformMethodInfo> {
operand_source: FirstArg,
})
}
// Base64 `Base64.strict_encode64` / `encode64` / `urlsafe_encode64`.
// Base64, `Base64.strict_encode64` / `encode64` / `urlsafe_encode64`.
"Base64.strict_encode64" | "Base64.encode64" | "Base64.urlsafe_encode64" => {
Some(TransformMethodInfo {
kind: Base64Encode,
@ -763,7 +763,7 @@ fn classify_transform_ruby(callee: &str) -> Option<TransformMethodInfo> {
/// Apply encoding for witness rendering.
///
/// **NOT a spec-complete codec.** These are witness-quality helpers only
/// **NOT a spec-complete codec.** These are witness-quality helpers only ,
/// not suitable for security decisions, not reusable outside witness display.
pub fn encode_concrete_for_witness(kind: TransformKind, input: &str) -> Option<String> {
match kind {
@ -980,7 +980,7 @@ pub fn evaluate_string_op_concrete(method: &StringMethod, receiver: &str) -> Opt
)),
StringMethod::StrLen => Some(SymbolicValue::Concrete(receiver.len() as i64)),
StringMethod::Substr => {
// Substr needs index args concrete evaluation handled in smart constructor
// Substr needs index args, concrete evaluation handled in smart constructor
None
}
}
@ -993,7 +993,7 @@ pub fn evaluate_string_op_concrete(method: &StringMethod, receiver: &str) -> Opt
/// Detect whether a Replace operation acts as a security sanitizer.
///
/// Returns `None` if the pattern is not security-relevant. This is conservative:
/// the symbolic string theory does NOT clear taint via Replace detection is
/// the symbolic string theory does NOT clear taint via Replace, detection is
/// informational only for witness quality.
pub fn detect_replace_sanitizer(
pattern: &str,
@ -1271,7 +1271,7 @@ mod tests {
#[test]
fn test_evaluate_substr_returns_none() {
// Substr needs index args concrete eval handled in smart constructor
// Substr needs index args, concrete eval handled in smart constructor
let result = evaluate_string_op_concrete(&StringMethod::Substr, "hello");
assert_eq!(result, None);
}
@ -1363,7 +1363,7 @@ mod tests {
#[test]
fn test_classify_transform_js_rich_sanitizer_not_matched() {
// DOMPurify.sanitize is a rich sanitizer NOT a simple escape
// DOMPurify.sanitize is a rich sanitizer, NOT a simple escape
assert!(classify_transform_method("DOMPurify.sanitize", Lang::JavaScript).is_none());
assert!(classify_transform_method("sanitizeHtml", Lang::JavaScript).is_none());
assert!(classify_transform_method("xss", Lang::JavaScript).is_none());
@ -1499,7 +1499,7 @@ mod tests {
#[test]
fn test_classify_transform_ruby_cgi_escape() {
let info = classify_transform_method("CGI.escape", Lang::Ruby).unwrap();
// CGI.escape is percent-encoding in Ruby (not HTML escape that's
// CGI.escape is percent-encoding in Ruby (not HTML escape, that's
// CGI.escapeHTML).
assert_eq!(info.kind, TransformKind::UrlEncode);
let info = classify_transform_method("CGI::escape", Lang::Ruby).unwrap();
@ -1532,7 +1532,7 @@ mod tests {
#[test]
fn test_classify_transform_ruby_rich_sanitizer_not_matched() {
// Rails `sanitize` / `strip_tags` are rich library sanitizers NOT
// Rails `sanitize` / `strip_tags` are rich library sanitizers, NOT
// simple character-level escapes.
assert!(classify_transform_method("sanitize", Lang::Ruby).is_none());
assert!(classify_transform_method("strip_tags", Lang::Ruby).is_none());
@ -1642,7 +1642,7 @@ mod tests {
#[test]
fn test_verified_cap_sql_escape_is_empty() {
// SqlEscape has no verified label rule witness-only
// SqlEscape has no verified label rule, witness-only
assert_eq!(TransformKind::SqlEscape.verified_cap(), Cap::empty());
assert!(!TransformKind::SqlEscape.is_protective());
}

View file

@ -102,18 +102,18 @@ pub fn transfer_inst(
}
SsaOp::SelfParam => {
// Implicit method receiver symbolic input, not tainted by default.
// Implicit method receiver, symbolic input, not tainted by default.
state.set(inst.value, SymbolicValue::Symbol(inst.value));
}
SsaOp::CatchParam => {
if let Some(exc_val) = state.take_exception_context() {
// On an exception path seed from exception context
// On an exception path, seed from exception context
// and mark tainted (matches taint engine: CatchParam gets Cap::all())
state.set(inst.value, exc_val);
state.mark_tainted(inst.value);
} else {
// Normal path or no explicit exception context still mark tainted
// Normal path or no explicit exception context, still mark tainted
// to match taint engine behavior (ssa_transfer.rs CatchParam gets Cap::all())
state.set(inst.value, SymbolicValue::Symbol(inst.value));
state.mark_tainted(inst.value);
@ -121,7 +121,7 @@ pub fn transfer_inst(
}
SsaOp::Nop => {
// Nop does not define a meaningful value skip.
// Nop does not define a meaningful value, skip.
}
SsaOp::Undef => {
@ -136,10 +136,10 @@ pub fn transfer_inst(
// receiver's taint to the result so flat root-set tracking
// continues to flow taint through chained accesses.
//
// Phase 4 deliberately keeps the opaque-Symbol model: without
// This pass deliberately keeps the opaque-Symbol model: without
// a field-sensitive heap, a dedicated `Field { receiver, name }`
// SymbolicValue variant cannot soundly carry concrete reads
// across method boundaries the witness pipeline already
// across method boundaries, the witness pipeline already
// reconstructs `obj.field` text from `ValueDef.var_name`
// (populated by lower.rs to `"base.f1.f2"` for chain projections).
// The structured variant is deferred to the field-sensitive
@ -166,7 +166,7 @@ pub fn transfer_inst(
// When RHS is a member expression, SSA produces 2 uses:
// uses[0] = dotted-path SSA value (e.g., v for "user.name")
// uses[1] = base variable SSA value (e.g., v for "user")
// The first operand IS the field value use it directly.
// The first operand IS the field value, use it directly.
if let Some(def) = ssa.value_defs.get(uses_slice[0].0 as usize) {
if def.var_name.as_ref().is_some_and(|n| n.contains('.')) {
let sym = state.get(uses_slice[0]);
@ -200,13 +200,13 @@ pub fn transfer_inst(
let sym = mk_binop(Op::from(bin_op), lhs, rhs);
state.set(inst.value, sym);
} else {
// No structural info conservative Unknown
// No structural info, conservative Unknown
state.set(inst.value, SymbolicValue::Unknown);
}
state.propagate_taint(inst.value, uses_slice);
}
_ => {
// 3+ operands complex expression
// 3+ operands, complex expression
state.set(inst.value, SymbolicValue::Unknown);
state.propagate_taint(inst.value, uses_slice);
}
@ -306,7 +306,7 @@ pub fn transfer_inst(
// Fall through to normal Call
}
ContainerOp::Writeback { .. } => {
// Symex doesn't model writeback yet taint
// Symex doesn't model writeback yet, taint
// engine handles the destination-arg taint
// directly. Fall through to normal Call.
}
@ -338,7 +338,7 @@ pub fn transfer_inst(
}
// Interprocedural symbolic execution.
// Execute callee body when available full state propagation.
// Execute callee body when available, full state propagation.
if let Some(ictx) = interproc_ctx {
let mut callee_args: Vec<(crate::ssa::ir::SsaValue, SymbolicValue, bool)> =
Vec::new();
@ -550,7 +550,7 @@ fn try_heap_alias_load(
/// Transfer a single SSA instruction with optional predecessor context.
///
/// ONLY phi instructions use predecessor-sensitive selection when
/// ONLY phi instructions use predecessor-sensitive selection, when
/// `predecessor` is `Some(bid)`, the phi resolves to the operand from
/// that specific predecessor block instead of building a `Phi(...)`
/// expression. All non-phi instructions delegate to [`transfer_inst`].
@ -579,7 +579,7 @@ pub fn transfer_inst_with_predecessor(
return;
}
}
// Predecessor not found among operands propagate from all (fallback)
// Predecessor not found among operands, propagate from all (fallback)
let operand_vals: Vec<_> = operands.iter().map(|(_, v)| *v).collect();
state.propagate_taint(inst.value, &operand_vals);
}
@ -715,7 +715,7 @@ fn try_string_method(
// If receiver was prepended to arg_syms, it's at index 0;
// otherwise first explicit arg is at index 0.
if let Some(recv) = receiver {
// Receiver was prepended it IS the string operand
// Receiver was prepended, it IS the string operand
(state.get(*recv), *recv)
} else if let Some(&first_op) = all_operands.first() {
(
@ -764,7 +764,7 @@ fn try_string_method(
/// Recognize encoding/decoding transforms and build structured
/// `Encode`/`Decode` nodes instead of opaque `Call`.
///
/// Taint is always propagated from the operand encoding preserves taint
/// Taint is always propagated from the operand, encoding preserves taint
/// unconditionally. This function does NOT sanitize.
fn try_transform_method(
state: &SymbolicState,
@ -902,7 +902,7 @@ fn model_from_summary(
///
/// When a receiver has a known type via type facts, tries type-qualified
/// callee name (e.g., `"HttpClient.send"`) before bare-name resolution. This
/// improves summary-based modeling only not general virtual dispatch.
/// improves summary-based modeling only, not general virtual dispatch.
fn resolve_callee_symbolically(
ctx: &SymexSummaryCtx,
callee: &str,
@ -913,7 +913,7 @@ fn resolve_callee_symbolically(
receiver: Option<SsaValue>,
) -> Option<SymbolicCallResult> {
// Type-qualified symbolic resolution when receiver has a known type.
// Improves summary-based modeling only not general virtual dispatch.
// Improves summary-based modeling only, not general virtual dispatch.
// Precedence: exact qualified > type-aided disambiguation > bare-name fallback.
if let (Some(tf), Some(recv)) = (ctx.type_facts, receiver)
&& let Some(receiver_type) = tf.get_type(recv)
@ -935,7 +935,7 @@ fn resolve_callee_symbolically(
// Attempt 2: Disambiguate among ambiguous bare-name candidates.
// Only select when a candidate's FuncKey.name EXACTLY equals the
// qualified name no substring matching, never guess.
// qualified name, no substring matching, never guess.
let bare_resolution =
ctx.global_summaries
.resolve_callee_key(method, ctx.lang, ctx.namespace, None);
@ -1632,7 +1632,7 @@ mod tests {
state.mark_tainted(SsaValue(0));
state.set(SsaValue(1), SymbolicValue::Concrete(42));
// Two Identity entries should fall back to mk_call, NOT pick one
// Two Identity entries, should fall back to mk_call, NOT pick one
let mut gs = GlobalSummaries::new();
insert_summary(
&mut gs,
@ -2131,7 +2131,7 @@ mod tests {
},
);
// Empty type facts no receiver type info
// Empty type facts, no receiver type info
let tf = make_type_facts(vec![]);
let ctx = SymexSummaryCtx {
global_summaries: &gs,
@ -2170,7 +2170,7 @@ mod tests {
#[test]
fn transfer_call_type_qualified_disambiguation() {
// Two summaries both named "send" in different namespaces.
// One named "HttpClient.send" type disambiguation picks it.
// One named "HttpClient.send", type disambiguation picks it.
let (cfg, node) = cfg_with_node(None);
let ssa = empty_ssa();
let mut state = SymbolicState::new();
@ -2180,7 +2180,7 @@ mod tests {
state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));
let mut gs = GlobalSummaries::new();
// First "send" generic, in ns A (Identity: passes through)
// First "send", generic, in ns A (Identity: passes through)
insert_java_summary(
&mut gs,
"send",
@ -2209,7 +2209,7 @@ mod tests {
typed_call_receivers: vec![],
},
);
// Second "send" in ns B, also with same arity → ambiguous bare-name
// Second "send", in ns B, also with same arity → ambiguous bare-name
insert_java_summary(
&mut gs,
"send",
@ -2247,7 +2247,7 @@ mod tests {
SsaFuncSummary {
param_to_return: vec![],
param_to_sink: vec![],
source_caps: Cap::ENV_VAR, // Source distinct signal
source_caps: Cap::ENV_VAR, // Source, distinct signal
param_to_sink_param: vec![],
param_container_to_return: vec![],
param_to_container_store: vec![],
@ -2276,7 +2276,7 @@ mod tests {
type_facts: Some(&tf),
};
// v2 = v1.send(v0) receiver v1 is HttpClient
// v2 = v1.send(v0), receiver v1 is HttpClient
let inst = make_inst(
2,
SsaOp::Call {
@ -2316,7 +2316,7 @@ mod tests {
state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));
let mut gs = GlobalSummaries::new();
// Summary under "DatabaseConnection.send" wrong type
// Summary under "DatabaseConnection.send", wrong type
insert_java_summary(
&mut gs,
"DatabaseConnection.send",
@ -2346,7 +2346,7 @@ mod tests {
},
);
// Receiver typed as HttpClient constructs "HttpClient.send", not "DatabaseConnection.send"
// Receiver typed as HttpClient, constructs "HttpClient.send", not "DatabaseConnection.send"
let tf = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]);
let ctx = SymexSummaryCtx {
global_summaries: &gs,
@ -2396,7 +2396,7 @@ mod tests {
state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));
let mut gs = GlobalSummaries::new();
// Two "send" summaries different namespaces → ambiguous
// Two "send" summaries, different namespaces → ambiguous
insert_java_summary(
&mut gs,
"send",
@ -2453,7 +2453,7 @@ mod tests {
typed_call_receivers: vec![],
},
);
// No "HttpClient.send" summary registered disambiguation has 0 exact matches
// No "HttpClient.send" summary registered, disambiguation has 0 exact matches
let tf = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]);
let ctx = SymexSummaryCtx {

View file

@ -5,6 +5,7 @@ use std::fmt;
use crate::cfg;
use crate::ssa::ir::{BlockId, SsaValue};
use crate::utils::snippet::truncate_at_char_boundary;
/// Maximum expression tree depth before collapsing to `Unknown`.
pub const MAX_EXPR_DEPTH: u32 = 32;
@ -120,7 +121,7 @@ pub enum SymbolicValue {
StrLen(Box<SymbolicValue>),
// ── Encoding/decoding transforms ───────────────────────────
/// Protective or representation transform applied to inner value.
/// Preserves taint unconditionally does NOT sanitize in symex.
/// Preserves taint unconditionally, does NOT sanitize in symex.
Encode(super::strings::TransformKind, Box<SymbolicValue>),
/// Decoding/reverse transform applied to inner value.
Decode(super::strings::TransformKind, Box<SymbolicValue>),
@ -189,7 +190,7 @@ impl SymbolicValue {
}
// ─────────────────────────────────────────────────────────────────────────────
// Smart constructors all tree-building goes through these
// Smart constructors, all tree-building goes through these
// ─────────────────────────────────────────────────────────────────────────────
/// Build a binary arithmetic expression with concrete folding and depth bounding.
@ -218,11 +219,11 @@ pub fn mk_binop(op: Op, lhs: SymbolicValue, rhs: SymbolicValue) -> SymbolicValue
a.checked_rem(*b)
}
}
// Bitwise &, |, ^ cannot overflow on i64
// Bitwise, &, |, ^ cannot overflow on i64
Op::BitAnd => Some(*a & *b),
Op::BitOr => Some(*a | *b),
Op::BitXor => Some(*a ^ *b),
// Shifts bounds-checked to 0..=63 (i64 width)
// Shifts, bounds-checked to 0..=63 (i64 width)
Op::LeftShift => {
if *b < 0 || *b > 63 {
None
@ -237,7 +238,7 @@ pub fn mk_binop(op: Op, lhs: SymbolicValue, rhs: SymbolicValue) -> SymbolicValue
a.checked_shr(*b as u32)
}
}
// Comparisons produce 1 (true) or 0 (false)
// Comparisons, produce 1 (true) or 0 (false)
Op::Eq => Some(if *a == *b { 1 } else { 0 }),
Op::NotEq => Some(if *a != *b { 1 } else { 0 }),
Op::Lt => Some(if *a < *b { 1 } else { 0 }),
@ -397,7 +398,7 @@ pub fn mk_substr(
let result = cs.get(i..).unwrap_or("");
return SymbolicValue::ConcreteStr(result.to_owned());
}
_ => {} // end is Some but not concrete can't fold
_ => {} // end is Some but not concrete, can't fold
}
}
}
@ -458,7 +459,7 @@ pub fn mk_decode(kind: super::strings::TransformKind, s: SymbolicValue) -> Symbo
}
// ─────────────────────────────────────────────────────────────────────────────
// Display human-readable witness strings
// Display, human-readable witness strings
// ─────────────────────────────────────────────────────────────────────────────
/// Maximum length for the Display output before truncation.
@ -468,10 +469,12 @@ const MAX_STR_DISPLAY_LEN: usize = 64;
impl fmt::Display for SymbolicValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Use an internal formatter, then truncate if needed.
// Use an internal formatter, then truncate if needed. UTF-8-safe
// truncation, `ConcreteStr` may carry localised text from source
// (e.g. Cyrillic / Gurmukhi regex literals).
let s = display_inner(self);
if s.len() > MAX_DISPLAY_LEN {
write!(f, "{}...", &s[..MAX_DISPLAY_LEN])
write!(f, "{}...", truncate_at_char_boundary(&s, MAX_DISPLAY_LEN))
} else {
write!(f, "{}", s)
}
@ -483,7 +486,10 @@ fn display_inner(val: &SymbolicValue) -> String {
SymbolicValue::Concrete(n) => format!("{}", n),
SymbolicValue::ConcreteStr(s) => {
if s.len() > MAX_STR_DISPLAY_LEN {
format!("\"{}...\"", &s[..MAX_STR_DISPLAY_LEN])
format!(
"\"{}...\"",
truncate_at_char_boundary(s, MAX_STR_DISPLAY_LEN)
)
} else {
format!("\"{}\"", s)
}
@ -675,7 +681,7 @@ mod tests {
#[test]
fn depth_bounding() {
// Build a chain of depth 33 should collapse to Unknown
// Build a chain of depth 33, should collapse to Unknown
let mut val = SymbolicValue::Symbol(SsaValue(0));
for _ in 0..MAX_EXPR_DEPTH {
val = mk_binop(Op::Add, val, SymbolicValue::Concrete(1));
@ -702,7 +708,7 @@ mod tests {
#[test]
fn concat_no_int_coercion() {
// ConcreteStr + Concrete(int) should NOT fold no type coercion
// ConcreteStr + Concrete(int) should NOT fold, no type coercion
let result = mk_concat(
SymbolicValue::ConcreteStr("val=".into()),
SymbolicValue::Concrete(42),
@ -980,7 +986,7 @@ mod tests {
#[test]
fn left_shift_amount_63() {
// Max valid shift should not panic
// Max valid shift, should not panic
let result = mk_binop(Op::LeftShift, c(1), c(63));
assert_eq!(result, c(1i64 << 63));
}
@ -1286,7 +1292,7 @@ mod tests {
/// `mk_phi` must not fold when operands have differing types
/// (e.g. one branch returns a Concrete int, another returns
/// ConcreteStr). The result is genuinely uncertain a Phi node
/// ConcreteStr). The result is genuinely uncertain, a Phi node
/// must be preserved to expose the type-conflict to downstream
/// witness logic, not collapse to one operand.
#[test]

View file

@ -1,7 +1,7 @@
//! Witness generation for confirmed symbolic findings.
//!
//! When the multi-path explorer confirms a finding as feasible, this module
//! generates a concrete proof witness an actual input value that would
//! generates a concrete proof witness, an actual input value that would
//! trigger the vulnerability. Witnesses are best-effort: if the expression
//! is not string-renderable or constraints are too complex, a generic
//! description is produced instead.
@ -44,7 +44,7 @@ pub fn extract_witness(
}
// 1b. When the sink is a Call node, the return value is typically opaque.
// Look for the best tainted argument instead that's where injected
// Look for the best tainted argument instead, that's where injected
// data actually flows into the sink.
let sym = unwrap_sink_call_arg(&sym, state);
@ -85,7 +85,7 @@ pub fn extract_witness(
// 6. Branch on string-renderability
if tainted.is_empty() {
// No tainted symbols expression is fully concrete or opaque
// No tainted symbols, expression is fully concrete or opaque
let concrete = evaluate_concrete(&sym);
Some(format!(
"input '{}' flows to {}(\"{}\")",
@ -125,7 +125,7 @@ pub fn extract_witness(
/// When the sink expression is a `Call`, find the most informative tainted
/// argument to use for witness generation instead of the opaque return value.
///
/// Scores each tainted arg by structural richness args containing protective
/// Scores each tainted arg by structural richness, args containing protective
/// transforms (`Encode`/`Decode`), string composition (`Concat`/`BinOp(Add)`),
/// or string methods (`Replace`/`Substr`/etc.) outrank bare `Call(...)`
/// wrappers (which typically come from prepended receivers or opaque property
@ -242,7 +242,7 @@ fn is_string_renderable(expr: &SymbolicValue) -> bool {
SymbolicValue::Substr(s, _, _) => is_string_renderable(s),
// Encoding/decoding transforms produce strings
SymbolicValue::Encode(_, s) | SymbolicValue::Decode(_, s) => is_string_renderable(s),
// StrLen returns integer not string-renderable
// StrLen returns integer, not string-renderable
SymbolicValue::StrLen(_) => false,
// BinOp(Add) on string-renderable operands is string concatenation
// in languages where + is overloaded (JS, Python, etc.)
@ -253,7 +253,7 @@ fn is_string_renderable(expr: &SymbolicValue) -> bool {
// pass-through for witness purposes (covers property access, simple
// wrappers). Multi-arg calls or calls with non-renderable args are opaque.
SymbolicValue::Call(_, args) if args.len() == 1 => is_string_renderable(&args[0]),
// Other arithmetic, opaque calls, phis, integers, unknown not string-renderable
// Other arithmetic, opaque calls, phis, integers, unknown, not string-renderable
SymbolicValue::Concrete(_)
| SymbolicValue::BinOp(_, _, _)
| SymbolicValue::Call(_, _)
@ -290,7 +290,7 @@ fn collect_tainted_inner(expr: &SymbolicValue, state: &SymbolicState, out: &mut
collect_tainted_inner(v, state, out);
}
}
// String operations recurse into operands
// String operations, recurse into operands
SymbolicValue::ToLower(s)
| SymbolicValue::ToUpper(s)
| SymbolicValue::Trim(s)
@ -352,7 +352,7 @@ fn substitute_tainted(
.collect();
SymbolicValue::Phi(new_ops)
}
// String operations recurse into operands
// String operations, recurse into operands
SymbolicValue::Trim(s) => {
SymbolicValue::Trim(Box::new(substitute_tainted(s, tainted, payload)))
}
@ -376,14 +376,14 @@ fn substitute_tainted(
end.as_ref()
.map(|e| Box::new(substitute_tainted(e, tainted, payload))),
),
// Encoding/decoding transforms preserve structure
// Encoding/decoding transforms, preserve structure
SymbolicValue::Encode(kind, s) => {
SymbolicValue::Encode(*kind, Box::new(substitute_tainted(s, tainted, payload)))
}
SymbolicValue::Decode(kind, s) => {
SymbolicValue::Decode(*kind, Box::new(substitute_tainted(s, tainted, payload)))
}
// Leaf nodes that are not tainted symbols return unchanged
// Leaf nodes that are not tainted symbols, return unchanged
other => other.clone(),
}
}
@ -407,7 +407,7 @@ fn evaluate_concrete(expr: &SymbolicValue) -> String {
let right = evaluate_concrete(r);
format!("{}{}", left, right)
}
// String operations apply to recursively evaluated inner
// String operations, apply to recursively evaluated inner
SymbolicValue::Trim(s) => evaluate_concrete(s).trim().to_owned(),
SymbolicValue::ToLower(s) => evaluate_concrete(s).to_lowercase(),
SymbolicValue::ToUpper(s) => evaluate_concrete(s).to_uppercase(),
@ -439,7 +439,7 @@ fn evaluate_concrete(expr: &SymbolicValue) -> String {
format!("{}", expr)
}
}
// Encoding/decoding apply transform to recursively evaluated inner
// Encoding/decoding, apply transform to recursively evaluated inner
SymbolicValue::Encode(kind, s) => {
let inner = evaluate_concrete(s);
super::strings::encode_concrete_for_witness(*kind, &inner)
@ -465,7 +465,7 @@ fn evaluate_concrete(expr: &SymbolicValue) -> String {
/// the sink's vulnerability class?
///
/// Returns a human-readable note if a transform's `verified_cap()` is
/// non-empty AND does NOT intersect the sink's cap indicating the
/// non-empty AND does NOT intersect the sink's cap, indicating the
/// transform does not match the sink's neutralization class.
///
/// This is a **heuristic witness annotation**, not a proof. Representation
@ -485,7 +485,7 @@ fn detect_transform_mismatch(expr: &SymbolicValue, sink_cap: Cap) -> Option<Stri
cap_description(sink_cap),
))
} else {
// Correct transform or non-protective recurse
// Correct transform or non-protective, recurse
detect_transform_mismatch(inner, sink_cap)
}
}
@ -579,6 +579,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
assert_eq!(sink_cap(&finding, &cfg), Cap::SQL_QUERY);
}
@ -613,6 +614,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let cap = sink_cap(&finding, &cfg);
assert!(cap.contains(Cap::SQL_QUERY));
@ -806,6 +808,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let witness = extract_witness(&state, &finding, &ssa, &cfg);
@ -849,6 +852,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
assert!(extract_witness(&state, &finding, &ssa, &cfg).is_none());
@ -862,7 +866,7 @@ mod tests {
let sink_val = SsaValue(10);
let tainted_val = SsaValue(5);
// BinOp(Add, tainted, 5) not string-renderable
// BinOp(Add, tainted, 5), not string-renderable
state.set(
sink_val,
SymbolicValue::BinOp(
@ -913,6 +917,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let witness = extract_witness(&state, &finding, &ssa, &cfg);
@ -931,7 +936,7 @@ mod tests {
let mut state = SymbolicState::new();
let sink_val = SsaValue(10);
// Fully concrete no taint
// Fully concrete, no taint
state.set(sink_val, SymbolicValue::ConcreteStr("SELECT 1".into()));
let mut cfg = Cfg::new();
@ -974,6 +979,7 @@ mod tests {
path_hash: 0,
finding_id: String::new(),
alternative_finding_ids: smallvec::SmallVec::new(),
effective_sink_caps: crate::labels::Cap::empty(),
};
let witness = extract_witness(&state, &finding, &ssa, &cfg);
@ -1007,7 +1013,7 @@ mod tests {
Box::new(SymbolicValue::Concrete(0)),
Some(Box::new(SymbolicValue::Concrete(5))),
)));
// StrLen returns int NOT string-renderable
// StrLen returns int, NOT string-renderable
assert!(!is_string_renderable(&SymbolicValue::StrLen(Box::new(
SymbolicValue::Symbol(SsaValue(0))
))));
@ -1160,7 +1166,7 @@ mod tests {
TransformKind::HtmlEscape,
Box::new(SymbolicValue::Symbol(SsaValue(0))),
);
// HTML escape at HTML_ESCAPE sink correct match
// HTML escape at HTML_ESCAPE sink, correct match
assert!(detect_transform_mismatch(&expr, Cap::HTML_ESCAPE).is_none());
}
@ -1198,7 +1204,7 @@ mod tests {
Box::new(SymbolicValue::ConcreteStr("prefix".into())),
Box::new(encoded),
);
// ShellEscape at SQL sink mismatch
// ShellEscape at SQL sink, mismatch
let result = detect_transform_mismatch(&expr, Cap::SQL_QUERY);
assert!(result.is_some());
assert!(result.unwrap().contains("shellEscape"));