Python FP and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics
* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks
* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks
* refactor: Simplify code formatting for better readability in multiple files
* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration
* feat: Update Java and Python patterns to include new security rules
* refactor: Improve comment clarity and consistency across multiple Rust files
* refactor: Simplify code formatting for improved readability in integration tests and module files
* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
2026-06-30 20:39:39 +02:00 · 2026-04-29 19:53:34 -04:00 · 2026-04-29 19:53:34 -04:00 · a438886217
commit a438886217
parent 4db0805de6
291 changed files with 9485 additions and 3851 deletions
--- a/src/taint/path_state.rs
+++ b/src/taint/path_state.rs
@ -35,13 +35,13 @@ pub enum PredicateKind {
    /// Commonly paired with [`ShellMetaValidated`] in OR-chain rejection
    /// idioms (`if x.len() > MAX || x.contains(";") { reject }`).  Counts as
    /// a dominator guard for `cfg-unguarded-sink` purposes, but intentionally
-    /// does **not** mark variables as validated — the rejection direction is
+    /// does **not** mark variables as validated: the rejection direction is
    /// ambiguous from the condition alone (a `.len() > 5 { sink(x) }`
    /// gate is a precondition, not a rejection).
    BoundedLength,
    /// Comparison operators: `x == 5`, `x > threshold`
    Comparison,
-    /// Generic boolean test — cannot classify further.
+    /// Generic boolean test, cannot classify further.
    Unknown,
 }

@ -50,7 +50,7 @@ pub enum PredicateKind {
 ///
 /// Presence of any of these in user input is sufficient to enable shell
 /// injection, so rejecting input that contains them is a real sanitizer.
-/// `"foo"` or other non-metachar needles don't qualify — a rejection of
+/// `"foo"` or other non-metachar needles don't qualify; a rejection of
 /// those is business logic, not security.
 const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r", "\0"];

@ -65,7 +65,7 @@ const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r"
 ///   character class containing only metacharacters.
 ///
 /// Returns `false` if the needle is a non-metachar literal or cannot be
-/// extracted — falls through to broader classification.
+/// extracted, falls through to broader classification.
 fn is_shell_metachar_rejection(text: &str) -> bool {
    // Method-call form: `.contains(…)` / `.includes(…)` / `.include?(…)`
    for method in [".contains(", ".includes(", ".include?("] {
@ -134,7 +134,7 @@ fn extract_first_string_arg(after_open: &str) -> Option<String> {
 }

 /// For Python `"<METACHAR>" in x` (needle on the left side of ` in `), return
-/// the needle.  Returns `None` for `x in ALLOWED` (identifier on the left) —
+/// the needle.  Returns `None` for `x in ALLOWED` (identifier on the left);
 /// that is an allowlist check, not a rejection.
 fn extract_python_in_needle(text: &str) -> Option<String> {
    let pos = text.find(" in ")?;
@ -155,7 +155,7 @@ fn extract_python_in_needle(text: &str) -> Option<String> {

 /// Detect regex character classes that contain only shell metacharacters:
 /// `[;|&]`, `[;&`$]`, etc.  Missing: escape-class metacharacters inside the
-/// class (e.g. `[\n]`) — conservative, returns false there.
+/// class (e.g. `[\n]`); the check is conservative and returns false there.
 fn is_metachar_regex_class(text: &str) -> bool {
    // Find `[` followed by content and `]`, anywhere in the text.
    let mut rest = text;
@ -180,7 +180,7 @@ fn is_metachar_regex_class(text: &str) -> bool {

 /// Check whether `text` looks like a bounded-length rejection:
 /// `x.len() > N`, `x.len() < N`, `x.length >= N`, etc. where `N` is an
-/// integer literal >= 2.  Excludes `> 0` / `>= 1` / `< 1` — those are
+/// integer literal >= 2.  Excludes `> 0` / `>= 1` / `< 1`; those are
 /// non-empty checks, which are not length-bound validations.
 fn is_bounded_length_check(lower: &str) -> bool {
    const PROBES: &[&str] = &[
@ -290,7 +290,7 @@ pub fn classify_condition(text: &str) -> PredicateKind {
    // Matched BEFORE AllowlistCheck so that `x.contains(";")` is recognized
    // as a rejection idiom rather than a membership test.  Checked on the
    // raw (non-lowercased) text so metacharacter comparisons stay
-    // case-accurate — `;` / `|` / `&` have no case.
+    // case-accurate, `;` / `|` / `&` have no case.
    if is_shell_metachar_rejection(text) {
        return PredicateKind::ShellMetaValidated;
    }
@ -409,7 +409,7 @@ pub fn classify_condition(text: &str) -> PredicateKind {
 /// validator's effect is opaque: we can't tell which argument is being
 /// checked. Returning the original kind with `None` target would cause
 /// upstream code to over-validate (mark every `condition_var` as validated).
-/// Instead, we fall back to `PredicateKind::Unknown` — safer to assume the
+/// Instead, we fall back to `PredicateKind::Unknown`, safer to assume the
 /// validator did nothing than to assume it validated every variable in the
 /// condition. Single-argument calls retain `(kind, None)` so downstream code
 /// can still use the predicate-summary bit tracking.
@ -442,7 +442,7 @@ pub fn classify_condition_with_target(text: &str) -> (PredicateKind, Option<Stri
            (kind, target)
        }
        PredicateKind::Comparison => {
-            // `x === '/login'`, `x == 5`, `null != obj` — when exactly one
+            // `x === '/login'`, `x == 5`, `null != obj`, when exactly one
            // side is a literal, extract the identifier side as the target.
            // Downstream `apply_branch_predicates` uses this to mark the
            // variable as `validated_may` on the true (equal) branch.
@ -464,7 +464,7 @@ pub fn classify_condition_with_target(text: &str) -> (PredicateKind, Option<Stri
 /// - `'a' == 'b'` → `None` (both sides are literals)
 /// - `obj.field == 3` → `None` (not a bare identifier)
 ///
-/// Best-effort text analysis — kept conservative to avoid false validation.
+/// Best-effort text analysis, kept conservative to avoid false validation.
 fn extract_comparison_target(text: &str) -> Option<String> {
    let trimmed = text.trim();

@ -537,7 +537,7 @@ fn is_comparison_literal(s: &str) -> bool {
 /// `Some(0)` for a call with empty argument list. Respects paren/bracket/brace
 /// nesting so `f(g(a, b), c)` counts as 2 top-level args.
 ///
-/// Best-effort — operates on source text, not an AST. Used by
+/// Best-effort, operates on source text, not an AST. Used by
 /// `classify_condition_with_target` to distinguish single-arg vs multi-arg
 /// validator calls when target extraction fails.
 fn count_call_args(text: &str) -> Option<usize> {
@ -592,7 +592,7 @@ fn extract_validation_target(text: &str) -> Option<String> {
        }
    }

-    // Function call pattern: `func(x, ...)` — extract first argument
+    // Function call pattern: `func(x, ...)`, extract first argument
    // Strip closing paren if present
    let args_inner = args_part.trim_end().strip_suffix(')').unwrap_or(args_part);
    // Take text up to first comma (first argument)
@ -653,7 +653,7 @@ fn extract_allowlist_target(text: &str) -> Option<String> {

    // Python `in` operator: `cmd in ALLOWED` / `cmd not in ALLOWED`
    if lower.contains(" in ") {
-        // Find the leftmost ` in ` — everything before it is the target expression
+        // Find the leftmost ` in `, everything before it is the target expression
        // Handle `not in` by looking for ` not in ` first
        let target_part = if let Some(pos) = lower.find(" not in ") {
            &trimmed[..pos]
@ -857,7 +857,7 @@ mod tests {

    #[test]
    fn classify_validation_requires_paren() {
-        // `x_valid == true` should NOT be ValidationCall — no `(` call syntax.
+        // `x_valid == true` should NOT be ValidationCall, no `(` call syntax.
        assert_eq!(
            classify_condition("x_valid == true"),
            PredicateKind::Comparison
@ -978,7 +978,7 @@ mod tests {

    #[test]
    fn target_multi_arg_fallback_opaque_expr_is_unknown() {
-        // `validate(x + 1, y)` — first arg is an expression, not an identifier.
+        // `validate(x + 1, y)`, first arg is an expression, not an identifier.
        // Target extraction fails. Multi-arg call, so fall back to Unknown
        // rather than letting upstream validate every condition var.
        let (kind, target) = classify_condition_with_target("validate(x + 1, y)");