mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
Python fp and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
|
|
@ -19,8 +19,8 @@ use serde::{Deserialize, Serialize};
|
|||
|
||||
/// Bit-level abstract fact: known-zero and known-one masks.
|
||||
///
|
||||
/// - `top()` = `{known_zero: 0, known_one: 0}` — no bits known
|
||||
/// - `bottom()` = `{known_zero: MAX, known_one: MAX}` — contradictory
|
||||
/// - `top()` = `{known_zero: 0, known_one: 0}`, no bits known
|
||||
/// - `bottom()` = `{known_zero: MAX, known_one: MAX}`, contradictory
|
||||
/// - `from_const(n)` = all 64 bits known
|
||||
///
|
||||
/// Invariant: `known_zero & known_one == 0` for non-bottom values.
|
||||
|
|
@ -253,7 +253,7 @@ impl AbstractDomain for BitFact {
|
|||
}
|
||||
}
|
||||
|
||||
/// Widen: same as join (finite lattice height — 64 bits × 3 states).
|
||||
/// Widen: same as join (finite lattice height, 64 bits × 3 states).
|
||||
fn widen(&self, other: &Self) -> Self {
|
||||
self.join(other)
|
||||
}
|
||||
|
|
@ -511,7 +511,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn right_shift_unknown_sign() {
|
||||
// Sign bit unknown — high bits after shift should be unknown
|
||||
// Sign bit unknown, high bits after shift should be unknown
|
||||
let a = BitFact {
|
||||
known_zero: 0x0F,
|
||||
known_one: 0,
|
||||
|
|
@ -687,7 +687,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// `a ⊓ b ⊑ a` and `a ⊓ b ⊑ b` — meet is the greatest lower bound.
|
||||
/// `a ⊓ b ⊑ a` and `a ⊓ b ⊑ b`, meet is the greatest lower bound.
|
||||
#[test]
|
||||
fn meet_is_lower_bound_bit() {
|
||||
let xs = sample_bits();
|
||||
|
|
@ -700,7 +700,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// `a ⊑ a ⊔ b` and `b ⊑ a ⊔ b` — join is the least upper bound.
|
||||
/// `a ⊑ a ⊔ b` and `b ⊑ a ⊔ b`, join is the least upper bound.
|
||||
#[test]
|
||||
fn join_is_upper_bound_bit() {
|
||||
let xs = sample_bits();
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@ use serde::{Deserialize, Serialize};
|
|||
|
||||
/// Numeric interval: `[lo, hi]` inclusive bounds.
|
||||
///
|
||||
/// - `top()` = `[None, None]` — any integer
|
||||
/// - `bottom()` = `[1, 0]` — empty / unsatisfiable (lo > hi)
|
||||
/// - `exact(n)` = `[n, n]` — singleton
|
||||
/// - `top()` = `[None, None]`, any integer
|
||||
/// - `bottom()` = `[1, 0]`, empty / unsatisfiable (lo > hi)
|
||||
/// - `exact(n)` = `[n, n]`, singleton
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct IntervalFact {
|
||||
pub lo: Option<i64>,
|
||||
|
|
@ -278,7 +278,7 @@ impl IntervalFact {
|
|||
/// - One non-negative singleton mask `m`: `[0, m]` regardless of other
|
||||
/// operand's sign (two's complement AND with a non-negative mask always
|
||||
/// produces a non-negative result bounded by the mask).
|
||||
/// - Both non-negative: `[0, min(a.hi, b.hi)]` — AND can only clear bits.
|
||||
/// - Both non-negative: `[0, min(a.hi, b.hi)]`, AND can only clear bits.
|
||||
pub fn bit_and(&self, other: &Self) -> Self {
|
||||
if self.is_bottom() || other.is_bottom() {
|
||||
return Self::bottom();
|
||||
|
|
@ -330,7 +330,7 @@ impl IntervalFact {
|
|||
/// - Singletons: exact computation.
|
||||
/// - `x | 0` → `x`, `0 | x` → `x`.
|
||||
/// - Both non-negative with known upper bounds: `[max(a.lo, b.lo),
|
||||
/// next_pow2_minus1(max(a.hi, b.hi))]` — OR can set any bit below
|
||||
/// next_pow2_minus1(max(a.hi, b.hi))]`, OR can set any bit below
|
||||
/// the highest set bit of either operand.
|
||||
pub fn bit_or(&self, other: &Self) -> Self {
|
||||
if self.is_bottom() || other.is_bottom() {
|
||||
|
|
@ -1054,7 +1054,7 @@ mod tests {
|
|||
let a = IntervalFact::exact(i64::MIN);
|
||||
let b = IntervalFact::exact(-1);
|
||||
let r = a.div(&b);
|
||||
// Either bound becomes None (graceful) — exact representation
|
||||
// Either bound becomes None (graceful), exact representation
|
||||
// depends on the impl, but we mainly assert no panic occurred
|
||||
// and the result is a valid interval.
|
||||
assert!(
|
||||
|
|
@ -1078,7 +1078,7 @@ mod tests {
|
|||
assert_eq!(r.hi, Some(2));
|
||||
}
|
||||
|
||||
/// Modulo by an interval that *contains* zero must escape to Top —
|
||||
/// Modulo by an interval that *contains* zero must escape to Top,
|
||||
/// modulo-by-zero is undefined and we cannot precise-narrow it.
|
||||
#[test]
|
||||
fn modulo_divisor_spans_zero_is_top() {
|
||||
|
|
@ -1096,7 +1096,7 @@ mod tests {
|
|||
|
||||
/// `[i64::MIN, i64::MAX]` is the maximal interval. Any join with
|
||||
/// any other interval must remain `[i64::MIN, i64::MAX]` (or Top
|
||||
/// equivalent) — this guards against accidental narrowing on join.
|
||||
/// equivalent), this guards against accidental narrowing on join.
|
||||
#[test]
|
||||
fn full_range_is_join_absorbing() {
|
||||
let full = IntervalFact {
|
||||
|
|
@ -1347,7 +1347,7 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Modulo with exact-zero divisor — must escape to Top.
|
||||
/// Modulo with exact-zero divisor, must escape to Top.
|
||||
#[test]
|
||||
fn modulo_by_exact_zero_is_top() {
|
||||
let a = IntervalFact {
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ pub fn is_enabled() -> bool {
|
|||
|
||||
/// Per-SSA-value abstract element: product of all subdomains.
|
||||
///
|
||||
/// Each subdomain is independent — join, meet, widen, and leq are applied
|
||||
/// Each subdomain is independent, join, meet, widen, and leq are applied
|
||||
/// component-wise. Adding a new subdomain requires adding a field here
|
||||
/// and updating the component-wise implementations.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
|
|
@ -182,15 +182,15 @@ pub const MAX_LITERAL_PREFIX_LEN: usize = 64;
|
|||
/// restricted so the summary size stays constant regardless of callee body
|
||||
/// complexity:
|
||||
///
|
||||
/// * [`IntervalTransfer::Top`] — no interval knowledge crosses (default).
|
||||
/// * [`IntervalTransfer::Identity`] — return = param (pass-through).
|
||||
/// * [`IntervalTransfer::Affine`] — return = param * `mul` + `add` with
|
||||
/// * [`IntervalTransfer::Top`], no interval knowledge crosses (default).
|
||||
/// * [`IntervalTransfer::Identity`], return = param (pass-through).
|
||||
/// * [`IntervalTransfer::Affine`], return = param * `mul` + `add` with
|
||||
/// `i64` constants; overflow defaults to Top at apply time.
|
||||
/// * [`IntervalTransfer::Clamped`] — return is always in `[lo, hi]` regardless
|
||||
/// * [`IntervalTransfer::Clamped`], return is always in `[lo, hi]` regardless
|
||||
/// of input. Captures callee-intrinsic bounds (e.g. `saturating` ops).
|
||||
///
|
||||
/// No unbounded expression trees, no nesting. A callee whose behaviour does
|
||||
/// not fit one of these forms falls back to `Top` — we never try to encode
|
||||
/// not fit one of these forms falls back to `Top`, we never try to encode
|
||||
/// richer algebra in the summary.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
pub enum IntervalTransfer {
|
||||
|
|
@ -247,9 +247,9 @@ impl IntervalTransfer {
|
|||
/// Mirrors [`IntervalTransfer`] for the string subdomain. Bounded by
|
||||
/// [`MAX_LITERAL_PREFIX_LEN`] to keep summary size constant.
|
||||
///
|
||||
/// * [`StringTransfer::Unknown`] — default.
|
||||
/// * [`StringTransfer::Identity`] — return = param.
|
||||
/// * [`StringTransfer::LiteralPrefix`] — return has this literal prefix
|
||||
/// * [`StringTransfer::Unknown`], default.
|
||||
/// * [`StringTransfer::Identity`], return = param.
|
||||
/// * [`StringTransfer::LiteralPrefix`], return has this literal prefix
|
||||
/// regardless of input (callee-intrinsic).
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
pub enum StringTransfer {
|
||||
|
|
@ -325,7 +325,7 @@ impl StringTransfer {
|
|||
/// caller's knowledge of each argument, without having to re-run the callee.
|
||||
///
|
||||
/// Composition rule: `apply(input) = (interval.apply, string.apply,
|
||||
/// bits=top)`. The bit domain is always Top — we do not track cross-file
|
||||
/// bits=top)`. The bit domain is always Top, we do not track cross-file
|
||||
/// bit transfers.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize)]
|
||||
pub struct AbstractTransfer {
|
||||
|
|
@ -351,7 +351,7 @@ impl AbstractTransfer {
|
|||
Self::default()
|
||||
}
|
||||
|
||||
/// True when neither subdomain carries any information — equivalent to
|
||||
/// True when neither subdomain carries any information, equivalent to
|
||||
/// "omit this entry entirely".
|
||||
pub fn is_top(&self) -> bool {
|
||||
is_interval_top(&self.interval) && is_string_unknown(&self.string)
|
||||
|
|
@ -410,7 +410,7 @@ impl AbstractState {
|
|||
/// Set abstract value for an SSA value. Drops Top values to save space.
|
||||
pub fn set(&mut self, v: SsaValue, val: AbstractValue) {
|
||||
if val.is_top() {
|
||||
// Don't store Top — it's the default
|
||||
// Don't store Top, it's the default
|
||||
if let Ok(idx) = self.values.binary_search_by_key(&v, |(id, _)| *id) {
|
||||
self.values.remove(idx);
|
||||
}
|
||||
|
|
@ -422,7 +422,7 @@ impl AbstractState {
|
|||
if self.values.len() < MAX_ABSTRACT_VALUES {
|
||||
self.values.insert(idx, (v, val));
|
||||
}
|
||||
// Over budget: silently drop (conservative — defaults to Top)
|
||||
// Over budget: silently drop (conservative, defaults to Top)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
//! Each axis is a three-value lattice [`Tri::No`] / [`Tri::Yes`] / [`Tri::Maybe`]
|
||||
//! where `Maybe` is Top (unknown) and `No` / `Yes` are the two definite
|
||||
//! refinements. A value is path-safe for a FILE_IO sink iff
|
||||
//! `dotdot == No && absolute == No` — i.e. we have proof that *no* `..`
|
||||
//! `dotdot == No && absolute == No`, i.e. we have proof that *no* `..`
|
||||
//! component and *no* absolute root can leak through. `normalized == Yes`
|
||||
//! alone is not sufficient (canonicalising an absolute input still produces
|
||||
//! an absolute path); prefix_lock is used separately to certify containment
|
||||
|
|
@ -52,7 +52,7 @@ pub enum Tri {
|
|||
No,
|
||||
/// Proven present.
|
||||
Yes,
|
||||
/// Unknown — no transfer or guard has proved the axis yet.
|
||||
/// Unknown, no transfer or guard has proved the axis yet.
|
||||
Maybe,
|
||||
}
|
||||
|
||||
|
|
@ -367,12 +367,12 @@ impl AbstractDomain for PathFact {
|
|||
/// narrowed axis can be proved safe.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum PathRejection {
|
||||
/// `x.contains("..")` — false branch proves `dotdot = No` on the receiver.
|
||||
/// `x.contains("..")`, false branch proves `dotdot = No` on the receiver.
|
||||
DotDot,
|
||||
/// `x.starts_with("/")` / `x.starts_with('\\')` — false branch proves
|
||||
/// `x.starts_with("/")` / `x.starts_with('\\')`, false branch proves
|
||||
/// `absolute = No` on the receiver.
|
||||
AbsoluteSlash,
|
||||
/// `x.is_absolute()` / `Path::new(x).is_absolute()` — false branch proves
|
||||
/// `x.is_absolute()` / `Path::new(x).is_absolute()`, false branch proves
|
||||
/// `absolute = No` on the argument/receiver.
|
||||
IsAbsolute,
|
||||
/// Not a path-rejection idiom.
|
||||
|
|
@ -384,7 +384,7 @@ pub enum PathRejection {
|
|||
/// the listed axis is refined.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum PathAssertion {
|
||||
/// `x.starts_with("<literal_root>")` — true branch attaches
|
||||
/// `x.starts_with("<literal_root>")`, true branch attaches
|
||||
/// `prefix_lock = Some("<literal_root>")` to the receiver.
|
||||
PrefixLock(String),
|
||||
/// Not a path-assertion idiom.
|
||||
|
|
@ -426,7 +426,7 @@ pub fn classify_path_rejection_axes(text: &str) -> smallvec::SmallVec<[PathRejec
|
|||
let clause = clause.trim();
|
||||
// Multi-axis special case: `!filepath.IsLocal(p)` (Go).
|
||||
// `filepath.IsLocal` returns true iff the path stays within the
|
||||
// current directory — no leading `/`, no `..` segments, no Windows
|
||||
// current directory, no leading `/`, no `..` segments, no Windows
|
||||
// drive root. Idiomatic Go path-traversal guard:
|
||||
// `if !filepath.IsLocal(p) { return }`
|
||||
// The TRUE branch terminates; the FALSE branch (where IsLocal is
|
||||
|
|
@ -449,7 +449,7 @@ pub fn classify_path_rejection_axes(text: &str) -> smallvec::SmallVec<[PathRejec
|
|||
out
|
||||
}
|
||||
|
||||
/// Detect `!filepath.IsLocal(<expr>)` — Go's idiomatic path-traversal
|
||||
/// Detect `!filepath.IsLocal(<expr>)`, Go's idiomatic path-traversal
|
||||
/// guard. Whitespace-tolerant: `! filepath.IsLocal(`, `!filepath . IsLocal(`,
|
||||
/// etc. Used by [`classify_path_rejection_axes`] to inject both
|
||||
/// [`PathRejection::DotDot`] and [`PathRejection::IsAbsolute`] on the false
|
||||
|
|
@ -475,7 +475,7 @@ fn has_negated_filepath_is_local(clause: &str) -> bool {
|
|||
fn classify_path_rejection_atom(clause: &str) -> PathRejection {
|
||||
// `.contains("..")` (Rust, Java) / `.includes("..")` (JS/TS) /
|
||||
// `.include?("..")` (Ruby) / `strings.Contains(s, "..")` (Go) /
|
||||
// `strstr(s, "..")` (C/C++) — every form recognised by
|
||||
// `strstr(s, "..")` (C/C++), every form recognised by
|
||||
// `extract_contains_arg` returns `..` if the needle is the dotdot
|
||||
// segment.
|
||||
if let Some(needle) = extract_contains_arg(clause)
|
||||
|
|
@ -483,7 +483,7 @@ fn classify_path_rejection_atom(clause: &str) -> PathRejection {
|
|||
{
|
||||
return PathRejection::DotDot;
|
||||
}
|
||||
// Python `".." in s` — operator form. Look for `".." in <something>`
|
||||
// Python `".." in s`, operator form. Look for `".." in <something>`
|
||||
// anywhere in the clause text. Conservative: requires the literal
|
||||
// `".." in ` substring (whitespace-tolerant).
|
||||
if has_python_dotdot_in(clause) {
|
||||
|
|
@ -681,7 +681,7 @@ pub fn classify_path_assertion(text: &str) -> PathAssertion {
|
|||
/// * Must be non-empty.
|
||||
/// * The leaf segment must begin with an ASCII uppercase letter
|
||||
/// (Rust's variant / struct / type grammar).
|
||||
/// * The leaf segment must be ASCII alphanumeric / underscore — no
|
||||
/// * The leaf segment must be ASCII alphanumeric / underscore, no
|
||||
/// method call noise (parentheses, argument lists) survives here
|
||||
/// because callees arrive in their normalised scoped-identifier
|
||||
/// form.
|
||||
|
|
@ -700,7 +700,7 @@ pub fn is_structural_variant_ctor(callee: &str) -> bool {
|
|||
// upper-camel-case names an enum variant or tuple struct (`Some`,
|
||||
// `Ok`, `MyResult`). A scoped identifier whose *penultimate*
|
||||
// segment is upper-camel-case names an associated constructor on
|
||||
// that type — `Box::new`, `Cell::from`, `PathBuf::with_capacity`,
|
||||
// that type, `Box::new`, `Cell::from`, `PathBuf::with_capacity`,
|
||||
// etc. The latter is the lower-leaf-case shape we want to admit
|
||||
// alongside the bare-variant shape.
|
||||
let segments: smallvec::SmallVec<[&str; 4]> =
|
||||
|
|
@ -731,7 +731,7 @@ pub fn is_structural_variant_ctor(callee: &str) -> bool {
|
|||
/// PathFact of the receiver/first argument (the value being sanitised);
|
||||
/// it is used as the baseline to which the call's effect is applied.
|
||||
///
|
||||
/// Returned [`None`] means the callee is not a recognised path primitive —
|
||||
/// Returned [`None`] means the callee is not a recognised path primitive,
|
||||
/// the caller should leave the result at its pre-existing PathFact (Top).
|
||||
///
|
||||
/// Backwards-compatible wrapper around [`classify_path_primitive_rust`].
|
||||
|
|
@ -743,7 +743,7 @@ pub fn classify_path_primitive(callee: &str, input_fact: &PathFact) -> Option<Pa
|
|||
|
||||
/// Per-language path-primitive dispatcher.
|
||||
///
|
||||
/// Routes to the language-specific classifier — Rust, Python, JS/TS, Go,
|
||||
/// Routes to the language-specific classifier, Rust, Python, JS/TS, Go,
|
||||
/// Java, Ruby, PHP, or C/C++. Returns [`None`] for languages without a
|
||||
/// classifier (or callees the language's classifier doesn't recognise).
|
||||
pub fn classify_path_primitive_for_lang(
|
||||
|
|
@ -784,7 +784,7 @@ pub fn is_structural_variant_ctor_for_lang(lang: crate::symbol::Lang, callee: &s
|
|||
}
|
||||
|
||||
/// Per-language predicate for "this callee is a zero-arg fresh-allocation
|
||||
/// constructor" — used by the variant-rejection-path classifier so that
|
||||
/// constructor", used by the variant-rejection-path classifier so that
|
||||
/// `String::new()` (Rust) / `''` (Python/JS/Java/...) is recognised as a
|
||||
/// no-attacker-content fresh value with cleared `dotdot`/`absolute` axes.
|
||||
///
|
||||
|
|
@ -803,7 +803,7 @@ pub fn is_zero_arg_allocator_for_lang(lang: crate::symbol::Lang, _callee: &str)
|
|||
false
|
||||
}
|
||||
|
||||
/// Rust path-primitive classifier — `fs::canonicalize`, `Path::new`,
|
||||
/// Rust path-primitive classifier, `fs::canonicalize`, `Path::new`,
|
||||
/// `PathBuf::from`, identity-string conversions.
|
||||
pub fn classify_path_primitive_rust(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
||||
// Accept both path-qualified (`std::fs::canonicalize`, `fs::canonicalize`)
|
||||
|
|
@ -826,7 +826,7 @@ pub fn classify_path_primitive_rust(callee: &str, input_fact: &PathFact) -> Opti
|
|||
// `Path::new(s)` / `PathBuf::from(s)`:
|
||||
// pass-through of the input's PathFact so downstream `starts_with`
|
||||
// checks against a Path/PathBuf value still see the underlying
|
||||
// string's narrowed axes. No axis is forced — wrapping does not
|
||||
// string's narrowed axes. No axis is forced, wrapping does not
|
||||
// sanitize on its own.
|
||||
"new" | "from" => {
|
||||
if callee_contains_segment(callee, "Path") || callee_contains_segment(callee, "PathBuf")
|
||||
|
|
@ -837,8 +837,8 @@ pub fn classify_path_primitive_rust(callee: &str, input_fact: &PathFact) -> Opti
|
|||
}
|
||||
}
|
||||
// Identity conversions on strings/paths. Each one re-binds the
|
||||
// same logical value — the converted String / PathBuf / OsString
|
||||
// still describes the exact same filesystem path — so the PathFact
|
||||
// same logical value, the converted String / PathBuf / OsString
|
||||
// still describes the exact same filesystem path, so the PathFact
|
||||
// flows through unchanged. Without this, a sanitised `s: &str`
|
||||
// would lose its narrowed axes the moment the helper returns
|
||||
// `s.to_string()` / `s.to_owned()` / `String::from(s)`.
|
||||
|
|
@ -849,7 +849,7 @@ pub fn classify_path_primitive_rust(callee: &str, input_fact: &PathFact) -> Opti
|
|||
}
|
||||
}
|
||||
|
||||
/// Python path-primitive classifier — `os.path.normpath`, `os.path.realpath`,
|
||||
/// Python path-primitive classifier, `os.path.normpath`, `os.path.realpath`,
|
||||
/// `pathlib.Path.resolve`, `os.path.abspath`.
|
||||
///
|
||||
/// Pattern conventions: tree-sitter-python emits dotted attribute access as
|
||||
|
|
@ -893,7 +893,7 @@ pub fn classify_path_primitive_python(callee: &str, input_fact: &PathFact) -> Op
|
|||
}
|
||||
}
|
||||
|
||||
/// JavaScript / TypeScript path-primitive classifier — Node's `path` module:
|
||||
/// JavaScript / TypeScript path-primitive classifier, Node's `path` module:
|
||||
/// `path.normalize`, `path.resolve`, `path.join`.
|
||||
pub fn classify_path_primitive_js(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
||||
let leaf = rightmost_segment(callee);
|
||||
|
|
@ -920,7 +920,7 @@ pub fn classify_path_primitive_js(callee: &str, input_fact: &PathFact) -> Option
|
|||
}
|
||||
}
|
||||
|
||||
/// Go path-primitive classifier — `path/filepath` package:
|
||||
/// Go path-primitive classifier, `path/filepath` package:
|
||||
/// `filepath.Clean`, `filepath.Abs`.
|
||||
pub fn classify_path_primitive_go(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
||||
let leaf = rightmost_segment(callee);
|
||||
|
|
@ -947,7 +947,7 @@ pub fn classify_path_primitive_go(callee: &str, input_fact: &PathFact) -> Option
|
|||
}
|
||||
}
|
||||
|
||||
/// Java path-primitive classifier — `java.nio.file.Path.normalize` /
|
||||
/// Java path-primitive classifier, `java.nio.file.Path.normalize` /
|
||||
/// `Paths.get(s).normalize().toAbsolutePath()`.
|
||||
pub fn classify_path_primitive_java(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
||||
let leaf = rightmost_segment(callee);
|
||||
|
|
@ -980,7 +980,7 @@ pub fn classify_path_primitive_java(callee: &str, input_fact: &PathFact) -> Opti
|
|||
}
|
||||
}
|
||||
|
||||
/// Ruby path-primitive classifier — `File.expand_path` / `Pathname#cleanpath`.
|
||||
/// Ruby path-primitive classifier, `File.expand_path` / `Pathname#cleanpath`.
|
||||
pub fn classify_path_primitive_ruby(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
||||
let leaf = rightmost_segment(callee);
|
||||
match leaf {
|
||||
|
|
@ -1005,13 +1005,13 @@ pub fn classify_path_primitive_ruby(callee: &str, input_fact: &PathFact) -> Opti
|
|||
}
|
||||
}
|
||||
|
||||
/// PHP path-primitive classifier — `realpath`, `basename`.
|
||||
/// PHP path-primitive classifier, `realpath`, `basename`.
|
||||
pub fn classify_path_primitive_php(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
||||
let leaf = rightmost_segment(callee);
|
||||
match leaf {
|
||||
// `realpath($s)`:
|
||||
// Resolves symlinks and `..`, returns absolute path. Returns
|
||||
// `false` if the file doesn't exist — but on the success path
|
||||
// `false` if the file doesn't exist, but on the success path
|
||||
// (which is what reaches a sink), it produces a clean absolute path.
|
||||
"realpath" => {
|
||||
let mut f = input_fact.clone();
|
||||
|
|
@ -1021,7 +1021,7 @@ pub fn classify_path_primitive_php(callee: &str, input_fact: &PathFact) -> Optio
|
|||
Some(f)
|
||||
}
|
||||
// `basename($s)`:
|
||||
// Strips directory components — guaranteed to contain no `..`
|
||||
// Strips directory components, guaranteed to contain no `..`
|
||||
// (basename of `..` is `..`, but basename of any traversal-
|
||||
// prefixed path is just the leaf). Conservative: clear dotdot.
|
||||
"basename" => {
|
||||
|
|
@ -1034,7 +1034,7 @@ pub fn classify_path_primitive_php(callee: &str, input_fact: &PathFact) -> Optio
|
|||
}
|
||||
}
|
||||
|
||||
/// C / C++ path-primitive classifier — POSIX `realpath`,
|
||||
/// C / C++ path-primitive classifier, POSIX `realpath`,
|
||||
/// `std::filesystem::canonical`.
|
||||
pub fn classify_path_primitive_c_cpp(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
||||
let leaf = rightmost_segment(callee);
|
||||
|
|
@ -1089,7 +1089,7 @@ fn extract_contains_arg(text: &str) -> Option<String> {
|
|||
"strstr(",
|
||||
] {
|
||||
if let Some(idx) = text.find(prefix) {
|
||||
// Skip past the first argument (receiver) — the literal needle
|
||||
// Skip past the first argument (receiver), the literal needle
|
||||
// is the second arg, separated by a comma. Find the comma at
|
||||
// top level inside this call.
|
||||
let inner = &text[idx + prefix.len()..];
|
||||
|
|
@ -1123,7 +1123,7 @@ fn extract_starts_with_arg(text: &str) -> Option<String> {
|
|||
return Some(s);
|
||||
}
|
||||
}
|
||||
// Go free-function form `strings.HasPrefix(r, "/")` — second arg.
|
||||
// Go free-function form `strings.HasPrefix(r, "/")`, second arg.
|
||||
if let Some(idx) = text.find("strings.HasPrefix(") {
|
||||
let inner = &text[idx + "strings.HasPrefix(".len()..];
|
||||
if let Some(comma_idx) = top_level_comma(inner) {
|
||||
|
|
@ -1762,7 +1762,7 @@ mod tests {
|
|||
assert!(is_structural_variant_ctor("Box::new"));
|
||||
assert!(is_structural_variant_ctor("std::option::Option::Some"));
|
||||
// User-defined upper-camel-case variant name participates the
|
||||
// same way — name list is not part of the contract.
|
||||
// same way, name list is not part of the contract.
|
||||
assert!(is_structural_variant_ctor("MyResult::Ok"));
|
||||
assert!(is_structural_variant_ctor("Wrapper"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
//! String abstract domain for abstract interpretation.
|
||||
//!
|
||||
//! Tracks known prefix, suffix, and — when provably bounded — the finite set
|
||||
//! Tracks known prefix, suffix, and, when provably bounded, the finite set
|
||||
//! of possible concrete string values. Used for SSRF suppression (URL prefix
|
||||
//! proves host is locked), command-injection suppression (lookup result
|
||||
//! bounded to a safe set of literals), and general string analysis.
|
||||
|
|
@ -78,7 +78,7 @@ impl StringFact {
|
|||
/// the finite domain is `{s}`.
|
||||
///
|
||||
/// Empty prefix/suffix are normalised to `None` because "starts/ends with
|
||||
/// the empty string" carries no constraint — keeping `Some("")` would
|
||||
/// the empty string" carries no constraint, keeping `Some("")` would
|
||||
/// break join idempotence (`Some("")` ⊔ `Some("")` collapses to `None`).
|
||||
pub fn exact(s: &str) -> Self {
|
||||
let prefix = truncate_prefix(s);
|
||||
|
|
@ -134,7 +134,7 @@ impl StringFact {
|
|||
/// Inputs are sorted and deduped. If the cardinality exceeds
|
||||
/// [`MAX_DOMAIN_SIZE`] or the input is empty, the domain collapses to
|
||||
/// `None` (Top on this sub-field). The prefix/suffix sub-fields remain
|
||||
/// unset — callers can combine with [`Self::exact`] for single-element
|
||||
/// unset, callers can combine with [`Self::exact`] for single-element
|
||||
/// sets if tighter facts are desired.
|
||||
pub fn finite_set(values: Vec<String>) -> Self {
|
||||
let mut v = values;
|
||||
|
|
@ -411,7 +411,7 @@ fn truncate_suffix(s: &str) -> String {
|
|||
/// Longest common prefix of two strings, char-aligned.
|
||||
///
|
||||
/// Iterates by `char` rather than `byte` so multi-byte UTF-8 code points are
|
||||
/// either kept whole or dropped — a byte-wise comparison would slice into the
|
||||
/// either kept whole or dropped, a byte-wise comparison would slice into the
|
||||
/// middle of a code point and produce mojibake (`x as char` on a UTF-8
|
||||
/// continuation byte yields a garbage Latin-1 character).
|
||||
pub fn longest_common_prefix(a: &str, b: &str) -> String {
|
||||
|
|
@ -746,7 +746,7 @@ mod tests {
|
|||
let a = StringFact::from_prefix("https://api.example.com/");
|
||||
let b = StringFact::from_prefix("https://db.example.com/");
|
||||
let r = a.join(&b);
|
||||
// Common prefix is "https://" — anything past that diverges.
|
||||
// Common prefix is "https://", anything past that diverges.
|
||||
assert_eq!(
|
||||
r.prefix.as_deref(),
|
||||
Some("https://"),
|
||||
|
|
@ -781,7 +781,7 @@ mod tests {
|
|||
]
|
||||
}
|
||||
|
||||
/// `x ⊔ x = x` — join is idempotent across all sample shapes.
|
||||
/// `x ⊔ x = x`, join is idempotent across all sample shapes.
|
||||
#[test]
|
||||
fn join_idempotent_string() {
|
||||
for a in sample_strings() {
|
||||
|
|
@ -789,7 +789,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// `x ⊔ y = y ⊔ x` — join is commutative.
|
||||
/// `x ⊔ y = y ⊔ x`, join is commutative.
|
||||
#[test]
|
||||
fn join_commutative_string() {
|
||||
let xs = sample_strings();
|
||||
|
|
@ -806,7 +806,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// `x ⊓ x = x` — meet is idempotent.
|
||||
/// `x ⊓ x = x`, meet is idempotent.
|
||||
#[test]
|
||||
fn meet_idempotent_string() {
|
||||
for a in sample_strings() {
|
||||
|
|
@ -814,7 +814,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// `x ⊓ y = y ⊓ x` — meet is commutative.
|
||||
/// `x ⊓ y = y ⊓ x`, meet is commutative.
|
||||
#[test]
|
||||
fn meet_commutative_string() {
|
||||
let xs = sample_strings();
|
||||
|
|
@ -844,7 +844,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// `x ⊑ x` — leq is reflexive.
|
||||
/// `x ⊑ x`, leq is reflexive.
|
||||
#[test]
|
||||
fn leq_reflexive_string() {
|
||||
for a in sample_strings() {
|
||||
|
|
@ -852,7 +852,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// **Soundness**: `widen(a, b) ⊒ join(a, b)` — widening must
|
||||
/// **Soundness**: `widen(a, b) ⊒ join(a, b)`, widening must
|
||||
/// over-approximate join, otherwise dataflow loses information.
|
||||
#[test]
|
||||
fn widen_over_approximates_join_string() {
|
||||
|
|
@ -905,7 +905,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// Empty-string exact value must distinguish from Top — it is a
|
||||
/// Empty-string exact value must distinguish from Top, it is a
|
||||
/// singleton (`{""}`), not unconstrained. After the empty-prefix
|
||||
/// normalisation, prefix/suffix are `None` (carry no extra info)
|
||||
/// but the `domain` field still pins the value to exactly `""`.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue