mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
2173 lines
79 KiB
Rust
2173 lines
79 KiB
Rust
//! Path abstract domain for abstract interpretation.
|
|
//!
|
|
//! Tracks the abstract effect of path-sanitizer primitives on filesystem path
|
|
//! values along three independent axes:
|
|
//!
|
|
//! - `dotdot`: whether the path contains a `..` component
|
|
//! - `absolute`: whether the path is absolute (rooted at `/`, `\\`, `C:\\`, …)
|
|
//! - `normalized`: whether the path has been passed through a canonicalisation
|
|
//! / structural filter step (e.g. `fs::canonicalize`, `Component::Normal`
|
|
//! iterator filter)
|
|
//!
|
|
//! Plus a `prefix_lock` that records the known canonical root of the path
|
|
//! after a `starts_with(root_literal)` guard has been asserted on it.
|
|
//!
|
|
//! Each axis is a three-value lattice [`Tri::No`] / [`Tri::Yes`] / [`Tri::Maybe`]
|
|
//! where `Maybe` is Top (unknown) and `No` / `Yes` are the two definite
|
|
//! refinements. A value is path-safe for a FILE_IO sink iff
|
|
//! `dotdot == No && absolute == No`, i.e. we have proof that *no* `..`
|
|
//! component and *no* absolute root can leak through. `normalized == Yes`
|
|
//! alone is not sufficient (canonicalising an absolute input still produces
|
|
//! an absolute path); prefix_lock is used separately to certify containment
|
|
//! under a known root.
|
|
//!
|
|
//! This domain is Rust-first: the transfer rules wired from
|
|
//! `src/taint/ssa_transfer` recognise Rust's standard library path primitives
|
|
//! (`fs::canonicalize`, `Path::new`, `.starts_with`, `.components`, …).
|
|
//! Per-language extension slots live alongside those transfer rules; this
|
|
//! file defines only the lattice and its laws.
|
|
|
|
use crate::state::lattice::{AbstractDomain, Lattice};
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
/// Upper bound, in bytes, on the root string stored in a prefix lock.
///
/// Capping the root keeps on-disk summaries bounded for callees that stamp a
/// long canonical root onto every value they return.
pub const MAX_PREFIX_LOCK_LEN: usize = 128;
|
|
|
|
/// Three-value lattice: proven-absent, proven-present, or unknown.
|
|
///
|
|
/// Ordering (join-semilattice where [`Tri::Maybe`] is Top):
|
|
///
|
|
/// - `No ⊑ Maybe`, `Yes ⊑ Maybe`
|
|
/// - `No` and `Yes` are **incomparable** (both are strict refinements of
|
|
/// `Maybe`, but neither subsumes the other).
|
|
/// - `join(No, No) = No`, `join(Yes, Yes) = Yes`, otherwise `Maybe`.
|
|
/// - `meet(Maybe, x) = x`, `meet(No, No) = No`, `meet(Yes, Yes) = Yes`,
|
|
/// `meet(No, Yes)` is contradictory (represented by the enclosing
|
|
/// [`PathFact`]'s bottom flag).
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
|
pub enum Tri {
|
|
/// Proven absent (`..` component not present, path not absolute, etc.).
|
|
No,
|
|
/// Proven present.
|
|
Yes,
|
|
/// Unknown, no transfer or guard has proved the axis yet.
|
|
Maybe,
|
|
}
|
|
|
|
impl Tri {
|
|
pub fn top() -> Self {
|
|
Tri::Maybe
|
|
}
|
|
|
|
pub fn is_top(&self) -> bool {
|
|
matches!(self, Tri::Maybe)
|
|
}
|
|
|
|
/// Join: least upper bound. Equal values are preserved; disagreements
|
|
/// widen to [`Tri::Maybe`].
|
|
pub fn join(&self, other: &Self) -> Self {
|
|
match (*self, *other) {
|
|
(a, b) if a == b => a,
|
|
_ => Tri::Maybe,
|
|
}
|
|
}
|
|
|
|
/// Meet: greatest lower bound. `Maybe ⊓ x = x`; disagreement between
|
|
/// `No` and `Yes` is contradictory and returns [`None`]. Callers convert
|
|
/// the resulting [`Option`] into a `PathFact` bottom flag at the product
|
|
/// level.
|
|
pub fn meet_checked(&self, other: &Self) -> Option<Self> {
|
|
match (*self, *other) {
|
|
(Tri::Maybe, x) | (x, Tri::Maybe) => Some(x),
|
|
(a, b) if a == b => Some(a),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Widen: drop to `Maybe` on any change.
|
|
pub fn widen(&self, other: &Self) -> Self {
|
|
if self == other { *self } else { Tri::Maybe }
|
|
}
|
|
|
|
/// Partial order: `self ⊑ other`.
|
|
pub fn leq(&self, other: &Self) -> bool {
|
|
match (*self, *other) {
|
|
(_, Tri::Maybe) => true,
|
|
(a, b) => a == b,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Path abstract fact.
|
|
///
|
|
/// Product of three [`Tri`] axes plus an optional canonical-prefix root.
|
|
/// The empty (`default()`) fact is Top on every axis: the abstract path
|
|
/// could be any filesystem path.
|
|
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
|
pub struct PathFact {
|
|
/// Does the path contain a `..` component?
|
|
pub dotdot: Tri,
|
|
/// Is the path absolute (rooted at `/`, `\`, drive letter)?
|
|
pub absolute: Tri,
|
|
/// Has the path been passed through a canonicalisation / component filter?
|
|
pub normalized: Tri,
|
|
/// Canonical root the path was proved to start with. `None` = unknown.
|
|
pub prefix_lock: Option<String>,
|
|
/// True when the fact is contradictory (e.g. two irreconcilable meets).
|
|
/// Carried as a flag rather than a sentinel so the primary path stays
|
|
/// allocation-free.
|
|
is_bottom: bool,
|
|
}
|
|
|
|
impl Default for PathFact {
|
|
fn default() -> Self {
|
|
Self::top()
|
|
}
|
|
}
|
|
|
|
impl PathFact {
|
|
/// Top: no knowledge on any axis.
|
|
pub fn top() -> Self {
|
|
Self {
|
|
dotdot: Tri::Maybe,
|
|
absolute: Tri::Maybe,
|
|
normalized: Tri::Maybe,
|
|
prefix_lock: None,
|
|
is_bottom: false,
|
|
}
|
|
}
|
|
|
|
/// Bottom: unsatisfiable / empty set.
|
|
pub fn bottom() -> Self {
|
|
Self {
|
|
dotdot: Tri::Maybe,
|
|
absolute: Tri::Maybe,
|
|
normalized: Tri::Maybe,
|
|
prefix_lock: None,
|
|
is_bottom: true,
|
|
}
|
|
}
|
|
|
|
pub fn is_top(&self) -> bool {
|
|
!self.is_bottom
|
|
&& self.dotdot == Tri::Maybe
|
|
&& self.absolute == Tri::Maybe
|
|
&& self.normalized == Tri::Maybe
|
|
&& self.prefix_lock.is_none()
|
|
}
|
|
|
|
pub fn is_bottom(&self) -> bool {
|
|
self.is_bottom
|
|
}
|
|
|
|
/// Construct a fact after a sanitisation step that clears `..` components.
|
|
pub fn with_dotdot_cleared(mut self) -> Self {
|
|
self.dotdot = Tri::No;
|
|
self
|
|
}
|
|
|
|
/// Construct a fact after a sanitisation step that clears absolute roots.
|
|
pub fn with_absolute_cleared(mut self) -> Self {
|
|
self.absolute = Tri::No;
|
|
self
|
|
}
|
|
|
|
/// Construct a fact after a normalisation step (canonicalize / components
|
|
/// filter). Sets `normalized = Yes` and clears `..`. Absolute axis is
|
|
/// **not** touched by default: `canonicalize("/etc/passwd")` stays
|
|
/// absolute, the plan's `canonicalize` transfer rule sets
|
|
/// `absolute = Yes` separately.
|
|
pub fn with_normalized(mut self) -> Self {
|
|
self.normalized = Tri::Yes;
|
|
self.dotdot = Tri::No;
|
|
self
|
|
}
|
|
|
|
/// Attach a prefix-lock root (the argument of a proven `starts_with`
|
|
/// guard). Truncates to [`MAX_PREFIX_LOCK_LEN`] on a char boundary so
|
|
/// on-disk summary size stays bounded.
|
|
pub fn with_prefix_lock(mut self, root: &str) -> Self {
|
|
if root.is_empty() {
|
|
return self;
|
|
}
|
|
self.prefix_lock = Some(truncate_prefix_lock(root));
|
|
self
|
|
}
|
|
|
|
/// True iff the fact proves both `dotdot = No` and `absolute = No`.
|
|
///
|
|
/// This is the core sink-suppression predicate: a relative, `..`-free
|
|
/// path can still escape into a parent via a symlink, but it cannot
|
|
/// reach an attacker-controlled absolute location and cannot contain
|
|
/// explicit parent-dir components, which together cover the
|
|
/// documented rs-safe-0** FPs.
|
|
pub fn is_path_safe(&self) -> bool {
|
|
!self.is_bottom && self.dotdot == Tri::No && self.absolute == Tri::No
|
|
}
|
|
|
|
/// True iff the fact proves the path stays inside a trusted region
|
|
/// for path-traversal purposes (the FILE_IO sink-suppression
|
|
/// predicate).
|
|
///
|
|
/// Accepts either of two structural invariants:
|
|
///
|
|
/// * `dotdot = No && absolute = No` — the relative-and-`..`-free
|
|
/// shape recognised by `is_path_safe`. Cannot escape to an
|
|
/// attacker-controlled absolute location.
|
|
/// * `dotdot = No && prefix_lock.is_some()` — a canonicalised path
|
|
/// (typically `File.expand_path` / `realpath` / `fs::canonicalize`)
|
|
/// that has been verified-rooted by a `starts_with`-style guard
|
|
/// against some prefix. The prefix may be opaque
|
|
/// ([`OPAQUE_PREFIX_LOCK`]); the structural guarantee is the same:
|
|
/// the path is provably inside the locked subtree.
|
|
///
|
|
/// This relaxation closes the rswag CVE-2023-38337 patched-counterpart
|
|
/// FP shape (`File.expand_path(File.join(root, p)) + start_with? root`)
|
|
/// and the equivalent Python (`os.path.realpath + .startswith(root)`)
|
|
/// and JS (`path.resolve + .startsWith(root)`) idioms, all of which
|
|
/// produce absolute paths but are sound against `..` traversal.
|
|
pub fn is_path_traversal_safe(&self) -> bool {
|
|
if self.is_bottom || self.dotdot != Tri::No {
|
|
return false;
|
|
}
|
|
self.absolute == Tri::No || self.prefix_lock.is_some()
|
|
}
|
|
|
|
/// True iff the fact has a prefix lock equal to or contained under
|
|
/// `root`. Used by sink-suppression to confirm that a path derived
|
|
/// from a locked root is provably still under that root.
|
|
pub fn prefix_locked_under(&self, root: &str) -> bool {
|
|
match &self.prefix_lock {
|
|
Some(p) => p.starts_with(root) || root.starts_with(p.as_str()),
|
|
None => false,
|
|
}
|
|
}
|
|
|
|
// ── Lattice operations ──────────────────────────────────────────────
|
|
|
|
pub fn join(&self, other: &Self) -> Self {
|
|
if self.is_bottom {
|
|
return other.clone();
|
|
}
|
|
if other.is_bottom {
|
|
return self.clone();
|
|
}
|
|
let prefix_lock = match (&self.prefix_lock, &other.prefix_lock) {
|
|
(Some(a), Some(b)) => {
|
|
// Longest common prefix; drop to None when LCP is empty.
|
|
let lcp = longest_common_prefix(a, b);
|
|
if lcp.is_empty() {
|
|
None
|
|
} else {
|
|
Some(truncate_prefix_lock(&lcp))
|
|
}
|
|
}
|
|
_ => None,
|
|
};
|
|
Self {
|
|
dotdot: self.dotdot.join(&other.dotdot),
|
|
absolute: self.absolute.join(&other.absolute),
|
|
normalized: self.normalized.join(&other.normalized),
|
|
prefix_lock,
|
|
is_bottom: false,
|
|
}
|
|
}
|
|
|
|
pub fn meet(&self, other: &Self) -> Self {
|
|
if self.is_bottom || other.is_bottom {
|
|
return Self::bottom();
|
|
}
|
|
let (dotdot, abs, norm) = match (
|
|
self.dotdot.meet_checked(&other.dotdot),
|
|
self.absolute.meet_checked(&other.absolute),
|
|
self.normalized.meet_checked(&other.normalized),
|
|
) {
|
|
(Some(a), Some(b), Some(c)) => (a, b, c),
|
|
_ => return Self::bottom(),
|
|
};
|
|
let prefix_lock = match (&self.prefix_lock, &other.prefix_lock) {
|
|
(Some(a), Some(b)) => {
|
|
// Consistent when one is a prefix of the other; pick the
|
|
// more specific (longer) root. Otherwise contradictory.
|
|
if a.starts_with(b.as_str()) {
|
|
Some(a.clone())
|
|
} else if b.starts_with(a.as_str()) {
|
|
Some(b.clone())
|
|
} else {
|
|
return Self::bottom();
|
|
}
|
|
}
|
|
(Some(a), None) => Some(a.clone()),
|
|
(None, Some(b)) => Some(b.clone()),
|
|
(None, None) => None,
|
|
};
|
|
Self {
|
|
dotdot,
|
|
absolute: abs,
|
|
normalized: norm,
|
|
prefix_lock,
|
|
is_bottom: false,
|
|
}
|
|
}
|
|
|
|
pub fn widen(&self, other: &Self) -> Self {
|
|
if self.is_bottom {
|
|
return other.clone();
|
|
}
|
|
if other.is_bottom {
|
|
return self.clone();
|
|
}
|
|
let prefix_lock = if self.prefix_lock == other.prefix_lock {
|
|
self.prefix_lock.clone()
|
|
} else {
|
|
None
|
|
};
|
|
Self {
|
|
dotdot: self.dotdot.widen(&other.dotdot),
|
|
absolute: self.absolute.widen(&other.absolute),
|
|
normalized: self.normalized.widen(&other.normalized),
|
|
prefix_lock,
|
|
is_bottom: false,
|
|
}
|
|
}
|
|
|
|
pub fn leq(&self, other: &Self) -> bool {
|
|
if self.is_bottom {
|
|
return true;
|
|
}
|
|
if other.is_bottom {
|
|
return false;
|
|
}
|
|
let prefix_ok = match (&self.prefix_lock, &other.prefix_lock) {
|
|
(_, None) => true,
|
|
(None, Some(_)) => false,
|
|
(Some(a), Some(b)) => a.starts_with(b.as_str()),
|
|
};
|
|
prefix_ok
|
|
&& self.dotdot.leq(&other.dotdot)
|
|
&& self.absolute.leq(&other.absolute)
|
|
&& self.normalized.leq(&other.normalized)
|
|
}
|
|
}
|
|
|
|
impl Lattice for PathFact {
|
|
fn bot() -> Self {
|
|
Self::bottom()
|
|
}
|
|
|
|
fn join(&self, other: &Self) -> Self {
|
|
self.join(other)
|
|
}
|
|
|
|
fn leq(&self, other: &Self) -> bool {
|
|
self.leq(other)
|
|
}
|
|
}
|
|
|
|
impl AbstractDomain for PathFact {
|
|
fn top() -> Self {
|
|
Self::top()
|
|
}
|
|
|
|
fn meet(&self, other: &Self) -> Self {
|
|
self.meet(other)
|
|
}
|
|
|
|
fn widen(&self, other: &Self) -> Self {
|
|
self.widen(other)
|
|
}
|
|
}
|
|
|
|
// ── Rust path-primitive classifiers ─────────────────────────────────────
|
|
//
|
|
// Per-language extension slot: each new language that wants to participate in
|
|
// PathFact should add its own classifier module and dispatch from
|
|
// `src/taint/ssa_transfer/mod.rs` on `transfer.lang`. Rust is wired here
|
|
// because the initial rs-safe-0** closure targets Rust idioms; Python's
|
|
// `os.path.normpath`, Java's `Path.normalize`, and Go's `filepath.Clean`
|
|
// would slot in alongside.
|
|
|
|
/// Which path-rejection idiom, if any, a branch condition matches.
///
/// Under the *rejection* reading, a TRUE condition means the enclosing
/// branch rejects the input (returns, panics, throws), so on the FALSE
/// branch the corresponding axis can be narrowed to a proven-safe value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathRejection {
    /// `x.contains("..")`: the false branch proves `dotdot = No` on the
    /// receiver.
    DotDot,
    /// `x.starts_with("/")` / `x.starts_with('\\')`: the false branch proves
    /// `absolute = No` on the receiver.
    AbsoluteSlash,
    /// `x.is_absolute()` / `Path::new(x).is_absolute()`: the false branch
    /// proves `absolute = No` on the argument/receiver.
    IsAbsolute,
    /// The condition is not a path-rejection idiom.
    None,
}
|
|
|
|
/// Which path *positive*-assertion idiom, if any, a branch condition
/// matches. The refinement applies on the branch where the condition is
/// TRUE.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathAssertion {
    /// `x.starts_with("<literal_root>")`: the true branch attaches
    /// `prefix_lock = Some("<literal_root>")` to the receiver.
    PrefixLock(String),
    /// The condition is not a path-assertion idiom.
    None,
}
|
|
|
|
/// Placeholder root stored in [`PathFact::prefix_lock`] when the argument of
/// a `starts_with`-style guard is not a literal (a method call, a field
/// access, an application-configured root).
///
/// Sink suppression only needs the structural invariant "verified rooted
/// under SOME prefix"; the exact prefix bytes do not matter. Together with a
/// `dotdot=No` proof from canonicalisation or a `..` rejection, an opaque
/// prefix lock is enough to show the path stays inside a trusted region.
pub const OPAQUE_PREFIX_LOCK: &str = "__nyx_opaque_prefix__";
|
|
|
|
/// Recognise a Rust path-rejection branch idiom from the raw condition text.
|
|
///
|
|
/// Accepts both atomic conditions (`x.contains("..")`) and multi-clause
|
|
/// disjunctions (`x.contains("..") || x.starts_with('/') || ...`). For
|
|
/// disjunctions the false branch implies **every** clause is false, so the
|
|
/// classifier returns the **first** recognised axis; callers should also
|
|
/// invoke [`classify_path_rejection_axes`] to pick up every axis covered
|
|
/// by an OR-chain. Conservative: returns [`PathRejection::None`] when no
|
|
/// path-rejection clause is found.
|
|
pub fn classify_path_rejection(text: &str) -> PathRejection {
|
|
let trimmed = text.trim();
|
|
if trimmed.is_empty() {
|
|
return PathRejection::None;
|
|
}
|
|
// Multi-clause OR: return the first recognised axis (callers should
|
|
// use `classify_path_rejection_axes` for the full set).
|
|
let axes = classify_path_rejection_axes(trimmed);
|
|
if axes.is_empty() {
|
|
return PathRejection::None;
|
|
}
|
|
axes[0]
|
|
}
|
|
|
|
/// Recognise every path-rejection axis covered by the condition, handling
|
|
/// disjunctions (`a || b || c`) by classifying each clause independently
|
|
/// and returning the union of recognised rejections.
|
|
///
|
|
/// The false branch of the whole condition implies all clauses are false,
|
|
/// so every recognised axis narrows on the false branch.
|
|
pub fn classify_path_rejection_axes(text: &str) -> smallvec::SmallVec<[PathRejection; 3]> {
|
|
let mut out: smallvec::SmallVec<[PathRejection; 3]> = smallvec::SmallVec::new();
|
|
for clause in split_top_level_or(text) {
|
|
let clause = clause.trim();
|
|
// Multi-axis special case: `!filepath.IsLocal(p)` (Go).
|
|
// `filepath.IsLocal` returns true iff the path stays within the
|
|
// current directory, no leading `/`, no `..` segments, no Windows
|
|
// drive root. Idiomatic Go path-traversal guard:
|
|
// `if !filepath.IsLocal(p) { return }`
|
|
// The TRUE branch terminates; the FALSE branch (where IsLocal is
|
|
// true) proves both `dotdot = No` and `absolute = No` on the
|
|
// argument simultaneously. Recognise it here so both axes flow
|
|
// into the surviving branch's PathFact narrowing.
|
|
if has_negated_filepath_is_local(clause) {
|
|
for axis in [PathRejection::DotDot, PathRejection::IsAbsolute] {
|
|
if !out.contains(&axis) {
|
|
out.push(axis);
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
let cls = classify_path_rejection_atom(clause);
|
|
if !matches!(cls, PathRejection::None) && !out.contains(&cls) {
|
|
out.push(cls);
|
|
}
|
|
}
|
|
out
|
|
}
|
|
|
|
/// True iff any top-level OR clause of `text` is the pre-negated
|
|
/// `!filepath.IsLocal(<expr>)` Go idiom — i.e. a clause whose `!` is
|
|
/// already consumed by [`classify_path_rejection_axes`] when reporting
|
|
/// the safe arm. Callers use this to decide whether AST-level negation
|
|
/// (`condition_negated`) was already accounted for by the classifier
|
|
/// (returns `true`) or still needs to flip the safe-arm polarity for
|
|
/// polarity-blind atoms like `!path.contains("..")` (returns `false`).
|
|
pub(crate) fn cond_has_pre_negated_islocal_clause(text: &str) -> bool {
|
|
for clause in split_top_level_or(text) {
|
|
if has_negated_filepath_is_local(clause.trim()) {
|
|
return true;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
|
|
/// Recognises Go's idiomatic path-traversal guard `!filepath.IsLocal(<expr>)`.
///
/// Matching ignores whitespace, so `! filepath.IsLocal(` and
/// `!filepath . IsLocal(` are accepted, and one pair of parentheses wrapping
/// the whole clause is tolerated. [`classify_path_rejection_axes`] uses this
/// to inject both [`PathRejection::DotDot`] and [`PathRejection::IsAbsolute`]
/// on the false branch, which is the local-path branch by construction.
///
/// Only the start of the negated operand is inspected: the function returns
/// `true` as soon as the whitespace-free text after `!` begins with
/// `filepath.IsLocal(`.
fn has_negated_filepath_is_local(clause: &str) -> bool {
    const CALL_HEAD: &str = "filepath.IsLocal(";

    // Peel one enclosing `( ... )` pair, if both ends are present.
    let outer = clause.trim();
    let unwrapped = match outer.strip_prefix('(') {
        Some(rest) => rest.strip_suffix(')').unwrap_or(outer),
        None => outer,
    }
    .trim();

    // The clause must be a negation.
    let operand = match unwrapped.strip_prefix('!') {
        Some(rest) => rest,
        None => return false,
    };

    // Walk the operand with whitespace skipped and compare it against the
    // expected call head character by character.
    let mut expected = CALL_HEAD.chars();
    for ch in operand.chars().filter(|c| !c.is_whitespace()) {
        match expected.next() {
            None => return true,
            Some(want) if want == ch => {}
            Some(_) => return false,
        }
    }
    // Operand ran out: it matches only if the whole head was consumed.
    expected.next().is_none()
}
|
|
|
|
fn classify_path_rejection_atom(clause: &str) -> PathRejection {
|
|
// `.contains("..")` (Rust, Java) / `.includes("..")` (JS/TS) /
|
|
// `.include?("..")` (Ruby) / `strings.Contains(s, "..")` (Go) /
|
|
// `strstr(s, "..")` (C/C++), every form recognised by
|
|
// `extract_contains_arg` returns `..` if the needle is the dotdot
|
|
// segment.
|
|
if let Some(needle) = extract_contains_arg(clause)
|
|
&& needle == ".."
|
|
{
|
|
return PathRejection::DotDot;
|
|
}
|
|
// Python `".." in s`, operator form. Look for `".." in <something>`
|
|
// anywhere in the clause text. Conservative: requires the literal
|
|
// `".." in ` substring (whitespace-tolerant).
|
|
if has_python_dotdot_in(clause) {
|
|
return PathRejection::DotDot;
|
|
}
|
|
// `.starts_with('/')` (Rust) / `.startsWith("/")` (JS/TS/Java) /
|
|
// `.startswith("/")` (Python) / `.start_with?("/")` (Ruby) /
|
|
// `strings.HasPrefix(s, "/")` (Go).
|
|
if let Some(needle) = extract_starts_with_arg(clause)
|
|
&& (needle == "/" || needle == "\\")
|
|
{
|
|
return PathRejection::AbsoluteSlash;
|
|
}
|
|
// `.is_absolute()` (Rust) / `.isAbsolute()` (Java
|
|
// `Paths.get(s).isAbsolute()`) / `os.path.isabs(s)` (Python) /
|
|
// `filepath.IsAbs(s)` (Go).
|
|
if clause.contains(".is_absolute()")
|
|
|| clause.contains(".isAbsolute()")
|
|
|| clause.contains("os.path.isabs(")
|
|
|| clause.contains("filepath.IsAbs(")
|
|
{
|
|
return PathRejection::IsAbsolute;
|
|
}
|
|
// C/C++ subscript form: `s[0] == '/'` or `s[0] == '\\'` (and reversed).
|
|
// Idiomatic C/C++ absolute-path check since C has no `.startsWith` method.
|
|
if has_first_char_absolute_check(clause) {
|
|
return PathRejection::AbsoluteSlash;
|
|
}
|
|
PathRejection::None
|
|
}
|
|
|
|
/// Recognises the C/C++ first-character absolute-path test, e.g.
/// `<var>[0] == '/'`, `<var>[0] == '\\'`, or the reversed `'/' == <var>[0]`.
///
/// C has no `.starts_with` method, so this subscript comparison is the
/// idiomatic check. The match is textual: for each `[0]` subscript in the
/// clause, a window reaching 32 bytes before the subscript and 32 bytes
/// after it is examined, and the clause qualifies when that window holds
/// both an `==` or `!=` operator and a separator literal written as a char
/// (`'/'`, `'\\'`) or as a string (`"/"`, `"\\"`).
fn has_first_char_absolute_check(clause: &str) -> bool {
    const REACH: usize = 32;
    const SUBSCRIPT: &[u8] = b"[0]";
    const OPERATORS: [&[u8]; 2] = [b"==", b"!="];
    // Source-text spellings: '/', '\\', "/", "\\".
    const SEPARATOR_LITERALS: [&[u8]; 4] = [b"'/'", b"'\\\\'", b"\"/\"", b"\"\\\\\""];

    /// Byte-level substring test.
    fn contains(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|chunk| chunk == needle)
    }

    let bytes = clause.as_bytes();
    bytes
        .windows(SUBSCRIPT.len())
        .enumerate()
        .filter(|(_, chunk)| *chunk == SUBSCRIPT)
        .any(|(at, _)| {
            let lo = at.saturating_sub(REACH);
            let hi = (at + SUBSCRIPT.len() + REACH).min(bytes.len());
            let window = &bytes[lo..hi];
            OPERATORS.iter().any(|op| contains(window, op))
                && SEPARATOR_LITERALS.iter().any(|lit| contains(window, lit))
        })
}
|
|
|
|
/// Recognises Python's membership-operator form of the `..` check:
/// `".." in s` or `'..' in s`.
///
/// The test is textual and conservative. Somewhere in `clause` there must be
/// a two-dot string literal — written with double *or* single quotes, the
/// same quote on both sides — followed by optional ASCII whitespace and the
/// keyword `in`. The `in` must end at a word boundary: the next byte, if
/// any, may not be an ASCII letter, digit, or underscore, so `".." index`
/// does not match.
fn has_python_dotdot_in(clause: &str) -> bool {
    let bytes = clause.as_bytes();
    bytes.windows(4).enumerate().any(|(start, literal)| {
        // A `..` literal is quote, dot, dot, and the matching closing quote.
        let quote = literal[0];
        let is_dotdot_literal = (quote == b'"' || quote == b'\'')
            && literal[1] == b'.'
            && literal[2] == b'.'
            && literal[3] == quote;
        if !is_dotdot_literal {
            return false;
        }

        // Skip any whitespace between the literal and the keyword.
        let tail = &bytes[start + 4..];
        let blanks = tail.iter().take_while(|b| b.is_ascii_whitespace()).count();
        let tail = &tail[blanks..];

        // `in` must be a whole word, not the start of an identifier.
        tail.starts_with(b"in")
            && tail
                .get(2)
                .map_or(true, |next| !next.is_ascii_alphanumeric() && *next != b'_')
    })
}
|
|
|
|
/// Split a condition text on top-level `||` operators, ignoring those
|
|
/// inside string literals or nested parentheses. Also recognises Python's
|
|
/// keyword form ` or ` (whitespace-bounded) at top level so OR-chain
|
|
/// rejection idioms are decomposed identically across languages.
|
|
fn split_top_level_or(text: &str) -> smallvec::SmallVec<[&str; 4]> {
|
|
let mut out: smallvec::SmallVec<[&str; 4]> = smallvec::SmallVec::new();
|
|
let bytes = text.as_bytes();
|
|
let mut depth: i32 = 0;
|
|
let mut in_quote: Option<u8> = None;
|
|
let mut last = 0usize;
|
|
let mut i = 0usize;
|
|
while i < bytes.len() {
|
|
let b = bytes[i];
|
|
if let Some(q) = in_quote {
|
|
if b == b'\\' && i + 1 < bytes.len() {
|
|
i += 2;
|
|
continue;
|
|
}
|
|
if b == q {
|
|
in_quote = None;
|
|
}
|
|
i += 1;
|
|
continue;
|
|
}
|
|
match b {
|
|
b'"' | b'\'' => {
|
|
in_quote = Some(b);
|
|
i += 1;
|
|
continue;
|
|
}
|
|
b'(' | b'[' | b'{' => {
|
|
depth += 1;
|
|
i += 1;
|
|
continue;
|
|
}
|
|
b')' | b']' | b'}' => {
|
|
depth -= 1;
|
|
i += 1;
|
|
continue;
|
|
}
|
|
b'|' if depth == 0 && i + 1 < bytes.len() && bytes[i + 1] == b'|' => {
|
|
out.push(&text[last..i]);
|
|
last = i + 2;
|
|
i += 2;
|
|
continue;
|
|
}
|
|
// Python `or` keyword at top level. Require word boundaries on
|
|
// both sides: a preceding ASCII whitespace, and a following ASCII
|
|
// whitespace. Avoids splitting inside identifiers like
|
|
// `record_or_default`.
|
|
b'o' | b'O'
|
|
if depth == 0
|
|
&& i + 2 < bytes.len()
|
|
&& (bytes[i + 1] == b'r' || bytes[i + 1] == b'R')
|
|
&& bytes[i + 2].is_ascii_whitespace()
|
|
&& (i == 0 || bytes[i - 1].is_ascii_whitespace()) =>
|
|
{
|
|
// i is start of `or`. Trim trailing whitespace from the
|
|
// previous clause: out.push slice [last..i] but caller
|
|
// .trim()s anyway, so pushing the raw range is fine.
|
|
out.push(&text[last..i]);
|
|
last = i + 2;
|
|
i += 2;
|
|
continue;
|
|
}
|
|
_ => {
|
|
i += 1;
|
|
}
|
|
}
|
|
}
|
|
out.push(&text[last..]);
|
|
out
|
|
}
|
|
|
|
/// Recognise a path-positive-assertion branch idiom (language-agnostic).
|
|
///
|
|
/// Returns:
|
|
///
|
|
/// * `PrefixLock(<literal>)` when the condition is a `starts_with`-style
|
|
/// call with a literal prefix of length ≥ 2. Sibling single-character
|
|
/// prefixes (`"/"`, `"\\"`) are absolute-axis rejections, not locks.
|
|
/// * `PrefixLock(`[`OPAQUE_PREFIX_LOCK`]`)` when the call has a
|
|
/// non-empty, *non-literal* argument (method call, field access, local
|
|
/// variable). The opaque marker certifies the structural invariant
|
|
/// "verified rooted under some prefix" without committing to bytes,
|
|
/// which is exactly what FILE_IO sink-suppression needs to combine with
|
|
/// a `dotdot=No` proof — the upstream code path
|
|
/// `File.expand_path(...) + start_with?(<config_root>)` is the
|
|
/// motivating example.
|
|
/// * `None` otherwise.
|
|
pub fn classify_path_assertion(text: &str) -> PathAssertion {
|
|
let trimmed = text.trim();
|
|
match extract_starts_with_arg(trimmed) {
|
|
Some(needle) if needle.len() >= 2 => PathAssertion::PrefixLock(needle),
|
|
// Single-char literal (`"/"`, `"\\"`) is an absolute-axis
|
|
// rejection idiom handled by `classify_path_rejection_axes`, not
|
|
// a positive prefix-lock — fall through to None.
|
|
Some(_) => PathAssertion::None,
|
|
// No literal recovered: check for a non-literal argument
|
|
// (method call, field access, configured root) and attach the
|
|
// opaque marker so the structural "verified rooted under SOME
|
|
// prefix" invariant is recorded for downstream sink suppression.
|
|
None if has_starts_with_call_with_nonempty_arg(trimmed) => {
|
|
PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string())
|
|
}
|
|
None => PathAssertion::None,
|
|
}
|
|
}
|
|
|
|
/// Recognise a *structural* one-argument enum-variant constructor.
|
|
///
|
|
/// Returns `true` when `callee` matches Rust's grammar for a variant
|
|
/// constructor call: the leaf (last path segment after `::` / `.`)
|
|
/// starts with an uppercase ASCII letter, and the callee has no method
|
|
/// receiver portion past a single terminal identifier. Callers combine
|
|
/// this with a structural "single-argument call, no receiver" gate; the
|
|
/// classification is deliberately name-agnostic and does not hard-code
|
|
/// `Some` / `Ok` / `Err` / `Box::new` / …, so user-defined enum variants
|
|
/// participate on the same footing as stdlib ones.
|
|
///
|
|
/// The heuristic is intentionally conservative:
|
|
/// * Must be non-empty.
|
|
/// * The leaf segment must begin with an ASCII uppercase letter
|
|
/// (Rust's variant / struct / type grammar).
|
|
/// * The leaf segment must be ASCII alphanumeric / underscore, no
|
|
/// method call noise (parentheses, argument lists) survives here
|
|
/// because callees arrive in their normalised scoped-identifier
|
|
/// form.
|
|
///
|
|
/// Callers that use this as a PathFact passthrough must still verify
|
|
/// the call has exactly one argument (or one argument past a receiver-
|
|
/// less structural gate); the leaf check alone does not constrain
|
|
/// arity.
|
|
pub fn is_structural_variant_ctor(callee: &str) -> bool {
|
|
let trimmed = callee.trim();
|
|
if trimmed.is_empty() {
|
|
return false;
|
|
}
|
|
// Accept either form by inspecting both the leaf and (for scoped
|
|
// callees) the penultimate segment. A bare identifier whose leaf is
|
|
// upper-camel-case names an enum variant or tuple struct (`Some`,
|
|
// `Ok`, `MyResult`). A scoped identifier whose *penultimate*
|
|
// segment is upper-camel-case names an associated constructor on
|
|
// that type, `Box::new`, `Cell::from`, `PathBuf::with_capacity`,
|
|
// etc. The latter is the lower-leaf-case shape we want to admit
|
|
// alongside the bare-variant shape.
|
|
let segments: smallvec::SmallVec<[&str; 4]> =
|
|
trimmed.split("::").filter(|s| !s.is_empty()).collect();
|
|
let is_upper_ident = |s: &str| -> bool {
|
|
match s.chars().next() {
|
|
Some(c) if c.is_ascii_uppercase() => {
|
|
s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
|
|
}
|
|
_ => false,
|
|
}
|
|
};
|
|
if segments.is_empty() {
|
|
return false;
|
|
}
|
|
if segments.len() == 1 {
|
|
return is_upper_ident(segments[0]);
|
|
}
|
|
// Scoped: accept either upper-camel-case leaf (`Module::Variant`)
|
|
// or upper-camel-case penultimate (`Type::associated_fn`).
|
|
let leaf = segments[segments.len() - 1];
|
|
let parent = segments[segments.len() - 2];
|
|
is_upper_ident(leaf) || is_upper_ident(parent)
|
|
}
|
|
|
|
/// Recognise a Rust path-producing primitive call by canonical callee name,
|
|
/// and return its PathFact effect on the result. `input_fact` is the
|
|
/// PathFact of the receiver/first argument (the value being sanitised);
|
|
/// it is used as the baseline to which the call's effect is applied.
|
|
///
|
|
/// Returned [`None`] means the callee is not a recognised path primitive ,
|
|
/// the caller should leave the result at its pre-existing PathFact (Top).
|
|
///
|
|
/// Backwards-compatible wrapper around [`classify_path_primitive_rust`].
|
|
/// New callers should prefer [`classify_path_primitive_for_lang`] which
|
|
/// dispatches on the source language.
|
|
pub fn classify_path_primitive(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
|
classify_path_primitive_rust(callee, input_fact)
|
|
}
|
|
|
|
/// Per-language path-primitive dispatcher.
|
|
///
|
|
/// Routes to the language-specific classifier, Rust, Python, JS/TS, Go,
|
|
/// Java, Ruby, PHP, or C/C++. Returns [`None`] for languages without a
|
|
/// classifier (or callees the language's classifier doesn't recognise).
|
|
pub fn classify_path_primitive_for_lang(
|
|
lang: crate::symbol::Lang,
|
|
callee: &str,
|
|
input_fact: &PathFact,
|
|
) -> Option<PathFact> {
|
|
use crate::symbol::Lang;
|
|
match lang {
|
|
Lang::Rust => classify_path_primitive_rust(callee, input_fact),
|
|
Lang::Python => classify_path_primitive_python(callee, input_fact),
|
|
Lang::JavaScript | Lang::TypeScript => classify_path_primitive_js(callee, input_fact),
|
|
Lang::Go => classify_path_primitive_go(callee, input_fact),
|
|
Lang::Java => classify_path_primitive_java(callee, input_fact),
|
|
Lang::Ruby => classify_path_primitive_ruby(callee, input_fact),
|
|
Lang::Php => classify_path_primitive_php(callee, input_fact),
|
|
Lang::C | Lang::Cpp => classify_path_primitive_c_cpp(callee, input_fact),
|
|
}
|
|
}
|
|
|
|
/// Per-language structural-variant-constructor predicate.
|
|
///
|
|
/// Rust uses ASCII-uppercase variant naming; other languages with
|
|
/// destructuring null/Optional idioms (Python `Optional[T]`, JS `null`,
|
|
/// Go `(T, error)`, Java `Optional<T>`, Ruby `nil`, PHP `?T`,
|
|
/// C++ `std::optional<T>`) don't share Rust's convention, so this
|
|
/// predicate is conservatively true only for Rust today. Per-language
|
|
/// extensions can opt in later.
|
|
pub fn is_structural_variant_ctor_for_lang(lang: crate::symbol::Lang, callee: &str) -> bool {
|
|
match lang {
|
|
crate::symbol::Lang::Rust => is_structural_variant_ctor(callee),
|
|
// Other languages: no grammatical variant-ctor convention to
|
|
// recognise structurally. `Some(s)` / `Ok(s)` are Rust-specific;
|
|
// Java's `Optional.of(s)` is a method call, not a constructor; JS
|
|
// returns `s` directly with `null` as the failure sentinel.
|
|
_ => false,
|
|
}
|
|
}
|
|
|
|
/// Per-language predicate for "this callee is a zero-arg fresh-allocation
|
|
/// constructor", used by the variant-rejection-path classifier so that
|
|
/// `String::new()` (Rust) / `''` (Python/JS/Java/...) is recognised as a
|
|
/// no-attacker-content fresh value with cleared `dotdot`/`absolute` axes.
|
|
///
|
|
/// Rust uses the `Type::method` scoped form recognised by
|
|
/// [`crate::ssa::type_facts::peel_identity_suffix`]. Other languages do
|
|
/// not (yet) have an equivalent grammar-driven recogniser; the rejection
|
|
/// arm in their fixtures returns either an empty string literal (handled
|
|
/// by `SsaOp::Const` seeding) or `None`/`null`/`nil` (handled by the
|
|
/// non-data-return skip).
|
|
pub fn is_zero_arg_allocator_for_lang(lang: crate::symbol::Lang, _callee: &str) -> bool {
|
|
// Currently a no-op for non-Rust languages: rejection-arm constructors
|
|
// are absorbed via `SsaOp::Const` seeding (e.g. `""` literal) or the
|
|
// [`is_non_data_return`] sentinel skip (`None`/`null`/`nil`). This
|
|
// function exists as the per-language extension point.
|
|
let _ = lang;
|
|
false
|
|
}
|
|
|
|
/// Rust path-primitive classifier, `fs::canonicalize`, `Path::new`,
|
|
/// `PathBuf::from`, identity-string conversions.
|
|
pub fn classify_path_primitive_rust(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
|
// Accept both path-qualified (`std::fs::canonicalize`, `fs::canonicalize`)
|
|
// and bare-leaf (`canonicalize`, produced from `p.canonicalize()` method
|
|
// calls after normalisation) forms.
|
|
let leaf = rightmost_segment(callee);
|
|
match leaf {
|
|
// `fs::canonicalize(p)` / `p.canonicalize()`:
|
|
// normalized = Yes, dotdot = No, absolute = Yes. The result is
|
|
// an absolute, fully-resolved path; combined with a prefix-lock
|
|
// via `.starts_with(root)`, this is the standard Rust
|
|
// path-containment idiom.
|
|
"canonicalize" => {
|
|
let mut f = input_fact.clone();
|
|
f.normalized = Tri::Yes;
|
|
f.dotdot = Tri::No;
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// `Path::new(s)` / `PathBuf::from(s)`:
|
|
// pass-through of the input's PathFact so downstream `starts_with`
|
|
// checks against a Path/PathBuf value still see the underlying
|
|
// string's narrowed axes. No axis is forced, wrapping does not
|
|
// sanitize on its own.
|
|
"new" | "from" => {
|
|
if callee_contains_segment(callee, "Path") || callee_contains_segment(callee, "PathBuf")
|
|
{
|
|
Some(input_fact.clone())
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
// Identity conversions on strings/paths. Each one re-binds the
|
|
// same logical value, the converted String / PathBuf / OsString
|
|
// still describes the exact same filesystem path, so the PathFact
|
|
// flows through unchanged. Without this, a sanitised `s: &str`
|
|
// would lose its narrowed axes the moment the helper returns
|
|
// `s.to_string()` / `s.to_owned()` / `String::from(s)`.
|
|
"to_string" | "to_owned" | "clone" | "into" | "as_ref" | "as_str" | "as_path" => {
|
|
Some(input_fact.clone())
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Python path-primitive classifier, `os.path.normpath`, `os.path.realpath`,
|
|
/// `pathlib.Path.resolve`, `os.path.abspath`.
|
|
///
|
|
/// Pattern conventions: tree-sitter-python emits dotted attribute access as
|
|
/// `obj.attr.method` after [`crate::callgraph`] normalisation. Method calls
|
|
/// on Path objects appear as `Path.resolve` / `<bare>.resolve`; free-function
|
|
/// calls appear as `os.path.normpath` / `posixpath.normpath` / similar.
|
|
pub fn classify_path_primitive_python(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
|
let leaf = rightmost_segment(callee);
|
|
match leaf {
|
|
// `os.path.normpath(s)` / `posixpath.normpath(s)` / `ntpath.normpath`:
|
|
// Resolves `..` segments syntactically. dotdot = No.
|
|
// Does not make absolute.
|
|
"normpath" => {
|
|
let mut f = input_fact.clone();
|
|
f.dotdot = Tri::No;
|
|
f.normalized = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// `os.path.realpath(s)` / `pathlib.Path.resolve()`:
|
|
// Resolves symlinks AND `..` AND yields an absolute path.
|
|
// normalized = Yes, dotdot = No, absolute = Yes.
|
|
"realpath" | "resolve" => {
|
|
let mut f = input_fact.clone();
|
|
f.normalized = Tri::Yes;
|
|
f.dotdot = Tri::No;
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// `os.path.abspath(s)`:
|
|
// Returns an absolute version of the input. absolute = Yes.
|
|
// Does NOT clear `..` (abspath joins with cwd; trailing `..` survives).
|
|
"abspath" => {
|
|
let mut f = input_fact.clone();
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// Identity conversions: `str(p)` / `Path(s)` / `os.fspath(s)` re-bind
|
|
// the same logical path.
|
|
"fspath" | "PurePath" | "PurePosixPath" | "PureWindowsPath" => Some(input_fact.clone()),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// JavaScript / TypeScript path-primitive classifier, Node's `path` module:
|
|
/// `path.normalize`, `path.resolve`, `path.join`.
|
|
pub fn classify_path_primitive_js(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
|
let leaf = rightmost_segment(callee);
|
|
match leaf {
|
|
// `path.normalize(p)`:
|
|
// Resolves `..` syntactically. dotdot = No.
|
|
"normalize" => {
|
|
let mut f = input_fact.clone();
|
|
f.dotdot = Tri::No;
|
|
f.normalized = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// `path.resolve(p)`:
|
|
// Resolves to an absolute path, collapsing `..`.
|
|
// normalized = Yes, dotdot = No, absolute = Yes.
|
|
"resolve" => {
|
|
let mut f = input_fact.clone();
|
|
f.normalized = Tri::Yes;
|
|
f.dotdot = Tri::No;
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Go path-primitive classifier, `path/filepath` package:
|
|
/// `filepath.Clean`, `filepath.Abs`.
|
|
pub fn classify_path_primitive_go(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
|
let leaf = rightmost_segment(callee);
|
|
match leaf {
|
|
// `filepath.Clean(p)`:
|
|
// Lexical normalisation that resolves `..`. dotdot = No.
|
|
"Clean" => {
|
|
let mut f = input_fact.clone();
|
|
f.dotdot = Tri::No;
|
|
f.normalized = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// `filepath.Abs(p)`:
|
|
// Returns an absolute path (also calls Clean).
|
|
// normalized = Yes, dotdot = No, absolute = Yes.
|
|
"Abs" => {
|
|
let mut f = input_fact.clone();
|
|
f.normalized = Tri::Yes;
|
|
f.dotdot = Tri::No;
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Java path-primitive classifier, `java.nio.file.Path.normalize` /
|
|
/// `Paths.get(s).normalize().toAbsolutePath()`.
|
|
pub fn classify_path_primitive_java(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
|
let leaf = rightmost_segment(callee);
|
|
match leaf {
|
|
// `Path.normalize()`:
|
|
// Lexical normalisation that resolves `..`.
|
|
"normalize" => {
|
|
let mut f = input_fact.clone();
|
|
f.dotdot = Tri::No;
|
|
f.normalized = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// `Path.toAbsolutePath()`:
|
|
// Returns an absolute path.
|
|
"toAbsolutePath" => {
|
|
let mut f = input_fact.clone();
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// `Path.toRealPath()`:
|
|
// Resolves symlinks and `..`, returns absolute path.
|
|
"toRealPath" => {
|
|
let mut f = input_fact.clone();
|
|
f.normalized = Tri::Yes;
|
|
f.dotdot = Tri::No;
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Ruby path-primitive classifier, `File.expand_path` / `Pathname#cleanpath`.
|
|
pub fn classify_path_primitive_ruby(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
|
let leaf = rightmost_segment(callee);
|
|
match leaf {
|
|
// `File.expand_path(s)`:
|
|
// Returns an absolute path with `..` collapsed.
|
|
"expand_path" => {
|
|
let mut f = input_fact.clone();
|
|
f.normalized = Tri::Yes;
|
|
f.dotdot = Tri::No;
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// `Pathname#cleanpath`:
|
|
// Lexical normalisation that resolves `..`.
|
|
"cleanpath" => {
|
|
let mut f = input_fact.clone();
|
|
f.dotdot = Tri::No;
|
|
f.normalized = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// PHP path-primitive classifier, `realpath`, `basename`.
|
|
pub fn classify_path_primitive_php(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
|
let leaf = rightmost_segment(callee);
|
|
match leaf {
|
|
// `realpath($s)`:
|
|
// Resolves symlinks and `..`, returns absolute path. Returns
|
|
// `false` if the file doesn't exist, but on the success path
|
|
// (which is what reaches a sink), it produces a clean absolute path.
|
|
"realpath" => {
|
|
let mut f = input_fact.clone();
|
|
f.normalized = Tri::Yes;
|
|
f.dotdot = Tri::No;
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
// `basename($s)`:
|
|
// Strips directory components, guaranteed to contain no `..`
|
|
// (basename of `..` is `..`, but basename of any traversal-
|
|
// prefixed path is just the leaf). Conservative: clear dotdot.
|
|
"basename" => {
|
|
let mut f = input_fact.clone();
|
|
f.dotdot = Tri::No;
|
|
f.absolute = Tri::No;
|
|
Some(f)
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// C / C++ path-primitive classifier, POSIX `realpath`,
|
|
/// `std::filesystem::canonical`.
|
|
pub fn classify_path_primitive_c_cpp(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
|
|
let leaf = rightmost_segment(callee);
|
|
match leaf {
|
|
// POSIX `realpath(in, out)` / C++ `std::filesystem::canonical(p)`:
|
|
// Resolves to absolute canonical path.
|
|
"realpath" | "canonical" => {
|
|
let mut f = input_fact.clone();
|
|
f.normalized = Tri::Yes;
|
|
f.dotdot = Tri::No;
|
|
f.absolute = Tri::Yes;
|
|
Some(f)
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
// ── Text helpers (kept in sync with path_state.rs's parsing style) ─────
|
|
|
|
/// Return the final name segment of a callee string.
///
/// Everything up to and including the last `::` is dropped first; from what
/// remains, everything up to and including the last `.` is dropped. A string
/// containing neither separator is returned unchanged.
fn rightmost_segment(s: &str) -> &str {
    // Step 1: strip the `a::b::` scope prefix, if any.
    let tail = match s.rfind("::") {
        Some(pos) => &s[pos + 2..],
        None => s,
    };
    // Step 2: strip the `obj.attr.` receiver prefix, if any.
    match tail.rfind('.') {
        Some(pos) => &tail[pos + 1..],
        None => tail,
    }
}
|
|
|
|
/// Report whether `seg` appears as a whole segment of `callee`.
///
/// `callee` is cut at every `:` and every `.` character. Because each colon
/// is a separate cut, a `::` separator produces an empty segment between
/// its two colons. The comparison is exact, so `PathBuf` does not match
/// `Path`.
fn callee_contains_segment(callee: &str, seg: &str) -> bool {
    callee
        .split(|sep: char| matches!(sep, ':' | '.'))
        .any(|part| part == seg)
}
|
|
|
|
/// Extract the string argument passed to a "contains-like" call. Matches
|
|
/// the canonical method-call shapes across languages:
|
|
/// * Rust / Java / JS String: `r.contains("..")`
|
|
/// * JS / TS array: `r.includes("..")`
|
|
/// * Ruby: `r.include?("..")`
|
|
/// * Go: `strings.Contains(r, "..")`
|
|
/// * C / C++: `strstr(r, "..")` / `strchr(r, '/')`
|
|
fn extract_contains_arg(text: &str) -> Option<String> {
|
|
// Tier 1: method-call form `.contains(`, `.includes(`, `.include?(`.
|
|
for method in [".contains(", ".includes(", ".include?("] {
|
|
if let Some(idx) = text.find(method)
|
|
&& let Some(s) = extract_first_string_literal(&text[idx + method.len()..])
|
|
{
|
|
return Some(s);
|
|
}
|
|
}
|
|
// Tier 2: free-function form with the receiver as first arg. We can't
|
|
// recover the receiver from the text (the lowering already records it
|
|
// in `condition_vars`); we just need the literal needle to classify.
|
|
for prefix in [
|
|
"strings.Contains(",
|
|
"strings.HasPrefix(",
|
|
"strings.Index(",
|
|
"strstr(",
|
|
] {
|
|
if let Some(idx) = text.find(prefix) {
|
|
// Skip past the first argument (receiver), the literal needle
|
|
// is the second arg, separated by a comma. Find the comma at
|
|
// top level inside this call.
|
|
let inner = &text[idx + prefix.len()..];
|
|
if let Some(comma_idx) = top_level_comma(inner) {
|
|
let after_comma = &inner[comma_idx + 1..];
|
|
if let Some(s) = extract_first_string_literal(after_comma) {
|
|
return Some(s);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Extract the string argument passed to a "starts-with-like" call.
|
|
/// * Rust: `r.starts_with('/')`
|
|
/// * Ruby: `r.start_with?("/")`
|
|
/// * JS / TS / Java: `r.startsWith("/")`
|
|
/// * Python: `r.startswith("/")`
|
|
/// * Go: `strings.HasPrefix(r, "/")`
|
|
fn extract_starts_with_arg(text: &str) -> Option<String> {
|
|
for method in [
|
|
".starts_with(",
|
|
".start_with?(",
|
|
".startsWith(",
|
|
".startswith(",
|
|
] {
|
|
if let Some(idx) = text.find(method)
|
|
&& let Some(s) = extract_first_string_literal(&text[idx + method.len()..])
|
|
{
|
|
return Some(s);
|
|
}
|
|
}
|
|
// Go free-function form `strings.HasPrefix(r, "/")`, second arg.
|
|
if let Some(idx) = text.find("strings.HasPrefix(") {
|
|
let inner = &text[idx + "strings.HasPrefix(".len()..];
|
|
if let Some(comma_idx) = top_level_comma(inner) {
|
|
let after_comma = &inner[comma_idx + 1..];
|
|
if let Some(s) = extract_first_string_literal(after_comma) {
|
|
return Some(s);
|
|
}
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Detect a `starts_with`-style call with a non-empty argument, where the
|
|
/// argument is *not* recovered as a string literal by
|
|
/// [`extract_starts_with_arg`] (so it's a method call, field access, local
|
|
/// variable, etc.). Used by [`classify_path_assertion`] to attach an
|
|
/// opaque prefix-lock when the application validates with a configured
|
|
/// root rather than an inline string literal.
|
|
///
|
|
/// Whitespace-tolerant. Conservative: returns `false` for any shape where
|
|
/// the argument cannot be confirmed non-empty.
|
|
fn has_starts_with_call_with_nonempty_arg(text: &str) -> bool {
|
|
// Method-call forms with parens. The argument-presence check is
|
|
// simple: after the opening `(`, the first non-whitespace byte must
|
|
// not be `)` (empty arg list).
|
|
for method in [
|
|
".starts_with(",
|
|
".start_with?(",
|
|
".startsWith(",
|
|
".startswith(",
|
|
] {
|
|
if let Some(idx) = text.find(method) {
|
|
let after = &text[idx + method.len()..];
|
|
if first_non_ws_byte(after).is_some_and(|b| b != b')') {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
// Ruby paren-less call: `r.start_with? <expr>`. Tree-sitter still
|
|
// serialises the source text verbatim, so a space (or tab) follows
|
|
// the `?`. Require a non-empty, non-clause-terminator token after.
|
|
if let Some(idx) = text.find(".start_with?") {
|
|
let rest = &text[idx + ".start_with?".len()..];
|
|
// Skip the `(` form (already covered above) and any whitespace.
|
|
let after = rest.trim_start();
|
|
if !after.is_empty() {
|
|
let first = after.as_bytes()[0];
|
|
// `(` belongs to the parenthesised form; clause terminators
|
|
// (`&&` / `||` / `)` / `]` / `;` / `,`) mean the call has no
|
|
// arguments at this position.
|
|
if !matches!(first, b'(' | b'&' | b'|' | b')' | b']' | b';' | b',') {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
// Go free-function form `strings.HasPrefix(<recv>, <prefix>)`. The
|
|
// second argument must exist and be non-empty.
|
|
if let Some(idx) = text.find("strings.HasPrefix(") {
|
|
let inner = &text[idx + "strings.HasPrefix(".len()..];
|
|
if let Some(comma_idx) = top_level_comma(inner) {
|
|
let after_comma = inner[comma_idx + 1..].trim_start();
|
|
if !after_comma.is_empty() && !after_comma.starts_with(')') {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
false
|
|
}
|
|
|
|
/// Scan `text` from the left and return the first byte that is not ASCII
/// whitespace. Yields `None` for an empty or all-whitespace slice.
fn first_non_ws_byte(text: &str) -> Option<u8> {
    for &byte in text.as_bytes() {
        if !byte.is_ascii_whitespace() {
            return Some(byte);
        }
    }
    None
}
|
|
|
|
/// Find the byte index of the first top-level `,` in `text`.
///
/// "Top-level" means nesting depth 0: commas inside parentheses, brackets,
/// braces, or `"…"` / `'…'` string literals are ignored. Inside a literal a
/// backslash escapes the following byte, so an escaped quote does not end
/// the literal.
///
/// `text` is expected to start just after the opening `(` of a call. The
/// scan therefore stops at the first closing `)`, `]` or `}` that has no
/// matching opener inside `text`: that closer ends the enclosing argument
/// list, and anything after it belongs to a different expression. Without
/// this stop, a later `(` would bring the depth back to 0 and a comma
/// inside an unrelated later call (e.g. the `,` in `a) || f(b, c)`) would
/// be misreported as top-level.
///
/// Returns `None` if no top-level comma is found before the end of `text`
/// or the end of the enclosing argument list.
fn top_level_comma(text: &str) -> Option<usize> {
    let bytes = text.as_bytes();
    // Unsigned on purpose: going below zero is the "argument list closed"
    // signal, handled with `checked_sub` below.
    let mut depth: usize = 0;
    let mut in_quote: Option<u8> = None;
    let mut i = 0usize;
    while i < bytes.len() {
        let b = bytes[i];
        if let Some(q) = in_quote {
            if b == b'\\' {
                // Skip the escaped byte as well. A trailing backslash just
                // moves `i` past the end and terminates the loop.
                i += 2;
                continue;
            }
            if b == q {
                in_quote = None;
            }
            i += 1;
            continue;
        }
        match b {
            b'"' | b'\'' => in_quote = Some(b),
            b'(' | b'[' | b'{' => depth += 1,
            // An unmatched closer ends the enclosing argument list.
            b')' | b']' | b'}' => depth = depth.checked_sub(1)?,
            b',' if depth == 0 => return Some(i),
            _ => {}
        }
        i += 1;
    }
    None
}
|
|
|
|
/// Parse a `"..."` / `'...'` literal at the start of `after_open` (the text
/// following a call's opening `(`), skipping leading ASCII whitespace.
///
/// Returns the literal's inner text with escapes resolved: `\n`, `\r` and
/// `\t` become the corresponding control characters, and any other escaped
/// byte (`\\`, `\"`, `\'`, ...) stands for itself. The literal ends at the
/// first unescaped occurrence of the quote character that opened it.
///
/// `None` when the first non-whitespace byte is not a quote, when the
/// literal is never closed, or when the collected bytes are not valid
/// UTF-8.
fn extract_first_string_literal(after_open: &str) -> Option<String> {
    let mut rest = after_open.bytes().skip_while(u8::is_ascii_whitespace);

    // The very next byte must open a literal; remember which quote it was.
    let quote = rest.next().filter(|b| matches!(b, b'"' | b'\''))?;

    let mut literal = Vec::new();
    while let Some(byte) = rest.next() {
        if byte == b'\\' {
            // A backslash consumes the following byte. A backslash at the
            // very end leaves the literal unterminated.
            let unescaped = match rest.next()? {
                b'n' => b'\n',
                b'r' => b'\r',
                b't' => b'\t',
                other => other,
            };
            literal.push(unescaped);
        } else if byte == quote {
            return String::from_utf8(literal).ok();
        } else {
            literal.push(byte);
        }
    }
    // Ran out of input before the closing quote.
    None
}
|
|
|
|
// ── Helpers ─────────────────────────────────────────────────────────────
|
|
|
|
fn truncate_prefix_lock(s: &str) -> String {
|
|
if s.len() <= MAX_PREFIX_LOCK_LEN {
|
|
s.to_string()
|
|
} else {
|
|
let mut end = MAX_PREFIX_LOCK_LEN;
|
|
while end > 0 && !s.is_char_boundary(end) {
|
|
end -= 1;
|
|
}
|
|
s[..end].to_string()
|
|
}
|
|
}
|
|
|
|
/// Longest common prefix of `a` and `b`, compared char by char.
///
/// Working on `char`s rather than bytes keeps multi-byte UTF-8 sequences
/// whole. An earlier byte-wise version re-encoded continuation bytes as
/// Latin-1 chars and produced mojibake. The same approach is used by
/// `crate::abstract_interp::string_domain::longest_common_prefix`.
fn longest_common_prefix(a: &str, b: &str) -> String {
    let mut shared = String::new();
    for (left, right) in a.chars().zip(b.chars()) {
        if left != right {
            break;
        }
        shared.push(left);
    }
    shared
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
// ── LCP helper ──────────────────────────────────────────────────────
|
|
|
|
#[test]
|
|
fn lcp_basic() {
|
|
assert_eq!(longest_common_prefix("abcdef", "abcxyz"), "abc");
|
|
assert_eq!(longest_common_prefix("abc", "abc"), "abc");
|
|
assert_eq!(longest_common_prefix("", "abc"), "");
|
|
}
|
|
|
|
#[test]
|
|
fn lcp_keeps_utf8_codepoints_whole() {
|
|
// Without char-alignment, byte iteration would emit the
|
|
// continuation byte 0xA9 as a separate char and corrupt the
|
|
// prefix. Both the 2-byte and 3-byte UTF-8 cases must survive.
|
|
assert_eq!(longest_common_prefix("héllo", "héllo!"), "héllo");
|
|
assert_eq!(longest_common_prefix("名前.json", "名前.txt"), "名前.");
|
|
}
|
|
|
|
// ── Tri lattice laws ────────────────────────────────────────────────
|
|
|
|
#[test]
|
|
fn tri_join_idempotent() {
|
|
for v in [Tri::No, Tri::Yes, Tri::Maybe] {
|
|
assert_eq!(v.join(&v), v);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn tri_join_commutative() {
|
|
let pairs = [
|
|
(Tri::No, Tri::Yes),
|
|
(Tri::No, Tri::Maybe),
|
|
(Tri::Yes, Tri::Maybe),
|
|
];
|
|
for (a, b) in pairs {
|
|
assert_eq!(a.join(&b), b.join(&a));
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn tri_join_disagreement_is_top() {
|
|
assert_eq!(Tri::No.join(&Tri::Yes), Tri::Maybe);
|
|
}
|
|
|
|
#[test]
|
|
fn tri_join_with_top_is_top() {
|
|
assert_eq!(Tri::No.join(&Tri::Maybe), Tri::Maybe);
|
|
assert_eq!(Tri::Yes.join(&Tri::Maybe), Tri::Maybe);
|
|
}
|
|
|
|
#[test]
|
|
fn tri_meet_top_is_identity() {
|
|
assert_eq!(Tri::No.meet_checked(&Tri::Maybe), Some(Tri::No));
|
|
assert_eq!(Tri::Maybe.meet_checked(&Tri::Yes), Some(Tri::Yes));
|
|
}
|
|
|
|
#[test]
|
|
fn tri_meet_contradiction_is_none() {
|
|
assert_eq!(Tri::No.meet_checked(&Tri::Yes), None);
|
|
assert_eq!(Tri::Yes.meet_checked(&Tri::No), None);
|
|
}
|
|
|
|
#[test]
|
|
fn tri_meet_agree() {
|
|
assert_eq!(Tri::No.meet_checked(&Tri::No), Some(Tri::No));
|
|
assert_eq!(Tri::Yes.meet_checked(&Tri::Yes), Some(Tri::Yes));
|
|
}
|
|
|
|
#[test]
|
|
fn tri_widen_drops_on_change() {
|
|
assert_eq!(Tri::No.widen(&Tri::Yes), Tri::Maybe);
|
|
assert_eq!(Tri::No.widen(&Tri::No), Tri::No);
|
|
}
|
|
|
|
#[test]
|
|
fn tri_leq_top_greatest() {
|
|
assert!(Tri::No.leq(&Tri::Maybe));
|
|
assert!(Tri::Yes.leq(&Tri::Maybe));
|
|
assert!(!Tri::Maybe.leq(&Tri::No));
|
|
}
|
|
|
|
// ── PathFact basics ─────────────────────────────────────────────────
|
|
|
|
#[test]
|
|
fn default_is_top() {
|
|
let f = PathFact::default();
|
|
assert!(f.is_top());
|
|
assert!(!f.is_bottom());
|
|
assert!(!f.is_path_safe());
|
|
}
|
|
|
|
#[test]
|
|
fn bottom_detection() {
|
|
let b = PathFact::bottom();
|
|
assert!(b.is_bottom());
|
|
assert!(!b.is_top());
|
|
assert!(!b.is_path_safe());
|
|
}
|
|
|
|
#[test]
|
|
fn is_path_safe_requires_both_axes() {
|
|
let mut f = PathFact::default().with_dotdot_cleared();
|
|
assert!(!f.is_path_safe(), "dotdot=No alone is insufficient");
|
|
f = f.with_absolute_cleared();
|
|
assert!(f.is_path_safe());
|
|
}
|
|
|
|
#[test]
|
|
fn is_path_safe_truth_table() {
|
|
let cases = [
|
|
(Tri::No, Tri::No, true),
|
|
(Tri::No, Tri::Yes, false),
|
|
(Tri::No, Tri::Maybe, false),
|
|
(Tri::Yes, Tri::No, false),
|
|
(Tri::Maybe, Tri::No, false),
|
|
(Tri::Maybe, Tri::Maybe, false),
|
|
];
|
|
for (dd, abs, expected) in cases {
|
|
let f = PathFact {
|
|
dotdot: dd,
|
|
absolute: abs,
|
|
normalized: Tri::Maybe,
|
|
prefix_lock: None,
|
|
is_bottom: false,
|
|
};
|
|
assert_eq!(
|
|
f.is_path_safe(),
|
|
expected,
|
|
"is_path_safe({:?}, {:?}) should be {expected}",
|
|
dd,
|
|
abs
|
|
);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn with_normalized_clears_dotdot() {
|
|
let f = PathFact::default().with_normalized();
|
|
assert_eq!(f.dotdot, Tri::No);
|
|
assert_eq!(f.normalized, Tri::Yes);
|
|
assert_eq!(f.absolute, Tri::Maybe);
|
|
}
|
|
|
|
#[test]
|
|
fn with_prefix_lock_ignores_empty() {
|
|
let f = PathFact::default().with_prefix_lock("");
|
|
assert!(f.prefix_lock.is_none());
|
|
}
|
|
|
|
#[test]
|
|
fn with_prefix_lock_truncates() {
|
|
let huge = "/".to_string() + &"a".repeat(MAX_PREFIX_LOCK_LEN * 2);
|
|
let f = PathFact::default().with_prefix_lock(&huge);
|
|
assert!(
|
|
f.prefix_lock.as_deref().unwrap().len() <= MAX_PREFIX_LOCK_LEN,
|
|
"prefix_lock must be bounded"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn c_or_chain_rejection_full() {
|
|
// Exact text shape that lowering produces for c-safe-014 / c-safe-016.
|
|
let axes = classify_path_rejection_axes(
|
|
"strstr(s, \"..\") != NULL || s[0] == '/' || s[0] == '\\\\'",
|
|
);
|
|
assert!(
|
|
axes.contains(&PathRejection::DotDot),
|
|
"expected DotDot in {:?}",
|
|
axes
|
|
);
|
|
assert!(
|
|
axes.contains(&PathRejection::AbsoluteSlash),
|
|
"expected AbsoluteSlash in {:?}",
|
|
axes
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn classify_subscript_first_char_absolute() {
|
|
// C/C++ idiom: `s[0] == '/'`
|
|
assert_eq!(
|
|
classify_path_rejection_atom("s[0] == '/'"),
|
|
PathRejection::AbsoluteSlash
|
|
);
|
|
// `s[0] == '\\'` (backslash)
|
|
assert_eq!(
|
|
classify_path_rejection_atom("s[0] == '\\\\'"),
|
|
PathRejection::AbsoluteSlash
|
|
);
|
|
// Reversed comparison `'/' == s[0]`
|
|
assert_eq!(
|
|
classify_path_rejection_atom("'/' == in[0]"),
|
|
PathRejection::AbsoluteSlash
|
|
);
|
|
// `!=` operator inside a negated check (`s[0] != '/'`) also matches the
|
|
// literal-nearby pattern; classification callers gate on clause polarity.
|
|
assert_eq!(
|
|
classify_path_rejection_atom("s[0] != '\\\\'"),
|
|
PathRejection::AbsoluteSlash
|
|
);
|
|
// Negative: no literal near subscript
|
|
assert_eq!(
|
|
classify_path_rejection_atom("s[0] == c"),
|
|
PathRejection::None
|
|
);
|
|
// Negative: subscript but no equality op
|
|
assert_eq!(classify_path_rejection_atom("s[0]"), PathRejection::None);
|
|
// Regression: multibyte char inside the 32-byte search window must not
|
|
// panic on a non-char-boundary slice (fuzz crash repro).
|
|
let s = format!("{}s[0] == '/'", "—".repeat(20));
|
|
assert_eq!(
|
|
classify_path_rejection_atom(&s),
|
|
PathRejection::AbsoluteSlash
|
|
);
|
|
let s2 = format!("s[0] == '/'{}", "—".repeat(20));
|
|
assert_eq!(
|
|
classify_path_rejection_atom(&s2),
|
|
PathRejection::AbsoluteSlash
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn prefix_locked_under_works() {
|
|
let f = PathFact::default().with_prefix_lock("/var/app/uploads/");
|
|
assert!(f.prefix_locked_under("/var/app/"));
|
|
assert!(f.prefix_locked_under("/var/app/uploads/"));
|
|
assert!(!f.prefix_locked_under("/etc/"));
|
|
assert!(!PathFact::default().prefix_locked_under("/var/app/"));
|
|
}
|
|
|
|
// ── Lattice laws ────────────────────────────────────────────────────
|
|
|
|
#[test]
|
|
fn join_idempotent() {
|
|
let f = PathFact::default()
|
|
.with_dotdot_cleared()
|
|
.with_absolute_cleared();
|
|
assert_eq!(f.join(&f), f);
|
|
}
|
|
|
|
#[test]
|
|
fn join_commutative() {
|
|
let a = PathFact::default().with_dotdot_cleared();
|
|
let b = PathFact::default().with_absolute_cleared();
|
|
assert_eq!(a.join(&b), b.join(&a));
|
|
}
|
|
|
|
#[test]
|
|
fn join_associative() {
|
|
let a = PathFact::default().with_dotdot_cleared();
|
|
let b = PathFact::default().with_absolute_cleared();
|
|
let c = PathFact::default().with_normalized();
|
|
assert_eq!(a.join(&b).join(&c), a.join(&b.join(&c)));
|
|
}
|
|
|
|
#[test]
|
|
fn join_with_bottom_identity() {
|
|
let a = PathFact::default().with_dotdot_cleared();
|
|
assert_eq!(a.join(&PathFact::bottom()), a);
|
|
assert_eq!(PathFact::bottom().join(&a), a);
|
|
}
|
|
|
|
#[test]
|
|
fn join_disagreement_yields_maybe() {
|
|
let a = PathFact::default().with_dotdot_cleared(); // dotdot=No
|
|
let b = PathFact {
|
|
dotdot: Tri::Yes,
|
|
..Default::default()
|
|
};
|
|
let j = a.join(&b);
|
|
assert_eq!(j.dotdot, Tri::Maybe);
|
|
}
|
|
|
|
#[test]
|
|
fn join_prefix_locks_lcp() {
|
|
let a = PathFact::default().with_prefix_lock("/var/app/uploads/");
|
|
let b = PathFact::default().with_prefix_lock("/var/app/static/");
|
|
let j = a.join(&b);
|
|
assert_eq!(j.prefix_lock.as_deref(), Some("/var/app/"));
|
|
}
|
|
|
|
#[test]
|
|
fn join_prefix_locks_disjoint_drops() {
|
|
let a = PathFact::default().with_prefix_lock("/var/app/");
|
|
let b = PathFact::default().with_prefix_lock("/etc/");
|
|
let j = a.join(&b);
|
|
// LCP of "/var/app/" and "/etc/" is "/"; still a non-empty lock.
|
|
assert_eq!(j.prefix_lock.as_deref(), Some("/"));
|
|
let c = PathFact::default().with_prefix_lock("home/");
|
|
let d = PathFact::default().with_prefix_lock("etc/");
|
|
assert!(c.join(&d).prefix_lock.is_none());
|
|
}
|
|
|
|
#[test]
|
|
fn meet_top_is_identity() {
|
|
let a = PathFact::default()
|
|
.with_dotdot_cleared()
|
|
.with_absolute_cleared();
|
|
assert_eq!(a.meet(&PathFact::top()), a);
|
|
assert_eq!(PathFact::top().meet(&a), a);
|
|
}
|
|
|
|
#[test]
|
|
fn meet_refines() {
|
|
let a = PathFact::default().with_dotdot_cleared();
|
|
let b = PathFact::default().with_absolute_cleared();
|
|
let m = a.meet(&b);
|
|
assert_eq!(m.dotdot, Tri::No);
|
|
assert_eq!(m.absolute, Tri::No);
|
|
assert!(m.is_path_safe());
|
|
}
|
|
|
|
#[test]
|
|
fn meet_contradiction_is_bottom() {
|
|
let a = PathFact::default().with_dotdot_cleared(); // dotdot=No
|
|
let b = PathFact {
|
|
dotdot: Tri::Yes,
|
|
..Default::default()
|
|
};
|
|
assert!(a.meet(&b).is_bottom());
|
|
}
|
|
|
|
#[test]
|
|
fn meet_prefix_locks_picks_longer() {
|
|
let a = PathFact::default().with_prefix_lock("/var/app/");
|
|
let b = PathFact::default().with_prefix_lock("/var/app/uploads/");
|
|
let m = a.meet(&b);
|
|
assert_eq!(m.prefix_lock.as_deref(), Some("/var/app/uploads/"));
|
|
}
|
|
|
|
#[test]
|
|
fn meet_prefix_locks_disjoint_is_bottom() {
|
|
let a = PathFact::default().with_prefix_lock("/var/app/");
|
|
let b = PathFact::default().with_prefix_lock("/etc/");
|
|
assert!(a.meet(&b).is_bottom());
|
|
}
|
|
|
|
// ── Widening ────────────────────────────────────────────────────────
|
|
|
|
#[test]
|
|
fn widen_stable() {
|
|
let a = PathFact::default()
|
|
.with_dotdot_cleared()
|
|
.with_absolute_cleared();
|
|
assert_eq!(a.widen(&a), a);
|
|
}
|
|
|
|
#[test]
|
|
fn widen_drops_on_change() {
|
|
let a = PathFact::default().with_dotdot_cleared();
|
|
let b = PathFact {
|
|
dotdot: Tri::Yes,
|
|
..Default::default()
|
|
};
|
|
let w = a.widen(&b);
|
|
assert_eq!(w.dotdot, Tri::Maybe);
|
|
}
|
|
|
|
#[test]
|
|
fn widen_chain_terminates() {
|
|
// Finite-ascent guarantee: any sequence of widens must stabilise
|
|
// within a small fixed number of steps (each axis has height 2).
|
|
let mut cur = PathFact::default().with_dotdot_cleared();
|
|
let target = PathFact {
|
|
dotdot: Tri::Yes,
|
|
absolute: Tri::Yes,
|
|
normalized: Tri::Yes,
|
|
prefix_lock: None,
|
|
is_bottom: false,
|
|
};
|
|
for _ in 0..8 {
|
|
cur = cur.widen(&target);
|
|
}
|
|
// After widening with a disagreeing target, we drop to Top on that axis.
|
|
assert_eq!(cur.dotdot, Tri::Maybe);
|
|
assert_eq!(cur, cur.widen(&target), "must have stabilised");
|
|
}
|
|
|
|
#[test]
|
|
fn widen_prefix_drops_on_change() {
|
|
let a = PathFact::default().with_prefix_lock("/var/app/v1/");
|
|
let b = PathFact::default().with_prefix_lock("/var/app/v2/");
|
|
assert!(a.widen(&b).prefix_lock.is_none());
|
|
}
|
|
|
|
// ── Leq ─────────────────────────────────────────────────────────────
|
|
|
|
#[test]
|
|
fn leq_top_greatest() {
|
|
let a = PathFact::default().with_dotdot_cleared();
|
|
assert!(a.leq(&PathFact::top()));
|
|
assert!(!PathFact::top().leq(&a));
|
|
}
|
|
|
|
#[test]
|
|
fn leq_bottom_least() {
|
|
assert!(PathFact::bottom().leq(&PathFact::default()));
|
|
assert!(!PathFact::default().leq(&PathFact::bottom()));
|
|
}
|
|
|
|
#[test]
|
|
fn leq_refinement() {
|
|
let refined = PathFact::default()
|
|
.with_dotdot_cleared()
|
|
.with_absolute_cleared();
|
|
let coarse = PathFact::default().with_dotdot_cleared();
|
|
assert!(refined.leq(&coarse));
|
|
assert!(!coarse.leq(&refined));
|
|
}
|
|
|
|
// ── Rust classifier tests ───────────────────────────────────────────
|
|
|
|
#[test]
|
|
fn rejection_contains_dotdot() {
|
|
assert_eq!(
|
|
classify_path_rejection("user.contains(\"..\")"),
|
|
PathRejection::DotDot
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn rejection_axes_disjunction_covers_all_clauses() {
|
|
let axes = classify_path_rejection_axes(
|
|
"s.contains(\"..\") || s.starts_with('/') || s.starts_with('\\\\')",
|
|
);
|
|
assert!(
|
|
axes.contains(&PathRejection::DotDot),
|
|
"expected DotDot in {axes:?}"
|
|
);
|
|
assert!(
|
|
axes.contains(&PathRejection::AbsoluteSlash),
|
|
"expected AbsoluteSlash in {axes:?}"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn rejection_axes_deduplicates() {
|
|
let axes = classify_path_rejection_axes("a.starts_with('/') || b.starts_with(\"\\\\\")");
|
|
// Two absolute-slash clauses collapse to a single axis.
|
|
assert_eq!(
|
|
axes.iter()
|
|
.filter(|a| matches!(a, PathRejection::AbsoluteSlash))
|
|
.count(),
|
|
1
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn rejection_contains_other_needle_is_none() {
|
|
assert_eq!(
|
|
classify_path_rejection("name.contains(\";\")"),
|
|
PathRejection::None
|
|
);
|
|
}
|
|
|
|
/// `starts_with` on a forward slash classifies as `AbsoluteSlash` whether
/// the slash is written as a char literal or as a string literal.
#[test]
fn rejection_starts_with_slash() {
    let char_literal = classify_path_rejection("p.starts_with('/')");
    let string_literal = classify_path_rejection("p.starts_with(\"/\")");

    assert_eq!(char_literal, PathRejection::AbsoluteSlash);
    assert_eq!(string_literal, PathRejection::AbsoluteSlash);
}
|
|
|
|
/// `starts_with` on a backslash string literal also classifies as
/// `AbsoluteSlash`.
#[test]
fn rejection_starts_with_backslash() {
    let verdict = classify_path_rejection("p.starts_with(\"\\\\\")");
    assert_eq!(verdict, PathRejection::AbsoluteSlash);
}
|
|
|
|
/// `.is_absolute()` classifies as `IsAbsolute` both on a `Path::new(..)`
/// receiver and on a plain variable receiver.
#[test]
fn rejection_is_absolute() {
    let via_path_new = classify_path_rejection("Path::new(s).is_absolute()");
    let via_variable = classify_path_rejection("p.is_absolute()");

    assert_eq!(via_path_new, PathRejection::IsAbsolute);
    assert_eq!(via_variable, PathRejection::IsAbsolute);
}
|
|
|
|
/// `starts_with` on a multi-character string literal yields a `PrefixLock`
/// carrying that literal as the root.
#[test]
fn assertion_prefix_lock() {
    let outcome = classify_path_assertion("p.starts_with(\"/var/app/\")");

    if let PathAssertion::PrefixLock(root) = &outcome {
        assert_eq!(root.as_str(), "/var/app/");
    } else {
        panic!("expected PrefixLock, got {outcome:?}");
    }
}
|
|
|
|
/// A `starts_with('/')` check on a single character does not produce a
/// prefix lock.
#[test]
fn assertion_single_char_not_lock() {
    let outcome = classify_path_assertion("p.starts_with('/')");
    assert_eq!(outcome, PathAssertion::None);
}
|
|
|
|
/// Ruby `start_with?` with a method-call argument and no parentheses
/// yields the opaque prefix lock.
#[test]
fn assertion_opaque_prefix_lock_method_call_arg() {
    // Shape taken from the rswag CVE-2023-38337 patch: the argument is a
    // configured-root method call, so the analyser cannot know the exact
    // bytes. The structural invariant "rooted under SOME prefix" is still
    // recorded through the opaque marker.
    let expected = PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string());
    let outcome =
        classify_path_assertion("filename.start_with? @config.resolve_swagger_root(env)");

    assert_eq!(outcome, expected);
}
|
|
|
|
/// Parenthesised form of the Ruby shape, `r.start_with?(some_root)`, also
/// yields the opaque prefix lock.
#[test]
fn assertion_opaque_prefix_lock_paren_method_call() {
    let expected = PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string());
    let outcome = classify_path_assertion("filename.start_with?(@config.root)");

    assert_eq!(outcome, expected);
}
|
|
|
|
/// Python `startswith` with a non-literal argument yields the opaque
/// prefix lock.
#[test]
fn assertion_opaque_prefix_lock_python_startswith() {
    // Models `os.path.realpath(p).startswith(safe_root)`, where `safe_root`
    // is a local variable rather than a string literal.
    let expected = PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string());
    let outcome = classify_path_assertion("p.startswith(safe_root)");

    assert_eq!(outcome, expected);
}
|
|
|
|
/// JavaScript `startsWith` with a variable argument yields the opaque
/// prefix lock.
#[test]
fn assertion_opaque_prefix_lock_js_starts_with() {
    let expected = PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string());
    let outcome = classify_path_assertion("resolved.startsWith(uploadsDir)");

    assert_eq!(outcome, expected);
}
|
|
|
|
/// Go `strings.HasPrefix(p, root)` with a variable root yields the opaque
/// prefix lock.
#[test]
fn assertion_opaque_prefix_lock_go_hasprefix() {
    let expected = PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string());
    let outcome = classify_path_assertion("strings.HasPrefix(p, safeRoot)");

    assert_eq!(outcome, expected);
}
|
|
|
|
/// The degenerate call `r.starts_with()` with no argument must not
/// produce a lock.
#[test]
fn assertion_no_lock_on_empty_arg() {
    let outcome = classify_path_assertion("r.starts_with()");
    assert_eq!(outcome, PathAssertion::None);
}
|
|
|
|
/// A fact with both the dotdot and absolute axes cleared passes
/// `is_path_traversal_safe`.
#[test]
fn is_path_traversal_safe_relative_dotdot_free() {
    let fact = PathFact::default()
        .with_dotdot_cleared()
        .with_absolute_cleared();

    let safe = fact.is_path_traversal_safe();
    assert!(safe);
}
|
|
|
|
/// Canonicalise-then-prefix-check shape (`File.expand_path +
/// start_with?(root)`): dotdot=No, absolute=Yes, prefix_lock=Some.
///
/// The strict `is_path_safe` predicate rejects the fact both before and
/// after `absolute` is set to `Yes`; the relaxed `is_path_traversal_safe`
/// predicate accepts it once `absolute` is `Yes`.
#[test]
fn is_path_traversal_safe_canonicalised_with_prefix_lock() {
    // Use the shared sentinel constant instead of a copy of its text, so
    // this test stays in step with the classifier tests that compare
    // against `OPAQUE_PREFIX_LOCK`.
    let locked = PathFact::default()
        .with_dotdot_cleared()
        .with_prefix_lock(OPAQUE_PREFIX_LOCK);
    assert!(
        !locked.is_path_safe(),
        "absolute axis still Maybe blocks strict"
    );

    // Setting absolute=Yes via expand_path-style transfer. `locked` is not
    // used again, so it is moved rather than cloned.
    let mut rooted = locked;
    rooted.absolute = Tri::Yes;
    assert!(
        !rooted.is_path_safe(),
        "absolute=Yes blocks strict predicate"
    );
    assert!(
        rooted.is_path_traversal_safe(),
        "prefix_lock + dotdot=No is sufficient under relaxed predicate"
    );
}
|
|
|
|
/// A prefix lock alone is not enough: with the dotdot axis never cleared,
/// the relaxed predicate must reject.
#[test]
fn is_path_traversal_safe_rejects_dotdot_maybe() {
    // Only the prefix lock is set; dotdot is left at its default.
    let lock_only = PathFact::default().with_prefix_lock("/var/app/");

    let safe = lock_only.is_path_traversal_safe();
    assert!(!safe);
}
|
|
|
|
/// With `absolute = Yes` and no prefix lock, the relaxed predicate must
/// reject even though the dotdot axis is cleared.
#[test]
fn is_path_traversal_safe_rejects_absolute_without_lock() {
    // Start from a dotdot-cleared fact and override only the absolute axis;
    // no prefix lock is ever installed.
    let fact = PathFact {
        absolute: Tri::Yes,
        ..PathFact::default().with_dotdot_cleared()
    };

    assert!(!fact.is_path_traversal_safe());
}
|
|
|
|
/// The Bottom fact is rejected by `is_path_traversal_safe`.
#[test]
fn is_path_traversal_safe_rejects_bottom() {
    let bottom = PathFact::bottom();
    assert!(!bottom.is_path_traversal_safe());
}
|
|
|
|
/// `fs::canonicalize` applied to a Top input yields dotdot=No,
/// normalized=Yes and absolute=Yes.
#[test]
fn primitive_canonicalize_normalises() {
    let input = PathFact::top();
    let out = classify_path_primitive("fs::canonicalize", &input).unwrap();

    assert_eq!(out.dotdot, Tri::No);
    assert_eq!(out.normalized, Tri::Yes);
    assert_eq!(out.absolute, Tri::Yes);
}
|
|
|
|
/// The bare method name `canonicalize` is recognised too and sets
/// normalized=Yes on a Top input.
#[test]
fn primitive_method_canonicalize_normalises() {
    let input = PathFact::top();
    let out = classify_path_primitive("canonicalize", &input).unwrap();

    assert_eq!(out.normalized, Tri::Yes);
}
|
|
|
|
/// `Path::new` returns its input fact unchanged.
#[test]
fn primitive_path_new_passthrough() {
    let before = PathFact::default()
        .with_dotdot_cleared()
        .with_absolute_cleared();

    let after = classify_path_primitive("Path::new", &before).unwrap();

    assert_eq!(after, before, "Path::new passes PathFact through unchanged");
}
|
|
|
|
/// `PathBuf::from` returns its input fact unchanged.
#[test]
fn primitive_pathbuf_from_passthrough() {
    let before = PathFact::default().with_dotdot_cleared();

    let after = classify_path_primitive("PathBuf::from", &before).unwrap();

    assert_eq!(after, before);
}
|
|
|
|
/// Callee names that are not path primitives produce `None`.
#[test]
fn primitive_unknown_returns_none() {
    let input = PathFact::top();

    let plain_fn = classify_path_primitive("unknown_fn", &input);
    let qualified_fn = classify_path_primitive("vec::new", &input);

    assert!(plain_fn.is_none());
    assert!(qualified_fn.is_none());
}
|
|
|
|
// ── Structural variant-ctor classifier ─────────────────────────────
|
|
|
|
/// Names this test expects `is_structural_variant_ctor` to accept.
#[test]
fn variant_ctor_recognises_upper_camel_leaf() {
    let accepted = [
        "Some",
        "Ok",
        "Err",
        "Box::new",
        "std::option::Option::Some",
        // User-defined upper-camel-case names take part in the same way;
        // a fixed name list is not part of the contract.
        "MyResult::Ok",
        "Wrapper",
    ];

    for name in accepted {
        assert!(is_structural_variant_ctor(name), "{name}");
    }
}
|
|
|
|
/// Names whose final segment is lowercase are not structural variant
/// constructors.
#[test]
fn variant_ctor_rejects_lowercase_leaf() {
    let rejected = ["foo", "bar::baz", "std::env::var", "to_string"];

    for name in rejected {
        assert!(!is_structural_variant_ctor(name), "{name}");
    }
}
|
|
|
|
/// Empty, separator-only and numeric names are not structural variant
/// constructors.
#[test]
fn variant_ctor_rejects_empty_or_garbled() {
    let rejected = ["", "::", "123"];

    for name in rejected {
        assert!(!is_structural_variant_ctor(name), "{name:?}");
    }
}
|
|
|
|
// ── PathFactReturnEntry merge / dedup ───────────────────────────────
|
|
|
|
/// Two merges under the same predicate hash leave a single entry whose
/// fact is the join of the two merged facts.
#[test]
fn merge_path_fact_dedups_by_predicate_hash() {
    use crate::summary::ssa_summary::{PathFactReturnEntry, merge_path_fact_return_paths};
    use smallvec::SmallVec;

    // Builds an entry with no known-true / known-false bits and no inner
    // variant fact, so only the hash and the fact vary between merges.
    let entry_for = |hash: u64, fact: &PathFact| PathFactReturnEntry {
        predicate_hash: hash,
        known_true: 0,
        known_false: 0,
        path_fact: fact.clone(),
        variant_inner_fact: None,
    };

    let dotdot_cleared = PathFact::top().with_dotdot_cleared();
    let absolute_cleared = PathFact::top().with_absolute_cleared();

    let mut acc: SmallVec<[PathFactReturnEntry; 2]> = SmallVec::new();
    merge_path_fact_return_paths(&mut acc, &[entry_for(42, &dotdot_cleared)]);
    merge_path_fact_return_paths(&mut acc, &[entry_for(42, &absolute_cleared)]);

    assert_eq!(acc.len(), 1, "same predicate hash collapses to one entry");

    let expected = dotdot_cleared.join(&absolute_cleared);
    assert_eq!(
        acc[0].path_fact, expected,
        "facts join on predicate-hash collision"
    );
}
|
|
|
|
/// Entries with different predicate hashes are not merged: both survive in
/// the accumulator.
#[test]
fn merge_path_fact_distinct_hashes_kept_separate() {
    use crate::summary::ssa_summary::{PathFactReturnEntry, merge_path_fact_return_paths};
    use smallvec::SmallVec;

    let first = PathFactReturnEntry {
        predicate_hash: 1,
        known_true: 0,
        known_false: 0,
        path_fact: PathFact::top().with_dotdot_cleared(),
        variant_inner_fact: None,
    };
    // The second entry also carries an inner variant fact.
    let second = PathFactReturnEntry {
        predicate_hash: 2,
        known_true: 0,
        known_false: 0,
        path_fact: PathFact::top(),
        variant_inner_fact: Some(PathFact::top().with_absolute_cleared()),
    };
    let batch = [first, second];

    let mut acc: SmallVec<[PathFactReturnEntry; 2]> = SmallVec::new();
    merge_path_fact_return_paths(&mut acc, &batch);

    assert_eq!(acc.len(), 2);
}
|
|
|
|
/// Merging more distinct predicate hashes than the cap must keep the
/// accumulator within `MAX_PATH_FACT_RETURN_ENTRIES` and leave an entry
/// with `predicate_hash == 0` behind.
#[test]
fn merge_path_fact_overflow_caps_at_bound() {
    use crate::summary::ssa_summary::{
        MAX_PATH_FACT_RETURN_ENTRIES, PathFactReturnEntry, merge_path_fact_return_paths,
    };
    use smallvec::SmallVec;

    let mut acc: SmallVec<[PathFactReturnEntry; 2]> = SmallVec::new();

    // Feed in twice the cap's worth of distinct hashes so the overflow
    // collapse triggers more than once. A collapse compacts the
    // accumulator to a single Top-predicate entry and the following insert
    // lands fresh on top of it. What matters here is bounded growth: the
    // final length may not exceed `MAX_PATH_FACT_RETURN_ENTRIES`.
    let rounds = MAX_PATH_FACT_RETURN_ENTRIES * 2;
    for i in 0..rounds {
        let entry = PathFactReturnEntry {
            predicate_hash: i as u64 + 100,
            known_true: 0,
            known_false: 0,
            path_fact: PathFact::top().with_dotdot_cleared(),
            variant_inner_fact: None,
        };
        merge_path_fact_return_paths(&mut acc, &[entry]);
    }

    assert!(
        acc.len() <= MAX_PATH_FACT_RETURN_ENTRIES,
        "overflow growth stays bounded: got {}",
        acc.len()
    );

    // Regardless of which post-collapse entries remain, at least one must
    // be the unguarded (predicate_hash == 0) sentinel left by an earlier
    // overflow.
    let sentinel_present = acc.iter().any(|entry| entry.predicate_hash == 0);
    assert!(sentinel_present, "collapse sentinel must persist");
}
|
|
|
|
/// Checks that `leq` agrees with `join`: x ⊑ y iff join(x, y) == y
/// (within the domain's join-semilattice).
#[test]
fn leq_consistent_with_join() {
    let coarser = PathFact::default().with_dotdot_cleared();
    let finer = PathFact::default()
        .with_dotdot_cleared()
        .with_absolute_cleared();

    // `finer` is strictly more informative, so it sits below `coarser` and
    // joining the two gives back `coarser`.
    assert!(finer.leq(&coarser));
    assert_eq!(finer.join(&coarser), coarser);
}
|
|
}
|