Python FP and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
Eli Peter 2026-04-29 19:53:34 -04:00 committed by GitHub
parent 4db0805de6
commit a438886217
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
291 changed files with 9485 additions and 3851 deletions

View file

@ -19,8 +19,8 @@ use serde::{Deserialize, Serialize};
/// Bit-level abstract fact: known-zero and known-one masks.
///
/// - `top()` = `{known_zero: 0, known_one: 0}` no bits known
/// - `bottom()` = `{known_zero: MAX, known_one: MAX}` contradictory
/// - `top()` = `{known_zero: 0, known_one: 0}`, no bits known
/// - `bottom()` = `{known_zero: MAX, known_one: MAX}`, contradictory
/// - `from_const(n)` = all 64 bits known
///
/// Invariant: `known_zero & known_one == 0` for non-bottom values.
@ -253,7 +253,7 @@ impl AbstractDomain for BitFact {
}
}
/// Widen: same as join (finite lattice height 64 bits × 3 states).
/// Widen: same as join (finite lattice height, 64 bits × 3 states).
fn widen(&self, other: &Self) -> Self {
self.join(other)
}
@ -511,7 +511,7 @@ mod tests {
#[test]
fn right_shift_unknown_sign() {
// Sign bit unknown high bits after shift should be unknown
// Sign bit unknown, high bits after shift should be unknown
let a = BitFact {
known_zero: 0x0F,
known_one: 0,
@ -687,7 +687,7 @@ mod tests {
}
}
/// `a ⊓ b ⊑ a` and `a ⊓ b ⊑ b` meet is the greatest lower bound.
/// `a ⊓ b ⊑ a` and `a ⊓ b ⊑ b`, meet is the greatest lower bound.
#[test]
fn meet_is_lower_bound_bit() {
let xs = sample_bits();
@ -700,7 +700,7 @@ mod tests {
}
}
/// `a ⊑ a ⊔ b` and `b ⊑ a ⊔ b` join is the least upper bound.
/// `a ⊑ a ⊔ b` and `b ⊑ a ⊔ b`, join is the least upper bound.
#[test]
fn join_is_upper_bound_bit() {
let xs = sample_bits();

View file

@ -10,9 +10,9 @@ use serde::{Deserialize, Serialize};
/// Numeric interval: `[lo, hi]` inclusive bounds.
///
/// - `top()` = `[None, None]` any integer
/// - `bottom()` = `[1, 0]` empty / unsatisfiable (lo > hi)
/// - `exact(n)` = `[n, n]` singleton
/// - `top()` = `[None, None]`, any integer
/// - `bottom()` = `[1, 0]`, empty / unsatisfiable (lo > hi)
/// - `exact(n)` = `[n, n]`, singleton
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct IntervalFact {
pub lo: Option<i64>,
@ -278,7 +278,7 @@ impl IntervalFact {
/// - One non-negative singleton mask `m`: `[0, m]` regardless of other
/// operand's sign (two's complement AND with a non-negative mask always
/// produces a non-negative result bounded by the mask).
/// - Both non-negative: `[0, min(a.hi, b.hi)]` AND can only clear bits.
/// - Both non-negative: `[0, min(a.hi, b.hi)]`, AND can only clear bits.
pub fn bit_and(&self, other: &Self) -> Self {
if self.is_bottom() || other.is_bottom() {
return Self::bottom();
@ -330,7 +330,7 @@ impl IntervalFact {
/// - Singletons: exact computation.
/// - `x | 0` → `x`, `0 | x` → `x`.
/// - Both non-negative with known upper bounds: `[max(a.lo, b.lo),
/// next_pow2_minus1(max(a.hi, b.hi))]` OR can set any bit below
/// next_pow2_minus1(max(a.hi, b.hi))]`, OR can set any bit below
/// the highest set bit of either operand.
pub fn bit_or(&self, other: &Self) -> Self {
if self.is_bottom() || other.is_bottom() {
@ -1054,7 +1054,7 @@ mod tests {
let a = IntervalFact::exact(i64::MIN);
let b = IntervalFact::exact(-1);
let r = a.div(&b);
// Either bound becomes None (graceful) exact representation
// Either bound becomes None (graceful), exact representation
// depends on the impl, but we mainly assert no panic occurred
// and the result is a valid interval.
assert!(
@ -1078,7 +1078,7 @@ mod tests {
assert_eq!(r.hi, Some(2));
}
/// Modulo by an interval that *contains* zero must escape to Top
/// Modulo by an interval that *contains* zero must escape to Top:
/// modulo-by-zero is undefined and we cannot precisely narrow it.
#[test]
fn modulo_divisor_spans_zero_is_top() {
@ -1096,7 +1096,7 @@ mod tests {
/// `[i64::MIN, i64::MAX]` is the maximal interval. Any join with
/// any other interval must remain `[i64::MIN, i64::MAX]` (or Top
/// equivalent) this guards against accidental narrowing on join.
/// equivalent), this guards against accidental narrowing on join.
#[test]
fn full_range_is_join_absorbing() {
let full = IntervalFact {
@ -1347,7 +1347,7 @@ mod tests {
);
}
/// Modulo with exact-zero divisor must escape to Top.
/// Modulo with an exact-zero divisor must escape to Top.
#[test]
fn modulo_by_exact_zero_is_top() {
let a = IntervalFact {

View file

@ -45,7 +45,7 @@ pub fn is_enabled() -> bool {
/// Per-SSA-value abstract element: product of all subdomains.
///
/// Each subdomain is independent join, meet, widen, and leq are applied
/// Each subdomain is independent: join, meet, widen, and leq are applied
/// component-wise. Adding a new subdomain requires adding a field here
/// and updating the component-wise implementations.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
@ -182,15 +182,15 @@ pub const MAX_LITERAL_PREFIX_LEN: usize = 64;
/// restricted so the summary size stays constant regardless of callee body
/// complexity:
///
/// * [`IntervalTransfer::Top`] no interval knowledge crosses (default).
/// * [`IntervalTransfer::Identity`] return = param (pass-through).
/// * [`IntervalTransfer::Affine`] return = param * `mul` + `add` with
/// * [`IntervalTransfer::Top`], no interval knowledge crosses (default).
/// * [`IntervalTransfer::Identity`], return = param (pass-through).
/// * [`IntervalTransfer::Affine`], return = param * `mul` + `add` with
/// `i64` constants; overflow defaults to Top at apply time.
/// * [`IntervalTransfer::Clamped`] return is always in `[lo, hi]` regardless
/// * [`IntervalTransfer::Clamped`], return is always in `[lo, hi]` regardless
/// of input. Captures callee-intrinsic bounds (e.g. `saturating` ops).
///
/// No unbounded expression trees, no nesting. A callee whose behaviour does
/// not fit one of these forms falls back to `Top` we never try to encode
/// not fit one of these forms falls back to `Top`, we never try to encode
/// richer algebra in the summary.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum IntervalTransfer {
@ -247,9 +247,9 @@ impl IntervalTransfer {
/// Mirrors [`IntervalTransfer`] for the string subdomain. Bounded by
/// [`MAX_LITERAL_PREFIX_LEN`] to keep summary size constant.
///
/// * [`StringTransfer::Unknown`] default.
/// * [`StringTransfer::Identity`] return = param.
/// * [`StringTransfer::LiteralPrefix`] return has this literal prefix
/// * [`StringTransfer::Unknown`], default.
/// * [`StringTransfer::Identity`], return = param.
/// * [`StringTransfer::LiteralPrefix`], return has this literal prefix
/// regardless of input (callee-intrinsic).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum StringTransfer {
@ -325,7 +325,7 @@ impl StringTransfer {
/// caller's knowledge of each argument, without having to re-run the callee.
///
/// Composition rule: `apply(input) = (interval.apply, string.apply,
/// bits=top)`. The bit domain is always Top we do not track cross-file
/// bits=top)`. The bit domain is always Top, we do not track cross-file
/// bit transfers.
#[derive(Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct AbstractTransfer {
@ -351,7 +351,7 @@ impl AbstractTransfer {
Self::default()
}
/// True when neither subdomain carries any information equivalent to
/// True when neither subdomain carries any information, equivalent to
/// "omit this entry entirely".
pub fn is_top(&self) -> bool {
is_interval_top(&self.interval) && is_string_unknown(&self.string)
@ -410,7 +410,7 @@ impl AbstractState {
/// Set abstract value for an SSA value. Drops Top values to save space.
pub fn set(&mut self, v: SsaValue, val: AbstractValue) {
if val.is_top() {
// Don't store Top it's the default
// Don't store Top, it's the default
if let Ok(idx) = self.values.binary_search_by_key(&v, |(id, _)| *id) {
self.values.remove(idx);
}
@ -422,7 +422,7 @@ impl AbstractState {
if self.values.len() < MAX_ABSTRACT_VALUES {
self.values.insert(idx, (v, val));
}
// Over budget: silently drop (conservative defaults to Top)
// Over budget: silently drop (conservative, defaults to Top)
}
}
}

View file

@ -15,7 +15,7 @@
//! Each axis is a three-value lattice [`Tri::No`] / [`Tri::Yes`] / [`Tri::Maybe`]
//! where `Maybe` is Top (unknown) and `No` / `Yes` are the two definite
//! refinements. A value is path-safe for a FILE_IO sink iff
//! `dotdot == No && absolute == No` i.e. we have proof that *no* `..`
//! `dotdot == No && absolute == No`, i.e. we have proof that *no* `..`
//! component and *no* absolute root can leak through. `normalized == Yes`
//! alone is not sufficient (canonicalising an absolute input still produces
//! an absolute path); prefix_lock is used separately to certify containment
@ -52,7 +52,7 @@ pub enum Tri {
No,
/// Proven present.
Yes,
/// Unknown no transfer or guard has proved the axis yet.
/// Unknown, no transfer or guard has proved the axis yet.
Maybe,
}
@ -367,12 +367,12 @@ impl AbstractDomain for PathFact {
/// narrowed axis can be proved safe.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathRejection {
/// `x.contains("..")` false branch proves `dotdot = No` on the receiver.
/// `x.contains("..")`, false branch proves `dotdot = No` on the receiver.
DotDot,
/// `x.starts_with("/")` / `x.starts_with('\\')` false branch proves
/// `x.starts_with("/")` / `x.starts_with('\\')`, false branch proves
/// `absolute = No` on the receiver.
AbsoluteSlash,
/// `x.is_absolute()` / `Path::new(x).is_absolute()` false branch proves
/// `x.is_absolute()` / `Path::new(x).is_absolute()`, false branch proves
/// `absolute = No` on the argument/receiver.
IsAbsolute,
/// Not a path-rejection idiom.
@ -384,7 +384,7 @@ pub enum PathRejection {
/// the listed axis is refined.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathAssertion {
/// `x.starts_with("<literal_root>")` true branch attaches
/// `x.starts_with("<literal_root>")`, true branch attaches
/// `prefix_lock = Some("<literal_root>")` to the receiver.
PrefixLock(String),
/// Not a path-assertion idiom.
@ -426,7 +426,7 @@ pub fn classify_path_rejection_axes(text: &str) -> smallvec::SmallVec<[PathRejec
let clause = clause.trim();
// Multi-axis special case: `!filepath.IsLocal(p)` (Go).
// `filepath.IsLocal` returns true iff the path stays within the
// current directory no leading `/`, no `..` segments, no Windows
// current directory, no leading `/`, no `..` segments, no Windows
// drive root. Idiomatic Go path-traversal guard:
// `if !filepath.IsLocal(p) { return }`
// The TRUE branch terminates; the FALSE branch (where IsLocal is
@ -449,7 +449,7 @@ pub fn classify_path_rejection_axes(text: &str) -> smallvec::SmallVec<[PathRejec
out
}
/// Detect `!filepath.IsLocal(<expr>)` Go's idiomatic path-traversal
/// Detect `!filepath.IsLocal(<expr>)`, Go's idiomatic path-traversal
/// guard. Whitespace-tolerant: `! filepath.IsLocal(`, `!filepath . IsLocal(`,
/// etc. Used by [`classify_path_rejection_axes`] to inject both
/// [`PathRejection::DotDot`] and [`PathRejection::IsAbsolute`] on the false
@ -475,7 +475,7 @@ fn has_negated_filepath_is_local(clause: &str) -> bool {
fn classify_path_rejection_atom(clause: &str) -> PathRejection {
// `.contains("..")` (Rust, Java) / `.includes("..")` (JS/TS) /
// `.include?("..")` (Ruby) / `strings.Contains(s, "..")` (Go) /
// `strstr(s, "..")` (C/C++) every form recognised by
// `strstr(s, "..")` (C/C++), every form recognised by
// `extract_contains_arg` returns `..` if the needle is the dotdot
// segment.
if let Some(needle) = extract_contains_arg(clause)
@ -483,7 +483,7 @@ fn classify_path_rejection_atom(clause: &str) -> PathRejection {
{
return PathRejection::DotDot;
}
// Python `".." in s` operator form. Look for `".." in <something>`
// Python `".." in s`, operator form. Look for `".." in <something>`
// anywhere in the clause text. Conservative: requires the literal
// `".." in ` substring (whitespace-tolerant).
if has_python_dotdot_in(clause) {
@ -681,7 +681,7 @@ pub fn classify_path_assertion(text: &str) -> PathAssertion {
/// * Must be non-empty.
/// * The leaf segment must begin with an ASCII uppercase letter
/// (Rust's variant / struct / type grammar).
/// * The leaf segment must be ASCII alphanumeric / underscore no
/// * The leaf segment must be ASCII alphanumeric / underscore, no
/// method call noise (parentheses, argument lists) survives here
/// because callees arrive in their normalised scoped-identifier
/// form.
@ -700,7 +700,7 @@ pub fn is_structural_variant_ctor(callee: &str) -> bool {
// upper-camel-case names an enum variant or tuple struct (`Some`,
// `Ok`, `MyResult`). A scoped identifier whose *penultimate*
// segment is upper-camel-case names an associated constructor on
// that type `Box::new`, `Cell::from`, `PathBuf::with_capacity`,
// that type: `Box::new`, `Cell::from`, `PathBuf::with_capacity`,
// etc. The latter is the lower-leaf-case shape we want to admit
// alongside the bare-variant shape.
let segments: smallvec::SmallVec<[&str; 4]> =
@ -731,7 +731,7 @@ pub fn is_structural_variant_ctor(callee: &str) -> bool {
/// PathFact of the receiver/first argument (the value being sanitised);
/// it is used as the baseline to which the call's effect is applied.
///
/// Returned [`None`] means the callee is not a recognised path primitive
/// Returned [`None`] means the callee is not a recognised path primitive;
/// the caller should leave the result at its pre-existing PathFact (Top).
///
/// Backwards-compatible wrapper around [`classify_path_primitive_rust`].
@ -743,7 +743,7 @@ pub fn classify_path_primitive(callee: &str, input_fact: &PathFact) -> Option<Pa
/// Per-language path-primitive dispatcher.
///
/// Routes to the language-specific classifier Rust, Python, JS/TS, Go,
/// Routes to the language-specific classifier: Rust, Python, JS/TS, Go,
/// Java, Ruby, PHP, or C/C++. Returns [`None`] for languages without a
/// classifier (or callees the language's classifier doesn't recognise).
pub fn classify_path_primitive_for_lang(
@ -784,7 +784,7 @@ pub fn is_structural_variant_ctor_for_lang(lang: crate::symbol::Lang, callee: &s
}
/// Per-language predicate for "this callee is a zero-arg fresh-allocation
/// constructor" used by the variant-rejection-path classifier so that
/// constructor", used by the variant-rejection-path classifier so that
/// `String::new()` (Rust) / `''` (Python/JS/Java/...) is recognised as a
/// no-attacker-content fresh value with cleared `dotdot`/`absolute` axes.
///
@ -803,7 +803,7 @@ pub fn is_zero_arg_allocator_for_lang(lang: crate::symbol::Lang, _callee: &str)
false
}
/// Rust path-primitive classifier `fs::canonicalize`, `Path::new`,
/// Rust path-primitive classifier, `fs::canonicalize`, `Path::new`,
/// `PathBuf::from`, identity-string conversions.
pub fn classify_path_primitive_rust(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
// Accept both path-qualified (`std::fs::canonicalize`, `fs::canonicalize`)
@ -826,7 +826,7 @@ pub fn classify_path_primitive_rust(callee: &str, input_fact: &PathFact) -> Opti
// `Path::new(s)` / `PathBuf::from(s)`:
// pass-through of the input's PathFact so downstream `starts_with`
// checks against a Path/PathBuf value still see the underlying
// string's narrowed axes. No axis is forced wrapping does not
// string's narrowed axes. No axis is forced, wrapping does not
// sanitize on its own.
"new" | "from" => {
if callee_contains_segment(callee, "Path") || callee_contains_segment(callee, "PathBuf")
@ -837,8 +837,8 @@ pub fn classify_path_primitive_rust(callee: &str, input_fact: &PathFact) -> Opti
}
}
// Identity conversions on strings/paths. Each one re-binds the
// same logical value the converted String / PathBuf / OsString
// still describes the exact same filesystem path so the PathFact
// same logical value, the converted String / PathBuf / OsString
// still describes the exact same filesystem path, so the PathFact
// flows through unchanged. Without this, a sanitised `s: &str`
// would lose its narrowed axes the moment the helper returns
// `s.to_string()` / `s.to_owned()` / `String::from(s)`.
@ -849,7 +849,7 @@ pub fn classify_path_primitive_rust(callee: &str, input_fact: &PathFact) -> Opti
}
}
/// Python path-primitive classifier `os.path.normpath`, `os.path.realpath`,
/// Python path-primitive classifier, `os.path.normpath`, `os.path.realpath`,
/// `pathlib.Path.resolve`, `os.path.abspath`.
///
/// Pattern conventions: tree-sitter-python emits dotted attribute access as
@ -893,7 +893,7 @@ pub fn classify_path_primitive_python(callee: &str, input_fact: &PathFact) -> Op
}
}
/// JavaScript / TypeScript path-primitive classifier Node's `path` module:
/// JavaScript / TypeScript path-primitive classifier, Node's `path` module:
/// `path.normalize`, `path.resolve`, `path.join`.
pub fn classify_path_primitive_js(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
let leaf = rightmost_segment(callee);
@ -920,7 +920,7 @@ pub fn classify_path_primitive_js(callee: &str, input_fact: &PathFact) -> Option
}
}
/// Go path-primitive classifier `path/filepath` package:
/// Go path-primitive classifier, `path/filepath` package:
/// `filepath.Clean`, `filepath.Abs`.
pub fn classify_path_primitive_go(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
let leaf = rightmost_segment(callee);
@ -947,7 +947,7 @@ pub fn classify_path_primitive_go(callee: &str, input_fact: &PathFact) -> Option
}
}
/// Java path-primitive classifier `java.nio.file.Path.normalize` /
/// Java path-primitive classifier, `java.nio.file.Path.normalize` /
/// `Paths.get(s).normalize().toAbsolutePath()`.
pub fn classify_path_primitive_java(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
let leaf = rightmost_segment(callee);
@ -980,7 +980,7 @@ pub fn classify_path_primitive_java(callee: &str, input_fact: &PathFact) -> Opti
}
}
/// Ruby path-primitive classifier `File.expand_path` / `Pathname#cleanpath`.
/// Ruby path-primitive classifier, `File.expand_path` / `Pathname#cleanpath`.
pub fn classify_path_primitive_ruby(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
let leaf = rightmost_segment(callee);
match leaf {
@ -1005,13 +1005,13 @@ pub fn classify_path_primitive_ruby(callee: &str, input_fact: &PathFact) -> Opti
}
}
/// PHP path-primitive classifier `realpath`, `basename`.
/// PHP path-primitive classifier, `realpath`, `basename`.
pub fn classify_path_primitive_php(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
let leaf = rightmost_segment(callee);
match leaf {
// `realpath($s)`:
// Resolves symlinks and `..`, returns absolute path. Returns
// `false` if the file doesn't exist but on the success path
// `false` if the file doesn't exist, but on the success path
// (which is what reaches a sink), it produces a clean absolute path.
"realpath" => {
let mut f = input_fact.clone();
@ -1021,7 +1021,7 @@ pub fn classify_path_primitive_php(callee: &str, input_fact: &PathFact) -> Optio
Some(f)
}
// `basename($s)`:
// Strips directory components guaranteed to contain no `..`
// Strips directory components, guaranteed to contain no `..`
// (basename of `..` is `..`, but basename of any traversal-
// prefixed path is just the leaf). Conservative: clear dotdot.
"basename" => {
@ -1034,7 +1034,7 @@ pub fn classify_path_primitive_php(callee: &str, input_fact: &PathFact) -> Optio
}
}
/// C / C++ path-primitive classifier POSIX `realpath`,
/// C / C++ path-primitive classifier, POSIX `realpath`,
/// `std::filesystem::canonical`.
pub fn classify_path_primitive_c_cpp(callee: &str, input_fact: &PathFact) -> Option<PathFact> {
let leaf = rightmost_segment(callee);
@ -1089,7 +1089,7 @@ fn extract_contains_arg(text: &str) -> Option<String> {
"strstr(",
] {
if let Some(idx) = text.find(prefix) {
// Skip past the first argument (receiver) the literal needle
// Skip past the first argument (receiver), the literal needle
// is the second arg, separated by a comma. Find the comma at
// top level inside this call.
let inner = &text[idx + prefix.len()..];
@ -1123,7 +1123,7 @@ fn extract_starts_with_arg(text: &str) -> Option<String> {
return Some(s);
}
}
// Go free-function form `strings.HasPrefix(r, "/")` second arg.
// Go free-function form `strings.HasPrefix(r, "/")`, second arg.
if let Some(idx) = text.find("strings.HasPrefix(") {
let inner = &text[idx + "strings.HasPrefix(".len()..];
if let Some(comma_idx) = top_level_comma(inner) {
@ -1762,7 +1762,7 @@ mod tests {
assert!(is_structural_variant_ctor("Box::new"));
assert!(is_structural_variant_ctor("std::option::Option::Some"));
// User-defined upper-camel-case variant name participates the
// same way name list is not part of the contract.
// same way, name list is not part of the contract.
assert!(is_structural_variant_ctor("MyResult::Ok"));
assert!(is_structural_variant_ctor("Wrapper"));
}

View file

@ -1,6 +1,6 @@
//! String abstract domain for abstract interpretation.
//!
//! Tracks known prefix, suffix, and — when provably bounded — the finite set
//! Tracks known prefix, suffix, and, when provably bounded, the finite set
//! of possible concrete string values. Used for SSRF suppression (URL prefix
//! proves host is locked), command-injection suppression (lookup result
//! bounded to a safe set of literals), and general string analysis.
@ -78,7 +78,7 @@ impl StringFact {
/// the finite domain is `{s}`.
///
/// Empty prefix/suffix are normalised to `None` because "starts/ends with
/// the empty string" carries no constraint keeping `Some("")` would
/// the empty string" carries no constraint, keeping `Some("")` would
/// break join idempotence (`Some("")` ⊔ `Some("")` collapses to `None`).
pub fn exact(s: &str) -> Self {
let prefix = truncate_prefix(s);
@ -134,7 +134,7 @@ impl StringFact {
/// Inputs are sorted and deduped. If the cardinality exceeds
/// [`MAX_DOMAIN_SIZE`] or the input is empty, the domain collapses to
/// `None` (Top on this sub-field). The prefix/suffix sub-fields remain
/// unset callers can combine with [`Self::exact`] for single-element
/// unset, callers can combine with [`Self::exact`] for single-element
/// sets if tighter facts are desired.
pub fn finite_set(values: Vec<String>) -> Self {
let mut v = values;
@ -411,7 +411,7 @@ fn truncate_suffix(s: &str) -> String {
/// Longest common prefix of two strings, char-aligned.
///
/// Iterates by `char` rather than `byte` so multi-byte UTF-8 code points are
/// either kept whole or dropped a byte-wise comparison would slice into the
/// either kept whole or dropped, a byte-wise comparison would slice into the
/// middle of a code point and produce mojibake (`x as char` on a UTF-8
/// continuation byte yields a garbage Latin-1 character).
pub fn longest_common_prefix(a: &str, b: &str) -> String {
@ -746,7 +746,7 @@ mod tests {
let a = StringFact::from_prefix("https://api.example.com/");
let b = StringFact::from_prefix("https://db.example.com/");
let r = a.join(&b);
// Common prefix is "https://" anything past that diverges.
// Common prefix is "https://", anything past that diverges.
assert_eq!(
r.prefix.as_deref(),
Some("https://"),
@ -781,7 +781,7 @@ mod tests {
]
}
/// `x ⊔ x = x` join is idempotent across all sample shapes.
/// `x ⊔ x = x`, join is idempotent across all sample shapes.
#[test]
fn join_idempotent_string() {
for a in sample_strings() {
@ -789,7 +789,7 @@ mod tests {
}
}
/// `x ⊔ y = y ⊔ x` join is commutative.
/// `x ⊔ y = y ⊔ x`, join is commutative.
#[test]
fn join_commutative_string() {
let xs = sample_strings();
@ -806,7 +806,7 @@ mod tests {
}
}
/// `x ⊓ x = x` meet is idempotent.
/// `x ⊓ x = x`, meet is idempotent.
#[test]
fn meet_idempotent_string() {
for a in sample_strings() {
@ -814,7 +814,7 @@ mod tests {
}
}
/// `x ⊓ y = y ⊓ x` meet is commutative.
/// `x ⊓ y = y ⊓ x`, meet is commutative.
#[test]
fn meet_commutative_string() {
let xs = sample_strings();
@ -844,7 +844,7 @@ mod tests {
}
}
/// `x ⊑ x` leq is reflexive.
/// `x ⊑ x`, leq is reflexive.
#[test]
fn leq_reflexive_string() {
for a in sample_strings() {
@ -852,7 +852,7 @@ mod tests {
}
}
/// **Soundness**: `widen(a, b) ⊒ join(a, b)` widening must
/// **Soundness**: `widen(a, b) ⊒ join(a, b)`, widening must
/// over-approximate join, otherwise dataflow loses information.
#[test]
fn widen_over_approximates_join_string() {
@ -905,7 +905,7 @@ mod tests {
}
}
/// Empty-string exact value must distinguish from Top it is a
/// Empty-string exact value must distinguish from Top, it is a
/// singleton (`{""}`), not unconstrained. After the empty-prefix
/// normalisation, prefix/suffix are `None` (carry no extra info)
/// but the `domain` field still pins the value to exactly `""`.