nyx/src/abstract_interp/string_domain.rs

1011 lines
34 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! String abstract domain for abstract interpretation.
//!
//! Tracks known prefix, suffix, and, when provably bounded, the finite set
//! of possible concrete string values. Used for SSRF suppression (URL prefix
//! proves host is locked), command-injection suppression (lookup result
//! bounded to a safe set of literals), and general string analysis.
use crate::state::lattice::{AbstractDomain, Lattice};
use serde::{Deserialize, Serialize};
/// Maximum tracked prefix length (bytes).
///
/// `truncate_prefix` cuts longer inputs back to the nearest UTF-8 char
/// boundary at or below this many bytes.
pub const MAX_PREFIX_LEN: usize = 256;
/// Maximum tracked suffix length (bytes).
///
/// `truncate_suffix` keeps only the tail of longer inputs, starting at the
/// first UTF-8 char boundary that leaves at most this many bytes.
pub const MAX_SUFFIX_LEN: usize = 128;
/// Maximum tracked finite-domain cardinality. Beyond this, `domain` widens
/// to `None` (Top on the domain sub-field).
///
/// The cap is enforced both when a set is built (`StringFact::finite_set`)
/// and when two sets are unioned (`StringFact::join`).
pub const MAX_DOMAIN_SIZE: usize = 16;
/// Single-character shell metacharacters. A string containing any of these
/// cannot be passed as a single shell word without escaping, so bounded
/// sets containing them cannot suppress `Cap::SHELL_ESCAPE`.
///
/// The first group is the set of characters the POSIX shell always requires
/// to be quoted (plus `\r` and `\0`). The second group is the
/// pathname-expansion (glob) characters: an unquoted word containing one of
/// them may be replaced by several file names, so it is not guaranteed to
/// stay a single token either.
const SHELL_METACHARS: &[char] = &[
    // Operators, expansions, quoting and word separators.
    ';', '|', '&', '`', '$', '>', '<', '(', ')', '\n', '\r', '\0', '\\', '"', '\'', ' ', '\t',
    // Pathname expansion (globbing).
    '*', '?', '[',
];

/// Return `true` when `s` contains no shell metacharacter and is therefore
/// safe to pass as a single shell token.
///
/// The check is purely per-character against [`SHELL_METACHARS`]; the empty
/// string contains none of them and is reported as safe.
pub fn is_shell_safe_literal(s: &str) -> bool {
    // `&[char]` is a `Pattern` meaning "any of these characters".
    !s.contains(SHELL_METACHARS)
}
/// String abstract domain: tracks known prefix, suffix, and finite domain.
///
/// Lattice ordering:
/// - `Bottom` ⊑ everything (unsatisfiable)
/// - Concrete facts ⊑ `Top` (no knowledge)
/// - `Some(prefix)` ⊑ `None` (no prefix known)
/// - `Some({a,b})` ⊑ `Some({a,b,c})` ⊑ `None` (subset → wider → Top)
///
/// Prefix, suffix, and domain are independent: a value can carry any subset
/// of the three. The lattice operations treat each sub-field on its own;
/// `domain` elements are never checked against `prefix` or `suffix`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct StringFact {
/// Known prefix of the string. `None` = unknown.
/// The constructors never store an empty prefix and cap the stored value
/// at `MAX_PREFIX_LEN` bytes.
pub prefix: Option<String>,
/// Known suffix of the string. `None` = unknown.
/// The constructors never store an empty suffix and cap the stored value
/// at `MAX_SUFFIX_LEN` bytes.
pub suffix: Option<String>,
/// Known finite set of possible concrete values. `None` = unknown set.
/// `Some(vec)` with `vec.len() <= MAX_DOMAIN_SIZE` = value ∈ `vec`.
/// Always sorted and deduped; `meet` and `leq` rely on the ordering
/// because they look elements up with `binary_search`.
pub domain: Option<Vec<String>>,
/// True when this fact is Bottom (unsatisfiable).
/// Private: read it through `is_bottom()`, create Bottom through `bottom()`.
is_bottom: bool,
}
impl StringFact {
/// Top: no knowledge about the string.
pub fn top() -> Self {
Self {
prefix: None,
suffix: None,
domain: None,
is_bottom: false,
}
}
/// Bottom: unsatisfiable / empty set.
pub fn bottom() -> Self {
Self {
prefix: None,
suffix: None,
domain: None,
is_bottom: true,
}
}
/// Fact for a single, fully known string.
///
/// The (possibly truncated) string becomes both prefix and suffix, and the
/// finite domain is the singleton `{s}`.
///
/// An empty prefix or suffix is stored as `None`: "starts/ends with the
/// empty string" constrains nothing, and `join` turns an empty common
/// prefix/suffix into `None`, so keeping `Some("")` here would make
/// `x.join(&x) != x`.
pub fn exact(s: &str) -> Self {
    // Drop affixes that carry no constraint.
    let non_empty = |affix: String| Some(affix).filter(|a| !a.is_empty());
    Self {
        prefix: non_empty(truncate_prefix(s)),
        suffix: non_empty(truncate_suffix(s)),
        domain: Some(vec![s.to_owned()]),
        is_bottom: false,
    }
}
/// Known prefix only. Empty `p` normalises to no-prefix-info (`None`).
pub fn from_prefix(p: &str) -> Self {
let prefix = truncate_prefix(p);
Self {
prefix: if prefix.is_empty() {
None
} else {
Some(prefix)
},
suffix: None,
domain: None,
is_bottom: false,
}
}
/// Known suffix only. Empty `s` normalises to no-suffix-info (`None`).
pub fn from_suffix(s: &str) -> Self {
let suffix = truncate_suffix(s);
Self {
prefix: None,
suffix: if suffix.is_empty() {
None
} else {
Some(suffix)
},
domain: None,
is_bottom: false,
}
}
/// Known finite set of possible concrete values.
///
/// Inputs are sorted and deduped. If the cardinality exceeds
/// [`MAX_DOMAIN_SIZE`] or the input is empty, the domain collapses to
/// `None` (Top on this sub-field). The prefix/suffix sub-fields remain
/// unset, callers can combine with [`Self::exact`] for single-element
/// sets if tighter facts are desired.
pub fn finite_set(values: Vec<String>) -> Self {
let mut v = values;
v.sort();
v.dedup();
let domain = if v.is_empty() || v.len() > MAX_DOMAIN_SIZE {
None
} else {
Some(v)
};
Self {
prefix: None,
suffix: None,
domain,
is_bottom: false,
}
}
pub fn is_top(&self) -> bool {
!self.is_bottom && self.prefix.is_none() && self.suffix.is_none() && self.domain.is_none()
}
/// Returns `true` when this fact is Bottom (unsatisfiable), i.e. the
/// private `is_bottom` flag is set.
pub fn is_bottom(&self) -> bool {
self.is_bottom
}
/// Returns `true` when the finite domain is known and every element is
/// free of shell metacharacters. Used to suppress `Cap::SHELL_ESCAPE`
/// when the payload is provably bounded to a safe set of words.
pub fn is_finite_shell_safe(&self) -> bool {
match &self.domain {
Some(values) if !values.is_empty() => values.iter().all(|s| is_shell_safe_literal(s)),
_ => false,
}
}
// ── Lattice operations ──────────────────────────────────────────────
/// Join (upper bound) of two facts.
///
/// - Bottom is the identity: joining with Bottom returns the other side.
/// - Prefix: longest common prefix of both prefixes; `None` if either side
///   has none or nothing is shared.
/// - Suffix: longest common suffix, with the same rules.
/// - Domain: union of both sets; `None` if either side is unknown or the
///   union exceeds [`MAX_DOMAIN_SIZE`].
pub fn join(&self, other: &Self) -> Self {
    match (self.is_bottom, other.is_bottom) {
        (true, _) => return other.clone(),
        (_, true) => return self.clone(),
        _ => {}
    }

    // An affix survives only when both sides know one and they overlap.
    let prefix = self
        .prefix
        .as_deref()
        .zip(other.prefix.as_deref())
        .map(|(lhs, rhs)| longest_common_prefix(lhs, rhs))
        .filter(|common| !common.is_empty());
    let suffix = self
        .suffix
        .as_deref()
        .zip(other.suffix.as_deref())
        .map(|(lhs, rhs)| longest_common_suffix(lhs, rhs))
        .filter(|common| !common.is_empty());

    let domain = self
        .domain
        .as_ref()
        .zip(other.domain.as_ref())
        .and_then(|(lhs, rhs)| {
            let mut merged: Vec<String> = lhs.iter().chain(rhs).cloned().collect();
            // Restore the sorted + deduped invariant of `domain`.
            merged.sort();
            merged.dedup();
            if merged.len() > MAX_DOMAIN_SIZE {
                None
            } else {
                Some(merged)
            }
        });

    Self {
        prefix,
        suffix,
        domain,
        is_bottom: false,
    }
}
/// Meet: combine the constraints of both facts, sub-field by sub-field.
///
/// - Either side Bottom → Bottom.
/// - Prefix / suffix: if both are known, one must extend the other and the
///   longer one is kept; otherwise the facts contradict and the result is
///   Bottom. If only one side knows an affix, it is kept.
/// - Domain: set intersection when both are known (empty → Bottom); if
///   only one side is known, it is kept.
///
/// The sub-fields are not cross-checked (domain elements are not filtered
/// by the resulting prefix/suffix), so the result is conservative.
pub fn meet(&self, other: &Self) -> Self {
    // Merge two optional affix constraints. `implies(x, y)` holds when
    // constraint `x` already implies `y`. `Err(())` signals a contradiction.
    fn tighter(
        a: &Option<String>,
        b: &Option<String>,
        implies: fn(&str, &str) -> bool,
    ) -> Result<Option<String>, ()> {
        match (a, b) {
            (Some(a), Some(b)) if implies(a, b) => Ok(Some(a.clone())),
            (Some(a), Some(b)) if implies(b, a) => Ok(Some(b.clone())),
            (Some(_), Some(_)) => Err(()),
            (only, None) | (None, only) => Ok(only.clone()),
        }
    }

    if self.is_bottom || other.is_bottom {
        return Self::bottom();
    }

    let prefix = match tighter(&self.prefix, &other.prefix, |x: &str, y: &str| {
        x.starts_with(y)
    }) {
        Ok(prefix) => prefix,
        Err(()) => return Self::bottom(),
    };
    let suffix = match tighter(&self.suffix, &other.suffix, |x: &str, y: &str| {
        x.ends_with(y)
    }) {
        Ok(suffix) => suffix,
        Err(()) => return Self::bottom(),
    };

    let domain = match (&self.domain, &other.domain) {
        (Some(lhs), Some(rhs)) => {
            // `rhs` is sorted, so membership is a binary search.
            let mut shared = lhs.clone();
            shared.retain(|value| rhs.binary_search(value).is_ok());
            if shared.is_empty() {
                return Self::bottom();
            }
            Some(shared)
        }
        (known, None) | (None, known) => known.clone(),
    };

    Self {
        prefix,
        suffix,
        domain,
        is_bottom: false,
    }
}
/// Widen: keep a sub-field only if both iterations agree on it.
///
/// Bottom on either side returns the other side unchanged. Otherwise each
/// of prefix, suffix and domain is kept when `self` and `other` hold the
/// same value, and reset to `None` as soon as it differs.
pub fn widen(&self, other: &Self) -> Self {
    // A sub-field is stable when it did not change between iterations.
    fn keep_if_stable<T: Clone + PartialEq>(old: &Option<T>, new: &Option<T>) -> Option<T> {
        if old == new {
            old.clone()
        } else {
            None
        }
    }

    if self.is_bottom {
        return other.clone();
    }
    if other.is_bottom {
        return self.clone();
    }

    Self {
        prefix: keep_if_stable(&self.prefix, &other.prefix),
        suffix: keep_if_stable(&self.suffix, &other.suffix),
        domain: keep_if_stable(&self.domain, &other.domain),
        is_bottom: false,
    }
}
/// Partial order: `self ⊑ other` when `self` is at least as precise as
/// `other` on every sub-field.
///
/// - Bottom is below everything; nothing but Bottom is below Bottom.
/// - A sub-field that `other` leaves unknown (`None`) imposes no
///   requirement.
/// - A prefix/suffix required by `other` must be extended by the
///   corresponding affix of `self`.
/// - A domain required by `other` must be a superset of `self`'s domain.
pub fn leq(&self, other: &Self) -> bool {
    if self.is_bottom {
        return true;
    }
    if other.is_bottom {
        return false;
    }

    let prefix_ok = match &other.prefix {
        None => true,
        Some(required) => {
            matches!(&self.prefix, Some(ours) if ours.starts_with(required.as_str()))
        }
    };
    if !prefix_ok {
        return false;
    }

    let suffix_ok = match &other.suffix {
        None => true,
        Some(required) => {
            matches!(&self.suffix, Some(ours) if ours.ends_with(required.as_str()))
        }
    };
    if !suffix_ok {
        return false;
    }

    match &other.domain {
        None => true,
        // `allowed` is sorted, so membership is a binary search.
        Some(allowed) => matches!(
            &self.domain,
            Some(ours) if ours.iter().all(|value| allowed.binary_search(value).is_ok())
        ),
    }
}
// ── Transfer functions ──────────────────────────────────────────────
/// Abstract string concatenation `self ++ other`.
///
/// - Bottom on either side yields Bottom.
/// - The result starts with whatever `self` is known to start with.
/// - The result ends with whatever `other` is known to end with.
/// - The finite domain is dropped (`None`): the cross-product of both
///   domains is not tracked.
pub fn concat(&self, other: &Self) -> Self {
    match (self.is_bottom, other.is_bottom) {
        (false, false) => Self {
            prefix: self.prefix.clone(),
            suffix: other.suffix.clone(),
            domain: None,
            is_bottom: false,
        },
        _ => Self::bottom(),
    }
}
/// SSRF helper: build a fact for `new URL(path, base)` where `base` is a
/// literal origin (`https://api.example.com`). The result behaves as
/// `base ++ path`, the locked-host prefix survives even when the path
/// component carries arbitrary taint, and the fact's `prefix` is what
/// `is_string_safe_for_ssrf` consults to suppress the SSRF sink.
///
/// `path` carries any string knowledge for the path component (typically
/// `StringFact::top()`). When the base already ends in `/`, the helper
/// keeps it as-is; otherwise appends a `/` so the prefix unambiguously
/// includes the path separator (the SSRF check looks for
/// `scheme://host/`).
pub fn from_url_with_base(base: &str, path: &Self) -> Self {
let mut anchor = base.to_string();
if !anchor.ends_with('/') {
anchor.push('/');
}
StringFact::exact(&anchor).concat(path)
}
}
impl Lattice for StringFact {
fn bot() -> Self {
Self::bottom()
}
fn join(&self, other: &Self) -> Self {
self.join(other)
}
fn leq(&self, other: &Self) -> bool {
self.leq(other)
}
}
impl AbstractDomain for StringFact {
fn top() -> Self {
Self::top()
}
fn meet(&self, other: &Self) -> Self {
self.meet(other)
}
fn widen(&self, other: &Self) -> Self {
self.widen(other)
}
}
// ── Helpers ─────────────────────────────────────────────────────────────
/// Returns the leading part of `s` that fits in `MAX_PREFIX_LEN` bytes.
///
/// Strings within the limit are returned whole. Longer strings are cut at
/// the last UTF-8 char boundary at or before the limit, so the result is
/// always valid UTF-8 and never longer than the limit.
fn truncate_prefix(s: &str) -> String {
    if s.len() <= MAX_PREFIX_LEN {
        return s.to_owned();
    }
    // Index 0 is always a char boundary, so the search cannot fail.
    let cut = (0..=MAX_PREFIX_LEN)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    s[..cut].to_owned()
}
/// Returns the trailing part of `s` that fits in `MAX_SUFFIX_LEN` bytes.
///
/// Strings within the limit are returned whole. Longer strings keep their
/// tail, starting at the first UTF-8 char boundary at or after
/// `s.len() - MAX_SUFFIX_LEN`, so the result is always valid UTF-8 and
/// never longer than the limit.
fn truncate_suffix(s: &str) -> String {
    let total = s.len();
    if total <= MAX_SUFFIX_LEN {
        return s.to_owned();
    }
    // `total` itself is always a char boundary, so the search cannot fail.
    let from = (total - MAX_SUFFIX_LEN..=total)
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(total);
    s[from..].to_owned()
}
/// Longest common prefix of two strings, char-aligned.
///
/// The comparison walks both strings `char` by `char`, so a multi-byte
/// UTF-8 code point is either part of the result as a whole or not at all;
/// the result is never cut in the middle of a code point.
pub fn longest_common_prefix(a: &str, b: &str) -> String {
    // Byte length of the leading run of identical chars.
    let shared_bytes: usize = a
        .chars()
        .zip(b.chars())
        .take_while(|(ca, cb)| ca == cb)
        .map(|(ca, _)| ca.len_utf8())
        .sum();
    a[..shared_bytes].to_owned()
}
/// Longest common suffix of two strings, char-aligned.
///
/// Both strings are walked backwards `char` by `char`, so the result never
/// starts in the middle of a multi-byte UTF-8 code point.
pub fn longest_common_suffix(a: &str, b: &str) -> String {
    // Byte length of the trailing run of identical chars.
    let shared_bytes: usize = a
        .chars()
        .rev()
        .zip(b.chars().rev())
        .take_while(|(ca, cb)| ca == cb)
        .map(|(ca, _)| ca.len_utf8())
        .sum();
    a[a.len() - shared_bytes..].to_owned()
}
#[cfg(test)]
mod tests {
    //! Unit tests for the string abstract domain: constructors, the lattice
    //! operations (join / meet / widen / leq), the concat transfer function,
    //! shell-safety helpers, and algebraic laws over a fixed sample set.
    use super::*;

    #[test]
    fn top_and_bottom() {
        assert!(StringFact::top().is_top());
        assert!(!StringFact::top().is_bottom());
        assert!(StringFact::bottom().is_bottom());
        assert!(!StringFact::bottom().is_top());
    }

    #[test]
    fn exact_sets_both() {
        let f = StringFact::exact("hello");
        assert_eq!(f.prefix.as_deref(), Some("hello"));
        assert_eq!(f.suffix.as_deref(), Some("hello"));
        assert_eq!(f.domain.as_deref(), Some(&["hello".to_string()][..]));
    }

    // ── LCP / LCS helpers ───────────────────────────────────────────
    #[test]
    fn lcp_basic() {
        assert_eq!(longest_common_prefix("abcdef", "abcxyz"), "abc");
        assert_eq!(longest_common_prefix("abc", "abc"), "abc");
        assert_eq!(longest_common_prefix("abc", "xyz"), "");
        assert_eq!(longest_common_prefix("", "abc"), "");
    }

    #[test]
    fn lcs_basic() {
        assert_eq!(longest_common_suffix("hello.json", "world.json"), ".json");
        assert_eq!(longest_common_suffix("abc", "xyz"), "");
        assert_eq!(longest_common_suffix("abc", "abc"), "abc");
    }

    // ── Join ────────────────────────────────────────────────────────
    #[test]
    fn join_same_prefix() {
        let a = StringFact::from_prefix("https://api.com/users/");
        let b = StringFact::from_prefix("https://api.com/items/");
        let j = a.join(&b);
        assert_eq!(j.prefix.as_deref(), Some("https://api.com/"));
    }

    #[test]
    fn join_no_common_prefix() {
        let a = StringFact::from_prefix("https://a.com/");
        let b = StringFact::from_prefix("http://b.com/");
        let j = a.join(&b);
        // The hosts share nothing, but the schemes still share "http"
        // ("https" vs "http:" diverge at the fifth character).
        assert_eq!(j.prefix.as_deref(), Some("http"));
    }

    #[test]
    fn join_suffix() {
        let a = StringFact::from_suffix(".json");
        let b = StringFact::from_suffix(".json");
        assert_eq!(a.join(&b).suffix.as_deref(), Some(".json"));
    }

    #[test]
    fn join_different_suffix() {
        let a = StringFact::from_suffix(".json");
        let b = StringFact::from_suffix(".xml");
        assert_eq!(a.join(&b).suffix, None);
    }

    #[test]
    fn join_with_bottom() {
        let a = StringFact::from_prefix("hello");
        assert_eq!(a.join(&StringFact::bottom()), a);
        assert_eq!(StringFact::bottom().join(&a), a);
    }

    #[test]
    fn join_finite_sets_union() {
        let a = StringFact::finite_set(vec!["ls".into(), "cat".into()]);
        let b = StringFact::finite_set(vec!["true".into(), "ls".into()]);
        let j = a.join(&b);
        let d = j.domain.expect("union");
        assert_eq!(d, vec!["cat", "ls", "true"]);
    }

    #[test]
    fn join_finite_sets_overflow_to_top() {
        // 9 + 9 = 18 > MAX_DOMAIN_SIZE = 16 → domain collapses to None.
        let a = StringFact::finite_set((0..9).map(|n| format!("a{n}")).collect::<Vec<_>>());
        let b = StringFact::finite_set((0..9).map(|n| format!("b{n}")).collect::<Vec<_>>());
        let j = a.join(&b);
        assert!(j.domain.is_none());
    }

    #[test]
    fn join_unknown_domain_yields_top() {
        let a = StringFact::finite_set(vec!["x".into()]);
        let b = StringFact::from_prefix("x");
        assert!(a.join(&b).domain.is_none());
    }

    // ── Meet ────────────────────────────────────────────────────────
    #[test]
    fn meet_consistent_prefix() {
        let a = StringFact::from_prefix("https://");
        let b = StringFact::from_prefix("https://api.com/");
        let m = a.meet(&b);
        assert_eq!(m.prefix.as_deref(), Some("https://api.com/"));
    }

    #[test]
    fn meet_contradictory_prefix() {
        let a = StringFact::from_prefix("https://a.com/");
        let b = StringFact::from_prefix("https://b.com/");
        assert!(a.meet(&b).is_bottom());
    }

    #[test]
    fn meet_finite_sets_intersect() {
        let a = StringFact::finite_set(vec!["ls".into(), "cat".into(), "true".into()]);
        let b = StringFact::finite_set(vec!["ls".into(), "true".into()]);
        let m = a.meet(&b);
        assert_eq!(
            m.domain.as_deref(),
            Some(&["ls".to_string(), "true".to_string()][..])
        );
    }

    #[test]
    fn meet_finite_sets_empty_is_bottom() {
        let a = StringFact::finite_set(vec!["ls".into()]);
        let b = StringFact::finite_set(vec!["cat".into()]);
        assert!(a.meet(&b).is_bottom());
    }

    // ── Widen ───────────────────────────────────────────────────────
    #[test]
    fn widen_stable() {
        let a = StringFact::from_prefix("https://api.com/");
        assert_eq!(a.widen(&a), a);
    }

    #[test]
    fn widen_changed_prefix() {
        let old = StringFact::from_prefix("https://api.com/v1/");
        let new = StringFact::from_prefix("https://api.com/v2/");
        let w = old.widen(&new);
        assert_eq!(w.prefix, None); // changed → dropped
    }

    #[test]
    fn widen_changed_domain() {
        let old = StringFact::finite_set(vec!["ls".into()]);
        let new = StringFact::finite_set(vec!["ls".into(), "cat".into()]);
        assert!(old.widen(&new).domain.is_none());
    }

    // ── Concat ──────────────────────────────────────────────────────
    #[test]
    fn concat_exact() {
        let a = StringFact::exact("hello");
        let b = StringFact::exact(" world");
        let c = a.concat(&b);
        assert_eq!(c.prefix.as_deref(), Some("hello"));
        assert_eq!(c.suffix.as_deref(), Some(" world"));
        // domain drops because cross-product is not tracked
        assert!(c.domain.is_none());
    }

    #[test]
    fn concat_prefix_with_top() {
        let a = StringFact::from_prefix("https://api.com/");
        let b = StringFact::top();
        let c = a.concat(&b);
        assert_eq!(c.prefix.as_deref(), Some("https://api.com/"));
        assert_eq!(c.suffix, None);
    }

    #[test]
    fn concat_top_with_suffix() {
        let a = StringFact::top();
        let b = StringFact::from_suffix(".json");
        let c = a.concat(&b);
        assert_eq!(c.prefix, None);
        assert_eq!(c.suffix.as_deref(), Some(".json"));
    }

    // ── Leq ─────────────────────────────────────────────────────────
    #[test]
    fn leq_more_specific_prefix() {
        let specific = StringFact::from_prefix("https://api.com/users/");
        let general = StringFact::from_prefix("https://api.com/");
        assert!(specific.leq(&general));
        assert!(!general.leq(&specific));
    }

    #[test]
    fn leq_top_greatest() {
        let a = StringFact::from_prefix("hello");
        assert!(a.leq(&StringFact::top()));
        assert!(!StringFact::top().leq(&a));
    }

    #[test]
    fn leq_bottom_least() {
        assert!(StringFact::bottom().leq(&StringFact::top()));
        assert!(StringFact::bottom().leq(&StringFact::from_prefix("x")));
    }

    #[test]
    fn leq_finite_subset() {
        let sub = StringFact::finite_set(vec!["ls".into()]);
        let sup = StringFact::finite_set(vec!["ls".into(), "cat".into()]);
        assert!(sub.leq(&sup));
        assert!(!sup.leq(&sub));
    }

    // ── Finite-set / shell safety ───────────────────────────────────
    #[test]
    fn finite_set_sorts_and_dedups() {
        let f = StringFact::finite_set(vec!["b".into(), "a".into(), "a".into()]);
        assert_eq!(
            f.domain.as_deref(),
            Some(&["a".to_string(), "b".to_string()][..])
        );
    }

    #[test]
    fn finite_set_overflow_is_top() {
        let many: Vec<String> = (0..(MAX_DOMAIN_SIZE + 1))
            .map(|n| format!("v{n}"))
            .collect();
        let f = StringFact::finite_set(many);
        assert!(f.domain.is_none());
    }

    #[test]
    fn finite_set_empty_is_top() {
        let f = StringFact::finite_set(vec![]);
        assert!(f.domain.is_none());
        assert!(f.is_top());
    }

    #[test]
    fn shell_safe_detects_metachars() {
        assert!(is_shell_safe_literal("ls"));
        assert!(is_shell_safe_literal("cat"));
        assert!(is_shell_safe_literal("no-metachars"));
        assert!(!is_shell_safe_literal("rm;reboot"));
        assert!(!is_shell_safe_literal("echo $HOME"));
        assert!(!is_shell_safe_literal("a|b"));
        assert!(!is_shell_safe_literal("a b")); // whitespace splits shell words
    }

    #[test]
    fn is_finite_shell_safe_only_when_bounded() {
        assert!(!StringFact::top().is_finite_shell_safe());
        assert!(!StringFact::from_prefix("ls").is_finite_shell_safe());
        assert!(StringFact::finite_set(vec!["ls".into(), "cat".into()]).is_finite_shell_safe());
        assert!(
            !StringFact::finite_set(vec!["ls".into(), "rm;reboot".into()]).is_finite_shell_safe()
        );
    }

    /// `concat("", x)` and `concat(x, "")` must round-trip the
    /// non-empty operand's prefix/suffix. The current `concat` keeps
    /// LHS prefix and RHS suffix verbatim. After empty-string
    /// normalisation, `exact("")` carries no prefix/suffix info, so
    /// the LHS prefix is `None` (unknown) and only the RHS suffix
    /// survives.
    #[test]
    fn concat_empty_string_lhs_preserves_rhs_suffix() {
        let empty = StringFact::exact("");
        let rhs = StringFact::exact("x");
        let r = empty.concat(&rhs);
        assert_eq!(r.prefix, None);
        assert_eq!(r.suffix.as_deref(), Some("x"));
    }

    #[test]
    fn concat_empty_string_rhs_preserves_lhs_prefix() {
        let lhs = StringFact::exact("x");
        let empty = StringFact::exact("");
        let r = lhs.concat(&empty);
        assert_eq!(r.prefix.as_deref(), Some("x"));
        assert_eq!(r.suffix, None);
    }

    /// Bottom is concat-absorbing: concat with bottom in either
    /// position yields bottom (no flow can reach the call site).
    #[test]
    fn concat_with_bottom_is_bottom() {
        let bot = StringFact::bottom();
        let any = StringFact::exact("anything");
        assert!(bot.concat(&any).is_bottom());
        assert!(any.concat(&bot).is_bottom());
    }

    /// Joining two distinct URL prefixes must reduce to their LCP, not
    /// fall through to `None`. This is the property SSRF prefix-lock
    /// suppression depends on at phi nodes.
    #[test]
    fn join_distinct_urls_reduces_to_lcp() {
        let a = StringFact::from_prefix("https://api.example.com/");
        let b = StringFact::from_prefix("https://db.example.com/");
        let r = a.join(&b);
        // Common prefix is "https://"; anything past that diverges.
        assert_eq!(
            r.prefix.as_deref(),
            Some("https://"),
            "join must compute LCP, not drop the prefix entirely"
        );
    }

    /// Meet of two prefix-locks with no overlap must collapse to
    /// bottom (it represents an unsatisfiable conjunction).
    #[test]
    fn meet_disjoint_prefixes_is_bottom() {
        let a = StringFact::from_prefix("/var/");
        let b = StringFact::from_prefix("/etc/");
        let r = a.meet(&b);
        assert!(
            r.is_bottom(),
            "meet of disjoint prefix-locks must be bottom"
        );
    }

    // ── Additional lattice algebra laws ──────────────────────────────
    /// One representative of every shape the constructors can produce;
    /// the law tests below iterate over all of them (and all pairs).
    fn sample_strings() -> Vec<StringFact> {
        vec![
            StringFact::bottom(),
            StringFact::top(),
            StringFact::exact(""),
            StringFact::exact("hello"),
            StringFact::from_prefix("https://"),
            StringFact::from_suffix(".com"),
            StringFact::finite_set(vec!["a".into(), "b".into()]),
        ]
    }

    /// `x ⊔ x = x`: join is idempotent across all sample shapes.
    #[test]
    fn join_idempotent_string() {
        for a in sample_strings() {
            assert_eq!(a.join(&a), a, "join not idempotent for {:?}", a);
        }
    }

    /// `x ⊔ y = y ⊔ x`: join is commutative.
    #[test]
    fn join_commutative_string() {
        let xs = sample_strings();
        for a in &xs {
            for b in &xs {
                assert_eq!(
                    a.join(b),
                    b.join(a),
                    "join not commutative for {:?} / {:?}",
                    a,
                    b
                );
            }
        }
    }

    /// `x ⊓ x = x`: meet is idempotent.
    #[test]
    fn meet_idempotent_string() {
        for a in sample_strings() {
            assert_eq!(a.meet(&a), a, "meet not idempotent for {:?}", a);
        }
    }

    /// `x ⊓ y = y ⊓ x`: meet is commutative.
    #[test]
    fn meet_commutative_string() {
        let xs = sample_strings();
        for a in &xs {
            for b in &xs {
                assert_eq!(
                    a.meet(b),
                    b.meet(a),
                    "meet not commutative for {:?} / {:?}",
                    a,
                    b
                );
            }
        }
    }

    /// `x ⊓ ⊤ = x` and `x ⊓ ⊥ = ⊥`.
    #[test]
    fn meet_identity_string() {
        for a in sample_strings() {
            assert_eq!(a.meet(&StringFact::top()), a, "x ⊓ ⊤ failed for {:?}", a);
            assert!(
                a.meet(&StringFact::bottom()).is_bottom(),
                "x ⊓ ⊥ failed for {:?}",
                a
            );
        }
    }

    /// `x ⊑ x`: leq is reflexive.
    #[test]
    fn leq_reflexive_string() {
        for a in sample_strings() {
            assert!(a.leq(&a), "x ⊑ x failed for {:?}", a);
        }
    }

    /// **Soundness**: `widen(a, b) ⊒ join(a, b)`. Widening must
    /// over-approximate join, otherwise the fixpoint iteration would
    /// drop behaviours that join keeps.
    #[test]
    fn widen_over_approximates_join_string() {
        let xs = sample_strings();
        for a in &xs {
            for b in &xs {
                let j = a.join(b);
                let w = a.widen(b);
                assert!(
                    j.leq(&w),
                    "widen({:?}, {:?}) = {:?} does not over-approximate join = {:?}",
                    a,
                    b,
                    w,
                    j
                );
            }
        }
    }

    /// `widen(x, x) = x`: widening a fact with itself changes nothing.
    #[test]
    fn widen_idempotent_string() {
        for a in sample_strings() {
            assert_eq!(a.widen(&a), a, "widen(x, x) failed for {:?}", a);
        }
    }

    /// Join is upper bound: `a ⊑ a ⊔ b` and `b ⊑ a ⊔ b`.
    #[test]
    fn join_is_upper_bound_string() {
        let xs = sample_strings();
        for a in &xs {
            for b in &xs {
                let j = a.join(b);
                assert!(
                    a.leq(&j),
                    "a ⊑ a ⊔ b failed for {:?}, {:?} (join={:?})",
                    a,
                    b,
                    j
                );
                assert!(
                    b.leq(&j),
                    "b ⊑ a ⊔ b failed for {:?}, {:?} (join={:?})",
                    a,
                    b,
                    j
                );
            }
        }
    }

    /// Empty-string exact value must be distinguished from Top: it is a
    /// singleton (`{""}`), not unconstrained. After the empty-prefix
    /// normalisation, prefix/suffix are `None` (carry no extra info)
    /// but the `domain` field still pins the value to exactly `""`.
    #[test]
    fn exact_empty_string_is_not_top() {
        let e = StringFact::exact("");
        assert!(!e.is_top(), "exact(\"\") must not be Top");
        assert!(!e.is_bottom(), "exact(\"\") must not be Bottom");
        assert_eq!(e.prefix, None, "empty prefix normalised to None");
        assert_eq!(e.suffix, None, "empty suffix normalised to None");
        assert_eq!(e.domain.as_deref(), Some(&[String::new()][..]));
    }

    /// LCP/LCS with multi-byte UTF-8 chars must not split a code point
    /// (would produce invalid UTF-8 strings or panic).
    #[test]
    fn lcp_lcs_unicode_safe() {
        // Both start with é (2-byte char in UTF-8) and diverge after "écla".
        let a = StringFact::exact("éclair");
        let b = StringFact::exact("éclat");
        let j = a.join(&b);
        let prefix = j.prefix.as_deref().unwrap_or("");
        assert!(prefix.is_char_boundary(prefix.len()));
        assert!(prefix.starts_with('é'));
        assert_eq!(prefix, "écla");
        // Both end with "好" (3-byte CJK) but start with different chars.
        let a = StringFact::exact("你好");
        let b = StringFact::exact("您好");
        let j = a.join(&b);
        let suffix = j.suffix.as_deref().unwrap_or("");
        assert!(suffix.is_char_boundary(0) && suffix.is_char_boundary(suffix.len()));
        assert!(suffix.ends_with('好'));
        assert_eq!(suffix, "好");
        assert_eq!(j.prefix, None);
    }

    /// Phase 08: a URL prefix-lock obtained from `new URL(path, base)`
    /// must survive concatenation with a tainted (Top-suffix) path
    /// component. The `is_string_safe_for_ssrf` check only consults the
    /// `prefix`, so the locked-host base must remain intact even when the
    /// path-side fact carries no knowledge.
    #[test]
    fn from_url_with_base_locks_prefix_under_tainted_suffix() {
        let base = "https://api.cal.com";
        let tainted_path = StringFact::top();
        let f = StringFact::from_url_with_base(base, &tainted_path);
        assert_eq!(
            f.prefix.as_deref(),
            Some("https://api.cal.com/"),
            "prefix lock must include the path separator"
        );
        // The path component contributes no suffix knowledge; the result
        // must mirror that without losing the prefix lock.
        assert!(
            f.suffix.is_none(),
            "suffix is unknown when path-side fact is Top"
        );
    }

    /// A concrete path component contributes its suffix knowledge to the
    /// concatenated URL fact while the base prefix stays locked.
    #[test]
    fn from_url_with_base_keeps_prefix_with_concrete_path_suffix() {
        let base = "https://api.cal.com/";
        let path = StringFact::from_suffix(".json");
        let f = StringFact::from_url_with_base(base, &path);
        assert_eq!(f.prefix.as_deref(), Some("https://api.cal.com/"));
        assert_eq!(f.suffix.as_deref(), Some(".json"));
    }

    /// Concat with empty-string `exact("")` should preserve the other
    /// side's prefix/suffix knowledge (empty is the identity).
    #[test]
    fn concat_with_empty_exact_preserves_other() {
        let s = StringFact::exact("hello");
        let e = StringFact::exact("");
        let r = s.concat(&e);
        // Concat should preserve prefix from `s`.
        assert_eq!(r.prefix.as_deref(), Some("hello"));
        let r2 = e.concat(&s);
        assert_eq!(r2.suffix.as_deref(), Some("hello"));
    }
}