// Mirror of https://github.com/elicpeter/nyx.git (synced 2026-06-09 19:45:13 +02:00).
// Original file: 1011 lines, 34 KiB, Rust.
//! String abstract domain for abstract interpretation.
//!
//! Tracks known prefix, suffix, and, when provably bounded, the finite set
//! of possible concrete string values. Used for SSRF suppression (URL prefix
//! proves the host is locked), command-injection suppression (lookup result
//! bounded to a safe set of literals), and general string analysis.

use crate::state::lattice::{AbstractDomain, Lattice};
|
||
use serde::{Deserialize, Serialize};
|
||
|
||
/// Maximum tracked prefix length (bytes). Longer prefixes are clipped to the
/// nearest char boundary at or below this limit.
pub const MAX_PREFIX_LEN: usize = 256;

/// Maximum tracked suffix length (bytes). Longer suffixes are clipped to the
/// nearest char boundary that keeps the result at or below this limit.
pub const MAX_SUFFIX_LEN: usize = 128;

/// Maximum tracked finite-domain cardinality. Beyond this, `domain` widens
/// to `None` (Top on the domain sub-field).
pub const MAX_DOMAIN_SIZE: usize = 16;

/// Single-character shell metacharacters. A string containing any of these
/// cannot be passed as a single shell word without escaping, so bounded
/// sets containing them cannot suppress `Cap::SHELL_ESCAPE`.
///
/// NOTE(review): glob / expansion characters (`*`, `?`, `[`, `~`, `#`, `!`,
/// `{`) are not in this list; confirm that omission is intentional.
const SHELL_METACHARS: &[char] = &[
    ';', '|', '&', '`', '$', '>', '<', '(', ')', '\n', '\r', '\0', '\\', '"', '\'', ' ', '\t',
];

/// Return `true` when `s` contains no shell metacharacter and is therefore
/// safe to pass as a single shell token.
///
/// The empty string contains no characters at all, so it is reported as safe.
pub fn is_shell_safe_literal(s: &str) -> bool {
    // A `&[char]` pattern matches when any one of the listed chars occurs.
    !s.contains(SHELL_METACHARS)
}

/// String abstract domain: tracks known prefix, suffix, and finite domain.
|
||
///
|
||
/// Lattice ordering:
|
||
/// - `Bottom` ⊑ everything (unsatisfiable)
|
||
/// - Concrete facts ⊑ `Top` (no knowledge)
|
||
/// - `Some(prefix)` ⊑ `None` (no prefix known)
|
||
/// - `Some({a,b})` ⊑ `Some({a,b,c})` ⊑ `None` (subset → wider → Top)
|
||
///
|
||
/// Prefix, suffix, and domain are independent: a value can carry any subset
|
||
/// of the three.
|
||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||
pub struct StringFact {
|
||
/// Known prefix of the string. `None` = unknown.
|
||
pub prefix: Option<String>,
|
||
/// Known suffix of the string. `None` = unknown.
|
||
pub suffix: Option<String>,
|
||
/// Known finite set of possible concrete values. `None` = unknown set.
|
||
/// `Some(vec)` with `vec.len() <= MAX_DOMAIN_SIZE` = value ∈ `vec`.
|
||
/// Always sorted and deduped.
|
||
pub domain: Option<Vec<String>>,
|
||
/// True when this fact is Bottom (unsatisfiable).
|
||
is_bottom: bool,
|
||
}
|
||
|
||
impl StringFact {
|
||
/// Top: no knowledge about the string.
|
||
pub fn top() -> Self {
|
||
Self {
|
||
prefix: None,
|
||
suffix: None,
|
||
domain: None,
|
||
is_bottom: false,
|
||
}
|
||
}
|
||
|
||
/// Bottom: unsatisfiable / empty set.
|
||
pub fn bottom() -> Self {
|
||
Self {
|
||
prefix: None,
|
||
suffix: None,
|
||
domain: None,
|
||
is_bottom: true,
|
||
}
|
||
}
|
||
|
||
/// Exact known string value: prefix and suffix are the full string, and
|
||
/// the finite domain is `{s}`.
|
||
///
|
||
/// Empty prefix/suffix are normalised to `None` because "starts/ends with
|
||
/// the empty string" carries no constraint, keeping `Some("")` would
|
||
/// break join idempotence (`Some("")` ⊔ `Some("")` collapses to `None`).
|
||
pub fn exact(s: &str) -> Self {
|
||
let prefix = truncate_prefix(s);
|
||
let suffix = truncate_suffix(s);
|
||
Self {
|
||
prefix: if prefix.is_empty() {
|
||
None
|
||
} else {
|
||
Some(prefix)
|
||
},
|
||
suffix: if suffix.is_empty() {
|
||
None
|
||
} else {
|
||
Some(suffix)
|
||
},
|
||
domain: Some(vec![s.to_string()]),
|
||
is_bottom: false,
|
||
}
|
||
}
|
||
|
||
/// Known prefix only. Empty `p` normalises to no-prefix-info (`None`).
|
||
pub fn from_prefix(p: &str) -> Self {
|
||
let prefix = truncate_prefix(p);
|
||
Self {
|
||
prefix: if prefix.is_empty() {
|
||
None
|
||
} else {
|
||
Some(prefix)
|
||
},
|
||
suffix: None,
|
||
domain: None,
|
||
is_bottom: false,
|
||
}
|
||
}
|
||
|
||
/// Known suffix only. Empty `s` normalises to no-suffix-info (`None`).
|
||
pub fn from_suffix(s: &str) -> Self {
|
||
let suffix = truncate_suffix(s);
|
||
Self {
|
||
prefix: None,
|
||
suffix: if suffix.is_empty() {
|
||
None
|
||
} else {
|
||
Some(suffix)
|
||
},
|
||
domain: None,
|
||
is_bottom: false,
|
||
}
|
||
}
|
||
|
||
/// Known finite set of possible concrete values.
|
||
///
|
||
/// Inputs are sorted and deduped. If the cardinality exceeds
|
||
/// [`MAX_DOMAIN_SIZE`] or the input is empty, the domain collapses to
|
||
/// `None` (Top on this sub-field). The prefix/suffix sub-fields remain
|
||
/// unset, callers can combine with [`Self::exact`] for single-element
|
||
/// sets if tighter facts are desired.
|
||
pub fn finite_set(values: Vec<String>) -> Self {
|
||
let mut v = values;
|
||
v.sort();
|
||
v.dedup();
|
||
let domain = if v.is_empty() || v.len() > MAX_DOMAIN_SIZE {
|
||
None
|
||
} else {
|
||
Some(v)
|
||
};
|
||
Self {
|
||
prefix: None,
|
||
suffix: None,
|
||
domain,
|
||
is_bottom: false,
|
||
}
|
||
}
|
||
|
||
pub fn is_top(&self) -> bool {
|
||
!self.is_bottom && self.prefix.is_none() && self.suffix.is_none() && self.domain.is_none()
|
||
}
|
||
|
||
pub fn is_bottom(&self) -> bool {
|
||
self.is_bottom
|
||
}
|
||
|
||
/// Returns `true` when the finite domain is known and every element is
|
||
/// free of shell metacharacters. Used to suppress `Cap::SHELL_ESCAPE`
|
||
/// when the payload is provably bounded to a safe set of words.
|
||
pub fn is_finite_shell_safe(&self) -> bool {
|
||
match &self.domain {
|
||
Some(values) if !values.is_empty() => values.iter().all(|s| is_shell_safe_literal(s)),
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
// ── Lattice operations ──────────────────────────────────────────────
|
||
|
||
/// Join: longest common prefix (LCP), longest common suffix (LCS), and
|
||
/// set union of finite domains (clipped at [`MAX_DOMAIN_SIZE`]).
|
||
pub fn join(&self, other: &Self) -> Self {
|
||
if self.is_bottom {
|
||
return other.clone();
|
||
}
|
||
if other.is_bottom {
|
||
return self.clone();
|
||
}
|
||
let prefix = match (&self.prefix, &other.prefix) {
|
||
(Some(a), Some(b)) => {
|
||
let lcp = longest_common_prefix(a, b);
|
||
if lcp.is_empty() { None } else { Some(lcp) }
|
||
}
|
||
_ => None,
|
||
};
|
||
let suffix = match (&self.suffix, &other.suffix) {
|
||
(Some(a), Some(b)) => {
|
||
let lcs = longest_common_suffix(a, b);
|
||
if lcs.is_empty() { None } else { Some(lcs) }
|
||
}
|
||
_ => None,
|
||
};
|
||
let domain = match (&self.domain, &other.domain) {
|
||
(Some(a), Some(b)) => {
|
||
let mut merged: Vec<String> = Vec::with_capacity(a.len() + b.len());
|
||
merged.extend_from_slice(a);
|
||
merged.extend_from_slice(b);
|
||
merged.sort();
|
||
merged.dedup();
|
||
if merged.len() > MAX_DOMAIN_SIZE {
|
||
None
|
||
} else {
|
||
Some(merged)
|
||
}
|
||
}
|
||
_ => None,
|
||
};
|
||
Self {
|
||
prefix,
|
||
suffix,
|
||
domain,
|
||
is_bottom: false,
|
||
}
|
||
}
|
||
|
||
/// Meet: intersection of all three sub-fields (conservative).
|
||
pub fn meet(&self, other: &Self) -> Self {
|
||
if self.is_bottom || other.is_bottom {
|
||
return Self::bottom();
|
||
}
|
||
let prefix = match (&self.prefix, &other.prefix) {
|
||
(Some(a), Some(b)) => {
|
||
if a.starts_with(b.as_str()) {
|
||
Some(a.clone())
|
||
} else if b.starts_with(a.as_str()) {
|
||
Some(b.clone())
|
||
} else {
|
||
return Self::bottom();
|
||
}
|
||
}
|
||
(Some(a), None) => Some(a.clone()),
|
||
(None, Some(b)) => Some(b.clone()),
|
||
(None, None) => None,
|
||
};
|
||
let suffix = match (&self.suffix, &other.suffix) {
|
||
(Some(a), Some(b)) => {
|
||
if a.ends_with(b.as_str()) {
|
||
Some(a.clone())
|
||
} else if b.ends_with(a.as_str()) {
|
||
Some(b.clone())
|
||
} else {
|
||
return Self::bottom();
|
||
}
|
||
}
|
||
(Some(a), None) => Some(a.clone()),
|
||
(None, Some(b)) => Some(b.clone()),
|
||
(None, None) => None,
|
||
};
|
||
let domain = match (&self.domain, &other.domain) {
|
||
(Some(a), Some(b)) => {
|
||
let inter: Vec<String> = a
|
||
.iter()
|
||
.filter(|s| b.binary_search(s).is_ok())
|
||
.cloned()
|
||
.collect();
|
||
if inter.is_empty() {
|
||
return Self::bottom();
|
||
}
|
||
Some(inter)
|
||
}
|
||
(Some(a), None) => Some(a.clone()),
|
||
(None, Some(b)) => Some(b.clone()),
|
||
(None, None) => None,
|
||
};
|
||
Self {
|
||
prefix,
|
||
suffix,
|
||
domain,
|
||
is_bottom: false,
|
||
}
|
||
}
|
||
|
||
/// Widen: drop any sub-field that changed between iterations.
|
||
pub fn widen(&self, other: &Self) -> Self {
|
||
if self.is_bottom {
|
||
return other.clone();
|
||
}
|
||
if other.is_bottom {
|
||
return self.clone();
|
||
}
|
||
let prefix = if self.prefix == other.prefix {
|
||
self.prefix.clone()
|
||
} else {
|
||
None
|
||
};
|
||
let suffix = if self.suffix == other.suffix {
|
||
self.suffix.clone()
|
||
} else {
|
||
None
|
||
};
|
||
let domain = if self.domain == other.domain {
|
||
self.domain.clone()
|
||
} else {
|
||
None
|
||
};
|
||
Self {
|
||
prefix,
|
||
suffix,
|
||
domain,
|
||
is_bottom: false,
|
||
}
|
||
}
|
||
|
||
pub fn leq(&self, other: &Self) -> bool {
|
||
if self.is_bottom {
|
||
return true;
|
||
}
|
||
if other.is_bottom {
|
||
return false;
|
||
}
|
||
let prefix_ok = match (&self.prefix, &other.prefix) {
|
||
(_, None) => true,
|
||
(None, Some(_)) => false,
|
||
(Some(a), Some(b)) => a.starts_with(b.as_str()),
|
||
};
|
||
let suffix_ok = match (&self.suffix, &other.suffix) {
|
||
(_, None) => true,
|
||
(None, Some(_)) => false,
|
||
(Some(a), Some(b)) => a.ends_with(b.as_str()),
|
||
};
|
||
let domain_ok = match (&self.domain, &other.domain) {
|
||
(_, None) => true,
|
||
(None, Some(_)) => false,
|
||
(Some(a), Some(b)) => a.iter().all(|s| b.binary_search(s).is_ok()),
|
||
};
|
||
prefix_ok && suffix_ok && domain_ok
|
||
}
|
||
|
||
// ── Transfer functions ──────────────────────────────────────────────
|
||
|
||
/// String concatenation: `self ++ other`.
|
||
///
|
||
/// - Prefix of result = prefix of `self` (left operand)
|
||
/// - Suffix of result = suffix of `other` (right operand)
|
||
/// - Domain: cross-product is too explosive to track; collapse to `None`.
|
||
pub fn concat(&self, other: &Self) -> Self {
|
||
if self.is_bottom || other.is_bottom {
|
||
return Self::bottom();
|
||
}
|
||
Self {
|
||
prefix: self.prefix.clone(),
|
||
suffix: other.suffix.clone(),
|
||
domain: None,
|
||
is_bottom: false,
|
||
}
|
||
}
|
||
|
||
/// SSRF helper: build a fact for `new URL(path, base)` where `base` is a
|
||
/// literal origin (`https://api.example.com`). The result behaves as
|
||
/// `base ++ path`, the locked-host prefix survives even when the path
|
||
/// component carries arbitrary taint, and the fact's `prefix` is what
|
||
/// `is_string_safe_for_ssrf` consults to suppress the SSRF sink.
|
||
///
|
||
/// `path` carries any string knowledge for the path component (typically
|
||
/// `StringFact::top()`). When the base already ends in `/`, the helper
|
||
/// keeps it as-is; otherwise appends a `/` so the prefix unambiguously
|
||
/// includes the path separator (the SSRF check looks for
|
||
/// `scheme://host/`).
|
||
pub fn from_url_with_base(base: &str, path: &Self) -> Self {
|
||
let mut anchor = base.to_string();
|
||
if !anchor.ends_with('/') {
|
||
anchor.push('/');
|
||
}
|
||
StringFact::exact(&anchor).concat(path)
|
||
}
|
||
}
|
||
|
||
impl Lattice for StringFact {
|
||
fn bot() -> Self {
|
||
Self::bottom()
|
||
}
|
||
|
||
fn join(&self, other: &Self) -> Self {
|
||
self.join(other)
|
||
}
|
||
|
||
fn leq(&self, other: &Self) -> bool {
|
||
self.leq(other)
|
||
}
|
||
}
|
||
|
||
impl AbstractDomain for StringFact {
|
||
fn top() -> Self {
|
||
Self::top()
|
||
}
|
||
|
||
fn meet(&self, other: &Self) -> Self {
|
||
self.meet(other)
|
||
}
|
||
|
||
fn widen(&self, other: &Self) -> Self {
|
||
self.widen(other)
|
||
}
|
||
}
|
||
|
||
// ── Helpers ─────────────────────────────────────────────────────────────
|
||
|
||
fn truncate_prefix(s: &str) -> String {
|
||
if s.len() <= MAX_PREFIX_LEN {
|
||
s.to_string()
|
||
} else {
|
||
// Find a char boundary at or before MAX_PREFIX_LEN
|
||
let mut end = MAX_PREFIX_LEN;
|
||
while end > 0 && !s.is_char_boundary(end) {
|
||
end -= 1;
|
||
}
|
||
s[..end].to_string()
|
||
}
|
||
}
|
||
|
||
fn truncate_suffix(s: &str) -> String {
|
||
if s.len() <= MAX_SUFFIX_LEN {
|
||
s.to_string()
|
||
} else {
|
||
let start = s.len() - MAX_SUFFIX_LEN;
|
||
let mut start = start;
|
||
while start < s.len() && !s.is_char_boundary(start) {
|
||
start += 1;
|
||
}
|
||
s[start..].to_string()
|
||
}
|
||
}
|
||
|
||
/// Longest common prefix of two strings, char-aligned.
///
/// Iterates by `char` rather than `byte` so multi-byte UTF-8 code points are
/// either kept whole or dropped; a byte-wise comparison would slice into the
/// middle of a code point and produce mojibake (`x as char` on a UTF-8
/// continuation byte yields a garbage Latin-1 character).
///
/// Returns the empty string when the inputs share no leading char.
pub fn longest_common_prefix(a: &str, b: &str) -> String {
    a.chars()
        .zip(b.chars())
        .take_while(|(x, y)| x == y)
        .map(|(x, _)| x)
        .collect()
}

/// Longest common suffix of two strings, char-aligned.
///
/// Walks both strings from the back one `char` at a time, so a multi-byte
/// UTF-8 code point is either kept whole or dropped. Returns the empty string
/// when the inputs share no trailing char.
pub fn longest_common_suffix(a: &str, b: &str) -> String {
    // Matching chars are identical in both inputs, so summing their encoded
    // widths gives the suffix length in bytes; `a.len() - shared` then lands
    // on a char boundary of `a`.
    let shared: usize = a
        .chars()
        .rev()
        .zip(b.chars().rev())
        .take_while(|(x, y)| x == y)
        .map(|(x, _)| x.len_utf8())
        .sum();
    a[a.len() - shared..].to_string()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn top_and_bottom() {
        assert!(StringFact::top().is_top());
        assert!(!StringFact::top().is_bottom());
        assert!(StringFact::bottom().is_bottom());
        assert!(!StringFact::bottom().is_top());
    }

    #[test]
    fn exact_sets_both() {
        let f = StringFact::exact("hello");
        assert_eq!(f.prefix.as_deref(), Some("hello"));
        assert_eq!(f.suffix.as_deref(), Some("hello"));
        assert_eq!(f.domain.as_deref(), Some(&["hello".to_string()][..]));
    }

    // ── LCP / LCS helpers ───────────────────────────────────────────

    #[test]
    fn lcp_basic() {
        assert_eq!(longest_common_prefix("abcdef", "abcxyz"), "abc");
        assert_eq!(longest_common_prefix("abc", "abc"), "abc");
        assert_eq!(longest_common_prefix("abc", "xyz"), "");
        assert_eq!(longest_common_prefix("", "abc"), "");
    }

    #[test]
    fn lcs_basic() {
        assert_eq!(longest_common_suffix("hello.json", "world.json"), ".json");
        assert_eq!(longest_common_suffix("abc", "xyz"), "");
        assert_eq!(longest_common_suffix("abc", "abc"), "abc");
    }

    /// Truncation must back off to a char boundary instead of slicing a
    /// multi-byte code point in half.
    #[test]
    fn truncation_respects_char_boundaries() {
        // The leading 'a' pushes every 2-byte 'é' onto an odd offset, so
        // byte `MAX_PREFIX_LEN` falls in the middle of a code point.
        let s = format!("a{}", "é".repeat(200));
        let p = truncate_prefix(&s);
        assert_eq!(p.len(), MAX_PREFIX_LEN - 1);
        assert!(s.starts_with(p.as_str()));

        // Mirror image for the suffix: the cut point lands mid-'é'.
        let s = format!("{}a", "é".repeat(200));
        let t = truncate_suffix(&s);
        assert_eq!(t.len(), MAX_SUFFIX_LEN - 1);
        assert!(s.ends_with(t.as_str()));
    }

    // ── Join ────────────────────────────────────────────────────────

    #[test]
    fn join_same_prefix() {
        let a = StringFact::from_prefix("https://api.com/users/");
        let b = StringFact::from_prefix("https://api.com/items/");
        let j = a.join(&b);
        assert_eq!(j.prefix.as_deref(), Some("https://api.com/"));
    }

    #[test]
    fn join_no_common_prefix() {
        let a = StringFact::from_prefix("https://a.com/");
        let b = StringFact::from_prefix("http://b.com/");
        let j = a.join(&b);
        // The schemes differ, yet the strings still share the chars "http".
        assert_eq!(j.prefix.as_deref(), Some("http"));
    }

    #[test]
    fn join_suffix() {
        let a = StringFact::from_suffix(".json");
        let b = StringFact::from_suffix(".json");
        assert_eq!(a.join(&b).suffix.as_deref(), Some(".json"));
    }

    #[test]
    fn join_different_suffix() {
        let a = StringFact::from_suffix(".json");
        let b = StringFact::from_suffix(".xml");
        assert_eq!(a.join(&b).suffix, None);
    }

    #[test]
    fn join_with_bottom() {
        let a = StringFact::from_prefix("hello");
        assert_eq!(a.join(&StringFact::bottom()), a);
        assert_eq!(StringFact::bottom().join(&a), a);
    }

    #[test]
    fn join_finite_sets_union() {
        let a = StringFact::finite_set(vec!["ls".into(), "cat".into()]);
        let b = StringFact::finite_set(vec!["true".into(), "ls".into()]);
        let j = a.join(&b);
        let d = j.domain.expect("union");
        assert_eq!(d, vec!["cat", "ls", "true"]);
    }

    #[test]
    fn join_finite_sets_overflow_to_top() {
        // 9 + 9 = 18 > MAX_DOMAIN_SIZE = 16 → domain collapses to None.
        let a = StringFact::finite_set((0..9).map(|n| format!("a{n}")).collect::<Vec<_>>());
        let b = StringFact::finite_set((0..9).map(|n| format!("b{n}")).collect::<Vec<_>>());
        let j = a.join(&b);
        assert!(j.domain.is_none());
    }

    #[test]
    fn join_unknown_domain_yields_top() {
        let a = StringFact::finite_set(vec!["x".into()]);
        let b = StringFact::from_prefix("x");
        assert!(a.join(&b).domain.is_none());
    }

    // ── Meet ────────────────────────────────────────────────────────

    #[test]
    fn meet_consistent_prefix() {
        let a = StringFact::from_prefix("https://");
        let b = StringFact::from_prefix("https://api.com/");
        let m = a.meet(&b);
        assert_eq!(m.prefix.as_deref(), Some("https://api.com/"));
    }

    #[test]
    fn meet_contradictory_prefix() {
        let a = StringFact::from_prefix("https://a.com/");
        let b = StringFact::from_prefix("https://b.com/");
        assert!(a.meet(&b).is_bottom());
    }

    #[test]
    fn meet_finite_sets_intersect() {
        let a = StringFact::finite_set(vec!["ls".into(), "cat".into(), "true".into()]);
        let b = StringFact::finite_set(vec!["ls".into(), "true".into()]);
        let m = a.meet(&b);
        assert_eq!(
            m.domain.as_deref(),
            Some(&["ls".to_string(), "true".to_string()][..])
        );
    }

    #[test]
    fn meet_finite_sets_empty_is_bottom() {
        let a = StringFact::finite_set(vec!["ls".into()]);
        let b = StringFact::finite_set(vec!["cat".into()]);
        assert!(a.meet(&b).is_bottom());
    }

    // ── Widen ───────────────────────────────────────────────────────

    #[test]
    fn widen_stable() {
        let a = StringFact::from_prefix("https://api.com/");
        assert_eq!(a.widen(&a), a);
    }

    #[test]
    fn widen_changed_prefix() {
        let old = StringFact::from_prefix("https://api.com/v1/");
        let new = StringFact::from_prefix("https://api.com/v2/");
        let w = old.widen(&new);
        assert_eq!(w.prefix, None); // changed → dropped
    }

    #[test]
    fn widen_changed_domain() {
        let old = StringFact::finite_set(vec!["ls".into()]);
        let new = StringFact::finite_set(vec!["ls".into(), "cat".into()]);
        assert!(old.widen(&new).domain.is_none());
    }

    // ── Concat ──────────────────────────────────────────────────────

    #[test]
    fn concat_exact() {
        let a = StringFact::exact("hello");
        let b = StringFact::exact(" world");
        let c = a.concat(&b);
        assert_eq!(c.prefix.as_deref(), Some("hello"));
        assert_eq!(c.suffix.as_deref(), Some(" world"));
        // domain drops because cross-product is not tracked
        assert!(c.domain.is_none());
    }

    #[test]
    fn concat_prefix_with_top() {
        let a = StringFact::from_prefix("https://api.com/");
        let b = StringFact::top();
        let c = a.concat(&b);
        assert_eq!(c.prefix.as_deref(), Some("https://api.com/"));
        assert_eq!(c.suffix, None);
    }

    #[test]
    fn concat_top_with_suffix() {
        let a = StringFact::top();
        let b = StringFact::from_suffix(".json");
        let c = a.concat(&b);
        assert_eq!(c.prefix, None);
        assert_eq!(c.suffix.as_deref(), Some(".json"));
    }

    // ── Leq ─────────────────────────────────────────────────────────

    #[test]
    fn leq_more_specific_prefix() {
        let specific = StringFact::from_prefix("https://api.com/users/");
        let general = StringFact::from_prefix("https://api.com/");
        assert!(specific.leq(&general));
        assert!(!general.leq(&specific));
    }

    #[test]
    fn leq_top_greatest() {
        let a = StringFact::from_prefix("hello");
        assert!(a.leq(&StringFact::top()));
        assert!(!StringFact::top().leq(&a));
    }

    #[test]
    fn leq_bottom_least() {
        assert!(StringFact::bottom().leq(&StringFact::top()));
        assert!(StringFact::bottom().leq(&StringFact::from_prefix("x")));
    }

    #[test]
    fn leq_finite_subset() {
        let sub = StringFact::finite_set(vec!["ls".into()]);
        let sup = StringFact::finite_set(vec!["ls".into(), "cat".into()]);
        assert!(sub.leq(&sup));
        assert!(!sup.leq(&sub));
    }

    // ── Finite-set / shell safety ───────────────────────────────────

    #[test]
    fn finite_set_sorts_and_dedups() {
        let f = StringFact::finite_set(vec!["b".into(), "a".into(), "a".into()]);
        assert_eq!(
            f.domain.as_deref(),
            Some(&["a".to_string(), "b".to_string()][..])
        );
    }

    #[test]
    fn finite_set_overflow_is_top() {
        let many: Vec<String> = (0..(MAX_DOMAIN_SIZE + 1))
            .map(|n| format!("v{n}"))
            .collect();
        let f = StringFact::finite_set(many);
        assert!(f.domain.is_none());
    }

    #[test]
    fn finite_set_empty_is_top() {
        let f = StringFact::finite_set(vec![]);
        assert!(f.domain.is_none());
        assert!(f.is_top());
    }

    #[test]
    fn shell_safe_detects_metachars() {
        assert!(is_shell_safe_literal("ls"));
        assert!(is_shell_safe_literal("cat"));
        assert!(is_shell_safe_literal("no-metachars"));
        assert!(!is_shell_safe_literal("rm;reboot"));
        assert!(!is_shell_safe_literal("echo $HOME"));
        assert!(!is_shell_safe_literal("a|b"));
        assert!(!is_shell_safe_literal("a b")); // whitespace splits shell words
    }

    #[test]
    fn is_finite_shell_safe_only_when_bounded() {
        assert!(!StringFact::top().is_finite_shell_safe());
        assert!(!StringFact::from_prefix("ls").is_finite_shell_safe());
        assert!(StringFact::finite_set(vec!["ls".into(), "cat".into()]).is_finite_shell_safe());
        assert!(
            !StringFact::finite_set(vec!["ls".into(), "rm;reboot".into()]).is_finite_shell_safe()
        );
    }

    /// `concat` keeps the LHS prefix and the RHS suffix verbatim. After
    /// empty-string normalisation, `exact("")` carries no prefix/suffix
    /// info, so with an empty LHS the result prefix is `None` (unknown) and
    /// only the RHS suffix survives.
    #[test]
    fn concat_empty_string_lhs_preserves_rhs_suffix() {
        let empty = StringFact::exact("");
        let rhs = StringFact::exact("x");
        let r = empty.concat(&rhs);
        assert_eq!(r.prefix, None);
        assert_eq!(r.suffix.as_deref(), Some("x"));
    }

    /// Mirror case: with an empty RHS only the LHS prefix survives.
    #[test]
    fn concat_empty_string_rhs_preserves_lhs_prefix() {
        let lhs = StringFact::exact("x");
        let empty = StringFact::exact("");
        let r = lhs.concat(&empty);
        assert_eq!(r.prefix.as_deref(), Some("x"));
        assert_eq!(r.suffix, None);
    }

    /// Bottom is concat-absorbing: concat with bottom in either
    /// position yields bottom (no flow can reach the call site).
    #[test]
    fn concat_with_bottom_is_bottom() {
        let bot = StringFact::bottom();
        let any = StringFact::exact("anything");
        assert!(bot.concat(&any).is_bottom());
        assert!(any.concat(&bot).is_bottom());
    }

    /// Joining two distinct URL prefixes must reduce to their LCP, not
    /// fall through to `None`. This is the property SSRF prefix-lock
    /// suppression depends on at phi nodes.
    #[test]
    fn join_distinct_urls_reduces_to_lcp() {
        let a = StringFact::from_prefix("https://api.example.com/");
        let b = StringFact::from_prefix("https://db.example.com/");
        let r = a.join(&b);
        // Common prefix is "https://", anything past that diverges.
        assert_eq!(
            r.prefix.as_deref(),
            Some("https://"),
            "join must compute LCP, not drop the prefix entirely"
        );
    }

    /// Meet of two prefix-locks with no overlap must collapse to
    /// bottom (it represents an unsatisfiable conjunction).
    #[test]
    fn meet_disjoint_prefixes_is_bottom() {
        let a = StringFact::from_prefix("/var/");
        let b = StringFact::from_prefix("/etc/");
        let r = a.meet(&b);
        assert!(
            r.is_bottom(),
            "meet of disjoint prefix-locks must be bottom"
        );
    }

    // ── Additional lattice algebra laws ──────────────────────────────

    /// One representative of every fact shape the constructors can build.
    fn sample_strings() -> Vec<StringFact> {
        vec![
            StringFact::bottom(),
            StringFact::top(),
            StringFact::exact(""),
            StringFact::exact("hello"),
            StringFact::from_prefix("https://"),
            StringFact::from_suffix(".com"),
            StringFact::finite_set(vec!["a".into(), "b".into()]),
        ]
    }

    /// `x ⊔ x = x`, join is idempotent across all sample shapes.
    #[test]
    fn join_idempotent_string() {
        for a in sample_strings() {
            assert_eq!(a.join(&a), a, "join not idempotent for {:?}", a);
        }
    }

    /// `x ⊔ y = y ⊔ x`, join is commutative.
    #[test]
    fn join_commutative_string() {
        let xs = sample_strings();
        for a in &xs {
            for b in &xs {
                assert_eq!(
                    a.join(b),
                    b.join(a),
                    "join not commutative for {:?} / {:?}",
                    a,
                    b
                );
            }
        }
    }

    /// `x ⊓ x = x`, meet is idempotent.
    #[test]
    fn meet_idempotent_string() {
        for a in sample_strings() {
            assert_eq!(a.meet(&a), a, "meet not idempotent for {:?}", a);
        }
    }

    /// `x ⊓ y = y ⊓ x`, meet is commutative.
    #[test]
    fn meet_commutative_string() {
        let xs = sample_strings();
        for a in &xs {
            for b in &xs {
                assert_eq!(
                    a.meet(b),
                    b.meet(a),
                    "meet not commutative for {:?} / {:?}",
                    a,
                    b
                );
            }
        }
    }

    /// `x ⊓ ⊤ = x` and `x ⊓ ⊥ = ⊥`.
    #[test]
    fn meet_identity_string() {
        for a in sample_strings() {
            assert_eq!(a.meet(&StringFact::top()), a, "x ⊓ ⊤ failed for {:?}", a);
            assert!(
                a.meet(&StringFact::bottom()).is_bottom(),
                "x ⊓ ⊥ failed for {:?}",
                a
            );
        }
    }

    /// `x ⊑ x`, leq is reflexive.
    #[test]
    fn leq_reflexive_string() {
        for a in sample_strings() {
            assert!(a.leq(&a), "x ⊑ x failed for {:?}", a);
        }
    }

    /// **Soundness**: `widen(a, b) ⊒ join(a, b)`, widening must
    /// over-approximate join, otherwise dataflow loses information.
    #[test]
    fn widen_over_approximates_join_string() {
        let xs = sample_strings();
        for a in &xs {
            for b in &xs {
                let j = a.join(b);
                let w = a.widen(b);
                assert!(
                    j.leq(&w),
                    "widen({:?}, {:?}) = {:?} does not over-approximate join = {:?}",
                    a,
                    b,
                    w,
                    j
                );
            }
        }
    }

    /// `widen(x, x) = x`, widening a stable value changes nothing.
    #[test]
    fn widen_idempotent_string() {
        for a in sample_strings() {
            assert_eq!(a.widen(&a), a, "widen(x, x) failed for {:?}", a);
        }
    }

    /// Join is upper bound: `a ⊑ a ⊔ b` and `b ⊑ a ⊔ b`.
    #[test]
    fn join_is_upper_bound_string() {
        let xs = sample_strings();
        for a in &xs {
            for b in &xs {
                let j = a.join(b);
                assert!(
                    a.leq(&j),
                    "a ⊑ a ⊔ b failed for {:?}, {:?} (join={:?})",
                    a,
                    b,
                    j
                );
                assert!(
                    b.leq(&j),
                    "b ⊑ a ⊔ b failed for {:?}, {:?} (join={:?})",
                    a,
                    b,
                    j
                );
            }
        }
    }

    /// Empty-string exact value must be distinguishable from Top: it is a
    /// singleton (`{""}`), not unconstrained. After the empty-prefix
    /// normalisation, prefix/suffix are `None` (carry no extra info)
    /// but the `domain` field still pins the value to exactly `""`.
    #[test]
    fn exact_empty_string_is_not_top() {
        let e = StringFact::exact("");
        assert!(!e.is_top(), "exact(\"\") must not be Top");
        assert!(!e.is_bottom(), "exact(\"\") must not be Bottom");
        assert_eq!(e.prefix, None, "empty prefix normalised to None");
        assert_eq!(e.suffix, None, "empty suffix normalised to None");
        assert_eq!(e.domain.as_deref(), Some(&[String::new()][..]));
    }

    /// LCP/LCS with multi-byte UTF-8 chars must not split a code point
    /// (would produce invalid UTF-8 strings or panic).
    #[test]
    fn lcp_lcs_unicode_safe() {
        // Both start with é (2-byte char in UTF-8) and diverge at 'i' / 't'.
        let a = StringFact::exact("éclair");
        let b = StringFact::exact("éclat");
        let j = a.join(&b);
        assert_eq!(j.prefix.as_deref(), Some("écla"));

        // Both end with "好" (3-byte CJK) and differ in the first char.
        let a = StringFact::exact("你好");
        let b = StringFact::exact("您好");
        let j = a.join(&b);
        assert_eq!(j.suffix.as_deref(), Some("好"));
    }

    /// Phase 08: a URL prefix-lock obtained from `new URL(path, base)`
    /// must survive concatenation with a tainted (Top-suffix) path
    /// component. The `is_string_safe_for_ssrf` check only consults the
    /// `prefix`, so the locked-host base must remain intact even when the
    /// path-side fact carries no knowledge.
    #[test]
    fn from_url_with_base_locks_prefix_under_tainted_suffix() {
        let base = "https://api.cal.com";
        let tainted_path = StringFact::top();
        let f = StringFact::from_url_with_base(base, &tainted_path);
        assert_eq!(
            f.prefix.as_deref(),
            Some("https://api.cal.com/"),
            "prefix lock must include the path separator"
        );
        // The path component contributes no suffix knowledge, the result
        // must mirror that without losing the prefix lock.
        assert!(
            f.suffix.is_none(),
            "suffix is unknown when path-side fact is Top"
        );
    }

    /// A concrete path component contributes its suffix knowledge to the
    /// concatenated URL fact while the base prefix stays locked.
    #[test]
    fn from_url_with_base_keeps_prefix_with_concrete_path_suffix() {
        let base = "https://api.cal.com/";
        let path = StringFact::from_suffix(".json");
        let f = StringFact::from_url_with_base(base, &path);
        assert_eq!(f.prefix.as_deref(), Some("https://api.cal.com/"));
        assert_eq!(f.suffix.as_deref(), Some(".json"));
    }

    /// Concat with empty-string `exact("")` keeps the non-empty operand's
    /// knowledge on the side it occupies: its prefix when it is the LHS,
    /// its suffix when it is the RHS.
    #[test]
    fn concat_with_empty_exact_preserves_other() {
        let s = StringFact::exact("hello");
        let e = StringFact::exact("");
        let r = s.concat(&e);
        // Concat should preserve prefix from `s`.
        assert_eq!(r.prefix.as_deref(), Some("hello"));
        let r2 = e.concat(&s);
        assert_eq!(r2.suffix.as_deref(), Some("hello"));
    }
}