nyx/src/abstract_interp/mod.rs

//! Abstract interpretation framework.
//!
//! Provides a product abstract domain ([`AbstractValue`]) composing independent
//! subdomains:
//! - [`IntervalFact`]: numeric interval `[lo, hi]` with arithmetic transfer
//! - [`StringFact`]: string prefix + suffix with concatenation transfer
//! - [`BitFact`]: known-zero/known-one bit masks for bitwise transfer
//! - [`PathFact`]: path facts, defined in the [`path_domain`] module
//!
//! Abstract values are stored per-SSA-value in [`AbstractState`], which is
//! carried through the taint analysis worklist in `SsaTaintState`. The framework
//! propagates abstract values forward through SSA operations, joins at CFG
//! merges, and widens at loop heads to ensure termination.
//!
//! ## Feature gate
//!
//! Enabled by default.  Disable via `analysis.engine.abstract_interpretation
//! = false` in `nyx.conf` or the `--no-abstract-interp` CLI flag.

pub mod bit_domain;
pub mod interval;
pub mod path_domain;
pub mod string_domain;

pub use bit_domain::BitFact;
pub use interval::IntervalFact;
pub use path_domain::{PathFact, Tri};
pub use string_domain::StringFact;

use crate::ssa::ir::SsaValue;
use crate::state::lattice::{AbstractDomain, Lattice};
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;

/// Reports whether the abstract interpretation pass is switched on.
///
/// The answer is the `abstract_interpretation` field of the current analysis
/// options.  That field is driven by
/// `analysis.engine.abstract_interpretation` in `nyx.conf` (default `true`)
/// or the `--abstract-interp / --no-abstract-interp` CLI flag.  The legacy
/// `NYX_ABSTRACT_INTERP` env var is consulted only when no runtime has been
/// installed (library use / legacy tests).
pub fn is_enabled() -> bool {
    let options = crate::utils::analysis_options::current();
    options.abstract_interpretation
}

// ── AbstractValue ───────────────────────────────────────────────────────

/// Per-SSA-value abstract element: product of all subdomains.
///
/// Each subdomain is independent: join, meet, widen, and leq are applied
/// component-wise. Adding a new subdomain requires adding a field here
/// and updating the component-wise implementations.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct AbstractValue {
    /// Numeric interval component.
    pub interval: IntervalFact,
    /// String component.
    pub string: StringFact,
    /// Bit-level component.
    pub bits: BitFact,
    /// Path component.  Omitted from the serialized form when it is Top;
    /// when the field is missing on deserialization serde fills it in with
    /// `PathFact::default()`.
    #[serde(default, skip_serializing_if = "path_fact_is_top")]
    pub path: PathFact,
}

/// Serde `skip_serializing_if` predicate for [`AbstractValue::path`]:
/// returns true when the path fact is Top.
fn path_fact_is_top(p: &PathFact) -> bool {
    p.is_top()
}

impl AbstractValue {
    /// Least precise element: every subdomain is Top.
    pub fn top() -> Self {
        Self::with_path_fact(PathFact::top())
    }

    /// Element whose every subdomain is Bottom.
    pub fn bottom() -> Self {
        let interval = IntervalFact::bottom();
        let string = StringFact::bottom();
        let bits = BitFact::bottom();
        let path = PathFact::bottom();
        Self {
            interval,
            string,
            bits,
            path,
        }
    }

    /// Build a value carrying the given [`PathFact`] while the interval,
    /// string, and bit subdomains stay at Top.  Used by the Rust
    /// path-primitive transfer rules.
    pub fn with_path_fact(path: PathFact) -> Self {
        Self {
            interval: IntervalFact::top(),
            string: StringFact::top(),
            bits: BitFact::top(),
            path,
        }
    }

    /// True only when all four subdomains are Top.
    pub fn is_top(&self) -> bool {
        if !self.interval.is_top() || !self.string.is_top() {
            return false;
        }
        self.bits.is_top() && self.path.is_top()
    }

    /// True only when all four subdomains are Bottom.
    pub fn is_bottom(&self) -> bool {
        let interval_and_string = self.interval.is_bottom() && self.string.is_bottom();
        interval_and_string && self.bits.is_bottom() && self.path.is_bottom()
    }

    /// Component-wise join (least upper bound per subdomain).
    pub fn join(&self, other: &Self) -> Self {
        let interval = self.interval.join(&other.interval);
        let string = self.string.join(&other.string);
        let bits = self.bits.join(&other.bits);
        let path = self.path.join(&other.path);
        Self {
            interval,
            string,
            bits,
            path,
        }
    }

    /// Component-wise meet.  The bit and path subdomains are met through
    /// their [`AbstractDomain`] implementation.
    pub fn meet(&self, other: &Self) -> Self {
        let interval = self.interval.meet(&other.interval);
        let string = self.string.meet(&other.string);
        let bits = <BitFact as AbstractDomain>::meet(&self.bits, &other.bits);
        let path = <PathFact as AbstractDomain>::meet(&self.path, &other.path);
        Self {
            interval,
            string,
            bits,
            path,
        }
    }

    /// Component-wise widening of `self` (older) against `other` (newer).
    pub fn widen(&self, other: &Self) -> Self {
        let interval = self.interval.widen(&other.interval);
        let string = self.string.widen(&other.string);
        let bits = self.bits.widen(&other.bits);
        let path = self.path.widen(&other.path);
        Self {
            interval,
            string,
            bits,
            path,
        }
    }

    /// Partial order: holds only when every subdomain of `self` is below
    /// the matching subdomain of `other`.
    pub fn leq(&self, other: &Self) -> bool {
        if !self.interval.leq(&other.interval) {
            return false;
        }
        if !self.string.leq(&other.string) {
            return false;
        }
        self.bits.leq(&other.bits) && self.path.leq(&other.path)
    }
}

impl Lattice for AbstractValue {
    fn bot() -> Self {
        Self::bottom()
    }

    fn join(&self, other: &Self) -> Self {
        self.join(other)
    }

    fn leq(&self, other: &Self) -> bool {
        self.leq(other)
    }
}

impl AbstractDomain for AbstractValue {
    fn top() -> Self {
        Self::top()
    }

    fn meet(&self, other: &Self) -> Self {
        self.meet(other)
    }

    fn widen(&self, other: &Self) -> Self {
        self.widen(other)
    }
}

// ── AbstractTransfer ────────────────────────────────────────────────────

/// Maximum length, in bytes, of a literal prefix tracked by
/// [`StringTransfer::LiteralPrefix`].
///
/// [`StringTransfer::literal_prefix`] truncates longer inputs to at most this
/// many bytes, backing off to the nearest UTF-8 character boundary.
///
/// Caps the on-disk summary size when a callee produces a long known prefix.
/// The interval domain already has a natural bound (two `i64`s); the string
/// side needs an explicit cap so a callee that returns a 10KB constant does
/// not balloon every cross-file summary that references it.
pub const MAX_LITERAL_PREFIX_LEN: usize = 64;

/// Per-parameter interval-to-return transform.
///
/// This is a **bounded** description of how a caller-known interval on one
/// parameter maps to the callee's return interval.  The forms are intentionally
/// restricted so the summary size stays constant regardless of callee body
/// complexity:
///
/// * [`IntervalTransfer::Top`]: no interval knowledge crosses (default).
/// * [`IntervalTransfer::Identity`]: return = param (pass-through).
/// * [`IntervalTransfer::Affine`]: return = param * `mul` + `add` with
///   `i64` constants; overflow defaults to Top at apply time.
/// * [`IntervalTransfer::Clamped`]: return is always in `[lo, hi]` regardless
///   of input.  Captures callee-intrinsic bounds (e.g. `saturating` ops).
///
/// No unbounded expression trees, no nesting.  A callee whose behaviour does
/// not fit one of these forms falls back to `Top`; we never try to encode
/// richer algebra in the summary.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum IntervalTransfer {
    /// No interval knowledge crosses the call.  This is the default variant.
    #[default]
    Top,
    /// The return interval equals the parameter's interval.
    Identity,
    /// The return is `param * mul + add`.
    Affine {
        add: i64,
        mul: i64,
    },
    /// The return always lies in `[lo, hi]`.  `apply` treats an inverted
    /// range (`lo > hi`) as Top.
    Clamped {
        lo: i64,
        hi: i64,
    },
}

impl IntervalTransfer {
    /// Apply the transform to a caller-known input interval.
    ///
    /// * `Top` yields the Top interval and ignores `input`.
    /// * `Identity` returns a copy of `input`.
    /// * `Affine` multiplies `input` by the exact interval `mul`, then adds
    ///   the exact interval `add`, using the interval domain's own `mul` and
    ///   `add` transfer functions.
    /// * `Clamped` yields `[lo, hi]`; an inverted range (`lo > hi`) carries
    ///   no usable bound and yields Top.
    pub fn apply(&self, input: &IntervalFact) -> IntervalFact {
        match self {
            Self::Top => IntervalFact::top(),
            Self::Identity => input.clone(),
            Self::Affine { add, mul } => input
                .mul(&IntervalFact::exact(*mul))
                .add(&IntervalFact::exact(*add)),
            Self::Clamped { lo, hi } if lo <= hi => IntervalFact {
                lo: Some(*lo),
                hi: Some(*hi),
            },
            Self::Clamped { .. } => IntervalFact::top(),
        }
    }

    /// Join two transforms.  Used when multiple return paths produce
    /// differing transforms for the same parameter: the aggregate is the
    /// widest safe form.
    ///
    /// Two well-formed `Clamped` ranges join to their hull.  An inverted
    /// `Clamped` (`lo > hi`) is what [`IntervalTransfer::apply`] treats as
    /// Top, so joining it with a different range must also give `Top`;
    /// taking the hull could produce a bound narrower than one operand.
    pub fn join(&self, other: &Self) -> Self {
        use IntervalTransfer::*;
        match (self, other) {
            (Top, _) | (_, Top) => Top,
            (a, b) if a == b => a.clone(),
            (
                Clamped {
                    lo: lhs_lo,
                    hi: lhs_hi,
                },
                Clamped {
                    lo: rhs_lo,
                    hi: rhs_hi,
                },
            ) if lhs_lo <= lhs_hi && rhs_lo <= rhs_hi => Clamped {
                lo: (*lhs_lo).min(*rhs_lo),
                hi: (*lhs_hi).max(*rhs_hi),
            },
            // Different flow shapes (e.g. Identity vs Affine), or a Clamped
            // operand with inverted bounds: nothing safe to keep.
            _ => Top,
        }
    }
}

/// Per-parameter string-to-return transform.
///
/// Mirrors [`IntervalTransfer`] for the string subdomain.  Bounded by
/// [`MAX_LITERAL_PREFIX_LEN`] to keep summary size constant.
///
/// * [`StringTransfer::Unknown`]: default.
/// * [`StringTransfer::Identity`]: return = param.
/// * [`StringTransfer::LiteralPrefix`]: return has this literal prefix
///   regardless of input (callee-intrinsic).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum StringTransfer {
    /// No string knowledge crosses the call.  This is the default variant.
    #[default]
    Unknown,
    /// The return string fact equals the parameter's string fact.
    Identity,
    /// The return starts with this literal.  Build it through
    /// [`StringTransfer::literal_prefix`] to respect the length cap.
    LiteralPrefix(String),
}

impl StringTransfer {
    /// Construct a `LiteralPrefix`, truncating to [`MAX_LITERAL_PREFIX_LEN`]
    /// and degrading to `Unknown` on empty input.
    pub fn literal_prefix(s: &str) -> Self {
        if s.is_empty() {
            return Self::Unknown;
        }
        if s.len() <= MAX_LITERAL_PREFIX_LEN {
            Self::LiteralPrefix(s.to_string())
        } else {
            // Truncate on a char boundary to stay valid UTF-8.
            let mut cut = MAX_LITERAL_PREFIX_LEN;
            while cut > 0 && !s.is_char_boundary(cut) {
                cut -= 1;
            }
            if cut == 0 {
                Self::Unknown
            } else {
                Self::LiteralPrefix(s[..cut].to_string())
            }
        }
    }

    /// Apply the transform to a caller-known input string fact.
    pub fn apply(&self, input: &StringFact) -> StringFact {
        match self {
            Self::Unknown => StringFact::top(),
            Self::Identity => input.clone(),
            Self::LiteralPrefix(p) => StringFact::from_prefix(p),
        }
    }

    /// Join two transforms.
    pub fn join(&self, other: &Self) -> Self {
        use StringTransfer::*;
        match (self, other) {
            (Unknown, _) | (_, Unknown) => Unknown,
            (a, b) if a == b => a.clone(),
            (LiteralPrefix(a), LiteralPrefix(b)) => {
                // Longest common prefix.
                let lcp: String = a
                    .chars()
                    .zip(b.chars())
                    .take_while(|(x, y)| x == y)
                    .map(|(x, _)| x)
                    .collect();
                if lcp.is_empty() {
                    Unknown
                } else {
                    Self::literal_prefix(&lcp)
                }
            }
            // Identity vs LiteralPrefix → Unknown (different flow shapes).
            _ => Unknown,
        }
    }
}

/// Per-parameter abstract-domain transfer channel.
///
/// Combines the per-subdomain transforms into one record attached to each
/// parameter in [`crate::summary::ssa_summary::SsaFuncSummary`].  Used at
/// cross-file call sites to synthesise a return abstract value from the
/// caller's knowledge of each argument, without having to re-run the callee.
///
/// Composition rule: `apply(input) = (interval.apply, string.apply,
/// bits=top, path=top)`.  The bit and path domains are always Top: we do
/// not track cross-file bit or path transfers.
#[derive(Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct AbstractTransfer {
    /// Interval channel.  Omitted from the serialized form when it is
    /// [`IntervalTransfer::Top`]; defaults when missing on deserialization.
    #[serde(default, skip_serializing_if = "is_interval_top")]
    pub interval: IntervalTransfer,
    /// String channel.  Omitted from the serialized form when it is
    /// [`StringTransfer::Unknown`]; defaults when missing on deserialization.
    #[serde(default, skip_serializing_if = "is_string_unknown")]
    pub string: StringTransfer,
}

/// True when `t` is [`IntervalTransfer::Top`], i.e. carries no interval
/// information.  Also used as a serde `skip_serializing_if` predicate.
fn is_interval_top(t: &IntervalTransfer) -> bool {
    match t {
        IntervalTransfer::Top => true,
        IntervalTransfer::Identity
        | IntervalTransfer::Affine { .. }
        | IntervalTransfer::Clamped { .. } => false,
    }
}

/// True when `t` is [`StringTransfer::Unknown`], i.e. carries no string
/// information.  Also used as a serde `skip_serializing_if` predicate.
fn is_string_unknown(t: &StringTransfer) -> bool {
    match t {
        StringTransfer::Unknown => true,
        StringTransfer::Identity | StringTransfer::LiteralPrefix(_) => false,
    }
}

impl AbstractTransfer {
    /// Fully-imprecise transfer: no information crosses.  Used as the
    /// conservative default when a parameter's flow does not fit any of the
    /// bounded forms.
    pub fn top() -> Self {
        Self::default()
    }

    /// True when neither subdomain carries any information, equivalent to
    /// "omit this entry entirely".
    pub fn is_top(&self) -> bool {
        is_interval_top(&self.interval) && is_string_unknown(&self.string)
    }

    /// Apply the transform to a caller-known input abstract value.
    pub fn apply(&self, input: &AbstractValue) -> AbstractValue {
        AbstractValue {
            interval: self.interval.apply(&input.interval),
            string: self.string.apply(&input.string),
            bits: BitFact::top(),
            path: PathFact::top(),
        }
    }

    /// Join two transfers component-wise.
    pub fn join(&self, other: &Self) -> Self {
        Self {
            interval: self.interval.join(&other.interval),
            string: self.string.join(&other.string),
        }
    }
}

// ── AbstractState ───────────────────────────────────────────────────────

/// Maximum abstract values tracked per block (performance bound).
///
/// Once an [`AbstractState`] holds this many entries, `AbstractState::set`
/// stops inserting new keys; such values stay implicitly Top.  Existing keys
/// can still be updated or removed.
const MAX_ABSTRACT_VALUES: usize = 64;

/// Per-block abstract state: sorted map from SsaValue → AbstractValue.
///
/// Values not in the map are implicitly Top (no knowledge). Sorted by
/// SsaValue for O(n) merge-join, matching the pattern used by
/// `SsaTaintState.values`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AbstractState {
    /// Entries kept in ascending `SsaValue` order; lookups use binary
    /// search.  The methods on this type never store a Top value here.
    values: SmallVec<[(SsaValue, AbstractValue); 8]>,
}

impl AbstractState {
    pub fn empty() -> Self {
        Self {
            values: SmallVec::new(),
        }
    }

    /// Get abstract value for an SSA value. Returns Top if absent.
    pub fn get(&self, v: SsaValue) -> AbstractValue {
        self.values
            .binary_search_by_key(&v, |(id, _)| *id)
            .ok()
            .map(|idx| self.values[idx].1.clone())
            .unwrap_or_else(AbstractValue::top)
    }

    /// Set abstract value for an SSA value. Drops Top values to save space.
    pub fn set(&mut self, v: SsaValue, val: AbstractValue) {
        if val.is_top() {
            // Don't store Top, it's the default
            if let Ok(idx) = self.values.binary_search_by_key(&v, |(id, _)| *id) {
                self.values.remove(idx);
            }
            return;
        }
        match self.values.binary_search_by_key(&v, |(id, _)| *id) {
            Ok(idx) => self.values[idx].1 = val,
            Err(idx) => {
                if self.values.len() < MAX_ABSTRACT_VALUES {
                    self.values.insert(idx, (v, val));
                }
                // Over budget: silently drop (conservative, defaults to Top)
            }
        }
    }

    /// Merge-join two abstract states. Values present in both are joined;
    /// values present in only one side are dropped (absent = Top, join with
    /// Top = Top).
    pub fn join(&self, other: &Self) -> Self {
        let mut result = SmallVec::with_capacity(self.values.len().min(other.values.len()));
        let (mut i, mut j) = (0, 0);

        while i < self.values.len() && j < other.values.len() {
            match self.values[i].0.cmp(&other.values[j].0) {
                std::cmp::Ordering::Less => {
                    // Only in self → join with Top = Top → drop
                    i += 1;
                }
                std::cmp::Ordering::Greater => {
                    // Only in other → drop
                    j += 1;
                }
                std::cmp::Ordering::Equal => {
                    let joined = self.values[i].1.join(&other.values[j].1);
                    if !joined.is_top() {
                        result.push((self.values[i].0, joined));
                    }
                    i += 1;
                    j += 1;
                }
            }
        }

        Self { values: result }
    }

    /// Merge-widen: for values present in both states, apply widening.
    /// Values present in only one side are dropped (Top).
    pub fn widen(&self, other: &Self) -> Self {
        let mut result = SmallVec::with_capacity(self.values.len().min(other.values.len()));
        let (mut i, mut j) = (0, 0);

        while i < self.values.len() && j < other.values.len() {
            match self.values[i].0.cmp(&other.values[j].0) {
                std::cmp::Ordering::Less => {
                    i += 1;
                }
                std::cmp::Ordering::Greater => {
                    j += 1;
                }
                std::cmp::Ordering::Equal => {
                    let widened = self.values[i].1.widen(&other.values[j].1);
                    if !widened.is_top() {
                        result.push((self.values[i].0, widened));
                    }
                    i += 1;
                    j += 1;
                }
            }
        }

        Self { values: result }
    }

    /// Partial order: self ⊑ other.
    pub fn leq(&self, other: &Self) -> bool {
        // Every non-Top entry in self must have a corresponding entry in other
        // with self[v] ⊑ other[v]. Entries only in other are fine (Top ⊑ anything
        // is false, but absent self entries are Top which is handled).
        for (v, val) in &self.values {
            let other_val = other.get(*v);
            if !val.leq(&other_val) {
                return false;
            }
        }
        true
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Value whose only non-Top component is the given interval.
    fn interval_only(interval: IntervalFact) -> AbstractValue {
        AbstractValue {
            interval,
            string: StringFact::top(),
            bits: BitFact::top(),
            path: PathFact::top(),
        }
    }

    /// Value with an interval and a string prefix; bits and path stay Top.
    fn interval_with_prefix(interval: IntervalFact, prefix: &str) -> AbstractValue {
        AbstractValue {
            interval,
            string: StringFact::from_prefix(prefix),
            bits: BitFact::top(),
            path: PathFact::top(),
        }
    }

    /// Closed interval `[lo, hi]`.
    fn bounded(lo: i64, hi: i64) -> IntervalFact {
        IntervalFact {
            lo: Some(lo),
            hi: Some(hi),
        }
    }

    #[test]
    fn abstract_value_top_bottom() {
        let top = AbstractValue::top();
        let bottom = AbstractValue::bottom();
        assert!(top.is_top());
        assert!(bottom.is_bottom());
        assert!(!top.is_bottom());
        assert!(!bottom.is_top());
    }

    #[test]
    fn abstract_value_join_componentwise() {
        let a = interval_with_prefix(IntervalFact::exact(1), "https://a.com/");
        let b = interval_with_prefix(IntervalFact::exact(5), "https://b.com/");
        let j = a.join(&b);
        assert_eq!(j.interval.lo, Some(1));
        assert_eq!(j.interval.hi, Some(5));
        assert_eq!(j.string.prefix.as_deref(), Some("https://"));
    }

    #[test]
    fn abstract_value_widen_componentwise() {
        let old = interval_with_prefix(bounded(0, 5), "hello");
        let new = interval_with_prefix(bounded(0, 10), "hello");
        let w = old.widen(&new);
        assert_eq!(w.interval.lo, Some(0)); // stable
        assert_eq!(w.interval.hi, None); // grew → widened
        assert_eq!(w.string.prefix.as_deref(), Some("hello")); // stable
    }

    #[test]
    fn abstract_state_get_default_top() {
        let state = AbstractState::empty();
        assert!(state.get(SsaValue(42)).is_top());
    }

    #[test]
    fn abstract_state_set_get() {
        let mut state = AbstractState::empty();
        let val = interval_only(IntervalFact::exact(10));
        state.set(SsaValue(1), val.clone());
        assert_eq!(state.get(SsaValue(1)), val);
    }

    #[test]
    fn abstract_state_set_top_removes() {
        let mut state = AbstractState::empty();
        state.set(SsaValue(1), interval_only(IntervalFact::exact(5)));
        assert!(!state.get(SsaValue(1)).is_top());
        state.set(SsaValue(1), AbstractValue::top());
        assert!(state.get(SsaValue(1)).is_top());
        assert!(state.values.is_empty());
    }

    #[test]
    fn abstract_state_join() {
        let mut a = AbstractState::empty();
        a.set(SsaValue(1), interval_only(IntervalFact::exact(3)));
        a.set(SsaValue(2), interval_only(IntervalFact::exact(10)));

        // SsaValue(2) is deliberately missing from b → join drops it (Top)
        let mut b = AbstractState::empty();
        b.set(SsaValue(1), interval_only(IntervalFact::exact(7)));

        let j = a.join(&b);
        // SsaValue(1): join [3,3] and [7,7] = [3,7]
        let v1 = j.get(SsaValue(1));
        assert_eq!(v1.interval.lo, Some(3));
        assert_eq!(v1.interval.hi, Some(7));
        // SsaValue(2): only in a → dropped to Top
        assert!(j.get(SsaValue(2)).is_top());
    }

    #[test]
    fn abstract_state_widen() {
        let mut old = AbstractState::empty();
        old.set(SsaValue(1), interval_only(bounded(0, 5)));

        let mut new = AbstractState::empty();
        new.set(SsaValue(1), interval_only(bounded(0, 10)));

        let w = old.widen(&new);
        let v1 = w.get(SsaValue(1));
        assert_eq!(v1.interval.lo, Some(0)); // stable
        assert_eq!(v1.interval.hi, None); // grew → widened
    }

    #[test]
    fn loop_carried_phi_join_and_widen() {
        // Simulate: x = 0; loop { x = phi(0, x+1) }
        // Iteration 1: join([0,0], [1,1]) = [0,1]
        let init = IntervalFact::exact(0);
        let inc1 = IntervalFact::exact(1);
        let phi1 = init.join(&inc1);
        assert_eq!(phi1.lo, Some(0));
        assert_eq!(phi1.hi, Some(1));

        // Iteration 2: join([0,1], [1,2]) = [0,2]
        let inc2 = bounded(1, 2);
        let phi2 = phi1.join(&inc2);
        assert_eq!(phi2.lo, Some(0));
        assert_eq!(phi2.hi, Some(2));

        // Widen: [0,1] vs [0,2] → upper bound grew → [0, None]
        let widened = phi1.widen(&phi2);
        assert_eq!(widened.lo, Some(0));
        assert_eq!(widened.hi, None);

        // Iteration 3: join([0,None], [1,None]) = [0,None] (stable!)
        let inc3 = IntervalFact {
            lo: Some(1),
            hi: None,
        };
        let phi3 = widened.join(&inc3);
        assert_eq!(phi3.lo, Some(0));
        assert_eq!(phi3.hi, None);
        assert_eq!(phi3, widened); // converged
    }
}