//! Analysis-engine options: stable, serializable toggles that control which //! analysis passes run inside the scanner. //! //! These are the release-grade knobs that used to live as ad-hoc `NYX_*` //! environment variables (`NYX_CONSTRAINT`, `NYX_ABSTRACT_INTERP`, `NYX_SYMEX`, //! `NYX_CROSS_FILE_SYMEX`, `NYX_SYMEX_INTERPROC`, `NYX_CONTEXT_SENSITIVE`, //! `NYX_PARSE_TIMEOUT_MS`, `NYX_SMT`). They are now a single struct loaded //! from the `[analysis.engine]` section of `nyx.conf` and overridable by CLI //! flags. //! //! Engine code calls [`current`] to read the active options. Before a scan //! begins, the CLI entry point installs a resolved [`AnalysisOptions`] via //! [`install`]. Library consumers that never call `install` get //! [`AnalysisOptions::default`], which is the documented release default. //! //! The legacy `NYX_*` variables still read **only** when no runtime has been //! installed and serve as a last-resort override for library users; running //! the `nyx` binary always goes through the configured runtime. use serde::{Deserialize, Serialize}; use std::sync::RwLock; /// Default parse timeout (milliseconds). See [`AnalysisOptions::parse_timeout_ms`]. pub const DEFAULT_PARSE_TIMEOUT_MS: u64 = 10_000; /// Default upper bound on the number of taint origins tracked per lattice /// value. Raised from the historical `4` to `32` so realistic codebases /// with wide joins (many param sources, deep helper chains) no longer /// silently drop origin attribution. Tunable via /// [`AnalysisOptions::max_origins`], see /// `src/taint/ssa_transfer/state.rs::effective_max_origins`. pub const DEFAULT_MAX_ORIGINS: u32 = 32; /// Minimum permitted `max_origins` value. A cap of `0` would make origin /// tracking impossible (every merge would truncate); the test override /// still accepts `0` through its own path, but runtime config clamps to /// at least `1` so production scans always carry *some* provenance. pub const MIN_MAX_ORIGINS: u32 = 1; /// Default upper bound on the number of abstract heap objects tracked per /// intra-procedural points-to set. Set to `32`, high enough that /// realistic factory/builder/DI patterns (routine 10–30 allocation sites /// aliased into one variable) stay precise, low enough to keep /// `HeapState` join/clone cost bounded in the worklist. Tunable via /// [`AnalysisOptions::max_pointsto`], see /// `src/ssa/heap.rs::effective_max_pointsto`. pub const DEFAULT_MAX_POINTSTO: u32 = 32; /// Minimum permitted `max_pointsto` value. A cap of `0` would make /// points-to tracking impossible; runtime config clamps to at least `1`. pub const MIN_MAX_POINTSTO: u32 = 1; /// Options for the symbolic-execution pipeline. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(default)] pub struct SymexOptions { /// Run the symex pass at all. When `false`, findings get no /// `symbolic` verdict and cross-file body extraction is skipped. pub enabled: bool, /// Persist and consult cross-file SSA bodies so symex can model /// callees defined in other files. pub cross_file: bool, /// Dive into intra-file callee bodies during symex (k ≥ 2 via the /// interprocedural frame stack). pub interprocedural: bool, /// Use the SMT backend when available. Only meaningful when nyx is /// compiled with the `smt` feature; silently ignored otherwise. pub smt: bool, } impl Default for SymexOptions { fn default() -> Self { Self { enabled: true, cross_file: true, interprocedural: true, smt: true, } } } /// Stable configuration for the analysis engine. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(default)] pub struct AnalysisOptions { /// Path-constraint solving. Prunes infeasible paths from the taint /// worklist and records unsat contexts in findings. pub constraint_solving: bool, /// Abstract interpretation: interval/string/bit domains carried through /// the SSA worklist and used to suppress provably safe sinks. pub abstract_interpretation: bool, /// k=1 context-sensitive inlining for intra-file callees. pub context_sensitive: bool, /// Symbolic-execution pipeline. pub symex: SymexOptions, /// Demand-driven backwards taint analysis from sinks. /// /// When enabled, after forward pass 2 completes, a backwards walk runs /// from each sink's tainted SSA operands to corroborate or rule out the /// forward finding. Corroborated findings get a `backwards-confirmed` /// note; flows the backward walk proves infeasible get a /// `backwards-infeasible` note that caps confidence. Defaults off. pub backwards_analysis: bool, /// Per-file tree-sitter parse timeout in milliseconds. `0` disables the /// cap entirely (not recommended outside of controlled benchmarks). pub parse_timeout_ms: u64, /// Maximum taint origins retained per lattice value. /// /// Controls both [`crate::taint::domain::VarTaint::origins`] and /// the equivalent per-object bound inside the heap state. When a /// merge would exceed this bound, origins are dropped deterministically /// (sorted by source location) and an /// [`crate::engine_notes::EngineNote::OriginsTruncated`] note is /// recorded on the affected finding. Raising this reduces the /// chance of silent under-reporting at the cost of slightly wider /// lattice values. See [`DEFAULT_MAX_ORIGINS`]. pub max_origins: u32, /// Maximum abstract heap objects retained per intra-procedural /// points-to set. /// /// When an allocation-site union would exceed this bound, the /// largest-keyed heap objects are dropped and an /// [`crate::engine_notes::EngineNote::PointsToTruncated`] note is /// recorded. Taint flows that should have reached the dropped /// objects via this aliasing path are lost (under-report). Raise /// for factory-heavy codebases where truncation is observed; lower /// only when points-to width is a measured bottleneck. See /// [`DEFAULT_MAX_POINTSTO`]. pub max_pointsto: u32, } impl Default for AnalysisOptions { fn default() -> Self { Self { constraint_solving: true, abstract_interpretation: true, context_sensitive: true, symex: SymexOptions::default(), backwards_analysis: false, parse_timeout_ms: DEFAULT_PARSE_TIMEOUT_MS, max_origins: DEFAULT_MAX_ORIGINS, max_pointsto: DEFAULT_MAX_POINTSTO, } } } /// Process-wide installed options. Accessors fall back to /// [`AnalysisOptions::default`] (with env-var overrides for backward /// compatibility) until a caller installs a value. /// /// A `RwLock` is used rather than a `OnceLock` so that long-lived callers /// (notably `nyx serve`, which resolves the engine profile per scan /// request) can replace the installed options between scans via /// [`reinstall`]. Within a single scan run, engine toggles must not /// change mid-flight, the caller is responsible for that invariant /// (`JobManager`'s single-scan guarantee provides it in the server). static RUNTIME: RwLock> = RwLock::new(None); /// Install the process-wide analysis options, first-wins. Subsequent /// calls are a no-op and return `false`, matching the semantics the CLI /// entry point relies on (one install per process lifetime for non-serve /// commands). Servers that resolve options per request should use /// [`reinstall`] instead. pub fn install(opts: AnalysisOptions) -> bool { let mut guard = RUNTIME.write().expect("analysis options RwLock poisoned"); if guard.is_some() { return false; } *guard = Some(opts); true } /// Replace the installed options unconditionally. Intended for the HTTP /// server's scan thread, which re-resolves the engine profile from each /// incoming request; `install`'s first-wins semantics would otherwise /// pin the first scan's choice for the lifetime of the server. Callers /// must ensure no scan is concurrently reading `current()`, in practice /// this means calling `reinstall` before the scan's rayon pool starts. pub fn reinstall(opts: AnalysisOptions) { *RUNTIME.write().expect("analysis options RwLock poisoned") = Some(opts); } /// Read the active options. Returns the installed runtime when present, /// otherwise defaults merged with env-var fallbacks (legacy path). pub fn current() -> AnalysisOptions { if let Some(rt) = *RUNTIME.read().expect("analysis options RwLock poisoned") { return rt; } // Legacy env-var fallback: applies only when no runtime has been // installed (primarily for library consumers and old tests). Logged // at debug level so CI/test output isn't spammed. AnalysisOptions { constraint_solving: env_bool_default("NYX_CONSTRAINT", true), abstract_interpretation: env_bool_default("NYX_ABSTRACT_INTERP", true), context_sensitive: env_bool_default("NYX_CONTEXT_SENSITIVE", true), symex: SymexOptions { enabled: env_bool_default("NYX_SYMEX", true), cross_file: env_bool_default("NYX_CROSS_FILE_SYMEX", true), interprocedural: env_bool_default("NYX_SYMEX_INTERPROC", true), smt: env_bool_default("NYX_SMT", true), }, backwards_analysis: env_bool_default("NYX_BACKWARDS", false), parse_timeout_ms: env_u64_default("NYX_PARSE_TIMEOUT_MS", DEFAULT_PARSE_TIMEOUT_MS), max_origins: env_u32_default("NYX_MAX_ORIGINS", DEFAULT_MAX_ORIGINS).max(MIN_MAX_ORIGINS), max_pointsto: env_u32_default("NYX_MAX_POINTSTO", DEFAULT_MAX_POINTSTO) .max(MIN_MAX_POINTSTO), } } fn env_bool_default(key: &str, default: bool) -> bool { match std::env::var(key) { Ok(v) => !(v == "0" || v.eq_ignore_ascii_case("false")), Err(_) => default, } } fn env_u64_default(key: &str, default: u64) -> u64 { match std::env::var(key) { Ok(v) => v.parse::().unwrap_or(default), Err(_) => default, } } fn env_u32_default(key: &str, default: u32) -> u32 { match std::env::var(key) { Ok(v) => v.parse::().unwrap_or(default), Err(_) => default, } } #[cfg(test)] mod tests { use super::*; #[test] fn defaults_match_documented() { let opts = AnalysisOptions::default(); assert!(opts.constraint_solving); assert!(opts.abstract_interpretation); assert!(opts.context_sensitive); assert!(opts.symex.enabled); assert!(opts.symex.cross_file); assert!(opts.symex.interprocedural); assert!(opts.symex.smt); assert!(!opts.backwards_analysis, "backwards analysis defaults off"); assert_eq!(opts.parse_timeout_ms, DEFAULT_PARSE_TIMEOUT_MS); assert_eq!(opts.max_origins, DEFAULT_MAX_ORIGINS); assert_eq!(opts.max_pointsto, DEFAULT_MAX_POINTSTO); } #[test] fn toml_roundtrip() { let opts = AnalysisOptions { constraint_solving: false, abstract_interpretation: true, context_sensitive: false, symex: SymexOptions { enabled: true, cross_file: false, interprocedural: true, smt: false, }, backwards_analysis: true, parse_timeout_ms: 5_000, max_origins: 64, max_pointsto: 48, }; let s = toml::to_string(&opts).unwrap(); let back: AnalysisOptions = toml::from_str(&s).unwrap(); assert_eq!(opts, back); } }