mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
272 lines
12 KiB
Rust
272 lines
12 KiB
Rust
//! Analysis-engine options: stable, serializable toggles that control which
|
||
//! analysis passes run inside the scanner.
|
||
//!
|
||
//! These are the release-grade knobs that used to live as ad-hoc `NYX_*`
|
||
//! environment variables (`NYX_CONSTRAINT`, `NYX_ABSTRACT_INTERP`, `NYX_SYMEX`,
|
||
//! `NYX_CROSS_FILE_SYMEX`, `NYX_SYMEX_INTERPROC`, `NYX_CONTEXT_SENSITIVE`,
|
||
//! `NYX_PARSE_TIMEOUT_MS`, `NYX_SMT`). They are now a single struct loaded
|
||
//! from the `[analysis.engine]` section of `nyx.conf` and overridable by CLI
|
||
//! flags.
|
||
//!
|
||
//! Engine code calls [`current`] to read the active options. Before a scan
|
||
//! begins, the CLI entry point installs a resolved [`AnalysisOptions`] via
|
||
//! [`install`]. Library consumers that never call `install` get
|
||
//! [`AnalysisOptions::default`], which is the documented release default.
|
||
//!
|
||
//! The legacy `NYX_*` variables still read **only** when no runtime has been
|
||
//! installed and serve as a last-resort override for library users; running
|
||
//! the `nyx` binary always goes through the configured runtime.
|
||
|
||
use serde::{Deserialize, Serialize};
|
||
use std::sync::RwLock;
|
||
|
||
/// Default parse timeout (milliseconds). See [`AnalysisOptions::parse_timeout_ms`].
|
||
pub const DEFAULT_PARSE_TIMEOUT_MS: u64 = 10_000;
|
||
|
||
/// Default upper bound on the number of taint origins tracked per lattice
|
||
/// value. Raised from the historical `4` to `32` so realistic codebases
|
||
/// with wide joins (many param sources, deep helper chains) no longer
|
||
/// silently drop origin attribution. Tunable via
|
||
/// [`AnalysisOptions::max_origins`], see
|
||
/// `src/taint/ssa_transfer/state.rs::effective_max_origins`.
|
||
pub const DEFAULT_MAX_ORIGINS: u32 = 32;
|
||
|
||
/// Minimum permitted `max_origins` value. A cap of `0` would make origin
|
||
/// tracking impossible (every merge would truncate); the test override
|
||
/// still accepts `0` through its own path, but runtime config clamps to
|
||
/// at least `1` so production scans always carry *some* provenance.
|
||
pub const MIN_MAX_ORIGINS: u32 = 1;
|
||
|
||
/// Default upper bound on the number of abstract heap objects tracked per
|
||
/// intra-procedural points-to set. Set to `32`, high enough that
|
||
/// realistic factory/builder/DI patterns (routine 10–30 allocation sites
|
||
/// aliased into one variable) stay precise, low enough to keep
|
||
/// `HeapState` join/clone cost bounded in the worklist. Tunable via
|
||
/// [`AnalysisOptions::max_pointsto`], see
|
||
/// `src/ssa/heap.rs::effective_max_pointsto`.
|
||
pub const DEFAULT_MAX_POINTSTO: u32 = 32;
|
||
|
||
/// Minimum permitted `max_pointsto` value. A cap of `0` would make
|
||
/// points-to tracking impossible; runtime config clamps to at least `1`.
|
||
pub const MIN_MAX_POINTSTO: u32 = 1;
|
||
|
||
/// Options for the symbolic-execution pipeline.
|
||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||
#[serde(default)]
|
||
pub struct SymexOptions {
|
||
/// Run the symex pass at all. When `false`, findings get no
|
||
/// `symbolic` verdict and cross-file body extraction is skipped.
|
||
pub enabled: bool,
|
||
/// Persist and consult cross-file SSA bodies so symex can model
|
||
/// callees defined in other files.
|
||
pub cross_file: bool,
|
||
/// Dive into intra-file callee bodies during symex (k ≥ 2 via the
|
||
/// interprocedural frame stack).
|
||
pub interprocedural: bool,
|
||
/// Use the SMT backend when available. Only meaningful when nyx is
|
||
/// compiled with the `smt` feature; silently ignored otherwise.
|
||
pub smt: bool,
|
||
}
|
||
|
||
impl Default for SymexOptions {
|
||
fn default() -> Self {
|
||
Self {
|
||
enabled: true,
|
||
cross_file: true,
|
||
interprocedural: true,
|
||
smt: true,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Stable configuration for the analysis engine.
|
||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||
#[serde(default)]
|
||
pub struct AnalysisOptions {
|
||
/// Path-constraint solving. Prunes infeasible paths from the taint
|
||
/// worklist and records unsat contexts in findings.
|
||
pub constraint_solving: bool,
|
||
/// Abstract interpretation: interval/string/bit domains carried through
|
||
/// the SSA worklist and used to suppress provably safe sinks.
|
||
pub abstract_interpretation: bool,
|
||
/// k=1 context-sensitive inlining for intra-file callees.
|
||
pub context_sensitive: bool,
|
||
/// Symbolic-execution pipeline.
|
||
pub symex: SymexOptions,
|
||
/// Demand-driven backwards taint analysis from sinks.
|
||
///
|
||
/// When enabled, after forward pass 2 completes, a backwards walk runs
|
||
/// from each sink's tainted SSA operands to corroborate or rule out the
|
||
/// forward finding. Corroborated findings get a `backwards-confirmed`
|
||
/// note; flows the backward walk proves infeasible get a
|
||
/// `backwards-infeasible` note that caps confidence. Defaults off.
|
||
pub backwards_analysis: bool,
|
||
/// Per-file tree-sitter parse timeout in milliseconds. `0` disables the
|
||
/// cap entirely (not recommended outside of controlled benchmarks).
|
||
pub parse_timeout_ms: u64,
|
||
/// Maximum taint origins retained per lattice value.
|
||
///
|
||
/// Controls both [`crate::taint::domain::VarTaint::origins`] and
|
||
/// the equivalent per-object bound inside the heap state. When a
|
||
/// merge would exceed this bound, origins are dropped deterministically
|
||
/// (sorted by source location) and an
|
||
/// [`crate::engine_notes::EngineNote::OriginsTruncated`] note is
|
||
/// recorded on the affected finding. Raising this reduces the
|
||
/// chance of silent under-reporting at the cost of slightly wider
|
||
/// lattice values. See [`DEFAULT_MAX_ORIGINS`].
|
||
pub max_origins: u32,
|
||
/// Maximum abstract heap objects retained per intra-procedural
|
||
/// points-to set.
|
||
///
|
||
/// When an allocation-site union would exceed this bound, the
|
||
/// largest-keyed heap objects are dropped and an
|
||
/// [`crate::engine_notes::EngineNote::PointsToTruncated`] note is
|
||
/// recorded. Taint flows that should have reached the dropped
|
||
/// objects via this aliasing path are lost (under-report). Raise
|
||
/// for factory-heavy codebases where truncation is observed; lower
|
||
/// only when points-to width is a measured bottleneck. See
|
||
/// [`DEFAULT_MAX_POINTSTO`].
|
||
pub max_pointsto: u32,
|
||
}
|
||
|
||
impl Default for AnalysisOptions {
|
||
fn default() -> Self {
|
||
Self {
|
||
constraint_solving: true,
|
||
abstract_interpretation: true,
|
||
context_sensitive: true,
|
||
symex: SymexOptions::default(),
|
||
backwards_analysis: false,
|
||
parse_timeout_ms: DEFAULT_PARSE_TIMEOUT_MS,
|
||
max_origins: DEFAULT_MAX_ORIGINS,
|
||
max_pointsto: DEFAULT_MAX_POINTSTO,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Process-wide installed options. Accessors fall back to
|
||
/// [`AnalysisOptions::default`] (with env-var overrides for backward
|
||
/// compatibility) until a caller installs a value.
|
||
///
|
||
/// A `RwLock` is used rather than a `OnceLock` so that long-lived callers
|
||
/// (notably `nyx serve`, which resolves the engine profile per scan
|
||
/// request) can replace the installed options between scans via
|
||
/// [`reinstall`]. Within a single scan run, engine toggles must not
|
||
/// change mid-flight, the caller is responsible for that invariant
|
||
/// (`JobManager`'s single-scan guarantee provides it in the server).
|
||
static RUNTIME: RwLock<Option<AnalysisOptions>> = RwLock::new(None);
|
||
|
||
/// Install the process-wide analysis options, first-wins. Subsequent
|
||
/// calls are a no-op and return `false`, matching the semantics the CLI
|
||
/// entry point relies on (one install per process lifetime for non-serve
|
||
/// commands). Servers that resolve options per request should use
|
||
/// [`reinstall`] instead.
|
||
pub fn install(opts: AnalysisOptions) -> bool {
|
||
let mut guard = RUNTIME.write().expect("analysis options RwLock poisoned");
|
||
if guard.is_some() {
|
||
return false;
|
||
}
|
||
*guard = Some(opts);
|
||
true
|
||
}
|
||
|
||
/// Replace the installed options unconditionally. Intended for the HTTP
|
||
/// server's scan thread, which re-resolves the engine profile from each
|
||
/// incoming request; `install`'s first-wins semantics would otherwise
|
||
/// pin the first scan's choice for the lifetime of the server. Callers
|
||
/// must ensure no scan is concurrently reading `current()`, in practice
|
||
/// this means calling `reinstall` before the scan's rayon pool starts.
|
||
pub fn reinstall(opts: AnalysisOptions) {
|
||
*RUNTIME.write().expect("analysis options RwLock poisoned") = Some(opts);
|
||
}
|
||
|
||
/// Read the active options. Returns the installed runtime when present,
|
||
/// otherwise defaults merged with env-var fallbacks (legacy path).
|
||
pub fn current() -> AnalysisOptions {
|
||
if let Some(rt) = *RUNTIME.read().expect("analysis options RwLock poisoned") {
|
||
return rt;
|
||
}
|
||
// Legacy env-var fallback: applies only when no runtime has been
|
||
// installed (primarily for library consumers and old tests). Logged
|
||
// at debug level so CI/test output isn't spammed.
|
||
AnalysisOptions {
|
||
constraint_solving: env_bool_default("NYX_CONSTRAINT", true),
|
||
abstract_interpretation: env_bool_default("NYX_ABSTRACT_INTERP", true),
|
||
context_sensitive: env_bool_default("NYX_CONTEXT_SENSITIVE", true),
|
||
symex: SymexOptions {
|
||
enabled: env_bool_default("NYX_SYMEX", true),
|
||
cross_file: env_bool_default("NYX_CROSS_FILE_SYMEX", true),
|
||
interprocedural: env_bool_default("NYX_SYMEX_INTERPROC", true),
|
||
smt: env_bool_default("NYX_SMT", true),
|
||
},
|
||
backwards_analysis: env_bool_default("NYX_BACKWARDS", false),
|
||
parse_timeout_ms: env_u64_default("NYX_PARSE_TIMEOUT_MS", DEFAULT_PARSE_TIMEOUT_MS),
|
||
max_origins: env_u32_default("NYX_MAX_ORIGINS", DEFAULT_MAX_ORIGINS).max(MIN_MAX_ORIGINS),
|
||
max_pointsto: env_u32_default("NYX_MAX_POINTSTO", DEFAULT_MAX_POINTSTO)
|
||
.max(MIN_MAX_POINTSTO),
|
||
}
|
||
}
|
||
|
||
fn env_bool_default(key: &str, default: bool) -> bool {
|
||
match std::env::var(key) {
|
||
Ok(v) => !(v == "0" || v.eq_ignore_ascii_case("false")),
|
||
Err(_) => default,
|
||
}
|
||
}
|
||
|
||
fn env_u64_default(key: &str, default: u64) -> u64 {
|
||
match std::env::var(key) {
|
||
Ok(v) => v.parse::<u64>().unwrap_or(default),
|
||
Err(_) => default,
|
||
}
|
||
}
|
||
|
||
fn env_u32_default(key: &str, default: u32) -> u32 {
|
||
match std::env::var(key) {
|
||
Ok(v) => v.parse::<u32>().unwrap_or(default),
|
||
Err(_) => default,
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
|
||
#[test]
|
||
fn defaults_match_documented() {
|
||
let opts = AnalysisOptions::default();
|
||
assert!(opts.constraint_solving);
|
||
assert!(opts.abstract_interpretation);
|
||
assert!(opts.context_sensitive);
|
||
assert!(opts.symex.enabled);
|
||
assert!(opts.symex.cross_file);
|
||
assert!(opts.symex.interprocedural);
|
||
assert!(opts.symex.smt);
|
||
assert!(!opts.backwards_analysis, "backwards analysis defaults off");
|
||
assert_eq!(opts.parse_timeout_ms, DEFAULT_PARSE_TIMEOUT_MS);
|
||
assert_eq!(opts.max_origins, DEFAULT_MAX_ORIGINS);
|
||
assert_eq!(opts.max_pointsto, DEFAULT_MAX_POINTSTO);
|
||
}
|
||
|
||
#[test]
|
||
fn toml_roundtrip() {
|
||
let opts = AnalysisOptions {
|
||
constraint_solving: false,
|
||
abstract_interpretation: true,
|
||
context_sensitive: false,
|
||
symex: SymexOptions {
|
||
enabled: true,
|
||
cross_file: false,
|
||
interprocedural: true,
|
||
smt: false,
|
||
},
|
||
backwards_analysis: true,
|
||
parse_timeout_ms: 5_000,
|
||
max_origins: 64,
|
||
max_pointsto: 48,
|
||
};
|
||
let s = toml::to_string(&opts).unwrap();
|
||
let back: AnalysisOptions = toml::from_str(&s).unwrap();
|
||
assert_eq!(opts, back);
|
||
}
|
||
}
|