mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
* chore: Exclude CLAUDE.md from Cargo.toml * feat: add callgraph module and integrate into main analysis flow * feat: enhance CLI with new severity filtering and analysis modes * feat: update CHANGELOG with recent enhancements and fixes to severity filtering and output handling * feat: implement state-model dataflow analysis for resource lifecycle and auth state * feat: enhance diagnostic output formatting and add evidence structure * feat: implement attack surface ranking for diagnostics with scoring and sorting * feat: add comprehensive documentation for installation, usage, and rules reference * feat: add multiple language support for command execution and evaluation endpoints * feat: implement inline suppression for findings using `nyx:ignore` comments * feat: add confidence levels to AST patterns and update output structure * feat: implement low-noise prioritization system with category filtering, rollup grouping, and configurable budgets * feat: bump version to 0.4.0 and update changelog with new features and improvements * feat: add dead code allowances to various functions in mod.rs and real_world_tests.rs
573 lines
18 KiB
Rust
573 lines
18 KiB
Rust
use crate::errors::NyxResult;
|
||
use crate::patterns::Severity;
|
||
use console::style;
|
||
use serde::{Deserialize, Serialize};
|
||
use std::collections::HashMap;
|
||
use std::fs;
|
||
use std::path::Path;
|
||
use toml;
|
||
|
||
static DEFAULT_CONFIG_TOML: &str = include_str!("../../default-nyx.conf");
|
||
|
||
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
|
||
#[serde(rename_all = "lowercase")]
|
||
pub enum AnalysisMode {
|
||
#[default]
|
||
Full,
|
||
Ast,
|
||
Taint,
|
||
}
|
||
|
||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||
#[serde(default)]
|
||
pub struct ScannerConfig {
|
||
/// The analysis mode to use.
|
||
pub mode: AnalysisMode,
|
||
|
||
/// The minimum severity level to output
|
||
pub min_severity: Severity,
|
||
|
||
/// The maximum file size to scan, in megabytes.
|
||
pub max_file_size_mb: Option<u64>,
|
||
|
||
/// File extensions to exclude from scanning.
|
||
pub excluded_extensions: Vec<String>,
|
||
|
||
/// Directories to exclude from scanning.
|
||
pub excluded_directories: Vec<String>,
|
||
|
||
/// Excluded files
|
||
pub excluded_files: Vec<String>,
|
||
|
||
/// Whether to respect the global ignore file or not.
|
||
pub read_global_ignore: bool,
|
||
|
||
/// Whether to respect VCS ignore files (`.gitignore`, ..) or not.
|
||
pub read_vcsignore: bool,
|
||
|
||
/// Whether to require a `.git` directory to respect gitignore files.
|
||
pub require_git_to_read_vcsignore: bool,
|
||
|
||
/// Whether to limit the search to starting file system or not.
|
||
pub one_file_system: bool,
|
||
|
||
/// Whether to follow symlinks or not.
|
||
pub follow_symlinks: bool,
|
||
|
||
/// Whether to scan hidden files or not.
|
||
pub scan_hidden_files: bool,
|
||
|
||
/// Whether to include findings from non-production paths (tests, vendor,
|
||
/// benchmarks, etc.) at their original severity. When false (default),
|
||
/// findings in these paths are downgraded by one severity tier.
|
||
pub include_nonprod: bool,
|
||
|
||
/// Enable the state-model dataflow engine for resource lifecycle and
|
||
/// auth-state analysis. Default: false (opt-in).
|
||
pub enable_state_analysis: bool,
|
||
}
|
||
impl Default for ScannerConfig {
|
||
fn default() -> Self {
|
||
Self {
|
||
mode: AnalysisMode::Full,
|
||
min_severity: Severity::Low,
|
||
max_file_size_mb: None,
|
||
excluded_extensions: vec![
|
||
"jpg", "png", "gif", "mp4", "avi", "mkv", "zip", "tar", "gz", "exe", "dll", "so",
|
||
]
|
||
.into_iter()
|
||
.map(str::to_owned)
|
||
.collect(),
|
||
excluded_directories: vec![
|
||
"node_modules",
|
||
".git",
|
||
"target",
|
||
".vscode",
|
||
".idea",
|
||
"build",
|
||
"dist",
|
||
]
|
||
.into_iter()
|
||
.map(str::to_owned)
|
||
.collect(),
|
||
excluded_files: vec![].into_iter().map(str::to_owned).collect(),
|
||
read_global_ignore: false,
|
||
read_vcsignore: true,
|
||
require_git_to_read_vcsignore: true,
|
||
one_file_system: false,
|
||
follow_symlinks: false,
|
||
scan_hidden_files: false,
|
||
include_nonprod: false,
|
||
enable_state_analysis: false,
|
||
}
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||
#[serde(default)]
|
||
pub struct DatabaseConfig {
|
||
/// Custom path for database
|
||
pub path: String,
|
||
|
||
/// The number of days to keep database files for. TODO: IMPLEMENT
|
||
pub auto_cleanup_days: u32,
|
||
|
||
/// The maximum size of the database, in megabytes. TODO: IMPLEMENT
|
||
pub max_db_size_mb: u64,
|
||
|
||
/// Whether to run a VACUUM on startup or not.
|
||
pub vacuum_on_startup: bool,
|
||
}
|
||
impl Default for DatabaseConfig {
|
||
fn default() -> Self {
|
||
Self {
|
||
path: String::from(""),
|
||
auto_cleanup_days: 30,
|
||
max_db_size_mb: 1024,
|
||
vacuum_on_startup: false,
|
||
}
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||
#[serde(default)]
|
||
pub struct OutputConfig {
|
||
/// The default output format.
|
||
pub default_format: String,
|
||
|
||
/// Whether to print anything to the console or not.
|
||
pub quiet: bool,
|
||
|
||
/// The maximum number of results to show.
|
||
pub max_results: Option<u32>,
|
||
|
||
/// Enable attack-surface ranking to sort findings by exploitability.
|
||
pub attack_surface_ranking: bool,
|
||
|
||
/// Minimum attack-surface score to include in output.
|
||
/// Findings below this threshold are dropped after ranking.
|
||
/// `None` means no minimum (all findings shown).
|
||
pub min_score: Option<u32>,
|
||
|
||
/// Minimum confidence level to include in output.
|
||
/// `None` means no minimum (all findings shown).
|
||
#[serde(
|
||
default,
|
||
skip_serializing_if = "Option::is_none",
|
||
deserialize_with = "deserialize_confidence_opt"
|
||
)]
|
||
pub min_confidence: Option<crate::evidence::Confidence>,
|
||
|
||
/// Include Quality-category findings (excluded by default).
|
||
#[serde(default)]
|
||
pub include_quality: bool,
|
||
|
||
/// Show all findings: disables category filtering, rollups, and LOW budgets.
|
||
#[serde(default)]
|
||
pub show_all: bool,
|
||
|
||
/// Maximum total LOW findings to show.
|
||
#[serde(default = "default_max_low")]
|
||
pub max_low: u32,
|
||
|
||
/// Maximum LOW findings per file.
|
||
#[serde(default = "default_max_low_per_file")]
|
||
pub max_low_per_file: u32,
|
||
|
||
/// Maximum LOW findings per rule.
|
||
#[serde(default = "default_max_low_per_rule")]
|
||
pub max_low_per_rule: u32,
|
||
|
||
/// Number of example locations to store in rollup findings.
|
||
#[serde(default = "default_rollup_examples")]
|
||
pub rollup_examples: u32,
|
||
}
|
||
|
||
fn default_max_low() -> u32 {
|
||
20
|
||
}
|
||
fn default_max_low_per_file() -> u32 {
|
||
1
|
||
}
|
||
fn default_max_low_per_rule() -> u32 {
|
||
10
|
||
}
|
||
fn default_rollup_examples() -> u32 {
|
||
5
|
||
}
|
||
|
||
impl Default for OutputConfig {
|
||
fn default() -> Self {
|
||
Self {
|
||
default_format: "console".into(),
|
||
quiet: false,
|
||
max_results: None,
|
||
attack_surface_ranking: true,
|
||
min_score: None,
|
||
min_confidence: None,
|
||
include_quality: false,
|
||
show_all: false,
|
||
max_low: 20,
|
||
max_low_per_file: 1,
|
||
max_low_per_rule: 10,
|
||
rollup_examples: 5,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Deserialize an optional Confidence from a TOML string.
|
||
fn deserialize_confidence_opt<'de, D>(
|
||
deserializer: D,
|
||
) -> Result<Option<crate::evidence::Confidence>, D::Error>
|
||
where
|
||
D: serde::Deserializer<'de>,
|
||
{
|
||
let opt: Option<String> = Option::deserialize(deserializer)?;
|
||
match opt {
|
||
None => Ok(None),
|
||
Some(s) => s
|
||
.parse::<crate::evidence::Confidence>()
|
||
.map(Some)
|
||
.map_err(serde::de::Error::custom),
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||
#[serde(default)]
|
||
pub struct PerformanceConfig {
|
||
/// The maximum search depth, or `None` if no maximum search depth should be set.
|
||
///
|
||
/// A depth of `1` includes all files under the current directory, a depth of `2` also includes
|
||
/// all files under subdirectories of the current directory, etc.
|
||
pub max_depth: Option<usize>,
|
||
|
||
/// The minimum depth for reported entries, or `None`.
|
||
pub min_depth: Option<usize>,
|
||
|
||
/// Whether to stop traversing into matching directories.
|
||
pub prune: bool,
|
||
|
||
/// The maximum number of worker threads to use., or `None` to auto-detect.
|
||
pub worker_threads: Option<usize>,
|
||
|
||
/// The maximum number of entries to index in a single chunk.
|
||
pub batch_size: usize,
|
||
|
||
/// capacity = threads × this
|
||
pub channel_multiplier: usize,
|
||
|
||
/// The stack size for Rayon threads, in bytes.
|
||
pub rayon_thread_stack_size: usize,
|
||
|
||
/// Timeout on individual files // TODO: IMPLEMENT
|
||
pub scan_timeout_secs: Option<u64>,
|
||
|
||
/// The maximum amount of memory to use, in megabytes.
|
||
pub memory_limit_mb: u64, // TODO: IMPLEMENT
|
||
}
|
||
|
||
impl Default for PerformanceConfig {
|
||
fn default() -> Self {
|
||
Self {
|
||
max_depth: None,
|
||
min_depth: None,
|
||
prune: false,
|
||
worker_threads: None,
|
||
batch_size: 100usize,
|
||
channel_multiplier: 4usize,
|
||
rayon_thread_stack_size: 8 * 1024 * 1024, // 2 MiB
|
||
scan_timeout_secs: None,
|
||
memory_limit_mb: 512,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// A single user-defined label rule from config.
|
||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||
pub struct ConfigLabelRule {
|
||
pub matchers: Vec<String>,
|
||
/// "source", "sanitizer", or "sink"
|
||
pub kind: String,
|
||
/// Capability name: "html_escape", "shell_escape", "url_encode", "json_parse",
|
||
/// "env_var", "file_io", or "all"
|
||
pub cap: String,
|
||
}
|
||
|
||
/// Per-language analysis configuration from config file.
|
||
#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
|
||
#[serde(default)]
|
||
pub struct LanguageAnalysisConfig {
|
||
pub rules: Vec<ConfigLabelRule>,
|
||
pub terminators: Vec<String>,
|
||
pub event_handlers: Vec<String>,
|
||
}
|
||
|
||
/// Top-level analysis rules config, keyed by language slug.
|
||
#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
|
||
#[serde(default)]
|
||
pub struct AnalysisRulesConfig {
|
||
pub languages: HashMap<String, LanguageAnalysisConfig>,
|
||
}
|
||
|
||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||
#[serde(default)]
|
||
#[derive(Default)]
|
||
pub struct Config {
|
||
pub scanner: ScannerConfig,
|
||
pub database: DatabaseConfig,
|
||
pub output: OutputConfig,
|
||
pub performance: PerformanceConfig,
|
||
pub analysis: AnalysisRulesConfig,
|
||
}
|
||
|
||
impl Config {
|
||
/// Load config and return `(config, optional_note)`.
|
||
///
|
||
/// The note is a formatted status message about which config file was
|
||
/// loaded (or that defaults are in use). The caller decides whether to
|
||
/// print it based on output format / quiet mode.
|
||
pub fn load(config_dir: &Path) -> NyxResult<(Self, Option<String>)> {
|
||
let mut config = Config::default();
|
||
|
||
let default_config_path = config_dir.join("nyx.conf");
|
||
if !default_config_path.exists() {
|
||
create_example_config(config_dir)?;
|
||
}
|
||
|
||
let user_config_path = config_dir.join("nyx.local");
|
||
let note = if user_config_path.exists() {
|
||
let user_config_content = fs::read_to_string(&user_config_path)?;
|
||
let user_config: Config = toml::from_str(&user_config_content)?;
|
||
|
||
config = merge_configs(config, user_config);
|
||
|
||
Some(format!(
|
||
"{}: Loaded user config from: {}\n",
|
||
style("note").green().bold(),
|
||
style(user_config_path.display())
|
||
.underlined()
|
||
.white()
|
||
.bold()
|
||
))
|
||
} else {
|
||
Some(format!(
|
||
"{}: Using {} configuration.\n Create file in '{}' to customize.\n",
|
||
style("note").green().bold(),
|
||
style("default").bold(),
|
||
style(user_config_path.display())
|
||
.underlined()
|
||
.white()
|
||
.bold()
|
||
))
|
||
};
|
||
|
||
Ok((config, note))
|
||
}
|
||
}
|
||
|
||
fn create_example_config(config_dir: &Path) -> NyxResult<()> {
|
||
let example_path = config_dir.join("nyx.conf");
|
||
if !example_path.exists() {
|
||
fs::write(&example_path, DEFAULT_CONFIG_TOML)?;
|
||
tracing::debug!("Example config created at: {}", example_path.display());
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
/// Merge user config into default config, preserving defaults where the user didn't
|
||
/// supply new exclusions and overriding everything else.
|
||
fn merge_configs(mut default: Config, user: Config) -> Config {
|
||
// --- ScannerConfig ---
|
||
default.scanner.mode = user.scanner.mode;
|
||
default.scanner.min_severity = user.scanner.min_severity;
|
||
default.scanner.max_file_size_mb = user.scanner.max_file_size_mb;
|
||
default.scanner.read_global_ignore = user.scanner.read_global_ignore;
|
||
default.scanner.read_vcsignore = user.scanner.read_vcsignore;
|
||
default.scanner.require_git_to_read_vcsignore = user.scanner.require_git_to_read_vcsignore;
|
||
default.scanner.one_file_system = user.scanner.one_file_system;
|
||
default.scanner.follow_symlinks = user.scanner.follow_symlinks;
|
||
default.scanner.scan_hidden_files = user.scanner.scan_hidden_files;
|
||
default.scanner.include_nonprod = user.scanner.include_nonprod;
|
||
default.scanner.enable_state_analysis = user.scanner.enable_state_analysis;
|
||
|
||
// Merge exclusion lists (default ⊔ user), then sort & dedupe
|
||
default
|
||
.scanner
|
||
.excluded_extensions
|
||
.extend(user.scanner.excluded_extensions);
|
||
default
|
||
.scanner
|
||
.excluded_directories
|
||
.extend(user.scanner.excluded_directories);
|
||
default.scanner.excluded_extensions.sort_unstable();
|
||
default.scanner.excluded_extensions.dedup();
|
||
default.scanner.excluded_directories.sort_unstable();
|
||
default.scanner.excluded_directories.dedup();
|
||
|
||
// --- DatabaseConfig ---
|
||
default.database.path = user.database.path;
|
||
default.database.auto_cleanup_days = user.database.auto_cleanup_days;
|
||
default.database.max_db_size_mb = user.database.max_db_size_mb;
|
||
default.database.vacuum_on_startup = user.database.vacuum_on_startup;
|
||
|
||
// --- OutputConfig ---
|
||
default.output.default_format = user.output.default_format;
|
||
default.output.quiet = user.output.quiet;
|
||
default.output.max_results = user.output.max_results;
|
||
default.output.attack_surface_ranking = user.output.attack_surface_ranking;
|
||
default.output.min_score = user.output.min_score;
|
||
default.output.min_confidence = user.output.min_confidence;
|
||
default.output.include_quality = user.output.include_quality;
|
||
default.output.show_all = user.output.show_all;
|
||
default.output.max_low = user.output.max_low;
|
||
default.output.max_low_per_file = user.output.max_low_per_file;
|
||
default.output.max_low_per_rule = user.output.max_low_per_rule;
|
||
default.output.rollup_examples = user.output.rollup_examples;
|
||
|
||
// --- PerformanceConfig ---
|
||
default.performance.max_depth = user.performance.max_depth;
|
||
default.performance.min_depth = user.performance.min_depth;
|
||
default.performance.prune = user.performance.prune;
|
||
default.performance.worker_threads = user.performance.worker_threads;
|
||
default.performance.batch_size = user.performance.batch_size;
|
||
default.performance.channel_multiplier = user.performance.channel_multiplier;
|
||
default.performance.rayon_thread_stack_size = user.performance.rayon_thread_stack_size;
|
||
default.performance.scan_timeout_secs = user.performance.scan_timeout_secs;
|
||
default.performance.memory_limit_mb = user.performance.memory_limit_mb;
|
||
|
||
// --- AnalysisRulesConfig ---
|
||
for (lang, user_lang_cfg) in user.analysis.languages {
|
||
let entry = default.analysis.languages.entry(lang).or_default();
|
||
|
||
// Union-merge rules with dedup
|
||
for rule in user_lang_cfg.rules {
|
||
if !entry.rules.contains(&rule) {
|
||
entry.rules.push(rule);
|
||
}
|
||
}
|
||
|
||
// Union-merge terminators with dedup
|
||
for t in user_lang_cfg.terminators {
|
||
if !entry.terminators.contains(&t) {
|
||
entry.terminators.push(t);
|
||
}
|
||
}
|
||
|
||
// Union-merge event_handlers with dedup
|
||
for eh in user_lang_cfg.event_handlers {
|
||
if !entry.event_handlers.contains(&eh) {
|
||
entry.event_handlers.push(eh);
|
||
}
|
||
}
|
||
}
|
||
|
||
default
|
||
}
|
||
|
||
#[test]
|
||
fn merge_configs_dedupes_and_keeps_order() {
|
||
let mut default_cfg = Config::default();
|
||
default_cfg.scanner.excluded_extensions = vec!["rs".into(), "toml".into()];
|
||
|
||
let mut user_cfg = Config::default();
|
||
user_cfg.scanner.excluded_extensions = vec!["jpg".into(), "rs".into()];
|
||
|
||
let merged = merge_configs(default_cfg, user_cfg);
|
||
|
||
assert_eq!(
|
||
merged.scanner.excluded_extensions,
|
||
vec!["jpg", "rs", "toml"]
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn merge_analysis_rules_unions_and_dedupes() {
|
||
let mut default_cfg = Config::default();
|
||
default_cfg.analysis.languages.insert(
|
||
"javascript".into(),
|
||
LanguageAnalysisConfig {
|
||
rules: vec![ConfigLabelRule {
|
||
matchers: vec!["escapeHtml".into()],
|
||
kind: "sanitizer".into(),
|
||
cap: "html_escape".into(),
|
||
}],
|
||
terminators: vec!["process.exit".into()],
|
||
event_handlers: vec![],
|
||
},
|
||
);
|
||
|
||
let mut user_cfg = Config::default();
|
||
user_cfg.analysis.languages.insert(
|
||
"javascript".into(),
|
||
LanguageAnalysisConfig {
|
||
rules: vec![
|
||
ConfigLabelRule {
|
||
matchers: vec!["escapeHtml".into()],
|
||
kind: "sanitizer".into(),
|
||
cap: "html_escape".into(),
|
||
},
|
||
ConfigLabelRule {
|
||
matchers: vec!["sanitizeUrl".into()],
|
||
kind: "sanitizer".into(),
|
||
cap: "url_encode".into(),
|
||
},
|
||
],
|
||
terminators: vec!["process.exit".into(), "abort".into()],
|
||
event_handlers: vec!["addEventListener".into()],
|
||
},
|
||
);
|
||
|
||
let merged = merge_configs(default_cfg, user_cfg);
|
||
let js = merged.analysis.languages.get("javascript").unwrap();
|
||
assert_eq!(js.rules.len(), 2); // deduped
|
||
assert_eq!(js.terminators, vec!["process.exit", "abort"]);
|
||
assert_eq!(js.event_handlers, vec!["addEventListener"]);
|
||
}
|
||
|
||
#[test]
|
||
fn analysis_config_toml_roundtrip() {
|
||
let toml_str = r#"
|
||
[analysis.languages.javascript]
|
||
terminators = ["process.exit"]
|
||
event_handlers = ["addEventListener"]
|
||
|
||
[[analysis.languages.javascript.rules]]
|
||
matchers = ["escapeHtml"]
|
||
kind = "sanitizer"
|
||
cap = "html_escape"
|
||
"#;
|
||
let cfg: Config = toml::from_str(toml_str).unwrap();
|
||
let js = cfg.analysis.languages.get("javascript").unwrap();
|
||
assert_eq!(js.rules.len(), 1);
|
||
assert_eq!(js.rules[0].matchers, vec!["escapeHtml"]);
|
||
assert_eq!(js.rules[0].kind, "sanitizer");
|
||
assert_eq!(js.rules[0].cap, "html_escape");
|
||
assert_eq!(js.terminators, vec!["process.exit"]);
|
||
assert_eq!(js.event_handlers, vec!["addEventListener"]);
|
||
}
|
||
|
||
#[test]
|
||
fn load_creates_example_and_reads_user_overrides() {
|
||
let cfg_dir = tempfile::tempdir().unwrap();
|
||
let cfg_path = cfg_dir.path();
|
||
|
||
let user_toml = r#"
|
||
[scanner]
|
||
one_file_system = true
|
||
excluded_extensions = ["foo"]
|
||
|
||
[output]
|
||
quiet = true
|
||
"#;
|
||
fs::write(cfg_path.join("nyx.local"), user_toml).unwrap();
|
||
|
||
let (cfg, _note) = Config::load(cfg_path).expect("Config::load should succeed");
|
||
|
||
assert!(cfg_path.join("nyx.conf").is_file());
|
||
|
||
assert!(cfg.scanner.one_file_system);
|
||
assert!(cfg.output.quiet);
|
||
assert!(cfg.scanner.excluded_extensions.contains(&"foo".to_string()));
|
||
|
||
assert!(!cfg.scanner.follow_symlinks);
|
||
}
|