mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Added experimental control flow analysis and syntax classification for rust lang (#22)
* Introduce control flow graph (CFG) support: - Added `cfg.rs` with CFG generation and analysis utilities. - Integrated `petgraph` library for graph-based computations. - Updated `ast.rs` to utilize CFG for function analysis. - Modified `Cargo.toml` and `Cargo.lock` to include new dependencies. - Improved static analysis with taint tracking through CFG paths. * feat: enhance control flow analysis with taint tracking and node labeling * feat: improve control flow graph with enhanced node handling and new tests * Remove unnecessary reference marker in `byte_offset_to_point` comment. * Remove unnecessary reference marker in `byte_offset_to_point` comment. * Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis. * Refactor CFG and taint tracking logic: - Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes. - Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification. - Refined `push_node` to handle method call expressions with object-function pairing. - Simplified code handling in trivia skipping and debug-only logic. * Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`. * Add targeted CFG taint-tracking tests to enhance analysis coverage. * Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`. * Update README with installation instructions for Cargo and GitHub releases. * Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules. * Refactor `labels.rs`: - Removed redundant `LabelRule` entries for cleaner rule definitions. - Adjusted matching logic to prioritize suffix and prefix matches effectively. 
* Refactor `labels.rs`: - Removed redundant `LabelRule` entries for cleaner rule definitions. - Adjusted matching logic to prioritize suffix and prefix matches effectively. * Add test for taint tracking with multiple sources in `cfg.rs`. * Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic. * refactor: split `labels.rs` into modular structure with language-specific files * refactor: split `labels.rs` into modular structure with language-specific files * refactor: clean up SQL table definitions in `database.rs` for better readability * refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling * refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details * refactor: remove redundant header from README.md for improved clarity * feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages * feat: introduce analysis modes for enhanced scanner configuration and diagnostics * feat: define Kind enum for syntax classification in control flow analysis * feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes * refactor: clean up imports and formatting in AST and CFG modules for improved readability * refactor: simplify function signatures and improve code readability in CFG and module files * fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB * refactor: update string formatting in clean and project modules for consistency * refactor: fix indentation in clean.rs for improved readability --------- Co-authored-by: elipeter <eli.peter@es.fcm.travel>
This commit is contained in:
parent
fd65360818
commit
3c21efba75
21 changed files with 1585 additions and 79 deletions
|
|
@ -8,9 +8,21 @@ use toml;
|
|||
|
||||
static DEFAULT_CONFIG_TOML: &str = include_str!("../../default-nyx.conf");
|
||||
|
||||
/// Analysis mode selected for the scanner (stored in `ScannerConfig::mode`).
///
/// Variant names are (de)serialized in lowercase (`full`, `ast`, `taint`),
/// and `Full` is the default variant.
///
/// NOTE(review): presumably `Ast` and `Taint` each restrict the scan to one
/// kind of analysis while `Full` runs everything — confirm against the scanner.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum AnalysisMode {
|
||||
#[default]
|
||||
Full,
|
||||
|
||||
Ast,
|
||||
|
||||
Taint,
|
||||
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(default)]
|
||||
pub struct ScannerConfig {
|
||||
/// The analysis mode to use.
|
||||
pub mode: AnalysisMode,
|
||||
|
||||
/// The minimum severity level to output
|
||||
pub min_severity: Severity,
|
||||
|
||||
|
|
@ -47,6 +59,7 @@ pub struct ScannerConfig {
|
|||
impl Default for ScannerConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
mode: AnalysisMode::Full,
|
||||
min_severity: Severity::Low,
|
||||
max_file_size_mb: None,
|
||||
excluded_extensions: vec![
|
||||
|
|
@ -151,6 +164,9 @@ pub struct PerformanceConfig {
|
|||
/// capacity = threads × this
|
||||
pub channel_multiplier: usize,
|
||||
|
||||
/// The stack size for Rayon threads, in bytes.
|
||||
pub rayon_thread_stack_size: usize,
|
||||
|
||||
/// Timeout on individual files // TODO: IMPLEMENT
|
||||
pub scan_timeout_secs: Option<u64>,
|
||||
|
||||
|
|
@ -167,6 +183,7 @@ impl Default for PerformanceConfig {
|
|||
worker_threads: None,
|
||||
batch_size: 100usize,
|
||||
channel_multiplier: 4usize,
|
||||
rayon_thread_stack_size: 8 * 1024 * 1024, // 8 MiB
|
||||
scan_timeout_secs: None,
|
||||
memory_limit_mb: 512,
|
||||
}
|
||||
|
|
@ -236,6 +253,7 @@ fn create_example_config(config_dir: &Path) -> NyxResult<()> {
|
|||
/// supply new exclusions and overriding everything else.
|
||||
fn merge_configs(mut default: Config, user: Config) -> Config {
|
||||
// --- ScannerConfig ---
|
||||
default.scanner.mode = user.scanner.mode;
|
||||
default.scanner.min_severity = user.scanner.min_severity;
|
||||
default.scanner.max_file_size_mb = user.scanner.max_file_size_mb;
|
||||
default.scanner.read_global_ignore = user.scanner.read_global_ignore;
|
||||
|
|
@ -277,6 +295,7 @@ fn merge_configs(mut default: Config, user: Config) -> Config {
|
|||
default.performance.worker_threads = user.performance.worker_threads;
|
||||
default.performance.batch_size = user.performance.batch_size;
|
||||
default.performance.channel_multiplier = user.performance.channel_multiplier;
|
||||
default.performance.rayon_thread_stack_size = user.performance.rayon_thread_stack_size;
|
||||
default.performance.scan_timeout_secs = user.performance.scan_timeout_secs;
|
||||
default.performance.memory_limit_mb = user.performance.memory_limit_mb;
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ pub fn get_project_info(project_path: &Path, config_dir: &Path) -> NyxResult<(St
|
|||
.ok_or_else(|| NyxError::Other("Unable to determine project name".into()))?;
|
||||
|
||||
let db_name = sanitize_project_name(project_name);
|
||||
let db_path = config_dir.join(format!("{}.sqlite", db_name));
|
||||
let db_path = config_dir.join(format!("{db_name}.sqlite"));
|
||||
|
||||
Ok((project_name.to_owned(), db_path))
|
||||
}
|
||||
|
|
@ -41,7 +41,7 @@ fn sanitize_project_name_is_idempotent_and_lossless_enough() {
|
|||
];
|
||||
|
||||
for (input, expected) in samples {
|
||||
assert_eq!(sanitize_project_name(input), expected, "input: {}", input);
|
||||
assert_eq!(sanitize_project_name(input), expected, "input: {input}");
|
||||
assert_eq!(sanitize_project_name(expected), expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue