mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-30 20:39:39 +02:00
Added experimental control flow analysis and syntax classification for rust lang (#22)
* Introduce control flow graph (CFG) support: - Added `cfg.rs` with CFG generation and analysis utilities. - Integrated `petgraph` library for graph-based computations. - Updated `ast.rs` to utilize CFG for function analysis. - Modified `Cargo.toml` and `Cargo.lock` to include new dependencies. - Improved static analysis with taint tracking through CFG paths. * feat: enhance control flow analysis with taint tracking and node labeling * feat: improve control flow graph with enhanced node handling and new tests * Remove unnecessary reference marker in `byte_offset_to_point` comment. * Remove unnecessary reference marker in `byte_offset_to_point` comment. * Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis. * Refactor CFG and taint tracking logic: - Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes. - Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification. - Refined `push_node` to handle method call expressions with object-function pairing. - Simplified code handling in trivia skipping and debug-only logic. * Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`. * Add targeted CFG taint-tracking tests to enhance analysis coverage. * Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`. * Update README with installation instructions for Cargo and GitHub releases. * Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules. * Refactor `labels.rs`: - Removed redundant `LabelRule` entries for cleaner rule definitions. - Adjusted matching logic to prioritize suffix and prefix matches effectively. 
* Refactor `labels.rs`: - Removed redundant `LabelRule` entries for cleaner rule definitions. - Adjusted matching logic to prioritize suffix and prefix matches effectively. * Add test for taint tracking with multiple sources in `cfg.rs`. * Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic. * refactor: split `labels.rs` into modular structure with language-specific files * refactor: split `labels.rs` into modular structure with language-specific files * refactor: clean up SQL table definitions in `database.rs` for better readability * refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling * refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details * refactor: remove redundant header from README.md for improved clarity * feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages * feat: introduce analysis modes for enhanced scanner configuration and diagnostics * feat: define Kind enum for syntax classification in control flow analysis * feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes * refactor: clean up imports and formatting in AST and CFG modules for improved readability * refactor: simplify function signatures and improve code readability in CFG and module files * fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB * refactor: update string formatting in clean and project modules for consistency * refactor: fix indentation in clean.rs for improved readability --------- Co-authored-by: elipeter <eli.peter@es.fcm.travel>
This commit is contained in:
parent
fd65360818
commit
3c21efba75
21 changed files with 1585 additions and 79 deletions
17
src/labels/javascript.rs
Normal file
17
src/labels/javascript.rs
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
use crate::labels::{Cap, DataLabel, LabelRule};
|
||||
|
||||
// TODO: refactor this
|
||||
pub static RULES: &[LabelRule] = &[
|
||||
LabelRule {
|
||||
matchers: &["document.location", "window.location"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["JSON.parse"],
|
||||
label: DataLabel::Sanitizer(Cap::JSON_PARSE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["eval"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
];
|
||||
121
src/labels/mod.rs
Normal file
121
src/labels/mod.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
mod javascript;
|
||||
mod rust;
|
||||
|
||||
use bitflags::bitflags;
|
||||
use once_cell::sync::Lazy;
|
||||
use phf::Map;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// A single rule: if the AST text equals (or ends with) one of the `matchers`,
|
||||
/// the node gets `label`.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct LabelRule {
|
||||
pub matchers: &'static [&'static str],
|
||||
pub label: DataLabel,
|
||||
}
|
||||
|
||||
bitflags! {
    /// Capability bits attached to a [`DataLabel`].
    ///
    /// Each flag occupies one bit of the backing `u8`, so a value can carry
    /// any combination of them (`Cap::all()` sets every declared bit).
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct Cap: u8 {
        /// Bit 0.
        const ENV_VAR      = 1 << 0;
        /// Bit 1.
        const HTML_ESCAPE  = 1 << 1;
        /// Bit 2.
        const SHELL_ESCAPE = 1 << 2;
        /// Bit 3.
        const URL_ENCODE   = 1 << 3;
        /// Bit 4.
        const JSON_PARSE   = 1 << 4;
        // Bits 5-7 are still free for further capabilities.
    }
}
|
||||
|
||||
/// Language-independent category of a syntax node.
///
/// Per-language tables (see `CLASSIFIERS`) map raw node-kind strings onto
/// these variants; the node names quoted below come from the Rust table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Kind {
    /// Conditional (`if_expression`).
    If,
    /// Unconditional loop (`loop_expression`).
    InfiniteLoop,
    /// `while` loop.
    While,
    /// `for` loop.
    For,
    /// Loop body node (`loop_statement`).
    LoopBody,
    /// Plain function call (`call_expression`).
    CallFn,
    /// Method call (`method_call_expression`).
    CallMethod,
    /// Macro invocation (`macro_invocation`).
    CallMacro,
    /// `break`.
    Break,
    /// `continue`.
    Continue,
    /// `return`.
    Return,
    /// Braced block (`block`).
    Block,
    /// Root of a parsed file (`source_file`).
    SourceFile,
    /// Function definition (`function_item`).
    Function,
    /// Assignment (`assignment_expression`).
    Assignment,
    /// Statement-level wrapper (`let_declaration`, `expression_statement`).
    CallWrapper,
    /// Comments, punctuation and declarations mapped as trivia.
    Trivia,
    /// Fallback used by `lookup` when no table entry matches.
    Other,
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum DataLabel {
|
||||
Source(Cap),
|
||||
Sanitizer(Cap),
|
||||
Sink(Cap),
|
||||
}
|
||||
|
||||
static REGISTRY: Lazy<HashMap<&'static str, &'static [LabelRule]>> = Lazy::new(|| {
|
||||
let mut m = HashMap::new();
|
||||
m.insert("rust", rust::RULES);
|
||||
m.insert("rs", rust::RULES);
|
||||
|
||||
m.insert("javascript", javascript::RULES);
|
||||
m.insert("js", javascript::RULES);
|
||||
|
||||
// add more languages in one line:
|
||||
// m.insert("go", go::RULES);
|
||||
|
||||
m
|
||||
});
|
||||
|
||||
type FastMap = &'static Map<&'static str, Kind>;
|
||||
|
||||
pub(crate) static CLASSIFIERS: Lazy<HashMap<&'static str, FastMap>> = Lazy::new(|| {
|
||||
let mut m = HashMap::new();
|
||||
m.insert("rust", &rust::KINDS);
|
||||
m.insert("rs", &rust::KINDS);
|
||||
|
||||
// m.insert("javascript", &javascript::KINDS);
|
||||
// m.insert("js", &javascript::KINDS);
|
||||
|
||||
// todo: add more languages
|
||||
m
|
||||
});
|
||||
|
||||
#[inline(always)]
|
||||
pub fn lookup(lang: &str, raw: &str) -> Kind {
|
||||
CLASSIFIERS
|
||||
.get(lang)
|
||||
.and_then(|m| m.get(raw).copied())
|
||||
.unwrap_or(Kind::Other)
|
||||
}
|
||||
|
||||
/// Try to classify a piece of syntax text.
|
||||
/// `lang` is the canonicalised language key (“rust”, “javascript”, …).
|
||||
pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {
|
||||
let key = lang.to_ascii_lowercase();
|
||||
let rules = REGISTRY.get(key.as_str())?;
|
||||
let head = text.split(['(', '<']).next().unwrap_or("");
|
||||
|
||||
let text_lc = head.trim().to_ascii_lowercase();
|
||||
|
||||
for rule in *rules {
|
||||
for raw in rule.matchers {
|
||||
let m = raw.to_ascii_lowercase();
|
||||
|
||||
if m.ends_with('_') {
|
||||
if text_lc.starts_with(&m) {
|
||||
return Some(rule.label);
|
||||
}
|
||||
} else if text_lc.ends_with(&m) {
|
||||
let start = text_lc.len() - m.len();
|
||||
let ok = start == 0 || matches!(text_lc.as_bytes()[start - 1], b'.' | b':');
|
||||
if ok {
|
||||
return Some(rule.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
72
src/labels/rust.rs
Normal file
72
src/labels/rust.rs
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
pub static RULES: &[LabelRule] = &[
|
||||
// ─────────── Sources ───────────
|
||||
LabelRule {
|
||||
matchers: &["std::env::var", "env::var"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
// `fn sanitize_*(&str) -> String`
|
||||
LabelRule {
|
||||
matchers: &["html_escape::encode_safe", "sanitize_", "sanitize_html"],
|
||||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["shell_escape::unix::escape"],
|
||||
label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
// ─────────── Sinks ─────────────
|
||||
// All the key points where untrusted strings reach the OS shell.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"command::new",
|
||||
"std::process::command::new",
|
||||
"command::arg",
|
||||
"command::args",
|
||||
"command::status",
|
||||
"command::output",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
];
|
||||
|
||||
/// Maps raw Rust syntax-node kind strings onto language-independent [`Kind`]s.
///
/// Node kinds absent from this table are reported as `Kind::Other` by the
/// lookup helper in the parent module.
///
/// The Rust grammar models `while`, `for` and `return` as *expressions*
/// (`while_expression`, `for_expression`, `return_expression`). The
/// `*_statement` spellings are kept alongside them so that existing lookups
/// keep resolving exactly as before.
pub static KINDS: Map<&'static str, Kind> = phf_map! {
    // control-flow
    "if_expression"          => Kind::If,
    "loop_expression"        => Kind::InfiniteLoop,
    "loop_statement"         => Kind::LoopBody,
    "while_expression"       => Kind::While,
    "while_statement"        => Kind::While,
    "for_expression"         => Kind::For,
    "for_statement"          => Kind::For,

    "return_expression"      => Kind::Return,
    "return_statement"       => Kind::Return,
    "break_expression"       => Kind::Break,
    "break_statement"        => Kind::Break,
    "continue_expression"    => Kind::Continue,
    "continue_statement"     => Kind::Continue,

    // structure
    "source_file"            => Kind::SourceFile,
    "block"                  => Kind::Block,
    "function_item"          => Kind::Function,

    // data-flow
    "call_expression"        => Kind::CallFn,
    "method_call_expression" => Kind::CallMethod,
    "macro_invocation"       => Kind::CallMacro,
    "let_declaration"        => Kind::CallWrapper,
    "expression_statement"   => Kind::CallWrapper,
    "assignment_expression"  => Kind::Assignment,

    // trivia
    "line_comment"           => Kind::Trivia,
    "block_comment"          => Kind::Trivia,
    ";"                      => Kind::Trivia,
    ","                      => Kind::Trivia,
    "("                      => Kind::Trivia,
    ")"                      => Kind::Trivia,
    "{"                      => Kind::Trivia,
    "}"                      => Kind::Trivia,
    "\n"                     => Kind::Trivia,
    "use_declaration"        => Kind::Trivia,
    "attribute_item"         => Kind::Trivia,
    "mod_item"               => Kind::Trivia,
    "type_item"              => Kind::Trivia,
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue