mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Added experimental control flow analysis and syntax classification for rust lang (#22)
* Introduce control flow graph (CFG) support: - Added `cfg.rs` with CFG generation and analysis utilities. - Integrated `petgraph` library for graph-based computations. - Updated `ast.rs` to utilize CFG for function analysis. - Modified `Cargo.toml` and `Cargo.lock` to include new dependencies. - Improved static analysis with taint tracking through CFG paths. * feat: enhance control flow analysis with taint tracking and node labeling * feat: improve control flow graph with enhanced node handling and new tests * Remove unnecessary reference marker in `byte_offset_to_point` comment. * Remove unnecessary reference marker in `byte_offset_to_point` comment. * Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis. * Refactor CFG and taint tracking logic: - Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes. - Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification. - Refined `push_node` to handle method call expressions with object-function pairing. - Simplified code handling in trivia skipping and debug-only logic. * Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`. * Add targeted CFG taint-tracking tests to enhance analysis coverage. * Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`. * Update README with installation instructions for Cargo and GitHub releases. * Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules. * Refactor `labels.rs`: - Removed redundant `LabelRule` entries for cleaner rule definitions. - Adjusted matching logic to prioritize suffix and prefix matches effectively. 
* Refactor `labels.rs`: - Removed redundant `LabelRule` entries for cleaner rule definitions. - Adjusted matching logic to prioritize suffix and prefix matches effectively. * Add test for taint tracking with multiple sources in `cfg.rs`. * Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic. * refactor: split `labels.rs` into modular structure with language-specific files * refactor: split `labels.rs` into modular structure with language-specific files * refactor: clean up SQL table definitions in `database.rs` for better readability * refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling * refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details * refactor: remove redundant header from README.md for improved clarity * feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages * feat: introduce analysis modes for enhanced scanner configuration and diagnostics * feat: define Kind enum for syntax classification in control flow analysis * feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes * refactor: clean up imports and formatting in AST and CFG modules for improved readability * refactor: simplify function signatures and improve code readability in CFG and module files * fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB * refactor: update string formatting in clean and project modules for consistency * refactor: fix indentation in clean.rs for improved readability --------- Co-authored-by: elipeter <eli.peter@es.fcm.travel>
This commit is contained in:
parent
fd65360818
commit
3c21efba75
21 changed files with 1585 additions and 79 deletions
|
|
@ -16,28 +16,35 @@ pub mod index {
|
|||
const SCHEMA: &str = r#"
|
||||
PRAGMA foreign_keys = ON;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
project TEXT NOT NULL,
|
||||
path TEXT NOT NULL,
|
||||
hash BLOB NOT NULL,
|
||||
mtime INTEGER NOT NULL,
|
||||
CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
project TEXT NOT NULL,
|
||||
path TEXT NOT NULL,
|
||||
hash BLOB NOT NULL,
|
||||
mtime INTEGER NOT NULL,
|
||||
scanned_at INTEGER NOT NULL,
|
||||
UNIQUE(project, path)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS issues (
|
||||
file_id INTEGER NOT NULL
|
||||
CREATE TABLE IF NOT EXISTS issues (file_id INTEGER NOT NULL
|
||||
REFERENCES files(id)
|
||||
ON DELETE CASCADE,
|
||||
rule_id TEXT NOT NULL,
|
||||
severity TEXT NOT NULL,
|
||||
line INTEGER NOT NULL,
|
||||
col INTEGER NOT NULL,
|
||||
PRIMARY KEY (file_id, rule_id, line, col)
|
||||
);
|
||||
rule_id TEXT NOT NULL,
|
||||
severity TEXT NOT NULL,
|
||||
line INTEGER NOT NULL,
|
||||
col INTEGER NOT NULL,
|
||||
PRIMARY KEY (file_id, rule_id, line, col));
|
||||
|
||||
CREATE TABLE IF NOT EXISTS function_summaries (hash TEXT PRIMARY KEY,
|
||||
project TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
lang TEXT NOT NULL,
|
||||
summary TEXT NOT NULL,
|
||||
updated_at INTEGER NOT NULL);
|
||||
"#;
|
||||
|
||||
// TODO: add a per-project cleanup for each table, to be run as part of `clean`.
|
||||
// TODO: add a drop operation and expose it through a CLI parameter.
|
||||
|
||||
/// A single issue row, ready for insertion.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IssueRow<'a> {
|
||||
|
|
@ -189,6 +196,50 @@ pub mod index {
|
|||
Ok(issue_iter.filter_map(Result::ok).collect())
|
||||
}
|
||||
|
||||
// pub fn upsert_summary(
|
||||
// &mut self,
|
||||
// project: &str,
|
||||
// path: &Path,
|
||||
// hash: &str,
|
||||
// s: &crate::summary::FuncSummary,
|
||||
// ) -> NyxResult<()> {
|
||||
// let conn = self.c();
|
||||
// let now = chrono::Utc::now().timestamp_millis(); // i64
|
||||
//
|
||||
// conn.execute(
|
||||
// "INSERT INTO function_summaries (hash, project, name, lang, summary, updated_at)
|
||||
// VALUES (?1, ?2, ?3, ?4, ?5, ?6)
|
||||
// ON CONFLICT(hash) DO UPDATE SET summary = excluded.summary,
|
||||
// updated_at = excluded.updated_at",
|
||||
// (
|
||||
// hash,
|
||||
// project,
|
||||
// &s.name,
|
||||
// path.extension().and_then(|e| e.to_str()).unwrap_or_default(),
|
||||
// serde_json::to_string(s).unwrap(), //TODO REPLACE UNWRAP
|
||||
// now,
|
||||
// ),
|
||||
// )?;
|
||||
// Ok(())
|
||||
// }
|
||||
//
|
||||
// pub fn load_all_summaries(&self, project: &str) -> NyxResult<Vec<crate::summary::FuncSummary<'static>>> {
|
||||
// let mut stmt = self
|
||||
// .c()
|
||||
// .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
|
||||
//
|
||||
// let iter = stmt.query_map([project], |row| {
|
||||
// let json: String = row.get(0)?;
|
||||
// Ok(serde_json::from_str::<crate::summary::FuncSummary>(json.as_str()).unwrap()) // TODO: REPLACE UNWRAP
|
||||
// })?;
|
||||
//
|
||||
// Ok(iter
|
||||
// .collect::<Result<Vec<_>, _>>()?
|
||||
// .into_iter()
|
||||
// .map(|s| unsafe { std::mem::transmute::<_, crate::summary::FuncSummary<'static>>(s) })
|
||||
// .collect())
|
||||
// }
|
||||
|
||||
/// Gets the files stored in the database for `project`.
|
||||
pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
|
||||
let mut stmt = self.c().prepare(
|
||||
|
|
@ -214,6 +265,7 @@ pub mod index {
|
|||
|
||||
DROP TABLE IF EXISTS issues;
|
||||
DROP TABLE IF EXISTS files;
|
||||
DROP TABLE IF EXISTS function_summaries;
|
||||
|
||||
PRAGMA foreign_keys = ON;
|
||||
VACUUM;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue