Added experimental control flow analysis and syntax classification for Rust (#22)

* Introduce control flow graph (CFG) support:

- Added `cfg.rs` with CFG generation and analysis utilities.
- Integrated `petgraph` library for graph-based computations.
- Updated `ast.rs` to utilize CFG for function analysis.
- Modified `Cargo.toml` and `Cargo.lock` to include new dependencies.
- Improved static analysis with taint tracking through CFG paths.

* feat: enhance control flow analysis with taint tracking and node labeling

* feat: improve control flow graph with enhanced node handling and new tests

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Remove unnecessary reference marker in `byte_offset_to_point` comment.

* Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis.

* Refactor CFG and taint tracking logic:

- Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes.
- Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification.
- Refined `push_node` to handle method call expressions with object-function pairing.
- Simplified code handling in trivia skipping and debug-only logic.

* Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`.

* Add targeted CFG taint-tracking tests to enhance analysis coverage.

* Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`.

* Update README with installation instructions for Cargo and GitHub releases.

* Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Refactor `labels.rs`:

- Removed redundant `LabelRule` entries for cleaner rule definitions.
- Adjusted matching logic to prioritize suffix and prefix matches effectively.

* Add test for taint tracking with multiple sources in `cfg.rs`.

* Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic.

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: split `labels.rs` into modular structure with language-specific files

* refactor: clean up SQL table definitions in `database.rs` for better readability

* refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling

* refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details

* refactor: remove redundant header from README.md for improved clarity

* feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages

* feat: introduce analysis modes for enhanced scanner configuration and diagnostics

* feat: define Kind enum for syntax classification in control flow analysis

* feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes

* refactor: clean up imports and formatting in AST and CFG modules for improved readability

* refactor: simplify function signatures and improve code readability in CFG and module files

* fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB

* refactor: update string formatting in clean and project modules for consistency

* refactor: fix indentation in clean.rs for improved readability

---------

Co-authored-by: elipeter <eli.peter@es.fcm.travel>
This commit is contained in:
Eli Peter 2025-06-28 17:36:14 +02:00 committed by GitHub
parent fd65360818
commit 3c21efba75
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1585 additions and 79 deletions

View file

@ -16,28 +16,35 @@ pub mod index {
/// SQL DDL for the tables declared in this module.
///
/// The script turns on foreign-key enforcement (`PRAGMA foreign_keys = ON`) and
/// declares three tables, each with `CREATE TABLE IF NOT EXISTS`:
///
/// - `files`: `id` (autoincrement primary key), `project`, `path`, `hash`,
///   `mtime`, `scanned_at`; `(project, path)` is `UNIQUE`.
/// - `issues`: `file_id`, `rule_id`, `severity`, `line`, `col`; the primary key
///   is `(file_id, rule_id, line, col)` and `file_id` references `files(id)`
///   with `ON DELETE CASCADE`.
/// - `function_summaries`: `hash` (primary key), `project`, `name`, `lang`,
///   `summary`, `updated_at`.
///
/// NOTE(review): the `files` and `issues` statements appear here in two
/// different layouts (multi-line and compact) — confirm that only one copy of
/// each is meant to be present.
const SCHEMA: &str = r#"
PRAGMA foreign_keys = ON;
CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
path TEXT NOT NULL,
hash BLOB NOT NULL,
mtime INTEGER NOT NULL,
CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
path TEXT NOT NULL,
hash BLOB NOT NULL,
mtime INTEGER NOT NULL,
scanned_at INTEGER NOT NULL,
UNIQUE(project, path)
);
CREATE TABLE IF NOT EXISTS issues (
file_id INTEGER NOT NULL
CREATE TABLE IF NOT EXISTS issues (file_id INTEGER NOT NULL
REFERENCES files(id)
ON DELETE CASCADE,
rule_id TEXT NOT NULL,
severity TEXT NOT NULL,
line INTEGER NOT NULL,
col INTEGER NOT NULL,
PRIMARY KEY (file_id, rule_id, line, col)
);
rule_id TEXT NOT NULL,
severity TEXT NOT NULL,
line INTEGER NOT NULL,
col INTEGER NOT NULL,
PRIMARY KEY (file_id, rule_id, line, col));
CREATE TABLE IF NOT EXISTS function_summaries (hash TEXT PRIMARY KEY,
project TEXT NOT NULL,
name TEXT NOT NULL,
lang TEXT NOT NULL,
summary TEXT NOT NULL,
updated_at INTEGER NOT NULL);
"#;
// TODO: add per-project cleanup for each table, run as part of the clean operation.
// TODO: add a drop operation and expose it through a CLI parameter.
/// A single issue row, ready for insertion.
#[derive(Debug, Clone)]
pub struct IssueRow<'a> {
@ -189,6 +196,50 @@ pub mod index {
Ok(issue_iter.filter_map(Result::ok).collect())
}
// pub fn upsert_summary(
// &mut self,
// project: &str,
// path: &Path,
// hash: &str,
// s: &crate::summary::FuncSummary,
// ) -> NyxResult<()> {
// let conn = self.c();
// let now = chrono::Utc::now().timestamp_millis(); // i64
//
// conn.execute(
// "INSERT INTO function_summaries (hash, project, name, lang, summary, updated_at)
// VALUES (?1, ?2, ?3, ?4, ?5, ?6)
// ON CONFLICT(hash) DO UPDATE SET summary = excluded.summary,
// updated_at = excluded.updated_at",
// (
// hash,
// project,
// &s.name,
// path.extension().and_then(|e| e.to_str()).unwrap_or_default(),
// serde_json::to_string(s).unwrap(), //TODO REPLACE UNWRAP
// now,
// ),
// )?;
// Ok(())
// }
//
// pub fn load_all_summaries(&self, project: &str) -> NyxResult<Vec<crate::summary::FuncSummary<'static>>> {
// let mut stmt = self
// .c()
// .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
//
// let iter = stmt.query_map([project], |row| {
// let json: String = row.get(0)?;
// Ok(serde_json::from_str::<crate::summary::FuncSummary>(json.as_str()).unwrap()) // TODO: REPLACE UNWRAP
// })?;
//
// Ok(iter
// .collect::<Result<Vec<_>, _>>()?
// .into_iter()
// .map(|s| unsafe { std::mem::transmute::<_, crate::summary::FuncSummary<'static>>(s) })
// .collect())
// }
/// Gets files from the database.
pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
let mut stmt = self.c().prepare(
@ -214,6 +265,7 @@ pub mod index {
DROP TABLE IF EXISTS issues;
DROP TABLE IF EXISTS files;
DROP TABLE IF EXISTS function_summaries;
PRAGMA foreign_keys = ON;
VACUUM;