Feat/full cfg (#30)

* feat: Enhance control flow analysis with function summaries and taint analysis * feat: Update taint analysis to utilize function summaries for enhanced tracking * Refactor `walk.rs` batch processing and override handling: - Renamed `Batcher` to `BatchSender` for clarity. - Added `BatchSender::new` constructor for cleaner initialization. - Simplified batch size management in `BatchSender`. - Extracted `build_overrides` function for reusable override construction. - Improved error handling and validation in override building. - Enhanced performance with directory and file type filtering in `walk`. * Improve logging and streamline directory walk process: - Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion. - Optimized and simplified `filter_entry` logic for directory and file type filters. - Improved metadata checks and max file size enforcement during the scan. * Refactor and optimize taint tracking, label rules, and directory walk process: - Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing. - Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency. - Removed unused `set_hash` function and redundant imports across files. - Improved batch sender logic in `walk.rs`, renaming key components for clarity. - Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return. - Expanded label rules with additional matchers for sources, sanitizers, and sinks. - Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup. 
* fix: fixed let chains error in walk.rs * fix: updated dependencies * fix: updated dependencies * chore: Remove standard error in scan.rs * feat: Introduce function summaries for enhanced taint and control flow analysis * feat: Enhance taint analysis with interop support and function summaries * feat: Add configuration analysis module and enhance matcher rules * feat: Add arity column to function_summaries and handle schema migration * fix: fixed clippy &PathBuf warnings * chore: Update dependencies and versioning in Cargo files * docs: Update README to enhance clarity and detail on features and analysis modes * chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes * docs: Update SECURITY.md to clarify version support status --------- Co-authored-by: elipeter <eli.peter@es.fcm.travel>
2026-06-27 20:29:39 +02:00 · 2026-02-24 23:44:07 -05:00 · 2026-02-24 23:44:07 -05:00 · f96a89e7c1
commit f96a89e7c1
parent 8cbbec7d90
87 changed files with 11505 additions and 1099 deletions
--- a/examples/cfg_analysis/example.js
+++ b/examples/cfg_analysis/example.js
@ -0,0 +1,74 @@
+/**
+ EXPECTED OUTPUT (high-level):
+
+ 1) cfg-unguarded-sink (High / High confidence)
+ - handler(req,res): source req.body.cmd flows to child_process.exec(cmd) without sanitizer/guard.
+ - Should rank high (entry-point-ish function name 'handler', close to entry).
+
+ 2) cfg-auth-gap (High / Medium)
+ - handler is entry-point-ish (name matches handler/route/api conventions).
+ - No auth guard dominates sink (require_auth / is_authenticated / is_admin / authorize).
+
+ 3) cfg-error-fallthrough (Medium / Medium)
+ - Example (errFallthrough): if (err) { console.log(err); } then exec(...) still runs.
+ - This is the JS analogue of your Go heuristic. If your implementation only targets Go, this should be NO finding.
+ If you later generalize, this file includes a pattern you can test against.
+
+ 4) cfg-unguarded-sink (HTML) (Medium/High)
+ - unsafeHtml(req,res): req.query.html is written into innerHTML without DOMPurify.sanitize.
+
+ 5) No findings for safe paths:
+ - safeHandler (not yet defined in this file) would use encodeURIComponent before exec (URL_ENCODE sanitizer) OR a dedicated sanitizer you map to SHELL_ESCAPE.
+ NOTE: encodeURIComponent is URL_ENCODE, not SHELL_ESCAPE — so for SHELL_ESCAPE sinks, it may still be flagged depending on your caps logic.
+ The “definitely safe” case would use a dummy sanitize_shell() wrapper to match your Rust-style naming if you add it for JS later.
+ - safeHtml uses DOMPurify.sanitize before innerHTML (HTML_ESCAPE).
+
+ Taint / dataflow:
+ - should find taint from req.body / req.query / process.env sources to exec/eval/innerHTML sinks.
+ */
+
+const child_process = require("child_process");
+
+// ─── Entry-point-ish + unguarded shell sink + auth gap ────────────────────────────
+function handler(req, res) { // Fixture: expected to trigger cfg-unguarded-sink and cfg-auth-gap.
+    // Source (Cap::all): req.body.cmd is read straight from the request.
+    const cmd = req.body.cmd;
+
+    // Vulnerable sink (Cap::SHELL_ESCAPE): cmd reaches child_process.exec with no sanitizer or auth check before it.
+    child_process.exec(cmd);
+
+    res.end("ok");
+}
+
+// ─── Guarded HTML sink (should NOT be flagged) ────────────────────────────────────
+function safeHtml(req, res, DOMPurify) { // DOMPurify is injected as a parameter, not imported.
+    const html = req.query.html; // Source: query-string value taken from the request.
+    const cleaned = DOMPurify.sanitize(html); // Sanitizer(HTML_ESCAPE): runs before the sink on the only path.
+    document.getElementById("app").innerHTML = cleaned; // Sink(HTML_ESCAPE): receives only the sanitized value.
+    res.end("ok");
+}
+
+// ─── Unguarded HTML sink (should be flagged) ─────────────────────────────────────
+function unsafeHtml(req, res) { // Same source and sink as safeHtml, minus the DOMPurify.sanitize call.
+    const html = req.query.html; // Source: query-string value taken from the request.
+    document.getElementById("app").innerHTML = html; // Sink(HTML_ESCAPE) without sanitizer: the raw value is assigned directly.
+    res.end("ok");
+}
+
+// ─── Heuristic error fallthrough pattern (JS analogue) ───────────────────────────
+// If the error-handling analysis is Go-only, no cfg-error-fallthrough finding is expected here.
+// If it is generalized later, this function should be flagged.
+function errFallthrough(req, res) {
+    const err = req.query.err; // Stand-in for an error value; here it is just a query parameter.
+    if (err) {
+        console.log(err); // Logged only: no return/throw, so execution continues past the if.
+    }
+    child_process.exec(req.body.cmd); // Runs whether or not err was set; req.body.cmd is passed unsanitized.
+    res.end("ok");
+}
+
+// ─── Optional: eval sink (should be flagged) ─────────────────────────────────────
+function evalSink(req) { // NOTE(review): req is unused in the body — presumably kept for a handler-like shape; confirm.
+    const payload = process.env.PAYLOAD; // Source: value of the PAYLOAD environment variable.
+    eval(payload); // Sink(SHELL_ESCAPE) per your rules; payload is evaluated with no sanitizer before it.
+}
--- a/examples/cfg_analysis/example.rs
+++ b/examples/cfg_analysis/example.rs
@ -0,0 +1,99 @@
+/*!
+EXPECTED OUTPUT (high-level):
+
+1) cfg-unguarded-sink (High / High confidence)
+   - In handle_request(): user input from std::env::var("INPUT") flows to std::process::Command::new("sh").arg(&input)
+   - No dominating SHELL_ESCAPE sanitizer or validation guard for that value.
+   - This should rank very high in scoring (entry-point-ish name + close to entry + shell sink).
+
+2) cfg-auth-gap (High / Medium confidence)
+   - handle_request() looks like an entry-point (name matches handle_*)
+   - Contains a shell sink without an auth guard (require_auth / is_authenticated / is_admin etc.)
+
+3) cfg-resource-leak (Medium / High or Medium confidence)
+   - alloc_then_return_leak(): malloc without free on an early return path.
+
+4) cfg-unreachable-sanitizer or cfg-unreachable-guard (Medium/Low)
+   - unreachable_sanitizer(): sanitizer call in unreachable block.
+
+5) taint / dataflow (existing BFS taint engine):
+   - should detect at least one taint finding for:
+       env::var source -> Command sink
+   - should NOT flag safe_shell() because it uses shell_escape::unix::escape(&input) and passes `safe`.
+
+Notes:
+- This fixture intentionally contains both vulnerable and safe patterns, plus unreachable code and resource misuse,
+  to exercise cfg_analysis::{unreachable, guards, auth, resources, scoring}.
+*/
+
+use std::process::Command;
+
+// ─── CFG: Entry-point-ish + unguarded sink + auth gap ─────────────────────────────
+
+pub fn handle_request() { // Fixture: expected to trigger cfg-unguarded-sink and cfg-auth-gap.
+  // Source (Cap::all): value of the INPUT environment variable; unwrap panics if it is unset or not valid Unicode.
+  let input = std::env::var("INPUT").unwrap();
+
+  // Vulnerable sink (Cap::SHELL_ESCAPE): input is passed to `sh` as an argument with no escaping before it.
+  Command::new("sh").arg(&input).status().unwrap();
+}
+
+// ─── CFG: Guarded sink (should NOT produce cfg-unguarded-sink) ────────────────────
+
+pub fn safe_shell() { // Safe counterpart of handle_request: same source and sink, with a sanitizer in between.
+  let input = std::env::var("INPUT").unwrap();
+
+  // Sanitizer (Cap::SHELL_ESCAPE): shell_escape::unix::escape is applied on the only path to the sink.
+  let safe = shell_escape::unix::escape(&input);
+
+  // Sink, but guarded by dominating sanitizer: only the escaped value `safe` is passed to `sh`.
+  Command::new("sh").arg(&safe).status().unwrap();
+}
+
+// ─── CFG: Unreachable sanitizer (should report unreachable sanitizer/guard) ───────
+
+pub fn unreachable_sanitizer() {
+  let input = std::env::var("INPUT").unwrap();
+
+  return; // Unconditional return: every statement after this line is dead code.
+
+  // This statement is unreachable; should produce an unreachable finding for the sanitizer call.
+  let _safe = shell_escape::unix::escape(&input); // The result is bound but never used.
+}
+
+// ─── CFG: Resource misuse (malloc without free on some exit path) ─────────────────
+
+extern "C" { // Foreign declarations called by alloc_then_return_leak in this file.
+  fn malloc(size: usize) -> *mut u8; // Acquire side of the pair: takes a byte count, returns a raw pointer.
+  fn free(ptr: *mut u8); // Release side of the pair: takes the raw pointer back.
+}
+
+pub fn alloc_then_return_leak(flag: bool) { // Fixture for cfg-resource-leak: `p` is not freed when `flag` is true.
+  unsafe { // SAFETY: FFI calls only; `p` is never dereferenced, just handed back to free.
+    let p = malloc(128);
+
+    // Early return leaks `p` on this path: the free call below is skipped.
+    if flag {
+      return;
+    }
+
+    free(p); // Reached only on the fall-through (flag == false) path.
+  }
+}
+
+// ─── Extra: HTML sink labeling sanity (optional) ──────────────────────────────────
+
+// `sink_html` is a test marker recognized as Sink(HTML_ESCAPE) by the label rules.
+// In real code this would be something like response.body(), template.render(), etc.
+fn sink_html(_s: &str) {} // Empty body: the argument `_s` is ignored.
+
+pub fn html_print() { // Unsanitized case: the HTML env value goes straight into sink_html.
+  let raw = std::env::var("HTML").unwrap(); // Source: value of the HTML environment variable; panics if unset or not valid Unicode.
+  sink_html(&raw); // Sink reached with no sanitizer applied to `raw`.
+}
+
+pub fn html_print_sanitized() { // Counterpart of html_print with an encoding step before the sink.
+  let raw = std::env::var("HTML").unwrap(); // Source: value of the HTML environment variable; panics if unset or not valid Unicode.
+  let safe = html_escape::encode_safe(&raw); // NOTE(review): presumably labeled Sanitizer(HTML_ESCAPE) by the rules — confirm.
+  sink_html(&safe); // Sink receives only the encoded value `safe`, never `raw`.
+}