Added experimental control flow analysis and syntax classification for rust lang (#22)

* Introduce control flow graph (CFG) support: - Added `cfg.rs` with CFG generation and analysis utilities. - Integrated `petgraph` library for graph-based computations. - Updated `ast.rs` to utilize CFG for function analysis. - Modified `Cargo.toml` and `Cargo.lock` to include new dependencies. - Improved static analysis with taint tracking through CFG paths. * feat: enhance control flow analysis with taint tracking and node labeling * feat: improve control flow graph with enhanced node handling and new tests * Remove unnecessary reference marker in `byte_offset_to_point` comment. * Remove unnecessary reference marker in `byte_offset_to_point` comment. * Refactor `ast.rs` for performance and clarity; enhance `cfg.rs` with recursive CFG generation and improved classification logic for AST analysis. * Refactor CFG and taint tracking logic: - Enhanced `cfg.rs` with inline helper function `text_of` for cleaner UTF-8 handling in AST nodes. - Expanded `labels.rs` rules with detailed `Sources`, `Sanitizers`, and `Sinks` for improved classification. - Refined `push_node` to handle method call expressions with object-function pairing. - Simplified code handling in trivia skipping and debug-only logic. * Enhance `cfg.rs` with `first_call_ident` helper and improve identifier extraction logic in `push_node`. * Add targeted CFG taint-tracking tests to enhance analysis coverage. * Enhance CFG generation with loop expression handling and improve taint tracking logic. Add new sanitization example in `examples/sanitize/example.rs`. * Update README with installation instructions for Cargo and GitHub releases. * Expand taint-tracking with precise `def-use` computation and enhance `labels.rs` for detailed classification. Extend `examples/sanitize` with realistic scenarios demonstrating new rules. * Refactor `labels.rs`: - Removed redundant `LabelRule` entries for cleaner rule definitions. - Adjusted matching logic to prioritize suffix and prefix matches effectively. 
* Refactor `labels.rs`: - Removed redundant `LabelRule` entries for cleaner rule definitions. - Adjusted matching logic to prioritize suffix and prefix matches effectively. * Add test for taint tracking with multiple sources in `cfg.rs`. * Add `function_summaries` table and implement summary upsert/load methods. Refactor to handle summary storage and retrieval efficiently, with placeholder clean/drop logic. * refactor: split `labels.rs` into modular structure with language-specific files * refactor: split `labels.rs` into modular structure with language-specific files * refactor: clean up SQL table definitions in `database.rs` for better readability * refactor: simplify CFG structure by removing lifetime parameters and enhancing taint metadata handling * refactor: update TODO comments in `cfg.rs` to clarify future enhancements for cap labels and function details * refactor: remove redundant header from README.md for improved clarity * feat: add PHF-based syntax classifiers and Kind enum for efficient syntax mapping across languages * feat: introduce analysis modes for enhanced scanner configuration and diagnostics * feat: define Kind enum for syntax classification in control flow analysis * feat: bump version to 0.2.0-alpha and update CHANGELOG for new features and fixes * refactor: clean up imports and formatting in AST and CFG modules for improved readability * refactor: simplify function signatures and improve code readability in CFG and module files * fix: correct rayon_thread_stack_size comment to reflect actual value of 8 MiB * refactor: update string formatting in clean and project modules for consistency * refactor: fix indentation in clean.rs for improved readability --------- Co-authored-by: elipeter <eli.peter@es.fcm.travel>
2026-07-21 21:31:03 +02:00 · 2025-06-28 17:36:14 +02:00 · 2025-06-28 17:36:14 +02:00 · 3c21efba75
commit 3c21efba75
parent fd65360818
21 changed files with 1585 additions and 79 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5,12 +5,18 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## [Unreleased]
+## [0.2.0-alpha] - 2025-06-28
+
+### Added
+- Experimental intra‑procedural CFG + taint analysis for Rust. Nyx now builds a control‑flow graph, applies data‑flow rules, and flags unsanitised Source → Sink paths (e.g. env::var → Command::new).
+- O(1) node‑kind lookup via per‑language PHF tables for zero‑cost dispatch.
+- Six unit tests covering conditionals, loops, sanitizers, and multiple sources.
+- Debug channel target=cfg (use RUST_LOG=nyx::cfg=debug) to inspect generated graphs.

 ### Fixed
 - Fixed a bug in the release pipeline where Windows was trying to call the zip, PowerShell doesn't have a zip command

-## [0.1.1] - 2025-06-25
+## [0.1.1-alpha] - 2025-06-25

 ### Fixed
 - Fixed a bug where the `scan --no-index` command would not respect the `max_results` config setting (#1)
@ -18,7 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Integration tests covering indexing and scanning pipelines (#3, #4, #5, #8)

-## [0.1.0] - 2025-06-25
+## [0.1.0-alpha] - 2025-06-25

 ### Added
 - Initial alpha release of **Nyx** CLI tool
--- a/Cargo.lock
+++ b/Cargo.lock
@ -11,6 +11,12 @@ dependencies = [
 "memchr",
 ]

+[[package]]
+name = "allocator-api2"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
+
 [[package]]
 name = "android-tzdata"
 version = "0.1.1"
@ -62,7 +68,7 @@ version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.59.0",
 ]

 [[package]]
@ -73,7 +79,7 @@ checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
 dependencies = [
 "anstyle",
 "once_cell_polyfill",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]

 [[package]]
@ -210,15 +216,15 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"

 [[package]]
 name = "console"
-version = "0.15.11"
+version = "0.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
+checksum = "2e09ced7ebbccb63b4c65413d821f2e00ce54c5ca4514ddc6b3c892fdbcbc69d"
 dependencies = [
 "encode_unicode",
 "libc",
 "once_cell",
 "unicode-width",
- "windows-sys",
+ "windows-sys 0.60.2",
 ]

 [[package]]
@ -308,7 +314,7 @@ dependencies = [
 "libc",
 "option-ext",
 "redox_users",
- "windows-sys",
+ "windows-sys 0.60.2",
 ]

 [[package]]
@ -336,7 +342,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
 dependencies = [
 "libc",
- "windows-sys",
+ "windows-sys 0.60.2",
 ]

 [[package]]
@ -357,6 +363,12 @@ version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"

+[[package]]
+name = "fixedbitset"
+version = "0.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
+
 [[package]]
 name = "foldhash"
 version = "0.1.5"
@ -405,6 +417,8 @@ version = "0.15.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
 dependencies = [
+ "allocator-api2",
+ "equivalent",
 "foldhash",
 ]

@ -608,8 +622,9 @@ dependencies = [

 [[package]]
 name = "nyx-scanner"
-version = "0.1.1"
+version = "0.2.0-alpha"
 dependencies = [
+ "bitflags",
 "blake3",
 "bytesize",
 "chrono",
@ -621,6 +636,8 @@ dependencies = [
 "ignore",
 "num_cpus",
 "once_cell",
+ "petgraph",
+ "phf",
 "r2d2",
 "r2d2_sqlite",
 "rayon",
@ -688,7 +705,62 @@ dependencies = [
 "libc",
 "redox_syscall",
 "smallvec",
- "windows-targets",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "petgraph"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca"
+dependencies = [
+ "fixedbitset",
+ "hashbrown",
+ "indexmap",
+ "serde",
+]
+
+[[package]]
+name = "phf"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7"
+dependencies = [
+ "phf_macros",
+ "phf_shared",
+ "serde",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2cbb1126afed61dd6368748dae63b1ee7dc480191c6262a3b4ff1e29d86a6c5b"
+dependencies = [
+ "fastrand",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d713258393a82f091ead52047ca779d37e5766226d009de21696c4e667044368"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981"
+dependencies = [
+ "siphasher",
 ]

 [[package]]
@ -901,7 +973,7 @@ dependencies = [
 "errno",
 "libc",
 "linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]

 [[package]]
@ -997,6 +1069,12 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"

+[[package]]
+name = "siphasher"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
+
 [[package]]
 name = "smallvec"
 version = "1.15.1"
@ -1036,7 +1114,7 @@ dependencies = [
 "getrandom 0.3.3",
 "once_cell",
 "rustix",
- "windows-sys",
+ "windows-sys 0.59.0",
 ]

 [[package]]
@ -1482,7 +1560,7 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.59.0",
 ]

 [[package]]
@ -1556,7 +1634,16 @@ version = "0.59.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
 dependencies = [
- "windows-targets",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.2",
 ]

 [[package]]
@ -1565,14 +1652,30 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
 dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_gnullvm",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm 0.52.6",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
+dependencies = [
+ "windows_aarch64_gnullvm 0.53.0",
+ "windows_aarch64_msvc 0.53.0",
+ "windows_i686_gnu 0.53.0",
+ "windows_i686_gnullvm 0.53.0",
+ "windows_i686_msvc 0.53.0",
+ "windows_x86_64_gnu 0.53.0",
+ "windows_x86_64_gnullvm 0.53.0",
+ "windows_x86_64_msvc 0.53.0",
 ]

 [[package]]
@ -1581,48 +1684,96 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"

+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
+
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"

+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
+
 [[package]]
 name = "windows_i686_gnu"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"

+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
+
 [[package]]
 name = "windows_i686_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"

+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
+
 [[package]]
 name = "windows_i686_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"

+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
+
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"

+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
+
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"

+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
+
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
+
 [[package]]
 name = "winnow"
 version = "0.7.11"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "nyx-scanner"
-version = "0.1.1"
+version = "0.2.0-alpha"
 edition = "2024"
 description = "A CLI security scanner for automating vulnerability checks"
 license = "GPL-3.0"
@ -49,10 +49,13 @@ tree-sitter-ruby = "0.23.1"
 crossbeam-channel = "0.5.15"
 blake3 = "1.8.2"
 once_cell = "1.21.3"
-console = "0.15.11"
+console = "0.16.0"
 rayon = "1.10.0"
 r2d2 = "0.8.10"
 bytesize  = "2.0.1"
 chrono    = { version = "0.4.41", default-features = false, features = ["std", "clock"] }
 thiserror = "2.0.12"
 dashmap = "7.0.0-rc2"
+petgraph = "0.8.2"
+bitflags = "2.9.1"
+phf = { version = "0.12.1", features = ["macros"] }
--- a/README.md
+++ b/README.md
@ -1,13 +1,11 @@
 <div align="center">
  <img src="assets/logo.png" alt="nyx logo" width="300"/>

-# Nyx
-
 **Fast, cross-language cli vulnerability scanner.**

 [![crates.io](https://img.shields.io/crates/v/nyx-scanner.svg)](https://crates.io/crates/nyx-scanner)
 [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
-[![Rust](https://img.shields.io/badge/rust-1.70+-orange.svg)](https://www.rust-lang.org)
+[![Rust 1.85+](https://img.shields.io/badge/rust-1.85%2B-orange)](https://www.rust-lang.org)
 [![CI](https://img.shields.io/github/actions/workflow/status/ecpeter23/nyx/ci.yml?branch=master)](https://github.com/ecpeter23/nyx/actions)
 </div>

@ -18,7 +16,7 @@
 **Nyx** is a lightweight lightning-fast Rust‑native command‑line tool that detects potentially dangerous code patterns across several programming languages. It combines the accuracy of [`tree‑sitter`](https://tree-sitter.github.io/) parsing with a curated rule set and an optional SQLite‑backed index to deliver fast, repeatable scans on projects of any size.

 > **Project status – Alpha**   
-> Nyx is under active development. The public interface, rule set, and output formats may change without notice while we stabilize the core. Please pin exact versions in production environments.
+> Nyx is under active development. The public interface, rule set, and output formats may change without notice while we stabilise the core. The new CFG + taint engine is experimental and Rust-only for now – please report any crashes or false positives. Pin exact versions in production environments.

 ---

@ -50,17 +48,49 @@

 ## Installation

+### Install crate
+```bash
+$ cargo install nyx-scanner
+```
+
+### Install from a GitHub release
+1. Navigate to the [Releases](https://github.com/ecpeter23/nyx/releases) page of the repository.
+2. Download the appropriate binary for your system:
+
+    ```nyx-x86_64-unknown-linux-gnu.zip``` for Linux
+
+    ```nyx-x86_64-pc-windows-msvc.zip``` for Windows
+
+    ```nyx-x86_64-apple-darwin.zip``` or ```nyx-aarch64-apple-darwin.zip``` for macOS (Intel or Apple Silicon)
+
+3. Unzip the file and move the executable to a directory in your system PATH:
+    ```bash
+    # Example for Unix systems
+    unzip nyx-x86_64-unknown-linux-gnu.zip
+    chmod +x nyx
+    sudo mv nyx /usr/local/bin/
+    ```
+    ```bash
+    # Example for Windows in PowerShell
+    Expand-Archive -Path nyx-x86_64-pc-windows-msvc.zip -DestinationPath .
+    Move-Item -Path .\nyx.exe -Destination "C:\Program Files\Nyx\"  # Add to PATH manually if needed
+    ```
+   
+4. Verify the installation:
+    ```bash
+    nyx --version
+    ```
 ### Build from source

 ```bash
-$ git clone https://github.com/<your‑org>/nyx.git
+$ git clone https://github.com/ecpeter23/nyx.git
 $ cd nyx
 $ cargo build --release
 # optional – copy the binary into PATH
 $ cargo install --path .
 ```

-Nyx targets **stable Rust 1.78 or later**.
+Nyx targets **stable Rust 1.85 or later**.

 ---

@ -142,18 +172,29 @@ A fully documented `nyx.conf` is generated automatically on first run.

 ## Roadmap

-| Area                  | Planned Improvements                                                      |
-|-----------------------|---------------------------------------------------------------------------|
-| More language support | Plans to create rule sets for over 100 languages for maximum coverage     |
-| Control‑flow analysis | Generation of CFGs for deeper reasoning about execution paths             |
-| Taint tracking        | Intra‑ / inter‑procedural tracing of untrusted data from sources to sinks |
-| Output formats        | Full SARIF 2.1.0, JUnit XML, HTML report generator                        |
-| Rule updates          | Remote rule feed with signature verification                              |
+| Area                  | Planned Improvements                                                                                  |
+|-----------------------|-------------------------------------------------------------------------------------------------------|
+| More language support | Plans to create rule sets for over 100 languages for maximum coverage                                 |
+| Control‑flow analysis | Inter‑procedural function summaries. Cap label propagation & bit‑flag checks. Loop/branch sensitivity |
+| Taint tracking        | Intra‑ / inter‑procedural tracing of untrusted data from sources to sinks                             |
+| Output formats        | Full SARIF 2.1.0, JUnit XML, HTML report generator                                                    |
+| Rule updates          | Remote rule feed with signature verification                                                          |
+| Performance & UX      | Incremental CFG cache, progress‑bar UX, smart file‑watch re‑scan                                      |

 Community feedback will help shape priorities; please open an issue to discuss proposed changes.

 ---

+## Experimental Features & Feedback
+
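+The new Rust intra‑procedural CFG + taint engine is experimental and enabled by default (`mode = "full"` in `nyx.conf`); set `mode = "ast"` to run only the AST pattern rules.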
+
+Expect rough edges: slightly slower scans, occasional false positives, limited language coverage.
+
+Please open an issue for every crash, panic, or suspicious result – attach the minimal code snippet and mention the Nyx version.
+
+---
+
 ## Contributing

 Pull requests are welcome. To contribute:
--- a/default-nyx.conf
+++ b/default-nyx.conf
@ -8,6 +8,10 @@

 [scanner]

+## Analysis mode. `full` runs both the AST pattern rules and the CFG taint analysis.
+##   Possible values: full | ast | cfg
+mode = "full"
+
 ## Minimum severity level to include in the report
 ##   Possible values: Low | Medium | High | Critical
 min_severity = "Low"
@ -96,6 +100,9 @@ batch_size = 100
 ## Channel capacity multiplier (capacity = threads × this)
 channel_multiplier = 4

+## Maximum stack size for Rayon threads (bytes)
+rayon_thread_stack_size = 8 * 1024 * 1024  # 8 MiB
+
 ## Timeout on individual files (seconds); null = none  (UNIMPLEMENTED)
 scan_timeout_secs = null

--- a/examples/sanatize/example.rs
+++ b/examples/sanatize/example.rs
@ -0,0 +1,96 @@
+//! demo.rs  —  realistic taint-tracking playground
+//! `cargo add html-escape shell-escape` before compiling.
+
+use std::{env, process::Command, fs};
+
+#[derive(Default)]
+struct UserCtx {
+    query: String,          // potentially tainted
+    sanitized: String,      // should remain clean
+}
+
+/// ----------   helper wrappers so we get nice Source / Sink labels   ----------
+fn source_env(var: &str) -> String {
+    env::var(var).unwrap_or_default()                          // Source(env-var)
+}
+
+fn source_file(path: &str) -> String {
+    fs::read_to_string(path).unwrap_or_default()               // Source(file-io)
+}
+
+fn sink_shell(arg: &str) {
+    Command::new("sh").arg(arg).status().unwrap();             // Sink(process-spawn)
+}
+
+fn sink_html(out: &str) {
+    println!("{out}");                                         // Sink(html-out)
+}
+
+fn sanitize_html(s: &str) -> String {
+    html_escape::encode_safe(s)                                // Sanitizer(html-escape)
+}
+
+fn sanitize_shell(s: &str) -> String {
+    shell_escape::unix::escape(s.into()).into_owned()          // Sanitizer(shell-escape)
+}
+
+/// ----------   1. Main demo function   ----------
+fn main() {
+    // FLOW A ────────────────────────────────────────────────────────────────
+    // env → sanitized → safe shell
+    let raw = source_env("USER_CMD");
+    let clean = sanitize_shell(&raw);
+    sink_shell(&clean);                       // EXPECT: SAFE
+
+    // FLOW B ────────────────────────────────────────────────────────────────
+    // env → if-else, only one branch escapes
+    let arg = source_env("ANOTHER");
+    if arg.len() > 5 {
+        sink_shell(&arg);                     // EXPECT: UNSAFE  (branch tainted)
+    } else {
+        let escaped = sanitize_shell(&arg);
+        sink_shell(&escaped);                 // safe
+    }
+
+    // FLOW C ────────────────────────────────────────────────────────────────
+    // file → while loop → HTML sanitizer cleared
+    let mut data = source_file("/tmp/input.txt");
+    while data.len() < 32 {
+        data.push('x');
+    }
+    let html_ok = sanitize_html(&data);
+    sink_html(&html_ok);                      // safe
+
+    // FLOW D ────────────────────────────────────────────────────────────────
+    // file → struct field → match → unsanitised HTML
+    let mut ctx = UserCtx::default();
+    ctx.query = source_file("/tmp/q.txt");
+    // overwrite the clean field; `ctx.sanitized` is *not* tainted
+    ctx.sanitized = sanitize_html("constant");
+    match ctx {
+        UserCtx { query, sanitized } if query.contains("DROP") => {
+            sink_html(&query);                // EXPECT: UNSAFE
+        }
+        _ => {
+            sink_html(&ctx.sanitized);        // safe
+        }
+    }
+
+    // FLOW E ────────────────────────────────────────────────────────────────
+    // source → function call → reassignment clears taint
+    let mut name = source_env("USER");        // tainted
+    greet(&name);                            // just prints
+    name = "anonymous".into();               // kills taint
+    greet(&name);                            // safe
+
+    // FLOW F ────────────────────────────────────────────────────────────────
+    // Multiple sanitizers, only the *right* one matters
+    let cmd = source_env("MIXED");
+    let partly = sanitize_html(&cmd);        // wrong sanitizer
+    sink_shell(&partly);                     // EXPECT: UNSAFE
+}
+
+/// helper (non-sink) function
+fn greet(who: &str) {
+    println!("Hello, {who}");
+}
--- a/examples/standard/test.rs
+++ b/examples/standard/test.rs
@ -0,0 +1,9 @@
+use std::{env, process::Command};
+fn main() {
+  let y = env::var("SAFE").unwrap();
+
+  let x = env::var("DANGEROUS").unwrap();
+  let clean = html_escape::encode_safe(&y);
+  Command::new("sh").arg(x).status().unwrap();
+  Command::new("sh").arg(clean).status().unwrap();
+}
--- a/src/ast.rs
+++ b/src/ast.rs
@ -1,5 +1,8 @@
+use crate::cfg::{analyse_function, build_cfg};
 use crate::commands::scan::Diag;
 use crate::errors::{NyxError, NyxResult};
+use crate::patterns::Severity;
+use crate::utils::config::AnalysisMode;
 use crate::utils::ext::lowercase_ext;
 use crate::utils::{Config, query_cache};
 use std::cell::RefCell;
@ -10,6 +13,16 @@ thread_local! {
    static PARSER: RefCell<tree_sitter::Parser> = RefCell::new(tree_sitter::Parser::new());
 }

+/// Returns the start position (row/column) of the node tree-sitter finds at `byte`, or (0, 0) if there is none.
+fn byte_offset_to_point(tree: &tree_sitter::Tree, byte: usize) -> tree_sitter::Point {
+    // `descendant_for_byte_range` gives us the smallest node spanning `byte` (it may start earlier);
+    // `start_position` turns that node's start into rows & columns (both 0-based)
+    tree.root_node()
+        .descendant_for_byte_range(byte, byte)
+        .map(|n| n.start_position())
+        .unwrap_or_else(|| tree_sitter::Point { row: 0, column: 0 })
+}
+
 pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
    tracing::debug!("Running rules on: {}", path.display());
    let bytes = std::fs::read(path)?;
@ -47,30 +60,58 @@ pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult<Vec<Diag
            .ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
    })?;

-    let root = _tree.root_node();
-
-    let compiled = query_cache::for_lang(lang_slug, ts_lang);
-    let mut cursor = QueryCursor::new();
    let mut out = Vec::new();

-    for cq in compiled.iter() {
-        if cfg.scanner.min_severity <= cq.meta.severity {
-            continue;
+    if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Taint {
+        tracing::debug!("Running taint analysis on: {}", path.display());
+        let (cfg_graph, entry) = build_cfg(&_tree, &bytes, lang_slug);
+
+        for p in analyse_function(&cfg_graph, entry) {
+            let src_byte = cfg_graph[p.first().copied().unwrap()].span.0;
+            let point = byte_offset_to_point(&_tree, src_byte);
+
+            out.push(Diag {
+                path: path.to_string_lossy().into_owned(),
+                line: point.row + 1,
+                col: point.column + 1,
+                severity: Severity::High,
+                id: "taint-unsanitised-flow".into(),
+            });
        }
-        let mut matches = cursor.matches(&cq.query, root, &*bytes);
-        while let Some(m) = matches.next() {
-            if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
-                let point = cap.node.start_position();
-                out.push(Diag {
-                    path: path.to_string_lossy().into_owned(),
-                    line: point.row + 1,
-                    col: point.column + 1,
-                    severity: cq.meta.severity,
-                    id: cq.meta.id.to_owned(),
-                });
+    }
+
+    if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Ast {
+        let root = _tree.root_node();
+
+        let compiled = query_cache::for_lang(lang_slug, ts_lang);
+        let mut cursor = QueryCursor::new();
+
+        for cq in compiled.iter() {
+            if cfg.scanner.min_severity <= cq.meta.severity {
+                continue;
+            }
+            let mut matches = cursor.matches(&cq.query, root, &*bytes);
+            while let Some(m) = matches.next() {
+                if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
+                    let point = cap.node.start_position();
+                    out.push(Diag {
+                        path: path.to_string_lossy().into_owned(),
+                        line: point.row + 1,
+                        col: point.column + 1,
+                        severity: cq.meta.severity,
+                        id: cq.meta.id.to_owned(),
+                    });
+                }
            }
        }
    }
+
+    // Check to ensure no duplicates (DOUBLE-CHECK EFFICIENCY)
+    out.sort_by(|a, b| (a.line, a.col, &a.id, a.severity).cmp(&(b.line, b.col, &b.id, b.severity)));
+    out.dedup_by(|a, b| {
+        a.line == b.line && a.col == b.col && a.id == b.id && a.severity == b.severity
+    });
+
    Ok(out)
 }

--- a/src/cfg.rs
+++ b/src/cfg.rs
@ -0,0 +1,829 @@
+use petgraph::algo::dominators::{Dominators, simple_fast};
+use petgraph::prelude::*;
+use tracing::debug;
+use tree_sitter::{Node, Tree};
+
+use crate::labels::{DataLabel, Kind, classify, lookup};
+use std::collections::HashSet;
+use std::hash::{DefaultHasher, Hash, Hasher};
+
+// WHAT WE STILL NEED TO DO:
+// todo: add the cap labels and remove the bit flags after each sanitizer, checking the bit flags with the sink
+//
+//
+// 1.
+// Analyze the CFG and attach function details to the nodes.
+// Then store each function's status in a cache: which sources, sinks and sanitizers it has, and what taint it carries.
+//
+// 2.
+// For each taint coming from a function, check whether it gets tainted inside a function; if not, add it to a list of potentially tainted functions.
+// After all functions have been analyzed, check whether any of the potentially tainted functions are actually tainted.
+//
+// 3.
+
+/// -------------------------------------------------------------------------
+///  Public AST‑to‑CFG data structures
+/// -------------------------------------------------------------------------
+///
+/// Kind of statement a CFG node stands for.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum StmtKind {
+    /// Synthetic node that `build_cfg` places in front of the whole graph.
+    Entry,
+    /// Synthetic node that `build_cfg` wires the final frontier to.
+    Exit,
+    /// Ordinary statement without special control flow (also used for
+    /// function headers and wrapper statements without a direct call child).
+    Seq,
+    /// Condition node of an `if`.
+    If,
+    /// Header node of a `loop` / `while` / `for`.
+    Loop,
+    /// `break` statement; `build_sub` ends the current path here.
+    Break,
+    /// `continue` statement; `build_sub` ends the current path here.
+    Continue,
+    /// `return` statement; `build_sub` ends the current path here.
+    Return,
+    /// Wrapper statement that has a call, method call or macro call as a
+    /// direct child.
+    Call,
+}
+
+/// Label stored on every CFG edge.
+///
+/// NOTE(review): within this file the taint pass walks `neighbors()` and does
+/// not look at the edge label; it is only visible in the debug output.
+#[derive(Debug, Clone, Copy)]
+pub enum EdgeKind {
+    Seq,   // ordinary fall‑through
+    True,  // `cond == true` branch
+    False, // `cond == false` branch
+    Back,  // back‑edge that closes a loop
+}
+
+/// Payload of a CFG node: what kind of statement it is, where it sits in the
+/// source, and the information the taint pass needs (label, defined variable,
+/// read variables).
+#[derive(Debug, Clone)]
+pub struct NodeInfo {
+    pub kind: StmtKind,
+    pub span: (usize, usize),     // byte offsets in the original file
+    pub label: Option<DataLabel>, // taint classification if any
+    pub defines: Option<String>,  // variable written by this stmt
+    pub uses: Vec<String>,        // variables read
+}
+
+/// Control-flow graph: a `Graph` whose node weights are [`NodeInfo`] and whose
+/// edge weights are [`EdgeKind`].
+pub type Cfg = Graph<NodeInfo, EdgeKind>;
+
+// -------------------------------------------------------------------------
+//                      Utility helpers
+// -------------------------------------------------------------------------
+
+/// Copy the source text covered by `n` out of `code`.
+///
+/// Yields `None` when the covered bytes are not valid UTF-8.
+#[inline]
+fn text_of<'a>(n: Node<'a>, code: &'a [u8]) -> Option<String> {
+    let covered = &code[n.start_byte()..n.end_byte()];
+    match std::str::from_utf8(covered) {
+        Ok(s) => Some(s.to_owned()),
+        Err(_) => None,
+    }
+}
+
+/// Find the first direct child of `n` that is a call, method call or macro
+/// invocation and return the text of its callee.
+///
+/// Only the first such child is inspected: if its callee cannot be extracted
+/// the result is `None` and later children are not tried.  A method call with
+/// a receiver is rendered as `receiver::method`.
+fn first_call_ident<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<String> {
+    // Text of the child stored under `field`, if present and valid UTF-8.
+    let field_text = |node: Node<'a>, field: &str| {
+        node.child_by_field_name(field)
+            .and_then(|f| text_of(f, code))
+    };
+
+    let mut walker = n.walk();
+    for child in n.children(&mut walker) {
+        let callee = match lookup(lang, child.kind()) {
+            Kind::CallFn => field_text(child, "function"),
+            Kind::CallMethod => {
+                let method = child
+                    .child_by_field_name("method")
+                    .or_else(|| child.child_by_field_name("name"))
+                    .and_then(|f| text_of(f, code));
+                match (field_text(child, "object"), method) {
+                    (Some(recv), Some(name)) => Some(format!("{recv}::{name}")),
+                    (None, Some(name)) => Some(name),
+                    (_, None) => None,
+                }
+            }
+            Kind::CallMacro => field_text(child, "macro"),
+            // not a call – keep scanning the remaining children
+            _ => continue,
+        };
+        return callee;
+    }
+    None
+}
+
+/// Append a CFG node for `ast` to `g` and return its index.
+///
+/// The text handed to `classify` is the callee for call-like nodes, the first
+/// inner callee for wrapper statements that contain one, and the raw source
+/// slice otherwise.  The node also records its byte span and the
+/// `(defines, uses)` pair computed by `def_use`.
+fn push_node<'a>(
+    g: &mut Cfg,
+    kind: StmtKind,
+    ast: Node<'a>,
+    lang: &str,
+    code: &'a [u8],
+) -> NodeIndex {
+    // Text of the child stored under `field`, or "" when absent / not UTF-8.
+    let field_or_empty = |field: &str| {
+        ast.child_by_field_name(field)
+            .and_then(|n| text_of(n, code))
+            .unwrap_or_default()
+    };
+    // Raw source text of the whole node, or "" when it is not UTF-8.
+    let raw_text = || text_of(ast, code).unwrap_or_default();
+
+    /* ── 1.  TEXT USED FOR CLASSIFICATION ─────────────────────────────── */
+
+    let text = match lookup(lang, ast.kind()) {
+        // plain `foo(bar)` style call
+        Kind::CallFn => field_or_empty("function"),
+
+        // method / UFCS call  `recv.method()`  or  `Type::func()`
+        Kind::CallMethod => {
+            let method = ast
+                .child_by_field_name("method")
+                .or_else(|| ast.child_by_field_name("name"))
+                .and_then(|n| text_of(n, code));
+            let receiver = ast
+                .child_by_field_name("object")
+                .and_then(|n| text_of(n, code));
+            match (receiver, method) {
+                (Some(r), Some(m)) => format!("{r}::{m}"),
+                (None, Some(m)) => m,
+                (_, None) => String::new(),
+            }
+        }
+
+        // `my_macro!(…)`
+        Kind::CallMacro => field_or_empty("macro"),
+
+        // wrapper statement: prefer the first inner callee over the whole line
+        Kind::CallWrapper => first_call_ident(ast, lang, code).unwrap_or_else(raw_text),
+
+        // everything else – raw slice
+        _ => raw_text(),
+    };
+
+    /* ── 2.  NODE PAYLOAD ─────────────────────────────────────────────── */
+
+    let label = classify(lang, &text);
+    let span = (ast.start_byte(), ast.end_byte());
+    let (defines, uses) = def_use(ast, code);
+
+    /* ── 3.  GRAPH INSERTION + DEBUG ──────────────────────────────────── */
+
+    let idx = g.add_node(NodeInfo {
+        kind,
+        span,
+        label,
+        defines,
+        uses,
+    });
+
+    debug!(
+        target: "cfg",
+        "node {} ← {:?} txt=`{}` span={:?} label={:?}",
+        idx.index(),
+        kind,
+        text,
+        span,
+        label
+    );
+    idx
+}
+
+/// Insert one `kind` edge into `g` from each node of `froms` to `to`, logging
+/// every inserted edge on the `cfg` debug target.
+#[inline]
+fn connect_all(g: &mut Cfg, froms: &[NodeIndex], to: NodeIndex, kind: EdgeKind) {
+    let dst = to.index();
+    froms.iter().copied().for_each(|src| {
+        debug!(target: "cfg", "edge {} → {} ({:?})", src.index(), dst, kind);
+        g.add_edge(src, to, kind);
+    });
+}
+
+// -------------------------------------------------------------------------
+//    The recursive *work‑horse* that converts an AST node into a CFG slice.
+//    Returns the set of *exit* nodes that need to be wired further.
+// -------------------------------------------------------------------------
+/// Convert `ast` (and everything below it) into nodes and edges of `g`.
+///
+/// * `preds` – the frontier: nodes whose control flow continues into `ast`.
+/// * returns – the new frontier, i.e. the nodes from which control flow
+///   continues after `ast`.  An empty vector means the path ended here
+///   (`return` / `break` / `continue`).
+fn build_sub<'a>(
+    ast: Node<'a>,
+    preds: &[NodeIndex], // predecessor frontier
+    g: &mut Cfg,
+    lang: &str,
+    code: &'a [u8],
+) -> Vec<NodeIndex> {
+    match lookup(lang, ast.kind()) {
+        // ─────────────────────────────────────────────────────────────────
+        //  IF‑/ELSE: two branches that re‑merge afterwards
+        // ─────────────────────────────────────────────────────────────────
+        Kind::If => {
+            // Condition node
+            let cond = push_node(g, StmtKind::If, ast, lang, code);
+            connect_all(g, preds, cond, EdgeKind::Seq);
+
+            // Direct `block` children: the first one is the then-branch; a
+            // second one (grammars that do not wrap `else`) is the else-branch.
+            let (then_block, direct_else) = {
+                let mut cursor = ast.walk();
+                let blocks: Vec<_> = ast
+                    .children(&mut cursor)
+                    .filter(|n| n.kind() == "block")
+                    .collect();
+                (blocks.first().copied(), blocks.get(1).copied())
+            };
+
+            // tree-sitter-rust stores the else branch under the `alternative`
+            // field, wrapped in an `else_clause` that holds either a block or
+            // a nested `if` (`else if`).  Accept an unwrapped alternative too.
+            let is_branch =
+                |n: &Node<'a>| n.kind() == "block" || matches!(lookup(lang, n.kind()), Kind::If);
+            let else_branch = direct_else.or_else(|| {
+                let alt = ast.child_by_field_name("alternative")?;
+                if is_branch(&alt) {
+                    return Some(alt);
+                }
+                let mut cursor = alt.walk();
+                // bound to a local so the iterator is dropped before `cursor`
+                let found = alt.children(&mut cursor).find(|n| is_branch(n));
+                found
+            });
+
+            // THEN branch
+            let then_exits = match then_block {
+                Some(b) => {
+                    let first_new = g.node_count();
+                    let exits = build_sub(b, &[cond], g, lang, code);
+                    tag_branch_entry(g, cond, first_new, EdgeKind::True);
+                    exits
+                }
+                None => vec![cond],
+            };
+
+            // ELSE branch
+            let else_exits = match else_branch {
+                Some(b) => {
+                    let first_new = g.node_count();
+                    let exits = build_sub(b, &[cond], g, lang, code);
+                    tag_branch_entry(g, cond, first_new, EdgeKind::False);
+                    exits
+                }
+                // No explicit else → the non-taken path falls straight through
+                // the condition.  It must NOT run through the then-branch, and
+                // it stays alive even when the then-branch ends in `return`.
+                None => vec![cond],
+            };
+
+            // Frontier = union of both branches (without duplicates, so later
+            // statements do not receive the same edge twice)
+            let mut frontier = then_exits;
+            for e in else_exits {
+                if !frontier.contains(&e) {
+                    frontier.push(e);
+                }
+            }
+            frontier
+        }
+
+        Kind::InfiniteLoop => {
+            // Synthetic header node
+            let header = push_node(g, StmtKind::Loop, ast, lang, code);
+            connect_all(g, preds, header, EdgeKind::Seq);
+
+            // The body is the single `block` child.  A malformed loop without
+            // one is kept as a bare header instead of aborting the scan.
+            let Some(body) = ast.child_by_field_name("body") else {
+                return vec![header];
+            };
+            let body_exits = build_sub(body, &[header], g, lang, code);
+
+            // Back-edge from every linear exit to header
+            connect_all(g, &body_exits, header, EdgeKind::Back);
+
+            // `loop` may break → those exits are frontiers too
+            body_exits.into_iter().chain([header]).collect()
+        }
+
+        // ─────────────────────────────────────────────────────────────────
+        //  WHILE / FOR: classic loop with a back edge.
+        // ─────────────────────────────────────────────────────────────────
+        Kind::While | Kind::For => {
+            let header = push_node(g, StmtKind::Loop, ast, lang, code);
+            connect_all(g, preds, header, EdgeKind::Seq);
+
+            // Body = `body` field, else the first (and usually only) block child.
+            let body = ast.child_by_field_name("body").or_else(|| {
+                let mut c = ast.walk();
+                // bound to a local so the iterator is dropped before `c`
+                let found = ast.children(&mut c).find(|n| n.kind() == "block");
+                found
+            });
+            // A malformed loop without a body is kept as a bare header
+            // instead of aborting the scan.
+            let Some(body) = body else {
+                return vec![header];
+            };
+
+            let body_exits = build_sub(body, &[header], g, lang, code);
+
+            // Back‑edge for every linear exit → header.
+            connect_all(g, &body_exits, header, EdgeKind::Back);
+
+            // Falling out of the loop = header’s false branch.
+            vec![header]
+        }
+
+        // ─────────────────────────────────────────────────────────────────
+        //  Control-flow sinks (return / break / continue).
+        // ─────────────────────────────────────────────────────────────────
+        Kind::Return => {
+            let ret = push_node(g, StmtKind::Return, ast, lang, code);
+            connect_all(g, preds, ret, EdgeKind::Seq);
+            Vec::new() // terminates this path
+        }
+        Kind::Break => {
+            let brk = push_node(g, StmtKind::Break, ast, lang, code);
+            connect_all(g, preds, brk, EdgeKind::Seq);
+            Vec::new()
+        }
+        Kind::Continue => {
+            let cont = push_node(g, StmtKind::Continue, ast, lang, code);
+            connect_all(g, preds, cont, EdgeKind::Seq);
+            Vec::new()
+        }
+
+        // ─────────────────────────────────────────────────────────────────
+        //  BLOCK: statements execute sequentially
+        // ─────────────────────────────────────────────────────────────────
+        Kind::SourceFile | Kind::Block => {
+            let mut cursor = ast.walk();
+            let mut frontier = preds.to_vec();
+            for child in ast.children(&mut cursor) {
+                frontier = build_sub(child, &frontier, g, lang, code);
+            }
+            frontier
+        }
+
+        // Function item – create a header and dive into its body
+        Kind::Function => {
+            let header = push_node(g, StmtKind::Seq, ast, lang, code);
+            connect_all(g, preds, header, EdgeKind::Seq);
+
+            if let Some(body) = ast.child_by_field_name("body") {
+                build_sub(body, &[header], g, lang, code)
+            } else {
+                vec![header] // declaration w/o body
+            }
+        }
+
+        // Statements that **may** contain a call ---------------------------------
+        Kind::CallWrapper => {
+            let mut cursor = ast.walk();
+
+            // A wrapper around a loop / `if` is transparent: build the inner
+            // construct directly on the incoming frontier.
+            if let Some(inner) = ast.children(&mut cursor).find(|c| {
+                matches!(
+                    lookup(lang, c.kind()),
+                    Kind::InfiniteLoop | Kind::While | Kind::For | Kind::If
+                )
+            }) {
+                return build_sub(inner, preds, g, lang, code);
+            }
+
+            let has_call = ast.children(&mut cursor).any(|c| {
+                matches!(
+                    lookup(lang, c.kind()),
+                    Kind::CallFn | Kind::CallMethod | Kind::CallMacro
+                )
+            });
+
+            let kind = if has_call {
+                StmtKind::Call
+            } else {
+                StmtKind::Seq
+            };
+            let node = push_node(g, kind, ast, lang, code);
+            connect_all(g, preds, node, EdgeKind::Seq);
+            vec![node]
+        }
+
+        // Trivia we drop completely ---------------------------------------------
+        // "line_comment" | "block_comment"
+        // | ";" | "," | "(" | ")" | "{" | "}" | "\n"
+        // | "use_declaration"
+        // | "attribute_item"
+        // | "mod_item" | "type_item"
+        Kind::Trivia => preds.to_vec(),
+
+        // ─────────────────────────────────────────────────────────────────
+        //  Every other node = simple sequential statement
+        // ─────────────────────────────────────────────────────────────────
+        _ => {
+            let n = push_node(g, StmtKind::Seq, ast, lang, code);
+            connect_all(g, preds, n, EdgeKind::Seq);
+            vec![n]
+        }
+    }
+}
+
+/// Re-label the edges that lead from `cond` into a freshly built branch.
+///
+/// Every outgoing edge of `cond` whose target was created at or after node
+/// index `first_new` is an entry edge of that branch; its label is replaced by
+/// `kind`.  No edge is added, so an empty branch leaves the graph untouched.
+fn tag_branch_entry(g: &mut Cfg, cond: NodeIndex, first_new: usize, kind: EdgeKind) {
+    let entry_edges: Vec<_> = g
+        .edges(cond)
+        .filter(|e| e.target().index() >= first_new)
+        .map(|e| e.id())
+        .collect();
+    for id in entry_edges {
+        g[id] = kind;
+    }
+}
+
+// -------------------------------------------------------------------------
+//  === PUBLIC ENTRY POINT =================================================
+// -------------------------------------------------------------------------
+
+/// Build the control-flow graph for a parsed file and return
+/// `(graph, entry_node)`.
+///
+/// * Adds a synthetic `Entry` node (span `(0, 0)`) and a synthetic `Exit` node
+///   (span at the end of `code`) before anything else.
+/// * Converts the tree's root node with `build_sub`, starting from `Entry`.
+/// * Wires every node left on the final frontier to `Exit`.
+/// * In builds with `debug_assertions` it additionally logs all nodes and
+///   edges, a reachability summary and the dominators computed from `Entry`.
+pub(crate) fn build_cfg<'a>(tree: &'a Tree, code: &'a [u8], lang: &str) -> (Cfg, NodeIndex) {
+    let root = tree.root_node();
+    debug!(target: "cfg", "Building CFG for {:?}", root);
+
+    // Synthetic nodes carry no label and no def/use information.
+    let synthetic = |kind: StmtKind, at: usize| NodeInfo {
+        kind,
+        span: (at, at),
+        label: None,
+        defines: None,
+        uses: Vec::new(),
+    };
+
+    let mut g: Cfg = Graph::with_capacity(128, 256);
+    let entry = g.add_node(synthetic(StmtKind::Entry, 0));
+    let exit = g.add_node(synthetic(StmtKind::Exit, code.len()));
+
+    // Everything real hangs below ENTRY; whatever is still open afterwards
+    // flows into EXIT.
+    let open_ends = build_sub(root, &[entry], &mut g, lang, code);
+    connect_all(&mut g, &open_ends, exit, EdgeKind::Seq);
+
+    debug!(target: "cfg", "CFG DONE — nodes: {}, edges: {}", g.node_count(), g.edge_count());
+
+    if cfg!(debug_assertions) {
+        // Dump all nodes …
+        for idx in g.node_indices() {
+            debug!(target: "cfg", "  node {:>3}: {:?}", idx.index(), g[idx]);
+        }
+        // … and all edges
+        for e in g.edge_references() {
+            debug!(
+                target: "cfg",
+                "  edge {:>3} → {:<3} ({:?})",
+                e.source().index(),
+                e.target().index(),
+                e.weight()
+            );
+        }
+
+        // Which nodes can be reached from ENTRY?
+        let mut reachable: HashSet<NodeIndex> = HashSet::new();
+        let mut walk = Bfs::new(&g, entry);
+        while let Some(nx) = walk.next(&g) {
+            reachable.insert(nx);
+        }
+        let total = g.node_count();
+        debug!(
+            target: "cfg",
+            "reachable nodes: {}/{}",
+            reachable.len(),
+            total
+        );
+        if reachable.len() != total {
+            let unreachable: Vec<_> = g
+                .node_indices()
+                .filter(|i| !reachable.contains(i))
+                .collect();
+            debug!(target: "cfg", "‼︎ unreachable nodes: {:?}", unreachable);
+        }
+
+        // Dominators from ENTRY, logged only
+        let doms: Dominators<_> = simple_fast(&g, entry);
+        debug!(target: "cfg", "dominator tree computed (len = {:?})", doms);
+    }
+
+    (g, entry)
+}
+
+/* ---------- TAINT-ANALYSIS PASSES ---------- */
+/// Append the text of every `identifier` node found in the subtree of `n` to
+/// `out`, visiting children in order.
+///
+/// An `identifier` node is recorded (when its text is valid UTF-8) and not
+/// descended into; every other node is only traversed.
+fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
+    if n.kind() == "identifier" {
+        out.extend(text_of(n, code));
+        return;
+    }
+
+    let mut cursor = n.walk();
+    for child in n.children(&mut cursor) {
+        collect_idents(child, code, out);
+    }
+}
+
+/// Return `(defines, uses)` for the AST fragment `ast`.
+///
+/// * `let_declaration` – defines the first identifier of the `pattern`
+///   field; uses every identifier of the `value` field.
+/// * `assignment_expression` – defines the first identifier of the `left`
+///   field, i.e. the base variable of targets such as `a[i] = …`; uses every
+///   identifier of the `right` field.
+/// * anything else – defines nothing; uses every identifier inside `ast`.
+///
+/// NOTE(review): identifiers that are only read on the left-hand side of an
+/// assignment (the `i` in `a[i] = …`) are not reported as uses.
+fn def_use(ast: Node, code: &[u8]) -> (Option<String>, Vec<String>) {
+    // All identifiers below the child stored under `field` (empty if absent).
+    let idents_in = |field: &str| {
+        let mut found = Vec::new();
+        if let Some(child) = ast.child_by_field_name(field) {
+            collect_idents(child, code, &mut found);
+        }
+        found
+    };
+
+    match ast.kind() {
+        // `let <pat> = <val>;`
+        "let_declaration" => (
+            idents_in("pattern").into_iter().next(),
+            idents_in("value"),
+        ),
+
+        // Plain assignment `x = y + z`.  The written variable is the FIRST
+        // identifier of the target, same rule as for `let`.
+        "assignment_expression" => (
+            idents_in("left").into_iter().next(),
+            idents_in("right"),
+        ),
+
+        // everything else – no definition, but may read vars
+        _ => {
+            let mut uses = Vec::new();
+            collect_idents(ast, code, &mut uses);
+            (None, uses)
+        }
+    }
+}
+
+/// Hash a set of names independently of the set's iteration order.
+///
+/// The elements are put into a vector, sorted, and that vector is fed to a
+/// fresh `DefaultHasher`.
+fn set_hash(s: &HashSet<String>) -> u64 {
+    let mut ordered = s.iter().collect::<Vec<&String>>();
+    ordered.sort(); // deterministic
+
+    let mut hasher = DefaultHasher::new();
+    ordered.hash(&mut hasher);
+    hasher.finish()
+}
+
+/// Transfer function of the taint analysis: given the set of tainted variable
+/// names that holds *before* `node`, return the set that holds *after* it.
+///
+/// * `Source` label – the variable defined by the node becomes tainted.
+/// * `Sanitizer` label – the variable defined by the node becomes clean,
+///   whatever its arguments were.
+/// * any other node (including unlabelled call statements and sinks) –
+///   classic gen/kill: the defined variable is tainted iff at least one
+///   variable the node reads is tainted.
+///
+/// Nodes that define nothing leave the set unchanged.
+fn apply_taint(node: &NodeInfo, taint: &HashSet<String>) -> HashSet<String> {
+    let mut out = taint.clone();
+
+    // Only statements that write a variable can change the taint set.
+    let Some(defined) = &node.defines else {
+        return out;
+    };
+
+    match node.label {
+        // A new untrusted value enters the program (gen)
+        Some(DataLabel::Source(_)) => {
+            out.insert(defined.clone());
+        }
+        // Anything written by a sanitizer becomes clean (kill)
+        Some(DataLabel::Sanitizer(_)) => {
+            out.remove(defined);
+        }
+        // Everything else propagates taint from the values it reads.  Call
+        // statements are deliberately NOT special-cased here: `let y = f(x)`
+        // must carry the taint of `x` over to `y`.
+        _ => {
+            if node.uses.iter().any(|u| taint.contains(u)) {
+                out.insert(defined.clone());
+            } else {
+                out.remove(defined);
+            }
+        }
+    }
+
+    out
+}
+
+/// Run the taint analysis over `cfg`, starting at `entry` with nothing
+/// tainted, and return one node path per flagged sink visit.
+///
+/// The graph is explored breadth-first over states `(node, tainted set)`.
+/// A state is identified by the node and the hash of the taint set that holds
+/// on arrival (see `set_hash`), so two different sets with the same 64-bit
+/// hash would be treated as one state.
+///
+/// A sink is flagged when one of the variables it reads is tainted after the
+/// node's own effect was applied.  The reported path runs from the nearest
+/// `Source`-labelled node on the discovery path (or from the start of that
+/// path if there is none) to the sink.  The same sink is reported once per
+/// distinct state it is reached in.
+pub fn analyse_function(cfg: &Cfg, entry: NodeIndex) -> Vec<Vec<NodeIndex>> {
+    use std::collections::{HashMap, HashSet, VecDeque};
+
+    /// Queue item: current CFG node + taint set that holds on arrival
+    struct Item {
+        node: NodeIndex,
+        taint: HashSet<String>,
+    }
+
+    // (node, taint_hash)  →  predecessor key   (for path rebuild)
+    type Key = (NodeIndex, u64);
+    let mut pred: HashMap<Key, Key> = HashMap::new();
+
+    // Seen states so we do not revisit them infinitely
+    let mut seen: HashSet<Key> = HashSet::new();
+
+    // Resulting Source→Sink paths
+    let mut findings: Vec<Vec<NodeIndex>> = Vec::new();
+
+    // Seed with the real hash of the empty set so the entry state uses the
+    // same key scheme as every other state.
+    let start: HashSet<String> = HashSet::new();
+    seen.insert((entry, set_hash(&start)));
+
+    let mut q = VecDeque::new();
+    q.push_back(Item {
+        node: entry,
+        taint: start,
+    });
+
+    while let Some(Item { node, taint }) = q.pop_front() {
+        let info = &cfg[node];
+        let updated = apply_taint(info, &taint); // step effect
+        let here: Key = (node, set_hash(&taint)); // key of the current state
+
+        /* ----------     SINK CHECK     ---------- */
+        if matches!(info.label, Some(DataLabel::Sink(_)))
+            && info.uses.iter().any(|u| updated.contains(u))
+        {
+            // walk the predecessor chain back to the nearest Source
+            let mut path: Vec<NodeIndex> = vec![node];
+            let mut k = here;
+            while let Some(&prev_key) = pred.get(&k) {
+                path.push(prev_key.0);
+                if matches!(cfg[prev_key.0].label, Some(DataLabel::Source(_))) {
+                    break;
+                }
+                k = prev_key; // climb further
+            }
+            path.reverse();
+            findings.push(path);
+        }
+
+        /* ----------   BFS successor step   ---------- */
+        // every successor is entered with the same outgoing set → hash it once
+        let out_hash = set_hash(&updated);
+        for succ in cfg.neighbors(node) {
+            let key = (succ, out_hash);
+            if seen.insert(key) {
+                pred.insert(key, here);
+                q.push_back(Item {
+                    node: succ,
+                    taint: updated.clone(),
+                });
+            }
+        }
+    }
+
+    findings
+}
+
+/// Test helper: parse `src` as Rust, build its CFG and run the taint analysis.
+///
+/// Panics if the grammar cannot be loaded or the parser returns no tree.
+#[cfg(test)]
+fn findings_for(src: &[u8]) -> Vec<Vec<NodeIndex>> {
+    use tree_sitter::Language;
+
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+        .unwrap();
+    let tree = parser.parse(src, None).unwrap();
+
+    let (cfg, entry) = build_cfg(&tree, src, "rust");
+    analyse_function(&cfg, entry)
+}
+
+#[test]
+fn env_to_arg_is_flagged() {
+    let findings = findings_for(
+        br#"
+        use std::env; use std::process::Command;
+        fn main() {
+            let x = env::var("DANGEROUS_ARG").unwrap();
+            Command::new("sh").arg(x).status().unwrap();
+        }"#,
+    );
+
+    assert_eq!(findings.len(), 1); // exactly one unsanitised Source→Sink
+}
+
+#[test]
+fn taint_through_if_else() {
+    let findings = findings_for(
+        br#"
+        use std::env; use std::process::Command;
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let safe = html_escape::encode_safe(&x);
+
+            if x.len() > 5 {
+                Command::new("sh").arg(&x).status().unwrap();   // UNSAFE
+            } else {
+                Command::new("sh").arg(&safe).status().unwrap(); // SAFE
+            }
+        }"#,
+    );
+
+    // exactly one path (via the True branch) should be flagged
+    assert_eq!(findings.len(), 1);
+}
+
+#[test]
+fn taint_through_while_loop() {
+    let findings = findings_for(
+        br#"
+        use std::{env, process::Command};
+        fn main() {
+            let mut x = env::var("DANGEROUS").unwrap();
+            while x.len() < 100 {                       // Loop header (Loop)
+                x.push_str("a");
+            }
+            Command::new("sh").arg(x).status().unwrap(); // Should be flagged
+        }"#,
+    );
+
+    assert_eq!(findings.len(), 1);
+}
+
+#[test]
+fn taint_killed_by_sanitizer() {
+    let findings = findings_for(
+        br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let clean = html_escape::encode_safe(&x);    // sanitizer node
+            Command::new("sh").arg(clean).status().unwrap();  // SAFE
+        }"#,
+    );
+
+    assert!(findings.is_empty());
+}
+
+#[test]
+fn taint_breaks_out_of_loop() {
+    let findings = findings_for(
+        br#"
+        use std::{env, process::Command};
+        fn main() {
+            loop {
+                let x = env::var("DANGEROUS").unwrap();
+                Command::new("sh").arg(&x).status().unwrap(); // vulnerable
+                break;
+            }
+        }"#,
+    );
+
+    assert_eq!(findings.len(), 1);
+}
+
+#[test]
+fn test_two_sources() {
+    let findings = findings_for(
+        br#"
+        use std::{env, process::Command};
+        fn main() {
+            let x = env::var("DANGEROUS").unwrap();
+            let y = env::var("SAFE").unwrap();
+            let clean = html_escape::encode_safe(&y);
+            Command::new("sh").arg(x).status().unwrap();
+            Command::new("sh").arg(clean).status().unwrap();
+        }"#,
+    );
+
+    // only the unsanitised `x` reaches a sink
+    assert_eq!(findings.len(), 1);
+}
+
+#[test]
+fn test_should_not_panic_on_empty_function() {
+    // An `if` whose only branch returns early, followed by a plain call:
+    // building and analysing the CFG must not panic and must flag nothing.
+    let findings = findings_for(
+        br#"
+        use std::{env, process::Command};
+        fn f() {
+            if cond() {
+                return;
+            }
+            do_something();
+        }"#,
+    );
+
+    assert!(findings.is_empty());
+}
--- a/src/cli.rs
+++ b/src/cli.rs
@ -32,6 +32,15 @@ pub enum Commands {
        /// Show only high severity issues
        #[arg(long)]
        high_only: bool,
+
+        #[arg(long)]
+        ast_only: bool,
+
+        #[arg(long)]
+        cfg_only: bool,
+
+        #[arg(long)]
+        all_targets: bool,
    },

    /// Manage project indexes
--- a/src/commands/clean.rs
+++ b/src/commands/clean.rs
@ -12,7 +12,7 @@ pub fn handle(project: Option<String>, all: bool, config_dir: &std::path::Path)
        }
        println!("{}", style("✔ All indexes cleaned").green().bold());
    } else if let Some(proj_name) = project {
-        let db_path = config_dir.join(format!("{}.sqlite", proj_name));
+        let db_path = config_dir.join(format!("{proj_name}.sqlite"));
        if db_path.exists() {
            fs::remove_file(&db_path)?;
            println!(
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@ -6,7 +6,7 @@ pub mod scan;
 use crate::cli::Commands;
 use crate::errors::NyxResult;
 use crate::patterns::Severity;
-use crate::utils::config::Config;
+use crate::utils::config::{AnalysisMode, Config};
 use std::path::Path;

 pub fn handle_command(
@ -21,11 +21,26 @@ pub fn handle_command(
            rebuild_index,
            format,
            high_only,
+            ast_only,
+            cfg_only,
+            all_targets,
        } => {
            if high_only {
                config.scanner.min_severity = Severity::High
            };

+            if ast_only {
+                config.scanner.mode = AnalysisMode::Ast
+            };
+
+            if cfg_only {
+                config.scanner.mode = AnalysisMode::Taint
+            };
+
+            if all_targets {
+                config.scanner.mode = AnalysisMode::Full
+            };
+
            scan::handle(&path, no_index, rebuild_index, format, database_dir, config)
        }
        Commands::Index { action } => index::handle(action, database_dir, config),
--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@ -68,7 +68,7 @@ pub fn handle(
            println!("{}", style(path).blue().underlined());
            for d in issues {
                println!(
-                    "  {:>4}:{:<4}  [{}]  {}",
+                    "  {:>4}:{:<4}  [{:}]  {:}",
                    d.line,
                    d.col,
                    d.severity,
@ -145,6 +145,17 @@ pub fn scan_with_index_parallel(
            } else {
                idx.get_issues_from_file(&path).unwrap_or_default()
            };
+
+            match cfg.scanner.mode {
+                crate::utils::config::AnalysisMode::Ast => {
+                    diags.retain(|d| !d.id.starts_with("taint"));
+                }
+                crate::utils::config::AnalysisMode::Taint => {
+                    diags.retain(|d| d.id.starts_with("taint"));
+                }
+                crate::utils::config::AnalysisMode::Full => {}
+            }
+
            if !diags.is_empty() {
                diag_map
                    .entry(path.to_string_lossy().to_string())
--- a/src/database.rs
+++ b/src/database.rs
@ -16,28 +16,35 @@ pub mod index {
    const SCHEMA: &str = r#"
        PRAGMA foreign_keys = ON;

-        CREATE TABLE IF NOT EXISTS files (
-            id         INTEGER PRIMARY KEY AUTOINCREMENT,
-            project    TEXT    NOT NULL,
-            path       TEXT    NOT NULL,
-            hash       BLOB    NOT NULL,
-            mtime      INTEGER NOT NULL,
+        CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT,
+            project TEXT NOT NULL,
+            path TEXT NOT NULL,
+            hash BLOB NOT NULL,
+            mtime INTEGER NOT NULL,
            scanned_at INTEGER NOT NULL,
            UNIQUE(project, path)
        );

-        CREATE TABLE IF NOT EXISTS issues (
-            file_id    INTEGER NOT NULL
+        CREATE TABLE IF NOT EXISTS issues (file_id INTEGER NOT NULL
                              REFERENCES files(id)
                              ON DELETE CASCADE,
-            rule_id    TEXT    NOT NULL,
-            severity   TEXT    NOT NULL,
-            line       INTEGER NOT NULL,
-            col        INTEGER NOT NULL,
-            PRIMARY KEY (file_id, rule_id, line, col)
-        );
+            rule_id TEXT NOT NULL,
+            severity TEXT NOT NULL,
+            line INTEGER NOT NULL,
+            col INTEGER NOT NULL,
+            PRIMARY KEY (file_id, rule_id, line, col));
+
+        CREATE TABLE IF NOT EXISTS function_summaries (hash TEXT PRIMARY KEY,
+            project TEXT NOT NULL,
+            name TEXT NOT NULL,
+            lang TEXT NOT NULL,
+            summary TEXT NOT NULL,
+            updated_at INTEGER NOT NULL);
    "#;

+    // TODO: ADD CLEANS FOR EACH TABLE BASED ON PROJECT WHICH RUNS ON CLEAN
+    // TODO: ADD DROP AND GIVE A CLI PARAMETER FOR DROP
+
    /// A single issue row, ready for insertion.
    #[derive(Debug, Clone)]
    pub struct IssueRow<'a> {
@ -189,6 +196,50 @@ pub mod index {
            Ok(issue_iter.filter_map(Result::ok).collect())
        }

+        // pub fn upsert_summary(
+        //     &mut self,
+        //     project: &str,
+        //     path: &Path,
+        //     hash: &str,
+        //     s: &crate::summary::FuncSummary,
+        // ) -> NyxResult<()> {
+        //     let conn = self.c();
+        //     let now  = chrono::Utc::now().timestamp_millis(); // i64
+        //
+        //     conn.execute(
+        //         "INSERT INTO function_summaries (hash, project, name, lang, summary, updated_at)
+        //              VALUES (?1, ?2, ?3, ?4, ?5, ?6)
+        //              ON CONFLICT(hash) DO UPDATE SET summary = excluded.summary,
+        //                                              updated_at = excluded.updated_at",
+        //         (
+        //             hash,
+        //             project,
+        //             &s.name,
+        //             path.extension().and_then(|e| e.to_str()).unwrap_or_default(),
+        //             serde_json::to_string(s).unwrap(), //TODO REPLACE UNWRAP
+        //             now,
+        //         ),
+        //     )?;
+        //     Ok(())
+        // }
+        //
+        // pub fn load_all_summaries(&self, project: &str) -> NyxResult<Vec<crate::summary::FuncSummary<'static>>> {
+        //     let mut stmt = self
+        //         .c()
+        //         .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
+        //
+        //     let iter = stmt.query_map([project], |row| {
+        //         let json: String = row.get(0)?;
+        //         Ok(serde_json::from_str::<crate::summary::FuncSummary>(json.as_str()).unwrap()) // TODO: REPLACE UNWRAP
+        //     })?;
+        //
+        //     Ok(iter
+        //         .collect::<Result<Vec<_>, _>>()?
+        //         .into_iter()
+        //         .map(|s| unsafe { std::mem::transmute::<_, crate::summary::FuncSummary<'static>>(s) })
+        //         .collect())
+        // }
+
        /// gets files from the database
        pub fn get_files(&self, project: &str) -> NyxResult<Vec<PathBuf>> {
            let mut stmt = self.c().prepare(
@ -214,6 +265,7 @@ pub mod index {

        DROP TABLE IF EXISTS issues;
        DROP TABLE IF EXISTS files;
+        DROP TABLE IF EXISTS function_summaries;

        PRAGMA foreign_keys = ON;
        VACUUM;
--- a/src/labels/javascript.rs
+++ b/src/labels/javascript.rs
@ -0,0 +1,17 @@
+use crate::labels::{Cap, DataLabel, LabelRule};
+
+// TODO: refactor this
+// Taint-label rules for JavaScript, registered under the "javascript" and
+// "js" keys of the label registry. Matchers are compared case-insensitively
+// (ASCII) against the text in front of the first `(` or `<`, so
+// "JSON.parse" also matches `json.parse(...)`.
+pub static RULES: &[LabelRule] = &[
+    // Sources: location objects.
+    LabelRule {
+        matchers: &["document.location", "window.location"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // Sanitizers.
+    LabelRule {
+        matchers: &["JSON.parse"],
+        label: DataLabel::Sanitizer(Cap::JSON_PARSE),
+    },
+    // Sinks.
+    // NOTE(review): `eval` is tagged with the shell-escape capability; `Cap`
+    // has no dedicated code-execution flag - confirm this is intended.
+    LabelRule {
+        matchers: &["eval"],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+];
--- a/src/labels/mod.rs
+++ b/src/labels/mod.rs
@ -0,0 +1,121 @@
+mod javascript;
+mod rust;
+
+use bitflags::bitflags;
+use once_cell::sync::Lazy;
+use phf::Map;
+use std::collections::HashMap;
+
+/// A single labelling rule: when a piece of syntax text matches one of the
+/// `matchers`, the node gets `label`.
+///
+/// Matching is performed by `classify`, case-insensitively on ASCII:
+/// * a matcher ending in `_` is a prefix pattern (the text must start with it);
+/// * any other matcher must equal the text, or be a suffix of it that is
+///   directly preceded by `.` or `:`.
+#[derive(Debug, Clone, Copy)]
+pub struct LabelRule {
+    /// Patterns compared against the text in front of the first `(` or `<`.
+    pub matchers: &'static [&'static str],
+    /// Label handed out when any matcher hits.
+    pub label: DataLabel,
+}
+
+// Capability bit-set carried by every `DataLabel`.
+// The rule tables use `Cap::all()` for sources and a single flag for each
+// sanitizer / sink.
+// NOTE(review): presumably a sanitizer clears the capability a sink requires;
+// the propagation logic is not in this module - confirm against the taint pass.
+bitflags! {
+    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+    pub struct Cap: u8 {
+        const ENV_VAR      = 0b0000_0001;
+        const HTML_ESCAPE  = 0b0000_0010;
+        const SHELL_ESCAPE = 0b0000_0100;
+        const URL_ENCODE   = 0b0000_1000;
+        const JSON_PARSE   = 0b0001_0000;
+        // TODO: add more capabilities (three bits of the `u8` are still unused).
+    }
+}
+
+/// Language-independent classification of a syntax-node kind.
+///
+/// Each language module supplies a table from its grammar's node-kind strings
+/// to these variants; `lookup` returns [`Kind::Other`] for anything that is
+/// not listed.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Kind {
+    // --- control flow ---
+    If,
+    InfiniteLoop,
+    While,
+    For,
+    LoopBody,
+    // --- calls ---
+    CallFn,
+    CallMethod,
+    CallMacro,
+    // --- jumps ---
+    Break,
+    Continue,
+    Return,
+    // --- structure ---
+    Block,
+    SourceFile,
+    Function,
+    // --- data flow ---
+    Assignment,
+    // Statement-level node that may wrap a call (for Rust: `let_declaration`
+    // and `expression_statement`).
+    CallWrapper,
+    // Comments, punctuation and declarations mapped as trivia by the tables.
+    Trivia,
+    // Fallback for every node kind without an explicit mapping.
+    Other,
+}
+
+/// Taint role assigned to a piece of syntax by `classify`, together with the
+/// capability set (`Cap`) the matching rule declares for it.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DataLabel {
+    /// Origin of untrusted data.
+    Source(Cap),
+    /// Call that sanitizes data with respect to the given capabilities.
+    Sanitizer(Cap),
+    /// Sensitive operation that untrusted data should not reach.
+    Sink(Cap),
+}
+
+/// Label rules per language key. Every language is reachable through its full
+/// name and through its usual file extension.
+static REGISTRY: Lazy<HashMap<&'static str, &'static [LabelRule]>> = Lazy::new(|| {
+    HashMap::from([
+        ("rust", rust::RULES),
+        ("rs", rust::RULES),
+        ("javascript", javascript::RULES),
+        ("js", javascript::RULES),
+        // add more languages in one line:
+        // ("go", go::RULES),
+    ])
+});
+
+/// Borrowed compile-time table from grammar node kinds to [`Kind`].
+type FastMap = &'static Map<&'static str, Kind>;
+
+/// Node-kind classifier tables per language key (full name and extension).
+pub(crate) static CLASSIFIERS: Lazy<HashMap<&'static str, FastMap>> = Lazy::new(|| {
+    let rust_kinds: FastMap = &rust::KINDS;
+
+    // let js_kinds: FastMap = &javascript::KINDS;
+    // ("javascript", js_kinds), ("js", js_kinds),
+
+    // todo: add more languages
+    HashMap::from([("rust", rust_kinds), ("rs", rust_kinds)])
+});
+
+/// Map a grammar node kind (`raw`) of language `lang` to its [`Kind`].
+///
+/// Yields [`Kind::Other`] when the language has no classifier table or the
+/// table has no entry for `raw`. The language key is used as given; it is not
+/// lower-cased here.
+#[inline(always)]
+pub fn lookup(lang: &str, raw: &str) -> Kind {
+    match CLASSIFIERS.get(lang) {
+        Some(table) => table.get(raw).copied().unwrap_or(Kind::Other),
+        None => Kind::Other,
+    }
+}
+
+/// Try to classify a piece of syntax text as a taint source, sanitizer or sink.
+///
+/// * `lang` - language key ("rust", "javascript", ...); it is lower-cased
+///   before the registry lookup.
+/// * `text` - source text of the node, e.g. `std::env::var("HOME")`. Only the
+///   part in front of the first `(` or `<` (the callee path) is matched.
+///
+/// Matching is ASCII case-insensitive. A matcher ending in `_` is a prefix
+/// pattern; any other matcher must equal the callee path or be a suffix of it
+/// that is directly preceded by `.` or `:`. Rules are tried in declaration
+/// order and the first hit wins.
+///
+/// Returns `None` when the language is not registered or no rule matches.
+pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {
+    let key = lang.to_ascii_lowercase();
+    let rules = REGISTRY.get(key.as_str())?;
+
+    let head = text.split(['(', '<']).next().unwrap_or("").trim();
+    // A turbofish call such as `env::var::<&str>(..)` is cut at `<`, which
+    // leaves a dangling `::` that would defeat every suffix matcher; drop it.
+    let head = head.strip_suffix("::").unwrap_or(head).trim_end().as_bytes();
+
+    for rule in *rules {
+        for matcher in rule.matchers {
+            let m = matcher.as_bytes();
+            // Compare bytes case-insensitively instead of allocating a
+            // lower-cased copy of every matcher on every call.
+            let hit = if matcher.ends_with('_') {
+                head.get(..m.len())
+                    .map_or(false, |prefix| prefix.eq_ignore_ascii_case(m))
+            } else {
+                head.len().checked_sub(m.len()).map_or(false, |start| {
+                    head[start..].eq_ignore_ascii_case(m)
+                        && (start == 0 || matches!(head[start - 1], b'.' | b':'))
+                })
+            };
+            if hit {
+                return Some(rule.label);
+            }
+        }
+    }
+    None
+}
--- a/src/labels/rust.rs
+++ b/src/labels/rust.rs
@ -0,0 +1,72 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule};
+use phf::{Map, phf_map};
+
+// Taint-label rules for Rust, registered under the "rust" and "rs" keys.
+// Matchers are compared case-insensitively, so they are written in lower
+// case. A matcher with a trailing `_` is a prefix pattern; every other
+// matcher has to equal the callee path or be a `.`/`:`-delimited suffix of it.
+pub static RULES: &[LabelRule] = &[
+    // ─────────── Sources ───────────
+    LabelRule {
+        matchers: &["std::env::var", "env::var"],
+        label: DataLabel::Source(Cap::all()),
+    },
+    // ───────── Sanitizers ──────────
+    // `fn sanitize_*(&str) -> String`
+    // "sanitize_" only hits unqualified calls (prefix match); "sanitize_html"
+    // additionally covers path-qualified calls such as `util::sanitize_html`.
+    LabelRule {
+        matchers: &["html_escape::encode_safe", "sanitize_", "sanitize_html"],
+        label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+    },
+    LabelRule {
+        matchers: &["shell_escape::unix::escape"],
+        label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
+    },
+    // ─────────── Sinks ─────────────
+    //  All the key points where untrusted strings reach the OS shell.
+    LabelRule {
+        matchers: &[
+            "command::new",
+            "std::process::command::new",
+            "command::arg",
+            "command::args",
+            "command::status",
+            "command::output",
+        ],
+        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+    },
+];
+
+/// Maps Rust grammar node-kind strings onto the language-independent [`Kind`].
+///
+/// Node kinds that are not listed here are classified as `Kind::Other` by
+/// `labels::lookup`.
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    // In the Rust grammar loops and jumps are expressions, so the
+    // `*_expression` spellings are the ones that actually occur in a tree.
+    "if_expression"        => Kind::If,
+    "loop_expression"      => Kind::InfiniteLoop,
+    "while_expression"     => Kind::While,
+    "for_expression"       => Kind::For,
+    // `*_statement` spellings are kept so existing lookups keep working.
+    "loop_statement"       => Kind::LoopBody,
+    "while_statement"      => Kind::While,
+    "for_statement"        => Kind::For,
+
+    "return_expression"    => Kind::Return,
+    "return_statement"     => Kind::Return,
+    "break_expression"     => Kind::Break,
+    "break_statement"      => Kind::Break,
+    "continue_expression"  => Kind::Continue,
+    "continue_statement"   => Kind::Continue,
+
+    // structure
+    "source_file"          => Kind::SourceFile,
+    "block"                => Kind::Block,
+    "function_item"        => Kind::Function,
+
+    // data-flow
+    "call_expression"        => Kind::CallFn,
+    "method_call_expression" => Kind::CallMethod,
+    "macro_invocation"       => Kind::CallMacro,
+    "let_declaration"        => Kind::CallWrapper,
+    "expression_statement"   => Kind::CallWrapper,
+    "assignment_expression"  => Kind::Assignment,
+
+    // trivia
+    "line_comment"     => Kind::Trivia,
+    "block_comment"    => Kind::Trivia,
+    ";" => Kind::Trivia, "," => Kind::Trivia,
+    "(" => Kind::Trivia, ")" => Kind::Trivia,
+    "{" => Kind::Trivia, "}" => Kind::Trivia, "\n" => Kind::Trivia,
+    "use_declaration"  => Kind::Trivia,
+    "attribute_item"   => Kind::Trivia,
+    "mod_item"         => Kind::Trivia,
+    "type_item"        => Kind::Trivia,
+};
--- a/src/main.rs
+++ b/src/main.rs
@ -1,8 +1,10 @@
 mod ast;
+mod cfg;
 mod cli;
 mod commands;
 mod database;
 mod errors;
+mod labels;
 mod patterns;
 mod utils;
 mod walk;
@ -59,6 +61,11 @@ fn main() -> NyxResult<()> {

    let mut config = Config::load(config_dir)?;

+    rayon::ThreadPoolBuilder::new()
+        .stack_size(config.performance.rayon_thread_stack_size)
+        .build_global()
+        .expect("set rayon stack size");
+
    commands::handle_command(cli.command, database_dir, &mut config)?;

    println!(
--- a/src/patterns/mod.rs
+++ b/src/patterns/mod.rs
@ -92,7 +92,7 @@ static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(||
    m.insert("cpp", cpp::PATTERNS);
    m.insert("c++", cpp::PATTERNS);

-    // ---- Other languages in the folder ----
+    // ---- Other languages in the folder ----
    m.insert("java", java::PATTERNS);
    m.insert("go", go::PATTERNS);
    m.insert("php", php::PATTERNS);
@ -101,14 +101,14 @@ static REGISTRY: Lazy<HashMap<&'static str, &'static [Pattern]>> = Lazy::new(||
    m.insert("ruby", ruby::PATTERNS);
    m.insert("rb", ruby::PATTERNS);

-    tracing::debug!("AST-pattern registry initialised ({} languages)", m.len());
+    tracing::debug!("AST-pattern registry initialised ({} patterns)", m.len());

    m
 });

 /// Return all patterns for the requested language (case-insensitive).
 ///
-/// Unknown languages yield an **empty** `Vec`.
+/// Unknown languages yield an **empty** `Vec`.
 pub fn load(lang: &str) -> Vec<Pattern> {
    let key = lang.to_ascii_lowercase();
    REGISTRY.get(key.as_str()).copied().unwrap_or(&[]).to_vec()
--- a/src/utils/config.rs
+++ b/src/utils/config.rs
@ -8,9 +8,21 @@ use toml;

 static DEFAULT_CONFIG_TOML: &str = include_str!("../../default-nyx.conf");

+/// Selects which family of diagnostics a scan reports.
+///
+/// Serialized in lower case (`"full"`, `"ast"`, `"taint"`).
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum AnalysisMode {
+    /// Report every diagnostic (default).
+    #[default]
+    Full,
+    /// Drop diagnostics whose rule id starts with `taint`.
+    Ast,
+    /// Keep only diagnostics whose rule id starts with `taint`.
+    Taint,
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone)]
 #[serde(default)]
 pub struct ScannerConfig {
+    /// The analysis mode to use.
+    pub mode: AnalysisMode,
+
    /// The minimum severity level to output
    pub min_severity: Severity,

@ -47,6 +59,7 @@ pub struct ScannerConfig {
 impl Default for ScannerConfig {
    fn default() -> Self {
        Self {
+            mode: AnalysisMode::Full,
            min_severity: Severity::Low,
            max_file_size_mb: None,
            excluded_extensions: vec![
@ -151,6 +164,9 @@ pub struct PerformanceConfig {
    /// capacity = threads × this
    pub channel_multiplier: usize,

+    /// The stack size for Rayon threads, in bytes.
+    pub rayon_thread_stack_size: usize,
+
    /// Timeout on individual files // TODO: IMPLEMENT
    pub scan_timeout_secs: Option<u64>,

@ -167,6 +183,7 @@ impl Default for PerformanceConfig {
            worker_threads: None,
            batch_size: 100usize,
            channel_multiplier: 4usize,
+            rayon_thread_stack_size: 8 * 1024 * 1024, // 8 MiB
            scan_timeout_secs: None,
            memory_limit_mb: 512,
        }
@ -236,6 +253,7 @@ fn create_example_config(config_dir: &Path) -> NyxResult<()> {
 /// supply new exclusions and overriding everything else.
 fn merge_configs(mut default: Config, user: Config) -> Config {
    // --- ScannerConfig ---
+    default.scanner.mode = user.scanner.mode;
    default.scanner.min_severity = user.scanner.min_severity;
    default.scanner.max_file_size_mb = user.scanner.max_file_size_mb;
    default.scanner.read_global_ignore = user.scanner.read_global_ignore;
@ -277,6 +295,7 @@ fn merge_configs(mut default: Config, user: Config) -> Config {
    default.performance.worker_threads = user.performance.worker_threads;
    default.performance.batch_size = user.performance.batch_size;
    default.performance.channel_multiplier = user.performance.channel_multiplier;
+    default.performance.rayon_thread_stack_size = user.performance.rayon_thread_stack_size;
    default.performance.scan_timeout_secs = user.performance.scan_timeout_secs;
    default.performance.memory_limit_mb = user.performance.memory_limit_mb;

--- a/src/utils/project.rs
+++ b/src/utils/project.rs
@ -9,7 +9,7 @@ pub fn get_project_info(project_path: &Path, config_dir: &Path) -> NyxResult<(St
        .ok_or_else(|| NyxError::Other("Unable to determine project name".into()))?;

    let db_name = sanitize_project_name(project_name);
-    let db_path = config_dir.join(format!("{}.sqlite", db_name));
+    let db_path = config_dir.join(format!("{db_name}.sqlite"));

    Ok((project_name.to_owned(), db_path))
 }
@ -41,7 +41,7 @@ fn sanitize_project_name_is_idempotent_and_lossless_enough() {
    ];

    for (input, expected) in samples {
-        assert_eq!(sanitize_project_name(input), expected, "input: {}", input);
+        assert_eq!(sanitize_project_name(input), expected, "input: {input}");
        assert_eq!(sanitize_project_name(expected), expected);
    }
 }