diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8ded1a99..05ea48fd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,12 +5,18 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## [Unreleased]
+## [0.2.0-alpha] - 2025-06-28
+
+### Added
+- Experimental intra‑procedural CFG + taint analysis for Rust. Nyx now builds a control‑flow graph, applies data‑flow rules, and flags unsanitised Source → Sink paths (e.g. env::var → Command::new).
+- O(1) node‑kind lookup via per‑language PHF tables for zero‑cost dispatch.
+- Six unit tests covering conditionals, loops, sanitizers, and multiple sources.
+- Debug channel target=cfg (use RUST_LOG=nyx::cfg=debug) to inspect generated graphs.
### Fixed
- Fixed a bug in the release pipeline where the Windows job tried to call `zip`, which is not available in PowerShell
-## [0.1.1] - 2025-06-25
+## [0.1.1-alpha] - 2025-06-25
### Fixed
- Fixed a bug where the `scan --no-index` command would not respect the `max_results` config setting (#1)
@@ -18,7 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Integration tests covering indexing and scanning pipelines (#3, #4, #5, #8)
-## [0.1.0] - 2025-06-25
+## [0.1.0-alpha] - 2025-06-25
### Added
- Initial alpha release of **Nyx** CLI tool
diff --git a/Cargo.lock b/Cargo.lock
index 824ce5c2..e570effc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11,6 +11,12 @@ dependencies = [
"memchr",
]
+[[package]]
+name = "allocator-api2"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
+
[[package]]
name = "android-tzdata"
version = "0.1.1"
@@ -62,7 +68,7 @@ version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
dependencies = [
- "windows-sys",
+ "windows-sys 0.59.0",
]
[[package]]
@@ -73,7 +79,7 @@ checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
dependencies = [
"anstyle",
"once_cell_polyfill",
- "windows-sys",
+ "windows-sys 0.59.0",
]
[[package]]
@@ -210,15 +216,15 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "console"
-version = "0.15.11"
+version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
+checksum = "2e09ced7ebbccb63b4c65413d821f2e00ce54c5ca4514ddc6b3c892fdbcbc69d"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"unicode-width",
- "windows-sys",
+ "windows-sys 0.60.2",
]
[[package]]
@@ -308,7 +314,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
- "windows-sys",
+ "windows-sys 0.60.2",
]
[[package]]
@@ -336,7 +342,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
dependencies = [
"libc",
- "windows-sys",
+ "windows-sys 0.60.2",
]
[[package]]
@@ -357,6 +363,12 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+[[package]]
+name = "fixedbitset"
+version = "0.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
+
[[package]]
name = "foldhash"
version = "0.1.5"
@@ -405,6 +417,8 @@ version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
dependencies = [
+ "allocator-api2",
+ "equivalent",
"foldhash",
]
@@ -608,8 +622,9 @@ dependencies = [
[[package]]
name = "nyx-scanner"
-version = "0.1.1"
+version = "0.2.0-alpha"
dependencies = [
+ "bitflags",
"blake3",
"bytesize",
"chrono",
@@ -621,6 +636,8 @@ dependencies = [
"ignore",
"num_cpus",
"once_cell",
+ "petgraph",
+ "phf",
"r2d2",
"r2d2_sqlite",
"rayon",
@@ -688,7 +705,62 @@ dependencies = [
"libc",
"redox_syscall",
"smallvec",
- "windows-targets",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "petgraph"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca"
+dependencies = [
+ "fixedbitset",
+ "hashbrown",
+ "indexmap",
+ "serde",
+]
+
+[[package]]
+name = "phf"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7"
+dependencies = [
+ "phf_macros",
+ "phf_shared",
+ "serde",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2cbb1126afed61dd6368748dae63b1ee7dc480191c6262a3b4ff1e29d86a6c5b"
+dependencies = [
+ "fastrand",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d713258393a82f091ead52047ca779d37e5766226d009de21696c4e667044368"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981"
+dependencies = [
+ "siphasher",
]
[[package]]
@@ -901,7 +973,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.59.0",
]
[[package]]
@@ -997,6 +1069,12 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+[[package]]
+name = "siphasher"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
+
[[package]]
name = "smallvec"
version = "1.15.1"
@@ -1036,7 +1114,7 @@ dependencies = [
"getrandom 0.3.3",
"once_cell",
"rustix",
- "windows-sys",
+ "windows-sys 0.59.0",
]
[[package]]
@@ -1482,7 +1560,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
- "windows-sys",
+ "windows-sys 0.59.0",
]
[[package]]
@@ -1556,7 +1634,16 @@ version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
- "windows-targets",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.2",
]
[[package]]
@@ -1565,14 +1652,30 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_gnullvm",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm 0.52.6",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
+dependencies = [
+ "windows_aarch64_gnullvm 0.53.0",
+ "windows_aarch64_msvc 0.53.0",
+ "windows_i686_gnu 0.53.0",
+ "windows_i686_gnullvm 0.53.0",
+ "windows_i686_msvc 0.53.0",
+ "windows_x86_64_gnu 0.53.0",
+ "windows_x86_64_gnullvm 0.53.0",
+ "windows_x86_64_msvc 0.53.0",
]
[[package]]
@@ -1581,48 +1684,96 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
+
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
+
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
+
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
+
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
+
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
+
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
+
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
+
[[package]]
name = "winnow"
version = "0.7.11"
diff --git a/Cargo.toml b/Cargo.toml
index b1ed089f..6a4d1e22 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "nyx-scanner"
-version = "0.1.1"
+version = "0.2.0-alpha"
edition = "2024"
description = "A CLI security scanner for automating vulnerability checks"
license = "GPL-3.0"
@@ -49,10 +49,13 @@ tree-sitter-ruby = "0.23.1"
crossbeam-channel = "0.5.15"
blake3 = "1.8.2"
once_cell = "1.21.3"
-console = "0.15.11"
+console = "0.16.0"
rayon = "1.10.0"
r2d2 = "0.8.10"
bytesize = "2.0.1"
chrono = { version = "0.4.41", default-features = false, features = ["std", "clock"] }
thiserror = "2.0.12"
dashmap = "7.0.0-rc2"
+petgraph = "0.8.2"
+bitflags = "2.9.1"
+phf = { version = "0.12.1", features = ["macros"] }
diff --git a/README.md b/README.md
index 0d78268d..401ea902 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,11 @@

-# Nyx
-
**Fast, cross-language cli vulnerability scanner.**
[](https://crates.io/crates/nyx-scanner)
[](https://www.gnu.org/licenses/gpl-3.0)
-[](https://www.rust-lang.org)
+[](https://www.rust-lang.org)
[](https://github.com/ecpeter23/nyx/actions)
@@ -18,7 +16,7 @@
**Nyx** is a lightweight lightning-fast Rust‑native command‑line tool that detects potentially dangerous code patterns across several programming languages. It combines the accuracy of [`tree‑sitter`](https://tree-sitter.github.io/) parsing with a curated rule set and an optional SQLite‑backed index to deliver fast, repeatable scans on projects of any size.
> **Project status – Alpha**
-> Nyx is under active development. The public interface, rule set, and output formats may change without notice while we stabilize the core. Please pin exact versions in production environments.
+> Nyx is under active development. The public interface, rule set, and output formats may change without notice while we stabilise the core. The new CFG + taint engine is experimental and Rust-only for now – please report any crashes or false positives. Pin exact versions in production environments.
---
@@ -50,17 +48,49 @@
## Installation
+### Install crate
+```bash
+$ cargo install nyx-scanner
+```
+
+### Install from a GitHub release
+1. Navigate to the [Releases](https://github.com/ecpeter23/nyx/releases) page of the repository.
+2. Download the appropriate binary for your system:
+
+ ```nyx-x86_64-unknown-linux-gnu.zip``` for Linux
+
+ ```nyx-x86_64-pc-windows-msvc.zip``` for Windows
+
+ ```nyx-x86_64-apple-darwin.zip``` or ```nyx-aarch64-apple-darwin.zip``` for macOS (Intel or Apple Silicon)
+
+3. Unzip the file and move the executable to a directory in your system PATH:
+ ```bash
+ # Example for Unix systems
+ unzip nyx-x86_64-unknown-linux-gnu.zip
+ chmod +x nyx
+ sudo mv nyx /usr/local/bin/
+ ```
+ ```powershell
+ # Example for Windows in PowerShell
+ Expand-Archive -Path nyx-x86_64-pc-windows-msvc.zip -DestinationPath .
+ Move-Item -Path .\nyx.exe -Destination "C:\Program Files\Nyx\" # Add to PATH manually if needed
+ ```
+
+4. Verify the installation:
+ ```bash
+ nyx --version
+ ```
### Build from source
```bash
-$ git clone https://github.com//nyx.git
+$ git clone https://github.com/ecpeter23/nyx.git
$ cd nyx
$ cargo build --release
# optional – copy the binary into PATH
$ cargo install --path .
```
-Nyx targets **stable Rust 1.78 or later**.
+Nyx targets **stable Rust 1.85 or later**.
---
@@ -142,18 +172,29 @@ A fully documented `nyx.conf` is generated automatically on first run.
## Roadmap
-| Area | Planned Improvements |
-|-----------------------|---------------------------------------------------------------------------|
-| More language support | Plans to create rule sets for over 100 languages for maximum coverage |
-| Control‑flow analysis | Generation of CFGs for deeper reasoning about execution paths |
-| Taint tracking | Intra‑ / inter‑procedural tracing of untrusted data from sources to sinks |
-| Output formats | Full SARIF 2.1.0, JUnit XML, HTML report generator |
-| Rule updates | Remote rule feed with signature verification |
+| Area | Planned Improvements |
+|-----------------------|-------------------------------------------------------------------------------------------------------|
+| More language support | Plans to create rule sets for over 100 languages for maximum coverage |
+| Control‑flow analysis | Inter‑procedural function summaries. Cap label propagation & bit‑flag checks. Loop/branch sensitivity |
+| Taint tracking | Intra‑ / inter‑procedural tracing of untrusted data from sources to sinks |
+| Output formats | Full SARIF 2.1.0, JUnit XML, HTML report generator |
+| Rule updates | Remote rule feed with signature verification |
+| Performance & UX | Incremental CFG cache, progress‑bar UX, smart file‑watch re‑scan |
Community feedback will help shape priorities; please open an issue to discuss proposed changes.
---
+## Experimental Features & Feedback
+
+The new Rust intra‑procedural CFG + taint engine is now enabled by default.
+
+Expect rough edges: slightly slower scans, occasional false positives, limited language coverage.
+
+Please open an issue for every crash, panic, or suspicious result – attach the minimal code snippet and mention the Nyx version.
+
+---
+
## Contributing
Pull requests are welcome. To contribute:
diff --git a/default-nyx.conf b/default-nyx.conf
index 9a6d32e7..e996772a 100644
--- a/default-nyx.conf
+++ b/default-nyx.conf
@@ -8,6 +8,10 @@
[scanner]
+## Analysis mode. `full` runs both the AST pattern rules and the CFG taint analysis.
+## Possible values: full | ast | cfg
+mode = "full"
+
## Minimum severity level to include in the report
## Possible values: Low | Medium | High | Critical
min_severity = "Low"
@@ -96,6 +100,9 @@ batch_size = 100
## Channel capacity multiplier (capacity = threads × this)
channel_multiplier = 4
+## Maximum stack size for Rayon threads (bytes); 8388608 = 8 * 1024 * 1024
+rayon_thread_stack_size = 8388608 # 8 MiB
+
## Timeout on individual files (seconds); null = none (UNIMPLEMENTED)
scan_timeout_secs = null
diff --git a/examples/sanatize/example.rs b/examples/sanatize/example.rs
new file mode 100644
index 00000000..c01f2923
--- /dev/null
+++ b/examples/sanatize/example.rs
@@ -0,0 +1,96 @@
+//! example.rs — realistic taint-tracking playground
+//! Run `cargo add html-escape shell-escape` before compiling.
+
+use std::{env, process::Command, fs};
+
+#[derive(Default)]
+struct UserCtx {
+ query: String, // potentially tainted: FLOW D fills it from `source_file`
+ sanitized: String, // should remain clean: FLOW D only stores `sanitize_html` output here
+}
+
+/// ---------- helper wrappers so we get nice Source / Sink labels ----------
+fn source_env(var: &str) -> String {
+ env::var(var).unwrap_or_default() // Source(env-var); yields "" when `env::var` returns an error
+}
+
+fn source_file(path: &str) -> String {
+ fs::read_to_string(path).unwrap_or_default() // Source(file-io); yields "" when the read fails
+}
+
+fn sink_shell(arg: &str) {
+ Command::new("sh").arg(arg).status().unwrap(); // Sink(process-spawn); panics if `status()` returns an error
+}
+
+fn sink_html(out: &str) {
+ println!("{out}"); // Sink(html-out): writes `out` to stdout exactly as received
+}
+
+fn sanitize_html(s: &str) -> String {
+    // Sanitizer(html-escape). `encode_safe` hands back a `Cow<str>`, so it must be
+    // converted to satisfy the `String` return type (same shape as `sanitize_shell`).
+    html_escape::encode_safe(s).into_owned()
+}
+
+fn sanitize_shell(s: &str) -> String {
+ shell_escape::unix::escape(s.into()).into_owned() // Sanitizer(shell-escape); result converted to an owned `String`
+}
+
+/// ---------- 1. Main demo function ----------
+fn main() {
+ // FLOW A ────────────────────────────────────────────────────────────────
+ // env → sanitized → safe shell
+ let raw = source_env("USER_CMD");
+ let clean = sanitize_shell(&raw);
+ sink_shell(&clean); // EXPECT: SAFE
+
+ // FLOW B ────────────────────────────────────────────────────────────────
+ // env → if-else, only the else branch escapes its argument
+ let arg = source_env("ANOTHER");
+ if arg.len() > 5 {
+ sink_shell(&arg); // EXPECT: UNSAFE (branch tainted)
+ } else {
+ let escaped = sanitize_shell(&arg);
+ sink_shell(&escaped); // safe
+ }
+
+ // FLOW C ────────────────────────────────────────────────────────────────
+ // file → while loop → HTML sanitizer cleared
+ let mut data = source_file("/tmp/input.txt");
+ while data.len() < 32 {
+ data.push('x');
+ }
+ let html_ok = sanitize_html(&data);
+ sink_html(&html_ok); // safe
+
+ // FLOW D ────────────────────────────────────────────────────────────────
+ // file → struct field → match → unsanitised HTML
+ let mut ctx = UserCtx::default();
+ ctx.query = source_file("/tmp/q.txt");
+ // overwrite the clean field; `ctx.sanitized` is *not* tainted
+ ctx.sanitized = sanitize_html("constant");
+ match ctx {
+ UserCtx { query, sanitized } if query.contains("DROP") => {
+ sink_html(&query); // EXPECT: UNSAFE
+ }
+ _ => {
+ sink_html(&ctx.sanitized); // safe
+ }
+ }
+
+ // FLOW E ────────────────────────────────────────────────────────────────
+ // source → function call → reassignment clears taint
+ let mut name = source_env("USER"); // tainted
+ greet(&name); // just prints
+ name = "anonymous".into(); // kills taint
+ greet(&name); // safe
+
+ // FLOW F ────────────────────────────────────────────────────────────────
+ // Multiple sanitizers, only the *right* one matters
+ let cmd = source_env("MIXED");
+ let partly = sanitize_html(&cmd); // wrong sanitizer
+ sink_shell(&partly); // EXPECT: UNSAFE
+}
+
+/// Helper that only prints a greeting for `who`; the demo uses it as a non-sink call.
+fn greet(who: &str) {
+ println!("Hello, {who}");
+}
\ No newline at end of file
diff --git a/examples/standard/test.rs b/examples/standard/test.rs
new file mode 100644
index 00000000..ff89b18e
--- /dev/null
+++ b/examples/standard/test.rs
@@ -0,0 +1,9 @@
+use std::{env, process::Command};
+fn main() {
+ let y = env::var("SAFE").unwrap(); // Source(env-var)
+
+ let x = env::var("DANGEROUS").unwrap(); // Source(env-var)
+ let clean = html_escape::encode_safe(&y); // HTML-escaped copy of `y`
+ Command::new("sh").arg(x).status().unwrap(); // `x` reaches the shell without passing any sanitizer
+ Command::new("sh").arg(clean).status().unwrap(); // NOTE(review): HTML escaping ahead of a shell sink — confirm the expected verdict
+}
\ No newline at end of file
diff --git a/src/ast.rs b/src/ast.rs
index 6b3ac445..6fdadb12 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -1,5 +1,8 @@
+use crate::cfg::{analyse_function, build_cfg};
use crate::commands::scan::Diag;
use crate::errors::{NyxError, NyxResult};
+use crate::patterns::Severity;
+use crate::utils::config::AnalysisMode;
use crate::utils::ext::lowercase_ext;
use crate::utils::{Config, query_cache};
use std::cell::RefCell;
@@ -10,6 +13,16 @@ thread_local! {
static PARSER: RefCell = RefCell::new(tree_sitter::Parser::new());
}
+/// Map `byte` to the 0-based start position of the node tree-sitter finds for it.
+fn byte_offset_to_point(tree: &tree_sitter::Tree, byte: usize) -> tree_sitter::Point {
+    // `descendant_for_byte_range` yields the smallest node spanning the offset, so the
+    // result is that node's start (which may precede `byte`); no node → row 0, column 0.
+    tree.root_node()
+        .descendant_for_byte_range(byte, byte)
+        .map(|n| n.start_position())
+        .unwrap_or(tree_sitter::Point { row: 0, column: 0 })
+}
+
pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult> {
tracing::debug!("Running rules on: {}", path.display());
let bytes = std::fs::read(path)?;
@@ -47,30 +60,58 @@ pub(crate) fn run_rules_on_file(path: &Path, cfg: &Config) -> NyxResult, // taint classification if any
+ pub defines: Option, // variable written by this stmt
+ pub uses: Vec, // variables read
+}
+
+/// Control-flow graph whose nodes carry a `NodeInfo` and whose edges carry an `EdgeKind`.
+pub type Cfg = Graph<NodeInfo, EdgeKind>;
+
+// -------------------------------------------------------------------------
+// Utility helpers
+// -------------------------------------------------------------------------
+
+/// Return the source text covered by `n`.
+///
+/// Yields `None` when the node's byte range does not fit inside `code` or the
+/// covered bytes are not valid UTF-8, instead of panicking on a bad range.
+#[inline]
+fn text_of<'a>(n: Node<'a>, code: &'a [u8]) -> Option<String> {
+    code.get(n.start_byte()..n.end_byte())
+        .and_then(|bytes| std::str::from_utf8(bytes).ok())
+        .map(str::to_owned)
+}
+
+/// Return the callee identifier of the first direct child of `n` that is a
+/// call, method call or macro invocation.
+///
+/// Only the direct children of `n` are inspected. The result is `None` when no
+/// such child exists or when the first one has no extractable name.
+fn first_call_ident<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<String> {
+    let mut cursor = n.walk();
+    for c in n.children(&mut cursor) {
+        // Classify each child once; the extraction rules mirror `push_node`.
+        match lookup(lang, c.kind()) {
+            // plain `foo(bar)` style call
+            Kind::CallFn => {
+                return c
+                    .child_by_field_name("function")
+                    .and_then(|f| text_of(f, code));
+            }
+            // `recv.method()` → "recv::method", or just "method" without a receiver
+            Kind::CallMethod => {
+                let func = c
+                    .child_by_field_name("method")
+                    .or_else(|| c.child_by_field_name("name"))
+                    .and_then(|f| text_of(f, code));
+                let recv = c
+                    .child_by_field_name("object")
+                    .and_then(|f| text_of(f, code));
+                return match (recv, func) {
+                    (Some(r), Some(f)) => Some(format!("{r}::{f}")),
+                    // No receiver: the bare method name, or `None` if that is missing too.
+                    (_, func) => func,
+                };
+            }
+            // `my_macro!(…)`
+            Kind::CallMacro => {
+                return c
+                    .child_by_field_name("macro")
+                    .and_then(|f| text_of(f, code));
+            }
+            _ => {}
+        }
+    }
+    None
+}
+
+/// Append one CFG node describing `ast` to `g` and return its index.
+///
+/// The taint label is looked up from an identifier: the callee name for
+/// call-like nodes, the first inner call for call wrappers, and the raw source
+/// text of `ast` for everything else.
+fn push_node<'a>(
+    g: &mut Cfg,
+    kind: StmtKind,
+    ast: Node<'a>,
+    lang: &str,
+    code: &'a [u8],
+) -> NodeIndex {
+    let ast_kind = lookup(lang, ast.kind());
+    let node_text = |n: Node<'a>| text_of(n, code);
+
+    // Step 1 – identifier taken from the node itself (depends on the AST kind).
+    let own_text = match ast_kind {
+        // plain `foo(bar)` style call
+        Kind::CallFn => ast
+            .child_by_field_name("function")
+            .and_then(node_text)
+            .unwrap_or_default(),
+
+        // method / UFCS call `recv.method()` or `Type::func()`
+        Kind::CallMethod => {
+            let method = ast
+                .child_by_field_name("method")
+                .or_else(|| ast.child_by_field_name("name"))
+                .and_then(node_text);
+            let receiver = ast.child_by_field_name("object").and_then(node_text);
+            match (receiver, method) {
+                (_, None) => String::new(),
+                (None, Some(m)) => m,
+                (Some(r), Some(m)) => format!("{r}::{m}"),
+            }
+        }
+
+        // `my_macro!(…)`
+        Kind::CallMacro => ast
+            .child_by_field_name("macro")
+            .and_then(node_text)
+            .unwrap_or_default(),
+
+        // anything else: the raw source slice
+        _ => node_text(ast).unwrap_or_default(),
+    };
+
+    // A wrapper statement (`let`, expression statement) that contains a call is
+    // identified by that call rather than by the whole line.
+    let text = match ast_kind {
+        Kind::CallWrapper => first_call_ident(ast, lang, code).unwrap_or(own_text),
+        _ => own_text,
+    };
+
+    // Step 2 – label look-up and source span.
+    let label = classify(lang, &text);
+    let span = (ast.start_byte(), ast.end_byte());
+
+    // Step 3 – insert into the graph and log the new node.
+    let (defines, uses) = def_use(ast, code);
+    let node = NodeInfo {
+        kind,
+        span,
+        label,
+        defines,
+        uses,
+    };
+    let idx = g.add_node(node);
+
+    debug!(
+        target: "cfg",
+        "node {} ← {:?} txt=`{}` span={:?} label={:?}",
+        idx.index(),
+        kind,
+        text,
+        span,
+        label
+    );
+    idx
+}
+
+/// Wire every node in `froms` to `to` with an edge of the given `kind`,
+/// logging each edge on the `cfg` debug target.
+#[inline]
+fn connect_all(g: &mut Cfg, froms: &[NodeIndex], to: NodeIndex, kind: EdgeKind) {
+    let dst = to.index();
+    froms.iter().copied().for_each(|src| {
+        debug!(target: "cfg", "edge {} → {} ({:?})", src.index(), dst, kind);
+        g.add_edge(src, to, kind);
+    });
+}
+
+// -------------------------------------------------------------------------
+//  The recursive *work‑horse* that converts an AST node into a CFG slice.
+//  Returns the set of *exit* nodes that need to be wired further.
+// -------------------------------------------------------------------------
+/// Translate `ast` into CFG nodes and edges appended to `g`.
+///
+/// `preds` is the frontier of nodes flowing into `ast`. The returned vector is
+/// the new frontier, i.e. the nodes that still have to be connected to whatever
+/// follows; it is empty after `return` / `break` / `continue`.
+///
+/// A loop node without a body (e.g. from a partially parsed file) degrades to
+/// its header node instead of panicking.
+fn build_sub<'a>(
+    ast: Node<'a>,
+    preds: &[NodeIndex], // predecessor frontier
+    g: &mut Cfg,
+    lang: &str,
+    code: &'a [u8],
+) -> Vec<NodeIndex> {
+    match lookup(lang, ast.kind()) {
+        // ─────────────────────────────────────────────────────────────────
+        //  IF‑/ELSE: two branches that re‑merge afterwards
+        // ─────────────────────────────────────────────────────────────────
+        Kind::If => {
+            // Condition node
+            let cond = push_node(g, StmtKind::If, ast, lang, code);
+            connect_all(g, preds, cond, EdgeKind::Seq);
+
+            // Locate then & else blocks: the first two direct `block` children.
+            let (then_block, else_block) = {
+                let mut cursor = ast.walk();
+                let blocks: Vec<_> = ast
+                    .children(&mut cursor)
+                    .filter(|n| n.kind() == "block")
+                    .collect();
+                (blocks.first().copied(), blocks.get(1).copied())
+            };
+
+            // THEN branch
+            let then_exits = if let Some(b) = then_block {
+                let exits = build_sub(b, &[cond], g, lang, code);
+                // NOTE(review): `exits.first()` is the first *exit* of the branch,
+                // not its entry node — confirm this is the intended True target.
+                if let Some(&first) = exits.first() {
+                    connect_all(g, &[cond], first, EdgeKind::True);
+                }
+                exits
+            } else {
+                vec![cond]
+            };
+
+            // ELSE branch
+            let else_exits = if let Some(b) = else_block {
+                let exits = build_sub(b, &[cond], g, lang, code);
+                if let Some(&first) = exits.first() {
+                    connect_all(g, &[cond], first, EdgeKind::False);
+                }
+                exits
+            } else {
+                // No explicit else → non-taken branch flows to the *then* exits.
+                // NOTE(review): this makes the merged frontier list every then-exit twice.
+                if let Some(&first) = then_exits.first() {
+                    connect_all(g, &[cond], first, EdgeKind::False);
+                }
+                then_exits.clone()
+            };
+
+            // Frontier = union of both branches
+            then_exits.into_iter().chain(else_exits).collect()
+        }
+
+        Kind::InfiniteLoop => {
+            // Synthetic header node
+            let header = push_node(g, StmtKind::Loop, ast, lang, code);
+            connect_all(g, preds, header, EdgeKind::Seq);
+
+            // The body is the `body` field; without one only the header remains.
+            let Some(body) = ast.child_by_field_name("body") else {
+                return vec![header];
+            };
+            let body_exits = build_sub(body, &[header], g, lang, code);
+
+            // Back-edge from every linear exit to header
+            connect_all(g, &body_exits, header, EdgeKind::Back);
+
+            // `loop` may break → those exits are frontiers too
+            body_exits.into_iter().chain([header]).collect()
+        }
+
+        // ─────────────────────────────────────────────────────────────────
+        //  WHILE / FOR: classic loop with a back edge.
+        // ─────────────────────────────────────────────────────────────────
+        Kind::While | Kind::For => {
+            let header = push_node(g, StmtKind::Loop, ast, lang, code);
+            connect_all(g, preds, header, EdgeKind::Seq);
+
+            // Body = `body` field, else the first direct `block` child.
+            let body = ast.child_by_field_name("body").or_else(|| {
+                let mut c = ast.walk();
+                ast.children(&mut c).find(|n| n.kind() == "block")
+            });
+            // Without a body only the header remains.
+            let Some(body) = body else {
+                return vec![header];
+            };
+
+            let body_exits = build_sub(body, &[header], g, lang, code);
+
+            // Back‑edge for every linear exit → header.
+            connect_all(g, &body_exits, header, EdgeKind::Back);
+
+            // Falling out of the loop = header’s false branch.
+            vec![header]
+        }
+
+        // ─────────────────────────────────────────────────────────────────
+        //  Control-flow sinks (return / break / continue).
+        // ─────────────────────────────────────────────────────────────────
+        Kind::Return => {
+            let ret = push_node(g, StmtKind::Return, ast, lang, code);
+            connect_all(g, preds, ret, EdgeKind::Seq);
+            Vec::new() // terminates this path
+        }
+        Kind::Break => {
+            let brk = push_node(g, StmtKind::Break, ast, lang, code);
+            connect_all(g, preds, brk, EdgeKind::Seq);
+            Vec::new()
+        }
+        Kind::Continue => {
+            let cont = push_node(g, StmtKind::Continue, ast, lang, code);
+            connect_all(g, preds, cont, EdgeKind::Seq);
+            Vec::new()
+        }
+
+        // ─────────────────────────────────────────────────────────────────
+        //  BLOCK: statements execute sequentially
+        // ─────────────────────────────────────────────────────────────────
+        Kind::SourceFile | Kind::Block => {
+            let mut cursor = ast.walk();
+            let mut frontier = preds.to_vec();
+            for child in ast.children(&mut cursor) {
+                frontier = build_sub(child, &frontier, g, lang, code);
+            }
+            frontier
+        }
+
+        // Function item – create a header and dive into its body
+        Kind::Function => {
+            let header = push_node(g, StmtKind::Seq, ast, lang, code);
+            connect_all(g, preds, header, EdgeKind::Seq);
+
+            if let Some(body) = ast.child_by_field_name("body") {
+                build_sub(body, &[header], g, lang, code)
+            } else {
+                vec![header] // declaration w/o body
+            }
+        }
+
+        // Statements that **may** contain a call ---------------------------------
+        Kind::CallWrapper => {
+            let mut cursor = ast.walk();
+
+            // A wrapped loop / `if` is built as that construct, not as one node.
+            if let Some(inner) = ast.children(&mut cursor).find(|c| {
+                matches!(
+                    lookup(lang, c.kind()),
+                    Kind::InfiniteLoop | Kind::While | Kind::For | Kind::If
+                )
+            }) {
+                return build_sub(inner, preds, g, lang, code);
+            }
+
+            let has_call = ast.children(&mut cursor).any(|c| {
+                matches!(
+                    lookup(lang, c.kind()),
+                    Kind::CallFn | Kind::CallMethod | Kind::CallMacro
+                )
+            });
+
+            let kind = if has_call {
+                StmtKind::Call
+            } else {
+                StmtKind::Seq
+            };
+            let node = push_node(g, kind, ast, lang, code);
+            connect_all(g, preds, node, EdgeKind::Seq);
+            vec![node]
+        }
+
+        // Trivia we drop completely ---------------------------------------------
+        //   "line_comment" | "block_comment"
+        //   | ";" | "," | "(" | ")" | "{" | "}" | "\n"
+        //   | "use_declaration"
+        //   | "attribute_item"
+        //   | "mod_item" | "type_item"
+        Kind::Trivia => preds.to_vec(),
+
+        // ─────────────────────────────────────────────────────────────────
+        //  Every other node = simple sequential statement
+        // ─────────────────────────────────────────────────────────────────
+        _ => {
+            let n = push_node(g, StmtKind::Seq, ast, lang, code);
+            connect_all(g, preds, n, EdgeKind::Seq);
+            vec![n]
+        }
+    }
+}
+
+// -------------------------------------------------------------------------
+// === PUBLIC ENTRY POINT =================================================
+// -------------------------------------------------------------------------
+
+/// Construct the intraprocedural control‑flow graph for `tree` and return it
+/// together with the index of its entry node.
+///
+/// * Two synthetic nodes are created up front: `Entry` (span `(0, 0)`) and
+///   `Exit` (span `(code.len(), code.len())`); neither carries a label or any
+///   def/use information.
+/// * `build_sub` is invoked on the tree's root node with `Entry` as its only
+///   predecessor; every node it reports as a dangling exit is then linked to
+///   `Exit` with a `Seq` edge.
+/// * When compiled with `debug_assertions`, every node and edge, the number of
+///   nodes reachable from `Entry`, any unreachable nodes and the dominator
+///   structure are additionally logged on the `cfg` debug target.
+pub(crate) fn build_cfg<'a>(tree: &'a Tree, code: &'a [u8], lang: &str) -> (Cfg, NodeIndex) {
+    debug!(target: "cfg", "Building CFG for {:?}", tree.root_node());
+
+    // Both synthetic nodes are zero‑width and differ only in kind and offset.
+    let synthetic = |kind: StmtKind, offset: usize| NodeInfo {
+        kind,
+        span: (offset, offset),
+        label: None,
+        defines: None,
+        uses: Vec::new(),
+    };
+
+    let mut graph: Cfg = Graph::with_capacity(128, 256);
+    let entry = graph.add_node(synthetic(StmtKind::Entry, 0));
+    let exit = graph.add_node(synthetic(StmtKind::Exit, code.len()));
+
+    // Everything in the file hangs below the synthetic ENTRY node.
+    let dangling = build_sub(tree.root_node(), &[entry], &mut graph, lang, code);
+
+    // Whatever is still dangling after the walk flows into the synthetic EXIT.
+    for tail in dangling {
+        connect_all(&mut graph, &[tail], exit, EdgeKind::Seq);
+    }
+
+    debug!(target: "cfg", "CFG DONE — nodes: {}, edges: {}", graph.node_count(), graph.edge_count());
+
+    if cfg!(debug_assertions) {
+        // Dump all nodes …
+        for node in graph.node_indices() {
+            debug!(target: "cfg", " node {:>3}: {:?}", node.index(), graph[node]);
+        }
+        // … and all edges.
+        for edge in graph.edge_references() {
+            debug!(
+                target: "cfg",
+                " edge {:>3} → {:<3} ({:?})",
+                edge.source().index(),
+                edge.target().index(),
+                edge.weight()
+            );
+        }
+
+        // Which nodes can actually be reached from ENTRY?
+        let mut reachable = HashSet::new();
+        let mut walk = Bfs::new(&graph, entry);
+        while let Some(visited) = walk.next(&graph) {
+            reachable.insert(visited);
+        }
+        let total = graph.node_count();
+        debug!(
+            target: "cfg",
+            "reachable nodes: {}/{}",
+            reachable.len(),
+            total
+        );
+        if reachable.len() != total {
+            let unreachable: Vec<_> = graph
+                .node_indices()
+                .filter(|candidate| !reachable.contains(candidate))
+                .collect();
+            debug!(target: "cfg", "‼︎ unreachable nodes: {:?}", unreachable);
+        }
+
+        // Computing the dominators doubles as a structural sanity check.
+        let dominators: Dominators<_> = simple_fast(&graph, entry);
+        debug!(target: "cfg", "dominator tree computed (len = {:?})", dominators);
+    }
+
+    (graph, entry)
+}
+
+/* ---------- TAINT-ANALYSIS PASSES ---------- */
+/// Append the text of every `identifier` node found in the subtree rooted at
+/// `n` to `out`, in depth‑first, left‑to‑right order.
+///
+/// An `identifier` node is treated as a leaf: its text is recorded (when
+/// `text_of` yields one) and its children are not visited.
+fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
+    if n.kind() != "identifier" {
+        // Not an identifier itself – look for identifiers further down.
+        let mut walker = n.walk();
+        for child in n.children(&mut walker) {
+            collect_idents(child, code, out);
+        }
+        return;
+    }
+
+    // `extend` with an `Option` pushes the text only when there is one.
+    out.extend(text_of(n, code));
+}
+
+/// Compute `(defines, uses)` for the AST fragment `ast`.
+///
+/// * `let_declaration` – `defines` is the **first** identifier found in the
+///   `pattern` field, `uses` are all identifiers in the `value` field.
+/// * `assignment_expression` – `defines` is the **last** identifier found in
+///   the `left` field, `uses` are all identifiers in the `right` field.
+/// * any other node – nothing is defined; `uses` are all identifiers in the
+///   whole fragment.
+///
+/// A missing field simply contributes nothing (`None` / no uses).
+fn def_use(ast: Node, code: &[u8]) -> (Option<String>, Vec<String>) {
+    // Identifiers below the named field, or an empty list when the field is absent.
+    let idents_in = |field: &str| -> Vec<String> {
+        let mut found = Vec::new();
+        if let Some(sub) = ast.child_by_field_name(field) {
+            collect_idents(sub, code, &mut found);
+        }
+        found
+    };
+
+    match ast.kind() {
+        // `let <pattern> = <value>;`
+        "let_declaration" => {
+            let defined = idents_in("pattern").into_iter().next();
+            (defined, idents_in("value"))
+        }
+
+        // `<left> = <right>`
+        "assignment_expression" => {
+            let defined = idents_in("left").pop();
+            (defined, idents_in("right"))
+        }
+
+        // No binding is introduced, but variables may still be read.
+        _ => {
+            let mut reads = Vec::new();
+            collect_idents(ast, code, &mut reads);
+            (None, reads)
+        }
+    }
+}
+
+/// Hash a set of names independently of the set's iteration order.
+///
+/// The elements are put into sorted order first and the sorted sequence is
+/// fed to a freshly created `DefaultHasher`, so equal sets always produce the
+/// same value within one build of the program.
+fn set_hash(s: &HashSet<String>) -> u64 {
+    let mut ordered = Vec::with_capacity(s.len());
+    ordered.extend(s.iter());
+    // Set elements are unique, so an unstable sort yields the same order.
+    ordered.sort_unstable();
+
+    let mut hasher = DefaultHasher::new();
+    ordered.hash(&mut hasher);
+    hasher.finish()
+}
+
+/// Transfer function of the taint analysis: given the set of tainted variable
+/// names that holds *before* `node`, return the set that holds *after* it.
+///
+/// Only nodes that define a variable change the set:
+///
+/// * `Source`‑labelled node – the defined variable becomes tainted.
+/// * `Sanitizer`‑labelled node – the defined variable becomes clean, whatever
+///   its arguments were.
+/// * every other node (unlabelled or `Sink`‑labelled, call or not) – classic
+///   gen/kill: the defined variable is tainted iff at least one variable the
+///   node reads is currently tainted, otherwise it is removed from the set.
+///
+/// The previous version had a dedicated `StmtKind::Call` arm whose inner
+/// `Source`/`Sanitizer` cases could never be reached and which otherwise did
+/// nothing, so taint was neither propagated nor killed across ordinary calls
+/// such as `let y = f(x)`. Calls now take the generic gen/kill path.
+///
+/// The input set is left untouched; a modified copy is returned.
+fn apply_taint(node: &NodeInfo, taint: &HashSet<String>) -> HashSet<String> {
+    let mut out = taint.clone();
+
+    if let Some(defined) = &node.defines {
+        let becomes_tainted = match node.label {
+            // A new untrusted value enters the program.
+            Some(DataLabel::Source(_)) => true,
+            // Anything written by a sanitizer is clean.
+            Some(DataLabel::Sanitizer(_)) => false,
+            // Plain data flow: tainted inputs taint the result.
+            _ => node.uses.iter().any(|read| taint.contains(read)),
+        };
+
+        if becomes_tainted {
+            out.insert(defined.clone());
+        } else {
+            out.remove(defined);
+        }
+    }
+
+    out
+}
+
+/// Breadth‑first exploration of `cfg` from `entry` that reports every visit of
+/// a `Sink`‑labelled node which reads at least one tainted variable.
+///
+/// A search state is the pair *(CFG node, hash of the taint set arriving at
+/// it)* (see `set_hash`). Each state is expanded at most once, which keeps the
+/// exploration finite on cyclic graphs.
+///
+/// Every finding is a path of node indices ending at the sink. It is rebuilt
+/// by following the recorded predecessor states backwards and stops at the
+/// first `Source`‑labelled node encountered, or where the predecessor chain
+/// ends if no such node is met.
+pub fn analyse_function(cfg: &Cfg, entry: NodeIndex) -> Vec<Vec<NodeIndex>> {
+    use std::collections::{HashMap, HashSet, VecDeque};
+
+    // (node, hash of the taint set on arrival)
+    type State = (NodeIndex, u64);
+
+    // state → the state it was first discovered from (for path rebuilding)
+    let mut came_from: HashMap<State, State> = HashMap::new();
+    // states that have already been queued
+    let mut visited: HashSet<State> = HashSet::new();
+    // resulting Source→Sink paths
+    let mut findings: Vec<Vec<NodeIndex>> = Vec::new();
+    // work items: node plus the taint set that holds on arrival
+    let mut worklist: VecDeque<(NodeIndex, HashSet<String>)> = VecDeque::new();
+
+    worklist.push_back((entry, HashSet::new()));
+    // The entry state is seeded with the literal hash 0.
+    visited.insert((entry, 0));
+
+    while let Some((node, taint_in)) = worklist.pop_front() {
+        let info = &cfg[node];
+        let taint_out = apply_taint(info, &taint_in); // effect of this node
+        let hash_in = set_hash(&taint_in);
+
+        /* ---------- sink check ---------- */
+        let is_sink = matches!(info.label, Some(DataLabel::Sink(_)));
+        if is_sink && info.uses.iter().any(|read| taint_out.contains(read)) {
+            // Walk the predecessor chain back towards a Source.
+            let mut path = vec![node];
+            let mut key: State = (node, hash_in);
+            while let Some(&(prev, prev_hash)) = came_from.get(&key) {
+                path.push(prev);
+                if matches!(cfg[prev].label, Some(DataLabel::Source(_))) {
+                    break;
+                }
+                key = (prev, prev_hash);
+            }
+            path.reverse();
+            findings.push(path);
+        }
+
+        /* ---------- successors ---------- */
+        let hash_out = set_hash(&taint_out);
+        for succ in cfg.neighbors(node) {
+            let state: State = (succ, hash_out);
+            // `insert` reports whether the state is new.
+            if visited.insert(state) {
+                came_from.insert(state, (node, hash_in));
+                worklist.push_back((succ, taint_out.clone()));
+            }
+        }
+    }
+
+    findings
+}
+
+/// Parses a snippet in which the result of `env::var` is passed to
+/// `Command::new("sh").arg(..)` and expects the analysis to report exactly
+/// one finding.
+#[test]
+fn env_to_arg_is_flagged() {
+ use tree_sitter::Language;
+ let src = br#"
+ use std::env; use std::process::Command;
+ fn main() {
+ let x = env::var("DANGEROUS_ARG").unwrap();
+ Command::new("sh").arg(x).status().unwrap();
+ }"#;
+
+ let mut parser = tree_sitter::Parser::new();
+ parser
+ .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+ .unwrap();
+ let tree = parser.parse(src as &[u8], None).unwrap();
+
+ let (cfg, entry) = build_cfg(&tree, src, "rust");
+ let findings = analyse_function(&cfg, entry);
+
+ assert_eq!(findings.len(), 1); // exactly one unsanitised Source→Sink
+}
+
+/// Branching case: the `if` branch passes the raw `env::var` value to the
+/// command, the `else` branch passes the output of `html_escape::encode_safe`.
+/// Exactly one finding is expected.
+#[test]
+fn taint_through_if_else() {
+ use tree_sitter::Language;
+ let src = br#"
+ use std::env; use std::process::Command;
+ fn main() {
+ let x = env::var("DANGEROUS").unwrap();
+ let safe = html_escape::encode_safe(&x);
+
+ if x.len() > 5 {
+ Command::new("sh").arg(&x).status().unwrap(); // UNSAFE
+ } else {
+ Command::new("sh").arg(&safe).status().unwrap(); // SAFE
+ }
+ }"#;
+
+ let mut parser = tree_sitter::Parser::new();
+ parser
+ .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+ .unwrap();
+ let tree = parser.parse(src as &[u8], None).unwrap();
+
+ let (cfg, entry) = build_cfg(&tree, src, "rust");
+ let findings = analyse_function(&cfg, entry);
+
+ // exactly one path (via the True branch) should be flagged
+ assert_eq!(findings.len(), 1);
+}
+
+/// A value read from `env::var` is modified inside a `while` loop and passed
+/// to the command after the loop; exactly one finding is expected.
+#[test]
+fn taint_through_while_loop() {
+ use tree_sitter::Language;
+ let src = br#"
+ use std::{env, process::Command};
+ fn main() {
+ let mut x = env::var("DANGEROUS").unwrap();
+ while x.len() < 100 { // Loop header (Loop)
+ x.push_str("a");
+ }
+ Command::new("sh").arg(x).status().unwrap(); // Should be flagged
+ }"#;
+
+ let mut parser = tree_sitter::Parser::new();
+ parser
+ .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+ .unwrap();
+ let tree = parser.parse(src as &[u8], None).unwrap();
+
+ let (cfg, entry) = build_cfg(&tree, src, "rust");
+ let findings = analyse_function(&cfg, entry);
+ assert_eq!(findings.len(), 1);
+}
+
+/// Only the output of `html_escape::encode_safe` reaches the command, so the
+/// analysis is expected to report no finding at all.
+#[test]
+fn taint_killed_by_sanitizer() {
+ use tree_sitter::Language;
+ let src = br#"
+ use std::{env, process::Command};
+ fn main() {
+ let x = env::var("DANGEROUS").unwrap();
+ let clean = html_escape::encode_safe(&x); // sanitizer node
+ Command::new("sh").arg(clean).status().unwrap(); // SAFE
+ }"#;
+
+ let mut parser = tree_sitter::Parser::new();
+ parser
+ .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+ .unwrap();
+ let tree = parser.parse(src as &[u8], None).unwrap();
+
+ let (cfg, entry) = build_cfg(&tree, src, "rust");
+ let findings = analyse_function(&cfg, entry);
+ assert!(findings.is_empty());
+}
+
+/// Source and sink both sit inside a `loop { … break; }` body; exactly one
+/// finding is expected.
+#[test]
+fn taint_breaks_out_of_loop() {
+ use tree_sitter::Language;
+ let src = br#"
+ use std::{env, process::Command};
+ fn main() {
+ loop {
+ let x = env::var("DANGEROUS").unwrap();
+ Command::new("sh").arg(&x).status().unwrap(); // vulnerable
+ break;
+ }
+ }"#;
+
+ let mut parser = tree_sitter::Parser::new();
+ parser
+ .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+ .unwrap();
+ let tree = parser.parse(src as &[u8], None).unwrap();
+
+ let (cfg, entry) = build_cfg(&tree, src, "rust");
+ let findings = analyse_function(&cfg, entry);
+ assert_eq!(findings.len(), 1);
+}
+
+/// Two `env::var` reads: `x` reaches a command unchanged, `y` only after going
+/// through `html_escape::encode_safe`. Exactly one finding is expected.
+#[test]
+fn test_two_sources() {
+ use tree_sitter::Language;
+ let src = br#"
+ use std::{env, process::Command};
+ fn main() {
+ let x = env::var("DANGEROUS").unwrap();
+ let y = env::var("SAFE").unwrap();
+ let clean = html_escape::encode_safe(&y);
+ Command::new("sh").arg(x).status().unwrap();
+ Command::new("sh").arg(clean).status().unwrap();
+ }"#;
+
+ let mut parser = tree_sitter::Parser::new();
+ parser
+ .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+ .unwrap();
+ let tree = parser.parse(src as &[u8], None).unwrap();
+
+ let (cfg, entry) = build_cfg(&tree, src, "rust");
+ let findings = analyse_function(&cfg, entry);
+ assert_eq!(findings.len(), 1);
+}
+
+/// Builds and analyses a function containing an early `return` inside an `if`
+/// followed by a plain call, and expects no findings.
+///
+/// NOTE(review): despite the test's name the fixture function is not empty –
+/// what is exercised here is the early‑return shape.
+#[test]
+fn test_should_not_panic_on_empty_function() {
+ use tree_sitter::Language;
+ let src = br#"
+ use std::{env, process::Command};
+ fn f() {
+ if cond() {
+ return;
+ }
+ do_something();
+ }"#;
+
+ let mut parser = tree_sitter::Parser::new();
+ parser
+ .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
+ .unwrap();
+ let tree = parser.parse(src as &[u8], None).unwrap();
+
+ let (cfg, entry) = build_cfg(&tree, src, "rust");
+ let findings = analyse_function(&cfg, entry);
+ assert!(findings.is_empty());
+}
diff --git a/src/cli.rs b/src/cli.rs
index 9b03018b..19a1df0f 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -32,6 +32,15 @@ pub enum Commands {
/// Show only high severity issues
#[arg(long)]
high_only: bool,
+
+ #[arg(long)]
+ ast_only: bool,
+
+ #[arg(long)]
+ cfg_only: bool,
+
+ #[arg(long)]
+ all_targets: bool,
},
/// Manage project indexes
diff --git a/src/commands/clean.rs b/src/commands/clean.rs
index 336de2f0..6932ae40 100644
--- a/src/commands/clean.rs
+++ b/src/commands/clean.rs
@@ -12,7 +12,7 @@ pub fn handle(project: Option, all: bool, config_dir: &std::path::Path)
}
println!("{}", style("✔ All indexes cleaned").green().bold());
} else if let Some(proj_name) = project {
- let db_path = config_dir.join(format!("{}.sqlite", proj_name));
+ let db_path = config_dir.join(format!("{proj_name}.sqlite"));
if db_path.exists() {
fs::remove_file(&db_path)?;
println!(
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
index eb5ef7a4..e3f8236e 100644
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -6,7 +6,7 @@ pub mod scan;
use crate::cli::Commands;
use crate::errors::NyxResult;
use crate::patterns::Severity;
-use crate::utils::config::Config;
+use crate::utils::config::{AnalysisMode, Config};
use std::path::Path;
pub fn handle_command(
@@ -21,11 +21,26 @@ pub fn handle_command(
rebuild_index,
format,
high_only,
+ ast_only,
+ cfg_only,
+ all_targets,
} => {
if high_only {
config.scanner.min_severity = Severity::High
};
+ if ast_only {
+ config.scanner.mode = AnalysisMode::Ast
+ };
+
+ if cfg_only {
+ config.scanner.mode = AnalysisMode::Taint
+ };
+
+ if all_targets {
+ config.scanner.mode = AnalysisMode::Full
+ };
+
scan::handle(&path, no_index, rebuild_index, format, database_dir, config)
}
Commands::Index { action } => index::handle(action, database_dir, config),
diff --git a/src/commands/scan.rs b/src/commands/scan.rs
index 00b46d90..098bf3d3 100644
--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@@ -68,7 +68,7 @@ pub fn handle(
println!("{}", style(path).blue().underlined());
for d in issues {
println!(
- " {:>4}:{:<4} [{}] {}",
+ " {:>4}:{:<4} [{:}] {:}",
d.line,
d.col,
d.severity,
@@ -145,6 +145,17 @@ pub fn scan_with_index_parallel(
} else {
idx.get_issues_from_file(&path).unwrap_or_default()
};
+
+ match cfg.scanner.mode {
+ crate::utils::config::AnalysisMode::Ast => {
+ diags.retain(|d| !d.id.starts_with("taint"));
+ }
+ crate::utils::config::AnalysisMode::Taint => {
+ diags.retain(|d| d.id.starts_with("taint"));
+ }
+ crate::utils::config::AnalysisMode::Full => {}
+ }
+
if !diags.is_empty() {
diag_map
.entry(path.to_string_lossy().to_string())
diff --git a/src/database.rs b/src/database.rs
index f4d5ab0e..c647669d 100644
--- a/src/database.rs
+++ b/src/database.rs
@@ -16,28 +16,35 @@ pub mod index {
const SCHEMA: &str = r#"
PRAGMA foreign_keys = ON;
- CREATE TABLE IF NOT EXISTS files (
- id INTEGER PRIMARY KEY AUTOINCREMENT,
- project TEXT NOT NULL,
- path TEXT NOT NULL,
- hash BLOB NOT NULL,
- mtime INTEGER NOT NULL,
+ CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT,
+ project TEXT NOT NULL,
+ path TEXT NOT NULL,
+ hash BLOB NOT NULL,
+ mtime INTEGER NOT NULL,
scanned_at INTEGER NOT NULL,
UNIQUE(project, path)
);
- CREATE TABLE IF NOT EXISTS issues (
- file_id INTEGER NOT NULL
+ CREATE TABLE IF NOT EXISTS issues (file_id INTEGER NOT NULL
REFERENCES files(id)
ON DELETE CASCADE,
- rule_id TEXT NOT NULL,
- severity TEXT NOT NULL,
- line INTEGER NOT NULL,
- col INTEGER NOT NULL,
- PRIMARY KEY (file_id, rule_id, line, col)
- );
+ rule_id TEXT NOT NULL,
+ severity TEXT NOT NULL,
+ line INTEGER NOT NULL,
+ col INTEGER NOT NULL,
+ PRIMARY KEY (file_id, rule_id, line, col));
+
+ CREATE TABLE IF NOT EXISTS function_summaries (hash TEXT PRIMARY KEY,
+ project TEXT NOT NULL,
+ name TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ summary TEXT NOT NULL,
+ updated_at INTEGER NOT NULL);
"#;
+ // TODO: ADD CLEANS FOR EACH TABLE BASED ON PROJECT WHICH RUNS ON CLEAN
+ // TODO: ADD DROP AND GIVE A CLI PARAMETER FOR DROP
+
/// A single issue row, ready for insertion.
#[derive(Debug, Clone)]
pub struct IssueRow<'a> {
@@ -189,6 +196,50 @@ pub mod index {
Ok(issue_iter.filter_map(Result::ok).collect())
}
+ // pub fn upsert_summary(
+ // &mut self,
+ // project: &str,
+ // path: &Path,
+ // hash: &str,
+ // s: &crate::summary::FuncSummary,
+ // ) -> NyxResult<()> {
+ // let conn = self.c();
+ // let now = chrono::Utc::now().timestamp_millis(); // i64
+ //
+ // conn.execute(
+ // "INSERT INTO function_summaries (hash, project, name, lang, summary, updated_at)
+ // VALUES (?1, ?2, ?3, ?4, ?5, ?6)
+ // ON CONFLICT(hash) DO UPDATE SET summary = excluded.summary,
+ // updated_at = excluded.updated_at",
+ // (
+ // hash,
+ // project,
+ // &s.name,
+ // path.extension().and_then(|e| e.to_str()).unwrap_or_default(),
+ // serde_json::to_string(s).unwrap(), //TODO REPLACE UNWRAP
+ // now,
+ // ),
+ // )?;
+ // Ok(())
+ // }
+ //
+ // pub fn load_all_summaries(&self, project: &str) -> NyxResult>> {
+ // let mut stmt = self
+ // .c()
+ // .prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
+ //
+ // let iter = stmt.query_map([project], |row| {
+ // let json: String = row.get(0)?;
+ // Ok(serde_json::from_str::(json.as_str()).unwrap()) // TODO: REPLACE UNWRAP
+ // })?;
+ //
+ // Ok(iter
+ // .collect::, _>>()?
+ // .into_iter()
+ // .map(|s| unsafe { std::mem::transmute::<_, crate::summary::FuncSummary<'static>>(s) })
+ // .collect())
+ // }
+
/// gets files from the database
pub fn get_files(&self, project: &str) -> NyxResult> {
let mut stmt = self.c().prepare(
@@ -214,6 +265,7 @@ pub mod index {
DROP TABLE IF EXISTS issues;
DROP TABLE IF EXISTS files;
+ DROP TABLE IF EXISTS function_summaries;
PRAGMA foreign_keys = ON;
VACUUM;
diff --git a/src/labels/javascript.rs b/src/labels/javascript.rs
new file mode 100644
index 00000000..b8b99c21
--- /dev/null
+++ b/src/labels/javascript.rs
@@ -0,0 +1,17 @@
+use crate::labels::{Cap, DataLabel, LabelRule};
+
+// TODO: refactor this
+/// Taint label rules for JavaScript: one source rule, one sanitizer rule and
+/// one sink rule.
+///
+/// NOTE(review): `eval` is tagged with `Cap::SHELL_ESCAPE` and `JSON.parse` is
+/// listed as a sanitizer – confirm that both are intended.
+pub static RULES: &[LabelRule] = &[
+ LabelRule {
+ matchers: &["document.location", "window.location"],
+ label: DataLabel::Source(Cap::all()),
+ },
+ LabelRule {
+ matchers: &["JSON.parse"],
+ label: DataLabel::Sanitizer(Cap::JSON_PARSE),
+ },
+ LabelRule {
+ matchers: &["eval"],
+ label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+ },
+];
diff --git a/src/labels/mod.rs b/src/labels/mod.rs
new file mode 100644
index 00000000..48d81222
--- /dev/null
+++ b/src/labels/mod.rs
@@ -0,0 +1,121 @@
+mod javascript;
+mod rust;
+
+use bitflags::bitflags;
+use once_cell::sync::Lazy;
+use phf::Map;
+use std::collections::HashMap;
+
+/// A single labelling rule: `matchers` lists textual patterns and `label` is
+/// the [`DataLabel`] assigned when one of them matches.
+///
+/// As implemented by `classify`, a matcher ending in `_` is compared as a
+/// prefix of the candidate text; any other matcher must be a suffix that
+/// either covers the whole text or is preceded by `.` or `:`. Both sides are
+/// ASCII‑lower‑cased before comparison.
+#[derive(Debug, Clone, Copy)]
+pub struct LabelRule {
+ /// Patterns that trigger this rule.
+ pub matchers: &'static [&'static str],
+ /// Label handed out on a match.
+ pub label: DataLabel,
+}
+
+bitflags! {
+ /// Bit set carried as the payload of every [`DataLabel`] variant; the
+ /// source rules use `Cap::all()`, sanitizer and sink rules name one flag.
+ ///
+ /// NOTE(review): the taint pass next to this definition matches labels
+ /// as `Source(_)` / `Sanitizer(_)` / `Sink(_)` and never inspects the
+ /// flags – presumably they are reserved for pairing sanitizers with
+ /// sinks later; confirm.
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+ pub struct Cap: u8 {
+ const ENV_VAR = 0b0000_0001;
+ const HTML_ESCAPE = 0b0000_0010;
+ const SHELL_ESCAPE = 0b0000_0100;
+ const URL_ENCODE = 0b0000_1000;
+ const JSON_PARSE = 0b0001_0000;
+ // ADD MORE
+ }
+}
+
+/// Language‑independent classification of a Tree‑sitter node kind, as
+/// returned by `lookup`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Kind {
+ // control flow
+ If,
+ InfiniteLoop,
+ While,
+ For,
+ LoopBody,
+ // calls
+ CallFn,
+ CallMethod,
+ CallMacro,
+ // jumps
+ Break,
+ Continue,
+ Return,
+ // structure
+ Block,
+ SourceFile,
+ Function,
+ Assignment,
+ /// Statement node that may wrap a call or a control‑flow expression
+ /// (the Rust table maps `let_declaration` and `expression_statement` here).
+ CallWrapper,
+ /// Comments, punctuation and other nodes the Rust table marks as trivia.
+ Trivia,
+ /// Fallback used by `lookup` when no table entry exists.
+ Other,
+}
+
+/// Role a piece of syntax plays for the taint analysis, as produced by
+/// `classify`. Each variant carries a [`Cap`] bit set.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DataLabel {
+ /// Introduces untrusted data.
+ Source(Cap),
+ /// Produces cleaned data.
+ Sanitizer(Cap),
+ /// Must not receive untrusted data.
+ Sink(Cap),
+}
+
+/// Label rules per language key. A language's long name and its file
+/// extension both resolve to the same rule table.
+static REGISTRY: Lazy<HashMap<&'static str, &'static [LabelRule]>> = Lazy::new(|| {
+    // Supporting another language is a matter of adding its key(s) here,
+    // e.g. `("go", go::RULES)`.
+    HashMap::from([
+        ("rust", rust::RULES),
+        ("rs", rust::RULES),
+        ("javascript", javascript::RULES),
+        ("js", javascript::RULES),
+    ])
+});
+
+type FastMap = &'static Map<&'static str, Kind>;
+
+/// Node‑kind tables per language key, consulted by `lookup`.
+pub(crate) static CLASSIFIERS: Lazy<HashMap<&'static str, FastMap>> = Lazy::new(|| {
+    // Only Rust has a `KINDS` table so far; JavaScript ("javascript" / "js")
+    // and further languages still need to be added here.
+    HashMap::from([("rust", &rust::KINDS), ("rs", &rust::KINDS)])
+});
+
+/// Translate the raw Tree‑sitter node kind `raw` into a [`Kind`] using the
+/// table registered for `lang`.
+///
+/// Returns `Kind::Other` when either the language has no table or the table
+/// has no entry for `raw`. `lang` is used as given (no case folding).
+#[inline(always)]
+pub fn lookup(lang: &str, raw: &str) -> Kind {
+    match CLASSIFIERS.get(lang) {
+        Some(table) => table.get(raw).copied().unwrap_or(Kind::Other),
+        None => Kind::Other,
+    }
+}
+
+/// Decide whether a piece of syntax text is a source, sanitizer or sink.
+///
+/// * `lang` selects the rule table; it is ASCII‑lower‑cased before the lookup
+///   and an unknown language yields `None`.
+/// * Only the part of `text` before the first `(` or `<` is considered, after
+///   trimming and ASCII‑lower‑casing.
+/// * Rules and their matchers are tried in declaration order and the first
+///   hit wins. A matcher ending in `_` must be a prefix of the text; any
+///   other matcher must be a suffix that spans the whole text or directly
+///   follows a `.` or `:`.
+pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {
+    let rules = REGISTRY.get(lang.to_ascii_lowercase().as_str())?;
+
+    let callee = text
+        .split(['(', '<'])
+        .next()
+        .unwrap_or("")
+        .trim()
+        .to_ascii_lowercase();
+
+    // Does a single matcher accept the normalised callee text?
+    let accepts = |matcher: &str| -> bool {
+        let pattern = matcher.to_ascii_lowercase();
+        if pattern.ends_with('_') {
+            return callee.starts_with(&pattern);
+        }
+        match callee.strip_suffix(pattern.as_str()) {
+            // Whole text matched, or the match starts at a path/member boundary.
+            Some(before) => matches!(before.as_bytes().last(), None | Some(b'.' | b':')),
+            None => false,
+        }
+    };
+
+    rules
+        .iter()
+        .find(|rule| rule.matchers.iter().any(|&matcher| accepts(matcher)))
+        .map(|rule| rule.label)
+}
diff --git a/src/labels/rust.rs b/src/labels/rust.rs
new file mode 100644
index 00000000..9a84dbad
--- /dev/null
+++ b/src/labels/rust.rs
@@ -0,0 +1,72 @@
+use crate::labels::{Cap, DataLabel, Kind, LabelRule};
+use phf::{Map, phf_map};
+
+/// Taint label rules for Rust, grouped into sources, sanitizers and sinks.
+///
+/// `classify` lower‑cases both the candidate text and each matcher, so the
+/// lower‑case spellings below also match e.g. `Command::new`. A matcher that
+/// ends in `_` (here `sanitize_`) is treated by `classify` as a prefix.
+pub static RULES: &[LabelRule] = &[
+ // ─────────── Sources ───────────
+ LabelRule {
+ matchers: &["std::env::var", "env::var"],
+ label: DataLabel::Source(Cap::all()),
+ },
+ // ───────── Sanitizers ──────────
+ // `fn sanitize_*(&str) -> String`
+ LabelRule {
+ matchers: &["html_escape::encode_safe", "sanitize_", "sanitize_html"],
+ label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
+ },
+ LabelRule {
+ matchers: &["shell_escape::unix::escape"],
+ label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
+ },
+ // ─────────── Sinks ─────────────
+ // All the key points where untrusted strings reach the OS shell.
+ LabelRule {
+ matchers: &[
+ "command::new",
+ "std::process::command::new",
+ "command::arg",
+ "command::args",
+ "command::status",
+ "command::output",
+ ],
+ label: DataLabel::Sink(Cap::SHELL_ESCAPE),
+ },
+];
+
+/// Maps Tree‑sitter node kinds of the Rust grammar onto the
+/// language‑agnostic [`Kind`]. Node kinds that are missing here resolve to
+/// `Kind::Other` in `lookup`.
+///
+/// The Rust grammar models loops and `return` as *expressions*
+/// (`while_expression`, `for_expression`, `return_expression`). Those names
+/// are listed so that `while`/`for`/`return` are actually recognised; the
+/// `*_statement` spellings are retained so existing lookups keep resolving.
+pub static KINDS: Map<&'static str, Kind> = phf_map! {
+    // control-flow
+    "if_expression"          => Kind::If,
+    "loop_expression"        => Kind::InfiniteLoop,
+    "loop_statement"         => Kind::LoopBody,
+    "while_expression"       => Kind::While,
+    "while_statement"        => Kind::While,
+    "for_expression"         => Kind::For,
+    "for_statement"          => Kind::For,
+
+    "return_expression"      => Kind::Return,
+    "return_statement"       => Kind::Return,
+    "break_expression"       => Kind::Break,
+    "break_statement"        => Kind::Break,
+    "continue_expression"    => Kind::Continue,
+    "continue_statement"     => Kind::Continue,
+
+    // structure
+    "source_file"            => Kind::SourceFile,
+    "block"                  => Kind::Block,
+    "function_item"          => Kind::Function,
+
+    // data-flow
+    "call_expression"        => Kind::CallFn,
+    "method_call_expression" => Kind::CallMethod,
+    "macro_invocation"       => Kind::CallMacro,
+    "let_declaration"        => Kind::CallWrapper,
+    "expression_statement"   => Kind::CallWrapper,
+    "assignment_expression"  => Kind::Assignment,
+
+    // trivia
+    "line_comment"           => Kind::Trivia,
+    "block_comment"          => Kind::Trivia,
+    ";" => Kind::Trivia, "," => Kind::Trivia,
+    "(" => Kind::Trivia, ")" => Kind::Trivia,
+    "{" => Kind::Trivia, "}" => Kind::Trivia, "\n" => Kind::Trivia,
+    "use_declaration"        => Kind::Trivia,
+    "attribute_item"         => Kind::Trivia,
+    "mod_item"               => Kind::Trivia,
+    "type_item"              => Kind::Trivia,
+};
diff --git a/src/main.rs b/src/main.rs
index d1871a55..d6afbd62 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,8 +1,10 @@
mod ast;
+mod cfg;
mod cli;
mod commands;
mod database;
mod errors;
+mod labels;
mod patterns;
mod utils;
mod walk;
@@ -59,6 +61,11 @@ fn main() -> NyxResult<()> {
let mut config = Config::load(config_dir)?;
+ rayon::ThreadPoolBuilder::new()
+ .stack_size(config.performance.rayon_thread_stack_size)
+ .build_global()
+ .expect("set rayon stack size");
+
commands::handle_command(cli.command, database_dir, &mut config)?;
println!(
diff --git a/src/patterns/mod.rs b/src/patterns/mod.rs
index 0cc4173d..d90f0194 100644
--- a/src/patterns/mod.rs
+++ b/src/patterns/mod.rs
@@ -92,7 +92,7 @@ static REGISTRY: Lazy> = Lazy::new(||
m.insert("cpp", cpp::PATTERNS);
m.insert("c++", cpp::PATTERNS);
- // ---- Other languages in the folder ----
+ // ---- Other patterns in the folder ----
m.insert("java", java::PATTERNS);
m.insert("go", go::PATTERNS);
m.insert("php", php::PATTERNS);
@@ -101,14 +101,14 @@ static REGISTRY: Lazy> = Lazy::new(||
m.insert("ruby", ruby::PATTERNS);
m.insert("rb", ruby::PATTERNS);
- tracing::debug!("AST-pattern registry initialised ({} languages)", m.len());
+ tracing::debug!("AST-pattern registry initialised ({} patterns)", m.len());
m
});
/// Return all patterns for the requested language (case-insensitive).
///
-/// Unknown languages yield an **empty** `Vec`.
+/// Unknown patterns yield an **empty** `Vec`.
pub fn load(lang: &str) -> Vec {
let key = lang.to_ascii_lowercase();
REGISTRY.get(key.as_str()).copied().unwrap_or(&[]).to_vec()
diff --git a/src/utils/config.rs b/src/utils/config.rs
index 5d5339d1..32048e0e 100644
--- a/src/utils/config.rs
+++ b/src/utils/config.rs
@@ -8,9 +8,21 @@ use toml;
static DEFAULT_CONFIG_TOML: &str = include_str!("../../default-nyx.conf");
+/// Selects which kinds of diagnostics a scan reports.
+///
+/// Serialised in lower case (`full`, `ast`, `taint`); `Full` is the default.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum AnalysisMode {
+ /// Report everything (no filtering of diagnostics).
+ #[default]
+ Full,
+ /// Report only diagnostics whose id does not start with `taint`.
+ Ast,
+ /// Report only diagnostics whose id starts with `taint`.
+ Taint,
+}
+
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct ScannerConfig {
+ /// The analysis mode to use.
+ pub mode: AnalysisMode,
+
/// The minimum severity level to output
pub min_severity: Severity,
@@ -47,6 +59,7 @@ pub struct ScannerConfig {
impl Default for ScannerConfig {
fn default() -> Self {
Self {
+ mode: AnalysisMode::Full,
min_severity: Severity::Low,
max_file_size_mb: None,
excluded_extensions: vec![
@@ -151,6 +164,9 @@ pub struct PerformanceConfig {
/// capacity = threads × this
pub channel_multiplier: usize,
+ /// The stack size for Rayon threads, in bytes.
+ pub rayon_thread_stack_size: usize,
+
/// Timeout on individual files // TODO: IMPLEMENT
pub scan_timeout_secs: Option,
@@ -167,6 +183,7 @@ impl Default for PerformanceConfig {
worker_threads: None,
batch_size: 100usize,
channel_multiplier: 4usize,
+ rayon_thread_stack_size: 8 * 1024 * 1024, // 8 MiB
scan_timeout_secs: None,
memory_limit_mb: 512,
}
@@ -236,6 +253,7 @@ fn create_example_config(config_dir: &Path) -> NyxResult<()> {
/// supply new exclusions and overriding everything else.
fn merge_configs(mut default: Config, user: Config) -> Config {
// --- ScannerConfig ---
+ default.scanner.mode = user.scanner.mode;
default.scanner.min_severity = user.scanner.min_severity;
default.scanner.max_file_size_mb = user.scanner.max_file_size_mb;
default.scanner.read_global_ignore = user.scanner.read_global_ignore;
@@ -277,6 +295,7 @@ fn merge_configs(mut default: Config, user: Config) -> Config {
default.performance.worker_threads = user.performance.worker_threads;
default.performance.batch_size = user.performance.batch_size;
default.performance.channel_multiplier = user.performance.channel_multiplier;
+ default.performance.rayon_thread_stack_size = user.performance.rayon_thread_stack_size;
default.performance.scan_timeout_secs = user.performance.scan_timeout_secs;
default.performance.memory_limit_mb = user.performance.memory_limit_mb;
diff --git a/src/utils/project.rs b/src/utils/project.rs
index 269ee0e8..ca63887f 100644
--- a/src/utils/project.rs
+++ b/src/utils/project.rs
@@ -9,7 +9,7 @@ pub fn get_project_info(project_path: &Path, config_dir: &Path) -> NyxResult<(St
.ok_or_else(|| NyxError::Other("Unable to determine project name".into()))?;
let db_name = sanitize_project_name(project_name);
- let db_path = config_dir.join(format!("{}.sqlite", db_name));
+ let db_path = config_dir.join(format!("{db_name}.sqlite"));
Ok((project_name.to_owned(), db_path))
}
@@ -41,7 +41,7 @@ fn sanitize_project_name_is_idempotent_and_lossless_enough() {
];
for (input, expected) in samples {
- assert_eq!(sanitize_project_name(input), expected, "input: {}", input);
+ assert_eq!(sanitize_project_name(input), expected, "input: {input}");
assert_eq!(sanitize_project_name(expected), expected);
}
}