* chore: Exclude CLAUDE.md from Cargo.toml

* feat: add callgraph module and integrate into main analysis flow

* feat: enhance CLI with new severity filtering and analysis modes

* feat: update CHANGELOG with recent enhancements and fixes to severity filtering and output handling

* feat: implement state-model dataflow analysis for resource lifecycle and auth state

* feat: enhance diagnostic output formatting and add evidence structure

* feat: implement attack surface ranking for diagnostics with scoring and sorting

* feat: add comprehensive documentation for installation, usage, and rules reference

* feat: add multiple language support for command execution and evaluation endpoints

* feat: implement inline suppression for findings using `nyx:ignore` comments

* feat: add confidence levels to AST patterns and update output structure

* feat: implement low-noise prioritization system with category filtering, rollup grouping, and configurable budgets

* feat: bump version to 0.4.0 and update changelog with new features and improvements

* feat: add dead code allowances to various functions in mod.rs and real_world_tests.rs
This commit is contained in:
Eli Peter 2026-02-25 21:16:36 -05:00 committed by GitHub
parent 19b578c5c4
commit 1bbe4b1cfb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
456 changed files with 25628 additions and 1228 deletions

View file

@ -0,0 +1,10 @@
#include <cstdlib>
#include <functional>
#include <string>
std::function<void()> create_dangerous_lambda(const char *user_input) { // Fixture: builds a shell command from user_input and returns a callable that runs it.
std::string cmd = std::string("echo ") + user_input; // user_input is appended verbatim; nothing here escapes or validates it
return [cmd]() { // cmd is captured by value, so the closure carries its own copy of the string
system(cmd.c_str()); // intentional command-injection sink; executes only when the returned callable is invoked
};
}

View file

@ -0,0 +1,24 @@
{
"description": "system() call inside lambda capturing user input by value",
"tags": [
"cmdi",
"cfg",
"lambda"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cpp.cmdi.system",
"severity": null,
"must_match": true,
"line_range": [
6,
10
],
"evidence_contains": [],
"notes": "system() called inside lambda with captured user-derived command string"
}
]
}

View file

@ -0,0 +1,19 @@
#include <cstdlib>
#include <cstdio>
namespace security { // Fixture namespace: holds a "validator" that does not sanitize anything.
void validate(const char *input) { // Returns void and never modifies input, so callers learn nothing from calling it.
if (input == nullptr) return; // the only check is a null test; both outcomes just return to the caller
}
}
namespace execution { // Fixture namespace: holds the command-execution sink.
void run(const char *cmd) { // Hands cmd to the shell unchanged.
system(cmd); // intentional sink: cmd reaches system() with no checks inside this function
}
}
void handler(const char *user_input) { // Fixture entry point: calls validate() and then run() on the same pointer.
security::validate(user_input); // validate() returns void, so its outcome cannot gate the next call
execution::run(user_input); // always executed, including when user_input is null
}

View file

@ -0,0 +1,35 @@
{
"description": "system() in namespace \u2014 validation function does not actually sanitize input",
"tags": [
"cmdi",
"cfg",
"namespace"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cpp.cmdi.system",
"severity": null,
"must_match": true,
"line_range": [
10,
14
],
"evidence_contains": [],
"notes": "system(cmd) in execution::run \u2014 AST pattern detects system() call"
},
{
"rule_id": "taint-unsanitised-flow",
"severity": null,
"must_match": false,
"line_range": [
15,
20
],
"evidence_contains": [],
"notes": "user_input flows through validate (which only null-checks) to system \u2014 aspirational cross-function taint"
}
]
}

View file

@ -0,0 +1,19 @@
#include <fstream>
#include <cstdio>
#include <string>
std::string read_raii(const char *path) { // Fixture (non-leaking half): returns the first line read from path via std::ifstream.
std::ifstream file(path); // local stream object; its destructor runs when the function returns
std::string content; // remains empty if nothing is extracted
std::getline(file, content); // reads up to the first newline; the stream state is not checked afterwards
return content;
// RAII: the ifstream destructor closes the file, so there is no explicit close call
}
std::string read_manual(const char *path) { // Fixture (leaking half): reads one line through a C FILE* and never closes it.
FILE *f = fopen(path, "r"); // result is not checked for NULL before use
char buf[256]; // not initialized; only fgets writes into it
fgets(buf, sizeof(buf), f); // return value ignored, so a failed read is not detected
// f not closed -- intentional manual leak (there is no fclose anywhere in this function)
return std::string(buf);
}

View file

@ -0,0 +1,24 @@
{
"description": "RAII ifstream vs manual FILE* \u2014 RAII auto-closes, manual version leaks",
"tags": [
"cfg",
"resource-leak",
"raii"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cfg-resource-leak",
"severity": null,
"must_match": false,
"line_range": [
12,
20
],
"evidence_contains": [],
"notes": "fopen at line 14 never fclose'd in read_manual \u2014 aspirational CFG finding"
}
]
}

View file

@ -0,0 +1,30 @@
#include <cstdio>
#include <stdexcept>
void process_file(const char *path) { // Fixture (leaking half): the exception path leaves f open.
FILE *f = fopen(path, "r"); // result is not checked for NULL
try {
char buf[256];
if (fgets(buf, sizeof(buf), f) == NULL) { // read failure (or EOF) takes the throw path
throw std::runtime_error("read failed"); // thrown before the fclose(f) below is reached
}
fclose(f); // reached only when fgets succeeds
} catch (...) {
// f leaked in catch: the handler has no fclose(f)
throw; // rethrows the active exception to the caller
}
}
void process_safe(const char *path) { // Fixture (non-leaking half): same flow as process_file, but f is closed before throwing.
FILE *f = fopen(path, "r"); // result is not checked for NULL
try {
char buf[256];
if (fgets(buf, sizeof(buf), f) == NULL) { // read failure (or EOF) takes the throw path
fclose(f); // closes the handle before leaving via the exception
throw std::runtime_error("read failed");
}
fclose(f); // success path closes the handle exactly once
} catch (...) {
throw; // rethrows unchanged; f was already closed on the throw path above
}
}

View file

@ -0,0 +1,24 @@
{
"description": "Resource leak in exception path: fopen not closed when exception thrown. Safe version closes before throw.",
"tags": [
"cfg",
"resource-leak",
"exception"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cfg-resource-leak",
"severity": null,
"must_match": false,
"line_range": [
3,
17
],
"evidence_contains": [],
"notes": "fopen at line 5 leaked when exception thrown at line 9 \u2014 catch block re-throws without closing. Aspirational."
}
]
}

View file

@ -0,0 +1,10 @@
#include <cstdio>
#include <cstdlib>
#include <string>
void dangerous(const char *user_input) { // Fixture: two intentional findings in one function (command injection and format string).
char cmd[256];
sprintf(cmd, "cat %s", user_input); // sprintf applies no length limit, so a long user_input can overrun cmd
system(cmd); // intentional sink: cmd contains user_input verbatim
printf(user_input); // intentional: user_input is used as the format string itself
}

View file

@ -0,0 +1,35 @@
{
"description": "Multiple vulnerabilities: command injection via system() and format string via printf(user_input)",
"tags": [
"cmdi",
"fmt",
"mixed"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cpp.cmdi.system",
"severity": null,
"must_match": true,
"line_range": [
6,
10
],
"evidence_contains": [],
"notes": "system(cmd) where cmd built from user input via sprintf"
},
{
"rule_id": "cpp.memory.printf_no_fmt",
"severity": null,
"must_match": true,
"line_range": [
7,
11
],
"evidence_contains": [],
"notes": "printf(user_input) \u2014 user-controlled format string"
}
]
}

View file

@ -0,0 +1,10 @@
#include <cstdlib>
#include <cstdio>
void env_leak() { // Fixture: environment-derived path flows into fopen, and the handle is never closed.
const char *path = std::getenv("USER_PATH"); // taint source; not checked for null
FILE *f = fopen(path, "r"); // sink for the tainted path; result is not checked for NULL
char buf[1024];
fgets(buf, sizeof(buf), f); // return value ignored
// taint (getenv -> fopen) + resource leak (no fclose(f) before the function returns)
}

View file

@ -0,0 +1,35 @@
{
"description": "Combined taint and resource leak: std::getenv flows to fopen, file handle never closed",
"tags": [
"taint",
"state",
"mixed"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"severity": null,
"must_match": true,
"line_range": [
3,
8
],
"evidence_contains": [],
"notes": "std::getenv(\"USER_PATH\") flows to fopen as file path \u2014 path traversal"
},
{
"rule_id": "state-resource-leak",
"severity": null,
"must_match": false,
"line_range": [
4,
11
],
"evidence_contains": [],
"notes": "fopen at line 6 never closed \u2014 aspirational state finding"
}
]
}

View file

@ -0,0 +1,27 @@
#include <cstdio>
void leak() { // Fixture: opens a FILE* and returns without closing it.
FILE *f = fopen("/tmp/test", "r"); // result is not checked for NULL
char buf[256];
fgets(buf, sizeof(buf), f); // last use of f; no fclose follows
}
void clean() { // Fixture: same open/read sequence as leak(), followed by a single fclose.
FILE *f = fopen("/tmp/test", "r"); // result is not checked for NULL
char buf[256];
fgets(buf, sizeof(buf), f); // return value ignored
fclose(f); // the one and only close of f
}
void double_close() { // Fixture: closes the same FILE* twice.
FILE *f = fopen("/tmp/test", "r"); // result is not checked for NULL
fclose(f); // first close
fclose(f); // intentional second close of the already-closed handle
}
void use_after_close() { // Fixture: reads from a FILE* after it has been closed.
FILE *f = fopen("/tmp/test", "r"); // result is not checked for NULL
fclose(f); // handle is closed here
char buf[256];
fgets(buf, sizeof(buf), f); // intentional use of f after the fclose above
}

View file

@ -0,0 +1,45 @@
{
"description": "C++ FILE* lifecycle patterns: leak, double close, use after close",
"tags": [
"state",
"resource-lifecycle"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "state-resource-leak",
"severity": null,
"must_match": true,
"line_range": [
2,
8
],
"evidence_contains": [],
"notes": "fopen at line 4 never closed in leak()"
},
{
"rule_id": "state-double-close",
"severity": null,
"must_match": true,
"line_range": [
16,
21
],
"evidence_contains": [],
"notes": "fclose called twice on same FILE* in double_close()"
},
{
"rule_id": "state-use-after-close",
"severity": null,
"must_match": true,
"line_range": [
23,
29
],
"evidence_contains": [],
"notes": "fgets on f after fclose in use_after_close()"
}
]
}

View file

@ -0,0 +1,11 @@
#include <cstdlib>
#include <cstring>
void branch_leak(int flag) { // Fixture: heap buffer is freed on only one side of a conditional.
char *buf = (char*)malloc(256); // result is not checked for null
if (flag) {
strcpy(buf, "hello"); // 6 bytes including the terminator, well within the 256-byte allocation
free(buf); // the only free, and it sits inside the flag branch
}
// buf leaked if !flag (there is no free on the fall-through path)
}

View file

@ -0,0 +1,24 @@
{
"description": "C++ malloc branch leak: only freed in one branch of conditional",
"tags": [
"state",
"resource-leak",
"branching"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "state-resource-leak-possible",
"severity": null,
"must_match": false,
"line_range": [
3,
12
],
"evidence_contains": [],
"notes": "malloc at line 5 only freed when flag is true \u2014 aspirational branch-aware state analysis"
}
]
}

View file

@ -0,0 +1,18 @@
#include <cstring>
void leak() { // Fixture: allocates with new[] and returns without delete[].
char *buf = new char[1024]; // raw owning pointer; no matching delete[] in this function
strcpy(buf, "hello"); // last use of buf
}
void clean() { // Fixture: same allocation as leak(), released with a single delete[].
char *buf = new char[1024];
strcpy(buf, "hello");
delete[] buf; // array form matches the new[] above
}
void double_delete() { // Fixture: releases the same new[] allocation twice.
char *buf = new char[1024];
delete[] buf; // first release
delete[] buf; // intentional second release of the same pointer
}

View file

@ -0,0 +1,34 @@
{
"description": "C++ new[]/delete[] lifecycle: leak and double delete patterns",
"tags": [
"state",
"resource-lifecycle"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "state-resource-leak",
"severity": null,
"must_match": false,
"line_range": [
2,
7
],
"evidence_contains": [],
"notes": "new char[1024] at line 4 never deleted \u2014 aspirational, requires new/delete tracking"
},
{
"rule_id": "state-double-close",
"severity": null,
"must_match": false,
"line_range": [
14,
19
],
"evidence_contains": [],
"notes": "delete[] called twice \u2014 aspirational, requires new/delete tracking"
}
]
}

View file

@ -0,0 +1,12 @@
#include <memory>
#include <cstdlib>
void smart_clean() { // Fixture (non-leaking half): heap int owned by a std::unique_ptr.
auto ptr = std::make_unique<int>(42); // ptr is a local unique_ptr; it is never read afterwards
// automatically cleaned up when ptr goes out of scope at the end of the function
}
void raw_leak() { // Fixture (leaking half): heap int held only by a raw pointer.
int *ptr = new int(42); // raw owning pointer; no delete appears in this function
// never deleted
}

View file

@ -0,0 +1,24 @@
{
"description": "Smart pointer vs raw new: unique_ptr auto-cleans, raw pointer leaks",
"tags": [
"state",
"resource-leak",
"smart-pointer"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "state-resource-leak",
"severity": null,
"must_match": false,
"line_range": [
8,
13
],
"evidence_contains": [],
"notes": "new int(42) at line 10 never deleted \u2014 aspirational, requires new/delete tracking"
}
]
}

View file

@ -0,0 +1,16 @@
#include <cstdlib>
#include <string>
void execute_user_cmd() { // Fixture: environment value flows straight into system().
const char *cmd = std::getenv("USER_CMD"); // taint source; not checked for null
system(cmd); // intentional sink: cmd is passed on with no validation
}
void execute_safe() { // Fixture (guarded half): the same source and sink, gated by an exact-match allowlist.
const char *cmd = std::getenv("USER_CMD"); // taint source
if (cmd == nullptr) return; // unset variable: nothing to run
std::string s(cmd); // copy used only for the comparisons below
if (s == "ls" || s == "date") { // whole-string equality, so only these two literal commands pass
system(cmd); // still a system() call syntactically, but reachable only for the two allowed values
}
}

View file

@ -0,0 +1,45 @@
{
"description": "C++ command injection: std::getenv flows to system(). Safe version uses allowlist.",
"tags": [
"taint",
"cmdi"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cpp.cmdi.system",
"severity": null,
"must_match": true,
"line_range": [
4,
8
],
"evidence_contains": [],
"notes": "system(cmd) where cmd comes from std::getenv"
},
{
"rule_id": "taint-unsanitised-flow",
"severity": null,
"must_match": true,
"line_range": [
3,
8
],
"evidence_contains": [],
"notes": "std::getenv flows directly to system() without sanitization"
},
{
"rule_id": "cpp.cmdi.system",
"severity": null,
"must_match": true,
"line_range": [
12,
16
],
"evidence_contains": [],
"notes": "AST pattern still matches system() in safe version"
}
]
}

View file

@ -0,0 +1,9 @@
#include <cstdlib>
#include <string>
int main() { // Fixture: environment value concatenated into a shell command.
char *home = std::getenv("HOME"); // taint source; not checked for null before use below
std::string cmd = "ls " + std::string(home); // home is appended verbatim, with no quoting or escaping
system(cmd.c_str()); // intentional sink: runs the concatenated command
return 0;
}

View file

@ -0,0 +1,34 @@
{
"description": "Environment variable concatenated into system() call \u2014 command injection via HOME",
"tags": [
"taint",
"cmdi"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cpp.cmdi.system",
"severity": null,
"must_match": true,
"line_range": [
5,
9
],
"evidence_contains": [],
"notes": "system() called with command built from std::getenv(\"HOME\")"
},
{
"rule_id": "taint-unsanitised-flow",
"severity": null,
"must_match": true,
"line_range": [
3,
9
],
"evidence_contains": [],
"notes": "std::getenv flows through string concatenation into system()"
}
]
}

View file

@ -0,0 +1,10 @@
#include <cstdio>
#include <cstdlib>
void print_unsafe(const char *user_input) { // Fixture: caller-supplied text used as the printf format.
printf(user_input); // intentional: any conversion specifiers inside user_input are interpreted by printf
}
void print_safe(const char *user_input) { // Fixture (safe half): caller-supplied text printed as data.
printf("%s", user_input); // fixed literal format; user_input is only the %s argument
}

View file

@ -0,0 +1,23 @@
{
"description": "C++ format string vulnerability: printf with user-controlled format argument",
"tags": [
"taint",
"fmt"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cpp.memory.printf_no_fmt",
"severity": null,
"must_match": true,
"line_range": [
3,
7
],
"evidence_contains": [],
"notes": "printf(user_input) \u2014 user-controlled format string"
}
]
}

View file

@ -0,0 +1,12 @@
#include <cstdio>
#include <cstring>
void copy_unsafe(const char *input) { // Fixture: unbounded copy into a fixed-size stack buffer.
char buf[64];
strcpy(buf, input); // intentional: strcpy takes no size, so input longer than 63 chars overruns buf
}
void gets_input() { // Fixture: reads a line from stdin with gets().
char buf[128];
gets(buf); // intentional: gets() receives no buffer size, so it cannot limit how much it writes into buf
}

View file

@ -0,0 +1,34 @@
{
"description": "C++ legacy C function usage: strcpy and gets without bounds checking",
"tags": [
"mem",
"buffer-overflow"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cpp.memory.strcpy",
"severity": null,
"must_match": true,
"line_range": [
4,
8
],
"evidence_contains": [],
"notes": "strcpy without bounds check in C++ code"
},
{
"rule_id": "cpp.memory.gets",
"severity": null,
"must_match": true,
"line_range": [
9,
13
],
"evidence_contains": [],
"notes": "gets() always unsafe \u2014 no bounds checking"
}
]
}

View file

@ -0,0 +1,13 @@
#include <cstdio>
#include <cstdlib>
#include <string>
void run_command(const std::string &user_input) { // Fixture: user input concatenated into a popen() command; output echoed to stdout.
std::string cmd = "grep " + user_input + " /var/log/syslog"; // user_input is inserted verbatim, with no quoting or escaping
FILE *fp = popen(cmd.c_str(), "r"); // intentional sink; result is not checked for NULL
char buf[1024];
while (fgets(buf, sizeof(buf), fp)) { // loop ends when fgets returns null
printf("%s", buf); // fixed format string; buf is printed as data
}
pclose(fp); // pipe is closed on the single exit path; the return value is ignored
}

View file

@ -0,0 +1,23 @@
{
"description": "Command injection via popen: user input concatenated into shell command string",
"tags": [
"taint",
"cmdi"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cpp.cmdi.popen",
"severity": null,
"must_match": true,
"line_range": [
5,
9
],
"evidence_contains": [],
"notes": "popen executes command string built from user input via string concatenation"
}
]
}

View file

@ -0,0 +1,12 @@
#include <cstring>
#include <cstdio>
struct Header { // Fixture type: the layout that parse_packet imposes on raw bytes.
int type; // printed as "Type" by parse_packet
int length; // printed as "Length" by parse_packet; not used to bound any read in this file
};
void parse_packet(const char *data) { // Fixture: reinterprets a raw byte pointer as a Header and prints its two fields.
Header *hdr = reinterpret_cast<Header*>(const_cast<char*>(data)); // intentional: const_cast drops const, reinterpret_cast type-puns; no size parameter exists to check that data holds a full Header
printf("Type: %d, Length: %d\n", hdr->type, hdr->length); // reads both ints through the punned pointer
}

View file

@ -0,0 +1,34 @@
{
"description": "Dangerous C++ casts: reinterpret_cast and const_cast used to parse raw data",
"tags": [
"cast",
"unsafe"
],
"modes": [
"full"
],
"expected": [
{
"rule_id": "cpp.memory.reinterpret_cast",
"severity": null,
"must_match": true,
"line_range": [
8,
12
],
"evidence_contains": [],
"notes": "reinterpret_cast<Header*> \u2014 type punning raw bytes to struct pointer"
},
{
"rule_id": "cpp.memory.const_cast",
"severity": null,
"must_match": true,
"line_range": [
8,
12
],
"evidence_contains": [],
"notes": "const_cast<char*> removes const qualifier from data pointer"
}
]
}