mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-30 20:39:39 +02:00
feat(ssa): optimize branch condition handling via constant folding, enhance precision for taint analysis, and expand OWASP Benchmark support
This commit is contained in:
parent
ec76c9e08f
commit
9c99f6c6a9
22 changed files with 1020 additions and 17 deletions
|
|
@ -55,6 +55,19 @@ _CAP_BIT_TABLE = [
|
|||
(1 << 20, "prototype_pollution"),
|
||||
]
|
||||
|
||||
# Static lens (see --static): SHELL_ESCAPE (1<<2) is the command-injection sink
|
||||
# cap for *every* language (`grep SHELL_ESCAPE src/labels/` — all Sink uses are
|
||||
# command-exec; CODE_EXEC=1<<10 is the eval/code-exec variant, also cmdi). In a
|
||||
# normal `nyx scan` (no dynamic confirmation) a Java cmdi finding carries only
|
||||
# SHELL_ESCAPE; the SHELL_ESCAPE→CODE_EXEC remap that buckets it as cmdi is gated
|
||||
# on VerifyStatus::Confirmed (src/commands/scan.rs), so with 0 confirmations the
|
||||
# default table leaves these in "other" and the cmdi cell reads 0/0/N. The
|
||||
# static lens appends SHELL_ESCAPE→cmdi at the LOWEST priority (after every other
|
||||
# bit) so a SHELL_ESCAPE-only finding buckets as cmdi while a finding that also
|
||||
# carries a higher-priority sink bit (e.g. FILE_IO) keeps its existing bucket.
|
||||
# Opt-in via --static so the default confirmed-recall bucketing is byte-identical.
|
||||
_CAP_BIT_TABLE_STATIC = _CAP_BIT_TABLE + [(1 << 2, "cmdi")] # SHELL_ESCAPE
|
||||
|
||||
# Substring → cap lookup for rule IDs. Order matters: most specific first.
|
||||
_CAP_RULE_TABLE = [
|
||||
("path_traversal", "path_traversal"),
|
||||
|
|
@ -83,12 +96,13 @@ def load_json(path: str) -> object:
|
|||
return json.load(f)
|
||||
|
||||
|
||||
def cap_of(finding: dict) -> str:
|
||||
def cap_of(finding: dict, static_lens: bool = False) -> str:
|
||||
# 1. Prefer evidence.sink_caps bitmask — the engine's own classification.
|
||||
ev = finding.get("evidence", {}) or {}
|
||||
sink_caps = ev.get("sink_caps")
|
||||
if isinstance(sink_caps, int) and sink_caps:
|
||||
for bit, name in _CAP_BIT_TABLE:
|
||||
table = _CAP_BIT_TABLE_STATIC if static_lens else _CAP_BIT_TABLE
|
||||
for bit, name in table:
|
||||
if sink_caps & bit:
|
||||
return name
|
||||
# 2. Fall back to rule id substring (e.g. py.cmdi.os_system, java.deser.readobject).
|
||||
|
|
@ -383,6 +397,20 @@ def main() -> int:
|
|||
default="",
|
||||
help="path to a previous results JSON; fail on monotonic-improvement regression",
|
||||
)
|
||||
p.add_argument(
|
||||
"--static",
|
||||
action="store_true",
|
||||
help=(
|
||||
"static lens: bucket SHELL_ESCAPE (1<<2) findings as cmdi even when "
|
||||
"they are unconfirmed. Java (and other) command-exec sinks carry "
|
||||
"SHELL_ESCAPE and only get remapped to CODE_EXEC on dynamic Confirm; "
|
||||
"without this flag, an env with 0 confirmations reads the cmdi cell "
|
||||
"as 0/0/N regardless of static quality. SHELL_ESCAPE is the "
|
||||
"command-injection sink cap for every language, so this is sound "
|
||||
"globally; it is opt-in only so the default confirmed-recall "
|
||||
"bucketing stays byte-identical."
|
||||
),
|
||||
)
|
||||
args = p.parse_args()
|
||||
lang_filter = {l.strip() for l in args.lang.split(",") if l.strip()}
|
||||
|
||||
|
|
@ -418,7 +446,7 @@ def main() -> int:
|
|||
continue
|
||||
f_path = f.get("path", "")
|
||||
f_line = f.get("line", 0)
|
||||
f_cap = cap_of(f)
|
||||
f_cap = cap_of(f, static_lens=args.static)
|
||||
for idx, entry in enumerate(not_vuln):
|
||||
if idx in used:
|
||||
continue
|
||||
|
|
@ -455,7 +483,7 @@ def main() -> int:
|
|||
)
|
||||
|
||||
for f in findings:
|
||||
cap = cap_of(f)
|
||||
cap = cap_of(f, static_lens=args.static)
|
||||
lang = lang_of(f)
|
||||
key = (cap, lang)
|
||||
ev = f.get("evidence", {}) or {}
|
||||
|
|
@ -501,7 +529,7 @@ def main() -> int:
|
|||
for f in findings:
|
||||
f_path = f.get("path", "")
|
||||
f_line = f.get("line", 0)
|
||||
f_cap = cap_of(f)
|
||||
f_cap = cap_of(f, static_lens=args.static)
|
||||
cap = f_cap
|
||||
lang = lang_of(f)
|
||||
cell_key = (cap, lang)
|
||||
|
|
|
|||
|
|
@ -46,6 +46,8 @@ def write_json(path: Path, data: object) -> None:
|
|||
# Cap bit positions cribbed from tabulate.py / src/labels/mod.rs.
|
||||
SINK_BIT_SQL = 1 << 7 # SQL_QUERY
|
||||
SINK_BIT_CMDI = 1 << 10 # CODE_EXEC
|
||||
SINK_BIT_SHELL = 1 << 2 # SHELL_ESCAPE (Java/other command-exec sink)
|
||||
SINK_BIT_FILE = 1 << 5 # FILE_IO (path_traversal)
|
||||
|
||||
|
||||
def python_finding(cap_bit: int, path: str, line: int, status: str | None) -> dict:
|
||||
|
|
@ -353,6 +355,91 @@ def test_lang_filter_scopes_findings_and_gt(tmp: Path) -> None:
|
|||
assert all(lang != "javascript" for _cap, lang in cells), cells
|
||||
|
||||
|
||||
def test_static_lens_buckets_shell_escape_as_cmdi(tmp: Path) -> None:
|
||||
# Caveat-1 fix: in an env with 0 dynamic confirmations a Java command-exec
|
||||
# finding carries only SHELL_ESCAPE (1<<2), which the default bit table
|
||||
# leaves in "other" — so the cmdi cell reads 0 TP / N FN regardless of
|
||||
# static quality. --static appends SHELL_ESCAPE→cmdi so static recall is
|
||||
# measurable without dynamic confirmation.
|
||||
gt = tmp / "gt.json"
|
||||
write_json(
|
||||
gt,
|
||||
[{"path": "testcode/Cmd.java", "line": 0, "cap": "cmdi", "vuln": True}],
|
||||
)
|
||||
# Real Java taint findings carry id "taint-unsanitised-flow" (no cap
|
||||
# substring), so the rule-id fallback yields "other" — not the sqli/cmdi
|
||||
# the hand-crafted python_finding id would imply.
|
||||
java_cmdi = {
|
||||
"path": "/x/testcode/Cmd.java",
|
||||
"line": 10,
|
||||
"col": 0,
|
||||
"id": "taint-unsanitised-flow",
|
||||
"evidence": {"sink_caps": SINK_BIT_SHELL, "dynamic_verdict": {"status": "NotConfirmed"}},
|
||||
}
|
||||
scan = tmp / "scan.json"
|
||||
write_json(scan, {"findings": [java_cmdi]})
|
||||
|
||||
# Default lens: the finding buckets as "other", so cmdi shows the GT
|
||||
# positive as a pure FN (recall 0) — the measurement gap.
|
||||
default = tmp / "default.json"
|
||||
write_json(default, [])
|
||||
proc = run_tabulate(
|
||||
"--label", "owasp",
|
||||
"--scan", str(scan),
|
||||
"--ground-truth", str(gt),
|
||||
"--append", str(default),
|
||||
)
|
||||
assert proc.returncode == 0, proc.stdout + proc.stderr
|
||||
cells = {(c["cap"], c["lang"]): c for c in json.loads(default.read_text())[-1]["cells"]}
|
||||
assert ("cmdi", "java") in cells and cells[("cmdi", "java")]["tp"] == 0, cells
|
||||
assert cells[("cmdi", "java")]["fn"] == 1, cells[("cmdi", "java")]
|
||||
assert ("other", "java") in cells, f"SHELL_ESCAPE must bucket as other by default: {list(cells)}"
|
||||
|
||||
# Static lens: the finding buckets as cmdi → recall measurable (TP=1, FN=0).
|
||||
static = tmp / "static.json"
|
||||
write_json(static, [])
|
||||
proc = run_tabulate(
|
||||
"--label", "owasp",
|
||||
"--scan", str(scan),
|
||||
"--ground-truth", str(gt),
|
||||
"--static",
|
||||
"--append", str(static),
|
||||
)
|
||||
assert proc.returncode == 0, proc.stdout + proc.stderr
|
||||
cells = {(c["cap"], c["lang"]): c for c in json.loads(static.read_text())[-1]["cells"]}
|
||||
cmdi = cells[("cmdi", "java")]
|
||||
assert cmdi["tp"] == 1 and cmdi["fn"] == 0, cmdi
|
||||
assert ("other", "java") not in cells, f"static lens must reclaim the other-bucketed finding: {list(cells)}"
|
||||
|
||||
|
||||
def test_static_lens_preserves_higher_priority_bits(tmp: Path) -> None:
|
||||
# A finding carrying BOTH FILE_IO and SHELL_ESCAPE must keep bucketing as
|
||||
# path_traversal under the static lens (SHELL_ESCAPE is appended at lowest
|
||||
# priority), so the static lens never steals a finding from a non-cmdi cell.
|
||||
scan = tmp / "scan.json"
|
||||
write_json(
|
||||
scan,
|
||||
{
|
||||
"findings": [
|
||||
python_finding(SINK_BIT_FILE | SINK_BIT_SHELL, "B.java", 10, "NotConfirmed"),
|
||||
]
|
||||
},
|
||||
)
|
||||
for flag in ([], ["--static"]):
|
||||
append = tmp / f"out{len(flag)}.json"
|
||||
write_json(append, [])
|
||||
proc = run_tabulate(
|
||||
"--label", "x",
|
||||
"--scan", str(scan),
|
||||
"--inhouse",
|
||||
"--append", str(append),
|
||||
*flag,
|
||||
)
|
||||
assert proc.returncode == 0, proc.stdout + proc.stderr
|
||||
caps = {c["cap"] for c in json.loads(append.read_text())[-1]["cells"]}
|
||||
assert caps == {"path_traversal"}, f"flag={flag}: {caps}"
|
||||
|
||||
|
||||
def test_budget_malformed_exits_3(tmp: Path) -> None:
|
||||
bad = tmp / "bad.toml"
|
||||
bad.write_text("[default]\nunsupported_rate = not_a_number\n")
|
||||
|
|
@ -661,6 +748,8 @@ def main() -> int:
|
|||
test_manual_triage_stamps_wrong_confirmed,
|
||||
test_manual_triage_ignores_vuln_true_entries,
|
||||
test_lang_filter_scopes_findings_and_gt,
|
||||
test_static_lens_buckets_shell_escape_as_cmdi,
|
||||
test_static_lens_preserves_higher_priority_bits,
|
||||
test_budget_malformed_exits_3,
|
||||
test_relative_gt_path_suffix_matches_absolute_finding,
|
||||
test_unmatched_gt_positive_lands_in_lang_cell,
|
||||
|
|
|
|||
19
tests/fixtures/real_world/java/taint/cmdi_deadbranch_const_safe.expect.json
vendored
Normal file
19
tests/fixtures/real_world/java/taint/cmdi_deadbranch_const_safe.expect.json
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"description": "Dead-branch constant condition (OWASP Benchmark cmdi non-vulnerable shape). `(7*42) - num > 200` with num=86 is 208 > 200 — always true — so `bar` is the constant string and the `else bar = param` arm is statically dead. The constant-branch fold (src/ssa/const_prop.rs::fold_constant_branches) evaluates the captured CondArith tree, prunes the dead edge, and drops the tainted phi operand AND neutralises the dead block so copy-prop cannot alias `bar`<->`param`. Result: `r.exec(cmd + bar)` carries no taint. Asserts NO taint finding fires (strict_unexpected promotes any taint-unsanitised-flow to a hard failure).",
|
||||
"tags": [
|
||||
"taint",
|
||||
"cmdi",
|
||||
"servlet",
|
||||
"runtime",
|
||||
"dead-branch",
|
||||
"const-fold",
|
||||
"precision"
|
||||
],
|
||||
"modes": [
|
||||
"full"
|
||||
],
|
||||
"strict_unexpected": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected": []
|
||||
}
|
||||
27
tests/fixtures/real_world/java/taint/cmdi_deadbranch_const_safe.java
vendored
Normal file
27
tests/fixtures/real_world/java/taint/cmdi_deadbranch_const_safe.java
vendored
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
import java.io.*;
|
||||
import javax.servlet.http.*;
|
||||
|
||||
// Dead-branch constant condition (OWASP Benchmark cmdi non-vulnerable shape).
|
||||
// The guard `(7*42) - num > 200` is `294 - 86 = 208 > 200`, i.e. ALWAYS true,
|
||||
// so `bar` is provably the constant string and the tainted `else` arm
|
||||
// (`bar = param`) is unreachable. The constant-branch fold
|
||||
// (`fold_constant_branches`) must prune the dead edge and drop the tainted
|
||||
// phi operand so `r.exec(cmd + bar)` carries no attacker data — NO finding.
|
||||
public class DeadBranchConstSafe extends HttpServlet {
|
||||
protected void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws IOException {
|
||||
String param = request.getHeader("vector");
|
||||
|
||||
String bar;
|
||||
int num = 86;
|
||||
if ((7 * 42) - num > 200) {
|
||||
bar = "This_should_always_happen";
|
||||
} else {
|
||||
bar = param;
|
||||
}
|
||||
|
||||
String cmd = "echo ";
|
||||
Runtime r = Runtime.getRuntime();
|
||||
Process p = r.exec(cmd + bar);
|
||||
}
|
||||
}
|
||||
32
tests/fixtures/real_world/java/taint/cmdi_deadbranch_param_vuln.expect.json
vendored
Normal file
32
tests/fixtures/real_world/java/taint/cmdi_deadbranch_param_vuln.expect.json
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"description": "Dead-branch constant condition with VULNERABLE polarity. `(500/42) + num > 200` is `11 + 196 = 207 > 200` (integer division) — always true — and the TRUE arm assigns the tainted `param`, so the reachable branch carries taint and only the `else bar = \"...\"` arm is dead. The constant-branch fold must prune the DEAD else edge while keeping the live `bar = param`, so the command-injection finding at `r.exec(cmd + bar)` MUST still fire. Zero-false-negative guard: it proves the fold never prunes the reachable (tainted) arm.",
|
||||
"tags": [
|
||||
"taint",
|
||||
"cmdi",
|
||||
"servlet",
|
||||
"runtime",
|
||||
"dead-branch",
|
||||
"const-fold",
|
||||
"no-false-negative"
|
||||
],
|
||||
"modes": [
|
||||
"full"
|
||||
],
|
||||
"strict_unexpected": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"rule_id": "taint-unsanitised-flow",
|
||||
"severity": "HIGH",
|
||||
"must_match": true,
|
||||
"line_range": [
|
||||
26,
|
||||
26
|
||||
],
|
||||
"evidence_contains": [],
|
||||
"notes": "request.getHeader (line 15) flows into bar on the always-taken true arm (line 21), then into r.exec at line 26. Exactly one finding survives.",
|
||||
"max_count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
28
tests/fixtures/real_world/java/taint/cmdi_deadbranch_param_vuln.java
vendored
Normal file
28
tests/fixtures/real_world/java/taint/cmdi_deadbranch_param_vuln.java
vendored
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import java.io.*;
|
||||
import javax.servlet.http.*;
|
||||
|
||||
// Dead-branch constant condition, VULNERABLE polarity (OWASP Benchmark cmdi
|
||||
// vulnerable shape). The guard `(500/42) + num > 200` is `11 + 196 = 207 > 200`
|
||||
// using integer division — ALWAYS true — and the TRUE arm assigns the tainted
|
||||
// `param`. So the live branch carries taint and the `else bar = "never"` arm is
|
||||
// dead. The constant-branch fold must prune the DEAD (else) edge and keep the
|
||||
// reachable tainted `bar = param`, so `r.exec(cmd + bar)` MUST still fire. This
|
||||
// is the zero-false-negative guard: the fold must never prune the live arm.
|
||||
public class DeadBranchParamVuln extends HttpServlet {
|
||||
protected void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws IOException {
|
||||
String param = request.getHeader("vector");
|
||||
|
||||
String bar;
|
||||
int num = 196;
|
||||
if ((500 / 42) + num > 200) {
|
||||
bar = param;
|
||||
} else {
|
||||
bar = "This_should_never_happen";
|
||||
}
|
||||
|
||||
String cmd = "echo ";
|
||||
Runtime r = Runtime.getRuntime();
|
||||
Process p = r.exec(cmd + bar);
|
||||
}
|
||||
}
|
||||
29
tests/fixtures/real_world/java/taint/cmdi_processbuilder_command.expect.json
vendored
Normal file
29
tests/fixtures/real_world/java/taint/cmdi_processbuilder_command.expect.json
vendored
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"description": "HttpServletRequest parameter flows through a List into ProcessBuilder.command(argList) — command injection via the setter form (list attached separately from the constructor, then pb.start()). This is the dominant OWASP Benchmark cmdi shape; resolved via type-qualified ProcessBuilder.command sink on the typed receiver plus container-element taint on the argument list.",
|
||||
"tags": [
|
||||
"taint",
|
||||
"cmdi",
|
||||
"servlet",
|
||||
"container"
|
||||
],
|
||||
"modes": [
|
||||
"full"
|
||||
],
|
||||
"strict_unexpected": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"rule_id": "taint-unsanitised-flow",
|
||||
"severity": "HIGH",
|
||||
"must_match": true,
|
||||
"line_range": [
|
||||
16,
|
||||
16
|
||||
],
|
||||
"evidence_contains": [],
|
||||
"notes": "request.getParameter (line 8) is concatenated into a list element (argList.add at line 13), the list is attached to ProcessBuilder via pb.command(argList) at line 16, and executed by pb.start() at line 17. The type-qualified ProcessBuilder.command sink fires at line 16 on the tainted container argument. Exactly one finding survives.",
|
||||
"max_count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
19
tests/fixtures/real_world/java/taint/cmdi_processbuilder_command.java
vendored
Normal file
19
tests/fixtures/real_world/java/taint/cmdi_processbuilder_command.java
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
import java.io.*;
|
||||
import java.util.*;
|
||||
import javax.servlet.http.*;
|
||||
|
||||
public class ProcessCommandHandler extends HttpServlet {
|
||||
protected void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws IOException {
|
||||
String param = request.getParameter("vector");
|
||||
|
||||
List<String> argList = new ArrayList<String>();
|
||||
argList.add("sh");
|
||||
argList.add("-c");
|
||||
argList.add("echo " + param);
|
||||
|
||||
ProcessBuilder pb = new ProcessBuilder();
|
||||
pb.command(argList);
|
||||
pb.start();
|
||||
}
|
||||
}
|
||||
30
tests/fixtures/real_world/java/taint/cmdi_runtime_split_receiver.expect.json
vendored
Normal file
30
tests/fixtures/real_world/java/taint/cmdi_runtime_split_receiver.expect.json
vendored
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
{
|
||||
"description": "HttpServletRequest header flows into a String[] env array passed to a split-receiver Runtime.exec — command injection via the `Runtime r = Runtime.getRuntime(); ... r.exec(cmd, argsEnv)` shape (the dominant remaining OWASP Benchmark cmdi form). The callee text at the sink is `r.exec`, which does not suffix-match the flat `Runtime.exec` rule; resolution depends on the receiver `r` carrying TypeKind::Runtime (from the `Runtime.getRuntime()` factory / the `Runtime` declared type) so the type-qualified resolver rewrites `r.exec` → `Runtime.exec`. Taint is in the env array (arg 1), so no payload-arg restriction may be applied.",
|
||||
"tags": [
|
||||
"taint",
|
||||
"cmdi",
|
||||
"servlet",
|
||||
"runtime",
|
||||
"split-receiver"
|
||||
],
|
||||
"modes": [
|
||||
"full"
|
||||
],
|
||||
"strict_unexpected": [
|
||||
"taint-unsanitised-flow"
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"rule_id": "taint-unsanitised-flow",
|
||||
"severity": "HIGH",
|
||||
"must_match": true,
|
||||
"line_range": [
|
||||
16,
|
||||
16
|
||||
],
|
||||
"evidence_contains": [],
|
||||
"notes": "request.getHeader (line 7) flows into the env array element argsEnv (line 15), which is passed as arg 1 of r.exec at line 16. The receiver r is typed Runtime via Runtime.getRuntime() (line 13), so the type-qualified Runtime.exec sink fires at the split-receiver call. Exactly one finding survives.",
|
||||
"max_count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
18
tests/fixtures/real_world/java/taint/cmdi_runtime_split_receiver.java
vendored
Normal file
18
tests/fixtures/real_world/java/taint/cmdi_runtime_split_receiver.java
vendored
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import java.io.*;
|
||||
import javax.servlet.http.*;
|
||||
|
||||
public class RuntimeSplitReceiverHandler extends HttpServlet {
|
||||
protected void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws IOException {
|
||||
String param = request.getHeader("vector");
|
||||
|
||||
// Split-receiver Runtime.exec: the receiver is bound to a local in
|
||||
// one statement, then exec is called on it in another. The OWASP
|
||||
// Benchmark cmdi shape places the tainted data in the environment
|
||||
// array (arg 1), not the command (arg 0).
|
||||
Runtime r = Runtime.getRuntime();
|
||||
String[] args = { "/bin/sh", "-c", "echo nyx" };
|
||||
String[] argsEnv = { "TAINT=" + param };
|
||||
r.exec(args, argsEnv);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue