mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
1695 lines
64 KiB
Rust
1695 lines
64 KiB
Rust
//! # Recall-gap integration harness (phase 01 baseline)
|
||
//!
|
||
//! Pitboss phase 01 stands up the skeleton; phases 02–11 grow it. The suite
|
||
//! is green on a fresh `master` because every gap-area test starts
|
||
//! `#[ignore]`d, so this file compiles and runs without depending on engine
|
||
//! work that has not landed yet.
|
||
//!
|
||
//! ## Where fixtures live
|
||
//!
|
||
//! Each gap area owns a subdirectory under
|
||
//! `tests/fixtures/realistic/<area>/`. The phase that un-ignores a test is
|
||
//! responsible for populating its fixture. Fixtures are copied into a fresh
|
||
//! tempdir per scan (see [`common::recall::scan_fixture`]) so SQLite,
|
||
//! `nyx.conf`, or stray index artefacts cannot leak between tests.
|
||
//!
|
||
//! ## `ExpectedFinding` shape
|
||
//!
|
||
//! Each test asserts findings with a tuple of
|
||
//! `(rule_id, file_suffix, sink_line, source_line)`:
|
||
//!
|
||
//! - `rule_id` — exact prefix match on `Diag.id`. Taint findings carry a
|
||
//! trailing ` (source N:M)` suffix that the matcher strips before
|
||
//! comparison.
|
||
//! - `file_suffix` — `Diag.path.ends_with(file_suffix)`, which lets callers
|
||
//! ignore the tempdir prefix supplied by the harness.
|
||
//! - `sink_line` — exact match on `Diag.line` (1-based).
|
||
//! - `source_line` — optional `N` parsed from the ` (source N:M)` suffix
|
||
//! on `Diag.id`. Use `None` when the originating line is unstable across
|
||
//! refactors of the fixture.
|
||
//!
|
||
//! ## Phase ownership
|
||
//!
|
||
//! Every phase un-ignores exactly the tests it owns. The mapping is stable:
|
||
//!
|
||
//! | Phase | Test fn |
|
||
//! |-------|-------------------------------|
|
||
//! | 02 | `async_await` |
|
||
//! | 03 | `promise_then_callback`, |
|
||
//! | | `promise_all_taint`, |
|
||
//! | | `for_await_of_stream`, |
|
||
//! | | `promise_then_chain_reentrant`|
|
||
//! | 05 | `fs_promises_*` |
|
||
//! | 06 | `jsx_dangerous_html` |
|
||
//! | 07 | `orm_builders` |
|
||
//! | 08 | `ssrf_url_builders` |
|
||
//! | TBD | `cross_package_ipa`, |
|
||
//! | | `nextjs_entrypoints` |
|
||
//!
|
||
//! Phase 04 ships the TS/JS module resolver foundation but un-ignores no
|
||
//! gap tests of its own — the resolver feeds `FuncKey.namespace` for later
|
||
//! phases. Phases beyond the table may add further `#[ignore]`d tests;
|
||
//! do not move tests between owners.
|
||
|
||
mod common;
|
||
|
||
use common::recall::{ExpectedFinding, assert_finding, assert_finding_with_cap, scan_fixture};
|
||
use nyx_scanner::labels::Cap;
|
||
use std::path::Path;
|
||
|
||
/// Scans the `async_await` fixture directory and checks that a taint flow is
/// reported across the `await` boundary in both the JavaScript and the
/// TypeScript handler.
#[test]
fn async_await_js() {
    let diags = scan_fixture("async_await");

    // JS form — exercises the JavaScript `await_expression` KINDS-map entry.
    let js_expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.js",
        sink_line: 6,
        source_line: Some(4),
    };
    assert_finding(&diags, js_expected);

    // TS form — same source/sink shape, exercises the TypeScript
    // `await_expression` KINDS-map entry. Before this assertion existed the
    // `.ts` fixture was only scanned implicitly by `scan_fixture("async_await")`
    // (smoke only), with no positive guarantee that the TS grammar's
    // await-forwarding lowered taint identically. The source attributes to
    // line 3 (the typed-extractor `req: { body: string }` parameter): the
    // typed-formal pipeline tags the parameter itself as the taint origin,
    // which is the canonical handler-input shape, rather than the
    // intermediate `req.body` access on line 4.
    let ts_expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.ts",
        sink_line: 5,
        source_line: Some(3),
    };
    assert_finding(&diags, ts_expected);
}
|
||
|
||
/// Phase 12 recall-gap (Python).
///
/// tree-sitter-python emits `await x` as a named `await` node (no
/// `_expression` suffix). The engine only models the await boundary as a 1:1
/// forward when `src/labels/python.rs` carries the
/// `"await" => Kind::AwaitForward` entry and `cfg::push_node` sets the
/// corresponding `Kind`-driven `is_await_forward` flag; without them the
/// FastAPI-shape `await request.json()` source never reaches
/// `cursor.execute`.
#[test]
fn async_await_py() {
    let diags = scan_fixture("async_await/handler.py");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.py",
        sink_line: 8,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 12 recall-gap (Python combinator).
///
/// `asyncio.gather(...)` is registered as `PromiseCombinatorKind::All` for
/// Python in `is_promise_combinator`, so the taint of its arguments is
/// unioned onto the awaited result.
#[test]
fn async_await_py_gather() {
    let diags = scan_fixture("async_await/gather.py");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "gather.py",
        sink_line: 14,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 12 recall-gap (Rust).
///
/// `x.await` is mapped explicitly to `Kind::AwaitForward` in
/// `src/labels/rust.rs`, and the `is_await_forward` flag is derived from
/// `lookup(lang, ast.kind()) == Kind::AwaitForward` instead of the raw-string
/// `ast.kind() == "await_expression"` check. The header-shape source flows
/// across the await into the `Command::new("sh").arg(&cmd)` shell-injection
/// sink.
#[test]
fn async_await_rs() {
    let diags = scan_fixture("async_await/handler.rs");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.rs",
        sink_line: 26,
        source_line: Some(25),
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 12 recall-gap (Rust combinator).
///
/// `tokio::join!(...)` is a `macro_invocation` whose arguments live inside a
/// `token_tree`. `extract_arg_uses` walks that token tree and splits on `,`,
/// so the SSA Call carries one argument group per future, and
/// `is_promise_combinator("rust", "tokio::join")` routes the call through the
/// existing combinator transfer. The unioned env-var taint then flows into
/// `reqwest::get` (SSRF sink).
#[test]
fn async_await_rs_join() {
    let diags = scan_fixture("async_await/tokio_join.rs");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "tokio_join.rs",
        sink_line: 11,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 12 deferred-fix (Rust combinator, bare macro form).
///
/// `use tokio::join;` brings the macro into scope and the call site then uses
/// the unqualified `join!(...)`. When an import witness is present,
/// `cfg::push_node` rewrites the bare macro callee text to `tokio::join`, so
/// the existing combinator transfer fires exactly as it does for the
/// qualified form.
#[test]
fn async_await_rs_join_bare() {
    let diags = scan_fixture("async_await/tokio_join_bare.rs");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "tokio_join_bare.rs",
        sink_line: 13,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 03 recall-gap: `.then(cb)` propagates the receiver Promise's resolved
/// value into the callback's first parameter.
///
/// The taint trace attributes at the inner `db.query(data)` sink: the
/// callback-pattern emission is paired with the chain-hop site promotion,
/// which lifts the callback's own-body sink coordinates into the trace
/// finding's primary location.
#[test]
fn promise_then_callback() {
    let diags = scan_fixture("promise_then_callback");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "promise_then_callback.ts",
        sink_line: 9,
        source_line: Some(7),
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 03 recall-gap: `Promise.all([...])` returns a value carrying the
/// union of its element taints.
///
/// `p.then(cb)` then exposes that value to the sink at `db.query(items)` via
/// the callback-pattern emission with chain-hop site promotion.
#[test]
fn promise_all_taint() {
    let diags = scan_fixture("promise_all_taint");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "promise_all_taint.ts",
        sink_line: 8,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Per-element precision for `const [a, b] = await Promise.all([safe,
/// tainted])`. The SSA lowering rewrite in src/ssa/lower.rs maps each
/// destructure binding to `Assign(arg_uses[0][i])`, so `a` binds only to
/// the literal `"ok"` and `b` binds only to the tainted `req.body`. The
/// scalar union from `try_apply_promise_combinator` is bypassed for the
/// per-binding values.
///
/// Skip-slot cases (`const [, b]`, `const [a, ,]`) also need pattern-position
/// indexing: `TaintMeta.array_pattern_indices` carries the source-order
/// position of each binding so the rewrite picks `pd_args[index]` rather
/// than `pd_args[binding_offset]`.
///
/// Every fixture in the `promise_all_destruct` directory is checked the same
/// way: a set of sink lines that must carry a `taint-unsanitised-flow`
/// finding, and a set of neighbouring lines that must not.
#[test]
fn promise_all_destruct_per_index() {
    const RULE: &str = "taint-unsanitised-flow";
    let findings = scan_fixture("promise_all_destruct");

    // Positive check: a taint finding must be reported at `sink_line` of the
    // file ending in `file`; the originating source line is not pinned.
    let must_fire = |file: &'static str, sink_line: usize| {
        assert_finding(
            &findings,
            ExpectedFinding {
                rule_id: RULE,
                file_suffix: file,
                sink_line,
                source_line: None,
            },
        );
    };

    // Negative check: no taint finding may be reported at `line` of the file
    // ending in `file`. On failure the panic message is `claim` followed by
    // every finding reported for that file.
    let must_not_fire = |file: &str, line: usize, claim: &str| {
        let leak = findings
            .iter()
            .any(|f| f.path.ends_with(file) && f.line == line && f.id.starts_with(RULE));
        assert!(
            !leak,
            "{claim}; got:\n{}",
            findings
                .iter()
                .filter(|f| f.path.ends_with(file))
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    };

    // Positive: line 17 sink reachable from req.body via index-1 binding.
    must_fire("promise_all_destruct_fp.ts", 17);

    // Negative: line 16 binds `a` to the literal "ok"; pre-fix the scalar
    // union painted `a` with req.body's taint and produced a FP here.
    must_not_fire(
        "promise_all_destruct_fp.ts",
        16,
        "destructure index-0 binding `a` must not carry req.body taint",
    );

    // Skip-slot positives: only the index-aligned tainted bindings should fire.
    for line in [24usize, 36] {
        must_fire("promise_all_skip_slots.ts", line);
    }

    // Skip-slot negatives: lines 28 (`c` from `[, c]` of `[tainted, safe]`)
    // and 32 (`d` from `[d, ,]` of `[safe, tainted, "extra"]`) must NOT fire.
    for line in [28usize, 32] {
        must_not_fire(
            "promise_all_skip_slots.ts",
            line,
            &format!("skip-slot binding at line {line} must not carry req.body taint"),
        );
    }

    // Python `asyncio.gather` destructure: `pattern_list` + `tuple_pattern`
    // share the same per-index rewrite as JS/TS arrays. Positives at lines
    // 32 / 40 / 50 (tainted-aligned bindings) must fire; negatives at lines
    // 33 / 41 / 51 (safe-aligned bindings) must NOT fire.
    for line in [32usize, 40, 50] {
        must_fire("asyncio_gather_destruct_fp.py", line);
    }
    for line in [33usize, 41, 51] {
        must_not_fire(
            "asyncio_gather_destruct_fp.py",
            line,
            &format!(
                "Python asyncio.gather binding at line {line} must not carry request.args taint"
            ),
        );
    }

    // Bare-array RHS destructure (`const [a, b] = [safe, tainted]`)
    // mirror of the Promise.all destructure precision, gated on
    // `info.call.callee.is_none()` so the combinator path is not
    // affected. Each binding emits its own SSA op keyed on the
    // source-order RHS slot.
    for line in [28usize, 36] {
        must_fire("bare_array_literal_destruct_fp.ts", line);
    }
    for line in [27usize, 37, 44] {
        must_not_fire(
            "bare_array_literal_destruct_fp.ts",
            line,
            &format!("JS/TS bare-array binding at line {line} must not carry req.body taint"),
        );
    }

    // Ruby parallel assignment `a, b = [array_literal]` now gets per-index
    // precision via the bare-array RHS rewrite at `src/ssa/lower.rs`.
    // Each binding emits its own SSA op keyed on its source-order RHS
    // slot — ident slots Assign the slot's value, literal slots emit
    // Const(None). Positives at handler lines 23 / 30 / 35 (tainted-
    // aligned bindings) must fire; negatives at 24 / 29 / 36 / 37
    // (literal-aligned bindings) must NOT fire.
    for line in [23usize, 30, 35] {
        must_fire("ruby_parallel_assignment_fp.rb", line);
    }
    for line in [24usize, 29, 36, 37] {
        must_not_fire(
            "ruby_parallel_assignment_fp.rb",
            line,
            &format!("Ruby parallel assignment binding at line {line} must not carry name taint"),
        );
    }

    // Complex-slot bare-array RHS destructure (`const [a, b] =
    // [normalize(req.body.cmd), 'static']`). The helper now classifies
    // call / binary / subscript / member access / template-string slots
    // as `Complex(inner_uses)` rather than bailing. Each Complex slot
    // emits a slot-scoped `Assign` (or `Source` when the outer node
    // carries a Source label), so the literal-aligned binding is
    // correctly clean. Positives at lines 32 / 39 / 46 / 54 / 62 fire;
    // negatives at lines 33 / 40 / 47 / 55 / 63 must NOT fire.
    for line in [32usize, 39, 46, 54, 62] {
        must_fire("complex_slot_destruct_fp.ts", line);
    }
    for line in [33usize, 40, 47, 55, 63] {
        must_not_fire(
            "complex_slot_destruct_fp.ts",
            line,
            &format!("complex-slot literal binding at line {line} must not carry req.body taint"),
        );
    }

    // Per-slot Source classification: when two Complex slots sit next to
    // each other and ONLY one slot's subtree contains a Source-classified
    // member-expression, the safe Complex sibling stays slot-scoped instead
    // of inheriting the outer-node Source. Pre-session 0047 the legacy
    // outer-node fallback painted both slots, producing a FP on the safe
    // sibling's binding.
    for line in [27usize, 34, 41] {
        must_fire("complex_complex_per_slot_fp.ts", line);
    }
    for line in [28usize, 35, 42] {
        must_not_fire(
            "complex_complex_per_slot_fp.ts",
            line,
            &format!("safe Complex sibling at line {line} must not inherit per-slot Source"),
        );
    }

    // Slot-scoped transitive taint: when the outer destructure node
    // carries a Source label AND another Complex slot's subtree classifies
    // as Source, the safe Complex sibling whose own subtree contains an
    // identifier bound to a tainted local (e.g.
    // `helper(tainted_local)` where `tainted_local = req.body.cmd`)
    // must still propagate the inner ident's taint through the slot-scoped
    // `Assign`. Pre-session 0048 the kill arm emitted `Const(None)` which
    // dropped the transitive taint.
    for line in [29usize, 30, 36] {
        must_fire("complex_transitive_taint_fp.ts", line);
    }
    for line in [37usize] {
        must_not_fire(
            "complex_transitive_taint_fp.ts",
            line,
            &format!("safe Complex sibling at line {line} must not inherit outer Source"),
        );
    }
}
|
||
|
||
/// Phase 03 recall-gap: `for await (const x of iter)` taints the loop binding
/// `x` from the iterator (Web Streams / async-iterable request body).
#[test]
fn for_await_of_stream() {
    let diags = scan_fixture("for_await_of_stream");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "for_await_of_stream.ts",
        sink_line: 5,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 03 re-entrancy guard: a 2-deep `.then` chain whose inner callback
/// awaits another promise.
///
/// Confirms that the inline cache does not deadlock and that the k=1 depth
/// limit is still enforced. Outer-level taint must still reach the sink even
/// when the inner level cannot recurse.
#[test]
fn promise_then_chain_reentrant() {
    let diags = scan_fixture("promise_then_chain");

    // The chain deliberately has two `.then` levels. At k=1 the inner
    // `.then(inner)` cannot recurse, so the engine treats the inner
    // callback's body as opaque and propagates conservatively. The only
    // requirements here are that the run does not panic and that *some*
    // finding is produced for this file (taint reaches the inner sink via
    // the outer flow).
    let first_hit = diags
        .iter()
        .find(|d| d.path.ends_with("promise_then_chain.ts"));
    assert!(
        first_hit.is_some(),
        "expected at least one finding from promise_then_chain.ts, got:\n{}",
        diags
            .iter()
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
|
||
|
||
/// Phase 05 recall-gap: `import { readFile } from 'fs/promises'` followed by
/// `await readFile(req.body.path)` is a FILE_IO sink.
///
/// The bare-name `readFile` matcher only fires because the file's import
/// table maps the binding to `fs/promises`, which satisfies the
/// `LabelGate::ImportedFromModule` gate.
#[test]
fn fs_promises_readfile() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_readfile.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_readfile.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 05 recall-gap: `await open(req.query.path, "r")` — the same gate as
/// `readFile`, but a different fs/promises method. Confirms that the matcher
/// list covers `open` alongside `readFile`.
#[test]
fn fs_promises_open() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_open.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_open.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 05 recall-gap: the `node:` URL specifier flavour —
/// `import { writeFile } from 'node:fs/promises'`. Both spellings of the
/// module specifier must satisfy the gate.
#[test]
fn fs_promises_node_import() {
    let diags = scan_fixture("fs_promises/path_traversal_node_fs_promises_import.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_node_fs_promises_import.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 05 recall-gap: namespace-import shape —
/// `import * as fsp from 'fs/promises'`. `fsp.readFile(...)` must satisfy the
/// gate via the receiver-name path of the local-import view.
#[test]
fn fs_promises_namespace_import() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_namespace.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_namespace.ts",
        sink_line: 11,
        source_line: Some(10),
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 05 recall-gap: CommonJS require shape —
/// `const { readFile } = require('fs/promises')`.
/// `extract_local_import_view` records the destructured binding, so the
/// bare-name call still satisfies the gate.
#[test]
fn fs_promises_require_form() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_require.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_require.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 05 recall-gap: namespace-of-namespace alias —
/// `import * as fs from 'fs'; const fsp = fs.promises;`.
///
/// The promises-alias extension on `extract_local_import_view` adds
/// `fsp -> fs/promises`, so `fsp.readFile(path)` satisfies the gate without
/// an explicit `import ... from 'fs/promises'` line.
#[test]
fn fs_promises_alias_form() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_alias.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_alias.ts",
        sink_line: 14,
        source_line: Some(13),
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 05 recall-gap: CommonJS form of the alias shape —
/// `const fsp = require('fs').promises;`.
///
/// Same gate as the ESM-import alias test; promises-alias recognises the
/// `.promises` projection on the bare `require('fs')` call.
#[test]
fn fs_promises_alias_require_form() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_alias_require.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_alias_require.ts",
        sink_line: 12,
        source_line: Some(11),
    };
    assert_finding(&diags, expected);
}
|
||
|
||
/// Phase 05 negative: a user-defined `readFile` (no import) must not fire the
/// gated FILE_IO sink — that is the whole point of the import gate.
///
/// Fails if the fixture produces any `taint-unsanitised-flow` or
/// `cfg-unguarded-sink` finding.
#[test]
fn fs_promises_safe_userfn() {
    const FIXTURE_FILE: &str = "path_traversal_fs_promises_safe_userfn.ts";
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_safe_userfn.ts");

    let fired = diags.iter().any(|d| {
        let is_sink_rule =
            d.id.starts_with("taint-unsanitised-flow") || d.id.starts_with("cfg-unguarded-sink");
        d.path.ends_with(FIXTURE_FILE) && is_sink_rule
    });
    assert!(
        !fired,
        "user-defined readFile should not fire the fs/promises gate; got:\n{}",
        diags
            .iter()
            .filter(|d| d.path.ends_with(FIXTURE_FILE))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
|
||
|
||
/// Phase 06 recall-gap: React JSX
/// `<div dangerouslySetInnerHTML={{__html: x}} />`.
///
/// The CFG builder synthesises a sink call from the JSX attribute, so the
/// auto-seeded `input` formal flows into HTML_ESCAPE at the `__html: input`
/// value-span line. Besides that positive, the fixture directory holds three
/// files that must stay silent and one ternary-branch file that must fire.
#[test]
fn jsx_dangerous_html() {
    let diags = scan_fixture("jsx_dangerous_html");

    // True when the file ending in `file` has any taint or unguarded-sink
    // finding.
    let fires_sink = |file: &str| {
        diags.iter().any(|d| {
            d.path.ends_with(file)
                && (d.id.starts_with("taint-unsanitised-flow")
                    || d.id.starts_with("cfg-unguarded-sink"))
        })
    };
    // Failure-message listing of every finding reported for `file`.
    let listing = |file: &str| {
        diags
            .iter()
            .filter(|d| d.path.ends_with(file))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n")
    };
    let must_stay_silent = |file: &str, claim: &str| {
        assert!(!fires_sink(file), "{claim}; got:\n{}", listing(file));
    };

    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "page.tsx",
        sink_line: 8,
        source_line: None,
    };
    assert_finding(&diags, expected);

    // Negative — `__html` is a string literal, no taint flows.
    must_stay_silent(
        "page_safe_literal.tsx",
        "literal __html must not fire dangerouslySetInnerHTML",
    );

    // Negative — `__html: DOMPurify.sanitize(input)` is sanitized.
    must_stay_silent(
        "page_indirect.tsx",
        "DOMPurify.sanitize-routed payload must not fire dangerouslySetInnerHTML",
    );

    // Negative — `__html: pipe(input, sanitizeHtml, DOMPurify.sanitize)` —
    // the fp-ts composition recogniser detects sanitizers in argument
    // position and suppresses the synthetic sink's argument-side flow.
    must_stay_silent(
        "page_pipe.tsx",
        "pipe(...sanitizers) payload must not fire dangerouslySetInnerHTML",
    );

    // Positive (item 11) — JSX inside a ternary RHS branch. The synthesis
    // hook in `lower_ternary_branch` reaches the `__html: input` value span
    // even though the wrapping arm short-circuits into the ternary diamond.
    assert!(
        fires_sink("page_ternary.tsx"),
        "ternary-branch dangerouslySetInnerHTML must fire a sink; got nothing for page_ternary.tsx"
    );
}
|
||
|
||
/// Phase 07 recall-gap: ORM query-builder raw-SQL escape hatches.
///
/// Coverage:
/// - Drizzle `sql.raw(x)` and tagged-template `sql\`...\`` shapes
///   (leading-id `ImportedFromModule(&["drizzle-orm"])` gate)
/// - Sequelize `sequelize.literal(x)` via receiver-type
///   qualification (`TypeKind::Sequelize` → `Sequelize.literal`)
/// - TypeORM `repo.query(...)` via receiver-type qualification
///   (`TypeKind::TypeOrmRepo` → `TypeOrmRepo.query`)
/// - Knex `db.whereRaw(...)` via the new file-level
///   `FileImportsModule(&["knex"])` gate
/// - the `sqli_mikroorm_execute.ts` fixture
///
/// Negatives:
/// - parameterised TypeORM `repo.query("...", [const])` stays silent
/// - bare `whereRaw` / `literal` calls in a file without ORM imports
/// - the `sqli_knex_type_only_safe.ts` fixture
#[test]
fn orm_builders() {
    let diags = scan_fixture("orm_builders");
    let sql_cap = Cap::SQL_QUERY.bits();

    // (file, sink_line) — sink_line points at the actual SQL builder call.
    // `sqli_typeorm_query.ts` previously asserted line 17 (`res.json(rows)`)
    // and was satisfied by a coincidental XSS finding; the real
    // `repo.query(...)` sink lives on line 16, and the cap-aware assertion
    // below pins the SQL_QUERY capability so an XSS regression cannot mask
    // a missing receiver-type-qualified ORM rule.
    let expected_sinks = [
        ("sqli_drizzle_sql_raw.ts", 13usize),
        ("sqli_drizzle_tagged_template.ts", 14usize),
        ("sqli_sequelize_literal.ts", 14usize),
        ("sqli_typeorm_query.ts", 16usize),
        ("sqli_knex_where_raw.ts", 15usize),
        ("sqli_mikroorm_execute.ts", 13usize),
    ];
    for (fixture, sink_line) in expected_sinks {
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: fixture,
            sink_line,
            source_line: None,
        };
        assert_finding_with_cap(&diags, expected, sql_cap);
    }

    let silent_fixtures = [
        "sqli_typeorm_safe_parameterized.ts",
        "sqli_no_orm_import_safe.ts",
        "sqli_knex_type_only_safe.ts",
    ];
    for fixture in silent_fixtures {
        // A leak is a sink-rule finding in this fixture whose evidence
        // carries the SQL_QUERY capability bit; findings without evidence
        // never count.
        let fired = diags.iter().any(|d| {
            let is_sink_rule = d.id.starts_with("taint-unsanitised-flow")
                || d.id.starts_with("cfg-unguarded-sink");
            let touches_sql =
                matches!(d.evidence.as_ref(), Some(ev) if (ev.sink_caps & sql_cap) != 0);
            d.path.ends_with(fixture) && is_sink_rule && touches_sql
        });
        assert!(
            !fired,
            "ORM negative fixture {fixture} must not fire SQL_QUERY; got:\n{}",
            diags
                .iter()
                .filter(|d| d.path.ends_with(fixture))
                .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }
}
|
||
|
||
/// Phase 08 recall-gap: SSRF reached through URL-builder shapes.
///
/// Positive shapes — each must report `taint-unsanitised-flow` carrying
/// `Cap::SSRF` at the listed sink line:
/// - `new URL(taintedPath)`: the path argument's taint propagates into
///   the constructed URL value (no label rule, no summary — covered by
///   the URL-constructor pass added in Phase 08).
/// - `u.searchParams.set(k, taintedV)` / `.append(...)`: the receiver
///   URL becomes tainted via the searchParams alias rule.
/// - `fetch({ url: taintedUrl, ... })`: flows through the
///   destination-aware filter on the SSRF gate.
/// - `fetch(target)` where `target: URL`: carries SSA-level
///   `TypeKind::Url` and the constructor-propagated taint.
///
/// Negative shape:
/// - `new URL(req.body.path, "https://api.cal.com")`: the literal base
///   anchors an origin-locked StringFact prefix that
///   `is_string_safe_for_ssrf` honours, so the SSRF stays silent.
#[test]
fn ssrf_url_builders() {
    const ORIGIN_LOCKED: &str = "ssrf_url_origin_locked.ts";

    let findings = scan_fixture("ssrf_url_builders");
    let ssrf_bits = Cap::SSRF.bits();

    // (fixture file, line of the SSRF sink)
    let expected_sinks: [(&str, usize); 7] = [
        ("ssrf_new_url.ts", 12),
        ("ssrf_searchparams_set.ts", 13),
        ("ssrf_searchparams_append.ts", 12),
        ("ssrf_fetch_object_form.ts", 11),
        ("ssrf_fetch_url_typed_arg.ts", 13),
        ("ssrf_fetch_object_shorthand.ts", 13),
        ("ssrf_fetch_object_shorthand.ts", 19),
    ];
    for &(suffix, line) in &expected_sinks {
        let wanted = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: suffix,
            sink_line: line,
            source_line: None,
        };
        assert_finding_with_cap(&findings, wanted, ssrf_bits);
    }

    // Negative: origin-locked `new URL(path, "https://api.cal.com")` must
    // not fire SSRF — the abstract-string prefix-lock suppresses it.
    let leaked = findings
        .iter()
        .filter(|f| f.path.ends_with(ORIGIN_LOCKED))
        .filter(|f| {
            f.id.starts_with("taint-unsanitised-flow") || f.id.starts_with("cfg-unguarded-sink")
        })
        .any(|f| {
            f.evidence
                .as_ref()
                .is_some_and(|e| (e.sink_caps & ssrf_bits) != 0)
        });
    assert!(
        !leaked,
        "origin-locked URL must not fire SSRF; got:\n{}",
        findings
            .iter()
            .filter(|f| f.path.ends_with(ORIGIN_LOCKED))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
|
||
|
||
/// Phase 14 recall-gap: SSRF and URL-builder coverage beyond JS/TS.
///
/// The cross-language counterpart of `ssrf_url_builders` for Python,
/// Java, Rust, Go, Ruby and PHP. Each language carries:
///
///   * positive — a tainted source flowing into the language's
///     canonical HTTP client sink, asserting `Cap::SSRF` fires.
///   * origin-locked negative — a `(literal_base, tainted_path)` URL
///     builder shape; the abstract-string prefix lock honoured by
///     `is_string_safe_for_ssrf` suppresses the SSRF sink.
///   * search-params positive — a tainted URL passed positionally to
///     a Phase 14-added sink (`OkHttpClient.newCall`,
///     `\GuzzleHttp\Client::request`, etc.) so the new label rules
///     see real exercise alongside the existing flat sinks.
#[test]
fn ssrf_cross_language() {
    let findings = scan_fixture("ssrf");
    let ssrf_bits = Cap::SSRF.bits();

    // True when the fixture has a taint-flow or unguarded-sink finding
    // whose evidence carries the SSRF capability.
    let fires_ssrf = |suffix: &str| {
        findings.iter().any(|f| {
            let flow_rule = f.id.starts_with("taint-unsanitised-flow")
                || f.id.starts_with("cfg-unguarded-sink");
            let ssrf_sink = f
                .evidence
                .as_ref()
                .is_some_and(|e| (e.sink_caps & ssrf_bits) != 0);
            f.path.ends_with(suffix) && ssrf_sink && flow_rule
        })
    };
    // Renders every finding on the fixture for the failure message.
    let listing = |suffix: &str| {
        findings
            .iter()
            .filter(|f| f.path.ends_with(suffix))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let positives = [
        // Python — tainted full URL flowing into requests.get / request.
        "ssrf_py_positive.py",
        "ssrf_py_search_params.py",
        // Java — HttpClient.send + OkHttpClient.newCall (Phase 14 sink).
        "SsrfJavaPositive.java",
        "SsrfJavaSearchParams.java",
        // Rust — reqwest::get + Client::new.get (chained verb-on-instance).
        "ssrf_rs_positive.rs",
        "ssrf_rs_search_params.rs",
        // Go — http.Get + http.NewRequest.
        "ssrf_go_positive.go",
        "ssrf_go_search_params.go",
        // Ruby — Net::HTTP.get + Faraday.get (Phase 14 sink).
        "ssrf_rb_positive.rb",
        "ssrf_rb_search_params.rb",
        // Ruby Faraday.new(url: tainted) construction-time SSRF and
        // Net::HTTP.start(host, port, proxy_addr: tainted) proxy-tainted
        // Destination gates added in the Phase 14 follow-up.
        "ssrf_rb_faraday_new.rb",
        "ssrf_rb_net_http_proxy.rb",
        // PHP — curl_exec via curl_setopt CURLOPT_URL gate (Phase 14)
        // + Guzzle Client::request (Phase 14 sink).
        "ssrf_php_positive.php",
        "ssrf_php_search_params.php",
    ];
    for file in positives {
        assert!(
            fires_ssrf(file),
            "SSRF expected to fire on {file}; got:\n{}",
            listing(file),
        );
    }

    let negatives = [
        "ssrf_py_origin_locked.py",
        "SsrfJavaOriginLocked.java",
        "ssrf_rs_origin_locked.rs",
        "ssrf_rs_origin_locked_const_fmt.rs",
        "ssrf_go_origin_locked.go",
        "ssrf_rb_origin_locked.rb",
        "ssrf_rb_origin_locked_interp.rb",
        "ssrf_php_origin_locked.php",
    ];
    for file in negatives {
        assert!(
            !fires_ssrf(file),
            "origin-locked SSRF must stay silent on {file}; got:\n{}",
            listing(file),
        );
    }
}
|
||
|
||
/// Phase 15 recall-gap: ORM and raw-SQL coverage beyond JS/TS.
///
/// The cross-language counterpart of `orm_builders` for Python, Java,
/// Ruby, Go and PHP. Each language carries:
///
///   * positive raw-string concat — tainted user input concatenated
///     into the SQL string flowing into the language's canonical
///     SQL_QUERY sink.
///   * positive interpolation — same shape but using language-native
///     interpolation (Python f-string inside `text(...)`, Java
///     `String.format`, Ruby `"#{...}"`, Go `fmt.Sprintf`, PHP
///     `"$var"`).
///   * negative parameterised — the parameterised API form with
///     literal SQL template + constant bind args, mirroring phase
///     07's safe-parameterised approach.
#[test]
fn orm_xlang() {
    let findings = scan_fixture("sqli_xlang");
    let sql_bits = Cap::SQL_QUERY.bits();

    // (file, sink_line)
    let expected_sinks: [(&str, usize); 15] = [
        ("sqli_py_psycopg2_concat.py", 16),
        ("sqli_py_sqlalchemy_text_fstring.py", 18),
        ("SqliJavaConcat.java", 18),
        ("SqliJavaHibernateNative.java", 14),
        ("SqliJavaHibernateNamedSession.java", 19),
        ("SqliJavaHibernateChainedSession.java", 23),
        ("sqli_rb_concat.rb", 8),
        ("sqli_rb_where_interp.rb", 9),
        ("sqli_go_concat.go", 14),
        ("sqli_go_gorm_raw.go", 20),
        ("sqli_go_gorm_raw_named.go", 28),
        ("sqli_py_django_qs_bound.py", 14),
        ("sqli_py_django_qs_bare.py", 16),
        ("sqli_php_pdo_concat.php", 9),
        ("sqli_php_doctrine_interp.php", 10),
    ];
    for &(suffix, line) in &expected_sinks {
        let wanted = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: suffix,
            sink_line: line,
            source_line: None,
        };
        assert_finding_with_cap(&findings, wanted, sql_bits);
    }

    let negatives = [
        "sqli_py_param_safe.py",
        // Phase 15 deferred-fix: tainted bind args at arg 1 of
        // `cursor.execute("SELECT ... WHERE x = %s", (tainted,))` must
        // stay silent on SQL_QUERY because `payload_args = &[0]` on the
        // Destination gate restricts the sink scan to arg 0.
        "sqli_py_param_tainted_binds.py",
        "SqliJavaParamSafe.java",
        // Phase 15 deferred-fix (Java): tainted `setParameter` bind
        // value on a constant `entityManager.createQuery(...)` template
        // must stay silent on SQL_QUERY. Mirrors the Python tainted-
        // binds shape; the Java Destination gate on the createQuery
        // family carries `payload_args = &[0]`.
        "SqliJavaParamTaintedBinds.java",
        "sqli_rb_param_safe.rb",
        "sqli_go_param_safe.go",
        // Phase 15 deferred-fix (Go): tainted bind value at arg 2 of
        // `db.QueryContext(ctx, sql, tainted)` must stay silent. The
        // Destination gate on `db.QueryContext` carries
        // `payload_args = &[1]`, restricting the sink scan to the SQL
        // string at arg 1.
        "sqli_go_param_tainted_binds.go",
        "sqli_php_param_safe.php",
    ];
    for file in negatives {
        // Only findings on this fixture are relevant, both for the leak
        // check and for the failure listing.
        let on_fixture: Vec<_> = findings
            .iter()
            .filter(|f| f.path.ends_with(file))
            .collect();
        let leak = on_fixture.iter().any(|f| {
            let carries_sql = f
                .evidence
                .as_ref()
                .is_some_and(|e| (e.sink_caps & sql_bits) != 0);
            carries_sql
                && (f.id.starts_with("taint-unsanitised-flow")
                    || f.id.starts_with("cfg-unguarded-sink"))
        });
        assert!(
            !leak,
            "parameterised SQLi negative {file} must stay silent on SQL_QUERY; got:\n{}",
            on_fixture
                .iter()
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }
}
|
||
|
||
/// Phase 09 recall-gap: cross-package IPA via FuncKey namespace
/// resolution.
///
/// `unsafeHandler` calls `escapeHtmlNoop`, a passthrough imported from
/// `@scope/util/sanitize`. The engine sees the imported callee's SSA
/// summary via step 0.7 of `resolve_callee_full`, so `req.query.x`
/// taint must propagate into `res.send` on line 7. `safeHandler` calls
/// `stripTags`, a real `replace`-based sanitizer imported from
/// `@scope/util/strip`, and must stay silent at line 13.
#[test]
fn cross_package_ipa() {
    let findings = scan_fixture("cross_package_ipa");

    let passthrough_flow = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.ts",
        sink_line: 7,
        source_line: Some(5),
    };
    assert_finding(&findings, passthrough_flow);

    // Everything reported on the handler; reused for the failure listing.
    let handler_findings: Vec<_> = findings
        .iter()
        .filter(|f| f.path.ends_with("handler.ts"))
        .collect();
    let sanitized_sink_fired = handler_findings
        .iter()
        .any(|f| f.line == 13 && f.id.starts_with("taint-unsanitised-flow"));
    assert!(
        !sanitized_sink_fired,
        "cross-package sanitizer fixture must stay silent at handler.ts:13; got:\n{}",
        handler_findings
            .iter()
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
|
||
|
||
/// Phase 10 recall-gap: Next.js entry-point detection.
///
/// Coverage:
/// - App Router POST handler at `app/api/users/route.ts`: the first
///   formal is typed as `TypeKind::Request`, so `await req.json()`
///   surfaces as a SQL_QUERY sink at the `db.query(body)` call.
/// - File-level `'use server'` directive
///   (`nextjs_server_action.ts`, `nextjs_use_server_directive.ts`):
///   every exported function's formals are seeded as Source taint
///   at SSA entry.
/// - Function-level `'use server'`
///   (`nextjs_use_server_function_level.ts`): only the directive-
///   bearing function is treated as a server action.
/// - `<form action={fn}>` JSX binding (`nextjs_form_action.tsx`):
///   the named callee is tagged `EntryKind::FormAction` and its
///   first formal is seeded as adversary input.
/// - `next/headers` `cookies()` import-gated source: the gated rule
///   fires only when `cookies` is bound from `next/headers`.
#[test]
fn nextjs_entrypoints() {
    let findings = scan_fixture("nextjs_entrypoints");

    // Each fixture asserts the SQL sink fires.
    let expected_sinks: [(&str, usize); 6] = [
        ("route.ts", 11),
        ("nextjs_server_action.ts", 11),
        ("nextjs_use_server_directive.ts", 9),
        ("nextjs_use_server_function_level.ts", 8),
        ("nextjs_form_action.tsx", 10),
        ("nextjs_cookies_source.ts", 12),
    ];
    for &(suffix, line) in &expected_sinks {
        let wanted = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: suffix,
            sink_line: line,
            source_line: None,
        };
        assert_finding(&findings, wanted);
    }
}
|
||
|
||
/// Phase 13 recall-gap: cross-language path traversal.
///
/// Five languages, one positive and one sanitized fixture each. The
/// positives exercise the `Path.read_text` (Python),
/// `Files.readAllBytes` (Java), `tokio::fs::read` (Rust), `os.ReadFile`
/// (Go), and `File.write` (Ruby) FILE_IO sinks added in Phase 13.
/// Sanitized fixtures canonicalise the path through the language-native
/// sanitiser (`Path.resolve` / `Path.normalize` /
/// `PathBuf::canonicalize` / `filepath.Clean` / `Pathname#cleanpath`)
/// and demonstrate the safe pattern by structuring the call chain so no
/// FILE_IO sink reaches the canonical value, keeping the fixture silent.
///
/// Each fixture file is scanned on its own rather than as a directory.
#[test]
fn path_traversal_xlang() {
    // (file, sink_line, source_line)
    let expected_flows: [(&str, usize, Option<usize>); 5] = [
        ("path_traversal.py", 12, Some(11)),
        ("PathTraversal.java", 16, Some(15)),
        ("path_traversal.rs", 22, Some(21)),
        ("path_traversal.go", 14, Some(13)),
        ("path_traversal.rb", 7, Some(6)),
    ];
    for &(file, sink_line, source_line) in &expected_flows {
        let fixture = format!("path_traversal/{file}");
        let findings = scan_fixture(&fixture);
        let wanted = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: file,
            sink_line,
            source_line,
        };
        assert_finding_with_cap(&findings, wanted, Cap::FILE_IO.bits());
    }

    let sanitized = [
        "path_traversal_safe.py",
        "PathTraversalSafe.java",
        "path_traversal_safe.rs",
        "path_traversal_safe.go",
        "path_traversal_safe.rb",
    ];
    for file in sanitized {
        let fixture = format!("path_traversal/{file}");
        let findings = scan_fixture(&fixture);
        // Any taint-flow or unguarded-sink finding on the fixture counts
        // as a leak, regardless of capability.
        let leak = findings
            .iter()
            .filter(|f| f.path.ends_with(file))
            .any(|f| {
                f.id.starts_with("taint-unsanitised-flow")
                    || f.id.starts_with("cfg-unguarded-sink")
            });
        assert!(
            !leak,
            "path_traversal sanitized fixture {file} must stay silent; got:\n{}",
            findings
                .iter()
                .filter(|f| f.path.ends_with(file))
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }
}
|
||
|
||
/// Phase 16 recall-gap: framework entry-point detection across languages.
///
/// One fixture per framework; each takes a request input (function-formal
/// or path-captured kwarg) and pipes it to a language-native sink. Every
/// fixture must fire the expected sink with the request parameter as
/// Source via the entry-kind seeding policy in `taint/ssa_transfer/mod.rs`.
/// This test accepts either `taint-unsanitised-flow` or
/// `cfg-unguarded-sink` as proof that the sink fired.
///
/// The Spring fixture composes with phase 15 (Hibernate
/// `entityManager.createNativeQuery`), proving cross-phase composition
/// holds across languages.
#[test]
fn entry_points_xlang() {
    let findings = scan_fixture("entry_points_xlang");

    // Renders every finding on the fixture for the failure message.
    let listing = |suffix: &str| {
        findings
            .iter()
            .filter(|f| f.path.ends_with(suffix))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let positives = [
        "django_view.py",
        "fastapi_route.py",
        "flask_route.py",
        "spring_controller.java",
        "rails_action.rb",
        "axum_handler.rs",
        "actix_handler.rs",
        "gin_handler.go",
        "express_route.js",
    ];
    for file in positives {
        let hit = findings
            .iter()
            .filter(|f| f.path.ends_with(file))
            .any(|f| {
                f.id.starts_with("taint-unsanitised-flow")
                    || f.id.starts_with("cfg-unguarded-sink")
            });
        assert!(
            hit,
            "Phase 16 entry-point fixture {file} must fire a taint sink; got:\n{}",
            listing(file),
        );
    }
}
|
||
|
||
/// Rust entry-kind seeding precision.
///
/// Typed extractor formals (`Query<T>`, `Json<T>`, `Form<T>`, `Path<T>`,
/// `web::*<T>`) get painted as `Source(UserInput)`, while denylist DI
/// handles (`State<T>`, `Extension<T>`, ...) do not. Without this guard,
/// the scoped-lowering lift for Rust handlers would FP-fire every
/// database / shared-state sink consuming a pool handle.
///
/// The positive shape requires the rule id to be specifically
/// `taint-unsanitised-flow` (not `cfg-unguarded-sink`), so a future
/// regression that drops entry-kind seeding fails this test.
#[test]
fn rust_entry_kind_typed_extractor_seeding() {
    let findings = scan_fixture("entry_points_xlang_rust");

    // Renders every finding on the fixture for the failure message.
    let listing = |suffix: &str| {
        findings
            .iter()
            .filter(|f| f.path.ends_with(suffix))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let positives: [(&str, usize); 2] = [
        ("axum_query_typed_extractor.rs", 12),
        ("actix_path_typed_extractor.rs", 11),
    ];
    for &(file, sink_line) in &positives {
        let hit = findings
            .iter()
            .filter(|f| f.line == sink_line)
            .any(|f| f.path.ends_with(file) && f.id.starts_with("taint-unsanitised-flow"));
        assert!(
            hit,
            "Rust typed-extractor handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow`; got:\n{}",
            listing(file),
        );
    }

    // Negative: State<Arc<DbPool>> formals must not produce
    // taint-unsanitised-flow findings. cfg-unguarded-sink is fine
    // — that is the pre-existing structural backup, not a seeding
    // claim against the formal.
    let state_taint_findings: Vec<String> = findings
        .iter()
        .filter(|f| f.path.ends_with("axum_state_denylist.rs"))
        .filter(|f| f.id.starts_with("taint-unsanitised-flow"))
        .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
        .collect();
    assert!(
        state_taint_findings.is_empty(),
        "State<DbPool> formals must not be painted as Source; got:\n{}",
        state_taint_findings.join("\n"),
    );
}
|
||
|
||
/// Python entry-kind seeding precision for `FlaskRoute`.
///
/// Path-bound formals (`@app.route("/u/<name>")` + `def view(name):`)
/// get painted as `Source(UserInput)`, while routes without path
/// captures stay un-seeded. Without per-formal route-capture gating,
/// Python handlers fell back to `cfg-unguarded-sink` for path-bound
/// flows.
///
/// The positive shape requires the rule id to be specifically
/// `taint-unsanitised-flow` (not `cfg-unguarded-sink`), so a future
/// regression that drops entry-kind seeding fails this test. The
/// negative shape pins the absence of taint findings on a no-capture
/// route (no formals, no seed, no flow).
#[test]
fn python_flask_route_path_capture_seeding() {
    let findings = scan_fixture("entry_points_xlang_python");

    let positives: [(&str, usize); 2] = [
        ("flask_path_capture.py", 14),
        ("flask_converter_capture.py", 14),
    ];
    for &(file, sink_line) in &positives {
        // Findings on this fixture, reused for the failure listing.
        let on_fixture: Vec<_> = findings
            .iter()
            .filter(|f| f.path.ends_with(file))
            .collect();
        let hit = on_fixture
            .iter()
            .any(|f| f.line == sink_line && f.id.starts_with("taint-unsanitised-flow"));
        assert!(
            hit,
            "Python Flask path-capture handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow`; got:\n{}",
            on_fixture
                .iter()
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }

    // Negative: a Flask route with no path captures and a literal
    // sink argument must not surface `taint-unsanitised-flow`.
    let no_capture_taint: Vec<String> = findings
        .iter()
        .filter(|f| f.path.ends_with("flask_no_capture.py"))
        .filter(|f| f.id.starts_with("taint-unsanitised-flow"))
        .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
        .collect();
    assert!(
        no_capture_taint.is_empty(),
        "Flask route without path captures must not paint formals as Source; got:\n{}",
        no_capture_taint.join("\n"),
    );
}
|
||
|
||
/// Python FastAPI entry-kind seeding precision for `FastApiRoute`.
///
/// Path-bound formals from `{name}` brace-segment captures
/// (`@app.get("/items/{item_id}")` + `def read_item(item_id: str):`)
/// and Annotated typed extractors (`q: Annotated[str, Query()]`) get
/// painted as `Source(UserInput)`. Formals that carry a `Depends(...)`
/// default or a non-extractor type annotation (`db: Session`,
/// `request: Request`) stay un-seeded. Without per-formal gating,
/// FastAPI handlers fell back to `cfg-unguarded-sink` for path-bound
/// flows.
///
/// The positive shapes require the rule id to be specifically
/// `taint-unsanitised-flow`, so a future regression that drops
/// entry-kind seeding fails this test. The negative shape pins the
/// absence of `taint-unsanitised-flow` on a DI-only handler.
#[test]
fn python_fastapi_route_per_formal_seeding() {
    let findings = scan_fixture("entry_points_xlang_python_fastapi");

    // Renders every finding on the fixture for the failure message.
    let listing = |suffix: &str| {
        findings
            .iter()
            .filter(|f| f.path.ends_with(suffix))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let positives: [(&str, usize); 2] = [
        ("fastapi_path_capture.py", 18),
        ("fastapi_annotated_query.py", 17),
    ];
    for &(file, sink_line) in &positives {
        let hit = findings.iter().any(|f| {
            f.line == sink_line
                && f.id.starts_with("taint-unsanitised-flow")
                && f.path.ends_with(file)
        });
        assert!(
            hit,
            "Python FastAPI handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow`; got:\n{}",
            listing(file),
        );
    }

    // Negative: the DI-only handler must not report a taint flow.
    let depends_taint: Vec<String> = findings
        .iter()
        .filter(|f| f.path.ends_with("fastapi_depends_denylist.py"))
        .filter(|f| f.id.starts_with("taint-unsanitised-flow"))
        .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
        .collect();
    assert!(
        depends_taint.is_empty(),
        "FastAPI Depends(...) DI handle must not be painted as Source; got:\n{}",
        depends_taint.join("\n"),
    );
}
|
||
|
||
/// Ruby Sinatra entry-kind seeding precision for `SinatraRoute`.
///
/// Path-bound block formals (`get "/u/:name" do |name| ... end`) get
/// painted as `Source(UserInput)`, while routes without path captures
/// stay un-seeded. Without per-formal route-capture gating, Sinatra
/// handlers fell back to `cfg-unguarded-sink` for path-bound flows.
///
/// The positive shape requires the rule id to be specifically
/// `taint-unsanitised-flow`, so a future regression that drops
/// entry-kind seeding fails this test. The negative shape pins the
/// absence of taint findings on a no-capture route (no block formals,
/// no seed, no flow).
#[test]
fn ruby_sinatra_route_path_capture_seeding() {
    let findings = scan_fixture("entry_points_xlang_ruby");

    let positives: [(&str, usize); 2] = [
        ("sinatra_path_capture.rb", 9),
        ("sinatra_multi_capture.rb", 8),
    ];
    for &(file, sink_line) in &positives {
        // Findings on this fixture, reused for the failure listing.
        let on_fixture: Vec<_> = findings
            .iter()
            .filter(|f| f.path.ends_with(file))
            .collect();
        let hit = on_fixture
            .iter()
            .any(|f| f.id.starts_with("taint-unsanitised-flow") && f.line == sink_line);
        assert!(
            hit,
            "Ruby Sinatra path-capture handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow`; got:\n{}",
            on_fixture
                .iter()
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }

    // Negative: the no-capture route must not report a taint flow.
    let no_capture_taint: Vec<String> = findings
        .iter()
        .filter(|f| f.path.ends_with("sinatra_no_capture.rb"))
        .filter(|f| f.id.starts_with("taint-unsanitised-flow"))
        .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
        .collect();
    assert!(
        no_capture_taint.is_empty(),
        "Sinatra route without path captures must not paint formals as Source; got:\n{}",
        no_capture_taint.join("\n"),
    );
}
|
||
|
||
/// Go entry-kind precision for `GinRoute` and `GoNetHttp` handlers.
///
/// `GinRoute` (`*gin.Context`, `echo.Context`, `*fiber.Ctx`,
/// `iris.Context`) and `GoNetHttp`
/// (`(w http.ResponseWriter, r *http.Request)`) handlers route
/// adversary bytes through access-path label rules (`c.Query`,
/// `c.Param`, `c.PostForm`, `r.URL.Query`, `r.FormValue`,
/// `r.Header.Get`, ...) rather than via flat formal seeding. This
/// follows the same precedent as the Express `seed_at_all=false` arm:
/// painting the bare `c` / `r` object as `Source(Cap::all())` re-fires
/// excluded lifecycle methods (`c.AbortWithStatus`, `r.Context()`,
/// etc.) as structural sinks.
///
/// The positive shapes require the rule id to be specifically
/// `taint-unsanitised-flow` (not the OR-cfg-unguarded-sink path the
/// cross-language `entry_points_xlang` test accepts), so a future
/// regression that mis-classifies access paths fails this test.
#[test]
fn go_entry_kind_label_rules_carry_request() {
    let findings = scan_fixture("entry_points_xlang");

    // Renders every finding on the fixture for the failure message.
    let listing = |suffix: &str| {
        findings
            .iter()
            .filter(|f| f.path.ends_with(suffix))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let positives: [(&str, usize); 2] = [
        ("gin_handler.go", 24),
        ("net_http_handler.go", 21),
    ];
    for &(file, sink_line) in &positives {
        let hit = findings
            .iter()
            .filter(|f| f.path.ends_with(file))
            .any(|f| f.line == sink_line && f.id.starts_with("taint-unsanitised-flow"));
        assert!(
            hit,
            "Go handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow` via access-path label rules; got:\n{}",
            listing(file),
        );
    }
}
|
||
|
||
/// Phase 11 + 17 acceptance check over the recorded real-world baselines.
///
/// Every per-target baseline JSON in `tests/recall_targets/` (Phase 11
/// JS targets) and `tests/recall_targets/xlang/<lang>/` (Phase 17
/// cross-lang targets) must exist, parse via `serde_json` as an object
/// recording the provenance keys and a `findings` array, and label
/// every finding entry with `verdict: "TP" | "FP" | "needs_review"`.
///
/// Marked `#[ignore]` because `cargo test --release` should not require
/// a populated baseline directory on a clean clone — the
/// `validate_recall.sh` runbook is the authoritative way to refresh
/// these. Run explicitly with `cargo test --release --test recall_gaps --
/// --ignored validate_real_world_targets`.
#[test]
#[ignore]
fn validate_real_world_targets() {
    // Phase 11 JS targets — ship at the top level.
    const JS_TARGETS: [&str; 4] = [
        "cal_com",
        "vercel_commerce",
        "shadcn_examples",
        "blitz_apps",
    ];
    // Provenance keys every baseline object must record.
    const REQUIRED_KEYS: [&str; 5] = [
        "target",
        "clone_url",
        "captured_against",
        "captured_on",
        "pinned_commit",
    ];

    let root = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/recall_targets");

    let mut paths: Vec<std::path::PathBuf> = Vec::new();
    for t in JS_TARGETS {
        paths.push(root.join(format!("{t}.json")));
    }

    // Phase 17 cross-lang targets — under `xlang/<lang>/<target>.json`.
    // Derived from filesystem inspection so adding a new lang/target only
    // requires dropping the JSON file under `tests/recall_targets/xlang/`.
    // An unreadable `xlang/` directory contributes no paths.
    if let Ok(entries) = std::fs::read_dir(root.join("xlang")) {
        let mut lang_dirs: Vec<std::path::PathBuf> = entries
            .filter_map(Result::ok)
            .map(|entry| entry.path())
            .filter(|dir| dir.is_dir())
            .collect();
        lang_dirs.sort();
        for lang_dir in lang_dirs {
            let listing = match std::fs::read_dir(&lang_dir) {
                Ok(listing) => listing,
                Err(e) => panic!("read xlang dir {}: {e}", lang_dir.display()),
            };
            let mut json_paths: Vec<std::path::PathBuf> = listing
                .filter_map(Result::ok)
                .map(|entry| entry.path())
                .filter(|p| matches!(p.extension().and_then(|s| s.to_str()), Some("json")))
                .collect();
            // Sorted so baselines are validated in a stable order.
            json_paths.sort();
            paths.append(&mut json_paths);
        }
    }

    for path in &paths {
        let raw = match std::fs::read_to_string(path) {
            Ok(raw) => raw,
            Err(e) => panic!("read baseline {}: {e}", path.display()),
        };
        let value: serde_json::Value = match serde_json::from_str(&raw) {
            Ok(value) => value,
            Err(e) => panic!("parse baseline {}: {e}", path.display()),
        };
        let Some(obj) = value.as_object() else {
            panic!("baseline {} must be a JSON object", path.display());
        };

        for key in REQUIRED_KEYS {
            assert!(
                obj.contains_key(key),
                "baseline {} must record `{key}`",
                path.display()
            );
        }

        let Some(findings) = obj.get("findings").and_then(|v| v.as_array()) else {
            panic!("baseline {} must record `findings: []`", path.display());
        };
        for (i, f) in findings.iter().enumerate() {
            let Some(verdict) = f.get("verdict").and_then(|v| v.as_str()) else {
                panic!("baseline {} finding {i} missing `verdict`", path.display());
            };
            assert!(
                matches!(verdict, "TP" | "FP" | "needs_review"),
                "baseline {} finding {i} has invalid verdict {verdict:?} (must be TP|FP|needs_review)",
                path.display()
            );
        }
    }
}
|
||
|
||
/// Sanity-checks the recall-gap baseline snapshot at
/// `tests/recall_gaps_baseline.json`.
///
/// The file must be readable, parse as a JSON object, and record both
/// `recall_gaps_tests` and `corpus_finding_lines`. Inside
/// `corpus_finding_lines`, `rule_id_full` must be an object whose entry
/// count equals the integer stored in `rule_id_distinct`.
///
/// # Panics
///
/// Panics (failing the test) when the file is missing or malformed, or
/// when any of the keys above is absent or has the wrong JSON type. That
/// includes `rule_id_distinct`: a missing or non-integer value is
/// reported directly instead of being treated as zero.
#[test]
fn baseline_loads() {
    let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/recall_gaps_baseline.json");
    let raw = std::fs::read_to_string(&path)
        .unwrap_or_else(|e| panic!("read baseline {}: {e}", path.display()));
    let value: serde_json::Value = serde_json::from_str(&raw)
        .unwrap_or_else(|e| panic!("parse baseline {}: {e}", path.display()));

    assert!(value.is_object(), "baseline must be a JSON object");
    assert!(
        value.get("recall_gaps_tests").is_some(),
        "baseline must record `recall_gaps_tests`"
    );
    let corpus = value
        .get("corpus_finding_lines")
        .expect("baseline must record `corpus_finding_lines`");

    let map = corpus
        .get("rule_id_full")
        .expect(
            "baseline must record `corpus_finding_lines.rule_id_full` (per-rule snapshot, not just top-15) so phases 03-11 can prove rule-level non-regression",
        )
        .as_object()
        .expect("`rule_id_full` must be a JSON object mapping rule_id → count");

    // Fail loudly on a missing or mistyped count; defaulting it to zero
    // would turn a malformed baseline into a confusing size mismatch, or
    // a vacuous pass when `rule_id_full` is empty.
    let distinct = corpus
        .get("rule_id_distinct")
        .and_then(|v| v.as_u64())
        .expect(
            "baseline must record `corpus_finding_lines.rule_id_distinct` as a non-negative integer",
        );
    // Checked conversion: never silently truncate the recorded count.
    let distinct = usize::try_from(distinct).expect("`rule_id_distinct` must fit in usize");

    assert_eq!(
        map.len(),
        distinct,
        "rule_id_full ({}) must cover every distinct rule_id ({})",
        map.len(),
        distinct
    );
}
|