nyx/tests/recall_gaps.rs

1696 lines
64 KiB
Rust
Raw Permalink Normal View History

//! # Recall-gap integration harness (phase 01 baseline)
//!
//! Pitboss phase 01 stands up the skeleton; phases 02–11 grow it. The suite
//! is green on a fresh `master` because every gap-area test starts
//! `#[ignore]`d, so this file compiles and runs without depending on engine
//! work that has not landed yet.
//!
//! ## Where fixtures live
//!
//! Each gap area owns a subdirectory under
//! `tests/fixtures/realistic/<area>/`. The phase that un-ignores a test is
//! responsible for populating its fixture. Fixtures are copied into a fresh
//! tempdir per scan (see [`common::recall::scan_fixture`]) so SQLite,
//! `nyx.conf`, or stray index artefacts cannot leak between tests.
//!
//! ## `ExpectedFinding` shape
//!
//! Each test asserts findings with a tuple of
//! `(rule_id, file_suffix, sink_line, source_line)`:
//!
//! - `rule_id` — exact prefix match on `Diag.id`. Taint findings carry a
//! trailing ` (source N:M)` suffix that the matcher strips before
//! comparison.
//! - `file_suffix` — `Diag.path.ends_with(file_suffix)`, which lets callers
//! ignore the tempdir prefix supplied by the harness.
//! - `sink_line` — exact match on `Diag.line` (1-based).
//! - `source_line` — optional `N` parsed from the ` (source N:M)` suffix
//! on `Diag.id`. Use `None` when the originating line is unstable across
//! refactors of the fixture.
//!
//! ## Phase ownership
//!
//! Every phase un-ignores exactly the tests it owns. The mapping is stable:
//!
//! | Phase | Test fn |
//! |-------|-------------------------------|
//! | 02 | `async_await` |
//! | 03 | `promise_then_callback`, |
//! | | `promise_all_taint`, |
//! | | `for_await_of_stream`, |
//! | | `promise_then_chain_reentrant`|
//! | 05 | `fs_promises_*` |
//! | 06 | `jsx_dangerous_html` |
//! | 07 | `orm_builders` |
//! | 08 | `ssrf_url_builders` |
//! | TBD | `cross_package_ipa`, |
//! | | `nextjs_entrypoints` |
//!
//! Phase 04 ships the TS/JS module resolver foundation but un-ignores no
//! gap tests of its own — the resolver feeds `FuncKey.namespace` for later
//! phases. Phases beyond the table may add further `#[ignore]`d tests;
//! do not move tests between owners.
mod common;
use common::recall::{ExpectedFinding, assert_finding, assert_finding_with_cap, scan_fixture};
use nyx_scanner::labels::Cap;
use std::path::Path;
/// Scans the `async_await` fixture directory and checks that taint crosses
/// an `await` in both the JavaScript and the TypeScript handler.
#[test]
fn async_await_js() {
    let diags = scan_fixture("async_await");

    // JavaScript variant: covers the JavaScript `await_expression`
    // KINDS-map entry.
    let js_expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.js",
        sink_line: 6,
        source_line: Some(4),
    };
    assert_finding(&diags, js_expected);

    // TypeScript variant: identical source/sink shape, routed through the
    // TypeScript `await_expression` KINDS-map entry. Before this assertion
    // existed the `.ts` fixture was only smoke-scanned as a side effect of
    // `scan_fixture("async_await")`, so nothing positively guaranteed that
    // the TS grammar lowered await-forwarding the same way. The source is
    // attributed to line 3 (the typed-extractor `req: { body: string }`
    // parameter): the typed-formal pipeline tags the parameter itself as
    // the taint origin, the canonical handler-input shape, rather than the
    // intermediate `req.body` access on line 4.
    let ts_expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.ts",
        sink_line: 5,
        source_line: Some(3),
    };
    assert_finding(&diags, ts_expected);
}
/// Phase 12 recall-gap (Python).
///
/// tree-sitter-python emits `await x` as a named `await` node (there is no
/// `_expression` suffix). The flow relies on the `"await" =>
/// Kind::AwaitForward` entry in `src/labels/python.rs` together with the
/// `Kind`-driven `is_await_forward` flag in `cfg::push_node`; without them
/// the engine never models the await boundary as a 1:1 forward and the
/// FastAPI-shape `await request.json()` source never reaches
/// `cursor.execute`.
#[test]
fn async_await_py() {
    let diags = scan_fixture("async_await/handler.py");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.py",
        sink_line: 8,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Phase 12 recall-gap (Python combinator).
///
/// `asyncio.gather(...)` is registered for Python as
/// `PromiseCombinatorKind::All` in `is_promise_combinator`, so the taint of
/// its arguments is unioned onto the awaited result.
#[test]
fn async_await_py_gather() {
    let diags = scan_fixture("async_await/gather.py");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "gather.py",
        sink_line: 14,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Phase 12 recall-gap (Rust).
///
/// `x.await` is mapped explicitly to `Kind::AwaitForward` in
/// `src/labels/rust.rs`, and the `is_await_forward` flag is derived from
/// `lookup(lang, ast.kind()) == Kind::AwaitForward` instead of the
/// raw-string `ast.kind() == "await_expression"` comparison. The
/// header-shape source must flow across the await into the
/// `Command::new("sh").arg(&cmd)` shell-injection sink.
#[test]
fn async_await_rs() {
    let diags = scan_fixture("async_await/handler.rs");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.rs",
        sink_line: 26,
        source_line: Some(25),
    };
    assert_finding(&diags, expected);
}
/// Phase 12 recall-gap (Rust combinator).
///
/// `tokio::join!(...)` is a `macro_invocation` whose arguments live inside a
/// `token_tree`. `extract_arg_uses` walks that token tree and splits on `,`
/// so the SSA Call carries one argument group per future, and
/// `is_promise_combinator("rust", "tokio::join")` sends it through the
/// existing combinator transfer. The unioned env-var taint must reach
/// `reqwest::get` (SSRF sink).
#[test]
fn async_await_rs_join() {
    let diags = scan_fixture("async_await/tokio_join.rs");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "tokio_join.rs",
        sink_line: 11,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Phase 12 deferred-fix (Rust combinator, bare macro form).
///
/// The fixture imports the macro with `use tokio::join;` and calls it as
/// `join!(...)`. When an import witness is present, `cfg::push_node`
/// rewrites the bare macro callee text to `tokio::join`, so the existing
/// combinator transfer fires exactly as it does for the qualified form.
#[test]
fn async_await_rs_join_bare() {
    let diags = scan_fixture("async_await/tokio_join_bare.rs");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "tokio_join_bare.rs",
        sink_line: 13,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Phase 03 recall-gap: `.then(cb)` hands the receiver Promise's resolved
/// value to the callback's first parameter.
///
/// The taint trace is attributed to the inner `db.query(data)` sink: the
/// callback-pattern emission is paired with the chain-hop site promotion,
/// which lifts the callback's own-body sink coordinates into the trace
/// finding's primary location.
#[test]
fn promise_then_callback() {
    let diags = scan_fixture("promise_then_callback");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "promise_then_callback.ts",
        sink_line: 9,
        source_line: Some(7),
    };
    assert_finding(&diags, expected);
}
/// Phase 03 recall-gap: the value returned by `Promise.all([...])` carries
/// the union of its element taints.
///
/// A following `p.then(cb)` exposes that value to the `db.query(items)`
/// sink through the callback-pattern emission with chain-hop site
/// promotion.
#[test]
fn promise_all_taint() {
    let diags = scan_fixture("promise_all_taint");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "promise_all_taint.ts",
        sink_line: 8,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Per-element precision for destructuring assignments whose right-hand
/// side is a promise combinator or a bare array literal.
///
/// For `const [a, b] = await Promise.all([safe, tainted])` the SSA lowering
/// rewrite in `src/ssa/lower.rs` maps each destructure binding to
/// `Assign(arg_uses[0][i])`, so `a` binds only to the literal `"ok"` and `b`
/// binds only to the tainted `req.body`. The scalar union from
/// `try_apply_promise_combinator` is bypassed for the per-binding values.
///
/// Skip-slot cases (`const [, b]`, `const [a, ,]`) also need
/// pattern-position indexing: `TaintMeta.array_pattern_indices` carries the
/// source-order position of each binding so the rewrite picks
/// `pd_args[index]` rather than `pd_args[binding_offset]`.
///
/// Every fixture file in the directory is checked the same way: the listed
/// sink lines must yield a `taint-unsanitised-flow` finding, and the listed
/// clean lines must not.
#[test]
fn promise_all_destruct_per_index() {
    const RULE: &str = "taint-unsanitised-flow";
    const PROMISE_ALL: &str = "promise_all_destruct_fp.ts";
    const SKIP_SLOTS: &str = "promise_all_skip_slots.ts";
    const PY_GATHER: &str = "asyncio_gather_destruct_fp.py";
    const BARE_ARRAY: &str = "bare_array_literal_destruct_fp.ts";
    const RUBY_PARALLEL: &str = "ruby_parallel_assignment_fp.rb";
    const COMPLEX_SLOT: &str = "complex_slot_destruct_fp.ts";
    const COMPLEX_PER_SLOT: &str = "complex_complex_per_slot_fp.ts";
    const COMPLEX_TRANSITIVE: &str = "complex_transitive_taint_fp.ts";

    let findings = scan_fixture("promise_all_destruct");

    // Positive check: a taint finding must exist at `sink_line` of `file`.
    // The source line is left unconstrained for every fixture in this test.
    let expect_flow = |file: &'static str, sink_line: usize| {
        assert_finding(
            &findings,
            ExpectedFinding {
                rule_id: RULE,
                file_suffix: file,
                sink_line,
                source_line: None,
            },
        );
    };
    // Negative probe: true when a taint finding is reported at `line` of
    // `file`.
    let flows_at = |file: &str, line: usize| {
        findings
            .iter()
            .any(|f| f.path.ends_with(file) && f.line == line && f.id.starts_with(RULE))
    };
    // Failure diagnostics: every finding reported for `file`, one per line.
    let dump = |file: &str| {
        findings
            .iter()
            .filter(|f| f.path.ends_with(file))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    // Positive: line 17 sink reachable from req.body via index-1 binding.
    expect_flow(PROMISE_ALL, 17);
    // Negative: line 16 binds `a` to the literal "ok"; pre-fix the scalar
    // union painted `a` with req.body's taint and produced a FP here.
    assert!(
        !flows_at(PROMISE_ALL, 16),
        "destructure index-0 binding `a` must not carry req.body taint; got:\n{}",
        dump(PROMISE_ALL),
    );

    // Skip-slot positives: only the index-aligned tainted bindings should
    // fire.
    for sink_line in [24usize, 36] {
        expect_flow(SKIP_SLOTS, sink_line);
    }
    // Skip-slot negatives: lines 28 (`c` from `[, c]` of `[tainted, safe]`)
    // and 32 (`d` from `[d, ,]` of `[safe, tainted, "extra"]`) must NOT
    // fire.
    for forbidden_line in [28usize, 32] {
        assert!(
            !flows_at(SKIP_SLOTS, forbidden_line),
            "skip-slot binding at line {forbidden_line} must not carry req.body taint; got:\n{}",
            dump(SKIP_SLOTS),
        );
    }

    // Python `asyncio.gather` destructure: `pattern_list` + `tuple_pattern`
    // share the same per-index rewrite as JS/TS arrays. Positives at lines
    // 32 / 40 / 50 (tainted-aligned bindings) must fire; negatives at lines
    // 33 / 41 / 51 (safe-aligned bindings) must NOT fire.
    for sink_line in [32usize, 40, 50] {
        expect_flow(PY_GATHER, sink_line);
    }
    for forbidden_line in [33usize, 41, 51] {
        assert!(
            !flows_at(PY_GATHER, forbidden_line),
            "Python asyncio.gather binding at line {forbidden_line} must not carry request.args taint; got:\n{}",
            dump(PY_GATHER),
        );
    }

    // Bare-array RHS destructure (`const [a, b] = [safe, tainted]`):
    // mirror of the Promise.all destructure precision, gated on
    // `info.call.callee.is_none()` so the combinator path is not affected.
    // Each binding emits its own SSA op keyed on the source-order RHS slot.
    for sink_line in [28usize, 36] {
        expect_flow(BARE_ARRAY, sink_line);
    }
    for forbidden_line in [27usize, 37, 44] {
        assert!(
            !flows_at(BARE_ARRAY, forbidden_line),
            "JS/TS bare-array binding at line {forbidden_line} must not carry req.body taint; got:\n{}",
            dump(BARE_ARRAY),
        );
    }

    // Ruby parallel assignment `a, b = [array_literal]` gets per-index
    // precision via the bare-array RHS rewrite at `src/ssa/lower.rs`.
    // Each binding emits its own SSA op keyed on its source-order RHS
    // slot — ident slots Assign the slot's value, literal slots emit
    // Const(None). Positives at handler lines 23 / 30 / 35 (tainted-
    // aligned bindings) must fire; negatives at 24 / 29 / 36 / 37
    // (literal-aligned bindings) must NOT fire.
    for sink_line in [23usize, 30, 35] {
        expect_flow(RUBY_PARALLEL, sink_line);
    }
    for forbidden_line in [24usize, 29, 36, 37] {
        assert!(
            !flows_at(RUBY_PARALLEL, forbidden_line),
            "Ruby parallel assignment binding at line {forbidden_line} must not carry name taint; got:\n{}",
            dump(RUBY_PARALLEL),
        );
    }

    // Complex-slot bare-array RHS destructure (`const [a, b] =
    // [normalize(req.body.cmd), 'static']`). The helper classifies
    // call / binary / subscript / member access / template-string slots
    // as `Complex(inner_uses)` rather than bailing. Each Complex slot
    // emits a slot-scoped `Assign` (or `Source` when the outer node
    // carries a Source label), so the literal-aligned binding is
    // correctly clean. Positives at lines 32 / 39 / 46 / 54 / 62 fire;
    // negatives at lines 33 / 40 / 47 / 55 / 63 must NOT fire.
    for sink_line in [32usize, 39, 46, 54, 62] {
        expect_flow(COMPLEX_SLOT, sink_line);
    }
    for forbidden_line in [33usize, 40, 47, 55, 63] {
        assert!(
            !flows_at(COMPLEX_SLOT, forbidden_line),
            "complex-slot literal binding at line {forbidden_line} must not carry req.body taint; got:\n{}",
            dump(COMPLEX_SLOT),
        );
    }

    // Per-slot Source classification: when two Complex slots sit next to
    // each other and ONLY one slot's subtree contains a Source-classified
    // member-expression, the safe Complex sibling stays slot-scoped instead
    // of inheriting the outer-node Source. Pre-session 0047 the legacy
    // outer-node fallback painted both slots, producing a FP on the safe
    // sibling's binding.
    for sink_line in [27usize, 34, 41] {
        expect_flow(COMPLEX_PER_SLOT, sink_line);
    }
    for forbidden_line in [28usize, 35, 42] {
        assert!(
            !flows_at(COMPLEX_PER_SLOT, forbidden_line),
            "safe Complex sibling at line {forbidden_line} must not inherit per-slot Source; got:\n{}",
            dump(COMPLEX_PER_SLOT),
        );
    }

    // Slot-scoped transitive taint: when the outer destructure node
    // carries a Source label AND another Complex slot's subtree classifies
    // as Source, the safe Complex sibling whose own subtree contains an
    // identifier bound to a tainted local (e.g.
    // `helper(tainted_local)` where `tainted_local = req.body.cmd`)
    // must still propagate the inner ident's taint through the slot-scoped
    // `Assign`. Pre-session 0048 the kill arm emitted `Const(None)` which
    // dropped the transitive taint.
    for sink_line in [29usize, 30, 36] {
        expect_flow(COMPLEX_TRANSITIVE, sink_line);
    }
    {
        let forbidden_line = 37usize;
        assert!(
            !flows_at(COMPLEX_TRANSITIVE, forbidden_line),
            "safe Complex sibling at line {forbidden_line} must not inherit outer Source; got:\n{}",
            dump(COMPLEX_TRANSITIVE),
        );
    }
}
/// Phase 03 recall-gap: in `for await (const x of iter)` the loop variable
/// `x` inherits taint from the iterator (Web Streams / async-iterable
/// request body).
#[test]
fn for_await_of_stream() {
    let diags = scan_fixture("for_await_of_stream");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "for_await_of_stream.ts",
        sink_line: 5,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Phase 03 re-entrancy guard for a 2-deep `.then` chain whose inner
/// callback awaits another promise.
///
/// Confirms that the inline cache does not deadlock and that the k=1 depth
/// limit is still enforced. Outer-level taint must still reach the sink even
/// though the inner level cannot recurse.
#[test]
fn promise_then_chain_reentrant() {
    const FIXTURE_FILE: &str = "promise_then_chain.ts";

    let diags = scan_fixture("promise_then_chain");

    // The fixture intentionally nests two `.then` levels. With k=1 the inner
    // `.then(inner)` cannot recurse, so the engine treats the inner
    // callback's body as opaque and propagates conservatively. The only
    // requirements are that the scan does not panic and that *some* finding
    // is reported for this file (taint reaches the inner sink via the outer
    // flow).
    let saw_fixture = diags
        .iter()
        .map(|d| &d.path)
        .any(|path| path.ends_with(FIXTURE_FILE));

    assert!(
        saw_fixture,
        "expected at least one finding from promise_then_chain.ts, got:\n{}",
        diags
            .iter()
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
/// Phase 05 recall-gap: with `import { readFile } from 'fs/promises'`, the
/// call `await readFile(req.body.path)` is a FILE_IO sink.
///
/// The bare-name `readFile` matcher fires only because the file's import
/// table maps the binding to `fs/promises`, which satisfies the
/// `LabelGate::ImportedFromModule` gate.
#[test]
fn fs_promises_readfile() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_readfile.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_readfile.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
/// Phase 05 recall-gap: `await open(req.query.path, "r")` goes through the
/// same import gate with a different fs/promises method, confirming that the
/// matcher list covers `open` as well as `readFile`.
#[test]
fn fs_promises_open() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_open.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_open.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
/// Phase 05 recall-gap: the `node:`-prefixed specifier, as in
/// `import { writeFile } from 'node:fs/promises'`. Both spellings of the
/// module name must satisfy the gate.
#[test]
fn fs_promises_node_import() {
    let diags = scan_fixture("fs_promises/path_traversal_node_fs_promises_import.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_node_fs_promises_import.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
/// Phase 05 recall-gap: the namespace-import shape,
/// `import * as fsp from 'fs/promises'`. A call to `fsp.readFile(...)` must
/// satisfy the gate through the receiver-name path of the local-import view.
#[test]
fn fs_promises_namespace_import() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_namespace.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_namespace.ts",
        sink_line: 11,
        source_line: Some(10),
    };
    assert_finding(&diags, expected);
}
/// Phase 05 recall-gap: the CommonJS require shape,
/// `const { readFile } = require('fs/promises')`.
/// `extract_local_import_view` records the destructured binding, so the
/// bare-name call still satisfies the gate.
#[test]
fn fs_promises_require_form() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_require.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_require.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
/// Phase 05 recall-gap: the namespace-of-namespace alias,
/// `import * as fs from 'fs'; const fsp = fs.promises;`.
///
/// The promises-alias extension on `extract_local_import_view` adds
/// `fsp -> fs/promises`, so `fsp.readFile(path)` satisfies the gate even
/// though the file has no explicit `import ... from 'fs/promises'` line.
#[test]
fn fs_promises_alias_form() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_alias.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_alias.ts",
        sink_line: 14,
        source_line: Some(13),
    };
    assert_finding(&diags, expected);
}
/// Phase 05 recall-gap: the CommonJS spelling of the alias shape,
/// `const fsp = require('fs').promises;`.
///
/// It goes through the same gate as the ESM-import alias; promises-alias
/// recognises the `.promises` projection on the bare `require('fs')` call.
#[test]
fn fs_promises_alias_require_form() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_alias_require.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_alias_require.ts",
        sink_line: 12,
        source_line: Some(11),
    };
    assert_finding(&diags, expected);
}
/// Phase 05 negative: a user-defined `readFile` with no import must not
/// trigger the gated FILE_IO sink, which is the whole point of the import
/// gate.
///
/// Neither a `taint-unsanitised-flow` nor a `cfg-unguarded-sink` finding may
/// be reported for the fixture file.
#[test]
fn fs_promises_safe_userfn() {
    const FIXTURE_FILE: &str = "path_traversal_fs_promises_safe_userfn.ts";

    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_safe_userfn.ts");

    // Everything reported against the fixture file; reused for the failure
    // message below.
    let for_fixture: Vec<_> = diags
        .iter()
        .filter(|d| d.path.ends_with(FIXTURE_FILE))
        .collect();
    let fired = for_fixture.iter().any(|d| {
        d.id.starts_with("taint-unsanitised-flow") || d.id.starts_with("cfg-unguarded-sink")
    });

    assert!(
        !fired,
        "user-defined readFile should not fire the fs/promises gate; got:\n{}",
        for_fixture
            .iter()
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
/// Phase 06 recall-gap: React JSX
/// `<div dangerouslySetInnerHTML={{__html: x}} />`.
///
/// The CFG builder synthesises a sink call from the JSX attribute, so the
/// auto-seeded `input` formal flows into HTML_ESCAPE at the `__html: input`
/// value-span line. Alongside the positive in `page.tsx`, three fixtures
/// must stay silent (literal, sanitised, and `pipe`-sanitised payloads) and
/// one ternary-branch fixture must fire.
#[test]
fn jsx_dangerous_html() {
    let diags = scan_fixture("jsx_dangerous_html");

    // True when `file` has a taint-flow or unguarded-sink finding.
    let sink_fires = |file: &str| {
        diags.iter().any(|d| {
            let rule_hit = d.id.starts_with("taint-unsanitised-flow")
                || d.id.starts_with("cfg-unguarded-sink");
            d.path.ends_with(file) && rule_hit
        })
    };
    // Every finding reported for `file`, one per line, for failure output.
    let listing = |file: &str| {
        diags
            .iter()
            .filter(|d| d.path.ends_with(file))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "page.tsx",
        sink_line: 8,
        source_line: None,
    };
    assert_finding(&diags, expected);

    // Negative — `__html` is a string literal, no taint flows.
    assert!(
        !sink_fires("page_safe_literal.tsx"),
        "literal __html must not fire dangerouslySetInnerHTML; got:\n{}",
        listing("page_safe_literal.tsx"),
    );

    // Negative — `__html: DOMPurify.sanitize(input)` is sanitized.
    assert!(
        !sink_fires("page_indirect.tsx"),
        "DOMPurify.sanitize-routed payload must not fire dangerouslySetInnerHTML; got:\n{}",
        listing("page_indirect.tsx"),
    );

    // Negative — `__html: pipe(input, sanitizeHtml, DOMPurify.sanitize)`:
    // the fp-ts composition recogniser detects sanitizers in argument
    // position and suppresses the synthetic sink's argument-side flow.
    assert!(
        !sink_fires("page_pipe.tsx"),
        "pipe(...sanitizers) payload must not fire dangerouslySetInnerHTML; got:\n{}",
        listing("page_pipe.tsx"),
    );

    // Positive (item 11) — JSX inside a ternary RHS branch. The synthesis
    // hook in `lower_ternary_branch` reaches the `__html: input` value span
    // even though the wrapping arm short-circuits into the ternary diamond.
    assert!(
        sink_fires("page_ternary.tsx"),
        "ternary-branch dangerouslySetInnerHTML must fire a sink; got nothing for page_ternary.tsx"
    );
}
/// Phase 07 recall-gap: raw-SQL escape hatches on ORM query builders.
///
/// Positive coverage:
/// - Drizzle `sql.raw(x)` and tagged-template `sql\`...\`` shapes
///   (leading-id `ImportedFromModule(&["drizzle-orm"])` gate)
/// - Sequelize `sequelize.literal(x)` via receiver-type qualification
///   (`TypeKind::Sequelize` → `Sequelize.literal`)
/// - TypeORM `repo.query(...)` via receiver-type qualification
///   (`TypeKind::TypeOrmRepo` → `TypeOrmRepo.query`)
/// - Knex `db.whereRaw(...)` via the file-level
///   `FileImportsModule(&["knex"])` gate
/// - the `sqli_mikroorm_execute.ts` fixture
///
/// Negatives (no SQL_QUERY finding allowed):
/// - parameterised TypeORM `repo.query("...", [const])`
/// - bare `whereRaw` / `literal` calls in a file without ORM imports
/// - the `sqli_knex_type_only_safe.ts` fixture
#[test]
fn orm_builders() {
    let diags = scan_fixture("orm_builders");
    let sql_bits = Cap::SQL_QUERY.bits();

    // (file, sink_line) pairs; `sink_line` is the SQL builder call itself.
    // `sqli_typeorm_query.ts` used to assert line 17 (`res.json(rows)`) and
    // passed thanks to a coincidental XSS finding. The real
    // `repo.query(...)` sink is on line 16, and the cap-aware assertion pins
    // the SQL_QUERY capability so an XSS regression cannot hide a missing
    // receiver-type-qualified ORM rule.
    let must_fire = [
        ("sqli_drizzle_sql_raw.ts", 13usize),
        ("sqli_drizzle_tagged_template.ts", 14usize),
        ("sqli_sequelize_literal.ts", 14usize),
        ("sqli_typeorm_query.ts", 16usize),
        ("sqli_knex_where_raw.ts", 15usize),
        ("sqli_mikroorm_execute.ts", 13usize),
    ];
    for (file, sink_line) in must_fire {
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: file,
            sink_line,
            source_line: None,
        };
        assert_finding_with_cap(&diags, expected, sql_bits);
    }

    let must_stay_silent = [
        "sqli_typeorm_safe_parameterized.ts",
        "sqli_no_orm_import_safe.ts",
        "sqli_knex_type_only_safe.ts",
    ];
    for file in must_stay_silent {
        // A leak is a taint-flow or unguarded-sink finding on this file
        // whose evidence carries the SQL_QUERY capability bit.
        let leak = diags.iter().any(|d| {
            let rule_hit = d.id.starts_with("taint-unsanitised-flow")
                || d.id.starts_with("cfg-unguarded-sink");
            let has_sql_cap = d
                .evidence
                .as_ref()
                .map_or(false, |ev| (ev.sink_caps & sql_bits) != 0);
            d.path.ends_with(file) && rule_hit && has_sql_cap
        });
        assert!(
            !leak,
            "ORM negative fixture {file} must not fire SQL_QUERY; got:\n{}",
            diags
                .iter()
                .filter(|d| d.path.ends_with(file))
                .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }
}
/// Phase 08 recall-gap: SSRF through URL-builder shapes.
///
/// Positive coverage:
/// - `new URL(taintedPath)` carries the path argument's taint into the
///   constructed URL value (no label rule, no summary — handled by the
///   URL-constructor pass added in Phase 08).
/// - `u.searchParams.set(k, taintedV)` / `.append(...)` taints the receiver
///   URL through the searchParams alias rule.
/// - `fetch({ url: taintedUrl, ... })` flows through the destination-aware
///   filter on the SSRF gate.
/// - `fetch(target)` where `target: URL` carries SSA-level `TypeKind::Url`
///   and the constructor-propagated taint.
///
/// Negative:
/// - `new URL(req.body.path, "https://api.cal.com")` — the literal base
///   anchors an origin-locked StringFact prefix that
///   `is_string_safe_for_ssrf` honours, so no SSRF finding is expected.
#[test]
fn ssrf_url_builders() {
    const ORIGIN_LOCKED: &str = "ssrf_url_origin_locked.ts";

    let diags = scan_fixture("ssrf_url_builders");
    let ssrf_bits = Cap::SSRF.bits();

    // (file, sink_line) pairs that must each produce an SSRF-capable
    // taint finding.
    let must_fire = [
        ("ssrf_new_url.ts", 12usize),
        ("ssrf_searchparams_set.ts", 13usize),
        ("ssrf_searchparams_append.ts", 12usize),
        ("ssrf_fetch_object_form.ts", 11usize),
        ("ssrf_fetch_url_typed_arg.ts", 13usize),
        ("ssrf_fetch_object_shorthand.ts", 13usize),
        ("ssrf_fetch_object_shorthand.ts", 19usize),
    ];
    for (file, sink_line) in must_fire {
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: file,
            sink_line,
            source_line: None,
        };
        assert_finding_with_cap(&diags, expected, ssrf_bits);
    }

    // Negative: origin-locked `new URL(path, "https://api.cal.com")` must
    // not fire SSRF — the abstract-string prefix-lock suppresses it.
    let leak = diags.iter().any(|d| {
        let has_ssrf_cap = d
            .evidence
            .as_ref()
            .map_or(false, |ev| (ev.sink_caps & ssrf_bits) != 0);
        let rule_hit = d.id.starts_with("taint-unsanitised-flow")
            || d.id.starts_with("cfg-unguarded-sink");
        d.path.ends_with(ORIGIN_LOCKED) && has_ssrf_cap && rule_hit
    });
    assert!(
        !leak,
        "origin-locked URL must not fire SSRF; got:\n{}",
        diags
            .iter()
            .filter(|d| d.path.ends_with(ORIGIN_LOCKED))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
/// Phase 14 recall-gap: cross-language SSRF + URL-builder coverage.
///
/// Mirrors `ssrf_url_builders` (JS/TS) for Python, Java, Rust, Go, Ruby and
/// PHP. Each language carries:
///
/// * positive — a tainted source flowing into the language's canonical
///   HTTP client sink, asserting that `Cap::SSRF` fires.
/// * origin-locked negative — a `(literal_base, tainted_path)` URL builder
///   shape; the abstract-string prefix lock honoured by
///   `is_string_safe_for_ssrf` suppresses the SSRF sink.
/// * search-params positive — a tainted URL passed positionally to a
///   Phase 14-added sink (`OkHttpClient.newCall`,
///   `\GuzzleHttp\Client::request`, etc.) so the new label rules see real
///   exercise alongside the existing flat sinks.
///
/// Only the file and the SSRF capability are checked; sink lines are not
/// pinned.
#[test]
fn ssrf_cross_language() {
    let diags = scan_fixture("ssrf");
    let ssrf_bits = Cap::SSRF.bits();

    // True when `file` has a taint-flow or unguarded-sink finding whose
    // evidence carries the SSRF capability bit.
    let ssrf_fires = |file: &str| {
        diags.iter().any(|d| {
            let has_ssrf_cap = d
                .evidence
                .as_ref()
                .map_or(false, |ev| (ev.sink_caps & ssrf_bits) != 0);
            let rule_hit = d.id.starts_with("taint-unsanitised-flow")
                || d.id.starts_with("cfg-unguarded-sink");
            d.path.ends_with(file) && has_ssrf_cap && rule_hit
        })
    };
    // Every finding reported for `file`, one per line, for failure output.
    let listing = |file: &str| {
        diags
            .iter()
            .filter(|d| d.path.ends_with(file))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let must_fire = [
        // Python — tainted full URL flowing into requests.get / request.
        "ssrf_py_positive.py",
        "ssrf_py_search_params.py",
        // Java — HttpClient.send + OkHttpClient.newCall (Phase 14 sink).
        "SsrfJavaPositive.java",
        "SsrfJavaSearchParams.java",
        // Rust — reqwest::get + Client::new.get (chained verb-on-instance).
        "ssrf_rs_positive.rs",
        "ssrf_rs_search_params.rs",
        // Go — http.Get + http.NewRequest.
        "ssrf_go_positive.go",
        "ssrf_go_search_params.go",
        // Ruby — Net::HTTP.get + Faraday.get (Phase 14 sink).
        "ssrf_rb_positive.rb",
        "ssrf_rb_search_params.rb",
        // Ruby Faraday.new(url: tainted) construction-time SSRF and
        // Net::HTTP.start(host, port, proxy_addr: tainted) proxy-tainted
        // Destination gates added in the Phase 14 follow-up.
        "ssrf_rb_faraday_new.rb",
        "ssrf_rb_net_http_proxy.rb",
        // PHP — curl_exec via curl_setopt CURLOPT_URL gate (Phase 14)
        // + Guzzle Client::request (Phase 14 sink).
        "ssrf_php_positive.php",
        "ssrf_php_search_params.php",
    ];
    for file in must_fire {
        assert!(
            ssrf_fires(file),
            "SSRF expected to fire on {file}; got:\n{}",
            listing(file),
        );
    }

    let must_stay_silent = [
        "ssrf_py_origin_locked.py",
        "SsrfJavaOriginLocked.java",
        "ssrf_rs_origin_locked.rs",
        "ssrf_rs_origin_locked_const_fmt.rs",
        "ssrf_go_origin_locked.go",
        "ssrf_rb_origin_locked.rb",
        "ssrf_rb_origin_locked_interp.rb",
        "ssrf_php_origin_locked.php",
    ];
    for file in must_stay_silent {
        assert!(
            !ssrf_fires(file),
            "origin-locked SSRF must stay silent on {file}; got:\n{}",
            listing(file),
        );
    }
}
/// Phase 15 recall-gap: ORM and raw-SQL coverage across languages.
///
/// The Python, Java, Ruby, Go and PHP counterpart of the JS/TS
/// `orm_builders` test. Each language contributes:
///
/// * a positive raw-string concat fixture — tainted user input is
///   concatenated into the SQL string that reaches the language's
///   canonical SQL_QUERY sink;
/// * a positive interpolation fixture — the same shape expressed with
///   language-native interpolation (Python f-string inside `text(...)`,
///   Java `String.format`, Ruby `"#{...}"`, Go `fmt.Sprintf`, PHP
///   `"$var"`);
/// * a negative parameterised fixture — the parameterised API form with
///   a literal SQL template + constant bind args, mirroring phase 07's
///   safe-parameterised approach.
///
/// Positives must report `taint-unsanitised-flow` with the SQL_QUERY
/// capability at the listed sink line. Negatives must not report any
/// SQL_QUERY-capable `taint-unsanitised-flow` / `cfg-unguarded-sink`
/// finding.
#[test]
fn orm_xlang() {
    let findings = scan_fixture("sqli_xlang");
    let sql_bits = Cap::SQL_QUERY.bits();

    // (file, sink_line)
    let positives: &[(&str, usize)] = &[
        ("sqli_py_psycopg2_concat.py", 16),
        ("sqli_py_sqlalchemy_text_fstring.py", 18),
        ("SqliJavaConcat.java", 18),
        ("SqliJavaHibernateNative.java", 14),
        ("SqliJavaHibernateNamedSession.java", 19),
        ("SqliJavaHibernateChainedSession.java", 23),
        ("sqli_rb_concat.rb", 8),
        ("sqli_rb_where_interp.rb", 9),
        ("sqli_go_concat.go", 14),
        ("sqli_go_gorm_raw.go", 20),
        ("sqli_go_gorm_raw_named.go", 28),
        ("sqli_py_django_qs_bound.py", 14),
        ("sqli_py_django_qs_bare.py", 16),
        ("sqli_php_pdo_concat.php", 9),
        ("sqli_php_doctrine_interp.php", 10),
    ];
    for &(file, sink_line) in positives {
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: file,
            sink_line,
            // The originating line is not pinned for these fixtures.
            source_line: None,
        };
        assert_finding_with_cap(&findings, expected, sql_bits);
    }

    let negatives = [
        "sqli_py_param_safe.py",
        // Phase 15 deferred-fix: tainted bind args at arg 1 of
        // `cursor.execute("SELECT ... WHERE x = %s", (tainted,))` must
        // stay silent on SQL_QUERY because `payload_args = &[0]` on the
        // Destination gate restricts the sink scan to arg 0.
        "sqli_py_param_tainted_binds.py",
        "SqliJavaParamSafe.java",
        // Phase 15 deferred-fix (Java): tainted `setParameter` bind
        // value on a constant `entityManager.createQuery(...)` template
        // must stay silent on SQL_QUERY. Mirrors the Python tainted-
        // binds shape; the Java Destination gate on the createQuery
        // family carries `payload_args = &[0]`.
        "SqliJavaParamTaintedBinds.java",
        "sqli_rb_param_safe.rb",
        "sqli_go_param_safe.go",
        // Phase 15 deferred-fix (Go): tainted bind value at arg 2 of
        // `db.QueryContext(ctx, sql, tainted)` must stay silent. The
        // Destination gate on `db.QueryContext` carries
        // `payload_args = &[1]`, restricting the sink scan to the SQL
        // string at arg 1.
        "sqli_go_param_tainted_binds.go",
        "sqli_php_param_safe.php",
    ];
    for &file in &negatives {
        // Silent means: no finding on this file that is both a taint /
        // unguarded-sink rule and tagged with the SQL_QUERY capability.
        let silent = findings.iter().all(|f| {
            let sql_sink =
                matches!(f.evidence.as_ref(), Some(e) if (e.sink_caps & sql_bits) != 0);
            let taint_rule = f.id.starts_with("taint-unsanitised-flow")
                || f.id.starts_with("cfg-unguarded-sink");
            !(f.path.ends_with(file) && sql_sink && taint_rule)
        });
        assert!(
            silent,
            "parameterised SQLi negative {file} must stay silent on SQL_QUERY; got:\n{}",
            findings
                .iter()
                .filter(|f| f.path.ends_with(file))
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }
}
/// Phase 09 recall-gap: cross-package IPA through FuncKey namespace
/// resolution.
///
/// `unsafeHandler` calls `escapeHtmlNoop`, a passthrough imported from
/// `@scope/util/sanitize`. The engine sees the imported callee's SSA
/// summary via step 0.7 of `resolve_callee_full`, so `req.query.x`
/// taint propagates into `res.send` on line 7. `safeHandler` calls
/// `stripTags`, a real `replace`-based sanitizer imported from
/// `@scope/util/strip`, and must stay silent.
///
/// Asserted here: a `taint-unsanitised-flow` finding in `handler.ts`
/// with sink line 7 and source line 5, and no `taint-unsanitised-flow`
/// finding in `handler.ts` at line 13.
#[test]
fn cross_package_ipa() {
    let findings = scan_fixture("cross_package_ipa");

    // Positive: the passthrough "sanitizer" lets the taint through.
    let unsafe_flow = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.ts",
        sink_line: 7,
        source_line: Some(5),
    };
    assert_finding(&findings, unsafe_flow);

    // Negative: nothing may be reported at the sanitized sink on line 13.
    let safe_silent = findings.iter().all(|f| {
        !f.id.starts_with("taint-unsanitised-flow")
            || !f.path.ends_with("handler.ts")
            || f.line != 13
    });
    assert!(
        safe_silent,
        "cross-package sanitizer fixture must stay silent at handler.ts:13; got:\n{}",
        findings
            .iter()
            .filter(|f| f.path.ends_with("handler.ts"))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
/// Phase 10 recall-gap: Next.js entry-point detection.
///
/// Shapes covered by the fixture directory:
///
/// - App Router POST handler at `app/api/users/route.ts`: the first
///   formal is typed as `TypeKind::Request`, so `await req.json()`
///   surfaces as a SQL_QUERY sink at the `db.query(body)` call.
/// - File-level `'use server'` directive (`nextjs_server_action.ts`,
///   `nextjs_use_server_directive.ts`): every exported function's
///   formals are seeded as Source taint at SSA entry.
/// - Function-level `'use server'`
///   (`nextjs_use_server_function_level.ts`): only the directive-
///   bearing function is treated as a server action.
/// - `<form action={fn}>` JSX binding (`nextjs_form_action.tsx`): the
///   named callee is tagged `EntryKind::FormAction` and its first
///   formal is seeded as adversary input.
/// - `next/headers` `cookies()` import-gated source: the gated rule
///   fires only when `cookies` is bound from `next/headers`.
///
/// The test itself checks that each listed file reports
/// `taint-unsanitised-flow` at the given sink line.
#[test]
fn nextjs_entrypoints() {
    let findings = scan_fixture("nextjs_entrypoints");

    // (file, sink_line): one expected taint finding per fixture.
    let positives: &[(&str, usize)] = &[
        ("route.ts", 11),
        ("nextjs_server_action.ts", 11),
        ("nextjs_use_server_directive.ts", 9),
        ("nextjs_use_server_function_level.ts", 8),
        ("nextjs_form_action.tsx", 10),
        ("nextjs_cookies_source.ts", 12),
    ];
    for &(file, sink_line) in positives {
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: file,
            sink_line,
            source_line: None,
        };
        assert_finding(&findings, expected);
    }
}
/// Phase 13 recall-gap: cross-language path traversal.
///
/// Five languages, each with one positive and one sanitized fixture,
/// exercise the FILE_IO sinks added in Phase 13: `Path.read_text`
/// (Python), `Files.readAllBytes` (Java), `tokio::fs::read` (Rust),
/// `os.ReadFile` (Go) and `File.write` (Ruby). The sanitized fixtures
/// canonicalise the path through the language-native sanitiser
/// (`Path.resolve` / `Path.normalize` / `PathBuf::canonicalize` /
/// `filepath.Clean` / `Pathname#cleanpath`) and demonstrate the safe
/// pattern by structuring the call chain so no FILE_IO sink reaches the
/// canonical value, which keeps the fixture silent.
///
/// Each fixture is scanned on its own as `path_traversal/<file>`.
#[test]
fn path_traversal_xlang() {
    let file_io_bits = Cap::FILE_IO.bits();

    // (file, sink_line, source_line)
    let positives: &[(&str, usize, Option<usize>)] = &[
        ("path_traversal.py", 12, Some(11)),
        ("PathTraversal.java", 16, Some(15)),
        ("path_traversal.rs", 22, Some(21)),
        ("path_traversal.go", 14, Some(13)),
        ("path_traversal.rb", 7, Some(6)),
    ];
    for &(file, sink_line, source_line) in positives {
        let fixture = format!("path_traversal/{file}");
        let findings = scan_fixture(&fixture);
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: file,
            sink_line,
            source_line,
        };
        assert_finding_with_cap(&findings, expected, file_io_bits);
    }

    let negatives = [
        "path_traversal_safe.py",
        "PathTraversalSafe.java",
        "path_traversal_safe.rs",
        "path_traversal_safe.go",
        "path_traversal_safe.rb",
    ];
    for &file in &negatives {
        let fixture = format!("path_traversal/{file}");
        let findings = scan_fixture(&fixture);
        // Any taint-flow or unguarded-sink finding on the file counts as
        // a leak here; the capability is deliberately not inspected.
        let silent = findings.iter().all(|f| {
            !f.path.ends_with(file)
                || (!f.id.starts_with("taint-unsanitised-flow")
                    && !f.id.starts_with("cfg-unguarded-sink"))
        });
        assert!(
            silent,
            "path_traversal sanitized fixture {file} must stay silent; got:\n{}",
            findings
                .iter()
                .filter(|f| f.path.ends_with(file))
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }
}
/// Phase 16 recall-gap: framework entry-point detection across
/// languages.
///
/// There is one fixture per framework. Each takes a request input (a
/// function formal or a path-captured kwarg) and pipes it to a
/// language-native sink. Every fixture must fire the expected sink with
/// the request parameter as Source via the entry-kind seeding policy in
/// `taint/ssa_transfer/mod.rs`.
///
/// The Spring fixture composes with phase 15 (Hibernate
/// `entityManager.createNativeQuery`), proving cross-phase composition
/// holds across languages.
///
/// The check is deliberately loose: either `taint-unsanitised-flow` or
/// `cfg-unguarded-sink` anywhere in the file satisfies it.
#[test]
fn entry_points_xlang() {
    let findings = scan_fixture("entry_points_xlang");

    // Findings for one file, rendered for the failure message.
    let describe = |file: &str| -> String {
        findings
            .iter()
            .filter(|f| f.path.ends_with(file))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let positives = [
        "django_view.py",
        "fastapi_route.py",
        "flask_route.py",
        "spring_controller.java",
        "rails_action.rb",
        "axum_handler.rs",
        "actix_handler.rs",
        "gin_handler.go",
        "express_route.js",
    ];
    for &file in &positives {
        let fired = findings.iter().any(|f| {
            let taint_rule = f.id.starts_with("taint-unsanitised-flow")
                || f.id.starts_with("cfg-unguarded-sink");
            f.path.ends_with(file) && taint_rule
        });
        assert!(
            fired,
            "Phase 16 entry-point fixture {file} must fire a taint sink; got:\n{}",
            describe(file),
        );
    }
}
/// Rust entry-kind seeding precision.
///
/// Typed extractor formals (`Query<T>`, `Json<T>`, `Form<T>`,
/// `Path<T>`, `web::*<T>`) are painted as `Source(UserInput)`, while
/// denylist DI handles (`State<T>`, `Extension<T>`, ...) are not.
/// Without this guard, the scoped-lowering lift for Rust handlers would
/// FP-fire every database / shared-state sink consuming a pool handle.
///
/// The positive shape requires the rule_id to be specifically
/// `taint-unsanitised-flow` (not `cfg-unguarded-sink`) at an exact sink
/// line, so a future regression that drops entry-kind seeding fails
/// this test.
#[test]
fn rust_entry_kind_typed_extractor_seeding() {
    let findings = scan_fixture("entry_points_xlang_rust");

    // (file, sink_line)
    let positives: &[(&str, usize)] = &[
        ("axum_query_typed_extractor.rs", 12),
        ("actix_path_typed_extractor.rs", 11),
    ];
    for &(file, sink_line) in positives {
        let fired = findings.iter().any(|f| {
            f.line == sink_line
                && f.id.starts_with("taint-unsanitised-flow")
                && f.path.ends_with(file)
        });
        assert!(
            fired,
            "Rust typed-extractor handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow`; got:\n{}",
            findings
                .iter()
                .filter(|f| f.path.ends_with(file))
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }

    // Negative: State<Arc<DbPool>> formals must not produce
    // taint-unsanitised-flow findings. cfg-unguarded-sink is fine:
    // that is the pre-existing structural backup, not a seeding claim
    // against the formal.
    let state_taint_lines: Vec<String> = findings
        .iter()
        .filter(|f| f.path.ends_with("axum_state_denylist.rs"))
        .filter(|f| f.id.starts_with("taint-unsanitised-flow"))
        .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
        .collect();
    assert!(
        state_taint_lines.is_empty(),
        "State<DbPool> formals must not be painted as Source; got:\n{}",
        state_taint_lines.join("\n"),
    );
}
/// Python entry-kind seeding precision for `FlaskRoute`.
///
/// Path-bound formals (`@app.route("/u/<name>")` + `def view(name):`)
/// are painted as `Source(UserInput)`, while routes without path
/// captures stay un-seeded. Without per-formal route-capture gating,
/// Python handlers fell back to `cfg-unguarded-sink` for path-bound
/// flows.
///
/// The positive shape requires the rule_id to be specifically
/// `taint-unsanitised-flow` (not `cfg-unguarded-sink`) at an exact sink
/// line, so a future regression that drops entry-kind seeding fails
/// this test. The negative shape pins the absence of taint findings on
/// a no-capture route (no formals, no seed, no flow).
#[test]
fn python_flask_route_path_capture_seeding() {
    let findings = scan_fixture("entry_points_xlang_python");

    // (file, sink_line)
    let positives: &[(&str, usize)] = &[
        ("flask_path_capture.py", 14),
        ("flask_converter_capture.py", 14),
    ];
    for &(file, sink_line) in positives {
        let fired = findings.iter().any(|f| {
            f.line == sink_line
                && f.id.starts_with("taint-unsanitised-flow")
                && f.path.ends_with(file)
        });
        assert!(
            fired,
            "Python Flask path-capture handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow`; got:\n{}",
            findings
                .iter()
                .filter(|f| f.path.ends_with(file))
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }

    // Negative: a Flask route with no path captures and a literal
    // sink argument must not surface `taint-unsanitised-flow`.
    let no_capture_lines: Vec<String> = findings
        .iter()
        .filter(|f| f.path.ends_with("flask_no_capture.py"))
        .filter(|f| f.id.starts_with("taint-unsanitised-flow"))
        .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
        .collect();
    assert!(
        no_capture_lines.is_empty(),
        "Flask route without path captures must not paint formals as Source; got:\n{}",
        no_capture_lines.join("\n"),
    );
}
/// Python FastAPI entry-kind seeding precision for `FastApiRoute`.
///
/// Two kinds of formal are painted as `Source(UserInput)`:
///
/// - path-bound formals from `{name}` brace-segment captures
///   (`@app.get("/items/{item_id}")` + `def read_item(item_id: str):`);
/// - Annotated typed extractors (`q: Annotated[str, Query()]`).
///
/// Formals that carry a `Depends(...)` default or a non-extractor type
/// annotation (`db: Session`, `request: Request`) stay un-seeded.
/// Without per-formal gating, FastAPI handlers fell back to
/// `cfg-unguarded-sink` for path-bound flows.
///
/// The positive shapes require the rule_id to be specifically
/// `taint-unsanitised-flow` at an exact sink line, so a future
/// regression that drops entry-kind seeding fails this test. The
/// negative shape pins the absence of `taint-unsanitised-flow` on a
/// DI-only handler.
#[test]
fn python_fastapi_route_per_formal_seeding() {
    let findings = scan_fixture("entry_points_xlang_python_fastapi");

    // (file, sink_line)
    let positives: &[(&str, usize)] = &[
        ("fastapi_path_capture.py", 18),
        ("fastapi_annotated_query.py", 17),
    ];
    for &(file, sink_line) in positives {
        let fired = findings.iter().any(|f| {
            f.line == sink_line
                && f.id.starts_with("taint-unsanitised-flow")
                && f.path.ends_with(file)
        });
        assert!(
            fired,
            "Python FastAPI handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow`; got:\n{}",
            findings
                .iter()
                .filter(|f| f.path.ends_with(file))
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }

    // Negative: the DI-only handler must not report a taint flow.
    let depends_lines: Vec<String> = findings
        .iter()
        .filter(|f| f.path.ends_with("fastapi_depends_denylist.py"))
        .filter(|f| f.id.starts_with("taint-unsanitised-flow"))
        .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
        .collect();
    assert!(
        depends_lines.is_empty(),
        "FastAPI Depends(...) DI handle must not be painted as Source; got:\n{}",
        depends_lines.join("\n"),
    );
}
/// Ruby Sinatra entry-kind seeding precision for `SinatraRoute`.
///
/// Path-bound block formals (`get "/u/:name" do |name| ... end`) are
/// painted as `Source(UserInput)`, while routes without path captures
/// stay un-seeded. Without per-formal route-capture gating, Sinatra
/// handlers fell back to `cfg-unguarded-sink` for path-bound flows.
///
/// The positive shape requires the rule_id to be specifically
/// `taint-unsanitised-flow` at an exact sink line, so a future
/// regression that drops entry-kind seeding fails this test. The
/// negative shape pins the absence of taint findings on a no-capture
/// route (no block formals, no seed, no flow).
#[test]
fn ruby_sinatra_route_path_capture_seeding() {
    let findings = scan_fixture("entry_points_xlang_ruby");

    // (file, sink_line)
    let positives: &[(&str, usize)] = &[
        ("sinatra_path_capture.rb", 9),
        ("sinatra_multi_capture.rb", 8),
    ];
    for &(file, sink_line) in positives {
        let fired = findings.iter().any(|f| {
            f.line == sink_line
                && f.id.starts_with("taint-unsanitised-flow")
                && f.path.ends_with(file)
        });
        assert!(
            fired,
            "Ruby Sinatra path-capture handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow`; got:\n{}",
            findings
                .iter()
                .filter(|f| f.path.ends_with(file))
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }

    // Negative: the capture-free route must not report a taint flow.
    let no_capture_lines: Vec<String> = findings
        .iter()
        .filter(|f| f.path.ends_with("sinatra_no_capture.rb"))
        .filter(|f| f.id.starts_with("taint-unsanitised-flow"))
        .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
        .collect();
    assert!(
        no_capture_lines.is_empty(),
        "Sinatra route without path captures must not paint formals as Source; got:\n{}",
        no_capture_lines.join("\n"),
    );
}
/// Go entry-kind precision.
///
/// `GinRoute` (`*gin.Context`, `echo.Context`, `*fiber.Ctx`,
/// `iris.Context`) and `GoNetHttp`
/// (`(w http.ResponseWriter, r *http.Request)`) handlers route
/// adversary bytes through access-path label rules (`c.Query`,
/// `c.Param`, `c.PostForm`, `r.URL.Query`, `r.FormValue`,
/// `r.Header.Get`, ...) rather than via flat formal seeding. This
/// follows the same precedent as the Express `seed_at_all=false` arm:
/// painting the bare `c` / `r` object as `Source(Cap::all())` re-fires
/// excluded lifecycle methods (`c.AbortWithStatus`, `r.Context()`,
/// etc.) as structural sinks.
///
/// The positive shapes require the rule_id to be specifically
/// `taint-unsanitised-flow` (not the OR-cfg-unguarded-sink path the
/// cross-language `entry_points_xlang` test accepts) at an exact sink
/// line, so a future regression that mis-classifies access paths fails
/// this test.
#[test]
fn go_entry_kind_label_rules_carry_request() {
    let findings = scan_fixture("entry_points_xlang");

    // (file, sink_line)
    let positives: &[(&str, usize)] = &[
        ("gin_handler.go", 24),
        ("net_http_handler.go", 21),
    ];
    for &(file, sink_line) in positives {
        let fired = findings.iter().any(|f| {
            f.line == sink_line
                && f.id.starts_with("taint-unsanitised-flow")
                && f.path.ends_with(file)
        });
        assert!(
            fired,
            "Go handler {file}:{sink_line} must fire \
             `taint-unsanitised-flow` via access-path label rules; got:\n{}",
            findings
                .iter()
                .filter(|f| f.path.ends_with(file))
                .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }
}
/// Phase 11 + 17 acceptance for the per-target baseline JSON files.
///
/// Covers `tests/recall_targets/` (Phase 11 JS targets) and
/// `tests/recall_targets/xlang/<lang>/` (Phase 17 cross-lang targets).
/// Every baseline must exist, parse via `serde_json` into a JSON
/// object, record the required metadata keys, and carry a `findings`
/// array in which each entry has a
/// `verdict: "TP" | "FP" | "needs_review"` label.
///
/// Marked `#[ignore]` because `cargo test --release` should not require
/// a populated baseline directory on a clean clone; the
/// `validate_recall.sh` runbook is the authoritative way to refresh
/// these. Run explicitly with `cargo test --release --test recall_gaps
/// -- --ignored validate_real_world_targets`.
#[test]
#[ignore]
fn validate_real_world_targets() {
    // Metadata keys every baseline object has to record.
    const REQUIRED_KEYS: [&str; 5] = [
        "target",
        "clone_url",
        "captured_against",
        "captured_on",
        "pinned_commit",
    ];

    let root = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/recall_targets");

    // Phase 11 JS targets: fixed list, shipped at the top level.
    let mut paths: Vec<std::path::PathBuf> = [
        "cal_com",
        "vercel_commerce",
        "shadcn_examples",
        "blitz_apps",
    ]
    .iter()
    .map(|t| root.join(format!("{t}.json")))
    .collect();

    // Phase 17 cross-lang targets live under `xlang/<lang>/<target>.json`.
    // They are discovered from the filesystem, so adding a new
    // lang/target only requires dropping the JSON file under
    // `tests/recall_targets/xlang/`.
    let mut lang_dirs: Vec<std::path::PathBuf> = match std::fs::read_dir(root.join("xlang")) {
        Ok(entries) => entries
            .filter_map(Result::ok)
            .map(|entry| entry.path())
            .filter(|p| p.is_dir())
            .collect(),
        // An unreadable `xlang/` directory simply contributes no targets.
        Err(_) => Vec::new(),
    };
    // Sorted so the validation order does not depend on directory order.
    lang_dirs.sort();
    for lang_dir in &lang_dirs {
        let listing = std::fs::read_dir(lang_dir)
            .unwrap_or_else(|e| panic!("read xlang dir {}: {e}", lang_dir.display()));
        let mut json_paths: Vec<std::path::PathBuf> = listing
            .filter_map(Result::ok)
            .map(|entry| entry.path())
            .filter(|p| matches!(p.extension().and_then(|s| s.to_str()), Some("json")))
            .collect();
        json_paths.sort();
        paths.append(&mut json_paths);
    }

    for path in &paths {
        let raw = match std::fs::read_to_string(path) {
            Ok(raw) => raw,
            Err(e) => panic!("read baseline {}: {e}", path.display()),
        };
        let value: serde_json::Value = match serde_json::from_str(&raw) {
            Ok(value) => value,
            Err(e) => panic!("parse baseline {}: {e}", path.display()),
        };
        let obj = match value.as_object() {
            Some(obj) => obj,
            None => panic!("baseline {} must be a JSON object", path.display()),
        };

        for key in REQUIRED_KEYS {
            assert!(
                obj.contains_key(key),
                "baseline {} must record `{key}`",
                path.display()
            );
        }

        let findings = match obj.get("findings").and_then(|v| v.as_array()) {
            Some(findings) => findings,
            None => panic!("baseline {} must record `findings: []`", path.display()),
        };
        for (i, f) in findings.iter().enumerate() {
            let verdict = match f.get("verdict").and_then(|v| v.as_str()) {
                Some(verdict) => verdict,
                None => panic!("baseline {} finding {i} missing `verdict`", path.display()),
            };
            assert!(
                ["TP", "FP", "needs_review"].contains(&verdict),
                "baseline {} finding {i} has invalid verdict {verdict:?} (must be TP|FP|needs_review)",
                path.display()
            );
        }
    }
}
/// Sanity-checks the committed `tests/recall_gaps_baseline.json`
/// snapshot.
///
/// The file must parse as a JSON object that records
/// `recall_gaps_tests` and `corpus_finding_lines`. Inside
/// `corpus_finding_lines`, `rule_id_full` must be an object mapping
/// rule_id → count, and `rule_id_distinct` must be a non-negative
/// integer equal to the number of entries in `rule_id_full`.
///
/// # Panics
///
/// Panics (failing the test) when the file cannot be read or parsed, or
/// when any of the structural requirements above is not met.
#[test]
fn baseline_loads() {
    let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/recall_gaps_baseline.json");
    let raw = std::fs::read_to_string(&path)
        .unwrap_or_else(|e| panic!("read baseline {}: {e}", path.display()));
    let value: serde_json::Value = serde_json::from_str(&raw)
        .unwrap_or_else(|e| panic!("parse baseline {}: {e}", path.display()));
    assert!(value.is_object(), "baseline must be a JSON object");
    assert!(
        value.get("recall_gaps_tests").is_some(),
        "baseline must record `recall_gaps_tests`"
    );

    // Look the section up once; a missing key fails with the same
    // message as before.
    let corpus = value
        .get("corpus_finding_lines")
        .expect("baseline must record `corpus_finding_lines`");
    let rule_full = corpus.get("rule_id_full").expect(
        "baseline must record `corpus_finding_lines.rule_id_full` (per-rule snapshot, not just top-15) so phases 03-11 can prove rule-level non-regression",
    );
    let map = rule_full
        .as_object()
        .expect("`rule_id_full` must be a JSON object mapping rule_id → count");

    // A missing or non-integer `rule_id_distinct` used to default to 0,
    // which let an empty `rule_id_full` pass vacuously and otherwise
    // produced a misleading "0 distinct" mismatch. Require it explicitly.
    let distinct = corpus
        .get("rule_id_distinct")
        .and_then(|v| v.as_u64())
        .expect(
            "baseline must record `corpus_finding_lines.rule_id_distinct` as a non-negative integer",
        );
    // Widening usize → u64 is lossless on every supported target,
    // whereas narrowing the JSON value to usize could truncate on
    // 32-bit hosts.
    let covered = map.len() as u64;
    assert_eq!(
        covered, distinct,
        "rule_id_full ({}) must cover every distinct rule_id ({})",
        covered, distinct
    );
}