nyx/tests/recall_gaps.rs

1695 lines
64 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! # Recall-gap integration harness (phase 01 baseline)
//!
//! Pitboss phase 01 stands up the skeleton; phases 02–11 grow it. The suite
//! is green on a fresh `master` because every gap-area test starts
//! `#[ignore]`d, so this file compiles and runs without depending on engine
//! work that has not landed yet.
//!
//! ## Where fixtures live
//!
//! Each gap area owns a subdirectory under
//! `tests/fixtures/realistic/<area>/`. The phase that un-ignores a test is
//! responsible for populating its fixture. Fixtures are copied into a fresh
//! tempdir per scan (see [`common::recall::scan_fixture`]) so SQLite,
//! `nyx.conf`, or stray index artefacts cannot leak between tests.
//!
//! ## `ExpectedFinding` shape
//!
//! Each test asserts findings with a tuple of
//! `(rule_id, file_suffix, sink_line, source_line)`:
//!
//! - `rule_id` — exact prefix match on `Diag.id`. Taint findings carry a
//! trailing ` (source N:M)` suffix that the matcher strips before
//! comparison.
//! - `file_suffix` — `Diag.path.ends_with(file_suffix)`, which lets callers
//! ignore the tempdir prefix supplied by the harness.
//! - `sink_line` — exact match on `Diag.line` (1-based).
//! - `source_line` — optional `N` parsed from the ` (source N:M)` suffix
//! on `Diag.id`. Use `None` when the originating line is unstable across
//! refactors of the fixture.
//!
//! ## Phase ownership
//!
//! Every phase un-ignores exactly the tests it owns. The mapping is stable:
//!
//! | Phase | Test fn |
//! |-------|-------------------------------|
//! | 02 | `async_await` |
//! | 03 | `promise_then_callback`, |
//! | | `promise_all_taint`, |
//! | | `for_await_of_stream`, |
//! | | `promise_then_chain_reentrant`|
//! | 05 | `fs_promises_*` |
//! | 06 | `jsx_dangerous_html` |
//! | 07 | `orm_builders` |
//! | TBD | `ssrf_url_builders`, |
//! | | `cross_package_ipa`, |
//! | | `nextjs_entrypoints` |
//!
//! Phase 04 ships the TS/JS module resolver foundation but un-ignores no
//! gap tests of its own — the resolver feeds `FuncKey.namespace` for later
//! phases. Phases beyond the table may add further `#[ignore]`d tests;
//! do not move tests between owners.
mod common;
use common::recall::{ExpectedFinding, assert_finding, assert_finding_with_cap, scan_fixture};
use nyx_scanner::labels::Cap;
use std::path::Path;
/// JS/TS await-forwarding: one scan of the `async_await` fixture directory
/// must report the taint flow in both `handler.js` and `handler.ts`.
#[test]
fn async_await_js() {
    let diags = scan_fixture("async_await");

    // JavaScript variant: covers the JavaScript `await_expression`
    // KINDS-map entry.
    let js_flow = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.js",
        sink_line: 6,
        source_line: Some(4),
    };
    assert_finding(&diags, js_flow);

    // TypeScript variant: identical source/sink shape, covering the
    // TypeScript `await_expression` KINDS-map entry. Before this assertion
    // existed the `.ts` fixture was only scanned implicitly by
    // `scan_fixture("async_await")` (a smoke check), so nothing positively
    // guaranteed that the TS grammar's await-forwarding lowered taint the
    // same way. The source is attributed to line 3 (the typed-extractor
    // `req: { body: string }` parameter): the typed-formal pipeline tags the
    // parameter itself as the taint origin, i.e. the canonical handler-input
    // shape, rather than the intermediate `req.body` access on line 4.
    let ts_flow = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.ts",
        sink_line: 5,
        source_line: Some(3),
    };
    assert_finding(&diags, ts_flow);
}
/// Python await-forwarding (phase 12 recall gap).
///
/// tree-sitter-python represents `await x` as a named `await` node, without
/// an `_expression` suffix. The flow asserted here depends on the
/// `"await" => Kind::AwaitForward` entry in `src/labels/python.rs` together
/// with the `Kind`-driven `is_await_forward` flag in `cfg::push_node`;
/// without them the await boundary is never modelled as a 1:1 forward and
/// the FastAPI-shape `await request.json()` source never reaches
/// `cursor.execute`.
#[test]
fn async_await_py() {
    let diags = scan_fixture("async_await/handler.py");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.py",
        sink_line: 8,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Python combinator (phase 12 recall gap).
///
/// `is_promise_combinator` registers `asyncio.gather(...)` as
/// `PromiseCombinatorKind::All` for Python, so the taint of every argument
/// is unioned onto the awaited result.
#[test]
fn async_await_py_gather() {
    let diags = scan_fixture("async_await/gather.py");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "gather.py",
        sink_line: 14,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Rust await-forwarding (phase 12 recall gap).
///
/// `src/labels/rust.rs` maps `x.await` explicitly to `Kind::AwaitForward`,
/// and the `is_await_forward` flag is derived from
/// `lookup(lang, ast.kind()) == Kind::AwaitForward` instead of the raw-string
/// comparison `ast.kind() == "await_expression"`. The header-shape source
/// crosses the await and lands in the `Command::new("sh").arg(&cmd)`
/// shell-injection sink.
#[test]
fn async_await_rs() {
    let diags = scan_fixture("async_await/handler.rs");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.rs",
        sink_line: 26,
        source_line: Some(25),
    };
    assert_finding(&diags, expected);
}
/// Rust combinator, qualified macro form (phase 12 recall gap).
///
/// `tokio::join!(...)` parses as a `macro_invocation` whose arguments sit
/// inside a `token_tree`. `extract_arg_uses` walks that token tree and
/// splits on `,`, giving the SSA Call one argument group per future;
/// `is_promise_combinator("rust", "tokio::join")` then sends it through the
/// existing combinator transfer. The unioned env-var taint reaches
/// `reqwest::get`, the SSRF sink.
#[test]
fn async_await_rs_join() {
    let diags = scan_fixture("async_await/tokio_join.rs");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "tokio_join.rs",
        sink_line: 11,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Rust combinator, bare macro form (phase 12 deferred fix).
///
/// With `use tokio::join;` in scope the call site is just `join!(...)`.
/// When an import witness is present, `cfg::push_node` rewrites the bare
/// macro callee text to `tokio::join`, so the existing combinator transfer
/// fires exactly as it does for the qualified form.
#[test]
fn async_await_rs_join_bare() {
    let diags = scan_fixture("async_await/tokio_join_bare.rs");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "tokio_join_bare.rs",
        sink_line: 13,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// `.then(cb)` callback propagation (phase 03 recall gap).
///
/// The receiver Promise's resolved value flows into the first parameter of
/// the callback. The trace is attributed to the inner `db.query(data)` sink:
/// the callback-pattern emission is paired with the chain-hop site promotion
/// that lifts the callback's own-body sink coordinates into the finding's
/// primary location.
#[test]
fn promise_then_callback() {
    let diags = scan_fixture("promise_then_callback");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "promise_then_callback.ts",
        sink_line: 9,
        source_line: Some(7),
    };
    assert_finding(&diags, expected);
}
/// `Promise.all([...])` taint union (phase 03 recall gap).
///
/// The combinator's result carries the union of its element taints, and
/// `p.then(cb)` exposes that value to the `db.query(items)` sink through the
/// callback-pattern emission with chain-hop site promotion.
#[test]
fn promise_all_taint() {
    let diags = scan_fixture("promise_all_taint");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "promise_all_taint.ts",
        sink_line: 8,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Per-element precision for `const [a, b] = await Promise.all([safe,
/// tainted])`. The SSA lowering rewrite in src/ssa/lower.rs maps each
/// destructure binding to `Assign(arg_uses[0][i])`, so `a` binds only to
/// the literal `"ok"` and `b` binds only to the tainted `req.body`. The
/// scalar union from `try_apply_promise_combinator` is bypassed for the
/// per-binding values.
///
/// Skip-slot cases (`const [, b]`, `const [a, ,]`) also need pattern-position
/// indexing: `TaintMeta.array_pattern_indices` carries the source-order
/// position of each binding so the rewrite picks `pd_args[index]` rather
/// than `pd_args[binding_offset]`.
///
/// The test scans the `promise_all_destruct` fixture directory once and then,
/// fixture file by fixture file, asserts that a `taint-unsanitised-flow`
/// finding exists at every tainted-aligned sink line and that none exists at
/// any safe-aligned sink line.
#[test]
fn promise_all_destruct_per_index() {
    const RULE: &str = "taint-unsanitised-flow";

    let findings = scan_fixture("promise_all_destruct");

    // Positive check: a taint flow must be reported at `file:sink_line`.
    // The source line is deliberately left unconstrained.
    let expect_flow = |file: &'static str, sink_line: usize| {
        assert_finding(
            &findings,
            ExpectedFinding {
                rule_id: RULE,
                file_suffix: file,
                sink_line,
                source_line: None,
            },
        );
    };
    // Negative predicate: is any taint flow reported at `file:line`?
    let flows_at = |file: &str, line: usize| {
        findings
            .iter()
            .any(|f| f.path.ends_with(file) && f.line == line && f.id.starts_with(RULE))
    };
    // Failure-message helper: every finding for `file`, one per line. Only
    // evaluated when an assertion fails, because `assert!` formats lazily.
    let dump = |file: &str| {
        findings
            .iter()
            .filter(|f| f.path.ends_with(file))
            .map(|f| format!(" {} :: {}:{}", f.id, f.path, f.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    // Positive: line 17 sink reachable from req.body via index-1 binding.
    expect_flow("promise_all_destruct_fp.ts", 17);
    // Negative: line 16 binds `a` to the literal "ok"; pre-fix the scalar
    // union painted `a` with req.body's taint and produced a FP here.
    assert!(
        !flows_at("promise_all_destruct_fp.ts", 16),
        "destructure index-0 binding `a` must not carry req.body taint; got:\n{}",
        dump("promise_all_destruct_fp.ts"),
    );

    // Skip-slot positives: only the index-aligned tainted bindings should fire.
    for sink_line in [24usize, 36] {
        expect_flow("promise_all_skip_slots.ts", sink_line);
    }
    // Skip-slot negatives: lines 28 (`c` from `[, c]` of `[tainted, safe]`)
    // and 32 (`d` from `[d, ,]` of `[safe, tainted, "extra"]`) must NOT fire.
    for forbidden_line in [28usize, 32] {
        assert!(
            !flows_at("promise_all_skip_slots.ts", forbidden_line),
            "skip-slot binding at line {forbidden_line} must not carry req.body taint; got:\n{}",
            dump("promise_all_skip_slots.ts"),
        );
    }

    // Python `asyncio.gather` destructure: `pattern_list` + `tuple_pattern`
    // share the same per-index rewrite as JS/TS arrays. Positives at lines
    // 32 / 40 / 50 (tainted-aligned bindings) must fire; negatives at lines
    // 33 / 41 / 51 (safe-aligned bindings) must NOT fire.
    for sink_line in [32usize, 40, 50] {
        expect_flow("asyncio_gather_destruct_fp.py", sink_line);
    }
    for forbidden_line in [33usize, 41, 51] {
        assert!(
            !flows_at("asyncio_gather_destruct_fp.py", forbidden_line),
            "Python asyncio.gather binding at line {forbidden_line} must not carry request.args taint; got:\n{}",
            dump("asyncio_gather_destruct_fp.py"),
        );
    }

    // Bare-array RHS destructure (`const [a, b] = [safe, tainted]`)
    // mirror of the Promise.all destructure precision, gated on
    // `info.call.callee.is_none()` so the combinator path is not
    // affected. Each binding emits its own SSA op keyed on the
    // source-order RHS slot.
    for sink_line in [28usize, 36] {
        expect_flow("bare_array_literal_destruct_fp.ts", sink_line);
    }
    for forbidden_line in [27usize, 37, 44] {
        assert!(
            !flows_at("bare_array_literal_destruct_fp.ts", forbidden_line),
            "JS/TS bare-array binding at line {forbidden_line} must not carry req.body taint; got:\n{}",
            dump("bare_array_literal_destruct_fp.ts"),
        );
    }

    // Ruby parallel assignment `a, b = [array_literal]` now gets per-index
    // precision via the bare-array RHS rewrite at `src/ssa/lower.rs`.
    // Each binding emits its own SSA op keyed on its source-order RHS
    // slot — ident slots Assign the slot's value, literal slots emit
    // Const(None). Positives at handler lines 23 / 30 / 35 (tainted-
    // aligned bindings) must fire; negatives at 24 / 29 / 36 / 37
    // (literal-aligned bindings) must NOT fire.
    for sink_line in [23usize, 30, 35] {
        expect_flow("ruby_parallel_assignment_fp.rb", sink_line);
    }
    for forbidden_line in [24usize, 29, 36, 37] {
        assert!(
            !flows_at("ruby_parallel_assignment_fp.rb", forbidden_line),
            "Ruby parallel assignment binding at line {forbidden_line} must not carry name taint; got:\n{}",
            dump("ruby_parallel_assignment_fp.rb"),
        );
    }

    // Complex-slot bare-array RHS destructure (`const [a, b] =
    // [normalize(req.body.cmd), 'static']`). The helper now classifies
    // call / binary / subscript / member access / template-string slots
    // as `Complex(inner_uses)` rather than bailing. Each Complex slot
    // emits a slot-scoped `Assign` (or `Source` when the outer node
    // carries a Source label), so the literal-aligned binding is
    // correctly clean. Positives at lines 32 / 39 / 46 / 54 / 62 fire;
    // negatives at lines 33 / 40 / 47 / 55 / 63 must NOT fire.
    for sink_line in [32usize, 39, 46, 54, 62] {
        expect_flow("complex_slot_destruct_fp.ts", sink_line);
    }
    for forbidden_line in [33usize, 40, 47, 55, 63] {
        assert!(
            !flows_at("complex_slot_destruct_fp.ts", forbidden_line),
            "complex-slot literal binding at line {forbidden_line} must not carry req.body taint; got:\n{}",
            dump("complex_slot_destruct_fp.ts"),
        );
    }

    // Per-slot Source classification: when two Complex slots sit next to
    // each other and ONLY one slot's subtree contains a Source-classified
    // member-expression, the safe Complex sibling stays slot-scoped instead
    // of inheriting the outer-node Source. Pre-session 0047 the legacy
    // outer-node fallback painted both slots, producing a FP on the safe
    // sibling's binding.
    for sink_line in [27usize, 34, 41] {
        expect_flow("complex_complex_per_slot_fp.ts", sink_line);
    }
    for forbidden_line in [28usize, 35, 42] {
        assert!(
            !flows_at("complex_complex_per_slot_fp.ts", forbidden_line),
            "safe Complex sibling at line {forbidden_line} must not inherit per-slot Source; got:\n{}",
            dump("complex_complex_per_slot_fp.ts"),
        );
    }

    // Slot-scoped transitive taint: when the outer destructure node
    // carries a Source label AND another Complex slot's subtree classifies
    // as Source, the safe Complex sibling whose own subtree contains an
    // identifier bound to a tainted local (e.g.
    // `helper(tainted_local)` where `tainted_local = req.body.cmd`)
    // must still propagate the inner ident's taint through the slot-scoped
    // `Assign`. Pre-session 0048 the kill arm emitted `Const(None)` which
    // dropped the transitive taint.
    for sink_line in [29usize, 30, 36] {
        expect_flow("complex_transitive_taint_fp.ts", sink_line);
    }
    {
        let forbidden_line = 37usize;
        assert!(
            !flows_at("complex_transitive_taint_fp.ts", forbidden_line),
            "safe Complex sibling at line {forbidden_line} must not inherit outer Source; got:\n{}",
            dump("complex_transitive_taint_fp.ts"),
        );
    }
}
/// `for await (const x of iter)` (phase 03 recall gap): the loop binding `x`
/// is tainted from the iterator (Web Streams / async-iterable request body).
#[test]
fn for_await_of_stream() {
    let diags = scan_fixture("for_await_of_stream");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "for_await_of_stream.ts",
        sink_line: 5,
        source_line: None,
    };
    assert_finding(&diags, expected);
}
/// Re-entrancy guard for a two-level `.then` chain (phase 03).
///
/// The inner callback awaits another promise. The test confirms that the
/// inline cache does not deadlock, that the k=1 depth limit still holds, and
/// that outer-level taint reaches the sink even though the inner level
/// cannot recurse.
#[test]
fn promise_then_chain_reentrant() {
    let diags = scan_fixture("promise_then_chain");

    // Two `.then` levels are intentional. With k=1 the inner `.then(inner)`
    // cannot recurse, so the engine treats the inner callback body as opaque
    // and propagates conservatively. The only requirements are that the scan
    // does not panic and that *some* finding is reported for this file
    // (taint reaches the inner sink via the outer flow).
    assert!(
        diags
            .iter()
            .any(|d| d.path.ends_with("promise_then_chain.ts")),
        "expected at least one finding from promise_then_chain.ts, got:\n{}",
        diags
            .iter()
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
/// Named ESM import from `fs/promises` (phase 05 recall gap).
///
/// With `import { readFile } from 'fs/promises'`, the call
/// `await readFile(req.body.path)` is a FILE_IO sink. The bare-name
/// `readFile` matcher fires only because the file's import table maps the
/// binding to `fs/promises`, which satisfies the
/// `LabelGate::ImportedFromModule` gate.
#[test]
fn fs_promises_readfile() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_readfile.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_readfile.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
/// `await open(req.query.path, "r")` (phase 05 recall gap): the same import
/// gate applied to a different fs/promises method, confirming the matcher
/// list includes `open` as well as `readFile`.
#[test]
fn fs_promises_open() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_open.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_open.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
/// `node:`-prefixed specifier (phase 05 recall gap):
/// `import { writeFile } from 'node:fs/promises'`. The gate must accept
/// this spelling as well as the unprefixed one.
#[test]
fn fs_promises_node_import() {
    let diags = scan_fixture("fs_promises/path_traversal_node_fs_promises_import.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_node_fs_promises_import.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
/// Namespace import (phase 05 recall gap):
/// `import * as fsp from 'fs/promises'`. The call `fsp.readFile(...)` has to
/// pass the gate through the receiver-name path of the local-import view.
#[test]
fn fs_promises_namespace_import() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_namespace.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_namespace.ts",
        sink_line: 11,
        source_line: Some(10),
    };
    assert_finding(&diags, expected);
}
/// CommonJS destructured require (phase 05 recall gap):
/// `const { readFile } = require('fs/promises')`.
/// `extract_local_import_view` records the destructured binding, so the
/// bare-name call still passes the gate.
#[test]
fn fs_promises_require_form() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_require.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_require.ts",
        sink_line: 10,
        source_line: Some(9),
    };
    assert_finding(&diags, expected);
}
/// Namespace-of-namespace alias (phase 05 recall gap):
/// `import * as fs from 'fs'; const fsp = fs.promises;`.
///
/// The promises-alias extension on `extract_local_import_view` adds
/// `fsp -> fs/promises`, so `fsp.readFile(path)` passes the gate even though
/// the file has no explicit `import ... from 'fs/promises'` line.
#[test]
fn fs_promises_alias_form() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_alias.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_alias.ts",
        sink_line: 14,
        source_line: Some(13),
    };
    assert_finding(&diags, expected);
}
/// CommonJS flavour of the alias shape (phase 05 recall gap):
/// `const fsp = require('fs').promises;`. The gate is the same as for the
/// ESM-import alias; promises-alias recognises the `.promises` projection on
/// the bare `require('fs')` call.
#[test]
fn fs_promises_alias_require_form() {
    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_alias_require.ts");
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "path_traversal_fs_promises_alias_require.ts",
        sink_line: 12,
        source_line: Some(11),
    };
    assert_finding(&diags, expected);
}
/// Phase 05 negative: a `readFile` defined by the user, with no import, must
/// not trigger the gated FILE_IO sink. This is exactly what the import gate
/// exists to prevent.
#[test]
fn fs_promises_safe_userfn() {
    const FIXTURE: &str = "path_traversal_fs_promises_safe_userfn.ts";

    let diags = scan_fixture("fs_promises/path_traversal_fs_promises_safe_userfn.ts");

    // Any taint-flow or unguarded-sink finding on the fixture counts as a leak.
    let gate_fired = diags
        .iter()
        .filter(|d| d.path.ends_with(FIXTURE))
        .any(|d| {
            d.id.starts_with("taint-unsanitised-flow") || d.id.starts_with("cfg-unguarded-sink")
        });

    assert!(
        !gate_fired,
        "user-defined readFile should not fire the fs/promises gate; got:\n{}",
        diags
            .iter()
            .filter(|d| d.path.ends_with(FIXTURE))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
/// React JSX `<div dangerouslySetInnerHTML={{__html: x}} />` (phase 06
/// recall gap).
///
/// The CFG builder synthesises a sink call from the JSX attribute, so the
/// auto-seeded `input` formal flows into HTML_ESCAPE at the line of the
/// `__html: input` value span. One scan of the fixture directory covers the
/// positive page, three sanitised/literal negatives and the ternary-branch
/// positive.
#[test]
fn jsx_dangerous_html() {
    let diags = scan_fixture("jsx_dangerous_html");

    // True when `file` has any taint-flow or unguarded-sink finding.
    let sink_fired = |file: &str| {
        diags.iter().any(|d| {
            d.path.ends_with(file)
                && (d.id.starts_with("taint-unsanitised-flow")
                    || d.id.starts_with("cfg-unguarded-sink"))
        })
    };
    // All findings for `file`, one per line, for failure messages.
    let dump = |file: &str| {
        diags
            .iter()
            .filter(|d| d.path.ends_with(file))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let page_flow = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "page.tsx",
        sink_line: 8,
        source_line: None,
    };
    assert_finding(&diags, page_flow);

    // Negative — `__html` is a string literal, no taint flows.
    assert!(
        !sink_fired("page_safe_literal.tsx"),
        "literal __html must not fire dangerouslySetInnerHTML; got:\n{}",
        dump("page_safe_literal.tsx"),
    );

    // Negative — `__html: DOMPurify.sanitize(input)` is sanitized.
    assert!(
        !sink_fired("page_indirect.tsx"),
        "DOMPurify.sanitize-routed payload must not fire dangerouslySetInnerHTML; got:\n{}",
        dump("page_indirect.tsx"),
    );

    // Negative — `__html: pipe(input, sanitizeHtml, DOMPurify.sanitize)` —
    // the fp-ts composition recogniser detects sanitizers in argument
    // position and suppresses the synthetic sink's argument-side flow.
    assert!(
        !sink_fired("page_pipe.tsx"),
        "pipe(...sanitizers) payload must not fire dangerouslySetInnerHTML; got:\n{}",
        dump("page_pipe.tsx"),
    );

    // Positive (item 11) — JSX inside a ternary RHS branch. The synthesis
    // hook in `lower_ternary_branch` reaches the `__html: input` value span
    // even though the wrapping arm short-circuits into the ternary diamond.
    assert!(
        sink_fired("page_ternary.tsx"),
        "ternary-branch dangerouslySetInnerHTML must fire a sink; got nothing for page_ternary.tsx"
    );
}
/// ORM query-builder raw-SQL escape hatches (phase 07 recall gap).
///
/// Positive shapes:
/// - Drizzle `sql.raw(x)` and tagged-template `sql\`...\``
///   (leading-id `ImportedFromModule(&["drizzle-orm"])` gate)
/// - Sequelize `sequelize.literal(x)` through receiver-type
///   qualification (`TypeKind::Sequelize` → `Sequelize.literal`)
/// - TypeORM `repo.query(...)` through receiver-type qualification
///   (`TypeKind::TypeOrmRepo` → `TypeOrmRepo.query`)
/// - Knex `db.whereRaw(...)` through the file-level
///   `FileImportsModule(&["knex"])` gate
/// - the `sqli_mikroorm_execute.ts` fixture
///
/// Negative shapes:
/// - parameterised TypeORM `repo.query("...", [const])` stays silent
/// - bare `whereRaw` / `literal` calls in a file without ORM imports
/// - the `sqli_knex_type_only_safe.ts` fixture
#[test]
fn orm_builders() {
    // (file, sink_line) — sink_line points at the actual SQL builder call.
    // `sqli_typeorm_query.ts` used to assert line 17 (`res.json(rows)`) and
    // was satisfied by a coincidental XSS finding. The real
    // `repo.query(...)` sink is on line 16, and the cap-aware assertion
    // pins the SQL_QUERY capability so an XSS regression cannot mask a
    // missing receiver-type-qualified ORM rule.
    const POSITIVES: [(&str, usize); 6] = [
        ("sqli_drizzle_sql_raw.ts", 13),
        ("sqli_drizzle_tagged_template.ts", 14),
        ("sqli_sequelize_literal.ts", 14),
        ("sqli_typeorm_query.ts", 16),
        ("sqli_knex_where_raw.ts", 15),
        ("sqli_mikroorm_execute.ts", 13),
    ];
    const NEGATIVES: [&str; 3] = [
        "sqli_typeorm_safe_parameterized.ts",
        "sqli_no_orm_import_safe.ts",
        "sqli_knex_type_only_safe.ts",
    ];

    let diags = scan_fixture("orm_builders");
    let sql_query = Cap::SQL_QUERY.bits();

    for &(fixture, line) in &POSITIVES {
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: fixture,
            sink_line: line,
            source_line: None,
        };
        assert_finding_with_cap(&diags, expected, sql_query);
    }

    for file in NEGATIVES {
        // A leak is a taint-flow or unguarded-sink finding on this fixture
        // whose evidence carries the SQL_QUERY capability bit.
        let leak = diags.iter().any(|d| {
            let right_rule = d.id.starts_with("taint-unsanitised-flow")
                || d.id.starts_with("cfg-unguarded-sink");
            d.path.ends_with(file)
                && right_rule
                && d.evidence
                    .as_ref()
                    .map_or(false, |e| (e.sink_caps & sql_query) != 0)
        });
        assert!(
            !leak,
            "ORM negative fixture {file} must not fire SQL_QUERY; got:\n{}",
            diags
                .iter()
                .filter(|d| d.path.ends_with(file))
                .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }
}
/// SSRF URL-builder shapes (phase 08 recall gap).
///
/// Positive shapes:
/// - `new URL(taintedPath)` carries the path argument's taint into the
///   constructed URL value (no label rule, no summary — handled by the
///   URL-constructor pass added in Phase 08).
/// - `u.searchParams.set(k, taintedV)` / `.append(...)` taints the receiver
///   URL through the searchParams alias rule.
/// - `fetch({ url: taintedUrl, ... })` passes through the
///   destination-aware filter on the SSRF gate.
/// - `fetch(target)` where `target: URL` carries SSA-level `TypeKind::Url`
///   and the constructor-propagated taint.
///
/// Negative shape:
/// - `new URL(req.body.path, "https://api.cal.com")` — the literal base
///   anchors an origin-locked StringFact prefix that
///   `is_string_safe_for_ssrf` honours, so SSRF stays silent.
#[test]
fn ssrf_url_builders() {
    const POSITIVES: [(&str, usize); 7] = [
        ("ssrf_new_url.ts", 12),
        ("ssrf_searchparams_set.ts", 13),
        ("ssrf_searchparams_append.ts", 12),
        ("ssrf_fetch_object_form.ts", 11),
        ("ssrf_fetch_url_typed_arg.ts", 13),
        ("ssrf_fetch_object_shorthand.ts", 13),
        ("ssrf_fetch_object_shorthand.ts", 19),
    ];
    const ORIGIN_LOCKED: &str = "ssrf_url_origin_locked.ts";

    let diags = scan_fixture("ssrf_url_builders");
    let ssrf = Cap::SSRF.bits();

    for &(fixture, line) in &POSITIVES {
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: fixture,
            sink_line: line,
            source_line: None,
        };
        assert_finding_with_cap(&diags, expected, ssrf);
    }

    // Negative: origin-locked `new URL(path, "https://api.cal.com")` must
    // not fire SSRF — the abstract-string prefix-lock suppresses it.
    let leak = diags.iter().any(|d| {
        d.path.ends_with(ORIGIN_LOCKED)
            && d.evidence
                .as_ref()
                .map_or(false, |e| (e.sink_caps & ssrf) != 0)
            && (d.id.starts_with("taint-unsanitised-flow")
                || d.id.starts_with("cfg-unguarded-sink"))
    });
    assert!(
        !leak,
        "origin-locked URL must not fire SSRF; got:\n{}",
        diags
            .iter()
            .filter(|d| d.path.ends_with(ORIGIN_LOCKED))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n"),
    );
}
/// Cross-language SSRF + URL-builder coverage (phase 14 recall gap).
///
/// The counterpart of `ssrf_url_builders` (JS/TS) for Python, Java, Rust,
/// Go, Ruby and PHP. For each language the fixture set contains:
///
/// * a positive — a tainted source reaching the language's canonical HTTP
///   client sink, which must fire `Cap::SSRF`;
/// * an origin-locked negative — a `(literal_base, tainted_path)` URL
///   builder shape, where the abstract-string prefix lock honoured by
///   `is_string_safe_for_ssrf` suppresses the SSRF sink;
/// * a search-params positive — a tainted URL passed positionally to a sink
///   added in Phase 14 (`OkHttpClient.newCall`,
///   `\GuzzleHttp\Client::request`, etc.), so the new label rules are
///   exercised alongside the existing flat sinks.
#[test]
fn ssrf_cross_language() {
    let diags = scan_fixture("ssrf");
    let ssrf = Cap::SSRF.bits();

    // True when `file` has a taint-flow or unguarded-sink finding whose
    // evidence carries the SSRF capability bit.
    let ssrf_fired = |file: &str| {
        diags.iter().any(|d| {
            d.path.ends_with(file)
                && d.evidence
                    .as_ref()
                    .map_or(false, |e| (e.sink_caps & ssrf) != 0)
                && (d.id.starts_with("taint-unsanitised-flow")
                    || d.id.starts_with("cfg-unguarded-sink"))
        })
    };
    // All findings for `file`, one per line, for failure messages.
    let dump = |file: &str| {
        diags
            .iter()
            .filter(|d| d.path.ends_with(file))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let positives = [
        // Python — tainted full URL flowing into requests.get / request.
        "ssrf_py_positive.py",
        "ssrf_py_search_params.py",
        // Java — HttpClient.send + OkHttpClient.newCall (Phase 14 sink).
        "SsrfJavaPositive.java",
        "SsrfJavaSearchParams.java",
        // Rust — reqwest::get + Client::new.get (chained verb-on-instance).
        "ssrf_rs_positive.rs",
        "ssrf_rs_search_params.rs",
        // Go — http.Get + http.NewRequest.
        "ssrf_go_positive.go",
        "ssrf_go_search_params.go",
        // Ruby — Net::HTTP.get + Faraday.get (Phase 14 sink).
        "ssrf_rb_positive.rb",
        "ssrf_rb_search_params.rb",
        // Ruby Faraday.new(url: tainted) construction-time SSRF and
        // Net::HTTP.start(host, port, proxy_addr: tainted) proxy-tainted
        // Destination gates added in the Phase 14 follow-up.
        "ssrf_rb_faraday_new.rb",
        "ssrf_rb_net_http_proxy.rb",
        // PHP — curl_exec via curl_setopt CURLOPT_URL gate (Phase 14)
        // + Guzzle Client::request (Phase 14 sink).
        "ssrf_php_positive.php",
        "ssrf_php_search_params.php",
    ];
    for file in positives {
        assert!(
            ssrf_fired(file),
            "SSRF expected to fire on {file}; got:\n{}",
            dump(file),
        );
    }

    let negatives = [
        "ssrf_py_origin_locked.py",
        "SsrfJavaOriginLocked.java",
        "ssrf_rs_origin_locked.rs",
        "ssrf_rs_origin_locked_const_fmt.rs",
        "ssrf_go_origin_locked.go",
        "ssrf_rb_origin_locked.rb",
        "ssrf_rb_origin_locked_interp.rb",
        "ssrf_php_origin_locked.php",
    ];
    for file in negatives {
        assert!(
            !ssrf_fired(file),
            "origin-locked SSRF must stay silent on {file}; got:\n{}",
            dump(file),
        );
    }
}
/// Phase 15 recall gap: raw-SQL and ORM injection coverage beyond JS/TS.
///
/// This is the Python / Java / Ruby / Go / PHP counterpart of
/// `orm_builders` (JS/TS). The `sqli_xlang` fixture carries, per language:
///
/// * a positive built by raw-string concatenation — tainted user input is
///   concatenated into the SQL string that flows into the language's
///   canonical SQL_QUERY sink;
/// * a positive built by native interpolation — the same shape expressed
///   with a Python f-string inside `text(...)`, Java `String.format`, Ruby
///   `"#{...}"`, Go `fmt.Sprintf`, or PHP `"$var"`;
/// * a parameterised negative — the parameterised API form with a literal
///   SQL template and constant bind args, mirroring phase 07's
///   safe-parameterised approach (the `*_tainted_binds` variants bind a
///   tainted value instead and must still stay silent).
///
/// Positives are asserted through `assert_finding_with_cap` with
/// `Cap::SQL_QUERY`; negatives must not carry any SQL_QUERY-capable
/// `taint-unsanitised-flow` / `cfg-unguarded-sink` finding.
#[test]
fn orm_xlang() {
    let findings = scan_fixture("sqli_xlang");

    // (fixture file, expected sink line)
    let positives: &[(&str, usize)] = &[
        ("sqli_py_psycopg2_concat.py", 16),
        ("sqli_py_sqlalchemy_text_fstring.py", 18),
        ("SqliJavaConcat.java", 18),
        ("SqliJavaHibernateNative.java", 14),
        ("SqliJavaHibernateNamedSession.java", 19),
        ("SqliJavaHibernateChainedSession.java", 23),
        ("sqli_rb_concat.rb", 8),
        ("sqli_rb_where_interp.rb", 9),
        ("sqli_go_concat.go", 14),
        ("sqli_go_gorm_raw.go", 20),
        ("sqli_go_gorm_raw_named.go", 28),
        ("sqli_py_django_qs_bound.py", 14),
        ("sqli_py_django_qs_bare.py", 16),
        ("sqli_php_pdo_concat.php", 9),
        ("sqli_php_doctrine_interp.php", 10),
    ];
    for &(file, sink_line) in positives {
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: file,
            sink_line,
            source_line: None,
        };
        assert_finding_with_cap(&findings, expected, Cap::SQL_QUERY.bits());
    }

    // True when `name` carries a taint / structural finding whose sink
    // capability set includes SQL_QUERY.
    let sql_fires_on = |name: &str| {
        findings.iter().any(|d| {
            let on_file = d.path.ends_with(name);
            let sql_sink = d
                .evidence
                .as_ref()
                .map_or(false, |ev| (ev.sink_caps & Cap::SQL_QUERY.bits()) != 0);
            let rule_matches = d.id.starts_with("taint-unsanitised-flow")
                || d.id.starts_with("cfg-unguarded-sink");
            on_file && sql_sink && rule_matches
        })
    };
    // Every finding reported for `name`, one per line, for failure output.
    let findings_for = |name: &str| {
        findings
            .iter()
            .filter(|d| d.path.ends_with(name))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let negatives: &[&str] = &[
        "sqli_py_param_safe.py",
        // Phase 15 deferred-fix: tainted bind args at arg 1 of
        // `cursor.execute("SELECT ... WHERE x = %s", (tainted,))` must
        // stay silent on SQL_QUERY because `payload_args = &[0]` on the
        // Destination gate restricts the sink scan to arg 0.
        "sqli_py_param_tainted_binds.py",
        "SqliJavaParamSafe.java",
        // Phase 15 deferred-fix (Java): a tainted `setParameter` bind
        // value on a constant `entityManager.createQuery(...)` template
        // must stay silent on SQL_QUERY. Same shape as the Python
        // tainted-binds case; the Java Destination gate on the
        // createQuery family carries `payload_args = &[0]`.
        "SqliJavaParamTaintedBinds.java",
        "sqli_rb_param_safe.rb",
        "sqli_go_param_safe.go",
        // Phase 15 deferred-fix (Go): a tainted bind value at arg 2 of
        // `db.QueryContext(ctx, sql, tainted)` must stay silent. The
        // Destination gate on `db.QueryContext` carries
        // `payload_args = &[1]`, restricting the sink scan to the SQL
        // string at arg 1.
        "sqli_go_param_tainted_binds.go",
        "sqli_php_param_safe.php",
    ];
    for &file in negatives {
        assert!(
            !sql_fires_on(file),
            "parameterised SQLi negative {file} must stay silent on SQL_QUERY; got:\n{}",
            findings_for(file),
        );
    }
}
/// Phase 09 recall gap: cross-package inter-procedural analysis through
/// FuncKey namespace resolution.
///
/// * `unsafeHandler` calls `escapeHtmlNoop`, a passthrough imported from
///   `@scope/util/sanitize`. The engine picks up the imported callee's SSA
///   summary via step 0.7 of `resolve_callee_full`, so `req.query.x` taint
///   is propagated into `res.send` on line 7 (source on line 5).
/// * `safeHandler` calls `stripTags`, a real `replace`-based sanitizer
///   imported from `@scope/util/strip`, and must therefore produce no
///   `taint-unsanitised-flow` at `handler.ts:13`.
#[test]
fn cross_package_ipa() {
    let findings = scan_fixture("cross_package_ipa");

    // Positive: the passthrough "sanitizer" must not hide the flow.
    let expected = ExpectedFinding {
        rule_id: "taint-unsanitised-flow",
        file_suffix: "handler.ts",
        sink_line: 7,
        source_line: Some(5),
    };
    assert_finding(&findings, expected);

    // Negative: the genuinely sanitised sink on line 13 must stay quiet.
    let safe_hit = findings
        .iter()
        .filter(|d| d.path.ends_with("handler.ts") && d.line == 13)
        .any(|d| d.id.starts_with("taint-unsanitised-flow"));
    if safe_hit {
        let reported = findings
            .iter()
            .filter(|d| d.path.ends_with("handler.ts"))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n");
        panic!(
            "cross-package sanitizer fixture must stay silent at handler.ts:13; got:\n{}",
            reported
        );
    }
}
/// Phase 10 recall gap: Next.js entry-point detection.
///
/// Shapes covered by the `nextjs_entrypoints` fixture:
///
/// - App Router POST handler (`app/api/users/route.ts`): the first formal
///   is typed as `TypeKind::Request`, so `await req.json()` surfaces as a
///   SQL_QUERY sink at the `db.query(body)` call.
/// - File-level `'use server'` directive (`nextjs_server_action.ts`,
///   `nextjs_use_server_directive.ts`): the formals of every exported
///   function are seeded as Source taint at SSA entry.
/// - Function-level `'use server'`
///   (`nextjs_use_server_function_level.ts`): only the function carrying
///   the directive is treated as a server action.
/// - `<form action={fn}>` JSX binding (`nextjs_form_action.tsx`): the
///   named callee is tagged `EntryKind::FormAction` and its first formal
///   is seeded as adversary input.
/// - `next/headers` `cookies()` import-gated source: the gated rule fires
///   only when `cookies` is bound from `next/headers`.
///
/// Each fixture file must report `taint-unsanitised-flow` at the listed
/// sink line; the source line is left unconstrained.
#[test]
fn nextjs_entrypoints() {
    let findings = scan_fixture("nextjs_entrypoints");

    // (fixture file, line of the SQL sink expected to fire)
    let positives: &[(&str, usize)] = &[
        ("route.ts", 11),
        ("nextjs_server_action.ts", 11),
        ("nextjs_use_server_directive.ts", 9),
        ("nextjs_use_server_function_level.ts", 8),
        ("nextjs_form_action.tsx", 10),
        ("nextjs_cookies_source.ts", 12),
    ];
    for &(file, sink_line) in positives {
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: file,
            sink_line,
            source_line: None,
        };
        assert_finding(&findings, expected);
    }
}
/// Phase 13 recall gap: cross-language path traversal.
///
/// Five languages, each with one positive and one sanitized fixture. The
/// positives exercise the FILE_IO sinks added in Phase 13:
/// `Path.read_text` (Python), `Files.readAllBytes` (Java),
/// `tokio::fs::read` (Rust), `os.ReadFile` (Go) and `File.write` (Ruby).
///
/// The sanitized fixtures canonicalise the path through the
/// language-native sanitiser (`Path.resolve` / `Path.normalize` /
/// `PathBuf::canonicalize` / `filepath.Clean` / `Pathname#cleanpath`) and
/// structure the call chain so that no FILE_IO sink reaches the canonical
/// value, which keeps them silent.
///
/// Every fixture file is scanned on its own
/// (`scan_fixture("path_traversal/<file>")`).
#[test]
fn path_traversal_xlang() {
    // (fixture file, sink line, source line)
    let positives: &[(&str, usize, Option<usize>)] = &[
        ("path_traversal.py", 12, Some(11)),
        ("PathTraversal.java", 16, Some(15)),
        ("path_traversal.rs", 22, Some(21)),
        ("path_traversal.go", 14, Some(13)),
        ("path_traversal.rb", 7, Some(6)),
    ];
    for &(file, sink_line, source_line) in positives {
        let findings = scan_fixture(&format!("path_traversal/{file}"));
        let expected = ExpectedFinding {
            rule_id: "taint-unsanitised-flow",
            file_suffix: file,
            sink_line,
            source_line,
        };
        assert_finding_with_cap(&findings, expected, Cap::FILE_IO.bits());
    }

    let negatives: &[&str] = &[
        "path_traversal_safe.py",
        "PathTraversalSafe.java",
        "path_traversal_safe.rs",
        "path_traversal_safe.go",
        "path_traversal_safe.rb",
    ];
    for &file in negatives {
        let findings = scan_fixture(&format!("path_traversal/{file}"));
        // Any taint or structural finding on the file counts as a leak;
        // the capability is deliberately not inspected here.
        let leak = findings
            .iter()
            .filter(|d| d.path.ends_with(file))
            .any(|d| {
                d.id.starts_with("taint-unsanitised-flow")
                    || d.id.starts_with("cfg-unguarded-sink")
            });
        if leak {
            let reported = findings
                .iter()
                .filter(|d| d.path.ends_with(file))
                .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
                .collect::<Vec<_>>()
                .join("\n");
            panic!(
                "path_traversal sanitized fixture {file} must stay silent; got:\n{}",
                reported
            );
        }
    }
}
/// Phase 16 recall gap: framework entry-point detection across languages.
///
/// The `entry_points_xlang` fixture holds one file per framework. Each
/// takes a request input (a function formal or a path-captured kwarg) and
/// pipes it into a language-native sink. Every file must fire the expected
/// sink with the request parameter as Source, via the entry-kind seeding
/// policy in `taint/ssa_transfer/mod.rs`.
///
/// The Spring fixture composes with phase 15 (Hibernate
/// `entityManager.createNativeQuery`), which shows that cross-phase
/// composition holds across languages.
///
/// This test accepts either `taint-unsanitised-flow` or
/// `cfg-unguarded-sink` on each file and does not pin a line.
#[test]
fn entry_points_xlang() {
    let findings = scan_fixture("entry_points_xlang");

    // True when `name` carries at least one taint or structural finding.
    let sink_fires_on = |name: &str| {
        findings
            .iter()
            .filter(|d| d.path.ends_with(name))
            .any(|d| {
                d.id.starts_with("taint-unsanitised-flow")
                    || d.id.starts_with("cfg-unguarded-sink")
            })
    };
    // Every finding reported for `name`, one per line, for failure output.
    let findings_for = |name: &str| {
        findings
            .iter()
            .filter(|d| d.path.ends_with(name))
            .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let positives: &[&str] = &[
        "django_view.py",
        "fastapi_route.py",
        "flask_route.py",
        "spring_controller.java",
        "rails_action.rb",
        "axum_handler.rs",
        "actix_handler.rs",
        "gin_handler.go",
        "express_route.js",
    ];
    for &file in positives {
        assert!(
            sink_fires_on(file),
            "Phase 16 entry-point fixture {file} must fire a taint sink; got:\n{}",
            findings_for(file),
        );
    }
}
/// Precision of Rust entry-kind seeding.
///
/// Typed extractor formals (`Query<T>`, `Json<T>`, `Form<T>`, `Path<T>`,
/// `web::*<T>`) are painted as `Source(UserInput)`; denylisted DI handles
/// (`State<T>`, `Extension<T>`, ...) are not. Without this guard the
/// scoped-lowering lift for Rust handlers would raise a false positive on
/// every database / shared-state sink that consumes a pool handle.
///
/// The positive shape requires the rule id to be specifically
/// `taint-unsanitised-flow` (not `cfg-unguarded-sink`), so a future
/// regression that drops entry-kind seeding is caught by this test.
#[test]
fn rust_entry_kind_typed_extractor_seeding() {
    let findings = scan_fixture("entry_points_xlang_rust");

    // (fixture file, expected sink line)
    let positives: &[(&str, usize)] = &[
        ("axum_query_typed_extractor.rs", 12),
        ("actix_path_typed_extractor.rs", 11),
    ];
    for &(file, sink_line) in positives {
        let hit = findings
            .iter()
            .filter(|d| d.path.ends_with(file) && d.line == sink_line)
            .any(|d| d.id.starts_with("taint-unsanitised-flow"));
        if !hit {
            let reported = findings
                .iter()
                .filter(|d| d.path.ends_with(file))
                .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
                .collect::<Vec<_>>()
                .join("\n");
            panic!(
                "Rust typed-extractor handler {file}:{sink_line} must fire \
                 `taint-unsanitised-flow`; got:\n{}",
                reported
            );
        }
    }

    // Negative: `State<Arc<DbPool>>` formals must not yield
    // `taint-unsanitised-flow`. A `cfg-unguarded-sink` on the same file is
    // acceptable: that is the pre-existing structural backup, not a
    // seeding claim against the formal.
    let state_taint: Vec<String> = findings
        .iter()
        .filter(|d| d.path.ends_with("axum_state_denylist.rs"))
        .filter(|d| d.id.starts_with("taint-unsanitised-flow"))
        .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
        .collect();
    assert!(
        state_taint.is_empty(),
        "State<DbPool> formals must not be painted as Source; got:\n{}",
        state_taint.join("\n"),
    );
}
/// Precision of Python entry-kind seeding for `FlaskRoute`.
///
/// Path-bound formals (`@app.route("/u/<name>")` + `def view(name):`) are
/// painted as `Source(UserInput)`, while routes without path captures stay
/// un-seeded. Before per-formal route-capture gating, Python handlers fell
/// back to `cfg-unguarded-sink` for path-bound flows.
///
/// The positive shape requires the rule id to be specifically
/// `taint-unsanitised-flow` (not `cfg-unguarded-sink`), so a future
/// regression that drops entry-kind seeding is caught by this test. The
/// negative shape pins the absence of taint findings on a no-capture route
/// (no formals, no seed, no flow).
#[test]
fn python_flask_route_path_capture_seeding() {
    let findings = scan_fixture("entry_points_xlang_python");

    // (fixture file, expected sink line)
    let positives: &[(&str, usize)] = &[
        ("flask_path_capture.py", 14),
        ("flask_converter_capture.py", 14),
    ];
    for &(file, sink_line) in positives {
        let hit = findings
            .iter()
            .filter(|d| d.path.ends_with(file) && d.line == sink_line)
            .any(|d| d.id.starts_with("taint-unsanitised-flow"));
        if !hit {
            let reported = findings
                .iter()
                .filter(|d| d.path.ends_with(file))
                .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
                .collect::<Vec<_>>()
                .join("\n");
            panic!(
                "Python Flask path-capture handler {file}:{sink_line} must fire \
                 `taint-unsanitised-flow`; got:\n{}",
                reported
            );
        }
    }

    // Negative: a Flask route with no path captures and a literal sink
    // argument must not surface `taint-unsanitised-flow`.
    let no_capture_taint: Vec<String> = findings
        .iter()
        .filter(|d| d.path.ends_with("flask_no_capture.py"))
        .filter(|d| d.id.starts_with("taint-unsanitised-flow"))
        .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
        .collect();
    assert!(
        no_capture_taint.is_empty(),
        "Flask route without path captures must not paint formals as Source; got:\n{}",
        no_capture_taint.join("\n"),
    );
}
/// Precision of Python entry-kind seeding for `FastApiRoute`.
///
/// Two kinds of formal are painted as `Source(UserInput)`:
///
/// * path-bound formals from `{name}` brace-segment captures
///   (`@app.get("/items/{item_id}")` + `def read_item(item_id: str):`);
/// * Annotated typed extractors (`q: Annotated[str, Query()]`).
///
/// Formals that carry a `Depends(...)` default or a non-extractor type
/// annotation (`db: Session`, `request: Request`) stay un-seeded. Before
/// per-formal gating, FastAPI handlers fell back to `cfg-unguarded-sink`
/// for path-bound flows.
///
/// The positive shapes require the rule id to be specifically
/// `taint-unsanitised-flow`, so a future regression that drops entry-kind
/// seeding is caught by this test. The negative shape pins the absence of
/// `taint-unsanitised-flow` on a DI-only handler.
#[test]
fn python_fastapi_route_per_formal_seeding() {
    let findings = scan_fixture("entry_points_xlang_python_fastapi");

    // (fixture file, expected sink line)
    let positives: &[(&str, usize)] = &[
        ("fastapi_path_capture.py", 18),
        ("fastapi_annotated_query.py", 17),
    ];
    for &(file, sink_line) in positives {
        let hit = findings
            .iter()
            .filter(|d| d.path.ends_with(file) && d.line == sink_line)
            .any(|d| d.id.starts_with("taint-unsanitised-flow"));
        if !hit {
            let reported = findings
                .iter()
                .filter(|d| d.path.ends_with(file))
                .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
                .collect::<Vec<_>>()
                .join("\n");
            panic!(
                "Python FastAPI handler {file}:{sink_line} must fire \
                 `taint-unsanitised-flow`; got:\n{}",
                reported
            );
        }
    }

    // Negative: the DI-only handler must carry no taint-flow finding.
    let depends_taint: Vec<String> = findings
        .iter()
        .filter(|d| d.path.ends_with("fastapi_depends_denylist.py"))
        .filter(|d| d.id.starts_with("taint-unsanitised-flow"))
        .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
        .collect();
    assert!(
        depends_taint.is_empty(),
        "FastAPI Depends(...) DI handle must not be painted as Source; got:\n{}",
        depends_taint.join("\n"),
    );
}
/// Precision of Ruby entry-kind seeding for `SinatraRoute`.
///
/// Path-bound block formals (`get "/u/:name" do |name| ... end`) are
/// painted as `Source(UserInput)`, while routes without path captures stay
/// un-seeded. Before per-formal route-capture gating, Sinatra handlers
/// fell back to `cfg-unguarded-sink` for path-bound flows.
///
/// The positive shape requires the rule id to be specifically
/// `taint-unsanitised-flow`, so a future regression that drops entry-kind
/// seeding is caught by this test. The negative shape pins the absence of
/// taint findings on a no-capture route (no block formals, no seed, no
/// flow).
#[test]
fn ruby_sinatra_route_path_capture_seeding() {
    let findings = scan_fixture("entry_points_xlang_ruby");

    // (fixture file, expected sink line)
    let positives: &[(&str, usize)] = &[
        ("sinatra_path_capture.rb", 9),
        ("sinatra_multi_capture.rb", 8),
    ];
    for &(file, sink_line) in positives {
        let hit = findings
            .iter()
            .filter(|d| d.path.ends_with(file) && d.line == sink_line)
            .any(|d| d.id.starts_with("taint-unsanitised-flow"));
        if !hit {
            let reported = findings
                .iter()
                .filter(|d| d.path.ends_with(file))
                .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
                .collect::<Vec<_>>()
                .join("\n");
            panic!(
                "Ruby Sinatra path-capture handler {file}:{sink_line} must fire \
                 `taint-unsanitised-flow`; got:\n{}",
                reported
            );
        }
    }

    // Negative: the no-capture route must carry no taint-flow finding.
    let no_capture_taint: Vec<String> = findings
        .iter()
        .filter(|d| d.path.ends_with("sinatra_no_capture.rb"))
        .filter(|d| d.id.starts_with("taint-unsanitised-flow"))
        .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
        .collect();
    assert!(
        no_capture_taint.is_empty(),
        "Sinatra route without path captures must not paint formals as Source; got:\n{}",
        no_capture_taint.join("\n"),
    );
}
/// Precision of Go entry-kind handling.
///
/// `GinRoute` handlers (`*gin.Context`, `echo.Context`, `*fiber.Ctx`,
/// `iris.Context`) and `GoNetHttp` handlers
/// (`(w http.ResponseWriter, r *http.Request)`) route adversary bytes
/// through access-path label rules (`c.Query`, `c.Param`, `c.PostForm`,
/// `r.URL.Query`, `r.FormValue`, `r.Header.Get`, ...) rather than through
/// flat formal seeding. This follows the same precedent as the Express
/// `seed_at_all=false` arm: painting the bare `c` / `r` object as
/// `Source(Cap::all())` re-fires excluded lifecycle methods
/// (`c.AbortWithStatus`, `r.Context()`, etc.) as structural sinks.
///
/// The positive shapes require the rule id to be specifically
/// `taint-unsanitised-flow` (the cross-language `entry_points_xlang` test
/// also accepts `cfg-unguarded-sink`), so a future regression that
/// mis-classifies access paths is caught by this test.
#[test]
fn go_entry_kind_label_rules_carry_request() {
    let findings = scan_fixture("entry_points_xlang");

    // (fixture file, expected sink line)
    let positives: &[(&str, usize)] = &[
        ("gin_handler.go", 24),
        ("net_http_handler.go", 21),
    ];
    for &(file, sink_line) in positives {
        let hit = findings
            .iter()
            .filter(|d| d.path.ends_with(file) && d.line == sink_line)
            .any(|d| d.id.starts_with("taint-unsanitised-flow"));
        if !hit {
            let reported = findings
                .iter()
                .filter(|d| d.path.ends_with(file))
                .map(|d| format!(" {} :: {}:{}", d.id, d.path, d.line))
                .collect::<Vec<_>>()
                .join("\n");
            panic!(
                "Go handler {file}:{sink_line} must fire \
                 `taint-unsanitised-flow` via access-path label rules; got:\n{}",
                reported
            );
        }
    }
}
/// Phase 11 + 17 acceptance check for the per-target baseline JSON files.
///
/// Covers `tests/recall_targets/*.json` (Phase 11 JS targets, listed
/// explicitly) and `tests/recall_targets/xlang/<lang>/*.json` (Phase 17
/// cross-language targets, discovered from the filesystem). Each file
/// must:
///
/// * exist and parse via `serde_json` as a JSON object;
/// * record `target`, `clone_url`, `captured_against`, `captured_on` and
///   `pinned_commit`;
/// * record a `findings` array in which every entry carries a
///   `verdict` of `"TP"`, `"FP"` or `"needs_review"`.
///
/// Marked `#[ignore]` because `cargo test --release` should not require a
/// populated baseline directory on a clean clone; the `validate_recall.sh`
/// runbook is the authoritative way to refresh these files. Run explicitly
/// with `cargo test --release --test recall_gaps --
/// --ignored validate_real_world_targets`.
#[test]
#[ignore]
fn validate_real_world_targets() {
    let root = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/recall_targets");

    // Phase 11 JS targets ship at the top level of `recall_targets/`.
    let mut paths: Vec<std::path::PathBuf> = Vec::new();
    for t in ["cal_com", "vercel_commerce", "shadcn_examples", "blitz_apps"] {
        paths.push(root.join(format!("{t}.json")));
    }

    // Phase 17 cross-language targets live under
    // `xlang/<lang>/<target>.json` and are discovered by directory listing,
    // so adding a language or target only requires dropping a JSON file in
    // place. A missing `xlang/` directory is tolerated.
    if let Ok(entries) = std::fs::read_dir(root.join("xlang")) {
        let mut lang_dirs: Vec<std::path::PathBuf> = Vec::new();
        // Unreadable directory entries are skipped rather than reported.
        for entry in entries.flatten() {
            let candidate = entry.path();
            if candidate.is_dir() {
                lang_dirs.push(candidate);
            }
        }
        // Sorted so the validation order does not depend on listing order.
        lang_dirs.sort();

        for lang_dir in lang_dirs {
            let listing = match std::fs::read_dir(&lang_dir) {
                Ok(listing) => listing,
                Err(e) => panic!("read xlang dir {}: {e}", lang_dir.display()),
            };
            let mut json_paths: Vec<std::path::PathBuf> = listing
                .flatten()
                .map(|entry| entry.path())
                .filter(|p| matches!(p.extension().and_then(|ext| ext.to_str()), Some("json")))
                .collect();
            json_paths.sort();
            paths.append(&mut json_paths);
        }
    }

    for path in &paths {
        let raw = match std::fs::read_to_string(path) {
            Ok(text) => text,
            Err(e) => panic!("read baseline {}: {e}", path.display()),
        };
        let value: serde_json::Value = match serde_json::from_str(&raw) {
            Ok(parsed) => parsed,
            Err(e) => panic!("parse baseline {}: {e}", path.display()),
        };
        let obj = match value.as_object() {
            Some(obj) => obj,
            None => panic!("baseline {} must be a JSON object", path.display()),
        };

        // Provenance metadata every baseline has to carry.
        let required_keys = [
            "target",
            "clone_url",
            "captured_against",
            "captured_on",
            "pinned_commit",
        ];
        for key in required_keys {
            if !obj.contains_key(key) {
                panic!("baseline {} must record `{key}`", path.display());
            }
        }

        let findings = match obj.get("findings").and_then(|v| v.as_array()) {
            Some(list) => list,
            None => panic!("baseline {} must record `findings: []`", path.display()),
        };
        for (i, f) in findings.iter().enumerate() {
            let verdict = match f.get("verdict").and_then(|v| v.as_str()) {
                Some(label) => label,
                None => panic!("baseline {} finding {i} missing `verdict`", path.display()),
            };
            assert!(
                ["TP", "FP", "needs_review"].contains(&verdict),
                "baseline {} finding {i} has invalid verdict {verdict:?} (must be TP|FP|needs_review)",
                path.display()
            );
        }
    }
}
/// Sanity-checks the shape of `tests/recall_gaps_baseline.json`.
///
/// The file must be readable, parse as a JSON object, and record:
///
/// * `recall_gaps_tests`;
/// * `corpus_finding_lines`, which in turn holds
///   * `rule_id_full` — a JSON object mapping rule id to count (the full
///     per-rule snapshot, not just a top-15 view), and
///   * `rule_id_distinct` — an unsigned integer equal to the number of
///     entries in `rule_id_full`.
///
/// # Panics
///
/// Panics (failing the test) when the file cannot be read or parsed, when
/// any of the fields above is missing or has the wrong JSON type, or when
/// `rule_id_full` does not have exactly `rule_id_distinct` entries.
#[test]
fn baseline_loads() {
    let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/recall_gaps_baseline.json");
    let raw = std::fs::read_to_string(&path)
        .unwrap_or_else(|e| panic!("read baseline {}: {e}", path.display()));
    let value: serde_json::Value = serde_json::from_str(&raw)
        .unwrap_or_else(|e| panic!("parse baseline {}: {e}", path.display()));
    assert!(value.is_object(), "baseline must be a JSON object");
    assert!(
        value.get("recall_gaps_tests").is_some(),
        "baseline must record `recall_gaps_tests`"
    );

    let corpus = value
        .get("corpus_finding_lines")
        .expect("baseline must record `corpus_finding_lines`");
    let rule_full = corpus.get("rule_id_full").unwrap_or_else(|| {
        panic!(
            "baseline must record `corpus_finding_lines.rule_id_full` (per-rule snapshot, not just top-15) so phases 03-11 can prove rule-level non-regression"
        )
    });
    let map = rule_full
        .as_object()
        .expect("`rule_id_full` must be a JSON object mapping rule_id → count");

    // A missing or non-integer `rule_id_distinct` is reported as such
    // instead of being read as 0. Defaulting to 0 would let an empty
    // `rule_id_full` pass vacuously and would turn every other case into a
    // misleading count mismatch.
    let distinct = corpus
        .get("rule_id_distinct")
        .and_then(|v| v.as_u64())
        .expect(
            "baseline must record `corpus_finding_lines.rule_id_distinct` as an unsigned integer",
        );

    // Compare in u64: widening `usize` is lossless on supported targets,
    // whereas narrowing the JSON value to `usize` could truncate on 32-bit.
    let covered = map.len() as u64;
    assert_eq!(
        covered, distinct,
        "rule_id_full ({}) must cover every distinct rule_id ({})",
        covered, distinct
    );
}