Critical bug fixes and recall improvements (#68)

This commit is contained in:
Eli Peter 2026-05-11 12:42:39 -04:00 committed by GitHub
parent 7d0e7320e2
commit 55247b7fcd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
352 changed files with 60069 additions and 900 deletions

View file

@ -1368,11 +1368,15 @@ fn truncate_prefix_lock(s: &str) -> String {
}
}
/// Returns the longest common prefix of `a` and `b`, compared one Unicode
/// scalar value (`char`) at a time.
///
/// Walking `char`s rather than bytes keeps multi-byte UTF-8 sequences
/// whole. The earlier byte-wise form (`bytes()` followed by `u8 as char`)
/// reinterpreted every lead and continuation byte as a Latin-1 code point
/// and produced mojibake; it could also report a bogus one-character
/// prefix for two different characters that merely share a lead byte
/// (e.g. `é` and `è`). The same char-aligned approach lives at
/// `crate::abstract_interp::string_domain::longest_common_prefix`.
///
/// Returns an empty string when either input is empty or the first
/// characters differ.
fn longest_common_prefix(a: &str, b: &str) -> String {
    a.chars()
        .zip(b.chars())
        // Stop at the first position where the two strings disagree.
        .take_while(|(x, y)| x == y)
        .map(|(x, _)| x)
        .collect()
}
@ -1380,6 +1384,24 @@ fn longest_common_prefix(a: &str, b: &str) -> String {
mod tests {
use super::*;
// ── LCP helper ──────────────────────────────────────────────────────
#[test]
fn lcp_basic() {
assert_eq!(longest_common_prefix("abcdef", "abcxyz"), "abc");
assert_eq!(longest_common_prefix("abc", "abc"), "abc");
assert_eq!(longest_common_prefix("", "abc"), "");
}
#[test]
fn lcp_keeps_utf8_codepoints_whole() {
// Without char-alignment, byte iteration would emit the
// continuation byte 0xA9 as a separate char and corrupt the
// prefix. Both the 2-byte and 3-byte UTF-8 cases must survive.
assert_eq!(longest_common_prefix("héllo", "héllo!"), "héllo");
assert_eq!(longest_common_prefix("名前.json", "名前.txt"), "名前.");
}
// ── Tri lattice laws ────────────────────────────────────────────────
#[test]

View file

@ -350,6 +350,25 @@ impl StringFact {
is_bottom: false,
}
}
/// SSRF helper: build a fact for `new URL(path, base)` where `base` is a
/// literal origin (`https://api.example.com`). The result behaves as
/// `base ++ path`, the locked-host prefix survives even when the path
/// component carries arbitrary taint, and the fact's `prefix` is what
/// `is_string_safe_for_ssrf` consults to suppress the SSRF sink.
///
/// `path` carries any string knowledge for the path component (typically
/// `StringFact::top()`). When the base already ends in `/`, the helper
/// keeps it as-is; otherwise appends a `/` so the prefix unambiguously
/// includes the path separator (the SSRF check looks for
/// `scheme://host/`).
pub fn from_url_with_base(base: &str, path: &Self) -> Self {
let mut anchor = base.to_string();
if !anchor.ends_with('/') {
anchor.push('/');
}
StringFact::exact(&anchor).concat(path)
}
}
impl Lattice for StringFact {
@ -943,6 +962,40 @@ mod tests {
assert!(suffix.ends_with('好'));
}
/// Phase 08: a URL prefix-lock obtained from `new URL(path, base)`
/// must survive concatenation with a tainted (Top-suffix) path
/// component. The `is_string_safe_for_ssrf` check only consults the
/// `prefix`, so the locked-host base must remain intact even when the
/// path-side fact carries no knowledge.
#[test]
fn from_url_with_base_locks_prefix_under_tainted_suffix() {
let base = "https://api.cal.com";
let tainted_path = StringFact::top();
let f = StringFact::from_url_with_base(base, &tainted_path);
assert_eq!(
f.prefix.as_deref(),
Some("https://api.cal.com/"),
"prefix lock must include the path separator"
);
// The path component contributes no suffix knowledge, the result
// must mirror that without losing the prefix lock.
assert!(
f.suffix.is_none(),
"suffix is unknown when path-side fact is Top"
);
}
/// A concrete path component contributes its suffix knowledge to the
/// concatenated URL fact while the base prefix stays locked.
#[test]
fn from_url_with_base_keeps_prefix_with_concrete_path_suffix() {
let base = "https://api.cal.com/";
let path = StringFact::from_suffix(".json");
let f = StringFact::from_url_with_base(base, &path);
assert_eq!(f.prefix.as_deref(), Some("https://api.cal.com/"));
assert_eq!(f.suffix.as_deref(), Some(".json"));
}
/// Concat with empty-string `exact("")` should preserve the other
/// side's prefix/suffix knowledge (empty is the identity).
#[test]

2713
src/ast.rs

File diff suppressed because it is too large Load diff

View file

@ -90,6 +90,13 @@ fn check_ownership_gaps(
if op.sink_class.is_some_and(|c| !c.is_auth_relevant()) {
continue;
}
// NextAuth callbacks are themselves the authentication
// boundary, both reads and mutations inside them operate on
// identity context, so suppress regardless of op kind.
// Other auth helpers stay read-only-suppressed.
if is_nextauth_callback_unit(unit) {
continue;
}
if op.kind == OperationKind::Read && unit_is_auth_helper(unit) {
continue;
}
@ -105,6 +112,40 @@ fn check_ownership_gaps(
if is_delegated_read_with_actor_context(unit, op, &relevant_subjects) {
continue;
}
// Owner-equality scoping: when the same call composes a
// foreign-id subject with an actor-context subject (e.g.
// `db.findFirst({where: {id: input.id, userId: ctx.user.id}})`
// in a TRPC handler), the actor pin tenant-scopes the
// query to the authenticated user. The relevant_subjects
// filter has already excluded actor-context entries; if
// the unfiltered op.subjects still carries an
// actor-context subject, the missing co-binding is the
// owner-eq witness.
//
// `is_actor_context_subject` is constrained: it only
// accepts subjects whose base is in
// `is_self_scoped_session_base` (`req.user`,
// `ctx.session.user`, etc.) OR in the per-unit
// `self_scoped_session_bases` set populated by the
// typed-extractor pre-pass (TRPC alias matches,
// NextAuth callback formals). Generic `user.id` /
// `me.id` does not qualify, so unrelated co-occurrences
// do not over-suppress.
//
// Trade-off: a privesc-via-`data` shape like
// `db.update({where: {id: input.id}, data: {ownerId: ctx.user.id}})`
// would also be suppressed because both subjects appear
// at the call site without arg-position info. That
// pattern is rare and would need its own rule. The
// owner-eq common case removes ~70 cal.com FPs and
// matches the canonical Express / TRPC scoping idiom.
let has_actor_co_subject = op
.subjects
.iter()
.any(|s| is_actor_context_subject(s, unit));
if has_actor_co_subject {
continue;
}
if !has_prior_subject_auth(unit, op, &relevant_subjects) {
findings.push(AuthFinding {
rule_id: rules.rule_id("missing_ownership_check"),
@ -879,7 +920,7 @@ fn unit_is_auth_helper(unit: &AnalysisUnit) -> bool {
.filter(|c| c.is_ascii_alphanumeric())
.map(|c| c.to_ascii_lowercase())
.collect();
(normalized.starts_with("has")
if (normalized.starts_with("has")
|| normalized.starts_with("check")
|| normalized.starts_with("require")
|| normalized.starts_with("verify")
@ -891,6 +932,62 @@ fn unit_is_auth_helper(unit: &AnalysisUnit) -> bool {
|| normalized.contains("access")
|| normalized.contains("permission")
|| normalized.contains("authoriz"))
{
return true;
}
is_nextauth_callback_unit(unit)
}
/// Reports whether `unit` is, or lexically contains, a NextAuth
/// (next-auth) callback definition.
///
/// Two routes lead to `true`:
///
/// * The unit carries `is_nextauth_options_factory`, a flag computed at
///   extraction time for factory functions whose body holds the
///   NextAuth surface (e.g. `export const getOptions = (...) =>
///   ({ callbacks: { ... } })`). The inner callback methods of such a
///   factory do not become units of their own, so their operations
///   accumulate under the outer unit and its name (`getOptions`) gives
///   the suppressor nothing to match on.
/// * The unit is named after a NextAuth callback (`signIn` / `session`
///   / `jwt` / `redirect` / `authorize` / `authorized`) AND at least
///   one of its parameters is a canonical NextAuth formal (`user` /
///   `token` / `account` / `profile` / `credentials` / `session` /
///   `trigger`). This covers the flat
///   `export const authOptions = { callbacks: { ... } }` shape, where
///   each method becomes its own unit.
///
/// Rationale: NextAuth callbacks ARE the authentication boundary.
/// Operations on `user.id` / `existingUser.id` inside them resolve the
/// authenticated identity; they do not look up a tenant-scoped resource
/// from untrusted input.
fn is_nextauth_callback_unit(unit: &AnalysisUnit) -> bool {
    const CALLBACK_NAMES: [&str; 6] = [
        "signIn",
        "session",
        "jwt",
        "redirect",
        "authorize",
        "authorized",
    ];
    const SIGNAL_PARAMS: [&str; 7] = [
        "user",
        "token",
        "account",
        "profile",
        "credentials",
        "session",
        "trigger",
    ];

    // Name alone is too weak (`session`, `redirect` are common function
    // names), so a matching formal parameter is required as well.
    let named_like_callback = || {
        unit.name
            .as_deref()
            .is_some_and(|name| CALLBACK_NAMES.contains(&name))
    };
    let has_signal_param = || {
        unit.params
            .iter()
            .any(|param| SIGNAL_PARAMS.contains(&param.as_str()))
    };

    unit.is_nextauth_options_factory || (named_like_callback() && has_signal_param())
}
fn is_delegated_read_with_actor_context(
@ -1118,6 +1215,7 @@ mod tests {
typed_bounded_vars: HashSet::new(),
typed_bounded_dto_fields: HashMap::new(),
self_scoped_session_bases: HashSet::new(),
is_nextauth_options_factory: false,
}
}

View file

@ -712,6 +712,8 @@ pub fn build_function_unit_with_meta(
.cloned()
.collect();
let is_nextauth_options_factory = body_returns_nextauth_options(node, bytes);
AnalysisUnit {
kind,
name,
@ -734,9 +736,207 @@ pub fn build_function_unit_with_meta(
typed_bounded_vars: preseeded_bounded,
typed_bounded_dto_fields: std::collections::HashMap::new(),
self_scoped_session_bases: state.self_scoped_session_bases,
is_nextauth_options_factory,
}
}
/// Reports whether the subtree rooted at `node` (a function body at the
/// call site in `build_function_unit_with_meta`) contains a NextAuth
/// authority surface. Two object-literal shapes qualify:
///
/// 1. An object with a `callbacks: { ... }` property whose nested
///    entries name at least one canonical NextAuth callback (`signIn`,
///    `session`, `jwt`, `redirect`, `authorize`, `authorized`) — the
///    cal.com idiom
///    `export const getOptions = (...) => ({ callbacks: { ... } })`.
///
/// 2. An object whose entries name at least one distinctive NextAuth
///    Adapter method (`getUserByAccount`, `linkAccount`,
///    `unlinkAccount`, `createVerificationToken`,
///    `useVerificationToken`, `getSessionAndUser`) plus at least one
///    other canonical Adapter method — the cal.com idiom
///    `function CalComAdapter(prisma): Adapter { return { ... } }`.
///
/// In both shapes the inner methods are not enumerated as separate
/// units, so their identity-resolution operations accumulate onto the
/// outer factory's unit. That unit is named `getOptions` /
/// `CalComAdapter`, which `is_nextauth_callback_unit` cannot match by
/// name; this flag is what lets the missing-ownership rule stay quiet
/// inside the surface.
///
/// The search is a depth-first walk over named children that stops at
/// the first qualifying object. It only reacts to `object` /
/// `object_expression` nodes, so it is JS/TS-only by construction and
/// yields `false` for other languages.
fn body_returns_nextauth_options(node: Node<'_>, bytes: &[u8]) -> bool {
    let is_object_literal = matches!(node.kind(), "object" | "object_expression");
    if is_object_literal
        && (object_has_nextauth_callbacks_property(node, bytes)
            || object_is_nextauth_adapter(node, bytes))
    {
        return true;
    }
    // Not a match at this node: keep looking further down the tree.
    named_children(node)
        .into_iter()
        .any(|child| body_returns_nextauth_options(child, bytes))
}
/// Reports whether the object literal `node` has an entry keyed
/// `callbacks` whose value is itself an object literal naming at least
/// one NextAuth callback.
///
/// Entries that `object_entry_key_value` cannot decompose are ignored.
/// A `callbacks` entry whose value is not an object literal (including
/// a `callbacks() {}` method) does not qualify.
fn object_has_nextauth_callbacks_property(node: Node<'_>, bytes: &[u8]) -> bool {
    named_children(node)
        .into_iter()
        .filter_map(|entry| object_entry_key_value(entry, bytes))
        .filter(|(key, _)| key.as_str() == "callbacks")
        .any(|(_, value)| {
            matches!(value.kind(), "object" | "object_expression")
                && object_contains_nextauth_callback_method(value, bytes)
        })
}
/// Reports whether any entry of the object literal `node` is keyed by a
/// NextAuth callback name (see `is_nextauth_callback_name`).
///
/// Both method shorthands (`signIn() { ... }`) and key/value pairs
/// (`signIn: async (...) => ...`) are considered; the key text for each
/// comes from `object_entry_key_value`. Entries of any other kind, and
/// entries missing their name/key field, are skipped.
fn object_contains_nextauth_callback_method(node: Node<'_>, bytes: &[u8]) -> bool {
    named_children(node).into_iter().any(|entry| {
        object_entry_key_value(entry, bytes)
            .is_some_and(|(key, _value)| is_nextauth_callback_name(&key))
    })
}
/// Splits one object-literal entry into `(key text, value node)`.
///
/// * `pair` — the key is normalised with `object_key_text`; the value is
///   the node in the `value` field.
/// * `method_definition` — the method name is normalised with
///   `object_key_text` as well, so a quoted or computed method name
///   (`'signIn'() { ... }`) compares equal to its bare form, matching
///   how `pair` keys and `adapter_object_entry_key` are handled. The
///   method node itself is returned in the value slot, since a method
///   has no separate value node.
///
/// Returns `None` for any other entry kind (spread elements, shorthand
/// identifiers, comments, ...) and for entries missing the expected
/// field.
fn object_entry_key_value<'a>(entry: Node<'a>, bytes: &[u8]) -> Option<(String, Node<'a>)> {
    match entry.kind() {
        "pair" => {
            let key = entry.child_by_field_name("key")?;
            let value = entry.child_by_field_name("value")?;
            Some((object_key_text(key, bytes), value))
        }
        "method_definition" => {
            let name = entry.child_by_field_name("name")?;
            Some((object_key_text(name, bytes), entry))
        }
        _ => None,
    }
}
/// Produces the comparable text of an object key node.
///
/// * String keys (`string` / `string_literal`) have every leading and
///   trailing `"`, `'` and `` ` `` character stripped.
/// * `computed_property_name` is unwrapped to its first named child and
///   normalised recursively; an empty computed name yields `""`.
/// * Everything else — including `property_identifier`, `identifier`
///   and `shorthand_property_identifier` — is returned as its raw
///   source text.
fn object_key_text(node: Node<'_>, bytes: &[u8]) -> String {
    let kind = node.kind();
    if kind == "string" || kind == "string_literal" {
        let quoted = text(node, bytes);
        return quoted
            .trim_matches(|c| c == '"' || c == '\'' || c == '`')
            .to_string();
    }
    if kind == "computed_property_name" {
        return node
            .named_child(0)
            .map(|inner| object_key_text(inner, bytes))
            .unwrap_or_default();
    }
    // Plain identifiers and any unrecognised key kind: use the source text.
    text(node, bytes)
}
/// Reports whether `name` is exactly one of the NextAuth callback names
/// recognised by the extractor. The comparison is case-sensitive.
fn is_nextauth_callback_name(name: &str) -> bool {
    const CALLBACK_NAMES: [&str; 6] = [
        "signIn",
        "session",
        "jwt",
        "redirect",
        "authorize",
        "authorized",
    ];
    CALLBACK_NAMES.contains(&name)
}
/// Reports whether the object literal `node` looks like a NextAuth
/// Adapter implementation.
///
/// Two conditions must hold over the object's entries:
///
/// * at least two entries are keyed by a canonical Adapter method name
///   (`is_nextauth_adapter_method_name`); and
/// * at least one of those is a distinctive name
///   (`is_nextauth_adapter_distinctive_method_name`).
///
/// The distinctive subset (`getUserByAccount`, `linkAccount`,
/// `unlinkAccount`, `createVerificationToken`, `useVerificationToken`,
/// `getSessionAndUser`) names operations specific to the Adapter
/// contract. The rest of the canonical set (createUser / getUser /
/// getUserByEmail / updateUser / deleteUser / createSession /
/// updateSession / deleteSession) overlaps with generic CRUD
/// repositories, which is why a distinctive witness gates recognition.
///
/// Entries are counted per occurrence, not per distinct key.
fn object_is_nextauth_adapter(node: Node<'_>, bytes: &[u8]) -> bool {
    let canonical_keys: Vec<String> = named_children(node)
        .into_iter()
        .filter_map(|entry| adapter_object_entry_key(entry, bytes))
        .filter(|key| is_nextauth_adapter_method_name(key))
        .collect();

    let has_distinctive = canonical_keys
        .iter()
        .any(|key| is_nextauth_adapter_distinctive_method_name(key));

    has_distinctive && canonical_keys.len() >= 2
}
/// Extracts the key text of one object-literal entry for Adapter
/// recognition.
///
/// * `shorthand_property_identifier` (`{ createUser }`) — the entry's
///   own source text is the key.
/// * `method_definition` — the `name` field, normalised with
///   `object_key_text`.
/// * `pair` — the `key` field, normalised with `object_key_text`.
///
/// Any other entry kind, or an entry missing the expected field, gives
/// `None`.
fn adapter_object_entry_key(entry: Node<'_>, bytes: &[u8]) -> Option<String> {
    let key_field = match entry.kind() {
        "shorthand_property_identifier" => return Some(text(entry, bytes)),
        "method_definition" => "name",
        "pair" => "key",
        _ => return None,
    };
    let key_node = entry.child_by_field_name(key_field)?;
    Some(object_key_text(key_node, bytes))
}
/// Reports whether `name` is exactly one of the canonical NextAuth
/// Adapter method names (the full set, distinctive or not). The
/// comparison is case-sensitive.
fn is_nextauth_adapter_method_name(name: &str) -> bool {
    const ADAPTER_METHODS: [&str; 14] = [
        // User records.
        "createUser",
        "getUser",
        "getUserByEmail",
        "getUserByAccount",
        "updateUser",
        "deleteUser",
        // Account linking.
        "linkAccount",
        "unlinkAccount",
        // Sessions.
        "createSession",
        "getSessionAndUser",
        "updateSession",
        "deleteSession",
        // Verification tokens.
        "createVerificationToken",
        "useVerificationToken",
    ];
    ADAPTER_METHODS.contains(&name)
}
/// Reports whether `name` is one of the Adapter method names treated as
/// distinctive, i.e. the subset used as the witness that an object is a
/// NextAuth Adapter rather than a generic CRUD repository. The
/// comparison is case-sensitive.
fn is_nextauth_adapter_distinctive_method_name(name: &str) -> bool {
    const DISTINCTIVE_METHODS: [&str; 6] = [
        "getUserByAccount",
        "linkAccount",
        "unlinkAccount",
        "createVerificationToken",
        "useVerificationToken",
        "getSessionAndUser",
    ];
    DISTINCTIVE_METHODS.contains(&name)
}
#[derive(Default)]
struct UnitState {
call_sites: Vec<CallSite>,
@ -832,14 +1032,13 @@ fn collect_unit_state(
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
collect_call(node, bytes, rules, state)
}
"if_statement" | "elif_clause" | "while_statement" | "do_statement" | "if" | "unless"
| "if_modifier" | "unless_modifier" | "while_modifier" | "until_modifier"
| "while_expression" => {
"while_statement" | "do_statement" | "while_modifier" | "until_modifier"
| "while_expression" | "unless" | "unless_modifier" => {
if let Some(condition) = node.child_by_field_name("condition") {
collect_condition(condition, bytes, rules, state);
}
}
"if_expression" => {
"if_statement" | "elif_clause" | "if_expression" | "if" | "if_modifier" => {
if let Some(condition) = node.child_by_field_name("condition") {
collect_condition(condition, bytes, rules, state);
}
@ -868,6 +1067,12 @@ fn collect_unit_state(
collect_self_actor_binding(node, bytes, rules, state);
collect_self_actor_id_binding(node, bytes, state);
collect_const_string_binding(node, bytes, state);
// JS/TS row-fetch declarators (`const webhook = await
// repo.findById(id)`) need row-population recognition so
// the post-fetch ownership-equality detector can attribute
// back to the row's let line. `collect_row_population`
// accepts the `name` field used by `variable_declarator`.
collect_row_population(node, bytes, state);
}
// Go `id := "id"` / Python `id = "id"` / Java `String id = "id";` /
// Ruby `id = "id"`, language-specific binding nodes that the
@ -1336,11 +1541,13 @@ fn collect_member_alias_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitSt
/// flagged despite a textual auth check on the resulting row.
fn collect_row_population(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
// Most languages expose `pattern`/`value` on let / const / var
// declarations. Ruby `assignment` uses `left`/`right` instead, so
// accept either. When both fields are missing, the node isn't an
// RHS-bound binding and we skip.
// declarations. Ruby `assignment` uses `left`/`right` instead.
// JS/TS `variable_declarator` uses `name`/`value`. Accept any of
// them; when none is present the node isn't an RHS-bound binding
// and we skip.
let Some(pattern) = node
.child_by_field_name("pattern")
.or_else(|| node.child_by_field_name("name"))
.or_else(|| node.child_by_field_name("left"))
else {
return;
@ -2784,8 +2991,8 @@ fn detect_ownership_equality_check(if_node: Node<'_>, bytes: &[u8], state: &mut
let Some(operator) = binary_operator_text(condition, bytes) else {
return;
};
let is_ne = matches!(operator.as_str(), "!=" | "ne");
let is_eq = matches!(operator.as_str(), "==" | "eq");
let is_ne = matches!(operator.as_str(), "!=" | "!==" | "ne");
let is_eq = matches!(operator.as_str(), "==" | "===" | "eq");
if !is_ne && !is_eq {
return;
}
@ -2801,7 +3008,7 @@ fn detect_ownership_equality_check(if_node: Node<'_>, bytes: &[u8], state: &mut
return;
};
if !branch_has_early_exit(fail_branch) {
if !branch_has_early_exit(fail_branch, bytes) {
return;
}
@ -2925,18 +3132,63 @@ fn resolve_else_block(alt: Node<'_>) -> Node<'_> {
alt
}
fn branch_has_early_exit(branch: Node<'_>) -> bool {
named_children(branch).into_iter().any(node_is_early_exit)
fn branch_has_early_exit(branch: Node<'_>, bytes: &[u8]) -> bool {
named_children(branch)
.into_iter()
.any(|n| node_is_early_exit(n, bytes))
}
fn node_is_early_exit(node: Node<'_>) -> bool {
fn node_is_early_exit(node: Node<'_>, bytes: &[u8]) -> bool {
match node.kind() {
"return_expression" | "return_statement" => true,
"expression_statement" => named_children(node).into_iter().any(node_is_early_exit),
// Throwing aborts execution flow. Common in JS/TS / Java
// (`throw new ForbiddenException()`), Python (`raise ...`),
// Ruby (`raise ...`).
"throw_statement" | "throw_expression" | "raise_statement" => true,
// A call whose callee name is in the framework denial set
// (`notFound()` / `redirect()` / `abort()` / `forbidden()` /
// `unauthorized()` / etc.) terminates the request. These
// helpers either throw under the hood (Next.js, Flask) or
// exit the process (`process.exit`, `sys.exit`).
"call_expression" | "call" | "method_invocation" => is_denial_call(node, bytes),
"expression_statement" => named_children(node)
.into_iter()
.any(|n| node_is_early_exit(n, bytes)),
_ => false,
}
}
/// Recognise calls that act as request-terminating denial helpers.
///
/// The callee is located through the first present field among
/// `function` (JS/TS, Python, Go, Rust style call nodes), `name` (Java
/// `method_invocation`) and `method` (Ruby `call`). Its text is reduced
/// to the last segment after any `.` or `::` qualifier, and that leaf
/// is matched against a curated set of framework idioms.
///
/// This is read in `node_is_early_exit` from inside the
/// row-ownership-equality detector, where the ambient context already
/// requires an `owner.field` vs. `self.id` binary comparison; the
/// denial-call match is only the early-exit witness, not the auth
/// signal itself.
///
/// Returns `false` when none of the callee fields is present.
fn is_denial_call(call_node: Node<'_>, bytes: &[u8]) -> bool {
    const DENIAL_HELPERS: &[&str] = &[
        "notFound",
        "redirect",
        "permanentRedirect",
        "unauthorized",
        "forbidden",
        "abort",
        "halt",
        // Ruby has no dedicated raise node; `raise ...` is a plain call.
        "raise",
    ];

    // Probe order keeps `function` / `name` first so existing matches
    // are unaffected; `method` only applies to grammars that lack both.
    let Some(callee_node) = ["function", "name", "method"]
        .iter()
        .find_map(|field| call_node.child_by_field_name(field))
    else {
        return false;
    };
    let callee_text = text(callee_node, bytes);
    let trimmed = callee_text.trim();
    // Strip receiver / module qualifiers: `res.redirect` -> `redirect`,
    // `std::process::abort` -> `abort`.
    let leaf = trimmed.rsplit('.').next().unwrap_or(trimmed);
    let leaf = leaf.rsplit("::").next().unwrap_or(leaf);
    DENIAL_HELPERS.contains(&leaf)
}
pub(super) fn is_owner_field_subject(subject: &ValueRef) -> bool {
let raw = match subject.source_kind {
ValueSourceKind::ArrayIndex => subject.base.as_deref().unwrap_or(&subject.name),
@ -5419,4 +5671,220 @@ mod tests {
));
}
}
#[test]
fn trpc_options_destructure_param_seeds_self_scoped_session_base() {
// Cal.com-shaped TRPC handler: parameter is a destructured
// options alias whose `ctx` field's nested type literal
// references `TrpcSessionUser`. `FileMeta::scan` adds
// `GetOptions` to `trpc_alias_names` (body-text marker hit);
// `collect_trpc_ctx_param` then fires on the
// `required_parameter` and seeds `ctx.user` into the unit's
// `self_scoped_session_bases`.
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter::Language::from(
tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
))
.unwrap();
let src = br#"
type TrpcSessionUser = { id: number };
type GetOptions = {
ctx: { user: NonNullable<TrpcSessionUser> };
input: { id: number };
};
export const handleGet = async ({ ctx, input }: GetOptions) => {
return prisma.booking.findFirst({ where: { id: input.id, userId: ctx.user.id } });
};
"#;
let tree = parser.parse(src.as_slice(), None).unwrap();
let meta = super::FileMeta::scan(tree.root_node(), src);
assert!(
meta.trpc_alias_names.contains("GetOptions"),
"trpc_alias_names missing GetOptions: {:?}",
meta.trpc_alias_names
);
let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled();
let mut model = crate::auth_analysis::model::AuthorizationModel::default();
super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);
let unit = model
.units
.iter()
.find(|u| u.name.as_deref() == Some("handleGet"))
.expect("handleGet unit");
assert!(
unit.self_scoped_session_bases.contains("ctx.user"),
"self_scoped_session_bases missing ctx.user: {:?}",
unit.self_scoped_session_bases
);
}
/// Pin the JS/TS post-fetch ownership-equality recogniser added in
/// session 0011. The `if_statement` arm of `collect_unit_state`
/// must dispatch to `detect_ownership_equality_check` (previously
/// only `if_expression` did), the strict `!==` operator must be
/// recognised as inequality, the framework denial helper
/// `notFound()` must count as an early-exit witness, and the JS/TS
/// `variable_declarator` arm must populate `row_population_data`
/// so the synthetic `Ownership` AuthCheck attributes back to the
/// row's let line.
#[test]
fn detect_post_fetch_ownership_jsts_with_strict_neq_and_denial_call() {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter::Language::from(
tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
))
.unwrap();
let src = br#"
declare class Repo { findById(id: string): Promise<{ userId: number }>; }
declare function getServerSession(): Promise<{ user?: { id: number } } | null>;
declare function notFound(): never;
export async function handleGet({ id }: { id: string }) {
const session = await getServerSession();
if (!session?.user?.id) return null;
const repo: Repo = new Repo();
const webhook = await repo.findById(id);
if (webhook.userId !== session.user.id) {
notFound();
}
return webhook;
}
"#;
let tree = parser.parse(src.as_slice(), None).unwrap();
let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled();
let mut model = crate::auth_analysis::model::AuthorizationModel::default();
super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);
let unit = model
.units
.iter()
.find(|u| u.name.as_deref() == Some("handleGet"))
.expect("handleGet unit");
let webhook_pop = unit
.row_population_data
.get("webhook")
.expect("collect_row_population must populate `webhook` from variable_declarator");
// The population entry for `const webhook = await repo.findById(id)`
// should anchor at the call site rather than at the declaration
// line. In this fixture both sit on the same line, so the
// back-dating is not observable here; the assertion only checks
// that the entry exists and that its first component is positive.
assert!(webhook_pop.0 > 0);
let owner_check = unit
.auth_checks
.iter()
.find(|c| matches!(c.kind, super::AuthCheckKind::Ownership))
.expect("ownership-equality detector must emit an Ownership AuthCheck");
let owner_subject = owner_check
.subjects
.iter()
.find(|s| s.field.as_deref() == Some("userId"))
.expect("Ownership AuthCheck must carry the owner field subject");
assert_eq!(
owner_subject.base.as_deref(),
Some("webhook"),
"owner subject base must be the row var: {:?}",
owner_subject
);
}
/// Pin the NextAuth Adapter factory recogniser added in session
/// 0030. `body_returns_nextauth_options` must flip on for the
/// cal.com `function CalComAdapter(client): Adapter { return {
/// createUser, getUser, getUserByAccount, ... } }` shape so that
/// `is_nextauth_callback_unit` suppresses the missing-ownership
/// rule across the inner Adapter methods (their operations
/// accumulate onto the outer factory's unit).
#[test]
fn nextauth_adapter_factory_flags_outer_unit() {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter::Language::from(
tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
))
.unwrap();
let src = br#"
declare const prismaClient: any;
export default function CalComAdapter(client: any) {
return {
createUser: async (data: { email: string }) => {
const user = await prismaClient.user.create({ data });
return user;
},
getUser: async (id: string) => {
const user = await prismaClient.user.findUnique({ where: { id } });
return user;
},
async getUserByAccount(providerAccountId: { provider: string; providerAccountId: string }) {
const account = await prismaClient.account.findUnique({
where: { provider_providerAccountId: providerAccountId },
select: { user: true },
});
return account?.user ?? null;
},
createVerificationToken: async (data: any) => prismaClient.verificationToken.create({ data }),
useVerificationToken: async (identifier: any) => prismaClient.verificationToken.delete({ where: identifier }),
linkAccount: async (account: any) => prismaClient.account.create({ data: account }),
unlinkAccount: async (providerAccountId: any) => prismaClient.account.delete({ where: providerAccountId }),
};
}
"#;
let tree = parser.parse(src.as_slice(), None).unwrap();
let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled();
let mut model = crate::auth_analysis::model::AuthorizationModel::default();
super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);
let unit = model
.units
.iter()
.find(|u| u.name.as_deref() == Some("CalComAdapter"))
.expect("CalComAdapter unit");
assert!(
unit.is_nextauth_options_factory,
"Adapter factory must set is_nextauth_options_factory: \
{:?}",
unit.name
);
}
/// Negative: a generic CRUD repo with `createUser` / `getUser` /
/// `updateUser` / `deleteUser` (no Adapter-distinctive method
/// names) must NOT be flagged as a NextAuth Adapter. Without the
/// distinctive-name gate any plain user repo would suppress
/// missing-ownership findings.
#[test]
fn nextauth_adapter_recogniser_rejects_generic_crud_repo() {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter::Language::from(
tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
))
.unwrap();
let src = br#"
declare const db: any;
export function makeUserRepo() {
return {
createUser: async (data: any) => db.user.create({ data }),
getUser: async (id: string) => db.user.findUnique({ where: { id } }),
updateUser: async (id: string, data: any) => db.user.update({ where: { id }, data }),
deleteUser: async (id: string) => db.user.delete({ where: { id } }),
};
}
"#;
let tree = parser.parse(src.as_slice(), None).unwrap();
let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled();
let mut model = crate::auth_analysis::model::AuthorizationModel::default();
super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);
let unit = model
.units
.iter()
.find(|u| u.name.as_deref() == Some("makeUserRepo"))
.expect("makeUserRepo unit");
assert!(
!unit.is_nextauth_options_factory,
"generic CRUD repo must NOT be flagged as Adapter: {:?}",
unit.name
);
}
}

View file

@ -1090,6 +1090,7 @@ mod tests {
typed_bounded_vars: HashSet::new(),
typed_bounded_dto_fields: HashMap::new(),
self_scoped_session_bases: HashSet::new(),
is_nextauth_options_factory: false,
}
}
@ -1205,6 +1206,7 @@ mod tests {
typed_bounded_vars: HashSet::new(),
typed_bounded_dto_fields: HashMap::new(),
self_scoped_session_bases: HashSet::new(),
is_nextauth_options_factory: false,
}
}

View file

@ -282,6 +282,23 @@ pub struct AnalysisUnit {
/// destructures route through a base chain, not a top-level
/// binding.
pub self_scoped_session_bases: HashSet<String>,
/// True when this JS/TS unit is the body of a NextAuth authority
/// factory, as decided by `body_returns_nextauth_options` when
/// `build_function_unit_with_meta` builds the unit. Two body
/// shapes set it. The first is an object literal with a
/// `callbacks: { ... }` property whose nested entries name at
/// least one NextAuth canonical callback (`signIn` / `session` /
/// `jwt` / `redirect` / `authorize` / `authorized`), i.e. the file
/// structures the options as
/// `export const X = (...) => ({ callbacks: { ... } })`
/// (cal.com's `getOptions` shape) rather than the flat
/// `export const authOptions = { callbacks: { ... } }` shape.
/// The second is an object literal that looks like a NextAuth
/// Adapter implementation (cal.com's `CalComAdapter` shape).
/// Operations inside the inner method bodies still get
/// accumulated under the outer factory unit (the unit-creation
/// pass does not descend into object-literal method shorthands),
/// so the outer unit is the only place the auth analyser can
/// recognise the identity-resolution context. Consulted by
/// `is_nextauth_callback_unit` so the missing-ownership check
/// suppresses operations inside the factory.
pub is_nextauth_options_factory: bool,
}
/// Per-function summary of which positional parameters are

View file

@ -521,10 +521,21 @@ pub(super) fn build_switch<'a>(
) -> Vec<NodeIndex> {
// Locate the case container. Most grammars expose it as field "body"
// (JS/TS, Java, C, C++); Go puts cases as direct children of the switch.
//
// Per-language gotcha: Go's `expression_case` / `default_case` /
// `type_case` / `communication_case` map to `Kind::Block` (so the case
// body is iterated by the Block handler), so a naive "first Block
// child" fallback latches onto the FIRST case as the container, then
// walks the case's interior looking for case-like children, finds none,
// and falls through to the empty-cases early return (CFG dead-end:
// dispatch If has no False edge, every post-switch statement becomes
// unreachable). Skip case-kind nodes when picking the container so
// Go's flat "cases-as-direct-children" shape uses `ast` itself.
let body = ast.child_by_field_name("body").or_else(|| {
let mut c = ast.walk();
ast.children(&mut c)
.find(|n| matches!(lookup(lang, n.kind()), Kind::Block))
ast.children(&mut c).find(|n| {
matches!(lookup(lang, n.kind()), Kind::Block) && !is_switch_case_kind(n.kind())
})
});
let container = body.unwrap_or(ast);

View file

@ -1202,6 +1202,8 @@ fn clone_preserves_all_sub_structs() {
defines: Some("r".into()),
uses: vec!["a".into(), "b".into()],
extra_defines: vec!["c".into()],
array_pattern_indices: smallvec::SmallVec::new(),
rhs_array_elements: smallvec::SmallVec::new(),
},
ast: AstMeta {
span: (10, 100),
@ -1501,6 +1503,105 @@ fn rust_println_macro_named_arg_lifted() {
assert!(found, "no println! macro_invocation node found");
}
/// `format!(URL_FMT, path)` with `URL_FMT` declared as a top-level
/// `const &str` literal: the `let` node defining `u` is expected to carry
/// a `string_prefix` equal to the literal text before the first `{}`
/// placeholder, the same result an inline
/// `format!("https://api/{}", path)` gives. The original note ties this to
/// `is_string_safe_for_ssrf` locking the host from that prefix.
#[test]
fn rust_format_macro_const_first_arg_seeds_string_prefix() {
    let src = b"const URL_FMT: &str = \"https://api.example.com/users/{}\";\n\
                fn f(path: String) { let u = format!(URL_FMT, path); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    // Keep the prefix of the last node that defines `u` and has a prefix.
    let prefix: Option<String> = cfg
        .node_indices()
        .map(|n| &cfg[n])
        .filter(|info| info.taint.defines.as_deref() == Some("u"))
        .filter_map(|info| info.string_prefix.as_deref())
        .last()
        .map(|p| p.to_string());

    assert_eq!(
        prefix.as_deref(),
        Some("https://api.example.com/users/"),
        "expected URL_FMT const to bridge into the format!() string_prefix",
    );
}
/// Counter-test for the const bridge: a const whose initializer is not a
/// string literal (`const N: usize = 4;`) must leave every node that
/// defines `u` without a `string_prefix`.
#[test]
fn rust_format_macro_const_first_arg_non_string_skipped() {
    let src = b"const N: usize = 4;\n\
                fn f(path: String) { let u = format!(N, path); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    let defining_u = cfg
        .node_indices()
        .map(|n| &cfg[n])
        .filter(|info| info.taint.defines.as_deref() == Some("u"));
    for info in defining_u {
        assert!(
            info.string_prefix.is_none(),
            "non-string const must not seed a prefix; got {:?}",
            info.string_prefix
        );
    }
}
/// `static NAME: &str = "...";` is expected to bridge exactly like a
/// `const`: the node defining `u` should end up with the literal text
/// before the first placeholder as its `string_prefix`.
#[test]
fn rust_format_macro_static_first_arg_seeds_string_prefix() {
    let src = b"static API_BASE: &str = \"https://api.example.com/users/{}\";\n\
                fn f(path: String) { let u = format!(API_BASE, path); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    // Keep the prefix of the last node that defines `u` and has a prefix.
    let prefix: Option<String> = cfg
        .node_indices()
        .map(|n| &cfg[n])
        .filter(|info| info.taint.defines.as_deref() == Some("u"))
        .filter_map(|info| info.string_prefix.as_deref())
        .last()
        .map(|p| p.to_string());

    assert_eq!(
        prefix.as_deref(),
        Some("https://api.example.com/users/"),
        "expected static API_BASE to bridge into the format!() string_prefix",
    );
}
/// Only file-level declarations take part in the prefix bridge. A
/// `const` declared inside the function body must not seed a
/// `string_prefix` on the node defining `u`, even though it is the name
/// the macro's first argument refers to.
#[test]
fn rust_format_macro_inner_const_not_bridged() {
    let src = b"fn f(path: String) {\n\
                const URL_FMT: &str = \"https://api/{}\";\n\
                let u = format!(URL_FMT, path);\n\
                }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    let defining_u = cfg
        .node_indices()
        .map(|n| &cfg[n])
        .filter(|info| info.taint.defines.as_deref() == Some("u"));
    for info in defining_u {
        assert!(
            info.string_prefix.is_none(),
            "inner-fn const must not bridge; got {:?}",
            info.string_prefix
        );
    }
}
#[test]
fn go_no_import_bindings() {
let src = b"package main\nimport alias \"fmt\"\n";
@ -2354,6 +2455,29 @@ fn py_subscript_write_lowers_to_index_set_call() {
});
}
/// Regression (Phase 15 deferred GORM tuple-return case): Go parses
/// `userDb.Raw(sql)` as a `call_expression` whose `function` field is a
/// `selector_expression` (operand `userDb`, field `Raw`). The CFG call
/// node must report `userDb` as its receiver so type-qualified resolution
/// can later rewrite `userDb.Raw` to `GormDb.Raw`. Per the original note,
/// before the fix only JS/TS `member_expression`, Python `attribute` and
/// Rust `field_expression` were recognised and Go yielded no receiver.
#[test]
fn go_selector_expression_call_sets_receiver() {
    let src = br#"package main
func f(userDb int) {
userDb.Raw("SELECT 1")
}
"#;
    let (cfg, _entry) = parse_and_build(src, "go", Language::from(tree_sitter_go::LANGUAGE));

    let call_node =
        find_node_with_callee(&cfg, "userDb.Raw").expect("go: userDb.Raw node should be present");
    let receiver = call_node.call.receiver.as_deref();
    assert_eq!(receiver, Some("userDb"));
}
#[test]
fn go_index_expr_read_lowers_to_index_get_call() {
with_pointer_on(|| {
@ -3217,3 +3341,620 @@ fn js_ternary_branch_subscript_source_classified() {
"expected ternary subscript branch defining `x` to carry a Source label"
);
}
/// Regression: a Go `switch` without a `default` arm, whose single case
/// returns, must leave the statements after the switch reachable from the
/// function entry.
///
/// Go's `expression_case` / `default_case` / `type_case` /
/// `communication_case` all map to `Kind::Block`. `build_switch`'s
/// "first Block child" container fallback therefore used to pick the
/// FIRST case as the container, find no case-like children inside it, hit
/// the empty-cases early return, and leave the dispatch If without a False
/// edge. Every post-switch statement then became unreachable and
/// `cfg-unreachable-sanitizer` fired on real code (gin's
/// `binding/form_mapping.go::setTimeField`, line 469
/// `if isUTC, _ := strconv.ParseBool(...); isUTC` after a no-default
/// `switch tf := strings.ToLower(timeFormat); tf`).
#[test]
fn go_switch_no_default_keeps_post_switch_reachable() {
    use petgraph::visit::Bfs;
    use std::collections::HashSet;

    let src = br#"package p
func f(x string) bool {
switch tf := x; tf {
case "unix":
return false
}
after()
return true
}
"#;
    let (cfg, entry) = parse_and_build(src, "go", Language::from(tree_sitter_go::LANGUAGE));

    // Every node a breadth-first walk from the entry can visit.
    let mut walker = Bfs::new(&cfg, entry);
    let reachable: HashSet<NodeIndex> = std::iter::from_fn(|| walker.next(&cfg)).collect();

    let after = cfg
        .node_indices()
        .find(|n| matches!(cfg[*n].call.callee.as_deref(), Some("after")))
        .expect("expected after() Call node");

    assert!(
        reachable.contains(&after),
        "post-switch `after()` must be reachable from entry; got reachable={:?}",
        reachable
    );
}
/// `qs = User.objects` lowers as a Python `expression_statement` wrapping
/// an `assignment`. The CFG-level `member_field` detector has to look
/// through that wrapper and report `Some("objects")` for the node that
/// defines `qs`, which is what lets the type-fact pass tag the value as
/// `DjangoQuerySet`.
#[test]
fn python_member_field_assignment_detected_for_bare_objects() {
    let src = b"def view(req):\n    qs = User.objects\n";
    let (cfg, _entry) =
        parse_and_build(src, "python", Language::from(tree_sitter_python::LANGUAGE));

    // `member_field` of every node that defines `qs`.
    let detected: Vec<Option<String>> = cfg
        .node_indices()
        .map(|n| &cfg[n])
        .filter(|info| info.taint.defines.as_deref() == Some("qs"))
        .map(|info| info.member_field.clone())
        .collect();

    let saw_objects = detected
        .iter()
        .filter_map(|m| m.as_deref())
        .any(|field| field == "objects");
    assert!(
        saw_objects,
        "expected at least one `qs = ...` CFG node with member_field=Some(\"objects\"); got {:?}",
        detected
    );
}
/// Negative shape: for `qs = User.profile` the detector must report
/// `profile` and must never report `objects`, guarding against the unwrap
/// picking up the wrong field name.
#[test]
fn python_member_field_assignment_non_objects_does_not_match() {
    let src = b"def view(req):\n    qs = User.profile\n";
    let (cfg, _entry) =
        parse_and_build(src, "python", Language::from(tree_sitter_python::LANGUAGE));

    // `member_field` of every node that defines `qs`.
    let detected: Vec<Option<String>> = cfg
        .node_indices()
        .map(|n| &cfg[n])
        .filter(|info| info.taint.defines.as_deref() == Some("qs"))
        .map(|info| info.member_field.clone())
        .collect();

    let has_field = |wanted: &str| detected.iter().any(|m| m.as_deref() == Some(wanted));
    assert!(
        has_field("profile"),
        "expected `qs = User.profile` to detect member_field=Some(\"profile\"); got {:?}",
        detected
    );
    assert!(
        !has_field("objects"),
        "must not falsely tag non-`objects` field; got {:?}",
        detected
    );
}
/// Phase 15 chained-shape closure: a Java local declared as
/// `Session sess = sf.openSession();` should register
/// `(fn_start, "sess")` → `TypeKind::HibernateSession` in the per-file
/// local-receiver-types map. That entry is what lets
/// `find_classifiable_inner_call` rewrite the chained inner
/// `sess.createNativeQuery(...)` to `HibernateSession.createNativeQuery`
/// when the legacy literal-receiver classification misses.
#[test]
fn java_hibernate_session_open_registers_local_receiver_type() {
    /// Pre-order search for the first `method_declaration`; returns its
    /// start byte.
    fn find_method_start(node: tree_sitter::Node<'_>) -> Option<usize> {
        if node.kind() == "method_declaration" {
            return Some(node.start_byte());
        }
        let mut cursor = node.walk();
        let hit = node
            .children(&mut cursor)
            .find_map(|child| find_method_start(child));
        hit
    }

    let src = br#"
class Foo {
void bar(SessionFactory sf, String sql) {
Session sess = sf.openSession();
sess.createNativeQuery(sql).getResultList();
}
}
"#;
    let ts_lang = Language::from(tree_sitter_java::LANGUAGE);
    let _ = parse_to_file_cfg(src, "java", ts_lang);

    // `build_cfg` clears the TLS map when it finishes, while the public
    // lookup helper reads it during construction. Populate it again by
    // hand so the assertion below has something to query.
    let java = Language::from(tree_sitter_java::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&java).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    super::populate_local_receiver_types(&tree, "java", src);

    // The lookup is keyed by the method's start byte.
    let fn_start = find_method_start(tree.root_node()).expect("method_declaration in fixture");
    let got = super::lookup_local_receiver_type(fn_start, "sess");
    assert_eq!(
        got,
        Some(crate::ssa::type_facts::TypeKind::HibernateSession),
        "local `Session sess = sf.openSession()` should bind to HibernateSession"
    );

    // Reset the TLS state so it cannot leak into other tests.
    super::LOCAL_RECEIVER_TYPES.with(|cell| cell.borrow_mut().clear());
}
/// Companion to the Hibernate test: a local whose initializer has no
/// `constructor_type` match must NOT be registered. This pins the
/// recogniser to the callee classifier, not the declared type, so
/// a generic `Session foo = computeFoo()` does not pull an unrelated
/// method into the type-qualified pool.
#[test]
fn java_unrecognised_rhs_does_not_register_local_receiver_type() {
    /// Pre-order search for the first `method_declaration`; returns its
    /// start byte.
    fn find_method_start(node: tree_sitter::Node<'_>) -> Option<usize> {
        if node.kind() == "method_declaration" {
            return Some(node.start_byte());
        }
        let mut cursor = node.walk();
        let hit = node
            .children(&mut cursor)
            .find_map(|child| find_method_start(child));
        hit
    }

    let src = br#"
class Foo {
void bar() {
Session sess = computeSomethingUnrelated();
sess.doSomething();
}
}
"#;
    let java = Language::from(tree_sitter_java::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&java).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    super::populate_local_receiver_types(&tree, "java", src);

    let fn_start = find_method_start(tree.root_node()).expect("method_declaration in fixture");
    let got = super::lookup_local_receiver_type(fn_start, "sess");
    assert_eq!(
        got, None,
        "unrecognised RHS `computeSomethingUnrelated()` must not register a receiver-type"
    );

    // Reset the TLS state so it cannot leak into other tests.
    super::LOCAL_RECEIVER_TYPES.with(|cell| cell.borrow_mut().clear());
}
/// `collect_array_pattern_bindings_indexed` on a JS/TS `array_pattern`:
/// each simple-identifier binding is reported as `(name, position)` in
/// source order. A skip slot (two commas with nothing between) advances
/// the position without emitting a binding, so `const [, b]` gives
/// `[("b", 1)]` and `const [a, ,]` gives `[("a", 0)]`. Complex
/// sub-patterns (`assignment_pattern`, `rest_pattern`, nested
/// `array_pattern`) make the helper return an empty vec so the lowering
/// rewrite falls back to scalar union.
#[test]
fn array_pattern_indexed_bindings_recognise_skip_slots() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// Pre-order search for the first `array_pattern` node.
    fn first_array_pattern<'t>(n: tree_sitter::Node<'t>) -> Option<tree_sitter::Node<'t>> {
        if n.kind() == "array_pattern" {
            return Some(n);
        }
        let mut cursor = n.walk();
        let hit = n
            .children(&mut cursor)
            .find_map(|child| first_array_pattern(child));
        hit
    }

    /// Parse `src` as JavaScript and run the helper on its first
    /// `array_pattern`.
    fn run_case(src: &[u8]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pat = first_array_pattern(tree.root_node()).expect("array_pattern in fixture");
        collect_array_pattern_bindings_indexed(pat, &bytes)
            .into_iter()
            .collect()
    }

    /// Assert that `src` yields exactly the `(name, position)` pairs in `want`.
    fn expect_bindings(src: &[u8], want: &[(&str, usize)]) {
        let want: Vec<(String, usize)> = want
            .iter()
            .map(|&(name, pos)| (name.to_string(), pos))
            .collect();
        assert_eq!(run_case(src), want);
    }

    expect_bindings(b"const [a, b] = x;", &[("a", 0), ("b", 1)]);
    expect_bindings(b"const [, b] = x;", &[("b", 1)]);
    expect_bindings(b"const [a, ,] = x;", &[("a", 0)]);
    expect_bindings(b"const [a, , c] = x;", &[("a", 0), ("c", 2)]);

    // Rest patterns bail to empty so callers fall back to scalar union.
    assert!(run_case(b"const [a, ...rest] = x;").is_empty());
    // Default value patterns also bail.
    assert!(run_case(b"const [a = 1, b] = x;").is_empty());
    // Nested array patterns bail.
    assert!(run_case(b"const [[a, b], c] = x;").is_empty());
}
/// The same helper serves Rust `tuple_pattern`. The `_` wildcard
/// (`_pattern` node) advances the position without producing a binding,
/// mirroring the JS skip-slot behaviour. Per the original note, other
/// complex sub-patterns (tuple-struct, parenthesized) bail to empty; that
/// path is not exercised here.
#[test]
fn tuple_pattern_indexed_bindings_recognise_rust_wildcards() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// Pre-order search for the first `tuple_pattern` node.
    fn first_tuple_pattern<'t>(n: tree_sitter::Node<'t>) -> Option<tree_sitter::Node<'t>> {
        if n.kind() == "tuple_pattern" {
            return Some(n);
        }
        let mut cursor = n.walk();
        let hit = n
            .children(&mut cursor)
            .find_map(|child| first_tuple_pattern(child));
        hit
    }

    /// Parse `src` as Rust and run the helper on its first `tuple_pattern`.
    fn run_case(src: &[u8]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pat = first_tuple_pattern(tree.root_node()).expect("tuple_pattern in fixture");
        collect_array_pattern_bindings_indexed(pat, &bytes)
            .into_iter()
            .collect()
    }

    /// Assert that `src` yields exactly the `(name, position)` pairs in `want`.
    fn expect_bindings(src: &[u8], want: &[(&str, usize)]) {
        let want: Vec<(String, usize)> = want
            .iter()
            .map(|&(name, pos)| (name.to_string(), pos))
            .collect();
        assert_eq!(run_case(src), want);
    }

    expect_bindings(b"fn f() { let (a, b) = (1, 2); }", &[("a", 0), ("b", 1)]);
    expect_bindings(b"fn f() { let (_, b) = (1, 2); }", &[("b", 1)]);
    expect_bindings(b"fn f() { let (a, _) = (1, 2); }", &[("a", 0)]);
    expect_bindings(
        b"fn f() { let (a, _, c) = (1, 2, 3); }",
        &[("a", 0), ("c", 2)],
    );
}
/// Python destructuring targets: `pattern_list` (bare `a, b = ...`) and
/// `tuple_pattern` (parenthesised `(a, b) = ...`) both go through the
/// helper. In Python `_` is an ordinary identifier, not a wildcard, so it
/// is reported at its source position like any other name.
/// `list_splat_pattern` (`a, *rest`) and nested destructures yield an
/// empty result so callers fall back to scalar union.
#[test]
fn pattern_list_indexed_bindings_recognise_python_destructure() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// Pre-order search for the first node whose kind is listed in `kinds`.
    fn first_pattern<'t>(
        n: tree_sitter::Node<'t>,
        kinds: &[&str],
    ) -> Option<tree_sitter::Node<'t>> {
        if kinds.contains(&n.kind()) {
            return Some(n);
        }
        let mut cursor = n.walk();
        let hit = n
            .children(&mut cursor)
            .find_map(|child| first_pattern(child, kinds));
        hit
    }

    /// Parse `src` as Python and run the helper on the first node of one
    /// of the requested kinds.
    fn run_case(src: &[u8], kinds: &[&str]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_python::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pat = first_pattern(tree.root_node(), kinds)
            .unwrap_or_else(|| panic!("no {kinds:?} in fixture"));
        collect_array_pattern_bindings_indexed(pat, &bytes)
            .into_iter()
            .collect()
    }

    /// Assert that `src` yields exactly the `(name, position)` pairs in `want`.
    fn expect_bindings(src: &[u8], kinds: &[&str], want: &[(&str, usize)]) {
        let want: Vec<(String, usize)> = want
            .iter()
            .map(|&(name, pos)| (name.to_string(), pos))
            .collect();
        assert_eq!(run_case(src, kinds), want);
    }

    // Bare comma-list `a, b = ...` is `pattern_list`.
    expect_bindings(b"a, b = (1, 2)\n", &["pattern_list"], &[("a", 0), ("b", 1)]);
    // Three-binding bare comma list.
    expect_bindings(
        b"a, b, c = (1, 2, 3)\n",
        &["pattern_list"],
        &[("a", 0), ("b", 1), ("c", 2)],
    );
    // Underscore is a regular identifier binding in Python.
    expect_bindings(b"_, b = (1, 2)\n", &["pattern_list"], &[("_", 0), ("b", 1)]);
    expect_bindings(b"a, _ = (1, 2)\n", &["pattern_list"], &[("a", 0), ("_", 1)]);
    // Parenthesised destructure surfaces as `tuple_pattern`.
    expect_bindings(
        b"(a, b) = (1, 2)\n",
        &["tuple_pattern"],
        &[("a", 0), ("b", 1)],
    );

    // Splat / rest bindings bail because positional mapping breaks.
    assert!(run_case(b"a, *rest = (1, 2, 3)\n", &["pattern_list"]).is_empty());
    // Nested destructure bails: the recogniser does not recurse into
    // sub-patterns, preserving flat-binding-only semantics.
    assert!(run_case(b"(a, b), c = ((1, 2), 3)\n", &["pattern_list"]).is_empty());
}
/// Ruby `left_assignment_list` is the LHS node tree-sitter-ruby produces
/// for `a, b = ...`. The helper reports each comma-separated identifier
/// as `(name, position)` in source order. Ruby `_` is a normal identifier
/// (as in Python). `rest_assignment` (`*rest`) and
/// `destructured_left_assignment` (parenthesised nested destructure) take
/// the bail branch so callers fall back to scalar union.
#[test]
fn left_assignment_list_indexed_bindings_recognise_ruby_destructure() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// Pre-order search for the first `left_assignment_list` node.
    fn first_left_assignment_list<'t>(n: tree_sitter::Node<'t>) -> Option<tree_sitter::Node<'t>> {
        if n.kind() == "left_assignment_list" {
            return Some(n);
        }
        let mut cursor = n.walk();
        let hit = n
            .children(&mut cursor)
            .find_map(|child| first_left_assignment_list(child));
        hit
    }

    /// Parse `src` as Ruby and run the helper on its first
    /// `left_assignment_list`.
    fn run_case(src: &[u8]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_ruby::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pat =
            first_left_assignment_list(tree.root_node()).expect("left_assignment_list in fixture");
        collect_array_pattern_bindings_indexed(pat, &bytes)
            .into_iter()
            .collect()
    }

    /// Assert that `src` yields exactly the `(name, position)` pairs in `want`.
    fn expect_bindings(src: &[u8], want: &[(&str, usize)]) {
        let want: Vec<(String, usize)> = want
            .iter()
            .map(|&(name, pos)| (name.to_string(), pos))
            .collect();
        assert_eq!(run_case(src), want);
    }

    expect_bindings(b"a, b = [x, y]\n", &[("a", 0), ("b", 1)]);
    expect_bindings(b"a, b, c = [x, y, z]\n", &[("a", 0), ("b", 1), ("c", 2)]);
    // Underscore is a regular identifier binding in Ruby (idiomatic
    // "unused" marker, but still resolvable in scope).
    expect_bindings(b"_, b = [x, y]\n", &[("_", 0), ("b", 1)]);
    expect_bindings(b"a, _ = [x, y]\n", &[("a", 0), ("_", 1)]);
    // Call return value: the helper walks the LHS regardless of RHS shape.
    expect_bindings(b"a, b = func()\n", &[("a", 0), ("b", 1)]);

    // Splat tail bails because rest_assignment is a complex sub-pattern.
    assert!(run_case(b"a, *rest = [x, y, z]\n").is_empty());
    // Parenthesised nested destructure bails because
    // destructured_left_assignment isn't in the simple-identifier
    // whitelist.
    assert!(run_case(b"(a, b) = [x, y]\n").is_empty());
}
/// Exercises `collect_rhs_array_literal_elements`, the helper behind the
/// bare-array destructure rewrite in `src/ssa/lower.rs`. Given the RHS of
/// a destructure assignment it emits one slot per element in source
/// order: `Ident(name)`, `Literal`, or `Complex { uses, source_cap }`.
/// Shapes that would shift index alignment (spread / list splat) and
/// non-array nodes produce an empty result.
#[test]
fn rhs_array_literal_elements_recognise_per_language_shapes() {
    use super::RhsArraySlot;
    use super::helpers::collect_rhs_array_literal_elements;

    /// Map a language label to its tree-sitter grammar.
    fn grammar_for(lang_label: &str) -> Language {
        match lang_label {
            "javascript" => Language::from(tree_sitter_javascript::LANGUAGE),
            "typescript" => Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
            "python" => Language::from(tree_sitter_python::LANGUAGE),
            "ruby" => Language::from(tree_sitter_ruby::LANGUAGE),
            "rust" => Language::from(tree_sitter_rust::LANGUAGE),
            other => panic!("unsupported lang: {}", other),
        }
    }

    /// Pre-order search for the first node whose kind is listed in `kinds`.
    fn find_first<'t>(n: tree_sitter::Node<'t>, kinds: &[&str]) -> Option<tree_sitter::Node<'t>> {
        if kinds.contains(&n.kind()) {
            return Some(n);
        }
        let mut cursor = n.walk();
        let hit = n
            .children(&mut cursor)
            .find_map(|child| find_first(child, kinds));
        hit
    }

    /// Parse `src` in `lang`, locate the first node of one of `rhs_kinds`,
    /// and collect its element slots.
    fn run(lang: &str, src: &[u8], rhs_kinds: &[&str]) -> Vec<RhsArraySlot> {
        let mut parser = tree_sitter::Parser::new();
        let grammar = grammar_for(lang);
        parser.set_language(&grammar).unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let rhs = find_first(tree.root_node(), rhs_kinds).expect("rhs in fixture");
        collect_rhs_array_literal_elements(rhs, lang, &bytes, None)
            .into_iter()
            .collect()
    }

    fn ident(name: &str) -> RhsArraySlot {
        RhsArraySlot::Ident(name.to_string())
    }

    /// `Complex` slot with the given inner uses and source capability.
    fn complex_with(uses: &[&str], source_cap: crate::labels::Cap) -> RhsArraySlot {
        RhsArraySlot::Complex {
            uses: uses.iter().map(|s| s.to_string()).collect(),
            source_cap,
        }
    }

    /// `Complex` slot that is not classified as a Source.
    fn complex(uses: &[&str]) -> RhsArraySlot {
        complex_with(uses, crate::labels::Cap::empty())
    }

    /// `Complex` slot classified as a Source (all capability bits set).
    fn complex_source(uses: &[&str]) -> RhsArraySlot {
        complex_with(uses, crate::labels::Cap::all())
    }

    // JS/TS `array` literal: two bare idents.
    assert_eq!(
        run("javascript", b"const _ = [safe, tainted];\n", &["array"]),
        vec![ident("safe"), ident("tainted")],
    );

    // JS/TS `array` mixing an ident with a string literal.
    assert_eq!(
        run("javascript", b"const _ = [tainted, \"ok\"];\n", &["array"]),
        vec![ident("tainted"), RhsArraySlot::Literal],
    );

    // A call is classified as `Complex` carrying its inner idents instead
    // of bailing. `collect_idents_with_paths` lifts both paths and bare
    // idents, so a member access surfaces as the dotted path
    // (e.g. `req.query.x`) followed by its component idents.
    assert_eq!(
        run("javascript", b"const _ = [fn(x), 'lit'];\n", &["array"]),
        vec![complex(&["fn", "x"]), RhsArraySlot::Literal],
    );

    // JS/TS member access becomes Complex: dotted path + component idents.
    // Per-slot Source classification fires when the slot's subtree carries
    // a member-expression that strip-and-retry-classifies as Source
    // (`req.query.x` → strip `.x` → `req.query` matches the JS Source rule).
    assert_eq!(
        run(
            "javascript",
            b"const _ = [req.query.x, 'lit'];\n",
            &["array"],
        ),
        vec![
            complex_source(&["req.query.x", "req", "query", "x"]),
            RhsArraySlot::Literal,
        ],
    );

    // Sibling precision: a Source-classified Complex slot next to a
    // Complex slot whose subtree does NOT classify as Source. Pre-session
    // 0047 every Complex slot was conservatively re-emitted as Source by
    // the outer-node fallback in `src/ssa/lower.rs`; with per-slot
    // classification the safe sibling stays empty so the SSA lowering can
    // emit `Assign(safe)` instead.
    assert_eq!(
        run(
            "javascript",
            b"const _ = [process.env.X, helper(local)];\n",
            &["array"],
        ),
        vec![
            complex_source(&["process.env.X", "process", "env", "X"]),
            complex(&["helper", "local"]),
        ],
    );

    // JS/TS spread bails entirely (index alignment shifts).
    assert!(run("javascript", b"const _ = [...arr, b];\n", &["array"]).is_empty());

    // JS/TS binary expression becomes Complex with the inner ident.
    assert_eq!(
        run(
            "javascript",
            b"const _ = ['log-' + x, 'lit'];\n",
            &["array"],
        ),
        vec![complex(&["x"]), RhsArraySlot::Literal],
    );

    // Python `list` shape.
    assert_eq!(
        run("python", b"a = [safe, tainted]\n", &["list"]),
        vec![ident("safe"), ident("tainted")],
    );

    // Python `expression_list` (bare commas RHS in `a, b = x, y`).
    assert_eq!(
        run("python", b"a, b = safe, tainted\n", &["expression_list"]),
        vec![ident("safe"), ident("tainted")],
    );

    // Python `tuple` (parenthesised).
    assert_eq!(
        run("python", b"x = (safe, 42)\n", &["tuple"]),
        vec![ident("safe"), RhsArraySlot::Literal],
    );

    // Python list-splat bails.
    assert!(run("python", b"x = [*a, b]\n", &["list"]).is_empty());

    // Ruby `array`.
    assert_eq!(
        run("ruby", b"a, b = [safe, tainted]\n", &["array"]),
        vec![ident("safe"), ident("tainted")],
    );

    // Ruby `array` with literal + ident.
    assert_eq!(
        run("ruby", b"a, b = [tainted, \"safe\"]\n", &["array"]),
        vec![ident("tainted"), RhsArraySlot::Literal],
    );

    // Rust `tuple_expression`.
    assert_eq!(
        run(
            "rust",
            b"fn f(safe: &str, tainted: &str) { let _ = (safe, tainted); }\n",
            &["tuple_expression"]
        ),
        vec![ident("safe"), ident("tainted")],
    );

    // Non-array-shape node returns empty (defensive guard).
    assert!(run("javascript", b"const x = tainted;\n", &["identifier"]).is_empty());
}

View file

@ -2,7 +2,7 @@ use super::helpers::first_member_label;
use super::{
AstMeta, Cfg, EdgeKind, MAX_COND_VARS, MAX_CONDITION_TEXT_LEN, NodeInfo, StmtKind,
collect_idents, connect_all, detect_eq_with_const, detect_negation, has_call_descendant,
member_expr_text, push_node, text_of,
member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
};
use crate::labels::{DataLabel, LangAnalysisRules, classify};
use crate::utils::snippet::truncate_at_char_boundary;
@ -378,7 +378,24 @@ pub(super) fn lower_ternary_branch<'a>(
}
connect_all(g, preds, node, pred_edge);
vec![node]
// React JSX `dangerouslySetInnerHTML={{__html: x}}` synthesis when the
// branch expression is itself a JSX element (or contains one as a
// descendant). Without this, `cond ? <div dangerouslySetInnerHTML=...
// /> : null` and similar ternary-RHS shapes never reach the
// `Kind::Return` / `Kind::Assignment` arms that own the synthesis hook,
// because `build_ternary_diamond` lowers each branch directly.
let post_jsx = try_lower_jsx_dangerous_html(
branch_ast,
&[node],
g,
lang,
code,
enclosing_func,
call_ordinal,
analysis_rules,
);
post_jsx
}
/// Extract `(lhs_ast, ternary_ast)` when `outer_ast` is an expression-statement

View file

@ -554,3 +554,469 @@ fn collect_ruby_symbol_list(node: Node<'_>, code: &[u8], out: &mut Vec<String>)
_ => {}
}
}
/// Collect the route-path capture names declared by a framework routing
/// construct attached to `func_node`.
///
/// Dispatch is by `lang`:
/// * `"python"`: handled by `extract_python_route_captures`, which reads
///   decorators such as `@app.route("/users/<name>")`,
///   `@bp.get("/u/<int:id>")` or `@router.post("/<path:slug>")` and
///   feeds the pattern to both the Flask (`<name>` / `<conv:name>`
///   angle-bracket) and FastAPI (`{name}` / `{name:conv}` brace)
///   collectors.
/// * `"ruby"`: handled by `extract_ruby_route_captures` for Sinatra
///   `get "/u/:name" do |name| ... end`. `func_node` is the block; its
///   parent `call` carries the verb in the `method` field and the path
///   pattern as the first positional string argument. Captures are the
///   `:name` colon-segments.
///
/// Any other language, and any function without a recognised routing
/// pattern, yields an empty `Vec`. The result is strictly additive:
/// downstream consumers gate on `param.contains(name)`, so an empty list
/// preserves prior behaviour.
pub(super) fn extract_route_path_captures<'a>(
    func_node: Node<'a>,
    lang: &str,
    code: &'a [u8],
) -> Vec<String> {
    let mut captures: Vec<String> = Vec::new();
    if lang == "python" {
        extract_python_route_captures(func_node, code, &mut captures);
    } else if lang == "ruby" {
        extract_ruby_route_captures(func_node, code, &mut captures);
    }
    captures
}
/// Scan the decorators of a Python function for routing calls and append
/// the capture names found in their path patterns to `out`.
///
/// `func_node` must sit directly under a `decorated_definition`;
/// otherwise nothing is collected. A decorator contributes only when it
/// is a call on an attribute (`obj.attr(...)`), the attribute name is
/// (case-insensitively) `route` or an HTTP verb, and its first positional
/// argument is a static string. That string is handed to both the Flask
/// and the FastAPI capture collectors.
fn extract_python_route_captures<'a>(func_node: Node<'a>, code: &'a [u8], out: &mut Vec<String>) {
    const ROUTE_ATTRS: [&str; 8] = [
        "route", "get", "post", "put", "patch", "delete", "head", "options",
    ];

    let Some(parent) = func_node
        .parent()
        .filter(|p| p.kind() == "decorated_definition")
    else {
        return;
    };

    let mut walker = parent.walk();
    for decorator in parent
        .children(&mut walker)
        .filter(|ch| ch.kind() == "decorator")
    {
        // The decorator's expression is its first child that is not `@`.
        let mut inner = decorator.walk();
        let Some(expr) = decorator.children(&mut inner).find(|c| c.kind() != "@") else {
            continue;
        };
        if expr.kind() != "call" {
            continue;
        }

        // Callee must be `<something>.<attr>`; read the attribute's text.
        let Some(attr_text) = expr
            .child_by_field_name("function")
            .filter(|target| target.kind() == "attribute")
            .and_then(|target| target.child_by_field_name("attribute"))
            .and_then(|attr| text_of(attr, code))
        else {
            continue;
        };
        let is_route_verb = ROUTE_ATTRS
            .iter()
            .any(|&known| attr_text.eq_ignore_ascii_case(known));
        if !is_route_verb {
            continue;
        }

        let Some(pattern) = expr
            .child_by_field_name("arguments")
            .and_then(|args| first_positional_string_arg(args, code))
        else {
            continue;
        };
        collect_flask_path_captures(&pattern, out);
        collect_fastapi_path_captures(&pattern, out);
    }
}
/// Look at the direct parent of a Ruby `do_block` / `block`. When that
/// parent is a `call` whose method name is (case-insensitively) a
/// Sinatra-style HTTP verb and whose first positional argument is a
/// static string literal, parse the `:name` path captures from that
/// string into `out`. Anything else leaves `out` untouched.
fn extract_ruby_route_captures<'a>(func_node: Node<'a>, code: &'a [u8], out: &mut Vec<String>) {
    const SINATRA_VERBS: [&str; 9] = [
        "get", "post", "put", "patch", "delete", "head", "options", "link", "unlink",
    ];

    let Some(call) = func_node.parent().filter(|p| p.kind() == "call") else {
        return;
    };
    let Some(verb) = call
        .child_by_field_name("method")
        .and_then(|method| text_of(method, code))
    else {
        return;
    };
    let is_sinatra_verb = SINATRA_VERBS
        .iter()
        .any(|&known| verb.eq_ignore_ascii_case(known));
    if !is_sinatra_verb {
        return;
    }
    let Some(pattern) = call
        .child_by_field_name("arguments")
        .and_then(|args| first_positional_string_arg_ruby(args, code))
    else {
        return;
    };
    collect_sinatra_path_captures(&pattern, out);
}
/// Literal text of the first positional argument of a Python
/// `argument_list`, provided that argument is a `string` node.
///
/// Punctuation (`(`, `)`, `,`) and `keyword_argument` children are
/// skipped. The first remaining child decides the outcome: a `string` is
/// unquoted via `python_string_text`, anything else yields `None`. An
/// argument list with no such child also yields `None`.
fn first_positional_string_arg(args: Node<'_>, code: &[u8]) -> Option<String> {
    let mut cursor = args.walk();
    let first = args
        .children(&mut cursor)
        .find(|arg| !matches!(arg.kind(), "(" | ")" | "," | "keyword_argument"))?;
    if first.kind() == "string" {
        python_string_text(first, code)
    } else {
        None
    }
}
/// Remove the quoting from a Python `string` AST node and return its
/// inner text.
///
/// Returns `None` when the node has an `interpolation` child (an f-string
/// whose value is not statically known), when its text cannot be read, or
/// when no matching quote pair is found. Leading prefix letters
/// (`r`/`b`/`u`/`f` in either case) are dropped first; delimiters are then
/// tried in the order `"""`, `'''`, `"`, `'`, and both ends must match.
fn python_string_text(node: Node<'_>, code: &[u8]) -> Option<String> {
    let mut cursor = node.walk();
    if node
        .children(&mut cursor)
        .any(|ch| ch.kind() == "interpolation")
    {
        return None;
    }

    let raw = text_of(node, code)?;
    let body = raw
        .trim()
        .trim_start_matches(['r', 'R', 'b', 'B', 'u', 'U', 'f', 'F']);

    // Triple-quoted forms first so `"""x"""` is not read as `"` + `""x""` + `"`.
    let delimiters = ["\"\"\"", "'''", "\"", "'"];
    let stripped = delimiters.iter().find_map(|&quote| {
        body.strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote))
    })?;
    Some(stripped.to_string())
}
/// Literal text of the first argument of a Ruby `argument_list`, provided
/// that argument is a `string` node.
///
/// Only punctuation (`(`, `)`, `,`) is skipped. The first remaining child
/// decides the outcome: a `string` is unquoted via `ruby_string_text`;
/// hash literals (`pair` / `hash`), block arguments, hash-splat arguments
/// and every other non-string node yield `None`.
fn first_positional_string_arg_ruby(args: Node<'_>, code: &[u8]) -> Option<String> {
    let mut cursor = args.walk();
    let first = args
        .children(&mut cursor)
        .find(|arg| !matches!(arg.kind(), "(" | ")" | ","))?;
    if first.kind() == "string" {
        ruby_string_text(first, code)
    } else {
        None
    }
}
/// Inner text of a Ruby `string` AST node, built by concatenating its
/// `string_content` children in order.
///
/// Returns `None` as soon as an `interpolation` child is seen (`#{...}`
/// makes the pattern non-static), and also when no `string_content` child
/// produced any text. Other child kinds, such as the quote delimiters, are
/// ignored.
fn ruby_string_text(node: Node<'_>, code: &[u8]) -> Option<String> {
    let mut cursor = node.walk();
    // Stays `None` until the first readable `string_content` piece.
    let mut content: Option<String> = None;
    for ch in node.children(&mut cursor) {
        let kind = ch.kind();
        if kind == "interpolation" {
            return None;
        }
        if kind == "string_content" {
            if let Some(piece) = text_of(ch, code) {
                content.get_or_insert_with(String::new).push_str(&piece);
            }
        }
    }
    content
}
/// Collect Sinatra-style `:name` captures from a route pattern.
///
/// A capture is a `:` that sits at the very start of the pattern or
/// directly after a `/`, followed by a non-empty run of `[A-Za-z0-9_]`
/// bytes. Colons anywhere else (for example the `::` in `Foo::Bar`) are
/// ignored. Each name is ASCII-lowercased and appended to `out` unless an
/// equal entry is already present.
fn collect_sinatra_path_captures(pattern: &str, out: &mut Vec<String>) {
    for (colon, _) in pattern.match_indices(':') {
        let starts_segment = colon == 0 || pattern.as_bytes()[colon - 1] == b'/';
        if !starts_segment {
            continue;
        }

        // `:` is a single byte, so `colon + 1` is always a char boundary.
        let tail = &pattern[colon + 1..];
        let name_len = tail
            .bytes()
            .take_while(|b| b.is_ascii_alphanumeric() || *b == b'_')
            .count();
        if name_len == 0 {
            continue;
        }

        let capture = tail[..name_len].to_ascii_lowercase();
        if !out.contains(&capture) {
            out.push(capture);
        }
    }
}
/// Collect FastAPI / Starlette-style `{name}` and `{name:converter}`
/// captures from a route pattern.
///
/// The name is the text before the first `:` inside the braces (FastAPI
/// puts the name first, unlike Flask's `<conv:name>`), trimmed of
/// surrounding whitespace. Names that are empty or contain anything other
/// than ASCII alphanumerics and `_` are skipped. Accepted names are
/// ASCII-lowercased and appended to `out` if not already present. Scanning
/// stops entirely at the first `{` that has no closing `}`.
fn collect_fastapi_path_captures(pattern: &str, out: &mut Vec<String>) {
    let mut rest = pattern;
    while let Some(open) = rest.find('{') {
        let after_open = &rest[open + 1..];
        let Some(close) = after_open.find('}') else {
            // Unterminated segment: nothing further can be parsed.
            return;
        };

        let segment = &after_open[..close];
        let candidate = segment
            .split_once(':')
            .map_or(segment, |(name, _)| name)
            .trim();
        let is_identifier = !candidate.is_empty()
            && candidate
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '_');
        if is_identifier {
            let capture = candidate.to_ascii_lowercase();
            if !out.contains(&capture) {
                out.push(capture);
            }
        }

        rest = &after_open[close + 1..];
    }
}
/// Collect Flask-style `<name>` and `<converter:name>` captures from a
/// route pattern.
///
/// The name is the text after the last `:` inside the angle brackets (or
/// the whole inner text when there is no `:`), trimmed of surrounding
/// whitespace. Names that are empty or contain anything other than ASCII
/// alphanumerics and `_` are skipped. Accepted names are ASCII-lowercased
/// and appended to `out` if not already present. Scanning stops entirely
/// at the first `<` that has no closing `>`.
fn collect_flask_path_captures(pattern: &str, out: &mut Vec<String>) {
    let mut rest = pattern;
    while let Some(open) = rest.find('<') {
        let after_open = &rest[open + 1..];
        let Some(close) = after_open.find('>') else {
            // Unterminated segment: nothing further can be parsed.
            return;
        };

        let segment = &after_open[..close];
        let candidate = segment
            .rsplit_once(':')
            .map_or(segment, |(_, name)| name)
            .trim();
        let is_identifier = !candidate.is_empty()
            && candidate
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '_');
        if is_identifier {
            let capture = candidate.to_ascii_lowercase();
            if !out.contains(&capture) {
                out.push(capture);
            }
        }

        rest = &after_open[close + 1..];
    }
}
// Unit tests for the three route-pattern capture parsers:
// `collect_flask_path_captures` (`<conv:name>`),
// `collect_sinatra_path_captures` (`:name`) and
// `collect_fastapi_path_captures` (`{name:conv}`). Each group goes through
// a small `collect_*_for` helper that runs the parser on a fresh `Vec`.
#[cfg(test)]
mod path_capture_tests {
use super::*;
// ── Flask `<name>` / `<conv:name>` ──────────────────────────────────
// Run the Flask parser on `pat` and return the collected names.
fn collect_for(pat: &str) -> Vec<String> {
let mut out = Vec::new();
collect_flask_path_captures(pat, &mut out);
out
}
#[test]
fn extracts_bare_capture() {
assert_eq!(collect_for("/users/<name>"), vec!["name".to_string()]);
}
#[test]
fn extracts_converter_capture() {
assert_eq!(
collect_for("/items/<int:item_id>"),
vec!["item_id".to_string()]
);
}
#[test]
fn extracts_path_converter() {
assert_eq!(collect_for("/x/<path:slug>"), vec!["slug".to_string()]);
}
#[test]
fn extracts_multiple_captures() {
assert_eq!(
collect_for("/u/<uid>/post/<int:pid>"),
vec!["uid".to_string(), "pid".to_string()]
);
}
#[test]
fn dedupes_repeated_names() {
let mut out = Vec::new();
collect_flask_path_captures("/<a>/<a>", &mut out);
assert_eq!(out, vec!["a".to_string()]);
}
// Despite the name, the delimiter exercised here is Flask's angle
// bracket: an opening `<` with no closing `>` yields nothing.
#[test]
fn rejects_unclosed_brace() {
assert_eq!(collect_for("/<oops"), Vec::<String>::new());
}
#[test]
fn rejects_non_ident_chars() {
assert_eq!(collect_for("/<bad name>"), Vec::<String>::new());
assert_eq!(collect_for("/<name!>"), Vec::<String>::new());
}
#[test]
fn empty_when_no_captures() {
assert_eq!(collect_for("/static/path"), Vec::<String>::new());
}
// ── Sinatra `:name` ─────────────────────────────────────────────────
// Run the Sinatra parser on `pat` and return the collected names.
fn collect_sinatra_for(pat: &str) -> Vec<String> {
let mut out = Vec::new();
collect_sinatra_path_captures(pat, &mut out);
out
}
#[test]
fn sinatra_extracts_bare_capture() {
assert_eq!(
collect_sinatra_for("/users/:name"),
vec!["name".to_string()]
);
}
#[test]
fn sinatra_extracts_multiple_captures() {
assert_eq!(
collect_sinatra_for("/u/:uid/post/:pid"),
vec!["uid".to_string(), "pid".to_string()]
);
}
// A `:` at the very start of the pattern counts as a segment boundary.
#[test]
fn sinatra_extracts_leading_capture() {
assert_eq!(collect_sinatra_for(":root"), vec!["root".to_string()]);
}
#[test]
fn sinatra_dedupes_repeated_names() {
let mut out = Vec::new();
collect_sinatra_path_captures("/:a/:a", &mut out);
assert_eq!(out, vec!["a".to_string()]);
}
// Neither colon of `::` follows a `/`, so no capture is reported.
#[test]
fn sinatra_ignores_double_colon() {
assert_eq!(collect_sinatra_for("/Foo::Bar"), Vec::<String>::new());
}
// A `:` with no identifier bytes after it is not a capture.
#[test]
fn sinatra_ignores_lone_colon() {
assert_eq!(collect_sinatra_for("/users/:"), Vec::<String>::new());
}
#[test]
fn sinatra_empty_when_no_captures() {
assert_eq!(collect_sinatra_for("/static/path"), Vec::<String>::new());
}
// ── FastAPI `{name}` / `{name:conv}` ────────────────────────────────
// Run the FastAPI parser on `pat` and return the collected names.
fn collect_fastapi_for(pat: &str) -> Vec<String> {
let mut out = Vec::new();
collect_fastapi_path_captures(pat, &mut out);
out
}
#[test]
fn fastapi_extracts_bare_capture() {
assert_eq!(
collect_fastapi_for("/items/{item_id}"),
vec!["item_id".to_string()]
);
}
// FastAPI puts the name before the converter (`{item_id:int}`).
#[test]
fn fastapi_extracts_converter_capture() {
assert_eq!(
collect_fastapi_for("/items/{item_id:int}"),
vec!["item_id".to_string()]
);
}
#[test]
fn fastapi_extracts_path_converter() {
assert_eq!(
collect_fastapi_for("/files/{file_path:path}"),
vec!["file_path".to_string()]
);
}
#[test]
fn fastapi_extracts_multiple_captures() {
assert_eq!(
collect_fastapi_for("/u/{uid}/post/{pid:int}"),
vec!["uid".to_string(), "pid".to_string()]
);
}
#[test]
fn fastapi_dedupes_repeated_names() {
let mut out = Vec::new();
collect_fastapi_path_captures("/{a}/{a}", &mut out);
assert_eq!(out, vec!["a".to_string()]);
}
#[test]
fn fastapi_rejects_unclosed_brace() {
assert_eq!(collect_fastapi_for("/{oops"), Vec::<String>::new());
}
#[test]
fn fastapi_rejects_non_ident_chars() {
assert_eq!(collect_fastapi_for("/{bad name}"), Vec::<String>::new());
assert_eq!(collect_fastapi_for("/{name!}"), Vec::<String>::new());
}
#[test]
fn fastapi_empty_when_no_captures() {
assert_eq!(collect_fastapi_for("/static/path"), Vec::<String>::new());
}
}

View file

@ -1,6 +1,7 @@
use super::anon_fn_name;
use super::conditions::unwrap_parens;
use crate::labels::{DataLabel, Kind, classify, lookup};
use smallvec::SmallVec;
use tree_sitter::Node;
// -------------------------------------------------------------------------
@ -210,7 +211,7 @@ pub(crate) fn first_call_ident_with_span<'a>(
.and_then(|f| root_receiver_text(f, lang, code));
match (recv, func) {
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
(_, Some(f)) => Some(f.to_string()),
(_, Some(f)) => Some(f),
_ => None,
}
}
@ -269,6 +270,11 @@ pub(crate) fn find_classifiable_inner_call<'a>(
}
match lookup(lang, c.kind()) {
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
// For CallMethod we also remember the bare receiver
// identifier so we can try a type-qualified rewrite
// when the literal classify misses.
let mut method_receiver: Option<String> = None;
let mut method_name: Option<String> = None;
let ident = match lookup(lang, c.kind()) {
Kind::CallFn => c
.child_by_field_name("function")
@ -286,6 +292,8 @@ pub(crate) fn find_classifiable_inner_call<'a>(
.or_else(|| c.child_by_field_name("receiver"))
.or_else(|| c.child_by_field_name("scope"))
.and_then(|f| root_receiver_text(f, lang, code));
method_receiver = recv.clone();
method_name = func.clone();
match (recv, func) {
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
(_, Some(f)) => Some(f),
@ -302,6 +310,36 @@ pub(crate) fn find_classifiable_inner_call<'a>(
{
return Some((id.clone(), lbl, (c.start_byte(), c.end_byte())));
}
// Receiver-type rewrite fallback: when the literal
// `recv.method` text didn't classify, AND we're inside
// a chained call (parent `n` is itself a call), look
// up `recv`'s locally-bound type and retry with the
// type prefix. E.g. for
// `sess.createNativeQuery(sql).getResultList()`, the
// inner `sess.createNativeQuery` rewrites to
// `HibernateSession.createNativeQuery` (rule fires).
//
// Gated on `n` being a Call-kind so the rewrite only
// fires on chain-hop inner calls. When `n` is an
// expression-statement / variable-declarator / etc.
// the candidate `c` IS the outermost call of the
// statement, and the SSA-time
// `resolve_type_qualified_labels` path handles it
// with multi-label semantics that single-label
// `classify` here would erase.
let parent_is_call = matches!(
lookup(lang, n.kind()),
Kind::CallFn | Kind::CallMethod | Kind::CallMacro
);
if parent_is_call
&& let (Some(recv), Some(method)) = (method_receiver, method_name)
&& let Some(prefix) = crate::cfg::local_receiver_type_prefix(c, &recv, lang)
{
let alt = format!("{prefix}.{method}");
if let Some(lbl) = classify(lang, &alt, extra) {
return Some((alt, lbl, (c.start_byte(), c.end_byte())));
}
}
// Recurse into arguments of this call
if let Some(found) = find_classifiable_inner_call(c, lang, code, extra) {
return Some(found);
@ -412,6 +450,16 @@ pub(crate) fn first_member_label(
}
// PHP/Python/Ruby subscript access: `$_GET['cmd']`, `os.environ['KEY']`, `params[:cmd]`
// Try to classify the object (before the `[`) as a source.
//
// Source-only on the receiver: a subscript reads a value from the
// receiver, so a Sink label found on the receiver text (e.g.
// `response.headers['content-type']`, where `response.headers`
// matches the JS HEADER_INJECTION sink rule) describes the
// *target* of a hypothetical write, not this read. Promoting it
// would fire phantom sinks at every `body =
// response.headers["X"]`-shape line. Sinks/Sanitizers reachable
// via callable positions (function-arg, method-receiver) still
// flow through the outer recursive walk below.
"subscript_expression" | "subscript" | "element_reference" => {
if let Some(obj) = n
.child_by_field_name("object")
@ -419,15 +467,23 @@ pub(crate) fn first_member_label(
.or_else(|| n.child(0))
{
if let Some(txt) = text_of(obj, code)
&& let Some(lbl) = classify(lang, &txt, extra_labels)
&& let Some(lbl @ DataLabel::Source(_)) = classify(lang, &txt, extra_labels)
{
return Some(lbl);
}
// Recurse into the object for nested member accesses
if let Some(lbl) = first_member_label(obj, lang, code, extra_labels) {
// Recurse into the object for nested member accesses, but
// keep the same Source-only restriction as above by passing
// through the dedicated source-only walker.
if let Some(lbl @ DataLabel::Source(_)) =
first_member_label(obj, lang, code, extra_labels)
{
return Some(lbl);
}
}
// Suppress further descent into this subscript node, the outer
// child-walk loop would otherwise enter the receiver via the
// member_expression arm and reattach a value-extraction Sink.
return None;
}
_ => {}
}
@ -678,6 +734,7 @@ pub(crate) fn collect_idents_with_paths(
"identifier"
| "field_identifier"
| "property_identifier"
| "shorthand_property_identifier"
| "shorthand_property_identifier_pattern" => {
if let Some(txt) = text_of(n, code) {
idents.push(txt);
@ -697,16 +754,241 @@ pub(crate) fn collect_idents_with_paths(
}
}
/// Pair every simple identifier bound by an array/tuple destructure
/// pattern with its positional index, in source order.
///
/// Accepted pattern kinds:
/// * `array_pattern` (JS/TS) — `const [a, b] = ...`, including elided
///   slots such as `const [, b] = ...`; each comma advances the index
///   whether or not a binding sits before it.
/// * `tuple_pattern` (Rust, Python) — `let (a, _, b) = ...` /
///   `(a, b) = ...`. The Rust wildcard, a leaf whose kind is literally
///   `"_"`, emits no binding; the following comma still advances the index.
/// * `pattern_list` (Python) — `a, b = ...`. Python `_` is an ordinary
///   `identifier` and is emitted like any other name.
/// * `left_assignment_list` (Ruby) — `a, b = ...`. Ruby `_` is likewise an
///   ordinary identifier.
///
/// An empty `SmallVec` is returned when `pat` is none of these kinds, or
/// when any child is something other than punctuation, an identifier, or
/// the wildcard — e.g. `assignment_pattern` (`[a = 1, b]`), `rest_pattern`
/// (`[a, ...rest]`), Python `list_splat_pattern`, Ruby `rest_assignment`,
/// `destructured_left_assignment`, or a nested `array_pattern` /
/// `object_pattern`. Callers treat the empty result as "no position-aware
/// rewrite available; fall back to scalar union".
pub(crate) fn collect_array_pattern_bindings_indexed(
    pat: Node,
    code: &[u8],
) -> SmallVec<[(String, usize); 4]> {
    let mut bindings: SmallVec<[(String, usize); 4]> = SmallVec::new();

    let supported = matches!(
        pat.kind(),
        "array_pattern" | "tuple_pattern" | "pattern_list" | "left_assignment_list"
    );
    if !supported {
        return bindings;
    }

    let mut slot: usize = 0;
    let mut walker = pat.walk();
    for part in pat.children(&mut walker) {
        let kind = part.kind();

        // Separators are the only thing that moves the position counter.
        if kind == "," {
            slot += 1;
            continue;
        }
        // Brackets/parens carry no information; the Rust wildcard `_`
        // occupies a slot but binds nothing.
        if matches!(kind, "[" | "]" | "(" | ")" | "_") {
            continue;
        }
        // Any other non-identifier child is a complex sub-pattern: discard
        // what was gathered so the caller keeps its scalar-union behavior.
        if !matches!(kind, "identifier" | "shorthand_property_identifier_pattern") {
            bindings.clear();
            return bindings;
        }
        if let Some(name) = text_of(part, code) {
            bindings.push((name, slot));
        }
    }

    bindings
}
/// Walk an array-literal-shape RHS node and return one slot per source-order
/// element. Each slot is one of:
/// * `RhsArraySlot::Ident(name)` — bare identifier element (a PHP-style
///   `variable_name` has its leading `$` stripped).
/// * `RhsArraySlot::Literal` — syntactic literal (string, number, bool,
///   null/nil).
/// * `RhsArraySlot::Complex { uses, source_cap }` — call / binary /
///   subscript / member access / interpolated string / nested array
///   literal / etc. `uses` carries the inner identifier names
///   (member-access paths first, bare idents second, de-duplicated)
///   harvested from the slot's subtree via `collect_idents_with_paths`;
///   `source_cap` is the capability of a Source label found in the slot by
///   `first_member_label`, or `Cap::empty()` when there is none.
///
/// Recognised RHS kinds:
/// * JS/TS / Ruby `array` — `[a, b]`
/// * `array_literal`
/// * Python `list` — `[a, b]`
/// * Python `tuple` — `(a, b)`
/// * Python `expression_list` — bare comma form `a, b`
/// * Rust `tuple_expression` — `(a, b)`
///
/// Comment nodes (`comment`, `line_comment`, `block_comment`) between
/// elements are skipped: they are named children of the literal but not
/// elements, so counting them would shift every later slot's index.
///
/// Bails (returns empty) when the RHS is not one of these kinds, when an
/// identifier's text cannot be read, OR when it contains a slot whose shape
/// would shift index alignment (spread, list splat). Callers treat empty as
/// "no per-element rewrite available; fall back to scalar union".
pub(crate) fn collect_rhs_array_literal_elements(
    rhs: Node,
    lang: &str,
    code: &[u8],
    extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
) -> SmallVec<[crate::cfg::RhsArraySlot; 4]> {
    use crate::cfg::RhsArraySlot;
    use crate::labels::{Cap, DataLabel};

    // Per-slot source classification: when a slot's own subtree carries a
    // Source-labeled member-expression / subscript, capture the Cap so the
    // SSA destructure rewrite emits Source for THIS slot specifically and
    // lets sibling Complex slots stay slot-scoped Assign. Falls back to
    // Cap::empty() when no per-slot source is recognised; the lowering
    // path then consults the outer-node Source flag for conservative
    // preservation of legacy behavior on shapes whose source pattern
    // doesn't text-classify (e.g. a subscript on a tainted local).
    let slot_source_cap = |slot: Node| -> Cap {
        match first_member_label(slot, lang, code, extra_labels) {
            Some(DataLabel::Source(c)) => c,
            _ => Cap::empty(),
        }
    };

    // Build a Complex slot: harvest the identifiers used inside `slot` so
    // the SSA lowering can paint the binding with this slot's contributions
    // only — not the union of every ident on the RHS.
    let complex_slot = |slot: Node| -> RhsArraySlot {
        let mut idents = Vec::new();
        let mut paths = Vec::new();
        collect_idents_with_paths(slot, code, &mut idents, &mut paths);
        let mut uses: SmallVec<[String; 4]> = SmallVec::new();
        for p in paths {
            uses.push(p);
        }
        for ident in idents {
            if !uses.iter().any(|u| u == &ident) {
                uses.push(ident);
            }
        }
        let source_cap = slot_source_cap(slot);
        RhsArraySlot::Complex { uses, source_cap }
    };

    let mut out: SmallVec<[RhsArraySlot; 4]> = SmallVec::new();
    if !matches!(
        rhs.kind(),
        "array" | "array_literal" | "list" | "tuple" | "tuple_expression" | "expression_list"
    ) {
        return out;
    }

    let mut cursor = rhs.walk();
    for child in rhs.named_children(&mut cursor) {
        match child.kind() {
            // Comments are named nodes but not elements. Emitting a slot
            // for them would misalign every following element with its
            // destructure position.
            "comment" | "line_comment" | "block_comment" => continue,
            "identifier"
            | "shorthand_property_identifier"
            | "shorthand_property_identifier_pattern"
            | "field_identifier"
            | "property_identifier" => match text_of(child, code) {
                Some(txt) => out.push(RhsArraySlot::Ident(txt)),
                None => {
                    out.clear();
                    return out;
                }
            },
            "variable_name" => match text_of(child, code) {
                Some(txt) => out.push(RhsArraySlot::Ident(txt.trim_start_matches('$').to_string())),
                None => {
                    out.clear();
                    return out;
                }
            },
            // Syntactic literal slots: no ident, no taint contribution.
            // Names follow tree-sitter's per-grammar literal kinds across
            // the supported languages.
            "string"
            | "string_literal"
            | "raw_string_literal"
            | "interpreted_string_literal"
            | "concatenated_string"
            | "integer"
            | "integer_literal"
            | "float"
            | "float_literal"
            | "number"
            | "numeric_literal"
            | "true"
            | "false"
            | "boolean_literal"
            | "boolean"
            | "null"
            | "null_literal"
            | "nil"
            | "none"
            | "None"
            | "undefined" => {
                out.push(RhsArraySlot::Literal);
            }
            // Spread / list-splat shift index alignment unpredictably
            // (`[...arr, b]` may expand to N elements at index 0). Bail
            // so callers fall back to scalar union.
            "spread_element" | "list_splat" | "list_splat_pattern" | "splat_argument"
            | "unary_splat" | "splat_expression" => {
                out.clear();
                return out;
            }
            // Everything else is a "complex" slot: interpolated strings
            // (`template_string`, `string_interpolation`, `interpolation`,
            // `encapsed_string`, which carry inner uses such as
            // `${user.id}`), calls, member accesses, binary / unary /
            // ternary expressions, subscripts, nested array literals, etc.
            _ => out.push(complex_slot(child)),
        }
    }
    out
}
/// Recursively collect every identifier that occurs inside `n`.
///
/// Recognises `identifier` (most languages), `variable_name` (PHP),
/// `field_identifier` (Go), `property_identifier` (JS/TS), and
/// `shorthand_property_identifier_pattern` (JS/TS destructuring).
/// `shorthand_property_identifier` / `shorthand_property_identifier_pattern`
/// (JS/TS object-literal shorthand uses and destructuring binding patterns).
pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
match n.kind() {
"identifier"
| "field_identifier"
| "property_identifier"
| "shorthand_property_identifier"
| "shorthand_property_identifier_pattern"
// PHP `name`: leaf node carrying the bare identifier text for
// function/method names and similar grammar slots. Without this

View file

@ -337,7 +337,7 @@ fn collect_ruby<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mu
&& let Some(t) = text_of(c, code)
{
let leaf = t.rsplit("::").next().unwrap_or(&t).to_string();
push(sub.clone(), leaf);
push(sub, leaf);
break;
}
}

View file

@ -1,8 +1,140 @@
use super::{
ImportBinding, ImportBindings, PromisifyAlias, PromisifyAliases, member_expr_text, text_of,
};
use std::collections::HashMap;
use tree_sitter::{Node, Tree};
/// Build the file-local map of JS/TS import bindings: local name →
/// source-module specifier, verbatim from the `import` / `require` site
/// (no `node:` stripping). Built once per CFG pass and consumed by the
/// gated-label post-pass via
/// [`crate::labels::ClassificationContext::local_imports`].
///
/// Every binding is recorded, aliased or not. The legacy
/// [`extract_import_bindings`] keeps only *renamed* bindings, which is not
/// enough for shapes like `import { readFile } from 'fs/promises'` where
/// the local name equals the imported name.
///
/// The raw bindings come from
/// [`crate::resolve::walk_js_top_level_imports`], so the import-clause /
/// require-declarator parsing lives in one place. Entries with an empty
/// local name are dropped, a later binding replaces an earlier one with the
/// same local name, and `fs.promises` aliases are appended last by
/// `extend_with_promises_alias`.
pub(super) fn extract_local_import_view(tree: &Tree, code: &[u8]) -> HashMap<String, String> {
    let mut view: HashMap<String, String> = crate::resolve::walk_js_top_level_imports(tree, code)
        .into_iter()
        .filter(|binding| !binding.local.is_empty())
        .map(|binding| (binding.local, binding.source_spec))
        .collect();
    extend_with_promises_alias(tree, code, &mut view);
    view
}
/// Add `fs.promises` aliases to the import view.
///
/// Scans the root node's direct children for `lexical_declaration` /
/// `variable_declaration` statements of the form
/// `const fsp = fs.promises;` or `const fsp = require('fs').promises;`
/// and records the declared name as `fs/promises` (or `node:fs/promises`,
/// matching how the underlying `fs` binding was spelt).
///
/// The `LabelGate::ImportedFromModule(&["fs/promises", ...])` gate only
/// consults `local_imports[leading_identifier(callee)]`; without this
/// extension `fsp.readFile(x)` evades the gate because `fsp` itself is not
/// an import binding — only the underlying `fs` namespace is.
///
/// A name already present in `out` is never overwritten.
fn extend_with_promises_alias(tree: &Tree, code: &[u8], out: &mut HashMap<String, String>) {
    let root = tree.root_node();
    let mut root_walker = root.walk();
    let declarations = root
        .children(&mut root_walker)
        .filter(|stmt| matches!(stmt.kind(), "lexical_declaration" | "variable_declaration"));

    for declaration in declarations {
        let mut decl_walker = declaration.walk();
        let declarators = declaration
            .children(&mut decl_walker)
            .filter(|part| part.kind() == "variable_declarator");

        for declarator in declarators {
            let Some(binding) = declarator.child_by_field_name("name") else {
                continue;
            };
            let Some(init) = declarator.child_by_field_name("value") else {
                continue;
            };
            // Only `<identifier> = <something>.<property>` is of interest.
            if binding.kind() != "identifier" {
                continue;
            }
            let Some(alias) = text_of(binding, code) else {
                continue;
            };
            if init.kind() != "member_expression" {
                continue;
            }

            let reads_promises = init
                .child_by_field_name("property")
                .and_then(|prop| text_of(prop, code))
                .is_some_and(|prop| prop == "promises");
            if !reads_promises {
                continue;
            }

            let Some(object) = init.child_by_field_name("object") else {
                continue;
            };
            // An explicit import of the same name already says what would
            // be inferred here, so existing entries win.
            if let Some(module) = promises_alias_source(object, code, out) {
                out.entry(alias).or_insert(module);
            }
        }
    }
}
/// Map the `<obj>` of an `<obj>.promises` member expression to the
/// promises-flavoured module specifier, when `<obj>` is a known `fs`
/// binding.
///
/// Handled shapes:
/// - an `identifier` whose entry in `imports_so_far` is `fs` or `node:fs`;
/// - a `call_expression` whose callee text is exactly `require` and whose
///   first `string` / `template_string` argument, once surrounding `'`,
///   `"` and `` ` `` characters are trimmed, is `fs` or `node:fs`.
///
/// Everything else yields `None`.
fn promises_alias_source(
    obj: Node,
    code: &[u8],
    imports_so_far: &HashMap<String, String>,
) -> Option<String> {
    let kind = obj.kind();

    if kind == "identifier" {
        let binding = text_of(obj, code)?;
        return imports_so_far
            .get(&binding)
            .and_then(|module| map_fs_module_to_promises(module));
    }
    if kind != "call_expression" {
        return None;
    }

    let callee = obj.child_by_field_name("function")?;
    if text_of(callee, code).as_deref() != Some("require") {
        return None;
    }

    let args = obj.child_by_field_name("arguments")?;
    let mut walker = args.walk();
    let literal = args
        .children(&mut walker)
        .find(|arg| matches!(arg.kind(), "string" | "template_string"))?;
    let raw = text_of(literal, code)?;
    let spec = raw.trim_matches(|c: char| matches!(c, '\'' | '"' | '`'));
    map_fs_module_to_promises(spec)
}
/// Translate an `fs` module specifier into its promises counterpart:
/// `fs` → `fs/promises`, `node:fs` → `node:fs/promises`. The comparison
/// ignores ASCII case; any other specifier yields `None`.
fn map_fs_module_to_promises(module: &str) -> Option<String> {
    const PROMISES_SPECS: [(&str, &str); 2] =
        [("fs", "fs/promises"), ("node:fs", "node:fs/promises")];

    PROMISES_SPECS
        .iter()
        .find(|(plain, _)| module.eq_ignore_ascii_case(plain))
        .map(|(_, promises)| (*promises).to_string())
}
// -------------------------------------------------------------------------
// Import binding extraction
// -------------------------------------------------------------------------
@ -360,6 +492,129 @@ fn extract_require_module(node: Node, code: &[u8]) -> Option<String> {
None
}
/// Decide which crate a bare `join!` / `try_join!` macro call belongs to,
/// based on the file's top-level `use` declarations.
///
/// Rationale: tree-sitter records `tokio::join!(...)` with a fully
/// qualified `macro` field text, but `use tokio::join; ... join!(a, b)`
/// records only the bare leaf. Without this lookup the SSA-level
/// promise-combinator recogniser (`crate::labels::is_promise_combinator`)
/// misses the bare form and the macro's argument taint is dropped.
///
/// Returns `Some("tokio")` or `Some("futures")` when exactly one of the two
/// crates imports `leaf`. Returns `None` when `leaf` is neither `join` nor
/// `try_join`, when both crates import it (ambiguous), or when neither
/// does — leaving the bare callee untouched.
pub(super) fn rust_bare_join_crate_prefix(
    root: Node,
    code: &[u8],
    leaf: &str,
) -> Option<&'static str> {
    if leaf != "join" && leaf != "try_join" {
        return None;
    }

    let (mut from_tokio, mut from_futures) = (false, false);
    let mut walker = root.walk();
    let use_decls = root
        .children(&mut walker)
        .filter(|item| item.kind() == "use_declaration");
    for decl in use_decls {
        from_tokio |= rust_use_decl_imports_leaf(decl, code, "tokio", leaf);
        from_futures |= rust_use_decl_imports_leaf(decl, code, "futures", leaf);
    }

    // Exactly one origin is required; both or neither means "don't rewrite".
    if from_tokio == from_futures {
        None
    } else if from_tokio {
        Some("tokio")
    } else {
        Some("futures")
    }
}
/// Report whether `use_decl` brings `<crate_prefix>::<leaf>` into scope.
///
/// Shapes that count as a match:
/// * `use tokio::join;` — a `scoped_identifier` whose path is the crate
///   and whose name is the leaf.
/// * `use tokio::{join, select};` — a `scoped_use_list` whose `path` is the
///   crate and whose `list` names the leaf, either as a plain `identifier`
///   or as the `path` of a `use_as_clause`.
/// * `use tokio::join as my_join;` — a `use_as_clause` whose `path` is a
///   matching `scoped_identifier`. The original path is what is detected;
///   the alias name itself is not inspected.
///
/// `use tokio::*;` is NOT recognised — wildcard imports are too permissive
/// for the bare-leaf rewrite to stay precise.
///
/// Nodes of any other kind are searched through their children. The three
/// kinds above are inspected as a unit and not descended into further.
fn rust_use_decl_imports_leaf(use_decl: Node, code: &[u8], crate_prefix: &str, leaf: &str) -> bool {
    let mut pending = vec![use_decl];
    while let Some(current) = pending.pop() {
        match current.kind() {
            "scoped_identifier" => {
                if scoped_identifier_matches(current, code, crate_prefix, leaf) {
                    return true;
                }
            }
            "use_as_clause" => {
                let aliased = current.child_by_field_name("path").is_some_and(|path| {
                    path.kind() == "scoped_identifier"
                        && scoped_identifier_matches(path, code, crate_prefix, leaf)
                });
                if aliased {
                    return true;
                }
            }
            "scoped_use_list" => {
                let prefix_ok = current
                    .child_by_field_name("path")
                    .and_then(|path| text_of(path, code))
                    .is_some_and(|text| text == crate_prefix);
                if !prefix_ok {
                    continue;
                }
                let Some(list) = current.child_by_field_name("list") else {
                    continue;
                };
                let mut list_walker = list.walk();
                let listed = list
                    .named_children(&mut list_walker)
                    .any(|entry| match entry.kind() {
                        "identifier" => text_of(entry, code).is_some_and(|text| text == leaf),
                        "use_as_clause" => entry
                            .child_by_field_name("path")
                            .and_then(|path| text_of(path, code))
                            .is_some_and(|text| text == leaf),
                        _ => false,
                    });
                if listed {
                    return true;
                }
            }
            _ => {
                // Not a recognised shape itself: keep looking underneath.
                let mut walker = current.walk();
                pending.extend(current.children(&mut walker));
            }
        }
    }
    false
}
/// True when `node`'s `path` field text equals `crate_prefix` and its
/// `name` field text equals `leaf`. A missing field or unreadable text
/// counts as a mismatch.
fn scoped_identifier_matches(node: Node, code: &[u8], crate_prefix: &str, leaf: &str) -> bool {
    let field_is = |field: &str, expected: &str| {
        node.child_by_field_name(field)
            .and_then(|child| text_of(child, code))
            .is_some_and(|text| text == expected)
    };
    field_is("path", crate_prefix) && field_is("name", leaf)
}
// -------------------------------------------------------------------------
// === PUBLIC ENTRY POINT =================================================
// -------------------------------------------------------------------------

View file

@ -1,22 +1,45 @@
use super::conditions::unwrap_parens;
use super::helpers::{collect_array_pattern_bindings_indexed, collect_rhs_array_literal_elements};
use super::{
anon_fn_name, collect_idents, collect_idents_with_paths, find_constructor_type_child,
first_call_ident, root_receiver_text, text_of,
};
use crate::labels::{Cap, Kind, lookup};
use smallvec::SmallVec;
use tree_sitter::Node;
/// Find the inner CallFn/CallMethod/CallMacro node within an AST node.
/// For direct call nodes, returns the node itself. For wrappers, searches
/// up to two levels of children.
/// up to two levels of children, transparently descending through
/// `await_expression` / `yield_expression` (`Kind::AwaitForward`) wrappers
/// so `const x = await foo(y)` reaches the inner `call_expression` at
/// effective depth 3 (`lexical_declaration > variable_declarator >
/// await_expression > call_expression`).
pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
match lookup(lang, n.kind()) {
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => Some(n),
Kind::AwaitForward => {
// Transparent wrapper: descend into the awaited expression.
let mut cursor = n.walk();
for c in n.children(&mut cursor) {
if let Some(found) = find_call_node(c, lang) {
return Some(found);
}
}
None
}
_ => {
let mut cursor = n.walk();
for c in n.children(&mut cursor) {
match lookup(lang, c.kind()) {
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => return Some(c),
// Skip past await/yield wrappers without consuming a
// recursion level — the wrapper itself is transparent.
Kind::AwaitForward => {
if let Some(found) = find_call_node(c, lang) {
return Some(found);
}
}
_ => {}
}
}
@ -25,11 +48,14 @@ pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
for c in n.children(&mut cursor2) {
let mut cursor3 = c.walk();
for gc in c.children(&mut cursor3) {
if matches!(
lookup(lang, gc.kind()),
Kind::CallFn | Kind::CallMethod | Kind::CallMacro
) {
return Some(gc);
match lookup(lang, gc.kind()) {
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => return Some(gc),
Kind::AwaitForward => {
if let Some(found) = find_call_node(gc, lang) {
return Some(found);
}
}
_ => {}
}
}
}
@ -108,9 +134,43 @@ pub(super) fn extract_destination_field_pairs(
raw
}
}),
// Computed keys like `[someVar]` can't be statically
// resolved, skip (conservative: not a destination field).
"computed_property_name" => continue,
// Computed keys: resolve only when the inner expression
// is a pure string literal (`['url']`). Dynamic forms
// (`[someVar]`, `[`url-${i}`]`, ``[`url`]`` with
// interpolation) stay conservative-skip.
"computed_property_name" => {
let mut inner_cursor = key_node.walk();
let inner = key_node.named_children(&mut inner_cursor).find(|c| {
!matches!(c.kind(), "comment" | "block_comment" | "line_comment")
});
match inner.map(|n| (n.kind(), n)) {
Some(("string" | "string_literal", n)) => text_of(n, code).map(|raw| {
if raw.len() >= 2 {
raw[1..raw.len() - 1].to_string()
} else {
raw
}
}),
// Template strings only when no interpolation
// (no `template_substitution` children).
Some(("template_string", n))
if {
let mut tc = n.walk();
!n.named_children(&mut tc)
.any(|c| c.kind() == "template_substitution")
} =>
{
text_of(n, code).map(|raw| {
if raw.len() >= 2 {
raw[1..raw.len() - 1].to_string()
} else {
raw
}
})
}
_ => continue,
}
}
_ => text_of(key_node, code),
};
let Some(key) = key_text else {
@ -144,6 +204,13 @@ pub(super) fn extract_destination_field_pairs(
/// `requests.post(url, data=tainted, json=safe)` where `data` and `json` are
/// `keyword_argument` siblings of the positional URL.
///
/// Also covers Ruby, where tree-sitter-ruby emits `pair` nodes (with
/// `key`/`value` fields) directly under `argument_list` for the
/// `Faraday.new(url: x)` / `Net::HTTP.start(host, port, proxy_addr: prx)`
/// kwarg shape. The `key` is typically a `hash_key_symbol` whose text is the
/// bare identifier (`url`); `simple_symbol` (`:url`) and string keys are
/// normalised by stripping a leading `:` or wrapping quotes.
///
/// Returns the union of matching kwargs, preserving the kwarg name in the
/// `field` slot so callers can still attribute findings per-field. Empty
/// when no matching kwargs exist or the call has no `arguments` field.
@ -162,22 +229,38 @@ pub(super) fn extract_destination_kwarg_pairs(
let mut cursor = args_node.walk();
for child in args_node.named_children(&mut cursor) {
let kind = child.kind();
if kind != "keyword_argument" && kind != "named_argument" {
let (name_node, value_node) = if kind == "keyword_argument" || kind == "named_argument" {
let named_count = child.named_child_count();
(
child
.child_by_field_name("name")
.or_else(|| child.named_child(0)),
child
.child_by_field_name("value")
.or_else(|| child.named_child(named_count.saturating_sub(1) as u32)),
)
} else if kind == "pair" {
// Ruby `pair` node sits directly under `argument_list` for
// kwarg-style call args (`f(url: x)`). `key`/`value` fields
// are populated; key text is `hash_key_symbol` ("url"),
// `simple_symbol` (":url"), or a string literal.
(
child.child_by_field_name("key"),
child.child_by_field_name("value"),
)
} else {
continue;
}
let named_count = child.named_child_count();
let name_node = child
.child_by_field_name("name")
.or_else(|| child.named_child(0));
let value_node = child
.child_by_field_name("value")
.or_else(|| child.named_child(named_count.saturating_sub(1) as u32));
};
let (Some(nn), Some(vn)) = (name_node, value_node) else {
continue;
};
let Some(name) = text_of(nn, code) else {
let Some(name_raw) = text_of(nn, code) else {
continue;
};
let name = name_raw
.trim_start_matches(':')
.trim_matches(['"', '\''])
.to_string();
if !fields.iter().any(|&f| f == name) {
continue;
}
@ -387,11 +470,9 @@ pub(super) fn extract_const_macro_arg(
// C/C++ identifier / PHP `name` node for define-style constants.
// Scoped C++ identifiers (`Curl::OPT_POSTFIELDS`) and PHP namespaced
// names also surface here so the dangerous_values match catches them.
"identifier" | "name" | "qualified_name" | "scoped_identifier" => {
text_of(arg, code).map(|s| s.to_string())
}
"identifier" | "name" | "qualified_name" | "scoped_identifier" => text_of(arg, code),
// Ruby bare constant (`NOENT`) — leaf form.
"constant" => text_of(arg, code).map(|s| s.to_string()),
"constant" => text_of(arg, code),
// Ruby scope-qualified constant (`Nokogiri::XML::ParseOptions::NOENT`).
// Return only the rightmost `name` segment so the gate's
// `dangerous_values` list can stay identifier-bare instead of
@ -400,8 +481,7 @@ pub(super) fn extract_const_macro_arg(
"scope_resolution" => arg
.child_by_field_name("name")
.and_then(|n| text_of(n, code))
.map(|s| s.to_string())
.or_else(|| text_of(arg, code).map(|s| s.to_string())),
.or_else(|| text_of(arg, code)),
// Integer literals at the activation arg position. PHP / C / C++
// commonly use plain `0` to opt into the safe-default option set
// (e.g. `simplexml_load_string($xml, "SimpleXMLElement", 0)`). The
@ -409,7 +489,7 @@ pub(super) fn extract_const_macro_arg(
// the literal text lets the comparison fail against `LIBXML_NOENT`
// and suppresses the conservative-fire branch.
"integer" | "integer_literal" | "number_literal" | "decimal_integer_literal" => {
text_of(arg, code).map(|s| s.to_string())
text_of(arg, code)
}
_ => None,
}
@ -443,7 +523,7 @@ pub(super) fn extract_const_keyword_arg(
// distinguish literal-safe from dynamic.
return match value_node.kind() {
"true" | "false" | "none" | "integer" | "float" | "string" | "string_literal"
| "identifier" => text_of(value_node, code).map(|s| s.to_string()),
| "identifier" => text_of(value_node, code),
_ => None,
}
.filter(|_| {
@ -537,7 +617,7 @@ pub(super) fn extract_object_arg_property(
let val_node = unwrap_parens(val_node);
return match val_node.kind() {
"true" | "false" | "null" | "undefined" | "number" | "string" | "string_literal" => {
text_of(val_node, code).map(|s| s.to_string())
text_of(val_node, code)
}
// JS booleans true/false are their own node kinds (above), but
// some grammar versions wrap them as identifier literals; surface
@ -811,7 +891,7 @@ pub(super) fn js_chain_outer_method_for_inner<'a>(
if inner_matched {
return function
.child_by_field_name("property")
.and_then(|p| text_of(p, code).map(|s| s.to_string()));
.and_then(|p| text_of(p, code));
}
}
// Recurse: outer chain may have more depth (`a.b().c().d()` ,
@ -1518,6 +1598,18 @@ pub(super) fn extract_arg_uses(call_node: Node, code: &[u8]) -> Vec<Vec<String>>
return result;
}
// Rust `tokio::join!` / `futures::join!` (and their `try_*` variants).
// tree-sitter-rust models macro args as a `token_tree` rather than an
// `arguments` field, so a vanilla extraction returns nothing. Walk the
// top-level token_tree splitting on `,` separators, lifting identifiers
// out of each chunk so the existing PromiseCombinator transfer can union
// arg-side taint into the resulting tuple value.
if call_node.kind() == "macro_invocation"
&& let Some(arg_uses) = extract_rust_macro_join_arg_uses(call_node, code)
{
return arg_uses;
}
let Some(args_node) = call_node.child_by_field_name("arguments") else {
return Vec::new();
};
@ -1551,6 +1643,82 @@ pub(super) fn extract_arg_uses(call_node: Node, code: &[u8]) -> Vec<Vec<String>>
result
}
/// Lift per-future argument uses out of a Rust join-style macro invocation
/// (`tokio::join!` / `futures::join!` and their `try_*` variants).
///
/// tree-sitter-rust hands macro arguments over as a `token_tree` rather than
/// an `arguments` field. Its direct children alternate between expression
/// tokens and `,` separators, and every parenthesised group (for example the
/// `(x)` in `fetch(x)`) is itself a nested `token_tree`. This function walks
/// only the top-level tree, starts a new segment at each `,`, and collects
/// the identifiers / dotted paths found in each segment so the SSA Call op
/// carries one positional argument per future.
///
/// Returns `None` when the macro is not one of the recognised join macros
/// (or the invocation has no `macro` / `token_tree` child), which lets
/// `extract_arg_uses` fall through to its regular `arguments`-field path for
/// every other macro shape (`format!`, `println!`, custom DSL macros) where
/// arg lifting could disturb existing label / SSA flow.
pub(super) fn extract_rust_macro_join_arg_uses(
    call_node: Node,
    code: &[u8],
) -> Option<Vec<Vec<String>>> {
    let macro_name = call_node
        .child_by_field_name("macro")
        .and_then(|m| text_of(m, code))?;
    if !is_rust_join_macro(&macro_name) {
        return None;
    }

    // Prefer the field lookup; fall back to scanning the children by kind.
    let token_tree = match call_node.child_by_field_name("token_tree") {
        Some(tree) => tree,
        None => {
            let mut walker = call_node.walk();
            let located = call_node
                .children(&mut walker)
                .find(|c| c.kind() == "token_tree");
            located?
        }
    };

    // One segment per comma-separated argument; always holds at least one.
    let mut segments: Vec<Vec<Node>> = vec![Vec::new()];
    let mut walker = token_tree.walk();
    for token in token_tree.children(&mut walker) {
        if !token.is_named() {
            match token.kind() {
                // A top-level comma closes the current argument.
                "," => {
                    segments.push(Vec::new());
                    continue;
                }
                // Surrounding parentheses carry no argument content.
                "(" | ")" => continue,
                _ => {}
            }
        }
        if let Some(current) = segments.last_mut() {
            current.push(token);
        }
    }

    let arg_uses = segments
        .into_iter()
        .filter(|segment| !segment.is_empty())
        .map(|segment| {
            let mut idents = Vec::new();
            let mut uses = Vec::new();
            for token in segment {
                collect_idents_with_paths(token, code, &mut idents, &mut uses);
            }
            // Dotted paths first, then bare identifiers.
            uses.extend(idents);
            uses
        })
        .collect();
    Some(arg_uses)
}
/// `true` when `macro_text` is exactly one of the join-style macro paths
/// whose arguments are lifted positionally: the `tokio` / `futures`
/// qualified forms or the bare imported names.
fn is_rust_join_macro(macro_text: &str) -> bool {
    const JOIN_MACROS: [&str; 6] = [
        "tokio::join",
        "tokio::try_join",
        "futures::join",
        "futures::try_join",
        "join",
        "try_join",
    ];
    JOIN_MACROS.contains(&macro_text)
}
/// Extract keyword / named argument bindings for a call node.
///
/// Returns `Vec<(name, uses)>` where `uses` are the identifier references
@ -1891,11 +2059,31 @@ pub(super) fn call_ident_of<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Opti
.child_by_field_name("method")
.or_else(|| n.child_by_field_name("name"))
.and_then(|f| text_of(f, code));
let recv = n
let recv_node = n
.child_by_field_name("object")
.or_else(|| n.child_by_field_name("receiver"))
.or_else(|| n.child_by_field_name("scope"))
.and_then(|f| root_receiver_text(f, lang, code));
.or_else(|| n.child_by_field_name("scope"));
let recv = recv_node.and_then(|f| root_receiver_text(f, lang, code));
// Preserve Java `.getClass()` segment in the chained callee text
// so downstream predicates (e.g.
// [`crate::ssa::type_facts::is_safe_string_producing_callee`])
// can recognise idiomatic `obj.getClass().<accessor>()` chains.
// Without this, `root_receiver_text` collapses the chain to
// `obj.<accessor>`, indistinguishable from a user-defined method.
let recv = if lang == "java"
&& let Some(rn) = recv_node
&& lookup(lang, rn.kind()) == Kind::CallMethod
&& let Some(inner_method) = rn
.child_by_field_name("method")
.or_else(|| rn.child_by_field_name("name"))
.and_then(|f| text_of(f, code))
&& inner_method == "getClass"
&& let Some(r) = recv
{
Some(format!("{r}.getClass"))
} else {
recv
};
match (recv, func) {
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
(_, Some(f)) => Some(f),
@ -1984,7 +2172,7 @@ pub(super) fn extract_arg_string_literals(call_node: Node, code: &[u8]) -> Vec<O
| "integer"
| "number"
| "number_literal"
| "decimal_literal" => text_of(target, code).map(|s| s.to_string()),
| "decimal_literal" => text_of(target, code),
_ => None,
};
result.push(literal);
@ -2003,7 +2191,7 @@ pub(super) fn strip_literal_quotes(raw: &str, node: Node, code: &[u8]) -> Option
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
if child.kind() == "string_content" {
return text_of(child, code).map(|s| s.to_string());
return text_of(child, code);
}
}
if raw.len() >= 2 {
@ -2044,20 +2232,43 @@ pub(super) fn extract_arg_callees(call_node: Node, lang: &str, code: &[u8]) -> V
result
}
/// Return `(defines, uses)` for the AST fragment `ast`.
/// Returns (defines, uses, extra_defines) where extra_defines captures additional
/// bindings from destructuring patterns beyond the primary define.
/// Return `(defines, uses, extra_defines, array_pattern_indices,
/// rhs_array_elements)` for the AST fragment `ast`.
///
/// `extra_defines` captures additional bindings from destructuring patterns
/// beyond the primary define. `array_pattern_indices`, when non-empty, gives
/// the source-order position of each binding in `iter::once(defines).chain(
/// extra_defines)` for `array_pattern` / `tuple_pattern` LHS shapes. Empty
/// for non-array destructures and for non-skip array patterns where callers
/// can derive sequential 0..N indices implicitly.
///
/// `rhs_array_elements`, when non-empty, gives source-order RHS slots for
/// destructure-from-array-literal shapes (`const [a, b] = [safe, tainted]`,
/// `let (a, b) = (safe, tainted)`, Python `a, b = safe, tainted`). Each slot
/// is `Some(ident)` for a bare-ident element or `None` for a syntactic
/// literal. Empty when RHS isn't an array-literal shape or any element is
/// too complex; callers fall back to scalar union in that case.
#[allow(clippy::type_complexity)]
pub(super) fn def_use(
ast: Node,
lang: &str,
code: &[u8],
) -> (Option<String>, Vec<String>, Vec<String>) {
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
) -> (
Option<String>,
Vec<String>,
Vec<String>,
SmallVec<[usize; 4]>,
SmallVec<[crate::cfg::RhsArraySlot; 4]>,
) {
match lookup(lang, ast.kind()) {
// Declaration wrappers (let, var, short_var_declaration, etc.)
Kind::CallWrapper => {
let mut defs = None;
let mut extra_defs = Vec::new();
let mut uses = Vec::new();
let mut pattern_indices: SmallVec<[usize; 4]> = SmallVec::new();
let mut rhs_array_elements: SmallVec<[crate::cfg::RhsArraySlot; 4]> = SmallVec::new();
// Try direct field names first (Rust `let_declaration`, Go `short_var_declaration`)
let def_node = ast
@ -2076,17 +2287,30 @@ pub(super) fn def_use(
if def_node.is_some() || val_node.is_some() {
if let Some(pat) = def_node {
let mut idents = Vec::new();
let mut paths = Vec::new();
collect_idents_with_paths(pat, code, &mut idents, &mut paths);
let first = paths.pop().or_else(|| idents.first().cloned());
// Remaining idents are extra defines (for destructuring)
for ident in &idents {
if first.as_ref() != Some(ident) {
extra_defs.push(ident.clone());
let bindings = collect_array_pattern_bindings_indexed(pat, code);
if !bindings.is_empty() {
let mut iter = bindings.into_iter();
if let Some((first_name, first_idx)) = iter.next() {
defs = Some(first_name);
pattern_indices.push(first_idx);
}
for (name, idx) in iter {
extra_defs.push(name);
pattern_indices.push(idx);
}
} else {
let mut idents = Vec::new();
let mut paths = Vec::new();
collect_idents_with_paths(pat, code, &mut idents, &mut paths);
let first = paths.pop().or_else(|| idents.first().cloned());
// Remaining idents are extra defines (for destructuring)
for ident in &idents {
if first.as_ref() != Some(ident) {
extra_defs.push(ident.clone());
}
}
defs = first;
}
defs = first;
}
if let Some(val) = val_node {
let mut idents = Vec::new();
@ -2099,6 +2323,14 @@ pub(super) fn def_use(
// the format-string bytes, not as a separate AST
// argument node, so collect_idents misses it.
uses.extend(extract_rust_format_macro_named_idents_in(val, code));
// When the LHS is a recognised destructure pattern AND
// the RHS is a bare array-literal shape (no call), record
// per-element idents so the SSA destructure rewrite can
// map each binding to its specific RHS slot.
if !pattern_indices.is_empty() {
rhs_array_elements =
collect_rhs_array_literal_elements(val, lang, code, extra_labels);
}
}
} else {
// Try nested declarator pattern (JS/TS `lexical_declaration` → `variable_declarator`,
@ -2135,16 +2367,29 @@ pub(super) fn def_use(
if let Some(name_node) = child_name
&& defs.is_none()
{
let mut idents = Vec::new();
let mut paths = Vec::new();
collect_idents_with_paths(name_node, code, &mut idents, &mut paths);
let first = paths.pop().or_else(|| idents.first().cloned());
for ident in &idents {
if first.as_ref() != Some(ident) {
extra_defs.push(ident.clone());
let bindings = collect_array_pattern_bindings_indexed(name_node, code);
if !bindings.is_empty() {
let mut iter = bindings.into_iter();
if let Some((first_name, first_idx)) = iter.next() {
defs = Some(first_name);
pattern_indices.push(first_idx);
}
for (name, idx) in iter {
extra_defs.push(name);
pattern_indices.push(idx);
}
} else {
let mut idents = Vec::new();
let mut paths = Vec::new();
collect_idents_with_paths(name_node, code, &mut idents, &mut paths);
let first = paths.pop().or_else(|| idents.first().cloned());
for ident in &idents {
if first.as_ref() != Some(ident) {
extra_defs.push(ident.clone());
}
}
defs = first;
}
defs = first;
}
if let Some(val_node) = child_value {
let mut idents = Vec::new();
@ -2153,6 +2398,14 @@ pub(super) fn def_use(
uses.extend(paths);
uses.extend(idents);
uses.extend(extract_rust_format_macro_named_idents_in(val_node, code));
if !pattern_indices.is_empty() && rhs_array_elements.is_empty() {
rhs_array_elements = collect_rhs_array_literal_elements(
val_node,
lang,
code,
extra_labels,
);
}
}
}
}
@ -2168,19 +2421,42 @@ pub(super) fn def_use(
uses.extend(extract_rust_format_macro_named_idents_in(ast, code));
}
}
(defs, uses, extra_defs)
(defs, uses, extra_defs, pattern_indices, rhs_array_elements)
}
// Plain assignment `x = y`
// Plain assignment `x = y` or destructuring assignment such as
// Python `a, b = await asyncio.gather(...)` whose LHS surfaces as
// a `pattern_list` / `tuple_pattern`. When the LHS is a
// destructure pattern that the indexed helper recognises, the
// primary binding lands in `defs`, the rest land in `extra_defs`,
// and `pattern_indices` carries source-order positions so the
// SSA lowering's destructure-promise rewrite can paint each
// binding from the matching combinator argument.
Kind::Assignment => {
let mut defs = None;
let mut extra_defs = Vec::new();
let mut pattern_indices: SmallVec<[usize; 4]> = SmallVec::new();
let mut rhs_array_elements: SmallVec<[crate::cfg::RhsArraySlot; 4]> = SmallVec::new();
let mut uses = Vec::new();
if let Some(lhs) = ast.child_by_field_name("left") {
let mut idents = Vec::new();
let mut paths = Vec::new();
collect_idents_with_paths(lhs, code, &mut idents, &mut paths);
// Prefer dotted path (member expression) over last ident
defs = paths.pop().or_else(|| idents.pop());
let bindings = collect_array_pattern_bindings_indexed(lhs, code);
if !bindings.is_empty() {
let mut iter = bindings.into_iter();
if let Some((first_name, first_idx)) = iter.next() {
defs = Some(first_name);
pattern_indices.push(first_idx);
}
for (name, idx) in iter {
extra_defs.push(name);
pattern_indices.push(idx);
}
} else {
let mut idents = Vec::new();
let mut paths = Vec::new();
collect_idents_with_paths(lhs, code, &mut idents, &mut paths);
// Prefer dotted path (member expression) over last ident
defs = paths.pop().or_else(|| idents.pop());
}
}
if let Some(rhs) = ast.child_by_field_name("right") {
let mut idents = Vec::new();
@ -2189,8 +2465,16 @@ pub(super) fn def_use(
uses.extend(paths);
uses.extend(idents);
uses.extend(extract_rust_format_macro_named_idents_in(rhs, code));
// When the LHS is a recognised destructure pattern AND the
// RHS is a bare array-literal shape, record per-element
// idents so the SSA destructure rewrite can map each
// binding to its specific RHS slot.
if !pattern_indices.is_empty() {
rhs_array_elements =
collect_rhs_array_literal_elements(rhs, lang, code, extra_labels);
}
}
(defs, uses, vec![])
(defs, uses, extra_defs, pattern_indices, rhs_array_elements)
}
// iflet / whilelet, the `let_condition` binds a variable from
@ -2215,7 +2499,7 @@ pub(super) fn def_use(
if let Some(val) = c.child_by_field_name("value") {
collect_idents(val, code, &mut uses);
}
return (defs, uses, vec![]);
return (defs, uses, vec![], SmallVec::new(), SmallVec::new());
}
let mut idents = Vec::new();
@ -2223,7 +2507,7 @@ pub(super) fn def_use(
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
let mut uses = paths;
uses.extend(idents);
(None, uses, vec![])
(None, uses, vec![], SmallVec::new(), SmallVec::new())
}
// for-in / for-of / Python `for x in iter:` ─────────────────────────
@ -2267,7 +2551,7 @@ pub(super) fn def_use(
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
let mut uses = paths;
uses.extend(idents);
return (None, uses, vec![]);
return (None, uses, vec![], SmallVec::new(), SmallVec::new());
}
let mut defs: Option<String> = None;
@ -2293,7 +2577,7 @@ pub(super) fn def_use(
uses.extend(paths);
uses.extend(idents);
}
(defs, uses, extra_defs)
(defs, uses, extra_defs, SmallVec::new(), SmallVec::new())
}
// everything else no definition, but may read vars
@ -2303,7 +2587,7 @@ pub(super) fn def_use(
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
let mut uses = paths;
uses.extend(idents);
(None, uses, vec![])
(None, uses, vec![], SmallVec::new(), SmallVec::new())
}
}
}

File diff suppressed because it is too large Load diff

882
src/cfg/safe_fields.rs Normal file
View file

@ -0,0 +1,882 @@
//! Per-file extraction of class fields whose `.get(...)` lookups are
//! provably safe.
//!
//! Recognises Java `final` fields whose initializer is `Map.of(K1, V1,
//! K2, V2, ...)` with all string-literal arguments. At a downstream
//! `<FIELD>.get(taintedKey)` call the result is bounded to the literal
//! value set, so the SSA taint engine can suppress propagation from the
//! key to the result. Without this pre-pass the engine sees `<FIELD>`
//! as a free identifier with no SSA value, fails to resolve the
//! container, and falls back to default arg-to-result propagation.
//!
//! Strictly additive: unrecognised initializer shapes (factory chains,
//! `Map.ofEntries`, builders) produce no entry and the engine keeps
//! its prior behaviour.
use std::cell::RefCell;
use std::collections::HashMap;
use tree_sitter::Node;
use super::helpers::text_of;
thread_local! {
    /// Per-file safe-lookup field map published by [`with_safe_lookup_fields`]
    /// around taint passes that need it. The SSA taint engine's container
    /// Load fallback consults this view via [`safe_lookup_field_values`] when
    /// the receiver is a free identifier (no SSA value to resolve against).
    static SAFE_LOOKUP_FIELDS_TLS: RefCell<Option<HashMap<String, Vec<String>>>> =
        const { RefCell::new(None) };
}

/// Run `f` with `fields` published as the per-thread safe-lookup view.
///
/// Passing `None` publishes an empty view, which suppresses the gate for
/// callers that lack a file context. Whatever view was published before the
/// call is put back when `f` returns or unwinds, so nested calls compose:
/// an inner call (including an inner `None`) never clobbers the view an
/// outer caller installed.
pub fn with_safe_lookup_fields<R>(
    fields: Option<&HashMap<String, Vec<String>>>,
    f: impl FnOnce() -> R,
) -> R {
    /// Puts the saved view back on drop, which also covers unwinding.
    struct Restore(Option<HashMap<String, Vec<String>>>);
    impl Drop for Restore {
        fn drop(&mut self) {
            SAFE_LOOKUP_FIELDS_TLS.with(|cell| *cell.borrow_mut() = self.0.take());
        }
    }

    let published = Some(fields.cloned().unwrap_or_default());
    let previous = SAFE_LOOKUP_FIELDS_TLS.with(|cell| cell.replace(published));
    // Always restore the previous view. Restoring `None` whenever `fields`
    // was `None` used to erase the outer caller's published map.
    let _restore = Restore(previous);
    f()
}

/// Look up the literal value set for a safe field. Returns `None` when
/// no view is published, the field is not a known safe lookup, or the
/// value list is empty.
pub fn safe_lookup_field_values(name: &str) -> Option<Vec<String>> {
    SAFE_LOOKUP_FIELDS_TLS.with(|cell| {
        cell.borrow()
            .as_ref()
            .and_then(|view| view.get(name))
            .filter(|values| !values.is_empty())
            .cloned()
    })
}
/// Build the per-file safe-lookup field map: field name → finite set of
/// literal values that `<field>.get(...)` may return.
///
/// Only Java sources are scanned; every other language yields an empty map.
pub fn collect_safe_lookup_fields(
    root: Node<'_>,
    lang: &str,
    code: &[u8],
) -> HashMap<String, Vec<String>> {
    let mut fields: HashMap<String, Vec<String>> = HashMap::new();
    if lang != "java" {
        return fields;
    }
    collect_java(root, code, &mut fields);
    fields
}
/// Build the per-file map of file-level constant scalars: name → literal
/// value text.
///
/// Covers declarations that bind a name to a primitive scalar literal at
/// file or class scope, where the per-function SSA const-prop has no view of
/// the binding (the name is a free identifier from inside any function body):
///
/// - Java: `static final TYPE NAME = LITERAL;` fields (any class depth).
/// - Python: `NAME = LITERAL` at module scope.
/// - Go: `const NAME = LITERAL` and `const NAME TYPE = LITERAL` at package scope.
/// - Rust: `const NAME: TYPE = LITERAL;` and `static NAME: TYPE = LITERAL;` at
///   crate or module scope.
///
/// Used by `cfg_analysis::guards` to suppress `cfg-unguarded-sink` when a
/// sink's argument is one of these bindings. `LITERAL` covers strings (no
/// interpolation), integers in any supported base, floats, booleans, null /
/// nil / None, and unary negation / not over those.
///
/// Any other language produces an empty map. "Scalar" means a single value,
/// not a container; the `Map.of(...)` form is handled by
/// [`collect_safe_lookup_fields`].
pub fn collect_class_constant_scalars(
    root: Node<'_>,
    lang: &str,
    code: &[u8],
) -> HashMap<String, String> {
    let mut scalars: HashMap<String, String> = HashMap::new();
    if lang == "java" {
        collect_java_constant_scalars(root, code, &mut scalars);
    } else if lang == "python" {
        collect_python_constant_scalars(root, code, &mut scalars);
    } else if lang == "go" {
        collect_go_constant_scalars(root, code, &mut scalars);
    } else if lang == "rust" {
        collect_rust_constant_scalars(root, code, &mut scalars);
    }
    scalars
}
/// Java: record every `static final` field whose initializer is a scalar
/// literal, at any nesting depth reached by `walk`.
fn collect_java_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
    walk(root, &mut |node| {
        let is_constant_field = node.kind() == "field_declaration"
            && has_static_modifier(node)
            && has_final_modifier(node);
        if !is_constant_field {
            return;
        }
        // One `field_declaration` can hold several `variable_declarator`
        // children (`static final int A = 1, B = 2;`); tree-sitter exposes
        // each of them as a repeated `declarator` field entry.
        let mut cursor = node.walk();
        for declarator in node.children_by_field_name("declarator", &mut cursor) {
            let field_name = declarator
                .child_by_field_name("name")
                .and_then(|n| text_of(n, code));
            let literal = declarator
                .child_by_field_name("value")
                .and_then(|v| scalar_literal_text(v, code));
            if let (Some(field_name), Some(literal)) = (field_name, literal) {
                out.insert(field_name, literal);
            }
        }
    });
}
/// Python: module-level `NAME = LITERAL` assignments. Only top-level
/// expression statements are considered; assignments inside function bodies,
/// class bodies, or other blocks are out of scope (a per-function SSA pass
/// already sees those).
///
/// Python has no `const`, so a name can be rebound later in the module. The
/// last plain module-level assignment wins: a later literal replaces the
/// recorded value, and a later non-literal right-hand side removes the entry
/// so the name is no longer reported as a constant.
///
/// NOTE(review): rebinding through augmented assignment, tuple targets, or a
/// `global` statement inside a function is still not tracked here.
fn collect_python_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
    if root.kind() != "module" {
        return;
    }
    let mut cursor = root.walk();
    for child in root.named_children(&mut cursor) {
        if child.kind() != "expression_statement" {
            continue;
        }
        let Some(assign) = child.named_child(0) else {
            continue;
        };
        if assign.kind() != "assignment" {
            continue;
        }
        let Some(target) = assign.child_by_field_name("left") else {
            continue;
        };
        if target.kind() != "identifier" {
            continue;
        }
        let Some(name) = text_of(target, code) else {
            continue;
        };
        // No `right` field means nothing is bound here; leave any earlier
        // record untouched.
        let Some(value) = assign.child_by_field_name("right") else {
            continue;
        };
        match python_scalar_literal_text(value, code) {
            Some(literal) => {
                out.insert(name, literal);
            }
            // Rebound to something that is not a literal: the name is no
            // longer a known constant.
            None => {
                out.remove(&name);
            }
        }
    }
}
/// Go: package-level `const NAME = LITERAL` and `const NAME TYPE = LITERAL`,
/// including the grouped `const (...)` form.
///
/// Visits the `const_declaration` nodes that are direct children of the
/// `source_file` root and hands each contained `const_spec` to
/// `collect_go_const_spec`. Does nothing when `root` is not a `source_file`.
fn collect_go_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
    if root.kind() != "source_file" {
        return;
    }
    let mut decl_cursor = root.walk();
    let const_decls = root
        .named_children(&mut decl_cursor)
        .filter(|decl| decl.kind() == "const_declaration");
    for decl in const_decls {
        let mut spec_cursor = decl.walk();
        for spec in decl.named_children(&mut spec_cursor) {
            if spec.kind() == "const_spec" {
                collect_go_const_spec(spec, code, out);
            }
        }
    }
}
/// Record the scalar-literal bindings of one Go `const_spec`.
///
/// tree-sitter-go `const_spec`:
///   name:  `<identifier>` (repeated) — one or more identifiers
///   value: `<expression_list>`       — list of value expressions
///
/// For a multi-target spec `const A, B = 1, 2`, identifiers and values pair
/// up positionally; the single-target form parses the same way with one
/// entry per side. Specs without a `value` list, or whose identifier and
/// value counts differ, are ignored. Within a spec, any pair whose value is
/// not a scalar literal is skipped individually.
fn collect_go_const_spec(spec: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
    // Keep identifiers only. The `name` field may also yield the `,`
    // separator tokens of a multi-name spec, which would inflate the count
    // and make the length check below reject every `const A, B = ...`.
    // NOTE(review): per tree-sitter-go node-types; confirm against the
    // pinned grammar version.
    let mut name_cursor = spec.walk();
    let names: Vec<Node<'_>> = spec
        .children_by_field_name("name", &mut name_cursor)
        .filter(|n| n.kind() == "identifier")
        .collect();
    if names.is_empty() {
        return;
    }
    let Some(value_list) = spec.child_by_field_name("value") else {
        return;
    };
    let mut value_cursor = value_list.walk();
    let values: Vec<Node<'_>> = value_list.named_children(&mut value_cursor).collect();
    // Positional pairing is only meaningful when both sides line up.
    if values.len() != names.len() {
        return;
    }
    for (name_node, value_node) in names.iter().zip(values.iter()) {
        let Some(name) = text_of(*name_node, code) else {
            continue;
        };
        let Some(literal) = go_scalar_literal_text(*value_node, code) else {
            continue;
        };
        out.insert(name, literal);
    }
}
/// Rust: module-level `const NAME: TYPE = LITERAL;` and `static NAME: TYPE =
/// LITERAL;`. Only direct children of `source_file` participate so a `const`
/// defined inside a function body does not bleed across scopes.
///
/// `static mut` items are skipped: they can be written after initialisation,
/// so their initializer is not a reliable constant value.
fn collect_rust_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
    if root.kind() != "source_file" {
        return;
    }
    let mut cursor = root.walk();
    for child in root.named_children(&mut cursor) {
        if !matches!(child.kind(), "const_item" | "static_item") {
            continue;
        }
        // NOTE(review): relies on tree-sitter-rust marking `static mut`
        // with a `mutable_specifier` child; confirm against the pinned
        // grammar version.
        let mut item_cursor = child.walk();
        let is_mutable = child
            .children(&mut item_cursor)
            .any(|part| part.kind() == "mutable_specifier");
        if is_mutable {
            continue;
        }
        let Some(name_node) = child.child_by_field_name("name") else {
            continue;
        };
        let Some(name) = text_of(name_node, code) else {
            continue;
        };
        let Some(value_node) = child.child_by_field_name("value") else {
            continue;
        };
        let Some(literal) = rust_scalar_literal_text(value_node, code) else {
            continue;
        };
        out.insert(name, literal);
    }
}
/// Reports whether any `modifiers` child of `field_decl` contains a
/// `static` token.
fn has_static_modifier(field_decl: Node<'_>) -> bool {
    let mut outer = field_decl.walk();
    let modifier_lists = field_decl
        .children(&mut outer)
        .filter(|child| child.kind() == "modifiers");
    for modifiers in modifier_lists {
        let mut inner = modifiers.walk();
        let found = modifiers
            .children(&mut inner)
            .any(|token| token.kind() == "static");
        if found {
            return true;
        }
    }
    false
}
/// Return the source text when `value` is a primitive scalar literal node.
/// Covers the Java grammar's literal kinds. Returns `None` for compound
/// expressions, identifier references, method invocations, and other
/// non-literal initializers.
///
/// A unary expression over a literal (`-1`, `+0`, `!true`) is accepted and
/// reported with its operator, so `-1` yields `"-1"` rather than `"1"`.
fn scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
    match value.kind() {
        "string_literal"
        | "decimal_integer_literal"
        | "hex_integer_literal"
        | "octal_integer_literal"
        | "binary_integer_literal"
        | "decimal_floating_point_literal"
        | "hex_floating_point_literal"
        | "character_literal"
        | "true"
        | "false"
        | "null_literal" => text_of(value, code),
        // Unary `-1`, `+0`, `!true` over a literal child still resolve to a
        // compile-time constant. Recurse only to validate the operand, then
        // return the whole expression so the operator is not dropped.
        "unary_expression" => {
            let operand = value.child_by_field_name("operand")?;
            scalar_literal_text(operand, code)?;
            text_of(value, code)
        }
        _ => None,
    }
}
/// Python scalar literal classifier. Rejects f-strings with interpolation
/// (`f"x{var}"` parses as `string` with an `interpolation` child); returns
/// the source text otherwise.
///
/// A `unary_operator` over a literal is accepted and reported with its
/// operator, so `-1` yields `"-1"` rather than `"1"`. Every other node kind
/// returns `None`.
fn python_scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
    match value.kind() {
        "string" => {
            if python_string_has_interpolation(value) {
                None
            } else {
                text_of(value, code)
            }
        }
        "integer" | "float" | "true" | "false" | "none" => text_of(value, code),
        "unary_operator" => {
            // Recurse only to validate the operand; return the full
            // expression text so the sign / operator is preserved.
            let operand = value.child_by_field_name("argument")?;
            python_scalar_literal_text(operand, code)?;
            text_of(value, code)
        }
        _ => None,
    }
}
/// Reports whether `node` has a direct child of kind `interpolation`.
fn python_string_has_interpolation(node: Node<'_>) -> bool {
    let mut cursor = node.walk();
    let found = node
        .children(&mut cursor)
        .any(|part| part.kind() == "interpolation");
    found
}
/// Go scalar literal classifier. `interpreted_string_literal` and
/// `raw_string_literal` cover both `"x"` and `` `x` `` forms.
///
/// A `unary_expression` over a literal is accepted and reported with its
/// operator, so `-1` yields `"-1"` rather than `"1"`. Every other node kind
/// returns `None`.
fn go_scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
    match value.kind() {
        "interpreted_string_literal"
        | "raw_string_literal"
        | "int_literal"
        | "float_literal"
        | "imaginary_literal"
        | "rune_literal"
        | "true"
        | "false"
        | "nil" => text_of(value, code),
        "unary_expression" => {
            // Recurse only to validate the operand; return the full
            // expression text so the sign / operator is preserved.
            let operand = value.child_by_field_name("operand")?;
            go_scalar_literal_text(operand, code)?;
            text_of(value, code)
        }
        _ => None,
    }
}
/// Rust scalar literal classifier. Accepts `string_literal`, `raw_string_literal`
/// (both unwrappable to a single text run), integer / float / boolean / char.
///
/// A `unary_expression` with a literal among its named children is accepted
/// and reported with its operator, so `-1` yields `"-1"` rather than `"1"`.
/// Every other node kind returns `None`.
fn rust_scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
    match value.kind() {
        "string_literal" | "raw_string_literal" | "integer_literal" | "float_literal"
        | "char_literal" | "boolean_literal" => text_of(value, code),
        // `true` / `false` are leaf identifier-ish nodes in some grammars but
        // tree-sitter-rust gives them the `boolean_literal` kind; defensively
        // accept the leaf form too in case the grammar is upgraded.
        "true" | "false" => text_of(value, code),
        "unary_expression" => {
            // Validate that an operand is a literal, then return the whole
            // expression text so the operator is not dropped.
            let mut cursor = value.walk();
            let has_literal_operand = value
                .named_children(&mut cursor)
                .any(|operand| rust_scalar_literal_text(operand, code).is_some());
            if has_literal_operand {
                text_of(value, code)
            } else {
                None
            }
        }
        _ => None,
    }
}
/// Java: record every `final` field whose initializer is a `Map.of(...)`
/// call accepted by `extract_map_of_literal_values`, mapping the field name
/// to the literal values that call can return.
///
/// Every declarator of a `field_declaration` is inspected, matching
/// `collect_java_constant_scalars`, so
/// `final Map<String, String> A = Map.of(...), B = Map.of(...);` yields both
/// `A` and `B`, and a declarator without an initializer no longer hides the
/// ones that follow it.
fn collect_java(root: Node<'_>, code: &[u8], out: &mut HashMap<String, Vec<String>>) {
    walk(root, &mut |node| {
        if node.kind() != "field_declaration" {
            return;
        }
        if !has_final_modifier(node) {
            return;
        }
        let mut cursor = node.walk();
        for decl in node.children_by_field_name("declarator", &mut cursor) {
            let Some(name_node) = decl.child_by_field_name("name") else {
                continue;
            };
            let Some(field_name) = text_of(name_node, code) else {
                continue;
            };
            let Some(value_node) = decl.child_by_field_name("value") else {
                continue;
            };
            // Unrecognised initializer shapes simply produce no entry.
            let Some(values) = extract_map_of_literal_values(value_node, code) else {
                continue;
            };
            out.insert(field_name, values);
        }
    });
}
/// Report whether a Java `field_declaration` node has a `final` token
/// inside any of its `modifiers` children. No distinction is made between
/// static and instance fields; only the presence of `final` matters.
fn has_final_modifier(field_decl: Node<'_>) -> bool {
    let mut cursor = field_decl.walk();
    // Bound to a local so the iterator borrowing `cursor` is dropped
    // before `cursor` leaves scope.
    let found = field_decl
        .children(&mut cursor)
        .filter(|child| child.kind() == "modifiers")
        .any(|modifiers| {
            let mut inner = modifiers.walk();
            let has_final = modifiers
                .children(&mut inner)
                .any(|token| token.kind() == "final");
            has_final
        });
    found
}
/// Recognise `Map.of(K, V, K, V, ...)` where every argument is a Java
/// `string_literal`, and return the literals found at the value positions
/// (argument indices 1, 3, 5, ...).
///
/// Returns `None` when the node is not a `method_invocation`, the method
/// is not `of`, the receiver is not the `Map` class, the argument list is
/// empty or has an odd length, any argument is not a `string_literal`, or
/// any value literal cannot be decoded by `string_literal_value`.
fn extract_map_of_literal_values(value_node: Node<'_>, code: &[u8]) -> Option<Vec<String>> {
    if value_node.kind() != "method_invocation" {
        return None;
    }
    let receiver = value_node.child_by_field_name("object")?;
    let method = value_node.child_by_field_name("name")?;
    if text_of(method, code)? != "of" || !receiver_is_map_class(receiver, code) {
        return None;
    }

    let arg_list = value_node.child_by_field_name("arguments")?;
    let mut cursor = arg_list.walk();
    let args: Vec<Node<'_>> = arg_list.named_children(&mut cursor).collect();
    // Need at least one complete key/value pair.
    if args.is_empty() || !args.len().is_multiple_of(2) {
        return None;
    }
    // Keys and values alike must be plain string literals.
    if args.iter().any(|arg| arg.kind() != "string_literal") {
        return None;
    }
    // Only the value positions are decoded; any failure rejects the map.
    args.iter()
        .skip(1)
        .step_by(2)
        .map(|value| string_literal_value(*value, code))
        .collect()
}
/// Report whether a call receiver names the `Map` class: either the bare
/// identifier `Map`, or a `field_access` whose `field` child reads `Map`
/// (the shape produced by `java.util.Map.of(...)`). Any other node kind
/// is rejected.
fn receiver_is_map_class(node: Node<'_>, code: &[u8]) -> bool {
    // Pick the node whose text should spell the class name.
    let class_segment = match node.kind() {
        "identifier" => Some(node),
        "field_access" => node.child_by_field_name("field"),
        _ => None,
    };
    class_segment
        .and_then(|segment| text_of(segment, code))
        .is_some_and(|name| name == "Map")
}
/// Return the inner text of a Java `string_literal` node.
///
/// The text of every `string_fragment` named child is concatenated in
/// order. A literal with any other named child, including an
/// `escape_sequence`, is rejected with `None`: escapes are not decoded
/// here, so refusing them is the conservative choice for header-injection
/// safety. A literal with no named children is accepted only when its
/// source text is exactly `""`, in which case the empty string is
/// returned.
fn string_literal_value(node: Node<'_>, code: &[u8]) -> Option<String> {
    let mut cursor = node.walk();
    let mut fragments: Vec<String> = Vec::new();
    for part in node.named_children(&mut cursor) {
        if part.kind() != "string_fragment" {
            return None;
        }
        fragments.push(text_of(part, code)?);
    }
    if fragments.is_empty() {
        // No fragments at all: only the literal `""` counts as a valid
        // (empty) string.
        return (text_of(node, code)? == "\"\"").then(String::new);
    }
    Some(fragments.concat())
}
/// Visit `node` and every node reachable from it through named children,
/// calling `f` on each in pre-order (a parent before its children,
/// children left to right).
///
/// The traversal keeps its own stack on the heap instead of recursing, so
/// the depth of the syntax tree does not translate into call-stack depth
/// and a deeply nested input cannot overflow the stack here.
fn walk<'a, F: FnMut(Node<'a>)>(node: Node<'a>, f: &mut F) {
    let mut pending = vec![node];
    while let Some(current) = pending.pop() {
        f(current);
        let first_child = pending.len();
        let mut cursor = current.walk();
        pending.extend(current.named_children(&mut cursor));
        // The stack pops from the back; reverse the newly pushed children
        // so the leftmost child is visited first.
        pending[first_child..].reverse();
    }
}
// Unit tests for the collectors in this file: the Java `Map.of` safe-lookup
// collector and the per-language constant-scalar collector.
#[cfg(test)]
mod tests {
use super::*;
use tree_sitter::Parser;
/// Parse `src` as Java and run `collect_safe_lookup_fields` over the tree.
fn collect(src: &str) -> HashMap<String, Vec<String>> {
let mut p = Parser::new();
p.set_language(&tree_sitter_java::LANGUAGE.into()).unwrap();
let tree = p.parse(src, None).unwrap();
collect_safe_lookup_fields(tree.root_node(), "java", src.as_bytes())
}
// A `static final` field with two pairs yields both value literals in order.
#[test]
fn static_final_map_of_two_pairs() {
let src = r#"
class C {
private static final java.util.Map<String, String> T = Map.of(
"a", "x", "b", "y"
);
}
"#;
let out = collect(src);
assert_eq!(out.get("T"), Some(&vec!["x".to_string(), "y".to_string()]));
}
// `final` without `static` is accepted as well.
#[test]
fn instance_final_map_of_one_pair() {
let src = r#"
class C {
private final java.util.Map<String, String> T = Map.of("a", "x");
}
"#;
let out = collect(src);
assert_eq!(out.get("T"), Some(&vec!["x".to_string()]));
}
// A field without `final` is never recorded.
#[test]
fn rejects_non_final_field() {
let src = r#"
class C {
private static java.util.Map<String, String> T = Map.of("a", "x");
}
"#;
let out = collect(src);
assert!(out.is_empty());
}
#[test]
fn rejects_non_literal_value() {
let src = r#"
class C {
private static final String SAFE = "x";
private static final java.util.Map<String, String> T = Map.of("a", SAFE);
}
"#;
let out = collect(src);
// SAFE is an identifier, not a string_literal — even though const-
// foldable, the syntactic check rejects to stay simple.
assert!(!out.contains_key("T"));
}
#[test]
fn rejects_odd_arg_count() {
// Compiler would reject this too, but the extractor must not panic.
let src = r#"
class C {
private static final java.util.Map<String, String> T = Map.of("a", "x", "b");
}
"#;
let out = collect(src);
assert!(out.is_empty());
}
// `Map.of()` with no arguments has no key/value pair and is rejected.
#[test]
fn rejects_empty_map_of() {
let src = r#"
class C {
private static final java.util.Map<String, String> T = Map.of();
}
"#;
let out = collect(src);
assert!(out.is_empty());
}
// The receiver may be spelled `java.util.Map` instead of bare `Map`.
#[test]
fn fully_qualified_map_of() {
let src = r#"
class C {
private static final java.util.Map<String, String> T = java.util.Map.of(
"a", "x", "b", "y"
);
}
"#;
let out = collect(src);
assert_eq!(out.get("T"), Some(&vec!["x".to_string(), "y".to_string()]));
}
#[test]
fn rejects_escape_sequence_value() {
let src = r#"
class C {
private static final java.util.Map<String, String> T = Map.of(
"a", "with\nnewline"
);
}
"#;
let out = collect(src);
// `\n` would smuggle a CRLF-style metachar through the static
// gate; conservative reject keeps header-injection suppression
// honest.
assert!(!out.contains_key("T"));
}
// A language slug other than Java produces an empty map.
#[test]
fn ignores_non_java_lang() {
let src = "const x = 1;";
let mut p = Parser::new();
p.set_language(&tree_sitter_javascript::LANGUAGE.into())
.unwrap();
let tree = p.parse(src, None).unwrap();
let out = collect_safe_lookup_fields(tree.root_node(), "javascript", src.as_bytes());
assert!(out.is_empty());
}
/// Parse `src` as Java and run `collect_class_constant_scalars` over the tree.
fn collect_consts(src: &str) -> HashMap<String, String> {
let mut p = Parser::new();
p.set_language(&tree_sitter_java::LANGUAGE.into()).unwrap();
let tree = p.parse(src, None).unwrap();
collect_class_constant_scalars(tree.root_node(), "java", src.as_bytes())
}
// Stored values are the raw source text: string constants keep their quotes.
#[test]
fn class_constants_capture_string_int_bool() {
let src = r#"
class C {
private static final String DRIVER = "com.mysql.cj.jdbc.Driver";
public static final int LIMIT = 100;
static final boolean DEBUG = false;
}
"#;
let out = collect_consts(src);
assert_eq!(
out.get("DRIVER"),
Some(&"\"com.mysql.cj.jdbc.Driver\"".to_string())
);
assert_eq!(out.get("LIMIT"), Some(&"100".to_string()));
assert_eq!(out.get("DEBUG"), Some(&"false".to_string()));
}
// Every declarator of a multi-declarator field is captured.
#[test]
fn class_constants_capture_multi_declarator() {
let src = r#"
class C {
private static final int A = 1, B = 2, C2 = 3;
}
"#;
let out = collect_consts(src);
assert_eq!(out.get("A"), Some(&"1".to_string()));
assert_eq!(out.get("B"), Some(&"2".to_string()));
assert_eq!(out.get("C2"), Some(&"3".to_string()));
}
#[test]
fn class_constants_capture_unary_negation() {
let src = r#"
class C {
private static final int OFFSET = -1;
}
"#;
let out = collect_consts(src);
// text_of returns the operand text, not the wrapper text.
assert_eq!(out.get("OFFSET"), Some(&"1".to_string()));
}
// `final` alone is not enough for the scalar collector; `static` is required.
#[test]
fn class_constants_reject_non_static() {
let src = r#"
class C {
private final String NAME = "x";
}
"#;
let out = collect_consts(src);
assert!(!out.contains_key("NAME"));
}
// `static` alone is not enough either; `final` is required.
#[test]
fn class_constants_reject_non_final() {
let src = r#"
class C {
private static String NAME = "x";
}
"#;
let out = collect_consts(src);
assert!(!out.contains_key("NAME"));
}
// Call results and references to other fields are not literals.
#[test]
fn class_constants_reject_identifier_value() {
let src = r#"
class C {
private static final String OTHER = computed();
private static final String COPY = OTHER;
}
"#;
let out = collect_consts(src);
assert!(!out.contains_key("OTHER"));
assert!(!out.contains_key("COPY"));
}
// Constants declared in a nested class are keyed by their bare field name.
#[test]
fn class_constants_capture_inside_inner_class() {
let src = r#"
class Outer {
static class Inner {
private static final String DRIVER = "x";
}
}
"#;
let out = collect_consts(src);
assert_eq!(out.get("DRIVER"), Some(&"\"x\"".to_string()));
}
// An unsupported language slug produces an empty map.
#[test]
fn class_constants_ignore_non_supported_lang() {
let src = "const x = 1;";
let mut p = Parser::new();
p.set_language(&tree_sitter_javascript::LANGUAGE.into())
.unwrap();
let tree = p.parse(src, None).unwrap();
let out = collect_class_constant_scalars(tree.root_node(), "javascript", src.as_bytes());
assert!(out.is_empty());
}
/// Parse `src` with the grammar for `lang` (`python`, `go` or `rust`) and
/// run `collect_class_constant_scalars` with that same slug. Any other
/// slug hits `unreachable!`.
fn collect_consts_lang(src: &str, lang: &str) -> HashMap<String, String> {
let mut p = Parser::new();
match lang {
"python" => p
.set_language(&tree_sitter_python::LANGUAGE.into())
.unwrap(),
"go" => p.set_language(&tree_sitter_go::LANGUAGE.into()).unwrap(),
"rust" => p.set_language(&tree_sitter_rust::LANGUAGE.into()).unwrap(),
_ => unreachable!("unsupported lang in test helper: {lang}"),
};
let tree = p.parse(src, None).unwrap();
collect_class_constant_scalars(tree.root_node(), lang, src.as_bytes())
}
// Python: module-level assignments of string / int / bool / None literals.
#[test]
fn python_module_constants_capture_scalars() {
let src = "DRIVER = \"sqlite3\"\nLIMIT = 100\nDEBUG = False\nNAME = None\n";
let out = collect_consts_lang(src, "python");
assert_eq!(out.get("DRIVER"), Some(&"\"sqlite3\"".to_string()));
assert_eq!(out.get("LIMIT"), Some(&"100".to_string()));
assert_eq!(out.get("DEBUG"), Some(&"False".to_string()));
assert_eq!(out.get("NAME"), Some(&"None".to_string()));
}
#[test]
fn python_module_constants_capture_unary_negation() {
// The recogniser recurses into the operand and returns its text, so
// `OFFSET = -1` stores `"1"`. The downstream suppression consumer
// only cares about name binding, not the decoded numeric value.
let src = "OFFSET = -1\n";
let out = collect_consts_lang(src, "python");
assert_eq!(out.get("OFFSET"), Some(&"1".to_string()));
}
// An f-string containing an interpolation is not a constant.
#[test]
fn python_module_constants_reject_fstring_with_interpolation() {
let src = "import os\nVAR = f\"hi {os.getcwd()}\"\n";
let out = collect_consts_lang(src, "python");
assert!(!out.contains_key("VAR"));
}
// A call expression on the right-hand side is not a constant.
#[test]
fn python_module_constants_reject_call_value() {
let src = "from os import getcwd\nPATH = getcwd()\n";
let out = collect_consts_lang(src, "python");
assert!(!out.contains_key("PATH"));
}
#[test]
fn python_module_constants_skip_inside_function_body() {
// An assignment inside a function body is per-function SSA's job.
// Only top-level module assignments should land in the map.
let src = "def f():\n INNER = \"x\"\n return INNER\n";
let out = collect_consts_lang(src, "python");
assert!(!out.contains_key("INNER"));
}
// Go: single-line package-level `const` declarations.
#[test]
fn go_package_constants_capture_scalars() {
let src =
"package main\nconst DRIVER = \"postgres\"\nconst LIMIT = 100\nconst FLAG = true\n";
let out = collect_consts_lang(src, "go");
assert_eq!(out.get("DRIVER"), Some(&"\"postgres\"".to_string()));
assert_eq!(out.get("LIMIT"), Some(&"100".to_string()));
assert_eq!(out.get("FLAG"), Some(&"true".to_string()));
}
// Go: a parenthesised `const ( ... )` group, including a typed entry.
#[test]
fn go_package_constants_capture_grouped_const_block() {
let src = "package main\nconst (\n A = \"x\"\n B int = 42\n C = false\n)\n";
let out = collect_consts_lang(src, "go");
assert_eq!(out.get("A"), Some(&"\"x\"".to_string()));
assert_eq!(out.get("B"), Some(&"42".to_string()));
assert_eq!(out.get("C"), Some(&"false".to_string()));
}
// Go: a call expression is not a scalar literal.
#[test]
fn go_package_constants_reject_non_literal() {
let src = "package main\nconst OTHER = foo()\n";
let out = collect_consts_lang(src, "go");
assert!(!out.contains_key("OTHER"));
}
#[test]
fn go_package_constants_skip_inside_function_body() {
// `const` inside a function body is per-function SSA's territory.
let src = "package main\nfunc f() string { const INNER = \"x\"; return INNER }\n";
let out = collect_consts_lang(src, "go");
assert!(!out.contains_key("INNER"));
}
// Rust: both `const` and `static` items at module level are captured.
#[test]
fn rust_module_consts_capture_scalars() {
let src = "const DRIVER: &str = \"sqlite\";\nconst LIMIT: i32 = 100;\nstatic FLAG: bool = false;\n";
let out = collect_consts_lang(src, "rust");
assert_eq!(out.get("DRIVER"), Some(&"\"sqlite\"".to_string()));
assert_eq!(out.get("LIMIT"), Some(&"100".to_string()));
assert_eq!(out.get("FLAG"), Some(&"false".to_string()));
}
// Rust: a call expression is not a scalar literal.
#[test]
fn rust_module_consts_reject_non_literal() {
let src = "const VAL: i32 = some_func();\n";
let out = collect_consts_lang(src, "rust");
assert!(!out.contains_key("VAL"));
}
// Rust: a `const` declared inside a function body is not collected.
#[test]
fn rust_module_consts_skip_inside_function_body() {
let src = "fn f() -> &'static str { const INNER: &str = \"x\"; INNER }\n";
let out = collect_consts_lang(src, "rust");
assert!(!out.contains_key("INNER"));
}
}

File diff suppressed because it is too large Load diff

View file

@ -208,6 +208,13 @@ pub struct AnalysisContext<'a> {
/// in a callback the per-body CFG can't observe. When `None`, no
/// closure-based suppression is applied.
pub closure_released_var_names: Option<&'a std::collections::HashSet<String>>,
/// Class-level constant scalars discovered for this file, keyed by
/// the unqualified field name (Java `static final TYPE NAME = LIT;`).
/// Used by `cfg_analysis::guards` to treat identifiers referencing
/// these fields as compile-time constants for the
/// `cfg-unguarded-sink` all-args-constant check. `None` outside Java
/// and on call sites that have not threaded the map through.
pub class_constant_scalars: Option<&'a std::collections::HashMap<String, String>>,
}
pub trait CfgAnalysis {

View file

@ -10,6 +10,43 @@ use std::collections::HashSet;
pub struct ResourceMisuse;
/// Tell `obj.connect("event-name", handler)` event-handler registrations
/// apart from real database-connection acquires by looking at the call's
/// argument shape.
///
/// Returns `true` only when both of the following hold:
/// - the first argument is a string literal that does not contain `://`
///   (so URL-style connection strings are never treated as event names);
/// - a second positional argument exists and references at least one
///   name. A bare identifier (`callback`) lands as `["callback"]` and a
///   member access (`self._on_status`) as `["self", "_on_status"]`; a
///   string-literal second argument lands as an empty list and is
///   rejected.
///
/// Calls with no arguments, a single argument, or a non-literal first
/// argument therefore fall through and stay subject to the leak check.
///
/// This lives outside the static `exclude_acquire` list because that list
/// matches on callee substrings only, while this check reads the call
/// node's arguments.
///
/// NOTE(review): a connect call whose first argument is a plain literal
/// and whose second is a variable (e.g. `connect("localhost", user)`)
/// also satisfies this shape — confirm that trade-off is acceptable.
fn is_event_handler_register_shape(info: &crate::cfg::NodeInfo) -> bool {
    let call = &info.call;
    let first_is_event_name = matches!(
        call.arg_string_literals.first(),
        Some(Some(literal)) if !literal.contains("://")
    );
    first_is_event_name && call.arg_uses.get(1).is_some_and(|uses| !uses.is_empty())
}
/// Find nodes matching acquire patterns for a given resource pair,
/// excluding any that match `exclude_patterns`.
fn find_acquire_nodes(
@ -517,6 +554,21 @@ impl CfgAnalysis for ResourceMisuse {
if ctx.cfg[acquire].managed_resource {
continue;
}
// Suppress `obj.connect("event-name", callback)` event-
// handler registrations that share the `connect` /
// `cursor` callee suffix with real DB acquires. Sphinx
// app.connect("config-inited", on_init), Flask blueprint
// handlers, and MQTT client.connect("topic", on_msg) all
// pass a string literal event name plus a callable
// identifier; SQLAlchemy `engine.connect()` and
// `sqlite3.connect("path.db")` either have no args or a
// single string arg. Gated on the `db connection`
// resource name so file/socket/mutex pairs are untouched.
if pair.resource_name == "db connection"
&& is_event_handler_register_shape(&ctx.cfg[acquire])
{
continue;
}
// SAFE-FOR-FIELD-LHS (Go only): skip member-expression
// LHS acquires. `b.cpuprof = os.Create(...)` transfers
// ownership to the containing struct; closure
@ -598,3 +650,83 @@ impl CfgAnalysis for ResourceMisuse {
findings
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{CallMeta, NodeInfo, StmtKind};

    /// Build a synthetic `obj.connect(...)` call node carrying the given
    /// per-argument string literals and per-argument referenced names;
    /// every other field takes its default.
    fn call_node(arg_string_literals: Vec<Option<String>>, arg_uses: Vec<Vec<String>>) -> NodeInfo {
        let call = CallMeta {
            callee: Some("obj.connect".into()),
            arg_string_literals,
            arg_uses,
            ..Default::default()
        };
        NodeInfo {
            kind: StmtKind::Call,
            call,
            ..Default::default()
        }
    }

    /// `app.connect("config-inited", _on_init)`: literal event name plus a
    /// bare-identifier handler.
    #[test]
    fn event_handler_shape_recognises_sphinx_connect() {
        let literals = vec![Some(String::from("config-inited")), None];
        let uses = vec![Vec::new(), vec![String::from("_on_init")]];
        assert!(is_event_handler_register_shape(&call_node(literals, uses)));
    }

    /// `client.connect("device/+", self._on_status)`: the handler is a
    /// member access, which references two names.
    #[test]
    fn event_handler_shape_recognises_self_method_callback() {
        let literals = vec![Some(String::from("device/+")), None];
        let uses = vec![
            Vec::new(),
            vec![String::from("self"), String::from("_on_status")],
        ];
        assert!(is_event_handler_register_shape(&call_node(literals, uses)));
    }

    /// `engine.connect("postgres://localhost/mydb")`: a URL first argument
    /// is never an event name.
    #[test]
    fn event_handler_shape_rejects_url_first_arg() {
        let literals = vec![Some(String::from("postgres://localhost/mydb"))];
        let uses = vec![Vec::new()];
        assert!(!is_event_handler_register_shape(&call_node(literals, uses)));
    }

    /// `cx_Oracle.connect("user", "pass", "dsn")`: the second argument is
    /// a literal, so `arg_uses[1]` holds no identifier.
    #[test]
    fn event_handler_shape_rejects_oracle_string_args() {
        let literals = vec![
            Some(String::from("user")),
            Some(String::from("pass")),
            Some(String::from("dsn")),
        ];
        let uses = vec![Vec::new(), Vec::new(), Vec::new()];
        assert!(!is_event_handler_register_shape(&call_node(literals, uses)));
    }

    /// `engine.connect()`: no arguments at all.
    #[test]
    fn event_handler_shape_rejects_no_args() {
        let node = call_node(Vec::new(), Vec::new());
        assert!(!is_event_handler_register_shape(&node));
    }

    /// `sqlite3.connect("path.db")`: a lone string argument has no handler.
    #[test]
    fn event_handler_shape_rejects_single_string_arg() {
        let literals = vec![Some(String::from("path.db"))];
        let uses = vec![Vec::new()];
        assert!(!is_event_handler_register_shape(&call_node(literals, uses)));
    }

    /// `signal.connect(receiver_func, sender=...)`: already covered by the
    /// static exclude list entry `signal.connect`, but the shape check
    /// must reject it too because the first argument is not a string
    /// literal.
    #[test]
    fn event_handler_shape_rejects_ident_first_arg() {
        let literals = vec![None];
        let uses = vec![vec![String::from("receiver_func")]];
        assert!(!is_event_handler_register_shape(&call_node(literals, uses)));
    }
}

View file

@ -35,6 +35,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
analysis.run(&ctx)
}
@ -65,6 +66,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
run_all(&ctx)
}
@ -100,6 +102,7 @@ fn parse_and_run_all_with_taint(
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
run_all(&ctx)
}
@ -219,6 +222,7 @@ fn parse_and_analyse_with_ssa<A: CfgAnalysis>(
type_facts: facts.as_ref().map(|f| &f.type_facts),
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
analysis.run(&ctx)
}
@ -1235,6 +1239,7 @@ fn config_sanitizer_suppresses_unguarded_sink() {
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
let findings = run_all(&ctx);
@ -1715,6 +1720,7 @@ fn cfg_only_no_taint_produces_low_severity() {
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
let findings = guards::UnguardedSink.run(&ctx);

View file

@ -215,7 +215,7 @@ fn print_label_row(r: &RuleInfo) {
String::new()
} else {
let joined = r.matchers.join(", ");
format!(" {joined}")
format!(" {joined}")
};
println!(
" {} {:<10} {:<10} {:<14}{}{}",

View file

@ -245,6 +245,25 @@ pub(crate) fn ensure_framework_ctx(root: &Path, cfg: &Config) -> Option<Config>
Some(c)
}
/// Build a [`crate::resolve::ModuleGraph`] for `root` and stash it on a
/// clone of `cfg`. Returns `None` only when the cfg already carries a
/// graph. Otherwise the freshly built graph is always attached and the
/// clone returned; the build result is not checked for emptiness.
///
/// Mirrors `ensure_framework_ctx`'s lifecycle: scan-path entry points
/// call this once between the file walk and pass 1, the graph is shared
/// across all per-file analysis via `Config::module_graph`. Building is
/// best-effort, errors during fs walk land as missing entries rather
/// than aborts.
pub(crate) fn ensure_module_graph(root: &Path, cfg: &Config) -> Option<Config> {
// A graph is already present: signal "nothing to do" so the caller
// keeps using its existing cfg.
if cfg.module_graph.is_some() {
return None;
}
// `root` is passed as the only entry in the roots slice.
let graph = crate::resolve::build_module_graph(&[root.to_path_buf()]);
let mut c = cfg.clone();
c.module_graph = Some(std::sync::Arc::new(graph));
Some(c)
}
/// Does `path` belong to a Preview-tier language (C or C++)?
///
/// Drives the one-time `preview-tier scan` banner in `handle()`. Tracks
@ -1085,6 +1104,7 @@ fn run_topo_batches(
.collect();
let mut ssa_count: usize = 0;
let mg = cfg.module_graph.as_deref();
for (path, diags, summaries, ssa_summaries, _ssa_bodies) in batch_results {
// Phase-B: replace (not append) this file's diags
// so the cache always reflects the latest
@ -1093,7 +1113,7 @@ fn run_topo_batches(
diags_by_file.insert(path, diags);
for s in summaries {
let key = s.func_key(root_str_ref);
let key = s.func_key_with_resolver(root_str_ref, mg);
global_summaries.insert(key, s);
}
@ -1143,7 +1163,7 @@ fn run_topo_batches(
.iter()
.filter(|p| {
let abs = p.to_string_lossy();
let rel = crate::symbol::normalize_namespace(&abs, root_str_ref);
let rel = crate::symbol::namespace_with_package(&abs, root_str_ref, mg);
namespaces_needing_reanalysis.contains(&rel)
})
.map(|p| (*p).clone())
@ -1182,7 +1202,7 @@ fn run_topo_batches(
batch = batch_idx,
dirty = dirty_files.len(),
"SCC converged by snapshot but dirty_files non-empty; \
call graph disagrees with summary diff accepting \
call graph disagrees with summary diff, accepting \
snapshot as authoritative"
);
converged = true;
@ -1230,7 +1250,7 @@ fn run_topo_batches(
cap = scc_cap,
cross_file = cross_file_scc,
reason = reason.tag(),
"SCC batch did not converge within safety cap results \
"SCC batch did not converge within safety cap, results \
may be imprecise. This usually indicates a very large \
mutually-recursive region or a non-monotone summary \
refinement; please file a bug with a reproducer."
@ -1376,12 +1396,13 @@ fn run_topo_batches(
let mut refined_ssa: usize = 0;
let mut refined_bodies: usize = 0;
let mut refined_auth: usize = 0;
let mg = cfg.module_graph.as_deref();
for (_path, diags, summaries, ssa_summaries, ssa_bodies, auth_summaries) in
batch_results
{
batch_diags.extend(diags);
for s in summaries {
let key = s.func_key(root_str_ref);
let key = s.func_key_with_resolver(root_str_ref, mg);
global_summaries.insert(key, s);
refined_summaries += 1;
}
@ -1568,6 +1589,15 @@ pub(crate) fn scan_filesystem_with_observer(
};
tracing::info!(file_count = all_paths.len(), "file walk complete");
// ── Build TS/JS module graph once for the scan root ──────────────────
// Phase 04: resolver foundation. The graph is built between walk and
// pass 1 so every per-file analysis (CFG-time import classification,
// pass-2 cross-file lookup) sees the same view. Build cost is bounded
// (no AST parsing, manifests only) and the result lives behind an
// `Arc` on `Config::module_graph`.
let owned_cfg_with_graph = ensure_module_graph(root, cfg);
let cfg = owned_cfg_with_graph.as_ref().unwrap_or(cfg);
if let Some(flag) = preview_tier_seen {
if all_paths.iter().any(|p| is_preview_tier_path(p)) {
flag.store(true, Ordering::Relaxed);
@ -1704,6 +1734,7 @@ pub(crate) fn scan_filesystem_with_observer(
show_progress,
);
let root_str = root.to_string_lossy();
let mg = cfg.module_graph.as_deref();
let gs = all_paths
.par_iter()
@ -1720,7 +1751,7 @@ pub(crate) fn scan_filesystem_with_observer(
let first_lang = r.summaries.first().map(|s| s.lang.clone());
for s in r.summaries {
let key = s.func_key(Some(&root_str));
let key = s.func_key_with_resolver(Some(&root_str), mg);
local_gs.insert(key, s);
}
@ -1754,6 +1785,16 @@ pub(crate) fn scan_filesystem_with_observer(
local_gs.insert_router_facts(module_id, facts);
}
// Phase-09 indexed-mode parity: cache the
// file's cross-package import map by namespace
// so an inlined callee body loaded from SQLite
// (where the body's own Arc is stripped by
// `#[serde(skip)]`) can recover its package
// boundary at step 0.7.
if let Some((ns, map)) = r.cross_package_imports {
local_gs.insert_cross_package_imports(ns, map);
}
// Record language for progress
if let Some(p) = progress {
if let Some(ref lang) = first_lang {
@ -2057,6 +2098,12 @@ pub fn scan_with_index_parallel_observer(
);
}
// Phase 04: build the TS/JS module graph between fs walk and pass 1
// so the indexed scan path sees the same resolver state as the
// non-indexed path (`scan_filesystem_with_observer`).
let owned_cfg_with_graph = ensure_module_graph(scan_root, cfg);
let cfg = owned_cfg_with_graph.as_ref().unwrap_or(cfg);
let current_files: HashSet<PathBuf> = files.iter().cloned().collect();
let removed_files: Vec<PathBuf> = indexed_files
.into_iter()
@ -2139,7 +2186,7 @@ pub fn scan_with_index_parallel_observer(
)
},
) {
Ok((func_sums, ssa_sums, ssa_bodies, auth_sums)) => {
Ok((func_sums, ssa_sums, ssa_bodies, auth_sums, cross_pkg_imports)) => {
if let Some(p) = &progress_ref {
p.inc_parsed(1);
if let Some(lang) = func_sums.first().map(|s| s.lang.as_str()) {
@ -2193,8 +2240,12 @@ pub fn scan_with_index_parallel_observer(
.collect();
// Single transaction for all four caches:
// one fsync per file instead of four.
let cpi_arg = cross_pkg_imports
.as_ref()
.map(|(ns, map)| (ns.as_str(), map.as_ref()));
if let Err(e) = idx.replace_all_for_file(
path, &hash, &func_sums, &ssa_rows, &body_rows, &auth_rows,
cpi_arg,
) {
record_persist_error(
&persist_errors_ref,
@ -2268,7 +2319,11 @@ pub fn scan_with_index_parallel_observer(
crate::symbol::Lang::from_slug(&lang_str).unwrap_or(crate::symbol::Lang::Rust);
// Use persisted namespace; fall back to normalized file_path
let ns = if namespace.is_empty() {
crate::symbol::normalize_namespace(&file_path, Some(&root_str))
crate::symbol::namespace_with_package(
&file_path,
Some(&root_str),
cfg.module_graph.as_deref(),
)
} else {
namespace
};
@ -2289,6 +2344,23 @@ pub fn scan_with_index_parallel_observer(
}
}
// Load Phase-09 cross-package import maps so an inlined callee
// body loaded from SQLite (where the body's own Arc is stripped
// by `#[serde(skip)]`) can recover its package boundary at
// step 0.7. Indexed-mode parity with `scan_filesystem`.
match idx.load_all_cross_package_imports() {
Ok(rows) => {
for (_file_path, namespace, map) in rows {
if !map.is_empty() {
gs.insert_cross_package_imports(namespace, std::sync::Arc::new(map));
}
}
}
Err(e) => {
tracing::warn!("failed to load cross_package_imports from DB: {e}");
}
}
// Load cross-file callee bodies from DB
let body_count = if crate::symex::cross_file_symex_enabled() {
match idx.load_all_ssa_bodies() {
@ -2309,7 +2381,11 @@ pub fn scan_with_index_parallel_observer(
let lang = crate::symbol::Lang::from_slug(&lang_str)
.unwrap_or(crate::symbol::Lang::Rust);
let ns = if namespace.is_empty() {
crate::symbol::normalize_namespace(&file_path, Some(&root_str))
crate::symbol::namespace_with_package(
&file_path,
Some(&root_str),
cfg.module_graph.as_deref(),
)
} else {
namespace
};
@ -2363,7 +2439,11 @@ pub fn scan_with_index_parallel_observer(
let lang =
crate::symbol::Lang::from_slug(&lang_str).unwrap_or(crate::symbol::Lang::Rust);
let ns = if namespace.is_empty() {
crate::symbol::normalize_namespace(&file_path, Some(&root_str))
crate::symbol::namespace_with_package(
&file_path,
Some(&root_str),
cfg.module_graph.as_deref(),
)
} else {
namespace
};

View file

@ -201,6 +201,36 @@ fn type_kind_index(kind: &TypeKind) -> u32 {
// domain has no dedicated slot, share the Object index so
// singleton recovery still maps to a meaningful TypeKind.
TypeKind::NullPrototypeObject => 3,
// FileSystemPromisesNs is a JS-only namespace receiver type used
// by the Phase 05 fs/promises sink resolver. The bitset domain
// has no dedicated slot; share the Object index so singleton
// recovery still hands back a usable TypeKind.
TypeKind::FileSystemPromisesNs => 3,
// Phase 07 ORM receiver TypeKinds. They participate only in the
// type-qualified callee resolver via their `label_prefix()`; the
// bitset domain's flow-sensitive narrowing has no dedicated slot
// for them, so collapse to Object (3). Singleton recovery from
// the index will hand back `Object`, which is a benign upper
// bound for the ORM receiver shapes.
TypeKind::Sequelize
| TypeKind::TypeOrmRepo
| TypeKind::TypeOrmManager
| TypeKind::MikroOrmEm => 3,
// Phase 10 — `Request` is a Web-platform receiver type used
// by the App Router entry-point seeding path; it shares the
// Object slot for the same reason the ORM TypeKinds do.
TypeKind::Request => 3,
// Phase 15 — cross-language ORM receiver TypeKinds. Same
// rationale as the Phase 07 ORM TypeKinds above; they
// participate only in the type-qualified callee resolver via
// `label_prefix()` and have no dedicated slot in the bitset
// domain.
TypeKind::SqlAlchemySession
| TypeKind::DjangoQuerySet
| TypeKind::ActiveRecordRelation
| TypeKind::GormDb
| TypeKind::SqlxDb
| TypeKind::HibernateSession => 3,
}
}

View file

@ -612,6 +612,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}

View file

@ -59,6 +59,7 @@ pub mod index {
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
summary TEXT NOT NULL,
entry_kind TEXT,
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);
@ -76,6 +77,7 @@ pub mod index {
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
summary TEXT NOT NULL,
entry_kind TEXT,
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);
@ -114,6 +116,17 @@ pub mod index {
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);
CREATE TABLE IF NOT EXISTS cross_package_imports (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL,
file_path TEXT NOT NULL,
file_hash BLOB NOT NULL,
namespace TEXT NOT NULL,
imports BLOB NOT NULL,
updated_at INTEGER NOT NULL,
UNIQUE(project, file_path)
);
CREATE TABLE IF NOT EXISTS scans (
id TEXT PRIMARY KEY,
status TEXT NOT NULL,
@ -204,6 +217,8 @@ pub mod index {
ON ssa_function_bodies(project, file_path);
CREATE INDEX IF NOT EXISTS idx_auth_check_summaries_project_file
ON auth_check_summaries(project, file_path);
CREATE INDEX IF NOT EXISTS idx_cross_package_imports_project_file
ON cross_package_imports(project, file_path);
"#;
/// Engine version used to detect stale caches across upgrades.
@ -311,7 +326,17 @@ pub mod index {
// workers on machines with more cores than that during the
// parallel indexing pass. Size the pool to comfortably hold
// a connection per rayon thread plus a small slack.
let max_conns = (num_cpus::get() as u32 + 4).max(16);
//
// `NYX_INDEX_POOL_MAX` overrides the auto-sized default. Use it in
// fd-constrained environments (test sandboxes, containers with low
// ulimit) where many parallel indexed scans would otherwise exhaust
// EMFILE: each pooled SQLite WAL connection costs ~3 fds (db + -wal
// + -shm), so 30 parallel scans × 16 conns × 3 fds = 1440 fds.
let max_conns = std::env::var("NYX_INDEX_POOL_MAX")
.ok()
.and_then(|v| v.parse::<u32>().ok())
.filter(|n| *n >= 1)
.unwrap_or_else(|| (num_cpus::get() as u32 + 4).max(16));
let pool = Arc::new(Pool::builder().max_size(max_conns).build(manager)?);
{
@ -400,6 +425,14 @@ pub mod index {
conn.execute_batch(SCHEMA)?;
}
// Phase 10 — `entry_kind` column on (ssa_)function_summaries.
// Non-destructive `ALTER TABLE ... ADD COLUMN` so existing
// rows survive the upgrade. The column is nullable; the
// INSERT paths write the JSON-encoded `EntryKind` text or
// NULL when the function is not an entry point.
Self::ensure_column(&conn, "function_summaries", "entry_kind", "TEXT")?;
Self::ensure_column(&conn, "ssa_function_summaries", "entry_kind", "TEXT")?;
// Ensure the auth_check_summaries table exists for DBs
// created before this column set was introduced. The
// `CREATE TABLE IF NOT EXISTS` in SCHEMA handles new DBs;
@ -419,6 +452,26 @@ pub mod index {
conn.execute_batch(SCHEMA)?;
}
// Phase 09 indexed-mode parity: ensure the
// `cross_package_imports` table exists for DBs created
// before this table was introduced. `CREATE TABLE
// IF NOT EXISTS` in SCHEMA handles new DBs; this branch
// only fires when the table is missing entirely from a
// pre-existing DB.
let cpi_exists: bool = conn
.query_row(
"SELECT 1 FROM sqlite_master
WHERE type = 'table' AND name = 'cross_package_imports'",
[],
|_| Ok(true),
)
.optional()?
.unwrap_or(false);
if !cpi_exists {
tracing::info!("creating cross_package_imports table");
conn.execute_batch(SCHEMA)?;
}
// Schema version check: invalidate cached summary tables
// when the on-disk artefact layout has changed in an
// incompatible way, independently of the engine version.
@ -433,6 +486,33 @@ pub mod index {
Ok(pool)
}
/// Add a column to an existing table when it is missing.
///
/// Non-destructive: leaves all existing rows untouched, populating
/// the new column with NULL. Used to thread additive schema
/// changes (Phase 10's `entry_kind`) into pre-existing databases
/// without forcing a full cache rebuild.
///
/// `table`, `column` and `sqlite_type` are spliced into the SQL text
/// verbatim (identifiers cannot be bound as SQL parameters), so only
/// pass trusted, hard-coded names.
///
/// # Errors
///
/// Returns an error when the `PRAGMA table_info` statement cannot be
/// prepared, when one of its rows cannot be read, or when the
/// `ALTER TABLE` statement fails.
fn ensure_column(
    conn: &Connection,
    table: &str,
    column: &str,
    sqlite_type: &str,
) -> NyxResult<()> {
    let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
    // Result column 1 of `PRAGMA table_info` is the column name.
    let names = stmt.query_map([], |r| r.get::<_, String>(1))?;
    for name in names {
        // Surface row-read failures instead of dropping them: a skipped
        // row could make an existing column look missing and turn into a
        // confusing "duplicate column name" error from ALTER TABLE below.
        //
        // SQLite matches column names case-insensitively, so the
        // existence check must do the same.
        if name?.eq_ignore_ascii_case(column) {
            return Ok(());
        }
    }
    tracing::info!("adding column {column} to {table}");
    conn.execute_batch(&format!(
        "ALTER TABLE {table} ADD COLUMN {column} {sqlite_type}"
    ))?;
    Ok(())
}
/// Check stored schema version against the compiled-in value.
///
/// On mismatch (including first-time open), wipe the cached
@ -468,7 +548,8 @@ pub mod index {
DELETE FROM function_summaries;
DELETE FROM ssa_function_summaries;
DELETE FROM auth_check_summaries;
DELETE FROM files;",
DELETE FROM files;
DROP TABLE IF EXISTS cross_package_imports;",
)?;
conn.execute_batch(SCHEMA)?;
conn.execute(
@ -801,14 +882,19 @@ pub mod index {
let mut stmt = tx.prepare(
"INSERT OR REPLACE INTO function_summaries
(project, file_path, file_hash, name, arity, lang,
container, disambig, kind, summary, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
container, disambig, kind, summary, entry_kind, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
)?;
for s in summaries {
let json = serde_json::to_string(s)
.map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
let disambig_sql = s.disambig.map(|d| d as i64);
let entry_kind_sql = s
.entry_kind
.as_ref()
.map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new()))
.filter(|s| !s.is_empty());
stmt.execute(params![
self.project,
path_str,
@ -820,6 +906,7 @@ pub mod index {
disambig_sql,
s.kind.as_str(),
json,
entry_kind_sql,
now
])?;
}
@ -863,8 +950,8 @@ pub mod index {
let mut stmt = tx.prepare(
"INSERT OR REPLACE INTO ssa_function_summaries
(project, file_path, file_hash, name, arity, lang, namespace,
container, disambig, kind, summary, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
container, disambig, kind, summary, entry_kind, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)",
)?;
for (name, arity, lang, namespace, container, disambig, kind, summary) in summaries
@ -872,6 +959,11 @@ pub mod index {
let json = serde_json::to_string(summary)
.map_err(|e| NyxError::Msg(format!("SSA summary serialise: {e}")))?;
let disambig_sql = disambig.map(|d| d as i64);
let entry_kind_sql = summary
.entry_kind
.as_ref()
.map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new()))
.filter(|s| !s.is_empty());
stmt.execute(params![
self.project,
path_str,
@ -884,6 +976,7 @@ pub mod index {
disambig_sql,
kind.as_str(),
json,
entry_kind_sql,
now
])?;
}
@ -1392,6 +1485,10 @@ pub mod index {
crate::symbol::FuncKind,
crate::auth_analysis::model::AuthCheckSummary,
)],
cross_package_imports: Option<(
&str,
&std::collections::HashMap<String, crate::symbol::FuncKey>,
)>,
) -> NyxResult<()> {
let tx = self.conn.transaction()?;
let path_str = file_path.to_string_lossy();
@ -1406,13 +1503,18 @@ pub mod index {
let mut stmt = tx.prepare(
"INSERT OR REPLACE INTO function_summaries
(project, file_path, file_hash, name, arity, lang,
container, disambig, kind, summary, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
container, disambig, kind, summary, entry_kind, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
)?;
for s in func_summaries {
let json = serde_json::to_string(s)
.map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
let disambig_sql = s.disambig.map(|d| d as i64);
let entry_kind_sql = s
.entry_kind
.as_ref()
.map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new()))
.filter(|s| !s.is_empty());
stmt.execute(params![
self.project,
path_str,
@ -1424,6 +1526,7 @@ pub mod index {
disambig_sql,
s.kind.as_str(),
json,
entry_kind_sql,
now
])?;
}
@ -1439,8 +1542,8 @@ pub mod index {
let mut stmt = tx.prepare(
"INSERT OR REPLACE INTO ssa_function_summaries
(project, file_path, file_hash, name, arity, lang, namespace,
container, disambig, kind, summary, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
container, disambig, kind, summary, entry_kind, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)",
)?;
for (name, arity, lang, namespace, container, disambig, kind, summary) in
ssa_summaries
@ -1448,6 +1551,11 @@ pub mod index {
let json = serde_json::to_string(summary)
.map_err(|e| NyxError::Msg(format!("SSA summary serialise: {e}")))?;
let disambig_sql = disambig.map(|d| d as i64);
let entry_kind_sql = summary
.entry_kind
.as_ref()
.map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new()))
.filter(|s| !s.is_empty());
stmt.execute(params![
self.project,
path_str,
@ -1460,6 +1568,7 @@ pub mod index {
disambig_sql,
kind.as_str(),
json,
entry_kind_sql,
now
])?;
}
@ -1536,6 +1645,26 @@ pub mod index {
}
}
// cross_package_imports: replace this file's row, even with
// an empty input, so a file that lost its imports does not
// leave stale resolutions in the cache.
tx.execute(
"DELETE FROM cross_package_imports WHERE project = ?1 AND file_path = ?2",
params![self.project, path_str],
)?;
if let Some((namespace, map)) = cross_package_imports
&& !map.is_empty()
{
let blob = rmp_serde::to_vec_named(map)
.map_err(|e| NyxError::Msg(format!("cross_package_imports serialise: {e}")))?;
tx.execute(
"INSERT OR REPLACE INTO cross_package_imports
(project, file_path, file_hash, namespace, imports, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
params![self.project, path_str, file_hash, namespace, blob, now],
)?;
}
tx.commit()?;
Ok(())
}
@ -1622,6 +1751,61 @@ pub mod index {
Ok(out)
}
/// Fetch the persisted Phase-09 cross-package import map of every file
/// recorded for this project.
///
/// Each returned tuple is `(file_path, namespace, imports_map)`. Pass 2
/// of indexed scans uses the result to populate
/// `GlobalSummaries::cross_package_imports_by_namespace`, restoring the
/// per-file import view that
/// [`crate::taint::ssa_transfer::CalleeSsaBody::cross_package_imports`]
/// loses across the SQLite round-trip (`#[serde(skip)]`).
///
/// Rows that cannot be read and blobs that cannot be decoded are logged
/// with a warning and left out of the result rather than failing the
/// whole load.
pub fn load_all_cross_package_imports(
    &self,
) -> NyxResult<
    Vec<(
        String,
        String,
        std::collections::HashMap<String, crate::symbol::FuncKey>,
    )>,
> {
    type ImportMap = std::collections::HashMap<String, crate::symbol::FuncKey>;

    let mut stmt = self.c().prepare(
        "SELECT file_path, namespace, imports
FROM cross_package_imports WHERE project = ?1",
    )?;

    // Phase 1: pull the raw rows out of SQLite, skipping unreadable ones.
    let mapped = stmt.query_map([&self.project], |row| {
        let file_path: String = row.get(0)?;
        let namespace: String = row.get(1)?;
        let blob: Vec<u8> = row.get(2)?;
        Ok((file_path, namespace, blob))
    })?;
    let mut raw_rows: Vec<(String, String, Vec<u8>)> = Vec::new();
    for row in mapped {
        match row {
            Ok(triple) => raw_rows.push(triple),
            Err(e) => {
                tracing::warn!("failed to read cross_package_imports row: {e}");
            }
        }
    }

    // Phase 2: decode each MessagePack blob, skipping corrupt ones.
    let decoded: Vec<(String, String, ImportMap)> = raw_rows
        .into_iter()
        .filter_map(|(file_path, namespace, blob)| {
            match rmp_serde::from_slice::<ImportMap>(&blob) {
                Ok(map) => Some((file_path, namespace, map)),
                Err(e) => {
                    tracing::warn!("failed to deserialize cross_package_imports blob: {e}");
                    None
                }
            }
        })
        .collect();
    Ok(decoded)
}
/// Remove a file and all derived persisted state for this project.
///
/// This deletes the file row, issues, and all persisted summary rows so
@ -1659,6 +1843,10 @@ pub mod index {
"DELETE FROM auth_check_summaries WHERE project = ?1 AND file_path = ?2",
params![self.project, path_str.as_ref()],
)?;
tx.execute(
"DELETE FROM cross_package_imports WHERE project = ?1 AND file_path = ?2",
params![self.project, path_str.as_ref()],
)?;
tx.commit()?;
Ok(())
@ -2539,6 +2727,7 @@ fn ssa_summaries_round_trip() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
),
(
@ -2575,6 +2764,7 @@ fn ssa_summaries_round_trip() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
),
];
@ -2749,6 +2939,7 @@ fn ssa_summaries_hash_rescan_replaces_stale() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
)];
idx.replace_ssa_summaries_for_file(&f, &hash_v1, &sums_v1)
@ -2787,6 +2978,7 @@ fn ssa_summaries_hash_rescan_replaces_stale() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
)];
idx.replace_ssa_summaries_for_file(&f, &hash_v2, &sums_v2)
@ -2846,6 +3038,7 @@ fn clear_drops_ssa_summaries_table() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
)];
idx.replace_ssa_summaries_for_file(&f, &hash, &sums)
@ -2903,6 +3096,7 @@ fn make_test_callee_body(
field_interner: crate::ssa::ir::FieldInterner::new(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
},
opt: crate::ssa::OptimizeResult {
const_values: std::collections::HashMap::new(),
@ -2921,9 +3115,58 @@ fn make_test_callee_body(
param_count,
node_meta: std::collections::HashMap::new(),
body_graph: None,
cross_package_imports: std::sync::Arc::new(std::collections::HashMap::new()),
}
}
/// A cross-package import map written through `replace_all_for_file`
/// must come back unchanged from `load_all_cross_package_imports`, and
/// a later call that passes no imports must remove the stored row.
#[test]
fn cross_package_imports_round_trip_via_replace_all_for_file() {
    use crate::symbol::{FuncKey, FuncKind, Lang};
    use std::collections::HashMap;

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source_file = tmp.path().join("caller.ts");
    std::fs::write(&source_file, "import { escape } from '@scope/util';").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let mut indexer = index::Indexer::from_pool("proj", &pool).unwrap();
    let digest = index::Indexer::digest_bytes(b"caller content");

    // Single binding: local name `escape` resolves into the util package.
    let escape_key = FuncKey {
        lang: Lang::TypeScript,
        namespace: "packages/util/src/escape.ts".to_string(),
        container: String::new(),
        name: "escape".to_string(),
        arity: None,
        disambig: None,
        kind: FuncKind::Function,
    };
    let imports: HashMap<String, FuncKey> =
        HashMap::from([("escape".to_string(), escape_key)]);

    indexer
        .replace_all_for_file(
            &source_file,
            &digest,
            &[],
            &[],
            &[],
            &[],
            Some(("caller.ts", &imports)),
        )
        .unwrap();

    let loaded = indexer.load_all_cross_package_imports().unwrap();
    assert_eq!(loaded.len(), 1);

    let (stored_path, stored_ns, stored_map) = &loaded[0];
    assert_eq!(stored_path, &source_file.to_string_lossy().to_string());
    assert_eq!(stored_ns, "caller.ts");
    assert_eq!(stored_map.len(), 1);

    let restored = stored_map
        .get("escape")
        .expect("escape binding survives round-trip");
    assert_eq!(restored.namespace, "packages/util/src/escape.ts");
    assert_eq!(restored.name, "escape");
    assert_eq!(restored.lang, Lang::TypeScript);

    // Rescanning with no imports supplied must drop the stored row.
    indexer
        .replace_all_for_file(&source_file, &digest, &[], &[], &[], &[], None)
        .unwrap();
    assert!(indexer.load_all_cross_package_imports().unwrap().is_empty());
}
#[test]
fn ssa_bodies_round_trip() {
let td = tempfile::tempdir().unwrap();
@ -3122,6 +3365,7 @@ fn make_test_ssa_summary() -> crate::summary::ssa_summary::SsaFuncSummary {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
}
}
@ -3436,6 +3680,153 @@ fn missing_ssa_namespace_column_triggers_recreate() {
assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 1);
}
/// Phase 10 migration test.
///
/// Seeds a database whose `(ssa_)function_summaries` tables have the
/// post-Phase 09 shape (namespace + container + disambig + kind columns,
/// but no `entry_kind`) and holds one row per table. Opening it through
/// `init` must add `entry_kind` to both tables in place, keep the seeded
/// rows, and leave their `entry_kind` as NULL.
#[test]
fn entry_kind_column_added_in_place_without_data_loss() {
    // Pre-Phase-10 schema: neither summary table has `entry_kind`.
    const PRE_PHASE_10_SCHEMA: &str = "CREATE TABLE files (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL, path TEXT NOT NULL,
hash BLOB NOT NULL, mtime INTEGER NOT NULL,
scanned_at INTEGER NOT NULL, UNIQUE(project, path)
);
CREATE TABLE function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL, file_path TEXT NOT NULL,
file_hash BLOB NOT NULL, name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL,
container TEXT NOT NULL DEFAULT '',
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
summary TEXT NOT NULL, updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);
CREATE TABLE ssa_function_summaries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project TEXT NOT NULL, file_path TEXT NOT NULL,
file_hash BLOB NOT NULL, name TEXT NOT NULL,
arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL,
namespace TEXT NOT NULL DEFAULT '',
container TEXT NOT NULL DEFAULT '',
disambig INTEGER,
kind TEXT NOT NULL DEFAULT 'fn',
summary TEXT NOT NULL, updated_at INTEGER NOT NULL,
UNIQUE(project, file_path, name, container, arity, disambig, kind)
);";

    // One legacy row per summary table; the migration must keep both.
    const SEED_ROWS: [&str; 2] = [
        "INSERT INTO function_summaries
(project, file_path, file_hash, name, arity, lang,
container, disambig, kind, summary, updated_at)
VALUES ('proj', 'lib.py', X'00', 'old_func', 1, 'python',
'', NULL, 'fn', '{}', 0)",
        "INSERT INTO ssa_function_summaries
(project, file_path, file_hash, name, arity, lang,
namespace, container, disambig, kind, summary, updated_at)
VALUES ('proj', 'lib.py', X'00', 'old_func', 1, 'python',
'', '', NULL, 'fn', '{}', 0)",
    ];

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    // The raw connection lives only inside this scope, so it is closed
    // before `init` opens the database.
    {
        let raw = rusqlite::Connection::open(&db_path).unwrap();
        raw.execute_batch(PRE_PHASE_10_SCHEMA).unwrap();
        for insert in SEED_ROWS {
            raw.execute(insert, []).unwrap();
        }

        // Record current schema/engine versions up front so that
        // `check_schema_version` and `check_engine_version` treat the
        // database as current and do not wipe the seeded rows. This
        // test is about the in-place `ALTER TABLE` only.
        raw.execute(
            "CREATE TABLE IF NOT EXISTS nyx_metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)",
            [],
        )
        .unwrap();
        raw.execute(
            "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('schema_version', ?1)",
            rusqlite::params![index::SCHEMA_VERSION],
        )
        .unwrap();
        raw.execute(
            "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)",
            rusqlite::params![index::ENGINE_VERSION],
        )
        .unwrap();
    }

    // `init` should ALTER both tables non-destructively.
    let pool = index::Indexer::init(&db_path).unwrap();
    let conn = pool.get().unwrap();

    let has_column = |table: &str, column: &str| -> bool {
        let mut stmt = conn
            .prepare(&format!("PRAGMA table_info({table})"))
            .unwrap();
        let found = stmt
            .query_map([], |r| r.get::<_, String>(1))
            .unwrap()
            .filter_map(Result::ok)
            .any(|name| name == column);
        found
    };

    assert!(
        has_column("function_summaries", "entry_kind"),
        "function_summaries.entry_kind missing after migration"
    );
    assert!(
        has_column("ssa_function_summaries", "entry_kind"),
        "ssa_function_summaries.entry_kind missing after migration"
    );

    // The seeded rows are still there.
    let surviving_funcs: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM function_summaries WHERE project = 'proj'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(
        surviving_funcs, 1,
        "pre-existing function_summaries row was lost"
    );

    let surviving_ssa: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM ssa_function_summaries WHERE project = 'proj'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(
        surviving_ssa, 1,
        "pre-existing ssa_function_summaries row was lost"
    );

    // The added column defaults to NULL on pre-existing rows.
    let legacy_entry_kind: Option<String> = conn
        .query_row(
            "SELECT entry_kind FROM function_summaries WHERE project = 'proj'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert!(legacy_entry_kind.is_none());
}
#[test]
fn valid_schema_no_recreate() {
let td = tempfile::tempdir().unwrap();

1720
src/entry_points/mod.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -73,6 +73,27 @@ pub static RULES: &[LabelRule] = &[
"db.Exec",
"db.QueryRow",
"db.Prepare",
// Phase 15 — GORM `db.Raw(sql)` raw-SQL passthrough. GORM's
// `*gorm.DB` is conventionally bound to a `db`-named receiver,
// so the suffix `db.Raw` carries the GORM semantic without
// colliding with stdlib `*sql.DB` (which has no `Raw` method).
// The `GormDb.Raw` type-qualified variant in the receiver-typed
// rule list below covers receivers tagged from `gorm.Open(...)`
// with non-`db` names.
"db.Raw",
// Phase 15 — `database/sql`-context variants. `db.QueryContext`,
// `db.ExecContext`, `db.QueryRowContext`, `db.PrepareContext`
// accept the SQL string at arg 1 (after `ctx`). Receivers
// typed as `*sql.DB` / `*sql.Tx` / `*sql.Stmt` resolve via
// suffix-matching on `db.<verb>`; calls on differently-named
// bound receivers (`tx.QueryContext(...)`) only suffix-match
// when the receiver text ends with `db` (covers `userDb`,
// `pgDb`, etc.). More-precise receiver typing is in scope
// for `DatabaseConnection.<verb>` rules below.
"db.QueryContext",
"db.ExecContext",
"db.QueryRowContext",
"db.PrepareContext",
// goqu raw SQL literal builders: `goqu.L(s)` and the alias
// `goqu.Lit(s)` insert `s` verbatim into the generated SQL with no
// parameterisation. CVE-2026-41422 (daptin) loops a user-controlled
@ -88,6 +109,36 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — receiver-typed Go ORM/raw-SQL sinks. `*gorm.DB` (set by
// `constructor_type` for `gorm.Open(...)`) exposes `Raw(sql)` and
// `Exec(sql)` as raw-SQL passthrough; the type-qualified resolver
// rewrites `db.Raw(...)` → `GormDb.Raw`. `*sqlx.DB` likewise gets
// `NamedExec` / `NamedQuery` / `Select` / `Get` rewriting via
// `SqlxDb.<verb>`. `DatabaseConnection.<verb>` covers the stdlib
// `*sql.DB` / `*sql.Tx` receivers tagged by the existing
// `sql.Open` / `sql.OpenDB` constructor mapping — currently the
// chained QueryContext shape suffix-matches `db.QueryContext` above,
// so `DatabaseConnection.QueryContext` is here for receivers whose
// identifier text doesn't end in `db`.
LabelRule {
matchers: &[
"GormDb.Raw",
"GormDb.Exec",
"SqlxDb.NamedExec",
"SqlxDb.NamedQuery",
"SqlxDb.Select",
"SqlxDb.Get",
"SqlxDb.MustExec",
"DatabaseConnection.QueryContext",
"DatabaseConnection.ExecContext",
"DatabaseConnection.QueryRowContext",
"DatabaseConnection.Query",
"DatabaseConnection.Exec",
"DatabaseConnection.QueryRow",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// fmt.Printf/Sprintf write to stdout or build strings in memory, not
// security sinks. fmt.Fprintf writes to an io.Writer (often http.ResponseWriter)
// so it IS a security sink for XSS.
@ -576,6 +627,363 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &[],
},
},
// ── SQL execute payload-arg gating (Phase 15 deferred fix, Go) ────────
//
// Mirrors the Python resolution recorded in `python::GATED_SINKS`. The
// flat rules above already classify these callees as `Sink(SQL_QUERY)`
// on every argument. `database/sql` and the Go ORM/raw-SQL ecosystem
// (GORM, sqlx, goqu) follow the convention that the SQL string is at
// arg 0 (or arg 1 for the `*Context` variants whose first arg is a
// `context.Context`); subsequent positional arguments are bind values
// sent through the driver's parameterised path. Tainted bind values
// are SAFE; tainted SQL is the SQLi vector.
//
// Destination-activation gates carry the same `Sink(SQL_QUERY)` label
// as the flat rule (cap dedupes against the flat label) and propagate
// `payload_args: &[0]` (or `&[1]` for `*Context` shapes) into
// `sink_payload_args`, narrowing the SSA sink scan to the SQL position.
SinkGate {
callee_matcher: "db.Query",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.Exec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.QueryRow",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.Prepare",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.Raw",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// `*Context` variants take `ctx` at arg 0 and the SQL string at arg 1.
SinkGate {
callee_matcher: "db.QueryContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.ExecContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.QueryRowContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.PrepareContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// goqu raw SQL literal builders. Single arg, payload at 0.
SinkGate {
callee_matcher: "goqu.L",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "goqu.Lit",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// Receiver-typed (case-sensitive, matching the flat rule): GORM / sqlx
// / `*sql.DB` typed via `constructor_type`. All take SQL at arg 0
// EXCEPT the `*Context` variants on `DatabaseConnection`, which take
// SQL at arg 1.
SinkGate {
callee_matcher: "GormDb.Raw",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "GormDb.Exec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlxDb.NamedExec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlxDb.NamedQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// sqlx `Select(dest, query, args...)` and `Get(dest, query, args...)`
// take the scan destination at arg 0 and the SQL string at arg 1.
// Unlike the other receiver-typed gates in this group, the payload
// position is therefore 1; gating on arg 0 would narrow the sink scan
// to the destination pointer and miss tainted SQL.
SinkGate {
    callee_matcher: "SqlxDb.Select",
    arg_index: 1,
    dangerous_values: &[],
    dangerous_prefixes: &[],
    label: DataLabel::Sink(Cap::SQL_QUERY),
    case_sensitive: true,
    payload_args: &[1],
    keyword_name: None,
    dangerous_kwargs: &[],
    activation: GateActivation::Destination {
        object_destination_fields: &[],
    },
},
SinkGate {
    callee_matcher: "SqlxDb.Get",
    arg_index: 1,
    dangerous_values: &[],
    dangerous_prefixes: &[],
    label: DataLabel::Sink(Cap::SQL_QUERY),
    case_sensitive: true,
    payload_args: &[1],
    keyword_name: None,
    dangerous_kwargs: &[],
    activation: GateActivation::Destination {
        object_destination_fields: &[],
    },
},
SinkGate {
callee_matcher: "SqlxDb.MustExec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.Query",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.Exec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.QueryRow",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.QueryContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.ExecContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.QueryRowContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {

View file

@ -94,6 +94,21 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — JPA / Hibernate `Query.setParameter(name, value)` /
// `Query.setParameterList(...)` bind a positional / named parameter
// and return the same query object. The bind step does NOT inject
// the value into the SQL string; the value is sent as a separate
// parameter through the JDBC layer at execution. Treating
// `setParameter` / `setParameterList` as a SQL_QUERY sanitizer
// clears any taint inadvertently smeared onto the chain return so
// downstream `.getResultList()` / `.executeUpdate()` calls see a
// clean value. Case-sensitive: these are JPA-specific verb names
// and the chain shape is canonical.
LabelRule {
matchers: &["setParameter", "setParameterList"],
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: true,
},
// ─────────── Sinks ─────────────
LabelRule {
matchers: &["Runtime.exec", "ProcessBuilder"],
@ -125,6 +140,72 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::CODE_EXEC),
case_sensitive: false,
},
// Phase 13 — java.nio.file path-traversal sinks. `Files.<verb>` is
// the modern stdlib API for read/write/copy/move/delete operations;
// each takes a `Path` (or `Path` + payload) as arg 0. Default
// arg→return propagation smears taint through `Paths.get(...)`
// (forwarder) so the path arg of these calls inherits any taint
// present on the components. `FileInputStream` / `FileOutputStream` /
// `RandomAccessFile` are constructor-style sinks: `new
// FileInputStream(path)` reaches the FILE_IO sink at the
// `object_creation_expression` level (mapped to `Kind::CallFn` in
// Java's KINDS). Receiver-typing already maps these classes to
// `TypeKind::FileHandle` (see `class_name_to_type_kind`) so chained
// method calls on the resulting handle resolve via type-qualified
// labels, but the construction call itself is the canonical
// path-traversal vector.
LabelRule {
matchers: &[
"Files.readString",
"Files.readAllBytes",
"Files.readAllLines",
"Files.write",
"Files.writeString",
"Files.lines",
"Files.copy",
"Files.move",
"Files.delete",
"Files.deleteIfExists",
"Files.newInputStream",
"Files.newOutputStream",
"Files.newBufferedReader",
"Files.newBufferedWriter",
"FileInputStream",
"FileOutputStream",
"RandomAccessFile",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: true,
},
// Phase 13 — `Path.normalize()` collapses `.` / `..` segments and
// is the canonical Java path-traversal sanitiser when paired with
// a `startsWith(base)` containment check (not modelled here; the
// sanitiser rule clears the FILE_IO cap on the call's return,
// which is sufficient for the cap-based gate to suppress the
// sink finding). Case-sensitive: `Path.normalize` is unique to
// `java.nio.file.Path`; bare `normalize` would over-fire on
// `Locale.normalize`, `BigDecimal.normalize`, etc.
LabelRule {
matchers: &[
"Path.normalize",
// Canonical Java path-traversal sanitiser idiom:
// `base.resolve(name).normalize()`. CFG paren-strip yields
// callee text `<receiver>.resolve.normalize`; the bare 2-call
// `resolve.normalize` suffix is unique to `java.nio.file.Path`
// (no overload across the supported corpus produces the same
// chain text). Case-sensitive on the leaf chain to avoid
// colliding with non-path `.resolve()`-then-`.normalize()`
// shapes in unrelated grammars.
"resolve.normalize",
// Receiver-bound shape `Paths.get(p).normalize()` — the
// `Paths.get` constructor mapping in `ssa/type_facts.rs` types
// the receiver as `FileHandle`, so the type-qualified resolver
// rewrites `<v>.normalize` → `FileHandle.normalize` here.
"FileHandle.normalize",
],
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: true,
},
// HTTP response sinks, println/print are broad (also match System.out)
// but necessary to catch response.getWriter().println() via suffix matching.
LabelRule {
@ -134,12 +215,34 @@ pub static RULES: &[LabelRule] = &[
},
// openConnection() is the standard java.net.URL API for initiating a connection.
// It is the correct interception point, the URL is already set on the object.
//
// Phase 14 — additional SSRF entry points covered:
// * `URL.openStream` — equivalent of `URL.openConnection().getInputStream()`,
// fetches the resource at the URL directly. Bare `openStream`
// suffix is unique to `java.net.URL` in the supported corpus.
// * `OkHttpClient.newCall(Request)` — Square OkHttp's request
// dispatch entry point. The `Request` is built via a
// `Request.Builder().url(u).build()` chain whose default
// arg→return propagation smears URL taint through the chain.
// * `RestTemplate.getForEntity` / `RestTemplate.headForHeaders` —
// read-shaped Spring verbs that take the URL at arg 0.
LabelRule {
matchers: &[
"openConnection",
"openStream",
"HttpClient.send",
"HttpClient.sendAsync",
// Phase 14 — `OkHttpClient.newCall(Request)` and the
// generic `HttpClient.newCall` form OkHttp resolves to via
// the JAVA_HIERARCHY (OkHttpClient → HttpClient). Both
// forms are covered so a constructor-typed receiver
// (HttpClient) and a class-named receiver (OkHttpClient)
// both fire.
"HttpClient.newCall",
"OkHttpClient.newCall",
"getForObject",
"getForEntity",
"headForHeaders",
"RestTemplate.exchange",
"postForObject",
"postForEntity",
@ -246,8 +349,34 @@ pub static RULES: &[LabelRule] = &[
matchers: &[
"entityManager.createNativeQuery",
"entityManager.createQuery",
"em.createNativeQuery",
"em.createQuery",
"session.createQuery",
"session.createSQLQuery",
"session.createNativeQuery",
// Phase 15 — Spring Data JPA / Hibernate factory chains:
// `getEntityManager().createNativeQuery(...)` /
// `getSession().createQuery(...)` reduce to
// `getEntityManager.createNativeQuery` /
// `getSession.createQuery` after the chain-normalisation
// strips parens.
"getEntityManager.createNativeQuery",
"getEntityManager.createQuery",
"getSession.createQuery",
"getSession.createSQLQuery",
"getSession.createNativeQuery",
// Type-qualified Hibernate Session matchers fire when the
// receiver carries a `TypeKind::HibernateSession` fact (set
// by `constructor_type` for `sessionFactory.openSession()` /
// `sessionFactory.getCurrentSession()` /
// `sessionFactory.openStatelessSession()` returns). Closes
// the arbitrary-receiver-name shape (`sess`,
// `hibernateSession`, etc.) the flat `session.*` matchers
// above only catch when receiver is literally named
// `session`.
"HibernateSession.createQuery",
"HibernateSession.createSQLQuery",
"HibernateSession.createNativeQuery",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
@ -484,6 +613,385 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &[],
},
},
// ── SQL execute payload-arg gating (Phase 15 deferred fix, Java) ──────
//
// Mirrors the Python resolution recorded in `python::GATED_SINKS`: the
// flat rules above already classify these callees as `Sink(SQL_QUERY)`
// on every argument. The JDBC / JPA / Hibernate / Spring conventions
// are that arg 0 is the SQL template (or HQL/JPQL string) and any
// remaining arguments are bind values, RowMappers, result-set classes,
// or other non-SQL payloads. Tainted bind values are SAFE because the
// driver / JPA layer escapes them; tainted SQL is the SQLi vector.
//
// These Destination-activation gates carry the same `Sink(SQL_QUERY)`
// label as the flat rule (so cap dedupes against the flat label) but
// propagate `payload_args: &[0]` into `sink_payload_args`, narrowing the
// SSA sink scan to arg 0 only. Receiver-typed `DatabaseConnection.*`
// forms are case-sensitive, matching the flat rule.
SinkGate {
callee_matcher: "executeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "executeUpdate",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.executeBatch",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.executeLargeUpdate",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// Spring JdbcTemplate verbs. All take SQL at arg 0; remaining args are
// bind values (`Object[]` / varargs) or `RowMapper` / `ResultSetExtractor`
// / class hints — all non-SQL payloads.
SinkGate {
callee_matcher: "jdbcTemplate.query",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "jdbcTemplate.update",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "jdbcTemplate.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "jdbcTemplate.queryForObject",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "jdbcTemplate.queryForList",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// JPA / Hibernate factories. `createQuery(sql)` / `createQuery(sql, ResultClass)`
// both take the SQL/JPQL/HQL string at arg 0; the optional `ResultClass`
// at arg 1 is metadata, not SQL.
SinkGate {
callee_matcher: "entityManager.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "entityManager.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "em.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "em.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "session.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "session.createSQLQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "session.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getEntityManager.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getEntityManager.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getSession.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getSession.createSQLQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getSession.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// Type-qualified Hibernate Session gates. Mirror the
// `session.create*` family above so type-qualified resolution at
// sink-firing time consults `payload_args = &[0]` and suppresses
// tainted bind-arg shapes that route through `setParameter` /
// `setString` rather than the raw query string. Receivers carry
// `TypeKind::HibernateSession` via `constructor_type`'s
// `openSession` / `getCurrentSession` / `openStatelessSession`
// arms.
SinkGate {
callee_matcher: "HibernateSession.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "HibernateSession.createSQLQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "HibernateSession.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {

View file

@ -1,5 +1,6 @@
use crate::labels::{
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
Cap, DataLabel, GateActivation, GatedLabelRule, Kind, LabelGate, LabelRule, ParamConfig,
RuntimeLabelRule, SinkGate,
};
use crate::utils::project::{DetectedFramework, FrameworkContext};
use phf::{Map, phf_map};
@ -29,6 +30,21 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Source(Cap::all()),
case_sensitive: false,
},
// Phase 10 — Web `Request` receiver-method reads. Triggered when
// the SSA receiver carries `TypeKind::Request` and the
// type-qualified resolver rewrites `req.json()` → `Request.json`
// etc. Mirrors the matching list in `labels/typescript.rs`.
LabelRule {
matchers: &[
"Request.json",
"Request.formData",
"Request.text",
"Request.url",
"Request.headers.get",
],
label: DataLabel::Source(Cap::all()),
case_sensitive: true,
},
// ───────── Sanitizers ──────────
LabelRule {
matchers: &["JSON.parse"],
@ -253,6 +269,40 @@ pub static RULES: &[LabelRule] = &[
"fs.unlinkSync",
"fs.readdir",
"fs.readdirSync",
// Phase 05 — `node:fs/promises` member-access forms covered
// here. Bare-name forms (`readFile`, `open`, ...) and
// `fsp.readFile` namespace-import forms ride the gated
// matcher in `GATED_LABEL_RULES`. Receiver-type fallback
// synthesises `FileSystemPromisesNs.<method>` (handled
// below).
"fs.promises.readFile",
"fs.promises.writeFile",
"fs.promises.unlink",
"fs.promises.open",
"fs.promises.stat",
"fs.promises.readdir",
"fs.promises.mkdir",
"fs.promises.rmdir",
"fs.promises.rm",
"fs.promises.appendFile",
"fs.promises.copyFile",
"fs.promises.rename",
"fs.promises.truncate",
"fs.promises.chmod",
"FileSystemPromisesNs.readFile",
"FileSystemPromisesNs.writeFile",
"FileSystemPromisesNs.unlink",
"FileSystemPromisesNs.open",
"FileSystemPromisesNs.stat",
"FileSystemPromisesNs.readdir",
"FileSystemPromisesNs.mkdir",
"FileSystemPromisesNs.rmdir",
"FileSystemPromisesNs.rm",
"FileSystemPromisesNs.appendFile",
"FileSystemPromisesNs.copyFile",
"FileSystemPromisesNs.rename",
"FileSystemPromisesNs.truncate",
"FileSystemPromisesNs.chmod",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
@ -310,6 +360,31 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// ── Phase 07 — ORM query-builder receiver-typed sinks ──
//
// Each rule here matches a callee text constructed by
// `resolve_type_qualified_labels` when a value's inferred TypeKind has a
// `label_prefix()`. The matcher form `<TypePrefix>.<method>` is the
// wire shape produced by that helper. The receiver TypeKinds
// themselves are populated by [`crate::ssa::type_facts::constructor_type`]
// (TS/JS branch): `new Sequelize(...)` → `Sequelize`,
// `getRepository(Entity)` → `TypeOrmRepo`,
// `getManager()` → `TypeOrmManager`,
// `createEntityManager()` → `MikroOrmEm`. Without a typed receiver the
// qualified callee text is never built, so these rules cannot misfire on
// unrelated `.literal()` / `.query()` / `.execute()` methods.
LabelRule {
matchers: &[
"Sequelize.literal",
"TypeOrmRepo.query",
"TypeOrmRepo.createQueryBuilder",
"TypeOrmManager.query",
"TypeOrmManager.createQueryBuilder",
"MikroOrmEm.execute",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// ─── LDAP injection sinks ───
//
// `ldapjs`: both the bound-variable idiom
@ -527,6 +602,75 @@ pub static EXCLUDES: &[&str] = &[
"exec.start",
];
/// Import-gated label rules for JavaScript. Every entry pairs a matcher
/// list with a [`LabelGate`]; the rule's label is only produced when a
/// matcher hits *and* the gate is satisfied for the call site. The
/// entries cover `fs/promises` FILE_IO sinks, Knex and Drizzle SQL sinks,
/// and the Next.js `next/headers` request sources; each entry carries its
/// own rationale comment below.
pub static GATED_LABEL_RULES: &[GatedLabelRule] = &[
// Phase 05 — `node:fs/promises` path-traversal sinks. The matcher list
// holds the bare-name and `<ns>.<method>` member-access shapes; the
// [`LabelGate::ImportedFromModule`] gate suppresses bare-name matches
// unless the file actually imports the method from `node:fs/promises`
// or `fs/promises`. Bare-name only — `fs.promises.readFile`-style
// member-access forms continue to fire via the flat FILE_IO matcher
// list (no gate needed because the `fs.promises.` prefix is itself
// witness to the resolution).
GatedLabelRule {
matchers: &[
"readFile",
"writeFile",
"unlink",
"open",
"stat",
"readdir",
"mkdir",
"rmdir",
"rm",
"appendFile",
"copyFile",
"rename",
"truncate",
"chmod",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
gate: LabelGate::ImportedFromModule(&["node:fs/promises", "fs/promises"]),
},
// Phase 07 — Knex bare-name raw-SQL escape hatches. The receiver in
// `db.whereRaw(sql)` shape is an arbitrary local binding (`db`, `qb`,
// `users`, ...) so leading-identifier gating cannot witness the
// import. Phase 07 deferred-item 10 tightening: require the file to
// bind the conventional value-import name `knex` (lowercase) so that
// type-only shapes like `import { Knex } from 'knex'` (for
// `Knex.QueryBuilder` type annotations) do not over-fire the gate.
GatedLabelRule {
matchers: &["whereRaw", "orderByRaw", "havingRaw"],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
gate: LabelGate::FileImportsModuleAsLocalName {
modules: &["knex"],
local_names: &["knex"],
},
},
// Phase 07 — Drizzle `sql` template-tag builder. Two shapes:
// - `sql.raw(x)` → callee text "sql.raw" (member call)
// - `sql\`SELECT ${x}\`` → callee text "sql" (tag call)
// Both leading-identifier-gate against the imported `sql` symbol from
// `drizzle-orm`. `=sql` is exact-only so unrelated `.sql()` methods do
// not collide; `sql.raw` carries its own member-access matcher.
GatedLabelRule {
matchers: &["=sql", "sql.raw"],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
gate: LabelGate::ImportedFromModule(&["drizzle-orm"]),
},
// Phase 10 — Next.js `cookies()` / `headers()` from `next/headers`
// return adversary-controlled request-bound state. Mirrors the
// entry in `labels/typescript.rs::GATED_LABEL_RULES`.
GatedLabelRule {
matchers: &["cookies", "headers"],
label: DataLabel::Source(Cap::all()),
case_sensitive: true,
gate: LabelGate::ImportedFromModule(&["next/headers"]),
},
];
pub static GATED_SINKS: &[SinkGate] = &[
SinkGate {
callee_matcher: "setAttribute",
@ -1316,6 +1460,8 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"variable_declaration" => Kind::CallWrapper,
"lexical_declaration" => Kind::CallWrapper,
"expression_statement" => Kind::CallWrapper,
"await_expression" => Kind::AwaitForward,
"jsx_attribute" => Kind::JsxAttr,
// trivia
"comment" => Kind::Trivia,

View file

@ -38,6 +38,61 @@ pub struct LabelRule {
pub case_sensitive: bool,
}
/// Activation gate carried by a [`GatedLabelRule`]. Phase 05 introduces the
/// import-derived gate so JS/TS bare-name `fs/promises` sinks (`readFile`,
/// `writeFile`, ...) only fire when the call resolves to that module — a
/// flat bare-name match would over-fire on user-defined `readFile` helpers.
///
/// The three variants differ in *what* must witness the import: the call's
/// own leading identifier, any import in the file, or a specific local
/// binding name in the file.
#[derive(Debug, Clone, Copy)]
pub enum LabelGate {
/// Fires only when the call's leading identifier is locally bound by an
/// import / `require` whose `source_module` equals one of the listed
/// specifiers. The synthetic prefix `FileSystemPromisesNs.` produced by
/// receiver-type qualification also satisfies the gate (see Phase 05's
/// `TypeKind::FileSystemPromisesNs`).
ImportedFromModule(&'static [&'static str]),
/// Fires when *any* local-name in the file's import view resolves to one
/// of the listed specifiers, regardless of which identifier leads the
/// call. Used for Phase 07 ORM bare-name method sinks (Knex's `whereRaw`
/// / `orderByRaw` / `havingRaw`) where the receiver is a query-builder
/// instance whose binding name is arbitrary (`db`, `qb`, `users`, ...)
/// and the import witness is the package itself.
///
/// NOTE(review): the JavaScript Knex rule in
/// `javascript::GATED_LABEL_RULES` uses
/// [`Self::FileImportsModuleAsLocalName`] instead — confirm whether any
/// rule table still relies on this looser variant.
FileImportsModule(&'static [&'static str]),
/// Fires when the file's import view binds at least one of `local_names`
/// to one of `modules`. Tighter than [`Self::FileImportsModule`]: type-only
/// or peripheral named-import shapes (e.g. `import { Knex } from 'knex'`
/// for type-only use of `Knex.QueryBuilder`) do not satisfy the gate
/// unless the conventional value-binding name (`knex`, lowercase) is also
/// present. Used for Phase 07 deferred-item 10's tightening of the Knex
/// `whereRaw` / `orderByRaw` / `havingRaw` gate.
FileImportsModuleAsLocalName {
/// Module specifiers the local binding must have been imported from.
modules: &'static [&'static str],
/// Local binding names, at least one of which must be bound to one of
/// `modules` in the file's import view.
local_names: &'static [&'static str],
},
}
/// A label rule that only fires when its [`LabelGate`] is satisfied at the
/// call site. The matcher / label / case-sensitivity semantics mirror
/// [`LabelRule`]; the gate is checked by [`classify_all_ctx`] using the
/// caller-supplied [`ClassificationContext`].
#[derive(Debug, Clone, Copy)]
pub struct GatedLabelRule {
/// Callee-text matchers. In the gated pass a matcher ending in `_` is
/// treated as a prefix matcher; every other matcher goes through the
/// exact / suffix comparison.
pub matchers: &'static [&'static str],
/// Label emitted (deduplicated) when a matcher hits and the gate holds.
pub label: DataLabel,
/// Forwarded to the suffix / prefix comparison helpers to select
/// case-sensitive or case-insensitive matching.
pub case_sensitive: bool,
/// Condition that must additionally hold for `label` to be emitted.
pub gate: LabelGate,
}
/// Per-file context consulted by [`classify_all_ctx`] when evaluating
/// gated rules. Threaded from the CFG layer's gated post-pass; `None`
/// elsewhere keeps existing classification paths intact.
///
/// The derived `Default` leaves `local_imports` as `None`, i.e. a context
/// that carries no import view.
#[derive(Debug, Default, Clone, Copy)]
pub struct ClassificationContext<'a> {
/// Local-name → source-module view of the file's imports. The map is
/// computed at CFG build time (see `cfg::imports::extract_local_import_view`)
/// so the gate fires before the project-wide resolver runs.
///
/// When this is `None`, only the synthetic receiver-type prefix (e.g.
/// `FileSystemPromisesNs.`) can satisfy a gate, as documented on
/// [`classify_all_ctx`].
pub local_imports: Option<&'a std::collections::HashMap<String, String>>,
}
/// Sentinel returned by [`classify_gated_sink`] for the dynamic/unknown-activation
/// branch: the gate fires conservatively and every positional argument must be
/// considered a potential tainted payload, not just the explicit `payload_args`.
@ -300,6 +355,17 @@ pub enum Kind {
/// any other sequential statement in the CFG but explicitly classified so
/// code that inspects `Kind` can recognise it.
Seq,
/// Async-await unary forward. An `await x` expression evaluates `x` and
/// resolves to the same value/taint, modelled as a 1:1 copy. Lowered to
/// SSA as `SsaOp::Assign(operand)` so taint, origins, and abstract value
/// pass through unchanged.
AwaitForward,
/// JSX attribute (`<Tag name={value} />`). Dispatched in the CFG so the
/// builder can recognise React-specific shapes such as
/// `dangerouslySetInnerHTML={{ __html: x }}` and synthesise a sink call.
/// The attribute name is read from the AST at CFG-build time, not carried
/// in this enum (which must remain `Copy` for `phf_map` storage).
JsxAttr,
Other,
}
@ -445,6 +511,19 @@ static GATED_REGISTRY: Lazy<HashMap<&'static str, &'static [SinkGate]>> = Lazy::
m
});
/// Per-language lookup table of [`GatedLabelRule`] slices, keyed by language
/// slug.
///
/// Only the JavaScript and TypeScript tables are registered, each under its
/// long and short slug. A language with no entry yields no gated labels:
/// `classify_gated_into` returns early when the lookup misses.
// NOTE(review): `is_promise_callback_method` / `is_promise_combinator` also
// accept a `tsx` slug, which is not registered here — confirm whether that
// slug can reach this registry.
static GATED_LABEL_REGISTRY: Lazy<HashMap<&'static str, &'static [GatedLabelRule]>> =
    Lazy::new(|| {
        HashMap::from([
            ("javascript", javascript::GATED_LABEL_RULES),
            ("js", javascript::GATED_LABEL_RULES),
            ("typescript", typescript::GATED_LABEL_RULES),
            ("ts", typescript::GATED_LABEL_RULES),
        ])
    });
/// Feature flag for the Python prototype-pollution gates. Disabled by
/// default; set `NYX_PYTHON_PROTO_POLLUTION=1` (or `true`) to enable
/// `dict.update` / `__dict__.update` proto-pollution detection.
@ -599,6 +678,89 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
.unwrap_or(Kind::Other)
}
/// Returns `true` when `callee_leaf` names a Promise-callback method
/// (`then`, `catch`, `finally`) in a JS/TS-family language.
///
/// These methods are not sinks. The taint engine uses this predicate to
/// treat the receiver as a Promise whose resolved value is fed to the
/// callback's first parameter. See phase 03 of `plan.md` for the recall-gap
/// rationale.
///
/// `lang` must be one of the JS/TS slugs (`javascript`, `js`, `typescript`,
/// `ts`, `tsx`); any other language yields `false`. `callee_leaf` is expected
/// to be the post-`callee_leaf_name` short form (e.g. `"then"`, not
/// `"p.then"`), and is compared exactly.
pub fn is_promise_callback_method(lang: &str, callee_leaf: &str) -> bool {
    const JS_FAMILY: [&str; 5] = ["javascript", "js", "typescript", "ts", "tsx"];
    const CALLBACK_METHODS: [&str; 3] = ["then", "catch", "finally"];

    JS_FAMILY.contains(&lang) && CALLBACK_METHODS.contains(&callee_leaf)
}
/// Which static combinator a callee was recognised as. Returned (wrapped in
/// `Option`) by [`is_promise_combinator`] and [`is_any_promise_combinator`];
/// `None` there means the callee is not a combinator.
///
/// Combinators wrap arguments into a single Promise:
/// * `Promise.resolve(x)` — identity for `x`.
/// * `Promise.all([a, b])` — array whose elements have per-arg taint.
/// * `Promise.allSettled([...])` — same shape as `all`, conservative union.
/// * `Promise.race([...])` — first-to-settle, conservative union.
///
/// The recognisers match on the full callee text (e.g. `"Promise.all"`) since
/// the leaf segment alone (`"all"`) is too generic to match safely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PromiseCombinatorKind {
/// `Promise.resolve`.
Resolve,
/// `Promise.all`; the recognisers also map `asyncio.gather` /
/// `asyncio.wait` and the `tokio` / `futures` `join` / `try_join` callees
/// to this variant.
All,
/// `Promise.allSettled`.
AllSettled,
/// `Promise.race`.
Race,
}
/// Language-independent combinator recognition on the full callee text.
///
/// Accepts the union of every callee that [`is_promise_combinator`] knows for
/// any language. Intended for SSA lowering, which has no `lang` argument to
/// pass. Returns `None` for any callee that is not a known combinator.
pub fn is_any_promise_combinator(callee: &str) -> Option<PromiseCombinatorKind> {
    let kind = match callee {
        "Promise.resolve" => PromiseCombinatorKind::Resolve,
        "Promise.allSettled" => PromiseCombinatorKind::AllSettled,
        "Promise.race" => PromiseCombinatorKind::Race,
        // Everything that gathers all of its inputs maps to `All`:
        // JS `Promise.all`, Python asyncio, and the Rust join macros.
        "Promise.all"
        | "asyncio.gather"
        | "asyncio.wait"
        | "tokio::join"
        | "tokio::try_join"
        | "futures::join"
        | "futures::try_join" => PromiseCombinatorKind::All,
        _ => return None,
    };
    Some(kind)
}
/// Language-aware combinator recognition: returns the
/// [`PromiseCombinatorKind`] for `callee` when it is a combinator of `lang`,
/// otherwise `None`.
///
/// `callee` is compared exactly against the full callee text
/// (e.g. `"Promise.all"`); a combinator belonging to a different language
/// family is not recognised.
pub fn is_promise_combinator(lang: &str, callee: &str) -> Option<PromiseCombinatorKind> {
    let js_family = matches!(lang, "javascript" | "js" | "typescript" | "ts" | "tsx");
    let python = matches!(lang, "python" | "py");
    let rust = matches!(lang, "rust" | "rs");

    match callee {
        "Promise.resolve" if js_family => Some(PromiseCombinatorKind::Resolve),
        "Promise.all" if js_family => Some(PromiseCombinatorKind::All),
        "Promise.allSettled" if js_family => Some(PromiseCombinatorKind::AllSettled),
        "Promise.race" if js_family => Some(PromiseCombinatorKind::Race),
        // Python: `asyncio.gather(...)` / `asyncio.wait(...)` resolve to a
        // tuple/list whose elements carry the union of argument taints.
        // `asyncio.wait` returns `(done, pending)` sets, but the same
        // conservative scalar-union approximation applies; downstream
        // destructuring already taints all bindings.
        "asyncio.gather" | "asyncio.wait" if python => Some(PromiseCombinatorKind::All),
        // Rust: `tokio::join!` / `futures::join!` (and their `try_*`
        // variants) evaluate every future concurrently and bind the tuple of
        // resolved values. `cfg::push_node` rewrites the macro_invocation's
        // `arg_uses` so each future's tainted inputs surface as a positional
        // arg; this entry then unions them onto the tuple value.
        "tokio::join" | "tokio::try_join" | "futures::join" | "futures::try_join" if rust => {
            Some(PromiseCombinatorKind::All)
        }
        _ => None,
    }
}
/// The kind of taint source, used to refine finding severity.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
@ -953,6 +1115,17 @@ fn ends_with_cs(haystack: &[u8], needle: &[u8], case_sensitive: bool) -> bool {
}
}
/// Reports whether `haystack` begins with `needle`, ignoring ASCII case only.
///
/// The comparison works on the raw bytes and allocates nothing; it replaces
/// the earlier
/// `value.to_ascii_lowercase().starts_with(&p.to_ascii_lowercase())` pair on
/// the gated-sink dispatch hot path, which built two `String`s per check.
/// Non-ASCII bytes must match exactly. An empty `needle` always matches.
#[inline]
fn starts_with_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
    // `get(..len)` yields `None` when the haystack is shorter than the
    // needle, which doubles as the length guard.
    match haystack.as_bytes().get(..needle.len()) {
        Some(prefix) => prefix.eq_ignore_ascii_case(needle.as_bytes()),
        None => false,
    }
}
/// Prefix check with configurable case sensitivity. The `=` exact-match
/// sigil is meaningless for prefix matchers (which by definition match many
/// suffixes); it is stripped if present so a malformed matcher like
@ -1028,6 +1201,9 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
// For chained calls like `r.URL.Query().Get`, also strip internal
// `().` segments to produce a normalized form like `r.URL.Query.Get`.
// `normalize_chained_call` returns `Cow::Borrowed` when no rewrite is
// needed, so the alloc is paid only on inputs that actually require
// it.
let full_normalized = normalize_chained_call(text);
let full_norm_bytes = full_normalized.as_bytes();
@ -1116,6 +1292,9 @@ pub fn classify_all(
return SmallVec::new();
}
// `normalize_chained_call` returns `Cow::Borrowed` when no rewrite
// is needed, so the alloc is paid only on inputs that actually
// require it. The hot classify path runs on every CFG node.
let full_normalized = normalize_chained_call(text);
let full_norm_bytes = full_normalized.as_bytes();
@ -1198,6 +1377,228 @@ pub fn classify_all(
out
}
/// Context-aware classification: flat rules plus import-gated rules.
///
/// The result is a strict superset of [`classify_all`]. The flat-rule labels
/// are computed first, then every per-language [`GatedLabelRule`] is
/// evaluated against `ctx` and any newly satisfied label is appended.
///
/// With `ctx == None` (or a context whose `local_imports` is `None`) the
/// only thing that can still satisfy a gate is the synthetic receiver-type
/// prefix (e.g. `FileSystemPromisesNs.`).
pub fn classify_all_ctx(
    lang: &str,
    text: &str,
    extra: Option<&[RuntimeLabelRule]>,
    ctx: Option<&ClassificationContext<'_>>,
) -> SmallVec<[DataLabel; 2]> {
    let mut labels = classify_all(lang, text, extra);
    classify_gated_into(lang, text, ctx, &mut labels);
    labels
}
/// Gated-rule pass on its own, without the flat [`classify_all`] scan.
///
/// Meant for callers that already ran the flat rules over `text` during
/// initial CFG construction and now only need the gate-conditioned labels.
/// Those need a per-file [`ClassificationContext`] that was not available at
/// the original classification site.
pub fn classify_gated_only(
    lang: &str,
    text: &str,
    ctx: Option<&ClassificationContext<'_>>,
) -> SmallVec<[DataLabel; 2]> {
    let mut labels: SmallVec<[DataLabel; 2]> = SmallVec::new();
    classify_gated_into(lang, text, ctx, &mut labels);
    labels
}
/// Append to `out` the label of every gated rule for `lang` whose matcher
/// hits `text` and whose gate is satisfied under `ctx`.
///
/// The registry is consulted with `lang` verbatim first and with its
/// ASCII-lowercased form second. Nothing is appended when the language has
/// no (or an empty) gated rule set, or when the callee head is excluded.
///
/// Matching runs in two passes so that exact / suffix hits are recorded
/// ahead of prefix hits: matchers ending in `_` are prefix matchers and are
/// handled only in the second pass. Each matcher is tried against both the
/// trimmed callee head and the chain-normalized text. Labels already in
/// `out` are never pushed twice.
fn classify_gated_into(
    lang: &str,
    text: &str,
    ctx: Option<&ClassificationContext<'_>>,
    out: &mut SmallVec<[DataLabel; 2]>,
) {
    /// Push `label` unless `out` already contains it.
    #[inline]
    fn push_dedup(out: &mut SmallVec<[DataLabel; 2]>, label: DataLabel) {
        if !out.contains(&label) {
            out.push(label);
        }
    }

    let Some(&gated) = GATED_LABEL_REGISTRY.get(lang).or_else(|| {
        let lowered = lang.to_ascii_lowercase();
        GATED_LABEL_REGISTRY.get(lowered.as_str())
    }) else {
        return;
    };
    if gated.is_empty() {
        return;
    }

    // Callee head: everything before the first `(` or `<`.
    let head = text.find(['(', '<']).map_or(text, |cut| &text[..cut]);
    let trimmed = head.trim().as_bytes();
    if is_excluded(lang, trimmed) {
        return;
    }

    let full_normalized = normalize_chained_call(text);
    let full_norm_bytes = full_normalized.as_bytes();

    // Pass 1: exact / suffix matchers (those NOT ending in `_`).
    for rule in gated {
        for raw in rule.matchers {
            let pattern = raw.as_bytes();
            if pattern.last() != Some(&b'_') {
                let suffix_hit = match_suffix_cs(trimmed, pattern, rule.case_sensitive)
                    || match_suffix_cs(full_norm_bytes, pattern, rule.case_sensitive);
                if suffix_hit && gate_satisfied(&rule.gate, head, ctx) {
                    push_dedup(out, rule.label);
                }
            }
        }
    }

    // Pass 2: prefix matchers (those ending in `_`).
    for rule in gated {
        for raw in rule.matchers {
            let pattern = raw.as_bytes();
            if pattern.last() != Some(&b'_') {
                continue;
            }
            let prefix_hit = starts_with_cs(trimmed, pattern, rule.case_sensitive)
                || starts_with_cs(full_norm_bytes, pattern, rule.case_sensitive);
            if prefix_hit && gate_satisfied(&rule.gate, head, ctx) {
                push_dedup(out, rule.label);
            }
        }
    }
}
/// Payload-argument positions for known type-qualified sink callees.
///
/// The Phase 07 ORM raw-SQL receiver methods (`TypeOrmRepo.query`,
/// `TypeOrmManager.query`, `MikroOrmEm.execute`, etc.) receive the SQL
/// template as arg 0 and bind / parameter arrays as arg 1+. A flat label
/// rule cannot express that split and would report a false positive on
/// `repo.query("SELECT $1", [tainted])`. When the type-qualified resolver
/// synthesises one of the callees below, sink-taint checks must be
/// restricted to the positions returned here.
///
/// `Sequelize.literal(sql)` takes a single argument, so `&[0]` loses no
/// precision for it compared with the unconditional flat rule.
///
/// Returns `Some(&[0])` for a listed callee (the comparison is exact and
/// case-sensitive) and `None` for anything else.
pub fn type_qualified_sink_payload_args(qualified_callee: &str) -> Option<&'static [usize]> {
    /// Only arg 0 (the SQL text) is a sink payload.
    const SQL_AT_ARG0: &[usize] = &[0];
    /// Type-qualified callees whose payload is restricted to arg 0.
    const ARG0_PAYLOAD_CALLEES: [&str; 6] = [
        "Sequelize.literal",
        "TypeOrmRepo.query",
        "TypeOrmRepo.createQueryBuilder",
        "TypeOrmManager.query",
        "TypeOrmManager.createQueryBuilder",
        "MikroOrmEm.execute",
    ];

    ARG0_PAYLOAD_CALLEES
        .contains(&qualified_callee)
        .then_some(SQL_AT_ARG0)
}
/// Type prefixes accepted as a witness for `module` on a
/// [`LabelGate::ImportedFromModule`] gate.
///
/// SSA receiver-type qualification can synthesise a callee such as
/// `FileSystemPromisesNs.readFile(...)`, whose leading identifier is a
/// type prefix rather than an imported binding. A gate module may list the
/// type prefixes that let the gate fire without a textual import witness.
///
/// `module` is compared ASCII-case-insensitively. Only `node:fs/promises`
/// and `fs/promises` currently declare a prefix (`FileSystemPromisesNs`);
/// every other specifier yields an empty slice, meaning the gate has to
/// rely on the `local_imports` map alone.
fn receiver_type_prefixes_for_module(module: &str) -> &'static [&'static str] {
    const FS_PROMISES_SPECIFIERS: [&str; 2] = ["node:fs/promises", "fs/promises"];

    let names_fs_promises = FS_PROMISES_SPECIFIERS
        .iter()
        .any(|specifier| module.eq_ignore_ascii_case(specifier));
    if !names_fs_promises {
        return &[];
    }
    &["FileSystemPromisesNs"]
}
/// Decide whether `gate` holds for a call whose head text is
/// `callee_head`, given the optional per-file `ctx`.
///
/// * [`LabelGate::ImportedFromModule`]: holds when the call's leading
///   identifier is a receiver-type prefix declared for one of the gate's
///   modules (see [`receiver_type_prefixes_for_module`]; this needs no
///   context at all), or when `local_imports` maps that identifier to one
///   of the modules.
/// * [`LabelGate::FileImportsModule`]: holds when any entry of
///   `local_imports` has one of the modules as its value.
/// * [`LabelGate::FileImportsModuleAsLocalName`]: holds when one of
///   `local_names` is a key of `local_imports` whose value is one of the
///   modules.
///
/// Module names are compared ASCII-case-insensitively. Without a context,
/// or without `local_imports` on it, only the receiver-type route can
/// succeed.
fn gate_satisfied(
    gate: &LabelGate,
    callee_head: &str,
    ctx: Option<&ClassificationContext<'_>>,
) -> bool {
    // Import table, when both the context and its `local_imports` exist.
    let imports = ctx.and_then(|c| c.local_imports);

    match gate {
        LabelGate::ImportedFromModule(modules) => {
            let leading = leading_identifier(callee_head);

            // Receiver-type witness: checked first, independent of `ctx`.
            let has_type_witness = modules.iter().any(|m| {
                receiver_type_prefixes_for_module(m)
                    .iter()
                    .any(|prefix| leading == *prefix)
            });
            if has_type_witness {
                return true;
            }

            // Textual import witness: the leading identifier must be bound
            // to one of the gate's modules.
            let Some(map) = imports else {
                return false;
            };
            match map.get(leading) {
                Some(source_module) => modules
                    .iter()
                    .any(|m| source_module.eq_ignore_ascii_case(m)),
                None => false,
            }
        }
        LabelGate::FileImportsModule(modules) => {
            let Some(map) = imports else {
                return false;
            };
            for source_module in map.values() {
                if modules
                    .iter()
                    .any(|m| source_module.eq_ignore_ascii_case(m))
                {
                    return true;
                }
            }
            false
        }
        LabelGate::FileImportsModuleAsLocalName {
            modules,
            local_names,
        } => {
            let Some(map) = imports else {
                return false;
            };
            for name in local_names.iter() {
                if let Some(source_module) = map.get(*name) {
                    if modules
                        .iter()
                        .any(|m| source_module.eq_ignore_ascii_case(m))
                    {
                        return true;
                    }
                }
            }
            false
        }
    }
}
/// Leading identifier of a call expression's text: the slice that ends
/// just before the first `.`, `:`, `(`, `<`, space, or `[`. Used to drive
/// ImportTable lookups.
///
/// The whole input is returned when none of those characters occurs, and
/// an empty string when the input is empty or starts with one of them.
fn leading_identifier(callee_head: &str) -> &str {
    // Every stop character is ASCII, so the cut always lands on a char
    // boundary even when the identifier itself contains multi-byte UTF-8.
    const STOP_CHARS: [char; 6] = ['.', ':', '(', '<', ' ', '['];

    match callee_head.find(STOP_CHARS) {
        Some(cut) => &callee_head[..cut],
        None => callee_head,
    }
}
/// Result of a gated-sink classification.
///
/// `label` is the sink capability the callee contributes at this site.
@ -1289,8 +1690,7 @@ pub fn classify_gated_sink(
}
match const_keyword_arg(name) {
Some(v) => {
let lower = v.to_ascii_lowercase();
if values.iter().any(|dv| lower == dv.to_ascii_lowercase()) {
if values.iter().any(|dv| v.eq_ignore_ascii_case(dv)) {
any_dangerous = true;
break;
}
@ -1332,15 +1732,14 @@ pub fn classify_gated_sink(
match activation_value {
Some(value) => {
let lower = value.to_ascii_lowercase();
let is_dangerous = gate
.dangerous_values
.iter()
.any(|v| lower == v.to_ascii_lowercase())
.any(|v| value.eq_ignore_ascii_case(v))
|| gate
.dangerous_prefixes
.iter()
.any(|p| lower.starts_with(&p.to_ascii_lowercase()));
.any(|p| starts_with_ignore_ascii_case(&value, p));
if is_dangerous {
out.push(GateMatch {
label: gate.label,
@ -1379,7 +1778,7 @@ pub fn classify_gated_sink(
/// Public wrapper for `normalize_chained_call` so callers outside the module
/// can share the same normalization used by the label classifier.
pub fn normalize_chained_call_for_classify(text: &str) -> String {
normalize_chained_call(text)
normalize_chained_call(text).into_owned()
}
/// Return the bare method-name segment of a callee text. Returns the
@ -1394,38 +1793,79 @@ pub fn bare_method_name(callee: &str) -> &str {
/// Normalize a chained method call: strip `()` between `.` segments.
/// e.g. `r.URL.Query().Get` → `r.URL.Query.Get`
/// e.g. `r.URL.Query().Get("host")` → `r.URL.Query.Get`
fn normalize_chained_call(text: &str) -> String {
let mut result = String::with_capacity(text.len());
///
/// Returns a borrow when no transformation is required (no `()` between
/// `.` segments and no leading `<`), avoiding the heap allocation. Only
/// pays for a `String` when the input actually needs rewriting; the hot
/// classify path runs on every CFG node so the borrow case dominates.
fn normalize_chained_call(text: &str) -> std::borrow::Cow<'_, str> {
let bytes = text.as_bytes();
let mut i = 0;
while i < bytes.len() {
match bytes[i] {
b'(' => {
// Skip from `(` to matching `)`, but only if followed by `.`
// This handles `Query().Get` → `Query.Get`
let mut depth = 1u32;
let mut j = i + 1;
while j < bytes.len() && depth > 0 {
if bytes[j] == b'(' {
depth += 1;
} else if bytes[j] == b')' {
depth -= 1;
match bytes[j] {
b'(' => depth += 1,
b')' => depth -= 1,
_ => {}
}
j += 1;
}
if j >= bytes.len() || bytes[j] == b'.' {
return std::borrow::Cow::Owned(normalize_chained_call_owned(text, i));
}
i += 1;
}
b'<' => return std::borrow::Cow::Borrowed(&text[..i]),
_ => i += 1,
}
}
std::borrow::Cow::Borrowed(text)
}
/// Slow path for `normalize_chained_call`: runs only when the input
/// actually contains a `(...)` group followed by `.` (the case that
/// requires removing characters). `prefix_end` is the byte offset of the
/// first transformation point so the prefix can be copied wholesale.
///
/// `(`, `)`, `<`, and `.` are all ASCII, so byte-level scanning is safe
/// for control characters. Non-ASCII identifier bytes are copied as
/// contiguous slices to keep multi-byte UTF-8 sequences intact.
fn normalize_chained_call_owned(text: &str, prefix_end: usize) -> String {
let bytes = text.as_bytes();
let mut result = String::with_capacity(text.len());
result.push_str(&text[..prefix_end]);
let mut i = prefix_end;
while i < bytes.len() {
match bytes[i] {
b'(' => {
let mut depth = 1u32;
let mut j = i + 1;
while j < bytes.len() && depth > 0 {
match bytes[j] {
b'(' => depth += 1,
b')' => depth -= 1,
_ => {}
}
j += 1;
}
// If we're at end or next char is `.`, skip the parens
if j >= bytes.len() || bytes[j] == b'.' {
i = j;
} else {
// Keep the paren content (unusual case)
result.push('(');
i += 1;
}
}
b'<' => break, // Stop at generic args
b'<' => break,
_ => {
result.push(bytes[i] as char);
i += 1;
let start = i;
while i < bytes.len() && !matches!(bytes[i], b'(' | b'<') {
i += 1;
}
result.push_str(&text[start..i]);
}
}
}
@ -1979,6 +2419,58 @@ mod tests {
assert_eq!(lookup_receiver_validator("python", "joinpath"), None);
}
#[test]
fn normalize_chained_call_borrows_when_no_change() {
    use std::borrow::Cow;

    // Inputs that must come back as a borrow, paired with the expected text.
    let cases = [
        // No parens, no `<`: nothing to rewrite.
        ("plain", "plain"),
        // Bare dotted text: there is no `(...)` group to collapse, so the
        // chain is returned untouched.
        ("a.b.c", "a.b.c"),
        // Truncation at `<` (generics) is still a borrow, just of a
        // shorter slice.
        ("Vec<T>", "Vec"),
    ];

    for (input, expected) in cases {
        let normalized = normalize_chained_call(input);
        assert!(matches!(normalized, Cow::Borrowed(_)));
        assert_eq!(normalized.as_ref(), expected);
    }
}
#[test]
fn normalize_chained_call_collapses_paren_dot_chain() {
    let cases = [
        // A single `()` between `.` segments is dropped.
        ("r.URL.Query().Get", "r.URL.Query.Get"),
        // Every `()` in the chain is dropped, not just the first.
        ("a.b().c().d", "a.b.c.d"),
        // A paren group that runs to end of input is collapsed as well
        // (the `j >= bytes.len()` case).
        ("a.b()", "a.b"),
    ];

    for (input, expected) in cases {
        let normalized = normalize_chained_call(input);
        assert_eq!(normalized.as_ref(), expected);
    }
}
#[test]
fn normalize_chained_call_preserves_utf8_after_collapse() {
    // The slow path copies the tail that follows a collapsed `(...)` group;
    // multi-byte sequences in that tail must come through unsplit.
    let cases = [
        // Greek lowercase letters: 2-byte UTF-8 sequences.
        ("obj.func().αβγ", "obj.func.αβγ"),
        // CJK ideographs: 3-byte sequences.
        ("a.b().名前", "a.b.名前"),
        // Emoji (4-byte sequence) inside an identifier. Engines never see
        // this in practice, but the byte loop must not corrupt it.
        ("x.y().🦀_id", "x.y.🦀_id"),
    ];

    for (input, expected) in cases {
        let normalized = normalize_chained_call(input);
        assert_eq!(normalized.as_ref(), expected);
    }
}
#[test]
fn bare_method_name_strips_chain() {
// No-dot input → returned as-is.
@ -2739,6 +3231,26 @@ mod tests {
assert_eq!(result[0], DataLabel::Sink(Cap::HTML_ESCAPE));
}
#[test]
fn starts_with_ignore_ascii_case_matches_canonical_shapes() {
    // (haystack, prefix) pairs the helper must accept.
    let accepted = [
        ("FILE://etc/passwd", "file://"),
        ("file://etc/passwd", "FILE://"),
        ("http://", "http://"),
        // The empty prefix matches anything.
        ("http://", ""),
        // Multibyte UTF-8: identical bytes still match.
        ("café", "café"),
    ];
    for (haystack, prefix) in accepted {
        assert!(starts_with_ignore_ascii_case(haystack, prefix));
    }

    // (haystack, prefix) pairs the helper must reject.
    let rejected = [
        // Prefix longer than the haystack.
        ("http", "https"),
        ("", "x"),
        // The helper is intentionally ASCII-only: non-ASCII bytes compare
        // byte-for-byte, with no Unicode case folding, so `é` != `É`.
        ("café", "CAFÉ"),
    ];
    for (haystack, prefix) in rejected {
        assert!(!starts_with_ignore_ascii_case(haystack, prefix));
    }
}
#[test]
fn classify_all_dual_label_php() {
let result = classify_all("php", "file_get_contents", None);

View file

@ -48,9 +48,29 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: false,
},
// PDO parameterized queries
// PDO parameterized queries. `prepareStatement` covers Drupal's
// Database\\Connection convention (and any PSR-style wrapper that
// uses the longer name); semantically identical to `prepare` —
// both return a statement object, the bind step ships values as
// out-of-band parameters, no concatenation occurs.
LabelRule {
matchers: &["prepare", "bindParam", "bindValue"],
matchers: &["prepare", "prepareStatement", "bindParam", "bindValue"],
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — `mysqli_real_escape_string($conn, $s)` and
// `pg_escape_string($s)` apply driver-side escaping for legacy
// string-concat shapes. Treat as SQL_QUERY sanitizers so the
// value-replacement clears the cap on the call return.
// `addslashes` is intentionally excluded — it does NOT cover
// multibyte / charset-aware injection vectors.
LabelRule {
matchers: &[
"mysqli_real_escape_string",
"pg_escape_string",
"pg_escape_literal",
"pg_escape_identifier",
],
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: false,
},
@ -121,10 +141,39 @@ pub static RULES: &[LabelRule] = &[
"pdo.query",
"mysqli.real_query",
"mysqli_real_query",
// Phase 15 — `PDOStatement::execute` (with no args) executes a
// prepared statement; when prepared from a tainted string the
// bind step does NOT prevent injection (the SQL was already
// built unsafely). The receiver-text suffix is `stmt.execute`.
// Distinct from the bare `execute` matcher (already on the
// generic SQL_QUERY rule via `query` matcher) because the
// OOP `$stmt->execute()` shape skips the SQL-string arg.
"stmt.execute",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — Doctrine ORM raw-SQL passthrough APIs. Doctrine's
// `EntityManager::createQuery($dql)` accepts a DQL string;
// `createNativeQuery($sql, $rsm)` accepts a native SQL string;
// `getConnection()->executeQuery($sql)` /
// `getConnection()->executeStatement($sql)` are the low-level
// Connection passthroughs that route to the underlying driver
// verbatim. Suffix-matching covers both bound-receiver shapes
// (`$em->createQuery($dql)`) and the documentation-style
// class-qualified call form (`EntityManager.createQuery`).
LabelRule {
matchers: &[
"EntityManager.createQuery",
"EntityManager.createNativeQuery",
"createQuery",
"createNativeQuery",
"executeQuery",
"executeStatement",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// Laravel Eloquent: raw SQL methods.
// DB::raw() → scoped_call_expression, callee text "DB.raw".
// whereRaw/selectRaw/orderByRaw/havingRaw → member_call_expression on query builder.
@ -133,6 +182,22 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — Laravel raw-SQL execution facade methods. `DB::select`,
// `DB::statement`, `DB::insert`, `DB::update`, `DB::delete`,
// `DB::unprepared` all accept a literal SQL string; the
// `unprepared` form is the explicit no-bind escape hatch.
LabelRule {
matchers: &[
"DB.select",
"DB.statement",
"DB.insert",
"DB.update",
"DB.delete",
"DB.unprepared",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// NOTE: `file_get_contents` and `fopen` can fetch URLs (SSRF vector) and
// local files (LFI vector — `file://` scheme). As a Sink(SSRF) they only
// fire when the argument is tainted. `fopen` is the canonical low-level
@ -145,6 +210,32 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// Phase 14 — `\GuzzleHttp\Client::request($method, $url, ...)` and the
// verb-shorthand methods `$client->get($url)` / `->head($url)` /
// `->options($url)`. The read-shaped verbs carry the URL at arg 0
// and have no body argument, so a flat SSRF sink is FP-safe. The
// body-bearing verbs (`post` / `put` / `patch`) live on the
// DATA_EXFIL list above; their URL-position SSRF is covered via
// `Client.request` (arg 1 is URL) below as a flat sink — Guzzle
// does not expose argument-role-aware metadata that would let the
// gate distinguish URL from body, but the source-sensitivity gate
// already silences plain `$_GET` / `$_POST` flows so the
// remaining FP surface is small.
LabelRule {
matchers: &[
"Client.get",
"Client.head",
"Client.options",
"Client.request",
"HttpClient.get",
"HttpClient.head",
"HttpClient.request",
"Http.get",
"Http.head",
],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: true,
},
// ── Cross-boundary data exfiltration ──────────────────────────────────
//
// Body-bearing outbound HTTP verb methods on the major PHP HTTP clients.
@ -343,6 +434,26 @@ pub static GATED_SINKS: &[SinkGate] = &[
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// Phase 14 — `curl_setopt($ch, CURLOPT_URL, $url)` is the canonical
// pre-`curl_exec` URL bind. Tainted `$url` reaching this option is
// SSRF; the `curl_exec($ch)` flat sink above also fires on the
// tainted handle but only when the handle's taint propagates
// through opaque resource state, which the engine cannot follow
// across `curl_setopt` calls. Activating the SSRF cap directly at
// the option-bind site catches the flow at the construction step
// independent of the handle-flow analysis.
SinkGate {
callee_matcher: "curl_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_URL"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// PHP `header($line)` HEADER_INJECTION sink. Modelled as a gate so
// it can coexist with the OPEN_REDIRECT gate below: the multi-gate
// SSA dispatch needs each capability declared on its own gate filter

View file

@ -97,6 +97,39 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
},
// Phase 13 — pathlib / aiofiles / shutil path-traversal sinks.
// Chained constructor + method shapes (`Path(p).read_text()`) reduce
// via paren-strip to the `Path.*` matcher text below; the path argument
// is the sink payload. Receiver-bound shapes (`p = Path(...);
// p.read_text()`) do not match the `Path.*` text and are instead
// covered by the `FileHandle.*` matchers later in this same list.
LabelRule {
matchers: &[
"Path.open",
"Path.read_text",
"Path.write_text",
"Path.read_bytes",
"Path.write_bytes",
// Receiver-bound shapes (`p = Path(name); p.read_text()`)
// resolve via the `TypeKind::FileHandle` constructor mapping
// for `Path(...)` in `ssa/type_facts.rs`, which lets the
// type-qualified resolver rewrite `p.read_text` →
// `FileHandle.read_text` against the matchers below.
"FileHandle.open",
"FileHandle.read_text",
"FileHandle.write_text",
"FileHandle.read_bytes",
"FileHandle.write_bytes",
"aiofiles.open",
"shutil.copy",
"shutil.copy2",
"shutil.copyfile",
"shutil.move",
"shutil.rmtree",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: true,
},
LabelRule {
matchers: &[
"argparse.parse_args",
@ -157,6 +190,22 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: false,
},
// Phase 13 — `pathlib.Path.resolve(strict=True)` raises if the
// resolved path doesn't exist; the canonical / strict form is the
// documented path-traversal sanitiser. Strict-mode argument
// inspection is not modeled (the rule fires for any `.resolve()`
// chained on a `Path(...)`); the false-clear risk on
// `Path(...).resolve()` (non-strict) is an accepted trade-off
// because the non-strict form still resolves symlinks and
// collapses `..` segments, which dominates the path-traversal
// attack surface. Case-sensitive: `Path.resolve` is the literal
// pathlib method name; bare `resolve` is too broad (Django URL
// resolvers, Promise.resolve in JS-style libs).
LabelRule {
matchers: &["Path.resolve", "FileHandle.resolve"],
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: true,
},
// ─────────── Sinks ─────────────
// Flask sinks
LabelRule {
@ -218,6 +267,26 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — receiver-typed ORM sinks. `SqlAlchemySession.execute`
// / `SqlAlchemySession.scalar` / `SqlAlchemySession.scalars` etc.
// are produced when the receiver carries `TypeKind::SqlAlchemySession`
// (set by `constructor_type` for `sessionmaker()` / `Session(engine)` /
// `engine.connect()`). `DjangoQuerySet.raw` / `DjangoQuerySet.extra`
// fire on `Model.objects.raw(sql)` / `Model.objects.extra(...)` shapes
// when the receiver was tagged via the `Model.objects` access path.
// `ActiveRecordRelation` is registered in `labels/ruby.rs`.
LabelRule {
matchers: &[
"SqlAlchemySession.execute",
"SqlAlchemySession.scalar",
"SqlAlchemySession.scalars",
"SqlAlchemySession.exec_driver_sql",
"DjangoQuerySet.raw",
"DjangoQuerySet.extra",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// SQL injection: sqlite3 / SQLAlchemy / generic DB connection execute.
LabelRule {
matchers: &[
@ -1245,6 +1314,214 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["data"],
},
},
// ── SQL execute payload-arg gating (Phase 15 deferred fix) ────────────
//
// The flat label rules above already classify these callees as
// `Sink(SQL_QUERY)` on every argument. The DB-API convention is that
// arg 0 is the SQL string and arg 1+ are parameterised bind values
// (`cursor.execute("SELECT * FROM t WHERE id = %s", (user_id,))`). Tainted
// bind values are SAFE because the driver escapes them; tainted SQL is
// the SQLi vector. These Destination-activation gates carry the same
// `Sink(SQL_QUERY)` label so they dedupe against the flat rule, but
// their `payload_args: &[0]` propagates into `sink_payload_args`,
// narrowing the SSA sink scan to arg 0 only.
SinkGate {
callee_matcher: "cursor.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "cursor.executemany",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "conn.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "connection.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "session.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "engine.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "objects.raw",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// Receiver-typed forms; same payload shape (sql at arg 0).
SinkGate {
callee_matcher: "SqlAlchemySession.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlAlchemySession.scalar",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlAlchemySession.scalars",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlAlchemySession.exec_driver_sql",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DjangoQuerySet.raw",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DjangoQuerySet.extra",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
/// Prototype-pollution-style gates for Python. Opt-in via the
@ -1329,6 +1606,13 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"call" => Kind::CallFn,
"assignment" => Kind::Assignment,
"expression_statement" => Kind::CallWrapper,
// tree-sitter-python emits `await x` as a named `await` node (no
// `_expression` suffix, unlike JS/TS). Map it to `AwaitForward` so
// the SSA lowering forwards the awaited value 1:1, mirroring the
// JS/TS contract. Async-for in Python is plain `for_statement` with
// an unnamed `async` token child; the iterator-text rewrite in
// `cfg::push_node` covers both sync and async forms uniformly.
"await" => Kind::AwaitForward,
// trivia
"comment" => Kind::Trivia,

View file

@ -113,7 +113,25 @@ pub static RULES: &[LabelRule] = &[
// in the resource-lifecycle acquire/release pair (cfg_analysis::RUBY_RESOURCES),
// so this entry is additive, it does not disturb resource-leak detection.
LabelRule {
matchers: &["File.open", "File.new", "File.read", "IO.read"],
matchers: &[
"File.open",
"File.new",
"File.read",
"IO.read",
// Phase 13 — write-side and directory-listing path-traversal
// sinks. `Pathname.new(p)` is conservative: a Pathname
// construction with attacker-controlled `p` is the documented
// entry point for downstream Path / File operations and
// surfaces the path-traversal vector at the construction
// site. `Dir.entries` / `Dir.glob` enumerate filesystem
// contents, so a tainted path argument is a directory
// disclosure / glob-injection vector.
"File.write",
"IO.write",
"Pathname.new",
"Dir.entries",
"Dir.glob",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
},
@ -136,10 +154,28 @@ pub static RULES: &[LabelRule] = &[
matchers: &[
"Net::HTTP.get",
"Net::HTTP.post",
// Phase 14 — `Net::HTTP.start(host, port, ...)` is a session
// factory whose host argument is the SSRF vector when
// tainted. `Net::HTTP.get_response(uri)` is a stdlib
// convenience wrapper around `start` + `request_get`.
"Net::HTTP.start",
"Net::HTTP.get_response",
"URI.open",
"OpenURI.open_uri",
"HTTParty.get",
"HTTParty.post",
// Phase 14 — Faraday::Connection verb methods on a typed
// receiver. `Faraday.new(url: base)` produces an
// `HttpClient`-typed value (see `constructor_type`); the
// `client.get(path)` chain resolves through the
// type-qualified `HttpClient.get` rule below. Bare
// `Faraday.get` / `.post` / etc. are the module-level
// shorthand the existing `Faraday.post` matcher already
// covers for DATA_EXFIL; SSRF needs the read-shaped
// verbs registered explicitly.
"Faraday.get",
"Faraday.head",
"Faraday.delete",
],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
@ -214,11 +250,41 @@ pub static RULES: &[LabelRule] = &[
case_sensitive: false,
},
// SQL injection: ActiveRecord unsafe raw-query execution APIs.
// Phase 15 expands coverage with `exec_query` (the raw-SQL execution
// verb on the ActiveRecord connection adapter) and the driver-level
// select helpers that accept a literal SQL string: `select_value` /
// `select_values` / `select_rows` / `select_one`.
LabelRule {
matchers: &["find_by_sql", "connection.execute", "select_all"],
matchers: &[
"find_by_sql",
"connection.execute",
"select_all",
"exec_query",
"select_value",
"select_values",
"select_rows",
"select_one",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — receiver-typed ActiveRecord raw-SQL sinks. The
// `ActiveRecordRelation` TypeKind is set by `constructor_type` on
// class-method scope chains (`User.where(...)` etc.); type-qualified
// resolution rewrites `relation.find_by_sql(sql)` →
// `ActiveRecordRelation.find_by_sql` so the chained shape is caught
// even when the receiver text has lost its model-class prefix.
LabelRule {
matchers: &[
"ActiveRecordRelation.find_by_sql",
"ActiveRecordRelation.exec_query",
"ActiveRecordRelation.select_all",
"ActiveRecordRelation.select_one",
"ActiveRecordRelation.select_value",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// SQL injection: ActiveRecord query methods that accept raw SQL strings.
// `where` and `order` are the most common Rails SQLi vectors when called
// with string interpolation (e.g., User.where("name = '#{params[:name]}'")).
@ -383,6 +449,32 @@ pub static RULES: &[LabelRule] = &[
/// `Nokogiri::XML::ParseOptions::DEFAULT_XML`); any non-dangerous
/// scope-qualified constant disables the gate.
pub static GATED_SINKS: &[SinkGate] = &[
// `Faraday.new(url: tainted)` — base-URL kwarg controls the destination
// origin for every subsequent verb call on the returned client
// (`client.get(path)` / `.post` / etc.). When the kwarg value is
// attacker-controlled, the constructor itself is the SSRF entry point;
// the existing type-qualified rules on `HttpClient.get` / `.post` only
// cover taint flowing into the per-call `path` arg.
//
// Activation is `Destination` on positional position 0 with a single
// `url` field; tree-sitter-ruby emits the kwarg as a `pair` node sibling
// of the positional args, and `extract_destination_kwarg_pairs` walks
// those pairs (Ruby support added alongside this gate in
// `cfg::literals::extract_destination_kwarg_pairs`).
SinkGate {
callee_matcher: "Faraday.new",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &["url"],
},
},
// `Nokogiri::XML(xml, url=nil, encoding=nil, options=NIL)` — top-level
// module method. arg 3 carries the parse-option flag literal.
//

View file

@ -60,6 +60,26 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
case_sensitive: false,
},
// Phase 13 — `Path::canonicalize` (and `tokio::fs::canonicalize`) is
// the canonical Rust path-traversal sanitiser when paired with a
// `starts_with(&base)` containment check. Same convention as the
// Java / Python `.normalize()` / `.resolve()` sanitiser rules: the
// call clears the FILE_IO cap on its return so the cap-based gate
// suppresses the downstream `tokio::fs::*` / `std::fs::*` sink.
// Bare `canonicalize` would over-fire on unrelated APIs (e.g.
// `Url::canonicalize`); the qualified forms below are unique to
// path-handling.
LabelRule {
matchers: &[
"Path.canonicalize",
"PathBuf.canonicalize",
"fs::canonicalize",
"std::fs::canonicalize",
"tokio::fs::canonicalize",
],
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: false,
},
// ─────────── Sinks ─────────────
LabelRule {
matchers: &[
@ -90,6 +110,21 @@ pub static RULES: &[LabelRule] = &[
"fs::copy",
"File::open",
"File::create",
// Phase 13 — `tokio::fs` async path-traversal sinks. The
// suffix matchers also catch the bare `tokio::fs::File::open`
// chain after paren-strip. `tokio::fs::*` is the
// async-runtime-bound mirror of `std::fs::*`; same path
// arg-0 semantics.
"tokio::fs::read",
"tokio::fs::read_to_string",
"tokio::fs::write",
"tokio::fs::remove_file",
"tokio::fs::remove_dir",
"tokio::fs::remove_dir_all",
"tokio::fs::rename",
"tokio::fs::copy",
"tokio::fs::File::open",
"tokio::fs::File::create",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
@ -105,6 +140,12 @@ pub static RULES: &[LabelRule] = &[
"reqwest::Client.head",
"reqwest::Client.patch",
"reqwest::Client.request",
// Phase 14 — hyper Client `request(req)` dispatch entry. The
// `req` builder chain (covered by the type-qualified
// RequestBuilder.* / Request::builder.* rules below) smears
// URL taint into the request value via default propagation.
"hyper::Client.request",
"hyper::client::Client.request",
// Chained constructor + verb form: `reqwest::Client::new()
// .post(url)` reduces (via root-receiver collapse) to chain
// text `Client::new.post`, so existing `Client.post` matchers
@ -370,6 +411,10 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"let_declaration" => Kind::CallWrapper,
"expression_statement" => Kind::CallWrapper,
"assignment_expression" => Kind::Assignment,
// `x.await` postfix. Documented per-language so the contract does
// not depend on the raw-string fallback in `cfg::push_node`; SSA
// lowering emits `Assign(operand)` for these nodes.
"await_expression" => Kind::AwaitForward,
// struct expressions, recurse so env::var() calls inside field
// initialisers produce Source-labelled CFG nodes (needed for summaries).

View file

@ -1,5 +1,6 @@
use crate::labels::{
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
Cap, DataLabel, GateActivation, GatedLabelRule, Kind, LabelGate, LabelRule, ParamConfig,
RuntimeLabelRule, SinkGate,
};
use crate::utils::project::{DetectedFramework, FrameworkContext};
use phf::{Map, phf_map};
@ -29,6 +30,24 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Source(Cap::all()),
case_sensitive: false,
},
// Phase 10 — Web `Request` receiver-method reads. Triggered when
// the SSA receiver carries `TypeKind::Request` (Next.js App
// Router handler's first formal) and the type-qualified resolver
// rewrites `req.json()` → `Request.json` etc. The reads return
// user-controlled bytes / strings; the matchers also cover
// `Request.url` and `Request.headers.get(...)` which both expose
// header / URL state to the handler.
LabelRule {
matchers: &[
"Request.json",
"Request.formData",
"Request.text",
"Request.url",
"Request.headers.get",
],
label: DataLabel::Source(Cap::all()),
case_sensitive: true,
},
// ───────── Sanitizers ──────────
LabelRule {
matchers: &["JSON.parse"],
@ -215,6 +234,40 @@ pub static RULES: &[LabelRule] = &[
"fs.unlinkSync",
"fs.readdir",
"fs.readdirSync",
// Phase 05 — `node:fs/promises` member-access forms covered
// here. Bare-name forms (`readFile`, `open`, ...) and
// `fsp.readFile` namespace-import forms ride the gated
// matcher in `GATED_LABEL_RULES`. Receiver-type fallback
// synthesises `FileSystemPromisesNs.<method>` (handled
// below).
"fs.promises.readFile",
"fs.promises.writeFile",
"fs.promises.unlink",
"fs.promises.open",
"fs.promises.stat",
"fs.promises.readdir",
"fs.promises.mkdir",
"fs.promises.rmdir",
"fs.promises.rm",
"fs.promises.appendFile",
"fs.promises.copyFile",
"fs.promises.rename",
"fs.promises.truncate",
"fs.promises.chmod",
"FileSystemPromisesNs.readFile",
"FileSystemPromisesNs.writeFile",
"FileSystemPromisesNs.unlink",
"FileSystemPromisesNs.open",
"FileSystemPromisesNs.stat",
"FileSystemPromisesNs.readdir",
"FileSystemPromisesNs.mkdir",
"FileSystemPromisesNs.rmdir",
"FileSystemPromisesNs.rm",
"FileSystemPromisesNs.appendFile",
"FileSystemPromisesNs.copyFile",
"FileSystemPromisesNs.rename",
"FileSystemPromisesNs.truncate",
"FileSystemPromisesNs.chmod",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
@ -255,6 +308,25 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// ── Phase 07 — ORM query-builder receiver-typed sinks ──
// See `labels/javascript.rs` for the design rationale; mirrored here so
// TypeScript fixtures pick up the same coverage. Receiver TypeKinds
// are populated by [`crate::ssa::type_facts::constructor_type`] for
// `new Sequelize(...)` / `getRepository(...)` / `getManager()` /
// `createEntityManager()`; the type-qualified resolver rewrites
// `<recv>.<method>` → `<TypePrefix>.<method>` against these matchers.
LabelRule {
matchers: &[
"Sequelize.literal",
"TypeOrmRepo.query",
"TypeOrmRepo.createQueryBuilder",
"TypeOrmManager.query",
"TypeOrmManager.createQueryBuilder",
"MikroOrmEm.execute",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// ─── LDAP injection sinks ───
//
// Mirror of `labels/javascript.rs`; ldapjs / ts-ldapjs has the same
@ -391,6 +463,67 @@ pub static EXCLUDES: &[&str] = &[
"exec.start",
];
/// Phase 05 — `node:fs/promises` path-traversal sinks. See
/// `javascript.rs::GATED_LABEL_RULES` for the design rationale; both
/// language registries carry the same matcher list to keep .ts and .js
/// fixtures in lockstep.
///
/// Besides the Phase 05 entry, this table also carries the Phase 07 Knex
/// and Drizzle SQL sinks and the Phase 10 `next/headers` sources. Every
/// entry pairs a matcher list with a [`LabelGate`], so the label is tied
/// to an import condition rather than to the bare callee name alone.
pub static GATED_LABEL_RULES: &[GatedLabelRule] = &[
// Bare method names shared with many unrelated APIs (`open`, `rm`,
// `stat`, ...); labelled as FILE_IO sinks only under the
// `node:fs/promises` / `fs/promises` import gate below.
GatedLabelRule {
matchers: &[
"readFile",
"writeFile",
"unlink",
"open",
"stat",
"readdir",
"mkdir",
"rmdir",
"rm",
"appendFile",
"copyFile",
"rename",
"truncate",
"chmod",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
gate: LabelGate::ImportedFromModule(&["node:fs/promises", "fs/promises"]),
},
// Phase 07 — Knex bare-name raw-SQL escape hatches. See
// `labels/javascript.rs::GATED_LABEL_RULES` for the rationale; this
// mirror keeps `.ts` and `.js` fixtures in lockstep.
GatedLabelRule {
matchers: &["whereRaw", "orderByRaw", "havingRaw"],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
gate: LabelGate::FileImportsModuleAsLocalName {
modules: &["knex"],
local_names: &["knex"],
},
},
// Phase 07 — Drizzle `sql` template-tag builder. See
// `labels/javascript.rs::GATED_LABEL_RULES` for the two callee
// shapes covered (`sql\`...\`` and `sql.raw(...)`).
// NOTE(review): the leading `=` in `=sql` presumably marks the
// template-tag callee shape for the matcher — confirm against the
// matcher implementation.
GatedLabelRule {
matchers: &["=sql", "sql.raw"],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
gate: LabelGate::ImportedFromModule(&["drizzle-orm"]),
},
// Phase 10 — Next.js `cookies()` / `headers()` helpers from the
// `next/headers` module return adversary-controlled
// request-bound state (cookies carry session tokens, headers
// carry auth material). Gated on the import so app-internal
// helpers named `cookies` or `headers` keep their default
// classification.
GatedLabelRule {
matchers: &["cookies", "headers"],
label: DataLabel::Source(Cap::all()),
case_sensitive: true,
gate: LabelGate::ImportedFromModule(&["next/headers"]),
},
];
pub static GATED_SINKS: &[SinkGate] = &[
SinkGate {
callee_matcher: "setAttribute",
@ -958,6 +1091,8 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"expression_statement" => Kind::CallWrapper,
"as_expression" => Kind::Seq,
"type_assertion" => Kind::Seq,
"await_expression" => Kind::AwaitForward,
"jsx_attribute" => Kind::JsxAttr,
// trivia
"comment" => Kind::Trivia,

View file

@ -100,6 +100,7 @@ pub mod constraint;
pub mod convergence_telemetry;
pub mod database;
pub mod engine_notes;
pub mod entry_points;
pub mod errors;
pub mod evidence;
pub mod fmt;
@ -109,6 +110,7 @@ pub mod output;
pub mod patterns;
pub mod pointer;
pub mod rank;
pub mod resolve;
pub mod rust_resolve;
#[cfg(feature = "serve")]
pub mod server;

View file

@ -668,6 +668,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}
}
@ -884,6 +885,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let facts = analyse_body(&body, body_id());
assert!(facts.is_trivial());

1042
src/resolve/mod.rs Normal file

File diff suppressed because it is too large Load diff

380
src/resolve/tests.rs Normal file
View file

@ -0,0 +1,380 @@
//! Phase-04 resolver tests.
//!
//! Six specifier shapes (relative, parent-relative, scoped package,
//! tsconfig path alias, node builtin, missing), `exports`-map resolution,
//! import extraction from a parsed fixture file, and a time/memory-ceiling
//! guard. Each test reads the checked-in synthetic tree under
//! `tests/fixtures/resolver/`, constructs a [`ModuleGraph`] via
//! [`build_module_graph`], and asserts the resolver verdict.
use super::*;
use std::path::PathBuf;
/// Location of the checked-in resolver fixture tree:
/// `<crate manifest dir>/tests/fixtures/resolver`.
fn fixture_root() -> PathBuf {
    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/resolver")
}
/// Fixture root in canonical form when the tree exists on disk; the raw,
/// uncanonicalised path when it is missing or canonicalisation fails.
fn root() -> PathBuf {
    let raw = fixture_root();
    if !raw.exists() {
        return raw;
    }
    match raw.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => raw,
    }
}
/// `./foo` imported from the web app entry point lands on the sibling
/// `foo.ts` and is not classified as a builtin.
#[test]
fn resolves_relative_specifier() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let outcome = modules
        .resolve_specifier(&entry, "./foo")
        .expect("relative spec must classify");
    let target = outcome.file.expect("./foo must resolve");
    let landed_on_foo = target.ends_with("apps/web/src/foo.ts");
    assert!(
        landed_on_foo,
        "unexpected resolution: {}",
        target.display()
    );
    assert!(!outcome.is_builtin);
}
/// `../bar/baz` climbs out of `src/` and resolves to `apps/web/bar/baz.ts`.
#[test]
fn resolves_parent_relative_specifier() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let outcome = modules
        .resolve_specifier(&entry, "../bar/baz")
        .expect("../bar/baz must classify");
    let target = outcome.file.expect("../bar/baz must resolve");
    let expected_tail = "apps/web/bar/baz.ts";
    assert!(
        target.ends_with(expected_tail),
        "unexpected resolution: {}",
        target.display()
    );
}
/// `@scope/util` is attributed to the package of that name and resolves
/// to one of the two accepted index files.
#[test]
fn resolves_scoped_package_import() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let outcome = modules
        .resolve_specifier(&entry, "@scope/util")
        .expect("@scope/util must classify");
    assert_eq!(outcome.package.as_deref(), Some("@scope/util"));

    let target = outcome.file.expect("@scope/util must resolve to a file");
    let accepted_tails = ["packages/util/src/index.ts", "packages/util/index.ts"];
    assert!(
        accepted_tails.iter().any(|tail| target.ends_with(tail)),
        "unexpected resolution: {}",
        target.display()
    );
}
/// The `@/lib/x` alias resolves to `apps/web/src/lib/x.ts`.
#[test]
fn resolves_tsconfig_path_alias() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let outcome = modules
        .resolve_specifier(&entry, "@/lib/x")
        .expect("@/lib/x must classify");
    let target = outcome.file.expect("@/lib/x must resolve");
    let expected_tail = "apps/web/src/lib/x.ts";
    assert!(
        target.ends_with(expected_tail),
        "unexpected resolution: {}",
        target.display()
    );
}
/// Both the `node:`-prefixed and the bare builtin spelling are flagged as
/// builtins; the prefixed form carries neither a file nor a package.
#[test]
fn classifies_node_builtin_specifier() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let prefixed = modules
        .resolve_specifier(&entry, "node:fs/promises")
        .expect("node:fs/promises must classify");
    assert!(prefixed.is_builtin);
    assert!(prefixed.file.is_none());
    assert!(prefixed.package.is_none());

    let unprefixed = modules
        .resolve_specifier(&entry, "fs")
        .expect("bare 'fs' must classify");
    assert!(unprefixed.is_builtin);
}
/// An unknown bare specifier still classifies, but yields no file, no
/// package, and no builtin flag.
#[test]
fn missing_module_returns_none_resolved_file() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let outcome = modules
        .resolve_specifier(&entry, "no-such-package")
        .expect("non-empty spec must classify");
    assert!(!outcome.is_builtin);
    assert!(outcome.file.is_none(), "missing module must not resolve");
    assert!(outcome.package.is_none());
}
/// `package_for` maps a file to the package that owns it: `@scope/util`
/// for the util sources, `web-app` for the web app sources.
#[test]
fn package_for_returns_innermost_match() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let util_source = fixtures.join("packages/util/src/index.ts");
    let util_owner = modules
        .package_for(&util_source)
        .expect("file under packages/util belongs to a package");
    assert_eq!(util_owner.name, "@scope/util");

    let web_source = fixtures.join("apps/web/src/index.ts");
    let web_owner = modules
        .package_for(&web_source)
        .expect("file under apps/web belongs to a package");
    assert_eq!(web_owner.name, "web-app");
}
/// Files inside a package get a `<package>::`-prefixed namespace; a file
/// outside every package gets a namespace without any `::`.
#[test]
fn project_namespace_prefixes_when_in_package() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let packaged_file = fixtures.join("packages/util/src/index.ts");
    let ns = modules.project_namespace_for(&packaged_file, &fixtures);
    let is_prefixed = ns.starts_with("@scope/util::");
    assert!(
        is_prefixed,
        "expected package-prefixed namespace, got {ns}"
    );

    let stray_file = std::env::temp_dir().join("nyx-resolver-outside.ts");
    let plain = modules.project_namespace_for(&stray_file, &fixtures);
    let has_separator = plain.contains("::");
    assert!(
        !has_separator,
        "outside-package namespace must be plain: {plain}"
    );
}
/// Root `"exports"."."` conditional map: the `import` branch is chosen
/// ahead of `default`, and a resolved `exports` entry shadows the legacy
/// `main` field.
#[test]
fn resolves_exports_root_conditional() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let outcome = modules
        .resolve_specifier(&entry, "@scope/exports-pkg")
        .expect("@scope/exports-pkg must classify");
    assert_eq!(outcome.package.as_deref(), Some("@scope/exports-pkg"));

    let target = outcome.file.expect("@scope/exports-pkg must resolve");
    let expected_tail = "exports-pkg/src/main.ts";
    assert!(
        target.ends_with(expected_tail),
        "expected import-branch main.ts, got {}",
        target.display()
    );
}
/// An exact subpath key (`"./sub": "./src/sub.ts"`) is honoured before any
/// pattern fallback gets a chance to fire.
#[test]
fn resolves_exports_exact_subpath() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let outcome = modules
        .resolve_specifier(&entry, "@scope/exports-pkg/sub")
        .expect("subpath spec must classify");
    let target = outcome.file.expect("./sub must resolve");
    let expected_tail = "exports-pkg/src/sub.ts";
    assert!(
        target.ends_with(expected_tail),
        "unexpected resolution: {}",
        target.display()
    );
}
/// A wildcard key (`"./feat/*": "./src/feat/*.ts"`) splices the matched
/// tail of the specifier into the target path.
#[test]
fn resolves_exports_wildcard_subpath() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let outcome = modules
        .resolve_specifier(&entry, "@scope/exports-pkg/feat/widget")
        .expect("wildcard subpath must classify");
    let target = outcome.file.expect("./feat/widget must resolve");
    let expected_tail = "exports-pkg/src/feat/widget.ts";
    assert!(
        target.ends_with(expected_tail),
        "unexpected resolution: {}",
        target.display()
    );
}
/// A `null` exports value blocks the subpath: the resolver reports no file
/// instead of falling back to a direct path join.
#[test]
fn exports_null_blocks_subpath() {
    let fixtures = root();
    let entry = fixtures.join("apps/web/src/index.ts");
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let outcome = modules
        .resolve_specifier(&entry, "@scope/exports-pkg/blocked")
        .expect("blocked spec must classify");
    let stayed_unresolved = outcome.file.is_none();
    assert!(
        stayed_unresolved,
        "null exports value must not resolve, got {:?}",
        outcome.file
    );
}
/// Ceiling guard: building the module graph for the fixture tree must stay
/// under 50 ms of wall-clock time and add less than 10 MiB of resident
/// memory, and the resulting graph must contain at least one package.
#[test]
fn module_graph_is_cheap() {
    use std::time::Instant;

    let r = root();
    // `approximate_rss_kib` reports KiB and returns 0 when the sample could
    // not be taken.
    let rss_before_kib = approximate_rss_kib();
    let start = Instant::now();
    let graph = build_module_graph(std::slice::from_ref(&r));
    let elapsed = start.elapsed();
    let rss_after_kib = approximate_rss_kib();

    assert!(
        elapsed.as_millis() < 50,
        "build_module_graph took {}ms (>50ms ceiling)",
        elapsed.as_millis()
    );

    // Only compare when both samples succeeded. A failed "before" sample (0)
    // paired with a successful "after" sample would otherwise report the
    // whole process RSS as the delta and trip the ceiling spuriously.
    if rss_before_kib != 0 && rss_after_kib != 0 {
        let delta_kib = rss_after_kib.saturating_sub(rss_before_kib);
        assert!(
            delta_kib < 10 * 1024,
            "build_module_graph added {delta_kib} KiB RSS (>10 MiB ceiling)"
        );
    }

    assert!(
        !graph.packages().is_empty(),
        "fixture tree must have packages"
    );
}
/// Shared parsing setup for the import-extraction tests: reads `file`,
/// parses it with the tree-sitter TypeScript grammar, and hands the tree to
/// [`extract_resolved_imports`] together with `graph`.
///
/// Panics (via `expect`) when the file cannot be read, the grammar cannot
/// be loaded, or the parse yields no tree.
fn extract_imports_for(file: &std::path::Path, graph: &ModuleGraph) -> Vec<ImportBinding> {
    let source = std::fs::read(file).expect("read fixture file");

    let mut ts_parser = tree_sitter::Parser::new();
    let ts_language = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    ts_parser
        .set_language(&ts_language)
        .expect("load TS grammar");

    let syntax_tree = ts_parser.parse(&source, None).expect("parse fixture");
    extract_resolved_imports(&syntax_tree, &source, file, graph, "typescript")
}
/// End-to-end check of `extract_resolved_imports` on the fixture entry
/// point `tests/fixtures/resolver/apps/web/src/index.ts`.
///
/// The file exercises four binding shapes (relative, parent-relative,
/// scoped package, tsconfig path alias) plus the `node:fs/promises`
/// builtin. Phases 09/10 thread these bindings through cross-file taint, so
/// the parsed-file path has to produce the same rows the `resolve_specifier`
/// tests already cover.
#[test]
fn parses_imports_from_fixture_file() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");
    let bindings = extract_imports_for(&entry, &modules);

    // Index the bindings by local name; a later duplicate replaces an
    // earlier one.
    let mut by_local: std::collections::HashMap<&str, &ImportBinding> =
        std::collections::HashMap::new();
    for binding in bindings.iter() {
        by_local.insert(binding.local_name.as_str(), binding);
    }

    // Relative: `import { foo } from "./foo"`.
    let foo = by_local.get("foo").expect("foo binding present");
    assert_eq!(foo.source_module, "./foo");
    assert_eq!(foo.exported_name.as_deref(), Some("foo"));
    let foo_target = foo.resolved_file.as_ref().expect("./foo resolves");
    assert!(
        foo_target.ends_with("apps/web/src/foo.ts"),
        "foo unexpected: {}",
        foo_target.display()
    );

    // Parent-relative: `import { baz } from "../bar/baz"`.
    let baz = by_local.get("baz").expect("baz binding present");
    assert_eq!(baz.source_module, "../bar/baz");
    let baz_target = baz.resolved_file.as_ref().expect("../bar/baz resolves");
    assert!(
        baz_target.ends_with("apps/web/bar/baz.ts"),
        "baz unexpected: {}",
        baz_target.display()
    );

    // Scoped package: `import { util } from "@scope/util"`.
    let util = by_local.get("util").expect("util binding present");
    assert_eq!(util.source_module, "@scope/util");
    let util_resolved = util.resolved_file.is_some();
    assert!(util_resolved, "@scope/util must resolve to a file");

    // tsconfig path alias: `import { x } from "@/lib/x"`.
    let x = by_local.get("x").expect("x binding present");
    assert_eq!(x.source_module, "@/lib/x");
    let x_target = x.resolved_file.as_ref().expect("@/lib/x resolves");
    assert!(
        x_target.ends_with("apps/web/src/lib/x.ts"),
        "x unexpected: {}",
        x_target.display()
    );

    // Node builtin: `import { promises as fs } from "node:fs/promises"`.
    // The binding is keyed by the alias `fs`, not the exported `promises`.
    let fs = by_local.get("fs").expect("fs alias binding present");
    assert_eq!(fs.source_module, "node:fs/promises");
    assert_eq!(fs.exported_name.as_deref(), Some("promises"));
    let fs_unresolved = fs.resolved_file.is_none();
    assert!(
        fs_unresolved,
        "node:* builtin must not carry a resolved file"
    );
}
/// Best-effort reader for the current process's resident set size.
///
/// * Linux: second whitespace-separated field of the first `VmRSS:` line
///   in `/proc/self/status`.
/// * macOS: output of `ps -o rss= -p <pid>`.
/// * Anywhere else: always 0.
///
/// Any failure (unreadable file, missing line, unparsable number, failed
/// command) also yields 0. The caller only looks at the delta between two
/// samples and treats "0 → 0" as "below ceiling".
fn approximate_rss_kib() -> u64 {
    #[cfg(target_os = "linux")]
    {
        match std::fs::read_to_string("/proc/self/status") {
            Ok(status) => {
                let mut rss_lines = status.lines().filter(|line| line.starts_with("VmRSS:"));
                match rss_lines.next() {
                    Some(line) => line
                        .split_whitespace()
                        .nth(1)
                        .and_then(|field| field.parse::<u64>().ok())
                        .unwrap_or(0),
                    None => 0,
                }
            }
            Err(_) => 0,
        }
    }
    #[cfg(target_os = "macos")]
    {
        let pid = std::process::id().to_string();
        let ps_result = std::process::Command::new("ps")
            .args(["-o", "rss=", "-p", &pid])
            .output();
        match ps_result {
            Ok(ps_output) => match String::from_utf8(ps_output.stdout) {
                Ok(text) => text.trim().parse::<u64>().unwrap_or(0),
                Err(_) => 0,
            },
            Err(_) => 0,
        }
    }
    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    {
        0
    }
}

View file

@ -137,7 +137,7 @@ mod tests {
AppState {
scan_root: scan_root.clone(),
config_dir: scan_root.clone(),
database_dir: scan_root.clone(),
database_dir: scan_root,
security: LocalServerSecurity::new(port),
config: Arc::new(RwLock::new(Config::default())),
job_manager: Arc::new(JobManager::new(4, 8 * 1024 * 1024)),

View file

@ -1187,6 +1187,18 @@ fn type_kind_tag(k: &TypeKind) -> String {
TypeKind::Template => "Template".into(),
TypeKind::Dto(_) => "Dto".into(),
TypeKind::NullPrototypeObject => "NullPrototypeObject".into(),
TypeKind::FileSystemPromisesNs => "FileSystemPromisesNs".into(),
TypeKind::Sequelize => "Sequelize".into(),
TypeKind::TypeOrmRepo => "TypeOrmRepo".into(),
TypeKind::TypeOrmManager => "TypeOrmManager".into(),
TypeKind::MikroOrmEm => "MikroOrmEm".into(),
TypeKind::Request => "Request".into(),
TypeKind::SqlAlchemySession => "SqlAlchemySession".into(),
TypeKind::DjangoQuerySet => "DjangoQuerySet".into(),
TypeKind::ActiveRecordRelation => "ActiveRecordRelation".into(),
TypeKind::GormDb => "GormDb".into(),
TypeKind::SqlxDb => "SqlxDb".into(),
TypeKind::HibernateSession => "HibernateSession".into(),
}
}
@ -1565,6 +1577,10 @@ pub fn analyse_function_taint(
auto_seed_handler_params: matches!(lang, Lang::JavaScript | Lang::TypeScript),
cross_file_bodies: global_summaries.and_then(|gs| gs.bodies_by_key()),
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
crate::taint::ssa_transfer::run_ssa_taint_full_with_exits(ssa, cfg, &transfer)
@ -1628,7 +1644,7 @@ pub fn analyse_file_summaries(
config: &Config,
) -> Result<GlobalSummaries, StatusCode> {
let bytes = std::fs::read(file_path).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let (func_summaries, ssa_rows, _ssa_bodies, auth_rows) =
let (func_summaries, ssa_rows, _ssa_bodies, auth_rows, cross_pkg_imports) =
crate::ast::extract_all_summaries_from_bytes(&bytes, file_path, config, None)
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
@ -1640,6 +1656,9 @@ pub fn analyse_file_summaries(
for (key, auth_summary) in auth_rows {
global.insert_auth(key, auth_summary);
}
if let Some((ns, map)) = cross_pkg_imports {
global.insert_cross_package_imports(ns, map);
}
Ok(global)
}
@ -1883,6 +1902,7 @@ function consume() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
@ -2039,6 +2059,7 @@ async function recentAuditLogs() {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let facts = analyse_body(&body, BodyId(0));

View file

@ -169,7 +169,7 @@ impl JobManager {
started_at: Some(chrono::Utc::now().to_rfc3339()),
finished_at: None,
duration_secs: None,
engine_version: Some(engine_version.clone()),
engine_version: Some(engine_version),
languages: None,
files_scanned: None,
files_skipped: None,
@ -261,7 +261,7 @@ impl JobManager {
let languages: Vec<String> = progress_snap.languages.keys().cloned().collect();
let files_scanned = progress_snap.files_discovered;
let files_skipped = progress_snap.files_skipped;
let timing = progress_snap.timing.clone();
let timing = progress_snap.timing;
let finished_at = chrono::Utc::now();
// Prepare the final state outside the lock.
@ -292,9 +292,9 @@ impl JobManager {
if let Some(job) = jobs.get_mut(&jid) {
job.finished_at = Some(finished_at);
job.duration_secs = Some(elapsed);
job.languages = Some(languages.clone());
job.languages = Some(languages);
job.files_scanned = Some(files_scanned);
job.timing = Some(timing.clone());
job.timing = Some(timing);
job.status = status.clone();
job.findings = diags;
job.error = error_str.clone();
@ -590,7 +590,7 @@ handleRequest({ query: { name: '<b>x</b>' } }, { send() {} });
let id = manager
.start_scan(
project_dir.clone(),
project_dir,
test_config(),
tx,
Some(Arc::clone(&pool)),

View file

@ -161,7 +161,7 @@ async fn add_rule(
.or_default();
let new_rule = crate::utils::config::ConfigLabelRule {
matchers: rule.matchers.clone(),
matchers: rule.matchers,
kind: rule_kind,
cap: cap_name,
case_sensitive: false,
@ -242,7 +242,7 @@ async fn add_terminator(
.entry(term.lang.clone())
.or_default();
if !lang_cfg.terminators.contains(&term.name) {
lang_cfg.terminators.push(term.name.clone());
lang_cfg.terminators.push(term.name);
}
}

View file

@ -447,6 +447,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
)],
)
@ -520,6 +521,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
},
false,
false,
@ -544,6 +546,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
},
true,
true,
@ -568,6 +571,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
},
true,
false,
@ -666,6 +670,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
)],
)

View file

@ -149,7 +149,7 @@ async fn overview(State(state): State<AppState>) -> Json<OverviewResponse> {
latest_scan_id,
latest_scan_at,
by_severity: summary.by_severity.clone(),
by_category: summary.by_category.clone(),
by_category: summary.by_category,
by_language,
top_files,
top_directories,

View file

@ -309,13 +309,12 @@ async fn get_scan_findings(
let per_page = query.per_page.unwrap_or(50).min(200);
let start = (page - 1) * per_page;
let scan_root = state.scan_root.clone();
let page_findings: Vec<FindingView> = filtered
.into_iter()
.enumerate()
.skip(start)
.take(per_page)
.map(|(i, d)| models::finding_from_diag_with_context(i, d, &scan_root))
.map(|(i, d)| models::finding_from_diag_with_context(i, d, &state.scan_root))
.collect();
Ok(Json(serde_json::json!({
@ -361,8 +360,6 @@ async fn compare_scans(
.push((i, d));
}
let scan_root = state.scan_root.clone();
let mut new_findings = Vec::new();
let mut fixed_findings = Vec::new();
let mut changed_findings = Vec::new();
@ -378,7 +375,7 @@ async fn compare_scans(
for i in 0..matched {
let (idx, diag) = right_group[i];
let (_, left_diag) = left_group[i];
let view = models::finding_from_diag_with_context(idx, diag, &scan_root);
let view = models::finding_from_diag_with_context(idx, diag, &state.scan_root);
let changes = compute_field_changes(left_diag, diag);
if changes.is_empty() {
unchanged_findings.push(ComparedFinding {
@ -397,7 +394,7 @@ async fn compare_scans(
for &(idx, diag) in &right_group[matched..] {
new_findings.push(ComparedFinding {
fingerprint: fp.clone(),
finding: models::finding_from_diag_with_context(idx, diag, &scan_root),
finding: models::finding_from_diag_with_context(idx, diag, &state.scan_root),
});
}
} else {
@ -405,7 +402,7 @@ async fn compare_scans(
for &(idx, diag) in right_group {
new_findings.push(ComparedFinding {
fingerprint: fp.clone(),
finding: models::finding_from_diag_with_context(idx, diag, &scan_root),
finding: models::finding_from_diag_with_context(idx, diag, &state.scan_root),
});
}
}
@ -419,7 +416,7 @@ async fn compare_scans(
for &(idx, diag) in &left_group[start..] {
fixed_findings.push(ComparedFinding {
fingerprint: fp.clone(),
finding: models::finding_from_diag_with_context(idx, diag, &scan_root),
finding: models::finding_from_diag_with_context(idx, diag, &state.scan_root),
});
}
}

View file

@ -219,6 +219,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}

View file

@ -741,6 +741,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}

View file

@ -217,6 +217,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let (eliminated, copy_map) = copy_propagate(&mut body, &cfg);
@ -300,6 +301,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let (eliminated, copy_map) = copy_propagate(&mut body, &cfg);
@ -372,6 +374,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
(cfg, body)
}
@ -496,6 +499,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let (eliminated, _map) = copy_propagate(&mut body, &cfg);
assert_eq!(eliminated, 0, "two-operand Assign is not a copy");
@ -577,6 +581,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let (eliminated, _) = copy_propagate(&mut body, &cfg);
assert_eq!(eliminated, 1, "v1 should be eliminated");
@ -676,6 +681,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let (eliminated, _map) = copy_propagate(&mut body, &cfg);
assert_eq!(eliminated, 1);
@ -726,6 +732,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let (eliminated, map) = copy_propagate(&mut body, &cfg);
assert_eq!(eliminated, 0);

View file

@ -219,6 +219,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -269,6 +270,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -320,6 +322,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -367,6 +370,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -406,6 +410,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -472,6 +477,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -541,6 +547,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -603,6 +610,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -655,6 +663,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -744,6 +753,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);
@ -823,6 +833,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let removed = eliminate_dead_defs(&mut body, &cfg);

View file

@ -790,6 +790,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let errs = check_structural_invariants(&body);
assert!(
@ -839,6 +840,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let errs = check_structural_invariants(&body);
assert!(
@ -891,6 +893,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let errs = check_structural_invariants(&body);
assert!(
@ -921,6 +924,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let errs = check_structural_invariants(&body);
assert!(

View file

@ -373,6 +373,22 @@ pub struct SsaBody {
/// produced before this field existed.
#[serde(default)]
pub synthetic_externals: HashSet<SsaValue>,
/// SSA values whose [`SsaOp::Assign`] is a slot-scoped binding from a
/// bare-array destructure rewrite (see `bare_array_ops` in
/// [`crate::ssa::lower`]). The Assign transfer arm in
/// [`crate::taint::ssa_transfer`] consults this set to skip the
/// `info.taint.labels` Source pickup that would otherwise bleed the
/// outer destructure node's Source label into the slot-scoped binding.
///
/// Operand union still runs normally, so transitive taint via an
/// inner ident (e.g. `helper(tainted_local)` in slot 1 of
/// `[req.body.other, helper(tainted_local)]`) propagates through the
/// Assign's operands without inheriting the outer-node Source.
///
/// Empty by default; only the per-slot kill arm in the bare-array
/// destructure lowering populates this set.
#[serde(default)]
pub slot_scoped_assigns: HashSet<SsaValue>,
}
impl SsaBody {
@ -581,6 +597,7 @@ mod tests {
field_interner: FieldInterner::new(),
field_writes: HashMap::new(),
synthetic_externals: HashSet::new(),
slot_scoped_assigns: HashSet::new(),
};
let fid = body.intern_field("mu");
body.blocks[0].body.push(SsaInst {

View file

@ -257,6 +257,7 @@ fn lower_to_ssa_inner(
field_interner,
field_writes,
synthetic_externals,
slot_scoped_assigns,
) = rename_variables(
cfg,
&blocks_nodes,
@ -326,6 +327,7 @@ fn lower_to_ssa_inner(
field_interner,
field_writes,
synthetic_externals,
slot_scoped_assigns,
};
// 9. Catch-block reachability invariant.
@ -957,6 +959,7 @@ fn rename_variables(
crate::ssa::ir::FieldInterner,
HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)>,
HashSet<SsaValue>,
HashSet<SsaValue>,
) {
let num_blocks = blocks_nodes.len();
let mut next_value: u32 = 0;
@ -973,6 +976,10 @@ fn rename_variables(
// Populated below at the synthetic-Assign emission site. Read by
// the taint engine to lift the assign into a structural field WRITE.
let mut field_writes: HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)> = HashMap::new();
// SSA values whose `Assign` comes from a bare-array destructure
// slot-scoped kill arm; the taint engine consults this set to skip
// outer-node Source label pickup while still unioning operand taint.
let mut slot_scoped_assigns: HashSet<SsaValue> = HashSet::new();
// Per-variable rename stacks
let mut var_stacks: HashMap<String, Vec<SsaValue>> = HashMap::new();
@ -1041,6 +1048,7 @@ fn rename_variables(
nop_nodes: &HashSet<NodeIndex>,
field_interner: &mut crate::ssa::ir::FieldInterner,
field_writes: &mut HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)>,
slot_scoped_assigns: &mut HashSet<SsaValue>,
) {
let block_id = BlockId(block_idx as u32);
@ -1258,6 +1266,27 @@ fn rename_variables(
} else {
SsaOp::Assign(uses)
}
} else if info.is_await_forward
&& info.call.callee.is_none()
&& !info.taint.uses.is_empty()
{
// `await x` resolves to the same value as `x` — model as a 1:1
// copy so taint, origins, and abstract-domain facts forward
// unchanged. Gated on `callee.is_none()` so an await-wrapped
// call still lowers as a Call op rather than being collapsed
// to Assign (today CFG splits `await foo(x)` into two nodes,
// but the guard keeps the invariant explicit).
let uses: SmallVec<[SsaValue; 4]> = info
.taint
.uses
.iter()
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
.collect();
if uses.is_empty() {
SsaOp::Nop
} else {
SsaOp::Assign(uses)
}
} else if matches!(
info.kind,
StmtKind::Entry
@ -1344,15 +1373,311 @@ fn rename_variables(
cfg_node_map.insert(node, v);
// Clone op for potential extra_defines before moving into SsaInst
let primary_op_for_extras = if info.taint.extra_defines.is_empty() {
// Promise.all-style array-destructure precision: when a CallWrapper
// node binds an array_pattern (`const [a, b] = await Promise.all(
// [x, y])` or `let (a, b) = tokio::join!(x, y)`) and the value is a
// promise combinator that produces an array/tuple of per-element
// results (`Promise.all`, `Promise.allSettled`, `asyncio.gather`,
// `tokio::join!` and friends), rewrite the per-binding SSA so each
// binding sees only its own index's taint instead of the scalar
// union that `try_apply_promise_combinator` would produce.
//
// Two argument shapes are supported:
// (a) literal-array (JS/Python): one positional arg whose
// collected idents represent the array elements in order,
// e.g. `Promise.all([x, y])` → args = [[x, y]].
// (b) positional (Rust macros): N positional args, each one
// ident, e.g. `tokio::join!(x, y)` → args = [[x], [y]].
//
// `Promise.race` and `Promise.resolve` are excluded: the awaited
// value of a race is whichever promise wins (a single value, not
// an array), and destructuring that value index-by-index does not
// correspond to the args.
// The rewrite fires when:
// - the call is a promise combinator that produces an array of
// per-element results (`All` / `AllSettled`), AND
// - the LHS destructures into >= 2 bindings (sequential case
// where `extra_defines` is non-empty), OR
// - the LHS is an array_pattern with at least one skip slot
// (`array_pattern_indices` is non-empty, even if `extra_defines`
// itself is empty — `const [, b]` is a single-binding pattern
// whose index is 1, not 0).
let is_combinator_rewrite_target = matches!(
info.call
.callee
.as_deref()
.and_then(crate::labels::is_any_promise_combinator),
Some(
crate::labels::PromiseCombinatorKind::All
| crate::labels::PromiseCombinatorKind::AllSettled
)
);
// Indices for each binding in source order: primary at index 0,
// then extras. Falls back to sequential 0..N when the AST didn't
// record explicit indices (non-array_pattern destructures and
// tuple_pattern shapes that contain no wildcards).
let binding_indices: SmallVec<[usize; 4]> =
if !info.taint.array_pattern_indices.is_empty() {
info.taint.array_pattern_indices.clone()
} else if !info.taint.extra_defines.is_empty() {
(0..=info.taint.extra_defines.len()).collect()
} else {
SmallVec::new()
};
let promise_destruct_args: Option<SmallVec<[SsaValue; 4]>> =
if is_combinator_rewrite_target && !binding_indices.is_empty() {
let max_index = binding_indices.iter().copied().max().unwrap_or(0);
let needed = max_index + 1;
// Use `info.call.arg_uses` directly rather than the
// build_call_args-derived `args`, which may include an
// implicit "uses not in arg_uses" group appended for chain
// bookkeeping that would inflate the apparent arity.
let arg_uses = &info.call.arg_uses;
let map_idents = |idents: &[String]| -> Option<SmallVec<[SsaValue; 4]>> {
let mapped: SmallVec<[SsaValue; 4]> = idents
.iter()
.take(needed)
.filter_map(|ident| {
var_stacks.get(ident).and_then(|s| s.last().copied())
})
.collect();
if mapped.len() == needed {
Some(mapped)
} else {
None
}
};
if arg_uses.len() == 1 && arg_uses[0].len() >= needed {
// Shape (a): single positional arg whose idents are the
// array elements in source order (`Promise.all([x, y])`,
// `asyncio.gather([x, y])`).
map_idents(&arg_uses[0])
} else if arg_uses.len() >= needed
&& arg_uses.iter().take(needed).all(|g| g.len() == 1)
{
// Shape (b): N positional args, each with one ident
// (`tokio::join!(x, y)`).
let names: Vec<&String> =
arg_uses.iter().take(needed).map(|g| &g[0]).collect();
let mapped: SmallVec<[SsaValue; 4]> = names
.iter()
.filter_map(|ident| {
var_stacks
.get(ident.as_str())
.and_then(|s| s.last().copied())
})
.collect();
if mapped.len() == needed {
Some(mapped)
} else {
None
}
} else {
None
}
} else {
None
};
// Bare-array RHS destructure precision: when the LHS is an
// array_pattern / tuple_pattern / pattern_list / left_assignment_list
// AND the RHS is a bare array-literal, build per-source-position
// ops so each binding sees only its index's element instead of
// the scalar union of every RHS ident.
//
// Three slot shapes are recognised by `collect_rhs_array_literal_elements`:
//
// * `Ident(name)` — bare identifier. Emit `Assign(reaching_def)`.
// * `Literal` — syntactic literal (string/number/etc.). Emit
// `Const(None)` so the binding carries no taint.
// * `Complex(uses)` — call / binary / subscript / member access /
// interpolated string / nested array literal / etc. Emit
// `Assign(union of inner ident reaching defs)` — slot-scoped
// union, not the whole-RHS union the legacy path produced.
// Falls back to `Const(None)` when no inner idents resolve
// (pure literal subexpression like `1 + 2`).
//
// Closes FPs like `const [a, b] = [safe, tainted]; exec(b);`
// (Ident shape) and `const [c, d] = [fn(req.x), 'lit']; exec(d);`
// (Complex shape) where the legacy union painted the safe binding.
//
// The promise-combinator path above has already populated
// `promise_destruct_args` when its preconditions held, so the
// mutual exclusion is gated through `promise_destruct_args.is_none()`
// rather than `info.call.callee.is_none()`. The earlier
// callee-none gate was wrong because the outer
// variable_declarator node picks up `info.call.callee` whenever
// the RHS text matches a Source label — which is exactly the
// case where we need the per-slot rewrite most.
// The outer node may carry a `DataLabel::Source(_)` whose
// classification matched somewhere in the RHS expression text
// (`req.body.cmd`, `process.env.X`, etc.). For multi-slot
// RHS we can't statically partition WHICH slot caused that
// match, but it must originate from a Complex slot (Literal
// and bare-Ident slots whose names resolve through
// `var_stacks` carry their own SsaValue identity). Treat
// Complex slots as Source-emitting when the outer label set
// included Source — strict precision improvement over the
// legacy union path which painted EVERY slot, including
// Literal, with the outer Source.
let outer_is_source = info
.taint
.labels
.iter()
.any(|l| matches!(l, crate::labels::DataLabel::Source(_)));
// Per-slot Source classification (see `RhsArraySlot::Complex.source_cap`):
// when at least one Complex slot's own subtree classified as
// Source, we know which slot(s) carried the source pattern, so
// sibling Complex slots without their own source_cap stay
// slot-scoped (Assign / Const). Otherwise (the outer node
// matched but no per-slot classifier fired — typical of subscript
// chains and other shapes whose source flows via reaching-def
// rather than static text), fall back to the conservative
// "all-Complex-are-Source" emission for legacy preservation.
use crate::cfg::RhsArraySlot;
let any_slot_has_source_cap = info.taint.rhs_array_elements.iter().any(|s| {
matches!(
s,
RhsArraySlot::Complex { source_cap, .. }
if !source_cap.is_empty()
)
});
let effective_outer_fallback = outer_is_source && !any_slot_has_source_cap;
let bare_array_ops: Option<(SmallVec<[SsaOp; 4]>, SmallVec<[bool; 4]>)> =
if !info.taint.rhs_array_elements.is_empty()
&& !binding_indices.is_empty()
&& promise_destruct_args.is_none()
{
let max_index = binding_indices.iter().copied().max().unwrap_or(0);
let needed = max_index + 1;
if info.taint.rhs_array_elements.len() < needed {
None
} else {
let mut per_pos: SmallVec<[SsaOp; 4]> = SmallVec::new();
let mut slot_scoped_mask: SmallVec<[bool; 4]> = SmallVec::new();
let mut bail = false;
for slot in info.taint.rhs_array_elements.iter().take(needed) {
let mut is_slot_scoped = false;
let slot_op = match slot {
RhsArraySlot::Ident(ident) => {
match var_stacks
.get(ident.as_str())
.and_then(|s| s.last().copied())
{
Some(sv) => SsaOp::Assign(SmallVec::from_elem(sv, 1)),
None => {
bail = true;
break;
}
}
}
RhsArraySlot::Literal => SsaOp::Const(None),
RhsArraySlot::Complex {
uses: inner_uses,
source_cap,
} => {
let mut mapped: SmallVec<[SsaValue; 4]> = SmallVec::new();
for ident in inner_uses.iter() {
if let Some(sv) = var_stacks
.get(ident.as_str())
.and_then(|s| s.last().copied())
{
if !mapped.contains(&sv) {
mapped.push(sv);
}
}
}
if !source_cap.is_empty() {
// Per-slot classification found a Source
// pattern (e.g. `req.body.cmd`) inside
// THIS slot's subtree. Emit Source so the
// binding inherits the outer-node Source
// caps for this slot's index.
SsaOp::Source
} else if outer_is_source && any_slot_has_source_cap {
// Some OTHER slot's subtree classified as
// Source; this slot did NOT. Emit
// Assign(mapped) and mark the slot as
// slot-scoped so the taint transfer's
// Assign arm skips outer-node Source
// label pickup for this binding (without
// losing transitive taint through inner
// uses). When `mapped` is empty, fall
// back to Const(None) — the binding
// carries no taint anyway.
if mapped.is_empty() {
SsaOp::Const(None)
} else {
is_slot_scoped = true;
SsaOp::Assign(mapped.clone())
}
} else if effective_outer_fallback {
// Outer-node Source label but no
// per-slot classifier fired on any slot
// (typical of subscript-on-tainted-local
// shapes). Preserve legacy conservative
// emission for unrecognised shapes.
SsaOp::Source
} else if mapped.is_empty() {
SsaOp::Const(None)
} else {
SsaOp::Assign(mapped)
}
}
};
per_pos.push(slot_op);
slot_scoped_mask.push(is_slot_scoped);
}
if bail {
None
} else {
Some((per_pos, slot_scoped_mask))
}
}
} else {
None
};
// Clone op for potential extra_defines before moving into SsaInst.
// For the destructure-promise / bare-array rewrites, the
// per-extra ops are built explicitly below, so the shared clone
// path is bypassed.
let primary_op_for_extras = if info.taint.extra_defines.is_empty()
|| promise_destruct_args.is_some()
|| bare_array_ops.is_some()
{
None
} else {
Some(op.clone())
};
// Override primary op to single-operand Assign when the
// destructure-promise rewrite fires. The primary's source-order
// index is `binding_indices[0]` — non-zero for skip-leading
// patterns like `const [, b]` where `b` is the FIRST (and only)
// binding but lives at pattern position 1.
let primary_op = if let Some(ref args) = promise_destruct_args {
let primary_idx = binding_indices.first().copied().unwrap_or(0);
let pick = args.get(primary_idx).copied().unwrap_or(args[0]);
SsaOp::Assign(SmallVec::from_elem(pick, 1))
} else if let Some((ref per_pos, ref slot_scoped_mask)) = bare_array_ops {
let primary_idx = binding_indices.first().copied().unwrap_or(0);
if slot_scoped_mask.get(primary_idx).copied().unwrap_or(false) {
slot_scoped_assigns.insert(v);
}
per_pos
.get(primary_idx)
.cloned()
.unwrap_or(SsaOp::Const(None))
} else {
op
};
ssa_blocks[block_idx].body.push(SsaInst {
value: v,
op,
op: primary_op,
cfg_node: node,
var_name: var_name_for_ssa.clone(),
span: info.ast.span,
@ -1423,7 +1748,66 @@ fn rename_variables(
// Emit extra SSA instructions for destructuring bindings.
// Each extra define inherits the same op (Source/Call/Assign) as the primary.
if let Some(ref primary_op) = primary_op_for_extras {
//
// For the destructure-promise rewrite, each extra emits an Assign
// on its corresponding indexed argument so per-element taint is
// preserved instead of the scalar union. The source-order index
// for `extra_defines[i]` is `binding_indices[i + 1]` — accounts
// for skip slots like `const [a, , b]` where `b` sits at index 2,
// not at index 1.
if let Some(ref pd_args) = promise_destruct_args {
for (i, extra_def) in info.taint.extra_defines.iter().enumerate() {
let ev = SsaValue(*next_value);
*next_value += 1;
value_defs.push(ValueDef {
var_name: Some(extra_def.clone()),
cfg_node: node,
block: block_id,
});
var_stacks.entry(extra_def.clone()).or_default().push(ev);
let extra_idx = binding_indices.get(i + 1).copied().unwrap_or(i + 1);
let arg = pd_args.get(extra_idx).copied().unwrap_or(pd_args[0]);
ssa_blocks[block_idx].body.push(SsaInst {
value: ev,
op: SsaOp::Assign(SmallVec::from_elem(arg, 1)),
cfg_node: node,
var_name: Some(extra_def.clone()),
span: info.ast.span,
});
}
} else if let Some((ref per_pos, ref slot_scoped_mask)) = bare_array_ops {
// Bare-array RHS destructure: each extra emits the op for its
// source-order RHS position. Ident slots emit Assign of the
// ident's reaching SSA value; literal slots emit Const(None).
// Slot-scoped Assigns are registered in
// `slot_scoped_assigns` so the taint transfer skips
// outer-node Source pickup for those bindings.
for (i, extra_def) in info.taint.extra_defines.iter().enumerate() {
let ev = SsaValue(*next_value);
*next_value += 1;
value_defs.push(ValueDef {
var_name: Some(extra_def.clone()),
cfg_node: node,
block: block_id,
});
var_stacks.entry(extra_def.clone()).or_default().push(ev);
let extra_idx = binding_indices.get(i + 1).copied().unwrap_or(i + 1);
let op_for_extra = per_pos
.get(extra_idx)
.cloned()
.unwrap_or(SsaOp::Const(None));
if slot_scoped_mask.get(extra_idx).copied().unwrap_or(false) {
slot_scoped_assigns.insert(ev);
}
ssa_blocks[block_idx].body.push(SsaInst {
value: ev,
op: op_for_extra,
cfg_node: node,
var_name: Some(extra_def.clone()),
span: info.ast.span,
});
}
} else if let Some(ref primary_op) = primary_op_for_extras {
for extra_def in &info.taint.extra_defines {
let ev = SsaValue(*next_value);
*next_value += 1;
@ -1685,6 +2069,7 @@ fn rename_variables(
nop_nodes,
field_interner,
field_writes,
slot_scoped_assigns,
);
}
@ -1802,6 +2187,7 @@ fn rename_variables(
nop_nodes,
&mut field_interner,
&mut field_writes,
&mut slot_scoped_assigns,
);
// Process orphan blocks (e.g. catch blocks disconnected after exception edge removal).
@ -1843,6 +2229,7 @@ fn rename_variables(
nop_nodes,
&mut field_interner,
&mut field_writes,
&mut slot_scoped_assigns,
);
}
}
@ -1855,6 +2242,7 @@ fn rename_variables(
field_interner,
field_writes,
synthetic_externals,
slot_scoped_assigns,
)
}

View file

@ -419,6 +419,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}

View file

@ -442,6 +442,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let cfg: Cfg = Graph::new();
let const_values = HashMap::new();

File diff suppressed because it is too large Load diff

View file

@ -298,6 +298,16 @@ pub fn build_resource_method_summaries(
) {
continue;
}
// Skip acquires whose lifetime is bounded by a managed cleanup
// scope (Python `with`, Java try-with-resources, Ruby
// File.open-with-block, Rust RAII). The acquired handle is
// released before the method returns, so propagating an
// Acquire effect onto the caller's receiver creates an FP
// class where callers of `def foo(self): with open(...): ...`
// are flagged as leaking the receiver.
if info.managed_resource {
continue;
}
let callee = match &info.call.callee {
Some(c) => c.to_ascii_lowercase(),
None => continue,
@ -308,6 +318,20 @@ pub fn build_resource_method_summaries(
.iter()
.any(|a| transfer::callee_matches_pub(&callee, a))
{
// The receiver-proxy mechanism (state/transfer.rs)
// matches a method-name summary against `recv.method()`
// call sites and marks the receiver as OPEN. This is
// only meaningful when the acquire actually binds a
// resource into receiver state (`self.fd = open(...)`,
// `this.fd = fs.openSync(...)`). Acquires with no
// binding (`return open(...)`) or with a local-only
// binding (`f = open(...); f.close()`) do not transfer
// ownership onto the caller's receiver. Gate the
// summary on a defines field so anonymous and local-
// only acquires no longer leak through this path.
if info.taint.defines.is_none() {
continue;
}
summaries.push(transfer::ResourceMethodSummary {
method_name: method_name.clone(),
effect: transfer::ResourceEffect::Acquire,

View file

@ -33,6 +33,20 @@ use std::hash::{Hash, Hasher};
/// Pairs a [`Cap`] with the source location of the consuming
/// instruction so cross-file findings can attribute to the callee
/// rather than the caller call-site.
///
/// `from_chain` distinguishes two flavours of recorded site:
/// * `false`: the site was resolved via the body-local locator span,
///   i.e. it points at a sink instruction in the function's own body.
/// * `true`: the site was promoted from a deeper callee through
///   `event.primary_sink_site`, i.e. this function's summary carries
///   a chain-hop marker for a sink several frames down.
///
/// Pass-2 emission gates promotion of a site into `Finding.primary_location`
/// on `from_chain || file_rel != caller_file_rel`: same-file single-hop
/// helpers keep call-site emission (matching benchmark and real-world
/// fixture calibration), multi-hop chains and cross-file callees surface
/// the deep sink line. See "Multi-hop intra-file sink attribution gap"
/// in deferred.md for the design tradeoff.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct SinkSite {
#[serde(default, skip_serializing_if = "String::is_empty")]
@ -44,11 +58,18 @@ pub struct SinkSite {
#[serde(default, skip_serializing_if = "String::is_empty")]
pub snippet: String,
pub cap: Cap,
/// True when this site was promoted from a deeper callee's summary
/// (`event.primary_sink_site` chain-hop), false when recorded from
/// the function's own locator span. See struct docs.
#[serde(default, skip_serializing_if = "is_false")]
pub from_chain: bool,
}
impl SinkSite {
/// Dedup key: two sites with the same `(file_rel, line, col, cap)`
/// describe the same consumption and collapse on merge.
/// describe the same consumption and collapse on merge. `from_chain`
/// is intentionally excluded, the upgrade rule in [`union_sink_sites`]
/// takes over when two sites with different `from_chain` collide.
pub(crate) fn dedup_key(&self) -> (&str, u32, u32, u32) {
(self.file_rel.as_str(), self.line, self.col, self.cap.bits())
}
@ -62,10 +83,15 @@ impl SinkSite {
col: 0,
snippet: String::new(),
cap,
from_chain: false,
}
}
}
/// Predicate for serde's `skip_serializing_if`: returns `true` exactly when
/// the referenced flag is `false`.
fn is_false(b: &bool) -> bool {
    matches!(*b, false)
}
/// Tree/bytes context for resolving a CFG span to a [`SinkSite`].
/// Threaded as `Option<&Locator>` so extraction paths without tree
/// access can pass `None` cheaply.
@ -93,6 +119,7 @@ impl<'a> SinkSiteLocator<'a> {
col: (point.column + 1) as u32,
snippet,
cap,
from_chain: false,
}
}
}
@ -101,11 +128,17 @@ pub(crate) use crate::utils::snippet::line_snippet;
/// Union two `SmallVec<[SinkSite; 1]>` lists with `(file_rel, line, col,
/// cap)` dedup. Preserves insertion order of `existing` then appends any
/// new sites from `incoming` not already present.
/// new sites from `incoming` not already present. When two sites with the
/// same dedup key collide, `from_chain=true` wins, so a chain-hop marker is
/// never lost when a same-file locator span happens to share coordinates.
pub(crate) fn union_sink_sites(existing: &mut SmallVec<[SinkSite; 1]>, incoming: &[SinkSite]) {
for site in incoming {
let key = site.dedup_key();
if !existing.iter().any(|s| s.dedup_key() == key) {
if let Some(ex) = existing.iter_mut().find(|s| s.dedup_key() == key) {
if site.from_chain && !ex.from_chain {
ex.from_chain = true;
}
} else {
existing.push(site.clone());
}
}
@ -388,6 +421,16 @@ pub struct FuncSummary {
/// [`crate::callgraph::TypeHierarchyIndex`].
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub hierarchy_edges: Vec<(String, String)>,
/// Phase-10 Next.js entry-point classification. When `Some(_)`,
/// the function is treated as an externally-driven entry point
/// whose parameters are seeded as `TaintOrigin::Source` at SSA
/// entry, mirroring the way an HTTP request handler's formals are
/// adversary-controlled by default. `None` for ordinary
/// helpers — pass-2 keeps its existing baseline-subtraction
/// semantics.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub entry_kind: Option<crate::entry_points::EntryKind>,
}
// ── Cap conversion helpers ──────────────────────────────────────────────
@ -428,6 +471,35 @@ impl FuncSummary {
kind: self.kind,
}
}
/// Phase-04 variant of [`Self::func_key`] that derives the namespace
/// through a project-wide [`crate::resolve::ModuleGraph`].
///
/// The namespace comes from `crate::symbol::namespace_with_package`.
/// For a file inside a discovered package it takes the
/// `"@scope/name::src/file.ts"` form; otherwise the key is the same one
/// [`Self::func_key`] produces. All other key fields are copied from
/// the summary unchanged, and the language falls back to `Lang::Rust`
/// when `Lang::from_slug` does not yield one for `self.lang`.
///
/// Phase 04 only introduces this helper and no resolution call site
/// uses it yet; Phase 10 switches the JS/TS pass-1 path over from
/// [`Self::func_key`].
pub fn func_key_with_resolver(
    &self,
    scan_root: Option<&str>,
    module_graph: Option<&crate::resolve::ModuleGraph>,
) -> FuncKey {
    let lang = Lang::from_slug(&self.lang).unwrap_or(Lang::Rust);
    let namespace =
        crate::symbol::namespace_with_package(&self.file_path, scan_root, module_graph);
    let container = self.container.clone();
    let name = self.name.clone();
    FuncKey {
        lang,
        namespace,
        container,
        name,
        arity: Some(self.param_count),
        disambig: self.disambig,
        kind: self.kind,
    }
}
}
// ── Callee resolution ────────────────────────────────────────────────────
@ -543,6 +615,26 @@ pub struct GlobalSummaries {
/// Precise SSA-derived per-parameter summaries, keyed by `FuncKey`.
/// These take precedence over `FuncSummary` during callee resolution.
ssa_by_key: HashMap<FuncKey, SsaFuncSummary>,
/// Sibling index over [`Self::ssa_by_key`] keyed by
/// `(lang, namespace, name)`. Populated in lockstep with `ssa_by_key`
/// (every `insert_ssa` / `merge` adds the key). Used by the
/// cross-package SSA resolution path (step 0.7 in
/// `taint::ssa_transfer::resolve_callee`) to avoid an
/// `O(|ssa_by_key|)` linear scan per cross-package call site:
/// the resolver looks up the candidate `Vec<FuncKey>` and narrows
/// to a single hit by container / arity / disambig. Strictly
/// additive: when the index is empty (e.g. tests that never insert
/// SSA summaries) the resolver falls back to its existing flat
/// paths.
///
/// Note: SSA summaries are append-only on `GlobalSummaries` (no
/// remove/clear methods), so the index never needs invalidation.
/// Synthetic-disambig probing in
/// [`Self::reconcile_ssa_summary_key`] only mutates the inserted
/// key's `disambig` field, never the `(lang, namespace, name)`
/// triple, so the index value still points at every relevant
/// `FuncKey` after reconciliation.
ssa_by_lang_ns_name: HashMap<(Lang, String, String), Vec<FuncKey>>,
/// Cross-file callee bodies for interprocedural symbolic execution.
/// Keyed by `FuncKey` (same identity model as SSA summaries).
bodies_by_key: HashMap<FuncKey, crate::taint::ssa_transfer::CalleeSsaBody>,
@ -564,6 +656,16 @@ pub struct GlobalSummaries {
/// execution-API auth-recognition gap on routes attached to bare
/// child routers.
router_facts_by_module: HashMap<String, crate::auth_analysis::router_facts::PerFileRouterFacts>,
/// Per-file Phase-09 cross-package import maps, keyed by file
/// namespace (scan-root-relative path, the same form
/// [`FuncKey::namespace`] uses). Populated in pass 1 from each
/// file's [`crate::cfg::FileCfg::resolved_imports`] and consumed by
/// `inline_analyse_callee` when the inlined callee body's own
/// `cross_package_imports` Arc is empty (i.e. the body was loaded
/// from SQLite, where the field is `#[serde(skip)]`). Closes the
/// indexed-mode parity gap on transitive cross-package IPA inside
/// inlined frames.
cross_package_imports_by_namespace: HashMap<String, std::sync::Arc<HashMap<String, FuncKey>>>,
/// Type hierarchy index for runtime virtual-dispatch fan-out.
///
/// Installed by [`Self::install_hierarchy`] after pass 1 from the
@ -864,6 +966,7 @@ impl GlobalSummaries {
}
// SSA summaries: last-writer-wins (exact-key replacement, no unioning)
for (key, ssa_sum) in other.ssa_by_key {
self.index_ssa_key(&key);
self.ssa_by_key.insert(key, ssa_sum);
}
// Cross-file bodies: last-writer-wins
@ -879,6 +982,10 @@ impl GlobalSummaries {
for (module_id, facts) in other.router_facts_by_module {
self.router_facts_by_module.insert(module_id, facts);
}
// Cross-package imports: last-writer-wins per namespace.
for (ns, map) in other.cross_package_imports_by_namespace {
self.cross_package_imports_by_namespace.insert(ns, map);
}
// Hierarchy index: invalidate after a merge so the next consumer
// sees a freshly-built view that includes `other`'s edges. The
// alternative, point-merging two indexes, is racy when the
@ -966,9 +1073,41 @@ impl GlobalSummaries {
} else {
self.reconcile_ssa_summary_key(key, &summary)
};
self.index_ssa_key(&key);
self.ssa_by_key.insert(key, summary);
}
/// Record `key` in the secondary `(lang, namespace, name)` index.
///
/// The bucket for the triple is created on first use. A key that is
/// already in its bucket is not pushed again, so re-inserting at the
/// same triple leaves the candidate vector unchanged.
fn index_ssa_key(&mut self, key: &FuncKey) {
    let candidates = self
        .ssa_by_lang_ns_name
        .entry((key.lang, key.namespace.clone(), key.name.clone()))
        .or_default();
    let already_indexed = candidates.iter().any(|known| known == key);
    if !already_indexed {
        candidates.push(key.clone());
    }
}
/// Return the SSA summary [`FuncKey`]s indexed under
/// `(lang, namespace, name)`.
///
/// Yields an empty slice when no SSA summary has been stored at that
/// triple. Used by the cross-package resolution path so the step-0.7
/// narrowing iterates only the candidate set rather than every
/// persisted SSA key.
pub fn ssa_keys_by_qualified(&self, lang: Lang, namespace: &str, name: &str) -> &[FuncKey] {
    // The index is keyed by an owned `(Lang, String, String)` tuple, so
    // the probe copies both strings. This runs once per cross-package
    // callee and both strings are short (namespace path + leaf name).
    let probe = (lang, namespace.to_string(), name.to_string());
    match self.ssa_by_lang_ns_name.get(&probe) {
        Some(keys) => keys.as_slice(),
        None => &[],
    }
}
/// Exact lookup of an SSA summary by fully-qualified key.
pub fn get_ssa(&self, key: &FuncKey) -> Option<&SsaFuncSummary> {
self.ssa_by_key.get(key)
@ -1088,6 +1227,38 @@ impl GlobalSummaries {
self.router_facts_by_module.len()
}
/// Store a file's Phase-09 cross-package import map under its namespace.
///
/// Last-writer-wins per namespace key: re-analysing a file replaces the
/// previous snapshot of its `(local_name → FuncKey)` resolutions. An
/// empty `map` is ignored, so it neither creates an entry nor overwrites
/// one that is already stored.
pub fn insert_cross_package_imports(
    &mut self,
    namespace: String,
    map: std::sync::Arc<HashMap<String, FuncKey>>,
) {
    if !map.is_empty() {
        self.cross_package_imports_by_namespace
            .insert(namespace, map);
    }
}
/// Fetch the cross-package import map recorded for the file `namespace`,
/// or `None` when no map is stored for it.
///
/// Used by [`crate::taint::ssa_transfer`]'s inline-analysis frame to
/// recover the callee body's own import view when the body was loaded
/// from SQLite (where the Arc on `CalleeSsaBody` is stripped by
/// `#[serde(skip)]`).
pub fn get_cross_package_imports(
    &self,
    namespace: &str,
) -> Option<&std::sync::Arc<HashMap<String, FuncKey>>> {
    let by_namespace = &self.cross_package_imports_by_namespace;
    by_namespace.get(namespace)
}
/// Number of file namespaces that currently have a cross-package
/// import map stored.
pub fn cross_package_imports_len(&self) -> usize {
    let imports_index = &self.cross_package_imports_by_namespace;
    imports_index.len()
}
/// Insert a cross-file callee body.
///
/// See [`insert_ssa`](Self::insert_ssa) for the identity-safety rule.
@ -1149,8 +1320,10 @@ impl GlobalSummaries {
pub fn is_empty(&self) -> bool {
    // One emptiness flag per index consulted by this check; the
    // container counts as empty only when every flag is true.
    let index_is_empty = [
        self.by_key.is_empty(),
        self.ssa_by_key.is_empty(),
        self.ssa_by_lang_ns_name.is_empty(),
        self.auth_by_key.is_empty(),
        self.router_facts_by_module.is_empty(),
        self.cross_package_imports_by_namespace.is_empty(),
    ];
    index_is_empty.iter().all(|&empty| empty)
}
/// Iterate over all (key, summary) pairs.
@ -1683,6 +1856,10 @@ impl std::fmt::Debug for GlobalSummaries {
.field("bodies_len", &self.bodies_by_key.len())
.field("auth_len", &self.auth_by_key.len())
.field("router_facts_len", &self.router_facts_by_module.len())
.field(
"cross_package_imports_len",
&self.cross_package_imports_by_namespace.len(),
)
.finish()
}
}

View file

@ -347,6 +347,14 @@ pub struct SsaFuncSummary {
/// on both vulnerable and patched code.
#[serde(default, skip_serializing_if = "SmallVec::is_empty")]
pub validated_params_to_return: SmallVec<[usize; 2]>,
/// Phase-10 Next.js entry-point classification. Mirrors
/// [`crate::summary::FuncSummary::entry_kind`] — recorded on the
/// SSA summary so cross-file consumers don't have to consult the
/// coarse `FuncSummary` to know whether the callee is an entry
/// point. `None` for ordinary helpers.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub entry_kind: Option<crate::entry_points::EntryKind>,
}
/// A per-return-path [`PathFact`] entry.

View file

@ -530,6 +530,7 @@ fn ssa_summary_serde_round_trip_identity() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
let json = serde_json::to_string(&summary).unwrap();
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
@ -564,6 +565,7 @@ fn ssa_summary_serde_round_trip_strip_bits() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
let json = serde_json::to_string(&summary).unwrap();
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
@ -595,6 +597,7 @@ fn ssa_summary_serde_round_trip_add_bits() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
let json = serde_json::to_string(&summary).unwrap();
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
@ -633,6 +636,7 @@ fn ssa_summary_serde_round_trip_all_variants() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
let json = serde_json::to_string(&summary).unwrap();
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
@ -673,6 +677,7 @@ fn global_summaries_insert_ssa_exact_key_replacement() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
gs.insert_ssa(key.clone(), v1.clone());
assert_eq!(gs.get_ssa(&key), Some(&v1));
@ -701,6 +706,7 @@ fn global_summaries_insert_ssa_exact_key_replacement() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
gs.insert_ssa(key.clone(), v2.clone());
assert_eq!(gs.get_ssa(&key), Some(&v2));
@ -749,6 +755,7 @@ fn global_summaries_merge_with_ssa_entries() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
let sum_b = SsaFuncSummary {
param_to_return: vec![],
@ -773,6 +780,7 @@ fn global_summaries_merge_with_ssa_entries() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
gs1.insert_ssa(key_a.clone(), sum_a.clone());
@ -821,6 +829,7 @@ fn global_summaries_is_empty_considers_ssa() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
@ -852,6 +861,7 @@ fn ssa_summary_serde_round_trip_param_to_sink_param() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
let json = serde_json::to_string(&summary).unwrap();
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
@ -898,6 +908,7 @@ fn ssa_summary_serde_round_trip_container_fields() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
let json = serde_json::to_string(&summary).unwrap();
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
@ -954,6 +965,7 @@ fn ssa_summary_serde_round_trip_return_abstract() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
let json = serde_json::to_string(&summary).unwrap();
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
@ -1029,6 +1041,7 @@ fn make_callee_body(
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
},
opt: crate::ssa::OptimizeResult {
const_values: std::collections::HashMap::new(),
@ -1047,6 +1060,7 @@ fn make_callee_body(
param_count,
node_meta: std::collections::HashMap::new(),
body_graph: None,
cross_package_imports: std::sync::Arc::new(std::collections::HashMap::new()),
}
}
@ -1478,6 +1492,7 @@ fn global_summaries_resolve_body_requires_body_present() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
// Don't insert body
@ -3415,6 +3430,7 @@ fn sink_site_serde_round_trip_solo() {
col: 9,
snippet: "Command::new(\"sh\").arg(cmd).status()".into(),
cap: Cap::CODE_EXEC | Cap::SHELL_ESCAPE,
from_chain: false,
};
let json = serde_json::to_string(&site).unwrap();
let back: SinkSite = serde_json::from_str(&json).unwrap();
@ -3446,6 +3462,7 @@ fn ssa_summary_serde_round_trip_with_sink_sites() {
col: 4,
snippet: "cursor.execute(sql)".into(),
cap: Cap::SQL_QUERY,
from_chain: false,
};
let site_b = SinkSite {
file_rel: "exec.py".into(),
@ -3453,6 +3470,7 @@ fn ssa_summary_serde_round_trip_with_sink_sites() {
col: 12,
snippet: "subprocess.call(cmd, shell=True)".into(),
cap: Cap::CODE_EXEC | Cap::SHELL_ESCAPE,
from_chain: false,
};
let summary = SsaFuncSummary {
param_to_return: vec![(0, TaintTransform::Identity)],
@ -3526,6 +3544,7 @@ fn merge_unions_sink_sites_with_dedup() {
col: 1,
snippet: "execute(sql)".into(),
cap: Cap::SQL_QUERY,
from_chain: false,
};
let site_b = SinkSite {
file_rel: "svc.py".into(),
@ -3533,6 +3552,7 @@ fn merge_unions_sink_sites_with_dedup() {
col: 4,
snippet: "os.system(cmd)".into(),
cap: Cap::CODE_EXEC,
from_chain: false,
};
let mut left = FuncSummary {
@ -3623,6 +3643,7 @@ fn cf4_return_path_transform_serde_round_trip() {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
};
let json = serde_json::to_string(&summary).unwrap();
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
@ -4459,3 +4480,95 @@ mod hierarchy_widened_tests {
assert!(post_merge_reinstalled.contains(&k_sub));
}
}
/// A stored cross-package import map can be read back under its
/// namespace, unknown namespaces miss, and empty maps are not stored.
#[test]
fn cross_package_imports_round_trip_via_global_summaries() {
    use crate::symbol::{FuncKey, FuncKind, Lang};
    use std::collections::HashMap;
    use std::sync::Arc;

    let escape_key = FuncKey {
        lang: Lang::TypeScript,
        namespace: "packages/util/src/escape.ts".to_string(),
        container: String::new(),
        name: "escape".to_string(),
        arity: None,
        disambig: None,
        kind: FuncKind::Function,
    };
    let imports: HashMap<String, FuncKey> = HashMap::from([("escape".to_string(), escape_key)]);
    let shared = Arc::new(imports);

    let mut gs = GlobalSummaries::new();
    gs.insert_cross_package_imports("apps/api/handler.ts".to_string(), Arc::clone(&shared));
    assert_eq!(gs.cross_package_imports_len(), 1);

    // The stored map is reachable by the exact namespace it was filed under.
    let looked_up = gs
        .get_cross_package_imports("apps/api/handler.ts")
        .expect("namespace lookup must hit");
    assert_eq!(looked_up.len(), 1);
    assert!(looked_up.contains_key("escape"));
    assert!(gs.get_cross_package_imports("missing").is_none());

    // An empty map must not create a row: files whose resolver produced
    // no resolved bindings should not pollute the index.
    let no_bindings: HashMap<String, FuncKey> = HashMap::new();
    gs.insert_cross_package_imports("apps/api/no_imports.ts".to_string(), Arc::new(no_bindings));
    assert_eq!(gs.cross_package_imports_len(), 1);
}
/// Merging two `GlobalSummaries` keeps the cross-package import maps
/// contributed by both sides.
#[test]
fn cross_package_imports_merged_across_thread_local_summaries() {
    use crate::symbol::{FuncKey, FuncKind, Lang};
    use std::collections::HashMap;
    use std::sync::Arc;

    // Key of a free TypeScript function `name` exported from `namespace`.
    let ts_function = |namespace: &str, name: &str| FuncKey {
        lang: Lang::TypeScript,
        namespace: namespace.to_string(),
        container: String::new(),
        name: name.to_string(),
        arity: None,
        disambig: None,
        kind: FuncKind::Function,
    };
    // Import map holding exactly one `local name → target` binding.
    let single_import = |local: &str, target: FuncKey| -> Arc<HashMap<String, FuncKey>> {
        Arc::new(HashMap::from([(local.to_string(), target)]))
    };

    let mut gs_a = GlobalSummaries::new();
    gs_a.insert_cross_package_imports(
        "apps/api/handler_a.ts".to_string(),
        single_import(
            "escape",
            ts_function("packages/util/src/escape.ts", "escape"),
        ),
    );

    let mut gs_b = GlobalSummaries::new();
    gs_b.insert_cross_package_imports(
        "apps/api/handler_b.ts".to_string(),
        single_import(
            "format",
            ts_function("packages/util/src/format.ts", "format"),
        ),
    );

    gs_a.merge(gs_b);

    assert_eq!(gs_a.cross_package_imports_len(), 2);
    let from_a = gs_a.get_cross_package_imports("apps/api/handler_a.ts");
    assert!(from_a.is_some());
    let from_b = gs_a.get_cross_package_imports("apps/api/handler_b.ts");
    assert!(from_b.is_some());
}

View file

@ -262,5 +262,31 @@ pub fn normalize_namespace(abs_path: &str, root: Option<&str>) -> String {
abs_path.to_string()
}
/// Phase-04 namespace builder: the project-relative namespace of
/// `abs_path`, prefixed with the canonical package name when the file
/// lies inside a resolved [`crate::resolve::PackageEntry`].
///
/// The unprefixed part is exactly what `normalize_namespace(abs_path, root)`
/// produces. When `module_graph` is supplied and its `package_for`
/// lookup finds a package for the path, the result has the shape
/// `"@scope/name::src/file.ts"`; with no graph, or no owning package,
/// the plain `"src/file.ts"` form is returned unchanged.
///
/// Per the original Phase-04 note, the helper is meant for routing
/// [`FuncKey`] construction for JS/TS files so cross-file callee lookup
/// honours the package boundary.
pub fn namespace_with_package(
    abs_path: &str,
    root: Option<&str>,
    module_graph: Option<&crate::resolve::ModuleGraph>,
) -> String {
    let plain = normalize_namespace(abs_path, root);
    let file_path = std::path::Path::new(abs_path);
    // `None` both when no graph was supplied and when the graph knows no
    // package for this file; either way the plain namespace is the answer.
    let owning_package = module_graph.and_then(|graph| graph.package_for(file_path));
    if let Some(pkg) = owning_package {
        format!("{}::{}", pkg.name, plain)
    } else {
        plain
    }
}
#[cfg(test)]
mod tests;

View file

@ -1384,6 +1384,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let empty_succs = HashMap::new();
@ -1445,6 +1446,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let empty_succs = HashMap::new();
@ -1579,6 +1581,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let finding = make_finding(n0, n1);
@ -1688,6 +1691,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
// Finding path goes through B0 → B1 → B3
@ -1836,6 +1840,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let finding = Finding {
@ -1950,6 +1955,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let mut exc_succs: HashMap<BlockId, SmallVec<[BlockId; 2]>> = HashMap::new();
@ -2018,6 +2024,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let mut exc_succs: HashMap<BlockId, SmallVec<[BlockId; 2]>> = HashMap::new();
@ -2127,6 +2134,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let finding = Finding {

View file

@ -391,6 +391,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -438,6 +439,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -521,6 +523,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -585,6 +588,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -667,6 +671,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -740,6 +745,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -776,6 +782,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -834,6 +841,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -916,6 +924,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -996,6 +1005,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -1033,6 +1043,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);

View file

@ -381,6 +381,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let finding = Finding {
@ -456,6 +457,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let finding = Finding {
@ -560,6 +562,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let ctx = SymexContext {
@ -622,6 +625,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let ctx = SymexContext {

View file

@ -355,6 +355,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let witness = state.get_sink_witness(&finding, &ssa);
@ -397,6 +398,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
assert_eq!(state.get_sink_witness(&finding, &ssa), None);
@ -436,6 +438,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
assert_eq!(state.get_sink_witness(&finding, &ssa), None);
@ -478,6 +481,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
state.widen_at_loop_head(BlockId(0), &ssa);
@ -523,6 +527,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
state.widen_at_loop_head(BlockId(0), &ssa);
@ -568,6 +573,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
state.widen_at_loop_head(BlockId(0), &ssa);

View file

@ -1014,6 +1014,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}
@ -1595,6 +1596,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
let ctx = make_summary_ctx(&gs);
@ -1665,6 +1667,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
let ctx = make_summary_ctx(&gs);
@ -1735,6 +1738,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
let ctx = make_summary_ctx(&gs);
@ -1800,6 +1804,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
let ctx = make_summary_ctx(&gs);
@ -1865,6 +1870,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
let ctx = make_summary_ctx(&gs);
@ -2064,6 +2070,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
@ -2144,6 +2151,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
@ -2225,6 +2233,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
// Second "send", in ns B, also with same arity → ambiguous bare-name
@ -2256,6 +2265,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
// Also register the type-qualified name so Attempt 1 can find it
@ -2287,6 +2297,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
@ -2367,6 +2378,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
@ -2449,6 +2461,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
insert_java_summary(
@ -2479,6 +2492,7 @@ mod tests {
typed_call_receivers: vec![],
validated_params_to_return: smallvec::SmallVec::new(),
param_to_gate_filters: vec![],
entry_kind: None,
},
);
// No "HttpClient.send" summary registered, disambiguation has 0 exact matches

View file

@ -797,6 +797,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let finding = Finding {
@ -854,6 +855,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let cfg = Cfg::new();
let finding = Finding {
@ -917,6 +919,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let finding = Finding {
@ -981,6 +984,7 @@ mod tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let finding = Finding {

View file

@ -753,6 +753,7 @@ mod tests {
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
(ssa, cfg)
@ -843,6 +844,7 @@ mod tests {
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let demand = DemandState::new(Cap::all());
let (step, next) = backward_transfer(&ssa, SsaValue(0), &demand);
@ -876,6 +878,7 @@ mod tests {
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let demand = DemandState::new(Cap::all());
let (step, _next) = backward_transfer(&ssa, SsaValue(0), &demand);
@ -964,6 +967,7 @@ mod tests {
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let demand = DemandState::new(Cap::all());
@ -1053,6 +1057,7 @@ mod tests {
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let ctx = BackwardsCtx::new(&ssa, &cfg, Lang::JavaScript);

View file

@ -403,6 +403,86 @@ fn compute_module_aliases_for_summary(
crate::ssa::const_prop::collect_module_aliases(ssa, &cp.values)
}
/// Build a per-file cross-package import lookup for Phase 09 cross-file IPA.
///
/// For each [`crate::resolve::ImportBinding`] whose resolver verdict
/// produced a concrete `(resolved_file, exported_name)` pair, builds the
/// canonical [`FuncKey`] of the imported function in its own file's
/// scan-root-relative namespace and stores it under the caller-file's
/// local binding name.
///
/// Returns an empty map when the file has no resolved imports (non-JS/TS
/// files, scans without a `ModuleGraph`, side-effect-only imports, or
/// builtin/unresolved specifiers). The caller passes `None` to
/// `SsaTaintTransfer::cross_package_imports` in that case.
///
/// `module_graph` aligns the target [`FuncKey::namespace`] with the
/// package-prefixed form that `FuncSummary::func_key_with_resolver`
/// produces on the cross-file storage side: when the resolved file lies
/// inside a discovered package the namespace becomes
/// `"@scope/name::src/file.ts"`, otherwise it falls back to plain
/// `normalize_namespace`. Step 0.7 of `resolve_callee_full` looks up
/// `(lang, namespace, name)` against `GlobalSummaries::ssa_by_key`
/// where the SSA-side keys are now produced via the same
/// `namespace_with_package` shape (callers in `crate::ast::ParsedFile`
/// pre-compute the package-prefixed namespace before invoking
/// `lower_all_functions_from_bodies`), so the two sides agree even
/// when two packages share a project-relative file path.
///
/// `module_graph = None` (single-package scans, non-JS/TS files, unit
/// tests, indexed-mode SQLite fallback) collapses to the historical
/// `normalize_namespace` behaviour, keeping the migration strictly
/// additive for any consumer that does not opt in.
///
/// The constructed key intentionally leaves `container`, `arity`,
/// `disambig`, and `kind` at their defaults — the resolver verdict only
/// fixes the `(lang, namespace, name)` triple, and step 0.7 of
/// `resolve_callee_full` matches against `GlobalSummaries::ssa_by_key`
/// using only those three fields plus an arity hint when available.
pub fn build_cross_package_func_keys(
resolved_imports: &[crate::resolve::ImportBinding],
scan_root: Option<&str>,
module_graph: Option<&crate::resolve::ModuleGraph>,
caller_lang: Lang,
) -> HashMap<String, FuncKey> {
let mut out: HashMap<String, FuncKey> = HashMap::new();
for binding in resolved_imports {
let Some(ref resolved_file) = binding.resolved_file else {
continue;
};
let Some(ref exported_name) = binding.exported_name else {
continue;
};
if exported_name.is_empty()
|| exported_name == "*"
|| exported_name == "default"
|| binding.local_name.is_empty()
{
// Side-effect / namespace / default imports do not map to a
// single named export; step 0.7 needs a concrete leaf name.
continue;
}
let target_lang = resolved_file
.extension()
.and_then(|e| e.to_str())
.and_then(Lang::from_extension)
.unwrap_or(caller_lang);
let abs = resolved_file.to_string_lossy();
let namespace = crate::symbol::namespace_with_package(&abs, scan_root, module_graph);
let key = FuncKey {
lang: target_lang,
namespace,
container: String::new(),
name: exported_name.clone(),
arity: None,
disambig: None,
kind: FuncKind::Function,
};
out.insert(binding.local_name.clone(), key);
}
out
}
/// Run taint analysis on all bodies in a file.
///
/// Uses a unified multi-body analysis for all languages:
@ -432,25 +512,32 @@ pub fn analyse_file(
ssa_transfer::reset_all_validated_spans();
// No locator: pass-2 intra-file summaries are transient (not persisted)
// and behavior depends on SinkSite.cap only, which is always populated.
let (ssa_summaries, callee_bodies) = lower_all_functions_from_bodies(
file_cfg,
caller_lang,
caller_namespace,
local_summaries,
global_summaries,
None,
);
analyse_file_with_lowered(
file_cfg,
local_summaries,
global_summaries,
caller_lang,
caller_namespace,
interop_edges,
extra_labels,
&ssa_summaries,
&callee_bodies,
)
crate::ssa::type_facts::with_file_imports(Some(&file_cfg.local_imports), || {
crate::cfg::safe_fields::with_safe_lookup_fields(Some(&file_cfg.safe_lookup_fields), || {
let (ssa_summaries, callee_bodies) = lower_all_functions_from_bodies(
file_cfg,
caller_lang,
caller_namespace,
local_summaries,
global_summaries,
None,
None,
None,
);
analyse_file_with_lowered(
file_cfg,
local_summaries,
global_summaries,
caller_lang,
caller_namespace,
interop_edges,
extra_labels,
&ssa_summaries,
&callee_bodies,
None,
)
})
})
}
/// Same as [`analyse_file`] but takes pre-lowered SSA summaries + callee
@ -459,6 +546,10 @@ pub fn analyse_file(
/// the SSA-artifact extractor; the bare [`analyse_file`] entry-point keeps
/// its prior signature for any caller that does not have a pre-lowered
/// result handy.
///
/// `cross_package_imports` is the optional Phase-09 lookup map built via
/// [`build_cross_package_func_keys`]. `None` (the public-API default)
/// disables cross-package step 0.7 in `resolve_callee_full`.
#[allow(clippy::too_many_arguments)]
pub(crate) fn analyse_file_with_lowered(
file_cfg: &FileCfg,
@ -470,9 +561,49 @@ pub(crate) fn analyse_file_with_lowered(
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
ssa_summaries: &std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
callee_bodies: &std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
cross_package_imports: Option<&std::collections::HashMap<String, FuncKey>>,
) -> Vec<Finding> {
let _span = tracing::debug_span!("taint_analyse_file").entered();
// Publish the per-file local-import view so the ORM TypeKind gate
// inside [`crate::ssa::type_facts::constructor_type`] can read it
// during downstream `optimize_ssa_with_param_types` passes. The
// outer `analyse_file` already wraps this for its own
// `lower_all_functions_from_bodies` pre-pass; wrapping here too
// keeps direct callers (e.g. [`crate::ast::analyse_file_fused`])
// covered. Idempotent under nesting — the inner guard restores
// the outer value on drop.
crate::ssa::type_facts::with_file_imports(Some(&file_cfg.local_imports), || {
crate::cfg::safe_fields::with_safe_lookup_fields(Some(&file_cfg.safe_lookup_fields), || {
analyse_file_with_lowered_inner(
file_cfg,
local_summaries,
global_summaries,
caller_lang,
caller_namespace,
interop_edges,
extra_labels,
ssa_summaries,
callee_bodies,
cross_package_imports,
)
})
})
}
#[allow(clippy::too_many_arguments)]
fn analyse_file_with_lowered_inner(
file_cfg: &FileCfg,
local_summaries: &FuncSummaries,
global_summaries: Option<&GlobalSummaries>,
caller_lang: Lang,
caller_namespace: &str,
interop_edges: &[InteropEdge],
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
ssa_summaries: &std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
callee_bodies: &std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
cross_package_imports: Option<&std::collections::HashMap<String, FuncKey>>,
) -> Vec<Finding> {
// NOTE: the path-safe-suppressed span set is reset by the caller, not
// here. Per-parameter probes inside the lowering phase
// (`lower_all_functions_from_bodies`) can already publish spans via
@ -551,6 +682,7 @@ pub(crate) fn analyse_file_with_lowered(
max_iterations,
import_bindings_ref,
cross_file_bodies_ref,
cross_package_imports,
);
// 4. Deduplicate findings using a richer key that preserves distinct
@ -797,6 +929,34 @@ fn inject_external_type_facts(
}
}
/// Derive an overridden `param_types` vector from a body's entry kind.
///
/// Only `EntryKind::AppRouteHandler` triggers an override today: the
/// first formal of a Next.js App Router handler always carries a Web
/// `Request`, regardless of the user's TypeScript annotation.
///
/// Returns `Some(vec)` for every `AppRouteHandler`: a copy of
/// `param_types` with slot 0 set to `TypeKind::Request` (pushed as the
/// sole element when the input is empty), whether or not slot 0 already
/// held that value. Returns `None` for any other entry kind and for
/// `None`.
///
/// Keeping the rule in one helper holds its two consumers
/// (`analyse_body_with_seed` and `lower_all_functions_from_bodies_inner`)
/// in lockstep.
fn entry_kind_param_type_override(
    entry_kind: Option<&crate::entry_points::EntryKind>,
    param_types: &[Option<crate::ssa::type_facts::TypeKind>],
) -> Option<Vec<Option<crate::ssa::type_facts::TypeKind>>> {
    let is_app_route_handler = matches!(
        entry_kind,
        Some(crate::entry_points::EntryKind::AppRouteHandler { .. })
    );
    if !is_app_route_handler {
        return None;
    }

    let request = Some(crate::ssa::type_facts::TypeKind::Request);
    let mut overridden = param_types.to_vec();
    match overridden.first_mut() {
        // Overwrite whatever annotation the first formal carried.
        Some(first) => *first = request,
        // No formals recorded: the request becomes the only entry.
        None => overridden.push(request),
    }
    Some(overridden)
}
/// Analyse a single body with an optional parent seed.
///
/// Shared logic extracted from `analyse_multi_body` to avoid deep nesting.
@ -818,6 +978,7 @@ fn analyse_body_with_seed(
import_bindings: Option<&crate::cfg::ImportBindings>,
cross_file_bodies: Option<&std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>>,
parent_var_types: Option<&HashMap<String, crate::ssa::type_facts::TypeKind>>,
cross_package_imports: Option<&std::collections::HashMap<String, FuncKey>>,
) -> (
Vec<Finding>,
Option<HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>>,
@ -853,10 +1014,156 @@ fn analyse_body_with_seed(
// so that `cmd -> Runtime.exec(cmd)` picks up `cmd` as a handler param.
let is_java_lambda =
lang == Lang::Java && body.meta.kind == crate::cfg::BodyKind::AnonymousFunction;
// Java methods tagged with a Spring/JaxRs entry-point annotation need
// scoped lowering so the formal parameters (`@RequestParam String name`,
// `@PathParam Long id`, ...) materialise as `SsaOp::Param` ops that
// the entry-point seeding pass paints as `Source(UserInput)`. Restricted
// to Java because (a) JS/TS already use scoped lowering above, (b) Go
// and Ruby handlers introduce request-OBJECT formals (`r *http.Request`,
// implicit `params`) whose Cap::all() seeding triggers FPs at sinks
// that take the bare object (e.g. `http.Redirect(w, r, safe, code)`
// where `r` is the request, not the URL), and (c) Python free-name
// captures (`request`, `b64decode`) bubble up as synthetic externals
// and shift source attribution. Java methods don't have those
// free-capture shapes (every reference is via explicit qualification),
// so the precision-vs-recall trade lands on the precision side.
let is_java_entry_method = lang == Lang::Java
&& body.meta.kind == crate::cfg::BodyKind::NamedFunction
&& body.meta.func_key.as_ref().is_some_and(|k| {
let mut k = k.clone();
k.namespace = namespace.to_string();
ssa_summaries
.and_then(|m| m.get(&k))
.is_some_and(|s| s.entry_kind.is_some())
});
// Rust framework handlers (axum, actix-web, Rocket) need scoped
// lowering so the typed-extractor formals (`Query<T>`, `Json<T>`,
// `Form<T>`, `Path<T>`) materialise as `SsaOp::Param` ops that the
// entry-point seeding pass paints as `Source(UserInput)`. The
// per-formal seed decision is gated on a recovered `TypeKind` from
// `BodyMeta.param_types`: extractor-wrapped formals get
// `Some(TypeKind::Int|String|Bool|...)` (or a DTO type) via
// `rust_type_to_kind`, while denylist wrappers (`State<T>`,
// `Extension<T>`, `Pool<T>`, ...) and bare primitives stay `None`
// and are skipped at seed time. This keeps DI handles
// server-side without painting the database pool as adversary input.
let is_rust_entry_method = lang == Lang::Rust
&& body.meta.kind == crate::cfg::BodyKind::NamedFunction
&& body.meta.func_key.as_ref().is_some_and(|k| {
let mut k = k.clone();
k.namespace = namespace.to_string();
ssa_summaries.and_then(|m| m.get(&k)).is_some_and(|s| {
matches!(
s.entry_kind,
Some(crate::entry_points::EntryKind::AxumHandler)
| Some(crate::entry_points::EntryKind::ActixHandler)
| Some(crate::entry_points::EntryKind::RocketRoute)
)
})
});
// Python Flask handlers need scoped lowering so the route-bound formal
// parameters (`@app.route("/users/<name>")` + `def view(name):`)
// materialise as `SsaOp::Param` ops the entry-point seeding pass paints
// as `Source(UserInput)`. The per-formal seed decision is gated against
// `BodyMeta.param_route_capture`, so only formals whose names appear as
// path captures in the routing decorator are painted; implicit globals
// (`request`, `g`, `session`) and DI-injected formals stay un-seeded.
// Restricted to Flask (`FlaskRoute`) here because FastAPI / Django
// free-name capture shapes (`request`, `b64decode`) bubble up as
// synthetic externals under scoped lowering and shift source
// attribution, while Flask handlers have all formals = path captures
// (precision lands cleanly).
let is_python_flask_route = lang == Lang::Python
&& body.meta.kind == crate::cfg::BodyKind::NamedFunction
&& body
.meta
.param_route_capture
.iter()
.any(|captured| *captured)
&& body.meta.func_key.as_ref().is_some_and(|k| {
let mut k = k.clone();
k.namespace = namespace.to_string();
ssa_summaries.and_then(|m| m.get(&k)).is_some_and(|s| {
matches!(
s.entry_kind,
Some(crate::entry_points::EntryKind::FlaskRoute { .. })
)
})
});
// Ruby Sinatra route handlers need scoped lowering so the block
// parameters (`get "/u/:name" do |name| ... end`) materialise as
// `SsaOp::Param` ops the entry-point seeding pass paints as
// `Source(UserInput)`. Sinatra route bodies are anonymous (the
// `do_block` AST node has no name field), so `BodyKind` is
// `AnonymousFunction`; the gate accepts both anonymous and named.
// Per-formal seed decision is gated against
// `BodyMeta.param_route_capture`, so only block formals whose
// names appear as `:name` segments in the routing path are
// painted. Block formals not in the capture set fall back to
// existing label rules.
let is_ruby_sinatra_route = lang == Lang::Ruby
&& matches!(
body.meta.kind,
crate::cfg::BodyKind::NamedFunction | crate::cfg::BodyKind::AnonymousFunction
)
&& body
.meta
.param_route_capture
.iter()
.any(|captured| *captured)
&& body.meta.func_key.as_ref().is_some_and(|k| {
let mut k = k.clone();
k.namespace = namespace.to_string();
ssa_summaries.and_then(|m| m.get(&k)).is_some_and(|s| {
matches!(
s.entry_kind,
Some(crate::entry_points::EntryKind::SinatraRoute { .. })
)
})
});
// Python FastAPI / Starlette handlers need scoped lowering so the
// route-bound and typed-extractor formals materialise as `SsaOp::Param`
// ops that the entry-point seeding pass paints as `Source(UserInput)`.
// The per-formal decision in `ssa_transfer` consults BOTH
// `BodyMeta.param_route_capture` (for `{name}` brace-segment captures)
// and `type_facts.get_type(value)` (for `Annotated[T, Path()/Query()/Body()
// /Header()/Cookie()/Form()/File()]` typed extractors). Formals without
// either signal — `db: Session = Depends(get_db)`, `request: Request`,
// bare `session` — stay un-seeded, matching the Hard Rule 3 policy that
// unannotated formals are not adversary input.
//
// Gated on "at least one formal qualifies" to mirror the Flask gate:
// a handler with zero path captures and zero typed extractors gets the
// existing label-rule treatment (free-name captures of `request`,
// `b64decode`, etc. bubble up as synthetic externals without scoped
// lowering shifting attribution).
let is_python_fastapi_route = lang == Lang::Python
&& body.meta.kind == crate::cfg::BodyKind::NamedFunction
&& (body
.meta
.param_route_capture
.iter()
.any(|captured| *captured)
|| body.meta.param_types.iter().any(|t| t.is_some()))
&& body.meta.func_key.as_ref().is_some_and(|k| {
let mut k = k.clone();
k.namespace = namespace.to_string();
ssa_summaries.and_then(|m| m.get(&k)).is_some_and(|s| {
matches!(
s.entry_kind,
Some(crate::entry_points::EntryKind::FastApiRoute { .. })
)
})
});
let use_scoped_lowering = !is_toplevel
&& (matches!(lang, Lang::JavaScript | Lang::TypeScript)
|| has_nonempty_seed
|| is_java_lambda);
|| is_java_lambda
|| is_java_entry_method
|| is_rust_entry_method
|| is_python_flask_route
|| is_python_fastapi_route
|| is_ruby_sinatra_route);
let ssa_result = if use_scoped_lowering {
let func_name = body.meta.name.clone().unwrap_or_else(|| {
body.meta
@ -878,11 +1185,28 @@ fn analyse_body_with_seed(
match ssa_result {
Ok(mut ssa_body) => {
// App Router handlers carry a Web `Request` as their first
// formal. Override `param_types[0]` so the type-fact pass tags
// the formal as `TypeKind::Request` and receiver-method reads
// (`req.json()`, ...) rewrite to `Request.<method>` for
// type-qualified label resolution.
let body_entry_kind = body.meta.func_key.as_ref().and_then(|k| {
let mut k = k.clone();
k.namespace = namespace.to_string();
ssa_summaries
.and_then(|m| m.get(&k))
.and_then(|s| s.entry_kind.clone())
});
let overridden_param_types =
entry_kind_param_type_override(body_entry_kind.as_ref(), &body.meta.param_types);
let param_types_ref = overridden_param_types
.as_deref()
.unwrap_or(body.meta.param_types.as_slice());
let mut opt = crate::ssa::optimize_ssa_with_param_types(
&mut ssa_body,
cfg,
Some(lang),
&body.meta.param_types,
param_types_ref,
);
// Forward parent-body type facts onto closure-captured Param ops
// before any consumer reads `opt.type_facts`. This is the lever
@ -965,6 +1289,16 @@ fn analyse_body_with_seed(
&& body.meta.kind == crate::cfg::BodyKind::AnonymousFunction),
cross_file_bodies,
pointer_facts: pointer_facts.as_ref(),
cross_package_imports,
// Phase 10 — Next.js entry-point seeding (looked up
// above when overriding `param_types`).
entry_kind: body_entry_kind,
param_route_capture: if body.meta.param_route_capture.is_empty() {
None
} else {
Some(body.meta.param_route_capture.as_slice())
},
recording_summary: false,
};
let (events, block_states) =
ssa_transfer::run_ssa_taint_full(&ssa_body, cfg, &transfer);
@ -1098,6 +1432,7 @@ fn analyse_multi_body(
max_iterations: usize,
import_bindings: Option<&crate::cfg::ImportBindings>,
cross_file_bodies: Option<&std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>>,
cross_package_imports: Option<&std::collections::HashMap<String, FuncKey>>,
) -> Vec<Finding> {
let order = containment_order(&file_cfg.bodies);
let mut all_findings: Vec<Finding> = Vec::new();
@ -1144,6 +1479,7 @@ fn analyse_multi_body(
import_bindings,
cross_file_bodies,
parent_var_types,
cross_package_imports,
);
tracing::debug!(
body_id = body.meta.id.0,
@ -1340,6 +1676,7 @@ fn analyse_multi_body(
import_bindings,
cross_file_bodies,
parent_var_types,
cross_package_imports,
);
// Phase-B: replace (not append) this body's findings
// in the cache. Previous rounds' findings for this
@ -1688,6 +2025,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
/// resistant identity we have: same-name methods on different classes, same-
/// name overloads with different arity, and anonymous bodies at distinct
/// source spans all get distinct keys.
#[allow(clippy::too_many_arguments)]
pub(crate) fn lower_all_functions_from_bodies(
file_cfg: &FileCfg,
lang: Lang,
@ -1695,6 +2033,38 @@ pub(crate) fn lower_all_functions_from_bodies(
local_summaries: &FuncSummaries,
global_summaries: Option<&GlobalSummaries>,
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
scan_root: Option<&str>,
module_graph: Option<&crate::resolve::ModuleGraph>,
) -> (
std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
) {
crate::ssa::type_facts::with_file_imports(Some(&file_cfg.local_imports), || {
crate::cfg::safe_fields::with_safe_lookup_fields(Some(&file_cfg.safe_lookup_fields), || {
lower_all_functions_from_bodies_inner(
file_cfg,
lang,
namespace,
local_summaries,
global_summaries,
locator,
scan_root,
module_graph,
)
})
})
}
#[allow(clippy::too_many_arguments)]
fn lower_all_functions_from_bodies_inner(
file_cfg: &FileCfg,
lang: Lang,
namespace: &str,
local_summaries: &FuncSummaries,
global_summaries: Option<&GlobalSummaries>,
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
scan_root: Option<&str>,
module_graph: Option<&crate::resolve::ModuleGraph>,
) -> (
std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
@ -1702,6 +2072,23 @@ pub(crate) fn lower_all_functions_from_bodies(
let mut summaries = std::collections::HashMap::new();
let mut bodies = std::collections::HashMap::new();
// Build the file's cross-package import map once and share it
// across every body produced from this file. The map mirrors what
// `analyse_file_with_lowered` builds at pass-2 entry, but storing
// it on each `CalleeSsaBody` lets the inline-analysis frame inside
// another file resolve the callee's local import names against
// the callee's own package boundary (Phase 09 step 0.7) instead of
// skipping the lookup entirely.
let cross_package_imports_arc = {
let map = build_cross_package_func_keys(
&file_cfg.resolved_imports,
scan_root,
module_graph,
lang,
);
std::sync::Arc::new(map)
};
for body in file_cfg.function_bodies() {
let _t_misc = std::time::Instant::now();
let func_name = body.meta.name.clone().unwrap_or_else(|| {
@ -1797,6 +2184,15 @@ pub(crate) fn lower_all_functions_from_bodies(
param_types_ref,
);
// Phase 10 — annotate entry-point summaries. The pass-2
// taint engine reads `entry_kind` to seed the function's
// formals as `TaintOrigin::Source` at SSA entry, mirroring
// an HTTP handler's adversary-controlled inputs. Always
// recorded even on empty summaries so caller-side resolution
// sees the entry classification through cross-file lookups.
let mut summary = summary;
summary.entry_kind = file_cfg.entry_kinds.get(&body.meta.span).cloned();
// Always insert the summary, even when all fields are empty/default.
// An empty summary tells resolve_callee "this function exists and has
// no taint effects", preventing fallthrough to the less precise old
@ -1804,18 +2200,34 @@ pub(crate) fn lower_all_functions_from_bodies(
// For zero-param functions we only insert when the summary carries
// the fresh-container signal (the only observable effect worth
// persisting for a parameter-less body).
if param_count > 0 || summary.points_to.returns_fresh_alloc {
//
// An entry-kind tag also keeps the summary in the map even
// for zero-param entry points so cross-file resolvers see it.
if param_count > 0
|| summary.points_to.returns_fresh_alloc
|| summary.entry_kind.is_some()
{
summaries.insert(key.clone(), summary);
}
perf_lower_record(1, _t_extract.elapsed().as_micros());
}
let _t_opt = std::time::Instant::now();
// Override `param_types[0]` for entry-kind-tagged formals (e.g. App
// Router handlers receive a Web `Request`). Other entry kinds keep
// the ambient param-type vector unchanged. See
// `entry_kind_param_type_override` for the full rule set.
let entry_kind_for_body = file_cfg.entry_kinds.get(&body.meta.span);
let overridden_param_types =
entry_kind_param_type_override(entry_kind_for_body, &body.meta.param_types);
let param_types_ref = overridden_param_types
.as_deref()
.unwrap_or(body.meta.param_types.as_slice());
let opt = crate::ssa::optimize_ssa_with_param_types(
&mut func_ssa,
&body.graph,
Some(lang),
&body.meta.param_types,
param_types_ref,
);
perf_lower_record(2, _t_opt.elapsed().as_micros());
@ -1857,6 +2269,7 @@ pub(crate) fn lower_all_functions_from_bodies(
param_count,
node_meta: std::collections::HashMap::new(),
body_graph: Some(body.graph.clone()),
cross_package_imports: std::sync::Arc::clone(&cross_package_imports_arc),
},
);
perf_lower_record(6, _t_misc2.elapsed().as_micros());
@ -2256,6 +2669,10 @@ fn augment_summaries_with_child_sinks(
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let (_parent_events, parent_block_states) =
@ -2320,6 +2737,10 @@ fn augment_summaries_with_child_sinks(
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let (child_events, _child_block_states) =
@ -2448,6 +2869,7 @@ type EligibleCalleeBodies = Vec<(FuncKey, ssa_transfer::CalleeSsaBody)>;
/// entry) and lowers each body's graph with its recorded entry/params. This
/// path is equivalent to what `analyse_file` uses at taint time, so the SSA
/// summaries produced here line up exactly with what pass 2 will consult.
#[allow(clippy::too_many_arguments)]
pub(crate) fn extract_ssa_artifacts_from_file_cfg(
file_cfg: &FileCfg,
lang: Lang,
@ -2455,6 +2877,8 @@ pub(crate) fn extract_ssa_artifacts_from_file_cfg(
local_summaries: &FuncSummaries,
global_summaries: Option<&GlobalSummaries>,
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
scan_root: Option<&str>,
module_graph: Option<&crate::resolve::ModuleGraph>,
) -> (SsaArtifactSummaries, EligibleCalleeBodies) {
let (summaries, bodies) = lower_all_functions_from_bodies(
file_cfg,
@ -2463,6 +2887,8 @@ pub(crate) fn extract_ssa_artifacts_from_file_cfg(
local_summaries,
global_summaries,
locator,
scan_root,
module_graph,
);
let eligible_bodies = build_eligible_bodies(file_cfg, bodies);
(summaries, eligible_bodies)

View file

@ -142,6 +142,27 @@ pub struct CalleeSsaBody {
/// bodies.
#[serde(skip)]
pub body_graph: Option<crate::cfg::Cfg>,
/// The callee body's own file-level cross-package import map (Phase 09
/// step 0.7 keyset).
///
/// Populated when the body is freshly lowered with the file's
/// [`crate::cfg::FileCfg::resolved_imports`] in scope. Forwarded into
/// the inline-analysis child transfer so transitive cross-package
/// resolution inside an inlined frame can land in
/// `crate::summary::GlobalSummaries::ssa_by_key` using the callee's
/// own import view rather than the caller's (which would mis-resolve
/// names against the caller's package boundary).
///
/// Wrapped in `Arc` so every body in a file shares one heap
/// allocation; per-file bodies typically count in the tens to
/// hundreds, and import maps are append-only after construction.
/// `#[serde(skip)]` because the map is reproducible from the file's
/// `resolved_imports` and bears no identity on its own; an indexed
/// scan that loads a body from SQLite simply skips step 0.7 inside
/// the inlined frame (same conservative behaviour as before this
/// field existed).
#[serde(skip)]
pub cross_package_imports: std::sync::Arc<std::collections::HashMap<String, FuncKey>>,
}
/// Populate `node_meta` from the original CFG for cross-file persistence.

File diff suppressed because it is too large Load diff

View file

@ -264,6 +264,10 @@ pub fn extract_ssa_func_summary_full(
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: true,
};
let (events, block_states) = run_ssa_taint_full(ssa, cfg, &transfer);
@ -745,14 +749,36 @@ pub fn extract_ssa_func_summary_full(
if event.sink_caps.is_empty() {
continue;
}
let site = match locator {
Some(loc) => {
loc.site_for_span(cfg[event.sink_node].classification_span(), event.sink_caps)
// Preserve the deepest sink attribution across multi-hop summaries.
// When `event.primary_sink_site` is populated, the upstream
// resolver already pierced through a callee summary to the
// dangerous instruction's coordinates; promoting it here means a
// grandparent caller of this function sees `line N` of the
// innermost helper rather than `line M` of *this* function's
// call site to its child. Mark `from_chain = true` so pass-2
// emission can distinguish multi-hop chain markers (always
// promote into `Finding.primary_location`) from this body's own
// locator-resolved sink (only promote across file boundaries).
// Falls back to locator-based call-site attribution when the
// event is intra-procedural.
let site = match event.primary_sink_site.as_ref() {
Some(s) => {
let mut s = s.clone();
s.from_chain = true;
s
}
None => SinkSite::cap_only(event.sink_caps),
None => match locator {
Some(loc) => loc
.site_for_span(cfg[event.sink_node].classification_span(), event.sink_caps),
None => SinkSite::cap_only(event.sink_caps),
},
};
let key = site.dedup_key();
if !param_sites.iter().any(|s| s.dedup_key() == key) {
if let Some(existing) = param_sites.iter_mut().find(|s| s.dedup_key() == key) {
if site.from_chain && !existing.from_chain {
existing.from_chain = true;
}
} else {
param_sites.push(site);
}
}
@ -812,6 +838,10 @@ pub fn extract_ssa_func_summary_full(
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: true,
};
detect_source_to_callback_from_states(
ssa,
@ -867,6 +897,11 @@ pub fn extract_ssa_func_summary_full(
// caller patches it in.
typed_call_receivers: Vec::new(),
validated_params_to_return,
// Phase-10 entry-point classification is attached post-extraction
// by `taint::lower_all_functions_from_bodies` (which has access
// to `FileCfg::entry_kinds`). Empty here means the extractor
// itself does not carry the tag.
entry_kind: None,
}
}
@ -1112,11 +1147,25 @@ fn infer_summary_return_type(
continue;
}
// Only inspect the very last instruction in the returning block.
// Mirror the CFG-level `outer_callee` fallback (Phase 08 audit) so a
// CFG-rewritten callee (e.g. `req.body.path` displacing `URL` on
// `new URL(req.body.path, base)`) still resolves to the original
// constructor identifier preserved in `callee_text`.
if let Some(inst) = block.body.last()
&& let SsaOp::Call { callee, .. } = &inst.op
&& let Some(ty) = crate::ssa::type_facts::constructor_type(lang, callee)
&& let SsaOp::Call {
callee,
callee_text,
..
} = &inst.op
{
return Some(ty);
if let Some(ty) = crate::ssa::type_facts::constructor_type(lang, callee) {
return Some(ty);
}
if let Some(orig) = callee_text.as_deref()
&& let Some(ty) = crate::ssa::type_facts::constructor_type(lang, orig)
{
return Some(ty);
}
}
}
None

View file

@ -87,6 +87,7 @@ mod cross_file_tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
},
opt: crate::ssa::OptimizeResult {
const_values: std::collections::HashMap::new(),
@ -105,6 +106,7 @@ mod cross_file_tests {
param_count: 0,
node_meta: std::collections::HashMap::new(),
body_graph: None,
cross_package_imports: std::sync::Arc::new(std::collections::HashMap::new()),
}
}
@ -838,6 +840,7 @@ mod primary_sink_location_tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}
@ -862,6 +865,7 @@ mod primary_sink_location_tests {
col: 10,
snippet: "Command::new(cmd).status()".into(),
cap: Cap::SHELL_ESCAPE,
from_chain: false,
};
let summary = SsaFuncSummary {
param_to_sink: vec![(0usize, smallvec![site.clone()])],
@ -886,6 +890,8 @@ mod primary_sink_location_tests {
&tainted,
Cap::SHELL_ESCAPE,
&summary.param_to_sink,
"caller.rs",
false,
);
assert_eq!(
primary_sites.len(),
@ -971,6 +977,7 @@ mod goto_succ_propagation_tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let cfg: Cfg = Graph::new();
@ -1009,6 +1016,10 @@ mod goto_succ_propagation_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
// A non-bottom exit state, the test only cares that *every* succ
@ -1065,6 +1076,7 @@ mod goto_succ_propagation_tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let cfg: Cfg = Graph::new();
let interner = SymbolInterner::new();
@ -1101,6 +1113,10 @@ mod goto_succ_propagation_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let exit_state = SsaTaintState::initial();
@ -1128,6 +1144,7 @@ mod goto_succ_propagation_tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}
@ -1390,6 +1407,7 @@ mod goto_succ_propagation_tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}
@ -1517,6 +1535,7 @@ mod receiver_candidates_field_proj_tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
}
}
@ -1604,6 +1623,7 @@ mod receiver_candidates_field_proj_tests {
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
slot_scoped_assigns: std::collections::HashSet::new(),
};
let cands =
super::super::receiver_candidates_for_type_lookup(SsaValue(0), Some(&body), Lang::Go);
@ -1739,6 +1759,7 @@ mod fanout_merge_tests {
col: 5,
snippet: "exec(q)".into(),
cap: Cap::from_bits(0b0001).unwrap(),
from_chain: false,
};
let unique_a = SinkSite {
file_rel: "src/a.rs".into(),
@ -1746,6 +1767,7 @@ mod fanout_merge_tests {
col: 3,
snippet: "do_a(q)".into(),
cap: Cap::from_bits(0b0001).unwrap(),
from_chain: false,
};
let unique_b = SinkSite {
file_rel: "src/b.rs".into(),
@ -1753,6 +1775,7 @@ mod fanout_merge_tests {
col: 7,
snippet: "do_b(q)".into(),
cap: Cap::from_bits(0b0001).unwrap(),
from_chain: false,
};
let mut a = empty();
a.param_to_sink_sites = vec![(0, smallvec![shared.clone(), unique_a.clone()])];
@ -2008,6 +2031,7 @@ mod field_write_tests {
field_interner,
field_writes,
synthetic_externals: HashSet::new(),
slot_scoped_assigns: HashSet::new(),
};
(body, cache_id)
}
@ -2056,6 +2080,10 @@ mod field_write_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: Some(pf),
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let mut state = SsaTaintState::initial();
@ -2140,6 +2168,10 @@ mod field_write_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let mut state = SsaTaintState::initial();
for inst in &body.blocks[0].body {
@ -2208,6 +2240,10 @@ mod field_write_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: Some(&pf),
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
// Pre-seed `validated_must` on `src` so the synth Assign
@ -2312,6 +2348,7 @@ mod field_write_tests {
m
},
synthetic_externals: HashSet::new(),
slot_scoped_assigns: HashSet::new(),
};
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(0));
// v0 is Const → empty pt, the hook should not insert anything.
@ -2354,6 +2391,10 @@ mod field_write_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: Some(&pf),
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let mut state = SsaTaintState::initial();
@ -2452,6 +2493,10 @@ mod container_elem_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: Some(pf),
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let mut state = SsaTaintState::initial();
@ -2549,6 +2594,7 @@ mod container_elem_tests {
field_writes: HashMap::new(),
synthetic_externals: HashSet::new(),
slot_scoped_assigns: HashSet::new(),
};
// Run pointer analysis first to confirm the result of `shift()`
@ -2689,6 +2735,7 @@ mod container_elem_tests {
field_writes: HashMap::new(),
synthetic_externals: HashSet::new(),
slot_scoped_assigns: HashSet::new(),
};
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(7));
@ -2731,6 +2778,10 @@ mod container_elem_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: Some(&pf),
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
// Seed `src` as validated_must before the push fires.
@ -2833,6 +2884,7 @@ mod container_elem_tests {
field_writes: HashMap::new(),
synthetic_externals: HashSet::new(),
slot_scoped_assigns: HashSet::new(),
};
let interner = SymbolInterner::new();
@ -2869,6 +2921,10 @@ mod container_elem_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let mut state = SsaTaintState::initial();
for inst in &body.blocks[0].body {
@ -2960,6 +3016,7 @@ mod cross_call_field_tests {
field_writes: HashMap::new(),
synthetic_externals: HashSet::new(),
slot_scoped_assigns: HashSet::new(),
};
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(7));
(body, cache_id, pf)
@ -3334,6 +3391,7 @@ mod field_taint_origin_cap_tests {
field_writes: HashMap::new(),
synthetic_externals: HashSet::new(),
slot_scoped_assigns: HashSet::new(),
};
(body, cache_id, cfg, n_proj)
}
@ -3425,6 +3483,10 @@ mod field_taint_origin_cap_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: Some(&pf),
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
for inst in &body.blocks[0].body {
transfer_inst(inst, &cfg, &body, &transfer, &mut state);
@ -3660,6 +3722,7 @@ mod pointer_lattice_worklist_tests {
field_interner,
field_writes,
synthetic_externals: HashSet::new(),
slot_scoped_assigns: HashSet::new(),
};
let mut interner = SymbolInterner::new();
@ -3713,6 +3776,10 @@ mod pointer_lattice_worklist_tests {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: Some(pf),
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
}
}

View file

@ -63,6 +63,10 @@ fn ssa_analyse_rust(src: &[u8]) -> Vec<Finding> {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let events = ssa_transfer::run_ssa_taint(&ssa, cfg, &transfer);
let mut findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, cfg);
@ -663,6 +667,7 @@ fn cross_file_sink_finding_carries_primary_location() {
col: 5,
snippet: "Command::new(\"sh\").arg(cmd).status().unwrap();".into(),
cap: Cap::SHELL_ESCAPE,
from_chain: false,
};
global.insert(
key,
@ -3788,6 +3793,10 @@ fn assert_ssa_integration(src: &[u8]) {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
@ -3926,6 +3935,10 @@ fn integ_php_echo_simple_var() {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
@ -3996,6 +4009,10 @@ fn integ_c_curl_handle_ssrf() {
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
cross_package_imports: None,
entry_kind: None,
param_route_capture: None,
recording_summary: false,
};
let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
@ -5481,6 +5498,8 @@ class Worker {
&file_cfg.summaries,
None,
None,
None,
None,
);
// Collect containers of every key named "process".
@ -5553,6 +5572,8 @@ function helper(x) {
&file_cfg.summaries,
None,
None,
None,
None,
);
let helper_keys: Vec<_> = summaries.keys().filter(|k| k.name == "helper").collect();
@ -5776,6 +5797,8 @@ class Reader {
&file_cfg.summaries,
None,
None,
None,
None,
);
let read_sum = summaries
@ -5821,6 +5844,8 @@ class Maker {
&file_cfg.summaries,
None,
None,
None,
None,
);
// make() has zero parameters and no fresh-allocation return, so the
@ -6837,6 +6862,55 @@ function handler(req, res) {
/// traversal flow alive end-to-end. Pins the precision claim — the
/// strip is element-of-array-after-filter scoped, not a wholesale
/// kill on any `<arr>.filter` call regardless of callback identity.
// NOTE(review): the `///` doc lines directly above this test describe the
// `filter` validator-callback behaviour and appear to belong to
// `cve_2026_42353_filter_without_validator_callback_preserves_taint` below;
// this test sits between that doc block and its `#[test]` — confirm and
// relocate one of them.
#[test]
fn callee_body_carries_file_cross_package_imports() {
    // Phase 09: each `CalleeSsaBody` that comes out of a file's lowering
    // pipeline is expected to hold the file-level cross-package import
    // map, so an inline-analysis frame can resolve the callee's local
    // names against the callee's own package boundary (step 0.7 inside
    // an inlined frame).
    let source = b"export function passthrough(s) { return s; }\n";
    let js_grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let mut file_cfg = parse_lang(source, "javascript", js_grammar);

    // Hand-craft the resolved binding the Phase 04 resolver would emit
    // for `import { helper } from "@scope/util/helper";`.
    let helper_binding = crate::resolve::ImportBinding {
        local_name: "helper".to_string(),
        source_module: "@scope/util/helper".to_string(),
        resolved_file: Some(std::path::PathBuf::from("/scope/util/src/helper.ts")),
        exported_name: Some("helper".to_string()),
    };
    file_cfg.resolved_imports.push(helper_binding);

    // No global summaries, locator, scan root, or module graph are supplied.
    let (_summaries, lowered) = super::extract_ssa_artifacts_from_file_cfg(
        &file_cfg,
        Lang::JavaScript,
        "test.js",
        &file_cfg.summaries,
        None,
        None,
        None,
        None,
    );

    assert!(
        !lowered.is_empty(),
        "expected at least one eligible body for `passthrough`",
    );

    // The map must be present on every eligible body, not just the first.
    for (_, callee) in lowered.iter() {
        let imports = &callee.cross_package_imports;
        assert!(
            !imports.is_empty(),
            "every body in a file with resolved imports should carry the file's cross-package import map; got an empty map",
        );
        assert!(
            imports.contains_key("helper"),
            "expected the synthetic `helper` binding to surface in the body's cross-package import map",
        );
    }
}
#[test]
fn cve_2026_42353_filter_without_validator_callback_preserves_taint() {
let src = br#"
@ -6867,3 +6941,74 @@ function handler(req, res) {
"expected taint flow via filter(pickFirst) — pickFirst is not a recognised validator and must not strip taint; got 0 findings",
);
}
// ── Phase 09 cross-package namespace migration ─────────────────────────────
/// Checks how `build_cross_package_func_keys` fills in
/// [`FuncKey::namespace`]: with a module graph available, a file that
/// lives inside a discovered monorepo package gets a package-prefixed
/// namespace; with no graph, the namespace is the plain
/// scan-root-relative path.
///
/// This pins the migration from the deferred Phase 09 audit. SSA summary
/// keys produced by [`crate::taint::lower_all_functions_from_bodies`]
/// take their namespace from `namespace_with_package`, so the
/// cross-package import map has to use the same `FuncKey::namespace`
/// shape, otherwise step 0.7 of `resolve_callee_full` cannot find its
/// entries in [`crate::summary::GlobalSummaries::ssa_by_key`].
#[test]
fn cross_package_func_keys_namespace_uses_resolver_when_available() {
    use crate::resolve::{ImportBinding, build_module_graph};
    use std::path::PathBuf;

    // Locate the resolver fixture tree; prefer the canonical path and
    // fall back to the un-canonicalised one when canonicalisation fails.
    let fixture_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/resolver");
    let root = match fixture_root.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => fixture_root,
    };
    let graph = build_module_graph(std::slice::from_ref(&root));

    // A single binding that resolves into the `@scope/util` package.
    let binding = ImportBinding {
        local_name: "doStuff".to_string(),
        source_module: "@scope/util".to_string(),
        resolved_file: Some(root.join("packages/util/src/index.ts")),
        exported_name: Some("doStuff".to_string()),
    };
    let bindings = std::slice::from_ref(&binding);
    let scan_root = root.to_string_lossy().into_owned();

    // Case 1: module graph supplied — namespace is `<package>::<relative path>`.
    let with_resolver = crate::taint::build_cross_package_func_keys(
        bindings,
        Some(&scan_root),
        Some(&graph),
        Lang::TypeScript,
    );
    let prefixed = with_resolver
        .get("doStuff")
        .expect("resolved binding maps to a FuncKey");
    assert!(
        prefixed.namespace.starts_with("@scope/util::"),
        "expected package-prefixed namespace, got {ns}",
        ns = prefixed.namespace,
    );
    assert!(
        prefixed.namespace.ends_with("packages/util/src/index.ts"),
        "expected the suffix to remain the scan-root-relative path, got {ns}",
        ns = prefixed.namespace,
    );

    // Case 2: no module graph — namespace is just the relative path.
    let without_resolver = crate::taint::build_cross_package_func_keys(
        bindings,
        Some(&scan_root),
        None,
        Lang::TypeScript,
    );
    let plain = without_resolver
        .get("doStuff")
        .expect("plain binding maps to a FuncKey");
    assert!(
        !plain.namespace.contains("::"),
        "without a resolver the namespace must stay plain, got {ns}",
        ns = plain.namespace,
    );
    assert_eq!(plain.namespace, "packages/util/src/index.ts");
}

View file

@ -794,6 +794,13 @@ pub struct Config {
/// not persisted to config files.
#[serde(skip)]
pub framework_ctx: Option<crate::utils::project::FrameworkContext>,
/// TS/JS module resolver state, set by the scan pipeline once per scan
/// after the file walk and before pass 1. `None` outside the scan paths
/// (e.g. unit-test direct callers of `analyse_file_fused`); consumers
/// must treat absence as "no resolver hints available, fall back to
/// pre-resolver behaviour" rather than as a hard error.
#[serde(skip)]
pub module_graph: Option<std::sync::Arc<crate::resolve::ModuleGraph>>,
}
impl Config {