mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
Critical bug fixes and recall improvements (#68)
This commit is contained in:
parent
7d0e7320e2
commit
55247b7fcd
352 changed files with 60069 additions and 900 deletions
|
|
@ -1368,11 +1368,15 @@ fn truncate_prefix_lock(s: &str) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the longest common prefix of `a` and `b` as an owned `String`.
///
/// The two inputs are walked one `char` (Unicode scalar value) at a time,
/// so a multi-byte UTF-8 sequence is either kept whole or dropped whole.
/// The earlier byte-iteration form cast every matching byte to `char`,
/// which re-encoded continuation bytes as Latin-1 code points and produced
/// mojibake; the same fix lives at
/// `crate::abstract_interp::string_domain::longest_common_prefix`.
///
/// The result is empty when either input is empty or when the first
/// characters already differ.
fn longest_common_prefix(a: &str, b: &str) -> String {
    a.chars()
        .zip(b.chars())
        // Stop at the first position where the two scalar values disagree.
        .take_while(|(x, y)| x == y)
        .map(|(x, _)| x)
        .collect()
}
|
||||
|
||||
|
|
@ -1380,6 +1384,24 @@ fn longest_common_prefix(a: &str, b: &str) -> String {
|
|||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// ── LCP helper ──────────────────────────────────────────────────────
|
||||
|
||||
#[test]
fn lcp_basic() {
    // Each row is (left input, right input, expected common prefix):
    // a diverging tail, two identical strings, and an empty left side.
    let cases = [
        ("abcdef", "abcxyz", "abc"),
        ("abc", "abc", "abc"),
        ("", "abc", ""),
    ];
    for (left, right, expected) in cases {
        assert_eq!(longest_common_prefix(left, right), expected);
    }
}
|
||||
|
||||
#[test]
fn lcp_keeps_utf8_codepoints_whole() {
    // A byte-wise walk would surface the continuation byte 0xA9 of "é"
    // as a character of its own and corrupt the prefix. The helper has
    // to keep both 2-byte and 3-byte UTF-8 sequences intact.
    let two_byte = longest_common_prefix("héllo", "héllo!");
    assert_eq!(two_byte, "héllo");

    let three_byte = longest_common_prefix("名前.json", "名前.txt");
    assert_eq!(three_byte, "名前.");
}
|
||||
|
||||
// ── Tri lattice laws ────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -350,6 +350,25 @@ impl StringFact {
|
|||
is_bottom: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// SSRF helper: build a fact for `new URL(path, base)` where `base` is a
|
||||
/// literal origin (`https://api.example.com`). The result behaves as
|
||||
/// `base ++ path`, the locked-host prefix survives even when the path
|
||||
/// component carries arbitrary taint, and the fact's `prefix` is what
|
||||
/// `is_string_safe_for_ssrf` consults to suppress the SSRF sink.
|
||||
///
|
||||
/// `path` carries any string knowledge for the path component (typically
|
||||
/// `StringFact::top()`). When the base already ends in `/`, the helper
|
||||
/// keeps it as-is; otherwise appends a `/` so the prefix unambiguously
|
||||
/// includes the path separator (the SSRF check looks for
|
||||
/// `scheme://host/`).
|
||||
pub fn from_url_with_base(base: &str, path: &Self) -> Self {
|
||||
let mut anchor = base.to_string();
|
||||
if !anchor.ends_with('/') {
|
||||
anchor.push('/');
|
||||
}
|
||||
StringFact::exact(&anchor).concat(path)
|
||||
}
|
||||
}
|
||||
|
||||
impl Lattice for StringFact {
|
||||
|
|
@ -943,6 +962,40 @@ mod tests {
|
|||
assert!(suffix.ends_with('好'));
|
||||
}
|
||||
|
||||
/// Phase 08: the prefix lock produced for `new URL(path, base)` has to
/// survive concatenation with a tainted (Top) path component.
/// `is_string_safe_for_ssrf` only looks at `prefix`, so the locked-host
/// base must stay intact even though the path side knows nothing.
#[test]
fn from_url_with_base_locks_prefix_under_tainted_suffix() {
    let f = StringFact::from_url_with_base("https://api.cal.com", &StringFact::top());

    let locked_prefix = f.prefix.as_deref();
    assert_eq!(
        locked_prefix,
        Some("https://api.cal.com/"),
        "prefix lock must include the path separator"
    );

    // A Top path contributes no suffix knowledge; the result has to
    // reflect that without giving up the prefix lock checked above.
    let suffix_unknown = f.suffix.is_none();
    assert!(
        suffix_unknown,
        "suffix is unknown when path-side fact is Top"
    );
}
|
||||
|
||||
/// When the path side has a known suffix, that suffix shows up on the
/// combined URL fact and the base prefix remains locked. The base here
/// already ends in `/`, so no extra separator is added.
#[test]
fn from_url_with_base_keeps_prefix_with_concrete_path_suffix() {
    let json_path = StringFact::from_suffix(".json");
    let f = StringFact::from_url_with_base("https://api.cal.com/", &json_path);

    let (prefix, suffix) = (f.prefix.as_deref(), f.suffix.as_deref());
    assert_eq!(prefix, Some("https://api.cal.com/"));
    assert_eq!(suffix, Some(".json"));
}
|
||||
|
||||
/// Concat with empty-string `exact("")` should preserve the other
|
||||
/// side's prefix/suffix knowledge (empty is the identity).
|
||||
#[test]
|
||||
|
|
|
|||
2713
src/ast.rs
2713
src/ast.rs
File diff suppressed because it is too large
Load diff
|
|
@ -90,6 +90,13 @@ fn check_ownership_gaps(
|
|||
if op.sink_class.is_some_and(|c| !c.is_auth_relevant()) {
|
||||
continue;
|
||||
}
|
||||
// NextAuth callbacks are themselves the authentication
|
||||
// boundary, both reads and mutations inside them operate on
|
||||
// identity context, so suppress regardless of op kind.
|
||||
// Other auth helpers stay read-only-suppressed.
|
||||
if is_nextauth_callback_unit(unit) {
|
||||
continue;
|
||||
}
|
||||
if op.kind == OperationKind::Read && unit_is_auth_helper(unit) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -105,6 +112,40 @@ fn check_ownership_gaps(
|
|||
if is_delegated_read_with_actor_context(unit, op, &relevant_subjects) {
|
||||
continue;
|
||||
}
|
||||
// Owner-equality scoping: when the same call composes a
|
||||
// foreign-id subject with an actor-context subject (e.g.
|
||||
// `db.findFirst({where: {id: input.id, userId: ctx.user.id}})`
|
||||
// in a TRPC handler), the actor pin tenant-scopes the
|
||||
// query to the authenticated user. The relevant_subjects
|
||||
// filter has already excluded actor-context entries; if
|
||||
// the unfiltered op.subjects still carries an
|
||||
// actor-context subject, the missing co-binding is the
|
||||
// owner-eq witness.
|
||||
//
|
||||
// `is_actor_context_subject` is constrained: it only
|
||||
// accepts subjects whose base is in
|
||||
// `is_self_scoped_session_base` (`req.user`,
|
||||
// `ctx.session.user`, etc.) OR in the per-unit
|
||||
// `self_scoped_session_bases` set populated by the
|
||||
// typed-extractor pre-pass (TRPC alias matches,
|
||||
// NextAuth callback formals). Generic `user.id` /
|
||||
// `me.id` does not qualify, so unrelated co-occurrences
|
||||
// do not over-suppress.
|
||||
//
|
||||
// Trade-off: a privesc-via-`data` shape like
|
||||
// `db.update({where: {id: input.id}, data: {ownerId: ctx.user.id}})`
|
||||
// would also be suppressed because both subjects appear
|
||||
// at the call site without arg-position info. That
|
||||
// pattern is rare and would need its own rule. The
|
||||
// owner-eq common case removes ~70 cal.com FPs and
|
||||
// matches the canonical Express / TRPC scoping idiom.
|
||||
let has_actor_co_subject = op
|
||||
.subjects
|
||||
.iter()
|
||||
.any(|s| is_actor_context_subject(s, unit));
|
||||
if has_actor_co_subject {
|
||||
continue;
|
||||
}
|
||||
if !has_prior_subject_auth(unit, op, &relevant_subjects) {
|
||||
findings.push(AuthFinding {
|
||||
rule_id: rules.rule_id("missing_ownership_check"),
|
||||
|
|
@ -879,7 +920,7 @@ fn unit_is_auth_helper(unit: &AnalysisUnit) -> bool {
|
|||
.filter(|c| c.is_ascii_alphanumeric())
|
||||
.map(|c| c.to_ascii_lowercase())
|
||||
.collect();
|
||||
(normalized.starts_with("has")
|
||||
if (normalized.starts_with("has")
|
||||
|| normalized.starts_with("check")
|
||||
|| normalized.starts_with("require")
|
||||
|| normalized.starts_with("verify")
|
||||
|
|
@ -891,6 +932,62 @@ fn unit_is_auth_helper(unit: &AnalysisUnit) -> bool {
|
|||
|| normalized.contains("access")
|
||||
|| normalized.contains("permission")
|
||||
|| normalized.contains("authoriz"))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
is_nextauth_callback_unit(unit)
|
||||
}
|
||||
|
||||
/// True when this unit IS, or LEXICALLY CONTAINS, a NextAuth
|
||||
/// (next-auth) callback definition.
|
||||
///
|
||||
/// Two shapes are recognised:
|
||||
/// * A unit whose name is `signIn` / `session` / `jwt` / `redirect` /
|
||||
/// `authorize` / `authorized` AND whose destructured params include
|
||||
/// a canonical NextAuth formal (`user` / `token` / `account` /
|
||||
/// `profile` / `credentials` / `session` / `trigger`). Matches the
|
||||
/// flat `export const authOptions = { callbacks: { ... } }` shape
|
||||
/// where the top-level unit-creation pass walks into the object
|
||||
/// literal and produces one unit per method.
|
||||
/// * A unit whose body contains an object literal with a
|
||||
/// `callbacks: { ... }` property naming at least one NextAuth
|
||||
/// callback (set by `body_returns_nextauth_options` at extract
|
||||
/// time). Matches the `export const getOptions = (...) =>
|
||||
/// ({ callbacks: { ... } })` shape where the inner callback
|
||||
/// methods do not become their own units — operations from their
|
||||
/// bodies get accumulated under the outer arrow's unit, so the
|
||||
/// outer unit's name (`getOptions`) is the only handle the
|
||||
/// suppressor can latch onto.
|
||||
///
|
||||
/// NextAuth callbacks ARE the authentication boundary; operations on
|
||||
/// `user.id` / `existingUser.id` inside them resolve the authenticated
|
||||
/// identity, they do not look up a tenant-scoped resource based on
|
||||
/// untrusted input.
|
||||
fn is_nextauth_callback_unit(unit: &AnalysisUnit) -> bool {
|
||||
if unit.is_nextauth_options_factory {
|
||||
return true;
|
||||
}
|
||||
let Some(name) = unit.name.as_deref() else {
|
||||
return false;
|
||||
};
|
||||
if !matches!(
|
||||
name,
|
||||
"signIn" | "session" | "jwt" | "redirect" | "authorize" | "authorized"
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
const SIGNAL_PARAMS: &[&str] = &[
|
||||
"user",
|
||||
"token",
|
||||
"account",
|
||||
"profile",
|
||||
"credentials",
|
||||
"session",
|
||||
"trigger",
|
||||
];
|
||||
unit.params
|
||||
.iter()
|
||||
.any(|p| SIGNAL_PARAMS.contains(&p.as_str()))
|
||||
}
|
||||
|
||||
fn is_delegated_read_with_actor_context(
|
||||
|
|
@ -1118,6 +1215,7 @@ mod tests {
|
|||
typed_bounded_vars: HashSet::new(),
|
||||
typed_bounded_dto_fields: HashMap::new(),
|
||||
self_scoped_session_bases: HashSet::new(),
|
||||
is_nextauth_options_factory: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -712,6 +712,8 @@ pub fn build_function_unit_with_meta(
|
|||
.cloned()
|
||||
.collect();
|
||||
|
||||
let is_nextauth_options_factory = body_returns_nextauth_options(node, bytes);
|
||||
|
||||
AnalysisUnit {
|
||||
kind,
|
||||
name,
|
||||
|
|
@ -734,9 +736,207 @@ pub fn build_function_unit_with_meta(
|
|||
typed_bounded_vars: preseeded_bounded,
|
||||
typed_bounded_dto_fields: std::collections::HashMap::new(),
|
||||
self_scoped_session_bases: state.self_scoped_session_bases,
|
||||
is_nextauth_options_factory,
|
||||
}
|
||||
}
|
||||
|
||||
/// True when the function body at `node` is a NextAuth authority
|
||||
/// surface. Recognises two shapes:
|
||||
///
|
||||
/// 1. An object literal with a `callbacks: { ... }` property whose
|
||||
/// nested entries name at least one canonical NextAuth callback
|
||||
/// (`signIn`, `session`, `jwt`, `redirect`, `authorize`,
|
||||
/// `authorized`). Matches the cal.com idiom
|
||||
/// `export const getOptions = (...) => ({ callbacks: { ... } })`.
|
||||
///
|
||||
/// 2. An object literal whose entries name at least one distinctive
|
||||
/// NextAuth Adapter method (`getUserByAccount`, `linkAccount`,
|
||||
/// `unlinkAccount`, `createVerificationToken`,
|
||||
/// `useVerificationToken`, `getSessionAndUser`) AND at least one
|
||||
/// other canonical Adapter method. Matches the cal.com idiom
|
||||
/// `function CalComAdapter(prisma): Adapter { return { ... } }`
|
||||
/// where the returned Adapter object holds the implementation.
|
||||
///
|
||||
/// In both shapes the inner method bodies are NOT enumerated as
|
||||
/// separate units (object method shorthands stay anonymous), so every
|
||||
/// identity-resolution operation from the inner methods accumulates
|
||||
/// onto the outer factory's unit. Without this flag the outer unit's
|
||||
/// name is `getOptions` / `CalComAdapter`, so `is_nextauth_callback_unit`
|
||||
/// cannot match by name and the missing-ownership rule fires on every
|
||||
/// identity lookup inside the surface.
|
||||
///
|
||||
/// JS/TS-only by construction (matches `object` / `pair` /
|
||||
/// `method_definition` / `shorthand_property_identifier` node kinds).
|
||||
/// Returns false on other languages.
|
||||
fn body_returns_nextauth_options(node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
fn scan(node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
if matches!(node.kind(), "object" | "object_expression")
|
||||
&& (object_has_nextauth_callbacks_property(node, bytes)
|
||||
|| object_is_nextauth_adapter(node, bytes))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
for child in named_children(node) {
|
||||
if scan(child, bytes) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
scan(node, bytes)
|
||||
}
|
||||
|
||||
fn object_has_nextauth_callbacks_property(node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
for entry in named_children(node) {
|
||||
let Some((key_text, value_node)) = object_entry_key_value(entry, bytes) else {
|
||||
continue;
|
||||
};
|
||||
if key_text != "callbacks" {
|
||||
continue;
|
||||
}
|
||||
if matches!(value_node.kind(), "object" | "object_expression")
|
||||
&& object_contains_nextauth_callback_method(value_node, bytes)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn object_contains_nextauth_callback_method(node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
for entry in named_children(node) {
|
||||
if entry.kind() == "method_definition" {
|
||||
if let Some(name_node) = entry.child_by_field_name("name") {
|
||||
let name = text(name_node, bytes);
|
||||
if is_nextauth_callback_name(&name) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if let Some((key_text, _value_node)) = object_entry_key_value(entry, bytes)
|
||||
&& is_nextauth_callback_name(&key_text)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn object_entry_key_value<'a>(entry: Node<'a>, bytes: &[u8]) -> Option<(String, Node<'a>)> {
|
||||
match entry.kind() {
|
||||
"pair" => {
|
||||
let key = entry.child_by_field_name("key")?;
|
||||
let value = entry.child_by_field_name("value")?;
|
||||
Some((object_key_text(key, bytes), value))
|
||||
}
|
||||
"method_definition" => {
|
||||
let name = entry.child_by_field_name("name")?;
|
||||
Some((text(name, bytes), entry))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn object_key_text(node: Node<'_>, bytes: &[u8]) -> String {
|
||||
match node.kind() {
|
||||
"property_identifier" | "identifier" | "shorthand_property_identifier" => text(node, bytes),
|
||||
"string" | "string_literal" => {
|
||||
let raw = text(node, bytes);
|
||||
raw.trim_matches(|c| c == '"' || c == '\'' || c == '`')
|
||||
.to_string()
|
||||
}
|
||||
"computed_property_name" => {
|
||||
if let Some(inner) = node.named_child(0) {
|
||||
object_key_text(inner, bytes)
|
||||
} else {
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
_ => text(node, bytes),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether `name` is exactly one of the NextAuth callback names
/// recognised here: `signIn`, `session`, `jwt`, `redirect`, `authorize`
/// or `authorized`. The comparison is case-sensitive.
fn is_nextauth_callback_name(name: &str) -> bool {
    const CALLBACK_NAMES: [&str; 6] = [
        "signIn",
        "session",
        "jwt",
        "redirect",
        "authorize",
        "authorized",
    ];
    CALLBACK_NAMES.contains(&name)
}
|
||||
|
||||
/// True when the object literal at `node` looks like a NextAuth
|
||||
/// Adapter implementation: at least one distinctive Adapter method
|
||||
/// name AND at least two canonical Adapter method names overall.
|
||||
/// The distinctive subset (`getUserByAccount`, `linkAccount`,
|
||||
/// `unlinkAccount`, `createVerificationToken`, `useVerificationToken`,
|
||||
/// `getSessionAndUser`) names operations that are unique to the
|
||||
/// NextAuth Adapter contract; the broader canonical set (createUser /
|
||||
/// getUser / getUserByEmail / updateUser / deleteUser / createSession /
|
||||
/// updateSession / deleteSession) overlaps with generic CRUD repos, so
|
||||
/// the distinctive-name witness gates the recognition.
|
||||
fn object_is_nextauth_adapter(node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
let mut distinctive_seen = false;
|
||||
let mut total = 0_usize;
|
||||
for entry in named_children(node) {
|
||||
let Some(key_text) = adapter_object_entry_key(entry, bytes) else {
|
||||
continue;
|
||||
};
|
||||
if !is_nextauth_adapter_method_name(&key_text) {
|
||||
continue;
|
||||
}
|
||||
total += 1;
|
||||
if is_nextauth_adapter_distinctive_method_name(&key_text) {
|
||||
distinctive_seen = true;
|
||||
}
|
||||
}
|
||||
distinctive_seen && total >= 2
|
||||
}
|
||||
|
||||
fn adapter_object_entry_key(entry: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||||
match entry.kind() {
|
||||
"method_definition" => entry
|
||||
.child_by_field_name("name")
|
||||
.map(|n| object_key_text(n, bytes)),
|
||||
"pair" => entry
|
||||
.child_by_field_name("key")
|
||||
.map(|n| object_key_text(n, bytes)),
|
||||
"shorthand_property_identifier" => Some(text(entry, bytes)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether `name` is exactly one of the canonical NextAuth
/// Adapter method names listed below (case-sensitive). The list includes
/// both the generic CRUD-style names and the Adapter-distinctive ones.
fn is_nextauth_adapter_method_name(name: &str) -> bool {
    const ADAPTER_METHODS: [&str; 14] = [
        "createUser",
        "getUser",
        "getUserByEmail",
        "getUserByAccount",
        "updateUser",
        "deleteUser",
        "linkAccount",
        "unlinkAccount",
        "createSession",
        "getSessionAndUser",
        "updateSession",
        "deleteSession",
        "createVerificationToken",
        "useVerificationToken",
    ];
    ADAPTER_METHODS.contains(&name)
}
|
||||
|
||||
/// Reports whether `name` is exactly one of the Adapter method names
/// treated as distinctive for NextAuth (case-sensitive):
/// `getUserByAccount`, `linkAccount`, `unlinkAccount`,
/// `createVerificationToken`, `useVerificationToken`, `getSessionAndUser`.
fn is_nextauth_adapter_distinctive_method_name(name: &str) -> bool {
    const DISTINCTIVE_METHODS: [&str; 6] = [
        "getUserByAccount",
        "linkAccount",
        "unlinkAccount",
        "createVerificationToken",
        "useVerificationToken",
        "getSessionAndUser",
    ];
    DISTINCTIVE_METHODS.contains(&name)
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct UnitState {
|
||||
call_sites: Vec<CallSite>,
|
||||
|
|
@ -832,14 +1032,13 @@ fn collect_unit_state(
|
|||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||||
collect_call(node, bytes, rules, state)
|
||||
}
|
||||
"if_statement" | "elif_clause" | "while_statement" | "do_statement" | "if" | "unless"
|
||||
| "if_modifier" | "unless_modifier" | "while_modifier" | "until_modifier"
|
||||
| "while_expression" => {
|
||||
"while_statement" | "do_statement" | "while_modifier" | "until_modifier"
|
||||
| "while_expression" | "unless" | "unless_modifier" => {
|
||||
if let Some(condition) = node.child_by_field_name("condition") {
|
||||
collect_condition(condition, bytes, rules, state);
|
||||
}
|
||||
}
|
||||
"if_expression" => {
|
||||
"if_statement" | "elif_clause" | "if_expression" | "if" | "if_modifier" => {
|
||||
if let Some(condition) = node.child_by_field_name("condition") {
|
||||
collect_condition(condition, bytes, rules, state);
|
||||
}
|
||||
|
|
@ -868,6 +1067,12 @@ fn collect_unit_state(
|
|||
collect_self_actor_binding(node, bytes, rules, state);
|
||||
collect_self_actor_id_binding(node, bytes, state);
|
||||
collect_const_string_binding(node, bytes, state);
|
||||
// JS/TS row-fetch declarators (`const webhook = await
|
||||
// repo.findById(id)`) need row-population recognition so
|
||||
// the post-fetch ownership-equality detector can attribute
|
||||
// back to the row's let line. `collect_row_population`
|
||||
// accepts the `name` field used by `variable_declarator`.
|
||||
collect_row_population(node, bytes, state);
|
||||
}
|
||||
// Go `id := "id"` / Python `id = "id"` / Java `String id = "id";` /
|
||||
// Ruby `id = "id"`, language-specific binding nodes that the
|
||||
|
|
@ -1336,11 +1541,13 @@ fn collect_member_alias_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitSt
|
|||
/// flagged despite a textual auth check on the resulting row.
|
||||
fn collect_row_population(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||||
// Most languages expose `pattern`/`value` on let / const / var
|
||||
// declarations. Ruby `assignment` uses `left`/`right` instead, so
|
||||
// accept either. When both fields are missing, the node isn't an
|
||||
// RHS-bound binding and we skip.
|
||||
// declarations. Ruby `assignment` uses `left`/`right` instead.
|
||||
// JS/TS `variable_declarator` uses `name`/`value`. Accept any of
|
||||
// them; when none is present the node isn't an RHS-bound binding
|
||||
// and we skip.
|
||||
let Some(pattern) = node
|
||||
.child_by_field_name("pattern")
|
||||
.or_else(|| node.child_by_field_name("name"))
|
||||
.or_else(|| node.child_by_field_name("left"))
|
||||
else {
|
||||
return;
|
||||
|
|
@ -2784,8 +2991,8 @@ fn detect_ownership_equality_check(if_node: Node<'_>, bytes: &[u8], state: &mut
|
|||
let Some(operator) = binary_operator_text(condition, bytes) else {
|
||||
return;
|
||||
};
|
||||
let is_ne = matches!(operator.as_str(), "!=" | "ne");
|
||||
let is_eq = matches!(operator.as_str(), "==" | "eq");
|
||||
let is_ne = matches!(operator.as_str(), "!=" | "!==" | "ne");
|
||||
let is_eq = matches!(operator.as_str(), "==" | "===" | "eq");
|
||||
if !is_ne && !is_eq {
|
||||
return;
|
||||
}
|
||||
|
|
@ -2801,7 +3008,7 @@ fn detect_ownership_equality_check(if_node: Node<'_>, bytes: &[u8], state: &mut
|
|||
return;
|
||||
};
|
||||
|
||||
if !branch_has_early_exit(fail_branch) {
|
||||
if !branch_has_early_exit(fail_branch, bytes) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -2925,18 +3132,63 @@ fn resolve_else_block(alt: Node<'_>) -> Node<'_> {
|
|||
alt
|
||||
}
|
||||
|
||||
fn branch_has_early_exit(branch: Node<'_>) -> bool {
|
||||
named_children(branch).into_iter().any(node_is_early_exit)
|
||||
fn branch_has_early_exit(branch: Node<'_>, bytes: &[u8]) -> bool {
|
||||
named_children(branch)
|
||||
.into_iter()
|
||||
.any(|n| node_is_early_exit(n, bytes))
|
||||
}
|
||||
|
||||
fn node_is_early_exit(node: Node<'_>) -> bool {
|
||||
fn node_is_early_exit(node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
match node.kind() {
|
||||
"return_expression" | "return_statement" => true,
|
||||
"expression_statement" => named_children(node).into_iter().any(node_is_early_exit),
|
||||
// Throwing aborts execution flow. Common in JS/TS / Java
|
||||
// (`throw new ForbiddenException()`), Python (`raise ...`),
|
||||
// Ruby (`raise ...`).
|
||||
"throw_statement" | "throw_expression" | "raise_statement" => true,
|
||||
// A call whose callee name is in the framework denial set
|
||||
// (`notFound()` / `redirect()` / `abort()` / `forbidden()` /
|
||||
// `unauthorized()` / etc.) terminates the request. These
|
||||
// helpers either throw under the hood (Next.js, Flask) or
|
||||
// exit the process (`process.exit`, `sys.exit`).
|
||||
"call_expression" | "call" | "method_invocation" => is_denial_call(node, bytes),
|
||||
"expression_statement" => named_children(node)
|
||||
.into_iter()
|
||||
.any(|n| node_is_early_exit(n, bytes)),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Recognise calls that act as request-terminating denial helpers.
|
||||
///
|
||||
/// The callee name is matched against a curated set of framework
|
||||
/// idioms. This is read in `node_is_early_exit` from inside the
|
||||
/// row-ownership-equality detector, where the ambient context already
|
||||
/// requires an `owner.field` vs. `self.id` binary comparison; the
|
||||
/// denial-call match is only the early-exit witness, not the auth
|
||||
/// signal itself.
|
||||
fn is_denial_call(call_node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
let Some(callee_node) = call_node
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| call_node.child_by_field_name("name"))
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
let callee_text = text(callee_node, bytes);
|
||||
let trimmed = callee_text.trim();
|
||||
let leaf = trimmed.rsplit('.').next().unwrap_or(trimmed);
|
||||
let leaf = leaf.rsplit("::").next().unwrap_or(leaf);
|
||||
matches!(
|
||||
leaf,
|
||||
"notFound"
|
||||
| "redirect"
|
||||
| "permanentRedirect"
|
||||
| "unauthorized"
|
||||
| "forbidden"
|
||||
| "abort"
|
||||
| "halt"
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn is_owner_field_subject(subject: &ValueRef) -> bool {
|
||||
let raw = match subject.source_kind {
|
||||
ValueSourceKind::ArrayIndex => subject.base.as_deref().unwrap_or(&subject.name),
|
||||
|
|
@ -5419,4 +5671,220 @@ mod tests {
|
|||
));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trpc_options_destructure_param_seeds_self_scoped_session_base() {
|
||||
// Cal.com-shaped TRPC handler: parameter is a destructured
|
||||
// options alias whose `ctx` field's nested type literal
|
||||
// references `TrpcSessionUser`. `FileMeta::scan` adds
|
||||
// `GetOptions` to `trpc_alias_names` (body-text marker hit);
|
||||
// `collect_trpc_ctx_param` then fires on the
|
||||
// `required_parameter` and seeds `ctx.user` into the unit's
|
||||
// `self_scoped_session_bases`.
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter::Language::from(
|
||||
tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
|
||||
))
|
||||
.unwrap();
|
||||
let src = br#"
|
||||
type TrpcSessionUser = { id: number };
|
||||
type GetOptions = {
|
||||
ctx: { user: NonNullable<TrpcSessionUser> };
|
||||
input: { id: number };
|
||||
};
|
||||
export const handleGet = async ({ ctx, input }: GetOptions) => {
|
||||
return prisma.booking.findFirst({ where: { id: input.id, userId: ctx.user.id } });
|
||||
};
|
||||
"#;
|
||||
let tree = parser.parse(src.as_slice(), None).unwrap();
|
||||
let meta = super::FileMeta::scan(tree.root_node(), src);
|
||||
assert!(
|
||||
meta.trpc_alias_names.contains("GetOptions"),
|
||||
"trpc_alias_names missing GetOptions: {:?}",
|
||||
meta.trpc_alias_names
|
||||
);
|
||||
|
||||
let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled();
|
||||
let mut model = crate::auth_analysis::model::AuthorizationModel::default();
|
||||
super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);
|
||||
let unit = model
|
||||
.units
|
||||
.iter()
|
||||
.find(|u| u.name.as_deref() == Some("handleGet"))
|
||||
.expect("handleGet unit");
|
||||
assert!(
|
||||
unit.self_scoped_session_bases.contains("ctx.user"),
|
||||
"self_scoped_session_bases missing ctx.user: {:?}",
|
||||
unit.self_scoped_session_bases
|
||||
);
|
||||
}
|
||||
|
||||
/// Pin the JS/TS post-fetch ownership-equality recogniser added in
|
||||
/// session 0011. The `if_statement` arm of `collect_unit_state`
|
||||
/// must dispatch to `detect_ownership_equality_check` (previously
|
||||
/// only `if_expression` did), the strict `!==` operator must be
|
||||
/// recognised as inequality, the framework denial helper
|
||||
/// `notFound()` must count as an early-exit witness, and the JS/TS
|
||||
/// `variable_declarator` arm must populate `row_population_data`
|
||||
/// so the synthetic `Ownership` AuthCheck attributes back to the
|
||||
/// row's let line.
|
||||
#[test]
|
||||
fn detect_post_fetch_ownership_jsts_with_strict_neq_and_denial_call() {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter::Language::from(
|
||||
tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
|
||||
))
|
||||
.unwrap();
|
||||
let src = br#"
|
||||
declare class Repo { findById(id: string): Promise<{ userId: number }>; }
|
||||
declare function getServerSession(): Promise<{ user?: { id: number } } | null>;
|
||||
declare function notFound(): never;
|
||||
export async function handleGet({ id }: { id: string }) {
|
||||
const session = await getServerSession();
|
||||
if (!session?.user?.id) return null;
|
||||
const repo: Repo = new Repo();
|
||||
const webhook = await repo.findById(id);
|
||||
if (webhook.userId !== session.user.id) {
|
||||
notFound();
|
||||
}
|
||||
return webhook;
|
||||
}
|
||||
"#;
|
||||
let tree = parser.parse(src.as_slice(), None).unwrap();
|
||||
let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled();
|
||||
let mut model = crate::auth_analysis::model::AuthorizationModel::default();
|
||||
super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);
|
||||
let unit = model
|
||||
.units
|
||||
.iter()
|
||||
.find(|u| u.name.as_deref() == Some("handleGet"))
|
||||
.expect("handleGet unit");
|
||||
|
||||
let webhook_pop = unit
|
||||
.row_population_data
|
||||
.get("webhook")
|
||||
.expect("collect_row_population must populate `webhook` from variable_declarator");
|
||||
// The `let webhook = await repo.findById(id)` line should
|
||||
// anchor at the call site, not the let line. In this fixture
|
||||
// both are on the same line so the back-dating is invisible
|
||||
// here, the assertion is that the entry exists.
|
||||
assert!(webhook_pop.0 > 0);
|
||||
|
||||
let owner_check = unit
|
||||
.auth_checks
|
||||
.iter()
|
||||
.find(|c| matches!(c.kind, super::AuthCheckKind::Ownership))
|
||||
.expect("ownership-equality detector must emit an Ownership AuthCheck");
|
||||
let owner_subject = owner_check
|
||||
.subjects
|
||||
.iter()
|
||||
.find(|s| s.field.as_deref() == Some("userId"))
|
||||
.expect("Ownership AuthCheck must carry the owner field subject");
|
||||
assert_eq!(
|
||||
owner_subject.base.as_deref(),
|
||||
Some("webhook"),
|
||||
"owner subject base must be the row var: {:?}",
|
||||
owner_subject
|
||||
);
|
||||
}
|
||||
|
||||
/// Pin the NextAuth Adapter factory recogniser added in session
|
||||
/// 0030. `body_returns_nextauth_options` must flip on for the
|
||||
/// cal.com `function CalComAdapter(client): Adapter { return {
|
||||
/// createUser, getUser, getUserByAccount, ... } }` shape so that
|
||||
/// `is_nextauth_callback_unit` suppresses the missing-ownership
|
||||
/// rule across the inner Adapter methods (their operations
|
||||
/// accumulate onto the outer factory's unit).
|
||||
#[test]
fn nextauth_adapter_factory_flags_outer_unit() {
    // Fixture: a default-exported factory whose returned object literal
    // carries Adapter-style members, written both as arrow-function
    // properties and as an async method shorthand.
    let src = br#"
declare const prismaClient: any;
export default function CalComAdapter(client: any) {
return {
createUser: async (data: { email: string }) => {
const user = await prismaClient.user.create({ data });
return user;
},
getUser: async (id: string) => {
const user = await prismaClient.user.findUnique({ where: { id } });
return user;
},
async getUserByAccount(providerAccountId: { provider: string; providerAccountId: string }) {
const account = await prismaClient.account.findUnique({
where: { provider_providerAccountId: providerAccountId },
select: { user: true },
});
return account?.user ?? null;
},
createVerificationToken: async (data: any) => prismaClient.verificationToken.create({ data }),
useVerificationToken: async (identifier: any) => prismaClient.verificationToken.delete({ where: identifier }),
linkAccount: async (account: any) => prismaClient.account.create({ data: account }),
unlinkAccount: async (providerAccountId: any) => prismaClient.account.delete({ where: providerAccountId }),
};
}
"#;

    // Parse the fixture with the TypeScript grammar.
    let typescript = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&typescript).unwrap();
    let tree = ts_parser.parse(src.as_slice(), None).unwrap();

    // Collect units with every auth rule disabled; only the factory flag
    // on the outer unit is under test.
    let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled();
    let mut model = crate::auth_analysis::model::AuthorizationModel::default();
    super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);

    let factory = model
        .units
        .iter()
        .find(|candidate| matches!(candidate.name.as_deref(), Some("CalComAdapter")))
        .expect("CalComAdapter unit");
    assert!(
        factory.is_nextauth_options_factory,
        "Adapter factory must set is_nextauth_options_factory: {:?}",
        factory.name
    );
}
|
||||
|
||||
/// Negative case for the NextAuth Adapter recogniser.
///
/// The fixture is a plain repository factory exposing only `createUser`,
/// `getUser`, `updateUser` and `deleteUser`. None of those names is
/// Adapter-distinctive, so the resulting unit must keep
/// `is_nextauth_options_factory == false`; otherwise any ordinary user
/// repo would suppress missing-ownership findings.
#[test]
fn nextauth_adapter_recogniser_rejects_generic_crud_repo() {
    let src = br#"
declare const db: any;
export function makeUserRepo() {
return {
createUser: async (data: any) => db.user.create({ data }),
getUser: async (id: string) => db.user.findUnique({ where: { id } }),
updateUser: async (id: string, data: any) => db.user.update({ where: { id }, data }),
deleteUser: async (id: string) => db.user.delete({ where: { id } }),
};
}
"#;

    // Parse the fixture with the TypeScript grammar.
    let typescript = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&typescript).unwrap();
    let tree = ts_parser.parse(src.as_slice(), None).unwrap();

    let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled();
    let mut model = crate::auth_analysis::model::AuthorizationModel::default();
    super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);

    let repo = model
        .units
        .iter()
        .find(|candidate| matches!(candidate.name.as_deref(), Some("makeUserRepo")))
        .expect("makeUserRepo unit");
    let flagged = repo.is_nextauth_options_factory;
    assert!(
        !flagged,
        "generic CRUD repo must NOT be flagged as Adapter: {:?}",
        repo.name
    );
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1090,6 +1090,7 @@ mod tests {
|
|||
typed_bounded_vars: HashSet::new(),
|
||||
typed_bounded_dto_fields: HashMap::new(),
|
||||
self_scoped_session_bases: HashSet::new(),
|
||||
is_nextauth_options_factory: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1205,6 +1206,7 @@ mod tests {
|
|||
typed_bounded_vars: HashSet::new(),
|
||||
typed_bounded_dto_fields: HashMap::new(),
|
||||
self_scoped_session_bases: HashSet::new(),
|
||||
is_nextauth_options_factory: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -282,6 +282,23 @@ pub struct AnalysisUnit {
|
|||
/// destructures route through a base chain, not a top-level
|
||||
/// binding.
|
||||
pub self_scoped_session_bases: HashSet<String>,
|
||||
/// True when this JS/TS unit is the body of a NextAuth options
|
||||
/// factory: its function body contains an object literal with a
|
||||
/// `callbacks: { ... }` property whose nested entries name at
|
||||
/// least one NextAuth canonical callback (`signIn` / `session` /
|
||||
/// `jwt` / `redirect` / `authorize` / `authorized`). Set by
|
||||
/// `build_function_unit_with_meta` when the file structures the
|
||||
/// options as `export const X = (...) => ({ callbacks: { ... } })`
|
||||
/// (cal.com's `getOptions` shape) rather than the flat
|
||||
/// `export const authOptions = { callbacks: { ... } }` shape.
|
||||
/// Operations inside the inner callback bodies still get
|
||||
/// accumulated under the outer factory unit (the unit-creation
|
||||
/// pass does not descend into object-literal method shorthands),
|
||||
/// so the outer unit is the only place the auth analyser can
|
||||
/// recognise the identity-resolution context. Consulted by
|
||||
/// `is_nextauth_callback_unit` so the missing-ownership check
|
||||
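/// suppresses operations inside the factory. The flag is also expected
/// on NextAuth Adapter factories, i.e. functions returning an object of
/// Adapter methods such as `createUser` / `getUserByAccount` (see the
/// `nextauth_adapter_factory_flags_outer_unit` test).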
|
||||
pub is_nextauth_options_factory: bool,
|
||||
}
|
||||
|
||||
/// Per-function summary of which positional parameters are
|
||||
|
|
|
|||
|
|
@ -521,10 +521,21 @@ pub(super) fn build_switch<'a>(
|
|||
) -> Vec<NodeIndex> {
|
||||
// Locate the case container. Most grammars expose it as field "body"
|
||||
// (JS/TS, Java, C, C++); Go puts cases as direct children of the switch.
|
||||
//
|
||||
// Per-language gotcha: Go's `expression_case` / `default_case` /
|
||||
// `type_case` / `communication_case` map to `Kind::Block` (so the case
|
||||
// body is iterated by the Block handler), so a naive "first Block
|
||||
// child" fallback latches onto the FIRST case as the container, then
|
||||
// walks the case's interior looking for case-like children, finds none,
|
||||
// and falls through to the empty-cases early return (CFG dead-end:
|
||||
// dispatch If has no False edge, every post-switch statement becomes
|
||||
// unreachable). Skip case-kind nodes when picking the container so
|
||||
// Go's flat "cases-as-direct-children" shape uses `ast` itself.
|
||||
let body = ast.child_by_field_name("body").or_else(|| {
|
||||
let mut c = ast.walk();
|
||||
ast.children(&mut c)
|
||||
.find(|n| matches!(lookup(lang, n.kind()), Kind::Block))
|
||||
ast.children(&mut c).find(|n| {
|
||||
matches!(lookup(lang, n.kind()), Kind::Block) && !is_switch_case_kind(n.kind())
|
||||
})
|
||||
});
|
||||
let container = body.unwrap_or(ast);
|
||||
|
||||
|
|
|
|||
|
|
@ -1202,6 +1202,8 @@ fn clone_preserves_all_sub_structs() {
|
|||
defines: Some("r".into()),
|
||||
uses: vec!["a".into(), "b".into()],
|
||||
extra_defines: vec!["c".into()],
|
||||
array_pattern_indices: smallvec::SmallVec::new(),
|
||||
rhs_array_elements: smallvec::SmallVec::new(),
|
||||
},
|
||||
ast: AstMeta {
|
||||
span: (10, 100),
|
||||
|
|
@ -1501,6 +1503,105 @@ fn rust_println_macro_named_arg_lifted() {
|
|||
assert!(found, "no println! macro_invocation node found");
|
||||
}
|
||||
|
||||
/// `format!(URL_FMT, path)` with `URL_FMT` declared as a file-level
/// `const &str` literal: the node that defines `u` must carry a
/// `string_prefix` equal to the literal text before the `{}` placeholder.
/// This lets `is_string_safe_for_ssrf` lock the host exactly as it does
/// for an inline `format!("https://api/{}", path)`.
///
/// If several nodes define `u` with a prefix, the last one in
/// `node_indices()` order is the one compared.
#[test]
fn rust_format_macro_const_first_arg_seeds_string_prefix() {
    let src = b"const URL_FMT: &str = \"https://api.example.com/users/{}\";\n\
                fn f(path: String) { let u = format!(URL_FMT, path); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    let seeded: Option<String> = cfg
        .node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("u"))
        .filter_map(|info| info.string_prefix.as_deref().map(|p| p.to_string()))
        .last();

    assert_eq!(
        seeded.as_deref(),
        Some("https://api.example.com/users/"),
        "expected URL_FMT const to bridge into the format!() string_prefix",
    );
}
|
||||
|
||||
/// Counter-test for the const-to-prefix bridge: `N` is a `usize` const,
/// so no node defining `u` may carry a `string_prefix`.
///
/// The check runs once per node that defines `u`; nodes defining other
/// names are ignored.
#[test]
fn rust_format_macro_const_first_arg_non_string_skipped() {
    let src = b"const N: usize = 4;\n\
                fn f(path: String) { let u = format!(N, path); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    cfg.node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("u"))
        .for_each(|info| {
            assert!(
                info.string_prefix.is_none(),
                "non-string const must not seed a prefix; got {:?}",
                info.string_prefix
            );
        });
}
|
||||
|
||||
/// Same bridge as the `const` case, driven by a file-level
/// `static API_BASE: &str = "...";`. The node defining `u` must end up
/// with the literal text before `{}` as its `string_prefix`.
///
/// If several nodes define `u` with a prefix, the last one in
/// `node_indices()` order is the one compared.
#[test]
fn rust_format_macro_static_first_arg_seeds_string_prefix() {
    let src = b"static API_BASE: &str = \"https://api.example.com/users/{}\";\n\
                fn f(path: String) { let u = format!(API_BASE, path); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    let seeded: Option<String> = cfg
        .node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("u"))
        .filter_map(|info| info.string_prefix.as_deref().map(|p| p.to_string()))
        .last();

    assert_eq!(
        seeded.as_deref(),
        Some("https://api.example.com/users/"),
        "expected static API_BASE to bridge into the format!() string_prefix",
    );
}
|
||||
|
||||
/// Scope guard for the prefix bridge: here `URL_FMT` is declared inside
/// the function body rather than at file level, so no node defining `u`
/// may receive a `string_prefix`. Only file-level declarations take part
/// in the lookup.
#[test]
fn rust_format_macro_inner_const_not_bridged() {
    let src = b"fn f(path: String) {\n\
                const URL_FMT: &str = \"https://api/{}\";\n\
                let u = format!(URL_FMT, path);\n\
                }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    cfg.node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("u"))
        .for_each(|info| {
            assert!(
                info.string_prefix.is_none(),
                "inner-fn const must not bridge; got {:?}",
                info.string_prefix
            );
        });
}
|
||||
|
||||
#[test]
|
||||
fn go_no_import_bindings() {
|
||||
let src = b"package main\nimport alias \"fmt\"\n";
|
||||
|
|
@ -2354,6 +2455,29 @@ fn py_subscript_write_lowers_to_index_set_call() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
fn go_selector_expression_call_sets_receiver() {
    // Regression for the Phase 15 deferred GORM tuple-return case.
    //
    // In Go, `userDb.Raw(sql)` is a `call_expression` whose `function`
    // field is a `selector_expression` (operand = userDb, field = Raw).
    // The CFG `Kind::CallFn` arm has to report `userDb` as the receiver
    // so type-qualified resolution can later rewrite `userDb.Raw` to
    // `GormDb.Raw` once `userDb` is tagged via
    // `constructor_type(Lang::Go, "gorm.Open")`. Before the fix only
    // JS/TS `member_expression`, Python `attribute` and Rust
    // `field_expression` were recognised, leaving Go with receiver=None.
    let src = br#"package main
func f(userDb int) {
userDb.Raw("SELECT 1")
}
"#;
    let (cfg, _entry) = parse_and_build(src, "go", Language::from(tree_sitter_go::LANGUAGE));

    let call_node =
        find_node_with_callee(&cfg, "userDb.Raw").expect("go: userDb.Raw node should be present");
    let receiver = call_node.call.receiver.as_deref();
    assert_eq!(receiver, Some("userDb"));
}
|
||||
|
||||
#[test]
|
||||
fn go_index_expr_read_lowers_to_index_get_call() {
|
||||
with_pointer_on(|| {
|
||||
|
|
@ -3217,3 +3341,620 @@ fn js_ternary_branch_subscript_source_classified() {
|
|||
"expected ternary subscript branch defining `x` to carry a Source label"
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression: a Go `switch` with no `default` arm, whose single case
/// returns, must leave the statements after the switch reachable from the
/// function entry.
///
/// Go's case node kinds (`expression_case`, `default_case`, `type_case`,
/// `communication_case`) map to `Kind::Block`. A "first Block child"
/// container fallback in `build_switch` therefore picked the FIRST case
/// as the container, found no case-like children inside it, and took the
/// empty-cases early return. The dispatch If then had no False edge and
/// every post-switch statement became unreachable. That produced
/// `cfg-unreachable-sanitizer` findings on real code such as gin's
/// `binding/form_mapping.go::setTimeField`.
///
/// The test builds the CFG, walks it breadth-first from the entry, and
/// requires the `after()` call node to be in the visited set.
#[test]
fn go_switch_no_default_keeps_post_switch_reachable() {
    use petgraph::visit::Bfs;
    use std::collections::HashSet;

    let src = br#"package p
func f(x string) bool {
switch tf := x; tf {
case "unix":
return false
}
after()
return true
}
"#;
    let (cfg, entry) = parse_and_build(src, "go", Language::from(tree_sitter_go::LANGUAGE));

    // Drain the BFS walker into a set of every node reachable from `entry`.
    let mut walker = Bfs::new(&cfg, entry);
    let reachable: HashSet<NodeIndex> = std::iter::from_fn(|| walker.next(&cfg)).collect();

    let after = cfg
        .node_indices()
        .find(|&idx| matches!(cfg[idx].call.callee.as_deref(), Some("after")))
        .expect("expected after() Call node");
    assert!(
        reachable.contains(&after),
        "post-switch `after()` must be reachable from entry; got reachable={:?}",
        reachable
    );
}
|
||||
|
||||
/// Python `qs = User.objects` lowers as an `expression_statement`
/// wrapping an `assignment`. The CFG `member_field` detector has to look
/// through that wrapper and record `Some("objects")` on the node defining
/// `qs`, so the type-fact pass can tag the value as `DjangoQuerySet`.
///
/// The test gathers `member_field` from every node that defines `qs` and
/// requires at least one of them to be `Some("objects")`.
#[test]
fn python_member_field_assignment_detected_for_bare_objects() {
    let src = b"def view(req):\n qs = User.objects\n";
    let (cfg, _entry) =
        parse_and_build(src, "python", Language::from(tree_sitter_python::LANGUAGE));

    let detected: Vec<Option<String>> = cfg
        .node_indices()
        .filter_map(|idx| {
            let info = &cfg[idx];
            (info.taint.defines.as_deref() == Some("qs")).then(|| info.member_field.clone())
        })
        .collect();

    let saw_objects = detected.contains(&Some("objects".to_string()));
    assert!(
        saw_objects,
        "expected at least one `qs = ...` CFG node with member_field=Some(\"objects\"); got {:?}",
        detected
    );
}
|
||||
|
||||
/// Negative shape: for `qs = User.profile` the detector must report the
/// real field name (`profile`) and must never report `objects`. This
/// guards the wrapper-unwrapping logic against picking up the wrong
/// field.
#[test]
fn python_member_field_assignment_non_objects_does_not_match() {
    let src = b"def view(req):\n qs = User.profile\n";
    let (cfg, _entry) =
        parse_and_build(src, "python", Language::from(tree_sitter_python::LANGUAGE));

    let detected: Vec<Option<String>> = cfg
        .node_indices()
        .filter_map(|idx| {
            let info = &cfg[idx];
            (info.taint.defines.as_deref() == Some("qs")).then(|| info.member_field.clone())
        })
        .collect();

    let saw_profile = detected.contains(&Some("profile".to_string()));
    assert!(
        saw_profile,
        "expected `qs = User.profile` to detect member_field=Some(\"profile\"); got {:?}",
        detected
    );

    let saw_objects = detected.contains(&Some("objects".to_string()));
    assert!(
        !saw_objects,
        "must not falsely tag non-`objects` field; got {:?}",
        detected
    );
}
|
||||
|
||||
/// Phase 15 chained-shape closure: a Java local of the form
/// `Session sess = sf.openSession();` registers `(fn_start, "sess")`
/// → `TypeKind::HibernateSession` in the per-file local-receiver-types
/// map, so `find_classifiable_inner_call` can rewrite the chained inner
/// `sess.createNativeQuery(...)` to `HibernateSession.createNativeQuery`
/// when the legacy literal-receiver classify misses.
///
/// The thread-local map is reset by a drop guard rather than a trailing
/// statement, so the reset also happens when an `expect` or assertion in
/// this test panics.
#[test]
fn java_hibernate_session_open_registers_local_receiver_type() {
    /// Clears `LOCAL_RECEIVER_TYPES` on drop (normal exit or unwind).
    struct ClearLocalReceiverTypes;
    impl Drop for ClearLocalReceiverTypes {
        fn drop(&mut self) {
            super::LOCAL_RECEIVER_TYPES.with(|cell| cell.borrow_mut().clear());
        }
    }

    /// Returns the start byte of the first `method_declaration` node found
    /// in a depth-first, source-order walk.
    fn find_method_start(node: tree_sitter::Node<'_>) -> Option<usize> {
        if node.kind() == "method_declaration" {
            return Some(node.start_byte());
        }
        let mut c = node.walk();
        for child in node.children(&mut c) {
            if let Some(s) = find_method_start(child) {
                return Some(s);
            }
        }
        None
    }

    let src = br#"
class Foo {
void bar(SessionFactory sf, String sql) {
Session sess = sf.openSession();
sess.createNativeQuery(sql).getResultList();
}
}
"#;
    let _cleanup = ClearLocalReceiverTypes;

    let ts_lang = Language::from(tree_sitter_java::LANGUAGE);
    let _ = parse_to_file_cfg(src, "java", ts_lang);
    // The TLS map is cleared at the end of `build_cfg`, but the
    // public lookup helper consults it during construction. Re-run
    // population manually for the assertion.
    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_java::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    super::populate_local_receiver_types(&tree, "java", src);

    // The map is keyed by the start byte of the enclosing
    // `method_declaration` node.
    let fn_start = find_method_start(tree.root_node()).expect("method_declaration in fixture");
    let got = super::lookup_local_receiver_type(fn_start, "sess");
    assert_eq!(
        got,
        Some(crate::ssa::type_facts::TypeKind::HibernateSession),
        "local `Session sess = sf.openSession()` should bind to HibernateSession"
    );
    // `_cleanup` drops here and resets the TLS map.
}
|
||||
|
||||
/// Same Java per-file map: a local whose RHS is unrelated (no
/// `constructor_type` match) must NOT register. Confirms the recogniser
/// is anchored on `constructor_type`'s callee classifier rather than the
/// declared receiver type, so a generic `Session foo = computeFoo()`
/// doesn't bleed an unrelated method into the type-qualified pool.
///
/// The thread-local map is reset by a drop guard rather than a trailing
/// statement, so the reset also happens when an `expect` or assertion in
/// this test panics.
#[test]
fn java_unrecognised_rhs_does_not_register_local_receiver_type() {
    /// Clears `LOCAL_RECEIVER_TYPES` on drop (normal exit or unwind).
    struct ClearLocalReceiverTypes;
    impl Drop for ClearLocalReceiverTypes {
        fn drop(&mut self) {
            super::LOCAL_RECEIVER_TYPES.with(|cell| cell.borrow_mut().clear());
        }
    }

    /// Returns the start byte of the first `method_declaration` node found
    /// in a depth-first, source-order walk.
    fn find_method_start(node: tree_sitter::Node<'_>) -> Option<usize> {
        if node.kind() == "method_declaration" {
            return Some(node.start_byte());
        }
        let mut c = node.walk();
        for child in node.children(&mut c) {
            if let Some(s) = find_method_start(child) {
                return Some(s);
            }
        }
        None
    }

    let src = br#"
class Foo {
void bar() {
Session sess = computeSomethingUnrelated();
sess.doSomething();
}
}
"#;
    let _cleanup = ClearLocalReceiverTypes;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_java::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    super::populate_local_receiver_types(&tree, "java", src);

    let fn_start = find_method_start(tree.root_node()).expect("method_declaration in fixture");
    let got = super::lookup_local_receiver_type(fn_start, "sess");
    assert_eq!(
        got, None,
        "unrecognised RHS `computeSomethingUnrelated()` must not register a receiver-type"
    );
    // `_cleanup` drops here and resets the TLS map.
}
|
||||
|
||||
/// Exercises `collect_array_pattern_bindings_indexed` on JS
/// `array_pattern` nodes.
///
/// Expected contract, as pinned by the assertions below:
/// - each simple identifier yields `(name, position)` in source order;
/// - an elided slot (two commas with nothing between) advances the
///   position without yielding a binding, so `const [, b]` gives
///   `[("b", 1)]` and `const [a, ,]` gives `[("a", 0)]`;
/// - rest patterns, default-value patterns and nested array patterns make
///   the helper return nothing, so the lowering falls back to scalar
///   union.
#[test]
fn array_pattern_indexed_bindings_recognise_skip_slots() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// Depth-first, source-order search for the first `array_pattern`.
    fn locate_pattern<'t>(node: tree_sitter::Node<'t>) -> Option<tree_sitter::Node<'t>> {
        if node.kind() == "array_pattern" {
            return Some(node);
        }
        let mut cursor = node.walk();
        // Bound to a local so the child iterator, which borrows `cursor`,
        // is dropped before `cursor` goes out of scope.
        let hit = node.children(&mut cursor).find_map(locate_pattern);
        hit
    }

    /// Parses `src` as JavaScript and runs the helper on the first
    /// `array_pattern` in the tree.
    fn bindings_of(src: &[u8]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pattern = locate_pattern(tree.root_node()).expect("array_pattern in fixture");
        collect_array_pattern_bindings_indexed(pattern, &bytes)
            .into_iter()
            .collect()
    }

    /// Builds the owned expectation list from borrowed names.
    fn pairs(items: &[(&str, usize)]) -> Vec<(String, usize)> {
        items.iter().map(|(name, pos)| (name.to_string(), *pos)).collect()
    }

    assert_eq!(bindings_of(b"const [a, b] = x;"), pairs(&[("a", 0), ("b", 1)]));
    assert_eq!(bindings_of(b"const [, b] = x;"), pairs(&[("b", 1)]));
    assert_eq!(bindings_of(b"const [a, ,] = x;"), pairs(&[("a", 0)]));
    assert_eq!(bindings_of(b"const [a, , c] = x;"), pairs(&[("a", 0), ("c", 2)]));
    // Rest patterns bail to empty so callers fall back to scalar union.
    assert!(bindings_of(b"const [a, ...rest] = x;").is_empty());
    // Default value patterns also bail.
    assert!(bindings_of(b"const [a = 1, b] = x;").is_empty());
    // Nested array patterns bail.
    assert!(bindings_of(b"const [[a, b], c] = x;").is_empty());
}
|
||||
|
||||
/// Exercises `collect_array_pattern_bindings_indexed` on Rust
/// `tuple_pattern` nodes.
///
/// The `_` wildcard behaves like a JS skip slot: it consumes a position
/// but yields no binding, so `(_, b)` gives `[("b", 1)]` and `(a, _, c)`
/// gives `[("a", 0), ("c", 2)]`.
#[test]
fn tuple_pattern_indexed_bindings_recognise_rust_wildcards() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// Depth-first, source-order search for the first `tuple_pattern`.
    fn locate_pattern<'t>(node: tree_sitter::Node<'t>) -> Option<tree_sitter::Node<'t>> {
        if node.kind() == "tuple_pattern" {
            return Some(node);
        }
        let mut cursor = node.walk();
        // Bound to a local so the child iterator, which borrows `cursor`,
        // is dropped before `cursor` goes out of scope.
        let hit = node.children(&mut cursor).find_map(locate_pattern);
        hit
    }

    /// Parses `src` as Rust and runs the helper on the first
    /// `tuple_pattern` in the tree.
    fn bindings_of(src: &[u8]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pattern = locate_pattern(tree.root_node()).expect("tuple_pattern in fixture");
        collect_array_pattern_bindings_indexed(pattern, &bytes)
            .into_iter()
            .collect()
    }

    /// Builds the owned expectation list from borrowed names.
    fn pairs(items: &[(&str, usize)]) -> Vec<(String, usize)> {
        items.iter().map(|(name, pos)| (name.to_string(), *pos)).collect()
    }

    assert_eq!(
        bindings_of(b"fn f() { let (a, b) = (1, 2); }"),
        pairs(&[("a", 0), ("b", 1)]),
    );
    assert_eq!(
        bindings_of(b"fn f() { let (_, b) = (1, 2); }"),
        pairs(&[("b", 1)]),
    );
    assert_eq!(
        bindings_of(b"fn f() { let (a, _) = (1, 2); }"),
        pairs(&[("a", 0)]),
    );
    assert_eq!(
        bindings_of(b"fn f() { let (a, _, c) = (1, 2, 3); }"),
        pairs(&[("a", 0), ("c", 2)]),
    );
}
|
||||
|
||||
/// Exercises `collect_array_pattern_bindings_indexed` on Python
/// destructuring targets: `pattern_list` for the bare `a, b = ...` form
/// and `tuple_pattern` for the parenthesised `(a, b) = ...` form.
///
/// In Python `_` is an ordinary identifier, so it is reported at its
/// source position like any other name. A splat target (`a, *rest`) and a
/// nested destructure both make the helper return nothing, so callers
/// fall back to scalar union.
#[test]
fn pattern_list_indexed_bindings_recognise_python_destructure() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// Depth-first, source-order search for the first node whose kind is
    /// one of `kinds`.
    fn locate_pattern<'t>(
        node: tree_sitter::Node<'t>,
        kinds: &[&str],
    ) -> Option<tree_sitter::Node<'t>> {
        if kinds.iter().any(|kind| *kind == node.kind()) {
            return Some(node);
        }
        let mut cursor = node.walk();
        // Bound to a local so the child iterator, which borrows `cursor`,
        // is dropped before `cursor` goes out of scope.
        let hit = node
            .children(&mut cursor)
            .find_map(|child| locate_pattern(child, kinds));
        hit
    }

    /// Parses `src` as Python and runs the helper on the first node
    /// matching `kinds`.
    fn bindings_of(src: &[u8], kinds: &[&str]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_python::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pattern = locate_pattern(tree.root_node(), kinds)
            .unwrap_or_else(|| panic!("no {kinds:?} in fixture"));
        collect_array_pattern_bindings_indexed(pattern, &bytes)
            .into_iter()
            .collect()
    }

    /// Builds the owned expectation list from borrowed names.
    fn pairs(items: &[(&str, usize)]) -> Vec<(String, usize)> {
        items.iter().map(|(name, pos)| (name.to_string(), *pos)).collect()
    }

    // Bare comma-list `a, b = ...` is `pattern_list`.
    assert_eq!(
        bindings_of(b"a, b = (1, 2)\n", &["pattern_list"]),
        pairs(&[("a", 0), ("b", 1)]),
    );
    // Three-binding bare comma list.
    assert_eq!(
        bindings_of(b"a, b, c = (1, 2, 3)\n", &["pattern_list"]),
        pairs(&[("a", 0), ("b", 1), ("c", 2)]),
    );
    // Underscore is a regular identifier binding in Python.
    assert_eq!(
        bindings_of(b"_, b = (1, 2)\n", &["pattern_list"]),
        pairs(&[("_", 0), ("b", 1)]),
    );
    assert_eq!(
        bindings_of(b"a, _ = (1, 2)\n", &["pattern_list"]),
        pairs(&[("a", 0), ("_", 1)]),
    );
    // Parenthesised destructure surfaces as `tuple_pattern`.
    assert_eq!(
        bindings_of(b"(a, b) = (1, 2)\n", &["tuple_pattern"]),
        pairs(&[("a", 0), ("b", 1)]),
    );
    // Splat / rest bindings bail because positional mapping breaks.
    assert!(bindings_of(b"a, *rest = (1, 2, 3)\n", &["pattern_list"]).is_empty());
    // Nested destructure bails: the recogniser does not recurse into
    // sub-patterns, preserving flat-binding-only semantics.
    assert!(bindings_of(b"(a, b), c = ((1, 2), 3)\n", &["pattern_list"]).is_empty());
}
|
||||
|
||||
/// Exercises `collect_array_pattern_bindings_indexed` on Ruby
/// `left_assignment_list` nodes, the LHS tree-sitter-ruby produces for
/// `a, b = ...`.
///
/// Identifiers are reported as `(name, position)` in source order, and
/// `_` counts as a normal identifier. `rest_assignment` (`*rest`) and
/// `destructured_left_assignment` (parenthesised nested destructure) make
/// the helper return nothing, so callers fall back to scalar union.
#[test]
fn left_assignment_list_indexed_bindings_recognise_ruby_destructure() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// Depth-first, source-order search for the first
    /// `left_assignment_list`.
    fn locate_pattern<'t>(node: tree_sitter::Node<'t>) -> Option<tree_sitter::Node<'t>> {
        if node.kind() == "left_assignment_list" {
            return Some(node);
        }
        let mut cursor = node.walk();
        // Bound to a local so the child iterator, which borrows `cursor`,
        // is dropped before `cursor` goes out of scope.
        let hit = node.children(&mut cursor).find_map(locate_pattern);
        hit
    }

    /// Parses `src` as Ruby and runs the helper on the first
    /// `left_assignment_list` in the tree.
    fn bindings_of(src: &[u8]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_ruby::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pattern =
            locate_pattern(tree.root_node()).expect("left_assignment_list in fixture");
        collect_array_pattern_bindings_indexed(pattern, &bytes)
            .into_iter()
            .collect()
    }

    /// Builds the owned expectation list from borrowed names.
    fn pairs(items: &[(&str, usize)]) -> Vec<(String, usize)> {
        items.iter().map(|(name, pos)| (name.to_string(), *pos)).collect()
    }

    assert_eq!(bindings_of(b"a, b = [x, y]\n"), pairs(&[("a", 0), ("b", 1)]));
    assert_eq!(
        bindings_of(b"a, b, c = [x, y, z]\n"),
        pairs(&[("a", 0), ("b", 1), ("c", 2)]),
    );
    // Underscore is a regular identifier binding in Ruby (idiomatic
    // "unused" marker, but still resolvable in scope).
    assert_eq!(bindings_of(b"_, b = [x, y]\n"), pairs(&[("_", 0), ("b", 1)]));
    assert_eq!(bindings_of(b"a, _ = [x, y]\n"), pairs(&[("a", 0), ("_", 1)]));
    // Call return value: the helper walks the LHS regardless of RHS shape.
    assert_eq!(bindings_of(b"a, b = func()\n"), pairs(&[("a", 0), ("b", 1)]));
    // Splat tail bails because rest_assignment is a complex sub-pattern.
    assert!(bindings_of(b"a, *rest = [x, y, z]\n").is_empty());
    // Parenthesised nested destructure bails because
    // destructured_left_assignment isn't in the simple-identifier
    // whitelist.
    assert!(bindings_of(b"(a, b) = [x, y]\n").is_empty());
}
|
||||
|
||||
/// Exercises `collect_rhs_array_literal_elements`, the helper used by the
/// bare-array destructure rewrite in `src/ssa/lower.rs`.
///
/// Every fixture is parsed with the grammar for its language, the first
/// node of the requested kind is located, and the helper's output is
/// compared slot by slot. A slot is `Ident(name)`, `Literal`, or
/// `Complex { uses, source_cap }`. Shapes that shift index alignment
/// (spread / list splat) and nodes that are not array-shaped must yield
/// an empty result.
#[test]
fn rhs_array_literal_elements_recognise_per_language_shapes() {
    use super::RhsArraySlot;
    use super::helpers::collect_rhs_array_literal_elements;

    /// Map a language label to its tree-sitter grammar.
    fn grammar_for(label: &str) -> Language {
        match label {
            "javascript" => Language::from(tree_sitter_javascript::LANGUAGE),
            "typescript" => Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
            "python" => Language::from(tree_sitter_python::LANGUAGE),
            "ruby" => Language::from(tree_sitter_ruby::LANGUAGE),
            "rust" => Language::from(tree_sitter_rust::LANGUAGE),
            other => panic!("unsupported lang: {}", other),
        }
    }

    /// Parse `src` and hand back the tree together with an owned copy of
    /// the bytes it was parsed from.
    fn parse(lang_label: &str, src: &[u8]) -> (tree_sitter::Tree, Vec<u8>) {
        let grammar = grammar_for(lang_label);
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&grammar).unwrap();
        let tree = parser.parse(src, None).unwrap();
        (tree, src.to_vec())
    }

    /// Pre-order search for the first node whose kind appears in `wanted`.
    fn first_of_kind<'t>(
        node: tree_sitter::Node<'t>,
        wanted: &[&str],
    ) -> Option<tree_sitter::Node<'t>> {
        if wanted.contains(&node.kind()) {
            return Some(node);
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            let hit = first_of_kind(child, wanted);
            if hit.is_some() {
                return hit;
            }
        }
        None
    }

    /// Parse the fixture, locate the RHS node, and collect its slots.
    fn run(lang: &str, src: &[u8], rhs_kinds: &[&str]) -> Vec<RhsArraySlot> {
        let (tree, bytes) = parse(lang, src);
        let rhs = first_of_kind(tree.root_node(), rhs_kinds).expect("rhs in fixture");
        let slots = collect_rhs_array_literal_elements(rhs, lang, &bytes, None);
        Vec::from_iter(slots)
    }

    /// Expected bare-identifier slot.
    fn ident(name: &str) -> RhsArraySlot {
        RhsArraySlot::Ident(name.to_string())
    }

    /// Expected `Complex` slot carrying the given capability set.
    fn complex_with(uses: &[&str], source_cap: crate::labels::Cap) -> RhsArraySlot {
        let uses = uses.iter().map(|s| s.to_string()).collect();
        RhsArraySlot::Complex { uses, source_cap }
    }

    /// Expected `Complex` slot that did not classify as a Source.
    fn complex(uses: &[&str]) -> RhsArraySlot {
        complex_with(uses, crate::labels::Cap::empty())
    }

    /// Expected `Complex` slot that did classify as a Source.
    fn complex_source(uses: &[&str]) -> RhsArraySlot {
        complex_with(uses, crate::labels::Cap::all())
    }

    // ── JavaScript / TypeScript `array` ────────────────────────────────

    // Two bare identifiers.
    assert_eq!(
        run("javascript", b"const _ = [safe, tainted];\n", &["array"]),
        vec![ident("safe"), ident("tainted")],
    );

    // Identifier next to a string literal.
    assert_eq!(
        run("javascript", b"const _ = [tainted, \"ok\"];\n", &["array"]),
        vec![ident("tainted"), RhsArraySlot::Literal],
    );

    // A call is reported as `Complex` with its inner identifiers instead
    // of causing a bail.
    assert_eq!(
        run("javascript", b"const _ = [fn(x), 'lit'];\n", &["array"]),
        vec![complex(&["fn", "x"]), RhsArraySlot::Literal],
    );

    // A member access is `Complex`: the dotted path comes first, then its
    // component identifiers. This slot is additionally expected to carry
    // a Source capability set.
    assert_eq!(
        run(
            "javascript",
            b"const _ = [req.query.x, 'lit'];\n",
            &["array"],
        ),
        vec![
            complex_source(&["req.query.x", "req", "query", "x"]),
            RhsArraySlot::Literal,
        ],
    );

    // Sibling precision: a Source-classified `Complex` slot next to a
    // `Complex` slot that does not classify as Source. The second slot
    // must keep an empty capability set.
    assert_eq!(
        run(
            "javascript",
            b"const _ = [process.env.X, helper(local)];\n",
            &["array"],
        ),
        vec![
            complex_source(&["process.env.X", "process", "env", "X"]),
            complex(&["helper", "local"]),
        ],
    );

    // Spread shifts index alignment, so the whole literal bails.
    assert!(run("javascript", b"const _ = [...arr, b];\n", &["array"]).is_empty());

    // A binary expression is `Complex` with the inner identifier.
    assert_eq!(
        run(
            "javascript",
            b"const _ = ['log-' + x, 'lit'];\n",
            &["array"],
        ),
        vec![complex(&["x"]), RhsArraySlot::Literal],
    );

    // ── Python ─────────────────────────────────────────────────────────

    // `list`.
    assert_eq!(
        run("python", b"a = [safe, tainted]\n", &["list"]),
        vec![ident("safe"), ident("tainted")],
    );

    // `expression_list`: the bare comma RHS of `a, b = x, y`.
    assert_eq!(
        run("python", b"a, b = safe, tainted\n", &["expression_list"]),
        vec![ident("safe"), ident("tainted")],
    );

    // Parenthesised `tuple`.
    assert_eq!(
        run("python", b"x = (safe, 42)\n", &["tuple"]),
        vec![ident("safe"), RhsArraySlot::Literal],
    );

    // List splat bails.
    assert!(run("python", b"x = [*a, b]\n", &["list"]).is_empty());

    // ── Ruby `array` ───────────────────────────────────────────────────

    assert_eq!(
        run("ruby", b"a, b = [safe, tainted]\n", &["array"]),
        vec![ident("safe"), ident("tainted")],
    );

    // Identifier next to a string literal.
    assert_eq!(
        run("ruby", b"a, b = [tainted, \"safe\"]\n", &["array"]),
        vec![ident("tainted"), RhsArraySlot::Literal],
    );

    // ── Rust `tuple_expression` ────────────────────────────────────────

    assert_eq!(
        run(
            "rust",
            b"fn f(safe: &str, tainted: &str) { let _ = (safe, tainted); }\n",
            &["tuple_expression"]
        ),
        vec![ident("safe"), ident("tainted")],
    );

    // ── Defensive guard ────────────────────────────────────────────────

    // A node that is not array-shaped yields no slots.
    assert!(run("javascript", b"const x = tainted;\n", &["identifier"]).is_empty());
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use super::helpers::first_member_label;
|
|||
use super::{
|
||||
AstMeta, Cfg, EdgeKind, MAX_COND_VARS, MAX_CONDITION_TEXT_LEN, NodeInfo, StmtKind,
|
||||
collect_idents, connect_all, detect_eq_with_const, detect_negation, has_call_descendant,
|
||||
member_expr_text, push_node, text_of,
|
||||
member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
|
||||
};
|
||||
use crate::labels::{DataLabel, LangAnalysisRules, classify};
|
||||
use crate::utils::snippet::truncate_at_char_boundary;
|
||||
|
|
@ -378,7 +378,24 @@ pub(super) fn lower_ternary_branch<'a>(
|
|||
}
|
||||
|
||||
connect_all(g, preds, node, pred_edge);
|
||||
vec![node]
|
||||
|
||||
// React JSX `dangerouslySetInnerHTML={{__html: x}}` synthesis when the
|
||||
// branch expression is itself a JSX element (or contains one as a
|
||||
// descendant). Without this, `cond ? <div dangerouslySetInnerHTML=...
|
||||
// /> : null` and similar ternary-RHS shapes never reach the
|
||||
// `Kind::Return` / `Kind::Assignment` arms that own the synthesis hook,
|
||||
// because `build_ternary_diamond` lowers each branch directly.
|
||||
let post_jsx = try_lower_jsx_dangerous_html(
|
||||
branch_ast,
|
||||
&[node],
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
);
|
||||
post_jsx
|
||||
}
|
||||
|
||||
/// Extract `(lhs_ast, ternary_ast)` when `outer_ast` is an expression-statement
|
||||
|
|
|
|||
|
|
@ -554,3 +554,469 @@ fn collect_ruby_symbol_list(node: Node<'_>, code: &[u8], out: &mut Vec<String>)
|
|||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract route-path capture variable names from framework routing decorators
|
||||
/// on a function AST node.
|
||||
///
|
||||
/// Supported languages:
|
||||
/// * Python: walks Flask-style `@app.route("/users/<name>")`,
|
||||
/// blueprint-prefixed `@bp.get("/u/<int:id>")`, and verb-shaped
|
||||
/// `@router.post("/<path:slug>")` decorators. Returns inner names from
|
||||
/// `<name>` / `<conv:name>` brace-segments.
|
||||
/// * Ruby: walks Sinatra `get "/u/:name" do |name| ... end`. The
|
||||
/// `func_node` is the `do_block`; its parent `call` carries the verb
|
||||
/// in the `method` field and the path pattern in the first positional
|
||||
/// string argument. Returns inner names from `:name` colon-segments.
|
||||
///
|
||||
/// Functions without a recognised routing pattern return an empty `Vec`.
|
||||
/// Strict additive: downstream consumers gate the result via
|
||||
/// `param.contains(name)` so empty captures preserve today's behaviour.
|
||||
pub(super) fn extract_route_path_captures<'a>(
|
||||
func_node: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<String> {
|
||||
let mut out: Vec<String> = Vec::new();
|
||||
match lang {
|
||||
"python" => extract_python_route_captures(func_node, code, &mut out),
|
||||
"ruby" => extract_ruby_route_captures(func_node, code, &mut out),
|
||||
_ => {}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn extract_python_route_captures<'a>(func_node: Node<'a>, code: &'a [u8], out: &mut Vec<String>) {
|
||||
let Some(parent) = func_node.parent() else {
|
||||
return;
|
||||
};
|
||||
if parent.kind() != "decorated_definition" {
|
||||
return;
|
||||
}
|
||||
let mut w = parent.walk();
|
||||
for ch in parent.children(&mut w) {
|
||||
if ch.kind() != "decorator" {
|
||||
continue;
|
||||
}
|
||||
let mut dw = ch.walk();
|
||||
let Some(expr) = ch.children(&mut dw).find(|c| c.kind() != "@") else {
|
||||
continue;
|
||||
};
|
||||
if expr.kind() != "call" {
|
||||
continue;
|
||||
}
|
||||
let Some(target) = expr.child_by_field_name("function") else {
|
||||
continue;
|
||||
};
|
||||
if target.kind() != "attribute" {
|
||||
continue;
|
||||
}
|
||||
let Some(attr) = target.child_by_field_name("attribute") else {
|
||||
continue;
|
||||
};
|
||||
let Some(attr_text) = text_of(attr, code) else {
|
||||
continue;
|
||||
};
|
||||
let attr_lower = attr_text.to_ascii_lowercase();
|
||||
let is_route_verb = matches!(
|
||||
attr_lower.as_str(),
|
||||
"route" | "get" | "post" | "put" | "patch" | "delete" | "head" | "options"
|
||||
);
|
||||
if !is_route_verb {
|
||||
continue;
|
||||
}
|
||||
let Some(args) = expr.child_by_field_name("arguments") else {
|
||||
continue;
|
||||
};
|
||||
let Some(pattern) = first_positional_string_arg(args, code) else {
|
||||
continue;
|
||||
};
|
||||
collect_flask_path_captures(&pattern, out);
|
||||
collect_fastapi_path_captures(&pattern, out);
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk up from a Ruby `do_block` / `block` to the enclosing `call`.
|
||||
/// If the call's method is a Sinatra-style HTTP verb and its first
|
||||
/// positional argument is a static string literal, parse Sinatra
|
||||
/// `:name` path captures into `out`.
|
||||
fn extract_ruby_route_captures<'a>(func_node: Node<'a>, code: &'a [u8], out: &mut Vec<String>) {
|
||||
let Some(parent) = func_node.parent() else {
|
||||
return;
|
||||
};
|
||||
if parent.kind() != "call" {
|
||||
return;
|
||||
}
|
||||
let Some(method_node) = parent.child_by_field_name("method") else {
|
||||
return;
|
||||
};
|
||||
let Some(verb) = text_of(method_node, code) else {
|
||||
return;
|
||||
};
|
||||
let verb_lc = verb.to_ascii_lowercase();
|
||||
let is_sinatra_verb = matches!(
|
||||
verb_lc.as_str(),
|
||||
"get" | "post" | "put" | "patch" | "delete" | "head" | "options" | "link" | "unlink"
|
||||
);
|
||||
if !is_sinatra_verb {
|
||||
return;
|
||||
}
|
||||
let Some(args) = parent.child_by_field_name("arguments") else {
|
||||
return;
|
||||
};
|
||||
let Some(pattern) = first_positional_string_arg_ruby(args, code) else {
|
||||
return;
|
||||
};
|
||||
collect_sinatra_path_captures(&pattern, out);
|
||||
}
|
||||
|
||||
/// Return the literal text of the first positional string argument inside a
|
||||
/// Python `argument_list`. Skips keyword args and non-string positionals.
|
||||
fn first_positional_string_arg(args: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
match arg.kind() {
|
||||
"(" | ")" | "," => continue,
|
||||
"keyword_argument" => continue,
|
||||
"string" => {
|
||||
return python_string_text(arg, code);
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Strip Python string-literal quoting from a `string` AST node. Rejects
|
||||
/// f-strings (interpolation children present) because the captured pattern
|
||||
/// is not statically known.
|
||||
fn python_string_text(node: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = node.walk();
|
||||
for ch in node.children(&mut cursor) {
|
||||
if ch.kind() == "interpolation" {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
let raw = text_of(node, code)?;
|
||||
let trimmed = raw.trim();
|
||||
let trimmed = trimmed.trim_start_matches(['r', 'R', 'b', 'B', 'u', 'U', 'f', 'F']);
|
||||
let stripped = trimmed
|
||||
.strip_prefix("\"\"\"")
|
||||
.and_then(|s| s.strip_suffix("\"\"\""))
|
||||
.or_else(|| {
|
||||
trimmed
|
||||
.strip_prefix("'''")
|
||||
.and_then(|s| s.strip_suffix("'''"))
|
||||
})
|
||||
.or_else(|| trimmed.strip_prefix('"').and_then(|s| s.strip_suffix('"')))
|
||||
.or_else(|| {
|
||||
trimmed
|
||||
.strip_prefix('\'')
|
||||
.and_then(|s| s.strip_suffix('\''))
|
||||
})?;
|
||||
Some(stripped.to_string())
|
||||
}
|
||||
|
||||
/// Return the literal text of the first positional string argument inside a
|
||||
/// Ruby `argument_list`. Hash literals (`pair`), block arguments,
|
||||
/// hash-splat arguments, and non-string positionals all return `None`.
|
||||
fn first_positional_string_arg_ruby(args: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
match arg.kind() {
|
||||
"(" | ")" | "," => continue,
|
||||
"pair" | "hash" | "block_argument" | "hash_splat_argument" => return None,
|
||||
"string" => return ruby_string_text(arg, code),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Strip Ruby string-literal quoting from a `string` AST node. Rejects
|
||||
/// strings with `#{...}` interpolation (the captured pattern is not
|
||||
/// statically known). Returns the concatenation of `string_content`
|
||||
/// children.
|
||||
fn ruby_string_text(node: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = node.walk();
|
||||
let mut content = String::new();
|
||||
let mut had_content = false;
|
||||
for ch in node.children(&mut cursor) {
|
||||
match ch.kind() {
|
||||
"interpolation" => return None,
|
||||
"string_content" => {
|
||||
if let Some(t) = text_of(ch, code) {
|
||||
content.push_str(&t);
|
||||
had_content = true;
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
if had_content { Some(content) } else { None }
|
||||
}
|
||||
|
||||
/// Parse Sinatra-style `:name` capture segments out of a route pattern.
///
/// A capture is a `:` followed by a non-empty run of `[A-Za-z0-9_]`. The
/// `:` is only honoured when it opens a path segment, i.e. at the start
/// of the pattern or immediately after a `/`, so `Foo::Bar` style names
/// embedded in a non-routing string are not mis-parsed as captures.
///
/// Each name is lowercased (ASCII) and appended to `out` unless `out`
/// already contains it; first-seen order is preserved.
fn collect_sinatra_path_captures(pattern: &str, out: &mut Vec<String>) {
    // Splitting on `/` yields exactly the positions the boundary rule
    // allows: the pattern start and everything right after a slash.
    for segment in pattern.split('/') {
        let Some(tail) = segment.strip_prefix(':') else {
            continue;
        };
        let len = tail
            .bytes()
            .take_while(|b| b.is_ascii_alphanumeric() || *b == b'_')
            .count();
        if len == 0 {
            // Lone `:` or `::`: no identifier follows.
            continue;
        }
        // The counted bytes are all ASCII, so `len` is a char boundary.
        let name = tail[..len].to_ascii_lowercase();
        if !out.contains(&name) {
            out.push(name);
        }
    }
}
|
||||
|
||||
/// Parse FastAPI / Starlette-style `{name}` / `{name:converter}` capture
/// segments out of a route pattern.
///
/// FastAPI puts the name FIRST (`{item_id:int}`), unlike Flask which puts
/// the converter first (`<int:item_id>`), so the name is whatever
/// precedes the first `:` inside the braces, with surrounding whitespace
/// trimmed. A segment is skipped when its name is empty or contains
/// anything other than `[A-Za-z0-9_]`. Scanning stops at an opening `{`
/// that has no closing `}`.
///
/// Each accepted name is lowercased (ASCII) and appended to `out` unless
/// `out` already contains it; first-seen order is preserved.
fn collect_fastapi_path_captures(pattern: &str, out: &mut Vec<String>) {
    let mut rest = pattern;
    while let Some(open) = rest.find('{') {
        let after_open = &rest[open + 1..];
        let Some(close) = after_open.find('}') else {
            // Unclosed brace: no later segment can be well-formed either.
            break;
        };
        let inner = &after_open[..close];
        let name = inner.split(':').next().unwrap_or(inner).trim();
        if !name.is_empty() && name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
            let lower = name.to_ascii_lowercase();
            if !out.contains(&lower) {
                out.push(lower);
            }
        }
        rest = &after_open[close + 1..];
    }
}
|
||||
|
||||
/// Parse Flask-style `<conv:name>` / `<name>` capture segments out of a
/// route pattern.
///
/// The name is whatever follows the LAST `:` inside the angle brackets
/// (or the whole segment when there is no `:`), with surrounding
/// whitespace trimmed. A segment is skipped when its name is empty or
/// contains anything other than `[A-Za-z0-9_]`. Scanning stops at an
/// opening `<` that has no closing `>`.
///
/// Each accepted name is lowercased (ASCII) and appended to `out` unless
/// `out` already contains it; first-seen order is preserved.
fn collect_flask_path_captures(pattern: &str, out: &mut Vec<String>) {
    let mut rest = pattern;
    while let Some(open) = rest.find('<') {
        let after_open = &rest[open + 1..];
        let Some(close) = after_open.find('>') else {
            // Unclosed bracket: no later segment can be well-formed either.
            break;
        };
        let inner = &after_open[..close];
        // The converter, if any, comes first; the name is the last part.
        let name = inner.rsplit_once(':').map_or(inner, |(_, n)| n).trim();
        if !name.is_empty() && name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
            let lower = name.to_ascii_lowercase();
            if !out.contains(&lower) {
                out.push(lower);
            }
        }
        rest = &after_open[close + 1..];
    }
}
|
||||
|
||||
/// Unit tests for the three route-pattern parsers: Flask `<...>`,
/// Sinatra `:name`, and FastAPI `{...}`.
#[cfg(test)]
mod path_capture_tests {
    use super::*;

    /// Run one parser against `pattern`, starting from an empty output.
    fn run(parser: fn(&str, &mut Vec<String>), pattern: &str) -> Vec<String> {
        let mut captured = Vec::new();
        parser(pattern, &mut captured);
        captured
    }

    fn collect_for(pat: &str) -> Vec<String> {
        run(collect_flask_path_captures, pat)
    }

    fn collect_sinatra_for(pat: &str) -> Vec<String> {
        run(collect_sinatra_path_captures, pat)
    }

    fn collect_fastapi_for(pat: &str) -> Vec<String> {
        run(collect_fastapi_path_captures, pat)
    }

    /// Build the expected owned name list from string literals.
    fn names(expected: &[&str]) -> Vec<String> {
        expected.iter().map(|s| s.to_string()).collect()
    }

    // ── Flask `<conv:name>` ────────────────────────────────────────────

    #[test]
    fn extracts_bare_capture() {
        assert_eq!(collect_for("/users/<name>"), names(&["name"]));
    }

    #[test]
    fn extracts_converter_capture() {
        assert_eq!(collect_for("/items/<int:item_id>"), names(&["item_id"]));
    }

    #[test]
    fn extracts_path_converter() {
        assert_eq!(collect_for("/x/<path:slug>"), names(&["slug"]));
    }

    #[test]
    fn extracts_multiple_captures() {
        assert_eq!(
            collect_for("/u/<uid>/post/<int:pid>"),
            names(&["uid", "pid"])
        );
    }

    #[test]
    fn dedupes_repeated_names() {
        assert_eq!(collect_for("/<a>/<a>"), names(&["a"]));
    }

    #[test]
    fn rejects_unclosed_brace() {
        assert_eq!(collect_for("/<oops"), names(&[]));
    }

    #[test]
    fn rejects_non_ident_chars() {
        for pattern in ["/<bad name>", "/<name!>"] {
            assert_eq!(collect_for(pattern), names(&[]));
        }
    }

    #[test]
    fn empty_when_no_captures() {
        assert_eq!(collect_for("/static/path"), names(&[]));
    }

    // ── Sinatra `:name` ────────────────────────────────────────────────

    #[test]
    fn sinatra_extracts_bare_capture() {
        assert_eq!(collect_sinatra_for("/users/:name"), names(&["name"]));
    }

    #[test]
    fn sinatra_extracts_multiple_captures() {
        assert_eq!(
            collect_sinatra_for("/u/:uid/post/:pid"),
            names(&["uid", "pid"])
        );
    }

    #[test]
    fn sinatra_extracts_leading_capture() {
        assert_eq!(collect_sinatra_for(":root"), names(&["root"]));
    }

    #[test]
    fn sinatra_dedupes_repeated_names() {
        assert_eq!(collect_sinatra_for("/:a/:a"), names(&["a"]));
    }

    #[test]
    fn sinatra_ignores_double_colon() {
        assert_eq!(collect_sinatra_for("/Foo::Bar"), names(&[]));
    }

    #[test]
    fn sinatra_ignores_lone_colon() {
        assert_eq!(collect_sinatra_for("/users/:"), names(&[]));
    }

    #[test]
    fn sinatra_empty_when_no_captures() {
        assert_eq!(collect_sinatra_for("/static/path"), names(&[]));
    }

    // ── FastAPI `{name:conv}` ──────────────────────────────────────────

    #[test]
    fn fastapi_extracts_bare_capture() {
        assert_eq!(collect_fastapi_for("/items/{item_id}"), names(&["item_id"]));
    }

    #[test]
    fn fastapi_extracts_converter_capture() {
        assert_eq!(
            collect_fastapi_for("/items/{item_id:int}"),
            names(&["item_id"])
        );
    }

    #[test]
    fn fastapi_extracts_path_converter() {
        assert_eq!(
            collect_fastapi_for("/files/{file_path:path}"),
            names(&["file_path"])
        );
    }

    #[test]
    fn fastapi_extracts_multiple_captures() {
        assert_eq!(
            collect_fastapi_for("/u/{uid}/post/{pid:int}"),
            names(&["uid", "pid"])
        );
    }

    #[test]
    fn fastapi_dedupes_repeated_names() {
        assert_eq!(collect_fastapi_for("/{a}/{a}"), names(&["a"]));
    }

    #[test]
    fn fastapi_rejects_unclosed_brace() {
        assert_eq!(collect_fastapi_for("/{oops"), names(&[]));
    }

    #[test]
    fn fastapi_rejects_non_ident_chars() {
        for pattern in ["/{bad name}", "/{name!}"] {
            assert_eq!(collect_fastapi_for(pattern), names(&[]));
        }
    }

    #[test]
    fn fastapi_empty_when_no_captures() {
        assert_eq!(collect_fastapi_for("/static/path"), names(&[]));
    }
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use super::anon_fn_name;
|
||||
use super::conditions::unwrap_parens;
|
||||
use crate::labels::{DataLabel, Kind, classify, lookup};
|
||||
use smallvec::SmallVec;
|
||||
use tree_sitter::Node;
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
@ -210,7 +211,7 @@ pub(crate) fn first_call_ident_with_span<'a>(
|
|||
.and_then(|f| root_receiver_text(f, lang, code));
|
||||
match (recv, func) {
|
||||
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
|
||||
(_, Some(f)) => Some(f.to_string()),
|
||||
(_, Some(f)) => Some(f),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -269,6 +270,11 @@ pub(crate) fn find_classifiable_inner_call<'a>(
|
|||
}
|
||||
match lookup(lang, c.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
|
||||
// For CallMethod we also remember the bare receiver
|
||||
// identifier so we can try a type-qualified rewrite
|
||||
// when the literal classify misses.
|
||||
let mut method_receiver: Option<String> = None;
|
||||
let mut method_name: Option<String> = None;
|
||||
let ident = match lookup(lang, c.kind()) {
|
||||
Kind::CallFn => c
|
||||
.child_by_field_name("function")
|
||||
|
|
@ -286,6 +292,8 @@ pub(crate) fn find_classifiable_inner_call<'a>(
|
|||
.or_else(|| c.child_by_field_name("receiver"))
|
||||
.or_else(|| c.child_by_field_name("scope"))
|
||||
.and_then(|f| root_receiver_text(f, lang, code));
|
||||
method_receiver = recv.clone();
|
||||
method_name = func.clone();
|
||||
match (recv, func) {
|
||||
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
|
||||
(_, Some(f)) => Some(f),
|
||||
|
|
@ -302,6 +310,36 @@ pub(crate) fn find_classifiable_inner_call<'a>(
|
|||
{
|
||||
return Some((id.clone(), lbl, (c.start_byte(), c.end_byte())));
|
||||
}
|
||||
// Receiver-type rewrite fallback: when the literal
|
||||
// `recv.method` text didn't classify, AND we're inside
|
||||
// a chained call (parent `n` is itself a call), look
|
||||
// up `recv`'s locally-bound type and retry with the
|
||||
// type prefix. E.g. for
|
||||
// `sess.createNativeQuery(sql).getResultList()`, the
|
||||
// inner `sess.createNativeQuery` rewrites to
|
||||
// `HibernateSession.createNativeQuery` (rule fires).
|
||||
//
|
||||
// Gated on `n` being a Call-kind so the rewrite only
|
||||
// fires on chain-hop inner calls. When `n` is an
|
||||
// expression-statement / variable-declarator / etc.
|
||||
// the candidate `c` IS the outermost call of the
|
||||
// statement, and the SSA-time
|
||||
// `resolve_type_qualified_labels` path handles it
|
||||
// with multi-label semantics that single-label
|
||||
// `classify` here would erase.
|
||||
let parent_is_call = matches!(
|
||||
lookup(lang, n.kind()),
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro
|
||||
);
|
||||
if parent_is_call
|
||||
&& let (Some(recv), Some(method)) = (method_receiver, method_name)
|
||||
&& let Some(prefix) = crate::cfg::local_receiver_type_prefix(c, &recv, lang)
|
||||
{
|
||||
let alt = format!("{prefix}.{method}");
|
||||
if let Some(lbl) = classify(lang, &alt, extra) {
|
||||
return Some((alt, lbl, (c.start_byte(), c.end_byte())));
|
||||
}
|
||||
}
|
||||
// Recurse into arguments of this call
|
||||
if let Some(found) = find_classifiable_inner_call(c, lang, code, extra) {
|
||||
return Some(found);
|
||||
|
|
@ -412,6 +450,16 @@ pub(crate) fn first_member_label(
|
|||
}
|
||||
// PHP/Python/Ruby subscript access: `$_GET['cmd']`, `os.environ['KEY']`, `params[:cmd]`
|
||||
// Try to classify the object (before the `[`) as a source.
|
||||
//
|
||||
// Source-only on the receiver: a subscript reads a value from the
|
||||
// receiver, so a Sink label found on the receiver text (e.g.
|
||||
// `response.headers['content-type']`, where `response.headers`
|
||||
// matches the JS HEADER_INJECTION sink rule) describes the
|
||||
// *target* of a hypothetical write, not this read. Promoting it
|
||||
// would fire phantom sinks at every `body =
|
||||
// response.headers["X"]`-shape line. Sinks/Sanitizers reachable
|
||||
// via callable positions (function-arg, method-receiver) still
|
||||
// flow through the outer recursive walk below.
|
||||
"subscript_expression" | "subscript" | "element_reference" => {
|
||||
if let Some(obj) = n
|
||||
.child_by_field_name("object")
|
||||
|
|
@ -419,15 +467,23 @@ pub(crate) fn first_member_label(
|
|||
.or_else(|| n.child(0))
|
||||
{
|
||||
if let Some(txt) = text_of(obj, code)
|
||||
&& let Some(lbl) = classify(lang, &txt, extra_labels)
|
||||
&& let Some(lbl @ DataLabel::Source(_)) = classify(lang, &txt, extra_labels)
|
||||
{
|
||||
return Some(lbl);
|
||||
}
|
||||
// Recurse into the object for nested member accesses
|
||||
if let Some(lbl) = first_member_label(obj, lang, code, extra_labels) {
|
||||
// Recurse into the object for nested member accesses, but
|
||||
// keep the same Source-only restriction as above by passing
|
||||
// through the dedicated source-only walker.
|
||||
if let Some(lbl @ DataLabel::Source(_)) =
|
||||
first_member_label(obj, lang, code, extra_labels)
|
||||
{
|
||||
return Some(lbl);
|
||||
}
|
||||
}
|
||||
// Suppress further descent into this subscript node, the outer
|
||||
// child-walk loop would otherwise enter the receiver via the
|
||||
// member_expression arm and reattach a value-extraction Sink.
|
||||
return None;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
|
@ -678,6 +734,7 @@ pub(crate) fn collect_idents_with_paths(
|
|||
"identifier"
|
||||
| "field_identifier"
|
||||
| "property_identifier"
|
||||
| "shorthand_property_identifier"
|
||||
| "shorthand_property_identifier_pattern" => {
|
||||
if let Some(txt) = text_of(n, code) {
|
||||
idents.push(txt);
|
||||
|
|
@ -697,16 +754,241 @@ pub(crate) fn collect_idents_with_paths(
|
|||
}
|
||||
}
|
||||
|
||||
/// Walk an array/tuple destructure pattern in source order and return
|
||||
/// each simple-identifier binding paired with its position index.
|
||||
///
|
||||
/// Recognises:
|
||||
/// * JS/TS `array_pattern` — `const [a, b] = ...`, `const [, b] = ...`,
|
||||
/// `const [a, ,] = ...`. Skip slots (commas with no binding between)
|
||||
/// advance the position counter without emitting a binding.
|
||||
/// * Rust `tuple_pattern` — `let (a, _, b) = ...`. `_pattern` (wildcard)
|
||||
/// advances the position counter without emitting a binding.
|
||||
/// * Python `pattern_list` / `tuple_pattern` — `a, b = ...` and
|
||||
/// `(a, b) = ...`. Python `_` is a normal identifier binding (not a
|
||||
/// wildcard), so every `identifier` child emits a (name, position)
|
||||
/// entry.
|
||||
/// * Ruby `left_assignment_list` — `a, b = ...`. Bare comma-list LHS
|
||||
/// produced by `assignment` whose RHS is an array literal, a call
|
||||
/// return, or another tuple-yielding expression. Ruby `_` is a normal
|
||||
/// identifier (matches Python convention; `_` may still be referenced
|
||||
/// later in scope). Splat (`*rest` parsed as `rest_assignment`) and
|
||||
/// parenthesised nested destructure (`destructured_left_assignment`)
|
||||
/// hit the bail branch and fall back to scalar union.
|
||||
///
|
||||
/// Returns an empty `SmallVec` when the pattern is not one of the above
|
||||
/// kinds OR contains complex sub-patterns (`assignment_pattern` for
|
||||
/// `[a = 1, b]`, `rest_pattern` for `[a, ...rest]`, Python
|
||||
/// `list_splat_pattern` for `a, *rest = ...`, Ruby `rest_assignment` for
|
||||
/// `a, *rest = ...`, nested `array_pattern`, `object_pattern`,
|
||||
/// `destructured_left_assignment`). Callers treat the empty return as
|
||||
/// "no position-aware rewrite available; fall back to scalar union".
|
||||
pub(crate) fn collect_array_pattern_bindings_indexed(
|
||||
pat: Node,
|
||||
code: &[u8],
|
||||
) -> SmallVec<[(String, usize); 4]> {
|
||||
let mut out: SmallVec<[(String, usize); 4]> = SmallVec::new();
|
||||
let kind = pat.kind();
|
||||
if !matches!(
|
||||
kind,
|
||||
"array_pattern" | "tuple_pattern" | "pattern_list" | "left_assignment_list"
|
||||
) {
|
||||
return out;
|
||||
}
|
||||
let mut cursor = pat.walk();
|
||||
let mut pos: usize = 0;
|
||||
for child in pat.children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"[" | "]" | "(" | ")" => {}
|
||||
"," => {
|
||||
pos += 1;
|
||||
}
|
||||
"identifier" | "shorthand_property_identifier_pattern" => {
|
||||
if let Some(txt) = text_of(child, code) {
|
||||
out.push((txt, pos));
|
||||
}
|
||||
}
|
||||
// Rust wildcard `_` in tuple_pattern. Advances position counter
|
||||
// without binding; no emit. Tree-sitter-rust models the
|
||||
// wildcard as a leaf node whose `kind()` is literally "_".
|
||||
"_" => {}
|
||||
_ => {
|
||||
// Complex sub-pattern. Bail by clearing — caller treats
|
||||
// empty as "no position-aware rewrite", preserving the
|
||||
// pre-existing scalar-union behavior for these shapes.
|
||||
out.clear();
|
||||
return out;
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Walk an array-literal-shape RHS node and return one slot per source-order
|
||||
/// element. Each slot is one of:
|
||||
/// * `RhsArraySlot::Ident(name)` — bare identifier element.
|
||||
/// * `RhsArraySlot::Literal` — syntactic literal (string, number, bool,
|
||||
/// null/nil).
|
||||
/// * `RhsArraySlot::Complex(uses)` — call / binary / subscript / member
|
||||
/// access / nested array literal / etc. `uses` carries the inner
|
||||
/// identifier names (member-access paths first, bare idents second)
|
||||
/// harvested from the slot's subtree via `collect_idents_with_paths`.
|
||||
///
|
||||
/// Recognised RHS kinds:
|
||||
/// * JS/TS / Ruby `array` — `[a, b]`
|
||||
/// * Python `list` — `[a, b]`
|
||||
/// * Python `tuple` — `(a, b)`
|
||||
/// * Python `expression_list` — bare comma form `a, b`
|
||||
/// * Rust `tuple_expression` — `(a, b)`
|
||||
///
|
||||
/// Bails (returns empty) when the RHS is not one of these kinds OR contains
|
||||
/// a slot whose shape would shift index alignment (spread, list splat).
|
||||
/// Callers treat empty as "no per-element rewrite available; fall back to
|
||||
/// scalar union".
|
||||
pub(crate) fn collect_rhs_array_literal_elements(
|
||||
rhs: Node,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
) -> SmallVec<[crate::cfg::RhsArraySlot; 4]> {
|
||||
use crate::cfg::RhsArraySlot;
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
|
||||
// Per-slot source classification: when a slot's own subtree carries a
|
||||
// Source-labeled member-expression / subscript, capture the Cap so the
|
||||
// SSA destructure rewrite emits Source for THIS slot specifically and
|
||||
// lets sibling Complex slots stay slot-scoped Assign. Falls back to
|
||||
// Cap::empty() when no per-slot source is recognised; the lowering
|
||||
// path then consults the outer-node Source flag for conservative
|
||||
// preservation of legacy behavior on shapes whose source pattern
|
||||
// doesn't text-classify (e.g. a subscript on a tainted local).
|
||||
let slot_source_cap = |slot: Node| -> Cap {
|
||||
match first_member_label(slot, lang, code, extra_labels) {
|
||||
Some(DataLabel::Source(c)) => c,
|
||||
_ => Cap::empty(),
|
||||
}
|
||||
};
|
||||
|
||||
let mut out: SmallVec<[RhsArraySlot; 4]> = SmallVec::new();
|
||||
let kind = rhs.kind();
|
||||
if !matches!(
|
||||
kind,
|
||||
"array" | "array_literal" | "list" | "tuple" | "tuple_expression" | "expression_list"
|
||||
) {
|
||||
return out;
|
||||
}
|
||||
let mut cursor = rhs.walk();
|
||||
for child in rhs.named_children(&mut cursor) {
|
||||
let ck = child.kind();
|
||||
match ck {
|
||||
"identifier"
|
||||
| "shorthand_property_identifier"
|
||||
| "shorthand_property_identifier_pattern"
|
||||
| "field_identifier"
|
||||
| "property_identifier" => match text_of(child, code) {
|
||||
Some(txt) => out.push(RhsArraySlot::Ident(txt)),
|
||||
None => {
|
||||
out.clear();
|
||||
return out;
|
||||
}
|
||||
},
|
||||
"variable_name" => match text_of(child, code) {
|
||||
Some(txt) => out.push(RhsArraySlot::Ident(txt.trim_start_matches('$').to_string())),
|
||||
None => {
|
||||
out.clear();
|
||||
return out;
|
||||
}
|
||||
},
|
||||
// Syntactic literal slots: no ident, no taint contribution.
|
||||
// Names follow tree-sitter's per-grammar literal kinds across
|
||||
// the supported languages.
|
||||
"string"
|
||||
| "string_literal"
|
||||
| "raw_string_literal"
|
||||
| "interpreted_string_literal"
|
||||
| "concatenated_string"
|
||||
| "integer"
|
||||
| "integer_literal"
|
||||
| "float"
|
||||
| "float_literal"
|
||||
| "number"
|
||||
| "numeric_literal"
|
||||
| "true"
|
||||
| "false"
|
||||
| "boolean_literal"
|
||||
| "boolean"
|
||||
| "null"
|
||||
| "null_literal"
|
||||
| "nil"
|
||||
| "none"
|
||||
| "None"
|
||||
| "undefined" => {
|
||||
out.push(RhsArraySlot::Literal);
|
||||
}
|
||||
// Spread / list-splat shift index alignment unpredictably
|
||||
// (`[...arr, b]` may expand to N elements at index 0). Bail
|
||||
// so callers fall back to scalar union.
|
||||
"spread_element" | "list_splat" | "list_splat_pattern" | "splat_argument"
|
||||
| "unary_splat" | "splat_expression" => {
|
||||
out.clear();
|
||||
return out;
|
||||
}
|
||||
// Interpolated strings carry inner identifier uses. Treat as
|
||||
// Complex so the slot picks up the contributions from
|
||||
// `${user.id}` etc.
|
||||
"template_string" | "string_interpolation" | "interpolation" | "encapsed_string" => {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(child, code, &mut idents, &mut paths);
|
||||
let mut uses: SmallVec<[String; 4]> = SmallVec::new();
|
||||
for p in paths {
|
||||
uses.push(p);
|
||||
}
|
||||
for ident in idents {
|
||||
if !uses.iter().any(|u| u == &ident) {
|
||||
uses.push(ident);
|
||||
}
|
||||
}
|
||||
let source_cap = slot_source_cap(child);
|
||||
out.push(RhsArraySlot::Complex { uses, source_cap });
|
||||
}
|
||||
// Everything else (call, member access, binary, subscript,
|
||||
// unary, ternary, nested array literal, etc.) is a "complex"
|
||||
// slot. Harvest inner ident uses so the SSA lowering can paint
|
||||
// the binding with this slot's contributions only — not the
|
||||
// union of every ident on the RHS.
|
||||
_ => {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(child, code, &mut idents, &mut paths);
|
||||
let mut uses: SmallVec<[String; 4]> = SmallVec::new();
|
||||
for p in paths {
|
||||
uses.push(p);
|
||||
}
|
||||
for ident in idents {
|
||||
if !uses.iter().any(|u| u == &ident) {
|
||||
uses.push(ident);
|
||||
}
|
||||
}
|
||||
let source_cap = slot_source_cap(child);
|
||||
out.push(RhsArraySlot::Complex { uses, source_cap });
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Recursively collect every identifier that occurs inside `n`.
|
||||
///
|
||||
/// Recognises `identifier` (most languages), `variable_name` (PHP),
|
||||
/// `field_identifier` (Go), `property_identifier` (JS/TS), and
|
||||
/// `shorthand_property_identifier` / `shorthand_property_identifier_pattern`
/// (JS/TS object-literal shorthand uses and destructuring binding patterns).
|
||||
pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
|
||||
match n.kind() {
|
||||
"identifier"
|
||||
| "field_identifier"
|
||||
| "property_identifier"
|
||||
| "shorthand_property_identifier"
|
||||
| "shorthand_property_identifier_pattern"
|
||||
// PHP `name`: leaf node carrying the bare identifier text for
|
||||
// function/method names and similar grammar slots. Without this
|
||||
|
|
|
|||
|
|
@ -337,7 +337,7 @@ fn collect_ruby<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mu
|
|||
&& let Some(t) = text_of(c, code)
|
||||
{
|
||||
let leaf = t.rsplit("::").next().unwrap_or(&t).to_string();
|
||||
push(sub.clone(), leaf);
|
||||
push(sub, leaf);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,140 @@
|
|||
use super::{
|
||||
ImportBinding, ImportBindings, PromisifyAlias, PromisifyAliases, member_expr_text, text_of,
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
/// File-local view of every JS/TS import binding: local-name → source-module
|
||||
/// specifier (verbatim from the `import` / `require` site, without `node:`
|
||||
/// stripping). Built once per CFG pass; consumed by the gated-label
|
||||
/// post-pass via [`crate::labels::ClassificationContext::local_imports`].
|
||||
///
|
||||
/// Records every binding regardless of aliasing (the legacy
|
||||
/// [`extract_import_bindings`] only preserves *renamed* bindings, which is
|
||||
/// not enough for Phase 05's `import { readFile } from 'fs/promises'`
|
||||
/// shape where `local_name == imported_name`).
|
||||
///
|
||||
/// Shares its top-level walk with [`crate::resolve::walk_js_top_level_imports`]
|
||||
/// so the import-clause / require-declarator parsing logic only lives in one
|
||||
/// place; this view simply discards the resolver verdict and side-effect-only
|
||||
/// markers.
|
||||
pub(super) fn extract_local_import_view(tree: &Tree, code: &[u8]) -> HashMap<String, String> {
|
||||
let mut out: HashMap<String, String> = HashMap::new();
|
||||
for raw in crate::resolve::walk_js_top_level_imports(tree, code) {
|
||||
if raw.local.is_empty() {
|
||||
continue;
|
||||
}
|
||||
out.insert(raw.local, raw.source_spec);
|
||||
}
|
||||
extend_with_promises_alias(tree, code, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
/// Recognise top-level `const fsp = fs.promises;` /
|
||||
/// `const fsp = require('fs').promises;` aliasing and add the new local
|
||||
/// name to the import view as `fs/promises` (or `node:fs/promises`,
|
||||
/// whichever the source binding spelt).
|
||||
///
|
||||
/// The Phase 05 `LabelGate::ImportedFromModule(&["fs/promises", ...])`
|
||||
/// only consults `local_imports[leading_identifier(callee)]`. Without
|
||||
/// this extension, `fsp.readFile(x)` evades the gate because `fsp`
|
||||
/// itself is not an import binding — only the underlying `fs`
|
||||
/// namespace is.
|
||||
fn extend_with_promises_alias(tree: &Tree, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
let root = tree.root_node();
|
||||
let mut top_cursor = root.walk();
|
||||
for child in root.children(&mut top_cursor) {
|
||||
if !matches!(child.kind(), "lexical_declaration" | "variable_declaration") {
|
||||
continue;
|
||||
}
|
||||
let mut decl_cursor = child.walk();
|
||||
for decl in child.children(&mut decl_cursor) {
|
||||
if decl.kind() != "variable_declarator" {
|
||||
continue;
|
||||
}
|
||||
let (Some(name_node), Some(value_node)) = (
|
||||
decl.child_by_field_name("name"),
|
||||
decl.child_by_field_name("value"),
|
||||
) else {
|
||||
continue;
|
||||
};
|
||||
if name_node.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let Some(local_name) = text_of(name_node, code) else {
|
||||
continue;
|
||||
};
|
||||
if value_node.kind() != "member_expression" {
|
||||
continue;
|
||||
}
|
||||
let property = value_node
|
||||
.child_by_field_name("property")
|
||||
.and_then(|p| text_of(p, code));
|
||||
if property.as_deref() != Some("promises") {
|
||||
continue;
|
||||
}
|
||||
let Some(obj) = value_node.child_by_field_name("object") else {
|
||||
continue;
|
||||
};
|
||||
let Some(source) = promises_alias_source(obj, code, out) else {
|
||||
continue;
|
||||
};
|
||||
// Don't override an existing import entry for the same name —
|
||||
// an explicit import of `fsp` from `fs/promises` already says
|
||||
// what we'd be inferring here.
|
||||
out.entry(local_name).or_insert(source);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the object side of a `<lhs> = <obj>.promises` member-expression
|
||||
/// to a source-module string when `<obj>` is a known `fs` binding.
|
||||
///
|
||||
/// Recognised shapes:
|
||||
/// - identifier `X` where `local_imports[X]` is `fs` or `node:fs`
|
||||
/// - `require('fs')` / `require("node:fs")` call expression
|
||||
fn promises_alias_source(
|
||||
obj: Node,
|
||||
code: &[u8],
|
||||
imports_so_far: &HashMap<String, String>,
|
||||
) -> Option<String> {
|
||||
match obj.kind() {
|
||||
"identifier" => {
|
||||
let id = text_of(obj, code)?;
|
||||
let module = imports_so_far.get(&id)?;
|
||||
map_fs_module_to_promises(module)
|
||||
}
|
||||
"call_expression" => {
|
||||
let func = obj.child_by_field_name("function")?;
|
||||
if text_of(func, code).as_deref() != Some("require") {
|
||||
return None;
|
||||
}
|
||||
let args = obj.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
if !matches!(arg.kind(), "string" | "template_string") {
|
||||
continue;
|
||||
}
|
||||
let raw = text_of(arg, code)?;
|
||||
let spec = raw.trim_matches(|c: char| c == '\'' || c == '"' || c == '`');
|
||||
return map_fs_module_to_promises(spec);
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate an `fs` module specifier into its promises counterpart:
/// `fs` → `fs/promises`, `node:fs` → `node:fs/promises`. The comparison
/// ignores ASCII case and the result is always the lowercase spelling.
/// Any other specifier yields `None`.
fn map_fs_module_to_promises(module: &str) -> Option<String> {
    const FS_TO_PROMISES: [(&str, &str); 2] =
        [("fs", "fs/promises"), ("node:fs", "node:fs/promises")];

    FS_TO_PROMISES
        .iter()
        .find(|entry| module.eq_ignore_ascii_case(entry.0))
        .map(|entry| entry.1.to_string())
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Import binding extraction
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
@ -360,6 +492,129 @@ fn extract_require_module(node: Node, code: &[u8]) -> Option<String> {
|
|||
None
|
||||
}
|
||||
|
||||
/// Per-file Rust scan: did the file `use` a join-style macro from `tokio` or
|
||||
/// `futures`? Returns the crate prefix to use when the file calls a bare
|
||||
/// `join!` / `try_join!` macro.
|
||||
///
|
||||
/// Rationale: tree-sitter records `tokio::join!(...)` with a fully qualified
|
||||
/// `macro` field text, but `use tokio::join; ... join!(a, b)` records the
|
||||
/// bare leaf. Without this lookup, the SSA-level promise-combinator
|
||||
/// recogniser (`crate::labels::is_promise_combinator`) misses the bare form
|
||||
/// and the macro's argument taint is dropped. Conservative: returns `None`
|
||||
/// when both `tokio::<name>` and `futures::<name>` are imported (ambiguous)
|
||||
/// or when neither is, leaving the bare `join` callee alone.
|
||||
pub(super) fn rust_bare_join_crate_prefix(
|
||||
root: Node,
|
||||
code: &[u8],
|
||||
leaf: &str,
|
||||
) -> Option<&'static str> {
|
||||
if !matches!(leaf, "join" | "try_join") {
|
||||
return None;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
let mut tokio_seen = false;
|
||||
let mut futures_seen = false;
|
||||
for child in root.children(&mut cursor) {
|
||||
if child.kind() != "use_declaration" {
|
||||
continue;
|
||||
}
|
||||
if rust_use_decl_imports_leaf(child, code, "tokio", leaf) {
|
||||
tokio_seen = true;
|
||||
}
|
||||
if rust_use_decl_imports_leaf(child, code, "futures", leaf) {
|
||||
futures_seen = true;
|
||||
}
|
||||
}
|
||||
match (tokio_seen, futures_seen) {
|
||||
(true, false) => Some("tokio"),
|
||||
(false, true) => Some("futures"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// True when `use_decl` brings `<crate_prefix>::<leaf>` into scope.
|
||||
///
|
||||
/// Recognises the common shapes:
|
||||
/// * `use tokio::join;` → leaf at the path tail
|
||||
/// * `use tokio::{join, select};` → leaf inside a use_list
|
||||
/// * `use tokio::join as my_join;` → aliased; we detect the
|
||||
/// original path even though the aliased name is unused (the macro is
|
||||
/// typically invoked under its alias, but if the alias and the bare form
|
||||
/// collide the rewrite is still safe).
|
||||
/// * `use tokio::*;` is NOT recognised — wildcard imports are too permissive
|
||||
/// for the bare-leaf rewrite to stay precise.
|
||||
fn rust_use_decl_imports_leaf(use_decl: Node, code: &[u8], crate_prefix: &str, leaf: &str) -> bool {
|
||||
let mut stack = vec![use_decl];
|
||||
while let Some(node) = stack.pop() {
|
||||
match node.kind() {
|
||||
// `use tokio::join;` — argument is a `scoped_identifier`.
|
||||
"scoped_identifier" => {
|
||||
if scoped_identifier_matches(node, code, crate_prefix, leaf) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// `use tokio::{join, select};` — the `path` field is `tokio`,
|
||||
// and a `use_list` enumerates leaves.
|
||||
"scoped_use_list" => {
|
||||
let path_ok = node
|
||||
.child_by_field_name("path")
|
||||
.and_then(|p| text_of(p, code))
|
||||
.as_deref()
|
||||
== Some(crate_prefix);
|
||||
if path_ok && let Some(list) = node.child_by_field_name("list") {
|
||||
let mut lc = list.walk();
|
||||
for entry in list.named_children(&mut lc) {
|
||||
match entry.kind() {
|
||||
"identifier" if text_of(entry, code).as_deref() == Some(leaf) => {
|
||||
return true;
|
||||
}
|
||||
"use_as_clause"
|
||||
if entry
|
||||
.child_by_field_name("path")
|
||||
.and_then(|p| text_of(p, code))
|
||||
.as_deref()
|
||||
== Some(leaf) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// `use tokio::join as my_join;` — aliased clause sits directly
|
||||
// under the use_declaration; check the path side.
|
||||
"use_as_clause" => {
|
||||
if let Some(p) = node.child_by_field_name("path")
|
||||
&& p.kind() == "scoped_identifier"
|
||||
&& scoped_identifier_matches(p, code, crate_prefix, leaf)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Walk children for nested groups (`use a::{b::{c, d}}`).
|
||||
let mut c = node.walk();
|
||||
for ch in node.children(&mut c) {
|
||||
stack.push(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn scoped_identifier_matches(node: Node, code: &[u8], crate_prefix: &str, leaf: &str) -> bool {
|
||||
let path_text = node
|
||||
.child_by_field_name("path")
|
||||
.and_then(|p| text_of(p, code));
|
||||
let leaf_text = node
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| text_of(n, code));
|
||||
matches!((path_text.as_deref(), leaf_text.as_deref()),
|
||||
(Some(p), Some(l)) if p == crate_prefix && l == leaf)
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// === PUBLIC ENTRY POINT =================================================
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -1,22 +1,45 @@
|
|||
use super::conditions::unwrap_parens;
|
||||
use super::helpers::{collect_array_pattern_bindings_indexed, collect_rhs_array_literal_elements};
|
||||
use super::{
|
||||
anon_fn_name, collect_idents, collect_idents_with_paths, find_constructor_type_child,
|
||||
first_call_ident, root_receiver_text, text_of,
|
||||
};
|
||||
use crate::labels::{Cap, Kind, lookup};
|
||||
use smallvec::SmallVec;
|
||||
use tree_sitter::Node;
|
||||
|
||||
/// Find the inner CallFn/CallMethod/CallMacro node within an AST node.
|
||||
/// For direct call nodes, returns the node itself. For wrappers, searches
|
||||
/// up to two levels of children, transparently descending through
/// `await_expression` / `yield_expression` (`Kind::AwaitForward`) wrappers
/// so `const x = await foo(y)` reaches the inner `call_expression` at
/// effective depth 3 (`lexical_declaration > variable_declarator >
/// await_expression > call_expression`).
|
||||
pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
|
||||
match lookup(lang, n.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => Some(n),
|
||||
Kind::AwaitForward => {
|
||||
// Transparent wrapper: descend into the awaited expression.
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
if let Some(found) = find_call_node(c, lang) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => {
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
match lookup(lang, c.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => return Some(c),
|
||||
// Skip past await/yield wrappers without consuming a
|
||||
// recursion level — the wrapper itself is transparent.
|
||||
Kind::AwaitForward => {
|
||||
if let Some(found) = find_call_node(c, lang) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
|
@ -25,11 +48,14 @@ pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
|
|||
for c in n.children(&mut cursor2) {
|
||||
let mut cursor3 = c.walk();
|
||||
for gc in c.children(&mut cursor3) {
|
||||
if matches!(
|
||||
lookup(lang, gc.kind()),
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro
|
||||
) {
|
||||
return Some(gc);
|
||||
match lookup(lang, gc.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => return Some(gc),
|
||||
Kind::AwaitForward => {
|
||||
if let Some(found) = find_call_node(gc, lang) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -108,9 +134,43 @@ pub(super) fn extract_destination_field_pairs(
|
|||
raw
|
||||
}
|
||||
}),
|
||||
// Computed keys like `[someVar]` can't be statically
|
||||
// resolved, skip (conservative: not a destination field).
|
||||
"computed_property_name" => continue,
|
||||
// Computed keys: resolve only when the inner expression
|
||||
// is a pure string literal (`['url']`). Dynamic forms
|
||||
// (`[someVar]`, `[`url-${i}`]`, ``[`url`]`` with
|
||||
// interpolation) stay conservative-skip.
|
||||
"computed_property_name" => {
|
||||
let mut inner_cursor = key_node.walk();
|
||||
let inner = key_node.named_children(&mut inner_cursor).find(|c| {
|
||||
!matches!(c.kind(), "comment" | "block_comment" | "line_comment")
|
||||
});
|
||||
match inner.map(|n| (n.kind(), n)) {
|
||||
Some(("string" | "string_literal", n)) => text_of(n, code).map(|raw| {
|
||||
if raw.len() >= 2 {
|
||||
raw[1..raw.len() - 1].to_string()
|
||||
} else {
|
||||
raw
|
||||
}
|
||||
}),
|
||||
// Template strings only when no interpolation
|
||||
// (no `template_substitution` children).
|
||||
Some(("template_string", n))
|
||||
if {
|
||||
let mut tc = n.walk();
|
||||
!n.named_children(&mut tc)
|
||||
.any(|c| c.kind() == "template_substitution")
|
||||
} =>
|
||||
{
|
||||
text_of(n, code).map(|raw| {
|
||||
if raw.len() >= 2 {
|
||||
raw[1..raw.len() - 1].to_string()
|
||||
} else {
|
||||
raw
|
||||
}
|
||||
})
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
_ => text_of(key_node, code),
|
||||
};
|
||||
let Some(key) = key_text else {
|
||||
|
|
@ -144,6 +204,13 @@ pub(super) fn extract_destination_field_pairs(
|
|||
/// `requests.post(url, data=tainted, json=safe)` where `data` and `json` are
|
||||
/// `keyword_argument` siblings of the positional URL.
|
||||
///
|
||||
/// Also covers Ruby, where tree-sitter-ruby emits `pair` nodes (with
|
||||
/// `key`/`value` fields) directly under `argument_list` for the
|
||||
/// `Faraday.new(url: x)` / `Net::HTTP.start(host, port, proxy_addr: prx)`
|
||||
/// kwarg shape. The `key` is typically a `hash_key_symbol` whose text is the
|
||||
/// bare identifier (`url`); `simple_symbol` (`:url`) and string keys are
|
||||
/// normalised by stripping a leading `:` or wrapping quotes.
|
||||
///
|
||||
/// Returns the union of matching kwargs, preserving the kwarg name in the
|
||||
/// `field` slot so callers can still attribute findings per-field. Empty
|
||||
/// when no matching kwargs exist or the call has no `arguments` field.
|
||||
|
|
@ -162,22 +229,38 @@ pub(super) fn extract_destination_kwarg_pairs(
|
|||
let mut cursor = args_node.walk();
|
||||
for child in args_node.named_children(&mut cursor) {
|
||||
let kind = child.kind();
|
||||
if kind != "keyword_argument" && kind != "named_argument" {
|
||||
let (name_node, value_node) = if kind == "keyword_argument" || kind == "named_argument" {
|
||||
let named_count = child.named_child_count();
|
||||
(
|
||||
child
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| child.named_child(0)),
|
||||
child
|
||||
.child_by_field_name("value")
|
||||
.or_else(|| child.named_child(named_count.saturating_sub(1) as u32)),
|
||||
)
|
||||
} else if kind == "pair" {
|
||||
// Ruby `pair` node sits directly under `argument_list` for
|
||||
// kwarg-style call args (`f(url: x)`). `key`/`value` fields
|
||||
// are populated; key text is `hash_key_symbol` ("url"),
|
||||
// `simple_symbol` (":url"), or a string literal.
|
||||
(
|
||||
child.child_by_field_name("key"),
|
||||
child.child_by_field_name("value"),
|
||||
)
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
let named_count = child.named_child_count();
|
||||
let name_node = child
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| child.named_child(0));
|
||||
let value_node = child
|
||||
.child_by_field_name("value")
|
||||
.or_else(|| child.named_child(named_count.saturating_sub(1) as u32));
|
||||
};
|
||||
let (Some(nn), Some(vn)) = (name_node, value_node) else {
|
||||
continue;
|
||||
};
|
||||
let Some(name) = text_of(nn, code) else {
|
||||
let Some(name_raw) = text_of(nn, code) else {
|
||||
continue;
|
||||
};
|
||||
let name = name_raw
|
||||
.trim_start_matches(':')
|
||||
.trim_matches(['"', '\''])
|
||||
.to_string();
|
||||
if !fields.iter().any(|&f| f == name) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -387,11 +470,9 @@ pub(super) fn extract_const_macro_arg(
|
|||
// C/C++ identifier / PHP `name` node for define-style constants.
|
||||
// Scoped C++ identifiers (`Curl::OPT_POSTFIELDS`) and PHP namespaced
|
||||
// names also surface here so the dangerous_values match catches them.
|
||||
"identifier" | "name" | "qualified_name" | "scoped_identifier" => {
|
||||
text_of(arg, code).map(|s| s.to_string())
|
||||
}
|
||||
"identifier" | "name" | "qualified_name" | "scoped_identifier" => text_of(arg, code),
|
||||
// Ruby bare constant (`NOENT`) — leaf form.
|
||||
"constant" => text_of(arg, code).map(|s| s.to_string()),
|
||||
"constant" => text_of(arg, code),
|
||||
// Ruby scope-qualified constant (`Nokogiri::XML::ParseOptions::NOENT`).
|
||||
// Return only the rightmost `name` segment so the gate's
|
||||
// `dangerous_values` list can stay identifier-bare instead of
|
||||
|
|
@ -400,8 +481,7 @@ pub(super) fn extract_const_macro_arg(
|
|||
"scope_resolution" => arg
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| text_of(n, code))
|
||||
.map(|s| s.to_string())
|
||||
.or_else(|| text_of(arg, code).map(|s| s.to_string())),
|
||||
.or_else(|| text_of(arg, code)),
|
||||
// Integer literals at the activation arg position. PHP / C / C++
|
||||
// commonly use plain `0` to opt into the safe-default option set
|
||||
// (e.g. `simplexml_load_string($xml, "SimpleXMLElement", 0)`). The
|
||||
|
|
@ -409,7 +489,7 @@ pub(super) fn extract_const_macro_arg(
|
|||
// the literal text lets the comparison fail against `LIBXML_NOENT`
|
||||
// and suppresses the conservative-fire branch.
|
||||
"integer" | "integer_literal" | "number_literal" | "decimal_integer_literal" => {
|
||||
text_of(arg, code).map(|s| s.to_string())
|
||||
text_of(arg, code)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
|
|
@ -443,7 +523,7 @@ pub(super) fn extract_const_keyword_arg(
|
|||
// distinguish literal-safe from dynamic.
|
||||
return match value_node.kind() {
|
||||
"true" | "false" | "none" | "integer" | "float" | "string" | "string_literal"
|
||||
| "identifier" => text_of(value_node, code).map(|s| s.to_string()),
|
||||
| "identifier" => text_of(value_node, code),
|
||||
_ => None,
|
||||
}
|
||||
.filter(|_| {
|
||||
|
|
@ -537,7 +617,7 @@ pub(super) fn extract_object_arg_property(
|
|||
let val_node = unwrap_parens(val_node);
|
||||
return match val_node.kind() {
|
||||
"true" | "false" | "null" | "undefined" | "number" | "string" | "string_literal" => {
|
||||
text_of(val_node, code).map(|s| s.to_string())
|
||||
text_of(val_node, code)
|
||||
}
|
||||
// JS booleans true/false are their own node kinds (above), but
|
||||
// some grammar versions wrap them as identifier literals; surface
|
||||
|
|
@ -811,7 +891,7 @@ pub(super) fn js_chain_outer_method_for_inner<'a>(
|
|||
if inner_matched {
|
||||
return function
|
||||
.child_by_field_name("property")
|
||||
.and_then(|p| text_of(p, code).map(|s| s.to_string()));
|
||||
.and_then(|p| text_of(p, code));
|
||||
}
|
||||
}
|
||||
// Recurse: outer chain may have more depth (`a.b().c().d()` ,
|
||||
|
|
@ -1518,6 +1598,18 @@ pub(super) fn extract_arg_uses(call_node: Node, code: &[u8]) -> Vec<Vec<String>>
|
|||
return result;
|
||||
}
|
||||
|
||||
// Rust `tokio::join!` / `futures::join!` (and their `try_*` variants).
|
||||
// tree-sitter-rust models macro args as a `token_tree` rather than an
|
||||
// `arguments` field, so a vanilla extraction returns nothing. Walk the
|
||||
// top-level token_tree splitting on `,` separators, lifting identifiers
|
||||
// out of each chunk so the existing PromiseCombinator transfer can union
|
||||
// arg-side taint into the resulting tuple value.
|
||||
if call_node.kind() == "macro_invocation"
|
||||
&& let Some(arg_uses) = extract_rust_macro_join_arg_uses(call_node, code)
|
||||
{
|
||||
return arg_uses;
|
||||
}
|
||||
|
||||
let Some(args_node) = call_node.child_by_field_name("arguments") else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
|
@ -1551,6 +1643,82 @@ pub(super) fn extract_arg_uses(call_node: Node, code: &[u8]) -> Vec<Vec<String>>
|
|||
result
|
||||
}
|
||||
|
||||
/// `tokio::join!` / `futures::join!` (and their `try_*` variants) bundle
|
||||
/// concurrently-awaited futures into a tuple result. tree-sitter-rust
|
||||
/// represents the args as a `token_tree` whose children alternate between
|
||||
/// expressions and `,` separators (`token_tree` itself nests on every
|
||||
/// parenthesised group, e.g. the `(x)` inside `fetch(x)`). Walk the
|
||||
/// top-level token_tree, segment by `,` leaves, and lift identifiers out
|
||||
/// of each chunk so the SSA Call op carries one positional arg per future.
|
||||
///
|
||||
/// Returns `Some(arg_uses)` only when the macro is one of the recognised
|
||||
/// join macros, so `extract_arg_uses` can fall through to its normal
|
||||
/// `arguments`-field path for every other macro shape (`format!`,
|
||||
/// `println!`, custom DSL macros) where arg lifting could disturb existing
|
||||
/// label / SSA flow.
|
||||
pub(super) fn extract_rust_macro_join_arg_uses(
|
||||
call_node: Node,
|
||||
code: &[u8],
|
||||
) -> Option<Vec<Vec<String>>> {
|
||||
let macro_node = call_node.child_by_field_name("macro")?;
|
||||
let macro_text = text_of(macro_node, code)?;
|
||||
if !is_rust_join_macro(¯o_text) {
|
||||
return None;
|
||||
}
|
||||
let tt = match call_node.child_by_field_name("token_tree") {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
let mut cursor = call_node.walk();
|
||||
call_node
|
||||
.children(&mut cursor)
|
||||
.find(|c| c.kind() == "token_tree")?
|
||||
}
|
||||
};
|
||||
let mut chunks: Vec<Vec<Node>> = vec![Vec::new()];
|
||||
let mut cursor = tt.walk();
|
||||
for child in tt.children(&mut cursor) {
|
||||
// Skip the surrounding `(`/`)` punctuation.
|
||||
if !child.is_named() {
|
||||
let kind = child.kind();
|
||||
if kind == "," {
|
||||
chunks.push(Vec::new());
|
||||
continue;
|
||||
}
|
||||
if kind == "(" || kind == ")" {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
chunks.last_mut().unwrap().push(child);
|
||||
}
|
||||
let mut result = Vec::new();
|
||||
for chunk in chunks {
|
||||
if chunk.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
for n in chunk {
|
||||
collect_idents_with_paths(n, code, &mut idents, &mut paths);
|
||||
}
|
||||
let mut combined = paths;
|
||||
combined.extend(idents);
|
||||
result.push(combined);
|
||||
}
|
||||
Some(result)
|
||||
}
|
||||
|
||||
/// True when `macro_text` is exactly one of the join macros handled by the
/// token-tree argument lifting: `join` / `try_join`, either bare or
/// qualified with `tokio::` or `futures::`.
fn is_rust_join_macro(macro_text: &str) -> bool {
    const JOIN_MACROS: [&str; 6] = [
        "tokio::join",
        "tokio::try_join",
        "futures::join",
        "futures::try_join",
        "join",
        "try_join",
    ];
    JOIN_MACROS.contains(&macro_text)
}
|
||||
|
||||
/// Extract keyword / named argument bindings for a call node.
|
||||
///
|
||||
/// Returns `Vec<(name, uses)>` where `uses` are the identifier references
|
||||
|
|
@ -1891,11 +2059,31 @@ pub(super) fn call_ident_of<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Opti
|
|||
.child_by_field_name("method")
|
||||
.or_else(|| n.child_by_field_name("name"))
|
||||
.and_then(|f| text_of(f, code));
|
||||
let recv = n
|
||||
let recv_node = n
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| n.child_by_field_name("receiver"))
|
||||
.or_else(|| n.child_by_field_name("scope"))
|
||||
.and_then(|f| root_receiver_text(f, lang, code));
|
||||
.or_else(|| n.child_by_field_name("scope"));
|
||||
let recv = recv_node.and_then(|f| root_receiver_text(f, lang, code));
|
||||
// Preserve Java `.getClass()` segment in the chained callee text
|
||||
// so downstream predicates (e.g.
|
||||
// [`crate::ssa::type_facts::is_safe_string_producing_callee`])
|
||||
// can recognise idiomatic `obj.getClass().<accessor>()` chains.
|
||||
// Without this, `root_receiver_text` collapses the chain to
|
||||
// `obj.<accessor>`, indistinguishable from a user-defined method.
|
||||
let recv = if lang == "java"
|
||||
&& let Some(rn) = recv_node
|
||||
&& lookup(lang, rn.kind()) == Kind::CallMethod
|
||||
&& let Some(inner_method) = rn
|
||||
.child_by_field_name("method")
|
||||
.or_else(|| rn.child_by_field_name("name"))
|
||||
.and_then(|f| text_of(f, code))
|
||||
&& inner_method == "getClass"
|
||||
&& let Some(r) = recv
|
||||
{
|
||||
Some(format!("{r}.getClass"))
|
||||
} else {
|
||||
recv
|
||||
};
|
||||
match (recv, func) {
|
||||
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
|
||||
(_, Some(f)) => Some(f),
|
||||
|
|
@ -1984,7 +2172,7 @@ pub(super) fn extract_arg_string_literals(call_node: Node, code: &[u8]) -> Vec<O
|
|||
| "integer"
|
||||
| "number"
|
||||
| "number_literal"
|
||||
| "decimal_literal" => text_of(target, code).map(|s| s.to_string()),
|
||||
| "decimal_literal" => text_of(target, code),
|
||||
_ => None,
|
||||
};
|
||||
result.push(literal);
|
||||
|
|
@ -2003,7 +2191,7 @@ pub(super) fn strip_literal_quotes(raw: &str, node: Node, code: &[u8]) -> Option
|
|||
let mut cursor = node.walk();
|
||||
for child in node.named_children(&mut cursor) {
|
||||
if child.kind() == "string_content" {
|
||||
return text_of(child, code).map(|s| s.to_string());
|
||||
return text_of(child, code);
|
||||
}
|
||||
}
|
||||
if raw.len() >= 2 {
|
||||
|
|
@ -2044,20 +2232,43 @@ pub(super) fn extract_arg_callees(call_node: Node, lang: &str, code: &[u8]) -> V
|
|||
result
|
||||
}
|
||||
|
||||
/// Return `(defines, uses)` for the AST fragment `ast`.
|
||||
/// Returns (defines, uses, extra_defines) where extra_defines captures additional
|
||||
/// bindings from destructuring patterns beyond the primary define.
|
||||
/// Return `(defines, uses, extra_defines, array_pattern_indices,
|
||||
/// rhs_array_elements)` for the AST fragment `ast`.
|
||||
///
|
||||
/// `extra_defines` captures additional bindings from destructuring patterns
|
||||
/// beyond the primary define. `array_pattern_indices`, when non-empty, gives
|
||||
/// the source-order position of each binding in `iter::once(defines).chain(
|
||||
/// extra_defines)` for `array_pattern` / `tuple_pattern` LHS shapes. Empty
|
||||
/// for non-array destructures and for non-skip array patterns where callers
|
||||
/// can derive sequential 0..N indices implicitly.
|
||||
///
|
||||
/// `rhs_array_elements`, when non-empty, gives source-order RHS slots for
|
||||
/// destructure-from-array-literal shapes (`const [a, b] = [safe, tainted]`,
|
||||
/// `let (a, b) = (safe, tainted)`, Python `a, b = safe, tainted`). Each slot
|
||||
/// is `Some(ident)` for a bare-ident element or `None` for a syntactic
|
||||
/// literal. Empty when RHS isn't an array-literal shape or any element is
|
||||
/// too complex; callers fall back to scalar union in that case.
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub(super) fn def_use(
|
||||
ast: Node,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
) -> (Option<String>, Vec<String>, Vec<String>) {
|
||||
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
) -> (
|
||||
Option<String>,
|
||||
Vec<String>,
|
||||
Vec<String>,
|
||||
SmallVec<[usize; 4]>,
|
||||
SmallVec<[crate::cfg::RhsArraySlot; 4]>,
|
||||
) {
|
||||
match lookup(lang, ast.kind()) {
|
||||
// Declaration wrappers (let, var, short_var_declaration, etc.)
|
||||
Kind::CallWrapper => {
|
||||
let mut defs = None;
|
||||
let mut extra_defs = Vec::new();
|
||||
let mut uses = Vec::new();
|
||||
let mut pattern_indices: SmallVec<[usize; 4]> = SmallVec::new();
|
||||
let mut rhs_array_elements: SmallVec<[crate::cfg::RhsArraySlot; 4]> = SmallVec::new();
|
||||
|
||||
// Try direct field names first (Rust `let_declaration`, Go `short_var_declaration`)
|
||||
let def_node = ast
|
||||
|
|
@ -2076,17 +2287,30 @@ pub(super) fn def_use(
|
|||
|
||||
if def_node.is_some() || val_node.is_some() {
|
||||
if let Some(pat) = def_node {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(pat, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
// Remaining idents are extra defines (for destructuring)
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
let bindings = collect_array_pattern_bindings_indexed(pat, code);
|
||||
if !bindings.is_empty() {
|
||||
let mut iter = bindings.into_iter();
|
||||
if let Some((first_name, first_idx)) = iter.next() {
|
||||
defs = Some(first_name);
|
||||
pattern_indices.push(first_idx);
|
||||
}
|
||||
for (name, idx) in iter {
|
||||
extra_defs.push(name);
|
||||
pattern_indices.push(idx);
|
||||
}
|
||||
} else {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(pat, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
// Remaining idents are extra defines (for destructuring)
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
}
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
if let Some(val) = val_node {
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -2099,6 +2323,14 @@ pub(super) fn def_use(
|
|||
// the format-string bytes, not as a separate AST
|
||||
// argument node, so collect_idents misses it.
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(val, code));
|
||||
// When the LHS is a recognised destructure pattern AND
|
||||
// the RHS is a bare array-literal shape (no call), record
|
||||
// per-element idents so the SSA destructure rewrite can
|
||||
// map each binding to its specific RHS slot.
|
||||
if !pattern_indices.is_empty() {
|
||||
rhs_array_elements =
|
||||
collect_rhs_array_literal_elements(val, lang, code, extra_labels);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Try nested declarator pattern (JS/TS `lexical_declaration` → `variable_declarator`,
|
||||
|
|
@ -2135,16 +2367,29 @@ pub(super) fn def_use(
|
|||
if let Some(name_node) = child_name
|
||||
&& defs.is_none()
|
||||
{
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(name_node, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
let bindings = collect_array_pattern_bindings_indexed(name_node, code);
|
||||
if !bindings.is_empty() {
|
||||
let mut iter = bindings.into_iter();
|
||||
if let Some((first_name, first_idx)) = iter.next() {
|
||||
defs = Some(first_name);
|
||||
pattern_indices.push(first_idx);
|
||||
}
|
||||
for (name, idx) in iter {
|
||||
extra_defs.push(name);
|
||||
pattern_indices.push(idx);
|
||||
}
|
||||
} else {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(name_node, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
}
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
if let Some(val_node) = child_value {
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -2153,6 +2398,14 @@ pub(super) fn def_use(
|
|||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(val_node, code));
|
||||
if !pattern_indices.is_empty() && rhs_array_elements.is_empty() {
|
||||
rhs_array_elements = collect_rhs_array_literal_elements(
|
||||
val_node,
|
||||
lang,
|
||||
code,
|
||||
extra_labels,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2168,19 +2421,42 @@ pub(super) fn def_use(
|
|||
uses.extend(extract_rust_format_macro_named_idents_in(ast, code));
|
||||
}
|
||||
}
|
||||
(defs, uses, extra_defs)
|
||||
(defs, uses, extra_defs, pattern_indices, rhs_array_elements)
|
||||
}
|
||||
|
||||
// Plain assignment `x = y`
|
||||
// Plain assignment `x = y` or destructuring assignment such as
|
||||
// Python `a, b = await asyncio.gather(...)` whose LHS surfaces as
|
||||
// a `pattern_list` / `tuple_pattern`. When the LHS is a
|
||||
// destructure pattern that the indexed helper recognises, the
|
||||
// primary binding lands in `defs`, the rest land in `extra_defs`,
|
||||
// and `pattern_indices` carries source-order positions so the
|
||||
// SSA lowering's destructure-promise rewrite can paint each
|
||||
// binding from the matching combinator argument.
|
||||
Kind::Assignment => {
|
||||
let mut defs = None;
|
||||
let mut extra_defs = Vec::new();
|
||||
let mut pattern_indices: SmallVec<[usize; 4]> = SmallVec::new();
|
||||
let mut rhs_array_elements: SmallVec<[crate::cfg::RhsArraySlot; 4]> = SmallVec::new();
|
||||
let mut uses = Vec::new();
|
||||
if let Some(lhs) = ast.child_by_field_name("left") {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(lhs, code, &mut idents, &mut paths);
|
||||
// Prefer dotted path (member expression) over last ident
|
||||
defs = paths.pop().or_else(|| idents.pop());
|
||||
let bindings = collect_array_pattern_bindings_indexed(lhs, code);
|
||||
if !bindings.is_empty() {
|
||||
let mut iter = bindings.into_iter();
|
||||
if let Some((first_name, first_idx)) = iter.next() {
|
||||
defs = Some(first_name);
|
||||
pattern_indices.push(first_idx);
|
||||
}
|
||||
for (name, idx) in iter {
|
||||
extra_defs.push(name);
|
||||
pattern_indices.push(idx);
|
||||
}
|
||||
} else {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(lhs, code, &mut idents, &mut paths);
|
||||
// Prefer dotted path (member expression) over last ident
|
||||
defs = paths.pop().or_else(|| idents.pop());
|
||||
}
|
||||
}
|
||||
if let Some(rhs) = ast.child_by_field_name("right") {
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -2189,8 +2465,16 @@ pub(super) fn def_use(
|
|||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(rhs, code));
|
||||
// When the LHS is a recognised destructure pattern AND the
|
||||
// RHS is a bare array-literal shape, record per-element
|
||||
// idents so the SSA destructure rewrite can map each
|
||||
// binding to its specific RHS slot.
|
||||
if !pattern_indices.is_empty() {
|
||||
rhs_array_elements =
|
||||
collect_rhs_array_literal_elements(rhs, lang, code, extra_labels);
|
||||
}
|
||||
}
|
||||
(defs, uses, vec![])
|
||||
(defs, uses, extra_defs, pattern_indices, rhs_array_elements)
|
||||
}
|
||||
|
||||
// if‑let / while‑let, the `let_condition` binds a variable from
|
||||
|
|
@ -2215,7 +2499,7 @@ pub(super) fn def_use(
|
|||
if let Some(val) = c.child_by_field_name("value") {
|
||||
collect_idents(val, code, &mut uses);
|
||||
}
|
||||
return (defs, uses, vec![]);
|
||||
return (defs, uses, vec![], SmallVec::new(), SmallVec::new());
|
||||
}
|
||||
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -2223,7 +2507,7 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
|
||||
let mut uses = paths;
|
||||
uses.extend(idents);
|
||||
(None, uses, vec![])
|
||||
(None, uses, vec![], SmallVec::new(), SmallVec::new())
|
||||
}
|
||||
|
||||
// for-in / for-of / Python `for x in iter:` ─────────────────────────
|
||||
|
|
@ -2267,7 +2551,7 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
|
||||
let mut uses = paths;
|
||||
uses.extend(idents);
|
||||
return (None, uses, vec![]);
|
||||
return (None, uses, vec![], SmallVec::new(), SmallVec::new());
|
||||
}
|
||||
|
||||
let mut defs: Option<String> = None;
|
||||
|
|
@ -2293,7 +2577,7 @@ pub(super) fn def_use(
|
|||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
}
|
||||
(defs, uses, extra_defs)
|
||||
(defs, uses, extra_defs, SmallVec::new(), SmallVec::new())
|
||||
}
|
||||
|
||||
// everything else – no definition, but may read vars
|
||||
|
|
@ -2303,7 +2587,7 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
|
||||
let mut uses = paths;
|
||||
uses.extend(idents);
|
||||
(None, uses, vec![])
|
||||
(None, uses, vec![], SmallVec::new(), SmallVec::new())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
1236
src/cfg/mod.rs
1236
src/cfg/mod.rs
File diff suppressed because it is too large
Load diff
882
src/cfg/safe_fields.rs
Normal file
882
src/cfg/safe_fields.rs
Normal file
|
|
@ -0,0 +1,882 @@
|
|||
//! Per-file extraction of class fields whose `.get(...)` lookups are
|
||||
//! provably safe.
|
||||
//!
|
||||
//! Recognises Java `final` fields whose initializer is `Map.of(K1, V1,
|
||||
//! K2, V2, ...)` with all string-literal arguments. At a downstream
|
||||
//! `<FIELD>.get(taintedKey)` call the result is bounded to the literal
|
||||
//! value set, so the SSA taint engine can suppress propagation from the
|
||||
//! key to the result. Without this pre-pass the engine sees `<FIELD>`
|
||||
//! as a free identifier with no SSA value, fails to resolve the
|
||||
//! container, and falls back to default arg-to-result propagation.
|
||||
//!
|
||||
//! Strictly additive: unrecognised initializer shapes (factory chains,
|
||||
//! `Map.ofEntries`, builders) produce no entry and the engine keeps
|
||||
//! its prior behaviour.
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use tree_sitter::Node;
|
||||
|
||||
use super::helpers::text_of;
|
||||
|
||||
thread_local! {
    /// Per-thread view of the current file's safe-lookup field map, published
    /// by [`with_safe_lookup_fields`] for the duration of a closure and read
    /// through [`safe_lookup_field_values`]. `None` means no view is
    /// published on this thread.
    static SAFE_LOOKUP_FIELDS_TLS: RefCell<Option<HashMap<String, Vec<String>>>> =
        const { RefCell::new(None) };
}

/// Run `f` with `fields` published as the per-thread safe-lookup view.
///
/// * `fields` — the map to publish (cloned into thread-local storage).
///   Passing `None` publishes an empty map, so every lookup made while `f`
///   runs answers `None`; use it for callers that lack a file context.
/// * `f` — the closure to run while the view is in place.
///
/// Returns whatever `f` returns. Whatever view was published before the
/// call is put back afterwards — including when `fields` is `None` and
/// when `f` unwinds — so nested calls compose and an inner call can never
/// erase the view of an enclosing one.
pub fn with_safe_lookup_fields<R>(
    fields: Option<&HashMap<String, Vec<String>>>,
    f: impl FnOnce() -> R,
) -> R {
    /// Reinstates the saved view when dropped.
    struct Restore(Option<HashMap<String, Vec<String>>>);
    impl Drop for Restore {
        fn drop(&mut self) {
            SAFE_LOOKUP_FIELDS_TLS.with(|cell| *cell.borrow_mut() = self.0.take());
        }
    }

    // Swap the new view in and remember the one it displaced. The displaced
    // view is always restored: discarding it for the `None` case would wipe
    // an outer caller's map as soon as a nested context-free call returned.
    let previous = SAFE_LOOKUP_FIELDS_TLS
        .with(|cell| cell.borrow_mut().replace(fields.cloned().unwrap_or_default()));
    let _restore = Restore(previous);
    f()
}

/// Look up the literal value set recorded for the safe field `name`.
///
/// Returns `None` when no view is published on this thread, when `name` is
/// not a known safe-lookup field, or when its recorded value list is empty;
/// otherwise returns a clone of the value list.
pub fn safe_lookup_field_values(name: &str) -> Option<Vec<String>> {
    SAFE_LOOKUP_FIELDS_TLS.with(|cell| {
        cell.borrow()
            .as_ref()
            .and_then(|view| view.get(name))
            .filter(|values| !values.is_empty())
            .cloned()
    })
}
|
||||
|
||||
/// Per-file safe-lookup field map: field name → finite set of literal
|
||||
/// values that `<field>.get(...)` may return. Empty for non-Java files.
|
||||
pub fn collect_safe_lookup_fields(
|
||||
root: Node<'_>,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
) -> HashMap<String, Vec<String>> {
|
||||
let mut out: HashMap<String, Vec<String>> = HashMap::new();
|
||||
if lang == "java" {
|
||||
collect_java(root, code, &mut out);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Per-file file-level constant scalar map: name → literal value text.
|
||||
///
|
||||
/// Recognises declarations that bind a name to a primitive scalar literal at
|
||||
/// file or class scope, where the per-function SSA const-prop has no view of
|
||||
/// the binding (the name is a free identifier from inside any function body):
|
||||
///
|
||||
/// - Java: `static final TYPE NAME = LITERAL;` fields (any class depth).
|
||||
/// - Python: `NAME = LITERAL` at module scope.
|
||||
/// - Go: `const NAME = LITERAL` and `const NAME TYPE = LITERAL` at package scope.
|
||||
/// - Rust: `const NAME: TYPE = LITERAL;` and `static NAME: TYPE = LITERAL;` at
|
||||
/// crate or module scope.
|
||||
///
|
||||
/// Used by `cfg_analysis::guards` to suppress `cfg-unguarded-sink` when a
|
||||
/// sink's argument is one of these bindings. `LITERAL` covers strings (no
|
||||
/// interpolation), integers in any supported base, floats, booleans, null /
|
||||
/// nil / None, and unary negation / not over those.
|
||||
///
|
||||
/// Empty for unsupported languages. Scalar means single-value, not
|
||||
/// container; the `Map.of(...)` form is captured by
|
||||
/// [`collect_safe_lookup_fields`].
|
||||
pub fn collect_class_constant_scalars(
|
||||
root: Node<'_>,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
) -> HashMap<String, String> {
|
||||
let mut out: HashMap<String, String> = HashMap::new();
|
||||
match lang {
|
||||
"java" => collect_java_constant_scalars(root, code, &mut out),
|
||||
"python" => collect_python_constant_scalars(root, code, &mut out),
|
||||
"go" => collect_go_constant_scalars(root, code, &mut out),
|
||||
"rust" => collect_rust_constant_scalars(root, code, &mut out),
|
||||
_ => {}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn collect_java_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
walk(root, &mut |node| {
|
||||
if node.kind() != "field_declaration" {
|
||||
return;
|
||||
}
|
||||
if !has_static_modifier(node) || !has_final_modifier(node) {
|
||||
return;
|
||||
}
|
||||
// A single `field_declaration` may carry multiple
|
||||
// `variable_declarator` children (`static final int A = 1, B = 2;`).
|
||||
// Iterate every declarator field; tree-sitter exposes them under
|
||||
// the `declarator` field name as repeated entries.
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children_by_field_name("declarator", &mut cursor) {
|
||||
let Some(name_node) = child.child_by_field_name("name") else {
|
||||
continue;
|
||||
};
|
||||
let Some(field_name) = text_of(name_node, code) else {
|
||||
continue;
|
||||
};
|
||||
let Some(value_node) = child.child_by_field_name("value") else {
|
||||
continue;
|
||||
};
|
||||
let Some(literal) = scalar_literal_text(value_node, code) else {
|
||||
continue;
|
||||
};
|
||||
out.insert(field_name, literal);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Python: module-level `NAME = LITERAL` assignments. Only top-level
|
||||
/// expression statements are considered; assignments inside function bodies,
|
||||
/// class bodies, or other blocks are out of scope (a per-function SSA pass
|
||||
/// already sees those).
|
||||
fn collect_python_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
if root.kind() != "module" {
|
||||
return;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
for child in root.named_children(&mut cursor) {
|
||||
if child.kind() != "expression_statement" {
|
||||
continue;
|
||||
}
|
||||
let Some(assign) = child.named_child(0) else {
|
||||
continue;
|
||||
};
|
||||
if assign.kind() != "assignment" {
|
||||
continue;
|
||||
}
|
||||
let Some(target) = assign.child_by_field_name("left") else {
|
||||
continue;
|
||||
};
|
||||
if target.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let Some(name) = text_of(target, code) else {
|
||||
continue;
|
||||
};
|
||||
let Some(value) = assign.child_by_field_name("right") else {
|
||||
continue;
|
||||
};
|
||||
let Some(literal) = python_scalar_literal_text(value, code) else {
|
||||
continue;
|
||||
};
|
||||
out.insert(name, literal);
|
||||
}
|
||||
}
|
||||
|
||||
/// Go: package-level `const NAME = LITERAL` and `const NAME TYPE = LITERAL`,
|
||||
/// including the grouped `const (...)` form. Iterates direct
|
||||
/// `const_declaration` children of the source file, then per-`const_spec`
|
||||
/// reads the `name` list and `value` expression list, binding by position.
|
||||
fn collect_go_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
if root.kind() != "source_file" {
|
||||
return;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
for child in root.named_children(&mut cursor) {
|
||||
if child.kind() != "const_declaration" {
|
||||
continue;
|
||||
}
|
||||
let mut spec_cursor = child.walk();
|
||||
for spec in child.named_children(&mut spec_cursor) {
|
||||
if spec.kind() != "const_spec" {
|
||||
continue;
|
||||
}
|
||||
collect_go_const_spec(spec, code, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_go_const_spec(spec: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
// tree-sitter-go `const_spec`:
|
||||
// name: <identifier> (repeated) — one or more identifiers
|
||||
// value: <expression_list> — list of value expressions
|
||||
// For a multi-target spec `const A, B = 1, 2`, identifiers and values pair
|
||||
// up positionally. The simpler single-target form parses the same way
|
||||
// with one entry per side.
|
||||
let mut name_cursor = spec.walk();
|
||||
let names: Vec<Node<'_>> = spec
|
||||
.children_by_field_name("name", &mut name_cursor)
|
||||
.collect();
|
||||
if names.is_empty() {
|
||||
return;
|
||||
}
|
||||
let Some(value_list) = spec.child_by_field_name("value") else {
|
||||
return;
|
||||
};
|
||||
let mut value_cursor = value_list.walk();
|
||||
let values: Vec<Node<'_>> = value_list.named_children(&mut value_cursor).collect();
|
||||
if values.len() != names.len() {
|
||||
return;
|
||||
}
|
||||
for (name_node, value_node) in names.iter().zip(values.iter()) {
|
||||
if name_node.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let Some(name) = text_of(*name_node, code) else {
|
||||
continue;
|
||||
};
|
||||
let Some(literal) = go_scalar_literal_text(*value_node, code) else {
|
||||
continue;
|
||||
};
|
||||
out.insert(name, literal);
|
||||
}
|
||||
}
|
||||
|
||||
/// Rust: module-level `const NAME: TYPE = LITERAL;` and `static NAME: TYPE =
|
||||
/// LITERAL;`. Only direct children of `source_file` participate so a `const`
|
||||
/// defined inside a function body does not bleed across scopes.
|
||||
fn collect_rust_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
if root.kind() != "source_file" {
|
||||
return;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
for child in root.named_children(&mut cursor) {
|
||||
if !matches!(child.kind(), "const_item" | "static_item") {
|
||||
continue;
|
||||
}
|
||||
let Some(name_node) = child.child_by_field_name("name") else {
|
||||
continue;
|
||||
};
|
||||
let Some(name) = text_of(name_node, code) else {
|
||||
continue;
|
||||
};
|
||||
let Some(value_node) = child.child_by_field_name("value") else {
|
||||
continue;
|
||||
};
|
||||
let Some(literal) = rust_scalar_literal_text(value_node, code) else {
|
||||
continue;
|
||||
};
|
||||
out.insert(name, literal);
|
||||
}
|
||||
}
|
||||
|
||||
/// `true` when `field_declaration` carries a `static` modifier.
|
||||
fn has_static_modifier(field_decl: Node<'_>) -> bool {
|
||||
let mut cursor = field_decl.walk();
|
||||
for child in field_decl.children(&mut cursor) {
|
||||
if child.kind() != "modifiers" {
|
||||
continue;
|
||||
}
|
||||
let mut sub = child.walk();
|
||||
for mod_child in child.children(&mut sub) {
|
||||
if mod_child.kind() == "static" {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Return the source text when `value` is a primitive scalar literal node.
|
||||
/// Covers the Java grammar's literal kinds. Returns `None` for compound
|
||||
/// expressions, identifier references, method invocations, and other
|
||||
/// non-literal initializers.
|
||||
fn scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
match value.kind() {
|
||||
"string_literal"
|
||||
| "decimal_integer_literal"
|
||||
| "hex_integer_literal"
|
||||
| "octal_integer_literal"
|
||||
| "binary_integer_literal"
|
||||
| "decimal_floating_point_literal"
|
||||
| "hex_floating_point_literal"
|
||||
| "character_literal"
|
||||
| "true"
|
||||
| "false"
|
||||
| "null_literal" => text_of(value, code),
|
||||
// Unary `-1`, `+0`, `!true` over a literal child still resolve to a
|
||||
// compile-time constant; recurse into the operand.
|
||||
"unary_expression" => {
|
||||
let operand = value.child_by_field_name("operand")?;
|
||||
scalar_literal_text(operand, code)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Python scalar literal classifier. Rejects f-strings with interpolation
|
||||
/// (`f"x{var}"` parses as `string` with an `interpolation` child); returns
|
||||
/// the source text otherwise.
|
||||
fn python_scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
match value.kind() {
|
||||
"string" => {
|
||||
if python_string_has_interpolation(value) {
|
||||
None
|
||||
} else {
|
||||
text_of(value, code)
|
||||
}
|
||||
}
|
||||
"integer" | "float" | "true" | "false" | "none" => text_of(value, code),
|
||||
"unary_operator" => {
|
||||
let operand = value.child_by_field_name("argument")?;
|
||||
python_scalar_literal_text(operand, code)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn python_string_has_interpolation(node: Node<'_>) -> bool {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if child.kind() == "interpolation" {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Go scalar literal classifier. `interpreted_string_literal` and
|
||||
/// `raw_string_literal` cover both `"x"` and `` `x` `` forms.
|
||||
fn go_scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
match value.kind() {
|
||||
"interpreted_string_literal"
|
||||
| "raw_string_literal"
|
||||
| "int_literal"
|
||||
| "float_literal"
|
||||
| "imaginary_literal"
|
||||
| "rune_literal"
|
||||
| "true"
|
||||
| "false"
|
||||
| "nil" => text_of(value, code),
|
||||
"unary_expression" => {
|
||||
let operand = value.child_by_field_name("operand")?;
|
||||
go_scalar_literal_text(operand, code)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Rust scalar literal classifier. Accepts `string_literal`, `raw_string_literal`
|
||||
/// (both unwrappable to a single text run), integer / float / boolean / char.
|
||||
fn rust_scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
match value.kind() {
|
||||
"string_literal" | "raw_string_literal" | "integer_literal" | "float_literal"
|
||||
| "char_literal" | "boolean_literal" => text_of(value, code),
|
||||
// `true` / `false` are leaf identifier-ish nodes in some grammars but
|
||||
// tree-sitter-rust gives them the `boolean_literal` kind; defensively
|
||||
// accept the leaf form too in case the grammar is upgraded.
|
||||
"true" | "false" => text_of(value, code),
|
||||
"unary_expression" => {
|
||||
let mut cursor = value.walk();
|
||||
value
|
||||
.named_children(&mut cursor)
|
||||
.find_map(|c| rust_scalar_literal_text(c, code))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_java(root: Node<'_>, code: &[u8], out: &mut HashMap<String, Vec<String>>) {
|
||||
walk(root, &mut |node| {
|
||||
if node.kind() != "field_declaration" {
|
||||
return;
|
||||
}
|
||||
if !has_final_modifier(node) {
|
||||
return;
|
||||
}
|
||||
let Some(decl) = node.child_by_field_name("declarator") else {
|
||||
return;
|
||||
};
|
||||
let Some(name_node) = decl.child_by_field_name("name") else {
|
||||
return;
|
||||
};
|
||||
let Some(field_name) = text_of(name_node, code) else {
|
||||
return;
|
||||
};
|
||||
let Some(value_node) = decl.child_by_field_name("value") else {
|
||||
return;
|
||||
};
|
||||
let Some(values) = extract_map_of_literal_values(value_node, code) else {
|
||||
return;
|
||||
};
|
||||
out.insert(field_name, values);
|
||||
});
|
||||
}
|
||||
|
||||
/// `true` when `field_declaration` carries a `final` modifier (static or
|
||||
/// instance — both block reassignment after construction).
|
||||
fn has_final_modifier(field_decl: Node<'_>) -> bool {
|
||||
let mut cursor = field_decl.walk();
|
||||
for child in field_decl.children(&mut cursor) {
|
||||
if child.kind() != "modifiers" {
|
||||
continue;
|
||||
}
|
||||
let mut sub = child.walk();
|
||||
for mod_child in child.children(&mut sub) {
|
||||
if mod_child.kind() == "final" {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// If `value_node` is `Map.of(LIT, LIT, LIT, LIT, ...)` with at least one
|
||||
/// key/value pair and every argument a `string_literal`, return the
|
||||
/// value-position literals (positions 1, 3, 5, ...).
|
||||
fn extract_map_of_literal_values(value_node: Node<'_>, code: &[u8]) -> Option<Vec<String>> {
|
||||
if value_node.kind() != "method_invocation" {
|
||||
return None;
|
||||
}
|
||||
let object_node = value_node.child_by_field_name("object")?;
|
||||
let method_node = value_node.child_by_field_name("name")?;
|
||||
let method_text = text_of(method_node, code)?;
|
||||
if method_text != "of" {
|
||||
return None;
|
||||
}
|
||||
if !receiver_is_map_class(object_node, code) {
|
||||
return None;
|
||||
}
|
||||
let args_node = value_node.child_by_field_name("arguments")?;
|
||||
let mut cursor = args_node.walk();
|
||||
let args: Vec<Node<'_>> = args_node.named_children(&mut cursor).collect();
|
||||
if args.is_empty() || !args.len().is_multiple_of(2) {
|
||||
return None;
|
||||
}
|
||||
let mut values = Vec::with_capacity(args.len() / 2);
|
||||
for (i, arg) in args.iter().enumerate() {
|
||||
if arg.kind() != "string_literal" {
|
||||
return None;
|
||||
}
|
||||
if i % 2 == 1 {
|
||||
let literal = string_literal_value(*arg, code)?;
|
||||
values.push(literal);
|
||||
}
|
||||
}
|
||||
Some(values)
|
||||
}
|
||||
|
||||
/// `true` when `node` resolves to the `Map` class — either the bare
|
||||
/// identifier `Map` or a `field_access` whose tail segment is `Map`
|
||||
/// (covers `java.util.Map.of(...)`).
|
||||
fn receiver_is_map_class(node: Node<'_>, code: &[u8]) -> bool {
|
||||
match node.kind() {
|
||||
"identifier" => text_of(node, code).as_deref() == Some("Map"),
|
||||
"field_access" => {
|
||||
// tail segment lives on the `field` field
|
||||
let Some(field) = node.child_by_field_name("field") else {
|
||||
return false;
|
||||
};
|
||||
text_of(field, code).as_deref() == Some("Map")
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the inner content of a Java `string_literal` node. The
|
||||
/// grammar wraps the value in `string_fragment` children between quote
|
||||
/// tokens; concatenate every `string_fragment` so escaped quotes inside
|
||||
/// the literal are not lost. Returns `None` for literals containing
|
||||
/// interpolation / escape-sequence children that do not classify as a
|
||||
/// pure string fragment.
|
||||
fn string_literal_value(node: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = node.walk();
|
||||
let mut out = String::new();
|
||||
let mut saw_fragment = false;
|
||||
for child in node.named_children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"string_fragment" => {
|
||||
saw_fragment = true;
|
||||
out.push_str(&text_of(child, code)?);
|
||||
}
|
||||
"escape_sequence" => {
|
||||
// A real escape sequence keeps the literal pure-string but
|
||||
// we cannot trivially decode it; return None to be
|
||||
// conservative on header-injection safety.
|
||||
return None;
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
if saw_fragment {
|
||||
Some(out)
|
||||
} else {
|
||||
// Empty literal `""` — has no `string_fragment` children but is
|
||||
// a valid empty string.
|
||||
let raw = text_of(node, code)?;
|
||||
if raw == "\"\"" {
|
||||
Some(String::new())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn walk<'a, F: FnMut(Node<'a>)>(node: Node<'a>, f: &mut F) {
|
||||
f(node);
|
||||
let mut cursor = node.walk();
|
||||
for child in node.named_children(&mut cursor) {
|
||||
walk(child, f);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tree_sitter::Parser;

    /// Parse `src` as Java and run `collect_safe_lookup_fields` over the
    /// resulting tree.
    fn collect(src: &str) -> HashMap<String, Vec<String>> {
        let mut p = Parser::new();
        p.set_language(&tree_sitter_java::LANGUAGE.into()).unwrap();
        let tree = p.parse(src, None).unwrap();
        collect_safe_lookup_fields(tree.root_node(), "java", src.as_bytes())
    }

    // ── Safe lookup fields (`Map.of(...)` initialisers) ─────────────────

    #[test]
    fn static_final_map_of_two_pairs() {
        let src = r#"
            class C {
                private static final java.util.Map<String, String> T = Map.of(
                    "a", "x", "b", "y"
                );
            }
        "#;
        let out = collect(src);
        // Only the value positions ("x", "y") are recorded, keyed by field name.
        assert_eq!(out.get("T"), Some(&vec!["x".to_string(), "y".to_string()]));
    }

    #[test]
    fn instance_final_map_of_one_pair() {
        let src = r#"
            class C {
                private final java.util.Map<String, String> T = Map.of("a", "x");
            }
        "#;
        let out = collect(src);
        assert_eq!(out.get("T"), Some(&vec!["x".to_string()]));
    }

    #[test]
    fn rejects_non_final_field() {
        let src = r#"
            class C {
                private static java.util.Map<String, String> T = Map.of("a", "x");
            }
        "#;
        let out = collect(src);
        assert!(out.is_empty());
    }

    #[test]
    fn rejects_non_literal_value() {
        let src = r#"
            class C {
                private static final String SAFE = "x";
                private static final java.util.Map<String, String> T = Map.of("a", SAFE);
            }
        "#;
        let out = collect(src);
        // SAFE is an identifier, not a string_literal — even though const-
        // foldable, the syntactic check rejects to stay simple.
        assert!(!out.contains_key("T"));
    }

    #[test]
    fn rejects_odd_arg_count() {
        // Compiler would reject this too, but the extractor must not panic.
        let src = r#"
            class C {
                private static final java.util.Map<String, String> T = Map.of("a", "x", "b");
            }
        "#;
        let out = collect(src);
        assert!(out.is_empty());
    }

    #[test]
    fn rejects_empty_map_of() {
        let src = r#"
            class C {
                private static final java.util.Map<String, String> T = Map.of();
            }
        "#;
        let out = collect(src);
        assert!(out.is_empty());
    }

    #[test]
    fn fully_qualified_map_of() {
        let src = r#"
            class C {
                private static final java.util.Map<String, String> T = java.util.Map.of(
                    "a", "x", "b", "y"
                );
            }
        "#;
        let out = collect(src);
        assert_eq!(out.get("T"), Some(&vec!["x".to_string(), "y".to_string()]));
    }

    #[test]
    fn rejects_escape_sequence_value() {
        let src = r#"
            class C {
                private static final java.util.Map<String, String> T = Map.of(
                    "a", "with\nnewline"
                );
            }
        "#;
        let out = collect(src);
        // `\n` would smuggle a CRLF-style metachar through the static
        // gate; conservative reject keeps header-injection suppression
        // honest.
        assert!(!out.contains_key("T"));
    }

    #[test]
    fn ignores_non_java_lang() {
        // A JavaScript tree with the "javascript" slug must produce no entries.
        let src = "const x = 1;";
        let mut p = Parser::new();
        p.set_language(&tree_sitter_javascript::LANGUAGE.into())
            .unwrap();
        let tree = p.parse(src, None).unwrap();
        let out = collect_safe_lookup_fields(tree.root_node(), "javascript", src.as_bytes());
        assert!(out.is_empty());
    }

    // ── Class / module constant scalars ─────────────────────────────────

    /// Parse `src` as Java and run `collect_class_constant_scalars` over the
    /// resulting tree.
    fn collect_consts(src: &str) -> HashMap<String, String> {
        let mut p = Parser::new();
        p.set_language(&tree_sitter_java::LANGUAGE.into()).unwrap();
        let tree = p.parse(src, None).unwrap();
        collect_class_constant_scalars(tree.root_node(), "java", src.as_bytes())
    }

    #[test]
    fn class_constants_capture_string_int_bool() {
        let src = r#"
            class C {
                private static final String DRIVER = "com.mysql.cj.jdbc.Driver";
                public static final int LIMIT = 100;
                static final boolean DEBUG = false;
            }
        "#;
        let out = collect_consts(src);
        // String values are stored as source text, surrounding quotes included.
        assert_eq!(
            out.get("DRIVER"),
            Some(&"\"com.mysql.cj.jdbc.Driver\"".to_string())
        );
        assert_eq!(out.get("LIMIT"), Some(&"100".to_string()));
        assert_eq!(out.get("DEBUG"), Some(&"false".to_string()));
    }

    #[test]
    fn class_constants_capture_multi_declarator() {
        let src = r#"
            class C {
                private static final int A = 1, B = 2, C2 = 3;
            }
        "#;
        let out = collect_consts(src);
        assert_eq!(out.get("A"), Some(&"1".to_string()));
        assert_eq!(out.get("B"), Some(&"2".to_string()));
        assert_eq!(out.get("C2"), Some(&"3".to_string()));
    }

    #[test]
    fn class_constants_capture_unary_negation() {
        let src = r#"
            class C {
                private static final int OFFSET = -1;
            }
        "#;
        let out = collect_consts(src);
        // text_of returns the operand text, not the wrapper text.
        assert_eq!(out.get("OFFSET"), Some(&"1".to_string()));
    }

    #[test]
    fn class_constants_reject_non_static() {
        let src = r#"
            class C {
                private final String NAME = "x";
            }
        "#;
        let out = collect_consts(src);
        assert!(!out.contains_key("NAME"));
    }

    #[test]
    fn class_constants_reject_non_final() {
        let src = r#"
            class C {
                private static String NAME = "x";
            }
        "#;
        let out = collect_consts(src);
        assert!(!out.contains_key("NAME"));
    }

    #[test]
    fn class_constants_reject_identifier_value() {
        // Neither a call result nor a reference to another field is a literal.
        let src = r#"
            class C {
                private static final String OTHER = computed();
                private static final String COPY = OTHER;
            }
        "#;
        let out = collect_consts(src);
        assert!(!out.contains_key("OTHER"));
        assert!(!out.contains_key("COPY"));
    }

    #[test]
    fn class_constants_capture_inside_inner_class() {
        let src = r#"
            class Outer {
                static class Inner {
                    private static final String DRIVER = "x";
                }
            }
        "#;
        let out = collect_consts(src);
        assert_eq!(out.get("DRIVER"), Some(&"\"x\"".to_string()));
    }

    #[test]
    fn class_constants_ignore_non_supported_lang() {
        let src = "const x = 1;";
        let mut p = Parser::new();
        p.set_language(&tree_sitter_javascript::LANGUAGE.into())
            .unwrap();
        let tree = p.parse(src, None).unwrap();
        let out = collect_class_constant_scalars(tree.root_node(), "javascript", src.as_bytes());
        assert!(out.is_empty());
    }

    /// Parse `src` with the grammar selected by `lang` ("python", "go" or
    /// "rust") and run `collect_class_constant_scalars` with the same slug.
    /// Any other slug is a bug in the calling test and panics.
    fn collect_consts_lang(src: &str, lang: &str) -> HashMap<String, String> {
        let mut p = Parser::new();
        match lang {
            "python" => p
                .set_language(&tree_sitter_python::LANGUAGE.into())
                .unwrap(),
            "go" => p.set_language(&tree_sitter_go::LANGUAGE.into()).unwrap(),
            "rust" => p.set_language(&tree_sitter_rust::LANGUAGE.into()).unwrap(),
            _ => unreachable!("unsupported lang in test helper: {lang}"),
        };
        let tree = p.parse(src, None).unwrap();
        collect_class_constant_scalars(tree.root_node(), lang, src.as_bytes())
    }

    // ── Python module-level constants ───────────────────────────────────

    #[test]
    fn python_module_constants_capture_scalars() {
        let src = "DRIVER = \"sqlite3\"\nLIMIT = 100\nDEBUG = False\nNAME = None\n";
        let out = collect_consts_lang(src, "python");
        assert_eq!(out.get("DRIVER"), Some(&"\"sqlite3\"".to_string()));
        assert_eq!(out.get("LIMIT"), Some(&"100".to_string()));
        assert_eq!(out.get("DEBUG"), Some(&"False".to_string()));
        assert_eq!(out.get("NAME"), Some(&"None".to_string()));
    }

    #[test]
    fn python_module_constants_capture_unary_negation() {
        // The recogniser recurses into the operand and returns its text, so
        // `OFFSET = -1` stores `"1"`. The downstream suppression consumer
        // only cares about name binding, not the decoded numeric value.
        let src = "OFFSET = -1\n";
        let out = collect_consts_lang(src, "python");
        assert_eq!(out.get("OFFSET"), Some(&"1".to_string()));
    }

    #[test]
    fn python_module_constants_reject_fstring_with_interpolation() {
        let src = "import os\nVAR = f\"hi {os.getcwd()}\"\n";
        let out = collect_consts_lang(src, "python");
        assert!(!out.contains_key("VAR"));
    }

    #[test]
    fn python_module_constants_reject_call_value() {
        let src = "from os import getcwd\nPATH = getcwd()\n";
        let out = collect_consts_lang(src, "python");
        assert!(!out.contains_key("PATH"));
    }

    #[test]
    fn python_module_constants_skip_inside_function_body() {
        // An assignment inside a function body is per-function SSA's job.
        // Only top-level module assignments should land in the map.
        let src = "def f():\n INNER = \"x\"\n return INNER\n";
        let out = collect_consts_lang(src, "python");
        assert!(!out.contains_key("INNER"));
    }

    // ── Go package-level constants ──────────────────────────────────────

    #[test]
    fn go_package_constants_capture_scalars() {
        let src =
            "package main\nconst DRIVER = \"postgres\"\nconst LIMIT = 100\nconst FLAG = true\n";
        let out = collect_consts_lang(src, "go");
        assert_eq!(out.get("DRIVER"), Some(&"\"postgres\"".to_string()));
        assert_eq!(out.get("LIMIT"), Some(&"100".to_string()));
        assert_eq!(out.get("FLAG"), Some(&"true".to_string()));
    }

    #[test]
    fn go_package_constants_capture_grouped_const_block() {
        // Covers the parenthesised `const ( ... )` form, including a typed spec.
        let src = "package main\nconst (\n A = \"x\"\n B int = 42\n C = false\n)\n";
        let out = collect_consts_lang(src, "go");
        assert_eq!(out.get("A"), Some(&"\"x\"".to_string()));
        assert_eq!(out.get("B"), Some(&"42".to_string()));
        assert_eq!(out.get("C"), Some(&"false".to_string()));
    }

    #[test]
    fn go_package_constants_reject_non_literal() {
        let src = "package main\nconst OTHER = foo()\n";
        let out = collect_consts_lang(src, "go");
        assert!(!out.contains_key("OTHER"));
    }

    #[test]
    fn go_package_constants_skip_inside_function_body() {
        // `const` inside a function body is per-function SSA's territory.
        let src = "package main\nfunc f() string { const INNER = \"x\"; return INNER }\n";
        let out = collect_consts_lang(src, "go");
        assert!(!out.contains_key("INNER"));
    }

    // ── Rust module-level consts / statics ──────────────────────────────

    #[test]
    fn rust_module_consts_capture_scalars() {
        // Both `const` and `static` items with literal initialisers are captured.
        let src = "const DRIVER: &str = \"sqlite\";\nconst LIMIT: i32 = 100;\nstatic FLAG: bool = false;\n";
        let out = collect_consts_lang(src, "rust");
        assert_eq!(out.get("DRIVER"), Some(&"\"sqlite\"".to_string()));
        assert_eq!(out.get("LIMIT"), Some(&"100".to_string()));
        assert_eq!(out.get("FLAG"), Some(&"false".to_string()));
    }

    #[test]
    fn rust_module_consts_reject_non_literal() {
        let src = "const VAL: i32 = some_func();\n";
        let out = collect_consts_lang(src, "rust");
        assert!(!out.contains_key("VAL"));
    }

    #[test]
    fn rust_module_consts_skip_inside_function_body() {
        // A `const` nested in a function body must not reach the module map.
        let src = "fn f() -> &'static str { const INNER: &str = \"x\"; INNER }\n";
        let out = collect_consts_lang(src, "rust");
        assert!(!out.contains_key("INNER"));
    }
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -208,6 +208,13 @@ pub struct AnalysisContext<'a> {
|
|||
/// in a callback the per-body CFG can't observe. When `None`, no
|
||||
/// closure-based suppression is applied.
|
||||
pub closure_released_var_names: Option<&'a std::collections::HashSet<String>>,
|
||||
/// Class-level constant scalars discovered for this file, keyed by
|
||||
/// the unqualified field name (Java `static final TYPE NAME = LIT;`).
|
||||
/// Used by `cfg_analysis::guards` to treat identifiers referencing
|
||||
/// these fields as compile-time constants for the
|
||||
/// `cfg-unguarded-sink` all-args-constant check. `None` outside Java
|
||||
/// and on call sites that have not threaded the map through.
|
||||
pub class_constant_scalars: Option<&'a std::collections::HashMap<String, String>>,
|
||||
}
|
||||
|
||||
pub trait CfgAnalysis {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,43 @@ use std::collections::HashSet;
|
|||
|
||||
pub struct ResourceMisuse;
|
||||
|
||||
/// Distinguishes `obj.connect("event-name", handler)` event-handler
|
||||
/// registrations from real database-connection acquires.
|
||||
///
|
||||
/// Recognises the canonical handler shape: a string-literal first arg
|
||||
/// that does not look like a URL (`scheme://`), plus a second positional
|
||||
/// argument that resolves to a single identifier (the callable being
|
||||
/// registered). SQLAlchemy `engine.connect()` and `sqlite3.connect(
|
||||
/// "path.db")` either pass zero args or a single string, so they fall
|
||||
/// through and the leak check still fires.
|
||||
///
|
||||
/// Kept out of the static `exclude_acquire` list because that list is
|
||||
/// callee-substring-only; this check needs to read argument shape from
|
||||
/// the call node.
|
||||
fn is_event_handler_register_shape(info: &crate::cfg::NodeInfo) -> bool {
|
||||
let Some(first_literal) = info
|
||||
.call
|
||||
.arg_string_literals
|
||||
.first()
|
||||
.and_then(|x| x.as_ref())
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
if first_literal.contains("://") {
|
||||
return false;
|
||||
}
|
||||
let Some(second_uses) = info.call.arg_uses.get(1) else {
|
||||
return false;
|
||||
};
|
||||
// A bare identifier (`callback`) lands as `["callback"]`; a
|
||||
// member-access ref (`self._on_status`) lands as `["self",
|
||||
// "_on_status"]`. Both are valid handler shapes. Real DB connects
|
||||
// either have no second positional or pass a non-ident value
|
||||
// (string literal for `connect("user", "pass", ...)`), which lands
|
||||
// as an empty `arg_uses[1]`.
|
||||
!second_uses.is_empty()
|
||||
}
|
||||
|
||||
/// Find nodes matching acquire patterns for a given resource pair,
|
||||
/// excluding any that match `exclude_patterns`.
|
||||
fn find_acquire_nodes(
|
||||
|
|
@ -517,6 +554,21 @@ impl CfgAnalysis for ResourceMisuse {
|
|||
if ctx.cfg[acquire].managed_resource {
|
||||
continue;
|
||||
}
|
||||
// Suppress `obj.connect("event-name", callback)` event-
|
||||
// handler registrations that share the `connect` /
|
||||
// `cursor` callee suffix with real DB acquires. Sphinx
|
||||
// app.connect("config-inited", on_init), Flask blueprint
|
||||
// handlers, and MQTT client.connect("topic", on_msg) all
|
||||
// pass a string literal event name plus a callable
|
||||
// identifier; SQLAlchemy `engine.connect()` and
|
||||
// `sqlite3.connect("path.db")` either have no args or a
|
||||
// single string arg. Gated on the `db connection`
|
||||
// resource name so file/socket/mutex pairs are untouched.
|
||||
if pair.resource_name == "db connection"
|
||||
&& is_event_handler_register_shape(&ctx.cfg[acquire])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// SAFE-FOR-FIELD-LHS (Go only): skip member-expression
|
||||
// LHS acquires. `b.cpuprof = os.Create(...)` transfers
|
||||
// ownership to the containing struct; closure
|
||||
|
|
@ -598,3 +650,83 @@ impl CfgAnalysis for ResourceMisuse {
|
|||
findings
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{CallMeta, NodeInfo, StmtKind};

    /// Build a minimal `obj.connect(...)` call node carrying only the two
    /// per-argument vectors that the shape check reads.
    fn call_node(arg_string_literals: Vec<Option<String>>, arg_uses: Vec<Vec<String>>) -> NodeInfo {
        let call = CallMeta {
            callee: Some("obj.connect".into()),
            arg_string_literals,
            arg_uses,
            ..Default::default()
        };
        NodeInfo {
            kind: StmtKind::Call,
            call,
            ..Default::default()
        }
    }

    /// A string-literal argument slot.
    fn lit(text: &str) -> Option<String> {
        Some(String::from(text))
    }

    /// The identifier uses recorded for one argument slot.
    fn names(idents: &[&str]) -> Vec<String> {
        idents.iter().map(|ident| String::from(*ident)).collect()
    }

    #[test]
    fn event_handler_shape_recognises_sphinx_connect() {
        // app.connect("config-inited", _on_init)
        let node = call_node(
            vec![lit("config-inited"), None],
            vec![names(&[]), names(&["_on_init"])],
        );
        assert!(is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_recognises_self_method_callback() {
        // client.connect("device/+", self._on_status)
        let node = call_node(
            vec![lit("device/+"), None],
            vec![names(&[]), names(&["self", "_on_status"])],
        );
        assert!(is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_url_first_arg() {
        // engine.connect("postgres://localhost/mydb")
        let node = call_node(vec![lit("postgres://localhost/mydb")], vec![names(&[])]);
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_oracle_string_args() {
        // cx_Oracle.connect("user", "pass", "dsn") -- arg1 is a literal,
        // so `arg_uses[1]` holds no identifier.
        let node = call_node(
            vec![lit("user"), lit("pass"), lit("dsn")],
            vec![names(&[]), names(&[]), names(&[])],
        );
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_no_args() {
        // engine.connect()
        let node = call_node(Vec::new(), Vec::new());
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_single_string_arg() {
        // sqlite3.connect("path.db")
        let node = call_node(vec![lit("path.db")], vec![names(&[])]);
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_ident_first_arg() {
        // signal.connect(receiver_func, sender=...) is already covered by
        // the static exclude list entry `signal.connect`, but the shape
        // check must gate it out as well: the first arg is not a string
        // literal.
        let node = call_node(vec![None], vec![names(&["receiver_func"])]);
        assert!(!is_event_handler_register_shape(&node));
    }
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
|
|||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
class_constant_scalars: None,
|
||||
};
|
||||
analysis.run(&ctx)
|
||||
}
|
||||
|
|
@ -65,6 +66,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
|
|||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
class_constant_scalars: None,
|
||||
};
|
||||
run_all(&ctx)
|
||||
}
|
||||
|
|
@ -100,6 +102,7 @@ fn parse_and_run_all_with_taint(
|
|||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
class_constant_scalars: None,
|
||||
};
|
||||
run_all(&ctx)
|
||||
}
|
||||
|
|
@ -219,6 +222,7 @@ fn parse_and_analyse_with_ssa<A: CfgAnalysis>(
|
|||
type_facts: facts.as_ref().map(|f| &f.type_facts),
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
class_constant_scalars: None,
|
||||
};
|
||||
analysis.run(&ctx)
|
||||
}
|
||||
|
|
@ -1235,6 +1239,7 @@ fn config_sanitizer_suppresses_unguarded_sink() {
|
|||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
class_constant_scalars: None,
|
||||
};
|
||||
let findings = run_all(&ctx);
|
||||
|
||||
|
|
@ -1715,6 +1720,7 @@ fn cfg_only_no_taint_produces_low_severity() {
|
|||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
class_constant_scalars: None,
|
||||
};
|
||||
let findings = guards::UnguardedSink.run(&ctx);
|
||||
|
||||
|
|
|
|||
|
|
@ -215,7 +215,7 @@ fn print_label_row(r: &RuleInfo) {
|
|||
String::new()
|
||||
} else {
|
||||
let joined = r.matchers.join(", ");
|
||||
format!(" — {joined}")
|
||||
format!(" {joined}")
|
||||
};
|
||||
println!(
|
||||
" {} {:<10} {:<10} {:<14}{}{}",
|
||||
|
|
|
|||
|
|
@ -245,6 +245,25 @@ pub(crate) fn ensure_framework_ctx(root: &Path, cfg: &Config) -> Option<Config>
|
|||
Some(c)
|
||||
}
|
||||
|
||||
/// Build a [`crate::resolve::ModuleGraph`] for `root` and stash it on a
|
||||
/// clone of `cfg`. Returns `None` when the cfg already carries one or
|
||||
/// when the build produced an empty graph.
|
||||
///
|
||||
/// Mirrors `ensure_framework_ctx`'s lifecycle: scan-path entry points
|
||||
/// call this once between the file walk and pass 1, the graph is shared
|
||||
/// across all per-file analysis via `Config::module_graph`. Building is
|
||||
/// best-effort, errors during fs walk land as missing entries rather
|
||||
/// than aborts.
|
||||
pub(crate) fn ensure_module_graph(root: &Path, cfg: &Config) -> Option<Config> {
|
||||
if cfg.module_graph.is_some() {
|
||||
return None;
|
||||
}
|
||||
let graph = crate::resolve::build_module_graph(&[root.to_path_buf()]);
|
||||
let mut c = cfg.clone();
|
||||
c.module_graph = Some(std::sync::Arc::new(graph));
|
||||
Some(c)
|
||||
}
|
||||
|
||||
/// Does `path` belong to a Preview-tier language (C or C++)?
|
||||
///
|
||||
/// Drives the one-time `preview-tier scan` banner in `handle()`. Tracks
|
||||
|
|
@ -1085,6 +1104,7 @@ fn run_topo_batches(
|
|||
.collect();
|
||||
|
||||
let mut ssa_count: usize = 0;
|
||||
let mg = cfg.module_graph.as_deref();
|
||||
for (path, diags, summaries, ssa_summaries, _ssa_bodies) in batch_results {
|
||||
// Phase-B: replace (not append) this file's diags
|
||||
// so the cache always reflects the latest
|
||||
|
|
@ -1093,7 +1113,7 @@ fn run_topo_batches(
|
|||
diags_by_file.insert(path, diags);
|
||||
|
||||
for s in summaries {
|
||||
let key = s.func_key(root_str_ref);
|
||||
let key = s.func_key_with_resolver(root_str_ref, mg);
|
||||
global_summaries.insert(key, s);
|
||||
}
|
||||
|
||||
|
|
@ -1143,7 +1163,7 @@ fn run_topo_batches(
|
|||
.iter()
|
||||
.filter(|p| {
|
||||
let abs = p.to_string_lossy();
|
||||
let rel = crate::symbol::normalize_namespace(&abs, root_str_ref);
|
||||
let rel = crate::symbol::namespace_with_package(&abs, root_str_ref, mg);
|
||||
namespaces_needing_reanalysis.contains(&rel)
|
||||
})
|
||||
.map(|p| (*p).clone())
|
||||
|
|
@ -1182,7 +1202,7 @@ fn run_topo_batches(
|
|||
batch = batch_idx,
|
||||
dirty = dirty_files.len(),
|
||||
"SCC converged by snapshot but dirty_files non-empty; \
|
||||
call graph disagrees with summary diff — accepting \
|
||||
call graph disagrees with summary diff, accepting \
|
||||
snapshot as authoritative"
|
||||
);
|
||||
converged = true;
|
||||
|
|
@ -1230,7 +1250,7 @@ fn run_topo_batches(
|
|||
cap = scc_cap,
|
||||
cross_file = cross_file_scc,
|
||||
reason = reason.tag(),
|
||||
"SCC batch did not converge within safety cap — results \
|
||||
"SCC batch did not converge within safety cap, results \
|
||||
may be imprecise. This usually indicates a very large \
|
||||
mutually-recursive region or a non-monotone summary \
|
||||
refinement; please file a bug with a reproducer."
|
||||
|
|
@ -1376,12 +1396,13 @@ fn run_topo_batches(
|
|||
let mut refined_ssa: usize = 0;
|
||||
let mut refined_bodies: usize = 0;
|
||||
let mut refined_auth: usize = 0;
|
||||
let mg = cfg.module_graph.as_deref();
|
||||
for (_path, diags, summaries, ssa_summaries, ssa_bodies, auth_summaries) in
|
||||
batch_results
|
||||
{
|
||||
batch_diags.extend(diags);
|
||||
for s in summaries {
|
||||
let key = s.func_key(root_str_ref);
|
||||
let key = s.func_key_with_resolver(root_str_ref, mg);
|
||||
global_summaries.insert(key, s);
|
||||
refined_summaries += 1;
|
||||
}
|
||||
|
|
@ -1568,6 +1589,15 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
};
|
||||
tracing::info!(file_count = all_paths.len(), "file walk complete");
|
||||
|
||||
// ── Build TS/JS module graph once for the scan root ──────────────────
|
||||
// Phase 04: resolver foundation. The graph is built between walk and
|
||||
// pass 1 so every per-file analysis (CFG-time import classification,
|
||||
// pass-2 cross-file lookup) sees the same view. Build cost is bounded
|
||||
// (no AST parsing, manifests only) and the result lives behind an
|
||||
// `Arc` on `Config::module_graph`.
|
||||
let owned_cfg_with_graph = ensure_module_graph(root, cfg);
|
||||
let cfg = owned_cfg_with_graph.as_ref().unwrap_or(cfg);
|
||||
|
||||
if let Some(flag) = preview_tier_seen {
|
||||
if all_paths.iter().any(|p| is_preview_tier_path(p)) {
|
||||
flag.store(true, Ordering::Relaxed);
|
||||
|
|
@ -1704,6 +1734,7 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
show_progress,
|
||||
);
|
||||
let root_str = root.to_string_lossy();
|
||||
let mg = cfg.module_graph.as_deref();
|
||||
|
||||
let gs = all_paths
|
||||
.par_iter()
|
||||
|
|
@ -1720,7 +1751,7 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
let first_lang = r.summaries.first().map(|s| s.lang.clone());
|
||||
|
||||
for s in r.summaries {
|
||||
let key = s.func_key(Some(&root_str));
|
||||
let key = s.func_key_with_resolver(Some(&root_str), mg);
|
||||
local_gs.insert(key, s);
|
||||
}
|
||||
|
||||
|
|
@ -1754,6 +1785,16 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
local_gs.insert_router_facts(module_id, facts);
|
||||
}
|
||||
|
||||
// Phase-09 indexed-mode parity: cache the
|
||||
// file's cross-package import map by namespace
|
||||
// so an inlined callee body loaded from SQLite
|
||||
// (where the body's own Arc is stripped by
|
||||
// `#[serde(skip)]`) can recover its package
|
||||
// boundary at step 0.7.
|
||||
if let Some((ns, map)) = r.cross_package_imports {
|
||||
local_gs.insert_cross_package_imports(ns, map);
|
||||
}
|
||||
|
||||
// Record language for progress
|
||||
if let Some(p) = progress {
|
||||
if let Some(ref lang) = first_lang {
|
||||
|
|
@ -2057,6 +2098,12 @@ pub fn scan_with_index_parallel_observer(
|
|||
);
|
||||
}
|
||||
|
||||
// Phase 04: build the TS/JS module graph between fs walk and pass 1
|
||||
// so the indexed scan path sees the same resolver state as the
|
||||
// non-indexed path (`scan_filesystem_with_observer`).
|
||||
let owned_cfg_with_graph = ensure_module_graph(scan_root, cfg);
|
||||
let cfg = owned_cfg_with_graph.as_ref().unwrap_or(cfg);
|
||||
|
||||
let current_files: HashSet<PathBuf> = files.iter().cloned().collect();
|
||||
let removed_files: Vec<PathBuf> = indexed_files
|
||||
.into_iter()
|
||||
|
|
@ -2139,7 +2186,7 @@ pub fn scan_with_index_parallel_observer(
|
|||
)
|
||||
},
|
||||
) {
|
||||
Ok((func_sums, ssa_sums, ssa_bodies, auth_sums)) => {
|
||||
Ok((func_sums, ssa_sums, ssa_bodies, auth_sums, cross_pkg_imports)) => {
|
||||
if let Some(p) = &progress_ref {
|
||||
p.inc_parsed(1);
|
||||
if let Some(lang) = func_sums.first().map(|s| s.lang.as_str()) {
|
||||
|
|
@ -2193,8 +2240,12 @@ pub fn scan_with_index_parallel_observer(
|
|||
.collect();
|
||||
// Single transaction for all four caches:
|
||||
// one fsync per file instead of four.
|
||||
let cpi_arg = cross_pkg_imports
|
||||
.as_ref()
|
||||
.map(|(ns, map)| (ns.as_str(), map.as_ref()));
|
||||
if let Err(e) = idx.replace_all_for_file(
|
||||
path, &hash, &func_sums, &ssa_rows, &body_rows, &auth_rows,
|
||||
cpi_arg,
|
||||
) {
|
||||
record_persist_error(
|
||||
&persist_errors_ref,
|
||||
|
|
@ -2268,7 +2319,11 @@ pub fn scan_with_index_parallel_observer(
|
|||
crate::symbol::Lang::from_slug(&lang_str).unwrap_or(crate::symbol::Lang::Rust);
|
||||
// Use persisted namespace; fall back to normalized file_path
|
||||
let ns = if namespace.is_empty() {
|
||||
crate::symbol::normalize_namespace(&file_path, Some(&root_str))
|
||||
crate::symbol::namespace_with_package(
|
||||
&file_path,
|
||||
Some(&root_str),
|
||||
cfg.module_graph.as_deref(),
|
||||
)
|
||||
} else {
|
||||
namespace
|
||||
};
|
||||
|
|
@ -2289,6 +2344,23 @@ pub fn scan_with_index_parallel_observer(
|
|||
}
|
||||
}
|
||||
|
||||
// Load Phase-09 cross-package import maps so an inlined callee
|
||||
// body loaded from SQLite (where the body's own Arc is stripped
|
||||
// by `#[serde(skip)]`) can recover its package boundary at
|
||||
// step 0.7. Indexed-mode parity with `scan_filesystem`.
|
||||
match idx.load_all_cross_package_imports() {
|
||||
Ok(rows) => {
|
||||
for (_file_path, namespace, map) in rows {
|
||||
if !map.is_empty() {
|
||||
gs.insert_cross_package_imports(namespace, std::sync::Arc::new(map));
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("failed to load cross_package_imports from DB: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
// Load cross-file callee bodies from DB
|
||||
let body_count = if crate::symex::cross_file_symex_enabled() {
|
||||
match idx.load_all_ssa_bodies() {
|
||||
|
|
@ -2309,7 +2381,11 @@ pub fn scan_with_index_parallel_observer(
|
|||
let lang = crate::symbol::Lang::from_slug(&lang_str)
|
||||
.unwrap_or(crate::symbol::Lang::Rust);
|
||||
let ns = if namespace.is_empty() {
|
||||
crate::symbol::normalize_namespace(&file_path, Some(&root_str))
|
||||
crate::symbol::namespace_with_package(
|
||||
&file_path,
|
||||
Some(&root_str),
|
||||
cfg.module_graph.as_deref(),
|
||||
)
|
||||
} else {
|
||||
namespace
|
||||
};
|
||||
|
|
@ -2363,7 +2439,11 @@ pub fn scan_with_index_parallel_observer(
|
|||
let lang =
|
||||
crate::symbol::Lang::from_slug(&lang_str).unwrap_or(crate::symbol::Lang::Rust);
|
||||
let ns = if namespace.is_empty() {
|
||||
crate::symbol::normalize_namespace(&file_path, Some(&root_str))
|
||||
crate::symbol::namespace_with_package(
|
||||
&file_path,
|
||||
Some(&root_str),
|
||||
cfg.module_graph.as_deref(),
|
||||
)
|
||||
} else {
|
||||
namespace
|
||||
};
|
||||
|
|
|
|||
|
|
@ -201,6 +201,36 @@ fn type_kind_index(kind: &TypeKind) -> u32 {
|
|||
// domain has no dedicated slot, share the Object index so
|
||||
// singleton recovery still maps to a meaningful TypeKind.
|
||||
TypeKind::NullPrototypeObject => 3,
|
||||
// FileSystemPromisesNs is a JS-only namespace receiver type used
|
||||
// by the Phase 05 fs/promises sink resolver. The bitset domain
|
||||
// has no dedicated slot; share the Object index so singleton
|
||||
// recovery still hands back a usable TypeKind.
|
||||
TypeKind::FileSystemPromisesNs => 3,
|
||||
// Phase 07 ORM receiver TypeKinds. They participate only in the
|
||||
// type-qualified callee resolver via their `label_prefix()`; the
|
||||
// bitset domain's flow-sensitive narrowing has no dedicated slot
|
||||
// for them, so collapse to Object (3). Singleton recovery from
|
||||
// the index will hand back `Object`, which is a benign upper
|
||||
// bound for the ORM receiver shapes.
|
||||
TypeKind::Sequelize
|
||||
| TypeKind::TypeOrmRepo
|
||||
| TypeKind::TypeOrmManager
|
||||
| TypeKind::MikroOrmEm => 3,
|
||||
// Phase 10 — `Request` is a Web-platform receiver type used
|
||||
// by the App Router entry-point seeding path; it shares the
|
||||
// Object slot for the same reason the ORM TypeKinds do.
|
||||
TypeKind::Request => 3,
|
||||
// Phase 15 — cross-language ORM receiver TypeKinds. Same
|
||||
// rationale as the Phase 07 ORM TypeKinds above; they
|
||||
// participate only in the type-qualified callee resolver via
|
||||
// `label_prefix()` and have no dedicated slot in the bitset
|
||||
// domain.
|
||||
TypeKind::SqlAlchemySession
|
||||
| TypeKind::DjangoQuerySet
|
||||
| TypeKind::ActiveRecordRelation
|
||||
| TypeKind::GormDb
|
||||
| TypeKind::SqlxDb
|
||||
| TypeKind::HibernateSession => 3,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -612,6 +612,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
411
src/database.rs
411
src/database.rs
|
|
@ -59,6 +59,7 @@ pub mod index {
|
|||
disambig INTEGER,
|
||||
kind TEXT NOT NULL DEFAULT 'fn',
|
||||
summary TEXT NOT NULL,
|
||||
entry_kind TEXT,
|
||||
updated_at INTEGER NOT NULL,
|
||||
UNIQUE(project, file_path, name, container, arity, disambig, kind)
|
||||
);
|
||||
|
|
@ -76,6 +77,7 @@ pub mod index {
|
|||
disambig INTEGER,
|
||||
kind TEXT NOT NULL DEFAULT 'fn',
|
||||
summary TEXT NOT NULL,
|
||||
entry_kind TEXT,
|
||||
updated_at INTEGER NOT NULL,
|
||||
UNIQUE(project, file_path, name, container, arity, disambig, kind)
|
||||
);
|
||||
|
|
@ -114,6 +116,17 @@ pub mod index {
|
|||
UNIQUE(project, file_path, name, container, arity, disambig, kind)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS cross_package_imports (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
project TEXT NOT NULL,
|
||||
file_path TEXT NOT NULL,
|
||||
file_hash BLOB NOT NULL,
|
||||
namespace TEXT NOT NULL,
|
||||
imports BLOB NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
UNIQUE(project, file_path)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS scans (
|
||||
id TEXT PRIMARY KEY,
|
||||
status TEXT NOT NULL,
|
||||
|
|
@ -204,6 +217,8 @@ pub mod index {
|
|||
ON ssa_function_bodies(project, file_path);
|
||||
CREATE INDEX IF NOT EXISTS idx_auth_check_summaries_project_file
|
||||
ON auth_check_summaries(project, file_path);
|
||||
CREATE INDEX IF NOT EXISTS idx_cross_package_imports_project_file
|
||||
ON cross_package_imports(project, file_path);
|
||||
"#;
|
||||
|
||||
/// Engine version used to detect stale caches across upgrades.
|
||||
|
|
@ -311,7 +326,17 @@ pub mod index {
|
|||
// workers on machines with more cores than that during the
|
||||
// parallel indexing pass. Size the pool to comfortably hold
|
||||
// a connection per rayon thread plus a small slack.
|
||||
let max_conns = (num_cpus::get() as u32 + 4).max(16);
|
||||
//
|
||||
// `NYX_INDEX_POOL_MAX` overrides the auto-sized default. Use it in
|
||||
// fd-constrained environments (test sandboxes, containers with low
|
||||
// ulimit) where many parallel indexed scans would otherwise exhaust
|
||||
// fds (EMFILE): each pooled SQLite WAL connection costs ~3 fds (db + -wal
|
||||
// + -shm), so 30 parallel scans × 16 conns × 3 fds = 1440 fds.
|
||||
let max_conns = std::env::var("NYX_INDEX_POOL_MAX")
|
||||
.ok()
|
||||
.and_then(|v| v.parse::<u32>().ok())
|
||||
.filter(|n| *n >= 1)
|
||||
.unwrap_or_else(|| (num_cpus::get() as u32 + 4).max(16));
|
||||
let pool = Arc::new(Pool::builder().max_size(max_conns).build(manager)?);
|
||||
|
||||
{
|
||||
|
|
@ -400,6 +425,14 @@ pub mod index {
|
|||
conn.execute_batch(SCHEMA)?;
|
||||
}
|
||||
|
||||
// Phase 10 — `entry_kind` column on (ssa_)function_summaries.
|
||||
// Non-destructive `ALTER TABLE ... ADD COLUMN` so existing
|
||||
// rows survive the upgrade. The column is nullable; the
|
||||
// INSERT paths write the JSON-encoded `EntryKind` text or
|
||||
// NULL when the function is not an entry point.
|
||||
Self::ensure_column(&conn, "function_summaries", "entry_kind", "TEXT")?;
|
||||
Self::ensure_column(&conn, "ssa_function_summaries", "entry_kind", "TEXT")?;
|
||||
|
||||
// Ensure the auth_check_summaries table exists for DBs
|
||||
// created before this column set was introduced. The
|
||||
// `CREATE TABLE IF NOT EXISTS` in SCHEMA handles new DBs;
|
||||
|
|
@ -419,6 +452,26 @@ pub mod index {
|
|||
conn.execute_batch(SCHEMA)?;
|
||||
}
|
||||
|
||||
// Phase 09 indexed-mode parity: ensure the
|
||||
// `cross_package_imports` table exists for DBs created
|
||||
// before this table was introduced. `CREATE TABLE
|
||||
// IF NOT EXISTS` in SCHEMA handles new DBs; this branch
|
||||
// only fires when the table is missing entirely from a
|
||||
// pre-existing DB.
|
||||
let cpi_exists: bool = conn
|
||||
.query_row(
|
||||
"SELECT 1 FROM sqlite_master
|
||||
WHERE type = 'table' AND name = 'cross_package_imports'",
|
||||
[],
|
||||
|_| Ok(true),
|
||||
)
|
||||
.optional()?
|
||||
.unwrap_or(false);
|
||||
if !cpi_exists {
|
||||
tracing::info!("creating cross_package_imports table");
|
||||
conn.execute_batch(SCHEMA)?;
|
||||
}
|
||||
|
||||
// Schema version check: invalidate cached summary tables
|
||||
// when the on-disk artefact layout has changed in an
|
||||
// incompatible way, independently of the engine version.
|
||||
|
|
@ -433,6 +486,33 @@ pub mod index {
|
|||
Ok(pool)
|
||||
}
|
||||
|
||||
/// Add a column to an existing table when it is missing.
|
||||
///
|
||||
/// Non-destructive: leaves all existing rows untouched, populating
|
||||
/// the new column with NULL. Used to thread additive schema
|
||||
/// changes (Phase 10's `entry_kind`) into pre-existing databases
|
||||
/// without forcing a full cache rebuild.
|
||||
fn ensure_column(
|
||||
conn: &Connection,
|
||||
table: &str,
|
||||
column: &str,
|
||||
sqlite_type: &str,
|
||||
) -> NyxResult<()> {
|
||||
let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
|
||||
let cols: std::collections::HashSet<String> = stmt
|
||||
.query_map([], |r| r.get::<_, String>(1))?
|
||||
.filter_map(Result::ok)
|
||||
.collect();
|
||||
if cols.contains(column) {
|
||||
return Ok(());
|
||||
}
|
||||
tracing::info!("adding column {column} to {table}");
|
||||
conn.execute_batch(&format!(
|
||||
"ALTER TABLE {table} ADD COLUMN {column} {sqlite_type}"
|
||||
))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check stored schema version against the compiled-in value.
|
||||
///
|
||||
/// On mismatch (including first-time open), wipe the cached
|
||||
|
|
@ -468,7 +548,8 @@ pub mod index {
|
|||
DELETE FROM function_summaries;
|
||||
DELETE FROM ssa_function_summaries;
|
||||
DELETE FROM auth_check_summaries;
|
||||
DELETE FROM files;",
|
||||
DELETE FROM files;
|
||||
DROP TABLE IF EXISTS cross_package_imports;",
|
||||
)?;
|
||||
conn.execute_batch(SCHEMA)?;
|
||||
conn.execute(
|
||||
|
|
@ -801,14 +882,19 @@ pub mod index {
|
|||
let mut stmt = tx.prepare(
|
||||
"INSERT OR REPLACE INTO function_summaries
|
||||
(project, file_path, file_hash, name, arity, lang,
|
||||
container, disambig, kind, summary, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
|
||||
container, disambig, kind, summary, entry_kind, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
|
||||
)?;
|
||||
|
||||
for s in summaries {
|
||||
let json = serde_json::to_string(s)
|
||||
.map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
|
||||
let disambig_sql = s.disambig.map(|d| d as i64);
|
||||
let entry_kind_sql = s
|
||||
.entry_kind
|
||||
.as_ref()
|
||||
.map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new()))
|
||||
.filter(|s| !s.is_empty());
|
||||
stmt.execute(params![
|
||||
self.project,
|
||||
path_str,
|
||||
|
|
@ -820,6 +906,7 @@ pub mod index {
|
|||
disambig_sql,
|
||||
s.kind.as_str(),
|
||||
json,
|
||||
entry_kind_sql,
|
||||
now
|
||||
])?;
|
||||
}
|
||||
|
|
@ -863,8 +950,8 @@ pub mod index {
|
|||
let mut stmt = tx.prepare(
|
||||
"INSERT OR REPLACE INTO ssa_function_summaries
|
||||
(project, file_path, file_hash, name, arity, lang, namespace,
|
||||
container, disambig, kind, summary, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
|
||||
container, disambig, kind, summary, entry_kind, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)",
|
||||
)?;
|
||||
|
||||
for (name, arity, lang, namespace, container, disambig, kind, summary) in summaries
|
||||
|
|
@ -872,6 +959,11 @@ pub mod index {
|
|||
let json = serde_json::to_string(summary)
|
||||
.map_err(|e| NyxError::Msg(format!("SSA summary serialise: {e}")))?;
|
||||
let disambig_sql = disambig.map(|d| d as i64);
|
||||
let entry_kind_sql = summary
|
||||
.entry_kind
|
||||
.as_ref()
|
||||
.map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new()))
|
||||
.filter(|s| !s.is_empty());
|
||||
stmt.execute(params![
|
||||
self.project,
|
||||
path_str,
|
||||
|
|
@ -884,6 +976,7 @@ pub mod index {
|
|||
disambig_sql,
|
||||
kind.as_str(),
|
||||
json,
|
||||
entry_kind_sql,
|
||||
now
|
||||
])?;
|
||||
}
|
||||
|
|
@ -1392,6 +1485,10 @@ pub mod index {
|
|||
crate::symbol::FuncKind,
|
||||
crate::auth_analysis::model::AuthCheckSummary,
|
||||
)],
|
||||
cross_package_imports: Option<(
|
||||
&str,
|
||||
&std::collections::HashMap<String, crate::symbol::FuncKey>,
|
||||
)>,
|
||||
) -> NyxResult<()> {
|
||||
let tx = self.conn.transaction()?;
|
||||
let path_str = file_path.to_string_lossy();
|
||||
|
|
@ -1406,13 +1503,18 @@ pub mod index {
|
|||
let mut stmt = tx.prepare(
|
||||
"INSERT OR REPLACE INTO function_summaries
|
||||
(project, file_path, file_hash, name, arity, lang,
|
||||
container, disambig, kind, summary, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
|
||||
container, disambig, kind, summary, entry_kind, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
|
||||
)?;
|
||||
for s in func_summaries {
|
||||
let json = serde_json::to_string(s)
|
||||
.map_err(|e| NyxError::Msg(format!("summary serialise: {e}")))?;
|
||||
let disambig_sql = s.disambig.map(|d| d as i64);
|
||||
let entry_kind_sql = s
|
||||
.entry_kind
|
||||
.as_ref()
|
||||
.map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new()))
|
||||
.filter(|s| !s.is_empty());
|
||||
stmt.execute(params![
|
||||
self.project,
|
||||
path_str,
|
||||
|
|
@ -1424,6 +1526,7 @@ pub mod index {
|
|||
disambig_sql,
|
||||
s.kind.as_str(),
|
||||
json,
|
||||
entry_kind_sql,
|
||||
now
|
||||
])?;
|
||||
}
|
||||
|
|
@ -1439,8 +1542,8 @@ pub mod index {
|
|||
let mut stmt = tx.prepare(
|
||||
"INSERT OR REPLACE INTO ssa_function_summaries
|
||||
(project, file_path, file_hash, name, arity, lang, namespace,
|
||||
container, disambig, kind, summary, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
|
||||
container, disambig, kind, summary, entry_kind, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)",
|
||||
)?;
|
||||
for (name, arity, lang, namespace, container, disambig, kind, summary) in
|
||||
ssa_summaries
|
||||
|
|
@ -1448,6 +1551,11 @@ pub mod index {
|
|||
let json = serde_json::to_string(summary)
|
||||
.map_err(|e| NyxError::Msg(format!("SSA summary serialise: {e}")))?;
|
||||
let disambig_sql = disambig.map(|d| d as i64);
|
||||
let entry_kind_sql = summary
|
||||
.entry_kind
|
||||
.as_ref()
|
||||
.map(|ek| serde_json::to_string(ek).unwrap_or_else(|_| String::new()))
|
||||
.filter(|s| !s.is_empty());
|
||||
stmt.execute(params![
|
||||
self.project,
|
||||
path_str,
|
||||
|
|
@ -1460,6 +1568,7 @@ pub mod index {
|
|||
disambig_sql,
|
||||
kind.as_str(),
|
||||
json,
|
||||
entry_kind_sql,
|
||||
now
|
||||
])?;
|
||||
}
|
||||
|
|
@ -1536,6 +1645,26 @@ pub mod index {
|
|||
}
|
||||
}
|
||||
|
||||
// cross_package_imports: replace this file's row, even with
|
||||
// an empty input, so a file that lost its imports does not
|
||||
// leave stale resolutions in the cache.
|
||||
tx.execute(
|
||||
"DELETE FROM cross_package_imports WHERE project = ?1 AND file_path = ?2",
|
||||
params![self.project, path_str],
|
||||
)?;
|
||||
if let Some((namespace, map)) = cross_package_imports
|
||||
&& !map.is_empty()
|
||||
{
|
||||
let blob = rmp_serde::to_vec_named(map)
|
||||
.map_err(|e| NyxError::Msg(format!("cross_package_imports serialise: {e}")))?;
|
||||
tx.execute(
|
||||
"INSERT OR REPLACE INTO cross_package_imports
|
||||
(project, file_path, file_hash, namespace, imports, updated_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
|
||||
params![self.project, path_str, file_hash, namespace, blob, now],
|
||||
)?;
|
||||
}
|
||||
|
||||
tx.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -1622,6 +1751,61 @@ pub mod index {
|
|||
Ok(out)
|
||||
}
|
||||
|
||||
/// Load every persisted per-file Phase-09 cross-package import map
|
||||
/// for this project.
|
||||
///
|
||||
/// Returns rows as `(file_path, namespace, imports_map)`. Used by
|
||||
/// pass 2 of indexed scans to populate
|
||||
/// `GlobalSummaries::cross_package_imports_by_namespace`, recovering
|
||||
/// the per-file import view that
|
||||
/// [`crate::taint::ssa_transfer::CalleeSsaBody::cross_package_imports`]
|
||||
/// loses across SQLite round-trip (`#[serde(skip)]`).
|
||||
pub fn load_all_cross_package_imports(
|
||||
&self,
|
||||
) -> NyxResult<
|
||||
Vec<(
|
||||
String,
|
||||
String,
|
||||
std::collections::HashMap<String, crate::symbol::FuncKey>,
|
||||
)>,
|
||||
> {
|
||||
let mut stmt = self.c().prepare(
|
||||
"SELECT file_path, namespace, imports
|
||||
FROM cross_package_imports WHERE project = ?1",
|
||||
)?;
|
||||
|
||||
let rows: Vec<(String, String, Vec<u8>)> = stmt
|
||||
.query_map([&self.project], |row| {
|
||||
Ok((
|
||||
row.get::<_, String>(0)?,
|
||||
row.get::<_, String>(1)?,
|
||||
row.get::<_, Vec<u8>>(2)?,
|
||||
))
|
||||
})?
|
||||
.filter_map(|r| match r {
|
||||
Ok(v) => Some(v),
|
||||
Err(e) => {
|
||||
tracing::warn!("failed to read cross_package_imports row: {e}");
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut out = Vec::with_capacity(rows.len());
|
||||
for (fp, ns, blob) in rows {
|
||||
match rmp_serde::from_slice::<
|
||||
std::collections::HashMap<String, crate::symbol::FuncKey>,
|
||||
>(&blob)
|
||||
{
|
||||
Ok(map) => out.push((fp, ns, map)),
|
||||
Err(e) => {
|
||||
tracing::warn!("failed to deserialize cross_package_imports blob: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// Remove a file and all derived persisted state for this project.
|
||||
///
|
||||
/// This deletes the file row, issues, and all persisted summary rows so
|
||||
|
|
@ -1659,6 +1843,10 @@ pub mod index {
|
|||
"DELETE FROM auth_check_summaries WHERE project = ?1 AND file_path = ?2",
|
||||
params![self.project, path_str.as_ref()],
|
||||
)?;
|
||||
tx.execute(
|
||||
"DELETE FROM cross_package_imports WHERE project = ?1 AND file_path = ?2",
|
||||
params![self.project, path_str.as_ref()],
|
||||
)?;
|
||||
|
||||
tx.commit()?;
|
||||
Ok(())
|
||||
|
|
@ -2539,6 +2727,7 @@ fn ssa_summaries_round_trip() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
),
|
||||
(
|
||||
|
|
@ -2575,6 +2764,7 @@ fn ssa_summaries_round_trip() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
),
|
||||
];
|
||||
|
|
@ -2749,6 +2939,7 @@ fn ssa_summaries_hash_rescan_replaces_stale() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
)];
|
||||
idx.replace_ssa_summaries_for_file(&f, &hash_v1, &sums_v1)
|
||||
|
|
@ -2787,6 +2978,7 @@ fn ssa_summaries_hash_rescan_replaces_stale() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
)];
|
||||
idx.replace_ssa_summaries_for_file(&f, &hash_v2, &sums_v2)
|
||||
|
|
@ -2846,6 +3038,7 @@ fn clear_drops_ssa_summaries_table() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
)];
|
||||
idx.replace_ssa_summaries_for_file(&f, &hash, &sums)
|
||||
|
|
@ -2903,6 +3096,7 @@ fn make_test_callee_body(
|
|||
field_interner: crate::ssa::ir::FieldInterner::new(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
},
|
||||
opt: crate::ssa::OptimizeResult {
|
||||
const_values: std::collections::HashMap::new(),
|
||||
|
|
@ -2921,9 +3115,58 @@ fn make_test_callee_body(
|
|||
param_count,
|
||||
node_meta: std::collections::HashMap::new(),
|
||||
body_graph: None,
|
||||
cross_package_imports: std::sync::Arc::new(std::collections::HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// A cross-package import map written through `replace_all_for_file`
/// must come back unchanged from `load_all_cross_package_imports`, and a
/// later call that passes no import map must remove the stored row.
#[test]
fn cross_package_imports_round_trip_via_replace_all_for_file() {
    use crate::symbol::{FuncKey, FuncKind, Lang};

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");
    let source_file = tmp.path().join("caller.ts");
    std::fs::write(&source_file, "import { escape } from '@scope/util';").unwrap();

    let pool = index::Indexer::init(&db_path).unwrap();
    let mut indexer = index::Indexer::from_pool("proj", &pool).unwrap();
    let digest = index::Indexer::digest_bytes(b"caller content");

    // One binding: local name `escape` resolves into the util package.
    let escape_key = FuncKey {
        lang: Lang::TypeScript,
        namespace: "packages/util/src/escape.ts".to_string(),
        container: String::new(),
        name: "escape".to_string(),
        arity: None,
        disambig: None,
        kind: FuncKind::Function,
    };
    let imports: std::collections::HashMap<String, FuncKey> =
        std::collections::HashMap::from([("escape".to_string(), escape_key)]);

    indexer
        .replace_all_for_file(
            &source_file,
            &digest,
            &[],
            &[],
            &[],
            &[],
            Some(("caller.ts", &imports)),
        )
        .unwrap();

    let loaded = indexer.load_all_cross_package_imports().unwrap();
    assert_eq!(loaded.len(), 1);
    let (stored_path, stored_ns, stored_map) = &loaded[0];
    assert_eq!(stored_path, &source_file.to_string_lossy().to_string());
    assert_eq!(stored_ns, "caller.ts");
    assert_eq!(stored_map.len(), 1);
    let restored = stored_map
        .get("escape")
        .expect("escape binding survives round-trip");
    assert_eq!(restored.namespace, "packages/util/src/escape.ts");
    assert_eq!(restored.name, "escape");
    assert_eq!(restored.lang, Lang::TypeScript);

    // Rescanning with no import map (`None`) must drop the stored row.
    indexer
        .replace_all_for_file(&source_file, &digest, &[], &[], &[], &[], None)
        .unwrap();
    assert!(indexer.load_all_cross_package_imports().unwrap().is_empty());
}
|
||||
|
||||
#[test]
|
||||
fn ssa_bodies_round_trip() {
|
||||
let td = tempfile::tempdir().unwrap();
|
||||
|
|
@ -3122,6 +3365,7 @@ fn make_test_ssa_summary() -> crate::summary::ssa_summary::SsaFuncSummary {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3436,6 +3680,153 @@ fn missing_ssa_namespace_column_triggers_recreate() {
|
|||
assert_eq!(idx.load_all_ssa_summaries().unwrap().len(), 1);
|
||||
}
|
||||
|
||||
/// Phase 10 migration test.
///
/// Seeds a database whose `(ssa_)function_summaries` tables have the
/// post-Phase 09 shape (namespace + container + disambig + kind columns,
/// but no `entry_kind`), with one row in each table. Opening it through
/// `init` must add `entry_kind` to both tables in place, keep the seeded
/// rows, and leave their `entry_kind` as NULL.
#[test]
fn entry_kind_column_added_in_place_without_data_loss() {
    // Pre-Phase-10 schema: identical to the current one minus `entry_kind`.
    const LEGACY_SCHEMA: &str = "CREATE TABLE files (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            project TEXT NOT NULL, path TEXT NOT NULL,
            hash BLOB NOT NULL, mtime INTEGER NOT NULL,
            scanned_at INTEGER NOT NULL, UNIQUE(project, path)
        );
        CREATE TABLE function_summaries (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            project TEXT NOT NULL, file_path TEXT NOT NULL,
            file_hash BLOB NOT NULL, name TEXT NOT NULL,
            arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL,
            container TEXT NOT NULL DEFAULT '',
            disambig INTEGER,
            kind TEXT NOT NULL DEFAULT 'fn',
            summary TEXT NOT NULL, updated_at INTEGER NOT NULL,
            UNIQUE(project, file_path, name, container, arity, disambig, kind)
        );
        CREATE TABLE ssa_function_summaries (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            project TEXT NOT NULL, file_path TEXT NOT NULL,
            file_hash BLOB NOT NULL, name TEXT NOT NULL,
            arity INTEGER NOT NULL DEFAULT -1, lang TEXT NOT NULL,
            namespace TEXT NOT NULL DEFAULT '',
            container TEXT NOT NULL DEFAULT '',
            disambig INTEGER,
            kind TEXT NOT NULL DEFAULT 'fn',
            summary TEXT NOT NULL, updated_at INTEGER NOT NULL,
            UNIQUE(project, file_path, name, container, arity, disambig, kind)
        );";

    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("nyx.sqlite");

    // Build the legacy database by hand; the connection is scoped so it
    // is closed before `init` opens the file.
    {
        let legacy = rusqlite::Connection::open(&db_path).unwrap();
        legacy.execute_batch(LEGACY_SCHEMA).unwrap();

        legacy
            .execute(
                "INSERT INTO function_summaries
                     (project, file_path, file_hash, name, arity, lang,
                      container, disambig, kind, summary, updated_at)
                 VALUES ('proj', 'lib.py', X'00', 'old_func', 1, 'python',
                         '', NULL, 'fn', '{}', 0)",
                [],
            )
            .unwrap();
        legacy
            .execute(
                "INSERT INTO ssa_function_summaries
                     (project, file_path, file_hash, name, arity, lang,
                      namespace, container, disambig, kind, summary, updated_at)
                 VALUES ('proj', 'lib.py', X'00', 'old_func', 1, 'python',
                         '', '', NULL, 'fn', '{}', 0)",
                [],
            )
            .unwrap();

        // Record current schema/engine versions up front so that
        // `check_schema_version` and `check_engine_version` treat the
        // database as current and do not wipe the seeded rows. This test
        // is about the in-place `ALTER TABLE`; the version checks are a
        // separate concern.
        legacy
            .execute(
                "CREATE TABLE IF NOT EXISTS nyx_metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)",
                [],
            )
            .unwrap();
        legacy
            .execute(
                "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('schema_version', ?1)",
                rusqlite::params![index::SCHEMA_VERSION],
            )
            .unwrap();
        legacy
            .execute(
                "INSERT OR REPLACE INTO nyx_metadata (key, value) VALUES ('engine_version', ?1)",
                rusqlite::params![index::ENGINE_VERSION],
            )
            .unwrap();
    }

    // Opening through `init` should add `entry_kind` to both tables
    // without touching the existing rows.
    let pool = index::Indexer::init(&db_path).unwrap();
    let conn = pool.get().unwrap();

    // True when `PRAGMA table_info(table)` lists a column named `wanted`.
    let has_column = |table: &str, wanted: &str| -> bool {
        let mut stmt = conn
            .prepare(&format!("PRAGMA table_info({table})"))
            .unwrap();
        let names: Vec<String> = stmt
            .query_map([], |r| r.get::<_, String>(1))
            .unwrap()
            .filter_map(Result::ok)
            .collect();
        let found = names.iter().any(|c| c == wanted);
        found
    };
    // Number of rows belonging to project `proj` in `table`.
    let count_rows = |table: &str| -> i64 {
        conn.query_row(
            &format!("SELECT COUNT(*) FROM {table} WHERE project = 'proj'"),
            [],
            |r| r.get(0),
        )
        .unwrap()
    };

    assert!(
        has_column("function_summaries", "entry_kind"),
        "function_summaries.entry_kind missing after migration"
    );
    assert!(
        has_column("ssa_function_summaries", "entry_kind"),
        "ssa_function_summaries.entry_kind missing after migration"
    );

    // The rows inserted before the migration are still there.
    assert_eq!(
        count_rows("function_summaries"),
        1,
        "pre-existing function_summaries row was lost"
    );
    assert_eq!(
        count_rows("ssa_function_summaries"),
        1,
        "pre-existing ssa_function_summaries row was lost"
    );

    // The migrated row carries NULL in the new column.
    let migrated_entry_kind: Option<String> = conn
        .query_row(
            "SELECT entry_kind FROM function_summaries WHERE project = 'proj'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert!(migrated_entry_kind.is_none());
}
|
||||
|
||||
#[test]
|
||||
fn valid_schema_no_recreate() {
|
||||
let td = tempfile::tempdir().unwrap();
|
||||
|
|
|
|||
1720
src/entry_points/mod.rs
Normal file
1720
src/entry_points/mod.rs
Normal file
File diff suppressed because it is too large
Load diff
408
src/labels/go.rs
408
src/labels/go.rs
|
|
@ -73,6 +73,27 @@ pub static RULES: &[LabelRule] = &[
|
|||
"db.Exec",
|
||||
"db.QueryRow",
|
||||
"db.Prepare",
|
||||
// Phase 15 — GORM `db.Raw(sql)` raw-SQL passthrough. GORM's
|
||||
// `*gorm.DB` is conventionally bound to a `db`-named receiver,
|
||||
// so the suffix `db.Raw` carries the GORM semantic without
|
||||
// colliding with stdlib `*sql.DB` (which has no `Raw` method).
|
||||
// The `GormDb.Raw` type-qualified variant in the receiver-typed
|
||||
// rule list below covers receivers tagged from `gorm.Open(...)`
|
||||
// with non-`db` names.
|
||||
"db.Raw",
|
||||
// Phase 15 — `database/sql`-context variants. `db.QueryContext`,
|
||||
// `db.ExecContext`, `db.QueryRowContext`, `db.PrepareContext`
|
||||
// accept the SQL string at arg 1 (after `ctx`). Receivers
|
||||
// typed as `*sql.DB` / `*sql.Tx` / `*sql.Stmt` resolve via
|
||||
// suffix-matching on `db.<verb>`; calls on differently-named
|
||||
// bound receivers (`tx.QueryContext(...)`) only suffix-match
|
||||
// when the receiver text ends with `db` (covers `userDb`,
|
||||
// `pgDb`, etc.). More-precise receiver typing is in scope
|
||||
// for `DatabaseConnection.<verb>` rules below.
|
||||
"db.QueryContext",
|
||||
"db.ExecContext",
|
||||
"db.QueryRowContext",
|
||||
"db.PrepareContext",
|
||||
// goqu raw SQL literal builders: `goqu.L(s)` and the alias
|
||||
// `goqu.Lit(s)` insert `s` verbatim into the generated SQL with no
|
||||
// parameterisation. CVE-2026-41422 (daptin) loops a user-controlled
|
||||
|
|
@ -88,6 +109,36 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 15 — receiver-typed Go ORM/raw-SQL sinks. `*gorm.DB` (set by
|
||||
// `constructor_type` for `gorm.Open(...)`) exposes `Raw(sql)` and
|
||||
// `Exec(sql)` as raw-SQL passthrough; the type-qualified resolver
|
||||
// rewrites `db.Raw(...)` → `GormDb.Raw`. `*sqlx.DB` likewise gets
|
||||
// `NamedExec` / `NamedQuery` / `Select` / `Get` rewriting via
|
||||
// `SqlxDb.<verb>`. `DatabaseConnection.<verb>` covers the stdlib
|
||||
// `*sql.DB` / `*sql.Tx` receivers tagged by the existing
|
||||
// `sql.Open` / `sql.OpenDB` constructor mapping — currently the
|
||||
// chained QueryContext shape suffix-matches `db.QueryContext` above,
|
||||
// so `DatabaseConnection.QueryContext` is here for receivers whose
|
||||
// identifier text doesn't end in `db`.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"GormDb.Raw",
|
||||
"GormDb.Exec",
|
||||
"SqlxDb.NamedExec",
|
||||
"SqlxDb.NamedQuery",
|
||||
"SqlxDb.Select",
|
||||
"SqlxDb.Get",
|
||||
"SqlxDb.MustExec",
|
||||
"DatabaseConnection.QueryContext",
|
||||
"DatabaseConnection.ExecContext",
|
||||
"DatabaseConnection.QueryRowContext",
|
||||
"DatabaseConnection.Query",
|
||||
"DatabaseConnection.Exec",
|
||||
"DatabaseConnection.QueryRow",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// fmt.Printf/Sprintf write to stdout or build strings in memory, not
|
||||
// security sinks. fmt.Fprintf writes to an io.Writer (often http.ResponseWriter)
|
||||
// so it IS a security sink for XSS.
|
||||
|
|
@ -576,6 +627,363 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// ── SQL execute payload-arg gating (Phase 15 deferred fix, Go) ────────
|
||||
//
|
||||
// Mirrors the Python resolution recorded in `python::GATED_SINKS`. The
|
||||
// flat rules above already classify these callees as `Sink(SQL_QUERY)`
|
||||
// on every argument. `database/sql` and the Go ORM/raw-SQL ecosystem
|
||||
// (GORM, sqlx, goqu) follow the convention that the SQL string is at
|
||||
// arg 0 (or arg 1 for the `*Context` variants whose first arg is a
|
||||
// `context.Context`); subsequent positional arguments are bind values
|
||||
// sent through the driver's parameterised path. Tainted bind values
|
||||
// are SAFE; tainted SQL is the SQLi vector.
|
||||
//
|
||||
// Destination-activation gates carry the same `Sink(SQL_QUERY)` label
|
||||
// as the flat rule (cap dedupes against the flat label) and propagate
|
||||
// `payload_args: &[0]` (or `&[1]` for `*Context` shapes) into
|
||||
// `sink_payload_args`, narrowing the SSA sink scan to the SQL position.
|
||||
SinkGate {
|
||||
callee_matcher: "db.Query",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "db.Exec",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "db.QueryRow",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "db.Prepare",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "db.Raw",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `*Context` variants take `ctx` at arg 0 and the SQL string at arg 1.
|
||||
SinkGate {
|
||||
callee_matcher: "db.QueryContext",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "db.ExecContext",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "db.QueryRowContext",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "db.PrepareContext",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// goqu raw SQL literal builders. Single arg, payload at 0.
|
||||
SinkGate {
|
||||
callee_matcher: "goqu.L",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "goqu.Lit",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// Receiver-typed (case-sensitive, matching the flat rule): GORM / sqlx
|
||||
// / `*sql.DB` typed via `constructor_type`. All take SQL at arg 0
|
||||
// EXCEPT the `*Context` variants on `DatabaseConnection`, which take
|
||||
// SQL at arg 1.
|
||||
SinkGate {
|
||||
callee_matcher: "GormDb.Raw",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "GormDb.Exec",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "SqlxDb.NamedExec",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "SqlxDb.NamedQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "SqlxDb.Select",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "SqlxDb.Get",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "SqlxDb.MustExec",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DatabaseConnection.Query",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DatabaseConnection.Exec",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DatabaseConnection.QueryRow",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DatabaseConnection.QueryContext",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DatabaseConnection.ExecContext",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DatabaseConnection.QueryRowContext",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -94,6 +94,21 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 15 — JPA / Hibernate `Query.setParameter(name, value)` /
|
||||
// `Query.setParameterList(...)` bind a positional / named parameter
|
||||
// and return the same query object. The bind step does NOT inject
|
||||
// the value into the SQL string; the value is sent as a separate
|
||||
// parameter through the JDBC layer at execution. Treating
|
||||
// `setParameter` / `setParameterList` as a SQL_QUERY sanitizer
|
||||
// clears any taint inadvertently smeared onto the chain return so
|
||||
// downstream `.getResultList()` / `.executeUpdate()` calls see a
|
||||
// clean value. Case-sensitive: these are JPA-specific verb names
|
||||
// and the chain shape is canonical.
|
||||
LabelRule {
|
||||
matchers: &["setParameter", "setParameterList"],
|
||||
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ─────────── Sinks ─────────────
|
||||
LabelRule {
|
||||
matchers: &["Runtime.exec", "ProcessBuilder"],
|
||||
|
|
@ -125,6 +140,72 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 13 — java.nio.file path-traversal sinks. `Files.<verb>` is
|
||||
// the modern stdlib API for read/write/copy/move/delete operations;
|
||||
// each takes a `Path` (or `Path` + payload) as arg 0. Default
|
||||
// arg→return propagation smears taint through `Paths.get(...)`
|
||||
// (forwarder) so the path arg of these calls inherits any taint
|
||||
// present on the components. `FileInputStream` / `FileOutputStream` /
|
||||
// `RandomAccessFile` are constructor-style sinks: `new
|
||||
// FileInputStream(path)` reaches the FILE_IO sink at the
|
||||
// `object_creation_expression` level (mapped to `Kind::CallFn` in
|
||||
// Java's KINDS). Receiver-typing already maps these classes to
|
||||
// `TypeKind::FileHandle` (see `class_name_to_type_kind`) so chained
|
||||
// method calls on the resulting handle resolve via type-qualified
|
||||
// labels, but the construction call itself is the canonical
|
||||
// path-traversal vector.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Files.readString",
|
||||
"Files.readAllBytes",
|
||||
"Files.readAllLines",
|
||||
"Files.write",
|
||||
"Files.writeString",
|
||||
"Files.lines",
|
||||
"Files.copy",
|
||||
"Files.move",
|
||||
"Files.delete",
|
||||
"Files.deleteIfExists",
|
||||
"Files.newInputStream",
|
||||
"Files.newOutputStream",
|
||||
"Files.newBufferedReader",
|
||||
"Files.newBufferedWriter",
|
||||
"FileInputStream",
|
||||
"FileOutputStream",
|
||||
"RandomAccessFile",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// Phase 13 — `Path.normalize()` collapses `.` / `..` segments and
|
||||
// is the canonical Java path-traversal sanitiser when paired with
|
||||
// a `startsWith(base)` containment check (not modelled here; the
|
||||
// sanitiser rule clears the FILE_IO cap on the call's return,
|
||||
// which is sufficient for the cap-based gate to suppress the
|
||||
// sink finding). Case-sensitive: `Path.normalize` is unique to
|
||||
// `java.nio.file.Path`; bare `normalize` would over-fire on
|
||||
// `URI.normalize`, `Normalizer.normalize`, etc.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Path.normalize",
|
||||
// Canonical Java path-traversal sanitiser idiom:
|
||||
// `base.resolve(name).normalize()`. CFG paren-strip yields
|
||||
// callee text `<receiver>.resolve.normalize`; the bare 2-call
|
||||
// `resolve.normalize` suffix is unique to `java.nio.file.Path`
|
||||
// (no overload across the supported corpus produces the same
|
||||
// chain text). Case-sensitive on the leaf chain to avoid
|
||||
// colliding with non-path `.resolve()`-then-`.normalize()`
|
||||
// shapes in unrelated grammars.
|
||||
"resolve.normalize",
|
||||
// Receiver-bound shape `Paths.get(p).normalize()` — the
|
||||
// `Paths.get` constructor mapping in `ssa/type_facts.rs` types
|
||||
// the receiver as `FileHandle`, so the type-qualified resolver
|
||||
// rewrites `<v>.normalize` → `FileHandle.normalize` here.
|
||||
"FileHandle.normalize",
|
||||
],
|
||||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// HTTP response sinks. println/print are broad (also match System.out)
|
||||
// but necessary to catch response.getWriter().println() via suffix matching.
|
||||
LabelRule {
|
||||
|
|
@ -134,12 +215,34 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
// openConnection() is the standard java.net.URL API for initiating a connection.
|
||||
// It is the correct interception point: the URL is already set on the object.
|
||||
//
|
||||
// Phase 14 — additional SSRF entry points covered:
|
||||
// * `URL.openStream` — equivalent of `URL.openConnection().getInputStream()`,
|
||||
// fetches the resource at the URL directly. Bare `openStream`
|
||||
// suffix is unique to `java.net.URL` in the supported corpus.
|
||||
// * `OkHttpClient.newCall(Request)` — Square OkHttp's request
|
||||
// dispatch entry point. The `Request` is built via a
|
||||
// `Request.Builder().url(u).build()` chain whose default
|
||||
// arg→return propagation smears URL taint through the chain.
|
||||
// * `RestTemplate.getForEntity` / `RestTemplate.headForHeaders` —
|
||||
// read-shaped Spring verbs that take the URL at arg 0.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"openConnection",
|
||||
"openStream",
|
||||
"HttpClient.send",
|
||||
"HttpClient.sendAsync",
|
||||
// Phase 14 — `OkHttpClient.newCall(Request)` and the
|
||||
// generic `HttpClient.newCall` form OkHttp resolves to via
|
||||
// the JAVA_HIERARCHY (OkHttpClient → HttpClient). Both
|
||||
// forms are covered so a constructor-typed receiver
|
||||
// (HttpClient) and a class-named receiver (OkHttpClient)
|
||||
// both fire.
|
||||
"HttpClient.newCall",
|
||||
"OkHttpClient.newCall",
|
||||
"getForObject",
|
||||
"getForEntity",
|
||||
"headForHeaders",
|
||||
"RestTemplate.exchange",
|
||||
"postForObject",
|
||||
"postForEntity",
|
||||
|
|
@ -246,8 +349,34 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &[
|
||||
"entityManager.createNativeQuery",
|
||||
"entityManager.createQuery",
|
||||
"em.createNativeQuery",
|
||||
"em.createQuery",
|
||||
"session.createQuery",
|
||||
"session.createSQLQuery",
|
||||
"session.createNativeQuery",
|
||||
// Phase 15 — Spring Data JPA / Hibernate factory chains:
|
||||
// `getEntityManager().createNativeQuery(...)` /
|
||||
// `getSession().createQuery(...)` reduce to
|
||||
// `getEntityManager.createNativeQuery` /
|
||||
// `getSession.createQuery` after the chain-normalisation
|
||||
// strips parens.
|
||||
"getEntityManager.createNativeQuery",
|
||||
"getEntityManager.createQuery",
|
||||
"getSession.createQuery",
|
||||
"getSession.createSQLQuery",
|
||||
"getSession.createNativeQuery",
|
||||
// Type-qualified Hibernate Session matchers fire when the
|
||||
// receiver carries a `TypeKind::HibernateSession` fact (set
|
||||
// by `constructor_type` for `sessionFactory.openSession()` /
|
||||
// `sessionFactory.getCurrentSession()` /
|
||||
// `sessionFactory.openStatelessSession()` returns). Closes
|
||||
// the arbitrary-receiver-name shape (`sess`,
|
||||
// `hibernateSession`, etc.) the flat `session.*` matchers
|
||||
// above only catch when receiver is literally named
|
||||
// `session`.
|
||||
"HibernateSession.createQuery",
|
||||
"HibernateSession.createSQLQuery",
|
||||
"HibernateSession.createNativeQuery",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
|
|
@ -484,6 +613,385 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// ── SQL execute payload-arg gating (Phase 15 deferred fix, Java) ──────
|
||||
//
|
||||
// Mirrors the Python resolution recorded in `python::GATED_SINKS`: the
|
||||
// flat rules above already classify these callees as `Sink(SQL_QUERY)`
|
||||
// on every argument. The JDBC / JPA / Hibernate / Spring conventions
|
||||
// are that arg 0 is the SQL template (or HQL/JPQL string) and any
|
||||
// remaining arguments are bind values, RowMappers, result-set classes,
|
||||
// or other non-SQL payloads. Tainted bind values are SAFE because the
|
||||
// driver / JPA layer escapes them; tainted SQL is the SQLi vector.
|
||||
//
|
||||
// These Destination-activation gates carry the same `Sink(SQL_QUERY)`
|
||||
// label as the flat rule (so cap dedupes against the flat label) but
|
||||
// propagate `payload_args: &[0]` into `sink_payload_args`, narrowing the
|
||||
// SSA sink scan to arg 0 only. Receiver-typed `DatabaseConnection.*`
|
||||
// forms are case-sensitive, matching the flat rule.
|
||||
SinkGate {
|
||||
callee_matcher: "executeQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "executeUpdate",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DatabaseConnection.execute",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DatabaseConnection.executeBatch",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DatabaseConnection.executeLargeUpdate",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// Spring JdbcTemplate verbs. All take SQL at arg 0; remaining args are
|
||||
// bind values (`Object[]` / varargs) or `RowMapper` / `ResultSetExtractor`
|
||||
// / class hints — all non-SQL payloads.
|
||||
SinkGate {
|
||||
callee_matcher: "jdbcTemplate.query",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "jdbcTemplate.update",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "jdbcTemplate.execute",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "jdbcTemplate.queryForObject",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "jdbcTemplate.queryForList",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// JPA / Hibernate factories. `createQuery(sql)` / `createQuery(sql, ResultClass)`
|
||||
// both take the SQL/JPQL/HQL string at arg 0; the optional `ResultClass`
|
||||
// at arg 1 is metadata, not SQL.
|
||||
SinkGate {
|
||||
callee_matcher: "entityManager.createQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "entityManager.createNativeQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "em.createQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "em.createNativeQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "session.createQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "session.createSQLQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "session.createNativeQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "getEntityManager.createQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "getEntityManager.createNativeQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "getSession.createQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "getSession.createSQLQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "getSession.createNativeQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// Type-qualified Hibernate Session gates. Mirror the
|
||||
// `session.create*` family above so type-qualified resolution at
|
||||
// sink-firing time consults `payload_args = &[0]` and suppresses
|
||||
// tainted bind-arg shapes that route through `setParameter` /
|
||||
// `setString` rather than the raw query string. Receivers carry
|
||||
// `TypeKind::HibernateSession` via `constructor_type`'s
|
||||
// `openSession` / `getCurrentSession` / `openStatelessSession`
|
||||
// arms.
|
||||
SinkGate {
|
||||
callee_matcher: "HibernateSession.createQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HibernateSession.createSQLQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HibernateSession.createNativeQuery",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use crate::labels::{
|
||||
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
|
||||
Cap, DataLabel, GateActivation, GatedLabelRule, Kind, LabelGate, LabelRule, ParamConfig,
|
||||
RuntimeLabelRule, SinkGate,
|
||||
};
|
||||
use crate::utils::project::{DetectedFramework, FrameworkContext};
|
||||
use phf::{Map, phf_map};
|
||||
|
|
@ -29,6 +30,21 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 10 — Web `Request` receiver-method reads. Triggered when
|
||||
// the SSA receiver carries `TypeKind::Request` and the
|
||||
// type-qualified resolver rewrites `req.json()` → `Request.json`
|
||||
// etc. Mirrors the matching list in `labels/typescript.rs`.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Request.json",
|
||||
"Request.formData",
|
||||
"Request.text",
|
||||
"Request.url",
|
||||
"Request.headers.get",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["JSON.parse"],
|
||||
|
|
@ -253,6 +269,40 @@ pub static RULES: &[LabelRule] = &[
|
|||
"fs.unlinkSync",
|
||||
"fs.readdir",
|
||||
"fs.readdirSync",
|
||||
// Phase 05 — `node:fs/promises` member-access forms covered
|
||||
// here. Bare-name forms (`readFile`, `open`, ...) and
|
||||
// `fsp.readFile` namespace-import forms ride the gated
|
||||
// matcher in `GATED_LABEL_RULES`. Receiver-type fallback
|
||||
// synthesises `FileSystemPromisesNs.<method>` (handled
|
||||
// below).
|
||||
"fs.promises.readFile",
|
||||
"fs.promises.writeFile",
|
||||
"fs.promises.unlink",
|
||||
"fs.promises.open",
|
||||
"fs.promises.stat",
|
||||
"fs.promises.readdir",
|
||||
"fs.promises.mkdir",
|
||||
"fs.promises.rmdir",
|
||||
"fs.promises.rm",
|
||||
"fs.promises.appendFile",
|
||||
"fs.promises.copyFile",
|
||||
"fs.promises.rename",
|
||||
"fs.promises.truncate",
|
||||
"fs.promises.chmod",
|
||||
"FileSystemPromisesNs.readFile",
|
||||
"FileSystemPromisesNs.writeFile",
|
||||
"FileSystemPromisesNs.unlink",
|
||||
"FileSystemPromisesNs.open",
|
||||
"FileSystemPromisesNs.stat",
|
||||
"FileSystemPromisesNs.readdir",
|
||||
"FileSystemPromisesNs.mkdir",
|
||||
"FileSystemPromisesNs.rmdir",
|
||||
"FileSystemPromisesNs.rm",
|
||||
"FileSystemPromisesNs.appendFile",
|
||||
"FileSystemPromisesNs.copyFile",
|
||||
"FileSystemPromisesNs.rename",
|
||||
"FileSystemPromisesNs.truncate",
|
||||
"FileSystemPromisesNs.chmod",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
|
|
@ -310,6 +360,31 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ── Phase 07 — ORM query-builder receiver-typed sinks ──
|
||||
//
|
||||
// Each rule here matches a callee text constructed by
|
||||
// `resolve_type_qualified_labels` when a value's inferred TypeKind has a
|
||||
// `label_prefix()`. The matcher form `<TypePrefix>.<method>` is the
|
||||
// wire shape produced by that helper. The receiver TypeKinds
|
||||
// themselves are populated by [`crate::ssa::type_facts::constructor_type`]
|
||||
// (TS/JS branch): `new Sequelize(...)` → `Sequelize`,
|
||||
// `getRepository(Entity)` → `TypeOrmRepo`,
|
||||
// `getManager()` → `TypeOrmManager`,
|
||||
// `createEntityManager()` → `MikroOrmEm`. Without a typed receiver the
|
||||
// qualified callee text is never built, so these rules cannot misfire on
|
||||
// unrelated `.literal()` / `.query()` / `.execute()` methods.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Sequelize.literal",
|
||||
"TypeOrmRepo.query",
|
||||
"TypeOrmRepo.createQueryBuilder",
|
||||
"TypeOrmManager.query",
|
||||
"TypeOrmManager.createQueryBuilder",
|
||||
"MikroOrmEm.execute",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ─── LDAP injection sinks ───
|
||||
//
|
||||
// `ldapjs`: both the bound-variable idiom
|
||||
|
|
@ -527,6 +602,75 @@ pub static EXCLUDES: &[&str] = &[
|
|||
"exec.start",
|
||||
];
|
||||
|
||||
/// Import-gated label rules for this language module. Every entry carries a
/// [`LabelGate`]; the label is only attached when that gate is satisfied at
/// the call site. Four rule groups are registered here:
///
/// 1. Phase 05 — `node:fs/promises` path-traversal sinks (bare method names).
/// 2. Phase 07 — Knex raw-SQL escape hatches.
/// 3. Phase 07 — Drizzle `sql` template tag / `sql.raw`.
/// 4. Phase 10 — Next.js `cookies()` / `headers()` sources.
///
/// For the Phase 05 group, the [`LabelGate::ImportedFromModule`] gate
/// suppresses bare-name matches unless the file actually imports the method
/// from `node:fs/promises` or `fs/promises`. `fs.promises.readFile`-style
/// member-access forms continue to fire via the flat FILE_IO matcher list
/// (no gate needed because the `fs.promises.` prefix is itself witness to
/// the resolution).
pub static GATED_LABEL_RULES: &[GatedLabelRule] = &[
    GatedLabelRule {
        matchers: &[
            "readFile",
            "writeFile",
            "unlink",
            "open",
            "stat",
            "readdir",
            "mkdir",
            "rmdir",
            "rm",
            "appendFile",
            "copyFile",
            "rename",
            "truncate",
            "chmod",
        ],
        label: DataLabel::Sink(Cap::FILE_IO),
        case_sensitive: false,
        gate: LabelGate::ImportedFromModule(&["node:fs/promises", "fs/promises"]),
    },
    // Phase 07 — Knex bare-name raw-SQL escape hatches. The receiver in
    // `db.whereRaw(sql)` shape is an arbitrary local binding (`db`, `qb`,
    // `users`, ...) so leading-identifier gating cannot witness the
    // import. Phase 07 deferred-item 10 tightening: require the file to
    // bind the conventional value-import name `knex` (lowercase) so that
    // type-only shapes like `import { Knex } from 'knex'` (for
    // `Knex.QueryBuilder` type annotations) do not over-fire the gate.
    GatedLabelRule {
        matchers: &["whereRaw", "orderByRaw", "havingRaw"],
        label: DataLabel::Sink(Cap::SQL_QUERY),
        case_sensitive: true,
        gate: LabelGate::FileImportsModuleAsLocalName {
            modules: &["knex"],
            local_names: &["knex"],
        },
    },
    // Phase 07 — Drizzle `sql` template-tag builder. Two shapes:
    //   - `sql.raw(x)`              → callee text "sql.raw" (member call)
    //   - `sql\`SELECT ${x}\``    → callee text "sql" (tag call)
    // Both leading-identifier-gate against the imported `sql` symbol from
    // `drizzle-orm`. `=sql` is exact-only so unrelated `.sql()` methods do
    // not collide; `sql.raw` carries its own member-access matcher.
    GatedLabelRule {
        matchers: &["=sql", "sql.raw"],
        label: DataLabel::Sink(Cap::SQL_QUERY),
        case_sensitive: true,
        gate: LabelGate::ImportedFromModule(&["drizzle-orm"]),
    },
    // Phase 10 — Next.js `cookies()` / `headers()` from `next/headers`
    // return adversary-controlled request-bound state. Mirrors the
    // entry in `labels/typescript.rs::GATED_LABEL_RULES`.
    GatedLabelRule {
        matchers: &["cookies", "headers"],
        label: DataLabel::Source(Cap::all()),
        case_sensitive: true,
        gate: LabelGate::ImportedFromModule(&["next/headers"]),
    },
];
|
||||
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
SinkGate {
|
||||
callee_matcher: "setAttribute",
|
||||
|
|
@ -1316,6 +1460,8 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"variable_declaration" => Kind::CallWrapper,
|
||||
"lexical_declaration" => Kind::CallWrapper,
|
||||
"expression_statement" => Kind::CallWrapper,
|
||||
"await_expression" => Kind::AwaitForward,
|
||||
"jsx_attribute" => Kind::JsxAttr,
|
||||
|
||||
// trivia
|
||||
"comment" => Kind::Trivia,
|
||||
|
|
|
|||
|
|
@ -38,6 +38,61 @@ pub struct LabelRule {
|
|||
pub case_sensitive: bool,
|
||||
}
|
||||
|
||||
/// Activation gate carried by a [`GatedLabelRule`]. Phase 05 introduces the
/// import-derived gate so JS/TS bare-name `fs/promises` sinks (`readFile`,
/// `writeFile`, ...) only fire when the call resolves to that module — a
/// flat bare-name match would over-fire on user-defined `readFile` helpers.
///
/// The variants differ in *what* must witness the import: the call's own
/// leading identifier, any binding in the file, or a specific local name.
#[derive(Debug, Clone, Copy)]
pub enum LabelGate {
    /// Fires only when the call's leading identifier is locally bound by an
    /// import / `require` whose `source_module` equals one of the listed
    /// specifiers. The synthetic prefix `FileSystemPromisesNs.` produced by
    /// receiver-type qualification also satisfies the gate (see Phase 05's
    /// `TypeKind::FileSystemPromisesNs`).
    ImportedFromModule(&'static [&'static str]),
    /// Fires when *any* local-name in the file's import view resolves to one
    /// of the listed specifiers, regardless of which identifier leads the
    /// call. Used for Phase 07 ORM bare-name method sinks (Knex's `whereRaw`
    /// / `orderByRaw` / `havingRaw`) where the receiver is a query-builder
    /// instance whose binding name is arbitrary (`db`, `qb`, `users`, ...)
    /// and the import witness is the package itself.
    FileImportsModule(&'static [&'static str]),
    /// Fires when the file's import view binds at least one of `local_names`
    /// to one of `modules`. Tighter than [`Self::FileImportsModule`]: type-only
    /// or peripheral named-import shapes (e.g. `import { Knex } from 'knex'`
    /// for type-only use of `Knex.QueryBuilder`) do not satisfy the gate
    /// unless the conventional value-binding name (`knex`, lowercase) is also
    /// present. Used for Phase 07 deferred-item 10's tightening of the Knex
    /// `whereRaw` / `orderByRaw` / `havingRaw` gate.
    FileImportsModuleAsLocalName {
        /// Module specifiers the local name must be bound to.
        modules: &'static [&'static str],
        /// Local binding names, at least one of which must map to `modules`.
        local_names: &'static [&'static str],
    },
}
|
||||
|
||||
/// A label rule that only fires when its [`LabelGate`] is satisfied at the
/// call site. The matcher / label / case-sensitivity semantics mirror
/// [`LabelRule`]; the gate is checked by [`classify_all_ctx`] using the
/// caller-supplied [`ClassificationContext`].
#[derive(Debug, Clone, Copy)]
pub struct GatedLabelRule {
    /// Callee-text matchers. A matcher ending in `_` is evaluated as a
    /// prefix matcher; every other matcher goes through the exact / suffix
    /// pass.
    pub matchers: &'static [&'static str],
    /// Label attached to the call when a matcher hits and the gate passes.
    pub label: DataLabel,
    /// Whether matcher comparison is case-sensitive.
    pub case_sensitive: bool,
    /// Condition that must hold for the rule to contribute its label.
    pub gate: LabelGate,
}
|
||||
|
||||
/// Per-file context consulted by [`classify_all_ctx`] when evaluating
/// gated rules. Threaded from the CFG layer's gated post-pass; `None`
/// elsewhere keeps existing classification paths intact.
#[derive(Debug, Default, Clone, Copy)]
pub struct ClassificationContext<'a> {
    /// Local-name → source-module view of the file's imports. The map is
    /// computed at CFG build time (see `cfg::imports::extract_local_import_view`)
    /// so the gate fires before the project-wide resolver runs.
    ///
    /// When this is `None`, gates that need an import witness evaluate to
    /// `false`; only a synthetic receiver-type prefix can still satisfy an
    /// [`LabelGate::ImportedFromModule`] gate.
    pub local_imports: Option<&'a std::collections::HashMap<String, String>>,
}
|
||||
|
||||
/// Sentinel returned by [`classify_gated_sink`] for the dynamic/unknown-activation
|
||||
/// branch: the gate fires conservatively and every positional argument must be
|
||||
/// considered a potential tainted payload, not just the explicit `payload_args`.
|
||||
|
|
@ -300,6 +355,17 @@ pub enum Kind {
|
|||
/// any other sequential statement in the CFG but explicitly classified so
|
||||
/// code that inspects `Kind` can recognise it.
|
||||
Seq,
|
||||
/// Async-await unary forward. An `await x` expression evaluates `x` and
|
||||
/// resolves to the same value/taint, modelled as a 1:1 copy. Lowered to
|
||||
/// SSA as `SsaOp::Assign(operand)` so taint, origins, and abstract value
|
||||
/// pass through unchanged.
|
||||
AwaitForward,
|
||||
/// JSX attribute (`<Tag name={value} />`). Dispatched in the CFG so the
|
||||
/// builder can recognise React-specific shapes such as
|
||||
/// `dangerouslySetInnerHTML={{ __html: x }}` and synthesise a sink call.
|
||||
/// The attribute name is read from the AST at CFG-build time, not carried
|
||||
/// in this enum (which must remain `Copy` for `phf_map` storage).
|
||||
JsxAttr,
|
||||
Other,
|
||||
}
|
||||
|
||||
|
|
@ -445,6 +511,19 @@ static GATED_REGISTRY: Lazy<HashMap<&'static str, &'static [SinkGate]>> = Lazy::
|
|||
m
|
||||
});
|
||||
|
||||
/// Per-language registry of [`GatedLabelRule`] entries. Phase 05 wires
|
||||
/// JS/TS only (the `fs/promises` FILE_IO matcher set); other languages
|
||||
/// fall back to an empty slice.
|
||||
static GATED_LABEL_REGISTRY: Lazy<HashMap<&'static str, &'static [GatedLabelRule]>> =
|
||||
Lazy::new(|| {
|
||||
let mut m = HashMap::new();
|
||||
m.insert("javascript", javascript::GATED_LABEL_RULES);
|
||||
m.insert("js", javascript::GATED_LABEL_RULES);
|
||||
m.insert("typescript", typescript::GATED_LABEL_RULES);
|
||||
m.insert("ts", typescript::GATED_LABEL_RULES);
|
||||
m
|
||||
});
|
||||
|
||||
/// Feature flag for the Python prototype-pollution gates. Disabled by
|
||||
/// default; set `NYX_PYTHON_PROTO_POLLUTION=1` (or `true`) to enable
|
||||
/// `dict.update` / `__dict__.update` proto-pollution detection.
|
||||
|
|
@ -599,6 +678,89 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
|
|||
.unwrap_or(Kind::Other)
|
||||
}
|
||||
|
||||
/// Reports whether `callee_leaf` names a Promise-callback method
/// (`then`, `catch`, `finally`) in a JS/TS-family language.
///
/// These methods are not sinks. The taint engine uses this predicate to
/// treat the receiver as a Promise whose resolved value is handed to the
/// callback's first parameter (see phase 03 of `plan.md` for the recall-gap
/// rationale).
///
/// `callee_leaf` must already be the short form produced by
/// `callee_leaf_name` — `"then"`, not `"p.then"`. Both arguments are
/// compared exactly (case-sensitive); any other language yields `false`.
pub fn is_promise_callback_method(lang: &str, callee_leaf: &str) -> bool {
    const JS_FAMILY: [&str; 5] = ["javascript", "js", "typescript", "ts", "tsx"];
    const CALLBACK_METHODS: [&str; 3] = ["then", "catch", "finally"];

    JS_FAMILY.contains(&lang) && CALLBACK_METHODS.contains(&callee_leaf)
}
|
||||
|
||||
/// Shape of a promise / future combinator recognised by the taint engine.
///
/// Combinators wrap arguments into a single Promise:
/// * `Promise.resolve(x)` — identity for `x`.
/// * `Promise.all([a, b])` — array whose elements have per-arg taint.
/// * `Promise.allSettled([...])` — same shape as `all`, conservative union.
/// * `Promise.race([...])` — first-to-settle, conservative union.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PromiseCombinatorKind {
    Resolve,
    All,
    AllSettled,
    Race,
}

/// JS/TS static `Promise.*` combinators, keyed by full callee text.
fn js_promise_combinator(callee: &str) -> Option<PromiseCombinatorKind> {
    match callee {
        "Promise.resolve" => Some(PromiseCombinatorKind::Resolve),
        "Promise.all" => Some(PromiseCombinatorKind::All),
        "Promise.allSettled" => Some(PromiseCombinatorKind::AllSettled),
        "Promise.race" => Some(PromiseCombinatorKind::Race),
        _ => None,
    }
}

/// Python: `asyncio.gather(...)` / `asyncio.wait(...)` resolve to a
/// tuple/list whose elements carry the union of argument taints.
/// `asyncio.wait` returns `(done, pending)` sets but the same conservative
/// scalar-union approximation applies, downstream destructuring already
/// taints all bindings.
fn python_promise_combinator(callee: &str) -> Option<PromiseCombinatorKind> {
    match callee {
        "asyncio.gather" | "asyncio.wait" => Some(PromiseCombinatorKind::All),
        _ => None,
    }
}

/// Rust: `tokio::join!` / `futures::join!` (and their `try_*` variants)
/// evaluate every future concurrently and bind the tuple of resolved
/// values. `cfg::push_node` rewrites the macro_invocation's `arg_uses` so
/// each future's tainted inputs surface as a positional arg; this
/// combinator entry then unions them onto the tuple value.
fn rust_promise_combinator(callee: &str) -> Option<PromiseCombinatorKind> {
    match callee {
        "tokio::join" | "tokio::try_join" | "futures::join" | "futures::try_join" => {
            Some(PromiseCombinatorKind::All)
        }
        _ => None,
    }
}

/// Lang-agnostic recognition of any promise combinator callee text. Used by
/// SSA lowering, which doesn't carry a `lang` argument.
///
/// Tries the JS/TS, Python and Rust tables in turn. The tables share no
/// callee text, so the result does not depend on the order.
pub fn is_any_promise_combinator(callee: &str) -> Option<PromiseCombinatorKind> {
    js_promise_combinator(callee)
        .or_else(|| python_promise_combinator(callee))
        .or_else(|| rust_promise_combinator(callee))
}

/// Combinator that `callee` resolves to in language `lang`, or `None`.
///
/// `callee` is the full callee text (e.g. `"Promise.all"`) since the leaf
/// segment alone (`"all"`) is too generic to match safely. `lang` is a
/// language slug; unrecognised slugs, and callees belonging to a different
/// language family than `lang`, yield `None`.
pub fn is_promise_combinator(lang: &str, callee: &str) -> Option<PromiseCombinatorKind> {
    match lang {
        "javascript" | "js" | "typescript" | "ts" | "tsx" => js_promise_combinator(callee),
        "python" | "py" => python_promise_combinator(callee),
        "rust" | "rs" => rust_promise_combinator(callee),
        _ => None,
    }
}
|
||||
|
||||
/// The kind of taint source, used to refine finding severity.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
|
|
@ -953,6 +1115,17 @@ fn ends_with_cs(haystack: &[u8], needle: &[u8], case_sensitive: bool) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Tests whether `haystack` begins with `needle`, ignoring ASCII case and
/// without allocating.
///
/// Exists for the gated-sink dispatch hot path, where the earlier
/// `value.to_ascii_lowercase().starts_with(&p.to_ascii_lowercase())` form
/// built two temporary `String`s per check. Only ASCII letters are folded;
/// all other bytes must match exactly.
#[inline]
fn starts_with_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
    haystack
        .as_bytes()
        .get(..needle.len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(needle.as_bytes()))
}
|
||||
|
||||
/// Prefix check with configurable case sensitivity. The `=` exact-match
|
||||
/// sigil is meaningless for prefix matchers (which by definition match many
|
||||
/// suffixes); it is stripped if present so a malformed matcher like
|
||||
|
|
@ -1028,6 +1201,9 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
|
|||
|
||||
// For chained calls like `r.URL.Query().Get`, also strip internal
|
||||
// `().` segments to produce a normalized form like `r.URL.Query.Get`.
|
||||
// `normalize_chained_call` returns `Cow::Borrowed` when no rewrite is
|
||||
// needed, so the alloc is paid only on inputs that actually require
|
||||
// it.
|
||||
let full_normalized = normalize_chained_call(text);
|
||||
let full_norm_bytes = full_normalized.as_bytes();
|
||||
|
||||
|
|
@ -1116,6 +1292,9 @@ pub fn classify_all(
|
|||
return SmallVec::new();
|
||||
}
|
||||
|
||||
// `normalize_chained_call` returns `Cow::Borrowed` when no rewrite
|
||||
// is needed, so the alloc is paid only on inputs that actually
|
||||
// require it. The hot classify path runs on every CFG node.
|
||||
let full_normalized = normalize_chained_call(text);
|
||||
let full_norm_bytes = full_normalized.as_bytes();
|
||||
|
||||
|
|
@ -1198,6 +1377,228 @@ pub fn classify_all(
|
|||
out
|
||||
}
|
||||
|
||||
/// Classify a call with an optional [`ClassificationContext`] enabling
|
||||
/// gated rule evaluation.
|
||||
///
|
||||
/// This is a strict superset of [`classify_all`]: the same flat-rule
|
||||
/// matching runs first, then any per-language [`GatedLabelRule`] is
|
||||
/// evaluated against `ctx`. A `None` context (or a context with no
|
||||
/// `local_imports`) leaves only the synthetic receiver-type prefix
|
||||
/// (e.g. `FileSystemPromisesNs.`) able to satisfy the gate.
|
||||
pub fn classify_all_ctx(
|
||||
lang: &str,
|
||||
text: &str,
|
||||
extra: Option<&[RuntimeLabelRule]>,
|
||||
ctx: Option<&ClassificationContext<'_>>,
|
||||
) -> SmallVec<[DataLabel; 2]> {
|
||||
let mut out = classify_all(lang, text, extra);
|
||||
classify_gated_into(lang, text, ctx, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
/// Run only the gated-rule pass — skip the flat [`classify_all`] scan.
|
||||
///
|
||||
/// Use when the caller has already classified `text` with the flat rules
|
||||
/// during initial CFG construction and only needs the gate-conditioned
|
||||
/// labels (which require a per-file [`ClassificationContext`] not
|
||||
/// available at the original classification site).
|
||||
pub fn classify_gated_only(
|
||||
lang: &str,
|
||||
text: &str,
|
||||
ctx: Option<&ClassificationContext<'_>>,
|
||||
) -> SmallVec<[DataLabel; 2]> {
|
||||
let mut out = SmallVec::new();
|
||||
classify_gated_into(lang, text, ctx, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
fn classify_gated_into(
|
||||
lang: &str,
|
||||
text: &str,
|
||||
ctx: Option<&ClassificationContext<'_>>,
|
||||
out: &mut SmallVec<[DataLabel; 2]>,
|
||||
) {
|
||||
let gated = match GATED_LABEL_REGISTRY.get(lang).or_else(|| {
|
||||
let key = lang.to_ascii_lowercase();
|
||||
GATED_LABEL_REGISTRY.get(key.as_str())
|
||||
}) {
|
||||
Some(g) => *g,
|
||||
None => return,
|
||||
};
|
||||
if gated.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let head = text.split(['(', '<']).next().unwrap_or("");
|
||||
let trimmed = head.trim().as_bytes();
|
||||
if is_excluded(lang, trimmed) {
|
||||
return;
|
||||
}
|
||||
let full_normalized = normalize_chained_call(text);
|
||||
let full_norm_bytes = full_normalized.as_bytes();
|
||||
|
||||
#[inline]
|
||||
fn push_dedup(out: &mut SmallVec<[DataLabel; 2]>, label: DataLabel) {
|
||||
if !out.contains(&label) {
|
||||
out.push(label);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 1: exact / suffix.
|
||||
for rule in gated {
|
||||
for raw in rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_') {
|
||||
continue;
|
||||
}
|
||||
let matches = match_suffix_cs(trimmed, m, rule.case_sensitive)
|
||||
|| match_suffix_cs(full_norm_bytes, m, rule.case_sensitive);
|
||||
if matches && gate_satisfied(&rule.gate, head, ctx) {
|
||||
push_dedup(out, rule.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Pass 2: prefix.
|
||||
for rule in gated {
|
||||
for raw in rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_')
|
||||
&& (starts_with_cs(trimmed, m, rule.case_sensitive)
|
||||
|| starts_with_cs(full_norm_bytes, m, rule.case_sensitive))
|
||||
&& gate_satisfied(&rule.gate, head, ctx)
|
||||
{
|
||||
push_dedup(out, rule.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Payload-argument positions for the known type-qualified sink callees.
///
/// The Phase 07 ORM raw-SQL receiver methods (`TypeOrmRepo.query`,
/// `TypeOrmManager.query`, `MikroOrmEm.execute`, etc.) take the SQL template
/// at arg 0 and bind / parameter arrays at arg 1+. A flat label rule cannot
/// express that and would false-positive on
/// `repo.query("SELECT $1", [tainted])`, so when the type-qualified resolver
/// synthesises one of these callees the sink-taint check is restricted to
/// the positions returned here.
///
/// `Sequelize.literal(sql)` is single-arg, so `&[0]` loses no precision
/// against the unconditional flat rule.
///
/// Returns `None` for any callee not in the list; matching is exact and
/// case-sensitive.
pub fn type_qualified_sink_payload_args(qualified_callee: &str) -> Option<&'static [usize]> {
    const SQL_TEMPLATE_ONLY: &[usize] = &[0];

    let is_restricted = matches!(
        qualified_callee,
        "Sequelize.literal"
            | "TypeOrmRepo.query"
            | "TypeOrmRepo.createQueryBuilder"
            | "TypeOrmManager.query"
            | "TypeOrmManager.createQueryBuilder"
            | "MikroOrmEm.execute"
    );
    is_restricted.then_some(SQL_TEMPLATE_ONLY)
}
|
||||
|
||||
/// Receiver-type prefixes accepted as a witness for `module` on a
/// [`LabelGate::ImportedFromModule`] gate.
///
/// SSA receiver-type qualification can synthesise a callee such as
/// `FileSystemPromisesNs.readFile(...)`, whose leading identifier is a type
/// prefix rather than an imported binding. This table says which prefixes
/// may stand in for a textual import of `module`. An empty slice means the
/// gate has to rely on the `local_imports` map alone.
///
/// `module` is compared ignoring ASCII case.
fn receiver_type_prefixes_for_module(module: &str) -> &'static [&'static str] {
    const FS_PROMISES_SPECIFIERS: [&str; 2] = ["node:fs/promises", "fs/promises"];

    let is_fs_promises = FS_PROMISES_SPECIFIERS
        .iter()
        .any(|spec| module.eq_ignore_ascii_case(spec));
    if is_fs_promises {
        &["FileSystemPromisesNs"]
    } else {
        &[]
    }
}
|
||||
|
||||
/// Evaluate a [`LabelGate`] against the call's leading identifier and the
|
||||
/// caller-supplied context. Receiver-type qualification can satisfy
|
||||
/// [`LabelGate::ImportedFromModule`] via
|
||||
/// [`receiver_type_prefixes_for_module`].
|
||||
fn gate_satisfied(
|
||||
gate: &LabelGate,
|
||||
callee_head: &str,
|
||||
ctx: Option<&ClassificationContext<'_>>,
|
||||
) -> bool {
|
||||
match gate {
|
||||
LabelGate::ImportedFromModule(modules) => {
|
||||
let leading = leading_identifier(callee_head);
|
||||
for m in modules.iter() {
|
||||
for prefix in receiver_type_prefixes_for_module(m) {
|
||||
if leading == *prefix {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
let Some(ctx) = ctx else {
|
||||
return false;
|
||||
};
|
||||
let Some(map) = ctx.local_imports else {
|
||||
return false;
|
||||
};
|
||||
let Some(source_module) = map.get(leading) else {
|
||||
return false;
|
||||
};
|
||||
modules
|
||||
.iter()
|
||||
.any(|m| source_module.eq_ignore_ascii_case(m))
|
||||
}
|
||||
LabelGate::FileImportsModule(modules) => {
|
||||
let Some(ctx) = ctx else {
|
||||
return false;
|
||||
};
|
||||
let Some(map) = ctx.local_imports else {
|
||||
return false;
|
||||
};
|
||||
map.values().any(|source_module| {
|
||||
modules
|
||||
.iter()
|
||||
.any(|m| source_module.eq_ignore_ascii_case(m))
|
||||
})
|
||||
}
|
||||
LabelGate::FileImportsModuleAsLocalName {
|
||||
modules,
|
||||
local_names,
|
||||
} => {
|
||||
let Some(ctx) = ctx else {
|
||||
return false;
|
||||
};
|
||||
let Some(map) = ctx.local_imports else {
|
||||
return false;
|
||||
};
|
||||
local_names.iter().any(|name| {
|
||||
map.get(*name).is_some_and(|source_module| {
|
||||
modules
|
||||
.iter()
|
||||
.any(|m| source_module.eq_ignore_ascii_case(m))
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Leading identifier of a call expression's text: the segment before the
/// first `.`, `:`, `(`, `<`, space, or `[`, after skipping any leading
/// whitespace. Used to drive import-view lookups.
///
/// Leading whitespace is skipped because the gated classifier matches on a
/// trimmed callee head but passes the untrimmed head here. Without the
/// skip, a space at position 0 would be taken as the delimiter and the
/// result would be the empty string, which can never match an import
/// binding.
///
/// Returns the whole (left-trimmed) input when no delimiter is present, and
/// `""` for empty or all-whitespace input. All delimiters are ASCII, so the
/// returned slice always ends on a UTF-8 character boundary.
fn leading_identifier(callee_head: &str) -> &str {
    let head = callee_head.trim_start();
    head.find(['.', ':', '(', '<', ' ', '['])
        .map_or(head, |end| &head[..end])
}
|
||||
|
||||
/// Result of a gated-sink classification.
|
||||
///
|
||||
/// `label` is the sink capability the callee contributes at this site.
|
||||
|
|
@ -1289,8 +1690,7 @@ pub fn classify_gated_sink(
|
|||
}
|
||||
match const_keyword_arg(name) {
|
||||
Some(v) => {
|
||||
let lower = v.to_ascii_lowercase();
|
||||
if values.iter().any(|dv| lower == dv.to_ascii_lowercase()) {
|
||||
if values.iter().any(|dv| v.eq_ignore_ascii_case(dv)) {
|
||||
any_dangerous = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -1332,15 +1732,14 @@ pub fn classify_gated_sink(
|
|||
|
||||
match activation_value {
|
||||
Some(value) => {
|
||||
let lower = value.to_ascii_lowercase();
|
||||
let is_dangerous = gate
|
||||
.dangerous_values
|
||||
.iter()
|
||||
.any(|v| lower == v.to_ascii_lowercase())
|
||||
.any(|v| value.eq_ignore_ascii_case(v))
|
||||
|| gate
|
||||
.dangerous_prefixes
|
||||
.iter()
|
||||
.any(|p| lower.starts_with(&p.to_ascii_lowercase()));
|
||||
.any(|p| starts_with_ignore_ascii_case(&value, p));
|
||||
if is_dangerous {
|
||||
out.push(GateMatch {
|
||||
label: gate.label,
|
||||
|
|
@ -1379,7 +1778,7 @@ pub fn classify_gated_sink(
|
|||
/// Public wrapper for `normalize_chained_call` so callers outside the module
|
||||
/// can share the same normalization used by the label classifier.
|
||||
pub fn normalize_chained_call_for_classify(text: &str) -> String {
|
||||
normalize_chained_call(text)
|
||||
normalize_chained_call(text).into_owned()
|
||||
}
|
||||
|
||||
/// Return the bare method-name segment of a callee text. Returns the
|
||||
|
|
@ -1394,38 +1793,79 @@ pub fn bare_method_name(callee: &str) -> &str {
|
|||
/// Normalize a chained method call: strip `()` between `.` segments.
|
||||
/// e.g. `r.URL.Query().Get` → `r.URL.Query.Get`
|
||||
/// e.g. `r.URL.Query().Get("host")` → `r.URL.Query.Get`
|
||||
fn normalize_chained_call(text: &str) -> String {
|
||||
let mut result = String::with_capacity(text.len());
|
||||
///
|
||||
/// Returns a borrow when no transformation is required (no `()` between
|
||||
/// `.` segments and no leading `<`), avoiding the heap allocation. Only
|
||||
/// pays for a `String` when the input actually needs rewriting; the hot
|
||||
/// classify path runs on every CFG node so the borrow case dominates.
|
||||
fn normalize_chained_call(text: &str) -> std::borrow::Cow<'_, str> {
|
||||
let bytes = text.as_bytes();
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
match bytes[i] {
|
||||
b'(' => {
|
||||
// Skip from `(` to matching `)`, but only if followed by `.`
|
||||
// This handles `Query().Get` → `Query.Get`
|
||||
let mut depth = 1u32;
|
||||
let mut j = i + 1;
|
||||
while j < bytes.len() && depth > 0 {
|
||||
if bytes[j] == b'(' {
|
||||
depth += 1;
|
||||
} else if bytes[j] == b')' {
|
||||
depth -= 1;
|
||||
match bytes[j] {
|
||||
b'(' => depth += 1,
|
||||
b')' => depth -= 1,
|
||||
_ => {}
|
||||
}
|
||||
j += 1;
|
||||
}
|
||||
if j >= bytes.len() || bytes[j] == b'.' {
|
||||
return std::borrow::Cow::Owned(normalize_chained_call_owned(text, i));
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
b'<' => return std::borrow::Cow::Borrowed(&text[..i]),
|
||||
_ => i += 1,
|
||||
}
|
||||
}
|
||||
std::borrow::Cow::Borrowed(text)
|
||||
}
|
||||
|
||||
/// Slow path for `normalize_chained_call`: runs only when the input
|
||||
/// actually contains a `(...)` group followed by `.` (the case that
|
||||
/// requires removing characters). `prefix_end` is the byte offset of the
|
||||
/// first transformation point so the prefix can be copied wholesale.
|
||||
///
|
||||
/// `(`, `)`, `<`, and `.` are all ASCII, so byte-level scanning is safe
|
||||
/// for control characters. Non-ASCII identifier bytes are copied as
|
||||
/// contiguous slices to keep multi-byte UTF-8 sequences intact.
|
||||
fn normalize_chained_call_owned(text: &str, prefix_end: usize) -> String {
|
||||
let bytes = text.as_bytes();
|
||||
let mut result = String::with_capacity(text.len());
|
||||
result.push_str(&text[..prefix_end]);
|
||||
let mut i = prefix_end;
|
||||
while i < bytes.len() {
|
||||
match bytes[i] {
|
||||
b'(' => {
|
||||
let mut depth = 1u32;
|
||||
let mut j = i + 1;
|
||||
while j < bytes.len() && depth > 0 {
|
||||
match bytes[j] {
|
||||
b'(' => depth += 1,
|
||||
b')' => depth -= 1,
|
||||
_ => {}
|
||||
}
|
||||
j += 1;
|
||||
}
|
||||
// If we're at end or next char is `.`, skip the parens
|
||||
if j >= bytes.len() || bytes[j] == b'.' {
|
||||
i = j;
|
||||
} else {
|
||||
// Keep the paren content (unusual case)
|
||||
result.push('(');
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
b'<' => break, // Stop at generic args
|
||||
b'<' => break,
|
||||
_ => {
|
||||
result.push(bytes[i] as char);
|
||||
i += 1;
|
||||
let start = i;
|
||||
while i < bytes.len() && !matches!(bytes[i], b'(' | b'<') {
|
||||
i += 1;
|
||||
}
|
||||
result.push_str(&text[start..i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1979,6 +2419,58 @@ mod tests {
|
|||
assert_eq!(lookup_receiver_validator("python", "joinpath"), None);
|
||||
}
|
||||
|
||||
#[test]
fn normalize_chained_call_borrows_when_no_change() {
    // None of these inputs requires removing characters, so the result must
    // be a borrow of (a prefix of) the input:
    //   - "plain":  no parens and no `<`;
    //   - "a.b.c":  bare dotted text, nothing to collapse;
    //   - "Vec<T>": generics are cut off at `<`, leaving a shorter slice.
    let cases = [("plain", "plain"), ("a.b.c", "a.b.c"), ("Vec<T>", "Vec")];
    for (input, expected) in cases {
        let normalized = normalize_chained_call(input);
        assert!(matches!(normalized, std::borrow::Cow::Borrowed(_)));
        assert_eq!(normalized.as_ref(), expected);
    }
}
|
||||
|
||||
#[test]
fn normalize_chained_call_collapses_paren_dot_chain() {
    let cases = [
        ("r.URL.Query().Get", "r.URL.Query.Get"),
        ("a.b().c().d", "a.b.c.d"),
        // A paren group that runs to end-of-input is collapsed as well.
        ("a.b()", "a.b"),
    ];
    for (input, expected) in cases {
        assert_eq!(normalize_chained_call(input).as_ref(), expected);
    }
}
|
||||
|
||||
#[test]
fn normalize_chained_call_preserves_utf8_after_collapse() {
    // The tail copied after a collapsed `(...)` group must keep multi-byte
    // UTF-8 sequences whole, whatever their width.
    let cases = [
        // Greek lowercase letters: 2-byte sequences.
        ("obj.func().αβγ", "obj.func.αβγ"),
        // CJK ideographs: 3-byte sequences.
        ("a.b().名前", "a.b.名前"),
        // Emoji inside an identifier: 4-byte sequence. Not expected in real
        // input, but the byte loop must still leave it intact.
        ("x.y().🦀_id", "x.y.🦀_id"),
    ];
    for (input, expected) in cases {
        assert_eq!(normalize_chained_call(input).as_ref(), expected);
    }
}
|
||||
|
||||
#[test]
|
||||
fn bare_method_name_strips_chain() {
|
||||
// No-dot input → returned as-is.
|
||||
|
|
@ -2739,6 +3231,26 @@ mod tests {
|
|||
assert_eq!(result[0], DataLabel::Sink(Cap::HTML_ESCAPE));
|
||||
}
|
||||
|
||||
#[test]
fn starts_with_ignore_ascii_case_matches_canonical_shapes() {
    // (haystack, needle, expected)
    let cases = [
        ("FILE://etc/passwd", "file://", true),
        ("file://etc/passwd", "FILE://", true),
        ("http://", "http://", true),
        ("http://", "", true),
        ("http", "https", false),
        ("", "x", false),
        // Multibyte UTF-8: only ASCII letters are case-folded; non-ASCII
        // bytes are compared byte-for-byte (no Unicode case folding).
        ("café", "café", true),
        ("café", "CAFÉ", false),
    ];
    for (haystack, needle, expected) in cases {
        assert_eq!(starts_with_ignore_ascii_case(haystack, needle), expected);
    }
}
|
||||
|
||||
#[test]
|
||||
fn classify_all_dual_label_php() {
|
||||
let result = classify_all("php", "file_get_contents", None);
|
||||
|
|
|
|||
|
|
@ -48,9 +48,29 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// PDO parameterized queries
|
||||
// PDO parameterized queries. `prepareStatement` covers Drupal's
|
||||
// Database\\Connection convention (and any PSR-style wrapper that
|
||||
// uses the longer name); semantically identical to `prepare` —
|
||||
// both return a statement object, the bind step ships values as
|
||||
// out-of-band parameters, no concatenation occurs.
|
||||
LabelRule {
|
||||
matchers: &["prepare", "bindParam", "bindValue"],
|
||||
matchers: &["prepare", "prepareStatement", "bindParam", "bindValue"],
|
||||
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 15 — `mysqli_real_escape_string($conn, $s)` and
|
||||
// `pg_escape_string($s)` apply driver-side escaping for legacy
|
||||
// string-concat shapes. Treat as SQL_QUERY sanitizers so the
|
||||
// value-replacement clears the cap on the call return.
|
||||
// `addslashes` is intentionally excluded — it does NOT cover
|
||||
// multibyte / charset-aware injection vectors.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"mysqli_real_escape_string",
|
||||
"pg_escape_string",
|
||||
"pg_escape_literal",
|
||||
"pg_escape_identifier",
|
||||
],
|
||||
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
|
@ -121,10 +141,39 @@ pub static RULES: &[LabelRule] = &[
|
|||
"pdo.query",
|
||||
"mysqli.real_query",
|
||||
"mysqli_real_query",
|
||||
// Phase 15 — `PDOStatement::execute` (with no args) executes a
|
||||
// prepared statement; when prepared from a tainted string the
|
||||
// bind step does NOT prevent injection (the SQL was already
|
||||
// built unsafely). The receiver-text suffix is `stmt.execute`.
|
||||
// Distinct from the bare `execute` matcher (already on the
|
||||
// generic SQL_QUERY rule via `query` matcher) because the
|
||||
// OOP `$stmt->execute()` shape skips the SQL-string arg.
|
||||
"stmt.execute",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 15 — Doctrine ORM raw-SQL passthrough APIs. Doctrine's
|
||||
// `EntityManager::createQuery($dql)` accepts a DQL string;
|
||||
// `createNativeQuery($sql, $rsm)` accepts a native SQL string;
|
||||
// `getConnection()->executeQuery($sql)` /
|
||||
// `getConnection()->executeStatement($sql)` are the low-level
|
||||
// Connection passthroughs that route to the underlying driver
|
||||
// verbatim. Suffix-matching covers both bound-receiver shapes
|
||||
// (`$em->createQuery($dql)`) and the documentation-style
|
||||
// class-qualified call form (`EntityManager.createQuery`).
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"EntityManager.createQuery",
|
||||
"EntityManager.createNativeQuery",
|
||||
"createQuery",
|
||||
"createNativeQuery",
|
||||
"executeQuery",
|
||||
"executeStatement",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// Laravel Eloquent: raw SQL methods.
|
||||
// DB::raw() → scoped_call_expression, callee text "DB.raw".
|
||||
// whereRaw/selectRaw/orderByRaw/havingRaw → member_call_expression on query builder.
|
||||
|
|
@ -133,6 +182,22 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 15 — Laravel raw-SQL execution facade methods. `DB::select`,
|
||||
// `DB::statement`, `DB::insert`, `DB::update`, `DB::delete`,
|
||||
// `DB::unprepared` all accept a literal SQL string; the
|
||||
// `unprepared` form is the explicit no-bind escape hatch.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"DB.select",
|
||||
"DB.statement",
|
||||
"DB.insert",
|
||||
"DB.update",
|
||||
"DB.delete",
|
||||
"DB.unprepared",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// NOTE: `file_get_contents` and `fopen` can fetch URLs (SSRF vector) and
|
||||
// local files (LFI vector — `file://` scheme). As a Sink(SSRF) they only
|
||||
// fire when the argument is tainted. `fopen` is the canonical low-level
|
||||
|
|
@ -145,6 +210,32 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 14 — `\GuzzleHttp\Client::request($method, $url, ...)` and the
|
||||
// verb-shorthand methods `$client->get($url)` / `->head($url)` /
|
||||
// `->options($url)`. The read-shaped verbs carry the URL at arg 0
|
||||
// and have no body argument, so a flat SSRF sink is FP-safe. The
|
||||
// body-bearing verbs (`post` / `put` / `patch`) live on the
|
||||
// DATA_EXFIL list above; their URL-position SSRF is covered via
|
||||
// `Client.request` (arg 1 is URL) below as a flat sink — Guzzle
|
||||
// does not expose argument-role-aware metadata that would let the
|
||||
// gate distinguish URL from body, but the source-sensitivity gate
|
||||
// already silences plain `$_GET` / `$_POST` flows so the
|
||||
// remaining FP surface is small.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Client.get",
|
||||
"Client.head",
|
||||
"Client.options",
|
||||
"Client.request",
|
||||
"HttpClient.get",
|
||||
"HttpClient.head",
|
||||
"HttpClient.request",
|
||||
"Http.get",
|
||||
"Http.head",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||||
//
|
||||
// Body-bearing outbound HTTP verb methods on the major PHP HTTP clients.
|
||||
|
|
@ -343,6 +434,26 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// Phase 14 — `curl_setopt($ch, CURLOPT_URL, $url)` is the canonical
|
||||
// pre-`curl_exec` URL bind. Tainted `$url` reaching this option is
|
||||
// SSRF; the `curl_exec($ch)` flat sink above also fires on the
|
||||
// tainted handle but only when the handle's taint propagates
|
||||
// through opaque resource state, which the engine cannot follow
|
||||
// across `curl_setopt` calls. Activating the SSRF cap directly at
|
||||
// the option-bind site catches the flow at the construction step
|
||||
// independent of the handle-flow analysis.
|
||||
SinkGate {
|
||||
callee_matcher: "curl_setopt",
|
||||
arg_index: 1,
|
||||
dangerous_values: &["CURLOPT_URL"],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: true,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// PHP `header($line)` HEADER_INJECTION sink. Modelled as a gate so
|
||||
// it can coexist with the OPEN_REDIRECT gate below: the multi-gate
|
||||
// SSA dispatch needs each capability declared on its own gate filter
|
||||
|
|
|
|||
|
|
@ -97,6 +97,39 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 13 — pathlib / aiofiles / shutil path-traversal sinks.
|
||||
// Chained constructor + method shapes (`Path(p).read_text()`) reduce
|
||||
// via paren-strip to the matcher text below; the path argument is
|
||||
// the sink payload. Receiver-bound shapes (`p = Path(...);
|
||||
// p.read_text()`) are covered by the `FileHandle.*` matchers in
|
||||
// this rule, via the type-qualified receiver rewrite noted below.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Path.open",
|
||||
"Path.read_text",
|
||||
"Path.write_text",
|
||||
"Path.read_bytes",
|
||||
"Path.write_bytes",
|
||||
// Receiver-bound shapes (`p = Path(name); p.read_text()`)
|
||||
// resolve via the `TypeKind::FileHandle` constructor mapping
|
||||
// for `Path(...)` in `ssa/type_facts.rs`, which lets the
|
||||
// type-qualified resolver rewrite `p.read_text` →
|
||||
// `FileHandle.read_text` against the matchers below.
|
||||
"FileHandle.open",
|
||||
"FileHandle.read_text",
|
||||
"FileHandle.write_text",
|
||||
"FileHandle.read_bytes",
|
||||
"FileHandle.write_bytes",
|
||||
"aiofiles.open",
|
||||
"shutil.copy",
|
||||
"shutil.copy2",
|
||||
"shutil.copyfile",
|
||||
"shutil.move",
|
||||
"shutil.rmtree",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
case_sensitive: true,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"argparse.parse_args",
|
||||
|
|
@ -157,6 +190,22 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 13 — `pathlib.Path.resolve(strict=True)` raises if the
|
||||
// resolved path doesn't exist; the canonical / strict form is the
|
||||
// documented path-traversal sanitiser. Strict-mode argument
|
||||
// inspection is not modeled (the rule fires for any `.resolve()`
|
||||
// chained on a `Path(...)`); the false-clear risk on
|
||||
// `Path(...).resolve()` (non-strict) is an accepted trade-off
|
||||
// because the non-strict form still resolves symlinks and
|
||||
// collapses `..` segments, which dominates the path-traversal
|
||||
// attack surface. Case-sensitive: `Path.resolve` is the literal
|
||||
// pathlib method name; bare `resolve` is too broad (Django URL
|
||||
// resolvers, Promise.resolve in JS-style libs).
|
||||
LabelRule {
|
||||
matchers: &["Path.resolve", "FileHandle.resolve"],
|
||||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ─────────── Sinks ─────────────
|
||||
// Flask sinks
|
||||
LabelRule {
|
||||
|
|
@ -218,6 +267,26 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 15 — receiver-typed ORM sinks. `SqlAlchemySession.execute`
|
||||
// / `SqlAlchemySession.scalar` / `SqlAlchemySession.scalars` etc.
|
||||
// are produced when the receiver carries `TypeKind::SqlAlchemySession`
|
||||
// (set by `constructor_type` for `sessionmaker()` / `Session(engine)` /
|
||||
// `engine.connect()`). `DjangoQuerySet.raw` / `DjangoQuerySet.extra`
|
||||
// fire on `Model.objects.raw(sql)` / `Model.objects.extra(...)` shapes
|
||||
// when the receiver was tagged via the `Model.objects` access path.
|
||||
// `ActiveRecordRelation` is registered in `labels/ruby.rs`.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"SqlAlchemySession.execute",
|
||||
"SqlAlchemySession.scalar",
|
||||
"SqlAlchemySession.scalars",
|
||||
"SqlAlchemySession.exec_driver_sql",
|
||||
"DjangoQuerySet.raw",
|
||||
"DjangoQuerySet.extra",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// SQL injection: sqlite3 / SQLAlchemy / generic DB connection execute.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
|
|
@ -1245,6 +1314,214 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["data"],
|
||||
},
|
||||
},
|
||||
// ── SQL execute payload-arg gating (Phase 15 deferred fix) ────────────
|
||||
//
|
||||
// The flat label rules above already classify these callees as
|
||||
// `Sink(SQL_QUERY)` on every argument. The DB-API convention is that
|
||||
// arg 0 is the SQL string and arg 1+ are parameterised bind values
|
||||
// (`cursor.execute("SELECT * FROM t WHERE id = %s", (user_id,))`). Tainted
|
||||
// bind values are SAFE because the driver escapes them; tainted SQL is
|
||||
// the SQLi vector. These Destination-activation gates carry the same
|
||||
// `Sink(SQL_QUERY)` label so they dedupe against the flat rule, but
|
||||
// their `payload_args: &[0]` propagates into `sink_payload_args`,
|
||||
// narrowing the SSA sink scan to arg 0 only.
|
||||
SinkGate {
|
||||
callee_matcher: "cursor.execute",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "cursor.executemany",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "conn.execute",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "connection.execute",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "session.execute",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "engine.execute",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "db.execute",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "objects.raw",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// Receiver-typed forms; same payload shape (sql at arg 0).
|
||||
SinkGate {
|
||||
callee_matcher: "SqlAlchemySession.execute",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "SqlAlchemySession.scalar",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "SqlAlchemySession.scalars",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "SqlAlchemySession.exec_driver_sql",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DjangoQuerySet.raw",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "DjangoQuerySet.extra",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
/// Prototype-pollution-style gates for Python. Opt-in via the
|
||||
|
|
@ -1329,6 +1606,13 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"call" => Kind::CallFn,
|
||||
"assignment" => Kind::Assignment,
|
||||
"expression_statement" => Kind::CallWrapper,
|
||||
// tree-sitter-python emits `await x` as a named `await` node (no
|
||||
// `_expression` suffix, unlike JS/TS). Map it to `AwaitForward` so
|
||||
// the SSA lowering forwards the awaited value 1:1, mirroring the
|
||||
// JS/TS contract. Async-for in Python is plain `for_statement` with
|
||||
// an unnamed `async` token child; the iterator-text rewrite in
|
||||
// `cfg::push_node` covers both sync and async forms uniformly.
|
||||
"await" => Kind::AwaitForward,
|
||||
|
||||
// trivia
|
||||
"comment" => Kind::Trivia,
|
||||
|
|
|
|||
|
|
@ -113,7 +113,25 @@ pub static RULES: &[LabelRule] = &[
|
|||
// in the resource-lifecycle acquire/release pair (cfg_analysis::RUBY_RESOURCES),
|
||||
// so this entry is additive, it does not disturb resource-leak detection.
|
||||
LabelRule {
|
||||
matchers: &["File.open", "File.new", "File.read", "IO.read"],
|
||||
matchers: &[
|
||||
"File.open",
|
||||
"File.new",
|
||||
"File.read",
|
||||
"IO.read",
|
||||
// Phase 13 — write-side and directory-listing path-traversal
|
||||
// sinks. `Pathname.new(p)` is conservative: a Pathname
|
||||
// construction with attacker-controlled `p` is the documented
|
||||
// entry point for downstream Path / File operations and
|
||||
// surfaces the path-traversal vector at the construction
|
||||
// site. `Dir.entries` / `Dir.glob` enumerate filesystem
|
||||
// contents, so a tainted path argument is a directory
|
||||
// disclosure / glob-injection vector.
|
||||
"File.write",
|
||||
"IO.write",
|
||||
"Pathname.new",
|
||||
"Dir.entries",
|
||||
"Dir.glob",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
|
@ -136,10 +154,28 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &[
|
||||
"Net::HTTP.get",
|
||||
"Net::HTTP.post",
|
||||
// Phase 14 — `Net::HTTP.start(host, port, ...)` is a session
|
||||
// factory whose host argument is the SSRF vector when
|
||||
// tainted. `Net::HTTP.get_response(uri)` is a stdlib
|
||||
// convenience wrapper around `start` + `request_get`.
|
||||
"Net::HTTP.start",
|
||||
"Net::HTTP.get_response",
|
||||
"URI.open",
|
||||
"OpenURI.open_uri",
|
||||
"HTTParty.get",
|
||||
"HTTParty.post",
|
||||
// Phase 14 — Faraday::Connection verb methods on a typed
|
||||
// receiver. `Faraday.new(url: base)` produces an
|
||||
// `HttpClient`-typed value (see `constructor_type`); the
|
||||
// `client.get(path)` chain resolves through the
|
||||
// type-qualified `HttpClient.get` rule below. Bare
|
||||
// `Faraday.get` / `.post` / etc. are the module-level
|
||||
// shorthand the existing `Faraday.post` matcher already
|
||||
// covers for DATA_EXFIL; SSRF needs the read-shaped
|
||||
// verbs registered explicitly.
|
||||
"Faraday.get",
|
||||
"Faraday.head",
|
||||
"Faraday.delete",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
|
|
@ -214,11 +250,41 @@ pub static RULES: &[LabelRule] = &[
|
|||
case_sensitive: false,
|
||||
},
|
||||
// SQL injection: ActiveRecord unsafe raw-query execution APIs.
|
||||
// Phase 15 expands coverage with `exec_query` (the raw-SQL execution
|
||||
// verb on the ActiveRecord connection adapter) and `select_one` /
|
||||
// `select_value` / `select_values` / `select_rows` (driver-level
|
||||
// select helpers that accept a literal SQL string).
|
||||
LabelRule {
|
||||
matchers: &["find_by_sql", "connection.execute", "select_all"],
|
||||
matchers: &[
|
||||
"find_by_sql",
|
||||
"connection.execute",
|
||||
"select_all",
|
||||
"exec_query",
|
||||
"select_value",
|
||||
"select_values",
|
||||
"select_rows",
|
||||
"select_one",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 15 — receiver-typed ActiveRecord raw-SQL sinks. The
|
||||
// `ActiveRecordRelation` TypeKind is set by `constructor_type` on
|
||||
// class-method scope chains (`User.where(...)` etc.); type-qualified
|
||||
// resolution rewrites `relation.find_by_sql(sql)` →
|
||||
// `ActiveRecordRelation.find_by_sql` so the chained shape is caught
|
||||
// even when the receiver text has lost its model-class prefix.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"ActiveRecordRelation.find_by_sql",
|
||||
"ActiveRecordRelation.exec_query",
|
||||
"ActiveRecordRelation.select_all",
|
||||
"ActiveRecordRelation.select_one",
|
||||
"ActiveRecordRelation.select_value",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// SQL injection: ActiveRecord query methods that accept raw SQL strings.
|
||||
// `where` and `order` are the most common Rails SQLi vectors when called
|
||||
// with string interpolation (e.g., User.where("name = '#{params[:name]}'")).
|
||||
|
|
@ -383,6 +449,32 @@ pub static RULES: &[LabelRule] = &[
|
|||
/// `Nokogiri::XML::ParseOptions::DEFAULT_XML`); any non-dangerous
|
||||
/// scope-qualified constant disables the gate.
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
// `Faraday.new(url: tainted)` — base-URL kwarg controls the destination
|
||||
// origin for every subsequent verb call on the returned client
|
||||
// (`client.get(path)` / `.post` / etc.). When the kwarg value is
|
||||
// attacker-controlled, the constructor itself is the SSRF entry point;
|
||||
// the existing type-qualified rules on `HttpClient.get` / `.post` only
|
||||
// cover taint flowing into the per-call `path` arg.
|
||||
//
|
||||
// Activation is `Destination` on positional argument 0 with a single
|
||||
// `url` field; tree-sitter-ruby emits the kwarg as a `pair` node sibling
|
||||
// of the positional args, and `extract_destination_kwarg_pairs` walks
|
||||
// those pairs (Ruby support added alongside this gate in
|
||||
// `cfg::literals::extract_destination_kwarg_pairs`).
|
||||
SinkGate {
|
||||
callee_matcher: "Faraday.new",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["url"],
|
||||
},
|
||||
},
|
||||
// `Nokogiri::XML(xml, url=nil, encoding=nil, options=NIL)` — top-level
|
||||
// module method. arg 3 carries the parse-option flag literal.
|
||||
//
|
||||
|
|
|
|||
|
|
@ -60,6 +60,26 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 13 — `Path::canonicalize` (and `tokio::fs::canonicalize`) is
|
||||
// the canonical Rust path-traversal sanitiser when paired with a
|
||||
// `starts_with(&base)` containment check. Same convention as the
|
||||
// Java / Python `.normalize()` / `.resolve()` sanitiser rules: the
|
||||
// call clears the FILE_IO cap on its return so the cap-based gate
|
||||
// suppresses the downstream `tokio::fs::*` / `std::fs::*` sink.
|
||||
// Bare `canonicalize` would over-fire on unrelated APIs (e.g.
|
||||
// `Url::canonicalize`); the qualified forms below are unique to
|
||||
// path-handling.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Path.canonicalize",
|
||||
"PathBuf.canonicalize",
|
||||
"fs::canonicalize",
|
||||
"std::fs::canonicalize",
|
||||
"tokio::fs::canonicalize",
|
||||
],
|
||||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─────────── Sinks ─────────────
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
|
|
@ -90,6 +110,21 @@ pub static RULES: &[LabelRule] = &[
|
|||
"fs::copy",
|
||||
"File::open",
|
||||
"File::create",
|
||||
// Phase 13 — `tokio::fs` async path-traversal sinks. The
|
||||
// suffix matchers also catch the bare `tokio::fs::File::open`
|
||||
// chain after paren-strip. `tokio::fs::*` is the
|
||||
// async-runtime-bound mirror of `std::fs::*`; same path
|
||||
// arg-0 semantics.
|
||||
"tokio::fs::read",
|
||||
"tokio::fs::read_to_string",
|
||||
"tokio::fs::write",
|
||||
"tokio::fs::remove_file",
|
||||
"tokio::fs::remove_dir",
|
||||
"tokio::fs::remove_dir_all",
|
||||
"tokio::fs::rename",
|
||||
"tokio::fs::copy",
|
||||
"tokio::fs::File::open",
|
||||
"tokio::fs::File::create",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
|
|
@ -105,6 +140,12 @@ pub static RULES: &[LabelRule] = &[
|
|||
"reqwest::Client.head",
|
||||
"reqwest::Client.patch",
|
||||
"reqwest::Client.request",
|
||||
// Phase 14 — hyper Client `request(req)` dispatch entry. The
|
||||
// `req` builder chain (covered by the type-qualified
|
||||
// RequestBuilder.* / Request::builder.* rules below) smears
|
||||
// URL taint into the request value via default propagation.
|
||||
"hyper::Client.request",
|
||||
"hyper::client::Client.request",
|
||||
// Chained constructor + verb form: `reqwest::Client::new()
|
||||
// .post(url)` reduces (via root-receiver collapse) to chain
|
||||
// text `Client::new.post`, so existing `Client.post` matchers
|
||||
|
|
@ -370,6 +411,10 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"let_declaration" => Kind::CallWrapper,
|
||||
"expression_statement" => Kind::CallWrapper,
|
||||
"assignment_expression" => Kind::Assignment,
|
||||
// `x.await` postfix. Documented per-language so the contract does
|
||||
// not depend on the raw-string fallback in `cfg::push_node`; SSA
|
||||
// lowering emits `Assign(operand)` for these nodes.
|
||||
"await_expression" => Kind::AwaitForward,
|
||||
|
||||
// struct expressions, recurse so env::var() calls inside field
|
||||
// initialisers produce Source-labelled CFG nodes (needed for summaries).
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use crate::labels::{
|
||||
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
|
||||
Cap, DataLabel, GateActivation, GatedLabelRule, Kind, LabelGate, LabelRule, ParamConfig,
|
||||
RuntimeLabelRule, SinkGate,
|
||||
};
|
||||
use crate::utils::project::{DetectedFramework, FrameworkContext};
|
||||
use phf::{Map, phf_map};
|
||||
|
|
@ -29,6 +30,24 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Phase 10 — Web `Request` receiver-method reads. Triggered when
|
||||
// the SSA receiver carries `TypeKind::Request` (Next.js App
|
||||
// Router handler's first formal) and the type-qualified resolver
|
||||
// rewrites `req.json()` → `Request.json` etc. The reads return
|
||||
// user-controlled bytes / strings; the matchers also cover
|
||||
// `Request.url` and `Request.headers.get(...)` which both expose
|
||||
// header / URL state to the handler.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Request.json",
|
||||
"Request.formData",
|
||||
"Request.text",
|
||||
"Request.url",
|
||||
"Request.headers.get",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["JSON.parse"],
|
||||
|
|
@ -215,6 +234,40 @@ pub static RULES: &[LabelRule] = &[
|
|||
"fs.unlinkSync",
|
||||
"fs.readdir",
|
||||
"fs.readdirSync",
|
||||
// Phase 05 — `node:fs/promises` member-access forms covered
|
||||
// here. Bare-name forms (`readFile`, `open`, ...) and
|
||||
// `fsp.readFile` namespace-import forms ride the gated
|
||||
// matcher in `GATED_LABEL_RULES`. Receiver-type fallback
|
||||
// synthesises `FileSystemPromisesNs.<method>` (handled
|
||||
// below).
|
||||
"fs.promises.readFile",
|
||||
"fs.promises.writeFile",
|
||||
"fs.promises.unlink",
|
||||
"fs.promises.open",
|
||||
"fs.promises.stat",
|
||||
"fs.promises.readdir",
|
||||
"fs.promises.mkdir",
|
||||
"fs.promises.rmdir",
|
||||
"fs.promises.rm",
|
||||
"fs.promises.appendFile",
|
||||
"fs.promises.copyFile",
|
||||
"fs.promises.rename",
|
||||
"fs.promises.truncate",
|
||||
"fs.promises.chmod",
|
||||
"FileSystemPromisesNs.readFile",
|
||||
"FileSystemPromisesNs.writeFile",
|
||||
"FileSystemPromisesNs.unlink",
|
||||
"FileSystemPromisesNs.open",
|
||||
"FileSystemPromisesNs.stat",
|
||||
"FileSystemPromisesNs.readdir",
|
||||
"FileSystemPromisesNs.mkdir",
|
||||
"FileSystemPromisesNs.rmdir",
|
||||
"FileSystemPromisesNs.rm",
|
||||
"FileSystemPromisesNs.appendFile",
|
||||
"FileSystemPromisesNs.copyFile",
|
||||
"FileSystemPromisesNs.rename",
|
||||
"FileSystemPromisesNs.truncate",
|
||||
"FileSystemPromisesNs.chmod",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
|
|
@ -255,6 +308,25 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ── Phase 07 — ORM query-builder receiver-typed sinks ──
|
||||
// See `labels/javascript.rs` for the design rationale; mirrored here so
|
||||
// TypeScript fixtures pick up the same coverage. Receiver TypeKinds
|
||||
// are populated by [`crate::ssa::type_facts::constructor_type`] for
|
||||
// `new Sequelize(...)` / `getRepository(...)` / `getManager()` /
|
||||
// `createEntityManager()`; the type-qualified resolver rewrites
|
||||
// `<recv>.<method>` → `<TypePrefix>.<method>` against these matchers.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Sequelize.literal",
|
||||
"TypeOrmRepo.query",
|
||||
"TypeOrmRepo.createQueryBuilder",
|
||||
"TypeOrmManager.query",
|
||||
"TypeOrmManager.createQueryBuilder",
|
||||
"MikroOrmEm.execute",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ─── LDAP injection sinks ───
|
||||
//
|
||||
// Mirror of `labels/javascript.rs`; ldapjs / ts-ldapjs has the same
|
||||
|
|
@ -391,6 +463,67 @@ pub static EXCLUDES: &[&str] = &[
|
|||
"exec.start",
|
||||
];
|
||||
|
||||
/// Import-gated label rules: Phase 05 `node:fs/promises` path-traversal
|
||||
/// sinks, Phase 07 Knex / Drizzle raw-SQL sinks, and Phase 10
|
||||
/// `next/headers` sources. See `javascript.rs::GATED_LABEL_RULES` for the
|
||||
/// design rationale; both language registries carry the same matcher
|
||||
/// lists to keep .ts and .js fixtures in lockstep.
|
||||
pub static GATED_LABEL_RULES: &[GatedLabelRule] = &[
|
||||
GatedLabelRule {
|
||||
matchers: &[
|
||||
"readFile",
|
||||
"writeFile",
|
||||
"unlink",
|
||||
"open",
|
||||
"stat",
|
||||
"readdir",
|
||||
"mkdir",
|
||||
"rmdir",
|
||||
"rm",
|
||||
"appendFile",
|
||||
"copyFile",
|
||||
"rename",
|
||||
"truncate",
|
||||
"chmod",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
gate: LabelGate::ImportedFromModule(&["node:fs/promises", "fs/promises"]),
|
||||
},
|
||||
// Phase 07 — Knex bare-name raw-SQL escape hatches. See
|
||||
// `labels/javascript.rs::GATED_LABEL_RULES` for the rationale; this
|
||||
// mirror keeps `.ts` and `.js` fixtures in lockstep.
|
||||
GatedLabelRule {
|
||||
matchers: &["whereRaw", "orderByRaw", "havingRaw"],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
gate: LabelGate::FileImportsModuleAsLocalName {
|
||||
modules: &["knex"],
|
||||
local_names: &["knex"],
|
||||
},
|
||||
},
|
||||
// Phase 07 — Drizzle `sql` template-tag builder. See
|
||||
// `labels/javascript.rs::GATED_LABEL_RULES` for the two callee
|
||||
// shapes covered (`sql\`...\`` and `sql.raw(...)`).
|
||||
GatedLabelRule {
|
||||
matchers: &["=sql", "sql.raw"],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
gate: LabelGate::ImportedFromModule(&["drizzle-orm"]),
|
||||
},
|
||||
// Phase 10 — Next.js `cookies()` / `headers()` helpers from the
|
||||
// `next/headers` module return adversary-controlled
|
||||
// request-bound state (cookies carry session tokens, headers
|
||||
// carry auth material). Gated on the import so app-internal
|
||||
// helpers named `cookies` or `headers` keep their default
|
||||
// classification.
|
||||
GatedLabelRule {
|
||||
matchers: &["cookies", "headers"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: true,
|
||||
gate: LabelGate::ImportedFromModule(&["next/headers"]),
|
||||
},
|
||||
];
|
||||
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
SinkGate {
|
||||
callee_matcher: "setAttribute",
|
||||
|
|
@ -958,6 +1091,8 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"expression_statement" => Kind::CallWrapper,
|
||||
"as_expression" => Kind::Seq,
|
||||
"type_assertion" => Kind::Seq,
|
||||
"await_expression" => Kind::AwaitForward,
|
||||
"jsx_attribute" => Kind::JsxAttr,
|
||||
|
||||
// trivia
|
||||
"comment" => Kind::Trivia,
|
||||
|
|
|
|||
|
|
@ -100,6 +100,7 @@ pub mod constraint;
|
|||
pub mod convergence_telemetry;
|
||||
pub mod database;
|
||||
pub mod engine_notes;
|
||||
pub mod entry_points;
|
||||
pub mod errors;
|
||||
pub mod evidence;
|
||||
pub mod fmt;
|
||||
|
|
@ -109,6 +110,7 @@ pub mod output;
|
|||
pub mod patterns;
|
||||
pub mod pointer;
|
||||
pub mod rank;
|
||||
pub mod resolve;
|
||||
pub mod rust_resolve;
|
||||
#[cfg(feature = "serve")]
|
||||
pub mod server;
|
||||
|
|
|
|||
|
|
@ -668,6 +668,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -884,6 +885,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let facts = analyse_body(&body, body_id());
|
||||
assert!(facts.is_trivial());
|
||||
|
|
|
|||
1042
src/resolve/mod.rs
Normal file
1042
src/resolve/mod.rs
Normal file
File diff suppressed because it is too large
Load diff
380
src/resolve/tests.rs
Normal file
380
src/resolve/tests.rs
Normal file
|
|
@ -0,0 +1,380 @@
|
|||
//! Phase-04 resolver tests.
|
||||
//!
|
||||
//! Six specifier shapes (relative, parent-relative, scoped package,
|
||||
//! tsconfig path alias, node builtin, missing), `exports`-map subpaths,
|
||||
//! package / namespace lookup, import extraction, and a time/memory-ceiling
|
||||
//! guard. Every test reads the synthetic tree under
|
||||
//! `tests/fixtures/resolver/`, constructs a [`ModuleGraph`] via
|
||||
//! [`build_module_graph`], and asserts the resolver verdict.
|
||||
|
||||
use super::*;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn fixture_root() -> PathBuf {
|
||||
let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
p.push("tests/fixtures/resolver");
|
||||
p
|
||||
}
|
||||
|
||||
fn root() -> PathBuf {
|
||||
let r = fixture_root();
|
||||
if r.exists() {
|
||||
r.canonicalize().unwrap_or(r)
|
||||
} else {
|
||||
r
|
||||
}
|
||||
}
|
||||
|
||||
/// A `./` specifier resolves to the file sitting next to the importer and
/// is not reported as a builtin.
#[test]
fn resolves_relative_specifier() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let verdict = modules
        .resolve_specifier(&entry, "./foo")
        .expect("relative spec must classify");
    let target = verdict.file.expect("./foo must resolve");

    assert!(
        target.ends_with("apps/web/src/foo.ts"),
        "unexpected resolution: {}",
        target.display()
    );
    assert!(!verdict.is_builtin);
}
|
||||
|
||||
/// A `../` specifier is resolved against the importer's parent directory.
#[test]
fn resolves_parent_relative_specifier() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let verdict = modules
        .resolve_specifier(&entry, "../bar/baz")
        .expect("../bar/baz must classify");
    let target = verdict.file.expect("../bar/baz must resolve");

    assert!(
        target.ends_with("apps/web/bar/baz.ts"),
        "unexpected resolution: {}",
        target.display()
    );
}
|
||||
|
||||
/// A scoped package name resolves to that package's entry file and records
/// the package name on the verdict.
#[test]
fn resolves_scoped_package_import() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let verdict = modules
        .resolve_specifier(&entry, "@scope/util")
        .expect("@scope/util must classify");
    assert_eq!(verdict.package.as_deref(), Some("@scope/util"));

    let target = verdict.file.expect("@scope/util must resolve to a file");
    // Either entry-point layout of the fixture package is accepted.
    let is_package_entry = target.ends_with("packages/util/src/index.ts")
        || target.ends_with("packages/util/index.ts");
    assert!(
        is_package_entry,
        "unexpected resolution: {}",
        target.display()
    );
}
|
||||
|
||||
/// The `@/lib/x` path alias resolves to `apps/web/src/lib/x.ts`.
#[test]
fn resolves_tsconfig_path_alias() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let verdict = modules
        .resolve_specifier(&entry, "@/lib/x")
        .expect("@/lib/x must classify");
    let target = verdict.file.expect("@/lib/x must resolve");

    assert!(
        target.ends_with("apps/web/src/lib/x.ts"),
        "unexpected resolution: {}",
        target.display()
    );
}
|
||||
|
||||
/// Both the `node:`-prefixed and the bare spelling of a Node builtin are
/// classified as builtin; the prefixed form carries no file and no package.
#[test]
fn classifies_node_builtin_specifier() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let prefixed = modules
        .resolve_specifier(&entry, "node:fs/promises")
        .expect("node:fs/promises must classify");
    assert!(prefixed.is_builtin);
    assert!(prefixed.file.is_none());
    assert!(prefixed.package.is_none());

    let unprefixed = modules
        .resolve_specifier(&entry, "fs")
        .expect("bare 'fs' must classify");
    assert!(unprefixed.is_builtin);
}
|
||||
|
||||
/// An unknown bare package still classifies, but yields neither a file nor
/// a package and is not a builtin.
#[test]
fn missing_module_returns_none_resolved_file() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let verdict = modules
        .resolve_specifier(&entry, "no-such-package")
        .expect("non-empty spec must classify");

    assert!(!verdict.is_builtin);
    assert!(verdict.file.is_none(), "missing module must not resolve");
    assert!(verdict.package.is_none());
}
|
||||
|
||||
/// `package_for` maps a file to the package that owns it, checked for one
/// file under `packages/util` and one under `apps/web`.
#[test]
fn package_for_returns_innermost_match() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let util_file = fixtures.join("packages/util/src/index.ts");
    let util_owner = modules
        .package_for(&util_file)
        .expect("file under packages/util belongs to a package");
    assert_eq!(util_owner.name, "@scope/util");

    let web_file = fixtures.join("apps/web/src/index.ts");
    let web_owner = modules
        .package_for(&web_file)
        .expect("file under apps/web belongs to a package");
    assert_eq!(web_owner.name, "web-app");
}
|
||||
|
||||
/// Namespaces of files inside a package start with `<package>::`; a file
/// outside every package gets a namespace without any `::` separator.
#[test]
fn project_namespace_prefixes_when_in_package() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));

    let packaged_file = fixtures.join("packages/util/src/index.ts");
    let ns = modules.project_namespace_for(&packaged_file, &fixtures);
    assert!(
        ns.starts_with("@scope/util::"),
        "expected package-prefixed namespace, got {ns}"
    );

    // A path under the system temp dir lies outside the fixture packages.
    let stray_file = std::env::temp_dir().join("nyx-resolver-outside.ts");
    let plain = modules.project_namespace_for(&stray_file, &fixtures);
    assert!(
        !plain.contains("::"),
        "outside-package namespace must be plain: {plain}"
    );
}
|
||||
|
||||
/// Root `"exports"."."` conditional map: the `import` condition is chosen
/// over `default`, and a resolving exports map takes precedence over the
/// legacy `main` field.
#[test]
fn resolves_exports_root_conditional() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let verdict = modules
        .resolve_specifier(&entry, "@scope/exports-pkg")
        .expect("@scope/exports-pkg must classify");
    assert_eq!(verdict.package.as_deref(), Some("@scope/exports-pkg"));

    let target = verdict.file.expect("@scope/exports-pkg must resolve");
    assert!(
        target.ends_with("exports-pkg/src/main.ts"),
        "expected import-branch main.ts, got {}",
        target.display()
    );
}
|
||||
|
||||
/// An exact subpath key (`"./sub": "./src/sub.ts"`) is honoured before any
/// pattern fallback gets a chance to match.
#[test]
fn resolves_exports_exact_subpath() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let verdict = modules
        .resolve_specifier(&entry, "@scope/exports-pkg/sub")
        .expect("subpath spec must classify");
    let target = verdict.file.expect("./sub must resolve");

    assert!(
        target.ends_with("exports-pkg/src/sub.ts"),
        "unexpected resolution: {}",
        target.display()
    );
}
|
||||
|
||||
/// A wildcard key (`"./feat/*": "./src/feat/*.ts"`) splices the matched
/// tail of the specifier into the target path.
#[test]
fn resolves_exports_wildcard_subpath() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let verdict = modules
        .resolve_specifier(&entry, "@scope/exports-pkg/feat/widget")
        .expect("wildcard subpath must classify");
    let target = verdict.file.expect("./feat/widget must resolve");

    assert!(
        target.ends_with("exports-pkg/src/feat/widget.ts"),
        "unexpected resolution: {}",
        target.display()
    );
}
|
||||
|
||||
/// A `null` exports value blocks its subpath: the verdict carries no file
/// instead of falling back to a direct path join.
#[test]
fn exports_null_blocks_subpath() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");

    let verdict = modules
        .resolve_specifier(&entry, "@scope/exports-pkg/blocked")
        .expect("blocked spec must classify");

    assert!(
        verdict.file.is_none(),
        "null exports value must not resolve, got {:?}",
        verdict.file
    );
}
|
||||
|
||||
/// Cost guard for `build_module_graph` on the fixture tree: wall-clock time
/// under 50 ms and resident-set growth under 10 MiB. On platforms where
/// `approximate_rss_kib` cannot measure, both samples are 0 and the memory
/// check passes trivially.
#[test]
fn module_graph_is_cheap() {
    let fixtures = root();

    // Sample order matters: RSS, start clock, build, stop clock, RSS.
    let rss_before = approximate_rss_kib();
    let clock = std::time::Instant::now();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let build_ms = clock.elapsed().as_millis();
    let rss_after = approximate_rss_kib();

    assert!(
        build_ms < 50,
        "build_module_graph took {}ms (>50ms ceiling)",
        build_ms
    );

    // Saturating so a shrinking RSS reads as "no growth", not underflow.
    let delta_kib = rss_after.saturating_sub(rss_before);
    assert!(
        delta_kib < 10 * 1024,
        "build_module_graph added {delta_kib} KiB RSS (>10 MiB ceiling)"
    );

    assert!(
        !modules.packages().is_empty(),
        "fixture tree must have packages"
    );
}
|
||||
|
||||
/// Parse a TypeScript file with tree-sitter and run
|
||||
/// [`extract_resolved_imports`] against it. Tests pull this through to
|
||||
/// keep the parsing setup in one place.
|
||||
fn extract_imports_for(file: &std::path::Path, graph: &ModuleGraph) -> Vec<ImportBinding> {
|
||||
let bytes = std::fs::read(file).expect("read fixture file");
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter::Language::from(
|
||||
tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
|
||||
))
|
||||
.expect("load TS grammar");
|
||||
let tree = parser.parse(&bytes, None).expect("parse fixture");
|
||||
extract_resolved_imports(&tree, &bytes, file, graph, "typescript")
|
||||
}
|
||||
|
||||
/// End-to-end check of `extract_resolved_imports` on the fixture entry file
/// `tests/fixtures/resolver/apps/web/src/index.ts`. The bindings it lifts
/// must cover the same shapes the `resolve_specifier` tests exercise:
/// relative, parent-relative, scoped package, tsconfig path alias, and the
/// `node:fs/promises` builtin. Phases 09/10 thread these bindings through
/// cross-file taint, so the parsed-file path has to agree with the resolver.
#[test]
fn parses_imports_from_fixture_file() {
    let fixtures = root();
    let modules = build_module_graph(std::slice::from_ref(&fixtures));
    let entry = fixtures.join("apps/web/src/index.ts");
    let bindings = extract_imports_for(&entry, &modules);

    // Index the bindings by the name they introduce into the importer.
    let mut by_local: std::collections::HashMap<&str, &ImportBinding> =
        std::collections::HashMap::new();
    for binding in &bindings {
        by_local.insert(binding.local_name.as_str(), binding);
    }

    // Relative: `import { foo } from "./foo"`.
    let foo = by_local.get("foo").expect("foo binding present");
    assert_eq!(foo.source_module, "./foo");
    assert_eq!(foo.exported_name.as_deref(), Some("foo"));
    let foo_target = foo.resolved_file.as_ref().expect("./foo resolves");
    assert!(
        foo_target.ends_with("apps/web/src/foo.ts"),
        "foo unexpected: {}",
        foo_target.display()
    );

    // Parent-relative: `import { baz } from "../bar/baz"`.
    let baz = by_local.get("baz").expect("baz binding present");
    assert_eq!(baz.source_module, "../bar/baz");
    let baz_target = baz.resolved_file.as_ref().expect("../bar/baz resolves");
    assert!(
        baz_target.ends_with("apps/web/bar/baz.ts"),
        "baz unexpected: {}",
        baz_target.display()
    );

    // Scoped package: `import { util } from "@scope/util"`.
    let util = by_local.get("util").expect("util binding present");
    assert_eq!(util.source_module, "@scope/util");
    assert!(
        util.resolved_file.is_some(),
        "@scope/util must resolve to a file"
    );

    // tsconfig path alias: `import { x } from "@/lib/x"`.
    let x = by_local.get("x").expect("x binding present");
    assert_eq!(x.source_module, "@/lib/x");
    let x_target = x.resolved_file.as_ref().expect("@/lib/x resolves");
    assert!(
        x_target.ends_with("apps/web/src/lib/x.ts"),
        "x unexpected: {}",
        x_target.display()
    );

    // Node builtin: `import { promises as fs } from "node:fs/promises"`.
    // The binding is keyed by the alias `fs`; `promises` is the export name.
    let fs = by_local.get("fs").expect("fs alias binding present");
    assert_eq!(fs.source_module, "node:fs/promises");
    assert_eq!(fs.exported_name.as_deref(), Some("promises"));
    assert!(
        fs.resolved_file.is_none(),
        "node:* builtin must not carry a resolved file"
    );
}
|
||||
|
||||
/// Best-effort resident-set size of the current process, in KiB.
///
/// Linux reads the `VmRSS:` line of `/proc/self/status`; macOS asks
/// `ps -o rss= -p <pid>`; every other target reports 0. Any failure along
/// the way also yields 0. The caller only looks at the difference between
/// two samples, so "0 → 0" simply reads as "below ceiling".
fn approximate_rss_kib() -> u64 {
    #[cfg(target_os = "linux")]
    {
        // An unreadable status file becomes an empty string, which has no
        // `VmRSS:` line and therefore falls through to 0.
        let status = std::fs::read_to_string("/proc/self/status").unwrap_or_default();
        status
            .lines()
            .find(|line| line.starts_with("VmRSS:"))
            .and_then(|line| line.split_whitespace().nth(1))
            .and_then(|kib| kib.parse::<u64>().ok())
            .unwrap_or(0)
    }
    #[cfg(target_os = "macos")]
    {
        let pid = std::process::id().to_string();
        std::process::Command::new("ps")
            .args(["-o", "rss=", "-p", pid.as_str()])
            .output()
            .ok()
            .and_then(|ps| String::from_utf8(ps.stdout).ok())
            .and_then(|text| text.trim().parse::<u64>().ok())
            .unwrap_or(0)
    }
    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    {
        0
    }
}
|
||||
|
|
@ -137,7 +137,7 @@ mod tests {
|
|||
AppState {
|
||||
scan_root: scan_root.clone(),
|
||||
config_dir: scan_root.clone(),
|
||||
database_dir: scan_root.clone(),
|
||||
database_dir: scan_root,
|
||||
security: LocalServerSecurity::new(port),
|
||||
config: Arc::new(RwLock::new(Config::default())),
|
||||
job_manager: Arc::new(JobManager::new(4, 8 * 1024 * 1024)),
|
||||
|
|
|
|||
|
|
@ -1187,6 +1187,18 @@ fn type_kind_tag(k: &TypeKind) -> String {
|
|||
TypeKind::Template => "Template".into(),
|
||||
TypeKind::Dto(_) => "Dto".into(),
|
||||
TypeKind::NullPrototypeObject => "NullPrototypeObject".into(),
|
||||
TypeKind::FileSystemPromisesNs => "FileSystemPromisesNs".into(),
|
||||
TypeKind::Sequelize => "Sequelize".into(),
|
||||
TypeKind::TypeOrmRepo => "TypeOrmRepo".into(),
|
||||
TypeKind::TypeOrmManager => "TypeOrmManager".into(),
|
||||
TypeKind::MikroOrmEm => "MikroOrmEm".into(),
|
||||
TypeKind::Request => "Request".into(),
|
||||
TypeKind::SqlAlchemySession => "SqlAlchemySession".into(),
|
||||
TypeKind::DjangoQuerySet => "DjangoQuerySet".into(),
|
||||
TypeKind::ActiveRecordRelation => "ActiveRecordRelation".into(),
|
||||
TypeKind::GormDb => "GormDb".into(),
|
||||
TypeKind::SqlxDb => "SqlxDb".into(),
|
||||
TypeKind::HibernateSession => "HibernateSession".into(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1565,6 +1577,10 @@ pub fn analyse_function_taint(
|
|||
auto_seed_handler_params: matches!(lang, Lang::JavaScript | Lang::TypeScript),
|
||||
cross_file_bodies: global_summaries.and_then(|gs| gs.bodies_by_key()),
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
|
||||
crate::taint::ssa_transfer::run_ssa_taint_full_with_exits(ssa, cfg, &transfer)
|
||||
|
|
@ -1628,7 +1644,7 @@ pub fn analyse_file_summaries(
|
|||
config: &Config,
|
||||
) -> Result<GlobalSummaries, StatusCode> {
|
||||
let bytes = std::fs::read(file_path).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let (func_summaries, ssa_rows, _ssa_bodies, auth_rows) =
|
||||
let (func_summaries, ssa_rows, _ssa_bodies, auth_rows, cross_pkg_imports) =
|
||||
crate::ast::extract_all_summaries_from_bytes(&bytes, file_path, config, None)
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
|
|
@ -1640,6 +1656,9 @@ pub fn analyse_file_summaries(
|
|||
for (key, auth_summary) in auth_rows {
|
||||
global.insert_auth(key, auth_summary);
|
||||
}
|
||||
if let Some((ns, map)) = cross_pkg_imports {
|
||||
global.insert_cross_package_imports(ns, map);
|
||||
}
|
||||
|
||||
Ok(global)
|
||||
}
|
||||
|
|
@ -1883,6 +1902,7 @@ function consume() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2039,6 +2059,7 @@ async function recentAuditLogs() {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let facts = analyse_body(&body, BodyId(0));
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ impl JobManager {
|
|||
started_at: Some(chrono::Utc::now().to_rfc3339()),
|
||||
finished_at: None,
|
||||
duration_secs: None,
|
||||
engine_version: Some(engine_version.clone()),
|
||||
engine_version: Some(engine_version),
|
||||
languages: None,
|
||||
files_scanned: None,
|
||||
files_skipped: None,
|
||||
|
|
@ -261,7 +261,7 @@ impl JobManager {
|
|||
let languages: Vec<String> = progress_snap.languages.keys().cloned().collect();
|
||||
let files_scanned = progress_snap.files_discovered;
|
||||
let files_skipped = progress_snap.files_skipped;
|
||||
let timing = progress_snap.timing.clone();
|
||||
let timing = progress_snap.timing;
|
||||
let finished_at = chrono::Utc::now();
|
||||
|
||||
// Prepare the final state outside the lock.
|
||||
|
|
@ -292,9 +292,9 @@ impl JobManager {
|
|||
if let Some(job) = jobs.get_mut(&jid) {
|
||||
job.finished_at = Some(finished_at);
|
||||
job.duration_secs = Some(elapsed);
|
||||
job.languages = Some(languages.clone());
|
||||
job.languages = Some(languages);
|
||||
job.files_scanned = Some(files_scanned);
|
||||
job.timing = Some(timing.clone());
|
||||
job.timing = Some(timing);
|
||||
job.status = status.clone();
|
||||
job.findings = diags;
|
||||
job.error = error_str.clone();
|
||||
|
|
@ -590,7 +590,7 @@ handleRequest({ query: { name: '<b>x</b>' } }, { send() {} });
|
|||
|
||||
let id = manager
|
||||
.start_scan(
|
||||
project_dir.clone(),
|
||||
project_dir,
|
||||
test_config(),
|
||||
tx,
|
||||
Some(Arc::clone(&pool)),
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ async fn add_rule(
|
|||
.or_default();
|
||||
|
||||
let new_rule = crate::utils::config::ConfigLabelRule {
|
||||
matchers: rule.matchers.clone(),
|
||||
matchers: rule.matchers,
|
||||
kind: rule_kind,
|
||||
cap: cap_name,
|
||||
case_sensitive: false,
|
||||
|
|
@ -242,7 +242,7 @@ async fn add_terminator(
|
|||
.entry(term.lang.clone())
|
||||
.or_default();
|
||||
if !lang_cfg.terminators.contains(&term.name) {
|
||||
lang_cfg.terminators.push(term.name.clone());
|
||||
lang_cfg.terminators.push(term.name);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -447,6 +447,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
)],
|
||||
)
|
||||
|
|
@ -520,6 +521,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
},
|
||||
false,
|
||||
false,
|
||||
|
|
@ -544,6 +546,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
},
|
||||
true,
|
||||
true,
|
||||
|
|
@ -568,6 +571,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
},
|
||||
true,
|
||||
false,
|
||||
|
|
@ -666,6 +670,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
)],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ async fn overview(State(state): State<AppState>) -> Json<OverviewResponse> {
|
|||
latest_scan_id,
|
||||
latest_scan_at,
|
||||
by_severity: summary.by_severity.clone(),
|
||||
by_category: summary.by_category.clone(),
|
||||
by_category: summary.by_category,
|
||||
by_language,
|
||||
top_files,
|
||||
top_directories,
|
||||
|
|
|
|||
|
|
@ -309,13 +309,12 @@ async fn get_scan_findings(
|
|||
let per_page = query.per_page.unwrap_or(50).min(200);
|
||||
let start = (page - 1) * per_page;
|
||||
|
||||
let scan_root = state.scan_root.clone();
|
||||
let page_findings: Vec<FindingView> = filtered
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.skip(start)
|
||||
.take(per_page)
|
||||
.map(|(i, d)| models::finding_from_diag_with_context(i, d, &scan_root))
|
||||
.map(|(i, d)| models::finding_from_diag_with_context(i, d, &state.scan_root))
|
||||
.collect();
|
||||
|
||||
Ok(Json(serde_json::json!({
|
||||
|
|
@ -361,8 +360,6 @@ async fn compare_scans(
|
|||
.push((i, d));
|
||||
}
|
||||
|
||||
let scan_root = state.scan_root.clone();
|
||||
|
||||
let mut new_findings = Vec::new();
|
||||
let mut fixed_findings = Vec::new();
|
||||
let mut changed_findings = Vec::new();
|
||||
|
|
@ -378,7 +375,7 @@ async fn compare_scans(
|
|||
for i in 0..matched {
|
||||
let (idx, diag) = right_group[i];
|
||||
let (_, left_diag) = left_group[i];
|
||||
let view = models::finding_from_diag_with_context(idx, diag, &scan_root);
|
||||
let view = models::finding_from_diag_with_context(idx, diag, &state.scan_root);
|
||||
let changes = compute_field_changes(left_diag, diag);
|
||||
if changes.is_empty() {
|
||||
unchanged_findings.push(ComparedFinding {
|
||||
|
|
@ -397,7 +394,7 @@ async fn compare_scans(
|
|||
for &(idx, diag) in &right_group[matched..] {
|
||||
new_findings.push(ComparedFinding {
|
||||
fingerprint: fp.clone(),
|
||||
finding: models::finding_from_diag_with_context(idx, diag, &scan_root),
|
||||
finding: models::finding_from_diag_with_context(idx, diag, &state.scan_root),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
|
|
@ -405,7 +402,7 @@ async fn compare_scans(
|
|||
for &(idx, diag) in right_group {
|
||||
new_findings.push(ComparedFinding {
|
||||
fingerprint: fp.clone(),
|
||||
finding: models::finding_from_diag_with_context(idx, diag, &scan_root),
|
||||
finding: models::finding_from_diag_with_context(idx, diag, &state.scan_root),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -419,7 +416,7 @@ async fn compare_scans(
|
|||
for &(idx, diag) in &left_group[start..] {
|
||||
fixed_findings.push(ComparedFinding {
|
||||
fingerprint: fp.clone(),
|
||||
finding: models::finding_from_diag_with_context(idx, diag, &scan_root),
|
||||
finding: models::finding_from_diag_with_context(idx, diag, &state.scan_root),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -219,6 +219,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -741,6 +741,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -217,6 +217,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let (eliminated, copy_map) = copy_propagate(&mut body, &cfg);
|
||||
|
|
@ -300,6 +301,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let (eliminated, copy_map) = copy_propagate(&mut body, &cfg);
|
||||
|
|
@ -372,6 +374,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
(cfg, body)
|
||||
}
|
||||
|
|
@ -496,6 +499,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let (eliminated, _map) = copy_propagate(&mut body, &cfg);
|
||||
assert_eq!(eliminated, 0, "two-operand Assign is not a copy");
|
||||
|
|
@ -577,6 +581,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let (eliminated, _) = copy_propagate(&mut body, &cfg);
|
||||
assert_eq!(eliminated, 1, "v1 should be eliminated");
|
||||
|
|
@ -676,6 +681,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let (eliminated, _map) = copy_propagate(&mut body, &cfg);
|
||||
assert_eq!(eliminated, 1);
|
||||
|
|
@ -726,6 +732,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let (eliminated, map) = copy_propagate(&mut body, &cfg);
|
||||
assert_eq!(eliminated, 0);
|
||||
|
|
|
|||
|
|
@ -219,6 +219,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -269,6 +270,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -320,6 +322,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -367,6 +370,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -406,6 +410,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -472,6 +477,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -541,6 +547,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -603,6 +610,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -655,6 +663,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -744,6 +753,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -823,6 +833,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
|
|||
|
|
@ -790,6 +790,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let errs = check_structural_invariants(&body);
|
||||
assert!(
|
||||
|
|
@ -839,6 +840,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let errs = check_structural_invariants(&body);
|
||||
assert!(
|
||||
|
|
@ -891,6 +893,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let errs = check_structural_invariants(&body);
|
||||
assert!(
|
||||
|
|
@ -921,6 +924,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let errs = check_structural_invariants(&body);
|
||||
assert!(
|
||||
|
|
|
|||
|
|
@ -373,6 +373,22 @@ pub struct SsaBody {
|
|||
/// produced before this field existed.
|
||||
#[serde(default)]
|
||||
pub synthetic_externals: HashSet<SsaValue>,
|
||||
/// SSA values whose [`SsaOp::Assign`] is a slot-scoped binding from a
|
||||
/// bare-array destructure rewrite (see `bare_array_ops` in
|
||||
/// [`crate::ssa::lower`]). The Assign transfer arm in
|
||||
/// [`crate::taint::ssa_transfer`] consults this set to skip the
|
||||
/// `info.taint.labels` Source pickup that would otherwise bleed the
|
||||
/// outer destructure node's Source label into the slot-scoped binding.
|
||||
///
|
||||
/// Operand union still runs normally, so transitive taint via an
|
||||
/// inner ident (e.g. `helper(tainted_local)` in slot 1 of
|
||||
/// `[req.body.other, helper(tainted_local)]`) propagates through the
|
||||
/// Assign's operands without inheriting the outer-node Source.
|
||||
///
|
||||
/// Empty by default; only the per-slot kill arm in the bare-array
|
||||
/// destructure lowering populates this set.
|
||||
#[serde(default)]
|
||||
pub slot_scoped_assigns: HashSet<SsaValue>,
|
||||
}
|
||||
|
||||
impl SsaBody {
|
||||
|
|
@ -581,6 +597,7 @@ mod tests {
|
|||
field_interner: FieldInterner::new(),
|
||||
field_writes: HashMap::new(),
|
||||
synthetic_externals: HashSet::new(),
|
||||
slot_scoped_assigns: HashSet::new(),
|
||||
};
|
||||
let fid = body.intern_field("mu");
|
||||
body.blocks[0].body.push(SsaInst {
|
||||
|
|
|
|||
396
src/ssa/lower.rs
396
src/ssa/lower.rs
|
|
@ -257,6 +257,7 @@ fn lower_to_ssa_inner(
|
|||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals,
|
||||
slot_scoped_assigns,
|
||||
) = rename_variables(
|
||||
cfg,
|
||||
&blocks_nodes,
|
||||
|
|
@ -326,6 +327,7 @@ fn lower_to_ssa_inner(
|
|||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals,
|
||||
slot_scoped_assigns,
|
||||
};
|
||||
|
||||
// 9. Catch-block reachability invariant.
|
||||
|
|
@ -957,6 +959,7 @@ fn rename_variables(
|
|||
crate::ssa::ir::FieldInterner,
|
||||
HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)>,
|
||||
HashSet<SsaValue>,
|
||||
HashSet<SsaValue>,
|
||||
) {
|
||||
let num_blocks = blocks_nodes.len();
|
||||
let mut next_value: u32 = 0;
|
||||
|
|
@ -973,6 +976,10 @@ fn rename_variables(
|
|||
// Populated below at the synthetic-Assign emission site. Read by
|
||||
// the taint engine to lift the assign into a structural field WRITE.
|
||||
let mut field_writes: HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)> = HashMap::new();
|
||||
// SSA values whose `Assign` comes from a bare-array destructure
|
||||
// slot-scoped kill arm; the taint engine consults this set to skip
|
||||
// outer-node Source label pickup while still unioning operand taint.
|
||||
let mut slot_scoped_assigns: HashSet<SsaValue> = HashSet::new();
|
||||
|
||||
// Per-variable rename stacks
|
||||
let mut var_stacks: HashMap<String, Vec<SsaValue>> = HashMap::new();
|
||||
|
|
@ -1041,6 +1048,7 @@ fn rename_variables(
|
|||
nop_nodes: &HashSet<NodeIndex>,
|
||||
field_interner: &mut crate::ssa::ir::FieldInterner,
|
||||
field_writes: &mut HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)>,
|
||||
slot_scoped_assigns: &mut HashSet<SsaValue>,
|
||||
) {
|
||||
let block_id = BlockId(block_idx as u32);
|
||||
|
||||
|
|
@ -1258,6 +1266,27 @@ fn rename_variables(
|
|||
} else {
|
||||
SsaOp::Assign(uses)
|
||||
}
|
||||
} else if info.is_await_forward
|
||||
&& info.call.callee.is_none()
|
||||
&& !info.taint.uses.is_empty()
|
||||
{
|
||||
// `await x` resolves to the same value as `x` — model as a 1:1
|
||||
// copy so taint, origins, and abstract-domain facts forward
|
||||
// unchanged. Gated on `callee.is_none()` so an await-wrapped
|
||||
// call still lowers as a Call op rather than being collapsed
|
||||
// to Assign (today the CFG splits `await foo(x)` into two nodes,
|
||||
// but the guard keeps the invariant explicit).
|
||||
let uses: SmallVec<[SsaValue; 4]> = info
|
||||
.taint
|
||||
.uses
|
||||
.iter()
|
||||
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
||||
.collect();
|
||||
if uses.is_empty() {
|
||||
SsaOp::Nop
|
||||
} else {
|
||||
SsaOp::Assign(uses)
|
||||
}
|
||||
} else if matches!(
|
||||
info.kind,
|
||||
StmtKind::Entry
|
||||
|
|
@ -1344,15 +1373,311 @@ fn rename_variables(
|
|||
|
||||
cfg_node_map.insert(node, v);
|
||||
|
||||
// Clone op for potential extra_defines before moving into SsaInst
|
||||
let primary_op_for_extras = if info.taint.extra_defines.is_empty() {
|
||||
// Promise.all-style array-destructure precision: when a CallWrapper
|
||||
// node binds an array_pattern (`const [a, b] = await Promise.all(
|
||||
// [x, y])` or `let (a, b) = tokio::join!(x, y)`) and the value is a
|
||||
// promise combinator that produces an array/tuple of per-element
|
||||
// results (`Promise.all`, `Promise.allSettled`, `asyncio.gather`,
|
||||
// `tokio::join!` and friends), rewrite the per-binding SSA so each
|
||||
// binding sees only its own index's taint instead of the scalar
|
||||
// union that `try_apply_promise_combinator` would produce.
|
||||
//
|
||||
// Two argument shapes are supported:
|
||||
// (a) literal-array (JS/Python): one positional arg whose
|
||||
// collected idents represent the array elements in order,
|
||||
// e.g. `Promise.all([x, y])` → args = [[x, y]].
|
||||
// (b) positional (Rust macros): N positional args, each one
|
||||
// ident, e.g. `tokio::join!(x, y)` → args = [[x], [y]].
|
||||
//
|
||||
// `Promise.race` and `Promise.resolve` are excluded: the awaited
|
||||
// value of a race is whichever promise wins (a single value, not
|
||||
// an array), and destructuring that value index-by-index does not
|
||||
// correspond to the args.
|
||||
// The rewrite fires when:
|
||||
// - the call is a promise combinator that produces an array of
|
||||
// per-element results (`All` / `AllSettled`), AND either
|
||||
// - the LHS destructures into >= 2 bindings (sequential case
|
||||
// where `extra_defines` is non-empty), OR
|
||||
// - the LHS is an array_pattern with at least one skip slot
|
||||
// (`array_pattern_indices` is non-empty, even if `extra_defines`
|
||||
// itself is empty — `const [, b]` is a single-binding pattern
|
||||
// whose index is 1, not 0).
|
||||
let is_combinator_rewrite_target = matches!(
|
||||
info.call
|
||||
.callee
|
||||
.as_deref()
|
||||
.and_then(crate::labels::is_any_promise_combinator),
|
||||
Some(
|
||||
crate::labels::PromiseCombinatorKind::All
|
||||
| crate::labels::PromiseCombinatorKind::AllSettled
|
||||
)
|
||||
);
|
||||
// Indices for each binding in source order: primary at index 0,
|
||||
// then extras. Falls back to sequential 0..N when the AST didn't
|
||||
// record explicit indices (non-array_pattern destructures and
|
||||
// tuple_pattern shapes that contain no wildcards).
|
||||
let binding_indices: SmallVec<[usize; 4]> =
|
||||
if !info.taint.array_pattern_indices.is_empty() {
|
||||
info.taint.array_pattern_indices.clone()
|
||||
} else if !info.taint.extra_defines.is_empty() {
|
||||
(0..=info.taint.extra_defines.len()).collect()
|
||||
} else {
|
||||
SmallVec::new()
|
||||
};
|
||||
let promise_destruct_args: Option<SmallVec<[SsaValue; 4]>> =
|
||||
if is_combinator_rewrite_target && !binding_indices.is_empty() {
|
||||
let max_index = binding_indices.iter().copied().max().unwrap_or(0);
|
||||
let needed = max_index + 1;
|
||||
// Use `info.call.arg_uses` directly rather than the
|
||||
// build_call_args-derived `args`, which may include an
|
||||
// implicit "uses not in arg_uses" group appended for chain
|
||||
// bookkeeping that would inflate the apparent arity.
|
||||
let arg_uses = &info.call.arg_uses;
|
||||
let map_idents = |idents: &[String]| -> Option<SmallVec<[SsaValue; 4]>> {
|
||||
let mapped: SmallVec<[SsaValue; 4]> = idents
|
||||
.iter()
|
||||
.take(needed)
|
||||
.filter_map(|ident| {
|
||||
var_stacks.get(ident).and_then(|s| s.last().copied())
|
||||
})
|
||||
.collect();
|
||||
if mapped.len() == needed {
|
||||
Some(mapped)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
if arg_uses.len() == 1 && arg_uses[0].len() >= needed {
|
||||
// Shape (a): single positional arg whose idents are the
|
||||
// array elements in source order (`Promise.all([x, y])`,
|
||||
// `asyncio.gather([x, y])`).
|
||||
map_idents(&arg_uses[0])
|
||||
} else if arg_uses.len() >= needed
|
||||
&& arg_uses.iter().take(needed).all(|g| g.len() == 1)
|
||||
{
|
||||
// Shape (b): N positional args, each with one ident
|
||||
// (`tokio::join!(x, y)`).
|
||||
let names: Vec<&String> =
|
||||
arg_uses.iter().take(needed).map(|g| &g[0]).collect();
|
||||
let mapped: SmallVec<[SsaValue; 4]> = names
|
||||
.iter()
|
||||
.filter_map(|ident| {
|
||||
var_stacks
|
||||
.get(ident.as_str())
|
||||
.and_then(|s| s.last().copied())
|
||||
})
|
||||
.collect();
|
||||
if mapped.len() == needed {
|
||||
Some(mapped)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Bare-array RHS destructure precision: when the LHS is an
|
||||
// array_pattern / tuple_pattern / pattern_list / left_assignment_list
|
||||
// AND the RHS is a bare array-literal, build per-source-position
|
||||
// ops so each binding sees only its index's element instead of
|
||||
// the scalar union of every RHS ident.
|
||||
//
|
||||
// Three slot shapes are recognised by `collect_rhs_array_literal_elements`:
|
||||
//
|
||||
// * `Ident(name)` — bare identifier. Emit `Assign(reaching_def)`.
|
||||
// * `Literal` — syntactic literal (string/number/etc.). Emit
|
||||
// `Const(None)` so the binding carries no taint.
|
||||
// * `Complex(uses)` — call / binary / subscript / member access /
|
||||
// interpolated string / nested array literal / etc. Emit
|
||||
// `Assign(union of inner ident reaching defs)` — slot-scoped
|
||||
// union, not the whole-RHS union the legacy path produced.
|
||||
// Falls back to `Const(None)` when no inner idents resolve
|
||||
// (pure literal subexpression like `1 + 2`).
|
||||
//
|
||||
// Closes FPs like `const [a, b] = [safe, tainted]; exec(b);`
|
||||
// (Ident shape) and `const [c, d] = [fn(req.x), 'lit']; exec(d);`
|
||||
// (Complex shape) where the legacy union painted the safe binding.
|
||||
//
|
||||
// The promise-combinator path above has already populated
|
||||
// `promise_destruct_args` when its preconditions held, so the
|
||||
// mutual exclusion is gated through `promise_destruct_args.is_none()`
|
||||
// rather than `info.call.callee.is_none()`. The earlier
|
||||
// callee-none gate was wrong because the outer
|
||||
// variable_declarator node picks up `info.call.callee` whenever
|
||||
// the RHS text matches a Source label — which is exactly the
|
||||
// case where we need the per-slot rewrite most.
|
||||
// The outer node may carry a `DataLabel::Source(_)` whose
|
||||
// classification matched somewhere in the RHS expression text
|
||||
// (`req.body.cmd`, `process.env.X`, etc.). For multi-slot
|
||||
// RHS we can't statically partition WHICH slot caused that
|
||||
// match, but it must originate from a Complex slot (Literal
|
||||
// and bare-Ident slots whose names resolve through
|
||||
// `var_stacks` carry their own SsaValue identity). Treat
|
||||
// Complex slots as Source-emitting when the outer label set
|
||||
// included Source — strict precision improvement over the
|
||||
// legacy union path which painted EVERY slot, including
|
||||
// Literal, with the outer Source.
|
||||
let outer_is_source = info
|
||||
.taint
|
||||
.labels
|
||||
.iter()
|
||||
.any(|l| matches!(l, crate::labels::DataLabel::Source(_)));
|
||||
|
||||
// Per-slot Source classification (see `RhsArraySlot::Complex.source_cap`):
|
||||
// when at least one Complex slot's own subtree classified as
|
||||
// Source, we know which slot(s) carried the source pattern, so
|
||||
// sibling Complex slots without their own source_cap stay
|
||||
// slot-scoped (Assign / Const). Otherwise (the outer node
|
||||
// matched but no per-slot classifier fired — typical of subscript
|
||||
// chains and other shapes whose source flows via reaching-def
|
||||
// rather than static text), fall back to the conservative
|
||||
// "all-Complex-are-Source" emission for legacy preservation.
|
||||
use crate::cfg::RhsArraySlot;
|
||||
let any_slot_has_source_cap = info.taint.rhs_array_elements.iter().any(|s| {
|
||||
matches!(
|
||||
s,
|
||||
RhsArraySlot::Complex { source_cap, .. }
|
||||
if !source_cap.is_empty()
|
||||
)
|
||||
});
|
||||
let effective_outer_fallback = outer_is_source && !any_slot_has_source_cap;
|
||||
|
||||
let bare_array_ops: Option<(SmallVec<[SsaOp; 4]>, SmallVec<[bool; 4]>)> =
|
||||
if !info.taint.rhs_array_elements.is_empty()
|
||||
&& !binding_indices.is_empty()
|
||||
&& promise_destruct_args.is_none()
|
||||
{
|
||||
let max_index = binding_indices.iter().copied().max().unwrap_or(0);
|
||||
let needed = max_index + 1;
|
||||
if info.taint.rhs_array_elements.len() < needed {
|
||||
None
|
||||
} else {
|
||||
let mut per_pos: SmallVec<[SsaOp; 4]> = SmallVec::new();
|
||||
let mut slot_scoped_mask: SmallVec<[bool; 4]> = SmallVec::new();
|
||||
let mut bail = false;
|
||||
for slot in info.taint.rhs_array_elements.iter().take(needed) {
|
||||
let mut is_slot_scoped = false;
|
||||
let slot_op = match slot {
|
||||
RhsArraySlot::Ident(ident) => {
|
||||
match var_stacks
|
||||
.get(ident.as_str())
|
||||
.and_then(|s| s.last().copied())
|
||||
{
|
||||
Some(sv) => SsaOp::Assign(SmallVec::from_elem(sv, 1)),
|
||||
None => {
|
||||
bail = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
RhsArraySlot::Literal => SsaOp::Const(None),
|
||||
RhsArraySlot::Complex {
|
||||
uses: inner_uses,
|
||||
source_cap,
|
||||
} => {
|
||||
let mut mapped: SmallVec<[SsaValue; 4]> = SmallVec::new();
|
||||
for ident in inner_uses.iter() {
|
||||
if let Some(sv) = var_stacks
|
||||
.get(ident.as_str())
|
||||
.and_then(|s| s.last().copied())
|
||||
{
|
||||
if !mapped.contains(&sv) {
|
||||
mapped.push(sv);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !source_cap.is_empty() {
|
||||
// Per-slot classification found a Source
|
||||
// pattern (e.g. `req.body.cmd`) inside
|
||||
// THIS slot's subtree. Emit Source so the
|
||||
// binding inherits the outer-node Source
|
||||
// caps for this slot's index.
|
||||
SsaOp::Source
|
||||
} else if outer_is_source && any_slot_has_source_cap {
|
||||
// Some OTHER slot's subtree classified as
|
||||
// Source; this slot did NOT. Emit
|
||||
// Assign(mapped) and mark the slot as
|
||||
// slot-scoped so the taint transfer's
|
||||
// Assign arm skips outer-node Source
|
||||
// label pickup for this binding (without
|
||||
// losing transitive taint through inner
|
||||
// uses). When `mapped` is empty, fall
|
||||
// back to Const(None) — the binding
|
||||
// carries no taint anyway.
|
||||
if mapped.is_empty() {
|
||||
SsaOp::Const(None)
|
||||
} else {
|
||||
is_slot_scoped = true;
|
||||
SsaOp::Assign(mapped.clone())
|
||||
}
|
||||
} else if effective_outer_fallback {
|
||||
// Outer-node Source label but no
|
||||
// per-slot classifier fired on any slot
|
||||
// (typical of subscript-on-tainted-local
|
||||
// shapes). Preserve legacy conservative
|
||||
// emission for unrecognised shapes.
|
||||
SsaOp::Source
|
||||
} else if mapped.is_empty() {
|
||||
SsaOp::Const(None)
|
||||
} else {
|
||||
SsaOp::Assign(mapped)
|
||||
}
|
||||
}
|
||||
};
|
||||
per_pos.push(slot_op);
|
||||
slot_scoped_mask.push(is_slot_scoped);
|
||||
}
|
||||
if bail {
|
||||
None
|
||||
} else {
|
||||
Some((per_pos, slot_scoped_mask))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Clone op for potential extra_defines before moving into SsaInst.
|
||||
// For the destructure-promise / bare-array rewrites, the
|
||||
// per-extra ops are built explicitly below, so the shared clone
|
||||
// path is bypassed.
|
||||
let primary_op_for_extras = if info.taint.extra_defines.is_empty()
|
||||
|| promise_destruct_args.is_some()
|
||||
|| bare_array_ops.is_some()
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(op.clone())
|
||||
};
|
||||
|
||||
// Override primary op to single-operand Assign when the
|
||||
// destructure-promise rewrite fires. The primary's source-order
|
||||
// index is `binding_indices[0]` — non-zero for skip-leading
|
||||
// patterns like `const [, b]` where `b` is the FIRST (and only)
|
||||
// binding but lives at pattern position 1.
|
||||
let primary_op = if let Some(ref args) = promise_destruct_args {
|
||||
let primary_idx = binding_indices.first().copied().unwrap_or(0);
|
||||
let pick = args.get(primary_idx).copied().unwrap_or(args[0]);
|
||||
SsaOp::Assign(SmallVec::from_elem(pick, 1))
|
||||
} else if let Some((ref per_pos, ref slot_scoped_mask)) = bare_array_ops {
|
||||
let primary_idx = binding_indices.first().copied().unwrap_or(0);
|
||||
if slot_scoped_mask.get(primary_idx).copied().unwrap_or(false) {
|
||||
slot_scoped_assigns.insert(v);
|
||||
}
|
||||
per_pos
|
||||
.get(primary_idx)
|
||||
.cloned()
|
||||
.unwrap_or(SsaOp::Const(None))
|
||||
} else {
|
||||
op
|
||||
};
|
||||
|
||||
ssa_blocks[block_idx].body.push(SsaInst {
|
||||
value: v,
|
||||
op,
|
||||
op: primary_op,
|
||||
cfg_node: node,
|
||||
var_name: var_name_for_ssa.clone(),
|
||||
span: info.ast.span,
|
||||
|
|
@ -1423,7 +1748,66 @@ fn rename_variables(
|
|||
|
||||
// Emit extra SSA instructions for destructuring bindings.
|
||||
// Each extra define inherits the same op (Source/Call/Assign) as the primary.
|
||||
if let Some(ref primary_op) = primary_op_for_extras {
|
||||
//
|
||||
// For the destructure-promise rewrite, each extra emits an Assign
|
||||
// on its corresponding indexed argument so per-element taint is
|
||||
// preserved instead of the scalar union. The source-order index
|
||||
// for `extra_defines[i]` is `binding_indices[i + 1]` — accounts
|
||||
// for skip slots like `const [a, , b]` where `b` sits at index 2,
|
||||
// not at index 1.
|
||||
if let Some(ref pd_args) = promise_destruct_args {
|
||||
for (i, extra_def) in info.taint.extra_defines.iter().enumerate() {
|
||||
let ev = SsaValue(*next_value);
|
||||
*next_value += 1;
|
||||
value_defs.push(ValueDef {
|
||||
var_name: Some(extra_def.clone()),
|
||||
cfg_node: node,
|
||||
block: block_id,
|
||||
});
|
||||
var_stacks.entry(extra_def.clone()).or_default().push(ev);
|
||||
let extra_idx = binding_indices.get(i + 1).copied().unwrap_or(i + 1);
|
||||
let arg = pd_args.get(extra_idx).copied().unwrap_or(pd_args[0]);
|
||||
ssa_blocks[block_idx].body.push(SsaInst {
|
||||
value: ev,
|
||||
op: SsaOp::Assign(SmallVec::from_elem(arg, 1)),
|
||||
cfg_node: node,
|
||||
var_name: Some(extra_def.clone()),
|
||||
span: info.ast.span,
|
||||
});
|
||||
}
|
||||
} else if let Some((ref per_pos, ref slot_scoped_mask)) = bare_array_ops {
|
||||
// Bare-array RHS destructure: each extra emits the op for its
|
||||
// source-order RHS position. Ident slots emit Assign of the
|
||||
// ident's reaching SSA value; literal slots emit Const(None).
|
||||
// Slot-scoped Assigns are registered in
|
||||
// `slot_scoped_assigns` so the taint transfer skips
|
||||
// outer-node Source pickup for those bindings.
|
||||
for (i, extra_def) in info.taint.extra_defines.iter().enumerate() {
|
||||
let ev = SsaValue(*next_value);
|
||||
*next_value += 1;
|
||||
value_defs.push(ValueDef {
|
||||
var_name: Some(extra_def.clone()),
|
||||
cfg_node: node,
|
||||
block: block_id,
|
||||
});
|
||||
var_stacks.entry(extra_def.clone()).or_default().push(ev);
|
||||
let extra_idx = binding_indices.get(i + 1).copied().unwrap_or(i + 1);
|
||||
let op_for_extra = per_pos
|
||||
.get(extra_idx)
|
||||
.cloned()
|
||||
.unwrap_or(SsaOp::Const(None));
|
||||
if slot_scoped_mask.get(extra_idx).copied().unwrap_or(false) {
|
||||
slot_scoped_assigns.insert(ev);
|
||||
}
|
||||
ssa_blocks[block_idx].body.push(SsaInst {
|
||||
value: ev,
|
||||
op: op_for_extra,
|
||||
cfg_node: node,
|
||||
var_name: Some(extra_def.clone()),
|
||||
span: info.ast.span,
|
||||
});
|
||||
}
|
||||
} else if let Some(ref primary_op) = primary_op_for_extras {
|
||||
for extra_def in &info.taint.extra_defines {
|
||||
let ev = SsaValue(*next_value);
|
||||
*next_value += 1;
|
||||
|
|
@ -1685,6 +2069,7 @@ fn rename_variables(
|
|||
nop_nodes,
|
||||
field_interner,
|
||||
field_writes,
|
||||
slot_scoped_assigns,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1802,6 +2187,7 @@ fn rename_variables(
|
|||
nop_nodes,
|
||||
&mut field_interner,
|
||||
&mut field_writes,
|
||||
&mut slot_scoped_assigns,
|
||||
);
|
||||
|
||||
// Process orphan blocks (e.g. catch blocks disconnected after exception edge removal).
|
||||
|
|
@ -1843,6 +2229,7 @@ fn rename_variables(
|
|||
nop_nodes,
|
||||
&mut field_interner,
|
||||
&mut field_writes,
|
||||
&mut slot_scoped_assigns,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1855,6 +2242,7 @@ fn rename_variables(
|
|||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals,
|
||||
slot_scoped_assigns,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -419,6 +419,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -442,6 +442,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let cfg: Cfg = Graph::new();
|
||||
let const_values = HashMap::new();
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -298,6 +298,16 @@ pub fn build_resource_method_summaries(
|
|||
) {
|
||||
continue;
|
||||
}
|
||||
// Skip acquires whose lifetime is bounded by a managed cleanup
|
||||
// scope (Python `with`, Java try-with-resources, Ruby
|
||||
// File.open-with-block, Rust RAII). The acquired handle is
|
||||
// released before the method returns, so propagating an
|
||||
// Acquire effect onto the caller's receiver creates an FP
|
||||
// class where callers of `def foo(self): with open(...): ...`
|
||||
// are flagged as leaking the receiver.
|
||||
if info.managed_resource {
|
||||
continue;
|
||||
}
|
||||
let callee = match &info.call.callee {
|
||||
Some(c) => c.to_ascii_lowercase(),
|
||||
None => continue,
|
||||
|
|
@ -308,6 +318,20 @@ pub fn build_resource_method_summaries(
|
|||
.iter()
|
||||
.any(|a| transfer::callee_matches_pub(&callee, a))
|
||||
{
|
||||
// The receiver-proxy mechanism (state/transfer.rs)
|
||||
// matches a method-name summary against `recv.method()`
|
||||
// call sites and marks the receiver as OPEN. This is
|
||||
// only meaningful when the acquire actually binds a
|
||||
// resource into receiver state (`self.fd = open(...)`,
|
||||
// `this.fd = fs.openSync(...)`). Acquires with no
|
||||
// binding (`return open(...)`) or with a local-only
|
||||
// binding (`f = open(...); f.close()`) do not transfer
|
||||
// ownership onto the caller's receiver. Gate the
|
||||
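// summary on `info.taint.defines` being set so anonymous and local-
|
||||
// only acquires no longer leak through this path.
// NOTE(review): `defines.is_none()` only rejects acquires with no
// binding at all; confirm local-only bindings are filtered elsewhere.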
|
||||
if info.taint.defines.is_none() {
|
||||
continue;
|
||||
}
|
||||
summaries.push(transfer::ResourceMethodSummary {
|
||||
method_name: method_name.clone(),
|
||||
effect: transfer::ResourceEffect::Acquire,
|
||||
|
|
|
|||
|
|
@ -33,6 +33,20 @@ use std::hash::{Hash, Hasher};
|
|||
/// Pairs a [`Cap`] with the source location of the consuming
|
||||
/// instruction so cross-file findings can attribute to the callee
|
||||
/// rather than the caller call-site.
|
||||
///
|
||||
/// `from_chain` distinguishes two flavours of recorded site:
|
||||
/// * `false`: the site was resolved via the body-local locator span,
|
||||
/// i.e. it points at a sink instruction in the function's own body.
|
||||
/// * `true`: the site was promoted from a deeper callee through
|
||||
/// `event.primary_sink_site`, i.e. this function's summary carries
|
||||
/// a chain-hop marker for a sink several frames down.
|
||||
///
|
||||
/// Pass-2 emission gates promotion of a site into `Finding.primary_location`
|
||||
/// on `from_chain || file_rel != caller_file_rel`: same-file single-hop
|
||||
/// helpers keep call-site emission (matching benchmark and real-world
|
||||
/// fixture calibration), multi-hop chains and cross-file callees surface
|
||||
/// the deep sink line. See "Multi-hop intra-file sink attribution gap"
|
||||
/// in deferred.md for the design tradeoff.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct SinkSite {
|
||||
#[serde(default, skip_serializing_if = "String::is_empty")]
|
||||
|
|
@ -44,11 +58,18 @@ pub struct SinkSite {
|
|||
#[serde(default, skip_serializing_if = "String::is_empty")]
|
||||
pub snippet: String,
|
||||
pub cap: Cap,
|
||||
/// True when this site was promoted from a deeper callee's summary
|
||||
/// (`event.primary_sink_site` chain-hop), false when recorded from
|
||||
/// the function's own locator span. See struct docs.
|
||||
#[serde(default, skip_serializing_if = "is_false")]
|
||||
pub from_chain: bool,
|
||||
}
|
||||
|
||||
impl SinkSite {
|
||||
/// Dedup key: two sites with the same `(file_rel, line, col, cap)`
|
||||
/// describe the same consumption and collapse on merge.
|
||||
/// describe the same consumption and collapse on merge. `from_chain`
|
||||
/// is intentionally excluded: the upgrade rule in [`union_sink_sites`]
|
||||
/// takes over when two sites with different `from_chain` collide.
|
||||
pub(crate) fn dedup_key(&self) -> (&str, u32, u32, u32) {
|
||||
// Borrows `file_rel` instead of cloning it; `cap` contributes its raw
// bit pattern via `bits()`.
(self.file_rel.as_str(), self.line, self.col, self.cap.bits())
|
||||
}
|
||||
|
|
@ -62,10 +83,15 @@ impl SinkSite {
|
|||
col: 0,
|
||||
snippet: String::new(),
|
||||
cap,
|
||||
from_chain: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Predicate for serde's `skip_serializing_if`: returns `true` when the
/// flag is `false`, so an unset `from_chain` is left out of the output.
fn is_false(b: &bool) -> bool {
|
||||
!*b
|
||||
}
|
||||
|
||||
/// Tree/bytes context for resolving a CFG span to a [`SinkSite`].
|
||||
/// Threaded as `Option<&Locator>` so extraction paths without tree
|
||||
/// access can pass `None` cheaply.
|
||||
|
|
@ -93,6 +119,7 @@ impl<'a> SinkSiteLocator<'a> {
|
|||
col: (point.column + 1) as u32,
|
||||
snippet,
|
||||
cap,
|
||||
from_chain: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -101,11 +128,17 @@ pub(crate) use crate::utils::snippet::line_snippet;
|
|||
|
||||
/// Union two `SmallVec<[SinkSite; 1]>` lists with `(file_rel, line, col,
|
||||
/// cap)` dedup. Preserves insertion order of `existing` then appends any
|
||||
/// new sites from `incoming` not already present.
|
||||
/// new sites from `incoming` not already present. When two sites with the
|
||||
/// same dedup key collide, `from_chain=true` wins, so a chain-hop marker is
|
||||
/// never lost when a same-file locator span happens to share coordinates.
|
||||
pub(crate) fn union_sink_sites(existing: &mut SmallVec<[SinkSite; 1]>, incoming: &[SinkSite]) {
|
||||
for site in incoming {
|
||||
let key = site.dedup_key();
|
||||
if !existing.iter().any(|s| s.dedup_key() == key) {
|
||||
if let Some(ex) = existing.iter_mut().find(|s| s.dedup_key() == key) {
|
||||
if site.from_chain && !ex.from_chain {
|
||||
ex.from_chain = true;
|
||||
}
|
||||
} else {
|
||||
existing.push(site.clone());
|
||||
}
|
||||
}
|
||||
|
|
@ -388,6 +421,16 @@ pub struct FuncSummary {
|
|||
/// [`crate::callgraph::TypeHierarchyIndex`].
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub hierarchy_edges: Vec<(String, String)>,
|
||||
|
||||
/// Phase-10 Next.js entry-point classification. When `Some(_)`,
|
||||
/// the function is treated as an externally-driven entry point
|
||||
/// whose parameters are seeded as `TaintOrigin::Source` at SSA
|
||||
/// entry, mirroring the way an HTTP request handler's formals are
|
||||
/// adversary-controlled by default. `None` for ordinary
|
||||
/// helpers — pass-2 keeps its existing baseline-subtraction
|
||||
/// semantics.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub entry_kind: Option<crate::entry_points::EntryKind>,
|
||||
}
|
||||
|
||||
// ── Cap conversion helpers ──────────────────────────────────────────────
|
||||
|
|
@ -428,6 +471,35 @@ impl FuncSummary {
|
|||
kind: self.kind,
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase-04 [`FuncKey`] builder that consults a project-wide
|
||||
/// [`crate::resolve::ModuleGraph`].
|
||||
///
|
||||
/// When the file producing this summary lies inside a discovered
|
||||
/// package, `namespace` takes a package-qualified form such as `"@scope/name::src/file.ts"`;
|
||||
/// otherwise the result matches [`Self::func_key`] exactly.
|
||||
/// Phase 04 only adds the helper; no resolution call site uses
|
||||
/// it. Phase 10 switches the JS/TS pass-1 path to call this
|
||||
/// instead of [`Self::func_key`].
///
/// The namespace is computed by `crate::symbol::namespace_with_package`
/// from `self.file_path`, `scan_root` and `module_graph`. Every other
/// field is copied from the summary: `arity` is always
/// `Some(self.param_count)`, and a `lang` slug that `Lang::from_slug`
/// does not recognise falls back to `Lang::Rust`.
|
||||
pub fn func_key_with_resolver(
|
||||
&self,
|
||||
scan_root: Option<&str>,
|
||||
module_graph: Option<&crate::resolve::ModuleGraph>,
|
||||
) -> FuncKey {
|
||||
FuncKey {
|
||||
// Unknown language slugs default to Rust rather than failing.
lang: Lang::from_slug(&self.lang).unwrap_or(Lang::Rust),
|
||||
namespace: crate::symbol::namespace_with_package(
|
||||
&self.file_path,
|
||||
scan_root,
|
||||
module_graph,
|
||||
),
|
||||
container: self.container.clone(),
|
||||
name: self.name.clone(),
|
||||
arity: Some(self.param_count),
|
||||
disambig: self.disambig,
|
||||
kind: self.kind,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Callee resolution ────────────────────────────────────────────────────
|
||||
|
|
@ -543,6 +615,26 @@ pub struct GlobalSummaries {
|
|||
/// Precise SSA-derived per-parameter summaries, keyed by `FuncKey`.
|
||||
/// These take precedence over `FuncSummary` during callee resolution.
|
||||
ssa_by_key: HashMap<FuncKey, SsaFuncSummary>,
|
||||
/// Sibling index over [`Self::ssa_by_key`] keyed by
|
||||
/// `(lang, namespace, name)`. Populated in lockstep with `ssa_by_key`
|
||||
/// (every `insert_ssa` / `merge` adds the key). Used by the
|
||||
/// cross-package SSA resolution path (step 0.7 in
|
||||
/// `taint::ssa_transfer::resolve_callee`) to avoid an
|
||||
/// `O(|ssa_by_key|)` linear scan per cross-package call site:
|
||||
/// the resolver looks up the candidate `Vec<FuncKey>` and narrows
|
||||
/// to a single hit by container / arity / disambig. Strictly
|
||||
/// additive: when the index is empty (e.g. tests that never insert
|
||||
/// SSA summaries) the resolver falls back to its existing flat
|
||||
/// paths.
|
||||
///
|
||||
/// Note: SSA summaries are append-only on `GlobalSummaries` (no
|
||||
/// remove/clear methods), so the index never needs invalidation.
|
||||
/// Synthetic-disambig probing in
|
||||
/// [`Self::reconcile_ssa_summary_key`] only mutates the inserted
|
||||
/// key's `disambig` field, never the `(lang, namespace, name)`
|
||||
/// triple, so the index value still points at every relevant
|
||||
/// `FuncKey` after reconciliation.
|
||||
ssa_by_lang_ns_name: HashMap<(Lang, String, String), Vec<FuncKey>>,
|
||||
/// Cross-file callee bodies for interprocedural symbolic execution.
|
||||
/// Keyed by `FuncKey` (same identity model as SSA summaries).
|
||||
bodies_by_key: HashMap<FuncKey, crate::taint::ssa_transfer::CalleeSsaBody>,
|
||||
|
|
@ -564,6 +656,16 @@ pub struct GlobalSummaries {
|
|||
/// execution-API auth-recognition gap on routes attached to bare
|
||||
/// child routers.
|
||||
router_facts_by_module: HashMap<String, crate::auth_analysis::router_facts::PerFileRouterFacts>,
|
||||
/// Per-file Phase-09 cross-package import maps, keyed by file
|
||||
/// namespace (scan-root-relative path, the same form
|
||||
/// [`FuncKey::namespace`] uses). Populated in pass 1 from each
|
||||
/// file's [`crate::cfg::FileCfg::resolved_imports`] and consumed by
|
||||
/// `inline_analyse_callee` when the inlined callee body's own
|
||||
/// `cross_package_imports` Arc is empty (i.e. the body was loaded
|
||||
/// from SQLite, where the field is `#[serde(skip)]`). Closes the
|
||||
/// indexed-mode parity gap on transitive cross-package IPA inside
|
||||
/// inlined frames.
|
||||
cross_package_imports_by_namespace: HashMap<String, std::sync::Arc<HashMap<String, FuncKey>>>,
|
||||
/// Type hierarchy index for runtime virtual-dispatch fan-out.
|
||||
///
|
||||
/// Installed by [`Self::install_hierarchy`] after pass 1 from the
|
||||
|
|
@ -864,6 +966,7 @@ impl GlobalSummaries {
|
|||
}
|
||||
// SSA summaries: last-writer-wins (exact-key replacement, no unioning)
|
||||
for (key, ssa_sum) in other.ssa_by_key {
|
||||
self.index_ssa_key(&key);
|
||||
self.ssa_by_key.insert(key, ssa_sum);
|
||||
}
|
||||
// Cross-file bodies: last-writer-wins
|
||||
|
|
@ -879,6 +982,10 @@ impl GlobalSummaries {
|
|||
for (module_id, facts) in other.router_facts_by_module {
|
||||
self.router_facts_by_module.insert(module_id, facts);
|
||||
}
|
||||
// Cross-package imports: last-writer-wins per namespace.
|
||||
for (ns, map) in other.cross_package_imports_by_namespace {
|
||||
self.cross_package_imports_by_namespace.insert(ns, map);
|
||||
}
|
||||
// Hierarchy index: invalidate after a merge so the next consumer
|
||||
// sees a freshly-built view that includes `other`'s edges. The
|
||||
// alternative, point-merging two indexes, is racy when the
|
||||
|
|
@ -966,9 +1073,41 @@ impl GlobalSummaries {
|
|||
} else {
|
||||
self.reconcile_ssa_summary_key(key, &summary)
|
||||
};
|
||||
self.index_ssa_key(&key);
|
||||
self.ssa_by_key.insert(key, summary);
|
||||
}
|
||||
|
||||
/// Push `key` onto the secondary `(lang, namespace, name)` index.
|
||||
/// Idempotent: a re-insert at the same triple does not duplicate
|
||||
/// the key in the candidate vector.
|
||||
fn index_ssa_key(&mut self, key: &FuncKey) {
|
||||
let triple = (key.lang, key.namespace.clone(), key.name.clone());
|
||||
let bucket = self.ssa_by_lang_ns_name.entry(triple).or_default();
|
||||
if !bucket.contains(key) {
|
||||
bucket.push(key.clone());
|
||||
}
|
||||
}
|
||||
|
||||
/// Look up SSA summary `FuncKey`s by `(lang, namespace, name)`.
|
||||
/// Returns `&[]` when no SSA summary at that triple has been
|
||||
/// stored. Used by the cross-package resolution path so the
|
||||
/// step-0.7 narrowing can iterate only the candidate set rather
|
||||
/// than every persisted SSA key.
|
||||
pub fn ssa_keys_by_qualified(&self, lang: Lang, namespace: &str, name: &str) -> &[FuncKey] {
|
||||
// Borrow against (Lang, &str, &str) avoiding allocation by
|
||||
// looking up with a tuple of owned Strings only when present.
|
||||
// HashMap requires equivalent hash; (Lang, String, String)
|
||||
// hashes the same as the equivalent tuple of equivalent
|
||||
// values, so we construct a small owned key for the probe.
|
||||
// Profile-light: this runs once per cross-package callee and
|
||||
// both string clones are short (namespace path + leaf name).
|
||||
let probe = (lang, namespace.to_string(), name.to_string());
|
||||
self.ssa_by_lang_ns_name
|
||||
.get(&probe)
|
||||
.map(|v| v.as_slice())
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
|
||||
/// Exact lookup of an SSA summary by fully-qualified key.
|
||||
pub fn get_ssa(&self, key: &FuncKey) -> Option<&SsaFuncSummary> {
|
||||
self.ssa_by_key.get(key)
|
||||
|
|
@ -1088,6 +1227,38 @@ impl GlobalSummaries {
|
|||
self.router_facts_by_module.len()
|
||||
}
|
||||
|
||||
/// Insert a per-file Phase-09 cross-package import map. Last-writer-wins
|
||||
/// per namespace key — re-analysing a file produces a fresh snapshot
|
||||
/// of its `(local_name → FuncKey)` resolutions.
|
||||
pub fn insert_cross_package_imports(
|
||||
&mut self,
|
||||
namespace: String,
|
||||
map: std::sync::Arc<HashMap<String, FuncKey>>,
|
||||
) {
|
||||
if map.is_empty() {
|
||||
return;
|
||||
}
|
||||
self.cross_package_imports_by_namespace
|
||||
.insert(namespace, map);
|
||||
}
|
||||
|
||||
/// Look up a per-file cross-package import map by file namespace.
|
||||
/// Used by [`crate::taint::ssa_transfer`]'s inline-analysis frame to
|
||||
/// recover the callee body's own import view when the body was loaded
|
||||
/// from SQLite (where the Arc on `CalleeSsaBody` is stripped by
|
||||
/// `#[serde(skip)]`).
|
||||
pub fn get_cross_package_imports(
|
||||
&self,
|
||||
namespace: &str,
|
||||
) -> Option<&std::sync::Arc<HashMap<String, FuncKey>>> {
|
||||
self.cross_package_imports_by_namespace.get(namespace)
|
||||
}
|
||||
|
||||
/// Count of files that contributed cross-package import maps.
|
||||
pub fn cross_package_imports_len(&self) -> usize {
|
||||
self.cross_package_imports_by_namespace.len()
|
||||
}
|
||||
|
||||
/// Insert a cross-file callee body.
|
||||
///
|
||||
/// See [`insert_ssa`](Self::insert_ssa) for the identity-safety rule.
|
||||
|
|
@ -1149,8 +1320,10 @@ impl GlobalSummaries {
|
|||
pub fn is_empty(&self) -> bool {
|
||||
self.by_key.is_empty()
|
||||
&& self.ssa_by_key.is_empty()
|
||||
&& self.ssa_by_lang_ns_name.is_empty()
|
||||
&& self.auth_by_key.is_empty()
|
||||
&& self.router_facts_by_module.is_empty()
|
||||
&& self.cross_package_imports_by_namespace.is_empty()
|
||||
}
|
||||
|
||||
/// Iterate over all (key, summary) pairs.
|
||||
|
|
@ -1683,6 +1856,10 @@ impl std::fmt::Debug for GlobalSummaries {
|
|||
.field("bodies_len", &self.bodies_by_key.len())
|
||||
.field("auth_len", &self.auth_by_key.len())
|
||||
.field("router_facts_len", &self.router_facts_by_module.len())
|
||||
.field(
|
||||
"cross_package_imports_len",
|
||||
&self.cross_package_imports_by_namespace.len(),
|
||||
)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -347,6 +347,14 @@ pub struct SsaFuncSummary {
|
|||
/// on both vulnerable and patched code.
|
||||
#[serde(default, skip_serializing_if = "SmallVec::is_empty")]
|
||||
pub validated_params_to_return: SmallVec<[usize; 2]>,
|
||||
|
||||
/// Phase-10 Next.js entry-point classification. Mirrors
|
||||
/// [`crate::summary::FuncSummary::entry_kind`] — recorded on the
|
||||
/// SSA summary so cross-file consumers don't have to consult the
|
||||
/// coarse `FuncSummary` to know whether the callee is an entry
|
||||
/// point. `None` for ordinary helpers.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub entry_kind: Option<crate::entry_points::EntryKind>,
|
||||
}
|
||||
|
||||
/// A per-return-path [`PathFact`] entry.
|
||||
|
|
|
|||
|
|
@ -530,6 +530,7 @@ fn ssa_summary_serde_round_trip_identity() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -564,6 +565,7 @@ fn ssa_summary_serde_round_trip_strip_bits() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -595,6 +597,7 @@ fn ssa_summary_serde_round_trip_add_bits() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -633,6 +636,7 @@ fn ssa_summary_serde_round_trip_all_variants() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -673,6 +677,7 @@ fn global_summaries_insert_ssa_exact_key_replacement() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
gs.insert_ssa(key.clone(), v1.clone());
|
||||
assert_eq!(gs.get_ssa(&key), Some(&v1));
|
||||
|
|
@ -701,6 +706,7 @@ fn global_summaries_insert_ssa_exact_key_replacement() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
gs.insert_ssa(key.clone(), v2.clone());
|
||||
assert_eq!(gs.get_ssa(&key), Some(&v2));
|
||||
|
|
@ -749,6 +755,7 @@ fn global_summaries_merge_with_ssa_entries() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
let sum_b = SsaFuncSummary {
|
||||
param_to_return: vec![],
|
||||
|
|
@ -773,6 +780,7 @@ fn global_summaries_merge_with_ssa_entries() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
|
||||
gs1.insert_ssa(key_a.clone(), sum_a.clone());
|
||||
|
|
@ -821,6 +829,7 @@ fn global_summaries_is_empty_considers_ssa() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -852,6 +861,7 @@ fn ssa_summary_serde_round_trip_param_to_sink_param() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -898,6 +908,7 @@ fn ssa_summary_serde_round_trip_container_fields() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -954,6 +965,7 @@ fn ssa_summary_serde_round_trip_return_abstract() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -1029,6 +1041,7 @@ fn make_callee_body(
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
},
|
||||
opt: crate::ssa::OptimizeResult {
|
||||
const_values: std::collections::HashMap::new(),
|
||||
|
|
@ -1047,6 +1060,7 @@ fn make_callee_body(
|
|||
param_count,
|
||||
node_meta: std::collections::HashMap::new(),
|
||||
body_graph: None,
|
||||
cross_package_imports: std::sync::Arc::new(std::collections::HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1478,6 +1492,7 @@ fn global_summaries_resolve_body_requires_body_present() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
// Don't insert body
|
||||
|
|
@ -3415,6 +3430,7 @@ fn sink_site_serde_round_trip_solo() {
|
|||
col: 9,
|
||||
snippet: "Command::new(\"sh\").arg(cmd).status()".into(),
|
||||
cap: Cap::CODE_EXEC | Cap::SHELL_ESCAPE,
|
||||
from_chain: false,
|
||||
};
|
||||
let json = serde_json::to_string(&site).unwrap();
|
||||
let back: SinkSite = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -3446,6 +3462,7 @@ fn ssa_summary_serde_round_trip_with_sink_sites() {
|
|||
col: 4,
|
||||
snippet: "cursor.execute(sql)".into(),
|
||||
cap: Cap::SQL_QUERY,
|
||||
from_chain: false,
|
||||
};
|
||||
let site_b = SinkSite {
|
||||
file_rel: "exec.py".into(),
|
||||
|
|
@ -3453,6 +3470,7 @@ fn ssa_summary_serde_round_trip_with_sink_sites() {
|
|||
col: 12,
|
||||
snippet: "subprocess.call(cmd, shell=True)".into(),
|
||||
cap: Cap::CODE_EXEC | Cap::SHELL_ESCAPE,
|
||||
from_chain: false,
|
||||
};
|
||||
let summary = SsaFuncSummary {
|
||||
param_to_return: vec![(0, TaintTransform::Identity)],
|
||||
|
|
@ -3526,6 +3544,7 @@ fn merge_unions_sink_sites_with_dedup() {
|
|||
col: 1,
|
||||
snippet: "execute(sql)".into(),
|
||||
cap: Cap::SQL_QUERY,
|
||||
from_chain: false,
|
||||
};
|
||||
let site_b = SinkSite {
|
||||
file_rel: "svc.py".into(),
|
||||
|
|
@ -3533,6 +3552,7 @@ fn merge_unions_sink_sites_with_dedup() {
|
|||
col: 4,
|
||||
snippet: "os.system(cmd)".into(),
|
||||
cap: Cap::CODE_EXEC,
|
||||
from_chain: false,
|
||||
};
|
||||
|
||||
let mut left = FuncSummary {
|
||||
|
|
@ -3623,6 +3643,7 @@ fn cf4_return_path_transform_serde_round_trip() {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -4459,3 +4480,95 @@ mod hierarchy_widened_tests {
|
|||
assert!(post_merge_reinstalled.contains(&k_sub));
|
||||
}
|
||||
}
|
||||
|
||||
/// A stored cross-package import map can be read back by namespace,
/// unknown namespaces miss, and empty maps are never stored.
#[test]
fn cross_package_imports_round_trip_via_global_summaries() {
    use crate::symbol::{FuncKey, FuncKind, Lang};

    let escape_key = FuncKey {
        lang: Lang::TypeScript,
        namespace: "packages/util/src/escape.ts".to_string(),
        container: String::new(),
        name: "escape".to_string(),
        arity: None,
        disambig: None,
        kind: FuncKind::Function,
    };
    let imports: std::sync::Arc<std::collections::HashMap<String, FuncKey>> =
        std::sync::Arc::new(std::collections::HashMap::from([(
            "escape".to_string(),
            escape_key,
        )]));

    let mut summaries = GlobalSummaries::new();
    summaries.insert_cross_package_imports(
        "apps/api/handler.ts".to_string(),
        std::sync::Arc::clone(&imports),
    );

    // The stored namespace resolves to the single-entry map.
    assert_eq!(summaries.cross_package_imports_len(), 1);
    let stored = summaries
        .get_cross_package_imports("apps/api/handler.ts")
        .expect("namespace lookup must hit");
    assert_eq!(stored.len(), 1);
    assert!(stored.contains_key("escape"));
    assert!(summaries.get_cross_package_imports("missing").is_none());

    // An empty map must be a no-op, so files whose resolver produced
    // no bindings do not add bookkeeping rows to the index.
    let no_bindings: std::collections::HashMap<String, FuncKey> =
        std::collections::HashMap::new();
    summaries.insert_cross_package_imports(
        "apps/api/no_imports.ts".to_string(),
        std::sync::Arc::new(no_bindings),
    );
    assert_eq!(summaries.cross_package_imports_len(), 1);
}
|
||||
|
||||
/// Merging two `GlobalSummaries` keeps the cross-package import maps
/// of both sides when their namespaces differ.
#[test]
fn cross_package_imports_merged_across_thread_local_summaries() {
    use crate::symbol::{FuncKey, FuncKind, Lang};

    // Free TypeScript function key at `namespace` named `name`.
    let ts_function = |namespace: &str, name: &str| FuncKey {
        lang: Lang::TypeScript,
        namespace: namespace.to_string(),
        container: String::new(),
        name: name.to_string(),
        arity: None,
        disambig: None,
        kind: FuncKind::Function,
    };
    // Import map holding exactly one `local → target` binding.
    let single_import = |local: &str, target: FuncKey| {
        std::sync::Arc::new(std::collections::HashMap::from([(
            local.to_string(),
            target,
        )]))
    };

    let mut gs_a = GlobalSummaries::new();
    gs_a.insert_cross_package_imports(
        "apps/api/handler_a.ts".to_string(),
        single_import(
            "escape",
            ts_function("packages/util/src/escape.ts", "escape"),
        ),
    );

    let mut gs_b = GlobalSummaries::new();
    gs_b.insert_cross_package_imports(
        "apps/api/handler_b.ts".to_string(),
        single_import(
            "format",
            ts_function("packages/util/src/format.ts", "format"),
        ),
    );

    gs_a.merge(gs_b);

    assert_eq!(gs_a.cross_package_imports_len(), 2);
    for namespace in ["apps/api/handler_a.ts", "apps/api/handler_b.ts"] {
        assert!(gs_a.get_cross_package_imports(namespace).is_some());
    }
}
|
||||
|
|
|
|||
|
|
@ -262,5 +262,31 @@ pub fn normalize_namespace(abs_path: &str, root: Option<&str>) -> String {
|
|||
abs_path.to_string()
|
||||
}
|
||||
|
||||
/// Phase-04 namespace builder that prefixes a project-relative path with
|
||||
/// the canonical package name when the importer file lies inside a
|
||||
/// resolved [`crate::resolve::PackageEntry`].
|
||||
///
|
||||
/// Returns `"@scope/name::src/file.ts"` when the file is in a package
|
||||
/// and `"src/file.ts"` (the same value `normalize_namespace` produces)
|
||||
/// otherwise. Phase 04 ships this helper unused at the resolution
|
||||
/// site, phase 10 will route [`FuncKey`] construction through it for
|
||||
/// JS/TS files so cross-file callee lookup honours the package
|
||||
/// boundary.
|
||||
pub fn namespace_with_package(
|
||||
abs_path: &str,
|
||||
root: Option<&str>,
|
||||
module_graph: Option<&crate::resolve::ModuleGraph>,
|
||||
) -> String {
|
||||
let plain = normalize_namespace(abs_path, root);
|
||||
let Some(graph) = module_graph else {
|
||||
return plain;
|
||||
};
|
||||
let path = std::path::Path::new(abs_path);
|
||||
match graph.package_for(path) {
|
||||
Some(pkg) => format!("{}::{}", pkg.name, plain),
|
||||
None => plain,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
|
|
|||
|
|
@ -1384,6 +1384,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let empty_succs = HashMap::new();
|
||||
|
|
@ -1445,6 +1446,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let empty_succs = HashMap::new();
|
||||
|
|
@ -1579,6 +1581,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = make_finding(n0, n1);
|
||||
|
|
@ -1688,6 +1691,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
// Finding path goes through B0 → B1 → B3
|
||||
|
|
@ -1836,6 +1840,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -1950,6 +1955,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let mut exc_succs: HashMap<BlockId, SmallVec<[BlockId; 2]>> = HashMap::new();
|
||||
|
|
@ -2018,6 +2024,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let mut exc_succs: HashMap<BlockId, SmallVec<[BlockId; 2]>> = HashMap::new();
|
||||
|
|
@ -2127,6 +2134,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
|
|||
|
|
@ -391,6 +391,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -438,6 +439,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -521,6 +523,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -585,6 +588,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -667,6 +671,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -740,6 +745,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -776,6 +782,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -834,6 +841,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -916,6 +924,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -996,6 +1005,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -1033,6 +1043,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
|
|||
|
|
@ -381,6 +381,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -456,6 +457,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -560,6 +562,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let ctx = SymexContext {
|
||||
|
|
@ -622,6 +625,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let ctx = SymexContext {
|
||||
|
|
|
|||
|
|
@ -355,6 +355,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let witness = state.get_sink_witness(&finding, &ssa);
|
||||
|
|
@ -397,6 +398,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
assert_eq!(state.get_sink_witness(&finding, &ssa), None);
|
||||
|
|
@ -436,6 +438,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
assert_eq!(state.get_sink_witness(&finding, &ssa), None);
|
||||
|
|
@ -478,6 +481,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
state.widen_at_loop_head(BlockId(0), &ssa);
|
||||
|
|
@ -523,6 +527,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
state.widen_at_loop_head(BlockId(0), &ssa);
|
||||
|
|
@ -568,6 +573,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
state.widen_at_loop_head(BlockId(0), &ssa);
|
||||
|
|
|
|||
|
|
@ -1014,6 +1014,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1595,6 +1596,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -1665,6 +1667,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -1735,6 +1738,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -1800,6 +1804,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -1865,6 +1870,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -2064,6 +2070,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2144,6 +2151,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2225,6 +2233,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
// Second "send", in ns B, also with same arity → ambiguous bare-name
|
||||
|
|
@ -2256,6 +2265,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
// Also register the type-qualified name so Attempt 1 can find it
|
||||
|
|
@ -2287,6 +2297,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2367,6 +2378,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2449,6 +2461,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
insert_java_summary(
|
||||
|
|
@ -2479,6 +2492,7 @@ mod tests {
|
|||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
entry_kind: None,
|
||||
},
|
||||
);
|
||||
// No "HttpClient.send" summary registered, disambiguation has 0 exact matches
|
||||
|
|
|
|||
|
|
@ -797,6 +797,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -854,6 +855,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let cfg = Cfg::new();
|
||||
let finding = Finding {
|
||||
|
|
@ -917,6 +919,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -981,6 +984,7 @@ mod tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
|
|||
|
|
@ -753,6 +753,7 @@ mod tests {
|
|||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
(ssa, cfg)
|
||||
|
|
@ -843,6 +844,7 @@ mod tests {
|
|||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let demand = DemandState::new(Cap::all());
|
||||
let (step, next) = backward_transfer(&ssa, SsaValue(0), &demand);
|
||||
|
|
@ -876,6 +878,7 @@ mod tests {
|
|||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let demand = DemandState::new(Cap::all());
|
||||
let (step, _next) = backward_transfer(&ssa, SsaValue(0), &demand);
|
||||
|
|
@ -964,6 +967,7 @@ mod tests {
|
|||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let demand = DemandState::new(Cap::all());
|
||||
|
|
@ -1053,6 +1057,7 @@ mod tests {
|
|||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let ctx = BackwardsCtx::new(&ssa, &cfg, Lang::JavaScript);
|
||||
|
|
|
|||
472
src/taint/mod.rs
472
src/taint/mod.rs
|
|
@ -403,6 +403,86 @@ fn compute_module_aliases_for_summary(
|
|||
crate::ssa::const_prop::collect_module_aliases(ssa, &cp.values)
|
||||
}
|
||||
|
||||
/// Build a per-file cross-package import lookup for Phase 09 cross-file IPA.
|
||||
///
|
||||
/// For each [`crate::resolve::ImportBinding`] whose resolver verdict
|
||||
/// produced a concrete `(resolved_file, exported_name)` pair, builds the
|
||||
/// canonical [`FuncKey`] of the imported function in its own file's
|
||||
/// scan-root-relative namespace and stores it under the caller-file's
|
||||
/// local binding name.
|
||||
///
|
||||
/// Returns an empty map when the file has no resolved imports (non-JS/TS
|
||||
/// files, scans without a `ModuleGraph`, side-effect-only imports, or
|
||||
/// builtin/unresolved specifiers). The caller passes `None` to
|
||||
/// `SsaTaintTransfer::cross_package_imports` in that case.
|
||||
///
|
||||
/// `module_graph` aligns the target [`FuncKey::namespace`] with the
|
||||
/// package-prefixed form that `FuncSummary::func_key_with_resolver`
|
||||
/// produces on the cross-file storage side: when the resolved file lies
|
||||
/// inside a discovered package the namespace becomes
|
||||
/// `"@scope/name::src/file.ts"`, otherwise it falls back to plain
|
||||
/// `normalize_namespace`. Step 0.7 of `resolve_callee_full` looks up
|
||||
/// `(lang, namespace, name)` against `GlobalSummaries::ssa_by_key`
|
||||
/// where the SSA-side keys are now produced via the same
|
||||
/// `namespace_with_package` shape (callers in `crate::ast::ParsedFile`
|
||||
/// pre-compute the package-prefixed namespace before invoking
|
||||
/// `lower_all_functions_from_bodies`), so the two sides agree even
|
||||
/// when two packages share a project-relative file path.
|
||||
///
|
||||
/// `module_graph = None` (single-package scans, non-JS/TS files, unit
|
||||
/// tests, indexed-mode SQLite fallback) collapses to the historical
|
||||
/// `normalize_namespace` behaviour, keeping the migration strictly
|
||||
/// additive for any consumer that does not opt in.
|
||||
///
|
||||
/// The constructed key intentionally leaves `container`, `arity`,
|
||||
/// `disambig`, and `kind` at their defaults — the resolver verdict only
|
||||
/// fixes the `(lang, namespace, name)` triple, and step 0.7 of
|
||||
/// `resolve_callee_full` matches against `GlobalSummaries::ssa_by_key`
|
||||
/// using only those three fields plus an arity hint when available.
|
||||
pub fn build_cross_package_func_keys(
|
||||
resolved_imports: &[crate::resolve::ImportBinding],
|
||||
scan_root: Option<&str>,
|
||||
module_graph: Option<&crate::resolve::ModuleGraph>,
|
||||
caller_lang: Lang,
|
||||
) -> HashMap<String, FuncKey> {
|
||||
let mut out: HashMap<String, FuncKey> = HashMap::new();
|
||||
for binding in resolved_imports {
|
||||
let Some(ref resolved_file) = binding.resolved_file else {
|
||||
continue;
|
||||
};
|
||||
let Some(ref exported_name) = binding.exported_name else {
|
||||
continue;
|
||||
};
|
||||
if exported_name.is_empty()
|
||||
|| exported_name == "*"
|
||||
|| exported_name == "default"
|
||||
|| binding.local_name.is_empty()
|
||||
{
|
||||
// Side-effect / namespace / default imports do not map to a
|
||||
// single named export; step 0.7 needs a concrete leaf name.
|
||||
continue;
|
||||
}
|
||||
let target_lang = resolved_file
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.and_then(Lang::from_extension)
|
||||
.unwrap_or(caller_lang);
|
||||
let abs = resolved_file.to_string_lossy();
|
||||
let namespace = crate::symbol::namespace_with_package(&abs, scan_root, module_graph);
|
||||
let key = FuncKey {
|
||||
lang: target_lang,
|
||||
namespace,
|
||||
container: String::new(),
|
||||
name: exported_name.clone(),
|
||||
arity: None,
|
||||
disambig: None,
|
||||
kind: FuncKind::Function,
|
||||
};
|
||||
out.insert(binding.local_name.clone(), key);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Run taint analysis on all bodies in a file.
|
||||
///
|
||||
/// Uses a unified multi-body analysis for all languages:
|
||||
|
|
@ -432,25 +512,32 @@ pub fn analyse_file(
|
|||
ssa_transfer::reset_all_validated_spans();
|
||||
// No locator: pass-2 intra-file summaries are transient (not persisted)
|
||||
// and behavior depends on SinkSite.cap only, which is always populated.
|
||||
let (ssa_summaries, callee_bodies) = lower_all_functions_from_bodies(
|
||||
file_cfg,
|
||||
caller_lang,
|
||||
caller_namespace,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
None,
|
||||
);
|
||||
analyse_file_with_lowered(
|
||||
file_cfg,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
caller_lang,
|
||||
caller_namespace,
|
||||
interop_edges,
|
||||
extra_labels,
|
||||
&ssa_summaries,
|
||||
&callee_bodies,
|
||||
)
|
||||
crate::ssa::type_facts::with_file_imports(Some(&file_cfg.local_imports), || {
|
||||
crate::cfg::safe_fields::with_safe_lookup_fields(Some(&file_cfg.safe_lookup_fields), || {
|
||||
let (ssa_summaries, callee_bodies) = lower_all_functions_from_bodies(
|
||||
file_cfg,
|
||||
caller_lang,
|
||||
caller_namespace,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
analyse_file_with_lowered(
|
||||
file_cfg,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
caller_lang,
|
||||
caller_namespace,
|
||||
interop_edges,
|
||||
extra_labels,
|
||||
&ssa_summaries,
|
||||
&callee_bodies,
|
||||
None,
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Same as [`analyse_file`] but takes pre-lowered SSA summaries + callee
|
||||
|
|
@ -459,6 +546,10 @@ pub fn analyse_file(
|
|||
/// the SSA-artifact extractor; the bare [`analyse_file`] entry-point keeps
|
||||
/// its prior signature for any caller that does not have a pre-lowered
|
||||
/// result handy.
|
||||
///
|
||||
/// `cross_package_imports` is the optional Phase-09 lookup map built via
|
||||
/// [`build_cross_package_func_keys`]. `None` (the public-API default)
|
||||
/// disables cross-package step 0.7 in `resolve_callee_full`.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn analyse_file_with_lowered(
|
||||
file_cfg: &FileCfg,
|
||||
|
|
@ -470,9 +561,49 @@ pub(crate) fn analyse_file_with_lowered(
|
|||
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
ssa_summaries: &std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
|
||||
callee_bodies: &std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
|
||||
cross_package_imports: Option<&std::collections::HashMap<String, FuncKey>>,
|
||||
) -> Vec<Finding> {
|
||||
let _span = tracing::debug_span!("taint_analyse_file").entered();
|
||||
|
||||
// Publish the per-file local-import view so the ORM TypeKind gate
|
||||
// inside [`crate::ssa::type_facts::constructor_type`] can read it
|
||||
// during downstream `optimize_ssa_with_param_types` passes. The
|
||||
// outer `analyse_file` already wraps this for its own
|
||||
// `lower_all_functions_from_bodies` pre-pass; wrapping here too
|
||||
// keeps direct callers (e.g. [`crate::ast::analyse_file_fused`])
|
||||
// covered. Idempotent under nesting — the inner guard restores
|
||||
// the outer value on drop.
|
||||
crate::ssa::type_facts::with_file_imports(Some(&file_cfg.local_imports), || {
|
||||
crate::cfg::safe_fields::with_safe_lookup_fields(Some(&file_cfg.safe_lookup_fields), || {
|
||||
analyse_file_with_lowered_inner(
|
||||
file_cfg,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
caller_lang,
|
||||
caller_namespace,
|
||||
interop_edges,
|
||||
extra_labels,
|
||||
ssa_summaries,
|
||||
callee_bodies,
|
||||
cross_package_imports,
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn analyse_file_with_lowered_inner(
|
||||
file_cfg: &FileCfg,
|
||||
local_summaries: &FuncSummaries,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
caller_lang: Lang,
|
||||
caller_namespace: &str,
|
||||
interop_edges: &[InteropEdge],
|
||||
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
ssa_summaries: &std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
|
||||
callee_bodies: &std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
|
||||
cross_package_imports: Option<&std::collections::HashMap<String, FuncKey>>,
|
||||
) -> Vec<Finding> {
|
||||
// NOTE: the path-safe-suppressed span set is reset by the caller, not
|
||||
// here. Per-parameter probes inside the lowering phase
|
||||
// (`lower_all_functions_from_bodies`) can already publish spans via
|
||||
|
|
@ -551,6 +682,7 @@ pub(crate) fn analyse_file_with_lowered(
|
|||
max_iterations,
|
||||
import_bindings_ref,
|
||||
cross_file_bodies_ref,
|
||||
cross_package_imports,
|
||||
);
|
||||
|
||||
// 4. Deduplicate findings using a richer key that preserves distinct
|
||||
|
|
@ -797,6 +929,34 @@ fn inject_external_type_facts(
|
|||
}
|
||||
}
|
||||
|
||||
/// Apply entry-kind-derived overrides to a body's `param_types` vector.
|
||||
///
|
||||
/// Today only `EntryKind::AppRouteHandler` triggers an override: the first
|
||||
/// formal of a Next.js App Router handler always carries a Web `Request`,
|
||||
/// regardless of the user's TypeScript annotation. Returns `Some(vec)` when
|
||||
/// the override changes the vector, `None` otherwise. Folding the rule into
|
||||
/// one helper keeps the two consumers (`analyse_body_with_seed` and
|
||||
/// `lower_all_functions_from_bodies_inner`) in lockstep.
|
||||
fn entry_kind_param_type_override(
|
||||
entry_kind: Option<&crate::entry_points::EntryKind>,
|
||||
param_types: &[Option<crate::ssa::type_facts::TypeKind>],
|
||||
) -> Option<Vec<Option<crate::ssa::type_facts::TypeKind>>> {
|
||||
if matches!(
|
||||
entry_kind,
|
||||
Some(crate::entry_points::EntryKind::AppRouteHandler { .. })
|
||||
) {
|
||||
let mut pt = param_types.to_vec();
|
||||
if pt.is_empty() {
|
||||
pt.push(Some(crate::ssa::type_facts::TypeKind::Request));
|
||||
} else {
|
||||
pt[0] = Some(crate::ssa::type_facts::TypeKind::Request);
|
||||
}
|
||||
Some(pt)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Analyse a single body with an optional parent seed.
|
||||
///
|
||||
/// Shared logic extracted from `analyse_multi_body` to avoid deep nesting.
|
||||
|
|
@ -818,6 +978,7 @@ fn analyse_body_with_seed(
|
|||
import_bindings: Option<&crate::cfg::ImportBindings>,
|
||||
cross_file_bodies: Option<&std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>>,
|
||||
parent_var_types: Option<&HashMap<String, crate::ssa::type_facts::TypeKind>>,
|
||||
cross_package_imports: Option<&std::collections::HashMap<String, FuncKey>>,
|
||||
) -> (
|
||||
Vec<Finding>,
|
||||
Option<HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>>,
|
||||
|
|
@ -853,10 +1014,156 @@ fn analyse_body_with_seed(
|
|||
// so that `cmd -> Runtime.exec(cmd)` picks up `cmd` as a handler param.
|
||||
let is_java_lambda =
|
||||
lang == Lang::Java && body.meta.kind == crate::cfg::BodyKind::AnonymousFunction;
|
||||
// Java methods tagged with a Spring/JaxRs entry-point annotation need
|
||||
// scoped lowering so the formal parameters (`@RequestParam String name`,
|
||||
// `@PathParam Long id`, ...) materialise as `SsaOp::Param` ops that
|
||||
// the entry-point seeding pass paints as `Source(UserInput)`. Restricted
|
||||
// to Java because (a) JS/TS already use scoped lowering above, (b) Go
|
||||
// and Ruby handlers introduce request-OBJECT formals (`r *http.Request`,
|
||||
// implicit `params`) whose Cap::all() seeding triggers FPs at sinks
|
||||
// that take the bare object (e.g. `http.Redirect(w, r, safe, code)`
|
||||
// where `r` is the request, not the URL), and (c) Python free-name
|
||||
// captures (`request`, `b64decode`) bubble up as synthetic externals
|
||||
// and shift source attribution. Java methods don't have those
|
||||
// free-capture shapes (every reference is via explicit qualification),
|
||||
// so the precision-vs-recall trade lands on the precision side.
|
||||
let is_java_entry_method = lang == Lang::Java
|
||||
&& body.meta.kind == crate::cfg::BodyKind::NamedFunction
|
||||
&& body.meta.func_key.as_ref().is_some_and(|k| {
|
||||
let mut k = k.clone();
|
||||
k.namespace = namespace.to_string();
|
||||
ssa_summaries
|
||||
.and_then(|m| m.get(&k))
|
||||
.is_some_and(|s| s.entry_kind.is_some())
|
||||
});
|
||||
// Rust framework handlers (axum, actix-web, Rocket) need scoped
|
||||
// lowering so the typed-extractor formals (`Query<T>`, `Json<T>`,
|
||||
// `Form<T>`, `Path<T>`) materialise as `SsaOp::Param` ops that the
|
||||
// entry-point seeding pass paints as `Source(UserInput)`. The
|
||||
// per-formal seed decision is gated on a recovered `TypeKind` from
|
||||
// `BodyMeta.param_types`: extractor-wrapped formals get
|
||||
// `Some(TypeKind::Int|String|Bool|...)` (or a DTO type) via
|
||||
// `rust_type_to_kind`, while denylist wrappers (`State<T>`,
|
||||
// `Extension<T>`, `Pool<T>`, ...) and bare primitives stay `None`
|
||||
// and are skipped at seed time. This keeps DI handles
|
||||
// server-side without painting the database pool as adversary input.
|
||||
let is_rust_entry_method = lang == Lang::Rust
|
||||
&& body.meta.kind == crate::cfg::BodyKind::NamedFunction
|
||||
&& body.meta.func_key.as_ref().is_some_and(|k| {
|
||||
let mut k = k.clone();
|
||||
k.namespace = namespace.to_string();
|
||||
ssa_summaries.and_then(|m| m.get(&k)).is_some_and(|s| {
|
||||
matches!(
|
||||
s.entry_kind,
|
||||
Some(crate::entry_points::EntryKind::AxumHandler)
|
||||
| Some(crate::entry_points::EntryKind::ActixHandler)
|
||||
| Some(crate::entry_points::EntryKind::RocketRoute)
|
||||
)
|
||||
})
|
||||
});
|
||||
// Python Flask handlers need scoped lowering so the route-bound formal
|
||||
// parameters (`@app.route("/users/<name>")` + `def view(name):`)
|
||||
// materialise as `SsaOp::Param` ops the entry-point seeding pass paints
|
||||
// as `Source(UserInput)`. The per-formal seed decision is gated against
|
||||
// `BodyMeta.param_route_capture`, so only formals whose names appear as
|
||||
// path captures in the routing decorator are painted; implicit globals
|
||||
// (`request`, `g`, `session`) and DI-injected formals stay un-seeded.
|
||||
// Restricted to Flask (`FlaskRoute`) here because FastAPI / Django
|
||||
// free-name capture shapes (`request`, `b64decode`) bubble up as
|
||||
// synthetic externals under scoped lowering and shift source
|
||||
// attribution, while Flask handlers have all formals = path captures
|
||||
// (precision lands cleanly).
|
||||
let is_python_flask_route = lang == Lang::Python
|
||||
&& body.meta.kind == crate::cfg::BodyKind::NamedFunction
|
||||
&& body
|
||||
.meta
|
||||
.param_route_capture
|
||||
.iter()
|
||||
.any(|captured| *captured)
|
||||
&& body.meta.func_key.as_ref().is_some_and(|k| {
|
||||
let mut k = k.clone();
|
||||
k.namespace = namespace.to_string();
|
||||
ssa_summaries.and_then(|m| m.get(&k)).is_some_and(|s| {
|
||||
matches!(
|
||||
s.entry_kind,
|
||||
Some(crate::entry_points::EntryKind::FlaskRoute { .. })
|
||||
)
|
||||
})
|
||||
});
|
||||
// Ruby Sinatra route handlers need scoped lowering so the block
|
||||
// parameters (`get "/u/:name" do |name| ... end`) materialise as
|
||||
// `SsaOp::Param` ops the entry-point seeding pass paints as
|
||||
// `Source(UserInput)`. Sinatra route bodies are anonymous (the
|
||||
// `do_block` AST node has no name field), so `BodyKind` is
|
||||
// `AnonymousFunction`; the gate accepts both anonymous and named.
|
||||
// Per-formal seed decision is gated against
|
||||
// `BodyMeta.param_route_capture`, so only block formals whose
|
||||
// names appear as `:name` segments in the routing path are
|
||||
// painted. Block formals not in the capture set fall back to
|
||||
// existing label rules.
|
||||
let is_ruby_sinatra_route = lang == Lang::Ruby
|
||||
&& matches!(
|
||||
body.meta.kind,
|
||||
crate::cfg::BodyKind::NamedFunction | crate::cfg::BodyKind::AnonymousFunction
|
||||
)
|
||||
&& body
|
||||
.meta
|
||||
.param_route_capture
|
||||
.iter()
|
||||
.any(|captured| *captured)
|
||||
&& body.meta.func_key.as_ref().is_some_and(|k| {
|
||||
let mut k = k.clone();
|
||||
k.namespace = namespace.to_string();
|
||||
ssa_summaries.and_then(|m| m.get(&k)).is_some_and(|s| {
|
||||
matches!(
|
||||
s.entry_kind,
|
||||
Some(crate::entry_points::EntryKind::SinatraRoute { .. })
|
||||
)
|
||||
})
|
||||
});
|
||||
// Python FastAPI / Starlette handlers need scoped lowering so the
|
||||
// route-bound and typed-extractor formals materialise as `SsaOp::Param`
|
||||
// ops that the entry-point seeding pass paints as `Source(UserInput)`.
|
||||
// The per-formal decision in `ssa_transfer` consults BOTH
|
||||
// `BodyMeta.param_route_capture` (for `{name}` brace-segment captures)
|
||||
// and `type_facts.get_type(value)` (for `Annotated[T, Path()/Query()/Body()
|
||||
// /Header()/Cookie()/Form()/File()]` typed extractors). Formals without
|
||||
// either signal — `db: Session = Depends(get_db)`, `request: Request`,
|
||||
// bare `session` — stay un-seeded, matching the Hard Rule 3 policy that
|
||||
// unannotated formals are not adversary input.
|
||||
//
|
||||
// Gated on "at least one formal qualifies" to mirror the Flask gate:
|
||||
// a handler with zero path captures and zero typed extractors gets the
|
||||
// existing label-rule treatment (free-name captures of `request`,
|
||||
// `b64decode`, etc. bubble up as synthetic externals without scoped
|
||||
// lowering shifting attribution).
|
||||
let is_python_fastapi_route = lang == Lang::Python
|
||||
&& body.meta.kind == crate::cfg::BodyKind::NamedFunction
|
||||
&& (body
|
||||
.meta
|
||||
.param_route_capture
|
||||
.iter()
|
||||
.any(|captured| *captured)
|
||||
|| body.meta.param_types.iter().any(|t| t.is_some()))
|
||||
&& body.meta.func_key.as_ref().is_some_and(|k| {
|
||||
let mut k = k.clone();
|
||||
k.namespace = namespace.to_string();
|
||||
ssa_summaries.and_then(|m| m.get(&k)).is_some_and(|s| {
|
||||
matches!(
|
||||
s.entry_kind,
|
||||
Some(crate::entry_points::EntryKind::FastApiRoute { .. })
|
||||
)
|
||||
})
|
||||
});
|
||||
let use_scoped_lowering = !is_toplevel
|
||||
&& (matches!(lang, Lang::JavaScript | Lang::TypeScript)
|
||||
|| has_nonempty_seed
|
||||
|| is_java_lambda);
|
||||
|| is_java_lambda
|
||||
|| is_java_entry_method
|
||||
|| is_rust_entry_method
|
||||
|| is_python_flask_route
|
||||
|| is_python_fastapi_route
|
||||
|| is_ruby_sinatra_route);
|
||||
let ssa_result = if use_scoped_lowering {
|
||||
let func_name = body.meta.name.clone().unwrap_or_else(|| {
|
||||
body.meta
|
||||
|
|
@ -878,11 +1185,28 @@ fn analyse_body_with_seed(
|
|||
|
||||
match ssa_result {
|
||||
Ok(mut ssa_body) => {
|
||||
// App Router handlers carry a Web `Request` as their first
|
||||
// formal. Override `param_types[0]` so the type-fact pass tags
|
||||
// the formal as `TypeKind::Request` and receiver-method reads
|
||||
// (`req.json()`, ...) rewrite to `Request.<method>` for
|
||||
// type-qualified label resolution.
|
||||
let body_entry_kind = body.meta.func_key.as_ref().and_then(|k| {
|
||||
let mut k = k.clone();
|
||||
k.namespace = namespace.to_string();
|
||||
ssa_summaries
|
||||
.and_then(|m| m.get(&k))
|
||||
.and_then(|s| s.entry_kind.clone())
|
||||
});
|
||||
let overridden_param_types =
|
||||
entry_kind_param_type_override(body_entry_kind.as_ref(), &body.meta.param_types);
|
||||
let param_types_ref = overridden_param_types
|
||||
.as_deref()
|
||||
.unwrap_or(body.meta.param_types.as_slice());
|
||||
let mut opt = crate::ssa::optimize_ssa_with_param_types(
|
||||
&mut ssa_body,
|
||||
cfg,
|
||||
Some(lang),
|
||||
&body.meta.param_types,
|
||||
param_types_ref,
|
||||
);
|
||||
// Forward parent-body type facts onto closure-captured Param ops
|
||||
// before any consumer reads `opt.type_facts`. This is the lever
|
||||
|
|
@ -965,6 +1289,16 @@ fn analyse_body_with_seed(
|
|||
&& body.meta.kind == crate::cfg::BodyKind::AnonymousFunction),
|
||||
cross_file_bodies,
|
||||
pointer_facts: pointer_facts.as_ref(),
|
||||
cross_package_imports,
|
||||
// Phase 10 — Next.js entry-point seeding (looked up
|
||||
// above when overriding `param_types`).
|
||||
entry_kind: body_entry_kind,
|
||||
param_route_capture: if body.meta.param_route_capture.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(body.meta.param_route_capture.as_slice())
|
||||
},
|
||||
recording_summary: false,
|
||||
};
|
||||
let (events, block_states) =
|
||||
ssa_transfer::run_ssa_taint_full(&ssa_body, cfg, &transfer);
|
||||
|
|
@ -1098,6 +1432,7 @@ fn analyse_multi_body(
|
|||
max_iterations: usize,
|
||||
import_bindings: Option<&crate::cfg::ImportBindings>,
|
||||
cross_file_bodies: Option<&std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>>,
|
||||
cross_package_imports: Option<&std::collections::HashMap<String, FuncKey>>,
|
||||
) -> Vec<Finding> {
|
||||
let order = containment_order(&file_cfg.bodies);
|
||||
let mut all_findings: Vec<Finding> = Vec::new();
|
||||
|
|
@ -1144,6 +1479,7 @@ fn analyse_multi_body(
|
|||
import_bindings,
|
||||
cross_file_bodies,
|
||||
parent_var_types,
|
||||
cross_package_imports,
|
||||
);
|
||||
tracing::debug!(
|
||||
body_id = body.meta.id.0,
|
||||
|
|
@ -1340,6 +1676,7 @@ fn analyse_multi_body(
|
|||
import_bindings,
|
||||
cross_file_bodies,
|
||||
parent_var_types,
|
||||
cross_package_imports,
|
||||
);
|
||||
// Phase-B: replace (not append) this body's findings
|
||||
// in the cache. Previous rounds' findings for this
|
||||
|
|
@ -1688,6 +2025,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
|
|||
/// resistant identity we have: same-name methods on different classes, same-
|
||||
/// name overloads with different arity, and anonymous bodies at distinct
|
||||
/// source spans all get distinct keys.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn lower_all_functions_from_bodies(
|
||||
file_cfg: &FileCfg,
|
||||
lang: Lang,
|
||||
|
|
@ -1695,6 +2033,38 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
local_summaries: &FuncSummaries,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
|
||||
scan_root: Option<&str>,
|
||||
module_graph: Option<&crate::resolve::ModuleGraph>,
|
||||
) -> (
|
||||
std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
|
||||
std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
|
||||
) {
|
||||
crate::ssa::type_facts::with_file_imports(Some(&file_cfg.local_imports), || {
|
||||
crate::cfg::safe_fields::with_safe_lookup_fields(Some(&file_cfg.safe_lookup_fields), || {
|
||||
lower_all_functions_from_bodies_inner(
|
||||
file_cfg,
|
||||
lang,
|
||||
namespace,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
locator,
|
||||
scan_root,
|
||||
module_graph,
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn lower_all_functions_from_bodies_inner(
|
||||
file_cfg: &FileCfg,
|
||||
lang: Lang,
|
||||
namespace: &str,
|
||||
local_summaries: &FuncSummaries,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
|
||||
scan_root: Option<&str>,
|
||||
module_graph: Option<&crate::resolve::ModuleGraph>,
|
||||
) -> (
|
||||
std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
|
||||
std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
|
||||
|
|
@ -1702,6 +2072,23 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
let mut summaries = std::collections::HashMap::new();
|
||||
let mut bodies = std::collections::HashMap::new();
|
||||
|
||||
// Build the file's cross-package import map once and share it
|
||||
// across every body produced from this file. The map mirrors what
|
||||
// `analyse_file_with_lowered` builds at pass-2 entry, but storing
|
||||
// it on each `CalleeSsaBody` lets the inline-analysis frame inside
|
||||
// another file resolve the callee's local import names against
|
||||
// the callee's own package boundary (Phase 09 step 0.7) instead of
|
||||
// skipping the lookup entirely.
|
||||
let cross_package_imports_arc = {
|
||||
let map = build_cross_package_func_keys(
|
||||
&file_cfg.resolved_imports,
|
||||
scan_root,
|
||||
module_graph,
|
||||
lang,
|
||||
);
|
||||
std::sync::Arc::new(map)
|
||||
};
|
||||
|
||||
for body in file_cfg.function_bodies() {
|
||||
let _t_misc = std::time::Instant::now();
|
||||
let func_name = body.meta.name.clone().unwrap_or_else(|| {
|
||||
|
|
@ -1797,6 +2184,15 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
param_types_ref,
|
||||
);
|
||||
|
||||
// Phase 10 — annotate entry-point summaries. The pass-2
|
||||
// taint engine reads `entry_kind` to seed the function's
|
||||
// formals as `TaintOrigin::Source` at SSA entry, mirroring
|
||||
// an HTTP handler's adversary-controlled inputs. Always
|
||||
// recorded even on empty summaries so caller-side resolution
|
||||
// sees the entry classification through cross-file lookups.
|
||||
let mut summary = summary;
|
||||
summary.entry_kind = file_cfg.entry_kinds.get(&body.meta.span).cloned();
|
||||
|
||||
// Always insert the summary, even when all fields are empty/default.
|
||||
// An empty summary tells resolve_callee "this function exists and has
|
||||
// no taint effects", preventing fallthrough to the less precise old
|
||||
|
|
@ -1804,18 +2200,34 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
// For zero-param functions we only insert when the summary carries
|
||||
// the fresh-container signal (the only observable effect worth
|
||||
// persisting for a parameter-less body).
|
||||
if param_count > 0 || summary.points_to.returns_fresh_alloc {
|
||||
//
|
||||
// An entry-kind tag also keeps the summary in the map even
|
||||
// for zero-param entry points so cross-file resolvers see it.
|
||||
if param_count > 0
|
||||
|| summary.points_to.returns_fresh_alloc
|
||||
|| summary.entry_kind.is_some()
|
||||
{
|
||||
summaries.insert(key.clone(), summary);
|
||||
}
|
||||
perf_lower_record(1, _t_extract.elapsed().as_micros());
|
||||
}
|
||||
|
||||
let _t_opt = std::time::Instant::now();
|
||||
// Override `param_types[0]` for entry-kind-tagged formals (e.g. App
|
||||
// Router handlers receive a Web `Request`). Other entry kinds keep
|
||||
// the ambient param-type vector unchanged. See
|
||||
// `entry_kind_param_type_override` for the full rule set.
|
||||
let entry_kind_for_body = file_cfg.entry_kinds.get(&body.meta.span);
|
||||
let overridden_param_types =
|
||||
entry_kind_param_type_override(entry_kind_for_body, &body.meta.param_types);
|
||||
let param_types_ref = overridden_param_types
|
||||
.as_deref()
|
||||
.unwrap_or(body.meta.param_types.as_slice());
|
||||
let opt = crate::ssa::optimize_ssa_with_param_types(
|
||||
&mut func_ssa,
|
||||
&body.graph,
|
||||
Some(lang),
|
||||
&body.meta.param_types,
|
||||
param_types_ref,
|
||||
);
|
||||
perf_lower_record(2, _t_opt.elapsed().as_micros());
|
||||
|
||||
|
|
@ -1857,6 +2269,7 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
param_count,
|
||||
node_meta: std::collections::HashMap::new(),
|
||||
body_graph: Some(body.graph.clone()),
|
||||
cross_package_imports: std::sync::Arc::clone(&cross_package_imports_arc),
|
||||
},
|
||||
);
|
||||
perf_lower_record(6, _t_misc2.elapsed().as_micros());
|
||||
|
|
@ -2256,6 +2669,10 @@ fn augment_summaries_with_child_sinks(
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
|
||||
let (_parent_events, parent_block_states) =
|
||||
|
|
@ -2320,6 +2737,10 @@ fn augment_summaries_with_child_sinks(
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
|
||||
let (child_events, _child_block_states) =
|
||||
|
|
@ -2448,6 +2869,7 @@ type EligibleCalleeBodies = Vec<(FuncKey, ssa_transfer::CalleeSsaBody)>;
|
|||
/// entry) and lowers each body's graph with its recorded entry/params. This
|
||||
/// path is equivalent to what `analyse_file` uses at taint time, so the SSA
|
||||
/// summaries produced here line up exactly with what pass 2 will consult.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn extract_ssa_artifacts_from_file_cfg(
|
||||
file_cfg: &FileCfg,
|
||||
lang: Lang,
|
||||
|
|
@ -2455,6 +2877,8 @@ pub(crate) fn extract_ssa_artifacts_from_file_cfg(
|
|||
local_summaries: &FuncSummaries,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
|
||||
scan_root: Option<&str>,
|
||||
module_graph: Option<&crate::resolve::ModuleGraph>,
|
||||
) -> (SsaArtifactSummaries, EligibleCalleeBodies) {
|
||||
let (summaries, bodies) = lower_all_functions_from_bodies(
|
||||
file_cfg,
|
||||
|
|
@ -2463,6 +2887,8 @@ pub(crate) fn extract_ssa_artifacts_from_file_cfg(
|
|||
local_summaries,
|
||||
global_summaries,
|
||||
locator,
|
||||
scan_root,
|
||||
module_graph,
|
||||
);
|
||||
let eligible_bodies = build_eligible_bodies(file_cfg, bodies);
|
||||
(summaries, eligible_bodies)
|
||||
|
|
|
|||
|
|
@ -142,6 +142,27 @@ pub struct CalleeSsaBody {
|
|||
/// bodies.
|
||||
#[serde(skip)]
|
||||
pub body_graph: Option<crate::cfg::Cfg>,
|
||||
/// The callee body's own file-level cross-package import map (Phase 09
|
||||
/// step 0.7 keyset).
|
||||
///
|
||||
/// Populated when the body is freshly lowered with the file's
|
||||
/// [`crate::cfg::FileCfg::resolved_imports`] in scope. Forwarded into
|
||||
/// the inline-analysis child transfer so transitive cross-package
|
||||
/// resolution inside an inlined frame can land in
|
||||
/// `crate::summary::GlobalSummaries::ssa_by_key` using the callee's
|
||||
/// own import view rather than the caller's (which would mis-resolve
|
||||
/// names against the caller's package boundary).
|
||||
///
|
||||
/// Wrapped in `Arc` so every body in a file shares one heap
|
||||
/// allocation; per-file bodies typically count in the tens to
|
||||
/// hundreds, and import maps are append-only after construction.
|
||||
/// `#[serde(skip)]` because the map is reproducible from the file's
|
||||
/// `resolved_imports` and bears no identity on its own; an indexed
|
||||
/// scan that loads a body from SQLite simply skips step 0.7 inside
|
||||
/// the inlined frame (same conservative behaviour as before this
|
||||
/// field existed).
|
||||
#[serde(skip)]
|
||||
pub cross_package_imports: std::sync::Arc<std::collections::HashMap<String, FuncKey>>,
|
||||
}
|
||||
|
||||
/// Populate `node_meta` from the original CFG for cross-file persistence.
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -264,6 +264,10 @@ pub fn extract_ssa_func_summary_full(
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: true,
|
||||
};
|
||||
|
||||
let (events, block_states) = run_ssa_taint_full(ssa, cfg, &transfer);
|
||||
|
|
@ -745,14 +749,36 @@ pub fn extract_ssa_func_summary_full(
|
|||
if event.sink_caps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let site = match locator {
|
||||
Some(loc) => {
|
||||
loc.site_for_span(cfg[event.sink_node].classification_span(), event.sink_caps)
|
||||
// Preserve the deepest sink attribution across multi-hop summaries.
|
||||
// When `event.primary_sink_site` is populated, the upstream
|
||||
// resolver already pierced through a callee summary to the
|
||||
// dangerous instruction's coordinates; promoting it here means a
|
||||
// grandparent caller of this function sees `line N` of the
|
||||
// innermost helper rather than `line M` of *this* function's
|
||||
// call site to its child. Mark `from_chain = true` so pass-2
|
||||
// emission can distinguish multi-hop chain markers (always
|
||||
// promote into `Finding.primary_location`) from this body's own
|
||||
// locator-resolved sink (only promote across file boundaries).
|
||||
// Falls back to locator-based call-site attribution when the
|
||||
// event is intra-procedural.
|
||||
let site = match event.primary_sink_site.as_ref() {
|
||||
Some(s) => {
|
||||
let mut s = s.clone();
|
||||
s.from_chain = true;
|
||||
s
|
||||
}
|
||||
None => SinkSite::cap_only(event.sink_caps),
|
||||
None => match locator {
|
||||
Some(loc) => loc
|
||||
.site_for_span(cfg[event.sink_node].classification_span(), event.sink_caps),
|
||||
None => SinkSite::cap_only(event.sink_caps),
|
||||
},
|
||||
};
|
||||
let key = site.dedup_key();
|
||||
if !param_sites.iter().any(|s| s.dedup_key() == key) {
|
||||
if let Some(existing) = param_sites.iter_mut().find(|s| s.dedup_key() == key) {
|
||||
if site.from_chain && !existing.from_chain {
|
||||
existing.from_chain = true;
|
||||
}
|
||||
} else {
|
||||
param_sites.push(site);
|
||||
}
|
||||
}
|
||||
|
|
@ -812,6 +838,10 @@ pub fn extract_ssa_func_summary_full(
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: true,
|
||||
};
|
||||
detect_source_to_callback_from_states(
|
||||
ssa,
|
||||
|
|
@ -867,6 +897,11 @@ pub fn extract_ssa_func_summary_full(
|
|||
// caller patches it in.
|
||||
typed_call_receivers: Vec::new(),
|
||||
validated_params_to_return,
|
||||
// Phase-10 entry-point classification is attached post-extraction
|
||||
// by `taint::lower_all_functions_from_bodies` (which has access
|
||||
// to `FileCfg::entry_kinds`). `None` here means the extractor
|
||||
// itself does not carry the tag.
|
||||
entry_kind: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1112,11 +1147,25 @@ fn infer_summary_return_type(
|
|||
continue;
|
||||
}
|
||||
// Only inspect the very last instruction in the returning block.
|
||||
// Mirror the CFG-level `outer_callee` fallback (Phase 08 audit) so a
|
||||
// CFG-rewritten callee (e.g. `req.body.path` displacing `URL` on
|
||||
// `new URL(req.body.path, base)`) still resolves to the original
|
||||
// constructor identifier preserved in `callee_text`.
|
||||
if let Some(inst) = block.body.last()
|
||||
&& let SsaOp::Call { callee, .. } = &inst.op
|
||||
&& let Some(ty) = crate::ssa::type_facts::constructor_type(lang, callee)
|
||||
&& let SsaOp::Call {
|
||||
callee,
|
||||
callee_text,
|
||||
..
|
||||
} = &inst.op
|
||||
{
|
||||
return Some(ty);
|
||||
if let Some(ty) = crate::ssa::type_facts::constructor_type(lang, callee) {
|
||||
return Some(ty);
|
||||
}
|
||||
if let Some(orig) = callee_text.as_deref()
|
||||
&& let Some(ty) = crate::ssa::type_facts::constructor_type(lang, orig)
|
||||
{
|
||||
return Some(ty);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ mod cross_file_tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
},
|
||||
opt: crate::ssa::OptimizeResult {
|
||||
const_values: std::collections::HashMap::new(),
|
||||
|
|
@ -105,6 +106,7 @@ mod cross_file_tests {
|
|||
param_count: 0,
|
||||
node_meta: std::collections::HashMap::new(),
|
||||
body_graph: None,
|
||||
cross_package_imports: std::sync::Arc::new(std::collections::HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -838,6 +840,7 @@ mod primary_sink_location_tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -862,6 +865,7 @@ mod primary_sink_location_tests {
|
|||
col: 10,
|
||||
snippet: "Command::new(cmd).status()".into(),
|
||||
cap: Cap::SHELL_ESCAPE,
|
||||
from_chain: false,
|
||||
};
|
||||
let summary = SsaFuncSummary {
|
||||
param_to_sink: vec![(0usize, smallvec![site.clone()])],
|
||||
|
|
@ -886,6 +890,8 @@ mod primary_sink_location_tests {
|
|||
&tainted,
|
||||
Cap::SHELL_ESCAPE,
|
||||
&summary.param_to_sink,
|
||||
"caller.rs",
|
||||
false,
|
||||
);
|
||||
assert_eq!(
|
||||
primary_sites.len(),
|
||||
|
|
@ -971,6 +977,7 @@ mod goto_succ_propagation_tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let cfg: Cfg = Graph::new();
|
||||
|
|
@ -1009,6 +1016,10 @@ mod goto_succ_propagation_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
|
||||
// A non-bottom exit state, the test only cares that *every* succ
|
||||
|
|
@ -1065,6 +1076,7 @@ mod goto_succ_propagation_tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let cfg: Cfg = Graph::new();
|
||||
let interner = SymbolInterner::new();
|
||||
|
|
@ -1101,6 +1113,10 @@ mod goto_succ_propagation_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
let exit_state = SsaTaintState::initial();
|
||||
|
||||
|
|
@ -1128,6 +1144,7 @@ mod goto_succ_propagation_tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1390,6 +1407,7 @@ mod goto_succ_propagation_tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1517,6 +1535,7 @@ mod receiver_candidates_field_proj_tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1604,6 +1623,7 @@ mod receiver_candidates_field_proj_tests {
|
|||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
slot_scoped_assigns: std::collections::HashSet::new(),
|
||||
};
|
||||
let cands =
|
||||
super::super::receiver_candidates_for_type_lookup(SsaValue(0), Some(&body), Lang::Go);
|
||||
|
|
@ -1739,6 +1759,7 @@ mod fanout_merge_tests {
|
|||
col: 5,
|
||||
snippet: "exec(q)".into(),
|
||||
cap: Cap::from_bits(0b0001).unwrap(),
|
||||
from_chain: false,
|
||||
};
|
||||
let unique_a = SinkSite {
|
||||
file_rel: "src/a.rs".into(),
|
||||
|
|
@ -1746,6 +1767,7 @@ mod fanout_merge_tests {
|
|||
col: 3,
|
||||
snippet: "do_a(q)".into(),
|
||||
cap: Cap::from_bits(0b0001).unwrap(),
|
||||
from_chain: false,
|
||||
};
|
||||
let unique_b = SinkSite {
|
||||
file_rel: "src/b.rs".into(),
|
||||
|
|
@ -1753,6 +1775,7 @@ mod fanout_merge_tests {
|
|||
col: 7,
|
||||
snippet: "do_b(q)".into(),
|
||||
cap: Cap::from_bits(0b0001).unwrap(),
|
||||
from_chain: false,
|
||||
};
|
||||
let mut a = empty();
|
||||
a.param_to_sink_sites = vec![(0, smallvec![shared.clone(), unique_a.clone()])];
|
||||
|
|
@ -2008,6 +2031,7 @@ mod field_write_tests {
|
|||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals: HashSet::new(),
|
||||
slot_scoped_assigns: HashSet::new(),
|
||||
};
|
||||
(body, cache_id)
|
||||
}
|
||||
|
|
@ -2056,6 +2080,10 @@ mod field_write_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: Some(pf),
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
|
||||
let mut state = SsaTaintState::initial();
|
||||
|
|
@ -2140,6 +2168,10 @@ mod field_write_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
let mut state = SsaTaintState::initial();
|
||||
for inst in &body.blocks[0].body {
|
||||
|
|
@ -2208,6 +2240,10 @@ mod field_write_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: Some(&pf),
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
|
||||
// Pre-seed `validated_must` on `src` so the synth Assign
|
||||
|
|
@ -2312,6 +2348,7 @@ mod field_write_tests {
|
|||
m
|
||||
},
|
||||
synthetic_externals: HashSet::new(),
|
||||
slot_scoped_assigns: HashSet::new(),
|
||||
};
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(0));
|
||||
// v0 is Const → empty pt, the hook should not insert anything.
|
||||
|
|
@ -2354,6 +2391,10 @@ mod field_write_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: Some(&pf),
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
|
||||
let mut state = SsaTaintState::initial();
|
||||
|
|
@ -2452,6 +2493,10 @@ mod container_elem_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: Some(pf),
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
|
||||
let mut state = SsaTaintState::initial();
|
||||
|
|
@ -2549,6 +2594,7 @@ mod container_elem_tests {
|
|||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
slot_scoped_assigns: HashSet::new(),
|
||||
};
|
||||
|
||||
// Run pointer analysis first to confirm the result of `shift()`
|
||||
|
|
@ -2689,6 +2735,7 @@ mod container_elem_tests {
|
|||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
slot_scoped_assigns: HashSet::new(),
|
||||
};
|
||||
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(7));
|
||||
|
|
@ -2731,6 +2778,10 @@ mod container_elem_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: Some(&pf),
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
|
||||
// Seed `src` as validated_must before the push fires.
|
||||
|
|
@ -2833,6 +2884,7 @@ mod container_elem_tests {
|
|||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
slot_scoped_assigns: HashSet::new(),
|
||||
};
|
||||
|
||||
let interner = SymbolInterner::new();
|
||||
|
|
@ -2869,6 +2921,10 @@ mod container_elem_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
let mut state = SsaTaintState::initial();
|
||||
for inst in &body.blocks[0].body {
|
||||
|
|
@ -2960,6 +3016,7 @@ mod cross_call_field_tests {
|
|||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
slot_scoped_assigns: HashSet::new(),
|
||||
};
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(7));
|
||||
(body, cache_id, pf)
|
||||
|
|
@ -3334,6 +3391,7 @@ mod field_taint_origin_cap_tests {
|
|||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
slot_scoped_assigns: HashSet::new(),
|
||||
};
|
||||
(body, cache_id, cfg, n_proj)
|
||||
}
|
||||
|
|
@ -3425,6 +3483,10 @@ mod field_taint_origin_cap_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: Some(&pf),
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
for inst in &body.blocks[0].body {
|
||||
transfer_inst(inst, &cfg, &body, &transfer, &mut state);
|
||||
|
|
@ -3660,6 +3722,7 @@ mod pointer_lattice_worklist_tests {
|
|||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals: HashSet::new(),
|
||||
slot_scoped_assigns: HashSet::new(),
|
||||
};
|
||||
|
||||
let mut interner = SymbolInterner::new();
|
||||
|
|
@ -3713,6 +3776,10 @@ mod pointer_lattice_worklist_tests {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: Some(pf),
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -63,6 +63,10 @@ fn ssa_analyse_rust(src: &[u8]) -> Vec<Finding> {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
let events = ssa_transfer::run_ssa_taint(&ssa, cfg, &transfer);
|
||||
let mut findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, cfg);
|
||||
|
|
@ -663,6 +667,7 @@ fn cross_file_sink_finding_carries_primary_location() {
|
|||
col: 5,
|
||||
snippet: "Command::new(\"sh\").arg(cmd).status().unwrap();".into(),
|
||||
cap: Cap::SHELL_ESCAPE,
|
||||
from_chain: false,
|
||||
};
|
||||
global.insert(
|
||||
key,
|
||||
|
|
@ -3788,6 +3793,10 @@ fn assert_ssa_integration(src: &[u8]) {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
|
||||
let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
|
||||
|
|
@ -3926,6 +3935,10 @@ fn integ_php_echo_simple_var() {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
|
||||
let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
|
||||
|
|
@ -3996,6 +4009,10 @@ fn integ_c_curl_handle_ssrf() {
|
|||
auto_seed_handler_params: false,
|
||||
cross_file_bodies: None,
|
||||
pointer_facts: None,
|
||||
cross_package_imports: None,
|
||||
entry_kind: None,
|
||||
param_route_capture: None,
|
||||
recording_summary: false,
|
||||
};
|
||||
let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
|
||||
let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
|
||||
|
|
@ -5481,6 +5498,8 @@ class Worker {
|
|||
&file_cfg.summaries,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
// Collect containers of every key named "process".
|
||||
|
|
@ -5553,6 +5572,8 @@ function helper(x) {
|
|||
&file_cfg.summaries,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
let helper_keys: Vec<_> = summaries.keys().filter(|k| k.name == "helper").collect();
|
||||
|
|
@ -5776,6 +5797,8 @@ class Reader {
|
|||
&file_cfg.summaries,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
let read_sum = summaries
|
||||
|
|
@ -5821,6 +5844,8 @@ class Maker {
|
|||
&file_cfg.summaries,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
// make() has zero parameters and no fresh-allocation return, so the
|
||||
|
|
@ -6837,6 +6862,55 @@ function handler(req, res) {
|
|||
/// traversal flow alive end-to-end. Pins the precision claim — the
|
||||
/// strip is element-of-array-after-filter scoped, not a wholesale
|
||||
/// kill on any `<arr>.filter` call regardless of callback identity.
|
||||
// NOTE(review): the `///` lines directly above this point appear to be the
// tail of the doc comment for
// `cve_2026_42353_filter_without_validator_callback_preserves_taint` (they
// discuss `.filter` callbacks, which this test never touches). Doc comments
// attach to the next item, so with this test placed here they document this
// function instead. Confirm, then move this test below the CVE test or
// relocate that doc comment.
#[test]
|
||||
fn callee_body_carries_file_cross_package_imports() {
|
||||
// Phase 09: every `CalleeSsaBody` produced from a file's lowering
|
||||
// pipeline should carry the file-level cross-package import map
|
||||
// so the inline-analysis frame can resolve the callee's local
|
||||
// names against the callee's own package boundary (step 0.7
|
||||
// inside an inlined frame).
|
||||
let src = b"export function passthrough(s) { return s; }\n";
|
||||
let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
|
||||
let mut file_cfg = parse_lang(src, "javascript", lang);
|
||||
|
||||
// Inject a synthetic resolved import binding the way the Phase 04
|
||||
// resolver would for `import { helper } from "@scope/util/helper";`.
|
||||
// The source text itself contains no import, so this pushed binding is
|
||||
// the only entry the assertions below can observe.
|
||||
file_cfg
|
||||
.resolved_imports
|
||||
.push(crate::resolve::ImportBinding {
|
||||
local_name: "helper".to_string(),
|
||||
source_module: "@scope/util/helper".to_string(),
|
||||
resolved_file: Some(std::path::PathBuf::from("/scope/util/src/helper.ts")),
|
||||
exported_name: Some("helper".to_string()),
|
||||
});
|
||||
|
||||
// All four optional inputs are passed as `None`; only the bodies half of
// the returned pair is inspected.
let (_summaries, bodies) = super::extract_ssa_artifacts_from_file_cfg(
|
||||
&file_cfg,
|
||||
Lang::JavaScript,
|
||||
"test.js",
|
||||
&file_cfg.summaries,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
// Guard against a vacuous pass: the loop below asserts nothing when no
// body is returned.
assert!(
|
||||
!bodies.is_empty(),
|
||||
"expected at least one eligible body for `passthrough`",
|
||||
);
|
||||
// Every returned body, not just the first, must expose the injected binding.
for (_key, body) in &bodies {
|
||||
assert!(
|
||||
!body.cross_package_imports.is_empty(),
|
||||
"every body in a file with resolved imports should carry the file's cross-package import map; got an empty map",
|
||||
);
|
||||
assert!(
|
||||
body.cross_package_imports.contains_key("helper"),
|
||||
"expected the synthetic `helper` binding to surface in the body's cross-package import map",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cve_2026_42353_filter_without_validator_callback_preserves_taint() {
|
||||
let src = br#"
|
||||
|
|
@ -6867,3 +6941,74 @@ function handler(req, res) {
|
|||
"expected taint flow via filter(pickFirst) — pickFirst is not a recognised validator and must not strip taint; got 0 findings",
|
||||
);
|
||||
}
|
||||
|
||||
// ── Phase 09 cross-package namespace migration ─────────────────────────────
|
||||
|
||||
/// `build_cross_package_func_keys` produces a package-prefixed
|
||||
/// [`FuncKey::namespace`] for files inside a discovered monorepo
|
||||
/// package and a plain namespace otherwise.
|
||||
///
|
||||
/// Locks in the migration done as part of the deferred Phase 09 audit:
|
||||
/// SSA summary keys produced by
|
||||
/// [`crate::taint::lower_all_functions_from_bodies`] use
|
||||
/// `namespace_with_package` for their namespace, so the cross-package
|
||||
/// import map's `FuncKey::namespace` must agree for step 0.7 of
|
||||
/// `resolve_callee_full` to land hits in
|
||||
/// [`crate::summary::GlobalSummaries::ssa_by_key`].
|
||||
#[test]
|
||||
fn cross_package_func_keys_namespace_uses_resolver_when_available() {
|
||||
use crate::resolve::{ImportBinding, build_module_graph};
|
||||
use std::path::PathBuf;
|
||||
|
||||
// Fixture tree lives under the crate's manifest directory.
let mut fixture_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
fixture_root.push("tests/fixtures/resolver");
|
||||
// If `canonicalize` returns an error, fall back to the un-canonicalized
// path rather than failing the test at this point.
let root = fixture_root
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| fixture_root.clone());
|
||||
let graph = build_module_graph(std::slice::from_ref(&root));
|
||||
|
||||
// One binding, resolved to a file under `root`, is reused for both the
// with-resolver and without-resolver calls below.
let resolved_file = root.join("packages/util/src/index.ts");
|
||||
let binding = ImportBinding {
|
||||
local_name: "doStuff".to_string(),
|
||||
source_module: "@scope/util".to_string(),
|
||||
resolved_file: Some(resolved_file.clone()),
|
||||
exported_name: Some("doStuff".to_string()),
|
||||
};
|
||||
let scan_root = root.to_string_lossy().to_string();
|
||||
|
||||
// Phase 1: module graph supplied. The namespace is expected to be
// `@scope/util::` followed by the scan-root-relative path.
let with_resolver = crate::taint::build_cross_package_func_keys(
|
||||
std::slice::from_ref(&binding),
|
||||
Some(&scan_root),
|
||||
Some(&graph),
|
||||
Lang::TypeScript,
|
||||
);
|
||||
let key = with_resolver
|
||||
.get("doStuff")
|
||||
.expect("resolved binding maps to a FuncKey");
|
||||
assert!(
|
||||
key.namespace.starts_with("@scope/util::"),
|
||||
"expected package-prefixed namespace, got {ns}",
|
||||
ns = key.namespace,
|
||||
);
|
||||
// NOTE(review): the expected suffix uses `/` separators; presumably the
// namespace is normalised to forward slashes on every platform — confirm.
assert!(
|
||||
key.namespace.ends_with("packages/util/src/index.ts"),
|
||||
"expected the suffix to remain the scan-root-relative path, got {ns}",
|
||||
ns = key.namespace,
|
||||
);
|
||||
|
||||
// Phase 2: same binding and scan root, but no module graph. The namespace
// must be exactly the scan-root-relative path with no `::` prefix.
let without_resolver = crate::taint::build_cross_package_func_keys(
|
||||
std::slice::from_ref(&binding),
|
||||
Some(&scan_root),
|
||||
None,
|
||||
Lang::TypeScript,
|
||||
);
|
||||
let plain = without_resolver
|
||||
.get("doStuff")
|
||||
.expect("plain binding maps to a FuncKey");
|
||||
assert!(
|
||||
!plain.namespace.contains("::"),
|
||||
"without a resolver the namespace must stay plain, got {ns}",
|
||||
ns = plain.namespace,
|
||||
);
|
||||
assert_eq!(plain.namespace, "packages/util/src/index.ts");
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -794,6 +794,13 @@ pub struct Config {
|
|||
/// not persisted to config files.
|
||||
#[serde(skip)]
|
||||
pub framework_ctx: Option<crate::utils::project::FrameworkContext>,
|
||||
/// TS/JS module resolver state, set by the scan pipeline once per scan
|
||||
/// after the file walk and before pass 1. `None` outside the scan paths
|
||||
/// (e.g. unit-test direct callers of `analyse_file_fused`); consumers
|
||||
/// must treat absence as "no resolver hints available, fall back to
|
||||
/// pre-resolver behaviour" rather than as a hard error.
|
||||
#[serde(skip)]
|
||||
pub module_graph: Option<std::sync::Arc<crate::resolve::ModuleGraph>>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue