Critical bug fixes and recall improvements (#68)

This commit is contained in:
Eli Peter 2026-05-11 12:42:39 -04:00 committed by GitHub
parent 7d0e7320e2
commit 55247b7fcd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
352 changed files with 60069 additions and 900 deletions

View file

@ -73,6 +73,27 @@ pub static RULES: &[LabelRule] = &[
"db.Exec",
"db.QueryRow",
"db.Prepare",
// Phase 15 — GORM `db.Raw(sql)` raw-SQL passthrough. GORM's
// `*gorm.DB` is conventionally bound to a `db`-named receiver,
// so the suffix `db.Raw` carries the GORM semantic without
// colliding with stdlib `*sql.DB` (which has no `Raw` method).
// The `GormDb.Raw` type-qualified variant in the receiver-typed
// rule list below covers receivers tagged from `gorm.Open(...)`
// with non-`db` names.
"db.Raw",
// Phase 15 — `database/sql`-context variants. `db.QueryContext`,
// `db.ExecContext`, `db.QueryRowContext`, `db.PrepareContext`
// accept the SQL string at arg 1 (after `ctx`). Receivers
// typed as `*sql.DB` / `*sql.Tx` / `*sql.Stmt` resolve via
// suffix-matching on `db.<verb>`, so a call on a differently-named
// bound receiver only suffix-matches when the receiver text ends
// with `db` (covers `userDb`, `pgDb`, etc.); a shape such as
// `tx.QueryContext(...)` does not match here. More-precise
// receiver typing is provided by the `DatabaseConnection.<verb>`
// rules below.
"db.QueryContext",
"db.ExecContext",
"db.QueryRowContext",
"db.PrepareContext",
// goqu raw SQL literal builders: `goqu.L(s)` and the alias
// `goqu.Lit(s)` insert `s` verbatim into the generated SQL with no
// parameterisation. CVE-2026-41422 (daptin) loops a user-controlled
@ -88,6 +109,36 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — receiver-typed Go ORM/raw-SQL sinks. `*gorm.DB` (set by
// `constructor_type` for `gorm.Open(...)`) exposes `Raw(sql)` and
// `Exec(sql)` as raw-SQL passthrough; the type-qualified resolver
// rewrites `db.Raw(...)` → `GormDb.Raw`. `*sqlx.DB` likewise gets
// `NamedExec` / `NamedQuery` / `Select` / `Get` / `MustExec` rewriting
// via `SqlxDb.<verb>`. `DatabaseConnection.<verb>` covers the stdlib
// `*sql.DB` / `*sql.Tx` receivers tagged by the existing
// `sql.Open` / `sql.OpenDB` constructor mapping — currently the
// chained QueryContext shape suffix-matches `db.QueryContext` above,
// so `DatabaseConnection.QueryContext` is here for receivers whose
// identifier text doesn't end in `db`.
LabelRule {
matchers: &[
"GormDb.Raw",
"GormDb.Exec",
"SqlxDb.NamedExec",
"SqlxDb.NamedQuery",
"SqlxDb.Select",
"SqlxDb.Get",
"SqlxDb.MustExec",
"DatabaseConnection.QueryContext",
"DatabaseConnection.ExecContext",
"DatabaseConnection.QueryRowContext",
"DatabaseConnection.Query",
"DatabaseConnection.Exec",
"DatabaseConnection.QueryRow",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// fmt.Printf/Sprintf write to stdout or build strings in memory, not
// security sinks. fmt.Fprintf writes to an io.Writer (often http.ResponseWriter)
// so it IS a security sink for XSS.
@ -576,6 +627,363 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &[],
},
},
// ── SQL execute payload-arg gating (Phase 15 deferred fix, Go) ────────
//
// Mirrors the Python resolution recorded in `python::GATED_SINKS`. The
// flat rules above already classify these callees as `Sink(SQL_QUERY)`
// on every argument. `database/sql` and the Go ORM/raw-SQL ecosystem
// (GORM, sqlx, goqu) follow the convention that the SQL string is at
// arg 0 (or arg 1 for the `*Context` variants whose first arg is a
// `context.Context`); subsequent positional arguments are bind values
// sent through the driver's parameterised path. Tainted bind values
// are SAFE; tainted SQL is the SQLi vector.
//
// Destination-activation gates carry the same `Sink(SQL_QUERY)` label
// as the flat rule (cap dedupes against the flat label) and propagate
// `payload_args: &[0]` (or `&[1]` for `*Context` shapes) into
// `sink_payload_args`, narrowing the SSA sink scan to the SQL position.
SinkGate {
callee_matcher: "db.Query",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.Exec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.QueryRow",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.Prepare",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.Raw",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// `*Context` variants take `ctx` at arg 0 and the SQL string at arg 1.
SinkGate {
callee_matcher: "db.QueryContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.ExecContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.QueryRowContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.PrepareContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// goqu raw SQL literal builders. Single arg, payload at 0.
SinkGate {
callee_matcher: "goqu.L",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "goqu.Lit",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// Receiver-typed (case-sensitive, matching the flat rule): GORM / sqlx
// / `*sql.DB` typed via `constructor_type`. The SQL string is at arg 0
// EXCEPT for:
//   * sqlx `Select` / `Get`, whose signature is `(dest, query, args...)`
//     — the scan destination is arg 0 and the SQL string is arg 1;
//   * the `*Context` variants on `DatabaseConnection`, which take `ctx`
//     at arg 0 and the SQL string at arg 1.
SinkGate {
callee_matcher: "GormDb.Raw",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "GormDb.Exec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlxDb.NamedExec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlxDb.NamedQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// sqlx `Select(dest, query, args...)`: arg 0 is the slice the rows are
// scanned into, so the SQL payload is arg 1. Gating on arg 0 would
// narrow the sink scan away from the query string and miss tainted SQL.
SinkGate {
callee_matcher: "SqlxDb.Select",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// sqlx `Get(dest, query, args...)`: same shape as `Select` — the scan
// destination is arg 0 and the SQL string is arg 1.
SinkGate {
callee_matcher: "SqlxDb.Get",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlxDb.MustExec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.Query",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.Exec",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.QueryRow",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.QueryContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.ExecContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.QueryRowContext",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {

View file

@ -94,6 +94,21 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — JPA / Hibernate `Query.setParameter(name, value)` /
// `Query.setParameterList(...)` bind a positional / named parameter
// and return the same query object. The bind step does NOT inject
// the value into the SQL string; the value is sent as a separate
// parameter through the JDBC layer at execution. Treating
// `setParameter` / `setParameterList` as a SQL_QUERY sanitizer
// clears any taint inadvertently smeared onto the chain return so
// downstream `.getResultList()` / `.executeUpdate()` calls see a
// clean value. Case-sensitive: these are JPA-specific verb names
// and the chain shape is canonical.
LabelRule {
matchers: &["setParameter", "setParameterList"],
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: true,
},
// ─────────── Sinks ─────────────
LabelRule {
matchers: &["Runtime.exec", "ProcessBuilder"],
@ -125,6 +140,72 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::CODE_EXEC),
case_sensitive: false,
},
// Phase 13 — java.nio.file path-traversal sinks. `Files.<verb>` is
// the modern stdlib API for read/write/copy/move/delete operations;
// each takes a `Path` (or `Path` + payload) as arg 0. Default
// arg→return propagation smears taint through `Paths.get(...)`
// (forwarder) so the path arg of these calls inherits any taint
// present on the components. `FileInputStream` / `FileOutputStream` /
// `RandomAccessFile` are constructor-style sinks: `new
// FileInputStream(path)` reaches the FILE_IO sink at the
// `object_creation_expression` level (mapped to `Kind::CallFn` in
// Java's KINDS). Receiver-typing already maps these classes to
// `TypeKind::FileHandle` (see `class_name_to_type_kind`) so chained
// method calls on the resulting handle resolve via type-qualified
// labels, but the construction call itself is the canonical
// path-traversal vector.
LabelRule {
matchers: &[
"Files.readString",
"Files.readAllBytes",
"Files.readAllLines",
"Files.write",
"Files.writeString",
"Files.lines",
"Files.copy",
"Files.move",
"Files.delete",
"Files.deleteIfExists",
"Files.newInputStream",
"Files.newOutputStream",
"Files.newBufferedReader",
"Files.newBufferedWriter",
"FileInputStream",
"FileOutputStream",
"RandomAccessFile",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: true,
},
// Phase 13 — `Path.normalize()` collapses `.` / `..` segments and
// is the canonical Java path-traversal sanitiser when paired with
// a `startsWith(base)` containment check (not modelled here; the
// sanitiser rule clears the FILE_IO cap on the call's return,
// which is sufficient for the cap-based gate to suppress the
// sink finding). Case-sensitive: `Path.normalize` is unique to
// `java.nio.file.Path`; bare `normalize` would over-fire on
// `URI.normalize`, `Normalizer.normalize`, etc.
LabelRule {
matchers: &[
"Path.normalize",
// Canonical Java path-traversal sanitiser idiom:
// `base.resolve(name).normalize()`. CFG paren-strip yields
// callee text `<receiver>.resolve.normalize`; the bare 2-call
// `resolve.normalize` suffix is unique to `java.nio.file.Path`
// (no overload across the supported corpus produces the same
// chain text). Case-sensitive on the leaf chain to avoid
// colliding with non-path `.resolve()`-then-`.normalize()`
// shapes in unrelated grammars.
"resolve.normalize",
// Receiver-bound shape `Paths.get(p).normalize()` — the
// `Paths.get` constructor mapping in `ssa/type_facts.rs` types
// the receiver as `FileHandle`, so the type-qualified resolver
// rewrites `<v>.normalize` → `FileHandle.normalize` here.
"FileHandle.normalize",
],
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: true,
},
// HTTP response sinks, println/print are broad (also match System.out)
// but necessary to catch response.getWriter().println() via suffix matching.
LabelRule {
@ -134,12 +215,34 @@ pub static RULES: &[LabelRule] = &[
},
// openConnection() is the standard java.net.URL API for initiating a connection.
// It is the correct interception point, the URL is already set on the object.
//
// Phase 14 — additional SSRF entry points covered:
// * `URL.openStream` — equivalent of `URL.openConnection().getInputStream()`,
// fetches the resource at the URL directly. Bare `openStream`
// suffix is unique to `java.net.URL` in the supported corpus.
// * `OkHttpClient.newCall(Request)` — Square OkHttp's request
// dispatch entry point. The `Request` is built via a
// `Request.Builder().url(u).build()` chain whose default
// arg→return propagation smears URL taint through the chain.
// * `RestTemplate.getForEntity` / `RestTemplate.headForHeaders` —
// read-shaped Spring verbs that take the URL at arg 0.
LabelRule {
matchers: &[
"openConnection",
"openStream",
"HttpClient.send",
"HttpClient.sendAsync",
// Phase 14 — `OkHttpClient.newCall(Request)` and the
// generic `HttpClient.newCall` form OkHttp resolves to via
// the JAVA_HIERARCHY (OkHttpClient → HttpClient). Both
// forms are covered so a constructor-typed receiver
// (HttpClient) and a class-named receiver (OkHttpClient)
// both fire.
"HttpClient.newCall",
"OkHttpClient.newCall",
"getForObject",
"getForEntity",
"headForHeaders",
"RestTemplate.exchange",
"postForObject",
"postForEntity",
@ -246,8 +349,34 @@ pub static RULES: &[LabelRule] = &[
matchers: &[
"entityManager.createNativeQuery",
"entityManager.createQuery",
"em.createNativeQuery",
"em.createQuery",
"session.createQuery",
"session.createSQLQuery",
"session.createNativeQuery",
// Phase 15 — Spring Data JPA / Hibernate factory chains:
// `getEntityManager().createNativeQuery(...)` /
// `getSession().createQuery(...)` reduce to
// `getEntityManager.createNativeQuery` /
// `getSession.createQuery` after the chain-normalisation
// strips parens.
"getEntityManager.createNativeQuery",
"getEntityManager.createQuery",
"getSession.createQuery",
"getSession.createSQLQuery",
"getSession.createNativeQuery",
// Type-qualified Hibernate Session matchers fire when the
// receiver carries a `TypeKind::HibernateSession` fact (set
// by `constructor_type` for `sessionFactory.openSession()` /
// `sessionFactory.getCurrentSession()` /
// `sessionFactory.openStatelessSession()` returns). Closes
// the arbitrary-receiver-name shape (`sess`,
// `hibernateSession`, etc.) the flat `session.*` matchers
// above only catch when receiver is literally named
// `session`.
"HibernateSession.createQuery",
"HibernateSession.createSQLQuery",
"HibernateSession.createNativeQuery",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
@ -484,6 +613,385 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &[],
},
},
// ── SQL execute payload-arg gating (Phase 15 deferred fix, Java) ──────
//
// Mirrors the Python resolution recorded in `python::GATED_SINKS`: the
// flat rules above already classify these callees as `Sink(SQL_QUERY)`
// on every argument. The JDBC / JPA / Hibernate / Spring conventions
// are that arg 0 is the SQL template (or HQL/JPQL string) and any
// remaining arguments are bind values, RowMappers, result-set classes,
// or other non-SQL payloads. Tainted bind values are SAFE because the
// driver / JPA layer sends them as bound parameters rather than splicing
// them into the SQL text; tainted SQL is the SQLi vector.
//
// These Destination-activation gates carry the same `Sink(SQL_QUERY)`
// label as the flat rule (so cap dedupes against the flat label) but
// propagate `payload_args: &[0]` into `sink_payload_args`, narrowing the
// SSA sink scan to arg 0 only. Receiver-typed `DatabaseConnection.*`
// forms are case-sensitive, matching the flat rule.
SinkGate {
callee_matcher: "executeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "executeUpdate",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.executeBatch",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DatabaseConnection.executeLargeUpdate",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// Spring JdbcTemplate verbs. All take SQL at arg 0; remaining args are
// bind values (`Object[]` / varargs) or `RowMapper` / `ResultSetExtractor`
// / class hints — all non-SQL payloads.
SinkGate {
callee_matcher: "jdbcTemplate.query",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "jdbcTemplate.update",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "jdbcTemplate.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "jdbcTemplate.queryForObject",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "jdbcTemplate.queryForList",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// JPA / Hibernate factories. `createQuery(sql)` / `createQuery(sql, ResultClass)`
// both take the SQL/JPQL/HQL string at arg 0; the optional `ResultClass`
// at arg 1 is metadata, not SQL.
SinkGate {
callee_matcher: "entityManager.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "entityManager.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "em.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "em.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "session.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "session.createSQLQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "session.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getEntityManager.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getEntityManager.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getSession.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getSession.createSQLQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "getSession.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// Type-qualified Hibernate Session gates. Mirror the
// `session.create*` family above so type-qualified resolution at
// sink-firing time consults `payload_args = &[0]` and suppresses
// tainted bind-arg shapes that route through `setParameter` /
// `setString` rather than the raw query string. Receivers carry
// `TypeKind::HibernateSession` via `constructor_type`'s
// `openSession` / `getCurrentSession` / `openStatelessSession`
// arms.
SinkGate {
callee_matcher: "HibernateSession.createQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "HibernateSession.createSQLQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "HibernateSession.createNativeQuery",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {

View file

@ -1,5 +1,6 @@
use crate::labels::{
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
Cap, DataLabel, GateActivation, GatedLabelRule, Kind, LabelGate, LabelRule, ParamConfig,
RuntimeLabelRule, SinkGate,
};
use crate::utils::project::{DetectedFramework, FrameworkContext};
use phf::{Map, phf_map};
@ -29,6 +30,21 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Source(Cap::all()),
case_sensitive: false,
},
// Phase 10 — Web `Request` receiver-method reads. Triggered when
// the SSA receiver carries `TypeKind::Request` and the
// type-qualified resolver rewrites `req.json()` → `Request.json`
// etc. Mirrors the matching list in `labels/typescript.rs`.
LabelRule {
matchers: &[
"Request.json",
"Request.formData",
"Request.text",
"Request.url",
"Request.headers.get",
],
label: DataLabel::Source(Cap::all()),
case_sensitive: true,
},
// ───────── Sanitizers ──────────
LabelRule {
matchers: &["JSON.parse"],
@ -253,6 +269,40 @@ pub static RULES: &[LabelRule] = &[
"fs.unlinkSync",
"fs.readdir",
"fs.readdirSync",
// Phase 05 — `node:fs/promises` member-access forms covered
// here. Bare-name forms (`readFile`, `open`, ...) and
// `fsp.readFile` namespace-import forms ride the gated
// matcher in `GATED_LABEL_RULES`. Receiver-type fallback
// synthesises `FileSystemPromisesNs.<method>` (handled
// below).
"fs.promises.readFile",
"fs.promises.writeFile",
"fs.promises.unlink",
"fs.promises.open",
"fs.promises.stat",
"fs.promises.readdir",
"fs.promises.mkdir",
"fs.promises.rmdir",
"fs.promises.rm",
"fs.promises.appendFile",
"fs.promises.copyFile",
"fs.promises.rename",
"fs.promises.truncate",
"fs.promises.chmod",
"FileSystemPromisesNs.readFile",
"FileSystemPromisesNs.writeFile",
"FileSystemPromisesNs.unlink",
"FileSystemPromisesNs.open",
"FileSystemPromisesNs.stat",
"FileSystemPromisesNs.readdir",
"FileSystemPromisesNs.mkdir",
"FileSystemPromisesNs.rmdir",
"FileSystemPromisesNs.rm",
"FileSystemPromisesNs.appendFile",
"FileSystemPromisesNs.copyFile",
"FileSystemPromisesNs.rename",
"FileSystemPromisesNs.truncate",
"FileSystemPromisesNs.chmod",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
@ -310,6 +360,31 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// ── Phase 07 — ORM query-builder receiver-typed sinks ──
//
// Each rule here matches a callee text constructed by
// `resolve_type_qualified_labels` when a value's inferred TypeKind has a
// `label_prefix()`. The matcher form `<TypePrefix>.<method>` is the
// wire shape produced by that helper. The receiver TypeKinds
// themselves are populated by [`crate::ssa::type_facts::constructor_type`]
// (TS/JS branch): `new Sequelize(...)` → `Sequelize`,
// `getRepository(Entity)` → `TypeOrmRepo`,
// `getManager()` → `TypeOrmManager`,
// `createEntityManager()` → `MikroOrmEm`. Without a typed receiver the
// qualified callee text is never built, so these rules cannot misfire on
// unrelated `.literal()` / `.query()` / `.execute()` methods.
LabelRule {
matchers: &[
"Sequelize.literal",
"TypeOrmRepo.query",
"TypeOrmRepo.createQueryBuilder",
"TypeOrmManager.query",
"TypeOrmManager.createQueryBuilder",
"MikroOrmEm.execute",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// ─── LDAP injection sinks ───
//
// `ldapjs`: both the bound-variable idiom
@ -527,6 +602,75 @@ pub static EXCLUDES: &[&str] = &[
"exec.start",
];
/// Import-gated label rules. Each entry pairs a matcher list with a
/// [`LabelGate`] and only contributes its label when that gate is
/// satisfied. The paragraph below documents the first entry; the Knex,
/// Drizzle and Next.js entries are documented inline next to each rule.
///
/// Phase 05 — `node:fs/promises` path-traversal sinks. The matcher list
/// holds the bare-name and `<ns>.<method>` member-access shapes; the
/// [`LabelGate::ImportedFromModule`] gate suppresses bare-name matches
/// unless the file actually imports the method from `node:fs/promises`
/// or `fs/promises`. Bare-name only — `fs.promises.readFile`-style
/// member-access forms continue to fire via the flat FILE_IO matcher
/// list (no gate needed because the `fs.promises.` prefix is itself
/// witness to the resolution).
pub static GATED_LABEL_RULES: &[GatedLabelRule] = &[
// Phase 05 — `fs/promises` FILE_IO sinks, gated on the import of the
// call's leading identifier.
GatedLabelRule {
matchers: &[
"readFile",
"writeFile",
"unlink",
"open",
"stat",
"readdir",
"mkdir",
"rmdir",
"rm",
"appendFile",
"copyFile",
"rename",
"truncate",
"chmod",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
gate: LabelGate::ImportedFromModule(&["node:fs/promises", "fs/promises"]),
},
// Phase 07 — Knex bare-name raw-SQL escape hatches. The receiver in
// `db.whereRaw(sql)` shape is an arbitrary local binding (`db`, `qb`,
// `users`, ...) so leading-identifier gating cannot witness the
// import. Phase 07 deferred-item 10 tightening: require the file to
// bind the conventional value-import name `knex` (lowercase) so that
// type-only shapes like `import { Knex } from 'knex'` (for
// `Knex.QueryBuilder` type annotations) do not over-fire the gate.
GatedLabelRule {
matchers: &["whereRaw", "orderByRaw", "havingRaw"],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
gate: LabelGate::FileImportsModuleAsLocalName {
modules: &["knex"],
local_names: &["knex"],
},
},
// Phase 07 — Drizzle `sql` template-tag builder. Two shapes:
// - `sql.raw(x)` → callee text "sql.raw" (member call)
// - `sql\`SELECT ${x}\`` → callee text "sql" (tag call)
// Both leading-identifier-gate against the imported `sql` symbol from
// `drizzle-orm`. `=sql` is exact-only so unrelated `.sql()` methods do
// not collide; `sql.raw` carries its own member-access matcher.
GatedLabelRule {
matchers: &["=sql", "sql.raw"],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
gate: LabelGate::ImportedFromModule(&["drizzle-orm"]),
},
// Phase 10 — Next.js `cookies()` / `headers()` from `next/headers`
// return adversary-controlled request-bound state. Mirrors the
// entry in `labels/typescript.rs::GATED_LABEL_RULES`.
// Unlike the entries above this one is a taint *source*, not a sink.
GatedLabelRule {
matchers: &["cookies", "headers"],
label: DataLabel::Source(Cap::all()),
case_sensitive: true,
gate: LabelGate::ImportedFromModule(&["next/headers"]),
},
];
pub static GATED_SINKS: &[SinkGate] = &[
SinkGate {
callee_matcher: "setAttribute",
@ -1316,6 +1460,8 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"variable_declaration" => Kind::CallWrapper,
"lexical_declaration" => Kind::CallWrapper,
"expression_statement" => Kind::CallWrapper,
"await_expression" => Kind::AwaitForward,
"jsx_attribute" => Kind::JsxAttr,
// trivia
"comment" => Kind::Trivia,

View file

@ -38,6 +38,61 @@ pub struct LabelRule {
pub case_sensitive: bool,
}
/// Activation gate carried by a [`GatedLabelRule`]. Phase 05 introduces the
/// import-derived gate so JS/TS bare-name `fs/promises` sinks (`readFile`,
/// `writeFile`, ...) only fire when the call resolves to that module — a
/// flat bare-name match would over-fire on user-defined `readFile` helpers.
///
/// In every variant the listed module specifiers are compared against the
/// file's import view ASCII-case-insensitively.
#[derive(Debug, Clone, Copy)]
pub enum LabelGate {
/// Fires only when the call's leading identifier is locally bound by an
/// import / `require` whose `source_module` equals one of the listed
/// specifiers. The synthetic prefix `FileSystemPromisesNs.` produced by
/// receiver-type qualification also satisfies the gate (see Phase 05's
/// `TypeKind::FileSystemPromisesNs`); that path needs no import view.
ImportedFromModule(&'static [&'static str]),
/// Fires when *any* local-name in the file's import view resolves to one
/// of the listed specifiers, regardless of which identifier leads the
/// call. Used for Phase 07 ORM bare-name method sinks (Knex's `whereRaw`
/// / `orderByRaw` / `havingRaw`) where the receiver is a query-builder
/// instance whose binding name is arbitrary (`db`, `qb`, `users`, ...)
/// and the import witness is the package itself.
FileImportsModule(&'static [&'static str]),
/// Fires when the file's import view binds at least one of `local_names`
/// to one of `modules`. Tighter than [`Self::FileImportsModule`]: type-only
/// or peripheral named-import shapes (e.g. `import { Knex } from 'knex'`
/// for type-only use of `Knex.QueryBuilder`) do not satisfy the gate
/// unless the conventional value-binding name (`knex`, lowercase) is also
/// present. Used for Phase 07 deferred-item 10's tightening of the Knex
/// `whereRaw` / `orderByRaw` / `havingRaw` gate.
FileImportsModuleAsLocalName {
/// Module specifiers that count as the import source.
modules: &'static [&'static str],
/// Local binding names looked up in the import view; the lookup is an
/// exact key match, so these are case-sensitive.
local_names: &'static [&'static str],
},
}
/// A label rule that only fires when its [`LabelGate`] is satisfied at the
/// call site. The matcher / label / case-sensitivity semantics mirror
/// [`LabelRule`]; the gate is checked by [`classify_all_ctx`] using the
/// caller-supplied [`ClassificationContext`].
#[derive(Debug, Clone, Copy)]
pub struct GatedLabelRule {
/// Callee-text matchers. A matcher ending in `_` is tried as a prefix;
/// every other matcher goes through the exact / suffix pass.
pub matchers: &'static [&'static str],
/// Label contributed when a matcher hits and the gate is satisfied.
pub label: DataLabel,
/// Whether matcher comparison is case-sensitive.
pub case_sensitive: bool,
/// Activation condition evaluated for every matcher hit.
pub gate: LabelGate,
}
/// Per-file context consulted by [`classify_all_ctx`] when evaluating
/// gated rules. Threaded from the CFG layer's gated post-pass; `None`
/// elsewhere keeps existing classification paths intact.
#[derive(Debug, Default, Clone, Copy)]
pub struct ClassificationContext<'a> {
/// Local-name → source-module view of the file's imports. The map is
/// computed at CFG build time (see `cfg::imports::extract_local_import_view`)
/// so the gate fires before the project-wide resolver runs.
///
/// When this is `None`, no gate can be satisfied through the import
/// view; only the receiver-type prefix path of
/// [`LabelGate::ImportedFromModule`] can still fire.
pub local_imports: Option<&'a std::collections::HashMap<String, String>>,
}
/// Sentinel returned by [`classify_gated_sink`] for the dynamic/unknown-activation
/// branch: the gate fires conservatively and every positional argument must be
/// considered a potential tainted payload, not just the explicit `payload_args`.
@ -300,6 +355,17 @@ pub enum Kind {
/// any other sequential statement in the CFG but explicitly classified so
/// code that inspects `Kind` can recognise it.
Seq,
/// Async-await unary forward. An `await x` expression evaluates `x` and
/// resolves to the same value/taint, modelled as a 1:1 copy. Lowered to
/// SSA as `SsaOp::Assign(operand)` so taint, origins, and abstract value
/// pass through unchanged.
AwaitForward,
/// JSX attribute (`<Tag name={value} />`). Dispatched in the CFG so the
/// builder can recognise React-specific shapes such as
/// `dangerouslySetInnerHTML={{ __html: x }}` and synthesise a sink call.
/// The attribute name is read from the AST at CFG-build time, not carried
/// in this enum (which must remain `Copy` for `phf_map` storage).
JsxAttr,
Other,
}
@ -445,6 +511,19 @@ static GATED_REGISTRY: Lazy<HashMap<&'static str, &'static [SinkGate]>> = Lazy::
m
});
/// Language key → [`GatedLabelRule`] table. Phase 05 registers the JS and
/// TS tables only (under both their long and short language keys); a
/// language without an entry has no gated label rules.
static GATED_LABEL_REGISTRY: Lazy<HashMap<&'static str, &'static [GatedLabelRule]>> =
    Lazy::new(|| {
        let entries: [(&'static str, &'static [GatedLabelRule]); 4] = [
            ("javascript", javascript::GATED_LABEL_RULES),
            ("js", javascript::GATED_LABEL_RULES),
            ("typescript", typescript::GATED_LABEL_RULES),
            ("ts", typescript::GATED_LABEL_RULES),
        ];
        let mut registry = HashMap::new();
        for (lang_key, rules) in entries {
            registry.insert(lang_key, rules);
        }
        registry
    });
/// Feature flag for the Python prototype-pollution gates. Disabled by
/// default; set `NYX_PYTHON_PROTO_POLLUTION=1` (or `true`) to enable
/// `dict.update` / `__dict__.update` proto-pollution detection.
@ -599,6 +678,89 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
.unwrap_or(Kind::Other)
}
/// Reports whether `callee_leaf` is one of the Promise callback-registration
/// methods (`then`, `catch`, `finally`) in a JS/TS-family language.
///
/// These methods are not sinks. The taint engine uses this predicate to
/// treat the receiver as a Promise whose resolved value is passed to the
/// callback's first parameter. See phase 03 of `plan.md` for the recall-gap
/// rationale.
///
/// `callee_leaf` must already be the short form produced by
/// `callee_leaf_name` (e.g. `"then"`, not `"p.then"`). Both arguments are
/// compared case-sensitively; any non-JS/TS `lang` yields `false`.
pub fn is_promise_callback_method(lang: &str, callee_leaf: &str) -> bool {
    const JS_FAMILY: [&str; 5] = ["javascript", "js", "typescript", "ts", "tsx"];
    const CALLBACK_METHODS: [&str; 3] = ["then", "catch", "finally"];

    JS_FAMILY.contains(&lang) && CALLBACK_METHODS.contains(&callee_leaf)
}
/// Shape of a recognised promise / future combinator call.
///
/// Combinators wrap their arguments into a single awaitable value; the
/// variant records how argument taint maps onto the result.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PromiseCombinatorKind {
    /// `Promise.resolve(x)` — identity for `x`.
    Resolve,
    /// `Promise.all([a, b])` — array whose elements have per-arg taint.
    /// Also used for the Python `asyncio.gather` / `asyncio.wait` and Rust
    /// `tokio::join` / `futures::join` (and `try_*`) callees.
    All,
    /// `Promise.allSettled([...])` — same shape as `All`, conservative union.
    AllSettled,
    /// `Promise.race([...])` — first-to-settle, conservative union.
    Race,
}

/// Lang-agnostic recognition of any promise combinator callee text. Used by
/// SSA lowering, which doesn't carry a `lang` argument.
///
/// Delegates to [`is_promise_combinator`] once per language family so the
/// per-language tables live in exactly one place. The callee texts of the
/// three families do not overlap, so the probe order does not affect the
/// result.
pub fn is_any_promise_combinator(callee: &str) -> Option<PromiseCombinatorKind> {
    ["javascript", "python", "rust"]
        .iter()
        .find_map(|&lang| is_promise_combinator(lang, callee))
}

/// Combinator that `callee` resolves to in language `lang`, or `None` when
/// the callee is not a recognised combinator for that language.
///
/// `callee` is the full callee text (e.g. `"Promise.all"`) since the leaf
/// segment alone (`"all"`) is too generic to match safely. Both `lang` and
/// `callee` are matched exactly (case-sensitive).
pub fn is_promise_combinator(lang: &str, callee: &str) -> Option<PromiseCombinatorKind> {
    match lang {
        "javascript" | "js" | "typescript" | "ts" | "tsx" => match callee {
            "Promise.resolve" => Some(PromiseCombinatorKind::Resolve),
            "Promise.all" => Some(PromiseCombinatorKind::All),
            "Promise.allSettled" => Some(PromiseCombinatorKind::AllSettled),
            "Promise.race" => Some(PromiseCombinatorKind::Race),
            _ => None,
        },
        // Python: `asyncio.gather(...)` / `asyncio.wait(...)` resolve to a
        // tuple/list whose elements carry the union of argument taints.
        // `asyncio.wait` returns `(done, pending)` sets but the same
        // conservative scalar-union approximation applies; downstream
        // destructuring already taints all bindings.
        "python" | "py" => match callee {
            "asyncio.gather" | "asyncio.wait" => Some(PromiseCombinatorKind::All),
            _ => None,
        },
        // Rust: `tokio::join!` / `futures::join!` (and their `try_*`
        // variants) evaluate every future concurrently and bind the
        // tuple of resolved values. `cfg::push_node` rewrites the
        // macro_invocation's `arg_uses` so each future's tainted inputs
        // surface as a positional arg; this combinator entry then unions
        // them onto the tuple value.
        "rust" | "rs" => match callee {
            "tokio::join" | "tokio::try_join" | "futures::join" | "futures::try_join" => {
                Some(PromiseCombinatorKind::All)
            }
            _ => None,
        },
        _ => None,
    }
}
/// The kind of taint source, used to refine finding severity.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
@ -953,6 +1115,17 @@ fn ends_with_cs(haystack: &[u8], needle: &[u8], case_sensitive: bool) -> bool {
}
}
/// Prefix test that ignores ASCII case and performs no allocation.
///
/// Written for the gated-sink dispatch hot path, where the earlier
/// `value.to_ascii_lowercase().starts_with(&p.to_ascii_lowercase())` form
/// built two temporary `String`s per check. Only ASCII letters are folded;
/// all other bytes must match exactly. An empty `needle` always matches.
#[inline]
fn starts_with_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
    haystack
        .as_bytes()
        // `get` yields `None` when the haystack is shorter than the needle.
        .get(..needle.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(needle.as_bytes()))
}
/// Prefix check with configurable case sensitivity. The `=` exact-match
/// sigil is meaningless for prefix matchers (which by definition match many
/// suffixes); it is stripped if present so a malformed matcher like
@ -1028,6 +1201,9 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
// For chained calls like `r.URL.Query().Get`, also strip internal
// `().` segments to produce a normalized form like `r.URL.Query.Get`.
// `normalize_chained_call` returns `Cow::Borrowed` when no rewrite is
// needed, so the alloc is paid only on inputs that actually require
// it.
let full_normalized = normalize_chained_call(text);
let full_norm_bytes = full_normalized.as_bytes();
@ -1116,6 +1292,9 @@ pub fn classify_all(
return SmallVec::new();
}
// `normalize_chained_call` returns `Cow::Borrowed` when no rewrite
// is needed, so the alloc is paid only on inputs that actually
// require it. The hot classify path runs on every CFG node.
let full_normalized = normalize_chained_call(text);
let full_norm_bytes = full_normalized.as_bytes();
@ -1198,6 +1377,228 @@ pub fn classify_all(
out
}
/// Context-aware classification: flat rules plus import-gated rules.
///
/// The result is a strict superset of [`classify_all`]. The flat-rule scan
/// runs first and seeds the label set; the per-language [`GatedLabelRule`]
/// entries are then evaluated against `ctx` and appended to it. With a
/// `None` context (or a context whose `local_imports` is `None`), the only
/// way a gate can still be satisfied is the synthetic receiver-type prefix
/// (e.g. `FileSystemPromisesNs.`).
pub fn classify_all_ctx(
    lang: &str,
    text: &str,
    extra: Option<&[RuntimeLabelRule]>,
    ctx: Option<&ClassificationContext<'_>>,
) -> SmallVec<[DataLabel; 2]> {
    let mut labels = classify_all(lang, text, extra);
    classify_gated_into(lang, text, ctx, &mut labels);
    labels
}
/// Gated-rule pass on its own, without the flat [`classify_all`] scan.
///
/// Intended for callers that already applied the flat rules to `text`
/// during initial CFG construction and now only need the gate-conditioned
/// labels, which depend on a per-file [`ClassificationContext`] that was
/// not available at the original classification site.
pub fn classify_gated_only(
    lang: &str,
    text: &str,
    ctx: Option<&ClassificationContext<'_>>,
) -> SmallVec<[DataLabel; 2]> {
    let mut gated_labels: SmallVec<[DataLabel; 2]> = SmallVec::new();
    classify_gated_into(lang, text, ctx, &mut gated_labels);
    gated_labels
}
/// Appends to `out` every gated label whose matcher hits `text` and whose
/// gate is satisfied, skipping labels already present.
///
/// The rule table is looked up by `lang` as given, then by its ASCII
/// lowercase form. Nothing is appended when the language has no table,
/// the table is empty, or the callee head is excluded. Matching is tried
/// against both the trimmed callee head (text before the first `(` / `<`)
/// and the chain-normalised text.
fn classify_gated_into(
    lang: &str,
    text: &str,
    ctx: Option<&ClassificationContext<'_>>,
    out: &mut SmallVec<[DataLabel; 2]>,
) {
    let registered = GATED_LABEL_REGISTRY.get(lang).or_else(|| {
        let lowered = lang.to_ascii_lowercase();
        GATED_LABEL_REGISTRY.get(lowered.as_str())
    });
    let Some(&rules) = registered else {
        return;
    };
    if rules.is_empty() {
        return;
    }

    let head = text.split(['(', '<']).next().unwrap_or("");
    let trimmed = head.trim().as_bytes();
    if is_excluded(lang, trimmed) {
        return;
    }

    let normalized = normalize_chained_call(text);
    let normalized_bytes = normalized.as_bytes();

    // Pass 1: exact / suffix matchers (everything not ending in `_`).
    for rule in rules {
        let suffix_matchers = rule
            .matchers
            .iter()
            .map(|m| m.as_bytes())
            .filter(|m| !m.ends_with(b"_"));
        for matcher in suffix_matchers {
            let hit = match_suffix_cs(trimmed, matcher, rule.case_sensitive)
                || match_suffix_cs(normalized_bytes, matcher, rule.case_sensitive);
            if hit && gate_satisfied(&rule.gate, head, ctx) && !out.contains(&rule.label) {
                out.push(rule.label);
            }
        }
    }

    // Pass 2: prefix matchers (those ending in `_`).
    for rule in rules {
        let prefix_matchers = rule
            .matchers
            .iter()
            .map(|m| m.as_bytes())
            .filter(|m| m.ends_with(b"_"));
        for matcher in prefix_matchers {
            let hit = starts_with_cs(trimmed, matcher, rule.case_sensitive)
                || starts_with_cs(normalized_bytes, matcher, rule.case_sensitive);
            if hit && gate_satisfied(&rule.gate, head, ctx) && !out.contains(&rule.label) {
                out.push(rule.label);
            }
        }
    }
}
/// Payload-argument positions for known type-qualified sink callees.
///
/// The Phase 07 ORM raw-SQL receiver methods (`TypeOrmRepo.query`,
/// `TypeOrmManager.query`, `MikroOrmEm.execute`, etc.) take the SQL
/// template at arg 0 and bind / parameter arrays at arg 1+. A flat label
/// rule cannot express that and would produce a false positive on
/// `repo.query("SELECT $1", [tainted])`. When the type-qualified resolver
/// synthesises one of these callees, the returned slice lists the only
/// argument positions that sink-taint checks should inspect.
///
/// `Sequelize.literal(sql)` is single-arg, so `&[0]` is also correct for it
/// (no precision loss vs the unconditional flat rule).
///
/// Returns `None` for every other callee text; matching is exact and
/// case-sensitive.
pub fn type_qualified_sink_payload_args(qualified_callee: &str) -> Option<&'static [usize]> {
    // Only the SQL template (first positional argument) is a payload.
    const SQL_TEMPLATE_ONLY: &[usize] = &[0];

    let is_raw_sql_receiver_method = matches!(
        qualified_callee,
        "Sequelize.literal"
            | "TypeOrmRepo.query"
            | "TypeOrmRepo.createQueryBuilder"
            | "TypeOrmManager.query"
            | "TypeOrmManager.createQueryBuilder"
            | "MikroOrmEm.execute"
    );
    is_raw_sql_receiver_method.then_some(SQL_TEMPLATE_ONLY)
}
/// Receiver-type prefixes accepted as a witness for `module` on a
/// [`LabelGate::ImportedFromModule`] gate.
///
/// SSA receiver-type qualification can synthesise a callee such as
/// `FileSystemPromisesNs.readFile(...)`, whose leading identifier is a type
/// prefix rather than an imported binding. This lookup names the type
/// prefixes that let the gate fire without a textual import witness. An
/// empty slice means the gate has to rely on the `local_imports` map alone.
///
/// The module specifier is compared ASCII-case-insensitively.
fn receiver_type_prefixes_for_module(module: &str) -> &'static [&'static str] {
    const FS_PROMISES_SPECIFIERS: [&str; 2] = ["node:fs/promises", "fs/promises"];

    let is_fs_promises = FS_PROMISES_SPECIFIERS
        .iter()
        .any(|specifier| module.eq_ignore_ascii_case(specifier));
    if is_fs_promises {
        &["FileSystemPromisesNs"]
    } else {
        &[]
    }
}
/// Decides whether `gate` holds for a call whose head text is `callee_head`,
/// given the optional per-file `ctx`.
///
/// * [`LabelGate::ImportedFromModule`] — true when the call's leading
///   identifier is a receiver-type prefix registered for one of the modules
///   (see [`receiver_type_prefixes_for_module`]; no context needed), or when
///   the import view binds that identifier to one of the modules.
/// * [`LabelGate::FileImportsModule`] — true when any binding in the import
///   view points at one of the modules.
/// * [`LabelGate::FileImportsModuleAsLocalName`] — true when one of the
///   listed local names is bound to one of the modules.
///
/// Module specifiers are compared ASCII-case-insensitively. A missing
/// context or missing `local_imports` makes every import-view check false.
fn gate_satisfied(
    gate: &LabelGate,
    callee_head: &str,
    ctx: Option<&ClassificationContext<'_>>,
) -> bool {
    /// True when `source_module` equals one of `modules`, ignoring ASCII case.
    fn is_listed_module(source_module: &str, modules: &[&str]) -> bool {
        modules
            .iter()
            .any(|m| source_module.eq_ignore_ascii_case(m))
    }

    let import_view = ctx.and_then(|c| c.local_imports);

    match gate {
        LabelGate::ImportedFromModule(modules) => {
            let leading = leading_identifier(callee_head);
            let has_typed_receiver = modules
                .iter()
                .any(|m| receiver_type_prefixes_for_module(m).contains(&leading));
            has_typed_receiver
                || import_view
                    .and_then(|map| map.get(leading))
                    .is_some_and(|source_module| is_listed_module(source_module, modules))
        }
        LabelGate::FileImportsModule(modules) => import_view.is_some_and(|map| {
            map.values()
                .any(|source_module| is_listed_module(source_module, modules))
        }),
        LabelGate::FileImportsModuleAsLocalName {
            modules,
            local_names,
        } => import_view.is_some_and(|map| {
            local_names.iter().any(|name| {
                map.get(*name)
                    .is_some_and(|source_module| is_listed_module(source_module, modules))
            })
        }),
    }
}
/// Leading identifier of a call expression's text — the segment before the
/// first `.`, `:`, `(`, `<`, space, or `[`. Used to drive ImportTable
/// lookups.
///
/// Returns the whole input when none of those terminators occurs, and an
/// empty string when the input is empty or starts with a terminator. All
/// terminators are ASCII, so the returned slice always ends on a UTF-8
/// character boundary.
fn leading_identifier(callee_head: &str) -> &str {
    callee_head
        .find(['.', ':', '(', '<', ' ', '['])
        .map_or(callee_head, |end| &callee_head[..end])
}
/// Result of a gated-sink classification.
///
/// `label` is the sink capability the callee contributes at this site.
@ -1289,8 +1690,7 @@ pub fn classify_gated_sink(
}
match const_keyword_arg(name) {
Some(v) => {
let lower = v.to_ascii_lowercase();
if values.iter().any(|dv| lower == dv.to_ascii_lowercase()) {
if values.iter().any(|dv| v.eq_ignore_ascii_case(dv)) {
any_dangerous = true;
break;
}
@ -1332,15 +1732,14 @@ pub fn classify_gated_sink(
match activation_value {
Some(value) => {
let lower = value.to_ascii_lowercase();
let is_dangerous = gate
.dangerous_values
.iter()
.any(|v| lower == v.to_ascii_lowercase())
.any(|v| value.eq_ignore_ascii_case(v))
|| gate
.dangerous_prefixes
.iter()
.any(|p| lower.starts_with(&p.to_ascii_lowercase()));
.any(|p| starts_with_ignore_ascii_case(&value, p));
if is_dangerous {
out.push(GateMatch {
label: gate.label,
@ -1379,7 +1778,7 @@ pub fn classify_gated_sink(
/// Public wrapper for `normalize_chained_call` so callers outside the module
/// can share the same normalization used by the label classifier.
pub fn normalize_chained_call_for_classify(text: &str) -> String {
normalize_chained_call(text)
normalize_chained_call(text).into_owned()
}
/// Return the bare method-name segment of a callee text. Returns the
@ -1394,38 +1793,79 @@ pub fn bare_method_name(callee: &str) -> &str {
/// Normalize a chained method call: strip `()` between `.` segments.
/// e.g. `r.URL.Query().Get` → `r.URL.Query.Get`
/// e.g. `r.URL.Query().Get("host")` → `r.URL.Query.Get`
fn normalize_chained_call(text: &str) -> String {
let mut result = String::with_capacity(text.len());
///
/// Returns a borrow when no transformation is required (no `()` between
/// `.` segments and no leading `<`), avoiding the heap allocation. Only
/// pays for a `String` when the input actually needs rewriting; the hot
/// classify path runs on every CFG node so the borrow case dominates.
fn normalize_chained_call(text: &str) -> std::borrow::Cow<'_, str> {
let bytes = text.as_bytes();
let mut i = 0;
while i < bytes.len() {
match bytes[i] {
b'(' => {
// Skip from `(` to matching `)`, but only if followed by `.`
// This handles `Query().Get` → `Query.Get`
let mut depth = 1u32;
let mut j = i + 1;
while j < bytes.len() && depth > 0 {
if bytes[j] == b'(' {
depth += 1;
} else if bytes[j] == b')' {
depth -= 1;
match bytes[j] {
b'(' => depth += 1,
b')' => depth -= 1,
_ => {}
}
j += 1;
}
if j >= bytes.len() || bytes[j] == b'.' {
return std::borrow::Cow::Owned(normalize_chained_call_owned(text, i));
}
i += 1;
}
b'<' => return std::borrow::Cow::Borrowed(&text[..i]),
_ => i += 1,
}
}
std::borrow::Cow::Borrowed(text)
}
/// Slow path for `normalize_chained_call`: runs only when the input
/// actually contains a `(...)` group followed by `.` (the case that
/// requires removing characters). `prefix_end` is the byte offset of the
/// first transformation point so the prefix can be copied wholesale.
///
/// `(`, `)`, `<`, and `.` are all ASCII, so byte-level scanning is safe
/// for control characters. Non-ASCII identifier bytes are copied as
/// contiguous slices to keep multi-byte UTF-8 sequences intact.
fn normalize_chained_call_owned(text: &str, prefix_end: usize) -> String {
let bytes = text.as_bytes();
let mut result = String::with_capacity(text.len());
result.push_str(&text[..prefix_end]);
let mut i = prefix_end;
while i < bytes.len() {
match bytes[i] {
b'(' => {
let mut depth = 1u32;
let mut j = i + 1;
while j < bytes.len() && depth > 0 {
match bytes[j] {
b'(' => depth += 1,
b')' => depth -= 1,
_ => {}
}
j += 1;
}
// If we're at end or next char is `.`, skip the parens
if j >= bytes.len() || bytes[j] == b'.' {
i = j;
} else {
// Keep the paren content (unusual case)
result.push('(');
i += 1;
}
}
b'<' => break, // Stop at generic args
b'<' => break,
_ => {
result.push(bytes[i] as char);
i += 1;
let start = i;
while i < bytes.len() && !matches!(bytes[i], b'(' | b'<') {
i += 1;
}
result.push_str(&text[start..i]);
}
}
}
@ -1979,6 +2419,58 @@ mod tests {
assert_eq!(lookup_receiver_validator("python", "joinpath"), None);
}
#[test]
fn normalize_chained_call_borrows_when_no_change() {
    // Inputs that need no characters removed must come back borrowed:
    //   * a bare identifier (no parens, no `<`),
    //   * plain dotted text without any `(...)` group,
    //   * generic text, truncated at `<` by slicing the input.
    let cases = [("plain", "plain"), ("a.b.c", "a.b.c"), ("Vec<T>", "Vec")];
    for (input, expected) in cases {
        let normalized = normalize_chained_call(input);
        assert!(matches!(normalized, std::borrow::Cow::Borrowed(_)));
        assert_eq!(normalized.as_ref(), expected);
    }
}
#[test]
fn normalize_chained_call_collapses_paren_dot_chain() {
    let cases = [
        ("r.URL.Query().Get", "r.URL.Query.Get"),
        ("a.b().c().d", "a.b.c.d"),
        // A paren group that closes at end of input is collapsed as well
        // (the `j >= bytes.len()` branch).
        ("a.b()", "a.b"),
    ];
    for (input, expected) in cases {
        let normalized = normalize_chained_call(input);
        assert_eq!(normalized.as_ref(), expected);
    }
}
#[test]
fn normalize_chained_call_preserves_utf8_after_collapse() {
    // After a collapsed `(...)` group the slow path copies the tail; it
    // must keep multi-byte UTF-8 sequences whole.
    let cases = [
        // Greek lowercase letters: 2-byte sequences.
        ("obj.func().αβγ", "obj.func.αβγ"),
        // CJK ideographs: 3-byte sequences.
        ("a.b().名前", "a.b.名前"),
        // Emoji (4-byte sequence) inside an identifier. Engines never see
        // this in practice but the byte loop must not corrupt it.
        ("x.y().🦀_id", "x.y.🦀_id"),
    ];
    for (input, expected) in cases {
        let normalized = normalize_chained_call(input);
        assert_eq!(normalized.as_ref(), expected);
    }
}
#[test]
fn bare_method_name_strips_chain() {
// No-dot input → returned as-is.
@ -2739,6 +3231,26 @@ mod tests {
assert_eq!(result[0], DataLabel::Sink(Cap::HTML_ESCAPE));
}
#[test]
fn starts_with_ignore_ascii_case_matches_canonical_shapes() {
    // (haystack, needle) pairs where the needle is a prefix up to ASCII case.
    let accepted = [
        ("FILE://etc/passwd", "file://"),
        ("file://etc/passwd", "FILE://"),
        ("http://", "http://"),
        ("http://", ""),
        // Non-ASCII bytes compare byte-for-byte, so identical text matches.
        ("café", "café"),
    ];
    for (haystack, needle) in accepted {
        assert!(starts_with_ignore_ascii_case(haystack, needle));
    }

    let rejected = [
        ("http", "https"),
        ("", "x"),
        // The helper is intentionally ASCII-only: no Unicode case folding,
        // so `é` and `É` differ.
        ("café", "CAFÉ"),
    ];
    for (haystack, needle) in rejected {
        assert!(!starts_with_ignore_ascii_case(haystack, needle));
    }
}
#[test]
fn classify_all_dual_label_php() {
let result = classify_all("php", "file_get_contents", None);

View file

@ -48,9 +48,29 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: false,
},
// PDO parameterized queries
// PDO parameterized queries. `prepareStatement` covers Drupal's
// Database\\Connection convention (and any PSR-style wrapper that
// uses the longer name); semantically identical to `prepare` —
// both return a statement object, the bind step ships values as
// out-of-band parameters, no concatenation occurs.
LabelRule {
matchers: &["prepare", "bindParam", "bindValue"],
matchers: &["prepare", "prepareStatement", "bindParam", "bindValue"],
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — `mysqli_real_escape_string($conn, $s)` and
// `pg_escape_string($s)` apply driver-side escaping for legacy
// string-concat shapes. Treat as SQL_QUERY sanitizers so the
// value-replacement clears the cap on the call return.
// `addslashes` is intentionally excluded — it does NOT cover
// multibyte / charset-aware injection vectors.
LabelRule {
matchers: &[
"mysqli_real_escape_string",
"pg_escape_string",
"pg_escape_literal",
"pg_escape_identifier",
],
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: false,
},
@ -121,10 +141,39 @@ pub static RULES: &[LabelRule] = &[
"pdo.query",
"mysqli.real_query",
"mysqli_real_query",
// Phase 15 — `PDOStatement::execute` (with no args) executes a
// prepared statement; when prepared from a tainted string the
// bind step does NOT prevent injection (the SQL was already
// built unsafely). The receiver-text suffix is `stmt.execute`.
// Distinct from the bare `execute` matcher (already on the
// generic SQL_QUERY rule via `query` matcher) because the
// OOP `$stmt->execute()` shape skips the SQL-string arg.
"stmt.execute",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — Doctrine ORM raw-SQL passthrough APIs. Doctrine's
// `EntityManager::createQuery($dql)` accepts a DQL string;
// `createNativeQuery($sql, $rsm)` accepts a native SQL string;
// `getConnection()->executeQuery($sql)` /
// `getConnection()->executeStatement($sql)` are the low-level
// Connection passthroughs that route to the underlying driver
// verbatim. Suffix-matching covers both bound-receiver shapes
// (`$em->createQuery($dql)`) and the documentation-style
// class-qualified call form (`EntityManager.createQuery`).
LabelRule {
matchers: &[
"EntityManager.createQuery",
"EntityManager.createNativeQuery",
"createQuery",
"createNativeQuery",
"executeQuery",
"executeStatement",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// Laravel Eloquent: raw SQL methods.
// DB::raw() → scoped_call_expression, callee text "DB.raw".
// whereRaw/selectRaw/orderByRaw/havingRaw → member_call_expression on query builder.
@ -133,6 +182,22 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — Laravel raw-SQL execution facade methods. `DB::select`,
// `DB::statement`, `DB::insert`, `DB::update`, `DB::delete`,
// `DB::unprepared` all accept a literal SQL string; the
// `unprepared` form is the explicit no-bind escape hatch.
LabelRule {
matchers: &[
"DB.select",
"DB.statement",
"DB.insert",
"DB.update",
"DB.delete",
"DB.unprepared",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// NOTE: `file_get_contents` and `fopen` can fetch URLs (SSRF vector) and
// local files (LFI vector — `file://` scheme). As a Sink(SSRF) they only
// fire when the argument is tainted. `fopen` is the canonical low-level
@ -145,6 +210,32 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// Phase 14 — `\GuzzleHttp\Client::request($method, $url, ...)` and the
// verb-shorthand methods `$client->get($url)` / `->head($url)` /
// `->options($url)`. The read-shaped verbs carry the URL at arg 0
// and have no body argument, so a flat SSRF sink is FP-safe. The
// body-bearing verbs (`post` / `put` / `patch`) live on the
// DATA_EXFIL list below; their URL-position SSRF is covered by the
// flat `Client.request` sink in this rule (arg 1 is the URL). Guzzle
// exposes no argument-role metadata that would let a gate tell the
// URL from the body, but the source-sensitivity gate already
// silences plain `$_GET` / `$_POST` flows, so the remaining FP
// surface is small.
LabelRule {
matchers: &[
"Client.get",
"Client.head",
"Client.options",
"Client.request",
"HttpClient.get",
"HttpClient.head",
"HttpClient.request",
"Http.get",
"Http.head",
],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: true,
},
// ── Cross-boundary data exfiltration ──────────────────────────────────
//
// Body-bearing outbound HTTP verb methods on the major PHP HTTP clients.
@ -343,6 +434,26 @@ pub static GATED_SINKS: &[SinkGate] = &[
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// Phase 14 — `curl_setopt($ch, CURLOPT_URL, $url)` is the canonical
// pre-`curl_exec` URL bind. Tainted `$url` reaching this option is
// SSRF; the `curl_exec($ch)` flat sink above also fires on the
// tainted handle but only when the handle's taint propagates
// through opaque resource state, which the engine cannot follow
// across `curl_setopt` calls. Activating the SSRF cap directly at
// the option-bind site catches the flow at the construction step
// independent of the handle-flow analysis.
SinkGate {
callee_matcher: "curl_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_URL"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// PHP `header($line)` HEADER_INJECTION sink. Modelled as a gate so
// it can coexist with the OPEN_REDIRECT gate below: the multi-gate
// SSA dispatch needs each capability declared on its own gate filter

View file

@ -97,6 +97,39 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
},
// Phase 13 — pathlib / aiofiles / shutil path-traversal sinks.
// Chained constructor + method shapes (`Path(p).read_text()`) reduce
// via paren-strip to the `Path.*` matcher text below; the path argument
// is the sink payload. Receiver-bound shapes (`p = Path(...);
// p.read_text()`) are covered by the `FileHandle.*` matchers in the
// same list (see the note inside the matcher list).
LabelRule {
matchers: &[
"Path.open",
"Path.read_text",
"Path.write_text",
"Path.read_bytes",
"Path.write_bytes",
// Receiver-bound shapes (`p = Path(name); p.read_text()`)
// resolve via the `TypeKind::FileHandle` constructor mapping
// for `Path(...)` in `ssa/type_facts.rs`, which lets the
// type-qualified resolver rewrite `p.read_text` →
// `FileHandle.read_text` against the matchers below.
"FileHandle.open",
"FileHandle.read_text",
"FileHandle.write_text",
"FileHandle.read_bytes",
"FileHandle.write_bytes",
"aiofiles.open",
"shutil.copy",
"shutil.copy2",
"shutil.copyfile",
"shutil.move",
"shutil.rmtree",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: true,
},
LabelRule {
matchers: &[
"argparse.parse_args",
@ -157,6 +190,22 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: false,
},
// Phase 13 — `pathlib.Path.resolve(strict=True)` raises if the
// resolved path doesn't exist; the canonical / strict form is the
// documented path-traversal sanitiser. Strict-mode argument
// inspection is not modeled (the rule fires for any `.resolve()`
// chained on a `Path(...)`); the false-clear risk on
// `Path(...).resolve()` (non-strict) is an accepted trade-off
// because the non-strict form still resolves symlinks and
// collapses `..` segments, which dominates the path-traversal
// attack surface. Case-sensitive: `Path.resolve` is the literal
// pathlib method name; bare `resolve` is too broad (Django URL
// resolvers, Promise.resolve in JS-style libs).
LabelRule {
matchers: &["Path.resolve", "FileHandle.resolve"],
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: true,
},
// ─────────── Sinks ─────────────
// Flask sinks
LabelRule {
@ -218,6 +267,26 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — receiver-typed ORM sinks. `SqlAlchemySession.execute`
// / `SqlAlchemySession.scalar` / `SqlAlchemySession.scalars` etc.
// are produced when the receiver carries `TypeKind::SqlAlchemySession`
// (set by `constructor_type` for `sessionmaker()` / `Session(engine)` /
// `engine.connect()`). `DjangoQuerySet.raw` / `DjangoQuerySet.extra`
// fire on `Model.objects.raw(sql)` / `Model.objects.extra(...)` shapes
// when the receiver was tagged via the `Model.objects` access path.
// `ActiveRecordRelation` is registered in `labels/ruby.rs`.
LabelRule {
matchers: &[
"SqlAlchemySession.execute",
"SqlAlchemySession.scalar",
"SqlAlchemySession.scalars",
"SqlAlchemySession.exec_driver_sql",
"DjangoQuerySet.raw",
"DjangoQuerySet.extra",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// SQL injection: sqlite3 / SQLAlchemy / generic DB connection execute.
LabelRule {
matchers: &[
@ -1245,6 +1314,214 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["data"],
},
},
// ── SQL execute payload-arg gating (Phase 15 deferred fix) ────────────
//
// The flat label rules above already classify these callees as
// `Sink(SQL_QUERY)` on every argument. The DB-API convention is that
// arg 0 is the SQL string and arg 1+ are parameterised bind values
// (`cursor.execute("SELECT * FROM t WHERE id = %s", (user_id,))`). Tainted
// bind values are SAFE because the driver escapes them; tainted SQL is
// the SQLi vector. These Destination-activation gates carry the same
// `Sink(SQL_QUERY)` label so they dedupe against the flat rule, but
// their `payload_args: &[0]` propagates into `sink_payload_args`,
// narrowing the SSA sink scan to arg 0 only.
SinkGate {
callee_matcher: "cursor.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "cursor.executemany",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "conn.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "connection.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "session.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "engine.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "db.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "objects.raw",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// Receiver-typed forms; same payload shape (sql at arg 0).
SinkGate {
callee_matcher: "SqlAlchemySession.execute",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlAlchemySession.scalar",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlAlchemySession.scalars",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "SqlAlchemySession.exec_driver_sql",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DjangoQuerySet.raw",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "DjangoQuerySet.extra",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
/// Prototype-pollution-style gates for Python. Opt-in via the
@ -1329,6 +1606,13 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"call" => Kind::CallFn,
"assignment" => Kind::Assignment,
"expression_statement" => Kind::CallWrapper,
// tree-sitter-python emits `await x` as a named `await` node (no
// `_expression` suffix, unlike JS/TS). Map it to `AwaitForward` so
// the SSA lowering forwards the awaited value 1:1, mirroring the
// JS/TS contract. Async-for in Python is plain `for_statement` with
// an unnamed `async` token child; the iterator-text rewrite in
// `cfg::push_node` covers both sync and async forms uniformly.
"await" => Kind::AwaitForward,
// trivia
"comment" => Kind::Trivia,

View file

@ -113,7 +113,25 @@ pub static RULES: &[LabelRule] = &[
// in the resource-lifecycle acquire/release pair (cfg_analysis::RUBY_RESOURCES),
// so this entry is additive, it does not disturb resource-leak detection.
LabelRule {
matchers: &["File.open", "File.new", "File.read", "IO.read"],
matchers: &[
"File.open",
"File.new",
"File.read",
"IO.read",
// Phase 13 — write-side and directory-listing path-traversal
// sinks. `Pathname.new(p)` is conservative: a Pathname
// construction with attacker-controlled `p` is the documented
// entry point for downstream Path / File operations and
// surfaces the path-traversal vector at the construction
// site. `Dir.entries` / `Dir.glob` enumerate filesystem
// contents, so a tainted path argument is a directory
// disclosure / glob-injection vector.
"File.write",
"IO.write",
"Pathname.new",
"Dir.entries",
"Dir.glob",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
},
@ -136,10 +154,28 @@ pub static RULES: &[LabelRule] = &[
matchers: &[
"Net::HTTP.get",
"Net::HTTP.post",
// Phase 14 — `Net::HTTP.start(host, port, ...)` is a session
// factory whose host argument is the SSRF vector when
// tainted. `Net::HTTP.get_response(uri)` is a stdlib
// convenience wrapper around `start` + `request_get`.
"Net::HTTP.start",
"Net::HTTP.get_response",
"URI.open",
"OpenURI.open_uri",
"HTTParty.get",
"HTTParty.post",
// Phase 14 — Faraday::Connection verb methods on a typed
// receiver. `Faraday.new(url: base)` produces an
// `HttpClient`-typed value (see `constructor_type`); the
// `client.get(path)` chain resolves through the
// type-qualified `HttpClient.get` rule below. Bare
// `Faraday.get` / `.post` / etc. are the module-level
// shorthand the existing `Faraday.post` matcher already
// covers for DATA_EXFIL; SSRF needs the read-shaped
// verbs registered explicitly.
"Faraday.get",
"Faraday.head",
"Faraday.delete",
],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
@ -214,11 +250,41 @@ pub static RULES: &[LabelRule] = &[
case_sensitive: false,
},
// SQL injection: ActiveRecord unsafe raw-query execution APIs.
// Phase 15 expands coverage with `exec_query` (the raw-SQL execution
// verb on the ActiveRecord connection adapter) and `select_value` /
// `select_values` / `select_rows` / `select_one` (driver-level select
// helpers that accept a literal SQL string).
LabelRule {
matchers: &["find_by_sql", "connection.execute", "select_all"],
matchers: &[
"find_by_sql",
"connection.execute",
"select_all",
"exec_query",
"select_value",
"select_values",
"select_rows",
"select_one",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// Phase 15 — receiver-typed ActiveRecord raw-SQL sinks. The
// `ActiveRecordRelation` TypeKind is set by `constructor_type` on
// class-method scope chains (`User.where(...)` etc.); type-qualified
// resolution rewrites `relation.find_by_sql(sql)` →
// `ActiveRecordRelation.find_by_sql` so the chained shape is caught
// even when the receiver text has lost its model-class prefix.
LabelRule {
matchers: &[
"ActiveRecordRelation.find_by_sql",
"ActiveRecordRelation.exec_query",
"ActiveRecordRelation.select_all",
"ActiveRecordRelation.select_one",
"ActiveRecordRelation.select_value",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// SQL injection: ActiveRecord query methods that accept raw SQL strings.
// `where` and `order` are the most common Rails SQLi vectors when called
// with string interpolation (e.g., User.where("name = '#{params[:name]}'")).
@ -383,6 +449,32 @@ pub static RULES: &[LabelRule] = &[
/// `Nokogiri::XML::ParseOptions::DEFAULT_XML`); any non-dangerous
/// scope-qualified constant disables the gate.
pub static GATED_SINKS: &[SinkGate] = &[
// `Faraday.new(url: tainted)` — base-URL kwarg controls the destination
// origin for every subsequent verb call on the returned client
// (`client.get(path)` / `.post` / etc.). When the kwarg value is
// attacker-controlled, the constructor itself is the SSRF entry point;
// the existing type-qualified rules on `HttpClient.get` / `.post` only
// cover taint flowing into the per-call `path` arg.
//
// Activation is `Destination` on positional argument 0 with a single
// `url` field; tree-sitter-ruby emits the kwarg as a `pair` node sibling
// of the positional args, and `extract_destination_kwarg_pairs` (in
// `cfg::literals`, where Ruby support was added alongside this gate)
// walks those pairs.
SinkGate {
callee_matcher: "Faraday.new",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: true,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &["url"],
},
},
// `Nokogiri::XML(xml, url=nil, encoding=nil, options=NIL)` — top-level
// module method. arg 3 carries the parse-option flag literal.
//

View file

@ -60,6 +60,26 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
case_sensitive: false,
},
// Phase 13 — `Path::canonicalize` (and `tokio::fs::canonicalize`) is
// the canonical Rust path-traversal sanitiser when paired with a
// `starts_with(&base)` containment check. Same convention as the
// Java / Python `.normalize()` / `.resolve()` sanitiser rules: the
// call clears the FILE_IO cap on its return so the cap-based gate
// suppresses the downstream `tokio::fs::*` / `std::fs::*` sink.
// Bare `canonicalize` would over-fire on unrelated APIs (e.g.
// `Url::canonicalize`); the qualified forms below are unique to
// path-handling.
LabelRule {
matchers: &[
"Path.canonicalize",
"PathBuf.canonicalize",
"fs::canonicalize",
"std::fs::canonicalize",
"tokio::fs::canonicalize",
],
label: DataLabel::Sanitizer(Cap::FILE_IO),
case_sensitive: false,
},
// ─────────── Sinks ─────────────
LabelRule {
matchers: &[
@ -90,6 +110,21 @@ pub static RULES: &[LabelRule] = &[
"fs::copy",
"File::open",
"File::create",
// Phase 13 — `tokio::fs` async path-traversal sinks. The
// suffix matchers also catch the bare `tokio::fs::File::open`
// chain after paren-strip. `tokio::fs::*` is the
// async-runtime-bound mirror of `std::fs::*`; same path
// arg-0 semantics.
"tokio::fs::read",
"tokio::fs::read_to_string",
"tokio::fs::write",
"tokio::fs::remove_file",
"tokio::fs::remove_dir",
"tokio::fs::remove_dir_all",
"tokio::fs::rename",
"tokio::fs::copy",
"tokio::fs::File::open",
"tokio::fs::File::create",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
@ -105,6 +140,12 @@ pub static RULES: &[LabelRule] = &[
"reqwest::Client.head",
"reqwest::Client.patch",
"reqwest::Client.request",
// Phase 14 — hyper Client `request(req)` dispatch entry. The
// `req` builder chain (covered by the type-qualified
// RequestBuilder.* / Request::builder.* rules below) smears
// URL taint into the request value via default propagation.
"hyper::Client.request",
"hyper::client::Client.request",
// Chained constructor + verb form: `reqwest::Client::new()
// .post(url)` reduces (via root-receiver collapse) to chain
// text `Client::new.post`, so existing `Client.post` matchers
@ -370,6 +411,10 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"let_declaration" => Kind::CallWrapper,
"expression_statement" => Kind::CallWrapper,
"assignment_expression" => Kind::Assignment,
// `x.await` postfix. Documented per-language so the contract does
// not depend on the raw-string fallback in `cfg::push_node`; SSA
// lowering emits `Assign(operand)` for these nodes.
"await_expression" => Kind::AwaitForward,
// struct expressions, recurse so env::var() calls inside field
// initialisers produce Source-labelled CFG nodes (needed for summaries).

View file

@ -1,5 +1,6 @@
use crate::labels::{
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
Cap, DataLabel, GateActivation, GatedLabelRule, Kind, LabelGate, LabelRule, ParamConfig,
RuntimeLabelRule, SinkGate,
};
use crate::utils::project::{DetectedFramework, FrameworkContext};
use phf::{Map, phf_map};
@ -29,6 +30,24 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Source(Cap::all()),
case_sensitive: false,
},
// Phase 10 — Web `Request` receiver-method reads. Triggered when
// the SSA receiver carries `TypeKind::Request` (Next.js App
// Router handler's first formal) and the type-qualified resolver
// rewrites `req.json()` → `Request.json` etc. The reads return
// user-controlled bytes / strings; the matchers also cover
// `Request.url` and `Request.headers.get(...)` which both expose
// header / URL state to the handler.
LabelRule {
matchers: &[
"Request.json",
"Request.formData",
"Request.text",
"Request.url",
"Request.headers.get",
],
label: DataLabel::Source(Cap::all()),
case_sensitive: true,
},
// ───────── Sanitizers ──────────
LabelRule {
matchers: &["JSON.parse"],
@ -215,6 +234,40 @@ pub static RULES: &[LabelRule] = &[
"fs.unlinkSync",
"fs.readdir",
"fs.readdirSync",
// Phase 05 — `node:fs/promises` member-access forms covered
// here. Bare-name forms (`readFile`, `open`, ...) and
// `fsp.readFile` namespace-import forms ride the gated
// matcher in `GATED_LABEL_RULES`. Receiver-type fallback
// synthesises `FileSystemPromisesNs.<method>` (handled
// below).
"fs.promises.readFile",
"fs.promises.writeFile",
"fs.promises.unlink",
"fs.promises.open",
"fs.promises.stat",
"fs.promises.readdir",
"fs.promises.mkdir",
"fs.promises.rmdir",
"fs.promises.rm",
"fs.promises.appendFile",
"fs.promises.copyFile",
"fs.promises.rename",
"fs.promises.truncate",
"fs.promises.chmod",
"FileSystemPromisesNs.readFile",
"FileSystemPromisesNs.writeFile",
"FileSystemPromisesNs.unlink",
"FileSystemPromisesNs.open",
"FileSystemPromisesNs.stat",
"FileSystemPromisesNs.readdir",
"FileSystemPromisesNs.mkdir",
"FileSystemPromisesNs.rmdir",
"FileSystemPromisesNs.rm",
"FileSystemPromisesNs.appendFile",
"FileSystemPromisesNs.copyFile",
"FileSystemPromisesNs.rename",
"FileSystemPromisesNs.truncate",
"FileSystemPromisesNs.chmod",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
@ -255,6 +308,25 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// ── Phase 07 — ORM query-builder receiver-typed sinks ──
// See `labels/javascript.rs` for the design rationale; mirrored here so
// TypeScript fixtures pick up the same coverage. Receiver TypeKinds
// are populated by [`crate::ssa::type_facts::constructor_type`] for
// `new Sequelize(...)` / `getRepository(...)` / `getManager()` /
// `createEntityManager()`; the type-qualified resolver rewrites
// `<recv>.<method>` → `<TypePrefix>.<method>` against these matchers.
LabelRule {
matchers: &[
"Sequelize.literal",
"TypeOrmRepo.query",
"TypeOrmRepo.createQueryBuilder",
"TypeOrmManager.query",
"TypeOrmManager.createQueryBuilder",
"MikroOrmEm.execute",
],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// ─── LDAP injection sinks ───
//
// Mirror of `labels/javascript.rs`; ldapjs / ts-ldapjs has the same
@ -391,6 +463,67 @@ pub static EXCLUDES: &[&str] = &[
"exec.start",
];
/// Import-gated label rules for this registry, mirroring
/// `javascript.rs::GATED_LABEL_RULES` (see that table for the design
/// rationale); both language registries carry the same matcher lists to
/// keep .ts and .js fixtures in lockstep.
///
/// Each entry pairs a set of bare-name matchers with a `LabelGate`
/// describing the import the file must carry for the label to apply.
/// Entries, in order:
///
/// 1. Phase 05 — `node:fs/promises` path-traversal sinks (`Cap::FILE_IO`).
/// 2. Phase 07 — Knex raw-SQL escape hatches (`Cap::SQL_QUERY`).
/// 3. Phase 07 — Drizzle `sql` template tag and `sql.raw` (`Cap::SQL_QUERY`).
/// 4. Phase 10 — Next.js `cookies()` / `headers()` request-state sources.
pub static GATED_LABEL_RULES: &[GatedLabelRule] = &[
// Phase 05 — bare-name `fs/promises` functions. The names are generic
// (`open`, `rm`, `stat`, ...), so the rule is gated on the symbol being
// imported from `node:fs/promises` / `fs/promises`. This is the only
// entry in the table with `case_sensitive: false`.
GatedLabelRule {
matchers: &[
"readFile",
"writeFile",
"unlink",
"open",
"stat",
"readdir",
"mkdir",
"rmdir",
"rm",
"appendFile",
"copyFile",
"rename",
"truncate",
"chmod",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
gate: LabelGate::ImportedFromModule(&["node:fs/promises", "fs/promises"]),
},
// Phase 07 — Knex bare-name raw-SQL escape hatches. See
// `labels/javascript.rs::GATED_LABEL_RULES` for the rationale; this
// mirror keeps `.ts` and `.js` fixtures in lockstep.
// NOTE(review): the gate lists both the module (`knex`) and the local
// binding name (`knex`); presumably both must match — confirm against
// the `FileImportsModuleAsLocalName` implementation.
GatedLabelRule {
matchers: &["whereRaw", "orderByRaw", "havingRaw"],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
gate: LabelGate::FileImportsModuleAsLocalName {
modules: &["knex"],
local_names: &["knex"],
},
},
// Phase 07 — Drizzle `sql` template-tag builder. See
// `labels/javascript.rs::GATED_LABEL_RULES` for the two callee
// shapes covered (`sql\`...\`` and `sql.raw(...)`).
// NOTE(review): the leading `=` in `=sql` presumably requests an exact
// match on the bare `sql` callee rather than a suffix match — confirm
// against the matcher implementation.
GatedLabelRule {
matchers: &["=sql", "sql.raw"],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
gate: LabelGate::ImportedFromModule(&["drizzle-orm"]),
},
// Phase 10 — Next.js `cookies()` / `headers()` helpers from the
// `next/headers` module return adversary-controlled
// request-bound state (cookies carry session tokens, headers
// carry auth material). Gated on the import so app-internal
// helpers named `cookies` or `headers` keep their default
// classification. Unlike the entries above, this one is a taint
// source (`Cap::all()`), not a sink.
GatedLabelRule {
matchers: &["cookies", "headers"],
label: DataLabel::Source(Cap::all()),
case_sensitive: true,
gate: LabelGate::ImportedFromModule(&["next/headers"]),
},
];
pub static GATED_SINKS: &[SinkGate] = &[
SinkGate {
callee_matcher: "setAttribute",
@ -958,6 +1091,8 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"expression_statement" => Kind::CallWrapper,
"as_expression" => Kind::Seq,
"type_assertion" => Kind::Seq,
"await_expression" => Kind::AwaitForward,
"jsx_attribute" => Kind::JsxAttr,
// trivia
"comment" => Kind::Trivia,