mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
1071 lines
42 KiB
Rust
1071 lines
42 KiB
Rust
use crate::labels::{
|
||
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
|
||
};
|
||
use crate::utils::project::{DetectedFramework, FrameworkContext};
|
||
use phf::{Map, phf_map};
|
||
|
||
pub static RULES: &[LabelRule] = &[
|
||
// ─────────── Sources ───────────
|
||
LabelRule {
|
||
matchers: &["System.getenv"],
|
||
label: DataLabel::Source(Cap::all()),
|
||
case_sensitive: false,
|
||
},
|
||
LabelRule {
|
||
matchers: &[
|
||
"getParameter",
|
||
"getInputStream",
|
||
"getHeader",
|
||
"getCookies",
|
||
"getReader",
|
||
"getQueryString",
|
||
"getPathInfo",
|
||
"getRequestURI",
|
||
"getRequestURL",
|
||
"getServletPath",
|
||
"getContextPath",
|
||
],
|
||
label: DataLabel::Source(Cap::all()),
|
||
case_sensitive: false,
|
||
},
|
||
LabelRule {
|
||
matchers: &["readObject", "readLine", "ObjectMapper.readValue"],
|
||
label: DataLabel::Source(Cap::all()),
|
||
case_sensitive: false,
|
||
},
|
||
// Sensitive operator state: HTTP session attributes commonly carry
|
||
// auth tokens / CSRF tokens / signed user ids. Routed through the
|
||
// `Cookie` source-kind heuristic so DATA_EXFIL fires when these
|
||
// values leave the process via an outbound request body.
|
||
LabelRule {
|
||
matchers: &["HttpSession.getAttribute", "session.getAttribute"],
|
||
label: DataLabel::Source(Cap::all()),
|
||
case_sensitive: false,
|
||
},
|
||
// ───────── Sanitizers ──────────
|
||
LabelRule {
|
||
matchers: &["HtmlUtils.htmlEscape", "StringEscapeUtils.escapeHtml4"],
|
||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||
case_sensitive: false,
|
||
},
|
||
// OWASP ESAPI encoders
|
||
LabelRule {
|
||
matchers: &["Encoder.encodeForHTML", "Encoder.encodeForJavaScript"],
|
||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||
case_sensitive: false,
|
||
},
|
||
LabelRule {
|
||
matchers: &["Encoder.encodeForSQL"],
|
||
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
|
||
case_sensitive: false,
|
||
},
|
||
LabelRule {
|
||
matchers: &["Encoder.encodeForURL"],
|
||
label: DataLabel::Sanitizer(Cap::URL_ENCODE),
|
||
case_sensitive: false,
|
||
},
|
||
// OWASP ESAPI input validator, validates and canonicalizes input
|
||
LabelRule {
|
||
matchers: &["Validator.getValidInput"],
|
||
label: DataLabel::Sanitizer(Cap::all()),
|
||
case_sensitive: false,
|
||
},
|
||
// Type-check sanitizers, parsing to a primitive erases taint
|
||
LabelRule {
|
||
matchers: &[
|
||
"Integer.parseInt",
|
||
"Long.parseLong",
|
||
"Short.parseShort",
|
||
"Double.parseDouble",
|
||
"Integer.valueOf",
|
||
"Boolean.parseBoolean",
|
||
],
|
||
label: DataLabel::Sanitizer(Cap::all()),
|
||
case_sensitive: false,
|
||
},
|
||
LabelRule {
|
||
matchers: &["URLEncoder.encode"],
|
||
label: DataLabel::Sanitizer(Cap::URL_ENCODE),
|
||
case_sensitive: false,
|
||
},
|
||
// Parameterized queries prevent SQL injection
|
||
LabelRule {
|
||
matchers: &["prepareStatement"],
|
||
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
|
||
case_sensitive: false,
|
||
},
|
||
// Phase 15 — JPA / Hibernate `Query.setParameter(name, value)` /
|
||
// `Query.setParameterList(...)` bind a positional / named parameter
|
||
// and return the same query object. The bind step does NOT inject
|
||
// the value into the SQL string; the value is sent as a separate
|
||
// parameter through the JDBC layer at execution. Treating
|
||
// `setParameter` / `setParameterList` as a SQL_QUERY sanitizer
|
||
// clears any taint inadvertently smeared onto the chain return so
|
||
// downstream `.getResultList()` / `.executeUpdate()` calls see a
|
||
// clean value. Case-sensitive: these are JPA-specific verb names
|
||
// and the chain shape is canonical.
|
||
LabelRule {
|
||
matchers: &["setParameter", "setParameterList"],
|
||
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
|
||
case_sensitive: true,
|
||
},
|
||
// ─────────── Sinks ─────────────
|
||
LabelRule {
|
||
matchers: &["Runtime.exec", "ProcessBuilder"],
|
||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||
case_sensitive: false,
|
||
},
|
||
LabelRule {
|
||
matchers: &["executeQuery", "executeUpdate"],
|
||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||
case_sensitive: false,
|
||
},
|
||
// JDBC `Statement.execute(String)` / `executeBatch` / `executeLargeUpdate`.
|
||
// Bare `execute` over-fires (Runnable.run callbacks, Executor.execute,
|
||
// HttpClient.execute), so these only fire via type-qualified resolution
|
||
// when the receiver's TypeKind is DatabaseConnection (the kind both
|
||
// `Connection` and `Statement` map to in `class_name_to_type_kind`).
|
||
// Surfaced by GHSA-h8cj-hpmg-636v (Appsmith FilterDataServiceCE.dropTable).
|
||
LabelRule {
|
||
matchers: &[
|
||
"DatabaseConnection.execute",
|
||
"DatabaseConnection.executeBatch",
|
||
"DatabaseConnection.executeLargeUpdate",
|
||
],
|
||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||
case_sensitive: true,
|
||
},
|
||
LabelRule {
|
||
matchers: &["Class.forName"],
|
||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||
case_sensitive: false,
|
||
},
|
||
// Phase 13 — java.nio.file path-traversal sinks. `Files.<verb>` is
|
||
// the modern stdlib API for read/write/copy/move/delete operations;
|
||
// each takes a `Path` (or `Path` + payload) as arg 0. Default
|
||
// arg→return propagation smears taint through `Paths.get(...)`
|
||
// (forwarder) so the path arg of these calls inherits any taint
|
||
// present on the components. `FileInputStream` / `FileOutputStream` /
|
||
// `RandomAccessFile` are constructor-style sinks: `new
|
||
// FileInputStream(path)` reaches the FILE_IO sink at the
|
||
// `object_creation_expression` level (mapped to `Kind::CallFn` in
|
||
// Java's KINDS). Receiver-typing already maps these classes to
|
||
// `TypeKind::FileHandle` (see `class_name_to_type_kind`) so chained
|
||
// method calls on the resulting handle resolve via type-qualified
|
||
// labels, but the construction call itself is the canonical
|
||
// path-traversal vector.
|
||
LabelRule {
|
||
matchers: &[
|
||
"Files.readString",
|
||
"Files.readAllBytes",
|
||
"Files.readAllLines",
|
||
"Files.write",
|
||
"Files.writeString",
|
||
"Files.lines",
|
||
"Files.copy",
|
||
"Files.move",
|
||
"Files.delete",
|
||
"Files.deleteIfExists",
|
||
"Files.newInputStream",
|
||
"Files.newOutputStream",
|
||
"Files.newBufferedReader",
|
||
"Files.newBufferedWriter",
|
||
"FileInputStream",
|
||
"FileOutputStream",
|
||
"RandomAccessFile",
|
||
],
|
||
label: DataLabel::Sink(Cap::FILE_IO),
|
||
case_sensitive: true,
|
||
},
|
||
// Phase 13 — `Path.normalize()` collapses `.` / `..` segments and
|
||
// is the canonical Java path-traversal sanitiser when paired with
|
||
// a `startsWith(base)` containment check (not modelled here; the
|
||
// sanitiser rule clears the FILE_IO cap on the call's return,
|
||
// which is sufficient for the cap-based gate to suppress the
|
||
// sink finding). Case-sensitive: `Path.normalize` is unique to
|
||
// `java.nio.file.Path`; bare `normalize` would over-fire on
|
||
// `URI.normalize`, `Normalizer.normalize`, etc.
|
||
LabelRule {
|
||
matchers: &[
|
||
"Path.normalize",
|
||
// Canonical Java path-traversal sanitiser idiom:
|
||
// `base.resolve(name).normalize()`. CFG paren-strip yields
|
||
// callee text `<receiver>.resolve.normalize`; the bare 2-call
|
||
// `resolve.normalize` suffix is unique to `java.nio.file.Path`
|
||
// (no overload across the supported corpus produces the same
|
||
// chain text). Case-sensitive on the leaf chain to avoid
|
||
// colliding with non-path `.resolve()`-then-`.normalize()`
|
||
// shapes in unrelated grammars.
|
||
"resolve.normalize",
|
||
// Receiver-bound shape `Paths.get(p).normalize()` — the
|
||
// `Paths.get` constructor mapping in `ssa/type_facts.rs` types
|
||
// the receiver as `FileHandle`, so the type-qualified resolver
|
||
// rewrites `<v>.normalize` → `FileHandle.normalize` here.
|
||
"FileHandle.normalize",
|
||
],
|
||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||
case_sensitive: true,
|
||
},
|
||
// HTTP response sinks: println/print are broad (also match System.out)
|
||
// but necessary to catch response.getWriter().println() via suffix matching.
|
||
LabelRule {
|
||
matchers: &["println", "print"],
|
||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||
case_sensitive: false,
|
||
},
|
||
// openConnection() is the standard java.net.URL API for initiating a connection.
|
||
// It is the correct interception point: the URL is already set on the object.
|
||
//
|
||
// Phase 14 — additional SSRF entry points covered:
|
||
// * `URL.openStream` — equivalent of `URL.openConnection().getInputStream()`,
|
||
// fetches the resource at the URL directly. Bare `openStream`
|
||
// suffix is unique to `java.net.URL` in the supported corpus.
|
||
// * `OkHttpClient.newCall(Request)` — Square OkHttp's request
|
||
// dispatch entry point. The `Request` is built via a
|
||
// `Request.Builder().url(u).build()` chain whose default
|
||
// arg→return propagation smears URL taint through the chain.
|
||
// * `RestTemplate.getForEntity` / `RestTemplate.headForHeaders` —
|
||
// read-shaped Spring verbs that take the URL at arg 0.
|
||
LabelRule {
|
||
matchers: &[
|
||
"openConnection",
|
||
"openStream",
|
||
"HttpClient.send",
|
||
"HttpClient.sendAsync",
|
||
// Phase 14 — `OkHttpClient.newCall(Request)` and the
|
||
// generic `HttpClient.newCall` form OkHttp resolves to via
|
||
// the JAVA_HIERARCHY (OkHttpClient → HttpClient). Both
|
||
// forms are covered so a constructor-typed receiver
|
||
// (HttpClient) and a class-named receiver (OkHttpClient)
|
||
// both fire.
|
||
"HttpClient.newCall",
|
||
"OkHttpClient.newCall",
|
||
"getForObject",
|
||
"getForEntity",
|
||
"headForHeaders",
|
||
"RestTemplate.exchange",
|
||
"postForObject",
|
||
"postForEntity",
|
||
],
|
||
label: DataLabel::Sink(Cap::SSRF),
|
||
case_sensitive: false,
|
||
},
|
||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||
//
|
||
// Outbound HTTP egress points where a Sensitive source (cookie, header,
|
||
// env, session attribute, db read) reaching the request body / payload
|
||
// is a cross-boundary disclosure distinct from SSRF. The flat-rule
|
||
// model relies on default arg → return propagation through builder
|
||
// chains: `HttpRequest.newBuilder().uri(u).POST(BodyPublishers.ofString(p)).build()`
|
||
// smears `p`-taint into the returned request, which then activates the
|
||
// sink at `client.send(req)`.
|
||
//
|
||
// Type-qualified resolution maps `restTemplate.postForObject(...)` →
|
||
// `HttpClient.postForObject` via the JAVA_HIERARCHY (RestTemplate,
|
||
// OkHttpClient, WebClient, CloseableHttpClient all subtype HttpClient),
|
||
// so a single set of `HttpClient.<method>` rules covers every framework
|
||
// in scope. Plain user input is silenced by the source-sensitivity
|
||
// gate in `effective_sink_caps`, so this fires only on cookies / headers
|
||
// / env / session / db.
|
||
LabelRule {
|
||
matchers: &[
|
||
// java.net.http: client.send(req) consumes a request that
|
||
// carries body-taint via BodyPublishers.ofString/ofByteArray/
|
||
// ofInputStream through the builder chain.
|
||
"HttpClient.send",
|
||
"HttpClient.sendAsync",
|
||
// Spring RestTemplate verbs that take a body / entity.
|
||
"postForObject",
|
||
"postForEntity",
|
||
"RestTemplate.exchange",
|
||
"RestTemplate.put",
|
||
"RestTemplate.patchForObject",
|
||
// Apache HttpClient: httpClient.execute(req) where req is an
|
||
// HttpPost / HttpPut / HttpPatch with .setEntity(StringEntity(p)).
|
||
// CloseableHttpClient subtypes HttpClient so type-qualified
|
||
// resolution rewrites client.execute → HttpClient.execute.
|
||
"HttpClient.execute",
|
||
// Spring WebClient body-binding step:
|
||
// webClient.post().uri(u).bodyValue(payload).retrieve().
|
||
// bodyValue is the explicit body-bind verb; default propagation
|
||
// carries the tainted body into the chain return so the sink
|
||
// attaches at the body-bind site itself (no cross-call needed).
|
||
"bodyValue",
|
||
// Apache HttpClient body-binding: the `setEntity` step on
|
||
// HttpPost / HttpPut / HttpPatch mutates the request rather
|
||
// than returning the builder, so the receiver's SSA value at
|
||
// the later `httpClient.execute(req)` does not carry body
|
||
// taint via the default smear (which threads through return
|
||
// values, not field mutations). Firing DATA_EXFIL at the
|
||
// setEntity call itself catches the body-binding directly.
|
||
// The matcher is specific enough to avoid collisions —
|
||
// `setEntity` is Apache-HttpClient-specific.
|
||
"setEntity",
|
||
// OkHttp builder body-binding shortcut: when the chain
|
||
// doesn't roll through `.post(body).build()` (e.g. a helper
|
||
// function returns the Builder mid-chain), `RequestBody`
|
||
// is bound via `.post(body)` / `.put(body)` / `.patch(body)`
|
||
// / `.delete(body)` directly on the Builder. These methods
|
||
// also exist on unrelated classes (NIO, Streams) but in the
|
||
// OkHttp idiom the receiver type is `Request.Builder`; the
|
||
// receiver-type widening from `Request.Builder` → HttpClient
|
||
// isn't currently modeled, so we fall back to suffix-name
|
||
// matchers and accept some receiver-agnostic firing risk.
|
||
// Conservative: omit these for v1 to avoid over-fire on
|
||
// non-OkHttp `post`/`put`/`patch` calls.
|
||
// OkHttp two-step: client.newCall(req).execute() / .enqueue().
|
||
// Chain normalization strips `()` between dots so the tree-
|
||
// sitter callee text `client.newCall(req).execute` matches the
|
||
// suffix `newCall.execute` after normalization.
|
||
"newCall.execute",
|
||
"newCall.enqueue",
|
||
],
|
||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||
case_sensitive: false,
|
||
},
|
||
LabelRule {
|
||
matchers: &[
|
||
"readObject",
|
||
"readUnshared",
|
||
"XMLDecoder.readObject",
|
||
"ObjectMapper.readValue",
|
||
],
|
||
label: DataLabel::Sink(Cap::DESERIALIZE),
|
||
case_sensitive: false,
|
||
},
|
||
// ─── Spring / JPA / Hibernate SQL sinks ───
|
||
LabelRule {
|
||
matchers: &[
|
||
"jdbcTemplate.query",
|
||
"jdbcTemplate.update",
|
||
"jdbcTemplate.execute",
|
||
"jdbcTemplate.queryForObject",
|
||
"jdbcTemplate.queryForList",
|
||
],
|
||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||
case_sensitive: false,
|
||
},
|
||
LabelRule {
|
||
matchers: &[
|
||
"entityManager.createNativeQuery",
|
||
"entityManager.createQuery",
|
||
"em.createNativeQuery",
|
||
"em.createQuery",
|
||
"session.createQuery",
|
||
"session.createSQLQuery",
|
||
"session.createNativeQuery",
|
||
// Phase 15 — Spring Data JPA / Hibernate factory chains:
|
||
// `getEntityManager().createNativeQuery(...)` /
|
||
// `getSession().createQuery(...)` reduce to
|
||
// `getEntityManager.createNativeQuery` /
|
||
// `getSession.createQuery` after the chain-normalisation
|
||
// strips parens.
|
||
"getEntityManager.createNativeQuery",
|
||
"getEntityManager.createQuery",
|
||
"getSession.createQuery",
|
||
"getSession.createSQLQuery",
|
||
"getSession.createNativeQuery",
|
||
// Type-qualified Hibernate Session matchers fire when the
|
||
// receiver carries a `TypeKind::HibernateSession` fact (set
|
||
// by `constructor_type` for `sessionFactory.openSession()` /
|
||
// `sessionFactory.getCurrentSession()` /
|
||
// `sessionFactory.openStatelessSession()` returns). Closes
|
||
// the arbitrary-receiver-name shape (`sess`,
|
||
// `hibernateSession`, etc.) the flat `session.*` matchers
|
||
// above only catch when receiver is literally named
|
||
// `session`.
|
||
"HibernateSession.createQuery",
|
||
"HibernateSession.createSQLQuery",
|
||
"HibernateSession.createNativeQuery",
|
||
],
|
||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||
case_sensitive: true,
|
||
},
|
||
// NOTE: Java logging (logger.info, log.warn, etc.) removed as sinks;
|
||
// logging format injection is not a real security vulnerability in Java.
|
||
// String.format also removed, it builds strings in memory (not a sink);
|
||
// the real sink is wherever the formatted string is used (SQL, HTTP, etc.).
|
||
// ─── JNDI injection sinks ───
|
||
LabelRule {
|
||
matchers: &[
|
||
"InitialContext.lookup",
|
||
"ctx.lookup",
|
||
"context.lookup",
|
||
"dirContext.lookup",
|
||
],
|
||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||
case_sensitive: false,
|
||
},
|
||
// ─── LDAP injection sinks ───
|
||
//
|
||
// JNDI / Spring LDAP search APIs accept an attacker-influenceable filter
|
||
// expression as the second positional argument (`DirContext.search(name,
|
||
// filter, controls)` / `LdapTemplate.search(base, filter, mapper)`). Without
|
||
// RFC 4515 escaping the filter can be rewritten to bypass authentication or
|
||
// exfiltrate directory entries. Type-qualified resolution rewrites
|
||
// `ctx.search(...)` → `LdapClient.search` when the receiver carries a
|
||
// `TypeKind::LdapClient` fact (set by `class_name_to_type_kind` for the
|
||
// declared types `DirContext`, `InitialDirContext`, `LdapContext`,
|
||
// `LdapTemplate`, or by `constructor_type` for `new InitialDirContext(...)`
|
||
// / `new InitialLdapContext(...)`). Direct flat matchers cover the
|
||
// documentation-style class-qualified call forms that bypass receiver
|
||
// typing.
|
||
LabelRule {
|
||
matchers: &[
|
||
"LdapClient.search",
|
||
"LdapClient.searchByEntity",
|
||
"LdapClient.searchForObject",
|
||
"LdapClient.searchForContext",
|
||
"DirContext.search",
|
||
"LdapTemplate.search",
|
||
"LdapTemplate.searchByEntity",
|
||
"LdapTemplate.searchForObject",
|
||
"LdapTemplate.searchForContext",
|
||
"ctx.search",
|
||
],
|
||
label: DataLabel::Sink(Cap::LDAP_INJECTION),
|
||
case_sensitive: true,
|
||
},
|
||
// ─── LDAP-filter sanitizers ───
|
||
//
|
||
// Spring LDAP's `LdapEncoder.filterEncode(s)` applies RFC 4515 escaping to
|
||
// metacharacters (`\`, `*`, `(`, `)`, ` |