use crate::labels::{ Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate, }; use crate::utils::project::{DetectedFramework, FrameworkContext}; use phf::{Map, phf_map}; pub static RULES: &[LabelRule] = &[ // ─────────── Sources ─────────── LabelRule { matchers: &["std::env::var", "env::var", "source_env"], label: DataLabel::Source(Cap::all()), case_sensitive: false, }, LabelRule { matchers: &["source_file"], label: DataLabel::Source(Cap::all()), case_sensitive: false, }, LabelRule { matchers: &["fs::read_to_string", "fs::read"], label: DataLabel::Source(Cap::all()), case_sensitive: false, }, // Inbound HTTP request metadata: headers, cookies, query strings, // and body extractors. These only carry caller-supplied bytes when // the framework binds them (the framework-conditional rules attach // the same labels for axum / actix / rocket extractors). Including // the bare suffix matchers here means a `req.headers().get("h")` // chain in non-framework code (e.g. internal helpers that take an // `&HeaderMap`) still surfaces as a Source. `infer_source_kind` // routes these to `Header` / `Cookie` (Sensitive), enabling // DATA_EXFIL gating downstream. LabelRule { matchers: &[ // Type-qualified (receiver typed as HttpRequest, HeaderMap, ...) "HttpRequest.headers", "HttpRequest.cookie", "HttpRequest.cookies", "Request.headers", "Request.cookies", "Request.uri", // Bare HeaderMap / cookie-jar accessors. "headers.get", "headers.get_all", "CookieJar.get", "CookieJar.get_private", "CookieJar.get_signed", ], label: DataLabel::Source(Cap::all()), case_sensitive: false, }, // ───────── Sanitizers ────────── LabelRule { matchers: &["html_escape::encode_safe", "sanitize_", "sanitize_html"], label: DataLabel::Sanitizer(Cap::HTML_ESCAPE), case_sensitive: false, }, LabelRule { matchers: &["shell_escape::unix::escape", "sanitize_shell"], label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE), case_sensitive: false, }, // Phase 13 — `Path::canonicalize` (and `tokio::fs::canonicalize`) is // the canonical Rust path-traversal sanitiser when paired with a // `starts_with(&base)` containment check. Same convention as the // Java / Python `.normalize()` / `.resolve()` sanitiser rules: the // call clears the FILE_IO cap on its return so the cap-based gate // suppresses the downstream `tokio::fs::*` / `std::fs::*` sink. // Bare `canonicalize` would over-fire on unrelated APIs (e.g. // `Url::canonicalize`); the qualified forms below are unique to // path-handling. LabelRule { matchers: &[ "Path.canonicalize", "PathBuf.canonicalize", "fs::canonicalize", "std::fs::canonicalize", "tokio::fs::canonicalize", ], label: DataLabel::Sanitizer(Cap::FILE_IO), case_sensitive: false, }, // ─────────── Sinks ───────────── LabelRule { matchers: &[ "command::new", "std::process::command::new", "command::arg", "command::args", "command::status", "command::output", ], label: DataLabel::Sink(Cap::SHELL_ESCAPE), case_sensitive: false, }, LabelRule { matchers: &["sink_html"], label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: false, }, LabelRule { matchers: &[ "fs::read_to_string", "fs::write", "fs::read", "fs::remove_file", "fs::remove_dir", "fs::remove_dir_all", "fs::rename", "fs::copy", "File::open", "File::create", // Phase 13 — `tokio::fs` async path-traversal sinks. The // suffix matchers also catch the bare `tokio::fs::File::open` // chain after paren-strip. `tokio::fs::*` is the // async-runtime-bound mirror of `std::fs::*`; same path // arg-0 semantics. "tokio::fs::read", "tokio::fs::read_to_string", "tokio::fs::write", "tokio::fs::remove_file", "tokio::fs::remove_dir", "tokio::fs::remove_dir_all", "tokio::fs::rename", "tokio::fs::copy", "tokio::fs::File::open", "tokio::fs::File::create", ], label: DataLabel::Sink(Cap::FILE_IO), case_sensitive: false, }, LabelRule { matchers: &[ "reqwest::get", "reqwest::Client.execute", "reqwest::Client.get", "reqwest::Client.post", "reqwest::Client.put", "reqwest::Client.delete", "reqwest::Client.head", "reqwest::Client.patch", "reqwest::Client.request", // Phase 14 — hyper Client `request(req)` dispatch entry. The // `req` builder chain (covered by the type-qualified // RequestBuilder.* / Request::builder.* rules below) smears // URL taint into the request value via default propagation. "hyper::Client.request", "hyper::client::Client.request", // Chained constructor + verb form: `reqwest::Client::new() // .post(url)` reduces (via root-receiver collapse) to chain // text `Client::new.post`, so existing `Client.post` matchers // miss it. Cover the chained shape directly. "Client::new.get", "Client::new.post", "Client::new.put", "Client::new.delete", "Client::new.head", "Client::new.patch", "Client::new.request", // surf free verbs are themselves SSRF gates , the URL is // their first positional argument. "surf::get", "surf::post", "surf::put", "surf::delete", "surf::head", "surf::patch", "surf::connect", "surf::trace", // ureq free verbs are HTTP request initiators. "ureq::get", "ureq::post", "ureq::put", "ureq::delete", "ureq::patch", "ureq::head", // Type-qualified (receiver typed as HttpClient) "HttpClient.get", "HttpClient.post", "HttpClient.put", "HttpClient.delete", "HttpClient.head", "HttpClient.patch", "HttpClient.request", "HttpClient.execute", "HttpClient.send", ], label: DataLabel::Sink(Cap::SSRF), case_sensitive: false, }, // Cross-boundary data exfiltration sinks. Outbound HTTP egress where // a Sensitive source (env, header, cookie, file, db) reaching the // request body / payload is a leak distinct from SSRF. Plain user // input is silenced by the source-sensitivity gate, so these only // fire when the source carries operator-bound state. // // Body-binding methods on the request builder: `body`, `json`, `form`, // `multipart` (reqwest); `body_string`, `body_json`, `body_bytes` // (surf); `send_string`, `send_json`, `send_form` (ureq, which // combines body-bind and dispatch). Plus `.send()` on an HttpClient // / RequestBuilder, where the chain receiver is typed. Chain text // matchers like `body.send` cover the all-in-one form // `Client::post(url).body(payload).send()`. LabelRule { matchers: &[ // Type-qualified terminal verbs (split form, typed receiver). "HttpClient.send", "HttpClient.execute", "RequestBuilder.send", // Type-qualified body-bind methods on a typed RequestBuilder. "RequestBuilder.body", "RequestBuilder.json", "RequestBuilder.form", "RequestBuilder.multipart", "RequestBuilder.body_string", "RequestBuilder.body_json", "RequestBuilder.body_bytes", "RequestBuilder.send_string", "RequestBuilder.send_json", "RequestBuilder.send_form", // surf / ureq method names that are unambiguous in Rust , // they only appear on HTTP request builders, so a bare-name // suffix matcher is safe. "body_string", "body_json", "body_bytes", "send_string", "send_json", "send_form", // Reqwest chain shapes. After paren-group strip the chain // text becomes `Client::post.body.send`, so the body-bind // verb sits before `.send` and a `body.send` suffix matcher // pins exfil-only firing to chains that actually bind a body. "body.send", "json.send", "form.send", "multipart.send", // hyper Request::builder().method(...).body(payload) , the // body-bind step is the leak point. `.unwrap` is a common // trailing identity method; we cover both shapes. "Request::builder.body", "Request::builder.method.body", "Request::builder.method.body.unwrap", "Request::builder.body.unwrap", // Two-step reqwest where the user has a dedicated `Client` // variable and uses `.execute(req)` on it. "Client::new.send", "Client::new.execute", ], label: DataLabel::Sink(Cap::DATA_EXFIL), case_sensitive: false, }, LabelRule { matchers: &[ "rusqlite::Connection.execute", "rusqlite::Connection.query", "rusqlite::Connection.query_row", "rusqlite::Connection.prepare", "sqlx::query", "sqlx::query_as", "sqlx::query_scalar", "diesel::sql_query", "postgres::Client.execute", "postgres::Client.query", "postgres::Client.prepare", // Type-qualified (receiver typed as DatabaseConnection) "DatabaseConnection.execute", "DatabaseConnection.query", "DatabaseConnection.query_row", "DatabaseConnection.prepare", ], label: DataLabel::Sink(Cap::SQL_QUERY), case_sensitive: false, }, LabelRule { matchers: &[ "serde_yaml::from_str", "serde_yaml::from_slice", "serde_yaml::from_reader", "bincode::deserialize", "bincode::deserialize_from", "rmp_serde::from_slice", "rmp_serde::from_read", "ciborium::from_reader", "ron::from_str", "toml::from_str", ], label: DataLabel::Sink(Cap::DESERIALIZE), case_sensitive: false, }, // ─── Header / CRLF injection sinks ─── // // `http::HeaderMap::insert(name, val)` / `append(...)` write a single // header value. The canonical idiom is `response.headers_mut().insert(...)` // (axum, actix-web `HttpResponse.headers_mut`, hyper `Response::headers_mut`). // After paren-group stripping the chain text becomes // `response.headers_mut.insert`, so suffix matchers on // `headers_mut.insert` / `headers_mut.append` cover the bound-receiver // form regardless of the response builder's concrete type. Tainted // strings without CRLF stripping enable response splitting. LabelRule { matchers: &["headers_mut.insert", "headers_mut.append"], label: DataLabel::Sink(Cap::HEADER_INJECTION), case_sensitive: false, }, LabelRule { matchers: &["strip_crlf", "escape_header", "sanitize_header"], label: DataLabel::Sanitizer(Cap::HEADER_INJECTION), case_sensitive: false, }, // ─── Open redirect sinks ─── // // axum / rocket `Redirect::to(url)` / `Redirect::permanent(url)` / // `Redirect::temporary(url)` build a 3xx response with the URL in the // `Location` header. Without an allowlist check, a tainted `url` is // the canonical Rust open-redirect vector. Listed unconditionally (not // gated on framework detection) so non-framework helpers / re-exports // still surface; the framework-conditional rules below are // intentionally not duplicating this label. Actix // `HttpResponse::Found().header("Location", x)` is covered by the // existing `header` HEADER_INJECTION sink and any Location-line // co-tagging is deferred to the abstract-string-domain pattern hook. LabelRule { matchers: &["Redirect::to", "Redirect::permanent", "Redirect::temporary"], label: DataLabel::Sink(Cap::OPEN_REDIRECT), case_sensitive: true, }, LabelRule { matchers: &[ "validate_redirect_url", "is_safe_redirect", "strip_scheme", "ensure_relative_url", "assert_relative_path", "is_relative_url", ], label: DataLabel::Sanitizer(Cap::OPEN_REDIRECT), case_sensitive: false, }, ]; /// Rust gated sinks. Argument-position-aware classification for callees /// where activation depends on a literal arg value rather than the bare /// callee name. pub static GATED_SINKS: &[SinkGate] = &[ // actix-web `HttpResponse::Found().header("Location", url)` (and other // builder variants like `Ok().header(...)`, `MovedPermanently().header(...)`). // After chain normalisation the callee text is e.g. // `HttpResponse.Found.header`; suffix matching on `header` covers every // builder variant. // // Activation: arg 0 case-insensitive equality with `"Location"`. When // arg 0 is a constant string equal to `Location` the gate fires and // checks payload arg 1 for taint; constants like `"Content-Type"` are // suppressed by the safe-literal branch. When arg 0 is dynamic the // gate fires conservatively (per the existing `setAttribute` / // `parseFromString` convention). // // Mirrors PHP's `=header` Location gate; the Rust analog is split // across two args (`name`, `value`) instead of PHP's single `Location: ...` // line. SinkGate { callee_matcher: "header", arg_index: 0, dangerous_values: &["Location"], dangerous_prefixes: &[], label: DataLabel::Sink(Cap::OPEN_REDIRECT), case_sensitive: true, payload_args: &[1], keyword_name: None, dangerous_kwargs: &[], activation: GateActivation::ValueMatch, }, ]; pub static KINDS: Map<&'static str, Kind> = phf_map! { // control-flow "if_expression" => Kind::If, "loop_expression" => Kind::InfiniteLoop, "while_statement" => Kind::While, "while_expression" => Kind::While, "for_statement" => Kind::For, "for_expression" => Kind::For, "return_statement" => Kind::Return, "return_expression" => Kind::Return, "break_expression" => Kind::Break, "break_statement" => Kind::Break, "continue_expression" => Kind::Continue, "continue_statement" => Kind::Continue, // structure "source_file" => Kind::SourceFile, "block" => Kind::Block, "else_clause" => Kind::Block, "match_expression" => Kind::Block, "match_block" => Kind::Block, "match_arm" => Kind::Block, "unsafe_block" => Kind::Block, "function_item" => Kind::Function, "closure_expression" => Kind::Function, "async_block" => Kind::Block, "impl_item" => Kind::Block, "trait_item" => Kind::Block, "declaration_list" => Kind::Block, // Inline modules `mod foo { ... }` wrap their items in a // `declaration_list`; map to Block so the CFG builder recurses into the // body and the `function_item`s inside are lowered, instead of dropping // the whole module (the old `Kind::Trivia` mapping discarded every // function/source/sink inside an inline module). "mod_item" => Kind::Block, // data-flow "call_expression" => Kind::CallFn, "method_call_expression" => Kind::CallMethod, "macro_invocation" => Kind::CallMacro, "let_declaration" => Kind::CallWrapper, "expression_statement" => Kind::CallWrapper, "assignment_expression" => Kind::Assignment, // `x.await` postfix. Documented per-language so the contract does // not depend on the raw-string fallback in `cfg::push_node`; SSA // lowering emits `Assign(operand)` for these nodes. "await_expression" => Kind::AwaitForward, // struct expressions, recurse so env::var() calls inside field // initialisers produce Source-labelled CFG nodes (needed for summaries). "struct_expression" => Kind::Block, "field_initializer_list" => Kind::Block, "field_initializer" => Kind::CallWrapper, // trivia "line_comment" => Kind::Trivia, "block_comment" => Kind::Trivia, ";" => Kind::Trivia, "," => Kind::Trivia, "(" => Kind::Trivia, ")" => Kind::Trivia, "{" => Kind::Trivia, "}" => Kind::Trivia, "\n" => Kind::Trivia, "use_declaration" => Kind::Trivia, "attribute_item" => Kind::Trivia, "type_item" => Kind::Trivia, }; pub static PARAM_CONFIG: ParamConfig = ParamConfig { params_field: "parameters", param_node_kinds: &["parameter"], self_param_kinds: &["self_parameter"], ident_fields: &["pattern"], }; /// Framework-conditional rules for Rust. pub fn framework_rules(ctx: &FrameworkContext) -> Vec { let mut rules = Vec::new(); if ctx.has(DetectedFramework::Axum) { rules.push(RuntimeLabelRule { matchers: vec![ "Path".into(), "Query".into(), "Json".into(), "Form".into(), "Multipart".into(), "HeaderMap".into(), "HeaderMap.get".into(), "Request.headers".into(), "Request.uri".into(), "headers.get".into(), ], label: DataLabel::Source(Cap::all()), case_sensitive: true, }); rules.push(RuntimeLabelRule { matchers: vec!["Html".into(), "IntoResponse".into()], label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: true, }); // `Redirect::to` is declared unconditionally as Sink(OPEN_REDIRECT) // in `RULES` above; no framework-conditional duplicate needed. } if ctx.has(DetectedFramework::ActixWeb) { rules.push(RuntimeLabelRule { matchers: vec![ "web::Path".into(), "web::Query".into(), "web::Json".into(), "web::Form".into(), "web::Bytes".into(), "HttpRequest".into(), "HttpRequest.headers".into(), "HttpRequest.cookie".into(), "HttpRequest.match_info".into(), "HttpRequest.query_string".into(), ], label: DataLabel::Source(Cap::all()), case_sensitive: true, }); rules.push(RuntimeLabelRule { matchers: vec![ "HttpResponse.body".into(), "HttpResponse.json".into(), "HttpResponse.content_type".into(), "body".into(), "json".into(), ], label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: true, }); } if ctx.has(DetectedFramework::Rocket) { rules.push(RuntimeLabelRule { matchers: vec![ "Json".into(), "Form".into(), "LenientForm".into(), "TempFile".into(), "CookieJar".into(), "CookieJar.get".into(), "CookieJar.get_private".into(), "Request.headers".into(), "Request.cookies".into(), ], label: DataLabel::Source(Cap::all()), case_sensitive: true, }); rules.push(RuntimeLabelRule { matchers: vec!["RawHtml".into(), "content::RawHtml".into(), "Html".into()], label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: true, }); // `Redirect::to` is declared unconditionally as Sink(OPEN_REDIRECT) // in `RULES` above; no framework-conditional duplicate needed. } rules } /// auth-as-taint label rules for Rust. Gated by /// `config.scanner.enable_auth_as_taint`; appended to the runtime rule set /// when the flag is enabled. These declare **sinks** (state-changing or /// outbound operations that should not be reached by an un-checked /// request-bound id) and **sanitizers** (ownership/membership guards that /// validate a caller-supplied id). pub fn phase_c_auth_rules() -> Vec { vec![ // ── Sinks requiring Cap::UNAUTHORIZED_ID ── // Realtime / pub-sub: broadcasting on a caller-supplied group/channel // id without first verifying membership is the canonical cross-tenant // leak. RuntimeLabelRule { matchers: vec![ "realtime::publish".into(), "realtime::publish_to_group".into(), "realtime::publish_to_channel".into(), "realtime::broadcast".into(), "broadcaster::send".into(), "broadcaster::publish".into(), "pubsub::publish".into(), ], label: DataLabel::Sink(Cap::UNAUTHORIZED_ID), case_sensitive: false, }, // Database mutations keyed by caller-supplied id. These overlay the // existing SQL_QUERY sink declarations (multi-label composition) so // a bare id carrying only UNAUTHORIZED_ID still fires. RuntimeLabelRule { matchers: vec![ "rusqlite::Connection.execute".into(), "postgres::Client.execute".into(), "sqlx::query".into(), "sqlx::query_as".into(), "diesel::insert_into".into(), "diesel::update".into(), "diesel::delete".into(), // Type-qualified (receiver typed as DatabaseConnection) "DatabaseConnection.execute".into(), "DatabaseConnection.query".into(), ], label: DataLabel::Sink(Cap::UNAUTHORIZED_ID), case_sensitive: false, }, // Outbound cache writes. RuntimeLabelRule { matchers: vec![ "redis::cmd".into(), "cache::set".into(), "cache::set_ex".into(), "cache::insert".into(), ], label: DataLabel::Sink(Cap::UNAUTHORIZED_ID), case_sensitive: false, }, // ── Sanitizers clearing Cap::UNAUTHORIZED_ID ── // Ownership and membership guards consumed via call-site // argument sanitization (see `is_auth_as_taint_arg_sanitizer`). RuntimeLabelRule { matchers: vec![ "check_ownership".into(), "has_ownership".into(), "require_ownership".into(), "ensure_ownership".into(), "is_owner".into(), "authorize".into(), "verify_access".into(), "has_permission".into(), "can_access".into(), "can_manage".into(), "require_group_member".into(), "require_org_member".into(), "require_workspace_member".into(), "require_tenant_member".into(), "require_team_member".into(), "require_membership".into(), "check_membership".into(), "authz::require".into(), "authz::check".into(), ], label: DataLabel::Sanitizer(Cap::UNAUTHORIZED_ID), case_sensitive: false, }, ] } #[cfg(test)] mod tests { use super::KINDS; use crate::labels::Kind; #[test] fn mod_item_is_walkable_block_not_trivia() { // Inline `mod foo { ... }` must be a Block so the CFG builder recurses // into the module body; the old Trivia mapping dropped every function, // source, and sink inside inline modules. assert_eq!(KINDS.get("mod_item"), Some(&Kind::Block)); assert_ne!(KINDS.get("mod_item"), Some(&Kind::Trivia)); } }