use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig, RuntimeLabelRule}; use crate::utils::project::{DetectedFramework, FrameworkContext}; use phf::{Map, phf_map}; pub static RULES: &[LabelRule] = &[ // ─────────── Sources ─────────── LabelRule { matchers: &["std::env::var", "env::var", "source_env"], label: DataLabel::Source(Cap::all()), case_sensitive: false, }, LabelRule { matchers: &["source_file"], label: DataLabel::Source(Cap::all()), case_sensitive: false, }, LabelRule { matchers: &["fs::read_to_string", "fs::read"], label: DataLabel::Source(Cap::all()), case_sensitive: false, }, // Inbound HTTP request metadata: headers, cookies, query strings, // and body extractors. These only carry caller-supplied bytes when // the framework binds them (the framework-conditional rules attach // the same labels for axum / actix / rocket extractors). Including // the bare suffix matchers here means a `req.headers().get("h")` // chain in non-framework code (e.g. internal helpers that take an // `&HeaderMap`) still surfaces as a Source. `infer_source_kind` // routes these to `Header` / `Cookie` (Sensitive), enabling // DATA_EXFIL gating downstream. LabelRule { matchers: &[ // Type-qualified (receiver typed as HttpRequest, HeaderMap, ...) "HttpRequest.headers", "HttpRequest.cookie", "HttpRequest.cookies", "Request.headers", "Request.cookies", "Request.uri", // Bare HeaderMap / cookie-jar accessors. "headers.get", "headers.get_all", "CookieJar.get", "CookieJar.get_private", "CookieJar.get_signed", ], label: DataLabel::Source(Cap::all()), case_sensitive: false, }, // ───────── Sanitizers ────────── LabelRule { matchers: &["html_escape::encode_safe", "sanitize_", "sanitize_html"], label: DataLabel::Sanitizer(Cap::HTML_ESCAPE), case_sensitive: false, }, LabelRule { matchers: &["shell_escape::unix::escape", "sanitize_shell"], label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE), case_sensitive: false, }, // ─────────── Sinks ───────────── LabelRule { matchers: &[ "command::new", "std::process::command::new", "command::arg", "command::args", "command::status", "command::output", ], label: DataLabel::Sink(Cap::SHELL_ESCAPE), case_sensitive: false, }, LabelRule { matchers: &["sink_html"], label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: false, }, LabelRule { matchers: &[ "fs::read_to_string", "fs::write", "fs::read", "fs::remove_file", "fs::remove_dir", "fs::remove_dir_all", "fs::rename", "fs::copy", "File::open", "File::create", ], label: DataLabel::Sink(Cap::FILE_IO), case_sensitive: false, }, LabelRule { matchers: &[ "reqwest::get", "reqwest::Client.execute", "reqwest::Client.get", "reqwest::Client.post", "reqwest::Client.put", "reqwest::Client.delete", "reqwest::Client.head", "reqwest::Client.patch", "reqwest::Client.request", // Chained constructor + verb form: `reqwest::Client::new() // .post(url)` reduces (via root-receiver collapse) to chain // text `Client::new.post`, so existing `Client.post` matchers // miss it. Cover the chained shape directly. "Client::new.get", "Client::new.post", "Client::new.put", "Client::new.delete", "Client::new.head", "Client::new.patch", "Client::new.request", // surf free verbs are themselves SSRF gates , the URL is // their first positional argument. "surf::get", "surf::post", "surf::put", "surf::delete", "surf::head", "surf::patch", "surf::connect", "surf::trace", // ureq free verbs are HTTP request initiators. "ureq::get", "ureq::post", "ureq::put", "ureq::delete", "ureq::patch", "ureq::head", // Type-qualified (receiver typed as HttpClient) "HttpClient.get", "HttpClient.post", "HttpClient.put", "HttpClient.delete", "HttpClient.head", "HttpClient.patch", "HttpClient.request", "HttpClient.execute", "HttpClient.send", ], label: DataLabel::Sink(Cap::SSRF), case_sensitive: false, }, // Cross-boundary data exfiltration sinks. Outbound HTTP egress where // a Sensitive source (env, header, cookie, file, db) reaching the // request body / payload is a leak distinct from SSRF. Plain user // input is silenced by the source-sensitivity gate, so these only // fire when the source carries operator-bound state. // // Body-binding methods on the request builder: `body`, `json`, `form`, // `multipart` (reqwest); `body_string`, `body_json`, `body_bytes` // (surf); `send_string`, `send_json`, `send_form` (ureq, which // combines body-bind and dispatch). Plus `.send()` on an HttpClient // / RequestBuilder, where the chain receiver is typed. Chain text // matchers like `body.send` cover the all-in-one form // `Client::post(url).body(payload).send()`. LabelRule { matchers: &[ // Type-qualified terminal verbs (split form, typed receiver). "HttpClient.send", "HttpClient.execute", "RequestBuilder.send", // Type-qualified body-bind methods on a typed RequestBuilder. "RequestBuilder.body", "RequestBuilder.json", "RequestBuilder.form", "RequestBuilder.multipart", "RequestBuilder.body_string", "RequestBuilder.body_json", "RequestBuilder.body_bytes", "RequestBuilder.send_string", "RequestBuilder.send_json", "RequestBuilder.send_form", // surf / ureq method names that are unambiguous in Rust , // they only appear on HTTP request builders, so a bare-name // suffix matcher is safe. "body_string", "body_json", "body_bytes", "send_string", "send_json", "send_form", // Reqwest chain shapes. After paren-group strip the chain // text becomes `Client::post.body.send`, so the body-bind // verb sits before `.send` and a `body.send` suffix matcher // pins exfil-only firing to chains that actually bind a body. "body.send", "json.send", "form.send", "multipart.send", // hyper Request::builder().method(...).body(payload) , the // body-bind step is the leak point. `.unwrap` is a common // trailing identity method; we cover both shapes. "Request::builder.body", "Request::builder.method.body", "Request::builder.method.body.unwrap", "Request::builder.body.unwrap", // Two-step reqwest where the user has a dedicated `Client` // variable and uses `.execute(req)` on it. "Client::new.send", "Client::new.execute", ], label: DataLabel::Sink(Cap::DATA_EXFIL), case_sensitive: false, }, LabelRule { matchers: &[ "rusqlite::Connection.execute", "rusqlite::Connection.query", "rusqlite::Connection.query_row", "rusqlite::Connection.prepare", "sqlx::query", "sqlx::query_as", "sqlx::query_scalar", "diesel::sql_query", "postgres::Client.execute", "postgres::Client.query", "postgres::Client.prepare", // Type-qualified (receiver typed as DatabaseConnection) "DatabaseConnection.execute", "DatabaseConnection.query", "DatabaseConnection.query_row", "DatabaseConnection.prepare", ], label: DataLabel::Sink(Cap::SQL_QUERY), case_sensitive: false, }, LabelRule { matchers: &[ "serde_yaml::from_str", "serde_yaml::from_slice", "serde_yaml::from_reader", "bincode::deserialize", "bincode::deserialize_from", "rmp_serde::from_slice", "rmp_serde::from_read", "ciborium::from_reader", "ron::from_str", "toml::from_str", ], label: DataLabel::Sink(Cap::DESERIALIZE), case_sensitive: false, }, ]; pub static KINDS: Map<&'static str, Kind> = phf_map! { // control-flow "if_expression" => Kind::If, "loop_expression" => Kind::InfiniteLoop, "while_statement" => Kind::While, "while_expression" => Kind::While, "for_statement" => Kind::For, "for_expression" => Kind::For, "return_statement" => Kind::Return, "return_expression" => Kind::Return, "break_expression" => Kind::Break, "break_statement" => Kind::Break, "continue_expression" => Kind::Continue, "continue_statement" => Kind::Continue, // structure "source_file" => Kind::SourceFile, "block" => Kind::Block, "else_clause" => Kind::Block, "match_expression" => Kind::Block, "match_block" => Kind::Block, "match_arm" => Kind::Block, "unsafe_block" => Kind::Block, "function_item" => Kind::Function, "closure_expression" => Kind::Function, "async_block" => Kind::Block, "impl_item" => Kind::Block, "trait_item" => Kind::Block, "declaration_list" => Kind::Block, // data-flow "call_expression" => Kind::CallFn, "method_call_expression" => Kind::CallMethod, "macro_invocation" => Kind::CallMacro, "let_declaration" => Kind::CallWrapper, "expression_statement" => Kind::CallWrapper, "assignment_expression" => Kind::Assignment, // struct expressions, recurse so env::var() calls inside field // initialisers produce Source-labelled CFG nodes (needed for summaries). "struct_expression" => Kind::Block, "field_initializer_list" => Kind::Block, "field_initializer" => Kind::CallWrapper, // trivia "line_comment" => Kind::Trivia, "block_comment" => Kind::Trivia, ";" => Kind::Trivia, "," => Kind::Trivia, "(" => Kind::Trivia, ")" => Kind::Trivia, "{" => Kind::Trivia, "}" => Kind::Trivia, "\n" => Kind::Trivia, "use_declaration" => Kind::Trivia, "attribute_item" => Kind::Trivia, "mod_item" => Kind::Trivia, "type_item" => Kind::Trivia, }; pub static PARAM_CONFIG: ParamConfig = ParamConfig { params_field: "parameters", param_node_kinds: &["parameter"], self_param_kinds: &["self_parameter"], ident_fields: &["pattern"], }; /// Framework-conditional rules for Rust. pub fn framework_rules(ctx: &FrameworkContext) -> Vec { let mut rules = Vec::new(); if ctx.has(DetectedFramework::Axum) { rules.push(RuntimeLabelRule { matchers: vec![ "Path".into(), "Query".into(), "Json".into(), "Form".into(), "Multipart".into(), "HeaderMap".into(), "HeaderMap.get".into(), "Request.headers".into(), "Request.uri".into(), "headers.get".into(), ], label: DataLabel::Source(Cap::all()), case_sensitive: true, }); rules.push(RuntimeLabelRule { matchers: vec!["Html".into(), "IntoResponse".into()], label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: true, }); rules.push(RuntimeLabelRule { matchers: vec!["Redirect::to".into()], label: DataLabel::Sink(Cap::SSRF), case_sensitive: true, }); } if ctx.has(DetectedFramework::ActixWeb) { rules.push(RuntimeLabelRule { matchers: vec![ "web::Path".into(), "web::Query".into(), "web::Json".into(), "web::Form".into(), "web::Bytes".into(), "HttpRequest".into(), "HttpRequest.headers".into(), "HttpRequest.cookie".into(), "HttpRequest.match_info".into(), "HttpRequest.query_string".into(), ], label: DataLabel::Source(Cap::all()), case_sensitive: true, }); rules.push(RuntimeLabelRule { matchers: vec![ "HttpResponse.body".into(), "HttpResponse.json".into(), "HttpResponse.content_type".into(), "body".into(), "json".into(), ], label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: true, }); } if ctx.has(DetectedFramework::Rocket) { rules.push(RuntimeLabelRule { matchers: vec![ "Json".into(), "Form".into(), "LenientForm".into(), "TempFile".into(), "CookieJar".into(), "CookieJar.get".into(), "CookieJar.get_private".into(), "Request.headers".into(), "Request.cookies".into(), ], label: DataLabel::Source(Cap::all()), case_sensitive: true, }); rules.push(RuntimeLabelRule { matchers: vec!["RawHtml".into(), "content::RawHtml".into(), "Html".into()], label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: true, }); rules.push(RuntimeLabelRule { matchers: vec!["Redirect::to".into()], label: DataLabel::Sink(Cap::SSRF), case_sensitive: true, }); } rules } /// auth-as-taint label rules for Rust. Gated by /// `config.scanner.enable_auth_as_taint`; appended to the runtime rule set /// when the flag is enabled. These declare **sinks** (state-changing or /// outbound operations that should not be reached by an un-checked /// request-bound id) and **sanitizers** (ownership/membership guards that /// validate a caller-supplied id). pub fn phase_c_auth_rules() -> Vec { vec![ // ── Sinks requiring Cap::UNAUTHORIZED_ID ── // Realtime / pub-sub: broadcasting on a caller-supplied group/channel // id without first verifying membership is the canonical cross-tenant // leak. RuntimeLabelRule { matchers: vec![ "realtime::publish".into(), "realtime::publish_to_group".into(), "realtime::publish_to_channel".into(), "realtime::broadcast".into(), "broadcaster::send".into(), "broadcaster::publish".into(), "pubsub::publish".into(), ], label: DataLabel::Sink(Cap::UNAUTHORIZED_ID), case_sensitive: false, }, // Database mutations keyed by caller-supplied id. These overlay the // existing SQL_QUERY sink declarations (multi-label composition) so // a bare id carrying only UNAUTHORIZED_ID still fires. RuntimeLabelRule { matchers: vec![ "rusqlite::Connection.execute".into(), "postgres::Client.execute".into(), "sqlx::query".into(), "sqlx::query_as".into(), "diesel::insert_into".into(), "diesel::update".into(), "diesel::delete".into(), // Type-qualified (receiver typed as DatabaseConnection) "DatabaseConnection.execute".into(), "DatabaseConnection.query".into(), ], label: DataLabel::Sink(Cap::UNAUTHORIZED_ID), case_sensitive: false, }, // Outbound cache writes. RuntimeLabelRule { matchers: vec![ "redis::cmd".into(), "cache::set".into(), "cache::set_ex".into(), "cache::insert".into(), ], label: DataLabel::Sink(Cap::UNAUTHORIZED_ID), case_sensitive: false, }, // ── Sanitizers clearing Cap::UNAUTHORIZED_ID ── // Ownership and membership guards consumed via call-site // argument sanitization (see `is_auth_as_taint_arg_sanitizer`). RuntimeLabelRule { matchers: vec![ "check_ownership".into(), "has_ownership".into(), "require_ownership".into(), "ensure_ownership".into(), "is_owner".into(), "authorize".into(), "verify_access".into(), "has_permission".into(), "can_access".into(), "can_manage".into(), "require_group_member".into(), "require_org_member".into(), "require_workspace_member".into(), "require_tenant_member".into(), "require_team_member".into(), "require_membership".into(), "check_membership".into(), "authz::require".into(), "authz::check".into(), ], label: DataLabel::Sanitizer(Cap::UNAUTHORIZED_ID), case_sensitive: false, }, ] }