mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Precision pass on auth and resource analysis (#63)
This commit is contained in:
parent
064801a3a4
commit
c7c5e0f3a1
62 changed files with 4248 additions and 138 deletions
|
|
@ -16,12 +16,15 @@ pub struct AuthFinding {
|
|||
pub fn run_checks(model: &AuthorizationModel, rules: &AuthAnalysisRules) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
let web_signal = model.lang_web_framework_signal;
|
||||
let lang = model.lang.as_str();
|
||||
findings.extend(check_admin_routes(model, rules));
|
||||
findings.extend(check_ownership_gaps(model, rules, web_signal));
|
||||
findings.extend(check_partial_batch_authorization(model, rules, web_signal));
|
||||
findings.extend(check_stale_authorization(model, rules, web_signal));
|
||||
findings.extend(check_ownership_gaps(model, rules, web_signal, lang));
|
||||
findings.extend(check_partial_batch_authorization(
|
||||
model, rules, web_signal, lang,
|
||||
));
|
||||
findings.extend(check_stale_authorization(model, rules, web_signal, lang));
|
||||
findings.extend(check_token_override_without_validation(
|
||||
model, rules, web_signal,
|
||||
model, rules, web_signal, lang,
|
||||
));
|
||||
findings.sort_by(|a, b| a.span.cmp(&b.span).then_with(|| a.rule_id.cmp(&b.rule_id)));
|
||||
findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id);
|
||||
|
|
@ -70,11 +73,12 @@ fn check_ownership_gaps(
|
|||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
lang: &str,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for unit in &model.units {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal, lang) {
|
||||
continue;
|
||||
}
|
||||
for op in &unit.operations {
|
||||
|
|
@ -123,11 +127,12 @@ fn check_partial_batch_authorization(
|
|||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
lang: &str,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for unit in &model.units {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal, lang) {
|
||||
continue;
|
||||
}
|
||||
for op in &unit.operations {
|
||||
|
|
@ -178,11 +183,12 @@ fn check_stale_authorization(
|
|||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
lang: &str,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for unit in &model.units {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal, lang) {
|
||||
continue;
|
||||
}
|
||||
for op in unit.operations.iter().filter(|operation| {
|
||||
|
|
@ -226,6 +232,7 @@ fn check_token_override_without_validation(
|
|||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
lang: &str,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
|
|
@ -239,7 +246,7 @@ fn check_token_override_without_validation(
|
|||
// call shape happens to look token-y (`account.token = …;
|
||||
// account.save()`). Gate on positive user-input evidence so
|
||||
// these pure backend units are never claimed as a token flow.
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal, lang) {
|
||||
continue;
|
||||
}
|
||||
let Some(token_lookup) = unit
|
||||
|
|
@ -938,7 +945,7 @@ fn is_id_like_name(name: &str) -> bool {
|
|||
/// pure utility helpers fail all three conditions and are skipped;
|
||||
/// they cannot, by construction, be the entry point of an
|
||||
/// authentication-bearing flow.
|
||||
fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>) -> bool {
|
||||
fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>, lang: &str) -> bool {
|
||||
if unit.kind == AnalysisUnitKind::RouteHandler {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -960,7 +967,9 @@ fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>) -
|
|||
if !unit.context_inputs.is_empty() {
|
||||
return true;
|
||||
}
|
||||
unit.params.iter().any(|p| is_external_input_param_name(p))
|
||||
unit.params
|
||||
.iter()
|
||||
.any(|p| is_external_input_param_name_for_lang(p, lang))
|
||||
}
|
||||
|
||||
/// Parameter-name heuristic: does this name carry external/user input
|
||||
|
|
@ -974,7 +983,33 @@ fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>) -
|
|||
/// Used by `unit_has_user_input_evidence` to recognise helper
|
||||
/// functions that, while not registered as route handlers, are
|
||||
/// clearly invoked with caller-supplied identifiers or request data.
|
||||
#[cfg(test)]
|
||||
fn is_external_input_param_name(name: &str) -> bool {
|
||||
is_external_input_param_name_for_lang(name, "")
|
||||
}
|
||||
|
||||
/// Lang-aware variant of [`is_external_input_param_name`]. When `lang`
|
||||
/// names a language whose framework conventions don't use the generic
|
||||
/// typed-extractor names from the JS/TS/Python ecosystems, the
|
||||
/// framework-name allow-list is narrowed accordingly.
|
||||
///
|
||||
/// Currently narrowed for Go. In Go the names `ctx` / `context` /
|
||||
/// `info` / `body` / `path` / `payload` / `dto` / `form` / `query` are
|
||||
/// not framework-request indicators — they're, respectively,
|
||||
/// `context.Context` (cancellation/value-bag from the stdlib) and a
|
||||
/// menagerie of struct-pointer payload params (`info *PackageInfo`,
|
||||
/// `opts *FooOptions`). Go's actual HTTP frameworks bind the request
|
||||
/// to a per-framework typed param (`r *http.Request`, `c *gin.Context`,
|
||||
/// `c echo.Context`, `c *fiber.Ctx`, `ctx *context.APIContext`); these
|
||||
/// arrive at the gate via `kind == RouteHandler` (set by the route
|
||||
/// extractor) or via the type-aware param filter in
|
||||
/// `extract::common::collect_param_names` (which keeps `ctx` only when
|
||||
/// its type is **not** the stdlib `context.Context`).
|
||||
///
|
||||
/// Real-repo trigger: `/Users/elipeter/oss/gitea` ─ ~1900
|
||||
/// `go.auth.missing_ownership_check` findings on backend helpers whose
|
||||
/// only "user-input evidence" was a `ctx context.Context` param name.
|
||||
fn is_external_input_param_name_for_lang(name: &str, lang: &str) -> bool {
|
||||
// Pytest / unittest.mock convention: parameters injected by
|
||||
// `@mock.patch(...)` decorators are universally named
|
||||
// `mock_<thing>` (`mock_project_id`, `mock_session`,
|
||||
|
|
@ -1011,6 +1046,13 @@ fn is_external_input_param_name(name: &str) -> bool {
|
|||
// matching on the name is a reliable proxy for the typed
|
||||
// extractor binding. Bare `c` is too common (incidental local
|
||||
// variable) to include without an additional type signal.
|
||||
if matches!(lang, "go") {
|
||||
// Go's allow-list: only `req` / `request` (the stdlib
|
||||
// `*http.Request` convention). All other names from the
|
||||
// generic allow-list have language-specific meanings in Go
|
||||
// that aren't user-input ─ see fn doc-comment above.
|
||||
return matches!(lower.as_str(), "req" | "request");
|
||||
}
|
||||
matches!(
|
||||
lower.as_str(),
|
||||
"req"
|
||||
|
|
@ -1361,23 +1403,23 @@ mod tests {
|
|||
// Function with no params and no context_inputs (Celery task
|
||||
// shape), must NOT count as user-input-bearing.
|
||||
let mut unit = empty_unit();
|
||||
assert!(!unit_has_user_input_evidence(&unit, None));
|
||||
assert!(!unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Adding internal-typed params (apps, schema_editor, Django
|
||||
// migration RunPython callback shape) keeps the gate closed.
|
||||
unit.params.push("apps".into());
|
||||
unit.params.push("schema_editor".into());
|
||||
assert!(!unit_has_user_input_evidence(&unit, None));
|
||||
assert!(!unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// pytest hook shape: (config, items), gate stays closed.
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("config".into());
|
||||
unit.params.push("items".into());
|
||||
assert!(!unit_has_user_input_evidence(&unit, None));
|
||||
assert!(!unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Adding an id-like param flips the gate open.
|
||||
unit.params.push("doc_id".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Token-named param flips the gate open (Express helper
|
||||
// `acceptInvitation(token, currentUser, roleOverride)`).
|
||||
|
|
@ -1385,23 +1427,23 @@ mod tests {
|
|||
unit.params.push("token".into());
|
||||
unit.params.push("currentUser".into());
|
||||
unit.params.push("roleOverride".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Framework request-name param flips the gate open
|
||||
// (Django/Flask `def view(request, project_id):`).
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("request".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Axum/Actix typed-extractor convention name flips it open.
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("path".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// RouteHandler kind always wins, regardless of params.
|
||||
let mut unit = empty_unit();
|
||||
unit.kind = AnalysisUnitKind::RouteHandler;
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
}
|
||||
|
||||
/// Web-framework signal `Some(false)` (project's manifest was
|
||||
|
|
@ -1422,9 +1464,9 @@ mod tests {
|
|||
// every desktop helper.
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("session_id".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(true)));
|
||||
assert!(!unit_has_user_input_evidence(&unit, Some(false)));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(true), ""));
|
||||
assert!(!unit_has_user_input_evidence(&unit, Some(false), ""));
|
||||
|
||||
// Step 1 (RouteHandler) still wins regardless of the gate.
|
||||
// RouteHandler kind is set by framework extractors (axum /
|
||||
|
|
@ -1432,7 +1474,7 @@ mod tests {
|
|||
// robust enough to bypass the project-level gate even when
|
||||
// the manifest doesn't name the framework.
|
||||
unit.kind = AnalysisUnitKind::RouteHandler;
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(false)));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(false), ""));
|
||||
|
||||
// context_inputs arm: bare `session.foo` on a debug-session
|
||||
// handle (not an auth session) lands in `context_inputs` via
|
||||
|
|
@ -1448,9 +1490,9 @@ mod tests {
|
|||
index: None,
|
||||
span: (0, 0),
|
||||
});
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(true)));
|
||||
assert!(!unit_has_user_input_evidence(&unit, Some(false)));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(true), ""));
|
||||
assert!(!unit_has_user_input_evidence(&unit, Some(false), ""));
|
||||
}
|
||||
|
||||
/// `is_external_input_param_name` covers id-, token-, and
|
||||
|
|
@ -1499,6 +1541,47 @@ mod tests {
|
|||
assert!(!is_external_input_param_name("mocked_token"));
|
||||
}
|
||||
|
||||
/// For Go, the framework-request-name allow-list shrinks to `req` /
/// `request`.
///
/// The generic extractor-style names used for other languages (`ctx`,
/// `context`, `info`, `body`, `path`, `payload`, `dto`, `form`,
/// `query`) must not count as external input when `lang == "go"`,
/// while id-shaped and token-shaped names keep firing in every
/// language. The narrowing was motivated by roughly 1900 gitea helpers
/// that passed the gate solely on a `ctx context.Context` parameter.
#[test]
fn external_input_param_name_for_go_narrows_allowlist() {
    use super::is_external_input_param_name_for_lang as f;

    // Id-shaped / token-shaped names are cross-language, and the
    // `req` / `request` convention is preserved for Go.
    for name in ["user_id", "repoID", "access_token", "req", "request"] {
        assert!(f(name, "go"), "`{name}` should be external input for go");
    }

    // Generic names that are not request indicators in Go.
    for name in [
        "ctx", "context", "info", "body", "path", "payload", "dto", "form", "query",
    ] {
        assert!(!f(name, "go"), "`{name}` must not be external input for go");
    }

    // The same generic names still fire for other languages.
    for (name, lang) in [
        ("ctx", "javascript"),
        ("body", "typescript"),
        ("path", "rust"),
        ("payload", "python"),
    ] {
        assert!(f(name, lang), "`{name}` should be external input for {lang}");
    }
}
|
||||
|
||||
/// Row-fetch exemption.
|
||||
///
|
||||
/// Row var declared at line 10; auth check naming the row appears
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ fn collect_top_level_from_node(
|
|||
}
|
||||
}
|
||||
}
|
||||
"program" | "source_file" | "module" | "class" | "class_declaration" | "class_body"
|
||||
"program" | "source_file" | "module" | "class_declaration" | "class_body"
|
||||
| "body_statement" => {
|
||||
for idx in 0..node.named_child_count() {
|
||||
let Some(child) = node.named_child(idx as u32) else {
|
||||
|
|
@ -113,10 +113,252 @@ fn collect_top_level_from_node(
|
|||
collect_top_level_from_node(child, bytes, rules, model, file_meta);
|
||||
}
|
||||
}
|
||||
// Ruby `class Foo; ... end`. Gate method descent through the
|
||||
// visibility / callback-target filter so private helpers and
|
||||
// `before_action :foo`-style callback targets are not emitted
|
||||
// as `Function` units (the upstream cause of
|
||||
// `rb.auth.missing_ownership_check` FPs on `set_X` row-fetch
|
||||
// helpers in mastodon / diaspora controllers). Non-method
|
||||
// class-body children (nested `class` / `module` /
|
||||
// `singleton_method`) still recurse normally.
|
||||
"class" => {
|
||||
let body = node.child_by_field_name("body");
|
||||
let visibility = body
|
||||
.map(|b| ruby_method_visibility(b, bytes))
|
||||
.unwrap_or_default();
|
||||
let callbacks = body
|
||||
.map(|b| ruby_callback_target_names(b, bytes))
|
||||
.unwrap_or_default();
|
||||
for idx in 0..node.named_child_count() {
|
||||
let Some(child) = node.named_child(idx as u32) else {
|
||||
continue;
|
||||
};
|
||||
if Some(child) == body {
|
||||
for body_idx in 0..child.named_child_count() {
|
||||
let Some(grand) = child.named_child(body_idx as u32) else {
|
||||
continue;
|
||||
};
|
||||
if grand.kind() == "method" {
|
||||
let name = function_name(grand, bytes).unwrap_or_default();
|
||||
if !name.is_empty()
|
||||
&& ruby_method_is_callback_or_private(
|
||||
&name,
|
||||
&visibility,
|
||||
&callbacks,
|
||||
)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
collect_top_level_from_node(grand, bytes, rules, model, file_meta);
|
||||
}
|
||||
} else {
|
||||
collect_top_level_from_node(child, bytes, rules, model, file_meta);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Visibility level attributed to a Ruby method inside a class body.
///
/// One variant per Ruby visibility directive (`public`, `protected`,
/// `private`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RubyVisibility {
    /// Selected by `public`; also the level the visibility walk starts
    /// from and the fallback for methods it has not recorded.
    Public,
    /// Selected by `protected`.
    Protected,
    /// Selected by `private`.
    Private,
}
|
||||
|
||||
/// Walk a Ruby class body in source order and attribute each method
|
||||
/// definition's visibility, mirroring Ruby's `private` / `protected` /
|
||||
/// `public` directive semantics.
|
||||
///
|
||||
/// Two directive forms are recognised:
|
||||
/// 1. **Bare** (`private`). Tree-sitter parses these as a top-level
|
||||
/// `(identifier "private")` sibling. Toggles default visibility
|
||||
/// for every subsequent method.
|
||||
/// 2. **Targeted** (`private :foo, :bar`). Parsed as
|
||||
/// `(call method:identifier arguments:argument_list ...)`.
|
||||
/// Explicitly marks the named methods; does not change default.
|
||||
pub fn ruby_method_visibility(
|
||||
body: Node<'_>,
|
||||
bytes: &[u8],
|
||||
) -> std::collections::HashMap<String, RubyVisibility> {
|
||||
use crate::auth_analysis::config::matches_name;
|
||||
use std::collections::HashMap;
|
||||
|
||||
let mut map: HashMap<String, RubyVisibility> = HashMap::new();
|
||||
let mut current = RubyVisibility::Public;
|
||||
for child in named_children(body) {
|
||||
match child.kind() {
|
||||
"identifier" => {
|
||||
if let Some(vis) = ruby_visibility_for_directive(text(child, bytes).trim()) {
|
||||
current = vis;
|
||||
}
|
||||
}
|
||||
"call" => {
|
||||
let callee_full = call_name(child, bytes);
|
||||
let callee = bare_method_name(&callee_full);
|
||||
let Some(target_vis) = ruby_visibility_for_directive(callee) else {
|
||||
continue;
|
||||
};
|
||||
let arguments = child.child_by_field_name("arguments");
|
||||
let args: Vec<Node<'_>> = arguments
|
||||
.map(|node| named_children(node))
|
||||
.unwrap_or_default();
|
||||
if args.is_empty() {
|
||||
current = target_vis;
|
||||
continue;
|
||||
}
|
||||
let mut targeted_any = false;
|
||||
for arg in args {
|
||||
for name in ruby_symbol_names(arg, bytes) {
|
||||
if name.is_empty() {
|
||||
continue;
|
||||
}
|
||||
map.insert(name, target_vis);
|
||||
targeted_any = true;
|
||||
}
|
||||
if arg.kind() == "method"
|
||||
&& let Some(name_node) = arg.child_by_field_name("name")
|
||||
{
|
||||
let name = text(name_node, bytes);
|
||||
if !name.is_empty() {
|
||||
map.insert(name, target_vis);
|
||||
targeted_any = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !targeted_any {
|
||||
current = target_vis;
|
||||
}
|
||||
let _ = matches_name;
|
||||
}
|
||||
"method" => {
|
||||
if let Some(name_node) = child.child_by_field_name("name") {
|
||||
let name = text(name_node, bytes);
|
||||
if !name.is_empty() {
|
||||
map.insert(name, current);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
map
|
||||
}
|
||||
|
||||
fn ruby_visibility_for_directive(name: &str) -> Option<RubyVisibility> {
|
||||
match name {
|
||||
"private" => Some(RubyVisibility::Private),
|
||||
"protected" => Some(RubyVisibility::Protected),
|
||||
"public" => Some(RubyVisibility::Public),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect names of methods registered as Rails filter callbacks
|
||||
/// (`before_action`, `after_action`, `around_action`, with their
|
||||
/// `prepend_*` / `append_*` / `skip_*` siblings, plus the legacy
|
||||
/// `*_filter` aliases). Such methods may be public but are invoked
|
||||
/// only as part of an action's request cycle, never as standalone
|
||||
/// routes — so emitting them as units produces spurious
|
||||
/// `missing_ownership_check` flags on the helper body's row fetches.
|
||||
pub fn ruby_callback_target_names(
|
||||
body: Node<'_>,
|
||||
bytes: &[u8],
|
||||
) -> std::collections::HashSet<String> {
|
||||
use std::collections::HashSet;
|
||||
|
||||
let mut targets: HashSet<String> = HashSet::new();
|
||||
for child in named_children(body) {
|
||||
if child.kind() != "call" {
|
||||
continue;
|
||||
}
|
||||
let callee_full = call_name(child, bytes);
|
||||
let callee = bare_method_name(&callee_full);
|
||||
if !ruby_is_filter_callback_directive(callee) {
|
||||
continue;
|
||||
}
|
||||
let Some(arguments) = child.child_by_field_name("arguments") else {
|
||||
continue;
|
||||
};
|
||||
for arg in named_children(arguments) {
|
||||
if arg.kind() == "pair" {
|
||||
continue;
|
||||
}
|
||||
for name in ruby_symbol_names(arg, bytes) {
|
||||
if name.is_empty() {
|
||||
continue;
|
||||
}
|
||||
targets.insert(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
targets
|
||||
}
|
||||
|
||||
/// Is `name` one of the Rails filter-callback directives?
///
/// The accepted set is the product of an optional ordering prefix
/// (`prepend_`, `append_`, `skip_`), a phase (`before`, `after`,
/// `around`) and a suffix (`_action`, or the legacy `_filter`): 24
/// names in total. Matching is exact and case-sensitive, and at most
/// one prefix is accepted.
fn ruby_is_filter_callback_directive(name: &str) -> bool {
    const ORDERING_PREFIXES: [&str; 3] = ["prepend_", "append_", "skip_"];

    // Strip a single leading ordering prefix, if there is one.
    let base = ORDERING_PREFIXES
        .iter()
        .find_map(|prefix| name.strip_prefix(*prefix))
        .unwrap_or(name);

    matches!(
        base,
        "before_action"
            | "after_action"
            | "around_action"
            | "before_filter"
            | "after_filter"
            | "around_filter"
    )
}
|
||||
|
||||
fn ruby_symbol_names(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
||||
match node.kind() {
|
||||
"simple_symbol" | "hash_key_symbol" | "identifier" | "string" => {
|
||||
vec![
|
||||
strip_quotes(&text(node, bytes))
|
||||
.trim_start_matches(':')
|
||||
.to_string(),
|
||||
]
|
||||
}
|
||||
"array" => named_children(node)
|
||||
.into_iter()
|
||||
.flat_map(|child| ruby_symbol_names(child, bytes))
|
||||
.collect(),
|
||||
_ => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ruby_method_is_callback_or_private(
|
||||
name: &str,
|
||||
visibility: &std::collections::HashMap<String, RubyVisibility>,
|
||||
callbacks: &std::collections::HashSet<String>,
|
||||
) -> bool {
|
||||
let vis = visibility
|
||||
.get(name)
|
||||
.copied()
|
||||
.unwrap_or(RubyVisibility::Public);
|
||||
if vis != RubyVisibility::Public {
|
||||
return true;
|
||||
}
|
||||
callbacks.contains(name)
|
||||
}
|
||||
|
||||
fn function_unit_from_var_declarator(
|
||||
node: Node<'_>,
|
||||
bytes: &[u8],
|
||||
|
|
@ -3184,6 +3426,52 @@ fn collect_param_names(
|
|||
out.push(name);
|
||||
}
|
||||
}
|
||||
// Go `parameter_declaration` / `variadic_parameter_declaration`:
|
||||
// tree-sitter-go shape exposes `name` (one or more identifiers)
|
||||
// and `type` (the param's static type) as named fields. C/C++
|
||||
// also use `parameter_declaration` but with a `declarator`
|
||||
// field instead of `name`, so the `name`-field gate
|
||||
// distinguishes Go from C/C++ shapes without language plumbing.
|
||||
//
|
||||
// Two engine improvements at this site, both Go-specific:
|
||||
//
|
||||
// 1. Drop the entire param when its type is a known
|
||||
// non-user-input stdlib type. The dominant case is
|
||||
// `ctx context.Context`, the canonical first param of
|
||||
// nearly every Go function (cancellation / deadline /
|
||||
// value-bag, NOT an HTTP request). Without this gate the
|
||||
// bare param name `ctx` matches the framework-request-name
|
||||
// allow-list in `is_external_input_param_name`, opening
|
||||
// `unit_has_user_input_evidence` on every internal helper.
|
||||
// 2. Descend only into the `name` field so type-segment
|
||||
// identifiers don't pollute the param-name set. Without
|
||||
// this scope, `info *PackageInfo` contributes both `info`
|
||||
// and `PackageInfo` to `unit.params`; `path *Path` would
|
||||
// contribute `path` and `Path`, etc. Mirrors the Rust
|
||||
// `parameter` arm below.
|
||||
//
|
||||
// Real-repo trigger: `/Users/elipeter/oss/gitea` ─ ~1900
|
||||
// `go.auth.missing_ownership_check` findings on backend
|
||||
// helpers whose only "user-input evidence" was the ubiquitous
|
||||
// `ctx context.Context` first param.
|
||||
"parameter_declaration" | "variadic_parameter_declaration"
|
||||
if node.child_by_field_name("name").is_some() =>
|
||||
{
|
||||
if let Some(type_node) = node.child_by_field_name("type")
|
||||
&& is_go_non_user_input_type(type_node, bytes)
|
||||
{
|
||||
return;
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children_by_field_name("name", &mut cursor) {
|
||||
if child.kind() == "identifier" {
|
||||
let name = text(child, bytes);
|
||||
if !name.is_empty() && !out.contains(&name) {
|
||||
out.push(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Rust `parameter` node: descend ONLY into the `pattern` field so
|
||||
// type-segment identifiers don't pollute the param-name set.
|
||||
// Without this scope, `dst: &std::path::Path` contributes `std`,
|
||||
|
|
@ -3294,6 +3582,48 @@ fn collect_param_names(
|
|||
}
|
||||
}
|
||||
|
||||
/// Recognise Go parameter types that are categorically not user-input
|
||||
/// bearing. Used by the Go arm of [`collect_param_names`] to drop the
|
||||
/// param entirely (rather than push its name into `unit.params` and
|
||||
/// trip the framework-request-name allow-list in
|
||||
/// `is_external_input_param_name`).
|
||||
///
|
||||
/// Conservative: only matches the stdlib `context.Context` /
|
||||
/// `context.CancelFunc` interface idioms. These are the dominant
|
||||
/// cluster ─ ~1900 findings on `/Users/elipeter/oss/gitea` ─ and there
|
||||
/// is no shape under which they carry user input.
|
||||
///
|
||||
/// Implementation note: tree-sitter-go's `qualified_type` exposes
|
||||
/// `package` (identifier) and `name` (type_identifier) as named fields.
|
||||
/// Pointer-wrapping is rare for these (they're already interfaces) but
|
||||
/// is handled defensively by descending through `pointer_type`.
|
||||
fn is_go_non_user_input_type(type_node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
let mut node = type_node;
|
||||
// Strip a single layer of pointer indirection if present.
|
||||
if node.kind() == "pointer_type" {
|
||||
if let Some(inner) = node.child_by_field_name("type") {
|
||||
node = inner;
|
||||
} else if let Some(inner) = node.named_child(0) {
|
||||
node = inner;
|
||||
}
|
||||
}
|
||||
if node.kind() != "qualified_type" {
|
||||
return false;
|
||||
}
|
||||
let pkg = node
|
||||
.child_by_field_name("package")
|
||||
.map(|n| text(n, bytes))
|
||||
.unwrap_or_default();
|
||||
let name = node
|
||||
.child_by_field_name("name")
|
||||
.map(|n| text(n, bytes))
|
||||
.unwrap_or_default();
|
||||
matches!(
|
||||
(pkg.as_str(), name.as_str()),
|
||||
("context", "Context") | ("context", "CancelFunc")
|
||||
)
|
||||
}
|
||||
|
||||
/// Ascii-lowered id-shape predicate used by the Python typed-param
|
||||
/// fallback in `collect_param_names`. Mirrors
|
||||
/// `auth_analysis::checks::is_id_like_name` (cannot share that fn
|
||||
|
|
@ -4451,4 +4781,242 @@ mod tests {
|
|||
assert!(params.contains(&"b".to_string()), "got {:?}", params);
|
||||
assert!(!params.contains(&"u32".to_string()), "got {:?}", params);
|
||||
}
|
||||
|
||||
/// A Go parameter typed as the stdlib `context.Context` must be
/// dropped from the collected param names, and no identifier from a
/// parameter's type may leak into them.
///
/// `context.Context` carries deadline / cancellation / value-bag data,
/// never an HTTP request; left in place, its conventional name `ctx`
/// would trip the framework-request-name allow-list. Motivated by
/// gitea's `AddFileToExistingPackage` and roughly 1900 sibling helpers
/// that passed `unit_has_user_input_evidence` solely on this param.
#[test]
fn collect_param_names_go_drops_context_context_param() {
    use super::function_params;

    let source = b"package x\nfunc GetPackage(ctx context.Context, info *PackageInfo) {}\n";
    let language = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&language).unwrap();
    let tree = parser.parse(source.as_slice(), None).unwrap();

    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .filter_map(|i| root.named_child(i as u32))
        .find(|n| n.kind() == "function_declaration")
        .expect("file should have a function_declaration");

    let params = function_params(func, source);
    let has = |name: &str| params.contains(&name.to_string());

    assert!(
        !has("ctx"),
        "ctx context.Context must be dropped: got {:?}",
        params
    );
    assert!(
        !has("context") && !has("Context"),
        "type-segment idents must not leak: got {:?}",
        params
    );
    assert!(
        has("info"),
        "non-context typed params keep their name: got {:?}",
        params
    );
    assert!(
        !has("PackageInfo"),
        "type-segment idents must not leak from non-context params either: got {:?}",
        params
    );
}
|
||||
|
||||
/// A framework request type such as gitea's `*context.APIContext`
/// does carry user input, so the type-aware filter must keep the
/// parameter even though its package segment reads `context`: only
/// the exact stdlib type names are dropped.
#[test]
fn collect_param_names_go_keeps_framework_context_param() {
    use super::function_params;

    let source = b"package x\nfunc Handle(ctx *context.APIContext) {}\n";
    let language = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&language).unwrap();
    let tree = parser.parse(source.as_slice(), None).unwrap();

    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .filter_map(|i| root.named_child(i as u32))
        .find(|n| n.kind() == "function_declaration")
        .expect("file should have a function_declaration");

    let params = function_params(func, source);
    let kept = params.iter().any(|p| p == "ctx");
    assert!(kept, "framework-bearing ctx must survive: got {:?}", params);
}
|
||||
|
||||
/// A Go declaration that shares one type across several names
/// (`a, b int`) must contribute every name, while the type identifier
/// and a `context.Context` param in the same list stay out.
#[test]
fn collect_param_names_go_multi_name_param_decl() {
    use super::function_params;

    let source = b"package x\nfunc Add(a, b int, ctx context.Context) {}\n";
    let language = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&language).unwrap();
    let tree = parser.parse(source.as_slice(), None).unwrap();

    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .filter_map(|i| root.named_child(i as u32))
        .find(|n| n.kind() == "function_declaration")
        .expect("file should have a function_declaration");

    let params = function_params(func, source);
    let has = |name: &str| params.contains(&name.to_string());

    for expected in ["a", "b"] {
        assert!(has(expected), "got {:?}", params);
    }
    for unexpected in ["ctx", "int"] {
        assert!(!has(unexpected), "got {:?}", params);
    }
}
|
||||
|
||||
mod ruby_visibility_and_callbacks {
|
||||
use super::super::{
|
||||
RubyVisibility, ruby_callback_target_names, ruby_method_is_callback_or_private,
|
||||
ruby_method_visibility,
|
||||
};
|
||||
use tree_sitter::{Node, Parser, Tree};
|
||||
|
||||
fn parse(src: &str) -> (Tree, Vec<u8>) {
|
||||
let mut parser = Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE))
|
||||
.unwrap();
|
||||
let bytes = src.as_bytes().to_vec();
|
||||
let tree = parser.parse(bytes.as_slice(), None).expect("parse");
|
||||
(tree, bytes)
|
||||
}
|
||||
|
||||
fn find_class_body<'a>(node: Node<'a>) -> Option<Node<'a>> {
|
||||
if node.kind() == "class" {
|
||||
return node.child_by_field_name("body");
|
||||
}
|
||||
for idx in 0..node.named_child_count() {
|
||||
let Some(child) = node.named_child(idx as u32) else {
|
||||
continue;
|
||||
};
|
||||
if let Some(body) = find_class_body(child) {
|
||||
return Some(body);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bare_private_directive_marks_subsequent_methods_private() {
|
||||
let src = "class C\n def public_a; end\n private\n def helper_b; end\n def helper_c; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let vis = ruby_method_visibility(body, &bytes);
|
||||
assert_eq!(vis.get("public_a").copied(), Some(RubyVisibility::Public));
|
||||
assert_eq!(vis.get("helper_b").copied(), Some(RubyVisibility::Private));
|
||||
assert_eq!(vis.get("helper_c").copied(), Some(RubyVisibility::Private));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn targeted_private_marks_only_named_methods() {
|
||||
let src = "class C\n def a; end\n def b; end\n def c; end\n private :a, :c\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let vis = ruby_method_visibility(body, &bytes);
|
||||
assert_eq!(vis.get("a").copied(), Some(RubyVisibility::Private));
|
||||
assert_eq!(vis.get("b").copied(), Some(RubyVisibility::Public));
|
||||
assert_eq!(vis.get("c").copied(), Some(RubyVisibility::Private));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn public_directive_re_opens_visibility() {
|
||||
let src = "class C\n private\n def a; end\n public\n def b; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let vis = ruby_method_visibility(body, &bytes);
|
||||
assert_eq!(vis.get("a").copied(), Some(RubyVisibility::Private));
|
||||
assert_eq!(vis.get("b").copied(), Some(RubyVisibility::Public));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn protected_directive_recognised() {
|
||||
let src = "class C\n protected\n def helper; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let vis = ruby_method_visibility(body, &bytes);
|
||||
assert_eq!(vis.get("helper").copied(), Some(RubyVisibility::Protected));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn before_action_collects_callback_target_names() {
|
||||
let src = "class C\n before_action :set_account\n before_action :set_user, only: [:show, :update]\n def show; end\n def set_account; end\n def set_user; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(callbacks.contains("set_account"));
|
||||
assert!(callbacks.contains("set_user"));
|
||||
// `only:` / `except:` keys must not pollute the target set.
|
||||
assert!(!callbacks.contains("show"));
|
||||
assert!(!callbacks.contains("update"));
|
||||
assert!(!callbacks.contains("only"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn before_action_block_form_yields_no_targets() {
|
||||
// Block form `before_action do ... end` carries no symbol arg.
|
||||
let src =
|
||||
"class C\n before_action do\n require_login\n end\n def show; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(callbacks.is_empty(), "got {:?}", callbacks);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn skip_before_action_target_collected() {
|
||||
let src = "class C\n skip_before_action :authenticate_user!, only: [:index]\n def index; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(callbacks.contains("authenticate_user!"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn legacy_before_filter_alias_collected() {
|
||||
let src = "class C\n before_filter :legacy_helper\n def legacy_helper; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(callbacks.contains("legacy_helper"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn callback_target_or_private_predicate_combines_layers() {
|
||||
// Private method → suppressed.
|
||||
// Public callback target → suppressed.
|
||||
// Public non-callback method → kept.
|
||||
let src = "class C\n before_action :set_account\n def show; end\n def set_account; end\n private\n def helper; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let visibility = ruby_method_visibility(body, &bytes);
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(!ruby_method_is_callback_or_private(
|
||||
"show",
|
||||
&visibility,
|
||||
&callbacks
|
||||
));
|
||||
assert!(ruby_method_is_callback_or_private(
|
||||
"set_account",
|
||||
&visibility,
|
||||
&callbacks
|
||||
));
|
||||
assert!(ruby_method_is_callback_or_private(
|
||||
"helper",
|
||||
&visibility,
|
||||
&callbacks
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,11 +53,18 @@ pub fn extract_authorization_model(
|
|||
&actix_web::ActixWebExtractor,
|
||||
&rocket::RocketExtractor,
|
||||
];
|
||||
let mut model = AuthorizationModel::default();
|
||||
let mut model = AuthorizationModel {
|
||||
lang: lang.to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
for extractor in extractors {
|
||||
if extractor.supports(lang, framework_ctx) {
|
||||
model.extend(extractor.extract(tree, bytes, path, rules));
|
||||
let mut other = extractor.extract(tree, bytes, path, rules);
|
||||
// Preserve the canonical `lang` set above; sub-extractors
|
||||
// build their own default-initialised models with empty lang.
|
||||
other.lang = model.lang.clone();
|
||||
model.extend(other);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
use super::AuthExtractor;
|
||||
use super::common::{
|
||||
auth_check_from_call_site, build_function_unit, call_name, call_site_from_node, function_name,
|
||||
named_children, span, text,
|
||||
named_children, ruby_callback_target_names, ruby_method_is_callback_or_private,
|
||||
ruby_method_visibility, span, text,
|
||||
};
|
||||
use crate::auth_analysis::config::{AuthAnalysisRules, matches_name, strip_quotes};
|
||||
use crate::auth_analysis::model::{
|
||||
|
|
@ -102,6 +103,19 @@ fn maybe_collect_controller(
|
|||
);
|
||||
let controller_segment = underscore_segment(class_name.trim_end_matches("Controller"));
|
||||
let filter_directives = class_filter_directives(body, bytes);
|
||||
// Rails routes only dispatch to public instance methods that are
|
||||
// not registered as filter callbacks. Private / protected helpers
|
||||
// and methods named in `before_action :foo` / `after_action :bar`
|
||||
// run as part of an action's request cycle but are never
|
||||
// independently routable, so emitting them as RouteHandler units
|
||||
// produces false positives (e.g. `set_account` in
|
||||
// `mastodon/app/controllers/admin/accounts_controller.rb` does
|
||||
// `Account.find(params[:id])` inside a `private` block, with the
|
||||
// actual `authorize @account` check living in the public action
|
||||
// that triggers the callback). Skip them here; the action units
|
||||
// remain under analysis with their own auth context.
|
||||
let visibility = ruby_method_visibility(body, bytes);
|
||||
let callback_targets = ruby_callback_target_names(body, bytes);
|
||||
let controller_name = format!(
|
||||
"{}{}",
|
||||
if controller_namespace.is_empty() {
|
||||
|
|
@ -122,6 +136,9 @@ fn maybe_collect_controller(
|
|||
if action_name.is_empty() || action_name.ends_with('=') {
|
||||
continue;
|
||||
}
|
||||
if ruby_method_is_callback_or_private(&action_name, &visibility, &callback_targets) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let unit_idx = model.units.len();
|
||||
let route_name = format!("{controller_name}#{action_name}");
|
||||
|
|
|
|||
|
|
@ -362,6 +362,11 @@ pub struct AuthorizationModel {
|
|||
///
|
||||
/// Currently set only for Rust by `extract_authorization_model`.
|
||||
pub lang_web_framework_signal: Option<bool>,
|
||||
/// Source language of the file the model was built from. Used by
|
||||
/// `unit_has_user_input_evidence` to apply per-language narrowing
|
||||
/// of the framework-request-name allow-list. Empty string when no
|
||||
/// language was supplied (single-file unit-test paths).
|
||||
pub lang: String,
|
||||
}
|
||||
|
||||
impl AuthorizationModel {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue