Authorization analysis logic improvements (#61)

This commit is contained in:
Eli Peter 2026-05-02 16:44:49 -04:00 committed by GitHub
parent 3c89bddbf2
commit 40995e45e7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 4193 additions and 134 deletions

View file

@ -1,6 +1,6 @@
use super::config::AuthAnalysisRules;
use super::model::AuthorizationModel;
use crate::utils::project::FrameworkContext;
use crate::utils::project::{FrameworkContext, rust_file_imports_web_framework};
use std::path::Path;
use tree_sitter::Tree;
@ -61,6 +61,18 @@ pub fn extract_authorization_model(
}
}
// Per-language web-framework signal used to gate the param-name arm
// of `unit_has_user_input_evidence`. Combines the project-root
// manifest detection (`framework_ctx`) with a per-file `use`/`import`
// check, so a single file in a workspace whose root manifest does
// not name a web framework can still opt back in by directly
// importing one (e.g. `crates/collab/src/rpc.rs` in zed: workspace
// root has no axum, but the file uses `axum::Router`).
//
// Three-valued: `Some(true)` keeps the param-name arm (step 3) firing,
// `Some(false)` suppresses it, and `None` means no detection ran, so
// behavior is unchanged.
model.lang_web_framework_signal = compute_web_framework_signal(lang, framework_ctx, bytes);
// **Dedup units by span across extractors.** Multiple extractors
// (e.g. Flask + Django on a Python file) each call
// `collect_top_level_units`, producing one unit per top-level
@ -80,6 +92,53 @@ pub fn extract_authorization_model(
model
}
/// Derive the per-file web-framework signal that gates the param-name
/// arm of `unit_has_user_input_evidence`.
///
/// Only Rust sources (`lang` equal to `"rust"` or `"rs"`) can yield a
/// non-`None` answer; every other language returns `None` so its
/// existing handler-classification behavior is left untouched.
///
/// # Arguments
/// * `lang` - language tag of the file being analysed.
/// * `framework_ctx` - project-level framework detection result, if any
///   was produced for this scan.
/// * `bytes` - raw contents of the file, handed to
///   `rust_file_imports_web_framework` for the per-file import check.
///
/// # Returns
/// * `Some(true)` - the project-level context reports a Rust web
///   framework, or the file itself imports one (axum / actix_web /
///   rocket). Param-name evidence stays enabled.
/// * `Some(false)` - the project-level context was consulted and
///   reported no Rust web framework, and the file imports none either.
///   Param-name evidence is suppressed.
/// * `None` - the file is not Rust, or no project-level answer exists
///   and the file imports no framework. Prior behavior is preserved.
fn compute_web_framework_signal(
    lang: &str,
    framework_ctx: Option<&FrameworkContext>,
    bytes: &[u8],
) -> Option<bool> {
    // Non-Rust files never participate in this gating.
    if lang != "rust" && lang != "rs" {
        return None;
    }

    // Project-level verdict: absent when there is no context at all, or
    // when the context has nothing to say about Rust.
    let manifest_verdict = match framework_ctx {
        Some(ctx) => ctx.lang_has_web_framework("rust"),
        None => None,
    };

    match manifest_verdict {
        // The project already names a framework; no need to scan the file.
        Some(true) => Some(true),
        // Per-file fallback: a direct framework import opts the file back
        // in even when the workspace-root manifest does not list the crate
        // (e.g. zed's `crates/collab/src/rpc.rs`, which imports axum).
        _ if rust_file_imports_web_framework(bytes) => Some(true),
        // No file-level evidence either: pass the project verdict through,
        // so `Some(false)` appears only when a manifest was actually
        // inspected and context-free scans keep getting `None`.
        inconclusive => inconclusive,
    }
}
fn deduplicate_units_by_span(model: &mut AuthorizationModel) {
use crate::auth_analysis::model::{AnalysisUnit, AnalysisUnitKind};
use std::collections::HashMap;